Source code for graspologic.embed.lse

# Copyright (c) Microsoft Corporation and contributors.
# Licensed under the MIT License.

import warnings

from .base import BaseSpectralEmbed
from ..utils import import_graph, to_laplace, is_fully_connected

[docs]class LaplacianSpectralEmbed(BaseSpectralEmbed): r""" Class for computing the laplacian spectral embedding of a graph. The laplacian spectral embedding (LSE) is a k-dimensional Euclidean representation of the graph based on its Laplacian matrix. It relies on an SVD to reduce the dimensionality to the specified k, or if k is unspecified, can find a number of dimensions automatically. Read more in the :ref:`tutorials <embed_tutorials>` Parameters ---------- form : {'DAD' (default), 'I-DAD', 'R-DAD'}, optional Specifies the type of Laplacian normalization to use. n_components : int or None, default = None Desired dimensionality of output data. If "full", ``n_components`` must be ``<= min(X.shape)``. Otherwise, ``n_components`` must be ``< min(X.shape)``. If None, then optimal dimensions will be chosen by :func:`~graspologic.embed.select_dimension` using ``n_elbows`` argument. n_elbows : int, optional, default: 2 If ``n_components`` is None, then compute the optimal embedding dimension using :func:`~graspologic.embed.select_dimension`. Otherwise, ignored. algorithm : {'randomized' (default), 'full', 'truncated'}, optional SVD solver to use: - 'randomized' Computes randomized svd using :func:`sklearn.utils.extmath.randomized_svd` - 'full' Computes full svd using :func:`scipy.linalg.svd` - 'truncated' Computes truncated svd using :func:`scipy.sparse.linalg.svds` n_iter : int, optional (default = 5) Number of iterations for randomized SVD solver. Not used by 'full' or 'truncated'. The default is larger than the default in randomized_svd to handle sparse matrices that may have large slowly decaying spectrum. check_lcc : bool , optional (defult = True) Whether to check if input graph is connected. May result in non-optimal results if the graph is unconnected. If True and input is unconnected, a UserWarning is thrown. Not checking for connectedness may result in faster computation. regularizer: int, float or None, optional (default=None) Constant to be added to the diagonal of degree matrix. If None, average node degree is added. If int or float, must be >= 0. Only used when ``form`` is 'R-DAD'. concat : bool, optional (default False) If graph is directed, whether to concatenate left and right (out and in) latent positions along axis 1. Attributes ---------- n_features_in_: int Number of features passed to the :func:`` method. latent_left_ : array, shape (n_samples, n_components) Estimated left latent positions of the graph. latent_right_ : array, shape (n_samples, n_components), or None Only computed when the graph is directed, or adjacency matrix is assymetric. Estimated right latent positions of the graph. Otherwise, None. singular_values_ : array, shape (n_components) Singular values associated with the latent position matrices. See Also -------- graspologic.embed.selectSVD graspologic.embed.select_dimension graspologic.utils.to_laplace Notes ----- The singular value decomposition: .. math:: A = U \Sigma V^T is used to find an orthonormal basis for a matrix, which in our case is the Laplacian matrix of the graph. These basis vectors (in the matrices U or V) are ordered according to the amount of variance they explain in the original matrix. By selecting a subset of these basis vectors (through our choice of dimensionality reduction) we can find a lower dimensional space in which to represent the graph. References ---------- .. [1] Sussman, D.L., Tang, M., Fishkind, D.E., Priebe, C.E. "A Consistent Adjacency Spectral Embedding for Stochastic Blockmodel Graphs," Journal of the American Statistical Association, Vol. 107(499), 2012. .. [2] Von Luxburg, Ulrike. "A tutorial on spectral clustering," Statistics and computing, Vol. 17(4), pp. 395-416, 2007. .. [3] Rohe, Karl, Sourav Chatterjee, and Bin Yu. "Spectral clustering and the high-dimensional stochastic blockmodel," The Annals of Statistics, Vol. 39(4), pp. 1878-1915, 2011. """ def __init__( self, form="DAD", n_components=None, n_elbows=2, algorithm="randomized", n_iter=5, check_lcc=True, regularizer=None, concat=False, ): super().__init__( n_components=n_components, n_elbows=n_elbows, algorithm=algorithm, n_iter=n_iter, check_lcc=check_lcc, concat=concat, ) self.form = form self.regularizer = regularizer
[docs] def fit(self, graph, y=None): """ Fit LSE model to input graph By default, uses the Laplacian normalization of the form: .. math:: L = D^{-1/2} A D^{-1/2} Parameters ---------- graph : array-like, scipy.sparse.csr_matrix, or networkx.Graph Input graph to embed. see graspologic.utils.import_graph Returns ------- self : object Returns an instance of self. """ A = import_graph(graph) if self.check_lcc: if not is_fully_connected(A): msg = ( "Input graph is not fully connected. Results may not" + "be optimal. You can compute the largest connected component by" + "using ``graspologic.utils.get_lcc``." ) warnings.warn(msg, UserWarning) self.n_features_in_ = A.shape[0] L_norm = to_laplace(A, form=self.form, regularizer=self.regularizer) self._reduce_dim(L_norm) return self