Source code for graspy.embed.ase

# Copyright 2019 NeuroData (http://neurodata.io)
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import warnings

from .base import BaseEmbed
from ..utils import import_graph, is_fully_connected


[docs]class AdjacencySpectralEmbed(BaseEmbed): r""" Class for computing the adjacency spectral embedding of a graph The adjacency spectral embedding (ASE) is a k-dimensional Euclidean representation of the graph based on its adjacency matrix [1]_. It relies on an SVD to reduce the dimensionality to the specified k, or if k is unspecified, can find a number of dimensions automatically (see graspy.embed.selectSVD). Parameters ---------- n_components : int or None, default = None Desired dimensionality of output data. If "full", n_components must be <= min(X.shape). Otherwise, n_components must be < min(X.shape). If None, then optimal dimensions will be chosen by ``select_dimension`` using ``n_elbows`` argument. n_elbows : int, optional, default: 2 If `n_components=None`, then compute the optimal embedding dimension using `select_dimension`. Otherwise, ignored. algorithm : {'randomized' (default), 'full', 'truncated'}, optional SVD solver to use: - 'randomized' Computes randomized svd using ``sklearn.utils.extmath.randomized_svd`` - 'full' Computes full svd using ``scipy.linalg.svd`` - 'truncated' Computes truncated svd using ``scipy.sparse.linalg.svd`` n_iter : int, optional (default = 5) Number of iterations for randomized SVD solver. Not used by 'full' or 'truncated'. The default is larger than the default in randomized_svd to handle sparse matrices that may have large slowly decaying spectrum. check_lcc : bool , optional (defult = True) Whether to check if input graph is connected. May result in non-optimal results if the graph is unconnected. If True and input is unconnected, a UserWarning is thrown. Not checking for connectedness may result in faster computation. Attributes ---------- latent_left_ : array, shape (n_samples, n_components) Estimated left latent positions of the graph. latent_right_ : array, shape (n_samples, n_components), or None Only computed when the graph is directed, or adjacency matrix is assymetric. Estimated right latent positions of the graph. Otherwise, None. singular_values_ : array, shape (n_components) Singular values associated with the latent position matrices. See Also -------- graspy.embed.selectSVD graspy.embed.select_dimension Notes ----- The singular value decomposition: .. math:: A = U \Sigma V^T is used to find an orthonormal basis for a matrix, which in our case is the adjacency matrix of the graph. These basis vectors (in the matrices U or V) are ordered according to the amount of variance they explain in the original matrix. By selecting a subset of these basis vectors (through our choice of dimensionality reduction) we can find a lower dimensional space in which to represent the graph. References ---------- .. [1] Sussman, D.L., Tang, M., Fishkind, D.E., Priebe, C.E. "A Consistent Adjacency Spectral Embedding for Stochastic Blockmodel Graphs," Journal of the American Statistical Association, Vol. 107(499), 2012 """ def __init__( self, n_components=None, n_elbows=2, algorithm="randomized", n_iter=5, check_lcc=True, ): super().__init__( n_components=n_components, n_elbows=n_elbows, algorithm=algorithm, n_iter=n_iter, check_lcc=check_lcc, )
[docs] def fit(self, graph, y=None): """ Fit ASE model to input graph Parameters ---------- graph : array_like or networkx.Graph Input graph to embed. Returns ------- self : returns an instance of self. """ A = import_graph(graph) if self.check_lcc: if not is_fully_connected(A): msg = ( "Input graph is not fully connected. Results may not" + "be optimal. You can compute the largest connected component by" + "using ``graspy.utils.get_lcc``." ) warnings.warn(msg, UserWarning) self._reduce_dim(A) return self