# Source code for graspy.embed.mase

# Copyright 2019 NeuroData (http://neurodata.io)
#
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#
# Unless required by applicable law or agreed to in writing, software
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and

import numpy as np
from sklearn.utils.validation import check_is_fitted

from ..utils import import_graph, is_almost_symmetric
from .base import BaseEmbedMulti
from .svd import select_dimension, selectSVD

r"""
Multiple Adjacency Spectral Embedding (MASE) embeds arbitrary number of input
graphs with matched vertex sets.

For a population of undirected graphs, MASE assumes that the population of graphs
is sampled from :math:VR^{(i)}V^T where :math:V \in \mathbb{R}^{n\times d} and
:math:R^{(i)} \in \mathbb{R}^{d\times d}. Score matrices, :math:R^{(i)}, are
allowed to vary for each graph, but are symmetric. All graphs share a common a
latent position matrix :math:V.

For a population of directed graphs, MASE assumes that the population is sampled
from :math:UR^{(i)}V^T where :math:U \in \mathbb{R}^{n\times d_1},
:math:V \in \mathbb{R}^{n\times d_2}, and
:math:R^{(i)} \in \mathbb{R}^{d_1\times d_2}. In this case, score matrices
:math:R^{(i)} can be assymetric and non-square, but all graphs still share a
common latent position matrices :math:U and :math:V.

Parameters
----------
n_components : int or None, default = None
Desired dimensionality of output data. If "full",
n_components must be <= min(X.shape). Otherwise, n_components must be
< min(X.shape). If None, then optimal dimensions will be chosen by
:func:~graspy.embed.select_dimension using n_elbows argument.

n_elbows : int, optional, default: 2
If n_components=None, then compute the optimal embedding dimension using
:func:~graspy.embed.select_dimension. Otherwise, ignored.

algorithm : {'randomized' (default), 'full', 'truncated'}, optional
SVD solver to use:

- 'randomized'
Computes randomized svd using
:func:sklearn.utils.extmath.randomized_svd
- 'full'
Computes full svd using :func:scipy.linalg.svd
- 'truncated'
Computes truncated svd using :func:scipy.sparse.linalg.svds

n_iter : int, optional (default = 5)
Number of iterations for randomized SVD solver. Not used by 'full' or
'truncated'. The default is larger than the default in randomized_svd
to handle sparse matrices that may have large slowly decaying spectrum.

scaled : bool, optional (default=False)
Whether to scale individual eigenvectors with eigenvalues in first embedding
stage.

Attributes
----------
n_graphs_ : int
Number of graphs

n_vertices_ : int
Number of vertices in each graph

latent_left_ : array, shape (n_samples, n_components)
Estimated left latent positions of the graph.

latent_right_ : array, shape (n_samples, n_components), or None
Estimated right latent positions of the graph. Only computed when the an input
graph is directed, or adjacency matrix is assymetric. Otherwise, None.

scores_ : array, shape (n_samples, n_components, n_components)
Estimated :math:\hat{R} matrices for each input graph.

Notes
-----
When an input graph is directed, n_components of latent_left_ may not be equal
to n_components of latent_right_.
"""

def __init__(
self,
n_components=None,
n_elbows=2,
algorithm="randomized",
n_iter=5,
scaled=False,
):
if not isinstance(scaled, bool):
msg = "scaled must be a boolean, not {}".format(scaled)
raise TypeError(msg)

super().__init__(
n_components=n_components,
n_elbows=n_elbows,
algorithm=algorithm,
n_iter=n_iter,
)
self.scaled = scaled

def _reduce_dim(self, graphs):
# first embed into log2(n_vertices) for each graph
n_components = int(np.ceil(np.log2(np.min(self.n_vertices_))))

# embed individual graphs
embeddings = [
selectSVD(
graph,
n_components=n_components,
algorithm=self.algorithm,
n_iter=self.n_iter,
)
for graph in graphs
]
Us, Ds, Vs = zip(*embeddings)

# Choose the best embedding dimension for each graphs
if self.n_components is None:
embedding_dimensions = []
for D in Ds:
elbows, _ = select_dimension(D, n_elbows=self.n_elbows)
embedding_dimensions.append(elbows[-1])

# Choose the max of all of best embedding dimension of all graphs
best_dimension = int(np.ceil(np.max(embedding_dimensions)))
else:
best_dimension = self.n_components

if not self.scaled:
Us = np.hstack([U[:, :best_dimension] for U in Us])
Vs = np.hstack([V.T[:, :best_dimension] for V in Vs])
else:
# Equivalent to ASE
Us = np.hstack(
[
U[:, :best_dimension] @ np.diag(np.sqrt(D[:best_dimension]))
for U, D in zip(Us, Ds)
]
)
Vs = np.hstack(
[
V.T[:, :best_dimension] @ np.diag(np.sqrt(D[:best_dimension]))
for V, D in zip(Vs, Ds)
]
)

# Second SVD for vertices
# The notation is slightly different than the paper
Uhat, _, _ = selectSVD(
Us,
n_components=self.n_components,
n_elbows=self.n_elbows,
algorithm=self.algorithm,
n_iter=self.n_iter,
)

Vhat, _, _ = selectSVD(
Vs,
n_components=self.n_components,
n_elbows=self.n_elbows,
algorithm=self.algorithm,
n_iter=self.n_iter,
)
return Uhat, Vhat

[docs]    def fit(self, graphs, y=None):
"""
Fit the model with graphs.

Parameters
----------
graphs : list of nx.Graph or ndarray, or ndarray
If list of nx.Graph, each Graph must contain same number of nodes.
If list of ndarray, each array must have shape (n_vertices, n_vertices).
If ndarray, then array must have shape (n_graphs, n_vertices, n_vertices).

y : Ignored

Returns
-------
self : returns an instance of self.
"""
graphs = self._check_input_graphs(graphs)

# Check if undirected
undirected = all(is_almost_symmetric(g) for g in graphs)

# embed
Uhat, Vhat = self._reduce_dim(graphs)
self.latent_left_ = Uhat
if not undirected:
self.latent_right_ = Vhat
self.scores_ = Uhat.T @ graphs @ Vhat
else:
self.latent_right_ = None
self.scores_ = Uhat.T @ graphs @ Uhat

return self

[docs]    def fit_transform(self, graphs, y=None):
"""
Fit the model with graphs and apply the embedding on graphs.
n_components is either automatically determined or based on user input.

Parameters
----------
graphs : list of nx.Graph or ndarray, or ndarray
If list of nx.Graph, each Graph must contain same number of nodes.
If list of ndarray, each array must have shape (n_vertices, n_vertices).
If ndarray, then array must have shape (n_graphs, n_vertices, n_vertices).

y : Ignored

Returns
-------
out : array-like, shape (n_vertices, n_components) if input
graphs were symmetric. If graphs were directed, returns tuple of
two arrays (same shape as above) where the first corresponds to the
left latent positions, and the right to the right latent positions
"""
return self._fit_transform(graphs)