# Source code for graspy.pipeline.mug2vec

# Copyright 2019 NeuroData (http://neurodata.io)
#
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#
# Unless required by applicable law or agreed to in writing, software
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and

import numpy as np
from sklearn.base import BaseEstimator

from ..embed import ClassicalMDS, OmnibusEmbed
from ..utils import pass_to_ranks

[docs]class mug2vec(BaseEstimator):
r"""
Multigraphs-2-vectors (mug2vec).

mug2vec is a sequence of three algorithms that learns a feature vector for each
input graph.

Steps:

1. Pass to ranks - ranks all edge weights from smallest to largest valued edges
then normalize by a constant.

2. Omnibus embedding - jointly learns a low dimensional matrix representation for
all graphs under the random dot product model (RDPG).

3. Classical MDS (cMDS) - learns a feature vector for each graph by computing
Euclidean distance between each pair of graph embeddings from omnibus embedding,
followed by an eigen decomposition.

Parameters
----------
pass_to_ranks: {'simple-nonzero' (default), 'simple-all', 'zero-boost'} string, or None

- 'simple-nonzero'
assigns ranks to all non-zero edges, settling ties using
the average. Ranks are then scaled by
:math:\frac{rank(\text{non-zero edges})}{\text{total non-zero edges} + 1}
- 'simple-all'
assigns ranks to all non-zero edges, settling ties using
the average. Ranks are then scaled by
:math:\frac{rank(\text{non-zero edges})}{n^2 + 1}
where n is the number of nodes
- 'zero-boost'
preserves the edge weight for all 0s, but ranks the other
edges as if the ranks of all 0 edges has been assigned. If there are
10 0-valued edges, the lowest non-zero edge gets weight 11 / (number
of possible edges). Ties settled by the average of the weight that those
edges would have received. Number of possible edges is determined
by the type of graph (loopless or looped, directed or undirected).
- None
No pass to ranks applied.

omnibus_components, cmds_components : int or None, default = None
Desired dimensionality of output data. If "full",
n_components must be <= min(X.shape). Otherwise, n_components must be
< min(X.shape). If None, then optimal dimensions will be chosen by
select_dimension using n_elbows argument.

omnibus_n_elbows, cmds_n_elbows: int, optional, default: 2
If n_components=None, then compute the optimal embedding dimension using
select_dimension. Otherwise, ignored.

Attributes
----------
omnibus_n_components_ : int
Equals the parameter n_components. If input n_components was None,
then equals the optimal embedding dimension.

cmds_n_components_ : int
Equals the parameter n_components. If input n_components was None,
then equals the optimal embedding dimension.

embeddings_ : array, shape (n_components, n_features)
Embeddings from the pipeline. Each graph is a point in n_features
dimensions.

--------
graspy.utils.pass_to_ranks
graspy.embed.OmnibusEmbed
graspy.embed.ClassicalMDS
graspy.embed.select_dimension
"""

def __init__(
self,
pass_to_ranks="simple-nonzero",
omnibus_components=None,
omnibus_n_elbows=2,
cmds_components=None,
cmds_n_elbows=2,
):
self.pass_to_ranks = pass_to_ranks
self.omnibus_components = omnibus_components
self.omnibus_n_elbows = omnibus_n_elbows
self.cmds_components = cmds_components
self.cmds_n_elbows = cmds_n_elbows

def _check_inputs(self):
variables = self.get_params()
variables.pop("pass_to_ranks")

for name, val in variables.items():
if val is not None:
if not isinstance(val, int):
msg = "{} must be an int or None.".format(name)
raise ValueError(msg)
elif val <= 0:
msg = "{} must be > 0.".format(name)
raise ValueError(msg)

[docs]    def fit(self, graphs, y=None):
"""
Computes a vector for each graph.

Parameters
----------
graphs : list of nx.Graph or ndarray, or ndarray
If list of nx.Graph, each Graph must contain same number of nodes.
If list of ndarray, each array must have shape (n_vertices, n_vertices).
If ndarray, then array must have shape (n_graphs, n_vertices, n_vertices).

y : Ignored

Returns
-------
self : returns an instance of self.
"""
# Check these prior to PTR just in case
self._check_inputs()

if pass_to_ranks is not None:
graphs = [pass_to_ranks(g, self.pass_to_ranks) for g in graphs]

omni = OmnibusEmbed(
n_components=self.omnibus_components, n_elbows=self.omnibus_n_elbows
)
omnibus_embedding = omni.fit_transform(graphs)

self.omnibus_n_components_ = omnibus_embedding.shape[-1]

cmds = ClassicalMDS(
n_components=self.cmds_components, n_elbows=self.cmds_n_elbows
)
self.embeddings_ = cmds.fit_transform(omnibus_embedding)
self.cmds_components_ = self.embeddings_.shape[-1]

return self

[docs]    def fit_transform(self, graphs, y=None):
"""
Computes a vector for each graph.

Parameters
----------
graphs : list of nx.Graph or ndarray, or ndarray
If list of nx.Graph, each Graph must contain same number of nodes.
If list of ndarray, each array must have shape (n_vertices, n_vertices).
If ndarray, then array must have shape (n_graphs, n_vertices, n_vertices).

y : Ignored

Returns
-------
embeddings : returns an instance of self.
"""
self.fit(graphs)

return self.embeddings_