# Source code for netrd.distance.deltacon

"""
deltacon.py
--------------------------

Deltacon measure for graph distance, after:

Koutra, Danai, Joshua T. Vogelstein, and Christos Faloutsos. 2013. “Deltacon: A
Principled Massive-Graph Similarity Function.” In Proceedings of the 2013 SIAM
International Conference on Data Mining, 162–70. Society for Industrial and
Applied Mathematics. https://doi.org/10.1137/1.9781611972832.18.

author: Stefan McCabe
email: stefanmccabe at gmail dot com
Submitted as part of the 2019 NetSI Collabathon.

"""

import numpy as np
import networkx as nx
from .base import BaseDistance
from ..utilities import undirected

[docs]class DeltaCon(BaseDistance):
"""Compare matrices related to Fast Belief Propagation."""

[docs]    @undirected
def dist(self, G1, G2, exact=True, g=None):
"""DeltaCon is based on the Matsusita between matrices created from fast
belief propagation (FBP) on graphs G1 and G2.

Because the FBP algorithm requires a costly matrix inversion, there
is a faster, roughly linear, algorithm that gives approximate
results.

Parameters
----------

G1, G2 (nx.Graph)
two networkx graphs to be compared.

exact (bool)
if True, use the slower but exact algorithm (DeltaCon_0)

g (int)
the number of groups to use in the efficient algorithm. If
exact is set to False but g is not set, the efficient algorithm
will still behave like the exact algorithm, since each node is
put in its own group.

Returns
-------

dist (float)
the distance between G1 and G2.

References
----------

.. [1] Koutra, Danai, Joshua T. Vogelstein, and Christos
Faloutsos. 2013. "Deltacon: A Principled Massive-Graph
Similarity Function." In Proceedings of the 2013 SIAM
International Conference on Data Mining, 162–70. Society for
Industrial and Applied
Mathematics. https://doi.org/10.1137/1.9781611972832.18.

"""
assert G1.number_of_nodes() == G2.number_of_nodes()
N = G1.number_of_nodes()

if not exact and g is None:
g = N

A1 = nx.to_numpy_array(G1)
L1 = nx.laplacian_matrix(G1).toarray()
D1 = L1 + A1

A2 = nx.to_numpy_array(G2)
L2 = nx.laplacian_matrix(G2).toarray()
D2 = L2 + A2

eps_1 = 1 / (1 + np.max(D1))
eps_2 = 1 / (1 + np.max(D2))

if exact:
S1 = np.linalg.inv(np.eye(N) + (eps_1**2) * D1 - eps_1 * A1)
S2 = np.linalg.inv(np.eye(N) + (eps_2**2) * D2 - eps_2 * A2)
else:
raise NotImplementedError(
"The efficient algorithm is not "
"implemented. Please use the exact "
"algorithm."
)

def matusita_dist(X, Y):
r"""Return the Matusita distance

.. math::

\sqrt{\sum_i \sum_j \left( \sqrt{X_{ij}} - \sqrt{Y_{ij}} \right)^{2}}

between X and Y.
"""
return np.sqrt(np.sum(np.square(np.sqrt(X) - np.sqrt(Y))))

dist = matusita_dist(S1, S2)

self.results['belief_matrix_1'] = S1
self.results['belief_matrix_2'] = S2

self.results['dist'] = dist
return dist