Source code for netrd.distance.deltacon

"""
deltacon.py
--------------------------

Deltacon measure for graph distance, after:

Koutra, Danai, Joshua T. Vogelstein, and Christos Faloutsos. 2013. “Deltacon: A
Principled Massive-Graph Similarity Function.” In Proceedings of the 2013 SIAM
International Conference on Data Mining, 162–70. Society for Industrial and
Applied Mathematics. https://doi.org/10.1137/1.9781611972832.18.

author: Stefan McCabe
email: stefanmccabe at gmail dot com
Submitted as part of the 2019 NetSI Collabathon.

"""

import numpy as np
import networkx as nx
from .base import BaseDistance
from ..utilities import undirected


[docs]class DeltaCon(BaseDistance): """Compare matrices related to Fast Belief Propagation."""
[docs] @undirected def dist(self, G1, G2, exact=True, g=None): """DeltaCon is based on the Matsusita between matrices created from fast belief propagation (FBP) on graphs G1 and G2. Because the FBP algorithm requires a costly matrix inversion, there is a faster, roughly linear, algorithm that gives approximate results. Parameters ---------- G1, G2 (nx.Graph) two networkx graphs to be compared. exact (bool) if True, use the slower but exact algorithm (DeltaCon_0) g (int) the number of groups to use in the efficient algorithm. If exact is set to False but g is not set, the efficient algorithm will still behave like the exact algorithm, since each node is put in its own group. Returns ------- dist (float) the distance between G1 and G2. References ---------- .. [1] Koutra, Danai, Joshua T. Vogelstein, and Christos Faloutsos. 2013. "Deltacon: A Principled Massive-Graph Similarity Function." In Proceedings of the 2013 SIAM International Conference on Data Mining, 162–70. Society for Industrial and Applied Mathematics. https://doi.org/10.1137/1.9781611972832.18. """ assert G1.number_of_nodes() == G2.number_of_nodes() N = G1.number_of_nodes() if not exact and g is None: g = N A1 = nx.to_numpy_array(G1) L1 = nx.laplacian_matrix(G1).toarray() D1 = L1 + A1 A2 = nx.to_numpy_array(G2) L2 = nx.laplacian_matrix(G2).toarray() D2 = L2 + A2 eps_1 = 1 / (1 + np.max(D1)) eps_2 = 1 / (1 + np.max(D2)) if exact: S1 = np.linalg.inv(np.eye(N) + (eps_1**2) * D1 - eps_1 * A1) S2 = np.linalg.inv(np.eye(N) + (eps_2**2) * D2 - eps_2 * A2) else: raise NotImplementedError( "The efficient algorithm is not " "implemented. Please use the exact " "algorithm." ) def matusita_dist(X, Y): r"""Return the Matusita distance .. math:: \sqrt{\sum_i \sum_j \left( \sqrt{X_{ij}} - \sqrt{Y_{ij}} \right)^{2}} between X and Y. """ return np.sqrt(np.sum(np.square(np.sqrt(X) - np.sqrt(Y)))) dist = matusita_dist(S1, S2) self.results['belief_matrix_1'] = S1 self.results['belief_matrix_2'] = S2 self.results['dist'] = dist return dist