# Source code for netrd.distance.distributional_nbd

"""
distributional_nbd.py
------

Distributional Non-backtracking Spectral Distance.

"""

import numpy as np
import networkx as nx
import scipy.sparse as sp
from scipy.spatial.distance import euclidean, chebyshev
from ..utilities.graph import unweighted

from .base import BaseDistance


class DistributionalNBD(BaseDistance):
    """Distributional Non-backtracking Spectral Distance.

    Compares two graphs using the empirical spectral density of their
    non-backtracking (Hashimoto) operators.

    See:
        "Graph Comparison via the Non-backtracking Spectrum"
        A. Mellor & A. Grusovin
        arXiv:1812.05457 / 10.1103/PhysRevE.99.052309
    """

    # Supported metrics for comparing the two flattened spectral histograms.
    VECTOR_DISTANCES = {'euclidean': euclidean, 'chebyshev': chebyshev}

    @unweighted
    def dist(
        self,
        G1,
        G2,
        sparse=False,
        shave=True,
        keep_evals=True,
        k=None,
        vector_distance='euclidean',
        **kwargs
    ):
        """Distributional Non-backtracking Spectral Distance.

        Parameters
        ----------
        G1, G2 (nx.Graph)
            The two graphs to compare.

        shave (bool)
            If True, each graph is first reduced to its two-core before
            building the non-backtracking matrix.
            Default: True

        sparse (bool)
            If True, matrices and eigenvalues are found using sparse
            methods; parameter 'k' should then also be specified.
            Default: False

        k (int)
            The number of largest eigenvalues to be calculated for the
            spectral density. Required when `sparse` is True.

        vector_distance (str)
            The distance measure used to compare two empirical
            distributions. Currently available are 'euclidean' and
            'chebyshev', implemented using SciPy.
            Default: 'euclidean'

        keep_evals (bool)
            If True, stores the eigenvalues of the two reduced
            non-backtracking matrices in self.results['eigenvalues'].
            Default: True

        Returns
        -------
        float
            The distance between `G1` and `G2`.
        """
        B1 = reduced_hashimoto(G1, shave=shave, sparse=sparse, **kwargs)
        B2 = reduced_hashimoto(G2, shave=shave, sparse=sparse, **kwargs)

        # Spectra of the two reduced non-backtracking matrices.
        evals1 = nb_eigenvalues(B1, k=k)
        evals2 = nb_eigenvalues(B2, k=k)

        # Save spectrum under the key the docstring advertises.
        if keep_evals:
            self.results['eigenvalues'] = (evals1, evals2)

        # Rescaled cumulative spectral densities in log-polar coordinates.
        distribution_1 = spectral_distribution(evals1)
        distribution_2 = spectral_distribution(evals2)

        # Compute distance and record it, following the netrd convention
        # of exposing the final value in self.results['dist'].
        distance_metric = self.__class__.VECTOR_DISTANCES[vector_distance]
        dist = distance_metric(distribution_1, distribution_2)
        self.results['dist'] = dist
        return dist
def shave_graph(graph):
    """Return the two-core of a graph.

    Iteratively remove the nodes of degree 0 or 1, until all nodes have
    degree at least 2.

    NOTE: duplicated from "nbd.py" to avoid excessive imports.
    """
    core = graph.copy()
    while True:
        to_remove = [
            node for node, neighbors in core.adj.items() if len(neighbors) < 2
        ]
        core.remove_nodes_from(to_remove)
        if len(to_remove) == 0:
            break
    return core


def pseudo_hashimoto(graph):
    """Return the pseudo-Hashimoto matrix.

    The pseudo-Hashimoto matrix of a graph is the block matrix defined as

        B' = [0    D-I]
             [-I   A  ]

    where D is the degree-diagonal matrix, I is the identity matrix and A
    is the adjacency matrix.  The eigenvalues of B' are always eigenvalues
    of B, the non-backtracking or Hashimoto matrix.

    Parameters
    ----------
    graph (nx.Graph)
        A NetworkX graph object.

    Returns
    -------
    A sparse matrix in csr format.

    NOTE: duplicated from "nbd.py" to avoid excessive imports.
    """
    # Note: the rows of nx.adjacency_matrix(graph) are in the same order
    # as the list returned by graph.nodes().
    degrees = graph.degree()
    degrees = sp.diags([degrees[n] for n in graph.nodes()])
    adj = nx.adjacency_matrix(graph)
    ident = sp.eye(graph.order())
    pseudo = sp.bmat([[None, degrees - ident], [-ident, adj]])
    return pseudo.asformat('csr')


def reduced_hashimoto(graph, shave=True, sparse=True, **kwargs):
    """Return the (possibly two-core-reduced) pseudo-Hashimoto matrix.

    Parameters
    ----------
    graph (nx.Graph)
        The graph whose non-backtracking matrix is required.

    shave (bool)
        If True, first reduce the graph to its two-core.  Else graph is
        processed in its entirety.

    sparse (bool)
        If True, returned matrix will be sparse, else it will be dense.

    **kwargs
        Ignored.  Accepted so that callers (e.g. DistributionalNBD.dist,
        which forwards its own **kwargs here) do not raise a TypeError
        when extra keyword arguments are supplied.

    Returns
    -------
    np.ndarray/sp.csr_matrix
        The reduced Hashimoto Matrix.

    Raises
    ------
    NotImplementedError
        If shaving leaves an empty graph (the two-core is empty).
    """
    if shave:
        graph = shave_graph(graph)
        if len(graph) == 0:
            # We can provide a workaround for this case, however it is
            # best that it is brought to the attention of the user.
            raise NotImplementedError(
                "Graph two-core is empty: non-backtracking methods unsuitable."
            )

    B = pseudo_hashimoto(graph)
    if not sparse:
        B = B.todense()
    return B


def nb_eigenvalues(B, k=None, **kwargs):
    """Calculate the eigenvalues of a matrix B.

    Detects whether B is sparse/dense and uses the appropriate method.
    If B is sparse then parameter 'k' (number of eigenvalues) should be
    provided.
    """
    if isinstance(B, np.ndarray):
        return np.linalg.eigvals(B)
    elif isinstance(B, sp.csr_matrix):
        # A fixed-seed starting vector v0 makes the iterative eigenvalue
        # calculation deterministic.
        random_state = np.random.RandomState(1)
        return sp.linalg.eigs(
            B, k=k, v0=random_state.random(B.shape[0]), return_eigenvectors=False
        )
    else:
        raise Exception("Matrix must be of type np.ndarray or scipy.sparse.csr")


def logr(r, rmax):
    """Return the logarithm of ``r`` to the base ``rmax``.

    NOTE: Maps zero to zero as a special case.
    """
    if r == 0:
        return 0
    return np.log(r) / np.log(rmax)


def spectral_distribution(points, cumulative=True):
    """Return the distribution of complex values (in r,theta-space).

    Radii are rescaled logarithmically to the base of the largest radius,
    then a 2D histogram over [0, 1] x [0, pi] is taken.  If `cumulative`,
    the histogram is accumulated along both axes and normalised so that
    its maximum is one.
    """
    points = np.array([(np.abs(z), np.angle(z)) for z in points])
    r, theta = np.split(points, 2, axis=1)
    r = np.array([logr(x, r.max()) for x in r])

    Z, R, THETA = np.histogram2d(
        x=r[:, 0],
        y=theta[:, 0],
        bins=(np.linspace(0, 1, 101), np.linspace(0, np.pi, 101)),
    )

    if cumulative:
        Z = Z.cumsum(axis=0).cumsum(axis=1)
        Z = Z / Z.max()

    return Z.flatten()