Source code for netrd.distance.laplacian_spectral_method

"""
laplacian_spectral_method.py
----------------------------

Graph distance based on :
https://www.sciencedirect.com/science/article/pii/S0303264711001869
https://arxiv.org/pdf/1005.0103.pdf
https://www.nature.com/articles/s41598-018-37534-2

author: Guillaume St-Onge
email: guillaume.st-onge.4@ulaval.ca
Submitted as part of the 2019 NetSI Collabathon.

"""
import numpy as np
import networkx as nx
from .base import BaseDistance
from ..utilities.graph import unweighted
from scipy.special import erf
from scipy.integrate import quad
from scipy.linalg import eigvalsh
from scipy.sparse.csgraph import csgraph_from_dense
from scipy.sparse.csgraph import laplacian
from scipy.sparse.linalg import eigsh


[docs]class LaplacianSpectral(BaseDistance): """Flexible distance able to compare the spectrum of the Laplacian in many ways."""
[docs] @unweighted def dist( self, G1, G2, normed=True, kernel='normal', hwhm=0.011775, measure='jensen-shannon', k=None, which='LM', ): """Graph distances using different measure between the Laplacian spectra of the two graphs The spectra of both Laplacian matrices (normalized or not) is computed. Then, the discrete spectra are convolved with a kernel to produce continuous ones. Finally, these distribution are compared using a metric. The results dictionary also stores a 2-tuple of the underlying adjacency matrices in the key `'adjacency_matrices'`, the Laplacian matrices in `'laplacian_matrices'`, the eigenvalues of the Laplacians in `'eigenvalues'`. If the networks being compared are directed, the augmented adjacency matrices are calculated and stored in `'augmented_adjacency_matrices'`. Parameters ---------- G1, G2 (nx.Graph) two networkx graphs to be compared. normed (bool) If True, uses the normalized laplacian matrix, otherwise the raw laplacian matrix is used. kernel (str) kernel to obtain a continuous spectrum. Choices available are 'normal', 'lorentzian', or None. If None is chosen, the discrete spectrum is used instead, and the measure is simply the euclidean distance between the vector of eigenvalues for each graph. hwhm (float) half-width at half-maximum for the kernel. The default value is chosen such that the standard deviation for the normal distribution is :math:`0.01`, as in reference [1]_. This option is relevant only if kernel is not None. measure (str) metric between the two continuous spectra. Choices available are 'jensen-shannon' or 'euclidean'. This option is relevant only if kernel is not None. k (int) number of eigenvalues kept for the (discrete) spectrum, also used to create the continuous spectrum. If None, all the eigenvalues are used. k must be smaller (strictly) than the size of both graphs. which (str) if k is not None, this option specifies the eigenvalues that are kept. See the choices offered by `scipy.sparse.linalg.eigsh`. The largest eigenvalues in magnitude are kept by default. Returns ------- dist (float) the distance between G1 and G2. Notes ----- The methods are usually applied to undirected (unweighted) networks. We however relax this assumption using the same method proposed for the Hamming-Ipsen-Mikhailov. See [2]_. References ---------- .. [1] https://www.sciencedirect.com/science/article/pii/S0303264711001869. .. [2] https://ieeexplore.ieee.org/abstract/document/7344816. """ adj1 = nx.to_numpy_array(G1) adj2 = nx.to_numpy_array(G2) self.results['adjacency_matrices'] = adj1, adj2 directed = nx.is_directed(G1) or nx.is_directed(G2) if directed: # create augmented adjacency matrices N1 = len(G1) N2 = len(G2) null_mat1 = np.zeros((N1, N1)) null_mat2 = np.zeros((N2, N2)) adj1 = np.block([[null_mat1, adj1.T], [adj1, null_mat1]]) adj2 = np.block([[null_mat2, adj2.T], [adj2, null_mat2]]) self.results['augmented_adjacency_matrices'] = adj1, adj2 # get the laplacian matrices lap1 = laplacian(adj1, normed=normed) lap2 = laplacian(adj2, normed=normed) self.results['laplacian_matrices'] = lap1, lap2 # get the eigenvalues of the laplacian matrices if k is None: ev1 = np.abs(eigvalsh(lap1)) ev2 = np.abs(eigvalsh(lap2)) else: # transform the dense laplacian matrices to sparse representations lap1 = csgraph_from_dense(lap1) lap2 = csgraph_from_dense(lap2) ev1 = np.abs(eigsh(lap1, k=k, which=which)[0]) ev2 = np.abs(eigsh(lap2, k=k, which=which)[0]) self.results['eigenvalues'] = ev1, ev2 if kernel is not None: # define the proper support a = 0 if normed: b = 2 else: b = np.inf # create continuous spectra density1 = _create_continuous_spectrum(ev1, kernel, hwhm, a, b) density2 = _create_continuous_spectrum(ev2, kernel, hwhm, a, b) # compare the spectra dist = _spectra_comparison(density1, density2, a, b, measure) self.results['dist'] = dist else: # euclidean distance between the two discrete spectra dist = np.linalg.norm(ev1 - ev2) self.results['dist'] = dist return dist
def _create_continuous_spectrum(eigenvalues, kernel, hwhm, a, b): """Convert a set of eigenvalues into a normalized density function The discret spectrum (sum of dirac delta) is convolved with a kernel and renormalized. Parameters ---------- eigenvalues (array): list of eigenvalues. kernel (str): kernel to be used for the convolution with the discrete spectrum. hwhm (float): half-width at half-maximum for the kernel. a,b (float): lower and upper bounds of the support for the eigenvalues. Returns ------- density (function): one argument function for the continuous spectral density. """ # define density and repartition function for each eigenvalue if kernel == "normal": std = hwhm / 1.1775 f = lambda x, xp: np.exp(-((x - xp) ** 2) / (2 * std**2)) / np.sqrt( 2 * np.pi * std**2 ) F = lambda x, xp: (1 + erf((x - xp) / (np.sqrt(2) * std))) / 2 elif kernel == "lorentzian": f = lambda x, xp: hwhm / (np.pi * (hwhm**2 + (x - xp) ** 2)) F = lambda x, xp: np.arctan((x - xp) / hwhm) / np.pi + 1 / 2 # compute normalization factor and define density function Z = np.sum(F(b, eigenvalues) - F(a, eigenvalues)) density = lambda x: np.sum(f(x, eigenvalues)) / Z return density def _spectra_comparison(density1, density2, a, b, measure): """Apply a metric to compare the continuous spectra Parameters ---------- density1, density2 (function): one argument functions for the continuous spectral densities. a,b (float): lower and upper bounds of the support for the eigenvalues. measure (str): metric between the two continuous spectra. Returns ------- dist (float): distance between the spectra. """ if measure == "jensen-shannon": M = lambda x: (density1(x) + density2(x)) / 2 jensen_shannon = ( _kullback_leiber(density1, M, a, b) + _kullback_leiber(density2, M, a, b) ) / 2 dist = np.sqrt(jensen_shannon) elif measure == "euclidean": integrand = lambda x: (density1(x) - density2(x)) ** 2 dist = np.sqrt(quad(integrand, a, b)[0]) return dist def _kullback_leiber(f1, f2, a, b): def integrand(x): if f1(x) > 0 and f2(x) > 0: result = f1(x) * np.log(f1(x) / f2(x)) else: result = 0 return result return quad(integrand, a, b)[0]