# Source code for netrd.distance.laplacian_spectral_method

"""
laplacian_spectral_method.py
----------------------------

Graph distance based on :
https://www.sciencedirect.com/science/article/pii/S0303264711001869
https://arxiv.org/pdf/1005.0103.pdf
https://www.nature.com/articles/s41598-018-37534-2

author: Guillaume St-Onge
email: guillaume.st-onge.4@ulaval.ca
Submitted as part of the 2019 NetSI Collabathon.

"""
import numpy as np
import networkx as nx
from .base import BaseDistance
from ..utilities.graph import unweighted
from scipy.special import erf
from scipy.linalg import eigvalsh
from scipy.sparse.csgraph import csgraph_from_dense
from scipy.sparse.csgraph import laplacian
from scipy.sparse.linalg import eigsh

[docs]class LaplacianSpectral(BaseDistance):
"""Flexible distance able to compare the spectrum of the Laplacian in many ways."""

[docs]    @unweighted
def dist(
self,
G1,
G2,
normed=True,
kernel='normal',
hwhm=0.011775,
measure='jensen-shannon',
k=None,
which='LM',
):
"""Graph distances using different measure between the Laplacian
spectra of the two graphs

The spectra of both Laplacian matrices (normalized or not) is
computed. Then, the discrete spectra are convolved with a kernel to
produce continuous ones. Finally, these distribution are compared
using a metric.

The results dictionary also stores a 2-tuple of the underlying
adjacency matrices in the key 'adjacency_matrices', the Laplacian
matrices in 'laplacian_matrices', the eigenvalues of the
Laplacians in 'eigenvalues'. If the networks being compared are
directed, the augmented adjacency matrices are calculated and
stored in 'augmented_adjacency_matrices'.

Parameters
----------

G1, G2 (nx.Graph)
two networkx graphs to be compared.

normed (bool)
If True, uses the normalized laplacian matrix, otherwise the
raw laplacian matrix is used.

kernel (str)
kernel to obtain a continuous spectrum. Choices available are
'normal', 'lorentzian', or None. If None is chosen, the
discrete spectrum is used instead, and the measure is simply
the euclidean distance between the vector of eigenvalues for
each graph.

hwhm (float)
half-width at half-maximum for the kernel. The default value is
chosen such that the standard deviation for the normal
distribution is :math:0.01, as in reference [1]_. This option
is relevant only if kernel is not None.

measure (str)
metric between the two continuous spectra. Choices available
are 'jensen-shannon' or 'euclidean'. This option is relevant
only if kernel is not None.

k (int)
number of eigenvalues kept for the (discrete) spectrum, also
used to create the continuous spectrum. If None, all the
eigenvalues are used. k must be smaller (strictly) than the
size of both graphs.

which (str)
if k is not None, this option specifies the eigenvalues that
are kept. See the choices offered by
scipy.sparse.linalg.eigsh.  The largest eigenvalues in
magnitude are kept by default.

Returns
-------

dist (float)
the distance between G1 and G2.

Notes
-----
The methods are usually applied to undirected (unweighted)
networks. We however relax this assumption using the same method
proposed for the Hamming-Ipsen-Mikhailov. See [2]_.

References
----------

.. [1] https://www.sciencedirect.com/science/article/pii/S0303264711001869.

.. [2] https://ieeexplore.ieee.org/abstract/document/7344816.

"""
directed = nx.is_directed(G1) or nx.is_directed(G2)

if directed:
N1 = len(G1)
N2 = len(G2)
null_mat1 = np.zeros((N1, N1))
null_mat2 = np.zeros((N2, N2))

# get the laplacian matrices
self.results['laplacian_matrices'] = lap1, lap2

# get the eigenvalues of the laplacian matrices
if k is None:
ev1 = np.abs(eigvalsh(lap1))
ev2 = np.abs(eigvalsh(lap2))
else:
# transform the dense laplacian matrices to sparse representations
lap1 = csgraph_from_dense(lap1)
lap2 = csgraph_from_dense(lap2)
ev1 = np.abs(eigsh(lap1, k=k, which=which)[0])
ev2 = np.abs(eigsh(lap2, k=k, which=which)[0])
self.results['eigenvalues'] = ev1, ev2

if kernel is not None:
# define the proper support
a = 0
if normed:
b = 2
else:
b = np.inf

# create continuous spectra
density1 = _create_continuous_spectrum(ev1, kernel, hwhm, a, b)
density2 = _create_continuous_spectrum(ev2, kernel, hwhm, a, b)

# compare the spectra
dist = _spectra_comparison(density1, density2, a, b, measure)
self.results['dist'] = dist
else:
# euclidean distance between the two discrete spectra
dist = np.linalg.norm(ev1 - ev2)
self.results['dist'] = dist

return dist

def _create_continuous_spectrum(eigenvalues, kernel, hwhm, a, b):
"""Convert a set of eigenvalues into a normalized density function

The discret spectrum (sum of dirac delta) is convolved with a kernel and
renormalized.

Parameters
----------

eigenvalues (array): list of eigenvalues.

kernel (str): kernel to be used for the convolution with the discrete
spectrum.

hwhm (float): half-width at half-maximum for the kernel.

a,b (float): lower and upper bounds of the support for the eigenvalues.

Returns
-------

density (function): one argument function for the continuous spectral
density.

"""
# define density and repartition function for each eigenvalue
if kernel == "normal":
std = hwhm / 1.1775
f = lambda x, xp: np.exp(-((x - xp) ** 2) / (2 * std**2)) / np.sqrt(
2 * np.pi * std**2
)
F = lambda x, xp: (1 + erf((x - xp) / (np.sqrt(2) * std))) / 2
elif kernel == "lorentzian":
f = lambda x, xp: hwhm / (np.pi * (hwhm**2 + (x - xp) ** 2))
F = lambda x, xp: np.arctan((x - xp) / hwhm) / np.pi + 1 / 2

# compute normalization factor and define density function
Z = np.sum(F(b, eigenvalues) - F(a, eigenvalues))
density = lambda x: np.sum(f(x, eigenvalues)) / Z

return density

def _spectra_comparison(density1, density2, a, b, measure):
"""Apply a metric to compare the continuous spectra

Parameters
----------

density1, density2 (function): one argument functions for the continuous
spectral densities.

a,b (float): lower and upper bounds of the support for the eigenvalues.

measure (str): metric between the two continuous spectra.

Returns
-------

dist (float): distance between the spectra.

"""
if measure == "jensen-shannon":
M = lambda x: (density1(x) + density2(x)) / 2
jensen_shannon = (
_kullback_leiber(density1, M, a, b) + _kullback_leiber(density2, M, a, b)
) / 2
dist = np.sqrt(jensen_shannon)

elif measure == "euclidean":
integrand = lambda x: (density1(x) - density2(x)) ** 2

return dist

def _kullback_leiber(f1, f2, a, b):
def integrand(x):
if f1(x) > 0 and f2(x) > 0:
result = f1(x) * np.log(f1(x) / f2(x))
else:
result = 0
return result