# Source code for netrd.distance.graph_diffusion

"""
graph_diffusion.py
--------------------------

Graph diffusion distance, from

Hammond, D. K., Gur, Y., & Johnson, C. R. (2013, December). Graph diffusion
distance: A difference measure for weighted graphs based on the graph Laplacian
exponential kernel. In Global Conference on Signal and Information Processing,
2013 IEEE (pp 419-422). IEEE. https://doi.org/10.1109/GlobalSIP.2013.6736904

This implementation is adapted from the authors' MATLAB code, available at
https://rb.gy/txbfrh, and available under an MIT license with the authors'
permission.

author: Brennan Klein
email: brennanjamesklein at gmail dot com
Submitted as part of the 2019 NetSI Collabathon.

"""

import numpy as np
import networkx as nx
from scipy.sparse.csgraph import laplacian
from .base import BaseDistance
from ..utilities import undirected

[docs]class GraphDiffusion(BaseDistance):
"""Find the maximally dissimilar diffusion kernels between two graphs."""

[docs]    @undirected
def dist(self, G1, G2, thresh=1e-08, resolution=1000):
r"""The graph diffusion distance between two graphs, :math:G and :math:G',
is a distance measure based on the notion of flow within each graph. As
such, this measure uses the unnormalized Laplacian matrices of both
graphs, :math:\mathcal{L} and :math:\mathcal{L}', and uses them to
construct time-varying Laplacian exponential diffusion kernels,
:math:e^{-t\mathcal{L}} and :math:e^{-t\mathcal{L}'}, by
effectively simulating a diffusion process for :math:t timesteps,
creating a column vector of node-level activity at each timestep. The
distance :math:d_\texttt{GDD}(G, G') is defined as the Frobenius norm
between the two diffusion kernels at the timestep :math:t^{*} where
the two kernels are maximally different. That is, we compute the
Frobenius norms and their differences for each timestep, and return the
maximum difference.

.. math::
D_{GDD}(G,G') = \sqrt{||e^{-t^{*}\mathcal{L}}-e^{-t^{*}\mathcal{L}'}||}

The results dictionary also stores a 2-tuple of the underlying
adjacency matrices in adjacency_matrices, the Laplacian matrices in
laplacian_matrices, and the output of the optimization process
(peak_diffusion_time and peak_deviation).

Adapted from the authors' MATLAB code, available at: https://rb.gy/txbfrh

Parameters
----------

G1, G2 (nx.Graph)
two networkx graphs to be compared.

thresh (float)
minimum value above which the eigenvalues will be considered.

resolution (int)
number of :math:t values to span through.

Returns
-------
dist (float)
the distance between G1 and G2.

References
----------

.. [1] Hammond, D. K., Gur, Y., & Johnson, C. R. (2013, December).
Graph diffusion distance: A difference measure for weighted graphs based on the
graph Laplacian exponential kernel. In Global Conference on Signal and
Information Processing, 2013 IEEE (pp 419-422). IEEE.
https://doi.org/10.1109/GlobalSIP.2013.6736904

"""

A1 = nx.to_numpy_array(G1)
A2 = nx.to_numpy_array(G2)

L1 = laplacian(A1)
L2 = laplacian(A2)

def sort_eigs(eigs):
vals, vecs = eigs
idx = np.argsort(abs(vals))
return vals[idx], vecs[:, idx]

vals1, vecs1 = sort_eigs(np.linalg.eig(L1))
vals2, vecs2 = sort_eigs(np.linalg.eig(L2))

eigs = np.hstack((np.diag(vals1), np.diag(vals2)))
eigs = eigs[np.where(eigs > thresh)]
eigs = np.sort(eigs)

if len(eigs) == 0:
dist = 0
self.results["dist"] = dist
return dist

t_upperbound = np.real(1.0 / eigs[0])
ts = np.linspace(0, t_upperbound, resolution)

# Find the Frobenius norms between all the diffusion kernels at
# different times. Return the value and where this vector is minimized.
E = -exponential_diffusion_diff(vecs1, vals1, vecs2, vals2, ts)
f_val, t_star = (np.nanmin(E), np.argmin(E))

dist = np.sqrt(-f_val)

self.results["adjacency_matrices"] = A1, A2
self.results["laplacian_matrices"] = L1, L2
self.results["peak_diffusion_time"] = t_star
self.results["peak_deviation"] = f_val

self.results["dist"] = dist

return dist

def exponential_diffusion_diff(vecs1, vals1, vecs2, vals2, ts):
"""
Computes Frobenius norm of difference of Laplacian exponential diffusion
kernels, at specified timepoints.

Parameters
----------

vecs1, vecs2 (np.array)
eigenvectors of the Laplacians of G1 and G2

vals1, vals2 (np.array)
eigenvalues of the Laplacians of G1 and G2

ts (np.array)
times at which to compute the difference in Frobenius norms

Returns
-------

diffs (np.array)
same shape as :math:t, contains differences of Frobenius norms

"""

diffs = np.zeros(len(ts))

for kt, t in enumerate(ts):
exp_diag_1 = np.diag(np.exp(-t * np.diag(vals1)))
exp_diag_2 = np.diag(np.exp(-t * np.diag(vals2)))

# multiply the eigenvectors element-wise by the appropriate diffusion value
# before left-multiplying the eigenvectors again.
norm1 = vecs1.dot(np.multiply(exp_diag_1, vecs1).T)
norm2 = vecs2.dot(np.multiply(exp_diag_2, vecs2).T)
diff = norm1 - norm2

diffs[kt] = (diff**2).sum()

return diffs