# Source code for netrd.distance.graph_diffusion

```
"""
graph_diffusion.py
--------------------------
Graph diffusion distance, from
Hammond, D. K., Gur, Y., & Johnson, C. R. (2013, December). Graph diffusion
distance: A difference measure for weighted graphs based on the graph Laplacian
exponential kernel. In Global Conference on Signal and Information Processing,
2013 IEEE (pp 419-422). IEEE. https://doi.org/10.1109/GlobalSIP.2013.6736904
This implementation is adapted from the authors' MATLAB code, available at
https://rb.gy/txbfrh, and available under an MIT license with the authors'
permission.
author: Brennan Klein
email: brennanjamesklein at gmail dot com
Submitted as part of the 2019 NetSI Collabathon.
"""
import numpy as np
import networkx as nx
from scipy.sparse.csgraph import laplacian
from .base import BaseDistance
from ..utilities import undirected
[docs]class GraphDiffusion(BaseDistance):
"""Find the maximally dissimilar diffusion kernels between two graphs."""
[docs] @undirected
def dist(self, G1, G2, thresh=1e-08, resolution=1000):
r"""The graph diffusion distance between two graphs, :math:`G` and :math:`G'`,
is a distance measure based on the notion of flow within each graph. As
such, this measure uses the unnormalized Laplacian matrices of both
graphs, :math:`\mathcal{L}` and :math:`\mathcal{L}'`, and uses them to
construct time-varying Laplacian exponential diffusion kernels,
:math:`e^{-t\mathcal{L}}` and :math:`e^{-t\mathcal{L}'}`, by
effectively simulating a diffusion process for :math:`t` timesteps,
creating a column vector of node-level activity at each timestep. The
distance :math:`d_\texttt{GDD}(G, G')` is defined as the Frobenius norm
between the two diffusion kernels at the timestep :math:`t^{*}` where
the two kernels are maximally different. That is, we compute the
Frobenius norms and their differences for each timestep, and return the
maximum difference.
.. math::
D_{GDD}(G,G') = \sqrt{||e^{-t^{*}\mathcal{L}}-e^{-t^{*}\mathcal{L}'}||}
The results dictionary also stores a 2-tuple of the underlying
adjacency matrices in `adjacency_matrices`, the Laplacian matrices in
`laplacian_matrices`, and the output of the optimization process
(`peak_diffusion_time` and `peak_deviation`).
Adapted from the authors' MATLAB code, available at: https://rb.gy/txbfrh
Parameters
----------
G1, G2 (nx.Graph)
two networkx graphs to be compared.
thresh (float)
minimum value above which the eigenvalues will be considered.
resolution (int)
number of :math:`t` values to span through.
Returns
-------
dist (float)
the distance between `G1` and `G2`.
References
----------
.. [1] Hammond, D. K., Gur, Y., & Johnson, C. R. (2013, December).
Graph diffusion distance: A difference measure for weighted graphs based on the
graph Laplacian exponential kernel. In Global Conference on Signal and
Information Processing, 2013 IEEE (pp 419-422). IEEE.
https://doi.org/10.1109/GlobalSIP.2013.6736904
"""
A1 = nx.to_numpy_array(G1)
A2 = nx.to_numpy_array(G2)
L1 = laplacian(A1)
L2 = laplacian(A2)
def sort_eigs(eigs):
vals, vecs = eigs
idx = np.argsort(abs(vals))
return vals[idx], vecs[:, idx]
vals1, vecs1 = sort_eigs(np.linalg.eig(L1))
vals2, vecs2 = sort_eigs(np.linalg.eig(L2))
eigs = np.hstack((np.diag(vals1), np.diag(vals2)))
eigs = eigs[np.where(eigs > thresh)]
eigs = np.sort(eigs)
if len(eigs) == 0:
dist = 0
self.results["dist"] = dist
return dist
t_upperbound = np.real(1.0 / eigs[0])
ts = np.linspace(0, t_upperbound, resolution)
# Find the Frobenius norms between all the diffusion kernels at
# different times. Return the value and where this vector is minimized.
E = -exponential_diffusion_diff(vecs1, vals1, vecs2, vals2, ts)
f_val, t_star = (np.nanmin(E), np.argmin(E))
dist = np.sqrt(-f_val)
self.results["adjacency_matrices"] = A1, A2
self.results["laplacian_matrices"] = L1, L2
self.results["peak_diffusion_time"] = t_star
self.results["peak_deviation"] = f_val
self.results["dist"] = dist
return dist
def exponential_diffusion_diff(vecs1, vals1, vecs2, vals2, ts):
"""
Computes Frobenius norm of difference of Laplacian exponential diffusion
kernels, at specified timepoints.
Parameters
----------
vecs1, vecs2 (np.array)
eigenvectors of the Laplacians of `G1` and `G2`
vals1, vals2 (np.array)
eigenvalues of the Laplacians of `G1` and `G2`
ts (np.array)
times at which to compute the difference in Frobenius norms
Returns
-------
diffs (np.array)
same shape as :math:`t`, contains differences of Frobenius norms
"""
diffs = np.zeros(len(ts))
for kt, t in enumerate(ts):
exp_diag_1 = np.diag(np.exp(-t * np.diag(vals1)))
exp_diag_2 = np.diag(np.exp(-t * np.diag(vals2)))
# multiply the eigenvectors element-wise by the appropriate diffusion value
# before left-multiplying the eigenvectors again.
norm1 = vecs1.dot(np.multiply(exp_diag_1, vecs1).T)
norm2 = vecs2.dot(np.multiply(exp_diag_2, vecs2).T)
diff = norm1 - norm2
diffs[kt] = (diff**2).sum()
return diffs
```