Source code for netrd.distance.graph_diffusion

"""
graph_diffusion.py
--------------------------

Graph diffusion distance, from

Hammond, D. K., Gur, Y., & Johnson, C. R. (2013, December). Graph diffusion
distance: A difference measure for weighted graphs based on the graph Laplacian
exponential kernel. In Global Conference on Signal and Information Processing,
2013 IEEE (pp 419-422). IEEE. https://doi.org/10.1109/GlobalSIP.2013.6736904

This implementation is adapted from the authors' MATLAB code, available at
https://rb.gy/txbfrh, and available under an MIT license with the authors'
permission.

author: Brennan Klein
email: brennanjamesklein at gmail dot com
Submitted as part of the 2019 NetSI Collabathon.

"""

import numpy as np
import networkx as nx
from scipy.sparse.csgraph import laplacian
from .base import BaseDistance
from ..utilities import undirected


[docs]class GraphDiffusion(BaseDistance):
    """Find the maximally dissimilar diffusion kernels between two graphs."""

[docs]    @undirected
    def dist(self, G1, G2, thresh=1e-08, resolution=1000):
        r"""The graph diffusion distance between two graphs, :math:`G` and :math:`G'`,
        is a distance measure based on the notion of flow within each graph. As
        such, this measure uses the unnormalized Laplacian matrices of both
        graphs, :math:`\mathcal{L}` and :math:`\mathcal{L}'`, and uses them to
        construct time-varying Laplacian exponential diffusion kernels,
        :math:`e^{-t\mathcal{L}}` and :math:`e^{-t\mathcal{L}'}`, by
        effectively simulating a diffusion process for :math:`t` timesteps,
        creating a column vector of node-level activity at each timestep. The
        distance :math:`d_\texttt{GDD}(G, G')` is defined as the Frobenius norm
        between the two diffusion kernels at the timestep :math:`t^{*}` where
        the two kernels are maximally different. That is, we compute the
        Frobenius norms and their differences for each timestep, and return the
        maximum difference.

        .. math::
            D_{GDD}(G,G') = \sqrt{||e^{-t^{*}\mathcal{L}}-e^{-t^{*}\mathcal{L}'}||}

        The results dictionary also stores a 2-tuple of the underlying
        adjacency matrices in `adjacency_matrices`, the Laplacian matrices in
        `laplacian_matrices`, and the output of the optimization process
        (`peak_diffusion_time` and `peak_deviation`).

        Adapted from the authors' MATLAB code, available at: https://rb.gy/txbfrh


        Parameters
        ----------

        G1, G2 (nx.Graph)
            two networkx graphs to be compared.

        thresh (float)
            minimum value above which the eigenvalues will be considered.

        resolution (int)
            number of :math:`t` values to span through.

        Returns
        -------
        dist (float)
            the distance between `G1` and `G2`.

        References
        ----------

        .. [1] Hammond, D. K., Gur, Y., & Johnson, C. R. (2013, December).
               Graph diffusion distance: A difference measure for weighted graphs based on the
               graph Laplacian exponential kernel. In Global Conference on Signal and
               Information Processing, 2013 IEEE (pp 419-422). IEEE.
               https://doi.org/10.1109/GlobalSIP.2013.6736904

        """

        A1 = nx.to_numpy_array(G1)
        A2 = nx.to_numpy_array(G2)

        L1 = laplacian(A1)
        L2 = laplacian(A2)

        def sort_eigs(eigs):
            vals, vecs = eigs
            idx = np.argsort(abs(vals))
            return vals[idx], vecs[:, idx]

        vals1, vecs1 = sort_eigs(np.linalg.eig(L1))
        vals2, vecs2 = sort_eigs(np.linalg.eig(L2))

        eigs = np.hstack((np.diag(vals1), np.diag(vals2)))
        eigs = eigs[np.where(eigs > thresh)]
        eigs = np.sort(eigs)

        if len(eigs) == 0:
            dist = 0
            self.results["dist"] = dist
            return dist

        t_upperbound = np.real(1.0 / eigs[0])
        ts = np.linspace(0, t_upperbound, resolution)

        # Find the Frobenius norms between all the diffusion kernels at
        # different times. Return the value and where this vector is minimized.
        E = -exponential_diffusion_diff(vecs1, vals1, vecs2, vals2, ts)
        f_val, t_star = (np.nanmin(E), np.argmin(E))

        dist = np.sqrt(-f_val)

        self.results["adjacency_matrices"] = A1, A2
        self.results["laplacian_matrices"] = L1, L2
        self.results["peak_diffusion_time"] = t_star
        self.results["peak_deviation"] = f_val

        self.results["dist"] = dist

        return dist


def exponential_diffusion_diff(vecs1, vals1, vecs2, vals2, ts):
    """
    Computes Frobenius norm of difference of Laplacian exponential diffusion
    kernels, at specified timepoints.

    Parameters
    ----------

    vecs1, vecs2 (np.array)
        eigenvectors of the Laplacians of `G1` and `G2`

    vals1, vals2 (np.array)
        eigenvalues of the Laplacians of `G1` and `G2`

    ts (np.array)
        times at which to compute the difference in Frobenius norms

    Returns
    -------

    diffs (np.array)
        same shape as :math:`t`, contains differences of Frobenius norms

    """

    diffs = np.zeros(len(ts))

    for kt, t in enumerate(ts):
        exp_diag_1 = np.diag(np.exp(-t * np.diag(vals1)))
        exp_diag_2 = np.diag(np.exp(-t * np.diag(vals2)))

        # multiply the eigenvectors element-wise by the appropriate diffusion value
        # before left-multiplying the eigenvectors again.
        norm1 = vecs1.dot(np.multiply(exp_diag_1, vecs1).T)
        norm2 = vecs2.dot(np.multiply(exp_diag_2, vecs2).T)
        diff = norm1 - norm2

        diffs[kt] = (diff**2).sum()

    return diffs