# Source code for netrd.distance.distributional_nbd

"""
distributional_nbd.py
------

Distributional Non-backtracking Spectral Distance.

"""

import numpy as np
import networkx as nx
import scipy.sparse as sp
from scipy.spatial.distance import euclidean, chebyshev
from ..utilities.graph import unweighted

from .base import BaseDistance


class DistributionalNBD(BaseDistance):
    """Distributional Non-backtracking Spectral Distance.

    Compares two graphs using the empirical spectral density of their
    non-backtracking (Hashimoto) operators.

    See:
        "Graph Comparison via the Non-backtracking Spectrum"
        A. Mellor & A. Grusovin
        arXiv:1812.05457 / 10.1103/PhysRevE.99.052309
    """

    # Supported metrics for comparing the two flattened spectral histograms.
    VECTOR_DISTANCES = {'euclidean': euclidean, 'chebyshev': chebyshev}

    @unweighted
    def dist(
        self,
        G1,
        G2,
        sparse=False,
        shave=True,
        keep_evals=True,
        k=None,
        vector_distance='euclidean',
        **kwargs
    ):
        """Distributional Non-backtracking Spectral Distance.

        Parameters
        ----------
        G1, G2 (nx.Graph)
            The two graphs to compare.

        shave (bool)
            If True, each graph is first reduced to its two-core before
            building the non-backtracking matrix.
            Default: True

        sparse (bool)
            If True, matrices and eigenvalues are found using sparse
            methods; parameter 'k' should then also be specified.
            Default: False

        k (int)
            The number of largest eigenvalues to be calculated for the
            spectral density. Required when `sparse` is True.

        vector_distance (str)
            The distance measure used to compare two empirical
            distributions. Currently available are 'euclidean' and
            'chebyshev', implemented using SciPy.
            Default: 'euclidean'

        keep_evals (bool)
            If True, stores the eigenvalues of the two reduced
            non-backtracking matrices in self.results['eigenvalues'].
            Default: True

        Returns
        -------
        float
            The distance between `G1` and `G2`.
        """
        B1 = reduced_hashimoto(G1, shave=shave, sparse=sparse, **kwargs)
        B2 = reduced_hashimoto(G2, shave=shave, sparse=sparse, **kwargs)

        # Spectra of the two reduced non-backtracking matrices.
        evals1 = nb_eigenvalues(B1, k=k)
        evals2 = nb_eigenvalues(B2, k=k)

        # Save spectrum under the key the docstring advertises.
        if keep_evals:
            self.results['eigenvalues'] = (evals1, evals2)

        # Rescaled cumulative spectral densities in log-polar coordinates.
        distribution_1 = spectral_distribution(evals1)
        distribution_2 = spectral_distribution(evals2)

        # Compute distance and record it, following the netrd convention
        # of exposing the final value in self.results['dist'].
        distance_metric = self.__class__.VECTOR_DISTANCES[vector_distance]
        dist = distance_metric(distribution_1, distribution_2)
        self.results['dist'] = dist
        return dist
def shave_graph(graph):
    """Return the two-core of a graph.

    Iteratively remove the nodes of degree 0 or 1, until all nodes have
    degree at least 2.

    NOTE: duplicated from "nbd.py" to avoid excessive imports.
    """
    core = graph.copy()
    while True:
        to_remove = [
            node for node, neighbors in core.adj.items() if len(neighbors) < 2
        ]
        core.remove_nodes_from(to_remove)
        if len(to_remove) == 0:
            break
    return core


def pseudo_hashimoto(graph):
    """Return the pseudo-Hashimoto matrix.

    The pseudo-Hashimoto matrix of a graph is the block matrix defined as

        B' = [0    D-I]
             [-I   A  ]

    where D is the degree-diagonal matrix, I is the identity matrix and A
    is the adjacency matrix.  The eigenvalues of B' are always eigenvalues
    of B, the non-backtracking or Hashimoto matrix.

    Parameters
    ----------
    graph (nx.Graph)
        A NetworkX graph object.

    Returns
    -------
    A sparse matrix in csr format.

    NOTE: duplicated from "nbd.py" to avoid excessive imports.
    """
    # Note: the rows of nx.adjacency_matrix(graph) are in the same order
    # as the list returned by graph.nodes().
    degrees = graph.degree()
    degrees = sp.diags([degrees[n] for n in graph.nodes()])
    adj = nx.adjacency_matrix(graph)
    ident = sp.eye(graph.order())
    pseudo = sp.bmat([[None, degrees - ident], [-ident, adj]])
    return pseudo.asformat('csr')


def reduced_hashimoto(graph, shave=True, sparse=True, **kwargs):
    """Return the (possibly two-core-reduced) pseudo-Hashimoto matrix.

    Parameters
    ----------
    graph (nx.Graph)
        The graph whose non-backtracking matrix is required.

    shave (bool)
        If True, first reduce the graph to its two-core.  Else graph is
        processed in its entirety.

    sparse (bool)
        If True, returned matrix will be sparse, else it will be dense.

    **kwargs
        Ignored.  Accepted so that callers (e.g. DistributionalNBD.dist,
        which forwards its own **kwargs here) do not raise a TypeError
        when extra keyword arguments are supplied.

    Returns
    -------
    np.ndarray/sp.csr_matrix
        The reduced Hashimoto Matrix.

    Raises
    ------
    NotImplementedError
        If shaving leaves an empty graph (the two-core is empty).
    """
    if shave:
        graph = shave_graph(graph)
        if len(graph) == 0:
            # We can provide a workaround for this case, however it is
            # best that it is brought to the attention of the user.
            raise NotImplementedError(
                "Graph two-core is empty: non-backtracking methods unsuitable."
            )

    B = pseudo_hashimoto(graph)
    if not sparse:
        B = B.todense()
    return B


def nb_eigenvalues(B, k=None, **kwargs):
    """Calculate the eigenvalues of a matrix B.

    Detects whether B is sparse/dense and uses the appropriate method.
    If B is sparse then parameter 'k' (number of eigenvalues) should be
    provided.
    """
    if isinstance(B, np.ndarray):
        return np.linalg.eigvals(B)
    elif isinstance(B, sp.csr_matrix):
        # A fixed-seed starting vector v0 makes the iterative eigenvalue
        # calculation deterministic.
        random_state = np.random.RandomState(1)
        return sp.linalg.eigs(
            B, k=k, v0=random_state.random(B.shape[0]), return_eigenvectors=False
        )
    else:
        raise Exception("Matrix must be of type np.ndarray or scipy.sparse.csr")


def logr(r, rmax):
    """Return the logarithm of ``r`` to the base ``rmax``.

    NOTE: Maps zero to zero as a special case.
    """
    if r == 0:
        return 0
    return np.log(r) / np.log(rmax)


def spectral_distribution(points, cumulative=True):
    """Return the distribution of complex values (in r,theta-space).

    Radii are rescaled logarithmically to the base of the largest radius,
    then a 2D histogram over [0, 1] x [0, pi] is taken.  If `cumulative`,
    the histogram is accumulated along both axes and normalised so that
    its maximum is one.
    """
    points = np.array([(np.abs(z), np.angle(z)) for z in points])
    r, theta = np.split(points, 2, axis=1)
    r = np.array([logr(x, r.max()) for x in r])

    Z, R, THETA = np.histogram2d(
        x=r[:, 0],
        y=theta[:, 0],
        bins=(np.linspace(0, 1, 101), np.linspace(0, np.pi, 101)),
    )

    if cumulative:
        Z = Z.cumsum(axis=0).cumsum(axis=1)
        Z = Z / Z.max()

    return Z.flatten()