Source code for netrd.distance.hamming

"""
hamming.py
--------------

Hamming distance, wrapper for scipy function:
https://docs.scipy.org/doc/scipy/reference/generated/scipy.spatial.distance.hamming.html#scipy.spatial.distance.hamming

"""

import scipy
import numpy as np
import networkx as nx
from .base import BaseDistance
from ..utilities import unweighted


[docs]class Hamming(BaseDistance): """Entry-wise disagreement between adjacency matrices."""
[docs] @unweighted def dist(self, G1, G2): r"""The proportion of disagreeing nodes between the flattened adjacency matrices. If :math:`u` and :math:`v` are boolean vectors, then Hamming distance is: .. math:: \frac{c_{01} + c_{10}}{n} where :math:`c_{ij}` is the number of occurrences of where :math:`u[k] = i` and :math:`v[k] = j` for :math:`k < n`. The graphs must have the same number of nodes. A small modification to this code could allow weights can be applied, but only one set of weights that apply to both graphs. The results dictionary also stores a 2-tuple of the underlying adjacency matrices in the key `'adjacency_matrices'`. Parameters ---------- G1, G2 (nx.Graph) two networkx graphs to be compared. Returns ------- dist (float) the distance between `G1` and `G2`. References ---------- .. [1] https://docs.scipy.org/doc/scipy/reference/generated/scipy.spatial.distance.hamming.html#scipy.spatial.distance.hamming """ if G1.number_of_nodes() == G2.number_of_nodes(): N = G1.number_of_nodes() else: raise ValueError("Graphs have the same number of nodes") adj1 = nx.to_numpy_array(G1) adj2 = nx.to_numpy_array(G2) # undirected case: consider only upper triangular mask = np.triu_indices(N, k=1) # directed case: consider all but the diagonal if nx.is_directed(G1) or nx.is_directed(G2): new_mask = np.tril_indices(N, k=-1) mask = (np.append(mask[0], new_mask[0]), np.append(mask[1], new_mask[1])) # only if there are self-loops include the diagonal # this corrects the implicit denominator of Hamming, which # should be N^2 for networks with self-loops and N(N-1) for # those without if next(nx.selfloop_edges(G1), False) or next(nx.selfloop_edges(G2), False): new_mask = np.diag_indices(N) mask = (np.append(mask[0], new_mask[0]), np.append(mask[1], new_mask[1])) dist = scipy.spatial.distance.hamming( adj1[mask].flatten(), adj2[mask].flatten() ) self.results["dist"] = dist self.results["adjacency_matrices"] = adj1, adj2 return dist