# Source code for netrd.distance.hamming

"""
hamming.py
--------------

Hamming distance, wrapper for scipy function:
https://docs.scipy.org/doc/scipy/reference/generated/scipy.spatial.distance.hamming.html#scipy.spatial.distance.hamming

"""

import scipy
import numpy as np
import networkx as nx
from .base import BaseDistance
from ..utilities import unweighted

[docs]class Hamming(BaseDistance):

[docs]    @unweighted
def dist(self, G1, G2):
r"""The proportion of disagreeing nodes between the flattened adjacency
matrices.

If :math:u and :math:v are boolean vectors, then Hamming
distance is:

.. math::

\frac{c_{01} + c_{10}}{n}

where :math:c_{ij} is the number of occurrences of where
:math:u[k] = i and :math:v[k] = j for :math:k < n.

The graphs must have the same number of nodes. A small modification
to this code could allow weights can be applied, but only one set
of weights that apply to both graphs.

The results dictionary also stores a 2-tuple of the underlying
adjacency matrices in the key 'adjacency_matrices'.

Parameters
----------

G1, G2 (nx.Graph)
two networkx graphs to be compared.

Returns
-------

dist (float)
the distance between G1 and G2.

References
----------

.. [1] https://docs.scipy.org/doc/scipy/reference/generated/scipy.spatial.distance.hamming.html#scipy.spatial.distance.hamming

"""

if G1.number_of_nodes() == G2.number_of_nodes():
N = G1.number_of_nodes()
else:
raise ValueError("Graphs have the same number of nodes")

# undirected case: consider only upper triangular

# directed case: consider all but the diagonal
if nx.is_directed(G1) or nx.is_directed(G2):

# only if there are self-loops include the diagonal
# this corrects the implicit denominator of Hamming, which
# should be N^2 for networks with self-loops and N(N-1) for
# those without
if next(nx.selfloop_edges(G1), False) or next(nx.selfloop_edges(G2), False):