Source code for netrd.distance.hamming_ipsen_mikhailov

"""
hamming_ipsen_mikhailov.py
--------------------------

Graph distance based on paper:
The HIM glocal metric and kernel for network comparison and classification
Available here:
https://ieeexplore.ieee.org/abstract/document/7344816

author: Guillaume St-Onge
email: guillaume.st-onge.4@ulaval.ca
Submitted as part of the 2019 NetSI Collabathon.

"""

import numpy as np
import networkx as nx
from .base import BaseDistance
from scipy.optimize import fsolve
from .ipsen_mikhailov import _im_distance
from ..utilities.graph import unweighted


[docs]class HammingIpsenMikhailov(BaseDistance): """Combination of Hamming and Ipsen-Mikhailov distances."""
[docs] @unweighted def dist(self, G1, G2, combination_factor=1): """Graph distance combining local and global distances. The local metric H is the Hamming distance, corresponding to the difference for the edges in both networks. The global (spectral) metric IM is the Ipsen-Mikailov distance, corresponding to the square-root of the squared difference of the Laplacian spectrum for each network. The Hamming-Ipsen-Mikhailov (HIM) distance is an Euclidean metric on the space created by the Cartesian product of the metric space associated with H and IM. The trade-off between global and local information is governed by a combination factor: when this is one, local and global information are balanced; when it is zero, it reduces to the (local) Hamming distance; and as it approaches infinity it becomes the (global) Ipsen-Mikhailov distance. The results dictionary also stores a 2-tuple of the underlying adjacency matrices in the key `'adjacency_matrices'`, the Hamming distance in `'hamming_dist'`, the Ipsen-Mikhailov distance in `'ipsen_mikhailov_dist'`, and the Lorentzian half-width at half-maximum in `'hwhm'`. If the networks being compared are directed, the augmented adjacency matrices are calculated and stored in `'augmented_adjacency_matrices'`. Parameters ---------- G1, G2 (nx.Graph) two networkx graphs to be compared. combination_factor (float) positive factor in front of the IM metric. Returns ------- dist (float) the distance between G1 and G2. Notes ----- Requires networks with the same number of nodes. The networks can be directed and weighted (with weights in the range :math:`[0,1]`). Both (H and IM) are also saved in the results dictionary. References ---------- [1] https://ieeexplore.ieee.org/abstract/document/7344816 """ N = len(G1) # get the adjacency matrices adj1 = nx.to_numpy_array(G1) adj2 = nx.to_numpy_array(G2) self.results['adjacency_matrices'] = adj1, adj2 # verify if the graphs are directed directed = nx.is_directed(G1) or nx.is_directed(G2) if directed: null_mat = np.zeros((N, N)) # create augmented adjacency matrices adj1_aug = np.block([[null_mat, adj1.T], [adj1, null_mat]]) adj2_aug = np.block([[null_mat, adj2.T], [adj2, null_mat]]) self.results['augmented_adjacency_matrices'] = adj1_aug, adj2_aug # get the normalized Hamming distance H = np.sum(np.abs(adj1_aug - adj2_aug)) / (2 * N * (N - 1)) self.results['hamming_dist'] = H # get the appropriate hwhm for the network size hwhm = _get_hwhm_directed(N) self.results['hwhm'] = hwhm # get the IM distance IM = _im_distance(adj1_aug, adj2_aug, hwhm) self.results['ipsen_mikhailov_dist'] = IM else: # get the normalized Hamming distance H = np.sum(np.abs(adj1 - adj2)) / (N * (N - 1)) self.results['hamming_dist'] = H # get the appropriate hwhm for the network size hwhm = _get_hwhm_undirected(N) self.results['hwhm'] = hwhm # get the IM distance IM = _im_distance(adj1, adj2, hwhm) self.results['ipsen_mikhailov_dist'] = IM # determine the glocal distance from the combination HIM = np.sqrt(H**2 + combination_factor * IM**2) / np.sqrt( 1 + combination_factor ) self.results['dist'] = HIM return HIM
def _get_hwhm_undirected(N): """Obtain the lorentzian half-width at half-maximum (hwhm) to get a normalized HIM distance For undirected networks. Parameters ---------- N (int): Number of nodes. Returns ------- hwhm (float) : hwhm of the lorentzian distribution. """ def func(g): sN = np.sqrt(N) v = np.arctan(sN / g) return ( -1 + 1 / (np.pi * g) + (np.pi / 2 + g * sN / (g**2 + N) + v) / (2 * g * (np.pi / 2 + v) ** 2) - 4 * g * (np.pi - g * np.log(g**2 / (g**2 + N)) / sN + v) / ((np.pi / 2 + v) * np.pi * (4 * g**2 + N)) ) return fsolve(func, 0.5)[0] def _get_hwhm_directed(N): """Obtain the lorentzian half-width at half-maximum (hwhm) to get a normalized HIM distance For directed networks. Parameters ---------- N (int): Number of nodes. Returns ------- hwhm (float) : hwhm of the lorentzian distribution. """ def func(g): Nm2 = N - 2 sN = np.sqrt(N) sNm2 = np.sqrt(N - 2) s2Nm2 = np.sqrt(2 * N - 2) atN = np.arctan(sN / g) atNm2 = np.arctan(sNm2 / g) at2Nm2 = np.arctan(s2Nm2 / g) K = 1 / ((2 * N - 1) * np.pi / 2 + (N - 1) * (atNm2 + atN) + at2Nm2) Z = 2 * g / np.pi W = g * (N - 1) * K Wp = W / (N - 1) M0 = np.pi / (4 * g**3) MN = (g**2 * atN + N * atN + g * sN) / (2 * (g**5 + N * g**3)) + np.pi / ( 4 * g**3 ) MNm2 = (g**2 * atNm2 + Nm2 * atNm2 + g * sNm2) / ( 2 * (g**5 + Nm2 * g**3) ) + np.pi / (4 * g**3) M2Nm2 = (g**2 * at2Nm2 + (2 * N - 2) * at2Nm2 + g * s2Nm2) / ( 2 * (g**5 + (2 * N - 2) * g**3) ) + np.pi / (4 * g**3) L = lambda T, U: (-np.log(g**2 + U) + np.log(g**2 + T)) / ( (4 * g**2 + T + 3 * U) * np.sqrt(T) - (4 * g**2 + 3 * T + U) * np.sqrt(U) ) + (np.pi + np.arctan(np.sqrt(T) / g) + np.arctan(np.sqrt(U) / g)) / ( 4 * g**3 + g * T - 2 * g * np.sqrt(U * T) + g * U ) return ( -1 + Z**2 * M0 + W**2 * (MNm2 + MN) + Wp**2 * M2Nm2 - 2 * Z * W * L(0, Nm2) - 2 * Z * W * L(0, N) - 2 * Z * Wp * L(0, 2 * N - 2) + 2 * W**2 * L(Nm2, N) + 2 * W * Wp * L(Nm2, 2 * N - 2) + 2 * W * Wp * L(N, 2 * N - 2) ) return fsolve(func, 0.5)[0]