"""
hamming_ipsen_mikhailov.py
--------------------------
Graph distance based on paper:
The HIM glocal metric and kernel for network comparison and classification
Available here:
https://ieeexplore.ieee.org/abstract/document/7344816
author: Guillaume St-Onge
email: guillaume.st-onge.4@ulaval.ca
Submitted as part of the 2019 NetSI Collabathon.
"""
import numpy as np
import networkx as nx
from .base import BaseDistance
from scipy.optimize import fsolve
from .ipsen_mikhailov import _im_distance
from ..utilities.graph import unweighted
[docs]class HammingIpsenMikhailov(BaseDistance):
"""Combination of Hamming and Ipsen-Mikhailov distances."""
[docs] @unweighted
def dist(self, G1, G2, combination_factor=1):
"""Graph distance combining local and global distances.
The local metric H is the Hamming distance, corresponding to the
difference for the edges in both networks.
The global (spectral) metric IM is the Ipsen-Mikailov distance,
corresponding to the square-root of the squared difference of the
Laplacian spectrum for each network.
The Hamming-Ipsen-Mikhailov (HIM) distance is an Euclidean metric
on the space created by the Cartesian product of the metric space
associated with H and IM. The trade-off between global and local
information is governed by a combination factor: when this is
one, local and global information are balanced; when it is zero,
it reduces to the (local) Hamming distance; and as it approaches
infinity it becomes the (global) Ipsen-Mikhailov distance.
The results dictionary also stores a 2-tuple of the underlying
adjacency matrices in the key `'adjacency_matrices'`, the Hamming
distance in `'hamming_dist'`, the Ipsen-Mikhailov distance in
`'ipsen_mikhailov_dist'`, and the Lorentzian half-width at
half-maximum in `'hwhm'`. If the networks being compared are
directed, the augmented adjacency matrices are calculated and
stored in `'augmented_adjacency_matrices'`.
Parameters
----------
G1, G2 (nx.Graph)
two networkx graphs to be compared.
combination_factor (float)
positive factor in front of the IM metric.
Returns
-------
dist (float)
the distance between G1 and G2.
Notes
-----
Requires networks with the same number of nodes. The networks can
be directed and weighted (with weights in the range
:math:`[0,1]`). Both (H and IM) are also saved in the results
dictionary.
References
----------
[1] https://ieeexplore.ieee.org/abstract/document/7344816
"""
N = len(G1)
# get the adjacency matrices
adj1 = nx.to_numpy_array(G1)
adj2 = nx.to_numpy_array(G2)
self.results['adjacency_matrices'] = adj1, adj2
# verify if the graphs are directed
directed = nx.is_directed(G1) or nx.is_directed(G2)
if directed:
null_mat = np.zeros((N, N))
# create augmented adjacency matrices
adj1_aug = np.block([[null_mat, adj1.T], [adj1, null_mat]])
adj2_aug = np.block([[null_mat, adj2.T], [adj2, null_mat]])
self.results['augmented_adjacency_matrices'] = adj1_aug, adj2_aug
# get the normalized Hamming distance
H = np.sum(np.abs(adj1_aug - adj2_aug)) / (2 * N * (N - 1))
self.results['hamming_dist'] = H
# get the appropriate hwhm for the network size
hwhm = _get_hwhm_directed(N)
self.results['hwhm'] = hwhm
# get the IM distance
IM = _im_distance(adj1_aug, adj2_aug, hwhm)
self.results['ipsen_mikhailov_dist'] = IM
else:
# get the normalized Hamming distance
H = np.sum(np.abs(adj1 - adj2)) / (N * (N - 1))
self.results['hamming_dist'] = H
# get the appropriate hwhm for the network size
hwhm = _get_hwhm_undirected(N)
self.results['hwhm'] = hwhm
# get the IM distance
IM = _im_distance(adj1, adj2, hwhm)
self.results['ipsen_mikhailov_dist'] = IM
# determine the glocal distance from the combination
HIM = np.sqrt(H**2 + combination_factor * IM**2) / np.sqrt(
1 + combination_factor
)
self.results['dist'] = HIM
return HIM
def _get_hwhm_undirected(N):
"""Obtain the lorentzian half-width at half-maximum (hwhm)
to get a normalized HIM distance
For undirected networks.
Parameters
----------
N (int): Number of nodes.
Returns
-------
hwhm (float) : hwhm of the lorentzian distribution.
"""
def func(g):
sN = np.sqrt(N)
v = np.arctan(sN / g)
return (
-1
+ 1 / (np.pi * g)
+ (np.pi / 2 + g * sN / (g**2 + N) + v) / (2 * g * (np.pi / 2 + v) ** 2)
- 4
* g
* (np.pi - g * np.log(g**2 / (g**2 + N)) / sN + v)
/ ((np.pi / 2 + v) * np.pi * (4 * g**2 + N))
)
return fsolve(func, 0.5)[0]
def _get_hwhm_directed(N):
"""Obtain the lorentzian half-width at half-maximum (hwhm)
to get a normalized HIM distance
For directed networks.
Parameters
----------
N (int): Number of nodes.
Returns
-------
hwhm (float) : hwhm of the lorentzian distribution.
"""
def func(g):
Nm2 = N - 2
sN = np.sqrt(N)
sNm2 = np.sqrt(N - 2)
s2Nm2 = np.sqrt(2 * N - 2)
atN = np.arctan(sN / g)
atNm2 = np.arctan(sNm2 / g)
at2Nm2 = np.arctan(s2Nm2 / g)
K = 1 / ((2 * N - 1) * np.pi / 2 + (N - 1) * (atNm2 + atN) + at2Nm2)
Z = 2 * g / np.pi
W = g * (N - 1) * K
Wp = W / (N - 1)
M0 = np.pi / (4 * g**3)
MN = (g**2 * atN + N * atN + g * sN) / (2 * (g**5 + N * g**3)) + np.pi / (
4 * g**3
)
MNm2 = (g**2 * atNm2 + Nm2 * atNm2 + g * sNm2) / (
2 * (g**5 + Nm2 * g**3)
) + np.pi / (4 * g**3)
M2Nm2 = (g**2 * at2Nm2 + (2 * N - 2) * at2Nm2 + g * s2Nm2) / (
2 * (g**5 + (2 * N - 2) * g**3)
) + np.pi / (4 * g**3)
L = lambda T, U: (-np.log(g**2 + U) + np.log(g**2 + T)) / (
(4 * g**2 + T + 3 * U) * np.sqrt(T)
- (4 * g**2 + 3 * T + U) * np.sqrt(U)
) + (np.pi + np.arctan(np.sqrt(T) / g) + np.arctan(np.sqrt(U) / g)) / (
4 * g**3 + g * T - 2 * g * np.sqrt(U * T) + g * U
)
return (
-1
+ Z**2 * M0
+ W**2 * (MNm2 + MN)
+ Wp**2 * M2Nm2
- 2 * Z * W * L(0, Nm2)
- 2 * Z * W * L(0, N)
- 2 * Z * Wp * L(0, 2 * N - 2)
+ 2 * W**2 * L(Nm2, N)
+ 2 * W * Wp * L(Nm2, 2 * N - 2)
+ 2 * W * Wp * L(N, 2 * N - 2)
)
return fsolve(func, 0.5)[0]