# Source code for netrd.distance.onion_divergence

"""
onion_divergence.py
--------------------------

The Jensen-Shannon divergence between the two graphs onion decomposition.

Graph distance based on :
https://www.nature.com/articles/srep31708
https://journals.aps.org/prx/abstract/10.1103/PhysRevX.9.011023

authors: Laurent Hébert-Dufresne and Guillaume St-Onge
email: guillaume.st-onge.4@ulaval.ca
Submitted as part of the 2019 NetSI Collabathon.

"""

import numpy as np
import networkx as nx
from .base import BaseDistance
from functools import reduce
from netrd.utilities import undirected, unweighted

[docs]class OnionDivergence(BaseDistance):
"""Compares various types of feature distributions."""

[docs]    @undirected
@unweighted
def dist(self, G1, G2, dist='lccm'):
"""Jensen-Shannon divergence between the feature distributions fixed by dist.

Assumes simple graphs.

Parameters
----------

G1, G2 (nx.Graph)
two networkx graphs to be compared.

dist (str)
type of distribution divergence to output. Choices are 'cm',
'ccm', 'lccm_node' and 'lccm'. The type stand for the
associated random graph ensemble. 'cm' compares only the degree
distribution. 'ccm' compares the networks according to the
edges degree-degree distribution.  'lccm_node' compares the
distribution of nodes according to their onion centrality
(degree, coreness, and layer within core). Finally, 'lccm'
compares the networks according to the edges joint
degree, coreness and layer distribution for both endpoints.

Returns
-------
dist (float)
the distance between G1 and G2.

References
----------
.. [1] https://www.nature.com/articles/srep31708

.. [2] https://journals.aps.org/prx/abstract/10.1103/PhysRevX.9.011023

"""
# take the simple graph version
G1_simple = G1
G2_simple = G2
G1_simple.remove_edges_from(nx.selfloop_edges(G1_simple))
G2_simple.remove_edges_from(nx.selfloop_edges(G2_simple))

# get sparse matrices values for each graph
matrices_G1 = _create_sparse_matrices_for_graph(G1_simple)
matrices_G2 = _create_sparse_matrices_for_graph(G2_simple)

# get the different distances
cm_dist = _divergence_of_sparse_matrices(*matrices_G1['cm'], *matrices_G2['cm'])
ccm_dist = _divergence_of_sparse_matrices(
*matrices_G1['ccm'], *matrices_G2['ccm']
)
lccm_node_dist = _divergence_of_sparse_matrices(
*matrices_G1['lccm_node'], *matrices_G2['lccm_node']
)
lccm_dist = _divergence_of_sparse_matrices(
*matrices_G1['lccm'], *matrices_G2['lccm']
)
# store the distances
self.results['cm_dist'] = cm_dist
self.results['ccm_dist'] = ccm_dist
self.results['lccm_node_dist'] = lccm_node_dist
self.results['lccm_dist'] = lccm_dist
dist_id = '{}_dist'.format(dist)
self.results['dist'] = self.results[dist_id]

return self.results[dist_id]

def _onion_decomposition(G):
# Creates a copy of the graph (to be able to remove vertices and edges)
G_copy = G.copy()
# Dictionaries to register the k-core/onion decompositions.
coreness_map = {}
layer_map = {}
local_layer_map = {}
# Performs the onion decomposition.
current_core = 0
current_layer = 1
local_layer = 1
while G_copy.number_of_nodes() > 0:
# Sets properly the current core.
degree_sequence = [d for n, d in G_copy.degree()]
min_degree = min(degree_sequence)
if min_degree >= (current_core + 1):
current_core = min_degree
local_layer = 1
# Identifies vertices in the current layer.
this_layer_ = []
for v in G_copy.nodes():
if G_copy.degree()[v] <= current_core:
this_layer_.append(v)
# Identifies the core/layer of the vertices in the current layer.
for v in this_layer_:
coreness_map[v] = current_core
layer_map[v] = current_layer
local_layer_map[v] = local_layer
G_copy.remove_node(v)
current_layer = current_layer + 1
local_layer = local_layer + 1
# Returns the dictionaries containing the k-shell and onion layer of
# each vertices.
return (layer_map, local_layer_map, coreness_map)

def _update_sparse_matrix(dictionary, values, key, index):
# Creates entry if it doesn't exist or else update the corresponding value
if not key in dictionary:
dictionary[key] = index
values.append(1)
index += 1
else:
index_i = dictionary[key]
values[index_i] += 1
# Returns new index (i.e. the nb of nonzero entry in sparse matrix)
return index

def _create_sparse_matrices_for_graph(G):
onion, local_layer, kcore = _onion_decomposition(G)
# creates of copy of the graph
G_copy = G.copy()
cm_sparse_stub_matrix = {}
ccm_sparse_stub_matrix = {}
lccm_sparse_stub_matrix = {}
lccm_sparse_node_matrix = {}
cm_values = []
cm_index = 0
ccm_values = []
ccm_index = 0
lccm_node_values = []
lccm_node_index = 0
lccm_values = []
lccm_index = 0
for n in G_copy.nodes():
d1 = G_copy.degree(n)
c1 = kcore[n]
l1 = local_layer[n]
cm_index = _update_sparse_matrix(cm_sparse_stub_matrix, cm_values, d1, cm_index)
lccm_node_index = _update_sparse_matrix(
lccm_sparse_node_matrix, lccm_node_values, (d1, c1, l1), lccm_node_index
)
for e in G_copy.edges():
d1 = G_copy.degree(e[0])
d2 = G_copy.degree(e[1])
c1 = kcore[e[0]]
c2 = kcore[e[1]]
l1 = local_layer[e[0]]
l2 = local_layer[e[1]]
ccm_index = _update_sparse_matrix(
ccm_sparse_stub_matrix, ccm_values, (d1, d2), ccm_index
)
lccm_index = _update_sparse_matrix(
lccm_sparse_stub_matrix, lccm_values, (d1, c1, l1, d2, c2, l2), lccm_index
)
ccm_index = _update_sparse_matrix(
ccm_sparse_stub_matrix, ccm_values, (d2, d1), ccm_index
)
lccm_index = _update_sparse_matrix(
lccm_sparse_stub_matrix, lccm_values, (d2, c2, l2, d1, c1, l1), lccm_index
)
cm_values = [x / G_copy.number_of_nodes() for x in cm_values]
ccm_values = [x / (2 * G_copy.number_of_edges()) for x in ccm_values]
lccm_values = [x / (2 * G_copy.number_of_edges()) for x in lccm_values]
lccm_node_values = [x / G_copy.number_of_nodes() for x in lccm_node_values]

# output the results in a dictionary
results = dict()
results['cm'] = (cm_sparse_stub_matrix, cm_values)
results['ccm'] = (ccm_sparse_stub_matrix, ccm_values)
results['lccm'] = (lccm_sparse_stub_matrix, lccm_values)
results['lccm_node'] = (lccm_sparse_node_matrix, lccm_node_values)
return results

def _divergence_of_sparse_matrices(keys1, values1, keys2, values2):
keys = reduce(set.union, map(set, map(dict.keys, [keys1, keys2])))
JSD = 0
for key in keys:
if key in keys1:
value1 = values1[keys1[key]]
else:
value1 = 0.0
if key in keys2:
value2 = values2[keys2[key]]
else:
value2 = 0.0
avg = (value1 + value2) / 2
if value1 > 0:
JSD += 0.5 * value1 * np.log2(value1 / avg)
if value2 > 0:
JSD += 0.5 * value2 * np.log2(value2 / avg)
return JSD