Source code for netrd.distance.onion_divergence

"""
onion_divergence.py
--------------------------

The Jensen-Shannon divergence between the two graphs onion decomposition.

Graph distance based on :
https://www.nature.com/articles/srep31708
https://journals.aps.org/prx/abstract/10.1103/PhysRevX.9.011023

authors: Laurent Hébert-Dufresne and Guillaume St-Onge
email: guillaume.st-onge.4@ulaval.ca
Submitted as part of the 2019 NetSI Collabathon.

"""

import numpy as np
import networkx as nx
from .base import BaseDistance
from functools import reduce
from netrd.utilities import undirected, unweighted


[docs]class OnionDivergence(BaseDistance): """Compares various types of feature distributions."""
[docs] @undirected @unweighted def dist(self, G1, G2, dist='lccm'): """Jensen-Shannon divergence between the feature distributions fixed by dist. Assumes simple graphs. Parameters ---------- G1, G2 (nx.Graph) two networkx graphs to be compared. dist (str) type of distribution divergence to output. Choices are 'cm', 'ccm', 'lccm_node' and 'lccm'. The type stand for the associated random graph ensemble. 'cm' compares only the degree distribution. 'ccm' compares the networks according to the edges degree-degree distribution. 'lccm_node' compares the distribution of nodes according to their onion centrality (degree, coreness, and layer within core). Finally, 'lccm' compares the networks according to the edges joint degree, coreness and layer distribution for both endpoints. Returns ------- dist (float) the distance between `G1` and `G2`. References ---------- .. [1] https://www.nature.com/articles/srep31708 .. [2] https://journals.aps.org/prx/abstract/10.1103/PhysRevX.9.011023 """ # take the simple graph version G1_simple = G1 G2_simple = G2 G1_simple.remove_edges_from(nx.selfloop_edges(G1_simple)) G2_simple.remove_edges_from(nx.selfloop_edges(G2_simple)) # get sparse matrices values for each graph matrices_G1 = _create_sparse_matrices_for_graph(G1_simple) matrices_G2 = _create_sparse_matrices_for_graph(G2_simple) # get the different distances cm_dist = _divergence_of_sparse_matrices(*matrices_G1['cm'], *matrices_G2['cm']) ccm_dist = _divergence_of_sparse_matrices( *matrices_G1['ccm'], *matrices_G2['ccm'] ) lccm_node_dist = _divergence_of_sparse_matrices( *matrices_G1['lccm_node'], *matrices_G2['lccm_node'] ) lccm_dist = _divergence_of_sparse_matrices( *matrices_G1['lccm'], *matrices_G2['lccm'] ) # store the distances self.results['cm_dist'] = cm_dist self.results['ccm_dist'] = ccm_dist self.results['lccm_node_dist'] = lccm_node_dist self.results['lccm_dist'] = lccm_dist dist_id = '{}_dist'.format(dist) self.results['dist'] = self.results[dist_id] return self.results[dist_id]
def _onion_decomposition(G): # Creates a copy of the graph (to be able to remove vertices and edges) G_copy = G.copy() # Dictionaries to register the k-core/onion decompositions. coreness_map = {} layer_map = {} local_layer_map = {} # Performs the onion decomposition. current_core = 0 current_layer = 1 local_layer = 1 while G_copy.number_of_nodes() > 0: # Sets properly the current core. degree_sequence = [d for n, d in G_copy.degree()] min_degree = min(degree_sequence) if min_degree >= (current_core + 1): current_core = min_degree local_layer = 1 # Identifies vertices in the current layer. this_layer_ = [] for v in G_copy.nodes(): if G_copy.degree()[v] <= current_core: this_layer_.append(v) # Identifies the core/layer of the vertices in the current layer. for v in this_layer_: coreness_map[v] = current_core layer_map[v] = current_layer local_layer_map[v] = local_layer G_copy.remove_node(v) # Updates the layer count. current_layer = current_layer + 1 local_layer = local_layer + 1 # Returns the dictionaries containing the k-shell and onion layer of # each vertices. return (layer_map, local_layer_map, coreness_map) def _update_sparse_matrix(dictionary, values, key, index): # Creates entry if it doesn't exist or else update the corresponding value if not key in dictionary: dictionary[key] = index values.append(1) index += 1 else: index_i = dictionary[key] values[index_i] += 1 # Returns new index (i.e. the nb of nonzero entry in sparse matrix) return index def _create_sparse_matrices_for_graph(G): onion, local_layer, kcore = _onion_decomposition(G) # creates of copy of the graph G_copy = G.copy() cm_sparse_stub_matrix = {} ccm_sparse_stub_matrix = {} lccm_sparse_stub_matrix = {} lccm_sparse_node_matrix = {} cm_values = [] cm_index = 0 ccm_values = [] ccm_index = 0 lccm_node_values = [] lccm_node_index = 0 lccm_values = [] lccm_index = 0 for n in G_copy.nodes(): d1 = G_copy.degree(n) c1 = kcore[n] l1 = local_layer[n] cm_index = _update_sparse_matrix(cm_sparse_stub_matrix, cm_values, d1, cm_index) lccm_node_index = _update_sparse_matrix( lccm_sparse_node_matrix, lccm_node_values, (d1, c1, l1), lccm_node_index ) for e in G_copy.edges(): d1 = G_copy.degree(e[0]) d2 = G_copy.degree(e[1]) c1 = kcore[e[0]] c2 = kcore[e[1]] l1 = local_layer[e[0]] l2 = local_layer[e[1]] ccm_index = _update_sparse_matrix( ccm_sparse_stub_matrix, ccm_values, (d1, d2), ccm_index ) lccm_index = _update_sparse_matrix( lccm_sparse_stub_matrix, lccm_values, (d1, c1, l1, d2, c2, l2), lccm_index ) ccm_index = _update_sparse_matrix( ccm_sparse_stub_matrix, ccm_values, (d2, d1), ccm_index ) lccm_index = _update_sparse_matrix( lccm_sparse_stub_matrix, lccm_values, (d2, c2, l2, d1, c1, l1), lccm_index ) cm_values = [x / G_copy.number_of_nodes() for x in cm_values] ccm_values = [x / (2 * G_copy.number_of_edges()) for x in ccm_values] lccm_values = [x / (2 * G_copy.number_of_edges()) for x in lccm_values] lccm_node_values = [x / G_copy.number_of_nodes() for x in lccm_node_values] # output the results in a dictionary results = dict() results['cm'] = (cm_sparse_stub_matrix, cm_values) results['ccm'] = (ccm_sparse_stub_matrix, ccm_values) results['lccm'] = (lccm_sparse_stub_matrix, lccm_values) results['lccm_node'] = (lccm_sparse_node_matrix, lccm_node_values) return results def _divergence_of_sparse_matrices(keys1, values1, keys2, values2): keys = reduce(set.union, map(set, map(dict.keys, [keys1, keys2]))) JSD = 0 for key in keys: if key in keys1: value1 = values1[keys1[key]] else: value1 = 0.0 if key in keys2: value2 = values2[keys2[key]] else: value2 = 0.0 avg = (value1 + value2) / 2 if value1 > 0: JSD += 0.5 * value1 * np.log2(value1 / avg) if value2 > 0: JSD += 0.5 * value2 * np.log2(value2 / avg) return JSD