Source code for netrd.distance.netsimile

"""
netsimile.py
--------------

Graph distance based on:
Berlingerio, M., Koutra, D., Eliassi-Rad, T. & Faloutsos, C. NetSimile: A Scalable Approach to Size-Independent Network Similarity. arXiv (2012)

author: Alex Gates
email: ajgates42@gmail.com (optional)
Submitted as part of the 2019 NetSI Collabathon.

"""
import networkx as nx
import numpy as np
from scipy.spatial.distance import canberra
from scipy.stats import skew, kurtosis

from .base import BaseDistance
from ..utilities import undirected, unweighted


[docs]class NetSimile(BaseDistance): """Compares node signature distributions."""
[docs] @undirected @unweighted def dist(self, G1, G2): """A scalable approach to network similarity. A network similarity measure based on node signature distributions. The results dictionary includes the underlying feature matrices in `'feature_matrices'` and the underlying signature vectors in `'signature_vectors'`. Parameters ---------- G1, G2 (nx.Graph) two undirected networkx graphs to be compared. Returns ------- dist (float) the distance between `G1` and `G2`. References ---------- .. [1] Michele Berlingerio, Danai Koutra, Tina Eliassi-Rad, Christos Faloutsos: NetSimile: A Scalable Approach to Size-Independent Network Similarity. CoRR abs/1209.2684 (2012) """ # find the graph node feature matrices G1_node_features = feature_extraction(G1) G2_node_features = feature_extraction(G2) # get the graph signature vectors G1_signature = graph_signature(G1_node_features) G2_signature = graph_signature(G2_node_features) # the final distance is the absolute canberra distance dist = abs(canberra(G1_signature, G2_signature)) self.results['feature_matrices'] = G1_node_features, G2_node_features self.results['signature_vectors'] = G1_signature, G2_signature self.results['dist'] = dist return dist
def feature_extraction(G): """Node feature extraction. Parameters ---------- G (nx.Graph): a networkx graph. Returns ------- node_features (float): the Nx7 matrix of node features.""" # necessary data structures node_features = np.zeros(shape=(G.number_of_nodes(), 7)) node_list = sorted(G.nodes()) node_degree_dict = dict(G.degree()) node_clustering_dict = dict(nx.clustering(G)) egonets = {n: nx.ego_graph(G, n) for n in node_list} # node degrees degs = [node_degree_dict[n] for n in node_list] # clustering coefficient clusts = [node_clustering_dict[n] for n in node_list] # average degree of neighborhood neighbor_degs = [ np.mean([node_degree_dict[m] for m in egonets[n].nodes if m != n]) if node_degree_dict[n] > 0 else 0 for n in node_list ] # average clustering coefficient of neighborhood neighbor_clusts = [ np.mean([node_clustering_dict[m] for m in egonets[n].nodes if m != n]) if node_degree_dict[n] > 0 else 0 for n in node_list ] # number of edges in the neighborhood neighbor_edges = [ egonets[n].number_of_edges() if node_degree_dict[n] > 0 else 0 for n in node_list ] # number of outgoing edges from the neighborhood # the sum of neighborhood degrees = 2*(internal edges) + external edges # node_features[:,5] = node_features[:,0] * node_features[:,2] - 2*node_features[:,4] neighbor_outgoing_edges = [ len( [ edge for edge in set.union(*[set(G.edges(j)) for j in egonets[i].nodes]) if not egonets[i].has_edge(*edge) ] ) for i in node_list ] # number of neighbors of neighbors (not in neighborhood) neighbors_of_neighbors = [ len( set([p for m in G.neighbors(n) for p in G.neighbors(m)]) - set(G.neighbors(n)) - set([n]) ) if node_degree_dict[n] > 0 else 0 for n in node_list ] # assembling the features node_features[:, 0] = degs node_features[:, 1] = clusts node_features[:, 2] = neighbor_degs node_features[:, 3] = neighbor_clusts node_features[:, 4] = neighbor_edges node_features[:, 5] = neighbor_outgoing_edges node_features[:, 6] = neighbors_of_neighbors return np.nan_to_num(node_features) def graph_signature(node_features): signature_vec = np.zeros(7 * 5) # for each of the 7 features for k in range(7): # find the mean signature_vec[k * 5] = node_features[:, k].mean() # find the median signature_vec[k * 5 + 1] = np.median(node_features[:, k]) # find the std signature_vec[k * 5 + 2] = node_features[:, k].std() # find the skew signature_vec[k * 5 + 3] = skew(node_features[:, k]) # find the kurtosis signature_vec[k * 5 + 4] = kurtosis(node_features[:, k]) return signature_vec """ # sample usage >>>from netrd.distance import NetSimile >>>G1 = nx.karate_club_graph() >>>G2 = nx.krackhardt_kite_graph() >>>test = NetSimile() >>>print(test.dist(G1, G2)) 20.180783067167326 """