Source code for netrd.reconstruction.correlation_spanning_tree

"""
correlation_spanning_tree.py
----------------------------

Graph reconstruction algorithm based on Mantegna, R. N. (1999). Hierarchical structure in
financial markets. The European Physical Journal B-Condensed Matter and Complex Systems,
11(1), 193-197. DOI https://doi.org/10.1007/s100510050929
https://link.springer.com/article/10.1007/s100510050929

author: Matteo Chinazzi
Submitted as part of the 2019 NetSI Collabathon.
"""

from .base import BaseReconstructor
import numpy as np
from scipy.sparse.csgraph import minimum_spanning_tree

try:
    from networkx import from_scipy_sparse_array as from_sparse
except ImportError:
    from networkx import from_scipy_sparse_matrix as from_sparse


[docs]class CorrelationSpanningTree(BaseReconstructor): """Minimum spanning tree connecting the sensors."""
[docs] def fit(self, TS, distance='root_inv', **kwargs): r"""Create a minimum spanning tree connecting the sensors. The empirical correlation matrix is used to first compute a distance matrix and then to create a minimum spanning tree connecting all the sensors in the data. This method implements the methodology described in [1]_ and applied in the context of creating a graph connecting the stocks of a portfolio of generated by looking at the correlations between the daily time series of stock prices. The results dictionary also stores the distance matrix (computed from the correlations) as `'distance_matrix'`. Parameters ---------- TS (np.ndarray) :math:`N \times L` array consisting of :math:`L` observations from :math:`N` sensors. distance (str) 'inv_square' calculates distance as :math:`1-corr_{ij}^2` as in [1]_. 'root_inv' calculates distance as :math:`\sqrt{2 (1-corr_{ij})}` [2]_. Returns ------- G (nx.Graph) A reconstructed graph with :math:`N` nodes. Examples -------- .. code:: python import numpy as np import networkx as nx from matplotlib import pyplot as plt from netrd.reconstruction import CorrelationSpanningTree N = 25 T = 300 M = np.random.normal(size=(N,T)) print('Create correlated time series') market_mode = 0.4*np.random.normal(size=(1,T)) M += market_mode sector_modes = {d: 0.5*np.random.normal(size=(1,T)) for d in range(5)} for sector_mode, vals in sector_modes.items(): M[sector_mode*5:(sector_mode+1)*5,:] += vals print('Link node colors to sectors') colors = ['b','r','g','y','m'] node_colors = [color for color in colors for __ in range(5)] print('Network reconstruction step') cst_net = CorrelationSpanningTree() G = cst_net.fit(M) print('Plot reconstructed spanning tree') fig, ax = plt.subplots() nx.draw(G, ax=ax, node_color=node_colors) References ---------- .. [1] Mantegna, R. N. (1999). Hierarchical structure in financial markets. The European Physical Journal B-Condensed Matter and Complex Systems, 11(1), 193-197. DOI https://doi.org/10.1007/s100510050929 https://link.springer.com/article/10.1007/s100510050929 .. [2] Bonanno, G., Caldarelli, G., Lillo, F. & Mantegna, R. N. (2003) Topology of correlation-based minimal spanning trees in real and model markets. Physical Review E 68. .. [3] Vandewalle, N., Brisbois, F. & Tordoir, X. (2001) Non-random topology of stock markets. Quantitative Finance 1, 372–374. """ C = np.corrcoef(TS) # Empirical correlation matrix D = ( np.sqrt(2 * (1 - C)) if distance == 'root_inv' else 1 - np.square(C) ) # Distance matrix self.results['distance_matrix'] = D MST = minimum_spanning_tree(D) # Minimum Spanning Tree G = from_sparse(MST) self.results['graph'] = G return G