# Source code for netrd.reconstruction.mutual_information_matrix

"""
mutual_information_matrix.py
--------------

Graph reconstruction algorithm based on
Donges, Zou, Marwan, & Kurths. "The backbone of the climate network."
EPL (Europhysics Letters) 87.4 (2009): 48007
http://iopscience.iop.org/article/10.1209/0295-5075/87/48007.

author: Harrison Hartle
email: hartle.h@husky.neu.edu
Submitted as part of the 2019 NetSI Collabathon.
"""

from .base import BaseReconstructor
import numpy as np
from ..utilities import create_graph, threshold

[docs]class MutualInformationMatrix(BaseReconstructor):
"""Uses the mutual information between nodes."""

[docs]    def fit(self, TS, nbins=10, threshold_type='degree', **kwargs):
"""Calculates the mutual information between the probability distributions
of the (binned) values of the time series of pairs of nodes.

First, the mutual information is computed between each pair of
vertices.  Then, a thresholding condition is applied to obtain
edges.

The results dictionary also stores the weight matrix as
'weights_matrix' and the thresholded version of the weight matrix
as 'thresholded_matrix'.

Parameters
----------

TS (np.ndarray)
Array consisting of :math:L observations from :math:N
sensors.

nbins (int)
number of bins for the pre-processing step (to yield a discrete
probability distribution)

threshold_type (str)
Which thresholding function to use on the matrix of
weights. See netrd.utilities.threshold.py for
documentation. Pass additional arguments to the thresholder
using **kwargs.

Returns
-------

G (nx.Graph)
A reconstructed graph with :math:N nodes.

"""
N = TS.shape[0]
rang = [np.min(TS), np.max(TS)]

# saving these lines because there is a chance the "binned_edges" data would be useful
#         _,bin_edges = np.histogram(TS[0], range=np.array(rang), bins=nbins)
#         self.results['bin_edges'] = bin_edges

# mutual information requires a joint probability and a "product probability" distribution
IndivP = find_individual_probability_distribution(TS, rang, nbins)
ProduP = find_product_probability_distribution(IndivP, N)
JointP = find_joint_probability_distribution(TS, rang, nbins)

# calculate the mutual information between each pair of nodes--this is the
# mutual information matrix
I = mutual_info_all_pairs(JointP, ProduP, N)
self.results['weights_matrix'] = I

# the adjacency matrix is the binarized thresholded mutual information matrix
# tau=threshold_from_degree(deg,I)
# A = np.array(I>tau, dtype=int)
A = threshold(I, threshold_type, **kwargs)
self.results['thresholded_matrix'] = A

G = create_graph(A)
self.results['graph'] = G

return G

def find_individual_probability_distribution(TS, rang, nbins):
"""
Assign each node to a vector of length nbins where each element is the probability of the
node in the time series being in that binned "state"

Parameters
----------
TS (np.ndarray): Array consisting of $L$ observations from $N$ sensors.
rang (list): list of the minimum and maximum value in the time series
nbins (int): number of bins for the pre-processing step (to yield a discrete probability distribution)

Returns
-------
IndivP (dict): a dictionary where the keys are nodes in the graph and the values are vectors
of empirical probabilities of that node's time series being in that binned state.
"""

N, L = TS.shape  # N nodes and L length
IndivP = (
dict()
)  # create a dict to put the individual binned probability vectors into

for j in range(N):
P, _ = np.histogram(
TS[j], bins=nbins, range=rang
)  # bin node j's time series data into nbins
IndivP[j] = (
P / L
)  # normalize that by the length of time series data to make it a vector of probs

return IndivP

def find_product_probability_distribution(IndivP, N):
"""
Assign each node j to a vector of length nbins where each element is the product of its own
individual_probability_distribution and its neighbors'. P(x) * P(y) <-- as opposed to P(x,y)

Parameters
----------
IndivP (dict): dictionary that gets output by find_individual_probability_distribution()
N (int): number of nodes in the graph

Returns
-------
ProduP (dict): a dictionary where the keys are nodes in the graph and the values
are nbins x nbins arrays corresponding to products of two probability vectors
"""

ProduP = dict()  # create a dict to put the product prob distributions into

for l in range(N):  # for each node
for j in range(l):  # for each possible edge (this method is symmetric)
ProduP[(j, l)] = np.outer(
IndivP[j], IndivP[l]
)  # outer product between two vectors

return ProduP

def find_joint_probability_distribution(TS, rang, nbins):
"""
Assign each node j to a vector of length nbins where each element is the product of its own
individual_probability_distribution and its neighbors'. P(x) * P(y) <-- as opposed to P(x,y)

Parameters
----------
TS (np.ndarray): Array consisting of $L$ observations from $N$ sensors.
rang (list): list of the minimum and maximum value in the time series
nbins (int): number of bins for the pre-processing step (to yield a discrete probability distribution)

Returns
-------
JointP (dict): a dictionary where the keys are nodes in the graph and the
are nbins x nbins arrays corresponding to joint probability vectors
"""

N, L = TS.shape  # N nodes and L length
JointP = dict()  # create a dict to put the joint prob distributions into

for l in range(N):  # for each node
for j in range(l):  # for each possible edge
P, _, _ = np.histogram2d(
TS[j], TS[l], bins=nbins, range=np.array([rang, rang])
)
JointP[(j, l)] = P / L

return JointP

def mutual_info_node_pair(JointP_jl, ProduP_jl):
"""
Calculate the mutual information between two nodes.

Parameters
----------
JointP_jl (np.ndarray): nbins x nbins array of two nodes' joint probability distributions
ProduP_jl (np.ndarray): nbins x nbins array of two nodes' product probability distributions

Returns
-------
I_jl (float): the mutual information between j and l, or
the (j,l)'th entry of the mutual information matrix
Note: np.log returns an information value in nats
"""

I_jl = 0  # initialize the mutual information to zero

for q, p in zip(JointP_jl.flatten(), ProduP_jl.flatten()):
if q > 0 and p > 0:  # avoid log(0) or dividing by zero
I_jl += q * np.log(q / p)  # an instance of the KL divergence

return I_jl

def mutual_info_all_pairs(JointP, ProduP, N):
"""
Calculate the mutual information between all pairs of nodes.

Parameters
----------
JointP (dict): a dictionary where the keys are pairs of nodes in the graph and the
are nbins x nbins arrays corresponding to joint probability vectors
ProduP (dict): a dictionary where the keys are pairs of nodes in the graph and the values
are nbins x nbins arrays corresponding to products of two probability vectors
N (int): list of the minimum and maximum value in the time series

Returns
-------
I (np.ndarray): the NxN mutual information matrix from a time series
"""

I = np.zeros((N, N))  # initialize an empty matrix

for l in range(N):
for j in range(l):
JointP_jl = JointP[(j, l)]
ProduP_jl = ProduP[(j, l)]

I[j, l] = mutual_info_node_pair(JointP_jl, ProduP_jl)  # fill in the matrix
I[l, j] = I[j, l]  # this method is symmetric

return I

def threshold_from_degree(deg, M):
"""
Compute the required threshold (tau) in order to yield a reconstructed graph of mean degree deg.
Parameters
----------
deg (int): Target degree for which the appropriate threshold will be computed
M (np.ndarray): Pre-thresholded NxN array
Returns
------
tau (float): Required threshold for A=np.array(I<tau,dtype=int) to have an average of deg ones per row/column

"""
N = len(M)
A = np.ones((N, N))  # start with a complete graph (lowest possible threshold)
for tau in sorted(M.flatten()):  # consider increasingly large entry-values
A[M == tau] = 0  # remove edges of weight less than the current entry
if (
np.mean(np.sum(A, 1)) < deg
):  # stop once the matrix is trimmed down to mean degree deg
break
return tau  # return this critical threshold value