"""
threshold.py
------------
Utilities for thresholding matrices based on different criteria
author: Stefan McCabe (stefanmccabe at gmail dot com)
Submitted as part of the 2019 NetSI Collabathon.
"""
import numpy as np
import warnings
def threshold_in_range(mat, **kwargs):
    r"""Threshold by setting values not within a list of ranges to zero.

    Parameters
    ----------
    mat (np.ndarray)
        A numpy array. The input is not modified; a new array is returned.

    cutoffs (list of tuples)
        When thresholding, include only edges whose correlations fall
        within a given range or set of ranges. The lower value must come
        first in each tuple. Both endpoints are inclusive. For example, to
        keep those values whose absolute value is between :math:`0.5` and
        :math:`1`, pass ``cutoffs=[(-1, -0.5), (0.5, 1)]``. If omitted, a
        ``RuntimeWarning`` is issued and ``[(-1, 1)]`` is used.

    binary (bool)
        If True, replace every nonzero entry of the result with one.
        Defaults to False.

    remove_self_loops (bool)
        If True, set the diagonal of the result to zero. Defaults to True.

    Returns
    -------
    thresholded_mat (np.ndarray)
        the thresholded numpy array

    """
    if 'cutoffs' in kwargs:
        cutoffs = kwargs['cutoffs']
    else:
        warnings.warn(
            "Setting 'cutoffs' argument is strongly encouraged. Using cutoff range of (-1, 1).",
            RuntimeWarning,
        )
        cutoffs = [(-1, 1)]

    # Work on a copy so the caller's array is left untouched.
    thresholded_mat = np.array(mat)

    # An entry is kept when it lies inside at least one of the closed ranges.
    keep = np.zeros(thresholded_mat.shape, dtype=bool)
    for cutoff in cutoffs:
        keep |= (thresholded_mat >= cutoff[0]) & (thresholded_mat <= cutoff[1])

    # Assign zeros rather than multiplying by the mask: inf * 0 and nan * 0
    # are nan, which would leave rejected entries non-zero.
    thresholded_mat[~keep] = 0

    if kwargs.get('binary', False):
        thresholded_mat = np.abs(np.sign(thresholded_mat))

    if kwargs.get('remove_self_loops', True):
        np.fill_diagonal(thresholded_mat, 0)

    return thresholded_mat
def threshold_on_quantile(mat, **kwargs):
    """Threshold by setting values below a given quantile to zero.

    Parameters
    ----------
    mat (np.ndarray)
        A numpy array. The input is not modified; a new array is returned.

    quantile (float)
        The threshold above which to keep an element of the array, e.g.,
        set to zero elements below the 90th quantile of the array. Only
        entries strictly greater than the quantile value are kept. A value
        of 0 disables thresholding. If omitted, a ``RuntimeWarning`` is
        issued and 0.9 is used.

    remove_self_loops (bool)
        If True, set the diagonal to zero before the quantile is computed.
        Defaults to True.

    binary (bool)
        If True, replace every nonzero entry of the result with one.
        Defaults to False.

    Returns
    -------
    thresholded_mat
        the thresholded numpy array

    """
    if 'quantile' in kwargs:
        quantile = kwargs['quantile']
    else:
        warnings.warn(
            "Setting 'quantile' argument is strongly recommended. Using target quantile of 0.9 for thresholding.",
            RuntimeWarning,
        )
        quantile = 0.9

    # Work on a copy: zeroing the diagonal must not leak into the caller's
    # array, and the returned array must not alias the input.
    thresholded_mat = np.array(mat)

    if kwargs.get('remove_self_loops', True):
        np.fill_diagonal(thresholded_mat, 0)

    if quantile != 0:
        cutoff = np.percentile(thresholded_mat, quantile * 100)
        # Assign zeros instead of multiplying by the mask so that rejected
        # inf/nan entries become zero rather than nan.
        thresholded_mat[~(thresholded_mat > cutoff)] = 0

    if kwargs.get('binary', False):
        thresholded_mat = np.abs(np.sign(thresholded_mat))

    return thresholded_mat
def threshold_on_degree(mat, **kwargs):
    """Threshold by keeping the largest values until a target average degree is met.

    Entries are discarded in ascending order of value until the average
    number of retained entries per row is at most ``avg_k``; every entry
    less than or equal to the last discarded value is set to zero.

    Parameters
    ----------
    mat (np.ndarray)
        A square numpy array. The input is not modified; a new array is
        returned.

    avg_k (float)
        The average degree to target when thresholding the matrix. If
        omitted, a ``RuntimeWarning`` is issued and 1 is used.

    remove_self_loops (bool)
        If True, set the diagonal to zero and exclude it from the degree
        count. Defaults to True.

    binary (bool)
        If True, replace every nonzero entry of the result with one.
        Defaults to False.

    Returns
    -------
    thresholded_mat
        the thresholded numpy array

    """
    if 'avg_k' in kwargs:
        avg_k = kwargs['avg_k']
    else:
        warnings.warn(
            "Setting 'avg_k' argument is strongly encouraged. Using average "
            "degree of 1 for thresholding.",
            RuntimeWarning,
        )
        avg_k = 1

    # Work on a copy so zeroing the diagonal does not alter the caller's array.
    thresholded_mat = np.array(mat)

    n = len(thresholded_mat)
    # A marks the entries that are still retained (1) or discarded (0).
    A = np.ones((n, n))

    if kwargs.get('remove_self_loops', True):
        np.fill_diagonal(A, 0)
        np.fill_diagonal(thresholded_mat, 0)

    # If the average degree is already at or below the target (or there is
    # nothing to threshold), the matrix is returned as is.
    if thresholded_mat.size and not (np.mean(np.sum(A, 1)) <= avg_k):
        # np.unique yields the distinct values in ascending order, so each
        # value is processed once instead of once per occurrence.
        for m in np.unique(thresholded_mat):
            A[thresholded_mat == m] = 0
            if np.mean(np.sum(A, 1)) <= avg_k:
                break
        # Keep only the entries strictly above the last discarded value.
        thresholded_mat[~(thresholded_mat > m)] = 0

    if kwargs.get('binary', False):
        thresholded_mat = np.abs(np.sign(thresholded_mat))

    return thresholded_mat
def threshold(mat, rule, **kwargs):
    """A flexible interface to other thresholding functions.

    Parameters
    ----------
    mat (np.ndarray)
        A numpy array.

    rule (str)
        A string indicating which thresholding function to invoke. One of
        ``'degree'``, ``'range'``, ``'quantile'`` or ``'custom'``. With
        ``'custom'``, the callable passed as ``custom_thresholder`` is
        applied to ``mat``.

    kwargs (dict)
        Named arguments to pass to the underlying threshold function.

    Returns
    -------
    thresholded_mat
        the thresholded numpy array

    Raises
    ------
    ValueError
        If ``rule`` is ``'custom'`` and no ``custom_thresholder`` is given,
        or if ``rule`` is not one of the recognised names.

    """
    if rule == 'degree':
        return threshold_on_degree(mat, **kwargs)
    if rule == 'range':
        return threshold_in_range(mat, **kwargs)
    if rule == 'quantile':
        return threshold_on_quantile(mat, **kwargs)
    if rule == 'custom':
        # Only the lookup is guarded, so a KeyError raised inside the
        # thresholder itself is not misreported as a missing parameter.
        try:
            custom_thresholder = kwargs['custom_thresholder']
        except KeyError as err:
            raise ValueError("missing threshold parameter") from err
        return custom_thresholder(mat)

    # An unrecognised rule used to fall through and return None silently.
    raise ValueError("unknown thresholding rule: {!r}".format(rule))