Skip to content

Commit

Permalink
v0.6.3 numpy2 scipy1.14 fixes
Browse files Browse the repository at this point in the history
  • Loading branch information
asistradition committed Jul 8, 2024
1 parent 17bf404 commit eb7bbb1
Show file tree
Hide file tree
Showing 14 changed files with 207 additions and 99 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/python-package.yml
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ jobs:
strategy:
fail-fast: false
matrix:
python-version: ['3.8', '3.9', '3.10']
python-version: ['3.9', '3.10', '3.11']

steps:
- uses: actions/checkout@v3
Expand Down
9 changes: 5 additions & 4 deletions inferelator/preprocessing/data_normalization.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@

from inferelator.utils.debug import Debug
from inferelator.utils.data import convert_array_to_float
from inferelator.utils.sparse import todense


class TruncRobustScaler(RobustScaler):
Expand Down Expand Up @@ -303,7 +304,7 @@ def scale_array(
:type magnitude_limit: numeric, optional
"""

if sparse.isspmatrix(array):
if sparse.issparse(array):
out = np.empty(
shape=array.shape,
dtype=float
Expand Down Expand Up @@ -340,8 +341,8 @@ def scale_vector(
"""

# Convert a sparse vector to a dense vector
if sparse.isspmatrix(vec):
vec = vec.A.ravel()
if sparse.issparse(vec):
vec = todense(vec).ravel()

# Return 0s if the variance is 0
if np.var(vec) == 0:
Expand All @@ -358,7 +359,7 @@ def scale_vector(

def _magnitude_limit(x, lim):

ref = x.data if sparse.isspmatrix(x) else x
ref = x.data if sparse.issparse(x) else x

np.minimum(ref, lim, out=ref)
np.maximum(ref, -1 * lim, out=ref)
Expand Down
9 changes: 5 additions & 4 deletions inferelator/preprocessing/velocity.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
Debug
)
from inferelator.utils import Validator as check
from inferelator.utils.sparse import todense


def extract_transcriptional_output(
Expand Down Expand Up @@ -290,9 +291,9 @@ def _sparse_safe_multiply(x, y):
:rtype: np.ndarray, sp.spmatrix
"""

if sparse.isspmatrix(x):
if sparse.issparse(x):
return x.multiply(y).tocsr()
elif sparse.isspmatrix(y):
elif sparse.issparse(y):
return y.multiply(x).tocsr()
else:
return np.multiply(x, y)
Expand All @@ -310,7 +311,7 @@ def _sparse_safe_add(x, y):
:rtype: np.ndarray
"""

if sparse.isspmatrix(x) or sparse.isspmatrix(y):
return (x + y).A
if sparse.issparse(x) or sparse.issparse(y):
return todense(x + y).A
else:
return np.add(x, y)
147 changes: 104 additions & 43 deletions inferelator/regression/mi.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,8 @@
from inferelator.utils import (
Debug,
array_set_diag,
safe_apply_to_array
safe_apply_to_array,
todense
)
from inferelator.utils import Validator as check

Expand All @@ -17,22 +18,44 @@
# DDOF for CLR
CLR_DDOF = 1

# Log type for MI calculations. np.log2 gives results in bits; np.log gives results in nats
# Log type for MI calculations
# np.log2 gives results in bits; np.log gives results in nats
DEFAULT_LOG_TYPE = np.log


class MIDriver:

@staticmethod
def run(x, y, bins=DEFAULT_NUM_BINS, logtype=DEFAULT_LOG_TYPE, return_mi=True):
return context_likelihood_mi(x, y, bins=bins, logtype=logtype, return_mi=return_mi)
def run(
x,
y,
bins=DEFAULT_NUM_BINS,
logtype=DEFAULT_LOG_TYPE,
return_mi=True
):
return context_likelihood_mi(
x,
y,
bins=bins,
logtype=logtype,
return_mi=return_mi
)


def context_likelihood_mi(x, y, bins=DEFAULT_NUM_BINS, logtype=DEFAULT_LOG_TYPE, return_mi=True):
def context_likelihood_mi(
x,
y,
bins=DEFAULT_NUM_BINS,
logtype=DEFAULT_LOG_TYPE,
return_mi=True
):
"""
Wrapper to calculate the Context Likelihood of Relatedness and Mutual Information for two data sets that have
common condition rows. The y argument will be used to calculate background MI for the x & y MI.
As an implementation detail, y will be cast to a dense array if it is sparse.
Wrapper to calculate the Context Likelihood of Relatedness and
Mutual Information for two data sets that have
common condition rows. The y argument will be used to calculate
background MI for the x & y MI.
As an implementation detail, y will be cast to a dense array if
it is sparse.
X can be sparse with no internal copy.
This function handles unpacking and packing the InferelatorData.
Expand All @@ -41,13 +64,15 @@ def context_likelihood_mi(x, y, bins=DEFAULT_NUM_BINS, logtype=DEFAULT_LOG_TYPE,
:type x: InferelatorData [N x G]
:param y: An N x K InferelatorData object
:type y: InferelatorData [N x K]
:param logtype: The logarithm function to use when calculating information. Defaults to natural log (np.log)
:param logtype: The logarithm function to use when calculating information.
Defaults to natural log (np.log)
:type logtype: np.log func
:param bins: Number of bins for discretizing continuous variables
:type bins: int
:param return_mi: Boolean for returning a MI object. Defaults to True
:type return_mi: bool
:return clr, mi: CLR and MI InferelatorData objects. Returns (CLR, None) if return_mi is False.
:return clr, mi: CLR and MI InferelatorData objects.
Returns (CLR, None) if return_mi is False.
:rtype InferelatorData, InferelatorData:
"""

Expand All @@ -61,11 +86,21 @@ def context_likelihood_mi(x, y, bins=DEFAULT_NUM_BINS, logtype=DEFAULT_LOG_TYPE,
mi_c = y.gene_names

# Build a [G x K] mutual information array
mi = mutual_information(x.expression_data, y.expression_data, bins, logtype=logtype)
mi = mutual_information(
x.expression_data,
y.expression_data,
bins,
logtype=logtype
)
array_set_diag(mi, 0., mi_r, mi_c)

# Build a [K x K] mutual information array
mi_bg = mutual_information(y.expression_data, y.expression_data, bins, logtype=logtype)
mi_bg = mutual_information(
y.expression_data,
y.expression_data,
bins,
logtype=logtype
)
array_set_diag(mi_bg, 0., mi_c, mi_c)

# Calculate CLR
Expand All @@ -79,24 +114,27 @@ def context_likelihood_mi(x, y, bins=DEFAULT_NUM_BINS, logtype=DEFAULT_LOG_TYPE,

def mutual_information(x, y, bins, logtype=DEFAULT_LOG_TYPE):
"""
Calculate the mutual information matrix between two data matrices, where the columns are equivalent conditions
Calculate the mutual information matrix between two data matrices,
where the columns are equivalent conditions
:param x: np.array (n x m1)
The data from m1 variables across n conditions
:param y: np.array (n x m2)
The data from m2 variables across n conditions
:param bins: int
Number of bins to discretize continuous data into for the generation of a contingency table
Number of bins to discretize continuous data into for the generation
of a contingency table
:param logtype: np.log func
Which type of log function should be used (log2 results in MI bits, log results in MI nats, log10... is weird)
Which type of log function should be used (log2 results in MI bits,
log results in MI nats, log10... is weird)
:return mi: pd.DataFrame (m1 x m2)
The mutual information between variables m1 and m2
"""

# Discretize the input matrix y
y = _make_array_discrete(
y.A if sps.isspmatrix(y) else y,
todense(y),
bins,
axis=0
)
Expand All @@ -120,31 +158,30 @@ def mutual_information(x, y, bins, logtype=DEFAULT_LOG_TYPE):

return mi


def _mi_wrapper(x, Y, i, bins, logtype, m1):

Debug.vprint(
f"Mutual Information Calculation [{i} / {m1}]",
level=2 if i % 1000 == 0 else 3
)

# Turn off runtime warnings (there is an explicit check for NaNs and INFs in-function)
with np.errstate(divide='ignore', invalid='ignore'):
discrete_X = _make_discrete(
todense(x).ravel(),
bins
)

discrete_X = _make_discrete(
x.A.ravel() if sps.isspmatrix(x) else x.ravel(),
bins
return [
_calc_mi(
_make_table(
discrete_X, Y[:, j],
bins
),
logtype=logtype
)
for j in range(Y.shape[1])
]

return [
_calc_mi(
_make_table(
discrete_X, Y[:, j],
bins
),
logtype=logtype)
for j in range(Y.shape[1]
)
]

def _x_generator(X):

Expand All @@ -154,7 +191,8 @@ def _x_generator(X):

def calc_mixed_clr(mi, mi_bg):
"""
Calculate the context likelihood of relatedness from mutual information
and the background mutual information
:param mi: Mutual information array [m1 x m2]
:type mi: np.ndarray
Expand All @@ -167,12 +205,16 @@ def calc_mixed_clr(mi, mi_bg):
with np.errstate(invalid='ignore'):

# Calculate the zscore for the dynamic CLR
z_dyn = np.round(mi, 10) # Rounding so that float precision differences don't turn into huge CLR differences
# Rounding so that float precision differences don't turn
# into huge CLR differences
z_dyn = np.round(mi, 10)
z_dyn = np.subtract(z_dyn, np.mean(mi, axis=0))
z_dyn = np.divide(z_dyn, np.std(mi, axis=0, ddof=CLR_DDOF))

# Calculate the zscore for the static CLR
z_stat = np.round(mi, 10) # Rounding so that float precision differences don't turn into huge CLR differences
# Rounding so that float precision differences don't turn
# into huge CLR differences
z_stat = np.round(mi, 10)
z_stat = np.subtract(z_stat, np.mean(mi_bg, axis=0))
z_stat = np.divide(z_stat, np.std(mi_bg, axis=0, ddof=CLR_DDOF))

Expand Down Expand Up @@ -222,15 +264,17 @@ def _make_discrete(arr_vec, num_bins):
# Continuous values to discrete bins [0, num_bins)
# Write directly into a np.int16 array with the standard unsafe conversion
return np.floor(
(arr_vec - arr_min) / (arr_max - arr_min + np.spacing(arr_max - arr_min)) * num_bins,
(arr_vec - arr_min) /
(arr_max - arr_min + np.spacing(arr_max - arr_min)) * num_bins,
out=np.zeros(shape=arr_vec.shape, dtype=np.int16),
casting='unsafe'
)


def _make_table(x, y, num_bins):
"""
Takes two variable vectors which have been made into discrete integer bins and constructs a contingency table
Takes two variable vectors which have been made into discrete integer
bins and constructs a contingency table
:param x: np.ndarray
1d array of discrete data
:param y: np.ndarray
Expand All @@ -242,7 +286,8 @@ def _make_table(x, y, num_bins):
"""

# The only fast way to do this is by reindexing the table as an index array
# Then piling everything up with bincount and reshaping it back into the table
# Then piling everything up with bincount and reshaping it back into the
# table
return np.bincount(
x * num_bins + y,
minlength=num_bins ** 2
Expand All @@ -264,24 +309,40 @@ def _calc_mi(table, logtype=DEFAULT_LOG_TYPE):
Mutual information between variable x & y
"""

# [n]
total = np.sum(table)

# (PxPy) [n x n]
mi_val = np.dot((np.sum(table, axis=1) / total).reshape(-1, 1),
(np.sum(table, axis=0) / total).reshape(1, -1))
mi_val = np.dot(
(np.sum(table, axis=1) / total).reshape(-1, 1),
(np.sum(table, axis=0) / total).reshape(1, -1)
)

# (Pxy) [n x n]
table = np.divide(table, total)
table = np.divide(
table,
total,
out=np.zeros(table.shape, float),
where=total != 0
)

# (Pxy)/(PxPy) [n x n]
mi_val = np.divide(table, mi_val)
mi_val = np.divide(
table,
mi_val,
out=np.zeros(table.shape, float),
where=mi_val != 0
)

# log[(Pxy)/(PxPy)] [n x n]
mi_val = logtype(mi_val)
mi_val = logtype(
mi_val,
out=mi_val,
where=mi_val != 0
)

# Pxy(log[(Pxy)/(PxPy)]) [n x n]
mi_val = np.multiply(table, mi_val)
mi_val[np.isnan(mi_val)] = 0

# Summation
return np.sum(mi_val)
Loading

0 comments on commit eb7bbb1

Please sign in to comment.