From eb7bbb13535a8073b6c85c8718c81bff48138fa1 Mon Sep 17 00:00:00 2001
From: asistradition
Date: Mon, 8 Jul 2024 12:38:26 -0400
Subject: [PATCH] v0.6.3 numpy2 scipy1.14 fixes

---
 .github/workflows/python-package.yml          |   2 +-
 .../preprocessing/data_normalization.py       |   9 +-
 inferelator/preprocessing/velocity.py         |   9 +-
 inferelator/regression/mi.py                  | 147 +++++++++++++-----
 inferelator/tests/test_data_loader.py         |  17 +-
 inferelator/tests/test_data_normalization.py  |  14 +-
 inferelator/tests/test_data_wrapper.py        |  32 ++--
 inferelator/tests/test_noising_data.py        |  11 +-
 inferelator/tests/test_workflow_base.py       |   6 +-
 inferelator/tfa/pinv_tfa.py                   |   2 +-
 inferelator/utils/__init__.py                 |   1 +
 inferelator/utils/data.py                     |  15 +-
 inferelator/utils/inferelator_data.py         |  20 +--
 inferelator/utils/sparse.py                   |  21 +++
 14 files changed, 207 insertions(+), 99 deletions(-)
 create mode 100644 inferelator/utils/sparse.py

diff --git a/.github/workflows/python-package.yml b/.github/workflows/python-package.yml
index 1752e0a9..990441ff 100644
--- a/.github/workflows/python-package.yml
+++ b/.github/workflows/python-package.yml
@@ -12,7 +12,7 @@ jobs:
     strategy:
       fail-fast: false
       matrix:
-        python-version: ['3.8', '3.9', '3.10']
+        python-version: ['3.9', '3.10', '3.11']
 
     steps:
     - uses: actions/checkout@v3
diff --git a/inferelator/preprocessing/data_normalization.py b/inferelator/preprocessing/data_normalization.py
index 889fbb87..fb9d94a8 100644
--- a/inferelator/preprocessing/data_normalization.py
+++ b/inferelator/preprocessing/data_normalization.py
@@ -10,6 +10,7 @@
 
 from inferelator.utils.debug import Debug
 from inferelator.utils.data import convert_array_to_float
+from inferelator.utils.sparse import todense
 
 
 class TruncRobustScaler(RobustScaler):
@@ -303,7 +304,7 @@ def scale_array(
         :type magnitude_limit: numeric, optional
         """
 
-        if sparse.isspmatrix(array):
+        if sparse.issparse(array):
             out = np.empty(
                 shape=array.shape,
                 dtype=float
@@ -340,8 +341,8 @@ def scale_vector(
         """
 
         # Convert a sparse vector to a dense vector
-        if sparse.isspmatrix(vec):
-            vec = vec.A.ravel()
+        if sparse.issparse(vec):
+            vec = todense(vec).ravel()
 
         # Return 0s if the variance is 0
         if np.var(vec) == 0:
@@ -358,7 +359,7 @@ def scale_vector(
 
 def _magnitude_limit(x, lim):
 
-    ref = x.data if sparse.isspmatrix(x) else x
+    ref = x.data if sparse.issparse(x) else x
 
     np.minimum(ref, lim, out=ref)
     np.maximum(ref, -1 * lim, out=ref)
diff --git a/inferelator/preprocessing/velocity.py b/inferelator/preprocessing/velocity.py
index 66c4de12..29ae1b54 100644
--- a/inferelator/preprocessing/velocity.py
+++ b/inferelator/preprocessing/velocity.py
@@ -6,6 +6,7 @@
     Debug
 )
 from inferelator.utils import Validator as check
+from inferelator.utils.sparse import todense
 
 
 def extract_transcriptional_output(
@@ -290,9 +291,9 @@ def _sparse_safe_multiply(x, y):
     :rtype: np.ndarray, sp.spmatrix
     """
 
-    if sparse.isspmatrix(x):
+    if sparse.issparse(x):
         return x.multiply(y).tocsr()
-    elif sparse.isspmatrix(y):
+    elif sparse.issparse(y):
         return y.multiply(x).tocsr()
     else:
         return np.multiply(x, y)
@@ -310,7 +311,7 @@ def _sparse_safe_add(x, y):
     :rtype: np.ndarray
     """
 
-    if sparse.isspmatrix(x) or sparse.isspmatrix(y):
-        return (x + y).A
+    if sparse.issparse(x) or sparse.issparse(y):
+        return todense(x + y)
     else:
         return np.add(x, y)
diff --git a/inferelator/regression/mi.py b/inferelator/regression/mi.py
index 4834237d..792ec967 100644
--- a/inferelator/regression/mi.py
+++ b/inferelator/regression/mi.py
@@ -7,7 +7,8 @@
 from inferelator.utils import (
     Debug,
     array_set_diag,
-    
safe_apply_to_array + safe_apply_to_array, + todense ) from inferelator.utils import Validator as check @@ -17,22 +18,44 @@ # DDOF for CLR CLR_DDOF = 1 -# Log type for MI calculations. np.log2 gives results in bits; np.log gives results in nats +# Log type for MI calculations +# np.log2 gives results in bits; np.log gives results in nats DEFAULT_LOG_TYPE = np.log class MIDriver: @staticmethod - def run(x, y, bins=DEFAULT_NUM_BINS, logtype=DEFAULT_LOG_TYPE, return_mi=True): - return context_likelihood_mi(x, y, bins=bins, logtype=logtype, return_mi=return_mi) + def run( + x, + y, + bins=DEFAULT_NUM_BINS, + logtype=DEFAULT_LOG_TYPE, + return_mi=True + ): + return context_likelihood_mi( + x, + y, + bins=bins, + logtype=logtype, + return_mi=return_mi + ) -def context_likelihood_mi(x, y, bins=DEFAULT_NUM_BINS, logtype=DEFAULT_LOG_TYPE, return_mi=True): +def context_likelihood_mi( + x, + y, + bins=DEFAULT_NUM_BINS, + logtype=DEFAULT_LOG_TYPE, + return_mi=True +): """ - Wrapper to calculate the Context Likelihood of Relatedness and Mutual Information for two data sets that have - common condition rows. The y argument will be used to calculate background MI for the x & y MI. - As an implementation detail, y will be cast to a dense array if it is sparse. + Wrapper to calculate the Context Likelihood of Relatedness and + Mutual Information for two data sets that have + common condition rows. The y argument will be used to calculate + background MI for the x & y MI. + As an implementation detail, y will be cast to a dense array if + it is sparse. X can be sparse with no internal copy. This function handles unpacking and packing the InferelatorData. @@ -41,13 +64,15 @@ def context_likelihood_mi(x, y, bins=DEFAULT_NUM_BINS, logtype=DEFAULT_LOG_TYPE, :type x: InferelatorData [N x G] :param y: An N x K InferelatorData object :type y: InferelatorData [N x K] - :param logtype: The logarithm function to use when calculating information. Defaults to natural log (np.log) + :param logtype: The logarithm function to use when calculating information. + Defaults to natural log (np.log) :type logtype: np.log func :param bins: Number of bins for discretizing continuous variables :type bins: int :param return_mi: Boolean for returning a MI object. Defaults to True :type return_mi: bool - :return clr, mi: CLR and MI InferelatorData objects. Returns (CLR, None) if return_mi is False. + :return clr, mi: CLR and MI InferelatorData objects. + Returns (CLR, None) if return_mi is False. 
:rtype InferelatorData, InferelatorData:
     """
 
@@ -61,11 +86,21 @@ def context_likelihood_mi(x, y, bins=DEFAULT_NUM_BINS, logtype=DEFAULT_LOG_TYPE,
     mi_c = y.gene_names
 
     # Build a [G x K] mutual information array
-    mi = mutual_information(x.expression_data, y.expression_data, bins, logtype=logtype)
+    mi = mutual_information(
+        x.expression_data,
+        y.expression_data,
+        bins,
+        logtype=logtype
+    )
     array_set_diag(mi, 0., mi_r, mi_c)
 
     # Build a [K x K] mutual information array
-    mi_bg = mutual_information(y.expression_data, y.expression_data, bins, logtype=logtype)
+    mi_bg = mutual_information(
+        y.expression_data,
+        y.expression_data,
+        bins,
+        logtype=logtype
+    )
     array_set_diag(mi_bg, 0., mi_c, mi_c)
 
     # Calculate CLR
@@ -79,16 +114,19 @@ def context_likelihood_mi(x, y, bins=DEFAULT_NUM_BINS, logtype=DEFAULT_LOG_TYPE,
 
 def mutual_information(x, y, bins, logtype=DEFAULT_LOG_TYPE):
     """
-    Calculate the mutual information matrix between two data matrices, where the columns are equivalent conditions
+    Calculate the mutual information matrix between two data matrices,
+    where the columns are equivalent conditions
 
     :param x: np.array (n x m1)
         The data from m1 variables across n conditions
     :param y: np.array (n x m2)
         The data from m2 variables across n conditions
     :param bins: int
-        Number of bins to discretize continuous data into for the generation of a contingency table
+        Number of bins to discretize continuous data into for the generation
+        of a contingency table
     :param logtype: np.log func
-        Which type of log function should be used (log2 results in MI bits, log results in MI nats, log10... is weird)
+        Which type of log function should be used (log2 results in MI bits,
+        log results in MI nats, log10... is weird)
     :return mi: pd.DataFrame (m1 x m2)
         The mutual information between variables m1 and m2
     """
 
     # Discretize the input matrix y
     y = _make_array_discrete(
-        y.A if sps.isspmatrix(y) else y,
+        todense(y),
         bins,
         axis=0
     )
@@ -120,6 +158,7 @@ def mutual_information(x, y, bins, logtype=DEFAULT_LOG_TYPE):
 
     return mi
 
+
 def _mi_wrapper(x, Y, i, bins, logtype, m1):
 
     Debug.vprint(
@@ -127,24 +166,22 @@ def _mi_wrapper(x, Y, i, bins, logtype, m1):
         level=2 if i % 1000 == 0 else 3
     )
 
-    # Turn off runtime warnings (there is an explicit check for NaNs and INFs in-function)
-    with np.errstate(divide='ignore', invalid='ignore'):
+    discrete_X = _make_discrete(
+        todense(x).ravel(),
+        bins
+    )
 
-        discrete_X = _make_discrete(
-            x.A.ravel() if sps.isspmatrix(x) else x.ravel(),
-            bins
+    return [
+        _calc_mi(
+            _make_table(
+                discrete_X, Y[:, j],
+                bins
+            ),
+            logtype=logtype
         )
+        for j in range(Y.shape[1])
+    ]
 
-        return [
-            _calc_mi(
-                _make_table(
-                    discrete_X, Y[:, j],
-                    bins
-                ),
-                logtype=logtype)
-            for j in range(Y.shape[1]
-            )
-        ]
+
 
 
 def _x_generator(X):
@@ -154,7 +191,8 @@ def _x_generator(X):
 
 def calc_mixed_clr(mi, mi_bg):
     """
-    Calculate the context liklihood of relatedness from mutual information and the background mutual information
+    Calculate the context likelihood of relatedness from mutual information
+    and the background mutual information
 
     :param mi: Mutual information array [m1 x m2]
     :type mi: np.ndarray
@@ -167,12 +205,16 @@ def calc_mixed_clr(mi, mi_bg):
 
     with np.errstate(invalid='ignore'):
         # Calculate the zscore for the dynamic CLR
-        z_dyn = np.round(mi, 10)  # Rounding so that float precision differences don't turn into huge CLR differences
+        # Rounding so that float precision differences don't turn
+        # into huge CLR differences
+        z_dyn = 
np.round(mi, 10) z_dyn = np.subtract(z_dyn, np.mean(mi, axis=0)) z_dyn = np.divide(z_dyn, np.std(mi, axis=0, ddof=CLR_DDOF)) # Calculate the zscore for the static CLR - z_stat = np.round(mi, 10) # Rounding so that float precision differences don't turn into huge CLR differences + # Rounding so that float precision differences don't turn + # into huge CLR differences + z_stat = np.round(mi, 10) z_stat = np.subtract(z_stat, np.mean(mi_bg, axis=0)) z_stat = np.divide(z_stat, np.std(mi_bg, axis=0, ddof=CLR_DDOF)) @@ -222,7 +264,8 @@ def _make_discrete(arr_vec, num_bins): # Continuous values to discrete bins [0, num_bins) # Write directly into a np.int16 array with the standard unsafe conversion return np.floor( - (arr_vec - arr_min) / (arr_max - arr_min + np.spacing(arr_max - arr_min)) * num_bins, + (arr_vec - arr_min) / + (arr_max - arr_min + np.spacing(arr_max - arr_min)) * num_bins, out=np.zeros(shape=arr_vec.shape, dtype=np.int16), casting='unsafe' ) @@ -230,7 +273,8 @@ def _make_discrete(arr_vec, num_bins): def _make_table(x, y, num_bins): """ - Takes two variable vectors which have been made into discrete integer bins and constructs a contingency table + Takes two variable vectors which have been made into discrete integer + bins and constructs a contingency table :param x: np.ndarray 1d array of discrete data :param y: np.ndarray @@ -242,7 +286,8 @@ def _make_table(x, y, num_bins): """ # The only fast way to do this is by reindexing the table as an index array - # Then piling everything up with bincount and reshaping it back into the table + # Then piling everything up with bincount and reshaping it back into the + # table return np.bincount( x * num_bins + y, minlength=num_bins ** 2 @@ -264,24 +309,40 @@ def _calc_mi(table, logtype=DEFAULT_LOG_TYPE): Mutual information between variable x & y """ + # [n] total = np.sum(table) # (PxPy) [n x n] - mi_val = np.dot((np.sum(table, axis=1) / total).reshape(-1, 1), - (np.sum(table, axis=0) / total).reshape(1, -1)) + mi_val = np.dot( + (np.sum(table, axis=1) / total).reshape(-1, 1), + (np.sum(table, axis=0) / total).reshape(1, -1) + ) # (Pxy) [n x n] - table = np.divide(table, total) + table = np.divide( + table, + total, + out=np.zeros(table.shape, float), + where=total != 0 + ) # (Pxy)/(PxPy) [n x n] - mi_val = np.divide(table, mi_val) + mi_val = np.divide( + table, + mi_val, + out=np.zeros(table.shape, float), + where=mi_val != 0 + ) # log[(Pxy)/(PxPy)] [n x n] - mi_val = logtype(mi_val) + mi_val = logtype( + mi_val, + out=mi_val, + where=mi_val != 0 + ) # Pxy(log[(Pxy)/(PxPy)]) [n x n] mi_val = np.multiply(table, mi_val) - mi_val[np.isnan(mi_val)] = 0 # Summation return np.sum(mi_val) diff --git a/inferelator/tests/test_data_loader.py b/inferelator/tests/test_data_loader.py index 05ea9f54..19025af2 100644 --- a/inferelator/tests/test_data_loader.py +++ b/inferelator/tests/test_data_loader.py @@ -9,7 +9,7 @@ import pandas.testing as pdt import bio_test_artifacts.prebuilt as test_prebuilt from inferelator.workflow import inferelator_workflow -from inferelator.utils import loader +from inferelator.utils import loader, todense class TestExpressionLoader(unittest.TestCase): @@ -59,7 +59,10 @@ def test_mtx(self): self.worker.set_expression_file(mtx=file1, mtx_feature=file2, mtx_barcode=file3) self.worker.read_expression() - npt.assert_array_almost_equal(data.values, self.worker.data.expression_data.A) + npt.assert_array_almost_equal( + data.values, + todense(self.worker.data.expression_data) + ) def test_10x(self): (file1, file2, file3), data = 
test_prebuilt.counts_yeast_single_cell_chr01(filetype='mtx') @@ -72,7 +75,10 @@ def test_10x(self): self.worker.set_expression_file(tenx_path=txdir) self.worker.read_expression() - npt.assert_array_almost_equal(data.values, self.worker.data.expression_data.A) + npt.assert_array_almost_equal( + data.values, + todense(self.worker.data.expression_data) + ) def test_10x_ranger3(self): (file1, file2, file3), data = test_prebuilt.counts_yeast_single_cell_chr01(filetype='mtx', gzip=True) @@ -85,7 +91,10 @@ def test_10x_ranger3(self): self.worker.set_expression_file(tenx_path=txdir) self.worker.read_expression() - npt.assert_array_almost_equal(data.values, self.worker.data.expression_data.A) + npt.assert_array_almost_equal( + data.values, + todense(self.worker.data.expression_data) + ) def test_df_decode(self): idx = pd.Index(['str1', b'str2', b'str3', 'str4', 5, 17.4, np.inf, ('str1',)]) diff --git a/inferelator/tests/test_data_normalization.py b/inferelator/tests/test_data_normalization.py index 39d7937a..3e85152e 100644 --- a/inferelator/tests/test_data_normalization.py +++ b/inferelator/tests/test_data_normalization.py @@ -9,7 +9,7 @@ TestDataSingleCellLike ) -from inferelator.utils import InferelatorData +from inferelator.utils import InferelatorData, todense from inferelator.preprocessing.data_normalization import PreprocessData @@ -260,7 +260,7 @@ def test_no_limit_s(self): design = PreprocessData.preprocess_design(self.adata_sparse) design_sklearn = RobustScaler(with_centering=False).fit_transform(self.expr) npt.assert_almost_equal( - design.values.A, + todense(design.values), design_sklearn ) @@ -288,7 +288,7 @@ def test_limit_s(self): design_sklearn[design_sklearn < -1] = -1 npt.assert_almost_equal( - design.values.A, + todense(design.values), design_sklearn ) @@ -341,7 +341,7 @@ def test_no_limit_d(self): def test_no_limit_s(self): design = PreprocessData.preprocess_design(self.adata_sparse) npt.assert_almost_equal( - design.values.A, + todense(design.values), self.expr ) @@ -361,7 +361,7 @@ def test_limit_s(self): ) design = PreprocessData.preprocess_design(self.adata_sparse) npt.assert_almost_equal( - design.values.A, + todense(design.values), self.expr ) @@ -421,7 +421,7 @@ def test_no_limit_s(self): design = PreprocessData.preprocess_design(self.adata_sparse) design_sklearn = self._right_answer(self.expr) npt.assert_almost_equal( - design.values.A, + todense(design.values), design_sklearn ) @@ -449,7 +449,7 @@ def test_limit_s(self): design_sklearn[design_sklearn < -1] = -1 npt.assert_almost_equal( - design.values.A, + todense(design.values), design_sklearn ) diff --git a/inferelator/tests/test_data_wrapper.py b/inferelator/tests/test_data_wrapper.py index fc2ff33d..caff056f 100644 --- a/inferelator/tests/test_data_wrapper.py +++ b/inferelator/tests/test_data_wrapper.py @@ -9,7 +9,7 @@ CORRECT_GENES_INTERSECT, CORRECT_GENES_NZ_VAR ) -from inferelator.utils import InferelatorData +from inferelator.utils import InferelatorData, todense class TestWrapperSetup(unittest.TestCase): @@ -214,7 +214,7 @@ def test_transform_log2_d_ineff(self): def test_transform_log2_s(self): self.adata_sparse.transform(np.log2, add_pseudocount=True) - npt.assert_array_almost_equal(self.adata_sparse.expression_data.A, + npt.assert_array_almost_equal(todense(self.adata_sparse.expression_data), np.log2(self.expr.loc[:, self.adata.gene_names].values + 1)) def test_apply_log2_d(self): @@ -240,7 +240,7 @@ def test_apply_normalizer_s(self): lambda x: StandardScaler(with_mean=False).fit_transform(x) ) 
npt.assert_array_almost_equal( - self.adata_sparse.expression_data.A, + todense(self.adata_sparse.expression_data), StandardScaler(with_mean=False).fit_transform( self.expr.loc[:, self.adata.gene_names].values ) @@ -260,24 +260,30 @@ def test_dot_dense(self): npt.assert_array_almost_equal(dot3, eye_expr) def test_dot_sparse(self): - inv_expr = np.asarray(linalg.pinv(self.adata_sparse.expression_data.A), order="C") + inv_expr = np.asarray(linalg.pinv(todense(self.adata_sparse.expression_data)), order="C") eye_expr = np.eye(self.adata_sparse.shape[1]) sdot1a = self.adata_sparse.dot(eye_expr) - sdot1b = self.adata_sparse.dot(sparse.csr_matrix(eye_expr)).A + sdot1b = todense(self.adata_sparse.dot(sparse.csr_matrix(eye_expr))) npt.assert_array_almost_equal(sdot1a, sdot1b) - original_data = self.expr_sparse[:, TestDataSingleCellLike.expression_matrix.index.isin(CORRECT_GENES_NZ_VAR)].A - npt.assert_array_almost_equal(self.adata_sparse.expression_data.A, original_data) + original_data = todense( + self.expr_sparse[ + :, + TestDataSingleCellLike.expression_matrix.index.isin(CORRECT_GENES_NZ_VAR)] + ) + npt.assert_array_almost_equal(todense(self.adata_sparse.expression_data), original_data) npt.assert_array_almost_equal(sdot1b, original_data) sdot2a = self.adata_sparse.dot(inv_expr, other_is_right_side=False) - sdot2b = self.adata_sparse.dot(sparse.csr_matrix(inv_expr), other_is_right_side=False).A + sdot2b = todense( + self.adata_sparse.dot(sparse.csr_matrix(inv_expr), other_is_right_side=False) + ) npt.assert_array_almost_equal(sdot2a, sdot2b) npt.assert_array_almost_equal(sdot2b, eye_expr) def test_dot_force_dense(self): - inv_expr = np.asarray(linalg.pinv(self.adata_sparse.expression_data.A), order="C") + inv_expr = np.asarray(linalg.pinv(todense(self.adata_sparse.expression_data)), order="C") eye_expr = np.eye(self.adata_sparse.shape[1]) sdot1 = self.adata_sparse.dot(inv_expr, other_is_right_side=False, force_dense=True) @@ -336,12 +342,12 @@ def test_divide_dense(self): def test_divide_sparse(self): self.adata_sparse.divide(0.5, axis=None) - npt.assert_array_almost_equal(self.adata_sparse.expression_data.A, + npt.assert_array_almost_equal(todense(self.adata_sparse.expression_data), self.expr.loc[:, self.adata_sparse.gene_names].values.astype(float) * 2) self.adata_sparse.divide(self.adata_sparse.sample_counts, axis=1) - npt.assert_array_almost_equal(np.sum(self.adata_sparse.expression_data.A, axis=1), + npt.assert_array_almost_equal(np.sum(todense(self.adata_sparse.expression_data), axis=1), np.ones(self.adata_sparse.num_obs, dtype=float)) with self.assertRaises(ValueError): @@ -364,12 +370,12 @@ def test_multiply_dense(self): def test_multiply_sparse(self): self.adata_sparse.multiply(2, axis=None) - npt.assert_array_almost_equal(self.adata_sparse.expression_data.A, + npt.assert_array_almost_equal(todense(self.adata_sparse.expression_data), self.expr.loc[:, self.adata_sparse.gene_names].values.astype(float) / 0.5) self.adata_sparse.multiply(1 / self.adata_sparse.sample_counts, axis=1) - npt.assert_array_almost_equal(np.sum(self.adata_sparse.expression_data.A, axis=1), + npt.assert_array_almost_equal(np.sum(todense(self.adata_sparse.expression_data), axis=1), np.ones(self.adata_sparse.num_obs, dtype=float)) with self.assertRaises(ValueError): diff --git a/inferelator/tests/test_noising_data.py b/inferelator/tests/test_noising_data.py index cd2cb3a3..3cd0def0 100644 --- a/inferelator/tests/test_noising_data.py +++ b/inferelator/tests/test_noising_data.py @@ -3,6 +3,7 @@ from 
inferelator.preprocessing import simulate_data from inferelator import MPControl, inferelator_workflow from inferelator.tests.artifacts.test_stubs import FakeRegressionMixin +from inferelator.utils import todense import os import numpy.testing as npt from scipy import sparse as _sparse @@ -45,7 +46,10 @@ def test_noise_int_data_sparse(self): simulate_data.make_data_noisy(noise_data, random_seed=100) with self.assertRaises(AssertionError): - npt.assert_array_almost_equal(self.data.expression_data, noise_data.expression_data.A) + npt.assert_array_almost_equal( + self.data.expression_data, + todense(noise_data.expression_data) + ) self.assertTrue(noise_data.is_sparse) @@ -60,7 +64,10 @@ def test_noise_float_data_sparse(self): self.assertFalse(noise_data.is_sparse) with self.assertRaises(AssertionError): - npt.assert_array_almost_equal(float_data.expression_data.A, noise_data.expression_data) + npt.assert_array_almost_equal( + todense(float_data.expression_data), + noise_data.expression_data + ) class NoiseWorkflowData(unittest.TestCase): diff --git a/inferelator/tests/test_workflow_base.py b/inferelator/tests/test_workflow_base.py index c222d959..30bbf7e2 100644 --- a/inferelator/tests/test_workflow_base.py +++ b/inferelator/tests/test_workflow_base.py @@ -17,6 +17,7 @@ from inferelator.regression.base_regression import _RegressionWorkflowMixin from inferelator.distributed.inferelator_mp import MPControl from inferelator.preprocessing.metadata_parser import MetadataParserBranching +from inferelator.utils import todense my_dir = os.path.dirname(__file__) @@ -300,7 +301,10 @@ def test_load_to_h5ad(self): data = ad.read_h5ad(sname) self.assertTrue(sps.isspmatrix_csr(data.X)) - npt.assert_array_almost_equal_nulp(data.X.A, self.workflow.data.values.A) + npt.assert_array_almost_equal_nulp( + todense(data.X), + todense(self.workflow.data.values) + ) os.remove(sname) diff --git a/inferelator/tfa/pinv_tfa.py b/inferelator/tfa/pinv_tfa.py index 555312c8..bb90b7f0 100644 --- a/inferelator/tfa/pinv_tfa.py +++ b/inferelator/tfa/pinv_tfa.py @@ -31,7 +31,7 @@ def _calculate_activity( _arr_piv = linalg.pinv(prior).T.astype(_prior_dtype) - if sparse.isspmatrix(expression_data): + if sparse.issparse(expression_data): _arr_piv = sparse.csr_matrix(_arr_piv) return utils.DotProduct.dot( diff --git a/inferelator/utils/__init__.py b/inferelator/utils/__init__.py index 8017db75..665090ac 100644 --- a/inferelator/utils/__init__.py +++ b/inferelator/utils/__init__.py @@ -1,3 +1,4 @@ +from .sparse import todense from .validator import ( Validator, is_string diff --git a/inferelator/utils/data.py b/inferelator/utils/data.py index bc2ae532..5e8ce10b 100644 --- a/inferelator/utils/data.py +++ b/inferelator/utils/data.py @@ -5,6 +5,7 @@ import pandas.api.types as pat from inferelator.utils import Debug +from inferelator.utils.sparse import todense # Numpy / scipy matrix math function @@ -26,12 +27,12 @@ def dot_product( :return: A @ B array :rtype: np.ndarray, sp.sparse.csr_matrix """ - if sparse.isspmatrix(a) and sparse.isspmatrix(b): - return a.dot(b).A if dense else a.dot(b) - elif sparse.isspmatrix(a) and dense: + if sparse.issparse(a) and sparse.issparse(b): + return todense(a.dot(b)) if dense else a.dot(b) + elif sparse.issparse(a) and dense: _arr = a.dot(b) - return _arr.A if sparse.isspmatrix(_arr) else _arr - elif sparse.isspmatrix(a) or sparse.isspmatrix(b): + return todense(_arr) if sparse.issparse(_arr) else _arr + elif sparse.issparse(a) or sparse.issparse(b): return a @ b else: return np.dot(a, b) @@ -368,7 
+369,7 @@ def safe_apply_to_array(
     if axis == 0:
         for i in range(array.shape[1]):
             out_arr[:, i] = func(
-                array[:, i].A.ravel(),
+                todense(array[:, i]).ravel(),
                 *args,
                 **kwargs
             )
@@ -376,7 +377,7 @@
     elif axis == 1:
         for i in range(array.shape[0]):
             out_arr[i, :] = func(
-                array[i, :].A.ravel(),
+                todense(array[i, :]).ravel(),
                 *args,
                 **kwargs
             )
diff --git a/inferelator/utils/inferelator_data.py b/inferelator/utils/inferelator_data.py
index d114693b..c69f5404 100644
--- a/inferelator/utils/inferelator_data.py
+++ b/inferelator/utils/inferelator_data.py
@@ -22,6 +22,8 @@
     convert_array_to_float
 )
 
+from inferelator.utils.sparse import todense
+
 
 class InferelatorData(object):
     """
@@ -465,7 +467,7 @@ def trim_genes(self, remove_constant_genes=True, trim_gene_list=None):
 
         if remove_constant_genes:
             nz_var = self.values.max(axis=0) - self.values.min(axis=0)
-            nz_var = nz_var.A.flatten() if self.is_sparse else nz_var
+            nz_var = todense(nz_var).flatten() if self.is_sparse else nz_var
 
             if np.any(np.isnan(nz_var)):
                 raise ValueError(
@@ -520,7 +522,7 @@ def get_gene_data(
         labels = x.var_names
 
         if (force_dense or to_df) and self.is_sparse:
-            x = x.X.A
+            x = todense(x.X)
 
         else:
             # Copy is necessary to get the numpy array
@@ -552,7 +554,7 @@ def get_sample_data(
         labels = x.obs_names
 
         if (force_dense or to_df) and self.is_sparse:
-            x = x.X.A
+            x = todense(x.X)
 
         else:
             x = x.X
@@ -992,7 +994,7 @@ def to_csr(self):
 
     def to_dense(self):
         if self.is_sparse:
-            self._adata.X = self._adata.X.A
+            self._adata.X = todense(self._adata.X)
 
     def to_sparse(self, mode="csr"):
 
@@ -1034,16 +1036,10 @@ def _make_idx_str(df):
         df.columns = df.columns.astype(str)
 
     def _counts(self, axis=None):
-        if self.is_sparse:
-            return self._adata.X.sum(axis=axis).A.flatten()
-        else:
-            return self._adata.X.sum(axis=axis)
+        return todense(self._adata.X.sum(axis=axis)).ravel()
 
     def _means(self, axis=None):
-        if self.is_sparse:
-            return self._adata.X.mean(axis=axis).A.flatten()
-        else:
-            return self._adata.X.mean(axis=axis)
+        return todense(self._adata.X.mean(axis=axis)).ravel()
 
     def _vars(self, axis=None, ddof=1):
         if self.is_sparse:
diff --git a/inferelator/utils/sparse.py b/inferelator/utils/sparse.py
new file mode 100644
index 00000000..89bee594
--- /dev/null
+++ b/inferelator/utils/sparse.py
@@ -0,0 +1,21 @@
+import scipy.sparse as sps
+
+
+def todense(sarr):
+    """
+    Convert scipy sparse matrices or arrays,
+    and np.matrix, to dense np.ndarray;
+    pass anything else through unchanged
+    """
+
+    if sps.issparse(sarr):
+        sarr = sarr.todense()
+
+    # spmatrix.todense() and spmatrix.sum() return np.matrix;
+    # .A converts np.matrix to np.ndarray
+    try:
+        sarr = sarr.A
+    except AttributeError:
+        pass
+
+    return sarr
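
A note on the new helper: scipy 1.14 dropped the .A shorthand from the newer sparse array types, and isspmatrix() returns False for them, which is why this patch funnels every densification through issparse() and todense(). A quick illustrative sketch of the intended behavior (dense, mat, and arr are made-up names, not part of the patch):

import numpy as np
import scipy.sparse as sps

from inferelator.utils.sparse import todense

dense = np.array([[1., 0.], [0., 2.]])

# Both the legacy spmatrix and the newer sparse array types
# come back as plain ndarrays
mat = sps.csr_matrix(dense)
arr = sps.csr_array(dense)
assert type(todense(mat)) is np.ndarray
assert type(todense(arr)) is np.ndarray

# spmatrix reductions return np.matrix, which todense also
# collapses to ndarray; this is what lets _counts and _means
# become one-liners, since spmatrix.sum() is not itself sparse
assert type(todense(mat.sum(axis=1))) is np.ndarray

# Dense inputs pass through untouched
np.testing.assert_array_equal(todense(dense), dense)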
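
On the mi.py side, _make_discrete bins each continuous vector into integer bins in [0, bins). The np.spacing() term pads the denominator by one ulp so the maximum value maps to bins - 1 rather than to bins. A small worked example with arbitrary values:

import numpy as np

vec = np.array([0.0, 0.6, 1.1, 2.0])
bins = 4

span = vec.max() - vec.min()

# Without np.spacing, the max value would map to exactly `bins`,
# one past the last valid bin index
idx = np.floor(
    (vec - vec.min()) / (span + np.spacing(span)) * bins
).astype(np.int16)

# idx is [0, 1, 2, 3]; every value lands in 0 .. bins - 1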
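
The "reindexing the table as an index array" comment in _make_table is the flattened-index trick: each (x, y) pair collapses to the single integer x * num_bins + y, so one np.bincount call tallies the whole contingency table at once. Worked through on made-up discretized data:

import numpy as np

num_bins = 3
x = np.array([0, 0, 1, 2, 2, 2])
y = np.array([0, 1, 1, 2, 2, 0])

# Flat index for each sample: [0, 1, 4, 8, 8, 6]
flat = x * num_bins + y

# bincount tallies each flat index; reshape recovers the
# [num_bins x num_bins] table where table[i, j] is the number
# of samples with x == i and y == j
table = np.bincount(flat, minlength=num_bins ** 2)
table = table.reshape(num_bins, num_bins)

# table is [[1, 1, 0],
#           [0, 1, 0],
#           [1, 0, 2]]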
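
From that table, _calc_mi computes the standard discrete mutual information, MI = sum_ij P(i,j) * log[ P(i,j) / (P(i) * P(j)) ]; the where= guards added by this patch zero out empty cells up front instead of scrubbing NaNs afterwards. An equivalent mask-based version, continuing the table example above (np.log matches DEFAULT_LOG_TYPE, so the result is in nats):

# Joint and marginal probabilities from the contingency table
total = table.sum()
pxy = table / total
pxpy = np.outer(
    table.sum(axis=1),
    table.sum(axis=0)
) / total ** 2

# Only occupied cells contribute; empty cells are zero by
# convention, which is what the where= guards encode
nz = pxy > 0
mi = np.sum(pxy[nz] * np.log(pxy[nz] / pxpy[nz]))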