Merge branch 'main' into no-positional-bools

scverse · Oct 21, 2024 · 2854e0a · 2854e0a
2 parents 042a5e9 + 3260222
commit 2854e0a
Show file tree

Hide file tree

Showing 29 changed files with 206 additions and 177 deletions.
diff --git a/.azure-pipelines.yml b/.azure-pipelines.yml
@@ -18,14 +18,14 @@ jobs:
           python.version: "3.12"
           RUN_COVERAGE: yes
           TEST_TYPE: "coverage"
-        Python3.9:
-          python.version: "3.9"
+        Python3.10:
+          python.version: "3.10"
         PreRelease:
           python.version: "3.12"
           DEPENDENCIES_VERSION: "pre-release"
           TEST_TYPE: "strict-warning"
         minimum_versions:
-          python.version: "3.9"
+          python.version: "3.10"
           DEPENDENCIES_VERSION: "minimum"
           TEST_TYPE: "coverage"
     steps:

diff --git a/.github/workflows/test-gpu.yml b/.github/workflows/test-gpu.yml
@@ -51,10 +51,20 @@ jobs:
       - name: Nvidia SMI sanity check
         run: nvidia-smi
 
+      - name: Install yq
+        run: |
+          sudo snap install yq
+
+      - name: Extract max Python version from classifiers
+        run: |
+          classifiers=$(yq .project.classifiers pyproject.toml -oy  | grep --only-matching --perl-regexp '(?<=Python :: )(\d\.\d+)')
+          max_version=$(echo "$classifiers" | sort -V | tail -1)
+          echo "max_python_version=$max_version" >> $GITHUB_ENV
+
       - name: Install Python
         uses: actions/setup-python@v5
         with:
-          python-version: "3.x"
+          python-version: ${{ env.max_python_version }}
 
       - name: Install UV
         uses: hynek/setup-cached-uv@v2

diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
@@ -1,6 +1,6 @@
 repos:
   - repo: https://github.com/astral-sh/ruff-pre-commit
-    rev: v0.6.7
+    rev: v0.6.8
     hooks:
       - id: ruff
         types_or: [python, pyi, jupyter]

diff --git a/README.md b/README.md
@@ -5,7 +5,7 @@
 [![PyPI](https://img.shields.io/pypi/v/anndata.svg)](https://pypi.org/project/anndata)
 [![Downloads](https://static.pepy.tech/badge/anndata/month)](https://pepy.tech/project/anndata)
 [![Downloads](https://static.pepy.tech/badge/anndata)](https://pepy.tech/project/anndata)
-[![Stars](https://img.shields.io/github/stars/scverse/anndata?logo=GitHub&color=yellow)](https://github.com/scverse/anndata/stargazers)
+[![Stars](https://img.shields.io/github/stars/scverse/anndata?style=flat&logo=github&color=yellow)](https://github.com/scverse/anndata/stargazers)
 [![Powered by NumFOCUS](https://img.shields.io/badge/powered%20by-NumFOCUS-orange.svg?style=flat&colorA=E1523D&colorB=007D8A)](http://numfocus.org)
 
 <img
@@ -52,13 +52,13 @@ That is, if something is missing from our public API as documented, for example
 
 ## Citation
 
-If you use `anndata` in your work, please cite the `anndata` pre-print as follows:
+If you use `anndata` in your work, please cite the `anndata` publication as follows:
 
 > **anndata: Annotated data**
 >
 > Isaac Virshup, Sergei Rybakov, Fabian J. Theis, Philipp Angerer, F. Alexander Wolf
 >
-> _bioRxiv_ 2021 Dec 19. doi: [10.1101/2021.12.16.473007](https://doi.org/10.1101/2021.12.16.473007).
+> _JOSS_ 2024 Sep 16. doi: [10.21105/joss.04371](https://doi.org/10.21105/joss.04371).
 
 You can cite the scverse publication as follows:
 

diff --git a/docs/conf.py b/docs/conf.py
@@ -121,17 +121,17 @@ def setup(app: Sphinx):
 
 
 intersphinx_mapping = dict(
-    h5py=("https://docs.h5py.org/en/latest/", None),
-    hdf5plugin=("https://hdf5plugin.readthedocs.io/en/latest/", None),
-    loompy=("https://linnarssonlab.org/loompy/", None),
-    numpy=("https://numpy.org/doc/stable/", None),
-    pandas=("https://pandas.pydata.org/pandas-docs/stable/", None),
+    h5py=("https://docs.h5py.org/en/latest", None),
+    hdf5plugin=("https://hdf5plugin.readthedocs.io/en/latest", None),
+    loompy=("https://linnarssonlab.org/loompy", None),
+    numpy=("https://numpy.org/doc/stable", None),
+    pandas=("https://pandas.pydata.org/pandas-docs/stable", None),
     python=("https://docs.python.org/3", None),
-    scipy=("https://docs.scipy.org/doc/scipy/", None),
-    sklearn=("https://scikit-learn.org/stable/", None),
-    zarr=("https://zarr.readthedocs.io/en/stable/", None),
-    xarray=("https://docs.xarray.dev/en/stable/", None),
-    dask=("https://docs.dask.org/en/stable/", None),
+    scipy=("https://docs.scipy.org/doc/scipy", None),
+    sklearn=("https://scikit-learn.org/stable", None),
+    zarr=("https://zarr.readthedocs.io/en/stable", None),
+    xarray=("https://docs.xarray.dev/en/stable", None),
+    dask=("https://docs.dask.org/en/stable", None),
 )
 qualname_overrides = {
     "h5py._hl.group.Group": "h5py.Group",

diff --git a/docs/release-notes/0.11.0rc1.md b/docs/release-notes/0.11.0rc1.md
@@ -18,10 +18,10 @@
 #### Features
 
 - `scipy.sparse.csr_array` and `scipy.sparse.csc_array` are now supported when constructing `AnnData` objects {user}`ilan-gold` {user}`isaac-virshup` ({pr}`1028`)
-- Allow `axis` parameter of e.g. :func:`anndata.concat` to accept `'obs'` and `'var'` {user}`flying-sheep` ({pr}`1244`)
+- Allow `axis` parameter of e.g. {func}`anndata.concat` to accept `'obs'` and `'var'` {user}`flying-sheep` ({pr}`1244`)
 - Add `settings` object with methods for altering internally-used options, like checking for uniqueness on `obs`' index {user}`ilan-gold` ({pr}`1270`)
 - Add {attr}`~anndata.settings.remove_unused_categories` option to {attr}`anndata.settings` to override current behavior {user}`ilan-gold` ({pr}`1340`)
-- Add :func:`~anndata.experimental.read_elem_as_dask` function to handle i/o with sparse and dense arrays {user}`ilan-gold` ({pr}`1469`)
+- Add {func}`~anndata.experimental.read_elem_as_dask` function to handle i/o with sparse and dense arrays {user}`ilan-gold` ({pr}`1469`)
 - Add ability to convert strings to categoricals on write in {meth}`~anndata.AnnData.write_h5ad` and {meth}`~anndata.AnnData.write_zarr` via `convert_strings_to_categoricals` parameter {user}`falexwolf` ({pr}`1474`)
 - Add {attr}`~anndata.settings.check_uniqueness` option to {attr}`anndata.settings` to override current behavior {user}`ilan-gold` ({pr}`1507`)
 - Add functionality to write from GPU {class}`dask.array.Array` to disk {user}`ilan-gold` ({pr}`1550`)

diff --git a/docs/release-notes/0.11.0rc3.md b/docs/release-notes/0.11.0rc3.md
@@ -0,0 +1,6 @@
+(v0.11.0rc3)=
+### 0.11.0rc3 {small}`2024-10-14`
+
+#### Breaking changes
+
+- Drop support for `python` 3.9 {user}`ilan-gold` ({pr}`1712`)
diff --git a/docs/release-notes/1719.bugfix.md b/docs/release-notes/1719.bugfix.md
@@ -0,0 +1 @@
+Ensure {func}`anndata.concat` of {class}`~anndata.AnnData` objects with {class}`scipy.sparse.spmatrix` and {class}`scipy.sparse.sparray` dask arrays uses the correct fill value of 0. {user}`ilan-gold`
diff --git a/hatch.toml b/hatch.toml
@@ -21,7 +21,7 @@ overrides.matrix.deps.env-vars = [
     { key = "UV_RESOLUTION", value = "lowest-direct", if = ["min"] },
 ]
 overrides.matrix.deps.python = [
-    { if = ["min"], value = "3.9" },
+    { if = ["min"], value = "3.10" },
     { if = ["stable", "pre"], value = "3.12" },
 ]
 

diff --git a/pyproject.toml b/pyproject.toml
@@ -5,7 +5,7 @@ requires = ["hatchling", "hatch-vcs"]
 [project]
 name = "anndata"
 description = "Annotated data."
-requires-python = ">=3.9"
+requires-python = ">=3.10"
 license = "BSD-3-Clause"
 authors = [
     { name = "Philipp Angerer" },
@@ -29,7 +29,6 @@ classifiers = [
     "Operating System :: Microsoft :: Windows",
     "Operating System :: POSIX :: Linux",
     "Programming Language :: Python :: 3",
-    "Programming Language :: Python :: 3.9",
     "Programming Language :: Python :: 3.10",
     "Programming Language :: Python :: 3.11",
     "Programming Language :: Python :: 3.12",
@@ -43,7 +42,7 @@ dependencies = [
     "numpy>=1.23",
     # https://github.com/scverse/anndata/issues/1434
     "scipy >1.8",
-    "h5py>=3.1",
+    "h5py>=3.6",
     "exceptiongroup; python_version<'3.11'",
     "natsort",
     "packaging>=20.0",

diff --git a/src/anndata/_core/aligned_mapping.py b/src/anndata/_core/aligned_mapping.py
@@ -5,7 +5,7 @@
 from collections.abc import MutableMapping, Sequence
 from copy import copy
 from dataclasses import dataclass
-from typing import TYPE_CHECKING, Generic, TypeVar, Union
+from typing import TYPE_CHECKING, Generic, TypeVar
 
 import numpy as np
 import pandas as pd
@@ -33,10 +33,10 @@
     from .raw import Raw
 
 
-OneDIdx = Union[Sequence[int], Sequence[bool], slice]
+OneDIdx = Sequence[int] | Sequence[bool] | slice
 TwoDIdx = tuple[OneDIdx, OneDIdx]
 # TODO: pd.DataFrame only allowed in AxisArrays?
-Value = Union[pd.DataFrame, spmatrix, np.ndarray]
+Value = pd.DataFrame | spmatrix | np.ndarray
 
 P = TypeVar("P", bound="AlignedMappingBase")
 """Parent mapping an AlignedView is based on."""
@@ -376,9 +376,14 @@ class PairwiseArraysView(AlignedView[PairwiseArraysBase, OneDIdx], PairwiseArray
 PairwiseArraysBase._actual_class = PairwiseArrays
 
 
-AlignedMapping = Union[
-    AxisArrays, AxisArraysView, Layers, LayersView, PairwiseArrays, PairwiseArraysView
-]
+AlignedMapping = (
+    AxisArrays
+    | AxisArraysView
+    | Layers
+    | LayersView
+    | PairwiseArrays
+    | PairwiseArraysView
+)
 T = TypeVar("T", bound=AlignedMapping)
 """Pair of types to be aligned."""
 
@@ -408,9 +413,7 @@ def fget(self) -> Callable[[], None]:
 
         def fake(): ...
 
-        fake.__annotations__ = {
-            "return": Union[self.cls._actual_class, self.cls._view_class]
-        }
+        fake.__annotations__ = {"return": self.cls._actual_class | self.cls._view_class}
         return fake
 
     def __get__(self, obj: None | AnnData, objtype: type | None = None) -> T:

diff --git a/src/anndata/_core/anndata.py b/src/anndata/_core/anndata.py
@@ -283,12 +283,12 @@ def _init_as_view(self, adata_ref: AnnData, oidx: Index, vidx: Index):
                 "that is, you cannot make a view of a view."
             )
         self._is_view = True
-        if isinstance(oidx, (int, np.integer)):
+        if isinstance(oidx, int | np.integer):
             if not (-adata_ref.n_obs <= oidx < adata_ref.n_obs):
                 raise IndexError(f"Observation index `{oidx}` is out of range.")
             oidx += adata_ref.n_obs * (oidx < 0)
             oidx = slice(oidx, oidx + 1, 1)
-        if isinstance(vidx, (int, np.integer)):
+        if isinstance(vidx, int | np.integer):
             if not (-adata_ref.n_vars <= vidx < adata_ref.n_vars):
                 raise IndexError(f"Variable index `{vidx}` is out of range.")
             vidx += adata_ref.n_vars * (vidx < 0)
@@ -417,7 +417,7 @@ def _init_as_actual(
                     #       as in readwrite.read_10x_h5
                     if X.dtype != np.dtype(dtype):
                         X = X.astype(dtype)
-                elif isinstance(X, (ZarrArray, DaskArray)):
+                elif isinstance(X, ZarrArray | DaskArray):
                     X = X.astype(dtype)
                 else:  # is np.ndarray or a subclass, convert to true np.ndarray
                     X = np.asarray(X, dtype)
@@ -777,16 +777,14 @@ def _prep_dim_index(self, value, attr: str) -> pd.Index:
             raise ValueError(
                 f"Length of passed value for {attr}_names is {len(value)}, but this AnnData has shape: {self.shape}"
             )
-        if isinstance(value, pd.Index) and not isinstance(
-            value.name, (str, type(None))
-        ):
+        if isinstance(value, pd.Index) and not isinstance(value.name, str | type(None)):
             raise ValueError(
                 f"AnnData expects .{attr}.index.name to be a string or None, "
                 f"but you passed a name of type {type(value.name).__name__!r}"
             )
         else:
             value = pd.Index(value)
-            if not isinstance(value.name, (str, type(None))):
+            if not isinstance(value.name, str | type(None)):
                 value.name = None
         if (
             len(value) > 0
@@ -1995,7 +1993,7 @@ def chunk_X(
         if isinstance(select, int):
             select = select if select < self.n_obs else self.n_obs
             choice = np.random.choice(self.n_obs, select, replace)
-        elif isinstance(select, (np.ndarray, Sequence)):
+        elif isinstance(select, np.ndarray | Sequence):
             choice = np.asarray(select)
         else:
             raise ValueError("select should be int or array")

diff --git a/src/anndata/_core/index.py b/src/anndata/_core/index.py
@@ -70,25 +70,25 @@ def name_idx(i):
             stop = None if stop is None else stop + 1
         step = indexer.step
         return slice(start, stop, step)
-    elif isinstance(indexer, (np.integer, int)):
+    elif isinstance(indexer, np.integer | int):
         return indexer
     elif isinstance(indexer, str):
         return index.get_loc(indexer)  # int
     elif isinstance(
-        indexer, (Sequence, np.ndarray, pd.Index, spmatrix, np.matrix, SpArray)
+        indexer, Sequence | np.ndarray | pd.Index | spmatrix | np.matrix | SpArray
     ):
         if hasattr(indexer, "shape") and (
             (indexer.shape == (index.shape[0], 1))
             or (indexer.shape == (1, index.shape[0]))
         ):
-            if isinstance(indexer, (spmatrix, SpArray)):
+            if isinstance(indexer, spmatrix | SpArray):
                 indexer = indexer.toarray()
             indexer = np.ravel(indexer)
-        if not isinstance(indexer, (np.ndarray, pd.Index)):
+        if not isinstance(indexer, np.ndarray | pd.Index):
             indexer = np.array(indexer)
             if len(indexer) == 0:
                 indexer = indexer.astype(int)
-        if issubclass(indexer.dtype.type, (np.integer, np.floating)):
+        if issubclass(indexer.dtype.type, np.integer | np.floating):
             return indexer  # Might not work for range indexes
         elif issubclass(indexer.dtype.type, np.bool_):
             if indexer.shape != index.shape:

diff --git a/src/anndata/_core/merge.py b/src/anndata/_core/merge.py
@@ -174,7 +174,7 @@ def equal_sparse(a, b) -> bool:
 
     xp = array_api_compat.array_namespace(a.data)
 
-    if isinstance(b, (CupySparseMatrix, sparse.spmatrix, SpArray)):
+    if isinstance(b, CupySparseMatrix | sparse.spmatrix | SpArray):
         if isinstance(a, CupySparseMatrix):
             # Comparison broken for CSC matrices
             # https://github.com/cupy/cupy/issues/7757
@@ -206,7 +206,7 @@ def equal_awkward(a, b) -> bool:
 
 
 def as_sparse(x, *, use_sparse_array=False):
-    if not isinstance(x, (sparse.spmatrix, SpArray)):
+    if not isinstance(x, sparse.spmatrix | SpArray):
         if CAN_USE_SPARSE_ARRAY and use_sparse_array:
             return sparse.csr_array(x)
         return sparse.csr_matrix(x)
@@ -536,7 +536,7 @@ def apply(self, el, *, axis, fill_value=None):
             return el
         if isinstance(el, pd.DataFrame):
             return self._apply_to_df(el, axis=axis, fill_value=fill_value)
-        elif isinstance(el, (sparse.spmatrix, SpArray, CupySparseMatrix)):
+        elif isinstance(el, sparse.spmatrix | SpArray | CupySparseMatrix):
             return self._apply_to_sparse(el, axis=axis, fill_value=fill_value)
         elif isinstance(el, AwkArray):
             return self._apply_to_awkward(el, axis=axis, fill_value=fill_value)
@@ -723,7 +723,14 @@ def default_fill_value(els):
 
     This is largely due to backwards compat, and might not be the ideal solution.
     """
-    if any(isinstance(el, (sparse.spmatrix, SpArray)) for el in els):
+    if any(
+        isinstance(el, sparse.spmatrix | SpArray)
+        or (
+            isinstance(el, DaskArray)
+            and isinstance(el._meta, sparse.spmatrix | SpArray)
+        )
+        for el in els
+    ):
         return 0
     else:
         return np.nan
@@ -794,7 +801,7 @@ def concat_arrays(arrays, reindexers, axis=0, index=None, fill_value=None):
         import cupyx.scipy.sparse as cpsparse
 
         if not all(
-            isinstance(a, (CupySparseMatrix, CupyArray)) or 0 in a.shape for a in arrays
+            isinstance(a, CupySparseMatrix | CupyArray) or 0 in a.shape for a in arrays
         ):
             raise NotImplementedError(
                 "Cannot concatenate a cupy array with other array types."
@@ -821,7 +828,7 @@ def concat_arrays(arrays, reindexers, axis=0, index=None, fill_value=None):
             ],
             axis=axis,
         )
-    elif any(isinstance(a, (sparse.spmatrix, SpArray)) for a in arrays):
+    elif any(isinstance(a, sparse.spmatrix | SpArray) for a in arrays):
         sparse_stack = (sparse.vstack, sparse.hstack)[axis]
         use_sparse_array = any(issubclass(type(a), SpArray) for a in arrays)
         return sparse_stack(
@@ -980,7 +987,7 @@ def concat_pairwise_mapping(
         els = [
             m.get(k, sparse_class((s, s), dtype=bool)) for m, s in zip(mappings, shapes)
         ]
-        if all(isinstance(el, (CupySparseMatrix, CupyArray)) for el in els):
+        if all(isinstance(el, CupySparseMatrix | CupyArray) for el in els):
             result[k] = _cp_block_diag(els, format="csr")
         elif all(isinstance(el, DaskArray) for el in els):
             result[k] = _dask_block_diag(els)

diff --git a/src/anndata/_core/raw.py b/src/anndata/_core/raw.py
@@ -40,7 +40,7 @@ def __init__(
         # construct manually
         if adata.isbacked == (X is None):
             # Move from GPU to CPU since it's large and not always used
-            if isinstance(X, (CupyArray, CupySparseMatrix)):
+            if isinstance(X, CupyArray | CupySparseMatrix):
                 self._X = X.get()
             else:
                 self._X = X
@@ -51,7 +51,7 @@ def __init__(
             self.varm = varm
         elif X is None:  # construct from adata
             # Move from GPU to CPU since it's large and not always used
-            if isinstance(adata.X, (CupyArray, CupySparseMatrix)):
+            if isinstance(adata.X, CupyArray | CupySparseMatrix):
                 self._X = adata.X.get()
             else:
                 self._X = adata.X.copy()
@@ -124,9 +124,9 @@ def __getitem__(self, index):
         oidx, vidx = self._normalize_indices(index)
 
         # To preserve two dimensional shape
-        if isinstance(vidx, (int, np.integer)):
+        if isinstance(vidx, int | np.integer):
             vidx = slice(vidx, vidx + 1, 1)
-        if isinstance(oidx, (int, np.integer)):
+        if isinstance(oidx, int | np.integer):
             oidx = slice(oidx, oidx + 1, 1)
 
         if not self._adata.isbacked: