From c497c07a29edb31a66fe0285ad859adf5e78f96c Mon Sep 17 00:00:00 2001 From: Alvaro Martinez Barrio Date: Thu, 18 Jul 2024 08:47:49 +0200 Subject: [PATCH 01/18] fix: citations to the nat methods paper in the repo --- CITATIONS.md | 2 +- README.md | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/CITATIONS.md b/CITATIONS.md index 9f77e8f4..6ba2930c 100644 --- a/CITATIONS.md +++ b/CITATIONS.md @@ -4,7 +4,7 @@ - [pixelator](https://doi.org/10.1101/2023.06.05.543770) - > Karlsson, Filip, Tomasz Kallas, Divya Thiagarajan, Max Karlsson, Maud Schweitzer, Jose Fernandez Navarro, Louise Leijonancker, et al. “Molecular Pixelation: Single Cell Spatial Proteomics by Sequencing.” bioRxiv, June 8, 2023. https://doi.org/10.1101/2023.06.05.543770. + > Karlsson, Filip, Tomasz Kallas, Divya Thiagarajan, Max Karlsson, Maud Schweitzer, Jose Fernandez Navarro, Louise Leijonancker, et al. “Molecular pixelation: spatial proteomics of single cells by sequencing.” Nature Methods, May 8, 2024. https://doi.org/10.1038/s41592-024-02268-9. - [cutadapt](http://dx.doi.org/10.14806/ej.17.1.200) diff --git a/README.md b/README.md index 7b00888c..39a5ae31 100644 --- a/README.md +++ b/README.md @@ -3,7 +3,7 @@ ![python-version](https://img.shields.io/badge/python-3.10%20%7C%203.11-blue) [![MIT](https://img.shields.io/badge/license-MIT-blue)](https://opensource.org/licenses/MIT) -[![DOI](https://img.shields.io/badge/DOI-10.1101/2023.06.05.543770-B31B1B.svg)](https://www.biorxiv.org/content/10.1101/2023.06.05.543770v1) +[![DOI](https://img.shields.io/badge/DOI-10.1038/S41592--024--02268--9-B31B1B.svg)](https://doi.org/10.1038/s41592-024-02268-9) [![conda](https://anaconda.org/bioconda/pixelator/badges/version.svg)](https://bioconda.github.io/recipes/pixelator/README.html#package-pixelator) [![pypi](https://img.shields.io/pypi/v/pixelgen-pixelator)](https://pypi.org/project/pixelgen-pixelator/) [![Tests](https://github.com/PixelgenTechnologies/pixelator/actions/workflows/tests.yml/badge.svg)](https://github.com/PixelgenTechnologies/pixelator/actions/workflows/tests.yml) @@ -113,7 +113,7 @@ Pixelator is developed and maintained by the [developers](https://github.com/Pix When using pixelator in your research, please cite the following publication: -> Karlsson, Filip, Tomasz Kallas, Divya Thiagarajan, Max Karlsson, Maud Schweitzer, Jose Fernandez Navarro, Louise Leijonancker, _et al._ “Molecular Pixelation: Single Cell Spatial Proteomics by Sequencing.” bioRxiv, June 8, 2023. https://doi.org/10.1101/2023.06.05.543770. +> Karlsson, Filip, Tomasz Kallas, Divya Thiagarajan, Max Karlsson, Maud Schweitzer, Jose Fernandez Navarro, Louise Leijonancker, _et al._ "Molecular pixelation: spatial proteomics of single cells by sequencing." Nature Methods, May 8, 2024. https://doi.org/10.1038/s41592-024-02268-9. Main development happened thanks to: From bceb7f42b4988d488d35a7b62d536aa8148667a3 Mon Sep 17 00:00:00 2001 From: fbdtemme Date: Thu, 1 Aug 2024 17:13:52 +0200 Subject: [PATCH 02/18] Do not user user-local site for python packages in dev container --- containers/base.Dockerfile | 9 ++++----- containers/prod.Dockerfile | 2 +- 2 files changed, 5 insertions(+), 6 deletions(-) diff --git a/containers/base.Dockerfile b/containers/base.Dockerfile index 206a376e..14f7618a 100644 --- a/containers/base.Dockerfile +++ b/containers/base.Dockerfile @@ -4,7 +4,7 @@ ARG USE_ENTRYPOINT=false ARG MAKEJOBS=4 # Install pixelator dependencies in a separate stage to improve caching -FROM registry.fedoraproject.org/fedora-minimal:39 as runtime-base +FROM registry.fedoraproject.org/fedora-minimal:40 as runtime-base RUN microdnf install -y \ python3.11 \ git \ @@ -49,7 +49,7 @@ RUN microdnf install -y \ && microdnf clean all -# Build Fastp from source +# Build Fastp and isal from source FROM builder-base as build-fastp RUN git clone https://github.com/intel/isa-l.git @@ -132,7 +132,7 @@ FROM runtime-base as runtime-amd64 # Copy both fastp executable and isa-l library COPY --from=build-fastp /usr/local/ /usr/local/ -COPY --from=poetry-deps-install-amd64 /runtime/ /usr/local/ +COPY --from=poetry-deps-install-amd64 /runtime/ /usr/ # ------------------------------------------ # -- Build the runtime environment for arm64 @@ -142,7 +142,7 @@ FROM runtime-base as runtime-arm64 # Copy both fastp executable and isa-l library COPY --from=build-fastp /usr/local/ /usr/local/ -COPY --from=poetry-deps-install-arm64 /runtime/ /usr/local/ +COPY --from=poetry-deps-install-arm64 /runtime/ /usr/ # ------------------------------------------ # -- Build the final image @@ -154,7 +154,6 @@ FROM runtime-${TARGETARCH} as runtime-final # We add this explicitly since nextflow often runs with PYTHONNOUSERSITE set # to fix interference with conda and this can cause problems. # Fastp will also build isal and we need to make that available -ENV PYTHONPATH="$PYTHONPATH:/usr/local/lib/python3.11/site-packages:/usr/local/lib64/python3.11/site-packages" RUN ldconfig /usr/local/lib64 COPY --from=build-pixelator /dist /dist diff --git a/containers/prod.Dockerfile b/containers/prod.Dockerfile index ad138c75..204aa3f0 100644 --- a/containers/prod.Dockerfile +++ b/containers/prod.Dockerfile @@ -11,7 +11,7 @@ LABEL org.opencontainers.image.licenses = "MIT" # Install pixelator dependencies in a separate stage to improve caching FROM runtime AS entrypoint-true -ENTRYPOINT [ "/usr/local/bin/pixelator" ] +ENTRYPOINT [ "/usr/bin/pixelator" ] FROM runtime AS entrypoint-false ENTRYPOINT [] From 4c7292c57d6cf928117beb103e805029e6347710 Mon Sep 17 00:00:00 2001 From: fbdtemme Date: Thu, 1 Aug 2024 18:17:20 +0200 Subject: [PATCH 03/18] Force reinstall user-site packages to avoid missing deps --- containers/base.Dockerfile | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/containers/base.Dockerfile b/containers/base.Dockerfile index 14f7618a..87ce3f67 100644 --- a/containers/base.Dockerfile +++ b/containers/base.Dockerfile @@ -86,10 +86,10 @@ ENV ANNOY_TARGET_VARIANT="${TARGETVARIANT:-v3}" RUN if [ -n "$ANNOY_TARGET_VARIANT" ]; then \ export ANNOY_COMPILER_ARGS="-D_CRT_SECURE_NO_WARNINGS,-DANNOYLIB_MULTITHREADED_BUILD,-march=x86-64-$ANNOY_TARGET_VARIANT"; \ echo "Building Annoy for explicit target $TARGETPLATFORM/$ANNOY_TARGET_VARIANT"; \ - pip3.11 install --prefix=/runtime -r requirements.txt; \ + pip3.11 install -I --prefix=/runtime -r requirements.txt; \ else \ echo "Building Annoy without implicit target $TARGETPLATFORM"; \ - pip3.11 install --prefix=/runtime -r requirements.txt; \ + pip3.11 install -I --prefix=/runtime -r requirements.txt; \ fi \ && rm requirements.txt @@ -101,7 +101,7 @@ COPY poetry.lock pyproject.toml /pixelator/ COPY .git /pixelator/.git RUN poetry export --output requirements.txt --without-hashes --no-interaction --no-ansi -RUN pip3.11 install --prefix=/runtime -r requirements.txt && rm requirements.txt +RUN pip3.11 install -I --prefix=/runtime -r requirements.txt && rm requirements.txt # ------------------------------------------ # -- Build the pixelator package @@ -158,7 +158,7 @@ RUN ldconfig /usr/local/lib64 COPY --from=build-pixelator /dist /dist RUN ls -alh /dist/ -RUN pip3.11 install /dist/*.tar.gz +RUN pip3.11 install --prefix /usr/ /dist/*.tar.gz RUN rm -rf /dist RUN pip3.11 cache purge From f94fc676289ab2af09c4b5c0b9d98c84ebf2fbb2 Mon Sep 17 00:00:00 2001 From: Johan Dahlberg Date: Fri, 9 Aug 2024 16:26:17 +0200 Subject: [PATCH 04/18] Fix memory issues in layout aggregation --- src/pixelator/pixeldataset/aggregation.py | 5 +- .../pixeldataset/precomputed_layouts.py | 19 +++-- tests/pixeldataset/test_aggregation.py | 3 +- .../pixeldataset/test_precomputed_layouts.py | 77 +++++++++++-------- 4 files changed, 59 insertions(+), 45 deletions(-) diff --git a/src/pixelator/pixeldataset/aggregation.py b/src/pixelator/pixeldataset/aggregation.py index c4ef5fde..82ad64f8 100644 --- a/src/pixelator/pixeldataset/aggregation.py +++ b/src/pixelator/pixeldataset/aggregation.py @@ -148,10 +148,7 @@ def _add_sample_name_as_obs_col(adata, name): } precomputed_layouts = aggregate_precomputed_layouts( - [ - (name, dataset.precomputed_layouts) - for name, dataset in zip(sample_names, datasets) - ], + [(name, dataset) for name, dataset in zip(sample_names, datasets)], all_markers=set(datasets[0].adata.var.index), ) diff --git a/src/pixelator/pixeldataset/precomputed_layouts.py b/src/pixelator/pixeldataset/precomputed_layouts.py index 78f0f052..5d64226c 100644 --- a/src/pixelator/pixeldataset/precomputed_layouts.py +++ b/src/pixelator/pixeldataset/precomputed_layouts.py @@ -315,13 +315,17 @@ def component_iterator( else: unique_components = self._convert_to_set(component_ids) # type: ignore - for component_id in unique_components: + # We read in batches since it makes the read operations slightly + # faster than iterating them one at the time + for component_ids in batched(unique_components, 20): data = self.filter( - component_ids=component_id, + component_ids=component_ids, graph_projection=graph_projections, layout_method=layout_methods, ) - yield data.to_df(columns) + + for _, df in data.lazy.collect().group_by("component"): + yield df.select(columns if columns else pl.all()).to_pandas() @staticmethod def _convert_to_set( @@ -339,7 +343,7 @@ def copy(self) -> PreComputedLayouts: def aggregate_precomputed_layouts( - precomputed_layouts: Iterable[tuple[str, PreComputedLayouts | None]], + pxl_datasets: Iterable[tuple[str, PixelDataset | None]], all_markers: set[str], ) -> PreComputedLayouts: """Aggregate precomputed layouts into a single PreComputedLayouts instance.""" @@ -347,13 +351,14 @@ def aggregate_precomputed_layouts( def zero_fill_missing_markers( lazyframe: pl.LazyFrame, all_markers: set[str] ) -> pl.LazyFrame: - missing_markers = all_markers - set(lazyframe.columns) + missing_markers = all_markers - set(lazyframe.collect_schema().names()) return lazyframe.with_columns( **{marker: pl.lit(0) for marker in missing_markers} ) def data(): - for sample_name, layout in precomputed_layouts: + for sample_name, pxl_dataset in pxl_datasets: + layout = pxl_dataset.precomputed_layouts if layout is None: continue if layout.is_empty: @@ -365,7 +370,7 @@ def data(): try: return PreComputedLayouts( - pl.concat(data(), rechunk=True).collect(), + pl.concat(data()), partitioning=["sample"] + PreComputedLayouts.DEFAULT_PARTITIONING, ) except ValueError: diff --git a/tests/pixeldataset/test_aggregation.py b/tests/pixeldataset/test_aggregation.py index a3c2b2f1..35a1bddf 100644 --- a/tests/pixeldataset/test_aggregation.py +++ b/tests/pixeldataset/test_aggregation.py @@ -200,9 +200,10 @@ def test_simple_aggregate_do_not_have_problems_with_layouts_when_working_with_fi dataset_1.save(tmp_data_set_path_1) dataset_1.save(tmp_data_set_path_2) + datasets = list([read(tmp_data_set_path_1), read(tmp_data_set_path_2)]) result = simple_aggregate( sample_names=["sample1", "sample2"], - datasets=[read(tmp_data_set_path_1), read(tmp_data_set_path_2)], + datasets=datasets, ) assert len(result.precomputed_layouts.to_df()) == 2 * len( diff --git a/tests/pixeldataset/test_precomputed_layouts.py b/tests/pixeldataset/test_precomputed_layouts.py index d9d2da65..25a29fbe 100644 --- a/tests/pixeldataset/test_precomputed_layouts.py +++ b/tests/pixeldataset/test_precomputed_layouts.py @@ -98,6 +98,11 @@ def precomputed_layouts_fixture(request) -> PreComputedLayouts: raise Exception("We should never get here!") +class MockPixelDataset: + def __init__(self, precomputed_layouts): + self.precomputed_layouts = precomputed_layouts + + class TestPreComputedLayouts: def test_is_empty_returns_true_for_empty_layout(self): layouts_lazy = pl.DataFrame({"component": []}).lazy() @@ -285,30 +290,34 @@ def test_iterator_returns_filtered_dataframes_and_requested_columns(self): def test_aggregate_precomputed_layouts(self): # Create some sample PreComputedLayouts - layout1 = PreComputedLayouts( - pl.DataFrame( - { - "x": [1, 2, 3], - "y": [4, 5, 6], - "component": ["A", "B", "C"], - "sample": ["sample1", "sample1", "sample1"], - } - ).lazy() + pxl_1 = MockPixelDataset( + PreComputedLayouts( + pl.DataFrame( + { + "x": [1, 2, 3], + "y": [4, 5, 6], + "component": ["A", "B", "C"], + "sample": ["sample1", "sample1", "sample1"], + } + ).lazy() + ) ) - layout2 = PreComputedLayouts( - pl.DataFrame( - { - "x": [7, 8, 9], - "y": [10, 11, 12], - "component": ["A", "B", "C"], - "sample": ["sample2", "sample2", "sample2"], - } - ).lazy() + pxl_2 = MockPixelDataset( + PreComputedLayouts( + pl.DataFrame( + { + "x": [7, 8, 9], + "y": [10, 11, 12], + "component": ["A", "B", "C"], + "sample": ["sample2", "sample2", "sample2"], + } + ).lazy() + ) ) # Aggregate the layouts aggregated_layouts = aggregate_precomputed_layouts( - [("sample1", layout1), ("sample2", layout2)], + [("sample1", pxl_1), ("sample2", pxl_2)], all_markers={"x", "y", "component", "sample"}, ) @@ -332,21 +341,23 @@ def test_aggregate_precomputed_layouts(self): def test_aggregate_precomputed_layouts_one_empty_data_frame(self): # Create some sample PreComputedLayouts - layout1 = PreComputedLayouts(None) - layout2 = PreComputedLayouts( - pl.DataFrame( - { - "x": [7, 8, 9], - "y": [10, 11, 12], - "component": ["A", "B", "C"], - "sample": ["sample2", "sample2", "sample2"], - } - ).lazy() + mock_pxl_1 = MockPixelDataset(PreComputedLayouts(None)) + mock_pxl_2 = MockPixelDataset( + PreComputedLayouts( + pl.DataFrame( + { + "x": [7, 8, 9], + "y": [10, 11, 12], + "component": ["A", "B", "C"], + "sample": ["sample2", "sample2", "sample2"], + } + ).lazy() + ) ) # Aggregate the layouts aggregated_layouts = aggregate_precomputed_layouts( - [("sample1", layout1), ("sample2", layout2)], + [("sample1", mock_pxl_1), ("sample2", mock_pxl_2)], all_markers={"x", "y", "component", "sample"}, ) @@ -367,12 +378,12 @@ def test_aggregate_precomputed_layouts_one_empty_data_frame(self): def test_aggregate_precomputed_layouts_no_layouts_in_data(self): # Create some sample PreComputedLayouts - layout1 = PreComputedLayouts(None) - layout2 = PreComputedLayouts(None) + mock_pxl_1 = MockPixelDataset(PreComputedLayouts(None)) + mock_pxl_2 = MockPixelDataset(PreComputedLayouts(None)) # Aggregate the layouts aggregated_layouts = aggregate_precomputed_layouts( - [("sample1", layout1), ("sample2", layout2)], + [("sample1", mock_pxl_1), ("sample2", mock_pxl_2)], all_markers={"x", "y", "component", "sample"}, ) From c6cab983bf22efd5b3f5bf1ba1ec444ff5e9c09e Mon Sep 17 00:00:00 2001 From: Johan Dahlberg Date: Mon, 12 Aug 2024 11:58:05 +0200 Subject: [PATCH 05/18] Make component name depend on sample --- src/pixelator/pixeldataset/precomputed_layouts.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/src/pixelator/pixeldataset/precomputed_layouts.py b/src/pixelator/pixeldataset/precomputed_layouts.py index 5d64226c..49de6934 100644 --- a/src/pixelator/pixeldataset/precomputed_layouts.py +++ b/src/pixelator/pixeldataset/precomputed_layouts.py @@ -364,7 +364,10 @@ def data(): if layout.is_empty: continue layout_with_name = layout.lazy.with_columns( - sample=pl.lit(sample_name) + sample=pl.lit(sample_name), + component=pl.concat_str( + pl.col("component"), pl.lit(sample_name), separator="_" + ), ).pipe(zero_fill_missing_markers, all_markers=all_markers) yield layout_with_name From 0dc367a138aae68cf3c8b297f379b3e5f2f53dbf Mon Sep 17 00:00:00 2001 From: Johan Dahlberg Date: Mon, 12 Aug 2024 16:31:32 +0200 Subject: [PATCH 06/18] Can write one sample at the time --- src/pixelator/pixeldataset/datastores.py | 45 +++++++++++-------- .../pixeldataset/precomputed_layouts.py | 34 +++++++++++++- .../pixeldataset/test_precomputed_layouts.py | 11 ++++- 3 files changed, 69 insertions(+), 21 deletions(-) diff --git a/src/pixelator/pixeldataset/datastores.py b/src/pixelator/pixeldataset/datastores.py index ad44444a..695a5861 100644 --- a/src/pixelator/pixeldataset/datastores.py +++ b/src/pixelator/pixeldataset/datastores.py @@ -464,30 +464,39 @@ def write_precomputed_layouts( logger.debug("No layouts to write, will skip.") return - self._check_if_writeable(self.LAYOUTS_KEY) - logger.debug("Starting to write layouts...") # This option is in place to allow collecting all the layouts into # as single dataframe before writing (they will still be written into # partitions), but this is much faster than writing them one by one # for scenarios with many very small layouts. - if collapse_to_single_dataframe: - logger.debug("Writing from a single dataframe...") - self.write_dataframe( - layouts.to_df(), - self.LAYOUTS_KEY, - partitioning=PreComputedLayouts.DEFAULT_PARTITIONING, + + from tempfile import TemporaryDirectory + + self._set_to_write_mode() + self._check_if_writeable(self.LAYOUTS_KEY) + with TemporaryDirectory(prefix="pixelator-") as tmp_dir: + tmp_path = Path(tmp_dir) + local_tmp_target = tmp_path / "local.layouts.parquet" + layouts.write_parquet(local_tmp_target, partitioning=layouts.partitioning) + pa_dataset = ds.dataset(local_tmp_target, partitioning="hive") + local_tmp_target = tmp_path / "local.partitioned.layouts.parquet" + ds.write_dataset( + pa_dataset, + local_tmp_target, + format="parquet", + partitioning_flavor="hive", + partitioning=layouts.partitioning, + use_threads=False, + existing_data_behavior="overwrite_or_ignore", ) - else: - logger.debug("Writing by iterating components...") - for idx, layouts_to_write in enumerate(layouts.component_iterator()): - if idx % 100 == 0: - logger.debug("Writing layouts...") - self.write_dataframe( - layouts_to_write, - self.LAYOUTS_KEY, - partitioning=PreComputedLayouts.DEFAULT_PARTITIONING, - ) + + for file_ in local_tmp_target.rglob("*"): + if file_.is_file(): + # TODO Make sure written without compression + file_name = file_.relative_to(local_tmp_target) + self._file_system.zip.write( + file_, arcname=f"{self.LAYOUTS_KEY}/{file_name}" + ) logger.debug("Completed writing layouts...") diff --git a/src/pixelator/pixeldataset/precomputed_layouts.py b/src/pixelator/pixeldataset/precomputed_layouts.py index 49de6934..256a83bd 100644 --- a/src/pixelator/pixeldataset/precomputed_layouts.py +++ b/src/pixelator/pixeldataset/precomputed_layouts.py @@ -10,6 +10,7 @@ from __future__ import annotations +from pathlib import Path from typing import TYPE_CHECKING, Iterable, Optional, Protocol import pandas as pd @@ -49,12 +50,20 @@ def filter( layout_methods: str | set[str] | None = None, ) -> pl.LazyFrame | list[pl.LazyFrame]: ... + def write_parquet(self, path: Path, partitioning: list[str]) -> None: + """Write a parquet file to the provided path.""" + ... + class _EmptyDataProvider(_DataProvider): def __init__(self) -> None: # This class needs no parameters pass + def write_parquet(self, path: Path, partitioning: list[str]) -> None: + """Write a parquet file to the provided path.""" + return + def is_empty(self) -> bool: return True @@ -88,6 +97,16 @@ def to_df(self, columns: list[str] | None = None) -> pd.DataFrame: return self._lazy_frame.select(columns).collect().to_pandas() return self.lazy().collect().to_pandas() + def write_parquet(self, path: Path, partitioning: list[str]) -> None: + """Write a parquet file to the provided path.""" + self.lazy().collect().write_parquet( + path, + use_pyarrow=True, + pyarrow_options={ + "partition_cols": partitioning, + }, + ) + def lazy(self): return self._lazy_frame @@ -192,6 +211,15 @@ def data(): return list(data()) + def write_parquet(self, path: Path, partitioning: list[str]) -> None: + """Write a parquet file to the provided path.""" + for frame in self._lazy_frames: + frame.collect(streaming=True).write_parquet( + path, + use_pyarrow=True, + pyarrow_options={"partition_cols": partitioning}, + ) + class PreComputedLayouts: """Pre-computed layouts for a set of graphs, per component.""" @@ -252,6 +280,10 @@ def partitioning(self) -> list[str]: """ return self._partitioning + def write_parquet(self, path: Path, partitioning: list[str]) -> None: + """Write a parquet file to the provided path.""" + self._data_provider.write_parquet(path, partitioning) + def unique_components(self) -> set[str]: """Return the unique components in the layouts.""" return self._data_provider.unique_components() @@ -373,7 +405,7 @@ def data(): try: return PreComputedLayouts( - pl.concat(data()), + data(), partitioning=["sample"] + PreComputedLayouts.DEFAULT_PARTITIONING, ) except ValueError: diff --git a/tests/pixeldataset/test_precomputed_layouts.py b/tests/pixeldataset/test_precomputed_layouts.py index 25a29fbe..522b619d 100644 --- a/tests/pixeldataset/test_precomputed_layouts.py +++ b/tests/pixeldataset/test_precomputed_layouts.py @@ -326,7 +326,14 @@ def test_aggregate_precomputed_layouts(self): { "x": [1, 2, 3, 7, 8, 9], "y": [4, 5, 6, 10, 11, 12], - "component": ["A", "B", "C", "A", "B", "C"], + "component": [ + "A_sample1", + "B_sample1", + "C_sample1", + "A_sample2", + "B_sample2", + "C_sample2", + ], "sample": [ "sample1", "sample1", @@ -366,7 +373,7 @@ def test_aggregate_precomputed_layouts_one_empty_data_frame(self): { "x": [7, 8, 9], "y": [10, 11, 12], - "component": ["A", "B", "C"], + "component": ["A_sample2", "B_sample2", "C_sample2"], "sample": [ "sample2", "sample2", From 0bcabbd21b8855cab728242e65d613322f3b1c82 Mon Sep 17 00:00:00 2001 From: Johan Dahlberg Date: Wed, 14 Aug 2024 13:24:29 +0200 Subject: [PATCH 07/18] Adding custom zip file system, better performance --- src/pixelator/pixeldataset/backends.py | 7 +-- src/pixelator/pixeldataset/datastores.py | 63 +++++++++++++++++++++++- 2 files changed, 65 insertions(+), 5 deletions(-) diff --git a/src/pixelator/pixeldataset/backends.py b/src/pixelator/pixeldataset/backends.py index 3a36fc93..4c65193a 100644 --- a/src/pixelator/pixeldataset/backends.py +++ b/src/pixelator/pixeldataset/backends.py @@ -284,14 +284,15 @@ def precomputed_layouts(self) -> PreComputedLayouts | None: """Get the precomputed layouts.""" # If it is None it means it is uninitialized, and we should # attempt to read it lazily + if self._precomputed_layouts is None: - return self._datastore.read_precomputed_layouts() + self._precomputed_layouts = self._datastore.read_precomputed_layouts() # It can also be empty, in which case it has been read and # found to be empty. Or it has been initialized to be empty, # which means that it should be cleared. - if self._precomputed_layouts.is_empty: - return None + if self._precomputed_layouts.is_empty: # type: ignore + self._precomputed_layouts = PreComputedLayouts.create_empty() return self._precomputed_layouts diff --git a/src/pixelator/pixeldataset/datastores.py b/src/pixelator/pixeldataset/datastores.py index 695a5861..b62cf781 100644 --- a/src/pixelator/pixeldataset/datastores.py +++ b/src/pixelator/pixeldataset/datastores.py @@ -267,6 +267,59 @@ def write_precomputed_layouts( ... +class _CustomZipFileSystem(ZipFileSystem): + def __init__(self, *args, **kwargs): + super().__init__(*args, **kwargs) + + def find(self, path, maxdepth=None, withdirs=False, detail=False, **kwargs): + if maxdepth is not None and maxdepth < 1: + raise ValueError("maxdepth must be at least 1") + + # TODO Handle details True/False + # TODO Add tests to make sure we have parity with the original implementation + + result = {} + + def _below_max_recursion_depth(path): + if not maxdepth: + return True + + depth = len(path.split("/")) + return depth <= maxdepth + + for zip_info in self.zip.infolist(): + file_name = zip_info.filename + if not file_name.startswith(path): + continue + + # zip files can contain explicit or implicit directories + # hence the need to either add them directly or infer them + # from the file paths + if zip_info.is_dir() and withdirs: + if not result.get(file_name) and _below_max_recursion_depth(file_name): + result[file_name] = self.info(file_name) + continue + + if not result.get(file_name): + if _below_max_recursion_depth(file_name): + result[file_name] = self.info(file_name) + + if withdirs: + directories = file_name.split("/") + for i in range(1, len(directories)): + dir_path = "/".join(directories[:i]) + if not result.get(dir_path) and _below_max_recursion_depth( + dir_path + ): + result[dir_path] = { + "name": dir_path, + "size": 0, + "type": "directory", + } + + return result + + class ZipBasedPixelFile(PixelDataStore): """Superclass for all zip-based pixel data stores.""" @@ -300,7 +353,7 @@ def __del__(self) -> None: self.close() def _setup_file_system(self, mode): - files_system = ZipFileSystem(fo=self.path, mode=mode, allowZip64=True) + files_system = _CustomZipFileSystem(fo=self.path, mode=mode, allowZip64=True) # For now we are overwriting the zip open method to force it to # always have force_zip=True, otherwise it won't work for large file @@ -696,7 +749,13 @@ def _read_dataframe_from_zip(self, key: str) -> Optional[pd.DataFrame]: def _read_dataframe_from_zip_lazy(self, key: str) -> Optional[pl.LazyFrame]: try: self._set_to_read_mode() - dataset = ds.dataset(key, filesystem=self._file_system, partitioning="hive") + dataset = ds.dataset( + key, + filesystem=self._file_system, + partitioning="hive", + format="parquet", + partition_base_dir=key, + ) return pl.scan_pyarrow_dataset(dataset) except FileNotFoundError: return None From 0b6279ba169f04f4c31aafb212079724cb909329 Mon Sep 17 00:00:00 2001 From: Johan Dahlberg Date: Wed, 14 Aug 2024 13:58:35 +0200 Subject: [PATCH 08/18] Better handling of precomputed layout setters --- src/pixelator/pixeldataset/__init__.py | 8 ++++++-- src/pixelator/pixeldataset/backends.py | 26 +++++++++--------------- src/pixelator/pixeldataset/datastores.py | 16 ++++----------- tests/pixeldataset/test_backends.py | 2 +- 4 files changed, 21 insertions(+), 31 deletions(-) diff --git a/src/pixelator/pixeldataset/__init__.py b/src/pixelator/pixeldataset/__init__.py index 0880891a..fc9a735b 100644 --- a/src/pixelator/pixeldataset/__init__.py +++ b/src/pixelator/pixeldataset/__init__.py @@ -210,14 +210,18 @@ def metadata(self, value: Dict) -> None: @property def precomputed_layouts( self, - ) -> PreComputedLayouts | None: + ) -> PreComputedLayouts: """Get the precomputed layouts.""" return self._backend.precomputed_layouts @precomputed_layouts.setter def precomputed_layouts(self, value: PreComputedLayouts | None) -> None: """Set the precomputed layouts.""" - self._backend.precomputed_layouts = value + # Note that the type ignore here is to handle the fact that the setter + # needs to be able to take None (in order to make it easier to the user) + # but that will be transformed into a empty PreComputedLayouts object + # by the backend as and when it sees fit. + self._backend.precomputed_layouts = value # type: ignore def graph( self, diff --git a/src/pixelator/pixeldataset/backends.py b/src/pixelator/pixeldataset/backends.py index 4c65193a..d64858f5 100644 --- a/src/pixelator/pixeldataset/backends.py +++ b/src/pixelator/pixeldataset/backends.py @@ -82,7 +82,7 @@ def metadata(self, value: Dict) -> Optional[Dict]: """Set the metadata object.""" @property - def precomputed_layouts(self) -> Optional[PreComputedLayouts]: + def precomputed_layouts(self) -> PreComputedLayouts: """Get the precomputed layouts for the component graphs. Please note that since these have been pre-computed, if you have made @@ -91,12 +91,12 @@ def precomputed_layouts(self) -> Optional[PreComputedLayouts]: ... @precomputed_layouts.setter - def precomputed_layouts(self, value: PreComputedLayouts) -> None: + def precomputed_layouts(self, value: PreComputedLayouts | None) -> None: """Set the precomputed layouts for the component graphs.""" ... -class ObjectBasedPixelDatasetBackend: +class ObjectBasedPixelDatasetBackend(PixelDatasetBackend): """A backend for PixelDataset that is backed by in memory objects. `ObjectBasedPixelDatasetBackend` provides a backend for PixelDatasets that @@ -209,12 +209,10 @@ def colocalization(self, value: pd.DataFrame) -> None: self._colocalization = value @property - def precomputed_layouts(self) -> PreComputedLayouts | None: + def precomputed_layouts(self) -> PreComputedLayouts: """Get the precomputed layouts.""" if self._precomputed_layouts is None: - return None - if self._precomputed_layouts.is_empty: - return None + return PreComputedLayouts.create_empty() return self._precomputed_layouts @precomputed_layouts.setter @@ -225,7 +223,7 @@ def precomputed_layouts(self, value: PreComputedLayouts | None) -> None: self._precomputed_layouts = value -class FileBasedPixelDatasetBackend: +class FileBasedPixelDatasetBackend(PixelDatasetBackend): """A file based backend for PixelDataset. `FileBasedPixelDatasetBackend` is used to lazily fetch information from @@ -280,20 +278,16 @@ def metadata(self) -> Optional[Dict]: return self._datastore.read_metadata() @property - def precomputed_layouts(self) -> PreComputedLayouts | None: + def precomputed_layouts(self) -> PreComputedLayouts: """Get the precomputed layouts.""" # If it is None it means it is uninitialized, and we should # attempt to read it lazily + if isinstance(self._precomputed_layouts, PreComputedLayouts): + return self._precomputed_layouts + if self._precomputed_layouts is None: self._precomputed_layouts = self._datastore.read_precomputed_layouts() - - # It can also be empty, in which case it has been read and - # found to be empty. Or it has been initialized to be empty, - # which means that it should be cleared. - if self._precomputed_layouts.is_empty: # type: ignore - self._precomputed_layouts = PreComputedLayouts.create_empty() - return self._precomputed_layouts @precomputed_layouts.setter diff --git a/src/pixelator/pixeldataset/datastores.py b/src/pixelator/pixeldataset/datastores.py index b62cf781..8f3afd46 100644 --- a/src/pixelator/pixeldataset/datastores.py +++ b/src/pixelator/pixeldataset/datastores.py @@ -249,14 +249,13 @@ def save(self, dataset: PixelDataset, force_overwrite: bool = False) -> None: def read_precomputed_layouts( self, - ) -> PreComputedLayouts | None: + ) -> PreComputedLayouts: """Read pre-computed layouts from the data store.""" ... def write_precomputed_layouts( self, layouts: PreComputedLayouts, - collapse_to_single_dataframe: bool = False, ) -> None: """Write pre-computed layouts to the data store. @@ -479,11 +478,11 @@ def read_metadata(self) -> Dict: def read_precomputed_layouts( self, - ) -> PreComputedLayouts | None: + ) -> PreComputedLayouts: """Read pre-computed layouts from the .pxl file.""" layouts_lazy = self.read_dataframe_lazy(self.LAYOUTS_KEY) if layouts_lazy is None: - return None + return PreComputedLayouts.create_empty() return PreComputedLayouts(layouts_lazy=layouts_lazy) def write_metadata(self, metadata: Dict[str, Any]) -> None: @@ -510,7 +509,6 @@ def write_colocalization(self, colocalization: pd.DataFrame) -> None: def write_precomputed_layouts( self, layouts: Optional[PreComputedLayouts], - collapse_to_single_dataframe: bool = False, ) -> None: """Write pre-computed layouts to the data store.""" if layouts is None: @@ -582,17 +580,12 @@ def save(self, dataset: PixelDataset, force_overwrite: bool = False) -> None: logger.debug("Writing colocalization scores") self.write_colocalization(dataset.colocalization) - if dataset.precomputed_layouts is not None: + if not dataset.precomputed_layouts.is_empty: logger.debug("Writing precomputed layouts") # This speeds things up massively when you have many, very small # layouts, like we do in some test data. - try: - write_layouts_in_one_go = dataset.adata.obs["vertices"].sum() < 100_000 - except KeyError: - write_layouts_in_one_go = False self.write_precomputed_layouts( dataset.precomputed_layouts, - collapse_to_single_dataframe=write_layouts_in_one_go, ) logger.debug("PixelDataset saved to %s", self.path) @@ -647,7 +640,6 @@ def read_dataframe_lazy(self, key: str) -> Optional[pl.LazyFrame]: def write_precomputed_layouts( self, layouts: Optional[PreComputedLayouts], - collapse_to_single_dataframe: bool = False, ) -> None: """Write pre-computed layouts to the data store (NB: Not implemented!).""" raise NotImplementedError( diff --git a/tests/pixeldataset/test_backends.py b/tests/pixeldataset/test_backends.py index 3845d72d..208e9ac8 100644 --- a/tests/pixeldataset/test_backends.py +++ b/tests/pixeldataset/test_backends.py @@ -33,7 +33,7 @@ def assert_backend_can_set_values(pixel_dataset_backend): assert pixel_dataset_backend.precomputed_layouts pixel_dataset_backend.precomputed_layouts = None - assert not pixel_dataset_backend.precomputed_layouts + assert pixel_dataset_backend.precomputed_layouts.is_empty def test_file_based_pixel_dataset_backend_set_attrs(pixel_dataset_file): From c599280bd8e7ae2165efdf45a3490ed65bb2c818 Mon Sep 17 00:00:00 2001 From: Johan Dahlberg Date: Wed, 14 Aug 2024 15:50:42 +0200 Subject: [PATCH 09/18] Parity tests and fixes --- src/pixelator/pixeldataset/datastores.py | 30 ++++--- tests/pixeldataset/test_datastores.py | 109 ++++++++++++++++++++++- 2 files changed, 124 insertions(+), 15 deletions(-) diff --git a/src/pixelator/pixeldataset/datastores.py b/src/pixelator/pixeldataset/datastores.py index 8f3afd46..3640d5e2 100644 --- a/src/pixelator/pixeldataset/datastores.py +++ b/src/pixelator/pixeldataset/datastores.py @@ -274,9 +274,6 @@ def find(self, path, maxdepth=None, withdirs=False, detail=False, **kwargs): if maxdepth is not None and maxdepth < 1: raise ValueError("maxdepth must be at least 1") - # TODO Handle details True/False - # TODO Add tests to make sure we have parity with the original implementation - result = {} def _below_max_recursion_depth(path): @@ -288,25 +285,36 @@ def _below_max_recursion_depth(path): for zip_info in self.zip.infolist(): file_name = zip_info.filename - if not file_name.startswith(path): + if not file_name.startswith(path.lstrip("/")): continue # zip files can contain explicit or implicit directories # hence the need to either add them directly or infer them # from the file paths - if zip_info.is_dir() and withdirs: - if not result.get(file_name) and _below_max_recursion_depth(file_name): - result[file_name] = self.info(file_name) - continue + if zip_info.is_dir(): + if withdirs: + if not result.get(file_name) and _below_max_recursion_depth( + file_name + ): + result[file_name.strip("/")] = ( + self.info(file_name) if detail else None + ) + continue + else: + continue # Skip along to the next entry if we don't want to add the dirs if not result.get(file_name): if _below_max_recursion_depth(file_name): - result[file_name] = self.info(file_name) + result[file_name] = self.info(file_name) if detail else None + # Here we handle the case of implicitly adding the + # directories if they have been requested if withdirs: directories = file_name.split("/") for i in range(1, len(directories)): - dir_path = "/".join(directories[:i]) + dir_path = "/".join(directories[:i]).strip( + "/" + ) # remove the trailing slash, as this is not expected if not result.get(dir_path) and _below_max_recursion_depth( dir_path ): @@ -316,7 +324,7 @@ def _below_max_recursion_depth(path): "type": "directory", } - return result + return result if detail else sorted(list(result.keys())) class ZipBasedPixelFile(PixelDataStore): diff --git a/tests/pixeldataset/test_datastores.py b/tests/pixeldataset/test_datastores.py index 974aab92..03bff8f4 100644 --- a/tests/pixeldataset/test_datastores.py +++ b/tests/pixeldataset/test_datastores.py @@ -4,6 +4,7 @@ """ from pathlib import Path +from shutil import make_archive from unittest.mock import patch from zipfile import ZipFile @@ -11,6 +12,7 @@ import polars as pl import pytest from anndata import AnnData +from fsspec.implementations.zip import ZipFileSystem from pandas.core.frame import DataFrame from pandas.testing import assert_frame_equal from pixelator.pixeldataset import PixelDataset @@ -21,6 +23,7 @@ ZipBasedPixelFile, ZipBasedPixelFileWithCSV, ZipBasedPixelFileWithParquet, + _CustomZipFileSystem, ) from pixelator.pixeldataset.precomputed_layouts import PreComputedLayouts @@ -40,7 +43,7 @@ def test_pixel_data_store_guess_from_path_csv( dataset, *_ = setup_basic_pixel_dataset file_target = tmp_path / "dataset.pxl" # Writing pre-computed layouts is not supported for csv files - dataset.precomputed_layouts = None + dataset.precomputed_layouts = None # type: ignore dataset.save(str(file_target), file_format="csv") res = PixelDataStore.guess_datastore_from_path(file_target) assert isinstance(res, ZipBasedPixelFileWithCSV) @@ -67,7 +70,7 @@ def test_pixel_data_store_from_file_provides_correct_datastore_csv( ): dataset, *_ = setup_basic_pixel_dataset # Writing pre-computed layouts is not supported for csv files - dataset.precomputed_layouts = None + dataset.precomputed_layouts = None # type: ignore file_target = tmp_path / "dataset.pxl" dataset.save(str(file_target), file_format="csv") res = PixelDataStore.from_path(file_target) @@ -141,7 +144,7 @@ def test_pixelfile_datastore_trying_to_write_with_same_name_raises_for_csv( ): dataset, *_ = setup_basic_pixel_dataset # Writing pre-computed layouts is not supported for csv files - dataset.precomputed_layouts = None + dataset.precomputed_layouts = None # type: ignore file_target = tmp_path / "dataset.pxl" dataset.save( str(file_target), @@ -338,6 +341,104 @@ def test_pixel_file_csv_format_spec_can_save( file_target = tmp_path / "dataset.pxl" assert not file_target.is_file() # Writing pre-computed layouts is not supported for csv files - dataset.precomputed_layouts = None + dataset.precomputed_layouts = None # type: ignore ZipBasedPixelFileWithCSV(file_target).save(dataset) assert file_target.is_file() + + +@pytest.mark.test_this +class TestCustomZipFileSystem: + @pytest.fixture(name="zip_file") + def zip_file_fixture(self, tmp_path): + data_dir = tmp_path / "data/" + data_dir.mkdir() + file1 = data_dir / "file1.txt" + file1.write_text("Hello, World!") + file2 = data_dir / "file2.txt" + file2.write_text("Lorem ipsum dolor sit amet") + + empty_dir = data_dir / "dir1" + empty_dir.mkdir() + + dir_with_files = data_dir / "dir2" + dir_with_files.mkdir() + file3 = dir_with_files / "file3.txt" + file3.write_text("Hello!") + + zip_file = tmp_path / "test" + return Path(make_archive(zip_file, "zip", data_dir)) + + @pytest.mark.parametrize("detail", [True, False]) + @pytest.mark.parametrize("withdirs", [True, False]) + @pytest.mark.parametrize("max_depth", [None, 1, 2]) + def test_ensure_parity(self, zip_file, detail, withdirs, max_depth): + custom_zip_file_system = _CustomZipFileSystem(zip_file) + zip_file_system = ZipFileSystem(zip_file) + result = custom_zip_file_system.find( + "/", detail=detail, withdirs=withdirs, max_depth=max_depth + ) + expected_result = zip_file_system.find( + "/", detail=detail, withdirs=withdirs, max_depth=max_depth + ) + assert result + assert result == expected_result + + def test_find_returns_expected_result_detail_true(self, zip_file): + custom_zip_file_system = _CustomZipFileSystem(zip_file) + zip_file_system = ZipFileSystem(zip_file) + + result = custom_zip_file_system.find("/", detail=True) + expected_result = zip_file_system.find("/", detail=True) + + assert result + assert result == expected_result + + def test_find_returns_expected_result_detail_false(self, zip_file): + custom_zip_file_system = _CustomZipFileSystem(zip_file) + zip_file_system = ZipFileSystem(zip_file) + + result = custom_zip_file_system.find("/", detail=False) + expected_result = zip_file_system.find("/", detail=False) + + assert result + assert result == expected_result + + def test_find_returns_expected_result_detail_true_include_dirs(self, zip_file): + custom_zip_file_system = _CustomZipFileSystem(zip_file) + zip_file_system = ZipFileSystem(zip_file) + + result = custom_zip_file_system.find("/", detail=True, withdirs=True) + expected_result = zip_file_system.find("/", detail=True, withdirs=True) + + assert result + assert result == expected_result + + def test_find_returns_expected_result_detail_false_include_dirs(self, zip_file): + custom_zip_file_system = _CustomZipFileSystem(zip_file) + zip_file_system = ZipFileSystem(zip_file) + + result = custom_zip_file_system.find("/", detail=False, withdirs=True) + expected_result = zip_file_system.find("/", detail=False, withdirs=True) + + assert result + assert result == expected_result + + def test_find_returns_expected_result_recursion_depth_set(self, zip_file): + custom_zip_file_system = _CustomZipFileSystem(zip_file) + zip_file_system = ZipFileSystem(zip_file) + + result = custom_zip_file_system.find("/", maxdepth=1) + expected_result = zip_file_system.find("/", maxdepth=1) + + assert result + assert result == expected_result + + def test_find_returns_expected_result_path_set(self, zip_file): + custom_zip_file_system = _CustomZipFileSystem(zip_file) + zip_file_system = ZipFileSystem(zip_file) + + result = custom_zip_file_system.find("/dir2") + expected_result = zip_file_system.find("/dir2") + + assert result + assert result == expected_result From c9a4faa3f2c13aaebc2f2e22d68c5a2521d50d60 Mon Sep 17 00:00:00 2001 From: Johan Dahlberg Date: Thu, 15 Aug 2024 09:35:45 +0200 Subject: [PATCH 10/18] Simplify the tmp file writing --- src/pixelator/pixeldataset/datastores.py | 23 ++++--------- .../pixeldataset/precomputed_layouts.py | 32 ++++++++++++------- 2 files changed, 26 insertions(+), 29 deletions(-) diff --git a/src/pixelator/pixeldataset/datastores.py b/src/pixelator/pixeldataset/datastores.py index 3640d5e2..bf95b384 100644 --- a/src/pixelator/pixeldataset/datastores.py +++ b/src/pixelator/pixeldataset/datastores.py @@ -10,6 +10,7 @@ import logging from functools import partial from pathlib import Path +from tempfile import TemporaryDirectory from typing import ( TYPE_CHECKING, Any, @@ -524,30 +525,18 @@ def write_precomputed_layouts( return logger.debug("Starting to write layouts...") - # This option is in place to allow collecting all the layouts into - # as single dataframe before writing (they will still be written into - # partitions), but this is much faster than writing them one by one - # for scenarios with many very small layouts. - - from tempfile import TemporaryDirectory self._set_to_write_mode() self._check_if_writeable(self.LAYOUTS_KEY) + + # This is a work around for the fact that sinking into parquet files + # from multiple sources is not supported. We therefore do this somewhat + # round about thing of first writing the parquet files to + # a temporary directory and then zipping them into the .pxl file. with TemporaryDirectory(prefix="pixelator-") as tmp_dir: tmp_path = Path(tmp_dir) local_tmp_target = tmp_path / "local.layouts.parquet" layouts.write_parquet(local_tmp_target, partitioning=layouts.partitioning) - pa_dataset = ds.dataset(local_tmp_target, partitioning="hive") - local_tmp_target = tmp_path / "local.partitioned.layouts.parquet" - ds.write_dataset( - pa_dataset, - local_tmp_target, - format="parquet", - partitioning_flavor="hive", - partitioning=layouts.partitioning, - use_threads=False, - existing_data_behavior="overwrite_or_ignore", - ) for file_ in local_tmp_target.rglob("*"): if file_.is_file(): diff --git a/src/pixelator/pixeldataset/precomputed_layouts.py b/src/pixelator/pixeldataset/precomputed_layouts.py index 256a83bd..0e3f3d21 100644 --- a/src/pixelator/pixeldataset/precomputed_layouts.py +++ b/src/pixelator/pixeldataset/precomputed_layouts.py @@ -15,6 +15,8 @@ import pandas as pd import polars as pl +import pyarrow.dataset as ds +import pyarrow.parquet as pq from pixelator.exceptions import PixelatorBaseException from pixelator.graph import Graph @@ -30,6 +32,22 @@ logger = logging.getLogger(__name__) +def _write_parquet(frame: pl.LazyFrame, path: Path, partitioning: list[str]) -> None: + table = frame.collect().to_arrow() + file_options = ds.ParquetFileFormat().make_write_options( + compression="zstd", + ) + ds.write_dataset( + table, + path, + format="parquet", + partitioning_flavor="hive", + partitioning=partitioning, + file_options=file_options, + existing_data_behavior="overwrite_or_ignore", + ) + + class PreComputedLayoutsEmpty(PixelatorBaseException): """Raised when trying to access an empty PreComputedLayouts instance.""" @@ -99,13 +117,7 @@ def to_df(self, columns: list[str] | None = None) -> pd.DataFrame: def write_parquet(self, path: Path, partitioning: list[str]) -> None: """Write a parquet file to the provided path.""" - self.lazy().collect().write_parquet( - path, - use_pyarrow=True, - pyarrow_options={ - "partition_cols": partitioning, - }, - ) + _write_parquet(self.lazy(), path, partitioning) def lazy(self): return self._lazy_frame @@ -214,11 +226,7 @@ def data(): def write_parquet(self, path: Path, partitioning: list[str]) -> None: """Write a parquet file to the provided path.""" for frame in self._lazy_frames: - frame.collect(streaming=True).write_parquet( - path, - use_pyarrow=True, - pyarrow_options={"partition_cols": partitioning}, - ) + _write_parquet(frame, path, partitioning) class PreComputedLayouts: From cc0a85e30f24dfe776cbec87707393c0f2c978d2 Mon Sep 17 00:00:00 2001 From: Johan Dahlberg Date: Thu, 15 Aug 2024 09:50:30 +0200 Subject: [PATCH 11/18] Update changelog --- CHANGELOG.md | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 562e9dc7..e68ec151 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,6 +5,13 @@ All notable changes to this project will be documented in this file. The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). +## [x.x.x] - 2024-xx-xx + + +### Fixed + + - Improved memory usage when aggregating pixel files with precomputed layouts. + ## [0.18.2] - 2024-07-16 ### Changed From 5fef3004bad11517a91d9464b0c11685a63f22b0 Mon Sep 17 00:00:00 2001 From: Johan Dahlberg Date: Tue, 20 Aug 2024 14:56:07 +0200 Subject: [PATCH 12/18] Fix quote problem in task file --- tests/Taskfile.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/Taskfile.yml b/tests/Taskfile.yml index 4feb5115..74c0a05f 100644 --- a/tests/Taskfile.yml +++ b/tests/Taskfile.yml @@ -56,7 +56,7 @@ tasks: requires: vars: ["PIPELINE_SOURCE_DIR"] cmds: - - nf_options=(' --save_all "); + - nf_options=(' --save_all '); [[ {{ .RESUME }} == 'true' ]] && nf_options+=('-resume '); cd "{{ .PIPELINE_SOURCE_DIR }}"; nextflow run . -profile test --outdir {{ .PIPELINE_RESULTS_DIR }} ${nf_options[@]}; From 739d011dc1e04ca9aabaab48b60e9c2ca03a9e90 Mon Sep 17 00:00:00 2001 From: Johan Dahlberg Date: Tue, 20 Aug 2024 14:56:58 +0200 Subject: [PATCH 13/18] Fix type missmatch --- src/pixelator/pixeldataset/backends.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/pixelator/pixeldataset/backends.py b/src/pixelator/pixeldataset/backends.py index d64858f5..06cc156f 100644 --- a/src/pixelator/pixeldataset/backends.py +++ b/src/pixelator/pixeldataset/backends.py @@ -258,7 +258,7 @@ def edgelist(self) -> pd.DataFrame: return self._datastore.read_edgelist() @property - def edgelist_lazy(self) -> Optional[pl.LazyFrame]: + def edgelist_lazy(self) -> pl.LazyFrame: """Get a lazy frame representation of the edgelist.""" return self._datastore.read_edgelist_lazy() From 0b5ac93856371615118cad36441911ec252af971 Mon Sep 17 00:00:00 2001 From: Johan Dahlberg Date: Tue, 20 Aug 2024 15:04:22 +0200 Subject: [PATCH 14/18] Update ruff --- poetry.lock | 40 ++++++++++++++++++++-------------------- 1 file changed, 20 insertions(+), 20 deletions(-) diff --git a/poetry.lock b/poetry.lock index 2a9246aa..444b5a5a 100644 --- a/poetry.lock +++ b/poetry.lock @@ -1,4 +1,4 @@ -# This file is automatically @generated by Poetry 1.7.1 and should not be changed by hand. +# This file is automatically @generated by Poetry 1.8.3 and should not be changed by hand. [[package]] name = "alabaster" @@ -3368,29 +3368,29 @@ files = [ [[package]] name = "ruff" -version = "0.5.2" +version = "0.6.1" description = "An extremely fast Python linter and code formatter, written in Rust." optional = false python-versions = ">=3.7" files = [ - {file = "ruff-0.5.2-py3-none-linux_armv6l.whl", hash = "sha256:7bab8345df60f9368d5f4594bfb8b71157496b44c30ff035d1d01972e764d3be"}, - {file = "ruff-0.5.2-py3-none-macosx_10_12_x86_64.whl", hash = "sha256:1aa7acad382ada0189dbe76095cf0a36cd0036779607c397ffdea16517f535b1"}, - {file = "ruff-0.5.2-py3-none-macosx_11_0_arm64.whl", hash = "sha256:aec618d5a0cdba5592c60c2dee7d9c865180627f1a4a691257dea14ac1aa264d"}, - {file = "ruff-0.5.2-py3-none-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a0b62adc5ce81780ff04077e88bac0986363e4a3260ad3ef11ae9c14aa0e67ef"}, - {file = "ruff-0.5.2-py3-none-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:dc42ebf56ede83cb080a50eba35a06e636775649a1ffd03dc986533f878702a3"}, - {file = "ruff-0.5.2-py3-none-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:c15c6e9f88c67ffa442681365d11df38afb11059fc44238e71a9d9f1fd51de70"}, - {file = "ruff-0.5.2-py3-none-manylinux_2_17_ppc64.manylinux2014_ppc64.whl", hash = "sha256:d3de9a5960f72c335ef00763d861fc5005ef0644cb260ba1b5a115a102157251"}, - {file = "ruff-0.5.2-py3-none-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:fe5a968ae933e8f7627a7b2fc8893336ac2be0eb0aace762d3421f6e8f7b7f83"}, - {file = "ruff-0.5.2-py3-none-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:a04f54a9018f75615ae52f36ea1c5515e356e5d5e214b22609ddb546baef7132"}, - {file = "ruff-0.5.2-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:1ed02fb52e3741f0738db5f93e10ae0fb5c71eb33a4f2ba87c9a2fa97462a649"}, - {file = "ruff-0.5.2-py3-none-musllinux_1_2_aarch64.whl", hash = "sha256:3cf8fe659f6362530435d97d738eb413e9f090e7e993f88711b0377fbdc99f60"}, - {file = "ruff-0.5.2-py3-none-musllinux_1_2_armv7l.whl", hash = "sha256:237a37e673e9f3cbfff0d2243e797c4862a44c93d2f52a52021c1a1b0899f846"}, - {file = "ruff-0.5.2-py3-none-musllinux_1_2_i686.whl", hash = "sha256:2a2949ce7c1cbd8317432ada80fe32156df825b2fd611688814c8557824ef060"}, - {file = "ruff-0.5.2-py3-none-musllinux_1_2_x86_64.whl", hash = "sha256:481af57c8e99da92ad168924fd82220266043c8255942a1cb87958b108ac9335"}, - {file = "ruff-0.5.2-py3-none-win32.whl", hash = "sha256:f1aea290c56d913e363066d83d3fc26848814a1fed3d72144ff9c930e8c7c718"}, - {file = "ruff-0.5.2-py3-none-win_amd64.whl", hash = "sha256:8532660b72b5d94d2a0a7a27ae7b9b40053662d00357bb2a6864dd7e38819084"}, - {file = "ruff-0.5.2-py3-none-win_arm64.whl", hash = "sha256:73439805c5cb68f364d826a5c5c4b6c798ded6b7ebaa4011f01ce6c94e4d5583"}, - {file = "ruff-0.5.2.tar.gz", hash = "sha256:2c0df2d2de685433794a14d8d2e240df619b748fbe3367346baa519d8e6f1ca2"}, + {file = "ruff-0.6.1-py3-none-linux_armv6l.whl", hash = "sha256:b4bb7de6a24169dc023f992718a9417380301b0c2da0fe85919f47264fb8add9"}, + {file = "ruff-0.6.1-py3-none-macosx_10_12_x86_64.whl", hash = "sha256:45efaae53b360c81043e311cdec8a7696420b3d3e8935202c2846e7a97d4edae"}, + {file = "ruff-0.6.1-py3-none-macosx_11_0_arm64.whl", hash = "sha256:bc60c7d71b732c8fa73cf995efc0c836a2fd8b9810e115be8babb24ae87e0850"}, + {file = "ruff-0.6.1-py3-none-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:2c7477c3b9da822e2db0b4e0b59e61b8a23e87886e727b327e7dcaf06213c5cf"}, + {file = "ruff-0.6.1-py3-none-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:3a0af7ab3f86e3dc9f157a928e08e26c4b40707d0612b01cd577cc84b8905cc9"}, + {file = "ruff-0.6.1-py3-none-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:392688dbb50fecf1bf7126731c90c11a9df1c3a4cdc3f481b53e851da5634fa5"}, + {file = "ruff-0.6.1-py3-none-manylinux_2_17_ppc64.manylinux2014_ppc64.whl", hash = "sha256:5278d3e095ccc8c30430bcc9bc550f778790acc211865520f3041910a28d0024"}, + {file = "ruff-0.6.1-py3-none-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:fe6d5f65d6f276ee7a0fc50a0cecaccb362d30ef98a110f99cac1c7872df2f18"}, + {file = "ruff-0.6.1-py3-none-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:b2e0dd11e2ae553ee5c92a81731d88a9883af8db7408db47fc81887c1f8b672e"}, + {file = "ruff-0.6.1-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d812615525a34ecfc07fd93f906ef5b93656be01dfae9a819e31caa6cfe758a1"}, + {file = "ruff-0.6.1-py3-none-musllinux_1_2_aarch64.whl", hash = "sha256:faaa4060f4064c3b7aaaa27328080c932fa142786f8142aff095b42b6a2eb631"}, + {file = "ruff-0.6.1-py3-none-musllinux_1_2_armv7l.whl", hash = "sha256:99d7ae0df47c62729d58765c593ea54c2546d5de213f2af2a19442d50a10cec9"}, + {file = "ruff-0.6.1-py3-none-musllinux_1_2_i686.whl", hash = "sha256:9eb18dfd7b613eec000e3738b3f0e4398bf0153cb80bfa3e351b3c1c2f6d7b15"}, + {file = "ruff-0.6.1-py3-none-musllinux_1_2_x86_64.whl", hash = "sha256:c62bc04c6723a81e25e71715aa59489f15034d69bf641df88cb38bdc32fd1dbb"}, + {file = "ruff-0.6.1-py3-none-win32.whl", hash = "sha256:9fb4c4e8b83f19c9477a8745e56d2eeef07a7ff50b68a6998f7d9e2e3887bdc4"}, + {file = "ruff-0.6.1-py3-none-win_amd64.whl", hash = "sha256:c2ebfc8f51ef4aca05dad4552bbcf6fe8d1f75b2f6af546cc47cc1c1ca916b5b"}, + {file = "ruff-0.6.1-py3-none-win_arm64.whl", hash = "sha256:3bc81074971b0ffad1bd0c52284b22411f02a11a012082a76ac6da153536e014"}, + {file = "ruff-0.6.1.tar.gz", hash = "sha256:af3ffd8c6563acb8848d33cd19a69b9bfe943667f0419ca083f8ebe4224a3436"}, ] [[package]] From b46c12fdd03589eb65dad32bf134054e6f8b8b93 Mon Sep 17 00:00:00 2001 From: Johan Dahlberg Date: Tue, 20 Aug 2024 15:06:59 +0200 Subject: [PATCH 15/18] Update ruff in pre-commit config --- .pre-commit-config.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index ec20bc93..5abe8750 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -12,7 +12,7 @@ repos: - repo: https://github.com/charliermarsh/ruff-pre-commit # Ruff version. - rev: "v0.3.0" + rev: "v0.6.1" hooks: - id: ruff - id: ruff-format From 8d2e3e59bf028802475eb0f371c334a013e6bca4 Mon Sep 17 00:00:00 2001 From: Johan Dahlberg Date: Tue, 20 Aug 2024 15:07:19 +0200 Subject: [PATCH 16/18] Let ruff autofix all issues --- tests/amplicon/test_amplicon.py | 1 + tests/amplicon/test_cli.py | 1 + tests/amplicon/test_statistics.py | 1 + tests/analysis/colocalization/test_colocalization.py | 1 + tests/analysis/colocalization/test_estimate.py | 1 + tests/analysis/colocalization/test_permute.py | 1 + tests/analysis/colocalization/test_prepare.py | 1 + tests/analysis/colocalization/test_statistics.py | 1 + tests/analysis/normalization/test_normalization.py | 1 + tests/analysis/polarization/test_polarization.py | 2 +- tests/analysis/test_analysis_engine.py | 1 + tests/annotate/test_aggregates.py | 1 + tests/annotate/test_annotate.py | 1 + tests/annotate/test_cell_calling.py | 1 + tests/collapse/test_process.py | 1 + tests/config/test_config.py | 1 + tests/conftest.py | 2 +- tests/graph/backends/test_implementations.py | 1 + tests/graph/conftest.py | 2 +- tests/graph/networkx/test_tools.py | 1 + tests/graph/test_community_detection.py | 1 + tests/graph/test_graph.py | 2 +- tests/graph/test_graph_utils.py | 1 + tests/graph/test_node_metrics.py | 1 + tests/integration/conftest.py | 1 + tests/pixeldataset/test_aggregation.py | 1 + tests/pixeldataset/test_datastores.py | 1 + tests/pixeldataset/test_pixeldataset.py | 1 + tests/pixeldataset/test_precomputed_layouts.py | 2 +- tests/pixeldataset/test_utils.py | 1 + tests/plot/test_plot.py | 3 ++- tests/report/conftest.py | 1 + tests/report/test_adapterqc.py | 1 + tests/report/test_amplicon.py | 1 + tests/report/test_analysis.py | 1 + tests/report/test_annotate.py | 1 + tests/report/test_cli_info.py | 1 + tests/report/test_collapse.py | 1 + tests/report/test_data_collection.py | 1 + tests/report/test_demux.py | 1 + tests/report/test_graph.py | 1 + tests/report/test_layout.py | 1 + tests/report/test_preqc.py | 1 + tests/report/test_qc_report.py | 3 ++- tests/report/test_report_sample_metadata.py | 1 + tests/report/test_reporting.py | 1 + tests/report/test_workdir.py | 1 + tests/resources/test_panel.py | 1 + tests/test_integration.py | 1 + tests/test_logging.py | 1 + tests/test_marks.py | 1 + tests/test_plugin.py | 1 + tests/test_simplification.py | 1 + tests/test_statistics.py | 1 + tests/test_utils.py | 1 + tests/utils/test_utils.py | 1 + 56 files changed, 58 insertions(+), 7 deletions(-) diff --git a/tests/amplicon/test_amplicon.py b/tests/amplicon/test_amplicon.py index 2d8ce039..3d914473 100644 --- a/tests/amplicon/test_amplicon.py +++ b/tests/amplicon/test_amplicon.py @@ -11,6 +11,7 @@ import numpy as np import pyfastx import pytest + from pixelator.amplicon.process import generate_amplicon from pixelator.config import config, get_position_in_parent diff --git a/tests/amplicon/test_cli.py b/tests/amplicon/test_cli.py index f0efe846..d758c716 100644 --- a/tests/amplicon/test_cli.py +++ b/tests/amplicon/test_cli.py @@ -11,6 +11,7 @@ import pytest from click.testing import CliRunner + from pixelator import cli diff --git a/tests/amplicon/test_statistics.py b/tests/amplicon/test_statistics.py index 4af5d585..3f5ebb2c 100644 --- a/tests/amplicon/test_statistics.py +++ b/tests/amplicon/test_statistics.py @@ -4,6 +4,7 @@ """ import numpy as np + from pixelator.amplicon.statistics import ( SequenceQualityStats, SequenceQualityStatsCollector, diff --git a/tests/analysis/colocalization/test_colocalization.py b/tests/analysis/colocalization/test_colocalization.py index 968f74a5..e0695918 100644 --- a/tests/analysis/colocalization/test_colocalization.py +++ b/tests/analysis/colocalization/test_colocalization.py @@ -10,6 +10,7 @@ import pandas as pd import pytest from pandas.testing import assert_frame_equal, assert_series_equal + from pixelator.analysis.colocalization import ( ColocalizationAnalysis, colocalization_from_component_edgelist, diff --git a/tests/analysis/colocalization/test_estimate.py b/tests/analysis/colocalization/test_estimate.py index 506ff609..e1348154 100644 --- a/tests/analysis/colocalization/test_estimate.py +++ b/tests/analysis/colocalization/test_estimate.py @@ -8,6 +8,7 @@ import pandas as pd from numpy.random import default_rng from pandas.testing import assert_frame_equal + from pixelator.analysis.colocalization.estimate import ( estimate_observation_statistics, permutation_analysis_results, diff --git a/tests/analysis/colocalization/test_permute.py b/tests/analysis/colocalization/test_permute.py index 5426b1c2..231e7927 100644 --- a/tests/analysis/colocalization/test_permute.py +++ b/tests/analysis/colocalization/test_permute.py @@ -7,6 +7,7 @@ import pandas as pd from numpy.random import default_rng from pandas.testing import assert_series_equal + from pixelator.analysis.permute import ( permutations, permute, diff --git a/tests/analysis/colocalization/test_prepare.py b/tests/analysis/colocalization/test_prepare.py index 55ee956d..ffc190b0 100644 --- a/tests/analysis/colocalization/test_prepare.py +++ b/tests/analysis/colocalization/test_prepare.py @@ -10,6 +10,7 @@ import pytest from numpy.random import default_rng from pandas.testing import assert_frame_equal + from pixelator.analysis.colocalization.prepare import ( filter_by_marker_counts, filter_by_region_counts, diff --git a/tests/analysis/colocalization/test_statistics.py b/tests/analysis/colocalization/test_statistics.py index 5f17c9a3..9af87434 100644 --- a/tests/analysis/colocalization/test_statistics.py +++ b/tests/analysis/colocalization/test_statistics.py @@ -6,6 +6,7 @@ import numpy as np import pandas as pd from pandas.testing import assert_frame_equal + from pixelator.analysis.colocalization.statistics import ( Jaccard, Pearson, diff --git a/tests/analysis/normalization/test_normalization.py b/tests/analysis/normalization/test_normalization.py index d0a17ac9..f8147490 100644 --- a/tests/analysis/normalization/test_normalization.py +++ b/tests/analysis/normalization/test_normalization.py @@ -8,6 +8,7 @@ import pandas as pd from pandas.testing import assert_frame_equal + from pixelator.analysis.normalization import dsb_normalize DATA_ROOT = Path(__file__).parents[2] / "data" diff --git a/tests/analysis/polarization/test_polarization.py b/tests/analysis/polarization/test_polarization.py index 835a3f9d..de38cca5 100644 --- a/tests/analysis/polarization/test_polarization.py +++ b/tests/analysis/polarization/test_polarization.py @@ -10,6 +10,7 @@ import pandas as pd import pytest from pandas.testing import assert_frame_equal, assert_series_equal + from pixelator.analysis.polarization import ( PolarizationAnalysis, polarization_scores, @@ -17,7 +18,6 @@ ) from pixelator.graph import Graph from pixelator.pixeldataset import PixelDataset - from tests.graph.networkx.test_tools import create_randomly_connected_bipartite_graph diff --git a/tests/analysis/test_analysis_engine.py b/tests/analysis/test_analysis_engine.py index 94d22847..5089ec58 100644 --- a/tests/analysis/test_analysis_engine.py +++ b/tests/analysis/test_analysis_engine.py @@ -9,6 +9,7 @@ import pandas as pd from pandas.testing import assert_frame_equal + from pixelator.analysis.analysis_engine import ( PerComponentAnalysis, _AnalysisManager, diff --git a/tests/annotate/test_aggregates.py b/tests/annotate/test_aggregates.py index 4463fda7..67f9e0f7 100644 --- a/tests/annotate/test_aggregates.py +++ b/tests/annotate/test_aggregates.py @@ -8,6 +8,7 @@ import pandas as pd import pytest from anndata import AnnData + from pixelator.annotate.aggregates import call_aggregates NBR_OF_MARKERS = 100 diff --git a/tests/annotate/test_annotate.py b/tests/annotate/test_annotate.py index 2a505c24..22563200 100644 --- a/tests/annotate/test_annotate.py +++ b/tests/annotate/test_annotate.py @@ -9,6 +9,7 @@ import pandas as pd import pytest from anndata import AnnData + from pixelator.annotate import cluster_components, filter_components_sizes from pixelator.cli.annotate import annotate_components from pixelator.config import AntibodyPanel diff --git a/tests/annotate/test_cell_calling.py b/tests/annotate/test_cell_calling.py index 6a36e048..eab54fb4 100644 --- a/tests/annotate/test_cell_calling.py +++ b/tests/annotate/test_cell_calling.py @@ -5,6 +5,7 @@ """ import numpy as np + from pixelator.annotate.cell_calling import ( find_component_size_limits, ) diff --git a/tests/collapse/test_process.py b/tests/collapse/test_process.py index 203422f2..c2385153 100644 --- a/tests/collapse/test_process.py +++ b/tests/collapse/test_process.py @@ -14,6 +14,7 @@ import pytest from numpy.testing import assert_array_equal from pandas.testing import assert_frame_equal + from pixelator.collapse.process import ( CollapsedFragment, build_annoytree, diff --git a/tests/config/test_config.py b/tests/config/test_config.py index 38ab1585..b7962ffd 100644 --- a/tests/config/test_config.py +++ b/tests/config/test_config.py @@ -7,6 +7,7 @@ import copy import pytest + from pixelator.config import ( Config, RegionType, diff --git a/tests/conftest.py b/tests/conftest.py index 5ee68e71..feea08ba 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -12,6 +12,7 @@ import polars as pl import pytest from anndata import AnnData + from pixelator.config import AntibodyPanel from pixelator.graph import update_edgelist_membership from pixelator.graph.utils import union as graph_union @@ -20,7 +21,6 @@ ) from pixelator.pixeldataset.precomputed_layouts import PreComputedLayouts from pixelator.pixeldataset.utils import edgelist_to_anndata - from tests.graph.networkx.test_tools import ( create_fully_connected_bipartite_graph, create_random_graph, diff --git a/tests/graph/backends/test_implementations.py b/tests/graph/backends/test_implementations.py index 4588eb9f..275b9b41 100644 --- a/tests/graph/backends/test_implementations.py +++ b/tests/graph/backends/test_implementations.py @@ -5,6 +5,7 @@ import networkx as nx import pytest + from pixelator.graph.backends.implementations import ( graph_backend, graph_backend_from_graph_type, diff --git a/tests/graph/conftest.py b/tests/graph/conftest.py index 0e26d524..0bf40f3c 100644 --- a/tests/graph/conftest.py +++ b/tests/graph/conftest.py @@ -7,9 +7,9 @@ import networkx as nx import pandas as pd import pytest + from pixelator.graph import Graph from pixelator.graph.backends.implementations import graph_backend - from tests.graph.networkx.test_tools import add_random_names_to_vertexes diff --git a/tests/graph/networkx/test_tools.py b/tests/graph/networkx/test_tools.py index 3d8a2987..c8eba065 100644 --- a/tests/graph/networkx/test_tools.py +++ b/tests/graph/networkx/test_tools.py @@ -10,6 +10,7 @@ import networkx as nx import numpy as np + from pixelator.graph.utils import Graph diff --git a/tests/graph/test_community_detection.py b/tests/graph/test_community_detection.py index a81a24e1..aca573b1 100644 --- a/tests/graph/test_community_detection.py +++ b/tests/graph/test_community_detection.py @@ -6,6 +6,7 @@ import pandas as pd import polars as pl import pytest + from pixelator.graph.community_detection import ( community_detection_crossing_edges, connect_components, diff --git a/tests/graph/test_graph.py b/tests/graph/test_graph.py index d29b07c0..26462dae 100644 --- a/tests/graph/test_graph.py +++ b/tests/graph/test_graph.py @@ -11,8 +11,8 @@ import pytest from numpy.testing import assert_array_almost_equal, assert_array_equal from pandas.testing import assert_frame_equal -from pixelator.graph import Graph +from pixelator.graph import Graph from tests.graph.networkx.test_tools import random_sequence from tests.test_tools import enforce_edgelist_types_for_tests diff --git a/tests/graph/test_graph_utils.py b/tests/graph/test_graph_utils.py index 03889f8d..fcd202fd 100644 --- a/tests/graph/test_graph_utils.py +++ b/tests/graph/test_graph_utils.py @@ -10,6 +10,7 @@ import polars as pl import pytest from pandas.testing import assert_frame_equal + from pixelator.graph import Graph from pixelator.graph.utils import ( components_metrics, diff --git a/tests/graph/test_node_metrics.py b/tests/graph/test_node_metrics.py index 1be36c0e..3e4ffa7d 100644 --- a/tests/graph/test_node_metrics.py +++ b/tests/graph/test_node_metrics.py @@ -8,6 +8,7 @@ import pytest import scipy.sparse as sp from pandas.testing import assert_frame_equal + from pixelator.graph.node_metrics import compute_transition_probabilities, local_g diff --git a/tests/integration/conftest.py b/tests/integration/conftest.py index c5337d2d..3ff4b380 100644 --- a/tests/integration/conftest.py +++ b/tests/integration/conftest.py @@ -10,6 +10,7 @@ from typing import Optional, Type import pytest + from pixelator.test_utils import ( # noqa: F401 YamlIntegrationTestsCollector, use_workflow_context, diff --git a/tests/pixeldataset/test_aggregation.py b/tests/pixeldataset/test_aggregation.py index a3c2b2f1..ff45e915 100644 --- a/tests/pixeldataset/test_aggregation.py +++ b/tests/pixeldataset/test_aggregation.py @@ -12,6 +12,7 @@ import pandas as pd import pytest from pandas.testing import assert_frame_equal + from pixelator.pixeldataset import ( read, ) diff --git a/tests/pixeldataset/test_datastores.py b/tests/pixeldataset/test_datastores.py index 974aab92..76161b99 100644 --- a/tests/pixeldataset/test_datastores.py +++ b/tests/pixeldataset/test_datastores.py @@ -13,6 +13,7 @@ from anndata import AnnData from pandas.core.frame import DataFrame from pandas.testing import assert_frame_equal + from pixelator.pixeldataset import PixelDataset from pixelator.pixeldataset.datastores import ( CannotOverwriteError, diff --git a/tests/pixeldataset/test_pixeldataset.py b/tests/pixeldataset/test_pixeldataset.py index 57366723..73451961 100644 --- a/tests/pixeldataset/test_pixeldataset.py +++ b/tests/pixeldataset/test_pixeldataset.py @@ -12,6 +12,7 @@ import pytest from numpy.testing import assert_array_equal from pandas.testing import assert_frame_equal + from pixelator.graph import Graph from pixelator.pixeldataset import ( PixelDataset, diff --git a/tests/pixeldataset/test_precomputed_layouts.py b/tests/pixeldataset/test_precomputed_layouts.py index d9d2da65..57b4c14e 100644 --- a/tests/pixeldataset/test_precomputed_layouts.py +++ b/tests/pixeldataset/test_precomputed_layouts.py @@ -8,12 +8,12 @@ import polars as pl import pytest from pandas.testing import assert_frame_equal + from pixelator.pixeldataset.precomputed_layouts import ( PreComputedLayouts, aggregate_precomputed_layouts, generate_precomputed_layouts_for_components, ) - from tests.utils import dna_seqs diff --git a/tests/pixeldataset/test_utils.py b/tests/pixeldataset/test_utils.py index a8494db2..bf4a3cd5 100644 --- a/tests/pixeldataset/test_utils.py +++ b/tests/pixeldataset/test_utils.py @@ -15,6 +15,7 @@ from anndata import AnnData from numpy.testing import assert_array_equal from pandas.testing import assert_frame_equal + from pixelator.config import AntibodyPanel from pixelator.graph import write_recovered_components from pixelator.pixeldataset.utils import ( diff --git a/tests/plot/test_plot.py b/tests/plot/test_plot.py index d4d04de7..bce774e8 100644 --- a/tests/plot/test_plot.py +++ b/tests/plot/test_plot.py @@ -8,6 +8,8 @@ import plotly.graph_objects as go import pytest from numpy.testing import assert_almost_equal +from pytest_snapshot.plugin import Snapshot + from pixelator.graph import Graph from pixelator.plot import ( cell_count_plot, @@ -27,7 +29,6 @@ _calculate_distance_to_unit_sphere_zones, _unit_sphere_surface, ) -from pytest_snapshot.plugin import Snapshot @pytest.mark.parametrize( diff --git a/tests/report/conftest.py b/tests/report/conftest.py index abf150e1..c43e0185 100644 --- a/tests/report/conftest.py +++ b/tests/report/conftest.py @@ -11,6 +11,7 @@ import pandas as pd import pytest + from pixelator import PixelDataset from pixelator.report.common import PixelatorWorkdir diff --git a/tests/report/test_adapterqc.py b/tests/report/test_adapterqc.py index 2f6c4713..648b091e 100644 --- a/tests/report/test_adapterqc.py +++ b/tests/report/test_adapterqc.py @@ -4,6 +4,7 @@ """ import pytest + from pixelator.report import PixelatorReporting, PixelatorWorkdir diff --git a/tests/report/test_amplicon.py b/tests/report/test_amplicon.py index 7f7fb29d..04791a39 100644 --- a/tests/report/test_amplicon.py +++ b/tests/report/test_amplicon.py @@ -6,6 +6,7 @@ import pandas as pd import pytest from pandas.testing import assert_frame_equal + from pixelator.report import PixelatorReporting, PixelatorWorkdir from pixelator.report.models.amplicon import AmpliconSampleReport diff --git a/tests/report/test_analysis.py b/tests/report/test_analysis.py index f3fa8afd..85ee7cc5 100644 --- a/tests/report/test_analysis.py +++ b/tests/report/test_analysis.py @@ -4,6 +4,7 @@ """ import pytest + from pixelator.report import PixelatorReporting, PixelatorWorkdir from pixelator.report.models.analysis import ( AnalysisSampleReport, diff --git a/tests/report/test_annotate.py b/tests/report/test_annotate.py index 6f82c536..92b2b5e6 100644 --- a/tests/report/test_annotate.py +++ b/tests/report/test_annotate.py @@ -4,6 +4,7 @@ """ import pytest + from pixelator.report import PixelatorReporting, PixelatorWorkdir from pixelator.report.models import SummaryStatistics from pixelator.report.models.annotate import AnnotateSampleReport diff --git a/tests/report/test_cli_info.py b/tests/report/test_cli_info.py index 58574c23..05ed6124 100644 --- a/tests/report/test_cli_info.py +++ b/tests/report/test_cli_info.py @@ -6,6 +6,7 @@ import shutil import pytest + from pixelator.report import PixelatorReporting, SingleCellStage from pixelator.report.common import WorkdirOutputNotFound from pixelator.report.models import CommandInfo diff --git a/tests/report/test_collapse.py b/tests/report/test_collapse.py index 3ae1e8c5..177851a4 100644 --- a/tests/report/test_collapse.py +++ b/tests/report/test_collapse.py @@ -4,6 +4,7 @@ """ import pytest + from pixelator.report import PixelatorReporting, PixelatorWorkdir diff --git a/tests/report/test_data_collection.py b/tests/report/test_data_collection.py index 13762f35..14cf493e 100644 --- a/tests/report/test_data_collection.py +++ b/tests/report/test_data_collection.py @@ -6,6 +6,7 @@ import json import pytest + from pixelator.report.common.json_encoder import PixelatorJSONEncoder from pixelator.report.qcreport.collect import ( collect_antibody_counts_data, diff --git a/tests/report/test_demux.py b/tests/report/test_demux.py index aa946b06..ec5ba19e 100644 --- a/tests/report/test_demux.py +++ b/tests/report/test_demux.py @@ -4,6 +4,7 @@ """ import pytest + from pixelator.report import PixelatorReporting, PixelatorWorkdir diff --git a/tests/report/test_graph.py b/tests/report/test_graph.py index 809c43ed..8b7db92b 100644 --- a/tests/report/test_graph.py +++ b/tests/report/test_graph.py @@ -4,6 +4,7 @@ """ import pytest + from pixelator.report import PixelatorReporting, PixelatorWorkdir diff --git a/tests/report/test_layout.py b/tests/report/test_layout.py index 872b0c2c..cbece96f 100644 --- a/tests/report/test_layout.py +++ b/tests/report/test_layout.py @@ -4,6 +4,7 @@ """ import pytest + from pixelator.report import PixelatorReporting, PixelatorWorkdir from pixelator.report.models.layout import LayoutSampleReport diff --git a/tests/report/test_preqc.py b/tests/report/test_preqc.py index 62cf7975..ca6068bd 100644 --- a/tests/report/test_preqc.py +++ b/tests/report/test_preqc.py @@ -6,6 +6,7 @@ import pandas as pd import pytest from pandas.testing import assert_frame_equal + from pixelator.report import PixelatorReporting, PixelatorWorkdir from pixelator.report.models.preqc import PreQCSampleReport diff --git a/tests/report/test_qc_report.py b/tests/report/test_qc_report.py index 3d44093a..e747a984 100644 --- a/tests/report/test_qc_report.py +++ b/tests/report/test_qc_report.py @@ -14,6 +14,8 @@ import lxml import pytest from lxml.etree import _Element as LxmlElement +from playwright.sync_api import Page, expect + from pixelator import __version__ from pixelator.report.qcreport import ( QCReportBuilder, @@ -29,7 +31,6 @@ collect_components_umap_data, collect_reads_per_molecule_frequency, ) -from playwright.sync_api import Page, expect @pytest.fixture() diff --git a/tests/report/test_report_sample_metadata.py b/tests/report/test_report_sample_metadata.py index d70730be..15f66368 100644 --- a/tests/report/test_report_sample_metadata.py +++ b/tests/report/test_report_sample_metadata.py @@ -5,6 +5,7 @@ import pandas as pd import pytest + from pixelator.report.models.report_metadata import SampleMetadata, SampleMetadataRecord diff --git a/tests/report/test_reporting.py b/tests/report/test_reporting.py index a48c0926..e88e9725 100644 --- a/tests/report/test_reporting.py +++ b/tests/report/test_reporting.py @@ -4,6 +4,7 @@ """ import pytest + from pixelator.report import PixelatorReporting, PixelatorWorkdir from pixelator.report.common.reporting import _ordered_pixelator_commands diff --git a/tests/report/test_workdir.py b/tests/report/test_workdir.py index ef359873..685fa4dc 100644 --- a/tests/report/test_workdir.py +++ b/tests/report/test_workdir.py @@ -7,6 +7,7 @@ import shutil import pytest + from pixelator.report import PixelatorWorkdir from pixelator.report.common import WorkdirOutputNotFound diff --git a/tests/resources/test_panel.py b/tests/resources/test_panel.py index ca9a8b65..c438716d 100644 --- a/tests/resources/test_panel.py +++ b/tests/resources/test_panel.py @@ -8,6 +8,7 @@ import pandas as pd import pytest + from pixelator.config import AntibodyPanel diff --git a/tests/test_integration.py b/tests/test_integration.py index a6b9ec3d..0b72e596 100644 --- a/tests/test_integration.py +++ b/tests/test_integration.py @@ -7,6 +7,7 @@ import pytest from click.testing import CliRunner + from pixelator import cli pytestmark = pytest.mark.integration_test diff --git a/tests/test_logging.py b/tests/test_logging.py index 71873e0e..a41ca03c 100644 --- a/tests/test_logging.py +++ b/tests/test_logging.py @@ -10,6 +10,7 @@ from pathlib import Path import pytest + from pixelator.logging import LoggingSetup from pixelator.utils import ( get_process_pool_executor, diff --git a/tests/test_marks.py b/tests/test_marks.py index 4dc2fd15..5716d9af 100644 --- a/tests/test_marks.py +++ b/tests/test_marks.py @@ -4,6 +4,7 @@ """ import pytest + from pixelator.marks import experimental diff --git a/tests/test_plugin.py b/tests/test_plugin.py index c75ead79..faed6c51 100644 --- a/tests/test_plugin.py +++ b/tests/test_plugin.py @@ -8,6 +8,7 @@ from unittest import mock import click + from pixelator.cli.main import main_cli from pixelator.cli.plugin import add_cli_plugins, fetch_cli_plugins from pixelator.config import Config, config diff --git a/tests/test_simplification.py b/tests/test_simplification.py index 8c4b8b4f..1a5450fc 100644 --- a/tests/test_simplification.py +++ b/tests/test_simplification.py @@ -5,6 +5,7 @@ import numpy as np import pytest + from pixelator.utils.simplification import simplify_line_rdp diff --git a/tests/test_statistics.py b/tests/test_statistics.py index 79f7bcfd..eb5fe51b 100644 --- a/tests/test_statistics.py +++ b/tests/test_statistics.py @@ -6,6 +6,7 @@ import pandas as pd from numpy.testing import assert_allclose, assert_array_almost_equal from pandas.testing import assert_frame_equal + from pixelator.statistics import ( clr_transformation, correct_pvalues, diff --git a/tests/test_utils.py b/tests/test_utils.py index 9d304506..106d7178 100644 --- a/tests/test_utils.py +++ b/tests/test_utils.py @@ -1,6 +1,7 @@ """Copyright © 2023 Pixelgen Technologies AB.""" import pytest + from pixelator.utils import flatten diff --git a/tests/utils/test_utils.py b/tests/utils/test_utils.py index 30825af1..bbff5ede 100644 --- a/tests/utils/test_utils.py +++ b/tests/utils/test_utils.py @@ -12,6 +12,7 @@ from unittest.mock import patch import pytest + from pixelator import __version__ from pixelator.utils import ( get_pool_executor, From 7df5e2a0bc66e8faa6ce9dfd982df1d652c77d26 Mon Sep 17 00:00:00 2001 From: Alvaro Martinez Barrio Date: Thu, 26 Sep 2024 08:53:04 +0200 Subject: [PATCH 17/18] fix: changelog --- CHANGELOG.md | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index e68ec151..694ab5b9 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,8 +5,7 @@ All notable changes to this project will be documented in this file. The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). -## [x.x.x] - 2024-xx-xx - +## [0.18.3] - 2024-09-26 ### Fixed From 916c3859e288d30d0b19fc2f1061321713ef1066 Mon Sep 17 00:00:00 2001 From: Alvaro Martinez Barrio Date: Mon, 30 Sep 2024 07:02:03 +0200 Subject: [PATCH 18/18] fix: changelog --- CHANGELOG.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 694ab5b9..75ddea98 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -9,7 +9,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### Fixed - - Improved memory usage when aggregating pixel files with precomputed layouts. + - Improved memory usage when aggregating PXL files with precomputed layouts. ## [0.18.2] - 2024-07-16