From cfcce8a9f4a476ed4faa4f068ec4cfaba156114c Mon Sep 17 00:00:00 2001 From: Johan Dahlberg Date: Tue, 17 Oct 2023 09:51:08 +0000 Subject: [PATCH 1/4] Make sure types are correctly set, when filtering --- src/pixelator/pixeldataset.py | 11 +++++++---- tests/test_pixeldataset.py | 18 +++++++++++++++++- 2 files changed, 24 insertions(+), 5 deletions(-) diff --git a/src/pixelator/pixeldataset.py b/src/pixelator/pixeldataset.py index 71b1432a..24657eeb 100644 --- a/src/pixelator/pixeldataset.py +++ b/src/pixelator/pixeldataset.py @@ -700,7 +700,7 @@ def graph( ( self.edgelist_lazy.filter(pl.col("component") == component_id) .collect() - .to_pandas(use_pyarrow_extension_array=True) + .to_pandas() ) ) if potential_component.empty: @@ -839,9 +839,7 @@ def _all_true_array(shape): else self.edgelist_lazy ) - edgelist = _enforce_edgelist_types( - edgelist_pred.collect().to_pandas(use_pyarrow_extension_array=True) - ) + edgelist = _enforce_edgelist_types(edgelist_pred.collect().to_pandas()) if self.polarization is not None: polarization_mask = ( @@ -1299,6 +1297,11 @@ def _enforce_edgelist_types(edgelist: pd.DataFrame) -> pd.DataFrame: if edgelist.shape[0] == 0: edgelist = pd.DataFrame(columns=required_types.keys()) + # If we have the optional sample column, this should be + # set to use a categorical type + if "sample" in edgelist.columns: + required_types["sample"] = "category" + # If all of the prescribed types are already set, just return the edgelist type_dict = edgelist.dtypes.to_dict() if all(type_dict[key] == type_ for key, type_ in required_types.items()): diff --git a/tests/test_pixeldataset.py b/tests/test_pixeldataset.py index 93e1b638..ac65bb0b 100644 --- a/tests/test_pixeldataset.py +++ b/tests/test_pixeldataset.py @@ -516,7 +516,6 @@ def test_edgelist_to_anndata( assert set(adata.obs_names) == set(edgelist["component"].unique()) -@pytest.mark.test_this def test_simple_aggregate(setup_basic_pixel_dataset): """test_simple_aggregate.""" dataset_1, *_ = setup_basic_pixel_dataset @@ -699,6 +698,23 @@ def test_simple_aggregate_ignore_edgelist(setup_basic_pixel_dataset): assert result.edgelist.shape == (0, 9) +def test_filter_should_return_proper_typed_edgelist_data(setup_basic_pixel_dataset): + # Test to check for bug EXE-1177 + # This bug was caused by filtering returning an incorrectly typed + # edgelist, which in turn caused getting the graph to fail + dataset_1, *_ = setup_basic_pixel_dataset + dataset_2 = dataset_1.copy() + + aggregated_data = simple_aggregate( + sample_names=["sample1", "sample2"], datasets=[dataset_1, dataset_2] + ) + + result = aggregated_data.filter(components=aggregated_data.adata.obs.index[:2]) + assert isinstance(result.edgelist["component"].dtype, pd.CategoricalDtype) + # Running graph here to make sure it does not raise an exception + result.graph(result.adata.obs.index[0]) + + def test_copy(setup_basic_pixel_dataset): """test_copy.""" dataset_1, *_ = setup_basic_pixel_dataset From c0b81310ed362bb96fb0e9b4091eff5f5d11c172 Mon Sep 17 00:00:00 2001 From: Johan Dahlberg Date: Wed, 18 Oct 2023 09:15:50 +0000 Subject: [PATCH 2/4] Concatenation failure in layouts fixed --- .../graph/backends/implementations.py | 3 +- tests/graph/test_graph_utils.py | 70 +++++++++++++++++++ 2 files changed, 72 insertions(+), 1 deletion(-) diff --git a/src/pixelator/graph/backends/implementations.py b/src/pixelator/graph/backends/implementations.py index 99eae013..39a667d9 100644 --- a/src/pixelator/graph/backends/implementations.py +++ b/src/pixelator/graph/backends/implementations.py @@ -325,6 +325,7 @@ def layout_coordinates( coordinates = pd.DataFrame( layout_inst.coords, columns=["x", "y"] if layout_inst.dim == 2 else ["x", "y", "z"], + index=raw.vs["name"], ) # If we are doing a 3D layout we add the option of normalized @@ -340,7 +341,7 @@ def layout_coordinates( # Added here to avoid circular imports from pixelator.graph.utils import create_node_markers_counts - node_marker_counts = create_node_markers_counts(raw) + node_marker_counts = create_node_markers_counts(self._raw) df = pd.concat([coordinates, node_marker_counts], axis=1) else: df = coordinates diff --git a/tests/graph/test_graph_utils.py b/tests/graph/test_graph_utils.py index f40a717d..6cb0c036 100644 --- a/tests/graph/test_graph_utils.py +++ b/tests/graph/test_graph_utils.py @@ -93,6 +93,76 @@ def test_build_graph_a_node_projected(full_graph_edgelist: pd.DataFrame): assert graph.vs.attributes() == ["name", "markers", "type", "pixel_type"] +def test_layout_coordinates_all_pixels(full_graph_edgelist: pd.DataFrame): + graph = Graph.from_edgelist( + edgelist=full_graph_edgelist, + add_marker_counts=True, + simplify=True, + use_full_bipartite=True, + ) + result = graph.layout_coordinates(only_keep_a_pixels=False) + assert result.shape == (100, 4) + assert set(result.columns) == {"x", "y", "A", "B"} + + +def test_layout_coordinates_3d_layout(full_graph_edgelist: pd.DataFrame): + graph = Graph.from_edgelist( + edgelist=full_graph_edgelist, + add_marker_counts=True, + simplify=True, + use_full_bipartite=True, + ) + result = graph.layout_coordinates( + layout_algorithm="fruchterman_reingold_3d", only_keep_a_pixels=False + ) + assert set(result.columns) == { + "x", + "y", + "z", + "x_norm", + "y_norm", + "z_norm", + "A", + "B", + } + assert result.shape == (100, 8) + + +def test_layout_coordinates_only_a_pixels(full_graph_edgelist: pd.DataFrame): + graph = Graph.from_edgelist( + edgelist=full_graph_edgelist, + add_marker_counts=True, + simplify=True, + use_full_bipartite=True, + ) + result = graph.layout_coordinates(only_keep_a_pixels=True) + assert result.shape == (50, 4) + assert set(result.columns) == {"x", "y", "A", "B"} + + +def test_layout_coordinates_3d_layout_only_a_pixels(full_graph_edgelist: pd.DataFrame): + graph = Graph.from_edgelist( + edgelist=full_graph_edgelist, + add_marker_counts=True, + simplify=True, + use_full_bipartite=True, + ) + result = graph.layout_coordinates( + layout_algorithm="fruchterman_reingold_3d", only_keep_a_pixels=True + ) + assert set(result.columns) == { + "x", + "y", + "z", + "x_norm", + "y_norm", + "z_norm", + "A", + "B", + } + assert result.shape == (50, 8) + + def test_components_metrics(full_graph_edgelist: pd.DataFrame): """Test generating component metrics.""" # test component metrics From 0118fdaf5ff10dcf82349e6b17c2603be2b1a785 Mon Sep 17 00:00:00 2001 From: Johan Dahlberg Date: Wed, 18 Oct 2023 09:24:53 +0000 Subject: [PATCH 3/4] Update changelog --- CHANGELOG.md | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index c8753898..7a3c5698 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,7 +5,15 @@ All notable changes to this project will be documented in this file. The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). -## [0.15.0]- 2023-10-16 +## [UNRELEASED] + +### Fixed + +* Fixed bug in filtering pixeldataset causing it to return the wrong types +* Fixed bug in graph layout generation + + +## [0.15.0] - 2023-10-16 ### Added From 6e53f695cd84086e3cdb06ae8691aa7901f63f03 Mon Sep 17 00:00:00 2001 From: Johan Dahlberg Date: Wed, 18 Oct 2023 12:02:51 +0200 Subject: [PATCH 4/4] Update CHANGELOG.md Co-authored-by: Florian De Temmerman <69114541+fbdtemme@users.noreply.github.com> --- CHANGELOG.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 7a3c5698..018cc718 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -9,8 +9,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### Fixed -* Fixed bug in filtering pixeldataset causing it to return the wrong types -* Fixed bug in graph layout generation +* Fixed a bug in filtering pixeldataset causing it to return the wrong types. +* Fixed a bug in graph layout generation due to incorrect data frame concatenation. ## [0.15.0] - 2023-10-16