Skip to content

Commit

Permalink
Merge pull request #26 from PixelgenTechnologies/feature/exe-1177-fix…
Browse files Browse the repository at this point in the history
…-filtering-edgelist-bug

Fix filtering edgelist bug, and layout generation problem
  • Loading branch information
johandahlberg authored Oct 18, 2023
2 parents bf50ce9 + 6e53f69 commit ce48f8e
Show file tree
Hide file tree
Showing 5 changed files with 105 additions and 7 deletions.
10 changes: 9 additions & 1 deletion CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,15 @@ All notable changes to this project will be documented in this file.
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).

## [0.15.0]- 2023-10-16
## [UNRELEASED]

### Fixed

* Fixed a bug where filtering a pixeldataset returned an edgelist with incorrect column types.
* Fixed a bug in graph layout generation due to incorrect data frame concatenation.


## [0.15.0] - 2023-10-16

### Added

Expand Down
3 changes: 2 additions & 1 deletion src/pixelator/graph/backends/implementations.py
Original file line number Diff line number Diff line change
Expand Up @@ -325,6 +325,7 @@ def layout_coordinates(
coordinates = pd.DataFrame(
layout_inst.coords,
columns=["x", "y"] if layout_inst.dim == 2 else ["x", "y", "z"],
index=raw.vs["name"],
)

# If we are doing a 3D layout we add the option of normalized
Expand All @@ -340,7 +341,7 @@ def layout_coordinates(
# Added here to avoid circular imports
from pixelator.graph.utils import create_node_markers_counts

node_marker_counts = create_node_markers_counts(raw)
node_marker_counts = create_node_markers_counts(self._raw)
df = pd.concat([coordinates, node_marker_counts], axis=1)
else:
df = coordinates
Expand Down
11 changes: 7 additions & 4 deletions src/pixelator/pixeldataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -700,7 +700,7 @@ def graph(
(
self.edgelist_lazy.filter(pl.col("component") == component_id)
.collect()
.to_pandas(use_pyarrow_extension_array=True)
.to_pandas()
)
)
if potential_component.empty:
Expand Down Expand Up @@ -839,9 +839,7 @@ def _all_true_array(shape):
else self.edgelist_lazy
)

edgelist = _enforce_edgelist_types(
edgelist_pred.collect().to_pandas(use_pyarrow_extension_array=True)
)
edgelist = _enforce_edgelist_types(edgelist_pred.collect().to_pandas())

if self.polarization is not None:
polarization_mask = (
Expand Down Expand Up @@ -1299,6 +1297,11 @@ def _enforce_edgelist_types(edgelist: pd.DataFrame) -> pd.DataFrame:
if edgelist.shape[0] == 0:
edgelist = pd.DataFrame(columns=required_types.keys())

# If we have the optional sample column, this should be
# set to use a categorical type
if "sample" in edgelist.columns:
required_types["sample"] = "category"

# If all of the prescribed types are already set, just return the edgelist
type_dict = edgelist.dtypes.to_dict()
if all(type_dict[key] == type_ for key, type_ in required_types.items()):
Expand Down
70 changes: 70 additions & 0 deletions tests/graph/test_graph_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -93,6 +93,76 @@ def test_build_graph_a_node_projected(full_graph_edgelist: pd.DataFrame):
assert graph.vs.attributes() == ["name", "markers", "type", "pixel_type"]


def test_layout_coordinates_all_pixels(full_graph_edgelist: pd.DataFrame):
    """Check the default layout coordinates when all pixels are kept.

    The resulting data frame is expected to have 100 rows and the
    columns ``x``, ``y``, ``A`` and ``B``.
    """
    build_options = {
        "add_marker_counts": True,
        "simplify": True,
        "use_full_bipartite": True,
    }
    bipartite_graph = Graph.from_edgelist(
        edgelist=full_graph_edgelist, **build_options
    )

    layout_df = bipartite_graph.layout_coordinates(only_keep_a_pixels=False)

    assert layout_df.shape == (100, 4)
    assert set(layout_df.columns) == {"x", "y", "A", "B"}


def test_layout_coordinates_3d_layout(full_graph_edgelist: pd.DataFrame):
    """Check the 3D Fruchterman-Reingold layout when all pixels are kept.

    The resulting data frame is expected to have 100 rows, with raw and
    normalized coordinate columns plus the ``A`` and ``B`` columns.
    """
    build_options = {
        "add_marker_counts": True,
        "simplify": True,
        "use_full_bipartite": True,
    }
    bipartite_graph = Graph.from_edgelist(
        edgelist=full_graph_edgelist, **build_options
    )

    layout_df = bipartite_graph.layout_coordinates(
        layout_algorithm="fruchterman_reingold_3d",
        only_keep_a_pixels=False,
    )

    expected_columns = {"x", "y", "z", "x_norm", "y_norm", "z_norm", "A", "B"}
    assert set(layout_df.columns) == expected_columns
    assert layout_df.shape == (100, 8)


def test_layout_coordinates_only_a_pixels(full_graph_edgelist: pd.DataFrame):
    """Check the default layout coordinates when only A pixels are kept.

    The resulting data frame is expected to have 50 rows and the
    columns ``x``, ``y``, ``A`` and ``B``.
    """
    build_options = {
        "add_marker_counts": True,
        "simplify": True,
        "use_full_bipartite": True,
    }
    bipartite_graph = Graph.from_edgelist(
        edgelist=full_graph_edgelist, **build_options
    )

    layout_df = bipartite_graph.layout_coordinates(only_keep_a_pixels=True)

    assert layout_df.shape == (50, 4)
    assert set(layout_df.columns) == {"x", "y", "A", "B"}


def test_layout_coordinates_3d_layout_only_a_pixels(full_graph_edgelist: pd.DataFrame):
    """Check the 3D Fruchterman-Reingold layout when only A pixels are kept.

    The resulting data frame is expected to have 50 rows, with raw and
    normalized coordinate columns plus the ``A`` and ``B`` columns.
    """
    build_options = {
        "add_marker_counts": True,
        "simplify": True,
        "use_full_bipartite": True,
    }
    bipartite_graph = Graph.from_edgelist(
        edgelist=full_graph_edgelist, **build_options
    )

    layout_df = bipartite_graph.layout_coordinates(
        layout_algorithm="fruchterman_reingold_3d",
        only_keep_a_pixels=True,
    )

    expected_columns = {"x", "y", "z", "x_norm", "y_norm", "z_norm", "A", "B"}
    assert set(layout_df.columns) == expected_columns
    assert layout_df.shape == (50, 8)


def test_components_metrics(full_graph_edgelist: pd.DataFrame):
"""Test generating component metrics."""
# test component metrics
Expand Down
18 changes: 17 additions & 1 deletion tests/test_pixeldataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -516,7 +516,6 @@ def test_edgelist_to_anndata(
assert set(adata.obs_names) == set(edgelist["component"].unique())


@pytest.mark.test_this
def test_simple_aggregate(setup_basic_pixel_dataset):
"""test_simple_aggregate."""
dataset_1, *_ = setup_basic_pixel_dataset
Expand Down Expand Up @@ -699,6 +698,23 @@ def test_simple_aggregate_ignore_edgelist(setup_basic_pixel_dataset):
assert result.edgelist.shape == (0, 9)


def test_filter_should_return_proper_typed_edgelist_data(setup_basic_pixel_dataset):
    """Regression test for bug EXE-1177.

    Filtering used to return an incorrectly typed edgelist, which in
    turn made fetching the graph fail.
    """
    base_dataset, *_ = setup_basic_pixel_dataset
    duplicate_dataset = base_dataset.copy()

    aggregated = simple_aggregate(
        sample_names=["sample1", "sample2"],
        datasets=[base_dataset, duplicate_dataset],
    )

    first_two_components = aggregated.adata.obs.index[:2]
    filtered = aggregated.filter(components=first_two_components)

    component_dtype = filtered.edgelist["component"].dtype
    assert isinstance(component_dtype, pd.CategoricalDtype)

    # Fetching the graph must not raise now that the types are correct
    filtered.graph(filtered.adata.obs.index[0])


def test_copy(setup_basic_pixel_dataset):
"""test_copy."""
dataset_1, *_ = setup_basic_pixel_dataset
Expand Down

0 comments on commit ce48f8e

Please sign in to comment.