Skip to content

Commit

Permalink
Merge pull request #17 from josenimo/main
Browse files Browse the repository at this point in the history
NatSorted samples, ignore hidden *.csv files, default upperbound marker, channel picker fixed
  • Loading branch information
josenimo authored Apr 17, 2024
2 parents d993927 + 6837dac commit f1e7c4c
Show file tree
Hide file tree
Showing 5 changed files with 93 additions and 46 deletions.
File renamed without changes.
24 changes: 22 additions & 2 deletions src/cell_gater/model/data_model.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,21 +22,32 @@ class DataModel:
_lower_bound_marker: str | None = field(default=None, init=False)
_upper_bound_marker: str | None = field(default=None, init=False)
_markers: Sequence[str] = field(default_factory=list, init=False)
_markers_image_indices: Sequence[str] = field(default_factory=list, init=False)
_marker_filter: str = field(default="dna,dapi", init=True)
_validated: bool = field(default=False, init=True)

_active_marker: str | None = field(default=None, init=False)
_active_sample: str | None = field(default=None, init=False)
_active_y_axis: str | None = field(default=None, init=False)
_active_ref_marker: str | None = field(default=None, init=False)

_gates: pd.DataFrame = field(default_factory=pd.DataFrame, init=False)
_current_gate: float = field(default_factory=float, init=False)

@property
def active_ref_marker(self) -> str | None:
    """Return the reference marker for the gates, or ``None`` if none has been set."""
    return self._active_ref_marker

@active_ref_marker.setter
def active_ref_marker(self, marker: str | None) -> None:
    """Store ``marker`` as the active reference marker."""
    self._active_ref_marker = marker

@property
def gates(self) -> pd.DataFrame:
    """Return the gates dataframe."""
    return self._gates

@gates.setter
def gates(self, gates: pd.DataFrame) -> None:
    """Replace the gates dataframe with ``gates``."""
    self._gates = gates
Expand All @@ -45,7 +56,7 @@ def gates(self, gates: pd.DataFrame) -> None:
def current_gate(self) -> float:
"""The current gate value."""
return self._current_gate

@current_gate.setter
def current_gate(self, value: float) -> None:
self._current_gate = value
Expand Down Expand Up @@ -132,6 +143,15 @@ def markers(self):
"""The markers included for gating."""
return self._markers

@property
def markers_image_indices(self) -> dict[str, int]:
    """Return the mapping from marker name to the index used to select it from the image.

    Until a mapping has been assigned, this is the field's empty default.
    """
    return self._markers_image_indices

@markers_image_indices.setter
def markers_image_indices(self, markers_image_indices: dict[str, int]) -> None:
    """Store the marker-name-to-image-index mapping."""
    self._markers_image_indices = markers_image_indices

@markers.setter
def markers(self, markers: Sequence[str]) -> None:
self._markers = markers
Expand Down
11 changes: 7 additions & 4 deletions src/cell_gater/utils/csv_df.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,10 +27,13 @@ def stack_csv_files(csv_dir: Path) -> pd.DataFrame | None:
napari_notification(f"Loaded {len(csv_files)} regionprops csvs.")
df = pd.DataFrame()
for file in csv_files:
df_file = pd.read_csv(file)
df_file["sample_id"] = file.stem
df = pd.concat([df, df_file], ignore_index=True)
df["sample_id"] = df.sample_id.astype("category")
if not file.name.startswith('.'):
df_file = pd.read_csv(file)
df_file["sample_id"] = file.stem
df = pd.concat([df, df_file], ignore_index=True)
df["sample_id"] = df.sample_id.astype("category")
else:
print(f"Skipping file {file.name} as it is a hidden file.")

return df

Expand Down
32 changes: 19 additions & 13 deletions src/cell_gater/widgets/sample_widget.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,9 @@
from cell_gater.utils.misc import napari_notification
from cell_gater.widgets.scatter_widget import ScatterInputWidget

# TODO: still having a problem with the number of channels:
# if the user picks a marker that is in the fifth position of df.columns, there is a shift.

class SampleWidget(QWidget):
"""Sample widget for loading required data."""

Expand Down Expand Up @@ -89,8 +92,8 @@ def __init__(self, viewer: Viewer, model: DataModel | None = None) -> None:
placeholderText="Prefixes separated by commas.",
)
self.filter_field.editingFinished.connect(self._update_filter)
self.layout().addWidget(filter_label, 3, 0, 1 ,2)
self.layout().addWidget(self.filter_field, 3, 3)
self.layout().addWidget(filter_label, 3, 0, 1 ,1)
self.layout().addWidget(self.filter_field, 3, 1, 1, 1)

# Button to start validating all the input
self.validate_button = QPushButton("Validate input")
Expand All @@ -99,11 +102,10 @@ def __init__(self, viewer: Viewer, model: DataModel | None = None) -> None:

self.model.events.regionprops_df.connect(self._set_dropdown_marker_lowerbound)
self.model.events.regionprops_df.connect(self._set_dropdown_marker_upperbound)

#set default bounds



def update_ref_channel(self):
    """Copy the text currently shown in the reference channel widget into the data model."""
    # NOTE(review): the visible DataModel code declares `active_ref_marker`, not
    # `ref_channel` -- confirm that this attribute name is the intended one.
    selected_channel = self.ref_channel.currentText()
    self.model.ref_channel = selected_channel

@property
def viewer(self) -> Viewer:
Expand Down Expand Up @@ -192,13 +194,13 @@ def _set_dropdown_marker_upperbound(self):
region_props = self.model.regionprops_df
if region_props is not None and len(region_props) > 0:
self.upper_bound_marker_col.addItems(region_props.columns)

#TODO set default to column before "X_centroid"
# This does not work
# if "X_centroid" in list(self.model.regionprops_df.columns):
# self.upper_bound_marker_col.setCurrentIndex(
# self.model.regionprops_df.columns.index("X_centroid")-1 )

#set default to the last column before X_centroid
if "X_centroid" in region_props.columns:
default_index = self.model.regionprops_df.columns.tolist().index("X_centroid")
if default_index != -1:
self.upper_bound_marker_col.setCurrentIndex(default_index-1)
else:
self.upper_bound_marker_col.setCurrentIndex(len(region_props.columns)-1)

def _update_model_lowerbound(self):
"""Update the lowerbound marker in the data model upon change of text in the lowerbound marker column widget."""
Expand Down Expand Up @@ -229,6 +231,7 @@ def _validate(self):
), "Number of images and segmentation masks do not match."

# TODO: handle the case where the upper bound marker comes before the lower bound marker;
# validation should stop and show an error message.

# First check whether there is a difference between the file names without extension and then assign as samples
image_paths_set = {i.stem if ".ome" not in i.stem else i.stem.rstrip(".ome") for i in self.model.image_paths}
Expand All @@ -252,6 +255,9 @@ def _validate(self):
marker_columns = column_ls[lowerbound_index : upperbound_index + 1]
self.model.markers = {marker: i for i, marker in enumerate(marker_columns)}
n_markers = len(self.model.markers)
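# ASSUMPTION: marker columns start at index 1 and end just before X_centroid.
# NOTE(review): the slice below ends at index("X_centroid") - 1, which is exclusive, so the
# column directly before X_centroid is left out -- confirm this is intended.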
markers = column_ls[1:column_ls.index("X_centroid")-1]
self.model.markers_image_indices = {marker: i for i, marker in enumerate(markers)}

for filter in self.model.marker_filter.split(","):
# Do this because changing length would cause errors when deleting in loop.
Expand Down
72 changes: 45 additions & 27 deletions src/cell_gater/widgets/scatter_widget.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,6 @@
logger.remove()
logger.add(sys.stdout, format="<green>{time:HH:mm:ss.SS}</green> | <level>{level}</level> | {message}")


#Good to have features
#TODO Dynamic loading of markers, without reloading masks or DNA channel, so deprecate Load Sample and Marker button

Expand Down Expand Up @@ -79,7 +78,7 @@ def __init__(self, model: DataModel, viewer: Viewer) -> None:
# Dropdown of samples once directory is loaded
selection_label = QLabel("Select sample:")
self.sample_selection_dropdown = QComboBox()
self.sample_selection_dropdown.addItems(self.model.samples)
self.sample_selection_dropdown.addItems(sorted(self.model.samples, key=self.natural_sort_key) )
self.sample_selection_dropdown.currentTextChanged.connect(self._on_sample_changed)

marker_label = QLabel("Marker label:")
Expand All @@ -96,27 +95,36 @@ def __init__(self, model: DataModel, viewer: Viewer) -> None:
self.choose_y_axis_dropdown.setCurrentText("Area")
self.choose_y_axis_dropdown.currentTextChanged.connect(self._on_y_axis_changed)

# Reference channel
DNA_to_show = QLabel("Select reference channel")
self.ref_channel_dropdown = QComboBox()
self.ref_channel_dropdown.addItems(self.model.markers_image_indices.keys())
self.ref_channel_dropdown.currentTextChanged.connect(self.update_ref_channel)

self.layout().addWidget(selection_label, 0, 0)
self.layout().addWidget(self.sample_selection_dropdown, 0, 1)
self.layout().addWidget(marker_label, 0, 2)
self.layout().addWidget(self.marker_selection_dropdown, 0, 3)
self.layout().addWidget(apply_button, 1, 0, 1, 4)
self.layout().addWidget(choose_y_axis_label, 2, 0, 1, 2)
self.layout().addWidget(self.choose_y_axis_dropdown, 2, 2, 1, 2)
self.layout().addWidget(choose_y_axis_label, 2, 0, 1, 1)
self.layout().addWidget(self.choose_y_axis_dropdown, 2, 1, 1, 1)
self.layout().addWidget(DNA_to_show, 2, 2, 1, 1)
self.layout().addWidget(self.ref_channel_dropdown, 2, 3, 1, 1)

# we have to do this because initially the dropdowns did not change texts yet so these variables are still None.
self.model.active_sample = self.sample_selection_dropdown.currentText()
self.model.active_marker = self.marker_selection_dropdown.currentText()
self.model.active_y_axis = self.choose_y_axis_dropdown.currentText()
self.model.active_ref_marker = self.ref_channel_dropdown.currentText()

self._read_data(self.model.active_sample)
self._load_layers(self.model.markers[self.model.active_marker])
self._load_layers(self.model.markers_image_indices[self.model.active_marker])

# scatter plot
self.scatter_canvas = PlotCanvas(self.model)
self.layout().addWidget(self.scatter_canvas.fig, 3, 0, 1, 4)

# slider
# slider
self.slider_figure = Figure(figsize=(5, 1))
self.slider_canvas = FigureCanvas(self.slider_figure)
self.slider_ax = self.slider_figure.add_subplot(111)
Expand All @@ -130,11 +138,11 @@ def __init__(self, model: DataModel, viewer: Viewer) -> None:

# Initialize gates dataframe
sample_marker_combinations = list(product(
self.model.regionprops_df['sample_id'].unique(),
self.model.regionprops_df["sample_id"].unique(),
self.model.markers
))
self.model.gates = pd.DataFrame(sample_marker_combinations, columns=['sample_id', 'marker_id'])
self.model.gates['gate_value'] = float(0)
self.model.gates = pd.DataFrame(sample_marker_combinations, columns=["sample_id", "marker_id"])
self.model.gates["gate_value"] = float(0)

# gate buttons
save_gate_button = QPushButton("Save Gate")
Expand All @@ -151,6 +159,10 @@ def __init__(self, model: DataModel, viewer: Viewer) -> None:


########################### FUNCTIONS ###########################

def update_ref_channel(self):
    """Set the model's active reference marker from the dropdown, then reload images and plot."""
    chosen_marker = self.ref_channel_dropdown.currentText()
    self.model.active_ref_marker = chosen_marker
    self._load_images_and_scatter_plot()

###################
### PLOT POINTS ###
Expand Down Expand Up @@ -187,14 +199,14 @@ def load_gates_dataframe(self):
file_path, _ = self._file_dialog()
if file_path:
self.model.gates = pd.read_csv(file_path)
self.model.gates['sample_id'] = self.model.gates['sample_id'].astype(str)
self.model.gates["sample_id"] = self.model.gates["sample_id"].astype(str)
# check if dataframe has samples and markers
assert 'sample_id' in self.model.gates.columns
assert 'marker_id' in self.model.gates.columns
assert 'gate_value' in self.model.gates.columns
assert "sample_id" in self.model.gates.columns
assert "marker_id" in self.model.gates.columns
assert "gate_value" in self.model.gates.columns
# check if dataframe has the same samples and markers as the regionprops_df
assert set(self.model.gates['sample_id'].unique()) == set(self.model.regionprops_df['sample_id'].unique())
assert set(self.model.gates['marker_id'].unique()) == set(self.model.markers)
assert set(self.model.gates["sample_id"].unique()) == set(self.model.regionprops_df["sample_id"].unique())
assert set(self.model.gates["marker_id"].unique()) == set(self.model.markers)

def save_gates_dataframe(self):
options = QFileDialog.Options()
Expand Down Expand Up @@ -234,7 +246,7 @@ def access_gate(self):
def get_min_max_median_step(self) -> tuple:
df = self.model.regionprops_df
df = df[df["sample_id"] == self.model.active_sample]
min = df[self.model.active_marker].min()
min = df[self.model.active_marker].min() + 1
max = df[self.model.active_marker].max()
init = df[self.model.active_marker].median()
step = min / 100
Expand Down Expand Up @@ -264,8 +276,10 @@ def update_plot(self):
def _load_images_and_scatter_plot(self):
self._clear_layers(clear_all=True)
self._read_data(self.model.active_sample)
self._load_layers(self.model.markers[self.model.active_marker])
logger.debug(f"loading index {self.model.markers[self.model.active_marker]}")
# active marker is a string
# markers is dict with marker_name_string:index (based on dropdowns)
self._load_layers(self.model.markers_image_indices[self.model.active_marker])
logger.debug(f"loading index {self.model.markers_image_indices[self.model.active_marker]}")
self.update_plot()
self.update_slider()

Expand All @@ -285,10 +299,9 @@ def _load_layers(self, marker_index):
# if self.model.active_sample != self._current_sample:
# self._current_sample = copy(self.model.active_sample)

#TODO let user decide which is their DNA channel
self.viewer.add_image(
self._image[0],
name="DNA_" + self.model.active_sample,
self._image[self.model.markers_image_indices[self.model.active_ref_marker]],
name="Reference" + self.model.active_sample,
blending="additive",
visible=False
)
Expand Down Expand Up @@ -337,7 +350,7 @@ def _set_samples_dropdown(self) -> None:
if len(self.model.samples) > 0:
self.sample_selection_dropdown.addItems([None])
self.sample_selection_dropdown.addItems(self.model.samples)

def _on_y_axis_changed(self):
"""Set active y-axis and update the scatter plot."""
self.model.active_y_axis = self.choose_y_axis_dropdown.currentText()
Expand All @@ -356,7 +369,12 @@ def _file_dialog(self):
"CSV Files (*.csv)",
options=options,
)


def natural_sort_key(self, s):
    """Return a key that sorts strings naturally, comparing embedded numbers by value.

    The string is split into alternating text and digit runs. Digit runs become
    ``int`` and text runs are lower-cased, so ``"s2"`` sorts before ``"s10"``.

    Parameters
    ----------
    s : str
        The string to build a sort key for.

    Returns
    -------
    list
        Alternating lower-cased text chunks and integers.
    """
    import re

    # ``isdecimal`` rather than ``isdigit``: characters such as "²" satisfy
    # ``isdigit`` but are not matched by ``\d`` and make ``int()`` raise ValueError.
    return [
        int(chunk) if chunk.isdecimal() else chunk.lower()
        for chunk in re.split(r"(\d+)", s)
    ]

@property
def model(self) -> DataModel:
"""The dataclass model that stores information required for cell_gating."""
Expand Down Expand Up @@ -400,10 +418,10 @@ def model(self, model: DataModel) -> None:
def plot_scatter_plot(self, model: DataModel) -> None:
assert self.model.active_marker is not None
assert self.model.active_sample is not None

df = self.model.regionprops_df
df = df[df["sample_id"] == self.model.active_sample]

logger.debug(f"Plotting scatter plot for {self.model.active_sample} and {self.model.active_marker}.")

self.ax.scatter(
Expand All @@ -419,13 +437,13 @@ def plot_scatter_plot(self, model: DataModel) -> None:
self.ax.set_xlim(df[self.model.active_marker].min(), df[self.model.active_marker].max())
self.ax.set_ylabel(self.model.active_y_axis)
self.ax.set_xlabel(f"{self.model.active_marker} intensity")

logger.debug(f"The current gate is {self.model.current_gate}.")
if self.model.current_gate > 0.0:
self.ax.axvline(x=self.model.current_gate, color="red", linewidth=1.0, linestyle="--")
else:
self.ax.axvline(x=1, color="red", linewidth=1.0, linestyle="--")

def update_vertical_line(self, x_position):
    """Update the position of the vertical line.

    Parameters
    ----------
    x_position : float or sequence of float
        New x coordinate of the line. A scalar is expanded to ``[x, x]``;
        a sequence is passed through unchanged.
    """
    # Line2D.set_xdata requires a sequence in recent Matplotlib releases, so a
    # bare scalar is expanded to one x value per end point of the line.
    try:
        iter(x_position)
    except TypeError:
        x_position = [x_position, x_position]
    self.ax.lines[0].set_xdata(x_position)

0 comments on commit f1e7c4c

Please sign in to comment.