Skip to content

Commit

Permalink
Merge pull request #16 from PixelgenTechnologies/feature/update-webreport
Browse files Browse the repository at this point in the history

update webreport to v0.7.0
  • Loading branch information
fbdtemme authored Oct 5, 2023
2 parents 9b3201a + 6f0645b commit 0ef2956
Show file tree
Hide file tree
Showing 10 changed files with 170 additions and 133 deletions.
1 change: 1 addition & 0 deletions src/pixelator/graph/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -211,6 +211,7 @@ def edgelist_metrics(edgelist: pd.DataFrame) -> Dict[str, Union[int, float]]:
metrics["frac_upib_upia"] = round(metrics["total_upib"] / metrics["total_upia"], 2)
metrics["markers"] = edgelist["marker"].nunique()
metrics["edges"] = edgelist.shape[0]
metrics["mean_count"] = round(edgelist["count"].mean(), 2)

# Please note that we need to use observed=True
# here upia is a categorical column, and since not
Expand Down
67 changes: 49 additions & 18 deletions src/pixelator/report/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -450,6 +450,7 @@ def graph_and_annotate_metrics(
"upia": data["total_upia"],
"upib": data["total_upib"],
"umi": data["total_umi"],
"mean_count": data["mean_count"],
"vertices": data["vertices"],
"edges": data["edges"],
cell_column: data["components"],
Expand Down Expand Up @@ -519,6 +520,7 @@ def _collect_metrics(adata: AnnData) -> Dict[str, float]:
metrics = {}
metrics["cells_filtered"] = adata.n_obs
metrics["total_markers"] = adata.n_vars
metrics["total_umis"] = adata.obs["umi"].sum()
metrics["total_reads_cell"] = adata.obs["reads"].sum()
metrics["median_reads_cell"] = adata.obs["reads"].median()
metrics["mean_reads_cell"] = adata.obs["reads"].mean()
Expand All @@ -536,18 +538,20 @@ def _collect_metrics(adata: AnnData) -> Dict[str, float]:
metrics["mean_markers_cell"] = adata.obs["antibodies"].mean()
metrics["upib_per_upia"] = adata.obs["upib"].sum() / adata.obs["upia"].sum()

metrics[
"reads_of_aggregates"
] = 0 # This metric needs to be initialized for the webreport
# This metric needs to be initialized for the webreport
metrics["reads_of_aggregates"] = 0
metrics["umis_of_aggregates"] = 0

# Tau type will only be available if it has been added in the annotate step
if "tau_type" in adata.obs:
metrics["number_of_aggregates"] = np.sum(adata.obs["tau_type"] != "normal")
metrics["fraction_of_aggregates"] = np.sum(
adata.obs["tau_type"] != "normal"
) / len(adata.obs["tau_type"])
metrics["reads_of_aggregates"] = (
adata[adata.obs["tau_type"] != "normal"].obs["reads"].sum()
aggregates_mask = adata.obs["tau_type"] != "normal"
number_of_aggregates = np.sum(aggregates_mask)
metrics["number_of_aggregates"] = number_of_aggregates
metrics["fraction_of_aggregates"] = number_of_aggregates / len(
adata.obs["tau_type"]
)
metrics["reads_of_aggregates"] = adata[aggregates_mask].obs["reads"].sum()
metrics["umis_of_aggregates"] = adata[aggregates_mask].obs["umi"].sum()

if "min_size_threshold" in adata.uns:
metrics["minimum_size_threshold"] = adata.uns["min_size_threshold"]
Expand Down Expand Up @@ -597,6 +601,7 @@ def create_dynamic_report(
summary_preqc: pd.Series,
summary_demux: pd.Series,
summary_collapse: pd.Series,
summary_graph: pd.Series,
summary_annotate: pd.Series,
summary_cell_calling: pd.Series,
info: SampleInfo,
Expand All @@ -616,6 +621,8 @@ def create_dynamic_report(
:param summary_demux: a pd.Series with the `demux` stage metrics of the sample
:param summary_collapse: a pd.Series with the `collapse` stage metrics
of the sample
:param summary_graph: a pd.Series with the `graph` stage metrics
of the sample
:param summary_annotate: a pd.Series with the `annotate` metrics of the sample
:param summary_cell_calling: a pd.Series with the per cell calling metrics
of the sample
Expand All @@ -631,7 +638,7 @@ def create_dynamic_report(
antibodies_data_values = {
"antibody_reads": summary_demux["output"],
"antibody_reads_usable_per_cell": summary_cell_calling["total_reads_cell"],
"antibody_reads_in_aggregates": summary_cell_calling["reads_of_aggregates"],
"antibody_reads_in_outliers": summary_cell_calling["reads_of_aggregates"],
"unrecognized_antibodies": summary_demux["input"] - summary_demux["output"],
}

Expand All @@ -642,8 +649,8 @@ def create_dynamic_report(
"antibody_reads_usable_per_cell"
]
/ summary_all["reads"],
"fraction_antibody_reads_in_aggregates": antibodies_data_values[
"antibody_reads_in_aggregates"
"fraction_antibody_reads_in_outliers": antibodies_data_values[
"antibody_reads_in_outliers"
]
/ summary_all["reads"],
"fraction_unrecognized_antibodies": antibodies_data_values[
Expand All @@ -652,23 +659,45 @@ def create_dynamic_report(
/ summary_all["reads"],
}

placeholder_cell_predictions = {
"predicted_cell_type_b_cells": None,
"fraction_predicted_cell_type_b_cells": None,
"predicted_cell_type_cd4p_cells": None,
"fraction_predicted_cell_type_cd4p_cells": None,
"predicted_cell_type_cd8p_cells": None,
"fraction_predicted_cell_type_cd8p_cells": None,
"predicted_cell_type_monocytes": None,
"fraction_predicted_cell_type_monocytes": None,
"predicted_cell_type_nk_cells": None,
"fraction_predicted_cell_type_nk_cells": None,
"predicted_cell_type_unknown": None,
"fraction_predicted_cell_type_unknown": None,
}

fraction_discarded_umis = round(
summary_cell_calling["umis_of_aggregates"] / summary_cell_calling["total_umis"],
2,
)

metrics = Metrics(
number_of_cells=summary_cell_calling["cells_filtered"],
average_reads_usable_per_cell=summary_cell_calling["mean_reads_cell"],
average_reads_per_cell=(
summary_all["reads"] / summary_cell_calling["cells_filtered"]
),
median_antibody_molecules_per_cell=summary_cell_calling["median_umi_cell"],
average_upis_per_cell=summary_cell_calling["mean_upia_cell"],
average_umis_per_upi=summary_cell_calling["mean_umi_upia_cell"],
fraction_reads_in_cells=summary_cell_calling["total_reads_cell"]
/ summary_all["reads"],
median_antibodies_per_cell=summary_cell_calling["median_markers_cell"],
total_antibodies_detected=summary_cell_calling["total_markers"],
average_upias_per_cell=summary_cell_calling["mean_upia_cell"],
average_umis_per_upia=summary_cell_calling["mean_umi_upia_cell"],
fraction_reads_in_cells=(
summary_cell_calling["total_reads_cell"] / summary_all["reads"]
),
fraction_discarded_umis=fraction_discarded_umis,
total_unique_antibodies_detected=summary_cell_calling["total_markers"],
number_of_reads=summary_all["reads"],
number_of_short_reads_skipped=summary_preqc["too_short_reads"],
fraction_valid_pbs=summary_all["adapterqc"] / summary_all["reads"],
fraction_valid_umis=summary_collapse["input"] / summary_all["reads"],
average_reads_per_molecule=summary_graph["mean_count"],
sequencing_saturation=summary_all["duplication"],
fraction_q30_bases_in_antibody_barcode=summary_amplicon["fraction_q30_barcode"],
fraction_q30_bases_in_umi=summary_amplicon["fraction_q30_umi"],
Expand All @@ -679,6 +708,7 @@ def create_dynamic_report(
fraction_q30_bases_in_read=summary_amplicon["fraction_q30"],
**antibodies_data_values, # type: ignore
**antibodies_data_fractions, # type: ignore
**placeholder_cell_predictions, # type: ignore
)

data = collect_report_data(input_path, sample_id)
Expand Down Expand Up @@ -876,6 +906,7 @@ def make_report(
summary_preqc=summary_preqc.loc[sample, :],
summary_demux=summary_demux.loc[sample, :],
summary_collapse=summary_collapse.loc[sample, :],
summary_graph=summary_graph.loc[sample, :],
summary_annotate=summary_annotate.loc[sample, :],
summary_cell_calling=summary_cell_calling.loc[sample, :],
info=sample_info,
Expand Down
61 changes: 21 additions & 40 deletions src/pixelator/report/webreport/builder.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
"""
"""Webreport builder.
The WebreportBuilder is used to inject all webreport data into the
template and write the final webreport to a file.
Expand Down Expand Up @@ -26,8 +27,7 @@


class WebreportBuilder:
"""
Build a webreport from a html template and the required data (CSV and JSON).
"""Build a webreport from a html template and the required data (CSV and JSON).
This will parse a webreport template and, using the write method, inject CSV
and JSON strings into the template using `html <script>` tags under the
Expand All @@ -45,11 +45,10 @@ class WebreportBuilder:
"""

_JSON_OPTIONS: ClassVar[Dict[str, Any]] = {"indent": None, "separators": (",", ":")}
VERSIONS_CONSTRAINTS: ClassVar[List[str]] = ["<0.7.0", ">=0.5.0"]
VERSIONS_CONSTRAINTS: ClassVar[List[str]] = ["<0.8.0", ">=0.7.0"]

def __init__(self, template: Union[str, Path] = DEFAULT_WEBREPORT_TEMPLATE):
"""
Construct a webreport builder given a html template.
"""Construct a webreport builder given a html template.
:param template: path to the webreport template
:raises FileNotFoundError: if the template file does not exist
Expand All @@ -61,11 +60,11 @@ def __init__(self, template: Union[str, Path] = DEFAULT_WEBREPORT_TEMPLATE):
self.template = Path(template)

def _load_template(self) -> Tuple[LxmlElement, LxmlElement]:
"""
Load and parse the webreport template.
"""Load and parse the webreport template.
:raises AssertionError: if no body tag is found in the template
:raises AssertionError: if the webreport template version is not supported
:rtype: Tuple[LxmlElement, LxmlElement]
"""
logger.debug("Loading web report template %s", self.template)

Expand Down Expand Up @@ -101,16 +100,15 @@ def _check_version_compatibility(self, version: str):
f"not satisfy constraint: {constraint}"
)

def write(
def write( # noqa: DOC502
self,
fp: BinaryIO,
sample_info: SampleInfo,
metrics: Metrics,
data: WebreportData,
metrics_definition_file: Optional[PathType] = None,
) -> None:
"""
Inject given data into the webreport and write the results to a stream.
"""Inject given data into the webreport and write the results to a stream.
:param fp: binary stream to write the report to
:param sample_info: Sample information
Expand Down Expand Up @@ -171,9 +169,7 @@ def write(
def _build_metric_definition_file_element(
self, metrics_definition_file: PathType, template_body: LxmlElement
) -> LxmlElement:
"""
Create a lxml HTML object to inject the metrics definition file.
"""
"""Create a lxml HTML object to inject the metrics definition file."""
template_body.cssselect('script[data-type="metric-definitions"]')
if len(template_body) > 0:
metrics_definition_file_el = template_body[0]
Expand All @@ -197,9 +193,7 @@ def _build_metric_definition_file_element(
def _build_sample_and_metrics_element(
self, sample_info: SampleInfo, metrics: Metrics
) -> LxmlElement:
"""
Create a lxml HTML object to inject the metrics and sample info.
"""
"""Create a lxml HTML object to inject the metrics and sample info."""
metrics_el = E.SCRIPT(
**{
"type": "application/octet-stream;base64",
Expand All @@ -217,8 +211,7 @@ def _build_sample_and_metrics_element(
return metrics_el

def _build_ranked_component_size_element(self, data: str) -> LxmlElement:
"""
Create a lxml HTML injecting the ranked component data.
"""Create a lxml HTML injecting the ranked component data.
This data is used for the rank plot and the component size vs marker
scatter plot.
Expand All @@ -234,9 +227,7 @@ def _build_ranked_component_size_element(self, data: str) -> LxmlElement:
return ranked_component_size_el

def _build_component_data_element(self, data: str) -> LxmlElement:
"""
Create a lxml HTML injecting the component data.
"""
"""Create a lxml HTML injecting the component data."""
component_data_el = E.SCRIPT(
**{
"type": "application/octet-stream;base64",
Expand All @@ -248,23 +239,19 @@ def _build_component_data_element(self, data: str) -> LxmlElement:
return component_data_el

def _build_antibodies_per_component_element(self, data: str) -> LxmlElement:
    """Create a lxml HTML element injecting the antibodies per component data.

    :param data: the text to embed in the element; the element is declared
        with type `text/csv`
    :returns: an element built with `E.SCRIPT`, tagged with
        `data-type="antibodies-per-component"`, whose text is `data`
    :rtype: LxmlElement
    """
    # A single "data-type" key: a dict literal with the key repeated keeps
    # only the last value, so the former "antibodies-per-cell" entry was dead.
    antibodies_per_component_el = E.SCRIPT(
        **{
            "type": "text/csv",
            "data-type": "antibodies-per-component",
            "data-dataset-id": "0",
        }
    )
    antibodies_per_component_el.text = data
    return antibodies_per_component_el

def _build_sequencing_saturation_element(self, data: str):
"""
Create an HTML object injecting the sequencing saturation data.
"""
"""Create an HTML object injecting the sequencing saturation data."""
sequencing_saturation_el = E.SCRIPT(
**{
"type": "text/csv",
Expand All @@ -276,27 +263,23 @@ def _build_sequencing_saturation_element(self, data: str):
return sequencing_saturation_el

def _build_antibody_percentages_element(self, data: str) -> LxmlElement:
    """Create an HTML element injecting the antibody percentages data.

    :param data: the text to embed in the element; the element is declared
        with type `text/csv`
    :returns: an element built with `E.SCRIPT`, tagged with
        `data-type="antibody-percentages"`, whose text is `data`
    :rtype: LxmlElement
    """
    # A single "data-type" key: a dict literal with the key repeated keeps
    # only the last value, so the former "antibody-counts" entry was dead.
    antibody_percentages_el = E.SCRIPT(
        **{
            "type": "text/csv",
            "data-type": "antibody-percentages",
            "data-dataset-id": "0",
        }
    )
    antibody_percentages_el.text = data
    return antibody_percentages_el

def _build_antibody_counts_element(self, data: str) -> LxmlElement:
"""
Create an HTML object injecting the antibody counts data.
"""
"""Create an HTML object injecting the antibody counts data."""
antibody_distribution_el = E.SCRIPT(
**{
"type": "application/octet-stream;base64",
"data-type": "antibody-distribution",
"data-type": "antibody-counts",
"data-dataset-id": "0",
}
)
Expand All @@ -305,7 +288,5 @@ def _build_antibody_counts_element(self, data: str) -> LxmlElement:

@staticmethod
def _compress_data(data: str):
"""
Compress the data using gzip and encode with base64.
"""
"""Compress the data using gzip and encode with base64."""
return base64.b64encode(gzip.compress(data.encode("utf-8")))
Loading

0 comments on commit 0ef2956

Please sign in to comment.