Merge pull request #16 from PixelgenTechnologies/feature/update-webreport

update webreport to v0.7.0
PixelgenTechnologies · Oct 5, 2023 · 0ef2956
2 parents 9b3201a + 6f0645b
commit 0ef2956
Show file tree

Hide file tree

Showing 10 changed files with 170 additions and 133 deletions.
diff --git a/src/pixelator/graph/utils.py b/src/pixelator/graph/utils.py
@@ -211,6 +211,7 @@ def edgelist_metrics(edgelist: pd.DataFrame) -> Dict[str, Union[int, float]]:
     metrics["frac_upib_upia"] = round(metrics["total_upib"] / metrics["total_upia"], 2)
     metrics["markers"] = edgelist["marker"].nunique()
     metrics["edges"] = edgelist.shape[0]
+    metrics["mean_count"] = round(edgelist["count"].mean(), 2)
 
     # Please note that we need to use observed=True
     # here upia is a categorical column, and since not

diff --git a/src/pixelator/report/__init__.py b/src/pixelator/report/__init__.py
@@ -450,6 +450,7 @@ def graph_and_annotate_metrics(
                     "upia": data["total_upia"],
                     "upib": data["total_upib"],
                     "umi": data["total_umi"],
+                    "mean_count": data["mean_count"],
                     "vertices": data["vertices"],
                     "edges": data["edges"],
                     cell_column: data["components"],
@@ -519,6 +520,7 @@ def _collect_metrics(adata: AnnData) -> Dict[str, float]:
         metrics = {}
         metrics["cells_filtered"] = adata.n_obs
         metrics["total_markers"] = adata.n_vars
+        metrics["total_umis"] = adata.obs["umi"].sum()
         metrics["total_reads_cell"] = adata.obs["reads"].sum()
         metrics["median_reads_cell"] = adata.obs["reads"].median()
         metrics["mean_reads_cell"] = adata.obs["reads"].mean()
@@ -536,18 +538,20 @@ def _collect_metrics(adata: AnnData) -> Dict[str, float]:
         metrics["mean_markers_cell"] = adata.obs["antibodies"].mean()
         metrics["upib_per_upia"] = adata.obs["upib"].sum() / adata.obs["upia"].sum()
 
-        metrics[
-            "reads_of_aggregates"
-        ] = 0  # This metric needs to be initialized for the webreport
+        # This metric needs to be initialized for the webreport
+        metrics["reads_of_aggregates"] = 0
+        metrics["umis_of_aggregates"] = 0
+
         # Tau type will only be available if it has been added in the annotate step
         if "tau_type" in adata.obs:
-            metrics["number_of_aggregates"] = np.sum(adata.obs["tau_type"] != "normal")
-            metrics["fraction_of_aggregates"] = np.sum(
-                adata.obs["tau_type"] != "normal"
-            ) / len(adata.obs["tau_type"])
-            metrics["reads_of_aggregates"] = (
-                adata[adata.obs["tau_type"] != "normal"].obs["reads"].sum()
+            aggregates_mask = adata.obs["tau_type"] != "normal"
+            number_of_aggregates = np.sum(aggregates_mask)
+            metrics["number_of_aggregates"] = number_of_aggregates
+            metrics["fraction_of_aggregates"] = number_of_aggregates / len(
+                adata.obs["tau_type"]
             )
+            metrics["reads_of_aggregates"] = adata[aggregates_mask].obs["reads"].sum()
+            metrics["umis_of_aggregates"] = adata[aggregates_mask].obs["umi"].sum()
 
         if "min_size_threshold" in adata.uns:
             metrics["minimum_size_threshold"] = adata.uns["min_size_threshold"]
@@ -597,6 +601,7 @@ def create_dynamic_report(
     summary_preqc: pd.Series,
     summary_demux: pd.Series,
     summary_collapse: pd.Series,
+    summary_graph: pd.Series,
     summary_annotate: pd.Series,
     summary_cell_calling: pd.Series,
     info: SampleInfo,
@@ -616,6 +621,8 @@ def create_dynamic_report(
     :param summary_demux: a pd.Series with the `demux` stage metrics of the sample
     :param summary_collapse: a pd.Series with the `collapse` stage metrics
         of the sample
+    :param summary_graph: a pd.Series with the `graph` stage metrics
+        of the sample
     :param summary_annotate: a pd.Series with the `annotate` metrics of the sample
     :param summary_cell_calling: a pd.Series with the per cell calling metrics
         of the sample
@@ -631,7 +638,7 @@ def create_dynamic_report(
     antibodies_data_values = {
         "antibody_reads": summary_demux["output"],
         "antibody_reads_usable_per_cell": summary_cell_calling["total_reads_cell"],
-        "antibody_reads_in_aggregates": summary_cell_calling["reads_of_aggregates"],
+        "antibody_reads_in_outliers": summary_cell_calling["reads_of_aggregates"],
         "unrecognized_antibodies": summary_demux["input"] - summary_demux["output"],
     }
 
@@ -642,8 +649,8 @@ def create_dynamic_report(
             "antibody_reads_usable_per_cell"
         ]
         / summary_all["reads"],
-        "fraction_antibody_reads_in_aggregates": antibodies_data_values[
-            "antibody_reads_in_aggregates"
+        "fraction_antibody_reads_in_outliers": antibodies_data_values[
+            "antibody_reads_in_outliers"
         ]
         / summary_all["reads"],
         "fraction_unrecognized_antibodies": antibodies_data_values[
@@ -652,23 +659,45 @@ def create_dynamic_report(
         / summary_all["reads"],
     }
 
+    placeholder_cell_predictions = {
+        "predicted_cell_type_b_cells": None,
+        "fraction_predicted_cell_type_b_cells": None,
+        "predicted_cell_type_cd4p_cells": None,
+        "fraction_predicted_cell_type_cd4p_cells": None,
+        "predicted_cell_type_cd8p_cells": None,
+        "fraction_predicted_cell_type_cd8p_cells": None,
+        "predicted_cell_type_monocytes": None,
+        "fraction_predicted_cell_type_monocytes": None,
+        "predicted_cell_type_nk_cells": None,
+        "fraction_predicted_cell_type_nk_cells": None,
+        "predicted_cell_type_unknown": None,
+        "fraction_predicted_cell_type_unknown": None,
+    }
+
+    fraction_discarded_umis = round(
+        summary_cell_calling["umis_of_aggregates"] / summary_cell_calling["total_umis"],
+        2,
+    )
+
     metrics = Metrics(
         number_of_cells=summary_cell_calling["cells_filtered"],
         average_reads_usable_per_cell=summary_cell_calling["mean_reads_cell"],
         average_reads_per_cell=(
             summary_all["reads"] / summary_cell_calling["cells_filtered"]
         ),
         median_antibody_molecules_per_cell=summary_cell_calling["median_umi_cell"],
-        average_upis_per_cell=summary_cell_calling["mean_upia_cell"],
-        average_umis_per_upi=summary_cell_calling["mean_umi_upia_cell"],
-        fraction_reads_in_cells=summary_cell_calling["total_reads_cell"]
-        / summary_all["reads"],
-        median_antibodies_per_cell=summary_cell_calling["median_markers_cell"],
-        total_antibodies_detected=summary_cell_calling["total_markers"],
+        average_upias_per_cell=summary_cell_calling["mean_upia_cell"],
+        average_umis_per_upia=summary_cell_calling["mean_umi_upia_cell"],
+        fraction_reads_in_cells=(
+            summary_cell_calling["total_reads_cell"] / summary_all["reads"]
+        ),
+        fraction_discarded_umis=fraction_discarded_umis,
+        total_unique_antibodies_detected=summary_cell_calling["total_markers"],
         number_of_reads=summary_all["reads"],
         number_of_short_reads_skipped=summary_preqc["too_short_reads"],
         fraction_valid_pbs=summary_all["adapterqc"] / summary_all["reads"],
         fraction_valid_umis=summary_collapse["input"] / summary_all["reads"],
+        average_reads_per_molecule=summary_graph["mean_count"],
         sequencing_saturation=summary_all["duplication"],
         fraction_q30_bases_in_antibody_barcode=summary_amplicon["fraction_q30_barcode"],
         fraction_q30_bases_in_umi=summary_amplicon["fraction_q30_umi"],
@@ -679,6 +708,7 @@ def create_dynamic_report(
         fraction_q30_bases_in_read=summary_amplicon["fraction_q30"],
         **antibodies_data_values,  # type: ignore
         **antibodies_data_fractions,  # type: ignore
+        **placeholder_cell_predictions,  # type: ignore
     )
 
     data = collect_report_data(input_path, sample_id)
@@ -876,6 +906,7 @@ def make_report(
             summary_preqc=summary_preqc.loc[sample, :],
             summary_demux=summary_demux.loc[sample, :],
             summary_collapse=summary_collapse.loc[sample, :],
+            summary_graph=summary_graph.loc[sample, :],
             summary_annotate=summary_annotate.loc[sample, :],
             summary_cell_calling=summary_cell_calling.loc[sample, :],
             info=sample_info,

diff --git a/src/pixelator/report/webreport/builder.py b/src/pixelator/report/webreport/builder.py
@@ -1,4 +1,5 @@
-"""
+"""Webreport builder.
+
 The WebreportBuilder is used to inject all webreport data into the
 template and write the final webreport to a file.
 
@@ -26,8 +27,7 @@
 
 
 class WebreportBuilder:
-    """
-    Build a webreport from a html template and the required data (CSV and JSON).
+    """Build a webreport from a html template and the required data (CSV and JSON).
 
     This will parse a webreport template and, using the write method, inject CSV
     and JSON strings into the template using `html <script>` tags under the
@@ -45,11 +45,10 @@ class WebreportBuilder:
     """
 
     _JSON_OPTIONS: ClassVar[Dict[str, Any]] = {"indent": None, "separators": (",", ":")}
-    VERSIONS_CONSTRAINTS: ClassVar[List[str]] = ["<0.7.0", ">=0.5.0"]
+    VERSIONS_CONSTRAINTS: ClassVar[List[str]] = ["<0.8.0", ">=0.7.0"]
 
     def __init__(self, template: Union[str, Path] = DEFAULT_WEBREPORT_TEMPLATE):
-        """
-        Construct a webreport builder given a html template.
+        """Construct a webreport builder given a html template.
 
         :param template: path to the webreport template
         :raises FileNotFoundError: if the template file does not exist
@@ -61,11 +60,11 @@ def __init__(self, template: Union[str, Path] = DEFAULT_WEBREPORT_TEMPLATE):
         self.template = Path(template)
 
     def _load_template(self) -> Tuple[LxmlElement, LxmlElement]:
-        """
-        Load and parse the webreport template.
+        """Load and parse the webreport template.
 
         :raises AssertionError: if no body tag is found in the template
         :raises AssertionError: if the webreport template version is not supported
+        :rtype: Tuple[LxmlElement, LxmlElement]
         """
         logger.debug("Loading web report template %s", self.template)
 
@@ -101,16 +100,15 @@ def _check_version_compatibility(self, version: str):
                     f"not satisfy constraint: {constraint}"
                 )
 
-    def write(
+    def write(  # noqa: DOC502
         self,
         fp: BinaryIO,
         sample_info: SampleInfo,
         metrics: Metrics,
         data: WebreportData,
         metrics_definition_file: Optional[PathType] = None,
     ) -> None:
-        """
-        Inject given data into the webreport and write the results to a stream.
+        """Inject given data into the webreport and write the results to a stream.
 
         :param fp: binary stream to write the report to
         :param sample_info: Sample information
@@ -171,9 +169,7 @@ def write(
     def _build_metric_definition_file_element(
         self, metrics_definition_file: PathType, template_body: LxmlElement
     ) -> LxmlElement:
-        """
-        Create a lxml HTML object to inject the metrics definition file.
-        """
+        """Create a lxml HTML object to inject the metrics definition file."""
         template_body.cssselect('script[data-type="metric-definitions"]')
         if len(template_body) > 0:
             metrics_definition_file_el = template_body[0]
@@ -197,9 +193,7 @@ def _build_metric_definition_file_element(
     def _build_sample_and_metrics_element(
         self, sample_info: SampleInfo, metrics: Metrics
     ) -> LxmlElement:
-        """
-        Create a lxml HTML object to inject the metrics and sample info.
-        """
+        """Create a lxml HTML object to inject the metrics and sample info."""
         metrics_el = E.SCRIPT(
             **{
                 "type": "application/octet-stream;base64",
@@ -217,8 +211,7 @@ def _build_sample_and_metrics_element(
         return metrics_el
 
     def _build_ranked_component_size_element(self, data: str) -> LxmlElement:
-        """
-        Create a lxml HTML injecting the ranked component data.
+        """Create a lxml HTML injecting the ranked component data.
 
         This data is used for the rank plot and the component size vs marker
         scatter plot.
@@ -234,9 +227,7 @@ def _build_ranked_component_size_element(self, data: str) -> LxmlElement:
         return ranked_component_size_el
 
     def _build_component_data_element(self, data: str) -> LxmlElement:
-        """
-        Create a lxml HTML injecting the component data.
-        """
+        """Create a lxml HTML injecting the component data."""
         component_data_el = E.SCRIPT(
             **{
                 "type": "application/octet-stream;base64",
@@ -248,23 +239,19 @@ def _build_component_data_element(self, data: str) -> LxmlElement:
         return component_data_el
 
     def _build_antibodies_per_component_element(self, data: str) -> LxmlElement:
-        """
-        Create a lxml HTML injecting the antibodies_per_cell data.
-        """
+        """Create a lxml HTML injecting the antibodies_per_cell data."""
         antibodies_per_cell_el = E.SCRIPT(
             **{
                 "type": "text/csv",
-                "data-type": "antibodies-per-cell",
+                "data-type": "antibodies-per-component",
                 "data-dataset-id": "0",
             }
         )
         antibodies_per_cell_el.text = data
         return antibodies_per_cell_el
 
     def _build_sequencing_saturation_element(self, data: str):
-        """
-        Create an HTML object injecting the sequencing saturation data.
-        """
+        """Create an HTML object injecting the sequencing saturation data."""
         sequencing_saturation_el = E.SCRIPT(
             **{
                 "type": "text/csv",
@@ -276,27 +263,23 @@ def _build_sequencing_saturation_element(self, data: str):
         return sequencing_saturation_el
 
     def _build_antibody_percentages_element(self, data: str) -> LxmlElement:
-        """
-        Create a HTML object injecting the antibody counts data.
-        """
+        """Create an HTML object injecting the antibody counts data."""
         antibody_counts_el = E.SCRIPT(
             **{
                 "type": "text/csv",
-                "data-type": "antibody-counts",
+                "data-type": "antibody-percentages",
                 "data-dataset-id": "0",
             }
         )
         antibody_counts_el.text = data
         return antibody_counts_el
 
     def _build_antibody_counts_element(self, data: str) -> LxmlElement:
-        """
-        Create an HTML object injecting the antibody counts data.
-        """
+        """Create an HTML object injecting the antibody counts data."""
         antibody_distribution_el = E.SCRIPT(
             **{
                 "type": "application/octet-stream;base64",
-                "data-type": "antibody-distribution",
+                "data-type": "antibody-counts",
                 "data-dataset-id": "0",
             }
         )
@@ -305,7 +288,5 @@ def _build_antibody_counts_element(self, data: str) -> LxmlElement:
 
     @staticmethod
     def _compress_data(data: str):
-        """
-        Compress the data using gzip and encode with base64.
-        """
+        """Compress the data using gzip and encode with base64."""
         return base64.b64encode(gzip.compress(data.encode("utf-8")))