Merge pull request #61 from genomic-medicine-sweden/solve_some_issues

Solve some issues
genomic-medicine-sweden · Dec 12, 2023 · 707b6d8 · 707b6d8
2 parents bccf72d + 00bab64
commit 707b6d8
Show file tree

Hide file tree

Showing 22 changed files with 795 additions and 571 deletions.
diff --git a/.nf-core.yml b/.nf-core.yml
@@ -7,6 +7,7 @@ lint:
     - docs/images/tomte_logo.eps
     - docs/images/tomte_pipeline_metromap.eps
     - docs/images/tomte_pipeline_metromap.png
+    - conf/modules.config
   files_unchanged: false
     - assets/sendmail_template.txt
     - .github/CONTRIBUTING.md

diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -3,14 +3,42 @@
 The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/)
 and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
 
-## 1.0.0 - Nisse [2023-11-06]
+## 1.1.0 - Rudolph [xxxx-xx-xx]
 
 Initial release of genomic-medicine-sweden/tomte, created with the [nf-core](https://nf-co.re/) template.
 
 ### `Added`
 
+- switch_vep, switch_build_tracks and switch_stringtie to make the pipeline more versatile
+
 ### `Fixed`
 
+- Renamed the existing switches so that they all start with the `switch_` prefix: subsample_region_switch to switch_subsample_region, downsample_switch to switch_downsample, run_drop_ae_switch to switch_drop_ae and run_drop_as_switch to switch_drop_as
+- Separated modules.config into smaller configs
+
 ### `Dependencies`
 
 ### `Deprecated`
+
+## 1.0.0 - Nisse [2023-11-06]
+
+### `Added`
+
+- Trim reads with FASTP
+- Read mapping with STAR
+- Transcript quantification with Salmon
+- Output junction tracks
+- Output bigwig
+- Choice to subsample overrepresented regions with Samtools
+- Choice to downsample number of reads with Samtools
+- Detection of aberrant expression with DROP
+- Detection of aberrant splicing with DROP
+- Filter aberrant expression and aberrant splicing results
+- Guided transcript assembly with StringTie
+- Filtering results of guided transcript assembly with GffCompare
+- SNV calling with GATK or BCFtools mpileup
+- Allele-specific read counting with ASEReadCounter
+- Assess allelic imbalance with BootstrapAnn
+- Annotation with VEP
+- Alignment QC with Picard CollectRnaSeqMetrics
+- Present QCs with MultiQC
diff --git a/bin/drop_filter_results.py b/bin/drop_filter_results.py
@@ -31,8 +31,12 @@ def annotate_with_hgnc(df_family_aberrant_expression_top_hits: DataFrame, out_dr
     return df_genes.merge(df_family_aberrant_expression_top_hits, left_on="gene_id", right_on="geneID")
 
 
-def filter_by_gene_panel(df_family_top_hits: DataFrame, gene_panel: str, module_name: str) -> DataFrame:
+def filter_by_gene_panel(
+    df_family_top_hits: DataFrame, gene_panel: str, module_name: str, case_id: str, output_file_subfix: str
+) -> DataFrame:
     """Filter out from results any gene that is not present in the provided gene panel."""
+    if case_id != "":
+        case_id += "_"
     if gene_panel != "None":
         df_panel: DataFrame = read_csv(
             gene_panel, sep="\t", names=GENE_PANEL_HEADER, header=None, comment="#", index_col=False
@@ -42,12 +46,17 @@ def filter_by_gene_panel(df_family_top_hits: DataFrame, gene_panel: str, module_
             df_family_top_hits, left_on="hgnc_symbol", right_on="hgncSymbol"
         )
         df_clinical = df_clinical.drop(columns=["hgnc_symbol"])
-        file_name = f"{module_name}_provided_samples_top_hits_filtered.tsv"
+        file_name = f"{case_id}{module_name}_{output_file_subfix}.tsv"
         df_clinical.to_csv(file_name, sep="\t", index=False, header=True)
 
 
 def filter_outrider_results(
-    samples: list, gene_panel: str, out_drop_aberrant_expression_rds: str, out_drop_gene_name: str
+    samples: list,
+    gene_panel: str,
+    out_drop_aberrant_expression_rds: str,
+    out_drop_gene_name: str,
+    case_id: str,
+    output_file_subfix_ae: str,
 ):
     """
     Filter results to get only those from the sample(s) provided.
@@ -79,10 +88,14 @@ def filter_outrider_results(
     df_family_annotated_aberrant_expression_top_hits.to_csv(
         "OUTRIDER_provided_samples_top_hits.tsv", sep="\t", index=False, header=True
     )
-    filter_by_gene_panel(df_family_annotated_aberrant_expression_top_hits, gene_panel, "OUTRIDER")
+    filter_by_gene_panel(
+        df_family_annotated_aberrant_expression_top_hits, gene_panel, "OUTRIDER", case_id, output_file_subfix_ae
+    )
 
 
-def filter_fraser_result(samples: list, gene_panel: str, out_drop_aberrant_splicing_tsv: str):
+def filter_fraser_result(
+    samples: list, gene_panel: str, out_drop_aberrant_splicing_tsv: str, case_id: str, output_file_subfix_as: str
+):
     """
     Filter results to get only those from the sample(s) provided.
     Two tsvs will be outputed:
@@ -97,7 +110,7 @@ def filter_fraser_result(samples: list, gene_panel: str, out_drop_aberrant_splic
     df_results_family_aberrant_splicing.to_csv(
         "FRASER_provided_samples_top_hits.tsv", sep="\t", index=False, header=True
     )
-    filter_by_gene_panel(df_results_family_aberrant_splicing, gene_panel, "FRASER")
+    filter_by_gene_panel(df_results_family_aberrant_splicing, gene_panel, "FRASER", case_id, output_file_subfix_as)
 
 
 def parse_args(argv=None):
@@ -134,13 +147,34 @@ def parse_args(argv=None):
         help="Path to gene name annotion, output from DROP Aberrant Expression",
         required=False,
     )
+    parser.add_argument(
+        "--case_id",
+        type=str,
+        default="",
+        help="Case id",
+        required=False,
+    )
+    parser.add_argument(
+        "--output_file_subfix_ae",
+        type=str,
+        default="provided_samples_top_hits_filtered",
+        help="Subfix of Aberrant Expression output file",
+        required=False,
+    )
     parser.add_argument(
         "--out_drop_as_tsv",
         type=str,
         default="None",
         help="Path to tsv output from DROP Aberrant Splicing",
         required=False,
     )
+    parser.add_argument(
+        "--output_file_subfix_as",
+        type=str,
+        default="provided_samples_top_hits_filtered",
+        help="Subfix of Aberrant Splicing output file",
+        required=False,
+    )
     parser.add_argument(
         "--version",
         action="version",
@@ -157,9 +191,15 @@ def main():
         gene_panel=args.gene_panel,
         out_drop_aberrant_expression_rds=args.drop_ae_rds,
         out_drop_gene_name=args.out_drop_gene_name,
+        case_id=args.case_id,
+        output_file_subfix_ae=args.output_file_subfix_ae,
     )
     filter_fraser_result(
-        samples=args.samples, gene_panel=args.gene_panel, out_drop_aberrant_splicing_tsv=args.out_drop_as_tsv
+        samples=args.samples,
+        gene_panel=args.gene_panel,
+        out_drop_aberrant_splicing_tsv=args.out_drop_as_tsv,
+        case_id=args.case_id,
+        output_file_subfix_as=args.output_file_subfix_as,
     )