Skip to content

Commit

Permalink
Merge pull request #61 from genomic-medicine-sweden/solve_some_issues
Browse files Browse the repository at this point in the history
Solve some issues
  • Loading branch information
Lucpen authored Dec 12, 2023
2 parents bccf72d + 00bab64 commit 707b6d8
Show file tree
Hide file tree
Showing 22 changed files with 795 additions and 571 deletions.
1 change: 1 addition & 0 deletions .nf-core.yml
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ lint:
- docs/images/tomte_logo.eps
- docs/images/tomte_pipeline_metromap.eps
- docs/images/tomte_pipeline_metromap.png
- conf/modules.config
files_unchanged: false
- assets/sendmail_template.txt
- .github/CONTRIBUTING.md
Expand Down
30 changes: 29 additions & 1 deletion CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -3,14 +3,42 @@
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/)
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).

## 1.0.0 - Nisse [2023-11-06]
## 1.1.0 - Rudolph [xxxx-xx-xx]

Initial release of genomic-medicine-sweden/tomte, created with the [nf-core](https://nf-co.re/) template.

### `Added`

- switch_vep, switch_build_tracks and switch_stringtie to make the pipeline more versatile

### `Fixed`

- Renamed the other switches (subsample_region_switch, downsample_switch, run_drop_ae_switch and run_drop_as_switch) so that they all start with switch\* (switch_subsample_region, switch_downsample, switch_drop_ae and switch_drop_as)
- Separated modules.config into smaller configs

### `Dependencies`

### `Deprecated`

## 1.0.0 - Nisse [2023-11-06]

### `Added`

- Trim reads with FASTP
- Read mapping with STAR
- Transcript quantification with Salmon
- Output junction tracks
- Output bigwig
- Choice to subsample overrepresented regions with Samtools
- Choice to downsample number of reads with Samtools
- Detection of aberrant expression with DROP
- Detection of aberrant splicing with DROP
- Filter aberrant expression and aberrant splicing results
- Guided transcript assembly with StringTie
- Filtering results of guided transcript assembly with GffCompare
- SNV calling with GATK or BCFtools mpileup
- Allele-specific read counting with ASEReadCounter
- Assess allelic imbalance with BootstrapAnn
- Annotation with VEP
- Alignment QC with Picard CollectRnaSeqMetrics
- Present QCs with MultiQC
54 changes: 47 additions & 7 deletions bin/drop_filter_results.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,8 +31,12 @@ def annotate_with_hgnc(df_family_aberrant_expression_top_hits: DataFrame, out_dr
return df_genes.merge(df_family_aberrant_expression_top_hits, left_on="gene_id", right_on="geneID")


def filter_by_gene_panel(df_family_top_hits: DataFrame, gene_panel: str, module_name: str) -> DataFrame:
def filter_by_gene_panel(
df_family_top_hits: DataFrame, gene_panel: str, module_name: str, case_id: str, output_file_subfix: str
) -> DataFrame:
"""Filter out from results any gene that is not present in the provided gene panel."""
if case_id != "":
case_id += "_"
if gene_panel != "None":
df_panel: DataFrame = read_csv(
gene_panel, sep="\t", names=GENE_PANEL_HEADER, header=None, comment="#", index_col=False
Expand All @@ -42,12 +46,17 @@ def filter_by_gene_panel(df_family_top_hits: DataFrame, gene_panel: str, module_
df_family_top_hits, left_on="hgnc_symbol", right_on="hgncSymbol"
)
df_clinical = df_clinical.drop(columns=["hgnc_symbol"])
file_name = f"{module_name}_provided_samples_top_hits_filtered.tsv"
file_name = f"{case_id}{module_name}_{output_file_subfix}.tsv"
df_clinical.to_csv(file_name, sep="\t", index=False, header=True)


def filter_outrider_results(
samples: list, gene_panel: str, out_drop_aberrant_expression_rds: str, out_drop_gene_name: str
samples: list,
gene_panel: str,
out_drop_aberrant_expression_rds: str,
out_drop_gene_name: str,
case_id: str,
output_file_subfix_ae: str,
):
"""
Filter results to get only those from the sample(s) provided.
Expand Down Expand Up @@ -79,10 +88,14 @@ def filter_outrider_results(
df_family_annotated_aberrant_expression_top_hits.to_csv(
"OUTRIDER_provided_samples_top_hits.tsv", sep="\t", index=False, header=True
)
filter_by_gene_panel(df_family_annotated_aberrant_expression_top_hits, gene_panel, "OUTRIDER")
filter_by_gene_panel(
df_family_annotated_aberrant_expression_top_hits, gene_panel, "OUTRIDER", case_id, output_file_subfix_ae
)


def filter_fraser_result(samples: list, gene_panel: str, out_drop_aberrant_splicing_tsv: str):
def filter_fraser_result(
samples: list, gene_panel: str, out_drop_aberrant_splicing_tsv: str, case_id: str, output_file_subfix_as: str
):
"""
Filter results to get only those from the sample(s) provided.
Two tsvs will be output:
Expand All @@ -97,7 +110,7 @@ def filter_fraser_result(samples: list, gene_panel: str, out_drop_aberrant_splic
df_results_family_aberrant_splicing.to_csv(
"FRASER_provided_samples_top_hits.tsv", sep="\t", index=False, header=True
)
filter_by_gene_panel(df_results_family_aberrant_splicing, gene_panel, "FRASER")
filter_by_gene_panel(df_results_family_aberrant_splicing, gene_panel, "FRASER", case_id, output_file_subfix_as)


def parse_args(argv=None):
Expand Down Expand Up @@ -134,13 +147,34 @@ def parse_args(argv=None):
help="Path to gene name annotion, output from DROP Aberrant Expression",
required=False,
)
parser.add_argument(
"--case_id",
type=str,
default="",
help="Case id",
required=False,
)
parser.add_argument(
"--output_file_subfix_ae",
type=str,
default="provided_samples_top_hits_filtered",
help="Subfix of Aberrant Expression output file",
required=False,
)
parser.add_argument(
"--out_drop_as_tsv",
type=str,
default="None",
help="Path to tsv output from DROP Aberrant Splicing",
required=False,
)
parser.add_argument(
"--output_file_subfix_as",
type=str,
default="provided_samples_top_hits_filtered",
help="Subfix of Aberrant Splicing output file",
required=False,
)
parser.add_argument(
"--version",
action="version",
Expand All @@ -157,9 +191,15 @@ def main():
gene_panel=args.gene_panel,
out_drop_aberrant_expression_rds=args.drop_ae_rds,
out_drop_gene_name=args.out_drop_gene_name,
case_id=args.case_id,
output_file_subfix_ae=args.output_file_subfix_ae,
)
filter_fraser_result(
samples=args.samples, gene_panel=args.gene_panel, out_drop_aberrant_splicing_tsv=args.out_drop_as_tsv
samples=args.samples,
gene_panel=args.gene_panel,
out_drop_aberrant_splicing_tsv=args.out_drop_as_tsv,
case_id=args.case_id,
output_file_subfix_as=args.output_file_subfix_as,
)


Expand Down
Loading

0 comments on commit 707b6d8

Please sign in to comment.