Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Testing full pipeline #44

Merged
merged 11 commits into from
Oct 26, 2023
Merged
Show file tree
Hide file tree
Changes from 10 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 10 additions & 0 deletions bin/drop_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -85,6 +85,7 @@ def update_config(
genome: Path,
gtf: Path,
genome_assembly: str,
drop_group_samples: str,
padjcutoff: float,
zscorecutoff: float,
drop_module: str,
Expand All @@ -109,10 +110,12 @@ def update_config(
# Export counts
if drop_module == "AE":
config_copy["aberrantExpression"]["run"] = ["true"]
config_copy["aberrantExpression"]["groups"] = [drop_group_samples]
config_copy["aberrantExpression"]["padjCutoff"] = padjcutoff
config_copy["aberrantExpression"]["zScoreCutoff"] = zscorecutoff
elif drop_module == "AS":
config_copy["aberrantSplicing"]["run"] = ["true"]
config_copy["aberrantSplicing"]["groups"] = [drop_group_samples]
config_copy["aberrantSplicing"]["padjCutoff"] = padjcutoff
elif drop_module == "MAE":
config_copy["mae"]["run"] = ["true"]
Expand Down Expand Up @@ -156,6 +159,12 @@ def parse_args(argv=None):
choices=["hg19", "hs37d5", "hg38", "GRCh38"],
required=True,
)
parser.add_argument(
"--drop_group_samples",
type=str,
help="Specify drop group to analyse",
required=True,
)
parser.add_argument(
"--padjcutoff",
type=float,
Expand Down Expand Up @@ -190,6 +199,7 @@ def main():
genome=args.genome_fasta,
gtf=args.gtf,
genome_assembly=args.genome_assembly,
drop_group_samples=args.drop_group_samples,
padjcutoff=args.padjcutoff,
zscorecutoff=args.zscorecutoff,
drop_module=args.drop_module,
Expand Down
136 changes: 0 additions & 136 deletions bin/drop_counts.py

This file was deleted.

27 changes: 15 additions & 12 deletions bin/drop_sample_annot.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
from pathlib import Path
import csv
from pandas import read_csv, DataFrame, concat
import os

SCRIPT_VERSION = "v1.0"
SAMPLE_ANNOTATION_COLUMNS = [
Expand All @@ -25,7 +26,7 @@


def write_sample_annotation_to_tsv(
bam: str, samples: str, strandedness: str, single_end: str, gtf: str, count_file: str, out_file: str
bam: str, samples: str, strandedness: str, single_end: str, drop_group_sample: str, out_file: str
):
"""Write the Sample Annotation tsv file."""
with open(out_file, "w") as tsv_file:
Expand All @@ -34,9 +35,9 @@ def write_sample_annotation_to_tsv(
for index, id in enumerate(samples):
sa_dict: dict = {}.fromkeys(SAMPLE_ANNOTATION_COLUMNS, "NA")
sa_dict["RNA_ID"] = id
sa_dict["DROP_GROUP"] = "outrider,fraser"
sa_dict["GENE_COUNTS_FILE"] = count_file
sa_dict["GENE_ANNOTATION"] = Path(gtf).stem
sa_dict["DROP_GROUP"] = drop_group_sample
sa_dict["GENE_COUNTS_FILE"] = "NA"
sa_dict["GENE_ANNOTATION"] = "NA"
sa_dict["STRAND"] = strandedness[index]
sa_dict["PAIRED_END"] = is_paired_end(single_end[index])
sa_dict["RNA_BAM_FILE"] = bam[index]
Expand All @@ -50,14 +51,17 @@ def is_paired_end(single_end: str) -> bool:
return False


def write_final_annot_to_tsv(count_file: str, ref_annot: str, out_file: str):
def write_final_annot_to_tsv(ref_count_file: str, ref_annot: str, out_file: str):
"""
Concatenates the Sample Annotation produced by SampleAnnotation with the one
provided for the reference samples, checking for duplicate sample IDs
"""
df_samples: DataFrame = read_csv("drop_annotation_given_samples.tsv", sep="\t")
df_reference: DataFrame = read_csv(ref_annot, sep="\t")
df_reference["GENE_COUNTS_FILE"] = count_file
df_reference["GENE_COUNTS_FILE"] = ref_count_file
df_reference["SPLICE_COUNTS_DIR"] = df_reference["SPLICE_COUNTS_DIR"].str.rstrip("/")
df_reference["SPLICE_COUNTS_DIR"] = df_reference["SPLICE_COUNTS_DIR"].apply(os.path.basename)
Lucpen marked this conversation as resolved.
Show resolved Hide resolved
df_reference["DROP_GROUP"] = df_reference["DROP_GROUP"].str.replace(" ", "")
df_samples["COUNT_OVERLAPS"] = df_reference["COUNT_OVERLAPS"].iloc[0]
df_samples["COUNT_MODE"] = df_reference["COUNT_MODE"].iloc[0]
df_samples["HPO_TERMS"] = df_reference["HPO_TERMS"].iloc[0]
Expand All @@ -74,15 +78,15 @@ def parse_args(argv=None):
formatter_class=argparse.MetavarTypeHelpFormatter,
description="""Generate DROP sample annotation for patients.""",
)
parser.add_argument("--bam", type=str, nargs="+", help="bam files for the patient", required=True)
parser.add_argument("--bam", type=str, nargs="+", help="bam files for the analyzed samples", required=True)
parser.add_argument("--samples", type=str, nargs="+", help="corresponding sample name", required=True)
parser.add_argument("--strandedness", type=str, nargs="+", help="strandedness of RNA", required=True)
parser.add_argument("--single_end", type=str, nargs="+", help="is the sample paired end?", required=True)
parser.add_argument("--gtf", type=str, help="Transcript annotation file in gtf format", required=True)
parser.add_argument(
"--count_file", type=str, help="A tsv file of gene counts for all processed samples.", required=True
"--ref_count_file", type=str, help="A tsv file of gene counts for reference samples.", required=True
)
parser.add_argument("--ref_annot", type=str, help="Path to reference annotation tsv", required=True)
parser.add_argument("--drop_group_sample", type=str, help="Drop group of analyzed samples", required=True)
parser.add_argument("--output", type=str, help="Path to save to", required=True)
parser.add_argument("--version", action="version", version=SCRIPT_VERSION)
return parser.parse_args(argv)
Expand All @@ -96,11 +100,10 @@ def main():
samples=args.samples,
strandedness=args.strandedness,
single_end=args.single_end,
gtf=args.gtf,
count_file=args.count_file,
drop_group_sample=args.drop_group_sample,
out_file="drop_annotation_given_samples.tsv",
)
write_final_annot_to_tsv(count_file=args.count_file, ref_annot=args.ref_annot, out_file=args.output)
write_final_annot_to_tsv(ref_count_file=args.ref_count_file, ref_annot=args.ref_annot, out_file=args.output)


if __name__ == "__main__":
Expand Down
20 changes: 6 additions & 14 deletions conf/modules.config
Original file line number Diff line number Diff line change
Expand Up @@ -181,6 +181,7 @@ process {

withName: '.*PREPARE_REFERENCES:GFFREAD' {
ext.when = { (!params.transcript_fasta) }
ext.args = { '-w' }
publishDir = [
path: { "${params.outdir}/references" },
saveAs: { filename -> filename.equals('versions.yml') ? null : filename },
Expand Down Expand Up @@ -316,15 +317,6 @@ process {
//

process {
withName: '.*ANALYSE_TRANSCRIPTS:DROP_COUNTS' {
ext.when = {params.run_drop_ae}
publishDir = [
path: { "${params.outdir}/analyse_transcripts/drop/AE" },
mode: params.publish_dir_mode,
saveAs: { filename -> filename.equals('versions.yml') ? null : filename },
]
}

withName: '.*ANALYSE_TRANSCRIPTS:DROP_ANNOT' {
ext.when = {params.run_drop_ae|params.run_drop_as}
publishDir = [
Expand Down Expand Up @@ -475,11 +467,11 @@ process {
withName: '.*ANNOTATE_SNV:ENSEMBLVEP' {
ext.prefix = { "${vcf.simpleName}_rohann_vcfanno_filter_vep" }
Lucpen marked this conversation as resolved.
Show resolved Hide resolved
ext.args = [
'--dir_plugins vep_cache/Plugins',
'--plugin LoFtool,vep_cache/LoFtool_scores.txt',
'--plugin pLI,vep_cache/pLI_values_107.txt',
'--plugin SpliceAI,snv=vep_cache/spliceai_21_scores_raw_snv_-v1.3-.vcf.gz,indel=vep_cache/spliceai_21_scores_raw_snv_-v1.3-.vcf.gz',
'--plugin MaxEntScan,vep_cache/fordownload,SWA,NCSS',
'--dir_plugins cache/Plugins',
'--plugin LoFtool,cache/Plugins/LoFtool_scores.txt',
'--plugin pLI,cache/Plugins/pLI_values_107.txt',
'--plugin SpliceAI,snv=cache/Plugins/spliceai_21_scores_raw_snv_-v1.3-.vcf.gz,indel=cache/Plugins/spliceai_21_scores_raw_snv_-v1.3-.vcf.gz',
'--plugin MaxEntScan,cache/Plugins/fordownload,SWA,NCSS',
Lucpen marked this conversation as resolved.
Show resolved Hide resolved
'--distance 5000',
'--buffer_size 20000',
'--format vcf --max_sv_size 248956422',
Expand Down
2 changes: 1 addition & 1 deletion conf/test.config
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ params {
star_two_pass_mode = 'None'
subsample_bed = "${projectDir}/test_data/subsample.bed"
seed_frac = 0.001
num_reads = 10000
num_reads = 20000

// VEP
vep_cache = "https://raw.githubusercontent.com/nf-core/test-datasets/raredisease/reference/vep_cache_and_plugins.tar.gz"
Expand Down
7 changes: 6 additions & 1 deletion modules/local/drop_config_runAE.nf
Original file line number Diff line number Diff line change
Expand Up @@ -13,8 +13,11 @@ process DROP_CONFIG_RUN_AE {
tuple val(meta), path(fasta), path(fai)
path gtf
path sample_annotation
path gene_counts
tuple path(bam), path(bai)
path ref_drop_count_file
path ref_splice_folder
val(genome)
val(drop_group_samples_ae)
val(drop_padjcutoff_ae)
val(drop_zScoreCutoff)

Expand All @@ -30,6 +33,7 @@ process DROP_CONFIG_RUN_AE {

script:
def genome_assembly = "${genome}".contains("h37") ? "hg19" : "${genome}"
def drop_group = "${drop_group_samples_ae}".replace(" ","")
def zscorecutoff = drop_zScoreCutoff ? "--zscorecutoff ${drop_zScoreCutoff}" : ''

"""
Expand All @@ -42,6 +46,7 @@ process DROP_CONFIG_RUN_AE {
--gtf ${gtf} \\
--drop_module AE \\
--genome_assembly $genome_assembly \\
--drop_group_samples $drop_group \\
--padjcutoff ${drop_padjcutoff_ae} \\
$zscorecutoff \\
--output config.yaml
Expand Down
4 changes: 4 additions & 0 deletions modules/local/drop_config_runAS.nf
Original file line number Diff line number Diff line change
Expand Up @@ -14,8 +14,10 @@ process DROP_CONFIG_RUN_AS {
path gtf
path sample_annotation
tuple path(bam), path(bai)
path ref_drop_count_file
path ref_splice_folder
val(genome)
val(drop_group_samples_as)
val(drop_padjcutoff_as)

output:
Expand All @@ -29,6 +31,7 @@ process DROP_CONFIG_RUN_AS {

script:
def genome_assembly = "${genome}".contains("h37") ? "hg19" : "${genome}"
def drop_group = "${drop_group_samples_as}".replace(" ","")
"""
TMPDIR=\$PWD

Expand All @@ -39,6 +42,7 @@ process DROP_CONFIG_RUN_AS {
--gtf ${gtf}\\
--drop_module AS \\
--genome_assembly $genome_assembly \\
--drop_group_samples $drop_group \\
--padjcutoff ${drop_padjcutoff_as} \\
--output config.yaml

Expand Down
Loading