Skip to content

Commit

Permalink
Added additional Count-matrix reporting based on Kraken-classified reads
Browse files Browse the repository at this point in the history
  • Loading branch information
SaimMomin12 committed Oct 2, 2023
1 parent 51a5b28 commit 37d30d9
Show file tree
Hide file tree
Showing 7 changed files with 43 additions and 12 deletions.
7 changes: 5 additions & 2 deletions Snakefile
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ samples = list(list_of_samples.Samples.unique())
include: "rules/download_samples_or_copy.smk"
include: "rules/kraken2_mapping.smk"
include: "rules/cellranger.smk"
include: "rules/kraken_processing.smk"
include: "rules/extract_bam.smk"
include: "rules/extract_tags.smk"
include: "rules/report.smk"
Expand All @@ -21,10 +22,12 @@ rule all:
expand("data/{sample}/{sample}_S1_L001_R2_001.fastq.gz", sample=samples),
expand("results/kraken2/{sample}.kraken",sample=samples),
expand("results/kraken2/{sample}.report.txt",sample=samples),
directory(expand("results/cellranger/{sample}/{sample}/",sample=samples)),
expand("results/cellranger/{sample}/{sample}/",sample=samples),
expand("results/kraken_reads/{sample}_kraken_reads.sam", sample=samples),
expand("results/cellranger/{sample}/possorted_genome_bam.bam", sample=samples),
expand("results/cellranger/{sample}/unmapped_reads.sam", sample=samples),
expand("results/count_matrix/{sample}/count_matrix.tsv", sample=samples),
expand("results/count_matrix/{sample}/kraken_reads_count_matrix.tsv",sample=samples),
"results/kraken_plots/Familywise_tax_readcounts.tsv",
"results/kraken_plots/Specieswise_tax_readcounts.tsv",
"results/kraken_plots/Clustermap_Familywise_log10.png",
Expand All @@ -34,4 +37,4 @@ onsuccess:
print("sc-VirusScan Pipeline finished successfully!")

onerror:
print("sc-VirusScan Pipeline has failed!")
print("sc-VirusScan Pipeline has failed!")
4 changes: 2 additions & 2 deletions env.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -5,11 +5,11 @@ channels:
- defaults
dependencies:
- kraken2
- samtools
- samtools==1.12
- entrez-direct
- parallel-fastq-dump
- pandas
- snakemake
- seaborn
- pip:
- synapseclient
- synapseclient
3 changes: 1 addition & 2 deletions rules/download_samples_or_copy.smk
Original file line number Diff line number Diff line change
Expand Up @@ -30,10 +30,9 @@ rule download_samples_or_copy:
mv data/{wildcards.sample}/*_R2_* data/{wildcards.sample}/{wildcards.sample}_S1_L001_R2_001.fastq.gz
else
mkdir tmp/
mkdir -p tmp/
parallel-fastq-dump --sra-id {wildcards.sample} --split-files --threads {threads} --outdir {params.outdir} --gzip --tmpdir tmp/
mv data/{wildcards.sample}/{wildcards.sample}_2.fastq.gz data/{wildcards.sample}/{wildcards.sample}_S1_L001_R1_001.fastq.gz
mv data/{wildcards.sample}/{wildcards.sample}_3.fastq.gz data/{wildcards.sample}/{wildcards.sample}_S1_L001_R2_001.fastq.gz
rm -rf tmp/
fi
"""
11 changes: 8 additions & 3 deletions rules/extract_tags.smk
Original file line number Diff line number Diff line change
@@ -1,8 +1,10 @@
rule extract_tags:
input:
"results/cellranger/{sample}/unmapped_reads.sam"
i1 = "results/cellranger/{sample}/unmapped_reads.sam",
i2 = "results/kraken_reads/{sample}_kraken_reads.sam"
output:
"results/count_matrix/{sample}/count_matrix.tsv"
o1 = "results/count_matrix/{sample}/count_matrix.tsv",
o2 = "results/count_matrix/{sample}/kraken_reads_count_matrix.tsv",
threads: 16
resources:
mem_mb = 40000
Expand All @@ -13,4 +15,7 @@ rule extract_tags:
log:
"results/logs/count_matrix/{sample}_bam_extract.log"
shell:
"python3 {config[scripts_dir]}/bam_extract.py -i {input} -k {params.p3} -b {params.p2} -o {params.p1}"
"""
python3 {config[scripts_dir]}/bam_extract.py -i {input.i1} -k {params.p3} -b {params.p2} -o {output.o1}
python3 {config[scripts_dir]}/bam_extract.py -i {input.i2} -k {params.p3} -b {params.p2} -o {output.o2}
"""
23 changes: 23 additions & 0 deletions rules/kraken_processing.smk
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
rule kraken2_processing:
input:
i1 = "results/cellranger/{sample}/possorted_genome_bam.bam",
i2 = "results/kraken2/{sample}.kraken"
output:
o1 = "results/kraken_reads/{sample}_kraken_reads.sam",
priority: 90
resources:
mem_mb = 20000
threads: 16
params:
p1 = "results/kraken_reads/{sample}_ReadIds.txt",
p2 = "results/kraken_reads/{sample}_krakenReads.bam"
log:
"results/logs/kraken2/{sample}.kraken.log"
shell:
"""
mkdir -p results/kraken_reads
awk -F'\t' '$3 != "unclassified (taxid 0)" && $3 != "Homo sapiens (taxid 9606)" {{print $2}}' {input.i2} > {params.p1}
samtools view -N {params.p1} -o {params.p2} {input.i1}
samtools index {params.p2}
samtools view -h {params.p2} > {output.o1}
"""
4 changes: 2 additions & 2 deletions rules/report.smk
Original file line number Diff line number Diff line change
Expand Up @@ -8,13 +8,13 @@ rule kraken_reports:
o4 = "results/kraken_plots/Clustermap_Specieswise_log10.png"
priority: 10
resources:
mem_mb = 1000
mem_mb = 40000
params:
p1 = "results/kraken_plots/",
p2 = "results/kraken2/"
log:
"results/logs/plots/kraken_plots.log"
shell:
"""
python3 scripts/kraken_plot.py -i {params.p2} -o {params.p1}
python3 {config[scripts_dir]}/kraken_plot.py -i {params.p2} -o {params.p1}
"""
3 changes: 2 additions & 1 deletion scripts/bam_extract.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
help="Filtered Barcodes TSV file from CellRanger")
parser.add_argument("-o", "--output-file",
metavar="PATH",
default="count_matrix.tsv",
help="Path to your output file")

args = parser.parse_args()
Expand Down Expand Up @@ -69,4 +70,4 @@
result = merged_barcodes.pivot_table(index='Tax_ID', columns='CR',
values='Read Name',
aggfunc='count').fillna(0.).astype(int)
result.to_csv(args.output_file + "count_matrix.tsv", sep='\t')
result.to_csv(args.output_file, sep='\t')

0 comments on commit 37d30d9

Please sign in to comment.