From 1d3ed88328f208909a420511bdf7021bef063b5d Mon Sep 17 00:00:00 2001 From: mult1fractal Date: Thu, 11 Jan 2024 10:37:20 +0100 Subject: [PATCH] updated sourmash commands to sourmash 4.5 + fixed sourmash output file parsing --- configs/container.config | 2 +- .../sourmash_for_tax.nf | 2 +- workflows/process/sourmash/sourmash.nf | 20 ++++++++++++++----- 3 files changed, 17 insertions(+), 7 deletions(-) diff --git a/configs/container.config b/configs/container.config index 8c0367f..da22635 100644 --- a/configs/container.config +++ b/configs/container.config @@ -13,7 +13,7 @@ process { withLabel: python { container = 'nanozoo/template:3.8--ccd0653' } withLabel: samtools { container = 'nanozoo/samtools:1.9--76b9270' } withLabel: seqkit { container = 'nanozoo/seqkit:0.13.2--cd66104' ; containerOptions = '--shm-size 2g' } - withLabel: sourmash { container = 'nanozoo/sourmash:4.5.0--e12a57a' } + withLabel: sourmash { container = 'nanozoo/sourmash:4.5.0--e12a57a' } withLabel: ubuntu { container = 'nanozoo/basics:1.0--962b907' } withLabel: upsetr { container = 'nanozoo/upsetr:1.4.0--0ea25b3' } withLabel: vibrant { container = 'multifractal/vibrant:0.5' } diff --git a/workflows/process/phage_tax_classification/sourmash_for_tax.nf b/workflows/process/phage_tax_classification/sourmash_for_tax.nf index f34fc58..4263112 100644 --- a/workflows/process/phage_tax_classification/sourmash_for_tax.nf +++ b/workflows/process/phage_tax_classification/sourmash_for_tax.nf @@ -10,7 +10,7 @@ process sourmash_for_tax { shell: """ for fastafile in ${fasta_dir}/*.fa; do - sourmash compute -p ${task.cpus} --scaled 100 -k 21 \${fastafile} + sourmash sketch dna -p k=21,scaled=100 \${fastafile} done for signature in *.sig; do diff --git a/workflows/process/sourmash/sourmash.nf b/workflows/process/sourmash/sourmash.nf index c466e0f..0266624 100644 --- a/workflows/process/sourmash/sourmash.nf +++ b/workflows/process/sourmash/sourmash.nf @@ -9,7 +9,7 @@ process sourmash { script: """ for fastafile in ${fasta_dir}/*.fa; do - sourmash compute -p ${task.cpus} --scaled 100 -k 21 \${fastafile} + sourmash sketch dna -p k=21,scaled=100 \${fastafile} done for signature in *.sig; do @@ -19,9 +19,9 @@ process sourmash { touch ${name}_\${PWD##*/}.list for tempfile in *.temporary; do - value=\$(grep -v "similarity,name,filename,md5" \${tempfile} | wc -l) # filtering criteria + value=\$(grep -v "similarity,md5,filename,name,query_filename,query_name,query_md5,ani" \${tempfile} | wc -l) # filtering criteria filename=\$(basename \${tempfile} .fa.sig.temporary) - prediction_value=\$(grep -v "similarity,name,filename,md5" \${tempfile} |sort -r -k1 | awk 'NR == 1' | cut -d "," -f1 ) + prediction_value=\$(grep -v "similarity,md5,filename,name,query_filename,query_name,query_md5,ani" \${tempfile} |sort -r -k1 | awk 'NR == 1' | cut -d "," -f1 ) if [ \$value -gt 0 ] then echo "\$filename,\$prediction_value" >> ${name}_\${PWD##*/}.list @@ -30,11 +30,21 @@ process sourmash { """ stub: """ - echo "similarity,name,filename,md5" > ${name}_\${PWD##*/}.list + echo "similarity,md5,filename,name,query_filename,query_name,query_md5,ani" > ${name}_\${PWD##*/}.list echo "pos_phage_1,1.0" >> ${name}_\${PWD##*/}.list """ } /* filtering criteria is at line 24 (awk part) with a current similiarity of 0.5 or higher to known phages -*/ \ No newline at end of file +*/ + +// for tempfile in *.temporary; do +// value=$(grep -v "similarity,name,filename,md5" ${tempfile} | wc -l) # filtering criteria +// filename=$(basename ${tempfile} .fa.sig.temporary) +// prediction_value=\$(grep -v "similarity,name,filename,md5" \${tempfile} |sort -r -k1 | awk 'NR == 1' | cut -d "," -f1 ) + +// if [ \$value -gt 0 ] +// then echo "\$filename,\$prediction_value" >> ${name}_\${PWD##*/}.list +// fi +// done \ No newline at end of file