Skip to content

Commit

Permalink
updated sourmash commands to sourmash 4.5 + fixed sourmash output file parsing
Browse files Browse the repository at this point in the history
  • Loading branch information
mult1fractal committed Jan 11, 2024
1 parent 31c2416 commit 1d3ed88
Show file tree
Hide file tree
Showing 3 changed files with 17 additions and 7 deletions.
2 changes: 1 addition & 1 deletion configs/container.config
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ process {
withLabel: python { container = 'nanozoo/template:3.8--ccd0653' }
withLabel: samtools { container = 'nanozoo/samtools:1.9--76b9270' }
withLabel: seqkit { container = 'nanozoo/seqkit:0.13.2--cd66104' ; containerOptions = '--shm-size 2g' }
withLabel: sourmash { container = 'nanozoo/sourmash:4.5.0--e12a57a' }
withLabel: sourmash { container = 'nanozoo/sourmash:4.5.0--e12a57a' }
withLabel: ubuntu { container = 'nanozoo/basics:1.0--962b907' }
withLabel: upsetr { container = 'nanozoo/upsetr:1.4.0--0ea25b3' }
withLabel: vibrant { container = 'multifractal/vibrant:0.5' }
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ process sourmash_for_tax {
shell:
"""
for fastafile in ${fasta_dir}/*.fa; do
sourmash compute -p ${task.cpus} --scaled 100 -k 21 \${fastafile}
sourmash sketch dna -p k=21,scaled=100 \${fastafile}
done
for signature in *.sig; do
Expand Down
20 changes: 15 additions & 5 deletions workflows/process/sourmash/sourmash.nf
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ process sourmash {
script:
"""
for fastafile in ${fasta_dir}/*.fa; do
sourmash compute -p ${task.cpus} --scaled 100 -k 21 \${fastafile}
sourmash sketch dna -p k=21,scaled=100 \${fastafile}
done
for signature in *.sig; do
Expand All @@ -19,9 +19,9 @@ process sourmash {
touch ${name}_\${PWD##*/}.list
for tempfile in *.temporary; do
value=\$(grep -v "similarity,name,filename,md5" \${tempfile} | wc -l) # filtering criteria
value=\$(grep -v "similarity,md5,filename,name,query_filename,query_name,query_md5,ani" \${tempfile} | wc -l) # filtering criteria
filename=\$(basename \${tempfile} .fa.sig.temporary)
prediction_value=\$(grep -v "similarity,name,filename,md5" \${tempfile} |sort -r -k1 | awk 'NR == 1' | cut -d "," -f1 )
prediction_value=\$(grep -v "similarity,md5,filename,name,query_filename,query_name,query_md5,ani" \${tempfile} |sort -r -k1 | awk 'NR == 1' | cut -d "," -f1 )
if [ \$value -gt 0 ]
then echo "\$filename,\$prediction_value" >> ${name}_\${PWD##*/}.list
Expand All @@ -30,11 +30,21 @@ process sourmash {
"""
stub:
"""
echo "similarity,name,filename,md5" > ${name}_\${PWD##*/}.list
echo "similarity,md5,filename,name,query_filename,query_name,query_md5,ani" > ${name}_\${PWD##*/}.list
echo "pos_phage_1,1.0" >> ${name}_\${PWD##*/}.list
"""
}

/*
the filtering criterion is at line 24 (awk part), currently a similarity of 0.5 or higher to known phages
*/
*/

// for tempfile in *.temporary; do
// value=$(grep -v "similarity,name,filename,md5" ${tempfile} | wc -l) # filtering criteria
// filename=$(basename ${tempfile} .fa.sig.temporary)
// prediction_value=\$(grep -v "similarity,name,filename,md5" \${tempfile} |sort -r -k1 | awk 'NR == 1' | cut -d "," -f1 )

// if [ \$value -gt 0 ]
// then echo "\$filename,\$prediction_value" >> ${name}_\${PWD##*/}.list
// fi
// done

0 comments on commit 1d3ed88

Please sign in to comment.