diff --git a/CHANGELOG.md b/CHANGELOG.md
index ab4eeef..923df4d 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -7,6 +7,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 
 ### `Changed`
 
+- [#169](https://github.com/nf-core/bacass/pull/169) Refactored long-reads polishing step.
 - [#167](https://github.com/nf-core/bacass/pull/167) Remove params.save_merged as merged reads are not used in downstream analysis.
 - [#159](https://github.com/nf-core/bacass/pull/159) Updated Kmerfinder module and increased memory.
 - [#150](https://github.com/nf-core/bacass/pull/150) Replace local unicycler module with nf-core module + bump version.
@@ -17,6 +18,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 
 ### `Fixed`
 
+- [#169](https://github.com/nf-core/bacass/pull/169) Fixed long reads polishing input channel.
 - [#168](https://github.com/nf-core/bacass/pull/168) Fix wrong metadata in canu input channel.
 - [#163](https://github.com/nf-core/bacass/pull/163) Fixed `params.save_merged` to properly save merged files.
 - [#160](https://github.com/nf-core/bacass/pull/160) Fixed memory issues in KmerFinder, fixed handling of no species detected, and fixed handling of empty fasta files in the prokka/bakkta channel.
diff --git a/modules/local/medaka/main.nf b/modules/local/medaka/main.nf
index 7cc540b..656f88a 100644
--- a/modules/local/medaka/main.nf
+++ b/modules/local/medaka/main.nf
@@ -8,7 +8,7 @@ process MEDAKA {
         'biocontainers/medaka:1.4.3--py38h130def0_0' }"
 
     input:
-    tuple val(meta), file(longreads), file(assembly)
+    tuple val(meta), path(longreads), path(assembly)
 
     output:
     tuple val(meta), path('*_polished_genome.fa') , emit: assembly
@@ -33,9 +33,11 @@ process MEDAKA {
     medaka_consensus $args \
         -i ${ reads_bgzip_out ?: longreads } \
         -d ${ assembly_bgzip_out ?: assembly } \
-        -o "${prefix}_polished_genome.fa" \
+        -o "${prefix}_out" \
         -t $task.cpus
 
+    mv ${prefix}_out/* .
+    mv consensus.fasta ${prefix}_polished_genome.fa
     cat <<-END_VERSIONS > versions.yml
     "${task.process}":
         medaka: \$( medaka --version 2>&1 | sed 's/medaka //g' )
diff --git a/workflows/bacass.nf b/workflows/bacass.nf
index 2846600..17c3573 100644
--- a/workflows/bacass.nf
+++ b/workflows/bacass.nf
@@ -240,7 +240,7 @@ workflow BACASS {
     //
     // MODULE: Miniasm, genome assembly, long reads
     //
-    if ( params.assembler == 'miniasm' ) {
+    if ( params.assembly_type != 'short' && params.assembler == 'miniasm' ) {
         MINIMAP2_ALIGN (
             ch_for_assembly.map{ meta,sr,lr -> tuple(meta,lr) },
             [[:],[]],
@@ -280,6 +280,8 @@ workflow BACASS {
         )
         ch_assembly = ch_assembly.mix( RACON.out.improved_assembly.dump(tag: 'miniasm') )
         ch_versions = ch_versions.mix( RACON.out.versions )
+    } else if (params.assembly_type == 'short' && params.assembler == 'miniasm') {
+        error("Selected assembler ${params.assembler} cannot run on short reads")
     }
 
     //
@@ -294,51 +296,65 @@ workflow BACASS {
     }
 
     //
-    // MODULE: Nanopolish, polishes assembly using FAST5 files - should take either miniasm, canu, or unicycler consensus sequence
+    // SUBWORKFLOW: Long reads polishing. Uses medaka or Nanopolish (this last requires Fast5 files available in input samplesheet).
     //
-    if ( !params.skip_polish && params.assembly_type == 'long' && params.polish_method != 'medaka' ) {
+    // skip_polish always wins; 'long' assemblies are polished by default, other non-short types only when polish_method is set.
+    if ( !params.skip_polish && ( params.assembly_type == 'long' || ( params.assembly_type != 'short' && params.polish_method ) ) ){
+        // Set channel for polishing long reads
         ch_for_assembly
             .join( ch_assembly )
-            .set { ch_for_polish }
-
-        MINIMAP2_POLISH (
-            ch_for_polish.map { meta, sr, lr, fasta -> tuple(meta, lr) },
-            ch_for_polish.map { meta, sr, lr, fasta -> fasta },
-            true,
-            false,
-            false
-        )
-        ch_versions = ch_versions.mix(MINIMAP2_POLISH.out.versions)
-
-        SAMTOOLS_INDEX (
-            MINIMAP2_POLISH.out.bam.dump(tag: 'samtools_sort')
-        )
-        ch_versions = ch_versions.mix(SAMTOOLS_INDEX.out.versions)
-
-        ch_for_polish // tuple val(meta), val(reads), file(longreads), file(assembly)
-            .join( MINIMAP2_POLISH.out.bam ) // tuple val(meta), file(bam)
-            .join( SAMTOOLS_INDEX.out.bai ) // tuple val(meta), file(bai)
-            .join( ch_fast5 ) // tuple val(meta), file(fast5)
-            .set { ch_for_nanopolish } // tuple val(meta), val(reads), file(longreads), file(assembly), file(bam), file(bai), file(fast5)
-
-        // TODO: 'nanopolish index' couldn't be tested. No fast5 provided in test datasets.
-        NANOPOLISH (
-            ch_for_nanopolish.dump(tag: 'into_nanopolish')
-        )
-        ch_versions = ch_versions.mix(NANOPOLISH.out.versions)
-    }
-
-    //
-    // MODULE: Medaka, polishes assembly - should take either miniasm, canu, or unicycler consensus sequence
-    //
-    if ( !params.skip_polish && params.assembly_type == 'long' && params.polish_method == 'medaka' ) {
-        ch_for_assembly
-            .join( ch_assembly )
-            .map { meta, sr, lr, assembly -> tuple(meta, lr, assembly) }
-            .set { ch_for_medaka }
-
-        MEDAKA ( ch_for_medaka.dump(tag: 'into_medaka') )
-        ch_versions = ch_versions.mix(MEDAKA.out.versions)
+            .map { meta, sr, lr, fasta -> tuple(meta, lr, fasta) }
+            .set { ch_polish_long } // channel: [ val(meta), path(lr), path(fasta) ]
+        if (params.polish_method == 'medaka'){
+            //
+            // MODULE: Medaka, polishes assembly - should take either miniasm, canu, or unicycler consensus sequence
+            //
+            MEDAKA ( ch_polish_long )
+            ch_assembly = MEDAKA.out.assembly
+            ch_versions = ch_versions.mix(MEDAKA.out.versions)
+        } else if (params.polish_method == 'nanopolish') {
+            //
+            // MODULE: Nanopolish, polishes assembly using FAST5 files
+            //
+            // A channel object is always truthy, so `!ch_fast5` can never detect missing FAST5 input;
+            // detect an empty channel at run time instead. error() aborts the run, log.error does not.
+            ch_fast5
+                .ifEmpty { error("ERROR: FAST5 files are required for Nanopolish but none were provided. Please supply FAST5 files or choose another polishing method. Available options are: medaka, nanopolish") }
+                .set { ch_fast5_required }
+            //
+            // MODULE: Minimap2 polish
+            //
+            MINIMAP2_POLISH (
+                ch_polish_long.map { meta, lr, fasta -> tuple(meta, lr) },
+                ch_polish_long.map { meta, lr, fasta -> tuple(meta, fasta) },
+                true,
+                false,
+                false
+            )
+            ch_versions = ch_versions.mix(MINIMAP2_POLISH.out.versions)
+            //
+            // MODULE: Samtools index
+            //
+            SAMTOOLS_INDEX (
+                MINIMAP2_POLISH.out.bam.dump(tag: 'samtools_sort')
+            )
+            ch_versions = ch_versions.mix(SAMTOOLS_INDEX.out.versions)
+            //
+            // MODULE: Nanopolish
+            //
+            ch_polish_long // tuple val(meta), file(longreads), file(assembly)
+                .join( MINIMAP2_POLISH.out.bam ) // tuple val(meta), file(bam)
+                .join( SAMTOOLS_INDEX.out.bai ) // tuple val(meta), file(bai)
+                .join( ch_fast5_required ) // tuple val(meta), file(fast5)
+                .set { ch_for_nanopolish } // tuple val(meta), file(longreads), file(assembly), file(bam), file(bai), file(fast5)
+            // NOTE(review): the short-reads element is no longer part of this tuple - confirm the NANOPOLISH input declaration matches.
+            // TODO: 'nanopolish index' couldn't be tested. No fast5 provided in test datasets.
+            NANOPOLISH (
+                ch_for_nanopolish.dump(tag: 'into_nanopolish')
+            )
+            ch_assembly = NANOPOLISH.out.assembly
+            ch_versions = ch_versions.mix( NANOPOLISH.out.versions )
+        }
     }
 
     //