Skip to content

Commit

Permalink
Merge pull request #169 from Daniel-VM/fix_nanopolish_ont
Browse files Browse the repository at this point in the history
Fix long reads polishing input channel and refactor long reads polishing section
  • Loading branch information
Daniel-VM authored Sep 28, 2024
2 parents 1cb4574 + 0917bda commit 3f6a42d
Show file tree
Hide file tree
Showing 3 changed files with 62 additions and 45 deletions.
2 changes: 2 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0

### `Changed`

- [#169](https://github.com/nf-core/bacass/pull/169) Refactored long-reads polishing step.
- [#167](https://github.com/nf-core/bacass/pull/167) Remove params.save_merged as merged reads are not used in downstream analysis.
- [#159](https://github.com/nf-core/bacass/pull/159) Updated Kmerfinder module and increased memory.
- [#150](https://github.com/nf-core/bacass/pull/150) Replace local unicycler module with nf-core module + bump version.
Expand All @@ -17,6 +18,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0

### `Fixed`

- [#169](https://github.com/nf-core/bacass/pull/169) Fixed long reads polishing input channel.
- [#168](https://github.com/nf-core/bacass/pull/168) Fix wrong metadata in canu input channel.
- [#163](https://github.com/nf-core/bacass/pull/163) Fixed `params.save_merged` to properly save merged files.
- [#160](https://github.com/nf-core/bacass/pull/160) Fixed memory issues in KmerFinder, fixed handling of no species detected, and fixed handling of empty fasta files in the prokka/bakta channel.
Expand Down
6 changes: 4 additions & 2 deletions modules/local/medaka/main.nf
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ process MEDAKA {
'biocontainers/medaka:1.4.3--py38h130def0_0' }"

input:
tuple val(meta), file(longreads), file(assembly)
tuple val(meta), path(longreads), path(assembly)

output:
tuple val(meta), path('*_polished_genome.fa') , emit: assembly
Expand All @@ -33,9 +33,11 @@ process MEDAKA {
medaka_consensus $args \
-i ${ reads_bgzip_out ?: longreads } \
-d ${ assembly_bgzip_out ?: assembly } \
-o "${prefix}_polished_genome.fa" \
-o "${prefix}_out" \
-t $task.cpus
mv ${prefix}_out/* .
mv consensus.fasta ${prefix}_polished_genome.fa
cat <<-END_VERSIONS > versions.yml
"${task.process}":
medaka: \$( medaka --version 2>&1 | sed 's/medaka //g' )
Expand Down
99 changes: 56 additions & 43 deletions workflows/bacass.nf
Original file line number Diff line number Diff line change
Expand Up @@ -240,7 +240,7 @@ workflow BACASS {
//
// MODULE: Miniasm, genome assembly, long reads
//
if ( params.assembler == 'miniasm' ) {
if ( params.assembly_type != 'short' && params.assembler == 'miniasm' ) {
MINIMAP2_ALIGN (
ch_for_assembly.map{ meta,sr,lr -> tuple(meta,lr) },
[[:],[]],
Expand Down Expand Up @@ -280,6 +280,8 @@ workflow BACASS {
)
ch_assembly = ch_assembly.mix( RACON.out.improved_assembly.dump(tag: 'miniasm') )
ch_versions = ch_versions.mix( RACON.out.versions )
} else if (params.assembly_type == 'short' && params.assembler == 'miniasm') {
exit("Selected assembler ${params.assembler} cannot run on short reads")
}

//
Expand All @@ -294,51 +296,62 @@ workflow BACASS {
}

//
// MODULE: Nanopolish, polishes assembly using FAST5 files - should take either miniasm, canu, or unicycler consensus sequence
// SUBWORKFLOW: Long reads polishing. Uses Medaka or Nanopolish (the latter requires FAST5 files to be provided in the input samplesheet).
//
if ( !params.skip_polish && params.assembly_type == 'long' && params.polish_method != 'medaka' ) {
if ( (params.assembly_type == 'long' && !params.skip_polish) || ( params.assembly_type != 'short' && params.polish_method) ){
// Set channel for polishing long reads
ch_for_assembly
.join( ch_assembly )
.set { ch_for_polish }

MINIMAP2_POLISH (
ch_for_polish.map { meta, sr, lr, fasta -> tuple(meta, lr) },
ch_for_polish.map { meta, sr, lr, fasta -> fasta },
true,
false,
false
)
ch_versions = ch_versions.mix(MINIMAP2_POLISH.out.versions)

SAMTOOLS_INDEX (
MINIMAP2_POLISH.out.bam.dump(tag: 'samtools_sort')
)
ch_versions = ch_versions.mix(SAMTOOLS_INDEX.out.versions)

ch_for_polish // tuple val(meta), val(reads), file(longreads), file(assembly)
.join( MINIMAP2_POLISH.out.bam ) // tuple val(meta), file(bam)
.join( SAMTOOLS_INDEX.out.bai ) // tuple val(meta), file(bai)
.join( ch_fast5 ) // tuple val(meta), file(fast5)
.set { ch_for_nanopolish } // tuple val(meta), val(reads), file(longreads), file(assembly), file(bam), file(bai), file(fast5)

// TODO: 'nanopolish index' couldn't be tested. No fast5 provided in test datasets.
NANOPOLISH (
ch_for_nanopolish.dump(tag: 'into_nanopolish')
)
ch_versions = ch_versions.mix(NANOPOLISH.out.versions)
}

//
// MODULE: Medaka, polishes assembly - should take either miniasm, canu, or unicycler consensus sequence
//
if ( !params.skip_polish && params.assembly_type == 'long' && params.polish_method == 'medaka' ) {
ch_for_assembly
.join( ch_assembly )
.map { meta, sr, lr, assembly -> tuple(meta, lr, assembly) }
.set { ch_for_medaka }

MEDAKA ( ch_for_medaka.dump(tag: 'into_medaka') )
ch_versions = ch_versions.mix(MEDAKA.out.versions)
.map { meta, sr, lr, fasta -> tuple(meta, lr, fasta) }
.set { ch_polish_long } // channel: [ val(meta), path(lr), path(fasta) ]
if (params.polish_method == 'medaka'){
//
// MODULE: Medaka, polishes assembly - should take either miniasm, canu, or unicycler consensus sequence
//
MEDAKA ( ch_polish_long )
ch_assembly = MEDAKA.out.assembly
ch_versions = ch_versions.mix(MEDAKA.out.versions)
} else if (params.polish_method == 'nanopolish') {
//
// MODULE: Nanopolish, polishes assembly using FAST5 files
//
if (!ch_fast5){
log.error "ERROR: FAST5 files are required for Nanopolish but none were provided. Please supply FAST5 files or choose another polishing method. Available options are: medaka, nanopolish"
} else {
//
// MODULE: Minimap2 polish
//
MINIMAP2_POLISH (
ch_polish_long.map { meta, lr, fasta -> tuple(meta, lr) },
ch_polish_long.map { meta, lr, fasta -> tuple(meta, fasta) },
true,
false,
false
)
ch_versions = ch_versions.mix(MINIMAP2_POLISH.out.versions)
//
// MODULE: Samtools index
//
SAMTOOLS_INDEX (
MINIMAP2_POLISH.out.bam.dump(tag: 'samtools_sort')
)
ch_versions = ch_versions.mix(SAMTOOLS_INDEX.out.versions)
//
// MODULE: Nanopolish
//
ch_polish_long // tuple val(meta), file(longreads), file(assembly)
.join( MINIMAP2_POLISH.out.bam ) // tuple val(meta), file(bam)
.join( SAMTOOLS_INDEX.out.bai ) // tuple val(meta), file(bai)
.join( ch_fast5 ) // tuple val(meta), file(fast5)
.set { ch_for_nanopolish } // tuple val(meta), file(longreads), file(assembly), file(bam), file(bai), file(fast5)
// TODO: 'nanopolish index' couldn't be tested. No fast5 provided in test datasets.
NANOPOLISH (
ch_for_nanopolish.dump(tag: 'into_nanopolish')
)
ch_assembly = NANOPOLISH.out.assembly
ch_versions = ch_versions.mix( NANOPOLISH.out.versions )
}
}
}

//
Expand Down

0 comments on commit 3f6a42d

Please sign in to comment.