Fix long reads polishing input channel and refactor long reads polishing section #169

Merged (6 commits, Sep 28, 2024)
2 changes: 2 additions & 0 deletions CHANGELOG.md
@@ -7,6 +7,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0

### `Changed`

+ - [#169](https://github.com/nf-core/bacass/pull/169) Refactored long-reads polishing step.
- [#167](https://github.com/nf-core/bacass/pull/167) Remove params.save_merged as merged reads are not used in downstream analysis.
- [#159](https://github.com/nf-core/bacass/pull/159) Updated Kmerfinder module and increased memory.
- [#150](https://github.com/nf-core/bacass/pull/150) Replace local unicycler module with nf-core module + bump version.
@@ -17,6 +18,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0

### `Fixed`

+ - [#169](https://github.com/nf-core/bacass/pull/169) Fixed long reads polishing input channel.
- [#168](https://github.com/nf-core/bacass/pull/168) Fix wrong metadata in canu input channel.
- [#163](https://github.com/nf-core/bacass/pull/163) Fixed `params.save_merged` to properly save merged files.
- [#160](https://github.com/nf-core/bacass/pull/160) Fixed memory issues in KmerFinder, fixed handling of no species detected, and fixed handling of empty fasta files in the prokka/bakkta channel.
6 changes: 4 additions & 2 deletions modules/local/medaka/main.nf
@@ -8,7 +8,7 @@ process MEDAKA {
'biocontainers/medaka:1.4.3--py38h130def0_0' }"

input:
- tuple val(meta), file(longreads), file(assembly)
+ tuple val(meta), path(longreads), path(assembly)

output:
tuple val(meta), path('*_polished_genome.fa') , emit: assembly
@@ -33,9 +33,11 @@
medaka_consensus $args \
-i ${ reads_bgzip_out ?: longreads } \
-d ${ assembly_bgzip_out ?: assembly } \
-o "${prefix}_polished_genome.fa" \
-o "${prefix}_out" \
-t $task.cpus

+ mv ${prefix}_out/* .
+ mv consensus.fasta ${prefix}_polished_genome.fa
cat <<-END_VERSIONS > versions.yml
"${task.process}":
medaka: \$( medaka --version 2>&1 | sed 's/medaka //g' )
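A note on the module change above: `medaka_consensus` treats the argument of `-o` as an output directory and writes `consensus.fasta` inside it, so the previous call that pointed `-o` at a `.fa` filename left the polished sequence buried as `consensus.fasta` inside a directory of that name. The diff therefore runs into `${prefix}_out`, moves the results up and renames the consensus, and it also switches the input qualifiers from the legacy `file()` to the DSL2-preferred `path()`. A minimal sketch of the corrected pattern, simplified from the module above (the prefix handling and bgzip steps of the real module are omitted):

```nextflow
// Minimal sketch, not the nf-core/bacass module itself: medaka_consensus
// writes consensus.fasta into the -o directory, so run into <prefix>_out
// and rename the result to the file name downstream steps expect.
process MEDAKA_SKETCH {

    input:
    tuple val(meta), path(longreads), path(assembly)

    output:
    tuple val(meta), path('*_polished_genome.fa'), emit: assembly

    script:
    def prefix = meta.id   // illustrative; the real module defines its own prefix
    """
    medaka_consensus \\
        -i ${longreads} \\
        -d ${assembly} \\
        -o ${prefix}_out \\
        -t ${task.cpus}

    mv ${prefix}_out/consensus.fasta ${prefix}_polished_genome.fa
    """
}
```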
99 changes: 56 additions & 43 deletions workflows/bacass.nf
@@ -240,7 +240,7 @@ workflow BACASS {
//
// MODULE: Miniasm, genome assembly, long reads
//
- if ( params.assembler == 'miniasm' ) {
+ if ( params.assembly_type != 'short' && params.assembler == 'miniasm' ) {
MINIMAP2_ALIGN (
ch_for_assembly.map{ meta,sr,lr -> tuple(meta,lr) },
[[:],[]],
@@ -280,6 +280,8 @@
)
ch_assembly = ch_assembly.mix( RACON.out.improved_assembly.dump(tag: 'miniasm') )
ch_versions = ch_versions.mix( RACON.out.versions )
+ } else if (params.assembly_type == 'short' && params.assembler == 'miniasm') {
+ exit("Selected assembler ${params.assembler} cannot run on short reads")
}

//
@@ -294,51 +296,62 @@
}

//
- // MODULE: Nanopolish, polishes assembly using FAST5 files - should take either miniasm, canu, or unicycler consensus sequence
+ // SUBWORKFLOW: Long reads polishing. Uses medaka or Nanopolish (this last requires Fast5 files available in input samplesheet).
//
- if ( !params.skip_polish && params.assembly_type == 'long' && params.polish_method != 'medaka' ) {
+ if ( (params.assembly_type == 'long' && !params.skip_polish) || ( params.assembly_type != 'short' && params.polish_method) ){
+ // Set channel for polishing long reads
ch_for_assembly
.join( ch_assembly )
- .set { ch_for_polish }
-
- MINIMAP2_POLISH (
- ch_for_polish.map { meta, sr, lr, fasta -> tuple(meta, lr) },
- ch_for_polish.map { meta, sr, lr, fasta -> fasta },
- true,
- false,
- false
- )
- ch_versions = ch_versions.mix(MINIMAP2_POLISH.out.versions)
-
- SAMTOOLS_INDEX (
- MINIMAP2_POLISH.out.bam.dump(tag: 'samtools_sort')
- )
- ch_versions = ch_versions.mix(SAMTOOLS_INDEX.out.versions)
-
- ch_for_polish // tuple val(meta), val(reads), file(longreads), file(assembly)
- .join( MINIMAP2_POLISH.out.bam ) // tuple val(meta), file(bam)
- .join( SAMTOOLS_INDEX.out.bai ) // tuple val(meta), file(bai)
- .join( ch_fast5 ) // tuple val(meta), file(fast5)
- .set { ch_for_nanopolish } // tuple val(meta), val(reads), file(longreads), file(assembly), file(bam), file(bai), file(fast5)
-
- // TODO: 'nanopolish index' couldn't be tested. No fast5 provided in test datasets.
- NANOPOLISH (
- ch_for_nanopolish.dump(tag: 'into_nanopolish')
- )
- ch_versions = ch_versions.mix(NANOPOLISH.out.versions)
- }
-
- //
- // MODULE: Medaka, polishes assembly - should take either miniasm, canu, or unicycler consensus sequence
- //
- if ( !params.skip_polish && params.assembly_type == 'long' && params.polish_method == 'medaka' ) {
- ch_for_assembly
- .join( ch_assembly )
- .map { meta, sr, lr, assembly -> tuple(meta, lr, assembly) }
- .set { ch_for_medaka }
-
- MEDAKA ( ch_for_medaka.dump(tag: 'into_medaka') )
- ch_versions = ch_versions.mix(MEDAKA.out.versions)
+ .map { meta, sr, lr, fasta -> tuple(meta, lr, fasta) }
+ .set { ch_polish_long } // channel: [ val(meta), path(lr), path(fasta) ]
+ if (params.polish_method == 'medaka'){
+ //
+ // MODULE: Medaka, polishes assembly - should take either miniasm, canu, or unicycler consensus sequence
+ //
+ MEDAKA ( ch_polish_long )
+ ch_assembly = MEDAKA.out.assembly
+ ch_versions = ch_versions.mix(MEDAKA.out.versions)
+ } else if (params.polish_method == 'nanopolish') {
+ //
+ // MODULE: Nanopolish, polishes assembly using FAST5 files
+ //
+ if (!ch_fast5){
+ log.error "ERROR: FAST5 files are required for Nanopolish but none were provided. Please supply FAST5 files or choose another polishing method. Available options are: medaka, nanopolish"
+ } else {
+ //
+ // MODULE: Minimap2 polish
+ //
+ MINIMAP2_POLISH (
+ ch_polish_long.map { meta, lr, fasta -> tuple(meta, lr) },
+ ch_polish_long.map { meta, lr, fasta -> tuple(meta, fasta) },
+ true,
+ false,
+ false
+ )
+ ch_versions = ch_versions.mix(MINIMAP2_POLISH.out.versions)
+ //
+ // MODULE: Samtools index
+ //
+ SAMTOOLS_INDEX (
+ MINIMAP2_POLISH.out.bam.dump(tag: 'samtools_sort')
+ )
+ ch_versions = ch_versions.mix(SAMTOOLS_INDEX.out.versions)
+ //
+ // MODULE: Nanopolish
+ //
+ ch_polish_long // tuple val(meta), val(reads), file(longreads), file(assembly)
+ .join( MINIMAP2_POLISH.out.bam ) // tuple val(meta), file(bam)
+ .join( SAMTOOLS_INDEX.out.bai ) // tuple val(meta), file(bai)
+ .join( ch_fast5 ) // tuple val(meta), file(fast5)
+ .set { ch_for_nanopolish } // tuple val(meta), val(reads), file(longreads), file(assembly), file(bam), file(bai), file(fast5)
+ // TODO: 'nanopolish index' couldn't be tested. No fast5 provided in test datasets.
+ NANOPOLISH (
+ ch_for_nanopolish.dump(tag: 'into_nanopolish')
+ )
+ ch_assembly = NANOPOLISH.out.assembly
+ ch_versions = ch_versions.mix( NANOPOLISH.out.versions )
+ }
+ }
}

//
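The core of the refactor above is that both polishers now start from one shared channel shape: reads and assemblies are joined on `meta` and the short reads are dropped, so MEDAKA and the Nanopolish branch both receive `[ meta, longreads, assembly ]`, and the reference handed to `MINIMAP2_POLISH` is wrapped as `tuple(meta, fasta)` rather than passed as a bare `fasta`. A minimal, runnable sketch of that channel shaping with hypothetical sample values (file names are placeholders, not test data):

```nextflow
// Sketch of the join/map step used above, with made-up values.
// join() keys on the first tuple element (meta), so matching samples
// from the two channels are combined before the short reads are dropped.
workflow {
    ch_for_assembly = Channel.of( [ [id:'sample1'], 'sample1_sr.fastq.gz', 'sample1_lr.fastq.gz' ] )
    ch_assembly     = Channel.of( [ [id:'sample1'], 'sample1.assembly.fasta' ] )

    ch_for_assembly
        .join( ch_assembly )                                     // [ meta, sr, lr, fasta ]
        .map { meta, sr, lr, fasta -> tuple(meta, lr, fasta) }   // [ meta, lr, fasta ]
        .set { ch_polish_long }

    ch_polish_long.view()   // [[id:sample1], sample1_lr.fastq.gz, sample1.assembly.fasta]
}
```

From that single channel, `params.polish_method` selects the branch: medaka consumes it directly, while the Nanopolish branch reuses it to drive `MINIMAP2_POLISH`, `SAMTOOLS_INDEX` and the join with `ch_fast5`.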
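Which branch runs is controlled entirely by the parameters visible in the diff. Note that the new guard `(params.assembly_type == 'long' && !params.skip_polish) || (params.assembly_type != 'short' && params.polish_method)` also admits hybrid assemblies when a polishing method is set, which the old `assembly_type == 'long'`-only conditions excluded. An illustrative `nextflow.config`-style snippet (values are hypothetical, not pipeline defaults):

```nextflow
// Illustrative values only; parameter names are taken from the diff above.
params {
    assembly_type = 'long'       // 'short' with the miniasm assembler now exits with an error
    assembler     = 'miniasm'
    skip_polish   = false
    polish_method = 'medaka'     // or 'nanopolish' (requires FAST5 files in the samplesheet)
}
```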