diff --git a/CHANGELOG.md b/CHANGELOG.md index 9d3b4376..aef4ccff 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -16,6 +16,7 @@ This version merges the nf-core template updates of v2.9 and v2.10, and updates ### `Added` +- [#116](https://github.com/nf-core/bacass/pull/116) - Added Flye as an alternative assembler for long reads - [#86](https://github.com/nf-core/bacass/pull/86) - Added nf-core subworkflow for trimming and QC of short-reads [nf-core/fastq_trim_fastp_fastqc](https://github.com/nf-core/modules/tree/master/subworkflows/nf-core/fastq_trim_fastp_fastqc). - [#88](https://github.com/nf-core/bacass/pull/88) - Added nf-validation on samplesheet - [#93](https://github.com/nf-core/bacass/pull/93) - Added missing modules output to MultiQC. ( Fastp, PycoQC, Porechop, Quast, Kraken2, and Prokka). diff --git a/CITATIONS.md b/CITATIONS.md index 9d2ab889..9e6e5f6e 100644 --- a/CITATIONS.md +++ b/CITATIONS.md @@ -38,6 +38,10 @@ > Koren S, Walenz BP, Berlin K, Miller JR, Bergman NH, Phillippy AM. Canu: scalable and accurate long-read assembly via adaptive k-mer weighting and repeat separation. Genome Res. 2017 May;27(5):722-736. doi: 10.1101/gr.215087.116. Epub 2017 Mar 15. PMID: 28298431; PMCID: PMC5411767. +- [Flye](https://pubmed.ncbi.nlm.nih.gov/30936562/) + + > Kolmogorov M, Yuan J, Lin Y, Pevzner PA. Assembly of long, error-prone reads using repeat graphs. Nat Biotechnol. 2019 May;37(5):540-546. doi: 10.1038/s41587-019-0072-8. Epub 2019 Apr 1. PMID: 30936562. + - [QUAST](https://pubmed.ncbi.nlm.nih.gov/23422339/) > Gurevich A, Saveliev V, Vyahhi N, Tesler G. QUAST: quality assessment tool for genome assemblies. Bioinformatics. 2013 Apr 15;29(8):1072-5. doi: 10.1093/bioinformatics/btt086. Epub 2013 Feb 19. PMID: 23422339; PMCID: PMC3624806. diff --git a/conf/modules.config b/conf/modules.config index 702ce9e8..517b8090 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -71,6 +71,17 @@ process { saveAs: { filename -> filename.equals('versions.yml') ? 
null : filename } ] } + withName: 'FLYE' { + ext.args = { + [ params.flye_args ? "${params.flye_args}" : ''].join(' ').trim() + } + publishDir = [ + path: { "${params.outdir}/Flye" }, + mode: params.publish_dir_mode, + pattern: "*.{fasta.gz,log}", + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + } withName: 'MINIMAP2_ALIGN' { ext.args = '-x ava-ont' diff --git a/docs/output.md b/docs/output.md index 4c58ec20..57426785 100644 --- a/docs/output.md +++ b/docs/output.md @@ -10,18 +10,21 @@ The directories listed below will be created in the results directory after the The pipeline is built using [Nextflow](https://www.nextflow.io/) and processes data using the following steps: -- [Quality trimming and QC](#quality-trimming-and-qc) - - [Short Read Trimming](#short-read-trimming) - - [Short Read RAW QC](#short-read-raw-qc) - - [Long Read Trimming](#long-read-trimming) - - [Long Read RAW QC](#long-read-raw-qc) -- [Taxonomic classification](#taxonomic-classification) -- [Assembly Output](#assembly-output) - - [Polished assemblies](#polished-assemblies) -- [Assembly QC with QUAST](#assembly-qc-with-quast) -- [Annotation](#annotation) -- [Report](#report) -- [Pipeline information](#pipeline-information) - Report metrics generated during the workflow execution +- [nf-core/bacass: Output](#nf-corebacass-output) + - [Introduction](#introduction) + - [Pipeline overview](#pipeline-overview) + - [Quality trimming and QC](#quality-trimming-and-qc) + - [Short Read Trimming](#short-read-trimming) + - [Short Read RAW QC](#short-read-raw-qc) + - [Long Read Trimming](#long-read-trimming) + - [Long Read RAW QC](#long-read-raw-qc) + - [Taxonomic classification](#taxonomic-classification) + - [Assembly Output](#assembly-output) + - [Polished assemblies](#polished-assemblies) + - [Assembly QC with QUAST](#assembly-qc-with-quast) + - [Annotation](#annotation) + - [Report](#report) + - [Pipeline information](#pipeline-information) ## Quality trimming and QC 
@@ -121,7 +124,7 @@ Exemplary Kraken2 report screenshot: ## Assembly Output -Trimmed reads are assembled with [Unicycler](https://github.com/rrwick/Unicycler) in `short` or `hybrid` assembly modes. For long-read assembly, there are also `canu` and `miniasm` available. +Trimmed reads are assembled with [Unicycler](https://github.com/rrwick/Unicycler) in `short` or `hybrid` assembly modes. For long-read assembly, there are also `canu`, `flye`, and `miniasm` available. Unicycler is a pipeline on its own, which at least for Illumina reads mainly acts as a frontend to Spades with added polishing steps.
@@ -140,6 +143,12 @@ Check out the [Unicycler documentation](https://github.com/rrwick/Unicycler) for Check out the [Canu documentation](https://canu.readthedocs.io/en/latest/index.html) for more information on Canu output. +- `Flye/` + - `*.fasta.gz`: Final assembly in fasta format + - `*.log`: Log file summarizing steps and intermediate results + +Check out the [Flye documentation](https://github.com/fenderglass/Flye/blob/flye/docs/USAGE.md) for more information on Flye output. + - `Miniasm/` - `*.fasta.gz`: Assembly in Fasta format - `*_assembly_consensus.fasta.gz`: Consensus assembly in fasta format (polished by Racon) diff --git a/modules.json b/modules.json index 87f35378..016c8f8d 100644 --- a/modules.json +++ b/modules.json @@ -36,6 +36,11 @@ "git_sha": "9a4517e720bc812e95b56d23d15a1653b6db4f53", "installed_by": ["modules", "fastq_trim_fastp_fastqc"] }, + "flye": { + "branch": "master", + "git_sha": "3f5420aa22e00bd030a2556dfdffc9e164ec0ec5", + "installed_by": ["modules"] + }, "gunzip": { "branch": "master", "git_sha": "e06548bfa36ee31869b81041879dd6b3a83b1d57", diff --git a/modules/nf-core/flye/environment.yml b/modules/nf-core/flye/environment.yml new file mode 100644 index 00000000..95854499 --- /dev/null +++ b/modules/nf-core/flye/environment.yml @@ -0,0 +1,7 @@ +name: flye +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::flye=2.9 diff --git a/modules/nf-core/flye/main.nf b/modules/nf-core/flye/main.nf new file mode 100644 index 00000000..3d892180 --- /dev/null +++ b/modules/nf-core/flye/main.nf @@ -0,0 +1,68 @@ +process FLYE { + tag "$meta.id" + label 'process_high' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'https://depot.galaxyproject.org/singularity/flye:2.9--py39h6935b12_1' : + 'biocontainers/flye:2.9--py39h6935b12_1' }" + + input: + tuple val(meta), path(reads) + val mode + + output: + tuple val(meta), path("*.fasta.gz"), emit: fasta + tuple val(meta), path("*.gfa.gz") , emit: gfa + tuple val(meta), path("*.gv.gz") , emit: gv + tuple val(meta), path("*.txt") , emit: txt + tuple val(meta), path("*.log") , emit: log + tuple val(meta), path("*.json") , emit: json + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + def valid_mode = ["--pacbio-raw", "--pacbio-corr", "--pacbio-hifi", "--nano-raw", "--nano-corr", "--nano-hq"] + if ( !valid_mode.contains(mode) ) { error "Unrecognised mode to run Flye. Options: ${valid_mode.join(', ')}" } + """ + flye \\ + $mode \\ + $reads \\ + --out-dir . \\ + --threads \\ + $task.cpus \\ + $args + + gzip -c assembly.fasta > ${prefix}.assembly.fasta.gz + gzip -c assembly_graph.gfa > ${prefix}.assembly_graph.gfa.gz + gzip -c assembly_graph.gv > ${prefix}.assembly_graph.gv.gz + mv assembly_info.txt ${prefix}.assembly_info.txt + mv flye.log ${prefix}.flye.log + mv params.json ${prefix}.params.json + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + flye: \$( flye --version ) + END_VERSIONS + """ + + stub: + def prefix = task.ext.prefix ?: "${meta.id}" + """ + echo stub | gzip -c > ${prefix}.assembly.fasta.gz + echo stub | gzip -c > ${prefix}.assembly_graph.gfa.gz + echo stub | gzip -c > ${prefix}.assembly_graph.gv.gz + echo contig_1 > ${prefix}.assembly_info.txt + echo stub > ${prefix}.flye.log + echo stub > ${prefix}.params.json + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + flye: \$( flye --version ) + END_VERSIONS + """ +} diff --git a/modules/nf-core/flye/meta.yml b/modules/nf-core/flye/meta.yml new file mode 100644 index 00000000..5c3c816d --- /dev/null +++ 
b/modules/nf-core/flye/meta.yml @@ -0,0 +1,68 @@ +name: "flye" +description: De novo assembler for single molecule sequencing reads +keywords: + - assembly + - genome + - de novo + - genome assembler + - single molecule +tools: + - "flye": + description: "Fast and accurate de novo assembler for single molecule sequencing reads" + homepage: "https://github.com/fenderglass/Flye" + documentation: "https://github.com/fenderglass/Flye/blob/flye/docs/USAGE.md" + tool_dev_url: "https://github.com/fenderglass/Flye" + doi: "10.1038/s41592-020-00971-x" + licence: "['BSD-3-clause']" +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test' ] + - reads: + type: file + description: Input reads from Oxford Nanopore or PacBio data in FASTA/FASTQ format. + pattern: "*.{fasta,fastq,fasta.gz,fastq.gz,fa,fq,fa.gz,fq.gz}" + - mode: + type: string + description: Flye mode depending on the input data (source and error rate) + pattern: "--pacbio-raw|--pacbio-corr|--pacbio-hifi|--nano-raw|--nano-corr|--nano-hq" +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test' ] + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" + - fasta: + type: file + description: Assembled FASTA file + pattern: "*.fasta.gz" + - gfa: + type: file + description: Repeat graph in gfa format + pattern: "*.gfa.gz" + - gv: + type: file + description: Repeat graph in gv format + pattern: "*.gv.gz" + - txt: + type: file + description: Extra information and statistics about resulting contigs + pattern: "*.txt" + - log: + type: file + description: Flye log file + pattern: "*.log" + - json: + type: file + description: Flye parameters + pattern: "*.json" +authors: + - "@mirpedrol" +maintainers: + - "@mirpedrol" diff --git a/modules/nf-core/flye/tests/main.nf.test b/modules/nf-core/flye/tests/main.nf.test new file mode 100644 index 00000000..f06aa1bc --- /dev/null +++ b/modules/nf-core/flye/tests/main.nf.test @@ -0,0 +1,258 @@ +// According to the issue https://github.com/fenderglass/Flye/issues/164 +// Some fluctuations are expected because of the heuristics +// Here we check that test.assembly_info.txt contains at least one contig + +nextflow_process { + + name "Test Process FLYE" + script "../main.nf" + process "FLYE" + config "./nextflow.config" + tag "flye" + tag "modules" + tag "modules_nfcore" + + + test("flye_pacbio_raw") { + tag "flye_pacbio_raw" + + when { + params { + outdir = "$outputDir" + } + process { + """ + input[0] = [ + [ id:'test' ], // meta map + file(params.test_data['homo_sapiens']['pacbio']['hifi'], checkIfExists: true) + ] + input[1] = "--pacbio-raw" + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert process.out.fasta.get(0).get(1) ==~ '.*/test.assembly.fasta.gz' }, + { assert process.out.gfa.get(0).get(1) ==~ '.*/test.assembly_graph.gfa.gz' }, + { assert process.out.gv.get(0).get(1) ==~ '.*/test.assembly_graph.gv.gz' }, + { assert process.out.log.get(0).get(1) ==~ '.*/test.flye.log' }, + { assert process.out.txt.get(0).get(1) ==~ 
'.*/test.assembly_info.txt' }, + { assert process.out.json.get(0).get(1) ==~ '.*/test.params.json' }, + + // check for contig_1 in assembly_info + { assert path(process.out.txt.get(0).get(1)).text =~ /contig_1/ }, + // Check if test.params.json matches + { assert snapshot(process.out.json).match() } + + + + ) + } + + } + + + test("flye_pacbio_corr") { + tag "flye_pacbio_corr" + + when { + params { + outdir = "$outputDir" + } + process { + """ + input[0] = [ + [ id:'test' ], // meta map + file(params.test_data['homo_sapiens']['pacbio']['hifi'], checkIfExists: true) + ] + input[1] = "--pacbio-corr" + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert process.out.fasta.get(0).get(1) ==~ '.*/test.assembly.fasta.gz' }, + { assert process.out.gfa.get(0).get(1) ==~ '.*/test.assembly_graph.gfa.gz' }, + { assert process.out.gv.get(0).get(1) ==~ '.*/test.assembly_graph.gv.gz' }, + { assert process.out.log.get(0).get(1) ==~ '.*/test.flye.log' }, + { assert process.out.txt.get(0).get(1) ==~ '.*/test.assembly_info.txt' }, + + // check for contig_1 in assembly_info + { assert path(process.out.txt.get(0).get(1)).text =~ /contig_1/ }, + // Check if test.params.json matches + { assert snapshot(process.out.json).match() } + + + + + ) + } + + } + + test("flye_pacbio_hifi") { + tag "flye_pacbio_hifi" + + when { + params { + outdir = "$outputDir" + } + process { + """ + input[0] = [ + [ id:'test' ], // meta map + file(params.test_data['homo_sapiens']['pacbio']['hifi'], checkIfExists: true) + ] + input[1] = "--pacbio-hifi" + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert process.out.fasta.get(0).get(1) ==~ '.*/test.assembly.fasta.gz' }, + { assert process.out.gfa.get(0).get(1) ==~ '.*/test.assembly_graph.gfa.gz' }, + { assert process.out.gv.get(0).get(1) ==~ '.*/test.assembly_graph.gv.gz' }, + { assert process.out.log.get(0).get(1) ==~ '.*/test.flye.log' }, + { assert process.out.txt.get(0).get(1) ==~ '.*/test.assembly_info.txt' }, 
+ + // check for contig_1 in assembly_info + { assert path(process.out.txt.get(0).get(1)).text =~ /contig_1/ }, + // Check if test.params.json matches + { assert snapshot(process.out.json).match() } + + + + + ) + } + + } + + test("flye_nano_raw") { + tag "flye_nano_raw" + + when { + params { + outdir = "$outputDir" + } + process { + """ + input[0] = [ + [ id:'test' ], // meta map + file(params.test_data['homo_sapiens']['pacbio']['hifi'], checkIfExists: true) + ] + input[1] = "--nano-raw" + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert process.out.fasta.get(0).get(1) ==~ '.*/test.assembly.fasta.gz' }, + { assert process.out.gfa.get(0).get(1) ==~ '.*/test.assembly_graph.gfa.gz' }, + { assert process.out.gv.get(0).get(1) ==~ '.*/test.assembly_graph.gv.gz' }, + { assert process.out.log.get(0).get(1) ==~ '.*/test.flye.log' }, + { assert process.out.txt.get(0).get(1) ==~ '.*/test.assembly_info.txt' }, + { assert process.out.json.get(0).get(1) ==~ '.*/test.params.json' }, + + // check for contig_1 in assembly_info + { assert path(process.out.txt.get(0).get(1)).text =~ /contig_1/ }, + // Check if test.params.json matches + { assert snapshot(process.out.json).match() } + + + + ) + } + + } + + test("flye_nano_corr") { + tag "flye_nano_corr" + + when { + params { + outdir = "$outputDir" + } + process { + """ + input[0] = [ + [ id:'test' ], // meta map + file(params.test_data['homo_sapiens']['pacbio']['hifi'], checkIfExists: true) + ] + input[1] = "--nano-corr" + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert process.out.fasta.get(0).get(1) ==~ '.*/test.assembly.fasta.gz' }, + { assert process.out.gfa.get(0).get(1) ==~ '.*/test.assembly_graph.gfa.gz' }, + { assert process.out.gv.get(0).get(1) ==~ '.*/test.assembly_graph.gv.gz' }, + { assert process.out.log.get(0).get(1) ==~ '.*/test.flye.log' }, + { assert process.out.txt.get(0).get(1) ==~ '.*/test.assembly_info.txt' }, + { assert process.out.json.get(0).get(1) ==~ 
'.*/test.params.json' }, + + // check for contig_1 in assembly_info + { assert path(process.out.txt.get(0).get(1)).text =~ /contig_1/ }, + // Check if test.params.json matches + { assert snapshot(process.out.json).match() } + + + ) + } + + } + + + test("flye_nano_hq") { + tag "flye_nano_hq" + + when { + params { + outdir = "$outputDir" + } + process { + """ + input[0] = [ + [ id:'test' ], // meta map + file(params.test_data['homo_sapiens']['pacbio']['hifi'], checkIfExists: true) + ] + input[1] = "--nano-hq" + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert process.out.fasta.get(0).get(1) ==~ '.*/test.assembly.fasta.gz' }, + { assert process.out.gfa.get(0).get(1) ==~ '.*/test.assembly_graph.gfa.gz' }, + { assert process.out.gv.get(0).get(1) ==~ '.*/test.assembly_graph.gv.gz' }, + { assert process.out.log.get(0).get(1) ==~ '.*/test.flye.log' }, + { assert process.out.txt.get(0).get(1) ==~ '.*/test.assembly_info.txt' }, + { assert process.out.json.get(0).get(1) ==~ '.*/test.params.json' }, + + // check for contig_1 in assembly_info + { assert path(process.out.txt.get(0).get(1)).text =~ /contig_1/ }, + // Check if test.params.json matches + { assert snapshot(process.out.json).match() } + + + ) + } + + } + + + +} diff --git a/modules/nf-core/flye/tests/main.nf.test.snap b/modules/nf-core/flye/tests/main.nf.test.snap new file mode 100644 index 00000000..a4aef731 --- /dev/null +++ b/modules/nf-core/flye/tests/main.nf.test.snap @@ -0,0 +1,80 @@ +{ + "flye_pacbio_raw": { + "content": [ + [ + [ + { + "id": "test" + }, + "test.params.json:md5,54b576cb6d4d27656878a7fd3657bde9" + ] + ] + ], + "timestamp": "2023-10-18T09:38:04.835173617" + }, + "flye_pacbio_hifi": { + "content": [ + [ + [ + { + "id": "test" + }, + "test.params.json:md5,54b576cb6d4d27656878a7fd3657bde9" + ] + ] + ], + "timestamp": "2023-10-18T08:38:39.624137639" + }, + "flye_nano_raw": { + "content": [ + [ + [ + { + "id": "test" + }, + 
"test.params.json:md5,54b576cb6d4d27656878a7fd3657bde9" + ] + ] + ], + "timestamp": "2023-10-18T09:51:24.546896915" + }, + "flye_pacbio_corr": { + "content": [ + [ + [ + { + "id": "test" + }, + "test.params.json:md5,54b576cb6d4d27656878a7fd3657bde9" + ] + ] + ], + "timestamp": "2023-10-18T08:34:15.751344742" + }, + "flye_nano_corr": { + "content": [ + [ + [ + { + "id": "test" + }, + "test.params.json:md5,54b576cb6d4d27656878a7fd3657bde9" + ] + ] + ], + "timestamp": "2023-10-18T09:17:49.861781685" + }, + "flye_nano_hq": { + "content": [ + [ + [ + { + "id": "test" + }, + "test.params.json:md5,54b576cb6d4d27656878a7fd3657bde9" + ] + ] + ], + "timestamp": "2023-10-18T09:26:29.081427909" + } +} \ No newline at end of file diff --git a/modules/nf-core/flye/tests/nextflow.config b/modules/nf-core/flye/tests/nextflow.config new file mode 100644 index 00000000..40cf878c --- /dev/null +++ b/modules/nf-core/flye/tests/nextflow.config @@ -0,0 +1,4 @@ +// profile=docker with tests flye_pacbio_raw and flye_nano_raw need more memory than the default of 3.GB +process { + memory = 6.GB +} diff --git a/modules/nf-core/flye/tests/tags.yml b/modules/nf-core/flye/tests/tags.yml new file mode 100644 index 00000000..31103d13 --- /dev/null +++ b/modules/nf-core/flye/tests/tags.yml @@ -0,0 +1,2 @@ +flye: + - modules/nf-core/flye/** diff --git a/nextflow.config b/nextflow.config index 66e2cced..707bbe98 100644 --- a/nextflow.config +++ b/nextflow.config @@ -20,11 +20,13 @@ params { kraken2db = "" // Assembly parameters - assembler = 'unicycler' //allowed are unicycler, canu, miniasm + assembler = 'unicycler' //allowed are unicycler, canu, miniasm, flye assembly_type = 'short' //allowed are short, long, hybrid (hybrid works only with Unicycler) unicycler_args = "" canu_mode = '-nanopore' // allowed modes: ["-pacbio", "-nanopore", "-pacbio-hifi"] canu_args = '' //Default no extra options, can be adjusted by the user + flye_mode = '--nano-raw' // allowed modes: ["--pacbio-raw", 
"--pacbio-corr", "--pacbio-hifi", "--nano-raw", "--nano-corr", "--nano-hq"] + flye_args = '' //Default no extra options, can be adjusted by the user // Assembly polishing polish_method = 'medaka' diff --git a/nextflow_schema.json b/nextflow_schema.json index 6b416b30..e723a33d 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -105,7 +105,8 @@ "canu_mode": { "type": "string", "enum": ["-pacbio", "-nanopore", "-pacbio-hifi", "null"], - "description": "Allowed technologies for long read assembly : [\"-pacbio\", \"-nanopore\", \"-pacbio-hifi\"]" + "description": "Allowed technologies for long read assembly : [\"-pacbio\", \"-nanopore\", \"-pacbio-hifi\"]", + "default": "-nanopore" }, "canu_args": { "type": "string", @@ -440,5 +441,17 @@ { "$ref": "#/definitions/generic_options" } - ] + ], + "properties": { + "flye_mode": { + "type": "string", + "default": "--nano-raw", + "enum": ["--pacbio-raw", "--pacbio-corr", "--pacbio-hifi", "--nano-raw", "--nano-corr", "--nano-hq"], + "description": "Allowed read types for long reads technologies [\"--pacbio-raw\", \"--pacbio-corr\", \"--pacbio-hifi\", \"--nano-raw\", \"--nano-corr\", \"--nano-hq\"]" + }, + "flye_args": { + "type": "string", + "description": "This can be used to supply [extra options](https://github.com/fenderglass/Flye/blob/flye/docs/USAGE.md) to the Flye assembler. Will be ignored when other assemblers are used." 
+ } + } } diff --git a/workflows/bacass.nf b/workflows/bacass.nf index 4a6d03a9..a3c87432 100644 --- a/workflows/bacass.nf +++ b/workflows/bacass.nf @@ -82,6 +82,7 @@ include { MINIMAP2_ALIGN as MINIMAP2_CONSENSUS } from '../modules/nf-core/minim include { MINIMAP2_ALIGN as MINIMAP2_POLISH } from '../modules/nf-core/minimap2/align/main' include { MINIASM } from '../modules/nf-core/miniasm/main' include { RACON } from '../modules/nf-core/racon/main' +include { FLYE } from '../modules/nf-core/flye/main' include { SAMTOOLS_SORT } from '../modules/nf-core/samtools/sort/main' include { SAMTOOLS_INDEX } from '../modules/nf-core/samtools/index/main' include { KRAKEN2_KRAKEN2 as KRAKEN2 } from '../modules/nf-core/kraken2/kraken2/main' @@ -220,7 +221,7 @@ workflow BACASS { } // - // ASSEMBLY: Unicycler, Canu, Miniasm + // ASSEMBLY: Unicycler, Canu, Miniasm, Flye // ch_assembly = Channel.empty() @@ -247,6 +248,18 @@ workflow BACASS { ch_assembly = ch_assembly.mix( CANU.out.assembly.dump(tag: 'canu') ) ch_versions = ch_versions.mix(CANU.out.versions.ifEmpty(null)) } + // + // MODULE: Flye, genome assembly, long reads + // + if ( params.assembler == 'flye' ) { + FLYE ( + ch_for_assembly.map { meta, reads, lr -> tuple( meta, lr ) }, + params.flye_mode + // TODO(review): genome size is not passed to FLYE; supply it through params.flye_args if Flye needs it + ) + ch_assembly = ch_assembly.mix( FLYE.out.fasta.dump(tag: 'flye') ) + ch_versions = ch_versions.mix(FLYE.out.versions.ifEmpty(null)) + } // // MODULE: Miniasm, genome assembly, long reads