From 1c389fd97012d9d7a6105ac9734c3a3e6873633d Mon Sep 17 00:00:00 2001 From: Ksenia Date: Tue, 5 Sep 2023 16:09:36 +0100 Subject: [PATCH] Add mitohifi/mitohifi module (#3573) * Add mitohifi module * Lint and add a note in pytest_modules * Fix prettier checks * Fix spelling * Fix data references * Make a note not to use conda * Change md5 values * Remove extra file * Fix code according to code review * ECLint check fix * Extend description of the output * Fix md5 hash for output * Fix prettier * Fix conda checks * Undo changes before merge * Skip conda test on GitHub * Update md5sums --------- Co-authored-by: Ksenia Krasheninnikova Co-authored-by: Priyanka Surana --- .github/workflows/test.yml | 2 + modules/nf-core/mitohifi/mitohifi/main.nf | 66 +++++++++++ modules/nf-core/mitohifi/mitohifi/meta.yml | 109 ++++++++++++++++++ tests/config/pytest_modules.yml | 4 + tests/config/test_data.config | 8 ++ .../modules/nf-core/mitohifi/mitohifi/main.nf | 16 +++ .../nf-core/mitohifi/mitohifi/nextflow.config | 5 + .../nf-core/mitohifi/mitohifi/test.yml | 12 ++ 8 files changed, 222 insertions(+) create mode 100644 modules/nf-core/mitohifi/mitohifi/main.nf create mode 100644 modules/nf-core/mitohifi/mitohifi/meta.yml create mode 100644 tests/modules/nf-core/mitohifi/mitohifi/main.nf create mode 100644 tests/modules/nf-core/mitohifi/mitohifi/nextflow.config create mode 100644 tests/modules/nf-core/mitohifi/mitohifi/test.yml diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index e5251ebdc458..d17edfd99693 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -193,6 +193,8 @@ jobs: tags: merquryfk/merquryfk - profile: "conda" tags: merquryfk/ploidyplot + - profile: "conda" + tags: mitohifi/mitohifi - profile: "conda" tags: mitohifi/findmitoreference - profile: "conda" diff --git a/modules/nf-core/mitohifi/mitohifi/main.nf b/modules/nf-core/mitohifi/mitohifi/main.nf new file mode 100644 index 000000000000..1bae00e19690 --- /dev/null +++ 
b/modules/nf-core/mitohifi/mitohifi/main.nf @@ -0,0 +1,66 @@ +process MITOHIFI_MITOHIFI { + tag "$meta.id" + label 'process_high' + + + // Docker image available at the project github repository + container 'ghcr.io/marcelauliano/mitohifi:master' + + input: + tuple val(meta), path(reads), path(contigs) + path ref_fa + path ref_gb + val mito_code + + output: + tuple val(meta), path("*fasta") , emit: fasta + tuple val(meta), path("*contigs_stats.tsv") , emit: stats + tuple val(meta), path("*gb") , emit: gb, optional: true + tuple val(meta), path("*gff") , emit: gff, optional: true + tuple val(meta), path("*all_potential_contigs.fa") , emit: all_potential_contigs, optional: true + tuple val(meta), path("*contigs_annotations.png") , emit: contigs_annotations, optional: true + tuple val(meta), path("*contigs_circularization") , emit: contigs_circularization, optional: true + tuple val(meta), path("*contigs_filtering") , emit: contigs_filtering, optional: true + tuple val(meta), path("*coverage_mapping") , emit: coverage_mapping, optional: true + tuple val(meta), path("*coverage_plot.png") , emit: coverage_plot, optional: true + tuple val(meta), path("*final_mitogenome.annotation.png"), emit: final_mitogenome_annotation, optional: true + tuple val(meta), path("*final_mitogenome_choice") , emit: final_mitogenome_choice, optional: true + tuple val(meta), path("*final_mitogenome.coverage.png") , emit: final_mitogenome_coverage, optional: true + tuple val(meta), path("*potential_contigs") , emit: potential_contigs, optional: true + tuple val(meta), path("*reads_mapping_and_assembly") , emit: reads_mapping_and_assembly, optional: true + tuple val(meta), path("*shared_genes.tsv") , emit: shared_genes, optional: true + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + // Exit if running this module with -profile conda / -profile mamba + if (workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1) { + exit 1, 
"MitoHiFi module does not support Conda. Please use Docker / Singularity instead." + } + + def args = task.ext.args ?: '' + def run_type = reads ? "-r ${reads}" : + contigs ? "-c ${contigs}" : + error("Reads or contigs must be specified") // error() aborts the run as a failure, unlike exit(msg) called without an exit code + """ + mitohifi.py ${run_type} -f ${ref_fa} -g ${ref_gb} -o ${mito_code} -t $task.cpus ${args} + cat <<-END_VERSIONS > versions.yml + "${task.process}": + mitohifi: \$( mitohifi.py --version 2>&1 | head -n1 | sed 's/^.*MitoHiFi //; s/ .*\$//' ) + END_VERSIONS + """ + + stub: + """ + touch final_mitogenome.fasta + touch final_mitogenome.gb + touch contigs_stats.tsv + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + mitohifi: \$( mitohifi.py --version 2>&1 | head -n1 | sed 's/^.*MitoHiFi //; s/ .*\$//') + END_VERSIONS + """ +} diff --git a/modules/nf-core/mitohifi/mitohifi/meta.yml b/modules/nf-core/mitohifi/mitohifi/meta.yml new file mode 100644 index 000000000000..54bd70f7be21 --- /dev/null +++ b/modules/nf-core/mitohifi/mitohifi/meta.yml @@ -0,0 +1,109 @@ +name: "MITOHIFI_MITOHIFI" +description: A python workflow that assembles mitogenomes from PacBio HiFi reads +keywords: + - mitochondrion + - chloroplast + - PacBio +tools: + - "mitohifi.py": + description: A python workflow that assembles mitogenomes from PacBio HiFi reads + homepage: https://github.com/marcelauliano/MitoHiFi + documentation: https://github.com/marcelauliano/MitoHiFi + tool_dev_url: https://github.com/marcelauliano/MitoHiFi + doi: "10.1101/2022.12.23.521667" + licence: ["MIT"] + +input: + - reads: + type: file + description: Path to PacBio HiFi reads + pattern: "*.{fa,fa.gz,fasta,fasta.gz}" + - contigs: + type: file + description: Path to genome assembly + pattern: "*.{fa,fasta}" + - ref_fa: + type: file + description: Reference sequence + pattern: "*.{fa,fasta}" + - ref_gb: + type: file + description: Reference annotation + pattern: "*.{gb}" + - mito_code: + type: integer + description: Mitochondrial code for annotation + pattern: "[0-9]*" + +output: + 
- stats: + type: file + description: Contigs statistics table + pattern: "*contigs_stats.tsv" + - fasta: + type: file + description: Mitochondrial sequence + pattern: "*.{fasta,fa}" + - gb: + type: file + description: Genome annotation in case mitofinder was used + pattern: "*.gb" + - gff: + type: file + description: Genome annotation in case mitos was used + pattern: "*.gff" + - all_potential_contigs: + type: file + description: Contains sequences of all potential contigs + pattern: "*all_potential_contigs.fa" + - contigs_annotations: + type: file + description: Graphical representation of annotated genes and tRNAs + pattern: "*contigs_annotations.png" + - contigs_circularization: + type: directory + description: Contains circularization reports + pattern: "*contigs_circularization" + - contigs_filtering: + type: directory + description: Contains files with initial blast matches + pattern: "*contigs_filtering" + - coverage_mapping: + type: directory + description: Contains statistics on coverage mapping + pattern: "*coverage_mapping" + - coverage_plot: + type: file + description: Read coverage plot for mitochondrial contigs + pattern: "*coverage_plot.png" + - final_mitogenome_annotation: + type: file + description: Graphical representation of annotated genes for the final mito contig + pattern: "*final_mitogenome.annotation.png" + - final_mitogenome_choice: + type: directory + description: Files with potential contigs clusterings and alignments + pattern: "*final_mitogenome_choice" + - final_mitogenome_coverage: + type: file + description: Graphical representation of reads coverage plot for the final mito contig + pattern: "*final_mitogenome.coverage.png" + - potential_contigs: + type: directory + description: Files with sequences and annotations of the potential contigs + pattern: "*potential_contigs" + - reads_mapping_and_assembly: + type: directory + description: Read mapping files for run from the raw reads + pattern: "*reads_mapping_and_assembly" + - 
shared_genes: + type: file + description: Report on genes shared with the reference genome + pattern: "*shared_genes.tsv" + - versions: + type: file + description: Software versions used in the run + pattern: "versions.yml" + +authors: + - "@ksenia-krasheninnikova" diff --git a/tests/config/pytest_modules.yml b/tests/config/pytest_modules.yml index 85d8dd4f1e7f..361923fbd3e2 100644 --- a/tests/config/pytest_modules.yml +++ b/tests/config/pytest_modules.yml @@ -2450,6 +2450,10 @@ mitohifi/findmitoreference: - modules/nf-core/mitohifi/findmitoreference/** - tests/modules/nf-core/mitohifi/findmitoreference/** +mitohifi/mitohifi: + - modules/nf-core/mitohifi/mitohifi/** + - tests/modules/nf-core/mitohifi/mitohifi/** + mlst: - modules/nf-core/mlst/** - tests/modules/nf-core/mlst/** diff --git a/tests/config/test_data.config b/tests/config/test_data.config index 536560a9b004..b2191d73ae6c 100644 --- a/tests/config/test_data.config +++ b/tests/config/test_data.config @@ -655,6 +655,14 @@ params { pretext = "${params.test_data_base}/data/genomics/eukaryotes/galaxea_fascicularis/hic/jaGalFasc40_2.pretext" } } + 'deilephila_porcellus' { + 'mito' { + ref_fa = "${params.test_data_base}/data/genomics/eukaryotes/deilephila_porcellus/mito/MW539688.1.fasta" + ref_gb = "${params.test_data_base}/data/genomics/eukaryotes/deilephila_porcellus/mito/MW539688.1.gb" + hifi_reads = "${params.test_data_base}/data/genomics/eukaryotes/deilephila_porcellus/mito/ilDeiPorc1.HiFi.reads.fa" + contigs = "${params.test_data_base}/data/genomics/eukaryotes/deilephila_porcellus/mito/ilDeiPorc1.contigs.fa" + } + } 'imaging' { 'h5' { plant_wga = "${params.test_data_base}/data/imaging/h5/plant_wga.h5" diff --git a/tests/modules/nf-core/mitohifi/mitohifi/main.nf b/tests/modules/nf-core/mitohifi/mitohifi/main.nf new file mode 100644 index 000000000000..48c127b7d177 --- /dev/null +++ b/tests/modules/nf-core/mitohifi/mitohifi/main.nf @@ -0,0 +1,16 @@ +#!/usr/bin/env nextflow + +nextflow.enable.dsl = 2 + 
+include { MITOHIFI_MITOHIFI } from '../../../../../modules/nf-core/mitohifi/mitohifi/main.nf' + +workflow test_mitohifi_mitohifi { + + // Contigs-mode test on the Deilephila porcellus mito test data; the reads slot is left empty + + data_contigs = Channel.of([[id:"ilDeiPorc1"],[],file(params.test_data['deilephila_porcellus']['mito']['contigs'], checkIfExists: true)]) + ref_gb = file(params.test_data['deilephila_porcellus']['mito']['ref_gb'], checkIfExists: true) + ref_fa = file(params.test_data['deilephila_porcellus']['mito']['ref_fa'], checkIfExists: true) + code = 5 // passed to the process as mito_code + MITOHIFI_MITOHIFI ( data_contigs, ref_fa, ref_gb, code ) +} diff --git a/tests/modules/nf-core/mitohifi/mitohifi/nextflow.config b/tests/modules/nf-core/mitohifi/mitohifi/nextflow.config new file mode 100644 index 000000000000..50f50a7a3579 --- /dev/null +++ b/tests/modules/nf-core/mitohifi/mitohifi/nextflow.config @@ -0,0 +1,5 @@ +process { + + publishDir = { "${params.outdir}/${task.process.tokenize(':')[-1].tokenize('_')[0].toLowerCase()}" } + +} \ No newline at end of file diff --git a/tests/modules/nf-core/mitohifi/mitohifi/test.yml b/tests/modules/nf-core/mitohifi/mitohifi/test.yml new file mode 100644 index 000000000000..6b8a26edb88d --- /dev/null +++ b/tests/modules/nf-core/mitohifi/mitohifi/test.yml @@ -0,0 +1,12 @@ +- name: mitohifi mitohifi test_mitohifi_mitohifi + command: nextflow run ./tests/modules/nf-core/mitohifi/mitohifi -entry test_mitohifi_mitohifi -c ./tests/config/nextflow.config -c ./tests/modules/nf-core/mitohifi/mitohifi/nextflow.config + tags: + - mitohifi + - mitohifi/mitohifi + files: + - path: output/mitohifi/final_mitogenome.fasta + md5sum: 5654c418bbf991483d9e618dd849af03 + - path: output/mitohifi/final_mitogenome.gb + md5sum: 3b4659a0d7f27fd89510a25c0588909d + - path: output/mitohifi/versions.yml + md5sum: 2a4db1ea8ac4b9b11ea47e1c3963f591