Skip to content

Commit

Permalink
Centrifuge db build (nf-core#3008)
Browse files Browse the repository at this point in the history
* Add inputs and outputs for db build

* Pytest modules for db build

* Add test scripts

* Correct problem with cf output files

* Use same version in singularity and docker containers

Co-authored-by: Sofia Stamouli <91951607+sofstam@users.noreply.github.com>

* Correct biocontainer name

Co-authored-by: Sofia Stamouli <91951607+sofstam@users.noreply.github.com>

* Rename taxonomy to taxonomy_tree

Co-authored-by: Sofia Stamouli <91951607+sofstam@users.noreply.github.com>

* Update modules/nf-core/centrifuge/build/main.nf

Co-authored-by: Sofia Stamouli <91951607+sofstam@users.noreply.github.com>

* Remove files from metamix test

Co-authored-by: Sofia Stamouli <91951607+sofstam@users.noreply.github.com>

* Remove files from metamix test

Co-authored-by: Sofia Stamouli <91951607+sofstam@users.noreply.github.com>

* Remove comment

Co-authored-by: Sofia Stamouli <91951607+sofstam@users.noreply.github.com>

* Remove metamix lines

Co-authored-by: Sofia Stamouli <91951607+sofstam@users.noreply.github.com>

* Remove reference to kraken

Co-authored-by: Sofia Stamouli <91951607+sofstam@users.noreply.github.com>

* Change taxonomy to taxonomy_tree

Co-authored-by: Sofia Stamouli <91951607+sofstam@users.noreply.github.com>

* Update fasta description

Co-authored-by: Sofia Stamouli <91951607+sofstam@users.noreply.github.com>

* Alternative extension for fasta file

Co-authored-by: Sofia Stamouli <91951607+sofstam@users.noreply.github.com>

* Update conversion table description

Co-authored-by: Sofia Stamouli <91951607+sofstam@users.noreply.github.com>

* Update taxonomy tree description

Co-authored-by: Sofia Stamouli <91951607+sofstam@users.noreply.github.com>

* Update modules/nf-core/centrifuge/build/meta.yml

Co-authored-by: Sofia Stamouli <91951607+sofstam@users.noreply.github.com>

* Update cf description

Co-authored-by: Sofia Stamouli <91951607+sofstam@users.noreply.github.com>

* Remove reference to kraken

Co-authored-by: Sofia Stamouli <91951607+sofstam@users.noreply.github.com>

* Update modules/nf-core/centrifuge/build/main.nf

Co-authored-by: Sofia Stamouli <91951607+sofstam@users.noreply.github.com>

* Remove .

Co-authored-by: Sofia Stamouli <91951607+sofstam@users.noreply.github.com>

* Remove reference to kraken

Co-authored-by: Sofia Stamouli <91951607+sofstam@users.noreply.github.com>

* Remove empty line

Co-authored-by: Sofia Stamouli <91951607+sofstam@users.noreply.github.com>

* Remove empty line

Co-authored-by: Sofia Stamouli <91951607+sofstam@users.noreply.github.com>

* Remove empty line

Co-authored-by: Sofia Stamouli <91951607+sofstam@users.noreply.github.com>

* Remove empty line

Co-authored-by: Sofia Stamouli <91951607+sofstam@users.noreply.github.com>

* Remove empty line

Co-authored-by: Sofia Stamouli <91951607+sofstam@users.noreply.github.com>

* Remove empty line

Co-authored-by: Sofia Stamouli <91951607+sofstam@users.noreply.github.com>

* Change build to build_test to avoid conflict with pytest internal behaviour

* Change to build_test

* Remove trailing whitespace

* Symlink to build folder + prettier

* Correct symlink

* Remove unnecessary change to test

* Correct nextflow typo

* Tidy commas

* Tidying and update descriptions

* Correct indentation

* Change symlink

* Test again with symlink

* Symlink test

* Same centrifuge version as kreport

* Correct typo

* Change description

* Migrate to nf-test

* Post linting cleaning and remove final pytest leftovers

* Harshil align and add missing input file

* Clean up after reviews

---------

Co-authored-by: Sofia Stamouli <91951607+sofstam@users.noreply.github.com>
Co-authored-by: sofstam <sofia.stamouli@scilifelab.se>
Co-authored-by: James A. Fellows Yates <jfy133@gmail.com>
  • Loading branch information
4 people authored Feb 13, 2024
1 parent 8d8f0ae commit 64163cd
Show file tree
Hide file tree
Showing 6 changed files with 282 additions and 0 deletions.
7 changes: 7 additions & 0 deletions modules/nf-core/centrifuge/build/environment.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
# Conda environment for the centrifuge/build module.
name: centrifuge_build
# Channels are listed in the order: conda-forge, bioconda, defaults.
channels:
- conda-forge
- bioconda
- defaults
dependencies:
# Pinned to 1.0.4.1, the same centrifuge version as the container images
# referenced by this module's main.nf.
- bioconda::centrifuge=1.0.4.1
58 changes: 58 additions & 0 deletions modules/nf-core/centrifuge/build/main.nf
Original file line number Diff line number Diff line change
@@ -0,0 +1,58 @@
// Build a centrifuge database index from a FASTA file.
//
// Inputs:
//   meta, fasta       - sample map (meta.id is used as tag and default prefix) and the sequences to index
//   conversion_table  - passed to centrifuge-build as --conversion-table
//   taxonomy_tree     - passed as --taxonomy-tree
//   name_table        - passed as --name-table
//   size_table        - optional; passed as --size-table only when a file is supplied
//                       (an empty value, e.g. [], omits the option)
//
// Outputs:
//   cf        - tuple of meta and every "*.cf" file written with the chosen prefix
//   versions  - versions.yml recording the centrifuge version
//
// Extra command-line options can be supplied through task.ext.args and the
// output basename overridden through task.ext.prefix.
process CENTRIFUGE_BUILD {
    tag "$meta.id"
    label 'process_high'

    conda "${moduleDir}/environment.yml"
    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
        'https://depot.galaxyproject.org/singularity/centrifuge:1.0.4.1--hdcf5f25_1' :
        'biocontainers/centrifuge:1.0.4.1--hdcf5f25_1' }"

    input:
    tuple val(meta), path(fasta)
    path conversion_table
    path taxonomy_tree
    path name_table
    path size_table

    output:
    tuple val(meta), path("*.cf") , emit: cf
    path "versions.yml"           , emit: versions

    when:
    task.ext.when == null || task.ext.when

    script:
    def args = task.ext.args ?: ''
    def prefix = task.ext.prefix ?: "${meta.id}"
    // The size table is optional: only emit the flag when a file was provided.
    // The flag is hyphenated (--size-table), like the other long options below.
    def size_table_cmd = size_table ? "--size-table ${size_table}" : ""
    """
    centrifuge-build \\
        $args \\
        -p $task.cpus \\
        $fasta \\
        ${prefix} \\
        --conversion-table $conversion_table \\
        --taxonomy-tree $taxonomy_tree \\
        --name-table $name_table \\
        ${size_table_cmd}

    cat <<-END_VERSIONS > versions.yml
    "${task.process}":
        centrifuge: \$( centrifuge --version | sed -n 1p | sed 's/^.*centrifuge-class version //')
    END_VERSIONS
    """

    stub:
    def prefix = task.ext.prefix ?: "${meta.id}"
    // Create four empty placeholder index files so the "*.cf" output glob matches.
    """
    touch ${prefix}.1.cf
    touch ${prefix}.2.cf
    touch ${prefix}.3.cf
    touch ${prefix}.4.cf

    cat <<-END_VERSIONS > versions.yml
    "${task.process}":
        centrifuge: \$( centrifuge --version | sed -n 1p | sed 's/^.*centrifuge-class version //')
    END_VERSIONS
    """
}
68 changes: 68 additions & 0 deletions modules/nf-core/centrifuge/build/meta.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,68 @@
# Module metadata for centrifuge_build: describes the wrapped tool and the
# inputs and outputs of the process.
name: centrifuge_build
description: Build centrifuge database for taxonomic profiling
keywords:
- database
- metagenomics
- build
- db
- fasta
# Reference information about the underlying tool.
tools:
- centrifuge:
description: Classifier for metagenomic sequences
homepage: https://ccb.jhu.edu/software/centrifuge/
documentation: https://ccb.jhu.edu/software/centrifuge/manual.shtml
doi: 10.1101/gr.210641.116
licence: ["GPL v3"]

# Inputs: meta, fasta, conversion_table, taxonomy_tree, name_table, size_table.
input:
- meta:
type: map
description: |
Groovy Map containing sample information
e.g. [ id:'test', single_end:false ]
- fasta:
type: file
description: Fasta file containing sequences to be used in centrifuge database.
pattern: "*.{fasta,fna}"

- conversion_table:
type: file
description: A tab-separated file with sequence ID to taxonomy ID mapping
pattern: "*.{map}"

- taxonomy_tree:
type: file
description: A \t|\t-separated file mapping taxonomy. Typically nodes.dmp from the NCBI taxonomy dump. Links taxonomy IDs to their parents
pattern: "*.{dmp}"

- name_table:
type: file
description: A '|'-separated file mapping taxonomy IDs to a name. Typically names.dmp from the NCBI taxonomy dump. Links taxonomy IDs to their scientific name
pattern: "*.{dmp}"

# The only input described as optional; its pattern accepts any file name.
- size_table:
type: file
description: Optional list of taxonomic IDs and lengths of the sequences belonging to the same taxonomic IDs.
pattern: "*"

# Outputs: the sample map, the versions file and the "*.cf" index files.
output:
- meta:
type: map
description: |
Groovy Map containing sample information
e.g. [ id:'test', single_end:false ]
- versions:
type: file
description: File containing software versions
pattern: "versions.yml"

- cf:
type: file
description: Index files for the centrifuge database
pattern: "*.{cf}"

authors:
- "@sarah-buddle"
- "@jfy133"
59 changes: 59 additions & 0 deletions modules/nf-core/centrifuge/build/tests/main.nf.test
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
// nf-test specification for the CENTRIFUGE_BUILD process defined in ../main.nf.
// It contains two tests that use the same inputs: a regular run and a stub run.
nextflow_process {

name "Test Process CENTRIFUGE_BUILD"
script "../main.nf"
process "CENTRIFUGE_BUILD"

tag "modules"
tag "modules_nfcore"
tag "centrifuge"
tag "centrifuge/build"

// Regular run on the sarscov2 genome FASTA with the metagenome
// seqid2taxid map, nodes.dmp and names.dmp test files.
test("sarscov2 - fasta") {

// The fifth input (the size table) is passed as an empty list.
when {
process {
"""
input[0] = [ [id: 'test'], file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true) ]
input[1] = file(params.test_data['sarscov2']['metagenome']['seqid2taxid_map'], checkIfExists: true)
input[2] = file(params.test_data['sarscov2']['metagenome']['nodes_dmp'], checkIfExists: true)
input[3] = file(params.test_data['sarscov2']['metagenome']['names_dmp'], checkIfExists: true)
input[4] = []
"""
}
}

// The process must succeed and its outputs must match the stored snapshot.
then {
assertAll(
{ assert process.success },
{ assert snapshot(process.out).match() }
)
}
}

// Same inputs as the test above, but run with the "-stub" option.
test("sarscov2 - fasta - stub") {

options "-stub"

// The fifth input (the size table) is passed as an empty list.
when {
process {
"""
input[0] = [ [id: 'test'], file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true) ]
input[1] = file(params.test_data['sarscov2']['metagenome']['seqid2taxid_map'], checkIfExists: true)
input[2] = file(params.test_data['sarscov2']['metagenome']['nodes_dmp'], checkIfExists: true)
input[3] = file(params.test_data['sarscov2']['metagenome']['names_dmp'], checkIfExists: true)
input[4] = []
"""
}
}

// The process must succeed and its outputs must match the stored snapshot.
then {
assertAll(
{ assert process.success },
{ assert snapshot(process.out).match() }
)
}

}

}
88 changes: 88 additions & 0 deletions modules/nf-core/centrifuge/build/tests/main.nf.test.snap

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 2 additions & 0 deletions modules/nf-core/centrifuge/build/tests/tags.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
# Associates the "centrifuge/build" tag with every path under the module directory.
# NOTE(review): presumably consumed by CI to select tests when these files change - confirm.
centrifuge/build:
- "modules/nf-core/centrifuge/build/**"

0 comments on commit 64163cd

Please sign in to comment.