From f81442a1eecdd9b7b1e2b0fb667d5b65a3d73d40 Mon Sep 17 00:00:00 2001 From: diana chJ <122611454+dianichj@users.noreply.github.com> Date: Tue, 24 Sep 2024 14:21:24 +0200 Subject: [PATCH 01/11] Update seqtk_mergefa.xml - Restricted input formats to only FASTA and compressed FASTA files (.fasta, .fasta.gz). Removed support for FASTQ files. - Updated the tool description and help section to accurately reflect that the tool only merges FASTA files. - Improved the tool's clarity by ensuring it is used for its intended purpose: merging FASTA files only. --- tools/seqtk/seqtk_mergefa.xml | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/tools/seqtk/seqtk_mergefa.xml b/tools/seqtk/seqtk_mergefa.xml index 5198df76924..ce62fb4ac98 100644 --- a/tools/seqtk/seqtk_mergefa.xml +++ b/tools/seqtk/seqtk_mergefa.xml @@ -1,6 +1,6 @@ - merge two FASTA/Q files + Merge two FASTA files macros.xml @@ -16,14 +16,14 @@ $r $h '$in_fa1' '$in_fa2' -#echo "| pigz -p ${GALAXY_SLOTS:-1} --no-name --no-time" if $in_fa1.is_of_type('fasta.gz', 'fastq.gz') else "" # > '$default' +#echo "| pigz -p ${GALAXY_SLOTS:-1} --no-name --no-time" if $in_fa1.is_of_type('fasta.gz') else "" # > '$default' ]]> - - + + - + @@ -52,7 +52,7 @@ $h Date: Thu, 26 Sep 2024 15:08:32 +0200 Subject: [PATCH 02/11] Update seqtk_mergefa.xml "Tool merges FASTA/Q files into a FASTA output and considers the quality threshold for FASTQ files when merging." 1. Clarified the -m option to handle ambiguous bases and conflicts (e.g., N and other IUPAC codes). 2. Improved help documentation with clearer examples and explanations. 3. Refined input parameter labels for better clarity and consistency. --- tools/seqtk/seqtk_mergefa.xml | 27 ++++++++++++++++----------- 1 file changed, 16 insertions(+), 11 deletions(-) diff --git a/tools/seqtk/seqtk_mergefa.xml b/tools/seqtk/seqtk_mergefa.xml index ce62fb4ac98..4b60dc5407c 100644 --- a/tools/seqtk/seqtk_mergefa.xml +++ b/tools/seqtk/seqtk_mergefa.xml @@ -1,6 +1,6 @@ - Merge two FASTA files + Merge two FASTA/Q files into a FASTA file output macros.xml @@ -16,14 +16,14 @@ $r $h '$in_fa1' '$in_fa2' -#echo "| pigz -p ${GALAXY_SLOTS:-1} --no-name --no-time" if $in_fa1.is_of_type('fasta.gz') else "" # > '$default' +echo "| pigz -p ${GALAXY_SLOTS:-1} --no-name --no-time" if $in_fa1.is_of_type('fasta.gz', 'fastq.gz') else "" # > '$default' ]]> - - - + + + - + @@ -52,9 +52,11 @@ $h test0 @@ -64,13 +66,16 @@ Merges two FASTA files using ambiguity codes. >test0 ACTGAMTGCGN -In the following the `-m` option has been set to highlight seqtk-mergefa's features. - -:: +With the `-m` option enabled, the tool merges the sequences and handles ambiguities or conflicts as follows: >test0 ACTGACTGxxa +Explanation: +- Positions with exact matches remain unchanged. +- Positions where no IUPAC code can represent the conflict are marked with placeholders (e.g., `x`). +- If one sequence contains an ambiguous base (e.g., `N`), the corresponding nucleotide in the other sequence is converted to lowercase to indicate uncertainty. + @ATTRIBUTION@ ]]> From 24ef697d86ef795e19adba7f56058ab512487d61 Mon Sep 17 00:00:00 2001 From: diana chJ <122611454+dianichj@users.noreply.github.com> Date: Thu, 26 Sep 2024 15:31:42 +0200 Subject: [PATCH 03/11] Update seqtk_mergefa.xml edited echo command line back to #echo --- tools/seqtk/seqtk_mergefa.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/seqtk/seqtk_mergefa.xml b/tools/seqtk/seqtk_mergefa.xml index 4b60dc5407c..f5f03489a9a 100644 --- a/tools/seqtk/seqtk_mergefa.xml +++ b/tools/seqtk/seqtk_mergefa.xml @@ -16,7 +16,7 @@ $r $h '$in_fa1' '$in_fa2' -echo "| pigz -p ${GALAXY_SLOTS:-1} --no-name --no-time" if $in_fa1.is_of_type('fasta.gz', 'fastq.gz') else "" # > '$default' +#echo "| pigz -p ${GALAXY_SLOTS:-1} --no-name --no-time" if $in_fa1.is_of_type('fasta.gz', 'fastq.gz') else "" # > '$default' ]]> From a2528005de87f690524683a006a5b42e6749c90b Mon Sep 17 00:00:00 2001 From: RZ9082 Date: Tue, 1 Oct 2024 16:00:18 +0200 Subject: [PATCH 04/11] adjust output format for -A --- tools/seqtk/seqtk_seq.xml | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/tools/seqtk/seqtk_seq.xml b/tools/seqtk/seqtk_seq.xml index 9ae0301d749..51bac930ebf 100644 --- a/tools/seqtk/seqtk_seq.xml +++ b/tools/seqtk/seqtk_seq.xml @@ -23,7 +23,9 @@ seqtk seq -q $q -L $L $c $r -$A +#if $force_fasta == "Yes" + -A +#end if $C $N $x1 @@ -47,14 +49,21 @@ $x2 - + + + + - + + + + + - + - + - + - + - + - + Date: Tue, 8 Oct 2024 12:13:23 +0200 Subject: [PATCH 08/11] determine output format using metadata --- tools/seqtk/macros.xml | 29 +++++++++++++++++++++++++++++ tools/seqtk/seqtk_seq.xml | 3 ++- 2 files changed, 31 insertions(+), 1 deletion(-) diff --git a/tools/seqtk/macros.xml b/tools/seqtk/macros.xml index 1563912be65..0e60565e79a 100644 --- a/tools/seqtk/macros.xml +++ b/tools/seqtk/macros.xml @@ -40,6 +40,35 @@ This Galaxy tool relies on the seqtk toolkit from `lh3/seqtk `_, developed by Heng Li at the Broad Institute ]]> + > galaxy.json + ]]> diff --git a/tools/seqtk/seqtk_seq.xml b/tools/seqtk/seqtk_seq.xml index 334c9488150..e773b2a1baa 100644 --- a/tools/seqtk/seqtk_seq.xml +++ b/tools/seqtk/seqtk_seq.xml @@ -33,6 +33,7 @@ $x2 #end if '$in_file' @CONDITIONAL_GZIP_OUT@ +@GENERATE_GALAXY_JSON_SEQTK_SEQ@ ]]> @@ -54,7 +55,7 @@ $x2 - + From ce62a76d18b495e6b980b291e28297aceb4b6438 Mon Sep 17 00:00:00 2001 From: RZ9082 Date: Wed, 9 Oct 2024 11:09:43 +0200 Subject: [PATCH 09/11] galaxy.json configfile --- tools/seqtk/macros.xml | 29 ----------------------------- tools/seqtk/seqtk_seq.xml | 16 +++++++++++++++- 2 files changed, 15 insertions(+), 30 deletions(-) diff --git a/tools/seqtk/macros.xml b/tools/seqtk/macros.xml index 0e60565e79a..1563912be65 100644 --- a/tools/seqtk/macros.xml +++ b/tools/seqtk/macros.xml @@ -40,35 +40,6 @@ This Galaxy tool relies on the seqtk toolkit from `lh3/seqtk `_, developed by Heng Li at the Broad Institute ]]> - > galaxy.json - ]]> diff --git a/tools/seqtk/seqtk_seq.xml b/tools/seqtk/seqtk_seq.xml index e773b2a1baa..1225996f035 100644 --- a/tools/seqtk/seqtk_seq.xml +++ b/tools/seqtk/seqtk_seq.xml @@ -8,6 +8,7 @@ + + + #set $ext = None + #if $A and $in_file.is_of_type('fasta.gz', 'fastq.gz') + #set $ext = "fasta.gz" + #elif $A + #set $ext = "fasta" + #else + #set $ext = $in_file.ext + #end if + + {"default": {"ext": "$ext"}} + + From 4ebc92580e24aea3f46a601ed5dbd9ab65d3ef5d Mon Sep 17 00:00:00 2001 From: Wolfgang Maier Date: Tue, 15 Oct 2024 17:40:48 +0200 Subject: [PATCH 10/11] Ensure correct output formats of seqtk mergefa and fix help --- tools/seqtk/seqtk_mergefa.xml | 47 +++++++++++++++++++++-------------- 1 file changed, 28 insertions(+), 19 deletions(-) diff --git a/tools/seqtk/seqtk_mergefa.xml b/tools/seqtk/seqtk_mergefa.xml index f5f03489a9a..d82f5944801 100644 --- a/tools/seqtk/seqtk_mergefa.xml +++ b/tools/seqtk/seqtk_mergefa.xml @@ -1,5 +1,5 @@ - + Merge two FASTA/Q files into a FASTA file output macros.xml @@ -18,17 +18,28 @@ $h '$in_fa2' #echo "| pigz -p ${GALAXY_SLOTS:-1} --no-name --no-time" if $in_fa1.is_of_type('fasta.gz', 'fastq.gz') else "" # > '$default' ]]> + + +#set $ext = None +#if $in_fa1.is_of_type('fasta.gz', 'fastq.gz') + #set $ext = "fasta.gz" +#else + #set $ext = "fasta" +#end if +{"default": {"ext": "$ext"}} + + - + - - + + @@ -54,27 +65,25 @@ $h This tool merges two FASTA or FASTQ files into a single FASTA file using IUPAC ambiguity codes where appropriate. When differences occur between the sequences, ambiguity codes are used to represent possible variations. -Additionally, if the `-m` option is set, the tool highlights conflicts by converting nucleotides to lowercase when one of the sequences contains an ambiguity code (e.g., `N`). -### Example: +Example:: - # seq1.fa - >test0 - ACTGACTGAAA + >seq1 + ACTGACTGAAA - # seq2.fa - >test0 - ACTGAMTGCGN + >seq2 + ACTGAMTGCGN -With the `-m` option enabled, the tool merges the sequences and handles ambiguities or conflicts as follows: +will result in:: - >test0 - ACTGACTGxxa + >seq1 + ACTGAMTGMRN -Explanation: -- Positions with exact matches remain unchanged. -- Positions where no IUPAC code can represent the conflict are marked with placeholders (e.g., `x`). -- If one sequence contains an ambiguous base (e.g., `N`), the corresponding nucleotide in the other sequence is converted to lowercase to indicate uncertainty. +If the `-m` option is in use, however, the tool will pick the least ambiguous base if there is no contradiction between the symbols in the inputs. Conflicts are indicated by using x in the merged sequence and the picked base is converted to lowercase if the less specific symbol is an N to express uncertainty. +With this logic the input sequences above will result in the merge result:: + + >seq1 + ACTGACTGxxa @ATTRIBUTION@ ]]> From ea8b4dd69e64e3deeff69c989ec5d628f1cce8ed Mon Sep 17 00:00:00 2001 From: Wolfgang Maier Date: Tue, 15 Oct 2024 17:42:52 +0200 Subject: [PATCH 11/11] More simplifications and some parameter fixes --- tools/seqtk/seqtk_seq.xml | 31 ++++++++++++++----------------- 1 file changed, 14 insertions(+), 17 deletions(-) diff --git a/tools/seqtk/seqtk_seq.xml b/tools/seqtk/seqtk_seq.xml index 1225996f035..9c28ed7f143 100644 --- a/tools/seqtk/seqtk_seq.xml +++ b/tools/seqtk/seqtk_seq.xml @@ -8,7 +8,6 @@ - - #set $ext = None - #if $A and $in_file.is_of_type('fasta.gz', 'fastq.gz') - #set $ext = "fasta.gz" - #elif $A - #set $ext = "fasta" - #else - #set $ext = $in_file.ext - #end if - - {"default": {"ext": "$ext"}} - - + +#if $A and $in_file.is_of_type('fasta.gz', 'fastq.gz') + #set $ext = "fasta.gz" +#elif $A + #set $ext = "fasta" +#else + #set $ext = $in_file.ext +#end if +{"default": {"ext": "$ext"}} + + - + - + @@ -68,7 +65,7 @@ $x2 - +