galaxyproject · bgruening · Oct 16, 2024 · Sep 24, 2024 · Sep 24, 2024 · Sep 25, 2024
diff --git a/tools/seqtk/seqtk_mergefa.xml b/tools/seqtk/seqtk_mergefa.xml
@@ -1,6 +1,6 @@
 <?xml version="1.0"?>
-<tool id="seqtk_mergefa" name="seqtk_mergefa" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="22.05">
-    <description>merge two FASTA/Q files</description>
+<tool id="seqtk_mergefa" name="seqtk_mergefa" version="@TOOL_VERSION@+galaxy1" profile="22.05">
+    <description>Merge two FASTA/Q files into a FASTA file output</description>
     <macros>
         <import>macros.xml</import>
     </macros>
@@ -18,17 +18,28 @@ $h
 '$in_fa2'
 #echo "| pigz -p ${GALAXY_SLOTS:-1} --no-name --no-time" if $in_fa1.is_of_type('fasta.gz', 'fastq.gz') else "" # > '$default'
     ]]></command>
+    <configfiles> <!-- Writes outputs.json, the provided_metadata_file named on the outputs element: it sets the ext of the "default" output to fasta.gz when in_fa1 is of type fasta.gz or fastq.gz, and to fasta otherwise. -->
+        <configfile filename="outputs.json">
+#set $ext = None
+#if $in_fa1.is_of_type('fasta.gz', 'fastq.gz')
+    #set $ext = "fasta.gz"
+#else
+    #set $ext = "fasta"
+#end if
+{"default": {"ext": "$ext"}}
+        </configfile>
+    </configfiles>
     <inputs>
         <param name="in_fa1" type="data" format="fasta,fastq,fasta.gz,fastq.gz" label="Input FASTA/Q file #1"/>
         <param name="in_fa2" type="data" format="fasta,fastq,fasta.gz,fastq.gz" label="Input FASTA/Q file #2"/>
-        <param argument="-q" type="integer" value="0" label="Quality threshold"/>
+        <param argument="-q" type="integer" value="0" label="Quality threshold (for FASTQ)"/>
         <param argument="-i" type="boolean" truevalue="-i" falsevalue="" checked="false" label="Take intersection" />
-        <param argument="-m" type="boolean" truevalue="-m" falsevalue="" checked="false" label="Convert to lowercase when one of the input base is N" />
+        <param argument="-m" type="boolean" truevalue="-m" falsevalue="" checked="false" label="Pick least ambiguous, mask conflicts and uncertainties" help="Tries to pick the least ambiguous symbol from the two inputs, but masks contradictory bases in the inputs as x in the merged result and converts the merged base to lowercase where one of the input bases is an N." />
         <param argument="-r" type="boolean" truevalue="-r" falsevalue="" checked="false" label="Pick a random allele from het" />
         <param argument="-h" type="boolean" truevalue="-h" falsevalue="" checked="false" label="Suppress hets in the input" />
     </inputs>
-    <outputs>
-        <data name="default" format_source="in_fa1" label="${tool.name} on ${on_string}"/>
+    <outputs provided_metadata_file="outputs.json">
+        <data name="default" format="auto" label="${tool.name} on ${on_string}" />
     </outputs>
     <tests>
         <test>
@@ -52,24 +63,27 @@ $h
     <help><![CDATA[
 **What it does**
 
-Merges two fasta files, using ambiguity codes
+This tool merges two FASTA or FASTQ files into a single FASTA file using IUPAC ambiguity codes where appropriate.
+When differences occur between the sequences, ambiguity codes are used to represent possible variations.
 
-::
+Example::
 
-    # seq1.fa
-    >test0
-    ACTGACTGAAA
+  >seq1
+  ACTGACTGAAA
 
-    # seq2.fa
-    >test0
-    ACTGAMTGCGN
+  >seq2
+  ACTGAMTGCGN
 
-In the following the `-m` option has been set to highlight seqtk-mergefa's features.
+will result in::
 
-::
+  >seq1
+  ACTGAMTGMRN
 
-    >test0
-    ACTGACTGxxa
+If the ``-m`` option is in use, however, the tool will pick the least ambiguous base if there is no contradiction between the symbols in the inputs. Conflicts are indicated by an ``x`` in the merged sequence, and the picked base is converted to lowercase if the less specific symbol is an N, to express uncertainty.
+With this logic, the input sequences above will produce the following merged result::
+
+  >seq1
+  ACTGACTGxxa
 
 @ATTRIBUTION@
     ]]></help>

diff --git a/tools/seqtk/seqtk_seq.xml b/tools/seqtk/seqtk_seq.xml
@@ -1,5 +1,5 @@
 <?xml version="1.0"?>
-<tool id="seqtk_seq" name="seqtk_seq" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="22.05">
+<tool id="seqtk_seq" name="seqtk_seq" version="@TOOL_VERSION@+galaxy1" profile="22.05">
     <description>common transformation of FASTA/Q</description>
     <macros>
         <import>macros.xml</import>
@@ -34,13 +34,25 @@ $x2
 '$in_file'
 @CONDITIONAL_GZIP_OUT@
     ]]></command>
+    <configfiles> <!-- Writes outputs.json, the provided_metadata_file named on the outputs element: when $A is true the ext of the "default" output is fasta.gz for fasta.gz/fastq.gz input and fasta otherwise; when $A is false the ext of in_file is kept. -->
+        <configfile filename="outputs.json">
+#if $A and $in_file.is_of_type('fasta.gz', 'fastq.gz')
+    #set $ext = "fasta.gz"
+#elif $A
+    #set $ext = "fasta"
+#else
+    #set $ext = $in_file.ext
+#end if
+{"default": {"ext": "$ext"}}
+        </configfile>
+    </configfiles>
     <inputs>
         <expand macro="in_faq"/>
         <param argument="-q" type="integer" value="0" label="Mask bases with quality lower than INT" />
         <param argument="-X" type="integer" value="255" label="Mask bases with quality higher than INT" />
-        <param argument="-n" type="text" value="0" label="Masked bases converted to CHAR; 0 for lowercase" />
+        <param argument="-n" type="text" value="" label="Masked bases converted to CHAR; leave empty for lowercase masking" />
         <param argument="-l" type="integer" value="0" label="Number of residues per line; 0 for 2^32-1" />
-        <param argument="-Q" type="integer" value="33" label="Quality shift: ASCII-INT gives base quality" />
+        <param argument="-Q" type="integer" value="33" label="Quality shift: ASCII-INT gives base quality" help="Only applied during comparison to quality thresholds for masking" />
         <param argument="-s" type="integer" value="11" label="Random seed" help="Effective with -f" />
         <param argument="-f" type="float" value="1" label="Sample fraction of sequences" />
         <param argument="-M" type="data" format="bed,txt" optional="true" label="Mask regions in BED or name list file" />
@@ -53,26 +65,37 @@ $x2
         <param name="x1" argument="-1" type="boolean" truevalue="-1" falsevalue="" checked="false" label="Output the 2n-1 reads only" />
         <param name="x2" argument="-2" type="boolean" truevalue="-2" falsevalue="" checked="false" label="Output the 2n reads only" />
     </inputs>
-    <outputs>
-        <data name="default" format_source="in_file" label="${tool.name} on ${on_string}" />
+    <outputs provided_metadata_file="outputs.json">
+        <data name="default" format="auto" label="${tool.name} on ${on_string}" />
     </outputs>
+
     <tests>
         <!-- This is a sorry excuse for a test for a tool which does way more
              than it should, but upstream decided to put a TON of functionality
              into a single tool rather than using the single responsibility
              principle. -->
-        <test>
+        <test expect_num_outputs="1">
             <param name="in_file" value="seqtk_seq.fa"/>
             <param name="r" value="True"/>
             <param name="n" value=""/>
             <output name="default" file="seqtk_seq_revcom.fa" ftype="fasta"/>
         </test>
-        <test>
+        <test expect_num_outputs="1">
             <param name="in_file" value="seqtk_seq.fa.gz" ftype="fasta.gz"/>
             <param name="r" value="True"/>
             <param name="n" value=""/>
             <output name="default" file="seqtk_seq_revcom.fa.gz" ftype="fasta.gz"/>
         </test>
+        <test expect_num_outputs="1">
+            <param name="in_file" value="seqtk_trimfq.fq" ftype="fastq"/>
+            <param name="A" value="True" />
+            <output name="default" file="seqtk_seq_A.fasta" ftype="fasta"/>
+        </test>
+        <test expect_num_outputs="1">
+            <param name="in_file" value="seqtk_trimfq.fq.gz" ftype="fastq.gz"/>
+            <param name="A" value="True" />
+            <output name="default" file="seqtk_seq_A.fasta.gz" ftype="fasta.gz"/>
+        </test>
     </tests>
     <help><![CDATA[
 **What it does**

diff --git a/tools/seqtk/test-data/seqtk_seq_A.fasta b/tools/seqtk/test-data/seqtk_seq_A.fasta
@@ -0,0 +1,2 @@
+>SEQ_ID1
+GATTTGGGGTTCAAAGCAGTATCGATCAAATAGTAAATCCATTTGTTCAACTCACAGTTT
diff --git a/tools/seqtk/test-data/seqtk_seq_A.fasta.gz b/tools/seqtk/test-data/seqtk_seq_A.fasta.gz