From 0a616d882f804a0f79befada431df9c5de9e3627 Mon Sep 17 00:00:00 2001 From: Matthias Bernt Date: Thu, 13 Jun 2024 16:53:24 +0200 Subject: [PATCH] cleanup --- tools/openms/PSMFeatureExtractor.xml | 27 -- tools/openms/SeedListGenerator.xml | 2 +- tools/openms/{ => aux}/generate.sh | 0 tools/openms/aux/hardcoded_params.json | 4 - tools/openms/fill_ctd_clargs.py | 70 ---- tools/openms/generate-foo.sh | 225 ----------- tools/openms/macros_discarded_auto.xml | 525 ------------------------- 7 files changed, 1 insertion(+), 852 deletions(-) rename tools/openms/{ => aux}/generate.sh (100%) delete mode 100644 tools/openms/fill_ctd_clargs.py delete mode 100644 tools/openms/generate-foo.sh delete mode 100644 tools/openms/macros_discarded_auto.xml diff --git a/tools/openms/PSMFeatureExtractor.xml b/tools/openms/PSMFeatureExtractor.xml index 196b60dae..f512faaa3 100644 --- a/tools/openms/PSMFeatureExtractor.xml +++ b/tools/openms/PSMFeatureExtractor.xml @@ -172,33 +172,6 @@ ${' '.join(["'in_cond.in/%s/%s.%s'"%(i, re.sub('[^\w\-_]', '_', f.element_identi - - - - - - - - - - - - - - - - - - - - - - - - - - - - + diff --git a/tools/openms/generate.sh b/tools/openms/aux/generate.sh similarity index 100% rename from tools/openms/generate.sh rename to tools/openms/aux/generate.sh diff --git a/tools/openms/aux/hardcoded_params.json b/tools/openms/aux/hardcoded_params.json index df3482253..0c4b36f90 100644 --- a/tools/openms/aux/hardcoded_params.json +++ b/tools/openms/aux/hardcoded_params.json @@ -191,7 +191,6 @@ "#": "IDFileConverter remove xml", "#": "OpenSwathWorkflow make in single file input and all outputs non-optional", "#": "XFDR does not need xml .. redundant with xquest.xml TODO check if list is up to date with each new release", - "#": "SeedListGenerator: remove consensusXML https://github.com/OpenMS/OpenMS/issues/4404 .. ", "in": [{ "CTD:restrictions": "oms,idXML,mzid,fasta,pepXML,protXML,mascotXML,omssaXML,psms,tsv,xquest.xml", "tools": ["IDFileConverter"] @@ -201,9 +200,6 @@ }, { "CTD:restrictions": "idXML,mzid,xquest.xml", "tools": ["XFDR"] - }, { - "CTD:restrictions": "mzML,idXML,featureXML", - "tools": ["SeedListGenerator"] }], "#": "IDMapper has in and spectra:in params, in is used in out as format_source", diff --git a/tools/openms/fill_ctd_clargs.py b/tools/openms/fill_ctd_clargs.py deleted file mode 100644 index f36e0ee86..000000000 --- a/tools/openms/fill_ctd_clargs.py +++ /dev/null @@ -1,70 +0,0 @@ -#!/usr/bin/env python3 - -import operator -from argparse import ArgumentParser -from functools import reduce # forward compatibility for Python 3 -from io import StringIO - -from CTDopts.CTDopts import ( - _Null, - CTDModel, - ModelTypeError, - Parameters -) - - -def getFromDict(dataDict, mapList): - return reduce(operator.getitem, mapList, dataDict) - - -def setInDict(dataDict, mapList, value): - getFromDict(dataDict, mapList[:-1])[mapList[-1]] = value - - -if __name__ == "__main__": - # note add_help=False since otherwise arguments starting with -h will - # trigger an error (despite allow_abbreviate) - parser = ArgumentParser(prog="fill_ctd_clargs", - description="fill command line arguments" - "into a CTD file and write the CTD file to stdout", - add_help=False, allow_abbrev=False) - parser.add_argument("--ini_file", dest="ini_file", help="input ini file", - metavar='INI', default=None, required=True) - parser.add_argument("--ctd_file", dest="ctd_file", help="input ctd file" - "if given then optional parameters from the ini file" - "will be filled with the defaults from this CTD file", - metavar='CTD', default=None, required=False) - args, cliargs = parser.parse_known_args() - - # load CTDModel - ini_model = None - try: - ini_model = CTDModel(from_file=args.ini_file) - except ModelTypeError: - pass - try: - ini_model = Parameters(from_file=args.ini_file) - except ModelTypeError: - pass - assert ini_model is not None, "Could not parse %s, seems to be no CTD/PARAMS" % (args.ini_file) - - # get a dictionary of the ctd arguments where the values of the parameters - # given on the command line are overwritten - ini_values = ini_model.parse_cl_args(cl_args=cliargs, ignore_required=True) - - if args.ctd_file: - ctd_model = CTDModel(from_file=args.ctd_file) - ctd_values = ctd_model.get_defaults() - for param in ini_model.get_parameters(): - if not param.required and (param.default is None or type(param.default) is _Null): - lineage = param.get_lineage(name_only=True) - try: - default = getFromDict(ctd_values, lineage) - except KeyError: - continue - setInDict(ini_values, lineage, default) - - # write the ctd with the values taken from the dictionary - out = StringIO() - ctd_tree = ini_model.write_ctd(out, ini_values) - print(out.getvalue()) diff --git a/tools/openms/generate-foo.sh b/tools/openms/generate-foo.sh deleted file mode 100644 index a9315c73a..000000000 --- a/tools/openms/generate-foo.sh +++ /dev/null @@ -1,225 +0,0 @@ -#!/usr/bin/env bash - -# parse test definitions from OpenMS sources for a tool with a given id -function get_tests2 { - id=$1 - >&2 echo "generate tests for $id" - echo '' - - # get the tests from the CMakeLists.txt - # 1st remove some tests - # - OpenSwathMzMLFileCacher with -convert_back argument https://github.com/OpenMS/OpenMS/issues/4399 - # - IDRipper PATH gets empty causing problems. TODO But overall the option needs to be handled differentlt - # - several tools with duplicated input (leads to conflict when linking) - # - MaRaCluster with -consensus_out (parameter blacklister: https://github.com/OpenMS/OpenMS/issues/4456) - # - FileMerger with mixed dta dta2d input (ftype can not be specified in the test, dta can not be sniffed) - # - some input files are originally in a subdir (degenerated cases/), but not in test-data - # - OpenSwathAnalyzer 9/10: cachedMzML (not supported yet) - # - SiriusAdapter_4 depends on online service which may timeout .. so keep disabled https://github.com/OpenMS/OpenMS/pull/5010 - # - SiriusAdapter_10 should work in >2.8 https://github.com/OpenMS/OpenMS/issues/5869 - CMAKE=$(cat $OPENMSGIT/src/tests/topp/CMakeLists.txt $OPENMSGIT/src/tests/topp/THIRDPARTY/third_party_tests.cmake | - grep -v "\.ini\.json" | - sed 's@${DATA_DIR_SHARE}/@@g' | - grep -v 'OpenSwathMzMLFileCacher .*-convert_back' | - sed 's/${TMP_RIP_PATH}/""/' | - grep -v "MaRaClusterAdapter.*-consensus_out"| - grep -v "FileMerger_1_input1.dta2d.*FileMerger_1_input2.dta " | - sed 's@degenerate_cases/@@g' | - egrep -v 'TOPP_OpenSwathAnalyzer_test_3"|TOPP_OpenSwathAnalyzer_test_4"' | - sed 's/\("TOPP_SiriusAdapter_4".*\)-sirius:database all\(.*\)/\1-sirius:database pubchem\2/' | - grep -v '"TOPP_SiriusAdapter_10"') - - # 1st part is a dirty hack to join lines containing a single function call, e.g. - # addtest(.... - # ....) - echo "$CMAKE" | sed 's/#.*//; s/^\s*//; s/\s*$//' | grep -v "^#" | grep -v "^$" | awk '{printf("%s@NEWLINE@", $0)}' | sed 's/)@NEWLINE@/)\n/g' | sed 's/@NEWLINE@/ /g' | - grep -iE "add_test\(\"(TOPP|UTILS)_.*/$id " | egrep -v "_prepare\"|_convert|WRITEINI|WRITECTD|INVALIDVALUE" | while read -r line - do - line=$(echo "$line" | sed 's/add_test("\([^"]\+\)"/\1/; s/)$//; s/\${TOPP_BIN_PATH}\///g;s/\${DATA_DIR_TOPP}\///g; s#THIRDPARTY/##g') - # >&2 echo "line $line" - test_id=$(echo "$line" | cut -d" " -f 1) - tool_id=$(echo "$line" | cut -d" " -f 2) - # >&2 echo "test_id $test_id" - if [[ $test_id =~ _out_?[0-9]? ]]; then - >&2 echo " skip $test_id $line" - continue - fi - if [[ ${id,,} != ${tool_id,,} ]]; then - >&2 echo " skip $test_id ($id != $tool_id) $line" - continue - fi - - #remove tests with set_tests_properties(....PROPERTIES WILL_FAIL 1) - if grep -lq "$test_id"'\".* PROPERTIES WILL_FAIL 1' $OPENMSGIT/src/tests/topp/CMakeLists.txt $OPENMSGIT/src/tests/topp/THIRDPARTY/third_party_tests.cmake; then - >&2 echo " skip failing "$test_id - continue - fi - tes=" \n" - line=$(fix_tmp_files "$line") - line=$(unique_files "$line") - # >&2 echo LINE $line - #if there is an ini file then we use this to generate the test - #otherwise the ctd file is used - #other command line parameters are inserted later into this xml - if grep -lq "\-ini" <<<"$line"; then - ini=$(echo $line | sed 's/.*-ini \([^ ]\+\).*/\1/') - ini="test-data/$ini" - else - ini="ctd/$tool_id.ctd" - fi - # >&2 echo "========================================================" - # >&2 echo "USING ini $ini" - cli=$(echo $line |cut -d" " -f3- | sed 's/-ini [^ ]\+//') - - ctdtmp=$(mktemp) - # using eval: otherwise for some reason quoted values are not used properly ('A B' -> ["'A", "B'"]) - # >&2 echo "python3 fill_ctd_clargs.py --ini_file $ini $cli" - eval "python3 fill_ctd_clargs.py --ini_file $ini $cli" > "$ctdtmp" - # >&2 echo $ctdtmp - # >&2 cat $ctdtmp - testtmp=$(mktemp) - # >&2 echo CTDConverter galaxy -i $ctdtmp -o $testtmp -s aux/tools_blacklist.txt -f "$FILETYPES" -m macros.xml -t tool.conf -p aux/hardcoded_params.json --tool-version $VERSION --test-only --test-unsniffable csv tsv txt dta dta2d edta mrm splib --test-condition "compare=sim_size" "delta_frac=0.7" - CTDConverter galaxy -i $ctdtmp -o $testtmp -s aux/tools_blacklist.txt -f "$FILETYPES" -m macros.xml -t tool.conf -p aux/hardcoded_params.json --tool-version $VERSION --test-only --test-unsniffable csv tsv txt dta dta2d edta mrm splib --test-condition "compare=sim_size" "delta_frac=0.7" > /dev/null - echo "" - cat $testtmp | grep -v ' /dev/null - - #rm $testtmp - done - echo '' -} - -#some tests use the same file twice which does not work in planemo tests -#hence we create symlinks for each file used twice -function unique_files { - line=$@ - for arg in $@ - do - if [[ ! -f "test-data/$arg" ]]; then - continue - fi - cnt=$(grep -c $arg <<< $(echo "$line" | tr ' ' '\n')) - while [[ $cnt -gt 1 ]]; do - new_arg=$(echo $arg | sed "s/\(.*\)\./\1_$cnt./") - ln -fs $arg test-data/$new_arg - line=$(echo $line | sed "s/\($arg.*\)$arg/\1$new_arg/") - cnt=$(grep -c $arg <<< $(echo "$line" | tr ' ' '\n')) - done - done - - echo $line -} - -# options of out_type selects need to be fixed to Galaxy data types -function fix_out_type { - grep "^$1" "$2" | awk '{print $2}' -} - -#OpenMS tests output to tmp files and compare with FuzzyDiff to the expected file. -#problem: the extension of the tmp files is unusable for test generation. -#unfortunately the extensions used in the DIFF lines are not always usable for the CLI -#(e.g. for prepare_test_data, e.g. CLI expects csv but test file is txt) -#this function replaces the tmp file by the expected file. -function fix_tmp_files { - # >&2 echo "FIX $line" - ret="" - for a in $@; do - # >&2 echo " a "$a - if [[ ! $a =~ .tmp$ ]] && [[ ! $a =~ _tmp_ ]]; then - ret="$ret $a" - continue - fi - diff_line=$(cat $OPENMSGIT/src/tests/topp/CMakeLists.txt $OPENMSGIT/src/tests/topp/THIRDPARTY/third_party_tests.cmake | awk '{printf("%s@NEWLINE@", $0)}' | sed 's/)@NEWLINE@/)\n/g' | sed 's/@NEWLINE@/ /g' | grep '\${DIFF}.*'"$a") - # >&2 echo " diff_line "$diff_line - in1=$(sed 's/.*-in1 \([^ ]\+\).*/\1/' <<<$diff_line) - # >&2 echo " in1 "$in1 - if [[ "$a" != "$in1" ]]; then - ret="$ret $a" - continue - fi - in2=$(sed 's/.*-in2 \([^ ]\+\).*/\1/' <<<$diff_line) - in2=$(basename $in2 | sed 's/)$//') - # >&2 echo " in2 "$in2 - if [[ -f "test-data/$in2" ]]; then - ln -fs "$in1" "test-data/$in2" - ret="$ret $in2" - else - ret="$ret $a" - fi - done -# >&2 echo "--> $ret" - echo "$ret" -} - -function link_tmp_files { - # note this also considers commented lines (starting with a #) - # because of tests where the diff command is commented and we - # still want to use the extension of these files - cat $OPENMSGIT/src/tests/topp/CMakeLists.txt $OPENMSGIT/src/tests/topp/THIRDPARTY/third_party_tests.cmake | sed 's/^\s*//; s/\s*$//' | grep -v "^$" | awk '{printf("%s@NEWLINE@", $0)}' | sed 's/)@NEWLINE@/)\n/g' | sed 's/@NEWLINE@/ /g' | grep "\${DIFF}" | while read -r line - do - in1=$(sed 's/.*-in1 \([^ ]\+\).*/\1/' <<<$line) - in1=$(basename $in1 | sed 's/)$//') - in2=$(sed 's/.*-in2 \([^ ]\+\).*/\1/' <<<$line) - in2=$(basename $in2 | sed 's/)$//') - if [[ "$in1" == "$in2" ]]; then - >&2 echo "not linking equal $in1 $in2" - continue - fi - ln -f -s $in1 test-data/$in2 - done - - find test-data/ -name "*.tmp" -print0 | - while IFS= read -r -d '' i; do - if [ ! -e test-data/$(basename $i .tmp) ]; then - ln -s $(basename $i) test-data/$(basename $i .tmp) - else - ln -fs $(basename $i) test-data/$(basename $i .tmp) - fi - done -} - - - -# parse data preparation calls from OpenMS sources for a tool with a given id -function prepare_test_data { -# id=$1 -# | egrep -i "$id\_.*[0-9]+(_prepare\"|_convert)?" - - OLD_OSW_PARAM=$(cat $OPENMSGIT/src/tests/topp/CMakeLists.txt |sed 's/#.*$//'| sed 's/^\s*//; s/\s*$//' |awk '{printf("%s@NEWLINE@", $0)}' | sed 's/)@NEWLINE@/)\n/g' | sed 's/@NEWLINE@/ /g' | grep OLD_OSW_PARAM | head -n 1 | sed 's/^[^"]\+//; s/)$//; s/"//g') - # TODO SiriusAdapter depends on online service which may timeout .. so keep disabled https://github.com/OpenMS/OpenMS/pull/5010 - cat $OPENMSGIT/src/tests/topp/CMakeLists.txt $OPENMSGIT/src/tests/topp/THIRDPARTY/third_party_tests.cmake | - sed "s/\${OLD_OSW_PARAM}/$OLD_OSW_PARAM/" | - grep -v "\.ini\.json" | - sed 's/.ini.json /ini /' | - sed 's/#.*$//'| - sed 's/^\s*//; s/\s*$//' | - grep -v "^$" | - awk '{printf("%s@NEWLINE@", $0)}' | - sed 's/)@NEWLINE@/)\n/g' | sed 's/@NEWLINE@/ /g' | - sed 's/degenerate_cases\///' | - egrep -v "WRITEINI|WRITECTD|INVALIDVALUE|DIFF" | - grep add_test | - egrep "TOPP|UTILS" | - sed 's@${DATA_DIR_SHARE}/@@g;'| - sed 's@${TMP_RIP_PATH}@./@g'| - sed 's@TOFCalibration_ref_masses @TOFCalibration_ref_masses.txt @g; s@TOFCalibration_const @TOFCalibration_const.csv @'| - sed 's/\("TOPP_SiriusAdapter_4".*\)-sirius:database all\(.*\)/\1-sirius:database pubchem\2/' | - while read line - do - test_id=$(echo "$line" | sed 's/add_test(//; s/"//g; s/)[^)]*$//; s/\${TOPP_BIN_PATH}\///g;s/\${DATA_DIR_TOPP}\///g; s#THIRDPARTY/##g' | cut -d" " -f1) - - if grep -lq "$test_id"'\".* PROPERTIES WILL_FAIL 1' $OPENMSGIT/src/tests/topp/CMakeLists.txt $OPENMSGIT/src/tests/topp/THIRDPARTY/third_party_tests.cmake; then - >&2 echo " skip failing "$test_id - continue - fi - - line=$(echo "$line" | sed 's/add_test("//; s/)[^)]*$//; s/\${TOPP_BIN_PATH}\///g;s/\${DATA_DIR_TOPP}\///g; s#THIRDPARTY/##g' | cut -d" " -f2-) - # line="$(fix_tmp_files $line)" - echo 'echo executing "'$test_id'"' - echo "$line > $test_id.stdout 2> $test_id.stderr" - echo "if [[ \"\$?\" -ne \"0\" ]]; then >&2 echo '$test_id failed'; >&2 echo -e \"stderr:\n\$(cat $test_id.stderr | sed 's/^/ /')\"; echo -e \"stdout:\n\$(cat $test_id.stdout)\";fi" - done -} diff --git a/tools/openms/macros_discarded_auto.xml b/tools/openms/macros_discarded_auto.xml deleted file mode 100644 index 2a01c0a5f..000000000 --- a/tools/openms/macros_discarded_auto.xml +++ /dev/null @@ -1,525 +0,0 @@ - - - -
- - -
- - - - - - - - - - - - -
- - -
- - -
- - - - - - - - - - - - - -
-
- - -
- - - - - - - - - - - - - -
-
- - -
- - - - - - - - - - - - - -
-
- - -
- - - - - - - - - - - - - -
- - -
- - -
- - - - - - - - - - - - - - - - -
-
- - -
- - - - - - - - - - - - - - - - -
- - -
- - -
- - - - - - - - - - - - -
-
- - -
- - - - - - - - - - - - -
-
- - -
- - - - - - - - - - - - -
- - -
- - -
- - - - - - - -
- - - - - - - - -
-
- - - - - - - - - -
-
- - - - - -
-
- - - - - - - - - -
-
- - - - - - - - - -
-
- - - - - - - - - - - - - - - - - - - - - - - - -
-
- - - - - - - - - - - -
- - -
-
- - - - - - - - - -
-
- - -
- - - - - - -
- - - - - - - - -
-
- - - - - - - - - -
-
- - - - - -
-
- - - - - - - - - -
-
- - - - - - - - - -
-
- - - - - - - - - - - - - - - - - - - - - - - - -
-
- - - - - - - - - - - -
- - -
-
- - - - - - - - - -
- - -
- - - - - -
- - - - - - - - - - - - - - - - - - -
- - -
- - -
- - - - - - - - - - - - - - - - - - - - -
- - -
- - - - -
- - - - - - - - - - - - - - - - - -
-
- - - - -
- - - - - - - - - - - - - - - - - -