back to python file with DM

galaxyproject · Oct 15, 2024 · e82a0c8 · e82a0c8
1 parent 50d544b
commit e82a0c8
Show file tree

Hide file tree

Showing 3 changed files with 158 additions and 19 deletions.
diff --git a/..._manager_groot_database_downloader/data_manager/data_manager_groot_database_downloader.py b/..._manager_groot_database_downloader/data_manager/data_manager_groot_database_downloader.py
@@ -0,0 +1,149 @@
+#!/usr/bin/env python
+#
+# Data manager for reference data for the GROOT Galaxy tools
+import argparse
+import json
+import shutil
+import subprocess
+from datetime import date
+from pathlib import Path
+
+
+# Utility functions for interacting with Galaxy JSON
+def read_input_json(json_fp):
+    """Read the JSON supplied from the data manager tool
+    Returns a tuple (param_dict,extra_files_path)
+    'param_dict' is an arbitrary dictionary of parameters
+    input into the tool; 'extra_files_path' is the path
+    to a directory where output files must be put for the
+    receiving data manager to pick them up.
+    NB the directory pointed to by 'extra_files_path'
+    doesn't exist initially, it is the job of the script
+    to create it if necessary.
+    """
+    with open(json_fp) as fh:
+        params = json.load(fh)
+    return (params['param_dict'],
+            Path(params['output_data'][0]['extra_files_path']))
+
+
+# Utility functions for creating data table dictionaries
+#
+# Example usage:
+# >>> d = create_data_tables_dict()
+# >>> add_data_table(d,'my_data')
+# >>> add_data_table_entry(d,'my_data',dict(dbkey='hg19',value='human'))
+# >>> add_data_table_entry(d,'my_data',dict(dbkey='mm9',value='mouse'))
+# >>> print(json.dumps(d))
+def create_data_tables_dict():
+    """Return a dictionary for storing data table information
+    Returns a dictionary that can be used with 'add_data_table'
+    and 'add_data_table_entry' to store information about a
+    data table. It can be converted to JSON to be sent back to
+    the data manager.
+    """
+    d = {
+        'data_tables': {}
+    }
+    return d
+
+
+def add_data_table(d, table):
+    """Add a data table to the data tables dictionary
+    Creates a placeholder for a data table called 'table'.
+    """
+    d['data_tables'][table] = []
+
+
+def add_data_table_entry(d, table, entry):
+    """Add an entry to a data table
+    Appends an entry to the data table 'table'. 'entry'
+    should be a dictionary where the keys are the names of
+    columns in the data table.
+    Raises an exception if the named data table doesn't
+    exist.
+    """
+    try:
+        d['data_tables'][table].append(entry)
+    except KeyError:
+        raise Exception("add_data_table_entry: no table '%s'" % table)
+
+
+def download_groot_db(data_tables, name, table_name, target_dp, identity, groot_version):
+    """Download GROOT database
+    Runs 'groot get' in the current working directory, copies
+    the resulting '<name>.<identity>' directory into 'target_dp'
+    and adds an entry for it to the data table 'table_name'.
+    The 'data_tables' dictionary should have been created using
+    the 'create_data_tables_dict' and 'add_data_table' functions.
+    Arguments:
+      data_tables: a dictionary containing the data table info
+      name: name of the database to download
+      table_name: name of the table
+      target_dp: directory into which the database directory is copied
+      identity: identity threshold for GROOT
+      groot_version: GROOT version recorded in the entry's 'version' column
+    """
+    # Build the command string
+    cmd = "groot get -d %s -o %s --identity %s" % (name, Path.cwd(), identity)
+
+    # Execute the command
+    subprocess.check_call(cmd, shell=True)
+
+    # Define the target directory path
+    current_db_dp = Path(f"{Path.cwd()}/{name}.{identity}")
+    new_db_dp = Path(f"{target_dp}/{name}.{identity}")
+    shutil.copytree(current_db_dp, new_db_dp)
+
+    # Add the data table entry
+    add_data_table_entry(
+        data_tables,
+        table_name,
+        dict(
+            value='%s.%s' % (name, identity),
+            name='%s (%s percent identity)' % (name, identity),
+            path='/%s.%s' % (name, identity),
+            version=groot_version
+        )
+    )
+
+
+if __name__ == "__main__":
+    print("Starting...")
+
+    # Read command line
+    parser = argparse.ArgumentParser(description='Download and build Groot database')
+    parser.add_argument('--database', help="Name of the database")
+    parser.add_argument('--percentidentity', help="The identity threshold at which the database was clustered")
+    parser.add_argument('--grootversion', help="Version of the Database")
+    parser.add_argument('--json', help="Path to JSON file")
+    args = parser.parse_args()
+    print("args   : %s" % args)
+
+    # Read the input JSON
+    json_fp = Path(args.json)
+    params, target_dp = read_input_json(json_fp)
+
+    # Make the target directory
+    print("Making %s" % target_dp)
+    target_dp.mkdir(parents=True, exist_ok=True)
+
+    # Set up data tables dictionary
+    data_tables = create_data_tables_dict()
+    add_data_table(data_tables, "groot_database")
+
+    # Fetch data from specified data sources
+    print("Download and build database")
+    download_groot_db(
+        data_tables,
+        args.database,
+        "groot_database",
+        target_dp,
+        args.percentidentity,
+        args.grootversion)
+
+    # Write output JSON
+    print("Outputting JSON")
+    with open(json_fp, 'w') as fh:
+        json.dump(data_tables, fh, sort_keys=True)
+    print("Done.")
diff --git a/...manager_groot_database_downloader/data_manager/data_manager_groot_database_downloader.xml b/...manager_groot_database_downloader/data_manager/data_manager_groot_database_downloader.xml
@@ -11,26 +11,13 @@
         <requirement type="package" version="@TOOL_VERSION@">groot</requirement>
     </requirements>
     <command><![CDATA[
-        groot get -d '$database' --identity '$identity' &&
-        cp '$dmjson' '$out_file'
+        if [ -f '/usr/local/ssl/cacert.pem' ] ; then export SSL_CERT_FILE='/usr/local/ssl/cacert.pem' ; fi &&
+    python '$__tool_directory__/data_manager_groot_database_downloader.py' 
+        --database '$database'
+        --grootversion @TOOL_VERSION@
+        --percentidentity '$identity'
+        --json '$out_file'
     ]]></command>
-     <configfiles>
-        <configfile name="dmjson"><![CDATA[
-#from datetime import date
-{
-  "data_tables":{
-    "groot_database":[
-      {
-        "value": "$database.$identity",
-        "name": "$database ($identity percent identity)",
-        "path": "/$database.$identity",
-        "version": "@TOOL_VERSION@"
-      }
-    ]
-  }
-}]]>
-        </configfile>
-    </configfiles>
     <inputs>
         <param name="database" type="select" label="Database name">
             <option value="arg-annot" selected="true">ARG-annot 90% identity (default)</option>

diff --git a/data_managers/data_manager_groot_database_downloader/test-data/groot_database.loc b/data_managers/data_manager_groot_database_downloader/test-data/groot_database.loc
@@ -4,3 +4,6 @@ arg-annot.90	arg-annot (90 percent identity)	/tmp/tmp7wrj6zk0/galaxy-dev/tool-da
 arg-annot.90	arg-annot (90 percent identity)	/tmp/tmpf4g2mbca/galaxy-dev/tool-data/groot_database/data/arg-annot.90	1.1.2
 arg-annot.90	arg-annot (90 percent identity)	/tmp/tmppzyeuezs/galaxy-dev/tool-data/groot_database/arg-annot.90	1.1.2
 arg-annot.90	arg-annot (90 percent identity)	/tmp/tmpk61hkg7b/galaxy-dev/tool-data/groot_database/data/arg-annot.90	1.1.2
+arg-annot.90	arg-annot (90 percent identity)	/tmp/tmpuh2hc68n/galaxy-dev/tool-data/groot_database/data/arg-annot.90	1.1.2
+arg-annot.90	arg-annot (90 percent identity)	/tmp/tmpr3ytm5sp/galaxy-dev/tool-data/groot_database/data/arg-annot.90	1.1.2
+arg-annot.90	arg-annot (90 percent identity)	/tmp/tmpgktyt4ga/galaxy-dev/tool-data/groot_database/data/arg-annot.90	1.1.2