Merge pull request #203 from 4dn-dcic/0.8.1

0.8.1
4dn-dcic · May 30, 2019 · 4d3ed6f · 4d3ed6f
2 parents 3e49f1a + c282835
commit 4d3ed6f
Show file tree

Hide file tree

Showing 21 changed files with 333 additions and 44 deletions.
diff --git a/MANIFEST.in b/MANIFEST.in
@@ -3,5 +3,5 @@ include LICENSE.txt
 include requirements.txt
 include requirements-4dn.txt
 include requirements-test.txt
-include tibanna/lambdas/requirements.txt
-include tibanna_4dn/lambdas/requirements.txt
+include tibanna/lambdas/*
+include tibanna_4dn/lambdas/*
diff --git a/awsf/aws_decode_run_json.py b/awsf/aws_decode_run_json.py
@@ -149,26 +149,26 @@ def create_env_def_file(env_filename, Dict, language):
     # env variables set before this script started running.
     with open(env_filename, 'w') as f_env:
         if language == 'wdl':
-            f_env.write("WDL_URL={}\n".format(Dict["Job"]["App"]["wdl_url"]))
+            f_env.write("export WDL_URL={}\n".format(Dict["Job"]["App"]["wdl_url"]))
             # main wdl to be run (the other wdl files will be called by this one)
-            f_env.write("MAIN_WDL={}\n".format(Dict["Job"]["App"]["main_wdl"]))
+            f_env.write("export MAIN_WDL={}\n".format(Dict["Job"]["App"]["main_wdl"]))
             # list of wdl files in an array delimited by a space
-            f_env.write("WDL_FILES=\"{}\"\n".format(' '.join(Dict["Job"]["App"]["other_wdl_files"].split(','))))
+            f_env.write("export WDL_FILES=\"{}\"\n".format(' '.join(Dict["Job"]["App"]["other_wdl_files"].split(','))))
         else:  # cwl
-            f_env.write("CWL_URL={}\n".format(Dict["Job"]["App"]["cwl_url"]))
+            f_env.write("export CWL_URL={}\n".format(Dict["Job"]["App"]["cwl_url"]))
             # main cwl to be run (the other cwl files will be called by this one)
-            f_env.write("MAIN_CWL={}\n".format(Dict["Job"]["App"]["main_cwl"]))
+            f_env.write("export MAIN_CWL={}\n".format(Dict["Job"]["App"]["main_cwl"]))
             # list of cwl files in an array delimited by a space
-            f_env.write("CWL_FILES=\"{}\"\n".format(' '.join(Dict["Job"]["App"]["other_cwl_files"].split(','))))
+            f_env.write("export CWL_FILES=\"{}\"\n".format(' '.join(Dict["Job"]["App"]["other_cwl_files"].split(','))))
         # other env variables
-        f_env.write("OUTBUCKET={}\n".format(Dict["Job"]["Output"]["output_bucket_directory"]))
-        f_env.write("PUBLIC_POSTRUN_JSON={}\n".format('1' if Dict["config"].get('public_postrun_json', False) else '0'))
+        f_env.write("export OUTBUCKET={}\n".format(Dict["Job"]["Output"]["output_bucket_directory"]))
+        f_env.write("export PUBLIC_POSTRUN_JSON={}\n".format('1' if Dict["config"].get('public_postrun_json', False) else '0'))
         env_preserv_str = ''
         if "Env" in Dict["Job"]["Input"]:
             for ev, val in Dict["Job"]["Input"]["Env"].iteritems():
                 f_env.write("{}={}\n".format(ev, val))
                 env_preserv_str = env_preserv_str + "--preserve-environment " + ev + " "
-        f_env.write("PRESERVED_ENV_OPTION=\"{}\"\n".format(env_preserv_str))
+        f_env.write("export PRESERVED_ENV_OPTION=\"{}\"\n".format(env_preserv_str))
 
 
 main()
diff --git a/awsf/aws_run_workflow_generic.sh b/awsf/aws_run_workflow_generic.sh
@@ -118,6 +118,7 @@ fi
 exl wget $SCRIPTS_URL/aws_decode_run_json.py
 exl wget $SCRIPTS_URL/aws_update_run_json.py
 exl wget $SCRIPTS_URL/aws_upload_output_update_json.py
+exl wget $SCRIPTS_URL/download_workflow.py
 
 exl echo $JSON_BUCKET_NAME
 exl aws s3 cp s3://$JSON_BUCKET_NAME/$RUN_JSON_FILE_NAME .
@@ -145,6 +146,10 @@ mv $LOGFILE1 $LOGFILE2
 LOGFILE=$LOGFILE2
 send_log
 
+### download cwl from github or any other url.
+pip install boto3
+exl ./download_workflow.py
+
 # set up cronjob for cloudwatch metrics for memory, disk space and CPU utilization
 cwd0=$(pwd)
 cd ~
@@ -158,23 +163,6 @@ echo "*/1 * * * * top -b | head -15 >> $LOGFILE; du -h $LOCAL_INPUT_DIR/ >> $LOG
 cat cloudwatch.jobs | crontab -
 cd $cwd0
 
-### download cwl from github or any other url.
-if [[ $LANGUAGE == 'wdl' ]]
-then
-  exl echo "main wdl=$MAIN_WDL"
-  for WDL_FILE in $MAIN_WDL $WDL_FILES
-  do
-   exl wget -O$LOCAL_WFDIR/$WDL_FILE $WDL_URL/$WDL_FILE
-  done
-else
-  exl echo "main cwl=$MAIN_CWL"
-  for CWL_FILE in $MAIN_CWL $CWL_FILES
-  do
-   exl wget -O$LOCAL_WFDIR/$CWL_FILE $CWL_URL/$CWL_FILE
-  done
-fi
-
-
 ### download data & reference files from s3
 exl cat $DOWNLOAD_COMMAND_FILE
 exl date 
@@ -221,7 +209,6 @@ exl date ## done time
 send_log
 exl ls -lhtr $LOCAL_OUTDIR/
 #exle aws s3 cp --recursive $LOCAL_OUTDIR s3://$OUTBUCKET
-pip install boto3
 if [[ $LANGUAGE == 'wdl' ]]
 then
   WDLOPTION=wdl

diff --git a/awsf/download_workflow.py b/awsf/download_workflow.py
@@ -0,0 +1,52 @@
+#!/usr/bin/python
+import os
+import subprocess
+import boto3
+
+
+def main():
+    """Download the main and auxiliary workflow files (CWL or WDL) into LOCAL_WFDIR.
+
+    All inputs are read from environment variables:
+      LANGUAGE              -- 'wdl' selects the WDL_* variables; any other
+                               value (including unset) selects the CWL_* ones.
+      LOCAL_WFDIR           -- local directory to download into; it is created
+                               with 'mkdir -p' if it does not exist.
+      MAIN_WDL / MAIN_CWL   -- name of the main workflow file.
+      WDL_FILES / CWL_FILES -- whitespace-delimited names of the other files.
+      WDL_URL / CWL_URL     -- base location; 'http...' locations are fetched
+                               with wget, 's3://bucket[/prefix]' locations
+                               with boto3. Any other scheme is skipped.
+
+    Raises:
+        ValueError: if LOCAL_WFDIR or the workflow URL variable is not set.
+        subprocess.CalledProcessError: if wget exits with a non-zero status.
+    """
+    language = os.environ.get('LANGUAGE')
+    local_wfdir = os.environ.get('LOCAL_WFDIR')
+    if not local_wfdir:
+        # fail with a clear message instead of a TypeError inside subprocess
+        raise ValueError("environment variable LOCAL_WFDIR is not set")
+    subprocess.call(['mkdir', '-p', local_wfdir])
+
+    if language == 'wdl':
+        main_wf = os.environ.get('MAIN_WDL', '')
+        wf_files = os.environ.get('WDL_FILES', '')
+        url_var = 'WDL_URL'
+    else:
+        main_wf = os.environ.get('MAIN_CWL', '')
+        wf_files = os.environ.get('CWL_FILES', '')
+        url_var = 'CWL_URL'
+    wf_url = os.environ.get(url_var)
+    if not wf_url:
+        # fail with a clear message instead of an AttributeError on None
+        raise ValueError("environment variable %s is not set" % url_var)
+
+    # turn into a list; split() with no argument ignores repeated, leading and
+    # trailing whitespace, so no empty file names end up in the list
+    wf_files = wf_files.split()
+    wf_files.append(main_wf)
+    wf_url = wf_url.strip('/')
+
+    print("main workflow file: %s" % main_wf)
+    print("workflow files: " + str(wf_files))
+
+    s3 = boto3.client('s3')
+    for wf_file in wf_files:
+        target = "%s/%s" % (local_wfdir, wf_file)
+        source = "%s/%s" % (wf_url, wf_file)
+        if wf_url.startswith('http'):
+            # check_call raises on a non-zero wget exit status, so a failed
+            # download is reported here instead of being silently ignored
+            subprocess.check_call(["wget", "-O" + target, source])
+        elif wf_url.startswith('s3'):
+            wf_loc = wf_url.replace('s3://', '')
+            # split only at the first '/': everything before it is the bucket,
+            # everything after it is the key prefix. (Replacing 'bucket/'
+            # throughout the path would corrupt prefixes that happen to
+            # contain the bucket name again.)
+            bucket_name, _, subdirectory = wf_loc.partition('/')
+            if subdirectory:
+                key = subdirectory + '/' + wf_file
+            else:
+                key = wf_file
+            print("downloading key %s from bucket %s to target %s" % (key, bucket_name, target))
+            s3.download_file(Bucket=bucket_name, Key=key, Filename=target)
+
+
+if __name__ == '__main__':
+    main()
diff --git a/docs/news.rst b/docs/news.rst
@@ -14,6 +14,12 @@ Version updates
 +++++++++++++++
 
 
+  **May 30, 2019** The latest version is now 0.8.1_.
+
+    - Fixed ``deploy_core`` (and ``deploy_unicorn``) not working in a non-venv environment
+    - local CWL/WDL files and CWL/WDL files on S3 are supported.
+
+
   **May 29, 2019** The latest version is now 0.8.0_.
 
     - Tibanna can now be installed via ``pip install tibanna``! (no need to ``git clone``)
@@ -192,6 +198,7 @@ Version updates
 
     - Killer CLIs ``invoke kill`` is available to kill specific jobs and ``invoke kill_all`` is available to kill all jobs. They terminate both the step function execution and the EC2 instances.
 
+.. _0.8.1: https://github.com/4dn-dcic/tibanna/releases/tag/v0.8.1
 .. _0.8.0: https://github.com/4dn-dcic/tibanna/releases/tag/v0.8.0
 .. _0.7.0: https://github.com/4dn-dcic/tibanna/releases/tag/v0.7.0
 .. _0.6.1: https://github.com/4dn-dcic/tibanna/releases/tag/v0.6.1

diff --git a/requirements-4dn.txt b/requirements-4dn.txt
@@ -1,3 +1 @@
-python-lambda-4dn==0.12.0
 dcicutils==0.6.0
-Benchmark-4dn==0.5.2
diff --git a/setup.cfg b/setup.cfg
@@ -23,7 +23,7 @@ include =
     */tibanna/*
     */tibanna_4dn/*
 [aliases]
-test=pytest
+test=check
 
 [tool:pytest]
 addopts =

diff --git a/setup.py b/setup.py
@@ -26,8 +26,6 @@
 #     'requests==2.20.0'
 # ]
 
-setup_requires = install_requires + ['pytest-runner']
-
 tests_requires = [
     'flake8',
     'pytest',
@@ -57,7 +55,7 @@
             ],
     install_requires=install_requires,
     include_package_data=True,
-    setup_requires=setup_requires,
+    setup_requires=install_requires,
     tests_require=tests_requires,
     entry_points={
         'console_scripts': [

diff --git a/test_json/unicorn/4dn_bwa.runonly.v1.local.json b/test_json/unicorn/4dn_bwa.runonly.v1.local.json
@@ -0,0 +1,47 @@
+{
+  "args": {
+    "app_name": "bwa-mem",
+    "input_parameters": {},
+    "cwl_child_filenames": [],
+    "output_target": {
+      "out_bam": "lalala/out.bam"
+    },
+    "secondary_output_target": {},
+    "cwl_main_filename": "bwa-mem.cwl",
+    "secondary_files": {},
+    "output_S3_bucket": "tibanna-output",
+    "app_version": "5",
+    "cwl_directory_local": "tests/files/cwl_upload/",
+    "cwl_version": "v1",
+    "input_files": {
+      "fastq1": {
+        "bucket_name": "4dn-tool-evaluation-files",
+        "object_key": "GM12878_SRR1658581_1pc_1_R1.fastq.h10000"
+      },
+      "fastq2": {
+        "bucket_name": "4dn-tool-evaluation-files",
+        "object_key": "GM12878_SRR1658581_1pc_1_R2.fastq.h10000"
+      },
+      "bwa_index": {
+        "bucket_name": "elasticbeanstalk-fourfront-webdev-files",
+        "object_key": "1f53df95-4cf3-41cc-971d-81bb16c486dd/4DNFIZQZ39L9.bwaIndex.tgz"
+      }
+    },
+    "input_parameters": {
+        "nThreads": 2
+    }
+  },
+  "config": {
+    "ebs_size": 30,
+    "ebs_type": "io1",
+    "json_bucket": "4dn-aws-pipeline-run-json",
+    "EBS_optimized": false,
+    "ebs_iops": 500,
+    "shutdown_min": 30,
+    "instance_type": "t2.large",
+    "password": "dragonfly",
+    "log_bucket": "tibanna-output",
+    "key_name": "4dn-encode",
+    "cloudwatch_dashboard": true
+  }
+}
diff --git a/tests/files/cwl_upload/bwa-mem.cwl b/tests/files/cwl_upload/bwa-mem.cwl
@@ -0,0 +1,98 @@
+{
+  "hints": [
+    {
+      "dockerPull": "duplexa/4dn-hic:v42.1",
+      "class": "DockerRequirement"
+    }
+  ],
+  "arguments": [],
+  "class": "CommandLineTool",
+  "inputs": [
+    {
+      "type": [
+        "File"
+      ],
+      "id": "#fastq1",
+      "inputBinding": {
+        "position": 1,
+        "separate": true
+      }
+    },
+    {
+      "type": [
+        "File"
+      ],
+      "id": "#fastq2",
+      "inputBinding": {
+        "position": 2,
+        "separate": true
+      }
+    },
+    {
+      "type": [
+        "File"
+      ],
+      "id": "#bwa_index",
+      "inputBinding": {
+        "position": 3,
+        "separate": true
+      }
+    },
+    {
+      "type": [
+        "int"
+      ],
+      "id": "#nThreads",
+      "inputBinding": {
+        "position": 6,
+        "separate": true
+      },
+      "default": 4
+    },
+    {
+      "type": [
+        "null",
+        "string"
+      ],
+      "id": "#prefix",
+      "inputBinding": {
+        "position": 5,
+        "separate": true
+      },
+      "default": "out"
+    },
+    {
+      "type": [
+        "null",
+        "string"
+      ],
+      "id": "#outdir",
+      "inputBinding": {
+        "position": 4,
+        "separate": true
+      },
+      "default": "."
+    }
+  ],
+  "outputs": [
+    {
+      "type": [
+        "null",
+        "File"
+      ],
+      "id": "#out_bam",
+      "outputBinding": {
+        "glob": "*.bam"
+      }
+    }
+  ],
+  "baseCommand": [
+    "run-bwa-mem.sh"
+  ],
+  "requirements": [
+    {
+      "class": "InlineJavascriptRequirement"
+    }
+  ],
+  "cwlVersion": "v1.0"
+}
diff --git a/tests/tibanna/unicorn/conftest.py b/tests/tibanna/unicorn/conftest.py
@@ -37,6 +37,11 @@ def run_task_awsem_event_data_chipseq():
     return get_event_file_for('run_task_awsem', event_file='event_chipseq.json')
 
 
+@pytest.fixture(scope='session')
+def run_task_awsem_event_cwl_upload():
+    """Session-scoped fixture for the cwl-upload test event.
+
+    Returns whatever get_event_file_for yields for 'run_task_awsem' with
+    event_file='event_cwl_upload.json'.
+    """
+    event = get_event_file_for('run_task_awsem', event_file='event_cwl_upload.json')
+    return event
+
+
 def get_test_json(file_name):
     dir_path = os.path.dirname(os.path.realpath(__file__))
     event_file_name = os.path.join(dir_path, '..', '..', '..', 'test_json', 'unicorn', file_name)

diff --git a/tests/tibanna/unicorn/run_task_awsem/cwl_upload/child1.cwl b/tests/tibanna/unicorn/run_task_awsem/cwl_upload/child1.cwl
diff --git a/tests/tibanna/unicorn/run_task_awsem/cwl_upload/child2.cwl b/tests/tibanna/unicorn/run_task_awsem/cwl_upload/child2.cwl
diff --git a/tests/tibanna/unicorn/run_task_awsem/cwl_upload/main.cwl b/tests/tibanna/unicorn/run_task_awsem/cwl_upload/main.cwl
diff --git a/tests/tibanna/unicorn/run_task_awsem/event_cwl_upload.json b/tests/tibanna/unicorn/run_task_awsem/event_cwl_upload.json
@@ -0,0 +1,37 @@
+{
+  "args": {
+    "app_name": "md5",
+    "language": "cwl draft-3",
+    "input_parameters": {},
+    "cwl_version": "draft-3",
+    "output_target": {
+      "report": "ac18f2bb-c256-40bf-9562-cdc6179d6f9a/report"
+    },
+    "secondary_output_target": {},
+    "cwl_main_filename": "main.cwl",
+    "cwl_child_filenames": ["child1.cwl", "child2.cwl"],
+    "secondary_files": {},
+    "output_S3_bucket": "elasticbeanstalk-fourfront-webdev-wfoutput",
+    "app_version": "0.0.4",
+    "cwl_directory_local": "tests/tibanna/unicorn/run_task_awsem/cwl_upload/",
+    "input_files": {
+      "input_file": {
+        "bucket_name": "elasticbeanstalk-fourfront-webdev-files",
+        "object_key": "f4864029-a8ad-4bb8-93e7-5108f462ccaa/4DNFIRSRJH45.fastq.gz"
+      }
+    },
+    "input_env": {"TESTENV": 1234}
+  },
+  "config": {
+    "ebs_size": 0,
+    "ebs_type": "io1",
+    "json_bucket": "4dn-aws-pipeline-run-json",
+    "EBS_optimized": "",
+    "ebs_iops": 500,
+    "shutdown_min": 30,
+    "instance_type": "",
+    "password": "",
+    "log_bucket": "tibanna-output",
+    "key_name": "4dn-encode"
+  }
+}