From 4b20470bef1c46910abb4d997d7e95c972471cad Mon Sep 17 00:00:00 2001 From: SooLee Date: Wed, 29 May 2019 16:21:35 -0400 Subject: [PATCH 01/14] new issue with open browser fixed --- tibanna/core.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tibanna/core.py b/tibanna/core.py index 77e482096..35379fdb1 100644 --- a/tibanna/core.py +++ b/tibanna/core.py @@ -163,7 +163,7 @@ def run_workflow(self, input_json, sfn=None, 'home?region=%s#dashboards:name=awsem-%s' % (AWS_REGION, jobid) print("Cloudwatch Dashboard = %s" % cw_db_url) if open_browser and shutil.which('open') is not None: - subprocess.call('open %s' % data[_tibanna]['url']) + subprocess.call(["open", data[_tibanna]['url']]) return data def add_to_dydb(self, awsem_job_id, execution_name, sfn, logbucket): From b44cdca0e2eb37b9aedc36f66cab96e57c941b10 Mon Sep 17 00:00:00 2001 From: SooLee Date: Wed, 29 May 2019 16:27:14 -0400 Subject: [PATCH 02/14] expose usergroup for deploy_pony for luisa --- tibanna_4dn/__main__.py | 8 +++++--- tibanna_4dn/core.py | 4 ++-- 2 files changed, 7 insertions(+), 5 deletions(-) diff --git a/tibanna_4dn/__main__.py b/tibanna_4dn/__main__.py index fa6c40b99..46304d4a1 100644 --- a/tibanna_4dn/__main__.py +++ b/tibanna_4dn/__main__.py @@ -39,7 +39,9 @@ def args(self): _args['deploy_pony'] = \ [{'flag': ["-s", "--suffix"], 'help': "suffix (e.g. 'dev') to add to the end of the name of" + - "tibanna_pony and AWS Lambda functions within the same usergroup"}] + "tibanna_pony and AWS Lambda functions within the same usergroup"}, + {'flag': ["-g", "--usergroup"], + 'help': "Tibanna usergroup to share the permission to access buckets and run jobs"}], return _args @@ -50,9 +52,9 @@ def deploy_core(name, suffix=None, usergroup=None): API().deploy_core(name=name, suffix=suffix, usergroup=usergroup) -def deploy_pony(suffix=None): +def deploy_pony(suffix=None, usergroup=None): """deploy tibanna unicorn or pony to AWS cloud (pony is for 4DN-DCIC only)""" - API().deploy_pony(suffix=suffix) + API().deploy_pony(suffix=suffix, usergroup=usergroup) def run_workflow(input_json, sfn=TIBANNA_DEFAULT_STEP_FUNCTION_NAME, jobid='', sleep=3): diff --git a/tibanna_4dn/core.py b/tibanna_4dn/core.py index cde73768f..aaa84ef3a 100644 --- a/tibanna_4dn/core.py +++ b/tibanna_4dn/core.py @@ -50,5 +50,5 @@ def env_list(self, name): } return envlist_pony.get(name, '') - def deploy_pony(self, suffix=None): - self.deploy_tibanna(suffix=suffix) + def deploy_pony(self, suffix=None, usergroup=None): + self.deploy_tibanna(suffix=suffix, usergroup=usergroup) From 3d26795b7651decbf2a84b74cdacda4da462682d Mon Sep 17 00:00:00 2001 From: SooLee Date: Thu, 30 May 2019 01:40:21 +0000 Subject: [PATCH 03/14] s3:// location for cwl/wdl files supported --- awsf/aws_run_workflow_generic.sh | 21 +++---------- awsf/download_workflow.py | 51 ++++++++++++++++++++++++++++++++ 2 files changed, 55 insertions(+), 17 deletions(-) create mode 100644 awsf/download_workflow.py diff --git a/awsf/aws_run_workflow_generic.sh b/awsf/aws_run_workflow_generic.sh index 6b5a694de..b71d2b009 100755 --- a/awsf/aws_run_workflow_generic.sh +++ b/awsf/aws_run_workflow_generic.sh @@ -118,6 +118,7 @@ fi exl wget $SCRIPTS_URL/aws_decode_run_json.py exl wget $SCRIPTS_URL/aws_update_run_json.py exl wget $SCRIPTS_URL/aws_upload_output_update_json.py +exl wget $SCRIPTS_URL/download_workflow.py exl echo $JSON_BUCKET_NAME exl aws s3 cp s3://$JSON_BUCKET_NAME/$RUN_JSON_FILE_NAME . @@ -145,6 +146,9 @@ mv $LOGFILE1 $LOGFILE2 LOGFILE=$LOGFILE2 send_log +### download cwl from github or any other url. +exl ./download_workflow.py + # set up cronjojb for cloudwatch metrics for memory, disk space and CPU utilization cwd0=$(pwd) cd ~ @@ -158,23 +162,6 @@ echo "*/1 * * * * top -b | head -15 >> $LOGFILE; du -h $LOCAL_INPUT_DIR/ >> $LOG cat cloudwatch.jobs | crontab - cd $cwd0 -### download cwl from github or any other url. -if [[ $LANGUAGE == 'wdl' ]] -then - exl echo "main wdl=$MAIN_WDL" - for WDL_FILE in $MAIN_WDL $WDL_FILES - do - exl wget -O$LOCAL_WFDIR/$WDL_FILE $WDL_URL/$WDL_FILE - done -else - exl echo "main cwl=$MAIN_CWL" - for CWL_FILE in $MAIN_CWL $CWL_FILES - do - exl wget -O$LOCAL_WFDIR/$CWL_FILE $CWL_URL/$CWL_FILE - done -fi - - ### download data & reference files from s3 exl cat $DOWNLOAD_COMMAND_FILE exl date diff --git a/awsf/download_workflow.py b/awsf/download_workflow.py new file mode 100644 index 000000000..60db49a50 --- /dev/null +++ b/awsf/download_workflow.py @@ -0,0 +1,51 @@ +import os +import subprocess +import boto3 + + +def main(): + language = os.environ.get('LANGUAGE') + local_wfdir = os.environ.get('LOCAL_WFDIR') + subprocess.call(['mkdir', '-p', local_wfdir]) + + if language == 'wdl': + main_wf = os.environ.get('MAIN_WDL', '') + wf_files = os.environ.get('WDL_FILES', '') + wf_url = os.environ.get('WDL_URL') + else: + main_wf = os.environ.get('MAIN_CWL', '') + wf_files = os.environ.get('CWL_FILES', '') + wf_url = os.environ.get('CWL_URL') + # turn into a list + if not wf_files: + wf_files = [] + elif ' ' in wf_files: + wf_files = wf_files.split(' ') + else: + wf_files = [wf_files] + wf_files.append(main_wf) + wf_url = wf_url.strip('/') + + print("main workflow file: %s" % main_wf) + print("workflow files: " + str(wf_files)) + + s3 = boto3.client('s3') + for wf_file in wf_files: + target = "%s/%s" % (local_wfdir, wf_file) + source = "%s/%s" % (wf_url, wf_file) + if wf_url.startswith('http'): + subprocess.call(["wget", "-O" + target, source]) + elif wf_url.startswith('s3'): + wf_loc = wf_url.replace('s3://', '') + bucket_name = wf_loc.split('/')[0] + if len(wf_loc.split('/')) > 1: + subdirectory = wf_loc.replace(bucket_name + '/', '') + key = subdirectory + '/' + wf_file + else: + key = wf_file + print("downloading key %s from bucket %s to target %s" % (key, bucket_name, target)) + s3.download_file(Bucket=bucket_name, Key=key, Filename=target) + + +if __name__ == '__main__': + main() From 22f94595c2daf9861892edc8f011c8be69f66591 Mon Sep 17 00:00:00 2001 From: SooLee Date: Thu, 30 May 2019 02:02:08 +0000 Subject: [PATCH 04/14] minor change in awsf workflow downloaded --- awsf/download_workflow.py | 1 + 1 file changed, 1 insertion(+) diff --git a/awsf/download_workflow.py b/awsf/download_workflow.py index 60db49a50..00d297492 100644 --- a/awsf/download_workflow.py +++ b/awsf/download_workflow.py @@ -1,3 +1,4 @@ +#!/usr/bin/python import os import subprocess import boto3 From 12188562a9927c8c10528e95cac0b71bc0be168f Mon Sep 17 00:00:00 2001 From: SooLee Date: Thu, 30 May 2019 02:30:08 +0000 Subject: [PATCH 05/14] install boto3 to awsem before download_workflow --- awsf/aws_run_workflow_generic.sh | 2 +- awsf/download_workflow.py | 0 2 files changed, 1 insertion(+), 1 deletion(-) mode change 100644 => 100755 awsf/download_workflow.py diff --git a/awsf/aws_run_workflow_generic.sh b/awsf/aws_run_workflow_generic.sh index b71d2b009..4b510835e 100755 --- a/awsf/aws_run_workflow_generic.sh +++ b/awsf/aws_run_workflow_generic.sh @@ -147,6 +147,7 @@ LOGFILE=$LOGFILE2 send_log ### download cwl from github or any other url. +pip install boto3 exl ./download_workflow.py # set up cronjojb for cloudwatch metrics for memory, disk space and CPU utilization @@ -208,7 +209,6 @@ exl date ## done time send_log exl ls -lhtr $LOCAL_OUTDIR/ #exle aws s3 cp --recursive $LOCAL_OUTDIR s3://$OUTBUCKET -pip install boto3 if [[ $LANGUAGE == 'wdl' ]] then WDLOPTION=wdl diff --git a/awsf/download_workflow.py b/awsf/download_workflow.py old mode 100644 new mode 100755 From 92b56422e95a77b72d8cea3f7c976b26b623b352 Mon Sep 17 00:00:00 2001 From: SooLee Date: Thu, 30 May 2019 03:32:13 +0000 Subject: [PATCH 06/14] export wf variables in awsem script --- awsf/aws_decode_run_json.py | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/awsf/aws_decode_run_json.py b/awsf/aws_decode_run_json.py index 62240b854..9d3ad966e 100755 --- a/awsf/aws_decode_run_json.py +++ b/awsf/aws_decode_run_json.py @@ -149,26 +149,26 @@ def create_env_def_file(env_filename, Dict, language): # env variables set before this script started running. with open(env_filename, 'w') as f_env: if language == 'wdl': - f_env.write("WDL_URL={}\n".format(Dict["Job"]["App"]["wdl_url"])) + f_env.write("export WDL_URL={}\n".format(Dict["Job"]["App"]["wdl_url"])) # main cwl to be run (the other cwl files will be called by this one) - f_env.write("MAIN_WDL={}\n".format(Dict["Job"]["App"]["main_wdl"])) + f_env.write("export MAIN_WDL={}\n".format(Dict["Job"]["App"]["main_wdl"])) # list of cwl files in an array delimited by a space - f_env.write("WDL_FILES=\"{}\"\n".format(' '.join(Dict["Job"]["App"]["other_wdl_files"].split(',')))) + f_env.write("export WDL_FILES=\"{}\"\n".format(' '.join(Dict["Job"]["App"]["other_wdl_files"].split(',')))) else: # cwl - f_env.write("CWL_URL={}\n".format(Dict["Job"]["App"]["cwl_url"])) + f_env.write("export CWL_URL={}\n".format(Dict["Job"]["App"]["cwl_url"])) # main cwl to be run (the other cwl files will be called by this one) - f_env.write("MAIN_CWL={}\n".format(Dict["Job"]["App"]["main_cwl"])) + f_env.write("export MAIN_CWL={}\n".format(Dict["Job"]["App"]["main_cwl"])) # list of cwl files in an array delimited by a space - f_env.write("CWL_FILES=\"{}\"\n".format(' '.join(Dict["Job"]["App"]["other_cwl_files"].split(',')))) + f_env.write("export CWL_FILES=\"{}\"\n".format(' '.join(Dict["Job"]["App"]["other_cwl_files"].split(',')))) # other env variables - f_env.write("OUTBUCKET={}\n".format(Dict["Job"]["Output"]["output_bucket_directory"])) - f_env.write("PUBLIC_POSTRUN_JSON={}\n".format('1' if Dict["config"].get('public_postrun_json', False) else '0')) + f_env.write("export OUTBUCKET={}\n".format(Dict["Job"]["Output"]["output_bucket_directory"])) + f_env.write("export PUBLIC_POSTRUN_JSON={}\n".format('1' if Dict["config"].get('public_postrun_json', False) else '0')) env_preserv_str = '' if "Env" in Dict["Job"]["Input"]: for ev, val in Dict["Job"]["Input"]["Env"].iteritems(): f_env.write("{}={}\n".format(ev, val)) env_preserv_str = env_preserv_str + "--preserve-environment " + ev + " " - f_env.write("PRESERVED_ENV_OPTION=\"{}\"\n".format(env_preserv_str)) + f_env.write("export PRESERVED_ENV_OPTION=\"{}\"\n".format(env_preserv_str)) main() From fe907629e03e106e38975c6f50aa40beacb23824 Mon Sep 17 00:00:00 2001 From: SooLee Date: Thu, 30 May 2019 15:31:26 +0000 Subject: [PATCH 07/14] cwl/wdl file can now be local (not tested) --- .../unicorn/4dn_bwa.runonly.v1.local.json | 47 +++++++++ tests/files/cwl_upload/bwa-mem.cwl | 98 +++++++++++++++++++ tests/tibanna/unicorn/conftest.py | 5 + .../run_task_awsem/cwl_upload/child1.cwl | 0 .../run_task_awsem/cwl_upload/child2.cwl | 0 .../run_task_awsem/cwl_upload/main.cwl | 0 .../run_task_awsem/event_cwl_upload.json | 37 +++++++ tests/tibanna/unicorn/test_ec2_utils.py | 19 +++- tibanna/ec2_utils.py | 21 ++++ tibanna/run_task.py | 9 ++ 10 files changed, 235 insertions(+), 1 deletion(-) create mode 100644 test_json/unicorn/4dn_bwa.runonly.v1.local.json create mode 100644 tests/files/cwl_upload/bwa-mem.cwl create mode 100644 tests/tibanna/unicorn/run_task_awsem/cwl_upload/child1.cwl create mode 100644 tests/tibanna/unicorn/run_task_awsem/cwl_upload/child2.cwl create mode 100644 tests/tibanna/unicorn/run_task_awsem/cwl_upload/main.cwl create mode 100644 tests/tibanna/unicorn/run_task_awsem/event_cwl_upload.json diff --git a/test_json/unicorn/4dn_bwa.runonly.v1.local.json b/test_json/unicorn/4dn_bwa.runonly.v1.local.json new file mode 100644 index 000000000..fa09ba951 --- /dev/null +++ b/test_json/unicorn/4dn_bwa.runonly.v1.local.json @@ -0,0 +1,47 @@ +{ + "args": { + "app_name": "bwa-mem", + "input_parameters": {}, + "cwl_child_filenames": [], + "output_target": { + "out_bam": "lalala/out.bam" + }, + "secondary_output_target": {}, + "cwl_main_filename": "bwa-mem.cwl", + "secondary_files": {}, + "output_S3_bucket": "tibanna-output", + "app_version": "5", + "cwl_directory_local": "tests/files/cwl_upload/", + "cwl_version": "v1", + "input_files": { + "fastq1": { + "bucket_name": "4dn-tool-evaluation-files", + "object_key": "GM12878_SRR1658581_1pc_1_R1.fastq.h10000" + }, + "fastq2": { + "bucket_name": "4dn-tool-evaluation-files", + "object_key": "GM12878_SRR1658581_1pc_1_R2.fastq.h10000" + }, + "bwa_index": { + "bucket_name": "elasticbeanstalk-fourfront-webdev-files", + "object_key": "1f53df95-4cf3-41cc-971d-81bb16c486dd/4DNFIZQZ39L9.bwaIndex.tgz" + } + }, + "input_parameters": { + "nThreads": 2 + } + }, + "config": { + "ebs_size": 30, + "ebs_type": "io1", + "json_bucket": "4dn-aws-pipeline-run-json", + "EBS_optimized": false, + "ebs_iops": 500, + "shutdown_min": 30, + "instance_type": "t2.large", + "password": "dragonfly", + "log_bucket": "tibanna-output", + "key_name": "4dn-encode", + "cloudwatch_dashboard": true + } +} diff --git a/tests/files/cwl_upload/bwa-mem.cwl b/tests/files/cwl_upload/bwa-mem.cwl new file mode 100644 index 000000000..dc9a9b660 --- /dev/null +++ b/tests/files/cwl_upload/bwa-mem.cwl @@ -0,0 +1,98 @@ +{ + "hints": [ + { + "dockerPull": "duplexa/4dn-hic:v42.1", + "class": "DockerRequirement" + } + ], + "arguments": [], + "class": "CommandLineTool", + "inputs": [ + { + "type": [ + "File" + ], + "id": "#fastq1", + "inputBinding": { + "position": 1, + "separate": true + } + }, + { + "type": [ + "File" + ], + "id": "#fastq2", + "inputBinding": { + "position": 2, + "separate": true + } + }, + { + "type": [ + "File" + ], + "id": "#bwa_index", + "inputBinding": { + "position": 3, + "separate": true + } + }, + { + "type": [ + "int" + ], + "id": "#nThreads", + "inputBinding": { + "position": 6, + "separate": true + }, + "default": 4 + }, + { + "type": [ + "null", + "string" + ], + "id": "#prefix", + "inputBinding": { + "position": 5, + "separate": true + }, + "default": "out" + }, + { + "type": [ + "null", + "string" + ], + "id": "#outdir", + "inputBinding": { + "position": 4, + "separate": true + }, + "default": "." + } + ], + "outputs": [ + { + "type": [ + "null", + "File" + ], + "id": "#out_bam", + "outputBinding": { + "glob": "*.bam" + } + } + ], + "baseCommand": [ + "run-bwa-mem.sh" + ], + "requirements": [ + { + "class": "InlineJavascriptRequirement" + } + ], + "cwlVersion": "v1.0" +} diff --git a/tests/tibanna/unicorn/conftest.py b/tests/tibanna/unicorn/conftest.py index 5f4ca5a1d..ba81c5e68 100644 --- a/tests/tibanna/unicorn/conftest.py +++ b/tests/tibanna/unicorn/conftest.py @@ -37,6 +37,11 @@ def run_task_awsem_event_data_chipseq(): return get_event_file_for('run_task_awsem', event_file='event_chipseq.json') +@pytest.fixture(scope='session') +def run_task_awsem_event_cwl_upload(): + return get_event_file_for('run_task_awsem', event_file='event_cwl_upload.json') + + def get_test_json(file_name): dir_path = os.path.dirname(os.path.realpath(__file__)) event_file_name = os.path.join(dir_path, '..', '..', '..', 'test_json', 'unicorn', file_name) diff --git a/tests/tibanna/unicorn/run_task_awsem/cwl_upload/child1.cwl b/tests/tibanna/unicorn/run_task_awsem/cwl_upload/child1.cwl new file mode 100644 index 000000000..e69de29bb diff --git a/tests/tibanna/unicorn/run_task_awsem/cwl_upload/child2.cwl b/tests/tibanna/unicorn/run_task_awsem/cwl_upload/child2.cwl new file mode 100644 index 000000000..e69de29bb diff --git a/tests/tibanna/unicorn/run_task_awsem/cwl_upload/main.cwl b/tests/tibanna/unicorn/run_task_awsem/cwl_upload/main.cwl new file mode 100644 index 000000000..e69de29bb diff --git a/tests/tibanna/unicorn/run_task_awsem/event_cwl_upload.json b/tests/tibanna/unicorn/run_task_awsem/event_cwl_upload.json new file mode 100644 index 000000000..1bb663109 --- /dev/null +++ b/tests/tibanna/unicorn/run_task_awsem/event_cwl_upload.json @@ -0,0 +1,37 @@ +{ + "args": { + "app_name": "md5", + "language": "cwl draft-3", + "input_parameters": {}, + "cwl_version": "draft-3", + "output_target": { + "report": "ac18f2bb-c256-40bf-9562-cdc6179d6f9a/report" + }, + "secondary_output_target": {}, + "cwl_main_filename": "main.cwl", + "cwl_child_filenames": ["child1.cwl", "child2.cwl"], + "secondary_files": {}, + "output_S3_bucket": "elasticbeanstalk-fourfront-webdev-wfoutput", + "app_version": "0.0.4", + "cwl_directory_local": "tests/tibanna/unicorn/run_task_awsem/cwl_upload/", + "input_files": { + "input_file": { + "bucket_name": "elasticbeanstalk-fourfront-webdev-files", + "object_key": "f4864029-a8ad-4bb8-93e7-5108f462ccaa/4DNFIRSRJH45.fastq.gz" + } + }, + "input_env": {"TESTENV": 1234} + }, + "config": { + "ebs_size": 0, + "ebs_type": "io1", + "json_bucket": "4dn-aws-pipeline-run-json", + "EBS_optimized": "", + "ebs_iops": 500, + "shutdown_min": 30, + "instance_type": "", + "password": "", + "log_bucket": "tibanna-output", + "key_name": "4dn-encode" + } +} diff --git a/tests/tibanna/unicorn/test_ec2_utils.py b/tests/tibanna/unicorn/test_ec2_utils.py index 91f1ec5ef..29830d678 100644 --- a/tests/tibanna/unicorn/test_ec2_utils.py +++ b/tests/tibanna/unicorn/test_ec2_utils.py @@ -1,4 +1,6 @@ -from tibanna.ec2_utils import update_config +from tibanna.ec2_utils import update_config, upload_workflow_to_s3 +from tibanna.utils import create_jobid +import boto3 def test_update_config(run_task_awsem_event_data): @@ -48,3 +50,18 @@ def test_update_config5(run_task_awsem_event_omit_fields2): assert config['EBS_optimized'] is True assert config['ebs_size'] >= 10 assert config['shutdown_min'] == "now" + + +def test_upload_workflow_to_s3(run_task_awsem_event_cwl_upload): + jobid = create_jobid() + args = run_task_awsem_event_cwl_upload['args'] + cfg = run_task_awsem_event_cwl_upload['config'] + url = upload_workflow_to_s3(args, cfg, jobid) + s3 = boto3.client('s3') + res1 = s3.get_object(Bucket=cfg['log_bucket'], Key=jobid + '.workflow/main.cwl') + res2 = s3.get_object(Bucket=cfg['log_bucket'], Key=jobid + '.workflow/child1.cwl') + res3 = s3.get_object(Bucket=cfg['log_bucket'], Key=jobid + '.workflow/child2.cwl') + assert res1 + assert res2 + assert res3 + assert url == 's3://tibanna-output/' + jobid + '.workflow/' diff --git a/tibanna/ec2_utils.py b/tibanna/ec2_utils.py index 137461b48..e22b032cb 100644 --- a/tibanna/ec2_utils.py +++ b/tibanna/ec2_utils.py @@ -556,3 +556,24 @@ def auto_update_input_json(args, cfg): cfg['language'] = args['language'] update_config(cfg, args.get('app_name', ''), args['input_files'], args.get('input_parameters', {})) + + +def upload_workflow_to_s3(args, cfg, jobid): + bucket = cfg['log_bucket'] + key_prefix = jobid + '.workflow/' + language = args['language'] + if language == 'wdl': + main_wf = args['wdl_main_filename'] + wf_files = args['wdl_child_filenames'] + localdir = args['wdl_directory_local'] + else: + main_wf = args['cwl_main_filename'] + wf_files = args['cwl_child_filenames'] + localdir = args['cwl_directory_local'] + wf_files.append(main_wf) + localdir = localdir.strip('/') + for wf_file in wf_files: + source = localdir + '/' + wf_file + target = key_prefix + wf_file + boto3.client('s3').upload_file(source, bucket, target) + return "s3://%s/%s" % (bucket, key_prefix) diff --git a/tibanna/run_task.py b/tibanna/run_task.py index e12d71284..9f55409cc 100644 --- a/tibanna/run_task.py +++ b/tibanna/run_task.py @@ -5,6 +5,7 @@ from .ec2_utils import ( auto_update_input_json, create_json, + upload_workflow_to_s3, launch_instance, create_cloudwatch_dashboard ) @@ -91,6 +92,14 @@ def run_task(input_json): # create json and copy to s3 jobid = create_json(input_json_copy) + if ('cwl_directory_local' in args and args['cwl_directory_local']) or \ + ('wdl_directory_local' in args and args['wdl_directory_local']): + url = upload_workflow_to_s3(args, cfg, jobid) + if args['language'] == 'wdl': + args['wdl_directory_url'] = url + else: + args['cwl_directory_url'] = url + # profile if os.environ.get('TIBANNA_PROFILE_ACCESS_KEY', None) and \ os.environ.get('TIBANNA_PROFILE_SECRET_KEY', None): From b54313e5f7188a1c8a1c75cdabc28096f5eba64e Mon Sep 17 00:00:00 2001 From: SooLee Date: Thu, 30 May 2019 15:38:12 +0000 Subject: [PATCH 08/14] cwl_directory_url field requirement now removed --- requirements-4dn.txt | 2 -- setup.py | 2 +- tibanna/run_task.py | 8 ++++---- 3 files changed, 5 insertions(+), 7 deletions(-) diff --git a/requirements-4dn.txt b/requirements-4dn.txt index 98d6c1ba1..29dd9da00 100644 --- a/requirements-4dn.txt +++ b/requirements-4dn.txt @@ -1,3 +1 @@ -python-lambda-4dn==0.12.0 dcicutils==0.6.0 -Benchmark-4dn==0.5.2 diff --git a/setup.py b/setup.py index 18144291f..f5cb251e2 100644 --- a/setup.py +++ b/setup.py @@ -43,7 +43,7 @@ description='Tibanna runs portable pipelines (in CWL/WDL) on the AWS Cloud.', long_description=long_description, long_description_content_type='text/markdown', - packages=['tibanna', 'tibanna_4dn'], + packages=['tibanna', 'tibanna_4dn', 'tibanna.lambdas', 'tibanna_4dn.lambdas'], zip_safe=False, author='4DN Team at Harvard Medical School', author_email='duplexa@gmail.com, jeremy_johnson@hms.harvard.edu, carl_vitzthum@hms.harvard.edu', diff --git a/tibanna/run_task.py b/tibanna/run_task.py index 9f55409cc..25f66ccd4 100644 --- a/tibanna/run_task.py +++ b/tibanna/run_task.py @@ -44,14 +44,14 @@ def run_task(input_json): secondary_output_target: secondary output files in json format (similar to secondary_files) # required for cwl cwl_main_filename: main cwl file name - cwl_directory_url: the url and subdirectories for the main cwl file + cwl_directory_url or cwl_directory_local : the url or local directory in which the cwl files resides cwl_version: the version of cwl (either 'draft3' or 'v1') cwl_child_filenames (optional): names of the other cwl files used by main cwl file, delimited by comma language (optional for cwl): 'cwl_v1' or 'cwl_draft3' # required for wdl language: 'wdl' wdl_main_filename: main wdl file name - wdl_directory_url: the url of the wdl file + wdl_directory_url or wdl_directory_local : the url or local directory in which the wdl files resides wdl_child_filenames (optional): names of the other wdl files used by main wdl file, delimited by comma # optional dependency: {'exec_arn': [exec_arns]} @@ -64,8 +64,8 @@ def run_task(input_json): CONFIG_KEYS = ["log_bucket"] ARGS_FIELD = "args" ARGS_KEYS = ["input_files", "output_S3_bucket", "output_target"] - ARGS_KEYS_CWL = ["cwl_main_filename", "cwl_directory_url"] - ARGS_KEYS_WDL = ["wdl_main_filename", "wdl_directory_url", "language"] + ARGS_KEYS_CWL = ["cwl_main_filename"] + ARGS_KEYS_WDL = ["wdl_main_filename", "language"] # args: parameters needed by the instance to run a workflow # cfg: parameters needed to launch an instance From 42bf0af10dd69ef3d6a8512b47a9cc7f059288dc Mon Sep 17 00:00:00 2001 From: SooLee Date: Thu, 30 May 2019 15:50:05 +0000 Subject: [PATCH 09/14] upload_workflow_to_s3 moved to run_workflow --- tibanna/core.py | 14 ++++++++++++++ tibanna/ec2_utils.py | 2 +- tibanna/run_task.py | 16 ++++------------ 3 files changed, 19 insertions(+), 13 deletions(-) diff --git a/tibanna/core.py b/tibanna/core.py index 35379fdb1..d34ec7452 100644 --- a/tibanna/core.py +++ b/tibanna/core.py @@ -32,6 +32,9 @@ printlog, create_jobid, ) +from .ec2_utils import ( + upload_workflow_to_s3 +) # from botocore.errorfactory import ExecutionAlreadyExists from .iam_utils import ( create_tibanna_iam, @@ -136,6 +139,17 @@ def run_workflow(self, input_json, sfn=None, data[_tibanna]['url'] = url # add jobid data['jobid'] = jobid + if 'args' in data: # unicorn-only + args = data['args'] + cfg = data['config'] + if ('cwl_directory_local' in args and args['cwl_directory_local']) or \ + ('wdl_directory_local' in args and args['wdl_directory_local']): + url = upload_workflow_to_s3(args, cfg, jobid) + if 'language' in args and args['language'] == 'wdl': + args['wdl_directory_url'] = url + else: + args['cwl_directory_url'] = url + # submit job as an execution aws_input = json.dumps(data) print("about to start run %s" % run_name) # trigger the step function to run diff --git a/tibanna/ec2_utils.py b/tibanna/ec2_utils.py index e22b032cb..6adb601fe 100644 --- a/tibanna/ec2_utils.py +++ b/tibanna/ec2_utils.py @@ -561,7 +561,7 @@ def auto_update_input_json(args, cfg): def upload_workflow_to_s3(args, cfg, jobid): bucket = cfg['log_bucket'] key_prefix = jobid + '.workflow/' - language = args['language'] + language = args.get('language', '') if language == 'wdl': main_wf = args['wdl_main_filename'] wf_files = args['wdl_child_filenames'] diff --git a/tibanna/run_task.py b/tibanna/run_task.py index 25f66ccd4..2ff4f6dfc 100644 --- a/tibanna/run_task.py +++ b/tibanna/run_task.py @@ -44,14 +44,14 @@ def run_task(input_json): secondary_output_target: secondary output files in json format (similar to secondary_files) # required for cwl cwl_main_filename: main cwl file name - cwl_directory_url or cwl_directory_local : the url or local directory in which the cwl files resides + cwl_directory_url: the url (http:// or s3://) in which the cwl files resides cwl_version: the version of cwl (either 'draft3' or 'v1') cwl_child_filenames (optional): names of the other cwl files used by main cwl file, delimited by comma language (optional for cwl): 'cwl_v1' or 'cwl_draft3' # required for wdl language: 'wdl' wdl_main_filename: main wdl file name - wdl_directory_url or wdl_directory_local : the url or local directory in which the wdl files resides + wdl_directory_url: the url (http:// or s3://) in which the wdl files resides wdl_child_filenames (optional): names of the other wdl files used by main wdl file, delimited by comma # optional dependency: {'exec_arn': [exec_arns]} @@ -64,8 +64,8 @@ def run_task(input_json): CONFIG_KEYS = ["log_bucket"] ARGS_FIELD = "args" ARGS_KEYS = ["input_files", "output_S3_bucket", "output_target"] - ARGS_KEYS_CWL = ["cwl_main_filename"] - ARGS_KEYS_WDL = ["wdl_main_filename", "language"] + ARGS_KEYS_CWL = ["cwl_main_filename", "cwl_directory_url"] + ARGS_KEYS_WDL = ["wdl_main_filename", "wdl_directory_url", "language"] # args: parameters needed by the instance to run a workflow # cfg: parameters needed to launch an instance @@ -92,14 +92,6 @@ def run_task(input_json): # create json and copy to s3 jobid = create_json(input_json_copy) - if ('cwl_directory_local' in args and args['cwl_directory_local']) or \ - ('wdl_directory_local' in args and args['wdl_directory_local']): - url = upload_workflow_to_s3(args, cfg, jobid) - if args['language'] == 'wdl': - args['wdl_directory_url'] = url - else: - args['cwl_directory_url'] = url - # profile if os.environ.get('TIBANNA_PROFILE_ACCESS_KEY', None) and \ os.environ.get('TIBANNA_PROFILE_SECRET_KEY', None): From bf31f088cd6aa3a0afc659e1b7bb113f6e052742 Mon Sep 17 00:00:00 2001 From: SooLee Date: Thu, 30 May 2019 16:02:31 +0000 Subject: [PATCH 10/14] indentation fix --- tibanna/core.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/tibanna/core.py b/tibanna/core.py index d34ec7452..d988a1ee5 100644 --- a/tibanna/core.py +++ b/tibanna/core.py @@ -142,13 +142,13 @@ def run_workflow(self, input_json, sfn=None, if 'args' in data: # unicorn-only args = data['args'] cfg = data['config'] - if ('cwl_directory_local' in args and args['cwl_directory_local']) or \ - ('wdl_directory_local' in args and args['wdl_directory_local']): - url = upload_workflow_to_s3(args, cfg, jobid) - if 'language' in args and args['language'] == 'wdl': - args['wdl_directory_url'] = url - else: - args['cwl_directory_url'] = url + if ('cwl_directory_local' in args and args['cwl_directory_local']) or \ + ('wdl_directory_local' in args and args['wdl_directory_local']): + url = upload_workflow_to_s3(args, cfg, jobid) + if 'language' in args and args['language'] == 'wdl': + args['wdl_directory_url'] = url + else: + args['cwl_directory_url'] = url # submit job as an execution aws_input = json.dumps(data) print("about to start run %s" % run_name) From e0c9d40e42dea35b3db71ce627867706f351da5b Mon Sep 17 00:00:00 2001 From: SooLee Date: Thu, 30 May 2019 16:25:22 +0000 Subject: [PATCH 11/14] cleanup after test --- tests/tibanna/unicorn/test_ec2_utils.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/tests/tibanna/unicorn/test_ec2_utils.py b/tests/tibanna/unicorn/test_ec2_utils.py index 29830d678..c72e15012 100644 --- a/tests/tibanna/unicorn/test_ec2_utils.py +++ b/tests/tibanna/unicorn/test_ec2_utils.py @@ -65,3 +65,8 @@ def test_upload_workflow_to_s3(run_task_awsem_event_cwl_upload): assert res2 assert res3 assert url == 's3://tibanna-output/' + jobid + '.workflow/' + # clean up afterwards + s3.delete_objects(Bucket=cfg['log_bucket'], + Delete={'Objects': [{'Key': jobid + '.workflow/main.cwl'}, + {'Key': jobid + '.workflow/child1.cwl'}, + {'Key': jobid + '.workflow/child2.cwl'}]}) From e89c06fb7c3dfb4f668da1cb541b13770a38753d Mon Sep 17 00:00:00 2001 From: Carl Vitzthum Date: Thu, 30 May 2019 13:42:24 -0400 Subject: [PATCH 12/14] Explicitly include all lambdas subdir files in MANIFEST; undo setup.py pkg changes --- MANIFEST.in | 4 ++-- setup.py | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/MANIFEST.in b/MANIFEST.in index 4244bc125..3476d2d22 100644 --- a/MANIFEST.in +++ b/MANIFEST.in @@ -3,5 +3,5 @@ include LICENSE.txt include requirements.txt include requirements-4dn.txt include requirements-test.txt -include tibanna/lambdas/requirements.txt -include tibanna_4dn/lambdas/requirements.txt +include tibanna/lambdas/* +include tibanna_4dn/lambdas/* diff --git a/setup.py b/setup.py index f5cb251e2..18144291f 100644 --- a/setup.py +++ b/setup.py @@ -43,7 +43,7 @@ description='Tibanna runs portable pipelines (in CWL/WDL) on the AWS Cloud.', long_description=long_description, long_description_content_type='text/markdown', - packages=['tibanna', 'tibanna_4dn', 'tibanna.lambdas', 'tibanna_4dn.lambdas'], + packages=['tibanna', 'tibanna_4dn'], zip_safe=False, author='4DN Team at Harvard Medical School', author_email='duplexa@gmail.com, jeremy_johnson@hms.harvard.edu, carl_vitzthum@hms.harvard.edu', From c70310a935fbc6d91e16683c8e8df949387a9e0f Mon Sep 17 00:00:00 2001 From: SooLEe Date: Thu, 30 May 2019 17:44:01 +0000 Subject: [PATCH 13/14] doc update --- docs/news.rst | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/docs/news.rst b/docs/news.rst index 23acd7ada..b65dbbe4f 100644 --- a/docs/news.rst +++ b/docs/news.rst @@ -14,6 +14,12 @@ Version updates +++++++++++++++ + **May 30, 2019** The latest version is now 0.8.1_. + + - ``deploy_core`` (and ``deploy_unicorn``) not working in a non-venv environment fixed + - local CWL/WDL files and CWL/WDL files on S3 are supported. + + **May 29, 2019** The latest version is now 0.8.0_. - Tibanna can now be installed via ``pip install tibanna``! (no need to ``git clone``) @@ -192,6 +198,7 @@ Version updates - Killer CLIs ``invoke kill`` is available to kill specific jobs and ``invoke kill_all`` is available to kill all jobs. They terminate both the step function execution and the EC2 instances. +.. _0.8.1: https://github.com/4dn-dcic/tibanna/releases/tag/v0.8.1 .. _0.8.0: https://github.com/4dn-dcic/tibanna/releases/tag/v0.8.0 .. _0.7.0: https://github.com/4dn-dcic/tibanna/releases/tag/v0.7.0 .. _0.6.1: https://github.com/4dn-dcic/tibanna/releases/tag/v0.6.1 From 15fc665ff7c8ce6b2ad0e5f5d4c370b75b638fe9 Mon Sep 17 00:00:00 2001 From: Carl Vitzthum Date: Thu, 30 May 2019 14:07:29 -0400 Subject: [PATCH 14/14] Change setup.py test alias to setup.py check (from pytest) --- setup.cfg | 2 +- setup.py | 4 +--- 2 files changed, 2 insertions(+), 4 deletions(-) diff --git a/setup.cfg b/setup.cfg index d8d1b489c..dfb765297 100644 --- a/setup.cfg +++ b/setup.cfg @@ -23,7 +23,7 @@ include = */tibanna/* */tibanna_4dn/* [aliases] -test=pytest +test=check [tool:pytest] addopts = diff --git a/setup.py b/setup.py index 18144291f..94feb293e 100644 --- a/setup.py +++ b/setup.py @@ -26,8 +26,6 @@ # 'requests==2.20.0' # ] -setup_requires = install_requires + ['pytest-runner'] - tests_requires = [ 'flake8', 'pytest', @@ -57,7 +55,7 @@ ], install_requires=install_requires, include_package_data=True, - setup_requires=setup_requires, + setup_requires=install_requires, tests_require=tests_requires, entry_points={ 'console_scripts': [