diff --git a/core/check_task_awsf_dev/__init__.py b/core/check_task_awsf_dev/__init__.py
deleted file mode 100644
index e69de29bb..000000000
diff --git a/core/check_task_awsf_dev/config.yaml b/core/check_task_awsf_dev/config.yaml
deleted file mode 100644
index 43140c32e..000000000
--- a/core/check_task_awsf_dev/config.yaml
+++ /dev/null
@@ -1,15 +0,0 @@
-region: us-east-1
-
-function_name: check_task_awsf_dev
-handler: service.handler
-role: lambda_full_s3
-description: check status of an AWSEM run by interrogating the appropriate files on S3
-
-# if access key and secret are left blank, boto will use the credentials
-# defined in the [default] section of ~/.aws/credentials.
-aws_access_key_id:
-aws_secret_access_key:
-
-# dist_directory: dist
-timeout: 300
-memory_size: 256
diff --git a/core/check_task_awsf_dev/event.json b/core/check_task_awsf_dev/event.json
deleted file mode 100644
index b86825ebf..000000000
--- a/core/check_task_awsf_dev/event.json
+++ /dev/null
@@ -1,39 +0,0 @@
-{
-    "args": {
-        "app_version": "8",
-        "app_name": "fastqc-0-11-4-1",
-        "input_parameters": {},
-        "input_files": {
-            "input_fastq": {
-                "object_key": "test.h2500.fastq",
-                "bucket_name": "tibanna-output"
-            }
-        },
-        "output_target": {},
-        "secondary_output_target": {},
-        "secondary_files": {},
-        "cwl_directory_url": "https://raw.githubusercontent.com/4dn-dcic/pipelines-cwl/master/cwl_awsem/",
-        "output_S3_bucket": "tibanna-output",
-        "cwl_child_filenames": ["fastqc-0-11-4.6.cwl"],
-        "cwl_main_filename": "fastqc-0-11-4-1.8.cwl"
-    },
-    "config": {
-        "ebs_size": 20,
-        "ebs_type": "io1",
-        "json_bucket": "4dn-aws-pipeline-run-json",
-        "json_dir": "/tmp/json",
-        "EBS_optimized": false,
-        "ebs_iops": 500,
-        "log_bucket": "tibanna-output",
-        "shutdown_min": "30",
-        "instance_type": "t2.nano",
-        "s3_access_arn": "arn:aws:iam::643366669028:instance-profile/S3_access",
-        "ami_id": "ami-cfb14bb5",
-        "copy_to_s3": true,
-        "job_tag": "fastqc-0-11-4-1",
-        "userdata_dir": "/tmp/userdata",
-        "launch_instance": true,
-        "password": "hahaha"
-    },
-    "jobid": "YoW5iAVOjoMV"
-}
diff --git a/core/check_task_awsf_dev/service.py b/core/check_task_awsf_dev/service.py
deleted file mode 100644
index f70eb403c..000000000
--- a/core/check_task_awsf_dev/service.py
+++ /dev/null
@@ -1,48 +0,0 @@
-# -*- coding: utf-8 -*-
-
-from core import utils
-from core.utils import StillRunningException, EC2StartingException, AWSEMJobErrorException
-import json
-
-
-def metadata_only(event):
-    event.update({'postrunjson': 'metadata_only'})
-    return event
-
-
-@utils.powerup('check_task_awsem', metadata_only)
-def handler(event, context):
-    '''
-    somewhere in the event data should be a jobid
-    '''
-    # s3 bucket that stores the output
-    bucket_name = event['config']['log_bucket']
-    s3 = utils.s3Utils(bucket_name, bucket_name, bucket_name)
-
-    # info about the job
-    jobid = event['jobid']
-    job_started = "%s.job_started" % jobid
-    job_success = "%s.success" % jobid
-    job_error = "%s.error" % jobid
-    job_log = "%s.log" % jobid
-    postrunjson = "%s.postrun.json" % jobid
-    job_log_location = "https://s3.amazonaws.com/%s/%s" % (bucket_name, job_log)
-    postrunjson_location = "https://s3.amazonaws.com/%s/%s" % (bucket_name, postrunjson)
-
-    # ensure this job has started; otherwise fail
-    if not s3.does_key_exist(job_started):
-        raise EC2StartingException("Failed to find jobid %s, ec2 is probably still booting" % jobid)
-
-    # if the job hit an error, report it
-    if s3.does_key_exist(job_error):
-        raise AWSEMJobErrorException("Job encountered an error: check log at %s" % job_log_location)
-
-    # if the job has completed, read the postrun json; otherwise raise a retry error
-    if s3.does_key_exist(job_success):
-        if not s3.does_key_exist(postrunjson):
-            raise Exception("Postrun json not found at %s" % postrunjson_location)
-        event['postrunjson'] = json.loads(s3.read_s3(postrunjson))
-        print("completed successfully")
-        return event
-    else:
-        raise StillRunningException("job %s still running" % jobid)
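Reviewer note: the check_task handler above boils down to probing a handful of sentinel keys that the AWSEM instance writes into the log bucket (<jobid>.job_started, .error, .success, .postrun.json). Below is a minimal sketch of the same polling with plain boto3; it assumes only those key names, since s3Utils and the exception classes live in core.utils and are not part of this diff.

# Hedged sketch: sentinel-key polling with plain boto3 (not the s3Utils API).
import json
import boto3
from botocore.exceptions import ClientError

s3 = boto3.client('s3')

def key_exists(bucket, key):
    try:
        s3.head_object(Bucket=bucket, Key=key)
        return True
    except ClientError:
        return False

def check_job(bucket, jobid):
    if not key_exists(bucket, "%s.job_started" % jobid):
        return 'booting'        # the handler raises EC2StartingException here
    if key_exists(bucket, "%s.error" % jobid):
        return 'error'          # AWSEMJobErrorException in the handler
    if key_exists(bucket, "%s.success" % jobid):
        body = s3.get_object(Bucket=bucket, Key="%s.postrun.json" % jobid)['Body']
        return json.loads(body.read())   # the postrun json payload
    return 'still_running'      # StillRunningException triggers a step-function retry

Raising distinct exception types is what lets the calling step function retry on EC2StartingException and StillRunningException but fail fast on AWSEMJobErrorException.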
diff --git a/core/run_task_awsf_dev/__init__.py b/core/run_task_awsf_dev/__init__.py
deleted file mode 100644
index e69de29bb..000000000
diff --git a/core/run_task_awsf_dev/config.yaml b/core/run_task_awsf_dev/config.yaml
deleted file mode 100644
index 6d2861cba..000000000
--- a/core/run_task_awsf_dev/config.yaml
+++ /dev/null
@@ -1,15 +0,0 @@
-region: us-east-1
-
-function_name: run_task_awsf_dev
-handler: service.handler
-role: lambda_full_s3
-description: create the AWSEM run json and launch an EC2 instance that executes the workflow
-
-# if access key and secret are left blank, boto will use the credentials
-# defined in the [default] section of ~/.aws/credentials.
-aws_access_key_id:
-aws_secret_access_key:
-
-# dist_directory: dist
-timeout: 300
-memory_size: 256
diff --git a/core/run_task_awsf_dev/event.json b/core/run_task_awsf_dev/event.json
deleted file mode 100644
index 1a50778ee..000000000
--- a/core/run_task_awsf_dev/event.json
+++ /dev/null
@@ -1,43 +0,0 @@
-{
-    "_tibanna": {
-        "env": "fourfront-webdev",
-        "run_type": "md5"
-    },
-    "args": {
-        "app_name": "md5",
-        "input_parameters": {},
-        "cwl_child_filenames": [],
-        "output_target": {
-            "report": "ac18f2bb-c256-40bf-9562-cdc6179d6f9a/report"
-        },
-        "secondary_output_target": {},
-        "cwl_main_filename": "md5.cwl",
-        "secondary_files": {},
-        "output_S3_bucket": "elasticbeanstalk-fourfront-webdev-wfoutput",
-        "app_version": "0.0.4",
-        "cwl_directory_url": "https://raw.githubusercontent.com/4dn-dcic/pipelines-cwl/0.0.4/cwl_awsem/",
-        "input_files": {
-            "input_file": {
-                "bucket_name": "elasticbeanstalk-fourfront-webdev-files",
-                "object_key": "f4864029-a8ad-4bb8-93e7-5108f462ccaa/4DNFIRSRJH45.fastq.gz"
-            }
-        }
-    },
-    "config": {
-        "ebs_size": 0,
-        "ebs_type": "io1",
-        "json_bucket": "4dn-aws-pipeline-run-json",
-        "EBS_optimized": "",
-        "ebs_iops": 500,
-        "shutdown_min": 120,
-        "instance_type": "",
-        "s3_access_arn": "arn:aws:iam::643366669028:instance-profile/S3_access",
-        "ami_id": "ami-cfb14bb5",
-        "copy_to_s3": true,
-        "launch_instance": true,
-        "password": "dragonfly",
-        "log_bucket": "tibanna-output",
-        "script_url": "https://raw.githubusercontent.com/4dn-dcic/tibanna/dev/awsf/",
-        "key_name": ""
-    }
-}
diff --git a/core/run_task_awsf_dev/service.py b/core/run_task_awsf_dev/service.py
deleted file mode 100644
index 5a90481ce..000000000
--- a/core/run_task_awsf_dev/service.py
+++ /dev/null
@@ -1,82 +0,0 @@
-# -*- coding: utf-8 -*-
-
-from core import ec2_utils as utils
-from core.utils import powerup
-
-
-def metadata_only(event):
-    event.update({'jobid': 'metadata_only'})
-    return event
-
-
-@powerup('run_task_awsf', metadata_only)
-def handler(event, context):
-    '''
-    config:
-    instance_type: EC2 instance type
-    ebs_size: EBS storage size in GB
-    ebs_type: EBS storage type (available values: gp2, io1, st1, sc1, standard; default: io1)
-    ebs_iops: EBS storage IOPS
-    s3_access_arn: IAM instance profile for S3 access
-    ami_id: ID of AMI used for the instance - it should have docker daemon and
-            cwl-runner (either toil or cwltools) installed
-    password: password for ssh connection for user ec2-user
-    EBS_optimized: set this flag if the instance type is EBS-optimized (default: false)
-    shutdown_min: number of minutes before shutdown after the jobs are finished (default: now)
-    copy_to_s3: upload or copy the json file to S3 bucket json_bucket
-    launch_instance: launch an instance based on the json file
-    log_bucket: bucket for collecting logs (started, postrun, success, error, log)
-
-    args:
-    cwl_main_filename: main cwl file name
-    cwl_child_filenames: names of the other cwl files used by the main cwl file, delimited by commas
-    app_name: name of the app
-    app_version: version of the app
-    cwl_directory_url: the url and subdirectories for the main cwl file
-    input_reference_files_directory: bucket name and subdirectory for input reference files
-    output_S3_bucket: bucket name and subdirectory for output files and logs
-    input_files: input files in json format (parametername: {'bucket_name': bucketname, 'object_key': filename})
-    secondary_files: secondary files in json format (parametername: {'bucket_name': bucketname, 'object_key': filename})
-    input_parameters: input parameters in json format (parametername: value)
-    '''
-
-    # read default variables in config
-    CONFIG_FIELD = "config"
-    CONFIG_KEYS = ["s3_access_arn", "EBS_optimized", "shutdown_min", "copy_to_s3",
-                   "ami_id", "instance_type", "ebs_size", "launch_instance",
-                   "script_url", "key_name",
-                   "ebs_type", "ebs_iops", "json_bucket", "password", "log_bucket"]
-    ARGS_FIELD = "args"
-    ARGS_KEYS = ["cwl_main_filename", "cwl_child_filenames", "app_name", "app_version",
-                 "input_files", "output_S3_bucket", "cwl_directory_url",
-                 "input_parameters", "secondary_files", "output_target", "secondary_output_target"]
-
-    cfg = event.get(CONFIG_FIELD)
-    for k in CONFIG_KEYS:
-        assert k in cfg, "%s not in config field" % k
-
-    args = event.get(ARGS_FIELD)
-    for k in ARGS_KEYS:
-        assert k in args, "%s not in args field" % k
-
-    # args: parameters needed by the instance to run a workflow
-    # cfg: parameters needed to launch an instance
-    cfg['job_tag'] = args.get('app_name')
-    cfg['userdata_dir'] = '/tmp/userdata'
-
-    # local directory in which the json file will be first created
-    cfg['json_dir'] = '/tmp/json'
-
-    utils.update_config(cfg, args['app_name'],
-                        args['input_files'], args['input_parameters'])
-
-    # create json and copy to s3
-    jobid = utils.create_json(event, '')
-
-    # launch instance and execute workflow
-    # (update the event inside the branch so a metadata-only run without a
-    # launch does not hit an undefined launch_instance_log)
-    if cfg.get('launch_instance'):
-        launch_instance_log = utils.launch_instance(cfg, jobid)
-        event.update(launch_instance_log)
-
-    event.update({'jobid': jobid})
-    return event
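Reviewer note: the heavy lifting (update_config, create_json, launch_instance) lives in core.ec2_utils, which this diff does not show. As a rough sketch of what launching the worker involves (boot the AMI with a user-data script that fetches the run json and executes the workflow), something like the following; the user-data commands and the run.json key layout are illustrative assumptions, not the module's actual contract.

# Hedged sketch of an AWSEM-style worker launch with boto3.
import boto3

def launch_instance_sketch(cfg, jobid):
    # assumed user-data: pull the run json this lambda uploaded, then run it
    userdata = (
        "#!/bin/bash\n"
        "aws s3 cp s3://%s/%s.run.json /tmp/run.json\n"
        "run_workflow.sh /tmp/run.json\n" % (cfg['json_bucket'], jobid)
    )
    ec2 = boto3.client('ec2')
    res = ec2.run_instances(
        ImageId=cfg['ami_id'],
        InstanceType=cfg['instance_type'] or 't2.micro',
        IamInstanceProfile={'Arn': cfg['s3_access_arn']},
        UserData=userdata,
        MinCount=1, MaxCount=1,
        # lets the instance self-terminate after shutdown_min expires
        InstanceInitiatedShutdownBehavior='terminate',
    )
    return {'instance_id': res['Instances'][0]['InstanceId']}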
diff --git a/core/start_run_awsf_dev/__init__.py b/core/start_run_awsf_dev/__init__.py
deleted file mode 100644
index e69de29bb..000000000
diff --git a/core/start_run_awsf_dev/config.yaml b/core/start_run_awsf_dev/config.yaml
deleted file mode 100644
index db398dd07..000000000
--- a/core/start_run_awsf_dev/config.yaml
+++ /dev/null
@@ -1,15 +0,0 @@
-region: us-east-1
-
-function_name: start_run_awsf_dev
-handler: service.handler
-role: lambda_full_s3
-description: create workflow run metadata on fourfront and build the args for an AWSEM run
-
-# if access key and secret are left blank, boto will use the credentials
-# defined in the [default] section of ~/.aws/credentials.
-aws_access_key_id:
-aws_secret_access_key:
-
-# dist_directory: dist
-timeout: 300
-memory_size: 128
diff --git a/core/start_run_awsf_dev/event.json b/core/start_run_awsf_dev/event.json
deleted file mode 100644
index f2cad8e52..000000000
--- a/core/start_run_awsf_dev/event.json
+++ /dev/null
@@ -1,32 +0,0 @@
-{
-    "config": {
-        "ebs_type": "io1",
-        "ebs_iops": 500,
-        "s3_access_arn": "arn:aws:iam::643366669028:instance-profile/S3_access",
-        "ami_id": "ami-cfb14bb5",
-        "json_bucket": "4dn-aws-pipeline-run-json",
-        "password": "",
-        "shutdown_min": 30,
-        "copy_to_s3": true,
-        "launch_instance": true,
-        "log_bucket": "tibanna-output",
-        "script_url": "https://raw.githubusercontent.com/4dn-dcic/tibanna/master/awsf/",
-        "key_name": "4dn-encode"
-    },
-    "parameters": {},
-    "_tibanna": {
-        "env": "fourfront-webdev",
-        "run_type": "md5"
-    },
-    "app_name": "md5",
-    "workflow_uuid": "d3f25cd3-e726-4b3c-a022-48f844474b41",
-    "input_files": [
-        {
-            "workflow_argument_name": "input_file",
-            "bucket_name": "elasticbeanstalk-fourfront-webdev-files",
-            "uuid": "f4864029-a8ad-4bb8-93e7-5108f462ccaa",
-            "object_key": "4DNFIRSRJH45.fastq.gz"
-        }
-    ],
-    "output_bucket": "elasticbeanstalk-fourfront-webdev-wfoutput"
-}
-
diff --git a/core/start_run_awsf_dev/event2.json b/core/start_run_awsf_dev/event2.json
deleted file mode 100644
index e7ecdd3ec..000000000
--- a/core/start_run_awsf_dev/event2.json
+++ /dev/null
@@ -1,38 +0,0 @@
-{
-    "parameters": {"ncores": 1, "min_res": 500000, "binsize": 500000, "maxmem": "8g"},
-    "config": {
-        "ebs_type": "io1",
-        "ebs_iops": 500,
-        "s3_access_arn": "arn:aws:iam::643366669028:instance-profile/S3_access",
-        "ami_id": "ami-7ff26968",
-        "json_bucket": "4dn-aws-pipeline-run-json",
-        "password": "hahaha",
-        "shutdown_min": "120",
-        "copy_to_s3": true,
-        "launch_instance": true,
-        "log_bucket": "tibanna-output",
-        "script_url": "https://raw.githubusercontent.com/4dn-dcic/tibanna/master/awsf/"
-    },
-    "_tibanna": {
-        "run_type": "partb",
-        "env": "fourfront-webdev"
-    },
-    "workflow_uuid": "d9e9c966-56d9-47e8-ae21-47f94a1af417",
-    "input_files": [
-        {
-            "object_key": "4DNFI823LSII.chrom.sizes",
-            "workflow_argument_name": "chrsizes",
-            "uuid": "4a6d10ee-2edb-4402-a98f-0edb1d58f5e9",
-            "bucket_name": "elasticbeanstalk-fourfront-webdev-files"
-        },
-        {
-            "bucket_name": "elasticbeanstalk-fourfront-webdev-wfoutput",
-            "workflow_argument_name": "input_pairs",
-            "uuid": ["e0b32fa9-a54e-4f62-86dc-039f60b34812"],
-            "object_key": ["4DNFIIQN4FKO.pairs.gz"]
-        }
-    ],
-    "app_name": "hi-c-processing-partb/44",
-    "output_bucket": "elasticbeanstalk-fourfront-webdev-wfoutput"
-}
-
diff --git a/core/start_run_awsf_dev/event3.json b/core/start_run_awsf_dev/event3.json
deleted file mode 100644
index 950f8c9e5..000000000
--- a/core/start_run_awsf_dev/event3.json
+++ /dev/null
@@ -1,46 +0,0 @@
-{
-    "_tibanna": {
-        "run_type": "bwa-mem",
-        "env": "fourfront-webdev"
-    },
-    "app_name": "bwa-mem",
-    "output_bucket": "elasticbeanstalk-fourfront-webdev-wfoutput",
-    "workflow_uuid": "0fbe4db8-0b5f-448e-8b58-3f8c84baabf5",
-    "parameters": {"nThreads": 4},
-    "input_files": [
-        {
-            "object_key": "4DNFIZQZ39L9.bwaIndex.tgz",
-            "workflow_argument_name": "bwa_index",
-            "uuid": "1f53df95-4cf3-41cc-971d-81bb16c486dd",
-            "bucket_name": "elasticbeanstalk-fourfront-webdev-files"
-        },
-        {
-            "workflow_argument_name": "fastq1",
-            "bucket_name": "elasticbeanstalk-fourfront-webdev-files",
-            "uuid": "1150b428-272b-4a0c-b3e6-4b405c148f7c",
-            "object_key": "4DNFIVOZN511.fastq.gz"
-        },
-        {
-            "workflow_argument_name": "fastq2",
-            "bucket_name": "elasticbeanstalk-fourfront-webdev-files",
-            "uuid": "f4864029-a8ad-4bb8-93e7-5108f462ccaa",
-            "object_key": "4DNFIRSRJH45.fastq.gz"
-        }
-    ],
-    "config": {
-        "ebs_size": 30,
-        "ebs_type": "io1",
-        "json_bucket": "4dn-aws-pipeline-run-json",
-        "ebs_iops": 500,
-        "shutdown_min": 30,
-        "s3_access_arn": "arn:aws:iam::643366669028:instance-profile/S3_access",
-        "ami_id": "ami-cfb14bb5",
-        "copy_to_s3": true,
-        "launch_instance": true,
-        "password": "dragonfly",
-        "log_bucket": "tibanna-output",
-        "script_url": "https://raw.githubusercontent.com/4dn-dcic/tibanna/master/awsf/",
-        "key_name": ""
-    }
-}
-
diff --git a/core/start_run_awsf_dev/event4.json b/core/start_run_awsf_dev/event4.json
deleted file mode 100644
index e89bf16a1..000000000
--- a/core/start_run_awsf_dev/event4.json
+++ /dev/null
@@ -1,41 +0,0 @@
-{
-    "_tibanna": {
-        "run_type": "hi-c-processing-bam",
-        "env": "fourfront-webdev"
-    },
-    "output_bucket": "elasticbeanstalk-fourfront-webdev-wfoutput",
-    "workflow_uuid": "023bfb3e-9a8b-42b9-a9d4-216079526f68",
-    "input_files": [
-        {
-            "workflow_argument_name": "input_bams",
-            "bucket_name": "elasticbeanstalk-fourfront-webdev-wfoutput",
-            "uuid": ["68f38e45-8c66-41e2-99ab-b0b2fcd20d45", "7420a20a-aa77-4ea8-b0b0-32a8e80c9bcb"],
-            "object_key": ["4DNFI9H51IRL.bam", "4DNFIP16HHGH.bam"]
-        },
-        {
-            "workflow_argument_name": "chromsize",
-            "bucket_name": "elasticbeanstalk-fourfront-webdev-files",
-            "uuid": "4a6d10ee-2edb-4402-a98f-0edb1d58f5e9",
-            "object_key": "4DNFI823LSII.chrom.sizes"
-        }
-    ],
-    "app_name": "hi-c-processing-bam",
-    "parameters": {
-        "nthreads_parse_sort": 32,
-        "nthreads_merge": 32
-    },
-    "config": {
-        "ebs_type": "io1",
-        "json_bucket": "4dn-aws-pipeline-run-json",
-        "ebs_iops": 500,
-        "shutdown_min": "120",
-        "s3_access_arn": "arn:aws:iam::643366669028:instance-profile/S3_access",
-        "ami_id": "ami-cfb14bb5",
-        "copy_to_s3": true,
-        "script_url": "https://raw.githubusercontent.com/4dn-dcic/tibanna/master/awsf/",
-        "launch_instance": true,
-        "password": "hahaha",
-        "log_bucket": "tibanna-output",
-        "key_name": "4dn-encode"
-    }
-}
diff --git a/core/start_run_awsf_dev/event_metadata_only.json b/core/start_run_awsf_dev/event_metadata_only.json
deleted file mode 100644
index 1a66fc9c5..000000000
--- a/core/start_run_awsf_dev/event_metadata_only.json
+++ /dev/null
@@ -1,57 +0,0 @@
-{
-    "app_name": "hi-c-processing-bam",
-    "parameters": {
-        "nthreads_merge": 16,
-        "nthreads_parse_sort": 16
-    },
-    "_tibanna": {
-        "run_type": "hi-c-processing-bam",
-        "env": "fourfront-webdev"
-    },
-    "output_bucket": "elasticbeanstalk-fourfront-webdev-wfoutput",
-    "tag": "0.2.5",
-    "config": {
-        "ami_id": "ami-cfb14bb5",
-        "json_bucket": "4dn-aws-pipeline-run-json",
-        "ebs_iops": 500,
-        "shutdown_min": "30",
-        "s3_access_arn": "arn:aws:iam::643366669028:instance-profile/S3_access",
-        "ebs_type": "io1",
-        "copy_to_s3": true,
-        "script_url": "https://raw.githubusercontent.com/4dn-dcic/tibanna/master/awsf/",
-        "key_name": "4dn-encode",
-        "launch_instance": true,
-        "password": "hahaha",
-        "log_bucket": "tibanna-output"
-    },
-    "workflow_uuid": "023bfb3e-9a8b-42b9-a9d4-216079526f68",
-    "input_files": [
-        {
-            "object_key": [
-                "4DNFI9H51IRL.bam",
-                "4DNFIP16HHGH.bam"
-            ],
-            "bucket_name": "elasticbeanstalk-fourfront-webdev-wfoutput",
-            "workflow_argument_name": "input_bams",
-            "uuid": [
-                "68f38e45-8c66-41e2-99ab-b0b2fcd20d45",
-                "7420a20a-aa77-4ea8-b0b0-32a8e80c9bcb"
-            ]
-        },
-        {
-            "object_key": "4DNFI823LSII.chrom.sizes",
-            "bucket_name": "elasticbeanstalk-fourfront-webprod-files",
-            "workflow_argument_name": "chromsize",
-            "uuid": "4a6d10ee-2edb-4402-a98f-0edb1d58f5e9"
-        }
-    ],
-    "metadata_only": true,
-    "output_files": [
-        {"workflow_argument_name": "annotated_bam",
-         "uuid": "ecabab05-3738-47fe-8b55-b08334463c43"
-        },
-        {"workflow_argument_name": "filtered_pairs",
-         "uuid": "7054061b-e87d-4ca4-9693-d186348f5206"
-        }
-    ]
-}
diff --git a/core/start_run_awsf_dev/service.py b/core/start_run_awsf_dev/service.py
deleted file mode 100644
index ed28fefb8..000000000
--- a/core/start_run_awsf_dev/service.py
+++ /dev/null
@@ -1,249 +0,0 @@
-# -*- coding: utf-8 -*-
-import logging
-# import json
-import boto3
-from core import ff_utils
-from core.utils import Tibanna, ensure_list, powerup
-
-LOG = logging.getLogger(__name__)
-s3 = boto3.resource('s3')
-
-
-def metadata_only(event):
-    # this relies on the fact that the event contains an output_files key
-    assert event['metadata_only']
-    assert event['output_files']
-    return real_handler(event, None)
-
-
-@powerup('start_run_awsf', metadata_only)
-def handler(event, context):
-    return real_handler(event, context)
-
-
-def real_handler(event, context):
-    '''
-    this is a generic function to run an awsem workflow
-    based on the data passed in
-
-    workflow_uuid : for now, pass this in. Later we can add code to automatically retrieve it from app_name.
-    Note multiple workflow_uuids can be available for an app_name
-    (different versions of the same app could have a different uuid)
-    '''
-    # get incoming data
-    input_file_list = event.get('input_files')
-    app_name = event.get('app_name')
-    print(app_name)
-    workflow_uuid = event.get('workflow_uuid')
-    output_bucket = event.get('output_bucket')
-    parameters = ff_utils.convert_param(event.get('parameters'), True)
-    tibanna_settings = event.get('_tibanna', {})
-    tag = event.get('tag')
-    # if they don't pass in env, guess it from output_bucket
-    env = tibanna_settings.get('env', '-'.join(output_bucket.split('-')[1:-1]))
-    # tibanna provides access to keys based on env
-    tibanna = Tibanna(env, s3_keys=event.get('s3_keys'), ff_keys=event.get('ff_keys'),
-                      settings=tibanna_settings)
-
-    args = dict()
-
-    # get argument format & type info from workflow
-    workflow_info = ff_utils.get_metadata(workflow_uuid, key=tibanna.ff_keys)
-    LOG.info("workflow info %s" % workflow_info)
-    if 'error' in workflow_info.get('@type', []):
-        raise Exception("FATAL, can't look up workflow info for %s on fourfront" % workflow_uuid)
-
-    # get cwl info from workflow_info
-    for k in ['app_name', 'app_version', 'cwl_directory_url', 'cwl_main_filename', 'cwl_child_filenames']:
-        LOG.info(workflow_info.get(k))
-        args[k] = workflow_info.get(k)
-
-    # create the ff_meta output info
-    input_files = []
-    for input_file in input_file_list:
-        for idx, uuid in enumerate(ensure_list(input_file['uuid'])):
-            input_files.append({'workflow_argument_name': input_file['workflow_argument_name'],
-                                'value': uuid, 'ordinal': idx + 1})
-    LOG.info("input_files is %s" % input_files)
-
-    # input file args for awsem
-    args['input_files'] = dict()
-    args['secondary_files'] = dict()
-    fe_map = get_format_extension_map(tibanna)
-    pf_source_experiments_dict = dict()
-    for input_file in input_file_list:
-        if isinstance(input_file['uuid'], unicode):
-            input_file['uuid'] = input_file['uuid'].encode('utf-8')
-        if isinstance(input_file['object_key'], unicode):
-            input_file['object_key'] = input_file['object_key'].encode('utf-8')
-        if isinstance(input_file['uuid'], str) and isinstance(input_file['object_key'], str):
-            object_key = input_file['uuid'] + '/' + input_file['object_key']
-        elif (isinstance(input_file['uuid'], list) and
-              isinstance(input_file['object_key'], list) and
-              len(input_file['uuid']) == len(input_file['object_key'])):
-            object_key = [a + '/' + b for a, b in zip(input_file['uuid'], input_file['object_key'])]
-        else:
-            raise Exception("input_file uuid and object_key should match in their type and length (if lists) : " +
-                            "type {} {} length {} {}".format(type(input_file['uuid']), type(input_file['object_key']),
-                                                             len(input_file['uuid']), len(input_file['object_key'])))
-        args['input_files'].update({input_file['workflow_argument_name']: {
-            'bucket_name': input_file['bucket_name'],
-            'object_key': object_key}})
-
-        if isinstance(input_file['uuid'], list):
-            inf_uuids = input_file['uuid']
-        else:
-            inf_uuids = [input_file['uuid']]
-        for inf_uuid in inf_uuids:
-            infile_meta = ff_utils.get_metadata(inf_uuid, key=tibanna.ff_keys)
-            if infile_meta.get('experiments'):
-                for exp in infile_meta.get('experiments'):
-                    exp_uuid = ff_utils.get_metadata(exp, key=tibanna.ff_keys).get('uuid')
-                    pf_source_experiments_dict.update({exp_uuid: 1})
-            if infile_meta.get('source_experiments'):
-                pf_source_experiments_dict.update({_: 1 for _ in infile_meta.get('source_experiments')})
-            if infile_meta.get('extra_files'):
-                extra_file_format = infile_meta.get('extra_files')[0].get('file_format')  # only the first extra file
-                extra_file_extension = fe_map.get(extra_file_format)
-                infile_format = infile_meta.get('file_format')
-                infile_extension = fe_map.get(infile_format)
-                extra_file_key = object_key.replace(infile_extension, extra_file_extension)
-                # index by workflow argument name; accumulate extra file keys into a list
-                if input_file['workflow_argument_name'] in args['secondary_files']:
-                    secondary_file_entry = args['secondary_files'][input_file['workflow_argument_name']]
-                    if isinstance(secondary_file_entry['object_key'], list):
-                        secondary_file_entry['object_key'].append(extra_file_key)
-                    else:
-                        secondary_file_entry['object_key'] = [secondary_file_entry['object_key'], extra_file_key]
-                else:
-                    args['secondary_files'].update({input_file['workflow_argument_name']: {
-                        'bucket_name': input_file['bucket_name'],
-                        'object_key': extra_file_key}})
-
-    # processed file metadata
-    output_files, pf_meta = handle_processed_files(workflow_info, tibanna, pf_source_experiments_dict.keys())
-
-    ff_meta = ff_utils.create_ffmeta_awsem(workflow_uuid, app_name, input_files, tag=tag,
-                                           run_url=tibanna.settings.get('url', ''),
-                                           output_files=output_files, parameters=parameters)
-
-    LOG.info("ff_meta is %s" % ff_meta.__dict__)
-
-    # store metadata so we know the run has started
-    ff_meta.post(key=tibanna.ff_keys)
-
-    # parameters
-    args['input_parameters'] = event.get('parameters')
-
-    # output target
-    args['output_target'] = dict()
-    args['secondary_output_target'] = dict()
-    for of in ff_meta.output_files:
-        arg_name = of.get('workflow_argument_name')
-        if of.get('type') == 'Output processed file':
-            args['output_target'][arg_name] = of.get('upload_key')
-        else:
-            args['output_target'][arg_name] = ff_meta.uuid + '/' + arg_name
-        if 'secondary_file_formats' in of:
-            # takes only the first secondary file
-            args['secondary_output_target'][arg_name] = of.get('extra_files', [{}])[0].get('upload_key')
-
-    # output bucket
-    args['output_S3_bucket'] = event.get('output_bucket')
-
-    if 'instance_type' not in event['config']:
-        event['config']['instance_type'] = ''
-    if 'EBS_optimized' not in event['config']:
-        event['config']['EBS_optimized'] = ''
-    if 'ebs_size' not in event['config']:
-        event['config']['ebs_size'] = 0
-
-    event.update({"ff_meta": ff_meta.as_dict(),
-                  'pf_meta': [meta.as_dict() for meta in pf_meta],
-                  "_tibanna": tibanna.as_dict(),
-                  "args": args
-                  })
-    return event
-
-
-def get_format_extension_map(tibanna):
-    # get format-extension map
-    try:
-        fp_schema = ff_utils.get_metadata("profiles/file_processed.json", key=tibanna.ff_keys)
-        fe_map = fp_schema.get('file_format_file_extension')
-    except Exception as e:
-        LOG.error("Can't get format-extension map from file_processed schema. %s\n" % e)
-        raise e  # without re-raising, an undefined fe_map would be returned below
-    return fe_map
-
-
-def proc_file_for_arg_name(output_files, arg_name, tibanna):
-    if not output_files:
-        return None
-    of = [output for output in output_files if output.get('workflow_argument_name') == arg_name]
-    if of:
-        if len(of) > 1:
-            raise Exception("multiple output files supplied with the same workflow_argument_name")
-        of = of[0]
-        return ff_utils.ProcessedFileMetadata.get(of.get('uuid'), tibanna.ff_keys)
-
-
-def handle_processed_files(workflow_info, tibanna, pf_source_experiments=None,
-                           user_supplied_output_files=None):
-
-    fe_map = get_format_extension_map(tibanna)
-    output_files = []
-    pf_meta = []
-    try:
-        for arg in workflow_info.get('arguments', []):
-            if (arg.get('argument_type') in ['Output processed file',
-                                             'Output report file',
-                                             'Output QC file']):
-
-                of = dict()
-                of['workflow_argument_name'] = arg.get('workflow_argument_name')
-                of['type'] = arg.get('argument_type')
-                if 'argument_format' in arg:
-                    # processed files carry an argument_format; report and QC files do not
-                    if 'secondary_file_formats' in arg:
-                        of['secondary_file_formats'] = arg.get('secondary_file_formats')
-                        of['secondary_file_extensions'] = [fe_map.get(v) for v in arg.get('secondary_file_formats')]
-                        extra_files = [{"file_format": v} for v in of['secondary_file_formats']]
-                    else:
-                        extra_files = None
-
-                    # see if the user supplied the output file already
-                    # this is often the case for pseudo workflow runs (run externally)
-                    # TODO move this down next to post of pf
-                    resp = proc_file_for_arg_name(user_supplied_output_files,
-                                                  arg.get('workflow_argument_name'),
-                                                  tibanna)
-
-                    # if it wasn't supplied as output we have to create a new one
-                    pf = None
-                    if not resp:
-                        pf = ff_utils.ProcessedFileMetadata(file_format=arg.get('argument_format'),
-                                                            extra_files=extra_files,
-                                                            source_experiments=pf_source_experiments)
-                        try:
-                            # actually post processed file metadata here
-                            resp = pf.post(key=tibanna.ff_keys)
-                            resp = resp.get('@graph')[0]
-                        except Exception as e:
-                            LOG.error("Failed to post Processed file metadata. %s\n" % e)
-                            LOG.error("resp" + str(resp) + "\n")
-                            raise e
-                    of['upload_key'] = resp.get('upload_key')
-                    of['value'] = resp.get('uuid')
-                    of['extra_files'] = resp.get('extra_files')
-                    of['format'] = arg.get('argument_format')
-                    of['extension'] = fe_map.get(arg.get('argument_format'))
-                    if pf:  # only track processed files we created ourselves
-                        pf_meta.append(pf)
-                output_files.append(of)
-
-    except Exception as e:
-        LOG.error("output_files = " + str(output_files) + "\n")
-        LOG.error("Can't prepare output_files information. %s\n" % e)
-        raise e
-    return output_files, pf_meta
"/tmp/userdata", - "launch_instance": true, - "password": "hahaha" - } -} diff --git a/core/update_ffmeta_awsf_dev/event_fastqc.json b/core/update_ffmeta_awsf_dev/event_fastqc.json deleted file mode 100644 index 14e456bb2..000000000 --- a/core/update_ffmeta_awsf_dev/event_fastqc.json +++ /dev/null @@ -1,98 +0,0 @@ -{ - "ff_meta": { - "run_platform": "AWSEM", - "uuid": "18564368-1299-4ce0-82d1-c8917450d834", - "parameters": [], - "workflow": "2324ad76-ff37-4157-8bcc-3ce72b7dace9", - "title": "fastqc-0-11-4-1 hahaha run 2018-01-16 23:08:21.335210", - "award": "1U01CA200059-01", - "awsem_job_id": "", - "run_url": "https://console.aws.amazon.com/states/home?region=us-east-1#/executions/details/arn:aws:states:us-east-1:643366669028:execution:run_awsem_new_pony-dev:fastqc_613a0edc-f130-49a0-b97c-b812a6a42d84", - "awsem_app_name": "fastqc-0-11-4-1", - "lab": "4dn-dcic-lab", - "run_status": "started", - "output_files": [ - { - "type": "Output QC file", - "workflow_argument_name": "report_zip" - } - ], - "input_files": [ - { - "ordinal": 1, - "workflow_argument_name": "input_fastq", - "value": "f4864029-a8ad-4bb8-93e7-5108f462ccaa" - } - ] - }, - "instance_ip": "54.164.70.164", - "app_name": "fastqc-0-11-4-1", - "_tibanna": { - "env": "fourfront-webdev", - "settings": { - "url": "https://console.aws.amazon.com/states/home?region=us-east-1#/executions/details/arn:aws:states:us-east-1:643366669028:execution:run_awsem_new_pony-dev:fastqc_613a0edc-f130-49a0-b97c-b812a6a42d84", - "run_type": "fastqc", - "run_id": "613a0edc-f130-49a0-b97c-b812a6a42d84", - "env": "fourfront-webdev", - "run_name": "fastqc_613a0edc-f130-49a0-b97c-b812a6a42d84" - } - }, - "start_time": "20180116-23:08:29-UTC", - "args": { - "secondary_output_target": {}, - "app_name": "fastqc-0-11-4-1", - "input_parameters": {}, - "cwl_child_filenames": [ - "fastqc-0-11-4.cwl" - ], - "output_target": { - "report_zip": "18564368-1299-4ce0-82d1-c8917450d834/report_zip" - }, - "cwl_main_filename": "fastqc-0-11-4-1.cwl", - "secondary_files": {}, - "output_S3_bucket": "elasticbeanstalk-fourfront-webdev-wfoutput", - "app_version": "0.2.0", - "cwl_directory_url": "https://raw.githubusercontent.com/4dn-dcic/pipelines-cwl/0.2.0/cwl_awsem/", - "input_files": { - "input_fastq": { - "bucket_name": "elasticbeanstalk-fourfront-webdev-files", - "object_key": "f4864029-a8ad-4bb8-93e7-5108f462ccaa/4DNFIRSRJH45.fastq.gz" - } - } - }, - "output_bucket": "elasticbeanstalk-fourfront-webdev-wfoutput", - "jobid": "eUJZ9nH7Rsz2", - "instance_id": "i-04df1c1ca7a06ef66", - "tag": "hahaha", - "pf_meta": [], - "parameters": {}, - "config": { - "ebs_size": 10, - "password": "", - "ebs_type": "io1", - "json_bucket": "4dn-aws-pipeline-run-json", - "json_dir": "/tmp/json", - "EBS_optimized": false, - "ebs_iops": 500, - "userdata_dir": "/tmp/userdata", - "shutdown_min": "300", - "instance_type": "t2.micro", - "s3_access_arn": "arn:aws:iam::643366669028:instance-profile/S3_access", - "job_tag": "fastqc-0-11-4-1", - "copy_to_s3": true, - "script_url": "https://raw.githubusercontent.com/4dn-dcic/tibanna/master/awsf/", - "key_name": "4dn-encode", - "launch_instance": true, - "ami_id": "ami-cfb14bb5", - "log_bucket": "tibanna-output" - }, - "workflow_uuid": "2324ad76-ff37-4157-8bcc-3ce72b7dace9", - "input_files": [ - { - "workflow_argument_name": "input_fastq", - "bucket_name": "elasticbeanstalk-fourfront-webdev-files", - "uuid": "f4864029-a8ad-4bb8-93e7-5108f462ccaa", - "object_key": "4DNFIRSRJH45.fastq.gz" - } - ] -} diff --git 
a/core/update_ffmeta_awsf_dev/event_metadataonly.json b/core/update_ffmeta_awsf_dev/event_metadataonly.json deleted file mode 100644 index bd548fe5a..000000000 --- a/core/update_ffmeta_awsf_dev/event_metadataonly.json +++ /dev/null @@ -1,206 +0,0 @@ -{ - "args": { - "secondary_output_target": { - "filtered_pairs": "6b073c8e-49b0-4023-a730-2b38d6bf4681/4DNFI7GCOAST.pairs.gz.px2" - }, - "app_name": "hi-c-processing-bam", - "input_parameters": { - "nthreads_merge": 16, - "nthreads_parse_sort": 16 - }, - "cwl_child_filenames": [ - "pairsam-parse-sort.cwl", - "pairsam-merge.cwl", - "pairsam-markasdup.cwl", - "pairsam-filter.cwl" - ], - "output_target": { - "annotated_bam": "916597d5-29ff-40d5-9b37-f3081897fe39/4DNFIJ76Z6LN.bam", - "filtered_pairs": "6b073c8e-49b0-4023-a730-2b38d6bf4681/4DNFI7GCOAST.pairs.gz" - }, - "cwl_main_filename": "hi-c-processing-bam.cwl", - "secondary_files": {}, - "output_S3_bucket": "elasticbeanstalk-fourfront-webdev-wfoutput", - "app_version": null, - "cwl_directory_url": "https://raw.githubusercontent.com/4dn-dcic/pipelines-cwl/dev/cwl_awsem/", - "input_files": { - "input_bams": { - "bucket_name": "elasticbeanstalk-fourfront-webdev-wfoutput", - "object_key": [ - "68f38e45-8c66-41e2-99ab-b0b2fcd20d45/4DNFI9H51IRL.bam", - "7420a20a-aa77-4ea8-b0b0-32a8e80c9bcb/4DNFIP16HHGH.bam" - ] - }, - "chromsize": { - "bucket_name": "elasticbeanstalk-fourfront-webprod-files", - "object_key": "4a6d10ee-2edb-4402-a98f-0edb1d58f5e9/4DNFI823LSII.chrom.sizes" - } - } - }, - "app_name": "hi-c-processing-bam", - "_tibanna": { - "env": "fourfront-webdev", - "settings": { - "run_type": "hi-c-processing-bam", - "env": "fourfront-webdev" - } - }, - "output_files": [ - { - "workflow_argument_name": "annotated_bam", - "uuid": "ecabab05-3738-47fe-8b55-b08334463c43" - }, - { - "workflow_argument_name": "filtered_pairs", - "uuid": "7054061b-e87d-4ca4-9693-d186348f5206" - } - ], - "ff_meta": { - "run_platform": "AWSEM", - "uuid": "82df642a-b6fd-4f63-a946-7cbc55d0cfbc", - "parameters": [ - { - "workflow_argument_name": "nthreads_merge", - "value": "16" - }, - { - "workflow_argument_name": "nthreads_parse_sort", - "value": "16" - } - ], - "workflow": "023bfb3e-9a8b-42b9-a9d4-216079526f68", - "title": "hi-c-processing-bam 0.2.5 run 2018-02-01 12:45:27.534258", - "award": "1U01CA200059-01", - "awsem_job_id": "", - "awsem_app_name": "hi-c-processing-bam", - "lab": "4dn-dcic-lab", - "run_status": "started", - "output_files": [ - { - "extension": ".bam", - "format": "bam", - "workflow_argument_name": "annotated_bam", - "value": "916597d5-29ff-40d5-9b37-f3081897fe39", - "upload_key": "916597d5-29ff-40d5-9b37-f3081897fe39/4DNFIJ76Z6LN.bam", - "type": "Output processed file", - "extra_files": [] - }, - { - "secondary_file_formats": [ - "pairs_px2" - ], - "extension": ".pairs.gz", - "format": "pairs", - "workflow_argument_name": "filtered_pairs", - "value": "6b073c8e-49b0-4023-a730-2b38d6bf4681", - "upload_key": "6b073c8e-49b0-4023-a730-2b38d6bf4681/4DNFI7GCOAST.pairs.gz", - "secondary_file_extensions": [ - ".pairs.gz.px2" - ], - "type": "Output processed file", - "extra_files": [ - { - "status": "to be uploaded by workflow", - "uuid": "6b073c8e-49b0-4023-a730-2b38d6bf4681", - "file_format": "pairs_px2", - "accession": "4DNFI7GCOAST", - "filename": "4DNFI7GCOAST", - "upload_key": "6b073c8e-49b0-4023-a730-2b38d6bf4681/4DNFI7GCOAST.pairs.gz.px2", - "href": "/6b073c8e-49b0-4023-a730-2b38d6bf4681/@@download/4DNFI7GCOAST.pairs.gz.px2" - } - ] - } - ], - "input_files": [ - { - "ordinal": 1, - 
"workflow_argument_name": "input_bams", - "value": "68f38e45-8c66-41e2-99ab-b0b2fcd20d45" - }, - { - "ordinal": 2, - "workflow_argument_name": "input_bams", - "value": "7420a20a-aa77-4ea8-b0b0-32a8e80c9bcb" - }, - { - "ordinal": 1, - "workflow_argument_name": "chromsize", - "value": "4a6d10ee-2edb-4402-a98f-0edb1d58f5e9" - } - ] - }, - "output_bucket": "elasticbeanstalk-fourfront-webdev-wfoutput", - "tag": "0.2.5", - "metadata_only": true, - "pf_meta": [ - { - "status": "to be uploaded by workflow", - "source_experiments": [ - "b2e77ae2-8d67-4fd6-a812-94f9bdfc9dd8" - ], - "uuid": "916597d5-29ff-40d5-9b37-f3081897fe39", - "file_format": "bam", - "accession": "4DNFIJ76Z6LN", - "award": "1U01CA200059-01", - "lab": "4dn-dcic-lab" - }, - { - "status": "to be uploaded by workflow", - "source_experiments": [ - "b2e77ae2-8d67-4fd6-a812-94f9bdfc9dd8" - ], - "uuid": "6b073c8e-49b0-4023-a730-2b38d6bf4681", - "file_format": "pairs", - "accession": "4DNFI7GCOAST", - "award": "1U01CA200059-01", - "lab": "4dn-dcic-lab", - "extra_files": [ - { - "file_format": "pairs_px2" - } - ] - } - ], - "parameters": { - "nthreads_merge": 16, - "nthreads_parse_sort": 16 - }, - "config": { - "ebs_size": 0, - "ebs_type": "io1", - "json_bucket": "4dn-aws-pipeline-run-json", - "EBS_optimized": "", - "ebs_iops": 500, - "shutdown_min": "30", - "instance_type": "", - "s3_access_arn": "arn:aws:iam::643366669028:instance-profile/S3_access", - "ami_id": "ami-cfb14bb5", - "copy_to_s3": true, - "script_url": "https://raw.githubusercontent.com/4dn-dcic/tibanna/master/awsf/", - "key_name": "4dn-encode", - "launch_instance": true, - "password": "hahaha", - "log_bucket": "tibanna-output" - }, - "workflow_uuid": "023bfb3e-9a8b-42b9-a9d4-216079526f68", - "input_files": [ - { - "uuid": [ - "68f38e45-8c66-41e2-99ab-b0b2fcd20d45", - "7420a20a-aa77-4ea8-b0b0-32a8e80c9bcb" - ], - "bucket_name": "elasticbeanstalk-fourfront-webdev-wfoutput", - "workflow_argument_name": "input_bams", - "object_key": [ - "4DNFI9H51IRL.bam", - "4DNFIP16HHGH.bam" - ] - }, - { - "uuid": "4a6d10ee-2edb-4402-a98f-0edb1d58f5e9", - "bucket_name": "elasticbeanstalk-fourfront-webprod-files", - "workflow_argument_name": "chromsize", - "object_key": "4DNFI823LSII.chrom.sizes" - } - ] -} diff --git a/core/update_ffmeta_awsf_dev/event_pairsqc.json b/core/update_ffmeta_awsf_dev/event_pairsqc.json deleted file mode 100644 index d9e110e38..000000000 --- a/core/update_ffmeta_awsf_dev/event_pairsqc.json +++ /dev/null @@ -1,130 +0,0 @@ -{ - "ff_meta": { - "run_platform": "AWSEM", - "uuid": "3d71f45f-6a77-47b4-9839-eb02204c6dae", - "parameters": [ - { - "workflow_argument_name": "enzyme", - "value": "6" - }, - { - "workflow_argument_name": "sample_name", - "value": "4DNFI1ZLO9D7" - } - ], - "workflow": "ae3a87cb-3fa2-469e-97c7-540fc2d0a117", - "title": "pairsqc-single run 2018-01-16 23:08:25.250148", - "award": "1U01CA200059-01", - "awsem_job_id": "", - "run_url": "https://console.aws.amazon.com/states/home?region=us-east-1#/executions/details/arn:aws:states:us-east-1:643366669028:execution:run_awsem_new_pony-dev:pairsqc-single_649e1149-68b5-43b3-bc92-65ca077dc983", - "awsem_app_name": "pairsqc-single", - "lab": "4dn-dcic-lab", - "run_status": "started", - "output_files": [ - { - "type": "Output QC file", - "workflow_argument_name": "report" - } - ], - "input_files": [ - { - "ordinal": 1, - "workflow_argument_name": "input_pairs", - "value": "817f3faa-0573-45c0-8230-02ec19de6544" - }, - { - "ordinal": 1, - "workflow_argument_name": "chromsizes", - "value": 
"4a6d10ee-2edb-4402-a98f-0edb1d58f5e9" - } - ] - }, - "instance_ip": "52.87.161.131", - "app_name": "pairsqc-single", - "_tibanna": { - "env": "fourfront-webdev", - "settings": { - "url": "https://console.aws.amazon.com/states/home?region=us-east-1#/executions/details/arn:aws:states:us-east-1:643366669028:execution:run_awsem_new_pony-dev:pairsqc-single_649e1149-68b5-43b3-bc92-65ca077dc983", - "run_type": "pairsqc-single", - "run_id": "649e1149-68b5-43b3-bc92-65ca077dc983", - "env": "fourfront-webdev", - "run_name": "pairsqc-single_649e1149-68b5-43b3-bc92-65ca077dc983" - } - }, - "start_time": "20180116-23:08:32-UTC", - "args": { - "secondary_output_target": {}, - "app_name": "pairsqc-single", - "input_parameters": { - "enzyme": "6", - "sample_name": "4DNFI1ZLO9D7" - }, - "cwl_child_filenames": [], - "output_target": { - "report": "3d71f45f-6a77-47b4-9839-eb02204c6dae/report" - }, - "cwl_main_filename": "pairsqc-single.cwl", - "secondary_files": { - "input_pairs": { - "bucket_name": "elasticbeanstalk-fourfront-webdev-wfoutput", - "object_key": "817f3faa-0573-45c0-8230-02ec19de6544/4DNFI1ZLO9D7.pairs.gz.px2" - } - }, - "output_S3_bucket": "elasticbeanstalk-fourfront-webdev-wfoutput", - "app_version": "dev", - "cwl_directory_url": "https://raw.githubusercontent.com/4dn-dcic/pipelines-cwl/dev/cwl_awsem/", - "input_files": { - "chromsizes": { - "bucket_name": "elasticbeanstalk-fourfront-webdev-files", - "object_key": "4a6d10ee-2edb-4402-a98f-0edb1d58f5e9/4DNFI823LSII.chrom.sizes" - }, - "input_pairs": { - "bucket_name": "elasticbeanstalk-fourfront-webdev-wfoutput", - "object_key": "817f3faa-0573-45c0-8230-02ec19de6544/4DNFI1ZLO9D7.pairs.gz" - } - } - }, - "output_bucket": "elasticbeanstalk-fourfront-webdev-wfoutput", - "jobid": "11Jz5nf2xJGt", - "instance_id": "i-0901c30deadd480e4", - "pf_meta": [], - "parameters": { - "enzyme": "6", - "sample_name": "4DNFI1ZLO9D7" - }, - "config": { - "ebs_size": 13, - "password": "dragonfly", - "ebs_type": "io1", - "json_bucket": "4dn-aws-pipeline-run-json", - "json_dir": "/tmp/json", - "EBS_optimized": false, - "ebs_iops": 500, - "userdata_dir": "/tmp/userdata", - "shutdown_min": 120, - "instance_type": "t2.micro", - "s3_access_arn": "arn:aws:iam::643366669028:instance-profile/S3_access", - "job_tag": "pairsqc-single", - "copy_to_s3": true, - "script_url": "https://raw.githubusercontent.com/4dn-dcic/tibanna/master/awsf/", - "key_name": "4dn-encode", - "launch_instance": true, - "ami_id": "ami-cfb14bb5", - "log_bucket": "tibanna-output" - }, - "workflow_uuid": "ae3a87cb-3fa2-469e-97c7-540fc2d0a117", - "input_files": [ - { - "workflow_argument_name": "input_pairs", - "bucket_name": "elasticbeanstalk-fourfront-webdev-wfoutput", - "uuid": "817f3faa-0573-45c0-8230-02ec19de6544", - "object_key": "4DNFI1ZLO9D7.pairs.gz" - }, - { - "object_key": "4DNFI823LSII.chrom.sizes", - "bucket_name": "elasticbeanstalk-fourfront-webdev-files", - "workflow_argument_name": "chromsizes", - "uuid": "4a6d10ee-2edb-4402-a98f-0edb1d58f5e9" - } - ] -} diff --git a/core/update_ffmeta_awsf_dev/service.py b/core/update_ffmeta_awsf_dev/service.py deleted file mode 100644 index ae8f271e6..000000000 --- a/core/update_ffmeta_awsf_dev/service.py +++ /dev/null @@ -1,244 +0,0 @@ -# -*- coding: utf-8 -*- -import logging -from core import utils, ff_utils, ec2_utils -import boto3 -from collections import defaultdict -from core.fastqc_utils import parse_qc_table - -LOG = logging.getLogger(__name__) -s3 = boto3.resource('s3') - - -def donothing(status, sbg, ff_meta, ff_key=None): - return None - - 
-def update_processed_file_metadata(status, pf_meta, tibanna): - - ff_key = tibanna.ff_keys - try: - for pf in pf_meta: - pf['status'] = status - except Exception as e: - raise Exception("Unable to update processed file metadata json : %s" % e) - try: - for pf in pf_meta: - pfo = ff_utils.ProcessedFileMetadata(**pf) - pfo.post(key=ff_key) - except Exception as e: - raise Exception("Unable to post processed file metadata : %s" % e) - return pf_meta - - -def qc_updater(status, wf_file, ff_meta, tibanna): - if ff_meta.awsem_app_name == 'fastqc-0-11-4-1': - return _qc_updater(status, wf_file, ff_meta, tibanna, - quality_metric='quality_metric_fastqc', - inputfile_argument='input_fastq', - report_html='fastqc_report.html', - datafiles=['summary.txt', 'fastqc_data.txt']) - elif ff_meta.awsem_app_name == 'pairsqc-single': - inputfile_argument = 'input_pairs' - input_accession = str(wf_file.runner.inputfile_accessions[inputfile_argument]) - return _qc_updater(status, wf_file, ff_meta, tibanna, - quality_metric="quality_metric_pairsqc", - inputfile_argument=inputfile_argument, report_html='pairsqc_report.html', - datafiles=[input_accession + '.summary.out']) - - -def _qc_updater(status, wf_file, ff_meta, tibanna, quality_metric='quality_metric_fastqc', - inputfile_argument='input_fastq', report_html='fastqc_report.html', - datafiles=['summary.txt', 'fastqc_data.txt']): - if status == 'uploading': - # wait until this bad boy is finished - return - # keys - ff_key = tibanna.ff_keys - # move files to proper s3 location - # need to remove sbg from this line - accession = wf_file.runner.inputfile_accessions[inputfile_argument] - zipped_report = wf_file.key - files_to_parse = datafiles - files_to_parse.append(report_html) - LOG.info("accession is %s" % accession) - - try: - files = wf_file.s3.unzip_s3_to_s3(zipped_report, accession, files_to_parse, - acl='public-read') - except Exception as e: - LOG.info(tibanna.s3.__dict__) - raise Exception("%s (key={})\n".format(zipped_report) % e) - - # schema - qc_schema = ff_utils.get_metadata("profiles/" + quality_metric + ".json", key=ff_key) - - # parse fastqc metadata - LOG.info("files : %s" % str(files)) - filedata = [files[_]['data'] for _ in datafiles] - meta = parse_qc_table(filedata, - url=files[report_html]['s3key'], - qc_schema=qc_schema.get('properties')) - LOG.info("fastqc meta is %s" % meta) - - # post fastq metadata - qc_meta = ff_utils.post_to_metadata(meta, quality_metric, key=ff_key) - if qc_meta.get('@graph'): - qc_meta = qc_meta['@graph'][0] - - LOG.info("qc_meta is %s" % qc_meta) - # update original file as well - try: - original_file = ff_utils.get_metadata(accession, key=ff_key) - LOG.info("original_file is %s" % original_file) - except Exception as e: - raise Exception("Couldn't get metadata for accession {} : ".format(accession) + str(e)) - patch_file = {'quality_metric': qc_meta['@id']} - try: - ff_utils.patch_metadata(patch_file, original_file['uuid'], key=ff_key) - except Exception as e: - raise Exception("patch_metadata failed in fastqc_updater." + str(e) + - "original_file ={}\n".format(str(original_file))) - - # patch the workflow run, value_qc is used to make drawing graphs easier. 
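Reviewer note: wf_file.s3.unzip_s3_to_s3 above comes from the s3Utils wrapper and is not shown in this diff. A hedged sketch of what such a helper has to do (pull the zipped report, extract selected members, re-upload them under the accession prefix, and hand back their contents); the function name and return shape here are assumptions modeled on how _qc_updater consumes the result.

# Hedged sketch of an unzip-and-republish helper (not the s3Utils API).
import io
import zipfile
import boto3

s3 = boto3.client('s3')

def unzip_s3_to_s3_sketch(bucket, zip_key, dest_prefix, wanted, acl='public-read'):
    data = s3.get_object(Bucket=bucket, Key=zip_key)['Body'].read()
    results = {}
    with zipfile.ZipFile(io.BytesIO(data)) as zf:
        for member in zf.namelist():
            basename = member.split('/')[-1]
            if basename not in wanted:
                continue
            content = zf.read(member)
            dest_key = "%s/%s" % (dest_prefix, basename)
            # re-upload the extracted member next to the other QC outputs
            s3.put_object(Bucket=bucket, Key=dest_key, Body=content, ACL=acl)
            results[basename] = {
                'data': content.decode('utf-8', 'replace'),
                's3key': "https://s3.amazonaws.com/%s/%s" % (bucket, dest_key),
            }
    return results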
-def md5_updater(status, wf_file, ff_meta, tibanna):
-    # get key
-    ff_key = tibanna.ff_keys
-    # get metadata about the original input file
-    accession = wf_file.runner.inputfile_accessions['input_file']
-    original_file = ff_utils.get_metadata(accession, key=ff_key)
-
-    if status.lower() == 'uploaded':
-        md5 = wf_file.read()
-        original_md5 = original_file.get('content_md5sum', False)
-        current_status = original_file.get('status', "uploading")
-        if original_md5 and original_md5 != md5:
-            # md5 mismatch: set the file status to upload failed
-            print("md5 mismatch")
-            md5_updater("upload failed", wf_file, ff_meta, tibanna)
-        else:
-            new_file = {}
-            # change status to uploaded only if it is uploading or upload failed
-            if current_status in ["uploading", "upload failed"]:
-                new_file['status'] = 'uploaded'
-            new_file['content_md5sum'] = md5
-
-            try:
-                ff_utils.patch_metadata(new_file, accession, key=ff_key)
-            except Exception as e:
-                # TODO: raise a more specific exception
-                # if the patch fails, mark the file status as upload failed
-                new_file = {}
-                new_file['status'] = 'upload failed'
-                new_file['description'] = str(e)
-                ff_utils.patch_metadata(new_file, original_file['uuid'], key=ff_key)
-    elif status == 'upload failed':
-        new_file = {}
-        new_file['status'] = 'upload failed'
-        ff_utils.patch_metadata(new_file, original_file['uuid'], key=ff_key)
-
-    # nothing to patch to ff_meta
-    return None
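Reviewer note: the branching in md5_updater is the subtle part (only promote status to uploaded from uploading/upload failed, and flag a mismatch against a pre-existing content_md5sum). Distilled into a pure function for unit testing; this mirrors the code above rather than any core API.

# Pure-function distillation of md5_updater's decision logic.
def md5_patch_sketch(reported_md5, original_meta):
    original_md5 = original_meta.get('content_md5sum')
    current_status = original_meta.get('status', 'uploading')
    if original_md5 and original_md5 != reported_md5:
        # mismatch against a previously recorded checksum
        return {'status': 'upload failed'}
    patch = {'content_md5sum': reported_md5}
    # only promote to 'uploaded' from these two states
    if current_status in ('uploading', 'upload failed'):
        patch['status'] = 'uploaded'
    return patch

assert md5_patch_sketch('abc', {'status': 'uploading'}) == \
    {'content_md5sum': 'abc', 'status': 'uploaded'}
assert md5_patch_sketch('abc', {'content_md5sum': 'xyz'}) == {'status': 'upload failed'}
assert md5_patch_sketch('abc', {'status': 'released', 'content_md5sum': 'abc'}) == \
    {'content_md5sum': 'abc'}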
-def metadata_only(event):
-    # just create a fake awsem config so the handler function does its magic
-    '''
-    if not event.get('args'):
-        event['args'] = {'app_name': event['ff_meta'].get('awsem_app_name'),
-                         'output_S3_bucket': 'metadata_only',
-                         'output_target': {'metadata_only': 'metadata_only'}
-                         }
-
-    if not event.get('config'):
-        event['config'] = {'runmode': 'metadata_only'}
-    '''
-    return real_handler(event, None)
-
-
-@utils.powerup('update_ffmeta_awsem', metadata_only)
-def handler(event, context):
-    return real_handler(event, context)
-
-
-def real_handler(event, context):
-    # check the status and other details of the AWSEM run
-    '''
-    this used to check if an SBG task run was done:
-    http://docs.sevenbridges.com/reference#get-task-execution-details
-    '''
-    # get data
-    # used to automatically determine the environment
-    tibanna_settings = event.get('_tibanna', {})
-    tibanna = utils.Tibanna(**tibanna_settings)
-    # sbg = sbg_utils.create_sbg_workflow(token=tibanna.sbg_keys, **event.get('workflow'))
-    ff_meta = ff_utils.create_ffmeta_awsem(app_name=event.get('ff_meta').get('awsem_app_name'), **event.get('ff_meta'))
-    pf_meta = event.get('pf_meta')
-    # ensure patch_meta is always initialized
-    patch_meta = False
-    awsem = ec2_utils.Awsem(event)
-
-    # go through this and replace export_report with awsf format
-    # actually the interface should look through ff_meta files and ask
-    # the runner for the status of each output, e.g.
-    # runner.output_files.length()
-    # runner.output_files.file.status
-    # runner.output_files.file.loc
-    # runner.output_files.file.get
-
-    awsem_output = awsem.output_files()
-    ff_output = len(ff_meta.output_files)
-    if len(awsem_output) != ff_output:
-        ff_meta.run_status = 'error'
-        ff_meta.description = "%d output files expected, %d found" % (ff_output, len(awsem_output))
-        ff_meta.post(key=tibanna.ff_keys)
-        raise Exception("Failing the workflow because output files = %d and ffmeta = %d" %
-                        (len(awsem_output), ff_output))
-
-    for _, export in awsem_output.iteritems():
-        upload_key = export.key
-        status = export.status
-        print("export res is %s" % status)
-        if status == 'COMPLETED':
-            patch_meta = OUTFILE_UPDATERS[export.output_type]('uploaded', export, ff_meta, tibanna)
-            if pf_meta:
-                pf_meta = update_processed_file_metadata('uploaded', pf_meta, tibanna)
-        elif status in ['FAILED']:
-            patch_meta = OUTFILE_UPDATERS[export.output_type]('upload failed', export, ff_meta, tibanna)
-            ff_meta.run_status = 'error'
-            ff_meta.post(key=tibanna.ff_keys)
-            raise Exception("Failed to export file %s" % (upload_key))
-
-    # if we got all the exports, go ahead and update our ff_metadata object
-    ff_meta.run_status = "complete"
-
-    # allow a simple way for the updater to add appropriate meta_data
-    if patch_meta:
-        ff_meta.__dict__.update(patch_meta)
-
-    # post all the file export metadata here
-    # TODO: fix bugs with ff_meta mapping for output and input file
-    try:
-        ff_meta.post(key=tibanna.ff_keys)
-    except Exception as e:
-        raise Exception("Failed to update run_status: %s" % e)
-
-    event['ff_meta'] = ff_meta.as_dict()
-    event['pf_meta'] = pf_meta
-    return event
-
-
-# Cardinal knowledge of all workflow updaters
-OUTFILE_UPDATERS = defaultdict(lambda: donothing)
-OUTFILE_UPDATERS['Output report file'] = md5_updater
-OUTFILE_UPDATERS['Output QC file'] = qc_updater
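Reviewer note: the defaultdict dispatch at the bottom means an unknown output type silently falls through to donothing instead of raising KeyError. A tiny usage sketch of the same pattern:

# defaultdict-based updater dispatch: missing keys fall back to a no-op.
from collections import defaultdict

def donothing(*args):
    return None

UPDATERS = defaultdict(lambda: donothing)
UPDATERS['Output report file'] = lambda *args: 'md5_updater called'
UPDATERS['Output QC file'] = lambda *args: 'qc_updater called'

print(UPDATERS['Output QC file']('uploaded'))      # qc_updater called
print(UPDATERS['Output image file']('uploaded'))   # None: no-op, no KeyError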