From 302738b0329d71da6d0d816ca00c7b60c99b3a62 Mon Sep 17 00:00:00 2001 From: SooLEe Date: Tue, 11 Jun 2019 23:07:17 +0000 Subject: [PATCH 1/4] if ebs size float -> round up --- tibanna/ec2_utils.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/tibanna/ec2_utils.py b/tibanna/ec2_utils.py index 9ef2bb577..9b8c7205c 100644 --- a/tibanna/ec2_utils.py +++ b/tibanna/ec2_utils.py @@ -359,6 +359,10 @@ def auto_calculate_ebs_size(self): if isinstance(self.cfg.ebs_size, str) and self.cfg.ebs_size.endswith('x'): multiplier = float(self.cfg.ebs_size.rstrip('x')) self.cfg.ebs_size = multiplier * self.total_input_size_in_gb + if round(self.cfg.ebs_size) < self.cfg.ebs_size: + self.cfg.ebs_size = round(self.cfg.ebs_size) + 1 + else: + self.cfg.ebs_size = round(self.cfg.ebs_size) if self.cfg.ebs_size < 10: self.cfg.ebs_size = 10 From 549e374128a7bf87fcf2357d37a398d598c23330 Mon Sep 17 00:00:00 2001 From: SooLEe Date: Tue, 11 Jun 2019 23:52:56 +0000 Subject: [PATCH 2/4] input file can now be a string starting with s3:// --- tests/tibanna/unicorn/test_ec2_utils.py | 92 ++++++++++++++++++++++++- tibanna/ec2_utils.py | 41 +++++++++++ tibanna/exceptions.py | 4 ++ 3 files changed, 135 insertions(+), 2 deletions(-) diff --git a/tests/tibanna/unicorn/test_ec2_utils.py b/tests/tibanna/unicorn/test_ec2_utils.py index 29a2c9423..20ec3a5d0 100644 --- a/tests/tibanna/unicorn/test_ec2_utils.py +++ b/tests/tibanna/unicorn/test_ec2_utils.py @@ -9,6 +9,7 @@ from tibanna.utils import create_jobid from tibanna.exceptions import ( MissingFieldInInputJsonException, + MalFormattedInputJsonException, EC2InstanceLimitException, EC2InstanceLimitWaitException ) @@ -37,6 +38,94 @@ def test_args_missing_field(): assert 'output_S3_bucket' in str(ex.value) +def test_args_parse_input_files(): + input_dict = {'args': {'input_files': {"file1": "s3://somebucket/somekey"}, + 'output_S3_bucket': 'somebucket', + 'cwl_main_filename': 'main.cwl', + 'cwl_directory_url': 'someurl', + 'app_name': 'someapp'}} + 
args = Args(**input_dict['args']) + args.fill_default() + assert hasattr(args, 'input_files') + assert 'file1' in args.input_files + assert 'bucket_name' in args.input_files['file1'] + assert 'object_key' in args.input_files['file1'] + assert args.input_files['file1']['bucket_name'] == 'somebucket' + assert args.input_files['file1']['object_key'] == 'somekey' + + +def test_args_parse_input_files2(): + input_dict = {'args': {'input_files': {"file1": [["s3://somebucket/somekey1", + "s3://somebucket/somekey2"], + ["s3://somebucket/somekey3", + "s3://somebucket/somekey4"]]}, + 'output_S3_bucket': 'somebucket', + 'cwl_main_filename': 'main.cwl', + 'cwl_directory_url': 'someurl', + 'app_name': 'someapp'}} + args = Args(**input_dict['args']) + args.fill_default() + assert hasattr(args, 'input_files') + assert 'file1' in args.input_files + assert 'bucket_name' in args.input_files['file1'] + assert 'object_key' in args.input_files['file1'] + assert args.input_files['file1']['bucket_name'] == 'somebucket' + assert isinstance(args.input_files['file1']['object_key'], list) + assert len(args.input_files['file1']['object_key']) == 2 + assert isinstance(args.input_files['file1']['object_key'][0], list) + assert len(args.input_files['file1']['object_key'][0]) == 2 + assert isinstance(args.input_files['file1']['object_key'][1], list) + assert len(args.input_files['file1']['object_key'][1]) == 2 + assert args.input_files['file1']['object_key'][0][0] == 'somekey1' + assert args.input_files['file1']['object_key'][0][1] == 'somekey2' + assert args.input_files['file1']['object_key'][1][0] == 'somekey3' + assert args.input_files['file1']['object_key'][1][1] == 'somekey4' + + +def test_args_parse_input_files2(): + input_dict = {'args': {'input_files': {"file1": ["s3://somebucket/somekey1", + "s3://somebucket/somekey2"]}, + 'output_S3_bucket': 'somebucket', + 'cwl_main_filename': 'main.cwl', + 'cwl_directory_url': 'someurl', + 'app_name': 'someapp'}} + args = Args(**input_dict['args']) + 
args.fill_default() + assert hasattr(args, 'input_files') + assert 'file1' in args.input_files + assert 'bucket_name' in args.input_files['file1'] + assert 'object_key' in args.input_files['file1'] + assert args.input_files['file1']['bucket_name'] == 'somebucket' + assert isinstance(args.input_files['file1']['object_key'], list) + assert len(args.input_files['file1']['object_key']) == 2 + assert args.input_files['file1']['object_key'][0] == 'somekey1' + assert args.input_files['file1']['object_key'][1] == 'somekey2' + +def test_args_parse_input_files_format_error(): + input_dict = {'args': {'input_files': {"file1": "somerandomstr"}, + 'output_S3_bucket': 'somebucket', + 'cwl_main_filename': 'main.cwl', + 'cwl_directory_url': 'someurl', + 'app_name': 'someapp'}} + args = Args(**input_dict['args']) + with pytest.raises(MalFormattedInputJsonException) as ex: + args.fill_default() + assert ex + assert 'S3 url must begin with' in str(ex.value) + +def test_args_parse_input_files_format_error2(): + input_dict = {'args': {'input_files': {"file1": ["s3://somebucket/somekey1", + "s3://otherbucket/somekey2"]}, + 'output_S3_bucket': 'somebucket', + 'cwl_main_filename': 'main.cwl', + 'cwl_directory_url': 'someurl', + 'app_name': 'someapp'}} + args = Args(**input_dict['args']) + with pytest.raises(MalFormattedInputJsonException) as ex: + args.fill_default() + assert ex + assert 'bucket' in str(ex.value) + def test_config(): input_dict = {'config': {'log_bucket': 'tibanna-output', 'shutdown_min': 30}} cfg = Config(**input_dict['config']) @@ -192,8 +281,7 @@ def test_update_config_ebs_size2(): execution = Execution(input_dict) execution.input_size_in_bytes = execution.get_input_size_in_bytes() execution.update_config_ebs_size() - assert execution.cfg.ebs_size > 18 - assert execution.cfg.ebs_size < 19 + assert execution.cfg.ebs_size == 19 # cleanup afterwards s3.delete_objects(Bucket='tibanna-output', Delete={'Objects': [{'Key': randomstr}]}) diff --git a/tibanna/ec2_utils.py 
b/tibanna/ec2_utils.py index 9b8c7205c..64ed21749 100644 --- a/tibanna/ec2_utils.py +++ b/tibanna/ec2_utils.py @@ -5,6 +5,7 @@ import logging import boto3 import copy +import re from .utils import ( printlog, does_key_exist, @@ -23,6 +24,7 @@ ) from .exceptions import ( MissingFieldInInputJsonException, + MalFormattedInputJsonException, EC2LaunchException, EC2InstanceLimitException, EC2InstanceLimitWaitException, @@ -137,6 +139,8 @@ def fill_default(self): self.singularity = False if not hasattr(self, 'app_name'): self.app_name = '' + # input file format check and parsing + self.parse_input_files() # check workflow info is there and fill in default errmsg_template = "field %s is required in args for language %s" if self.language == 'wdl': @@ -184,6 +188,43 @@ def fill_default(self): errmsg = "either %s or %s must be provided in args" % ('cwl_directory_url', 'cwl_directory_local') raise MissingFieldInInputJsonException(errmsg) + def parse_input_files(self): + """checking format for input files and converting s3:// style string into + bucket_name and object_key""" + if hasattr(self, 'input_files'): + if not isinstance(self.input_files, dict): + errmsg = "'input_files' must be provided as a dictionary (key-value pairs)" + raise MalFormattedInputJsonException(errmsg) + for ip, v in self.input_files.items(): + if isinstance(v, str): + bucket_name, object_key = self.parse_s3_url(v) + self.input_files[ip] = {'bucket_name': bucket_name, 'object_key': object_key} + elif isinstance(v, list): + buckets = flatten(run_on_nested_arrays1(v, self.parse_s3_url, **{'bucket_only': True})) + if len(set(buckets)) != 1: + errmsg = "All the input files corresponding to a single input file argument " + \ + "must be from the same bucket." 
+ raise MalFormattedInputJsonException(errmsg) + object_keys = run_on_nested_arrays1(v, self.parse_s3_url, **{'key_only': True}) + self.input_files[ip] = {'bucket_name': buckets[0], 'object_key': object_keys} + elif isinstance(v, dict) and 'bucket_name' in v and 'object_key' in v: + pass + else: + errmsg = "Each input_file value must be either a string starting with 's3://'" + \ + " or a dictionary with 'bucket_name' and 'object_key' as keys" + raise MalFormattedInputJsonException(errmsg) + + def parse_s3_url(self, url, bucket_only=False, key_only=False): + if not url.startswith('s3://'): + raise MalFormattedInputJsonException("S3 url must begin with 's3://'") + bucket_name = re.sub('^s3://', '', url).split('/')[0] + object_key = re.sub('^s3://' + bucket_name + '/', '', url) + if bucket_only: + return bucket_name + if key_only: + return object_key + return bucket_name, object_key + def as_dict(self): return copy.deepcopy(self.__dict__) diff --git a/tibanna/exceptions.py b/tibanna/exceptions.py index 5e7591da0..24b785014 100644 --- a/tibanna/exceptions.py +++ b/tibanna/exceptions.py @@ -44,3 +44,7 @@ class EC2InstanceLimitWaitException(Exception): class MissingFieldInInputJsonException(Exception): pass + + +class MalFormattedInputJsonException(Exception): + pass From 9d3ed4295cd83f1db01afbd791dd4e5ddf27f7a4 Mon Sep 17 00:00:00 2001 From: SooLEe Date: Wed, 12 Jun 2019 01:02:19 +0000 Subject: [PATCH 3/4] command in input json can be a list --- tests/tibanna/unicorn/test_ec2_utils.py | 21 ++++++++++++++++++++- tibanna/ec2_utils.py | 10 ++++++++++ 2 files changed, 30 insertions(+), 1 deletion(-) diff --git a/tests/tibanna/unicorn/test_ec2_utils.py b/tests/tibanna/unicorn/test_ec2_utils.py index 20ec3a5d0..65ed7db66 100644 --- a/tests/tibanna/unicorn/test_ec2_utils.py +++ b/tests/tibanna/unicorn/test_ec2_utils.py @@ -82,7 +82,7 @@ def test_args_parse_input_files2(): assert args.input_files['file1']['object_key'][1][1] == 'somekey4' -def test_args_parse_input_files2(): 
+def test_args_parse_input_files3(): input_dict = {'args': {'input_files': {"file1": ["s3://somebucket/somekey1", "s3://somebucket/somekey2"]}, 'output_S3_bucket': 'somebucket', @@ -101,6 +101,7 @@ def test_args_parse_input_files2(): assert args.input_files['file1']['object_key'][0] == 'somekey1' assert args.input_files['file1']['object_key'][1] == 'somekey2' + def test_args_parse_input_files_format_error(): input_dict = {'args': {'input_files': {"file1": "somerandomstr"}, 'output_S3_bucket': 'somebucket', @@ -113,6 +114,7 @@ def test_args_parse_input_files_format_error(): assert ex assert 'S3 url must begin with' in str(ex.value) + def test_args_parse_input_files_format_error2(): input_dict = {'args': {'input_files': {"file1": ["s3://somebucket/somekey1", "s3://otherbucket/somekey2"]}, @@ -126,6 +128,18 @@ def test_args_parse_input_files_format_error2(): assert ex assert 'bucket' in str(ex.value) + +def test_parse_command(): + input_dict = {'args': {'command': ['command1', 'command2', 'command3'], + 'output_S3_bucket': 'somebucket', + 'language': 'shell', + 'container_image': 'someimage', + 'app_name': 'someapp'}} + args = Args(**input_dict['args']) + args.fill_default() + assert args.command == 'command1; command2; command3' + + def test_config(): input_dict = {'config': {'log_bucket': 'tibanna-output', 'shutdown_min': 30}} cfg = Config(**input_dict['config']) @@ -213,6 +227,7 @@ def test_execution_benchmark(): s3.delete_objects(Bucket='tibanna-output', Delete={'Objects': [{'Key': randomstr}]}) + def test_get_file_size(): randomstr = 'test-' + create_jobid() s3 = boto3.client('s3') @@ -224,6 +239,7 @@ def test_get_file_size(): s3.delete_objects(Bucket='tibanna-output', Delete={'Objects': [{'Key': randomstr}]}) + def test_get_input_size_in_bytes(): randomstr = 'test-' + create_jobid() s3 = boto3.client('s3') @@ -244,6 +260,7 @@ def test_get_input_size_in_bytes(): s3.delete_objects(Bucket='tibanna-output', Delete={'Objects': [{'Key': randomstr}]}) + def 
test_update_config_ebs_size(): """ebs_size is given as the 'x' format. The total estimated ebs_size is smaller than 10""" randomstr = 'test-' + create_jobid() @@ -265,6 +282,7 @@ def test_update_config_ebs_size(): s3.delete_objects(Bucket='tibanna-output', Delete={'Objects': [{'Key': randomstr}]}) + def test_update_config_ebs_size2(): """ebs_size is given as the 'x' format. The total estimated ebs_size is larger than 10""" randomstr = 'test-' + create_jobid() @@ -286,6 +304,7 @@ def test_update_config_ebs_size2(): s3.delete_objects(Bucket='tibanna-output', Delete={'Objects': [{'Key': randomstr}]}) + def test_unicorn_input_missing_field(): """app_name that doesn't exist in benchmark, without instance type, mem, cpu info""" input_dict = {'args': {'input_files': {}, 'app_name': 'app_name_not_in_benchmark', diff --git a/tibanna/ec2_utils.py b/tibanna/ec2_utils.py index 64ed21749..2d33b8216 100644 --- a/tibanna/ec2_utils.py +++ b/tibanna/ec2_utils.py @@ -187,6 +187,16 @@ def fill_default(self): if not self.cwl_directory_local and not self.cwl_directory_url: errmsg = "either %s or %s must be provided in args" % ('cwl_directory_url', 'cwl_directory_local') raise MissingFieldInInputJsonException(errmsg) + # reformat command + self.parse_command() + + def parse_command(self): + """if command is a list, convert it to a string""" + if hasattr(self, 'command'): + if isinstance(self.command, list): + self.command = '; '.join(self.command) + elif not isinstance(self.command, str): + raise MalFormattedInputJsonException("command must be either a string or a list") def parse_input_files(self): """checking format for input files and converting s3:// style string into From 4d87165cd4f0129857fac400b16a3e8d48f78846 Mon Sep 17 00:00:00 2001 From: SooLEe Date: Wed, 12 Jun 2019 01:02:48 +0000 Subject: [PATCH 4/4] version update --- tibanna/_version.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tibanna/_version.py b/tibanna/_version.py index e77bf562a..e466d4612 
100644 --- a/tibanna/_version.py +++ b/tibanna/_version.py @@ -1,4 +1,4 @@ """Version information.""" # The following line *must* be the last in the module, exactly as formatted: -__version__ = "0.8.3" +__version__ = "0.8.4"