diff --git a/awsf3-docker/run.sh b/awsf3-docker/run.sh index 4ede7b61e..3402fda11 100755 --- a/awsf3-docker/run.sh +++ b/awsf3-docker/run.sh @@ -44,9 +44,12 @@ export TOPFILE=$LOCAL_OUTDIR/$JOBID.top # now top command output goes to a sepa export TOPLATESTFILE=$LOCAL_OUTDIR/$JOBID.top_latest # this one includes only the latest top command output export INSTANCE_ID=$(ec2metadata --instance-id|cut -d' ' -f2) export INSTANCE_REGION=$(ec2metadata --availability-zone | sed 's/[a-z]$//') +export INSTANCE_AVAILABILITY_ZONE=$(ec2metadata --availability-zone) export AWS_ACCOUNT_ID=$(aws sts get-caller-identity| grep Account | sed 's/[^0-9]//g') export AWS_REGION=$INSTANCE_REGION # this is for importing awsf3 package which imports tibanna package + + # function that executes a command and collecting log exl(){ $@ >> $LOGFILE 2>> $LOGFILE; handle_error $?; } ## usage: exl command ## ERRCODE has the error code for the command. if something is wrong, send error to s3. exlj(){ $@ >> $LOGJSONFILE 2>> $LOGFILE; $ERRCODE=$?; cat $LOGJSONFILE >> $LOGFILE; handle_error $ERRCODE; } ## usage: exl command ## ERRCODE has the error code for the command. if something is wrong, send error to s3. This one separates stdout to json as well. diff --git a/awsf3/utils.py b/awsf3/utils.py index a7503aa41..f6518bd09 100755 --- a/awsf3/utils.py +++ b/awsf3/utils.py @@ -325,13 +325,14 @@ def write_postrun_json(jsonfile, prj): def update_postrun_json_init(json_old, json_new): - """Update postrun json with just instance ID and filesystem""" + """Update postrun json with just instance ID, filesystem and availability zone""" # read old json file prj = read_postrun_json(json_old) - # simply add instance ID and file system + # simply add instance ID, file system and availability zone prj.Job.instance_id = os.getenv('INSTANCE_ID') prj.Job.filesystem = os.getenv('EBS_DEVICE') + prj.Job.instance_availablity_zone = os.getenv('INSTANCE_AVAILABILITY_ZONE') # write to new json file write_postrun_json(json_new, prj) @@ -441,6 +442,7 @@ def postrun_json_final(prj, logfile=None): prj_job.update(total_output_size=os.getenv('OUTPUTSIZE')) + def upload_postrun_json(jsonfile): prj = read_postrun_json(jsonfile) bucket = prj.Job.Log.log_bucket_directory diff --git a/docs/api.rst b/docs/api.rst index 017727a34..871ad7657 100755 --- a/docs/api.rst +++ b/docs/api.rst @@ -562,6 +562,23 @@ To retrieve the cost and update the metrics report file created with plot_metric variable is not set, it uses name 'tibanna_pony' (4dn default, works only for 4dn). - update_tsv This flag specify to update with cost the tsv file that + update_tsv This flag specifies whether to update the cost in the tsv file that + stores metrics information on the S3 bucket + + +cost_estimate +------------- + +Retrieve a cost estimate for a specific job. The estimate is available as soon as the job has finished. This function returns the exact cost, if it is available. + +:: + + API().cost_estimate(job_id=, ...) + +**Options** + +:: + + update_tsv This flag specifies whether to update the cost in the tsv file that + stores metrics information on the S3 bucket diff --git a/docs/commands.rst b/docs/commands.rst index c42aeb482..e5683cf99 100755 --- a/docs/commands.rst +++ b/docs/commands.rst @@ -547,7 +547,8 @@ To collect, save and visualize the resources metrics from Cloud Watch cost ---- -To retrieve the cost and update the metrics report file created with plot_metrics +To retrieve the cost and update the metrics report file created with plot_metrics.
The cost is typically available 24 hours after the job has finished. +This function is available to non-admin users from version 1.0.6. :: @@ -567,4 +568,21 @@ To retrieve the cost and update the metrics report file created with plot_metric -u|--update-tsv Update with the cost the tsv file that stores metrics information on the S3 bucket +cost_estimate +------------- + +To retrieve cost estimates and update the metrics report file created with plot_metrics. In contrast to the exact costs, the estimated costs are available immediately after the job has completed. +This function requires a (deployed) Tibanna version >=1.0.6. + +:: + + tibanna cost_estimate --job-id= [] + +**Options** + +:: + + -u|--update-tsv Update the tsv file that stores metrics information + on the S3 bucket with the estimated cost + diff --git a/docs/installation.rst b/docs/installation.rst index 1f61d3c08..6c81844cb 100755 --- a/docs/installation.rst +++ b/docs/installation.rst @@ -356,18 +356,18 @@ You will see the list of users. monty -This command will print out the list of users. +The following command will add a user to a specific user group. :: - tibanna add_users --user= --group= + tibanna add_user --user= --usergroup= For example, if you have a user named ``monty`` and you want to give permission to this user to user Tibanna ``lalala``. This will give this user permission to run and monitor the workflow, access the buckets that Tibanna usergroup ``lalala`` was given access to through ``tibanna deploy_unicorn --buckets=,,...`` :: - tibanna add_users --user=monty --group=lalala + tibanna add_user --user=monty --usergroup=lalala Check users again. diff --git a/docs/monitoring.rst b/docs/monitoring.rst index 1a95ba78c..e12c90c1a 100755 --- a/docs/monitoring.rst +++ b/docs/monitoring.rst @@ -472,7 +472,7 @@ By default the command will retrieve the data from cloud watch, and creates seve All the files are eventually uploaded to a folder named ``.metrics`` inside the log S3 bucket specified for tibanna output. To visualize the html report the URL structure is: ``https://.s3.amazonaws.com/.metrics/metrics.html`` -Starting with ``1.0.0``, the metrics plot will include per-process CPU and memory profiles retrived from the top command reports at a 1-minute interval. Additional files `top_cpu.tsv` and `top_mem.tsv` will also be created under the same folder ``.metrics``. +Starting with ``1.0.0``, the metrics plot will include per-process CPU and memory profiles retrieved from the top command reports at a 1-minute interval. Additional files `top_cpu.tsv` and `top_mem.tsv` will also be created under the same folder ``.metrics``.
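A usage sketch for the new ``cost_estimate`` entry points documented above (assumptions: a deployed Tibanna >= 1.0.6 and a finished job; the ``API`` class is the one defined in ``tibanna/core.py`` in this PR, and the job id is taken from the test fixtures that follow):

::

    from tibanna.core import API

    # Returns the exact cost if AWS billing data is already available
    # (typically ~24 hours after the job ends); otherwise computes an
    # estimate from the postrun json. With update_tsv=True the value is
    # written to the Estimated_Cost row of <jobid>.metrics/metrics_report.tsv
    # in the log bucket.
    cost = API().cost_estimate(job_id='NgN7IOBXjgRH', update_tsv=True)
    print(cost)

The equivalent CLI call is ``tibanna cost_estimate --job-id=NgN7IOBXjgRH --update-tsv``.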
diff --git a/test_json/unicorn/medium_nonspot.postrun.json b/test_json/unicorn/medium_nonspot.postrun.json new file mode 100644 index 000000000..06c63f65a --- /dev/null +++ b/test_json/unicorn/medium_nonspot.postrun.json @@ -0,0 +1,119 @@ +{ + "Job": { + "App": { + "App_name": "", + "App_version": "", + "language": "cwl_v1", + "cwl_url": "https://raw.githubusercontent.com/alexander-veit/tibanna-workflows/main", + "main_cwl": "md5.cwl", + "other_cwl_files": "" + }, + "Input": { + "Input_files_data": { + "gzfile": { + "path": "input/alg1.png.gz", + "profile": "", + "rename": "", + "unzip": "", + "mount": "", + "class": "File", + "dir": "aveit-tibanna-test" + } + }, + "Secondary_files_data": {}, + "Input_parameters": {}, + "Env": {} + }, + "Output": { + "output_bucket_directory": "aveit-tibanna-test", + "output_target": { + "report": "output/my_first_md5_report" + }, + "secondary_output_target": {}, + "alt_cond_output_argnames": {}, + "Output files": { + "report": { + "path": "/data1/out/report", + "target": "output/my_first_md5_report", + "basename": "report", + "checksum": "sha1$d237002c1956532ffb9117972f3a98cd1344df56", + "location": "file:///data1/out/report", + "md5sum": "45ad098a40814f1e7792b4e17b5f74d0", + "size": 66, + "class": "File" + } + } + }, + "start_time": "20210301-16:58:05-UTC", + "JOBID": "NgN7IOBXjgRH", + "Log": { + "log_bucket_directory": "aveit-tibanna-test-log" + }, + "end_time": "20210301-17:03:58-UTC", + "status": "0", + "filesystem": "/dev/nvme1n1", + "instance_id": "i-01769a822e5dbb407", + "instance_availablity_zone": "us-east-1b", + "total_input_size": "12K", + "total_output_size": "36K", + "total_tmp_size": "20K", + "Metrics": { + "cost_estimate": 0.004384172839506173, + "max_mem_used_MB": 713.4609375, + "min_mem_available_MB": 3163.0390625, + "total_mem_MB": 3876.5, + "max_mem_utilization_percent": 18.404770733909455, + "max_cpu_utilization_percent": 69.0, + "max_disk_space_utilization_percent": 1.0498987095511, + "max_disk_space_used_GB": 0.205371856689453, + "max_ebs_read_bytes": 69936332.8 + } + }, + "config": { + "EBS_optimized": true, + "ami_id": "ami-0a7ddfc7e412ab6e0", + "availability_zone": "", + "awsf_image": "aveit/tibanna-awsf:1.0.3.dev4", + "behavior_on_capacity_limit": "fail", + "cloudwatch_dashboard": false, + "cpu": "", + "ebs_iops": "", + "ebs_size": 20, + "ebs_type": "gp3", + "instance_type": "t3.medium", + "job_tag": "", + "json_bucket": "aveit-tibanna-test-log", + "key_name": "", + "language": "cwl_v1", + "log_bucket": "aveit-tibanna-test-log", + "mem": 0, + "password": "", + "public_postrun_json": false, + "root_ebs_size": 8, + "run_name": "t3_medium_nonspot", + "script_url": "https://raw.githubusercontent.com/4dn-dcic/tibanna/master/awsf3/", + "security_group": "", + "shutdown_min": "now", + "spot_duration": "", + "spot_instance": false, + "subnet": "" + }, + "commands": [ + [ + "docker", + " run", + " -i", + " --mount=type=bind,source=/mnt/data1/tmp4jntok73,target=/OVwPJQ", + " --mount=type=bind,source=/mnt/data1/tmp3s4rl0wc,target=/tmp", + " --mount=type=bind,source=/data1/input/input/alg1.png.gz,target=/var/lib/cwl/stg59f7da6f-1d44-4615-abd3-b46c2ceefcf1/alg1.png.gz,readonly", + " --workdir=/OVwPJQ", + " --rm", + " --env=TMPDIR=/tmp", + " --env=HOME=/OVwPJQ", + " --cidfile=/data1/tmpvfaxztjn/20210301170033-764000.cid", + " aveit/md5:v2", + " run.sh", + " /var/lib/cwl/stg59f7da6f-1d44-4615-abd3-b46c2ceefcf1/alg1.png.gz" + ] + ] +} \ No newline at end of file diff --git a/test_json/unicorn/no_availability_zone.postrun.json 
b/test_json/unicorn/no_availability_zone.postrun.json new file mode 100644 index 000000000..3742dcbf2 --- /dev/null +++ b/test_json/unicorn/no_availability_zone.postrun.json @@ -0,0 +1,118 @@ +{ + "Job": { + "App": { + "App_name": "", + "App_version": "", + "language": "cwl_v1", + "cwl_url": "https://raw.githubusercontent.com/alexander-veit/tibanna-workflows/main", + "main_cwl": "md5.cwl", + "other_cwl_files": "" + }, + "Input": { + "Input_files_data": { + "gzfile": { + "path": "input/alg1.png.gz", + "profile": "", + "rename": "", + "unzip": "", + "mount": "", + "class": "File", + "dir": "aveit-tibanna-test" + } + }, + "Secondary_files_data": {}, + "Input_parameters": {}, + "Env": {} + }, + "Output": { + "output_bucket_directory": "aveit-tibanna-test", + "output_target": { + "report": "output/my_first_md5_report" + }, + "secondary_output_target": {}, + "alt_cond_output_argnames": {}, + "Output files": { + "report": { + "path": "/data1/out/report", + "target": "output/my_first_md5_report", + "basename": "report", + "checksum": "sha1$d237002c1956532ffb9117972f3a98cd1344df56", + "location": "file:///data1/out/report", + "md5sum": "45ad098a40814f1e7792b4e17b5f74d0", + "size": 66, + "class": "File" + } + } + }, + "start_time": "20210301-16:58:05-UTC", + "JOBID": "NgN7IOBXjgRH", + "Log": { + "log_bucket_directory": "aveit-tibanna-test-log" + }, + "end_time": "20210301-17:03:58-UTC", + "status": "0", + "filesystem": "/dev/nvme1n1", + "instance_id": "i-01769a822e5dbb407", + "total_input_size": "12K", + "total_output_size": "36K", + "total_tmp_size": "20K", + "Metrics": { + "cost_estimate": 0.004384172839506173, + "max_mem_used_MB": 713.4609375, + "min_mem_available_MB": 3163.0390625, + "total_mem_MB": 3876.5, + "max_mem_utilization_percent": 18.404770733909455, + "max_cpu_utilization_percent": 69.0, + "max_disk_space_utilization_percent": 1.0498987095511, + "max_disk_space_used_GB": 0.205371856689453, + "max_ebs_read_bytes": 69936332.8 + } + }, + "config": { + "EBS_optimized": true, + "ami_id": "ami-0a7ddfc7e412ab6e0", + "availability_zone": "", + "awsf_image": "aveit/tibanna-awsf:1.0.3.dev4", + "behavior_on_capacity_limit": "fail", + "cloudwatch_dashboard": false, + "cpu": "", + "ebs_iops": "", + "ebs_size": 20, + "ebs_type": "gp3", + "instance_type": "t3.medium", + "job_tag": "", + "json_bucket": "aveit-tibanna-test-log", + "key_name": "", + "language": "cwl_v1", + "log_bucket": "aveit-tibanna-test-log", + "mem": 0, + "password": "", + "public_postrun_json": false, + "root_ebs_size": 8, + "run_name": "t3_medium_nonspot", + "script_url": "https://raw.githubusercontent.com/4dn-dcic/tibanna/master/awsf3/", + "security_group": "", + "shutdown_min": "now", + "spot_duration": "", + "spot_instance": true, + "subnet": "" + }, + "commands": [ + [ + "docker", + " run", + " -i", + " --mount=type=bind,source=/mnt/data1/tmp4jntok73,target=/OVwPJQ", + " --mount=type=bind,source=/mnt/data1/tmp3s4rl0wc,target=/tmp", + " --mount=type=bind,source=/data1/input/input/alg1.png.gz,target=/var/lib/cwl/stg59f7da6f-1d44-4615-abd3-b46c2ceefcf1/alg1.png.gz,readonly", + " --workdir=/OVwPJQ", + " --rm", + " --env=TMPDIR=/tmp", + " --env=HOME=/OVwPJQ", + " --cidfile=/data1/tmpvfaxztjn/20210301170033-764000.cid", + " aveit/md5:v2", + " run.sh", + " /var/lib/cwl/stg59f7da6f-1d44-4615-abd3-b46c2ceefcf1/alg1.png.gz" + ] + ] +} \ No newline at end of file diff --git a/test_json/unicorn/small_spot.postrun.json b/test_json/unicorn/small_spot.postrun.json new file mode 100644 index 000000000..c3617d454 --- /dev/null +++ 
b/test_json/unicorn/small_spot.postrun.json @@ -0,0 +1,119 @@ +{ + "Job": { + "App": { + "App_name": "", + "App_version": "", + "language": "cwl_v1", + "cwl_url": "https://raw.githubusercontent.com/alexander-veit/tibanna-workflows/main", + "main_cwl": "md5.cwl", + "other_cwl_files": "" + }, + "Input": { + "Input_files_data": { + "gzfile": { + "path": "input/alg1.png.gz", + "profile": "", + "rename": "", + "unzip": "", + "mount": "", + "class": "File", + "dir": "aveit-tibanna-test" + } + }, + "Secondary_files_data": {}, + "Input_parameters": {}, + "Env": {} + }, + "Output": { + "output_bucket_directory": "aveit-tibanna-test", + "output_target": { + "report": "output/my_first_md5_report" + }, + "secondary_output_target": {}, + "alt_cond_output_argnames": {}, + "Output files": { + "report": { + "path": "/data1/out/report", + "target": "output/my_first_md5_report", + "basename": "report", + "checksum": "sha1$d237002c1956532ffb9117972f3a98cd1344df56", + "location": "file:///data1/out/report", + "md5sum": "45ad098a40814f1e7792b4e17b5f74d0", + "size": 66, + "class": "File" + } + } + }, + "start_time": "20210301-16:58:05-UTC", + "JOBID": "NgN7IOBXjgRH", + "Log": { + "log_bucket_directory": "aveit-tibanna-test-log" + }, + "end_time": "20210301-17:03:58-UTC", + "status": "0", + "filesystem": "/dev/nvme1n1", + "instance_id": "i-01769a822e5dbb407", + "instance_availablity_zone": "us-east-1b", + "total_input_size": "12K", + "total_output_size": "36K", + "total_tmp_size": "20K", + "Metrics": { + "cost_estimate": 0.004384172839506173, + "max_mem_used_MB": 713.4609375, + "min_mem_available_MB": 3163.0390625, + "total_mem_MB": 3876.5, + "max_mem_utilization_percent": 18.404770733909455, + "max_cpu_utilization_percent": 69.0, + "max_disk_space_utilization_percent": 1.0498987095511, + "max_disk_space_used_GB": 0.205371856689453, + "max_ebs_read_bytes": 69936332.8 + } + }, + "config": { + "EBS_optimized": true, + "ami_id": "ami-0a7ddfc7e412ab6e0", + "availability_zone": "", + "awsf_image": "aveit/tibanna-awsf:1.0.3.dev4", + "behavior_on_capacity_limit": "fail", + "cloudwatch_dashboard": false, + "cpu": "", + "ebs_iops": "", + "ebs_size": 20, + "ebs_type": "gp3", + "instance_type": "t3.small", + "job_tag": "", + "json_bucket": "aveit-tibanna-test-log", + "key_name": "", + "language": "cwl_v1", + "log_bucket": "aveit-tibanna-test-log", + "mem": 0, + "password": "", + "public_postrun_json": false, + "root_ebs_size": 8, + "run_name": "t3_medium_nonspot", + "script_url": "https://raw.githubusercontent.com/4dn-dcic/tibanna/master/awsf3/", + "security_group": "", + "shutdown_min": "now", + "spot_duration": "", + "spot_instance": true, + "subnet": "" + }, + "commands": [ + [ + "docker", + " run", + " -i", + " --mount=type=bind,source=/mnt/data1/tmp4jntok73,target=/OVwPJQ", + " --mount=type=bind,source=/mnt/data1/tmp3s4rl0wc,target=/tmp", + " --mount=type=bind,source=/data1/input/input/alg1.png.gz,target=/var/lib/cwl/stg59f7da6f-1d44-4615-abd3-b46c2ceefcf1/alg1.png.gz,readonly", + " --workdir=/OVwPJQ", + " --rm", + " --env=TMPDIR=/tmp", + " --env=HOME=/OVwPJQ", + " --cidfile=/data1/tmpvfaxztjn/20210301170033-764000.cid", + " aveit/md5:v2", + " run.sh", + " /var/lib/cwl/stg59f7da6f-1d44-4615-abd3-b46c2ceefcf1/alg1.png.gz" + ] + ] +} \ No newline at end of file diff --git a/test_json/unicorn/small_spot_gp3_iops.postrun.json b/test_json/unicorn/small_spot_gp3_iops.postrun.json new file mode 100644 index 000000000..6111c3157 --- /dev/null +++ b/test_json/unicorn/small_spot_gp3_iops.postrun.json @@ -0,0 +1,119 @@ +{ + 
"Job": { + "App": { + "App_name": "", + "App_version": "", + "language": "cwl_v1", + "cwl_url": "https://raw.githubusercontent.com/alexander-veit/tibanna-workflows/main", + "main_cwl": "md5.cwl", + "other_cwl_files": "" + }, + "Input": { + "Input_files_data": { + "gzfile": { + "path": "input/alg1.png.gz", + "profile": "", + "rename": "", + "unzip": "", + "mount": "", + "class": "File", + "dir": "aveit-tibanna-test" + } + }, + "Secondary_files_data": {}, + "Input_parameters": {}, + "Env": {} + }, + "Output": { + "output_bucket_directory": "aveit-tibanna-test", + "output_target": { + "report": "output/my_first_md5_report" + }, + "secondary_output_target": {}, + "alt_cond_output_argnames": {}, + "Output files": { + "report": { + "path": "/data1/out/report", + "target": "output/my_first_md5_report", + "basename": "report", + "checksum": "sha1$d237002c1956532ffb9117972f3a98cd1344df56", + "location": "file:///data1/out/report", + "md5sum": "45ad098a40814f1e7792b4e17b5f74d0", + "size": 66, + "class": "File" + } + } + }, + "start_time": "20210301-16:58:05-UTC", + "JOBID": "NgN7IOBXjgRH", + "Log": { + "log_bucket_directory": "aveit-tibanna-test-log" + }, + "end_time": "20210301-17:03:58-UTC", + "status": "0", + "filesystem": "/dev/nvme1n1", + "instance_id": "i-01769a822e5dbb407", + "instance_availablity_zone": "us-east-1b", + "total_input_size": "12K", + "total_output_size": "36K", + "total_tmp_size": "20K", + "Metrics": { + "cost_estimate": 0.004384172839506173, + "max_mem_used_MB": 713.4609375, + "min_mem_available_MB": 3163.0390625, + "total_mem_MB": 3876.5, + "max_mem_utilization_percent": 18.404770733909455, + "max_cpu_utilization_percent": 69.0, + "max_disk_space_utilization_percent": 1.0498987095511, + "max_disk_space_used_GB": 0.205371856689453, + "max_ebs_read_bytes": 69936332.8 + } + }, + "config": { + "EBS_optimized": true, + "ami_id": "ami-0a7ddfc7e412ab6e0", + "availability_zone": "", + "awsf_image": "aveit/tibanna-awsf:1.0.3.dev4", + "behavior_on_capacity_limit": "fail", + "cloudwatch_dashboard": false, + "cpu": "", + "ebs_iops": 3500, + "ebs_size": 20, + "ebs_type": "gp3", + "instance_type": "t3.small", + "job_tag": "", + "json_bucket": "aveit-tibanna-test-log", + "key_name": "", + "language": "cwl_v1", + "log_bucket": "aveit-tibanna-test-log", + "mem": 0, + "password": "", + "public_postrun_json": false, + "root_ebs_size": 8, + "run_name": "t3_medium_nonspot", + "script_url": "https://raw.githubusercontent.com/4dn-dcic/tibanna/master/awsf3/", + "security_group": "", + "shutdown_min": "now", + "spot_duration": "", + "spot_instance": true, + "subnet": "" + }, + "commands": [ + [ + "docker", + " run", + " -i", + " --mount=type=bind,source=/mnt/data1/tmp4jntok73,target=/OVwPJQ", + " --mount=type=bind,source=/mnt/data1/tmp3s4rl0wc,target=/tmp", + " --mount=type=bind,source=/data1/input/input/alg1.png.gz,target=/var/lib/cwl/stg59f7da6f-1d44-4615-abd3-b46c2ceefcf1/alg1.png.gz,readonly", + " --workdir=/OVwPJQ", + " --rm", + " --env=TMPDIR=/tmp", + " --env=HOME=/OVwPJQ", + " --cidfile=/data1/tmpvfaxztjn/20210301170033-764000.cid", + " aveit/md5:v2", + " run.sh", + " /var/lib/cwl/stg59f7da6f-1d44-4615-abd3-b46c2ceefcf1/alg1.png.gz" + ] + ] +} \ No newline at end of file diff --git a/test_json/unicorn/small_spot_io2_iops.postrun.json b/test_json/unicorn/small_spot_io2_iops.postrun.json new file mode 100644 index 000000000..2eff57fef --- /dev/null +++ b/test_json/unicorn/small_spot_io2_iops.postrun.json @@ -0,0 +1,119 @@ +{ + "Job": { + "App": { + "App_name": "", + "App_version": "", + 
"language": "cwl_v1", + "cwl_url": "https://raw.githubusercontent.com/alexander-veit/tibanna-workflows/main", + "main_cwl": "md5.cwl", + "other_cwl_files": "" + }, + "Input": { + "Input_files_data": { + "gzfile": { + "path": "input/alg1.png.gz", + "profile": "", + "rename": "", + "unzip": "", + "mount": "", + "class": "File", + "dir": "aveit-tibanna-test" + } + }, + "Secondary_files_data": {}, + "Input_parameters": {}, + "Env": {} + }, + "Output": { + "output_bucket_directory": "aveit-tibanna-test", + "output_target": { + "report": "output/my_first_md5_report" + }, + "secondary_output_target": {}, + "alt_cond_output_argnames": {}, + "Output files": { + "report": { + "path": "/data1/out/report", + "target": "output/my_first_md5_report", + "basename": "report", + "checksum": "sha1$d237002c1956532ffb9117972f3a98cd1344df56", + "location": "file:///data1/out/report", + "md5sum": "45ad098a40814f1e7792b4e17b5f74d0", + "size": 66, + "class": "File" + } + } + }, + "start_time": "20210301-21:56:27-UTC", + "JOBID": "NgN7IOBXjgRH", + "Log": { + "log_bucket_directory": "aveit-tibanna-test-log" + }, + "end_time": "20210301-22:01:49-UTC", + "status": "0", + "filesystem": "/dev/nvme1n1", + "instance_id": "i-01769a822e5dbb407", + "instance_availablity_zone": "us-east-1f", + "total_input_size": "12K", + "total_output_size": "36K", + "total_tmp_size": "20K", + "Metrics": { + "cost_estimate": 0.004384172839506173, + "max_mem_used_MB": 713.4609375, + "min_mem_available_MB": 3163.0390625, + "total_mem_MB": 3876.5, + "max_mem_utilization_percent": 18.404770733909455, + "max_cpu_utilization_percent": 69.0, + "max_disk_space_utilization_percent": 1.0498987095511, + "max_disk_space_used_GB": 0.205371856689453, + "max_ebs_read_bytes": 69936332.8 + } + }, + "config": { + "EBS_optimized": true, + "ami_id": "ami-0a7ddfc7e412ab6e0", + "availability_zone": "", + "awsf_image": "aveit/tibanna-awsf:1.0.3.dev4", + "behavior_on_capacity_limit": "fail", + "cloudwatch_dashboard": false, + "cpu": "", + "ebs_iops": 3500, + "ebs_size": 20, + "ebs_type": "io2", + "instance_type": "t3.small", + "job_tag": "", + "json_bucket": "aveit-tibanna-test-log", + "key_name": "", + "language": "cwl_v1", + "log_bucket": "aveit-tibanna-test-log", + "mem": 0, + "password": "", + "public_postrun_json": false, + "root_ebs_size": 8, + "run_name": "t3_medium_nonspot", + "script_url": "https://raw.githubusercontent.com/4dn-dcic/tibanna/master/awsf3/", + "security_group": "", + "shutdown_min": "now", + "spot_duration": "", + "spot_instance": true, + "subnet": "" + }, + "commands": [ + [ + "docker", + " run", + " -i", + " --mount=type=bind,source=/mnt/data1/tmp4jntok73,target=/OVwPJQ", + " --mount=type=bind,source=/mnt/data1/tmp3s4rl0wc,target=/tmp", + " --mount=type=bind,source=/data1/input/input/alg1.png.gz,target=/var/lib/cwl/stg59f7da6f-1d44-4615-abd3-b46c2ceefcf1/alg1.png.gz,readonly", + " --workdir=/OVwPJQ", + " --rm", + " --env=TMPDIR=/tmp", + " --env=HOME=/OVwPJQ", + " --cidfile=/data1/tmpvfaxztjn/20210301170033-764000.cid", + " aveit/md5:v2", + " run.sh", + " /var/lib/cwl/stg59f7da6f-1d44-4615-abd3-b46c2ceefcf1/alg1.png.gz" + ] + ] +} \ No newline at end of file diff --git a/tests/tibanna/unicorn/test_ec2_utils.py b/tests/tibanna/unicorn/test_ec2_utils.py index dc98ca037..1e2e095fd 100755 --- a/tests/tibanna/unicorn/test_ec2_utils.py +++ b/tests/tibanna/unicorn/test_ec2_utils.py @@ -4,7 +4,8 @@ Config, Execution, upload_workflow_to_s3, - get_file_size + get_file_size, + cost_estimate ) from tibanna.utils import create_jobid from 
tibanna.exceptions import ( @@ -13,8 +14,11 @@ EC2InstanceLimitException, EC2InstanceLimitWaitException ) +from tibanna.awsem import AwsemRunJson, AwsemPostRunJson import boto3 import pytest +import os +import json def fun(): @@ -763,3 +767,80 @@ def test_upload_workflow_to_s3(run_task_awsem_event_cwl_upload): Delete={'Objects': [{'Key': jobid + '.workflow/main.cwl'}, {'Key': jobid + '.workflow/child1.cwl'}, {'Key': jobid + '.workflow/child2.cwl'}]}) + + +def test_ec2_cost_estimate_missing_availability_zone(): + dir_path = os.path.dirname(os.path.realpath(__file__)) + file_name = "no_availability_zone.postrun.json" + file_name = os.path.join(dir_path, '..', '..', '..', 'test_json', 'unicorn', file_name) + with open(file_name, 'r') as file: + postrunjsonstr = file.read().replace('\n', '') + + postrunjsonobj = json.loads(postrunjsonstr) + postrunjson = AwsemPostRunJson(**postrunjsonobj) + assert cost_estimate(postrunjson) == 0.0 + + +def test_ec2_cost_estimate_medium_nonspot(): + dir_path = os.path.dirname(os.path.realpath(__file__)) + file_name = "medium_nonspot.postrun.json" + file_name = os.path.join(dir_path, '..', '..', '..', 'test_json', 'unicorn', file_name) + with open(file_name, 'r') as file: + postrunjsonstr = file.read().replace('\n', '') + + postrunjsonobj = json.loads(postrunjsonstr) + postrunjson = AwsemPostRunJson(**postrunjsonobj) + aws_price_overwrite = { + 'ec2_ondemand_price': 0.0416, 'ebs_root_storage_price': 0.08 + } + estimate = cost_estimate(postrunjson, aws_price_overwrite=aws_price_overwrite) + assert estimate == 0.004384172839506173 + + +def test_ec2_cost_estimate_small_spot(): + dir_path = os.path.dirname(os.path.realpath(__file__)) + file_name = "small_spot.postrun.json" + file_name = os.path.join(dir_path, '..', '..', '..', 'test_json', 'unicorn', file_name) + with open(file_name, 'r') as file: + postrunjsonstr = file.read().replace('\n', '') + + postrunjsonobj = json.loads(postrunjsonstr) + postrunjson = AwsemPostRunJson(**postrunjsonobj) + aws_price_overwrite = { + 'ec2_spot_price': 0.0064, 'ebs_root_storage_price': 0.08 + } + estimate = cost_estimate(postrunjson, aws_price_overwrite=aws_price_overwrite) + assert estimate == 0.0009326172839506175 + + +def test_ec2_cost_estimate_small_spot_gp3_iops(): + dir_path = os.path.dirname(os.path.realpath(__file__)) + file_name = "small_spot_gp3_iops.postrun.json" + file_name = os.path.join(dir_path, '..', '..', '..', 'test_json', 'unicorn', file_name) + with open(file_name, 'r') as file: + postrunjsonstr = file.read().replace('\n', '') + + postrunjsonobj = json.loads(postrunjsonstr) + postrunjson = AwsemPostRunJson(**postrunjsonobj) + aws_price_overwrite = { + 'ec2_spot_price': 0.0064, 'ebs_root_storage_price': 0.08, 'ebs_gp3_iops_price': 0.005 + } + estimate = cost_estimate(postrunjson, aws_price_overwrite=aws_price_overwrite) + assert estimate == 0.0012730879629629633 + + +def test_ec2_cost_estimate_small_spot_io2_iops(): + dir_path = os.path.dirname(os.path.realpath(__file__)) + file_name = "small_spot_io2_iops.postrun.json" + file_name = os.path.join(dir_path, '..', '..', '..', 'test_json', 'unicorn', file_name) + with open(file_name, 'r') as file: + postrunjsonstr = file.read().replace('\n', '') + + postrunjsonobj = json.loads(postrunjsonstr) + postrunjson = AwsemPostRunJson(**postrunjsonobj) + aws_price_overwrite = { + 'ec2_spot_price': 0.0064, 'ebs_root_storage_price': 0.08, 'ebs_storage_price': 0.125, 'ebs_io2_iops_prices': [0.065, 0.0455, 0.03185] + } + estimate = cost_estimate(postrunjson, 
aws_price_overwrite=aws_price_overwrite) + assert estimate == 0.029224481481481476 + diff --git a/tibanna/__main__.py b/tibanna/__main__.py index 978b1fe5f..81d70d9be 100755 --- a/tibanna/__main__.py +++ b/tibanna/__main__.py @@ -40,8 +40,9 @@ def descriptions(self): 'Use it only when the IAM permissions need to be reset', 'stat': 'print out executions with details', 'users': 'list all users along with their associated tibanna user groups', - 'plot_metrics': 'create a metrics report html and upload it to S3, or retrive one if one already exists', + 'plot_metrics': 'create a metrics report html and upload it to S3, or retrieve one if one already exists', 'cost': 'print out the EC2/EBS cost of a job - it may not be ready for a day after a job finishes', + 'cost_estimate': 'print out the estimated EC2/EBS cost of a job - available as soon as the job finishes. Returns the exact cost, if it is already available', 'cleanup': 'remove all tibanna component for a usergroup (and suffix) including step function, lambdas IAM groups', 'create_ami': 'create tibanna ami (Most users do not need this - tibanna AMIs are publicly available.)' } @@ -285,6 +286,12 @@ def args(self): {'flag': ["-u", "--update-tsv"], 'help': "add cost to the metric tsv file on S3", 'action': "store_true"}], + 'cost_estimate': + [{'flag': ["-j", "--job-id"], + 'help': "job id of the specific job to estimate the cost of"}, + {'flag': ["-u", "--update-tsv"], + 'help': "update estimated cost in the metric tsv file on S3", + 'action': "store_true"}], 'cleanup': [{'flag': ["-g", "--usergroup"], 'help': "Tibanna usergroup that shares the permission to access buckets and run jobs"}, @@ -445,6 +452,10 @@ def cost(job_id, sfn=TIBANNA_DEFAULT_STEP_FUNCTION_NAME, update_tsv=False): """print out cost of a specific job""" print(API().cost(job_id=job_id, sfn=sfn, update_tsv=update_tsv)) +def cost_estimate(job_id, update_tsv=False): + """print out estimated cost of a specific job""" + print(API().cost_estimate(job_id=job_id, update_tsv=update_tsv)) + def cleanup(usergroup, suffix='', purge_history=False, do_not_remove_iam_group=False, do_not_ignore_errors=False, quiet=False): API().cleanup(user_group_name=usergroup, suffix=suffix, do_not_remove_iam_group=do_not_remove_iam_group, diff --git a/tibanna/_version.py b/tibanna/_version.py index f6ed90690..6bf0a5b8e 100755 --- a/tibanna/_version.py +++ b/tibanna/_version.py @@ -1,4 +1,4 @@ """Version information.""" # The following line *must* be the last in the module, exactly as formatted: -__version__ = "1.0.5" +__version__ = "1.0.6" diff --git a/tibanna/awsem.py b/tibanna/awsem.py index b2a8cacfc..da43d58f6 100755 --- a/tibanna/awsem.py +++ b/tibanna/awsem.py @@ -289,7 +289,7 @@ def __init__(self, App=None, Input=None, Output=None, JOBID='', start_time=None, end_time=None, status=None, Log=None, total_input_size=None, total_output_size=None, total_tmp_size=None, # older postrunjsons don't have these fields - filesystem='', instance_id='', + filesystem='', instance_id='', instance_availablity_zone='', Metrics=None, strict=True): if strict: if App is None or Input is None or Output is None or not JOBID or start_time is None: @@ -300,6 +300,7 @@ self.status = status self.filesystem = filesystem self.instance_id = instance_id + self.instance_availablity_zone = instance_availablity_zone self.total_input_size = total_input_size self.total_output_size = total_output_size self.total_tmp_size = total_tmp_size diff --git 
a/tibanna/check_task.py b/tibanna/check_task.py index 1cd339b88..cc25f9c91 100755 --- a/tibanna/check_task.py +++ b/tibanna/check_task.py @@ -10,6 +10,9 @@ does_key_exist, read_s3 ) +from .ec2_utils import ( + cost_estimate +) from .awsem import ( AwsemPostRunJson ) @@ -187,10 +190,12 @@ def add_postrun_json(self, prj, input_json, limit): def handle_metrics(self, prj): try: + estimate = cost_estimate(prj) resources = self.TibannaResource(prj.Job.instance_id, prj.Job.filesystem, prj.Job.start_time_as_str, - prj.Job.end_time_as_str or datetime.now()) + prj.Job.end_time_as_str or datetime.now(), + cost_estimate=estimate) except Exception as e: raise MetricRetrievalException("error getting metrics: %s" % str(e)) prj.Job.update(Metrics=resources.as_dict()) diff --git a/tibanna/core.py b/tibanna/core.py index 3f9967f90..45107c3b1 100755 --- a/tibanna/core.py +++ b/tibanna/core.py @@ -1,6 +1,7 @@ # -*- coding: utf-8 -*- import os import boto3 +import botocore import json import time import copy @@ -44,7 +45,8 @@ ) from .ec2_utils import ( UnicornInput, - upload_workflow_to_s3 + upload_workflow_to_s3, + cost_estimate ) from .ami import AMI # from botocore.errorfactory import ExecutionAlreadyExists @@ -1038,7 +1040,8 @@ def plot_metrics(self, job_id, sfn=None, directory='.', open_browser=True, force self.TibannaResource.update_html(log_bucket, job_id + '.metrics/') else: try: - M = self.TibannaResource(instance_id, filesystem, starttime, endtime) + cost_estimate = self.cost_estimate(job_id=job_id) + M = self.TibannaResource(instance_id, filesystem, starttime, endtime, cost_estimate=cost_estimate) top_content = self.log(job_id=job_id, top=True) M.plot_metrics(instance_type, directory, top_content=top_content) except Exception as e: @@ -1053,6 +1056,49 @@ def plot_metrics(self, job_id, sfn=None, directory='.', open_browser=True, force if open_browser: webbrowser.open(METRICS_URL(log_bucket, job_id)) + def cost_estimate(self, job_id, update_tsv=False): + + # We return the real cost, if it is available, but don't automatically update the Cost row in the tsv + precise_cost = self.cost(job_id, update_tsv=False) + if(precise_cost and precise_cost > 0.0): + return precise_cost + + postrunjsonstr = self.log(job_id=job_id, postrunjson=True) + if not postrunjsonstr: + logger.info("Cost estimation error: postrunjson not found") + return 0.0 + postrunjsonobj = json.loads(postrunjsonstr) + postrunjson = AwsemPostRunJson(**postrunjsonobj) + + # awsf_image was added in 1.0.0. 
We use that to get the correct ebs root type + ebs_root_type = 'gp3' if 'awsf_image' in postrunjsonobj['config'] else 'gp2' + + cost = cost_estimate(postrunjson, ebs_root_type) + + if update_tsv: + log_bucket = postrunjson.config.log_bucket + # reading from metrics_report.tsv + does_key_exist(log_bucket, job_id + '.metrics/metrics_report.tsv') + read_file = read_s3(log_bucket, os.path.join(job_id + '.metrics/', 'metrics_report.tsv')) + + write_file = "" + for row in read_file.splitlines(): + # Remove Estimated_Cost from file, since we want to update it + if("Estimated_Cost" not in row.split("\t")): + write_file = write_file + row + '\n' + + write_file = write_file + 'Estimated_Cost\t' + str(cost) + '\n' + + # writing + with open('metrics_report.tsv', 'w') as fo: + fo.write(write_file) + # upload new metrics_report.tsv + upload('metrics_report.tsv', log_bucket, job_id + '.metrics/') + os.remove('metrics_report.tsv') + + return cost + + def cost(self, job_id, sfn=None, update_tsv=False): if not sfn: sfn = self.default_stepfunction_name @@ -1072,24 +1118,37 @@ def reformat_time(t, delta): 'Granularity': 'DAILY', 'TimePeriod': {'Start': start_time, 'End': end_time}, - 'Metrics': ['BlendedCost']} - billingres = boto3.client('ce').get_cost_and_usage(**billing_args) + 'Metrics': ['BlendedCost'], + } + + try: + billingres = boto3.client('ce').get_cost_and_usage(**billing_args) + except botocore.exceptions.ClientError as e: + logger.warning("%s. Please try to deploy the latest version of Tibanna." % e) + return 0.0 + cost = sum([float(_['Total']['BlendedCost']['Amount']) for _ in billingres['ResultsByTime']]) if update_tsv: log_bucket = postrunjson.config.log_bucket # reading from metrics_report.tsv does_key_exist(log_bucket, job_id + '.metrics/metrics_report.tsv') read_file = read_s3(log_bucket, os.path.join(job_id + '.metrics/', 'metrics_report.tsv')) - if 'Cost' not in read_file: - write_file = read_file + 'Cost\t' + str(cost) + '\n' - # writing - with open('metrics_report.tsv', 'w') as fo: - fo.write(write_file) - # upload new metrics_report.tsv - upload('metrics_report.tsv', log_bucket, job_id + '.metrics/') - os.remove('metrics_report.tsv') - else: - logger.info("cost already in the tsv file. not updating") + + write_file = "" + for row in read_file.splitlines(): + # Remove Cost from file, since we want to update it + if("Cost" not in row.split("\t")): + write_file = write_file + row + '\n' + + write_file = write_file + 'Cost\t' + str(cost) + '\n' + + # writing + with open('metrics_report.tsv', 'w') as fo: + fo.write(write_file) + # upload new metrics_report.tsv + upload('metrics_report.tsv', log_bucket, job_id + '.metrics/') + os.remove('metrics_report.tsv') + return cost def does_dynamo_table_exist(self, tablename): diff --git a/tibanna/cw_utils.py b/tibanna/cw_utils.py index 02f2d08f1..deaf99499 100755 --- a/tibanna/cw_utils.py +++ b/tibanna/cw_utils.py @@ -28,7 +28,7 @@ class TibannaResource(object): def convert_timestamp_to_datetime(cls, timestamp): return datetime.strptime(timestamp, cls.timestamp_format) - def __init__(self, instance_id, filesystem, starttime, endtime=datetime.utcnow()): + def __init__(self, instance_id, filesystem, starttime, endtime=datetime.utcnow(), cost_estimate=0.0): """All the Cloudwatch metrics are retrieved and stored at the initialization. :param instance_id: e.g. 'i-0167a6c2d25ce5822' :param filesystem: e.g. 
"/dev/xvdb", "/dev/nvme1n1" @@ -50,6 +50,7 @@ def __init__(self, instance_id, filesystem, starttime, endtime=datetime.utcnow() self.end = endtime.replace(microsecond=0) # initial endtime for the window requested self.nTimeChunks = nTimeChunks self.list_files = [] + self.cost_estimate = cost_estimate self.get_metrics(nTimeChunks) def get_metrics(self, nTimeChunks=1): @@ -119,6 +120,7 @@ def plot_metrics(self, instance_type, directory='.', top_content=''): 'max_disk_space_utilization_percent': (max_disk_space_utilization_percent_chunks_all_pts, 1), 'max_cpu_utilization_percent': (max_cpu_utilization_percent_chunks_all_pts, 5) } + self.list_files.extend(self.write_top_tsvs(directory, top_content)) self.list_files.append(self.write_tsv(directory, **input_dict)) self.list_files.append(self.write_metrics(instance_type, directory)) @@ -322,12 +324,14 @@ def max_ebs_read_used_all_pts(self): def write_html(self, instance_type, directory): self.check_mkdir(directory) filename = directory + '/' + 'metrics.html' + cost_estimate = '---' if self.cost_estimate == 0.0 else "{:.5f}".format(self.cost_estimate) with open(filename, 'w') as fo: fo.write(self.create_html() % (self.report_title, instance_type, str(self.max_mem_used_MB), str(self.min_mem_available_MB), str(self.max_disk_space_used_GB), str(self.max_mem_utilization_percent), str(self.max_cpu_utilization_percent), str(self.max_disk_space_utilization_percent), '---', # cost placeholder for now + cost_estimate, str(self.start), str(self.end), str(self.end - self.start) ) ) @@ -354,6 +358,8 @@ def update_html(cls, bucket, prefix, directory='.', upload_new=True): else: endtime = cls.convert_timestamp_to_datetime(d['Time_of_Request']) cost = d['Cost'] if 'Cost' in d else '---' + estimated_cost = (float)(d['Estimated_Cost']) if 'Estimated_Cost' in d else 0.0 + estimated_cost = str(estimated_cost) if estimated_cost > 0.0 else '---' instance = d['Instance_Type'] if 'Instance_Type' in d else '---' # writing with open(filename, 'w') as fo: @@ -361,6 +367,7 @@ def update_html(cls, bucket, prefix, directory='.', upload_new=True): d['Maximum_Memory_Used_Mb'], d['Minimum_Memory_Available_Mb'], d['Maximum_Disk_Used_Gb'], d['Maximum_Memory_Utilization'], d['Maximum_CPU_Utilization'], d['Maximum_Disk_Utilization'], cost, + estimated_cost, str(starttime), str(endtime), str(endtime-starttime) ) ) @@ -425,6 +432,7 @@ def write_metrics(self, instance_type, directory): fo.write('Start_Time' + '\t' + str(self.start) + '\n') fo.write('End_Time' + '\t' + str(self.end) + '\n') fo.write('Instance_Type' + '\t' + instance_type + '\n') + fo.write('Estimated_Cost' + '\t' + str(self.cost_estimate) + '\n') return(filename) @staticmethod @@ -615,6 +623,10 @@ def create_html(cls): Cost %s + + Cost (estimated) (USD) + %s +

diff --git a/tibanna/ec2_utils.py b/tibanna/ec2_utils.py index 7ded99f2d..918f32803 100755 --- a/tibanna/ec2_utils.py +++ b/tibanna/ec2_utils.py @@ -4,9 +4,11 @@ import os import logging import boto3 +import botocore import copy import re from . import create_logger +from datetime import datetime, timedelta from .utils import ( does_key_exist, create_jobid @@ -20,7 +22,8 @@ DYNAMODB_TABLE, DEFAULT_ROOT_EBS_SIZE, TIBANNA_AWSF_DIR, - DEFAULT_AWSF_IMAGE + DEFAULT_AWSF_IMAGE, + AWS_REGION_NAMES ) from .exceptions import ( MissingFieldInInputJsonException, @@ -30,7 +33,8 @@ EC2InstanceLimitWaitException, DependencyStillRunningException, DependencyFailedException, - UnsupportedCWLVersionException + UnsupportedCWLVersionException, + PricingRetrievalException ) from .base import SerializableObject from .nnested_array import flatten, run_on_nested_arrays1 @@ -38,6 +42,7 @@ from Benchmark import run as B from Benchmark.classes import get_instance_types, instance_list from Benchmark.byteformat import B2GB + NONSPOT_EC2_PARAM_LIST = ['TagSpecifications', 'InstanceInitiatedShutdownBehavior', 'MaxCount', 'MinCount', 'DisableApiTermination'] @@ -709,7 +714,7 @@ def launch_args(self): return largs def get_instance_info(self): - # get public IP for the instance (This may not happen immediately) + # get public IP and availability zone for the instance (This may not happen immediately) try: ec2 = boto3.client('ec2') except Exception as e: @@ -721,12 +726,17 @@ instance_desc_log = ec2.describe_instances(InstanceIds=[self.instance_id]) if 'PublicIpAddress' not in instance_desc_log['Reservations'][0]['Instances'][0]: instance_ip = '' + availability_zone = '' break instance_ip = instance_desc_log['Reservations'][0]['Instances'][0]['PublicIpAddress'] + availability_zone = instance_desc_log['Reservations'][0]['Instances'][0]["Placement"]["AvailabilityZone"] break except: continue - return({'instance_id': self.instance_id, 'instance_ip': instance_ip, 'start_time': self.get_start_time()}) + return({'instance_id': self.instance_id, + 'instance_ip': instance_ip, + 'availability_zone': availability_zone, + 'start_time': self.get_start_time()}) def check_dependency(self, exec_arn=None): if exec_arn: @@ -882,6 +892,324 @@ def create_cloudwatch_dashboard(self, dashboard_name): ) +def cost_estimate(postrunjson, ebs_root_type="gp3", aws_price_overwrite=None): + """ + aws_price_overwrite can be used to overwrite the prices obtained from AWS (e.g. ec2 spot price). + This allows historical cost estimates. It is also used for testing. It is a dictionary with keys: + ec2_spot_price, ec2_ondemand_price, ebs_root_storage_price, ebs_gp3_iops_price, ebs_storage_price, + ebs_io1_iops_price, ebs_io2_iops_prices + """ + + cfg = postrunjson.config + job = postrunjson.Job + estimated_cost = 0.0 + + job_start = datetime.strptime(job.start_time, '%Y%m%d-%H:%M:%S-UTC') + job_end = datetime.strptime(job.end_time, '%Y%m%d-%H:%M:%S-UTC') + job_duration = (job_end - job_start).total_seconds() / 3600.0 # in hours + + try: + pricing_client = boto3.client('pricing', region_name=AWS_REGION) + + # Get EC2 spot price + if(cfg.spot_instance): + if(cfg.spot_duration): + raise PricingRetrievalException("Pricing with spot_duration is not supported") + + if(not job.instance_availablity_zone): + raise PricingRetrievalException("Instance availability zone is not available. 
You might have to deploy a newer version of Tibanna.") + + ec2_client = boto3.client('ec2', region_name=AWS_REGION) + prices = ec2_client.describe_spot_price_history( + InstanceTypes=[cfg.instance_type], + ProductDescriptions=['Linux/UNIX'], + AvailabilityZone=job.instance_availablity_zone, + MaxResults=1) # Most recent price is on top + + if(len(prices['SpotPriceHistory']) == 0): + raise PricingRetrievalException("Spot price could not be retrieved") + + ec2_spot_price = (float)(prices['SpotPriceHistory'][0]['SpotPrice']) + + if((aws_price_overwrite is not None) and 'ec2_spot_price' in aws_price_overwrite): + ec2_spot_price = aws_price_overwrite['ec2_spot_price'] + + estimated_cost = estimated_cost + ec2_spot_price * job_duration + + else: # EC2 onDemand Prices + + prices = pricing_client.get_products(ServiceCode='AmazonEC2', Filters=[ + { + 'Type': 'TERM_MATCH', + 'Field': 'instanceType', + 'Value': cfg.instance_type + }, + { + 'Type': 'TERM_MATCH', + 'Field': 'operatingSystem', + 'Value': 'Linux' + }, + { + 'Type': 'TERM_MATCH', + 'Field': 'location', + 'Value': AWS_REGION_NAMES[AWS_REGION] + }, + { + 'Type': 'TERM_MATCH', + 'Field': 'preInstalledSw', + 'Value': 'NA' + }, + { + 'Type': 'TERM_MATCH', + 'Field': 'capacitystatus', + 'Value': 'used' + }, + { + 'Type': 'TERM_MATCH', + 'Field': 'tenancy', + 'Value': 'Shared' + }, + ]) + price_list = prices["PriceList"] + + if(not prices["PriceList"] or len(price_list) == 0): + raise PricingRetrievalException("We could not retrieve EC2 prices from Amazon") + + if(len(price_list) > 1): + raise PricingRetrievalException("EC2 prices are ambiguous") + + price_item = json.loads(price_list[0]) + terms = price_item["terms"] + term = list(terms["OnDemand"].values())[0] + price_dimension = list(term["priceDimensions"].values())[0] + ec2_ondemand_price = (float)(price_dimension['pricePerUnit']["USD"]) + + + if((aws_price_overwrite is not None) and 'ec2_ondemand_price' in aws_price_overwrite): + ec2_ondemand_price = aws_price_overwrite['ec2_ondemand_price'] + + estimated_cost = estimated_cost + ec2_ondemand_price * job_duration + + + # Get EBS pricing + + prices = pricing_client.get_products(ServiceCode='AmazonEC2', Filters=[ + { + 'Type': 'TERM_MATCH', + 'Field': 'location', + 'Value': AWS_REGION_NAMES[AWS_REGION] + }, + { + 'Field': 'volumeApiName', + 'Type': 'TERM_MATCH', + 'Value': ebs_root_type, + }, + { + 'Field': 'productFamily', + 'Type': 'TERM_MATCH', + 'Value': 'Storage', + }, + ]) + price_list = prices["PriceList"] + + if(not prices["PriceList"] or len(price_list) == 0): + raise PricingRetrievalException("We could not retrieve EBS prices from Amazon") + + if(len(price_list) > 1): + raise PricingRetrievalException("EBS prices are ambiguous") + + price_item = json.loads(price_list[0]) + terms = price_item["terms"] + term = list(terms["OnDemand"].values())[0] + price_dimension = list(term["priceDimensions"].values())[0] + ebs_root_storage_price = (float)(price_dimension['pricePerUnit']["USD"]) + + if((aws_price_overwrite is not None) and 'ebs_root_storage_price' in aws_price_overwrite): + ebs_root_storage_price = aws_price_overwrite['ebs_root_storage_price'] + + # add root EBS costs + root_ebs_cost = ebs_root_storage_price * cfg.root_ebs_size * job_duration / (24.0*30.0) + estimated_cost = estimated_cost + root_ebs_cost + + # add additional EBS costs + if(cfg.ebs_type == "gp3"): + ebs_storage_cost = ebs_root_storage_price * cfg.ebs_size * job_duration / (24.0*30.0) + estimated_cost = estimated_cost + ebs_storage_cost + + if(cfg.ebs_iops): + prices = 
pricing_client.get_products(ServiceCode='AmazonEC2', Filters=[ + { + 'Type': 'TERM_MATCH', + 'Field': 'location', + 'Value': AWS_REGION_NAMES[AWS_REGION] + }, + { + 'Field': 'volumeApiName', + 'Type': 'TERM_MATCH', + 'Value': cfg.ebs_type, + }, + { + 'Field': 'productFamily', + 'Type': 'TERM_MATCH', + 'Value': 'System Operation', + }, + ]) + price_list = prices["PriceList"] + + if(not prices["PriceList"] or len(price_list) == 0): + raise PricingRetrievalException("We could not retrieve EBS IOPS prices from Amazon") + if(len(price_list) > 1): + raise PricingRetrievalException("EBS IOPS prices are ambiguous") + + price_item = json.loads(price_list[0]) + terms = price_item["terms"] + term = list(terms["OnDemand"].values())[0] + price_dimension = list(term["priceDimensions"].values())[0] + ebs_gp3_iops_price = (float)(price_dimension['pricePerUnit']["USD"]) + + if((aws_price_overwrite is not None) and 'ebs_gp3_iops_price' in aws_price_overwrite): + ebs_gp3_iops_price = aws_price_overwrite['ebs_gp3_iops_price'] + + free_tier = 3000 + ebs_iops_cost = ebs_gp3_iops_price * max(cfg.ebs_iops - free_tier, 0) * job_duration / (24.0*30.0) + estimated_cost = estimated_cost + ebs_iops_cost + + else: + prices = pricing_client.get_products(ServiceCode='AmazonEC2', Filters=[ + { + 'Type': 'TERM_MATCH', + 'Field': 'location', + 'Value': AWS_REGION_NAMES[AWS_REGION] + }, + { + 'Field': 'volumeApiName', + 'Type': 'TERM_MATCH', + 'Value': cfg.ebs_type, + }, + { + 'Field': 'productFamily', + 'Type': 'TERM_MATCH', + 'Value': 'Storage', + }, + ]) + price_list = prices["PriceList"] + + if(not prices["PriceList"] or len(price_list) == 0): + raise PricingRetrievalException("We could not retrieve EBS prices from Amazon") + + if(len(price_list) > 1): + raise PricingRetrievalException("EBS prices are ambiguous") + + price_item = json.loads(price_list[0]) + terms = price_item["terms"] + term = list(terms["OnDemand"].values())[0] + price_dimension = list(term["priceDimensions"].values())[0] + ebs_storage_price = (float)(price_dimension['pricePerUnit']["USD"]) + + if((aws_price_overwrite is not None) and 'ebs_storage_price' in aws_price_overwrite): + ebs_storage_price = aws_price_overwrite['ebs_storage_price'] + + add_ebs_cost = ebs_storage_price * cfg.ebs_size * job_duration / (24.0*30.0) + estimated_cost = estimated_cost + add_ebs_cost + + # Add IOPS prices for io1 + if(cfg.ebs_type == "io1" and cfg.ebs_iops): + prices = pricing_client.get_products(ServiceCode='AmazonEC2', Filters=[ + { + 'Type': 'TERM_MATCH', + 'Field': 'location', + 'Value': AWS_REGION_NAMES[AWS_REGION] + }, + { + 'Field': 'volumeApiName', + 'Type': 'TERM_MATCH', + 'Value': cfg.ebs_type, + }, + { + 'Field': 'productFamily', + 'Type': 'TERM_MATCH', + 'Value': 'System Operation', + }, + ]) + price_list = prices["PriceList"] + + if(not prices["PriceList"] or len(price_list) == 0): + raise PricingRetrievalException("We could not retrieve EBS prices from Amazon") + if(len(price_list) > 1): + raise PricingRetrievalException("EBS prices are ambiguous") + + price_item = json.loads(price_list[0]) + terms = price_item["terms"] + term = list(terms["OnDemand"].values())[0] + price_dimension = list(term["priceDimensions"].values())[0] + ebs_io1_iops_price = (float)(price_dimension['pricePerUnit']["USD"]) + + if((aws_price_overwrite is not None) and 'ebs_io1_iops_price' in aws_price_overwrite): + ebs_io1_iops_price = aws_price_overwrite['ebs_io1_iops_price'] + + ebs_iops_cost = ebs_io1_iops_price * cfg.ebs_iops * job_duration / (24.0*30.0) + estimated_cost = 
estimated_cost + ebs_iops_cost + + elif (cfg.ebs_type == "io2" and cfg.ebs_iops): + prices = pricing_client.get_products(ServiceCode='AmazonEC2', Filters=[ + { + 'Type': 'TERM_MATCH', + 'Field': 'location', + 'Value': AWS_REGION_NAMES[AWS_REGION] + }, + { + 'Field': 'volumeApiName', + 'Type': 'TERM_MATCH', + 'Value': cfg.ebs_type, + }, + { + 'Field': 'productFamily', + 'Type': 'TERM_MATCH', + 'Value': 'System Operation', + }, + ]) + price_list = prices["PriceList"] + + if(len(price_list) != 3): + raise PricingRetrievalException("EBS prices for io2 are incomplete") + + ebs_io2_iops_prices = [] + for price_entry in price_list: + price_item = json.loads(price_entry) + terms = price_item["terms"] + term = list(terms["OnDemand"].values())[0] + price_dimension = list(term["priceDimensions"].values())[0] + ebs_iops_price = (float)(price_dimension['pricePerUnit']["USD"]) + ebs_io2_iops_prices.append(ebs_iops_price) + ebs_io2_iops_prices.sort(reverse=True) + + if((aws_price_overwrite is not None) and 'ebs_io2_iops_prices' in aws_price_overwrite): + ebs_io2_iops_prices = aws_price_overwrite['ebs_io2_iops_prices'] + + # Pricing tiers are currently hardcoded. There wasn't a simple way to extract them from the pricing information + tier0 = 32000 + tier1 = 64000 + + ebs_iops_cost = ( + ebs_io2_iops_prices[0] * min(cfg.ebs_iops, tier0) + # Portion below 32000 IOPS + ebs_io2_iops_prices[1] * min(max(cfg.ebs_iops - tier0, 0), tier1 - tier0) + # Portion between 32001 and 64000 IOPS + ebs_io2_iops_prices[2] * max(cfg.ebs_iops - tier1, 0) # Portion above 64000 IOPS + ) * job_duration / (24.0*30.0) + estimated_cost = estimated_cost + ebs_iops_cost + + return estimated_cost + + except botocore.exceptions.ClientError as e: + logger.warning("Cost estimation error: %s. Please try to deploy the latest version of Tibanna." % e) + return 0.0 + except PricingRetrievalException as e: + logger.warning("Cost estimation error: %s" % e) + return 0.0 + except Exception as e: + logger.warning("Cost estimation error: %s" % e) + return 0.0 + + def upload_workflow_to_s3(unicorn_input): """input is a UnicornInput object""" args = unicorn_input.args diff --git a/tibanna/exceptions.py b/tibanna/exceptions.py index 9cc8e86fc..74dccac48 100755 --- a/tibanna/exceptions.py +++ b/tibanna/exceptions.py @@ -129,6 +129,10 @@ class MetricRetrievalException(Exception): pass +class PricingRetrievalException(Exception): + pass + + class UnsupportedCWLVersionException(Exception): def __init__(self, message=None): if not message: diff --git a/tibanna/iam_utils.py b/tibanna/iam_utils.py index bed2a9454..e1d01f640 100755 --- a/tibanna/iam_utils.py +++ b/tibanna/iam_utils.py @@ -67,7 +67,7 @@ def iam_group_name(self): def policy_types(self): return ['bucket', 'termination', 'list', 'cloudwatch', 'passrole', 'lambdainvoke', 'cloudwatch_metric', 'cw_dashboard', 'dynamodb', 'ec2_desc', - 'executions'] + 'executions', 'pricing'] def policy_arn(self, policy_type): return 'arn:aws:iam::' + self.account_id + ':policy/' + self.policy_name(policy_type) @@ -83,6 +83,7 @@ def policy_suffix(self, policy_type): 'cw_dashboard': 'cw_dashboard', 'dynamodb': 'dynamodb', 'ec2_desc': 'ec2_desc', + 'pricing': 'pricing', 'executions': 'executions'} if policy_type not in suffices: raise Exception("policy %s must be one of %s." 
% (policy_type, str(self.policy_types))) @@ -102,6 +103,7 @@ def policy_definition(self, policy_type): 'cw_dashboard': self.policy_cw_dashboard, 'dynamodb': self.policy_dynamodb, 'ec2_desc': self.policy_ec2_desc_policy, + 'pricing': self.policy_pricing, 'executions': self.policy_executions} if policy_type not in definitions: raise Exception("policy %s must be one of %s." % (policy_type, str(self.policy_types))) @@ -134,7 +136,7 @@ def policy_arn_list_for_role(self, role_type): run_task_custom_policy_types = ['list', 'cloudwatch', 'passrole', 'bucket', 'dynamodb', 'executions', 'cw_dashboard'] check_task_custom_policy_types = ['cloudwatch_metric', 'cloudwatch', 'bucket', 'ec2_desc', - 'termination', 'dynamodb'] + 'termination', 'dynamodb', 'pricing'] arnlist = {'ec2': [self.policy_arn(_) for _ in ['bucket', 'cloudwatch_metric']] + ['arn:aws:iam::aws:policy/AmazonEC2ContainerRegistryReadOnly'], # 'stepfunction': [self.policy_arn(_) for _ in ['lambdainvoke']], @@ -363,7 +365,27 @@ def policy_ec2_desc_policy(self): "Effect": "Allow", "Action": [ "ec2:DescribeInstances", - "ec2:DescribeInstanceStatus" + "ec2:DescribeInstanceStatus", + "ec2:DescribeSpotPriceHistory" + ], + "Resource": "*" + } + ] + } + return policy + + @property + def policy_pricing(self): + policy = { + "Version": "2012-10-17", + "Statement": [ + { + "Effect": "Allow", + "Action": [ + "pricing:DescribeServices", + "pricing:GetAttributeValues", + "pricing:GetProducts", + "ce:GetCostAndUsage" ], "Resource": "*" } @@ -449,8 +471,7 @@ def create_user_group(self, verbose=False): ) if verbose: logger.debug("response from IAM attach_policy :" + str(response)) - custom_policy_types = ['bucket', 'ec2_desc', 'cloudwatch_metric', 'dynamodb', - 'termination', 'executions'] + custom_policy_types = ['bucket', 'ec2_desc', 'cloudwatch_metric', 'dynamodb', 'termination', 'pricing', 'executions'] for pn in [self.policy_name(pt) for pt in custom_policy_types]: response = group.attach_policy( PolicyArn='arn:aws:iam::' + self.account_id + ':policy/' + pn diff --git a/tibanna/vars.py b/tibanna/vars.py index 98507e2c5..1031213ed 100755 --- a/tibanna/vars.py +++ b/tibanna/vars.py @@ -63,6 +63,27 @@ logger.warning("Public Tibanna AMI for region %s is not available." % AWS_REGION) AMI_ID = AMI_PER_REGION.get(AWS_REGION, '') +AWS_REGION_NAMES = { + 'us-east-1': 'US East (N. Virginia)', + 'us-east-2': 'US East (Ohio)', + 'us-west-1': 'US West (N. California)', + 'us-west-2': 'US West (Oregon)', + 'ca-central-1': 'Canada (Central)', + 'eu-north-1': 'EU (Stockholm)', + 'eu-west-1': 'EU (Ireland)', + 'eu-central-1': 'EU (Frankfurt)', + 'eu-west-2': 'EU (London)', + 'eu-west-3': 'EU (Paris)', + 'ap-northeast-1': 'Asia Pacific (Tokyo)', + 'ap-northeast-2': 'Asia Pacific (Seoul)', + 'ap-northeast-3': 'Asia Pacific (Osaka-Local)', + 'ap-southeast-1': 'Asia Pacific (Singapore)', + 'ap-southeast-2': 'Asia Pacific (Sydney)', + 'ap-south-1': 'Asia Pacific (Mumbai)', + 'sa-east-1': 'South America (Sao Paulo)', # intentionally no unicode, + 'us-gov-west-1': 'AWS GovCloud (US)', + 'us-gov-east-1': 'AWS GovCloud (US-East)' +} # Tibanna repo from which awsf scripts are pulled TIBANNA_REPO_NAME = os.environ.get('TIBANNA_REPO_NAME', '4dn-dcic/tibanna')
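For reference, a worked example of the tiered io2 IOPS charge computed by ``cost_estimate`` above. The per-tier monthly prices are the fixture values from ``test_ec2_cost_estimate_small_spot_io2_iops`` (sorted highest first, as after ``sort(reverse=True)``); the 40,000 provisioned IOPS figure is hypothetical, chosen so that two pricing tiers apply:

::

    prices = [0.065, 0.0455, 0.03185]  # USD per provisioned IOPS-month (test fixture)
    iops = 40000                       # hypothetical provisioned IOPS
    tier0, tier1 = 32000, 64000        # AWS io2 tiers, hardcoded in the estimator

    monthly = (prices[0] * min(iops, tier0)                            # first 32,000 IOPS
               + prices[1] * min(max(iops - tier0, 0), tier1 - tier0)  # 32,001-64,000
               + prices[2] * max(iops - tier1, 0))                     # above 64,000

    job_duration = 322 / 3600.0  # hours, from the io2 fixture (21:56:27 to 22:01:49)
    print(monthly * job_duration / (24.0 * 30.0))  # ~0.3036 USD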