Add cost estimation (#315)
* Add cost estimation

* Add cost estimation to metrics file after run

* Fix type and formatting

* Add CostAndUsage policy

* Make tsv updateable

* Update cost estimate when updating costs in tsv

* Support cost estimates for io1 and io2

* Fix typo

* Add gp3 iops pricing, remove sfn argument

* Add to docs, more useful error messages

* Version bump

* Get correct ebs root type

* Remove version package

* Add tests

* Add option overwrite_cost_estimate_in_tsv

* Simplify tsv update when calling cost and cost_estimate
alexander-veit authored Mar 2, 2021
1 parent 31e47f2 commit 0592481
Showing 22 changed files with 1,214 additions and 37 deletions.
3 changes: 3 additions & 0 deletions awsf3-docker/run.sh
@@ -44,9 +44,12 @@ export TOPFILE=$LOCAL_OUTDIR/$JOBID.top # now top command output goes to a sepa
export TOPLATESTFILE=$LOCAL_OUTDIR/$JOBID.top_latest # this one includes only the latest top command output
export INSTANCE_ID=$(ec2metadata --instance-id|cut -d' ' -f2)
export INSTANCE_REGION=$(ec2metadata --availability-zone | sed 's/[a-z]$//')
export INSTANCE_AVAILABILITY_ZONE=$(ec2metadata --availability-zone)
export AWS_ACCOUNT_ID=$(aws sts get-caller-identity| grep Account | sed 's/[^0-9]//g')
export AWS_REGION=$INSTANCE_REGION # this is for importing awsf3 package which imports tibanna package



# function that executes a command and collects the log
exl(){ $@ >> $LOGFILE 2>> $LOGFILE; handle_error $?; } ## usage: exl command ## ERRCODE has the error code for the command. if something is wrong, send error to s3.
exlj(){ $@ >> $LOGJSONFILE 2>> $LOGFILE; ERRCODE=$?; cat $LOGJSONFILE >> $LOGFILE; handle_error $ERRCODE; } ## usage: exlj command ## ERRCODE has the error code for the command. if something is wrong, send error to s3. This one separates stdout to json as well.
6 changes: 4 additions & 2 deletions awsf3/utils.py
@@ -325,13 +325,14 @@ def write_postrun_json(jsonfile, prj):


def update_postrun_json_init(json_old, json_new):
"""Update postrun json with just instance ID and filesystem"""
"""Update postrun json with just instance ID, filesystem and availability zone"""
# read old json file
prj = read_postrun_json(json_old)

# simply add instance ID and file system
# simply add instance ID, file system and availability zone
prj.Job.instance_id = os.getenv('INSTANCE_ID')
prj.Job.filesystem = os.getenv('EBS_DEVICE')
prj.Job.instance_availablity_zone = os.getenv('INSTANCE_AVAILABILITY_ZONE')

# write to new json file
write_postrun_json(json_new, prj)
@@ -441,6 +442,7 @@ def postrun_json_final(prj, logfile=None):
prj_job.update(total_output_size=os.getenv('OUTPUTSIZE'))



def upload_postrun_json(jsonfile):
prj = read_postrun_json(jsonfile)
bucket = prj.Job.Log.log_bucket_directory
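The ``update_postrun_json_init`` helper changed above can be exercised on its own. A minimal sketch, with hypothetical file names and the environment variables (normally exported by ``awsf3-docker/run.sh`` on the instance, as in the previous diff) seeded by hand with the values from the test fixtures below:

    import os
    from awsf3.utils import update_postrun_json_init

    # Normally exported by run.sh on the EC2 instance; set by hand here.
    os.environ['INSTANCE_ID'] = 'i-01769a822e5dbb407'
    os.environ['EBS_DEVICE'] = '/dev/nvme1n1'
    os.environ['INSTANCE_AVAILABILITY_ZONE'] = 'us-east-1b'

    # Hypothetical file names: reads the old postrun json, adds instance_id,
    # filesystem and instance_availablity_zone to Job, and writes the new json.
    update_postrun_json_init('JOBID.postrun.json.old', 'JOBID.postrun.json.new')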
19 changes: 18 additions & 1 deletion docs/api.rst
@@ -562,6 +562,23 @@ To retrieve the cost and update the metrics report file created with plot_metric
variable is not set, it uses name 'tibanna_pony' (4dn
default, works only for 4dn).

update_tsv This flag specify to update with cost the tsv file that
update_tsv This flag specifies whether to update the cost in the tsv file that
stores metrics information on the S3 bucket


cost_estimate
-------------

Retrieve a cost estimate for a specific job. The estimate is available as soon as the job has finished. If the exact cost is already available, this function returns the exact cost instead.

::

API().cost_estimate(job_id=<jobid>, ...)

**Options**

::

update_tsv This flag specifies whether to update the cost in the tsv file that
stores metrics information on the S3 bucket
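
A minimal usage sketch from Python (the job ID is taken from the test fixtures added in this commit; ``API`` is assumed to be the class exposed by ``tibanna.core``, and ``update_tsv`` is the option described above)::

    from tibanna.core import API

    # Returns the estimated cost (or the exact cost, once AWS reports it);
    # update_tsv=True also writes the value into the metrics tsv on S3.
    cost = API().cost_estimate(job_id='NgN7IOBXjgRH', update_tsv=True)
    print(cost)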

20 changes: 19 additions & 1 deletion docs/commands.rst
@@ -547,7 +547,8 @@ To collect, save and visualize the resources metrics from Cloud Watch
cost
----

To retrieve the cost and update the metrics report file created with plot_metrics
To retrieve the cost and update the metrics report file created with plot_metrics. The cost is typically available 24 hours after the job has finished.
This function is available to non-admin users starting with version 1.0.6.

::

@@ -567,4 +568,21 @@ To retrieve the cost and update the metrics report file created with plot_metric
-u|--update-tsv Update the tsv file that stores metrics information
                on the S3 bucket with the cost

cost_estimate
-------------

To retrieve a cost estimate and update the metrics report file created with plot_metrics. In contrast to the exact cost, the estimate is available immediately after the job has completed.
This function requires a (deployed) Tibanna version >=1.0.6.

::

tibanna cost_estimate --job-id=<jobid> [<options>]

**Options**

::

-u|--update-tsv Update the tsv file that stores metrics information
                on the S3 bucket with the cost


6 changes: 3 additions & 3 deletions docs/installation.rst
@@ -356,18 +356,18 @@ You will see the list of users.
monty


This command will print out the list of users.
The following command will add a user to a specific user group.

::

tibanna add_users --user=<user> --group=<usergroup>
tibanna add_user --user=<user> --usergroup=<usergroup>


For example, say you have a user named ``monty`` and you want to allow this user to use the Tibanna usergroup ``lalala``. The command below gives this user permission to run and monitor the workflow and to access the buckets that Tibanna usergroup ``lalala`` was given access to through ``tibanna deploy_unicorn --buckets=<b1>,<b2>,...``

::

tibanna add_users --user=monty --group=lalala
tibanna add_user --user=monty --usergroup=lalala


Check users again.
2 changes: 1 addition & 1 deletion docs/monitoring.rst
@@ -472,7 +472,7 @@ By default the command will retrieve the data from cloud watch, and creates seve
All the files are eventually uploaded to a folder named ``<jobid>.metrics`` inside the log S3 bucket specified for tibanna output.
To visualize the html report the URL structure is: ``https://<log-bucket>.s3.amazonaws.com/<jobid>.metrics/metrics.html``

Starting with ``1.0.0``, the metrics plot will include per-process CPU and memory profiles retrived from the top command reports at a 1-minute interval. Additional files `top_cpu.tsv` and `top_mem.tsv` will also be created under the same folder ``<jobid>.metrics``.
Starting with ``1.0.0``, the metrics plot will include per-process CPU and memory profiles retrieved from the top command reports at a 1-minute interval. Additional files `top_cpu.tsv` and `top_mem.tsv` will also be created under the same folder ``<jobid>.metrics``.



119 changes: 119 additions & 0 deletions test_json/unicorn/medium_nonspot.postrun.json
@@ -0,0 +1,119 @@
{
"Job": {
"App": {
"App_name": "",
"App_version": "",
"language": "cwl_v1",
"cwl_url": "https://raw.githubusercontent.com/alexander-veit/tibanna-workflows/main",
"main_cwl": "md5.cwl",
"other_cwl_files": ""
},
"Input": {
"Input_files_data": {
"gzfile": {
"path": "input/alg1.png.gz",
"profile": "",
"rename": "",
"unzip": "",
"mount": "",
"class": "File",
"dir": "aveit-tibanna-test"
}
},
"Secondary_files_data": {},
"Input_parameters": {},
"Env": {}
},
"Output": {
"output_bucket_directory": "aveit-tibanna-test",
"output_target": {
"report": "output/my_first_md5_report"
},
"secondary_output_target": {},
"alt_cond_output_argnames": {},
"Output files": {
"report": {
"path": "/data1/out/report",
"target": "output/my_first_md5_report",
"basename": "report",
"checksum": "sha1$d237002c1956532ffb9117972f3a98cd1344df56",
"location": "file:///data1/out/report",
"md5sum": "45ad098a40814f1e7792b4e17b5f74d0",
"size": 66,
"class": "File"
}
}
},
"start_time": "20210301-16:58:05-UTC",
"JOBID": "NgN7IOBXjgRH",
"Log": {
"log_bucket_directory": "aveit-tibanna-test-log"
},
"end_time": "20210301-17:03:58-UTC",
"status": "0",
"filesystem": "/dev/nvme1n1",
"instance_id": "i-01769a822e5dbb407",
"instance_availablity_zone": "us-east-1b",
"total_input_size": "12K",
"total_output_size": "36K",
"total_tmp_size": "20K",
"Metrics": {
"cost_estimate": 0.004384172839506173,
"max_mem_used_MB": 713.4609375,
"min_mem_available_MB": 3163.0390625,
"total_mem_MB": 3876.5,
"max_mem_utilization_percent": 18.404770733909455,
"max_cpu_utilization_percent": 69.0,
"max_disk_space_utilization_percent": 1.0498987095511,
"max_disk_space_used_GB": 0.205371856689453,
"max_ebs_read_bytes": 69936332.8
}
},
"config": {
"EBS_optimized": true,
"ami_id": "ami-0a7ddfc7e412ab6e0",
"availability_zone": "",
"awsf_image": "aveit/tibanna-awsf:1.0.3.dev4",
"behavior_on_capacity_limit": "fail",
"cloudwatch_dashboard": false,
"cpu": "",
"ebs_iops": "",
"ebs_size": 20,
"ebs_type": "gp3",
"instance_type": "t3.medium",
"job_tag": "",
"json_bucket": "aveit-tibanna-test-log",
"key_name": "",
"language": "cwl_v1",
"log_bucket": "aveit-tibanna-test-log",
"mem": 0,
"password": "",
"public_postrun_json": false,
"root_ebs_size": 8,
"run_name": "t3_medium_nonspot",
"script_url": "https://raw.githubusercontent.com/4dn-dcic/tibanna/master/awsf3/",
"security_group": "",
"shutdown_min": "now",
"spot_duration": "",
"spot_instance": false,
"subnet": ""
},
"commands": [
[
"docker",
" run",
" -i",
" --mount=type=bind,source=/mnt/data1/tmp4jntok73,target=/OVwPJQ",
" --mount=type=bind,source=/mnt/data1/tmp3s4rl0wc,target=/tmp",
" --mount=type=bind,source=/data1/input/input/alg1.png.gz,target=/var/lib/cwl/stg59f7da6f-1d44-4615-abd3-b46c2ceefcf1/alg1.png.gz,readonly",
" --workdir=/OVwPJQ",
" --rm",
" --env=TMPDIR=/tmp",
" --env=HOME=/OVwPJQ",
" --cidfile=/data1/tmpvfaxztjn/20210301170033-764000.cid",
" aveit/md5:v2",
" run.sh",
" /var/lib/cwl/stg59f7da6f-1d44-4615-abd3-b46c2ceefcf1/alg1.png.gz"
]
]
}
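The ``cost_estimate`` value in this fixture is consistent with simple on-demand arithmetic. A back-of-the-envelope sketch, assuming the us-east-1 list prices for a t3.medium instance and gp3 storage at the time (the actual estimator added in this commit also covers io1/io2 and gp3 IOPS pricing, per the commit messages above):

    # Rough check of the fixture's Metrics.cost_estimate; assumed list prices,
    # not the package's actual code path.
    runtime_hr = (5 * 60 + 53) / 3600     # 16:58:05 -> 17:03:58 = 353 s
    ec2_per_hr = 0.0416                   # t3.medium on-demand, USD per hour
    ebs_per_gb_month = 0.08               # gp3, USD per GB-month
    ebs_gb = 20 + 8                       # ebs_size + root_ebs_size
    estimate = (ec2_per_hr + ebs_gb * ebs_per_gb_month / 720) * runtime_hr
    print(estimate)                       # ~0.0043841728, matching the fixture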
118 changes: 118 additions & 0 deletions test_json/unicorn/no_availability_zone.postrun.json
@@ -0,0 +1,118 @@
{
"Job": {
"App": {
"App_name": "",
"App_version": "",
"language": "cwl_v1",
"cwl_url": "https://raw.githubusercontent.com/alexander-veit/tibanna-workflows/main",
"main_cwl": "md5.cwl",
"other_cwl_files": ""
},
"Input": {
"Input_files_data": {
"gzfile": {
"path": "input/alg1.png.gz",
"profile": "",
"rename": "",
"unzip": "",
"mount": "",
"class": "File",
"dir": "aveit-tibanna-test"
}
},
"Secondary_files_data": {},
"Input_parameters": {},
"Env": {}
},
"Output": {
"output_bucket_directory": "aveit-tibanna-test",
"output_target": {
"report": "output/my_first_md5_report"
},
"secondary_output_target": {},
"alt_cond_output_argnames": {},
"Output files": {
"report": {
"path": "/data1/out/report",
"target": "output/my_first_md5_report",
"basename": "report",
"checksum": "sha1$d237002c1956532ffb9117972f3a98cd1344df56",
"location": "file:///data1/out/report",
"md5sum": "45ad098a40814f1e7792b4e17b5f74d0",
"size": 66,
"class": "File"
}
}
},
"start_time": "20210301-16:58:05-UTC",
"JOBID": "NgN7IOBXjgRH",
"Log": {
"log_bucket_directory": "aveit-tibanna-test-log"
},
"end_time": "20210301-17:03:58-UTC",
"status": "0",
"filesystem": "/dev/nvme1n1",
"instance_id": "i-01769a822e5dbb407",
"total_input_size": "12K",
"total_output_size": "36K",
"total_tmp_size": "20K",
"Metrics": {
"cost_estimate": 0.004384172839506173,
"max_mem_used_MB": 713.4609375,
"min_mem_available_MB": 3163.0390625,
"total_mem_MB": 3876.5,
"max_mem_utilization_percent": 18.404770733909455,
"max_cpu_utilization_percent": 69.0,
"max_disk_space_utilization_percent": 1.0498987095511,
"max_disk_space_used_GB": 0.205371856689453,
"max_ebs_read_bytes": 69936332.8
}
},
"config": {
"EBS_optimized": true,
"ami_id": "ami-0a7ddfc7e412ab6e0",
"availability_zone": "",
"awsf_image": "aveit/tibanna-awsf:1.0.3.dev4",
"behavior_on_capacity_limit": "fail",
"cloudwatch_dashboard": false,
"cpu": "",
"ebs_iops": "",
"ebs_size": 20,
"ebs_type": "gp3",
"instance_type": "t3.medium",
"job_tag": "",
"json_bucket": "aveit-tibanna-test-log",
"key_name": "",
"language": "cwl_v1",
"log_bucket": "aveit-tibanna-test-log",
"mem": 0,
"password": "",
"public_postrun_json": false,
"root_ebs_size": 8,
"run_name": "t3_medium_nonspot",
"script_url": "https://raw.githubusercontent.com/4dn-dcic/tibanna/master/awsf3/",
"security_group": "",
"shutdown_min": "now",
"spot_duration": "",
"spot_instance": true,
"subnet": ""
},
"commands": [
[
"docker",
" run",
" -i",
" --mount=type=bind,source=/mnt/data1/tmp4jntok73,target=/OVwPJQ",
" --mount=type=bind,source=/mnt/data1/tmp3s4rl0wc,target=/tmp",
" --mount=type=bind,source=/data1/input/input/alg1.png.gz,target=/var/lib/cwl/stg59f7da6f-1d44-4615-abd3-b46c2ceefcf1/alg1.png.gz,readonly",
" --workdir=/OVwPJQ",
" --rm",
" --env=TMPDIR=/tmp",
" --env=HOME=/OVwPJQ",
" --cidfile=/data1/tmpvfaxztjn/20210301170033-764000.cid",
" aveit/md5:v2",
" run.sh",
" /var/lib/cwl/stg59f7da6f-1d44-4615-abd3-b46c2ceefcf1/alg1.png.gz"
]
]
}
