Skip to content

Commit

Permalink
Merge pull request #50 from mskcc/develop
Browse files Browse the repository at this point in the history
Version 0.3.0 release
  • Loading branch information
nikhil authored Sep 13, 2022
2 parents 7eef22e + 55b76bc commit 80a428e
Show file tree
Hide file tree
Showing 4 changed files with 144 additions and 24 deletions.
11 changes: 10 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ To access other endpoints, export the environment variable `BEAGLE_ENDPOINT`.
beaglecli file-group list [--page-size=<page_size>]
beaglecli etl delete --job-id=<job_id>...
beaglecli run list [--page-size=<page_size>] [--request-id=<request_id>]... [--tags=<tags>]... [--job-groups=<job_groups>]... [--jira-ids=<jira_ids>]...
beaglecli run latest-info [--request-id=<request_id>] [--job-group=<job_group>] [--jira-id=<jira_id>] [--output-file=<output_file>] [--completed]
beaglecli run latest-info [--request-id=<request_id | request_ids.txt>] [--job-group=<job_group>] [--jira-id=<jira_id>] [--output-file=<output_file>] [--completed] [--page-size=<page_size>] [--output-metadata-only] [--max-pages]
beaglecli run get <run_id>
beaglecli run submit-request --pipeline=<pipeline> [--request-ids=<request_ids>] [--job-group-id=<job_group_id>] [--for-each=<True or False>]
beaglecli run submit-runs --pipelines=<pipeline>... --versions=<versions>...[--run-file=<run_file>] [--run-ids=<run_ids>]... [--job-group-id=<job_group_id>] [--for-each=<True or False>]
Expand Down Expand Up @@ -65,6 +65,15 @@ To access other endpoints, export the environment variable `BEAGLE_ENDPOINT`.
```
beaglecli run submit-request --pipeline=argos --request-ids=ABCDE_1,ABCDE_2 --job-group-id=FGHIJK-LMNOP-QRSTUV-WXY --job-group-id=FGHIJK-LMNOP-QRSTUV-WXYZ
```
- Return only the output metadata info for a given request id from the run api
```
beaglecli run latest-info --request-id 10101_A --completed --output-metadata-only --max-pages
```
- Return only the output metadata info for multiple request ids
```
beaglecli run latest-info --request-id requests.txt --completed --output-metadata-only --max-pages
```
Note: Use `requests.txt` as a template for providing multiple request ids.

#### Troubleshooting

Expand Down
152 changes: 130 additions & 22 deletions beaglecli
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ $ beaglecli files list --metadata=igoRequestId:09324_C
"""
from cmath import sin
from ensurepip import version
import os
import sys
from urllib import response
Expand All @@ -25,6 +26,7 @@ from os.path import expanduser
from urllib.parse import urljoin
from datetime import datetime
import traceback
import csv

from apps.access import access_commands
from apps.cmoch import cmoch_commands
Expand All @@ -45,10 +47,10 @@ CONFIG_TEMPLATE = {


API = {
"auth": "api-token-auth/",
"verify": "api-token-verify/",
"refresh": "api-token-refresh/",
"storage": "v0/fs/storage/",
"auth": 'api-token-auth/',
"verify": 'api-token-verify/',
"refresh": 'api-token-refresh/',
"storage": 'v0/fs/storage/',
"file-types": 'v0/fs/file-types/',
"pipelines": 'v0/run/pipelines/',
"files": '/v0/fs/files/',
Expand All @@ -70,7 +72,7 @@ Usage:
beaglecli files create <file_path> <file_type> <file_group_id> [--metadata-path=<metadata_path>] [--size=<size>]
beaglecli files update <file_id> [--file-path=<file_path>] [--file-type=<file_type>] [--file-group=<file_group_id>] [--metadata-path=<metadata_path>] [--size=<size>]
beaglecli files patch <file_id> [--file-path=<file_path>] [--file-type=<file_type>] [--file-group=<file_group_id>] [--metadata=<metadata>]... [--size=<size>]
beaglecli files list [--page-size=<page_size>] [--path=<path>]... [--metadata=<metadata>]... [--file-group=<file_group>]... [--file-name=<file_name>]... [--filename-regex=<filename_regex>]
beaglecli files list [--page-size=<page_size>] [--path=<path>]... [--metadata=<metadata>]... [--file-group=<file_group>]... [--file-name=<file_name>]... [--filename-regex=<filename_regex>] [--file-type=<file_type>]...
beaglecli files delete --file-id=<file_id>...
beaglecli sample create <sample-id>
beaglecli sample list [--sample-id=<sample-id>]
Expand All @@ -82,8 +84,8 @@ Usage:
beaglecli file-group create <file_group_name> <storage>
beaglecli file-group list [--page-size=<page_size>]
beaglecli etl delete --job-id=<job_id>...
beaglecli run list [--page-size=<page_size>] [--request-id=<request_id>]... [--tags=<tags>]... [--job-groups=<job_groups>]... [--jira-ids=<jira_ids>]...
beaglecli run latest-info [--request-id=<request_id>] [--job-group=<job_group>] [--jira-id=<jira_id>] [--output-file=<output_file>] [--completed]
beaglecli run list [--page-size=<page_size>] [--request-id=<request_id>]... [--tags=<tags>]... [--apps="apps"]... [--job-groups=<job_groups>]... [--jira-ids=<jira_ids>]...
beaglecli run latest-info [--request-id=<request_id | request_ids.csv> ] [--job-group=<job_group>] [--apps="apps"]... [--jira-id=<jira_id>] [--output-file=<output_file>] [--completed][--page-size=<page_size>] [--output-metadata-only] [--max-pages]
beaglecli run get <run_id>
beaglecli run submit-request --pipeline=<pipeline> [--request-ids=<request_ids>] [--job-group-id=<job_group_id>] [--for-each=<True or False>]
beaglecli run submit-runs --pipelines=<pipeline>... --versions=<versions>...[--run-file=<run_file>] [--run-ids=<run_ids>]... [--job-group-id=<job_group_id>] [--for-each=<True or False>]
Expand Down Expand Up @@ -299,6 +301,7 @@ def _check_is_authenticated(config):
'refresh': config.refresh})
if response.status_code == 200:
config.set('token', response.json()['access'])
config.set('refresh', response.json()['refresh'])
return True
return False

Expand Down Expand Up @@ -347,29 +350,119 @@ def _get_latest_runs(run_dict):
return run_list


def _build_apps_dict(pipelines):
    """Map pipeline display keys to pipeline ids.

    A pipeline whose name is unique is keyed by ``str(name)``. When several
    pipelines share a name, each of them is keyed by ``"<name>:<version>"``
    so the entries do not overwrite one another.

    Args:
        pipelines: iterable of dicts, each with "name", "version" and "id".

    Returns:
        dict mapping the key described above to the pipeline's "id".
    """
    seen_names = set()
    duplicate_names = set()
    for pipeline in pipelines:
        name = pipeline["name"]
        if name in seen_names:
            duplicate_names.add(name)
        seen_names.add(name)

    apps = {}
    for pipeline in pipelines:
        name = pipeline["name"]
        if name in duplicate_names:
            key = "{}:{}".format(name, pipeline["version"])
        else:
            key = str(name)
        apps[key] = pipeline["id"]
    return apps


def _get_apps_dict():
    """Fetch the pipelines known to beagle and return a name -> id mapping.

    Requests ``API['pipelines']`` with a very large page size so that every
    pipeline comes back in a single page, then builds the mapping with
    ``_build_apps_dict``.

    Returns:
        dict mapping app name (or "name:version" for duplicated names) to
        the pipeline id.

    Exits:
        Prints an error and terminates with status 1 when the request fails
        or when the response carries no "results" key.
    """
    # NOTE(review): `config` is read from module scope here rather than being
    # passed in - confirm it is always set before this function is called.
    url = urljoin(BEAGLE_ENDPOINT, API['pipelines'])
    params = {'page_size': 1000000}
    headers = {
        'Authorization': 'Bearer %s' % config.token,
        'Content-Type': 'application/json',
    }
    response = requests.get(url, headers=headers, params=params)
    if not response.ok:
        print("ERROR: Could not retrieve app list")
        # sys.exit rather than the site-provided exit(), which is meant for
        # interactive sessions and is absent when Python runs with -S.
        sys.exit(1)

    response_json = response.json()
    if "results" not in response_json:
        print("Error: beagle returned a response without results")
        sys.exit(1)
    return _build_apps_dict(response_json["results"])


def _get_app_uuid(app_names):
app_dict = _get_apps_dict()
keys = app_dict.keys()
if not keys:
print("Error: Could not retrieve pipeline info")
exit(1)
uuid_list = []
for single_name in app_names:
if single_name not in keys:
matches = (
single_key for single_key in keys if single_name in single_key)
print("Could not find the app " + str(single_name) + " in beagle")
if matches:
print("Here are possible matches:")
print(list(matches))
exit(1)
else:
uuid_list.append(app_dict[single_name])
return uuid_list


def _get_request_Id(run_data):
return run_data.get("tags", {}).get("igoRequestId", "None")


def _get_latest_run_info_command(arguments, config):
# getting params
job_group = arguments.get('--job-group')
jira_id = arguments.get('--jira-id')
apps = arguments.get('--apps')
requestId = arguments.get('--request-id')
completed = arguments.get('--completed')
output_file = arguments.get('--output-file')
page_size = arguments.get('--page-size')
metadata_only = arguments.get('--output-metadata-only')
max_pages = arguments.get('--max-pages')
info_keys = ['id', 'status', 'name', 'tags', 'message', 'app',
'operator_run', 'created_date', 'finished_date', 'execution_id']
'operator_run', 'created_date', 'finished_date', 'execution_id', 'output_metadata']
file_keys = ['name', 'status', 'tags', 'message', 'id', 'execution_id']
params = dict()

# setting url
url = urljoin(BEAGLE_ENDPOINT, API['run'])

# setting / adjusting parameters
params['page_size'] = 1000
params['full'] = True
# open csv
if requestId:
params['requestId'] = requestId
if requestId[0].endswith(".txt"):
with open(requestId[0], newline='') as f:
reader = csv.reader(f, skipinitialspace=True)
requestIdList = []
# iterate over individual requests
for r in reader:
if r:
requestIdList.append(r[0])
params['request_ids'] = requestIdList
else:
params['request_ids'] = requestId
if apps:
uuid_list = _get_app_uuid(apps)
params['apps'] = uuid_list
if job_group:
params['job_groups'] = job_group
if jira_id:
params['jira_id'] = jira_id
params['page_size'] = 1000000
params['full'] = True
if page_size:
params['page_size'] = page_size
if max_pages and not page_size:
count_params = {'request_ids': params['request_ids'], 'count': True}
params['page_size'] = requests.get(url,
headers={'Authorization': 'Bearer %s' % config.token}, params=count_params)
if completed:
params['status'] = "COMPLETED"
# params['values_run'] = ['id', 'status', 'name', 'tags', 'message',
# 'app', 'operator_run', 'created_date', 'finished_date']
url = urljoin(BEAGLE_ENDPOINT, API['run'])

response = requests.get(url,
headers={'Authorization': 'Bearer %s' % config.token}, params=params)
response_json = response.json()
Expand All @@ -380,17 +473,26 @@ def _get_latest_run_info_command(arguments, config):
if single_key in single_run:
run_data[single_key] = single_run[single_key]
app_name = run_data['app']
request_id = _get_request_Id(run_data)
operator_run = run_data['operator_run']
if app_name not in run_list:
run_list[app_name] = {}
if operator_run not in run_list[app_name]:
run_list[app_name][operator_run] = []
run_list[app_name][operator_run].append(run_data)
run_type = "{}:{}".format(app_name, request_id)
if run_type not in run_list:
run_list[run_type] = {}
if operator_run not in run_list[run_type]:
run_list[run_type][operator_run] = []
run_list[run_type][operator_run].append(run_data)
latest_runs = []
for single_app in run_list:
latest_runs += _get_latest_runs(run_list[single_app])
# only return metadata
if metadata_only:
for idx, single_run in enumerate(latest_runs):
latest_runs[idx] = single_run['output_metadata']
response_json = json.dumps(latest_runs, indent=4)
if output_file:
if metadata_only and output_file:
print("Not writing to " + str(output_file) +
" as metadata only has been specified")
elif output_file:
output_str = "redact(y/n)\t" + "\t".join(file_keys) + "\n"
for single_run in latest_runs:
output_str += "n"
Expand All @@ -402,6 +504,7 @@ def _get_latest_run_info_command(arguments, config):
with open(output_file_path, "w") as output_file_obj:
output_file_obj.write(output_str)
response_json = "Done! Output location: " + str(output_file_path)

return response_json


Expand All @@ -411,10 +514,14 @@ def _get_runs_command(arguments, config):
tags = arguments.get('--tags')
job_groups = arguments.get('--job-groups')
jira_ids = arguments.get('--jira-ids')
apps = arguments.get('--apps')

params = dict()
if requestId:
params['requestId'] = requestId
params['request_ids'] = requestId
if apps:
uuid_list = _get_app_uuid(apps)
params['apps'] = uuid_list
if tags:
params['tags'] = tags
if job_groups:
Expand Down Expand Up @@ -476,12 +583,14 @@ def _list_files(arguments, config):
file_name = arguments.get('--file-name')
filename_regex = arguments.get('--filename-regex')
page_size = arguments.get('--page-size')
file_type = arguments.get('--file-type')
params = dict()
params['path'] = paths
params['metadata'] = metadata
params['file_group'] = file_group
params['file_name'] = file_name
params['filename_regex'] = filename_regex
params['file_type'] = file_type
if page_size:
params['page_size'] = page_size
response = requests.get(urljoin(BEAGLE_ENDPOINT, API['files']), headers={
Expand Down Expand Up @@ -815,7 +924,6 @@ def _submit_operator_runs(arguments, config):
if for_each:
body['for_each'] = for_each
url = urljoin(BEAGLE_ENDPOINT, API['run-operator-runs'])
print(body)
response = requests.post(url, data=json.dumps(body),
headers={'Authorization': 'Bearer %s' % config.token, 'Content-Type': 'application/json'})
response_json = json.dumps(response.json(), indent=4)
Expand Down
3 changes: 3 additions & 0 deletions requests.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
10101_A,
10101_B,
10101_C
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@

setuptools.setup(
name='beaglecli',
version='0.2.0',
version='0.3.0',
scripts=['beaglecli'] ,
description="Beagle API command line tool",
url="https://github.com/mskcc/beagle_cli",
Expand Down

0 comments on commit 80a428e

Please sign in to comment.