diff --git a/README.md b/README.md index 02e8e3b..1d8afe2 100644 --- a/README.md +++ b/README.md @@ -37,7 +37,7 @@ To access other endpoints, export the environment variable `BEAGLE_ENDPOINT`. beaglecli file-group list [--page-size=] beaglecli etl delete --job-id=... beaglecli run list [--page-size=] [--request-id=]... [--tags=]... [--job-groups=]... [--jira-ids=]... - beaglecli run latest-info [--request-id=] [--job-group=] [--jira-id=] [--output-file=] [--completed] + beaglecli run latest-info [--request-id=] [--job-group=] [--jira-id=] [--output-file=] [--completed] [--page-size=] [--output-metadata-only] [--max-pages] beaglecli run get beaglecli run submit-request --pipeline= [--request-ids=] [--job-group-id=] [--for-each=] beaglecli run submit-runs --pipelines=... --versions=...[--run-file=] [--run-ids=]... [--job-group-id=] [--for-each=] @@ -65,6 +65,15 @@ To access other endpoints, export the environment variable `BEAGLE_ENDPOINT`. ``` beaglecli run submit-request --pipeline=argos --request-ids=ABCDE_1,ABCDE_2 --job-group-id=FGHIJK-LMNOP-QRSTUV-WXY --job-group-id=FGHIJK-LMNOP-QRSTUV-WXYZ ``` +- Return only the output metadata info for a given request id from the run api + ``` + beaglecli run latest-info --request-id 10101_A --completed --output-metadata-only --max-pages + ``` +- Return only the output metadata info for multiple request ids + ``` + beaglecli run latest-info --request-id requests.txt --completed --output-metadata-only --max-pages + ``` +Note: Use `requests.txt` as a template for providing multiple request ids #### Troubleshooting diff --git a/beaglecli b/beaglecli index de0e647..c37b282 100755 --- a/beaglecli +++ b/beaglecli @@ -14,6 +14,7 @@ $ beaglecli files list --metadata=igoRequestId:09324_C """ from cmath import sin +from ensurepip import version import os import sys from urllib import response @@ -25,6 +26,7 @@ from os.path import expanduser from urllib.parse import urljoin from datetime import datetime import traceback +import csv from apps.access
import access_commands from apps.cmoch import cmoch_commands @@ -45,10 +47,10 @@ CONFIG_TEMPLATE = { API = { - "auth": "api-token-auth/", - "verify": "api-token-verify/", - "refresh": "api-token-refresh/", - "storage": "v0/fs/storage/", + "auth": 'api-token-auth/', + "verify": 'api-token-verify/', + "refresh": 'api-token-refresh/', + "storage": 'v0/fs/storage/', "file-types": 'v0/fs/file-types/', "pipelines": 'v0/run/pipelines/', "files": '/v0/fs/files/', @@ -70,7 +72,7 @@ Usage: beaglecli files create [--metadata-path=] [--size=] beaglecli files update [--file-path=] [--file-type=] [--file-group=] [--metadata-path=] [--size=] beaglecli files patch [--file-path=] [--file-type=] [--file-group=] [--metadata=]... [--size=] - beaglecli files list [--page-size=] [--path=]... [--metadata=]... [--file-group=]... [--file-name=]... [--filename-regex=] + beaglecli files list [--page-size=] [--path=]... [--metadata=]... [--file-group=]... [--file-name=]... [--filename-regex=] [--file-type=]... beaglecli files delete --file-id=... beaglecli sample create beaglecli sample list [--sample-id=] @@ -82,8 +84,8 @@ Usage: beaglecli file-group create beaglecli file-group list [--page-size=] beaglecli etl delete --job-id=... - beaglecli run list [--page-size=] [--request-id=]... [--tags=]... [--job-groups=]... [--jira-ids=]... - beaglecli run latest-info [--request-id=] [--job-group=] [--jira-id=] [--output-file=] [--completed] + beaglecli run list [--page-size=] [--request-id=]... [--tags=]... [--apps="apps"]... [--job-groups=]... [--jira-ids=]... + beaglecli run latest-info [--request-id= ] [--job-group=] [--apps="apps"]... [--jira-id=] [--output-file=] [--completed][--page-size=] [--output-metadata-only] [--max-pages] beaglecli run get beaglecli run submit-request --pipeline= [--request-ids=] [--job-group-id=] [--for-each=] beaglecli run submit-runs --pipelines=... --versions=...[--run-file=] [--run-ids=]... 
[--job-group-id=] [--for-each=] @@ -299,6 +301,7 @@ def _check_is_authenticated(config): 'refresh': config.refresh}) if response.status_code == 200: config.set('token', response.json()['access']) + config.set('refresh', response.json()['refresh']) return True return False @@ -347,29 +350,119 @@ def _get_latest_runs(run_dict): return run_list +def _get_apps_dict(): + url = urljoin(BEAGLE_ENDPOINT, API['pipelines']) + params = dict() + params['page_size'] = 1000000 + response = requests.get(url, headers={ + 'Authorization': 'Bearer %s' % config.token, 'Content-Type': 'application/json'}, params=params) + app_dict = {} + if response.ok: + response_json = response.json() + if "results" in response_json: + result_list = response_json["results"] + name_set = set() + duplicate_set = set() + for single_pipeline in result_list: + name = single_pipeline["name"] + if name in name_set: + duplicate_set.add(name) + name_set.add(name) + for single_pipeline in result_list: + name = single_pipeline["name"] + version = single_pipeline["version"] + id = single_pipeline["id"] + key_name = "" + if name in duplicate_set: + key_name = str(name) + ":"+str(version) + else: + key_name = str(name) + app_dict[key_name] = id + return app_dict + else: + print("Error: beagle returned an empty") + exit(1) + else: + print("ERROR: Could not retrieve app list") + exit(1) + + +def _get_app_uuid(app_names): + app_dict = _get_apps_dict() + keys = app_dict.keys() + if not keys: + print("Error: Could not retrieve pipeline info") + exit(1) + uuid_list = [] + for single_name in app_names: + if single_name not in keys: + matches = ( + single_key for single_key in keys if single_name in single_key) + print("Could not find the app " + str(single_name) + " in beagle") + if matches: + print("Here are possible matches:") + print(list(matches)) + exit(1) + else: + uuid_list.append(app_dict[single_name]) + return uuid_list + + +def _get_request_Id(run_data): + return run_data.get("tags", {}).get("igoRequestId", 
"None") + + def _get_latest_run_info_command(arguments, config): + # getting params job_group = arguments.get('--job-group') jira_id = arguments.get('--jira-id') + apps = arguments.get('--apps') requestId = arguments.get('--request-id') completed = arguments.get('--completed') output_file = arguments.get('--output-file') + page_size = arguments.get('--page-size') + metadata_only = arguments.get('--output-metadata-only') + max_pages = arguments.get('--max-pages') info_keys = ['id', 'status', 'name', 'tags', 'message', 'app', - 'operator_run', 'created_date', 'finished_date', 'execution_id'] + 'operator_run', 'created_date', 'finished_date', 'execution_id', 'output_metadata'] file_keys = ['name', 'status', 'tags', 'message', 'id', 'execution_id'] params = dict() + + # setting url + url = urljoin(BEAGLE_ENDPOINT, API['run']) + + # setting / adjusting parameters + params['page_size'] = 1000 + params['full'] = True + # open csv if requestId: - params['requestId'] = requestId + if requestId[0].endswith(".txt"): + with open(requestId[0], newline='') as f: + reader = csv.reader(f, skipinitialspace=True) + requestIdList = [] + # iterate over individual requests + for r in reader: + if r: + requestIdList.append(r[0]) + params['request_ids'] = requestIdList + else: + params['request_ids'] = requestId + if apps: + uuid_list = _get_app_uuid(apps) + params['apps'] = uuid_list if job_group: params['job_groups'] = job_group if jira_id: params['jira_id'] = jira_id - params['page_size'] = 1000000 - params['full'] = True + if page_size: + params['page_size'] = page_size + if max_pages and not page_size: + count_params = {'request_ids': params['request_ids'], 'count': True} + params['page_size'] = requests.get(url, + headers={'Authorization': 'Bearer %s' % config.token}, params=count_params) if completed: params['status'] = "COMPLETED" - # params['values_run'] = ['id', 'status', 'name', 'tags', 'message', - # 'app', 'operator_run', 'created_date', 'finished_date'] - url = 
urljoin(BEAGLE_ENDPOINT, API['run']) + response = requests.get(url, headers={'Authorization': 'Bearer %s' % config.token}, params=params) response_json = response.json() @@ -380,17 +473,26 @@ def _get_latest_run_info_command(arguments, config): if single_key in single_run: run_data[single_key] = single_run[single_key] app_name = run_data['app'] + request_id = _get_request_Id(run_data) operator_run = run_data['operator_run'] - if app_name not in run_list: - run_list[app_name] = {} - if operator_run not in run_list[app_name]: - run_list[app_name][operator_run] = [] - run_list[app_name][operator_run].append(run_data) + run_type = "{}:{}".format(app_name, request_id) + if run_type not in run_list: + run_list[run_type] = {} + if operator_run not in run_list[run_type]: + run_list[run_type][operator_run] = [] + run_list[run_type][operator_run].append(run_data) latest_runs = [] for single_app in run_list: latest_runs += _get_latest_runs(run_list[single_app]) + # only return metadata + if metadata_only: + for idx, single_run in enumerate(latest_runs): + latest_runs[idx] = single_run['output_metadata'] response_json = json.dumps(latest_runs, indent=4) - if output_file: + if metadata_only and output_file: + print("Not writing to " + str(output_file) + + " as metadata only has been specified") + elif output_file: output_str = "redact(y/n)\t" + "\t".join(file_keys) + "\n" for single_run in latest_runs: output_str += "n" @@ -402,6 +504,7 @@ def _get_latest_run_info_command(arguments, config): with open(output_file_path, "w") as output_file_obj: output_file_obj.write(output_str) response_json = "Done! 
Output location: " + str(output_file_path) + return response_json @@ -411,10 +514,14 @@ def _get_runs_command(arguments, config): tags = arguments.get('--tags') job_groups = arguments.get('--job-groups') jira_ids = arguments.get('--jira-ids') + apps = arguments.get('--apps') params = dict() if requestId: - params['requestId'] = requestId + params['request_ids'] = requestId + if apps: + uuid_list = _get_app_uuid(apps) + params['apps'] = uuid_list if tags: params['tags'] = tags if job_groups: @@ -476,12 +583,14 @@ def _list_files(arguments, config): file_name = arguments.get('--file-name') filename_regex = arguments.get('--filename-regex') page_size = arguments.get('--page-size') + file_type = arguments.get('--file-type') params = dict() params['path'] = paths params['metadata'] = metadata params['file_group'] = file_group params['file_name'] = file_name params['filename_regex'] = filename_regex + params['file_type'] = file_type if page_size: params['page_size'] = page_size response = requests.get(urljoin(BEAGLE_ENDPOINT, API['files']), headers={ @@ -815,7 +924,6 @@ def _submit_operator_runs(arguments, config): if for_each: body['for_each'] = for_each url = urljoin(BEAGLE_ENDPOINT, API['run-operator-runs']) - print(body) response = requests.post(url, data=json.dumps(body), headers={'Authorization': 'Bearer %s' % config.token, 'Content-Type': 'application/json'}) response_json = json.dumps(response.json(), indent=4) diff --git a/requests.txt b/requests.txt new file mode 100644 index 0000000..08ccb4a --- /dev/null +++ b/requests.txt @@ -0,0 +1,3 @@ +10101_A, +10101_B, +10101_C diff --git a/setup.py b/setup.py index b8b13f9..ad90de0 100755 --- a/setup.py +++ b/setup.py @@ -6,7 +6,7 @@ setuptools.setup( name='beaglecli', - version='0.2.0', + version='0.3.0', scripts=['beaglecli'] , description="Beagle API command line tool", url="https://github.com/mskcc/beagle_cli",