From c1c2493b2314b46368a7326d076747e5c23a031a Mon Sep 17 00:00:00 2001 From: Ronak Shah Date: Sat, 11 Mar 2023 19:34:55 -0500 Subject: [PATCH 01/20] Updating for best fit --- .gitignore | 126 ++++++++++++++++++ python/run_create_report/README.md | 107 ++++++++++++--- .../modules/generate_facet_maf_path.py | 91 ++++++++++++- .../modules/read_manifest.py | 22 ++- python/run_create_report/modules/run_cmd.py | 4 +- python/run_create_report/run_create_report.py | 99 +++++++++++--- 6 files changed, 409 insertions(+), 40 deletions(-) diff --git a/.gitignore b/.gitignore index 4b402ae..1c0b6ff 100644 --- a/.gitignore +++ b/.gitignore @@ -48,3 +48,129 @@ vignettes/*.pdf # MAC .DS_Store + +# Editors +.vscode/ +.idea/ + +# Vagrant +.vagrant/ + +# Mac/OSX +.DS_Store + +# Windows +Thumbs.db + +# Source for the following rules: https://raw.githubusercontent.com/github/gitignore/master/Python.gitignore +# Byte-compiled / optimized / DLL files +__pycache__/ +*.py[cod] +*$py.class + +# C extensions +*.so + +# Distribution / packaging +.Python +build/ +develop-eggs/ +dist/ +downloads/ +eggs/ +.eggs/ +lib/ +lib64/ +parts/ +sdist/ +var/ +wheels/ +*.egg-info/ +.installed.cfg +*.egg +MANIFEST + +# PyInstaller +# Usually these files are written by a python script from a template +# before PyInstaller builds the exe, so as to inject date/other infos into it. +*.manifest +*.spec + +# Installer logs +pip-log.txt +pip-delete-this-directory.txt + +# Unit test / coverage reports +htmlcov/ +.tox/ +.nox/ +.coverage +.coverage.* +.cache +nosetests.xml +coverage.xml +*.cover +.hypothesis/ +.pytest_cache/ + +# Translations +*.mo +*.pot + +# Django stuff: +*.log +local_settings.py +db.sqlite3 + +# Flask stuff: +instance/ +.webassets-cache + +# Scrapy stuff: +.scrapy + +# Sphinx documentation +docs/_build/ + +# PyBuilder +target/ + +# Jupyter Notebook +.ipynb_checkpoints + +# IPython +profile_default/ +ipython_config.py + +# pyenv +.python-version + +# celery beat schedule file +celerybeat-schedule + +# SageMath parsed files +*.sage.py + +# Environments +.env +.venv +env/ +venv/ +ENV/ +env.bak/ +venv.bak/ + +# Spyder project settings +.spyderproject +.spyproject + +# Rope project settings +.ropeproject + +# mkdocs documentation +/site + +# mypy +.mypy_cache/ +.dmypy.json +dmypy.json \ No newline at end of file diff --git a/python/run_create_report/README.md b/python/run_create_report/README.md index 6e054ed..f9262fe 100644 --- a/python/run_create_report/README.md +++ b/python/run_create_report/README.md @@ -1,9 +1,9 @@ -# Table of Contents +# Run Create Report -- [Table of Contents](#table-of-contents) +- [Run Create Report](#run-create-report) - [Requirements](#requirements) - [run\_create\_report](#run_create_report) - - [main](#main) + - [Main Script (run\_create\_report.py)](#main-script-run_create_reportpy) - [Submodules](#submodules) - [check\_required\_columns](#check_required_columns) - [check\_required\_columns](#check_required_columns-1) @@ -11,6 +11,7 @@ - [generate\_repo\_path](#generate_repo_path) - [read\_manifest](#read_manifest) - [read\_manifest](#read_manifest-1) + - [get\_row](#get_row) - [get\_small\_variant\_csv](#get_small_variant_csv) - [get\_small\_variant\_csv](#get_small_variant_csv-1) - [run\_cmd](#run_cmd) @@ -18,13 +19,15 @@ - [run\_multiple\_cmd](#run_multiple_cmd) - [generate\_facet\_maf\_path](#generate_facet_maf_path) - [generate\_facet\_maf\_path](#generate_facet_maf_path-1) + - [get\_maf\_path](#get_maf_path) + - [get\_best\_fit\_folder](#get_best_fit_folder) - [generate\_create\_report\_cmd](#generate_create_report_cmd) - [generate\_create\_report\_cmd](#generate_create_report_cmd-1) ## Requirements ```bash -access_data_analysis==0.1.2 # works with this repo tag +access_data_analysis=>0.1.2 # works with this repo tag typer==0.3.2 typing_extensions==3.10.0.0 pandas==1.2.5 @@ -37,7 +40,7 @@ rich==12.1.0 -### main +### Main Script (run\_create\_report.py) ```bash Usage: run_create_report.py [OPTIONS] @@ -54,7 +57,17 @@ Options: create_report.R when `--repo` is not given -m, --manifest FILE File containing meta information per sample. - [required] + Require following columns in the header: + cmo_patient_id, sample_id, dmp_patient_id, + collection_date or collection_day, + timepoint. If dmp_sample_id column is given + and has information that will be used to run + facets. If dmp_sample_id is not given and + dmp_patient_id is given than it will be used + to get the Tumor sample with lowest number. + If dmp_sample_id or dmp_patient_id is not + given then it will run without the facet maf + file [required] -v, --variant-results DIRECTORY Base path for all results of small variants @@ -71,6 +84,11 @@ Options: /work/ccs/shared/resources/impact/facets/all /] + -bf, --best-fit If this is set to True then we will attempt + to parse `facets_review.manifest` file to + pick the best fit for a given dmp_sample_id + [default: False] + -l, --tumor-type TEXT Tumor type label for the report [required] -cfm, --copy-facet-maf If this is set to True then we will copy the facet maf file in the directory specified in @@ -87,10 +105,12 @@ Options: -gm, --generate-markdown If given, the create_report.R will be run with `-md` flag to generate markdown [default: False] - -ff, --force If this is set to True then we will not - stop if an error is encountered in a given - sample but keep on running for the next sample - [default: False] + + -ff, --force If this is set to True then we will not stop + if an error is encountered in a given sample + while running create_report.R but keep on + running for the next sample [default: + False] --install-completion Install completion for the current shell. --show-completion Show completion for the current shell, to @@ -106,10 +126,11 @@ Wrapper script to run create_report.R - `repo_path` _Path, optional_ - "Base path to where the git repository is located for access_data_analysis". - `script_path` _Path, optional_ - "Path to the create_report.R script, fall back if `--repo` is not given". - `template_path` _Path, optional_ - "Path to the template.Rmd or template_days.Rmd to be used with create_report.R when `--repo` is not given". -- `manifest` _Path, required_ - "File containing meta information per sample.". +- `manifest` _Path, required_ - "File containing meta information per sample. Require following columns in the header: `cmo_patient_id`, `sample_id`, `dmp_patient_id`, `collection_date` or `collection_day`, `timepoint`. If dmp_sample_id column is given and has information that will be used to run facets. if dmp_sample_id is not given and dmp_patient_id is given than it will be used to get the Tumor sample with lowest number.If dmp_sample_id or dmp_patient_id is not given then it will run without the facet maf file". - `variant_path` _Path, required_ - "Base path for all results of small variants as generated by filter_calls.R script in access_data_analysis (Make sure only High Confidence calls are included)". - `cnv_path` _Path, required_ - "Base path for all results of CNV as generated by CNV_processing.R script in access_data_analysis". - `facet_repo` _Path, required_ - "Base path for all results of facets on Clinical MSK-IMPACT samples". +- `best_fit` _bool, optional_ - "If this is set to True then we will attempt to parse `facets_review.manifest` file to pick the best fit for a given dmp_sample_id". - `tumor_type` _str, required_ - "Tumor type label for the report". - `copy_facet` _bool, optional_ - "If this is set to True then we will copy the facet maf file in the directory specified in `copy_facet_dir`". - `copy_facet_dir` _Path, optional_ - "Directory path where the facet maf file should be copied.". @@ -119,7 +140,7 @@ Wrapper script to run create_report.R **Usage** -- Using Generate Markdown, copy facet maf file, use template_days RMarkdown and force flag +- Using Generate Markdown, copy facet maf file, use template_days RMarkdown, force flag and best fit for facets ```bash > python python/run_create_report/run_create_report.py \ @@ -127,10 +148,10 @@ Wrapper script to run create_report.R -r /home/shahr2/github/access_data_analysis \ -v /home/shahr2/bergerlab/Project_10619_D/small_variants/results_20Jan2023/results_stringent/ \ -c /home/shahr2/bergerlab/Project_10619_D/small_variants/results_20Jan2023/CNA_final_call_set \ --l "Melanoma" -gm -d -cfm -ff +-l "Melanoma" -gm -d -cfm -ff -bf ``` -- Using Generate Markdown and force flag +- Using Generate Markdown, force flag and default fit for facets ```bash > python python/run_create_report/run_create_report.py \ @@ -221,7 +242,7 @@ Generate path to create_report.R and template RMarkdown file def read_manifest(manifest) ``` -_summary_ +Read manifest file **Arguments**: @@ -232,6 +253,24 @@ _summary_ - `data_frame` - _description_ + + +#### get\_row + +```python +def get_row(tsv_file) +``` + +Function to skip rows + +**Arguments**: + +- `tsv_file` _file_ - file to be read + +**Returns**: + +- `list` - lines to be skipped + ### get\_small\_variant\_csv @@ -319,6 +358,44 @@ Get path of maf associated with facet-suite output - `str` - path of the facets maf + + +#### get\_maf\_path + +```python +def get_maf_path(maf_path, patient_id, sample_id) +``` + +Get the path to the maf file + +**Arguments**: + +- `maf_path` _pathlib.Path_ - Base path of the maf file +- `patient_id` _str_: DMP Patient ID for facets +- `sample_id` _str_ - DMP Sample ID if any for facets + +**Returns**: + +- `str` - Path to the maf file + + + +#### get\_best\_fit\_folder + +```python +def get_best_fit_folder(facet_manifest_path) +``` + +Get the best fit folder for the given facet manifest path + +**Arguments**: + +- `facet_manifest_path` _str_ - manifest path to be used for determining best fit + +**Returns**: + +- `pathlib.Path` - path to the folder containing best fit maf files + ### generate\_create\_report\_cmd diff --git a/python/run_create_report/modules/generate_facet_maf_path.py b/python/run_create_report/modules/generate_facet_maf_path.py index 6483071..a77b984 100644 --- a/python/run_create_report/modules/generate_facet_maf_path.py +++ b/python/run_create_report/modules/generate_facet_maf_path.py @@ -1,28 +1,91 @@ import typer import glob +import pandas as pd from pathlib import Path +from modules.read_manifest import read_manifest -def generate_facet_maf_path(facet_path, patient_id, sample_id=None): +def generate_facet_maf_path(facet_path, patient_id, sample_id, best_fit): """Get path of maf associated with facet-suite output Args: facet_path (pathlib.PATH|str): path to search for the facet file patient_id (str): patient id to be used to search, default is set to None sample_id (str): sample id to be used to search, default is set to None + best_fit(bool) : if true attempt to get get best fit from facet repo Returns: str: path of the facets maf """ - if sample_id: + if best_fit: + manifest_path = ( + facet_path.joinpath( + patient_id[:7], f"{sample_id}*", "facets_review.manifest" + ) + if sample_id + else facet_path.joinpath( + patient_id[:7], f"{patient_id}*", "facets_review.manifest" + ) + ) + manifest_path = glob.glob(manifest_path.as_posix()) + if len(manifest_path) == 0: + if sample_id: + typer.secho( + f"Could not find the facets-suite `facets_review.manifest` file using sample id. {sample_id}", + err=True, + fg=typer.colors.BRIGHT_RED, + ) + maf_path = get_maf_path(facet_path.joinpath(patient_id[:7], f"{sample_id}*", "default", "*[0-9].ccf.maf"), patient_id, sample_id) + else: + typer.secho( + f"Could not find the facets-suite `facets_review.manifest` file using patient id. {patient_id}", + err=True, + fg=typer.colors.BRIGHT_RED, + ) + maf_path = get_maf_path(facet_path.joinpath(patient_id[:7], f"{patient_id}*", "default", "*[0-9].ccf.maf"), patient_id, None) + elif len(manifest_path) > 1: + manifest_path = [Path(i) for i in manifest_path] + manifest_path = sorted(manifest_path, key=lambda i: str(i.stem)) + manifest_path_sorted = [str(i) for i in manifest_path] + maf_path = ( + get_maf_path(maf_path, patient_id, None) + if ( + best_fit_folder := get_best_fit_folder( + manifest_path_sorted[0] + ) + ) + else None + ) + elif best_fit_folder := get_best_fit_folder(manifest_path[0]): + maf_path = get_maf_path(maf_path, patient_id, None) + else: + maf_path = None + + + elif sample_id: maf_path = facet_path.joinpath( patient_id[:7], f"{sample_id}*", "default", "*[0-9].ccf.maf" ) + maf_path = get_maf_path(maf_path, patient_id, sample_id) else: maf_path = facet_path.joinpath( patient_id[:7], f"{patient_id}*", "default", "*[0-9].ccf.maf" ) + maf_path = get_maf_path(maf_path, patient_id, None) + return maf_path + +def get_maf_path(maf_path, patient_id, sample_id): + """Get the path to the maf file + + Args: + maf_path (pathlib.Path): Base path of the maf file + patient_id (str): DMP Patient ID for facets + sample_id (str): DMP Sample ID if any for facets + + Returns: + str: Path to the maf file + """ maf_list = glob.glob(maf_path.as_posix()) if len(maf_list) == 0: if patient_id: @@ -46,3 +109,27 @@ def generate_facet_maf_path(facet_path, patient_id, sample_id=None): else: maf_list_sorted = maf_list return maf_list_sorted[0] + +def get_best_fit_folder(facet_manifest_path): + """Get the best fit folder for the given facet manifest path + + Args: + facet_manifest_path (str): manifest path to be used for determining best fit + + Returns: + pathlib.Path: path to the folder containing best fit maf files + """ + facet_manifest_path = Path(facet_manifest_path) + base_path = facet_manifest_path.parent + facet_manifest = read_manifest(facet_manifest_path) + facet_manifest[['date_reviewed', 'time_reviewed']] = facet_manifest.date_reviewed.str.split(" ", expand = True) + facet_manifest['date_reviewed'] = pd.to_datetime(facet_manifest['date_reviewed']) + facet_manifest.sort_values(by='date_reviewed',ascending=False) + folder_name = facet_manifest['fit_name'].iloc[0] + return ( + base_path.joinpath(folder_name, "*[0-9].ccf.maf") + if "default" in folder_name or "alt" in folder_name + else base_path.joinpath("default", "*[0-9].ccf.maf") + ) + + \ No newline at end of file diff --git a/python/run_create_report/modules/read_manifest.py b/python/run_create_report/modules/read_manifest.py index 09ac452..cc8c53e 100644 --- a/python/run_create_report/modules/read_manifest.py +++ b/python/run_create_report/modules/read_manifest.py @@ -10,7 +10,23 @@ def read_manifest(manifest): Returns: data_frame: _description_ """ - if(manifest.suffix == ".csv"): - return pd.read_csv(manifest, sep=',', low_memory=False) + skip_rows = get_row(manifest) + if manifest.suffix == ".csv": + return pd.read_csv(manifest, sep=",", skiprows=skip_rows, low_memory=False) else: - return pd.read_csv(manifest, sep='\t', low_memory=False) + return pd.read_csv(manifest, sep="\t", skiprows=skip_rows, low_memory=False) + + +def get_row(tsv_file): + """Function to skip rows + + Args: + tsv_file (file): file to be read + + Returns: + list: lines to be skipped + """ + skipped = [] + with open(tsv_file, "r") as FH: + skipped.extend(i for i, line in enumerate(FH) if line.startswith("#")) + return skipped diff --git a/python/run_create_report/modules/run_cmd.py b/python/run_create_report/modules/run_cmd.py index 49ebd74..00a0f6e 100644 --- a/python/run_create_report/modules/run_cmd.py +++ b/python/run_create_report/modules/run_cmd.py @@ -26,10 +26,12 @@ def run_cmd(cmd,force): print(stdout) if not force: raise typer.Abort() + else: + return stdout else: print("run_cmd:stderr:\n") print(stderr) - return out + return stderr def run_multiple_cmd(commands, parallel_process=None): diff --git a/python/run_create_report/run_create_report.py b/python/run_create_report/run_create_report.py index 3b6528e..f73367f 100644 --- a/python/run_create_report/run_create_report.py +++ b/python/run_create_report/run_create_report.py @@ -41,7 +41,7 @@ def main( writable=False, readable=True, resolve_path=True, - help="File containing meta information per sample.", + help="File containing meta information per sample. Require following columns in the header: cmo_patient_id, sample_id, dmp_patient_id, collection_date or collection_day, timepoint. If dmp_sample_id column is given and has information that will be used to run facets. If dmp_sample_id is not given and dmp_patient_id is given than it will be used to get the Tumor sample with lowest number. If dmp_sample_id or dmp_patient_id is not given then it will run without the facet maf file", ), variant_path: Path = typer.Option( ..., @@ -79,6 +79,12 @@ def main( resolve_path=True, help="Base path for all results of facets on Clinical MSK-IMPACT samples", ), + best_fit: bool = typer.Option( + False, + "--best-fit", + "-bf", + help="If this is set to True then we will attempt to parse `facets_review.manifest` file to pick the best fit for a given dmp_sample_id", + ), tumor_type: str = typer.Option( ..., "--tumor-type", @@ -113,7 +119,7 @@ def main( False, "--force", "-ff", - help="If this is set to True then we will not stop if an error is encountered in a given sample but keep on running for the next sample", + help="If this is set to True then we will not stop if an error is encountered in a given sample while running create_report.R but keep on running for the next sample", ), ): """Wrapper script to run create_report.R @@ -122,10 +128,11 @@ def main( repo_path (Path, optional): "Base path to where the git repository is located for access_data_analysis". script_path (Path, optional): "Path to the create_report.R script, fall back if `--repo` is not given". template_path (Path, optional): "Path to the template.Rmd or template_days.Rmd to be used with create_report.R when `--repo` is not given". - manifest (Path, required): "File containing meta information per sample.". + manifest (Path, required): "File containing meta information per sample. Require following columns in the header: `cmo_patient_id`, `sample_id`, `dmp_patient_id`, `collection_date` or `collection_day`, `timepoint`. If dmp_sample_id column is given and has information that will be used to run facets. if dmp_sample_id is not given and dmp_patient_id is given than it will be used to get the Tumor sample with lowest number.If dmp_sample_id or dmp_patient_id is not given then it will run without the facet maf file". variant_path (Path, required): "Base path for all results of small variants as generated by filter_calls.R script in access_data_analysis (Make sure only High Confidence calls are included)". cnv_path (Path, required): "Base path for all results of CNV as generated by CNV_processing.R script in access_data_analysis". facet_repo (Path, required): "Base path for all results of facets on Clinical MSK-IMPACT samples". + best_fit (bool, optional): "If this is set to True then we will attempt to parse `facets_review.manifest` file to pick the best fit for a given dmp_sample_id". tumor_type (str, required): "Tumor type label for the report". copy_facet (bool, optional): "If this is set to True then we will copy the facet maf file in the directory specified in `copy_facet_dir`". copy_facet_dir (Path, optional): "Directory path where the facet maf file should be copied.". @@ -139,14 +146,26 @@ def main( column_header, manifest_to_traverse = check_required_columns( manifest_df, template_days ) - print("\nTraversing through", len(manifest_to_traverse),"patients to run create_report.R\n") + print( + "\nTraversing through", + len(manifest_to_traverse), + "patients to run create_report.R\n", + ) # get general paths (script_path, template_path) = generate_repo_path( repo_path, script_path, template_path, template_days ) # iterate through each row and select information needed to generate the command - skipped_ids = [ - "\t".join(["cmo_patient_id", "dmp_patient_id", "dmp_sample_id"]) + summary = [ + "\t".join( + [ + "cmo_patient_id", + "dmp_patient_id", + "dmp_sample_id", + "facet_path", + "comments", + ] + ) ] with Progress( SpinnerColumn(), @@ -166,7 +185,7 @@ def main( if "dmp_sample_id" in column_header: dmp_sample_id = manifest_to_traverse.loc[i, "dmp_sample_id"] facet_path = generate_facet_maf_path( - facet_repo, dmp_patient_id, dmp_sample_id + facet_repo, dmp_patient_id, dmp_sample_id, best_fit ) if not facet_path: typer.secho( @@ -174,13 +193,10 @@ def main( err=True, fg=typer.colors.BRIGHT_RED, ) - skipped_ids.append( - "\t".join([cmo_patient_id, dmp_patient_id, dmp_sample_id]) - ) else: if not dmp_patient_id or dmp_patient_id is not None: facet_path = generate_facet_maf_path( - facet_repo, dmp_patient_id, None + facet_repo, dmp_patient_id, None, best_fit ) else: facet_path = None @@ -190,9 +206,6 @@ def main( err=True, fg=typer.colors.BRIGHT_RED, ) - skipped_ids.append( - "\t".join([cmo_patient_id, dmp_patient_id, "NA"]) - ) # Get the sample id from the Facet file if facet_path: facet_path = Path(facet_path) @@ -222,6 +235,30 @@ def main( tumor_type, ) p2 = run_cmd(create_report_cmd, force) + if "Error" in str(p2) or "error" in str(p2): + summary.append( + "\t".join( + [ + cmo_patient_id, + dmp_patient_id, + dmp_sample_id, + facet_path, + "create_report.R failed", + ] + ) + ) + else: + summary.append( + "\t".join( + [ + cmo_patient_id, + dmp_patient_id, + dmp_sample_id, + facet_path, + "create_report.R ran with facet maf", + ] + ) + ) typer.secho( f"Done running create_report.R for patient with CMO ID {cmo_patient_id}, and DMP ID {dmp_patient_id} and output is written in {html_output}", fg=typer.colors.BRIGHT_GREEN, @@ -246,19 +283,43 @@ def main( tumor_type, ) p3 = run_cmd(create_report_cmd, force) + if "Error" in str(p3) or "error" in str(p3): + summary.append( + "\t".join( + [ + cmo_patient_id, + dmp_patient_id, + dmp_sample_id, + "NA", + "create_report.R failed", + ] + ) + ) + else: + summary.append( + "\t".join( + [ + cmo_patient_id, + dmp_patient_id, + dmp_sample_id, + "NA", + "create_report.R ran without facet maf", + ] + ) + ) typer.secho( f"Done running create_report.R for patient with CMO ID {cmo_patient_id} and output is written in {html_output}", fg=typer.colors.BRIGHT_GREEN, ) print("\nPatient ids that were skipped as facet maf could not be found\n") - print(skipped_ids) - skip_file = Path.cwd().joinpath("skipped_ids.tsv") - with open(skip_file, 'w') as fp: - for item in skipped_ids: + print(summary) + summary_file = Path.cwd().joinpath("run_create_report_summary.tsv") + with open(summary_file, "w") as fp: + for item in summary: # write each item on a new line fp.write("%s\n" % item) - typer.secho("Done!", fg=typer.colors.BRIGHT_GREEN) + typer.secho("Done running run_create_report!", fg=typer.colors.BRIGHT_GREEN) if __name__ == "__main__": From bf07b2286b2f91ae222a084b6f9d4e179dd2ca62 Mon Sep 17 00:00:00 2001 From: Ronak Shah Date: Sat, 11 Mar 2023 19:41:08 -0500 Subject: [PATCH 02/20] Update generate_facet_maf_path.py --- python/run_create_report/modules/generate_facet_maf_path.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/python/run_create_report/modules/generate_facet_maf_path.py b/python/run_create_report/modules/generate_facet_maf_path.py index a77b984..0c39a2d 100644 --- a/python/run_create_report/modules/generate_facet_maf_path.py +++ b/python/run_create_report/modules/generate_facet_maf_path.py @@ -49,7 +49,7 @@ def generate_facet_maf_path(facet_path, patient_id, sample_id, best_fit): manifest_path = sorted(manifest_path, key=lambda i: str(i.stem)) manifest_path_sorted = [str(i) for i in manifest_path] maf_path = ( - get_maf_path(maf_path, patient_id, None) + get_maf_path(best_fit_folder, patient_id, None) if ( best_fit_folder := get_best_fit_folder( manifest_path_sorted[0] @@ -58,7 +58,7 @@ def generate_facet_maf_path(facet_path, patient_id, sample_id, best_fit): else None ) elif best_fit_folder := get_best_fit_folder(manifest_path[0]): - maf_path = get_maf_path(maf_path, patient_id, None) + maf_path = get_maf_path(best_fit_folder, patient_id, None) else: maf_path = None From b89bcfbc54610bf2d0e4bf37a4a565f7c34875c1 Mon Sep 17 00:00:00 2001 From: Ronak Shah Date: Sat, 11 Mar 2023 19:45:56 -0500 Subject: [PATCH 03/20] Update generate_facet_maf_path.py --- .../modules/generate_facet_maf_path.py | 61 +++++++++---------- 1 file changed, 29 insertions(+), 32 deletions(-) diff --git a/python/run_create_report/modules/generate_facet_maf_path.py b/python/run_create_report/modules/generate_facet_maf_path.py index 0c39a2d..2aa7c7b 100644 --- a/python/run_create_report/modules/generate_facet_maf_path.py +++ b/python/run_create_report/modules/generate_facet_maf_path.py @@ -4,7 +4,6 @@ from pathlib import Path from modules.read_manifest import read_manifest - def generate_facet_maf_path(facet_path, patient_id, sample_id, best_fit): """Get path of maf associated with facet-suite output @@ -18,16 +17,27 @@ def generate_facet_maf_path(facet_path, patient_id, sample_id, best_fit): str: path of the facets maf """ - if best_fit: - manifest_path = ( - facet_path.joinpath( + if (not best_fit): + if sample_id: + maf_path = facet_path.joinpath( + patient_id[:7], f"{sample_id}*", "default", "*[0-9].ccf.maf" + ) + maf_path = get_maf_path(maf_path, patient_id, sample_id) + else: + maf_path = facet_path.joinpath( + patient_id[:7], f"{patient_id}*", "default", "*[0-9].ccf.maf" + ) + maf_path = get_maf_path(maf_path, patient_id, None) + return maf_path + else: + if sample_id: + manifest_path = facet_path.joinpath( patient_id[:7], f"{sample_id}*", "facets_review.manifest" ) - if sample_id - else facet_path.joinpath( + else: + manifest_path = facet_path.joinpath( patient_id[:7], f"{patient_id}*", "facets_review.manifest" ) - ) manifest_path = glob.glob(manifest_path.as_posix()) if len(manifest_path) == 0: if sample_id: @@ -48,35 +58,22 @@ def generate_facet_maf_path(facet_path, patient_id, sample_id, best_fit): manifest_path = [Path(i) for i in manifest_path] manifest_path = sorted(manifest_path, key=lambda i: str(i.stem)) manifest_path_sorted = [str(i) for i in manifest_path] - maf_path = ( - get_maf_path(best_fit_folder, patient_id, None) - if ( - best_fit_folder := get_best_fit_folder( - manifest_path_sorted[0] - ) - ) - else None - ) - elif best_fit_folder := get_best_fit_folder(manifest_path[0]): - maf_path = get_maf_path(best_fit_folder, patient_id, None) + best_fit_folder = get_best_fit_folder(manifest_path_sorted[0]) + if best_fit_folder: + maf_path = get_maf_path(maf_path, patient_id, None) + else: + maf_path = None else: - maf_path = None - + best_fit_folder = get_best_fit_folder(manifest_path[0]) + if best_fit_folder: + maf_path = get_maf_path(maf_path, patient_id, None) + else: + maf_path = None - elif sample_id: - maf_path = facet_path.joinpath( - patient_id[:7], f"{sample_id}*", "default", "*[0-9].ccf.maf" - ) - maf_path = get_maf_path(maf_path, patient_id, sample_id) - else: - maf_path = facet_path.joinpath( - patient_id[:7], f"{patient_id}*", "default", "*[0-9].ccf.maf" - ) - maf_path = get_maf_path(maf_path, patient_id, None) - return maf_path + return maf_path def get_maf_path(maf_path, patient_id, sample_id): - """Get the path to the maf file + """Get the maf file Args: maf_path (pathlib.Path): Base path of the maf file From 8e96a67f772a0e2739742717899d870965a82adc Mon Sep 17 00:00:00 2001 From: Ronak Shah Date: Sat, 11 Mar 2023 19:47:17 -0500 Subject: [PATCH 04/20] Update generate_facet_maf_path.py --- python/run_create_report/modules/generate_facet_maf_path.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/python/run_create_report/modules/generate_facet_maf_path.py b/python/run_create_report/modules/generate_facet_maf_path.py index 2aa7c7b..22720fd 100644 --- a/python/run_create_report/modules/generate_facet_maf_path.py +++ b/python/run_create_report/modules/generate_facet_maf_path.py @@ -60,13 +60,13 @@ def generate_facet_maf_path(facet_path, patient_id, sample_id, best_fit): manifest_path_sorted = [str(i) for i in manifest_path] best_fit_folder = get_best_fit_folder(manifest_path_sorted[0]) if best_fit_folder: - maf_path = get_maf_path(maf_path, patient_id, None) + maf_path = get_maf_path(best_fit_folder, patient_id, None) else: maf_path = None else: best_fit_folder = get_best_fit_folder(manifest_path[0]) if best_fit_folder: - maf_path = get_maf_path(maf_path, patient_id, None) + maf_path = get_maf_path(best_fit_folder, patient_id, None) else: maf_path = None From 4c83bd39c63fc8d26d2b3fea45e6e0def4ca7fac Mon Sep 17 00:00:00 2001 From: Ronak Shah Date: Sat, 11 Mar 2023 19:49:26 -0500 Subject: [PATCH 05/20] Update run_create_report.py --- python/run_create_report/run_create_report.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/python/run_create_report/run_create_report.py b/python/run_create_report/run_create_report.py index f73367f..03e3563 100644 --- a/python/run_create_report/run_create_report.py +++ b/python/run_create_report/run_create_report.py @@ -242,7 +242,7 @@ def main( cmo_patient_id, dmp_patient_id, dmp_sample_id, - facet_path, + facet_path.as_posix(), "create_report.R failed", ] ) @@ -254,7 +254,7 @@ def main( cmo_patient_id, dmp_patient_id, dmp_sample_id, - facet_path, + facet_path.as_posix(), "create_report.R ran with facet maf", ] ) From 7b04321e4b093bec450b3b946acf1c81c229e72d Mon Sep 17 00:00:00 2001 From: Ronak Shah Date: Sat, 11 Mar 2023 20:05:57 -0500 Subject: [PATCH 06/20] Update generate_facet_maf_path.py --- .../modules/generate_facet_maf_path.py | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/python/run_create_report/modules/generate_facet_maf_path.py b/python/run_create_report/modules/generate_facet_maf_path.py index 22720fd..9dbeff6 100644 --- a/python/run_create_report/modules/generate_facet_maf_path.py +++ b/python/run_create_report/modules/generate_facet_maf_path.py @@ -118,15 +118,19 @@ def get_best_fit_folder(facet_manifest_path): """ facet_manifest_path = Path(facet_manifest_path) base_path = facet_manifest_path.parent - facet_manifest = read_manifest(facet_manifest_path) + facet_manifest_all = read_manifest(facet_manifest_path) + facet_manifest = facet_manifest_all.query("facets_qc"=="TRUE") + if facet_manifest.empty: + return(base_path.joinpath("default", "*[0-9].ccf.maf") + ) facet_manifest[['date_reviewed', 'time_reviewed']] = facet_manifest.date_reviewed.str.split(" ", expand = True) facet_manifest['date_reviewed'] = pd.to_datetime(facet_manifest['date_reviewed']) facet_manifest.sort_values(by='date_reviewed',ascending=False) folder_name = facet_manifest['fit_name'].iloc[0] return ( - base_path.joinpath(folder_name, "*[0-9].ccf.maf") + (base_path.joinpath(folder_name, "*[0-9].ccf.maf")) if "default" in folder_name or "alt" in folder_name - else base_path.joinpath("default", "*[0-9].ccf.maf") + else (base_path.joinpath("default", "*[0-9].ccf.maf")) ) \ No newline at end of file From 9a2a73f28b8c8024f93f5cefaa66ea08e7046ad7 Mon Sep 17 00:00:00 2001 From: Ronak Shah Date: Sat, 11 Mar 2023 20:10:13 -0500 Subject: [PATCH 07/20] Update generate_facet_maf_path.py --- python/run_create_report/modules/generate_facet_maf_path.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/run_create_report/modules/generate_facet_maf_path.py b/python/run_create_report/modules/generate_facet_maf_path.py index 9dbeff6..1bc6d29 100644 --- a/python/run_create_report/modules/generate_facet_maf_path.py +++ b/python/run_create_report/modules/generate_facet_maf_path.py @@ -119,7 +119,7 @@ def get_best_fit_folder(facet_manifest_path): facet_manifest_path = Path(facet_manifest_path) base_path = facet_manifest_path.parent facet_manifest_all = read_manifest(facet_manifest_path) - facet_manifest = facet_manifest_all.query("facets_qc"=="TRUE") + facet_manifest = facet_manifest_all.loc(facet_manifest_all.facets_qc) if facet_manifest.empty: return(base_path.joinpath("default", "*[0-9].ccf.maf") ) From 339be8a3e00ea99d56da934fd7c71d4405f8c6da Mon Sep 17 00:00:00 2001 From: Ronak Shah Date: Sat, 11 Mar 2023 20:12:13 -0500 Subject: [PATCH 08/20] Update generate_facet_maf_path.py --- python/run_create_report/modules/generate_facet_maf_path.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/run_create_report/modules/generate_facet_maf_path.py b/python/run_create_report/modules/generate_facet_maf_path.py index 1bc6d29..eb09057 100644 --- a/python/run_create_report/modules/generate_facet_maf_path.py +++ b/python/run_create_report/modules/generate_facet_maf_path.py @@ -119,7 +119,7 @@ def get_best_fit_folder(facet_manifest_path): facet_manifest_path = Path(facet_manifest_path) base_path = facet_manifest_path.parent facet_manifest_all = read_manifest(facet_manifest_path) - facet_manifest = facet_manifest_all.loc(facet_manifest_all.facets_qc) + facet_manifest = facet_manifest_all.loc[facet_manifest_all.facets_qc] if facet_manifest.empty: return(base_path.joinpath("default", "*[0-9].ccf.maf") ) From 51504468444dd9440706d5d23ff1540d1c8d9a9e Mon Sep 17 00:00:00 2001 From: Ronak Shah Date: Sat, 11 Mar 2023 21:03:14 -0500 Subject: [PATCH 09/20] Update generate_facet_maf_path.py --- python/run_create_report/modules/generate_facet_maf_path.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/python/run_create_report/modules/generate_facet_maf_path.py b/python/run_create_report/modules/generate_facet_maf_path.py index eb09057..2e2b4eb 100644 --- a/python/run_create_report/modules/generate_facet_maf_path.py +++ b/python/run_create_report/modules/generate_facet_maf_path.py @@ -119,12 +119,12 @@ def get_best_fit_folder(facet_manifest_path): facet_manifest_path = Path(facet_manifest_path) base_path = facet_manifest_path.parent facet_manifest_all = read_manifest(facet_manifest_path) + facet_manifest_all[['date_reviewed', 'time_reviewed']] = facet_manifest_all.date_reviewed.str.split(" ", expand = True) + facet_manifest_all['date_reviewed'] = pd.to_datetime(facet_manifest_all['date_reviewed']) facet_manifest = facet_manifest_all.loc[facet_manifest_all.facets_qc] if facet_manifest.empty: return(base_path.joinpath("default", "*[0-9].ccf.maf") ) - facet_manifest[['date_reviewed', 'time_reviewed']] = facet_manifest.date_reviewed.str.split(" ", expand = True) - facet_manifest['date_reviewed'] = pd.to_datetime(facet_manifest['date_reviewed']) facet_manifest.sort_values(by='date_reviewed',ascending=False) folder_name = facet_manifest['fit_name'].iloc[0] return ( From 73e4c46014ed641375fb4173eee04fe6973dabc6 Mon Sep 17 00:00:00 2001 From: Ronak Shah Date: Sat, 11 Mar 2023 21:55:38 -0500 Subject: [PATCH 10/20] Update run_create_report.py --- python/run_create_report/run_create_report.py | 226 +++++++++--------- 1 file changed, 112 insertions(+), 114 deletions(-) diff --git a/python/run_create_report/run_create_report.py b/python/run_create_report/run_create_report.py index 03e3563..ec77cc8 100644 --- a/python/run_create_report/run_create_report.py +++ b/python/run_create_report/run_create_report.py @@ -1,15 +1,15 @@ -import typer -import pandas as pd from pathlib import Path -from modules.run_cmd import run_cmd -from modules.read_manifest import read_manifest + +import typer from modules.check_required_columns import check_required_columns +from modules.generate_create_report_cmd import generate_create_report_cmd +from modules.generate_facet_maf_path import generate_facet_maf_path from modules.generate_repo_paths import generate_repo_path from modules.get_small_variant_csv import get_small_variant_csv -from modules.generate_facet_maf_path import generate_facet_maf_path -from modules.generate_create_report_cmd import generate_create_report_cmd -from rich.progress import Progress, SpinnerColumn, TextColumn +from modules.read_manifest import read_manifest +from modules.run_cmd import run_cmd from rich import print +from rich.progress import Progress, SpinnerColumn, TextColumn def main( @@ -187,130 +187,128 @@ def main( facet_path = generate_facet_maf_path( facet_repo, dmp_patient_id, dmp_sample_id, best_fit ) - if not facet_path: - typer.secho( - f"Running for patient with CMO ID {cmo_patient_id}, and DMP ID {dmp_patient_id} without facets maf", - err=True, - fg=typer.colors.BRIGHT_RED, - ) else: + dmp_sample_id = None if not dmp_patient_id or dmp_patient_id is not None: facet_path = generate_facet_maf_path( - facet_repo, dmp_patient_id, None, best_fit + facet_repo, dmp_patient_id, dmp_sample_id, best_fit ) else: facet_path = None - if not facet_path: + if not facet_path: + typer.secho( + f"Running for patient with CMO ID {cmo_patient_id}, and DMP ID {dmp_patient_id} without facets maf", + err=True, + fg=typer.colors.BRIGHT_RED, + ) + if facet_path: + typer.secho( + f"Running for patient with CMO ID {cmo_patient_id}, and DMP ID {dmp_patient_id} with facets maf: {facet_path}", + fg=typer.colors.BRIGHT_GREEN, + ) + facet_path = Path(facet_path) + maf_id = facet_path.stem + dmp_sample_id = maf_id.split("_", 1)[0] + if copy_facet: + if not copy_facet_dir: + copy_facet_dir = Path.cwd() / "facet_files" + copy_facet_dir.mkdir(parents=True, exist_ok=True) + cp_facet_cmd = f"cp {facet_path} {copy_facet_dir.as_posix()}" + p1 = run_cmd(cp_facet_cmd, force) typer.secho( - f"Running for patient with CMO ID {cmo_patient_id}, and DMP ID {dmp_patient_id} without facets maf", - err=True, - fg=typer.colors.BRIGHT_RED, + f"Done copying facet maf file for patient with CMO ID {cmo_patient_id}, and DMP ID {dmp_patient_id} and output is written in {copy_facet_dir}", + fg=typer.colors.BRIGHT_GREEN, ) - # Get the sample id from the Facet file - if facet_path: - facet_path = Path(facet_path) - maf_id = facet_path.stem - dmp_sample_id = maf_id.split("_", 1)[0] - if copy_facet: - if not copy_facet_dir: - copy_facet_dir = Path.cwd() / "facet_files" - copy_facet_dir.mkdir(parents=True, exist_ok=True) - cp_facet_cmd = f"cp {facet_path} {copy_facet_dir.as_posix()}" - p1 = run_cmd(cp_facet_cmd, force) - typer.secho( - f"Done copying facet maf file for patient with CMO ID {cmo_patient_id}, and DMP ID {dmp_patient_id} and output is written in {copy_facet_dir}", - fg=typer.colors.BRIGHT_GREEN, + create_report_cmd, html_output = generate_create_report_cmd( + script_path, + markdown, + template_path, + cmo_patient_id, + small_variants_path, + manifest, + cnv_path, + dmp_patient_id, + dmp_sample_id, + facet_path, + tumor_type, + ) + p2 = run_cmd(create_report_cmd, force) + if "Error" in str(p2) or "error" in str(p2): + summary.append( + "\t".join( + [ + cmo_patient_id, + dmp_patient_id, + str(dmp_sample_id), + facet_path.as_posix(), + "create_report.R failed", + ] ) - create_report_cmd, html_output = generate_create_report_cmd( - script_path, - markdown, - template_path, - cmo_patient_id, - small_variants_path, - manifest, - cnv_path, - dmp_patient_id, - dmp_sample_id, - facet_path, - tumor_type, ) - p2 = run_cmd(create_report_cmd, force) - if "Error" in str(p2) or "error" in str(p2): - summary.append( - "\t".join( - [ - cmo_patient_id, - dmp_patient_id, - dmp_sample_id, - facet_path.as_posix(), - "create_report.R failed", - ] - ) - ) - else: - summary.append( - "\t".join( - [ - cmo_patient_id, - dmp_patient_id, - dmp_sample_id, - facet_path.as_posix(), - "create_report.R ran with facet maf", - ] - ) + else: + summary.append( + "\t".join( + [ + cmo_patient_id, + dmp_patient_id, + str(dmp_sample_id), + facet_path.as_posix(), + "create_report.R ran with facet maf", + ] ) + ) + typer.secho( + f"Done running create_report.R for patient with CMO ID {cmo_patient_id}, and DMP ID {dmp_patient_id} and output is written in {html_output}", + fg=typer.colors.BRIGHT_GREEN, + ) + else: + if copy_facet: typer.secho( - f"Done running create_report.R for patient with CMO ID {cmo_patient_id}, and DMP ID {dmp_patient_id} and output is written in {html_output}", - fg=typer.colors.BRIGHT_GREEN, + f"No maf file to copy for patient with CMO ID {cmo_patient_id}, and DMP ID {dmp_patient_id} and thus skipping", + fg=typer.colors.BRIGHT_RED, ) - else: - if copy_facet: - typer.secho( - f"No maf file to copy for patient with CMO ID {cmo_patient_id}, and DMP ID {dmp_patient_id} and thus skipping", - fg=typer.colors.BRIGHT_RED, + create_report_cmd, html_output = generate_create_report_cmd( + script_path, + markdown, + template_path, + cmo_patient_id, + small_variants_path, + manifest, + cnv_path, + dmp_patient_id, + dmp_sample_id, + facet_path, + tumor_type, + ) + p3 = run_cmd(create_report_cmd, force) + if "Error" in str(p3) or "error" in str(p3): + summary.append( + "\t".join( + [ + cmo_patient_id, + dmp_patient_id, + str(dmp_sample_id), + "NA", + "create_report.R failed", + ] ) - create_report_cmd, html_output = generate_create_report_cmd( - script_path, - markdown, - template_path, - cmo_patient_id, - small_variants_path, - manifest, - cnv_path, - dmp_patient_id, - dmp_sample_id, - facet_path, - tumor_type, ) - p3 = run_cmd(create_report_cmd, force) - if "Error" in str(p3) or "error" in str(p3): - summary.append( - "\t".join( - [ - cmo_patient_id, - dmp_patient_id, - dmp_sample_id, - "NA", - "create_report.R failed", - ] - ) - ) - else: - summary.append( - "\t".join( - [ - cmo_patient_id, - dmp_patient_id, - dmp_sample_id, - "NA", - "create_report.R ran without facet maf", - ] - ) + else: + summary.append( + "\t".join( + [ + cmo_patient_id, + dmp_patient_id, + str(dmp_sample_id), + "NA", + "create_report.R ran without facet maf", + ] ) - typer.secho( - f"Done running create_report.R for patient with CMO ID {cmo_patient_id} and output is written in {html_output}", - fg=typer.colors.BRIGHT_GREEN, ) + typer.secho( + f"Done running create_report.R for patient with CMO ID {cmo_patient_id} and output is written in {html_output}", + fg=typer.colors.BRIGHT_GREEN, + ) print("\nPatient ids that were skipped as facet maf could not be found\n") print(summary) From 2e0f26a7af9b7f81d10cf6c65cd8589ba757a558 Mon Sep 17 00:00:00 2001 From: Ronak Shah Date: Sat, 11 Mar 2023 22:06:14 -0500 Subject: [PATCH 11/20] Update run_create_report.py --- python/run_create_report/run_create_report.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/run_create_report/run_create_report.py b/python/run_create_report/run_create_report.py index ec77cc8..54379a2 100644 --- a/python/run_create_report/run_create_report.py +++ b/python/run_create_report/run_create_report.py @@ -310,7 +310,7 @@ def main( fg=typer.colors.BRIGHT_GREEN, ) - print("\nPatient ids that were skipped as facet maf could not be found\n") + print("\nSummary for all patient processed..\n") print(summary) summary_file = Path.cwd().joinpath("run_create_report_summary.tsv") with open(summary_file, "w") as fp: From 22c927d46bb25967921c7cd707d310f5b659bbf2 Mon Sep 17 00:00:00 2001 From: Ronak Shah Date: Sat, 11 Mar 2023 22:08:29 -0500 Subject: [PATCH 12/20] cross check facet review results --- python/run_create_report/modules/generate_facet_maf_path.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/python/run_create_report/modules/generate_facet_maf_path.py b/python/run_create_report/modules/generate_facet_maf_path.py index 2e2b4eb..9e98404 100644 --- a/python/run_create_report/modules/generate_facet_maf_path.py +++ b/python/run_create_report/modules/generate_facet_maf_path.py @@ -3,6 +3,7 @@ import pandas as pd from pathlib import Path from modules.read_manifest import read_manifest +from rich import print def generate_facet_maf_path(facet_path, patient_id, sample_id, best_fit): """Get path of maf associated with facet-suite output @@ -122,6 +123,9 @@ def get_best_fit_folder(facet_manifest_path): facet_manifest_all[['date_reviewed', 'time_reviewed']] = facet_manifest_all.date_reviewed.str.split(" ", expand = True) facet_manifest_all['date_reviewed'] = pd.to_datetime(facet_manifest_all['date_reviewed']) facet_manifest = facet_manifest_all.loc[facet_manifest_all.facets_qc] + print("\nFacet Review Manifest\n") + print(facet_manifest) + print("\n--------------------------------") if facet_manifest.empty: return(base_path.joinpath("default", "*[0-9].ccf.maf") ) From 33c4de16ebc32398075362171e2a287f00d6919d Mon Sep 17 00:00:00 2001 From: Ronak Shah Date: Sat, 11 Mar 2023 22:10:41 -0500 Subject: [PATCH 13/20] Update generate_facet_maf_path.py --- python/run_create_report/modules/generate_facet_maf_path.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/python/run_create_report/modules/generate_facet_maf_path.py b/python/run_create_report/modules/generate_facet_maf_path.py index 9e98404..141ced6 100644 --- a/python/run_create_report/modules/generate_facet_maf_path.py +++ b/python/run_create_report/modules/generate_facet_maf_path.py @@ -123,13 +123,13 @@ def get_best_fit_folder(facet_manifest_path): facet_manifest_all[['date_reviewed', 'time_reviewed']] = facet_manifest_all.date_reviewed.str.split(" ", expand = True) facet_manifest_all['date_reviewed'] = pd.to_datetime(facet_manifest_all['date_reviewed']) facet_manifest = facet_manifest_all.loc[facet_manifest_all.facets_qc] - print("\nFacet Review Manifest\n") - print(facet_manifest) - print("\n--------------------------------") if facet_manifest.empty: return(base_path.joinpath("default", "*[0-9].ccf.maf") ) facet_manifest.sort_values(by='date_reviewed',ascending=False) + print("\nFacet Review Manifest\n") + print(facet_manifest) + print("\n--------------------------------") folder_name = facet_manifest['fit_name'].iloc[0] return ( (base_path.joinpath(folder_name, "*[0-9].ccf.maf")) From 91a01fbed70d6e4e8b8729a2d09dacf00a3c401d Mon Sep 17 00:00:00 2001 From: Ronak Shah Date: Sat, 11 Mar 2023 22:12:31 -0500 Subject: [PATCH 14/20] Update generate_facet_maf_path.py --- python/run_create_report/modules/generate_facet_maf_path.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/python/run_create_report/modules/generate_facet_maf_path.py b/python/run_create_report/modules/generate_facet_maf_path.py index 141ced6..d62f5e0 100644 --- a/python/run_create_report/modules/generate_facet_maf_path.py +++ b/python/run_create_report/modules/generate_facet_maf_path.py @@ -126,11 +126,11 @@ def get_best_fit_folder(facet_manifest_path): if facet_manifest.empty: return(base_path.joinpath("default", "*[0-9].ccf.maf") ) - facet_manifest.sort_values(by='date_reviewed',ascending=False) + facet_manifest_sort = facet_manifest.sort_values(by='date_reviewed',ascending=False) print("\nFacet Review Manifest\n") - print(facet_manifest) + print(facet_manifest_sort) print("\n--------------------------------") - folder_name = facet_manifest['fit_name'].iloc[0] + folder_name = facet_manifest_sort['fit_name'].iloc[0] return ( (base_path.joinpath(folder_name, "*[0-9].ccf.maf")) if "default" in folder_name or "alt" in folder_name From e8c02e01ebc483b6c35d9d8708a118ffc16b59fe Mon Sep 17 00:00:00 2001 From: Ronak Shah Date: Sat, 11 Mar 2023 22:14:04 -0500 Subject: [PATCH 15/20] Update generate_facet_maf_path.py --- python/run_create_report/modules/generate_facet_maf_path.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/python/run_create_report/modules/generate_facet_maf_path.py b/python/run_create_report/modules/generate_facet_maf_path.py index d62f5e0..6c119c6 100644 --- a/python/run_create_report/modules/generate_facet_maf_path.py +++ b/python/run_create_report/modules/generate_facet_maf_path.py @@ -127,9 +127,6 @@ def get_best_fit_folder(facet_manifest_path): return(base_path.joinpath("default", "*[0-9].ccf.maf") ) facet_manifest_sort = facet_manifest.sort_values(by='date_reviewed',ascending=False) - print("\nFacet Review Manifest\n") - print(facet_manifest_sort) - print("\n--------------------------------") folder_name = facet_manifest_sort['fit_name'].iloc[0] return ( (base_path.joinpath(folder_name, "*[0-9].ccf.maf")) From 1f29a3980fe5edf3618bf8ab9b36002fa6bc8188 Mon Sep 17 00:00:00 2001 From: Ronak Shah Date: Sat, 11 Mar 2023 22:51:01 -0500 Subject: [PATCH 16/20] Update generate_facet_maf_path.py --- python/run_create_report/modules/generate_facet_maf_path.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/python/run_create_report/modules/generate_facet_maf_path.py b/python/run_create_report/modules/generate_facet_maf_path.py index 6c119c6..d776a3b 100644 --- a/python/run_create_report/modules/generate_facet_maf_path.py +++ b/python/run_create_report/modules/generate_facet_maf_path.py @@ -122,7 +122,8 @@ def get_best_fit_folder(facet_manifest_path): facet_manifest_all = read_manifest(facet_manifest_path) facet_manifest_all[['date_reviewed', 'time_reviewed']] = facet_manifest_all.date_reviewed.str.split(" ", expand = True) facet_manifest_all['date_reviewed'] = pd.to_datetime(facet_manifest_all['date_reviewed']) - facet_manifest = facet_manifest_all.loc[facet_manifest_all.facets_qc] + facet_manifest_true = facet_manifest_all.loc[facet_manifest_all.facets_qc] + facet_manifest = facet_manifest_true.query["review_status == reviewed_best_fit"] if facet_manifest.empty: return(base_path.joinpath("default", "*[0-9].ccf.maf") ) From b82d48640584d0e9ebf0b513ad3b66fe7024c033 Mon Sep 17 00:00:00 2001 From: Ronak Shah Date: Sat, 11 Mar 2023 22:54:32 -0500 Subject: [PATCH 17/20] Update generate_facet_maf_path.py --- .../run_create_report/modules/generate_facet_maf_path.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/python/run_create_report/modules/generate_facet_maf_path.py b/python/run_create_report/modules/generate_facet_maf_path.py index d776a3b..b2031ae 100644 --- a/python/run_create_report/modules/generate_facet_maf_path.py +++ b/python/run_create_report/modules/generate_facet_maf_path.py @@ -120,14 +120,20 @@ def get_best_fit_folder(facet_manifest_path): facet_manifest_path = Path(facet_manifest_path) base_path = facet_manifest_path.parent facet_manifest_all = read_manifest(facet_manifest_path) + #split date_reviewed in two columns facet_manifest_all[['date_reviewed', 'time_reviewed']] = facet_manifest_all.date_reviewed.str.split(" ", expand = True) + #convert date_reviewed to date facet_manifest_all['date_reviewed'] = pd.to_datetime(facet_manifest_all['date_reviewed']) + #get facets_qc == TRUE rows facet_manifest_true = facet_manifest_all.loc[facet_manifest_all.facets_qc] - facet_manifest = facet_manifest_true.query["review_status == reviewed_best_fit"] + #get review_status == reviewed_best_fit rows + facet_manifest = facet_manifest_true.query("review_status == reviewed_best_fit") if facet_manifest.empty: return(base_path.joinpath("default", "*[0-9].ccf.maf") ) + #sort by date facet_manifest_sort = facet_manifest.sort_values(by='date_reviewed',ascending=False) + #take the first row folder_name = facet_manifest_sort['fit_name'].iloc[0] return ( (base_path.joinpath(folder_name, "*[0-9].ccf.maf")) From 1ce6befba699c7c4d836f8fb3cd11e8de5be170a Mon Sep 17 00:00:00 2001 From: Ronak Shah Date: Sat, 11 Mar 2023 22:58:11 -0500 Subject: [PATCH 18/20] Update generate_facet_maf_path.py --- python/run_create_report/modules/generate_facet_maf_path.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/run_create_report/modules/generate_facet_maf_path.py b/python/run_create_report/modules/generate_facet_maf_path.py index b2031ae..becbfe4 100644 --- a/python/run_create_report/modules/generate_facet_maf_path.py +++ b/python/run_create_report/modules/generate_facet_maf_path.py @@ -127,7 +127,7 @@ def get_best_fit_folder(facet_manifest_path): #get facets_qc == TRUE rows facet_manifest_true = facet_manifest_all.loc[facet_manifest_all.facets_qc] #get review_status == reviewed_best_fit rows - facet_manifest = facet_manifest_true.query("review_status == reviewed_best_fit") + facet_manifest = facet_manifest_true.filter["review_status" == "reviewed_best_fit"] if facet_manifest.empty: return(base_path.joinpath("default", "*[0-9].ccf.maf") ) From e5c94865641b7df4963565cbc22215612f7bebc0 Mon Sep 17 00:00:00 2001 From: Ronak Shah Date: Sat, 11 Mar 2023 23:00:38 -0500 Subject: [PATCH 19/20] Update generate_facet_maf_path.py --- python/run_create_report/modules/generate_facet_maf_path.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/run_create_report/modules/generate_facet_maf_path.py b/python/run_create_report/modules/generate_facet_maf_path.py index becbfe4..34fc4f0 100644 --- a/python/run_create_report/modules/generate_facet_maf_path.py +++ b/python/run_create_report/modules/generate_facet_maf_path.py @@ -127,7 +127,7 @@ def get_best_fit_folder(facet_manifest_path): #get facets_qc == TRUE rows facet_manifest_true = facet_manifest_all.loc[facet_manifest_all.facets_qc] #get review_status == reviewed_best_fit rows - facet_manifest = facet_manifest_true.filter["review_status" == "reviewed_best_fit"] + facet_manifest = facet_manifest_true[facet_manifest_true.review_status.str.contains("reviewed_best_fit") == True] if facet_manifest.empty: return(base_path.joinpath("default", "*[0-9].ccf.maf") ) From ea3f50bbd33100fc137d76ffc1cb56beb19e9a59 Mon Sep 17 00:00:00 2001 From: Ronak Shah Date: Sat, 11 Mar 2023 23:06:38 -0500 Subject: [PATCH 20/20] fix imports --- python/run_create_report/modules/check_required_columns.py | 2 +- .../run_create_report/modules/generate_facet_maf_path.py | 7 ++++--- python/run_create_report/modules/generate_repo_paths.py | 1 - python/run_create_report/modules/get_small_variant_csv.py | 3 ++- python/run_create_report/modules/run_cmd.py | 1 + 5 files changed, 8 insertions(+), 6 deletions(-) diff --git a/python/run_create_report/modules/check_required_columns.py b/python/run_create_report/modules/check_required_columns.py index 3233fb7..e50420a 100644 --- a/python/run_create_report/modules/check_required_columns.py +++ b/python/run_create_report/modules/check_required_columns.py @@ -1,5 +1,5 @@ -import typer import pandas as pd +import typer def check_required_columns(manifest, template_days=None): diff --git a/python/run_create_report/modules/generate_facet_maf_path.py b/python/run_create_report/modules/generate_facet_maf_path.py index 34fc4f0..cbe62ab 100644 --- a/python/run_create_report/modules/generate_facet_maf_path.py +++ b/python/run_create_report/modules/generate_facet_maf_path.py @@ -1,9 +1,10 @@ -import typer import glob -import pandas as pd from pathlib import Path + +import pandas as pd +import typer from modules.read_manifest import read_manifest -from rich import print + def generate_facet_maf_path(facet_path, patient_id, sample_id, best_fit): """Get path of maf associated with facet-suite output diff --git a/python/run_create_report/modules/generate_repo_paths.py b/python/run_create_report/modules/generate_repo_paths.py index 82f8ffa..5018cb0 100644 --- a/python/run_create_report/modules/generate_repo_paths.py +++ b/python/run_create_report/modules/generate_repo_paths.py @@ -1,5 +1,4 @@ import typer -from pathlib import Path def generate_repo_path( diff --git a/python/run_create_report/modules/get_small_variant_csv.py b/python/run_create_report/modules/get_small_variant_csv.py index e522e96..e83b9e4 100644 --- a/python/run_create_report/modules/get_small_variant_csv.py +++ b/python/run_create_report/modules/get_small_variant_csv.py @@ -1,6 +1,7 @@ -import typer import glob +import typer + def get_small_variant_csv(patient_id, csv_path): """Get the path to CSV file to be used for a given patient containing all variants diff --git a/python/run_create_report/modules/run_cmd.py b/python/run_create_report/modules/run_cmd.py index 00a0f6e..4a1f90a 100644 --- a/python/run_create_report/modules/run_cmd.py +++ b/python/run_create_report/modules/run_cmd.py @@ -1,4 +1,5 @@ import subprocess + import typer from rich import print