diff --git a/Dockerfile b/Dockerfile
new file mode 100644
index 0000000..e8935bd
--- /dev/null
+++ b/Dockerfile
@@ -0,0 +1,30 @@
+# Use an official Python 3.8 runtime as a parent image
+FROM python:3.8-slim-buster
+
+# Install bash and bash-completion; update and install share one layer and the
+# apt package lists are removed in that same layer so they never reach the image
+RUN apt-get update \
+    && apt-get install -y --no-install-recommends bash bash-completion \
+    && rm -rf /var/lib/apt/lists/*
+
+# Set the working directory in the container to /app
+WORKDIR /app
+
+# Copy the current directory contents into the container at /app
+COPY . /app
+
+# Install the package described by setup.py, then force pymongo 3.8.0
+# (presumably so the standalone bson distribution cannot shadow pymongo's own bson - confirm)
+RUN pip install --no-cache-dir . \
+    && pip uninstall -y bson pymongo \
+    && pip install --no-cache-dir pymongo==3.8.0
+
+# Make port 80 available to the world outside this container
+EXPOSE 80
+
+# Define environment variables
+ENV NAME=MatchEngineV2 \
+    SECRETS_JSON=/app/secrets.json
+
+# Keep the container alive so commands can be run inside it (e.g. with `docker exec`)
+CMD ["tail", "-f", "/dev/null"]
diff --git a/docker-compose.yml b/docker-compose.yml
new file mode 100644
index 0000000..ff45442
--- /dev/null
+++ b/docker-compose.yml
@@ -0,0 +1,12 @@
+version: '3'
+services:
+  app:
+    build:
+      context: .
+ dockerfile: Dockerfile + ports: + - "8000:80" + volumes: + - .:/app + environment: + - NAME=MatchEngineV2 diff --git a/matchengine/config/dfci_config.json b/matchengine/config/dfci_config.json index 4251a45..3a2d7c4 100644 --- a/matchengine/config/dfci_config.json +++ b/matchengine/config/dfci_config.json @@ -3,7 +3,7 @@ "trial_identifier": "protocol_no", "match_trial_link_id": "protocol_no", "trial_status_key": { - "key_name": null, + "key_name": "summary", "open_to_accrual_values": ["open to accrual"] }, "ctml_collection_mappings": { @@ -16,6 +16,10 @@ "sample_key": "BIRTH_DATE_INT", "sample_value": "age_range_to_date_int_query" }, + "AGE_EXPRESSION": { + "sample_key": "AGE", + "sample_value": "age_expression_query" + }, "ONCOTREE_PRIMARY_DIAGNOSIS": { "sample_key": "ONCOTREE_PRIMARY_DIAGNOSIS_NAME", "sample_value": "external_file_mapping", @@ -30,13 +34,16 @@ "sample_value": "tmb_range_to_query" }, "HER2_STATUS": { - "ignore": true + "sample_key": "HER2_STATUS", + "sample_value": "true_false_map" }, "PR_STATUS": { - "ignore": true + "sample_key": "PR_STATUS", + "sample_value": "true_false_map" }, "ER_STATUS": { - "ignore": true + "sample_key": "ER_STATUS", + "sample_value": "true_false_map" }, "DISEASE_STATUS": { "ignore": true @@ -114,6 +121,14 @@ "FUSION_PARTNER_HUGO_SYMBOL": { "sample_key": "FUSION_PARTNER_HUGO_SYMBOL", "sample_value": "nomap" + }, + "MOLECULAR_FUNCTION": { + "sample_key": "MUTATION_EFFECT", + "sample_value": "molecular_function_map" + }, + "MATCH_ALL": { + "sample_key": "MATCH_ALL", + "sample_value": "genomic_dummy_map" } } }, @@ -157,7 +172,9 @@ "UVA_STATUS", "LEFT_PARTNER_GENE", "RIGHT_PARTNER_GENE", - "STRUCTURAL_VARIANT_TYPE" + "STRUCTURAL_VARIANT_TYPE", + "MOLECULAR_FUNCTION", + "MUTATION_EFFECT" ], "prior_treatments": [ "DRUG" @@ -169,13 +186,20 @@ "MRN", "ONCOTREE_PRIMARY_DIAGNOSIS_NAME", "TUMOR_MUTATIONAL_BURDEN_PER_MEGABASE", - "VITAL_STATUS" + "PATIENT_ID", + "VITAL_STATUS", + "AGE", + "HER2_STATUS", + "PR_STATUS", + "ER_STATUS", 
+ "STUDY_ID" ], "trial": [ "protocol_no", "nct_id", "treatment_list", "status", + "short_title", "_summary" ] }, @@ -275,7 +299,8 @@ "UVA_STATUS", "LEFT_PARTNER_GENE", "RIGHT_PARTNER_GENE", - "TRUE_HUGO_SYMBOL" + "TRUE_HUGO_SYMBOL", + "MOLECULAR_FUNCTION" ], "clinical": [ "GENDER", @@ -284,7 +309,12 @@ "ONCOTREE_PRIMARY_DIAGNOSIS_NAME", "TUMOR_MUTATIONAL_BURDEN_PER_MEGABASE", "VITAL_STATUS", - "BIRTH_DATE_INT" + "BIRTH_DATE_INT", + "AGE", + "HER2_STATUS", + "PR_STATUS", + "ER_STATUS", + "STUDY_ID" ], "trial_match": [ "hash", diff --git a/matchengine/internals/engine.py b/matchengine/internals/engine.py index c416b5d..8da600d 100644 --- a/matchengine/internals/engine.py +++ b/matchengine/internals/engine.py @@ -143,6 +143,7 @@ def __init__( db_secrets_class: str = None, report_all_clinical_reasons: bool = False, ignore_run_log: bool = False, + ignore_report_date: bool = False, skip_run_log_entry: bool = False, trial_match_collection: str = "trial_match", drop: bool = False, @@ -161,6 +162,7 @@ def __init__( self.run_id = uuid.uuid4() self.run_log_entries = dict() self.ignore_run_log = ignore_run_log + self.ignore_report_date = ignore_report_date self.skip_run_log_entry = skip_run_log_entry self.clinical_run_log_entries = dict() self._protocol_nos_param = list(protocol_nos) if protocol_nos is not None else protocol_nos @@ -514,7 +516,7 @@ async def _async_get_matches_for_trial(self, protocol_no: str) -> Dict[str, List # check if node has any age criteria, to know to check for newly qualifying patients # or patients aging out for k, v in criteria.get('clinical', dict()).items(): - if k.lower() == 'age_numerical': + if k.lower() == 'age_numerical' or k.lower() == 'age_expression': age_criteria.add(v) if self.debug: log.info(f"Query: {query}") @@ -565,7 +567,7 @@ def _get_clinical_data(self): query: Dict = {} if self.sample_ids is not None: query.update({"SAMPLE_ID": {"$in": list(self.sample_ids)}}) - projection = {'_id': 1, 'SAMPLE_ID': 1, 'VITAL_STATUS': 1, 
'BIRTH_DATE_INT': 1}
+        projection = {'_id': 1, 'SAMPLE_ID': 1, 'VITAL_STATUS': 1, 'BIRTH_DATE_INT': 1, 'AGE': 1}
         if not self.ignore_run_log:
             projection.update({'_updated': 1, 'run_history': 1})
         projection.update({
@@ -598,11 +600,14 @@ def get_clinical_deceased(self) -> Set[ClinicalID]:
                 in self._clinical_data.items()
                 if clinical_data['VITAL_STATUS'] == 'deceased'}
 
+    # use the BIRTH_DATE_INT field, otherwise fall back to the AGE field
     def get_clinical_birth_dates(self) -> Dict[ClinicalID, int]:
-        return {clinical_id: clinical_data['BIRTH_DATE_INT']
-                for clinical_id, clinical_data
-                in self._clinical_data.items()
-                }
+        """Map every loaded clinical id to its BIRTH_DATE_INT, or to AGE when that field is absent."""
+        # NOTE(review): load.py stores BIRTH_DATE_INT as YYYYMMDD and AGE as a plain int; confirm callers handle both
+        return {
+            clinical_id: clinical_data['BIRTH_DATE_INT'] if 'BIRTH_DATE_INT' in clinical_data else clinical_data['AGE']
+            for clinical_id, clinical_data in self._clinical_data.items()
+        }
 
     def get_clinical_ids_from_sample_ids(self) -> Dict[ClinicalID, str]:
         """
@@ -613,7 +618,8 @@ def get_clinical_ids_from_sample_ids(self) -> Dict[ClinicalID, str]:
                     self._clinical_data.items()}
         else:
             return {clinical_id: clinical_data['SAMPLE_ID'] for clinical_id, clinical_data in
-                    self._clinical_data.items() if clinical_data['VITAL_STATUS'] == 'alive'}
+                    self._clinical_data.items() if (clinical_data['VITAL_STATUS'] is not None and clinical_data['VITAL_STATUS'].lower() == 'alive')}
+
 
     def get_trials(self) -> Dict[str, Trial]:
         """
diff --git a/matchengine/internals/load.py b/matchengine/internals/load.py
index eb14f27..eb5d7d6 100644
--- a/matchengine/internals/load.py
+++ b/matchengine/internals/load.py
@@ -5,6 +5,7 @@
 import os
 from argparse import Namespace
 from contextlib import ExitStack
+from typing import List
 
 import yaml
 from bson import json_util
@@ -14,6 +15,12 @@
 logging.basicConfig(level=logging.INFO)
 log = logging.getLogger('matchengine')
 
+def load_from_variable(data, data_format='json'):
+    with ExitStack() as stack:
+        db_rw = stack.enter_context(MongoDBConnection(read_only=False, db="matchminer", async_init=False))
+        log.info('Adding
trial(s) to mongo...') + if data_format == 'json': + load_from_memory(db_rw, data) def load(args: Namespace): """ @@ -56,16 +63,32 @@ def load(args: Namespace): def load_trials(db_rw, args: Namespace): if args.trial_format == 'json': load_trials_json(args, db_rw) - elif args.trial_format == 'yaml': + elif args.trial_format == 'yml': load_trials_yaml(args, db_rw) def load_trials_yaml(args: Namespace, db_rw): if os.path.isdir(args.trial): - load_dir(args, db_rw, "yaml", args.trial, 'trial') + load_dir(args, db_rw, "yml", args.trial, 'trial') else: - load_file(db_rw, 'yaml', args.trial, 'trial') - + load_file(db_rw, 'yml', args.trial, 'trial') + +def load_from_memory(db_rw, json_list: List[dict]): + for data in json_list: + if is_valid_single_json_dict(data): + for key in list(data.keys()): + if key == 'BIRTH_DATE': + data[key] = convert_birthdate(data[key]) + data['BIRTH_DATE_INT'] = int(data[key].strftime('%Y%m%d')) + if key == 'AGE': + data[key] = int(data[key]) + db_rw.trial.insert_one(data) + +def is_valid_single_json_dict(json_dict: dict): + """Check if a JSON file is a single object or an array of JSON objects""" + if json_dict.__class__ is list: + return False + return True def load_trials_json(args: Namespace, db_rw): # load a directory of json files @@ -108,6 +131,21 @@ def load_trials_json(args: Namespace, db_rw): ######################## # patient data loading ######################## + +def load_clinical_via_api(file_path: str): + with ExitStack() as stack: + db_rw = stack.enter_context(MongoDBConnection(read_only=False, db="matchminer", async_init=False)) + load_file(db_rw, 'csv', file_path, 'clinical') + +def load_genomic_via_api(file_path: str): + with ExitStack() as stack: + db_rw = stack.enter_context(MongoDBConnection(read_only=False, db="matchminer", async_init=False)) + db_ro = stack.enter_context(MongoDBConnection(read_only=True, db="matchminer", async_init=False)) + if len(list(db_ro.clinical.find({}))) == 0: + raise RuntimeError("No clinical 
documents in db. Please load clinical documents before loading genomic.") + load_file(db_rw, 'csv', file_path, 'genomic') + map_clinical_to_genomic(db_rw, db_ro) + def load_clinical(db_rw, args: Namespace): if args.patient_format == 'json': @@ -135,7 +173,7 @@ def load_genomic(db_rw, db_ro, args: Namespace, ): def map_clinical_to_genomic(db_rw, db_ro): - """Ensure that all genomic docs are linked to their corresponding clinical docs by _id""" + """Ensure that all genomic docs are linked to their corresponding clinical docs by _id""" clinical_docs = list(db_ro.clinical.find({}, {"_id": 1, "SAMPLE_ID": 1})) clinical_dict = dict(zip([i['SAMPLE_ID'] for i in clinical_docs], [i['_id'] for i in clinical_docs])) @@ -169,10 +207,12 @@ def load_file(db_rw, filetype: str, path: str, collection: str): if key == 'BIRTH_DATE': row[key] = convert_birthdate(row[key]) row['BIRTH_DATE_INT'] = int(row[key].strftime('%Y%m%d')) + if key == 'AGE': + row[key] = int(row[key]) db_rw[collection].insert_one(row) else: raw_file_data = file_handle.read() - if filetype == 'yaml': + if filetype == 'yml': data = yaml.safe_load_all(raw_file_data) db_rw[collection].insert_many(data) elif filetype == 'json': @@ -182,6 +222,8 @@ def load_file(db_rw, filetype: str, path: str, collection: str): if key == 'BIRTH_DATE': data[key] = convert_birthdate(data[key]) data['BIRTH_DATE_INT'] = int(data[key].strftime('%Y%m%d')) + if key == 'AGE': + data[key] = int(data[key]) db_rw[collection].insert_one(data) diff --git a/matchengine/internals/query_transform.py b/matchengine/internals/query_transform.py index 91fd354..87e2dde 100644 --- a/matchengine/internals/query_transform.py +++ b/matchengine/internals/query_transform.py @@ -82,6 +82,26 @@ def age_range_to_date_int_query(self, **kwargs): query_date = current_date + (- relativedelta(years=years, months=months)) return QueryTransformerResult({sample_key: {operator_map[operator]: int(query_date.strftime('%Y%m%d'))}}, False) + # straight comparison of a year 
value to the age field
+    def age_expression_query(self, **kwargs):
+        sample_key = kwargs['sample_key']
+        trial_value = kwargs['trial_value']
+        operator_map = {
+            "==": "$eq",
+            "<=": "$lte",
+            ">=": "$gte",
+            ">": "$gt",
+            "<": "$lt"
+        }
+        # funky logic is because 1 month curation is curated as "0.083" (1/12 a year)
+        operator = ''.join([i for i in trial_value if not i.isdigit() and i != '.']).strip()
+        numeric = "".join([i for i in trial_value if i.isdigit() or i == '.'])
+        if numeric.startswith('.'):
+            numeric = '0' + numeric
+        split_time = numeric.split('.')
+        years = int(split_time[0] if split_time[0].isdigit() else 0)
+        return QueryTransformerResult({sample_key: {operator_map[operator]: years}}, False)
+
     def nomap(self, **kwargs):
         trial_path = kwargs['trial_path']
         trial_key = kwargs['trial_key']
diff --git a/matchengine/internals/utilities/query.py b/matchengine/internals/utilities/query.py
index d5edcc4..54e6a65 100644
--- a/matchengine/internals/utilities/query.py
+++ b/matchengine/internals/utilities/query.py
@@ -3,6 +3,7 @@
 import asyncio
 import logging
 import operator
+import re
 from collections import defaultdict
 from functools import reduce
 from typing import TYPE_CHECKING, Dict
@@ -72,7 +73,30 @@ async def execute_clinical_queries(matchengine: MatchEngine,
             matchengine.cache.in_process.setdefault(query_hash, set()).update(need_new)
 
             if need_new:
-                new_query = {'$and': [{join_field: {'$in': list(need_new)}}, query_part.query]}
+                # Match ONCOTREE_PRIMARY_DIAGNOSIS_NAME case-insensitively with anchored regexes.
+                # MongoDB cannot use $regex operator expressions inside $in, so a $in list is
+                # expanded into a list of $or conditions, one regex per diagnosis name.
+                # Names go through re.escape so characters such as '(' or '+' are matched literally.
+                if "ONCOTREE_PRIMARY_DIAGNOSIS_NAME" in query_part.query:
+                    if "$in" in query_part.query['ONCOTREE_PRIMARY_DIAGNOSIS_NAME']:
+                        new_conditions = [
+                            {'ONCOTREE_PRIMARY_DIAGNOSIS_NAME': {'$regex': f'^{re.escape(str(old_query))}$', '$options': 'i'}} for
+                            old_query in query_part.query['ONCOTREE_PRIMARY_DIAGNOSIS_NAME']['$in']]
+                        del query_part.query['ONCOTREE_PRIMARY_DIAGNOSIS_NAME']  # Remove old query from query_part
+                        query_part.query['$or'] = new_conditions  # Add new conditions to query_part
+                    else:
+                        org_query = query_part.query['ONCOTREE_PRIMARY_DIAGNOSIS_NAME']
+                        ignore_case_query = {'$regex': f'^{re.escape(str(org_query))}$', '$options': 'i'}
+                        query_part.query['ONCOTREE_PRIMARY_DIAGNOSIS_NAME'] = ignore_case_query
+
+                # Exclude documents where 'ONCOTREE_PRIMARY_DIAGNOSIS_NAME' is 'NA'
+                new_query = {
+                    '$and': [
+                        {join_field: {'$in': list(need_new)}},
+                        query_part.query,
+                        {'ONCOTREE_PRIMARY_DIAGNOSIS_NAME': {'$ne': 'NA'}}
+                    ]
+                }
                 if matchengine.debug:
                     log.info(f"{query_part.query}")
                 projection = {id_field: 1, join_field: 1}
@@ -167,6 +191,7 @@ async def execute_extended_queries(
             projection = {id_field: 1, join_field: 1}
             genomic_docs = await matchengine.async_db_ro[collection].find(new_query, projection).to_list(None)
             if matchengine.debug:
+                # this prints the genomic query + clinical IDs that were queried and results
                 log.info(f"{new_query} returned {genomic_docs}")
 
             for genomic_doc in genomic_docs:
@@ -264,19 +289,23 @@ async def get_docs_results(matchengine: MatchEngine, needed_clinical, needed_ext
     return results
 
 
 def get_valid_reasons(matchengine: MatchEngine, possible_reasons, clinical_ids, genomic_ids):
     valid_reasons = {}
for clinical_id, reasons in possible_reasons.items(): if clinical_id in clinical_ids: list_o_reasons = list() for reason in reasons: - if ((reason.__class__ is ExtendedMatchReason - and (reason.query_node.exclusion or reason.reference_id in genomic_ids[ - reason.query_node.query_level])) - or (reason.__class__ is ClinicalMatchReason - and (matchengine.report_all_clinical_reasons - or frozenset(reason.query_part.query.keys()) - in matchengine.match_criteria_transform.valid_clinical_reasons))): + should_add_reason = False + if reason.__class__ is ExtendedMatchReason: + if (reason.query_node.exclusion or reason.reference_id in + genomic_ids[reason.query_node.query_level]): + should_add_reason = True + elif reason.__class__ is ClinicalMatchReason: + keys = frozenset(reason.query_part.query.keys()) + if matchengine.report_all_clinical_reasons or \ + keys.issubset(matchengine.match_criteria_transform.valid_clinical_reasons): + should_add_reason = True + if should_add_reason: list_o_reasons.append(reason) valid_reasons[clinical_id] = list_o_reasons diff --git a/matchengine/internals/utilities/utilities.py b/matchengine/internals/utilities/utilities.py index 75ed793..4d76e2d 100644 --- a/matchengine/internals/utilities/utilities.py +++ b/matchengine/internals/utilities/utilities.py @@ -176,10 +176,10 @@ def get_sort_order(matchengine: MatchEngine, match_document: Dict) -> list: sort_array.append(sort_index) # If an idenfitifer is not a protocol id (e.g. 
17-251) then skip replacing - identifier = match_document.get(matchengine.match_criteria_transform.trial_identifier, None) - if isinstance(identifier, ObjectId) or identifier is None: - pass - else: - sort_array.append(int(identifier.replace("-", ""))) + # identifier = match_document.get(matchengine.match_criteria_transform.trial_identifier, None) + # if isinstance(identifier, ObjectId) or identifier is None: + # pass + # else: + # sort_array.append(int(identifier.replace("-", ""))) return sort_array diff --git a/matchengine/main.py b/matchengine/main.py index 45812ec..97fbe33 100644 --- a/matchengine/main.py +++ b/matchengine/main.py @@ -26,6 +26,7 @@ def main(run_args): db_secrets_class=run_args.db_secrets_class, report_all_clinical_reasons=run_args.report_all_clinical_reasons, ignore_run_log=run_args.ignore_run_log, + ignore_report_date=run_args.ignore_report_date, skip_run_log_entry=run_args.skip_run_log_entry, trial_match_collection=run_args.trial_match_collection, drop=run_args.drop or run_args.drop_and_exit, @@ -39,7 +40,8 @@ def main(run_args): me.update_all_matches() if run_args.csv_output: - me.create_output_csv() + from matchengine.internals.utilities.output import create_output_csv + create_output_csv(me) if __name__ == "__main__": @@ -66,12 +68,13 @@ def main(run_args): db_name_help = ("Specify a custom db name to load trials and/or patient data into. 
If no value is passed, " "db name will be take from SECRETS_JSON file.") run_log_help = "Ignore the run log and run on all specified sample IDs/protocol nos" + ignore_report_date_help = "Ignore the report date when matching structural variations" base_dir = os.path.dirname(__file__) subp = parser.add_subparsers(help='sub-command help') subp_p = subp.add_parser('load', help='Sets up your MongoDB for matching.') subp_p.add_argument('-t', dest='trial', default=None, help=param_trials_help) subp_p.add_argument('-c', dest='clinical', default=None, help=param_clinical_help) - subp_p.add_argument('-g', dest='extended_attributes', default=None, help=param_genomic_help) + subp_p.add_argument('-g', dest='genomic', default=None, help=param_genomic_help) subp_p.add_argument('--trial-format', dest='trial_format', default='json', action='store', choices=['yml', 'json'], help=param_trial_format_help) subp_p.add_argument('--patient-format', dest='patient_format', default='json', action='store', @@ -87,6 +90,8 @@ def main(run_args): subp_p.add_argument("--match-on-closed", dest="match_on_closed", action="store_true", default=False, help=closed_help) subp_p.add_argument("--force", dest="ignore_run_log", action="store_true", default=False, help=run_log_help) + subp_p.add_argument("--ignore-report-date", dest="ignore_report_date", action="store_true", default=False, + help=ignore_report_date_help) subp_p.add_argument("--skip-run-log-entry", dest="skip_run_log_entry", action="store_true", diff --git a/matchengine/plugins/DFCIQueryNodeTransformer.py b/matchengine/plugins/DFCIQueryNodeTransformer.py index f39c054..e46f2cb 100644 --- a/matchengine/plugins/DFCIQueryNodeTransformer.py +++ b/matchengine/plugins/DFCIQueryNodeTransformer.py @@ -63,7 +63,7 @@ def extended_query_node_clinical_ids_subsetter(self: MatchEngine, clinical_ids: Iterable[ClinicalID]) -> Tuple[bool, Set[ClinicalID]]: # DFCI provided structural variant data in a structured format only starting Dec. 
1st 2018
     # Patients with reports from before this date should not have structural variants shown in UI
-    if query_node.get_query_part_by_key('STRUCTURED_SV') is not None:
+    if query_node.get_query_part_by_key('STRUCTURED_SV') is not None and self.ignore_report_date is False:
         return True, {
             clinical_id
             for clinical_id
@@ -75,7 +75,8 @@ def extended_query_node_clinical_ids_subsetter(self: MatchEngine,
                 datetime.datetime(1900, 1, 1, 1, 1, 1, 1)
             ) >= datetime.datetime(2018, 12, 1, 0, 0, 0, 0)
         }
-    elif query_node.get_query_part_by_key('STRUCTURAL_VARIANT_COMMENT') is not None:
+    elif query_node.get_query_part_by_key('STRUCTURAL_VARIANT_COMMENT') is not None \
+            and self.ignore_report_date is False:
         return True if query_node.exclusion else False, {
             clinical_id
             for clinical_id
diff --git a/matchengine/plugins/DFCIQueryTransformers.py b/matchengine/plugins/DFCIQueryTransformers.py
index a50473f..9e30b67 100644
--- a/matchengine/plugins/DFCIQueryTransformers.py
+++ b/matchengine/plugins/DFCIQueryTransformers.py
@@ -121,5 +121,32 @@ def mmr_ms_map(self, **kwargs):
         sample_value = mmr_map[trial_value]
         return QueryTransformerResult({sample_key: sample_value}, negate)
 
+    def molecular_function_map(self, **kwargs):
+        """Translate a curated 'Activating'/'Inactivating' value into a $in query on the sample key."""
+        molecular_function_map = {
+            'Activating': {"$in": ["Gain-of-function", "Likely Gain-of-function"]},
+            'Inactivating': {"$in": ["Loss-of-function", "Likely Loss-of-function"]}
+        }
+        trial_value = kwargs['trial_value']
+        trial_value, negate = self.transform.is_negate(trial_value)
+        sample_key = kwargs['sample_key']
+        sample_value = molecular_function_map[trial_value]
+        return QueryTransformerResult({sample_key: sample_value}, negate)
+
+    def genomic_dummy_map(self, **kwargs):
+        """Return a query on CLINICAL_ID != '' that is meant to match all genomic documents."""
+        return QueryTransformerResult({'CLINICAL_ID': {'$ne': ''}}, False)
+
+    def true_false_map(self, **kwargs):
+        """Map 'true'/'false' trial values to 'Positive'/'Negative'; pass any other value through unchanged."""
+        trial_value = kwargs['trial_value']
+        sample_key = kwargs['sample_key']
+        normalized = str(trial_value).upper()  # str() so a JSON boolean is handled like its string form
+        if normalized == 'TRUE':
+            return QueryTransformerResult({sample_key: 'Positive'}, False)
+        elif normalized == 'FALSE':
+            return QueryTransformerResult({sample_key: 'Negative'}, False)
+        else:
+            return QueryTransformerResult({sample_key: trial_value}, False)
 
 __export__ = ["DFCIQueryTransformers"]
diff --git a/matchengine/plugins/DFCITrialMatchDocumentCreator.py b/matchengine/plugins/DFCITrialMatchDocumentCreator.py
index 7c04f0c..78f4a87 100644
--- a/matchengine/plugins/DFCITrialMatchDocumentCreator.py
+++ b/matchengine/plugins/DFCITrialMatchDocumentCreator.py
@@ -26,7 +26,7 @@ def get_genomic_details(genomic_doc: Dict, trial_match: TrialMatch):
         is_variant = 'gene'
 
     # add wildtype calls
-    if wildtype:
+    if wildtype and str(wildtype).lower() == 'true':
         alteration.append('wt ')
 
     # add gene
@@ -34,7 +34,7 @@ def get_genomic_details(genomic_doc: Dict, trial_match: TrialMatch):
         alteration.append(hugo_symbol)
 
     # add mutation
-    if true_protein is not None:
+    if true_protein:
         alteration.append(f' {true_protein}')
         is_variant = ('variant'
                       if {'protein_change', 'wildcard_protein_change'}.intersection(
@@ -235,6 +235,9 @@ def create_trial_matches(self, trial_match: TrialMatch, new_trial_match: Dict) -
         query = trial_match.match_reason.extract_raw_query()
         clinical_doc = self.cache.docs[trial_match.match_reason.clinical_id]
         new_trial_match.update({'cancer_type_match': get_cancer_type_match(trial_match)})
+        # Add in additional fields we need for frontend
+        if ('arm_description' in trial_match.match_clause_data.match_clause_additional_attributes):
+            new_trial_match.update({'arm_description': trial_match.match_clause_data.match_clause_additional_attributes['arm_description']})
 
         if trial_match.match_reason.reason_name == 'genomic':
             genomic_doc = self.cache.docs.setdefault(trial_match.match_reason.reference_id,
None) diff --git a/matchengine/ref/oncotree_mapping.json b/matchengine/ref/oncotree_mapping.json index 162076d..ece3e7b 100644 --- a/matchengine/ref/oncotree_mapping.json +++ b/matchengine/ref/oncotree_mapping.json @@ -110,6 +110,9 @@ "Anaplastic Thyroid Cancer": [ "Anaplastic Thyroid Cancer" ], + "Adenocarcinoma In Situ": [ + "Adenocarcinoma In Situ" + ], "Angiocentric Glioma": [ "Angiocentric Glioma" ], @@ -297,7 +300,8 @@ "Bowel", "Signet Ring Cell Type of the Appendix", "High-Grade Neuroendocrine Carcinoma of the Colon and Rectum", - "Appendiceal Adenocarcinoma" + "Appendiceal Adenocarcinoma", + "Colon Adenocarcinoma In Situ" ], "Breast": [ "Breast Invasive Lobular Carcinoma", @@ -315,6 +319,7 @@ "Invasive Breast Carcinoma", "Phyllodes Tumor of the Breast", "Breast Invasive Carcinosarcoma, NOS", + "Breast Invasive Carcinoma, NOS", "Breast Angiosarcoma", "Benign Phyllodes Tumor of the Breast", "Adenomyoepithelioma of the Breast", @@ -348,6 +353,9 @@ "Breast Invasive Carcinosarcoma, NOS": [ "Breast Invasive Carcinosarcoma, NOS" ], + "Breast Invasive Carcinoma, NOS": [ + "Breast Invasive Carcinoma, NOS" + ], "Breast Invasive Ductal Carcinoma": [ "Breast Invasive Ductal Carcinoma" ], @@ -678,6 +686,9 @@ "Colon Adenocarcinoma": [ "Colon Adenocarcinoma" ], + "Colon Adenocarcinoma In Situ": [ + "Colon Adenocarcinoma In Situ" + ], "Colonic Type Adenocarcinoma of the Appendix": [ "Colonic Type Adenocarcinoma of the Appendix" ], @@ -686,7 +697,8 @@ "Colorectal Adenocarcinoma", "Colon Adenocarcinoma", "Mucinous Adenocarcinoma of the Colon and Rectum", - "Rectal Adenocarcinoma" + "Rectal Adenocarcinoma", + "Colon Adenocarcinoma In Situ" ], "Combined Small Cell Lung Carcinoma": [ "Combined Small Cell Lung Carcinoma" @@ -1270,7 +1282,8 @@ "Breast Invasive Ductal Carcinoma", "Invasive Breast Carcinoma", "Breast Invasive Carcinosarcoma, NOS", - "Breast Invasive Mixed Mucinous Carcinoma" + "Breast Invasive Mixed Mucinous Carcinoma", + "Breast Invasive Carcinoma, NOS" ], 
"Invasive Hydatidiform Mole": [ "Invasive Hydatidiform Mole" @@ -1405,11 +1418,16 @@ "Small Cell Lung Cancer", "Inflammatory Myofibroblastic Lung Tumor", "Atypical Lung Carcinoid", - "Large Cell Neuroendocrine Carcinoma" + "Large Cell Neuroendocrine Carcinoma", + "Lung Adenocarcinoma In Situ", + "Pleomorphic Carcinoma of the Lung" ], "Lung Adenocarcinoma": [ "Lung Adenocarcinoma" ], + "Lung Adenocarcinoma In Situ": [ + "Lung Adenocarcinoma In Situ" + ], "Lung Adenosquamous Carcinoma": [ "Lung Adenosquamous Carcinoma" ], @@ -1807,7 +1825,8 @@ "Large Cell Lung Carcinoma With Rhabdoid Phenotype", "Large Cell Lung Carcinoma", "Poorly Differentiated Non-Small Cell Lung Cancer", - "Salivary Gland-Type Tumor of the Lung" + "Salivary Gland-Type Tumor of the Lung", + "Pleomorphic Carcinoma of the Lung" ], "Ocular Melanoma": [ "Uveal Melanoma", @@ -1864,7 +1883,8 @@ "Neuroendocrine Carcinoma, NOS", "Other", "Acinar Cell Carcinoma, NOS", - "Mixed Cancer Types" + "Mixed Cancer Types", + "Adenocarcinoma In Situ" ], "Other Uterine Tumor": [ "Other Uterine Tumor" @@ -2109,6 +2129,9 @@ "Plasmacytoid/Signet Ring Cell Bladder Carcinoma": [ "Plasmacytoid/Signet Ring Cell Bladder Carcinoma" ], + "Pleomorphic Carcinoma of the Lung": [ + "Pleomorphic Carcinoma of the Lung" + ], "Pleomorphic Liposarcoma": [ "Pleomorphic Liposarcoma" ], @@ -3529,6 +3552,11 @@ "Papillary Glioneuronal Tumor", "Ewing Sarcoma", "Renal Angiomyolipoma", - "Large Cell Neuroendocrine Carcinoma" + "Large Cell Neuroendocrine Carcinoma", + "Breast Invasive Carcinoma, NOS", + "Pleomorphic Carcinoma of the Lung", + "Lung Adenocarcinoma In Situ", + "Colon Adenocarcinoma In Situ", + "Adenocarcinoma In Situ" ] -} \ No newline at end of file +} diff --git a/matchengine/tests/config.json b/matchengine/tests/config.json index 737de92..07de8a2 100644 --- a/matchengine/tests/config.json +++ b/matchengine/tests/config.json @@ -57,7 +57,7 @@ }, "VARIANT_CLASSIFICATION": { "sample_key": "TRUE_VARIANT_CLASSIFICATION", - 
"sample_value": "nomap" + "sample_value": "nomap" }, "VARIANT_CATEGORY": { "sample_key": "VARIANT_CATEGORY", @@ -274,7 +274,8 @@ "ONCOTREE_PRIMARY_DIAGNOSIS_NAME", "TUMOR_MUTATIONAL_BURDEN_PER_MEGABASE", "VITAL_STATUS", - "BIRTH_DATE_INT" + "BIRTH_DATE_INT", + "AGE" ], "trial_match": [ "hash", diff --git a/secrets.json b/secrets.json new file mode 100644 index 0000000..40e8de0 --- /dev/null +++ b/secrets.json @@ -0,0 +1,8 @@ +{ + "MONGO_HOST": "localhost", + "MONGO_PORT": 27017, + "MONGO_DBNAME": "matchminer", + "MONGO_USERNAME": "", + "MONGO_PASSWORD": "", + "MONGO_URI": "mongodb://localhost:27017/matchminer" +} \ No newline at end of file