-
Notifications
You must be signed in to change notification settings - Fork 13
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[DRAFT] Pugh lab main [just to compare] #17
base: master
Are you sure you want to change the base?
Changes from all commits
1007557
64bebc5
d121066
763737f
718627d
52f57c3
9e62654
4994641
ec98770
fa8c8e8
a376538
526e169
2d5d5af
a881e01
fd14bea
e6e7611
ece0b8e
042df3a
cb3f772
a19aae9
b616ade
99f5f19
11a84d3
887cded
a2141f6
510d1b3
7ded6f2
860240a
71ea8dd
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,28 @@ | ||
# Use an official Python 3.8 runtime as a parent image | ||
FROM python:3.8-slim-buster | ||
|
||
# Set the working directory in the container to /app | ||
WORKDIR /app | ||
|
||
# Add the current directory contents into the container at /app | ||
ADD . /app | ||
|
||
# Install any needed packages specified in setup.py | ||
RUN pip install . | ||
|
||
# Install bash and bash-completion | ||
RUN apt-get update && apt-get install -y bash bash-completion | ||
|
||
# Make port 80 available to the world outside this container | ||
EXPOSE 80 | ||
|
||
# Define environment variable | ||
ENV NAME MatchEngineV2 | ||
ENV SECRETS_JSON /app/secrets.json | ||
|
||
RUN pip uninstall bson -y | ||
RUN pip uninstall pymongo -y | ||
RUN pip install pymongo==3.8.0 | ||
|
||
# Keep the container running idle; no application is launched automatically | ||
CMD tail -f /dev/null |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,12 @@ | ||
version: '3' | ||
services: | ||
app: | ||
build: | ||
context: . | ||
dockerfile: Dockerfile | ||
ports: | ||
- "8000:80" | ||
volumes: | ||
- .:/app | ||
environment: | ||
- NAME=MatchEngineV2 |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -143,6 +143,7 @@ def __init__( | |
db_secrets_class: str = None, | ||
report_all_clinical_reasons: bool = False, | ||
ignore_run_log: bool = False, | ||
ignore_report_date: bool = False, | ||
skip_run_log_entry: bool = False, | ||
trial_match_collection: str = "trial_match", | ||
drop: bool = False, | ||
|
@@ -161,6 +162,7 @@ def __init__( | |
self.run_id = uuid.uuid4() | ||
self.run_log_entries = dict() | ||
self.ignore_run_log = ignore_run_log | ||
self.ignore_report_date = ignore_report_date | ||
self.skip_run_log_entry = skip_run_log_entry | ||
self.clinical_run_log_entries = dict() | ||
self._protocol_nos_param = list(protocol_nos) if protocol_nos is not None else protocol_nos | ||
|
@@ -514,7 +516,7 @@ async def _async_get_matches_for_trial(self, protocol_no: str) -> Dict[str, List | |
# check if node has any age criteria, to know to check for newly qualifying patients | ||
# or patients aging out | ||
for k, v in criteria.get('clinical', dict()).items(): | ||
if k.lower() == 'age_numerical': | ||
if k.lower() == 'age_numerical' or k.lower() == 'age_expression': | ||
age_criteria.add(v) | ||
if self.debug: | ||
log.info(f"Query: {query}") | ||
|
@@ -565,7 +567,7 @@ def _get_clinical_data(self): | |
query: Dict = {} | ||
if self.sample_ids is not None: | ||
query.update({"SAMPLE_ID": {"$in": list(self.sample_ids)}}) | ||
projection = {'_id': 1, 'SAMPLE_ID': 1, 'VITAL_STATUS': 1, 'BIRTH_DATE_INT': 1} | ||
projection = {'_id': 1, 'SAMPLE_ID': 1, 'VITAL_STATUS': 1, 'BIRTH_DATE_INT': 1, 'AGE': 1} | ||
if not self.ignore_run_log: | ||
projection.update({'_updated': 1, 'run_history': 1}) | ||
projection.update({ | ||
|
@@ -598,11 +600,13 @@ def get_clinical_deceased(self) -> Set[ClinicalID]: | |
in self._clinical_data.items() | ||
if clinical_data['VITAL_STATUS'] == 'deceased'} | ||
|
||
# use the BIRTH_DATE_INT field, otherwise return the age field | ||
def get_clinical_birth_dates(self) -> Dict[ClinicalID, int]:
    """
    Map every loaded clinical id to the value used for age checks.

    For each document in ``self._clinical_data`` the ``BIRTH_DATE_INT`` field
    is used when present; otherwise the document's ``AGE`` field is used.

    The mapping covers *all* documents. Returning from inside the loop would
    report only the first document and would yield ``None`` when no clinical
    data is loaded; an empty dict is returned in that case instead.

    :raises KeyError: if a document has neither BIRTH_DATE_INT nor AGE.
    """
    # NOTE(review): the result can mix two kinds of integers (a birth-date int
    # and a plain age) -- confirm that consumers can tell them apart.
    return {
        clinical_id: (
            clinical_data['BIRTH_DATE_INT']
            if 'BIRTH_DATE_INT' in clinical_data
            else clinical_data['AGE']
        )
        for clinical_id, clinical_data in self._clinical_data.items()
    }
|
||
def get_clinical_ids_from_sample_ids(self) -> Dict[ClinicalID, str]: | ||
""" | ||
|
@@ -613,7 +617,8 @@ def get_clinical_ids_from_sample_ids(self) -> Dict[ClinicalID, str]: | |
self._clinical_data.items()} | ||
else: | ||
return {clinical_id: clinical_data['SAMPLE_ID'] for clinical_id, clinical_data in | ||
self._clinical_data.items() if clinical_data['VITAL_STATUS'] == 'alive'} | ||
self._clinical_data.items() if (clinical_data['VITAL_STATUS'] is not None and clinical_data['VITAL_STATUS'].lower() == 'alive')} | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Good catch, we should have that. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. The new ME version moves this logic around a bit. Now it ignores users with VITAL_STATUS "deceased," rather than including only users with VITAL_STATUS "alive" (i.e. everyone is alive by default). |
||
|
||
|
||
def get_trials(self) -> Dict[str, Trial]: | ||
""" | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -5,6 +5,7 @@ | |
import os | ||
from argparse import Namespace | ||
from contextlib import ExitStack | ||
from typing import List | ||
|
||
import yaml | ||
from bson import json_util | ||
|
@@ -14,6 +15,12 @@ | |
logging.basicConfig(level=logging.INFO) | ||
log = logging.getLogger('matchengine') | ||
|
||
def load_from_variable(data, data_format='json'):
    """
    Load already-parsed documents into the "matchminer" database.

    :param data: documents to load; forwarded unchanged to ``load_from_memory``.
    :param data_format: format tag of ``data``. Only ``'json'`` is handled.

    Any other ``data_format`` is reported with a warning and nothing is
    loaded, rather than being ignored silently after a connection was opened.
    """
    if data_format != 'json':
        log.warning('Unsupported data_format %r; nothing was loaded', data_format)
        return

    # A single context manager does not need an ExitStack.
    with MongoDBConnection(read_only=False, db="matchminer", async_init=False) as db_rw:
        log.info('Adding trial(s) to mongo...')
        load_from_memory(db_rw, data)
|
||
def load(args: Namespace): | ||
""" | ||
|
@@ -56,16 +63,32 @@ def load(args: Namespace): | |
def load_trials(db_rw, args: Namespace):
    """
    Dispatch trial loading according to ``args.trial_format``.

    ``'json'`` is routed to ``load_trials_json``; both ``'yml'`` and the
    earlier spelling ``'yaml'`` are routed to ``load_trials_yaml`` so that
    existing callers passing ``'yaml'`` are not silently ignored.
    Any other value results in no action.
    """
    if args.trial_format == 'json':
        load_trials_json(args, db_rw)
    elif args.trial_format in ('yml', 'yaml'):
        load_trials_yaml(args, db_rw)
|
||
|
||
def load_trials_yaml(args: Namespace, db_rw): | ||
if os.path.isdir(args.trial): | ||
load_dir(args, db_rw, "yaml", args.trial, 'trial') | ||
load_dir(args, db_rw, "yml", args.trial, 'trial') | ||
else: | ||
load_file(db_rw, 'yaml', args.trial, 'trial') | ||
|
||
load_file(db_rw, 'yml', args.trial, 'trial') | ||
|
||
def load_from_memory(db_rw, json_list: List[dict]):
    """
    Insert each single-object entry of ``json_list`` into ``db_rw.trial``.

    Entries rejected by ``is_valid_single_json_dict`` are skipped. Before
    insertion, a ``BIRTH_DATE`` value is passed through ``convert_birthdate``
    and a companion ``BIRTH_DATE_INT`` (``%Y%m%d`` as an int) is added; an
    ``AGE`` value is coerced to ``int``. Entries are modified in place.
    """
    # NOTE(review): BIRTH_DATE / AGE are handled here, yet documents are written
    # to the `trial` collection -- confirm that this is the intended target.
    for document in json_list:
        if not is_valid_single_json_dict(document):
            continue

        if 'BIRTH_DATE' in document:
            birth_date = convert_birthdate(document['BIRTH_DATE'])
            document['BIRTH_DATE'] = birth_date
            document['BIRTH_DATE_INT'] = int(birth_date.strftime('%Y%m%d'))

        if 'AGE' in document:
            document['AGE'] = int(document['AGE'])

        db_rw.trial.insert_one(document)
|
||
def is_valid_single_json_dict(json_dict: dict):
    """
    Return True when the parsed JSON value is a single object, not an array.

    Only lists are rejected; ``isinstance`` is used so that list subclasses
    are rejected as well. Any non-list value is accepted.
    """
    return not isinstance(json_dict, list)
|
||
def load_trials_json(args: Namespace, db_rw): | ||
# load a directory of json files | ||
|
@@ -108,6 +131,21 @@ def load_trials_json(args: Namespace, db_rw): | |
######################## | ||
# patient data loading | ||
######################## | ||
|
||
def load_clinical_via_api(file_path: str):
    """
    Load the CSV file at ``file_path`` into the ``clinical`` collection.

    Opens a read/write connection to the "matchminer" database for the
    duration of the load and delegates the work to ``load_file``.
    """
    connection = MongoDBConnection(read_only=False, db="matchminer", async_init=False)
    with connection as db_rw:
        load_file(db_rw, 'csv', file_path, 'clinical')
|
||
def load_genomic_via_api(file_path: str):
    """
    Load the CSV file at ``file_path`` into the ``genomic`` collection.

    Opens a read/write and a read-only connection to the "matchminer"
    database, loads the file through ``load_file`` and then calls
    ``map_clinical_to_genomic`` on the two connections.

    :raises RuntimeError: if the ``clinical`` collection holds no documents.
    """
    with ExitStack() as stack:
        db_rw = stack.enter_context(MongoDBConnection(read_only=False, db="matchminer", async_init=False))
        db_ro = stack.enter_context(MongoDBConnection(read_only=True, db="matchminer", async_init=False))

        # Fetch at most one document (only its _id) to test for emptiness,
        # instead of materialising the whole clinical collection in memory.
        if db_ro.clinical.find_one({}, {"_id": 1}) is None:
            raise RuntimeError("No clinical documents in db. Please load clinical documents before loading genomic.")

        load_file(db_rw, 'csv', file_path, 'genomic')
        map_clinical_to_genomic(db_rw, db_ro)
|
||
def load_clinical(db_rw, args: Namespace): | ||
if args.patient_format == 'json': | ||
|
||
|
@@ -135,7 +173,7 @@ def load_genomic(db_rw, db_ro, args: Namespace, ): | |
|
||
|
||
def map_clinical_to_genomic(db_rw, db_ro): | ||
"""Ensure that all genomic docs are linked to their corresponding clinical docs by _id""" | ||
"""Ensure that all genomic docs are linked to their corresponding clinical docs by _id""" | ||
clinical_docs = list(db_ro.clinical.find({}, {"_id": 1, "SAMPLE_ID": 1})) | ||
clinical_dict = dict(zip([i['SAMPLE_ID'] for i in clinical_docs], [i['_id'] for i in clinical_docs])) | ||
|
||
|
@@ -169,10 +207,12 @@ def load_file(db_rw, filetype: str, path: str, collection: str): | |
if key == 'BIRTH_DATE': | ||
row[key] = convert_birthdate(row[key]) | ||
row['BIRTH_DATE_INT'] = int(row[key].strftime('%Y%m%d')) | ||
if key == 'AGE': | ||
row[key] = int(row[key]) | ||
db_rw[collection].insert_one(row) | ||
else: | ||
raw_file_data = file_handle.read() | ||
if filetype == 'yaml': | ||
if filetype == 'yml': | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. This is just a bugfix that we can incorporate. |
||
data = yaml.safe_load_all(raw_file_data) | ||
db_rw[collection].insert_many(data) | ||
elif filetype == 'json': | ||
|
@@ -182,6 +222,8 @@ def load_file(db_rw, filetype: str, path: str, collection: str): | |
if key == 'BIRTH_DATE': | ||
data[key] = convert_birthdate(data[key]) | ||
data['BIRTH_DATE_INT'] = int(data[key].strftime('%Y%m%d')) | ||
if key == 'AGE': | ||
data[key] = int(data[key]) | ||
db_rw[collection].insert_one(data) | ||
|
||
|
||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Most of the changes to this file can be incorporated without issues. The one exception is the "trial_status_key," which determines how we decide if trials are open or closed; that may be something we need to keep separate for PMATCH.