-
Notifications
You must be signed in to change notification settings - Fork 0
/
utils.py
384 lines (319 loc) · 13.6 KB
/
utils.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
from collections import OrderedDict
import json
import logging
from redcap_invitae import Redcap
# Module-level logger named after this module.
logger = logging.getLogger(__name__)
# Checkbox values treated as "selected": the raw code '1' and the label 'checked'.
# Functions in this module lowercase the REDCap value before testing membership here.
CHECKBOX_POSITIVE_VALUES = ['1', 'checked']
def convert_emerge_race_to_redox_race(participant_data):
    """ Translate eMERGE race_at_enrollment checkboxes into one Redox Patient.Demographics.race value

    Note: R4 allows multiple race options, while a single value is returned here.

    Params
    ------
    participant_data: Dict of participant data containing race_at_enrollment values, e.g.,
        'race_at_enrollment___1': '1'
        Handles both raw (e.g., '0', '1') and label (e.g., 'Unchecked', 'Checked') formats.
        Checkbox keys that are absent from the dict are treated as not selected.

    Returns
    -------
    (String) 'Unknown' when no checkbox is selected, the mapped race when exactly one is
    selected, and 'Other Race' when more than one is selected.
    """
    redox_race_by_field = {
        'race_at_enrollment___1': 'American Indian or Alaska Native',
        'race_at_enrollment___2': 'Asian',
        'race_at_enrollment___3': 'Black or African American',
        'race_at_enrollment___4': 'Other Race',
        'race_at_enrollment___5': 'Other Race',
        'race_at_enrollment___6': 'Native Hawaiian or Other Pacific Islander',
        'race_at_enrollment___7': 'White',
        'race_at_enrollment___8': 'Other Race',
        'race_at_enrollment___9': 'Prefer not to answer',
    }
    selected_markers = ('1', 'checked')

    # Collect the Redox label of every checkbox that is present and ticked
    selected = [
        label
        for field, label in redox_race_by_field.items()
        if field in participant_data
        and participant_data[field].lower() in selected_markers
    ]

    if not selected:
        return 'Unknown'
    if len(selected) > 1:
        # Several selections collapse into a single catch-all value
        return 'Other Race'
    return selected[0]
def convert_emerge_race_to_invitae_ancestry(participant_data):
    """ Translate eMERGE participant data into a list of Invitae ancestry options

    From R4, uses: race_at_enrollment, ashkenazi_jewish_ancestors

    Params
    ------
    participant_data: Dict of participant data containing race_at_enrollment values, e.g.,
        'race_at_enrollment___1': '1'
        Handles both raw (e.g., '0', '1') and label (e.g., 'Unchecked', 'Checked') formats.
        Keys that are absent from the dict are treated as not selected.

    Returns
    -------
    List of matching ancestry options (empty when nothing is selected). 'Ashkenazi Jewish',
    when applicable, is always the last entry.
    """
    ancestry_by_field = {
        'race_at_enrollment___1': 'Native American',
        'race_at_enrollment___2': 'Asian',
        'race_at_enrollment___3': 'Black/African-American',
        'race_at_enrollment___4': 'Hispanic',
        # 'race_at_enrollment___5' is intentionally absent: no good mapping from
        # "Middle Eastern or North African" to Invitae ancestry options
        'race_at_enrollment___6': 'Pacific Islander',
        'race_at_enrollment___7': 'White/Caucasian',
        'race_at_enrollment___8': 'Other',
        'race_at_enrollment___9': 'Unknown',
    }

    # R4: race_at_enrollment
    selected = [
        ancestry
        for field, ancestry in ancestry_by_field.items()
        if field in participant_data
        and participant_data[field].lower() in ('1', 'checked')
    ]

    # R4: ashkenazi_jewish_ancestors (raw '1' or label 'Yes')
    ashkenazi_answer = participant_data.get('ashkenazi_jewish_ancestors', '').lower()
    if ashkenazi_answer in ('1', 'yes'):
        selected.append('Ashkenazi Jewish')

    return selected
def map_redcap_sex_to_redox_sex(redcap_sex):
    """ Map REDCap values for sex to Redox defined values

    "Intersex" is mapped to "Other". An unanswered question (empty string or None) and
    "Prefer not to answer" are both mapped to "Unknown".

    Params
    ------
    redcap_sex: (string) REDCap raw data, e.g., '1'. Surrounding whitespace is ignored and
        non-string codes (e.g., the integer 1) are converted with str() before lookup.

    Returns
    -------
    (string) Redox sex value

    Raises
    ------
    KeyError: if the value is not one of the known REDCap codes
    """
    redox_sex_by_code = {
        '1': 'Female',
        '2': 'Male',
        '3': 'Other',    # REDCap: Intersex
        '4': 'Unknown',  # REDCap: Prefer not to answer
        '': 'Unknown',   # REDCap: (question not answered)
    }

    # A missing value (None) is handled the same way as an empty answer
    code = '' if redcap_sex is None else str(redcap_sex).strip()

    try:
        return redox_sex_by_code[code]
    except KeyError:
        # Keep the original exception type, but say which value was rejected
        raise KeyError(f'Unrecognized REDCap sex value: {redcap_sex!r}') from None
def get_invitae_primary_indication(record):
    """ Pick the primary indication to send with an Invitae API order

    Looks at the participant's personal health history response in the baseline survey. Conditions
    are examined in a fixed priority order and the first one the participant reports having
    currently or in the past decides the indication. "At risk for" answers are not consulted.
    For healthy participants, "Other" is chosen.

    Params
    ------
    record: Dict of participant's records containing values from personal health history checkboxes, e.g.,
        'prostate_cancer___1': '1'
        Handles both raw (e.g., '0', '1') and label (e.g., 'Unchecked', 'Checked') formats.
        For each condition, '<condition>___1' holds the current answer and '<condition>_2___1'
        the past answer.

    Returns
    -------
    (String) First relevant primary indication. For healthy participants or no match found, return 'Other'

    Raises
    ------
    KeyError: if a checkbox key that needs to be inspected is missing from record
    """
    # Priority order matters: the first reported condition wins
    indication_by_condition = OrderedDict([
        ('prostate_cancer', 'Prostate Cancer'),
        ('pancreatic_cancer', 'Pancreatic Cancer'),
        ('breast_cancer', 'Other Cancer'),
        ('ovarian_cancer', 'Other Cancer'),
        ('colorectal_cancer', 'Other Cancer'),
        ('atrial_fibrillation', 'Cardiology: Arrhythmia'),
        ('coronary_heart_disease', 'Cardiology: Other'),
        ('heart_failure', 'Cardiology: Other'),
    ])

    for condition, indication in indication_by_condition.items():
        current_key = condition + '___1'
        past_key = condition + '_2' + '___1'
        # The current answer is checked first; the past answer is only read when needed
        if (record[current_key].lower() in CHECKBOX_POSITIVE_VALUES
                or record[past_key].lower() in CHECKBOX_POSITIVE_VALUES):
            return indication

    # Use 'Other' for all other scenarios
    return 'Other'
def describe_patient_history(record):
    """ Write a short description of patient history for an Invitae Order

    Looks at the participant's personal health history response in the baseline survey and lists
    the conditions reported as current and the conditions reported as past.

    Params
    ------
    record: Dict of participant's records containing values from personal health history checkboxes, e.g.,
        'prostate_cancer___1': '1'
        Handles both raw (e.g., '0', '1') and label (e.g., 'Unchecked', 'Checked') formats.
        For each condition field, '<field>___1' holds the current answer and '<field>_2___1'
        the past answer.

    Returns
    -------
    (String) Written description of current and past conditions, e.g.,
    'Current conditions: asthma. Past conditions: obesity.'
    Empty string when no condition is reported.

    Raises
    ------
    KeyError: if a current or past checkbox key is missing from record
    """
    description_by_field = {
        Redcap.FIELD_BPHH_HYPERTENSION: 'hypertension',
        Redcap.FIELD_BPHH_HYPERLIPID: 'hypercholesterolemia',
        Redcap.FIELD_BPHH_T1DM: 'type 1 diabetes',
        Redcap.FIELD_BPHH_T2DM: 'type 2 diabetes',
        Redcap.FIELD_BPHH_KD: 'weak or failing kidneys or kidney disease',
        Redcap.FIELD_BPHH_ASTHMA: 'asthma',
        Redcap.FIELD_BPHH_OBESITY: 'obesity',
        Redcap.FIELD_BPHH_SLEEPAPNEA: 'sleep apnea',
        Redcap.FIELD_BPHH_CHD: 'coronary heart disease',
        Redcap.FIELD_BPHH_HF: 'heart failure',
        Redcap.FIELD_BPHH_AFIB: 'atrial fibrillation',
        Redcap.FIELD_BPHH_BRCA: 'breast cancer',
        Redcap.FIELD_BPHH_OVCA: 'ovarian cancer',
        Redcap.FIELD_BPHH_PRCA: 'prostate cancer',
        Redcap.FIELD_BPHH_PACA: 'pancreatic cancer',
        Redcap.FIELD_BPHH_COCA: 'colorectal cancer',
    }

    current, past = [], []
    for field, description in description_by_field.items():
        # A condition can be listed in both groups if both boxes are checked
        if record[field + '___1'].lower() in CHECKBOX_POSITIVE_VALUES:
            current.append(description)
        if record[field + '_2' + '___1'].lower() in CHECKBOX_POSITIVE_VALUES:
            past.append(description)

    # Build one sentence per non-empty group, current first
    current_sentence = f"Current conditions: {', '.join(current)}." if current else ''
    past_sentence = f"Past conditions: {', '.join(past)}." if past else ''
    return ' '.join(sentence for sentence in (current_sentence, past_sentence) if sentence)
def _describe_metree_condition(condition):
    """ Build the text for a single MeTree condition entry, e.g., 'asthma (age 12)' """
    label = condition['id']
    # If the condition id is "other" and meta.other has info, use that
    if label == 'other':
        meta = condition.get('meta') or {}
        label = meta.get('other') or label
    age = condition.get('age')
    # NOTE(review): any falsy age (None, '', 0) is left out, as before - confirm whether an
    # age of 0 should be reported
    if age:
        label += f' (age {age})'
    return label


def generate_family_history(metree):
    """ Creates a description of family history for Invitae Order

    Looks at participant's MeTree JSON data and generates text description of family history.
    Each person is identified only by their relation and summarized as '<relation>: <summary>.',
    where the summary is the list of conditions, else the person's medicalHistory text (it may
    be things like "healthy" or "unknown"), else 'no conditions listed'.

    Params
    ------
    metree: MeTree data. If metree passed in as string, will try to load JSON data. Otherwise, expect list of dicts.
        Each dict must have 'relation'; 'conditions' and 'medicalHistory' may be missing or empty.
        Each condition must have 'id'; 'age' and 'meta' may be missing or empty.

    Returns
    -------
    tuple: ((str) Written description of family history, (int) # Family members (excluding self))

    Raises
    ------
    TypeError: if metree (after JSON decoding, when given as a string) is not a list
    ValueError: if metree is a string that is not valid JSON (raised by json.loads)
    """
    if isinstance(metree, str):
        metree = json.loads(metree)
    # Validate after decoding too, so a JSON object/scalar is rejected like any other non-list
    if not isinstance(metree, list):
        # NOTE(review): this logs the raw MeTree value, which may hold health information -
        # confirm that is acceptable for the log destination
        logger.error(f'metree type error. type: {type(metree)}. value: {metree}')
        raise TypeError(
            f'metree must be a list of dicts or a JSON string encoding one, got {type(metree).__name__}'
        )

    history = []
    family_count = 0
    for record in metree:
        # Create description for each condition
        conditions = [
            _describe_metree_condition(condition)
            for condition in record.get('conditions') or []
        ]

        # Create summary of conditions, falling back to the free-text medical history
        if conditions:
            conditions_str = '; '.join(conditions)
        else:
            conditions_str = record.get('medicalHistory') or 'no conditions listed'

        # Identify each person only by their relation
        rel = record['relation']
        history.append(f"{rel}: {conditions_str}.")

        # Count number of family members
        if rel != 'SELF':
            family_count += 1

    # Merge strings across all people
    return ' '.join(history), family_count
# Manual smoke tests: run this module directly to print sample conversions for eyeballing
if __name__ == "__main__":
    import random

    def _race_key(index):
        """ Name of the race_at_enrollment checkbox with the given option number """
        return f'race_at_enrollment___{index}'

    def _with_checked(template, *keys):
        """ Copy of template with every key in keys set to '1' """
        marked = template.copy()
        for key in keys:
            marked[key] = '1'
        return marked

    race_options = range(1, 10)

    # Baseline participant: no race selected, Ashkenazi question unanswered
    blank_races = {'ashkenazi_jewish_ancestors': ''}
    blank_races.update({_race_key(option): '0' for option in race_options})
    hispanic_only = _with_checked(blank_races, _race_key(4))
    ashkenazi_only = _with_checked(blank_races, 'ashkenazi_jewish_ancestors')

    print('################ test convert_emerge_race_to_redox_race ################')

    print('\nTest with nothing')
    print(convert_emerge_race_to_redox_race(blank_races))

    print('\nTest single race')
    for option in race_options:
        result = convert_emerge_race_to_redox_race(_with_checked(blank_races, _race_key(option)))
        print(f'{option}: {result}')

    print('\nTest hispanic + single race')
    for option in race_options:
        result = convert_emerge_race_to_redox_race(_with_checked(hispanic_only, _race_key(option)))
        print(f'{option}: {result}')

    print('################ test convert_emerge_race_to_invitae_ancestry ################')

    print('\nTest with nothing')
    print(convert_emerge_race_to_invitae_ancestry(blank_races))

    print('\nTest single race')
    for option in race_options:
        result = convert_emerge_race_to_invitae_ancestry(_with_checked(blank_races, _race_key(option)))
        print(f'{option}: {result}')

    print('\nTest ashkenazi jewish + single race')
    for option in race_options:
        result = convert_emerge_race_to_invitae_ancestry(_with_checked(ashkenazi_only, _race_key(option)))
        print(f'{option}: {result}')

    print('\nTest hispanic + single race')
    for option in race_options:
        result = convert_emerge_race_to_invitae_ancestry(_with_checked(hispanic_only, _race_key(option)))
        print(f'{option}: {result}')

    # Baseline health history: every '<field>___1' checkbox listed in Redcap.FIELDS_BPHH unchecked
    blank_history = {(field + '___1'): '0' for field in Redcap.FIELDS_BPHH}

    print('################ test get_invitae_primary_indication ################')
    for field in Redcap.FIELDS_BPHH:
        result = get_invitae_primary_indication(_with_checked(blank_history, field + '___1'))
        print(f'{field}: {result}')

    print('################ test describe_patient_history with individual conditions ################')
    for field in Redcap.FIELDS_BPHH:
        result = describe_patient_history(_with_checked(blank_history, field + '___1'))
        print(f'{field}: {result}')

    print('################ test describe_patient_history with random selection of conditinos ################')
    field_count = len(Redcap.FIELDS_BPHH)
    for _ in range(10):
        # Pick between 1 and 10 distinct fields at random and check them all
        n_conditions = random.randint(1, 10)
        picked = random.sample(range(field_count), n_conditions)
        fields = [Redcap.FIELDS_BPHH[position] + '___1' for position in picked]
        result = describe_patient_history(_with_checked(blank_history, *fields))
        print(f'fields: {fields}\ndescription: {result}\n--------------\n')