From 8f8df59d2719c6bf76159ddcdae314b57ec747d1 Mon Sep 17 00:00:00 2001 From: "Dr. Hannah De los Santos" Date: Tue, 22 Aug 2023 12:36:04 -0400 Subject: [PATCH] Fix smoothing overwriting WHO scores --- R/growth.R | 42 ++++++++++++++++++------------------------ R/infants_clean.R | 7 +++++-- 2 files changed, 23 insertions(+), 26 deletions(-) diff --git a/R/growth.R b/R/growth.R index 144a884..1e131bf 100644 --- a/R/growth.R +++ b/R/growth.R @@ -456,15 +456,13 @@ cleangrowth <- function(subjid, if(infants){ if (!quietly) cat(sprintf("[%s] Calculating z-scores...\n", Sys.time())) + # removing z calculations, as they are not used # for infants, use z and who measurement.to.z <- read_anthro(ref.data.path, cdc.only = TRUE, infants = T) measurement.to.z_who <- read_anthro(ref.data.path, cdc.only = FALSE, infants = T) - data.all[, z.orig_cdc := measurement.to.z(param, agedays, sex, v)] - data.all[, z.orig_who := measurement.to.z_who(param, agedays, sex, v)] - # calculate "standard deviation" scores if (!quietly) cat(sprintf("[%s] Calculating SD-scores...\n", Sys.time())) @@ -473,39 +471,35 @@ cleangrowth <- function(subjid, # smooth z-scores/SD scores between ages 1 - 3yo using weighted scores # older uses cdc, younger uses who - who_weight <- 4 - (data.all$agedays/365.25) - cdc_weight <- (data.all$agedays/365.25) - 2 + data.all$ageyears <- data.all$agedays/365.25 - smooth_val <- data.all$agedays/365.25 >= 2 & - data.all$agedays/365.25 <= 4 & + who_weight <- 4 - (data.all$ageyears) + cdc_weight <- (data.all$ageyears) - 2 + + smooth_val <- data.all$ageyears >= 2 & + data.all$ageyears <= 4 & data.all$param != "HEADCM" data.all[smooth_val, - z.orig := (z.orig_cdc[smooth_val]*cdc_weight[smooth_val] + - z.orig_who[smooth_val]*who_weight[smooth_val])/2] - data.all[smooth_val, - sd.orig := (sd.orig_cdc[smooth_val]*cdc_weight[smooth_val] + - sd.orig_who[smooth_val]*who_weight[smooth_val])/2] + sd.orig := (data.all$sd.orig_cdc[smooth_val]*cdc_weight[smooth_val] + + data.all$sd.orig_who[smooth_val]*who_weight[smooth_val])/2] # otherwise use WHO and CDC for older and younger, respectively who_val <- data.all$param == "HEADCM" | - data.all$agedays/365.25 < 2 - data.all[who_val | (smooth_val & is.na(data.all$z.orig_cdc)), - z.orig := data.all$z.orig_who[who_val | (smooth_val & is.na(data.all$z.orig_cdc))]] - data.all[who_val | (smooth_val & is.na(data.all$sd.orig_cdc)), - sd.orig := data.all$sd.orig_who[who_val | (smooth_val & is.na(data.all$sd.orig_cdc))]] + data.all$ageyears < 2 - cdc_val <- data.all$param != "HEADCM" | - data.all$agedays/365.25 > 4 - data.all[cdc_val | (smooth_val & is.na(data.all$z.orig_who)), - z.orig := data.all$z.orig_cdc[cdc_val | (smooth_val & is.na(data.all$z.orig_who))]] - data.all[cdc_val | (smooth_val & is.na(data.all$sd.orig_who)), - sd.orig := data.all$sd.orig_cdc[cdc_val | (smooth_val & is.na(data.all$sd.orig_who))]] + data.all[(who_val & !smooth_val) | (smooth_val & is.na(data.all$sd.orig_cdc)), + sd.orig := data.all$sd.orig_who[(who_val & !smooth_val) | (smooth_val & is.na(data.all$sd.orig_cdc))]] + + cdc_val <- data.all$param != "HEADCM" & + data.all$ageyears > 4 + + data.all[(cdc_val & !smooth_val) | (smooth_val & is.na(data.all$sd.orig_who)), + sd.orig := data.all$sd.orig_cdc[(cdc_val & !smooth_val) | (smooth_val & is.na(data.all$sd.orig_who))]] # NOTE: SD SCORES IN CODE ARE Z IN INFANT DOCS -- USE sd.orig ONLY # keep the original, uncorrected, unrecentered zscores data.all[,sd.orig_uncorr := sd.orig] - data.all[,z.orig_uncorr := z.orig] # NOTE: MAY WANT TO SUBSET HERE diff --git a/R/infants_clean.R b/R/infants_clean.R index d480dc1..acf8c23 100644 --- a/R/infants_clean.R +++ b/R/infants_clean.R @@ -280,7 +280,7 @@ cleanbatch_infants <- function(data.df, # identify z cutoff # ***Note, using unrecentered values*** # *For weight only do after birth - data.df[valid_set & param == "WEIGHTKG" & sd.orig_uncorr < - 25 & + data.df[valid_set & param == "WEIGHTKG" & sd.orig_uncorr < -25 & ageyears < 1, exclude := exc_nam] data.df[valid_set & param == "WEIGHTKG" & sd.orig_uncorr < -15 & @@ -290,7 +290,7 @@ cleanbatch_infants <- function(data.df, exclude := exc_nam] # *Max z-score for height based on analysis of CHOP data because 15/25 too loose for upper limits - data.df[valid_set & param == "HEIGHTCM" & sd.orig_uncorr < - 25 & + data.df[valid_set & param == "HEIGHTCM" & sd.orig_uncorr < -25 & ageyears < 1, exclude := exc_nam] data.df[valid_set & param == "HEIGHTCM" & sd.orig_uncorr < -15 & @@ -299,6 +299,9 @@ cleanbatch_infants <- function(data.df, data.df[valid_set & param == "HEIGHTCM" & sd.orig_uncorr > 8, exclude := exc_nam] + print("Standard BIV") + print(data.df[param == "HEIGHTCM",c(ageyears, param, sd.orig_uncorr)]) + # head circumference data.df[valid_set & param == "HEADCM" & sd.orig_uncorr < -15, exclude := exc_nam]