From 33bc28b0a17776ce9eb4de5a2a7472206cce672e Mon Sep 17 00:00:00 2001 From: Jie_evezhang Date: Tue, 28 May 2024 22:42:44 +0200 Subject: [PATCH 1/5] order --- sessions/causal-mediation-analysis-survival-outcomes.qmd | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sessions/causal-mediation-analysis-survival-outcomes.qmd b/sessions/causal-mediation-analysis-survival-outcomes.qmd index ed088dc..d3f5c0c 100644 --- a/sessions/causal-mediation-analysis-survival-outcomes.qmd +++ b/sessions/causal-mediation-analysis-survival-outcomes.qmd @@ -75,6 +75,8 @@ results with respect to time-to-event outcomes @tein_estimating_2003. They found that the methods coincides for the accelerated failure time model but not for the proportional hazards model. +When the outcome is common, we can use weight approach (Lange 2012). + To sum up, we can only use the traditional approaches for rare outcomes. Otherwise, we can use the product method to get an indication of whether there is mediation, but be aware that the estimate is not accurate. @@ -116,8 +118,6 @@ have to satisfy below assumptions: - Additionally, we assume that the mediator is measured for everyone before the outcome occurs. -When the outcome is common, we can use weight approach (Lange 2012) - ## Example We will continue working on the obesity-CVD example in the Framingham From 892776f9015cebcb2b294b973a4589b6bf838b57 Mon Sep 17 00:00:00 2001 From: Jie_evezhang Date: Tue, 28 May 2024 22:44:09 +0200 Subject: [PATCH 2/5] test --- .../causal-mediation-analysis-estimation.qmd | 53 +++++++++---------- 1 file changed, 25 insertions(+), 28 deletions(-) diff --git a/sessions/causal-mediation-analysis-estimation.qmd b/sessions/causal-mediation-analysis-estimation.qmd index 5ad772a..c9dbc0f 100644 --- a/sessions/causal-mediation-analysis-estimation.qmd +++ b/sessions/causal-mediation-analysis-estimation.qmd @@ -432,27 +432,27 @@ grViz(" digraph { graph [] node [shape = plaintext] - X - M - L - U [fontcolor = red] - Y - edge [minlen = 1.5] - X->Y - X->L - L->Y + W [label = 'W'] + A [label = 'A'] + M [label = 'M'] + Y [label = 'Y'] + L [label = 'L'] + U [label = 'U'] + edge [minlen = 2] + A->M + A->L L->M - U->L - U->Y - X->M + L->Y M->Y - W->X - W->M + A->Y + W->A W->Y - { rank = same; X; M; Y } - { rank = min; L;} - { rank = max; W;} -}") + W->M + U->L + U->Y +{rank = same; W; A; M; Y; } +} +") ``` ### Time-varying confounding @@ -478,7 +478,7 @@ digraph { L2 Lt Y - edge [minlen = 1.5] + edge [minlen = 2] X0 -> X1 X1 -> X2 X2 -> Xt @@ -572,9 +572,10 @@ ObsData <- datasim(n = 10000) # really large sample TRUE_EY.1 <- mean(ObsData$Y.1) TRUE_EY.1 # mean outcome under A = 1 TRUE_EY.0 <- mean(ObsData$Y.0) +TRUE_EY.0 # True average causal effect TRUE_ACE <- TRUE_EY.1 - TRUE_EY.0 -TRUE_EY.1 - TRUE_EY.0 +TRUE_ACE TRUE_MOR <- (TRUE_EY.1 * (1 - TRUE_EY.0)) / ((1 - TRUE_EY.1) * TRUE_EY.0) TRUE_MOR # true marginal OR ``` @@ -632,7 +633,6 @@ A0Data_M1$M <- M_A1 # Predict the outcomes for these scenarios Y_A1_M0 <- predict(outcome_model, newdata = A1Data_M0, type = "response") Y_A0_M1 <- predict(outcome_model, newdata = A0Data_M1, type = "response") -data.frame(Y_A1_M1 = head(Y_A1_M1), Y_A0_M0 = head(Y_A0_M0),Y_A1_M0 = head(Y_A1_M0),Y_A0_M1 = head(Y_A0_M1)) ``` Step 4: Compute the average effects @@ -647,18 +647,15 @@ total_effect <- mean_Y_A1 - mean_Y_A0 # Total effect total_effect +# Calculate the Marginal Odds Ratio using g-computation predictions +gcomp_MOR <- (mean_Y_A1 * (1 - mean_Y_A0)) / ((1 - mean_Y_A1) * mean_Y_A0) + + # Calculate the average outcomes under counterfactual scenarios -mean_Y_A1 <- mean(Y_A1_M1) -mean_Y_A0 <- mean(Y_A0_M0) mean_Y_A1_M0 <- mean(Y_A1_M0) mean_Y_A0_M1 <- mean(Y_A0_M1) mean_Y_A1_M1 <- mean(Y_A1_M1) mean_Y_A0_M0 <- mean(Y_A0_M0) -# Calculate the total effect -gcomp_ACE <- mean_Y_A1 - mean_Y_A0 -gcomp_ACE -# Calculate the Marginal Odds Ratio using g-computation predictions -gcomp_MOR <- (mean_Y_A1 * (1 - mean_Y_A0)) / ((1 - mean_Y_A1) * mean_Y_A0) ``` We recommend take the advantage of R packages to do the work From 39b4b2942c1242db1ef93800ede3e52ab0922b30 Mon Sep 17 00:00:00 2001 From: Daniel Ibsen Date: Wed, 29 May 2024 06:19:18 +0200 Subject: [PATCH 3/5] refs update --- includes/papers.bib | 25 ++++++++++++++++++++++--- 1 file changed, 22 insertions(+), 3 deletions(-) diff --git a/includes/papers.bib b/includes/papers.bib index c6751e3..3eecab5 100644 --- a/includes/papers.bib +++ b/includes/papers.bib @@ -389,7 +389,7 @@ @article{knowler_reduction_2002 year = {2002}, pmid = {11832527}, pmcid = {PMC1370926}, - keywords = {Adult, Blood Glucose, Body Mass Index, Diabetes Mellitus, Type 2, Double-Blind Method, Energy Intake, Exercise, Female, Humans, Hypoglycemic Agents, Incidence, Life Style, Male, Metformin, Middle Aged, Patient Compliance, Risk Factors, Weight Loss}, + keywords = {Male, Adult, Humans, Energy Intake, Female, Middle Aged, Risk Factors, Diabetes Mellitus, Type 2, Life Style, Blood Glucose, Metformin, Weight Loss, Exercise, Body Mass Index, Incidence, Double-Blind Method, Hypoglycemic Agents, Patient Compliance}, pages = {393--403}, file = {Full Text:C\:\\Users\\au302898\\Zotero\\storage\\PQA8KYS7\\Knowler et al. - 2002 - Reduction in the incidence of type 2 diabetes with.pdf:application/pdf}, } @@ -410,7 +410,7 @@ @article{pan_effects_1997 month = apr, year = {1997}, pmid = {9096977}, - keywords = {Adult, Blood Glucose, Body Mass Index, China, Combined Modality Therapy, Diabetes Mellitus, Diabetes Mellitus, Type 2, Exercise, Female, Follow-Up Studies, Glucose Intolerance, Humans, Incidence, Male, Mass Screening, Middle Aged, Obesity, Proportional Hazards Models, Risk Factors, Time Factors}, + keywords = {Male, Adult, Obesity, Humans, Female, Middle Aged, Risk Factors, Diabetes Mellitus, Type 2, Blood Glucose, Exercise, Body Mass Index, Diabetes Mellitus, Proportional Hazards Models, Incidence, Follow-Up Studies, China, Time Factors, Combined Modality Therapy, Glucose Intolerance, Mass Screening}, pages = {537--544}, } @@ -430,7 +430,7 @@ @article{tuomilehto_prevention_2001 month = may, year = {2001}, pmid = {11333990}, - keywords = {Diabetes Mellitus, Type 2, Diet, Fat-Restricted, Dietary Fiber, Exercise, Female, Glucose Intolerance, Glucose Tolerance Test, Humans, Incidence, Life Style, Male, Middle Aged, Obesity, Risk, Weight Loss}, + keywords = {Male, Obesity, Humans, Female, Middle Aged, Diabetes Mellitus, Type 2, Life Style, Diet, Fat-Restricted, Weight Loss, Dietary Fiber, Glucose Tolerance Test, Risk, Exercise, Incidence, Glucose Intolerance}, pages = {1343--1350}, } @@ -534,3 +534,22 @@ @article{tingley_mediation_2014 year = {2014}, file = {Tingley et al. - 2014 - mediation R Package for Causal Med.pdf:C\:\\Users\\au302898\\Zotero\\storage\\AQ6Z2QNW\\Tingley et al. - 2014 - mediation R Package for Causal Med.pdf:application/pdf}, } + +@article{vanderweele_sensitivity_2017, + title = {Sensitivity {Analysis} in {Observational} {Research}: {Introducing} the {E}-{Value}}, + volume = {167}, + issn = {1539-3704}, + shorttitle = {Sensitivity {Analysis} in {Observational} {Research}}, + doi = {10.7326/M16-2607}, + abstract = {Sensitivity analysis is useful in assessing how robust an association is to potential unmeasured or uncontrolled confounding. This article introduces a new measure called the "E-value," which is related to the evidence for causality in observational studies that are potentially subject to confounding. The E-value is defined as the minimum strength of association, on the risk ratio scale, that an unmeasured confounder would need to have with both the treatment and the outcome to fully explain away a specific treatment-outcome association, conditional on the measured covariates. A large E-value implies that considerable unmeasured confounding would be needed to explain away an effect estimate. A small E-value implies little unmeasured confounding would be needed to explain away an effect estimate. The authors propose that in all observational studies intended to produce evidence for causality, the E-value be reported or some other sensitivity analysis be used. They suggest calculating the E-value for both the observed association estimate (after adjustments for measured confounders) and the limit of the confidence interval closest to the null. If this were to become standard practice, the ability of the scientific community to assess evidence from observational studies would improve considerably, and ultimately, science would be strengthened.}, + language = {eng}, + number = {4}, + journal = {Annals of Internal Medicine}, + author = {VanderWeele, Tyler J. and Ding, Peng}, + month = aug, + year = {2017}, + pmid = {28693043}, + keywords = {Confounding Factors, Epidemiologic, Epidemiologic Research Design, Humans, Observational Studies as Topic, Sensitivity and Specificity}, + pages = {268--274}, + file = {Submitted Version:C\:\\Users\\au302898\\Zotero\\storage\\5RG8AH7Q\\VanderWeele and Ding - 2017 - Sensitivity Analysis in Observational Research In.pdf:application/pdf}, +} From 7c8ae95d6f7fb127147b0c21387f3a3c43253601 Mon Sep 17 00:00:00 2001 From: Daniel Ibsen Date: Wed, 29 May 2024 06:21:17 +0200 Subject: [PATCH 4/5] added ref to e-value --- ...sal-mediation-analysis-sensitivity-analysis.qmd | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/sessions/causal-mediation-analysis-sensitivity-analysis.qmd b/sessions/causal-mediation-analysis-sensitivity-analysis.qmd index 95d7834..fbdf9f8 100644 --- a/sessions/causal-mediation-analysis-sensitivity-analysis.qmd +++ b/sessions/causal-mediation-analysis-sensitivity-analysis.qmd @@ -252,8 +252,8 @@ levels *A* = *a* and *A* = *a*^\*^. Once we have calculated the bias term **B~*mult*~(*c*)**, we can estimate our risk ratio controlling only for *C* (if the outcome is rare, fit a logistic regression) and we divide our estimate by -**B~*mult*~(*c*)** to get the corrected estimate for risk ratio—that is, -what we would have obtained if we had adjusted for *U* as well. +**B~*mult*~(*c*)** to get the corrected estimate for risk ratio---that +is, what we would have obtained if we had adjusted for *U* as well. Under the simplifying assumptions of (A8.1.1) and (A8.1.2b), we can also obtain corrected confidence intervals by dividing both limits of the @@ -429,7 +429,7 @@ Once we have calculated the bias term $B^{CDE}_{mult}(m|c)$, we can estimate the *CDE* risk ratio controlling only for *C* (if the outcome is rare), we fit a logistic regression) and we divide our estimate and confidence intervals by the bias factor $B^{CDE}_{mult}(m|c)$ to get the -corrected estimate for CDE risk ratio and its confidence interval—that +corrected estimate for CDE risk ratio and its confidence interval---that is, what we would have obtained if we had adjusted for *U* a well. We have to specify the two prevalences of *U*, namely $P(U = 1|a,m, c)$ @@ -573,14 +573,14 @@ uc_sens - The E-value is the minimum strength of association, on the risk ratio scale, that an unmeasured confounder would need to have with both the treatment and the outcome to fully explain away a specific - treatment–outcome association, conditional on the measured - covariates. + treatment--outcome association, conditional on the measured + covariates @vanderweele_sensitivity_2017. - A large E-value implies that considerable unmeasured confounding would be needed to explain away an effect estimate. - A small E-value implies little unmeasured confounding would be needed to explain away an effect estimate. - -(Tyler J VanderWeele 2017) ::: + +## References From f96f0e9c359b4ea9a2121f7230af92f9f24ccd50 Mon Sep 17 00:00:00 2001 From: Jie_evezhang Date: Wed, 29 May 2024 06:55:45 +0200 Subject: [PATCH 5/5] CHANGE code --- .../causal-mediation-analysis-estimation.qmd | 135 +++++++----------- 1 file changed, 49 insertions(+), 86 deletions(-) diff --git a/sessions/causal-mediation-analysis-estimation.qmd b/sessions/causal-mediation-analysis-estimation.qmd index 8f76fab..fd6ada9 100644 --- a/sessions/causal-mediation-analysis-estimation.qmd +++ b/sessions/causal-mediation-analysis-estimation.qmd @@ -554,108 +554,71 @@ We will now demonstrate how to conduct g-computation in a simulated dataset. ```{r} -datasim <- function(n) { - x1 <- rbinom(n, size = 1, prob = 0.5) +datasim <- function(n) { + # This small function simulates a dataset with n rows + # containing covariats, and action, an outcome and + # the underlying potential outcomes + x1 <- rbinom(n, size = 1, prob = 0.5) x2 <- rbinom(n, size = 1, prob = 0.65) x3 <- rnorm(n, 0, 1) x4 <- rnorm(n, 0, 1) - A <- rbinom(n, size = 1, prob = plogis(-1.6 + 2.5 * x2 + 2.2 * x3 + 0.6 * x4 + 0.4 * x2 * x4)) - M <- 0.5 * A + 0.3 * x1 + rnorm(n) - Y.1 <- rbinom(n, size = 1, prob = plogis(-0.7 + 1 - 0.15 * x1 + 0.45 * x2 + 0.20 * x4 + 0.4 * x2 * x4)) - Y.0 <- rbinom(n, size = 1, prob = plogis(-0.7 + 0 - 0.15 * x1 + 0.45 * x2 + 0.20 * x4 + 0.4 * x2 * x4)) - Y <- Y.1 * A + Y.0 * (1 - A) - data.frame(x1, x2, x3, x4, A, M, Y, Y.1, Y.0) -} + # The action (independent variable, treatment, exposure...) + # is a function of x2, x3, x4 and + # the product of (ie, an interaction of) x2 and x4 + A <- rbinom(n, size = 1, prob = plogis(-1.6 + 2.5*x2 + 2.2*x3 + 0.6*x4 + 0.4*x2*x4)) + # Simulate the two potential outcomes + # as functions of x1, X2, X4 and the product of x2 and x4 + + # Potential outcome if the action is 1 + # note that here, we add 1 + Y.1 <- rbinom(n, size = 1, prob = plogis(-0.7 + 1 - 0.15*x1 + 0.45*x2 + 0.20*x4 + 0.4*x2*x4)) + # Potential outcome if the action is 0 + # note that here, we do not add 1 so that there + # is a different the two potential outcomes (ie, an effect of A) + Y.0 <- rbinom(n, size = 1, prob = plogis(-0.7 + 0 - 0.15*x1 + 0.45*x2 + 0.20*x4 + 0.4*x2*x4)) + + # Observed outcome + # is the potential outcomes (Y.1 or Y.0) + # corresponding to action the individual experienced (A) + Y <- Y.1*A + Y.0*(1 - A) + + # Return a data.frame as the output of this function + data.frame(x1, x2, x3, x4, A, Y, Y.1, Y.0) +} + + set.seed(120110) # for reproducibility -ObsData <- datasim(n = 10000) # really large sample -TRUE_EY.1 <- mean(ObsData$Y.1) -TRUE_EY.1 # mean outcome under A = 1 -TRUE_EY.0 <- mean(ObsData$Y.0) -TRUE_EY.0 -# True average causal effect -TRUE_ACE <- TRUE_EY.1 - TRUE_EY.0 -TRUE_ACE -TRUE_MOR <- (TRUE_EY.1 * (1 - TRUE_EY.0)) / ((1 - TRUE_EY.1) * TRUE_EY.0) -TRUE_MOR # true marginal OR -``` +ObsData <- datasim(n = 500000) # really large sample +TRUE_EY.1 <- mean(ObsData$Y.1); TRUE_EY.1 # mean outcome under A = 1 -Step 1: Fit the models -```{r} -##fit the mediator as a function of A and W -mediator_model <- lm(M ~ A + x1 + x2 + x3 + x4, data = ObsData) -##fit the outcome as a function of A, M and W -outcome_model <- glm(Y ~ A + M + x1 + x2 + x3 + x4, data = ObsData, family = binomial) -``` +# Fit a logistic regression model predicting Y from the relevant confounders +Q <- glm(Y ~ A + x2 + x4 + x2*x4, data = ObsData, family=binomial) -Step 2: Duplicate the initial dataset in two counterfactual datasets. In -one world, set A=1; in the other, set A=0. All other variables keep the -original values. -```{r} -# Copy the "actual" data twice +# Copy the "actual" (simulated) data twice A1Data <- A0Data <- ObsData # In one world A equals 1 for everyone, in the other one it equals 0 for everyone # The rest of the data stays as is (for now) -A1Data$A <- 1 -A0Data$A <- 0 -``` - -Step 3. Apply the function from step 1 to predict each individual's -mediator and outcome in the two counterfactual datasets. +A1Data$A <- 1; A0Data$A <- 0 +head(ObsData); head(A1Data); head(A0Data) -```{r} -# Predict M if A=1 -M_A1 <- predict(mediator_model, newdata=A1Data) -# Predict M if A=0 -M_A0 <- predict(mediator_model, newdata=A0Data) +# Predict Y if everybody attends +Y_A1 <- predict(Q, A1Data, type="response") +# Predict Y if nobody attends +Y_A0 <- predict(Q, A0Data, type="response") # Taking a look at the predictions -data.frame(M_A1 = head(M_A1), M_A0 = head(M_A0)) - - -# Add the predicted mediator values to the respective datasets -A1Data$M <- M_A1 -A0Data$M <- M_A0 -# Predict Y if A=1 and MA1 using M_A1 -Y_A1_M1 <- predict(outcome_model, newdata = A1Data, type = "response") -# Predict Y if A=0 using MA0 using M_A0 -Y_A0_M0 <- predict(outcome_model, newdata = A0Data, type = "response") - - -# Create dataset for A=1, M=0 and A=0, M=1 scenarios -A1Data_M0 <- A1Data -A0Data_M1 <- A0Data -A1Data_M0$M <- M_A0 -A0Data_M1$M <- M_A1 +data.frame(Y_A1=head(Y_A1), Y_A0=head(Y_A0), TRUE_Y=head(ObsData$Y)) |> round(digits = 2) +# Mean outcomes in the two worlds +pred_A1 <- mean(Y_A1); pred_A0 <- mean(Y_A0) -# Predict the outcomes for these scenarios -Y_A1_M0 <- predict(outcome_model, newdata = A1Data_M0, type = "response") -Y_A0_M1 <- predict(outcome_model, newdata = A0Data_M1, type = "response") -``` - -Step 4: Compute the average effects - -```{r} -# Average outcome when A = 1 -mean_Y_A1 <- mean(Y_A1_M1) -# Average outcome when A = 0 -mean_Y_A0 <- mean(Y_A0_M0) -# Total effect -total_effect <- mean_Y_A1 - mean_Y_A0 -# Total effect -total_effect - -# Calculate the Marginal Odds Ratio using g-computation predictions -gcomp_MOR <- (mean_Y_A1 * (1 - mean_Y_A0)) / ((1 - mean_Y_A1) * mean_Y_A0) - - -# Calculate the average outcomes under counterfactual scenarios -mean_Y_A1_M0 <- mean(Y_A1_M0) -mean_Y_A0_M1 <- mean(Y_A0_M1) -mean_Y_A1_M1 <- mean(Y_A1_M1) -mean_Y_A0_M0 <- mean(Y_A0_M0) +# Marginal odds ratio +MOR_gcomp <- (pred_A1*(1 - pred_A0))/((1 - pred_A1)*pred_A0) +# ATE (risk difference) +RD_gcomp <- pred_A1 - pred_A0 +c(MOR_gcomp, RD_gcomp) |> round(digits=3) ``` We recommend take the advantage of R packages to do the work