omarsilverman committed May 29, 2024
2 parents ee5dab8 + f96f0e9 commit a92fdf5
Showing 4 changed files with 99 additions and 120 deletions.
25 changes: 22 additions & 3 deletions includes/papers.bib
@@ -389,7 +389,7 @@ @article{knowler_reduction_2002
year = {2002},
pmid = {11832527},
pmcid = {PMC1370926},
keywords = {Adult, Blood Glucose, Body Mass Index, Diabetes Mellitus, Type 2, Double-Blind Method, Energy Intake, Exercise, Female, Humans, Hypoglycemic Agents, Incidence, Life Style, Male, Metformin, Middle Aged, Patient Compliance, Risk Factors, Weight Loss},
keywords = {Male, Adult, Humans, Energy Intake, Female, Middle Aged, Risk Factors, Diabetes Mellitus, Type 2, Life Style, Blood Glucose, Metformin, Weight Loss, Exercise, Body Mass Index, Incidence, Double-Blind Method, Hypoglycemic Agents, Patient Compliance},
pages = {393--403},
file = {Full Text:C\:\\Users\\au302898\\Zotero\\storage\\PQA8KYS7\\Knowler et al. - 2002 - Reduction in the incidence of type 2 diabetes with.pdf:application/pdf},
}
@@ -410,7 +410,7 @@ @article{pan_effects_1997
month = apr,
year = {1997},
pmid = {9096977},
keywords = {Adult, Blood Glucose, Body Mass Index, China, Combined Modality Therapy, Diabetes Mellitus, Diabetes Mellitus, Type 2, Exercise, Female, Follow-Up Studies, Glucose Intolerance, Humans, Incidence, Male, Mass Screening, Middle Aged, Obesity, Proportional Hazards Models, Risk Factors, Time Factors},
keywords = {Male, Adult, Obesity, Humans, Female, Middle Aged, Risk Factors, Diabetes Mellitus, Type 2, Blood Glucose, Exercise, Body Mass Index, Diabetes Mellitus, Proportional Hazards Models, Incidence, Follow-Up Studies, China, Time Factors, Combined Modality Therapy, Glucose Intolerance, Mass Screening},
pages = {537--544},
}

@@ -430,7 +430,7 @@ @article{tuomilehto_prevention_2001
month = may,
year = {2001},
pmid = {11333990},
keywords = {Diabetes Mellitus, Type 2, Diet, Fat-Restricted, Dietary Fiber, Exercise, Female, Glucose Intolerance, Glucose Tolerance Test, Humans, Incidence, Life Style, Male, Middle Aged, Obesity, Risk, Weight Loss},
keywords = {Male, Obesity, Humans, Female, Middle Aged, Diabetes Mellitus, Type 2, Life Style, Diet, Fat-Restricted, Weight Loss, Dietary Fiber, Glucose Tolerance Test, Risk, Exercise, Incidence, Glucose Intolerance},
pages = {1343--1350},
}

@@ -534,3 +534,22 @@ @article{tingley_mediation_2014
year = {2014},
file = {Tingley et al. - 2014 - mediation R Package for Causal Med.pdf:C\:\\Users\\au302898\\Zotero\\storage\\AQ6Z2QNW\\Tingley et al. - 2014 - mediation R Package for Causal Med.pdf:application/pdf},
}

@article{vanderweele_sensitivity_2017,
title = {Sensitivity {Analysis} in {Observational} {Research}: {Introducing} the {E}-{Value}},
volume = {167},
issn = {1539-3704},
shorttitle = {Sensitivity {Analysis} in {Observational} {Research}},
doi = {10.7326/M16-2607},
abstract = {Sensitivity analysis is useful in assessing how robust an association is to potential unmeasured or uncontrolled confounding. This article introduces a new measure called the "E-value," which is related to the evidence for causality in observational studies that are potentially subject to confounding. The E-value is defined as the minimum strength of association, on the risk ratio scale, that an unmeasured confounder would need to have with both the treatment and the outcome to fully explain away a specific treatment-outcome association, conditional on the measured covariates. A large E-value implies that considerable unmeasured confounding would be needed to explain away an effect estimate. A small E-value implies little unmeasured confounding would be needed to explain away an effect estimate. The authors propose that in all observational studies intended to produce evidence for causality, the E-value be reported or some other sensitivity analysis be used. They suggest calculating the E-value for both the observed association estimate (after adjustments for measured confounders) and the limit of the confidence interval closest to the null. If this were to become standard practice, the ability of the scientific community to assess evidence from observational studies would improve considerably, and ultimately, science would be strengthened.},
language = {eng},
number = {4},
journal = {Annals of Internal Medicine},
author = {VanderWeele, Tyler J. and Ding, Peng},
month = aug,
year = {2017},
pmid = {28693043},
keywords = {Confounding Factors, Epidemiologic, Epidemiologic Research Design, Humans, Observational Studies as Topic, Sensitivity and Specificity},
pages = {268--274},
file = {Submitted Version:C\:\\Users\\au302898\\Zotero\\storage\\5RG8AH7Q\\VanderWeele and Ding - 2017 - Sensitivity Analysis in Observational Research In.pdf:application/pdf},
}
176 changes: 68 additions & 108 deletions sessions/causal-mediation-analysis-estimation.qmd
@@ -432,27 +432,27 @@ grViz("
digraph {
graph []
node [shape = plaintext]
X
M
L
U [fontcolor = red]
Y
edge [minlen = 1.5]
X->Y
X->L
L->Y
W [label = 'W']
A [label = 'A']
M [label = 'M']
Y [label = 'Y']
L [label = 'L']
U [label = 'U']
edge [minlen = 2]
A->M
A->L
L->M
U->L
U->Y
X->M
L->Y
M->Y
W->X
W->M
A->Y
W->A
W->Y
{ rank = same; X; M; Y }
{ rank = min; L;}
{ rank = max; W;}
}")
W->M
U->L
U->Y
{rank = same; W; A; M; Y; }
}
")
```

### Time-varying confounding
@@ -478,7 +478,7 @@ digraph {
L2
Lt
Y
edge [minlen = 1.5]
edge [minlen = 2]
X0 -> X1
X1 -> X2
X2 -> Xt
@@ -554,111 +554,71 @@ We will now demonstrate how to conduct g-computation in a simulated
dataset.

```{r}
datasim <- function(n) {
x1 <- rbinom(n, size = 1, prob = 0.5)
datasim <- function(n) {
# This small function simulates a dataset with n rows
  # containing covariates, an action, an outcome, and
# the underlying potential outcomes
x1 <- rbinom(n, size = 1, prob = 0.5)
x2 <- rbinom(n, size = 1, prob = 0.65)
x3 <- rnorm(n, 0, 1)
x4 <- rnorm(n, 0, 1)
A <- rbinom(n, size = 1, prob = plogis(-1.6 + 2.5 * x2 + 2.2 * x3 + 0.6 * x4 + 0.4 * x2 * x4))
M <- 0.5 * A + 0.3 * x1 + rnorm(n)
Y.1 <- rbinom(n, size = 1, prob = plogis(-0.7 + 1 - 0.15 * x1 + 0.45 * x2 + 0.20 * x4 + 0.4 * x2 * x4))
Y.0 <- rbinom(n, size = 1, prob = plogis(-0.7 + 0 - 0.15 * x1 + 0.45 * x2 + 0.20 * x4 + 0.4 * x2 * x4))
Y <- Y.1 * A + Y.0 * (1 - A)
data.frame(x1, x2, x3, x4, A, M, Y, Y.1, Y.0)
}
# The action (independent variable, treatment, exposure...)
# is a function of x2, x3, x4 and
# the product of (ie, an interaction of) x2 and x4
A <- rbinom(n, size = 1, prob = plogis(-1.6 + 2.5*x2 + 2.2*x3 + 0.6*x4 + 0.4*x2*x4))
# Simulate the two potential outcomes
  # as functions of x1, x2, x4 and the product of x2 and x4
# Potential outcome if the action is 1
# note that here, we add 1
Y.1 <- rbinom(n, size = 1, prob = plogis(-0.7 + 1 - 0.15*x1 + 0.45*x2 + 0.20*x4 + 0.4*x2*x4))
# Potential outcome if the action is 0
  # note that here, we do not add 1, so that there
  # is a difference between the two potential outcomes (ie, an effect of A)
Y.0 <- rbinom(n, size = 1, prob = plogis(-0.7 + 0 - 0.15*x1 + 0.45*x2 + 0.20*x4 + 0.4*x2*x4))
  # The observed outcome is the potential outcome (Y.1 or Y.0)
  # corresponding to the action the individual experienced (A)
Y <- Y.1*A + Y.0*(1 - A)
# Return a data.frame as the output of this function
data.frame(x1, x2, x3, x4, A, Y, Y.1, Y.0)
}
set.seed(120110) # for reproducibility
ObsData <- datasim(n = 10000) # really large sample
TRUE_EY.1 <- mean(ObsData$Y.1)
TRUE_EY.1 # mean outcome under A = 1
TRUE_EY.0 <- mean(ObsData$Y.0)
# True average causal effect
TRUE_ACE <- TRUE_EY.1 - TRUE_EY.0
TRUE_EY.1 - TRUE_EY.0
TRUE_MOR <- (TRUE_EY.1 * (1 - TRUE_EY.0)) / ((1 - TRUE_EY.1) * TRUE_EY.0)
TRUE_MOR # true marginal OR
```
ObsData <- datasim(n = 500000) # really large sample
TRUE_EY.1 <- mean(ObsData$Y.1); TRUE_EY.1 # mean outcome under A = 1
Step 1: Fit the models
```{r}
##fit the mediator as a function of A and W
mediator_model <- lm(M ~ A + x1 + x2 + x3 + x4, data = ObsData)
##fit the outcome as a function of A, M and W
outcome_model <- glm(Y ~ A + M + x1 + x2 + x3 + x4, data = ObsData, family = binomial)
```
# Fit a logistic regression model predicting Y from the relevant confounders
Q <- glm(Y ~ A + x2 + x4 + x2*x4, data = ObsData, family=binomial)
Step 2: Duplicate the initial dataset in two counterfactual datasets. In
one world, set A=1; in the other, set A=0. All other variables keep the
original values.
```{r}
# Copy the "actual" data twice
# Copy the "actual" (simulated) data twice
A1Data <- A0Data <- ObsData
# In one world A equals 1 for everyone, in the other one it equals 0 for everyone
# The rest of the data stays as is (for now)
A1Data$A <- 1
A0Data$A <- 0
```
A1Data$A <- 1; A0Data$A <- 0
head(ObsData); head(A1Data); head(A0Data)
Step 3: Apply the models fitted in step 1 to predict each individual's
mediator and outcome in the two counterfactual datasets.

```{r}
# Predict M if A=1
M_A1 <- predict(mediator_model, newdata=A1Data)
# Predict M if A=0
M_A0 <- predict(mediator_model, newdata=A0Data)
# Predict Y if everybody receives the action (A = 1)
Y_A1 <- predict(Q, A1Data, type="response")
# Predict Y if nobody receives the action (A = 0)
Y_A0 <- predict(Q, A0Data, type="response")
# Taking a look at the predictions
data.frame(M_A1 = head(M_A1), M_A0 = head(M_A0))
# Add the predicted mediator values to the respective datasets
A1Data$M <- M_A1
A0Data$M <- M_A0
# Predict Y if A=1, using the predicted mediator values M_A1
Y_A1_M1 <- predict(outcome_model, newdata = A1Data, type = "response")
# Predict Y if A=0, using the predicted mediator values M_A0
Y_A0_M0 <- predict(outcome_model, newdata = A0Data, type = "response")
data.frame(Y_A1=head(Y_A1), Y_A0=head(Y_A0), TRUE_Y=head(ObsData$Y)) |> round(digits = 2)
# Create dataset for A=1, M=0 and A=0, M=1 scenarios
A1Data_M0 <- A1Data
A0Data_M1 <- A0Data
A1Data_M0$M <- M_A0
A0Data_M1$M <- M_A1
# Mean outcomes in the two worlds
pred_A1 <- mean(Y_A1); pred_A0 <- mean(Y_A0)
# Predict the outcomes for these scenarios
Y_A1_M0 <- predict(outcome_model, newdata = A1Data_M0, type = "response")
Y_A0_M1 <- predict(outcome_model, newdata = A0Data_M1, type = "response")
data.frame(Y_A1_M1 = head(Y_A1_M1), Y_A0_M0 = head(Y_A0_M0),Y_A1_M0 = head(Y_A1_M0),Y_A0_M1 = head(Y_A0_M1))
```

Step 4: Compute the average effects

```{r}
# Average outcome when A = 1
mean_Y_A1 <- mean(Y_A1_M1)
# Average outcome when A = 0
mean_Y_A0 <- mean(Y_A0_M0)
# Total effect
total_effect <- mean_Y_A1 - mean_Y_A0
# Total effect
total_effect
# Calculate the average outcomes under counterfactual scenarios
mean_Y_A1 <- mean(Y_A1_M1)
mean_Y_A0 <- mean(Y_A0_M0)
mean_Y_A1_M0 <- mean(Y_A1_M0)
mean_Y_A0_M1 <- mean(Y_A0_M1)
mean_Y_A1_M1 <- mean(Y_A1_M1)
mean_Y_A0_M0 <- mean(Y_A0_M0)
# Calculate the total effect
gcomp_ACE <- mean_Y_A1 - mean_Y_A0
gcomp_ACE
# Calculate the Marginal Odds Ratio using g-computation predictions
gcomp_MOR <- (mean_Y_A1 * (1 - mean_Y_A0)) / ((1 - mean_Y_A1) * mean_Y_A0)
# Marginal odds ratio
MOR_gcomp <- (pred_A1*(1 - pred_A0))/((1 - pred_A1)*pred_A0)
# ATE (risk difference)
RD_gcomp <- pred_A1 - pred_A0
c(MOR_gcomp, RD_gcomp) |> round(digits=3)
```

We recommend taking advantage of R packages to do the work.
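
For example, the `mediation` package @tingley_mediation_2014 automates the estimation of the average causal mediation effect (ACME) and the average direct effect (ADE). The chunk below is only a sketch of the workflow: it reuses the simulated `ObsData` and re-adds the mediator from the earlier version of `datasim()`; in this simulation `M` has no effect on `Y`, so the ACME should be close to zero.

```{r}
library(mediation)

# Re-create a mediator as in the earlier version of datasim()
# (M depends on A and x1 only)
ObsData$M <- 0.5 * ObsData$A + 0.3 * ObsData$x1 + rnorm(nrow(ObsData))

# Mediator and outcome models
med_fit <- lm(M ~ A + x1 + x2 + x3 + x4, data = ObsData)
out_fit <- glm(Y ~ A + M + x1 + x2 + x3 + x4, data = ObsData, family = binomial)

# Quasi-Bayesian estimation of ACME, ADE, total effect and proportion mediated
med_res <- mediate(med_fit, out_fit, treat = "A", mediator = "M", sims = 500)
summary(med_res)
```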
14 changes: 7 additions & 7 deletions sessions/causal-mediation-analysis-sensitivity-analysis.qmd
@@ -252,8 +252,8 @@ levels *A* = *a* and *A* = *a*^\*^.
Once we have calculated the bias term **B~*mult*~(*c*)**, we can
estimate our risk ratio controlling only for *C* (if the outcome is
rare, we fit a logistic regression) and we divide our estimate by
**B~*mult*~(*c*)** to get the corrected estimate for risk ratiothat is,
what we would have obtained if we had adjusted for *U* as well.
**B~*mult*~(*c*)** to get the corrected estimate for risk ratio---that
is, what we would have obtained if we had adjusted for *U* as well.

Under the simplifying assumptions of (A8.1.1) and (A8.1.2b), we can also
obtain corrected confidence intervals by dividing both limits of the
@@ -429,7 +429,7 @@ Once we have calculated the bias term $B^{CDE}_{mult}(m|c)$, we can
estimate the *CDE* risk ratio controlling only for *C* (if the outcome
is rare, we fit a logistic regression) and we divide our estimate and
confidence intervals by the bias factor $B^{CDE}_{mult}(m|c)$ to get the
corrected estimate for CDE risk ratio and its confidence intervalthat
corrected estimate for CDE risk ratio and its confidence interval---that
is, what we would have obtained if we had adjusted for *U* as well.

We have to specify the two prevalences of *U*, namely $P(U = 1|a,m, c)$
@@ -573,14 +573,14 @@ uc_sens
- The E-value is the minimum strength of association, on the risk
ratio scale, that an unmeasured confounder would need to have with
both the treatment and the outcome to fully explain away a specific
treatmentoutcome association, conditional on the measured
covariates.
treatment--outcome association, conditional on the measured
    covariates @vanderweele_sensitivity_2017 (see the short sketch after
    this callout).

- A large E-value implies that considerable unmeasured confounding
would be needed to explain away an effect estimate.

- A small E-value implies little unmeasured confounding would be
needed to explain away an effect estimate.

(Tyler J VanderWeele 2017)
:::
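
As a quick illustration of these points, the sketch below computes the E-value for a hypothetical risk ratio using the closed-form expression from @vanderweele_sensitivity_2017, $E = RR + \sqrt{RR \times (RR - 1)}$ for $RR \geq 1$ (estimates below 1 are inverted first); the input values are made up.

```{r}
# E-value for a point estimate or confidence limit on the risk ratio scale
e_value <- function(rr) {
  rr <- ifelse(rr < 1, 1 / rr, rr)   # protective estimates: invert first
  rr + sqrt(rr * (rr - 1))
}

e_value(1.8)   # hypothetical point estimate
e_value(1.2)   # hypothetical confidence limit closest to the null
```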

## References
4 changes: 2 additions & 2 deletions sessions/causal-mediation-analysis-survival-outcomes.qmd
Expand Up @@ -75,6 +75,8 @@ results with respect to time-to-event outcomes @tein_estimating_2003.
They found that the methods coincide for the accelerated failure time
model but not for the proportional hazards model.

When the outcome is common, we can use a weighting approach (Lange 2012).

To sum up, we can only use the traditional approaches for rare outcomes.
Otherwise, we can use the product method to get an indication of whether
there is mediation, but be aware that the estimate is not accurate.
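
As an illustration, the product method for a time-to-event outcome can be sketched with a linear mediator model and a Cox outcome model. The variable names below (`dat`, `A`, `M`, `C`, `time`, `event`) are placeholders rather than objects defined in these sessions, and the interpretation assumes a rare outcome and no exposure--mediator interaction, so the chunk is not evaluated.

```{r}
#| eval: false
library(survival)

fit_m <- lm(M ~ A + C, data = dat)                          # mediator model
fit_y <- coxph(Surv(time, event) ~ A + M + C, data = dat)   # outcome model

a_path <- coef(fit_m)["A"]   # effect of exposure on mediator
b_path <- coef(fit_y)["M"]   # effect of mediator on the log hazard, given A and C

a_path * b_path       # rough indirect effect on the log hazard ratio scale
coef(fit_y)["A"]      # rough direct effect on the log hazard ratio scale
```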
@@ -116,8 +118,6 @@ have to satisfy below assumptions:
- Additionally, we assume that the mediator is measured for everyone
before the outcome occurs.

When the outcome is common, we can use weight approach (Lange 2012)

## Example

We will continue working on the obesity-CVD example in the Framingham
