Commit

omarsilverman committed May 27, 2024
2 parents 0b68cf5 + d4facbd commit 27ab60b
Showing 14 changed files with 387 additions and 199 deletions.
1 change: 0 additions & 1 deletion _extensions/rostools/r3-theme/_extension.yml
@@ -40,7 +40,6 @@ contributes:

csl: vancouver.csl

callout-appearance: minimal
reference-location: margin
crossref:
chapters: true
5 changes: 1 addition & 4 deletions _quarto.yml
@@ -1,9 +1,5 @@
project:
type: r3-theme
render:
- "*.qmd"
- "*.md"
- "*.Rmd"

book:
title: "Analysis of mechanisms"
@@ -33,6 +29,7 @@ book:
- sessions/NetCoupler-example.qmd
appendices:
- appendices/references.qmd
- LICENSE.md
page-footer:
center:
- text: "License: CC BY 4.0 {{< fa brands creative-commons >}} {{< fa brands creative-commons-by >}}"
178 changes: 178 additions & 0 deletions includes/papers.bib

Large diffs are not rendered by default.

3 changes: 0 additions & 3 deletions preamble/pre-course.qmd
@@ -102,7 +102,6 @@ use:
#| results: asis
install.packages("name-of-package")
```

If you have issues with the CMA package, you could also try this:

@@ -115,8 +114,6 @@
install.packages("devtools")
devtools::install_github("BS1125/CMAverse")
```

## Code of conduct {#sec-code-of-conduct}
2 changes: 1 addition & 1 deletion sessions/NetCoupler-example.qmd
@@ -1,6 +1,6 @@
# NetCoupler

```{r}
```{r setup}
#| include: false
library(here)
library(DiagrammeR)
119 changes: 61 additions & 58 deletions sessions/causal-mediation-analysis-estimation.qmd
@@ -1,8 +1,7 @@
# Estimation of effects using causal mediation analysis

<!-- Daniel 120 min -->
```{r setup}
```{r}
#| include: false
library(here)
library(DiagrammeR)
@@ -40,6 +39,7 @@ digraph {
edge [minlen = 2]
A->M
M->Y
A->Y
W->A
W->Y
W->M
@@ -91,20 +91,21 @@
explain the intuition behind the estimands. Similar equations exist for
other types and combinations of variables.
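For reference, under one common specification — linear models for a continuous mediator and continuous outcome with an exposure–mediator interaction (an assumed parameterization, not necessarily the exact one used above) — the estimands have the closed forms

$$
\begin{aligned}
E[M \mid a, w] &= \beta_0 + \beta_1 a + \beta_2' w, \\
E[Y \mid a, m, w] &= \theta_0 + \theta_1 a + \theta_2 m + \theta_3 a m + \theta_4' w, \\
\text{CDE}(m) &= (\theta_1 + \theta_3 m)(a - a^*), \\
\text{PNDE} &= \left\{\theta_1 + \theta_3\left(\beta_0 + \beta_1 a^* + \beta_2' w\right)\right\}(a - a^*), \\
\text{TNIE} &= (\theta_2 + \theta_3 a)\,\beta_1\,(a - a^*).
\end{aligned}
$$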

```{r}
# first load the dataset
nhanes <- read.csv(here::here("data/nhanes_dataset.csv"))
nhanes <- read_csv(here::here("data/nhanes_dataset.csv"))
# keep only the variables we need for the example and drop rows with missing values
nhanes <- nhanes %>%
select(
id = seqn,
w1 = age,
w2 = gender,
w3 = education_clean,
w4 = smoke,
a = total_redmeat, #this is the exposure
m = magic_biomarker, #this is the mediator
y = blood_glucose) %>% #this is the outcome
na.omit()
id = seqn,
w1 = age,
w2 = gender,
w3 = education_clean,
w4 = smoke,
a = total_redmeat, # this is the exposure
m = magic_biomarker, # this is the mediator
y = blood_glucose # this is the outcome
) %>%
na.omit()
```

We can now run a model for the mediator only adjusting for a single
@@ -426,26 +427,25 @@
and Y.

```{r echo=FALSE }
# Creating the causal diagram for a mediation model
library(DiagrammeR)
grViz("
digraph {
graph []
node [shape = plaintext]
X
M
X
M
L
U [fontcolor = red]
Y
Y
edge [minlen = 1.5]
X->Y
X->Y
X->L
L->Y
L->M
U->L
U->Y
X->M
M->Y
X->M
M->Y
W->X
W->M
W->Y
@@ -508,35 +508,35 @@ digraph {
L1
L2
Lt
Y
Y
edge [minlen = 1.5]
X0 -> X1
X1 -> X2
X2 -> Xt
Xt -> Y
Xt -> Y
M0 -> M1
M1 -> M2
M2 -> Mt
Mt -> Y
L0 -> L1
L1 -> L2
L2 -> Lt
L1 -> L2
L2 -> Lt
X0 -> M0
X1 -> M1
X2 -> M2
Xt -> Mt
L0 -> X0
L1 -> X1
L2 -> X2
Lt -> Xt
L1 -> X1
L2 -> X2
Lt -> Xt
Lt -> Y
X0 -> L1
X1 -> L2
X2 -> Lt
W -> L0
W -> L1
W -> L2
W -> Lt
W -> L0
W -> L1
W -> L2
W -> Lt
W -> Y
{ rank = min; W;}
{ rank = same; L0; L1; L2; Lt }
@@ -547,10 +547,10 @@ digraph {

### G-computation

The G-computation algorithm was first introduced by Robins (refer) for
estimating time-varying exposure causal in the presence of time-varying
confounders of exposure effects. When estimating total effect,
g-computation is generally equivalent to
The G-computation algorithm was first introduced by Robins
[@robins_new_1986] for estimating the causal effect of a time-varying
exposure in the presence of time-varying confounders. When
estimating the total effect, g-computation is generally equivalent to
inverse-probability-of-treatment weighting (IPTW). But in
high-dimensional settings, g-computation is more powerful. G-computation
(using g-formula) could also provide an intuitive method for decomposing
@@ -581,30 +581,33 @@
We will now demonstrate how to conduct g-computation in a simulated
dataset.

```{r}
datasim <- function(n) {
x1 <- rbinom(n, size = 1, prob = 0.5)
datasim <- function(n) {
x1 <- rbinom(n, size = 1, prob = 0.5)
x2 <- rbinom(n, size = 1, prob = 0.65)
x3 <- rnorm(n, 0, 1)
x4 <- rnorm(n, 0, 1)
A <- rbinom(n, size = 1, prob = plogis(-1.6 + 2.5*x2 + 2.2*x3 + 0.6*x4 + 0.4*x2*x4))
Y.1 <- rbinom(n, size = 1, prob = plogis(-0.7 + 1 - 0.15*x1 + 0.45*x2 + 0.20*x4 + 0.4*x2*x4))
Y.0 <- rbinom(n, size = 1, prob = plogis(-0.7 + 0 - 0.15*x1 + 0.45*x2 + 0.20*x4 + 0.4*x2*x4))
Y <- Y.1*A + Y.0*(1 - A)
data.frame(x1, x2, x3, x4, A, Y, Y.1, Y.0)
}
A <- rbinom(n, size = 1, prob = plogis(-1.6 + 2.5 * x2 + 2.2 * x3 + 0.6 * x4 + 0.4 * x2 * x4))
Y.1 <- rbinom(n, size = 1, prob = plogis(-0.7 + 1 - 0.15 * x1 + 0.45 * x2 + 0.20 * x4 + 0.4 * x2 * x4))
Y.0 <- rbinom(n, size = 1, prob = plogis(-0.7 + 0 - 0.15 * x1 + 0.45 * x2 + 0.20 * x4 + 0.4 * x2 * x4))
Y <- Y.1 * A + Y.0 * (1 - A)
data.frame(x1, x2, x3, x4, A, Y, Y.1, Y.0)
}
set.seed(120110) # for reproducibility
ObsData <- datasim(n = 10000) # really large sample
TRUE_EY.1 <- mean(ObsData$Y.1); TRUE_EY.1 # mean outcome under A = 1
TRUE_EY.0 <- mean(ObsData$Y.0); TRUE_EY.0
TRUE_MOR <- (TRUE_EY.1*(1 - TRUE_EY.0))/((1 - TRUE_EY.1)*TRUE_EY.0); TRUE_MOR # true marginal OR
TRUE_EY.1 <- mean(ObsData$Y.1)
TRUE_EY.1 # mean outcome under A = 1
TRUE_EY.0 <- mean(ObsData$Y.0)
TRUE_EY.0
TRUE_MOR <- (TRUE_EY.1 * (1 - TRUE_EY.0)) / ((1 - TRUE_EY.1) * TRUE_EY.0)
TRUE_MOR # true marginal OR
```

The total effect if we use a traditional model:

```{r}
# Fit a logistic regression model predicting Y from the relevant confounders
Q <- glm(Y ~ A + x2 + x4 + x2*x4, data = ObsData, family=binomial)
Q <- glm(Y ~ A + x2 + x4 + x2 * x4, data = ObsData, family = binomial)
exp(Q$coef[2])
```

@@ -621,38 +624,38 @@
A1Data$A <- 1
A0Data$A <- 0
# Predict Y if A=1
Y_A1 <- predict(Q, A1Data, type="response")
Y_A1 <- predict(Q, A1Data, type = "response")
# Predict Y if A=0
Y_A0 <- predict(Q, A0Data, type="response")
Y_A0 <- predict(Q, A0Data, type = "response")
# Taking a look at the predictions
data.frame(Y_A1=head(Y_A1), Y_A0=head(Y_A0), TRUE_Y=head(ObsData$Y)) |> round(digits = 2)
data.frame(Y_A1 = head(Y_A1), Y_A0 = head(Y_A0), TRUE_Y = head(ObsData$Y)) |> round(digits = 2)
# Mean outcomes in the two worlds
pred_A1 <- mean(Y_A1)
pred_A0 <- mean(Y_A0)
# Marginal odds ratio
MOR_gcomp <- (pred_A1*(1 - pred_A0))/((1 - pred_A1)*pred_A0)
MOR_gcomp <- (pred_A1 * (1 - pred_A0)) / ((1 - pred_A1) * pred_A0)
# ATE (risk difference)
RD_gcomp <- pred_A1 - pred_A0
c(MOR_gcomp, RD_gcomp) |> round(digits=2)
c(MOR_gcomp, RD_gcomp) |> round(digits = 2)
```

The demo shows how to estimate the total effect using g-computation. If you
would like to estimate PNDE, TNIT, PNIE, TNDE, then you also need to
would like to estimate PNDE, TNIE, PNIE, TNDE, then you also need to
predict the two potential mediator values in hypothetical worlds in
which all individuals receive the intervention (A = 1) or none do (A = 0).

We recommend taking advantage of R packages to do this work.
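For instance, with simulated data like that generated in the chunk below (variables `a`, `m`, `y`, `w`, `l`), a `CMAverse::cmest()` call along the following lines returns the decomposed effects. This is a sketch with illustrative argument values, not the definitive specification for these data.

```{r}
#| eval: false
# Sketch of a g-formula analysis with CMAverse; argument values are illustrative
library(CMAverse)
res <- cmest(
  data = data,                 # data frame with a, m, y, w, l (simulated below)
  model = "gformula",          # g-formula handles the post-exposure confounder l
  outcome = "y",
  exposure = "a",
  mediator = "m",
  basec = c("w"),              # baseline confounder(s)
  postc = c("l"),              # exposure-induced confounder(s)
  EMint = TRUE,                # allow an exposure-mediator interaction
  yreg = "logistic",
  mreg = list("logistic"),
  postcreg = list("linear"),
  astar = 0, a = 1, mval = list(1),
  estimation = "imputation",
  inference = "bootstrap",
  nboot = 100
)
summary(res)                   # reports CDE, PNDE, TNDE, PNIE, TNIE, and the total effect
```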

```{r}
set.seed(1)
expit <- function(x) exp(x)/(1+exp(x))
expit <- function(x) exp(x) / (1 + exp(x))
n <- 10000
w <- rnorm(n, mean = 1, sd = 0.2)
a <- rbinom(n, 1, expit(0.3 + 0.5*w))
l <- rnorm(n, mean = 1 + a + 0.5*w, sd = 0.5)
m <- rbinom(n, 1, expit(1 + 2*a - l + 2*w))
y <- rbinom(n, 1, expit(0.3*a + 0.8*m + 0.5*a*m - 0.3*l + 0.4*w))
a <- rbinom(n, 1, expit(0.3 + 0.5 * w))
l <- rnorm(n, mean = 1 + a + 0.5 * w, sd = 0.5)
m <- rbinom(n, 1, expit(1 + 2 * a - l + 2 * w))
y <- rbinom(n, 1, expit(0.3 * a + 0.8 * m + 0.5 * a * m - 0.3 * l + 0.4 * w))
data <- data.frame(a, m, y, w, l)
library(CMAverse)
15 changes: 8 additions & 7 deletions sessions/causal-mediation-analysis-introduction.qmd
@@ -1,10 +1,7 @@
# Introduction to causal mediation analysis

<!-- Daniel 90 min -->

```{r}
```{r setup}
#| include: false
library(here)
library(DiagrammeR)
```
@@ -273,6 +270,7 @@ digraph {
edge [minlen = 2]
A->M
M->Y
A->Y
W->A
W->Y
W->M
@@ -319,6 +317,7 @@ digraph {
edge [minlen = 2]
A->M
M->Y
A->Y
W1->A
W1->Y
W1->M
@@ -333,9 +332,10 @@ digraph {
```

From the DAG rules, we have a special problem that we cannot solve with
traditional regression approaches. If we adjust for W2 we open the
backdoor path from $W1 \leftarrow W2 \rightarrow Y$. We will work on how
to solve this problem later in the course.
traditional regression approaches. If we adjust for W2 we open a
backdoor path by adjusting for the collider
$W1 \rightarrow W2 \leftarrow A$. We will work on how to solve this
problem later in the course.
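
A quick way to convince yourself of this is a small simulation. The sketch below is a minimal, hypothetical example (not the course data): the exposure has no effect on the outcome, yet adjusting for the collider manufactures an association.

```{r}
# Minimal collider-bias simulation (hypothetical variables, not the course dataset)
set.seed(2024)
n <- 1e5
w1 <- rnorm(n)                       # common cause of the collider and the outcome
a <- rbinom(n, 1, 0.5)               # exposure: no effect on y in this simulation
w2 <- 0.8 * w1 + 0.8 * a + rnorm(n)  # collider: caused by both w1 and a
y <- 0.5 * w1 + rnorm(n)             # outcome depends on w1 only

coef(lm(y ~ a))["a"]       # close to 0, as it should be
coef(lm(y ~ a + w2))["a"]  # clearly non-zero: adjusting for w2 opens a -> w2 <- w1 -> y
```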

## Non-linearity and interactions

@@ -578,3 +578,4 @@
In addition to consistency there is the assumption of no carry-over
effects.

## References

2 changes: 0 additions & 2 deletions sessions/causal-mediation-analysis-sensitivity-analysis.qmd
@@ -269,7 +269,6 @@
available on *U* and that *U* confounds the mediator--outcome
relationship.

```{r, echo = FALSE}
library(DiagrammeR)
grViz("
digraph {
@@ -396,7 +395,6 @@
next figure, we can use these same techniques and the same parameters to
do sensitivity analysis for *NDE* as well.

```{r, echo = FALSE}
library(DiagrammeR)
grViz("
digraph {