-
Notifications
You must be signed in to change notification settings - Fork 0
/
00_Demography.R
258 lines (215 loc) · 10.2 KB
/
00_Demography.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
#############################
#Author: Veronica Malizia
#Date: 20/06/2022
#R version: 3.6.1
#
#This script initializes human demography (with age/birth/deaths)
#The age distribution at the equilibrium reached by these simulations will determine the initial population for SchiSTOP
#
# Input Data:
# - Age distribution Uganda 2019
# - Death_rates_Uganda_2019 specific by age (from: https://apps.who.int/gho/data/view.searo.61730?lang=en)
#
# Main use: calibrating birth rate and migration rate to keep population constant and reach equilibrium.
# Death probabilities are given to preserve age distribution.
# Saved output:
# - demography parameters
# - final age-distribution: "Equilibrium_age_distribution.RData"
#
#############################
rm(list = ls())
#.libPaths(c("C:/Program Files/R/R-4.1.2/library",.libPaths()))
library(tidyverse)
library(readxl)
library(foreach)
library(doParallel)
library(splitstackshape)
setwd(dirname(rstudioapi::getActiveDocumentContext()$path))
source.dir <- "C:/Users/Z541213/Documents/Project/Model/Schisto_model"
output.dir <- file.path(source.dir, "/Output")
if(!file.exists(output.dir)){
dir.create(output.dir)
}
################
#Initial plausible Ugandan population
#Starting with the initial cohort
################
age_distr0 <- read_xlsx("Age distribution_Uganda_2019.xlsx")
death_rates <- read.csv("death_rates_Uganda_2019.csv")
prob_death <- read.csv("prob_death_Uganda_2019.csv")
N <- 1000 #population size
#0=male, 1=female
cohort <- c()
for(i in 1:nrow(age_distr0)){
people <- (age_distr0$Both_sexes[i]/100)*N
tmp <- tibble(age = round(runif(round(people), age_distr0$Age_lo[i], age_distr0$Age_hi[i])),
sex = c(as.numeric(rbernoulli(round(people)))))
cohort <- rbind(cohort, tmp)
}
hist(cohort$age)
table(cohort$sex)
#Output folder
output_dem <- file.path(source.dir, "/Output/Demography")
if(!file.exists(output_dem)){
dir.create(output_dem)
}
##Parameters
T <- 300*12 #number of years
#ts <- T*12
birth_rate <- 36.5 #37.38 is crude annual birth rate Uganda 2019 (same y of available lifetables), 34.8 for Sub-Saharan Africa (per 1000 individuals)
#-1.09 is the net migration rate for 2022 for Uganda (per 1000 individuals)
emig_rate <- 18.6 #This approximates the emigration rate from rural villages (2021) (per 1000 individuals)
# Annual time step to get equilibrium
seeds <- 10 # User-defined
writeLines(c(""), "Sink_demography.txt") #initiate log file
time.start <- Sys.time()
cluster <- makeCluster(min(parallel::detectCores(logical = FALSE), seeds))
clusterEvalQ(cluster, .libPaths(c("C:/Program Files/R/R-4.1.2/library",.libPaths())))
registerDoParallel(cluster)
results <- foreach(k = 1:seeds,
.inorder = TRUE,
.errorhandling = "pass", #remove or pass
.verbose = TRUE,
#.combine = bind_rows,
.packages = c("tidyverse", "splitstackshape")) %dopar% {
#for each seed:
#Time step events, for each individual
#Initialize
pop <- cohort %>%
mutate(death_age=100,
age_group = as.numeric(cut(cohort$age, c(-1, prob_death$Age_hi))))
N <- nrow(pop)
ind_file <- c()
for(t in 1:T){
sink("Sink_demography.txt", append=TRUE)
cat(paste(Sys.time(), ": Starting seed", k, "time step", t, "\n", sep = " "))
sink()
#Births (deterministic process)
#for now birth rate does not depend on age-specific female fertility
#here, not stochastic. Just a fixed rate.
births <- round(birth_rate*(nrow(pop)/1000)/12)
#births <- round(birth_rate*(nrow(pop)/1000))
nb <- tibble(age=rep(0, births),
sex=as.numeric(rbernoulli(births, 0.5)),
death_age=100,
age_group=1) #new born
pop <- bind_rows(pop, nb)
#Deaths
if(t %in% seq(1, (T*12), 12)){ #Januaries
ag <- as.numeric(cut(pop$age, c(-1, prob_death$Age_hi))) #age groups
pop <- mutate(pop, age_group = ag)
deaths <- as.tibble(table(ag)) %>%
rename(Ag = ag) %>%
mutate(n.deaths = round(prob_death[Ag, "Both_sexes"]*n)) %>%
filter(n.deaths > 0)
names(deaths$n.deaths) <- deaths$Ag
# if(length(which(deaths$n.alive==0))>0) #we already remove the age group where nobody is left (if any)
# pop <- filter(pop, age_group != deaths$Ag[deaths$n.alive==0])
deads <- stratified(pop[pop$age_group %in% deaths$Ag, ], "age_group",
deaths$n.deaths, keep.rownames = T) %>%
mutate(death_age = age + runif(n(), 1, 12)/12) #starts from 1/12, but it reaches 12/12, that is the following January
pop[deads$rn,"death_age"] <- deads$death_age #we assign death age within that year
}
pop <- filter(pop, age < death_age)
#Migration
#For now we do not account for age-specific emigration
lambda <- round(emig_rate*(nrow(pop)/1000)/12) #net out migration rate (annual)
#lambda <- round(emig_rate*(nrow(pop)/1000))
emigrated <- sample(which(pop$age>5&pop$age<55), lambda)
if(length(emigrated)>0)
pop <- pop[-emigrated,]
#Update age
pop$age <- pop$age + 1/12
N <- c(N, nrow(pop))
#Save annual individual output
ind_file <- rbind(ind_file,
select(pop, age, age_group) %>% #, sex
mutate(time = t,
seed = k))
##ATTENTION!
#Need to cancel extra files if running less seeds at the following round
#This can be improved
}
filename <- paste("Ind_out_seed_", k, ".csv", sep="")
write.csv(ind_file, file.path(output_dem, filename), row.names = F)
res <- tibble(time = 0:T,
seed = rep(k, (T+1)),
pop_size = N)
}
stopCluster(cluster)
time.end <- Sys.time()
time.end - time.start
#Collating results
res <- bind_rows(results)
#Average results over simulations
avg_res <- res %>%
group_by(time) %>%
summarise(N = mean(pop_size))
ggplot(res) +
geom_line(aes(x=time/12, y=pop_size, group = seed), color = "grey20", alpha = 0.3) +
geom_line(data=avg_res, aes(x=time/12, y=N), size=1) +
annotate("text", x = 200, y = 1500, label = emig_rate) +
scale_y_continuous(name = "Population size (counts)",
breaks = seq(0, 10000, 500),
limits = c(0, 2000),
expand = c(0, 0)) +
scale_x_continuous(name = "Time [years]",
limits = c(0, T/12),
expand = c(0, 0)) +
expand_limits(x = 0,y = 0)
hist(cohort$age)
hist(pop$age, breaks = c(0, prob_death$Age_hi[-c(1, nrow(prob_death))]+1),
main = "Initial age distribution", xlab = "Age [years]")
#Saving image
tiff("Demography_monthlyts.tif", width=7, height=6, units = "in", res = 300)
...
dev.off()
plot(N, type='l')
#### Individual-level results
#Collate results by seeds
data_all <- list.files(path = output_dem, # Identify all output CSV files
pattern = "Ind_out_seed_*", full.names = TRUE) %>%
lapply(read_csv, show_col_types = F) %>% # Store all files in list
bind_rows # Combine data sets into one
##Check that Age distribution is preserved by time
#https://r-charts.com/distribution/ggridges/
cohort_plot_age <- cohort %>%
mutate(time = 0,
seed = NA)
bind_rows(cohort_plot_age,
data_all %>%
filter(time %in% c(240*1:50))) %>%
ggplot(aes(x=round(age))) +
geom_histogram(aes(colour=as.factor(seed)), binwidth = 6,
fill="white", position = 'dodge') +
#stat_bin(aes(y=..count.., label=..count..), binwidth = 5, geom="text", vjust=-.5) +
guides(colour=guide_legend(title="Simulation seed", nrow = 2)) +
scale_color_grey() +
facet_wrap(~ as.factor(time/12)) +
scale_y_continuous(name = "Counts",
expand = c(0, 0)) +
scale_x_continuous(name = "Age [years]",
expand = c(0, 0)) +
expand_limits(x = 0,y = 0) +
theme(legend.position="top")
#Save population at the end of simulation as starting population for the transmission model
#I save it as an age distribution
eq_distr <- data_all %>%
filter(time == T) %>%
group_by(seed, age_group) %>%
tally() %>% #counting rows to have the population size
group_by(age_group) %>% #summarise over seeds
summarise(n = mean(n))
eq_distr <- bind_cols(eq_distr, prob_death[1:nrow(eq_distr), 1:2])
cohort <- c()
for(i in 1:nrow(eq_distr)){
tmp <- tibble(age = runif(eq_distr$n[i], eq_distr$Age_lo[i], eq_distr$Age_hi[i]))
cohort <- rbind(cohort, tmp)
}
cohort <- cohort %>%
mutate(sex = as.numeric(rbernoulli(nrow(cohort), 0.5)))
#Checks
hist(cohort$age, breaks = c(0, prob_death$Age_hi[-c(1, nrow(prob_death))]+1),
main = "Initial age distribution", xlab = "Age (ys)")
table(cohort$sex)
save(eq_distr, cohort, file = "Equilibrium_age_distribution.RData")