-
Notifications
You must be signed in to change notification settings - Fork 1
/
check_fit.R
263 lines (227 loc) · 14.7 KB
/
check_fit.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
#
# PLOT FITS.
#
gen_fit = function(test, parametersI, ld, sitreps, virus, sero, populations, sero_cut_off)
{
test = copy(test);
test = test[population %in% populations];
sero = copy(sero);
sero = sero[NHS.region %in% populations];
sero = sero[sero$Start.date < sero_cut_off]
virus = copy(virus);
virus = virus[NHS.region %in% populations];
ld = copy(ld);
ld = ld[name %in% populations];
sitreps = copy(sitreps);
sitreps = sitreps[name %in% populations]
# Calculate total population
popsize = NULL
for (i in seq_along(parametersI)) {
if (!is.null(parametersI[[i]])) {
popsize = rbind(popsize,
data.table(Geography = parametersI[[i]]$pop[[1]]$name,
population_size = sum(parametersI[[i]]$pop[[1]]$size),
population_size_15plus = sum(parametersI[[i]]$pop[[1]]$size[-(1:3)]))
)
}
}
# Create formatted output
output = SPIM_output_full(test, 2020, 11, 1, "2020-01-01", as.character(ymd("2020-01-01") + max(test$t)))
output[, d := make_date(`Year of Value`, `Month of Value`, `Day of Value`)]
output = merge(output, popsize, by = "Geography")
adj_output = function(output, val_type, div, pop = 0, pop15 = 0) {
output[ValueType == val_type, Value := Value / (div + population_size * pop + population_size_15plus * pop15)]
output[ValueType == val_type, `Quantile 0.05` := `Quantile 0.05` / (div + population_size * pop + population_size_15plus * pop15)]
output[ValueType == val_type, `Quantile 0.1` := `Quantile 0.1` / (div + population_size * pop + population_size_15plus * pop15)]
output[ValueType == val_type, `Quantile 0.15` := `Quantile 0.15` / (div + population_size * pop + population_size_15plus * pop15)]
output[ValueType == val_type, `Quantile 0.2` := `Quantile 0.2` / (div + population_size * pop + population_size_15plus * pop15)]
output[ValueType == val_type, `Quantile 0.25` := `Quantile 0.25` / (div + population_size * pop + population_size_15plus * pop15)]
output[ValueType == val_type, `Quantile 0.3` := `Quantile 0.3` / (div + population_size * pop + population_size_15plus * pop15)]
output[ValueType == val_type, `Quantile 0.35` := `Quantile 0.35` / (div + population_size * pop + population_size_15plus * pop15)]
output[ValueType == val_type, `Quantile 0.4` := `Quantile 0.4` / (div + population_size * pop + population_size_15plus * pop15)]
output[ValueType == val_type, `Quantile 0.45` := `Quantile 0.45` / (div + population_size * pop + population_size_15plus * pop15)]
output[ValueType == val_type, `Quantile 0.5` := `Quantile 0.5` / (div + population_size * pop + population_size_15plus * pop15)]
output[ValueType == val_type, `Quantile 0.55` := `Quantile 0.55` / (div + population_size * pop + population_size_15plus * pop15)]
output[ValueType == val_type, `Quantile 0.6` := `Quantile 0.6` / (div + population_size * pop + population_size_15plus * pop15)]
output[ValueType == val_type, `Quantile 0.65` := `Quantile 0.65` / (div + population_size * pop + population_size_15plus * pop15)]
output[ValueType == val_type, `Quantile 0.7` := `Quantile 0.7` / (div + population_size * pop + population_size_15plus * pop15)]
output[ValueType == val_type, `Quantile 0.75` := `Quantile 0.75` / (div + population_size * pop + population_size_15plus * pop15)]
output[ValueType == val_type, `Quantile 0.8` := `Quantile 0.8` / (div + population_size * pop + population_size_15plus * pop15)]
output[ValueType == val_type, `Quantile 0.85` := `Quantile 0.85` / (div + population_size * pop + population_size_15plus * pop15)]
output[ValueType == val_type, `Quantile 0.9` := `Quantile 0.9` / (div + population_size * pop + population_size_15plus * pop15)]
output[ValueType == val_type, `Quantile 0.95` := `Quantile 0.95` / (div + population_size * pop + population_size_15plus * pop15)]
}
adj_output(output, "hospital_inc", 1)
adj_output(output, "hospital_prev", 1)
adj_output(output, "icu_prev", 1)
adj_output(output, "prevalence_mtp", 0, 0.01)
adj_output(output, "sero_prev", 0, 0, 0.01)
adj_output(output, "type28_death_inc_line", 1)
# Make data to output
data = make_data(ld, sitreps, virus, sero)
data = merge(data, popsize, by = "Geography")
adj_data = function(data, val_type, div, pop = 0) {
data[ValueType == val_type, ymin := ymin / (div + population_size * pop)]
data[ValueType == val_type, y := y / (div + population_size * pop)]
data[ValueType == val_type, ymax := ymax / (div + population_size * pop)]
}
adj_data(data, "hospital_inc", 1)
adj_data(data, "hospital_prev", 1)
adj_data(data, "icu_prev", 1)
adj_data(data, "prevalence_mtp", 0.01)
adj_data(data, "sero_prev", 0.01)
adj_data(data, "type28_death_inc_line", 1)
output[ValueType == "hospital_inc", ValueType := "Hospital\nadmissions"]
output[ValueType == "hospital_prev", ValueType := "Hospital beds\noccupied"]
output[ValueType == "icu_prev", ValueType := "ICU beds\noccupied"]
output[ValueType == "infections_inc", ValueType := "Infection\nincidence"]
output[ValueType == "prevalence_mtp", ValueType := "PCR\nprevalence (%)"]
output[ValueType == "sero_prev", ValueType := "Seroprevalence\n(%)"]
output[ValueType == "type28_death_inc_line", ValueType := "Deaths"]
data[ValueType == "hospital_inc", ValueType := "Hospital\nadmissions"]
data[ValueType == "hospital_prev", ValueType := "Hospital beds\noccupied"]
data[ValueType == "icu_prev", ValueType := "ICU beds\noccupied"]
data[ValueType == "infections_inc", ValueType := "Infection\nincidence"]
data[ValueType == "prevalence_mtp", ValueType := "PCR\nprevalence (%)"]
data[ValueType == "sero_prev", ValueType := "Seroprevalence\n(%)"]
data[ValueType == "type28_death_inc_line", ValueType := "Deaths"]
return (list(data, output))
}
check_fit = function(test, parametersI, ld, sitreps, virus, sero, populations, death_cutoff, max_date, min_date = NULL, sero_cut_off)
{
# Calculate total population
popsize = NULL
for (i in seq_along(parametersI)) {
if (!is.null(parametersI[[i]])) {
popsize = rbind(popsize,
data.table(Geography = parametersI[[i]]$pop[[1]]$name,
population_size = sum(parametersI[[i]]$pop[[1]]$size),
population_size_15plus = sum(parametersI[[i]]$pop[[1]]$size[-(1:3)]))
)
}
}
adj_data = function(data, val_type, div, pop = 0) {
data[ValueType == val_type, ymin := ymin / (div + population_size * pop)]
data[ValueType == val_type, y := y / (div + population_size * pop)]
data[ValueType == val_type, ymax := ymax / (div + population_size * pop)]
}
extra_sero = sero[sero$Start.date >= sero_cut_off]
extra_sero = extra_sero[NHS.region %in% populations]
extra_sero = extra_sero[!Data.source %like% "NHS BT", .(ValueType = "Seroprevalence\n(%)", Geography = NHS.region,
dmin = as.Date(Start.date), d = as.Date(Start.date) + (as.Date(End.date) - as.Date(Start.date)) / 2, dmax = as.Date(End.date),
ymin = pct(Lower.bound), y = pct(Central.estimate), ymax = pct(Upper.bound))]
extra_sero = merge(extra_sero, popsize, by = "Geography")
adj_data(extra_sero, "Seroprevalence\n(%)", 0.01)
fit = gen_fit(test, parametersI, ld, sitreps, virus, sero, populations, sero_cut_off)
data = fit[[1]]
output = fit[[2]]
output = output[d <= max_date]
if (!is.null(min_date)) {
output = output[d >= min_date]
data = data[d >= min_date]
}
# Augment deaths data
cutoff_date = ld[, max(date)] - death_cutoff;
data = data[ValueType != "Deaths" | (d <= cutoff_date)]
# Make plot
theme_set(cowplot::theme_cowplot(font_size = 10) + theme(strip.background = element_blank()))
linetypes = c("Deaths", "Hospital\nadmissions", "Hospital beds\noccupied", "ICU beds\noccupied")
plot = ggplot(output[d > "2020-03-01" & AgeBand == "All"]) +
geom_ribbon(aes(x = d, ymin = `Quantile 0.05`, ymax = `Quantile 0.95`, fill = ValueType), alpha = 0.5) +
geom_line(aes(x = d, y = Value, colour = ValueType)) +
geom_line(data = data[ValueType %in% linetypes], aes(x = d, y = y), size = 0.2) +
geom_point(data = data[!ValueType %in% linetypes], aes(x = d, y = y), size = 0.01, shape = 20) +
geom_linerange(data = data, aes(x = d, ymin = ymin, ymax = ymax), size = 0.2) +
geom_linerange(data = data, aes(xmin = dmin, xmax = dmax, y = y), size = 0.2) +
geom_point(data = extra_sero, aes(x = d, y = y), size = 0.01, shape = 20, color = 'dodgerblue2', fill = 'dodgerblue2') +
geom_linerange(data = extra_sero, aes(x = d, ymin = ymin, ymax = ymax), size = 0.2, color = 'dodgerblue2') +
geom_linerange(data = extra_sero, aes(xmin = dmin, xmax = dmax, y = y), size = 0.2, color = 'dodgerblue2') +
facet_grid(ValueType ~ Geography, scales = "free", switch = "y") +
theme(legend.position = "none", strip.placement = "outside", strip.background = element_blank()) +
scale_x_date(date_breaks = "1 month", date_labels = "%b") +
labs(x = NULL, y = NULL)
return (plot)
}
check_fit_small_output = function(test, parametersI, ld, sitreps, virus, sero, populations, death_cutoff, max_date, min_date = NULL, sero_cut_off)
{
# Calculate total population
popsize = NULL
for (i in seq_along(parametersI)) {
if (!is.null(parametersI[[i]])) {
popsize = rbind(popsize,
data.table(Geography = parametersI[[i]]$pop[[1]]$name,
population_size = sum(parametersI[[i]]$pop[[1]]$size),
population_size_15plus = sum(parametersI[[i]]$pop[[1]]$size[-(1:3)]))
)
}
}
adj_data = function(data, val_type, div, pop = 0) {
data[ValueType == val_type, ymin := ymin / (div + population_size * pop)]
data[ValueType == val_type, y := y / (div + population_size * pop)]
data[ValueType == val_type, ymax := ymax / (div + population_size * pop)]
}
extra_sero = sero[sero$Start.date >= sero_cut_off]
extra_sero = extra_sero[NHS.region %in% populations]
extra_sero = extra_sero[!Data.source %like% "NHS BT", .(ValueType = "Seroprevalence\n(%)", Geography = NHS.region,
dmin = as.Date(Start.date), d = as.Date(Start.date) + (as.Date(End.date) - as.Date(Start.date)) / 2, dmax = as.Date(End.date),
ymin = pct(Lower.bound), y = pct(Central.estimate), ymax = pct(Upper.bound))]
extra_sero = merge(extra_sero, popsize, by = "Geography")
adj_data(extra_sero, "Seroprevalence\n(%)", 0.01)
fit = gen_fit(test, parametersI, ld, sitreps, virus, sero, populations, sero_cut_off)
data = fit[[1]]
output = fit[[2]]
output = output[d <= max_date]
if (!is.null(min_date)) {
output = output[d >= min_date]
data = data[d >= min_date]
}
# Augment deaths data
cutoff_date = ld[, max(date)] - death_cutoff;
data = data[ValueType != "Deaths" | (d <= cutoff_date)]
# Make plot
theme_set(cowplot::theme_cowplot(font_size = 10) + theme(strip.background = element_blank()))
linetypes = c("Deaths", "Hospital\nadmissions", "Hospital beds\noccupied", "ICU beds\noccupied")
valuetypes = c("Deaths", "Hospital\nadmissions", "Hospital beds\noccupied", "ICU beds\noccupied", "PCR\nprevalence (%)", "Seroprevalence\n(%)")
plot = ggplot(output[d > "2020-03-01" & AgeBand == "All" & ValueType %in% valuetypes]) +
geom_ribbon(aes(x = d, ymin = `Quantile 0.05`, ymax = `Quantile 0.95`, fill = ValueType), alpha = 0.5) +
geom_line(aes(x = d, y = Value, colour = ValueType)) +
geom_line(data = data[ValueType %in% linetypes], aes(x = d, y = y), size = 0.2) +
geom_point(data = data[!ValueType %in% linetypes], aes(x = d, y = y), size = 0.01, shape = 20) +
geom_linerange(data = data, aes(x = d, ymin = ymin, ymax = ymax), size = 0.2) +
geom_linerange(data = data, aes(xmin = dmin, xmax = dmax, y = y), size = 0.2) +
geom_point(data = extra_sero, aes(x = d, y = y), size = 0.01, shape = 20, color = 'dodgerblue2', fill = 'dodgerblue2') +
geom_linerange(data = extra_sero, aes(x = d, ymin = ymin, ymax = ymax), size = 0.2, color = 'dodgerblue2') +
geom_linerange(data = extra_sero, aes(xmin = dmin, xmax = dmax, y = y), size = 0.2, color = 'dodgerblue2') +
facet_grid(ValueType ~ Geography, scales = "free", switch = "y") +
theme(legend.position = "none", strip.placement = "outside", strip.background = element_blank()) +
scale_x_date(date_breaks = "1 month", date_labels = "%b") +
labs(x = NULL, y = NULL)
return (plot)
}
compare_fit = function(test, test0, ld, sitreps, virus, sero, populations, populations0, max_date)
{
fit = gen_fit(test, ld, sitreps, virus, sero, populations)
output = fit[[2]]
fit = gen_fit(test0, ld, sitreps, virus, sero, populations0)
data = fit[[1]]
output0 = fit[[2]]
output[, kind := "With VOC"]
output0[, kind := "Without VOC"]
output = rbind(output, output0)
output = output[d <= max_date]
# Make plot
theme_set(cowplot::theme_cowplot(font_size = 10) + theme(strip.background = element_blank()))
linetypes = c("Deaths", "Hospital\nadmissions", "Hospital beds\noccupied", "ICU beds\noccupied")
plot = ggplot(output[d > "2020-03-01" & AgeBand == "All"]) +
geom_ribbon(aes(x = d, ymin = `Quantile 0.05`, ymax = `Quantile 0.95`, fill = ValueType, group = kind), alpha = 0.5) +
geom_line(aes(x = d, y = Value, colour = ValueType, linetype = kind)) +
geom_line(data = data[ValueType %in% linetypes], aes(x = d, y = y), size = 0.2) +
geom_point(data = data[!ValueType %in% linetypes], aes(x = d, y = y), size = 0.01, shape = 20) +
geom_linerange(data = data, aes(x = d, ymin = ymin, ymax = ymax), size = 0.2) +
geom_linerange(data = data, aes(xmin = dmin, xmax = dmax, y = y), size = 0.2) +
facet_grid(ValueType ~ Geography, scales = "free", switch = "y") +
theme(legend.position = "none", strip.placement = "outside", strip.background = element_blank()) +
scale_x_date(date_breaks = "2 months", date_labels = "%b") +
labs(x = NULL, y = NULL)
return (plot)
}