# Fitting profiles for Haiti 1.
#
# ********
# NOTE:
#
# If you would like to run this code but have never used the `batchtools` R
# package, then we strongly recommend completing Examples 1 and 2 from the
# batchtools vignette. To access it, install the `batchtools` package and run
# `vignette('batchtools')`, or visit:
#
# https://mllg.github.io/batchtools/articles/batchtools.html
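#
# For example, run once in an interactive session:
#
#   install.packages('batchtools')
#   vignette('batchtools')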
#
# WARNING:
#
# This script was run using a high-performance computing cluster. In total,
# the RUN_LEVEL == 3 version of this script took 3631 compute hours. Partial
# results can be obtained by lowering the required computational effort
# (parameters: NP, NMIF, nprof, NP_EVAL, NREPS_EVAL), either by reducing
# RUN_LEVEL to 2 or 1, or by manually changing these values.
# ********
#
# The haitipkg::fit_haiti1 function is a simple helper that takes as input
# starting parameters and hyperparameters for the IF2 algorithm, and returns
# parameter estimates along with corresponding estimates of their
# log-likelihoods.
#
# In this script we use this function to perform a profile likelihood search
# over all model parameters, using the `batchtools` package. To do this, we
# create a grid of parameter values and submit one job per row of the grid,
# where each job uses its row as a starting point for fitting the model.
#
# The number of rows in the grid (the `final_pars` object, created using a
# for-loop over the profile parameters) corresponds to the total number of
# jobs that will be submitted using `batchtools`. Each job takes roughly the
# same amount of time to compute, so the run time scales linearly with the
# total number of jobs. The total number of jobs can be controlled by (1)
# changing the number of parameters to profile over, (2) changing the number
# of unique values for each profile parameter, or (3) changing the number of
# starting points per profile value (nprof).
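#
# For example, at RUN_LEVEL == 3 the 'betat' profile defined below uses 30
# profile values with nprof = 25 starting points each, contributing
# 30 * 25 = 750 jobs to the design.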
#
# The time complexity of each job is roughly linear in the parameters NP and
# NMIF: the fitting algorithm (IF2) has computational complexity that is
# linear in these parameters, and the fitting procedure takes the majority of
# the time for each job. The model fit is evaluated in each job using the
# particle filter algorithm, whose computational complexity is linear in
# NP_EVAL and NREPS_EVAL, so modifying these parameters also affects the
# total computational effort required, though their contribution is typically
# much smaller than the effort used to fit the model.
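#
# As a rough illustration, at RUN_LEVEL == 3 each job spends on the order of
# NP * NMIF = 1500 * 200 = 300,000 particle-iterations fitting the model,
# versus NP_EVAL * NREPS_EVAL = 5000 * 15 = 75,000 evaluating it, so the
# fitting stage dominates.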
# Load Packages -----------------------------------------------------------
library(batchtools)
library(tidyverse)
library(data.table)
library(haitipkg)
RUN_LEVEL <- 3
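# Computational effort settings: switch() selects the value in position
# RUN_LEVEL, so e.g. RUN_LEVEL == 1 gives nprof = 2, NMIF = 5, NP = 50.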
nprof <- switch(RUN_LEVEL, 2, 15, 25)
NMIF <- switch(RUN_LEVEL, 5, 150, 200)
NP <- switch(RUN_LEVEL, 50, 1000, 1500)
NP_EVAL <- switch(RUN_LEVEL, 100, 3000, 5000)
NREPS_EVAL <- switch(RUN_LEVEL, 3, 10, 15)
COOLING <- 0.5
# Create Experiment Registry ----------------------------------------------
reg <- makeExperimentRegistry(
file.dir = paste0('model1/profileReg_RL', RUN_LEVEL),
seed = 739164,
packages = c("spatPomp", 'haitipkg', 'pomp')
)
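# Note: the scheduler backend is picked up from your batchtools configuration
# file (e.g., ~/.batchtools.conf.R); the resources used below (account,
# walltime, memory) suggest a Slurm cluster. A minimal configuration file
# might contain, assuming a Slurm template is available on your system:
#
#   cluster.functions <- makeClusterFunctionsSlurm(template = 'slurm')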
# Create Profile Design Matrix --------------------------------------------
set.seed(665544)
# Create Model 1 pomp object, used to get fixed parameters in the searches.
h1 <- haiti1_joint()
# Vector of parameters to profile over.
prof_params <- c(
'betat',
'tau_epi',
'tau_end',
'rho',
'nu',
'sig_sq_epi',
'sig_sq_end',
'E_0',
'I_0',
'beta1',
'beta2',
'beta3',
'beta4',
'beta5',
'beta6'
)
# Initialize parameter matrix using default parameter values.
final_pars <- coef(h1)
prof_vars <- c()
for (pp in prof_params) { # Loop through all profile parameters
# Profile bounds estimated using RL = 2
if (pp == "betat") {
prof_values <- seq(-0.15, 0.05, length.out = 30)
} else if (pp == 'tau_epi') {
prof_values <- c(exp(seq(2.75, 9, length.out = 40)), 5e+12)
} else if (pp == 'tau_end') {
prof_values <- c(exp(seq(2, 9, length.out = 40)), 5e+12)
} else if (pp == 'rho') {
prof_values <- seq(0.1, 0.9, length.out = 30)
} else if (pp == 'nu') {
prof_values <- seq(0.9, 1, length.out = 15)
} else if (pp == 'sig_sq_epi') {
prof_values <- seq(0.04, 0.2, length.out = 30)
} else if (pp == 'sig_sq_end') {
prof_values <- seq(0.02, 0.275, length.out = 30)
} else if (pp == 'E_0') {
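# Note: 10911819 appears to be the total population size, used to convert
# initial counts of individuals into proportions.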
prof_values <- seq(1 / 10911819, 25000 / 10911819, 1000 / 10911819)
} else if (pp == 'I_0') {
prof_values <- seq(1 / 10911819, 25000 / 10911819, 1000 / 10911819)
} else if (pp == 'beta1') {
prof_values <- seq(0.5, 2.5, length.out = 20)
} else if (pp == 'beta2') {
prof_values <- seq(0.25, 2, length.out = 20)
} else if (pp == 'beta3') {
prof_values <- seq(0.4, 2.1, length.out = 20)
} else if (pp == 'beta4') {
prof_values <- seq(0.4, 2.1, length.out = 20)
} else if (pp == 'beta5') {
prof_values <- seq(0.5, 2.5, length.out = 20)
} else if (pp == 'beta6') {
prof_values <- seq(-0.1, 2, length.out = 20)
}
prof_cols <- matrix(rep(prof_values, each = nprof), ncol = 1)
colnames(prof_cols) <- pp
# Bounds used to sample starting values of the non-profiled parameters,
# found using RL = 2.
bounds <- tibble::tribble(
~param, ~lower, ~upper,
"beta1", 1.00, 2.15,
"beta2", .50, 2.00,
"beta3", .75, 1.75,
"beta4", .75, 1.50,
"beta5", 1.00, 2.10,
"beta6", .50, 1.50,
"betat", -0.15, 0.05,
"tau_epi", 180, 1250,
"tau_end", 50, 1000,
"rho", 0.2, 0.95,
"nu", 0.94, 1,
"sig_sq_epi", 0.075, 0.125,
"sig_sq_end", 0.05, 0.2,
"E_0", 10 / 10911819, 5000 / 10911819,
"I_0", 10 / 10911819, 20000 / 10911819
)
bounds <- bounds %>%
filter(param != pp)
lower <- bounds$lower
names(lower) <- bounds$param
upper <- bounds$upper
names(upper) <- bounds$param
guesses_tmp <- pomp::runif_design(
lower = lower,
upper = upper,
nseq = nprof * length(prof_values)
)
guesses <- dplyr::bind_cols(prof_cols, guesses_tmp)
S_0 <- matrix(unname(1 - guesses[, 'I_0'] - guesses[, 'E_0']), ncol = 1)
colnames(S_0) <- 'S_0'
guesses <- dplyr::bind_cols(guesses, S_0)
# We need to add fixed parameters
all_params <- coef(h1)
fixed_params <- all_params[!names(all_params) %in% colnames(guesses)]
fixed_mat <- matrix(
rep(fixed_params, nprof * length(prof_values)),
byrow = TRUE, nrow = nprof * length(prof_values)
)
colnames(fixed_mat) <- names(all_params[!names(all_params) %in% colnames(guesses)])
# Combine estimated and fixed parameters, and reorder based on original order.
guesses_all <- cbind(guesses, fixed_mat)[, names(coef(h1))]
final_pars <- rbind(final_pars, guesses_all)
prof_vars <- c(prof_vars, rep(pp, nrow(guesses_all)))
}
final_pars <- final_pars[-1, names(coef(h1))]
data_obj <- list(starts = final_pars, prof_vars = prof_vars)
# Sanity check: nrow(final_pars) should equal the total number of jobs.
# dim(final_pars)
# Define profile problem for registry -------------------------------------
# A simple function that returns the starting parameter values and profile
# variable for job i of the profile search.
sample_starting_point <- function(data = data_obj, job, i, ...) {
list(
start_par = unlist(data$starts[i, ]),
prof_var = data$prof_vars[i]
)
}
addProblem(name = 'profile', data = data_obj, fun = sample_starting_point)
# Define algorithm for parameter estimation -------------------------------
# This function estimates model parameters using the IF2 algorithm. All
# calibrated parameters receive a non-zero RW_SD, except for the profile
# parameter (stored in the instance object) and the fixed parameters.
fit_model <- function(
data, job, instance, nmif, np, np_eval, nreps_eval,
cooling, ...
) {
est_param_names <- c(
paste0('beta', 1:6), 'betat', 'rho', 'nu', 'E_0', 'I_0',
'tau_epi', 'tau_end', 'sig_sq_epi', 'sig_sq_end'
)
est_param_names <- est_param_names[est_param_names != instance$prof_var]
# Helper function used to set the rw_sd, so that we don't have to type out
# the rw_sd for each parameter individually.
set_rw <- function(x) {
if (x == 'nu') {
return(0.01)
} else if (x == 'E_0' || x == "I_0") {
return(expression(ivp(0.2)))
} else if (x == 'tau_epi' || x == 'sig_sq_epi') {
return(expression(ifelse(time < 232, 0.02, 0)))
} else if (x == 'tau_end' || x == 'sig_sq_end') {
return(expression(ifelse(time >= 232, 0.02, 0)))
} else {
return(0.02)
}
}
reg_rw.sd <- lapply(est_param_names, set_rw)
names(reg_rw.sd) <- est_param_names
RW_SD <- do.call(pomp::rw_sd, reg_rw.sd)
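# As an illustration, when profiling over 'rho' the construction above is
# effectively equivalent to calling:
#
#   pomp::rw_sd(
#     beta1 = 0.02, beta2 = 0.02, beta3 = 0.02, beta4 = 0.02, beta5 = 0.02,
#     beta6 = 0.02, betat = 0.02, nu = 0.01,
#     E_0 = ivp(0.2), I_0 = ivp(0.2),
#     tau_epi = ifelse(time < 232, 0.02, 0),
#     sig_sq_epi = ifelse(time < 232, 0.02, 0),
#     tau_end = ifelse(time >= 232, 0.02, 0),
#     sig_sq_end = ifelse(time >= 232, 0.02, 0)
#   )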
fit_haiti1(
start_params = instance$start_par,
NMIF = nmif,
NP = np,
NP_EVAL = np_eval,
NREPS_EVAL = nreps_eval,
RW_SD = RW_SD,
COOLING = cooling
)
}
addAlgorithm(name = 'fitMod', fun = fit_model)
# Completing the experiment -----------------------------------------------
pdes <- list('profile' = data.frame(i = 1:nrow(final_pars), prof_var = prof_vars))
ades <- list(
'fitMod' = data.frame(
nmif = NMIF, np = NP, np_eval = NP_EVAL, nreps_eval = NREPS_EVAL, cooling = COOLING
)
)
addExperiments(prob.designs = pdes, algo.designs = ades)
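# Before submitting everything, it can be worth sanity-checking a single job
# by running it locally in the current R session (job 1 is an arbitrary
# choice):
#
# testJob(id = 1)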
# Submit Jobs -------------------------------------------------------------
# resources1 <- list(account = 'stats_dept1', walltime = '10:00', memory = '1000m', ncpus = 1)
resources1 <- list(
account = 'stats_dept1', walltime = '3:30:00',
memory = '5000m', ncpus = 1
)
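# The chunk column below groups jobs into chunks of three (the chunk ids are
# recycled by data.table, assuming nrow(final_pars) is divisible by 3), so
# that three jobs run back-to-back within a single scheduler job.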
submitJobs(
data.table(
job.id = 1:nrow(final_pars),
chunk = 1:(round(nrow(final_pars) / 3))
), resources = resources1
)
# Get Results -------------------------------------------------------------
# You can check the status of the jobs (how many are queued, how many have
# started, how many have finished, and how many have errors / ran out of time)
# by running:
#
# getStatus()
#
# You can ensure that you won't summarize the results until everything has
# finished by running:
#
# waitForJobs()
#
# Because there are many long jobs, however, it may not be practical to wait
# interactively. Instead, it is wise to check on the status of the jobs
# periodically, once you think they may have finished. To check on your
# results after closing your R session, re-load the registry (from a new R
# session in the same working directory that was used to create the registry)
# using:
#
# library(batchtools)
#
# RUN_LEVEL = 3 # Or adjust RUN_LEVEL as needed.
# reg = loadRegistry(paste0('model1/profileReg_RL', RUN_LEVEL))
# getStatus()
#
# Once all of the jobs are finished, we can summarize the results using:
#
# h1_profile_results = unwrap(reduceResultsDataTable())
# h1_pars = unwrap(getJobPars())
# h1_results = ijoin(h1_pars, h1_profile_results)
#
# saveRDS(
# h1_results,
# paste0('model1/run_level_', RUN_LEVEL, '/', 'h1_profiles.rds')
# )
#
# We can also get information about each job, which can be helpful for
# choosing the size of each run level and for estimating the cost of the
# RUN_LEVEL == 3 search. For example, we can see the total / average
# computation time of all of the jobs by running:
#
# h1_stats <- unwrap(getJobStatus())
# h1_stats <- ijoin(h1_pars, h1_stats)
#
# summary(h1_stats$time.running |> as.numeric()) # unit of measurement is seconds
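#
# To express the total computation time in hours (comparable to the 3631
# compute hours quoted in the WARNING at the top of this script):
#
# sum(as.numeric(h1_stats$time.running), na.rm = TRUE) / 3600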