Skip to content

Commit

Permalink
refit for custom_stan_model test, run-extended
Browse files Browse the repository at this point in the history
  • Loading branch information
santikka committed Apr 8, 2024
2 parents 5db0628 + 88bf8ab commit cd81e97
Show file tree
Hide file tree
Showing 24 changed files with 225 additions and 60 deletions.
2 changes: 2 additions & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,8 @@ export(lfactor)
export(lfo)
export(loo)
export(mcmc_diagnostics)
export(mice.impute.lag)
export(mice.impute.lead)
export(ndraws)
export(obs)
export(plot_betas)
Expand Down
5 changes: 3 additions & 2 deletions NEWS.md
Original file line number Diff line number Diff line change
@@ -1,8 +1,9 @@
# dynamite 1.5.0

* Estimation of dynamic multivariate panel models with multiple imputation is now available via the function `dynamice()` which uses the `mice` package.
* `predict` function no longer permutes the posterior samples when all samples are used i.e. when `n_draws = NULL` (default). This also corrects the standard error estimates of `loo`, which were not correct earlier due to the mixing of chains.
* Added an argument `thin` for the `loo` method.
* `predict` and `fitted` functions no longer permutes the posterior samples when all samples are used i.e. when `n_draws = NULL` (default). This also corrects the standard error estimates of `loo`, which were not correct earlier due to the mixing of chains.
* Added an argument `thin` for the `loo`, `predict` and `fitted` method.
* Print method now only prints the run time for fastest and slowest chain instead of all chains.

# dynamite 1.4.11

Expand Down
27 changes: 21 additions & 6 deletions R/dynamice.R
Original file line number Diff line number Diff line change
Expand Up @@ -12,17 +12,22 @@
#' @inheritParams dynamite
#' @param mice_args \[`list()`]\cr
#' Arguments passed to [mice::mice()] excluding `data`.
#' @param impute_format \[`character(1L)`]\cr Format of the data that will be
#' @param impute_format \[`character(1)`]\cr Format of the data that will be
#' passed to the imputation method. Should be either `"wide"` (the default)
#' or `"long"` corresponding to wide format and long format imputation.
#' @param keep_imputed \[`logical(1)`]\cr Should the imputed datasets be
#' kept in the return object? The default is `FALSE`. If `TRUE`, the
#' imputations will be included in the `imputed` field in the return object
#' that is otherwise `NULL`.
#' @export
dynamice <- function(dformula, data, time, group = NULL,
priors = NULL, backend = "rstan",
verbose = TRUE, verbose_stan = FALSE,
stanc_options = list("O0"),
threads_per_chain = 1L, grainsize = NULL,
custom_stan_model = NULL, debug = NULL,
mice_args = list(), impute_format = "wide", ...) {
mice_args = list(), impute_format = "wide",
keep_imputed = FALSE, ...) {
stopifnot_(
requireNamespace("mice"),
"Please install the {.pkg mice} package to use multiple imputation."
Expand Down Expand Up @@ -54,6 +59,10 @@ dynamice <- function(dformula, data, time, group = NULL,
!inherits(impute_format, "try-error"),
"Argument {.arg impute_format} must be either {.val long} or {.val wide}."
)
stopifnot_(
checkmate::test_flag(x = keep_imputed),
"Argument {.arg keep_imputed} must be a single {.cls logical} value."
)
data <- droplevels(data)
data <- data.table::as.data.table(data)
stopifnot_(
Expand Down Expand Up @@ -160,8 +169,8 @@ dynamice <- function(dformula, data, time, group = NULL,
priors = priors,
backend = backend,
permutation = sample(n_draws),
imputed = imputed,
call = tmp$call, # TODO?
imputed = onlyif(keep_imputed, imputed),
call = tmp$call # TODO?
),
class = "dynamitefit"
)
Expand Down Expand Up @@ -435,10 +444,13 @@ parse_predictors_wide <- function(dformula, value_vars, idx_time, group_var) {

#' Compute Lagging Values of an Imputed Response
#'
#' Function for computing the lagged values of an imputed response in `mice`.
#'
#' @inheritParams mice::mice.impute.norm
#' @param group_var \[`character(1)`]\cr Name of the grouping variable.
#' @param resp \[`character(1)`]\cr Name of the response variable.
#' @noRd
#' @keywords internal
#' @export
mice.impute.lag <- function(y, ry, x, wy = NULL,
group_var, resp, ...) {
if (is.null(wy)) {
Expand All @@ -454,10 +466,13 @@ mice.impute.lag <- function(y, ry, x, wy = NULL,

#' Compute Leading Values of an Imputed Response
#'
#' Function for computing the leading values of an imputed response in `mice`.
#'
#' @inheritParams mice::mice.impute.norm
#' @param group_var \[`character(1)`]\cr Name of the grouping variable.
#' @param resp \[`character(1)`]\cr Name of the response variable.
#' @noRd
#' @keywords internal
#' @export
mice.impute.lead <- function(y, ry, x, wy = NULL,
group_var, resp, ...) {
if (is.null(wy)) {
Expand Down
14 changes: 12 additions & 2 deletions R/fitted.R
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,7 @@
#' }
#' }
#'
fitted.dynamitefit <- function(object, newdata = NULL, n_draws = NULL,
fitted.dynamitefit <- function(object, newdata = NULL, n_draws = NULL, thin = 1,
expand = TRUE, df = TRUE, ...) {
stopifnot_(
!is.null(object$stanfit),
Expand All @@ -72,6 +72,16 @@ fitted.dynamitefit <- function(object, newdata = NULL, n_draws = NULL,
checkmate::test_flag(x = df),
"Argument {.arg df} must be a single {.cls logical} value."
)
if (thin > 1L) {
idx_draws <- seq.int(1L, ndraws(object), by = thin)
} else {
n_draws <- check_ndraws(n_draws, ndraws(object))
idx_draws <- ifelse_(
identical(n_draws, ndraws(object)),
seq_len(n_draws),
object$permutation[seq_len(n_draws)]
)
}
initialize_predict(
object,
newdata,
Expand All @@ -81,7 +91,7 @@ fitted.dynamitefit <- function(object, newdata = NULL, n_draws = NULL,
impute = "none",
new_levels = "none",
global_fixed = FALSE,
n_draws,
idx_draws,
expand,
df
)
Expand Down
10 changes: 6 additions & 4 deletions R/lfo.R
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,7 @@
#' )
#' out$ELPD
#' out$ELPD_SE
#' plot(out)
#' }
#' }
#'
Expand Down Expand Up @@ -92,6 +93,8 @@ lfo <- function(x, L, verbose = TRUE, k_threshold = 0.7, ...) {
}
fit <- update_(x, data = d, refresh = 0, ...)

idx_draws <- seq.int(1, ndraws(fit))
n_draws <- length(idx_draws)
# would be faster to use only data
# x$data[eval(time) >= tp[L] - x$stan$fixed]
# but in a case of missing data this is not necessarily enough
Expand All @@ -104,13 +107,12 @@ lfo <- function(x, L, verbose = TRUE, k_threshold = 0.7, ...) {
impute = "none",
new_levels = "none",
global_fixed = FALSE,
n_draws = NULL,
idx_draws,
expand = FALSE,
df = FALSE
)$simulated
# avoid NSE notes from R CMD check
loglik <- patterns <- .draw <- group <- groups <- time <- NULL
n_draws <- ndraws(x)
# sum the log-likelihood over the channels and non-missing time points
# for each group, time, and draw
# drop those id&time pairs which contain NA
Expand Down Expand Up @@ -176,7 +178,7 @@ lfo <- function(x, L, verbose = TRUE, k_threshold = 0.7, ...) {
)
lr <- lr[, non_na_idx]
ll <- ll[, non_na_idx]
psis_obj <- suppressWarnings(loo::psis(lr, r_eff = NA))
psis_obj <- suppressWarnings(loo::psis(lr))
k <- loo::pareto_k_values(psis_obj)
ks[[i - L]] <- k
if (any(k > k_threshold)) {
Expand All @@ -202,7 +204,7 @@ lfo <- function(x, L, verbose = TRUE, k_threshold = 0.7, ...) {
impute = "none",
new_levels = "none",
global_fixed = FALSE,
n_draws = NULL,
idx_draws,
expand = FALSE,
df = FALSE
)$simulated
Expand Down
15 changes: 8 additions & 7 deletions R/loo.R
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,12 @@ loo.dynamitefit <- function(x, separate_channels = FALSE, thin = 1L, ...) {
checkmate::test_int(x = thin, lower = 1L, upper = ndraws(x)),
"Argument {.arg thin} must be a single positive {.cls integer}."
)
# compute loglik for all posterior samples even with thin > 1
n_chains <- x$stanfit@sim$chains
n_draws <- ndraws(x)
idx_draws <- seq.int(1, n_draws, by = thin)
# need equal number of samples per chain
idx_draws <- idx_draws[seq_len(n_draws %/% thin - n_draws %% n_chains)]
n_draws <- length(idx_draws) %/% n_chains
out <- initialize_predict(
x,
newdata = NULL,
Expand All @@ -61,18 +66,14 @@ loo.dynamitefit <- function(x, separate_channels = FALSE, thin = 1L, ...) {
impute = "none",
new_levels = "none",
global_fixed = FALSE,
n_draws = NULL,
idx_draws,
expand = FALSE,
df = FALSE
)$simulated
# avoid NSE notes from R CMD check
patterns <- NULL

n_chains <- x$stanfit@sim$chains
n_draws <- ndraws(x) %/% n_chains
idx_draws <- seq.int(1L, n_draws * n_chains, by = thin)
loo_ <- function(ll, n_draws, n_chains) {
ll <- t(matrix(ll, ncol = n_draws * n_chains)[, idx_draws])
ll <- t(matrix(ll, ncol = n_draws * n_chains))
reff <- loo::relative_eff(
exp(ll),
chain_id = rep(seq_len(n_chains), each = nrow(ll) / n_chains)
Expand Down
39 changes: 26 additions & 13 deletions R/predict.R
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,9 @@
#' default is `NULL` which uses all samples without permuting (with chains
#' concatenated). If `n_draws`is smaller than `ndraws(object)`, a random
#' subset of `n_draws` posterior samples are used.
#' @param thin \[`integer(1)`]\cr Use only every `thin` posterior sample.
#' This can be beneficial with when the model object contains
#' large number of samples. Default is `1` meaning that all samples are used.
#' @param expand \[`logical(1)`]\cr If `TRUE` (the default), the output
#' is a single `data.frame` containing the original `newdata` and the
#' predicted values. Otherwise, a `list` is returned with two components,
Expand Down Expand Up @@ -158,7 +161,7 @@ predict.dynamitefit <- function(object, newdata = NULL,
new_levels = c(
"none", "bootstrap", "gaussian", "original"
),
global_fixed = FALSE, n_draws = NULL,
global_fixed = FALSE, n_draws = NULL, thin = 1,
expand = TRUE, df = TRUE, ...) {
stopifnot_(
!is.null(object$stanfit),
Expand Down Expand Up @@ -203,6 +206,20 @@ predict.dynamitefit <- function(object, newdata = NULL,
checkmate::test_flag(x = df),
"Argument {.arg df} must be a single {.cls logical} value."
)
stopifnot_(
checkmate::test_int(x = thin, lower = 1L, upper = ndraws(object)),
"Argument {.arg thin} must be a single positive {.cls integer}."
)
if (thin > 1L) {
idx_draws <- seq.int(1L, ndraws(object), by = thin)
} else {
n_draws <- check_ndraws(n_draws, ndraws(object))
idx_draws <- ifelse_(
identical(n_draws, ndraws(object)),
seq_len(n_draws),
object$permutation[seq_len(n_draws)]
)
}
initialize_predict(
object,
newdata,
Expand All @@ -212,7 +229,7 @@ predict.dynamitefit <- function(object, newdata = NULL,
impute,
new_levels,
global_fixed,
n_draws,
idx_draws,
expand,
df
)
Expand All @@ -225,8 +242,8 @@ predict.dynamitefit <- function(object, newdata = NULL,
#' `"loglik"`.
#' @noRd
initialize_predict <- function(object, newdata, type, eval_type, funs, impute,
new_levels, global_fixed, n_draws, expand, df) {
n_draws <- check_ndraws(n_draws, ndraws(object))
new_levels, global_fixed, idx_draws, expand, df) {
n_draws <- length(idx_draws)
newdata_null <- is.null(newdata)
newdata <- check_newdata(object, newdata)
fixed <- as.integer(attr(object$dformulas$all, "max_lag"))
Expand Down Expand Up @@ -334,7 +351,7 @@ initialize_predict <- function(object, newdata, type, eval_type, funs, impute,
type = type,
eval_type = eval_type,
new_levels = new_levels,
n_draws = n_draws,
idx_draws = idx_draws,
fixed = fixed,
funs = funs,
group_var = group_var,
Expand All @@ -353,8 +370,9 @@ initialize_predict <- function(object, newdata, type, eval_type, funs, impute,
#' @param mode Simulation mode, either `"full"` or `"summary"`.
#' @noRd
predict_ <- function(object, simulated, storage, observed,
mode, type, eval_type, new_levels, n_draws, fixed, funs,
mode, type, eval_type, new_levels, idx_draws, fixed, funs,
group_var, time_var, expand, df) {
n_draws <- length(idx_draws)
formulas_stoch <- object$dformulas$stoch
resp <- get_responses(object$dformulas$all)
resp_stoch <- get_responses(object$dformulas$stoch)
Expand Down Expand Up @@ -390,8 +408,8 @@ predict_ <- function(object, simulated, storage, observed,
env = list(n_new = n_new, n_draws = n_draws)
]
simulated[,
(".draw") := rep(seq.int(1L, n_draws), n_new),
env = list(n_new = n_new, n_draws = n_draws)
(".draw") := rep(seq.int(1L, n_draws), n_new),
env = list(n_new = n_new, n_draws = n_draws)
]
idx <- which(draw_time == u_time[1L]) + (fixed - 1L) * n_draws
n_sim <- n_draws
Expand Down Expand Up @@ -436,11 +454,6 @@ predict_ <- function(object, simulated, storage, observed,
)
idx_summ <- which(summaries[[time_var]] == u_time[1L]) + (fixed - 1L)
}
idx_draws <- ifelse_(
identical(n_draws, ndraws(object)),
seq_len(n_draws),
object$permutation[seq_len(n_draws)]
)
eval_envs <- prepare_eval_envs(
object,
simulated,
Expand Down
11 changes: 9 additions & 2 deletions R/print.R
Original file line number Diff line number Diff line change
Expand Up @@ -81,9 +81,16 @@ print.dynamitefit <- function(x, full_diagnostics = FALSE, ...) {
sumr$variable[max_rhat], ")",
sep = ""
)
cat("\n\nElapsed time (seconds):\n")
print(rstan::get_elapsed_time(x$stanfit))
runtimes <- rstan::get_elapsed_time(x$stanfit)

if (nrow(runtimes) > 2L) {
rs <- rowSums(runtimes)
cat("\n\nElapsed time (seconds) for fastest and slowest chains:\n")
print(runtimes[c(which.min(rs), which.max(rs)), ])
} else {
cat("\n\nElapsed time (seconds):\n")
print(runtimes)
}
cat(
"\nSummary statistics of the time- and group-invariant parameters:\n"
)
Expand Down
Binary file modified R/sysdata.rda
Binary file not shown.
Loading

0 comments on commit cd81e97

Please sign in to comment.