diff --git a/CRAN-SUBMISSION b/CRAN-SUBMISSION new file mode 100644 index 00000000..25dd011f --- /dev/null +++ b/CRAN-SUBMISSION @@ -0,0 +1,3 @@ +Version: 1.4.0 +Date: 2024-06-13 18:14:55 UTC +SHA: 2452cb048b6037b7a1ff84e89ec102ef6883ca62 diff --git a/DESCRIPTION b/DESCRIPTION index f8e42c48..d303363c 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: lmtp Title: Non-Parametric Causal Effects of Feasible Interventions Based on Modified Treatment Policies -Version: 1.3.3 +Version: 1.4.0 Authors@R: c(person(given = "Nicholas", family = "Williams", @@ -14,7 +14,7 @@ Authors@R: comment = c(ORCID = "0000-0001-9056-2047"))) Description: Non-parametric estimators for casual effects based on longitudinal modified treatment policies as described in Diaz, Williams, Hoffman, and Schenck , traditional point treatment, - and traditional longitudinal effects. Continuous, binary, and categorical treatments are allowed as well are + and traditional longitudinal effects. Continuous, binary, categorical, and multivariate treatments are allowed as well as censored outcomes. The treatment mechanism is estimated via a density ratio classification procedure irrespective of treatment variable type. For both continuous and binary outcomes, additive treatment effects can be calculated and relative risks and odds ratios may be calculated for binary outcomes. @@ -24,7 +24,7 @@ License: AGPL-3 Encoding: UTF-8 LazyData: true Roxygen: list(markdown = TRUE) -RoxygenNote: 7.2.1 +RoxygenNote: 7.3.1 Imports: stats, nnls, @@ -36,7 +36,8 @@ Imports: progressr, data.table (>= 1.13.0), checkmate (>= 2.1.0), - SuperLearner + SuperLearner, + schoolmath URL: http://beyondtheate.com, https://github.com/nt-williams/lmtp BugReports: https://github.com/nt-williams/lmtp/issues Suggests: diff --git a/NAMESPACE b/NAMESPACE index ca2eef58..102394ea 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -5,7 +5,9 @@ S3method(print,lmtp_contrast) S3method(tidy,lmtp) export(create_node_list) export(event_locf) +export(ipsi) export(lmtp_contrast) +export(lmtp_control) export(lmtp_ipw) export(lmtp_sdr) export(lmtp_sub) @@ -31,6 +33,7 @@ importFrom(stats,predict) importFrom(stats,qlogis) importFrom(stats,qnorm) importFrom(stats,quantile) +importFrom(stats,runif) importFrom(stats,sd) importFrom(stats,var) importFrom(stats,weighted.mean) diff --git a/NEWS.md b/NEWS.md index e104c821..10cdb3a7 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,3 +1,21 @@ +# lmtp 1.4.0 + +### New Features + +- Can now estimate the effects of simultaneous interventions on multiple variables. +- New pre-packaged shift function, `ipsi()`, for estimating IPSI effects using the risk ratio. +- `lmtp_control()` now replaces extra estimator arguments. + +### Bug Fixes + +- Standard errors now incorporate survey weights (see issue \#134). +- Fixed a bug when `shift` is `NULL` and `data` is a tibble (see issue \#137). + +### General + +- The `intervention_type` argument has been fully deprecated. +- Now attempting to detect intervention type errors (see issue \#98). 
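To make the new features above concrete, here is a minimal sketch of the 1.4.0 user-facing pieces, `ipsi()` as a pre-packaged shift and `lmtp_control()` in place of the old dot-arguments, adapted from the `ipsi()` example that appears later in this diff (R/shift.R); the `.trim` value is illustrative, not a recommendation.

``` r
library(lmtp)

# ipsi(0.5) keeps each subject's natural exposure with probability 0.5
# and otherwise sets it to 0 (see ipsi() and ipsi_down() in R/shift.R).
data("iptwExWide", package = "twang")
a <- paste0("tx", 1:3)
tv <- list(c("use0"), c("use1"), c("use2"))

lmtp_sdr(
  iptwExWide, a, "outcome",
  baseline = c("gender", "age"), time_vary = tv,
  shift = ipsi(0.5), outcome_type = "continuous", folds = 2,
  control = lmtp_control(.trim = 0.995)  # replaces the old `.trim` dot-argument
)
```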
+ # lmtp 1.3.3 ### New Features diff --git a/R/checks.R b/R/checks.R index d684e954..10e7649f 100644 --- a/R/checks.R +++ b/R/checks.R @@ -2,7 +2,11 @@ check_lmtp_data <- function(x, trt, outcome, baseline, time_vary, cens, id) { for (t in 1:determine_tau(outcome, trt)) { ci <- censored(x, cens, t)$j di <- at_risk(x, risk_indicators(outcome), t, TRUE) - trt_t <- ifelse(length(trt) == 1, trt, trt[t]) + if (length(trt) > 1) { + trt_t <- trt[[t]] + } else { + trt_t <- trt[[1]] + } data_t <- x[ci & di, c(trt_t, baseline, unlist(time_vary[t])), drop = FALSE] if (any(is.na(data_t))) { @@ -15,6 +19,14 @@ check_lmtp_data <- function(x, trt, outcome, baseline, time_vary, cens, id) { assertLmtpData <- checkmate::makeAssertionFunction(check_lmtp_data) +assert_trt <- function(trt, tau) { + is_list <- is.list(trt) + if (!isTRUE(is_list)) { + return(assertTrtCharacter(trt, tau)) + } + assertTrtList(trt, tau) +} + check_trt_character <- function(trt, tau) { is_character <- checkmate::check_character(trt) if (!isTRUE(is_character)) { @@ -30,6 +42,21 @@ check_trt_character <- function(trt, tau) { assertTrtCharacter <- checkmate::makeAssertionFunction(check_trt_character) +check_trt_list <- function(trt, tau) { + is_list <- checkmate::check_list(trt) + if (!isTRUE(is_list)) { + return(is_list) + } + + if (length(trt) != 1 && length(trt) != tau) { + return(paste0("'trt' should be of length 1 or ", tau)) + } + + TRUE +} + +assertTrtList <- checkmate::makeAssertionFunction(check_trt_list) + check_reserved_names <- function(x) { bad_names <- c("lmtp_id", "tmp_lmtp_stack_indicator", "tmp_lmtp_scaled_outcome") %in% names(x) if (!any(bad_names)) { @@ -135,8 +162,8 @@ assertDr <- checkmate::makeAssertionFunction(check_dr) check_ref_class <- function(x) { if (!is.lmtp(x)) { - is_num <- checkmate::check_number(x) - if (!isTRUE(is_num)) { + is_num <- checkmate::test_number(x) + if (isFALSE(is_num)) { return("Must either be a single numeric value or another lmtp object") } } @@ -144,3 +171,33 @@ check_ref_class <- function(x) { } assertRefClass <- checkmate::makeAssertionFunction(check_ref_class) + +check_trt_type <- function(data, trt, mtp) { + is_decimal <- vector("logical", length(trt)) + for (i in seq_along(trt)) { + a <- data[[trt[i]]] + if (is.character(a) | is.factor(a)) next + is_decimal[i] <- any(schoolmath::is.decimal(a[!is.na(a)])) + } + if (any(is_decimal) & isFALSE(mtp)) { + cli::cli_warn("Detected decimalish `trt` values and {.code mtp = FALSE}. 
Consider setting {.code mtp = TRUE} if getting errors.") + } +} + +check_same_weights <- function(weights) { + if (length(weights) == 1) { + check <- TRUE + } else if (length(weights) == 2) { + check <- identical(weights[[1]], weights[[2]]) + } else { + check <- all(sapply(1:(length(weights) - 1), function(i) identical(weights[[i]], weights[[i + 1]]))) + } + + if (isFALSE(check)) { + return("Weights must all be the same.") + } + TRUE +} + +assertSameWeights <- checkmate::makeAssertionFunction(check_same_weights) + diff --git a/R/contrasts.R b/R/contrasts.R index 330d4fcb..72f7fd67 100644 --- a/R/contrasts.R +++ b/R/contrasts.R @@ -25,6 +25,12 @@ lmtp_contrast <- function(..., ref, type = c("additive", "rr", "or")) { assertRefClass(ref) assertContrastType(match.arg(type), fits, .var.name = "type") + weights <- lapply(fits, function(x) x$weights) + if (isFALSE(is.numeric(ref))) { + weights <- append(weights, list(ref$weights)) + } + assertSameWeights(weights) + if (is.numeric(ref)) { type <- "additive" message("Non-estimated reference value, defaulting type = 'additive'") @@ -68,7 +74,7 @@ contrast_additive_single <- function(fit, ref) { fit$id <- 1:length(eif) } - clusters <- split(eif, fit$id) + clusters <- split(eif*fit$weights, fit$id) j <- length(clusters) std.error <- sqrt(var(vapply(clusters, function(x) mean(x), 1)) / j) conf.low <- theta - qnorm(0.975) * std.error @@ -107,7 +113,8 @@ contrast_rr <- function(fits, ref) { contrast_rr_single <- function(fit, ref) { theta <- fit$theta / ref$theta - log_eif <- (fit$eif / fit$theta) - (ref$eif / ref$theta) + log_eif <- (fit$eif*fit$weights / fit$theta) - + (ref$eif*ref$weights / ref$theta) if (is.null(fit$id)) { fit$id <- 1:length(eif) @@ -151,7 +158,8 @@ contrast_or <- function(fits, ref) { contrast_or_single <- function(fit, ref) { theta <- (fit$theta / (1 - fit$theta)) / (ref$theta / (1 - ref$theta)) - log_eif <- (fit$eif / (fit$theta * (1 - fit$theta))) - (ref$eif / (ref$theta * (1 - ref$theta))) + log_eif <- (fit$eif*fit$weights / (fit$theta * (1 - fit$theta))) - + (ref$eif*ref$weights / (ref$theta * (1 - ref$theta))) if (is.null(fit$id)) { fit$id <- 1:length(eif) diff --git a/R/data.R b/R/data.R index af177331..cd7fb70c 100644 --- a/R/data.R +++ b/R/data.R @@ -82,3 +82,19 @@ #' \item{Y2}{Final outcome node.} #' } "sim_timevary_surv" + +#' Simulated Multivariate Exposure Data +#' +#' A dataset with a continuous outcome, three baseline covariates, +#' and two treatment variables. +#' +#' @format A data frame with 2000 rows and 6 variables: +#' \describe{ +#' \item{C1}{Continuous baseline variable.} +#' \item{C2}{Continuous baseline variable.} +#' \item{C3}{Continuous baseline variable.} +#' \item{D1}{Treatment variable one at baseline.} +#' \item{D2}{Treatment variable two at baseline.} +#' \item{Y}{Continuous outcome} +#' } +"multivariate_data" diff --git a/R/density_ratios.R b/R/density_ratios.R index f70d5bc2..c25e7b19 100644 --- a/R/density_ratios.R +++ b/R/density_ratios.R @@ -1,32 +1,36 @@ -cf_r <- function(Task, learners, mtp, lrnr_folds, trim, full_fits, pb) { - fopts <- options("lmtp.bound", "lmtp.trt.length") - out <- list() - +cf_r <- function(task, learners, mtp, control, pb) { + out <- vector("list", length = length(task$folds)) + if (length(learners) == 1 && learners == "SL.mean") { warning("Using 'SL.mean' as the only learner of the density ratios will always result in a misspecified model! If your exposure is randomized, consider using `c('SL.glm', 'SL.glmnet')`.", call. 
= FALSE) } - - for (fold in seq_along(Task$folds)) { + + for (fold in seq_along(task$folds)) { out[[fold]] <- future::future({ - options(fopts) - estimate_r( - get_folded_data(Task$natural, Task$folds, fold), - get_folded_data(Task$shifted, Task$folds, fold), - Task$trt, Task$cens, Task$risk, Task$tau, Task$node_list$trt, - learners, pb, mtp, lrnr_folds, full_fits + get_folded_data(task$natural, task$folds, fold), + get_folded_data(task$shifted, task$folds, fold), + task$trt, + task$cens, + task$risk, + task$tau, + task$node_list$trt, + learners, + pb, + mtp, + control ) }, seed = TRUE) } - trim_ratios(recombine_ratios(future::value(out), Task$folds), trim) + trim_ratios(recombine_ratios(future::value(out), task$folds), control$.trim) } -estimate_r <- function(natural, shifted, trt, cens, risk, tau, node_list, learners, pb, mtp, lrnr_folds, full_fits) { +estimate_r <- function(natural, shifted, trt, cens, risk, tau, node_list, learners, pb, mtp, control) { densratios <- matrix(nrow = nrow(natural$valid), ncol = tau) - fits <- list() + fits <- vector("list", length = tau) for (t in 1:tau) { jrt <- rep(censored(natural$train, cens, t)$j, 2) @@ -35,9 +39,13 @@ estimate_r <- function(natural, shifted, trt, cens, risk, tau, node_list, learne jrv <- censored(natural$valid, cens, t)$j drv <- at_risk(natural$valid, risk, t) - trt_t <- ifelse(length(trt) > 1, trt[t], trt) + if (length(trt) > 1) { + trt_t <- trt[[t]] + } else { + trt_t <- trt[[1]] + } - frv <- followed_rule(natural$valid[[trt_t]], shifted$valid[[trt_t]], mtp) + frv <- followed_rule(natural$valid[, trt_t], shifted$valid[, trt_t], mtp) vars <- c(node_list[[t]], cens[[t]]) stacked <- stack_data(natural$train, shifted$train, trt, cens, t) @@ -48,10 +56,10 @@ estimate_r <- function(natural, shifted, trt, cens, risk, tau, node_list, learne learners, "binomial", stacked[jrt & drt, ]$lmtp_id, - lrnr_folds + control$.learners_trt_folds ) - if (full_fits) { + if (control$.return_full_fits) { fits[[t]] <- fit } else { fits[[t]] <- extract_sl_weights(fit) @@ -73,7 +81,7 @@ stack_data <- function(natural, shifted, trt, cens, tau) { shifted_half <- natural if (length(trt) > 1 || tau == 1) { - shifted_half[[trt[tau]]] <- shifted[[trt[tau]]] + shifted_half[, trt[[tau]]] <- shifted[, trt[[tau]]] } if (!is.null(cens)) { diff --git a/R/estimators.R b/R/estimators.R index 0651dd40..e3f956a1 100644 --- a/R/estimators.R +++ b/R/estimators.R @@ -7,8 +7,11 @@ #' @param data \[\code{data.frame}\]\cr #' A \code{data.frame} in wide format containing all necessary variables #' for the estimation problem. Must not be a \code{data.table}. -#' @param trt \[\code{character}\]\cr +#' @param trt \[\code{character}\] or \[\code{list}\]\cr #' A vector containing the column names of treatment variables ordered by time. +#' Or, a list of vectors, the same length as the number of time points of observation. +#' Vectors should contain column names for the treatment variables at each time point. The list +#' should be ordered following the time ordering of the model. #' @param outcome \[\code{character}\]\cr #' The column name of the outcome variable. In the case of time-to-event #' analysis, a vector containing the columns names of intermediate outcome variables and the final @@ -55,20 +58,8 @@ #' The number of folds to be used for cross-fitting. #' @param weights \[\code{numeric(nrow(data))}\]\cr #' An optional vector containing sampling weights. 
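To illustrate the list-valued `trt` documented above, here is a sketch of a single-time-point, two-exposure intervention using the new `multivariate_data` dataset documented earlier in this diff. The shift function `shift_both()` is hypothetical and assumes `D1` and `D2` are numeric; per `shift_trt_list()` (R/shift.R later in this diff), a multivariate shift receives the vector of column names for a time point and should return an object with those same columns.

``` r
library(lmtp)

# Hypothetical multivariate shift: lower both exposures by 0.25.
# `trt` here is the character vector c("D1", "D2") for the time point,
# so returning the subsetted data frame minus 0.25 keeps the column names.
shift_both <- function(data, trt) {
  data[, trt, drop = FALSE] - 0.25
}

lmtp_tmle(
  multivariate_data,
  trt = list(c("D1", "D2")),   # one time point, two simultaneous exposures
  outcome = "Y",
  baseline = c("C1", "C2", "C3"),
  shift = shift_both,
  outcome_type = "continuous",
  mtp = TRUE
)
```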
-#' @param .bound \[\code{numeric(1)}\]\cr -#' Determines that maximum and minimum values (scaled) predictions -#' will be bounded by. The default is 1e-5, bounding predictions by 1e-5 and 0.9999. -#' @param .trim \[\code{numeric(1)}\]\cr -#' Determines the amount the density ratios should be trimmed. -#' The default is 0.999, trimming the density ratios greater than the 0.999 percentile -#' to the 0.999 percentile. A value of 1 indicates no trimming. -#' @param .learners_outcome_folds \[\code{integer(1)}\]\cr -#' The number of cross-validation folds for \code{learners_outcome}. -#' @param .learners_trt_folds \[\code{integer(1)}\]\cr -#' The number of cross-validation folds for \code{learners_trt}. -#' @param .return_full_fits \[\code{logical(1)}\]\cr -#' Return full SuperLearner fits? Default is \code{FALSE}, return only SuperLearner weights. -#' @param ... Extra arguments. Exists for backwards compatibility. +#' @param control \[\code{list()}\]\cr +#' Output of \code{lmtp_control()}. #' #' @details #' ## Should \code{mtp = TRUE}? @@ -102,13 +93,11 @@ lmtp_tmle <- function(data, trt, outcome, baseline = NULL, time_vary = NULL, cens = NULL, shift = NULL, shifted = NULL, k = Inf, mtp = FALSE, outcome_type = c("binomial", "continuous", "survival"), - # intervention_type = c("static", "dynamic", "mtp"), id = NULL, bounds = NULL, learners_outcome = "SL.glm", learners_trt = "SL.glm", - folds = 10, weights = NULL, .bound = 1e-5, .trim = 0.999, - .learners_outcome_folds = 10, .learners_trt_folds = 10, - .return_full_fits = FALSE, ...) { + folds = 10, weights = NULL, + control = lmtp_control()) { assertNotDataTable(data) checkmate::assertCharacter(outcome, len = if (match.arg(outcome_type) != "survival") 1, @@ -117,11 +106,11 @@ lmtp_tmle <- function(data, trt, outcome, baseline = NULL, time_vary = NULL, tau <- determine_tau(outcome, trt) - assertTrtCharacter(trt, tau) + assert_trt(trt, tau) checkmate::assertCharacter(cens, len = tau, null.ok = !checkmate::anyMissing(data[, outcome, drop = FALSE])) checkmate::assertList(time_vary, types = c("NULL", "character"), len = tau, null.ok = TRUE) checkmate::assertCharacter(id, len = 1, null.ok = TRUE) - checkmate::assertSubset(c(trt, outcome, baseline, unlist(time_vary), cens, id), names(data)) + checkmate::assertSubset(c(unlist(trt), outcome, baseline, unlist(time_vary), cens, id), names(data)) assertLmtpData(data, trt, outcome, baseline, time_vary, cens, id) assertOutcomeTypes(data, outcome, match.arg(outcome_type)) assertReservedNames(data) @@ -131,21 +120,14 @@ lmtp_tmle <- function(data, trt, outcome, baseline = NULL, time_vary = NULL, checkmate::assertNumeric(weights, len = nrow(data), finite = TRUE, any.missing = FALSE, null.ok = TRUE) checkmate::assertNumber(k, lower = 0, upper = Inf) checkmate::assertNumber(folds, lower = 1, upper = nrow(data) - 1) - checkmate::assertNumber(.learners_outcome_folds, null.ok = TRUE) - checkmate::assertNumber(.learners_trt_folds, null.ok = TRUE) - checkmate::assertSubset(c(trt, outcome, baseline, unlist(time_vary), cens, id), names(data)) - checkmate::assertNumber(.bound) - checkmate::assertNumber(.trim, upper = 1) - checkmate::assertLogical(.return_full_fits, len = 1) - - extras <- list(...) - if ("intervention_type" %in% names(extras)) { - mtp <- extras$intervention_type == "mtp" - warning("The `intervention_type` argument of `lmtp_tmle()` is deprecated as of lmtp 1.3.1", - call. 
= FALSE) - } - - Task <- lmtp_Task$new( + checkmate::assertNumber(control$.learners_outcome_folds, null.ok = TRUE) + checkmate::assertNumber(control$.learners_trt_folds, null.ok = TRUE) + checkmate::assertNumber(control$.bound) + checkmate::assertNumber(control$.trim, upper = 1) + checkmate::assertLogical(control$.return_full_fits, len = 1) + check_trt_type(data, unlist(trt), mtp) + + task <- lmtp_task$new( data = data, trt = trt, outcome = outcome, @@ -160,29 +142,34 @@ lmtp_tmle <- function(data, trt, outcome, baseline = NULL, time_vary = NULL, V = folds, weights = weights, bounds = bounds, - bound = .bound + bound = control$.bound ) - pb <- progressr::progressor(Task$tau*folds*2) + pb <- progressr::progressor(task$tau*folds*2) - ratios <- cf_r(Task, learners_trt, mtp, .learners_trt_folds, .trim, .return_full_fits, pb) - estims <- cf_tmle(Task, "tmp_lmtp_scaled_outcome", ratios$ratios, learners_outcome, .learners_outcome_folds, .return_full_fits, pb) + ratios <- cf_r(task, learners_trt, mtp, control, pb) + estims <- cf_tmle(task, + "tmp_lmtp_scaled_outcome", + ratios$ratios, + learners_outcome, + control, + pb) theta_dr( list( estimator = "TMLE", m = list(natural = estims$natural, shifted = estims$shifted), r = ratios$ratios, - tau = Task$tau, - folds = Task$folds, - id = Task$id, - outcome_type = Task$outcome_type, - bounds = Task$bounds, - weights = Task$weights, + tau = task$tau, + folds = task$folds, + id = task$id, + outcome_type = task$outcome_type, + bounds = task$bounds, + weights = task$weights, shift = if (is.null(shifted)) deparse(substitute((shift))) else NULL, fits_m = estims$fits, fits_r = ratios$fits, - outcome_type = Task$outcome_type + outcome_type = task$outcome_type ), FALSE ) @@ -197,8 +184,11 @@ lmtp_tmle <- function(data, trt, outcome, baseline = NULL, time_vary = NULL, #' @param data \[\code{data.frame}\]\cr #' A \code{data.frame} in wide format containing all necessary variables #' for the estimation problem. Must not be a \code{data.table}. -#' @param trt \[\code{character}\]\cr +#' @param trt \[\code{character}\] or \[\code{list}\]\cr #' A vector containing the column names of treatment variables ordered by time. +#' Or, a list of vectors, the same length as the number of time points of observation. +#' Vectors should contain column names for the treatment variables at each time point. The list +#' should be ordered following the time ordering of the model. #' @param outcome \[\code{character}\]\cr #' The column name of the outcome variable. In the case of time-to-event #' analysis, a vector containing the columns names of intermediate outcome variables and the final @@ -245,20 +235,8 @@ lmtp_tmle <- function(data, trt, outcome, baseline = NULL, time_vary = NULL, #' The number of folds to be used for cross-fitting. #' @param weights \[\code{numeric(nrow(data))}\]\cr #' An optional vector containing sampling weights. -#' @param .bound \[\code{numeric(1)}\]\cr -#' Determines that maximum and minimum values (scaled) predictions -#' will be bounded by. The default is 1e-5, bounding predictions by 1e-5 and 0.9999. -#' @param .trim \[\code{numeric(1)}\]\cr -#' Determines the amount the density ratios should be trimmed. -#' The default is 0.999, trimming the density ratios greater than the 0.999 percentile -#' to the 0.999 percentile. A value of 1 indicates no trimming. -#' @param .learners_outcome_folds \[\code{integer(1)}\]\cr -#' The number of cross-validation folds for \code{learners_outcome}. 
-#' @param .learners_trt_folds \[\code{integer(1)}\]\cr -#' The number of cross-validation folds for \code{learners_trt}. -#' @param .return_full_fits \[\code{logical(1)}\]\cr -#' Return full SuperLearner fits? Default is \code{FALSE}, return only SuperLearner weights. -#' @param ... Extra arguments. Exists for backwards compatibility. +#' @param control \[\code{list()}\]\cr +#' Output of \code{lmtp_control()}. #' #' @details #' ## Should \code{mtp = TRUE}? @@ -292,14 +270,12 @@ lmtp_tmle <- function(data, trt, outcome, baseline = NULL, time_vary = NULL, lmtp_sdr <- function(data, trt, outcome, baseline = NULL, time_vary = NULL, cens = NULL, shift = NULL, shifted = NULL, k = Inf, mtp = FALSE, - # intervention_type = c("static", "dynamic", "mtp"), outcome_type = c("binomial", "continuous", "survival"), id = NULL, bounds = NULL, learners_outcome = "SL.glm", learners_trt = "SL.glm", - folds = 10, weights = NULL, .bound = 1e-5, .trim = 0.999, - .learners_outcome_folds = 10, .learners_trt_folds = 10, - .return_full_fits = FALSE, ...) { + folds = 10, weights = NULL, + control = lmtp_control()) { assertNotDataTable(data) checkmate::assertCharacter(outcome, len = if (match.arg(outcome_type) != "survival") 1, @@ -308,11 +284,11 @@ lmtp_sdr <- function(data, trt, outcome, baseline = NULL, time_vary = NULL, tau <- determine_tau(outcome, trt) - assertTrtCharacter(trt, tau) + assert_trt(trt, tau) checkmate::assertCharacter(cens, len = tau, null.ok = !checkmate::anyMissing(data[, outcome, drop = FALSE])) checkmate::assertList(time_vary, types = c("NULL", "character"), len = tau, null.ok = TRUE) checkmate::assertCharacter(id, len = 1, null.ok = TRUE) - checkmate::assertSubset(c(trt, outcome, baseline, unlist(time_vary), cens, id), names(data)) + checkmate::assertSubset(c(unlist(trt), outcome, baseline, unlist(time_vary), cens, id), names(data)) assertLmtpData(data, trt, outcome, baseline, time_vary, cens, id) assertOutcomeTypes(data, outcome, match.arg(outcome_type)) assertReservedNames(data) @@ -322,14 +298,14 @@ lmtp_sdr <- function(data, trt, outcome, baseline = NULL, time_vary = NULL, checkmate::assertNumeric(weights, len = nrow(data), finite = TRUE, any.missing = FALSE, null.ok = TRUE) checkmate::assertNumber(k, lower = 0, upper = Inf) checkmate::assertNumber(folds, lower = 1, upper = nrow(data) - 1) - checkmate::assertNumber(.learners_outcome_folds, null.ok = TRUE) - checkmate::assertNumber(.learners_trt_folds, null.ok = TRUE) - checkmate::assertSubset(c(trt, outcome, baseline, unlist(time_vary), cens, id), names(data)) - checkmate::assertNumber(.bound) - checkmate::assertNumber(.trim, upper = 1) - checkmate::assertLogical(.return_full_fits, len = 1) - - Task <- lmtp_Task$new( + checkmate::assertNumber(control$.learners_outcome_folds, null.ok = TRUE) + checkmate::assertNumber(control$.learners_trt_folds, null.ok = TRUE) + checkmate::assertNumber(control$.bound) + checkmate::assertNumber(control$.trim, upper = 1) + checkmate::assertLogical(control$.return_full_fits, len = 1) + check_trt_type(data, unlist(trt), mtp) + + task <- lmtp_task$new( data = data, trt = trt, outcome = outcome, @@ -344,36 +320,34 @@ lmtp_sdr <- function(data, trt, outcome, baseline = NULL, time_vary = NULL, V = folds, weights = weights, bounds = bounds, - bound = .bound + bound = control$control$.bound ) - extras <- list(...) - if ("intervention_type" %in% names(extras)) { - mtp <- extras$intervention_type == "mtp" - warning("The `intervention_type` argument of `lmtp_sdr()` is deprecated as of lmtp 1.3.1", - call. 
= FALSE) - } + pb <- progressr::progressor(task$tau*folds*2) - pb <- progressr::progressor(Task$tau*folds*2) - - ratios <- cf_r(Task, learners_trt, mtp, .learners_trt_folds, .trim, .return_full_fits, pb) - estims <- cf_sdr(Task, "tmp_lmtp_scaled_outcome", ratios$ratios, learners_outcome, .learners_outcome_folds, .return_full_fits, pb) + ratios <- cf_r(task, learners_trt, mtp, control, pb) + estims <- cf_sdr(task, + "tmp_lmtp_scaled_outcome", + ratios$ratios, + learners_outcome, + control, + pb) theta_dr( list( estimator = "SDR", m = list(natural = estims$natural, shifted = estims$shifted), r = ratios$ratios, - tau = Task$tau, - folds = Task$folds, - id = Task$id, - outcome_type = Task$outcome_type, - bounds = Task$bounds, - weights = Task$weights, + tau = task$tau, + folds = task$folds, + id = task$id, + outcome_type = task$outcome_type, + bounds = task$bounds, + weights = task$weights, shift = if (is.null(shifted)) deparse(substitute((shift))) else NULL, fits_m = estims$fits, fits_r = ratios$fits, - outcome_type = Task$outcome_type + outcome_type = task$outcome_type ), TRUE ) @@ -388,8 +362,11 @@ lmtp_sdr <- function(data, trt, outcome, baseline = NULL, time_vary = NULL, #' @param data \[\code{data.frame}\]\cr #' A \code{data.frame} in wide format containing all necessary variables #' for the estimation problem. Must not be a \code{data.table}. -#' @param trt \[\code{character}\]\cr +#' @param trt \[\code{character}\] or \[\code{list}\]\cr #' A vector containing the column names of treatment variables ordered by time. +#' Or, a list of vectors, the same length as the number of time points of observation. +#' Vectors should contain column names for the treatment variables at each time point. The list +#' should be ordered following the time ordering of the model. #' @param outcome \[\code{character}\]\cr #' The column name of the outcome variable. In the case of time-to-event #' analysis, a vector containing the columns names of intermediate outcome variables and the final @@ -430,13 +407,8 @@ lmtp_sdr <- function(data, trt, outcome, baseline = NULL, time_vary = NULL, #' The number of folds to be used for cross-fitting. #' @param weights \[\code{numeric(nrow(data))}\]\cr #' An optional vector containing sampling weights. -#' @param .bound \[\code{numeric(1)}\]\cr -#' Determines that maximum and minimum values (scaled) predictions -#' will be bounded by. The default is 1e-5, bounding predictions by 1e-5 and 0.9999. -#' @param .learners_folds \[\code{integer(1)}\]\cr -#' The number of cross-validation folds for \code{learners}. -#' @param .return_full_fits \[\code{logical(1)}\]\cr -#' Return full SuperLearner fits? Default is \code{FALSE}, return only SuperLearner weights. +#' @param control \[\code{list()}\]\cr +#' Output of \code{lmtp_control()}. 
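All of the estimators now take this `control` argument, and `intervention_type` is gone entirely; for users migrating older code, a before/after sketch assuming the `sim_t4`, `A`, `L`, and `policy` objects defined in the README example near the end of this diff:

``` r
# lmtp <= 1.3.x (deprecated arguments):
# lmtp_tmle(sim_t4, A, "Y", time_vary = L, shift = policy,
#           intervention_type = "mtp", .trim = 0.995, folds = 10)

# lmtp 1.4.0:
lmtp_tmle(sim_t4, A, "Y", time_vary = L, shift = policy, mtp = TRUE,
          folds = 10, control = lmtp_control(.trim = 0.995))
```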
#' #' @return A list of class \code{lmtp} containing the following components: #' @@ -458,8 +430,8 @@ lmtp_sub <- function(data, trt, outcome, baseline = NULL, time_vary = NULL, cens shift = NULL, shifted = NULL, k = Inf, outcome_type = c("binomial", "continuous", "survival"), id = NULL, bounds = NULL, learners = "SL.glm", - folds = 10, weights = NULL, .bound = 1e-5, .learners_folds = 10, - .return_full_fits = FALSE) { + folds = 10, weights = NULL, + control = lmtp_control()) { assertNotDataTable(data) checkmate::assertCharacter(outcome, len = if (match.arg(outcome_type) != "survival") 1, @@ -468,11 +440,11 @@ lmtp_sub <- function(data, trt, outcome, baseline = NULL, time_vary = NULL, cens tau <- determine_tau(outcome, trt) - assertTrtCharacter(trt, tau) + assert_trt(trt, tau) checkmate::assertCharacter(cens, len = tau, null.ok = !checkmate::anyMissing(data[, outcome, drop = FALSE])) checkmate::assertList(time_vary, types = c("NULL", "character"), len = tau, null.ok = TRUE) checkmate::assertCharacter(id, len = 1, null.ok = TRUE) - checkmate::assertSubset(c(trt, outcome, baseline, unlist(time_vary), cens, id), names(data)) + checkmate::assertSubset(c(unlist(trt), outcome, baseline, unlist(time_vary), cens, id), names(data)) assertLmtpData(data, trt, outcome, baseline, time_vary, cens, id) assertOutcomeTypes(data, outcome, match.arg(outcome_type)) assertReservedNames(data) @@ -482,12 +454,11 @@ lmtp_sub <- function(data, trt, outcome, baseline = NULL, time_vary = NULL, cens checkmate::assertNumeric(weights, len = nrow(data), finite = TRUE, any.missing = FALSE, null.ok = TRUE) checkmate::assertNumber(k, lower = 0, upper = Inf) checkmate::assertNumber(folds, lower = 1, upper = nrow(data) - 1) - checkmate::assertNumber(.learners_folds, null.ok = TRUE) - checkmate::assertSubset(c(trt, outcome, baseline, unlist(time_vary), cens, id), names(data)) - checkmate::assertNumber(.bound) - checkmate::assertLogical(.return_full_fits, len = 1) + checkmate::assertNumber(control$.learners_outcome_folds, null.ok = TRUE) + checkmate::assertNumber(control$.bound) + checkmate::assertLogical(control$.return_full_fits, len = 1) - Task <- lmtp_Task$new( + task <- lmtp_task$new( data = data, trt = trt, outcome = outcome, @@ -502,23 +473,27 @@ lmtp_sub <- function(data, trt, outcome, baseline = NULL, time_vary = NULL, cens V = folds, weights = weights, bounds = bounds, - bound = .bound + bound = control$.bound ) - pb <- progressr::progressor(Task$tau*folds) + pb <- progressr::progressor(task$tau*folds) - estims <- cf_sub(Task, "tmp_lmtp_scaled_outcome", learners, .learners_folds, .return_full_fits, pb) + estims <- cf_sub(task, + "tmp_lmtp_scaled_outcome", + learners, + control, + pb) theta_sub( eta = list( m = estims$m, - outcome_type = Task$outcome_type, - bounds = Task$bounds, - folds = Task$folds, - weights = Task$weights, + outcome_type = task$outcome_type, + bounds = task$bounds, + folds = task$folds, + weights = task$weights, shift = if (is.null(shifted)) deparse(substitute((shift))) else NULL, fits_m = estims$fits, - outcome_type = Task$outcome_type + outcome_type = task$outcome_type ) ) } @@ -532,8 +507,11 @@ lmtp_sub <- function(data, trt, outcome, baseline = NULL, time_vary = NULL, cens #' @param data \[\code{data.frame}\]\cr #' A \code{data.frame} in wide format containing all necessary variables #' for the estimation problem. Must not be a \code{data.table}. 
-#' @param trt \[\code{character}\]\cr +#' @param trt \[\code{character}\] or \[\code{list}\]\cr #' A vector containing the column names of treatment variables ordered by time. +#' Or, a list of vectors, the same length as the number of time points of observation. +#' Vectors should contain column names for the treatment variables at each time point. The list +#' should be ordered following the time ordering of the model. #' @param outcome \[\code{character}\]\cr #' The column name of the outcome variable. In the case of time-to-event #' analysis, a vector containing the columns names of intermediate outcome variables and the final @@ -574,18 +552,8 @@ lmtp_sub <- function(data, trt, outcome, baseline = NULL, time_vary = NULL, cens #' The number of folds to be used for cross-fitting. #' @param weights \[\code{numeric(nrow(data))}\]\cr #' An optional vector containing sampling weights. -#' @param .bound \[\code{numeric(1)}\]\cr -#' Determines that maximum and minimum values (scaled) predictions -#' will be bounded by. The default is 1e-5, bounding predictions by 1e-5 and 0.9999. -#' @param .trim \[\code{numeric(1)}\]\cr -#' Determines the amount the density ratios should be trimmed. -#' The default is 0.999, trimming the density ratios greater than the 0.999 percentile -#' to the 0.999 percentile. A value of 1 indicates no trimming. -#' @param .learners_folds \[\code{integer(1)}\]\cr -#' The number of cross-validation folds for \code{learners}. -#' @param .return_full_fits \[\code{logical(1)}\]\cr -#' Return full SuperLearner fits? Default is \code{FALSE}, return only SuperLearner weights. -#' @param ... Extra arguments. Exists for backwards compatibility. +#' @param control \[\code{list()}\]\cr +#' Output of \code{lmtp_control()}. #' #' @details #' ## Should \code{mtp = TRUE}? @@ -612,13 +580,11 @@ lmtp_sub <- function(data, trt, outcome, baseline = NULL, time_vary = NULL, cens #' @example inst/examples/ipw-ex.R lmtp_ipw <- function(data, trt, outcome, baseline = NULL, time_vary = NULL, cens = NULL, shift = NULL, shifted = NULL, mtp = FALSE, - # intervention_type = c("static", "dynamic", "mtp"), k = Inf, id = NULL, outcome_type = c("binomial", "continuous", "survival"), learners = "SL.glm", folds = 10, weights = NULL, - .bound = 1e-5, .trim = 0.999, .learners_folds = 10, - .return_full_fits = FALSE, ...) 
{ + control = lmtp_control()) { assertNotDataTable(data) checkmate::assertCharacter(outcome, len = if (match.arg(outcome_type) != "survival") 1, @@ -627,11 +593,11 @@ lmtp_ipw <- function(data, trt, outcome, baseline = NULL, time_vary = NULL, cens tau <- determine_tau(outcome, trt) - assertTrtCharacter(trt, tau) + assert_trt(trt, tau) checkmate::assertCharacter(cens, len = tau, null.ok = !checkmate::anyMissing(data[, outcome, drop = FALSE])) checkmate::assertList(time_vary, types = c("NULL", "character"), len = tau, null.ok = TRUE) checkmate::assertCharacter(id, len = 1, null.ok = TRUE) - checkmate::assertSubset(c(trt, outcome, baseline, unlist(time_vary), cens, id), names(data)) + checkmate::assertSubset(c(unlist(trt), outcome, baseline, unlist(time_vary), cens, id), names(data)) assertLmtpData(data, trt, outcome, baseline, time_vary, cens, id) assertOutcomeTypes(data, outcome, match.arg(outcome_type)) assertReservedNames(data) @@ -640,13 +606,13 @@ lmtp_ipw <- function(data, trt, outcome, baseline = NULL, time_vary = NULL, cens checkmate::assertNumeric(weights, len = nrow(data), finite = TRUE, any.missing = FALSE, null.ok = TRUE) checkmate::assertNumber(k, lower = 0, upper = Inf) checkmate::assertNumber(folds, lower = 1, upper = nrow(data) - 1) - checkmate::assertNumber(.learners_folds, null.ok = TRUE) - checkmate::assertSubset(c(trt, outcome, baseline, unlist(time_vary), cens, id), names(data)) - checkmate::assertNumber(.bound) - checkmate::assertNumber(.trim, upper = 1) - checkmate::assertLogical(.return_full_fits, len = 1) + checkmate::assertNumber(control$.learners_trt_folds, null.ok = TRUE) + checkmate::assertNumber(control$.bound) + checkmate::assertNumber(control$.trim, upper = 1) + checkmate::assertLogical(control$.return_full_fits, len = 1) + check_trt_type(data, unlist(trt), mtp) - Task <- lmtp_Task$new( + task <- lmtp_task$new( data = data, trt = trt, outcome = outcome, @@ -661,19 +627,12 @@ lmtp_ipw <- function(data, trt, outcome, baseline = NULL, time_vary = NULL, cens V = folds, weights = weights, bounds = NULL, - bound = .bound + bound = control$.bound ) - pb <- progressr::progressor(Task$tau*folds) - - extras <- list(...) - if ("intervention_type" %in% names(extras)) { - mtp <- extras$intervention_type == "mtp" - warning("The `intervention_type` argument of `lmtp_ipw()` is deprecated as of lmtp 1.3.1", - call. 
= FALSE) - } + pb <- progressr::progressor(task$tau*folds) - ratios <- cf_r(Task, learners, mtp, .learners_folds, .trim, .return_full_fits, pb) + ratios <- cf_r(task, learners, mtp, control, pb) theta_ipw( eta = list( @@ -682,14 +641,14 @@ lmtp_ipw <- function(data, trt, outcome, baseline = NULL, time_vary = NULL, cens nrow = nrow(ratios$ratios), ncol = ncol(ratios$ratios) ), - y = if (Task$survival) { + y = if (task$survival) { convert_to_surv(data[[final_outcome(outcome)]]) } else { data[[final_outcome(outcome)]] }, - folds = Task$folds, - weights = Task$weights, - tau = Task$tau, + folds = task$folds, + weights = task$weights, + tau = task$tau, shift = if (is.null(shifted)) deparse(substitute((shift))) else NULL, fits_r = ratios$fits ) diff --git a/R/gcomp.R b/R/gcomp.R index 5125dbda..ad0b7183 100644 --- a/R/gcomp.R +++ b/R/gcomp.R @@ -1,15 +1,20 @@ -cf_sub <- function(Task, outcome, learners, lrnr_folds, full_fits, pb) { - out <- list() - - for (fold in seq_along(Task$folds)) { +cf_sub <- function(task, outcome, learners, control, pb) { + out <- vector("list", length = length(task$folds)) + for (fold in seq_along(task$folds)) { out[[fold]] <- future::future({ estimate_sub( - get_folded_data(Task$natural, Task$folds, fold), - get_folded_data(Task$shifted[, Task$trt, drop = F], Task$folds, fold), + get_folded_data(task$natural, task$folds, fold), + get_folded_data(task$shifted[, unlist(task$trt), drop = F], task$folds, fold), + task$trt, outcome, - Task$node_list$outcome, Task$cens, - Task$risk, Task$tau, Task$outcome_type, - learners, lrnr_folds, pb, full_fits + task$node_list$outcome, + task$cens, + task$risk, + task$tau, + task$outcome_type, + learners, + control, + pb ) }, seed = TRUE) @@ -18,16 +23,16 @@ cf_sub <- function(Task, outcome, learners, lrnr_folds, full_fits, pb) { out <- future::value(out) list( - m = recombine_outcome(out, "m", Task$folds), + m = recombine_outcome(out, "m", task$folds), fits = lapply(out, function(x) x[["fits"]]) ) } -estimate_sub <- function(natural, shifted, outcome, node_list, cens, risk, - tau, outcome_type, learners, lrnr_folds, pb, full_fits) { +estimate_sub <- function(natural, shifted, trt, outcome, node_list, cens, risk, + tau, outcome_type, learners, control, pb) { m <- matrix(nrow = nrow(natural$valid), ncol = tau) - fits <- list() + fits <- vector("list", length = tau) for (t in tau:1) { i <- censored(natural$train, cens, t)$i @@ -52,21 +57,26 @@ estimate_sub <- function(natural, shifted, outcome, node_list, cens, risk, learners, outcome_type, id = natural$train[i & rt, ][["lmtp_id"]], - lrnr_folds + control$.learners_outcome_folds ) - if (full_fits) { + if (control$.return_full_fits) { fits[[t]] <- fit } else { fits[[t]] <- extract_sl_weights(fit) } - trt_var <- names(shifted$train)[t] + if (length(trt) > 1) { + trt_t <- trt[[t]] + } else { + trt_t <- trt[[1]] + } + under_shift_train <- natural$train[jt & rt, vars] - under_shift_train[[trt_var]] <- shifted$train[jt & rt, trt_var] + under_shift_train[, trt_t] <- shifted$train[jt & rt, trt_t] under_shift_valid <- natural$valid[jv & rv, vars] - under_shift_valid[[trt_var]] <- shifted$valid[jv & rv, trt_var] + under_shift_valid[, trt_t] <- shifted$valid[jv & rv, trt_t] natural$train[jt & rt, pseudo] <- bound(SL_predict(fit, under_shift_train), 1e-05) m[jv & rv, t] <- bound(SL_predict(fit, under_shift_valid), 1e-05) diff --git a/R/lmtp-package.R b/R/lmtp-package.R index 319994d1..cf0c16f4 100644 --- a/R/lmtp-package.R +++ b/R/lmtp-package.R @@ -1,4 +1,4 @@ -#' @importFrom stats as.formula coef 
glm plogis predict qlogis qnorm pnorm sd quantile var binomial gaussian na.omit weighted.mean +#' @importFrom stats runif as.formula coef glm plogis predict qlogis qnorm pnorm sd quantile var binomial gaussian na.omit weighted.mean #' @keywords internal "_PACKAGE" diff --git a/R/lmtp_Task.R b/R/lmtp_Task.R index 74795bd7..6d5e3130 100644 --- a/R/lmtp_Task.R +++ b/R/lmtp_Task.R @@ -1,6 +1,6 @@ #' @importFrom R6 R6Class -lmtp_Task <- R6::R6Class( - "lmtp_Task", +lmtp_task <- R6::R6Class( + "lmtp_task", public = list( natural = NULL, shifted = NULL, @@ -16,7 +16,10 @@ lmtp_Task <- R6::R6Class( bounds = NULL, folds = NULL, weights = NULL, - initialize = function(data, trt, outcome, time_vary, baseline, cens, k, shift, shifted, id, outcome_type = NULL, V = 10, weights = NULL, bounds = NULL, bound = NULL) { + multivariate = NULL, + initialize = function(data, trt, outcome, time_vary, baseline, cens, k, + shift, shifted, id, outcome_type = NULL, V = 10, + weights = NULL, bounds = NULL, bound = NULL) { self$tau <- determine_tau(outcome, trt) self$n <- nrow(data) self$trt <- trt @@ -28,7 +31,8 @@ lmtp_Task <- R6::R6Class( self$bounds <- y_bounds(data[[final_outcome(outcome)]], self$outcome_type, bounds) data$lmtp_id <- create_ids(data, id) self$id <- data$lmtp_id - self$folds <- setup_cv(data, data$lmtp_id, V) + self$folds <- setup_cv(data, V, data$lmtp_id, final_outcome(outcome), self$outcome_type) + self$multivariate <- is.list(trt) shifted <- { if (is.null(shifted) && !is.null(shift)) @@ -42,8 +46,8 @@ lmtp_Task <- R6::R6Class( } } - data <- data.table::copy(data) - shifted <- data.table::copy(shifted) + data <- data.table::copy(as.data.frame(data)) + shifted <- data.table::copy(as.data.frame(shifted)) data <- fix_censoring_ind(data, cens) shifted <- fix_censoring_ind(shifted, cens) @@ -62,7 +66,14 @@ lmtp_Task <- R6::R6Class( self$shifted <- shifted if (!is.null(weights)) { - self$weights <- weights + if (is_normalized(weights)) { + self$weights <- weights + } else { + # Normalize weights + self$weights <- weights / mean(weights) + } + } else { + self$weights <- rep(1, self$n) } } ) diff --git a/R/lmtp_control.R b/R/lmtp_control.R new file mode 100644 index 00000000..4c8ad6bb --- /dev/null +++ b/R/lmtp_control.R @@ -0,0 +1,32 @@ +#' Set LMTP Estimation Parameters +#' +#' @param .bound \[\code{numeric(1)}\]\cr +#' Determines that maximum and minimum values (scaled) predictions +#' will be bounded by. The default is 1e-5, bounding predictions by 1e-5 and 0.9999. +#' @param .trim \[\code{numeric(1)}\]\cr +#' Determines the amount the density ratios should be trimmed. +#' The default is 0.999, trimming the density ratios greater than the 0.999 percentile +#' to the 0.999 percentile. A value of 1 indicates no trimming. +#' @param .learners_outcome_folds \[\code{integer(1)}\]\cr +#' The number of cross-validation folds for \code{learners_outcome}. +#' @param .learners_trt_folds \[\code{integer(1)}\]\cr +#' The number of cross-validation folds for \code{learners_trt}. +#' @param .return_full_fits \[\code{logical(1)}\]\cr +#' Return full SuperLearner fits? Default is \code{FALSE}, return only SuperLearner weights. +#' +#' @return A list of parameters controlling the estimation procedure. 
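Regarding the `lmtp_task` weights change shown above: user-supplied sampling weights that are not already mean one are now rescaled, and omitted weights default to a vector of ones. A minimal illustration of that rescaling (the numbers are arbitrary):

``` r
w <- c(2, 4, 1, 1)    # hypothetical sampling weights, mean(w) = 2
w / mean(w)           # what lmtp_task stores: 1.0 2.0 0.5 0.5
mean(w / mean(w))     # 1, so is_normalized() (R/utils.R) returns TRUE
```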
+#' @export +#' +#' @examples +#' lmtp_control(.trim = 0.975) +lmtp_control <- function(.bound = 1e-5, + .trim = 0.999, + .learners_outcome_folds = 10, + .learners_trt_folds = 10, + .return_full_fits = FALSE) { + list(.bound = .bound, + .trim = .trim, + .learners_outcome_folds = .learners_outcome_folds, + .learners_trt_folds = .learners_trt_folds, + .return_full_fits = .return_full_fits) +} diff --git a/R/nodelist.R b/R/nodelist.R index 773f1c5f..ec40d128 100644 --- a/R/nodelist.R +++ b/R/nodelist.R @@ -53,7 +53,7 @@ trt_node_list <- function(trt, time_vary, baseline = NULL, k, tau) { if (length(trt) == tau) { for (i in 1:tau) { if (i > 1) { - out[[i]] <- c(time_vary[[i]], trt[i - 1]) + out[[i]] <- c(time_vary[[i]], trt[[i - 1]]) } else { out[[i]] <- c(time_vary[[i]]) } @@ -62,14 +62,14 @@ trt_node_list <- function(trt, time_vary, baseline = NULL, k, tau) { if (length(trt) != tau) { for (i in 1:tau) { - out[[i]] <- c(time_vary[[i]], trt) + out[[i]] <- c(time_vary[[i]], unlist(trt)) } } } else { if (length(trt) == tau) { for (i in 1:tau) { if (i > 1) { - out[[i]] <- c(out[[i]], time_vary[[i]], trt[i - 1]) + out[[i]] <- c(out[[i]], time_vary[[i]], trt[[i - 1]]) } else { out[[i]] <- c(out[[i]], time_vary[[i]]) } @@ -78,7 +78,7 @@ trt_node_list <- function(trt, time_vary, baseline = NULL, k, tau) { if (length(trt) != tau) { for (i in 1:tau) { - out[[i]] <- c(out[[i]], time_vary[[i]], trt) + out[[i]] <- c(out[[i]], time_vary[[i]], unlist(trt)) } } } @@ -101,13 +101,13 @@ outcome_node_list <- function(trt, time_vary, baseline = NULL, k, tau) { if (length(trt) == tau) { for (i in 1:tau) { - out[[i]] <- c(time_vary[[i]], trt[i]) + out[[i]] <- c(time_vary[[i]], trt[[i]]) } } if (length(trt) != tau) { for (i in 1:tau) { - out[[i]] <- c(time_vary[[i]], trt) + out[[i]] <- c(time_vary[[i]], unlist(trt)) } } diff --git a/R/sdr.R b/R/sdr.R index 05674c89..b497c020 100644 --- a/R/sdr.R +++ b/R/sdr.R @@ -1,14 +1,21 @@ -cf_sdr <- function(Task, outcome, ratios, learners, lrnr_folds, full_fits, pb) { - out <- list() - for (fold in seq_along(Task$folds)) { +cf_sdr <- function(task, outcome, ratios, learners, control, pb) { + out <- vector("list", length = length(task$folds)) + for (fold in seq_along(task$folds)) { out[[fold]] <- future::future({ estimate_sdr( - get_folded_data(Task$natural, Task$folds, fold), - get_folded_data(Task$shifted[, Task$trt, drop = F], Task$folds, fold), - outcome, Task$node_list$outcome, - Task$cens, Task$risk, Task$tau, Task$outcome_type, - get_folded_data(ratios, Task$folds, fold)$train, - learners, lrnr_folds, pb, full_fits + get_folded_data(task$natural, task$folds, fold), + get_folded_data(task$shifted[, unlist(task$trt), drop = F], task$folds, fold), + task$trt, + outcome, + task$node_list$outcome, + task$cens, + task$risk, + task$tau, + task$outcome_type, + get_folded_data(ratios, task$folds, fold)$train, + learners, + control, + pb ) }, seed = TRUE) @@ -16,20 +23,20 @@ cf_sdr <- function(Task, outcome, ratios, learners, lrnr_folds, full_fits, pb) { out <- future::value(out) - list(natural = recombine_outcome(out, "natural", Task$folds), - shifted = recombine_outcome(out, "shifted", Task$folds), + list(natural = recombine_outcome(out, "natural", task$folds), + shifted = recombine_outcome(out, "shifted", task$folds), fits = lapply(out, function(x) x[["fits"]])) } -estimate_sdr <- function(natural, shifted, outcome, node_list, cens, risk, tau, - outcome_type, ratios, learners, lrnr_folds, pb, full_fits) { +estimate_sdr <- function(natural, shifted, trt, outcome, node_list,
cens, risk, tau, + outcome_type, ratios, learners, control, pb) { m_natural_train <- m_shifted_train <- cbind(matrix(nrow = nrow(natural$train), ncol = tau), natural$train[[outcome]]) m_natural_valid <- m_shifted_valid <- cbind(matrix(nrow = nrow(natural$valid), ncol = tau), natural$valid[[outcome]]) - fits <- list() + fits <- vector("list", length = tau) for (t in tau:1) { i <- censored(natural$train, cens, t)$i @@ -49,9 +56,9 @@ estimate_sdr <- function(natural, shifted, outcome, node_list, cens, risk, tau, learners, outcome_type, id = natural$train[i & rt, ][["lmtp_id"]], - lrnr_folds) + control$.learners_outcome_folds) - if (full_fits) { + if (control$.return_full_fits) { fits[[t]] <- fit } else { fits[[t]] <- extract_sl_weights(fit) @@ -73,21 +80,26 @@ estimate_sdr <- function(natural, shifted, outcome, node_list, cens, risk, tau, learners, "continuous", id = natural$train[i & rt, ][["lmtp_id"]], - lrnr_folds) + control$.learners_outcome_folds) - if (full_fits) { + if (control$.return_full_fits) { fits[[t]] <- fit } else { fits[[t]] <- extract_sl_weights(fit) } } - trt_var <- names(shifted$train)[t] + if (length(trt) > 1) { + trt_t <- trt[[t]] + } else { + trt_t <- trt[[1]] + } + under_shift_train <- natural$train[jt & rt, vars] - under_shift_train[[trt_var]] <- shifted$train[jt & rt, trt_var] + under_shift_train[, trt_t] <- shifted$train[jt & rt, trt_t] under_shift_valid <- natural$valid[jv & rv, vars] - under_shift_valid[[trt_var]] <- shifted$valid[jv & rv, trt_var] + under_shift_valid[, trt_t] <- shifted$valid[jv & rv, trt_t] m_natural_train[jt & rt, t] <- bound(SL_predict(fit, natural$train[jt & rt, vars]), 1e-05) m_shifted_train[jt & rt, t] <- bound(SL_predict(fit, under_shift_train), 1e-05) diff --git a/R/shift.R b/R/shift.R index 4a8a693d..115f77a7 100644 --- a/R/shift.R +++ b/R/shift.R @@ -2,7 +2,13 @@ shift_data <- function(data, trt, cens, shift) { if (is.null(shift)) { return(shift_cens(data, cens)) } - shift_trt(shift_cens(data, cens), trt, shift) + + is_multivariate <- is.list(trt) + if (isTRUE(is_multivariate)) { + return(shift_trt_list(shift_cens(data, cens), trt, shift)) + } + + shift_trt_character(shift_cens(data, cens), trt, shift) } shift_cens <- function(data, cens) { @@ -13,13 +19,23 @@ shift_cens <- function(data, cens) { as.data.frame(out, check.names = FALSE) } -shift_trt <- function(data, trt, .f) { +shift_trt_character <- function(data, trt, .f) { for (a in trt) { data[[a]] <- .f(data, a) } data } +shift_trt_list <- function(data, trt, .f) { + for (a in trt) { + new <- .f(data, a) + for (col in a) { + data[[col]] <- new[[col]] + } + } + data +} + #' Turn All Treatment Nodes On #' #' A pre-packaged shift function for use with provided estimators when the exposure is binary. @@ -69,3 +85,45 @@ static_binary_on <- function(data, trt) { static_binary_off <- function(data, trt) { rep(0, length(data[[trt]])) } + +#' IPSI Function Factory +#' +#' A function factory that returns a shift function for increasing or decreasing +#' the probability of exposure when exposure is binary. +#' +#' @param delta \[\code{numeric(1)}\]\cr +#' A risk ratio between 0 and Inf. +#' +#' @seealso [lmtp_tmle()], [lmtp_sdr()], [lmtp_sub()], [lmtp_ipw()] +#' @return A shift function. 
+#' @export +#' +#' @examples +#' \donttest{ +#' data("iptwExWide", package = "twang") +#' a <- paste0("tx", 1:3) +#' baseline <- c("gender", "age") +#' tv <- list(c("use0"), c("use1"), c("use2")) +#' lmtp_sdr(iptwExWide, a, "outcome", baseline = baseline, time_vary = tv, +#' shift = ipsi(0.5), outcome_type = "continuous", folds = 2) +#' } +ipsi <- function(delta) { + if (delta > 1) { + return(ipsi_up(1 / delta)) + } + ipsi_down(delta) +} + +ipsi_up <- function(delta) { + function(data, trt) { + eps <- runif(nrow(data), 0, 1) + ifelse(eps < delta, data[[trt]], 1) + } +} + +ipsi_down <- function(delta) { + function(data, trt) { + eps <- runif(nrow(data), 0, 1) + ifelse(eps < delta, data[[trt]], 0) + } +} diff --git a/R/theta.R b/R/theta.R index 986625a1..7df2f8fa 100644 --- a/R/theta.R +++ b/R/theta.R @@ -70,18 +70,11 @@ theta_dr <- function(eta, augmented = FALSE) { tau = eta$tau, shifted = eta$m$shifted, natural = eta$m$natural) - theta <- { if (augmented) - if (is.null(eta$weights)) - mean(inflnce) - else - weighted.mean(inflnce, eta$weights) + weighted.mean(inflnce, eta$weights) else - if (is.null(eta$weights)) - mean(eta$m$shifted[, 1]) - else - weighted.mean(eta$m$shifted[, 1], eta$weights) + weighted.mean(eta$m$shifted[, 1], eta$weights) } if (eta$outcome_type == "continuous") { @@ -89,7 +82,7 @@ theta_dr <- function(eta, augmented = FALSE) { theta <- rescale_y_continuous(theta, eta$bounds) } - clusters <- split(inflnce, eta$id) + clusters <- split(inflnce*eta$weights, eta$id) j <- length(clusters) se <- sqrt(var(vapply(clusters, function(x) mean(x), 1)) / j) ci_low <- theta - (qnorm(0.975) * se) @@ -110,6 +103,7 @@ theta_dr <- function(eta, augmented = FALSE) { binomial = eta$m$shifted ), density_ratios = eta$r, + weights = eta$weights, fits_m = eta$fits_m, fits_r = eta$fits_r, outcome_type = eta$outcome_type diff --git a/R/tmle.R b/R/tmle.R index 0584830e..c9e40224 100644 --- a/R/tmle.R +++ b/R/tmle.R @@ -1,20 +1,26 @@ -cf_tmle <- function(Task, outcome, ratios, learners, lrnr_folds, full_fits, pb) { - out <- list() - +cf_tmle <- function(task, outcome, ratios, learners, control, pb) { + out <- vector("list", length = length(task$folds)) ratios <- matrix(t(apply(ratios, 1, cumprod)), nrow = nrow(ratios), ncol = ncol(ratios)) - for (fold in seq_along(Task$folds)) { + for (fold in seq_along(task$folds)) { out[[fold]] <- future::future({ estimate_tmle( - get_folded_data(Task$natural, Task$folds, fold), - get_folded_data(Task$shifted[, Task$trt, drop = F], Task$folds, fold), - outcome, Task$node_list$outcome, Task$cens, Task$risk, - Task$tau, Task$outcome_type, - get_folded_data(ratios, Task$folds, fold)$train, - Task$weights[Task$folds[[fold]]$training_set], - learners, lrnr_folds, pb, full_fits + get_folded_data(task$natural, task$folds, fold), + get_folded_data(task$shifted[, unlist(task$trt), drop = F], task$folds, fold), + task$trt, + outcome, + task$node_list$outcome, + task$cens, + task$risk, + task$tau, + task$outcome_type, + get_folded_data(ratios, task$folds, fold)$train, + task$weights[task$folds[[fold]]$training_set], + learners, + control, + pb ) }, seed = TRUE) @@ -23,17 +29,18 @@ cf_tmle <- function(Task, outcome, ratios, learners, lrnr_folds, full_fits, pb) out <- future::value(out) list( - natural = recombine_outcome(out, "natural", Task$folds), - shifted = cbind(recombine_outcome(out, "shifted", Task$folds), Task$natural[["tmp_lmtp_scaled_outcome"]]), + natural = recombine_outcome(out, "natural", task$folds), + shifted = cbind(recombine_outcome(out, "shifted", 
task$folds), task$natural[["tmp_lmtp_scaled_outcome"]]), fits = lapply(out, function(x) x[["fits"]]) ) } -estimate_tmle <- function(natural, shifted, outcome, node_list, cens, risk, tau, outcome_type, ratios, weights, learners, lrnr_folds, pb, full_fits) { +estimate_tmle <- function(natural, shifted, trt, outcome, node_list, cens, + risk, tau, outcome_type, ratios, weights, learners, control, pb) { m_natural_train <- m_shifted_train <- matrix(nrow = nrow(natural$train), ncol = tau) m_natural_valid <- m_shifted_valid <- matrix(nrow = nrow(natural$valid), ncol = tau) - fits <- list() + fits <- vector("list", length = tau) for (t in tau:1) { i <- censored(natural$train, cens, t)$i jt <- censored(natural$train, cens, t)$j @@ -57,33 +64,33 @@ estimate_tmle <- function(natural, shifted, outcome, node_list, cens, risk, tau, learners, outcome_type, id = natural$train[i & rt,][["lmtp_id"]], - lrnr_folds + control$.learners_outcome_folds ) - if (full_fits) { + if (control$.return_full_fits) { fits[[t]] <- fit } else { fits[[t]] <- extract_sl_weights(fit) } - trt_var <- names(shifted$train)[t] + if (length(trt) > 1) { + trt_t <- trt[[t]] + } else { + trt_t <- trt[[1]] + } + under_shift_train <- natural$train[jt & rt, vars] - under_shift_train[[trt_var]] <- shifted$train[jt & rt, trt_var] + under_shift_train[, trt_t] <- shifted$train[jt & rt, trt_t] under_shift_valid <- natural$valid[jv & rv, vars] - under_shift_valid[[trt_var]] <- shifted$valid[jv & rv, trt_var] + under_shift_valid[, trt_t] <- shifted$valid[jv & rv, trt_t] m_natural_train[jt & rt, t] <- bound(SL_predict(fit, natural$train[jt & rt, vars]), 1e-05) m_shifted_train[jt & rt, t] <- bound(SL_predict(fit, under_shift_train), 1e-05) m_natural_valid[jv & rv, t] <- bound(SL_predict(fit, natural$valid[jv & rv, vars]), 1e-05) m_shifted_valid[jv & rv, t] <- bound(SL_predict(fit, under_shift_valid), 1e-05) - wts <- { - if (is.null(weights)) - ratios[i & rt, t] - else - ratios[i & rt, t] * weights[i & rt] - } + wts <- ratios[i & rt, t] * weights[i & rt] fit <- sw( glm( diff --git a/R/utils.R b/R/utils.R index a686f03c..92becb1a 100644 --- a/R/utils.R +++ b/R/utils.R @@ -6,8 +6,15 @@ determine_tau <- function(outcome, trt) { length(outcome) } -setup_cv <- function(data, id, V = 10) { - out <- origami::make_folds(data, cluster_ids = id, V = V) +setup_cv <- function(data, V = 10, id, strata, outcome_type) { + if (length(unique(id)) == nrow(data) & outcome_type == "binomial") { + strata <- data[[strata]] + strata[is.na(strata)] <- 2 + out <- origami::make_folds(data, V = V, strata_ids = strata) + } else { + out <- origami::make_folds(data, cluster_ids = id, V = V) + } + if (V > 1) { return(out) } @@ -93,6 +100,9 @@ at_risk <- function(data, risk, tau, check = FALSE) { followed_rule <- function(obs_trt, shifted_trt, mtp) { if (mtp) { + if (inherits(obs_trt, "data.frame")) { + return(rep(TRUE, nrow(obs_trt))) + } return(rep(TRUE, length(obs_trt))) } @@ -212,3 +222,8 @@ compute_weights <- function(r, t, tau) { if (ncol(out) > ncol(r)) return(t(out)) out } + +is_normalized <- function(x, tolerance = .Machine$double.eps^0.5) { + # Check if the mean is approximately 1 within the given tolerance + abs(mean(x) - 1) < tolerance +} diff --git a/README.Rmd b/README.Rmd index 935378fd..2ce2e861 100644 --- a/README.Rmd +++ b/README.Rmd @@ -13,24 +13,21 @@ knitr::opts_chunk$set( ) ``` -# lmtp +# lmtp -[![CRAN status](https://www.r-pkg.org/badges/version/lmtp)](https://CRAN.R-project.org/package=lmtp) -![](http://cranlogs.r-pkg.org/badges/grand-total/lmtp) -[![R build 
status](https://github.com/nt-williams/lmtp/workflows/R-CMD-check/badge.svg)](https://github.com/nt-williams/lmtp/actions) -[![codecov](https://codecov.io/gh/nt-williams/lmtp/branch/master/graph/badge.svg)](https://app.codecov.io/gh/nt-williams/lmtp) -[![License: GPL v3](https://img.shields.io/badge/License-GPLv3-blue.svg)](https://www.gnu.org/licenses/gpl-3.0) -[![Project Status: Active – The project has reached a stable, usable state and is being actively developed.](https://www.repostatus.org/badges/latest/active.svg)](https://www.repostatus.org/#active) + +[![CRAN status](https://www.r-pkg.org/badges/version/lmtp)](https://CRAN.R-project.org/package=lmtp) ![](http://cranlogs.r-pkg.org/badges/grand-total/lmtp) [![R build status](https://github.com/nt-williams/lmtp/workflows/R-CMD-check/badge.svg)](https://github.com/nt-williams/lmtp/actions) [![codecov](https://codecov.io/gh/nt-williams/lmtp/branch/master/graph/badge.svg)](https://app.codecov.io/gh/nt-williams/lmtp) [![License: GPL v3](https://img.shields.io/badge/License-GPLv3-blue.svg)](https://www.gnu.org/licenses/gpl-3.0) [![Project Status: Active – The project has reached a stable, usable state and is being actively developed.](https://www.repostatus.org/badges/latest/active.svg)](https://www.repostatus.org/#active) + > Non-parametric Causal Effects of Feasible Interventions Based on Modified Treatment Policies Nick Williams and Ivan Diaz ---- +------------------------------------------------------------------------ -**lmtp** is an R package that provides an estimation framework for the casual effects of feasible interventions based on point-treatment and longitudinal modified treatment policies as described in Diaz, Williams, Hoffman, and Schenck (2020). Two primary estimators are supported, a targeted maximum likelihood (TML) estimator and a sequentially doubly robust (SDR) estimator (a G-computation and an inverse probability of treatment weighting estimator are provided for the sake of being thorough but their use is recommended against in favor of the TML and SDR estimators). Both binary and continuous outcomes (both with censoring) are allowed. **lmtp** is built atop the [`SuperLearner`](https://CRAN.R-project.org/package=SuperLearner) package to utilize ensemble machine learning for estimation. The treatment mechanism is estimated via a density ratio classification procedure irrespective of treatment variable type providing decreased computation time when treatment is continuous. Dynamic treatment regimes are also supported. +**lmtp** is an R package that provides an estimation framework for the casual effects of feasible interventions based on point-treatment and longitudinal modified treatment policies as described in Diaz, Williams, Hoffman, and Schenck (2020). Two primary estimators are supported, a targeted maximum likelihood (TML) estimator and a sequentially doubly robust (SDR) estimator (a G-computation and an inverse probability of treatment weighting estimator are provided for the sake of being thorough but their use is recommended against in favor of the TML and SDR estimators). Both binary and continuous outcomes (both with censoring) are allowed. **lmtp** is built atop the [`SuperLearner`](https://CRAN.R-project.org/package=SuperLearner) package to utilize ensemble machine learning for estimation. The treatment mechanism is estimated via a density ratio classification procedure irrespective of treatment variable type providing decreased computation time when treatment is continuous. 
A list of papers using **lmtp** is [here](https://gist.github.com/nt-williams/15068f5849a67ff4d2cb7f2dcf97b3de). @@ -38,54 +35,56 @@ For an in-depth look at the package's functionality, please consult the accompan ## Installation -**lmtp** can be installed from CRAN with: +**lmtp** can be installed from CRAN with: -```r +``` r install.packages("lmtp") ``` -The stable, development version can be installed from GitHub with: +The stable, development version can be installed from GitHub with: -```r +``` r devtools::install_github("nt-williams/lmtp@devel") ``` -A version allowing for different covariates sets for the treatment, censoring, and outcome regressions: +A version allowing for different covariate sets for the treatment, censoring, and outcome regressions: -```r +``` r devtools::install_github("nt-williams/lmtp@separate-variable-sets") ``` -## What even is a modified treatment policy? +## What even is a modified treatment policy? -Modified treatment policies (MTP) are interventions that can depend on the *natural* value of the treatment (the treatment value in the absence of intervention). A key assumption for causal inference is the *positivity assumption* which states that all observations have a non-zero probability of experiencing a treatment value. **When working with continuous or multivalued treatments, violations of the positivity assumption are likely to occur. MTPs offer a solution to this problem.** +Modified treatment policies (MTP) are interventions that can depend on the *natural* value of the treatment (the treatment value in the absence of intervention). A key assumption for causal inference is the *positivity assumption*, which states that all observations have a non-zero probability of experiencing a treatment value. **When working with continuous or multivalued treatments, violations of the positivity assumption are likely to occur. MTPs offer a solution to this problem.** -## Can lmtp estimate other effects? +## Can lmtp estimate other effects? -Yes! **lmtp** can estimate the effects of deterministic, static treatment effects (such as the ATE) and deterministic, dynamic treatment regimes for binary, continuous, and survival outcomes. +Yes! **lmtp** can estimate deterministic, static treatment effects (such as the ATE) and the effects of deterministic, dynamic treatment regimes for binary, continuous, and survival outcomes.
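Because every intervention in **lmtp** is expressed as a shift function of the form `function(data, trt)` that returns the post-intervention treatment values, static interventions, dynamic regimes, and MTPs all use the same interface. The sketch below shows one example of each; the threshold rule, the covariate name `L_1`, and the 0.5-unit reduction are illustrative, and lmtp also ships `static_binary_on()` for the first case.

``` r
# Static intervention: everyone treated
# (lmtp also provides static_binary_on() for this purpose).
always_on <- function(data, trt) {
  rep(1, nrow(data))
}

# Dynamic regime: treat when the covariate L_1 exceeds a threshold,
# otherwise keep the natural treatment value (illustrative rule).
dynamic_rule <- function(data, trt) {
  ifelse(data[["L_1"]] > 1, 1, data[[trt]])
}

# Modified treatment policy: reduce a continuous exposure by 0.5 units;
# pass mtp = TRUE to the estimators when using shifts like this one.
reduce_half_unit <- function(data, trt) {
  data[[trt]] - 0.5
}
```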
### Features -| Feature | Status | -|---------------------------------|:-----------:| -| Point treatment | ✓ | -| Longitudinal treatment | ✓ | -| Modified treatment intervention | ✓ | -| Static intervention | ✓ | -| Dynamic intervention | ✓ | -| Continuous treatment | ✓ | -| Binary treatment | ✓ | -| Categorical treatment | ✓ | -| Missingness in treatment | | -| Continuous outcome | ✓ | -| Binary outcome | ✓ | -| Censored outcome | ✓ | -| Mediation | | -| Survey weights | ✓ | -| Super learner | ✓ | -| Clustered data | ✓ | -| Parallel processing | ✓ | -| Progress bars | ✓ | +| Feature | Status | +|---------------------------------------------------------|:-------------:| +| Point treatment | ✓ | +| Longitudinal treatment | ✓ | +| Modified treatment intervention | ✓ | +| Incremental Propensity Score Intervention (Using the risk ratio) | ✓ | +| Static intervention | ✓ | +| Dynamic intervention | ✓ | +| Continuous treatment | ✓ | +| Binary treatment | ✓ | +| Categorical treatment | ✓ | +| Multivariate treatment | ✓ | +| Missingness in treatment | | +| Continuous outcome | ✓ | +| Binary outcome | ✓ | +| Censored outcome | ✓ | +| Mediation | | +| Survey weights | ✓ | +| Super learner | ✓ | +| Clustered data | ✓ | +| Parallel processing | ✓ | +| Progress bars | ✓ | ## Example @@ -114,7 +113,7 @@ A <- c("A_1", "A_2", "A_3", "A_4") L <- list(c("L_1"), c("L_2"), c("L_3"), c("L_4")) ``` -We can now estimate the effect of our treatment policy, `d`. In this example, we'll use the cross-validated TML estimator with 10 folds. +We can now estimate the effect of our treatment policy, `d`. In this example, we'll use the cross-validated TML estimator with 10 folds. ```{r, eval = FALSE} lmtp_tmle(sim_t4, A, "Y", time_vary = L, shift = policy, intervention_type = "mtp", folds = 10) @@ -131,35 +130,35 @@ lmtp_tmle(sim_t4, A, "Y", time_vary = L, shift = policy, intervention_type = "mt #### Single time point - + #### Time-varying exposure and confounders, not survival outcome - + #### Single exposure, survival outcome - + #### Time-varying exposure and confounders, survival outcome - + ## Similar Implementations A variety of other R packages perform similar tasks as **lmtp**. However, **lmtp** is the only R package currently capable of estimating causal effects for binary, categorical, and continuous exposures in both the point treatment and longitudinal setting using traditional causal effects or modified treatment policies. -- [`txshift`](https://github.com/nhejazi/txshift) -- [`tmle3`](https://github.com/tlverse/tmle3) -- [`tmle3shift`](https://github.com/tlverse/tmle3shift) -- [`ltmle`](https://CRAN.R-project.org/package=ltmle) -- [`tmle`](https://CRAN.R-project.org/package=tmle) +- [`txshift`](https://github.com/nhejazi/txshift)\ +- [`tmle3`](https://github.com/tlverse/tmle3)\ +- [`tmle3shift`](https://github.com/tlverse/tmle3shift) +- [`ltmle`](https://CRAN.R-project.org/package=ltmle)\ +- [`tmle`](https://CRAN.R-project.org/package=tmle) ## Citation Please cite the following when using **lmtp** in publications. Citation should include both the R package article and the paper establishing the statistical methodology. -``` +``` @article{, title = {lmtp: An R package for estimating the causal effects of modified treatment policies}, author = {Nicholas T Williams and Iván Díaz}, @@ -182,4 +181,3 @@ Please cite the following when using **lmtp** in publications. Citation should i ## References Iván Díaz, Nicholas Williams, Katherine L. Hoffman & Edward J. 
Schenck (2021) Non-parametric causal effects based on longitudinal modified treatment policies, Journal of the American Statistical Association, DOI: 10.1080/01621459.2021.1955691 - diff --git a/README.md b/README.md index 3abdba50..8374a0f3 100644 --- a/README.md +++ b/README.md @@ -1,7 +1,7 @@ -# lmtp +# lmtp @@ -15,6 +15,7 @@ v3](https://img.shields.io/badge/License-GPLv3-blue.svg)](https://www.gnu.org/li [![Project Status: Active – The project has reached a stable, usable state and is being actively developed.](https://www.repostatus.org/badges/latest/active.svg)](https://www.repostatus.org/#active) + > Non-parametric Causal Effects of Feasible Interventions Based on @@ -87,31 +88,38 @@ regimes for binary, continuous, and survival outcomes. ### Features -| Feature | Status | -|---------------------------------|:------:| -| Point treatment | ✓ | -| Longitudinal treatment | ✓ | -| Modified treatment intervention | ✓ | -| Static intervention | ✓ | -| Dynamic intervention | ✓ | -| Continuous treatment | ✓ | -| Binary treatment | ✓ | -| Categorical treatment | ✓ | -| Missingness in treatment | | -| Continuous outcome | ✓ | -| Binary outcome | ✓ | -| Censored outcome | ✓ | -| Mediation | | -| Survey weights | ✓ | -| Super learner | ✓ | -| Clustered data | ✓ | -| Parallel processing | ✓ | -| Progress bars | ✓ | +| Feature | Status | +|------------------------------------------------------------------|:------:| +| Point treatment | ✓ | +| Longitudinal treatment | ✓ | +| Modified treatment intervention | ✓ | +| Incremental Propensity Score Intervention (Using the risk ratio) | ✓ | +| Static intervention | ✓ | +| Dynamic intervention | ✓ | +| Continuous treatment | ✓ | +| Binary treatment | ✓ | +| Categorical treatment | ✓ | +| Multivariate treatment | ✓ | +| Missingness in treatment | | +| Continuous outcome | ✓ | +| Binary outcome | ✓ | +| Censored outcome | ✓ | +| Mediation | | +| Survey weights | ✓ | +| Super learner | ✓ | +| Clustered data | ✓ | +| Parallel processing | ✓ | +| Progress bars | ✓ | ## Example ``` r library(lmtp) +#> Loading required package: mlr3superlearner +#> Loading required package: mlr3learners +#> Warning: package 'mlr3learners' was built under R version 4.2.3 +#> Loading required package: mlr3 +#> Warning: package 'mlr3' was built under R version 4.2.3 # the data: 4 treatment nodes with time varying covariates and a binary outcome head(sim_t4) @@ -163,19 +171,19 @@ lmtp_tmle(sim_t4, A, "Y", time_vary = L, shift = policy, intervention_type = "mt #### Single time point - + #### Time-varying exposure and confounders, not survival outcome - + #### Single exposure, survival outcome - + #### Time-varying exposure and confounders, survival outcome - + ## Similar Implementations diff --git a/cran-comments.md b/cran-comments.md index 49645184..6fd60426 100644 --- a/cran-comments.md +++ b/cran-comments.md @@ -1,4 +1,4 @@ -## Version 1.3.3 submission +## Version 1.4.0 submission ## Test environments diff --git a/data/multivariate_data.rda b/data/multivariate_data.rda new file mode 100644 index 00000000..7bd9a911 Binary files /dev/null and b/data/multivariate_data.rda differ diff --git a/inst/examples/ipw-ex.R b/inst/examples/ipw-ex.R index cade3566..46abc4c7 100644 --- a/inst/examples/ipw-ex.R +++ b/inst/examples/ipw-ex.R @@ -141,4 +141,23 @@ lmtp_ipw(sim_point_surv, A, Y, W, cens = C, folds = 2, shift = static_binary_on, outcome_type = "survival") + + # Example 6.1 + # Intervening on multiple exposures simultaneously. 
Interested in the effect of + # a modified treatment policy where variable D1 is decreased by 0.1 units and + # variable D2 is decreased by 0.5 units simultaneously. + A <- list(c("D1", "D2")) + W <- paste0("C", 1:3) + Y <- "Y" + + d <- function(data, a) { + out = list( + data[[a[1]]] - 0.1, + data[[a[2]]] - 0.5 + ) + setNames(out, a) + } + + lmtp_ipw(multivariate_data, A, Y, W, shift = d, + outcome_type = "continuous", folds = 1, mtp = TRUE) } diff --git a/inst/examples/sdr-ex.R b/inst/examples/sdr-ex.R index caaae42a..e1a4ec1a 100644 --- a/inst/examples/sdr-ex.R +++ b/inst/examples/sdr-ex.R @@ -143,4 +143,23 @@ lmtp_sdr(sim_point_surv, A, Y, W, cens = C, folds = 2, shift = static_binary_on, outcome_type = "survival") + + # Example 6.1 + # Intervening on multiple exposures simultaneously. Interested in the effect of + # a modified treatment policy where variable D1 is decreased by 0.1 units and + # variable D2 is decreased by 0.5 units simultaneously. + A <- list(c("D1", "D2")) + W <- paste0("C", 1:3) + Y <- "Y" + + d <- function(data, a) { + out = list( + data[[a[1]]] - 0.1, + data[[a[2]]] - 0.5 + ) + setNames(out, a) + } + + lmtp_sdr(multivariate_data, A, Y, W, shift = d, + outcome_type = "continuous", folds = 1, mtp = TRUE) } diff --git a/inst/examples/sub-ex.R b/inst/examples/sub-ex.R index ca5f6890..0ae063fc 100644 --- a/inst/examples/sub-ex.R +++ b/inst/examples/sub-ex.R @@ -137,4 +137,23 @@ lmtp_sub(sim_point_surv, A, Y, W, cens = C, folds = 2, shift = static_binary_on, outcome_type = "survival") + + # Example 6.1 + # Intervening on multiple exposures simultaneously. Interested in the effect of + # a modified treatment policy where variable D1 is decreased by 0.1 units and + # variable D2 is decreased by 0.5 units simultaneously. + A <- list(c("D1", "D2")) + W <- paste0("C", 1:3) + Y <- "Y" + + d <- function(data, a) { + out = list( + data[[a[1]]] - 0.1, + data[[a[2]]] - 0.5 + ) + setNames(out, a) + } + + lmtp_sub(multivariate_data, A, Y, W, shift = d, + outcome_type = "continuous", folds = 1) } diff --git a/inst/examples/tmle-ex.R b/inst/examples/tmle-ex.R index 25d353fd..a6631c97 100644 --- a/inst/examples/tmle-ex.R +++ b/inst/examples/tmle-ex.R @@ -140,4 +140,23 @@ lmtp_tmle(sim_point_surv, A, Y, W, cens = C, folds = 2, shift = static_binary_on, outcome_type = "survival") + + # Example 6.1 + # Intervening on multiple exposures simultaneously. Interested in the effect of + # a modified treatment policy where variable D1 is decreased by 0.1 units and + # variable D2 is decreased by 0.5 units simultaneously. + A <- list(c("D1", "D2")) + W <- paste0("C", 1:3) + Y <- "Y" + + d <- function(data, a) { + out = list( + data[[a[1]]] - 0.1, + data[[a[2]]] - 0.5 + ) + setNames(out, a) + } + + lmtp_tmle(multivariate_data, A, Y, W, shift = d, + outcome_type = "continuous", folds = 1, mtp = TRUE) } diff --git a/man/ipsi.Rd b/man/ipsi.Rd new file mode 100644 index 00000000..9fcc91d2 --- /dev/null +++ b/man/ipsi.Rd @@ -0,0 +1,32 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/shift.R +\name{ipsi} +\alias{ipsi} +\title{IPSI Function Factory} +\usage{ +ipsi(delta) +} +\arguments{ +\item{delta}{[\code{numeric(1)}]\cr +A risk ratio between 0 and Inf.} +} +\value{ +A shift function. +} +\description{ +A function factory that returns a shift function for increasing or decreasing +the probability of exposure when exposure is binary. 
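To illustrate what "function factory" means here: `ipsi(delta)` returns a shift function rather than shifted values. As a sketch of the idea for `delta <= 1` (this is not the package's internal implementation of `ipsi()`), each exposed unit keeps its exposure with probability `delta`, so the post-intervention exposure probability is `delta` times the natural one, i.e., a risk ratio of `delta`. The packaged `ipsi()` also accepts `delta > 1` for increasing exposure.

``` r
# Sketch of a risk-ratio IPSI factory for delta <= 1 (illustration only).
ipsi_decrease <- function(delta) {
  stopifnot(delta > 0, delta <= 1)
  function(data, trt) {
    a <- data[[trt]]
    keep <- stats::runif(length(a)) < delta
    ifelse(keep, a, 0)  # exposed units stay exposed with probability delta
  }
}

# The returned closure plugs in like any other shift function, e.g.
# lmtp_sdr(..., shift = ipsi_decrease(0.5), ...)
```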
+} +\examples{ +\donttest{ +data("iptwExWide", package = "twang") +a <- paste0("tx", 1:3) +baseline <- c("gender", "age") +tv <- list(c("use0"), c("use1"), c("use2")) +lmtp_sdr(iptwExWide, a, "outcome", baseline = baseline, time_vary = tv, + shift = ipsi(0.5), outcome_type = "continuous", folds = 2) +} +} +\seealso{ +\code{\link[=lmtp_tmle]{lmtp_tmle()}}, \code{\link[=lmtp_sdr]{lmtp_sdr()}}, \code{\link[=lmtp_sub]{lmtp_sub()}}, \code{\link[=lmtp_ipw]{lmtp_ipw()}} +} diff --git a/man/lmtp-package.Rd b/man/lmtp-package.Rd index 211ad080..993a55d0 100644 --- a/man/lmtp-package.Rd +++ b/man/lmtp-package.Rd @@ -6,7 +6,7 @@ \alias{lmtp-package} \title{lmtp: Non-Parametric Causal Effects of Feasible Interventions Based on Modified Treatment Policies} \description{ -Non-parametric estimators for casual effects based on longitudinal modified treatment policies as described in Diaz, Williams, Hoffman, and Schenck \doi{10.1080/01621459.2021.1955691}, traditional point treatment, and traditional longitudinal effects. Continuous, binary, and categorical treatments are allowed as well are censored outcomes. The treatment mechanism is estimated via a density ratio classification procedure irrespective of treatment variable type. For both continuous and binary outcomes, additive treatment effects can be calculated and relative risks and odds ratios may be calculated for binary outcomes. +Non-parametric estimators for causal effects based on longitudinal modified treatment policies as described in Diaz, Williams, Hoffman, and Schenck \doi{10.1080/01621459.2021.1955691}, traditional point treatment, and traditional longitudinal effects. Continuous, binary, categorical, and multivariate treatments are allowed, as well as censored outcomes. The treatment mechanism is estimated via a density ratio classification procedure irrespective of treatment variable type. For both continuous and binary outcomes, additive treatment effects can be calculated and relative risks and odds ratios may be calculated for binary outcomes. } \seealso{ Useful links: @@ -21,7 +21,7 @@ Useful links: Authors: \itemize{ - \item Iván Díaz \email{ild2005@med.cornell.edu} (\href{https://orcid.org/0000-0001-9056-2047}{ORCID}) [copyright holder] + \item Iván Díaz \email{Ivan.Diaz@nyulangone.org} (\href{https://orcid.org/0000-0001-9056-2047}{ORCID}) [copyright holder] } } diff --git a/man/lmtp_control.Rd b/man/lmtp_control.Rd new file mode 100644 index 00000000..2e1d10af --- /dev/null +++ b/man/lmtp_control.Rd @@ -0,0 +1,42 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/lmtp_control.R +\name{lmtp_control} +\alias{lmtp_control} +\title{Set LMTP Estimation Parameters} +\usage{ +lmtp_control( + .bound = 1e+05, + .trim = 0.999, + .learners_outcome_folds = 10, + .learners_trt_folds = 10, + .return_full_fits = FALSE +) +} +\arguments{ +\item{.bound}{[\code{numeric(1)}]\cr +Determines the maximum and minimum values (scaled) predictions +will be bounded by. The default is 1e-5, bounding predictions between 1e-5 and 0.9999.} + +\item{.trim}{[\code{numeric(1)}]\cr +Determines the amount the density ratios should be trimmed. +The default is 0.999, trimming the density ratios greater than the 0.999 percentile +to the 0.999 percentile. 
A value of 1 indicates no trimming.} + +\item{.learners_outcome_folds}{[\code{integer(1)}]\cr +The number of cross-validation folds for \code{learners_outcome}.} + +\item{.learners_trt_folds}{[\code{integer(1)}]\cr +The number of cross-validation folds for \code{learners_trt}.} + +\item{.return_full_fits}{[\code{logical(1)}]\cr +Return full SuperLearner fits? Default is \code{FALSE}, return only SuperLearner weights.} +} +\value{ +A list of parameters controlling the estimation procedure. +} +\description{ +Set LMTP Estimation Parameters +} +\examples{ +lmtp_control(.trim = 0.975) +} diff --git a/man/lmtp_ipw.Rd b/man/lmtp_ipw.Rd index 6d9156aa..a63071d8 100644 --- a/man/lmtp_ipw.Rd +++ b/man/lmtp_ipw.Rd @@ -20,11 +20,7 @@ lmtp_ipw( learners = "SL.glm", folds = 10, weights = NULL, - .bound = 1e-05, - .trim = 0.999, - .learners_folds = 10, - .return_full_fits = FALSE, - ... + control = lmtp_control() ) } \arguments{ @@ -32,8 +28,11 @@ lmtp_ipw( A \code{data.frame} in wide format containing all necessary variables for the estimation problem. Must not be a \code{data.table}.} -\item{trt}{[\code{character}]\cr -A vector containing the column names of treatment variables ordered by time.} +\item{trt}{[\code{character}] or [\code{list}]\cr +A vector containing the column names of treatment variables ordered by time. +Or, a list of vectors, the same length as the number of time points of observation. +Vectors should contain column names for the treatment variables at each time point. The list +should be ordered following the time ordering of the model.} \item{outcome}{[\code{character}]\cr The column name of the outcome variable. In the case of time-to-event @@ -88,22 +87,8 @@ The number of folds to be used for cross-fitting.} \item{weights}{[\code{numeric(nrow(data))}]\cr An optional vector containing sampling weights.} -\item{.bound}{[\code{numeric(1)}]\cr -Determines that maximum and minimum values (scaled) predictions -will be bounded by. The default is 1e-5, bounding predictions by 1e-5 and 0.9999.} - -\item{.trim}{[\code{numeric(1)}]\cr -Determines the amount the density ratios should be trimmed. -The default is 0.999, trimming the density ratios greater than the 0.999 percentile -to the 0.999 percentile. A value of 1 indicates no trimming.} - -\item{.learners_folds}{[\code{integer(1)}]\cr -The number of cross-validation folds for \code{learners}.} - -\item{.return_full_fits}{[\code{logical(1)}]\cr -Return full SuperLearner fits? Default is \code{FALSE}, return only SuperLearner weights.} - -\item{...}{Extra arguments. Exists for backwards compatibility.} +\item{control}{[\code{list()}]\cr +Output of \code{lmtp_control()}.} } \value{ A list of class \code{lmtp} containing the following components: @@ -278,5 +263,24 @@ by some amount, use \code{mtp = TRUE}}. lmtp_ipw(sim_point_surv, A, Y, W, cens = C, folds = 2, shift = static_binary_on, outcome_type = "survival") + + # Example 6.1 + # Intervening on multiple exposures simultaneously. Interested in the effect of + # a modified treatment policy where variable D1 is decreased by 0.1 units and + # variable D2 is decreased by 0.5 units simultaneously. 
+ A <- list(c("D1", "D2")) + W <- paste0("C", 1:3) + Y <- "Y" + + d <- function(data, a) { + out = list( + data[[a[1]]] - 0.1, + data[[a[2]]] - 0.5 + ) + setNames(out, a) + } + + lmtp_ipw(multivariate_data, A, Y, W, shift = d, + outcome_type = "continuous", folds = 1, mtp = TRUE) } } diff --git a/man/lmtp_sdr.Rd b/man/lmtp_sdr.Rd index 3bf9217d..4108a841 100644 --- a/man/lmtp_sdr.Rd +++ b/man/lmtp_sdr.Rd @@ -22,12 +22,7 @@ lmtp_sdr( learners_trt = "SL.glm", folds = 10, weights = NULL, - .bound = 1e-05, - .trim = 0.999, - .learners_outcome_folds = 10, - .learners_trt_folds = 10, - .return_full_fits = FALSE, - ... + control = lmtp_control() ) } \arguments{ @@ -35,8 +30,11 @@ lmtp_sdr( A \code{data.frame} in wide format containing all necessary variables for the estimation problem. Must not be a \code{data.table}.} -\item{trt}{[\code{character}]\cr -A vector containing the column names of treatment variables ordered by time.} +\item{trt}{[\code{character}] or [\code{list}]\cr +A vector containing the column names of treatment variables ordered by time. +Or, a list of vectors, the same length as the number of time points of observation. +Vectors should contain column names for the treatment variables at each time point. The list +should be ordered following the time ordering of the model.} \item{outcome}{[\code{character}]\cr The column name of the outcome variable. In the case of time-to-event @@ -99,25 +97,8 @@ The number of folds to be used for cross-fitting.} \item{weights}{[\code{numeric(nrow(data))}]\cr An optional vector containing sampling weights.} -\item{.bound}{[\code{numeric(1)}]\cr -Determines that maximum and minimum values (scaled) predictions -will be bounded by. The default is 1e-5, bounding predictions by 1e-5 and 0.9999.} - -\item{.trim}{[\code{numeric(1)}]\cr -Determines the amount the density ratios should be trimmed. -The default is 0.999, trimming the density ratios greater than the 0.999 percentile -to the 0.999 percentile. A value of 1 indicates no trimming.} - -\item{.learners_outcome_folds}{[\code{integer(1)}]\cr -The number of cross-validation folds for \code{learners_outcome}.} - -\item{.learners_trt_folds}{[\code{integer(1)}]\cr -The number of cross-validation folds for \code{learners_trt}.} - -\item{.return_full_fits}{[\code{logical(1)}]\cr -Return full SuperLearner fits? Default is \code{FALSE}, return only SuperLearner weights.} - -\item{...}{Extra arguments. Exists for backwards compatibility.} +\item{control}{[\code{list()}]\cr +Output of \code{lmtp_control()}.} } \value{ A list of class \code{lmtp} containing the following components: @@ -300,5 +281,24 @@ by some amount, use \code{mtp = TRUE}}. lmtp_sdr(sim_point_surv, A, Y, W, cens = C, folds = 2, shift = static_binary_on, outcome_type = "survival") + + # Example 6.1 + # Intervening on multiple exposures simultaneously. Interested in the effect of + # a modified treatment policy where variable D1 is decreased by 0.1 units and + # variable D2 is decreased by 0.5 units simultaneously. 
+ A <- list(c("D1", "D2")) + W <- paste0("C", 1:3) + Y <- "Y" + + d <- function(data, a) { + out = list( + data[[a[1]]] - 0.1, + data[[a[2]]] - 0.5 + ) + setNames(out, a) + } + + lmtp_sdr(multivariate_data, A, Y, W, shift = d, + outcome_type = "continuous", folds = 1, mtp = TRUE) } } diff --git a/man/lmtp_sub.Rd b/man/lmtp_sub.Rd index d9018b9f..3d64f177 100644 --- a/man/lmtp_sub.Rd +++ b/man/lmtp_sub.Rd @@ -20,9 +20,7 @@ lmtp_sub( learners = "SL.glm", folds = 10, weights = NULL, - .bound = 1e-05, - .learners_folds = 10, - .return_full_fits = FALSE + control = lmtp_control() ) } \arguments{ @@ -30,8 +28,11 @@ lmtp_sub( A \code{data.frame} in wide format containing all necessary variables for the estimation problem. Must not be a \code{data.table}.} -\item{trt}{[\code{character}]\cr -A vector containing the column names of treatment variables ordered by time.} +\item{trt}{[\code{character}] or [\code{list}]\cr +A vector containing the column names of treatment variables ordered by time. +Or, a list of vectors, the same length as the number of time points of observation. +Vectors should contain column names for the treatment variables at each time point. The list +should be ordered following the time ordering of the model.} \item{outcome}{[\code{character}]\cr The column name of the outcome variable. In the case of time-to-event @@ -86,15 +87,8 @@ The number of folds to be used for cross-fitting.} \item{weights}{[\code{numeric(nrow(data))}]\cr An optional vector containing sampling weights.} -\item{.bound}{[\code{numeric(1)}]\cr -Determines that maximum and minimum values (scaled) predictions -will be bounded by. The default is 1e-5, bounding predictions by 1e-5 and 0.9999.} - -\item{.learners_folds}{[\code{integer(1)}]\cr -The number of cross-validation folds for \code{learners}.} - -\item{.return_full_fits}{[\code{logical(1)}]\cr -Return full SuperLearner fits? Default is \code{FALSE}, return only SuperLearner weights.} +\item{control}{[\code{list()}]\cr +Output of \code{lmtp_control()}.} } \value{ A list of class \code{lmtp} containing the following components: @@ -256,5 +250,24 @@ continuous, or time-to-event outcomes. Supports binary, categorical, and continu lmtp_sub(sim_point_surv, A, Y, W, cens = C, folds = 2, shift = static_binary_on, outcome_type = "survival") + + # Example 6.1 + # Intervening on multiple exposures simultaneously. Interested in the effect of + # a modified treatment policy where variable D1 is decreased by 0.1 units and + # variable D2 is decreased by 0.5 units simultaneously. + A <- list(c("D1", "D2")) + W <- paste0("C", 1:3) + Y <- "Y" + + d <- function(data, a) { + out = list( + data[[a[1]]] - 0.1, + data[[a[2]]] - 0.5 + ) + setNames(out, a) + } + + lmtp_sub(multivariate_data, A, Y, W, shift = d, + outcome_type = "continuous", folds = 1) } } diff --git a/man/lmtp_tmle.Rd b/man/lmtp_tmle.Rd index b00e3770..f790bab0 100644 --- a/man/lmtp_tmle.Rd +++ b/man/lmtp_tmle.Rd @@ -22,12 +22,7 @@ lmtp_tmle( learners_trt = "SL.glm", folds = 10, weights = NULL, - .bound = 1e-05, - .trim = 0.999, - .learners_outcome_folds = 10, - .learners_trt_folds = 10, - .return_full_fits = FALSE, - ... + control = lmtp_control() ) } \arguments{ @@ -35,8 +30,11 @@ lmtp_tmle( A \code{data.frame} in wide format containing all necessary variables for the estimation problem. 
Must not be a \code{data.table}.} -\item{trt}{[\code{character}]\cr -A vector containing the column names of treatment variables ordered by time.} +\item{trt}{[\code{character}] or [\code{list}]\cr +A vector containing the column names of treatment variables ordered by time. +Or, a list of vectors, the same length as the number of time points of observation. +Vectors should contain column names for the treatment variables at each time point. The list +should be ordered following the time ordering of the model.} \item{outcome}{[\code{character}]\cr The column name of the outcome variable. In the case of time-to-event @@ -99,25 +97,8 @@ The number of folds to be used for cross-fitting.} \item{weights}{[\code{numeric(nrow(data))}]\cr An optional vector containing sampling weights.} -\item{.bound}{[\code{numeric(1)}]\cr -Determines that maximum and minimum values (scaled) predictions -will be bounded by. The default is 1e-5, bounding predictions by 1e-5 and 0.9999.} - -\item{.trim}{[\code{numeric(1)}]\cr -Determines the amount the density ratios should be trimmed. -The default is 0.999, trimming the density ratios greater than the 0.999 percentile -to the 0.999 percentile. A value of 1 indicates no trimming.} - -\item{.learners_outcome_folds}{[\code{integer(1)}]\cr -The number of cross-validation folds for \code{learners_outcome}.} - -\item{.learners_trt_folds}{[\code{integer(1)}]\cr -The number of cross-validation folds for \code{learners_trt}.} - -\item{.return_full_fits}{[\code{logical(1)}]\cr -Return full SuperLearner fits? Default is \code{FALSE}, return only SuperLearner weights.} - -\item{...}{Extra arguments. Exists for backwards compatibility.} +\item{control}{[\code{list()}]\cr +Output of \code{lmtp_control()}.} } \value{ A list of class \code{lmtp} containing the following components: @@ -297,5 +278,24 @@ by some amount, use \code{mtp = TRUE}}. lmtp_tmle(sim_point_surv, A, Y, W, cens = C, folds = 2, shift = static_binary_on, outcome_type = "survival") + + # Example 6.1 + # Intervening on multiple exposures simultaneously. Interested in the effect of + # a modified treatment policy where variable D1 is decreased by 0.1 units and + # variable D2 is decreased by 0.5 units simultaneously. + A <- list(c("D1", "D2")) + W <- paste0("C", 1:3) + Y <- "Y" + + d <- function(data, a) { + out = list( + data[[a[1]]] - 0.1, + data[[a[2]]] - 0.5 + ) + setNames(out, a) + } + + lmtp_tmle(multivariate_data, A, Y, W, shift = d, + outcome_type = "continuous", folds = 1, mtp = TRUE) } } diff --git a/man/multivariate_data.Rd b/man/multivariate_data.Rd new file mode 100644 index 00000000..41d7cd36 --- /dev/null +++ b/man/multivariate_data.Rd @@ -0,0 +1,25 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/data.R +\docType{data} +\name{multivariate_data} +\alias{multivariate_data} +\title{Simulated Multivariate Exposure Data} +\format{ +A data frame with 2000 rows and 6 variables: +\describe{ +\item{C1}{Continuous baseline variable.} +\item{C2}{Continuous baseline variable.} +\item{C3}{Continuous baseline variable.} +\item{D1}{Treatment variable one at baseline.} +\item{D2}{Treatment variable two at baseline.} +\item{Y}{Continuous outcome} +} +} +\usage{ +multivariate_data +} +\description{ +A dataset with a continuous outcome, three baseline covariates, +and two treatment variables. 
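Pulling together the pieces documented above — a list-valued `trt`, a shift function that returns a named list of shifted columns, the `multivariate_data` example set, and `lmtp_control()` — a 1.4.0-style call looks like the sketch below. It mirrors Example 6.1; the specific values passed to `lmtp_control()` are illustrative, not recommendations.

``` r
library(lmtp)

A <- list(c("D1", "D2"))
W <- paste0("C", 1:3)

# Shift both treatment columns at once and return them as a named list.
d <- function(data, a) {
  setNames(list(data[[a[1]]] - 0.1, data[[a[2]]] - 0.5), a)
}

lmtp_tmle(
  multivariate_data, A, "Y", W,
  shift = d, mtp = TRUE, outcome_type = "continuous", folds = 1,
  control = lmtp_control(.learners_outcome_folds = 5, .return_full_fits = TRUE)
)
```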
+} +\keyword{datasets} diff --git a/tests/testthat/test-checks.R b/tests/testthat/test-checks.R index 31cce903..39b3aebc 100644 --- a/tests/testthat/test-checks.R +++ b/tests/testthat/test-checks.R @@ -59,7 +59,7 @@ test_that("Variables dont exist", { expect_error( lmtp_sub(sim_cens, A, "Y", time_vary = L, cens = cens), - "Assertion on 'c(trt, outcome, baseline, unlist(time_vary), cens, id)' failed: Must be a subset of {'L1','A1','C1','L2','A2','C2','Y'}, but has additional elements {'A'}.", + "Assertion on 'c(unlist(trt), outcome, baseline, unlist(time_vary), cens, id)' failed: Must be a subset of {'L1','A1','C1','L2','A2','C2','Y'}, but has additional elements {'A'}.", fixed = TRUE ) }) @@ -160,7 +160,7 @@ test_that("Contrast assertions", { "Assertion on 'fits' failed: Contrasts not implemented for substitution/IPW estimators." ) - fit <- lmtp_sdr(sim_cens, A, "Y", time_vary = L, cens = cens, folds = 1) + fit <- lmtp_sdr(sim_cens, A, "Y", time_vary = L, cens = cens, folds = 1, mtp = TRUE) expect_error( lmtp_contrast(fit, ref = c(0.1, 0.2)), "Assertion on 'ref' failed: Must either be a single numeric value or another lmtp object." @@ -171,7 +171,7 @@ test_that("Contrast assertions", { "Assertion on 'fits' failed: Objects must be of type 'lmtp'." ) - fit <- lmtp_sdr(sim_cens, A, "Y", time_vary = L, cens = cens, folds = 1, outcome_type = "continuous") + fit <- lmtp_sdr(sim_cens, A, "Y", time_vary = L, cens = cens, folds = 1, outcome_type = "continuous", mtp = TRUE) expect_error( lmtp_contrast(fit, ref = 0.1, type = "rr"), "Assertion on 'type' failed: 'rr' specified but one or more outcome types are not 'binomial' or 'survival'." diff --git a/tests/testthat/test-shifted.R b/tests/testthat/test-shifted.R index 1f80130c..b85f90ba 100644 --- a/tests/testthat/test-shifted.R +++ b/tests/testthat/test-shifted.R @@ -15,17 +15,17 @@ sub <- ipw <- sw(lmtp_ipw(sim_cens, a, "Y", NULL, nodes, - cens, k = 0, shifted = sc, folds = 2, intervention_type = "mtp")) + cens, k = 0, shifted = sc, folds = 2, mtp = TRUE)) tmle <- sw(lmtp_tmle(sim_cens, a, "Y", nodes, baseline = NULL, cens, k = 0, shifted = sc, - outcome_type = "binomial", folds = 2, intervention_type = "mtp")) + outcome_type = "binomial", folds = 2, mtp = TRUE)) sdr <- sw(lmtp_sdr(sim_cens, a, "Y", nodes, baseline = NULL, cens, k = 0, shifted = sc, - outcome_type = "binomial", folds = 2, intervention_type = "mtp")) + outcome_type = "binomial", folds = 2, mtp = TRUE)) # tests test_that("estimator fidelity with shifted data supplied", { diff --git a/tests/testthat/test-survey.R b/tests/testthat/test-survey.R index 4cef245d..2cdd6d5c 100644 --- a/tests/testthat/test-survey.R +++ b/tests/testthat/test-survey.R @@ -6,7 +6,7 @@ n <- 1e4 W1 <- rbinom(n, size = 1, prob = 0.5) W2 <- rbinom(n, size = 1, prob = 0.65) A <- rbinom(n, size = 1, prob = plogis(-0.4 + 0.2 * W2 + 0.15 * W1)) -Y.1 <-rbinom(n, size = 1, prob = plogis(-1 + 1 - 0.1 * W1 + 0.3 * W2)) +Y.1 <- rbinom(n, size = 1, prob = plogis(-1 + 1 - 0.1 * W1 + 0.3 * W2)) Y.0 <- rbinom(n, size = 1, prob = plogis(-1 + 0 - 0.1 * W1 + 0.3 * W2)) Y <- Y.1 * A + Y.0 * (1 - A) diff --git a/tests/testthat/test-time_varying_treatment.R b/tests/testthat/test-time_varying_treatment.R index dc797933..7e7cd1e0 100644 --- a/tests/testthat/test-time_varying_treatment.R +++ b/tests/testthat/test-time_varying_treatment.R @@ -24,9 +24,9 @@ d <- function(data, trt) { truth <- 0.305 sub <- sw(lmtp_sub(tmp, a, "Y", time_vary = time_varying, shift = d, folds = 1)) -ipw <- sw(lmtp_ipw(tmp, a, "Y", time_vary = time_varying, 
shift = d, intervention_type = "mtp", folds = 1)) -tmle <- sw(lmtp_tmle(tmp, a, "Y", time_vary = time_varying, shift = d, intervention_type = "mtp", folds = 1)) -sdr <- sw(lmtp_sdr(tmp, a, "Y", time_vary = time_varying, shift = d, intervention_type = "mtp", folds = 1)) +ipw <- sw(lmtp_ipw(tmp, a, "Y", time_vary = time_varying, shift = d, mtp = T, folds = 1)) +tmle <- sw(lmtp_tmle(tmp, a, "Y", time_vary = time_varying, shift = d, mtp = T, folds = 1)) +sdr <- sw(lmtp_sdr(tmp, a, "Y", time_vary = time_varying, shift = d, mtp = T, folds = 1)) test_that("time varying treatment fidelity, t = 4", { expect_equal(truth, sub$theta, tolerance = 0.025)
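The test updates above reflect the two interface changes in this release: `intervention_type = "mtp"` is replaced by the logical `mtp` flag, and the dot-prefixed tuning arguments move into `control = lmtp_control()`. A rough before/after sketch using the objects from the tests above (the "before" call reconstructs the older argument style):

``` r
# Before (pre-1.4.0 style, now deprecated):
# lmtp_sdr(sim_cens, A, "Y", time_vary = L, cens = cens, folds = 1,
#          intervention_type = "mtp", .trim = 0.975)

# After (1.4.0 style):
lmtp_sdr(sim_cens, A, "Y", time_vary = L, cens = cens, folds = 1,
         mtp = TRUE, control = lmtp_control(.trim = 0.975))
```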