Merge pull request #138 from nt-williams/devel
Devel
nt-williams authored Jun 26, 2024
2 parents 432551c + 5dfc790 commit d16cb0f
Showing 39 changed files with 870 additions and 460 deletions.
3 changes: 3 additions & 0 deletions CRAN-SUBMISSION
@@ -0,0 +1,3 @@
Version: 1.4.0
Date: 2024-06-13 18:14:55 UTC
SHA: 2452cb048b6037b7a1ff84e89ec102ef6883ca62
9 changes: 5 additions & 4 deletions DESCRIPTION
@@ -1,6 +1,6 @@
Package: lmtp
Title: Non-Parametric Causal Effects of Feasible Interventions Based on Modified Treatment Policies
Version: 1.3.3
Version: 1.4.0
Authors@R:
c(person(given = "Nicholas",
family = "Williams",
@@ -14,7 +14,7 @@ Authors@R:
comment = c(ORCID = "0000-0001-9056-2047")))
Description: Non-parametric estimators for causal effects based on longitudinal modified treatment
policies as described in Diaz, Williams, Hoffman, and Schenck <doi:10.1080/01621459.2021.1955691>, traditional point treatment,
and traditional longitudinal effects. Continuous, binary, and categorical treatments are allowed as well are
and traditional longitudinal effects. Continuous, binary, categorical, and multivariate treatments are allowed, as are
censored outcomes. The treatment mechanism is estimated via a density ratio classification procedure
irrespective of treatment variable type. For both continuous and binary outcomes, additive treatment effects
can be calculated and relative risks and odds ratios may be calculated for binary outcomes.
@@ -24,7 +24,7 @@ License: AGPL-3
Encoding: UTF-8
LazyData: true
Roxygen: list(markdown = TRUE)
RoxygenNote: 7.2.1
RoxygenNote: 7.3.1
Imports:
stats,
nnls,
@@ -36,7 +36,8 @@ Imports:
progressr,
data.table (>= 1.13.0),
checkmate (>= 2.1.0),
SuperLearner
SuperLearner,
schoolmath
URL: http://beyondtheate.com, https://github.com/nt-williams/lmtp
BugReports: https://github.com/nt-williams/lmtp/issues
Suggests:
3 changes: 3 additions & 0 deletions NAMESPACE
@@ -5,7 +5,9 @@ S3method(print,lmtp_contrast)
S3method(tidy,lmtp)
export(create_node_list)
export(event_locf)
export(ipsi)
export(lmtp_contrast)
export(lmtp_control)
export(lmtp_ipw)
export(lmtp_sdr)
export(lmtp_sub)
@@ -31,6 +33,7 @@ importFrom(stats,predict)
importFrom(stats,qlogis)
importFrom(stats,qnorm)
importFrom(stats,quantile)
importFrom(stats,runif)
importFrom(stats,sd)
importFrom(stats,var)
importFrom(stats,weighted.mean)
18 changes: 18 additions & 0 deletions NEWS.md
@@ -1,3 +1,21 @@
# lmtp 1.4.0

### New Features

- Can now estimate the effects of simultaneous interventions on multiple variables (see the sketch after these notes).
- New pre-packaged shift function, `ipsi()`, for estimating IPSI effects using the risk ratio.
- `lmtp_control()` now replaces extra estimator arguments.

### Bug Fixes

- Standard errors now incorporate survey weights (see issue #134).
- Fixed a bug when `shift` is `NULL` and `data` is a tibble (see issue #137).

### General

- The `intervention_type` argument has been fully deprecated.
- Now attempting to detect intervention type errors (see issue #98).
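
As a companion to the notes above, a minimal sketch of the 1.4.0 interface using the `multivariate_data` set added in this release. The shift function and `.trim` value are illustrative assumptions, not prescribed usage:

```r
library(lmtp)

# Multivariate treatment: a list of character vectors, one per time point;
# here, one time point with two simultaneous treatment variables.
fit <- lmtp_sdr(
  data = multivariate_data,
  trt = list(c("D1", "D2")),
  outcome = "Y",
  baseline = c("C1", "C2", "C3"),
  shift = function(data, trt) data[[trt]] - 0.5,  # illustrative MTP
  mtp = TRUE,
  outcome_type = "continuous",
  control = lmtp_control(.trim = 0.995)  # extra estimator args live here now
)
```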

# lmtp 1.3.3

### New Features
63 changes: 60 additions & 3 deletions R/checks.R
@@ -2,7 +2,11 @@ check_lmtp_data <- function(x, trt, outcome, baseline, time_vary, cens, id) {
for (t in 1:determine_tau(outcome, trt)) {
ci <- censored(x, cens, t)$j
di <- at_risk(x, risk_indicators(outcome), t, TRUE)
trt_t <- ifelse(length(trt) == 1, trt, trt[t])
if (length(trt) > 1) {
trt_t <- trt[[t]]
} else {
trt_t <- trt[[1]]
}
data_t <- x[ci & di, c(trt_t, baseline, unlist(time_vary[t])), drop = FALSE]

if (any(is.na(data_t))) {
@@ -15,6 +19,14 @@ check_lmtp_data <- function(x, trt, outcome, baseline, time_vary, cens, id) {

assertLmtpData <- checkmate::makeAssertionFunction(check_lmtp_data)

assert_trt <- function(trt, tau) {
is_list <- is.list(trt)
if (!isTRUE(is_list)) {
return(assertTrtCharacter(trt, tau))
}
assertTrtList(trt, tau)
}

check_trt_character <- function(trt, tau) {
is_character <- checkmate::check_character(trt)
if (!isTRUE(is_character)) {
@@ -30,6 +42,21 @@ check_trt_character <- function(trt, tau) {

assertTrtCharacter <- checkmate::makeAssertionFunction(check_trt_character)

check_trt_list <- function(trt, tau) {
is_list <- checkmate::check_list(trt)
if (!isTRUE(is_list)) {
return(is_list)
}

if (length(trt) != 1 && length(trt) != tau) {
return(paste0("'trt' should be of length 1 or ", tau))
}

TRUE
}

assertTrtList <- checkmate::makeAssertionFunction(check_trt_list)
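
For orientation: the `check_*()`/`assert*()` pairs in checks.R follow checkmate's standard pattern, where a check function returns `TRUE` or a failure message and `checkmate::makeAssertionFunction()` builds the throwing `assert*()` variant. A self-contained sketch with a hypothetical `check_positive()` (not package code):

```r
library(checkmate)

check_positive <- function(x) {
  if (!is.numeric(x) || any(x <= 0)) {
    return("Must be a positive numeric vector")
  }
  TRUE
}

# Builds assertPositive(), which throws the message above on failure
assertPositive <- makeAssertionFunction(check_positive)

assertPositive(c(1, 2, 3))  # passes; invisibly returns its argument
```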

check_reserved_names <- function(x) {
bad_names <- c("lmtp_id", "tmp_lmtp_stack_indicator", "tmp_lmtp_scaled_outcome") %in% names(x)
if (!any(bad_names)) {
@@ -135,12 +162,42 @@ assertDr <- checkmate::makeAssertionFunction(check_dr)

check_ref_class <- function(x) {
if (!is.lmtp(x)) {
is_num <- checkmate::check_number(x)
if (!isTRUE(is_num)) {
is_num <- checkmate::test_number(x)
if (isFALSE(is_num)) {
return("Must either be a single numeric value or another lmtp object")
}
}
TRUE
}

assertRefClass <- checkmate::makeAssertionFunction(check_ref_class)

check_trt_type <- function(data, trt, mtp) {
is_decimal <- vector("logical", length(trt))
for (i in seq_along(trt)) {
a <- data[[trt[i]]]
if (is.character(a) | is.factor(a)) next
is_decimal[i] <- any(schoolmath::is.decimal(a[!is.na(a)]))
}
if (any(is_decimal) & isFALSE(mtp)) {
cli::cli_warn("Detected decimalish `trt` values and {.code mtp = FALSE}. Consider setting {.code mtp = TRUE} if getting errors.")
}
}
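
`check_trt_type()` leans on `schoolmath::is.decimal()` (the dependency newly added to DESCRIPTION) as a heuristic for continuous-looking treatments. A rough illustration, assuming `is.decimal()` flags values with a fractional part:

```r
a <- c(1, 2, 3)       # integer-valued treatment: should not be flagged
b <- c(0.25, 1, 2.5)  # fractional values: should be flagged

any(schoolmath::is.decimal(a[!is.na(a)]))  # FALSE -> no warning
any(schoolmath::is.decimal(b[!is.na(b)]))  # TRUE  -> warns when mtp = FALSE
```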

check_same_weights <- function(weights) {
if (length(weights) == 1) {
check <- TRUE
} else if (length(weights) == 2) {
check <- identical(weights[[1]], weights[[2]])
} else {
check <- all(sapply(1:(length(weights) - 1), function(i) identical(weights[[i]], weights[[i + 1]])))
}

if (isFALSE(check)) {
return("Weights must all be the same.")
}
TRUE
}

assertSameWeights <- checkmate::makeAssertionFunction(check_same_weights)

14 changes: 11 additions & 3 deletions R/contrasts.R
@@ -25,6 +25,12 @@ lmtp_contrast <- function(..., ref, type = c("additive", "rr", "or")) {
assertRefClass(ref)
assertContrastType(match.arg(type), fits, .var.name = "type")

weights <- lapply(fits, function(x) x$weights)
if (isFALSE(is.numeric(ref))) {
weights <- append(weights, list(ref$weights))
}
assertSameWeights(weights)

if (is.numeric(ref)) {
type <- "additive"
message("Non-estimated reference value, defaulting type = 'additive'")
@@ -68,7 +74,7 @@ contrast_additive_single <- function(fit, ref) {
fit$id <- 1:length(eif)
}

clusters <- split(eif, fit$id)
clusters <- split(eif*fit$weights, fit$id)
j <- length(clusters)
std.error <- sqrt(var(vapply(clusters, function(x) mean(x), 1)) / j)
conf.low <- theta - qnorm(0.975) * std.error
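
The survey-weight fix is visible here: the EIF is multiplied by `fit$weights` before clustering on `id`. A standalone sketch of that variance computation (the helper name is hypothetical; the body mirrors the lines above):

```r
# Influence-function SE with weights and id clustering: weight the EIF,
# average within clusters, then take var of cluster means over j clusters.
weighted_clustered_se <- function(eif, weights, id) {
  clusters <- split(eif * weights, id)
  j <- length(clusters)
  sqrt(var(vapply(clusters, mean, numeric(1))) / j)
}

set.seed(1)
weighted_clustered_se(rnorm(100), rep(1, 100), rep(1:25, each = 4))
```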
@@ -107,7 +113,8 @@ contrast_rr <- function(fits, ref) {

contrast_rr_single <- function(fit, ref) {
theta <- fit$theta / ref$theta
log_eif <- (fit$eif / fit$theta) - (ref$eif / ref$theta)
log_eif <- (fit$eif*fit$weights / fit$theta) -
(ref$eif*ref$weights / ref$theta)

if (is.null(fit$id)) {
fit$id <- 1:length(eif)
@@ -151,7 +158,8 @@ contrast_or <- function(fits, ref) {

contrast_or_single <- function(fit, ref) {
theta <- (fit$theta / (1 - fit$theta)) / (ref$theta / (1 - ref$theta))
log_eif <- (fit$eif / (fit$theta * (1 - fit$theta))) - (ref$eif / (ref$theta * (1 - ref$theta)))
log_eif <- (fit$eif*fit$weights / (fit$theta * (1 - fit$theta))) -
(ref$eif*ref$weights / (ref$theta * (1 - ref$theta)))

if (is.null(fit$id)) {
fit$id <- 1:length(eif)
16 changes: 16 additions & 0 deletions R/data.R
@@ -82,3 +82,19 @@
#' \item{Y2}{Final outcome node.}
#' }
"sim_timevary_surv"

#' Simulated Multivariate Exposure Data
#'
#' A dataset with a continuous outcome, three baseline covariates,
#' and two treatment variables.
#'
#' @format A data frame with 2000 rows and 6 variables:
#' \describe{
#' \item{C1}{Continuous baseline variable.}
#' \item{C2}{Continuous baseline variable.}
#' \item{C3}{Continuous baseline variable.}
#' \item{D1}{Treatment variable one at baseline.}
#' \item{D2}{Treatment variable two at baseline.}
#'   \item{Y}{Continuous outcome.}
#' }
"multivariate_data"
48 changes: 28 additions & 20 deletions R/density_ratios.R
@@ -1,32 +1,36 @@
cf_r <- function(Task, learners, mtp, lrnr_folds, trim, full_fits, pb) {
fopts <- options("lmtp.bound", "lmtp.trt.length")
out <- list()

cf_r <- function(task, learners, mtp, control, pb) {
out <- vector("list", length = length(task$folds))

if (length(learners) == 1 && learners == "SL.mean") {
warning("Using 'SL.mean' as the only learner of the density ratios will always result in a misspecified model! If your exposure is randomized, consider using `c('SL.glm', 'SL.glmnet')`.",
call. = FALSE)
}

for (fold in seq_along(Task$folds)) {
for (fold in seq_along(task$folds)) {
out[[fold]] <- future::future({
options(fopts)

estimate_r(
get_folded_data(Task$natural, Task$folds, fold),
get_folded_data(Task$shifted, Task$folds, fold),
Task$trt, Task$cens, Task$risk, Task$tau, Task$node_list$trt,
learners, pb, mtp, lrnr_folds, full_fits
get_folded_data(task$natural, task$folds, fold),
get_folded_data(task$shifted, task$folds, fold),
task$trt,
task$cens,
task$risk,
task$tau,
task$node_list$trt,
learners,
pb,
mtp,
control
)
},
seed = TRUE)
}

trim_ratios(recombine_ratios(future::value(out), Task$folds), trim)
trim_ratios(recombine_ratios(future::value(out), task$folds), control$.trim)
}
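
`cf_r()` parallelizes over cross-fitting folds with futures: one `future::future()` per fold, collected by `future::value()`. A minimal sketch of the same pattern, with a toy computation standing in for `estimate_r()`:

```r
library(future)
plan(multisession)  # any plan works; plan(sequential) for serial execution

folds <- 1:5
out <- vector("list", length(folds))
for (fold in folds) {
  out[[fold]] <- future({
    mean(rnorm(1e4))  # stand-in for the per-fold density ratio estimation
  }, seed = TRUE)     # seed = TRUE requests parallel-safe RNG, as in cf_r()
}
results <- value(out)  # resolves the whole list of futures
```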

estimate_r <- function(natural, shifted, trt, cens, risk, tau, node_list, learners, pb, mtp, lrnr_folds, full_fits) {
estimate_r <- function(natural, shifted, trt, cens, risk, tau, node_list, learners, pb, mtp, control) {
densratios <- matrix(nrow = nrow(natural$valid), ncol = tau)
fits <- list()
fits <- vector("list", length = tau)

for (t in 1:tau) {
jrt <- rep(censored(natural$train, cens, t)$j, 2)
@@ -35,9 +39,13 @@ estimate_r <- function(natural, shifted, trt, cens, risk, tau, node_list, learne
jrv <- censored(natural$valid, cens, t)$j
drv <- at_risk(natural$valid, risk, t)

trt_t <- ifelse(length(trt) > 1, trt[t], trt)
if (length(trt) > 1) {
trt_t <- trt[[t]]
} else {
trt_t <- trt[[1]]
}

frv <- followed_rule(natural$valid[[trt_t]], shifted$valid[[trt_t]], mtp)
frv <- followed_rule(natural$valid[, trt_t], shifted$valid[, trt_t], mtp)

vars <- c(node_list[[t]], cens[[t]])
stacked <- stack_data(natural$train, shifted$train, trt, cens, t)
@@ -48,10 +56,10 @@ estimate_r <- function(natural, shifted, trt, cens, risk, tau, node_list, learne
learners,
"binomial",
stacked[jrt & drt, ]$lmtp_id,
lrnr_folds
control$.learners_trt_folds
)

if (full_fits) {
if (control$.return_full_fits) {
fits[[t]] <- fit
} else {
fits[[t]] <- extract_sl_weights(fit)
@@ -73,7 +81,7 @@ stack_data <- function(natural, shifted, trt, cens, tau) {
shifted_half <- natural

if (length(trt) > 1 || tau == 1) {
shifted_half[[trt[tau]]] <- shifted[[trt[tau]]]
shifted_half[, trt[[tau]]] <- shifted[, trt[[tau]]]
}

if (!is.null(cens)) {
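
The stacking above feeds the density-ratio-by-classification procedure mentioned in DESCRIPTION: label natural rows 0 and shifted rows 1, fit a binary classifier, and convert predicted probabilities into odds. A toy sketch of the idea, with a plain GLM standing in for the SuperLearner call:

```r
set.seed(1)
n <- 500
natural <- data.frame(L = rnorm(n))
natural$A <- natural$L + rnorm(n)  # treatment depends on L
shifted <- natural
shifted$A <- shifted$A + 1         # additive shift d(a, l) = a + 1

stacked <- rbind(
  cbind(natural, lmtp_stack = 0),
  cbind(shifted, lmtp_stack = 1)
)
fit <- glm(lmtp_stack ~ L + A, data = stacked, family = binomial())
p <- predict(fit, newdata = natural, type = "response")
ratio <- p / (1 - p)  # estimated density ratio of shifted vs natural law
```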