From 21230bf91e288eba209207c747c7a6264a112588 Mon Sep 17 00:00:00 2001
From: ismayc
Date: Thu, 2 Aug 2018 17:18:52 -0700
Subject: [PATCH 01/78] Add visualise as alias

---
 NAMESPACE                       | 1 +
 R/visualize.R                   | 4 ++++
 man/visualize.Rd                | 6 ++++++
 tests/testthat/test-visualize.R | 4 ++++
 4 files changed, 15 insertions(+)

diff --git a/NAMESPACE b/NAMESPACE
index 11f3fcff..a6bf6e27 100755
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -16,6 +16,7 @@ export(rep_sample_n)
 export(specify)
 export(t_stat)
 export(t_test)
+export(visualise)
 export(visualize)
 importFrom(dplyr,bind_rows)
 importFrom(dplyr,group_by)
diff --git a/R/visualize.R b/R/visualize.R
index 68d57da6..76e200e0 100755
--- a/R/visualize.R
+++ b/R/visualize.R
@@ -675,3 +675,7 @@ visualize_both <- function(data, bins,
 get_percentile <- function(vector, observation) {
   stats::ecdf(vector)(observation)
 }
+
+#' @rdname visualize
+#' @export
+visualise <- visualize
\ No newline at end of file
diff --git a/man/visualize.Rd b/man/visualize.Rd
index e7c2a8c4..43cbc7ce 100755
--- a/man/visualize.Rd
+++ b/man/visualize.Rd
@@ -2,12 +2,18 @@
 % Please edit documentation in R/visualize.R
 \name{visualize}
 \alias{visualize}
+\alias{visualise}
 \title{Visualize statistical inference}
 \usage{
 visualize(data, bins = 15, method = "simulation",
   dens_color = "black", obs_stat = NULL, obs_stat_color = "red2",
   pvalue_fill = "pink", direction = NULL, endpoints = NULL,
   endpoints_color = "mediumaquamarine", ci_fill = "turquoise", ...)
+
+visualise(data, bins = 15, method = "simulation",
+  dens_color = "black", obs_stat = NULL, obs_stat_color = "red2",
+  pvalue_fill = "pink", direction = NULL, endpoints = NULL,
+  endpoints_color = "mediumaquamarine", ci_fill = "turquoise", ...)
 }
 \arguments{
 \item{data}{The output from \code{\link[=calculate]{calculate()}}.}
diff --git a/tests/testthat/test-visualize.R b/tests/testthat/test-visualize.R
index 0a3e3a46..78d5272e 100644
--- a/tests/testthat/test-visualize.R
+++ b/tests/testthat/test-visualize.R
@@ -51,6 +51,10 @@ obs_F <- anova(
 
 test_that("visualize basic tests", {
   expect_silent(visualize(Sepal.Width_resamp))
+
+  # visualise also works
+  expect_silent(visualise(Sepal.Width_resamp))
+
   expect_error(
     Sepal.Width_resamp %>% visualize(bins = "yep")
   )

From f1ad7cbba91d8a3ff376886f6dcd1e1da30c1d40 Mon Sep 17 00:00:00 2001
From: "evgeni.chasnovski"
Date: Sun, 5 Aug 2018 11:31:50 +0300
Subject: [PATCH 02/78] Remove trailing spaces in code.

Used {styler} (1.0.2): `styler::style_pkg(scope = "none", strict = FALSE)`.
--- R/calculate.R | 90 +++---- R/conf_int.R | 16 +- R/generate.R | 26 +- R/hypothesize.R | 25 +- R/infer.R | 2 +- R/p_value.R | 30 +-- R/print_methods.R | 4 +- R/rep_sample_n.R | 1 - R/set_params.R | 58 ++-- R/specify.R | 10 +- R/utils.R | 62 ++--- R/visualize.R | 326 +++++++++++----------- R/wrappers.R | 36 +-- tests/testthat/test-calculate.R | 76 +++--- tests/testthat/test-conf_int.R | 17 +- tests/testthat/test-generate.R | 96 +++---- tests/testthat/test-hypothesize.R | 32 +-- tests/testthat/test-p_value.R | 48 ++-- tests/testthat/test-rep_sample_n.R | 13 +- tests/testthat/test-utils.R | 4 +- tests/testthat/test-visualize.R | 420 ++++++++++++++--------------- tests/testthat/test-wrappers.R | 62 ++--- 22 files changed, 724 insertions(+), 730 deletions(-) diff --git a/R/calculate.R b/R/calculate.R index 9eeccd81..56147753 100755 --- a/R/calculate.R +++ b/R/calculate.R @@ -51,12 +51,12 @@ calculate <- function(x, check_for_factor_stat(x, stat, explanatory_variable(x)) check_args_and_attr(x, explanatory_variable(x), response_variable(x), stat) check_point_params(x, stat) - + if (!has_response(x)) stop_glue( "The response variable is not set. Make sure to `specify()` it first." ) - + if (is.null(attr(x, "generate")) || !attr(x, "generate")) { if (is.null(attr(x, "null"))) { x$replicate <- 1L @@ -77,7 +77,7 @@ calculate <- function(x, "implemented) for `stat` = \"{stat}\". Are you missing ", "a `generate()` step?" ) - + else if (!(stat %in% c("Chisq", "prop"))){ # From `hypothesize()` to `calculate()` # Catch-all if generate was not called @@ -87,13 +87,13 @@ calculate <- function(x, return(x) } } - + if (stat %in% c("diff in means", "diff in medians", "diff in props") || (!is.null(attr(x, "theory_type")) && attr(x, "theory_type") %in% c("Two sample props z", "Two sample t"))) { check_order(x, explanatory_variable(x), order) } - + if (!( stat %in% c("diff in means", "diff in medians", "diff in props") || ( @@ -108,11 +108,11 @@ calculate <- function(x, ) } } - + # Use S3 method to match correct calculation - result <- calc_impl(structure(stat, class = gsub(" ", "_", stat)), + result <- calc_impl(structure(stat, class = gsub(" ", "_", stat)), x, order, ...) - + if ("NULL" %in% class(result)) stop_glue( "Your choice of `stat` is invalid for the ", @@ -120,14 +120,14 @@ calculate <- function(x, ) # else # class(result) <- append("infer", class(result)) - + result <- set_attributes(to = result, from = x) attr(result, "stat") <- stat - + # For returning a 1x1 observed statistic value if (nrow(result) == 1) result <- select(result, stat) - + return(result) } @@ -138,16 +138,16 @@ calc_impl <- calc_impl.mean <- function(stat, x, order, ...) { col <- base::setdiff(names(x), "replicate") - + x %>% dplyr::group_by(replicate) %>% dplyr::summarize(stat = mean(!!(sym(col)), ...)) - + } calc_impl.median <- function(stat, x, order, ...) { col <- base::setdiff(names(x), "replicate") - + x %>% dplyr::group_by(replicate) %>% dplyr::summarize(stat = stats::median(!!(sym(col)), ...)) @@ -155,7 +155,7 @@ calc_impl.median <- function(stat, x, order, ...) { calc_impl.sd <- function(stat, x, order, ...) { col <- base::setdiff(names(x), "replicate") - + x %>% dplyr::group_by(replicate) %>% dplyr::summarize(stat = stats::sd(!!(sym(col)), ...)) @@ -163,7 +163,7 @@ calc_impl.sd <- function(stat, x, order, ...) { calc_impl.prop <- function(stat, x, order, ...) 
{ col <- base::setdiff(names(x), "replicate") - + ## No longer needed with implementation of `check_point_params()` # if(!is.factor(x[[col]])){ # stop_glue( @@ -171,13 +171,13 @@ calc_impl.prop <- function(stat, x, order, ...) { # "variable is not a factor." # ) # } - + if (is.null(attr(x, "success"))) stop_glue( 'To calculate a proportion, the `"success"` argument ', 'must be provided in `specify()`.' ) - + success <- attr(x, "success") x %>% dplyr::group_by(replicate) %>% @@ -208,8 +208,8 @@ calc_impl.slope <- function(stat, x, order, ...) { } calc_impl.correlation <- function(stat, x, order, ...) { - x %>% - dplyr::summarize(stat = stats::cor(!!attr(x, "explanatory"), + x %>% + dplyr::summarize(stat = stats::cor(!!attr(x, "explanatory"), !!attr(x, "response"))) } @@ -234,7 +234,7 @@ calc_impl.diff_in_medians <- function(stat, x, order, ...) { calc_impl.Chisq <- function(stat, x, order, ...) { ## The following could stand to be cleaned up - + if (is.null(attr(x, "explanatory"))) { # Chi-Square Goodness of Fit if (!is.null(attr(x, "params"))) { @@ -243,7 +243,7 @@ calc_impl.Chisq <- function(stat, x, order, ...) { dplyr::summarize(stat = stats::chisq.test(table(!!( attr(x, "response") )), p = attr(x, "params"))$stat) - + } else { # Straight from `specify()` stop_glue("In order to calculate a Chi-Square Goodness of Fit ", @@ -252,7 +252,7 @@ calc_impl.Chisq <- function(stat, x, order, ...) { "using `calculate()`") } - + } else { # This is not matching with chisq.test # obs_tab <- x %>% @@ -266,22 +266,22 @@ calc_impl.Chisq <- function(stat, x, order, ...) { # dplyr::summarize(stat = sum((table(!!(attr(x, "response")), # !!(attr(x, "explanatory"))) # - expected)^2 / expected, ...)) - + # Chi-Square Test of Independence - + result <- x %>% dplyr::do(broom::tidy(suppressWarnings(stats::chisq.test(table( .[[as.character(attr(x, "response"))]], .[[as.character(attr(x, "explanatory"))]] ))))) %>% dplyr::ungroup() - + if (!is.null(attr(x, "generate"))) result <- result %>% dplyr::select(replicate, stat = statistic) else result <- result %>% dplyr::select(stat = statistic) - + attr(result, "response") <- attr(x, "response") attr(result, "success") <- attr(x, "success") attr(result, "explanatory") <- attr(x, "explanatory") @@ -290,16 +290,16 @@ calc_impl.Chisq <- function(stat, x, order, ...) { attr(result, "distr_param") <- attr(x, "distr_param") attr(result, "distr_param2") <- attr(x, "distr_param2") attr(result, "theory_type") <- attr(x, "theory_type") - + result - + } } calc_impl.diff_in_props <- function(stat, x, order, ...) { col <- attr(x, "response") success <- attr(x, "success") - + x %>% dplyr::group_by(replicate,!!attr(x, "explanatory")) %>% dplyr::summarize(prop = mean(!!sym(col) == success, ...)) %>% @@ -309,17 +309,17 @@ calc_impl.diff_in_props <- function(stat, x, order, ...) { calc_impl.t <- function(stat, x, order, ...) { # Two sample means - + if (attr(x, "theory_type") == "Two sample t") { # Re-order levels x <- reorder_explanatory(x, order) - + df_out <- x %>% dplyr::summarize(stat = stats::t.test( !!attr(x, "response") ~ !!attr(x, "explanatory"), ... )[["statistic"]]) } - + # Standardized slope and standardized correlation are commented out # since there currently is no way to specify which one and # the standardization formulas are different. @@ -344,7 +344,7 @@ calc_impl.t <- function(stat, x, order, ...) 
{ # ) %>% # dplyr::mutate(stat = corr * (sqrt(nrow(x) - 2)) / sqrt(1 - corr ^ 2)) # } - + # One sample mean else if (attr(x, "theory_type") == "One sample t") { # For bootstrap @@ -359,7 +359,7 @@ calc_impl.t <- function(stat, x, order, ...) { else { x %>% dplyr::summarize(stat = stats::t.test( - !!attr(x, "response"), + !!attr(x, "response"), mu = attr(x, "params"), ...)[["statistic"]]) } @@ -371,10 +371,10 @@ calc_impl.z <- function(stat, x, order, ...) { if (attr(x, "theory_type") == "Two sample props z") { col <- attr(x, "response") success <- attr(x, "success") - + x$explan <- factor(explanatory_variable(x), levels = c(order[1], order[2])) - + aggregated <- x %>% dplyr::group_by(replicate, explan) %>% dplyr::summarize( @@ -384,7 +384,7 @@ calc_impl.z <- function(stat, x, order, ...) { num_suc = sum(rlang::eval_tidy(col) == rlang::eval_tidy(success)) ) - + df_out <- aggregated %>% dplyr::summarize( diff_prop = prop[explan == order[1]] @@ -398,30 +398,30 @@ calc_impl.z <- function(stat, x, order, ...) { stat = diff_prop / denom ) %>% dplyr::select(-total_suc,-n1,-n2) - + df_out - + } else # One sample proportion if (attr(x, "theory_type") == "One sample prop z") { # When `hypothesize()` has been called success <- attr(x, "success") - + p0 <- attr(x, "params")[1] num_rows <- nrow(x) / length(unique(x$replicate)) - + col <- attr(x, "response") # if(is.null(success)) # success <- quo(get_par_levels(x)[1]) # Error given instead - + df_out <- x %>% dplyr::summarize(stat = (mean( rlang::eval_tidy(col) == rlang::eval_tidy(success), ... ) - p0) / sqrt((p0 * (1 - p0)) / num_rows)) - + df_out - + # Straight from `specify()` doesn't make sense # since standardizing requires a hypothesized value } diff --git a/R/conf_int.R b/R/conf_int.R index 6c1a47e0..81bfffd2 100644 --- a/R/conf_int.R +++ b/R/conf_int.R @@ -35,15 +35,15 @@ NULL #' @rdname get_ci #' @export -conf_int <- function(x, level = 0.95, type = "percentile", +conf_int <- function(x, level = 0.95, type = "percentile", point_estimate = NULL){ - + check_ci_args(x, level, type, point_estimate) - + if(type == "percentile") { - ci_vec <- stats::quantile(x[["stat"]], + ci_vec <- stats::quantile(x[["stat"]], probs = c((1 - level) / 2, level + (1 - level) / 2)) - + ci <- tibble::tibble(ci_vec[1], ci_vec[2]) names(ci) <- names(ci_vec) } else { @@ -53,12 +53,12 @@ conf_int <- function(x, level = 0.95, type = "percentile", lower = point_estimate - multiplier * stats::sd(x[["stat"]]), upper = point_estimate + multiplier * stats::sd(x[["stat"]])) } - + return(ci) } check_ci_args <- function(x, level, type, point_estimate){ - + if(!is.null(point_estimate)){ if(!is.data.frame(point_estimate)) check_type(point_estimate, is.numeric) @@ -70,7 +70,7 @@ check_ci_args <- function(x, level, type, point_estimate){ if(level <= 0 || level >= 1){ stop_glue("The value of `level` must be between 0 and 1 non-inclusive.") } - + if(!(type %in% c("percentile", "se"))){ stop_glue('The options for `type` are "percentile" or "se".') } diff --git a/R/generate.R b/R/generate.R index bb3b3087..029fa99f 100755 --- a/R/generate.R +++ b/R/generate.R @@ -24,12 +24,12 @@ generate <- function(x, reps = 1, type = attr(x, "type"), ...) { auto_type <- attr(x, "type") - + if(!is.null(auto_type)){ if (is.null(type)) { stop_glue("Supply not `NULL` value of `type`.") } - + if(auto_type != type) stop_glue( "You have specified `type = \"{type}\"`, but `type` is expected to be ", @@ -38,9 +38,9 @@ generate <- function(x, reps = 1, type = attr(x, "type"), ...) 
{ else type <- auto_type } - + attr(x, "generate") <- TRUE - + if (type == "permute" && any(is.null(attr(x, "response")), is.null(attr(x, "explanatory")))) { stop_glue("Please `specify()` an explanatory and a response variable ", @@ -80,12 +80,12 @@ bootstrap <- function(x, reps = 1, ...) { # If so, shift the variable chosen to have a mean corresponding # to that specified in `hypothesize` if(attr(attr(x, "params"), "names") == "mu"){ - + col <- as.character(attr(x, "response")) # if(attr(x, "theory_type") != "One sample t"){ x[[col]] <- x[[col]] - mean(x[[col]], na.rm = TRUE) + attr(x, "params") # } - + # Standardize after centering above ##### # Determining whether or not to implement this t transformation @@ -119,9 +119,9 @@ bootstrap <- function(x, reps = 1, ...) { # Set variables for use in calculate() result <- rep_sample_n(x, size = nrow(x), replace = TRUE, reps = reps) result <- set_attributes(to = result, from = x) - + class(result) <- append("infer", class(result)) - + return(result) } @@ -132,11 +132,11 @@ permute <- function(x, reps = 1, ...) { dplyr::bind_rows() %>% dplyr::mutate(replicate = rep(1:reps, each = nrow(x))) %>% dplyr::group_by(replicate) - + df_out <- set_attributes(to = df_out, from = x) - + class(df_out) <- append("infer", class(df_out)) - + return(df_out) } @@ -171,10 +171,8 @@ simulate <- function(x, reps = 1, ...) { ) rep_tbl <- set_attributes(to = rep_tbl, from = x) - + class(rep_tbl) <- append("infer", class(rep_tbl)) return(dplyr::group_by(rep_tbl, replicate)) } - - diff --git a/R/hypothesize.R b/R/hypothesize.R index b49e4909..41baf587 100755 --- a/R/hypothesize.R +++ b/R/hypothesize.R @@ -19,39 +19,39 @@ #' #' @export hypothesize <- function(x, null, ...) { - + hypothesize_checks(x, null) - + attr(x, "null") <- null - + dots <- list(...) - + if( (null == "point") && (length(dots) == 0) ){ stop_glue("Provide a parameter and a value to check such as `mu = 30` ", "for the point hypothesis.") } - + if((null == "independence") && (length(dots) > 0)) { warning_glue("Parameter values are not specified when testing that two ", "variables are independent.") } - + if((length(dots) > 0) && (null == "point")) { params <- parse_params(dots, x) attr(x, "params") <- params - + if(any(grepl("p.", attr(attr(x, "params"), "names")))){ # simulate instead of bootstrap based on the value of `p` provided attr(x, "type") <- "simulate" } else { attr(x, "type") <- "bootstrap" } - + } - + if(!is.null(null) && null == "independence") attr(x, "type") <- "permute" - + # Check one proportion test set up correctly if(null == "point"){ if(is.factor(response_variable(x))){ @@ -60,7 +60,7 @@ hypothesize <- function(x, null, ...) { 'to be used as a parameter.') } } - + # Check one numeric test set up correctly ## Not currently able to reach in testing as other checks ## already produce errors @@ -70,7 +70,6 @@ hypothesize <- function(x, null, ...) 
{ # stop_glue('Testing one numerical variable requires one of ', # '`mu`, `med`, or `sd` to be used as a parameter.') # } - + return(tibble::as_tibble(x)) } - diff --git a/R/infer.R b/R/infer.R index 4620ef2a..af1163fa 100755 --- a/R/infer.R +++ b/R/infer.R @@ -14,7 +14,7 @@ NULL ## quiets concerns of R CMD check re: the .'s that appear in pipelines ## From Jenny Bryan's googlesheets package -if(getRversion() >= "2.15.1") +if(getRversion() >= "2.15.1") utils::globalVariables(c("prop", "stat", "xbar", "xtilde", "x", "..density..", "statistic", ".", "parameter", "p.value", "xmin", "xmax", "density", "denom", diff --git a/R/p_value.R b/R/p_value.R index 7d0ee23d..785471a5 100644 --- a/R/p_value.R +++ b/R/p_value.R @@ -32,24 +32,24 @@ NULL #' @rdname get_pvalue #' @export p_value <- function(x, obs_stat, direction){ - + check_type(x, is.data.frame) obs_stat <- check_obs_stat(obs_stat) check_direction(direction) - + is_simulation_based <- !is.null(attr(x, "generate")) && attr(x, "generate") if(is_simulation_based) - pvalue <- simulation_based_p_value(x = x, obs_stat = obs_stat, + pvalue <- simulation_based_p_value(x = x, obs_stat = obs_stat, direction = direction) - + ## Theoretical-based p-value # Could be more specific # else if(is.null(attr(x, "theory_type")) || is.null(attr(x, "distr_param"))) # stop_glue("Attributes have not been set appropriately. ", # "Check your {{infer}} pipeline again.") - + # if(!("stat" %in% names(x))){ # # Theoretical distribution # which_distribution(x, @@ -57,39 +57,39 @@ p_value <- function(x, obs_stat, direction){ # obs_stat = obs_stat, # direction = direction) # } - + return(pvalue) } simulation_based_p_value <- function(x, obs_stat, direction){ - + if(direction %in% c("less", "left")){ - p_value <- x %>% + p_value <- x %>% dplyr::summarize(p_value = mean(stat <= obs_stat)) } else if(direction %in% c("greater", "right")){ - p_value <- x %>% + p_value <- x %>% dplyr::summarize(p_value = mean(stat >= obs_stat)) } else{ p_value <- x %>% two_sided_p_value(obs_stat = obs_stat) } - + p_value } two_sided_p_value <- function(x, obs_stat){ - + if(stats::median(x$stat) >= obs_stat){ basic_p_value <- get_percentile(x$stat, obs_stat) + - (1 - get_percentile(x$stat, stats::median(x$stat) + + (1 - get_percentile(x$stat, stats::median(x$stat) + stats::median(x$stat) - obs_stat)) } else { basic_p_value <- 1 - get_percentile(x$stat, obs_stat) + - (get_percentile(x$stat, stats::median(x$stat) + + (get_percentile(x$stat, stats::median(x$stat) + stats::median(x$stat) - obs_stat)) } - + if(basic_p_value >= 1) # Catch all if adding both sides produces a number # larger than 1. Should update with test in that @@ -118,7 +118,7 @@ get_pvalue <- p_value # } #theory_t_pvalue <- - + # set_lower_tail <- function(direction){ # if(direction %in% c("greater", "right")) # lower_tail <- FALSE diff --git a/R/print_methods.R b/R/print_methods.R index 752845fc..9433f5d2 100644 --- a/R/print_methods.R +++ b/R/print_methods.R @@ -21,9 +21,9 @@ print.infer <- function(x, ...) 
{ if ("null" %in% attrs) { header[3] <- glue_null('Null Hypothesis: {attr(x, "null")}') } - + cat(glue::glue_collapse(header[header != ""], sep = "\n")) cat("\n") - + NextMethod() } diff --git a/R/rep_sample_n.R b/R/rep_sample_n.R index 6268f23e..20b80a1e 100644 --- a/R/rep_sample_n.R +++ b/R/rep_sample_n.R @@ -77,4 +77,3 @@ rep_sample_n <- function(tbl, size, replace = FALSE, reps = 1, prob = NULL) { names(rep_tbl)[-1] <- names(tbl) dplyr::group_by(rep_tbl, replicate) } - diff --git a/R/set_params.R b/R/set_params.R index 94f23e52..13d9fc32 100755 --- a/R/set_params.R +++ b/R/set_params.R @@ -6,30 +6,30 @@ set_params <- function(x){ attr(x, "theory_type") <- NULL - + if(!is.null(attr(x, "response"))){ num_response_levels <- length(levels(response_variable(x))) } - + # One variable - if (!is.null(attr(x, "response")) && is.null(attr(x, "explanatory")) && - !is.null(attr(x, "response_type")) && + if (!is.null(attr(x, "response")) && is.null(attr(x, "explanatory")) && + !is.null(attr(x, "response_type")) && is.null(attr(x, "explanatory_type"))){ - + # One mean if(attr(x, "response_type") %in% c("integer", "numeric")){ attr(x, "theory_type") <- "One sample t" - attr(x, "distr_param") <- x %>% + attr(x, "distr_param") <- x %>% dplyr::summarize(df = stats::t.test( response_variable(x))[["parameter"]] - ) %>% + ) %>% dplyr::pull() attr(x, "type") <- "bootstrap" } - + # One prop else if(attr(x, "response_type") == "factor" && (num_response_levels == 2)){ - + # No parameters since standard normal attr(x, "theory_type") <- "One sample prop z" # Changed to `"simulate"` when `p` provided in `hypothesize()` @@ -39,53 +39,53 @@ set_params <- function(x){ attr(x, "distr_param") <- num_response_levels - 1 attr(x, "type") <- "simulate" } - + } - + # Two variables - if (!is.null(attr(x, "response")) && !is.null(attr(x, "explanatory")) & - !is.null(attr(x, "response_type")) && + if (!is.null(attr(x, "response")) && !is.null(attr(x, "explanatory")) & + !is.null(attr(x, "response_type")) && !is.null(attr(x, "explanatory_type"))){ - + attr(x, "type") <- "bootstrap" - + # Response is numeric, explanatory is categorical if(attr(x, "response_type") %in% c("integer", "numeric") & attr(x, "explanatory_type") == "factor"){ - + # Two sample means (t distribution) if(length(levels(explanatory_variable(x))) == 2) { attr(x, "theory_type") <- "Two sample t" # Keep track of Satterthwaite degrees of freedom since lost when # in aggregation w/ calculate()/generate() - attr(x, "distr_param") <- x %>% + attr(x, "distr_param") <- x %>% dplyr::summarize(df = stats::t.test( !!attr(x, "response") ~ !!attr(x, "explanatory"))[["parameter"]] - ) %>% + ) %>% dplyr::pull() } else { # >2 sample means (F distribution) attr(x, "theory_type") <- "ANOVA" # Get numerator and denominator degrees of freedom - attr(x, "distr_param") <- x %>% + attr(x, "distr_param") <- x %>% dplyr::summarize(df1 = stats::anova(stats::aov( !! attr(x, "response") ~ !! attr(x, "explanatory")))$Df[1] - ) %>% + ) %>% dplyr::pull() - attr(x, "distr_param2") <- x %>% + attr(x, "distr_param2") <- x %>% dplyr::summarize(df2 = stats::anova(stats::aov( !! attr(x, "response") ~ !! 
attr(x, "explanatory")))$Df[2] - ) %>% + ) %>% dplyr::pull() } } - + # Response is categorical, explanatory is categorical if(attr(x, "response_type") == "factor" & attr(x, "explanatory_type") == "factor"){ - + attr(x, "type") <- "bootstrap" - + # Two sample proportions (z distribution) # Parameter(s) not needed since standard normal if(length(levels(response_variable(x))) == 2 & @@ -95,14 +95,14 @@ set_params <- function(x){ # >2 sample proportions (chi-square test of indep) else{ attr(x, "theory_type") <- "Chi-square test of indep" - attr(x, "distr_param") <- x %>% + attr(x, "distr_param") <- x %>% dplyr::summarize(df = suppressWarnings(stats::chisq.test( table(response_variable(x), explanatory_variable(x)))$parameter)) %>% dplyr::pull() } } - + # Response is numeric, explanatory is numeric if(attr(x, "response_type") %in% c("integer", "numeric") & attr(x, "explanatory_type") %in% c("integer", "numeric")){ @@ -112,9 +112,9 @@ set_params <- function(x){ attr(x, "distr_param") <- nrow(x) - 2 } } - + # if(is.null(attr(x, "theory_type"))) # warning_glue("Theoretical type not yet implemented") - + x } diff --git a/R/specify.R b/R/specify.R index ad87cbb5..f3779839 100755 --- a/R/specify.R +++ b/R/specify.R @@ -55,11 +55,11 @@ specify <- function(x, formula, response = NULL, attr(x, "response") <- f_lhs(formula) attr(x, "explanatory") <- f_rhs(formula) } - + if (is.null(attr(x, "response"))) { stop_glue("Supply not `NULL` response variable.") } - + if (!(as.character(attr(x, "response")) %in% names(x))) { stop_glue('The response variable `{attr(x, "response")}` ', 'cannot be found in this dataframe.') @@ -115,15 +115,15 @@ specify <- function(x, formula, response = NULL, warning_glue("Removed {sum(!is_complete)} rows containing missing values.") } - + # To help determine theoretical distribution to plot attr(x, "response_type") <- class(response_variable(x)) - + if(is.null(attr(x, "explanatory"))) attr(x, "explanatory_type") <- NULL else attr(x, "explanatory_type") <- class(explanatory_variable(x)) - + if(attr(x, "response_type") == "factor" && is.null(success) && length(levels(response_variable(x))) == 2 && (is.null(attr(x, "explanatory_type")) || diff --git a/R/utils.R b/R/utils.R index 6c5c2a35..7a740919 100644 --- a/R/utils.R +++ b/R/utils.R @@ -22,7 +22,7 @@ set_attributes <- function(to, from = x){ attr(to, "theory_type") <- attr(from, "theory_type") attr(to, "generate") <- attr(from, "generate") attr(to, "type") <- attr(from, "type") - + return(to) } @@ -85,7 +85,7 @@ null_transformer <- function(text, envir) { if (is.null(out)) { return("NULL") } - + out } @@ -114,9 +114,9 @@ check_order <- function(x, explanatory_variable, order){ } } -check_args_and_attr <- function(x, explanatory_variable, response_variable, +check_args_and_attr <- function(x, explanatory_variable, response_variable, stat){ - + # Could also do `stat <- match.arg(stat)` # but that's not as helpful to beginners with the cryptic error msg if (!stat %in% c("mean", "median", "sd", "prop", @@ -125,11 +125,11 @@ check_args_and_attr <- function(x, explanatory_variable, response_variable, stop_glue("You specified a string for `stat` that is not implemented. ", "Check your spelling and `?calculate` for current options.") } - + if (!("replicate" %in% names(x)) && !is.null(attr(x, "generate"))) warning_glue('A `generate()` step was not performed prior to ', '`calculate()`. 
Review carefully.') - + if (stat %in% c("F", "slope", "diff in means", "diff in medians")){ if (has_explanatory(x) && !is.numeric(response_variable(x))){ stop_glue( @@ -138,7 +138,7 @@ check_args_and_attr <- function(x, explanatory_variable, response_variable, ) } } - + if (stat %in% c("diff in props", "Chisq")){ if (has_explanatory(x) && !is.factor(response_variable(x))){ stop_glue( @@ -152,7 +152,7 @@ check_args_and_attr <- function(x, explanatory_variable, response_variable, check_for_numeric_stat <- function(x, stat){ if (stat %in% c("mean", "median", "sd")){ col <- base::setdiff(names(x), "replicate") - + if (!is.numeric(x[[as.character(col)]])){ stop_glue( "Calculating a {stat} here is not appropriate\n", @@ -163,7 +163,7 @@ check_for_numeric_stat <- function(x, stat){ } check_for_factor_stat <- function(x, stat, explanatory_variable){ - + if (stat %in% c("diff in means", "diff in medians", "diff in props", "F")){ if (!is.factor(explanatory_variable)){ stop_glue( @@ -176,8 +176,8 @@ check_for_factor_stat <- function(x, stat, explanatory_variable){ } check_point_params <- function(x, stat){ - - param_names <- attr(attr(x, "params"), "names") + + param_names <- attr(attr(x, "params"), "names") hyp_text <- 'to be set in `hypothesize()`.' if(!is.null(attr(x, "null"))){ if(stat %in% c("mean", "median", "sd", "prop")){ @@ -194,7 +194,7 @@ check_point_params <- function(x, stat){ # stop_glue('`stat == "sd"` requires `"sigma"` {hyp_text}') if ( (!(stat == "sd") && ("sigma" %in% param_names)) ) stop_glue('`"sigma"` does not correspond to `stat = "{stat}"`.') - + ## Tests unable to get to # if(stat == "prop" && !(any(grepl("p.", param_names)))) # stop_glue('`stat == "prop"` requires `"p"` {hyp_text}') @@ -207,21 +207,21 @@ parse_params <- function(dots, x) { mu_ind <- grep("mu", names(dots)) med_ind <- grep("med", names(dots)) sig_ind <- grep("sigma", names(dots)) - + # error: cannot specify more than one of props, means, medians, or sds - if ( length(p_ind) + length(mu_ind) + length(med_ind) + if ( length(p_ind) + length(mu_ind) + length(med_ind) + length(sig_ind) != 1 ){ stop_glue( 'Parameter values can be only one of `p`, `mu`, `med`, or `sigma`.' ) } - + # add in 1 - p if it's missing # Outside if() is needed to ensure an error does not occur in referencing the # 0 index of dots if (length(p_ind)) { if (length(dots[[p_ind]]) == 1) { - + if (attr(x, "null") == "point" && is.null(attr(x, "success"))) { stop_glue("A point null regarding a proportion requires ", "that `success` be indicated in `specify()`.") @@ -230,7 +230,7 @@ parse_params <- function(dots, x) { stop_glue( "The value suggested for `p` is not between 0 and 1, inclusive." ) - missing_lev <- base::setdiff(unique(pull(x, !!attr(x, "response"))), + missing_lev <- base::setdiff(unique(pull(x, !!attr(x, "response"))), attr(x, "success")) dots$p <- append(dots$p, 1 - dots$p) names(dots$p) <- c(attr(x, "success"), missing_lev) @@ -241,12 +241,12 @@ parse_params <- function(dots, x) { } } } - + # if (sum(dots[[p_ind]]) != 1){ # dots[[p_ind]] <- dots[[p_ind]]/sum(dots[[p_ind]]) # warning_glue("Proportions do not sum to 1, normalizing automatically.") # } - + return(unlist(dots)) } @@ -255,25 +255,25 @@ hypothesize_checks <- function(x, null){ if (!sum(class(x) %in% c("data.frame", "tbl", "tbl_df", "grouped_df"))) { stop_glue("x must be a data.frame or tibble") } - + # error: null not found if (!(null %in% c("independence", "point"))) { stop_glue( "Choice of null is not supported. Check `?hypothesize` for options." 
) } - + # if (length(null) != 1) { # stop_glue('Choose between either `"independence"` or `"point"` ', # 'for `null` argument.') # } - + if(!has_response(x)){ stop_glue( "The response variable is not set. Make sure to `specify()` it first." ) } - + if(null == "independence" && !has_explanatory(x)){ stop_glue('Please `specify()` an explanatory and a response variable ', 'when testing\n', @@ -284,7 +284,7 @@ hypothesize_checks <- function(x, null){ check_direction <- function(direction = c("less", "greater", "two_sided", "left", "right", "both")){ check_type(direction, is.character) - + if(!(direction %in% c("less", "greater", "two_sided", "left", "right", "both"))){ stop_glue('The provided value for `direction` is not appropriate. ', @@ -297,10 +297,10 @@ check_obs_stat <- function(obs_stat){ if(!is.null(obs_stat)){ if("data.frame" %in% class(obs_stat)){ check_type(obs_stat, is.data.frame) - if( (nrow(obs_stat) != 1) || (ncol(obs_stat) != 1) ) - warning_glue("The first row and first column value of the given ", + if( (nrow(obs_stat) != 1) || (ncol(obs_stat) != 1) ) + warning_glue("The first row and first column value of the given ", "`obs_stat` will be used.") - + # [[1]] is used in case `stat` is not specified as name of 1x1 obs_stat <- obs_stat[[1]][[1]] check_type(obs_stat, is.numeric) @@ -309,7 +309,7 @@ check_obs_stat <- function(obs_stat){ check_type(obs_stat, is.numeric) } } - + obs_stat } @@ -341,12 +341,12 @@ check_type <- function(x, predicate, type = NULL) { predicate_name <- deparse(rlang::enexpr(predicate)) type <- parse_type(predicate_name) } - + if (!isTRUE(predicate(x))) { # Not using "must be of type" because of 'tibble' and 'string' cases stop_glue("`{x_name}` must be '{type}', not '{get_type(x)}'.") } - + x } @@ -355,7 +355,7 @@ get_type <- function(x) { if (is.data.frame(x)) { return("data.frame") } - + typeof(x) } diff --git a/R/visualize.R b/R/visualize.R index 68d57da6..7129b050 100755 --- a/R/visualize.R +++ b/R/visualize.R @@ -69,16 +69,16 @@ #' @importFrom ggplot2 xlab ylab geom_vline geom_rect geom_bar #' @importFrom stats dt qt df qf dnorm qnorm dchisq qchisq #' @export -visualize <- function(data, bins = 15, method = "simulation", +visualize <- function(data, bins = 15, method = "simulation", dens_color = "black", - obs_stat = NULL, + obs_stat = NULL, obs_stat_color = "red2", pvalue_fill = "pink", - direction = NULL, - endpoints = NULL, + direction = NULL, + endpoints = NULL, endpoints_color = "mediumaquamarine", ci_fill = "turquoise", ...) { - + check_type(data, is.data.frame) check_type(bins, is.numeric) check_type(method, is.character) @@ -87,7 +87,7 @@ visualize <- function(data, bins = 15, method = "simulation", check_type(pvalue_fill, is.character) if(!is.null(direction)) check_type(direction, is.character) - if(is.data.frame(endpoints) && + if(is.data.frame(endpoints) && ( (nrow(endpoints) != 1) || (ncol(endpoints) != 2) ) ){ stop_glue( "Expecting `endpoints` to be a 1 x 2 data frame or 2 element vector." 
@@ -103,7 +103,7 @@ visualize <- function(data, bins = 15, method = "simulation", if(is.data.frame(endpoints)) endpoints <- unlist(endpoints) obs_stat <- check_obs_stat(obs_stat) - if(!is.null(direction) && + if(!is.null(direction) && (is.null(obs_stat) + is.null(endpoints)) != 1) stop_glue( "Shading requires either `endpoints` values for a confidence interval ", @@ -111,50 +111,50 @@ visualize <- function(data, bins = 15, method = "simulation", ) if(method == "simulation"){ - - infer_plot <- visualize_simulation(data = data, bins = bins, + + infer_plot <- visualize_simulation(data = data, bins = bins, dens_color = dens_color, - obs_stat = obs_stat, + obs_stat = obs_stat, obs_stat_color = obs_stat_color, - direction = direction, + direction = direction, pvalue_fill = pvalue_fill, - endpoints = endpoints, + endpoints = endpoints, ci_fill = ci_fill, ...) - + } else if(method == "theoretical"){ - + infer_plot <- visualize_theoretical(data = data, dens_color = dens_color, - obs_stat = obs_stat, + obs_stat = obs_stat, obs_stat_color = obs_stat_color, - direction = direction, + direction = direction, pvalue_fill = pvalue_fill, - endpoints = endpoints, + endpoints = endpoints, ci_fill = ci_fill, ...) - - + + } else if(method == "both"){ - + if(!("stat" %in% names(data))) stop_glue('`generate()` and `calculate()` are both required ', 'to be done prior to `visualize(method = "both")`') - + if(("replicate" %in% names(data)) && length(unique(data$replicate)) < 100) warning_glue( "With only {length(unique(data$stat))} replicates, it may be ", "difficult to see the relationship between simulation and theory." ) - - infer_plot <- visualize_both(data = data, bins = bins, + + infer_plot <- visualize_both(data = data, bins = bins, dens_color = dens_color, - obs_stat = obs_stat, + obs_stat = obs_stat, obs_stat_color = obs_stat_color, direction = direction, pvalue_fill = pvalue_fill, - endpoints = endpoints, + endpoints = endpoints, ci_fill = ci_fill, ...) } else { @@ -162,31 +162,31 @@ visualize <- function(data, bins = 15, method = "simulation", '`"theoretical"`, `"both"`, or `"simulation"`. ', '`"simulation"` is the default.') } - + if(!is.null(obs_stat)){#&& !is.null(direction) infer_plot <- infer_plot + geom_vline(xintercept = obs_stat, size = 2, color = obs_stat_color, ...) } - + if(!is.null(endpoints)){ if(!is.null(obs_stat)) warning_glue("Values for both `endpoints` and `obs_stat` were given ", "when only one should be set. Ignoring `obs_stat` values.") infer_plot <- infer_plot + - geom_vline(xintercept = endpoints, size = 2, - color = endpoints_color, + geom_vline(xintercept = endpoints, size = 2, + color = endpoints_color, ...) 
} - + infer_plot } -theory_t_plot <- function(deg_freedom, statistic_text = "t", +theory_t_plot <- function(deg_freedom, statistic_text = "t", dens_color = dens_color, ...){ - ggplot(data.frame(x = c(qt(0.001, deg_freedom), - qt(0.999, deg_freedom)))) + - stat_function(mapping = aes(x), fun = dt, args = list(df = deg_freedom), + ggplot(data.frame(x = c(qt(0.001, deg_freedom), + qt(0.999, deg_freedom)))) + + stat_function(mapping = aes(x), fun = dt, args = list(df = deg_freedom), color = dens_color) + ggtitle(glue_null("Theoretical {statistic_text} Null Distribution")) + xlab("") + @@ -196,12 +196,12 @@ theory_t_plot <- function(deg_freedom, statistic_text = "t", both_t_plot <- function(data = data, deg_freedom, statistic_text = "t", dens_color, obs_stat, - direction, + direction, bins, - pvalue_fill, - endpoints, + pvalue_fill, + endpoints, ci_fill, ...){ - + infer_t_plot <- shade_density_check(data = data, obs_stat = obs_stat, direction = direction, @@ -211,7 +211,7 @@ both_t_plot <- function(data = data, deg_freedom, statistic_text = "t", ci_fill = ci_fill) infer_t_plot + - stat_function(fun = dt, args = list(df = deg_freedom), + stat_function(fun = dt, args = list(df = deg_freedom), color = dens_color) + ggtitle(glue_null( "Simulation-Based and Theoretical {statistic_text} Null Distributions" @@ -220,12 +220,12 @@ both_t_plot <- function(data = data, deg_freedom, statistic_text = "t", ylab("") } -theory_anova_plot <- function(deg_freedom_top, deg_freedom_bottom, - statistic_text = "F", +theory_anova_plot <- function(deg_freedom_top, deg_freedom_bottom, + statistic_text = "F", dens_color = dens_color, ...){ - ggplot(data.frame(x = c(qf(0.001, deg_freedom_top, deg_freedom_bottom), - qf(0.999, deg_freedom_top, deg_freedom_bottom)))) + - stat_function(mapping = aes(x), fun = df, + ggplot(data.frame(x = c(qf(0.001, deg_freedom_top, deg_freedom_bottom), + qf(0.999, deg_freedom_top, deg_freedom_bottom)))) + + stat_function(mapping = aes(x), fun = df, args = list(df1 = deg_freedom_top, df2 = deg_freedom_bottom), color = dens_color) + ggtitle(glue_null("Theoretical {statistic_text} Null Distribution")) + @@ -233,44 +233,44 @@ theory_anova_plot <- function(deg_freedom_top, deg_freedom_bottom, ylab("") } -both_anova_plot <- function(data, deg_freedom_top, +both_anova_plot <- function(data, deg_freedom_top, deg_freedom_bottom, statistic_text = "F", dens_color, obs_stat, - direction, + direction, bins, endpoints, pvalue_fill, ci_fill, ...){ - + if(!is.null(direction) && !(direction %in% c("greater", "right"))) warning_glue( "F usually corresponds to right-tailed tests. Proceed with caution." 
) - - infer_anova_plot <- shade_density_check(data = data, + + infer_anova_plot <- shade_density_check(data = data, obs_stat = obs_stat, direction = direction, bins = bins, endpoints = endpoints, pvalue_fill = pvalue_fill, ci_fill = ci_fill) - + infer_anova_plot <- infer_anova_plot + - stat_function(fun = df, + stat_function(fun = df, args = list(df1 = deg_freedom_top, df2 = deg_freedom_bottom), color = dens_color) + ggtitle(glue_null( "Simulation-Based and Theoretical {statistic_text} Null Distributions" )) + xlab("Fstat") + - ylab("") + ylab("") } theory_z_plot <- function(statistic_text = "z", dens_color = dens_color, ...){ - - ggplot(data.frame(x = c(qnorm(0.001), qnorm(0.999)))) + + + ggplot(data.frame(x = c(qnorm(0.001), qnorm(0.999)))) + stat_function(mapping = aes(x), fun = dnorm, color = dens_color) + ggtitle(glue_null("Theoretical {statistic_text} Null Distribution")) + xlab("") + @@ -286,15 +286,15 @@ both_z_plot <- function(data, statistic_text = "z", endpoints, ci_fill, ...){ - - infer_z_plot <- shade_density_check(data = data, + + infer_z_plot <- shade_density_check(data = data, obs_stat = obs_stat, direction = direction, bins = bins, endpoints = endpoints, pvalue_fill = pvalue_fill, ci_fill = ci_fill) - + infer_z_plot + stat_function(fun = dnorm, color = dens_color) + ggtitle(glue_null( @@ -304,13 +304,13 @@ both_z_plot <- function(data, statistic_text = "z", ylab("") } -theory_chisq_plot <- function(deg_freedom, - statistic_text = "Chi-Square", +theory_chisq_plot <- function(deg_freedom, + statistic_text = "Chi-Square", dens_color = dens_color, ...){ - ggplot(data.frame(x = c(qchisq(0.001, deg_freedom), - qchisq(0.999, deg_freedom)))) + - stat_function(mapping = aes(x), fun = dchisq, - args = list(df = deg_freedom), + ggplot(data.frame(x = c(qchisq(0.001, deg_freedom), + qchisq(0.999, deg_freedom)))) + + stat_function(mapping = aes(x), fun = dchisq, + args = list(df = deg_freedom), color = dens_color) + ggtitle(glue_null("Theoretical {statistic_text} Null Distribution")) + xlab("") + @@ -320,17 +320,17 @@ theory_chisq_plot <- function(deg_freedom, both_chisq_plot <- function(data, deg_freedom, statistic_text = "Chi-Square", dens_color, obs_stat, - direction, - bins, - endpoints, + direction, + bins, + endpoints, pvalue_fill = pvalue_fill, ci_fill = ci_fill, ...){ - + if(!is.null(direction) && !(direction %in% c("greater", "right"))) warning_glue("Chi-square usually corresponds to right-tailed tests. ", "Proceed with caution.") - + infer_chisq_plot <- shade_density_check(data = data, obs_stat = obs_stat, direction = direction, @@ -338,9 +338,9 @@ both_chisq_plot <- function(data, deg_freedom, statistic_text = "Chi-Square", endpoints = endpoints, pvalue_fill = pvalue_fill, ci_fill = ci_fill) - + infer_chisq_plot + - stat_function(fun = dchisq, args = list(df = deg_freedom), + stat_function(fun = dchisq, args = list(df = deg_freedom), color = dens_color) + ggtitle(glue_null( "Simulation-Based and Theoretical {statistic_text} Null Distributions" @@ -351,14 +351,14 @@ both_chisq_plot <- function(data, deg_freedom, statistic_text = "Chi-Square", shade_density_check <- function(data, - obs_stat, - direction, - bins, - density = TRUE, + obs_stat, + direction, + bins, + density = TRUE, pvalue_fill, endpoints, ci_fill, ...) { - + if(is.null(direction) || is.null(obs_stat)){ if(density){ gg_plot <- ggplot(data = data, mapping = aes(x = stat)) + @@ -370,7 +370,7 @@ shade_density_check <- function(data, # geom_histogram(bins = bins, color = "white", ...) 
#} } - + if(xor(!is.null(obs_stat), !is.null(endpoints))){ if(!is.null(direction)){ if(density){ @@ -381,72 +381,72 @@ shade_density_check <- function(data, gg_plot <- ggplot(data = data, mapping = aes(x = stat)) + geom_histogram(bins = bins, color = "white", ...) } - + if(direction %in% c("less", "left")){ gg_plot <- gg_plot + - geom_rect(fill = pvalue_fill, alpha = 0.01, - aes(xmin = -Inf, xmax = obs_stat, ymin = 0, ymax = Inf), + geom_rect(fill = pvalue_fill, alpha = 0.01, + aes(xmin = -Inf, xmax = obs_stat, ymin = 0, ymax = Inf), ...) } if(direction %in% c("greater", "right")){ gg_plot <- gg_plot + - geom_rect(fill = pvalue_fill, alpha = 0.01, - aes(xmin = obs_stat, xmax = Inf, ymin = 0, ymax = Inf), + geom_rect(fill = pvalue_fill, alpha = 0.01, + aes(xmin = obs_stat, xmax = Inf, ymin = 0, ymax = Inf), ...) } - - if(direction %in% c("two_sided", "both") && + + if(direction %in% c("two_sided", "both") && obs_stat >= stats::median(data$stat)){ gg_plot <- gg_plot + geom_rect(fill = pvalue_fill, alpha = 0.01, - mapping = aes(xmin = obs_stat, xmax = Inf, ymin = 0, + mapping = aes(xmin = obs_stat, xmax = Inf, ymin = 0, ymax = Inf), ...) + geom_rect(fill = pvalue_fill, alpha = 0.01, mapping = aes( - xmin = -Inf, + xmin = -Inf, xmax = stats::quantile( - data$stat, + data$stat, probs = 1 - get_percentile(data$stat, obs_stat) ), ymin = 0, ymax = Inf, ...) - ) + ) } - - if(direction %in% c("two_sided", "both") && + + if(direction %in% c("two_sided", "both") && obs_stat < stats::median(data$stat)){ gg_plot <- gg_plot + geom_rect(fill = pvalue_fill, alpha = 0.01, - mapping = aes(xmin = -Inf, xmax = obs_stat, ymin = 0, + mapping = aes(xmin = -Inf, xmax = obs_stat, ymin = 0, ymax = Inf), ...) + geom_rect(fill = pvalue_fill, alpha = 0.01, - mapping = aes( + mapping = aes( xmin = stats::quantile( - data$stat, + data$stat, probs = 1 - get_percentile(data$stat, obs_stat) ), xmax = Inf, ymin = 0, ymax = Inf, ...) - ) + ) } } - + if(direction == "between"){ gg_plot <- gg_plot + - geom_rect(fill = ci_fill, alpha = 0.01, - aes(xmin = endpoints[1], - xmax = endpoints[2], ymin = 0, ymax = Inf), + geom_rect(fill = ci_fill, alpha = 0.01, + aes(xmin = endpoints[1], + xmax = endpoints[2], ymin = 0, ymax = Inf), ...) } - + } gg_plot } -visualize_simulation <- function(data, bins, - method = "simulation", +visualize_simulation <- function(data, bins, + method = "simulation", dens_color, - obs_stat, + obs_stat, obs_stat_color, - direction, + direction, pvalue_fill, endpoints, ci_fill, ...) { @@ -465,7 +465,7 @@ visualize_simulation <- function(data, bins, bins = bins, density = FALSE, pvalue_fill = pvalue_fill, - endpoints = endpoints, + endpoints = endpoints, ci_fill = ci_fill ) } @@ -474,99 +474,99 @@ visualize_simulation <- function(data, bins, visualize_theoretical <- function(data, dens_color, - obs_stat, + obs_stat, obs_stat_color, - direction, - pvalue_fill, + direction, + pvalue_fill, endpoints, - ci_fill, + ci_fill, ...) { - + warning_glue( "Check to make sure the conditions have been met for the theoretical ", "method. {{infer}} currently does not check these for you." ) - - if(!is.null(attr(data, "stat")) && + + if(!is.null(attr(data, "stat")) && !(attr(data, "stat") %in% c("t", "z", "Chisq", "F"))) warning_glue( "Your `calculate`d statistic and the theoretical distribution are on ", "different scales. Displaying only the theoretical distribution." 
) - - if(attr(data, "theory_type") %in% - c("Two sample t", "Slope with t", "One sample t")){ + + if(attr(data, "theory_type") %in% + c("Two sample t", "Slope with t", "One sample t")){ infer_plot <- theory_t_plot(deg_freedom = attr(data, "distr_param"), statistic_text = "t", dens_color = dens_color) } - + else if(attr(data, "theory_type") == "ANOVA"){ - + if(!is.null(direction) && !(direction %in% c("greater", "right"))) warning_glue( "F usually corresponds to right-tailed tests. Proceed with caution." ) - + infer_plot <- theory_anova_plot( - deg_freedom_top = attr(data, "distr_param"), - deg_freedom_bottom = attr(data, "distr_param2"), + deg_freedom_top = attr(data, "distr_param"), + deg_freedom_bottom = attr(data, "distr_param2"), statistic_text = "F", dens_color = dens_color) } - - else if(attr(data, "theory_type") %in% + + else if(attr(data, "theory_type") %in% c("One sample prop z", "Two sample props z")){ - infer_plot <- theory_z_plot(statistic_text = "z", + infer_plot <- theory_z_plot(statistic_text = "z", dens_color = dens_color) } - - else if(attr(data, "theory_type") %in% - c("Chi-square test of indep", "Chi-square Goodness of Fit")){ - + + else if(attr(data, "theory_type") %in% + c("Chi-square test of indep", "Chi-square Goodness of Fit")){ + if(!is.null(direction) && !(direction %in% c("greater", "right"))) warning_glue("Chi-square usually corresponds to right-tailed tests. ", "Proceed with caution.") - + infer_plot <- theory_chisq_plot(deg_freedom = attr(data, "distr_param"), statistic_text = "Chi-Square", dens_color = dens_color) } - + # else # stop_glue( # '"{attr(data, "theory_type")}" is not implemented (possibly yet).' # ) - + # Move into its own function - + if(!is.null(obs_stat)){ if(!is.null(direction)){ if(direction %in% c("less", "left")){ infer_plot <- infer_plot + - geom_rect(data = data.frame(obs_stat), fill = pvalue_fill, + geom_rect(data = data.frame(obs_stat), fill = pvalue_fill, alpha = 0.6, aes(xmin = -Inf, xmax = obs_stat, ymin = 0, ymax = Inf), ...) } if(direction %in% c("greater", "right")){ infer_plot <- infer_plot + - geom_rect(data = data.frame(obs_stat), fill = pvalue_fill, + geom_rect(data = data.frame(obs_stat), fill = pvalue_fill, alpha = 0.6, aes(xmin = obs_stat, xmax = Inf, ymin = 0, ymax = Inf), ...) } - + # Assuming two-tailed shading will only happen with theoretical # distributions centered at 0 if(direction %in% c("two_sided", "both") && obs_stat >= 0){ infer_plot <- infer_plot + - geom_rect(data = data.frame(obs_stat), fill = pvalue_fill, + geom_rect(data = data.frame(obs_stat), fill = pvalue_fill, alpha = 0.6, aes(xmin = obs_stat, xmax = Inf, ymin = 0, ymax = Inf), ...) + - geom_rect(data = data.frame(obs_stat), fill = pvalue_fill, + geom_rect(data = data.frame(obs_stat), fill = pvalue_fill, alpha = 0.6, aes(xmin = -Inf, xmax = -obs_stat, ymin = 0, ymax = Inf), ...) @@ -574,44 +574,44 @@ visualize_theoretical <- function(data, if(direction %in% c("two_sided", "both") && obs_stat < 0){ infer_plot <- infer_plot + - geom_rect(data = data.frame(obs_stat), fill = pvalue_fill, + geom_rect(data = data.frame(obs_stat), fill = pvalue_fill, alpha = 0.6, aes(xmin = -Inf, xmax = obs_stat, ymin = 0, ymax = Inf), ...) + - geom_rect(data = data.frame(obs_stat), fill = pvalue_fill, + geom_rect(data = data.frame(obs_stat), fill = pvalue_fill, alpha = 0.6, aes(xmin = -obs_stat, xmax = Inf, ymin = 0, ymax = Inf), ...) 
} } } - + # To implement: plotting of theoretical confidence interval values - + infer_plot } -visualize_both <- function(data, bins, +visualize_both <- function(data, bins, dens_color, - obs_stat, + obs_stat, obs_stat_color, - direction, - pvalue_fill, - endpoints, + direction, + pvalue_fill, + endpoints, ci_fill, ...) { - + warning_glue( "Check to make sure the conditions have been met for the theoretical ", "method. `infer` currently does not check these for you." ) - + if(!(attr(data, "stat") %in% c("t", "z", "Chisq", "F"))) stop_glue("Your `calculate`d statistic and the theoretical distribution ", "are on different scales. Use a standardized `stat` instead.") - + if(attr(data, "theory_type") %in% c("Two sample t", "Slope with t")){ - - infer_plot <- both_t_plot(data = data, + + infer_plot <- both_t_plot(data = data, deg_freedom = attr(data, "distr_param"), statistic_text = "t", dens_color = dens_color, @@ -622,13 +622,13 @@ visualize_both <- function(data, bins, endpoints = endpoints, ci_fill = ci_fill) } - + else if(attr(data, "theory_type") == "ANOVA"){ infer_plot <- both_anova_plot( - data = data, - deg_freedom_top = attr(data, "distr_param"), - deg_freedom_bottom = attr(data, "distr_param2"), - statistic_text = "F", + data = data, + deg_freedom_top = attr(data, "distr_param"), + deg_freedom_bottom = attr(data, "distr_param2"), + statistic_text = "F", dens_color = dens_color, bins = bins, direction = direction, @@ -637,11 +637,11 @@ visualize_both <- function(data, bins, endpoints = endpoints, ci_fill = ci_fill) } - - else if(attr(data, "theory_type") %in% + + else if(attr(data, "theory_type") %in% c("One sample prop z", "Two sample props z")){ - infer_plot <- both_z_plot(data = data, - statistic_text = "z", + infer_plot <- both_z_plot(data = data, + statistic_text = "z", dens_color = dens_color, bins = bins, direction = direction, @@ -650,13 +650,13 @@ visualize_both <- function(data, bins, endpoints = endpoints, ci_fill = ci_fill) } - + else if( - attr(data, "theory_type") %in% + attr(data, "theory_type") %in% c("Chi-square test of indep", "Chi-square Goodness of Fit")){ - infer_plot <- both_chisq_plot(data = data, + infer_plot <- both_chisq_plot(data = data, deg_freedom = attr(data, "distr_param"), - statistic_text = "Chi-Square", + statistic_text = "Chi-Square", dens_color = dens_color, bins = bins, direction = direction, @@ -665,10 +665,10 @@ visualize_both <- function(data, bins, endpoints = endpoints, ci_fill = ci_fill) } - + # else # stop_glue('"{attr(data, "theory_type")}" is not implemented yet.') - + infer_plot } diff --git a/R/wrappers.R b/R/wrappers.R index 52908511..72e07f55 100755 --- a/R/wrappers.R +++ b/R/wrappers.R @@ -33,13 +33,13 @@ #' @export t_test <- function(data, formula, #response = NULL, explanatory = NULL, order = NULL, - alternative = "two_sided", mu = 0, + alternative = "two_sided", mu = 0, conf_int = TRUE, conf_level = 0.95, ...){ - + check_conf_level(conf_level) - + # Match with old "dot" syntax if(alternative == "two_sided") alternative <- "two.sided" @@ -47,17 +47,17 @@ t_test <- function(data, formula, #response = NULL, explanatory = NULL, ### Only currently working with formula interface # if (hasArg(formula)) { if(!is.null(f_rhs(formula))){ - + data[[as.character(f_rhs(formula))]] <- factor(data[[as.character(f_rhs(formula))]], levels = c(order[1], order[2])) - + # Two sample case prelim <- data %>% stats::t.test(formula = formula, data = ., alternative = alternative, - mu = mu, - conf.level = conf_level, + mu = mu, + conf.level = conf_level, 
...) %>% broom::glance() } else { @@ -67,24 +67,24 @@ t_test <- function(data, formula, #response = NULL, explanatory = NULL, data <- as.data.frame(data) prelim <- stats::t.test(x = data[[as.character(f_lhs(formula))]], alternative = alternative, - mu = mu, - conf.level = conf_level, - ...) %>% + mu = mu, + conf.level = conf_level, + ...) %>% broom::glance() } - + if(conf_int){ - results <- prelim %>% + results <- prelim %>% dplyr::select(statistic, t_df = parameter, p_value = p.value, - alternative, + alternative, lower_ci = conf.low, upper_ci = conf.high) } else { - results <- prelim %>% + results <- prelim %>% dplyr::select(statistic, t_df = parameter, p_value = p.value, alternative) } - + return(results) # } else { # data %>% @@ -114,8 +114,8 @@ t_test <- function(data, formula, #response = NULL, explanatory = NULL, #' #' @export t_stat <- function(data, formula, ...){ - data %>% - t_test(formula = formula, ...) %>% + data %>% + t_test(formula = formula, ...) %>% dplyr::select(statistic) } @@ -168,7 +168,7 @@ chisq_test <- function(data, formula, #response = NULL, explanatory = NULL, #' #' @export chisq_stat <- function(data, formula, ...){ - + if(is.null(f_rhs(formula))){ stop_glue( "`chisq_stat()` currently only has functionality for ", diff --git a/tests/testthat/test-calculate.R b/tests/testthat/test-calculate.R index 8dda3b7b..34933f46 100644 --- a/tests/testthat/test-calculate.R +++ b/tests/testthat/test-calculate.R @@ -1,12 +1,12 @@ context("calculate") -iris_df <- tibble::as_tibble(iris) +iris_df <- tibble::as_tibble(iris) -iris_tbl <- iris_df %>% +iris_tbl <- iris_df %>% dplyr::mutate(Sepal.Length.Group = dplyr::if_else(Sepal.Length > 5, ">5", "<=5"), Sepal.Width.Group = - dplyr::if_else(Sepal.Width > 3, "large", "small")) + dplyr::if_else(Sepal.Width > 3, "large", "small")) # calculate arguments test_that("x is a tibble", { @@ -29,7 +29,7 @@ test_that("stat argument is appropriate", { }) test_that("response attribute has been set", { - expect_error(tibble::as.tibble(iris) %>% + expect_error(tibble::as.tibble(iris) %>% calculate(stat = "median") ) }) @@ -40,7 +40,7 @@ test_that("variable chosen is of appropriate class (one var problems)", { specify(Species ~ NULL) %>% hypothesize(null = "point", p = c("setosa" = .5, - "versicolor" = .25, + "versicolor" = .25, "virginica" = .25)) %>% generate(reps = 10, type = "simulate") expect_error(calculate(gen_iris1, stat = "mean")) @@ -114,7 +114,7 @@ test_that("response variable is a factor (two var problems)", { expect_error(calculate(gen_iris4, stat = "diff in props")) expect_error(calculate(gen_iris4, stat = "t")) - + # Check successful diff in props gen_iris4a <- iris %>% dplyr::mutate(Sepal.Length.Group = @@ -142,9 +142,9 @@ test_that("response variable is numeric (two var problems)", { test_that("two sample mean-type problems are working", { gen_iris5a <- iris %>% dplyr::mutate(Sepal.Length.Group = - dplyr::if_else(Sepal.Length > 5, ">5", "<=5")) %>% - specify(Sepal.Width ~ Sepal.Length.Group) %>% - hypothesize(null = "independence") %>% + dplyr::if_else(Sepal.Length > 5, ">5", "<=5")) %>% + specify(Sepal.Width ~ Sepal.Length.Group) %>% + hypothesize(null = "independence") %>% generate(reps = 10, type = "permute") expect_error(calculate(gen_iris5a, stat = "diff in means")) expect_silent(calculate(gen_iris5a, stat = "diff in means", @@ -210,7 +210,7 @@ test_that("chi-square matches chisq.test value", { dplyr::do(broom::tidy(stats::chisq.test(table(.$Species)))) %>% dplyr::select(replicate, stat = statistic) expect_equal(infer_way, 
trad_way) - + gen_iris9a <- iris %>% specify(Species ~ NULL) %>% hypothesize(null = "point", @@ -226,7 +226,7 @@ test_that("chi-square matches chisq.test value", { p = c(0.8, 0.1, 0.1)))) %>% dplyr::select(replicate, stat = statistic) expect_equal(infer_way, trad_way) - + }) test_that("`order` is working", { @@ -256,7 +256,7 @@ test_that("`order` is working", { order = c(">5", "<=4", ">4"))) # order not given expect_error(calculate(gen_iris11, stat = "diff in means")) - + }) test_that('success is working for stat = "prop"', { @@ -270,7 +270,7 @@ test_that('success is working for stat = "prop"', { calculate(stat = "prop")) expect_silent(gen_iris12 %>% calculate(stat = "z")) - + }) test_that("NULL response gives error", { @@ -325,61 +325,61 @@ test_that("order being given when not needed gives warning", { # }) test_that("specify() %>% calculate() works", { - expect_silent(iris_tbl %>% + expect_silent(iris_tbl %>% specify(Petal.Width ~ NULL) %>% calculate(stat = "mean") ) - expect_error(iris_tbl %>% + expect_error(iris_tbl %>% specify(Petal.Width ~ NULL) %>% hypothesize(null = "point", mu = 4) %>% calculate(stat = "mean") ) - - expect_error(iris_tbl %>% - specify(Species ~ NULL) %>% + + expect_error(iris_tbl %>% + specify(Species ~ NULL) %>% calculate(stat = "Chisq")) }) test_that("One sample t hypothesis test is working", { expect_silent( - iris_tbl %>% - specify(Petal.Width ~ NULL) %>% - hypothesize(null = "point", mu = 1) %>% - generate(reps = 10) %>% + iris_tbl %>% + specify(Petal.Width ~ NULL) %>% + hypothesize(null = "point", mu = 1) %>% + generate(reps = 10) %>% calculate(stat = "t") ) - + }) test_that("specify done before calculate", { - iris_mean <- iris_tbl %>% + iris_mean <- iris_tbl %>% dplyr::select(stat = Sepal.Width) expect_error(calculate(iris_mean, stat = "mean")) - - iris_prop <- iris_tbl %>% + + iris_prop <- iris_tbl %>% dplyr::select(Sepal.Length.Group) attr(iris_prop, "response") <- "Sepal.Length.Group" expect_error(calculate(iris_prop, stat = "prop")) }) test_that("chisq GoF has params specified for observed stat", { - no_params <- iris_df %>% + no_params <- iris_df %>% specify(response = Species) - expect_error(calculate(no_params, stat = "Chisq")) - - params <- iris_df %>% - specify(response = Species) %>% + expect_error(calculate(no_params, stat = "Chisq")) + + params <- iris_df %>% + specify(response = Species) %>% hypothesize(null = "point", p = c("setosa" = .5, - "versicolor" = .25, + "versicolor" = .25, "virginica" = .25)) expect_silent(calculate(params, stat = "Chisq")) }) test_that("generate not done before calculate", { - iris_hyp <- iris_tbl %>% - specify(Sepal.Width ~ Sepal.Length.Group) %>% + iris_hyp <- iris_tbl %>% + specify(Sepal.Width ~ Sepal.Length.Group) %>% hypothesize(null = "independence") attr(iris_hyp, "generate") <- TRUE expect_warning(calculate(iris_hyp, stat = "t", order = c(">5", "<=5"))) @@ -387,10 +387,10 @@ test_that("generate not done before calculate", { test_that("One sample t bootstrap is working", { expect_silent( - iris_tbl %>% - specify(Petal.Width ~ NULL) %>% - generate(reps = 10) %>% + iris_tbl %>% + specify(Petal.Width ~ NULL) %>% + generate(reps = 10) %>% calculate(stat = "t") ) - + }) diff --git a/tests/testthat/test-conf_int.R b/tests/testthat/test-conf_int.R index 0ae2ab3f..6dcc63ae 100644 --- a/tests/testthat/test-conf_int.R +++ b/tests/testthat/test-conf_int.R @@ -1,21 +1,21 @@ context("conf_int") -iris_tbl <- iris %>% +iris_tbl <- iris %>% dplyr::mutate(Sepal.Length.Group = dplyr::if_else(Sepal.Length > 5, ">5", "<=5"), 
Sepal.Width.Group = - dplyr::if_else(Sepal.Width > 3, "large", "small")) + dplyr::if_else(Sepal.Width > 3, "large", "small")) iris_calc <- iris_tbl %>% specify(Sepal.Length.Group ~ Sepal.Width.Group, success = "<=5") %>% hypothesize(null = "independence") %>% - generate(reps = 1000) %>% + generate(reps = 1000) %>% calculate(stat = "diff in props", order = c("large", "small")) obs_diff <- iris_tbl %>% specify(Sepal.Length.Group ~ Sepal.Width.Group, - success = "<=5") %>% + success = "<=5") %>% calculate(stat = "diff in props", order = c("large", "small")) set.seed(2018) @@ -23,19 +23,19 @@ test_df <- tibble::tibble(stat = rnorm(100)) test_that("basics work", { expect_silent( - test_df %>% + test_df %>% conf_int() ) expect_error( - test_df %>% + test_df %>% conf_int(type = "other") ) expect_error( - test_df %>% + test_df %>% conf_int(level = 1.2) ) expect_error( - test_df %>% + test_df %>% conf_int(point_estimate = "help") ) expect_silent( @@ -51,4 +51,3 @@ test_that("basics work", { iris_calc %>% get_ci(type = "se") ) }) - diff --git a/tests/testthat/test-generate.R b/tests/testthat/test-generate.R index cbf4617f..02ed4b43 100644 --- a/tests/testthat/test-generate.R +++ b/tests/testthat/test-generate.R @@ -75,7 +75,7 @@ test_that("cohesion with type argument", { test_that("sensible output", { - expect_equal(nrow(mtcars) * 500, + expect_equal(nrow(mtcars) * 500, nrow(generate(hyp_prop, reps = 500, type = "simulate"))) expect_silent(generate(hyp_mean, reps = 1, type = "bootstrap")) expect_error(generate(hyp_mean, reps = 1, type = "other")) @@ -88,74 +88,74 @@ test_that("auto `type` works (generate)", { am = factor(am), gear = factor(gear), carb = factor(carb)) - + one_mean <- mtcars %>% specify(response = mpg) %>% # formula alt: mpg ~ NULL - hypothesize(null = "point", mu = 25) %>% + hypothesize(null = "point", mu = 25) %>% generate(reps = 100) - + one_nonshift_mean <- mtcars %>% specify(response = mpg) %>% generate(reps = 100) - + one_median <- mtcars %>% specify(response = mpg) %>% # formula alt: mpg ~ NULL - hypothesize(null = "point", med = 26) %>% + hypothesize(null = "point", med = 26) %>% generate(reps = 100) - + one_prop <- mtcars %>% specify(response = am, success = "1") %>% # formula alt: am ~ NULL - hypothesize(null = "point", p = .25) %>% + hypothesize(null = "point", p = .25) %>% generate(reps = 100) - + two_props <- mtcars %>% specify(am ~ vs, success = "1") %>% # alt: response = am, explanatory = vs hypothesize(null = "independence") %>% generate(reps = 100) - + gof_chisq <- mtcars %>% specify(cyl ~ NULL) %>% # alt: response = cyl hypothesize(null = "point", p = c("4" = .5, "6" = .25, "8" = .25)) %>% - generate(reps = 100) - + generate(reps = 100) + indep_chisq <- mtcars %>% specify(cyl ~ am) %>% # alt: response = cyl, explanatory = am hypothesize(null = "independence") %>% - generate(reps = 100) - + generate(reps = 100) + two_means <- mtcars %>% specify(mpg ~ am) %>% # alt: response = mpg, explanatory = am hypothesize(null = "independence") %>% generate(reps = 100) - + anova_f <- mtcars %>% specify(mpg ~ cyl) %>% # alt: response = mpg, explanatory = cyl hypothesize(null = "independence") %>% generate(reps = 100) - + slopes <- mtcars %>% specify(mpg ~ hp) %>% # alt: response = mpg, explanatory = cyl hypothesize(null = "independence") %>% generate(reps = 100) - + one_nonshift_prop <- mtcars %>% specify(response = am, success = "1") %>% generate(reps = 100) - + two_means_boot <- mtcars %>% specify(mpg ~ am) %>% generate(reps = 100) - + two_props_boot <- mtcars %>% specify(am ~ 
vs, success = "1") %>% generate(reps = 100) - + slope_boot <- mtcars %>% - specify(mpg ~ hp) %>% + specify(mpg ~ hp) %>% generate(reps = 100) - + expect_equal(attr(one_mean, "type"), "bootstrap") - expect_equal(attr(one_nonshift_mean, "type"), "bootstrap") + expect_equal(attr(one_nonshift_mean, "type"), "bootstrap") expect_equal(attr(one_median, "type"), "bootstrap") expect_equal(attr(one_prop, "type"), "simulate") expect_equal(attr(two_props, "type"), "permute") @@ -168,89 +168,89 @@ test_that("auto `type` works (generate)", { expect_equal(attr(two_means_boot, "type"), "bootstrap") expect_equal(attr(two_props_boot, "type"), "bootstrap") expect_equal(attr(slope_boot, "type"), "bootstrap") - + expect_error(mtcars %>% specify(response = mpg) %>% # formula alt: mpg ~ NULL - hypothesize(null = "point", mu = 25) %>% + hypothesize(null = "point", mu = 25) %>% generate(reps = 100, type = "permute")) - + expect_error(mtcars %>% specify(response = mpg) %>% generate(reps = 100, type = "simulate")) - + expect_error(mtcars %>% specify(response = mpg) %>% # formula alt: mpg ~ NULL - hypothesize(null = "point", med = 26) %>% + hypothesize(null = "point", med = 26) %>% generate(reps = 100, type = "permute")) - + expect_error(mtcars %>% specify(response = am, success = "1") %>% # formula alt: am ~ NULL - hypothesize(null = "point", p = .25) %>% + hypothesize(null = "point", p = .25) %>% generate(reps = 100, type = "bootstrap")) - + expect_error(mtcars %>% specify(am ~ vs, success = "1") %>% # alt: response = am, explanatory = vs hypothesize(null = "independence") %>% generate(reps = 100, type = "bootstrap")) - + expect_error(mtcars %>% specify(cyl ~ NULL) %>% # alt: response = cyl hypothesize(null = "point", p = c("4" = .5, "6" = .25, "8" = .25)) %>% generate(reps = 100, type = "bootstrap")) - + expect_error(mtcars %>% specify(cyl ~ am) %>% # alt: response = cyl, explanatory = am hypothesize(null = "independence") %>% generate(reps = 100, type = "simulate")) - + expect_error(mtcars %>% specify(mpg ~ am) %>% # alt: response = mpg, explanatory = am hypothesize(null = "independence") %>% generate(reps = 100, type = "bootstrap")) - + expect_error(mtcars %>% specify(mpg ~ cyl) %>% # alt: response = mpg, explanatory = cyl hypothesize(null = "independence") %>% generate(reps = 100, type = "simulate")) - + expect_error(mtcars %>% specify(mpg ~ hp) %>% # alt: response = mpg, explanatory = cyl hypothesize(null = "independence") %>% generate(reps = 100, type = "bootstrap")) - + expect_error(mtcars %>% specify(response = am, success = "1") %>% generate(reps = 100, type = "simulate")) - + expect_error(mtcars %>% specify(mpg ~ am) %>% generate(reps = 100, type = "permute")) - + expect_error(mtcars %>% specify(am ~ vs, success = "1") %>% generate(reps = 100, type = "simulate")) - + expect_error(mtcars %>% - specify(mpg ~ hp) %>% + specify(mpg ~ hp) %>% generate(reps = 100, type = "simulate")) - + }) test_that("mismatches lead to error", { expect_error(mtcars %>% generate(reps = 10, type = "permute")) - expect_error(mtcars %>% specify(am ~ NULL, success = "1") %>% - hypothesize(null = "independence", p = c("1" = 0.5)) %>% + expect_error(mtcars %>% specify(am ~ NULL, success = "1") %>% + hypothesize(null = "independence", p = c("1" = 0.5)) %>% generate(reps = 100, type = "simulate")) expect_error(mtcars %>% specify(cyl ~ NULL) %>% # alt: response = cyl - hypothesize(null = "point", p = c("4" = .5, "6" = .25, - "8" = .25)) %>% + hypothesize(null = "point", p = c("4" = .5, "6" = .25, + "8" = .25)) %>% generate(reps = 100, 
type = "bootstrap")) expect_error(mtcars %>% - specify(mpg ~ hp) %>% + specify(mpg ~ hp) %>% generate(reps = 100, type = "other")) }) test_that("generate() handles `NULL` value of `type`", { expect_error(generate(hyp_prop, type = NULL), "NULL.*type") -}) \ No newline at end of file +}) diff --git a/tests/testthat/test-hypothesize.R b/tests/testthat/test-hypothesize.R index 876ea935..828a815d 100644 --- a/tests/testthat/test-hypothesize.R +++ b/tests/testthat/test-hypothesize.R @@ -67,47 +67,47 @@ test_that("auto `type` works (hypothesize)", { }) test_that("hypothesize arguments function",{ - + mtcars_f <- dplyr::mutate(mtcars, cyl = factor(cyl)) mtcars_s <- mtcars_f %>% specify(response = mpg) matrix1 <- matrix(data = NA, nrow = 3, ncol = 3) - + expect_error(hypothesize(matrix1)) expect_error(hypothesize(mtcars_s, null = NA)) expect_error(hypothesize(mtcars_s)) - + expect_error(mtcars_s %>% hypothesize(null = "point", mean = 3)) - + expect_error(mtcars_s %>% hypothesize(null = "independence")) expect_error(mtcars_s %>% hypothesize(null = "point")) # Produces error on win-build # expect_warning(mtcars_s %>% # hypothesize(null = c("point", "independence"), mu = 3)) - + expect_error(mtcars %>% dplyr::select(vs) %>% hypothesize(null = "point", mu = 1)) - - expect_error(mtcars %>% specify(response = vs) %>% + + expect_error(mtcars %>% specify(response = vs) %>% hypothesize(null = "point", mu = 1)) - - expect_error(mtcars %>% specify(response = vs, success = "1") %>% + + expect_error(mtcars %>% specify(response = vs, success = "1") %>% hypothesize(null = "point", p = 1.1)) - expect_error(mtcars %>% specify(response = vs, success = "1") %>% + expect_error(mtcars %>% specify(response = vs, success = "1") %>% hypothesize(null = "point", p = -23)) - - expect_error(mtcars_s %>% - hypothesize(null = "point", + + expect_error(mtcars_s %>% + hypothesize(null = "point", p = c("4" = .2, "6" = .25, "8" = .25))) - + expect_error(mtcars_s %>% hypothesize(null = "point", p = 0.2)) expect_warning(mtcars %>% specify(mpg ~ vs) %>% hypothesize(null = "independence", p = 0.5)) - + expect_error(mtcars_s %>% hypothesize()) }) test_that("params correct", { - expect_error(hypothesize(one_prop_specify, + expect_error(hypothesize(one_prop_specify, null = "point", mu = 2)) expect_error(hypothesize(one_mean_specify, null = "point", mean = 0.5)) diff --git a/tests/testthat/test-p_value.R b/tests/testthat/test-p_value.R index 7735ad8e..75e6ff4b 100644 --- a/tests/testthat/test-p_value.R +++ b/tests/testthat/test-p_value.R @@ -1,16 +1,16 @@ context("p_value") -iris_tbl <- iris %>% +iris_tbl <- iris %>% dplyr::mutate(Sepal.Length.Group = dplyr::if_else(Sepal.Length > 5, ">5", "<=5"), Sepal.Width.Group = - dplyr::if_else(Sepal.Width > 3, "large", "small")) + dplyr::if_else(Sepal.Width > 3, "large", "small")) iris_calc <- iris_tbl %>% specify(Sepal.Length.Group ~ Sepal.Width.Group, success = "<=5") %>% hypothesize(null = "independence") %>% - generate(reps = 1000) %>% + generate(reps = 1000) %>% calculate(stat = "diff in props", order = c("large", "small")) set.seed(2018) @@ -18,48 +18,48 @@ test_df <- tibble::tibble(stat = rnorm(100)) test_that("direction is appropriate", { expect_error( - test_df %>% + test_df %>% p_value(obs_stat = 0.5, direction = "righ") ) }) test_that("p_value makes sense", { expect_lt( - iris_calc %>% - p_value(obs_stat = 0.1, direction = "right") %>% - dplyr::pull(), + iris_calc %>% + p_value(obs_stat = 0.1, direction = "right") %>% + dplyr::pull(), expected = 0.1 ) expect_gt( - iris_calc %>% - 
p_value(obs_stat = -0.1, direction = "greater") %>% - dplyr::pull(), + iris_calc %>% + p_value(obs_stat = -0.1, direction = "greater") %>% + dplyr::pull(), expected = 0.9 ) expect_equal( - iris_calc %>% - p_value(obs_stat = median(iris_calc$stat), - direction = "both") %>% - dplyr::pull(), + iris_calc %>% + p_value(obs_stat = median(iris_calc$stat), + direction = "both") %>% + dplyr::pull(), expected = 1 ) expect_lt( - iris_calc %>% - p_value(obs_stat = -0.2, direction = "left") %>% - dplyr::pull(), + iris_calc %>% + p_value(obs_stat = -0.2, direction = "left") %>% + dplyr::pull(), expected = 0.02 ) expect_gt( - iris_calc %>% - p_value(obs_stat = -0.2, direction = "right") %>% - dplyr::pull(), + iris_calc %>% + p_value(obs_stat = -0.2, direction = "right") %>% + dplyr::pull(), expected = 0.98 ) expect_equal( - iris_calc %>% - get_pvalue(obs_stat = median(iris_calc$stat) + 1, - direction = "two_sided") %>% - dplyr::pull(), + iris_calc %>% + get_pvalue(obs_stat = median(iris_calc$stat) + 1, + direction = "two_sided") %>% + dplyr::pull(), expected = 0 ) }) diff --git a/tests/testthat/test-rep_sample_n.R b/tests/testthat/test-rep_sample_n.R index 69af2136..8e9f9544 100644 --- a/tests/testthat/test-rep_sample_n.R +++ b/tests/testthat/test-rep_sample_n.R @@ -9,19 +9,18 @@ population <- tibble::data_frame( test_that("rep_sample_n works", { expect_silent(population %>% rep_sample_n(size = 2, reps = 10)) - expect_error(population %>% - rep_sample_n(size = 2, reps = 10, + expect_error(population %>% + rep_sample_n(size = 2, reps = 10, prob = rep(x = 1/5, times = 100))) - expect_error(population %>% + expect_error(population %>% rep_sample_n(size = 2, reps = 10, prob = c(1/2, 1/2))) - expect_error(population %>% + expect_error(population %>% rep_sample_n(size = 2, reps = 10, prob = c(0.25, 1/5, 1/5, 1/5, 0.15))) test_rep <- population %>% rep_sample_n(size = 2, reps = 10) expect_equal(c("replicate", names(population)), names(test_rep)) - - -}) + +}) diff --git a/tests/testthat/test-utils.R b/tests/testthat/test-utils.R index 5742ef7d..f86f18b0 100644 --- a/tests/testthat/test-utils.R +++ b/tests/testthat/test-utils.R @@ -16,14 +16,14 @@ test_that("message_glue handles `NULL`", { test_that("glue_null works", { adj <- "quick" - + expect_equal( glue_null( "The {adj} brown {null_val} jumps ", "over the lazy {NULL}." ), "The quick brown NULL jumps over the lazy NULL." 
) - + expect_equal( glue_null("The {adj}", "brown", .sep = "-"), "The quick-brown" diff --git a/tests/testthat/test-visualize.R b/tests/testthat/test-visualize.R index 0a3e3a46..55fc5ad0 100644 --- a/tests/testthat/test-visualize.R +++ b/tests/testthat/test-visualize.R @@ -6,20 +6,20 @@ library(dplyr) Sepal.Width_resamp <- iris %>% specify(Sepal.Width ~ NULL) %>% hypothesize(null = "point", med = 3) %>% - generate(reps = 10, type = "bootstrap") %>% - calculate(stat = "median") + generate(reps = 10, type = "bootstrap") %>% + calculate(stat = "median") -iris_tbl <- tibble::as_tibble(iris) %>% +iris_tbl <- tibble::as_tibble(iris) %>% dplyr::mutate(Sepal.Length.Group = dplyr::if_else(Sepal.Length > 5, ">5", "<=5"), Sepal.Width.Group = - dplyr::if_else(Sepal.Width > 3, "large", "small")) - -obs_slope <- lm(Sepal.Length ~ Sepal.Width, - data = iris_tbl) %>% - broom::tidy() %>% - dplyr::filter(term == "Sepal.Width") %>% - dplyr::select(estimate) %>% + dplyr::if_else(Sepal.Width > 3, "large", "small")) + +obs_slope <- lm(Sepal.Length ~ Sepal.Width, + data = iris_tbl) %>% + broom::tidy() %>% + dplyr::filter(term == "Sepal.Width") %>% + dplyr::select(estimate) %>% dplyr::pull() obs_diff <- iris_tbl %>% @@ -28,14 +28,14 @@ obs_diff <- iris_tbl %>% summarize(diff(prop)) %>% pull() -obs_z <- sqrt(stats::prop.test(x = table(iris_tbl$Sepal.Length.Group, +obs_z <- sqrt(stats::prop.test(x = table(iris_tbl$Sepal.Length.Group, iris_tbl$Sepal.Width.Group), n = nrow(iris_tbl), alternative = "two.sided", correct = FALSE)$statistic) obs_diff_mean <- iris_tbl %>% - group_by(Sepal.Length.Group) %>% + group_by(Sepal.Length.Group) %>% summarize(mean_sepal_width = mean(Sepal.Width)) %>% summarize(diff(mean_sepal_width)) %>% pull() @@ -47,7 +47,7 @@ obs_t <- iris_tbl %>% obs_F <- anova( aov(formula = Sepal.Width ~ Species, data = iris_tbl) )$`F value`[1] - + test_that("visualize basic tests", { expect_silent(visualize(Sepal.Width_resamp)) @@ -57,235 +57,235 @@ test_that("visualize basic tests", { expect_silent(iris_tbl %>% specify(Sepal.Length ~ Sepal.Width) %>% hypothesize(null = "independence") %>% - generate(reps = 100, type = "permute") %>% - calculate(stat = "slope") %>% + generate(reps = 100, type = "permute") %>% + calculate(stat = "slope") %>% visualize(obs_stat = obs_slope, direction = "right")) - + #obs_stat not specified - expect_error(iris_tbl %>% + expect_error(iris_tbl %>% specify(Sepal.Width.Group ~ Sepal.Length.Group, - success = "large") %>% - hypothesize(null = "independence") %>% - generate(reps = 100, type = "permute") %>% - calculate(stat = "diff in props", - order = c(">5", "<=5")) %>% + success = "large") %>% + hypothesize(null = "independence") %>% + generate(reps = 100, type = "permute") %>% + calculate(stat = "diff in props", + order = c(">5", "<=5")) %>% visualize(direction = "both") ) - - expect_silent(iris_tbl %>% + + expect_silent(iris_tbl %>% specify(Sepal.Width.Group ~ Sepal.Length.Group, - success = "large") %>% - hypothesize(null = "independence") %>% - generate(reps = 100, type = "permute") %>% - calculate(stat = "diff in props", - order = c(">5", "<=5")) %>% + success = "large") %>% + hypothesize(null = "independence") %>% + generate(reps = 100, type = "permute") %>% + calculate(stat = "diff in props", + order = c(">5", "<=5")) %>% visualize(direction = "both", obs_stat = obs_diff) ) - - expect_warning(iris_tbl %>% + + expect_warning(iris_tbl %>% specify(Sepal.Width.Group ~ Sepal.Length.Group, - success = "large") %>% - hypothesize(null = "independence") %>% - calculate(stat = "z", 
order = c(">5", "<=5")) %>% + success = "large") %>% + hypothesize(null = "independence") %>% + calculate(stat = "z", order = c(">5", "<=5")) %>% visualize(method = "theoretical") ) - + # diff in props and z on different scales - expect_error(expect_warning(iris_tbl %>% + expect_error(expect_warning(iris_tbl %>% specify(Sepal.Width.Group ~ Sepal.Length.Group, - success = "large") %>% - hypothesize(null = "independence") %>% - generate(reps = 100, type = "permute") %>% - calculate(stat = "diff in props", - order = c(">5", "<=5")) %>% - visualize(method = "both", direction = "both", + success = "large") %>% + hypothesize(null = "independence") %>% + generate(reps = 100, type = "permute") %>% + calculate(stat = "diff in props", + order = c(">5", "<=5")) %>% + visualize(method = "both", direction = "both", obs_stat = obs_diff) )) - - expect_silent(iris_tbl %>% + + expect_silent(iris_tbl %>% specify(Sepal.Width.Group ~ Sepal.Length.Group, - success = "large") %>% - hypothesize(null = "independence") %>% - generate(reps = 100, type = "permute") %>% - calculate(stat = "diff in props", - order = c(">5", "<=5")) %>% + success = "large") %>% + hypothesize(null = "independence") %>% + generate(reps = 100, type = "permute") %>% + calculate(stat = "diff in props", + order = c(">5", "<=5")) %>% visualize() ) - - expect_warning(iris_tbl %>% + + expect_warning(iris_tbl %>% specify(Sepal.Width.Group ~ Sepal.Length.Group, - success = "large") %>% - hypothesize(null = "independence") %>% - generate(reps = 100, type = "permute") %>% - calculate(stat = "z", - order = c(">5", "<=5")) %>% + success = "large") %>% + hypothesize(null = "independence") %>% + generate(reps = 100, type = "permute") %>% + calculate(stat = "z", + order = c(">5", "<=5")) %>% visualize(method = "both", direction = "both", obs_stat = obs_z) ) - - expect_warning(iris_tbl %>% + + expect_warning(iris_tbl %>% specify(Sepal.Width.Group ~ Sepal.Length.Group, - success = "large") %>% - hypothesize(null = "independence") %>% - generate(reps = 100, type = "permute") %>% - calculate(stat = "z", - order = c("<=5", ">5")) %>% + success = "large") %>% + hypothesize(null = "independence") %>% + generate(reps = 100, type = "permute") %>% + calculate(stat = "z", + order = c("<=5", ">5")) %>% visualize(method = "both", direction = "both", obs_stat = -obs_z) ) - - expect_warning(iris_tbl %>% - specify(Sepal.Length ~ Sepal.Width.Group) %>% - hypothesize(null = "independence") %>% - generate(reps = 100, type = "permute") %>% - calculate(stat = "t", order = c("small", "large") ) %>% - visualize(method = "both", direction = "left", + + expect_warning(iris_tbl %>% + specify(Sepal.Length ~ Sepal.Width.Group) %>% + hypothesize(null = "independence") %>% + generate(reps = 100, type = "permute") %>% + calculate(stat = "t", order = c("small", "large") ) %>% + visualize(method = "both", direction = "left", obs_stat = -obs_t) ) - - expect_warning(iris_tbl %>% - specify(Sepal.Length ~ Sepal.Width.Group) %>% - hypothesize(null = "independence") %>% + + expect_warning(iris_tbl %>% + specify(Sepal.Length ~ Sepal.Width.Group) %>% + hypothesize(null = "independence") %>% # generate(reps = 100, type = "permute") %>% - calculate(stat = "t", order = c("small", "large") ) %>% - visualize(method = "theoretical", direction = "left", + calculate(stat = "t", order = c("small", "large") ) %>% + visualize(method = "theoretical", direction = "left", obs_stat = -obs_t) ) - - expect_warning(iris_tbl %>% - specify(Sepal.Length ~ Sepal.Length.Group) %>% - hypothesize(null = 
"independence") %>% + + expect_warning(iris_tbl %>% + specify(Sepal.Length ~ Sepal.Length.Group) %>% + hypothesize(null = "independence") %>% visualize(method = "theoretical") ) - - expect_warning(iris_tbl %>% - specify(Sepal.Length ~ Species) %>% - hypothesize(null = "independence") %>% + + expect_warning(iris_tbl %>% + specify(Sepal.Length ~ Species) %>% + hypothesize(null = "independence") %>% visualize(method = "theoretical") ) - - expect_warning(iris_tbl %>% - specify(Sepal.Length ~ Species) %>% - hypothesize(null = "independence") %>% - generate(reps = 100, type = "permute") %>% - calculate(stat = "F") %>% - visualize(method = "both", obs_stat = obs_F, + + expect_warning(iris_tbl %>% + specify(Sepal.Length ~ Species) %>% + hypothesize(null = "independence") %>% + generate(reps = 100, type = "permute") %>% + calculate(stat = "F") %>% + visualize(method = "both", obs_stat = obs_F, direction = "right") ) - - expect_warning(iris_tbl %>% - specify(Sepal.Length ~ Species) %>% - hypothesize(null = "independence") %>% - generate(reps = 100, type = "permute") %>% - calculate(stat = "F") %>% - visualize(method = "both", obs_stat = obs_F, + + expect_warning(iris_tbl %>% + specify(Sepal.Length ~ Species) %>% + hypothesize(null = "independence") %>% + generate(reps = 100, type = "permute") %>% + calculate(stat = "F") %>% + visualize(method = "both", obs_stat = obs_F, direction = "left") ) - - expect_warning(iris_tbl %>% - specify(Sepal.Width.Group ~ Species, - success = "large") %>% - hypothesize(null = "independence") %>% - generate(reps = 100, type = "permute") %>% - calculate(stat = "Chisq") %>% - visualize(method = "both", obs_stat = obs_F, + + expect_warning(iris_tbl %>% + specify(Sepal.Width.Group ~ Species, + success = "large") %>% + hypothesize(null = "independence") %>% + generate(reps = 100, type = "permute") %>% + calculate(stat = "Chisq") %>% + visualize(method = "both", obs_stat = obs_F, direction = "right") ) - - expect_warning(iris_tbl %>% - specify(Sepal.Width.Group ~ Species, - success = "large") %>% - hypothesize(null = "independence") %>% + + expect_warning(iris_tbl %>% + specify(Sepal.Width.Group ~ Species, + success = "large") %>% + hypothesize(null = "independence") %>% #calculate(stat = "Chisq") %>% - visualize(method = "theoretical", obs_stat = obs_F, + visualize(method = "theoretical", obs_stat = obs_F, direction = "right") ) - - expect_warning(iris_tbl %>% - specify(Species ~ NULL) %>% - hypothesize(null = "point", + + expect_warning(iris_tbl %>% + specify(Species ~ NULL) %>% + hypothesize(null = "point", p = c("setosa" = 0.4, "versicolor" = 0.4, - "virginica" = 0.2)) %>% - generate(reps = 100, type = "simulate") %>% - calculate(stat = "Chisq") %>% + "virginica" = 0.2)) %>% + generate(reps = 100, type = "simulate") %>% + calculate(stat = "Chisq") %>% visualize(method = "both") ) - + #traditional instead of theoretical - expect_error(iris_tbl %>% - specify(Species ~ NULL) %>% - hypothesize(null = "point", + expect_error(iris_tbl %>% + specify(Species ~ NULL) %>% + hypothesize(null = "point", p = c("setosa" = 0.4, "versicolor" = 0.4, - "virginica" = 0.2)) %>% + "virginica" = 0.2)) %>% # generate(reps = 100, type = "simulate") %>% # calculate(stat = "Chisq") %>% visualize(method = "traditional") ) - - expect_warning(iris_tbl %>% - specify(Species ~ NULL) %>% - hypothesize(null = "point", + + expect_warning(iris_tbl %>% + specify(Species ~ NULL) %>% + hypothesize(null = "point", p = c("setosa" = 0.4, "versicolor" = 0.4, - "virginica" = 0.2)) %>% + "virginica" = 0.2)) %>% 
#generate(reps = 100, type = "simulate") %>% #calculate(stat = "Chisq") %>% visualize(method = "theoretical") ) - - expect_silent(iris_tbl %>% - specify(Petal.Width ~ Sepal.Width.Group) %>% - hypothesize(null = "independence") %>% - generate(reps = 10, type = "permute") %>% - calculate(stat = "diff in means", - order = c("large", "small")) %>% + + expect_silent(iris_tbl %>% + specify(Petal.Width ~ Sepal.Width.Group) %>% + hypothesize(null = "independence") %>% + generate(reps = 10, type = "permute") %>% + calculate(stat = "diff in means", + order = c("large", "small")) %>% visualize(direction = "both", obs_stat = obs_diff_mean) ) - + # Produces warning first for not checking conditions but would also error - expect_error(expect_warning(iris_tbl %>% - specify(Petal.Width ~ Sepal.Width.Group) %>% - hypothesize(null = "independence") %>% - generate(reps = 100, type = "permute") %>% - calculate(stat = "diff in means", - order = c("large", "small")) %>% + expect_error(expect_warning(iris_tbl %>% + specify(Petal.Width ~ Sepal.Width.Group) %>% + hypothesize(null = "independence") %>% + generate(reps = 100, type = "permute") %>% + calculate(stat = "diff in means", + order = c("large", "small")) %>% visualize(method = "both", direction = "both", obs_stat = obs_diff_mean) )) - - expect_warning(iris_tbl %>% - specify(Petal.Width ~ Sepal.Width.Group) %>% - hypothesize(null = "independence") %>% - generate(reps = 100, type = "permute") %>% - calculate(stat = "diff in means", - order = c("large", "small")) %>% + + expect_warning(iris_tbl %>% + specify(Petal.Width ~ Sepal.Width.Group) %>% + hypothesize(null = "independence") %>% + generate(reps = 100, type = "permute") %>% + calculate(stat = "diff in means", + order = c("large", "small")) %>% visualize(method = "theoretical", direction = "both", obs_stat = obs_diff_mean) ) - - expect_warning(iris_tbl %>% - specify(Sepal.Width.Group ~ NULL, success = "small") %>% - hypothesize(null = "point", p = 0.8) %>% + + expect_warning(iris_tbl %>% + specify(Sepal.Width.Group ~ NULL, success = "small") %>% + hypothesize(null = "point", p = 0.8) %>% # generate(reps = 100, type = "simulate") %>% # calculate(stat = "z") %>% - visualize(method = "theoretical", + visualize(method = "theoretical", obs_stat = 2, # Should probably update direction = "both") ) - + expect_silent(iris_tbl %>% specify(Petal.Width ~ NULL) %>% - hypothesize(null = "point", mu = 1.3) %>% - generate(reps = 100, type = "bootstrap") %>% - calculate(stat = "mean") %>% + hypothesize(null = "point", mu = 1.3) %>% + generate(reps = 100, type = "bootstrap") %>% + calculate(stat = "mean") %>% visualize(direction = "left", obs_stat = mean(iris$Petal.Width)) ) - - + + }) test_that("get_percentile works", { @@ -293,114 +293,114 @@ test_that("get_percentile works", { }) test_that("obs_stat as a data.frame works", { - mean_petal_width <- iris_tbl %>% + mean_petal_width <- iris_tbl %>% specify(Petal.Width ~ NULL) %>% calculate(stat = "mean") - expect_silent(iris_tbl %>% + expect_silent(iris_tbl %>% specify(Petal.Width ~ NULL) %>% hypothesize(null = "point", mu = 4) %>% generate(reps = 100, type = "bootstrap") %>% - calculate(stat = "mean") %>% + calculate(stat = "mean") %>% visualize(obs_stat = mean_petal_width) ) mean_df_test <- data.frame(x = c(4.1, 1), y = c(1, 2)) - expect_warning(iris_tbl %>% + expect_warning(iris_tbl %>% specify(Petal.Width ~ NULL) %>% hypothesize(null = "point", mu = 4) %>% generate(reps = 100, type = "bootstrap") %>% - calculate(stat = "mean") %>% + calculate(stat = "mean") %>% 
visualize(obs_stat = mean_df_test) ) - + }) test_that('method = "both" behaves nicely', { # stop_glue('`generate()` and `calculate()` are both required ', # 'to be done prior to `visualize(method = "both")`') - expect_error(iris_tbl %>% + expect_error(iris_tbl %>% specify(Petal.Width ~ NULL) %>% hypothesize(null = "point", mu = 4) %>% generate(reps = 100, type = "bootstrap") %>% # calculate(stat = "mean") %>% visualize(method = "both")) - + # - expect_warning(iris_tbl %>% + expect_warning(iris_tbl %>% specify(Petal.Width ~ Sepal.Length.Group) %>% hypothesize(null = "point", mu = 4) %>% generate(reps = 10, type = "bootstrap") %>% - calculate(stat = "t", order = c(">5", "<=5")) %>% + calculate(stat = "t", order = c(">5", "<=5")) %>% visualize(method = "both") - ) + ) }) test_that("Traditional right-tailed tests have warning if not right-tailed", { - expect_warning(iris_tbl %>% - specify(Sepal.Width.Group ~ Species, - success = "large") %>% - hypothesize(null = "independence") %>% - generate(reps = 100, type = "permute") %>% - calculate(stat = "Chisq") %>% + expect_warning(iris_tbl %>% + specify(Sepal.Width.Group ~ Species, + success = "large") %>% + hypothesize(null = "independence") %>% + generate(reps = 100, type = "permute") %>% + calculate(stat = "Chisq") %>% visualize(method = "both", obs_stat = 2, direction = "left") ) - expect_warning(iris_tbl %>% - specify(Sepal.Length ~ Species) %>% - hypothesize(null = "independence") %>% - generate(reps = 100, type = "permute") %>% - calculate(stat = "F") %>% - visualize(method = "both", obs_stat = 2, + expect_warning(iris_tbl %>% + specify(Sepal.Length ~ Species) %>% + hypothesize(null = "independence") %>% + generate(reps = 100, type = "permute") %>% + calculate(stat = "F") %>% + visualize(method = "both", obs_stat = 2, direction = "two_sided") ) - expect_warning(iris_tbl %>% - specify(Sepal.Width.Group ~ Species, - success = "large") %>% - hypothesize(null = "independence") %>% + expect_warning(iris_tbl %>% + specify(Sepal.Width.Group ~ Species, + success = "large") %>% + hypothesize(null = "independence") %>% # generate(reps = 100, type = "permute") %>% - calculate(stat = "Chisq") %>% - visualize(method = "theoretical", obs_stat = 2, + calculate(stat = "Chisq") %>% + visualize(method = "theoretical", obs_stat = 2, direction = "left") ) - expect_warning(iris_tbl %>% - specify(Sepal.Length ~ Species) %>% - hypothesize(null = "independence") %>% + expect_warning(iris_tbl %>% + specify(Sepal.Length ~ Species) %>% + hypothesize(null = "independence") %>% # generate(reps = 100, type = "permute") %>% - calculate(stat = "F") %>% - visualize(method = "theoretical", obs_stat = 2, + calculate(stat = "F") %>% + visualize(method = "theoretical", obs_stat = 2, direction = "two_sided") ) - + }) test_that("confidence interval plots are working",{ - - iris_boot <- iris_tbl %>% + + iris_boot <- iris_tbl %>% specify(Sepal.Width.Group ~ Sepal.Length.Group, - success = "large") %>% - generate(reps = 100) %>% - calculate(stat = "diff in props", + success = "large") %>% + generate(reps = 100) %>% + calculate(stat = "diff in props", order = c(">5", "<=5")) - + df_error <- tibble::tibble(col1 = rnorm(5), col2 = rnorm(5)) vec_error <- 1:10 - + perc_ci <- iris_boot %>% get_ci() - + expect_error( - iris_boot %>% visualize(endpoints = df_error) + iris_boot %>% visualize(endpoints = df_error) ) - + expect_warning( - iris_boot %>% visualize(endpoints = vec_error) + iris_boot %>% visualize(endpoints = vec_error) ) - + expect_silent( iris_boot %>% visualize(endpoints = 
perc_ci, direction = "between") ) - + expect_warning( iris_boot %>% visualize(obs_stat = 3, endpoints = perc_ci) ) - + }) diff --git a/tests/testthat/test-wrappers.R b/tests/testthat/test-wrappers.R index d210dc34..b43a331d 100644 --- a/tests/testthat/test-wrappers.R +++ b/tests/testthat/test-wrappers.R @@ -1,17 +1,17 @@ context("wrappers") iris2 <- iris %>% - dplyr::filter(Species != "setosa") %>% + dplyr::filter(Species != "setosa") %>% droplevels(.$Species) iris3 <- iris %>% - dplyr::mutate(Sepal.Length.Group = + dplyr::mutate(Sepal.Length.Group = dplyr::if_else(Sepal.Length > 5, ">5", "<=5")) test_that("t_test works", { # order is missing expect_error(iris2 %>% t_test(Sepal.Width ~ Species)) - + expect_error(iris2 %>% t_test(response = "Sepal.Width", explanatory = "Species")) ## Not implemented @@ -23,10 +23,10 @@ test_that("chisq_test works", { expect_silent(iris3 %>% chisq_test(Sepal.Length.Group ~ Species)) new_way <- iris3 %>% chisq_test(Sepal.Length.Group ~ Species) old_way <- chisq.test(x = table(iris3$Species, - iris3$Sepal.Length.Group)) %>% + iris3$Sepal.Length.Group)) %>% broom::glance() %>% dplyr::select(statistic, chisq_df = parameter, p_value = p.value) - + expect_equal(new_way, old_way) ## Not implemented # expect_silent(iris3 %>% chisq_test(response = Sepal.Length.Group, @@ -40,18 +40,18 @@ test_that("_stat functions work", { ) another_way <- iris3 %>% chisq_test(Sepal.Length.Group ~ Species) %>% - dplyr::select(statistic) %>% + dplyr::select(statistic) %>% dplyr::rename(stat = statistic) - obs_stat_way <- iris3 %>% + obs_stat_way <- iris3 %>% chisq_stat(Sepal.Length.Group ~ Species) one_more <- chisq.test( - table(iris3$Species, + table(iris3$Species, iris3$Sepal.Length.Group) )$statistic expect_equivalent(another_way, obs_stat_way) expect_equivalent(one_more, dplyr::pull(obs_stat_way)) - + # Goodness of Fit expect_error(iris3 %>% chisq_test(Species ~ NULL)) expect_error(iris3 %>% chisq_stat(Species ~ NULL)) @@ -61,81 +61,81 @@ test_that("_stat functions work", { # obs_stat_way <- iris3 %>% # chisq_stat(Species ~ NULL) # expect_equivalent(another_way, obs_stat_way) - + # Two sample t expect_silent( - iris2 %>% t_stat(Sepal.Width ~ Species, + iris2 %>% t_stat(Sepal.Width ~ Species, order = c("virginica", "versicolor")) ) another_way <- iris2 %>% t_test(Sepal.Width ~ Species, order = c("virginica", "versicolor")) %>% dplyr::select(statistic) - obs_stat_way <- iris2 %>% + obs_stat_way <- iris2 %>% t_stat(Sepal.Width ~ Species, order = c("virginica", "versicolor")) expect_equivalent(another_way, obs_stat_way) - + # One sample t expect_silent(iris2 %>% t_stat(Sepal.Width ~ NULL)) another_way <- iris2 %>% t_test(Sepal.Width ~ NULL) %>% dplyr::select(statistic) - obs_stat_way <- iris2 %>% + obs_stat_way <- iris2 %>% t_stat(Sepal.Width ~ NULL) expect_equivalent(another_way, obs_stat_way) }) test_that("conf_int argument works", { expect_equal( - names(iris2 %>% + names(iris2 %>% t_test(Sepal.Width ~ Species, order = c("virginica", "versicolor"), conf_int = FALSE)), c("statistic", "t_df", "p_value", "alternative") ) expect_equal( - names(iris2 %>% + names(iris2 %>% t_test(Sepal.Width ~ Species, order = c("virginica", "versicolor"), conf_int = TRUE)), c("statistic", "t_df", "p_value", "alternative", "lower_ci", "upper_ci") ) - - ci_test <- iris2 %>% + + ci_test <- iris2 %>% t_test(Sepal.Width ~ Species, order = c("versicolor", "virginica"), conf_int = TRUE, conf_level = 0.9) - old_way <- t.test(formula = Sepal.Width ~ Species, + old_way <- t.test(formula = Sepal.Width ~ Species, 
data = iris2, conf.level = 0.9)[["conf.int"]] expect_equal(ci_test$lower_ci[1], old_way[1]) expect_equal(ci_test$upper_ci[1], old_way[2]) - + expect_error( - iris2 %>% + iris2 %>% t_test(Petal.Width ~ Species, order = c("versicolor", "virginica"), conf_int = TRUE, conf_level = 1.1) ) - + # Check that var.equal produces different results # Thanks for finding this @EllaKaye! set.seed(2018) iris_small <- iris2 %>% sample_n(10) - no_var_equal <- iris_small %>% + no_var_equal <- iris_small %>% t_stat(Petal.Width ~ Species, order = c("versicolor", "virginica")) - var_equal <- iris_small %>% + var_equal <- iris_small %>% t_stat(Petal.Width ~ Species, order = c("versicolor", "virginica"), var.equal = TRUE) expect_false( no_var_equal == var_equal ) - - shortcut_no_var_equal <- iris_small %>% - specify(Petal.Width ~ Species) %>% + + shortcut_no_var_equal <- iris_small %>% + specify(Petal.Width ~ Species) %>% calculate(stat = "t", order = c("versicolor", "virginica")) - - shortcut_var_equal <- iris_small %>% - specify(Petal.Width ~ Species) %>% + + shortcut_var_equal <- iris_small %>% + specify(Petal.Width ~ Species) %>% calculate(stat = "t", order = c("versicolor", "virginica"), var.equal = TRUE) expect_false( shortcut_no_var_equal == shortcut_var_equal ) - + }) From 6b18823fa07c0c35b4641f67bdff9591c756b6fb Mon Sep 17 00:00:00 2001 From: "evgeni.chasnovski" Date: Sun, 5 Aug 2018 12:05:16 +0300 Subject: [PATCH 03/78] Update spaces. Details: - Remove trailing spaces in comments (both raw and roxygen). - Update spacing in function calls and definitions. - Remove spaces after `!!` and `!!!` (also a tidyverse style guide). Used {styler} (1.0.2.9000 from commit 1dd6b04, as it has some bugs fixed): `styler::style_pkg(scope = "spaces", strict = FALSE, include_roxygen_examples = FALSE)`. After that manually remove spaces around `/` in simple fractions in test files. --- R/calculate.R | 28 +++--- R/conf_int.R | 26 +++--- R/generate.R | 28 +++--- R/hypothesize.R | 24 ++--- R/infer.R | 6 +- R/p_value.R | 46 +++++----- R/print_methods.R | 2 +- R/rep_sample_n.R | 10 +- R/set_params.R | 44 ++++----- R/specify.R | 26 +++--- R/utils.R | 102 ++++++++++---------- R/visualize.R | 148 +++++++++++++++--------------- R/wrappers.R | 44 ++++----- tests/testthat/test-calculate.R | 18 ++-- tests/testthat/test-hypothesize.R | 4 +- tests/testthat/test-visualize.R | 34 +++---- tests/testthat/test-wrappers.R | 2 +- 17 files changed, 296 insertions(+), 296 deletions(-) diff --git a/R/calculate.R b/R/calculate.R index 56147753..668b9680 100755 --- a/R/calculate.R +++ b/R/calculate.R @@ -1,5 +1,5 @@ #' Calculate summary statistics -#' +#' #' @param x The output from [generate()] for computation-based inference or the #' output from [hypothesize()] piped in to here for theory-based inference. #' @param stat A string giving the type of the statistic to calculate. Current @@ -12,7 +12,7 @@ #' difference in means, medians, or proportions and t and z statistics. #' @param ... To pass options like `na.rm = TRUE` into functions like #' [mean()][base::mean()], [sd()][stats::sd()], etc. -#' +#' #' @return A tibble containing a `stat` column of calculated statistics. #' #' @examples @@ -23,7 +23,7 @@ #' hypothesize(null = "independence") %>% #' generate(reps = 100, type = "permute") %>% #' calculate(stat = "diff in props", order = c("1", "0")) -#' +#' #' @importFrom dplyr group_by summarize n #' @importFrom rlang !! sym quo enquo eval_tidy #' @export @@ -78,7 +78,7 @@ calculate <- function(x, "a `generate()` step?" 
) - else if (!(stat %in% c("Chisq", "prop"))){ + else if (!(stat %in% c("Chisq", "prop"))) { # From `hypothesize()` to `calculate()` # Catch-all if generate was not called # warning_glue("You unexpectantly went from `hypothesize()` to ", @@ -182,7 +182,7 @@ calc_impl.prop <- function(stat, x, order, ...) { x %>% dplyr::group_by(replicate) %>% dplyr::summarize(stat = mean(!!sym(col) == success, - #rlang::eval_tidy(col) == rlang::eval_tidy(success), + # rlang::eval_tidy(col) == rlang::eval_tidy(success), ...)) } @@ -224,7 +224,7 @@ calc_impl.diff_in_means <- function(stat, x, order, ...) { calc_impl.diff_in_medians <- function(stat, x, order, ...) { x %>% - dplyr::group_by(replicate,!!(attr(x, "explanatory"))) %>% + dplyr::group_by(replicate, !!(attr(x, "explanatory"))) %>% dplyr::summarize(xtilde = stats::median(!!attr(x, "response"), ...)) %>% dplyr::group_by(replicate) %>% @@ -301,7 +301,7 @@ calc_impl.diff_in_props <- function(stat, x, order, ...) { success <- attr(x, "success") x %>% - dplyr::group_by(replicate,!!attr(x, "explanatory")) %>% + dplyr::group_by(replicate, !!attr(x, "explanatory")) %>% dplyr::summarize(prop = mean(!!sym(col) == success, ...)) %>% dplyr::summarize(stat = prop[!!attr(x, "explanatory") == order[1]] - prop[!!attr(x, "explanatory") == order[2]]) @@ -327,21 +327,21 @@ calc_impl.t <- function(stat, x, order, ...) { # else if ( (attr(x, "theory_type") == "Slope/correlation with t") && # stat == "slope"){ # explan_string <- as.character(attr(x, "explanatory")) - # + # # x %>% # dplyr::summarize(stat = summary(stats::lm( # !!attr(x, "response") ~ !!attr(x, "explanatory") # ))[["coefficients"]][explan_string, "t value"]) # } - # + # # # Standardized correlation # else if ( (attr(x, "theory_type") == "Slope/correlation with t") && # stat == "correlation"){ - # - # x %>% - # dplyr::summarize(corr = cor(!!attr(x, "explanatory"), + # + # x %>% + # dplyr::summarize(corr = cor(!!attr(x, "explanatory"), # !!attr(x, "response")) - # ) %>% + # ) %>% # dplyr::mutate(stat = corr * (sqrt(nrow(x) - 2)) / sqrt(1 - corr ^ 2)) # } @@ -397,7 +397,7 @@ calc_impl.z <- function(stat, x, order, ...) { + p_hat * (1 - p_hat) / n2), stat = diff_prop / denom ) %>% - dplyr::select(-total_suc,-n1,-n2) + dplyr::select(-total_suc, -n1, -n2) df_out diff --git a/R/conf_int.R b/R/conf_int.R index 81bfffd2..887a58ad 100644 --- a/R/conf_int.R +++ b/R/conf_int.R @@ -1,8 +1,8 @@ #' Compute confidence interval -#' +#' #' Only simulation-based methods are (currently only) supported. #' `get_confidence_interval()` and `get_ci()` are both aliases of `conf_int()`. -#' +#' #' @param x Data frame of calculated statistics or containing attributes of #' theoretical distribution values. Currently, dependent on statistics being #' stored in `stat` column as created in [calculate()] function. @@ -16,7 +16,7 @@ #' #' @return A 1 x 2 tibble with values corresponding to lower and upper values in #' the confidence interval. 
-#' +#' #' @examples #' mtcars_df <- mtcars %>% #' dplyr::mutate(am = factor(am)) @@ -29,18 +29,18 @@ #' calculate(stat = "diff in means", order = c("1", "0")) #' bootstrap_distn %>% conf_int(level = 0.9) #' bootstrap_distn %>% conf_int(type = "se", point_estimate = d_hat) -#' +#' #' @name get_ci NULL #' @rdname get_ci #' @export conf_int <- function(x, level = 0.95, type = "percentile", - point_estimate = NULL){ + point_estimate = NULL) { check_ci_args(x, level, type, point_estimate) - if(type == "percentile") { + if (type == "percentile") { ci_vec <- stats::quantile(x[["stat"]], probs = c((1 - level) / 2, level + (1 - level) / 2)) @@ -57,29 +57,29 @@ conf_int <- function(x, level = 0.95, type = "percentile", return(ci) } -check_ci_args <- function(x, level, type, point_estimate){ +check_ci_args <- function(x, level, type, point_estimate) { - if(!is.null(point_estimate)){ - if(!is.data.frame(point_estimate)) + if (!is.null(point_estimate)) { + if (!is.data.frame(point_estimate)) check_type(point_estimate, is.numeric) else check_type(point_estimate, is.data.frame) } check_type(x, is.data.frame) check_type(level, is.numeric) - if(level <= 0 || level >= 1){ + if (level <= 0 || level >= 1) { stop_glue("The value of `level` must be between 0 and 1 non-inclusive.") } - if(!(type %in% c("percentile", "se"))){ + if (!(type %in% c("percentile", "se"))) { stop_glue('The options for `type` are "percentile" or "se".') } - if(type == "se" && is.null(point_estimate)) + if (type == "se" && is.null(point_estimate)) stop_glue('A numeric value needs to be given for `point_estimate` ', 'for `type = "se"') - if(type == "se" && is.vector(point_estimate)) + if (type == "se" && is.vector(point_estimate)) check_type(point_estimate, is.numeric) } diff --git a/R/generate.R b/R/generate.R index 029fa99f..2c86a47d 100755 --- a/R/generate.R +++ b/R/generate.R @@ -1,16 +1,16 @@ #' Generate resamples, permutations, or simulations -#' +#' #' Generation is done based on [specify()] and (if needed) [hypothesize()] #' inputs. -#' +#' #' @param x A data frame that can be coerced into a [tibble][tibble::tibble]. #' @param reps The number of resamples to generate. #' @param type Currently either `bootstrap`, `permute`, or `simulate`. #' @param ... Currently ignored. -#' +#' #' @return A tibble containing `rep` generated datasets, indicated by the #' `replicate` column. -#' +#' #' @examples #' # Permutation test for two binary variables #' mtcars %>% @@ -18,19 +18,19 @@ #' specify(am ~ vs, success = "1") %>% #' hypothesize(null = "independence") %>% #' generate(reps = 100, type = "permute") -#' +#' #' @importFrom dplyr group_by #' @export generate <- function(x, reps = 1, type = attr(x, "type"), ...) { auto_type <- attr(x, "type") - if(!is.null(auto_type)){ + if (!is.null(auto_type)) { if (is.null(type)) { stop_glue("Supply not `NULL` value of `type`.") } - if(auto_type != type) + if (auto_type != type) stop_glue( "You have specified `type = \"{type}\"`, but `type` is expected to be ", "`\"{auto_type}\"`. Please try again with appropriate `type` value." @@ -76,10 +76,10 @@ generate <- function(x, reps = 1, type = attr(x, "type"), ...) { bootstrap <- function(x, reps = 1, ...) 
{ # Check if hypothesis test chosen - if(!is.null(attr(x, "null"))){ + if (!is.null(attr(x, "null"))) { # If so, shift the variable chosen to have a mean corresponding # to that specified in `hypothesize` - if(attr(attr(x, "params"), "names") == "mu"){ + if (attr(attr(x, "params"), "names") == "mu") { col <- as.character(attr(x, "response")) # if(attr(x, "theory_type") != "One sample t"){ @@ -91,14 +91,14 @@ bootstrap <- function(x, reps = 1, ...) { # Determining whether or not to implement this t transformation ##### # else { -# std_error <- stats::sd(x[[col]], na.rm = TRUE) / +# std_error <- stats::sd(x[[col]], na.rm = TRUE) / # sqrt(length(x[[col]])) # x[[col]] <- ( x[[col]] - mean(x[[col]], na.rm = TRUE) ) / std_error # } } # Similarly for median - else if(attr(attr(x, "params"), "names") == "med"){ + else if (attr(attr(x, "params"), "names") == "med") { col <- as.character(attr(x, "response")) x[[col]] <- x[[col]] - stats::median(x[[col]], na.rm = TRUE) + attr(x, "params") @@ -144,7 +144,7 @@ permute_once <- function(x, ...) { dots <- list(...) if (attr(x, "null") == "independence") { - y <- pull(x, !! attr(x, "response")) + y <- pull(x, !!attr(x, "response")) y_prime <- sample(y, size = length(y), replace = FALSE) x[as.character(attr(x, "response"))] <- y_prime @@ -157,7 +157,7 @@ permute_once <- function(x, ...) { #' @importFrom tibble tibble #' @importFrom rlang := simulate <- function(x, reps = 1, ...) { - fct_levels <- as.character(unique(dplyr::pull(x, !! attr(x, "response")))) + fct_levels <- as.character(unique(dplyr::pull(x, !!attr(x, "response")))) col_simmed <- unlist(replicate(reps, sample(fct_levels, size = nrow(x), @@ -166,7 +166,7 @@ simulate <- function(x, reps = 1, ...) { simplify = FALSE)) rep_tbl <- tibble::tibble( - !! attr(x, "response") := as.factor(col_simmed), + !!attr(x, "response") := as.factor(col_simmed), replicate = as.factor(rep(1:reps, rep(nrow(x), reps))) ) diff --git a/R/hypothesize.R b/R/hypothesize.R index 41baf587..62f8046f 100755 --- a/R/hypothesize.R +++ b/R/hypothesize.R @@ -1,13 +1,13 @@ #' Declare a null hypothesis -#' +#' #' @param x A data frame that can be coerced into a [tibble][tibble::tibble]. #' @param null The null hypothesis. Options include `"independence"` and #' `"point"`. #' @param ... Arguments passed to downstream functions. -#' +#' #' @return A tibble containing the response (and explanatory, if specified) #' variable data with parameter information stored as well. -#' +#' #' @examples #' # Permutation test similar to ANOVA #' mtcars %>% @@ -16,7 +16,7 @@ #' hypothesize(null = "independence") %>% #' generate(reps = 100, type = "permute") %>% #' calculate(stat = "F") -#' +#' #' @export hypothesize <- function(x, null, ...) { @@ -26,21 +26,21 @@ hypothesize <- function(x, null, ...) { dots <- list(...) 
- if( (null == "point") && (length(dots) == 0) ){ + if ((null == "point") && (length(dots) == 0)) { stop_glue("Provide a parameter and a value to check such as `mu = 30` ", "for the point hypothesis.") } - if((null == "independence") && (length(dots) > 0)) { + if ((null == "independence") && (length(dots) > 0)) { warning_glue("Parameter values are not specified when testing that two ", "variables are independent.") } - if((length(dots) > 0) && (null == "point")) { + if ((length(dots) > 0) && (null == "point")) { params <- parse_params(dots, x) attr(x, "params") <- params - if(any(grepl("p.", attr(attr(x, "params"), "names")))){ + if (any(grepl("p.", attr(attr(x, "params"), "names")))) { # simulate instead of bootstrap based on the value of `p` provided attr(x, "type") <- "simulate" } else { @@ -49,13 +49,13 @@ hypothesize <- function(x, null, ...) { } - if(!is.null(null) && null == "independence") + if (!is.null(null) && null == "independence") attr(x, "type") <- "permute" # Check one proportion test set up correctly - if(null == "point"){ - if(is.factor(response_variable(x))){ - if(!any(grepl("p", attr(attr(x, "params"), "names")))) + if (null == "point") { + if (is.factor(response_variable(x))) { + if (!any(grepl("p", attr(attr(x, "params"), "names")))) stop_glue('Testing one categorical variable requires `p` ', 'to be used as a parameter.') } diff --git a/R/infer.R b/R/infer.R index af1163fa..679908d8 100755 --- a/R/infer.R +++ b/R/infer.R @@ -1,5 +1,5 @@ #' infer: a grammar for statistical inference -#' +#' #' The objective of this package is to perform statistical inference using a #' grammar that illustrates the underlying concepts and a format that coheres #' with the tidyverse. @@ -7,14 +7,14 @@ #' @examples #' # Example usage: #' library(infer) -#' +#' #' @docType package #' @name infer NULL ## quiets concerns of R CMD check re: the .'s that appear in pipelines ## From Jenny Bryan's googlesheets package -if(getRversion() >= "2.15.1") +if (getRversion() >= "2.15.1") utils::globalVariables(c("prop", "stat", "xbar", "xtilde", "x", "..density..", "statistic", ".", "parameter", "p.value", "xmin", "xmax", "density", "denom", diff --git a/R/p_value.R b/R/p_value.R index 785471a5..8cceab14 100644 --- a/R/p_value.R +++ b/R/p_value.R @@ -1,8 +1,8 @@ #' Compute p-value -#' +#' #' Only simulation-based methods are (currently only) supported. `get_pvalue()` #' is an alias of `p_value`. -#' +#' #' @param x Data frame of calculated statistics or containing attributes of #' theoretical distribution values. #' @param obs_stat A numeric value or a 1x1 data frame (as extreme or more @@ -11,7 +11,7 @@ #' `"two_sided"`. Can also specify `"left"`, `"right"`, or `"both"`. #' #' @return A 1x1 data frame with value between 0 and 1. 
-#' +#' #' @examples #' mtcars_df <- mtcars %>% #' dplyr::mutate(am = factor(am)) @@ -19,19 +19,19 @@ #' specify(mpg ~ am) %>% #' calculate(stat = "diff in means", order = c("1", "0")) #' null_distn <- mtcars_df %>% -#' specify(mpg ~ am) %>% +#' specify(mpg ~ am) %>% #' hypothesize(null = "independence") %>% #' generate(reps = 100) %>% #' calculate(stat = "diff in means", order = c("1", "0")) -#' null_distn %>% +#' null_distn %>% #' p_value(obs_stat = d_hat, direction = "right") -#' +#' #' @name get_pvalue NULL #' @rdname get_pvalue #' @export -p_value <- function(x, obs_stat, direction){ +p_value <- function(x, obs_stat, direction) { check_type(x, is.data.frame) obs_stat <- check_obs_stat(obs_stat) @@ -40,7 +40,7 @@ p_value <- function(x, obs_stat, direction){ is_simulation_based <- !is.null(attr(x, "generate")) && attr(x, "generate") - if(is_simulation_based) + if (is_simulation_based) pvalue <- simulation_based_p_value(x = x, obs_stat = obs_stat, direction = direction) @@ -52,35 +52,35 @@ p_value <- function(x, obs_stat, direction){ # if(!("stat" %in% names(x))){ # # Theoretical distribution - # which_distribution(x, + # which_distribution(x, # theory_type <- attr(x, "theory_type"), # obs_stat = obs_stat, - # direction = direction) + # direction = direction) # } return(pvalue) } -simulation_based_p_value <- function(x, obs_stat, direction){ +simulation_based_p_value <- function(x, obs_stat, direction) { - if(direction %in% c("less", "left")){ + if (direction %in% c("less", "left")) { p_value <- x %>% dplyr::summarize(p_value = mean(stat <= obs_stat)) } - else if(direction %in% c("greater", "right")){ + else if (direction %in% c("greater", "right")) { p_value <- x %>% dplyr::summarize(p_value = mean(stat >= obs_stat)) } - else{ + else { p_value <- x %>% two_sided_p_value(obs_stat = obs_stat) } p_value } -two_sided_p_value <- function(x, obs_stat){ +two_sided_p_value <- function(x, obs_stat) { - if(stats::median(x$stat) >= obs_stat){ + if (stats::median(x$stat) >= obs_stat) { basic_p_value <- get_percentile(x$stat, obs_stat) + (1 - get_percentile(x$stat, stats::median(x$stat) + stats::median(x$stat) - obs_stat)) @@ -90,7 +90,7 @@ two_sided_p_value <- function(x, obs_stat){ stats::median(x$stat) - obs_stat)) } - if(basic_p_value >= 1) + if (basic_p_value >= 1) # Catch all if adding both sides produces a number # larger than 1. Should update with test in that # scenario instead of using >= @@ -104,26 +104,26 @@ two_sided_p_value <- function(x, obs_stat){ get_pvalue <- p_value # which_distribution <- function(x, theory_type, obs_stat, direction){ -# +# # param <- attr(x, "distr_param") # if(!is.null(attr(x, "distr_param2"))) # param2 <- attr(x, "distr_param2") -# +# # if(theory_type == "Two sample t") # return(pt(q = obs_stat, # df = param, # lower.tail = set_lower_tail(direction)) # ) -# +# # } -#theory_t_pvalue <- +# theory_t_pvalue <- # set_lower_tail <- function(direction){ # if(direction %in% c("greater", "right")) # lower_tail <- FALSE # else # lower_tail <- TRUE -# +# # lower_tail -# } +# } diff --git a/R/print_methods.R b/R/print_methods.R index 9433f5d2..a1cf028c 100644 --- a/R/print_methods.R +++ b/R/print_methods.R @@ -3,7 +3,7 @@ #' @param x An object of class `infer`, i.e. output from [specify()] or #' [hypothesize()]. #' @param ... Arguments passed to methods. -#' +#' #' @export print.infer <- function(x, ...) 
{ attrs <- names(attributes(x)) diff --git a/R/rep_sample_n.R b/R/rep_sample_n.R index 20b80a1e..b9079387 100644 --- a/R/rep_sample_n.R +++ b/R/rep_sample_n.R @@ -9,10 +9,10 @@ #' @param reps Number of samples of size n = `size` to take. #' @param prob A vector of probability weights for obtaining the elements of the #' vector being sampled. -#' +#' #' @return A tibble of size `rep` times `size` rows corresponding to `rep` #' samples of size n = `size` from `tbl`. -#' +#' #' @examples #' suppressPackageStartupMessages(library(dplyr)) #' suppressPackageStartupMessages(library(ggplot2)) @@ -31,13 +31,13 @@ #' group_by(replicate) %>% #' summarize(prop_hurricane = mean(status == "hurricane")) #' p_hats -#' +#' #' # Plot sampling distribution #' ggplot(p_hats, aes(x = prop_hurricane)) + #' geom_density() + #' labs(x = "p_hat", y = "Number of samples", #' title = "Sampling distribution of p_hat from 1000 samples of size 50") -#' +#' #' @importFrom dplyr pull #' @importFrom dplyr inner_join #' @importFrom dplyr group_by @@ -49,7 +49,7 @@ rep_sample_n <- function(tbl, size, replace = FALSE, reps = 1, prob = NULL) { check_type(size, is.numeric) check_type(replace, is.logical) check_type(reps, is.numeric) - if(!is.null(prob)) + if (!is.null(prob)) check_type(prob, is.numeric) # assign non-uniform probabilities diff --git a/R/set_params.R b/R/set_params.R index 13d9fc32..77717f74 100755 --- a/R/set_params.R +++ b/R/set_params.R @@ -1,23 +1,23 @@ #' To determine which theoretical distribution to fit (if any) -#' +#' #' @param x A data frame that can be coerced into a [tibble][tibble::tibble]. -#' +#' #' @noRd -set_params <- function(x){ +set_params <- function(x) { attr(x, "theory_type") <- NULL - if(!is.null(attr(x, "response"))){ + if (!is.null(attr(x, "response"))) { num_response_levels <- length(levels(response_variable(x))) } # One variable if (!is.null(attr(x, "response")) && is.null(attr(x, "explanatory")) && !is.null(attr(x, "response_type")) && - is.null(attr(x, "explanatory_type"))){ + is.null(attr(x, "explanatory_type"))) { # One mean - if(attr(x, "response_type") %in% c("integer", "numeric")){ + if (attr(x, "response_type") %in% c("integer", "numeric")) { attr(x, "theory_type") <- "One sample t" attr(x, "distr_param") <- x %>% dplyr::summarize(df = stats::t.test( @@ -28,7 +28,7 @@ set_params <- function(x){ } # One prop - else if(attr(x, "response_type") == "factor" && (num_response_levels == 2)){ + else if (attr(x, "response_type") == "factor" && (num_response_levels == 2)) { # No parameters since standard normal attr(x, "theory_type") <- "One sample prop z" @@ -45,18 +45,18 @@ set_params <- function(x){ # Two variables if (!is.null(attr(x, "response")) && !is.null(attr(x, "explanatory")) & !is.null(attr(x, "response_type")) && - !is.null(attr(x, "explanatory_type"))){ + !is.null(attr(x, "explanatory_type"))) { attr(x, "type") <- "bootstrap" # Response is numeric, explanatory is categorical - if(attr(x, "response_type") %in% c("integer", "numeric") & - attr(x, "explanatory_type") == "factor"){ + if (attr(x, "response_type") %in% c("integer", "numeric") & + attr(x, "explanatory_type") == "factor") { # Two sample means (t distribution) - if(length(levels(explanatory_variable(x))) == 2) { + if (length(levels(explanatory_variable(x))) == 2) { attr(x, "theory_type") <- "Two sample t" - # Keep track of Satterthwaite degrees of freedom since lost when + # Keep track of Satterthwaite degrees of freedom since lost when # in aggregation w/ calculate()/generate() attr(x, "distr_param") <- x %>% 
dplyr::summarize(df = stats::t.test( @@ -69,31 +69,31 @@ set_params <- function(x){ # Get numerator and denominator degrees of freedom attr(x, "distr_param") <- x %>% dplyr::summarize(df1 = stats::anova(stats::aov( - !! attr(x, "response") ~ !! attr(x, "explanatory")))$Df[1] + !!attr(x, "response") ~ !!attr(x, "explanatory")))$Df[1] ) %>% dplyr::pull() attr(x, "distr_param2") <- x %>% dplyr::summarize(df2 = stats::anova(stats::aov( - !! attr(x, "response") ~ !! attr(x, "explanatory")))$Df[2] + !!attr(x, "response") ~ !!attr(x, "explanatory")))$Df[2] ) %>% dplyr::pull() } } # Response is categorical, explanatory is categorical - if(attr(x, "response_type") == "factor" & - attr(x, "explanatory_type") == "factor"){ + if (attr(x, "response_type") == "factor" & + attr(x, "explanatory_type") == "factor") { attr(x, "type") <- "bootstrap" - # Two sample proportions (z distribution) + # Two sample proportions (z distribution) # Parameter(s) not needed since standard normal - if(length(levels(response_variable(x))) == 2 & - length(levels(explanatory_variable(x))) == 2){ + if (length(levels(response_variable(x))) == 2 & + length(levels(explanatory_variable(x))) == 2) { attr(x, "theory_type") <- "Two sample props z" } # >2 sample proportions (chi-square test of indep) - else{ + else { attr(x, "theory_type") <- "Chi-square test of indep" attr(x, "distr_param") <- x %>% dplyr::summarize(df = suppressWarnings(stats::chisq.test( @@ -104,8 +104,8 @@ set_params <- function(x){ } # Response is numeric, explanatory is numeric - if(attr(x, "response_type") %in% c("integer", "numeric") & - attr(x, "explanatory_type") %in% c("integer", "numeric")){ + if (attr(x, "response_type") %in% c("integer", "numeric") & + attr(x, "explanatory_type") %in% c("integer", "numeric")) { response_string <- as.character(attr(x, "response")) explanatory_string <- as.character(attr(x, "explanatory")) attr(x, "theory_type") <- "Slope/correlation with t" diff --git a/R/specify.R b/R/specify.R index f3779839..c3a3ab6b 100755 --- a/R/specify.R +++ b/R/specify.R @@ -1,7 +1,7 @@ #' Specify the response and explanatory variables -#' +#' #' `specify()` also converts character variables chosen to be `factor`s. -#' +#' #' @param x A data frame that can be coerced into a [tibble][tibble::tibble]. #' @param formula A formula with the response variable on the left and the #' explanatory on the right. @@ -12,10 +12,10 @@ #' @param success The level of `response` that will be considered a success, as #' a string. Needed for inference on one proportion, a difference in #' proportions, and corresponding z stats. -#' +#' #' @return A tibble containing the response (and explanatory, if specified) #' variable data. 
-#' +#' #' @examples #' # Permutation test similar to ANOVA #' mtcars %>% @@ -24,7 +24,7 @@ #' hypothesize(null = "independence") %>% #' generate(reps = 100, type = "permute") %>% #' calculate(stat = "F") -#' +#' #' @importFrom rlang f_lhs #' @importFrom rlang f_rhs #' @importFrom dplyr mutate_if select one_of @@ -39,7 +39,7 @@ specify <- function(x, formula, response = NULL, mutate_if(is.character, as.factor) %>% mutate_if(is.logical, as.factor) - if ((!methods::hasArg(formula) && !methods::hasArg(response))){ + if ((!methods::hasArg(formula) && !methods::hasArg(response))) { stop_glue("Please give the `response` variable.") } if (methods::hasArg(formula)) { @@ -68,18 +68,18 @@ specify <- function(x, formula, response = NULL, response_col <- rlang::eval_tidy(attr(x, "response"), x) # if there's an explanatory var - if(has_explanatory(x)) { - if(!as.character(attr(x, "explanatory")) %in% names(x)) { + if (has_explanatory(x)) { + if (!as.character(attr(x, "explanatory")) %in% names(x)) { stop_glue('The explanatory variable `{attr(x, "explanatory")}` ', 'cannot be found in this dataframe.') } - if(identical(as.character(attr(x, "response")), + if (identical(as.character(attr(x, "response")), as.character(attr(x, "explanatory")))) { stop_glue("The response and explanatory variables must be different ", "from one another.") } explanatory_col <- rlang::eval_tidy(attr(x, "explanatory"), x) - if(is.character(explanatory_col)) { + if (is.character(explanatory_col)) { explanatory_col <- as.factor(explanatory_col) } } @@ -119,16 +119,16 @@ specify <- function(x, formula, response = NULL, # To help determine theoretical distribution to plot attr(x, "response_type") <- class(response_variable(x)) - if(is.null(attr(x, "explanatory"))) + if (is.null(attr(x, "explanatory"))) attr(x, "explanatory_type") <- NULL else attr(x, "explanatory_type") <- class(explanatory_variable(x)) - if(attr(x, "response_type") == "factor" && is.null(success) && + if (attr(x, "response_type") == "factor" && is.null(success) && length(levels(response_variable(x))) == 2 && (is.null(attr(x, "explanatory_type")) || (!is.null(attr(x, "explanatory_type")) && - length(levels(explanatory_variable(x))) == 2)) ) + length(levels(explanatory_variable(x))) == 2))) stop_glue( 'A level of the response variable `{attr(x, "response")}` ', 'needs to be specified for the `success` argument in `specify()`.' diff --git a/R/utils.R b/R/utils.R index 7a740919..feb18799 100644 --- a/R/utils.R +++ b/R/utils.R @@ -1,6 +1,6 @@ format_params <- function(x) { par_levels <- get_par_levels(x) - fct_levels <- as.character(unique(dplyr::pull(x, !! 
attr(x, "response")))) + fct_levels <- as.character(unique(dplyr::pull(x, !!attr(x, "response")))) return(attr(x, "params")[match(fct_levels, par_levels)]) } @@ -9,7 +9,7 @@ get_par_levels <- function(x) { return(gsub("^.\\.", "", par_names)) } -set_attributes <- function(to, from = x){ +set_attributes <- function(to, from = x) { attr(to, "response") <- attr(from, "response") attr(to, "success") <- attr(from, "success") attr(to, "explanatory") <- attr(from, "explanatory") @@ -34,18 +34,18 @@ response_variable <- function(x) { x[[as.character(attr(x, "response"))]] } -reorder_explanatory <- function(x, order){ +reorder_explanatory <- function(x, order) { x[[as.character(attr(x, "explanatory"))]] <- factor(x[[as.character(attr(x, "explanatory"))]], levels = c(order[1], order[2])) x } -has_explanatory <- function(x){ +has_explanatory <- function(x) { !is.null(attr(x, "explanatory")) } -has_response <- function(x){ +has_response <- function(x) { !is.null(attr(x, "response")) } @@ -89,39 +89,39 @@ null_transformer <- function(text, envir) { out } -check_order <- function(x, explanatory_variable, order){ +check_order <- function(x, explanatory_variable, order) { unique_explanatory_variable <- unique(explanatory_variable) - if (length(unique_explanatory_variable) != 2){ + if (length(unique_explanatory_variable) != 2) { stop_glue("Statistic is based on a difference; the explanatory variable ", "should have two levels.") } - if(is.null(order)){ + if (is.null(order)) { stop_glue("Statistic is based on a difference; specify the `order` in ", "which to subtract the levels of the explanatory variable. ", '`order = c("first", "second")` means `("first" - "second")`. ', "Check `?calculate` for details.") } else { - if(xor(is.na(order[1]), is.na(order[2]))) + if (xor(is.na(order[1]), is.na(order[2]))) stop_glue( "Only one level specified in `order`. Both levels need to be specified." ) - if(length(order) > 2) + if (length(order) > 2) stop_glue("`order` is expecting only two entries.") - if(order[1] %in% unique_explanatory_variable == FALSE) + if (order[1] %in% unique_explanatory_variable == FALSE) stop_glue("{order[1]} is not a level of the explanatory variable.") - if(order[2] %in% unique_explanatory_variable == FALSE) + if (order[2] %in% unique_explanatory_variable == FALSE) stop_glue("{order[2]} is not a level of the explanatory variable.") } } check_args_and_attr <- function(x, explanatory_variable, response_variable, - stat){ + stat) { # Could also do `stat <- match.arg(stat)` # but that's not as helpful to beginners with the cryptic error msg if (!stat %in% c("mean", "median", "sd", "prop", "diff in means", "diff in medians", "diff in props", - "Chisq", "F", "slope", "correlation", "t", "z")){ + "Chisq", "F", "slope", "correlation", "t", "z")) { stop_glue("You specified a string for `stat` that is not implemented. ", "Check your spelling and `?calculate` for current options.") } @@ -130,8 +130,8 @@ check_args_and_attr <- function(x, explanatory_variable, response_variable, warning_glue('A `generate()` step was not performed prior to ', '`calculate()`. Review carefully.') - if (stat %in% c("F", "slope", "diff in means", "diff in medians")){ - if (has_explanatory(x) && !is.numeric(response_variable(x))){ + if (stat %in% c("F", "slope", "diff in means", "diff in medians")) { + if (has_explanatory(x) && !is.numeric(response_variable(x))) { stop_glue( 'The response variable of `{attr(x, "response")}` is not appropriate\n', "since '{stat}' is expecting the response variable to be numeric." 
@@ -139,8 +139,8 @@ check_args_and_attr <- function(x, explanatory_variable, response_variable, } } - if (stat %in% c("diff in props", "Chisq")){ - if (has_explanatory(x) && !is.factor(response_variable(x))){ + if (stat %in% c("diff in props", "Chisq")) { + if (has_explanatory(x) && !is.factor(response_variable(x))) { stop_glue( 'The response variable of `{attr(x, "response")}` is not appropriate\n', "since '{stat}' is expecting the response variable to be a factor." @@ -149,11 +149,11 @@ check_args_and_attr <- function(x, explanatory_variable, response_variable, } } -check_for_numeric_stat <- function(x, stat){ - if (stat %in% c("mean", "median", "sd")){ +check_for_numeric_stat <- function(x, stat) { + if (stat %in% c("mean", "median", "sd")) { col <- base::setdiff(names(x), "replicate") - if (!is.numeric(x[[as.character(col)]])){ + if (!is.numeric(x[[as.character(col)]])) { stop_glue( "Calculating a {stat} here is not appropriate\n", "since the `{col}` variable is not numeric." @@ -162,10 +162,10 @@ check_for_numeric_stat <- function(x, stat){ } } -check_for_factor_stat <- function(x, stat, explanatory_variable){ +check_for_factor_stat <- function(x, stat, explanatory_variable) { - if (stat %in% c("diff in means", "diff in medians", "diff in props", "F")){ - if (!is.factor(explanatory_variable)){ + if (stat %in% c("diff in means", "diff in medians", "diff in props", "F")) { + if (!is.factor(explanatory_variable)) { stop_glue( 'The explanatory variable of `{attr(x, "explanatory")}` is not ', "appropriate\n", @@ -175,24 +175,24 @@ check_for_factor_stat <- function(x, stat, explanatory_variable){ } } -check_point_params <- function(x, stat){ +check_point_params <- function(x, stat) { param_names <- attr(attr(x, "params"), "names") hyp_text <- 'to be set in `hypothesize()`.' - if(!is.null(attr(x, "null"))){ - if(stat %in% c("mean", "median", "sd", "prop")){ - if( (stat == "mean" && !("mu" %in% param_names)) ) + if (!is.null(attr(x, "null"))) { + if (stat %in% c("mean", "median", "sd", "prop")) { + if ((stat == "mean" && !("mu" %in% param_names))) stop_glue('`stat == "mean"` requires `"mu"` {hyp_text}') - if ( (!(stat == "mean") && ("mu" %in% param_names)) ) + if ((!(stat == "mean") && ("mu" %in% param_names))) stop_glue('`"mu"` does not correspond to `stat = "{stat}"`.') - if( (stat == "median" && !("med" %in% param_names) ) ) + if ((stat == "median" && !("med" %in% param_names))) stop_glue('`stat == "median"` requires `"med"` {hyp_text}') - if ( (!(stat == "median") && ("med" %in% param_names)) ) + if ((!(stat == "median") && ("med" %in% param_names))) stop_glue('`"med"` does not correspond to `stat = "{stat}"`.') ## Tests unable to get to # if( (stat == "sigma" && !("sd" %in% param_names)) ) # stop_glue('`stat == "sd"` requires `"sigma"` {hyp_text}') - if ( (!(stat == "sd") && ("sigma" %in% param_names)) ) + if ((!(stat == "sd") && ("sigma" %in% param_names))) stop_glue('`"sigma"` does not correspond to `stat = "{stat}"`.') ## Tests unable to get to @@ -209,8 +209,8 @@ parse_params <- function(dots, x) { sig_ind <- grep("sigma", names(dots)) # error: cannot specify more than one of props, means, medians, or sds - if ( length(p_ind) + length(mu_ind) + length(med_ind) - + length(sig_ind) != 1 ){ + if (length(p_ind) + length(mu_ind) + length(med_ind) + + length(sig_ind) != 1) { stop_glue( 'Parameter values can be only one of `p`, `mu`, `med`, or `sigma`.' 
) @@ -226,7 +226,7 @@ parse_params <- function(dots, x) { stop_glue("A point null regarding a proportion requires ", "that `success` be indicated in `specify()`.") } - if(dots$p < 0 || dots$p > 1) + if (dots$p < 0 || dots$p > 1) stop_glue( "The value suggested for `p` is not between 0 and 1, inclusive." ) @@ -235,7 +235,7 @@ parse_params <- function(dots, x) { dots$p <- append(dots$p, 1 - dots$p) names(dots$p) <- c(attr(x, "success"), missing_lev) } else { - if(sum(dots$p) != 1){ + if (sum(dots$p) != 1) { stop_glue("Make sure the hypothesized values for the `p` parameters ", "sum to 1. Please try again.") } @@ -250,7 +250,7 @@ parse_params <- function(dots, x) { return(unlist(dots)) } -hypothesize_checks <- function(x, null){ +hypothesize_checks <- function(x, null) { # error: x is not a dataframe if (!sum(class(x) %in% c("data.frame", "tbl", "tbl_df", "grouped_df"))) { stop_glue("x must be a data.frame or tibble") @@ -268,13 +268,13 @@ hypothesize_checks <- function(x, null){ # 'for `null` argument.') # } - if(!has_response(x)){ + if (!has_response(x)) { stop_glue( "The response variable is not set. Make sure to `specify()` it first." ) } - if(null == "independence" && !has_explanatory(x)){ + if (null == "independence" && !has_explanatory(x)) { stop_glue('Please `specify()` an explanatory and a response variable ', 'when testing\n', 'a null hypothesis of `"independence"`.') @@ -282,22 +282,22 @@ hypothesize_checks <- function(x, null){ } check_direction <- function(direction = c("less", "greater", "two_sided", - "left", "right", "both")){ + "left", "right", "both")) { check_type(direction, is.character) - if(!(direction %in% c("less", "greater", "two_sided", - "left", "right", "both"))){ + if (!(direction %in% c("less", "greater", "two_sided", + "left", "right", "both"))) { stop_glue('The provided value for `direction` is not appropriate. ', 'Possible values are "less", "greater", "two_sided", ', '"left", "right", or "both".') } } -check_obs_stat <- function(obs_stat){ - if(!is.null(obs_stat)){ - if("data.frame" %in% class(obs_stat)){ +check_obs_stat <- function(obs_stat) { + if (!is.null(obs_stat)) { + if ("data.frame" %in% class(obs_stat)) { check_type(obs_stat, is.data.frame) - if( (nrow(obs_stat) != 1) || (ncol(obs_stat) != 1) ) + if ((nrow(obs_stat) != 1) || (ncol(obs_stat) != 1)) warning_glue("The first row and first column value of the given ", "`obs_stat` will be used.") @@ -305,7 +305,7 @@ check_obs_stat <- function(obs_stat){ obs_stat <- obs_stat[[1]][[1]] check_type(obs_stat, is.numeric) } - else{ + else { check_type(obs_stat, is.numeric) } } @@ -314,9 +314,9 @@ check_obs_stat <- function(obs_stat){ } #' Check object type -#' +#' #' Throw an error in case object is not of desired type. -#' +#' #' @param x An object to check. #' @param predicate A function to perform check. A good idea is to use function #' named `is.*()` or `is_*()` with possible `::` prefix. @@ -324,7 +324,7 @@ check_obs_stat <- function(obs_stat){ #' original name of supplied `predicate`: all alphanumeric with '_' and '.' #' characters (until the name end) after the first appearance of either `is.` #' or `is_`. In case of a doubt supply `type` explicitly. 
-#' +#' #' @examples #' \dontrun{ #' x <- 1 @@ -332,7 +332,7 @@ check_obs_stat <- function(obs_stat){ #' check_type(x, is.logical) #' check_type(x, rlang::is_string, "character of length 1") #' } -#' +#' #' @keywords internal #' @noRd check_type <- function(x, predicate, type = NULL) { diff --git a/R/visualize.R b/R/visualize.R index 7129b050..6cd293c7 100755 --- a/R/visualize.R +++ b/R/visualize.R @@ -1,8 +1,8 @@ #' Visualize statistical inference -#' +#' #' Visualize the distribution of the simulation-based inferential statistics or #' the theoretical distribution (or both!). -#' +#' #' @param data The output from [calculate()]. #' @param bins The number of bins in the histogram. #' @param method A string giving the method to display. Options are @@ -29,12 +29,12 @@ #' @param ci_fill A character or hex string specifying the color to shade the #' confidence interval. #' @param ... Other arguments passed along to ggplot2. -#' +#' #' @return A ggplot object showing the simulation-based distribution as a #' histogram or bar graph. Also used to show the theoretical curves. -#' +#' #' @examples -#' # Permutations to create a simulation-based null distribution for +#' # Permutations to create a simulation-based null distribution for #' # one numerical response and one categorical predictor #' # using t statistic #' mtcars %>% @@ -44,8 +44,8 @@ #' generate(reps = 100, type = "permute") %>% #' calculate(stat = "t", order = c("1", "0")) %>% #' visualize(method = "simulation") #default method -#' -#' # Theoretical t distribution for +#' +#' # Theoretical t distribution for #' # one numerical response and one categorical predictor #' # using t statistic #' mtcars %>% @@ -55,7 +55,7 @@ #' # generate() is not needed since we are not doing simulation #' calculate(stat = "t", order = c("1", "0")) %>% #' visualize(method = "theoretical") -#' +#' #' # Overlay theoretical distribution on top of randomized t-statistics #' mtcars %>% #' dplyr::mutate(am = factor(am)) %>% @@ -64,7 +64,7 @@ #' generate(reps = 100, type = "permute") %>% #' calculate(stat = "t", order = c("1", "0")) %>% #' visualize(method = "both") -#' +#' #' @importFrom ggplot2 ggplot geom_histogram aes stat_function ggtitle #' @importFrom ggplot2 xlab ylab geom_vline geom_rect geom_bar #' @importFrom stats dt qt df qf dnorm qnorm dchisq qchisq @@ -85,32 +85,32 @@ visualize <- function(data, bins = 15, method = "simulation", check_type(dens_color, is.character) check_type(obs_stat_color, is.character) check_type(pvalue_fill, is.character) - if(!is.null(direction)) + if (!is.null(direction)) check_type(direction, is.character) - if(is.data.frame(endpoints) && - ( (nrow(endpoints) != 1) || (ncol(endpoints) != 2) ) ){ + if (is.data.frame(endpoints) && + ((nrow(endpoints) != 1) || (ncol(endpoints) != 2))) { stop_glue( "Expecting `endpoints` to be a 1 x 2 data frame or 2 element vector." ) } - if(is.vector(endpoints) && ( length(endpoints) != 2) ) { + if (is.vector(endpoints) && (length(endpoints) != 2)) { warning_glue( "Expecting `endpoints` to be a 1 x 2 data frame or 2 element vector. ", "Using the first two entries as the `endpoints`." ) endpoints <- endpoints[1:2] } - if(is.data.frame(endpoints)) + if (is.data.frame(endpoints)) endpoints <- unlist(endpoints) obs_stat <- check_obs_stat(obs_stat) - if(!is.null(direction) && + if (!is.null(direction) && (is.null(obs_stat) + is.null(endpoints)) != 1) stop_glue( "Shading requires either `endpoints` values for a confidence interval ", "or the observed statistic `obs_stat` to be provided." 
) - if(method == "simulation"){ + if (method == "simulation") { infer_plot <- visualize_simulation(data = data, bins = bins, dens_color = dens_color, @@ -122,7 +122,7 @@ visualize <- function(data, bins = 15, method = "simulation", ci_fill = ci_fill, ...) - } else if(method == "theoretical"){ + } else if (method == "theoretical") { infer_plot <- visualize_theoretical(data = data, dens_color = dens_color, @@ -135,13 +135,13 @@ visualize <- function(data, bins = 15, method = "simulation", ...) - } else if(method == "both"){ + } else if (method == "both") { - if(!("stat" %in% names(data))) + if (!("stat" %in% names(data))) stop_glue('`generate()` and `calculate()` are both required ', 'to be done prior to `visualize(method = "both")`') - if(("replicate" %in% names(data)) && + if (("replicate" %in% names(data)) && length(unique(data$replicate)) < 100) warning_glue( "With only {length(unique(data$stat))} replicates, it may be ", @@ -163,13 +163,13 @@ visualize <- function(data, bins = 15, method = "simulation", '`"simulation"` is the default.') } - if(!is.null(obs_stat)){#&& !is.null(direction) + if (!is.null(obs_stat)) { # && !is.null(direction) infer_plot <- infer_plot + geom_vline(xintercept = obs_stat, size = 2, color = obs_stat_color, ...) } - if(!is.null(endpoints)){ - if(!is.null(obs_stat)) + if (!is.null(endpoints)) { + if (!is.null(obs_stat)) warning_glue("Values for both `endpoints` and `obs_stat` were given ", "when only one should be set. Ignoring `obs_stat` values.") infer_plot <- infer_plot + @@ -183,7 +183,7 @@ visualize <- function(data, bins = 15, method = "simulation", theory_t_plot <- function(deg_freedom, statistic_text = "t", - dens_color = dens_color, ...){ + dens_color = dens_color, ...) { ggplot(data.frame(x = c(qt(0.001, deg_freedom), qt(0.999, deg_freedom)))) + stat_function(mapping = aes(x), fun = dt, args = list(df = deg_freedom), @@ -200,7 +200,7 @@ both_t_plot <- function(data = data, deg_freedom, statistic_text = "t", bins, pvalue_fill, endpoints, - ci_fill, ...){ + ci_fill, ...) { infer_t_plot <- shade_density_check(data = data, obs_stat = obs_stat, @@ -222,7 +222,7 @@ both_t_plot <- function(data = data, deg_freedom, statistic_text = "t", theory_anova_plot <- function(deg_freedom_top, deg_freedom_bottom, statistic_text = "F", - dens_color = dens_color, ...){ + dens_color = dens_color, ...) { ggplot(data.frame(x = c(qf(0.001, deg_freedom_top, deg_freedom_bottom), qf(0.999, deg_freedom_top, deg_freedom_bottom)))) + stat_function(mapping = aes(x), fun = df, @@ -242,9 +242,9 @@ both_anova_plot <- function(data, deg_freedom_top, endpoints, pvalue_fill, ci_fill, - ...){ + ...) { - if(!is.null(direction) && !(direction %in% c("greater", "right"))) + if (!is.null(direction) && !(direction %in% c("greater", "right"))) warning_glue( "F usually corresponds to right-tailed tests. Proceed with caution." ) @@ -268,7 +268,7 @@ both_anova_plot <- function(data, deg_freedom_top, ylab("") } -theory_z_plot <- function(statistic_text = "z", dens_color = dens_color, ...){ +theory_z_plot <- function(statistic_text = "z", dens_color = dens_color, ...) { ggplot(data.frame(x = c(qnorm(0.001), qnorm(0.999)))) + stat_function(mapping = aes(x), fun = dnorm, color = dens_color) + @@ -285,7 +285,7 @@ both_z_plot <- function(data, statistic_text = "z", bins, endpoints, ci_fill, - ...){ + ...) 
{ infer_z_plot <- shade_density_check(data = data, obs_stat = obs_stat, @@ -306,7 +306,7 @@ both_z_plot <- function(data, statistic_text = "z", theory_chisq_plot <- function(deg_freedom, statistic_text = "Chi-Square", - dens_color = dens_color, ...){ + dens_color = dens_color, ...) { ggplot(data.frame(x = c(qchisq(0.001, deg_freedom), qchisq(0.999, deg_freedom)))) + stat_function(mapping = aes(x), fun = dchisq, @@ -325,9 +325,9 @@ both_chisq_plot <- function(data, deg_freedom, statistic_text = "Chi-Square", endpoints, pvalue_fill = pvalue_fill, ci_fill = ci_fill, - ...){ + ...) { - if(!is.null(direction) && !(direction %in% c("greater", "right"))) + if (!is.null(direction) && !(direction %in% c("greater", "right"))) warning_glue("Chi-square usually corresponds to right-tailed tests. ", "Proceed with caution.") @@ -359,21 +359,21 @@ shade_density_check <- function(data, endpoints, ci_fill, ...) { - if(is.null(direction) || is.null(obs_stat)){ - if(density){ + if (is.null(direction) || is.null(obs_stat)) { + if (density) { gg_plot <- ggplot(data = data, mapping = aes(x = stat)) + geom_histogram(bins = bins, color = "white", mapping = aes(y = ..density..), ...) - } #else { + } # else { # Not sure if needed? Can't get tests to find it - #gg_plot <- ggplot(data = data, mapping = aes(x = stat)) + + # gg_plot <- ggplot(data = data, mapping = aes(x = stat)) + # geom_histogram(bins = bins, color = "white", ...) - #} + # } } - if(xor(!is.null(obs_stat), !is.null(endpoints))){ - if(!is.null(direction)){ - if(density){ + if (xor(!is.null(obs_stat), !is.null(endpoints))) { + if (!is.null(direction)) { + if (density) { gg_plot <- ggplot(data = data, mapping = aes(x = stat)) + geom_histogram(bins = bins, color = "white", mapping = aes(y = ..density..), ...) @@ -382,21 +382,21 @@ shade_density_check <- function(data, geom_histogram(bins = bins, color = "white", ...) } - if(direction %in% c("less", "left")){ + if (direction %in% c("less", "left")) { gg_plot <- gg_plot + geom_rect(fill = pvalue_fill, alpha = 0.01, aes(xmin = -Inf, xmax = obs_stat, ymin = 0, ymax = Inf), ...) } - if(direction %in% c("greater", "right")){ + if (direction %in% c("greater", "right")) { gg_plot <- gg_plot + geom_rect(fill = pvalue_fill, alpha = 0.01, aes(xmin = obs_stat, xmax = Inf, ymin = 0, ymax = Inf), ...) } - if(direction %in% c("two_sided", "both") && - obs_stat >= stats::median(data$stat)){ + if (direction %in% c("two_sided", "both") && + obs_stat >= stats::median(data$stat)) { gg_plot <- gg_plot + geom_rect(fill = pvalue_fill, alpha = 0.01, mapping = aes(xmin = obs_stat, xmax = Inf, ymin = 0, @@ -412,8 +412,8 @@ shade_density_check <- function(data, ) } - if(direction %in% c("two_sided", "both") && - obs_stat < stats::median(data$stat)){ + if (direction %in% c("two_sided", "both") && + obs_stat < stats::median(data$stat)) { gg_plot <- gg_plot + geom_rect(fill = pvalue_fill, alpha = 0.01, mapping = aes(xmin = -Inf, xmax = obs_stat, ymin = 0, @@ -429,7 +429,7 @@ shade_density_check <- function(data, } - if(direction == "between"){ + if (direction == "between") { gg_plot <- gg_plot + geom_rect(fill = ci_fill, alpha = 0.01, aes(xmin = endpoints[1], @@ -450,8 +450,8 @@ visualize_simulation <- function(data, bins, pvalue_fill, endpoints, ci_fill, ...) { - if(is.null(direction)){ - if(length(unique(data$stat)) >= 10) + if (is.null(direction)) { + if (length(unique(data$stat)) >= 10) infer_plot <- ggplot(data = data, mapping = aes(x = stat)) + geom_histogram(bins = bins, color = "white", ...) 
else @@ -487,23 +487,23 @@ visualize_theoretical <- function(data, "method. {{infer}} currently does not check these for you." ) - if(!is.null(attr(data, "stat")) && + if (!is.null(attr(data, "stat")) && !(attr(data, "stat") %in% c("t", "z", "Chisq", "F"))) warning_glue( "Your `calculate`d statistic and the theoretical distribution are on ", "different scales. Displaying only the theoretical distribution." ) - if(attr(data, "theory_type") %in% - c("Two sample t", "Slope with t", "One sample t")){ + if (attr(data, "theory_type") %in% + c("Two sample t", "Slope with t", "One sample t")) { infer_plot <- theory_t_plot(deg_freedom = attr(data, "distr_param"), statistic_text = "t", dens_color = dens_color) } - else if(attr(data, "theory_type") == "ANOVA"){ + else if (attr(data, "theory_type") == "ANOVA") { - if(!is.null(direction) && !(direction %in% c("greater", "right"))) + if (!is.null(direction) && !(direction %in% c("greater", "right"))) warning_glue( "F usually corresponds to right-tailed tests. Proceed with caution." ) @@ -515,16 +515,16 @@ visualize_theoretical <- function(data, dens_color = dens_color) } - else if(attr(data, "theory_type") %in% - c("One sample prop z", "Two sample props z")){ + else if (attr(data, "theory_type") %in% + c("One sample prop z", "Two sample props z")) { infer_plot <- theory_z_plot(statistic_text = "z", dens_color = dens_color) } - else if(attr(data, "theory_type") %in% - c("Chi-square test of indep", "Chi-square Goodness of Fit")){ + else if (attr(data, "theory_type") %in% + c("Chi-square test of indep", "Chi-square Goodness of Fit")) { - if(!is.null(direction) && !(direction %in% c("greater", "right"))) + if (!is.null(direction) && !(direction %in% c("greater", "right"))) warning_glue("Chi-square usually corresponds to right-tailed tests. ", "Proceed with caution.") @@ -540,16 +540,16 @@ visualize_theoretical <- function(data, # Move into its own function - if(!is.null(obs_stat)){ - if(!is.null(direction)){ - if(direction %in% c("less", "left")){ + if (!is.null(obs_stat)) { + if (!is.null(direction)) { + if (direction %in% c("less", "left")) { infer_plot <- infer_plot + geom_rect(data = data.frame(obs_stat), fill = pvalue_fill, alpha = 0.6, aes(xmin = -Inf, xmax = obs_stat, ymin = 0, ymax = Inf), ...) } - if(direction %in% c("greater", "right")){ + if (direction %in% c("greater", "right")) { infer_plot <- infer_plot + geom_rect(data = data.frame(obs_stat), fill = pvalue_fill, alpha = 0.6, @@ -558,9 +558,9 @@ visualize_theoretical <- function(data, ...) } - # Assuming two-tailed shading will only happen with theoretical + # Assuming two-tailed shading will only happen with theoretical # distributions centered at 0 - if(direction %in% c("two_sided", "both") && obs_stat >= 0){ + if (direction %in% c("two_sided", "both") && obs_stat >= 0) { infer_plot <- infer_plot + geom_rect(data = data.frame(obs_stat), fill = pvalue_fill, alpha = 0.6, @@ -572,7 +572,7 @@ visualize_theoretical <- function(data, ...) } - if(direction %in% c("two_sided", "both") && obs_stat < 0){ + if (direction %in% c("two_sided", "both") && obs_stat < 0) { infer_plot <- infer_plot + geom_rect(data = data.frame(obs_stat), fill = pvalue_fill, alpha = 0.6, @@ -605,11 +605,11 @@ visualize_both <- function(data, bins, "method. `infer` currently does not check these for you." ) - if(!(attr(data, "stat") %in% c("t", "z", "Chisq", "F"))) + if (!(attr(data, "stat") %in% c("t", "z", "Chisq", "F"))) stop_glue("Your `calculate`d statistic and the theoretical distribution ", "are on different scales. 
Use a standardized `stat` instead.") - if(attr(data, "theory_type") %in% c("Two sample t", "Slope with t")){ + if (attr(data, "theory_type") %in% c("Two sample t", "Slope with t")) { infer_plot <- both_t_plot(data = data, deg_freedom = attr(data, "distr_param"), @@ -623,7 +623,7 @@ visualize_both <- function(data, bins, ci_fill = ci_fill) } - else if(attr(data, "theory_type") == "ANOVA"){ + else if (attr(data, "theory_type") == "ANOVA") { infer_plot <- both_anova_plot( data = data, deg_freedom_top = attr(data, "distr_param"), @@ -638,8 +638,8 @@ visualize_both <- function(data, bins, ci_fill = ci_fill) } - else if(attr(data, "theory_type") %in% - c("One sample prop z", "Two sample props z")){ + else if (attr(data, "theory_type") %in% + c("One sample prop z", "Two sample props z")) { infer_plot <- both_z_plot(data = data, statistic_text = "z", dens_color = dens_color, @@ -651,9 +651,9 @@ visualize_both <- function(data, bins, ci_fill = ci_fill) } - else if( + else if ( attr(data, "theory_type") %in% - c("Chi-square test of indep", "Chi-square Goodness of Fit")){ + c("Chi-square test of indep", "Chi-square Goodness of Fit")) { infer_plot <- both_chisq_plot(data = data, deg_freedom = attr(data, "distr_param"), statistic_text = "Chi-Square", diff --git a/R/wrappers.R b/R/wrappers.R index 72e07f55..d3c034e4 100755 --- a/R/wrappers.R +++ b/R/wrappers.R @@ -21,32 +21,32 @@ #' interval or not. `TRUE` by default. #' @param conf_level A numeric value between 0 and 1. Default value is 0.95. #' @param ... For passing in other arguments to [t.test()][stats::t.test()]. -#' +#' #' @examples #' # t test for comparing mpg against automatic/manual #' mtcars %>% #' dplyr::mutate(am = factor(am)) %>% #' t_test(mpg ~ am, order = c("1", "0"), alternative = "less") -#' +#' #' @importFrom rlang f_lhs #' @importFrom rlang f_rhs #' @export -t_test <- function(data, formula, #response = NULL, explanatory = NULL, +t_test <- function(data, formula, # response = NULL, explanatory = NULL, order = NULL, alternative = "two_sided", mu = 0, conf_int = TRUE, conf_level = 0.95, - ...){ + ...) { check_conf_level(conf_level) # Match with old "dot" syntax - if(alternative == "two_sided") + if (alternative == "two_sided") alternative <- "two.sided" ### Only currently working with formula interface # if (hasArg(formula)) { - if(!is.null(f_rhs(formula))){ + if (!is.null(f_rhs(formula))) { data[[as.character(f_rhs(formula))]] <- factor(data[[as.character(f_rhs(formula))]], @@ -73,7 +73,7 @@ t_test <- function(data, formula, #response = NULL, explanatory = NULL, broom::glance() } - if(conf_int){ + if (conf_int) { results <- prelim %>% dplyr::select(statistic, t_df = parameter, p_value = p.value, alternative, @@ -104,16 +104,16 @@ t_test <- function(data, formula, #response = NULL, explanatory = NULL, } #' Tidy t-test statistic -#' +#' #' A shortcut wrapper function to get the observed test statistic for a t test. #' #' @param data A data frame that can be coerced into a [tibble][tibble::tibble]. #' @param formula A formula with the response variable on the left and the #' explanatory on the right. #' @param ... Pass in arguments to \\{infer\\} functions. -#' +#' #' @export -t_stat <- function(data, formula, ...){ +t_stat <- function(data, formula, ...) { data %>% t_test(formula = formula, ...) %>% dplyr::select(statistic) @@ -128,19 +128,19 @@ t_stat <- function(data, formula, ...){ #' @param formula A formula with the response variable on the left and the #' explanatory on the right. #' @param ... 
Additional arguments for [chisq.test()][stats::chisq.test()]. -#' +#' #' @examples #' # chisq test for comparing number of cylinders against automatic/manual #' mtcars %>% #' dplyr::mutate(cyl = factor(cyl), am = factor(am)) %>% #' chisq_test(cyl ~ am) -#' +#' #' @importFrom rlang f_lhs f_rhs #' @export -chisq_test <- function(data, formula, #response = NULL, explanatory = NULL, - ...){ +chisq_test <- function(data, formula, # response = NULL, explanatory = NULL, + ...) { - if(is.null(f_rhs(formula))) + if (is.null(f_rhs(formula))) stop_glue( "`chisq_test()` currently only has functionality for ", "Chi-Square Test of Independence, not for Chi-Square Goodness of Fit." @@ -149,14 +149,14 @@ chisq_test <- function(data, formula, #response = NULL, explanatory = NULL, explanatory_var <- f_rhs(formula) response_var <- f_lhs(formula) - df <- data[ , as.character(c(response_var, explanatory_var))] + df <- data[, as.character(c(response_var, explanatory_var))] stats::chisq.test(table(df), ...) %>% broom::glance() %>% dplyr::select(statistic, chisq_df = parameter, p_value = p.value) } #' Tidy chi-squared test statistic -#' +#' #' A shortcut wrapper function to get the observed test statistic for a chisq #' test. Uses [chisq.test()][stats::chisq.test()], which applies a continuity #' correction. @@ -165,11 +165,11 @@ chisq_test <- function(data, formula, #response = NULL, explanatory = NULL, #' @param formula A formula with the response variable on the left and the #' explanatory on the right. #' @param ... Additional arguments for [chisq.test()][stats::chisq.test()]. -#' +#' #' @export -chisq_stat <- function(data, formula, ...){ +chisq_stat <- function(data, formula, ...) { - if(is.null(f_rhs(formula))){ + if (is.null(f_rhs(formula))) { stop_glue( "`chisq_stat()` currently only has functionality for ", "Chi-Square Test of Independence, not for Chi-Square Goodness of Fit. 
", @@ -183,8 +183,8 @@ chisq_stat <- function(data, formula, ...){ } -check_conf_level <- function(conf_level){ - if(class(conf_level) != "numeric" | +check_conf_level <- function(conf_level) { + if (class(conf_level) != "numeric" | conf_level < 0 | conf_level > 1) stop_glue("The `conf_level` argument must be a number between 0 and 1.") diff --git a/tests/testthat/test-calculate.R b/tests/testthat/test-calculate.R index 34933f46..34856237 100644 --- a/tests/testthat/test-calculate.R +++ b/tests/testthat/test-calculate.R @@ -186,7 +186,7 @@ test_that("chi-square matches chisq.test value", { hypothesize(null = "independence") %>% generate(reps = 10, type = "permute") infer_way <- calculate(gen_iris8, stat = "Chisq") - #chisq.test way + # chisq.test way trad_way <- gen_iris8 %>% dplyr::group_by(replicate) %>% dplyr::do(broom::tidy(stats::chisq.test(table(.$Petal.Length.Group, @@ -204,7 +204,7 @@ test_that("chi-square matches chisq.test value", { "virginica" = 1/3)) %>% generate(reps = 10, type = "simulate") infer_way <- calculate(gen_iris9, stat = "Chisq") - #chisq.test way + # chisq.test way trad_way <- gen_iris9 %>% dplyr::group_by(replicate) %>% dplyr::do(broom::tidy(stats::chisq.test(table(.$Species)))) %>% @@ -219,7 +219,7 @@ test_that("chi-square matches chisq.test value", { "virginica" = 0.1)) %>% generate(reps = 10, type = "simulate") infer_way <- calculate(gen_iris9a, stat = "Chisq") - #chisq.test way + # chisq.test way trad_way <- gen_iris9a %>% dplyr::group_by(replicate) %>% dplyr::do(broom::tidy(stats::chisq.test(table(.$Species), @@ -312,16 +312,16 @@ test_that("order being given when not needed gives warning", { ## Breaks oldrel build. Commented out for now. # test_that("warning given if calculate without generate", { -# expect_warning(iris %>% -# specify(Species ~ NULL) %>% -# hypothesize(null = "point", +# expect_warning(iris %>% +# specify(Species ~ NULL) %>% +# hypothesize(null = "point", # p = c("setosa" = 0.4, # "versicolor" = 0.4, -# "virginica" = 0.2)) %>% -# #generate(reps = 10, type = "simulate") %>% +# "virginica" = 0.2)) %>% +# #generate(reps = 10, type = "simulate") %>% # calculate(stat = "Chisq") # ) -# +# # }) test_that("specify() %>% calculate() works", { diff --git a/tests/testthat/test-hypothesize.R b/tests/testthat/test-hypothesize.R index 828a815d..717e0a78 100644 --- a/tests/testthat/test-hypothesize.R +++ b/tests/testthat/test-hypothesize.R @@ -66,7 +66,7 @@ test_that("auto `type` works (hypothesize)", { expect_equal(attr(slopes, "type"), "permute") }) -test_that("hypothesize arguments function",{ +test_that("hypothesize arguments function", { mtcars_f <- dplyr::mutate(mtcars, cyl = factor(cyl)) mtcars_s <- mtcars_f %>% specify(response = mpg) @@ -81,7 +81,7 @@ test_that("hypothesize arguments function",{ expect_error(mtcars_s %>% hypothesize(null = "independence")) expect_error(mtcars_s %>% hypothesize(null = "point")) # Produces error on win-build -# expect_warning(mtcars_s %>% +# expect_warning(mtcars_s %>% # hypothesize(null = c("point", "independence"), mu = 3)) expect_error(mtcars %>% dplyr::select(vs) %>% diff --git a/tests/testthat/test-visualize.R b/tests/testthat/test-visualize.R index 55fc5ad0..fce2f025 100644 --- a/tests/testthat/test-visualize.R +++ b/tests/testthat/test-visualize.R @@ -61,7 +61,7 @@ test_that("visualize basic tests", { calculate(stat = "slope") %>% visualize(obs_stat = obs_slope, direction = "right")) - #obs_stat not specified + # obs_stat not specified expect_error(iris_tbl %>% specify(Sepal.Width.Group ~ Sepal.Length.Group, 
success = "large") %>% @@ -138,7 +138,7 @@ test_that("visualize basic tests", { specify(Sepal.Length ~ Sepal.Width.Group) %>% hypothesize(null = "independence") %>% generate(reps = 100, type = "permute") %>% - calculate(stat = "t", order = c("small", "large") ) %>% + calculate(stat = "t", order = c("small", "large")) %>% visualize(method = "both", direction = "left", obs_stat = -obs_t) ) @@ -146,8 +146,8 @@ test_that("visualize basic tests", { expect_warning(iris_tbl %>% specify(Sepal.Length ~ Sepal.Width.Group) %>% hypothesize(null = "independence") %>% - # generate(reps = 100, type = "permute") %>% - calculate(stat = "t", order = c("small", "large") ) %>% + # generate(reps = 100, type = "permute") %>% + calculate(stat = "t", order = c("small", "large")) %>% visualize(method = "theoretical", direction = "left", obs_stat = -obs_t) ) @@ -196,7 +196,7 @@ test_that("visualize basic tests", { specify(Sepal.Width.Group ~ Species, success = "large") %>% hypothesize(null = "independence") %>% - #calculate(stat = "Chisq") %>% + # calculate(stat = "Chisq") %>% visualize(method = "theoretical", obs_stat = obs_F, direction = "right") ) @@ -212,15 +212,15 @@ test_that("visualize basic tests", { visualize(method = "both") ) - #traditional instead of theoretical + # traditional instead of theoretical expect_error(iris_tbl %>% specify(Species ~ NULL) %>% hypothesize(null = "point", p = c("setosa" = 0.4, "versicolor" = 0.4, "virginica" = 0.2)) %>% -# generate(reps = 100, type = "simulate") %>% -# calculate(stat = "Chisq") %>% +# generate(reps = 100, type = "simulate") %>% +# calculate(stat = "Chisq") %>% visualize(method = "traditional") ) @@ -230,8 +230,8 @@ test_that("visualize basic tests", { p = c("setosa" = 0.4, "versicolor" = 0.4, "virginica" = 0.2)) %>% - #generate(reps = 100, type = "simulate") %>% - #calculate(stat = "Chisq") %>% + # generate(reps = 100, type = "simulate") %>% + # calculate(stat = "Chisq") %>% visualize(method = "theoretical") ) @@ -269,8 +269,8 @@ test_that("visualize basic tests", { expect_warning(iris_tbl %>% specify(Sepal.Width.Group ~ NULL, success = "small") %>% hypothesize(null = "point", p = 0.8) %>% -# generate(reps = 100, type = "simulate") %>% -# calculate(stat = "z") %>% +# generate(reps = 100, type = "simulate") %>% +# calculate(stat = "z") %>% visualize(method = "theoretical", obs_stat = 2, # Should probably update direction = "both") @@ -322,10 +322,10 @@ test_that('method = "both" behaves nicely', { specify(Petal.Width ~ NULL) %>% hypothesize(null = "point", mu = 4) %>% generate(reps = 100, type = "bootstrap") %>% - # calculate(stat = "mean") %>% + # calculate(stat = "mean") %>% visualize(method = "both")) - # + # expect_warning(iris_tbl %>% specify(Petal.Width ~ Sepal.Length.Group) %>% hypothesize(null = "point", mu = 4) %>% @@ -356,7 +356,7 @@ test_that("Traditional right-tailed tests have warning if not right-tailed", { specify(Sepal.Width.Group ~ Species, success = "large") %>% hypothesize(null = "independence") %>% - # generate(reps = 100, type = "permute") %>% + # generate(reps = 100, type = "permute") %>% calculate(stat = "Chisq") %>% visualize(method = "theoretical", obs_stat = 2, direction = "left") @@ -364,7 +364,7 @@ test_that("Traditional right-tailed tests have warning if not right-tailed", { expect_warning(iris_tbl %>% specify(Sepal.Length ~ Species) %>% hypothesize(null = "independence") %>% - # generate(reps = 100, type = "permute") %>% + # generate(reps = 100, type = "permute") %>% calculate(stat = "F") %>% visualize(method = "theoretical", 
obs_stat = 2, direction = "two_sided") @@ -372,7 +372,7 @@ test_that("Traditional right-tailed tests have warning if not right-tailed", { }) -test_that("confidence interval plots are working",{ +test_that("confidence interval plots are working", { iris_boot <- iris_tbl %>% specify(Sepal.Width.Group ~ Sepal.Length.Group, diff --git a/tests/testthat/test-wrappers.R b/tests/testthat/test-wrappers.R index b43a331d..81c058a8 100644 --- a/tests/testthat/test-wrappers.R +++ b/tests/testthat/test-wrappers.R @@ -58,7 +58,7 @@ test_that("_stat functions work", { # another_way <- iris3 %>% # chisq_test(Species ~ NULL) %>% # dplyr::select(statistic) -# obs_stat_way <- iris3 %>% +# obs_stat_way <- iris3 %>% # chisq_stat(Species ~ NULL) # expect_equivalent(another_way, obs_stat_way) From ba37ef513c4bfdb02ed0c154920077046cdb7c3e Mon Sep 17 00:00:00 2001 From: "evgeni.chasnovski" Date: Sun, 5 Aug 2018 13:44:10 +0300 Subject: [PATCH 04/78] Update indenting (manually). --- R/calculate.R | 120 ++++++------ R/conf_int.R | 13 +- R/generate.R | 39 ++-- R/hypothesize.R | 16 +- R/infer.R | 3 +- R/p_value.R | 61 ++++--- R/rep_sample_n.R | 6 +- R/set_params.R | 51 +++--- R/specify.R | 14 +- R/utils.R | 63 ++++--- R/visualize.R | 130 ++++++------- R/wrappers.R | 16 +- tests/testthat/test-calculate.R | 6 - tests/testthat/test-conf_int.R | 4 +- tests/testthat/test-generate.R | 168 +++++++++-------- tests/testthat/test-hypothesize.R | 9 +- tests/testthat/test-p_value.R | 4 +- tests/testthat/test-rep_sample_n.R | 6 +- tests/testthat/test-specify.R | 3 - tests/testthat/test-visualize.R | 284 +++++++++++++---------------- tests/testthat/test-wrappers.R | 7 +- 21 files changed, 480 insertions(+), 543 deletions(-) diff --git a/R/calculate.R b/R/calculate.R index 668b9680..041c4080 100755 --- a/R/calculate.R +++ b/R/calculate.R @@ -52,33 +52,34 @@ calculate <- function(x, check_args_and_attr(x, explanatory_variable(x), response_variable(x), stat) check_point_params(x, stat) - if (!has_response(x)) + if (!has_response(x)) { stop_glue( "The response variable is not set. Make sure to `specify()` it first." ) + } if (is.null(attr(x, "generate")) || !attr(x, "generate")) { if (is.null(attr(x, "null"))) { x$replicate <- 1L - } - else if (stat %in% c( - "mean", - "median", - "sd", - "prop", - "diff in means", - "diff in medians", - "diff in props", - "slope", - "correlation" - )) + } else if ( + stat %in% c( + "mean", + "median", + "sd", + "prop", + "diff in means", + "diff in medians", + "diff in props", + "slope", + "correlation" + ) + ) { stop_glue( "Theoretical distributions do not exist (or have not been ", "implemented) for `stat` = \"{stat}\". Are you missing ", "a `generate()` step?" 
) - - else if (!(stat %in% c("Chisq", "prop"))) { + } else if (!(stat %in% c("Chisq", "prop"))) { # From `hypothesize()` to `calculate()` # Catch-all if generate was not called # warning_glue("You unexpectantly went from `hypothesize()` to ", @@ -87,10 +88,12 @@ calculate <- function(x, return(x) } } - - if (stat %in% c("diff in means", "diff in medians", "diff in props") || - (!is.null(attr(x, "theory_type")) && - attr(x, "theory_type") %in% c("Two sample props z", "Two sample t"))) { + + if ( + stat %in% c("diff in means", "diff in medians", "diff in props") || + (!is.null(attr(x, "theory_type")) && + attr(x, "theory_type") %in% c("Two sample props z", "Two sample t")) + ) { check_order(x, explanatory_variable(x), order) } @@ -113,27 +116,30 @@ calculate <- function(x, result <- calc_impl(structure(stat, class = gsub(" ", "_", stat)), x, order, ...) - if ("NULL" %in% class(result)) + if ("NULL" %in% class(result)) { stop_glue( "Your choice of `stat` is invalid for the ", "types of variables `specify`ed." ) -# else -# class(result) <- append("infer", class(result)) + } +# else { +# class(result) <- append("infer", class(result)) +# } result <- set_attributes(to = result, from = x) attr(result, "stat") <- stat # For returning a 1x1 observed statistic value - if (nrow(result) == 1) + if (nrow(result) == 1) { result <- select(result, stat) + } return(result) } -calc_impl <- - function(type, x, order, ...) - UseMethod("calc_impl", type) +calc_impl <- function(type, x, order, ...) { + UseMethod("calc_impl", type) +} calc_impl.mean <- function(stat, x, order, ...) { @@ -142,7 +148,6 @@ calc_impl.mean <- function(stat, x, order, ...) { x %>% dplyr::group_by(replicate) %>% dplyr::summarize(stat = mean(!!(sym(col)), ...)) - } calc_impl.median <- function(stat, x, order, ...) { @@ -165,18 +170,19 @@ calc_impl.prop <- function(stat, x, order, ...) { col <- base::setdiff(names(x), "replicate") ## No longer needed with implementation of `check_point_params()` - # if(!is.factor(x[[col]])){ + # if (!is.factor(x[[col]])) { # stop_glue( # "Calculating a {stat} here is not appropriate since the `{col}` ", # "variable is not a factor." # ) # } - if (is.null(attr(x, "success"))) + if (is.null(attr(x, "success"))) { stop_glue( 'To calculate a proportion, the `"success"` argument ', 'must be provided in `specify()`.' ) + } success <- attr(x, "success") x %>% @@ -243,16 +249,13 @@ calc_impl.Chisq <- function(stat, x, order, ...) { dplyr::summarize(stat = stats::chisq.test(table(!!( attr(x, "response") )), p = attr(x, "params"))$stat) - } else { # Straight from `specify()` stop_glue("In order to calculate a Chi-Square Goodness of Fit ", "statistic, hypothesized values must be given for the `p` ", "parameter in the `hypothesize()` function prior to ", "using `calculate()`") - } - } else { # This is not matching with chisq.test # obs_tab <- x %>% @@ -268,7 +271,6 @@ calc_impl.Chisq <- function(stat, x, order, ...) { # - expected)^2 / expected, ...)) # Chi-Square Test of Independence - result <- x %>% dplyr::do(broom::tidy(suppressWarnings(stats::chisq.test(table( .[[as.character(attr(x, "response"))]], @@ -276,11 +278,11 @@ calc_impl.Chisq <- function(stat, x, order, ...) 
{ ))))) %>% dplyr::ungroup() - if (!is.null(attr(x, "generate"))) - result <- - result %>% dplyr::select(replicate, stat = statistic) - else + if (!is.null(attr(x, "generate"))) { + result <- result %>% dplyr::select(replicate, stat = statistic) + } else { result <- result %>% dplyr::select(stat = statistic) + } attr(result, "response") <- attr(x, "response") attr(result, "success") <- attr(x, "success") @@ -292,7 +294,6 @@ calc_impl.Chisq <- function(stat, x, order, ...) { attr(result, "theory_type") <- attr(x, "theory_type") result - } } @@ -354,9 +355,8 @@ calc_impl.t <- function(stat, x, order, ...) { stat = stats::t.test(!!attr(x, "response"), ... )[["statistic"]]) - } - # For hypothesis testing - else { + } else { + # For hypothesis testing x %>% dplyr::summarize(stat = stats::t.test( !!attr(x, "response"), @@ -400,29 +400,29 @@ calc_impl.z <- function(stat, x, order, ...) { dplyr::select(-total_suc, -n1, -n2) df_out - - } else + } else if (attr(x, "theory_type") == "One sample prop z") { # One sample proportion - if (attr(x, "theory_type") == "One sample prop z") { - # When `hypothesize()` has been called - success <- attr(x, "success") + + # When `hypothesize()` has been called + success <- attr(x, "success") - p0 <- attr(x, "params")[1] - num_rows <- nrow(x) / length(unique(x$replicate)) + p0 <- attr(x, "params")[1] + num_rows <- nrow(x) / length(unique(x$replicate)) - col <- attr(x, "response") - # if(is.null(success)) - # success <- quo(get_par_levels(x)[1]) - # Error given instead + col <- attr(x, "response") +# if (is.null(success)) { +# success <- quo(get_par_levels(x)[1]) +# } +# Error given instead - df_out <- x %>% - dplyr::summarize(stat = (mean( - rlang::eval_tidy(col) == rlang::eval_tidy(success), ... - ) - p0) / sqrt((p0 * (1 - p0)) / num_rows)) + df_out <- x %>% + dplyr::summarize(stat = (mean( + rlang::eval_tidy(col) == rlang::eval_tidy(success), ... 
+ ) - p0) / sqrt((p0 * (1 - p0)) / num_rows)) - df_out + df_out - # Straight from `specify()` doesn't make sense - # since standardizing requires a hypothesized value - } + # Straight from `specify()` doesn't make sense + # since standardizing requires a hypothesized value + } } diff --git a/R/conf_int.R b/R/conf_int.R index 887a58ad..6fbadc06 100644 --- a/R/conf_int.R +++ b/R/conf_int.R @@ -37,7 +37,6 @@ NULL #' @export conf_int <- function(x, level = 0.95, type = "percentile", point_estimate = NULL) { - check_ci_args(x, level, type, point_estimate) if (type == "percentile") { @@ -58,12 +57,12 @@ conf_int <- function(x, level = 0.95, type = "percentile", } check_ci_args <- function(x, level, type, point_estimate) { - if (!is.null(point_estimate)) { - if (!is.data.frame(point_estimate)) + if (!is.data.frame(point_estimate)) { check_type(point_estimate, is.numeric) - else + } else { check_type(point_estimate, is.data.frame) + } } check_type(x, is.data.frame) check_type(level, is.numeric) @@ -75,12 +74,14 @@ check_ci_args <- function(x, level, type, point_estimate) { stop_glue('The options for `type` are "percentile" or "se".') } - if (type == "se" && is.null(point_estimate)) + if (type == "se" && is.null(point_estimate)) { stop_glue('A numeric value needs to be given for `point_estimate` ', 'for `type = "se"') + } - if (type == "se" && is.vector(point_estimate)) + if (type == "se" && is.vector(point_estimate)) { check_type(point_estimate, is.numeric) + } } diff --git a/R/generate.R b/R/generate.R index 2c86a47d..9f10c488 100755 --- a/R/generate.R +++ b/R/generate.R @@ -22,7 +22,6 @@ #' @importFrom dplyr group_by #' @export generate <- function(x, reps = 1, type = attr(x, "type"), ...) { - auto_type <- attr(x, "type") if (!is.null(auto_type)) { @@ -30,13 +29,14 @@ generate <- function(x, reps = 1, type = attr(x, "type"), ...) { stop_glue("Supply not `NULL` value of `type`.") } - if (auto_type != type) + if (auto_type != type) { stop_glue( "You have specified `type = \"{type}\"`, but `type` is expected to be ", "`\"{auto_type}\"`. Please try again with appropriate `type` value." ) - else + } else { type <- auto_type + } } attr(x, "generate") <- TRUE @@ -62,16 +62,14 @@ generate <- function(x, reps = 1, type = attr(x, "type"), ...) { if (type == "bootstrap") { return(bootstrap(x, reps, ...)) - } - else if (type == "permute") { + } else if (type == "permute") { return(permute(x, reps, ...)) - } - else if (type == "simulate") { + } else if (type == "simulate") { return(simulate(x, reps, ...)) - } -# else if (!(type %in% c("bootstrap", "permute", "simulate"))) -# stop_glue("Choose one of the available options for `type`: ", -# '`"bootstrap"`, `"permute"`, or `"simulate"`') + } # else if (!(type %in% c("bootstrap", "permute", "simulate"))) { + # stop_glue("Choose one of the available options for `type`: ", + # '`"bootstrap"`, `"permute"`, or `"simulate"`') + # } } bootstrap <- function(x, reps = 1, ...) { @@ -80,21 +78,20 @@ bootstrap <- function(x, reps = 1, ...) 
{ # If so, shift the variable chosen to have a mean corresponding # to that specified in `hypothesize` if (attr(attr(x, "params"), "names") == "mu") { - col <- as.character(attr(x, "response")) -# if(attr(x, "theory_type") != "One sample t"){ - x[[col]] <- x[[col]] - mean(x[[col]], na.rm = TRUE) + attr(x, "params") +# if (attr(x, "theory_type") != "One sample t") { + x[[col]] <- x[[col]] - mean(x[[col]], na.rm = TRUE) + attr(x, "params") # } # Standardize after centering above ##### # Determining whether or not to implement this t transformation ##### -# else { -# std_error <- stats::sd(x[[col]], na.rm = TRUE) / -# sqrt(length(x[[col]])) -# x[[col]] <- ( x[[col]] - mean(x[[col]], na.rm = TRUE) ) / std_error -# } + # else { + # std_error <- stats::sd(x[[col]], na.rm = TRUE) / + # sqrt(length(x[[col]])) + # x[[col]] <- (x[[col]] - mean(x[[col]], na.rm = TRUE)) / std_error + # } } # Similarly for median @@ -109,7 +106,7 @@ bootstrap <- function(x, reps = 1, ...) { # Similarly for sd ## Temporarily removed since this implementation does not scale correctly - # else if(attr(attr(x, "params"), "names") == "sigma"){ + # else if (attr(attr(x, "params"), "names") == "sigma") { # col <- as.character(attr(x, "response")) # x[[col]] <- x[[col]] - # stats::sd(x[[col]], na.rm = TRUE) + attr(x, "params") @@ -126,7 +123,6 @@ bootstrap <- function(x, reps = 1, ...) { } #' @importFrom dplyr bind_rows group_by - permute <- function(x, reps = 1, ...) { df_out <- replicate(reps, permute_once(x), simplify = FALSE) %>% dplyr::bind_rows() %>% @@ -150,7 +146,6 @@ permute_once <- function(x, ...) { x[as.character(attr(x, "response"))] <- y_prime return(x) } - } #' @importFrom dplyr pull diff --git a/R/hypothesize.R b/R/hypothesize.R index 62f8046f..af9a6b5c 100755 --- a/R/hypothesize.R +++ b/R/hypothesize.R @@ -19,7 +19,6 @@ #' #' @export hypothesize <- function(x, null, ...) { - hypothesize_checks(x, null) attr(x, "null") <- null @@ -49,26 +48,31 @@ hypothesize <- function(x, null, ...) 
{ } - if (!is.null(null) && null == "independence") + if (!is.null(null) && null == "independence") { attr(x, "type") <- "permute" + } # Check one proportion test set up correctly if (null == "point") { if (is.factor(response_variable(x))) { - if (!any(grepl("p", attr(attr(x, "params"), "names")))) + if (!any(grepl("p", attr(attr(x, "params"), "names")))) { stop_glue('Testing one categorical variable requires `p` ', 'to be used as a parameter.') + } } } # Check one numeric test set up correctly ## Not currently able to reach in testing as other checks ## already produce errors - # if(null == "point"){ - # if(!is.factor(response_variable(x)) - # & !any(grepl("mu|med|sigma", attr(attr(x, "params"), "names")))) + # if (null == "point") { + # if ( + # !is.factor(response_variable(x)) + # & !any(grepl("mu|med|sigma", attr(attr(x, "params"), "names"))) + # ) { # stop_glue('Testing one numerical variable requires one of ', # '`mu`, `med`, or `sd` to be used as a parameter.') + # } # } return(tibble::as_tibble(x)) diff --git a/R/infer.R b/R/infer.R index 679908d8..084a98c8 100755 --- a/R/infer.R +++ b/R/infer.R @@ -14,10 +14,11 @@ NULL ## quiets concerns of R CMD check re: the .'s that appear in pipelines ## From Jenny Bryan's googlesheets package -if (getRversion() >= "2.15.1") +if (getRversion() >= "2.15.1") { utils::globalVariables(c("prop", "stat", "xbar", "xtilde", "x", "..density..", "statistic", ".", "parameter", "p.value", "xmin", "xmax", "density", "denom", "diff_prop", "group_num", "n1", "n2", "num_suc", "p_hat", "total_suc", "explan", "probs", "conf.low", "conf.high")) +} diff --git a/R/p_value.R b/R/p_value.R index 8cceab14..fe349ede 100644 --- a/R/p_value.R +++ b/R/p_value.R @@ -32,7 +32,6 @@ NULL #' @rdname get_pvalue #' @export p_value <- function(x, obs_stat, direction) { - check_type(x, is.data.frame) obs_stat <- check_obs_stat(obs_stat) check_direction(direction) @@ -40,17 +39,21 @@ p_value <- function(x, obs_stat, direction) { is_simulation_based <- !is.null(attr(x, "generate")) && attr(x, "generate") - if (is_simulation_based) + if (is_simulation_based) { pvalue <- simulation_based_p_value(x = x, obs_stat = obs_stat, - direction = direction) + direction = direction) + } ## Theoretical-based p-value # Could be more specific - # else if(is.null(attr(x, "theory_type")) || is.null(attr(x, "distr_param"))) + # else if ( + # is.null(attr(x, "theory_type")) || is.null(attr(x, "distr_param")) + # ) { # stop_glue("Attributes have not been set appropriately. 
", # "Check your {{infer}} pipeline again.") + # } - # if(!("stat" %in% names(x))){ + # if (!("stat" %in% names(x))) { # # Theoretical distribution # which_distribution(x, # theory_type <- attr(x, "theory_type"), @@ -62,16 +65,13 @@ p_value <- function(x, obs_stat, direction) { } simulation_based_p_value <- function(x, obs_stat, direction) { - if (direction %in% c("less", "left")) { - p_value <- x %>% + p_value <- x %>% dplyr::summarize(p_value = mean(stat <= obs_stat)) - } - else if (direction %in% c("greater", "right")) { - p_value <- x %>% + } else if (direction %in% c("greater", "right")) { + p_value <- x %>% dplyr::summarize(p_value = mean(stat >= obs_stat)) - } - else { + } else { p_value <- x %>% two_sided_p_value(obs_stat = obs_stat) } @@ -79,51 +79,52 @@ simulation_based_p_value <- function(x, obs_stat, direction) { } two_sided_p_value <- function(x, obs_stat) { - if (stats::median(x$stat) >= obs_stat) { basic_p_value <- get_percentile(x$stat, obs_stat) + (1 - get_percentile(x$stat, stats::median(x$stat) + - stats::median(x$stat) - obs_stat)) + stats::median(x$stat) - obs_stat)) } else { basic_p_value <- 1 - get_percentile(x$stat, obs_stat) + (get_percentile(x$stat, stats::median(x$stat) + - stats::median(x$stat) - obs_stat)) + stats::median(x$stat) - obs_stat)) } - if (basic_p_value >= 1) + if (basic_p_value >= 1) { # Catch all if adding both sides produces a number # larger than 1. Should update with test in that # scenario instead of using >= return(tibble::tibble(p_value = 1)) - else + } else { return(tibble::tibble(p_value = basic_p_value)) + } } #' @rdname get_pvalue #' @export get_pvalue <- p_value -# which_distribution <- function(x, theory_type, obs_stat, direction){ -# +# which_distribution <- function(x, theory_type, obs_stat, direction) { # param <- attr(x, "distr_param") -# if(!is.null(attr(x, "distr_param2"))) +# if (!is.null(attr(x, "distr_param2"))) { # param2 <- attr(x, "distr_param2") -# -# if(theory_type == "Two sample t") +# } +# +# if (theory_type == "Two sample t") { # return(pt(q = obs_stat, -# df = param, -# lower.tail = set_lower_tail(direction)) -# ) -# +# df = param, +# lower.tail = set_lower_tail(direction)) +# ) +# } # } # theory_t_pvalue <- -# set_lower_tail <- function(direction){ -# if(direction %in% c("greater", "right")) +# set_lower_tail <- function(direction) { +# if (direction %in% c("greater", "right")) { # lower_tail <- FALSE -# else +# } else { # lower_tail <- TRUE -# +# } +# # lower_tail # } diff --git a/R/rep_sample_n.R b/R/rep_sample_n.R index b9079387..0dc80c99 100644 --- a/R/rep_sample_n.R +++ b/R/rep_sample_n.R @@ -49,17 +49,19 @@ rep_sample_n <- function(tbl, size, replace = FALSE, reps = 1, prob = NULL) { check_type(size, is.numeric) check_type(replace, is.logical) check_type(reps, is.numeric) - if (!is.null(prob)) + if (!is.null(prob)) { check_type(prob, is.numeric) + } # assign non-uniform probabilities # there should be a better way!! 
# prob needs to be nrow(tbl) -- not just number of factor levels if (!is.null(prob)) { - if (length(prob) != n) + if (length(prob) != n) { stop_glue( "The argument `prob` must have length `nrow(tbl)` = {nrow(tbl)}" ) + } prob <- tibble::tibble(vals = levels(dplyr::pull(tbl, 1))) %>% dplyr::mutate(probs = prob) %>% diff --git a/R/set_params.R b/R/set_params.R index 77717f74..827d90b1 100755 --- a/R/set_params.R +++ b/R/set_params.R @@ -4,7 +4,6 @@ #' #' @noRd set_params <- function(x) { - attr(x, "theory_type") <- NULL if (!is.null(attr(x, "response"))) { @@ -21,15 +20,15 @@ set_params <- function(x) { attr(x, "theory_type") <- "One sample t" attr(x, "distr_param") <- x %>% dplyr::summarize(df = stats::t.test( - response_variable(x))[["parameter"]] - ) %>% + response_variable(x))[["parameter"]] + ) %>% dplyr::pull() attr(x, "type") <- "bootstrap" - } - - # One prop - else if (attr(x, "response_type") == "factor" && (num_response_levels == 2)) { - + } else if ( + + # One prop + attr(x, "response_type") == "factor" && (num_response_levels == 2) + ) { # No parameters since standard normal attr(x, "theory_type") <- "One sample prop z" # Changed to `"simulate"` when `p` provided in `hypothesize()` @@ -39,20 +38,18 @@ set_params <- function(x) { attr(x, "distr_param") <- num_response_levels - 1 attr(x, "type") <- "simulate" } - } # Two variables if (!is.null(attr(x, "response")) && !is.null(attr(x, "explanatory")) & !is.null(attr(x, "response_type")) && !is.null(attr(x, "explanatory_type"))) { - attr(x, "type") <- "bootstrap" # Response is numeric, explanatory is categorical if (attr(x, "response_type") %in% c("integer", "numeric") & - attr(x, "explanatory_type") == "factor") { - + attr(x, "explanatory_type") == "factor") { + # Two sample means (t distribution) if (length(levels(explanatory_variable(x))) == 2) { attr(x, "theory_type") <- "Two sample t" @@ -60,40 +57,40 @@ set_params <- function(x) { # in aggregation w/ calculate()/generate() attr(x, "distr_param") <- x %>% dplyr::summarize(df = stats::t.test( - !!attr(x, "response") ~ !!attr(x, "explanatory"))[["parameter"]] - ) %>% + !!attr(x, "response") ~ !!attr(x, "explanatory"))[["parameter"]] + ) %>% dplyr::pull() } else { + # >2 sample means (F distribution) attr(x, "theory_type") <- "ANOVA" # Get numerator and denominator degrees of freedom attr(x, "distr_param") <- x %>% dplyr::summarize(df1 = stats::anova(stats::aov( - !!attr(x, "response") ~ !!attr(x, "explanatory")))$Df[1] - ) %>% + !!attr(x, "response") ~ !!attr(x, "explanatory")))$Df[1] + ) %>% dplyr::pull() attr(x, "distr_param2") <- x %>% dplyr::summarize(df2 = stats::anova(stats::aov( - !!attr(x, "response") ~ !!attr(x, "explanatory")))$Df[2] - ) %>% + !!attr(x, "response") ~ !!attr(x, "explanatory")))$Df[2] + ) %>% dplyr::pull() } } # Response is categorical, explanatory is categorical if (attr(x, "response_type") == "factor" & - attr(x, "explanatory_type") == "factor") { - + attr(x, "explanatory_type") == "factor") { attr(x, "type") <- "bootstrap" # Two sample proportions (z distribution) # Parameter(s) not needed since standard normal if (length(levels(response_variable(x))) == 2 & - length(levels(explanatory_variable(x))) == 2) { + length(levels(explanatory_variable(x))) == 2) { attr(x, "theory_type") <- "Two sample props z" - } - # >2 sample proportions (chi-square test of indep) - else { + } else { + + # >2 sample proportions (chi-square test of indep) attr(x, "theory_type") <- "Chi-square test of indep" attr(x, "distr_param") <- x %>% dplyr::summarize(df = 
suppressWarnings(stats::chisq.test( @@ -105,11 +102,11 @@ set_params <- function(x) { # Response is numeric, explanatory is numeric if (attr(x, "response_type") %in% c("integer", "numeric") & - attr(x, "explanatory_type") %in% c("integer", "numeric")) { + attr(x, "explanatory_type") %in% c("integer", "numeric")) { response_string <- as.character(attr(x, "response")) explanatory_string <- as.character(attr(x, "explanatory")) - attr(x, "theory_type") <- "Slope/correlation with t" - attr(x, "distr_param") <- nrow(x) - 2 + attr(x, "theory_type") <- "Slope/correlation with t" + attr(x, "distr_param") <- nrow(x) - 2 } } diff --git a/R/specify.R b/R/specify.R index c3a3ab6b..f0e33193 100755 --- a/R/specify.R +++ b/R/specify.R @@ -119,20 +119,22 @@ specify <- function(x, formula, response = NULL, # To help determine theoretical distribution to plot attr(x, "response_type") <- class(response_variable(x)) - if (is.null(attr(x, "explanatory"))) + if (is.null(attr(x, "explanatory"))) { attr(x, "explanatory_type") <- NULL - else + } else { attr(x, "explanatory_type") <- class(explanatory_variable(x)) + } if (attr(x, "response_type") == "factor" && is.null(success) && - length(levels(response_variable(x))) == 2 && - (is.null(attr(x, "explanatory_type")) || - (!is.null(attr(x, "explanatory_type")) && - length(levels(explanatory_variable(x))) == 2))) + length(levels(response_variable(x))) == 2 && + (is.null(attr(x, "explanatory_type")) || + (!is.null(attr(x, "explanatory_type")) && + length(levels(explanatory_variable(x))) == 2))) { stop_glue( 'A level of the response variable `{attr(x, "response")}` ', 'needs to be specified for the `success` argument in `specify()`.' ) + } # Determine appropriate parameters for theoretical distribution fit x <- set_params(x) diff --git a/R/utils.R b/R/utils.R index feb18799..69a77962 100644 --- a/R/utils.R +++ b/R/utils.R @@ -101,22 +101,25 @@ check_order <- function(x, explanatory_variable, order) { '`order = c("first", "second")` means `("first" - "second")`. ', "Check `?calculate` for details.") } else { - if (xor(is.na(order[1]), is.na(order[2]))) + if (xor(is.na(order[1]), is.na(order[2]))) { stop_glue( "Only one level specified in `order`. Both levels need to be specified." ) - if (length(order) > 2) + } + if (length(order) > 2) { stop_glue("`order` is expecting only two entries.") - if (order[1] %in% unique_explanatory_variable == FALSE) + } + if (order[1] %in% unique_explanatory_variable == FALSE) { stop_glue("{order[1]} is not a level of the explanatory variable.") - if (order[2] %in% unique_explanatory_variable == FALSE) + } + if (order[2] %in% unique_explanatory_variable == FALSE) { stop_glue("{order[2]} is not a level of the explanatory variable.") + } } } check_args_and_attr <- function(x, explanatory_variable, response_variable, stat) { - # Could also do `stat <- match.arg(stat)` # but that's not as helpful to beginners with the cryptic error msg if (!stat %in% c("mean", "median", "sd", "prop", @@ -126,9 +129,10 @@ check_args_and_attr <- function(x, explanatory_variable, response_variable, "Check your spelling and `?calculate` for current options.") } - if (!("replicate" %in% names(x)) && !is.null(attr(x, "generate"))) + if (!("replicate" %in% names(x)) && !is.null(attr(x, "generate"))) { warning_glue('A `generate()` step was not performed prior to ', '`calculate()`. 
Review carefully.') + } if (stat %in% c("F", "slope", "diff in means", "diff in medians")) { if (has_explanatory(x) && !is.numeric(response_variable(x))) { @@ -163,7 +167,6 @@ check_for_numeric_stat <- function(x, stat) { } check_for_factor_stat <- function(x, stat, explanatory_variable) { - if (stat %in% c("diff in means", "diff in medians", "diff in props", "F")) { if (!is.factor(explanatory_variable)) { stop_glue( @@ -176,28 +179,34 @@ check_for_factor_stat <- function(x, stat, explanatory_variable) { } check_point_params <- function(x, stat) { - param_names <- attr(attr(x, "params"), "names") hyp_text <- 'to be set in `hypothesize()`.' if (!is.null(attr(x, "null"))) { if (stat %in% c("mean", "median", "sd", "prop")) { - if ((stat == "mean" && !("mu" %in% param_names))) + if ((stat == "mean" && !("mu" %in% param_names))) { stop_glue('`stat == "mean"` requires `"mu"` {hyp_text}') - if ((!(stat == "mean") && ("mu" %in% param_names))) + } + if ((!(stat == "mean") && ("mu" %in% param_names))) { stop_glue('`"mu"` does not correspond to `stat = "{stat}"`.') - if ((stat == "median" && !("med" %in% param_names))) + } + if ((stat == "median" && !("med" %in% param_names))) { stop_glue('`stat == "median"` requires `"med"` {hyp_text}') - if ((!(stat == "median") && ("med" %in% param_names))) + } + if ((!(stat == "median") && ("med" %in% param_names))) { stop_glue('`"med"` does not correspond to `stat = "{stat}"`.') + } ## Tests unable to get to - # if( (stat == "sigma" && !("sd" %in% param_names)) ) + # if ((stat == "sigma" && !("sd" %in% param_names))) { # stop_glue('`stat == "sd"` requires `"sigma"` {hyp_text}') - if ((!(stat == "sd") && ("sigma" %in% param_names))) + # } + if ((!(stat == "sd") && ("sigma" %in% param_names))) { stop_glue('`"sigma"` does not correspond to `stat = "{stat}"`.') + } ## Tests unable to get to - # if(stat == "prop" && !(any(grepl("p.", param_names)))) + # if (stat == "prop" && !(any(grepl("p.", param_names)))) { # stop_glue('`stat == "prop"` requires `"p"` {hyp_text}') + # } } } } @@ -210,7 +219,7 @@ parse_params <- function(dots, x) { # error: cannot specify more than one of props, means, medians, or sds if (length(p_ind) + length(mu_ind) + length(med_ind) - + length(sig_ind) != 1) { + + length(sig_ind) != 1) { stop_glue( 'Parameter values can be only one of `p`, `mu`, `med`, or `sigma`.' ) @@ -221,17 +230,17 @@ parse_params <- function(dots, x) { # 0 index of dots if (length(p_ind)) { if (length(dots[[p_ind]]) == 1) { - if (attr(x, "null") == "point" && is.null(attr(x, "success"))) { stop_glue("A point null regarding a proportion requires ", "that `success` be indicated in `specify()`.") } - if (dots$p < 0 || dots$p > 1) + if (dots$p < 0 || dots$p > 1) { stop_glue( "The value suggested for `p` is not between 0 and 1, inclusive." 
) + } missing_lev <- base::setdiff(unique(pull(x, !!attr(x, "response"))), - attr(x, "success")) + attr(x, "success")) dots$p <- append(dots$p, 1 - dots$p) names(dots$p) <- c(attr(x, "success"), missing_lev) } else { @@ -242,7 +251,7 @@ parse_params <- function(dots, x) { } } - # if (sum(dots[[p_ind]]) != 1){ + # if (sum(dots[[p_ind]]) != 1) { # dots[[p_ind]] <- dots[[p_ind]]/sum(dots[[p_ind]]) # warning_glue("Proportions do not sum to 1, normalizing automatically.") # } @@ -263,10 +272,10 @@ hypothesize_checks <- function(x, null) { ) } - # if (length(null) != 1) { - # stop_glue('Choose between either `"independence"` or `"point"` ', - # 'for `null` argument.') - # } + # if (length(null) != 1) { + # stop_glue('Choose between either `"independence"` or `"point"` ', + # 'for `null` argument.') + # } if (!has_response(x)) { stop_glue( @@ -297,15 +306,15 @@ check_obs_stat <- function(obs_stat) { if (!is.null(obs_stat)) { if ("data.frame" %in% class(obs_stat)) { check_type(obs_stat, is.data.frame) - if ((nrow(obs_stat) != 1) || (ncol(obs_stat) != 1)) + if ((nrow(obs_stat) != 1) || (ncol(obs_stat) != 1)) { warning_glue("The first row and first column value of the given ", "`obs_stat` will be used.") + } # [[1]] is used in case `stat` is not specified as name of 1x1 obs_stat <- obs_stat[[1]][[1]] check_type(obs_stat, is.numeric) - } - else { + } else { check_type(obs_stat, is.numeric) } } diff --git a/R/visualize.R b/R/visualize.R index 6cd293c7..121dbf7d 100755 --- a/R/visualize.R +++ b/R/visualize.R @@ -78,17 +78,17 @@ visualize <- function(data, bins = 15, method = "simulation", endpoints = NULL, endpoints_color = "mediumaquamarine", ci_fill = "turquoise", ...) { - check_type(data, is.data.frame) check_type(bins, is.numeric) check_type(method, is.character) check_type(dens_color, is.character) check_type(obs_stat_color, is.character) check_type(pvalue_fill, is.character) - if (!is.null(direction)) + if (!is.null(direction)) { check_type(direction, is.character) + } if (is.data.frame(endpoints) && - ((nrow(endpoints) != 1) || (ncol(endpoints) != 2))) { + ((nrow(endpoints) != 1) || (ncol(endpoints) != 2))) { stop_glue( "Expecting `endpoints` to be a 1 x 2 data frame or 2 element vector." ) @@ -100,18 +100,19 @@ visualize <- function(data, bins = 15, method = "simulation", ) endpoints <- endpoints[1:2] } - if (is.data.frame(endpoints)) + if (is.data.frame(endpoints)) { endpoints <- unlist(endpoints) + } obs_stat <- check_obs_stat(obs_stat) if (!is.null(direction) && - (is.null(obs_stat) + is.null(endpoints)) != 1) + (is.null(obs_stat) + is.null(endpoints)) != 1) { stop_glue( "Shading requires either `endpoints` values for a confidence interval ", "or the observed statistic `obs_stat` to be provided." ) + } if (method == "simulation") { - infer_plot <- visualize_simulation(data = data, bins = bins, dens_color = dens_color, obs_stat = obs_stat, @@ -121,9 +122,7 @@ visualize <- function(data, bins = 15, method = "simulation", endpoints = endpoints, ci_fill = ci_fill, ...) - } else if (method == "theoretical") { - infer_plot <- visualize_theoretical(data = data, dens_color = dens_color, obs_stat = obs_stat, @@ -133,20 +132,19 @@ visualize <- function(data, bins = 15, method = "simulation", endpoints = endpoints, ci_fill = ci_fill, ...) 
- - } else if (method == "both") { - - if (!("stat" %in% names(data))) + if (!("stat" %in% names(data))) { stop_glue('`generate()` and `calculate()` are both required ', 'to be done prior to `visualize(method = "both")`') + } if (("replicate" %in% names(data)) && - length(unique(data$replicate)) < 100) + length(unique(data$replicate)) < 100) { warning_glue( "With only {length(unique(data$stat))} replicates, it may be ", "difficult to see the relationship between simulation and theory." ) + } infer_plot <- visualize_both(data = data, bins = bins, dens_color = dens_color, @@ -169,9 +167,10 @@ visualize <- function(data, bins = 15, method = "simulation", } if (!is.null(endpoints)) { - if (!is.null(obs_stat)) + if (!is.null(obs_stat)) { warning_glue("Values for both `endpoints` and `obs_stat` were given ", "when only one should be set. Ignoring `obs_stat` values.") + } infer_plot <- infer_plot + geom_vline(xintercept = endpoints, size = 2, color = endpoints_color, @@ -181,7 +180,6 @@ visualize <- function(data, bins = 15, method = "simulation", infer_plot } - theory_t_plot <- function(deg_freedom, statistic_text = "t", dens_color = dens_color, ...) { ggplot(data.frame(x = c(qt(0.001, deg_freedom), @@ -201,7 +199,6 @@ both_t_plot <- function(data = data, deg_freedom, statistic_text = "t", pvalue_fill, endpoints, ci_fill, ...) { - infer_t_plot <- shade_density_check(data = data, obs_stat = obs_stat, direction = direction, @@ -243,11 +240,11 @@ both_anova_plot <- function(data, deg_freedom_top, pvalue_fill, ci_fill, ...) { - - if (!is.null(direction) && !(direction %in% c("greater", "right"))) + if (!is.null(direction) && !(direction %in% c("greater", "right"))) { warning_glue( "F usually corresponds to right-tailed tests. Proceed with caution." ) + } infer_anova_plot <- shade_density_check(data = data, obs_stat = obs_stat, @@ -269,7 +266,6 @@ both_anova_plot <- function(data, deg_freedom_top, } theory_z_plot <- function(statistic_text = "z", dens_color = dens_color, ...) { - ggplot(data.frame(x = c(qnorm(0.001), qnorm(0.999)))) + stat_function(mapping = aes(x), fun = dnorm, color = dens_color) + ggtitle(glue_null("Theoretical {statistic_text} Null Distribution")) + @@ -286,7 +282,6 @@ both_z_plot <- function(data, statistic_text = "z", endpoints, ci_fill, ...) { - infer_z_plot <- shade_density_check(data = data, obs_stat = obs_stat, direction = direction, @@ -294,7 +289,6 @@ both_z_plot <- function(data, statistic_text = "z", endpoints = endpoints, pvalue_fill = pvalue_fill, ci_fill = ci_fill) - infer_z_plot + stat_function(fun = dnorm, color = dens_color) + ggtitle(glue_null( @@ -326,10 +320,10 @@ both_chisq_plot <- function(data, deg_freedom, statistic_text = "Chi-Square", pvalue_fill = pvalue_fill, ci_fill = ci_fill, ...) { - - if (!is.null(direction) && !(direction %in% c("greater", "right"))) + if (!is.null(direction) && !(direction %in% c("greater", "right"))) { warning_glue("Chi-square usually corresponds to right-tailed tests. ", "Proceed with caution.") + } infer_chisq_plot <- shade_density_check(data = data, obs_stat = obs_stat, @@ -349,7 +343,6 @@ both_chisq_plot <- function(data, deg_freedom, statistic_text = "Chi-Square", ylab("") } - shade_density_check <- function(data, obs_stat, direction, @@ -358,7 +351,6 @@ shade_density_check <- function(data, pvalue_fill, endpoints, ci_fill, ...) 
{ - if (is.null(direction) || is.null(obs_stat)) { if (density) { gg_plot <- ggplot(data = data, mapping = aes(x = stat)) + @@ -396,7 +388,7 @@ shade_density_check <- function(data, } if (direction %in% c("two_sided", "both") && - obs_stat >= stats::median(data$stat)) { + obs_stat >= stats::median(data$stat)) { gg_plot <- gg_plot + geom_rect(fill = pvalue_fill, alpha = 0.01, mapping = aes(xmin = obs_stat, xmax = Inf, ymin = 0, @@ -413,7 +405,7 @@ shade_density_check <- function(data, } if (direction %in% c("two_sided", "both") && - obs_stat < stats::median(data$stat)) { + obs_stat < stats::median(data$stat)) { gg_plot <- gg_plot + geom_rect(fill = pvalue_fill, alpha = 0.01, mapping = aes(xmin = -Inf, xmax = obs_stat, ymin = 0, @@ -428,7 +420,6 @@ shade_density_check <- function(data, } } - if (direction == "between") { gg_plot <- gg_plot + geom_rect(fill = ci_fill, alpha = 0.01, @@ -436,9 +427,8 @@ shade_density_check <- function(data, xmax = endpoints[2], ymin = 0, ymax = Inf), ...) } - } - gg_plot + gg_plot } visualize_simulation <- function(data, bins, @@ -451,13 +441,14 @@ visualize_simulation <- function(data, bins, endpoints, ci_fill, ...) { if (is.null(direction)) { - if (length(unique(data$stat)) >= 10) + if (length(unique(data$stat)) >= 10) { infer_plot <- ggplot(data = data, mapping = aes(x = stat)) + geom_histogram(bins = bins, color = "white", ...) - else + } else { infer_plot <- ggplot(data = data, mapping = aes(x = stat)) + geom_bar(...) + xlab("stat") + } } else { infer_plot <- shade_density_check(data = data, obs_stat = obs_stat, @@ -466,8 +457,7 @@ visualize_simulation <- function(data, bins, density = FALSE, pvalue_fill = pvalue_fill, endpoints = endpoints, - ci_fill = ci_fill - ) + ci_fill = ci_fill) } infer_plot } @@ -481,65 +471,60 @@ visualize_theoretical <- function(data, endpoints, ci_fill, ...) { - warning_glue( "Check to make sure the conditions have been met for the theoretical ", "method. {{infer}} currently does not check these for you." ) if (!is.null(attr(data, "stat")) && - !(attr(data, "stat") %in% c("t", "z", "Chisq", "F"))) + !(attr(data, "stat") %in% c("t", "z", "Chisq", "F"))) { warning_glue( "Your `calculate`d statistic and the theoretical distribution are on ", "different scales. Displaying only the theoretical distribution." ) + } if (attr(data, "theory_type") %in% - c("Two sample t", "Slope with t", "One sample t")) { + c("Two sample t", "Slope with t", "One sample t")) { infer_plot <- theory_t_plot(deg_freedom = attr(data, "distr_param"), statistic_text = "t", dens_color = dens_color) - } - - else if (attr(data, "theory_type") == "ANOVA") { - - if (!is.null(direction) && !(direction %in% c("greater", "right"))) + } else if (attr(data, "theory_type") == "ANOVA") { + + if (!is.null(direction) && !(direction %in% c("greater", "right"))) { warning_glue( "F usually corresponds to right-tailed tests. Proceed with caution." 
) + } infer_plot <- theory_anova_plot( deg_freedom_top = attr(data, "distr_param"), deg_freedom_bottom = attr(data, "distr_param2"), statistic_text = "F", dens_color = dens_color) - } - - else if (attr(data, "theory_type") %in% - c("One sample prop z", "Two sample props z")) { + } else if (attr(data, "theory_type") %in% + c("One sample prop z", "Two sample props z")) { + infer_plot <- theory_z_plot(statistic_text = "z", dens_color = dens_color) - } + } else if (attr(data, "theory_type") %in% + c("Chi-square test of indep", "Chi-square Goodness of Fit")) { - else if (attr(data, "theory_type") %in% - c("Chi-square test of indep", "Chi-square Goodness of Fit")) { - - if (!is.null(direction) && !(direction %in% c("greater", "right"))) + if (!is.null(direction) && !(direction %in% c("greater", "right"))) { warning_glue("Chi-square usually corresponds to right-tailed tests. ", "Proceed with caution.") + } infer_plot <- theory_chisq_plot(deg_freedom = attr(data, "distr_param"), statistic_text = "Chi-Square", dens_color = dens_color) - } - -# else -# stop_glue( -# '"{attr(data, "theory_type")}" is not implemented (possibly yet).' -# ) + } # else { + # stop_glue( + # '"{attr(data, "theory_type")}" is not implemented (possibly yet).' + # ) + # } # Move into its own function - if (!is.null(obs_stat)) { if (!is.null(direction)) { if (direction %in% c("less", "left")) { @@ -599,18 +584,17 @@ visualize_both <- function(data, bins, pvalue_fill, endpoints, ci_fill, ...) { - warning_glue( "Check to make sure the conditions have been met for the theoretical ", "method. `infer` currently does not check these for you." ) - if (!(attr(data, "stat") %in% c("t", "z", "Chisq", "F"))) + if (!(attr(data, "stat") %in% c("t", "z", "Chisq", "F"))) { stop_glue("Your `calculate`d statistic and the theoretical distribution ", "are on different scales. 
Use a standardized `stat` instead.") + } if (attr(data, "theory_type") %in% c("Two sample t", "Slope with t")) { - infer_plot <- both_t_plot(data = data, deg_freedom = attr(data, "distr_param"), statistic_text = "t", @@ -621,9 +605,7 @@ visualize_both <- function(data, bins, pvalue_fill = pvalue_fill, endpoints = endpoints, ci_fill = ci_fill) - } - - else if (attr(data, "theory_type") == "ANOVA") { + } else if (attr(data, "theory_type") == "ANOVA") { infer_plot <- both_anova_plot( data = data, deg_freedom_top = attr(data, "distr_param"), @@ -636,10 +618,8 @@ visualize_both <- function(data, bins, pvalue_fill = pvalue_fill, endpoints = endpoints, ci_fill = ci_fill) - } - - else if (attr(data, "theory_type") %in% - c("One sample prop z", "Two sample props z")) { + } else if (attr(data, "theory_type") %in% + c("One sample prop z", "Two sample props z")) { infer_plot <- both_z_plot(data = data, statistic_text = "z", dens_color = dens_color, @@ -649,11 +629,10 @@ visualize_both <- function(data, bins, pvalue_fill = pvalue_fill, endpoints = endpoints, ci_fill = ci_fill) - } - - else if ( + } else if ( attr(data, "theory_type") %in% - c("Chi-square test of indep", "Chi-square Goodness of Fit")) { + c("Chi-square test of indep", "Chi-square Goodness of Fit") + ) { infer_plot <- both_chisq_plot(data = data, deg_freedom = attr(data, "distr_param"), statistic_text = "Chi-Square", @@ -664,10 +643,9 @@ visualize_both <- function(data, bins, pvalue_fill = pvalue_fill, endpoints = endpoints, ci_fill = ci_fill) - } - -# else -# stop_glue('"{attr(data, "theory_type")}" is not implemented yet.') + } # else { + # stop_glue('"{attr(data, "theory_type")}" is not implemented yet.') + # } infer_plot } diff --git a/R/wrappers.R b/R/wrappers.R index d3c034e4..17c54eee 100755 --- a/R/wrappers.R +++ b/R/wrappers.R @@ -37,17 +37,16 @@ t_test <- function(data, formula, # response = NULL, explanatory = NULL, conf_int = TRUE, conf_level = 0.95, ...) { - check_conf_level(conf_level) # Match with old "dot" syntax - if (alternative == "two_sided") + if (alternative == "two_sided") { alternative <- "two.sided" + } ### Only currently working with formula interface # if (hasArg(formula)) { if (!is.null(f_rhs(formula))) { - data[[as.character(f_rhs(formula))]] <- factor(data[[as.character(f_rhs(formula))]], levels = c(order[1], order[2])) @@ -139,12 +138,12 @@ t_stat <- function(data, formula, ...) { #' @export chisq_test <- function(data, formula, # response = NULL, explanatory = NULL, ...) { - - if (is.null(f_rhs(formula))) + if (is.null(f_rhs(formula))) { stop_glue( "`chisq_test()` currently only has functionality for ", "Chi-Square Test of Independence, not for Chi-Square Goodness of Fit." ) + } ## Only currently working with formula interface explanatory_var <- f_rhs(formula) response_var <- f_lhs(formula) @@ -168,7 +167,6 @@ chisq_test <- function(data, formula, # response = NULL, explanatory = NULL, #' #' @export chisq_stat <- function(data, formula, ...) { - if (is.null(f_rhs(formula))) { stop_glue( "`chisq_stat()` currently only has functionality for ", @@ -182,10 +180,10 @@ chisq_stat <- function(data, formula, ...) 
{ } } - check_conf_level <- function(conf_level) { if (class(conf_level) != "numeric" | - conf_level < 0 | - conf_level > 1) + conf_level < 0 | + conf_level > 1) { stop_glue("The `conf_level` argument must be a number between 0 and 1.") + } } diff --git a/tests/testthat/test-calculate.R b/tests/testthat/test-calculate.R index 34856237..f682ff99 100644 --- a/tests/testthat/test-calculate.R +++ b/tests/testthat/test-calculate.R @@ -226,7 +226,6 @@ test_that("chi-square matches chisq.test value", { p = c(0.8, 0.1, 0.1)))) %>% dplyr::select(replicate, stat = statistic) expect_equal(infer_way, trad_way) - }) test_that("`order` is working", { @@ -256,7 +255,6 @@ test_that("`order` is working", { order = c(">5", "<=4", ">4"))) # order not given expect_error(calculate(gen_iris11, stat = "diff in means")) - }) test_that('success is working for stat = "prop"', { @@ -270,7 +268,6 @@ test_that('success is working for stat = "prop"', { calculate(stat = "prop")) expect_silent(gen_iris12 %>% calculate(stat = "z")) - }) test_that("NULL response gives error", { @@ -348,11 +345,9 @@ test_that("One sample t hypothesis test is working", { generate(reps = 10) %>% calculate(stat = "t") ) - }) test_that("specify done before calculate", { - iris_mean <- iris_tbl %>% dplyr::select(stat = Sepal.Width) expect_error(calculate(iris_mean, stat = "mean")) @@ -392,5 +387,4 @@ test_that("One sample t bootstrap is working", { generate(reps = 10) %>% calculate(stat = "t") ) - }) diff --git a/tests/testthat/test-conf_int.R b/tests/testthat/test-conf_int.R index 6dcc63ae..6413db4a 100644 --- a/tests/testthat/test-conf_int.R +++ b/tests/testthat/test-conf_int.R @@ -13,7 +13,7 @@ iris_calc <- iris_tbl %>% generate(reps = 1000) %>% calculate(stat = "diff in props", order = c("large", "small")) -obs_diff <- iris_tbl %>% +obs_diff <- iris_tbl %>% specify(Sepal.Length.Group ~ Sepal.Width.Group, success = "<=5") %>% calculate(stat = "diff in props", order = c("large", "small")) @@ -25,7 +25,7 @@ test_that("basics work", { expect_silent( test_df %>% conf_int() - ) + ) expect_error( test_df %>% conf_int(type = "other") diff --git a/tests/testthat/test-generate.R b/tests/testthat/test-generate.R index 02ed4b43..e92d5f77 100644 --- a/tests/testthat/test-generate.R +++ b/tests/testthat/test-generate.R @@ -2,10 +2,10 @@ context("generate") mtcars <- as.data.frame(mtcars) %>% dplyr::mutate(cyl = factor(cyl), - vs = factor(vs), - am = factor(am), - gear = factor(gear), - carb = factor(carb)) + vs = factor(vs), + am = factor(am), + gear = factor(gear), + carb = factor(carb)) hyp_prop <- mtcars %>% specify(response = am, success = "1") %>% @@ -44,7 +44,6 @@ hyp_anova <- mtcars %>% hypothesize(null = "independence") test_that("cohesion with type argument", { - expect_error(generate(hyp_prop, type = "bootstrap")) expect_error(generate(hyp_diff_in_props, type = "bootstrap")) expect_error(generate(hyp_chisq_gof, type = "bootstrap")) @@ -70,11 +69,9 @@ test_that("cohesion with type argument", { expect_error(generate(hyp_mean, type = "permute")) expect_silent(generate(hyp_diff_in_means, type = "permute")) expect_silent(generate(hyp_anova, type = "permute")) - }) test_that("sensible output", { - expect_equal(nrow(mtcars) * 500, nrow(generate(hyp_prop, reps = 500, type = "simulate"))) expect_silent(generate(hyp_mean, reps = 1, type = "bootstrap")) @@ -154,86 +151,85 @@ test_that("auto `type` works (generate)", { specify(mpg ~ hp) %>% generate(reps = 100) - expect_equal(attr(one_mean, "type"), "bootstrap") - expect_equal(attr(one_nonshift_mean, 
"type"), "bootstrap") - expect_equal(attr(one_median, "type"), "bootstrap") - expect_equal(attr(one_prop, "type"), "simulate") - expect_equal(attr(two_props, "type"), "permute") - expect_equal(attr(gof_chisq, "type"), "simulate") - expect_equal(attr(indep_chisq, "type"), "permute") - expect_equal(attr(two_means, "type"), "permute") - expect_equal(attr(anova_f, "type"), "permute") - expect_equal(attr(slopes, "type"), "permute") - expect_equal(attr(one_nonshift_prop, "type"), "bootstrap") - expect_equal(attr(two_means_boot, "type"), "bootstrap") - expect_equal(attr(two_props_boot, "type"), "bootstrap") - expect_equal(attr(slope_boot, "type"), "bootstrap") - - expect_error(mtcars %>% - specify(response = mpg) %>% # formula alt: mpg ~ NULL - hypothesize(null = "point", mu = 25) %>% - generate(reps = 100, type = "permute")) - - expect_error(mtcars %>% - specify(response = mpg) %>% - generate(reps = 100, type = "simulate")) - - expect_error(mtcars %>% - specify(response = mpg) %>% # formula alt: mpg ~ NULL - hypothesize(null = "point", med = 26) %>% - generate(reps = 100, type = "permute")) - - expect_error(mtcars %>% - specify(response = am, success = "1") %>% # formula alt: am ~ NULL - hypothesize(null = "point", p = .25) %>% - generate(reps = 100, type = "bootstrap")) - - expect_error(mtcars %>% - specify(am ~ vs, success = "1") %>% # alt: response = am, explanatory = vs - hypothesize(null = "independence") %>% - generate(reps = 100, type = "bootstrap")) - - expect_error(mtcars %>% - specify(cyl ~ NULL) %>% # alt: response = cyl - hypothesize(null = "point", p = c("4" = .5, "6" = .25, "8" = .25)) %>% - generate(reps = 100, type = "bootstrap")) - - expect_error(mtcars %>% - specify(cyl ~ am) %>% # alt: response = cyl, explanatory = am - hypothesize(null = "independence") %>% - generate(reps = 100, type = "simulate")) - - expect_error(mtcars %>% - specify(mpg ~ am) %>% # alt: response = mpg, explanatory = am - hypothesize(null = "independence") %>% - generate(reps = 100, type = "bootstrap")) - - expect_error(mtcars %>% - specify(mpg ~ cyl) %>% # alt: response = mpg, explanatory = cyl - hypothesize(null = "independence") %>% - generate(reps = 100, type = "simulate")) - - expect_error(mtcars %>% - specify(mpg ~ hp) %>% # alt: response = mpg, explanatory = cyl - hypothesize(null = "independence") %>% - generate(reps = 100, type = "bootstrap")) - - expect_error(mtcars %>% - specify(response = am, success = "1") %>% - generate(reps = 100, type = "simulate")) - - expect_error(mtcars %>% - specify(mpg ~ am) %>% - generate(reps = 100, type = "permute")) - - expect_error(mtcars %>% - specify(am ~ vs, success = "1") %>% - generate(reps = 100, type = "simulate")) - - expect_error(mtcars %>% - specify(mpg ~ hp) %>% - generate(reps = 100, type = "simulate")) + expect_equal(attr(one_mean, "type"), "bootstrap") + expect_equal(attr(one_nonshift_mean, "type"), "bootstrap") + expect_equal(attr(one_median, "type"), "bootstrap") + expect_equal(attr(one_prop, "type"), "simulate") + expect_equal(attr(two_props, "type"), "permute") + expect_equal(attr(gof_chisq, "type"), "simulate") + expect_equal(attr(indep_chisq, "type"), "permute") + expect_equal(attr(two_means, "type"), "permute") + expect_equal(attr(anova_f, "type"), "permute") + expect_equal(attr(slopes, "type"), "permute") + expect_equal(attr(one_nonshift_prop, "type"), "bootstrap") + expect_equal(attr(two_means_boot, "type"), "bootstrap") + expect_equal(attr(two_props_boot, "type"), "bootstrap") + expect_equal(attr(slope_boot, "type"), "bootstrap") + + 
expect_error(mtcars %>% + specify(response = mpg) %>% # formula alt: mpg ~ NULL + hypothesize(null = "point", mu = 25) %>% + generate(reps = 100, type = "permute")) + + expect_error(mtcars %>% + specify(response = mpg) %>% + generate(reps = 100, type = "simulate")) + + expect_error(mtcars %>% + specify(response = mpg) %>% # formula alt: mpg ~ NULL + hypothesize(null = "point", med = 26) %>% + generate(reps = 100, type = "permute")) + expect_error(mtcars %>% + specify(response = am, success = "1") %>% # formula alt: am ~ NULL + hypothesize(null = "point", p = .25) %>% + generate(reps = 100, type = "bootstrap")) + + expect_error(mtcars %>% + specify(am ~ vs, success = "1") %>% # alt: response = am, explanatory = vs + hypothesize(null = "independence") %>% + generate(reps = 100, type = "bootstrap")) + + expect_error(mtcars %>% + specify(cyl ~ NULL) %>% # alt: response = cyl + hypothesize(null = "point", p = c("4" = .5, "6" = .25, "8" = .25)) %>% + generate(reps = 100, type = "bootstrap")) + + expect_error(mtcars %>% + specify(cyl ~ am) %>% # alt: response = cyl, explanatory = am + hypothesize(null = "independence") %>% + generate(reps = 100, type = "simulate")) + + expect_error(mtcars %>% + specify(mpg ~ am) %>% # alt: response = mpg, explanatory = am + hypothesize(null = "independence") %>% + generate(reps = 100, type = "bootstrap")) + + expect_error(mtcars %>% + specify(mpg ~ cyl) %>% # alt: response = mpg, explanatory = cyl + hypothesize(null = "independence") %>% + generate(reps = 100, type = "simulate")) + + expect_error(mtcars %>% + specify(mpg ~ hp) %>% # alt: response = mpg, explanatory = cyl + hypothesize(null = "independence") %>% + generate(reps = 100, type = "bootstrap")) + + expect_error(mtcars %>% + specify(response = am, success = "1") %>% + generate(reps = 100, type = "simulate")) + + expect_error(mtcars %>% + specify(mpg ~ am) %>% + generate(reps = 100, type = "permute")) + + expect_error(mtcars %>% + specify(am ~ vs, success = "1") %>% + generate(reps = 100, type = "simulate")) + + expect_error(mtcars %>% + specify(mpg ~ hp) %>% + generate(reps = 100, type = "simulate")) }) test_that("mismatches lead to error", { diff --git a/tests/testthat/test-hypothesize.R b/tests/testthat/test-hypothesize.R index 717e0a78..3ffbc004 100644 --- a/tests/testthat/test-hypothesize.R +++ b/tests/testthat/test-hypothesize.R @@ -2,10 +2,10 @@ context("hypothesize") mtcars <- as.data.frame(mtcars) %>% dplyr::mutate(cyl = factor(cyl), - vs = factor(vs), - am = factor(am), - gear = factor(gear), - carb = factor(carb)) + vs = factor(vs), + am = factor(am), + gear = factor(gear), + carb = factor(carb)) one_mean <- mtcars %>% specify(response = mpg) %>% # formula alt: mpg ~ NULL @@ -67,7 +67,6 @@ test_that("auto `type` works (hypothesize)", { }) test_that("hypothesize arguments function", { - mtcars_f <- dplyr::mutate(mtcars, cyl = factor(cyl)) mtcars_s <- mtcars_f %>% specify(response = mpg) matrix1 <- matrix(data = NA, nrow = 3, ncol = 3) diff --git a/tests/testthat/test-p_value.R b/tests/testthat/test-p_value.R index 75e6ff4b..7ea6d232 100644 --- a/tests/testthat/test-p_value.R +++ b/tests/testthat/test-p_value.R @@ -20,7 +20,7 @@ test_that("direction is appropriate", { expect_error( test_df %>% p_value(obs_stat = 0.5, direction = "righ") - ) + ) }) test_that("p_value makes sense", { @@ -29,7 +29,7 @@ test_that("p_value makes sense", { p_value(obs_stat = 0.1, direction = "right") %>% dplyr::pull(), expected = 0.1 - ) + ) expect_gt( iris_calc %>% p_value(obs_stat = -0.1, direction = "greater") %>% 
diff --git a/tests/testthat/test-rep_sample_n.R b/tests/testthat/test-rep_sample_n.R index 8e9f9544..a6e0eca9 100644 --- a/tests/testthat/test-rep_sample_n.R +++ b/tests/testthat/test-rep_sample_n.R @@ -10,8 +10,8 @@ test_that("rep_sample_n works", { expect_silent(population %>% rep_sample_n(size = 2, reps = 10)) expect_error(population %>% - rep_sample_n(size = 2, reps = 10, - prob = rep(x = 1/5, times = 100))) + rep_sample_n(size = 2, reps = 10, + prob = rep(x = 1/5, times = 100))) expect_error(population %>% rep_sample_n(size = 2, reps = 10, prob = c(1/2, 1/2))) @@ -21,6 +21,4 @@ test_that("rep_sample_n works", { test_rep <- population %>% rep_sample_n(size = 2, reps = 10) expect_equal(c("replicate", names(population)), names(test_rep)) - - }) diff --git a/tests/testthat/test-specify.R b/tests/testthat/test-specify.R index 3aec3e63..f858e3be 100644 --- a/tests/testthat/test-specify.R +++ b/tests/testthat/test-specify.R @@ -31,7 +31,6 @@ test_that("auto `type` works (specify)", { }) test_that("data argument", { - expect_error(specify(blah ~ cyl)) expect_error(specify(1:3)) expect_is(mtcars, "data.frame") @@ -39,7 +38,6 @@ test_that("data argument", { }) test_that("response and explanatory arguments", { - expect_error(specify(mtcars, response = blah)) expect_error(specify(mtcars, response = "blah")) expect_error(specify(mtcars, formula = mpg ~ blah)) @@ -54,7 +52,6 @@ test_that("response and explanatory arguments", { }) test_that("success argument", { - expect_error(specify(mtcars, response = vs, success = 1)) expect_error(specify(mtcars, response = vs, success = "bogus")) expect_error(specify(mtcars, response = mpg, success = "1")) diff --git a/tests/testthat/test-visualize.R b/tests/testthat/test-visualize.R index fce2f025..6973527d 100644 --- a/tests/testthat/test-visualize.R +++ b/tests/testthat/test-visualize.R @@ -63,32 +63,29 @@ test_that("visualize basic tests", { # obs_stat not specified expect_error(iris_tbl %>% - specify(Sepal.Width.Group ~ Sepal.Length.Group, - success = "large") %>% - hypothesize(null = "independence") %>% - generate(reps = 100, type = "permute") %>% - calculate(stat = "diff in props", - order = c(">5", "<=5")) %>% - visualize(direction = "both") - ) - - expect_silent(iris_tbl %>% specify(Sepal.Width.Group ~ Sepal.Length.Group, success = "large") %>% hypothesize(null = "independence") %>% generate(reps = 100, type = "permute") %>% calculate(stat = "diff in props", order = c(">5", "<=5")) %>% - visualize(direction = "both", obs_stat = obs_diff) - ) + visualize(direction = "both")) - expect_warning(iris_tbl %>% + expect_silent(iris_tbl %>% specify(Sepal.Width.Group ~ Sepal.Length.Group, success = "large") %>% hypothesize(null = "independence") %>% - calculate(stat = "z", order = c(">5", "<=5")) %>% - visualize(method = "theoretical") - ) + generate(reps = 100, type = "permute") %>% + calculate(stat = "diff in props", + order = c(">5", "<=5")) %>% + visualize(direction = "both", obs_stat = obs_diff)) + + expect_warning(iris_tbl %>% + specify(Sepal.Width.Group ~ Sepal.Length.Group, + success = "large") %>% + hypothesize(null = "independence") %>% + calculate(stat = "z", order = c(">5", "<=5")) %>% + visualize(method = "theoretical")) # diff in props and z on different scales expect_error(expect_warning(iris_tbl %>% @@ -103,137 +100,123 @@ test_that("visualize basic tests", { )) expect_silent(iris_tbl %>% - specify(Sepal.Width.Group ~ Sepal.Length.Group, - success = "large") %>% - hypothesize(null = "independence") %>% - generate(reps = 100, type = "permute") %>% 
- calculate(stat = "diff in props", - order = c(">5", "<=5")) %>% - visualize() - ) - - expect_warning(iris_tbl %>% - specify(Sepal.Width.Group ~ Sepal.Length.Group, - success = "large") %>% - hypothesize(null = "independence") %>% - generate(reps = 100, type = "permute") %>% - calculate(stat = "z", - order = c(">5", "<=5")) %>% - visualize(method = "both", direction = "both", - obs_stat = obs_z) - ) + specify(Sepal.Width.Group ~ Sepal.Length.Group, + success = "large") %>% + hypothesize(null = "independence") %>% + generate(reps = 100, type = "permute") %>% + calculate(stat = "diff in props", + order = c(">5", "<=5")) %>% + visualize()) expect_warning(iris_tbl %>% - specify(Sepal.Width.Group ~ Sepal.Length.Group, + specify(Sepal.Width.Group ~ Sepal.Length.Group, success = "large") %>% hypothesize(null = "independence") %>% generate(reps = 100, type = "permute") %>% calculate(stat = "z", - order = c("<=5", ">5")) %>% + order = c(">5", "<=5")) %>% visualize(method = "both", direction = "both", - obs_stat = -obs_z) - ) + obs_stat = obs_z)) expect_warning(iris_tbl %>% - specify(Sepal.Length ~ Sepal.Width.Group) %>% - hypothesize(null = "independence") %>% - generate(reps = 100, type = "permute") %>% - calculate(stat = "t", order = c("small", "large")) %>% - visualize(method = "both", direction = "left", - obs_stat = -obs_t) - ) + specify(Sepal.Width.Group ~ Sepal.Length.Group, + success = "large") %>% + hypothesize(null = "independence") %>% + generate(reps = 100, type = "permute") %>% + calculate(stat = "z", + order = c("<=5", ">5")) %>% + visualize(method = "both", direction = "both", + obs_stat = -obs_z)) expect_warning(iris_tbl %>% - specify(Sepal.Length ~ Sepal.Width.Group) %>% - hypothesize(null = "independence") %>% - # generate(reps = 100, type = "permute") %>% - calculate(stat = "t", order = c("small", "large")) %>% - visualize(method = "theoretical", direction = "left", - obs_stat = -obs_t) - ) + specify(Sepal.Length ~ Sepal.Width.Group) %>% + hypothesize(null = "independence") %>% + generate(reps = 100, type = "permute") %>% + calculate(stat = "t", order = c("small", "large")) %>% + visualize(method = "both", direction = "left", + obs_stat = -obs_t)) expect_warning(iris_tbl %>% - specify(Sepal.Length ~ Sepal.Length.Group) %>% - hypothesize(null = "independence") %>% - visualize(method = "theoretical") - ) + specify(Sepal.Length ~ Sepal.Width.Group) %>% + hypothesize(null = "independence") %>% +# generate(reps = 100, type = "permute") %>% + calculate(stat = "t", order = c("small", "large")) %>% + visualize(method = "theoretical", direction = "left", + obs_stat = -obs_t)) expect_warning(iris_tbl %>% - specify(Sepal.Length ~ Species) %>% - hypothesize(null = "independence") %>% - visualize(method = "theoretical") - ) + specify(Sepal.Length ~ Sepal.Length.Group) %>% + hypothesize(null = "independence") %>% + visualize(method = "theoretical")) expect_warning(iris_tbl %>% - specify(Sepal.Length ~ Species) %>% - hypothesize(null = "independence") %>% - generate(reps = 100, type = "permute") %>% - calculate(stat = "F") %>% - visualize(method = "both", obs_stat = obs_F, - direction = "right") - ) + specify(Sepal.Length ~ Species) %>% + hypothesize(null = "independence") %>% + visualize(method = "theoretical")) expect_warning(iris_tbl %>% - specify(Sepal.Length ~ Species) %>% - hypothesize(null = "independence") %>% - generate(reps = 100, type = "permute") %>% - calculate(stat = "F") %>% - visualize(method = "both", obs_stat = obs_F, - direction = "left") - ) + specify(Sepal.Length ~ 
Species) %>% + hypothesize(null = "independence") %>% + generate(reps = 100, type = "permute") %>% + calculate(stat = "F") %>% + visualize(method = "both", obs_stat = obs_F, + direction = "right")) expect_warning(iris_tbl %>% - specify(Sepal.Width.Group ~ Species, - success = "large") %>% - hypothesize(null = "independence") %>% - generate(reps = 100, type = "permute") %>% - calculate(stat = "Chisq") %>% - visualize(method = "both", obs_stat = obs_F, - direction = "right") - ) + specify(Sepal.Length ~ Species) %>% + hypothesize(null = "independence") %>% + generate(reps = 100, type = "permute") %>% + calculate(stat = "F") %>% + visualize(method = "both", obs_stat = obs_F, + direction = "left")) expect_warning(iris_tbl %>% - specify(Sepal.Width.Group ~ Species, - success = "large") %>% - hypothesize(null = "independence") %>% - # calculate(stat = "Chisq") %>% - visualize(method = "theoretical", obs_stat = obs_F, - direction = "right") - ) + specify(Sepal.Width.Group ~ Species, + success = "large") %>% + hypothesize(null = "independence") %>% + generate(reps = 100, type = "permute") %>% + calculate(stat = "Chisq") %>% + visualize(method = "both", obs_stat = obs_F, + direction = "right")) expect_warning(iris_tbl %>% - specify(Species ~ NULL) %>% - hypothesize(null = "point", - p = c("setosa" = 0.4, - "versicolor" = 0.4, - "virginica" = 0.2)) %>% - generate(reps = 100, type = "simulate") %>% - calculate(stat = "Chisq") %>% - visualize(method = "both") - ) + specify(Sepal.Width.Group ~ Species, + success = "large") %>% + hypothesize(null = "independence") %>% + # calculate(stat = "Chisq") %>% + visualize(method = "theoretical", obs_stat = obs_F, + direction = "right")) + + expect_warning(iris_tbl %>% + specify(Species ~ NULL) %>% + hypothesize(null = "point", + p = c("setosa" = 0.4, + "versicolor" = 0.4, + "virginica" = 0.2)) %>% + generate(reps = 100, type = "simulate") %>% + calculate(stat = "Chisq") %>% + visualize(method = "both")) # traditional instead of theoretical expect_error(iris_tbl %>% - specify(Species ~ NULL) %>% - hypothesize(null = "point", - p = c("setosa" = 0.4, - "versicolor" = 0.4, - "virginica" = 0.2)) %>% -# generate(reps = 100, type = "simulate") %>% -# calculate(stat = "Chisq") %>% - visualize(method = "traditional") - ) - - expect_warning(iris_tbl %>% specify(Species ~ NULL) %>% hypothesize(null = "point", p = c("setosa" = 0.4, "versicolor" = 0.4, "virginica" = 0.2)) %>% - # generate(reps = 100, type = "simulate") %>% - # calculate(stat = "Chisq") %>% - visualize(method = "theoretical") - ) +# generate(reps = 100, type = "simulate") %>% +# calculate(stat = "Chisq") %>% + visualize(method = "traditional")) + + expect_warning(iris_tbl %>% + specify(Species ~ NULL) %>% + hypothesize(null = "point", + p = c("setosa" = 0.4, + "versicolor" = 0.4, + "virginica" = 0.2)) %>% + # generate(reps = 100, type = "simulate") %>% + # calculate(stat = "Chisq") %>% + visualize(method = "theoretical")) expect_silent(iris_tbl %>% specify(Petal.Width ~ Sepal.Width.Group) %>% @@ -257,24 +240,22 @@ test_that("visualize basic tests", { )) expect_warning(iris_tbl %>% - specify(Petal.Width ~ Sepal.Width.Group) %>% - hypothesize(null = "independence") %>% - generate(reps = 100, type = "permute") %>% - calculate(stat = "diff in means", - order = c("large", "small")) %>% - visualize(method = "theoretical", direction = "both", - obs_stat = obs_diff_mean) - ) + specify(Petal.Width ~ Sepal.Width.Group) %>% + hypothesize(null = "independence") %>% + generate(reps = 100, type = "permute") %>% + 
calculate(stat = "diff in means", + order = c("large", "small")) %>% + visualize(method = "theoretical", direction = "both", + obs_stat = obs_diff_mean)) expect_warning(iris_tbl %>% - specify(Sepal.Width.Group ~ NULL, success = "small") %>% - hypothesize(null = "point", p = 0.8) %>% -# generate(reps = 100, type = "simulate") %>% -# calculate(stat = "z") %>% - visualize(method = "theoretical", - obs_stat = 2, # Should probably update - direction = "both") - ) + specify(Sepal.Width.Group ~ NULL, success = "small") %>% + hypothesize(null = "point", p = 0.8) %>% +# generate(reps = 100, type = "simulate") %>% +# calculate(stat = "z") %>% + visualize(method = "theoretical", + obs_stat = 2, # Should probably update + direction = "both")) expect_silent(iris_tbl %>% specify(Petal.Width ~ NULL) %>% @@ -282,10 +263,7 @@ test_that("visualize basic tests", { generate(reps = 100, type = "bootstrap") %>% calculate(stat = "mean") %>% visualize(direction = "left", - obs_stat = mean(iris$Petal.Width)) - ) - - + obs_stat = mean(iris$Petal.Width))) }) test_that("get_percentile works", { @@ -297,21 +275,18 @@ test_that("obs_stat as a data.frame works", { specify(Petal.Width ~ NULL) %>% calculate(stat = "mean") expect_silent(iris_tbl %>% - specify(Petal.Width ~ NULL) %>% - hypothesize(null = "point", mu = 4) %>% + specify(Petal.Width ~ NULL) %>% + hypothesize(null = "point", mu = 4) %>% generate(reps = 100, type = "bootstrap") %>% - calculate(stat = "mean") %>% - visualize(obs_stat = mean_petal_width) - ) + calculate(stat = "mean") %>% + visualize(obs_stat = mean_petal_width)) mean_df_test <- data.frame(x = c(4.1, 1), y = c(1, 2)) expect_warning(iris_tbl %>% specify(Petal.Width ~ NULL) %>% hypothesize(null = "point", mu = 4) %>% generate(reps = 100, type = "bootstrap") %>% calculate(stat = "mean") %>% - visualize(obs_stat = mean_df_test) - ) - + visualize(obs_stat = mean_df_test)) }) @@ -322,17 +297,15 @@ test_that('method = "both" behaves nicely', { specify(Petal.Width ~ NULL) %>% hypothesize(null = "point", mu = 4) %>% generate(reps = 100, type = "bootstrap") %>% - # calculate(stat = "mean") %>% +# calculate(stat = "mean") %>% visualize(method = "both")) - # expect_warning(iris_tbl %>% specify(Petal.Width ~ Sepal.Length.Group) %>% hypothesize(null = "point", mu = 4) %>% generate(reps = 10, type = "bootstrap") %>% calculate(stat = "t", order = c(">5", "<=5")) %>% - visualize(method = "both") - ) + visualize(method = "both")) }) test_that("Traditional right-tailed tests have warning if not right-tailed", { @@ -342,38 +315,32 @@ test_that("Traditional right-tailed tests have warning if not right-tailed", { hypothesize(null = "independence") %>% generate(reps = 100, type = "permute") %>% calculate(stat = "Chisq") %>% - visualize(method = "both", obs_stat = 2, direction = "left") - ) + visualize(method = "both", obs_stat = 2, direction = "left")) expect_warning(iris_tbl %>% specify(Sepal.Length ~ Species) %>% hypothesize(null = "independence") %>% generate(reps = 100, type = "permute") %>% calculate(stat = "F") %>% visualize(method = "both", obs_stat = 2, - direction = "two_sided") - ) + direction = "two_sided")) expect_warning(iris_tbl %>% specify(Sepal.Width.Group ~ Species, success = "large") %>% hypothesize(null = "independence") %>% - # generate(reps = 100, type = "permute") %>% +# generate(reps = 100, type = "permute") %>% calculate(stat = "Chisq") %>% visualize(method = "theoretical", obs_stat = 2, - direction = "left") - ) + direction = "left")) expect_warning(iris_tbl %>% specify(Sepal.Length ~ Species) 
%>% hypothesize(null = "independence") %>% - # generate(reps = 100, type = "permute") %>% +# generate(reps = 100, type = "permute") %>% calculate(stat = "F") %>% visualize(method = "theoretical", obs_stat = 2, - direction = "two_sided") - ) - + direction = "two_sided")) }) test_that("confidence interval plots are working", { - iris_boot <- iris_tbl %>% specify(Sepal.Width.Group ~ Sepal.Length.Group, success = "large") %>% @@ -402,5 +369,4 @@ test_that("confidence interval plots are working", { expect_warning( iris_boot %>% visualize(obs_stat = 3, endpoints = perc_ci) ) - }) diff --git a/tests/testthat/test-wrappers.R b/tests/testthat/test-wrappers.R index 81c058a8..028ebaa7 100644 --- a/tests/testthat/test-wrappers.R +++ b/tests/testthat/test-wrappers.R @@ -47,7 +47,7 @@ test_that("_stat functions work", { one_more <- chisq.test( table(iris3$Species, iris3$Sepal.Length.Group) - )$statistic + )$statistic expect_equivalent(another_way, obs_stat_way) expect_equivalent(one_more, dplyr::pull(obs_stat_way)) @@ -66,7 +66,7 @@ test_that("_stat functions work", { expect_silent( iris2 %>% t_stat(Sepal.Width ~ Species, order = c("virginica", "versicolor")) - ) + ) another_way <- iris2 %>% t_test(Sepal.Width ~ Species, order = c("virginica", "versicolor")) %>% dplyr::select(statistic) @@ -89,7 +89,7 @@ test_that("conf_int argument works", { names(iris2 %>% t_test(Sepal.Width ~ Species, order = c("virginica", "versicolor"), conf_int = FALSE)), - c("statistic", "t_df", "p_value", "alternative") + c("statistic", "t_df", "p_value", "alternative") ) expect_equal( names(iris2 %>% @@ -137,5 +137,4 @@ test_that("conf_int argument works", { expect_false( shortcut_no_var_equal == shortcut_var_equal ) - }) From ea190f90b3344687aefa5d200202ee55b6546b39 Mon Sep 17 00:00:00 2001 From: "evgeni.chasnovski" Date: Sun, 5 Aug 2018 16:53:52 +0300 Subject: [PATCH 05/78] Update long lines (manually). --- R/calculate.R | 218 ++++++----- R/conf_int.R | 14 +- R/generate.R | 55 +-- R/hypothesize.R | 24 +- R/infer.R | 14 +- R/p_value.R | 44 ++- R/rep_sample_n.R | 14 +- R/set_params.R | 72 ++-- R/specify.R | 57 ++- R/utils.R | 99 +++-- R/visualize.R | 555 +++++++++++++++----------- R/wrappers.R | 83 ++-- tests/testthat/test-calculate.R | 229 ++++++----- tests/testthat/test-conf_int.R | 51 +-- tests/testthat/test-generate.R | 89 +++-- tests/testthat/test-hypothesize.R | 68 ++-- tests/testthat/test-p_value.R | 22 +- tests/testthat/test-print.R | 10 +- tests/testthat/test-rep_sample_n.R | 28 +- tests/testthat/test-specify.R | 24 +- tests/testthat/test-visualize.R | 603 +++++++++++++++-------------- tests/testthat/test-wrappers.R | 96 +++-- 22 files changed, 1372 insertions(+), 1097 deletions(-) diff --git a/R/calculate.R b/R/calculate.R index 041c4080..488b3db0 100755 --- a/R/calculate.R +++ b/R/calculate.R @@ -29,19 +29,9 @@ #' @export calculate <- function(x, stat = c( - "mean", - "median", - "sd", - "prop", - "diff in means", - "diff in medians", - "diff in props", - "Chisq", - "F", - "slope", - "correlation", - "t", - "z" + "mean", "median", "sd", "prop", "diff in means", + "diff in medians", "diff in props", "Chisq", "F", + "slope", "correlation", "t", "z" ), order = NULL, ...) 
{ @@ -63,15 +53,8 @@ calculate <- function(x, x$replicate <- 1L } else if ( stat %in% c( - "mean", - "median", - "sd", - "prop", - "diff in means", - "diff in medians", - "diff in props", - "slope", - "correlation" + "mean", "median", "sd", "prop", "diff in means", "diff in medians", + "diff in props", "slope", "correlation" ) ) { stop_glue( @@ -113,13 +96,13 @@ calculate <- function(x, } # Use S3 method to match correct calculation - result <- calc_impl(structure(stat, class = gsub(" ", "_", stat)), - x, order, ...) + result <- calc_impl( + structure(stat, class = gsub(" ", "_", stat)), x, order, ... + ) if ("NULL" %in% class(result)) { stop_glue( - "Your choice of `stat` is invalid for the ", - "types of variables `specify`ed." + "Your choice of `stat` is invalid for the types of variables `specify`ed." ) } # else { @@ -141,7 +124,6 @@ calc_impl <- function(type, x, order, ...) { UseMethod("calc_impl", type) } - calc_impl.mean <- function(stat, x, order, ...) { col <- base::setdiff(names(x), "replicate") @@ -179,44 +161,45 @@ calc_impl.prop <- function(stat, x, order, ...) { if (is.null(attr(x, "success"))) { stop_glue( - 'To calculate a proportion, the `"success"` argument ', - 'must be provided in `specify()`.' + 'To calculate a proportion, the `"success"` argument must be provided ', + 'in `specify()`.' ) } success <- attr(x, "success") x %>% dplyr::group_by(replicate) %>% - dplyr::summarize(stat = mean(!!sym(col) == success, - # rlang::eval_tidy(col) == rlang::eval_tidy(success), - ...)) + dplyr::summarize( + stat = mean( + # rlang::eval_tidy(col) == rlang::eval_tidy(success), ... + !!sym(col) == success, ... + ) + ) } - calc_impl.F <- function(stat, x, order, ...) { x %>% - dplyr::summarize(stat = stats::anova(stats::lm(!!( - attr(x, "response") - ) ~ !!( - attr(x, "explanatory") - )))$`F value`[1]) + dplyr::summarize( + stat = stats::anova( + stats::lm(!!(attr(x, "response")) ~ !!(attr(x, "explanatory"))) + )$`F value`[1] + ) } - - calc_impl.slope <- function(stat, x, order, ...) { x %>% - dplyr::summarize(stat = stats::coef(stats::lm(!!( - attr(x, "response") - ) ~ !!( - attr(x, "explanatory") - )))[2]) + dplyr::summarize( + stat = stats::coef( + stats::lm(!!(attr(x, "response")) ~ !!(attr(x, "explanatory"))) + )[2] + ) } calc_impl.correlation <- function(stat, x, order, ...) { x %>% - dplyr::summarize(stat = stats::cor(!!attr(x, "explanatory"), - !!attr(x, "response"))) + dplyr::summarize( + stat = stats::cor(!!attr(x, "explanatory"), !!attr(x, "response")) + ) } calc_impl.diff_in_means <- function(stat, x, order, ...) { @@ -224,18 +207,21 @@ calc_impl.diff_in_means <- function(stat, x, order, ...) { dplyr::group_by(replicate, !!attr(x, "explanatory")) %>% dplyr::summarize(xbar = mean(!!attr(x, "response"), ...)) %>% dplyr::group_by(replicate) %>% - dplyr::summarize(stat = xbar[!!(attr(x, "explanatory")) == order[1]] - - xbar[!!(attr(x, "explanatory")) == order[2]]) + dplyr::summarize( + stat = xbar[!!(attr(x, "explanatory")) == order[1]] - + xbar[!!(attr(x, "explanatory")) == order[2]] + ) } calc_impl.diff_in_medians <- function(stat, x, order, ...) 
{ x %>% dplyr::group_by(replicate, !!(attr(x, "explanatory"))) %>% - dplyr::summarize(xtilde = - stats::median(!!attr(x, "response"), ...)) %>% + dplyr::summarize(xtilde = stats::median(!!attr(x, "response"), ...)) %>% dplyr::group_by(replicate) %>% - dplyr::summarize(stat = xtilde[!!(attr(x, "explanatory")) == order[1]] - - xtilde[!!(attr(x, "explanatory")) == order[2]]) + dplyr::summarize( + stat = xtilde[!!(attr(x, "explanatory")) == order[1]] - + xtilde[!!(attr(x, "explanatory")) == order[2]] + ) } calc_impl.Chisq <- function(stat, x, order, ...) { @@ -246,36 +232,47 @@ calc_impl.Chisq <- function(stat, x, order, ...) { if (!is.null(attr(x, "params"))) { # When `hypothesize()` has been called x %>% - dplyr::summarize(stat = stats::chisq.test(table(!!( - attr(x, "response") - )), p = attr(x, "params"))$stat) + dplyr::summarize( + stat = stats::chisq.test( + table(!!(attr(x, "response"))), p = attr(x, "params") + )$stat + ) } else { # Straight from `specify()` - stop_glue("In order to calculate a Chi-Square Goodness of Fit ", - "statistic, hypothesized values must be given for the `p` ", - "parameter in the `hypothesize()` function prior to ", - "using `calculate()`") + stop_glue( + "In order to calculate a Chi-Square Goodness of Fit statistic, ", + "hypothesized values must be given for the `p` parameter in the ", + "`hypothesize()` function prior to using `calculate()`" + ) } } else { # This is not matching with chisq.test # obs_tab <- x %>% # dplyr::filter(replicate == 1) %>% # dplyr::ungroup() %>% - # dplyr::select(!!attr(x, "response"), - # !!(attr(x, "explanatory"))) %>% + # dplyr::select(!!attr(x, "response"), !!(attr(x, "explanatory"))) %>% # table() # expected <- outer(rowSums(obs_tab), colSums(obs_tab)) / n # df_out <- x %>% - # dplyr::summarize(stat = sum((table(!!(attr(x, "response")), - # !!(attr(x, "explanatory"))) - # - expected)^2 / expected, ...)) + # dplyr::summarize( + # stat = sum( + # (table(!!(attr(x, "response")), !!(attr(x, "explanatory"))) - + # expected)^2 / expected, + # ...) + # ) # Chi-Square Test of Independence result <- x %>% - dplyr::do(broom::tidy(suppressWarnings(stats::chisq.test(table( - .[[as.character(attr(x, "response"))]], - .[[as.character(attr(x, "explanatory"))]] - ))))) %>% + dplyr::do( + broom::tidy( + suppressWarnings(stats::chisq.test( + table( + .[[as.character(attr(x, "response"))]], + .[[as.character(attr(x, "explanatory"))]] + ) + )) + ) + ) %>% dplyr::ungroup() if (!is.null(attr(x, "generate"))) { @@ -304,8 +301,10 @@ calc_impl.diff_in_props <- function(stat, x, order, ...) { x %>% dplyr::group_by(replicate, !!attr(x, "explanatory")) %>% dplyr::summarize(prop = mean(!!sym(col) == success, ...)) %>% - dplyr::summarize(stat = prop[!!attr(x, "explanatory") == order[1]] - - prop[!!attr(x, "explanatory") == order[2]]) + dplyr::summarize( + stat = prop[!!attr(x, "explanatory") == order[1]] - + prop[!!attr(x, "explanatory") == order[2]] + ) } calc_impl.t <- function(stat, x, order, ...) { @@ -316,33 +315,39 @@ calc_impl.t <- function(stat, x, order, ...) { x <- reorder_explanatory(x, order) df_out <- x %>% - dplyr::summarize(stat = stats::t.test( - !!attr(x, "response") ~ !!attr(x, "explanatory"), ... - )[["statistic"]]) + dplyr::summarize( + stat = stats::t.test( + !!attr(x, "response") ~ !!attr(x, "explanatory"), ... + )[["statistic"]] + ) } # Standardized slope and standardized correlation are commented out # since there currently is no way to specify which one and # the standardization formulas are different. 
# # Standardized slope - # else if ( (attr(x, "theory_type") == "Slope/correlation with t") && - # stat == "slope"){ + # else if ( + # (attr(x, "theory_type") == "Slope/correlation with t") && stat == "slope" + # ) { # explan_string <- as.character(attr(x, "explanatory")) - # + # # x %>% - # dplyr::summarize(stat = summary(stats::lm( - # !!attr(x, "response") ~ !!attr(x, "explanatory") - # ))[["coefficients"]][explan_string, "t value"]) + # dplyr::summarize( + # stat = summary(stats::lm( + # !!attr(x, "response") ~ !!attr(x, "explanatory") + # ))[["coefficients"]][explan_string, "t value"] + # ) # } - # + # # # Standardized correlation - # else if ( (attr(x, "theory_type") == "Slope/correlation with t") && - # stat == "correlation"){ - # + # else if ( + # (attr(x, "theory_type") == "Slope/correlation with t") && + # stat == "correlation" + # ) { # x %>% - # dplyr::summarize(corr = cor(!!attr(x, "explanatory"), - # !!attr(x, "response")) - # ) %>% + # dplyr::summarize( + # corr = cor(!!attr(x, "explanatory"), !!attr(x, "response")) + # ) %>% # dplyr::mutate(stat = corr * (sqrt(nrow(x) - 2)) / sqrt(1 - corr ^ 2)) # } @@ -352,16 +357,16 @@ calc_impl.t <- function(stat, x, order, ...) { if (is.null(attr(x, "null"))) { x %>% dplyr::summarize( - stat = stats::t.test(!!attr(x, "response"), - ... - )[["statistic"]]) + stat = stats::t.test(!!attr(x, "response"), ...)[["statistic"]] + ) } else { # For hypothesis testing x %>% - dplyr::summarize(stat = stats::t.test( - !!attr(x, "response"), - mu = attr(x, "params"), - ...)[["statistic"]]) + dplyr::summarize( + stat = stats::t.test( + !!attr(x, "response"), mu = attr(x, "params"), ... + )[["statistic"]] + ) } } } @@ -372,29 +377,26 @@ calc_impl.z <- function(stat, x, order, ...) { col <- attr(x, "response") success <- attr(x, "success") - x$explan <- factor(explanatory_variable(x), - levels = c(order[1], order[2])) + x$explan <- factor( + explanatory_variable(x), levels = c(order[1], order[2]) + ) aggregated <- x %>% dplyr::group_by(replicate, explan) %>% dplyr::summarize( group_num = n(), - prop = mean(rlang::eval_tidy(col) == - rlang::eval_tidy(success)), - num_suc = sum(rlang::eval_tidy(col) == - rlang::eval_tidy(success)) + prop = mean(rlang::eval_tidy(col) == rlang::eval_tidy(success)), + num_suc = sum(rlang::eval_tidy(col) == rlang::eval_tidy(success)) ) df_out <- aggregated %>% dplyr::summarize( - diff_prop = prop[explan == order[1]] - - prop[explan == order[2]], + diff_prop = prop[explan == order[1]] - prop[explan == order[2]], total_suc = sum(num_suc), n1 = group_num[1], n2 = group_num[2], p_hat = total_suc / (n1 + n2), - denom = sqrt(p_hat * (1 - p_hat) / n1 - + p_hat * (1 - p_hat) / n2), + denom = sqrt(p_hat * (1 - p_hat) / n1 + p_hat * (1 - p_hat) / n2), stat = diff_prop / denom ) %>% dplyr::select(-total_suc, -n1, -n2) @@ -416,9 +418,11 @@ calc_impl.z <- function(stat, x, order, ...) { # Error given instead df_out <- x %>% - dplyr::summarize(stat = (mean( - rlang::eval_tidy(col) == rlang::eval_tidy(success), ... - ) - p0) / sqrt((p0 * (1 - p0)) / num_rows)) + dplyr::summarize( + stat = ( + mean(rlang::eval_tidy(col) == rlang::eval_tidy(success), ...) 
- p0 + ) / sqrt((p0 * (1 - p0)) / num_rows) + ) df_out diff --git a/R/conf_int.R b/R/conf_int.R index 6fbadc06..089fdff9 100644 --- a/R/conf_int.R +++ b/R/conf_int.R @@ -40,8 +40,10 @@ conf_int <- function(x, level = 0.95, type = "percentile", check_ci_args(x, level, type, point_estimate) if (type == "percentile") { - ci_vec <- stats::quantile(x[["stat"]], - probs = c((1 - level) / 2, level + (1 - level) / 2)) + ci_vec <- stats::quantile( + x[["stat"]], + probs = c((1 - level) / 2, level + (1 - level) / 2) + ) ci <- tibble::tibble(ci_vec[1], ci_vec[2]) names(ci) <- names(ci_vec) @@ -50,7 +52,8 @@ conf_int <- function(x, level = 0.95, type = "percentile", multiplier <- stats::qnorm(1 - (1 - level) / 2) ci <- tibble::tibble( lower = point_estimate - multiplier * stats::sd(x[["stat"]]), - upper = point_estimate + multiplier * stats::sd(x[["stat"]])) + upper = point_estimate + multiplier * stats::sd(x[["stat"]]) + ) } return(ci) @@ -75,8 +78,9 @@ check_ci_args <- function(x, level, type, point_estimate) { } if (type == "se" && is.null(point_estimate)) { - stop_glue('A numeric value needs to be given for `point_estimate` ', - 'for `type = "se"') + stop_glue( + 'A numeric value needs to be given for `point_estimate` for `type = "se"' + ) } if (type == "se" && is.vector(point_estimate)) { diff --git a/R/generate.R b/R/generate.R index 9f10c488..ed0c475d 100755 --- a/R/generate.R +++ b/R/generate.R @@ -41,23 +41,32 @@ generate <- function(x, reps = 1, type = attr(x, "type"), ...) { attr(x, "generate") <- TRUE - if (type == "permute" && - any(is.null(attr(x, "response")), is.null(attr(x, "explanatory")))) { - stop_glue("Please `specify()` an explanatory and a response variable ", - "when permuting.") + if ( + type == "permute" && + any(is.null(attr(x, "response")), is.null(attr(x, "explanatory"))) + ) { + stop_glue( + "Please `specify()` an explanatory and a response variable when ", + "permuting." + ) } ## Can't get to these anymore with tests -# if (type == "simulate" && -# attr(x, "null") != "point" && -# !(length(grep("p.", names(attr(x, "params")))) >= 1)) { -# stop_glue("Simulation requires a `point` null hypothesis on proportions.") -# } -# if (type == "bootstrap" && -# !(attr(attr(x, "params"), "names") %in% c("mu", "med", "sigma")) && -# !is.null(attr(x, "null")) -# ) { -# stop_glue("Bootstrapping is inappropriate in this setting. ", -# "Consider using `type = permute` or `type = simulate`.") +# if ( +# type == "simulate" && +# attr(x, "null") != "point" && +# !(length(grep("p.", names(attr(x, "params")))) >= 1) +# ) { +# stop_glue("Simulation requires a `point` null hypothesis on proportions.") +# } +# if ( +# type == "bootstrap" && +# !(attr(attr(x, "params"), "names") %in% c("mu", "med", "sigma")) && +# !is.null(attr(x, "null")) +# ) { +# stop_glue( +# "Bootstrapping is inappropriate in this setting. ", +# "Consider using `type = permute` or `type = simulate`." +# ) # } if (type == "bootstrap") { @@ -67,8 +76,10 @@ generate <- function(x, reps = 1, type = attr(x, "type"), ...) { } else if (type == "simulate") { return(simulate(x, reps, ...)) } # else if (!(type %in% c("bootstrap", "permute", "simulate"))) { - # stop_glue("Choose one of the available options for `type`: ", - # '`"bootstrap"`, `"permute"`, or `"simulate"`') + # stop_glue( + # "Choose one of the available options for `type`: ", + # '`"bootstrap"`, `"permute"`, or `"simulate"`' + # ) # } } @@ -154,11 +165,11 @@ permute_once <- function(x, ...) { simulate <- function(x, reps = 1, ...) 
{ fct_levels <- as.character(unique(dplyr::pull(x, !!attr(x, "response")))) - col_simmed <- unlist(replicate(reps, sample(fct_levels, - size = nrow(x), - replace = TRUE, - prob = format_params(x)), - simplify = FALSE)) + col_simmed <- unlist(replicate( + reps, + sample(fct_levels, size = nrow(x), replace = TRUE, prob = format_params(x)), + simplify = FALSE + )) rep_tbl <- tibble::tibble( !!attr(x, "response") := as.factor(col_simmed), diff --git a/R/hypothesize.R b/R/hypothesize.R index af9a6b5c..48cd20dc 100755 --- a/R/hypothesize.R +++ b/R/hypothesize.R @@ -26,13 +26,17 @@ hypothesize <- function(x, null, ...) { dots <- list(...) if ((null == "point") && (length(dots) == 0)) { - stop_glue("Provide a parameter and a value to check such as `mu = 30` ", - "for the point hypothesis.") + stop_glue( + "Provide a parameter and a value to check such as `mu = 30` for the ", + "point hypothesis." + ) } if ((null == "independence") && (length(dots) > 0)) { - warning_glue("Parameter values are not specified when testing that two ", - "variables are independent.") + warning_glue( + "Parameter values are not specified when testing that two variables are ", + "independent." + ) } if ((length(dots) > 0) && (null == "point")) { @@ -56,8 +60,10 @@ hypothesize <- function(x, null, ...) { if (null == "point") { if (is.factor(response_variable(x))) { if (!any(grepl("p", attr(attr(x, "params"), "names")))) { - stop_glue('Testing one categorical variable requires `p` ', - 'to be used as a parameter.') + stop_glue( + 'Testing one categorical variable requires `p` to be used as a ', + 'parameter.' + ) } } } @@ -70,8 +76,10 @@ hypothesize <- function(x, null, ...) { # !is.factor(response_variable(x)) # & !any(grepl("mu|med|sigma", attr(attr(x, "params"), "names"))) # ) { - # stop_glue('Testing one numerical variable requires one of ', - # '`mu`, `med`, or `sd` to be used as a parameter.') + # stop_glue( + # 'Testing one numerical variable requires one of ', + # '`mu`, `med`, or `sd` to be used as a parameter.' 
+ # ) # } # } diff --git a/R/infer.R b/R/infer.R index 084a98c8..a6621b57 100755 --- a/R/infer.R +++ b/R/infer.R @@ -15,10 +15,12 @@ NULL ## quiets concerns of R CMD check re: the .'s that appear in pipelines ## From Jenny Bryan's googlesheets package if (getRversion() >= "2.15.1") { - utils::globalVariables(c("prop", "stat", "xbar", "xtilde", "x", "..density..", - "statistic", ".", "parameter", "p.value", - "xmin", "xmax", "density", "denom", - "diff_prop", "group_num", "n1", "n2", - "num_suc", "p_hat", "total_suc", "explan", "probs", - "conf.low", "conf.high")) + utils::globalVariables( + c( + "prop", "stat", "xbar", "xtilde", "x", "..density..", "statistic", ".", + "parameter", "p.value", "xmin", "xmax", "density", "denom", "diff_prop", + "group_num", "n1", "n2", "num_suc", "p_hat", "total_suc", "explan", + "probs", "conf.low", "conf.high" + ) + ) } diff --git a/R/p_value.R b/R/p_value.R index fe349ede..09026d02 100644 --- a/R/p_value.R +++ b/R/p_value.R @@ -36,12 +36,12 @@ p_value <- function(x, obs_stat, direction) { obs_stat <- check_obs_stat(obs_stat) check_direction(direction) - is_simulation_based <- !is.null(attr(x, "generate")) && - attr(x, "generate") + is_simulation_based <- !is.null(attr(x, "generate")) && attr(x, "generate") if (is_simulation_based) { - pvalue <- simulation_based_p_value(x = x, obs_stat = obs_stat, - direction = direction) + pvalue <- simulation_based_p_value( + x = x, obs_stat = obs_stat, direction = direction + ) } ## Theoretical-based p-value @@ -49,16 +49,20 @@ p_value <- function(x, obs_stat, direction) { # else if ( # is.null(attr(x, "theory_type")) || is.null(attr(x, "distr_param")) # ) { - # stop_glue("Attributes have not been set appropriately. ", - # "Check your {{infer}} pipeline again.") + # stop_glue( + # "Attributes have not been set appropriately. ", + # "Check your {{infer}} pipeline again." 
+ # ) # } - + # # if (!("stat" %in% names(x))) { - # # Theoretical distribution - # which_distribution(x, - # theory_type <- attr(x, "theory_type"), - # obs_stat = obs_stat, - # direction = direction) + # # Theoretical distribution + # which_distribution( + # x, + # theory_type = attr(x, "theory_type"), + # obs_stat = obs_stat, + # direction = direction + # ) # } return(pvalue) @@ -81,12 +85,15 @@ simulation_based_p_value <- function(x, obs_stat, direction) { two_sided_p_value <- function(x, obs_stat) { if (stats::median(x$stat) >= obs_stat) { basic_p_value <- get_percentile(x$stat, obs_stat) + - (1 - get_percentile(x$stat, stats::median(x$stat) + - stats::median(x$stat) - obs_stat)) + (1 - get_percentile( + x$stat, stats::median(x$stat) + stats::median(x$stat) - obs_stat + ) + ) } else { basic_p_value <- 1 - get_percentile(x$stat, obs_stat) + - (get_percentile(x$stat, stats::median(x$stat) + - stats::median(x$stat) - obs_stat)) + get_percentile( + x$stat, stats::median(x$stat) + stats::median(x$stat) - obs_stat + ) } if (basic_p_value >= 1) { @@ -110,9 +117,8 @@ get_pvalue <- p_value # } # # if (theory_type == "Two sample t") { -# return(pt(q = obs_stat, -# df = param, -# lower.tail = set_lower_tail(direction)) +# return( +# pt(q = obs_stat, df = param, lower.tail = set_lower_tail(direction)) # ) # } # } diff --git a/R/rep_sample_n.R b/R/rep_sample_n.R index 0dc80c99..23e813be 100644 --- a/R/rep_sample_n.R +++ b/R/rep_sample_n.R @@ -70,11 +70,15 @@ rep_sample_n <- function(tbl, size, replace = FALSE, reps = 1, prob = NULL) { dplyr::pull() } - i <- unlist(replicate(reps, sample.int(n, size, replace = replace, - prob = prob), - simplify = FALSE)) - rep_tbl <- cbind(replicate = rep(1:reps, rep(size, reps)), - tbl[i, ]) + i <- unlist(replicate( + reps, + sample.int(n, size, replace = replace, prob = prob), + simplify = FALSE + )) + rep_tbl <- cbind( + replicate = rep(1:reps, rep(size, reps)), + tbl[i, ] + ) rep_tbl <- tibble::as_tibble(rep_tbl) names(rep_tbl)[-1] <- names(tbl) dplyr::group_by(rep_tbl, replicate) diff --git a/R/set_params.R b/R/set_params.R index 827d90b1..4a741843 100755 --- a/R/set_params.R +++ b/R/set_params.R @@ -11,16 +11,18 @@ set_params <- function(x) { } # One variable - if (!is.null(attr(x, "response")) && is.null(attr(x, "explanatory")) && - !is.null(attr(x, "response_type")) && - is.null(attr(x, "explanatory_type"))) { + if ( + !is.null(attr(x, "response")) && is.null(attr(x, "explanatory")) && + !is.null(attr(x, "response_type")) && + is.null(attr(x, "explanatory_type")) + ) { # One mean if (attr(x, "response_type") %in% c("integer", "numeric")) { attr(x, "theory_type") <- "One sample t" attr(x, "distr_param") <- x %>% - dplyr::summarize(df = stats::t.test( - response_variable(x))[["parameter"]] + dplyr::summarize( + df = stats::t.test(response_variable(x))[["parameter"]] ) %>% dplyr::pull() attr(x, "type") <- "bootstrap" @@ -41,14 +43,18 @@ set_params <- function(x) { } # Two variables - if (!is.null(attr(x, "response")) && !is.null(attr(x, "explanatory")) & - !is.null(attr(x, "response_type")) && - !is.null(attr(x, "explanatory_type"))) { + if ( + !is.null(attr(x, "response")) && !is.null(attr(x, "explanatory")) & + !is.null(attr(x, "response_type")) && + !is.null(attr(x, "explanatory_type")) + ) { attr(x, "type") <- "bootstrap" # Response is numeric, explanatory is categorical - if (attr(x, "response_type") %in% c("integer", "numeric") & - attr(x, "explanatory_type") == "factor") { + if ( + attr(x, "response_type") %in% c("integer", "numeric") & + attr(x, 
"explanatory_type") == "factor" + ) { # Two sample means (t distribution) if (length(levels(explanatory_variable(x))) == 2) { @@ -56,8 +62,10 @@ set_params <- function(x) { # Keep track of Satterthwaite degrees of freedom since lost when # in aggregation w/ calculate()/generate() attr(x, "distr_param") <- x %>% - dplyr::summarize(df = stats::t.test( - !!attr(x, "response") ~ !!attr(x, "explanatory"))[["parameter"]] + dplyr::summarize( + df = stats::t.test( + !!attr(x, "response") ~ !!attr(x, "explanatory") + )[["parameter"]] ) %>% dplyr::pull() } else { @@ -66,43 +74,57 @@ set_params <- function(x) { attr(x, "theory_type") <- "ANOVA" # Get numerator and denominator degrees of freedom attr(x, "distr_param") <- x %>% - dplyr::summarize(df1 = stats::anova(stats::aov( - !!attr(x, "response") ~ !!attr(x, "explanatory")))$Df[1] + dplyr::summarize( + df1 = stats::anova(stats::aov( + !!attr(x, "response") ~ !!attr(x, "explanatory") + ))$Df[1] ) %>% dplyr::pull() attr(x, "distr_param2") <- x %>% - dplyr::summarize(df2 = stats::anova(stats::aov( - !!attr(x, "response") ~ !!attr(x, "explanatory")))$Df[2] + dplyr::summarize( + df2 = stats::anova(stats::aov( + !!attr(x, "response") ~ !!attr(x, "explanatory") + ))$Df[2] ) %>% dplyr::pull() } } # Response is categorical, explanatory is categorical - if (attr(x, "response_type") == "factor" & - attr(x, "explanatory_type") == "factor") { + if ( + attr(x, "response_type") == "factor" & + attr(x, "explanatory_type") == "factor" + ) { attr(x, "type") <- "bootstrap" # Two sample proportions (z distribution) # Parameter(s) not needed since standard normal - if (length(levels(response_variable(x))) == 2 & - length(levels(explanatory_variable(x))) == 2) { + if ( + length(levels(response_variable(x))) == 2 & + length(levels(explanatory_variable(x))) == 2 + ) { attr(x, "theory_type") <- "Two sample props z" } else { # >2 sample proportions (chi-square test of indep) attr(x, "theory_type") <- "Chi-square test of indep" attr(x, "distr_param") <- x %>% - dplyr::summarize(df = suppressWarnings(stats::chisq.test( - table(response_variable(x), - explanatory_variable(x)))$parameter)) %>% + dplyr::summarize( + df = suppressWarnings( + stats::chisq.test( + table(response_variable(x), explanatory_variable(x)) + )$parameter + ) + ) %>% dplyr::pull() } } # Response is numeric, explanatory is numeric - if (attr(x, "response_type") %in% c("integer", "numeric") & - attr(x, "explanatory_type") %in% c("integer", "numeric")) { + if ( + attr(x, "response_type") %in% c("integer", "numeric") & + attr(x, "explanatory_type") %in% c("integer", "numeric") + ) { response_string <- as.character(attr(x, "response")) explanatory_string <- as.character(attr(x, "explanatory")) attr(x, "theory_type") <- "Slope/correlation with t" diff --git a/R/specify.R b/R/specify.R index f0e33193..1293be5c 100755 --- a/R/specify.R +++ b/R/specify.R @@ -61,8 +61,10 @@ specify <- function(x, formula, response = NULL, } if (!(as.character(attr(x, "response")) %in% names(x))) { - stop_glue('The response variable `{attr(x, "response")}` ', - 'cannot be found in this dataframe.') + stop_glue( + 'The response variable `{attr(x, "response")}` cannot be found in this ', + 'dataframe.' 
+ ) } response_col <- rlang::eval_tidy(attr(x, "response"), x) @@ -70,13 +72,20 @@ specify <- function(x, formula, response = NULL, # if there's an explanatory var if (has_explanatory(x)) { if (!as.character(attr(x, "explanatory")) %in% names(x)) { - stop_glue('The explanatory variable `{attr(x, "explanatory")}` ', - 'cannot be found in this dataframe.') + stop_glue( + 'The explanatory variable `{attr(x, "explanatory")}` cannot be found ', + 'in this dataframe.' + ) } - if (identical(as.character(attr(x, "response")), - as.character(attr(x, "explanatory")))) { - stop_glue("The response and explanatory variables must be different ", - "from one another.") + if ( + identical( + as.character(attr(x, "response")), as.character(attr(x, "explanatory")) + ) + ) { + stop_glue( + "The response and explanatory variables must be different from one ", + "another." + ) } explanatory_col <- rlang::eval_tidy(attr(x, "explanatory"), x) if (is.character(explanatory_col)) { @@ -91,22 +100,25 @@ specify <- function(x, formula, response = NULL, stop_glue("`success` must be a string.") } if (!is.factor(response_col)) { - stop_glue("`success` should only be specified if the response is ", - "a categorical variable.") + stop_glue( + "`success` should only be specified if the response is a categorical ", + "variable." + ) } if (!(success %in% levels(response_col))) { stop_glue('{success} is not a valid level of {attr(x, "response")}.') } if (sum(table(response_col) > 0) > 2) { - stop_glue("`success` can only be used if the response has two levels. ", - "`filter()` can reduce a variable to two levels.") + stop_glue( + "`success` can only be used if the response has two levels. ", + "`filter()` can reduce a variable to two levels." + ) } } x <- x %>% select(one_of(c( - as.character((attr(x, "response"))), - as.character(attr(x, "explanatory")) + as.character((attr(x, "response"))), as.character(attr(x, "explanatory")) ))) is_complete <- stats::complete.cases(x) @@ -115,7 +127,6 @@ specify <- function(x, formula, response = NULL, warning_glue("Removed {sum(!is_complete)} rows containing missing values.") } - # To help determine theoretical distribution to plot attr(x, "response_type") <- class(response_variable(x)) @@ -125,14 +136,18 @@ specify <- function(x, formula, response = NULL, attr(x, "explanatory_type") <- class(explanatory_variable(x)) } - if (attr(x, "response_type") == "factor" && is.null(success) && - length(levels(response_variable(x))) == 2 && - (is.null(attr(x, "explanatory_type")) || + if ( + attr(x, "response_type") == "factor" && is.null(success) && + length(levels(response_variable(x))) == 2 && + ( + is.null(attr(x, "explanatory_type")) || (!is.null(attr(x, "explanatory_type")) && - length(levels(explanatory_variable(x))) == 2))) { + length(levels(explanatory_variable(x))) == 2) + ) + ) { stop_glue( - 'A level of the response variable `{attr(x, "response")}` ', - 'needs to be specified for the `success` argument in `specify()`.' + 'A level of the response variable `{attr(x, "response")}` needs to be ', + 'specified for the `success` argument in `specify()`.' 
) } diff --git a/R/utils.R b/R/utils.R index 69a77962..26c7d307 100644 --- a/R/utils.R +++ b/R/utils.R @@ -35,9 +35,10 @@ response_variable <- function(x) { } reorder_explanatory <- function(x, order) { - x[[as.character(attr(x, "explanatory"))]] <- - factor(x[[as.character(attr(x, "explanatory"))]], - levels = c(order[1], order[2])) + x[[as.character(attr(x, "explanatory"))]] <- factor( + x[[as.character(attr(x, "explanatory"))]], + levels = c(order[1], order[2]) + ) x } @@ -92,14 +93,18 @@ null_transformer <- function(text, envir) { check_order <- function(x, explanatory_variable, order) { unique_explanatory_variable <- unique(explanatory_variable) if (length(unique_explanatory_variable) != 2) { - stop_glue("Statistic is based on a difference; the explanatory variable ", - "should have two levels.") + stop_glue( + "Statistic is based on a difference; the explanatory variable should ", + "have two levels." + ) } if (is.null(order)) { - stop_glue("Statistic is based on a difference; specify the `order` in ", - "which to subtract the levels of the explanatory variable. ", - '`order = c("first", "second")` means `("first" - "second")`. ', - "Check `?calculate` for details.") + stop_glue( + "Statistic is based on a difference; specify the `order` in which to ", + "subtract the levels of the explanatory variable. ", + '`order = c("first", "second")` means `("first" - "second")`. ', + "Check `?calculate` for details." + ) } else { if (xor(is.na(order[1]), is.na(order[2]))) { stop_glue( @@ -122,16 +127,23 @@ check_args_and_attr <- function(x, explanatory_variable, response_variable, stat) { # Could also do `stat <- match.arg(stat)` # but that's not as helpful to beginners with the cryptic error msg - if (!stat %in% c("mean", "median", "sd", "prop", - "diff in means", "diff in medians", "diff in props", - "Chisq", "F", "slope", "correlation", "t", "z")) { - stop_glue("You specified a string for `stat` that is not implemented. ", - "Check your spelling and `?calculate` for current options.") + if ( + !stat %in% c( + "mean", "median", "sd", "prop", "diff in means", "diff in medians", + "diff in props", "Chisq", "F", "slope", "correlation", "t", "z" + ) + ) { + stop_glue( + "You specified a string for `stat` that is not implemented. ", + "Check your spelling and `?calculate` for current options." + ) } if (!("replicate" %in% names(x)) && !is.null(attr(x, "generate"))) { - warning_glue('A `generate()` step was not performed prior to ', - '`calculate()`. Review carefully.') + warning_glue( + 'A `generate()` step was not performed prior to `calculate()`. ', + 'Review carefully.' + ) } if (stat %in% c("F", "slope", "diff in means", "diff in medians")) { @@ -218,8 +230,9 @@ parse_params <- function(dots, x) { sig_ind <- grep("sigma", names(dots)) # error: cannot specify more than one of props, means, medians, or sds - if (length(p_ind) + length(mu_ind) + length(med_ind) - + length(sig_ind) != 1) { + if ( + length(p_ind) + length(mu_ind) + length(med_ind) + length(sig_ind) != 1 + ) { stop_glue( 'Parameter values can be only one of `p`, `mu`, `med`, or `sigma`.' ) @@ -231,22 +244,28 @@ parse_params <- function(dots, x) { if (length(p_ind)) { if (length(dots[[p_ind]]) == 1) { if (attr(x, "null") == "point" && is.null(attr(x, "success"))) { - stop_glue("A point null regarding a proportion requires ", - "that `success` be indicated in `specify()`.") + stop_glue( + "A point null regarding a proportion requires that `success` ", + "be indicated in `specify()`." 
+ ) } if (dots$p < 0 || dots$p > 1) { stop_glue( "The value suggested for `p` is not between 0 and 1, inclusive." ) } - missing_lev <- base::setdiff(unique(pull(x, !!attr(x, "response"))), - attr(x, "success")) + missing_lev <- base::setdiff( + unique(dplyr::pull(x, !!attr(x, "response"))), + attr(x, "success") + ) dots$p <- append(dots$p, 1 - dots$p) names(dots$p) <- c(attr(x, "success"), missing_lev) } else { if (sum(dots$p) != 1) { - stop_glue("Make sure the hypothesized values for the `p` parameters ", - "sum to 1. Please try again.") + stop_glue( + "Make sure the hypothesized values for the `p` parameters sum to 1. ", + "Please try again." + ) } } } @@ -273,8 +292,10 @@ hypothesize_checks <- function(x, null) { } # if (length(null) != 1) { - # stop_glue('Choose between either `"independence"` or `"point"` ', - # 'for `null` argument.') + # stop_glue( + # 'Choose between either `"independence"` or `"point"` for `null` ', + # 'argument.' + # ) # } if (!has_response(x)) { @@ -284,9 +305,11 @@ hypothesize_checks <- function(x, null) { } if (null == "independence" && !has_explanatory(x)) { - stop_glue('Please `specify()` an explanatory and a response variable ', - 'when testing\n', - 'a null hypothesis of `"independence"`.') + stop_glue( + 'Please `specify()` an explanatory and a response variable when ', + 'testing\n', + 'a null hypothesis of `"independence"`.' + ) } } @@ -294,11 +317,13 @@ check_direction <- function(direction = c("less", "greater", "two_sided", "left", "right", "both")) { check_type(direction, is.character) - if (!(direction %in% c("less", "greater", "two_sided", - "left", "right", "both"))) { - stop_glue('The provided value for `direction` is not appropriate. ', - 'Possible values are "less", "greater", "two_sided", ', - '"left", "right", or "both".') + if ( + !(direction %in% c("less", "greater", "two_sided", "left", "right", "both")) + ) { + stop_glue( + 'The provided value for `direction` is not appropriate. Possible values ', + 'are "less", "greater", "two_sided", "left", "right", or "both".' + ) } } @@ -307,8 +332,10 @@ check_obs_stat <- function(obs_stat) { if ("data.frame" %in% class(obs_stat)) { check_type(obs_stat, is.data.frame) if ((nrow(obs_stat) != 1) || (ncol(obs_stat) != 1)) { - warning_glue("The first row and first column value of the given ", - "`obs_stat` will be used.") + warning_glue( + "The first row and first column value of the given `obs_stat` will ", + "be used." + ) } # [[1]] is used in case `stat` is not specified as name of 1x1 diff --git a/R/visualize.R b/R/visualize.R index 121dbf7d..100cd5a3 100755 --- a/R/visualize.R +++ b/R/visualize.R @@ -77,7 +77,8 @@ visualize <- function(data, bins = 15, method = "simulation", direction = NULL, endpoints = NULL, endpoints_color = "mediumaquamarine", - ci_fill = "turquoise", ...) { + ci_fill = "turquoise", + ...) { check_type(data, is.data.frame) check_type(bins, is.numeric) check_type(method, is.character) @@ -87,8 +88,10 @@ visualize <- function(data, bins = 15, method = "simulation", if (!is.null(direction)) { check_type(direction, is.character) } - if (is.data.frame(endpoints) && - ((nrow(endpoints) != 1) || (ncol(endpoints) != 2))) { + if ( + is.data.frame(endpoints) && + ((nrow(endpoints) != 1) || (ncol(endpoints) != 2)) + ) { stop_glue( "Expecting `endpoints` to be a 1 x 2 data frame or 2 element vector." 
) @@ -104,8 +107,10 @@ visualize <- function(data, bins = 15, method = "simulation", endpoints <- unlist(endpoints) } obs_stat <- check_obs_stat(obs_stat) - if (!is.null(direction) && - (is.null(obs_stat) + is.null(endpoints)) != 1) { + if ( + !is.null(direction) && + (is.null(obs_stat) + is.null(endpoints)) != 1 + ) { stop_glue( "Shading requires either `endpoints` values for a confidence interval ", "or the observed statistic `obs_stat` to be provided." @@ -113,52 +118,64 @@ visualize <- function(data, bins = 15, method = "simulation", } if (method == "simulation") { - infer_plot <- visualize_simulation(data = data, bins = bins, - dens_color = dens_color, - obs_stat = obs_stat, - obs_stat_color = obs_stat_color, - direction = direction, - pvalue_fill = pvalue_fill, - endpoints = endpoints, - ci_fill = ci_fill, - ...) + infer_plot <- visualize_simulation( + data = data, + bins = bins, + dens_color = dens_color, + obs_stat = obs_stat, + obs_stat_color = obs_stat_color, + direction = direction, + pvalue_fill = pvalue_fill, + endpoints = endpoints, + ci_fill = ci_fill, + ... + ) } else if (method == "theoretical") { - infer_plot <- visualize_theoretical(data = data, - dens_color = dens_color, - obs_stat = obs_stat, - obs_stat_color = obs_stat_color, - direction = direction, - pvalue_fill = pvalue_fill, - endpoints = endpoints, - ci_fill = ci_fill, - ...) + infer_plot <- visualize_theoretical( + data = data, + dens_color = dens_color, + obs_stat = obs_stat, + obs_stat_color = obs_stat_color, + direction = direction, + pvalue_fill = pvalue_fill, + endpoints = endpoints, + ci_fill = ci_fill, + ... + ) } else if (method == "both") { if (!("stat" %in% names(data))) { - stop_glue('`generate()` and `calculate()` are both required ', - 'to be done prior to `visualize(method = "both")`') + stop_glue( + '`generate()` and `calculate()` are both required to be done prior ', + 'to `visualize(method = "both")`' + ) } - if (("replicate" %in% names(data)) && - length(unique(data$replicate)) < 100) { + if ( + ("replicate" %in% names(data)) && length(unique(data$replicate)) < 100 + ) { warning_glue( "With only {length(unique(data$stat))} replicates, it may be ", "difficult to see the relationship between simulation and theory." ) } - infer_plot <- visualize_both(data = data, bins = bins, - dens_color = dens_color, - obs_stat = obs_stat, - obs_stat_color = obs_stat_color, - direction = direction, - pvalue_fill = pvalue_fill, - endpoints = endpoints, - ci_fill = ci_fill, - ...) + infer_plot <- visualize_both( + data = data, + bins = bins, + dens_color = dens_color, + obs_stat = obs_stat, + obs_stat_color = obs_stat_color, + direction = direction, + pvalue_fill = pvalue_fill, + endpoints = endpoints, + ci_fill = ci_fill, + ... + ) } else { - stop_glue("Provide `method` with one of three options: ", - '`"theoretical"`, `"both"`, or `"simulation"`. ', - '`"simulation"` is the default.') + stop_glue( + 'Provide `method` with one of three options: `"theoretical"`, `"both"`, ', + 'or `"simulation"`. `"simulation"` is the default.' + ) } if (!is.null(obs_stat)) { # && !is.null(direction) @@ -168,24 +185,31 @@ visualize <- function(data, bins = 15, method = "simulation", if (!is.null(endpoints)) { if (!is.null(obs_stat)) { - warning_glue("Values for both `endpoints` and `obs_stat` were given ", - "when only one should be set. Ignoring `obs_stat` values.") + warning_glue( + "Values for both `endpoints` and `obs_stat` were given when only one ", + "should be set. Ignoring `obs_stat` values." 
+ ) } infer_plot <- infer_plot + - geom_vline(xintercept = endpoints, size = 2, - color = endpoints_color, - ...) + geom_vline( + xintercept = endpoints, size = 2, color = endpoints_color, ... + ) } infer_plot } theory_t_plot <- function(deg_freedom, statistic_text = "t", - dens_color = dens_color, ...) { + dens_color = dens_color, + ...) { ggplot(data.frame(x = c(qt(0.001, deg_freedom), qt(0.999, deg_freedom)))) + - stat_function(mapping = aes(x), fun = dt, args = list(df = deg_freedom), - color = dens_color) + + stat_function( + mapping = aes(x), + fun = dt, + args = list(df = deg_freedom), + color = dens_color + ) + ggtitle(glue_null("Theoretical {statistic_text} Null Distribution")) + xlab("") + ylab("") @@ -198,18 +222,22 @@ both_t_plot <- function(data = data, deg_freedom, statistic_text = "t", bins, pvalue_fill, endpoints, - ci_fill, ...) { - infer_t_plot <- shade_density_check(data = data, - obs_stat = obs_stat, - direction = direction, - bins = bins, - endpoints = endpoints, - pvalue_fill = pvalue_fill, - ci_fill = ci_fill) + ci_fill, + ...) { + infer_t_plot <- shade_density_check( + data = data, + obs_stat = obs_stat, + direction = direction, + bins = bins, + endpoints = endpoints, + pvalue_fill = pvalue_fill, + ci_fill = ci_fill + ) infer_t_plot + - stat_function(fun = dt, args = list(df = deg_freedom), - color = dens_color) + + stat_function( + fun = dt, args = list(df = deg_freedom), color = dens_color + ) + ggtitle(glue_null( "Simulation-Based and Theoretical {statistic_text} Null Distributions" )) + @@ -219,12 +247,16 @@ both_t_plot <- function(data = data, deg_freedom, statistic_text = "t", theory_anova_plot <- function(deg_freedom_top, deg_freedom_bottom, statistic_text = "F", - dens_color = dens_color, ...) { + dens_color = dens_color, + ...) { ggplot(data.frame(x = c(qf(0.001, deg_freedom_top, deg_freedom_bottom), qf(0.999, deg_freedom_top, deg_freedom_bottom)))) + - stat_function(mapping = aes(x), fun = df, - args = list(df1 = deg_freedom_top, df2 = deg_freedom_bottom), - color = dens_color) + + stat_function( + mapping = aes(x), + fun = df, + args = list(df1 = deg_freedom_top, df2 = deg_freedom_bottom), + color = dens_color + ) + ggtitle(glue_null("Theoretical {statistic_text} Null Distribution")) + xlab("") + ylab("") @@ -246,18 +278,22 @@ both_anova_plot <- function(data, deg_freedom_top, ) } - infer_anova_plot <- shade_density_check(data = data, - obs_stat = obs_stat, - direction = direction, - bins = bins, - endpoints = endpoints, - pvalue_fill = pvalue_fill, - ci_fill = ci_fill) + infer_anova_plot <- shade_density_check( + data = data, + obs_stat = obs_stat, + direction = direction, + bins = bins, + endpoints = endpoints, + pvalue_fill = pvalue_fill, + ci_fill = ci_fill + ) infer_anova_plot <- infer_anova_plot + - stat_function(fun = df, - args = list(df1 = deg_freedom_top, df2 = deg_freedom_bottom), - color = dens_color) + + stat_function( + fun = df, + args = list(df1 = deg_freedom_top, df2 = deg_freedom_bottom), + color = dens_color + ) + ggtitle(glue_null( "Simulation-Based and Theoretical {statistic_text} Null Distributions" )) + @@ -265,7 +301,7 @@ both_anova_plot <- function(data, deg_freedom_top, ylab("") } -theory_z_plot <- function(statistic_text = "z", dens_color = dens_color, ...) { +theory_z_plot <- function(statistic_text = "z", dens_color = dens_color, ...) 
{ ggplot(data.frame(x = c(qnorm(0.001), qnorm(0.999)))) + stat_function(mapping = aes(x), fun = dnorm, color = dens_color) + ggtitle(glue_null("Theoretical {statistic_text} Null Distribution")) + @@ -282,13 +318,15 @@ both_z_plot <- function(data, statistic_text = "z", endpoints, ci_fill, ...) { - infer_z_plot <- shade_density_check(data = data, - obs_stat = obs_stat, - direction = direction, - bins = bins, - endpoints = endpoints, - pvalue_fill = pvalue_fill, - ci_fill = ci_fill) + infer_z_plot <- shade_density_check( + data = data, + obs_stat = obs_stat, + direction = direction, + bins = bins, + endpoints = endpoints, + pvalue_fill = pvalue_fill, + ci_fill = ci_fill + ) infer_z_plot + stat_function(fun = dnorm, color = dens_color) + ggtitle(glue_null( @@ -300,12 +338,16 @@ both_z_plot <- function(data, statistic_text = "z", theory_chisq_plot <- function(deg_freedom, statistic_text = "Chi-Square", - dens_color = dens_color, ...) { + dens_color = dens_color, + ...) { ggplot(data.frame(x = c(qchisq(0.001, deg_freedom), qchisq(0.999, deg_freedom)))) + - stat_function(mapping = aes(x), fun = dchisq, - args = list(df = deg_freedom), - color = dens_color) + + stat_function( + mapping = aes(x), + fun = dchisq, + args = list(df = deg_freedom), + color = dens_color + ) + ggtitle(glue_null("Theoretical {statistic_text} Null Distribution")) + xlab("") + ylab("") @@ -325,17 +367,20 @@ both_chisq_plot <- function(data, deg_freedom, statistic_text = "Chi-Square", "Proceed with caution.") } - infer_chisq_plot <- shade_density_check(data = data, - obs_stat = obs_stat, - direction = direction, - bins = bins, - endpoints = endpoints, - pvalue_fill = pvalue_fill, - ci_fill = ci_fill) + infer_chisq_plot <- shade_density_check( + data = data, + obs_stat = obs_stat, + direction = direction, + bins = bins, + endpoints = endpoints, + pvalue_fill = pvalue_fill, + ci_fill = ci_fill + ) infer_chisq_plot + - stat_function(fun = dchisq, args = list(df = deg_freedom), - color = dens_color) + + stat_function( + fun = dchisq, args = list(df = deg_freedom), color = dens_color + ) + ggtitle(glue_null( "Simulation-Based and Theoretical {statistic_text} Null Distributions" )) + @@ -350,12 +395,14 @@ shade_density_check <- function(data, density = TRUE, pvalue_fill, endpoints, - ci_fill, ...) { + ci_fill, + ...) { if (is.null(direction) || is.null(obs_stat)) { if (density) { gg_plot <- ggplot(data = data, mapping = aes(x = stat)) + - geom_histogram(bins = bins, color = "white", - mapping = aes(y = ..density..), ...) + geom_histogram( + bins = bins, color = "white", mapping = aes(y = ..density..), ... + ) } # else { # Not sure if needed? Can't get tests to find it # gg_plot <- ggplot(data = data, mapping = aes(x = stat)) + @@ -367,8 +414,9 @@ shade_density_check <- function(data, if (!is.null(direction)) { if (density) { gg_plot <- ggplot(data = data, mapping = aes(x = stat)) + - geom_histogram(bins = bins, color = "white", - mapping = aes(y = ..density..), ...) + geom_histogram( + bins = bins, color = "white", mapping = aes(y = ..density..), ... + ) } else { gg_plot <- ggplot(data = data, mapping = aes(x = stat)) + geom_histogram(bins = bins, color = "white", ...) @@ -376,56 +424,77 @@ shade_density_check <- function(data, if (direction %in% c("less", "left")) { gg_plot <- gg_plot + - geom_rect(fill = pvalue_fill, alpha = 0.01, - aes(xmin = -Inf, xmax = obs_stat, ymin = 0, ymax = Inf), - ...) + geom_rect( + fill = pvalue_fill, alpha = 0.01, + aes(xmin = -Inf, xmax = obs_stat, ymin = 0, ymax = Inf), + ... 
+ ) } if (direction %in% c("greater", "right")) { gg_plot <- gg_plot + - geom_rect(fill = pvalue_fill, alpha = 0.01, - aes(xmin = obs_stat, xmax = Inf, ymin = 0, ymax = Inf), - ...) + geom_rect( + fill = pvalue_fill, alpha = 0.01, + aes(xmin = obs_stat, xmax = Inf, ymin = 0, ymax = Inf), + ... + ) } - if (direction %in% c("two_sided", "both") && - obs_stat >= stats::median(data$stat)) { + if ( + direction %in% c("two_sided", "both") && + obs_stat >= stats::median(data$stat) + ) { gg_plot <- gg_plot + - geom_rect(fill = pvalue_fill, alpha = 0.01, - mapping = aes(xmin = obs_stat, xmax = Inf, ymin = 0, - ymax = Inf), ...) + - geom_rect(fill = pvalue_fill, alpha = 0.01, - mapping = aes( - xmin = -Inf, - xmax = stats::quantile( - data$stat, - probs = 1 - get_percentile(data$stat, obs_stat) - ), - ymin = 0, ymax = Inf, ...) + geom_rect( + fill = pvalue_fill, alpha = 0.01, + mapping = aes(xmin = obs_stat, xmax = Inf, ymin = 0, ymax = Inf), + ... + ) + + geom_rect( + fill = pvalue_fill, alpha = 0.01, + mapping = aes( + xmin = -Inf, + xmax = stats::quantile( + data$stat, probs = 1 - get_percentile(data$stat, obs_stat) + ), + ymin = 0, + ymax = Inf, + ... + ) ) } - if (direction %in% c("two_sided", "both") && - obs_stat < stats::median(data$stat)) { + if ( + direction %in% c("two_sided", "both") && + obs_stat < stats::median(data$stat) + ) { gg_plot <- gg_plot + - geom_rect(fill = pvalue_fill, alpha = 0.01, - mapping = aes(xmin = -Inf, xmax = obs_stat, ymin = 0, - ymax = Inf), ...) + - geom_rect(fill = pvalue_fill, alpha = 0.01, - mapping = aes( - xmin = stats::quantile( - data$stat, - probs = 1 - get_percentile(data$stat, obs_stat) - ), xmax = Inf, ymin = 0, ymax = Inf, ...) + geom_rect( + fill = pvalue_fill, alpha = 0.01, + mapping = aes(xmin = -Inf, xmax = obs_stat, ymin = 0, ymax = Inf), + ... + ) + + geom_rect( + fill = pvalue_fill, alpha = 0.01, + mapping = aes( + xmin = stats::quantile( + data$stat, probs = 1 - get_percentile(data$stat, obs_stat) + ), + xmax = Inf, + ymin = 0, + ymax = Inf, + ... + ) ) } } if (direction == "between") { gg_plot <- gg_plot + - geom_rect(fill = ci_fill, alpha = 0.01, - aes(xmin = endpoints[1], - xmax = endpoints[2], ymin = 0, ymax = Inf), - ...) + geom_rect( + fill = ci_fill, alpha = 0.01, + aes(xmin = endpoints[1], xmax = endpoints[2], ymin = 0, ymax = Inf), + ... + ) } } gg_plot @@ -439,7 +508,8 @@ visualize_simulation <- function(data, bins, direction, pvalue_fill, endpoints, - ci_fill, ...) { + ci_fill, + ...) { if (is.null(direction)) { if (length(unique(data$stat)) >= 10) { infer_plot <- ggplot(data = data, mapping = aes(x = stat)) + @@ -450,14 +520,16 @@ visualize_simulation <- function(data, bins, xlab("stat") } } else { - infer_plot <- shade_density_check(data = data, - obs_stat = obs_stat, - direction = direction, - bins = bins, - density = FALSE, - pvalue_fill = pvalue_fill, - endpoints = endpoints, - ci_fill = ci_fill) + infer_plot <- shade_density_check( + data = data, + obs_stat = obs_stat, + direction = direction, + bins = bins, + density = FALSE, + pvalue_fill = pvalue_fill, + endpoints = endpoints, + ci_fill = ci_fill + ) } infer_plot } @@ -476,21 +548,27 @@ visualize_theoretical <- function(data, "method. {{infer}} currently does not check these for you." 
) - if (!is.null(attr(data, "stat")) && - !(attr(data, "stat") %in% c("t", "z", "Chisq", "F"))) { + if ( + !is.null(attr(data, "stat")) && + !(attr(data, "stat") %in% c("t", "z", "Chisq", "F")) + ) { warning_glue( "Your `calculate`d statistic and the theoretical distribution are on ", "different scales. Displaying only the theoretical distribution." ) } - if (attr(data, "theory_type") %in% - c("Two sample t", "Slope with t", "One sample t")) { - infer_plot <- theory_t_plot(deg_freedom = attr(data, "distr_param"), - statistic_text = "t", - dens_color = dens_color) + if ( + attr(data, "theory_type") %in% c( + "Two sample t", "Slope with t", "One sample t" + ) + ) { + infer_plot <- theory_t_plot( + deg_freedom = attr(data, "distr_param"), + statistic_text = "t", + dens_color = dens_color + ) } else if (attr(data, "theory_type") == "ANOVA") { - if (!is.null(direction) && !(direction %in% c("greater", "right"))) { warning_glue( "F usually corresponds to right-tailed tests. Proceed with caution." @@ -501,23 +579,29 @@ visualize_theoretical <- function(data, deg_freedom_top = attr(data, "distr_param"), deg_freedom_bottom = attr(data, "distr_param2"), statistic_text = "F", - dens_color = dens_color) - } else if (attr(data, "theory_type") %in% - c("One sample prop z", "Two sample props z")) { - - infer_plot <- theory_z_plot(statistic_text = "z", - dens_color = dens_color) - } else if (attr(data, "theory_type") %in% - c("Chi-square test of indep", "Chi-square Goodness of Fit")) { - + dens_color = dens_color + ) + } else if ( + attr(data, "theory_type") %in% c("One sample prop z", "Two sample props z") + ) { + infer_plot <- theory_z_plot(statistic_text = "z", dens_color = dens_color) + } else if ( + attr(data, "theory_type") %in% c( + "Chi-square test of indep", "Chi-square Goodness of Fit" + ) + ) { if (!is.null(direction) && !(direction %in% c("greater", "right"))) { - warning_glue("Chi-square usually corresponds to right-tailed tests. ", - "Proceed with caution.") + warning_glue( + "Chi-square usually corresponds to right-tailed tests. ", + "Proceed with caution." + ) } - infer_plot <- theory_chisq_plot(deg_freedom = attr(data, "distr_param"), - statistic_text = "Chi-Square", - dens_color = dens_color) + infer_plot <- theory_chisq_plot( + deg_freedom = attr(data, "distr_param"), + statistic_text = "Chi-Square", + dens_color = dens_color + ) } # else { # stop_glue( # '"{attr(data, "theory_type")}" is not implemented (possibly yet).' @@ -529,44 +613,55 @@ visualize_theoretical <- function(data, if (!is.null(direction)) { if (direction %in% c("less", "left")) { infer_plot <- infer_plot + - geom_rect(data = data.frame(obs_stat), fill = pvalue_fill, - alpha = 0.6, - aes(xmin = -Inf, xmax = obs_stat, ymin = 0, ymax = Inf), - ...) + geom_rect( + data = data.frame(obs_stat), + fill = pvalue_fill, alpha = 0.6, + aes(xmin = -Inf, xmax = obs_stat, ymin = 0, ymax = Inf), + ... + ) } if (direction %in% c("greater", "right")) { infer_plot <- infer_plot + - geom_rect(data = data.frame(obs_stat), fill = pvalue_fill, - alpha = 0.6, - aes(xmin = obs_stat, - xmax = Inf, ymin = 0, ymax = Inf), - ...) + geom_rect( + data = data.frame(obs_stat), + fill = pvalue_fill, alpha = 0.6, + aes(xmin = obs_stat, xmax = Inf, ymin = 0, ymax = Inf), + ... 
+ ) } # Assuming two-tailed shading will only happen with theoretical # distributions centered at 0 if (direction %in% c("two_sided", "both") && obs_stat >= 0) { infer_plot <- infer_plot + - geom_rect(data = data.frame(obs_stat), fill = pvalue_fill, - alpha = 0.6, - aes(xmin = obs_stat, xmax = Inf, ymin = 0, ymax = Inf), - ...) + - geom_rect(data = data.frame(obs_stat), fill = pvalue_fill, - alpha = 0.6, - aes(xmin = -Inf, xmax = -obs_stat, ymin = 0, ymax = Inf), - ...) + geom_rect( + data = data.frame(obs_stat), + fill = pvalue_fill, alpha = 0.6, + aes(xmin = obs_stat, xmax = Inf, ymin = 0, ymax = Inf), + ... + ) + + geom_rect( + data = data.frame(obs_stat), + fill = pvalue_fill, alpha = 0.6, + aes(xmin = -Inf, xmax = -obs_stat, ymin = 0, ymax = Inf), + ... + ) } if (direction %in% c("two_sided", "both") && obs_stat < 0) { infer_plot <- infer_plot + - geom_rect(data = data.frame(obs_stat), fill = pvalue_fill, - alpha = 0.6, - aes(xmin = -Inf, xmax = obs_stat, ymin = 0, ymax = Inf), - ...) + - geom_rect(data = data.frame(obs_stat), fill = pvalue_fill, - alpha = 0.6, - aes(xmin = -obs_stat, xmax = Inf, ymin = 0, ymax = Inf), - ...) + geom_rect( + data = data.frame(obs_stat), + fill = pvalue_fill, alpha = 0.6, + aes(xmin = -Inf, xmax = obs_stat, ymin = 0, ymax = Inf), + ... + ) + + geom_rect( + data = data.frame(obs_stat), + fill = pvalue_fill, alpha = 0.6, + aes(xmin = -obs_stat, xmax = Inf, ymin = 0, ymax = Inf), + ... + ) } } } @@ -583,28 +678,33 @@ visualize_both <- function(data, bins, direction, pvalue_fill, endpoints, - ci_fill, ...) { + ci_fill, + ...) { warning_glue( "Check to make sure the conditions have been met for the theoretical ", "method. `infer` currently does not check these for you." ) if (!(attr(data, "stat") %in% c("t", "z", "Chisq", "F"))) { - stop_glue("Your `calculate`d statistic and the theoretical distribution ", - "are on different scales. Use a standardized `stat` instead.") + stop_glue( + "Your `calculate`d statistic and the theoretical distribution are on ", + "different scales. Use a standardized `stat` instead." 
+ ) } if (attr(data, "theory_type") %in% c("Two sample t", "Slope with t")) { - infer_plot <- both_t_plot(data = data, - deg_freedom = attr(data, "distr_param"), - statistic_text = "t", - dens_color = dens_color, - bins = bins, - direction = direction, - obs_stat = obs_stat, - pvalue_fill = pvalue_fill, - endpoints = endpoints, - ci_fill = ci_fill) + infer_plot <- both_t_plot( + data = data, + deg_freedom = attr(data, "distr_param"), + statistic_text = "t", + dens_color = dens_color, + bins = bins, + direction = direction, + obs_stat = obs_stat, + pvalue_fill = pvalue_fill, + endpoints = endpoints, + ci_fill = ci_fill + ) } else if (attr(data, "theory_type") == "ANOVA") { infer_plot <- both_anova_plot( data = data, @@ -617,32 +717,39 @@ visualize_both <- function(data, bins, obs_stat = obs_stat, pvalue_fill = pvalue_fill, endpoints = endpoints, - ci_fill = ci_fill) - } else if (attr(data, "theory_type") %in% - c("One sample prop z", "Two sample props z")) { - infer_plot <- both_z_plot(data = data, - statistic_text = "z", - dens_color = dens_color, - bins = bins, - direction = direction, - obs_stat = obs_stat, - pvalue_fill = pvalue_fill, - endpoints = endpoints, - ci_fill = ci_fill) + ci_fill = ci_fill + ) } else if ( - attr(data, "theory_type") %in% - c("Chi-square test of indep", "Chi-square Goodness of Fit") + attr(data, "theory_type") %in% c("One sample prop z", "Two sample props z") ) { - infer_plot <- both_chisq_plot(data = data, - deg_freedom = attr(data, "distr_param"), - statistic_text = "Chi-Square", - dens_color = dens_color, - bins = bins, - direction = direction, - obs_stat = obs_stat, - pvalue_fill = pvalue_fill, - endpoints = endpoints, - ci_fill = ci_fill) + infer_plot <- both_z_plot( + data = data, + statistic_text = "z", + dens_color = dens_color, + bins = bins, + direction = direction, + obs_stat = obs_stat, + pvalue_fill = pvalue_fill, + endpoints = endpoints, + ci_fill = ci_fill + ) + } else if ( + attr(data, "theory_type") %in% c( + "Chi-square test of indep", "Chi-square Goodness of Fit" + ) + ) { + infer_plot <- both_chisq_plot( + data = data, + deg_freedom = attr(data, "distr_param"), + statistic_text = "Chi-Square", + dens_color = dens_color, + bins = bins, + direction = direction, + obs_stat = obs_stat, + pvalue_fill = pvalue_fill, + endpoints = endpoints, + ci_fill = ci_fill + ) } # else { # stop_glue('"{attr(data, "theory_type")}" is not implemented yet.') # } diff --git a/R/wrappers.R b/R/wrappers.R index 17c54eee..713876ee 100755 --- a/R/wrappers.R +++ b/R/wrappers.R @@ -47,59 +47,68 @@ t_test <- function(data, formula, # response = NULL, explanatory = NULL, ### Only currently working with formula interface # if (hasArg(formula)) { if (!is.null(f_rhs(formula))) { - data[[as.character(f_rhs(formula))]] <- - factor(data[[as.character(f_rhs(formula))]], - levels = c(order[1], order[2])) + data[[as.character(f_rhs(formula))]] <- factor( + data[[as.character(f_rhs(formula))]], levels = c(order[1], order[2]) + ) # Two sample case prelim <- data %>% - stats::t.test(formula = formula, data = ., - alternative = alternative, - mu = mu, - conf.level = conf_level, - ...) %>% + stats::t.test( + formula = formula, data = ., + alternative = alternative, + mu = mu, + conf.level = conf_level, + ... 
+ ) %>% broom::glance() } else { # One sample case # To fix weird indexing error convert back to data.frame # (Error: Can't use matrix or array for column indexing) data <- as.data.frame(data) - prelim <- stats::t.test(x = data[[as.character(f_lhs(formula))]], - alternative = alternative, - mu = mu, - conf.level = conf_level, - ...) %>% + prelim <- stats::t.test( + x = data[[as.character(f_lhs(formula))]], + alternative = alternative, + mu = mu, + conf.level = conf_level, + ... + ) %>% broom::glance() } if (conf_int) { results <- prelim %>% - dplyr::select(statistic, t_df = parameter, p_value = p.value, - alternative, - lower_ci = conf.low, - upper_ci = conf.high) + dplyr::select( + statistic, t_df = parameter, p_value = p.value, alternative, + lower_ci = conf.low, upper_ci = conf.high + ) } else { results <- prelim %>% - dplyr::select(statistic, t_df = parameter, p_value = p.value, - alternative) + dplyr::select( + statistic, t_df = parameter, p_value = p.value, alternative + ) } return(results) -# } else { - # data %>% - # stats::t.test(formula = substitute(response) ~ substitute(explanatory), - # data = ., - # alternative = alternative) %>% - # broom::glance() %>% - # dplyr::select(statistic, t_df = parameter, p_value = p.value, - # alternative) - -# t.test(y = data[[as.character(substitute(response))]], -# x = data[[as.character(substitute(explanatory))]], -# alternative = alternative) %>% -# broom::glance() %>% -# select(statistic, t_df = parameter, p_value = p.value, alternative) -# } +# } else { +# data %>% +# stats::t.test( +# formula = substitute(response) ~ substitute(explanatory), data = ., +# alternative = alternative +# ) %>% +# broom::glance() %>% +# dplyr::select( +# statistic, t_df = parameter, p_value = p.value, alternative +# ) +# +# t.test( +# y = data[[as.character(substitute(response))]], +# x = data[[as.character(substitute(explanatory))]], +# alternative = alternative +# ) %>% +# broom::glance() %>% +# select(statistic, t_df = parameter, p_value = p.value, alternative) +# } } #' Tidy t-test statistic @@ -181,9 +190,9 @@ chisq_stat <- function(data, formula, ...) 
{ } check_conf_level <- function(conf_level) { - if (class(conf_level) != "numeric" | - conf_level < 0 | - conf_level > 1) { + if ( + class(conf_level) != "numeric" | conf_level < 0 | conf_level > 1 + ) { stop_glue("The `conf_level` argument must be a number between 0 and 1.") } } diff --git a/tests/testthat/test-calculate.R b/tests/testthat/test-calculate.R index f682ff99..e34bc222 100644 --- a/tests/testthat/test-calculate.R +++ b/tests/testthat/test-calculate.R @@ -3,10 +3,10 @@ context("calculate") iris_df <- tibble::as_tibble(iris) iris_tbl <- iris_df %>% - dplyr::mutate(Sepal.Length.Group = - dplyr::if_else(Sepal.Length > 5, ">5", "<=5"), - Sepal.Width.Group = - dplyr::if_else(Sepal.Width > 3, "large", "small")) + dplyr::mutate( + Sepal.Length.Group = dplyr::if_else(Sepal.Length > 5, ">5", "<=5"), + Sepal.Width.Group = dplyr::if_else(Sepal.Width > 3, "large", "small") + ) # calculate arguments test_that("x is a tibble", { @@ -29,19 +29,19 @@ test_that("stat argument is appropriate", { }) test_that("response attribute has been set", { - expect_error(tibble::as.tibble(iris) %>% - calculate(stat = "median") - ) + expect_error( + tibble::as.tibble(iris) %>% calculate(stat = "median") + ) }) test_that("variable chosen is of appropriate class (one var problems)", { # One sample chisq example gen_iris1 <- iris %>% specify(Species ~ NULL) %>% - hypothesize(null = "point", - p = c("setosa" = .5, - "versicolor" = .25, - "virginica" = .25)) %>% + hypothesize( + null = "point", + p = c("setosa" = .5, "versicolor" = .25, "virginica" = .25) + ) %>% generate(reps = 10, type = "simulate") expect_error(calculate(gen_iris1, stat = "mean")) @@ -106,8 +106,9 @@ test_that("response variable is a factor (two var problems)", { # Species has more than 2 levels gen_iris4 <- iris %>% - dplyr::mutate(Sepal.Length.Group = - dplyr::if_else(Sepal.Length > 5, ">5", "<=5")) %>% + dplyr::mutate( + Sepal.Length.Group = dplyr::if_else(Sepal.Length > 5, ">5", "<=5") + ) %>% specify(Sepal.Length.Group ~ Species, success = ">5") %>% hypothesize(null = "independence") %>% generate(reps = 10, type = "permute") @@ -117,17 +118,21 @@ test_that("response variable is a factor (two var problems)", { # Check successful diff in props gen_iris4a <- iris %>% - dplyr::mutate(Sepal.Length.Group = - dplyr::if_else(Sepal.Length > 5, ">5", "<=5")) %>% - dplyr::mutate(Sepal.Width.Group = - dplyr::if_else(Sepal.Width > 3, "large", "small")) %>% + dplyr::mutate( + Sepal.Length.Group = dplyr::if_else(Sepal.Length > 5, ">5", "<=5") + ) %>% + dplyr::mutate( + Sepal.Width.Group = dplyr::if_else(Sepal.Width > 3, "large", "small") + ) %>% specify(Sepal.Length.Group ~ Sepal.Width.Group, success = ">5") %>% hypothesize(null = "independence") %>% generate(reps = 10, type = "permute") - expect_silent(calculate(gen_iris4a, stat = "diff in props", - order = c("large", "small"))) - expect_silent(calculate(gen_iris4a, stat = "z", - order = c("large", "small"))) + expect_silent( + calculate(gen_iris4a, stat = "diff in props", order = c("large", "small")) + ) + expect_silent( + calculate(gen_iris4a, stat = "z", order = c("large", "small")) + ) expect_error(calculate(gen_iris4a, stat = "z")) }) @@ -141,17 +146,18 @@ test_that("response variable is numeric (two var problems)", { test_that("two sample mean-type problems are working", { gen_iris5a <- iris %>% - dplyr::mutate(Sepal.Length.Group = - dplyr::if_else(Sepal.Length > 5, ">5", "<=5")) %>% + dplyr::mutate( + Sepal.Length.Group = dplyr::if_else(Sepal.Length > 5, ">5", "<=5") + ) %>% specify(Sepal.Width 
~ Sepal.Length.Group) %>% hypothesize(null = "independence") %>% generate(reps = 10, type = "permute") expect_error(calculate(gen_iris5a, stat = "diff in means")) - expect_silent(calculate(gen_iris5a, stat = "diff in means", - order = c(">5", "<=5"))) + expect_silent( + calculate(gen_iris5a, stat = "diff in means", order = c(">5", "<=5")) + ) expect_error(calculate(gen_iris5a, stat = "t")) - expect_silent(calculate(gen_iris5a, stat = "t", - order = c(">5", "<=5"))) + expect_silent(calculate(gen_iris5a, stat = "t", order = c(">5", "<=5"))) }) test_that("properties of tibble passed-in are correct", { @@ -167,21 +173,27 @@ test_that("properties of tibble passed-in are correct", { test_that("order is working for diff in means", { gen_iris7 <- iris %>% - dplyr::mutate(Sepal.Length.Group = - dplyr::if_else(Sepal.Length > 5, ">5", "<=5")) %>% + dplyr::mutate( + Sepal.Length.Group = dplyr::if_else(Sepal.Length > 5, ">5", "<=5") + ) %>% specify(Sepal.Width ~ Sepal.Length.Group) %>% hypothesize(null = "independence") %>% generate(reps = 10, type = "permute") - expect_equal(nrow(calculate(gen_iris7, stat = "diff in means", - order = c(">5", "<=5"))), 10) - expect_equal(ncol(calculate(gen_iris7, stat = "diff in means", - order = c(">5", "<=5"))), 2) + expect_equal( + nrow(calculate(gen_iris7, stat = "diff in means", order = c(">5", "<=5"))), + 10 + ) + expect_equal( + ncol(calculate(gen_iris7, stat = "diff in means", order = c(">5", "<=5"))), + 2 + ) }) test_that("chi-square matches chisq.test value", { gen_iris8 <- iris %>% - dplyr::mutate(Petal.Length.Group = - dplyr::if_else(Sepal.Length > 5, ">5", "<=5")) %>% + dplyr::mutate( + Petal.Length.Group = dplyr::if_else(Sepal.Length > 5, ">5", "<=5") + ) %>% specify(Petal.Length.Group ~ Species, success = ">5") %>% hypothesize(null = "independence") %>% generate(reps = 10, type = "permute") @@ -189,8 +201,9 @@ test_that("chi-square matches chisq.test value", { # chisq.test way trad_way <- gen_iris8 %>% dplyr::group_by(replicate) %>% - dplyr::do(broom::tidy(stats::chisq.test(table(.$Petal.Length.Group, - .$Species)))) %>% + dplyr::do(broom::tidy( + stats::chisq.test(table(.$Petal.Length.Group, .$Species)) + )) %>% dplyr::ungroup() %>% dplyr::select(replicate, stat = statistic) # Equal not including attributes @@ -198,85 +211,93 @@ test_that("chi-square matches chisq.test value", { gen_iris9 <- iris %>% specify(Species ~ NULL) %>% - hypothesize(null = "point", - p = c("setosa" = 1/3, - "versicolor" = 1/3, - "virginica" = 1/3)) %>% + hypothesize( + null = "point", + p = c("setosa" = 1/3, "versicolor" = 1/3, "virginica" = 1/3) + ) %>% generate(reps = 10, type = "simulate") infer_way <- calculate(gen_iris9, stat = "Chisq") # chisq.test way trad_way <- gen_iris9 %>% dplyr::group_by(replicate) %>% - dplyr::do(broom::tidy(stats::chisq.test(table(.$Species)))) %>% + dplyr::do(broom::tidy( + stats::chisq.test(table(.$Species)) + )) %>% dplyr::select(replicate, stat = statistic) expect_equal(infer_way, trad_way) gen_iris9a <- iris %>% specify(Species ~ NULL) %>% - hypothesize(null = "point", - p = c("setosa" = 0.8, - "versicolor" = 0.1, - "virginica" = 0.1)) %>% + hypothesize( + null = "point", + p = c("setosa" = 0.8, "versicolor" = 0.1, "virginica" = 0.1) + ) %>% generate(reps = 10, type = "simulate") infer_way <- calculate(gen_iris9a, stat = "Chisq") # chisq.test way trad_way <- gen_iris9a %>% dplyr::group_by(replicate) %>% - dplyr::do(broom::tidy(stats::chisq.test(table(.$Species), - p = c(0.8, 0.1, 0.1)))) %>% + dplyr::do(broom::tidy( + 
stats::chisq.test(table(.$Species), p = c(0.8, 0.1, 0.1)) + )) %>% dplyr::select(replicate, stat = statistic) expect_equal(infer_way, trad_way) }) test_that("`order` is working", { gen_iris10 <- iris %>% - dplyr::mutate(Petal.Length.Group = - dplyr::if_else(Sepal.Length > 5, ">5", "<=5")) %>% + dplyr::mutate( + Petal.Length.Group = dplyr::if_else(Sepal.Length > 5, ">5", "<=5") + ) %>% specify(Petal.Width ~ Petal.Length.Group) %>% hypothesize(null = "independence") %>% generate(reps = 10, type = "permute") - expect_error(calculate(gen_iris10, stat = "diff in means", - order = c(TRUE, FALSE))) + expect_error( + calculate(gen_iris10, stat = "diff in means", order = c(TRUE, FALSE)) + ) gen_iris11 <- iris %>% - dplyr::mutate(Petal.Length.Group = - dplyr::if_else(Sepal.Length > 5, ">5", "<=5")) %>% + dplyr::mutate( + Petal.Length.Group = dplyr::if_else(Sepal.Length > 5, ">5", "<=5") + ) %>% specify(Petal.Width ~ Petal.Length.Group) %>% generate(reps = 10, type = "bootstrap") - expect_error(calculate(gen_iris11, stat = "diff in medians", - order = ">5")) - expect_error(calculate(gen_iris11, stat = "diff in medians", - order = c(NA, ">5"))) - expect_error(calculate(gen_iris11, stat = "diff in medians", - order = c(">5", "<=4"))) - expect_silent(calculate(gen_iris11, stat = "diff in medians", - order = c(">5", "<=5"))) - expect_error(calculate(gen_iris11, stat = "diff in means", - order = c(">5", "<=4", ">4"))) + expect_error( + calculate(gen_iris11, stat = "diff in medians", order = ">5") + ) + expect_error( + calculate(gen_iris11, stat = "diff in medians", order = c(NA, ">5")) + ) + expect_error( + calculate(gen_iris11, stat = "diff in medians", order = c(">5", "<=4")) + ) + expect_silent( + calculate(gen_iris11, stat = "diff in medians", order = c(">5", "<=5")) + ) + expect_error( + calculate(gen_iris11, stat = "diff in means", order = c(">5", "<=4", ">4")) + ) # order not given expect_error(calculate(gen_iris11, stat = "diff in means")) }) test_that('success is working for stat = "prop"', { gen_iris12 <- iris %>% - dplyr::mutate(Sepal.Length.Group = - dplyr::if_else(Sepal.Length > 5, ">5", "<=5")) %>% + dplyr::mutate( + Sepal.Length.Group = dplyr::if_else(Sepal.Length > 5, ">5", "<=5") + ) %>% specify(Sepal.Length.Group ~ NULL, success = ">5") %>% hypothesize(null = "point", p = 0.3) %>% generate(reps = 10, type = "simulate") - expect_silent(gen_iris12 %>% - calculate(stat = "prop")) - expect_silent(gen_iris12 %>% - calculate(stat = "z")) + expect_silent(gen_iris12 %>% calculate(stat = "prop")) + expect_silent(gen_iris12 %>% calculate(stat = "z")) }) test_that("NULL response gives error", { iris_improp <- tibble::as_tibble(iris) %>% dplyr::select(Sepal.Width, Sepal.Length) - expect_error( - iris_improp %>% calculate(stat = "mean") - ) + expect_error(iris_improp %>% calculate(stat = "mean")) }) test_that("Permute F test works", { @@ -298,43 +319,45 @@ test_that("Permute slope/correlation test works", { test_that("order being given when not needed gives warning", { gen_iris15 <- iris %>% - dplyr::mutate(Petal.Length.Group = - dplyr::if_else(Sepal.Length > 4, ">4", "<=4")) %>% + dplyr::mutate( + Petal.Length.Group = dplyr::if_else(Sepal.Length > 4, ">4", "<=4") + ) %>% specify(Petal.Length.Group ~ Species, success = ">4") %>% hypothesize(null = "independence") %>% generate(reps = 10, type = "permute") - expect_warning(calculate(gen_iris15, stat = "Chisq", - order = c("setosa", "virginica"))) + expect_warning( + calculate(gen_iris15, stat = "Chisq", order = c("setosa", "virginica")) + ) }) ## 
Breaks oldrel build. Commented out for now. # test_that("warning given if calculate without generate", { -# expect_warning(iris %>% -# specify(Species ~ NULL) %>% -# hypothesize(null = "point", -# p = c("setosa" = 0.4, -# "versicolor" = 0.4, -# "virginica" = 0.2)) %>% -# #generate(reps = 10, type = "simulate") %>% -# calculate(stat = "Chisq") +# expect_warning( +# iris %>% +# specify(Species ~ NULL) %>% +# hypothesize( +# null = "point", +# p = c("setosa" = 0.4, "versicolor" = 0.4, "virginica" = 0.2) +# ) %>% +# # generate(reps = 10, type = "simulate") %>% +# calculate(stat = "Chisq") # ) -# # }) test_that("specify() %>% calculate() works", { - expect_silent(iris_tbl %>% - specify(Petal.Width ~ NULL) %>% - calculate(stat = "mean") + expect_silent( + iris_tbl %>% specify(Petal.Width ~ NULL) %>% calculate(stat = "mean") ) - expect_error(iris_tbl %>% - specify(Petal.Width ~ NULL) %>% - hypothesize(null = "point", mu = 4) %>% - calculate(stat = "mean") + expect_error( + iris_tbl %>% + specify(Petal.Width ~ NULL) %>% + hypothesize(null = "point", mu = 4) %>% + calculate(stat = "mean") ) - expect_error(iris_tbl %>% - specify(Species ~ NULL) %>% - calculate(stat = "Chisq")) + expect_error( + iris_tbl %>% specify(Species ~ NULL) %>% calculate(stat = "Chisq") + ) }) test_that("One sample t hypothesis test is working", { @@ -352,23 +375,21 @@ test_that("specify done before calculate", { dplyr::select(stat = Sepal.Width) expect_error(calculate(iris_mean, stat = "mean")) - iris_prop <- iris_tbl %>% - dplyr::select(Sepal.Length.Group) + iris_prop <- iris_tbl %>% dplyr::select(Sepal.Length.Group) attr(iris_prop, "response") <- "Sepal.Length.Group" expect_error(calculate(iris_prop, stat = "prop")) }) test_that("chisq GoF has params specified for observed stat", { - no_params <- iris_df %>% - specify(response = Species) + no_params <- iris_df %>% specify(response = Species) expect_error(calculate(no_params, stat = "Chisq")) params <- iris_df %>% specify(response = Species) %>% - hypothesize(null = "point", - p = c("setosa" = .5, - "versicolor" = .25, - "virginica" = .25)) + hypothesize( + null = "point", + p = c("setosa" = .5, "versicolor" = .25, "virginica" = .25) + ) expect_silent(calculate(params, stat = "Chisq")) }) diff --git a/tests/testthat/test-conf_int.R b/tests/testthat/test-conf_int.R index 6413db4a..60da5354 100644 --- a/tests/testthat/test-conf_int.R +++ b/tests/testthat/test-conf_int.R @@ -1,53 +1,32 @@ context("conf_int") iris_tbl <- iris %>% - dplyr::mutate(Sepal.Length.Group = - dplyr::if_else(Sepal.Length > 5, ">5", "<=5"), - Sepal.Width.Group = - dplyr::if_else(Sepal.Width > 3, "large", "small")) + dplyr::mutate( + Sepal.Length.Group = dplyr::if_else(Sepal.Length > 5, ">5", "<=5"), + Sepal.Width.Group = dplyr::if_else(Sepal.Width > 3, "large", "small") + ) iris_calc <- iris_tbl %>% - specify(Sepal.Length.Group ~ Sepal.Width.Group, - success = "<=5") %>% + specify(Sepal.Length.Group ~ Sepal.Width.Group, success = "<=5") %>% hypothesize(null = "independence") %>% generate(reps = 1000) %>% calculate(stat = "diff in props", order = c("large", "small")) obs_diff <- iris_tbl %>% - specify(Sepal.Length.Group ~ Sepal.Width.Group, - success = "<=5") %>% + specify(Sepal.Length.Group ~ Sepal.Width.Group, success = "<=5") %>% calculate(stat = "diff in props", order = c("large", "small")) set.seed(2018) test_df <- tibble::tibble(stat = rnorm(100)) test_that("basics work", { - expect_silent( - test_df %>% - conf_int() - ) - expect_error( - test_df %>% - conf_int(type = "other") - ) - expect_error( - 
test_df %>% - conf_int(level = 1.2) - ) - expect_error( - test_df %>% - conf_int(point_estimate = "help") - ) - expect_silent( - iris_calc %>% get_ci(type = "se", point_estimate = 4) - ) - expect_silent( - iris_calc %>% get_ci(type = "se", point_estimate = obs_diff) - ) - expect_error( - iris_calc %>% get_ci(type = "se", point_estimate = "error") - ) - expect_error( - iris_calc %>% get_ci(type = "se") - ) + expect_silent(test_df %>% conf_int()) + expect_error(test_df %>% conf_int(type = "other")) + expect_error(test_df %>% conf_int(level = 1.2)) + expect_error(test_df %>% conf_int(point_estimate = "help")) + + expect_silent(iris_calc %>% get_ci(type = "se", point_estimate = 4)) + expect_silent(iris_calc %>% get_ci(type = "se", point_estimate = obs_diff)) + expect_error(iris_calc %>% get_ci(type = "se", point_estimate = "error")) + expect_error(iris_calc %>% get_ci(type = "se")) }) diff --git a/tests/testthat/test-generate.R b/tests/testthat/test-generate.R index e92d5f77..c2de7d46 100644 --- a/tests/testthat/test-generate.R +++ b/tests/testthat/test-generate.R @@ -1,11 +1,10 @@ context("generate") mtcars <- as.data.frame(mtcars) %>% - dplyr::mutate(cyl = factor(cyl), - vs = factor(vs), - am = factor(am), - gear = factor(gear), - carb = factor(carb)) + dplyr::mutate( + cyl = factor(cyl), vs = factor(vs), am = factor(am), gear = factor(gear), + carb = factor(carb) + ) hyp_prop <- mtcars %>% specify(response = am, success = "1") %>% @@ -72,19 +71,19 @@ test_that("cohesion with type argument", { }) test_that("sensible output", { - expect_equal(nrow(mtcars) * 500, - nrow(generate(hyp_prop, reps = 500, type = "simulate"))) + expect_equal( + nrow(mtcars) * 500, nrow(generate(hyp_prop, reps = 500, type = "simulate")) + ) expect_silent(generate(hyp_mean, reps = 1, type = "bootstrap")) expect_error(generate(hyp_mean, reps = 1, type = "other")) }) test_that("auto `type` works (generate)", { mtcars <- as.data.frame(mtcars) %>% - dplyr::mutate(cyl = factor(cyl), - vs = factor(vs), - am = factor(am), - gear = factor(gear), - carb = factor(carb)) + dplyr::mutate( + cyl = factor(cyl), vs = factor(vs), am = factor(am), gear = factor(gear), + carb = factor(carb) + ) one_mean <- mtcars %>% specify(response = mpg) %>% # formula alt: mpg ~ NULL @@ -169,36 +168,43 @@ test_that("auto `type` works (generate)", { expect_error(mtcars %>% specify(response = mpg) %>% # formula alt: mpg ~ NULL hypothesize(null = "point", mu = 25) %>% - generate(reps = 100, type = "permute")) + generate(reps = 100, type = "permute") + ) expect_error(mtcars %>% specify(response = mpg) %>% - generate(reps = 100, type = "simulate")) + generate(reps = 100, type = "simulate") + ) expect_error(mtcars %>% specify(response = mpg) %>% # formula alt: mpg ~ NULL hypothesize(null = "point", med = 26) %>% - generate(reps = 100, type = "permute")) + generate(reps = 100, type = "permute") + ) expect_error(mtcars %>% specify(response = am, success = "1") %>% # formula alt: am ~ NULL hypothesize(null = "point", p = .25) %>% - generate(reps = 100, type = "bootstrap")) + generate(reps = 100, type = "bootstrap") + ) expect_error(mtcars %>% specify(am ~ vs, success = "1") %>% # alt: response = am, explanatory = vs hypothesize(null = "independence") %>% - generate(reps = 100, type = "bootstrap")) + generate(reps = 100, type = "bootstrap") + ) expect_error(mtcars %>% specify(cyl ~ NULL) %>% # alt: response = cyl hypothesize(null = "point", p = c("4" = .5, "6" = .25, "8" = .25)) %>% - generate(reps = 100, type = "bootstrap")) + generate(reps = 100, type = 
"bootstrap") + ) expect_error(mtcars %>% specify(cyl ~ am) %>% # alt: response = cyl, explanatory = am hypothesize(null = "independence") %>% - generate(reps = 100, type = "simulate")) + generate(reps = 100, type = "simulate") + ) expect_error(mtcars %>% specify(mpg ~ am) %>% # alt: response = mpg, explanatory = am @@ -208,43 +214,54 @@ test_that("auto `type` works (generate)", { expect_error(mtcars %>% specify(mpg ~ cyl) %>% # alt: response = mpg, explanatory = cyl hypothesize(null = "independence") %>% - generate(reps = 100, type = "simulate")) + generate(reps = 100, type = "simulate") + ) expect_error(mtcars %>% specify(mpg ~ hp) %>% # alt: response = mpg, explanatory = cyl hypothesize(null = "independence") %>% - generate(reps = 100, type = "bootstrap")) + generate(reps = 100, type = "bootstrap") + ) expect_error(mtcars %>% specify(response = am, success = "1") %>% - generate(reps = 100, type = "simulate")) + generate(reps = 100, type = "simulate") + ) expect_error(mtcars %>% specify(mpg ~ am) %>% - generate(reps = 100, type = "permute")) + generate(reps = 100, type = "permute") + ) expect_error(mtcars %>% specify(am ~ vs, success = "1") %>% - generate(reps = 100, type = "simulate")) + generate(reps = 100, type = "simulate") + ) expect_error(mtcars %>% specify(mpg ~ hp) %>% - generate(reps = 100, type = "simulate")) + generate(reps = 100, type = "simulate") + ) }) test_that("mismatches lead to error", { expect_error(mtcars %>% generate(reps = 10, type = "permute")) - expect_error(mtcars %>% specify(am ~ NULL, success = "1") %>% - hypothesize(null = "independence", p = c("1" = 0.5)) %>% - generate(reps = 100, type = "simulate")) - expect_error(mtcars %>% - specify(cyl ~ NULL) %>% # alt: response = cyl - hypothesize(null = "point", p = c("4" = .5, "6" = .25, - "8" = .25)) %>% - generate(reps = 100, type = "bootstrap")) - expect_error(mtcars %>% - specify(mpg ~ hp) %>% - generate(reps = 100, type = "other")) + expect_error( + mtcars %>% + specify(am ~ NULL, success = "1") %>% + hypothesize(null = "independence", p = c("1" = 0.5)) %>% + generate(reps = 100, type = "simulate") + ) + expect_error( + mtcars %>% + specify(cyl ~ NULL) %>% # alt: response = cyl + hypothesize( + null = "point", p = c("4" = .5, "6" = .25, "8" = .25) + ) %>% + generate(reps = 100, type = "bootstrap")) + expect_error( + mtcars %>% specify(mpg ~ hp) %>% generate(reps = 100, type = "other") + ) }) test_that("generate() handles `NULL` value of `type`", { diff --git a/tests/testthat/test-hypothesize.R b/tests/testthat/test-hypothesize.R index 3ffbc004..22d80e8a 100644 --- a/tests/testthat/test-hypothesize.R +++ b/tests/testthat/test-hypothesize.R @@ -1,11 +1,10 @@ context("hypothesize") mtcars <- as.data.frame(mtcars) %>% - dplyr::mutate(cyl = factor(cyl), - vs = factor(vs), - am = factor(am), - gear = factor(gear), - carb = factor(carb)) + dplyr::mutate( + cyl = factor(cyl), vs = factor(vs), am = factor(am), gear = factor(gear), + carb = factor(carb) + ) one_mean <- mtcars %>% specify(response = mpg) %>% # formula alt: mpg ~ NULL @@ -80,34 +79,47 @@ test_that("hypothesize arguments function", { expect_error(mtcars_s %>% hypothesize(null = "independence")) expect_error(mtcars_s %>% hypothesize(null = "point")) # Produces error on win-build -# expect_warning(mtcars_s %>% -# hypothesize(null = c("point", "independence"), mu = 3)) - - expect_error(mtcars %>% dplyr::select(vs) %>% - hypothesize(null = "point", mu = 1)) - - expect_error(mtcars %>% specify(response = vs) %>% - hypothesize(null = "point", mu = 1)) - - 
expect_error(mtcars %>% specify(response = vs, success = "1") %>% - hypothesize(null = "point", p = 1.1)) - expect_error(mtcars %>% specify(response = vs, success = "1") %>% - hypothesize(null = "point", p = -23)) - - expect_error(mtcars_s %>% - hypothesize(null = "point", - p = c("4" = .2, "6" = .25, "8" = .25))) +# expect_warning( +# mtcars_s %>% hypothesize(null = c("point", "independence"), mu = 3) +# ) + + expect_error( + mtcars %>% dplyr::select(vs) %>% hypothesize(null = "point", mu = 1) + ) + + expect_error( + mtcars %>% specify(response = vs) %>% hypothesize(null = "point", mu = 1) + ) + + expect_error( + mtcars %>% + specify(response = vs, success = "1") %>% + hypothesize(null = "point", p = 1.1) + ) + expect_error( + mtcars %>% + specify(response = vs, success = "1") %>% + hypothesize(null = "point", p = -23) + ) + + expect_error( + mtcars_s %>% + hypothesize( + null = "point", p = c("4" = .2, "6" = .25, "8" = .25) + ) + ) expect_error(mtcars_s %>% hypothesize(null = "point", p = 0.2)) - expect_warning(mtcars %>% specify(mpg ~ vs) %>% - hypothesize(null = "independence", p = 0.5)) + expect_warning( + mtcars %>% + specify(mpg ~ vs) %>% + hypothesize(null = "independence", p = 0.5) + ) expect_error(mtcars_s %>% hypothesize()) }) test_that("params correct", { - expect_error(hypothesize(one_prop_specify, - null = "point", mu = 2)) - expect_error(hypothesize(one_mean_specify, - null = "point", mean = 0.5)) + expect_error(hypothesize(one_prop_specify, null = "point", mu = 2)) + expect_error(hypothesize(one_mean_specify, null = "point", mean = 0.5)) }) diff --git a/tests/testthat/test-p_value.R b/tests/testthat/test-p_value.R index 7ea6d232..a24951a5 100644 --- a/tests/testthat/test-p_value.R +++ b/tests/testthat/test-p_value.R @@ -1,14 +1,13 @@ context("p_value") iris_tbl <- iris %>% - dplyr::mutate(Sepal.Length.Group = - dplyr::if_else(Sepal.Length > 5, ">5", "<=5"), - Sepal.Width.Group = - dplyr::if_else(Sepal.Width > 3, "large", "small")) + dplyr::mutate( + Sepal.Length.Group = dplyr::if_else(Sepal.Length > 5, ">5", "<=5"), + Sepal.Width.Group = dplyr::if_else(Sepal.Width > 3, "large", "small") + ) iris_calc <- iris_tbl %>% - specify(Sepal.Length.Group ~ Sepal.Width.Group, - success = "<=5") %>% + specify(Sepal.Length.Group ~ Sepal.Width.Group, success = "<=5") %>% hypothesize(null = "independence") %>% generate(reps = 1000) %>% calculate(stat = "diff in props", order = c("large", "small")) @@ -17,10 +16,7 @@ set.seed(2018) test_df <- tibble::tibble(stat = rnorm(100)) test_that("direction is appropriate", { - expect_error( - test_df %>% - p_value(obs_stat = 0.5, direction = "righ") - ) + expect_error(test_df %>% p_value(obs_stat = 0.5, direction = "righ")) }) test_that("p_value makes sense", { @@ -38,8 +34,7 @@ test_that("p_value makes sense", { ) expect_equal( iris_calc %>% - p_value(obs_stat = median(iris_calc$stat), - direction = "both") %>% + p_value(obs_stat = median(iris_calc$stat), direction = "both") %>% dplyr::pull(), expected = 1 ) @@ -57,8 +52,7 @@ test_that("p_value makes sense", { ) expect_equal( iris_calc %>% - get_pvalue(obs_stat = median(iris_calc$stat) + 1, - direction = "two_sided") %>% + get_pvalue(obs_stat = median(iris_calc$stat) + 1, direction = "two_sided") %>% dplyr::pull(), expected = 0 ) diff --git a/tests/testthat/test-print.R b/tests/testthat/test-print.R index 8933bde3..118287bc 100644 --- a/tests/testthat/test-print.R +++ b/tests/testthat/test-print.R @@ -1,8 +1,10 @@ context("print") test_that("print works", { - expect_output(print(iris %>% - 
specify(Sepal.Length ~ Sepal.Width) %>% - hypothesize(null = "independence") %>% - generate(reps = 10, type = "permute"))) + expect_output(print( + iris %>% + specify(Sepal.Length ~ Sepal.Width) %>% + hypothesize(null = "independence") %>% + generate(reps = 10, type = "permute") + )) }) diff --git a/tests/testthat/test-rep_sample_n.R b/tests/testthat/test-rep_sample_n.R index a6e0eca9..ef9fae6d 100644 --- a/tests/testthat/test-rep_sample_n.R +++ b/tests/testthat/test-rep_sample_n.R @@ -7,18 +7,20 @@ population <- tibble::data_frame( ) test_that("rep_sample_n works", { - expect_silent(population %>% - rep_sample_n(size = 2, reps = 10)) - expect_error(population %>% - rep_sample_n(size = 2, reps = 10, - prob = rep(x = 1/5, times = 100))) - expect_error(population %>% - rep_sample_n(size = 2, reps = 10, - prob = c(1/2, 1/2))) - expect_error(population %>% - rep_sample_n(size = 2, reps = 10, - prob = c(0.25, 1/5, 1/5, 1/5, 0.15))) - test_rep <- population %>% - rep_sample_n(size = 2, reps = 10) + expect_silent(population %>% rep_sample_n(size = 2, reps = 10)) + expect_error( + population %>% + rep_sample_n(size = 2, reps = 10, prob = rep(x = 1/5, times = 100)) + ) + expect_error( + population %>% + rep_sample_n(size = 2, reps = 10, prob = c(1/2, 1/2)) + ) + expect_error( + population %>% + rep_sample_n(size = 2, reps = 10, prob = c(0.25, 1/5, 1/5, 1/5, 0.15)) + ) + + test_rep <- population %>% rep_sample_n(size = 2, reps = 10) expect_equal(c("replicate", names(population)), names(test_rep)) }) diff --git a/tests/testthat/test-specify.R b/tests/testthat/test-specify.R index f858e3be..001f14ec 100644 --- a/tests/testthat/test-specify.R +++ b/tests/testthat/test-specify.R @@ -1,26 +1,20 @@ context("specify") mtcars <- as.data.frame(mtcars) %>% - dplyr::mutate(cyl = factor(cyl), - vs = factor(vs), - am = factor(am), - gear = factor(gear), - carb = factor(carb)) + dplyr::mutate( + cyl = factor(cyl), vs = factor(vs), am = factor(am), gear = factor(gear), + carb = factor(carb) + ) -one_nonshift_mean <- mtcars %>% - specify(response = mpg) +one_nonshift_mean <- mtcars %>% specify(response = mpg) -one_nonshift_prop <- mtcars %>% - specify(response = am, success = "1") +one_nonshift_prop <- mtcars %>% specify(response = am, success = "1") -two_means_boot <- mtcars %>% - specify(mpg ~ am) +two_means_boot <- mtcars %>% specify(mpg ~ am) -two_props_boot <- mtcars %>% - specify(am ~ vs, success = "1") +two_props_boot <- mtcars %>% specify(am ~ vs, success = "1") -slope_boot <- mtcars %>% - specify(mpg ~ hp) +slope_boot <- mtcars %>% specify(mpg ~ hp) test_that("auto `type` works (specify)", { expect_equal(attr(one_nonshift_mean, "type"), "bootstrap") diff --git a/tests/testthat/test-visualize.R b/tests/testthat/test-visualize.R index 6973527d..b69a6b39 100644 --- a/tests/testthat/test-visualize.R +++ b/tests/testthat/test-visualize.R @@ -2,7 +2,6 @@ context("visualize") library(dplyr) - Sepal.Width_resamp <- iris %>% specify(Sepal.Width ~ NULL) %>% hypothesize(null = "point", med = 3) %>% @@ -10,13 +9,12 @@ Sepal.Width_resamp <- iris %>% calculate(stat = "median") iris_tbl <- tibble::as_tibble(iris) %>% - dplyr::mutate(Sepal.Length.Group = - dplyr::if_else(Sepal.Length > 5, ">5", "<=5"), - Sepal.Width.Group = - dplyr::if_else(Sepal.Width > 3, "large", "small")) + dplyr::mutate( + Sepal.Length.Group = dplyr::if_else(Sepal.Length > 5, ">5", "<=5"), + Sepal.Width.Group = dplyr::if_else(Sepal.Width > 3, "large", "small") + ) -obs_slope <- lm(Sepal.Length ~ Sepal.Width, - data = iris_tbl) %>% +obs_slope <- 
lm(Sepal.Length ~ Sepal.Width, data = iris_tbl) %>% broom::tidy() %>% dplyr::filter(term == "Sepal.Width") %>% dplyr::select(estimate) %>% @@ -28,11 +26,14 @@ obs_diff <- iris_tbl %>% summarize(diff(prop)) %>% pull() -obs_z <- sqrt(stats::prop.test(x = table(iris_tbl$Sepal.Length.Group, - iris_tbl$Sepal.Width.Group), - n = nrow(iris_tbl), - alternative = "two.sided", - correct = FALSE)$statistic) +obs_z <- sqrt( + stats::prop.test( + x = table(iris_tbl$Sepal.Length.Group, iris_tbl$Sepal.Width.Group), + n = nrow(iris_tbl), + alternative = "two.sided", + correct = FALSE + )$statistic +) obs_diff_mean <- iris_tbl %>% group_by(Sepal.Length.Group) %>% @@ -41,8 +42,7 @@ obs_diff_mean <- iris_tbl %>% pull() obs_t <- iris_tbl %>% - t_stat(Sepal.Width ~ Sepal.Length.Group, - order = c(">5", "<=5")) + t_stat(Sepal.Width ~ Sepal.Length.Group, order = c(">5", "<=5")) obs_F <- anova( aov(formula = Sepal.Width ~ Species, data = iris_tbl) @@ -51,219 +51,238 @@ obs_F <- anova( test_that("visualize basic tests", { expect_silent(visualize(Sepal.Width_resamp)) - expect_error( - Sepal.Width_resamp %>% visualize(bins = "yep") + expect_error(Sepal.Width_resamp %>% visualize(bins = "yep")) + expect_silent( + iris_tbl %>% + specify(Sepal.Length ~ Sepal.Width) %>% + hypothesize(null = "independence") %>% + generate(reps = 100, type = "permute") %>% + calculate(stat = "slope") %>% + visualize(obs_stat = obs_slope, direction = "right") ) - expect_silent(iris_tbl %>% - specify(Sepal.Length ~ Sepal.Width) %>% - hypothesize(null = "independence") %>% - generate(reps = 100, type = "permute") %>% - calculate(stat = "slope") %>% - visualize(obs_stat = obs_slope, direction = "right")) # obs_stat not specified - expect_error(iris_tbl %>% - specify(Sepal.Width.Group ~ Sepal.Length.Group, - success = "large") %>% - hypothesize(null = "independence") %>% - generate(reps = 100, type = "permute") %>% - calculate(stat = "diff in props", - order = c(">5", "<=5")) %>% - visualize(direction = "both")) - - expect_silent(iris_tbl %>% - specify(Sepal.Width.Group ~ Sepal.Length.Group, - success = "large") %>% - hypothesize(null = "independence") %>% - generate(reps = 100, type = "permute") %>% - calculate(stat = "diff in props", - order = c(">5", "<=5")) %>% - visualize(direction = "both", obs_stat = obs_diff)) - - expect_warning(iris_tbl %>% - specify(Sepal.Width.Group ~ Sepal.Length.Group, - success = "large") %>% - hypothesize(null = "independence") %>% - calculate(stat = "z", order = c(">5", "<=5")) %>% - visualize(method = "theoretical")) + expect_error( + iris_tbl %>% + specify(Sepal.Width.Group ~ Sepal.Length.Group, success = "large") %>% + hypothesize(null = "independence") %>% + generate(reps = 100, type = "permute") %>% + calculate(stat = "diff in props", order = c(">5", "<=5")) %>% + visualize(direction = "both") + ) + + expect_silent( + iris_tbl %>% + specify(Sepal.Width.Group ~ Sepal.Length.Group, success = "large") %>% + hypothesize(null = "independence") %>% + generate(reps = 100, type = "permute") %>% + calculate(stat = "diff in props", order = c(">5", "<=5")) %>% + visualize(direction = "both", obs_stat = obs_diff) + ) + + expect_warning( + iris_tbl %>% + specify(Sepal.Width.Group ~ Sepal.Length.Group, success = "large") %>% + hypothesize(null = "independence") %>% + calculate(stat = "z", order = c(">5", "<=5")) %>% + visualize(method = "theoretical") + ) # diff in props and z on different scales - expect_error(expect_warning(iris_tbl %>% - specify(Sepal.Width.Group ~ Sepal.Length.Group, - success = "large") %>% - 
hypothesize(null = "independence") %>% - generate(reps = 100, type = "permute") %>% - calculate(stat = "diff in props", - order = c(">5", "<=5")) %>% - visualize(method = "both", direction = "both", - obs_stat = obs_diff) - )) - - expect_silent(iris_tbl %>% - specify(Sepal.Width.Group ~ Sepal.Length.Group, - success = "large") %>% - hypothesize(null = "independence") %>% - generate(reps = 100, type = "permute") %>% - calculate(stat = "diff in props", - order = c(">5", "<=5")) %>% - visualize()) - - expect_warning(iris_tbl %>% - specify(Sepal.Width.Group ~ Sepal.Length.Group, - success = "large") %>% - hypothesize(null = "independence") %>% - generate(reps = 100, type = "permute") %>% - calculate(stat = "z", - order = c(">5", "<=5")) %>% - visualize(method = "both", direction = "both", - obs_stat = obs_z)) - - expect_warning(iris_tbl %>% - specify(Sepal.Width.Group ~ Sepal.Length.Group, - success = "large") %>% - hypothesize(null = "independence") %>% - generate(reps = 100, type = "permute") %>% - calculate(stat = "z", - order = c("<=5", ">5")) %>% - visualize(method = "both", direction = "both", - obs_stat = -obs_z)) - - expect_warning(iris_tbl %>% - specify(Sepal.Length ~ Sepal.Width.Group) %>% - hypothesize(null = "independence") %>% - generate(reps = 100, type = "permute") %>% - calculate(stat = "t", order = c("small", "large")) %>% - visualize(method = "both", direction = "left", - obs_stat = -obs_t)) - - expect_warning(iris_tbl %>% - specify(Sepal.Length ~ Sepal.Width.Group) %>% - hypothesize(null = "independence") %>% -# generate(reps = 100, type = "permute") %>% - calculate(stat = "t", order = c("small", "large")) %>% - visualize(method = "theoretical", direction = "left", - obs_stat = -obs_t)) - - expect_warning(iris_tbl %>% - specify(Sepal.Length ~ Sepal.Length.Group) %>% - hypothesize(null = "independence") %>% - visualize(method = "theoretical")) - - expect_warning(iris_tbl %>% - specify(Sepal.Length ~ Species) %>% - hypothesize(null = "independence") %>% - visualize(method = "theoretical")) - - expect_warning(iris_tbl %>% - specify(Sepal.Length ~ Species) %>% - hypothesize(null = "independence") %>% - generate(reps = 100, type = "permute") %>% - calculate(stat = "F") %>% - visualize(method = "both", obs_stat = obs_F, - direction = "right")) - - expect_warning(iris_tbl %>% - specify(Sepal.Length ~ Species) %>% - hypothesize(null = "independence") %>% - generate(reps = 100, type = "permute") %>% - calculate(stat = "F") %>% - visualize(method = "both", obs_stat = obs_F, - direction = "left")) - - expect_warning(iris_tbl %>% - specify(Sepal.Width.Group ~ Species, - success = "large") %>% - hypothesize(null = "independence") %>% - generate(reps = 100, type = "permute") %>% - calculate(stat = "Chisq") %>% - visualize(method = "both", obs_stat = obs_F, - direction = "right")) - - expect_warning(iris_tbl %>% - specify(Sepal.Width.Group ~ Species, - success = "large") %>% - hypothesize(null = "independence") %>% - # calculate(stat = "Chisq") %>% - visualize(method = "theoretical", obs_stat = obs_F, - direction = "right")) - - expect_warning(iris_tbl %>% - specify(Species ~ NULL) %>% - hypothesize(null = "point", - p = c("setosa" = 0.4, - "versicolor" = 0.4, - "virginica" = 0.2)) %>% - generate(reps = 100, type = "simulate") %>% - calculate(stat = "Chisq") %>% - visualize(method = "both")) + expect_error( + expect_warning( + iris_tbl %>% + specify(Sepal.Width.Group ~ Sepal.Length.Group, success = "large") %>% + hypothesize(null = "independence") %>% + generate(reps = 100, type = 
"permute") %>% + calculate(stat = "diff in props", order = c(">5", "<=5")) %>% + visualize(method = "both", direction = "both", obs_stat = obs_diff) + ) + ) + + expect_silent( + iris_tbl %>% + specify(Sepal.Width.Group ~ Sepal.Length.Group, success = "large") %>% + hypothesize(null = "independence") %>% + generate(reps = 100, type = "permute") %>% + calculate(stat = "diff in props", order = c(">5", "<=5")) %>% + visualize() + ) + + expect_warning( + iris_tbl %>% + specify(Sepal.Width.Group ~ Sepal.Length.Group, success = "large") %>% + hypothesize(null = "independence") %>% + generate(reps = 100, type = "permute") %>% + calculate(stat = "z", order = c(">5", "<=5")) %>% + visualize(method = "both", direction = "both", obs_stat = obs_z) + ) + + expect_warning( + iris_tbl %>% + specify(Sepal.Width.Group ~ Sepal.Length.Group, success = "large") %>% + hypothesize(null = "independence") %>% + generate(reps = 100, type = "permute") %>% + calculate(stat = "z", order = c("<=5", ">5")) %>% + visualize(method = "both", direction = "both", obs_stat = -obs_z) + ) + + expect_warning( + iris_tbl %>% + specify(Sepal.Length ~ Sepal.Width.Group) %>% + hypothesize(null = "independence") %>% + generate(reps = 100, type = "permute") %>% + calculate(stat = "t", order = c("small", "large")) %>% + visualize(method = "both", direction = "left", obs_stat = -obs_t) + ) + + expect_warning( + iris_tbl %>% + specify(Sepal.Length ~ Sepal.Width.Group) %>% + hypothesize(null = "independence") %>% +# generate(reps = 100, type = "permute") %>% + calculate(stat = "t", order = c("small", "large")) %>% + visualize(method = "theoretical", direction = "left", obs_stat = -obs_t) + ) + + expect_warning( + iris_tbl %>% + specify(Sepal.Length ~ Sepal.Length.Group) %>% + hypothesize(null = "independence") %>% + visualize(method = "theoretical") + ) + + expect_warning( + iris_tbl %>% + specify(Sepal.Length ~ Species) %>% + hypothesize(null = "independence") %>% + visualize(method = "theoretical") + ) + + expect_warning( + iris_tbl %>% + specify(Sepal.Length ~ Species) %>% + hypothesize(null = "independence") %>% + generate(reps = 100, type = "permute") %>% + calculate(stat = "F") %>% + visualize(method = "both", obs_stat = obs_F, direction = "right") + ) + + expect_warning( + iris_tbl %>% + specify(Sepal.Length ~ Species) %>% + hypothesize(null = "independence") %>% + generate(reps = 100, type = "permute") %>% + calculate(stat = "F") %>% + visualize(method = "both", obs_stat = obs_F, direction = "left") + ) + + expect_warning( + iris_tbl %>% + specify(Sepal.Width.Group ~ Species, success = "large") %>% + hypothesize(null = "independence") %>% + generate(reps = 100, type = "permute") %>% + calculate(stat = "Chisq") %>% + visualize(method = "both", obs_stat = obs_F, direction = "right") + ) + + expect_warning( + iris_tbl %>% + specify(Sepal.Width.Group ~ Species, success = "large") %>% + hypothesize(null = "independence") %>% +# calculate(stat = "Chisq") %>% + visualize(method = "theoretical", obs_stat = obs_F, direction = "right") + ) + + expect_warning( + iris_tbl %>% + specify(Species ~ NULL) %>% + hypothesize( + null = "point", + p = c("setosa" = 0.4, "versicolor" = 0.4, "virginica" = 0.2) + ) %>% + generate(reps = 100, type = "simulate") %>% + calculate(stat = "Chisq") %>% + visualize(method = "both")) # traditional instead of theoretical - expect_error(iris_tbl %>% - specify(Species ~ NULL) %>% - hypothesize(null = "point", - p = c("setosa" = 0.4, - "versicolor" = 0.4, - "virginica" = 0.2)) %>% -# generate(reps = 100, type = 
"simulate") %>% -# calculate(stat = "Chisq") %>% - visualize(method = "traditional")) - - expect_warning(iris_tbl %>% - specify(Species ~ NULL) %>% - hypothesize(null = "point", - p = c("setosa" = 0.4, - "versicolor" = 0.4, - "virginica" = 0.2)) %>% - # generate(reps = 100, type = "simulate") %>% - # calculate(stat = "Chisq") %>% - visualize(method = "theoretical")) - - expect_silent(iris_tbl %>% - specify(Petal.Width ~ Sepal.Width.Group) %>% - hypothesize(null = "independence") %>% - generate(reps = 10, type = "permute") %>% - calculate(stat = "diff in means", - order = c("large", "small")) %>% - visualize(direction = "both", - obs_stat = obs_diff_mean) + expect_error( + iris_tbl %>% + specify(Species ~ NULL) %>% + hypothesize( + null = "point", + p = c("setosa" = 0.4, "versicolor" = 0.4, "virginica" = 0.2) + ) %>% +# generate(reps = 100, type = "simulate") %>% +# calculate(stat = "Chisq") %>% + visualize(method = "traditional") + ) + + expect_warning( + iris_tbl %>% + specify(Species ~ NULL) %>% + hypothesize( + null = "point", + p = c("setosa" = 0.4, "versicolor" = 0.4, "virginica" = 0.2) + ) %>% +# generate(reps = 100, type = "simulate") %>% +# calculate(stat = "Chisq") %>% + visualize(method = "theoretical") + ) + + expect_silent( + iris_tbl %>% + specify(Petal.Width ~ Sepal.Width.Group) %>% + hypothesize(null = "independence") %>% + generate(reps = 10, type = "permute") %>% + calculate(stat = "diff in means", order = c("large", "small")) %>% + visualize(direction = "both",obs_stat = obs_diff_mean) ) # Produces warning first for not checking conditions but would also error - expect_error(expect_warning(iris_tbl %>% - specify(Petal.Width ~ Sepal.Width.Group) %>% - hypothesize(null = "independence") %>% - generate(reps = 100, type = "permute") %>% - calculate(stat = "diff in means", - order = c("large", "small")) %>% - visualize(method = "both", direction = "both", - obs_stat = obs_diff_mean) - )) - - expect_warning(iris_tbl %>% - specify(Petal.Width ~ Sepal.Width.Group) %>% - hypothesize(null = "independence") %>% - generate(reps = 100, type = "permute") %>% - calculate(stat = "diff in means", - order = c("large", "small")) %>% - visualize(method = "theoretical", direction = "both", - obs_stat = obs_diff_mean)) - - expect_warning(iris_tbl %>% - specify(Sepal.Width.Group ~ NULL, success = "small") %>% - hypothesize(null = "point", p = 0.8) %>% -# generate(reps = 100, type = "simulate") %>% -# calculate(stat = "z") %>% - visualize(method = "theoretical", - obs_stat = 2, # Should probably update - direction = "both")) - - expect_silent(iris_tbl %>% - specify(Petal.Width ~ NULL) %>% - hypothesize(null = "point", mu = 1.3) %>% - generate(reps = 100, type = "bootstrap") %>% - calculate(stat = "mean") %>% - visualize(direction = "left", - obs_stat = mean(iris$Petal.Width))) + expect_error( + expect_warning( + iris_tbl %>% + specify(Petal.Width ~ Sepal.Width.Group) %>% + hypothesize(null = "independence") %>% + generate(reps = 100, type = "permute") %>% + calculate(stat = "diff in means", order = c("large", "small")) %>% + visualize(method = "both", direction = "both", obs_stat = obs_diff_mean) + ) + ) + + expect_warning( + iris_tbl %>% + specify(Petal.Width ~ Sepal.Width.Group) %>% + hypothesize(null = "independence") %>% + generate(reps = 100, type = "permute") %>% + calculate(stat = "diff in means", order = c("large", "small")) %>% + visualize( + method = "theoretical", direction = "both", obs_stat = obs_diff_mean + ) + ) + + expect_warning( + iris_tbl %>% + specify(Sepal.Width.Group ~ 
NULL, success = "small") %>% + hypothesize(null = "point", p = 0.8) %>% +# generate(reps = 100, type = "simulate") %>% +# calculate(stat = "z") %>% + visualize( + method = "theoretical", + obs_stat = 2, # Should probably update + direction = "both" + ) + ) + + expect_silent( + iris_tbl %>% + specify(Petal.Width ~ NULL) %>% + hypothesize(null = "point", mu = 1.3) %>% + generate(reps = 100, type = "bootstrap") %>% + calculate(stat = "mean") %>% + visualize(direction = "left", obs_stat = mean(iris$Petal.Width)) + ) }) test_that("get_percentile works", { @@ -272,101 +291,105 @@ test_that("get_percentile works", { test_that("obs_stat as a data.frame works", { mean_petal_width <- iris_tbl %>% - specify(Petal.Width ~ NULL) %>% - calculate(stat = "mean") - expect_silent(iris_tbl %>% - specify(Petal.Width ~ NULL) %>% - hypothesize(null = "point", mu = 4) %>% - generate(reps = 100, type = "bootstrap") %>% - calculate(stat = "mean") %>% - visualize(obs_stat = mean_petal_width)) + specify(Petal.Width ~ NULL) %>% + calculate(stat = "mean") + expect_silent( + iris_tbl %>% + specify(Petal.Width ~ NULL) %>% + hypothesize(null = "point", mu = 4) %>% + generate(reps = 100, type = "bootstrap") %>% + calculate(stat = "mean") %>% + visualize(obs_stat = mean_petal_width) + ) mean_df_test <- data.frame(x = c(4.1, 1), y = c(1, 2)) - expect_warning(iris_tbl %>% - specify(Petal.Width ~ NULL) %>% - hypothesize(null = "point", mu = 4) %>% - generate(reps = 100, type = "bootstrap") %>% - calculate(stat = "mean") %>% - visualize(obs_stat = mean_df_test)) + expect_warning( + iris_tbl %>% + specify(Petal.Width ~ NULL) %>% + hypothesize(null = "point", mu = 4) %>% + generate(reps = 100, type = "bootstrap") %>% + calculate(stat = "mean") %>% + visualize(obs_stat = mean_df_test) + ) }) test_that('method = "both" behaves nicely', { - # stop_glue('`generate()` and `calculate()` are both required ', - # 'to be done prior to `visualize(method = "both")`') - expect_error(iris_tbl %>% - specify(Petal.Width ~ NULL) %>% - hypothesize(null = "point", mu = 4) %>% - generate(reps = 100, type = "bootstrap") %>% -# calculate(stat = "mean") %>% - visualize(method = "both")) - - expect_warning(iris_tbl %>% - specify(Petal.Width ~ Sepal.Length.Group) %>% - hypothesize(null = "point", mu = 4) %>% - generate(reps = 10, type = "bootstrap") %>% - calculate(stat = "t", order = c(">5", "<=5")) %>% - visualize(method = "both")) + # stop_glue( + # '`generate()` and `calculate()` are both required to be done prior ', + # 'to `visualize(method = "both")`' + # ) + expect_error( + iris_tbl %>% + specify(Petal.Width ~ NULL) %>% + hypothesize(null = "point", mu = 4) %>% + generate(reps = 100, type = "bootstrap") %>% +# calculate(stat = "mean") %>% + visualize(method = "both") + ) + + expect_warning( + iris_tbl %>% + specify(Petal.Width ~ Sepal.Length.Group) %>% + hypothesize(null = "point", mu = 4) %>% + generate(reps = 10, type = "bootstrap") %>% + calculate(stat = "t", order = c(">5", "<=5")) %>% + visualize(method = "both") + ) }) test_that("Traditional right-tailed tests have warning if not right-tailed", { - expect_warning(iris_tbl %>% - specify(Sepal.Width.Group ~ Species, - success = "large") %>% - hypothesize(null = "independence") %>% - generate(reps = 100, type = "permute") %>% - calculate(stat = "Chisq") %>% - visualize(method = "both", obs_stat = 2, direction = "left")) - expect_warning(iris_tbl %>% - specify(Sepal.Length ~ Species) %>% - hypothesize(null = "independence") %>% - generate(reps = 100, type = "permute") %>% - calculate(stat = 
"F") %>% - visualize(method = "both", obs_stat = 2, - direction = "two_sided")) - expect_warning(iris_tbl %>% - specify(Sepal.Width.Group ~ Species, - success = "large") %>% - hypothesize(null = "independence") %>% -# generate(reps = 100, type = "permute") %>% - calculate(stat = "Chisq") %>% - visualize(method = "theoretical", obs_stat = 2, - direction = "left")) - expect_warning(iris_tbl %>% - specify(Sepal.Length ~ Species) %>% - hypothesize(null = "independence") %>% -# generate(reps = 100, type = "permute") %>% - calculate(stat = "F") %>% - visualize(method = "theoretical", obs_stat = 2, - direction = "two_sided")) + expect_warning( + iris_tbl %>% + specify(Sepal.Width.Group ~ Species, success = "large") %>% + hypothesize(null = "independence") %>% + generate(reps = 100, type = "permute") %>% + calculate(stat = "Chisq") %>% + visualize(method = "both", obs_stat = 2, direction = "left") + ) + expect_warning( + iris_tbl %>% + specify(Sepal.Length ~ Species) %>% + hypothesize(null = "independence") %>% + generate(reps = 100, type = "permute") %>% + calculate(stat = "F") %>% + visualize(method = "both", obs_stat = 2, direction = "two_sided") + ) + expect_warning( + iris_tbl %>% + specify(Sepal.Width.Group ~ Species, success = "large") %>% + hypothesize(null = "independence") %>% +# generate(reps = 100, type = "permute") %>% + calculate(stat = "Chisq") %>% + visualize(method = "theoretical", obs_stat = 2, direction = "left") + ) + expect_warning( + iris_tbl %>% + specify(Sepal.Length ~ Species) %>% + hypothesize(null = "independence") %>% +# generate(reps = 100, type = "permute") %>% + calculate(stat = "F") %>% + visualize(method = "theoretical", obs_stat = 2, direction = "two_sided") + ) }) test_that("confidence interval plots are working", { iris_boot <- iris_tbl %>% - specify(Sepal.Width.Group ~ Sepal.Length.Group, - success = "large") %>% + specify(Sepal.Width.Group ~ Sepal.Length.Group, success = "large") %>% generate(reps = 100) %>% - calculate(stat = "diff in props", - order = c(">5", "<=5")) + calculate(stat = "diff in props", order = c(">5", "<=5")) df_error <- tibble::tibble(col1 = rnorm(5), col2 = rnorm(5)) vec_error <- 1:10 perc_ci <- iris_boot %>% get_ci() - expect_error( - iris_boot %>% visualize(endpoints = df_error) - ) + expect_error(iris_boot %>% visualize(endpoints = df_error)) - expect_warning( - iris_boot %>% visualize(endpoints = vec_error) - ) + expect_warning(iris_boot %>% visualize(endpoints = vec_error)) expect_silent( - iris_boot %>% visualize(endpoints = perc_ci, - direction = "between") + iris_boot %>% visualize(endpoints = perc_ci, direction = "between") ) - expect_warning( - iris_boot %>% visualize(obs_stat = 3, endpoints = perc_ci) - ) + expect_warning(iris_boot %>% visualize(obs_stat = 3, endpoints = perc_ci)) }) diff --git a/tests/testthat/test-wrappers.R b/tests/testthat/test-wrappers.R index 028ebaa7..6f676e75 100644 --- a/tests/testthat/test-wrappers.R +++ b/tests/testthat/test-wrappers.R @@ -5,48 +5,47 @@ iris2 <- iris %>% droplevels(.$Species) iris3 <- iris %>% - dplyr::mutate(Sepal.Length.Group = - dplyr::if_else(Sepal.Length > 5, ">5", "<=5")) + dplyr::mutate( + Sepal.Length.Group = dplyr::if_else(Sepal.Length > 5, ">5", "<=5") + ) test_that("t_test works", { # order is missing expect_error(iris2 %>% t_test(Sepal.Width ~ Species)) - expect_error(iris2 %>% t_test(response = "Sepal.Width", - explanatory = "Species")) + expect_error( + iris2 %>% t_test(response = "Sepal.Width", explanatory = "Species") + ) ## Not implemented -# expect_silent(iris2 %>% 
t_test(response = Sepal.Width, -# explanatory = Species)) +# expect_silent( +# iris2 %>% t_test(response = Sepal.Width, explanatory = Species) +# ) }) test_that("chisq_test works", { expect_silent(iris3 %>% chisq_test(Sepal.Length.Group ~ Species)) new_way <- iris3 %>% chisq_test(Sepal.Length.Group ~ Species) - old_way <- chisq.test(x = table(iris3$Species, - iris3$Sepal.Length.Group)) %>% + old_way <- chisq.test(x = table(iris3$Species, iris3$Sepal.Length.Group)) %>% broom::glance() %>% dplyr::select(statistic, chisq_df = parameter, p_value = p.value) expect_equal(new_way, old_way) ## Not implemented - # expect_silent(iris3 %>% chisq_test(response = Sepal.Length.Group, - # explanatory = Species)) + # expect_silent( + # iris3 %>% chisq_test(response = Sepal.Length.Group, explanatory = Species) + # ) }) test_that("_stat functions work", { # Test of independence - expect_silent( - iris3 %>% chisq_stat(Sepal.Length.Group ~ Species) - ) + expect_silent(iris3 %>% chisq_stat(Sepal.Length.Group ~ Species)) another_way <- iris3 %>% chisq_test(Sepal.Length.Group ~ Species) %>% dplyr::select(statistic) %>% dplyr::rename(stat = statistic) - obs_stat_way <- iris3 %>% - chisq_stat(Sepal.Length.Group ~ Species) + obs_stat_way <- iris3 %>% chisq_stat(Sepal.Length.Group ~ Species) one_more <- chisq.test( - table(iris3$Species, - iris3$Sepal.Length.Group) + table(iris3$Species, iris3$Sepal.Length.Group) )$statistic expect_equivalent(another_way, obs_stat_way) @@ -64,8 +63,9 @@ test_that("_stat functions work", { # Two sample t expect_silent( - iris2 %>% t_stat(Sepal.Width ~ Species, - order = c("virginica", "versicolor")) + iris2 %>% t_stat( + Sepal.Width ~ Species, order = c("virginica", "versicolor") + ) ) another_way <- iris2 %>% t_test(Sepal.Width ~ Species, order = c("virginica", "versicolor")) %>% @@ -86,31 +86,43 @@ test_that("_stat functions work", { test_that("conf_int argument works", { expect_equal( - names(iris2 %>% - t_test(Sepal.Width ~ Species, order = c("virginica", "versicolor"), - conf_int = FALSE)), + names( + iris2 %>% + t_test( + Sepal.Width ~ Species, order = c("virginica", "versicolor"), + conf_int = FALSE + ) + ), c("statistic", "t_df", "p_value", "alternative") ) expect_equal( - names(iris2 %>% - t_test(Sepal.Width ~ Species, order = c("virginica", "versicolor"), - conf_int = TRUE)), + names( + iris2 %>% + t_test( + Sepal.Width ~ Species, order = c("virginica", "versicolor"), + conf_int = TRUE + ) + ), c("statistic", "t_df", "p_value", "alternative", "lower_ci", "upper_ci") ) ci_test <- iris2 %>% - t_test(Sepal.Width ~ Species, order = c("versicolor", "virginica"), - conf_int = TRUE, conf_level = 0.9) - old_way <- t.test(formula = Sepal.Width ~ Species, - data = iris2, - conf.level = 0.9)[["conf.int"]] + t_test( + Sepal.Width ~ Species, order = c("versicolor", "virginica"), + conf_int = TRUE, conf_level = 0.9 + ) + old_way <- t.test( + formula = Sepal.Width ~ Species, data = iris2, conf.level = 0.9 + )[["conf.int"]] expect_equal(ci_test$lower_ci[1], old_way[1]) expect_equal(ci_test$upper_ci[1], old_way[2]) expect_error( iris2 %>% - t_test(Petal.Width ~ Species, order = c("versicolor", "virginica"), - conf_int = TRUE, conf_level = 1.1) + t_test( + Petal.Width ~ Species, order = c("versicolor", "virginica"), + conf_int = TRUE, conf_level = 1.1 + ) ) # Check that var.equal produces different results @@ -120,11 +132,11 @@ test_that("conf_int argument works", { no_var_equal <- iris_small %>% t_stat(Petal.Width ~ Species, order = c("versicolor", "virginica")) var_equal <- iris_small %>% 
- t_stat(Petal.Width ~ Species, order = c("versicolor", "virginica"), - var.equal = TRUE) - expect_false( - no_var_equal == var_equal - ) + t_stat( + Petal.Width ~ Species, order = c("versicolor", "virginica"), + var.equal = TRUE + ) + expect_false(no_var_equal == var_equal) shortcut_no_var_equal <- iris_small %>% specify(Petal.Width ~ Species) %>% @@ -132,9 +144,9 @@ test_that("conf_int argument works", { shortcut_var_equal <- iris_small %>% specify(Petal.Width ~ Species) %>% - calculate(stat = "t", order = c("versicolor", "virginica"), - var.equal = TRUE) - expect_false( - shortcut_no_var_equal == shortcut_var_equal - ) + calculate( + stat = "t", order = c("versicolor", "virginica"), + var.equal = TRUE + ) + expect_false(shortcut_no_var_equal == shortcut_var_equal) }) From ce92b84bc130c1879f616f663d7872336024b395 Mon Sep 17 00:00:00 2001 From: "evgeni.chasnovski" Date: Sun, 5 Aug 2018 17:06:30 +0300 Subject: [PATCH 06/78] Remove unnecessary `return()`s. --- R/calculate.R | 2 +- R/conf_int.R | 2 +- R/generate.R | 12 ++++++------ R/hypothesize.R | 2 +- R/p_value.R | 6 +++--- R/specify.R | 2 +- R/utils.R | 8 ++++---- R/wrappers.R | 2 +- 8 files changed, 18 insertions(+), 18 deletions(-) diff --git a/R/calculate.R b/R/calculate.R index 488b3db0..c6b8a84e 100755 --- a/R/calculate.R +++ b/R/calculate.R @@ -117,7 +117,7 @@ calculate <- function(x, result <- select(result, stat) } - return(result) + result } calc_impl <- function(type, x, order, ...) { diff --git a/R/conf_int.R b/R/conf_int.R index 089fdff9..9e24b3f1 100644 --- a/R/conf_int.R +++ b/R/conf_int.R @@ -56,7 +56,7 @@ conf_int <- function(x, level = 0.95, type = "percentile", ) } - return(ci) + ci } check_ci_args <- function(x, level, type, point_estimate) { diff --git a/R/generate.R b/R/generate.R index ed0c475d..20a265e5 100755 --- a/R/generate.R +++ b/R/generate.R @@ -70,11 +70,11 @@ generate <- function(x, reps = 1, type = attr(x, "type"), ...) { # } if (type == "bootstrap") { - return(bootstrap(x, reps, ...)) + bootstrap(x, reps, ...) } else if (type == "permute") { - return(permute(x, reps, ...)) + permute(x, reps, ...) } else if (type == "simulate") { - return(simulate(x, reps, ...)) + simulate(x, reps, ...) } # else if (!(type %in% c("bootstrap", "permute", "simulate"))) { # stop_glue( # "Choose one of the available options for `type`: ", @@ -130,7 +130,7 @@ bootstrap <- function(x, reps = 1, ...) { class(result) <- append("infer", class(result)) - return(result) + result } #' @importFrom dplyr bind_rows group_by @@ -144,7 +144,7 @@ permute <- function(x, reps = 1, ...) { class(df_out) <- append("infer", class(df_out)) - return(df_out) + df_out } permute_once <- function(x, ...) { @@ -180,5 +180,5 @@ simulate <- function(x, reps = 1, ...) { class(rep_tbl) <- append("infer", class(rep_tbl)) - return(dplyr::group_by(rep_tbl, replicate)) + dplyr::group_by(rep_tbl, replicate) } diff --git a/R/hypothesize.R b/R/hypothesize.R index 48cd20dc..4902d2a2 100755 --- a/R/hypothesize.R +++ b/R/hypothesize.R @@ -83,5 +83,5 @@ hypothesize <- function(x, null, ...) { # } # } - return(tibble::as_tibble(x)) + tibble::as_tibble(x) } diff --git a/R/p_value.R b/R/p_value.R index 09026d02..cd5f8af9 100644 --- a/R/p_value.R +++ b/R/p_value.R @@ -65,7 +65,7 @@ p_value <- function(x, obs_stat, direction) { # ) # } - return(pvalue) + pvalue } simulation_based_p_value <- function(x, obs_stat, direction) { @@ -100,9 +100,9 @@ two_sided_p_value <- function(x, obs_stat) { # Catch all if adding both sides produces a number # larger than 1. 
Should update with test in that # scenario instead of using >= - return(tibble::tibble(p_value = 1)) + tibble::tibble(p_value = 1) } else { - return(tibble::tibble(p_value = basic_p_value)) + tibble::tibble(p_value = basic_p_value) } } diff --git a/R/specify.R b/R/specify.R index 1293be5c..854559c8 100755 --- a/R/specify.R +++ b/R/specify.R @@ -157,5 +157,5 @@ specify <- function(x, formula, response = NULL, # add "infer" class class(x) <- append("infer", class(x)) - return(x) + x } diff --git a/R/utils.R b/R/utils.R index 26c7d307..2dfbac5d 100644 --- a/R/utils.R +++ b/R/utils.R @@ -1,12 +1,12 @@ format_params <- function(x) { par_levels <- get_par_levels(x) fct_levels <- as.character(unique(dplyr::pull(x, !!attr(x, "response")))) - return(attr(x, "params")[match(fct_levels, par_levels)]) + attr(x, "params")[match(fct_levels, par_levels)] } get_par_levels <- function(x) { par_names <- names(attr(x, "params")) - return(gsub("^.\\.", "", par_names)) + gsub("^.\\.", "", par_names) } set_attributes <- function(to, from = x) { @@ -23,7 +23,7 @@ set_attributes <- function(to, from = x) { attr(to, "generate") <- attr(from, "generate") attr(to, "type") <- attr(from, "type") - return(to) + to } explanatory_variable <- function(x) { @@ -275,7 +275,7 @@ parse_params <- function(dots, x) { # warning_glue("Proportions do not sum to 1, normalizing automatically.") # } - return(unlist(dots)) + unlist(dots) } hypothesize_checks <- function(x, null) { diff --git a/R/wrappers.R b/R/wrappers.R index 713876ee..b68b623e 100755 --- a/R/wrappers.R +++ b/R/wrappers.R @@ -89,7 +89,7 @@ t_test <- function(data, formula, # response = NULL, explanatory = NULL, ) } - return(results) + results # } else { # data %>% # stats::t.test( From 5b6aeb635d8167d22e480877461e7cbb9a8f2825 Mon Sep 17 00:00:00 2001 From: "evgeni.chasnovski" Date: Sun, 5 Aug 2018 17:41:10 +0300 Subject: [PATCH 07/78] Update predicates inside `if`. --- R/calculate.R | 15 +++++++++------ R/conf_int.R | 6 +++--- R/generate.R | 8 ++++---- R/hypothesize.R | 2 +- R/set_params.R | 24 +++++++++++------------- R/specify.R | 12 +++++++----- R/utils.R | 20 ++++++++++---------- R/visualize.R | 16 ++++++++-------- R/wrappers.R | 2 +- 9 files changed, 54 insertions(+), 51 deletions(-) diff --git a/R/calculate.R b/R/calculate.R index c6b8a84e..6c7b03a0 100755 --- a/R/calculate.R +++ b/R/calculate.R @@ -73,15 +73,17 @@ calculate <- function(x, } if ( - stat %in% c("diff in means", "diff in medians", "diff in props") || - (!is.null(attr(x, "theory_type")) && - attr(x, "theory_type") %in% c("Two sample props z", "Two sample t")) + (stat %in% c("diff in means", "diff in medians", "diff in props")) || + ( + !is.null(attr(x, "theory_type")) && + (attr(x, "theory_type") %in% c("Two sample props z", "Two sample t")) + ) ) { check_order(x, explanatory_variable(x), order) } if (!( - stat %in% c("diff in means", "diff in medians", "diff in props") || + (stat %in% c("diff in means", "diff in medians", "diff in props")) || ( !is.null(attr(x, "theory_type")) && attr(x, "theory_type") %in% c("Two sample props z", "Two sample t") @@ -327,7 +329,8 @@ calc_impl.t <- function(stat, x, order, ...) { # the standardization formulas are different. # # Standardized slope # else if ( - # (attr(x, "theory_type") == "Slope/correlation with t") && stat == "slope" + # (attr(x, "theory_type") == "Slope/correlation with t") && + # (stat == "slope") # ) { # explan_string <- as.character(attr(x, "explanatory")) # @@ -342,7 +345,7 @@ calc_impl.t <- function(stat, x, order, ...) 
{ # # Standardized correlation # else if ( # (attr(x, "theory_type") == "Slope/correlation with t") && - # stat == "correlation" + # (stat == "correlation") # ) { # x %>% # dplyr::summarize( diff --git a/R/conf_int.R b/R/conf_int.R index 9e24b3f1..2632bf51 100644 --- a/R/conf_int.R +++ b/R/conf_int.R @@ -69,7 +69,7 @@ check_ci_args <- function(x, level, type, point_estimate) { } check_type(x, is.data.frame) check_type(level, is.numeric) - if (level <= 0 || level >= 1) { + if ((level <= 0) || (level >= 1)) { stop_glue("The value of `level` must be between 0 and 1 non-inclusive.") } @@ -77,13 +77,13 @@ check_ci_args <- function(x, level, type, point_estimate) { stop_glue('The options for `type` are "percentile" or "se".') } - if (type == "se" && is.null(point_estimate)) { + if ((type == "se") && is.null(point_estimate)) { stop_glue( 'A numeric value needs to be given for `point_estimate` for `type = "se"' ) } - if (type == "se" && is.vector(point_estimate)) { + if ((type == "se") && is.vector(point_estimate)) { check_type(point_estimate, is.numeric) } } diff --git a/R/generate.R b/R/generate.R index 20a265e5..16e54745 100755 --- a/R/generate.R +++ b/R/generate.R @@ -42,7 +42,7 @@ generate <- function(x, reps = 1, type = attr(x, "type"), ...) { attr(x, "generate") <- TRUE if ( - type == "permute" && + (type == "permute") && any(is.null(attr(x, "response")), is.null(attr(x, "explanatory"))) ) { stop_glue( @@ -52,14 +52,14 @@ generate <- function(x, reps = 1, type = attr(x, "type"), ...) { } ## Can't get to these anymore with tests # if ( -# type == "simulate" && -# attr(x, "null") != "point" && +# (type == "simulate") && +# (attr(x, "null") != "point") && # !(length(grep("p.", names(attr(x, "params")))) >= 1) # ) { # stop_glue("Simulation requires a `point` null hypothesis on proportions.") # } # if ( -# type == "bootstrap" && +# (type == "bootstrap") && # !(attr(attr(x, "params"), "names") %in% c("mu", "med", "sigma")) && # !is.null(attr(x, "null")) # ) { diff --git a/R/hypothesize.R b/R/hypothesize.R index 4902d2a2..2603958d 100755 --- a/R/hypothesize.R +++ b/R/hypothesize.R @@ -52,7 +52,7 @@ hypothesize <- function(x, null, ...) 
{ } - if (!is.null(null) && null == "independence") { + if (!is.null(null) && (null == "independence")) { attr(x, "type") <- "permute" } diff --git a/R/set_params.R b/R/set_params.R index 4a741843..0867a5f7 100755 --- a/R/set_params.R +++ b/R/set_params.R @@ -13,8 +13,7 @@ set_params <- function(x) { # One variable if ( !is.null(attr(x, "response")) && is.null(attr(x, "explanatory")) && - !is.null(attr(x, "response_type")) && - is.null(attr(x, "explanatory_type")) + !is.null(attr(x, "response_type")) && is.null(attr(x, "explanatory_type")) ) { # One mean @@ -29,7 +28,7 @@ set_params <- function(x) { } else if ( # One prop - attr(x, "response_type") == "factor" && (num_response_levels == 2) + (attr(x, "response_type") == "factor") && (num_response_levels == 2) ) { # No parameters since standard normal attr(x, "theory_type") <- "One sample prop z" @@ -45,15 +44,14 @@ set_params <- function(x) { # Two variables if ( !is.null(attr(x, "response")) && !is.null(attr(x, "explanatory")) & - !is.null(attr(x, "response_type")) && - !is.null(attr(x, "explanatory_type")) + !is.null(attr(x, "response_type")) && !is.null(attr(x, "explanatory_type")) ) { attr(x, "type") <- "bootstrap" # Response is numeric, explanatory is categorical if ( - attr(x, "response_type") %in% c("integer", "numeric") & - attr(x, "explanatory_type") == "factor" + (attr(x, "response_type") %in% c("integer", "numeric")) & + (attr(x, "explanatory_type") == "factor") ) { # Two sample means (t distribution) @@ -92,16 +90,16 @@ set_params <- function(x) { # Response is categorical, explanatory is categorical if ( - attr(x, "response_type") == "factor" & - attr(x, "explanatory_type") == "factor" + (attr(x, "response_type") == "factor") & + (attr(x, "explanatory_type") == "factor") ) { attr(x, "type") <- "bootstrap" # Two sample proportions (z distribution) # Parameter(s) not needed since standard normal if ( - length(levels(response_variable(x))) == 2 & - length(levels(explanatory_variable(x))) == 2 + (length(levels(response_variable(x))) == 2) & + (length(levels(explanatory_variable(x))) == 2) ) { attr(x, "theory_type") <- "Two sample props z" } else { @@ -122,8 +120,8 @@ set_params <- function(x) { # Response is numeric, explanatory is numeric if ( - attr(x, "response_type") %in% c("integer", "numeric") & - attr(x, "explanatory_type") %in% c("integer", "numeric") + (attr(x, "response_type") %in% c("integer", "numeric")) & + (attr(x, "explanatory_type") %in% c("integer", "numeric")) ) { response_string <- as.character(attr(x, "response")) explanatory_string <- as.character(attr(x, "explanatory")) diff --git a/R/specify.R b/R/specify.R index 854559c8..937b3f09 100755 --- a/R/specify.R +++ b/R/specify.R @@ -39,7 +39,7 @@ specify <- function(x, formula, response = NULL, mutate_if(is.character, as.factor) %>% mutate_if(is.logical, as.factor) - if ((!methods::hasArg(formula) && !methods::hasArg(response))) { + if (!methods::hasArg(formula) && !methods::hasArg(response)) { stop_glue("Please give the `response` variable.") } if (methods::hasArg(formula)) { @@ -137,12 +137,14 @@ specify <- function(x, formula, response = NULL, } if ( - attr(x, "response_type") == "factor" && is.null(success) && - length(levels(response_variable(x))) == 2 && + (attr(x, "response_type") == "factor") && is.null(success) && + (length(levels(response_variable(x))) == 2) && ( is.null(attr(x, "explanatory_type")) || - (!is.null(attr(x, "explanatory_type")) && - length(levels(explanatory_variable(x))) == 2) + ( + !is.null(attr(x, "explanatory_type")) && + 
(length(levels(explanatory_variable(x))) == 2) + ) ) ) { stop_glue( diff --git a/R/utils.R b/R/utils.R index 2dfbac5d..66af83d3 100644 --- a/R/utils.R +++ b/R/utils.R @@ -195,28 +195,28 @@ check_point_params <- function(x, stat) { hyp_text <- 'to be set in `hypothesize()`.' if (!is.null(attr(x, "null"))) { if (stat %in% c("mean", "median", "sd", "prop")) { - if ((stat == "mean" && !("mu" %in% param_names))) { + if ((stat == "mean") && !("mu" %in% param_names)) { stop_glue('`stat == "mean"` requires `"mu"` {hyp_text}') } - if ((!(stat == "mean") && ("mu" %in% param_names))) { + if (!(stat == "mean") && ("mu" %in% param_names)) { stop_glue('`"mu"` does not correspond to `stat = "{stat}"`.') } - if ((stat == "median" && !("med" %in% param_names))) { + if ((stat == "median") && !("med" %in% param_names)) { stop_glue('`stat == "median"` requires `"med"` {hyp_text}') } - if ((!(stat == "median") && ("med" %in% param_names))) { + if (!(stat == "median") && ("med" %in% param_names)) { stop_glue('`"med"` does not correspond to `stat = "{stat}"`.') } ## Tests unable to get to - # if ((stat == "sigma" && !("sd" %in% param_names))) { + # if ((stat == "sigma") && !("sd" %in% param_names)) { # stop_glue('`stat == "sd"` requires `"sigma"` {hyp_text}') # } - if ((!(stat == "sd") && ("sigma" %in% param_names))) { + if (!(stat == "sd") && ("sigma" %in% param_names)) { stop_glue('`"sigma"` does not correspond to `stat = "{stat}"`.') } ## Tests unable to get to - # if (stat == "prop" && !(any(grepl("p.", param_names)))) { + # if ((stat == "prop") && !any(grepl("p.", param_names))) { # stop_glue('`stat == "prop"` requires `"p"` {hyp_text}') # } } @@ -243,13 +243,13 @@ parse_params <- function(dots, x) { # 0 index of dots if (length(p_ind)) { if (length(dots[[p_ind]]) == 1) { - if (attr(x, "null") == "point" && is.null(attr(x, "success"))) { + if ((attr(x, "null") == "point") && is.null(attr(x, "success"))) { stop_glue( "A point null regarding a proportion requires that `success` ", "be indicated in `specify()`." ) } - if (dots$p < 0 || dots$p > 1) { + if ((dots$p < 0) || (dots$p > 1)) { stop_glue( "The value suggested for `p` is not between 0 and 1, inclusive." 
) @@ -304,7 +304,7 @@ hypothesize_checks <- function(x, null) { ) } - if (null == "independence" && !has_explanatory(x)) { + if ((null == "independence") && !has_explanatory(x)) { stop_glue( 'Please `specify()` an explanatory and a response variable when ', 'testing\n', diff --git a/R/visualize.R b/R/visualize.R index 100cd5a3..bda7887c 100755 --- a/R/visualize.R +++ b/R/visualize.R @@ -109,7 +109,7 @@ visualize <- function(data, bins = 15, method = "simulation", obs_stat <- check_obs_stat(obs_stat) if ( !is.null(direction) && - (is.null(obs_stat) + is.null(endpoints)) != 1 + (is.null(obs_stat) + is.null(endpoints) != 1) ) { stop_glue( "Shading requires either `endpoints` values for a confidence interval ", @@ -151,7 +151,7 @@ visualize <- function(data, bins = 15, method = "simulation", } if ( - ("replicate" %in% names(data)) && length(unique(data$replicate)) < 100 + ("replicate" %in% names(data)) && (length(unique(data$replicate)) < 100) ) { warning_glue( "With only {length(unique(data$stat))} replicates, it may be ", @@ -440,8 +440,8 @@ shade_density_check <- function(data, } if ( - direction %in% c("two_sided", "both") && - obs_stat >= stats::median(data$stat) + (direction %in% c("two_sided", "both")) && + (obs_stat >= stats::median(data$stat)) ) { gg_plot <- gg_plot + geom_rect( @@ -464,8 +464,8 @@ shade_density_check <- function(data, } if ( - direction %in% c("two_sided", "both") && - obs_stat < stats::median(data$stat) + (direction %in% c("two_sided", "both")) && + (obs_stat < stats::median(data$stat)) ) { gg_plot <- gg_plot + geom_rect( @@ -632,7 +632,7 @@ visualize_theoretical <- function(data, # Assuming two-tailed shading will only happen with theoretical # distributions centered at 0 - if (direction %in% c("two_sided", "both") && obs_stat >= 0) { + if ((direction %in% c("two_sided", "both")) && (obs_stat >= 0)) { infer_plot <- infer_plot + geom_rect( data = data.frame(obs_stat), @@ -648,7 +648,7 @@ visualize_theoretical <- function(data, ) } - if (direction %in% c("two_sided", "both") && obs_stat < 0) { + if ((direction %in% c("two_sided", "both")) && (obs_stat < 0)) { infer_plot <- infer_plot + geom_rect( data = data.frame(obs_stat), diff --git a/R/wrappers.R b/R/wrappers.R index b68b623e..a6097acc 100755 --- a/R/wrappers.R +++ b/R/wrappers.R @@ -191,7 +191,7 @@ chisq_stat <- function(data, formula, ...) 
{ check_conf_level <- function(conf_level) { if ( - class(conf_level) != "numeric" | conf_level < 0 | conf_level > 1 + (class(conf_level) != "numeric") | (conf_level < 0) | (conf_level > 1) ) { stop_glue("The `conf_level` argument must be a number between 0 and 1.") } From c0a892b36243471d74b314854f9fb415934f5eb0 Mon Sep 17 00:00:00 2001 From: ismayc Date: Sun, 5 Aug 2018 15:30:35 -0700 Subject: [PATCH 08/78] Fix long line --- tests/testthat/test-p_value.R | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/tests/testthat/test-p_value.R b/tests/testthat/test-p_value.R index a24951a5..a6458bbc 100644 --- a/tests/testthat/test-p_value.R +++ b/tests/testthat/test-p_value.R @@ -52,7 +52,9 @@ test_that("p_value makes sense", { ) expect_equal( iris_calc %>% - get_pvalue(obs_stat = median(iris_calc$stat) + 1, direction = "two_sided") %>% + get_pvalue(obs_stat = median(iris_calc$stat) + 1, + direction = "two_sided" + ) %>% dplyr::pull(), expected = 0 ) From 43a1119bc942be344638a203b535861cb9060179 Mon Sep 17 00:00:00 2001 From: ismayc Date: Sun, 5 Aug 2018 16:26:46 -0700 Subject: [PATCH 09/78] Get spacing under 80 chars per line --- tests/testthat/test-p_value.R | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/tests/testthat/test-p_value.R b/tests/testthat/test-p_value.R index a6458bbc..e239ec50 100644 --- a/tests/testthat/test-p_value.R +++ b/tests/testthat/test-p_value.R @@ -53,8 +53,7 @@ test_that("p_value makes sense", { expect_equal( iris_calc %>% get_pvalue(obs_stat = median(iris_calc$stat) + 1, - direction = "two_sided" - ) %>% + direction = "two_sided") %>% dplyr::pull(), expected = 0 ) From a84a101c85f97cdfc9cba011dcd6c9283d1e9ba0 Mon Sep 17 00:00:00 2001 From: ismayc Date: Mon, 6 Aug 2018 08:12:15 -0700 Subject: [PATCH 10/78] Fix indentation to match common style guide --- tests/testthat/test-p_value.R | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/tests/testthat/test-p_value.R b/tests/testthat/test-p_value.R index e239ec50..f572764f 100644 --- a/tests/testthat/test-p_value.R +++ b/tests/testthat/test-p_value.R @@ -52,8 +52,9 @@ test_that("p_value makes sense", { ) expect_equal( iris_calc %>% - get_pvalue(obs_stat = median(iris_calc$stat) + 1, - direction = "two_sided") %>% + get_pvalue( + obs_stat = median(iris_calc$stat) + 1, direction = "two_sided" + ) %>% dplyr::pull(), expected = 0 ) From 5e1ff74b4fa4798ddb20099c74d39f7fed82ded4 Mon Sep 17 00:00:00 2001 From: ismayc Date: Mon, 6 Aug 2018 08:12:42 -0700 Subject: [PATCH 11/78] Update vignettes and documentation --- inst/doc/chisq_test.R | 2 +- inst/doc/chisq_test.Rmd | 2 +- inst/doc/chisq_test.html | 8 ++++---- inst/doc/two_sample_t.html | 9 ++++----- man/get_pvalue.Rd | 4 ++-- man/visualize.Rd | 4 ++-- 6 files changed, 14 insertions(+), 15 deletions(-) diff --git a/inst/doc/chisq_test.R b/inst/doc/chisq_test.R index eaa45c86..69d754ed 100644 --- a/inst/doc/chisq_test.R +++ b/inst/doc/chisq_test.R @@ -58,7 +58,7 @@ fli_small %>% ## ----eval=FALSE---------------------------------------------------------- # fli_small %>% -# specify(origin ~ season) %>% %>% # alt: response = origin, explanatory = season +# specify(origin ~ season) %>% # alt: response = origin, explanatory = season # hypothesize(null = "independence") %>% # generate(reps = 1000, type = "permute") %>% # calculate(stat = "Chisq") %>% diff --git a/inst/doc/chisq_test.Rmd b/inst/doc/chisq_test.Rmd index 54c3d53b..f2dd959a 100644 --- a/inst/doc/chisq_test.Rmd +++ b/inst/doc/chisq_test.Rmd @@ -118,7 +118,7 @@ fli_small 
%>% 
 ```{r eval=FALSE}
 fli_small %>% 
-  specify(origin ~ season) %>% %>% # alt: response = origin, explanatory = season
+  specify(origin ~ season) %>% # alt: response = origin, explanatory = season
   hypothesize(null = "independence") %>% 
   generate(reps = 1000, type = "permute") %>% 
   calculate(stat = "Chisq") %>% 
diff --git a/inst/doc/chisq_test.html b/inst/doc/chisq_test.html
index 56f2b356..285dae6a 100644
--- a/inst/doc/chisq_test.html
+++ b/inst/doc/chisq_test.html
[Rendered-HTML vignette diff ("Chi-squared test example using nycflights13 flights data", Chester Ismay), reduced to its recoverable changes: the build date moves from 2018-07-06 to 2018-08-06; the theoretical-method warning now reads "{infer} currently does not check these for you."; and the duplicated `%>%` is dropped from the `specify(origin ~ season)` pipeline shown under "Overlay appropriate chi-square distribution on top of permuted statistics".]
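For reference, the corrected vignette pipeline assembles as follows. This is a sketch only: the closing `visualize()` call is not visible in this excerpt, so `method = "both"` and its arguments are assumptions based on the rendered output summarized above, and `fli_small` and `obs_chisq` are objects built earlier in the vignette from the nycflights13 flights data.

```r
# Sketch of the corrected chi-square permutation pipeline (not verbatim from
# the vignette): `fli_small` and `obs_chisq` come from earlier vignette
# chunks; the visualize() arguments are assumed from the rendered HTML.
library(dplyr)
library(infer)

fli_small %>%
  specify(origin ~ season) %>% # alt: response = origin, explanatory = season
  hypothesize(null = "independence") %>%
  generate(reps = 1000, type = "permute") %>%
  calculate(stat = "Chisq") %>%
  visualize(method = "both", obs_stat = obs_chisq, direction = "right")
```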
diff --git a/inst/doc/two_sample_t.html b/inst/doc/two_sample_t.html
index 6e7cd5f0..605cdffa 100644
--- a/inst/doc/two_sample_t.html
+++ b/inst/doc/two_sample_t.html
[Rendered-HTML vignette diff ("Two sample t test example using nycflights13 flights data", Chester Ismay), reduced to its recoverable changes: the build date moves from 2018-07-06 to 2018-08-06; the "## Warning: Removed 15 rows containing missing values." line shown after the `t_stat(formula = arr_delay ~ half_year, order = c("h1", "h2"))` chunk is dropped; and the theoretical-method warning after `visualize(method = "theoretical", obs_stat = obs_t, direction = "two_sided")` now reads "{infer} currently does not check these for you.".]
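Again for reference, a sketch of how the two chunks summarized above fit together. Only the `t_stat()` call and the final `visualize()` call are taken from the diff; the permutation steps in between, `reps = 1000`, and the `calculate()` arguments are assumptions.

```r
# Sketch only: ties the t_stat() shortcut to the theoretical visualization.
# `fli_small` and its `half_year` column are built earlier in the vignette;
# the middle of the pipeline is assumed, not shown in this patch.
library(dplyr)
library(infer)

obs_t <- fli_small %>%
  t_stat(formula = arr_delay ~ half_year, order = c("h1", "h2"))

fli_small %>%
  specify(arr_delay ~ half_year) %>%
  hypothesize(null = "independence") %>%
  generate(reps = 1000, type = "permute") %>%
  calculate(stat = "t", order = c("h1", "h2")) %>%
  visualize(method = "theoretical", obs_stat = obs_t, direction = "two_sided")
```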

diff --git a/man/get_pvalue.Rd b/man/get_pvalue.Rd index 2ba28733..da59b651 100644 --- a/man/get_pvalue.Rd +++ b/man/get_pvalue.Rd @@ -33,11 +33,11 @@ d_hat <- mtcars_df \%>\% specify(mpg ~ am) \%>\% calculate(stat = "diff in means", order = c("1", "0")) null_distn <- mtcars_df \%>\% - specify(mpg ~ am) \%>\% + specify(mpg ~ am) \%>\% hypothesize(null = "independence") \%>\% generate(reps = 100) \%>\% calculate(stat = "diff in means", order = c("1", "0")) -null_distn \%>\% +null_distn \%>\% p_value(obs_stat = d_hat, direction = "right") } diff --git a/man/visualize.Rd b/man/visualize.Rd index e7c2a8c4..e3799c12 100755 --- a/man/visualize.Rd +++ b/man/visualize.Rd @@ -57,7 +57,7 @@ Visualize the distribution of the simulation-based inferential statistics or the theoretical distribution (or both!). } \examples{ -# Permutations to create a simulation-based null distribution for +# Permutations to create a simulation-based null distribution for # one numerical response and one categorical predictor # using t statistic mtcars \%>\% @@ -68,7 +68,7 @@ mtcars \%>\% calculate(stat = "t", order = c("1", "0")) \%>\% visualize(method = "simulation") #default method -# Theoretical t distribution for +# Theoretical t distribution for # one numerical response and one categorical predictor # using t statistic mtcars \%>\% From 3d761f9c14978ff6eda9b137c91ebd2155ca102f Mon Sep 17 00:00:00 2001 From: ismayc Date: Mon, 6 Aug 2018 08:13:27 -0700 Subject: [PATCH 12/78] Ignore docs folder since netlify set up to push to gh-pages-dev branch now --- .Rbuildignore | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.Rbuildignore b/.Rbuildignore index a4139f1e..df4589d6 100644 --- a/.Rbuildignore +++ b/.Rbuildignore @@ -18,3 +18,5 @@ ^TO-DO\.md$ ^\.httr-oauth$ ^_pkgdown.yml +^_pkgdown\.yml$ +^docs$ From 477f56c4e82a866f3047d0953a4a1450abf3df20 Mon Sep 17 00:00:00 2001 From: "evgeni.chasnovski" Date: Mon, 6 Aug 2018 22:49:16 +0300 Subject: [PATCH 13/78] Fix `calculate()` to not depend on order of `p` (fixes #122). --- NEWS.md | 1 + R/calculate.R | 5 ++++- tests/testthat/test-calculate.R | 17 +++++++++++++++++ 3 files changed, 22 insertions(+), 1 deletion(-) diff --git a/NEWS.md b/NEWS.md index 3e0c02a4..00d4c356 100755 --- a/NEWS.md +++ b/NEWS.md @@ -4,6 +4,7 @@ - Update documentation code to follow tidyverse style guide (#159). - Remove help page for internal `set_params()` (#165). - Fully use {tibble} (#166). +- Fix `calculate()` to not depend on order of `p` for `type = "simulate"` (#122). # infer 0.3.1 diff --git a/R/calculate.R b/R/calculate.R index 6c7b03a0..8fce9081 100755 --- a/R/calculate.R +++ b/R/calculate.R @@ -233,10 +233,13 @@ calc_impl.Chisq <- function(stat, x, order, ...) 
{ # Chi-Square Goodness of Fit if (!is.null(attr(x, "params"))) { # When `hypothesize()` has been called + p_levels <- get_par_levels(x) x %>% dplyr::summarize( stat = stats::chisq.test( - table(!!(attr(x, "response"))), p = attr(x, "params") + # Ensure correct ordering of parameters + table(!!(attr(x, "response")))[p_levels], + p = attr(x, "params") )$stat ) } else { diff --git a/tests/testthat/test-calculate.R b/tests/testthat/test-calculate.R index e34bc222..6b28150e 100644 --- a/tests/testthat/test-calculate.R +++ b/tests/testthat/test-calculate.R @@ -409,3 +409,20 @@ test_that("One sample t bootstrap is working", { calculate(stat = "t") ) }) + +test_that("calculate doesn't depend on order of `p` (#122)", { + calc_chisq <- function(p) { + set.seed(111) + + iris %>% + specify(Species ~ NULL) %>% + hypothesize(null = "point", p = p) %>% + generate(reps = 10, type = "simulate") %>% + calculate("Chisq") + } + + expect_equal( + calc_chisq(c("versicolor" = 0.25, "setosa" = 0.5, "virginica" = 0.25)), + calc_chisq(c("virginica" = 0.25, "versicolor" = 0.25, "setosa" = 0.5)) + ) +}) From 1083bb32974c7a7b0ebf926b53c6b904771b6fcb Mon Sep 17 00:00:00 2001 From: "evgeni.chasnovski" Date: Sun, 12 Aug 2018 10:36:50 +0300 Subject: [PATCH 14/78] Use `is_nuat(x, at)` wrapper for `is.null(attr(x, at))`. --- R/calculate.R | 18 +++++++++--------- R/generate.R | 6 +++--- R/p_value.R | 6 +++--- R/set_params.R | 12 ++++++------ R/specify.R | 8 ++++---- R/utils.R | 14 +++++++++----- R/visualize.R | 2 +- 7 files changed, 35 insertions(+), 31 deletions(-) diff --git a/R/calculate.R b/R/calculate.R index 8fce9081..814267ad 100755 --- a/R/calculate.R +++ b/R/calculate.R @@ -48,8 +48,8 @@ calculate <- function(x, ) } - if (is.null(attr(x, "generate")) || !attr(x, "generate")) { - if (is.null(attr(x, "null"))) { + if (is_nuat(x, "generate") || !attr(x, "generate")) { + if (is_nuat(x, "null")) { x$replicate <- 1L } else if ( stat %in% c( @@ -75,7 +75,7 @@ calculate <- function(x, if ( (stat %in% c("diff in means", "diff in medians", "diff in props")) || ( - !is.null(attr(x, "theory_type")) && + !is_nuat(x, "theory_type") && (attr(x, "theory_type") %in% c("Two sample props z", "Two sample t")) ) ) { @@ -85,7 +85,7 @@ calculate <- function(x, if (!( (stat %in% c("diff in means", "diff in medians", "diff in props")) || ( - !is.null(attr(x, "theory_type")) && + !is_nuat(x, "theory_type") && attr(x, "theory_type") %in% c("Two sample props z", "Two sample t") ) )) { @@ -161,7 +161,7 @@ calc_impl.prop <- function(stat, x, order, ...) { # ) # } - if (is.null(attr(x, "success"))) { + if (is_nuat(x, "success")) { stop_glue( 'To calculate a proportion, the `"success"` argument must be provided ', 'in `specify()`.' @@ -229,9 +229,9 @@ calc_impl.diff_in_medians <- function(stat, x, order, ...) { calc_impl.Chisq <- function(stat, x, order, ...) { ## The following could stand to be cleaned up - if (is.null(attr(x, "explanatory"))) { + if (is_nuat(x, "explanatory")) { # Chi-Square Goodness of Fit - if (!is.null(attr(x, "params"))) { + if (!is_nuat(x, "params")) { # When `hypothesize()` has been called p_levels <- get_par_levels(x) x %>% @@ -280,7 +280,7 @@ calc_impl.Chisq <- function(stat, x, order, ...) { ) %>% dplyr::ungroup() - if (!is.null(attr(x, "generate"))) { + if (!is_nuat(x, "generate")) { result <- result %>% dplyr::select(replicate, stat = statistic) } else { result <- result %>% dplyr::select(stat = statistic) @@ -360,7 +360,7 @@ calc_impl.t <- function(stat, x, order, ...) 
{ # One sample mean else if (attr(x, "theory_type") == "One sample t") { # For bootstrap - if (is.null(attr(x, "null"))) { + if (is_nuat(x, "null")) { x %>% dplyr::summarize( stat = stats::t.test(!!attr(x, "response"), ...)[["statistic"]] diff --git a/R/generate.R b/R/generate.R index 16e54745..44a81ad0 100755 --- a/R/generate.R +++ b/R/generate.R @@ -43,7 +43,7 @@ generate <- function(x, reps = 1, type = attr(x, "type"), ...) { if ( (type == "permute") && - any(is.null(attr(x, "response")), is.null(attr(x, "explanatory"))) + any(is_nuat(x, "response"), is_nuat(x, "explanatory")) ) { stop_glue( "Please `specify()` an explanatory and a response variable when ", @@ -61,7 +61,7 @@ generate <- function(x, reps = 1, type = attr(x, "type"), ...) { # if ( # (type == "bootstrap") && # !(attr(attr(x, "params"), "names") %in% c("mu", "med", "sigma")) && -# !is.null(attr(x, "null")) +# !is_nuat(x, "null") # ) { # stop_glue( # "Bootstrapping is inappropriate in this setting. ", @@ -85,7 +85,7 @@ generate <- function(x, reps = 1, type = attr(x, "type"), ...) { bootstrap <- function(x, reps = 1, ...) { # Check if hypothesis test chosen - if (!is.null(attr(x, "null"))) { + if (!is_nuat(x, "null")) { # If so, shift the variable chosen to have a mean corresponding # to that specified in `hypothesize` if (attr(attr(x, "params"), "names") == "mu") { diff --git a/R/p_value.R b/R/p_value.R index cd5f8af9..963329a4 100644 --- a/R/p_value.R +++ b/R/p_value.R @@ -36,7 +36,7 @@ p_value <- function(x, obs_stat, direction) { obs_stat <- check_obs_stat(obs_stat) check_direction(direction) - is_simulation_based <- !is.null(attr(x, "generate")) && attr(x, "generate") + is_simulation_based <- !is_nuat(x, "generate") && attr(x, "generate") if (is_simulation_based) { pvalue <- simulation_based_p_value( @@ -47,7 +47,7 @@ p_value <- function(x, obs_stat, direction) { ## Theoretical-based p-value # Could be more specific # else if ( - # is.null(attr(x, "theory_type")) || is.null(attr(x, "distr_param")) + # is_nuat(x, "theory_type") || is_nuat(x, "distr_param") # ) { # stop_glue( # "Attributes have not been set appropriately. 
", @@ -112,7 +112,7 @@ get_pvalue <- p_value # which_distribution <- function(x, theory_type, obs_stat, direction) { # param <- attr(x, "distr_param") -# if (!is.null(attr(x, "distr_param2"))) { +# if (!is_nuat(x, "distr_param2")) { # param2 <- attr(x, "distr_param2") # } # diff --git a/R/set_params.R b/R/set_params.R index 0867a5f7..d7513ba2 100755 --- a/R/set_params.R +++ b/R/set_params.R @@ -6,14 +6,14 @@ set_params <- function(x) { attr(x, "theory_type") <- NULL - if (!is.null(attr(x, "response"))) { + if (!is_nuat(x, "response")) { num_response_levels <- length(levels(response_variable(x))) } # One variable if ( - !is.null(attr(x, "response")) && is.null(attr(x, "explanatory")) && - !is.null(attr(x, "response_type")) && is.null(attr(x, "explanatory_type")) + !is_nuat(x, "response") && is_nuat(x, "explanatory") && + !is_nuat(x, "response_type") && is_nuat(x, "explanatory_type") ) { # One mean @@ -43,8 +43,8 @@ set_params <- function(x) { # Two variables if ( - !is.null(attr(x, "response")) && !is.null(attr(x, "explanatory")) & - !is.null(attr(x, "response_type")) && !is.null(attr(x, "explanatory_type")) + !is_nuat(x, "response") && !is_nuat(x, "explanatory") & + !is_nuat(x, "response_type") && !is_nuat(x, "explanatory_type") ) { attr(x, "type") <- "bootstrap" @@ -130,7 +130,7 @@ set_params <- function(x) { } } -# if(is.null(attr(x, "theory_type"))) +# if(is_nuat(x, "theory_type")) # warning_glue("Theoretical type not yet implemented") x diff --git a/R/specify.R b/R/specify.R index 937b3f09..93a69306 100755 --- a/R/specify.R +++ b/R/specify.R @@ -56,7 +56,7 @@ specify <- function(x, formula, response = NULL, attr(x, "explanatory") <- f_rhs(formula) } - if (is.null(attr(x, "response"))) { + if (is_nuat(x, "response")) { stop_glue("Supply not `NULL` response variable.") } @@ -130,7 +130,7 @@ specify <- function(x, formula, response = NULL, # To help determine theoretical distribution to plot attr(x, "response_type") <- class(response_variable(x)) - if (is.null(attr(x, "explanatory"))) { + if (is_nuat(x, "explanatory")) { attr(x, "explanatory_type") <- NULL } else { attr(x, "explanatory_type") <- class(explanatory_variable(x)) @@ -140,9 +140,9 @@ specify <- function(x, formula, response = NULL, (attr(x, "response_type") == "factor") && is.null(success) && (length(levels(response_variable(x))) == 2) && ( - is.null(attr(x, "explanatory_type")) || + is_nuat(x, "explanatory_type") || ( - !is.null(attr(x, "explanatory_type")) && + !is_nuat(x, "explanatory_type") && (length(levels(explanatory_variable(x))) == 2) ) ) diff --git a/R/utils.R b/R/utils.R index 66af83d3..c0b5a90b 100644 --- a/R/utils.R +++ b/R/utils.R @@ -26,6 +26,10 @@ set_attributes <- function(to, from = x) { to } +is_nuat <- function(x, at) { + is.null(attr(x, at)) +} + explanatory_variable <- function(x) { x[[as.character(attr(x, "explanatory"))]] } @@ -43,11 +47,11 @@ reorder_explanatory <- function(x, order) { } has_explanatory <- function(x) { - !is.null(attr(x, "explanatory")) + !is_nuat(x, "explanatory") } has_response <- function(x) { - !is.null(attr(x, "response")) + !is_nuat(x, "response") } stop_glue <- function(..., .sep = "", .envir = parent.frame(), @@ -139,7 +143,7 @@ check_args_and_attr <- function(x, explanatory_variable, response_variable, ) } - if (!("replicate" %in% names(x)) && !is.null(attr(x, "generate"))) { + if (!("replicate" %in% names(x)) && !is_nuat(x, "generate")) { warning_glue( 'A `generate()` step was not performed prior to `calculate()`. ', 'Review carefully.' 
@@ -193,7 +197,7 @@ check_for_factor_stat <- function(x, stat, explanatory_variable) { check_point_params <- function(x, stat) { param_names <- attr(attr(x, "params"), "names") hyp_text <- 'to be set in `hypothesize()`.' - if (!is.null(attr(x, "null"))) { + if (!is_nuat(x, "null")) { if (stat %in% c("mean", "median", "sd", "prop")) { if ((stat == "mean") && !("mu" %in% param_names)) { stop_glue('`stat == "mean"` requires `"mu"` {hyp_text}') @@ -243,7 +247,7 @@ parse_params <- function(dots, x) { # 0 index of dots if (length(p_ind)) { if (length(dots[[p_ind]]) == 1) { - if ((attr(x, "null") == "point") && is.null(attr(x, "success"))) { + if ((attr(x, "null") == "point") && is_nuat(x, "success")) { stop_glue( "A point null regarding a proportion requires that `success` ", "be indicated in `specify()`." diff --git a/R/visualize.R b/R/visualize.R index bda7887c..3d71b4be 100755 --- a/R/visualize.R +++ b/R/visualize.R @@ -549,7 +549,7 @@ visualize_theoretical <- function(data, ) if ( - !is.null(attr(data, "stat")) && + !is_nuat(data, "stat") && !(attr(data, "stat") %in% c("t", "z", "Chisq", "F")) ) { warning_glue( From e641688a7df5fdc5282f7403b5a0474cb9e15ef9 Mon Sep 17 00:00:00 2001 From: "evgeni.chasnovski" Date: Sun, 12 Aug 2018 10:56:55 +0300 Subject: [PATCH 15/78] Refactor `set_attributes()` into more flexible `copy_attrs()`. --- R/calculate.R | 19 ++++++++----------- R/generate.R | 6 +++--- R/utils.R | 24 ++++++++++-------------- 3 files changed, 21 insertions(+), 28 deletions(-) diff --git a/R/calculate.R b/R/calculate.R index 814267ad..d0c3a234 100755 --- a/R/calculate.R +++ b/R/calculate.R @@ -111,7 +111,7 @@ calculate <- function(x, # class(result) <- append("infer", class(result)) # } - result <- set_attributes(to = result, from = x) + result <- copy_attrs(to = result, from = x) attr(result, "stat") <- stat # For returning a 1x1 observed statistic value @@ -286,16 +286,13 @@ calc_impl.Chisq <- function(stat, x, order, ...) { result <- result %>% dplyr::select(stat = statistic) } - attr(result, "response") <- attr(x, "response") - attr(result, "success") <- attr(x, "success") - attr(result, "explanatory") <- attr(x, "explanatory") - attr(result, "response_type") <- attr(x, "response_type") - attr(result, "explanatory_type") <- attr(x, "explanatory_type") - attr(result, "distr_param") <- attr(x, "distr_param") - attr(result, "distr_param2") <- attr(x, "distr_param2") - attr(result, "theory_type") <- attr(x, "theory_type") - - result + copy_attrs( + to = result, from = x, + attrs = c( + "response", "success", "explanatory", "response_type", + "explanatory_type", "distr_param", "distr_param2", "theory_type" + ) + ) } } diff --git a/R/generate.R b/R/generate.R index 44a81ad0..61f3856a 100755 --- a/R/generate.R +++ b/R/generate.R @@ -126,7 +126,7 @@ bootstrap <- function(x, reps = 1, ...) { # Set variables for use in calculate() result <- rep_sample_n(x, size = nrow(x), replace = TRUE, reps = reps) - result <- set_attributes(to = result, from = x) + result <- copy_attrs(to = result, from = x) class(result) <- append("infer", class(result)) @@ -140,7 +140,7 @@ permute <- function(x, reps = 1, ...) { dplyr::mutate(replicate = rep(1:reps, each = nrow(x))) %>% dplyr::group_by(replicate) - df_out <- set_attributes(to = df_out, from = x) + df_out <- copy_attrs(to = df_out, from = x) class(df_out) <- append("infer", class(df_out)) @@ -176,7 +176,7 @@ simulate <- function(x, reps = 1, ...) 
{ replicate = as.factor(rep(1:reps, rep(nrow(x), reps))) ) - rep_tbl <- set_attributes(to = rep_tbl, from = x) + rep_tbl <- copy_attrs(to = rep_tbl, from = x) class(rep_tbl) <- append("infer", class(rep_tbl)) diff --git a/R/utils.R b/R/utils.R index c0b5a90b..2bee15d5 100644 --- a/R/utils.R +++ b/R/utils.R @@ -9,20 +9,16 @@ get_par_levels <- function(x) { gsub("^.\\.", "", par_names) } -set_attributes <- function(to, from = x) { - attr(to, "response") <- attr(from, "response") - attr(to, "success") <- attr(from, "success") - attr(to, "explanatory") <- attr(from, "explanatory") - attr(to, "response_type") <- attr(from, "response_type") - attr(to, "explanatory_type") <- attr(from, "explanatory_type") - attr(to, "distr_param") <- attr(from, "distr_param") - attr(to, "distr_param2") <- attr(from, "distr_param2") - attr(to, "null") <- attr(from, "null") - attr(to, "params") <- attr(from, "params") - attr(to, "theory_type") <- attr(from, "theory_type") - attr(to, "generate") <- attr(from, "generate") - attr(to, "type") <- attr(from, "type") - +copy_attrs <- function(to, from, + attrs = c( + "response", "success", "explanatory", "response_type", + "explanatory_type", "distr_param", "distr_param2", + "null", "params", "theory_type", "generate", "type" + )) { + for (at in attrs) { + attr(to, at) <- attr(from, at) + } + to } From d18be575690e5c3098e78635a55c38af27586f6e Mon Sep 17 00:00:00 2001 From: "evgeni.chasnovski" Date: Sun, 12 Aug 2018 12:00:50 +0300 Subject: [PATCH 16/78] Refactor `calc_impl()` methods. --- R/calculate.R | 79 ++++++++++++++------------------- R/infer.R | 2 +- tests/testthat/test-calculate.R | 8 ++++ 3 files changed, 42 insertions(+), 47 deletions(-) diff --git a/R/calculate.R b/R/calculate.R index d0c3a234..328801c5 100755 --- a/R/calculate.R +++ b/R/calculate.R @@ -126,31 +126,23 @@ calc_impl <- function(type, x, order, ...) { UseMethod("calc_impl", type) } -calc_impl.mean <- function(stat, x, order, ...) { - col <- base::setdiff(names(x), "replicate") - - x %>% - dplyr::group_by(replicate) %>% - dplyr::summarize(stat = mean(!!(sym(col)), ...)) +calc_impl_one_f <- function(f) { + function(type, x, order, ...) { + col <- base::setdiff(names(x), "replicate") + + x %>% + dplyr::group_by(replicate) %>% + dplyr::summarize(stat = f(!!(sym(col)), ...)) + } } -calc_impl.median <- function(stat, x, order, ...) { - col <- base::setdiff(names(x), "replicate") +calc_impl.mean <- calc_impl_one_f(mean) - x %>% - dplyr::group_by(replicate) %>% - dplyr::summarize(stat = stats::median(!!(sym(col)), ...)) -} - -calc_impl.sd <- function(stat, x, order, ...) { - col <- base::setdiff(names(x), "replicate") +calc_impl.median <- calc_impl_one_f(stats::median) - x %>% - dplyr::group_by(replicate) %>% - dplyr::summarize(stat = stats::sd(!!(sym(col)), ...)) -} +calc_impl.sd <- calc_impl_one_f(stats::sd) -calc_impl.prop <- function(stat, x, order, ...) { +calc_impl.prop <- function(type, x, order, ...) { col <- base::setdiff(names(x), "replicate") ## No longer needed with implementation of `check_point_params()` @@ -179,7 +171,7 @@ calc_impl.prop <- function(stat, x, order, ...) { ) } -calc_impl.F <- function(stat, x, order, ...) { +calc_impl.F <- function(type, x, order, ...) { x %>% dplyr::summarize( stat = stats::anova( @@ -188,7 +180,7 @@ calc_impl.F <- function(stat, x, order, ...) { ) } -calc_impl.slope <- function(stat, x, order, ...) { +calc_impl.slope <- function(type, x, order, ...) 
{ x %>% dplyr::summarize( stat = stats::coef( @@ -197,36 +189,31 @@ calc_impl.slope <- function(stat, x, order, ...) { ) } -calc_impl.correlation <- function(stat, x, order, ...) { +calc_impl.correlation <- function(type, x, order, ...) { x %>% dplyr::summarize( stat = stats::cor(!!attr(x, "explanatory"), !!attr(x, "response")) ) } -calc_impl.diff_in_means <- function(stat, x, order, ...) { - x %>% - dplyr::group_by(replicate, !!attr(x, "explanatory")) %>% - dplyr::summarize(xbar = mean(!!attr(x, "response"), ...)) %>% - dplyr::group_by(replicate) %>% - dplyr::summarize( - stat = xbar[!!(attr(x, "explanatory")) == order[1]] - - xbar[!!(attr(x, "explanatory")) == order[2]] - ) +calc_impl_diff_f <- function(f) { + function(type, x, order, ...) { + x %>% + dplyr::group_by(replicate, !!attr(x, "explanatory")) %>% + dplyr::summarize(value = f(!!attr(x, "response"), ...)) %>% + dplyr::group_by(replicate) %>% + dplyr::summarize( + stat = value[!!(attr(x, "explanatory")) == order[1]] - + value[!!(attr(x, "explanatory")) == order[2]] + ) + } } -calc_impl.diff_in_medians <- function(stat, x, order, ...) { - x %>% - dplyr::group_by(replicate, !!(attr(x, "explanatory"))) %>% - dplyr::summarize(xtilde = stats::median(!!attr(x, "response"), ...)) %>% - dplyr::group_by(replicate) %>% - dplyr::summarize( - stat = xtilde[!!(attr(x, "explanatory")) == order[1]] - - xtilde[!!(attr(x, "explanatory")) == order[2]] - ) -} +calc_impl.diff_in_means <- calc_impl_diff_f(mean) + +calc_impl.diff_in_medians <- calc_impl_diff_f(stats::median) -calc_impl.Chisq <- function(stat, x, order, ...) { +calc_impl.Chisq <- function(type, x, order, ...) { ## The following could stand to be cleaned up if (is_nuat(x, "explanatory")) { @@ -296,7 +283,7 @@ calc_impl.Chisq <- function(stat, x, order, ...) { } } -calc_impl.diff_in_props <- function(stat, x, order, ...) { +calc_impl.diff_in_props <- function(type, x, order, ...) { col <- attr(x, "response") success <- attr(x, "success") @@ -309,7 +296,7 @@ calc_impl.diff_in_props <- function(stat, x, order, ...) { ) } -calc_impl.t <- function(stat, x, order, ...) { +calc_impl.t <- function(type, x, order, ...) { # Two sample means if (attr(x, "theory_type") == "Two sample t") { @@ -374,7 +361,7 @@ calc_impl.t <- function(stat, x, order, ...) { } } -calc_impl.z <- function(stat, x, order, ...) { +calc_impl.z <- function(type, x, order, ...) 
{ # Two sample proportions if (attr(x, "theory_type") == "Two sample props z") { col <- attr(x, "response") diff --git a/R/infer.R b/R/infer.R index a6621b57..69614a05 100755 --- a/R/infer.R +++ b/R/infer.R @@ -17,7 +17,7 @@ NULL if (getRversion() >= "2.15.1") { utils::globalVariables( c( - "prop", "stat", "xbar", "xtilde", "x", "..density..", "statistic", ".", + "prop", "stat", "value", "x", "..density..", "statistic", ".", "parameter", "p.value", "xmin", "xmax", "density", "denom", "diff_prop", "group_num", "n1", "n2", "num_suc", "p_hat", "total_suc", "explan", "probs", "conf.low", "conf.high" diff --git a/tests/testthat/test-calculate.R b/tests/testthat/test-calculate.R index 6b28150e..096cb750 100644 --- a/tests/testthat/test-calculate.R +++ b/tests/testthat/test-calculate.R @@ -426,3 +426,11 @@ test_that("calculate doesn't depend on order of `p` (#122)", { calc_chisq(c("virginica" = 0.25, "versicolor" = 0.25, "setosa" = 0.5)) ) }) + +test_that("calc_impl_one_f works", { + expect_true(is.function(calc_impl_one_f(mean))) +}) + +test_that("calc_impl_diff_f works", { + expect_true(is.function(calc_impl_diff_f(mean))) +}) \ No newline at end of file From a88f0288280913808b223a1ba755af17eb7c91ea Mon Sep 17 00:00:00 2001 From: "evgeni.chasnovski" Date: Sun, 12 Aug 2018 12:41:03 +0300 Subject: [PATCH 17/78] Move common data in tests into separate helper file. --- tests/testthat/helper-data.R | 18 ++++++ tests/testthat/test-calculate.R | 8 +-- tests/testthat/test-conf_int.R | 12 ---- tests/testthat/test-generate.R | 97 ++++++++++++++----------------- tests/testthat/test-hypothesize.R | 40 ++++++------- tests/testthat/test-p_value.R | 12 ---- tests/testthat/test-specify.R | 56 ++++++++---------- tests/testthat/test-visualize.R | 6 -- 8 files changed, 104 insertions(+), 145 deletions(-) create mode 100644 tests/testthat/helper-data.R diff --git a/tests/testthat/helper-data.R b/tests/testthat/helper-data.R new file mode 100644 index 00000000..12701584 --- /dev/null +++ b/tests/testthat/helper-data.R @@ -0,0 +1,18 @@ +iris_tbl <- iris %>% + tibble::as_tibble() %>% + dplyr::mutate( + Sepal.Length.Group = dplyr::if_else(Sepal.Length > 5, ">5", "<=5"), + Sepal.Width.Group = dplyr::if_else(Sepal.Width > 3, "large", "small") + ) + +iris_calc <- iris_tbl %>% + specify(Sepal.Length.Group ~ Sepal.Width.Group, success = "<=5") %>% + hypothesize(null = "independence") %>% + generate(reps = 1000) %>% + calculate(stat = "diff in props", order = c("large", "small")) + +mtcars_df <- mtcars %>% + dplyr::mutate( + cyl = factor(cyl), vs = factor(vs), am = factor(am), gear = factor(gear), + carb = factor(carb) + ) diff --git a/tests/testthat/test-calculate.R b/tests/testthat/test-calculate.R index 096cb750..daefde6c 100644 --- a/tests/testthat/test-calculate.R +++ b/tests/testthat/test-calculate.R @@ -2,12 +2,6 @@ context("calculate") iris_df <- tibble::as_tibble(iris) -iris_tbl <- iris_df %>% - dplyr::mutate( - Sepal.Length.Group = dplyr::if_else(Sepal.Length > 5, ">5", "<=5"), - Sepal.Width.Group = dplyr::if_else(Sepal.Width > 3, "large", "small") - ) - # calculate arguments test_that("x is a tibble", { vec <- 1:10 @@ -433,4 +427,4 @@ test_that("calc_impl_one_f works", { test_that("calc_impl_diff_f works", { expect_true(is.function(calc_impl_diff_f(mean))) -}) \ No newline at end of file +}) diff --git a/tests/testthat/test-conf_int.R b/tests/testthat/test-conf_int.R index 60da5354..5cfe0060 100644 --- a/tests/testthat/test-conf_int.R +++ b/tests/testthat/test-conf_int.R @@ -1,17 +1,5 @@ context("conf_int") 
-iris_tbl <- iris %>% - dplyr::mutate( - Sepal.Length.Group = dplyr::if_else(Sepal.Length > 5, ">5", "<=5"), - Sepal.Width.Group = dplyr::if_else(Sepal.Width > 3, "large", "small") - ) - -iris_calc <- iris_tbl %>% - specify(Sepal.Length.Group ~ Sepal.Width.Group, success = "<=5") %>% - hypothesize(null = "independence") %>% - generate(reps = 1000) %>% - calculate(stat = "diff in props", order = c("large", "small")) - obs_diff <- iris_tbl %>% specify(Sepal.Length.Group ~ Sepal.Width.Group, success = "<=5") %>% calculate(stat = "diff in props", order = c("large", "small")) diff --git a/tests/testthat/test-generate.R b/tests/testthat/test-generate.R index c2de7d46..70088b90 100644 --- a/tests/testthat/test-generate.R +++ b/tests/testthat/test-generate.R @@ -1,44 +1,38 @@ context("generate") -mtcars <- as.data.frame(mtcars) %>% - dplyr::mutate( - cyl = factor(cyl), vs = factor(vs), am = factor(am), gear = factor(gear), - carb = factor(carb) - ) - -hyp_prop <- mtcars %>% +hyp_prop <- mtcars_df %>% specify(response = am, success = "1") %>% hypothesize(null = "point", p = .5) -hyp_diff_in_props <- mtcars %>% +hyp_diff_in_props <- mtcars_df %>% specify(am ~ vs, success = "1") %>% hypothesize(null = "independence") -hyp_chisq_gof <- mtcars %>% +hyp_chisq_gof <- mtcars_df %>% specify(response = cyl) %>% hypothesize(null = "point", p = c("4" = 1/3, "6" = 1/3, "8" = 1/3)) -hyp_chisq_ind <- mtcars %>% +hyp_chisq_ind <- mtcars_df %>% specify(cyl ~ vs) %>% hypothesize(null = "independence") -hyp_mean <- mtcars %>% +hyp_mean <- mtcars_df %>% specify(response = mpg) %>% hypothesize(null = "point", mu = 3) -hyp_median <- mtcars %>% +hyp_median <- mtcars_df %>% specify(response = mpg) %>% hypothesize(null = "point", med = 3) -hyp_sd <- mtcars %>% +hyp_sd <- mtcars_df %>% specify(response = mpg) %>% hypothesize(null = "point", sigma = 7) -hyp_diff_in_means <- mtcars %>% +hyp_diff_in_means <- mtcars_df %>% specify(mpg ~ vs) %>% hypothesize(null = "independence") -hyp_anova <- mtcars %>% +hyp_anova <- mtcars_df %>% specify(mpg ~ cyl) %>% hypothesize(null = "independence") @@ -72,81 +66,76 @@ test_that("cohesion with type argument", { test_that("sensible output", { expect_equal( - nrow(mtcars) * 500, nrow(generate(hyp_prop, reps = 500, type = "simulate")) + nrow(mtcars_df) * 500, + nrow(generate(hyp_prop, reps = 500, type = "simulate")) ) expect_silent(generate(hyp_mean, reps = 1, type = "bootstrap")) expect_error(generate(hyp_mean, reps = 1, type = "other")) }) test_that("auto `type` works (generate)", { - mtcars <- as.data.frame(mtcars) %>% - dplyr::mutate( - cyl = factor(cyl), vs = factor(vs), am = factor(am), gear = factor(gear), - carb = factor(carb) - ) - - one_mean <- mtcars %>% + one_mean <- mtcars_df %>% specify(response = mpg) %>% # formula alt: mpg ~ NULL hypothesize(null = "point", mu = 25) %>% generate(reps = 100) - one_nonshift_mean <- mtcars %>% + one_nonshift_mean <- mtcars_df %>% specify(response = mpg) %>% generate(reps = 100) - one_median <- mtcars %>% + one_median <- mtcars_df %>% specify(response = mpg) %>% # formula alt: mpg ~ NULL hypothesize(null = "point", med = 26) %>% generate(reps = 100) - one_prop <- mtcars %>% + one_prop <- mtcars_df %>% specify(response = am, success = "1") %>% # formula alt: am ~ NULL hypothesize(null = "point", p = .25) %>% generate(reps = 100) - two_props <- mtcars %>% + two_props <- mtcars_df %>% specify(am ~ vs, success = "1") %>% # alt: response = am, explanatory = vs hypothesize(null = "independence") %>% generate(reps = 100) - gof_chisq <- mtcars %>% + 
gof_chisq <- mtcars_df %>% specify(cyl ~ NULL) %>% # alt: response = cyl hypothesize(null = "point", p = c("4" = .5, "6" = .25, "8" = .25)) %>% generate(reps = 100) - indep_chisq <- mtcars %>% + indep_chisq <- mtcars_df %>% specify(cyl ~ am) %>% # alt: response = cyl, explanatory = am hypothesize(null = "independence") %>% generate(reps = 100) - two_means <- mtcars %>% + two_means <- mtcars_df %>% specify(mpg ~ am) %>% # alt: response = mpg, explanatory = am hypothesize(null = "independence") %>% generate(reps = 100) - anova_f <- mtcars %>% + anova_f <- mtcars_df %>% specify(mpg ~ cyl) %>% # alt: response = mpg, explanatory = cyl hypothesize(null = "independence") %>% generate(reps = 100) - slopes <- mtcars %>% + slopes <- mtcars_df %>% specify(mpg ~ hp) %>% # alt: response = mpg, explanatory = cyl hypothesize(null = "independence") %>% generate(reps = 100) - one_nonshift_prop <- mtcars %>% + one_nonshift_prop <- mtcars_df %>% specify(response = am, success = "1") %>% generate(reps = 100) - two_means_boot <- mtcars %>% + two_means_boot <- mtcars_df %>% specify(mpg ~ am) %>% generate(reps = 100) - two_props_boot <- mtcars %>% + two_props_boot <- mtcars_df %>% specify(am ~ vs, success = "1") %>% generate(reps = 100) - slope_boot <- mtcars %>% + slope_boot <- mtcars_df %>% specify(mpg ~ hp) %>% generate(reps = 100) @@ -165,102 +154,102 @@ test_that("auto `type` works (generate)", { expect_equal(attr(two_props_boot, "type"), "bootstrap") expect_equal(attr(slope_boot, "type"), "bootstrap") - expect_error(mtcars %>% + expect_error(mtcars_df %>% specify(response = mpg) %>% # formula alt: mpg ~ NULL hypothesize(null = "point", mu = 25) %>% generate(reps = 100, type = "permute") ) - expect_error(mtcars %>% + expect_error(mtcars_df %>% specify(response = mpg) %>% generate(reps = 100, type = "simulate") ) - expect_error(mtcars %>% + expect_error(mtcars_df %>% specify(response = mpg) %>% # formula alt: mpg ~ NULL hypothesize(null = "point", med = 26) %>% generate(reps = 100, type = "permute") ) - expect_error(mtcars %>% + expect_error(mtcars_df %>% specify(response = am, success = "1") %>% # formula alt: am ~ NULL hypothesize(null = "point", p = .25) %>% generate(reps = 100, type = "bootstrap") ) - expect_error(mtcars %>% + expect_error(mtcars_df %>% specify(am ~ vs, success = "1") %>% # alt: response = am, explanatory = vs hypothesize(null = "independence") %>% generate(reps = 100, type = "bootstrap") ) - expect_error(mtcars %>% + expect_error(mtcars_df %>% specify(cyl ~ NULL) %>% # alt: response = cyl hypothesize(null = "point", p = c("4" = .5, "6" = .25, "8" = .25)) %>% generate(reps = 100, type = "bootstrap") ) - expect_error(mtcars %>% + expect_error(mtcars_df %>% specify(cyl ~ am) %>% # alt: response = cyl, explanatory = am hypothesize(null = "independence") %>% generate(reps = 100, type = "simulate") ) - expect_error(mtcars %>% + expect_error(mtcars_df %>% specify(mpg ~ am) %>% # alt: response = mpg, explanatory = am hypothesize(null = "independence") %>% generate(reps = 100, type = "bootstrap")) - expect_error(mtcars %>% + expect_error(mtcars_df %>% specify(mpg ~ cyl) %>% # alt: response = mpg, explanatory = cyl hypothesize(null = "independence") %>% generate(reps = 100, type = "simulate") ) - expect_error(mtcars %>% + expect_error(mtcars_df %>% specify(mpg ~ hp) %>% # alt: response = mpg, explanatory = cyl hypothesize(null = "independence") %>% generate(reps = 100, type = "bootstrap") ) - expect_error(mtcars %>% + expect_error(mtcars_df %>% specify(response = am, success = "1") %>% 
generate(reps = 100, type = "simulate") ) - expect_error(mtcars %>% + expect_error(mtcars_df %>% specify(mpg ~ am) %>% generate(reps = 100, type = "permute") ) - expect_error(mtcars %>% + expect_error(mtcars_df %>% specify(am ~ vs, success = "1") %>% generate(reps = 100, type = "simulate") ) - expect_error(mtcars %>% + expect_error(mtcars_df %>% specify(mpg ~ hp) %>% generate(reps = 100, type = "simulate") ) }) test_that("mismatches lead to error", { - expect_error(mtcars %>% generate(reps = 10, type = "permute")) + expect_error(mtcars_df %>% generate(reps = 10, type = "permute")) expect_error( - mtcars %>% + mtcars_df %>% specify(am ~ NULL, success = "1") %>% hypothesize(null = "independence", p = c("1" = 0.5)) %>% generate(reps = 100, type = "simulate") ) expect_error( - mtcars %>% + mtcars_df %>% specify(cyl ~ NULL) %>% # alt: response = cyl hypothesize( null = "point", p = c("4" = .5, "6" = .25, "8" = .25) ) %>% generate(reps = 100, type = "bootstrap")) expect_error( - mtcars %>% specify(mpg ~ hp) %>% generate(reps = 100, type = "other") + mtcars_df %>% specify(mpg ~ hp) %>% generate(reps = 100, type = "other") ) }) diff --git a/tests/testthat/test-hypothesize.R b/tests/testthat/test-hypothesize.R index 22d80e8a..1641c8fa 100644 --- a/tests/testthat/test-hypothesize.R +++ b/tests/testthat/test-hypothesize.R @@ -1,54 +1,48 @@ context("hypothesize") -mtcars <- as.data.frame(mtcars) %>% - dplyr::mutate( - cyl = factor(cyl), vs = factor(vs), am = factor(am), gear = factor(gear), - carb = factor(carb) - ) - -one_mean <- mtcars %>% +one_mean <- mtcars_df %>% specify(response = mpg) %>% # formula alt: mpg ~ NULL hypothesize(null = "point", mu = 25) -one_mean_specify <- mtcars %>% +one_mean_specify <- mtcars_df %>% specify(response = mpg) -one_median <- mtcars %>% +one_median <- mtcars_df %>% specify(response = mpg) %>% # formula alt: mpg ~ NULL hypothesize(null = "point", med = 26) -one_prop <- mtcars %>% +one_prop <- mtcars_df %>% specify(response = am, success = "1") %>% # formula alt: am ~ NULL hypothesize(null = "point", p = .25) -one_prop_specify <- mtcars %>% +one_prop_specify <- mtcars_df %>% specify(response = am, success = "1") -two_props <- mtcars %>% +two_props <- mtcars_df %>% specify(am ~ vs, success = "1") %>% # alt: response = am, explanatory = vs hypothesize(null = "independence") -gof_chisq <- mtcars %>% +gof_chisq <- mtcars_df %>% specify(cyl ~ NULL) %>% # alt: response = cyl hypothesize(null = "point", p = c("4" = .5, "6" = .25, "8" = .25)) -indep_chisq <- mtcars %>% +indep_chisq <- mtcars_df %>% specify(cyl ~ am) %>% # alt: response = cyl, explanatory = am hypothesize(null = "independence") -two_means <- mtcars %>% +two_means <- mtcars_df %>% specify(mpg ~ am) %>% # alt: response = mpg, explanatory = am hypothesize(null = "independence") -two_medians <- mtcars %>% +two_medians <- mtcars_df %>% specify(mpg ~ am) %>% # alt: response = mpg, explanatory = am hypothesize(null = "independence") -anova_f <- mtcars %>% +anova_f <- mtcars_df %>% specify(mpg ~ cyl) %>% # alt: response = mpg, explanatory = cyl hypothesize(null = "independence") -slopes <- mtcars %>% +slopes <- mtcars_df %>% specify(mpg ~ hp) %>% # alt: response = mpg, explanatory = cyl hypothesize(null = "independence") @@ -84,20 +78,20 @@ test_that("hypothesize arguments function", { # ) expect_error( - mtcars %>% dplyr::select(vs) %>% hypothesize(null = "point", mu = 1) + mtcars_df %>% dplyr::select(vs) %>% hypothesize(null = "point", mu = 1) ) expect_error( - mtcars %>% specify(response = vs) %>% hypothesize(null 
= "point", mu = 1) + mtcars_df %>% specify(response = vs) %>% hypothesize(null = "point", mu = 1) ) expect_error( - mtcars %>% + mtcars_df %>% specify(response = vs, success = "1") %>% hypothesize(null = "point", p = 1.1) ) expect_error( - mtcars %>% + mtcars_df %>% specify(response = vs, success = "1") %>% hypothesize(null = "point", p = -23) ) @@ -111,7 +105,7 @@ test_that("hypothesize arguments function", { expect_error(mtcars_s %>% hypothesize(null = "point", p = 0.2)) expect_warning( - mtcars %>% + mtcars_df %>% specify(mpg ~ vs) %>% hypothesize(null = "independence", p = 0.5) ) diff --git a/tests/testthat/test-p_value.R b/tests/testthat/test-p_value.R index f572764f..c3b758b3 100644 --- a/tests/testthat/test-p_value.R +++ b/tests/testthat/test-p_value.R @@ -1,17 +1,5 @@ context("p_value") -iris_tbl <- iris %>% - dplyr::mutate( - Sepal.Length.Group = dplyr::if_else(Sepal.Length > 5, ">5", "<=5"), - Sepal.Width.Group = dplyr::if_else(Sepal.Width > 3, "large", "small") - ) - -iris_calc <- iris_tbl %>% - specify(Sepal.Length.Group ~ Sepal.Width.Group, success = "<=5") %>% - hypothesize(null = "independence") %>% - generate(reps = 1000) %>% - calculate(stat = "diff in props", order = c("large", "small")) - set.seed(2018) test_df <- tibble::tibble(stat = rnorm(100)) diff --git a/tests/testthat/test-specify.R b/tests/testthat/test-specify.R index 001f14ec..392d5694 100644 --- a/tests/testthat/test-specify.R +++ b/tests/testthat/test-specify.R @@ -1,20 +1,14 @@ context("specify") -mtcars <- as.data.frame(mtcars) %>% - dplyr::mutate( - cyl = factor(cyl), vs = factor(vs), am = factor(am), gear = factor(gear), - carb = factor(carb) - ) +one_nonshift_mean <- mtcars_df %>% specify(response = mpg) -one_nonshift_mean <- mtcars %>% specify(response = mpg) +one_nonshift_prop <- mtcars_df %>% specify(response = am, success = "1") -one_nonshift_prop <- mtcars %>% specify(response = am, success = "1") +two_means_boot <- mtcars_df %>% specify(mpg ~ am) -two_means_boot <- mtcars %>% specify(mpg ~ am) +two_props_boot <- mtcars_df %>% specify(am ~ vs, success = "1") -two_props_boot <- mtcars %>% specify(am ~ vs, success = "1") - -slope_boot <- mtcars %>% specify(mpg ~ hp) +slope_boot <- mtcars_df %>% specify(mpg ~ hp) test_that("auto `type` works (specify)", { expect_equal(attr(one_nonshift_mean, "type"), "bootstrap") @@ -27,42 +21,42 @@ test_that("auto `type` works (specify)", { test_that("data argument", { expect_error(specify(blah ~ cyl)) expect_error(specify(1:3)) - expect_is(mtcars, "data.frame") - expect_error(specify(mtcars, mtcars$mpg)) + expect_is(mtcars_df, "data.frame") + expect_error(specify(mtcars_df, mtcars_df$mpg)) }) test_that("response and explanatory arguments", { - expect_error(specify(mtcars, response = blah)) - expect_error(specify(mtcars, response = "blah")) - expect_error(specify(mtcars, formula = mpg ~ blah)) + expect_error(specify(mtcars_df, response = blah)) + expect_error(specify(mtcars_df, response = "blah")) + expect_error(specify(mtcars_df, formula = mpg ~ blah)) expect_error(specify(blah ~ cyl)) - expect_error(specify(mtcars_f, blah2 ~ cyl)) - expect_error(specify(mtcars)) - expect_error(specify(mtcars, formula = mpg ~ mpg)) - expect_error(specify(mtcars, formula = mpg ~ "cyl")) - expect_silent(specify(mtcars, formula = mpg ~ cyl)) + expect_error(specify(mtcars_df, blah2 ~ cyl)) + expect_error(specify(mtcars_df)) + expect_error(specify(mtcars_df, formula = mpg ~ mpg)) + expect_error(specify(mtcars_df, formula = mpg ~ "cyl")) + expect_silent(specify(mtcars_df, formula = mpg ~ 
cyl)) - expect_error(specify(mtcars, formula = NULL ~ cyl), "NULL.*response") + expect_error(specify(mtcars_df, formula = NULL ~ cyl), "NULL.*response") }) test_that("success argument", { - expect_error(specify(mtcars, response = vs, success = 1)) - expect_error(specify(mtcars, response = vs, success = "bogus")) - expect_error(specify(mtcars, response = mpg, success = "1")) - expect_error(specify(mtcars, response = cyl, success = "4")) + expect_error(specify(mtcars_df, response = vs, success = 1)) + expect_error(specify(mtcars_df, response = vs, success = "bogus")) + expect_error(specify(mtcars_df, response = mpg, success = "1")) + expect_error(specify(mtcars_df, response = cyl, success = "4")) # success not given - expect_error(specify(mtcars, response = am)) + expect_error(specify(mtcars_df, response = am)) }) test_that("sensible output", { - expect_equal(ncol(specify(mtcars, formula = mpg ~ NULL)), 1) - expect_equal(ncol(specify(mtcars, formula = mpg ~ wt)), 2) - expect_equal(class(specify(mtcars, formula = mpg ~ wt))[1], "infer") + expect_equal(ncol(specify(mtcars_df, formula = mpg ~ NULL)), 1) + expect_equal(ncol(specify(mtcars_df, formula = mpg ~ wt)), 2) + expect_equal(class(specify(mtcars_df, formula = mpg ~ wt))[1], "infer") }) test_that("formula argument is a formula", { - expect_error(specify(mtcars, formula = "vs", success = 1)) + expect_error(specify(mtcars_df, formula = "vs", success = 1)) }) test_that("is_complete works", { diff --git a/tests/testthat/test-visualize.R b/tests/testthat/test-visualize.R index b69a6b39..222807ac 100644 --- a/tests/testthat/test-visualize.R +++ b/tests/testthat/test-visualize.R @@ -8,12 +8,6 @@ Sepal.Width_resamp <- iris %>% generate(reps = 10, type = "bootstrap") %>% calculate(stat = "median") -iris_tbl <- tibble::as_tibble(iris) %>% - dplyr::mutate( - Sepal.Length.Group = dplyr::if_else(Sepal.Length > 5, ">5", "<=5"), - Sepal.Width.Group = dplyr::if_else(Sepal.Width > 3, "large", "small") - ) - obs_slope <- lm(Sepal.Length ~ Sepal.Width, data = iris_tbl) %>% broom::tidy() %>% dplyr::filter(term == "Sepal.Width") %>% From f684df4aa8cd3ef63e8b82e5d906e990698f1fb4 Mon Sep 17 00:00:00 2001 From: "evgeni.chasnovski" Date: Sun, 12 Aug 2018 19:03:27 +0300 Subject: [PATCH 18/78] DRY visualization of theoretical distributions. --- R/visualize.R | 92 +++++++++++++++------------------------------------ 1 file changed, 27 insertions(+), 65 deletions(-) diff --git a/R/visualize.R b/R/visualize.R index 3d71b4be..e12d8154 100755 --- a/R/visualize.R +++ b/R/visualize.R @@ -199,20 +199,15 @@ visualize <- function(data, bins = 15, method = "simulation", infer_plot } -theory_t_plot <- function(deg_freedom, statistic_text = "t", - dens_color = dens_color, - ...) 
{ - ggplot(data.frame(x = c(qt(0.001, deg_freedom), - qt(0.999, deg_freedom)))) + +theory_plot <- function(d_fun, q_fun, args_list, stat_name, dens_color) { + x_range <- do.call(q_fun, c(p = list(c(0.001, 0.999)), args_list)) + + ggplot(data.frame(x = x_range)) + stat_function( - mapping = aes(x), - fun = dt, - args = list(df = deg_freedom), - color = dens_color + mapping = aes(x), fun = d_fun, args = args_list, color = dens_color ) + - ggtitle(glue_null("Theoretical {statistic_text} Null Distribution")) + - xlab("") + - ylab("") + ggtitle(glue_null("Theoretical {stat_name} Null Distribution")) + + xlab("") + ylab("") } both_t_plot <- function(data = data, deg_freedom, statistic_text = "t", @@ -245,23 +240,6 @@ both_t_plot <- function(data = data, deg_freedom, statistic_text = "t", ylab("") } -theory_anova_plot <- function(deg_freedom_top, deg_freedom_bottom, - statistic_text = "F", - dens_color = dens_color, - ...) { - ggplot(data.frame(x = c(qf(0.001, deg_freedom_top, deg_freedom_bottom), - qf(0.999, deg_freedom_top, deg_freedom_bottom)))) + - stat_function( - mapping = aes(x), - fun = df, - args = list(df1 = deg_freedom_top, df2 = deg_freedom_bottom), - color = dens_color - ) + - ggtitle(glue_null("Theoretical {statistic_text} Null Distribution")) + - xlab("") + - ylab("") -} - both_anova_plot <- function(data, deg_freedom_top, deg_freedom_bottom, statistic_text = "F", dens_color, @@ -301,14 +279,6 @@ both_anova_plot <- function(data, deg_freedom_top, ylab("") } -theory_z_plot <- function(statistic_text = "z", dens_color = dens_color, ...) { - ggplot(data.frame(x = c(qnorm(0.001), qnorm(0.999)))) + - stat_function(mapping = aes(x), fun = dnorm, color = dens_color) + - ggtitle(glue_null("Theoretical {statistic_text} Null Distribution")) + - xlab("") + - ylab("") -} - both_z_plot <- function(data, statistic_text = "z", dens_color, obs_stat, @@ -336,23 +306,6 @@ both_z_plot <- function(data, statistic_text = "z", ylab("") } -theory_chisq_plot <- function(deg_freedom, - statistic_text = "Chi-Square", - dens_color = dens_color, - ...) 
{ - ggplot(data.frame(x = c(qchisq(0.001, deg_freedom), - qchisq(0.999, deg_freedom)))) + - stat_function( - mapping = aes(x), - fun = dchisq, - args = list(df = deg_freedom), - color = dens_color - ) + - ggtitle(glue_null("Theoretical {statistic_text} Null Distribution")) + - xlab("") + - ylab("") -} - both_chisq_plot <- function(data, deg_freedom, statistic_text = "Chi-Square", dens_color, obs_stat, @@ -563,9 +516,10 @@ visualize_theoretical <- function(data, "Two sample t", "Slope with t", "One sample t" ) ) { - infer_plot <- theory_t_plot( - deg_freedom = attr(data, "distr_param"), - statistic_text = "t", + infer_plot <- theory_plot( + d_fun = dt, q_fun = qt, + args_list = list(df = attr(data, "distr_param")), + stat_name = "t", dens_color = dens_color ) } else if (attr(data, "theory_type") == "ANOVA") { @@ -575,16 +529,23 @@ visualize_theoretical <- function(data, ) } - infer_plot <- theory_anova_plot( - deg_freedom_top = attr(data, "distr_param"), - deg_freedom_bottom = attr(data, "distr_param2"), - statistic_text = "F", + infer_plot <- theory_plot( + d_fun = df, q_fun = qf, + args_list = list( + df1 = attr(data, "distr_param"), df2 = attr(data, "distr_param2") + ), + stat_name = "F", dens_color = dens_color ) } else if ( attr(data, "theory_type") %in% c("One sample prop z", "Two sample props z") ) { - infer_plot <- theory_z_plot(statistic_text = "z", dens_color = dens_color) + infer_plot <- theory_plot( + d_fun = dnorm, q_fun = qnorm, + args_list = list(), + stat_name = "z", + dens_color = dens_color + ) } else if ( attr(data, "theory_type") %in% c( "Chi-square test of indep", "Chi-square Goodness of Fit" @@ -597,9 +558,10 @@ visualize_theoretical <- function(data, ) } - infer_plot <- theory_chisq_plot( - deg_freedom = attr(data, "distr_param"), - statistic_text = "Chi-Square", + infer_plot <- theory_plot( + d_fun = dchisq, q_fun = qchisq, + args_list = list(df = attr(data, "distr_param")), + stat_name = "Chi-Square", dens_color = dens_color ) } # else { From 738bd0c86007bddcd5b767e97f5bbb4d75d81d2b Mon Sep 17 00:00:00 2001 From: "evgeni.chasnovski" Date: Sun, 12 Aug 2018 19:49:33 +0300 Subject: [PATCH 19/78] DRY visualization of "both" distributions. --- R/visualize.R | 169 ++++++++++++-------------------------------------- 1 file changed, 40 insertions(+), 129 deletions(-) diff --git a/R/visualize.R b/R/visualize.R index e12d8154..4b795b7e 100755 --- a/R/visualize.R +++ b/R/visualize.R @@ -210,16 +210,10 @@ theory_plot <- function(d_fun, q_fun, args_list, stat_name, dens_color) { xlab("") + ylab("") } -both_t_plot <- function(data = data, deg_freedom, statistic_text = "t", - dens_color, - obs_stat, - direction, - bins, - pvalue_fill, - endpoints, - ci_fill, - ...) { - infer_t_plot <- shade_density_check( +both_plot <- function(data, d_fun, args_list, stat_name, stat_label, dens_color, + obs_stat, direction, bins, pvalue_fill, endpoints, + ci_fill, ...) 
{ + infer_plot <- shade_density_check( data = data, obs_stat = obs_stat, direction = direction, @@ -228,117 +222,15 @@ both_t_plot <- function(data = data, deg_freedom, statistic_text = "t", pvalue_fill = pvalue_fill, ci_fill = ci_fill ) - - infer_t_plot + - stat_function( - fun = dt, args = list(df = deg_freedom), color = dens_color - ) + - ggtitle(glue_null( - "Simulation-Based and Theoretical {statistic_text} Null Distributions" - )) + - xlab("tstat") + - ylab("") -} - -both_anova_plot <- function(data, deg_freedom_top, - deg_freedom_bottom, statistic_text = "F", - dens_color, - obs_stat, - direction, - bins, - endpoints, - pvalue_fill, - ci_fill, - ...) { - if (!is.null(direction) && !(direction %in% c("greater", "right"))) { - warning_glue( - "F usually corresponds to right-tailed tests. Proceed with caution." - ) - } - - infer_anova_plot <- shade_density_check( - data = data, - obs_stat = obs_stat, - direction = direction, - bins = bins, - endpoints = endpoints, - pvalue_fill = pvalue_fill, - ci_fill = ci_fill - ) - - infer_anova_plot <- infer_anova_plot + - stat_function( - fun = df, - args = list(df1 = deg_freedom_top, df2 = deg_freedom_bottom), - color = dens_color - ) + - ggtitle(glue_null( - "Simulation-Based and Theoretical {statistic_text} Null Distributions" - )) + - xlab("Fstat") + - ylab("") -} - -both_z_plot <- function(data, statistic_text = "z", - dens_color, - obs_stat, - direction, - pvalue_fill, - bins, - endpoints, - ci_fill, - ...) { - infer_z_plot <- shade_density_check( - data = data, - obs_stat = obs_stat, - direction = direction, - bins = bins, - endpoints = endpoints, - pvalue_fill = pvalue_fill, - ci_fill = ci_fill - ) - infer_z_plot + - stat_function(fun = dnorm, color = dens_color) + - ggtitle(glue_null( - "Simulation-Based and Theoretical {statistic_text} Null Distributions" - )) + - xlab("zstat") + - ylab("") -} - -both_chisq_plot <- function(data, deg_freedom, statistic_text = "Chi-Square", - dens_color, - obs_stat, - direction, - bins, - endpoints, - pvalue_fill = pvalue_fill, - ci_fill = ci_fill, - ...) { - if (!is.null(direction) && !(direction %in% c("greater", "right"))) { - warning_glue("Chi-square usually corresponds to right-tailed tests. 
", - "Proceed with caution.") - } - - infer_chisq_plot <- shade_density_check( - data = data, - obs_stat = obs_stat, - direction = direction, - bins = bins, - endpoints = endpoints, - pvalue_fill = pvalue_fill, - ci_fill = ci_fill - ) - - infer_chisq_plot + + + infer_plot + stat_function( - fun = dchisq, args = list(df = deg_freedom), color = dens_color + fun = d_fun, args = args_list, color = dens_color ) + ggtitle(glue_null( - "Simulation-Based and Theoretical {statistic_text} Null Distributions" + "Simulation-Based and Theoretical {stat_name} Null Distributions" )) + - xlab("chisqstat") + - ylab("") + xlab(stat_label) + ylab("") } shade_density_check <- function(data, @@ -655,10 +547,11 @@ visualize_both <- function(data, bins, } if (attr(data, "theory_type") %in% c("Two sample t", "Slope with t")) { - infer_plot <- both_t_plot( + infer_plot <- both_plot( data = data, - deg_freedom = attr(data, "distr_param"), - statistic_text = "t", + d_fun = dt, + args_list = list(df = attr(data, "distr_param")), + stat_name = "t", stat_label = "tstat", dens_color = dens_color, bins = bins, direction = direction, @@ -668,11 +561,19 @@ visualize_both <- function(data, bins, ci_fill = ci_fill ) } else if (attr(data, "theory_type") == "ANOVA") { - infer_plot <- both_anova_plot( + if (!is.null(direction) && !(direction %in% c("greater", "right"))) { + warning_glue( + "F usually corresponds to right-tailed tests. Proceed with caution." + ) + } + + infer_plot <- both_plot( data = data, - deg_freedom_top = attr(data, "distr_param"), - deg_freedom_bottom = attr(data, "distr_param2"), - statistic_text = "F", + d_fun = df, + args_list = list( + df1 = attr(data, "distr_param"), df2 = attr(data, "distr_param2") + ), + stat_name = "F", stat_label = "Fstat", dens_color = dens_color, bins = bins, direction = direction, @@ -684,9 +585,11 @@ visualize_both <- function(data, bins, } else if ( attr(data, "theory_type") %in% c("One sample prop z", "Two sample props z") ) { - infer_plot <- both_z_plot( + infer_plot <- both_plot( data = data, - statistic_text = "z", + d_fun = dnorm, + args_list = list(), + stat_name = "z", stat_label = "zstat", dens_color = dens_color, bins = bins, direction = direction, @@ -700,10 +603,18 @@ visualize_both <- function(data, bins, "Chi-square test of indep", "Chi-square Goodness of Fit" ) ) { - infer_plot <- both_chisq_plot( + if (!is.null(direction) && !(direction %in% c("greater", "right"))) { + warning_glue( + "Chi-square usually corresponds to right-tailed tests. ", + "Proceed with caution." + ) + } + + infer_plot <- both_plot( data = data, - deg_freedom = attr(data, "distr_param"), - statistic_text = "Chi-Square", + d_fun = dchisq, + args_list = list(df = attr(data, "distr_param")), + stat_name = "Chi-Square", stat_label = "chisqstat", dens_color = dens_color, bins = bins, direction = direction, From 80f016b161e65c6e9ab19a25d6248e9ad6d8375d Mon Sep 17 00:00:00 2001 From: "evgeni.chasnovski" Date: Sun, 12 Aug 2018 20:00:25 +0300 Subject: [PATCH 20/78] DRY warning in visualization about right-tailed tests. 
--- R/visualize.R | 37 +++++++++++++++---------------------- 1 file changed, 15 insertions(+), 22 deletions(-) diff --git a/R/visualize.R b/R/visualize.R index 4b795b7e..90976451 100755 --- a/R/visualize.R +++ b/R/visualize.R @@ -415,11 +415,7 @@ visualize_theoretical <- function(data, dens_color = dens_color ) } else if (attr(data, "theory_type") == "ANOVA") { - if (!is.null(direction) && !(direction %in% c("greater", "right"))) { - warning_glue( - "F usually corresponds to right-tailed tests. Proceed with caution." - ) - } + warn_right_tail_test(direction, "F") infer_plot <- theory_plot( d_fun = df, q_fun = qf, @@ -443,12 +439,7 @@ visualize_theoretical <- function(data, "Chi-square test of indep", "Chi-square Goodness of Fit" ) ) { - if (!is.null(direction) && !(direction %in% c("greater", "right"))) { - warning_glue( - "Chi-square usually corresponds to right-tailed tests. ", - "Proceed with caution." - ) - } + warn_right_tail_test(direction, "Chi-square") infer_plot <- theory_plot( d_fun = dchisq, q_fun = qchisq, @@ -561,11 +552,7 @@ visualize_both <- function(data, bins, ci_fill = ci_fill ) } else if (attr(data, "theory_type") == "ANOVA") { - if (!is.null(direction) && !(direction %in% c("greater", "right"))) { - warning_glue( - "F usually corresponds to right-tailed tests. Proceed with caution." - ) - } + warn_right_tail_test(direction, "F") infer_plot <- both_plot( data = data, @@ -603,12 +590,7 @@ visualize_both <- function(data, bins, "Chi-square test of indep", "Chi-square Goodness of Fit" ) ) { - if (!is.null(direction) && !(direction %in% c("greater", "right"))) { - warning_glue( - "Chi-square usually corresponds to right-tailed tests. ", - "Proceed with caution." - ) - } + warn_right_tail_test(direction, "Chi-square") infer_plot <- both_plot( data = data, @@ -633,3 +615,14 @@ visualize_both <- function(data, bins, get_percentile <- function(vector, observation) { stats::ecdf(vector)(observation) } + +warn_right_tail_test <- function(direction, stat_name) { + if (!is.null(direction) && !(direction %in% c("greater", "right"))) { + warning_glue( + "{stat_name} usually corresponds to right-tailed tests. ", + "Proceed with caution." + ) + } + + TRUE +} \ No newline at end of file From 2cc4fe78e38b2b2e4b83a4ed54c6c14202095bf0 Mon Sep 17 00:00:00 2001 From: "evgeni.chasnovski" Date: Sun, 12 Aug 2018 21:47:30 +0300 Subject: [PATCH 21/78] DRY visualization of p-value shades. This also implements fixed shade transparency for all types of plots. Earlier it was fixed for "theoretical" and varied for "simulation" and "both". --- R/visualize.R | 177 +++++++++++++++++--------------------------------- 1 file changed, 59 insertions(+), 118 deletions(-) diff --git a/R/visualize.R b/R/visualize.R index 90976451..1846a479 100755 --- a/R/visualize.R +++ b/R/visualize.R @@ -267,80 +267,31 @@ shade_density_check <- function(data, geom_histogram(bins = bins, color = "white", ...) } - if (direction %in% c("less", "left")) { + if (direction %in% c("less", "left", "greater", "right")) { gg_plot <- gg_plot + - geom_rect( - fill = pvalue_fill, alpha = 0.01, - aes(xmin = -Inf, xmax = obs_stat, ymin = 0, ymax = Inf), - ... - ) - } - if (direction %in% c("greater", "right")) { - gg_plot <- gg_plot + - geom_rect( - fill = pvalue_fill, alpha = 0.01, - aes(xmin = obs_stat, xmax = Inf, ymin = 0, ymax = Inf), - ... 
- ) + geom_tail(direction, obs_stat, pvalue_fill) } - - if ( - (direction %in% c("two_sided", "both")) && - (obs_stat >= stats::median(data$stat)) - ) { + + if (direction %in% c("two_sided", "both")) { gg_plot <- gg_plot + - geom_rect( - fill = pvalue_fill, alpha = 0.01, - mapping = aes(xmin = obs_stat, xmax = Inf, ymin = 0, ymax = Inf), - ... - ) + - geom_rect( - fill = pvalue_fill, alpha = 0.01, - mapping = aes( - xmin = -Inf, - xmax = stats::quantile( - data$stat, probs = 1 - get_percentile(data$stat, obs_stat) - ), - ymin = 0, - ymax = Inf, - ... - ) + geom_both_tails( + border_1 = obs_stat, + border_2 = mirror_obs_stat(data$stat, obs_stat), + fill = pvalue_fill ) } - if ( - (direction %in% c("two_sided", "both")) && - (obs_stat < stats::median(data$stat)) - ) { + if (direction == "between") { gg_plot <- gg_plot + geom_rect( - fill = pvalue_fill, alpha = 0.01, - mapping = aes(xmin = -Inf, xmax = obs_stat, ymin = 0, ymax = Inf), + data = data.frame(endpoints[1]), + fill = ci_fill, alpha = 0.6, + aes(xmin = endpoints[1], xmax = endpoints[2], ymin = 0, ymax = Inf), + inherit.aes = FALSE, ... - ) + - geom_rect( - fill = pvalue_fill, alpha = 0.01, - mapping = aes( - xmin = stats::quantile( - data$stat, probs = 1 - get_percentile(data$stat, obs_stat) - ), - xmax = Inf, - ymin = 0, - ymax = Inf, - ... - ) ) } } - - if (direction == "between") { - gg_plot <- gg_plot + - geom_rect( - fill = ci_fill, alpha = 0.01, - aes(xmin = endpoints[1], xmax = endpoints[2], ymin = 0, ymax = Inf), - ... - ) - } } gg_plot } @@ -453,61 +404,17 @@ visualize_theoretical <- function(data, # ) # } - # Move into its own function - if (!is.null(obs_stat)) { - if (!is.null(direction)) { - if (direction %in% c("less", "left")) { - infer_plot <- infer_plot + - geom_rect( - data = data.frame(obs_stat), - fill = pvalue_fill, alpha = 0.6, - aes(xmin = -Inf, xmax = obs_stat, ymin = 0, ymax = Inf), - ... - ) - } - if (direction %in% c("greater", "right")) { - infer_plot <- infer_plot + - geom_rect( - data = data.frame(obs_stat), - fill = pvalue_fill, alpha = 0.6, - aes(xmin = obs_stat, xmax = Inf, ymin = 0, ymax = Inf), - ... - ) - } - - # Assuming two-tailed shading will only happen with theoretical - # distributions centered at 0 - if ((direction %in% c("two_sided", "both")) && (obs_stat >= 0)) { - infer_plot <- infer_plot + - geom_rect( - data = data.frame(obs_stat), - fill = pvalue_fill, alpha = 0.6, - aes(xmin = obs_stat, xmax = Inf, ymin = 0, ymax = Inf), - ... - ) + - geom_rect( - data = data.frame(obs_stat), - fill = pvalue_fill, alpha = 0.6, - aes(xmin = -Inf, xmax = -obs_stat, ymin = 0, ymax = Inf), - ... - ) - } - - if ((direction %in% c("two_sided", "both")) && (obs_stat < 0)) { - infer_plot <- infer_plot + - geom_rect( - data = data.frame(obs_stat), - fill = pvalue_fill, alpha = 0.6, - aes(xmin = -Inf, xmax = obs_stat, ymin = 0, ymax = Inf), - ... - ) + - geom_rect( - data = data.frame(obs_stat), - fill = pvalue_fill, alpha = 0.6, - aes(xmin = -obs_stat, xmax = Inf, ymin = 0, ymax = Inf), - ... - ) - } + # Plot tails + if (!is.null(obs_stat) && !is.null(direction)) { + if (direction %in% c("less", "left", "greater", "right")) { + infer_plot <- infer_plot + + geom_tail(direction, obs_stat, pvalue_fill, ...) + } + # Assuming two-tailed shading will only happen with theoretical + # distributions centered at 0 + if (direction %in% c("two_sided", "both")) { + infer_plot <- infer_plot + + geom_both_tails(obs_stat, -obs_stat, pvalue_fill, ...) 
} } @@ -616,6 +523,12 @@ get_percentile <- function(vector, observation) { stats::ecdf(vector)(observation) } +mirror_obs_stat <- function(vector, observation) { + obs_percentile <- get_percentile(vector, observation) + + stats::quantile(vector, probs = 1 - obs_percentile) +} + warn_right_tail_test <- function(direction, stat_name) { if (!is.null(direction) && !(direction %in% c("greater", "right"))) { warning_glue( @@ -625,4 +538,32 @@ warn_right_tail_test <- function(direction, stat_name) { } TRUE -} \ No newline at end of file +} + +geom_tail <- function(dir, border, fill, ...) { + if (dir %in% c("less", "left")) { + x_range <- c(-Inf, border) + } else if (dir %in% c("greater", "right")) { + x_range <- c(border, Inf) + } + + list( + geom_rect( + data = data.frame(border), + aes(xmin = x_range[1], xmax = x_range[2], ymin = 0, ymax = Inf), + fill = fill, alpha = 0.6, + inherit.aes = FALSE, + ... + ) + ) +} + +geom_both_tails <- function(border_1, border_2, fill, ...) { + left_border <- min(border_1, border_2) + right_border <- max(border_1, border_2) + + c( + geom_tail("left", left_border, fill, ...), + geom_tail("right", right_border, fill, ...) + ) +} From f592b7f58c73c750624c21fd2f3fbef4cc516d19 Mon Sep 17 00:00:00 2001 From: "evgeni.chasnovski" Date: Sun, 12 Aug 2018 22:14:19 +0300 Subject: [PATCH 22/78] DRY and standardize theory type names. Also this renames "Chi-square" into "Chi-Square" in warnings about right-tailed tests. --- R/visualize.R | 54 +++++++++++++++++---------------- tests/testthat/test-visualize.R | 9 ++++++ 2 files changed, 37 insertions(+), 26 deletions(-) diff --git a/R/visualize.R b/R/visualize.R index 1846a479..4beb1ea2 100755 --- a/R/visualize.R +++ b/R/visualize.R @@ -353,19 +353,17 @@ visualize_theoretical <- function(data, "different scales. Displaying only the theoretical distribution." 
) } + + theory_type <- short_theory_type(data) - if ( - attr(data, "theory_type") %in% c( - "Two sample t", "Slope with t", "One sample t" - ) - ) { + if (theory_type == "t") { infer_plot <- theory_plot( d_fun = dt, q_fun = qt, args_list = list(df = attr(data, "distr_param")), stat_name = "t", dens_color = dens_color ) - } else if (attr(data, "theory_type") == "ANOVA") { + } else if (theory_type == "F") { warn_right_tail_test(direction, "F") infer_plot <- theory_plot( @@ -376,21 +374,15 @@ visualize_theoretical <- function(data, stat_name = "F", dens_color = dens_color ) - } else if ( - attr(data, "theory_type") %in% c("One sample prop z", "Two sample props z") - ) { + } else if (theory_type == "z") { infer_plot <- theory_plot( d_fun = dnorm, q_fun = qnorm, args_list = list(), stat_name = "z", dens_color = dens_color ) - } else if ( - attr(data, "theory_type") %in% c( - "Chi-square test of indep", "Chi-square Goodness of Fit" - ) - ) { - warn_right_tail_test(direction, "Chi-square") + } else if (theory_type == "Chi-Square") { + warn_right_tail_test(direction, "Chi-Square") infer_plot <- theory_plot( d_fun = dchisq, q_fun = qchisq, @@ -444,7 +436,9 @@ visualize_both <- function(data, bins, ) } - if (attr(data, "theory_type") %in% c("Two sample t", "Slope with t")) { + theory_type <- short_theory_type(data) + + if (theory_type == "t") { infer_plot <- both_plot( data = data, d_fun = dt, @@ -458,7 +452,7 @@ visualize_both <- function(data, bins, endpoints = endpoints, ci_fill = ci_fill ) - } else if (attr(data, "theory_type") == "ANOVA") { + } else if (theory_type == "F") { warn_right_tail_test(direction, "F") infer_plot <- both_plot( @@ -476,9 +470,7 @@ visualize_both <- function(data, bins, endpoints = endpoints, ci_fill = ci_fill ) - } else if ( - attr(data, "theory_type") %in% c("One sample prop z", "Two sample props z") - ) { + } else if (theory_type == "z") { infer_plot <- both_plot( data = data, d_fun = dnorm, @@ -492,12 +484,8 @@ visualize_both <- function(data, bins, endpoints = endpoints, ci_fill = ci_fill ) - } else if ( - attr(data, "theory_type") %in% c( - "Chi-square test of indep", "Chi-square Goodness of Fit" - ) - ) { - warn_right_tail_test(direction, "Chi-square") + } else if (theory_type == "Chi-Square") { + warn_right_tail_test(direction, "Chi-Square") infer_plot <- both_plot( data = data, @@ -529,6 +517,20 @@ mirror_obs_stat <- function(vector, observation) { stats::quantile(vector, probs = 1 - obs_percentile) } +short_theory_type <- function(x) { + theory_attr <- attr(x, "theory_type") + theory_types <- list( + t = c("Two sample t", "Slope with t", "One sample t"), + `F` = "ANOVA", + z = c("One sample prop z", "Two sample props z"), + `Chi-Square` = c("Chi-square test of indep", "Chi-square Goodness of Fit") + ) + + is_type <- vapply(theory_types, function(x) {theory_attr %in% x}, logical(1)) + + names(theory_types)[which(is_type)[1]] +} + warn_right_tail_test <- function(direction, stat_name) { if (!is.null(direction) && !(direction %in% c("greater", "right"))) { warning_glue( diff --git a/tests/testthat/test-visualize.R b/tests/testthat/test-visualize.R index 222807ac..20dfd454 100644 --- a/tests/testthat/test-visualize.R +++ b/tests/testthat/test-visualize.R @@ -138,6 +138,15 @@ test_that("visualize basic tests", { calculate(stat = "t", order = c("small", "large")) %>% visualize(method = "theoretical", direction = "left", obs_stat = -obs_t) ) + + expect_warning( + iris_tbl %>% + specify(Petal.Width ~ NULL) %>% + hypothesize(null = "point", mu = 1) %>% + generate(reps = 
100) %>% + calculate(stat = "t") %>% + visualize(method = "both") + ) expect_warning( iris_tbl %>% From 421e91f594b0fc323e36b18825cd8af3212ea098 Mon Sep 17 00:00:00 2001 From: "evgeni.chasnovski" Date: Sun, 12 Aug 2018 22:14:43 +0300 Subject: [PATCH 23/78] Fix style in 'test-visualize.R'. --- tests/testthat/test-visualize.R | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tests/testthat/test-visualize.R b/tests/testthat/test-visualize.R index 20dfd454..69f49586 100644 --- a/tests/testthat/test-visualize.R +++ b/tests/testthat/test-visualize.R @@ -206,7 +206,8 @@ test_that("visualize basic tests", { ) %>% generate(reps = 100, type = "simulate") %>% calculate(stat = "Chisq") %>% - visualize(method = "both")) + visualize(method = "both") + ) # traditional instead of theoretical expect_error( From a675cf5dc75c0042a447863906b686a144073f02 Mon Sep 17 00:00:00 2001 From: "evgeni.chasnovski" Date: Sun, 12 Aug 2018 22:18:54 +0300 Subject: [PATCH 24/78] Update 'NEWS.md'. --- NEWS.md | 3 +++ 1 file changed, 3 insertions(+) diff --git a/NEWS.md b/NEWS.md index 00d4c356..b4caca49 100755 --- a/NEWS.md +++ b/NEWS.md @@ -5,6 +5,9 @@ - Remove help page for internal `set_params()` (#165). - Fully use {tibble} (#166). - Fix `calculate()` to not depend on order of `p` for `type = "simulate"` (#122). +- Reduce code duplication (#173). +- Make transparancy in `visualize()` to not depend on method and data volume. +- Make `visualize()` work for "One sample t" theoretical type with `method = "both"`. # infer 0.3.1 From 8ea69e60ec30681afcb41c0d4223a9af6e708cf0 Mon Sep 17 00:00:00 2001 From: "evgeni.chasnovski" Date: Sat, 18 Aug 2018 11:24:02 +0300 Subject: [PATCH 25/78] Add `stat = "sum"` option to `calculate()`. --- R/calculate.R | 14 ++++++++------ R/utils.R | 4 ++-- man/calculate.Rd | 11 +++++------ tests/testthat/test-calculate.R | 19 +++++++++++++++++++ 4 files changed, 34 insertions(+), 14 deletions(-) diff --git a/R/calculate.R b/R/calculate.R index 328801c5..2343ce15 100755 --- a/R/calculate.R +++ b/R/calculate.R @@ -3,9 +3,9 @@ #' @param x The output from [generate()] for computation-based inference or the #' output from [hypothesize()] piped in to here for theory-based inference. #' @param stat A string giving the type of the statistic to calculate. Current -#' options include `"mean"`, `"median"`, `"sd"`, `"prop"`, `"diff in means"`, -#' `"diff in medians"`, `"diff in props"`, `"Chisq"`, `"F"`, `"t"`, `"z"`, -#' `"slope"`, and `"correlation"`. +#' options include `"mean"`, `"median"`, `"sum"`, `"sd"`, `"prop"`, `"diff in +#' means"`, `"diff in medians"`, `"diff in props"`, `"Chisq"`, `"F"`, `"t"`, +#' `"z"`, `"slope"`, and `"correlation"`. 
#' @param order A string vector of specifying the order in which the levels of #' the explanatory variable should be ordered for subtraction, where `order = #' c("first", "second")` means `("first" - "second")` Needed for inference on @@ -29,7 +29,7 @@ #' @export calculate <- function(x, stat = c( - "mean", "median", "sd", "prop", "diff in means", + "mean", "median", "sum", "sd", "prop", "diff in means", "diff in medians", "diff in props", "Chisq", "F", "slope", "correlation", "t", "z" ), @@ -53,8 +53,8 @@ calculate <- function(x, x$replicate <- 1L } else if ( stat %in% c( - "mean", "median", "sd", "prop", "diff in means", "diff in medians", - "diff in props", "slope", "correlation" + "mean", "median", "sum", "sd", "prop", "diff in means", + "diff in medians", "diff in props", "slope", "correlation" ) ) { stop_glue( @@ -140,6 +140,8 @@ calc_impl.mean <- calc_impl_one_f(mean) calc_impl.median <- calc_impl_one_f(stats::median) +calc_impl.sum <- calc_impl_one_f(sum) + calc_impl.sd <- calc_impl_one_f(stats::sd) calc_impl.prop <- function(type, x, order, ...) { diff --git a/R/utils.R b/R/utils.R index 2bee15d5..af55c9f4 100644 --- a/R/utils.R +++ b/R/utils.R @@ -129,7 +129,7 @@ check_args_and_attr <- function(x, explanatory_variable, response_variable, # but that's not as helpful to beginners with the cryptic error msg if ( !stat %in% c( - "mean", "median", "sd", "prop", "diff in means", "diff in medians", + "mean", "median", "sum", "sd", "prop", "diff in means", "diff in medians", "diff in props", "Chisq", "F", "slope", "correlation", "t", "z" ) ) { @@ -166,7 +166,7 @@ check_args_and_attr <- function(x, explanatory_variable, response_variable, } check_for_numeric_stat <- function(x, stat) { - if (stat %in% c("mean", "median", "sd")) { + if (stat %in% c("mean", "median", "sum", "sd")) { col <- base::setdiff(names(x), "replicate") if (!is.numeric(x[[as.character(col)]])) { diff --git a/man/calculate.Rd b/man/calculate.Rd index 05947bb8..4b81f413 100755 --- a/man/calculate.Rd +++ b/man/calculate.Rd @@ -4,18 +4,17 @@ \alias{calculate} \title{Calculate summary statistics} \usage{ -calculate(x, stat = c("mean", "median", "sd", "prop", "diff in means", - "diff in medians", "diff in props", "Chisq", "F", "slope", "correlation", - "t", "z"), order = NULL, ...) +calculate(x, stat = c("mean", "median", "sum", "sd", "prop", + "diff in means", "diff in medians", "diff in props", "Chisq", "F", + "slope", "correlation", "t", "z"), order = NULL, ...) } \arguments{ \item{x}{The output from \code{\link[=generate]{generate()}} for computation-based inference or the output from \code{\link[=hypothesize]{hypothesize()}} piped in to here for theory-based inference.} \item{stat}{A string giving the type of the statistic to calculate. 
Current -options include \code{"mean"}, \code{"median"}, \code{"sd"}, \code{"prop"}, \code{"diff in means"}, -\code{"diff in medians"}, \code{"diff in props"}, \code{"Chisq"}, \code{"F"}, \code{"t"}, \code{"z"}, -\code{"slope"}, and \code{"correlation"}.} +options include \code{"mean"}, \code{"median"}, \code{"sum"}, \code{"sd"}, \code{"prop"}, \code{"diff in means"}, \code{"diff in medians"}, \code{"diff in props"}, \code{"Chisq"}, \code{"F"}, \code{"t"}, +\code{"z"}, \code{"slope"}, and \code{"correlation"}.} \item{order}{A string vector of specifying the order in which the levels of the explanatory variable should be ordered for subtraction, where \code{order = c("first", "second")} means \code{("first" - "second")} Needed for inference on diff --git a/tests/testthat/test-calculate.R b/tests/testthat/test-calculate.R index daefde6c..1c017b52 100644 --- a/tests/testthat/test-calculate.R +++ b/tests/testthat/test-calculate.R @@ -428,3 +428,22 @@ test_that("calc_impl_one_f works", { test_that("calc_impl_diff_f works", { expect_true(is.function(calc_impl_diff_f(mean))) }) + +test_that("calc_impl.sum works", { + expect_equal( + iris_tbl %>% + specify(Petal.Width ~ NULL) %>% + calculate(stat = "sum") %>% + `[[`(1), + sum(iris_tbl$Petal.Width) + ) + + gen_iris16 <- iris_tbl %>% + specify(Petal.Width ~ NULL) %>% + generate(10) + + expect_equal( + gen_iris16 %>% calculate(stat = "sum"), + gen_iris16 %>% dplyr::summarise(stat = sum(Petal.Width)) + ) +}) \ No newline at end of file From f959c08983f300a1a647427397b95e4f732b4d53 Mon Sep 17 00:00:00 2001 From: "evgeni.chasnovski" Date: Sat, 18 Aug 2018 12:20:00 +0300 Subject: [PATCH 26/78] Add `stat = "count"` option to `calculate()`. --- R/calculate.R | 75 ++++++++++++++++++--------------- R/utils.R | 5 ++- man/calculate.Rd | 7 +-- tests/testthat/test-calculate.R | 42 ++++++++++++++---- 4 files changed, 82 insertions(+), 47 deletions(-) diff --git a/R/calculate.R b/R/calculate.R index 2343ce15..5b1aae43 100755 --- a/R/calculate.R +++ b/R/calculate.R @@ -3,9 +3,9 @@ #' @param x The output from [generate()] for computation-based inference or the #' output from [hypothesize()] piped in to here for theory-based inference. #' @param stat A string giving the type of the statistic to calculate. Current -#' options include `"mean"`, `"median"`, `"sum"`, `"sd"`, `"prop"`, `"diff in -#' means"`, `"diff in medians"`, `"diff in props"`, `"Chisq"`, `"F"`, `"t"`, -#' `"z"`, `"slope"`, and `"correlation"`. +#' options include `"mean"`, `"median"`, `"sum"`, `"sd"`, `"prop"`, `"count"`, +#' `"diff in means"`, `"diff in medians"`, `"diff in props"`, `"Chisq"`, +#' `"F"`, `"t"`, `"z"`, `"slope"`, and `"correlation"`. #' @param order A string vector of specifying the order in which the levels of #' the explanatory variable should be ordered for subtraction, where `order = #' c("first", "second")` means `("first" - "second")` Needed for inference on @@ -29,9 +29,9 @@ #' @export calculate <- function(x, stat = c( - "mean", "median", "sum", "sd", "prop", "diff in means", - "diff in medians", "diff in props", "Chisq", "F", - "slope", "correlation", "t", "z" + "mean", "median", "sum", "sd", "prop", "count", + "diff in means", "diff in medians", "diff in props", + "Chisq", "F", "slope", "correlation", "t", "z" ), order = NULL, ...) 
{ @@ -53,7 +53,7 @@ calculate <- function(x, x$replicate <- 1L } else if ( stat %in% c( - "mean", "median", "sum", "sd", "prop", "diff in means", + "mean", "median", "sum", "sd", "prop", "count", "diff in means", "diff in medians", "diff in props", "slope", "correlation" ) ) { @@ -62,7 +62,7 @@ calculate <- function(x, "implemented) for `stat` = \"{stat}\". Are you missing ", "a `generate()` step?" ) - } else if (!(stat %in% c("Chisq", "prop"))) { + } else if (!(stat %in% c("Chisq", "prop", "count"))) { # From `hypothesize()` to `calculate()` # Catch-all if generate was not called # warning_glue("You unexpectantly went from `hypothesize()` to ", @@ -144,35 +144,42 @@ calc_impl.sum <- calc_impl_one_f(sum) calc_impl.sd <- calc_impl_one_f(stats::sd) -calc_impl.prop <- function(type, x, order, ...) { - col <- base::setdiff(names(x), "replicate") - - ## No longer needed with implementation of `check_point_params()` - # if (!is.factor(x[[col]])) { - # stop_glue( - # "Calculating a {stat} here is not appropriate since the `{col}` ", - # "variable is not a factor." - # ) - # } - - if (is_nuat(x, "success")) { - stop_glue( - 'To calculate a proportion, the `"success"` argument must be provided ', - 'in `specify()`.' - ) - } - - success <- attr(x, "success") - x %>% - dplyr::group_by(replicate) %>% - dplyr::summarize( - stat = mean( - # rlang::eval_tidy(col) == rlang::eval_tidy(success), ... - !!sym(col) == success, ... +calc_impl_success_f <- function(f, output_name) { + function(type, x, order, ...) { + col <- base::setdiff(names(x), "replicate") + + ## No longer needed with implementation of `check_point_params()` + # if (!is.factor(x[[col]])) { + # stop_glue( + # "Calculating a {stat} here is not appropriate since the `{col}` ", + # "variable is not a factor." + # ) + # } + + if (is_nuat(x, "success")) { + stop_glue( + 'To calculate a {output_name}, the `"success"` argument must be ', + 'provided in `specify()`.' ) - ) + } + + success <- attr(x, "success") + x %>% + dplyr::group_by(replicate) %>% + dplyr::summarize(stat = f(!!sym(col), success)) + } } +calc_impl.prop <- calc_impl_success_f( + f = function(response, success, ...) {mean(response == success, ...)}, + output_name = "proportion" +) + +calc_impl.count <- calc_impl_success_f( + f = function(response, success, ...) {sum(response == success, ...)}, + output_name = "count" +) + calc_impl.F <- function(type, x, order, ...) { x %>% dplyr::summarize( diff --git a/R/utils.R b/R/utils.R index af55c9f4..9c7d0e96 100644 --- a/R/utils.R +++ b/R/utils.R @@ -129,8 +129,9 @@ check_args_and_attr <- function(x, explanatory_variable, response_variable, # but that's not as helpful to beginners with the cryptic error msg if ( !stat %in% c( - "mean", "median", "sum", "sd", "prop", "diff in means", "diff in medians", - "diff in props", "Chisq", "F", "slope", "correlation", "t", "z" + "mean", "median", "sum", "sd", "prop", "count", "diff in means", + "diff in medians", "diff in props", "Chisq", "F", "slope", "correlation", + "t", "z" ) ) { stop_glue( diff --git a/man/calculate.Rd b/man/calculate.Rd index 4b81f413..6861eb6e 100755 --- a/man/calculate.Rd +++ b/man/calculate.Rd @@ -4,7 +4,7 @@ \alias{calculate} \title{Calculate summary statistics} \usage{ -calculate(x, stat = c("mean", "median", "sum", "sd", "prop", +calculate(x, stat = c("mean", "median", "sum", "sd", "prop", "count", "diff in means", "diff in medians", "diff in props", "Chisq", "F", "slope", "correlation", "t", "z"), order = NULL, ...) 
} @@ -13,8 +13,9 @@ calculate(x, stat = c("mean", "median", "sum", "sd", "prop", output from \code{\link[=hypothesize]{hypothesize()}} piped in to here for theory-based inference.} \item{stat}{A string giving the type of the statistic to calculate. Current -options include \code{"mean"}, \code{"median"}, \code{"sum"}, \code{"sd"}, \code{"prop"}, \code{"diff in means"}, \code{"diff in medians"}, \code{"diff in props"}, \code{"Chisq"}, \code{"F"}, \code{"t"}, -\code{"z"}, \code{"slope"}, and \code{"correlation"}.} +options include \code{"mean"}, \code{"median"}, \code{"sum"}, \code{"sd"}, \code{"prop"}, \code{"count"}, +\code{"diff in means"}, \code{"diff in medians"}, \code{"diff in props"}, \code{"Chisq"}, +\code{"F"}, \code{"t"}, \code{"z"}, \code{"slope"}, and \code{"correlation"}.} \item{order}{A string vector of specifying the order in which the levels of the explanatory variable should be ordered for subtraction, where \code{order = c("first", "second")} means \code{("first" - "second")} Needed for inference on diff --git a/tests/testthat/test-calculate.R b/tests/testthat/test-calculate.R index 1c017b52..b03a2453 100644 --- a/tests/testthat/test-calculate.R +++ b/tests/testthat/test-calculate.R @@ -275,14 +275,15 @@ test_that("`order` is working", { expect_error(calculate(gen_iris11, stat = "diff in means")) }) +gen_iris12 <- iris %>% + dplyr::mutate( + Sepal.Length.Group = dplyr::if_else(Sepal.Length > 5, ">5", "<=5") + ) %>% + specify(Sepal.Length.Group ~ NULL, success = ">5") %>% + hypothesize(null = "point", p = 0.3) %>% + generate(reps = 10, type = "simulate") + test_that('success is working for stat = "prop"', { - gen_iris12 <- iris %>% - dplyr::mutate( - Sepal.Length.Group = dplyr::if_else(Sepal.Length > 5, ">5", "<=5") - ) %>% - specify(Sepal.Length.Group ~ NULL, success = ">5") %>% - hypothesize(null = "point", p = 0.3) %>% - generate(reps = 10, type = "simulate") expect_silent(gen_iris12 %>% calculate(stat = "prop")) expect_silent(gen_iris12 %>% calculate(stat = "z")) }) @@ -372,6 +373,7 @@ test_that("specify done before calculate", { iris_prop <- iris_tbl %>% dplyr::select(Sepal.Length.Group) attr(iris_prop, "response") <- "Sepal.Length.Group" expect_error(calculate(iris_prop, stat = "prop")) + expect_error(calculate(iris_prop, stat = "count")) }) test_that("chisq GoF has params specified for observed stat", { @@ -446,4 +448,28 @@ test_that("calc_impl.sum works", { gen_iris16 %>% calculate(stat = "sum"), gen_iris16 %>% dplyr::summarise(stat = sum(Petal.Width)) ) -}) \ No newline at end of file +}) + +test_that("calc_impl_success_f works", { + expect_true( + is.function(calc_impl_success_f( + f = function(response, success, ...) {mean(response == success, ...)}, + output_name = "proportion" + )) + ) +}) + +test_that("calc_impl.count works", { + expect_equal( + iris_tbl %>% + specify(Sepal.Length.Group ~ NULL, success = ">5") %>% + calculate(stat = "count") %>% + `[[`(1), + sum(iris_tbl$Sepal.Length.Group == ">5") + ) + + expect_equal( + gen_iris12 %>% calculate(stat = "count"), + gen_iris12 %>% dplyr::summarise(stat = sum(Sepal.Length.Group == ">5")) + ) +}) From 99a57c0c5fa6a0ff05f962ee07a5bb46fb6002b1 Mon Sep 17 00:00:00 2001 From: "evgeni.chasnovski" Date: Sat, 18 Aug 2018 12:25:29 +0300 Subject: [PATCH 27/78] Update 'NEWS.md'. --- NEWS.md | 1 + 1 file changed, 1 insertion(+) diff --git a/NEWS.md b/NEWS.md index b4caca49..2901c160 100755 --- a/NEWS.md +++ b/NEWS.md @@ -8,6 +8,7 @@ - Reduce code duplication (#173). 
- Make transparancy in `visualize()` to not depend on method and data volume. - Make `visualize()` work for "One sample t" theoretical type with `method = "both"`. +- Add `stat = "sum"` and `stat = "count"` options to `calculate()` (#50). # infer 0.3.1 From c5258949e3164029d1b6b1d0a91833f44cdf580f Mon Sep 17 00:00:00 2001 From: rudeboybert Date: Thu, 23 Aug 2018 09:10:02 -0400 Subject: [PATCH 28/78] glue package version requirement --- DESCRIPTION | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/DESCRIPTION b/DESCRIPTION index e61ed7b6..a894ff0a 100755 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -26,7 +26,7 @@ Imports: rlang (>= 0.2.0), ggplot2, magrittr, - glue + glue (>= 1.3.0) Depends: R (>= 3.1.2) Suggests: From 8470f851f0744bad68201be8b42d08924eac3b86 Mon Sep 17 00:00:00 2001 From: rudeboybert Date: Thu, 23 Aug 2018 10:28:24 -0400 Subject: [PATCH 29/78] Added glue::glue_collpase to NAMESPACE --- NAMESPACE | 1 + R/print_methods.R | 1 + 2 files changed, 2 insertions(+) diff --git a/NAMESPACE b/NAMESPACE index a6bf6e27..ebc46efd 100755 --- a/NAMESPACE +++ b/NAMESPACE @@ -37,6 +37,7 @@ importFrom(ggplot2,ggtitle) importFrom(ggplot2,stat_function) importFrom(ggplot2,xlab) importFrom(ggplot2,ylab) +importFrom(glue,glue_collapse) importFrom(magrittr,"%>%") importFrom(methods,hasArg) importFrom(rlang,"!!") diff --git a/R/print_methods.R b/R/print_methods.R index a1cf028c..2ccffc2d 100644 --- a/R/print_methods.R +++ b/R/print_methods.R @@ -3,6 +3,7 @@ #' @param x An object of class `infer`, i.e. output from [specify()] or #' [hypothesize()]. #' @param ... Arguments passed to methods. +#' @importFrom glue glue_collapse #' #' @export print.infer <- function(x, ...) { From 30b5801c079c0224758d50095d963463aa8fd895 Mon Sep 17 00:00:00 2001 From: echasnovski Date: Fri, 24 Aug 2018 08:45:51 +0000 Subject: [PATCH 30/78] Get back to development version in `develop` branch. --- DESCRIPTION | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/DESCRIPTION b/DESCRIPTION index a894ff0a..b2fea636 100755 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,7 +1,7 @@ Package: infer Type: Package Title: Tidy Statistical Inference -Version: 0.3.1 +Version: 0.3.1.9000 Authors@R: c( person("Andrew", "Bray", email = "abray@reed.edu", role = c("aut", "cre")), person("Chester", "Ismay", email = "chester.ismay@gmail.com", role = "aut"), From 4fdbdbc4ab857d01df64774835a4d967f03d275d Mon Sep 17 00:00:00 2001 From: Brian Fannin Date: Thu, 30 Aug 2018 19:31:51 -0400 Subject: [PATCH 31/78] Revised error message when `formula` argument is not a formula type. This partially resolves issue #110. --- R/specify.R | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/R/specify.R b/R/specify.R index 93a69306..9440b418 100755 --- a/R/specify.R +++ b/R/specify.R @@ -44,7 +44,9 @@ specify <- function(x, formula, response = NULL, } if (methods::hasArg(formula)) { if (!rlang::is_formula(formula)) { - stop_glue("The `formula` argument is not recognized as a formula.") + stop_glue("The first unnamed argument must be a formula. + * You passed in '{get_type(formula)}'. + * Did you forget to name one or more aguments?") } } From 961c32f8110532b0ed1f3c2895dbbf8db73abb2e Mon Sep 17 00:00:00 2001 From: Brian Fannin Date: Fri, 31 Aug 2018 11:03:58 -0400 Subject: [PATCH 32/78] Catch additional misspecification of formula argument, add informative error message and test. This addresses issue #110. 
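
The new guard wraps the `rlang::is_formula()` check in `tryCatch()`, so a misspecified `formula` argument stops with a pointed hint rather than a bare object-not-found error. A short sketch of the misuse it targets, mirroring the tests added below (both calls are expected to error):

# An unquoted column name is picked up positionally as `formula`; evaluating
# `am` fails, and tryCatch() converts that failure into the hint about
# passing a formula and naming arguments.
specify(mtcars, am, success = "1")

# A stray unnamed argument lands in `formula` as the string "1", which is not
# a formula, so the "first unnamed argument must be a formula" message fires.
specify(mtcars, response = am, "1")
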
--- R/specify.R | 11 ++++++++++- tests/testthat/test-specify.R | 4 ++++ 2 files changed, 14 insertions(+), 1 deletion(-) diff --git a/R/specify.R b/R/specify.R index 9440b418..517e3b8b 100755 --- a/R/specify.R +++ b/R/specify.R @@ -43,7 +43,16 @@ specify <- function(x, formula, response = NULL, stop_glue("Please give the `response` variable.") } if (methods::hasArg(formula)) { - if (!rlang::is_formula(formula)) { + + tryCatch( + formula_arg_is_formula <- rlang::is_formula(formula) + , error = function(e) { + stop_glue("The argument you passed in for the formula does not exist. + * Were you trying to pass in an unquoted column name? + * Did you forget to name one or more aguments?") + } + ) + if (!formula_arg_is_formula) { stop_glue("The first unnamed argument must be a formula. * You passed in '{get_type(formula)}'. * Did you forget to name one or more aguments?") diff --git a/tests/testthat/test-specify.R b/tests/testthat/test-specify.R index 392d5694..6d47fadb 100644 --- a/tests/testthat/test-specify.R +++ b/tests/testthat/test-specify.R @@ -57,6 +57,10 @@ test_that("sensible output", { test_that("formula argument is a formula", { expect_error(specify(mtcars_df, formula = "vs", success = 1)) + + # Issue #110: https://github.com/tidymodels/infer/issues/110 + expect_error(specify(mtcars, am, success = "1")) + expect_error(specify(mtcars, response = am, "1")) }) test_that("is_complete works", { From 166af9ed246fee9c34691575ee52e090123c1639 Mon Sep 17 00:00:00 2001 From: Chester Ismay Date: Fri, 31 Aug 2018 08:12:48 -0700 Subject: [PATCH 33/78] Fix typo --- R/specify.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/specify.R b/R/specify.R index 517e3b8b..83acd4ed 100755 --- a/R/specify.R +++ b/R/specify.R @@ -49,7 +49,7 @@ specify <- function(x, formula, response = NULL, , error = function(e) { stop_glue("The argument you passed in for the formula does not exist. * Were you trying to pass in an unquoted column name? - * Did you forget to name one or more aguments?") + * Did you forget to name one or more arguments?") } ) if (!formula_arg_is_formula) { From b124dc6bdf56dc7e0a995bc236804490742b2623 Mon Sep 17 00:00:00 2001 From: Brian Fannin Date: Fri, 31 Aug 2018 11:03:58 -0400 Subject: [PATCH 34/78] Catch additional misspecification of formula argument, add informative error message and test. This addresses issue #110. --- R/specify.R | 11 ++++++++++- tests/testthat/test-specify.R | 9 +++++++++ 2 files changed, 19 insertions(+), 1 deletion(-) diff --git a/R/specify.R b/R/specify.R index 9440b418..517e3b8b 100755 --- a/R/specify.R +++ b/R/specify.R @@ -43,7 +43,16 @@ specify <- function(x, formula, response = NULL, stop_glue("Please give the `response` variable.") } if (methods::hasArg(formula)) { - if (!rlang::is_formula(formula)) { + + tryCatch( + formula_arg_is_formula <- rlang::is_formula(formula) + , error = function(e) { + stop_glue("The argument you passed in for the formula does not exist. + * Were you trying to pass in an unquoted column name? + * Did you forget to name one or more aguments?") + } + ) + if (!formula_arg_is_formula) { stop_glue("The first unnamed argument must be a formula. * You passed in '{get_type(formula)}'. 
* Did you forget to name one or more aguments?") diff --git a/tests/testthat/test-specify.R b/tests/testthat/test-specify.R index 392d5694..5cb00f62 100644 --- a/tests/testthat/test-specify.R +++ b/tests/testthat/test-specify.R @@ -57,6 +57,15 @@ test_that("sensible output", { test_that("formula argument is a formula", { expect_error(specify(mtcars_df, formula = "vs", success = 1)) + + # Issue #110: https://github.com/tidymodels/infer/issues/110 + expect_error(specify(mtcars, am, success = "1")) + expect_error(specify(mtcars, response = am, "1")) + expect_silent({ + mtcars %>% + dplyr::mutate(am = factor(am)) %>% + specify(response = am, success = "1") + }) }) test_that("is_complete works", { From a14987fa2b8893dd618dec243ded4f67309cd259 Mon Sep 17 00:00:00 2001 From: Richard Cotton Date: Mon, 3 Sep 2018 08:40:26 -0400 Subject: [PATCH 35/78] use get_p_value() & get_confidence_interval() consistently --- NAMESPACE | 1 + NEWS.md | 2 + R/conf_int.R | 75 +++++---- R/deprecated.R | 49 ++++++ R/p_value.R | 134 ++++++++-------- inst/doc/chisq_test.R | 2 +- inst/doc/chisq_test.Rmd | 2 +- inst/doc/chisq_test.html | 10 +- inst/doc/observed_stat_examples.R | 30 ++-- inst/doc/observed_stat_examples.Rmd | 30 ++-- inst/doc/observed_stat_examples.html | 146 ++++++++++++------ inst/doc/two_sample_t.R | 2 +- inst/doc/two_sample_t.Rmd | 2 +- inst/doc/two_sample_t.html | 10 +- man/deprecated.Rd | 38 +++++ man/{get_ci.Rd => get_confidence_interval.Rd} | 30 ++-- man/{get_pvalue.Rd => get_p_value.Rd} | 28 ++-- tests/testthat/test-conf_int.R | 18 +-- tests/testthat/test-p_value.R | 18 +-- vignettes/chisq_test.Rmd | 2 +- vignettes/observed_stat_examples.Rmd | 30 ++-- vignettes/two_sample_t.Rmd | 2 +- 22 files changed, 407 insertions(+), 254 deletions(-) create mode 100644 R/deprecated.R create mode 100644 man/deprecated.Rd rename man/{get_ci.Rd => get_confidence_interval.Rd} (71%) rename man/{get_pvalue.Rd => get_p_value.Rd} (65%) diff --git a/NAMESPACE b/NAMESPACE index a6bf6e27..fa237fef 100755 --- a/NAMESPACE +++ b/NAMESPACE @@ -9,6 +9,7 @@ export(conf_int) export(generate) export(get_ci) export(get_confidence_interval) +export(get_p_value) export(get_pvalue) export(hypothesize) export(p_value) diff --git a/NEWS.md b/NEWS.md index 2901c160..1539f44f 100755 --- a/NEWS.md +++ b/NEWS.md @@ -9,6 +9,8 @@ - Make transparancy in `visualize()` to not depend on method and data volume. - Make `visualize()` work for "One sample t" theoretical type with `method = "both"`. - Add `stat = "sum"` and `stat = "count"` options to `calculate()` (#50). +- Deprecated `p_value()` and `get_pvalue()` (use `get_p_value()` instead) (#180). +- Deprecated `cont_int()` and `get_ci()` (use `get_confidence_interval()` instead) (#180). # infer 0.3.1 diff --git a/R/conf_int.R b/R/conf_int.R index 2632bf51..1889a88d 100644 --- a/R/conf_int.R +++ b/R/conf_int.R @@ -1,7 +1,6 @@ #' Compute confidence interval #' #' Only simulation-based methods are (currently only) supported. -#' `get_confidence_interval()` and `get_ci()` are both aliases of `conf_int()`. #' #' @param x Data frame of calculated statistics or containing attributes of #' theoretical distribution values. Currently, dependent on statistics being @@ -16,34 +15,42 @@ #' #' @return A 1 x 2 tibble with values corresponding to lower and upper values in #' the confidence interval. +#' @section Aliases: +#' `conf_int()` and `get_ci()` are deprecated aliases of `get_confidence_interval()`. 
#' #' @examples +#' # Prepare the dataset #' mtcars_df <- mtcars %>% #' dplyr::mutate(am = factor(am)) +#' +#' # Calculate the difference in means in the dataset #' d_hat <- mtcars_df %>% #' specify(mpg ~ am) %>% #' calculate(stat = "diff in means", order = c("1", "0")) +#' +#' # Same calculation on 100 bootstrap replicates #' bootstrap_distn <- mtcars_df %>% #' specify(mpg ~ am) %>% -#' generate(reps = 100) %>% +#' generate(reps = 100, type = "bootstrap") %>% #' calculate(stat = "diff in means", order = c("1", "0")) -#' bootstrap_distn %>% conf_int(level = 0.9) -#' bootstrap_distn %>% conf_int(type = "se", point_estimate = d_hat) #' -#' @name get_ci -NULL - -#' @rdname get_ci +#' # Use level to set the confidence level +#' bootstrap_distn %>% +#' get_confidence_interval(level = 0.9) +#' +#' # To calculate std error, set the type and point estimate +#' bootstrap_distn %>% +#' get_confidence_interval(type = "se", point_estimate = d_hat) +#' @name get_confidence_interval #' @export -conf_int <- function(x, level = 0.95, type = "percentile", - point_estimate = NULL) { +get_confidence_interval <- function(x, level = 0.95, type = "percentile", + point_estimate = NULL){ + check_ci_args(x, level, type, point_estimate) - if (type == "percentile") { - ci_vec <- stats::quantile( - x[["stat"]], - probs = c((1 - level) / 2, level + (1 - level) / 2) - ) + if(type == "percentile") { + ci_vec <- stats::quantile(x[["stat"]], + probs = c((1 - level) / 2, level + (1 - level) / 2)) ci <- tibble::tibble(ci_vec[1], ci_vec[2]) names(ci) <- names(ci_vec) @@ -52,47 +59,35 @@ conf_int <- function(x, level = 0.95, type = "percentile", multiplier <- stats::qnorm(1 - (1 - level) / 2) ci <- tibble::tibble( lower = point_estimate - multiplier * stats::sd(x[["stat"]]), - upper = point_estimate + multiplier * stats::sd(x[["stat"]]) - ) + upper = point_estimate + multiplier * stats::sd(x[["stat"]])) } - ci + return(ci) } -check_ci_args <- function(x, level, type, point_estimate) { - if (!is.null(point_estimate)) { - if (!is.data.frame(point_estimate)) { +check_ci_args <- function(x, level, type, point_estimate){ + + if(!is.null(point_estimate)){ + if(!is.data.frame(point_estimate)) check_type(point_estimate, is.numeric) - } else { + else check_type(point_estimate, is.data.frame) - } } check_type(x, is.data.frame) check_type(level, is.numeric) - if ((level <= 0) || (level >= 1)) { + if(level <= 0 || level >= 1){ stop_glue("The value of `level` must be between 0 and 1 non-inclusive.") } - if (!(type %in% c("percentile", "se"))) { + if(!(type %in% c("percentile", "se"))){ stop_glue('The options for `type` are "percentile" or "se".') } - if ((type == "se") && is.null(point_estimate)) { - stop_glue( - 'A numeric value needs to be given for `point_estimate` for `type = "se"' - ) - } + if(type == "se" && is.null(point_estimate)) + stop_glue('A numeric value needs to be given for `point_estimate` ', + 'for `type = "se"') - if ((type == "se") && is.vector(point_estimate)) { + if(type == "se" && is.vector(point_estimate)) check_type(point_estimate, is.numeric) - } } - -#' @rdname get_ci -#' @export -get_ci <- conf_int - -#' @rdname get_ci -#' @export -get_confidence_interval <- conf_int diff --git a/R/deprecated.R b/R/deprecated.R new file mode 100644 index 00000000..8c4e7b67 --- /dev/null +++ b/R/deprecated.R @@ -0,0 +1,49 @@ +#' Deprecated functions +#' +#' These functions should no longer be used. They will be removed in a +#' future release of \code{infer}. +#' @param x See the non-deprecated function. 
+#' @param level See the non-deprecated function. +#' @param type See the non-deprecated function. +#' @param point_estimate See the non-deprecated function. +#' @param obs_stat See the non-deprecated function. +#' @param direction See the non-deprecated function. +#' @seealso \code{\link{get_p_value}}, \code{\link{get_confidence_interval}} +#' @name deprecated +NULL + + +#' @rdname deprecated +#' @export +conf_int <- function(x, level = 0.95, type = "percentile", + point_estimate = NULL) { + .Deprecated("get_confidence_interval") + get_confidence_interval( + x, level = level, type = type, point_estimate = point_estimate + ) +} + +#' @rdname deprecated +#' @export +get_ci <- function(x, level = 0.95, type = "percentile", + point_estimate = NULL) { + .Deprecated("get_confidence_interval") + get_confidence_interval( + x, level = level, type = type, point_estimate = point_estimate + ) +} + + +#' @rdname deprecated +#' @export +p_value <- function(x, obs_stat, direction) { + .Deprecated("get_p_value") + get_p_value(x = x, obs_stat = obs_stat, direction = direction) +} + +#' @rdname deprecated +#' @export +get_pvalue <- function(x, obs_stat, direction) { + .Deprecated("get_p_value") + get_p_value(x = x, obs_stat = obs_stat, direction = direction) +} diff --git a/R/p_value.R b/R/p_value.R index 963329a4..6be7147c 100644 --- a/R/p_value.R +++ b/R/p_value.R @@ -1,7 +1,6 @@ #' Compute p-value #' -#' Only simulation-based methods are (currently only) supported. `get_pvalue()` -#' is an alias of `p_value`. +#' Only simulation-based methods are (currently only) supported. #' #' @param x Data frame of calculated statistics or containing attributes of #' theoretical distribution values. @@ -12,125 +11,124 @@ #' #' @return A 1x1 data frame with value between 0 and 1. #' +#' @section Aliases: +#' `p_value` and `get_pvalue()` are deprecated aliases of `get_p_value()`. +#' #' @examples +#' # Prepare the dataset #' mtcars_df <- mtcars %>% #' dplyr::mutate(am = factor(am)) +#' +#' # Calculate the difference in means in the dataset #' d_hat <- mtcars_df %>% #' specify(mpg ~ am) %>% #' calculate(stat = "diff in means", order = c("1", "0")) +#' +#' # Same calculation on 100 permutation replicates #' null_distn <- mtcars_df %>% #' specify(mpg ~ am) %>% #' hypothesize(null = "independence") %>% #' generate(reps = 100) %>% #' calculate(stat = "diff in means", order = c("1", "0")) -#' null_distn %>% -#' p_value(obs_stat = d_hat, direction = "right") #' -#' @name get_pvalue +#' # What proportion of replicates had a difference +#' # in means more extreme than in the dataset? +#' null_distn %>% +#' get_p_value(obs_stat = d_hat, direction = "right") +#' @name get_p_value NULL -#' @rdname get_pvalue +#' @rdname get_p_value #' @export -p_value <- function(x, obs_stat, direction) { +get_p_value <- function(x, obs_stat, direction){ + check_type(x, is.data.frame) obs_stat <- check_obs_stat(obs_stat) check_direction(direction) - is_simulation_based <- !is_nuat(x, "generate") && attr(x, "generate") + is_simulation_based <- !is.null(attr(x, "generate")) && + attr(x, "generate") - if (is_simulation_based) { - pvalue <- simulation_based_p_value( - x = x, obs_stat = obs_stat, direction = direction - ) - } + if(is_simulation_based) + pvalue <- simulation_based_p_value(x = x, obs_stat = obs_stat, + direction = direction) ## Theoretical-based p-value # Could be more specific - # else if ( - # is_nuat(x, "theory_type") || is_nuat(x, "distr_param") - # ) { - # stop_glue( - # "Attributes have not been set appropriately. 
", - # "Check your {{infer}} pipeline again." - # ) - # } - # - # if (!("stat" %in% names(x))) { - # # Theoretical distribution - # which_distribution( - # x, - # theory_type = attr(x, "theory_type"), - # obs_stat = obs_stat, - # direction = direction - # ) + # else if(is.null(attr(x, "theory_type")) || is.null(attr(x, "distr_param"))) + # stop_glue("Attributes have not been set appropriately. ", + # "Check your {{infer}} pipeline again.") + + # if(!("stat" %in% names(x))){ + # # Theoretical distribution + # which_distribution(x, + # theory_type <- attr(x, "theory_type"), + # obs_stat = obs_stat, + # direction = direction) # } - pvalue + return(pvalue) } -simulation_based_p_value <- function(x, obs_stat, direction) { - if (direction %in% c("less", "left")) { +simulation_based_p_value <- function(x, obs_stat, direction){ + + if(direction %in% c("less", "left")){ p_value <- x %>% dplyr::summarize(p_value = mean(stat <= obs_stat)) - } else if (direction %in% c("greater", "right")) { + } + else if(direction %in% c("greater", "right")){ p_value <- x %>% dplyr::summarize(p_value = mean(stat >= obs_stat)) - } else { + } + else{ p_value <- x %>% two_sided_p_value(obs_stat = obs_stat) } p_value } -two_sided_p_value <- function(x, obs_stat) { - if (stats::median(x$stat) >= obs_stat) { +two_sided_p_value <- function(x, obs_stat){ + + if(stats::median(x$stat) >= obs_stat){ basic_p_value <- get_percentile(x$stat, obs_stat) + - (1 - get_percentile( - x$stat, stats::median(x$stat) + stats::median(x$stat) - obs_stat - ) - ) + (1 - get_percentile(x$stat, stats::median(x$stat) + + stats::median(x$stat) - obs_stat)) } else { basic_p_value <- 1 - get_percentile(x$stat, obs_stat) + - get_percentile( - x$stat, stats::median(x$stat) + stats::median(x$stat) - obs_stat - ) + (get_percentile(x$stat, stats::median(x$stat) + + stats::median(x$stat) - obs_stat)) } - if (basic_p_value >= 1) { + if(basic_p_value >= 1) # Catch all if adding both sides produces a number # larger than 1. 
Should update with test in that # scenario instead of using >= - tibble::tibble(p_value = 1) - } else { - tibble::tibble(p_value = basic_p_value) - } + return(tibble::tibble(p_value = 1)) + else + return(tibble::tibble(p_value = basic_p_value)) } -#' @rdname get_pvalue -#' @export -get_pvalue <- p_value - -# which_distribution <- function(x, theory_type, obs_stat, direction) { +# which_distribution <- function(x, theory_type, obs_stat, direction){ +# # param <- attr(x, "distr_param") -# if (!is_nuat(x, "distr_param2")) { +# if(!is.null(attr(x, "distr_param2"))) # param2 <- attr(x, "distr_param2") -# } -# -# if (theory_type == "Two sample t") { -# return( -# pt(q = obs_stat, df = param, lower.tail = set_lower_tail(direction)) -# ) -# } +# +# if(theory_type == "Two sample t") +# return(pt(q = obs_stat, +# df = param, +# lower.tail = set_lower_tail(direction)) +# ) +# # } -# theory_t_pvalue <- +#theory_t_pvalue <- -# set_lower_tail <- function(direction) { -# if (direction %in% c("greater", "right")) { +# set_lower_tail <- function(direction){ +# if(direction %in% c("greater", "right")) # lower_tail <- FALSE -# } else { +# else # lower_tail <- TRUE -# } -# +# # lower_tail # } diff --git a/inst/doc/chisq_test.R b/inst/doc/chisq_test.R index 69d754ed..7d0faec3 100644 --- a/inst/doc/chisq_test.R +++ b/inst/doc/chisq_test.R @@ -46,7 +46,7 @@ chisq_null_distn %>% visualize(obs_stat = obs_chisq, direction = "greater") ## ------------------------------------------------------------------------ chisq_null_distn %>% - get_pvalue(obs_stat = obs_chisq, direction = "greater") + get_p_value(obs_stat = obs_chisq, direction = "greater") ## ------------------------------------------------------------------------ fli_small %>% diff --git a/inst/doc/chisq_test.Rmd b/inst/doc/chisq_test.Rmd index f2dd959a..d56b5201 100644 --- a/inst/doc/chisq_test.Rmd +++ b/inst/doc/chisq_test.Rmd @@ -99,7 +99,7 @@ chisq_null_distn %>% visualize(obs_stat = obs_chisq, direction = "greater") ```{r} chisq_null_distn %>% - get_pvalue(obs_stat = obs_chisq, direction = "greater") + get_p_value(obs_stat = obs_chisq, direction = "greater") ``` diff --git a/inst/doc/chisq_test.html b/inst/doc/chisq_test.html index 285dae6a..25831c30 100644 --- a/inst/doc/chisq_test.html +++ b/inst/doc/chisq_test.html @@ -12,7 +12,7 @@ - + Chi-squared test example using nycflights13 flights data @@ -70,7 +70,7 @@

 [rendered title block of chisq_test.html; markup stripped in this extract]
 Chi-squared test example using nycflights13 flights data
 Chester Ismay
-2018-08-06
+2018-09-03
@@ -180,12 +180,12 @@
 [Randomization approach to \(\chi^2\)-statistic]
 ... generate(reps = 1000, type = "permute") %>%
   calculate(stat = "Chisq")
 chisq_null_distn %>%
   visualize(obs_stat = obs_chisq, direction = "greater")
 [Calculate the randomization-based \(p\)-value]
 chisq_null_distn %>%
-  get_pvalue(obs_stat = obs_chisq, direction = "greater")
+  get_p_value(obs_stat = obs_chisq, direction = "greater")
 [stat / statistic output table]
@@ -223,7 +223,7 @@
 [Overlay appropriate \(\chi^2\) distribution]
 visualize(method = "both", obs_stat = obs_chisq, direction = "right")
 ## Warning: Check to make sure the conditions have been met for the
 ## theoretical method. `infer` currently does not check these for you.
 [Compute theoretical p-value]
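
The vignette re-renders above track the same renaming as the R sources. A brief usage sketch, assuming the `mtcars`-based `d_hat`, `null_distn`, and `bootstrap_distn` objects built in the help-page examples earlier in this patch:

# Preferred names going forward:
null_distn %>%
  get_p_value(obs_stat = d_hat, direction = "right")
bootstrap_distn %>%
  get_confidence_interval(level = 0.9)

# The old names still run but are soft-deprecated: each forwards to the new
# function after signalling through .Deprecated(), e.g.
# .Deprecated("get_p_value") inside get_pvalue().
null_distn %>%
  get_pvalue(obs_stat = d_hat, direction = "right")
bootstrap_distn %>%
  get_ci(level = 0.9)
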

diff --git a/inst/doc/observed_stat_examples.R b/inst/doc/observed_stat_examples.R index dd087087..67be6ff5 100644 --- a/inst/doc/observed_stat_examples.R +++ b/inst/doc/observed_stat_examples.R @@ -37,7 +37,7 @@ null_distn <- fli_small %>% null_distn %>% visualize(obs_stat = x_bar, direction = "two_sided") null_distn %>% - get_pvalue(obs_stat = x_bar, direction = "two_sided") + get_p_value(obs_stat = x_bar, direction = "two_sided") ## ------------------------------------------------------------------------ ( t_bar <- fli_small %>% @@ -53,7 +53,7 @@ null_distn <- fli_small %>% null_distn %>% visualize(obs_stat = t_bar, direction = "two_sided") null_distn %>% - get_pvalue(obs_stat = t_bar, direction = "two_sided") + get_p_value(obs_stat = t_bar, direction = "two_sided") ## ------------------------------------------------------------------------ ( x_tilde <- fli_small %>% @@ -69,7 +69,7 @@ null_distn <- fli_small %>% null_distn %>% visualize(obs_stat = x_tilde, direction = "two_sided") null_distn %>% - get_pvalue(obs_stat = x_tilde, direction = "two_sided") + get_p_value(obs_stat = x_tilde, direction = "two_sided") ## ------------------------------------------------------------------------ ( p_hat <- fli_small %>% @@ -85,7 +85,7 @@ null_distn <- fli_small %>% null_distn %>% visualize(obs_stat = p_hat, direction = "two_sided") null_distn %>% - get_pvalue(obs_stat = p_hat, direction = "two_sided") + get_p_value(obs_stat = p_hat, direction = "two_sided") ## ------------------------------------------------------------------------ null_distn <- fli_small %>% @@ -109,7 +109,7 @@ null_distn <- fli_small %>% null_distn %>% visualize(obs_stat = d_hat, direction = "two_sided") null_distn %>% - get_pvalue(obs_stat = d_hat, direction = "two_sided") + get_p_value(obs_stat = d_hat, direction = "two_sided") ## ------------------------------------------------------------------------ ( z_hat <- fli_small %>% @@ -125,7 +125,7 @@ null_distn <- fli_small %>% null_distn %>% visualize(obs_stat = z_hat, direction = "two_sided") null_distn %>% - get_pvalue(obs_stat = z_hat, direction = "two_sided") + get_p_value(obs_stat = z_hat, direction = "two_sided") ## ------------------------------------------------------------------------ ( Chisq_hat <- fli_small %>% @@ -144,7 +144,7 @@ null_distn <- fli_small %>% null_distn %>% visualize(obs_stat = Chisq_hat, direction = "greater") null_distn %>% - get_pvalue(obs_stat = Chisq_hat, direction = "greater") + get_p_value(obs_stat = Chisq_hat, direction = "greater") ## ------------------------------------------------------------------------ ( Chisq_hat <- fli_small %>% @@ -160,7 +160,7 @@ null_distn <- fli_small %>% null_distn %>% visualize(obs_stat = Chisq_hat, direction = "greater") null_distn %>% - get_pvalue(obs_stat = Chisq_hat, direction = "greater") + get_p_value(obs_stat = Chisq_hat, direction = "greater") ## ------------------------------------------------------------------------ ( d_hat <- fli_small %>% @@ -176,7 +176,7 @@ null_distn <- fli_small %>% null_distn %>% visualize(obs_stat = d_hat, direction = "two_sided") null_distn %>% - get_pvalue(obs_stat = d_hat, direction = "two_sided") + get_p_value(obs_stat = d_hat, direction = "two_sided") ## ------------------------------------------------------------------------ ( t_hat <- fli_small %>% @@ -192,7 +192,7 @@ null_distn <- fli_small %>% null_distn %>% visualize(obs_stat = t_hat, direction = "two_sided") null_distn %>% - get_pvalue(obs_stat = t_hat, direction = "two_sided") + get_p_value(obs_stat = t_hat, direction = 
"two_sided") ## ------------------------------------------------------------------------ ( d_hat <- fli_small %>% @@ -209,7 +209,7 @@ null_distn <- fli_small %>% null_distn %>% visualize(obs_stat = d_hat, direction = "two_sided") null_distn %>% - get_pvalue(obs_stat = d_hat, direction = "two_sided") + get_p_value(obs_stat = d_hat, direction = "two_sided") ## ------------------------------------------------------------------------ ( F_hat <- fli_small %>% @@ -225,7 +225,7 @@ null_distn <- fli_small %>% null_distn %>% visualize(obs_stat = F_hat, direction = "greater") null_distn %>% - get_pvalue(obs_stat = F_hat, direction = "greater") + get_p_value(obs_stat = F_hat, direction = "greater") ## ------------------------------------------------------------------------ ( slope_hat <- fli_small %>% @@ -241,7 +241,7 @@ null_distn <- fli_small %>% null_distn %>% visualize(obs_stat = slope_hat, direction = "two_sided") null_distn %>% - get_pvalue(obs_stat = slope_hat, direction = "two_sided") + get_p_value(obs_stat = slope_hat, direction = "two_sided") ## ------------------------------------------------------------------------ ( correlation_hat <- fli_small %>% @@ -257,7 +257,7 @@ null_distn <- fli_small %>% null_distn %>% visualize(obs_stat = correlation_hat, direction = "two_sided") null_distn %>% - get_pvalue(obs_stat = correlation_hat, direction = "two_sided") + get_p_value(obs_stat = correlation_hat, direction = "two_sided") ## ----echo=FALSE, eval=FALSE---------------------------------------------- # # **Standardized observed stat** @@ -274,7 +274,7 @@ null_distn %>% # null_distn %>% # visualize(obs_stat = t_hat, direction = "two_sided") # null_distn %>% -# get_pvalue(obs_stat = t_hat, direction = "two_sided") +# get_p_value(obs_stat = t_hat, direction = "two_sided") ## ------------------------------------------------------------------------ ( x_bar <- fli_small %>% diff --git a/inst/doc/observed_stat_examples.Rmd b/inst/doc/observed_stat_examples.Rmd index cec5e8f1..3a2f3dc7 100644 --- a/inst/doc/observed_stat_examples.Rmd +++ b/inst/doc/observed_stat_examples.Rmd @@ -72,7 +72,7 @@ null_distn <- fli_small %>% null_distn %>% visualize(obs_stat = x_bar, direction = "two_sided") null_distn %>% - get_pvalue(obs_stat = x_bar, direction = "two_sided") + get_p_value(obs_stat = x_bar, direction = "two_sided") ``` ### One numerical variable (standardized mean $t$) @@ -93,7 +93,7 @@ null_distn <- fli_small %>% null_distn %>% visualize(obs_stat = t_bar, direction = "two_sided") null_distn %>% - get_pvalue(obs_stat = t_bar, direction = "two_sided") + get_p_value(obs_stat = t_bar, direction = "two_sided") ``` @@ -116,7 +116,7 @@ null_distn <- fli_small %>% null_distn %>% visualize(obs_stat = x_tilde, direction = "two_sided") null_distn %>% - get_pvalue(obs_stat = x_tilde, direction = "two_sided") + get_p_value(obs_stat = x_tilde, direction = "two_sided") ``` ### One categorical (one proportion) @@ -138,7 +138,7 @@ null_distn <- fli_small %>% null_distn %>% visualize(obs_stat = p_hat, direction = "two_sided") null_distn %>% - get_pvalue(obs_stat = p_hat, direction = "two_sided") + get_p_value(obs_stat = p_hat, direction = "two_sided") ``` Logical variables will be coerced to factors: @@ -176,7 +176,7 @@ null_distn <- fli_small %>% null_distn %>% visualize(obs_stat = d_hat, direction = "two_sided") null_distn %>% - get_pvalue(obs_stat = d_hat, direction = "two_sided") + get_p_value(obs_stat = d_hat, direction = "two_sided") ``` ### Two categorical (2 level) variables (z) @@ -198,7 +198,7 @@ null_distn <- 
fli_small %>% null_distn %>% visualize(obs_stat = z_hat, direction = "two_sided") null_distn %>% - get_pvalue(obs_stat = z_hat, direction = "two_sided") + get_p_value(obs_stat = z_hat, direction = "two_sided") ``` Note the similarities in this plot and the previous one. @@ -227,7 +227,7 @@ null_distn <- fli_small %>% null_distn %>% visualize(obs_stat = Chisq_hat, direction = "greater") null_distn %>% - get_pvalue(obs_stat = Chisq_hat, direction = "greater") + get_p_value(obs_stat = Chisq_hat, direction = "greater") ``` ### Two categorical (>2 level) variables @@ -249,7 +249,7 @@ null_distn <- fli_small %>% null_distn %>% visualize(obs_stat = Chisq_hat, direction = "greater") null_distn %>% - get_pvalue(obs_stat = Chisq_hat, direction = "greater") + get_p_value(obs_stat = Chisq_hat, direction = "greater") ``` ### One numerical variable, one categorical (2 levels) (diff in means) @@ -271,7 +271,7 @@ null_distn <- fli_small %>% null_distn %>% visualize(obs_stat = d_hat, direction = "two_sided") null_distn %>% - get_pvalue(obs_stat = d_hat, direction = "two_sided") + get_p_value(obs_stat = d_hat, direction = "two_sided") ``` ### One numerical variable, one categorical (2 levels) (t) @@ -293,7 +293,7 @@ null_distn <- fli_small %>% null_distn %>% visualize(obs_stat = t_hat, direction = "two_sided") null_distn %>% - get_pvalue(obs_stat = t_hat, direction = "two_sided") + get_p_value(obs_stat = t_hat, direction = "two_sided") ``` Note the similarities in this plot and the previous one. @@ -318,7 +318,7 @@ null_distn <- fli_small %>% null_distn %>% visualize(obs_stat = d_hat, direction = "two_sided") null_distn %>% - get_pvalue(obs_stat = d_hat, direction = "two_sided") + get_p_value(obs_stat = d_hat, direction = "two_sided") ``` ### One numerical, one categorical (>2 levels) - ANOVA @@ -340,7 +340,7 @@ null_distn <- fli_small %>% null_distn %>% visualize(obs_stat = F_hat, direction = "greater") null_distn %>% - get_pvalue(obs_stat = F_hat, direction = "greater") + get_p_value(obs_stat = F_hat, direction = "greater") ``` ### Two numerical vars - SLR @@ -362,7 +362,7 @@ null_distn <- fli_small %>% null_distn %>% visualize(obs_stat = slope_hat, direction = "two_sided") null_distn %>% - get_pvalue(obs_stat = slope_hat, direction = "two_sided") + get_p_value(obs_stat = slope_hat, direction = "two_sided") ``` ### Two numerical vars - correlation @@ -384,7 +384,7 @@ null_distn <- fli_small %>% null_distn %>% visualize(obs_stat = correlation_hat, direction = "two_sided") null_distn %>% - get_pvalue(obs_stat = correlation_hat, direction = "two_sided") + get_p_value(obs_stat = correlation_hat, direction = "two_sided") ``` @@ -409,7 +409,7 @@ null_distn <- fli_small %>% null_distn %>% visualize(obs_stat = t_hat, direction = "two_sided") null_distn %>% - get_pvalue(obs_stat = t_hat, direction = "two_sided") + get_p_value(obs_stat = t_hat, direction = "two_sided") ``` diff --git a/inst/doc/observed_stat_examples.html b/inst/doc/observed_stat_examples.html index 68e443ae..3cba2531 100644 --- a/inst/doc/observed_stat_examples.html +++ b/inst/doc/observed_stat_examples.html @@ -135,9 +135,9 @@

[Hunks from the regenerated `inst/doc/observed_stat_examples.html`: every rendered `get_pvalue()` call becomes `get_p_value()`, and each rendered `get_ci()` chunk gains the new output `## Warning: 'get_ci' is deprecated.` / `## Use 'get_confidence_interval' instead.` / `## See help("Deprecated")`; the remaining changes are re-rendered markup and plot images.]
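The rename above is purely a spelling change at the call site. A minimal sketch of the updated pipeline, assuming the `fli_small` data frame built in the vignette setup and substituting an illustrative response (`dep_delay`) and null value (`mu = 10`) for the vignette's exact chunks:

```r
library(infer)

# Observed mean (illustrative response variable)
x_bar <- fli_small %>%
  specify(response = dep_delay) %>%
  calculate(stat = "mean")

# Simulation-based null distribution
null_distn <- fli_small %>%
  specify(response = dep_delay) %>%
  hypothesize(null = "point", mu = 10) %>%  # illustrative null value
  generate(reps = 1000, type = "bootstrap") %>%
  calculate(stat = "mean")

# get_p_value() is the new spelling of get_pvalue(); arguments are unchanged
null_distn %>%
  visualize(obs_stat = x_bar, direction = "two_sided")
null_distn %>%
  get_p_value(obs_stat = x_bar, direction = "two_sided")
```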

diff --git a/inst/doc/two_sample_t.R b/inst/doc/two_sample_t.R index 1c46e550..80ce97a0 100644 --- a/inst/doc/two_sample_t.R +++ b/inst/doc/two_sample_t.R @@ -47,7 +47,7 @@ t_null_distn %>% visualize(obs_stat = obs_t, direction = "two_sided") ## ------------------------------------------------------------------------ t_null_distn %>% - get_pvalue(obs_stat = obs_t, direction = "two_sided") + get_p_value(obs_stat = obs_t, direction = "two_sided") ## ------------------------------------------------------------------------ fli_small %>% diff --git a/inst/doc/two_sample_t.Rmd b/inst/doc/two_sample_t.Rmd index 9a2295f4..9fbe54ab 100755 --- a/inst/doc/two_sample_t.Rmd +++ b/inst/doc/two_sample_t.Rmd @@ -100,7 +100,7 @@ t_null_distn %>% visualize(obs_stat = obs_t, direction = "two_sided") ```{r} t_null_distn %>% - get_pvalue(obs_stat = obs_t, direction = "two_sided") + get_p_value(obs_stat = obs_t, direction = "two_sided") ``` diff --git a/inst/doc/two_sample_t.html b/inst/doc/two_sample_t.html index 605cdffa..7a8ecc8c 100644 --- a/inst/doc/two_sample_t.html +++ b/inst/doc/two_sample_t.html @@ -12,7 +12,7 @@ - + Two sample t test example using nycflights13 flights data @@ -70,7 +70,7 @@

[Hunks from the regenerated `inst/doc/two_sample_t.html`: rendered `get_pvalue()` becomes `get_p_value()` and the vignette build date changes from 2018-08-06 to 2018-09-03; the remaining changes are re-rendered markup and plot images.]
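The `'get_ci' is deprecated` warnings that now appear in the rendered output above come from thin forwarding shims; the `man/deprecated.Rd` page added below documents them. A condensed sketch of the pattern, reconstructed from the `get_ci()` shim shown in the `R/deprecated.R` diff later in this series rather than copied verbatim:

```r
# Deprecated alias: warn, then forward all arguments to the replacement.
conf_int <- function(x, level = 0.95, type = "percentile",
                     point_estimate = NULL) {
  .Deprecated("get_confidence_interval")  # emits the warning seen in the HTML output
  get_confidence_interval(
    x, level = level, type = type, point_estimate = point_estimate
  )
}
```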

diff --git a/man/deprecated.Rd b/man/deprecated.Rd new file mode 100644 index 00000000..d4692abc --- /dev/null +++ b/man/deprecated.Rd @@ -0,0 +1,38 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/deprecated.R +\name{deprecated} +\alias{deprecated} +\alias{conf_int} +\alias{get_ci} +\alias{p_value} +\alias{get_pvalue} +\title{Deprecated functions} +\usage{ +conf_int(x, level = 0.95, type = "percentile", point_estimate = NULL) + +get_ci(x, level = 0.95, type = "percentile", point_estimate = NULL) + +p_value(x, obs_stat, direction) + +get_pvalue(x, obs_stat, direction) +} +\arguments{ +\item{x}{See the non-deprecated function.} + +\item{level}{See the non-deprecated function.} + +\item{type}{See the non-deprecated function.} + +\item{point_estimate}{See the non-deprecated function.} + +\item{obs_stat}{See the non-deprecated function.} + +\item{direction}{See the non-deprecated function.} +} +\description{ +These functions should no longer be used. They will be removed in a +future release of \code{infer}. +} +\seealso{ +\code{\link{get_p_value}}, \code{\link{get_confidence_interval}} +} diff --git a/man/get_ci.Rd b/man/get_confidence_interval.Rd similarity index 71% rename from man/get_ci.Rd rename to man/get_confidence_interval.Rd index 82df2640..7ec6bae3 100644 --- a/man/get_ci.Rd +++ b/man/get_confidence_interval.Rd @@ -1,15 +1,9 @@ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/conf_int.R -\name{get_ci} -\alias{get_ci} -\alias{conf_int} +\name{get_confidence_interval} \alias{get_confidence_interval} \title{Compute confidence interval} \usage{ -conf_int(x, level = 0.95, type = "percentile", point_estimate = NULL) - -get_ci(x, level = 0.95, type = "percentile", point_estimate = NULL) - get_confidence_interval(x, level = 0.95, type = "percentile", point_estimate = NULL) } @@ -34,19 +28,33 @@ the confidence interval. } \description{ Only simulation-based methods are (currently only) supported. -\code{get_confidence_interval()} and \code{get_ci()} are both aliases of \code{conf_int()}. } +\section{Aliases}{ + +\code{conf_int()} and \code{get_ci()} are deprecated aliases of \code{get_confidence_interval()}. 
+} + \examples{ +# Prepare the dataset mtcars_df <- mtcars \%>\% dplyr::mutate(am = factor(am)) + +# Calculate the difference in means in the dataset d_hat <- mtcars_df \%>\% specify(mpg ~ am) \%>\% calculate(stat = "diff in means", order = c("1", "0")) + +# Same calculation on 100 bootstrap replicates bootstrap_distn <- mtcars_df \%>\% specify(mpg ~ am) \%>\% - generate(reps = 100) \%>\% + generate(reps = 100, type = "bootstrap") \%>\% calculate(stat = "diff in means", order = c("1", "0")) -bootstrap_distn \%>\% conf_int(level = 0.9) -bootstrap_distn \%>\% conf_int(type = "se", point_estimate = d_hat) +# Use level to set the confidence level +bootstrap_distn \%>\% + get_confidence_interval(level = 0.9) + +# To calculate std error, set the type and point estimate +bootstrap_distn \%>\% + get_confidence_interval(type = "se", point_estimate = d_hat) } diff --git a/man/get_pvalue.Rd b/man/get_p_value.Rd similarity index 65% rename from man/get_pvalue.Rd rename to man/get_p_value.Rd index da59b651..9df0e25c 100644 --- a/man/get_pvalue.Rd +++ b/man/get_p_value.Rd @@ -1,13 +1,10 @@ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/p_value.R -\name{get_pvalue} -\alias{get_pvalue} -\alias{p_value} +\name{get_p_value} +\alias{get_p_value} \title{Compute p-value} \usage{ -p_value(x, obs_stat, direction) - -get_pvalue(x, obs_stat, direction) +get_p_value(x, obs_stat, direction) } \arguments{ \item{x}{Data frame of calculated statistics or containing attributes of @@ -23,21 +20,32 @@ extreme than this).} A 1x1 data frame with value between 0 and 1. } \description{ -Only simulation-based methods are (currently only) supported. \code{get_pvalue()} -is an alias of \code{p_value}. +Only simulation-based methods are (currently only) supported. +} +\section{Aliases}{ + +\code{p_value} and \code{get_pvalue()} are deprecated aliases of \code{get_p_value()}. } + \examples{ +# Prepare the dataset mtcars_df <- mtcars \%>\% dplyr::mutate(am = factor(am)) + +# Calculate the difference in means in the dataset d_hat <- mtcars_df \%>\% specify(mpg ~ am) \%>\% calculate(stat = "diff in means", order = c("1", "0")) + +# Same calculation on 100 permutation replicates null_distn <- mtcars_df \%>\% specify(mpg ~ am) \%>\% hypothesize(null = "independence") \%>\% generate(reps = 100) \%>\% calculate(stat = "diff in means", order = c("1", "0")) -null_distn \%>\% - p_value(obs_stat = d_hat, direction = "right") +# What proportion of replicates had a difference +# in means more extreme than in the dataset? 
+null_distn \%>\% + get_p_value(obs_stat = d_hat, direction = "right") } diff --git a/tests/testthat/test-conf_int.R b/tests/testthat/test-conf_int.R index 5cfe0060..4a8e9727 100644 --- a/tests/testthat/test-conf_int.R +++ b/tests/testthat/test-conf_int.R @@ -8,13 +8,13 @@ set.seed(2018) test_df <- tibble::tibble(stat = rnorm(100)) test_that("basics work", { - expect_silent(test_df %>% conf_int()) - expect_error(test_df %>% conf_int(type = "other")) - expect_error(test_df %>% conf_int(level = 1.2)) - expect_error(test_df %>% conf_int(point_estimate = "help")) - - expect_silent(iris_calc %>% get_ci(type = "se", point_estimate = 4)) - expect_silent(iris_calc %>% get_ci(type = "se", point_estimate = obs_diff)) - expect_error(iris_calc %>% get_ci(type = "se", point_estimate = "error")) - expect_error(iris_calc %>% get_ci(type = "se")) + expect_silent(test_df %>% get_confidence_interval()) + expect_error(test_df %>% get_confidence_interval(type = "other")) + expect_error(test_df %>% get_confidence_interval(level = 1.2)) + expect_error(test_df %>% get_confidence_interval(point_estimate = "help")) + + expect_silent(iris_calc %>% get_confidence_interval(type = "se", point_estimate = 4)) + expect_silent(iris_calc %>% get_confidence_interval(type = "se", point_estimate = obs_diff)) + expect_error(iris_calc %>% get_confidence_interval(type = "se", point_estimate = "error")) + expect_error(iris_calc %>% get_confidence_interval(type = "se")) }) diff --git a/tests/testthat/test-p_value.R b/tests/testthat/test-p_value.R index c3b758b3..1c5c063f 100644 --- a/tests/testthat/test-p_value.R +++ b/tests/testthat/test-p_value.R @@ -1,46 +1,46 @@ -context("p_value") +context("get_p_value") set.seed(2018) test_df <- tibble::tibble(stat = rnorm(100)) test_that("direction is appropriate", { - expect_error(test_df %>% p_value(obs_stat = 0.5, direction = "righ")) + expect_error(test_df %>% get_p_value(obs_stat = 0.5, direction = "righ")) }) -test_that("p_value makes sense", { +test_that("get_p_value makes sense", { expect_lt( iris_calc %>% - p_value(obs_stat = 0.1, direction = "right") %>% + get_p_value(obs_stat = 0.1, direction = "right") %>% dplyr::pull(), expected = 0.1 ) expect_gt( iris_calc %>% - p_value(obs_stat = -0.1, direction = "greater") %>% + get_p_value(obs_stat = -0.1, direction = "greater") %>% dplyr::pull(), expected = 0.9 ) expect_equal( iris_calc %>% - p_value(obs_stat = median(iris_calc$stat), direction = "both") %>% + get_p_value(obs_stat = median(iris_calc$stat), direction = "both") %>% dplyr::pull(), expected = 1 ) expect_lt( iris_calc %>% - p_value(obs_stat = -0.2, direction = "left") %>% + get_p_value(obs_stat = -0.2, direction = "left") %>% dplyr::pull(), expected = 0.02 ) expect_gt( iris_calc %>% - p_value(obs_stat = -0.2, direction = "right") %>% + get_p_value(obs_stat = -0.2, direction = "right") %>% dplyr::pull(), expected = 0.98 ) expect_equal( iris_calc %>% - get_pvalue( + get_p_value( obs_stat = median(iris_calc$stat) + 1, direction = "two_sided" ) %>% dplyr::pull(), diff --git a/vignettes/chisq_test.Rmd b/vignettes/chisq_test.Rmd index f2dd959a..d56b5201 100644 --- a/vignettes/chisq_test.Rmd +++ b/vignettes/chisq_test.Rmd @@ -99,7 +99,7 @@ chisq_null_distn %>% visualize(obs_stat = obs_chisq, direction = "greater") ```{r} chisq_null_distn %>% - get_pvalue(obs_stat = obs_chisq, direction = "greater") + get_p_value(obs_stat = obs_chisq, direction = "greater") ``` diff --git a/vignettes/observed_stat_examples.Rmd b/vignettes/observed_stat_examples.Rmd index cec5e8f1..3a2f3dc7 100644 --- 
a/vignettes/observed_stat_examples.Rmd +++ b/vignettes/observed_stat_examples.Rmd @@ -72,7 +72,7 @@ null_distn <- fli_small %>% null_distn %>% visualize(obs_stat = x_bar, direction = "two_sided") null_distn %>% - get_pvalue(obs_stat = x_bar, direction = "two_sided") + get_p_value(obs_stat = x_bar, direction = "two_sided") ``` ### One numerical variable (standardized mean $t$) @@ -93,7 +93,7 @@ null_distn <- fli_small %>% null_distn %>% visualize(obs_stat = t_bar, direction = "two_sided") null_distn %>% - get_pvalue(obs_stat = t_bar, direction = "two_sided") + get_p_value(obs_stat = t_bar, direction = "two_sided") ``` @@ -116,7 +116,7 @@ null_distn <- fli_small %>% null_distn %>% visualize(obs_stat = x_tilde, direction = "two_sided") null_distn %>% - get_pvalue(obs_stat = x_tilde, direction = "two_sided") + get_p_value(obs_stat = x_tilde, direction = "two_sided") ``` ### One categorical (one proportion) @@ -138,7 +138,7 @@ null_distn <- fli_small %>% null_distn %>% visualize(obs_stat = p_hat, direction = "two_sided") null_distn %>% - get_pvalue(obs_stat = p_hat, direction = "two_sided") + get_p_value(obs_stat = p_hat, direction = "two_sided") ``` Logical variables will be coerced to factors: @@ -176,7 +176,7 @@ null_distn <- fli_small %>% null_distn %>% visualize(obs_stat = d_hat, direction = "two_sided") null_distn %>% - get_pvalue(obs_stat = d_hat, direction = "two_sided") + get_p_value(obs_stat = d_hat, direction = "two_sided") ``` ### Two categorical (2 level) variables (z) @@ -198,7 +198,7 @@ null_distn <- fli_small %>% null_distn %>% visualize(obs_stat = z_hat, direction = "two_sided") null_distn %>% - get_pvalue(obs_stat = z_hat, direction = "two_sided") + get_p_value(obs_stat = z_hat, direction = "two_sided") ``` Note the similarities in this plot and the previous one. @@ -227,7 +227,7 @@ null_distn <- fli_small %>% null_distn %>% visualize(obs_stat = Chisq_hat, direction = "greater") null_distn %>% - get_pvalue(obs_stat = Chisq_hat, direction = "greater") + get_p_value(obs_stat = Chisq_hat, direction = "greater") ``` ### Two categorical (>2 level) variables @@ -249,7 +249,7 @@ null_distn <- fli_small %>% null_distn %>% visualize(obs_stat = Chisq_hat, direction = "greater") null_distn %>% - get_pvalue(obs_stat = Chisq_hat, direction = "greater") + get_p_value(obs_stat = Chisq_hat, direction = "greater") ``` ### One numerical variable, one categorical (2 levels) (diff in means) @@ -271,7 +271,7 @@ null_distn <- fli_small %>% null_distn %>% visualize(obs_stat = d_hat, direction = "two_sided") null_distn %>% - get_pvalue(obs_stat = d_hat, direction = "two_sided") + get_p_value(obs_stat = d_hat, direction = "two_sided") ``` ### One numerical variable, one categorical (2 levels) (t) @@ -293,7 +293,7 @@ null_distn <- fli_small %>% null_distn %>% visualize(obs_stat = t_hat, direction = "two_sided") null_distn %>% - get_pvalue(obs_stat = t_hat, direction = "two_sided") + get_p_value(obs_stat = t_hat, direction = "two_sided") ``` Note the similarities in this plot and the previous one. 
@@ -318,7 +318,7 @@ null_distn <- fli_small %>% null_distn %>% visualize(obs_stat = d_hat, direction = "two_sided") null_distn %>% - get_pvalue(obs_stat = d_hat, direction = "two_sided") + get_p_value(obs_stat = d_hat, direction = "two_sided") ``` ### One numerical, one categorical (>2 levels) - ANOVA @@ -340,7 +340,7 @@ null_distn <- fli_small %>% null_distn %>% visualize(obs_stat = F_hat, direction = "greater") null_distn %>% - get_pvalue(obs_stat = F_hat, direction = "greater") + get_p_value(obs_stat = F_hat, direction = "greater") ``` ### Two numerical vars - SLR @@ -362,7 +362,7 @@ null_distn <- fli_small %>% null_distn %>% visualize(obs_stat = slope_hat, direction = "two_sided") null_distn %>% - get_pvalue(obs_stat = slope_hat, direction = "two_sided") + get_p_value(obs_stat = slope_hat, direction = "two_sided") ``` ### Two numerical vars - correlation @@ -384,7 +384,7 @@ null_distn <- fli_small %>% null_distn %>% visualize(obs_stat = correlation_hat, direction = "two_sided") null_distn %>% - get_pvalue(obs_stat = correlation_hat, direction = "two_sided") + get_p_value(obs_stat = correlation_hat, direction = "two_sided") ``` @@ -409,7 +409,7 @@ null_distn <- fli_small %>% null_distn %>% visualize(obs_stat = t_hat, direction = "two_sided") null_distn %>% - get_pvalue(obs_stat = t_hat, direction = "two_sided") + get_p_value(obs_stat = t_hat, direction = "two_sided") ``` diff --git a/vignettes/two_sample_t.Rmd b/vignettes/two_sample_t.Rmd index 9a2295f4..9fbe54ab 100755 --- a/vignettes/two_sample_t.Rmd +++ b/vignettes/two_sample_t.Rmd @@ -100,7 +100,7 @@ t_null_distn %>% visualize(obs_stat = obs_t, direction = "two_sided") ```{r} t_null_distn %>% - get_pvalue(obs_stat = obs_t, direction = "two_sided") + get_p_value(obs_stat = obs_t, direction = "two_sided") ``` From 39ee0ddc74d4395aee2819b16404224c444bb4a8 Mon Sep 17 00:00:00 2001 From: Richard Cotton Date: Mon, 3 Sep 2018 11:53:41 -0400 Subject: [PATCH 36/78] don't deprecate get_pvalue(), get_ci() --- NEWS.md | 4 ++-- R/conf_int.R | 12 +++++++++++- R/deprecated.R | 16 ---------------- R/p_value.R | 9 ++++++++- man/deprecated.Rd | 6 ------ man/get_confidence_interval.Rd | 6 +++++- man/get_p_value.Rd | 6 +++++- 7 files changed, 31 insertions(+), 28 deletions(-) diff --git a/NEWS.md b/NEWS.md index 1539f44f..d3915e1f 100755 --- a/NEWS.md +++ b/NEWS.md @@ -9,8 +9,8 @@ - Make transparancy in `visualize()` to not depend on method and data volume. - Make `visualize()` work for "One sample t" theoretical type with `method = "both"`. - Add `stat = "sum"` and `stat = "count"` options to `calculate()` (#50). -- Deprecated `p_value()` and `get_pvalue()` (use `get_p_value()` instead) (#180). -- Deprecated `cont_int()` and `get_ci()` (use `get_confidence_interval()` instead) (#180). +- Deprecated `p_value()` (use `get_p_value()` instead) (#180). +- Deprecated `cont_int()` (use `get_confidence_interval()` instead) (#180). # infer 0.3.1 diff --git a/R/conf_int.R b/R/conf_int.R index 1889a88d..397891f5 100644 --- a/R/conf_int.R +++ b/R/conf_int.R @@ -16,7 +16,8 @@ #' @return A 1 x 2 tibble with values corresponding to lower and upper values in #' the confidence interval. #' @section Aliases: -#' `conf_int()` and `get_ci()` are deprecated aliases of `get_confidence_interval()`. +#' `get_ci()` is an alias of `get_confidence_interval()`. +#' `conf_int` is a deprecated alias of `get_confidence_interval()`. 
#' #' @examples #' # Prepare the dataset @@ -65,6 +66,15 @@ get_confidence_interval <- function(x, level = 0.95, type = "percentile", return(ci) } +#' @rdname get_confidence_interval +#' @export +get_ci <- function(x, level = 0.95, type = "percentile", + point_estimate = NULL) { + get_confidence_interval( + x, level = level, type = type, point_estimate = point_estimate + ) +} + check_ci_args <- function(x, level, type, point_estimate){ if(!is.null(point_estimate)){ diff --git a/R/deprecated.R b/R/deprecated.R index 8c4e7b67..6d64d129 100644 --- a/R/deprecated.R +++ b/R/deprecated.R @@ -23,16 +23,6 @@ conf_int <- function(x, level = 0.95, type = "percentile", ) } -#' @rdname deprecated -#' @export -get_ci <- function(x, level = 0.95, type = "percentile", - point_estimate = NULL) { - .Deprecated("get_confidence_interval") - get_confidence_interval( - x, level = level, type = type, point_estimate = point_estimate - ) -} - #' @rdname deprecated #' @export @@ -41,9 +31,3 @@ p_value <- function(x, obs_stat, direction) { get_p_value(x = x, obs_stat = obs_stat, direction = direction) } -#' @rdname deprecated -#' @export -get_pvalue <- function(x, obs_stat, direction) { - .Deprecated("get_p_value") - get_p_value(x = x, obs_stat = obs_stat, direction = direction) -} diff --git a/R/p_value.R b/R/p_value.R index 6be7147c..8147cb56 100644 --- a/R/p_value.R +++ b/R/p_value.R @@ -12,7 +12,8 @@ #' @return A 1x1 data frame with value between 0 and 1. #' #' @section Aliases: -#' `p_value` and `get_pvalue()` are deprecated aliases of `get_p_value()`. +#' `get_pvalue()` is an alias of `get_p_value()`. +#' `p_value` is a deprecated alias of `get_p_value()`. #' #' @examples #' # Prepare the dataset @@ -70,6 +71,12 @@ get_p_value <- function(x, obs_stat, direction){ return(pvalue) } +#' @rdname get_p_value +#' @export +get_pvalue <- function(x, obs_stat, direction) { + get_p_value(x = x, obs_stat = obs_stat, direction = direction) +} + simulation_based_p_value <- function(x, obs_stat, direction){ if(direction %in% c("less", "left")){ diff --git a/man/deprecated.Rd b/man/deprecated.Rd index d4692abc..d733d8d2 100644 --- a/man/deprecated.Rd +++ b/man/deprecated.Rd @@ -3,18 +3,12 @@ \name{deprecated} \alias{deprecated} \alias{conf_int} -\alias{get_ci} \alias{p_value} -\alias{get_pvalue} \title{Deprecated functions} \usage{ conf_int(x, level = 0.95, type = "percentile", point_estimate = NULL) -get_ci(x, level = 0.95, type = "percentile", point_estimate = NULL) - p_value(x, obs_stat, direction) - -get_pvalue(x, obs_stat, direction) } \arguments{ \item{x}{See the non-deprecated function.} diff --git a/man/get_confidence_interval.Rd b/man/get_confidence_interval.Rd index 7ec6bae3..db5096fe 100644 --- a/man/get_confidence_interval.Rd +++ b/man/get_confidence_interval.Rd @@ -2,10 +2,13 @@ % Please edit documentation in R/conf_int.R \name{get_confidence_interval} \alias{get_confidence_interval} +\alias{get_ci} \title{Compute confidence interval} \usage{ get_confidence_interval(x, level = 0.95, type = "percentile", point_estimate = NULL) + +get_ci(x, level = 0.95, type = "percentile", point_estimate = NULL) } \arguments{ \item{x}{Data frame of calculated statistics or containing attributes of @@ -31,7 +34,8 @@ Only simulation-based methods are (currently only) supported. } \section{Aliases}{ -\code{conf_int()} and \code{get_ci()} are deprecated aliases of \code{get_confidence_interval()}. +\code{get_ci()} is an alias of \code{get_confidence_interval()}. 
+\code{conf_int} is a deprecated alias of \code{get_confidence_interval()}. } \examples{ diff --git a/man/get_p_value.Rd b/man/get_p_value.Rd index 9df0e25c..5b935608 100644 --- a/man/get_p_value.Rd +++ b/man/get_p_value.Rd @@ -2,9 +2,12 @@ % Please edit documentation in R/p_value.R \name{get_p_value} \alias{get_p_value} +\alias{get_pvalue} \title{Compute p-value} \usage{ get_p_value(x, obs_stat, direction) + +get_pvalue(x, obs_stat, direction) } \arguments{ \item{x}{Data frame of calculated statistics or containing attributes of @@ -24,7 +27,8 @@ Only simulation-based methods are (currently only) supported. } \section{Aliases}{ -\code{p_value} and \code{get_pvalue()} are deprecated aliases of \code{get_p_value()}. +\code{get_pvalue()} is an alias of \code{get_p_value()}. +\code{p_value} is a deprecated alias of \code{get_p_value()}. } \examples{ From d6f987cb78f24cb607c3742dff62f898fb82150e Mon Sep 17 00:00:00 2001 From: Richard Cotton Date: Mon, 3 Sep 2018 12:10:57 -0400 Subject: [PATCH 37/78] refactor generate() --- NAMESPACE | 1 + R/generate.R | 112 +++++++++++++++++--------------- man/generate.Rd | 8 ++- tests/testthat/test-calculate.R | 16 ++--- tests/testthat/test-generate.R | 110 ++++++++++++++++--------------- 5 files changed, 133 insertions(+), 114 deletions(-) diff --git a/NAMESPACE b/NAMESPACE index a6bf6e27..cf359b30 100755 --- a/NAMESPACE +++ b/NAMESPACE @@ -2,6 +2,7 @@ S3method(print,infer) export("%>%") +export(GENERATION_TYPES) export(calculate) export(chisq_stat) export(chisq_test) diff --git a/R/generate.R b/R/generate.R index 61f3856a..fda851df 100755 --- a/R/generate.R +++ b/R/generate.R @@ -21,66 +21,74 @@ #' #' @importFrom dplyr group_by #' @export -generate <- function(x, reps = 1, type = attr(x, "type"), ...) { - auto_type <- attr(x, "type") +generate <- function(x, reps = 1, type = NULL, ...) { + type <- sanitize_generation_type(type) + auto_type <- sanitize_generation_type(attr(x, "type")) + type <- if(!is.null(type)) { # User specifies type + compare_type_vs_auto_type(type, auto_type) + } else { # Use default + use_auto_type(auto_type) + } - if (!is.null(auto_type)) { - if (is.null(type)) { - stop_glue("Supply not `NULL` value of `type`.") - } + attr(x, "generate") <- TRUE - if (auto_type != type) { - stop_glue( - "You have specified `type = \"{type}\"`, but `type` is expected to be ", - "`\"{auto_type}\"`. Please try again with appropriate `type` value." - ) - } else { - type <- auto_type - } + switch( + type, + bootstrap = bootstrap(x, reps, ...), + permute = { + check_permutation_attributes(x) + permute(x, reps, ...) + }, + simulate = simulate(x, reps, ...) + ) +} + +#' @rdname generate +#' @export +GENERATION_TYPES <- c("bootstrap", "permute", "simulate") + +sanitize_generation_type <- function(x) { + if(is.null(x)) return(x) + match.arg(x, GENERATION_TYPES) +} + +compare_type_vs_auto_type <- function(type, auto_type) { + if(is.null(auto_type)) { + # No default; use whatever they specified. + return(type) + } + if (auto_type != type) { + # User is overriding the default, so warn of potential stupidity. + warning_glue( + "You have specified `type = \"{type}\"`, but `type` is expected", + "to be`\"{auto_type}\"`. 
This workflow is untested and", + "the results may not mean what you think they mean.", + .sep = " " + ) } + type +} - attr(x, "generate") <- TRUE +use_auto_type <- function(auto_type) { + if(is.null(auto_type)) { + stop_glue( + "There is no default `type`;", + "please set it to one of {toString(shQuote(GENERATION_TYPES))}.", + .sep = " " + ) + } + message_glue('Setting `type = "{auto_type}"`.') + auto_type +} - if ( - (type == "permute") && - any(is_nuat(x, "response"), is_nuat(x, "explanatory")) - ) { +check_permutation_attributes <- function(x, attr) { + if (any(is_nuat(x, "response"), is_nuat(x, "explanatory"))) { stop_glue( - "Please `specify()` an explanatory and a response variable when ", - "permuting." + "Please `specify()` an explanatory and a response variable", + "when permuting.", + .sep = " " ) } -## Can't get to these anymore with tests -# if ( -# (type == "simulate") && -# (attr(x, "null") != "point") && -# !(length(grep("p.", names(attr(x, "params")))) >= 1) -# ) { -# stop_glue("Simulation requires a `point` null hypothesis on proportions.") -# } -# if ( -# (type == "bootstrap") && -# !(attr(attr(x, "params"), "names") %in% c("mu", "med", "sigma")) && -# !is_nuat(x, "null") -# ) { -# stop_glue( -# "Bootstrapping is inappropriate in this setting. ", -# "Consider using `type = permute` or `type = simulate`." -# ) -# } - - if (type == "bootstrap") { - bootstrap(x, reps, ...) - } else if (type == "permute") { - permute(x, reps, ...) - } else if (type == "simulate") { - simulate(x, reps, ...) - } # else if (!(type %in% c("bootstrap", "permute", "simulate"))) { - # stop_glue( - # "Choose one of the available options for `type`: ", - # '`"bootstrap"`, `"permute"`, or `"simulate"`' - # ) - # } } bootstrap <- function(x, reps = 1, ...) { diff --git a/man/generate.Rd b/man/generate.Rd index f8fae0f1..6df55386 100755 --- a/man/generate.Rd +++ b/man/generate.Rd @@ -1,10 +1,15 @@ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/generate.R +\docType{data} \name{generate} \alias{generate} +\alias{GENERATION_TYPES} \title{Generate resamples, permutations, or simulations} +\format{An object of class \code{character} of length 3.} \usage{ -generate(x, reps = 1, type = attr(x, "type"), ...) +generate(x, reps = 1, type = NULL, ...) 
+ +GENERATION_TYPES } \arguments{ \item{x}{A data frame that can be coerced into a \link[tibble:tibble]{tibble}.} @@ -32,3 +37,4 @@ mtcars \%>\% generate(reps = 100, type = "permute") } +\keyword{datasets} diff --git a/tests/testthat/test-calculate.R b/tests/testthat/test-calculate.R index b03a2453..dd2bacea 100644 --- a/tests/testthat/test-calculate.R +++ b/tests/testthat/test-calculate.R @@ -77,7 +77,7 @@ test_that("grouping (explanatory) variable is a factor (two var problems)", { expect_error(calculate(gen_iris2, stat = "diff in medians")) # Since shifts to "Slope with t" ## Not implemented - # expect_silent(calculate(gen_iris2, stat = "t")) + # expect_silent(calculate(gen_iris2, stat = "t")) }) test_that("grouping (explanatory) variable is numeric (two var problems)", { @@ -356,7 +356,7 @@ test_that("specify() %>% calculate() works", { }) test_that("One sample t hypothesis test is working", { - expect_silent( + expect_message( iris_tbl %>% specify(Petal.Width ~ NULL) %>% hypothesize(null = "point", mu = 1) %>% @@ -398,7 +398,7 @@ test_that("generate not done before calculate", { }) test_that("One sample t bootstrap is working", { - expect_silent( + expect_message( iris_tbl %>% specify(Petal.Width ~ NULL) %>% generate(reps = 10) %>% @@ -409,14 +409,14 @@ test_that("One sample t bootstrap is working", { test_that("calculate doesn't depend on order of `p` (#122)", { calc_chisq <- function(p) { set.seed(111) - + iris %>% specify(Species ~ NULL) %>% hypothesize(null = "point", p = p) %>% generate(reps = 10, type = "simulate") %>% calculate("Chisq") } - + expect_equal( calc_chisq(c("versicolor" = 0.25, "setosa" = 0.5, "virginica" = 0.25)), calc_chisq(c("virginica" = 0.25, "versicolor" = 0.25, "setosa" = 0.5)) @@ -439,11 +439,11 @@ test_that("calc_impl.sum works", { `[[`(1), sum(iris_tbl$Petal.Width) ) - + gen_iris16 <- iris_tbl %>% specify(Petal.Width ~ NULL) %>% generate(10) - + expect_equal( gen_iris16 %>% calculate(stat = "sum"), gen_iris16 %>% dplyr::summarise(stat = sum(Petal.Width)) @@ -467,7 +467,7 @@ test_that("calc_impl.count works", { `[[`(1), sum(iris_tbl$Sepal.Length.Group == ">5") ) - + expect_equal( gen_iris12 %>% calculate(stat = "count"), gen_iris12 %>% dplyr::summarise(stat = sum(Sepal.Length.Group == ">5")) diff --git a/tests/testthat/test-generate.R b/tests/testthat/test-generate.R index 70088b90..76cc80ff 100644 --- a/tests/testthat/test-generate.R +++ b/tests/testthat/test-generate.R @@ -37,9 +37,9 @@ hyp_anova <- mtcars_df %>% hypothesize(null = "independence") test_that("cohesion with type argument", { - expect_error(generate(hyp_prop, type = "bootstrap")) + expect_warning(generate(hyp_prop, type = "bootstrap")) expect_error(generate(hyp_diff_in_props, type = "bootstrap")) - expect_error(generate(hyp_chisq_gof, type = "bootstrap")) + expect_warning(generate(hyp_chisq_gof, type = "bootstrap")) expect_error(generate(hyp_chisq_ind, type = "bootstrap")) expect_silent(generate(hyp_mean, type = "bootstrap")) expect_silent(generate(hyp_median, type = "bootstrap")) @@ -48,12 +48,12 @@ test_that("cohesion with type argument", { expect_error(generate(hyp_anova, type = "bootstrap")) expect_silent(generate(hyp_prop, type = "simulate")) - expect_error(generate(hyp_diff_in_props, type = "simulate")) + expect_warning(generate(hyp_diff_in_props, type = "simulate")) expect_silent(generate(hyp_chisq_gof, type = "simulate")) - expect_error(generate(hyp_chisq_ind, type = "simulate")) + expect_warning(generate(hyp_chisq_ind, type = "simulate")) expect_error(generate(hyp_mean, type = 
"simulate")) - expect_error(generate(hyp_diff_in_means, type = "simulate")) - expect_error(generate(hyp_anova, type = "simulate")) + expect_warning(generate(hyp_diff_in_means, type = "simulate")) + expect_warning(generate(hyp_anova, type = "simulate")) expect_error(generate(hyp_prop, type = "permute")) expect_silent(generate(hyp_diff_in_props, type = "permute")) @@ -155,81 +155,81 @@ test_that("auto `type` works (generate)", { expect_equal(attr(slope_boot, "type"), "bootstrap") expect_error(mtcars_df %>% - specify(response = mpg) %>% # formula alt: mpg ~ NULL - hypothesize(null = "point", mu = 25) %>% - generate(reps = 100, type = "permute") + specify(response = mpg) %>% # formula alt: mpg ~ NULL + hypothesize(null = "point", mu = 25) %>% + generate(reps = 100, type = "permute") ) - expect_error(mtcars_df %>% - specify(response = mpg) %>% - generate(reps = 100, type = "simulate") + expect_warning(mtcars_df %>% + specify(response = mpg) %>% + generate(reps = 100, type = "simulate") ) expect_error(mtcars_df %>% - specify(response = mpg) %>% # formula alt: mpg ~ NULL - hypothesize(null = "point", med = 26) %>% - generate(reps = 100, type = "permute") + specify(response = mpg) %>% # formula alt: mpg ~ NULL + hypothesize(null = "point", med = 26) %>% + generate(reps = 100, type = "permute") ) - expect_error(mtcars_df %>% - specify(response = am, success = "1") %>% # formula alt: am ~ NULL - hypothesize(null = "point", p = .25) %>% - generate(reps = 100, type = "bootstrap") + expect_warning(mtcars_df %>% + specify(response = am, success = "1") %>% # formula alt: am ~ NULL + hypothesize(null = "point", p = .25) %>% + generate(reps = 100, type = "bootstrap") ) expect_error(mtcars_df %>% - specify(am ~ vs, success = "1") %>% # alt: response = am, explanatory = vs - hypothesize(null = "independence") %>% - generate(reps = 100, type = "bootstrap") + specify(am ~ vs, success = "1") %>% # alt: response = am, explanatory = vs + hypothesize(null = "independence") %>% + generate(reps = 100, type = "bootstrap") ) - expect_error(mtcars_df %>% - specify(cyl ~ NULL) %>% # alt: response = cyl - hypothesize(null = "point", p = c("4" = .5, "6" = .25, "8" = .25)) %>% - generate(reps = 100, type = "bootstrap") + expect_warning(mtcars_df %>% + specify(cyl ~ NULL) %>% # alt: response = cyl + hypothesize(null = "point", p = c("4" = .5, "6" = .25, "8" = .25)) %>% + generate(reps = 100, type = "bootstrap") ) - expect_error(mtcars_df %>% - specify(cyl ~ am) %>% # alt: response = cyl, explanatory = am - hypothesize(null = "independence") %>% - generate(reps = 100, type = "simulate") + expect_warning(mtcars_df %>% + specify(cyl ~ am) %>% # alt: response = cyl, explanatory = am + hypothesize(null = "independence") %>% + generate(reps = 100, type = "simulate") ) expect_error(mtcars_df %>% - specify(mpg ~ am) %>% # alt: response = mpg, explanatory = am - hypothesize(null = "independence") %>% - generate(reps = 100, type = "bootstrap")) + specify(mpg ~ am) %>% # alt: response = mpg, explanatory = am + hypothesize(null = "independence") %>% + generate(reps = 100, type = "bootstrap")) - expect_error(mtcars_df %>% - specify(mpg ~ cyl) %>% # alt: response = mpg, explanatory = cyl - hypothesize(null = "independence") %>% - generate(reps = 100, type = "simulate") + expect_warning(mtcars_df %>% + specify(mpg ~ cyl) %>% # alt: response = mpg, explanatory = cyl + hypothesize(null = "independence") %>% + generate(reps = 100, type = "simulate") ) expect_error(mtcars_df %>% - specify(mpg ~ hp) %>% # alt: response = mpg, explanatory = cyl 
- hypothesize(null = "independence") %>% - generate(reps = 100, type = "bootstrap") + specify(mpg ~ hp) %>% # alt: response = mpg, explanatory = cyl + hypothesize(null = "independence") %>% + generate(reps = 100, type = "bootstrap") ) - expect_error(mtcars_df %>% - specify(response = am, success = "1") %>% - generate(reps = 100, type = "simulate") + expect_warning(mtcars_df %>% + specify(response = am, success = "1") %>% + generate(reps = 100, type = "simulate") ) expect_error(mtcars_df %>% - specify(mpg ~ am) %>% - generate(reps = 100, type = "permute") + specify(mpg ~ am) %>% + generate(reps = 100, type = "permute") ) - expect_error(mtcars_df %>% - specify(am ~ vs, success = "1") %>% - generate(reps = 100, type = "simulate") + expect_warning(mtcars_df %>% + specify(am ~ vs, success = "1") %>% + generate(reps = 100, type = "simulate") ) - expect_error(mtcars_df %>% - specify(mpg ~ hp) %>% - generate(reps = 100, type = "simulate") + expect_warning(mtcars_df %>% + specify(mpg ~ hp) %>% + generate(reps = 100, type = "simulate") ) }) @@ -241,7 +241,7 @@ test_that("mismatches lead to error", { hypothesize(null = "independence", p = c("1" = 0.5)) %>% generate(reps = 100, type = "simulate") ) - expect_error( + expect_warning( mtcars_df %>% specify(cyl ~ NULL) %>% # alt: response = cyl hypothesize( @@ -254,5 +254,9 @@ test_that("mismatches lead to error", { }) test_that("generate() handles `NULL` value of `type`", { - expect_error(generate(hyp_prop, type = NULL), "NULL.*type") + expect_message( + generate(hyp_prop, type = NULL), + 'Setting `type = "simulate"`.', + fixed = TRUE + ) }) From 2f437b34c986049c78de50e83e2e211d0da25313 Mon Sep 17 00:00:00 2001 From: Chester Ismay Date: Mon, 3 Sep 2018 09:48:24 -0700 Subject: [PATCH 38/78] Update p_value.R --- R/p_value.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/p_value.R b/R/p_value.R index 8147cb56..5758f8a4 100644 --- a/R/p_value.R +++ b/R/p_value.R @@ -1,6 +1,6 @@ #' Compute p-value #' -#' Only simulation-based methods are (currently only) supported. +#' Simulation-based methods are (currently only) supported. #' #' @param x Data frame of calculated statistics or containing attributes of #' theoretical distribution values. From 47881518a200dfb60debfbd32e9b385440627743 Mon Sep 17 00:00:00 2001 From: Chester Ismay Date: Mon, 3 Sep 2018 09:49:09 -0700 Subject: [PATCH 39/78] Update conf_int.R --- R/conf_int.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/conf_int.R b/R/conf_int.R index 397891f5..7acecbc3 100644 --- a/R/conf_int.R +++ b/R/conf_int.R @@ -17,7 +17,7 @@ #' the confidence interval. #' @section Aliases: #' `get_ci()` is an alias of `get_confidence_interval()`. -#' `conf_int` is a deprecated alias of `get_confidence_interval()`. +#' `conf_int()` is a deprecated alias of `get_confidence_interval()`. 
#' #' @examples #' # Prepare the dataset From 0dc65810328d5f24598dd5ef6e863379161b33cc Mon Sep 17 00:00:00 2001 From: Chester Ismay Date: Mon, 3 Sep 2018 09:54:12 -0700 Subject: [PATCH 40/78] Update p_value.R --- R/p_value.R | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/R/p_value.R b/R/p_value.R index 5758f8a4..6abbfa7f 100644 --- a/R/p_value.R +++ b/R/p_value.R @@ -122,11 +122,12 @@ two_sided_p_value <- function(x, obs_stat){ # param2 <- attr(x, "distr_param2") # # if(theory_type == "Two sample t") -# return(pt(q = obs_stat, -# df = param, -# lower.tail = set_lower_tail(direction)) -# ) -# +# return( +# pt(q = obs_stat, +# df = param, +# lower.tail = set_lower_tail(direction) +# ) +# ) # } #theory_t_pvalue <- From 9746fb33b13feb9c6167a67f61621c10e639ff0f Mon Sep 17 00:00:00 2001 From: ismayc Date: Mon, 3 Sep 2018 10:22:00 -0700 Subject: [PATCH 41/78] Update tests and file names --- R/{conf_int.R => get_confidence_interval.R} | 0 R/{p_value.R => get_p_value.R} | 0 man/get_confidence_interval.Rd | 4 ++-- man/get_p_value.Rd | 4 ++-- tests/testthat/helper-data.R | 7 ++++++ tests/testthat/test-aliases.R | 24 +++++++++++++++++++ tests/testthat/test-conf_int.R | 20 ---------------- tests/testthat/test-get_confidence_interval.R | 20 ++++++++++++++++ .../{test-p_value.R => test-get_p_value.R} | 1 + 9 files changed, 56 insertions(+), 24 deletions(-) rename R/{conf_int.R => get_confidence_interval.R} (100%) rename R/{p_value.R => get_p_value.R} (100%) create mode 100644 tests/testthat/test-aliases.R delete mode 100644 tests/testthat/test-conf_int.R create mode 100644 tests/testthat/test-get_confidence_interval.R rename tests/testthat/{test-p_value.R => test-get_p_value.R} (99%) diff --git a/R/conf_int.R b/R/get_confidence_interval.R similarity index 100% rename from R/conf_int.R rename to R/get_confidence_interval.R diff --git a/R/p_value.R b/R/get_p_value.R similarity index 100% rename from R/p_value.R rename to R/get_p_value.R diff --git a/man/get_confidence_interval.Rd b/man/get_confidence_interval.Rd index db5096fe..211204c8 100644 --- a/man/get_confidence_interval.Rd +++ b/man/get_confidence_interval.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/conf_int.R +% Please edit documentation in R/get_confidence_interval.R \name{get_confidence_interval} \alias{get_confidence_interval} \alias{get_ci} @@ -35,7 +35,7 @@ Only simulation-based methods are (currently only) supported. \section{Aliases}{ \code{get_ci()} is an alias of \code{get_confidence_interval()}. -\code{conf_int} is a deprecated alias of \code{get_confidence_interval()}. +\code{conf_int()} is a deprecated alias of \code{get_confidence_interval()}. } \examples{ diff --git a/man/get_p_value.Rd b/man/get_p_value.Rd index 5b935608..11d9e49a 100644 --- a/man/get_p_value.Rd +++ b/man/get_p_value.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/p_value.R +% Please edit documentation in R/get_p_value.R \name{get_p_value} \alias{get_p_value} \alias{get_pvalue} @@ -23,7 +23,7 @@ extreme than this).} A 1x1 data frame with value between 0 and 1. } \description{ -Only simulation-based methods are (currently only) supported. +Simulation-based methods are (currently only) supported. 
} \section{Aliases}{ diff --git a/tests/testthat/helper-data.R b/tests/testthat/helper-data.R index 12701584..69720a13 100644 --- a/tests/testthat/helper-data.R +++ b/tests/testthat/helper-data.R @@ -16,3 +16,10 @@ mtcars_df <- mtcars %>% cyl = factor(cyl), vs = factor(vs), am = factor(am), gear = factor(gear), carb = factor(carb) ) + +obs_diff <- iris_tbl %>% + specify(Sepal.Length.Group ~ Sepal.Width.Group, success = "<=5") %>% + calculate(stat = "diff in props", order = c("large", "small")) + +set.seed(2018) +test_df <- tibble::tibble(stat = rnorm(100)) diff --git a/tests/testthat/test-aliases.R b/tests/testthat/test-aliases.R new file mode 100644 index 00000000..705308f8 --- /dev/null +++ b/tests/testthat/test-aliases.R @@ -0,0 +1,24 @@ +context("aliases") + +test_that("aliases work", { + expect_gt( + iris_calc %>% + get_pvalue(obs_stat = -0.2, direction = "right") %>% + dplyr::pull(), + expected = 0.98 + ) + + expect_silent(test_df %>% get_ci()) +}) + +test_that("old aliases produce warning", { + expect_warning( + iris_calc %>% + p_value(obs_stat = -0.2, direction = "right") %>% + dplyr::pull(), + expected = 0.98 + ) + + expect_warning(test_df %>% conf_int()) + +}) \ No newline at end of file diff --git a/tests/testthat/test-conf_int.R b/tests/testthat/test-conf_int.R deleted file mode 100644 index 4a8e9727..00000000 --- a/tests/testthat/test-conf_int.R +++ /dev/null @@ -1,20 +0,0 @@ -context("conf_int") - -obs_diff <- iris_tbl %>% - specify(Sepal.Length.Group ~ Sepal.Width.Group, success = "<=5") %>% - calculate(stat = "diff in props", order = c("large", "small")) - -set.seed(2018) -test_df <- tibble::tibble(stat = rnorm(100)) - -test_that("basics work", { - expect_silent(test_df %>% get_confidence_interval()) - expect_error(test_df %>% get_confidence_interval(type = "other")) - expect_error(test_df %>% get_confidence_interval(level = 1.2)) - expect_error(test_df %>% get_confidence_interval(point_estimate = "help")) - - expect_silent(iris_calc %>% get_confidence_interval(type = "se", point_estimate = 4)) - expect_silent(iris_calc %>% get_confidence_interval(type = "se", point_estimate = obs_diff)) - expect_error(iris_calc %>% get_confidence_interval(type = "se", point_estimate = "error")) - expect_error(iris_calc %>% get_confidence_interval(type = "se")) -}) diff --git a/tests/testthat/test-get_confidence_interval.R b/tests/testthat/test-get_confidence_interval.R new file mode 100644 index 00000000..918defe5 --- /dev/null +++ b/tests/testthat/test-get_confidence_interval.R @@ -0,0 +1,20 @@ +context("conf_int") + +test_that("basics work", { + expect_silent(test_df %>% get_confidence_interval()) + expect_error(test_df %>% get_confidence_interval(type = "other")) + expect_error(test_df %>% get_confidence_interval(level = 1.2)) + expect_error(test_df %>% get_confidence_interval(point_estimate = "help")) + + expect_silent(iris_calc %>% + get_confidence_interval(type = "se", + point_estimate = 4)) + expect_silent(iris_calc %>% + get_confidence_interval(type = "se", + point_estimate = obs_diff)) + expect_error(iris_calc %>% + get_confidence_interval(type = "se", + point_estimate = "error")) + expect_error(iris_calc %>% + get_confidence_interval(type = "se")) +}) diff --git a/tests/testthat/test-p_value.R b/tests/testthat/test-get_p_value.R similarity index 99% rename from tests/testthat/test-p_value.R rename to tests/testthat/test-get_p_value.R index 1c5c063f..03b3a96a 100644 --- a/tests/testthat/test-p_value.R +++ b/tests/testthat/test-get_p_value.R @@ -47,3 +47,4 @@ test_that("get_p_value 
makes sense", { expected = 0 ) }) + From 70eeb686d6441dec0d1a012f264605fb0bfb51ab Mon Sep 17 00:00:00 2001 From: Chester Ismay Date: Mon, 3 Sep 2018 10:57:23 -0700 Subject: [PATCH 42/78] Update test-calculate.R --- tests/testthat/test-calculate.R | 2 -- 1 file changed, 2 deletions(-) diff --git a/tests/testthat/test-calculate.R b/tests/testthat/test-calculate.R index dd2bacea..3aa32c6a 100644 --- a/tests/testthat/test-calculate.R +++ b/tests/testthat/test-calculate.R @@ -1,7 +1,5 @@ context("calculate") -iris_df <- tibble::as_tibble(iris) - # calculate arguments test_that("x is a tibble", { vec <- 1:10 From cf4b3bf70e2f0447af44c629bbcce02b0c469436 Mon Sep 17 00:00:00 2001 From: Chester Ismay Date: Mon, 3 Sep 2018 10:58:35 -0700 Subject: [PATCH 43/78] Update helper-data.R --- tests/testthat/helper-data.R | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tests/testthat/helper-data.R b/tests/testthat/helper-data.R index 12701584..e92b8d18 100644 --- a/tests/testthat/helper-data.R +++ b/tests/testthat/helper-data.R @@ -1,3 +1,5 @@ +iris_df <- tibble::as_tibble(iris) + iris_tbl <- iris %>% tibble::as_tibble() %>% dplyr::mutate( From 794f038cdf6dcb04fcfccdd3be09c2442bbb1bdc Mon Sep 17 00:00:00 2001 From: ismayc Date: Mon, 3 Sep 2018 11:21:04 -0700 Subject: [PATCH 44/78] Comment out extra check that cannot currently get to --- R/generate.R | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/R/generate.R b/R/generate.R index fda851df..d6cb0f4c 100755 --- a/R/generate.R +++ b/R/generate.R @@ -70,13 +70,15 @@ compare_type_vs_auto_type <- function(type, auto_type) { } use_auto_type <- function(auto_type) { - if(is.null(auto_type)) { - stop_glue( - "There is no default `type`;", - "please set it to one of {toString(shQuote(GENERATION_TYPES))}.", - .sep = " " - ) - } + ## Commented out since no way to currently get to this + ## All variable types specified have an auto_type set + # if(is.null(auto_type)) { + # stop_glue( + # "There is no default `type`;", + # "please set it to one of {toString(shQuote(GENERATION_TYPES))}.", + # .sep = " " + # ) + # } message_glue('Setting `type = "{auto_type}"`.') auto_type } From c955327307cc526a83e3dab545bab2001dc9ebd8 Mon Sep 17 00:00:00 2001 From: Chester Ismay Date: Mon, 3 Sep 2018 11:35:54 -0700 Subject: [PATCH 45/78] Update specify.R --- R/specify.R | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/R/specify.R b/R/specify.R index 61703547..46cc0070 100755 --- a/R/specify.R +++ b/R/specify.R @@ -45,8 +45,8 @@ specify <- function(x, formula, response = NULL, if (methods::hasArg(formula)) { tryCatch( - formula_arg_is_formula <- rlang::is_formula(formula) - , error = function(e) { + formula_arg_is_formula <- rlang::is_formula(formula), + error = function(e) { stop_glue("The argument you passed in for the formula does not exist. * Were you trying to pass in an unquoted column name? 
* Did you forget to name one or more arguments?") From 89c9275563ce167fcbec74ecec341eb0aa55e81c Mon Sep 17 00:00:00 2001 From: Chester Ismay Date: Mon, 3 Sep 2018 12:10:38 -0700 Subject: [PATCH 46/78] Update DESCRIPTION --- DESCRIPTION | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/DESCRIPTION b/DESCRIPTION index b2fea636..cd371d59 100755 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -7,6 +7,7 @@ Authors@R: c( person("Chester", "Ismay", email = "chester.ismay@gmail.com", role = "aut"), person("Ben", "Baumer", email = "ben.baumer@gmail.com", role = "aut"), person("Mine", "Cetinkaya-Rundel", email = "mine@stat.duke.edu", role = "aut"), + person("Evgeni", "Chasnovski", email = "evgeni.chasnovski@gmail.com", role = "ctb"), person("Ted", "Laderas", email = "tedladeras@gmail.com", role = "ctb"), person("Nick", "Solomon", email = "nick.solomon@datacamp.com", role = "ctb"), person("Johanna", "Hardin", email = "Jo.Hardin@pomona.edu", role = "ctb"), @@ -14,7 +15,8 @@ Authors@R: c( person("Neal", "Fultz", email = "nfultz@gmail.com", role = "ctb"), person("Doug", "Friedman", email = "doug.nhp@gmail.com", role = "ctb"), person("Richie", "Cotton", email = "richie@datacamp.com", role = "ctb"), - person("Evgeni", "Chasnovski", email = "evgeni.chasnovski@gmail.com", role = "ctb")) + person("Brian", "Fannon", email = "captain@pirategrunt.com", role = "ctb") +) Description: The objective of this package is to perform inference using an expressive statistical grammar that coheres with the tidy design framework. License: CC0 Encoding: UTF-8 From 77a0875ead019512c69d9333947758b5348496fd Mon Sep 17 00:00:00 2001 From: ismayc Date: Mon, 3 Sep 2018 12:26:43 -0700 Subject: [PATCH 47/78] Add Brian Fannon --- DESCRIPTION | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index cd371d59..c3301693 100755 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -15,8 +15,7 @@ Authors@R: c( person("Neal", "Fultz", email = "nfultz@gmail.com", role = "ctb"), person("Doug", "Friedman", email = "doug.nhp@gmail.com", role = "ctb"), person("Richie", "Cotton", email = "richie@datacamp.com", role = "ctb"), - person("Brian", "Fannon", email = "captain@pirategrunt.com", role = "ctb") -) + person("Brian", "Fannon", email = "captain@pirategrunt.com", role = "ctb")) Description: The objective of this package is to perform inference using an expressive statistical grammar that coheres with the tidy design framework. 
License: CC0 Encoding: UTF-8 From a16590b39ce8a66fccd21aee5c672898f335c7fc Mon Sep 17 00:00:00 2001 From: ismayc Date: Mon, 3 Sep 2018 13:01:55 -0700 Subject: [PATCH 48/78] Fix broken tests (warnings <-> errors) --- DESCRIPTION | 3 +- R/generate.R | 24 ++++++++------- tests/testthat/test-generate.R | 54 +++++++++++++++++++++------------- 3 files changed, 49 insertions(+), 32 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index b2fea636..c3301693 100755 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -7,6 +7,7 @@ Authors@R: c( person("Chester", "Ismay", email = "chester.ismay@gmail.com", role = "aut"), person("Ben", "Baumer", email = "ben.baumer@gmail.com", role = "aut"), person("Mine", "Cetinkaya-Rundel", email = "mine@stat.duke.edu", role = "aut"), + person("Evgeni", "Chasnovski", email = "evgeni.chasnovski@gmail.com", role = "ctb"), person("Ted", "Laderas", email = "tedladeras@gmail.com", role = "ctb"), person("Nick", "Solomon", email = "nick.solomon@datacamp.com", role = "ctb"), person("Johanna", "Hardin", email = "Jo.Hardin@pomona.edu", role = "ctb"), @@ -14,7 +15,7 @@ Authors@R: c( person("Neal", "Fultz", email = "nfultz@gmail.com", role = "ctb"), person("Doug", "Friedman", email = "doug.nhp@gmail.com", role = "ctb"), person("Richie", "Cotton", email = "richie@datacamp.com", role = "ctb"), - person("Evgeni", "Chasnovski", email = "evgeni.chasnovski@gmail.com", role = "ctb")) + person("Brian", "Fannon", email = "captain@pirategrunt.com", role = "ctb")) Description: The objective of this package is to perform inference using an expressive statistical grammar that coheres with the tidy design framework. License: CC0 Encoding: UTF-8 diff --git a/R/generate.R b/R/generate.R index d6cb0f4c..5ee1e324 100755 --- a/R/generate.R +++ b/R/generate.R @@ -60,8 +60,8 @@ compare_type_vs_auto_type <- function(type, auto_type) { if (auto_type != type) { # User is overriding the default, so warn of potential stupidity. warning_glue( - "You have specified `type = \"{type}\"`, but `type` is expected", - "to be`\"{auto_type}\"`. This workflow is untested and", + "You have given `type = \"{type}\"`, but `type` is expected", + "to be `\"{auto_type}\"`. This workflow is untested and", "the results may not mean what you think they mean.", .sep = " " ) @@ -79,7 +79,7 @@ use_auto_type <- function(auto_type) { # .sep = " " # ) # } - message_glue('Setting `type = "{auto_type}"`.') + message_glue('Setting `type = "{auto_type}"` in `generate()`.') auto_type } @@ -98,10 +98,11 @@ bootstrap <- function(x, reps = 1, ...) { if (!is_nuat(x, "null")) { # If so, shift the variable chosen to have a mean corresponding # to that specified in `hypothesize` - if (attr(attr(x, "params"), "names") == "mu") { - col <- as.character(attr(x, "response")) + if (!is.null(attr(attr(x, "params"), "names"))){ + if (attr(attr(x, "params"), "names") == "mu") { + col <- as.character(attr(x, "response")) # if (attr(x, "theory_type") != "One sample t") { - x[[col]] <- x[[col]] - mean(x[[col]], na.rm = TRUE) + attr(x, "params") + x[[col]] <- x[[col]] - mean(x[[col]], na.rm = TRUE) + attr(x, "params") # } # Standardize after centering above @@ -116,11 +117,11 @@ bootstrap <- function(x, reps = 1, ...) 
{ } # Similarly for median - else if (attr(attr(x, "params"), "names") == "med") { - col <- as.character(attr(x, "response")) - x[[col]] <- x[[col]] - - stats::median(x[[col]], na.rm = TRUE) + attr(x, "params") - } + else if (attr(attr(x, "params"), "names") == "med") { + col <- as.character(attr(x, "response")) + x[[col]] <- x[[col]] - + stats::median(x[[col]], na.rm = TRUE) + attr(x, "params") + } # Implement confidence interval for bootstrapped proportions? # Implement z transformation? @@ -132,6 +133,7 @@ bootstrap <- function(x, reps = 1, ...) { # x[[col]] <- x[[col]] - # stats::sd(x[[col]], na.rm = TRUE) + attr(x, "params") # } + } } # Set variables for use in calculate() diff --git a/tests/testthat/test-generate.R b/tests/testthat/test-generate.R index 76cc80ff..04f70b31 100644 --- a/tests/testthat/test-generate.R +++ b/tests/testthat/test-generate.R @@ -38,28 +38,36 @@ hyp_anova <- mtcars_df %>% test_that("cohesion with type argument", { expect_warning(generate(hyp_prop, type = "bootstrap")) - expect_error(generate(hyp_diff_in_props, type = "bootstrap")) + expect_warning(generate(hyp_diff_in_props, type = "bootstrap")) expect_warning(generate(hyp_chisq_gof, type = "bootstrap")) - expect_error(generate(hyp_chisq_ind, type = "bootstrap")) + expect_warning(generate(hyp_chisq_ind, type = "bootstrap")) expect_silent(generate(hyp_mean, type = "bootstrap")) expect_silent(generate(hyp_median, type = "bootstrap")) expect_silent(generate(hyp_sd, type = "bootstrap")) - expect_error(generate(hyp_diff_in_means, type = "bootstrap")) - expect_error(generate(hyp_anova, type = "bootstrap")) + expect_warning(generate(hyp_diff_in_means, type = "bootstrap")) + expect_warning(generate(hyp_anova, type = "bootstrap")) expect_silent(generate(hyp_prop, type = "simulate")) expect_warning(generate(hyp_diff_in_props, type = "simulate")) expect_silent(generate(hyp_chisq_gof, type = "simulate")) expect_warning(generate(hyp_chisq_ind, type = "simulate")) - expect_error(generate(hyp_mean, type = "simulate")) + expect_error( + expect_warning(generate(hyp_mean, type = "simulate")) + ) expect_warning(generate(hyp_diff_in_means, type = "simulate")) expect_warning(generate(hyp_anova, type = "simulate")) - expect_error(generate(hyp_prop, type = "permute")) + expect_error( + expect_warning(generate(hyp_prop, type = "permute")) + ) expect_silent(generate(hyp_diff_in_props, type = "permute")) - expect_error(generate(hyp_chisq_gof, type = "permute")) + expect_error( + expect_warning(generate(hyp_chisq_gof, type = "permute")) + ) expect_silent(generate(hyp_chisq_ind, type = "permute")) - expect_error(generate(hyp_mean, type = "permute")) + expect_error( + expect_warning(generate(hyp_mean, type = "permute")) + ) expect_silent(generate(hyp_diff_in_means, type = "permute")) expect_silent(generate(hyp_anova, type = "permute")) }) @@ -154,10 +162,12 @@ test_that("auto `type` works (generate)", { expect_equal(attr(two_props_boot, "type"), "bootstrap") expect_equal(attr(slope_boot, "type"), "bootstrap") - expect_error(mtcars_df %>% + expect_error( + expect_warning(mtcars_df %>% specify(response = mpg) %>% # formula alt: mpg ~ NULL hypothesize(null = "point", mu = 25) %>% generate(reps = 100, type = "permute") + ) ) expect_warning(mtcars_df %>% @@ -165,10 +175,12 @@ test_that("auto `type` works (generate)", { generate(reps = 100, type = "simulate") ) - expect_error(mtcars_df %>% - specify(response = mpg) %>% # formula alt: mpg ~ NULL - hypothesize(null = "point", med = 26) %>% - generate(reps = 100, type = "permute") + 
expect_warning( + expect_error(mtcars_df %>% + specify(response = mpg) %>% # formula alt: mpg ~ NULL + hypothesize(null = "point", med = 26) %>% + generate(reps = 100, type = "permute") + ) ) expect_warning(mtcars_df %>% @@ -177,7 +189,7 @@ test_that("auto `type` works (generate)", { generate(reps = 100, type = "bootstrap") ) - expect_error(mtcars_df %>% + expect_warning(mtcars_df %>% specify(am ~ vs, success = "1") %>% # alt: response = am, explanatory = vs hypothesize(null = "independence") %>% generate(reps = 100, type = "bootstrap") @@ -195,7 +207,7 @@ test_that("auto `type` works (generate)", { generate(reps = 100, type = "simulate") ) - expect_error(mtcars_df %>% + expect_warning(mtcars_df %>% specify(mpg ~ am) %>% # alt: response = mpg, explanatory = am hypothesize(null = "independence") %>% generate(reps = 100, type = "bootstrap")) @@ -206,7 +218,7 @@ test_that("auto `type` works (generate)", { generate(reps = 100, type = "simulate") ) - expect_error(mtcars_df %>% + expect_warning(mtcars_df %>% specify(mpg ~ hp) %>% # alt: response = mpg, explanatory = cyl hypothesize(null = "independence") %>% generate(reps = 100, type = "bootstrap") @@ -217,9 +229,11 @@ test_that("auto `type` works (generate)", { generate(reps = 100, type = "simulate") ) - expect_error(mtcars_df %>% - specify(mpg ~ am) %>% - generate(reps = 100, type = "permute") + expect_error( + expect_warning(mtcars_df %>% + specify(mpg ~ am) %>% + generate(reps = 100, type = "permute") + ) ) expect_warning(mtcars_df %>% @@ -256,7 +270,7 @@ test_that("mismatches lead to error", { test_that("generate() handles `NULL` value of `type`", { expect_message( generate(hyp_prop, type = NULL), - 'Setting `type = "simulate"`.', + 'Setting `type = "simulate"` in `generate()`.', fixed = TRUE ) }) From e8d28096b84fc0fc890e5f7d8fcc77eb63ceaded Mon Sep 17 00:00:00 2001 From: ismayc Date: Sat, 8 Sep 2018 18:42:14 -0700 Subject: [PATCH 49/78] Add error message if theoretical p-value requested (not yet impl'd) --- R/get_p_value.R | 24 ++++++++++++++++-------- man/get_p_value.Rd | 3 +-- tests/testthat/helper-data.R | 1 + tests/testthat/test-get_p_value.R | 19 +++++++++++++++++++ 4 files changed, 37 insertions(+), 10 deletions(-) diff --git a/R/get_p_value.R b/R/get_p_value.R index 6abbfa7f..785765ff 100644 --- a/R/get_p_value.R +++ b/R/get_p_value.R @@ -2,8 +2,7 @@ #' #' Simulation-based methods are (currently only) supported. #' -#' @param x Data frame of calculated statistics or containing attributes of -#' theoretical distribution values. +#' @param x Data frame of calculated statistics as returned by \code{\link{`generate()`}} #' @param obs_stat A numeric value or a 1x1 data frame (as extreme or more #' extreme than this). #' @param direction A character string. 
Options are `"less"`, `"greater"`, or @@ -44,15 +43,20 @@ NULL get_p_value <- function(x, obs_stat, direction){ check_type(x, is.data.frame) + if(!is_generated(x)) { + stop_glue( + "Theoretical p-values are not yet supported.", + "`x` should be the result of calling `generate()`.", + .sep = " " + ) + } obs_stat <- check_obs_stat(obs_stat) check_direction(direction) - is_simulation_based <- !is.null(attr(x, "generate")) && - attr(x, "generate") - - if(is_simulation_based) - pvalue <- simulation_based_p_value(x = x, obs_stat = obs_stat, - direction = direction) + pvalue <- simulation_based_p_value( + x = x, + obs_stat = obs_stat, + direction = direction) ## Theoretical-based p-value # Could be more specific @@ -115,6 +119,10 @@ two_sided_p_value <- function(x, obs_stat){ return(tibble::tibble(p_value = basic_p_value)) } +is_generated <- function(x) { + !is.null(attr(x, "generate")) && attr(x, "generate") +} + # which_distribution <- function(x, theory_type, obs_stat, direction){ # # param <- attr(x, "distr_param") diff --git a/man/get_p_value.Rd b/man/get_p_value.Rd index 11d9e49a..65a1d4fd 100644 --- a/man/get_p_value.Rd +++ b/man/get_p_value.Rd @@ -10,8 +10,7 @@ get_p_value(x, obs_stat, direction) get_pvalue(x, obs_stat, direction) } \arguments{ -\item{x}{Data frame of calculated statistics or containing attributes of -theoretical distribution values.} +\item{x}{Data frame of calculated statistics as returned by \code{\link{`generate()`}}} \item{obs_stat}{A numeric value or a 1x1 data frame (as extreme or more extreme than this).} diff --git a/tests/testthat/helper-data.R b/tests/testthat/helper-data.R index 24d4c4b8..245816b6 100644 --- a/tests/testthat/helper-data.R +++ b/tests/testthat/helper-data.R @@ -25,3 +25,4 @@ obs_diff <- iris_tbl %>% set.seed(2018) test_df <- tibble::tibble(stat = rnorm(100)) + diff --git a/tests/testthat/test-get_p_value.R b/tests/testthat/test-get_p_value.R index 03b3a96a..71d6cb33 100644 --- a/tests/testthat/test-get_p_value.R +++ b/tests/testthat/test-get_p_value.R @@ -46,5 +46,24 @@ test_that("get_p_value makes sense", { dplyr::pull(), expected = 0 ) + expect_error( + iris_calc %>% + get_p_value( + obs_stat = median(iris_calc$stat) + 1, direction = "wrong" + ) + ) }) +test_that("theoretical p-value not supported error", { + obs_F <- iris_tbl %>% + specify(Sepal.Width ~ Species) %>% + calculate(stat = "F") + expect_error( + iris_tbl %>% + specify(Sepal.Width ~ Species) %>% + hypothesize(null = "independence") %>% + calculate(stat = "F") %>% + get_p_value(obs_stat = obs_F, direction = "right") + ) + +}) From 5a5ea93f4564979a89245972a8293f3824e8dc52 Mon Sep 17 00:00:00 2001 From: ismayc Date: Sat, 8 Sep 2018 22:05:57 -0700 Subject: [PATCH 50/78] Switch to markdown roxygen --- R/get_p_value.R | 4 ++-- man/get_p_value.Rd | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/R/get_p_value.R b/R/get_p_value.R index 785765ff..8a831897 100644 --- a/R/get_p_value.R +++ b/R/get_p_value.R @@ -2,11 +2,11 @@ #' #' Simulation-based methods are (currently only) supported. #' -#' @param x Data frame of calculated statistics as returned by \code{\link{`generate()`}} +#' @param x Data frame of calculated statistics as returned by [generate()] #' @param obs_stat A numeric value or a 1x1 data frame (as extreme or more #' extreme than this). #' @param direction A character string. Options are `"less"`, `"greater"`, or -#' `"two_sided"`. Can also specify `"left"`, `"right"`, or `"both"`. +#' `"two_sided"`. Can also use `"left"`, `"right"`, or `"both"`. 
#' #' @return A 1x1 data frame with value between 0 and 1. #' diff --git a/man/get_p_value.Rd b/man/get_p_value.Rd index 65a1d4fd..83e5cac1 100644 --- a/man/get_p_value.Rd +++ b/man/get_p_value.Rd @@ -10,13 +10,13 @@ get_p_value(x, obs_stat, direction) get_pvalue(x, obs_stat, direction) } \arguments{ -\item{x}{Data frame of calculated statistics as returned by \code{\link{`generate()`}}} +\item{x}{Data frame of calculated statistics as returned by \code{\link[=generate]{generate()}}} \item{obs_stat}{A numeric value or a 1x1 data frame (as extreme or more extreme than this).} \item{direction}{A character string. Options are \code{"less"}, \code{"greater"}, or -\code{"two_sided"}. Can also specify \code{"left"}, \code{"right"}, or \code{"both"}.} +\code{"two_sided"}. Can also use \code{"left"}, \code{"right"}, or \code{"both"}.} } \value{ A 1x1 data frame with value between 0 and 1. From 8c521e0f0477d4191d33239345180acc462c45c8 Mon Sep 17 00:00:00 2001 From: ismayc Date: Sat, 8 Sep 2018 22:18:58 -0700 Subject: [PATCH 51/78] Added better assertion for theoretical so that p-values w/o {infer} work --- R/get_p_value.R | 6 +++++- tests/testthat/test-get_p_value.R | 4 ++++ 2 files changed, 9 insertions(+), 1 deletion(-) diff --git a/R/get_p_value.R b/R/get_p_value.R index 8a831897..f081f98a 100644 --- a/R/get_p_value.R +++ b/R/get_p_value.R @@ -43,7 +43,7 @@ NULL get_p_value <- function(x, obs_stat, direction){ check_type(x, is.data.frame) - if(!is_generated(x)) { + if(!is_generated(x) & is_hypothesized(x)) { stop_glue( "Theoretical p-values are not yet supported.", "`x` should be the result of calling `generate()`.", @@ -123,6 +123,10 @@ is_generated <- function(x) { !is.null(attr(x, "generate")) && attr(x, "generate") } +is_hypothesized <- function(x){ + !is.null(attr(x, "null")) +} + # which_distribution <- function(x, theory_type, obs_stat, direction){ # # param <- attr(x, "distr_param") diff --git a/tests/testthat/test-get_p_value.R b/tests/testthat/test-get_p_value.R index 71d6cb33..b1d78aec 100644 --- a/tests/testthat/test-get_p_value.R +++ b/tests/testthat/test-get_p_value.R @@ -8,6 +8,10 @@ test_that("direction is appropriate", { }) test_that("get_p_value makes sense", { + expect_silent( + test_df %>% + get_p_value(obs_stat = 0.7, direction = "right") + ) expect_lt( iris_calc %>% get_p_value(obs_stat = 0.1, direction = "right") %>% From b09d853002c05310626ed8975eec887b9b3d7ac9 Mon Sep 17 00:00:00 2001 From: Chester Ismay Date: Sat, 8 Sep 2018 22:37:33 -0700 Subject: [PATCH 52/78] Update .travis.yml --- .travis.yml | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/.travis.yml b/.travis.yml index 867a3310..d37dc8cc 100644 --- a/.travis.yml +++ b/.travis.yml @@ -33,11 +33,8 @@ deploy: keep-history: true local-dir: docs github_token: $GITHUBTRAVIS -# target-branch: gh-pages-dev - target-branch: gh-pages - on: -# branch: develop - branch: master + target-branch: gh-pages-dev +# target-branch: gh-pages after_success: - Rscript -e 'covr::codecov()' From 30f813c69fe2c3e56b23e7097603673b6146f753 Mon Sep 17 00:00:00 2001 From: "evgeni.chasnovski" Date: Thu, 13 Sep 2018 17:41:55 +0300 Subject: [PATCH 53/78] Start refactoring of `visualize()` (#178). Also: - Adds `shade_p_value()`, `shade_pvalue()`, `shade_confidence_interval()`, and `shade_ci()`. 
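In rough terms, the intended workflow after this refactor is to layer the new
shade_*() helpers onto the visualize() output rather than passing
obs_stat/endpoints to visualize() itself. A minimal sketch of that usage
(mirroring the roxygen examples added in R/visualize.R below; the values 1.5
and c(-1.5, 1.5) are illustrative placeholders, not computed statistics):

    library(infer)
    library(dplyr)

    # Permutation null distribution for a standardized statistic
    null_t <- mtcars %>%
      mutate(am = factor(am)) %>%
      specify(mpg ~ am) %>%
      hypothesize(null = "independence") %>%
      generate(reps = 100, type = "permute") %>%
      calculate(stat = "t", order = c("1", "0"))

    base_plot <- visualize(null_t, method = "both")

    # p-value region, instead of visualize(obs_stat = ..., direction = ...)
    base_plot + shade_p_value(obs_stat = 1.5, direction = "right")

    # confidence interval region, instead of visualize(endpoints = ...)
    base_plot + shade_confidence_interval(endpoints = c(-1.5, 1.5))
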
--- NAMESPACE | 4 + R/infer.R | 6 +- R/visualize.R | 766 ++++++++++++++++--------------- man/shade_confidence_interval.Rd | 51 ++ man/shade_p_value.Rd | 55 +++ man/visualize.Rd | 31 +- tests/testthat/test-visualize.R | 82 +++- 7 files changed, 595 insertions(+), 400 deletions(-) create mode 100644 man/shade_confidence_interval.Rd create mode 100644 man/shade_p_value.Rd diff --git a/NAMESPACE b/NAMESPACE index 4f8da22c..bb52bb40 100755 --- a/NAMESPACE +++ b/NAMESPACE @@ -15,6 +15,10 @@ export(get_pvalue) export(hypothesize) export(p_value) export(rep_sample_n) +export(shade_ci) +export(shade_confidence_interval) +export(shade_p_value) +export(shade_pvalue) export(specify) export(t_stat) export(t_test) diff --git a/R/infer.R b/R/infer.R index 69614a05..5978a096 100755 --- a/R/infer.R +++ b/R/infer.R @@ -18,9 +18,9 @@ if (getRversion() >= "2.15.1") { utils::globalVariables( c( "prop", "stat", "value", "x", "..density..", "statistic", ".", - "parameter", "p.value", "xmin", "xmax", "density", "denom", "diff_prop", - "group_num", "n1", "n2", "num_suc", "p_hat", "total_suc", "explan", - "probs", "conf.low", "conf.high" + "parameter", "p.value", "xmin", "x_min", "xmax", "x_max", "density", + "denom", "diff_prop", "group_num", "n1", "n2", "num_suc", "p_hat", + "total_suc", "explan", "probs", "conf.low", "conf.high" ) ) } diff --git a/R/visualize.R b/R/visualize.R index f2847ad3..91983036 100755 --- a/R/visualize.R +++ b/R/visualize.R @@ -11,28 +11,42 @@ #' @param dens_color A character or hex string specifying the color of the #' theoretical density curve. #' @param obs_stat A numeric value or 1x1 data frame corresponding to what the -#' observed statistic is. +#' observed statistic is. **Deprecated (see Details)**. #' @param obs_stat_color A character or hex string specifying the color of the -#' observed statistic as a vertical line on the plot. +#' observed statistic as a vertical line on the plot. **Deprecated (see +#' Details)**. #' @param pvalue_fill A character or hex string specifying the color to shade #' the p-value. In previous versions of the package this was the `shade_color` -#' argument. +#' argument. **Deprecated (see Details)**. #' @param direction A string specifying in which direction the shading should #' occur. Options are `"less"`, `"greater"`, or `"two_sided"` for p-value. Can #' also give `"left"`, `"right"`, or `"both"` for p-value. For confidence #' intervals, use `"between"` and give the endpoint values in `endpoints`. +#' **Deprecated (see Details)**. #' @param endpoints A 2 element vector or a 1 x 2 data frame containing the #' lower and upper values to be plotted. Most useful for visualizing -#' conference intervals. +#' conference intervals. **Deprecated (see Details)**. #' @param endpoints_color A character or hex string specifying the color of the -#' observed statistic as a vertical line on the plot. +#' observed statistic as a vertical line on the plot. **Deprecated (see +#' Details)**. #' @param ci_fill A character or hex string specifying the color to shade the -#' confidence interval. -#' @param ... Other arguments passed along to ggplot2. +#' confidence interval. **Deprecated (see Details)**. +#' @param ... Other arguments passed along to \\{ggplot2\\} functions. #' +#' @details In order to make visualization workflow more straightforward and +#' explicit `visualize()` now only should be used to plot statistics directly. +#' That is why arguments not related to this task are deprecated and will be +#' removed in a future release of \\{infer\\}. 
+#' +#' To add to plot information related to p-value use [shade_p_value()]. To add +#' to plot information related to confidence interval use +#' [shade_confidence_interval()]. +#' #' @return A ggplot object showing the simulation-based distribution as a #' histogram or bar graph. Also used to show the theoretical curves. #' +#' @seealso [shade_p_value()], [shade_confidence_interval()]. +#' #' @examples #' # Permutations to create a simulation-based null distribution for #' # one numerical response and one categorical predictor @@ -79,6 +93,43 @@ visualize <- function(data, bins = 15, method = "simulation", endpoints_color = "mediumaquamarine", ci_fill = "turquoise", ...) { + check_visualize_args( + data, bins, method, dens_color, obs_stat, obs_stat_color, + pvalue_fill, direction, endpoints, endpoints_color, ci_fill + ) + warn_depricated_args(obs_stat, endpoints) + endpoints <- impute_enpoints(endpoints) + obs_stat <- impute_obs_stat(obs_stat, direction, endpoints) + + # Add `method` to `data` attributes to enable later possibility of + # complicated computation of p-value regions (in case `direction = "both"`) + # in `shade_p_value()`. + attr(data, "viz_method") <- method + + infer_plot <- ggplot(data) + + simulation_layer(data, bins, ...) + + theoretical_layer(data, dens_color, ...) + + title_labels_layer(data) + + shade_p_value( + obs_stat, direction, obs_stat_color, pvalue_fill, ... + ) + + if (!is.null(direction) && (direction == "between")) { + infer_plot <- infer_plot + + shade_ci(endpoints, endpoints_color, ci_fill, ...) + } + + infer_plot +} + +#' @rdname visualize +#' @export +visualise <- visualize + +check_visualize_args <- function(data, bins, method, dens_color, + obs_stat, obs_stat_color, + pvalue_fill, direction, + endpoints, endpoints_color, ci_fill) { check_type(data, is.data.frame) check_type(bins, is.numeric) check_type(method, is.character) @@ -96,417 +147,369 @@ visualize <- function(data, bins = 15, method = "simulation", "Expecting `endpoints` to be a 1 x 2 data frame or 2 element vector." ) } - if (is.vector(endpoints) && (length(endpoints) != 2)) { - warning_glue( - "Expecting `endpoints` to be a 1 x 2 data frame or 2 element vector. ", - "Using the first two entries as the `endpoints`." - ) - endpoints <- endpoints[1:2] - } - if (is.data.frame(endpoints)) { - endpoints <- unlist(endpoints) - } - obs_stat <- check_obs_stat(obs_stat) - if ( - !is.null(direction) && - (is.null(obs_stat) + is.null(endpoints) != 1) - ) { + + if (!(method %in% c("simulation", "theoretical", "both"))) { stop_glue( - "Shading requires either `endpoints` values for a confidence interval ", - "or the observed statistic `obs_stat` to be provided." + 'Provide `method` with one of three options: `"theoretical"`, `"both"`, ', + 'or `"simulation"`. `"simulation"` is the default.' ) } - - if (method == "simulation") { - infer_plot <- visualize_simulation( - data = data, - bins = bins, - dens_color = dens_color, - obs_stat = obs_stat, - obs_stat_color = obs_stat_color, - direction = direction, - pvalue_fill = pvalue_fill, - endpoints = endpoints, - ci_fill = ci_fill, - ... - ) - } else if (method == "theoretical") { - infer_plot <- visualize_theoretical( - data = data, - dens_color = dens_color, - obs_stat = obs_stat, - obs_stat_color = obs_stat_color, - direction = direction, - pvalue_fill = pvalue_fill, - endpoints = endpoints, - ci_fill = ci_fill, - ... 
- ) - } else if (method == "both") { + + if (method == "both") { if (!("stat" %in% names(data))) { stop_glue( '`generate()` and `calculate()` are both required to be done prior ', 'to `visualize(method = "both")`' ) } - + if ( ("replicate" %in% names(data)) && (length(unique(data$replicate)) < 100) ) { warning_glue( - "With only {length(unique(data$stat))} replicates, it may be ", + "With only {length(unique(data$replicate))} replicates, it may be ", "difficult to see the relationship between simulation and theory." ) } - - infer_plot <- visualize_both( - data = data, - bins = bins, - dens_color = dens_color, - obs_stat = obs_stat, - obs_stat_color = obs_stat_color, - direction = direction, - pvalue_fill = pvalue_fill, - endpoints = endpoints, - ci_fill = ci_fill, - ... - ) - } else { - stop_glue( - 'Provide `method` with one of three options: `"theoretical"`, `"both"`, ', - 'or `"simulation"`. `"simulation"` is the default.' + } + + if (!is.null(obs_stat) && !is.null(endpoints)) { + warning_glue( + "Values for both `endpoints` and `obs_stat` were given when only one ", + "should be set. Ignoring `obs_stat` values." ) } - - if (!is.null(obs_stat)) { # && !is.null(direction) - infer_plot <- infer_plot + - geom_vline(xintercept = obs_stat, size = 2, color = obs_stat_color, ...) + + theory_type <- short_theory_type(data) + if (theory_type %in% c("F", "Chi-Square")) { + warn_right_tail_test(direction, theory_type) } + + TRUE +} +warn_depricated_args <- function(obs_stat, endpoints) { + if (!is.null(obs_stat)) { + warning_glue( + "`visualize()` shouldn't be used to plot p-value. Arguments `obs_stat`, ", + "`obs_stat_color`, `pvalue_fill`, and `direction` are deprecated. ", + "Use `add_p_value()` instead." + ) + } + if (!is.null(endpoints)) { - if (!is.null(obs_stat)) { - warning_glue( - "Values for both `endpoints` and `obs_stat` were given when only one ", - "should be set. Ignoring `obs_stat` values." - ) - } - infer_plot <- infer_plot + - geom_vline( - xintercept = endpoints, size = 2, color = endpoints_color, ... - ) + warning_glue( + "`visualize()` shouldn't be used to plot confidence interval. Arguments ", + "`endpoints`, `endpoints_color`, and `ci_fill` are deprecated. ", + "Use `add_ci()` instead." + ) } - - infer_plot + + TRUE } -theory_plot <- function(d_fun, q_fun, args_list, stat_name, dens_color) { - x_range <- do.call(q_fun, c(p = list(c(0.001, 0.999)), args_list)) - - ggplot(data.frame(x = x_range)) + - stat_function( - mapping = aes(x), fun = d_fun, args = args_list, color = dens_color - ) + - ggtitle(glue_null("Theoretical {stat_name} Null Distribution")) + - xlab("") + ylab("") +impute_enpoints <- function(endpoints) { + if (is.vector(endpoints) && (length(endpoints) != 2)) { + warning_glue( + "Expecting `endpoints` to be a 1 x 2 data frame or 2 element vector. ", + "Using the first two entries as the `endpoints`." + ) + endpoints <- endpoints[1:2] + } + if (is.data.frame(endpoints)) { + endpoints <- unlist(endpoints) + } + + endpoints } -both_plot <- function(data, d_fun, args_list, stat_name, stat_label, dens_color, - obs_stat, direction, bins, pvalue_fill, endpoints, - ci_fill, ...) 
{ - infer_plot <- shade_density_check( - data = data, - obs_stat = obs_stat, - direction = direction, - bins = bins, - endpoints = endpoints, - pvalue_fill = pvalue_fill, - ci_fill = ci_fill - ) +impute_obs_stat <- function(obs_stat, direction, endpoints) { + obs_stat <- check_obs_stat(obs_stat) - infer_plot + - stat_function( - fun = d_fun, args = args_list, color = dens_color - ) + - ggtitle(glue_null( - "Simulation-Based and Theoretical {stat_name} Null Distributions" - )) + - xlab(stat_label) + ylab("") + if ( + !is.null(direction) && + (is.null(obs_stat) + is.null(endpoints) != 1) + ) { + stop_glue( + "Shading requires either `endpoints` values for a confidence interval ", + "or the observed statistic `obs_stat` to be provided." + ) + } + + obs_stat } -shade_density_check <- function(data, - obs_stat, - direction, - bins, - density = TRUE, - pvalue_fill, - endpoints, - ci_fill, - ...) { - if (is.null(direction) || is.null(obs_stat)) { - if (density) { - gg_plot <- ggplot(data = data, mapping = aes(x = stat)) + +simulation_layer <- function(data, bins, ...) { + method <- get_viz_method(data) + + if (method == "theoretical") { + return(list()) + } + + if (method == "simulation") { + if (length(unique(data$stat)) >= 10) { + res <- list( geom_histogram( - bins = bins, color = "white", mapping = aes(y = ..density..), ... + mapping = aes(x = stat), bins = bins, color = "white", ... ) - } # else { - # Not sure if needed? Can't get tests to find it - # gg_plot <- ggplot(data = data, mapping = aes(x = stat)) + - # geom_histogram(bins = bins, color = "white", ...) - # } - } - - if (xor(!is.null(obs_stat), !is.null(endpoints))) { - if (!is.null(direction)) { - if (density) { - gg_plot <- ggplot(data = data, mapping = aes(x = stat)) + - geom_histogram( - bins = bins, color = "white", mapping = aes(y = ..density..), ... - ) - } else { - gg_plot <- ggplot(data = data, mapping = aes(x = stat)) + - geom_histogram(bins = bins, color = "white", ...) - } - - if (direction %in% c("less", "left", "greater", "right")) { - gg_plot <- gg_plot + - geom_tail(direction, obs_stat, pvalue_fill) - } - - if (direction %in% c("two_sided", "both")) { - gg_plot <- gg_plot + - geom_both_tails( - border_1 = obs_stat, - border_2 = mirror_obs_stat(data$stat, obs_stat), - fill = pvalue_fill - ) - } - - if (direction == "between") { - gg_plot <- gg_plot + - geom_rect( - data = data.frame(endpoints[1]), - fill = ci_fill, alpha = 0.6, - aes(xmin = endpoints[1], xmax = endpoints[2], ymin = 0, ymax = Inf), - inherit.aes = FALSE, - ... - ) - } + ) + } else { + res <- list(geom_bar(mapping = aes(x = stat), ...)) } + } else if (method == "both") { + res <- list( + geom_histogram( + mapping = aes(x = stat, y = ..density..), bins = bins, + color = "white", ... + ) + ) } - gg_plot + + res } -visualize_simulation <- function(data, bins, - method = "simulation", - dens_color, - obs_stat, - obs_stat_color, - direction, - pvalue_fill, - endpoints, - ci_fill, - ...) { - if (is.null(direction)) { - if (length(unique(data$stat)) >= 10) { - infer_plot <- ggplot(data = data, mapping = aes(x = stat)) + - geom_histogram(bins = bins, color = "white", ...) - } else { - infer_plot <- ggplot(data = data, mapping = aes(x = stat)) + - geom_bar(...) + - xlab("stat") - } - } else { - infer_plot <- shade_density_check( - data = data, - obs_stat = obs_stat, - direction = direction, - bins = bins, - density = FALSE, - pvalue_fill = pvalue_fill, - endpoints = endpoints, - ci_fill = ci_fill - ) +theoretical_layer <- function(data, dens_color, ...) 
{ + method <- get_viz_method(data) + + if (method == "simulation") { + return(list()) } - infer_plot + + warn_theoretical_layer(data) + + theory_type <- short_theory_type(data) + + switch( + theory_type, + t = theory_curve( + method, dt, qt, list(df = attr(data, "distr_param")), dens_color + ), + `F` = theory_curve( + method, df, qf, + list( + df1 = attr(data, "distr_param"), df2 = attr(data, "distr_param2") + ), + dens_color = dens_color + ), + z = theory_curve(method, dnorm, qnorm, list(), dens_color), + `Chi-Square` = theory_curve( + method, dchisq, qchisq, list(df = attr(data, "distr_param")), dens_color + ) + ) } -visualize_theoretical <- function(data, - dens_color, - obs_stat, - obs_stat_color, - direction, - pvalue_fill, - endpoints, - ci_fill, - ...) { +warn_theoretical_layer <- function(data) { + method <- get_viz_method(data) + warning_glue( "Check to make sure the conditions have been met for the theoretical ", "method. {{infer}} currently does not check these for you." ) - + if ( !is_nuat(data, "stat") && !(attr(data, "stat") %in% c("t", "z", "Chisq", "F")) ) { - warning_glue( - "Your `calculate`d statistic and the theoretical distribution are on ", - "different scales. Displaying only the theoretical distribution." - ) + if (method == "theoretical") { + warning_glue( + "Your `calculate`d statistic and the theoretical distribution are on ", + "different scales. Displaying only the theoretical distribution." + ) + } else if (method == "both") { + stop_glue( + "Your `calculate`d statistic and the theoretical distribution are on ", + "different scales. Use a standardized `stat` instead." + ) + } } - - theory_type <- short_theory_type(data) - - if (theory_type == "t") { - infer_plot <- theory_plot( - d_fun = dt, q_fun = qt, - args_list = list(df = attr(data, "distr_param")), - stat_name = "t", - dens_color = dens_color - ) - } else if (theory_type == "F") { - warn_right_tail_test(direction, "F") +} - infer_plot <- theory_plot( - d_fun = df, q_fun = qf, - args_list = list( - df1 = attr(data, "distr_param"), df2 = attr(data, "distr_param2") - ), - stat_name = "F", - dens_color = dens_color +theory_curve <- function(method, d_fun, q_fun, args_list, dens_color) { + if (method == "theoretical") { + x_range <- do.call(q_fun, c(p = list(c(0.001, 0.999)), args_list)) + + res <- list( + stat_function( + data = data.frame(x = x_range), mapping = aes(x), + fun = d_fun, args = args_list, color = dens_color + ) ) - } else if (theory_type == "z") { - infer_plot <- theory_plot( - d_fun = dnorm, q_fun = qnorm, - args_list = list(), - stat_name = "z", - dens_color = dens_color + } else if (method == "both") { + res <- list( + stat_function( + mapping = aes(x = stat), fun = d_fun, args = args_list, + color = dens_color + ) ) - } else if (theory_type == "Chi-Square") { - warn_right_tail_test(direction, "Chi-Square") + } + + res +} - infer_plot <- theory_plot( - d_fun = dchisq, q_fun = qchisq, - args_list = list(df = attr(data, "distr_param")), - stat_name = "Chi-Square", - dens_color = dens_color - ) - } # else { - # stop_glue( - # '"{attr(data, "theory_type")}" is not implemented (possibly yet).' 
- # ) - # } +title_labels_layer <- function(data) { + method <- get_viz_method(data) + theory_type <- short_theory_type(data) + + title_string <- switch( + method, + simulation = "Simulation-Based Null Distribution", + theoretical = "Theoretical {theory_type} Null Distribution", + both = "Simulation-Based and Theoretical {theory_type} Null Distributions" + ) + + x_lab <- switch(method, simulation = "stat", "{theory_type} stat") + y_lab <- switch(method, simulation = "count", "density") + + list( + ggtitle(glue_null(title_string)), + xlab(glue_null(x_lab)), + ylab(glue_null(y_lab)) + ) +} - # Plot tails - if (!is.null(obs_stat) && !is.null(direction)) { +#' Add information about p-value region(s) +#' +#' `shade_p_value()` plots p-value region(s) on top of the [visualize()] output. +#' It should be used as \\{ggplot2\\} layer function (see examples). +#' `shade_pvalue()` is its alias. +#' +#' @param obs_stat A numeric value or 1x1 data frame corresponding to what the +#' observed statistic is. +#' @param direction A string specifying in which direction the shading should +#' occur. Options are `"less"`, `"greater"`, or `"two_sided"`. Can +#' also give `"left"`, `"right"`, or `"both"`. If `NULL` then no shading is +#' actually done. +#' @param color A character or hex string specifying the color of the observed +#' statistic as a vertical line on the plot. +#' @param fill A character or hex string specifying the color to shade the +#' p-value region. If `NULL` then no shading is actually done. +#' @param ... Other arguments passed along to \\{ggplot2\\} functions. +#' +#' @return A list of \\{ggplot2\\} objects to be added to the `visualize()` +#' output. +#' +#' @seealso [shade_confidence_interval()] to add information about confidence +#' interval. +#' +#' @examples +#' viz_plot <- mtcars %>% +#' dplyr::mutate(am = factor(am)) %>% +#' specify(mpg ~ am) %>% # alt: response = mpg, explanatory = am +#' hypothesize(null = "independence") %>% +#' generate(reps = 100, type = "permute") %>% +#' calculate(stat = "t", order = c("1", "0")) %>% +#' visualize(method = "both") +#' +#' viz_plot + shade_p_value(1.5, direction = "right") +#' viz_plot + shade_p_value(1.5, direction = "both") +#' viz_plot + shade_p_value(1.5, direction = NULL) +#' +#' @name shade_p_value +NULL + +#' @rdname shade_p_value +#' @export +shade_p_value <- function(obs_stat, direction, + color = "red2", fill = "pink", ...) { + res <- list() + if (is.null(obs_stat)) { + return(res) + } + + # Add shading + if (!is.null(direction) && !is.null(fill)) { if (direction %in% c("less", "left", "greater", "right")) { - infer_plot <- infer_plot + - geom_tail(direction, obs_stat, pvalue_fill, ...) - } - # Assuming two-tailed shading will only happen with theoretical - # distributions centered at 0 - if (direction %in% c("two_sided", "both")) { - infer_plot <- infer_plot + - geom_both_tails(obs_stat, -obs_stat, pvalue_fill, ...) + tail_data <- one_tail_data(obs_stat, direction) + + res <- c(res, list(geom_tail(tail_data, fill, ...))) + } else if (direction %in% c("two_sided", "both")) { + tail_data <- two_tail_data(obs_stat) + + res <- c(res, list(geom_tail(tail_data, fill, ...))) + } else { + warning_glue( + '`direction` should be one of `"less"`, `"left"`, `"greater"`, ", + "`"right"`, `"two_sided"`, `"both"`.' 
+ ) } } - - # To implement: plotting of theoretical confidence interval values - - infer_plot -} - -visualize_both <- function(data, bins, - dens_color, - obs_stat, - obs_stat_color, - direction, - pvalue_fill, - endpoints, - ci_fill, - ...) { - warning_glue( - "Check to make sure the conditions have been met for the theoretical ", - "method. `infer` currently does not check these for you." + + # Add vertical line at `obs_stat` + c( + res, list(geom_vline(xintercept = obs_stat, size = 2, color = color, ...)) ) +} - if (!(attr(data, "stat") %in% c("t", "z", "Chisq", "F"))) { - stop_glue( - "Your `calculate`d statistic and the theoretical distribution are on ", - "different scales. Use a standardized `stat` instead." - ) +#' @rdname shade_p_value +#' @export +shade_pvalue <- shade_p_value + +#' Add information about confidence interval +#' +#' `shade_confidence_interval()` plots confidence interval region on top of the +#' [visualize()] output. It should be used as \\{ggplot2\\} layer function (see +#' examples). `shade_ci()` is its alias. +#' +#' @param endpoints A 2 element vector or a 1 x 2 data frame containing the +#' lower and upper values to be plotted. Most useful for visualizing +#' conference intervals. +#' @param color A character or hex string specifying the color of the +#' end points as a vertical lines on the plot. +#' @param fill A character or hex string specifying the color to shade the +#' confidence interval. If `NULL` then no shading is actually done. +#' @param ... Other arguments passed along to \\{ggplot2\\} functions. +#' @return A list of \\{ggplot2\\} objects to be added to the `visualize()` +#' output. +#' +#' @seealso [shade_p_value()] to add information about p-value region. +#' +#' @examples +#' viz_plot <- mtcars %>% +#' dplyr::mutate(am = factor(am)) %>% +#' specify(mpg ~ am) %>% # alt: response = mpg, explanatory = am +#' hypothesize(null = "independence") %>% +#' generate(reps = 100, type = "permute") %>% +#' calculate(stat = "t", order = c("1", "0")) %>% +#' visualize(method = "both") +#' +#' viz_plot + shade_confidence_interval(c(-1.5, 1.5)) +#' viz_plot + shade_confidence_interval(c(-1.5, 1.5), fill = NULL) +#' +#' @name shade_confidence_interval +NULL + +#' @rdname shade_confidence_interval +#' @export +shade_confidence_interval <- function(endpoints, color = "mediumaquamarine", + fill = "turquoise", ...) 
{ + res <- list() + if (is.null(endpoints)) { + return(res) } - - theory_type <- short_theory_type(data) - if (theory_type == "t") { - infer_plot <- both_plot( - data = data, - d_fun = dt, - args_list = list(df = attr(data, "distr_param")), - stat_name = "t", stat_label = "tstat", - dens_color = dens_color, - bins = bins, - direction = direction, - obs_stat = obs_stat, - pvalue_fill = pvalue_fill, - endpoints = endpoints, - ci_fill = ci_fill - ) - } else if (theory_type == "F") { - warn_right_tail_test(direction, "F") - - infer_plot <- both_plot( - data = data, - d_fun = df, - args_list = list( - df1 = attr(data, "distr_param"), df2 = attr(data, "distr_param2") - ), - stat_name = "F", stat_label = "Fstat", - dens_color = dens_color, - bins = bins, - direction = direction, - obs_stat = obs_stat, - pvalue_fill = pvalue_fill, - endpoints = endpoints, - ci_fill = ci_fill - ) - } else if (theory_type == "z") { - infer_plot <- both_plot( - data = data, - d_fun = dnorm, - args_list = list(), - stat_name = "z", stat_label = "zstat", - dens_color = dens_color, - bins = bins, - direction = direction, - obs_stat = obs_stat, - pvalue_fill = pvalue_fill, - endpoints = endpoints, - ci_fill = ci_fill - ) - } else if (theory_type == "Chi-Square") { - warn_right_tail_test(direction, "Chi-Square") - - infer_plot <- both_plot( - data = data, - d_fun = dchisq, - args_list = list(df = attr(data, "distr_param")), - stat_name = "Chi-Square", stat_label = "chisqstat", - dens_color = dens_color, - bins = bins, - direction = direction, - obs_stat = obs_stat, - pvalue_fill = pvalue_fill, - endpoints = endpoints, - ci_fill = ci_fill + if (!is.null(fill)) { + res <- c( + res, list( + geom_rect( + data = data.frame(endpoints[1]), + fill = fill, alpha = 0.6, + aes(xmin = endpoints[1], xmax = endpoints[2], ymin = 0, ymax = Inf), + inherit.aes = FALSE, + ... + ) + ) ) - } # else { - # stop_glue('"{attr(data, "theory_type")}" is not implemented yet.') - # } - - infer_plot + } + + c( + res, list(geom_vline(xintercept = endpoints, size = 2, color = color, ...)) + ) } +#' @rdname shade_confidence_interval +#' @export +shade_ci <- shade_confidence_interval + get_percentile <- function(vector, observation) { stats::ecdf(vector)(observation) } @@ -542,17 +545,11 @@ warn_right_tail_test <- function(direction, stat_name) { TRUE } -geom_tail <- function(dir, border, fill, ...) { - if (dir %in% c("less", "left")) { - x_range <- c(-Inf, border) - } else if (dir %in% c("greater", "right")) { - x_range <- c(border, Inf) - } - +geom_tail <- function(tail_data, fill, ...) { list( geom_rect( - data = data.frame(border), - aes(xmin = x_range[1], xmax = x_range[2], ymin = 0, ymax = Inf), + data = tail_data, + aes(xmin = x_min, xmax = x_max, ymin = 0, ymax = Inf), fill = fill, alpha = 0.6, inherit.aes = FALSE, ... @@ -560,16 +557,33 @@ geom_tail <- function(dir, border, fill, ...) { ) } -geom_both_tails <- function(border_1, border_2, fill, ...) { - left_border <- min(border_1, border_2) - right_border <- max(border_1, border_2) - - c( - geom_tail("left", left_border, fill, ...), - geom_tail("right", right_border, fill, ...) 
- ) +one_tail_data <- function(obs_stat, direction) { + if (direction %in% c("less", "left")) { + data.frame(x_min = -Inf, x_max = obs_stat) + } else if (direction %in% c("greater", "right")) { + data.frame(x_min = obs_stat, x_max = Inf) + } } -#' @rdname visualize -#' @export -visualise <- visualize +two_tail_data <- function(obs_stat) { + # Take advantage of {ggplot2} functionality to accept function as `data` + # This is needed to make possible existence of `shade_p_value()` in case of + # `direction = "both"`, as it depends on actual `data` but adding it as + # argument to `shade_p_value()` is very bad. + function(data) { + if (get_viz_method(data) == "theoretical") { + second_border <- -obs_stat + } else { + second_border <- mirror_obs_stat(data$stat, obs_stat) + } + + data.frame( + x_min = c(-Inf, max(obs_stat, second_border)), + x_max = c(min(obs_stat, second_border), Inf) + ) + } +} + +get_viz_method <- function(data) { + attr(data, "viz_method") +} diff --git a/man/shade_confidence_interval.Rd b/man/shade_confidence_interval.Rd new file mode 100644 index 00000000..02377da6 --- /dev/null +++ b/man/shade_confidence_interval.Rd @@ -0,0 +1,51 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/visualize.R +\name{shade_confidence_interval} +\alias{shade_confidence_interval} +\alias{shade_ci} +\title{Add information about confidence interval} +\usage{ +shade_confidence_interval(endpoints, color = "mediumaquamarine", + fill = "turquoise", ...) + +shade_ci(endpoints, color = "mediumaquamarine", fill = "turquoise", + ...) +} +\arguments{ +\item{endpoints}{A 2 element vector or a 1 x 2 data frame containing the +lower and upper values to be plotted. Most useful for visualizing +conference intervals.} + +\item{color}{A character or hex string specifying the color of the +end points as a vertical lines on the plot.} + +\item{fill}{A character or hex string specifying the color to shade the +confidence interval. If \code{NULL} then no shading is actually done.} + +\item{...}{Other arguments passed along to \{ggplot2\} functions.} +} +\value{ +A list of \{ggplot2\} objects to be added to the \code{visualize()} +output. +} +\description{ +\code{shade_confidence_interval()} plots confidence interval region on top of the +\code{\link[=visualize]{visualize()}} output. It should be used as \{ggplot2\} layer function (see +examples). \code{shade_ci()} is its alias. +} +\examples{ +viz_plot <- mtcars \%>\% + dplyr::mutate(am = factor(am)) \%>\% + specify(mpg ~ am) \%>\% # alt: response = mpg, explanatory = am + hypothesize(null = "independence") \%>\% + generate(reps = 100, type = "permute") \%>\% + calculate(stat = "t", order = c("1", "0")) \%>\% + visualize(method = "both") + +viz_plot + shade_confidence_interval(c(-1.5, 1.5)) +viz_plot + shade_confidence_interval(c(-1.5, 1.5), fill = NULL) + +} +\seealso{ +\code{\link[=shade_p_value]{shade_p_value()}} to add information about p-value region. +} diff --git a/man/shade_p_value.Rd b/man/shade_p_value.Rd new file mode 100644 index 00000000..48cffef4 --- /dev/null +++ b/man/shade_p_value.Rd @@ -0,0 +1,55 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/visualize.R +\name{shade_p_value} +\alias{shade_p_value} +\alias{shade_pvalue} +\title{Add information about p-value region(s)} +\usage{ +shade_p_value(obs_stat, direction, color = "red2", fill = "pink", ...) + +shade_pvalue(obs_stat, direction, color = "red2", fill = "pink", ...) 
+} +\arguments{ +\item{obs_stat}{A numeric value or 1x1 data frame corresponding to what the +observed statistic is.} + +\item{direction}{A string specifying in which direction the shading should +occur. Options are \code{"less"}, \code{"greater"}, or \code{"two_sided"}. Can +also give \code{"left"}, \code{"right"}, or \code{"both"}. If \code{NULL} then no shading is +actually done.} + +\item{color}{A character or hex string specifying the color of the observed +statistic as a vertical line on the plot.} + +\item{fill}{A character or hex string specifying the color to shade the +p-value region. If \code{NULL} then no shading is actually done.} + +\item{...}{Other arguments passed along to \{ggplot2\} functions.} +} +\value{ +A list of \{ggplot2\} objects to be added to the \code{visualize()} +output. +} +\description{ +\code{shade_p_value()} plots p-value region(s) on top of the \code{\link[=visualize]{visualize()}} output. +It should be used as \{ggplot2\} layer function (see examples). +\code{shade_pvalue()} is its alias. +} +\examples{ +viz_plot <- mtcars \%>\% + dplyr::mutate(am = factor(am)) \%>\% + specify(mpg ~ am) \%>\% # alt: response = mpg, explanatory = am + hypothesize(null = "independence") \%>\% + generate(reps = 100, type = "permute") \%>\% + calculate(stat = "t", order = c("1", "0")) \%>\% + visualize(method = "both") + +viz_plot + shade_p_value(1.5, direction = "right") +viz_plot + shade_p_value(1.5, direction = "both") +viz_plot + shade_p_value(1.5, direction = NULL) + +} +\seealso{ +\code{\link[=shade_confidence_interval]{shade_confidence_interval()}} to add information about confidence +interval. +} diff --git a/man/visualize.Rd b/man/visualize.Rd index 314096b3..56697ee4 100755 --- a/man/visualize.Rd +++ b/man/visualize.Rd @@ -28,31 +28,34 @@ visualise(data, bins = 15, method = "simulation", theoretical density curve.} \item{obs_stat}{A numeric value or 1x1 data frame corresponding to what the -observed statistic is.} +observed statistic is. \strong{Deprecated (see Details)}.} \item{obs_stat_color}{A character or hex string specifying the color of the -observed statistic as a vertical line on the plot.} +observed statistic as a vertical line on the plot. \strong{Deprecated (see +Details)}.} \item{pvalue_fill}{A character or hex string specifying the color to shade the p-value. In previous versions of the package this was the \code{shade_color} -argument.} +argument. \strong{Deprecated (see Details)}.} \item{direction}{A string specifying in which direction the shading should occur. Options are \code{"less"}, \code{"greater"}, or \code{"two_sided"} for p-value. Can also give \code{"left"}, \code{"right"}, or \code{"both"} for p-value. For confidence -intervals, use \code{"between"} and give the endpoint values in \code{endpoints}.} +intervals, use \code{"between"} and give the endpoint values in \code{endpoints}. +\strong{Deprecated (see Details)}.} \item{endpoints}{A 2 element vector or a 1 x 2 data frame containing the lower and upper values to be plotted. Most useful for visualizing -conference intervals.} +conference intervals. \strong{Deprecated (see Details)}.} \item{endpoints_color}{A character or hex string specifying the color of the -observed statistic as a vertical line on the plot.} +observed statistic as a vertical line on the plot. \strong{Deprecated (see +Details)}.} \item{ci_fill}{A character or hex string specifying the color to shade the -confidence interval.} +confidence interval. 
\strong{Deprecated (see Details)}.} -\item{...}{Other arguments passed along to ggplot2.} +\item{...}{Other arguments passed along to \{ggplot2\} functions.} } \value{ A ggplot object showing the simulation-based distribution as a @@ -62,6 +65,15 @@ histogram or bar graph. Also used to show the theoretical curves. Visualize the distribution of the simulation-based inferential statistics or the theoretical distribution (or both!). } +\details{ +In order to make visualization workflow more straightforward and +explicit \code{visualize()} now only should be used to plot statistics directly. +That is why arguments, not related to this task, are deprecated and will be +removed in a future release of \{infer\}. + +To add information related to p-value use \code{\link[=shade_p_value]{shade_p_value()}}. To add +information related to confidence interval use \code{\link[=shade_confidence_interval]{shade_confidence_interval()}}. +} \examples{ # Permutations to create a simulation-based null distribution for # one numerical response and one categorical predictor @@ -95,3 +107,6 @@ mtcars \%>\% visualize(method = "both") } +\seealso{ +\code{\link[=shade_p_value]{shade_p_value()}}, \code{\link[=shade_confidence_interval]{shade_confidence_interval()}}. +} diff --git a/tests/testthat/test-visualize.R b/tests/testthat/test-visualize.R index 59b46537..bbe039a2 100644 --- a/tests/testthat/test-visualize.R +++ b/tests/testthat/test-visualize.R @@ -42,7 +42,6 @@ obs_F <- anova( aov(formula = Sepal.Width ~ Species, data = iris_tbl) )$`F value`[1] - test_that("visualize basic tests", { expect_silent(visualize(Sepal.Width_resamp)) @@ -51,13 +50,14 @@ test_that("visualize basic tests", { expect_silent(visualise(Sepal.Width_resamp)) expect_error(Sepal.Width_resamp %>% visualize(bins = "yep")) - expect_silent( + expect_warning( iris_tbl %>% specify(Sepal.Length ~ Sepal.Width) %>% hypothesize(null = "independence") %>% generate(reps = 100, type = "permute") %>% calculate(stat = "slope") %>% - visualize(obs_stat = obs_slope, direction = "right") + visualize(obs_stat = obs_slope, direction = "right"), + "deprecated" ) # obs_stat not specified @@ -70,13 +70,14 @@ test_that("visualize basic tests", { visualize(direction = "both") ) - expect_silent( + expect_warning( iris_tbl %>% specify(Sepal.Width.Group ~ Sepal.Length.Group, success = "large") %>% hypothesize(null = "independence") %>% generate(reps = 100, type = "permute") %>% calculate(stat = "diff in props", order = c(">5", "<=5")) %>% - visualize(direction = "both", obs_stat = obs_diff) + visualize(direction = "both", obs_stat = obs_diff), + "deprecated" ) expect_warning( @@ -239,13 +240,14 @@ test_that("visualize basic tests", { visualize(method = "theoretical") ) - expect_silent( + expect_warning( iris_tbl %>% specify(Petal.Width ~ Sepal.Width.Group) %>% hypothesize(null = "independence") %>% generate(reps = 10, type = "permute") %>% calculate(stat = "diff in means", order = c("large", "small")) %>% - visualize(direction = "both",obs_stat = obs_diff_mean) + visualize(direction = "both",obs_stat = obs_diff_mean), + "deprecated" ) # Produces warning first for not checking conditions but would also error @@ -284,13 +286,14 @@ test_that("visualize basic tests", { ) ) - expect_silent( + expect_warning( iris_tbl %>% specify(Petal.Width ~ NULL) %>% hypothesize(null = "point", mu = 1.3) %>% generate(reps = 100, type = "bootstrap") %>% calculate(stat = "mean") %>% - visualize(direction = "left", obs_stat = mean(iris$Petal.Width)) + visualize(direction = "left", obs_stat = 
mean(iris$Petal.Width)), + "deprecated" ) }) @@ -302,13 +305,14 @@ test_that("obs_stat as a data.frame works", { mean_petal_width <- iris_tbl %>% specify(Petal.Width ~ NULL) %>% calculate(stat = "mean") - expect_silent( + expect_warning( iris_tbl %>% specify(Petal.Width ~ NULL) %>% hypothesize(null = "point", mu = 4) %>% generate(reps = 100, type = "bootstrap") %>% calculate(stat = "mean") %>% - visualize(obs_stat = mean_petal_width) + visualize(obs_stat = mean_petal_width), + "deprecated" ) mean_df_test <- data.frame(x = c(4.1, 1), y = c(1, 2)) expect_warning( @@ -396,9 +400,61 @@ test_that("confidence interval plots are working", { expect_warning(iris_boot %>% visualize(endpoints = vec_error)) - expect_silent( - iris_boot %>% visualize(endpoints = perc_ci, direction = "between") + expect_warning( + iris_boot %>% visualize(endpoints = perc_ci, direction = "between"), + "deprecated" ) expect_warning(iris_boot %>% visualize(obs_stat = 3, endpoints = perc_ci)) }) + +iris_permute <- iris_tbl %>% + specify(Sepal.Width.Group ~ Sepal.Length.Group, success = "large") %>% + hypothesize(null = "independence") %>% + generate(reps = 100, type = "permute") %>% + calculate(stat = "z", order = c(">5", "<=5")) +iris_viz_sim <- iris_permute %>% visualize(method = "simulation") +# Warnings are about checking conditions for the theoretical method. +iris_viz_theor <- suppressWarnings( + iris_permute %>% visualize(method = "theoretical") +) +iris_viz_both <- suppressWarnings( + iris_permute %>% visualize(method = "both") +) + +test_that("shade_p_value works", { + expect_silent_pval <- function(viz_obj) { + for (dir in c("right", "left", "both")) { + expect_silent(viz_obj + shade_p_value(1, dir)) + expect_silent(viz_obj + shade_p_value(NULL, dir)) + } + + expect_silent(viz_obj + shade_p_value(1, NULL)) + + expect_warning(viz_obj + shade_p_value(1, "aaa"), "direction") + } + + expect_silent_pval(iris_viz_sim) + expect_silent_pval(iris_viz_theor) + expect_silent_pval(iris_viz_both) +}) + +test_that("shade_confidence_interval works", { + expect_silent_ci <- function(viz_obj) { + expect_silent(viz_obj + shade_confidence_interval(c(-1, 1))) + expect_silent(viz_obj + shade_confidence_interval(NULL)) + expect_silent(viz_obj + shade_confidence_interval(c(-1, 1), fill = NULL)) + } + + expect_silent_ci(iris_viz_sim) + expect_silent_ci(iris_viz_theor) + expect_silent_ci(iris_viz_both) +}) + +test_that("two_tail_data works", { + attr(iris_permute, "viz_method") <- "both" + expect_equal(colnames(two_tail_data(1)(iris_permute)), c("x_min", "x_max")) + + attr(iris_permute, "viz_method") <- "theoretical" + expect_equal(colnames(two_tail_data(1)(iris_permute)), c("x_min", "x_max")) +}) From c73d3d01f5b88f8e21a9eadf97a14a6f0afa8661 Mon Sep 17 00:00:00 2001 From: "evgeni.chasnovski" Date: Fri, 14 Sep 2018 16:42:02 +0300 Subject: [PATCH 54/78] Fix messages for `visualize()` argument deprecation. --- R/visualize.R | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/R/visualize.R b/R/visualize.R index 91983036..4156a419 100755 --- a/R/visualize.R +++ b/R/visualize.R @@ -193,7 +193,7 @@ warn_depricated_args <- function(obs_stat, endpoints) { warning_glue( "`visualize()` shouldn't be used to plot p-value. Arguments `obs_stat`, ", "`obs_stat_color`, `pvalue_fill`, and `direction` are deprecated. ", - "Use `add_p_value()` instead." + "Use `shade_p_value()` instead." 
) } @@ -201,7 +201,7 @@ warn_depricated_args <- function(obs_stat, endpoints) { warning_glue( "`visualize()` shouldn't be used to plot confidence interval. Arguments ", "`endpoints`, `endpoints_color`, and `ci_fill` are deprecated. ", - "Use `add_ci()` instead." + "Use `shade_confidence_interval()` instead." ) } From 6e475d3738744d856b1aaeea8c4c10c35e5a87ea Mon Sep 17 00:00:00 2001 From: "evgeni.chasnovski" Date: Fri, 14 Sep 2018 17:54:34 +0300 Subject: [PATCH 55/78] Update `shade_*()` functions to properly check their arguments. --- DESCRIPTION | 3 ++- R/utils.R | 5 +++++ R/visualize.R | 37 +++++++++++++++++++++++++++++++-- tests/testthat/test-visualize.R | 23 ++++++++++++++++++++ 4 files changed, 65 insertions(+), 3 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index c3301693..ad8812c4 100755 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -27,7 +27,8 @@ Imports: rlang (>= 0.2.0), ggplot2, magrittr, - glue (>= 1.3.0) + glue (>= 1.3.0), + grDevices Depends: R (>= 3.1.2) Suggests: diff --git a/R/utils.R b/R/utils.R index 9c7d0e96..ee1411e9 100644 --- a/R/utils.R +++ b/R/utils.R @@ -50,6 +50,11 @@ has_response <- function(x) { !is_nuat(x, "response") } +is_color_string <- function(x) { + rlang::is_string(x) && + tryCatch(is.matrix(grDevices::col2rgb(x)), error = function(e) {FALSE}) +} + stop_glue <- function(..., .sep = "", .envir = parent.frame(), call. = FALSE, .domain = NULL) { stop( diff --git a/R/visualize.R b/R/visualize.R index 4156a419..b556aaff 100755 --- a/R/visualize.R +++ b/R/visualize.R @@ -98,7 +98,7 @@ visualize <- function(data, bins = 15, method = "simulation", pvalue_fill, direction, endpoints, endpoints_color, ci_fill ) warn_depricated_args(obs_stat, endpoints) - endpoints <- impute_enpoints(endpoints) + endpoints <- impute_endpoints(endpoints) obs_stat <- impute_obs_stat(obs_stat, direction, endpoints) # Add `method` to `data` attributes to enable later possibility of @@ -208,7 +208,7 @@ warn_depricated_args <- function(obs_stat, endpoints) { TRUE } -impute_enpoints <- function(endpoints) { +impute_endpoints <- function(endpoints) { if (is.vector(endpoints) && (length(endpoints) != 2)) { warning_glue( "Expecting `endpoints` to be a 1 x 2 data frame or 2 element vector. ", @@ -216,7 +216,14 @@ impute_enpoints <- function(endpoints) { ) endpoints <- endpoints[1:2] } + if (is.data.frame(endpoints)) { + if ((nrow(endpoints) != 1) || (ncol(endpoints) != 2)) { + stop_glue( + "Expecting `endpoints` to be a 1 x 2 data frame or 2 element vector." + ) + } + endpoints <- unlist(endpoints) } @@ -411,6 +418,9 @@ NULL #' @export shade_p_value <- function(obs_stat, direction, color = "red2", fill = "pink", ...) { + obs_stat <- check_obs_stat(obs_stat) + check_shade_p_value_args(obs_stat, direction, color, fill) + res <- list() if (is.null(obs_stat)) { return(res) @@ -444,6 +454,19 @@ shade_p_value <- function(obs_stat, direction, #' @export shade_pvalue <- shade_p_value +check_shade_p_value_args <- function(obs_stat, direction, color, fill) { + if (!is.null(obs_stat)) { + check_type(obs_stat, is.numeric) + } + if (!is.null(direction)) { + check_type(direction, is.character) + } + check_type(color, is_color_string, "color string") + check_type(fill, is_color_string, "color string") + + TRUE +} + #' Add information about confidence interval #' #' `shade_confidence_interval()` plots confidence interval region on top of the @@ -482,6 +505,9 @@ NULL #' @export shade_confidence_interval <- function(endpoints, color = "mediumaquamarine", fill = "turquoise", ...) 
{ + endpoints <- impute_endpoints(endpoints) + check_shade_confidence_interval_args(color, fill) + res <- list() if (is.null(endpoints)) { return(res) @@ -510,6 +536,13 @@ shade_confidence_interval <- function(endpoints, color = "mediumaquamarine", #' @export shade_ci <- shade_confidence_interval +check_shade_confidence_interval_args <- function(color, fill) { + check_type(color, is_color_string, "color string") + if (!is.null(fill)) { + check_type(fill, is_color_string, "color string") + } +} + get_percentile <- function(vector, observation) { stats::ecdf(vector)(observation) } diff --git a/tests/testthat/test-visualize.R b/tests/testthat/test-visualize.R index bbe039a2..f3a94e48 100644 --- a/tests/testthat/test-visualize.R +++ b/tests/testthat/test-visualize.R @@ -439,6 +439,13 @@ test_that("shade_p_value works", { expect_silent_pval(iris_viz_both) }) +test_that("shade_p_value throws errors", { + expect_error(iris_viz_sim + shade_p_value("a", "right"), "numeric") + expect_error(iris_viz_sim + shade_p_value(1, 1), "character") + expect_error(iris_viz_sim + shade_p_value(1, "right", color = "x"), "color") + expect_error(iris_viz_sim + shade_p_value(1, "right", fill = "x"), "color") +}) + test_that("shade_confidence_interval works", { expect_silent_ci <- function(viz_obj) { expect_silent(viz_obj + shade_confidence_interval(c(-1, 1))) @@ -451,6 +458,22 @@ test_that("shade_confidence_interval works", { expect_silent_ci(iris_viz_both) }) +test_that("shade_confidence_interval throws errors and warnings", { + expect_warning(iris_viz_sim + shade_confidence_interval(c(1, 2, 3)), "2") + expect_error( + iris_viz_sim + shade_confidence_interval(data.frame(x = 1)), + "1 x 2" + ) + expect_error( + iris_viz_sim + shade_confidence_interval(c(-1, 1), color = "x"), + "color" + ) + expect_error( + iris_viz_sim + shade_confidence_interval(c(-1, 1), fill = "x"), + "color" + ) +}) + test_that("two_tail_data works", { attr(iris_permute, "viz_method") <- "both" expect_equal(colnames(two_tail_data(1)(iris_permute)), c("x_min", "x_max")) From 1d74bff028778a3567036f9a950d380b7bbd68a8 Mon Sep 17 00:00:00 2001 From: "evgeni.chasnovski" Date: Fri, 14 Sep 2018 19:46:07 +0300 Subject: [PATCH 56/78] Update vignettes to use `shade_*()` visualization functions. 
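
The vignettes now build the base plot with `visualize()` and add p-value or
confidence interval information as {ggplot2} layers. A minimal sketch of the
pattern the diffs below apply throughout, reusing the `mtcars` setup from the
`shade_*()` documentation examples added earlier in this series:

    null_distn <- mtcars %>%
      dplyr::mutate(am = factor(am)) %>%
      specify(mpg ~ am) %>%
      hypothesize(null = "independence") %>%
      generate(reps = 100, type = "permute") %>%
      calculate(stat = "t", order = c("1", "0"))

    # Previously: visualize(obs_stat = 1.5, direction = "right") (deprecated,
    # now raises a warning). New pattern: build the plot, then add layers.
    null_distn %>%
      visualize() +
      shade_p_value(obs_stat = 1.5, direction = "right")

    null_distn %>%
      visualize() +
      shade_confidence_interval(endpoints = c(-1.5, 1.5))

The same layering works on `visualize(method = "theoretical")` and
`method = "both"` output.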
--- inst/doc/chisq_test.R | 13 +- inst/doc/chisq_test.Rmd | 13 +- inst/doc/chisq_test.html | 30 +-- inst/doc/flights_examples.html | 40 ++-- inst/doc/observed_stat_examples.R | 125 +++++++++---- inst/doc/observed_stat_examples.Rmd | 125 +++++++++---- inst/doc/observed_stat_examples.html | 263 ++++++++++++++------------- inst/doc/two_sample_t.R | 13 +- inst/doc/two_sample_t.Rmd | 13 +- inst/doc/two_sample_t.html | 22 ++- vignettes/chisq_test.Rmd | 13 +- vignettes/observed_stat_examples.Rmd | 125 +++++++++---- vignettes/two_sample_t.Rmd | 13 +- 13 files changed, 507 insertions(+), 301 deletions(-) diff --git a/inst/doc/chisq_test.R b/inst/doc/chisq_test.R index 7d0faec3..e93599f9 100644 --- a/inst/doc/chisq_test.R +++ b/inst/doc/chisq_test.R @@ -42,7 +42,9 @@ chisq_null_distn <- fli_small %>% hypothesize(null = "independence") %>% generate(reps = 1000, type = "permute") %>% calculate(stat = "Chisq") -chisq_null_distn %>% visualize(obs_stat = obs_chisq, direction = "greater") +chisq_null_distn %>% + visualize() + + shade_p_value(obs_stat = obs_chisq, direction = "greater") ## ------------------------------------------------------------------------ chisq_null_distn %>% @@ -54,7 +56,8 @@ fli_small %>% hypothesize(null = "independence") %>% # generate() ## Not used for theoretical calculate(stat = "Chisq") %>% - visualize(method = "theoretical", obs_stat = obs_chisq, direction = "right") + visualize(method = "theoretical") + + shade_p_value(obs_stat = obs_chisq, direction = "right") ## ----eval=FALSE---------------------------------------------------------- # fli_small %>% @@ -62,12 +65,14 @@ fli_small %>% # hypothesize(null = "independence") %>% # generate(reps = 1000, type = "permute") %>% # calculate(stat = "Chisq") %>% -# visualize(method = "both", obs_stat = obs_chisq, direction = "right") +# visualize(method = "both") + +# shade_p_value(obs_stat = obs_chisq, direction = "right") ## ----echo=FALSE---------------------------------------------------------- # To use same distribution calculated above chisq_null_distn %>% - visualize(method = "both", obs_stat = obs_chisq, direction = "right") + visualize(method = "both") + + shade_p_value(obs_stat = obs_chisq, direction = "right") ## ------------------------------------------------------------------------ fli_small %>% diff --git a/inst/doc/chisq_test.Rmd b/inst/doc/chisq_test.Rmd index d56b5201..0f7ea2f0 100644 --- a/inst/doc/chisq_test.Rmd +++ b/inst/doc/chisq_test.Rmd @@ -92,7 +92,9 @@ chisq_null_distn <- fli_small %>% hypothesize(null = "independence") %>% generate(reps = 1000, type = "permute") %>% calculate(stat = "Chisq") -chisq_null_distn %>% visualize(obs_stat = obs_chisq, direction = "greater") +chisq_null_distn %>% + visualize() + + shade_p_value(obs_stat = obs_chisq, direction = "greater") ``` ## Calculate the randomization-based $p$-value @@ -111,7 +113,8 @@ fli_small %>% hypothesize(null = "independence") %>% # generate() ## Not used for theoretical calculate(stat = "Chisq") %>% - visualize(method = "theoretical", obs_stat = obs_chisq, direction = "right") + visualize(method = "theoretical") + + shade_p_value(obs_stat = obs_chisq, direction = "right") ``` ## Overlay appropriate $\chi^2$ distribution on top of permuted statistics @@ -122,13 +125,15 @@ fli_small %>% hypothesize(null = "independence") %>% generate(reps = 1000, type = "permute") %>% calculate(stat = "Chisq") %>% - visualize(method = "both", obs_stat = obs_chisq, direction = "right") + visualize(method = "both") + + shade_p_value(obs_stat = obs_chisq, direction = 
"right") ``` ```{r echo=FALSE} # To use same distribution calculated above chisq_null_distn %>% - visualize(method = "both", obs_stat = obs_chisq, direction = "right") + visualize(method = "both") + + shade_p_value(obs_stat = obs_chisq, direction = "right") ``` diff --git a/inst/doc/chisq_test.html b/inst/doc/chisq_test.html index 25831c30..0f324cff 100644 --- a/inst/doc/chisq_test.html +++ b/inst/doc/chisq_test.html @@ -12,7 +12,7 @@ - + Chi-squared test example using nycflights13 flights data @@ -70,7 +70,7 @@

Chi-squared test example using nycflights13 flights data

Chester Ismay

-

2018-09-03

+

2018-09-14

@@ -126,7 +126,7 @@

Calculate observed statistic

- +
0.5718980.5719
@@ -146,7 +146,7 @@

Calculate observed statistic

-0.571898 +0.5719 @@ -165,7 +165,7 @@

Calculate observed statistic

-0.571898 +0.5719 @@ -179,8 +179,10 @@

Randomization approach to \(\chi^2\)-statis hypothesize(null = "independence") %>% generate(reps = 1000, type = "permute") %>% calculate(stat = "Chisq") -chisq_null_distn %>% visualize(obs_stat = obs_chisq, direction = "greater")

-

+chisq_null_distn %>% + visualize() + + shade_p_value(obs_stat = obs_chisq, direction = "greater") +

Calculate the randomization-based \(p\)-value

@@ -208,10 +210,11 @@

Theoretical distribution

hypothesize(null = "independence") %>% # generate() ## Not used for theoretical calculate(stat = "Chisq") %>% - visualize(method = "theoretical", obs_stat = obs_chisq, direction = "right")
+ visualize(method = "theoretical") + + shade_p_value(obs_stat = obs_chisq, direction = "right")
## Warning: Check to make sure the conditions have been met for the
 ## theoretical method. {infer} currently does not check these for you.
-

+

Overlay appropriate \(\chi^2\) distribution on top of permuted statistics

@@ -220,10 +223,11 @@

Overlay appropriate \(\chi^2\) distribution hypothesize(null = "independence") %>% generate(reps = 1000, type = "permute") %>% calculate(stat = "Chisq") %>% - visualize(method = "both", obs_stat = obs_chisq, direction = "right")

+ visualize(method = "both") + + shade_p_value(obs_stat = obs_chisq, direction = "right")
## Warning: Check to make sure the conditions have been met for the
-## theoretical method. `infer` currently does not check these for you.
-

+## theoretical method. {infer} currently does not check these for you. +

Compute theoretical p-value

@@ -231,7 +235,7 @@

Compute theoretical p-value

chisq_test(formula = origin ~ season) %>% dplyr::select(p_value) %>% dplyr::pull()
-
## [1] 0.7513009
+
## [1] 0.7513
diff --git a/inst/doc/flights_examples.html b/inst/doc/flights_examples.html index 55215ca2..70b57a27 100644 --- a/inst/doc/flights_examples.html +++ b/inst/doc/flights_examples.html @@ -124,7 +124,7 @@

One numerical variable (mean)

ggplot(data = null_distn, mapping = aes(x = stat)) + geom_density() + geom_vline(xintercept = x_bar, color = "red") -

+

null_distn %>%
   summarize(p_value = mean(stat >= x_bar) * 2)
@@ -155,7 +155,7 @@

One numerical variable (median)

ggplot(null_distn, aes(x = stat)) + geom_bar() + geom_vline(xintercept = x_tilde, color = "red")
-

+

null_distn %>%
   summarize(p_value = mean(stat <= x_tilde) * 2)
@@ -186,7 +186,7 @@

One categorical (one proportion)

ggplot(null_distn, aes(x = stat)) + geom_bar() + geom_vline(xintercept = p_hat, color = "red")
-

+

null_distn %>%
   summarize(p_value = mean(stat <= p_hat) * 2)
@@ -226,7 +226,7 @@

Two categorical (2 level) variables

ggplot(null_distn, aes(x = stat)) + geom_density() + geom_vline(xintercept = d_hat, color = "red")
-

+

null_distn %>%
   summarize(p_value = mean(stat <= d_hat) * 2) %>%
   pull()
@@ -248,7 +248,7 @@

One categorical (>2 level) - GoF

ggplot(null_distn, aes(x = stat)) + geom_density() + geom_vline(xintercept = pull(Chisq_hat), color = "red") -

+

null_distn %>%
   summarize(p_value = mean(stat >= pull(Chisq_hat))) %>%
   pull()
@@ -266,7 +266,7 @@

Two categorical (>2 level) variables

ggplot(null_distn, aes(x = stat)) + geom_density() + geom_vline(xintercept = pull(Chisq_hat), color = "red") -

+

null_distn %>%
   summarize(p_value = mean(stat >= pull(Chisq_hat))) %>%
   pull()
@@ -289,7 +289,7 @@

One numerical variable, one categorical (2 levels) (diff in means)

ggplot(null_distn, aes(x = stat)) + geom_density() + geom_vline(xintercept = d_hat, color = "red") -

+

null_distn %>%
   summarize(p_value = mean(stat <= d_hat) * 2) %>%
   pull()
@@ -312,7 +312,7 @@

One numerical variable, one categorical (2 levels) (diff in medians)

ggplot(null_distn, aes(x = stat)) + geom_bar() + geom_vline(xintercept = d_hat, color = "red") -

+

null_distn %>%
   summarize(p_value = mean(stat >= d_hat) * 2) %>%
   pull()
@@ -332,7 +332,7 @@

One numerical, one categorical (>2 levels) - ANOVA

ggplot(null_distn, aes(x = stat)) + geom_density() + geom_vline(xintercept = F_hat, color = "red") -

+

null_distn %>% 
   summarize(p_value = mean(stat >= F_hat)) %>%
   pull()
@@ -353,7 +353,7 @@

Two numerical vars - SLR

ggplot(null_distn, aes(x = stat)) + geom_density() + geom_vline(xintercept = slope_hat, color = "red") -

+

null_distn %>% 
   summarize(p_value = mean(stat >= slope_hat) * 2) %>%
   pull()
@@ -373,8 +373,8 @@

One numerical (one mean)

pull() c(lower = x_bar - 2 * sd(boot), upper = x_bar + 2 * sd(boot)) -
##    lower    upper 
-## 1.122209 8.021791
+
## lower upper 
+## 1.122 8.022

One categorical (one proportion)

@@ -388,8 +388,8 @@

One categorical (one proportion)

pull() c(lower = p_hat - 2 * sd(boot), upper = p_hat + 2 * sd(boot))
-
##     lower     upper 
-## 0.4194756 0.5125244
+
##  lower  upper 
+## 0.4195 0.5125

One numerical variable, one categorical (2 levels) (diff in means)

@@ -406,8 +406,8 @@

One numerical variable, one categorical (2 levels) (diff in means)

pull() c(lower = d_hat - 2 * sd(boot), upper = d_hat + 2 * sd(boot))
-
##     lower     upper 
-## -7.704370  6.213971
+
##  lower  upper 
+## -7.704  6.214

Two categorical variables (diff in proportions)

@@ -424,8 +424,8 @@

Two categorical variables (diff in proportions)

pull() c(lower = d_hat - 2 * sd(boot), upper = d_hat + 2 * sd(boot))
-
##       lower       upper 
-## -0.07149487  0.11258550
+
##    lower    upper 
+## -0.07149  0.11259

Two numerical vars - SLR

@@ -441,8 +441,8 @@

Two numerical vars - SLR

pull() c(lower = slope_hat - 2 * sd(boot), upper = slope_hat + 2 * sd(boot))
-
##     lower     upper 
-## 0.9657595 1.0681384
+
##  lower  upper 
+## 0.9658 1.0681
diff --git a/inst/doc/observed_stat_examples.R b/inst/doc/observed_stat_examples.R index 67be6ff5..4beac9aa 100644 --- a/inst/doc/observed_stat_examples.R +++ b/inst/doc/observed_stat_examples.R @@ -35,7 +35,8 @@ null_distn <- fli_small %>% generate(reps = 1000) %>% calculate(stat = "mean") null_distn %>% - visualize(obs_stat = x_bar, direction = "two_sided") + visualize() + + shade_p_value(obs_stat = x_bar, direction = "two_sided") null_distn %>% get_p_value(obs_stat = x_bar, direction = "two_sided") @@ -51,7 +52,8 @@ null_distn <- fli_small %>% generate(reps = 1000) %>% calculate(stat = "t") null_distn %>% - visualize(obs_stat = t_bar, direction = "two_sided") + visualize() + + shade_p_value(obs_stat = t_bar, direction = "two_sided") null_distn %>% get_p_value(obs_stat = t_bar, direction = "two_sided") @@ -67,7 +69,8 @@ null_distn <- fli_small %>% generate(reps = 1000) %>% calculate(stat = "median") null_distn %>% - visualize(obs_stat = x_tilde, direction = "two_sided") + visualize() + + shade_p_value(obs_stat = x_tilde, direction = "two_sided") null_distn %>% get_p_value(obs_stat = x_tilde, direction = "two_sided") @@ -83,7 +86,8 @@ null_distn <- fli_small %>% generate(reps = 1000) %>% calculate(stat = "prop") null_distn %>% - visualize(obs_stat = p_hat, direction = "two_sided") + visualize() + + shade_p_value(obs_stat = p_hat, direction = "two_sided") null_distn %>% get_p_value(obs_stat = p_hat, direction = "two_sided") @@ -107,7 +111,8 @@ null_distn <- fli_small %>% generate(reps = 1000) %>% calculate(stat = "diff in props", order = c("winter", "summer")) null_distn %>% - visualize(obs_stat = d_hat, direction = "two_sided") + visualize() + + shade_p_value(obs_stat = d_hat, direction = "two_sided") null_distn %>% get_p_value(obs_stat = d_hat, direction = "two_sided") @@ -123,7 +128,8 @@ null_distn <- fli_small %>% generate(reps = 1000) %>% calculate(stat = "z", order = c("winter", "summer")) null_distn %>% - visualize(obs_stat = z_hat, direction = "two_sided") + visualize() + + shade_p_value(obs_stat = z_hat, direction = "two_sided") null_distn %>% get_p_value(obs_stat = z_hat, direction = "two_sided") @@ -142,7 +148,8 @@ null_distn <- fli_small %>% generate(reps = 1000, type = "simulate") %>% calculate(stat = "Chisq") null_distn %>% - visualize(obs_stat = Chisq_hat, direction = "greater") + visualize() + + shade_p_value(obs_stat = Chisq_hat, direction = "greater") null_distn %>% get_p_value(obs_stat = Chisq_hat, direction = "greater") @@ -158,7 +165,8 @@ null_distn <- fli_small %>% generate(reps = 1000, type = "permute") %>% calculate(stat = "Chisq") null_distn %>% - visualize(obs_stat = Chisq_hat, direction = "greater") + visualize() + + shade_p_value(obs_stat = Chisq_hat, direction = "greater") null_distn %>% get_p_value(obs_stat = Chisq_hat, direction = "greater") @@ -174,7 +182,8 @@ null_distn <- fli_small %>% generate(reps = 1000, type = "permute") %>% calculate(stat = "diff in means", order = c("summer", "winter")) null_distn %>% - visualize(obs_stat = d_hat, direction = "two_sided") + visualize() + + shade_p_value(obs_stat = d_hat, direction = "two_sided") null_distn %>% get_p_value(obs_stat = d_hat, direction = "two_sided") @@ -190,7 +199,8 @@ null_distn <- fli_small %>% generate(reps = 1000, type = "permute") %>% calculate(stat = "t", order = c("summer", "winter")) null_distn %>% - visualize(obs_stat = t_hat, direction = "two_sided") + visualize() + + shade_p_value(obs_stat = t_hat, direction = "two_sided") null_distn %>% get_p_value(obs_stat = t_hat, direction = "two_sided") 
@@ -207,7 +217,8 @@ null_distn <- fli_small %>% generate(reps = 1000, type = "permute") %>% calculate(stat = "diff in medians", order = c("summer", "winter")) null_distn %>% - visualize(obs_stat = d_hat, direction = "two_sided") + visualize() + + shade_p_value(obs_stat = d_hat, direction = "two_sided") null_distn %>% get_p_value(obs_stat = d_hat, direction = "two_sided") @@ -223,7 +234,8 @@ null_distn <- fli_small %>% generate(reps = 1000, type = "permute") %>% calculate(stat = "F") null_distn %>% - visualize(obs_stat = F_hat, direction = "greater") + visualize() + + shade_p_value(obs_stat = F_hat, direction = "greater") null_distn %>% get_p_value(obs_stat = F_hat, direction = "greater") @@ -239,7 +251,8 @@ null_distn <- fli_small %>% generate(reps = 1000, type = "permute") %>% calculate(stat = "slope") null_distn %>% - visualize(obs_stat = slope_hat, direction = "two_sided") + visualize() + + shade_p_value(obs_stat = slope_hat, direction = "two_sided") null_distn %>% get_p_value(obs_stat = slope_hat, direction = "two_sided") @@ -255,7 +268,8 @@ null_distn <- fli_small %>% generate(reps = 1000, type = "permute") %>% calculate(stat = "correlation") null_distn %>% - visualize(obs_stat = correlation_hat, direction = "two_sided") + visualize() + + shade_p_value(obs_stat = correlation_hat, direction = "two_sided") null_distn %>% get_p_value(obs_stat = correlation_hat, direction = "two_sided") @@ -272,7 +286,8 @@ null_distn %>% # generate(reps = 1000, type = "permute") %>% # calculate(stat = "t") # null_distn %>% -# visualize(obs_stat = t_hat, direction = "two_sided") +# visualize() + +# shade_p_value(obs_stat = t_hat, direction = "two_sided") # null_distn %>% # get_p_value(obs_stat = t_hat, direction = "two_sided") @@ -287,9 +302,13 @@ boot <- fli_small %>% generate(reps = 1000, type = "bootstrap") %>% calculate(stat = "mean") ( percentile_ci <- get_ci(boot) ) -boot %>% visualize(endpoints = percentile_ci, direction = "between") +boot %>% + visualize() + + shade_confidence_interval(endpoints = percentile_ci) ( standard_error_ci <- get_ci(boot, type = "se", point_estimate = x_bar) ) -boot %>% visualize(endpoints = standard_error_ci, direction = "between") +boot %>% + visualize() + + shade_confidence_interval(endpoints = standard_error_ci) ## ------------------------------------------------------------------------ ( t_hat <- fli_small %>% @@ -302,9 +321,13 @@ boot <- fli_small %>% generate(reps = 1000, type = "bootstrap") %>% calculate(stat = "t") ( percentile_ci <- get_ci(boot) ) -boot %>% visualize(endpoints = percentile_ci, direction = "between") +boot %>% + visualize() + + shade_confidence_interval(endpoints = percentile_ci) ( standard_error_ci <- get_ci(boot, type = "se", point_estimate = t_hat) ) -boot %>% visualize(endpoints = standard_error_ci, direction = "between") +boot %>% + visualize() + + shade_confidence_interval(endpoints = standard_error_ci) ## ------------------------------------------------------------------------ ( p_hat <- fli_small %>% @@ -317,9 +340,13 @@ boot <- fli_small %>% generate(reps = 1000, type = "bootstrap") %>% calculate(stat = "prop") ( percentile_ci <- get_ci(boot) ) -boot %>% visualize(endpoints = percentile_ci, direction = "between") +boot %>% + visualize() + + shade_confidence_interval(endpoints = percentile_ci) ( standard_error_ci <- get_ci(boot, type = "se", point_estimate = p_hat) ) -boot %>% visualize(endpoints = standard_error_ci, direction = "between") +boot %>% + visualize() + + shade_confidence_interval(endpoints = standard_error_ci) ## 
------------------------------------------------------------------------ ( d_hat <- fli_small %>% @@ -332,9 +359,13 @@ boot <- fli_small %>% generate(reps = 1000, type = "bootstrap") %>% calculate(stat = "diff in means", order = c("summer", "winter")) ( percentile_ci <- get_ci(boot) ) -boot %>% visualize(endpoints = percentile_ci, direction = "between") +boot %>% + visualize() + + shade_confidence_interval(endpoints = percentile_ci) ( standard_error_ci <- get_ci(boot, type = "se", point_estimate = d_hat) ) -boot %>% visualize(endpoints = standard_error_ci, direction = "between") +boot %>% + visualize() + + shade_confidence_interval(endpoints = standard_error_ci) ## ------------------------------------------------------------------------ ( t_hat <- fli_small %>% @@ -347,9 +378,13 @@ boot <- fli_small %>% generate(reps = 1000, type = "bootstrap") %>% calculate(stat = "t", order = c("summer", "winter")) ( percentile_ci <- get_ci(boot) ) -boot %>% visualize(endpoints = percentile_ci, direction = "between") +boot %>% + visualize() + + shade_confidence_interval(endpoints = percentile_ci) ( standard_error_ci <- get_ci(boot, type = "se", point_estimate = t_hat) ) -boot %>% visualize(endpoints = standard_error_ci, direction = "between") +boot %>% + visualize() + + shade_confidence_interval(endpoints = standard_error_ci) ## ------------------------------------------------------------------------ ( d_hat <- fli_small %>% @@ -362,9 +397,13 @@ boot <- fli_small %>% generate(reps = 1000, type = "bootstrap") %>% calculate(stat = "diff in props", order = c("summer", "winter")) ( percentile_ci <- get_ci(boot) ) -boot %>% visualize(endpoints = percentile_ci, direction = "between") +boot %>% + visualize() + + shade_confidence_interval(endpoints = percentile_ci) ( standard_error_ci <- get_ci(boot, type = "se", point_estimate = d_hat) ) -boot %>% visualize(endpoints = standard_error_ci, direction = "between") +boot %>% + visualize() + + shade_confidence_interval(endpoints = standard_error_ci) ## ------------------------------------------------------------------------ ( z_hat <- fli_small %>% @@ -377,9 +416,13 @@ boot <- fli_small %>% generate(reps = 1000, type = "bootstrap") %>% calculate(stat = "z", order = c("summer", "winter")) ( percentile_ci <- get_ci(boot) ) -boot %>% visualize(endpoints = percentile_ci, direction = "between") +boot %>% + visualize() + + shade_confidence_interval(endpoints = percentile_ci) ( standard_error_ci <- get_ci(boot, type = "se", point_estimate = z_hat) ) -boot %>% visualize(endpoints = standard_error_ci, direction = "between") +boot %>% + visualize() + + shade_confidence_interval(endpoints = standard_error_ci) ## ------------------------------------------------------------------------ ( slope_hat <- fli_small %>% @@ -392,9 +435,13 @@ boot <- fli_small %>% generate(reps = 1000, type = "bootstrap") %>% calculate(stat = "slope") ( percentile_ci <- get_ci(boot) ) -boot %>% visualize(endpoints = percentile_ci, direction = "between") +boot %>% + visualize() + + shade_confidence_interval(endpoints = percentile_ci) ( standard_error_ci <- get_ci(boot, type = "se", point_estimate = slope_hat) ) -boot %>% visualize(endpoints = standard_error_ci, direction = "between") +boot %>% + visualize() + + shade_confidence_interval(endpoints = standard_error_ci) ## ------------------------------------------------------------------------ ( correlation_hat <- fli_small %>% @@ -407,10 +454,14 @@ boot <- fli_small %>% generate(reps = 1000, type = "bootstrap") %>% calculate(stat = "correlation") ( 
percentile_ci <- get_ci(boot) ) -boot %>% visualize(endpoints = percentile_ci, direction = "between") +boot %>% + visualize() + + shade_confidence_interval(endpoints = percentile_ci) ( standard_error_ci <- get_ci(boot, type = "se", point_estimate = correlation_hat) ) -boot %>% visualize(endpoints = standard_error_ci, direction = "between") +boot %>% + visualize() + + shade_confidence_interval(endpoints = standard_error_ci) ## ----eval=FALSE, echo=FALSE---------------------------------------------- # # **Point estimate** @@ -424,7 +475,11 @@ boot %>% visualize(endpoints = standard_error_ci, direction = "between") # generate(reps = 1000, type = "bootstrap") %>% # calculate(stat = "t") # ( percentile_ci <- get_ci(boot) ) -# boot %>% visualize(endpoints = percentile_ci, direction = "between") +# boot %>% +# visualize() + +# shade_confidence_interval(endpoints = percentile_ci) # ( standard_error_ci <- get_ci(boot, type = "se", point_estimate = t_hat) ) -# boot %>% visualize(endpoints = standard_error_ci, direction = "between") +# boot %>% +# visualize() + +# shade_confidence_interval(endpoints = standard_error_ci) diff --git a/inst/doc/observed_stat_examples.Rmd b/inst/doc/observed_stat_examples.Rmd index 3a2f3dc7..30404cda 100644 --- a/inst/doc/observed_stat_examples.Rmd +++ b/inst/doc/observed_stat_examples.Rmd @@ -70,7 +70,8 @@ null_distn <- fli_small %>% generate(reps = 1000) %>% calculate(stat = "mean") null_distn %>% - visualize(obs_stat = x_bar, direction = "two_sided") + visualize() + + shade_p_value(obs_stat = x_bar, direction = "two_sided") null_distn %>% get_p_value(obs_stat = x_bar, direction = "two_sided") ``` @@ -91,7 +92,8 @@ null_distn <- fli_small %>% generate(reps = 1000) %>% calculate(stat = "t") null_distn %>% - visualize(obs_stat = t_bar, direction = "two_sided") + visualize() + + shade_p_value(obs_stat = t_bar, direction = "two_sided") null_distn %>% get_p_value(obs_stat = t_bar, direction = "two_sided") ``` @@ -114,7 +116,8 @@ null_distn <- fli_small %>% generate(reps = 1000) %>% calculate(stat = "median") null_distn %>% - visualize(obs_stat = x_tilde, direction = "two_sided") + visualize() + + shade_p_value(obs_stat = x_tilde, direction = "two_sided") null_distn %>% get_p_value(obs_stat = x_tilde, direction = "two_sided") ``` @@ -136,7 +139,8 @@ null_distn <- fli_small %>% generate(reps = 1000) %>% calculate(stat = "prop") null_distn %>% - visualize(obs_stat = p_hat, direction = "two_sided") + visualize() + + shade_p_value(obs_stat = p_hat, direction = "two_sided") null_distn %>% get_p_value(obs_stat = p_hat, direction = "two_sided") ``` @@ -174,7 +178,8 @@ null_distn <- fli_small %>% generate(reps = 1000) %>% calculate(stat = "diff in props", order = c("winter", "summer")) null_distn %>% - visualize(obs_stat = d_hat, direction = "two_sided") + visualize() + + shade_p_value(obs_stat = d_hat, direction = "two_sided") null_distn %>% get_p_value(obs_stat = d_hat, direction = "two_sided") ``` @@ -196,7 +201,8 @@ null_distn <- fli_small %>% generate(reps = 1000) %>% calculate(stat = "z", order = c("winter", "summer")) null_distn %>% - visualize(obs_stat = z_hat, direction = "two_sided") + visualize() + + shade_p_value(obs_stat = z_hat, direction = "two_sided") null_distn %>% get_p_value(obs_stat = z_hat, direction = "two_sided") ``` @@ -225,7 +231,8 @@ null_distn <- fli_small %>% generate(reps = 1000, type = "simulate") %>% calculate(stat = "Chisq") null_distn %>% - visualize(obs_stat = Chisq_hat, direction = "greater") + visualize() + + shade_p_value(obs_stat = Chisq_hat, 
direction = "greater") null_distn %>% get_p_value(obs_stat = Chisq_hat, direction = "greater") ``` @@ -247,7 +254,8 @@ null_distn <- fli_small %>% generate(reps = 1000, type = "permute") %>% calculate(stat = "Chisq") null_distn %>% - visualize(obs_stat = Chisq_hat, direction = "greater") + visualize() + + shade_p_value(obs_stat = Chisq_hat, direction = "greater") null_distn %>% get_p_value(obs_stat = Chisq_hat, direction = "greater") ``` @@ -269,7 +277,8 @@ null_distn <- fli_small %>% generate(reps = 1000, type = "permute") %>% calculate(stat = "diff in means", order = c("summer", "winter")) null_distn %>% - visualize(obs_stat = d_hat, direction = "two_sided") + visualize() + + shade_p_value(obs_stat = d_hat, direction = "two_sided") null_distn %>% get_p_value(obs_stat = d_hat, direction = "two_sided") ``` @@ -291,7 +300,8 @@ null_distn <- fli_small %>% generate(reps = 1000, type = "permute") %>% calculate(stat = "t", order = c("summer", "winter")) null_distn %>% - visualize(obs_stat = t_hat, direction = "two_sided") + visualize() + + shade_p_value(obs_stat = t_hat, direction = "two_sided") null_distn %>% get_p_value(obs_stat = t_hat, direction = "two_sided") ``` @@ -316,7 +326,8 @@ null_distn <- fli_small %>% generate(reps = 1000, type = "permute") %>% calculate(stat = "diff in medians", order = c("summer", "winter")) null_distn %>% - visualize(obs_stat = d_hat, direction = "two_sided") + visualize() + + shade_p_value(obs_stat = d_hat, direction = "two_sided") null_distn %>% get_p_value(obs_stat = d_hat, direction = "two_sided") ``` @@ -338,7 +349,8 @@ null_distn <- fli_small %>% generate(reps = 1000, type = "permute") %>% calculate(stat = "F") null_distn %>% - visualize(obs_stat = F_hat, direction = "greater") + visualize() + + shade_p_value(obs_stat = F_hat, direction = "greater") null_distn %>% get_p_value(obs_stat = F_hat, direction = "greater") ``` @@ -360,7 +372,8 @@ null_distn <- fli_small %>% generate(reps = 1000, type = "permute") %>% calculate(stat = "slope") null_distn %>% - visualize(obs_stat = slope_hat, direction = "two_sided") + visualize() + + shade_p_value(obs_stat = slope_hat, direction = "two_sided") null_distn %>% get_p_value(obs_stat = slope_hat, direction = "two_sided") ``` @@ -382,7 +395,8 @@ null_distn <- fli_small %>% generate(reps = 1000, type = "permute") %>% calculate(stat = "correlation") null_distn %>% - visualize(obs_stat = correlation_hat, direction = "two_sided") + visualize() + + shade_p_value(obs_stat = correlation_hat, direction = "two_sided") null_distn %>% get_p_value(obs_stat = correlation_hat, direction = "two_sided") ``` @@ -407,7 +421,8 @@ null_distn <- fli_small %>% generate(reps = 1000, type = "permute") %>% calculate(stat = "t") null_distn %>% - visualize(obs_stat = t_hat, direction = "two_sided") + visualize() + + shade_p_value(obs_stat = t_hat, direction = "two_sided") null_distn %>% get_p_value(obs_stat = t_hat, direction = "two_sided") ``` @@ -431,9 +446,13 @@ boot <- fli_small %>% generate(reps = 1000, type = "bootstrap") %>% calculate(stat = "mean") ( percentile_ci <- get_ci(boot) ) -boot %>% visualize(endpoints = percentile_ci, direction = "between") +boot %>% + visualize() + + shade_confidence_interval(endpoints = percentile_ci) ( standard_error_ci <- get_ci(boot, type = "se", point_estimate = x_bar) ) -boot %>% visualize(endpoints = standard_error_ci, direction = "between") +boot %>% + visualize() + + shade_confidence_interval(endpoints = standard_error_ci) ``` ### One numerical (one mean - standardized) @@ -452,9 +471,13 @@ boot <- 
fli_small %>% generate(reps = 1000, type = "bootstrap") %>% calculate(stat = "t") ( percentile_ci <- get_ci(boot) ) -boot %>% visualize(endpoints = percentile_ci, direction = "between") +boot %>% + visualize() + + shade_confidence_interval(endpoints = percentile_ci) ( standard_error_ci <- get_ci(boot, type = "se", point_estimate = t_hat) ) -boot %>% visualize(endpoints = standard_error_ci, direction = "between") +boot %>% + visualize() + + shade_confidence_interval(endpoints = standard_error_ci) ``` @@ -474,9 +497,13 @@ boot <- fli_small %>% generate(reps = 1000, type = "bootstrap") %>% calculate(stat = "prop") ( percentile_ci <- get_ci(boot) ) -boot %>% visualize(endpoints = percentile_ci, direction = "between") +boot %>% + visualize() + + shade_confidence_interval(endpoints = percentile_ci) ( standard_error_ci <- get_ci(boot, type = "se", point_estimate = p_hat) ) -boot %>% visualize(endpoints = standard_error_ci, direction = "between") +boot %>% + visualize() + + shade_confidence_interval(endpoints = standard_error_ci) ``` ### One categorical variable (standardized proportion $z$) @@ -499,9 +526,13 @@ boot <- fli_small %>% generate(reps = 1000, type = "bootstrap") %>% calculate(stat = "diff in means", order = c("summer", "winter")) ( percentile_ci <- get_ci(boot) ) -boot %>% visualize(endpoints = percentile_ci, direction = "between") +boot %>% + visualize() + + shade_confidence_interval(endpoints = percentile_ci) ( standard_error_ci <- get_ci(boot, type = "se", point_estimate = d_hat) ) -boot %>% visualize(endpoints = standard_error_ci, direction = "between") +boot %>% + visualize() + + shade_confidence_interval(endpoints = standard_error_ci) ``` ### One numerical variable, one categorical (2 levels) (t) @@ -520,9 +551,13 @@ boot <- fli_small %>% generate(reps = 1000, type = "bootstrap") %>% calculate(stat = "t", order = c("summer", "winter")) ( percentile_ci <- get_ci(boot) ) -boot %>% visualize(endpoints = percentile_ci, direction = "between") +boot %>% + visualize() + + shade_confidence_interval(endpoints = percentile_ci) ( standard_error_ci <- get_ci(boot, type = "se", point_estimate = t_hat) ) -boot %>% visualize(endpoints = standard_error_ci, direction = "between") +boot %>% + visualize() + + shade_confidence_interval(endpoints = standard_error_ci) ``` @@ -542,9 +577,13 @@ boot <- fli_small %>% generate(reps = 1000, type = "bootstrap") %>% calculate(stat = "diff in props", order = c("summer", "winter")) ( percentile_ci <- get_ci(boot) ) -boot %>% visualize(endpoints = percentile_ci, direction = "between") +boot %>% + visualize() + + shade_confidence_interval(endpoints = percentile_ci) ( standard_error_ci <- get_ci(boot, type = "se", point_estimate = d_hat) ) -boot %>% visualize(endpoints = standard_error_ci, direction = "between") +boot %>% + visualize() + + shade_confidence_interval(endpoints = standard_error_ci) ``` ### Two categorical variables (z) @@ -563,9 +602,13 @@ boot <- fli_small %>% generate(reps = 1000, type = "bootstrap") %>% calculate(stat = "z", order = c("summer", "winter")) ( percentile_ci <- get_ci(boot) ) -boot %>% visualize(endpoints = percentile_ci, direction = "between") +boot %>% + visualize() + + shade_confidence_interval(endpoints = percentile_ci) ( standard_error_ci <- get_ci(boot, type = "se", point_estimate = z_hat) ) -boot %>% visualize(endpoints = standard_error_ci, direction = "between") +boot %>% + visualize() + + shade_confidence_interval(endpoints = standard_error_ci) ``` @@ -585,9 +628,13 @@ boot <- fli_small %>% generate(reps = 1000, type = 
"bootstrap") %>% calculate(stat = "slope") ( percentile_ci <- get_ci(boot) ) -boot %>% visualize(endpoints = percentile_ci, direction = "between") +boot %>% + visualize() + + shade_confidence_interval(endpoints = percentile_ci) ( standard_error_ci <- get_ci(boot, type = "se", point_estimate = slope_hat) ) -boot %>% visualize(endpoints = standard_error_ci, direction = "between") +boot %>% + visualize() + + shade_confidence_interval(endpoints = standard_error_ci) ``` ### Two numerical vars - correlation @@ -606,10 +653,14 @@ boot <- fli_small %>% generate(reps = 1000, type = "bootstrap") %>% calculate(stat = "correlation") ( percentile_ci <- get_ci(boot) ) -boot %>% visualize(endpoints = percentile_ci, direction = "between") +boot %>% + visualize() + + shade_confidence_interval(endpoints = percentile_ci) ( standard_error_ci <- get_ci(boot, type = "se", point_estimate = correlation_hat) ) -boot %>% visualize(endpoints = standard_error_ci, direction = "between") +boot %>% + visualize() + + shade_confidence_interval(endpoints = standard_error_ci) ``` @@ -631,7 +682,11 @@ boot <- fli_small %>% generate(reps = 1000, type = "bootstrap") %>% calculate(stat = "t") ( percentile_ci <- get_ci(boot) ) -boot %>% visualize(endpoints = percentile_ci, direction = "between") +boot %>% + visualize() + + shade_confidence_interval(endpoints = percentile_ci) ( standard_error_ci <- get_ci(boot, type = "se", point_estimate = t_hat) ) -boot %>% visualize(endpoints = standard_error_ci, direction = "between") +boot %>% + visualize() + + shade_confidence_interval(endpoints = standard_error_ci) ``` diff --git a/inst/doc/observed_stat_examples.html b/inst/doc/observed_stat_examples.html index 3cba2531..bb6704a4 100644 --- a/inst/doc/observed_stat_examples.html +++ b/inst/doc/observed_stat_examples.html @@ -132,10 +132,12 @@

One numerical variable (mean)

specify(response = dep_delay) %>% hypothesize(null = "point", mu = 10) %>% generate(reps = 1000) %>% - calculate(stat = "mean") -null_distn %>% - visualize(obs_stat = x_bar, direction = "two_sided") -

+ calculate(stat = "mean") +
## Setting `type = "bootstrap"` in `generate()`.
+
null_distn %>% 
+  visualize() +
+    shade_p_value(obs_stat = x_bar, direction = "two_sided")
+

null_distn %>%
   get_p_value(obs_stat = x_bar, direction = "two_sided")
@@ -177,10 +179,12 @@

One numerical variable (standardized mean \(t\) specify(response = dep_delay) %>% hypothesize(null = "point", mu = 8) %>% generate(reps = 1000) %>% - calculate(stat = "t") -null_distn %>% - visualize(obs_stat = t_bar, direction = "two_sided")

-

+ calculate(stat = "t") +
## Setting `type = "bootstrap"` in `generate()`.
+
null_distn %>% 
+  visualize() +
+    shade_p_value(obs_stat = t_bar, direction = "two_sided")
+

null_distn %>%
   get_p_value(obs_stat = t_bar, direction = "two_sided")
@@ -222,10 +226,12 @@

One numerical variable (median)

specify(response = dep_delay) %>% hypothesize(null = "point", med = -1) %>% generate(reps = 1000) %>% - calculate(stat = "median") -null_distn %>% - visualize(obs_stat = x_tilde, direction = "two_sided")
-

+ calculate(stat = "median") +
## Setting `type = "bootstrap"` in `generate()`.
+
null_distn %>% 
+  visualize() +
+    shade_p_value(obs_stat = x_tilde, direction = "two_sided")
+

null_distn %>%
   get_p_value(obs_stat = x_tilde, direction = "two_sided")
@@ -267,10 +273,12 @@

One categorical (one proportion)

specify(response = day_hour, success = "morning") %>% hypothesize(null = "point", p = .5) %>% generate(reps = 1000) %>% - calculate(stat = "prop") -null_distn %>% - visualize(obs_stat = p_hat, direction = "two_sided")
-

+ calculate(stat = "prop") +
## Setting `type = "simulate"` in `generate()`.
+
null_distn %>% 
+  visualize() +
+    shade_p_value(obs_stat = p_hat, direction = "two_sided")
+

null_distn %>%
   get_p_value(obs_stat = p_hat, direction = "two_sided")
@@ -294,6 +302,7 @@

One categorical (one proportion)

hypothesize(null = "point", p = .5) %>% generate(reps = 1000) %>% calculate(stat = "prop")
+
## Setting `type = "simulate"` in `generate()`.

One categorical variable (standardized proportion \(z\))

@@ -323,10 +332,12 @@

Two categorical (2 level) variables

specify(day_hour ~ season, success = "morning") %>% hypothesize(null = "independence") %>% generate(reps = 1000) %>% - calculate(stat = "diff in props", order = c("winter", "summer")) -null_distn %>% - visualize(obs_stat = d_hat, direction = "two_sided")
-

+ calculate(stat = "diff in props", order = c("winter", "summer")) +
## Setting `type = "permute"` in `generate()`.
+
null_distn %>% 
+  visualize() +
+    shade_p_value(obs_stat = d_hat, direction = "two_sided")
+

null_distn %>%
   get_p_value(obs_stat = d_hat, direction = "two_sided")
@@ -368,10 +379,12 @@

Two categorical (2 level) variables (z)

specify(day_hour ~ season, success = "morning") %>% hypothesize(null = "independence") %>% generate(reps = 1000) %>% - calculate(stat = "z", order = c("winter", "summer")) -null_distn %>% - visualize(obs_stat = z_hat, direction = "two_sided")
-

+ calculate(stat = "z", order = c("winter", "summer")) +
## Setting `type = "permute"` in `generate()`.
+
null_distn %>% 
+  visualize() +
+    shade_p_value(obs_stat = z_hat, direction = "two_sided")
+

null_distn %>%
   get_p_value(obs_stat = z_hat, direction = "two_sided")
@@ -420,8 +433,9 @@

One categorical (>2 level) - GoF

generate(reps = 1000, type = "simulate") %>% calculate(stat = "Chisq") null_distn %>% - visualize(obs_stat = Chisq_hat, direction = "greater")
-

+ visualize() + + shade_p_value(obs_stat = Chisq_hat, direction = "greater") +

null_distn %>%
   get_p_value(obs_stat = Chisq_hat, direction = "greater")
@@ -465,8 +479,9 @@

Two categorical (>2 level) variables

generate(reps = 1000, type = "permute") %>% calculate(stat = "Chisq") null_distn %>% - visualize(obs_stat = Chisq_hat, direction = "greater")
-

+ visualize() + + shade_p_value(obs_stat = Chisq_hat, direction = "greater") +

null_distn %>%
   get_p_value(obs_stat = Chisq_hat, direction = "greater")
@@ -510,8 +525,9 @@

One numerical variable, one categorical (2 levels) (diff in means)

generate(reps = 1000, type = "permute") %>% calculate(stat = "diff in means", order = c("summer", "winter")) null_distn %>% - visualize(obs_stat = d_hat, direction = "two_sided")
-

+ visualize() + + shade_p_value(obs_stat = d_hat, direction = "two_sided") +

null_distn %>%
   get_p_value(obs_stat = d_hat, direction = "two_sided")
@@ -555,8 +571,9 @@

One numerical variable, one categorical (2 levels) (t)

generate(reps = 1000, type = "permute") %>% calculate(stat = "t", order = c("summer", "winter")) null_distn %>% - visualize(obs_stat = t_hat, direction = "two_sided")
-

+ visualize() + + shade_p_value(obs_stat = t_hat, direction = "two_sided") +

null_distn %>%
   get_p_value(obs_stat = t_hat, direction = "two_sided")
@@ -602,8 +619,9 @@

One numerical variable, one categorical (2 levels) (diff in medians)

generate(reps = 1000, type = "permute") %>% calculate(stat = "diff in medians", order = c("summer", "winter")) null_distn %>% - visualize(obs_stat = d_hat, direction = "two_sided")
-

+ visualize() + + shade_p_value(obs_stat = d_hat, direction = "two_sided") +

null_distn %>%
   get_p_value(obs_stat = d_hat, direction = "two_sided")
@@ -647,8 +665,9 @@

One numerical, one categorical (>2 levels) - ANOVA

generate(reps = 1000, type = "permute") %>% calculate(stat = "F") null_distn %>% - visualize(obs_stat = F_hat, direction = "greater")
-

+ visualize() + + shade_p_value(obs_stat = F_hat, direction = "greater") +

null_distn %>%
   get_p_value(obs_stat = F_hat, direction = "greater")
@@ -692,8 +711,9 @@

Two numerical vars - SLR

generate(reps = 1000, type = "permute") %>% calculate(stat = "slope") null_distn %>% - visualize(obs_stat = slope_hat, direction = "two_sided")
-

+ visualize() + + shade_p_value(obs_stat = slope_hat, direction = "two_sided") +

null_distn %>%
   get_p_value(obs_stat = slope_hat, direction = "two_sided")
@@ -737,8 +757,9 @@

Two numerical vars - correlation

generate(reps = 1000, type = "permute") %>% calculate(stat = "correlation") null_distn %>% - visualize(obs_stat = correlation_hat, direction = "two_sided")
-

+ visualize() + + shade_p_value(obs_stat = correlation_hat, direction = "two_sided") +

null_distn %>%
   get_p_value(obs_stat = correlation_hat, direction = "two_sided")
@@ -787,9 +808,6 @@

One numerical (one mean)

generate(reps = 1000, type = "bootstrap") %>% calculate(stat = "mean") ( percentile_ci <- get_ci(boot) )
-
## Warning: 'get_ci' is deprecated.
-## Use 'get_confidence_interval' instead.
-## See help("Deprecated")
@@ -806,12 +824,11 @@

One numerical (one mean)

-
boot %>% visualize(endpoints = percentile_ci, direction = "between")
-

+
boot %>%
+  visualize() +
+    shade_confidence_interval(endpoints = percentile_ci)
+

( standard_error_ci <- get_ci(boot, type = "se", point_estimate = x_bar) )
-
## Warning: 'get_ci' is deprecated.
-## Use 'get_confidence_interval' instead.
-## See help("Deprecated")
@@ -828,8 +845,10 @@

One numerical (one mean)

-
boot %>% visualize(endpoints = standard_error_ci, direction = "between")
-

+
boot %>%
+  visualize() +
+    shade_confidence_interval(endpoints = standard_error_ci)
+

One numerical (one mean - standardized)

@@ -856,9 +875,6 @@

One numerical (one mean - standardized)

generate(reps = 1000, type = "bootstrap") %>% calculate(stat = "t") ( percentile_ci <- get_ci(boot) )
-
## Warning: 'get_ci' is deprecated.
-## Use 'get_confidence_interval' instead.
-## See help("Deprecated")
@@ -875,12 +891,11 @@

One numerical (one mean - standardized)

-
boot %>% visualize(endpoints = percentile_ci, direction = "between")
-

+
boot %>%
+  visualize() +
+    shade_confidence_interval(endpoints = percentile_ci)
+

( standard_error_ci <- get_ci(boot, type = "se", point_estimate = t_hat) )
-
## Warning: 'get_ci' is deprecated.
-## Use 'get_confidence_interval' instead.
-## See help("Deprecated")
@@ -897,8 +912,10 @@

One numerical (one mean - standardized)

-
boot %>% visualize(endpoints = standard_error_ci, direction = "between")
-

+
boot %>%
+  visualize() +
+    shade_confidence_interval(endpoints = standard_error_ci)
+

One categorical (one proportion)

@@ -925,9 +942,6 @@

One categorical (one proportion)

generate(reps = 1000, type = "bootstrap") %>% calculate(stat = "prop") ( percentile_ci <- get_ci(boot) )
-
## Warning: 'get_ci' is deprecated.
-## Use 'get_confidence_interval' instead.
-## See help("Deprecated")
@@ -944,12 +958,11 @@

One categorical (one proportion)

-
boot %>% visualize(endpoints = percentile_ci, direction = "between")
-

+
boot %>%
+  visualize() +
+    shade_confidence_interval(endpoints = percentile_ci)
+

( standard_error_ci <- get_ci(boot, type = "se", point_estimate = p_hat) )
-
## Warning: 'get_ci' is deprecated.
-## Use 'get_confidence_interval' instead.
-## See help("Deprecated")
@@ -966,8 +979,10 @@

One categorical (one proportion)

-
boot %>% visualize(endpoints = standard_error_ci, direction = "between")
-

+
boot %>%
+  visualize() +
+    shade_confidence_interval(endpoints = standard_error_ci)
+

One categorical variable (standardized proportion \(z\))

@@ -998,9 +1013,6 @@

One numerical variable, one categorical (2 levels) (diff in means)

generate(reps = 1000, type = "bootstrap") %>% calculate(stat = "diff in means", order = c("summer", "winter")) ( percentile_ci <- get_ci(boot) )
-
## Warning: 'get_ci' is deprecated.
-## Use 'get_confidence_interval' instead.
-## See help("Deprecated")
@@ -1017,12 +1029,11 @@

One numerical variable, one categorical (2 levels) (diff in means)

-
boot %>% visualize(endpoints = percentile_ci, direction = "between")
-

+
boot %>%
+  visualize() +
+    shade_confidence_interval(endpoints = percentile_ci)
+

( standard_error_ci <- get_ci(boot, type = "se", point_estimate = d_hat) )
-
## Warning: 'get_ci' is deprecated.
-## Use 'get_confidence_interval' instead.
-## See help("Deprecated")
@@ -1039,8 +1050,10 @@

One numerical variable, one categorical (2 levels) (diff in means)

-
boot %>% visualize(endpoints = standard_error_ci, direction = "between")
-

+
boot %>%
+  visualize() +
+    shade_confidence_interval(endpoints = standard_error_ci)
+

One numerical variable, one categorical (2 levels) (t)

@@ -1067,9 +1080,6 @@

One numerical variable, one categorical (2 levels) (t)

generate(reps = 1000, type = "bootstrap") %>% calculate(stat = "t", order = c("summer", "winter")) ( percentile_ci <- get_ci(boot) )
-
## Warning: 'get_ci' is deprecated.
-## Use 'get_confidence_interval' instead.
-## See help("Deprecated")
@@ -1086,12 +1096,11 @@

One numerical variable, one categorical (2 levels) (t)

-boot %>% visualize(endpoints = percentile_ci, direction = "between")
+boot %>%
+  visualize() +
+    shade_confidence_interval(endpoints = percentile_ci)

( standard_error_ci <- get_ci(boot, type = "se", point_estimate = t_hat) )
-
## Warning: 'get_ci' is deprecated.
-## Use 'get_confidence_interval' instead.
-## See help("Deprecated")
@@ -1108,8 +1117,10 @@

One numerical variable, one categorical (2 levels) (t)

-boot %>% visualize(endpoints = standard_error_ci, direction = "between")
+boot %>%
+  visualize() +
+    shade_confidence_interval(endpoints = standard_error_ci)

Two categorical variables (diff in proportions)

@@ -1136,9 +1147,6 @@

Two categorical variables (diff in proportions)

generate(reps = 1000, type = "bootstrap") %>% calculate(stat = "diff in props", order = c("summer", "winter")) ( percentile_ci <- get_ci(boot) )
-
## Warning: 'get_ci' is deprecated.
-## Use 'get_confidence_interval' instead.
-## See help("Deprecated")
@@ -1155,12 +1163,11 @@

Two categorical variables (diff in proportions)

-boot %>% visualize(endpoints = percentile_ci, direction = "between")
+boot %>%
+  visualize() +
+    shade_confidence_interval(endpoints = percentile_ci)

( standard_error_ci <- get_ci(boot, type = "se", point_estimate = d_hat) )
-
## Warning: 'get_ci' is deprecated.
-## Use 'get_confidence_interval' instead.
-## See help("Deprecated")
@@ -1177,8 +1184,10 @@

Two categorical variables (diff in proportions)

-boot %>% visualize(endpoints = standard_error_ci, direction = "between")
+boot %>%
+  visualize() +
+    shade_confidence_interval(endpoints = standard_error_ci)

Two categorical variables (z)

@@ -1205,9 +1214,6 @@

Two categorical variables (z)

generate(reps = 1000, type = "bootstrap") %>% calculate(stat = "z", order = c("summer", "winter")) ( percentile_ci <- get_ci(boot) )
-
## Warning: 'get_ci' is deprecated.
-## Use 'get_confidence_interval' instead.
-## See help("Deprecated")
@@ -1224,12 +1230,11 @@

Two categorical variables (z)

-boot %>% visualize(endpoints = percentile_ci, direction = "between")
+boot %>%
+  visualize() +
+    shade_confidence_interval(endpoints = percentile_ci)

( standard_error_ci <- get_ci(boot, type = "se", point_estimate = z_hat) )
-
## Warning: 'get_ci' is deprecated.
-## Use 'get_confidence_interval' instead.
-## See help("Deprecated")
@@ -1246,8 +1251,10 @@

Two categorical variables (z)

-boot %>% visualize(endpoints = standard_error_ci, direction = "between")
+boot %>%
+  visualize() +
+    shade_confidence_interval(endpoints = standard_error_ci)

Two numerical vars - SLR

@@ -1274,9 +1281,6 @@

Two numerical vars - SLR

generate(reps = 1000, type = "bootstrap") %>% calculate(stat = "slope") ( percentile_ci <- get_ci(boot) )
-
## Warning: 'get_ci' is deprecated.
-## Use 'get_confidence_interval' instead.
-## See help("Deprecated")
@@ -1293,12 +1297,11 @@

Two numerical vars - SLR

-boot %>% visualize(endpoints = percentile_ci, direction = "between")
+boot %>%
+  visualize() +
+    shade_confidence_interval(endpoints = percentile_ci)

( standard_error_ci <- get_ci(boot, type = "se", point_estimate = slope_hat) )
-
## Warning: 'get_ci' is deprecated.
-## Use 'get_confidence_interval' instead.
-## See help("Deprecated")
@@ -1315,8 +1318,10 @@

Two numerical vars - SLR

-boot %>% visualize(endpoints = standard_error_ci, direction = "between")
+boot %>%
+  visualize() +
+    shade_confidence_interval(endpoints = standard_error_ci)
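The same layering works for a regression slope; a sketch with an illustrative `mtcars` model rather than the vignette's `arr_delay ~ dep_delay`:

library(dplyr)
library(infer)

boot_slope <- mtcars %>%
  specify(mpg ~ wt) %>%
  generate(reps = 1000, type = "bootstrap") %>%
  calculate(stat = "slope")

( percentile_ci <- get_ci(boot_slope) )

boot_slope %>%
  visualize() +
    shade_confidence_interval(endpoints = percentile_ci)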

Two numerical vars - correlation

@@ -1343,9 +1348,6 @@

Two numerical vars - correlation

generate(reps = 1000, type = "bootstrap") %>% calculate(stat = "correlation") ( percentile_ci <- get_ci(boot) )
-
## Warning: 'get_ci' is deprecated.
-## Use 'get_confidence_interval' instead.
-## See help("Deprecated")
@@ -1362,13 +1364,12 @@

Two numerical vars - correlation

-boot %>% visualize(endpoints = percentile_ci, direction = "between")
+boot %>%
+  visualize() +
+    shade_confidence_interval(endpoints = percentile_ci)

( standard_error_ci <- get_ci(boot, type = "se", 
                             point_estimate = correlation_hat) )
-
## Warning: 'get_ci' is deprecated.
-## Use 'get_confidence_interval' instead.
-## See help("Deprecated")
@@ -1385,8 +1386,10 @@

Two numerical vars - correlation

-boot %>% visualize(endpoints = standard_error_ci, direction = "between")
+boot %>%
+  visualize() +
+    shade_confidence_interval(endpoints = standard_error_ci)

Two numerical vars - t

diff --git a/inst/doc/two_sample_t.R b/inst/doc/two_sample_t.R index 80ce97a0..69ce6fd4 100644 --- a/inst/doc/two_sample_t.R +++ b/inst/doc/two_sample_t.R @@ -43,7 +43,9 @@ t_null_distn <- fli_small %>% hypothesize(null = "independence") %>% generate(reps = 1000, type = "permute") %>% calculate(stat = "t", order = c("h1", "h2")) -t_null_distn %>% visualize(obs_stat = obs_t, direction = "two_sided") +t_null_distn %>% + visualize() + + shade_p_value(obs_stat = obs_t, direction = "two_sided") ## ------------------------------------------------------------------------ t_null_distn %>% @@ -56,7 +58,8 @@ fli_small %>% hypothesize(null = "independence") %>% # generate() ## Not used for theoretical calculate(stat = "t", order = c("h1", "h2")) %>% - visualize(method = "theoretical", obs_stat = obs_t, direction = "two_sided") + visualize(method = "theoretical") + + shade_p_value(obs_stat = obs_t, direction = "two_sided") ## ----eval=FALSE---------------------------------------------------------- # fli_small %>% @@ -65,12 +68,14 @@ fli_small %>% # hypothesize(null = "independence") %>% # generate(reps = 1000, type = "permute") %>% # calculate(stat = "t", order = c("h1", "h2")) %>% -# visualize(method = "both", obs_stat = obs_t, direction = "two_sided") +# visualize(method = "both") + +# shade_p_value(obs_stat = obs_t, direction = "two_sided") ## ----echo=FALSE---------------------------------------------------------- # To use same distribution calculated above t_null_distn %>% - visualize(method = "both", obs_stat = obs_t, direction = "two_sided") + visualize(method = "both") + + shade_p_value(obs_stat = obs_t, direction = "two_sided") ## ------------------------------------------------------------------------ fli_small %>% diff --git a/inst/doc/two_sample_t.Rmd b/inst/doc/two_sample_t.Rmd index 9fbe54ab..c77980bb 100755 --- a/inst/doc/two_sample_t.Rmd +++ b/inst/doc/two_sample_t.Rmd @@ -93,7 +93,9 @@ t_null_distn <- fli_small %>% hypothesize(null = "independence") %>% generate(reps = 1000, type = "permute") %>% calculate(stat = "t", order = c("h1", "h2")) -t_null_distn %>% visualize(obs_stat = obs_t, direction = "two_sided") +t_null_distn %>% + visualize() + + shade_p_value(obs_stat = obs_t, direction = "two_sided") ``` ## Calculate the randomization-based $p$-value @@ -113,7 +115,8 @@ fli_small %>% hypothesize(null = "independence") %>% # generate() ## Not used for theoretical calculate(stat = "t", order = c("h1", "h2")) %>% - visualize(method = "theoretical", obs_stat = obs_t, direction = "two_sided") + visualize(method = "theoretical") + + shade_p_value(obs_stat = obs_t, direction = "two_sided") ``` ## Overlay appropriate $t$ distribution on top of permuted t-statistics @@ -125,13 +128,15 @@ fli_small %>% hypothesize(null = "independence") %>% generate(reps = 1000, type = "permute") %>% calculate(stat = "t", order = c("h1", "h2")) %>% - visualize(method = "both", obs_stat = obs_t, direction = "two_sided") + visualize(method = "both") + + shade_p_value(obs_stat = obs_t, direction = "two_sided") ``` ```{r echo=FALSE} # To use same distribution calculated above t_null_distn %>% - visualize(method = "both", obs_stat = obs_t, direction = "two_sided") + visualize(method = "both") + + shade_p_value(obs_stat = obs_t, direction = "two_sided") ``` diff --git a/inst/doc/two_sample_t.html b/inst/doc/two_sample_t.html index 7a8ecc8c..94ebc223 100644 --- a/inst/doc/two_sample_t.html +++ b/inst/doc/two_sample_t.html @@ -12,7 +12,7 @@ - + Two sample t test example using nycflights13 flights data @@ -70,7 +70,7 @@

Two sample \(t\) test example using nycflights13 flights data

Chester Ismay

-2018-09-03
+2018-09-14

@@ -167,8 +167,10 @@

Randomization approach to t-statistic

generate(reps = 1000, type = "permute") %>% calculate(stat = "t", order = c("h1", "h2"))
## Warning: Removed 15 rows containing missing values.
-t_null_distn %>% visualize(obs_stat = obs_t, direction = "two_sided")
+t_null_distn %>%
+  visualize() +
+    shade_p_value(obs_stat = obs_t, direction = "two_sided")

Calculate the randomization-based \(p\)-value

@@ -197,11 +199,12 @@

Theoretical distribution

hypothesize(null = "independence") %>% # generate() ## Not used for theoretical calculate(stat = "t", order = c("h1", "h2")) %>% - visualize(method = "theoretical", obs_stat = obs_t, direction = "two_sided")
+ visualize(method = "theoretical") + + shade_p_value(obs_stat = obs_t, direction = "two_sided")
## Warning: Removed 15 rows containing missing values.
## Warning: Check to make sure the conditions have been met for the
 ## theoretical method. {infer} currently does not check these for you.
-

+

Overlay appropriate \(t\) distribution on top of permuted t-statistics

@@ -211,10 +214,11 @@

Overlay appropriate \(t\) distribution on t

  hypothesize(null = "independence") %>%
  generate(reps = 1000, type = "permute") %>%
  calculate(stat = "t", order = c("h1", "h2")) %>%
-  visualize(method = "both", obs_stat = obs_t, direction = "two_sided")
+  visualize(method = "both") +
+    shade_p_value(obs_stat = obs_t, direction = "two_sided")
## Warning: Check to make sure the conditions have been met for the
-## theoretical method. `infer` currently does not check these for you.
-

+## theoretical method. {infer} currently does not check these for you.
+
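A minimal self-contained sketch of this hypothesis-test pattern, with an illustrative `mtcars` grouping in place of the vignette's `fli_small` variables (expect the same "check the conditions" warning shown above):

library(dplyr)
library(infer)

cars_tbl <- mtcars %>%
  mutate(am = factor(ifelse(am == 1, "manual", "automatic")))

# observed two-sample t statistic
obs_t <- cars_tbl %>%
  specify(mpg ~ am) %>%
  calculate(stat = "t", order = c("manual", "automatic"))

# permutation null distribution of the t statistic
t_null <- cars_tbl %>%
  specify(mpg ~ am) %>%
  hypothesize(null = "independence") %>%
  generate(reps = 1000, type = "permute") %>%
  calculate(stat = "t", order = c("manual", "automatic"))

# overlay the theoretical t density and shade the two-sided p-value region
t_null %>%
  visualize(method = "both") +
    shade_p_value(obs_stat = obs_t, direction = "two_sided")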

Compute theoretical p-value

diff --git a/vignettes/chisq_test.Rmd b/vignettes/chisq_test.Rmd index d56b5201..0f7ea2f0 100644 --- a/vignettes/chisq_test.Rmd +++ b/vignettes/chisq_test.Rmd @@ -92,7 +92,9 @@ chisq_null_distn <- fli_small %>% hypothesize(null = "independence") %>% generate(reps = 1000, type = "permute") %>% calculate(stat = "Chisq") -chisq_null_distn %>% visualize(obs_stat = obs_chisq, direction = "greater") +chisq_null_distn %>% + visualize() + + shade_p_value(obs_stat = obs_chisq, direction = "greater") ``` ## Calculate the randomization-based $p$-value @@ -111,7 +113,8 @@ fli_small %>% hypothesize(null = "independence") %>% # generate() ## Not used for theoretical calculate(stat = "Chisq") %>% - visualize(method = "theoretical", obs_stat = obs_chisq, direction = "right") + visualize(method = "theoretical") + + shade_p_value(obs_stat = obs_chisq, direction = "right") ``` ## Overlay appropriate $\chi^2$ distribution on top of permuted statistics @@ -122,13 +125,15 @@ fli_small %>% hypothesize(null = "independence") %>% generate(reps = 1000, type = "permute") %>% calculate(stat = "Chisq") %>% - visualize(method = "both", obs_stat = obs_chisq, direction = "right") + visualize(method = "both") + + shade_p_value(obs_stat = obs_chisq, direction = "right") ``` ```{r echo=FALSE} # To use same distribution calculated above chisq_null_distn %>% - visualize(method = "both", obs_stat = obs_chisq, direction = "right") + visualize(method = "both") + + shade_p_value(obs_stat = obs_chisq, direction = "right") ``` diff --git a/vignettes/observed_stat_examples.Rmd b/vignettes/observed_stat_examples.Rmd index 3a2f3dc7..30404cda 100644 --- a/vignettes/observed_stat_examples.Rmd +++ b/vignettes/observed_stat_examples.Rmd @@ -70,7 +70,8 @@ null_distn <- fli_small %>% generate(reps = 1000) %>% calculate(stat = "mean") null_distn %>% - visualize(obs_stat = x_bar, direction = "two_sided") + visualize() + + shade_p_value(obs_stat = x_bar, direction = "two_sided") null_distn %>% get_p_value(obs_stat = x_bar, direction = "two_sided") ``` @@ -91,7 +92,8 @@ null_distn <- fli_small %>% generate(reps = 1000) %>% calculate(stat = "t") null_distn %>% - visualize(obs_stat = t_bar, direction = "two_sided") + visualize() + + shade_p_value(obs_stat = t_bar, direction = "two_sided") null_distn %>% get_p_value(obs_stat = t_bar, direction = "two_sided") ``` @@ -114,7 +116,8 @@ null_distn <- fli_small %>% generate(reps = 1000) %>% calculate(stat = "median") null_distn %>% - visualize(obs_stat = x_tilde, direction = "two_sided") + visualize() + + shade_p_value(obs_stat = x_tilde, direction = "two_sided") null_distn %>% get_p_value(obs_stat = x_tilde, direction = "two_sided") ``` @@ -136,7 +139,8 @@ null_distn <- fli_small %>% generate(reps = 1000) %>% calculate(stat = "prop") null_distn %>% - visualize(obs_stat = p_hat, direction = "two_sided") + visualize() + + shade_p_value(obs_stat = p_hat, direction = "two_sided") null_distn %>% get_p_value(obs_stat = p_hat, direction = "two_sided") ``` @@ -174,7 +178,8 @@ null_distn <- fli_small %>% generate(reps = 1000) %>% calculate(stat = "diff in props", order = c("winter", "summer")) null_distn %>% - visualize(obs_stat = d_hat, direction = "two_sided") + visualize() + + shade_p_value(obs_stat = d_hat, direction = "two_sided") null_distn %>% get_p_value(obs_stat = d_hat, direction = "two_sided") ``` @@ -196,7 +201,8 @@ null_distn <- fli_small %>% generate(reps = 1000) %>% calculate(stat = "z", order = c("winter", "summer")) null_distn %>% - visualize(obs_stat = z_hat, direction = "two_sided") + 
visualize() + + shade_p_value(obs_stat = z_hat, direction = "two_sided") null_distn %>% get_p_value(obs_stat = z_hat, direction = "two_sided") ``` @@ -225,7 +231,8 @@ null_distn <- fli_small %>% generate(reps = 1000, type = "simulate") %>% calculate(stat = "Chisq") null_distn %>% - visualize(obs_stat = Chisq_hat, direction = "greater") + visualize() + + shade_p_value(obs_stat = Chisq_hat, direction = "greater") null_distn %>% get_p_value(obs_stat = Chisq_hat, direction = "greater") ``` @@ -247,7 +254,8 @@ null_distn <- fli_small %>% generate(reps = 1000, type = "permute") %>% calculate(stat = "Chisq") null_distn %>% - visualize(obs_stat = Chisq_hat, direction = "greater") + visualize() + + shade_p_value(obs_stat = Chisq_hat, direction = "greater") null_distn %>% get_p_value(obs_stat = Chisq_hat, direction = "greater") ``` @@ -269,7 +277,8 @@ null_distn <- fli_small %>% generate(reps = 1000, type = "permute") %>% calculate(stat = "diff in means", order = c("summer", "winter")) null_distn %>% - visualize(obs_stat = d_hat, direction = "two_sided") + visualize() + + shade_p_value(obs_stat = d_hat, direction = "two_sided") null_distn %>% get_p_value(obs_stat = d_hat, direction = "two_sided") ``` @@ -291,7 +300,8 @@ null_distn <- fli_small %>% generate(reps = 1000, type = "permute") %>% calculate(stat = "t", order = c("summer", "winter")) null_distn %>% - visualize(obs_stat = t_hat, direction = "two_sided") + visualize() + + shade_p_value(obs_stat = t_hat, direction = "two_sided") null_distn %>% get_p_value(obs_stat = t_hat, direction = "two_sided") ``` @@ -316,7 +326,8 @@ null_distn <- fli_small %>% generate(reps = 1000, type = "permute") %>% calculate(stat = "diff in medians", order = c("summer", "winter")) null_distn %>% - visualize(obs_stat = d_hat, direction = "two_sided") + visualize() + + shade_p_value(obs_stat = d_hat, direction = "two_sided") null_distn %>% get_p_value(obs_stat = d_hat, direction = "two_sided") ``` @@ -338,7 +349,8 @@ null_distn <- fli_small %>% generate(reps = 1000, type = "permute") %>% calculate(stat = "F") null_distn %>% - visualize(obs_stat = F_hat, direction = "greater") + visualize() + + shade_p_value(obs_stat = F_hat, direction = "greater") null_distn %>% get_p_value(obs_stat = F_hat, direction = "greater") ``` @@ -360,7 +372,8 @@ null_distn <- fli_small %>% generate(reps = 1000, type = "permute") %>% calculate(stat = "slope") null_distn %>% - visualize(obs_stat = slope_hat, direction = "two_sided") + visualize() + + shade_p_value(obs_stat = slope_hat, direction = "two_sided") null_distn %>% get_p_value(obs_stat = slope_hat, direction = "two_sided") ``` @@ -382,7 +395,8 @@ null_distn <- fli_small %>% generate(reps = 1000, type = "permute") %>% calculate(stat = "correlation") null_distn %>% - visualize(obs_stat = correlation_hat, direction = "two_sided") + visualize() + + shade_p_value(obs_stat = correlation_hat, direction = "two_sided") null_distn %>% get_p_value(obs_stat = correlation_hat, direction = "two_sided") ``` @@ -407,7 +421,8 @@ null_distn <- fli_small %>% generate(reps = 1000, type = "permute") %>% calculate(stat = "t") null_distn %>% - visualize(obs_stat = t_hat, direction = "two_sided") + visualize() + + shade_p_value(obs_stat = t_hat, direction = "two_sided") null_distn %>% get_p_value(obs_stat = t_hat, direction = "two_sided") ``` @@ -431,9 +446,13 @@ boot <- fli_small %>% generate(reps = 1000, type = "bootstrap") %>% calculate(stat = "mean") ( percentile_ci <- get_ci(boot) ) -boot %>% visualize(endpoints = percentile_ci, direction = "between") 
+boot %>% + visualize() + + shade_confidence_interval(endpoints = percentile_ci) ( standard_error_ci <- get_ci(boot, type = "se", point_estimate = x_bar) ) -boot %>% visualize(endpoints = standard_error_ci, direction = "between") +boot %>% + visualize() + + shade_confidence_interval(endpoints = standard_error_ci) ``` ### One numerical (one mean - standardized) @@ -452,9 +471,13 @@ boot <- fli_small %>% generate(reps = 1000, type = "bootstrap") %>% calculate(stat = "t") ( percentile_ci <- get_ci(boot) ) -boot %>% visualize(endpoints = percentile_ci, direction = "between") +boot %>% + visualize() + + shade_confidence_interval(endpoints = percentile_ci) ( standard_error_ci <- get_ci(boot, type = "se", point_estimate = t_hat) ) -boot %>% visualize(endpoints = standard_error_ci, direction = "between") +boot %>% + visualize() + + shade_confidence_interval(endpoints = standard_error_ci) ``` @@ -474,9 +497,13 @@ boot <- fli_small %>% generate(reps = 1000, type = "bootstrap") %>% calculate(stat = "prop") ( percentile_ci <- get_ci(boot) ) -boot %>% visualize(endpoints = percentile_ci, direction = "between") +boot %>% + visualize() + + shade_confidence_interval(endpoints = percentile_ci) ( standard_error_ci <- get_ci(boot, type = "se", point_estimate = p_hat) ) -boot %>% visualize(endpoints = standard_error_ci, direction = "between") +boot %>% + visualize() + + shade_confidence_interval(endpoints = standard_error_ci) ``` ### One categorical variable (standardized proportion $z$) @@ -499,9 +526,13 @@ boot <- fli_small %>% generate(reps = 1000, type = "bootstrap") %>% calculate(stat = "diff in means", order = c("summer", "winter")) ( percentile_ci <- get_ci(boot) ) -boot %>% visualize(endpoints = percentile_ci, direction = "between") +boot %>% + visualize() + + shade_confidence_interval(endpoints = percentile_ci) ( standard_error_ci <- get_ci(boot, type = "se", point_estimate = d_hat) ) -boot %>% visualize(endpoints = standard_error_ci, direction = "between") +boot %>% + visualize() + + shade_confidence_interval(endpoints = standard_error_ci) ``` ### One numerical variable, one categorical (2 levels) (t) @@ -520,9 +551,13 @@ boot <- fli_small %>% generate(reps = 1000, type = "bootstrap") %>% calculate(stat = "t", order = c("summer", "winter")) ( percentile_ci <- get_ci(boot) ) -boot %>% visualize(endpoints = percentile_ci, direction = "between") +boot %>% + visualize() + + shade_confidence_interval(endpoints = percentile_ci) ( standard_error_ci <- get_ci(boot, type = "se", point_estimate = t_hat) ) -boot %>% visualize(endpoints = standard_error_ci, direction = "between") +boot %>% + visualize() + + shade_confidence_interval(endpoints = standard_error_ci) ``` @@ -542,9 +577,13 @@ boot <- fli_small %>% generate(reps = 1000, type = "bootstrap") %>% calculate(stat = "diff in props", order = c("summer", "winter")) ( percentile_ci <- get_ci(boot) ) -boot %>% visualize(endpoints = percentile_ci, direction = "between") +boot %>% + visualize() + + shade_confidence_interval(endpoints = percentile_ci) ( standard_error_ci <- get_ci(boot, type = "se", point_estimate = d_hat) ) -boot %>% visualize(endpoints = standard_error_ci, direction = "between") +boot %>% + visualize() + + shade_confidence_interval(endpoints = standard_error_ci) ``` ### Two categorical variables (z) @@ -563,9 +602,13 @@ boot <- fli_small %>% generate(reps = 1000, type = "bootstrap") %>% calculate(stat = "z", order = c("summer", "winter")) ( percentile_ci <- get_ci(boot) ) -boot %>% visualize(endpoints = percentile_ci, direction = "between") 
+boot %>% + visualize() + + shade_confidence_interval(endpoints = percentile_ci) ( standard_error_ci <- get_ci(boot, type = "se", point_estimate = z_hat) ) -boot %>% visualize(endpoints = standard_error_ci, direction = "between") +boot %>% + visualize() + + shade_confidence_interval(endpoints = standard_error_ci) ``` @@ -585,9 +628,13 @@ boot <- fli_small %>% generate(reps = 1000, type = "bootstrap") %>% calculate(stat = "slope") ( percentile_ci <- get_ci(boot) ) -boot %>% visualize(endpoints = percentile_ci, direction = "between") +boot %>% + visualize() + + shade_confidence_interval(endpoints = percentile_ci) ( standard_error_ci <- get_ci(boot, type = "se", point_estimate = slope_hat) ) -boot %>% visualize(endpoints = standard_error_ci, direction = "between") +boot %>% + visualize() + + shade_confidence_interval(endpoints = standard_error_ci) ``` ### Two numerical vars - correlation @@ -606,10 +653,14 @@ boot <- fli_small %>% generate(reps = 1000, type = "bootstrap") %>% calculate(stat = "correlation") ( percentile_ci <- get_ci(boot) ) -boot %>% visualize(endpoints = percentile_ci, direction = "between") +boot %>% + visualize() + + shade_confidence_interval(endpoints = percentile_ci) ( standard_error_ci <- get_ci(boot, type = "se", point_estimate = correlation_hat) ) -boot %>% visualize(endpoints = standard_error_ci, direction = "between") +boot %>% + visualize() + + shade_confidence_interval(endpoints = standard_error_ci) ``` @@ -631,7 +682,11 @@ boot <- fli_small %>% generate(reps = 1000, type = "bootstrap") %>% calculate(stat = "t") ( percentile_ci <- get_ci(boot) ) -boot %>% visualize(endpoints = percentile_ci, direction = "between") +boot %>% + visualize() + + shade_confidence_interval(endpoints = percentile_ci) ( standard_error_ci <- get_ci(boot, type = "se", point_estimate = t_hat) ) -boot %>% visualize(endpoints = standard_error_ci, direction = "between") +boot %>% + visualize() + + shade_confidence_interval(endpoints = standard_error_ci) ``` diff --git a/vignettes/two_sample_t.Rmd b/vignettes/two_sample_t.Rmd index 9fbe54ab..c77980bb 100755 --- a/vignettes/two_sample_t.Rmd +++ b/vignettes/two_sample_t.Rmd @@ -93,7 +93,9 @@ t_null_distn <- fli_small %>% hypothesize(null = "independence") %>% generate(reps = 1000, type = "permute") %>% calculate(stat = "t", order = c("h1", "h2")) -t_null_distn %>% visualize(obs_stat = obs_t, direction = "two_sided") +t_null_distn %>% + visualize() + + shade_p_value(obs_stat = obs_t, direction = "two_sided") ``` ## Calculate the randomization-based $p$-value @@ -113,7 +115,8 @@ fli_small %>% hypothesize(null = "independence") %>% # generate() ## Not used for theoretical calculate(stat = "t", order = c("h1", "h2")) %>% - visualize(method = "theoretical", obs_stat = obs_t, direction = "two_sided") + visualize(method = "theoretical") + + shade_p_value(obs_stat = obs_t, direction = "two_sided") ``` ## Overlay appropriate $t$ distribution on top of permuted t-statistics @@ -125,13 +128,15 @@ fli_small %>% hypothesize(null = "independence") %>% generate(reps = 1000, type = "permute") %>% calculate(stat = "t", order = c("h1", "h2")) %>% - visualize(method = "both", obs_stat = obs_t, direction = "two_sided") + visualize(method = "both") + + shade_p_value(obs_stat = obs_t, direction = "two_sided") ``` ```{r echo=FALSE} # To use same distribution calculated above t_null_distn %>% - visualize(method = "both", obs_stat = obs_t, direction = "two_sided") + visualize(method = "both") + + shade_p_value(obs_stat = obs_t, direction = "two_sided") ``` From 
861f85f39e271e9d02ccce1889d4a94f441a6346 Mon Sep 17 00:00:00 2001 From: "evgeni.chasnovski" Date: Sun, 16 Sep 2018 10:00:57 +0300 Subject: [PATCH 57/78] Move warning about right-tail tests into `shade_p_value()`. --- R/visualize.R | 31 ++++++++++++++++++------------- tests/testthat/test-visualize.R | 28 ++++++++++++++++++++++++++-- 2 files changed, 44 insertions(+), 15 deletions(-) diff --git a/R/visualize.R b/R/visualize.R index b556aaff..4034c49c 100755 --- a/R/visualize.R +++ b/R/visualize.R @@ -180,11 +180,6 @@ check_visualize_args <- function(data, bins, method, dens_color, ) } - theory_type <- short_theory_type(data) - if (theory_type %in% c("F", "Chi-Square")) { - warn_right_tail_test(direction, theory_type) - } - TRUE } @@ -433,7 +428,7 @@ shade_p_value <- function(obs_stat, direction, res <- c(res, list(geom_tail(tail_data, fill, ...))) } else if (direction %in% c("two_sided", "both")) { - tail_data <- two_tail_data(obs_stat) + tail_data <- two_tail_data(obs_stat, direction) res <- c(res, list(geom_tail(tail_data, fill, ...))) } else { @@ -568,7 +563,8 @@ short_theory_type <- function(x) { } warn_right_tail_test <- function(direction, stat_name) { - if (!is.null(direction) && !(direction %in% c("greater", "right"))) { + if (!is.null(direction) && !(direction %in% c("greater", "right")) && + (stat_name %in% c("F", "Chi-Square"))) { warning_glue( "{stat_name} usually corresponds to right-tailed tests. ", "Proceed with caution." @@ -591,19 +587,28 @@ geom_tail <- function(tail_data, fill, ...) { } one_tail_data <- function(obs_stat, direction) { - if (direction %in% c("less", "left")) { - data.frame(x_min = -Inf, x_max = obs_stat) - } else if (direction %in% c("greater", "right")) { - data.frame(x_min = obs_stat, x_max = Inf) + # Take advantage of {ggplot2} functionality to accept function as `data`. + # Needed to warn about incorrect usage of right tail tests. + function(data) { + warn_right_tail_test(direction, short_theory_type(data)) + + if (direction %in% c("less", "left")) { + data.frame(x_min = -Inf, x_max = obs_stat) + } else if (direction %in% c("greater", "right")) { + data.frame(x_min = obs_stat, x_max = Inf) + } } } -two_tail_data <- function(obs_stat) { - # Take advantage of {ggplot2} functionality to accept function as `data` +two_tail_data <- function(obs_stat, direction) { + # Take advantage of {ggplot2} functionality to accept function as `data`. # This is needed to make possible existence of `shade_p_value()` in case of # `direction = "both"`, as it depends on actual `data` but adding it as # argument to `shade_p_value()` is very bad. + # Also needed to warn about incorrect usage of right tail tests. 
function(data) { + warn_right_tail_test(direction, short_theory_type(data)) + if (get_viz_method(data) == "theoretical") { second_border <- -obs_stat } else { diff --git a/tests/testthat/test-visualize.R b/tests/testthat/test-visualize.R index f3a94e48..53222d9f 100644 --- a/tests/testthat/test-visualize.R +++ b/tests/testthat/test-visualize.R @@ -474,10 +474,34 @@ test_that("shade_confidence_interval throws errors and warnings", { ) }) +test_that("warn_right_tail_test works", { + expect_warn_right_tail <- function(stat_name) { + warn_regex <- paste0(stat_name, ".*right-tailed") + + expect_silent(warn_right_tail_test(NULL, stat_name)) + expect_silent(warn_right_tail_test("right", stat_name)) + expect_warning(warn_right_tail_test("left", stat_name), warn_regex) + expect_warning(warn_right_tail_test("two_sided", stat_name), warn_regex) + } + + expect_warn_right_tail("F") + expect_warn_right_tail("Chi-Square") +}) + +test_that("one_tail_data works", { + fun_output_left <- one_tail_data(1, "left") + expect_equal(colnames(fun_output_left(iris_permute)), c("x_min", "x_max")) + + fun_output_right <- one_tail_data(1, "right") + expect_equal(colnames(fun_output_right(iris_permute)), c("x_min", "x_max")) +}) + test_that("two_tail_data works", { + fun_output <- two_tail_data(1, "two_sided") + attr(iris_permute, "viz_method") <- "both" - expect_equal(colnames(two_tail_data(1)(iris_permute)), c("x_min", "x_max")) + expect_equal(colnames(fun_output(iris_permute)), c("x_min", "x_max")) attr(iris_permute, "viz_method") <- "theoretical" - expect_equal(colnames(two_tail_data(1)(iris_permute)), c("x_min", "x_max")) + expect_equal(colnames(fun_output(iris_permute)), c("x_min", "x_max")) }) From c65c52bb8d5b13b94f05e2e6847d403ebf1ceba6 Mon Sep 17 00:00:00 2001 From: "evgeni.chasnovski" Date: Sun, 16 Sep 2018 10:15:12 +0300 Subject: [PATCH 58/78] Update 'NEWS.md'. --- NEWS.md | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) diff --git a/NEWS.md b/NEWS.md index d3915e1f..c101e33e 100755 --- a/NEWS.md +++ b/NEWS.md @@ -1,5 +1,18 @@ # infer 0.3.1.9000 +## Deprecation changes + +- Deprecated `p_value()` (use `get_p_value()` instead) (#180). +- Deprecated `conf_int()` (use `get_confidence_interval()` instead) (#180). +- Deprecated (via warnings) plotting p-value and confidence interval in `visualize()` (use new functions `shade_p_value()` and `shade_confidence_interval()` instead) (#178). + +## New functions + +- `shade_p_value()` - {ggplot2}-like layer function to add information about p-value region to `visualize()` output. Has alias `shade_pvalue()`. +- `shade_confidence_interval()` - {ggplot2}-like layer function to add information about confidence interval region to `visualize()` output. Has alias `shade_ci()`. + +## Other + - Account for `NULL` value in left hand side of formula in `specify()` (#156) and `type` in `generate()` (#157). - Update documentation code to follow tidyverse style guide (#159). - Remove help page for internal `set_params()` (#165). @@ -9,8 +22,6 @@ - Make transparancy in `visualize()` to not depend on method and data volume. - Make `visualize()` work for "One sample t" theoretical type with `method = "both"`. - Add `stat = "sum"` and `stat = "count"` options to `calculate()` (#50). -- Deprecated `p_value()` (use `get_p_value()` instead) (#180). -- Deprecated `cont_int()` (use `get_confidence_interval()` instead) (#180). 
# infer 0.3.1 From d90eb1b9c30f245bc7dea32c076801d045664ab3 Mon Sep 17 00:00:00 2001 From: "evgeni.chasnovski" Date: Sun, 16 Sep 2018 10:34:01 +0300 Subject: [PATCH 59/78] Use `shade_confidence_interval()` in `visualize()` instead of `shade_ci()`. --- R/visualize.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/visualize.R b/R/visualize.R index 4034c49c..119cce3b 100755 --- a/R/visualize.R +++ b/R/visualize.R @@ -116,7 +116,7 @@ visualize <- function(data, bins = 15, method = "simulation", if (!is.null(direction) && (direction == "between")) { infer_plot <- infer_plot + - shade_ci(endpoints, endpoints_color, ci_fill, ...) + shade_confidence_interval(endpoints, endpoints_color, ci_fill, ...) } infer_plot From 005af2bdf1528638a9eb775e6f0c5a39c371bf97 Mon Sep 17 00:00:00 2001 From: "Albert Y. Kim" Date: Sun, 16 Sep 2018 10:36:43 -0400 Subject: [PATCH 60/78] Added middle initial to "Albert Y. Kim" thanks --- DESCRIPTION | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/DESCRIPTION b/DESCRIPTION index c3301693..c0a8128b 100755 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -11,7 +11,7 @@ Authors@R: c( person("Ted", "Laderas", email = "tedladeras@gmail.com", role = "ctb"), person("Nick", "Solomon", email = "nick.solomon@datacamp.com", role = "ctb"), person("Johanna", "Hardin", email = "Jo.Hardin@pomona.edu", role = "ctb"), - person("Albert", "Kim", email = "albert.ys.kim@gmail.com", role = "ctb"), + person("Albert Y.", "Kim", email = "albert.ys.kim@gmail.com", role = "ctb"), person("Neal", "Fultz", email = "nfultz@gmail.com", role = "ctb"), person("Doug", "Friedman", email = "doug.nhp@gmail.com", role = "ctb"), person("Richie", "Cotton", email = "richie@datacamp.com", role = "ctb"), From bf3e403451a0931ce238682d34b99a786381a88e Mon Sep 17 00:00:00 2001 From: alex hayes Date: Mon, 17 Sep 2018 16:19:26 -0500 Subject: [PATCH 61/78] Use pull() only (without select()) in vignettes --- vignettes/chisq_test.Rmd | 3 +-- vignettes/flights_examples.Rmd | 6 ++---- vignettes/two_sample_t.Rmd | 6 ++---- 3 files changed, 5 insertions(+), 10 deletions(-) diff --git a/vignettes/chisq_test.Rmd b/vignettes/chisq_test.Rmd index f2dd959a..636b553a 100644 --- a/vignettes/chisq_test.Rmd +++ b/vignettes/chisq_test.Rmd @@ -137,7 +137,6 @@ chisq_null_distn %>% ```{r} fli_small %>% chisq_test(formula = origin ~ season) %>% - dplyr::select(p_value) %>% - dplyr::pull() + dplyr::pull(p_value) ``` diff --git a/vignettes/flights_examples.Rmd b/vignettes/flights_examples.Rmd index 07f3f82c..059d2da2 100644 --- a/vignettes/flights_examples.Rmd +++ b/vignettes/flights_examples.Rmd @@ -259,8 +259,7 @@ null_distn %>% slope_hat <- lm(arr_delay ~ dep_delay, data = fli_small) %>% broom::tidy() %>% filter(term == "dep_delay") %>% - select(estimate) %>% - pull() + pull(estimate) null_distn <- fli_small %>% specify(arr_delay ~ dep_delay) %>% hypothesize(null = "independence") %>% @@ -348,8 +347,7 @@ c(lower = d_hat - 2 * sd(boot), slope_hat <- lm(arr_delay ~ dep_delay, data = fli_small) %>% broom::tidy() %>% filter(term == "dep_delay") %>% - select(estimate) %>% - pull() + pull(estimate) boot <- fli_small %>% specify(arr_delay ~ dep_delay) %>% generate(reps = 1000, type = "bootstrap") %>% diff --git a/vignettes/two_sample_t.Rmd b/vignettes/two_sample_t.Rmd index 9a2295f4..67d2a1a4 100755 --- a/vignettes/two_sample_t.Rmd +++ b/vignettes/two_sample_t.Rmd @@ -69,8 +69,7 @@ Or using `t_test` in `infer` obs_t <- fli_small %>% t_test(formula = arr_delay ~ half_year, alternative = "two_sided", order = c("h1", 
"h2")) %>% - dplyr::select(statistic) %>% - dplyr::pull() + dplyr::pull(statistic) ``` The observed $t$ statistic is `r obs_t`. @@ -142,7 +141,6 @@ fli_small %>% t_test(formula = arr_delay ~ half_year, alternative = "two_sided", order = c("h1", "h2")) %>% - dplyr::select(p_value) %>% - dplyr::pull() + dplyr::pull(p_value) ``` From d0780dcad63a066c8839c59117bdc2c8062497fa Mon Sep 17 00:00:00 2001 From: "evgeni.chasnovski" Date: Mon, 24 Sep 2018 21:07:24 +0300 Subject: [PATCH 62/78] Separate long pipelines ending with plot into two: build data and visualize. --- vignettes/chisq_test.Rmd | 27 ++++++++------------------- vignettes/two_sample_t.Rmd | 28 ++++++++-------------------- 2 files changed, 16 insertions(+), 39 deletions(-) diff --git a/vignettes/chisq_test.Rmd b/vignettes/chisq_test.Rmd index 0454873b..1cb2f535 100644 --- a/vignettes/chisq_test.Rmd +++ b/vignettes/chisq_test.Rmd @@ -87,12 +87,12 @@ Lastly, the observed $\chi^2$ statistic is `r obs_chisq`. ## Randomization approach to $\chi^2$-statistic ```{r} -chisq_null_distn <- fli_small %>% +chisq_null_perm <- fli_small %>% specify(origin ~ season) %>% # alt: response = origin, explanatory = season hypothesize(null = "independence") %>% generate(reps = 1000, type = "permute") %>% calculate(stat = "Chisq") -chisq_null_distn %>% +chisq_null_perm %>% visualize() + shade_p_value(obs_stat = obs_chisq, direction = "greater") ``` @@ -100,7 +100,7 @@ chisq_null_distn %>% ## Calculate the randomization-based $p$-value ```{r} -chisq_null_distn %>% +chisq_null_perm %>% get_p_value(obs_stat = obs_chisq, direction = "greater") ``` @@ -108,35 +108,24 @@ chisq_null_distn %>% ## Theoretical distribution ```{r } -fli_small %>% +chisq_null_theor <- fli_small %>% specify(origin ~ season) %>% hypothesize(null = "independence") %>% # generate() ## Not used for theoretical - calculate(stat = "Chisq") %>% + calculate(stat = "Chisq") +chisq_null_theor %>% visualize(method = "theoretical") + shade_p_value(obs_stat = obs_chisq, direction = "right") ``` ## Overlay appropriate $\chi^2$ distribution on top of permuted statistics -```{r eval=FALSE} -fli_small %>% - specify(origin ~ season) %>% # alt: response = origin, explanatory = season - hypothesize(null = "independence") %>% - generate(reps = 1000, type = "permute") %>% - calculate(stat = "Chisq") %>% - visualize(method = "both") + - shade_p_value(obs_stat = obs_chisq, direction = "right") -``` - -```{r echo=FALSE} -# To use same distribution calculated above -chisq_null_distn %>% +```{r} +chisq_null_perm %>% visualize(method = "both") + shade_p_value(obs_stat = obs_chisq, direction = "right") ``` - ## Compute theoretical p-value ```{r} diff --git a/vignettes/two_sample_t.Rmd b/vignettes/two_sample_t.Rmd index 3916a88c..a4ef6534 100755 --- a/vignettes/two_sample_t.Rmd +++ b/vignettes/two_sample_t.Rmd @@ -86,13 +86,13 @@ The observed $t$ statistic is `r obs_t`. 
## Randomization approach to t-statistic ```{r} -t_null_distn <- fli_small %>% +t_null_perm <- fli_small %>% # alt: response = arr_delay, explanatory = half_year specify(arr_delay ~ half_year) %>% hypothesize(null = "independence") %>% generate(reps = 1000, type = "permute") %>% calculate(stat = "t", order = c("h1", "h2")) -t_null_distn %>% +t_null_perm %>% visualize() + shade_p_value(obs_stat = obs_t, direction = "two_sided") ``` @@ -100,7 +100,7 @@ t_null_distn %>% ## Calculate the randomization-based $p$-value ```{r} -t_null_distn %>% +t_null_perm %>% get_p_value(obs_stat = obs_t, direction = "two_sided") ``` @@ -108,37 +108,25 @@ t_null_distn %>% ## Theoretical distribution ```{r } -fli_small %>% +t_null_theor <- fli_small %>% # alt: response = arr_delay, explanatory = half_year specify(arr_delay ~ half_year) %>% hypothesize(null = "independence") %>% # generate() ## Not used for theoretical - calculate(stat = "t", order = c("h1", "h2")) %>% + calculate(stat = "t", order = c("h1", "h2")) +t_null_theor %>% visualize(method = "theoretical") + shade_p_value(obs_stat = obs_t, direction = "two_sided") ``` ## Overlay appropriate $t$ distribution on top of permuted t-statistics -```{r eval=FALSE} -fli_small %>% - # alt: response = arr_delay, explanatory = half_year - specify(arr_delay ~ half_year) %>% - hypothesize(null = "independence") %>% - generate(reps = 1000, type = "permute") %>% - calculate(stat = "t", order = c("h1", "h2")) %>% - visualize(method = "both") + - shade_p_value(obs_stat = obs_t, direction = "two_sided") -``` - -```{r echo=FALSE} -# To use same distribution calculated above -t_null_distn %>% +```{r} +t_null_perm %>% visualize(method = "both") + shade_p_value(obs_stat = obs_t, direction = "two_sided") ``` - ## Compute theoretical p-value ```{r} From cf00041adc7167da800e5c4f03ec344d10753fa8 Mon Sep 17 00:00:00 2001 From: "evgeni.chasnovski" Date: Mon, 24 Sep 2018 21:07:48 +0300 Subject: [PATCH 63/78] Rebuild vignettes. 
--- inst/doc/chisq_test.R | 28 +++++++++------------------- inst/doc/chisq_test.Rmd | 30 +++++++++--------------------- inst/doc/chisq_test.html | 24 ++++++++++-------------- inst/doc/flights_examples.R | 6 ++---- inst/doc/flights_examples.Rmd | 6 ++---- inst/doc/flights_examples.html | 6 ++---- inst/doc/two_sample_t.R | 32 ++++++++++---------------------- inst/doc/two_sample_t.Rmd | 34 ++++++++++------------------------ inst/doc/two_sample_t.html | 30 ++++++++++++------------------ 9 files changed, 66 insertions(+), 130 deletions(-) diff --git a/inst/doc/chisq_test.R b/inst/doc/chisq_test.R index e93599f9..8f338c9f 100644 --- a/inst/doc/chisq_test.R +++ b/inst/doc/chisq_test.R @@ -37,46 +37,36 @@ obs_chisq <- fli_small %>% chisq_stat(formula = origin ~ season) ## ------------------------------------------------------------------------ -chisq_null_distn <- fli_small %>% +chisq_null_perm <- fli_small %>% specify(origin ~ season) %>% # alt: response = origin, explanatory = season hypothesize(null = "independence") %>% generate(reps = 1000, type = "permute") %>% calculate(stat = "Chisq") -chisq_null_distn %>% +chisq_null_perm %>% visualize() + shade_p_value(obs_stat = obs_chisq, direction = "greater") ## ------------------------------------------------------------------------ -chisq_null_distn %>% +chisq_null_perm %>% get_p_value(obs_stat = obs_chisq, direction = "greater") ## ------------------------------------------------------------------------ -fli_small %>% +chisq_null_theor <- fli_small %>% specify(origin ~ season) %>% hypothesize(null = "independence") %>% # generate() ## Not used for theoretical - calculate(stat = "Chisq") %>% + calculate(stat = "Chisq") +chisq_null_theor %>% visualize(method = "theoretical") + shade_p_value(obs_stat = obs_chisq, direction = "right") -## ----eval=FALSE---------------------------------------------------------- -# fli_small %>% -# specify(origin ~ season) %>% # alt: response = origin, explanatory = season -# hypothesize(null = "independence") %>% -# generate(reps = 1000, type = "permute") %>% -# calculate(stat = "Chisq") %>% -# visualize(method = "both") + -# shade_p_value(obs_stat = obs_chisq, direction = "right") - -## ----echo=FALSE---------------------------------------------------------- -# To use same distribution calculated above -chisq_null_distn %>% +## ------------------------------------------------------------------------ +chisq_null_perm %>% visualize(method = "both") + shade_p_value(obs_stat = obs_chisq, direction = "right") ## ------------------------------------------------------------------------ fli_small %>% chisq_test(formula = origin ~ season) %>% - dplyr::select(p_value) %>% - dplyr::pull() + dplyr::pull(p_value) diff --git a/inst/doc/chisq_test.Rmd b/inst/doc/chisq_test.Rmd index 0f7ea2f0..1cb2f535 100644 --- a/inst/doc/chisq_test.Rmd +++ b/inst/doc/chisq_test.Rmd @@ -87,12 +87,12 @@ Lastly, the observed $\chi^2$ statistic is `r obs_chisq`. 
## Randomization approach to $\chi^2$-statistic ```{r} -chisq_null_distn <- fli_small %>% +chisq_null_perm <- fli_small %>% specify(origin ~ season) %>% # alt: response = origin, explanatory = season hypothesize(null = "independence") %>% generate(reps = 1000, type = "permute") %>% calculate(stat = "Chisq") -chisq_null_distn %>% +chisq_null_perm %>% visualize() + shade_p_value(obs_stat = obs_chisq, direction = "greater") ``` @@ -100,7 +100,7 @@ chisq_null_distn %>% ## Calculate the randomization-based $p$-value ```{r} -chisq_null_distn %>% +chisq_null_perm %>% get_p_value(obs_stat = obs_chisq, direction = "greater") ``` @@ -108,41 +108,29 @@ chisq_null_distn %>% ## Theoretical distribution ```{r } -fli_small %>% +chisq_null_theor <- fli_small %>% specify(origin ~ season) %>% hypothesize(null = "independence") %>% # generate() ## Not used for theoretical - calculate(stat = "Chisq") %>% + calculate(stat = "Chisq") +chisq_null_theor %>% visualize(method = "theoretical") + shade_p_value(obs_stat = obs_chisq, direction = "right") ``` ## Overlay appropriate $\chi^2$ distribution on top of permuted statistics -```{r eval=FALSE} -fli_small %>% - specify(origin ~ season) %>% # alt: response = origin, explanatory = season - hypothesize(null = "independence") %>% - generate(reps = 1000, type = "permute") %>% - calculate(stat = "Chisq") %>% - visualize(method = "both") + - shade_p_value(obs_stat = obs_chisq, direction = "right") -``` - -```{r echo=FALSE} -# To use same distribution calculated above -chisq_null_distn %>% +```{r} +chisq_null_perm %>% visualize(method = "both") + shade_p_value(obs_stat = obs_chisq, direction = "right") ``` - ## Compute theoretical p-value ```{r} fli_small %>% chisq_test(formula = origin ~ season) %>% - dplyr::select(p_value) %>% - dplyr::pull() + dplyr::pull(p_value) ``` diff --git a/inst/doc/chisq_test.html b/inst/doc/chisq_test.html index 0f324cff..4f3e1658 100644 --- a/inst/doc/chisq_test.html +++ b/inst/doc/chisq_test.html @@ -12,7 +12,7 @@ - + Chi-squared test example using nycflights13 flights data @@ -70,7 +70,7 @@

Chi-squared test example using nycflights13 flights data

Chester Ismay

-

2018-09-14

+

2018-09-24

@@ -174,19 +174,19 @@

Calculate observed statistic

Randomization approach to \(\chi^2\)-statistic

-
chisq_null_distn <- fli_small %>%
+
chisq_null_perm <- fli_small %>%
   specify(origin ~ season) %>% # alt: response = origin, explanatory = season
   hypothesize(null = "independence") %>%
   generate(reps = 1000, type = "permute") %>%
   calculate(stat = "Chisq")
-chisq_null_distn %>%
+chisq_null_perm %>%
   visualize() +
     shade_p_value(obs_stat = obs_chisq, direction = "greater")

Calculate the randomization-based \(p\)-value

-
chisq_null_distn %>% 
+
chisq_null_perm %>% 
   get_p_value(obs_stat = obs_chisq, direction = "greater")
@@ -205,11 +205,12 @@

Calculate the randomization-based \(p\)-val

Theoretical distribution

-
fli_small %>%
+
chisq_null_theor <- fli_small %>%
   specify(origin ~ season) %>% 
   hypothesize(null = "independence") %>%
   # generate() ## Not used for theoretical
-  calculate(stat = "Chisq") %>%
+  calculate(stat = "Chisq")
+chisq_null_theor %>%
   visualize(method = "theoretical") +
     shade_p_value(obs_stat = obs_chisq, direction = "right")
## Warning: Check to make sure the conditions have been met for the
@@ -218,11 +219,7 @@ 

Theoretical distribution

Overlay appropriate \(\chi^2\) distribution on top of permuted statistics

-
fli_small %>%
-  specify(origin ~ season) %>% # alt: response = origin, explanatory = season
-  hypothesize(null = "independence") %>%
-  generate(reps = 1000, type = "permute") %>%
-  calculate(stat = "Chisq") %>% 
+
chisq_null_perm %>% 
   visualize(method = "both") +
     shade_p_value(obs_stat = obs_chisq, direction = "right")
## Warning: Check to make sure the conditions have been met for the
@@ -233,8 +230,7 @@ 

Overlay appropriate \(\chi^2\) distribution

Compute theoretical p-value

fli_small %>% 
   chisq_test(formula = origin ~ season) %>% 
-  dplyr::select(p_value) %>% 
-  dplyr::pull()
+ dplyr::pull(p_value)
## [1] 0.7513
diff --git a/inst/doc/flights_examples.R b/inst/doc/flights_examples.R index f72fc849..0d8022e1 100644 --- a/inst/doc/flights_examples.R +++ b/inst/doc/flights_examples.R @@ -190,8 +190,7 @@ null_distn %>% slope_hat <- lm(arr_delay ~ dep_delay, data = fli_small) %>% broom::tidy() %>% filter(term == "dep_delay") %>% - select(estimate) %>% - pull() + pull(estimate) null_distn <- fli_small %>% specify(arr_delay ~ dep_delay) %>% hypothesize(null = "independence") %>% @@ -262,8 +261,7 @@ c(lower = d_hat - 2 * sd(boot), slope_hat <- lm(arr_delay ~ dep_delay, data = fli_small) %>% broom::tidy() %>% filter(term == "dep_delay") %>% - select(estimate) %>% - pull() + pull(estimate) boot <- fli_small %>% specify(arr_delay ~ dep_delay) %>% generate(reps = 1000, type = "bootstrap") %>% diff --git a/inst/doc/flights_examples.Rmd b/inst/doc/flights_examples.Rmd index 07f3f82c..059d2da2 100644 --- a/inst/doc/flights_examples.Rmd +++ b/inst/doc/flights_examples.Rmd @@ -259,8 +259,7 @@ null_distn %>% slope_hat <- lm(arr_delay ~ dep_delay, data = fli_small) %>% broom::tidy() %>% filter(term == "dep_delay") %>% - select(estimate) %>% - pull() + pull(estimate) null_distn <- fli_small %>% specify(arr_delay ~ dep_delay) %>% hypothesize(null = "independence") %>% @@ -348,8 +347,7 @@ c(lower = d_hat - 2 * sd(boot), slope_hat <- lm(arr_delay ~ dep_delay, data = fli_small) %>% broom::tidy() %>% filter(term == "dep_delay") %>% - select(estimate) %>% - pull() + pull(estimate) boot <- fli_small %>% specify(arr_delay ~ dep_delay) %>% generate(reps = 1000, type = "bootstrap") %>% diff --git a/inst/doc/flights_examples.html b/inst/doc/flights_examples.html index 70b57a27..084b8af7 100644 --- a/inst/doc/flights_examples.html +++ b/inst/doc/flights_examples.html @@ -343,8 +343,7 @@

Two numerical vars - SLR

slope_hat <- lm(arr_delay ~ dep_delay, data = fli_small) %>% 
   broom::tidy() %>% 
   filter(term == "dep_delay") %>% 
-  select(estimate) %>% 
-  pull()
+  pull(estimate)
 null_distn <- fli_small %>%
    specify(arr_delay ~ dep_delay) %>% 
    hypothesize(null = "independence") %>%
@@ -432,8 +431,7 @@ 

Two numerical vars - SLR

slope_hat <- lm(arr_delay ~ dep_delay, data = fli_small) %>% 
   broom::tidy() %>% 
   filter(term == "dep_delay") %>% 
-  select(estimate) %>% 
-  pull()
+  pull(estimate)
 boot <- fli_small %>%
    specify(arr_delay ~ dep_delay) %>% 
    generate(reps = 1000, type = "bootstrap") %>%
diff --git a/inst/doc/two_sample_t.R b/inst/doc/two_sample_t.R
index 69ce6fd4..ff907dfa 100644
--- a/inst/doc/two_sample_t.R
+++ b/inst/doc/two_sample_t.R
@@ -29,51 +29,40 @@ obs_t <- fli_small %>%
 obs_t <- fli_small %>% 
   t_test(formula = arr_delay ~ half_year, alternative = "two_sided",
          order = c("h1", "h2")) %>% 
-  dplyr::select(statistic) %>% 
-  dplyr::pull()
+  dplyr::pull(statistic)
 
 ## ------------------------------------------------------------------------
 obs_t <- fli_small %>% 
   t_stat(formula = arr_delay ~ half_year, order = c("h1", "h2"))
 
 ## ------------------------------------------------------------------------
-t_null_distn <- fli_small %>%
+t_null_perm <- fli_small %>%
   # alt: response = arr_delay, explanatory = half_year
   specify(arr_delay ~ half_year) %>%
   hypothesize(null = "independence") %>%
   generate(reps = 1000, type = "permute") %>%
   calculate(stat = "t", order = c("h1", "h2"))
-t_null_distn %>%
+t_null_perm %>%
   visualize() +
     shade_p_value(obs_stat = obs_t, direction = "two_sided")
 
 ## ------------------------------------------------------------------------
-t_null_distn %>% 
+t_null_perm %>% 
   get_p_value(obs_stat = obs_t, direction = "two_sided")
 
 ## ------------------------------------------------------------------------
-fli_small %>%
+t_null_theor <- fli_small %>%
   # alt: response = arr_delay, explanatory = half_year
   specify(arr_delay ~ half_year) %>%
   hypothesize(null = "independence") %>%
   # generate() ## Not used for theoretical
-  calculate(stat = "t", order = c("h1", "h2")) %>%
+  calculate(stat = "t", order = c("h1", "h2"))
+t_null_theor %>%
   visualize(method = "theoretical") +
     shade_p_value(obs_stat = obs_t, direction = "two_sided")
 
-## ----eval=FALSE----------------------------------------------------------
-#  fli_small %>%
-#    # alt: response = arr_delay, explanatory = half_year
-#    specify(arr_delay ~ half_year) %>%
-#    hypothesize(null = "independence") %>%
-#    generate(reps = 1000, type = "permute") %>%
-#    calculate(stat = "t", order = c("h1", "h2")) %>%
-#    visualize(method = "both") +
-#      shade_p_value(obs_stat = obs_t, direction = "two_sided")
-
-## ----echo=FALSE----------------------------------------------------------
-# To use same distribution calculated above
-t_null_distn %>% 
+## ------------------------------------------------------------------------
+t_null_perm %>% 
   visualize(method = "both") +
     shade_p_value(obs_stat = obs_t, direction = "two_sided")
 
@@ -82,6 +71,5 @@ fli_small %>%
   t_test(formula = arr_delay ~ half_year,
          alternative = "two_sided",
          order = c("h1", "h2")) %>% 
-  dplyr::select(p_value) %>% 
-  dplyr::pull()
+  dplyr::pull(p_value)
 
diff --git a/inst/doc/two_sample_t.Rmd b/inst/doc/two_sample_t.Rmd
index c77980bb..a4ef6534 100755
--- a/inst/doc/two_sample_t.Rmd
+++ b/inst/doc/two_sample_t.Rmd
@@ -69,8 +69,7 @@ Or using `t_test` in `infer`
 obs_t <- fli_small %>% 
   t_test(formula = arr_delay ~ half_year, alternative = "two_sided",
          order = c("h1", "h2")) %>% 
-  dplyr::select(statistic) %>% 
-  dplyr::pull()
+  dplyr::pull(statistic)
 ```
 
 The observed $t$ statistic is `r obs_t`.
@@ -87,13 +86,13 @@ The observed $t$ statistic is `r obs_t`.
 ## Randomization approach to t-statistic
 
 ```{r}
-t_null_distn <- fli_small %>%
+t_null_perm <- fli_small %>%
   # alt: response = arr_delay, explanatory = half_year
   specify(arr_delay ~ half_year) %>%
   hypothesize(null = "independence") %>%
   generate(reps = 1000, type = "permute") %>%
   calculate(stat = "t", order = c("h1", "h2"))
-t_null_distn %>%
+t_null_perm %>%
   visualize() +
     shade_p_value(obs_stat = obs_t, direction = "two_sided")
 ```
@@ -101,7 +100,7 @@ t_null_distn %>%
 ## Calculate the randomization-based $p$-value
 
 ```{r}
-t_null_distn %>% 
+t_null_perm %>% 
   get_p_value(obs_stat = obs_t, direction = "two_sided")
 ```
 
@@ -109,37 +108,25 @@ t_null_distn %>%
 ## Theoretical distribution
 
 ```{r }
-fli_small %>%
+t_null_theor <- fli_small %>%
   # alt: response = arr_delay, explanatory = half_year
   specify(arr_delay ~ half_year) %>%
   hypothesize(null = "independence") %>%
   # generate() ## Not used for theoretical
-  calculate(stat = "t", order = c("h1", "h2")) %>%
+  calculate(stat = "t", order = c("h1", "h2"))
+t_null_theor %>%
   visualize(method = "theoretical") +
     shade_p_value(obs_stat = obs_t, direction = "two_sided")
 ```
 
 ## Overlay appropriate $t$ distribution on top of permuted t-statistics
 
-```{r eval=FALSE}
-fli_small %>%
-  # alt: response = arr_delay, explanatory = half_year
-  specify(arr_delay ~ half_year) %>%
-  hypothesize(null = "independence") %>%
-  generate(reps = 1000, type = "permute") %>%
-  calculate(stat = "t", order = c("h1", "h2")) %>% 
-  visualize(method = "both") +
-    shade_p_value(obs_stat = obs_t, direction = "two_sided")
-```
-
-```{r echo=FALSE}
-# To use same distribution calculated above
-t_null_distn %>% 
+```{r}
+t_null_perm %>% 
   visualize(method = "both") +
     shade_p_value(obs_stat = obs_t, direction = "two_sided")
 ```
 
-
 ## Compute theoretical p-value
 
 ```{r}
@@ -147,7 +134,6 @@ fli_small %>%
   t_test(formula = arr_delay ~ half_year,
          alternative = "two_sided",
          order = c("h1", "h2")) %>% 
-  dplyr::select(p_value) %>% 
-  dplyr::pull()
+  dplyr::pull(p_value)
 ```
 
diff --git a/inst/doc/two_sample_t.html b/inst/doc/two_sample_t.html
index 94ebc223..c115c4a5 100644
--- a/inst/doc/two_sample_t.html
+++ b/inst/doc/two_sample_t.html
@@ -12,7 +12,7 @@
 
 
 
-
+
 
 Two sample t test example using nycflights13 flights data
 
@@ -70,7 +70,7 @@
 
 

Two sample \(t\) test example using nycflights13 flights data

Chester Ismay

-

2018-09-14

+

2018-09-24

@@ -135,8 +135,7 @@

Calculate observed statistic

obs_t <- fli_small %>% 
   t_test(formula = arr_delay ~ half_year, alternative = "two_sided",
          order = c("h1", "h2")) %>% 
-  dplyr::select(statistic) %>% 
-  dplyr::pull()
+ dplyr::pull(statistic)

The observed \(t\) statistic is 0.8685.

Or using another shortcut function in infer:

obs_t <- fli_small %>% 
@@ -160,21 +159,21 @@ 

Calculate observed statistic

Randomization approach to t-statistic

-
t_null_distn <- fli_small %>%
+
t_null_perm <- fli_small %>%
   # alt: response = arr_delay, explanatory = half_year
   specify(arr_delay ~ half_year) %>%
   hypothesize(null = "independence") %>%
   generate(reps = 1000, type = "permute") %>%
   calculate(stat = "t", order = c("h1", "h2"))
## Warning: Removed 15 rows containing missing values.
-
t_null_distn %>%
+
t_null_perm %>%
   visualize() +
     shade_p_value(obs_stat = obs_t, direction = "two_sided")

Calculate the randomization-based \(p\)-value

-
t_null_distn %>% 
+
t_null_perm %>% 
   get_p_value(obs_stat = obs_t, direction = "two_sided")

@@ -193,27 +192,23 @@

Calculate the randomization-based \(p\)-val

Theoretical distribution

-
fli_small %>%
+
t_null_theor <- fli_small %>%
   # alt: response = arr_delay, explanatory = half_year
   specify(arr_delay ~ half_year) %>%
   hypothesize(null = "independence") %>%
   # generate() ## Not used for theoretical
-  calculate(stat = "t", order = c("h1", "h2")) %>%
+  calculate(stat = "t", order = c("h1", "h2"))
+
## Warning: Removed 15 rows containing missing values.
+
t_null_theor %>%
   visualize(method = "theoretical") +
     shade_p_value(obs_stat = obs_t, direction = "two_sided")
-
## Warning: Removed 15 rows containing missing values.
## Warning: Check to make sure the conditions have been met for the
 ## theoretical method. {infer} currently does not check these for you.

Overlay appropriate \(t\) distribution on top of permuted t-statistics

-
fli_small %>%
-  # alt: response = arr_delay, explanatory = half_year
-  specify(arr_delay ~ half_year) %>%
-  hypothesize(null = "independence") %>%
-  generate(reps = 1000, type = "permute") %>%
-  calculate(stat = "t", order = c("h1", "h2")) %>% 
+
t_null_perm %>% 
   visualize(method = "both") +
     shade_p_value(obs_stat = obs_t, direction = "two_sided")
## Warning: Check to make sure the conditions have been met for the
@@ -226,8 +221,7 @@ 

Compute theoretical p-value

t_test(formula = arr_delay ~ half_year, alternative = "two_sided",
       order = c("h1", "h2")) %>%
-  dplyr::select(p_value) %>%
-  dplyr::pull()
+ dplyr::pull(p_value)
## [1] 0.3855
From 35f3d52632ec168ec66de36c80efd7c6ce3160a7 Mon Sep 17 00:00:00 2001 From: "evgeni.chasnovski" Date: Mon, 24 Sep 2018 22:26:22 +0300 Subject: [PATCH 64/78] Stop using `%>%` and `+` in one pipeline in vignettes. --- inst/doc/chisq_test.R | 17 +-- inst/doc/chisq_test.Rmd | 17 +-- inst/doc/chisq_test.html | 17 +-- inst/doc/observed_stat_examples.R | 210 +++++++++++++-------------- inst/doc/observed_stat_examples.Rmd | 210 +++++++++++++-------------- inst/doc/observed_stat_examples.html | 168 +++++++++------------ inst/doc/two_sample_t.R | 17 +-- inst/doc/two_sample_t.Rmd | 17 +-- inst/doc/two_sample_t.html | 15 +- vignettes/chisq_test.Rmd | 17 +-- vignettes/observed_stat_examples.Rmd | 210 +++++++++++++-------------- vignettes/two_sample_t.Rmd | 17 +-- 12 files changed, 449 insertions(+), 483 deletions(-) diff --git a/inst/doc/chisq_test.R b/inst/doc/chisq_test.R index 8f338c9f..fc1d4f24 100644 --- a/inst/doc/chisq_test.R +++ b/inst/doc/chisq_test.R @@ -42,9 +42,9 @@ chisq_null_perm <- fli_small %>% hypothesize(null = "independence") %>% generate(reps = 1000, type = "permute") %>% calculate(stat = "Chisq") -chisq_null_perm %>% - visualize() + - shade_p_value(obs_stat = obs_chisq, direction = "greater") + +visualize(chisq_null_perm) + + shade_p_value(obs_stat = obs_chisq, direction = "greater") ## ------------------------------------------------------------------------ chisq_null_perm %>% @@ -56,14 +56,13 @@ chisq_null_theor <- fli_small %>% hypothesize(null = "independence") %>% # generate() ## Not used for theoretical calculate(stat = "Chisq") -chisq_null_theor %>% - visualize(method = "theoretical") + - shade_p_value(obs_stat = obs_chisq, direction = "right") + +visualize(chisq_null_theor, method = "theoretical") + + shade_p_value(obs_stat = obs_chisq, direction = "right") ## ------------------------------------------------------------------------ -chisq_null_perm %>% - visualize(method = "both") + - shade_p_value(obs_stat = obs_chisq, direction = "right") +visualize(chisq_null_perm, method = "both") + + shade_p_value(obs_stat = obs_chisq, direction = "right") ## ------------------------------------------------------------------------ fli_small %>% diff --git a/inst/doc/chisq_test.Rmd b/inst/doc/chisq_test.Rmd index 1cb2f535..2693b8c3 100644 --- a/inst/doc/chisq_test.Rmd +++ b/inst/doc/chisq_test.Rmd @@ -92,9 +92,9 @@ chisq_null_perm <- fli_small %>% hypothesize(null = "independence") %>% generate(reps = 1000, type = "permute") %>% calculate(stat = "Chisq") -chisq_null_perm %>% - visualize() + - shade_p_value(obs_stat = obs_chisq, direction = "greater") + +visualize(chisq_null_perm) + + shade_p_value(obs_stat = obs_chisq, direction = "greater") ``` ## Calculate the randomization-based $p$-value @@ -113,17 +113,16 @@ chisq_null_theor <- fli_small %>% hypothesize(null = "independence") %>% # generate() ## Not used for theoretical calculate(stat = "Chisq") -chisq_null_theor %>% - visualize(method = "theoretical") + - shade_p_value(obs_stat = obs_chisq, direction = "right") + +visualize(chisq_null_theor, method = "theoretical") + + shade_p_value(obs_stat = obs_chisq, direction = "right") ``` ## Overlay appropriate $\chi^2$ distribution on top of permuted statistics ```{r} -chisq_null_perm %>% - visualize(method = "both") + - shade_p_value(obs_stat = obs_chisq, direction = "right") +visualize(chisq_null_perm, method = "both") + + shade_p_value(obs_stat = obs_chisq, direction = "right") ``` ## Compute theoretical p-value diff --git a/inst/doc/chisq_test.html b/inst/doc/chisq_test.html index 
4f3e1658..b6f3c74b 100644 --- a/inst/doc/chisq_test.html +++ b/inst/doc/chisq_test.html @@ -179,9 +179,9 @@

Randomization approach to \(\chi^2\)-statistic

  hypothesize(null = "independence") %>%
  generate(reps = 1000, type = "permute") %>%
  calculate(stat = "Chisq")
-chisq_null_perm %>%
-  visualize() +
-    shade_p_value(obs_stat = obs_chisq, direction = "greater")
+
+visualize(chisq_null_perm) +
+  shade_p_value(obs_stat = obs_chisq, direction = "greater")

@@ -210,18 +210,17 @@

Theoretical distribution

  hypothesize(null = "independence") %>%
  # generate() ## Not used for theoretical
  calculate(stat = "Chisq")
-chisq_null_theor %>%
-  visualize(method = "theoretical") +
-    shade_p_value(obs_stat = obs_chisq, direction = "right")
+
+visualize(chisq_null_theor, method = "theoretical") +
+  shade_p_value(obs_stat = obs_chisq, direction = "right")
## Warning: Check to make sure the conditions have been met for the
 ## theoretical method. {infer} currently does not check these for you.

Overlay appropriate \(\chi^2\) distribution on top of permuted statistics

-
chisq_null_perm %>% 
-  visualize(method = "both") +
-    shade_p_value(obs_stat = obs_chisq, direction = "right")
+
visualize(chisq_null_perm, method = "both") +
+  shade_p_value(obs_stat = obs_chisq, direction = "right")
## Warning: Check to make sure the conditions have been met for the
 ## theoretical method. {infer} currently does not check these for you.

diff --git a/inst/doc/observed_stat_examples.R b/inst/doc/observed_stat_examples.R index 4beac9aa..e4556a1b 100644 --- a/inst/doc/observed_stat_examples.R +++ b/inst/doc/observed_stat_examples.R @@ -34,9 +34,9 @@ null_distn <- fli_small %>% hypothesize(null = "point", mu = 10) %>% generate(reps = 1000) %>% calculate(stat = "mean") -null_distn %>% - visualize() + - shade_p_value(obs_stat = x_bar, direction = "two_sided") + +visualize(null_distn) + + shade_p_value(obs_stat = x_bar, direction = "two_sided") null_distn %>% get_p_value(obs_stat = x_bar, direction = "two_sided") @@ -51,9 +51,9 @@ null_distn <- fli_small %>% hypothesize(null = "point", mu = 8) %>% generate(reps = 1000) %>% calculate(stat = "t") -null_distn %>% - visualize() + - shade_p_value(obs_stat = t_bar, direction = "two_sided") + +visualize(null_distn) + + shade_p_value(obs_stat = t_bar, direction = "two_sided") null_distn %>% get_p_value(obs_stat = t_bar, direction = "two_sided") @@ -68,9 +68,9 @@ null_distn <- fli_small %>% hypothesize(null = "point", med = -1) %>% generate(reps = 1000) %>% calculate(stat = "median") -null_distn %>% - visualize() + - shade_p_value(obs_stat = x_tilde, direction = "two_sided") + +visualize(null_distn) + + shade_p_value(obs_stat = x_tilde, direction = "two_sided") null_distn %>% get_p_value(obs_stat = x_tilde, direction = "two_sided") @@ -85,9 +85,9 @@ null_distn <- fli_small %>% hypothesize(null = "point", p = .5) %>% generate(reps = 1000) %>% calculate(stat = "prop") -null_distn %>% - visualize() + - shade_p_value(obs_stat = p_hat, direction = "two_sided") + +visualize(null_distn) + + shade_p_value(obs_stat = p_hat, direction = "two_sided") null_distn %>% get_p_value(obs_stat = p_hat, direction = "two_sided") @@ -110,9 +110,9 @@ null_distn <- fli_small %>% hypothesize(null = "independence") %>% generate(reps = 1000) %>% calculate(stat = "diff in props", order = c("winter", "summer")) -null_distn %>% - visualize() + - shade_p_value(obs_stat = d_hat, direction = "two_sided") + +visualize(null_distn) + + shade_p_value(obs_stat = d_hat, direction = "two_sided") null_distn %>% get_p_value(obs_stat = d_hat, direction = "two_sided") @@ -127,9 +127,9 @@ null_distn <- fli_small %>% hypothesize(null = "independence") %>% generate(reps = 1000) %>% calculate(stat = "z", order = c("winter", "summer")) -null_distn %>% - visualize() + - shade_p_value(obs_stat = z_hat, direction = "two_sided") + +visualize(null_distn) + + shade_p_value(obs_stat = z_hat, direction = "two_sided") null_distn %>% get_p_value(obs_stat = z_hat, direction = "two_sided") @@ -147,9 +147,9 @@ null_distn <- fli_small %>% p = c("EWR" = .33, "JFK" = .33, "LGA" = .34)) %>% generate(reps = 1000, type = "simulate") %>% calculate(stat = "Chisq") -null_distn %>% - visualize() + - shade_p_value(obs_stat = Chisq_hat, direction = "greater") + +visualize(null_distn) + + shade_p_value(obs_stat = Chisq_hat, direction = "greater") null_distn %>% get_p_value(obs_stat = Chisq_hat, direction = "greater") @@ -164,9 +164,9 @@ null_distn <- fli_small %>% hypothesize(null = "independence") %>% generate(reps = 1000, type = "permute") %>% calculate(stat = "Chisq") -null_distn %>% - visualize() + - shade_p_value(obs_stat = Chisq_hat, direction = "greater") + +visualize(null_distn) + + shade_p_value(obs_stat = Chisq_hat, direction = "greater") null_distn %>% get_p_value(obs_stat = Chisq_hat, direction = "greater") @@ -181,9 +181,9 @@ null_distn <- fli_small %>% hypothesize(null = "independence") %>% generate(reps = 1000, type = "permute") %>% calculate(stat 
= "diff in means", order = c("summer", "winter")) -null_distn %>% - visualize() + - shade_p_value(obs_stat = d_hat, direction = "two_sided") + +visualize(null_distn) + + shade_p_value(obs_stat = d_hat, direction = "two_sided") null_distn %>% get_p_value(obs_stat = d_hat, direction = "two_sided") @@ -198,9 +198,9 @@ null_distn <- fli_small %>% hypothesize(null = "independence") %>% generate(reps = 1000, type = "permute") %>% calculate(stat = "t", order = c("summer", "winter")) -null_distn %>% - visualize() + - shade_p_value(obs_stat = t_hat, direction = "two_sided") + +visualize(null_distn) + + shade_p_value(obs_stat = t_hat, direction = "two_sided") null_distn %>% get_p_value(obs_stat = t_hat, direction = "two_sided") @@ -216,9 +216,9 @@ null_distn <- fli_small %>% hypothesize(null = "independence") %>% generate(reps = 1000, type = "permute") %>% calculate(stat = "diff in medians", order = c("summer", "winter")) -null_distn %>% - visualize() + - shade_p_value(obs_stat = d_hat, direction = "two_sided") + +visualize(null_distn) + + shade_p_value(obs_stat = d_hat, direction = "two_sided") null_distn %>% get_p_value(obs_stat = d_hat, direction = "two_sided") @@ -233,9 +233,9 @@ null_distn <- fli_small %>% hypothesize(null = "independence") %>% generate(reps = 1000, type = "permute") %>% calculate(stat = "F") -null_distn %>% - visualize() + - shade_p_value(obs_stat = F_hat, direction = "greater") + +visualize(null_distn) + + shade_p_value(obs_stat = F_hat, direction = "greater") null_distn %>% get_p_value(obs_stat = F_hat, direction = "greater") @@ -250,9 +250,9 @@ null_distn <- fli_small %>% hypothesize(null = "independence") %>% generate(reps = 1000, type = "permute") %>% calculate(stat = "slope") -null_distn %>% - visualize() + - shade_p_value(obs_stat = slope_hat, direction = "two_sided") + +visualize(null_distn) + + shade_p_value(obs_stat = slope_hat, direction = "two_sided") null_distn %>% get_p_value(obs_stat = slope_hat, direction = "two_sided") @@ -267,9 +267,9 @@ null_distn <- fli_small %>% hypothesize(null = "independence") %>% generate(reps = 1000, type = "permute") %>% calculate(stat = "correlation") -null_distn %>% - visualize() + - shade_p_value(obs_stat = correlation_hat, direction = "two_sided") + +visualize(null_distn) + + shade_p_value(obs_stat = correlation_hat, direction = "two_sided") null_distn %>% get_p_value(obs_stat = correlation_hat, direction = "two_sided") @@ -285,9 +285,9 @@ null_distn %>% # hypothesize(null = "independence") %>% # generate(reps = 1000, type = "permute") %>% # calculate(stat = "t") -# null_distn %>% -# visualize() + -# shade_p_value(obs_stat = t_hat, direction = "two_sided") +# +# visualize(null_distn) + +# shade_p_value(obs_stat = t_hat, direction = "two_sided") # null_distn %>% # get_p_value(obs_stat = t_hat, direction = "two_sided") @@ -302,13 +302,13 @@ boot <- fli_small %>% generate(reps = 1000, type = "bootstrap") %>% calculate(stat = "mean") ( percentile_ci <- get_ci(boot) ) -boot %>% - visualize() + - shade_confidence_interval(endpoints = percentile_ci) + +visualize(boot) + + shade_confidence_interval(endpoints = percentile_ci) ( standard_error_ci <- get_ci(boot, type = "se", point_estimate = x_bar) ) -boot %>% - visualize() + - shade_confidence_interval(endpoints = standard_error_ci) + +visualize(boot) + + shade_confidence_interval(endpoints = standard_error_ci) ## ------------------------------------------------------------------------ ( t_hat <- fli_small %>% @@ -321,13 +321,13 @@ boot <- fli_small %>% generate(reps = 1000, type = 
"bootstrap") %>% calculate(stat = "t") ( percentile_ci <- get_ci(boot) ) -boot %>% - visualize() + - shade_confidence_interval(endpoints = percentile_ci) + +visualize(boot) + + shade_confidence_interval(endpoints = percentile_ci) ( standard_error_ci <- get_ci(boot, type = "se", point_estimate = t_hat) ) -boot %>% - visualize() + - shade_confidence_interval(endpoints = standard_error_ci) + +visualize(boot) + + shade_confidence_interval(endpoints = standard_error_ci) ## ------------------------------------------------------------------------ ( p_hat <- fli_small %>% @@ -340,13 +340,13 @@ boot <- fli_small %>% generate(reps = 1000, type = "bootstrap") %>% calculate(stat = "prop") ( percentile_ci <- get_ci(boot) ) -boot %>% - visualize() + - shade_confidence_interval(endpoints = percentile_ci) + +visualize(boot) + + shade_confidence_interval(endpoints = percentile_ci) ( standard_error_ci <- get_ci(boot, type = "se", point_estimate = p_hat) ) -boot %>% - visualize() + - shade_confidence_interval(endpoints = standard_error_ci) + +visualize(boot) + + shade_confidence_interval(endpoints = standard_error_ci) ## ------------------------------------------------------------------------ ( d_hat <- fli_small %>% @@ -359,13 +359,13 @@ boot <- fli_small %>% generate(reps = 1000, type = "bootstrap") %>% calculate(stat = "diff in means", order = c("summer", "winter")) ( percentile_ci <- get_ci(boot) ) -boot %>% - visualize() + - shade_confidence_interval(endpoints = percentile_ci) + +visualize(boot) + + shade_confidence_interval(endpoints = percentile_ci) ( standard_error_ci <- get_ci(boot, type = "se", point_estimate = d_hat) ) -boot %>% - visualize() + - shade_confidence_interval(endpoints = standard_error_ci) + +visualize(boot) + + shade_confidence_interval(endpoints = standard_error_ci) ## ------------------------------------------------------------------------ ( t_hat <- fli_small %>% @@ -378,13 +378,13 @@ boot <- fli_small %>% generate(reps = 1000, type = "bootstrap") %>% calculate(stat = "t", order = c("summer", "winter")) ( percentile_ci <- get_ci(boot) ) -boot %>% - visualize() + - shade_confidence_interval(endpoints = percentile_ci) + +visualize(boot) + + shade_confidence_interval(endpoints = percentile_ci) ( standard_error_ci <- get_ci(boot, type = "se", point_estimate = t_hat) ) -boot %>% - visualize() + - shade_confidence_interval(endpoints = standard_error_ci) + +visualize(boot) + + shade_confidence_interval(endpoints = standard_error_ci) ## ------------------------------------------------------------------------ ( d_hat <- fli_small %>% @@ -397,13 +397,13 @@ boot <- fli_small %>% generate(reps = 1000, type = "bootstrap") %>% calculate(stat = "diff in props", order = c("summer", "winter")) ( percentile_ci <- get_ci(boot) ) -boot %>% - visualize() + - shade_confidence_interval(endpoints = percentile_ci) + +visualize(boot) + + shade_confidence_interval(endpoints = percentile_ci) ( standard_error_ci <- get_ci(boot, type = "se", point_estimate = d_hat) ) -boot %>% - visualize() + - shade_confidence_interval(endpoints = standard_error_ci) + +visualize(boot) + + shade_confidence_interval(endpoints = standard_error_ci) ## ------------------------------------------------------------------------ ( z_hat <- fli_small %>% @@ -416,13 +416,13 @@ boot <- fli_small %>% generate(reps = 1000, type = "bootstrap") %>% calculate(stat = "z", order = c("summer", "winter")) ( percentile_ci <- get_ci(boot) ) -boot %>% - visualize() + - shade_confidence_interval(endpoints = percentile_ci) + +visualize(boot) + + 
shade_confidence_interval(endpoints = percentile_ci) ( standard_error_ci <- get_ci(boot, type = "se", point_estimate = z_hat) ) -boot %>% - visualize() + - shade_confidence_interval(endpoints = standard_error_ci) + +visualize(boot) + + shade_confidence_interval(endpoints = standard_error_ci) ## ------------------------------------------------------------------------ ( slope_hat <- fli_small %>% @@ -435,13 +435,13 @@ boot <- fli_small %>% generate(reps = 1000, type = "bootstrap") %>% calculate(stat = "slope") ( percentile_ci <- get_ci(boot) ) -boot %>% - visualize() + - shade_confidence_interval(endpoints = percentile_ci) + +visualize(boot) + + shade_confidence_interval(endpoints = percentile_ci) ( standard_error_ci <- get_ci(boot, type = "se", point_estimate = slope_hat) ) -boot %>% - visualize() + - shade_confidence_interval(endpoints = standard_error_ci) + +visualize(boot) + + shade_confidence_interval(endpoints = standard_error_ci) ## ------------------------------------------------------------------------ ( correlation_hat <- fli_small %>% @@ -454,14 +454,14 @@ boot <- fli_small %>% generate(reps = 1000, type = "bootstrap") %>% calculate(stat = "correlation") ( percentile_ci <- get_ci(boot) ) -boot %>% - visualize() + - shade_confidence_interval(endpoints = percentile_ci) + +visualize(boot) + + shade_confidence_interval(endpoints = percentile_ci) ( standard_error_ci <- get_ci(boot, type = "se", point_estimate = correlation_hat) ) -boot %>% - visualize() + - shade_confidence_interval(endpoints = standard_error_ci) + +visualize(boot) + + shade_confidence_interval(endpoints = standard_error_ci) ## ----eval=FALSE, echo=FALSE---------------------------------------------- # # **Point estimate** @@ -475,11 +475,11 @@ boot %>% # generate(reps = 1000, type = "bootstrap") %>% # calculate(stat = "t") # ( percentile_ci <- get_ci(boot) ) -# boot %>% -# visualize() + -# shade_confidence_interval(endpoints = percentile_ci) +# +# visualize(boot) + +# shade_confidence_interval(endpoints = percentile_ci) # ( standard_error_ci <- get_ci(boot, type = "se", point_estimate = t_hat) ) -# boot %>% -# visualize() + -# shade_confidence_interval(endpoints = standard_error_ci) +# +# visualize(boot) + +# shade_confidence_interval(endpoints = standard_error_ci) diff --git a/inst/doc/observed_stat_examples.Rmd b/inst/doc/observed_stat_examples.Rmd index 30404cda..780512b3 100644 --- a/inst/doc/observed_stat_examples.Rmd +++ b/inst/doc/observed_stat_examples.Rmd @@ -69,9 +69,9 @@ null_distn <- fli_small %>% hypothesize(null = "point", mu = 10) %>% generate(reps = 1000) %>% calculate(stat = "mean") -null_distn %>% - visualize() + - shade_p_value(obs_stat = x_bar, direction = "two_sided") + +visualize(null_distn) + + shade_p_value(obs_stat = x_bar, direction = "two_sided") null_distn %>% get_p_value(obs_stat = x_bar, direction = "two_sided") ``` @@ -91,9 +91,9 @@ null_distn <- fli_small %>% hypothesize(null = "point", mu = 8) %>% generate(reps = 1000) %>% calculate(stat = "t") -null_distn %>% - visualize() + - shade_p_value(obs_stat = t_bar, direction = "two_sided") + +visualize(null_distn) + + shade_p_value(obs_stat = t_bar, direction = "two_sided") null_distn %>% get_p_value(obs_stat = t_bar, direction = "two_sided") ``` @@ -115,9 +115,9 @@ null_distn <- fli_small %>% hypothesize(null = "point", med = -1) %>% generate(reps = 1000) %>% calculate(stat = "median") -null_distn %>% - visualize() + - shade_p_value(obs_stat = x_tilde, direction = "two_sided") + +visualize(null_distn) + + shade_p_value(obs_stat = x_tilde, 
direction = "two_sided") null_distn %>% get_p_value(obs_stat = x_tilde, direction = "two_sided") ``` @@ -138,9 +138,9 @@ null_distn <- fli_small %>% hypothesize(null = "point", p = .5) %>% generate(reps = 1000) %>% calculate(stat = "prop") -null_distn %>% - visualize() + - shade_p_value(obs_stat = p_hat, direction = "two_sided") + +visualize(null_distn) + + shade_p_value(obs_stat = p_hat, direction = "two_sided") null_distn %>% get_p_value(obs_stat = p_hat, direction = "two_sided") ``` @@ -177,9 +177,9 @@ null_distn <- fli_small %>% hypothesize(null = "independence") %>% generate(reps = 1000) %>% calculate(stat = "diff in props", order = c("winter", "summer")) -null_distn %>% - visualize() + - shade_p_value(obs_stat = d_hat, direction = "two_sided") + +visualize(null_distn) + + shade_p_value(obs_stat = d_hat, direction = "two_sided") null_distn %>% get_p_value(obs_stat = d_hat, direction = "two_sided") ``` @@ -200,9 +200,9 @@ null_distn <- fli_small %>% hypothesize(null = "independence") %>% generate(reps = 1000) %>% calculate(stat = "z", order = c("winter", "summer")) -null_distn %>% - visualize() + - shade_p_value(obs_stat = z_hat, direction = "two_sided") + +visualize(null_distn) + + shade_p_value(obs_stat = z_hat, direction = "two_sided") null_distn %>% get_p_value(obs_stat = z_hat, direction = "two_sided") ``` @@ -230,9 +230,9 @@ null_distn <- fli_small %>% p = c("EWR" = .33, "JFK" = .33, "LGA" = .34)) %>% generate(reps = 1000, type = "simulate") %>% calculate(stat = "Chisq") -null_distn %>% - visualize() + - shade_p_value(obs_stat = Chisq_hat, direction = "greater") + +visualize(null_distn) + + shade_p_value(obs_stat = Chisq_hat, direction = "greater") null_distn %>% get_p_value(obs_stat = Chisq_hat, direction = "greater") ``` @@ -253,9 +253,9 @@ null_distn <- fli_small %>% hypothesize(null = "independence") %>% generate(reps = 1000, type = "permute") %>% calculate(stat = "Chisq") -null_distn %>% - visualize() + - shade_p_value(obs_stat = Chisq_hat, direction = "greater") + +visualize(null_distn) + + shade_p_value(obs_stat = Chisq_hat, direction = "greater") null_distn %>% get_p_value(obs_stat = Chisq_hat, direction = "greater") ``` @@ -276,9 +276,9 @@ null_distn <- fli_small %>% hypothesize(null = "independence") %>% generate(reps = 1000, type = "permute") %>% calculate(stat = "diff in means", order = c("summer", "winter")) -null_distn %>% - visualize() + - shade_p_value(obs_stat = d_hat, direction = "two_sided") + +visualize(null_distn) + + shade_p_value(obs_stat = d_hat, direction = "two_sided") null_distn %>% get_p_value(obs_stat = d_hat, direction = "two_sided") ``` @@ -299,9 +299,9 @@ null_distn <- fli_small %>% hypothesize(null = "independence") %>% generate(reps = 1000, type = "permute") %>% calculate(stat = "t", order = c("summer", "winter")) -null_distn %>% - visualize() + - shade_p_value(obs_stat = t_hat, direction = "two_sided") + +visualize(null_distn) + + shade_p_value(obs_stat = t_hat, direction = "two_sided") null_distn %>% get_p_value(obs_stat = t_hat, direction = "two_sided") ``` @@ -325,9 +325,9 @@ null_distn <- fli_small %>% hypothesize(null = "independence") %>% generate(reps = 1000, type = "permute") %>% calculate(stat = "diff in medians", order = c("summer", "winter")) -null_distn %>% - visualize() + - shade_p_value(obs_stat = d_hat, direction = "two_sided") + +visualize(null_distn) + + shade_p_value(obs_stat = d_hat, direction = "two_sided") null_distn %>% get_p_value(obs_stat = d_hat, direction = "two_sided") ``` @@ -348,9 +348,9 @@ null_distn <- fli_small 
%>% hypothesize(null = "independence") %>% generate(reps = 1000, type = "permute") %>% calculate(stat = "F") -null_distn %>% - visualize() + - shade_p_value(obs_stat = F_hat, direction = "greater") + +visualize(null_distn) + + shade_p_value(obs_stat = F_hat, direction = "greater") null_distn %>% get_p_value(obs_stat = F_hat, direction = "greater") ``` @@ -371,9 +371,9 @@ null_distn <- fli_small %>% hypothesize(null = "independence") %>% generate(reps = 1000, type = "permute") %>% calculate(stat = "slope") -null_distn %>% - visualize() + - shade_p_value(obs_stat = slope_hat, direction = "two_sided") + +visualize(null_distn) + + shade_p_value(obs_stat = slope_hat, direction = "two_sided") null_distn %>% get_p_value(obs_stat = slope_hat, direction = "two_sided") ``` @@ -394,9 +394,9 @@ null_distn <- fli_small %>% hypothesize(null = "independence") %>% generate(reps = 1000, type = "permute") %>% calculate(stat = "correlation") -null_distn %>% - visualize() + - shade_p_value(obs_stat = correlation_hat, direction = "two_sided") + +visualize(null_distn) + + shade_p_value(obs_stat = correlation_hat, direction = "two_sided") null_distn %>% get_p_value(obs_stat = correlation_hat, direction = "two_sided") ``` @@ -420,9 +420,9 @@ null_distn <- fli_small %>% hypothesize(null = "independence") %>% generate(reps = 1000, type = "permute") %>% calculate(stat = "t") -null_distn %>% - visualize() + - shade_p_value(obs_stat = t_hat, direction = "two_sided") + +visualize(null_distn) + + shade_p_value(obs_stat = t_hat, direction = "two_sided") null_distn %>% get_p_value(obs_stat = t_hat, direction = "two_sided") ``` @@ -446,13 +446,13 @@ boot <- fli_small %>% generate(reps = 1000, type = "bootstrap") %>% calculate(stat = "mean") ( percentile_ci <- get_ci(boot) ) -boot %>% - visualize() + - shade_confidence_interval(endpoints = percentile_ci) + +visualize(boot) + + shade_confidence_interval(endpoints = percentile_ci) ( standard_error_ci <- get_ci(boot, type = "se", point_estimate = x_bar) ) -boot %>% - visualize() + - shade_confidence_interval(endpoints = standard_error_ci) + +visualize(boot) + + shade_confidence_interval(endpoints = standard_error_ci) ``` ### One numerical (one mean - standardized) @@ -471,13 +471,13 @@ boot <- fli_small %>% generate(reps = 1000, type = "bootstrap") %>% calculate(stat = "t") ( percentile_ci <- get_ci(boot) ) -boot %>% - visualize() + - shade_confidence_interval(endpoints = percentile_ci) + +visualize(boot) + + shade_confidence_interval(endpoints = percentile_ci) ( standard_error_ci <- get_ci(boot, type = "se", point_estimate = t_hat) ) -boot %>% - visualize() + - shade_confidence_interval(endpoints = standard_error_ci) + +visualize(boot) + + shade_confidence_interval(endpoints = standard_error_ci) ``` @@ -497,13 +497,13 @@ boot <- fli_small %>% generate(reps = 1000, type = "bootstrap") %>% calculate(stat = "prop") ( percentile_ci <- get_ci(boot) ) -boot %>% - visualize() + - shade_confidence_interval(endpoints = percentile_ci) + +visualize(boot) + + shade_confidence_interval(endpoints = percentile_ci) ( standard_error_ci <- get_ci(boot, type = "se", point_estimate = p_hat) ) -boot %>% - visualize() + - shade_confidence_interval(endpoints = standard_error_ci) + +visualize(boot) + + shade_confidence_interval(endpoints = standard_error_ci) ``` ### One categorical variable (standardized proportion $z$) @@ -526,13 +526,13 @@ boot <- fli_small %>% generate(reps = 1000, type = "bootstrap") %>% calculate(stat = "diff in means", order = c("summer", "winter")) ( percentile_ci <- 
get_ci(boot) ) -boot %>% - visualize() + - shade_confidence_interval(endpoints = percentile_ci) + +visualize(boot) + + shade_confidence_interval(endpoints = percentile_ci) ( standard_error_ci <- get_ci(boot, type = "se", point_estimate = d_hat) ) -boot %>% - visualize() + - shade_confidence_interval(endpoints = standard_error_ci) + +visualize(boot) + + shade_confidence_interval(endpoints = standard_error_ci) ``` ### One numerical variable, one categorical (2 levels) (t) @@ -551,13 +551,13 @@ boot <- fli_small %>% generate(reps = 1000, type = "bootstrap") %>% calculate(stat = "t", order = c("summer", "winter")) ( percentile_ci <- get_ci(boot) ) -boot %>% - visualize() + - shade_confidence_interval(endpoints = percentile_ci) + +visualize(boot) + + shade_confidence_interval(endpoints = percentile_ci) ( standard_error_ci <- get_ci(boot, type = "se", point_estimate = t_hat) ) -boot %>% - visualize() + - shade_confidence_interval(endpoints = standard_error_ci) + +visualize(boot) + + shade_confidence_interval(endpoints = standard_error_ci) ``` @@ -577,13 +577,13 @@ boot <- fli_small %>% generate(reps = 1000, type = "bootstrap") %>% calculate(stat = "diff in props", order = c("summer", "winter")) ( percentile_ci <- get_ci(boot) ) -boot %>% - visualize() + - shade_confidence_interval(endpoints = percentile_ci) + +visualize(boot) + + shade_confidence_interval(endpoints = percentile_ci) ( standard_error_ci <- get_ci(boot, type = "se", point_estimate = d_hat) ) -boot %>% - visualize() + - shade_confidence_interval(endpoints = standard_error_ci) + +visualize(boot) + + shade_confidence_interval(endpoints = standard_error_ci) ``` ### Two categorical variables (z) @@ -602,13 +602,13 @@ boot <- fli_small %>% generate(reps = 1000, type = "bootstrap") %>% calculate(stat = "z", order = c("summer", "winter")) ( percentile_ci <- get_ci(boot) ) -boot %>% - visualize() + - shade_confidence_interval(endpoints = percentile_ci) + +visualize(boot) + + shade_confidence_interval(endpoints = percentile_ci) ( standard_error_ci <- get_ci(boot, type = "se", point_estimate = z_hat) ) -boot %>% - visualize() + - shade_confidence_interval(endpoints = standard_error_ci) + +visualize(boot) + + shade_confidence_interval(endpoints = standard_error_ci) ``` @@ -628,13 +628,13 @@ boot <- fli_small %>% generate(reps = 1000, type = "bootstrap") %>% calculate(stat = "slope") ( percentile_ci <- get_ci(boot) ) -boot %>% - visualize() + - shade_confidence_interval(endpoints = percentile_ci) + +visualize(boot) + + shade_confidence_interval(endpoints = percentile_ci) ( standard_error_ci <- get_ci(boot, type = "se", point_estimate = slope_hat) ) -boot %>% - visualize() + - shade_confidence_interval(endpoints = standard_error_ci) + +visualize(boot) + + shade_confidence_interval(endpoints = standard_error_ci) ``` ### Two numerical vars - correlation @@ -653,14 +653,14 @@ boot <- fli_small %>% generate(reps = 1000, type = "bootstrap") %>% calculate(stat = "correlation") ( percentile_ci <- get_ci(boot) ) -boot %>% - visualize() + - shade_confidence_interval(endpoints = percentile_ci) + +visualize(boot) + + shade_confidence_interval(endpoints = percentile_ci) ( standard_error_ci <- get_ci(boot, type = "se", point_estimate = correlation_hat) ) -boot %>% - visualize() + - shade_confidence_interval(endpoints = standard_error_ci) + +visualize(boot) + + shade_confidence_interval(endpoints = standard_error_ci) ``` @@ -682,11 +682,11 @@ boot <- fli_small %>% generate(reps = 1000, type = "bootstrap") %>% calculate(stat = "t") ( percentile_ci <- 
get_ci(boot) ) -boot %>% - visualize() + - shade_confidence_interval(endpoints = percentile_ci) + +visualize(boot) + + shade_confidence_interval(endpoints = percentile_ci) ( standard_error_ci <- get_ci(boot, type = "se", point_estimate = t_hat) ) -boot %>% - visualize() + - shade_confidence_interval(endpoints = standard_error_ci) + +visualize(boot) + + shade_confidence_interval(endpoints = standard_error_ci) ``` diff --git a/inst/doc/observed_stat_examples.html b/inst/doc/observed_stat_examples.html index bb6704a4..0b5a1667 100644 --- a/inst/doc/observed_stat_examples.html +++ b/inst/doc/observed_stat_examples.html @@ -134,9 +134,8 @@

One numerical variable (mean)

generate(reps = 1000) %>% calculate(stat = "mean")
## Setting `type = "bootstrap"` in `generate()`.
-
null_distn %>% 
-  visualize() +
-    shade_p_value(obs_stat = x_bar, direction = "two_sided")
+
visualize(null_distn) +
+  shade_p_value(obs_stat = x_bar, direction = "two_sided")

null_distn %>%
   get_p_value(obs_stat = x_bar, direction = "two_sided")
@@ -181,9 +180,8 @@

One numerical variable (standardized mean \(t\))

  generate(reps = 1000) %>% calculate(stat = "t")
## Setting `type = "bootstrap"` in `generate()`.
-
null_distn %>% 
-  visualize() +
-    shade_p_value(obs_stat = t_bar, direction = "two_sided")
+
visualize(null_distn) +
+  shade_p_value(obs_stat = t_bar, direction = "two_sided")

null_distn %>%
   get_p_value(obs_stat = t_bar, direction = "two_sided")
@@ -228,9 +226,8 @@

One numerical variable (median)

generate(reps = 1000) %>% calculate(stat = "median")
## Setting `type = "bootstrap"` in `generate()`.
-
null_distn %>% 
-  visualize() +
-    shade_p_value(obs_stat = x_tilde, direction = "two_sided")
+
visualize(null_distn) +
+  shade_p_value(obs_stat = x_tilde, direction = "two_sided")

null_distn %>%
   get_p_value(obs_stat = x_tilde, direction = "two_sided")
@@ -275,9 +272,8 @@

One categorical (one proportion)

generate(reps = 1000) %>% calculate(stat = "prop")
## Setting `type = "simulate"` in `generate()`.
-
null_distn %>% 
-  visualize() +
-    shade_p_value(obs_stat = p_hat, direction = "two_sided")
+
visualize(null_distn) +
+  shade_p_value(obs_stat = p_hat, direction = "two_sided")

null_distn %>%
   get_p_value(obs_stat = p_hat, direction = "two_sided")
@@ -334,9 +330,8 @@

Two categorical (2 level) variables

generate(reps = 1000) %>% calculate(stat = "diff in props", order = c("winter", "summer"))
## Setting `type = "permute"` in `generate()`.
-
null_distn %>% 
-  visualize() +
-    shade_p_value(obs_stat = d_hat, direction = "two_sided")
+
visualize(null_distn) +
+  shade_p_value(obs_stat = d_hat, direction = "two_sided")

null_distn %>%
   get_p_value(obs_stat = d_hat, direction = "two_sided")
@@ -381,9 +376,8 @@

Two categorical (2 level) variables (z)

generate(reps = 1000) %>% calculate(stat = "z", order = c("winter", "summer"))
## Setting `type = "permute"` in `generate()`.
-
null_distn %>% 
-  visualize() +
-    shade_p_value(obs_stat = z_hat, direction = "two_sided")
+
visualize(null_distn) +
+  shade_p_value(obs_stat = z_hat, direction = "two_sided")

null_distn %>%
   get_p_value(obs_stat = z_hat, direction = "two_sided")
@@ -432,9 +426,9 @@

One categorical (>2 level) - GoF

              p = c("EWR" = .33, "JFK" = .33, "LGA" = .34)) %>%
  generate(reps = 1000, type = "simulate") %>%
  calculate(stat = "Chisq")
-null_distn %>%
-  visualize() +
-    shade_p_value(obs_stat = Chisq_hat, direction = "greater")
+
+visualize(null_distn) +
+  shade_p_value(obs_stat = Chisq_hat, direction = "greater")

null_distn %>%
   get_p_value(obs_stat = Chisq_hat, direction = "greater")
@@ -478,9 +472,9 @@

Two categorical (>2 level) variables

  hypothesize(null = "independence") %>%
  generate(reps = 1000, type = "permute") %>%
  calculate(stat = "Chisq")
-null_distn %>%
-  visualize() +
-    shade_p_value(obs_stat = Chisq_hat, direction = "greater")
+
+visualize(null_distn) +
+  shade_p_value(obs_stat = Chisq_hat, direction = "greater")

null_distn %>%
   get_p_value(obs_stat = Chisq_hat, direction = "greater")
@@ -524,9 +518,9 @@

One numerical variable, one categorical (2 levels) (diff in means)

  hypothesize(null = "independence") %>%
  generate(reps = 1000, type = "permute") %>%
  calculate(stat = "diff in means", order = c("summer", "winter"))
-null_distn %>%
-  visualize() +
-    shade_p_value(obs_stat = d_hat, direction = "two_sided")
+
+visualize(null_distn) +
+  shade_p_value(obs_stat = d_hat, direction = "two_sided")

null_distn %>%
   get_p_value(obs_stat = d_hat, direction = "two_sided")
@@ -570,9 +564,9 @@

One numerical variable, one categorical (2 levels) (t)

  hypothesize(null = "independence") %>%
  generate(reps = 1000, type = "permute") %>%
  calculate(stat = "t", order = c("summer", "winter"))
-null_distn %>%
-  visualize() +
-    shade_p_value(obs_stat = t_hat, direction = "two_sided")
+
+visualize(null_distn) +
+  shade_p_value(obs_stat = t_hat, direction = "two_sided")

null_distn %>%
   get_p_value(obs_stat = t_hat, direction = "two_sided")
@@ -618,9 +612,9 @@

One numerical variable, one categorical (2 levels) (diff in medians)

  hypothesize(null = "independence") %>%
  generate(reps = 1000, type = "permute") %>%
  calculate(stat = "diff in medians", order = c("summer", "winter"))
-null_distn %>%
-  visualize() +
-    shade_p_value(obs_stat = d_hat, direction = "two_sided")
+
+visualize(null_distn) +
+  shade_p_value(obs_stat = d_hat, direction = "two_sided")

null_distn %>%
   get_p_value(obs_stat = d_hat, direction = "two_sided")
@@ -664,9 +658,9 @@

One numerical, one categorical (>2 levels) - ANOVA

  hypothesize(null = "independence") %>%
  generate(reps = 1000, type = "permute") %>%
  calculate(stat = "F")
-null_distn %>%
-  visualize() +
-    shade_p_value(obs_stat = F_hat, direction = "greater")
+
+visualize(null_distn) +
+  shade_p_value(obs_stat = F_hat, direction = "greater")

null_distn %>%
   get_p_value(obs_stat = F_hat, direction = "greater")
@@ -710,9 +704,9 @@

Two numerical vars - SLR

  hypothesize(null = "independence") %>%
  generate(reps = 1000, type = "permute") %>%
  calculate(stat = "slope")
-null_distn %>%
-  visualize() +
-    shade_p_value(obs_stat = slope_hat, direction = "two_sided")
+
+visualize(null_distn) +
+  shade_p_value(obs_stat = slope_hat, direction = "two_sided")

null_distn %>%
   get_p_value(obs_stat = slope_hat, direction = "two_sided")
@@ -756,9 +750,9 @@

Two numerical vars - correlation

  hypothesize(null = "independence") %>%
  generate(reps = 1000, type = "permute") %>%
  calculate(stat = "correlation")
-null_distn %>%
-  visualize() +
-    shade_p_value(obs_stat = correlation_hat, direction = "two_sided")
+
+visualize(null_distn) +
+  shade_p_value(obs_stat = correlation_hat, direction = "two_sided")

null_distn %>%
   get_p_value(obs_stat = correlation_hat, direction = "two_sided")
@@ -824,9 +818,8 @@

One numerical (one mean)

-
boot %>%
-  visualize() +
-    shade_confidence_interval(endpoints = percentile_ci)
+
visualize(boot) +
+  shade_confidence_interval(endpoints = percentile_ci)

( standard_error_ci <- get_ci(boot, type = "se", point_estimate = x_bar) )
@@ -845,9 +838,8 @@

One numerical (one mean)

-
boot %>%
-  visualize() +
-    shade_confidence_interval(endpoints = standard_error_ci)
+
visualize(boot) +
+  shade_confidence_interval(endpoints = standard_error_ci)

@@ -891,9 +883,8 @@

One numerical (one mean - standardized)

-
boot %>%
-  visualize() +
-    shade_confidence_interval(endpoints = percentile_ci)
+
visualize(boot) +
+  shade_confidence_interval(endpoints = percentile_ci)

( standard_error_ci <- get_ci(boot, type = "se", point_estimate = t_hat) )
@@ -912,9 +903,8 @@

One numerical (one mean - standardized)

-
boot %>%
-  visualize() +
-    shade_confidence_interval(endpoints = standard_error_ci)
+
visualize(boot) +
+  shade_confidence_interval(endpoints = standard_error_ci)

@@ -958,9 +948,8 @@

One categorical (one proportion)

-
boot %>%
-  visualize() +
-    shade_confidence_interval(endpoints = percentile_ci)
+
visualize(boot) +
+  shade_confidence_interval(endpoints = percentile_ci)

( standard_error_ci <- get_ci(boot, type = "se", point_estimate = p_hat) )
@@ -979,9 +968,8 @@

One categorical (one proportion)

-
boot %>%
-  visualize() +
-    shade_confidence_interval(endpoints = standard_error_ci)
+
visualize(boot) +
+  shade_confidence_interval(endpoints = standard_error_ci)

@@ -1029,9 +1017,8 @@

One numerical variable, one categorical (2 levels) (diff in means)

-
boot %>%
-  visualize() +
-    shade_confidence_interval(endpoints = percentile_ci)
+
visualize(boot) +
+  shade_confidence_interval(endpoints = percentile_ci)

( standard_error_ci <- get_ci(boot, type = "se", point_estimate = d_hat) )
@@ -1050,9 +1037,8 @@

One numerical variable, one categorical (2 levels) (diff in means)

-
boot %>%
-  visualize() +
-    shade_confidence_interval(endpoints = standard_error_ci)
+
visualize(boot) +
+  shade_confidence_interval(endpoints = standard_error_ci)

@@ -1096,9 +1082,8 @@

One numerical variable, one categorical (2 levels) (t)

-
boot %>%
-  visualize() +
-    shade_confidence_interval(endpoints = percentile_ci)
+
visualize(boot) +
+  shade_confidence_interval(endpoints = percentile_ci)

( standard_error_ci <- get_ci(boot, type = "se", point_estimate = t_hat) )
@@ -1117,9 +1102,8 @@

One numerical variable, one categorical (2 levels) (t)

-
boot %>%
-  visualize() +
-    shade_confidence_interval(endpoints = standard_error_ci)
+
visualize(boot) +
+  shade_confidence_interval(endpoints = standard_error_ci)

@@ -1163,9 +1147,8 @@

Two categorical variables (diff in proportions)

-
boot %>%
-  visualize() +
-    shade_confidence_interval(endpoints = percentile_ci)
+
visualize(boot) +
+  shade_confidence_interval(endpoints = percentile_ci)

( standard_error_ci <- get_ci(boot, type = "se", point_estimate = d_hat) )
@@ -1184,9 +1167,8 @@

Two categorical variables (diff in proportions)

-
boot %>%
-  visualize() +
-    shade_confidence_interval(endpoints = standard_error_ci)
+
visualize(boot) +
+  shade_confidence_interval(endpoints = standard_error_ci)

@@ -1230,9 +1212,8 @@

Two categorical variables (z)

-
boot %>%
-  visualize() +
-    shade_confidence_interval(endpoints = percentile_ci)
+
visualize(boot) +
+  shade_confidence_interval(endpoints = percentile_ci)

( standard_error_ci <- get_ci(boot, type = "se", point_estimate = z_hat) )
@@ -1251,9 +1232,8 @@

Two categorical variables (z)

-
boot %>%
-  visualize() +
-    shade_confidence_interval(endpoints = standard_error_ci)
+
visualize(boot) +
+  shade_confidence_interval(endpoints = standard_error_ci)

@@ -1297,9 +1277,8 @@

Two numerical vars - SLR

-
boot %>%
-  visualize() +
-    shade_confidence_interval(endpoints = percentile_ci)
+
visualize(boot) +
+  shade_confidence_interval(endpoints = percentile_ci)

( standard_error_ci <- get_ci(boot, type = "se", point_estimate = slope_hat) )
@@ -1318,9 +1297,8 @@

Two numerical vars - SLR

-
boot %>%
-  visualize() +
-    shade_confidence_interval(endpoints = standard_error_ci)
+
visualize(boot) +
+  shade_confidence_interval(endpoints = standard_error_ci)

@@ -1364,9 +1342,8 @@

Two numerical vars - correlation

-
boot %>%
-  visualize() +
-    shade_confidence_interval(endpoints = percentile_ci)
+
visualize(boot) +
+  shade_confidence_interval(endpoints = percentile_ci)

( standard_error_ci <- get_ci(boot, type = "se", 
                             point_estimate = correlation_hat) )
@@ -1386,9 +1363,8 @@

Two numerical vars - correlation

-
boot %>%
-  visualize() +
-    shade_confidence_interval(endpoints = standard_error_ci)
+
visualize(boot) +
+  shade_confidence_interval(endpoints = standard_error_ci)
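The confidence-interval examples in the vignettes follow the same rewritten pattern: compute the interval with `get_ci()` and layer it onto the bootstrap distribution with `shade_confidence_interval()`. A minimal sketch under the same assumptions as above (development {infer} from this series, `mtcars` standing in for `fli_small`):

```r
library(infer)
library(dplyr)

# Observed mean and a bootstrap distribution of the sample mean
x_bar <- mtcars %>%
  specify(response = mpg) %>%
  calculate(stat = "mean")

boot <- mtcars %>%
  specify(response = mpg) %>%
  generate(reps = 1000, type = "bootstrap") %>%
  calculate(stat = "mean")

# Percentile interval
percentile_ci <- get_ci(boot)
visualize(boot) +
  shade_confidence_interval(endpoints = percentile_ci)

# Standard-error interval centered at the observed mean
standard_error_ci <- get_ci(boot, type = "se", point_estimate = x_bar)
visualize(boot) +
  shade_confidence_interval(endpoints = standard_error_ci)
```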

diff --git a/inst/doc/two_sample_t.R b/inst/doc/two_sample_t.R index ff907dfa..ea06c5c3 100644 --- a/inst/doc/two_sample_t.R +++ b/inst/doc/two_sample_t.R @@ -42,9 +42,9 @@ t_null_perm <- fli_small %>% hypothesize(null = "independence") %>% generate(reps = 1000, type = "permute") %>% calculate(stat = "t", order = c("h1", "h2")) -t_null_perm %>% - visualize() + - shade_p_value(obs_stat = obs_t, direction = "two_sided") + +visualize(t_null_perm) + + shade_p_value(obs_stat = obs_t, direction = "two_sided") ## ------------------------------------------------------------------------ t_null_perm %>% @@ -57,14 +57,13 @@ t_null_theor <- fli_small %>% hypothesize(null = "independence") %>% # generate() ## Not used for theoretical calculate(stat = "t", order = c("h1", "h2")) -t_null_theor %>% - visualize(method = "theoretical") + - shade_p_value(obs_stat = obs_t, direction = "two_sided") + +visualize(t_null_theor, method = "theoretical") + + shade_p_value(obs_stat = obs_t, direction = "two_sided") ## ------------------------------------------------------------------------ -t_null_perm %>% - visualize(method = "both") + - shade_p_value(obs_stat = obs_t, direction = "two_sided") +visualize(t_null_perm, method = "both") + + shade_p_value(obs_stat = obs_t, direction = "two_sided") ## ------------------------------------------------------------------------ fli_small %>% diff --git a/inst/doc/two_sample_t.Rmd b/inst/doc/two_sample_t.Rmd index a4ef6534..e4a2b100 100755 --- a/inst/doc/two_sample_t.Rmd +++ b/inst/doc/two_sample_t.Rmd @@ -92,9 +92,9 @@ t_null_perm <- fli_small %>% hypothesize(null = "independence") %>% generate(reps = 1000, type = "permute") %>% calculate(stat = "t", order = c("h1", "h2")) -t_null_perm %>% - visualize() + - shade_p_value(obs_stat = obs_t, direction = "two_sided") + +visualize(t_null_perm) + + shade_p_value(obs_stat = obs_t, direction = "two_sided") ``` ## Calculate the randomization-based $p$-value @@ -114,17 +114,16 @@ t_null_theor <- fli_small %>% hypothesize(null = "independence") %>% # generate() ## Not used for theoretical calculate(stat = "t", order = c("h1", "h2")) -t_null_theor %>% - visualize(method = "theoretical") + - shade_p_value(obs_stat = obs_t, direction = "two_sided") + +visualize(t_null_theor, method = "theoretical") + + shade_p_value(obs_stat = obs_t, direction = "two_sided") ``` ## Overlay appropriate $t$ distribution on top of permuted t-statistics ```{r} -t_null_perm %>% - visualize(method = "both") + - shade_p_value(obs_stat = obs_t, direction = "two_sided") +visualize(t_null_perm, method = "both") + + shade_p_value(obs_stat = obs_t, direction = "two_sided") ``` ## Compute theoretical p-value diff --git a/inst/doc/two_sample_t.html b/inst/doc/two_sample_t.html index c115c4a5..f14c8554 100644 --- a/inst/doc/two_sample_t.html +++ b/inst/doc/two_sample_t.html @@ -166,9 +166,8 @@

Randomization approach to t-statistic

generate(reps = 1000, type = "permute") %>% calculate(stat = "t", order = c("h1", "h2"))
## Warning: Removed 15 rows containing missing values.
-
t_null_perm %>%
-  visualize() +
-    shade_p_value(obs_stat = obs_t, direction = "two_sided")
+
visualize(t_null_perm) +
+  shade_p_value(obs_stat = obs_t, direction = "two_sided")

@@ -199,18 +198,16 @@

Theoretical distribution

  # generate() ## Not used for theoretical
  calculate(stat = "t", order = c("h1", "h2"))
## Warning: Removed 15 rows containing missing values.
-
t_null_theor %>%
-  visualize(method = "theoretical") +
-    shade_p_value(obs_stat = obs_t, direction = "two_sided")
+
visualize(t_null_theor, method = "theoretical") +
+  shade_p_value(obs_stat = obs_t, direction = "two_sided")
## Warning: Check to make sure the conditions have been met for the
 ## theoretical method. {infer} currently does not check these for you.

Overlay appropriate \(t\) distribution on top of permuted t-statistics

-
t_null_perm %>% 
-  visualize(method = "both") +
-    shade_p_value(obs_stat = obs_t, direction = "two_sided")
+
visualize(t_null_perm, method = "both") +
+  shade_p_value(obs_stat = obs_t, direction = "two_sided")
## Warning: Check to make sure the conditions have been met for the
 ## theoretical method. {infer} currently does not check these for you.

diff --git a/vignettes/chisq_test.Rmd b/vignettes/chisq_test.Rmd index 1cb2f535..2693b8c3 100644 --- a/vignettes/chisq_test.Rmd +++ b/vignettes/chisq_test.Rmd @@ -92,9 +92,9 @@ chisq_null_perm <- fli_small %>% hypothesize(null = "independence") %>% generate(reps = 1000, type = "permute") %>% calculate(stat = "Chisq") -chisq_null_perm %>% - visualize() + - shade_p_value(obs_stat = obs_chisq, direction = "greater") + +visualize(chisq_null_perm) + + shade_p_value(obs_stat = obs_chisq, direction = "greater") ``` ## Calculate the randomization-based $p$-value @@ -113,17 +113,16 @@ chisq_null_theor <- fli_small %>% hypothesize(null = "independence") %>% # generate() ## Not used for theoretical calculate(stat = "Chisq") -chisq_null_theor %>% - visualize(method = "theoretical") + - shade_p_value(obs_stat = obs_chisq, direction = "right") + +visualize(chisq_null_theor, method = "theoretical") + + shade_p_value(obs_stat = obs_chisq, direction = "right") ``` ## Overlay appropriate $\chi^2$ distribution on top of permuted statistics ```{r} -chisq_null_perm %>% - visualize(method = "both") + - shade_p_value(obs_stat = obs_chisq, direction = "right") +visualize(chisq_null_perm, method = "both") + + shade_p_value(obs_stat = obs_chisq, direction = "right") ``` ## Compute theoretical p-value diff --git a/vignettes/observed_stat_examples.Rmd b/vignettes/observed_stat_examples.Rmd index 30404cda..780512b3 100644 --- a/vignettes/observed_stat_examples.Rmd +++ b/vignettes/observed_stat_examples.Rmd @@ -69,9 +69,9 @@ null_distn <- fli_small %>% hypothesize(null = "point", mu = 10) %>% generate(reps = 1000) %>% calculate(stat = "mean") -null_distn %>% - visualize() + - shade_p_value(obs_stat = x_bar, direction = "two_sided") + +visualize(null_distn) + + shade_p_value(obs_stat = x_bar, direction = "two_sided") null_distn %>% get_p_value(obs_stat = x_bar, direction = "two_sided") ``` @@ -91,9 +91,9 @@ null_distn <- fli_small %>% hypothesize(null = "point", mu = 8) %>% generate(reps = 1000) %>% calculate(stat = "t") -null_distn %>% - visualize() + - shade_p_value(obs_stat = t_bar, direction = "two_sided") + +visualize(null_distn) + + shade_p_value(obs_stat = t_bar, direction = "two_sided") null_distn %>% get_p_value(obs_stat = t_bar, direction = "two_sided") ``` @@ -115,9 +115,9 @@ null_distn <- fli_small %>% hypothesize(null = "point", med = -1) %>% generate(reps = 1000) %>% calculate(stat = "median") -null_distn %>% - visualize() + - shade_p_value(obs_stat = x_tilde, direction = "two_sided") + +visualize(null_distn) + + shade_p_value(obs_stat = x_tilde, direction = "two_sided") null_distn %>% get_p_value(obs_stat = x_tilde, direction = "two_sided") ``` @@ -138,9 +138,9 @@ null_distn <- fli_small %>% hypothesize(null = "point", p = .5) %>% generate(reps = 1000) %>% calculate(stat = "prop") -null_distn %>% - visualize() + - shade_p_value(obs_stat = p_hat, direction = "two_sided") + +visualize(null_distn) + + shade_p_value(obs_stat = p_hat, direction = "two_sided") null_distn %>% get_p_value(obs_stat = p_hat, direction = "two_sided") ``` @@ -177,9 +177,9 @@ null_distn <- fli_small %>% hypothesize(null = "independence") %>% generate(reps = 1000) %>% calculate(stat = "diff in props", order = c("winter", "summer")) -null_distn %>% - visualize() + - shade_p_value(obs_stat = d_hat, direction = "two_sided") + +visualize(null_distn) + + shade_p_value(obs_stat = d_hat, direction = "two_sided") null_distn %>% get_p_value(obs_stat = d_hat, direction = "two_sided") ``` @@ -200,9 +200,9 @@ null_distn <- fli_small %>% 
hypothesize(null = "independence") %>% generate(reps = 1000) %>% calculate(stat = "z", order = c("winter", "summer")) -null_distn %>% - visualize() + - shade_p_value(obs_stat = z_hat, direction = "two_sided") + +visualize(null_distn) + + shade_p_value(obs_stat = z_hat, direction = "two_sided") null_distn %>% get_p_value(obs_stat = z_hat, direction = "two_sided") ``` @@ -230,9 +230,9 @@ null_distn <- fli_small %>% p = c("EWR" = .33, "JFK" = .33, "LGA" = .34)) %>% generate(reps = 1000, type = "simulate") %>% calculate(stat = "Chisq") -null_distn %>% - visualize() + - shade_p_value(obs_stat = Chisq_hat, direction = "greater") + +visualize(null_distn) + + shade_p_value(obs_stat = Chisq_hat, direction = "greater") null_distn %>% get_p_value(obs_stat = Chisq_hat, direction = "greater") ``` @@ -253,9 +253,9 @@ null_distn <- fli_small %>% hypothesize(null = "independence") %>% generate(reps = 1000, type = "permute") %>% calculate(stat = "Chisq") -null_distn %>% - visualize() + - shade_p_value(obs_stat = Chisq_hat, direction = "greater") + +visualize(null_distn) + + shade_p_value(obs_stat = Chisq_hat, direction = "greater") null_distn %>% get_p_value(obs_stat = Chisq_hat, direction = "greater") ``` @@ -276,9 +276,9 @@ null_distn <- fli_small %>% hypothesize(null = "independence") %>% generate(reps = 1000, type = "permute") %>% calculate(stat = "diff in means", order = c("summer", "winter")) -null_distn %>% - visualize() + - shade_p_value(obs_stat = d_hat, direction = "two_sided") + +visualize(null_distn) + + shade_p_value(obs_stat = d_hat, direction = "two_sided") null_distn %>% get_p_value(obs_stat = d_hat, direction = "two_sided") ``` @@ -299,9 +299,9 @@ null_distn <- fli_small %>% hypothesize(null = "independence") %>% generate(reps = 1000, type = "permute") %>% calculate(stat = "t", order = c("summer", "winter")) -null_distn %>% - visualize() + - shade_p_value(obs_stat = t_hat, direction = "two_sided") + +visualize(null_distn) + + shade_p_value(obs_stat = t_hat, direction = "two_sided") null_distn %>% get_p_value(obs_stat = t_hat, direction = "two_sided") ``` @@ -325,9 +325,9 @@ null_distn <- fli_small %>% hypothesize(null = "independence") %>% generate(reps = 1000, type = "permute") %>% calculate(stat = "diff in medians", order = c("summer", "winter")) -null_distn %>% - visualize() + - shade_p_value(obs_stat = d_hat, direction = "two_sided") + +visualize(null_distn) + + shade_p_value(obs_stat = d_hat, direction = "two_sided") null_distn %>% get_p_value(obs_stat = d_hat, direction = "two_sided") ``` @@ -348,9 +348,9 @@ null_distn <- fli_small %>% hypothesize(null = "independence") %>% generate(reps = 1000, type = "permute") %>% calculate(stat = "F") -null_distn %>% - visualize() + - shade_p_value(obs_stat = F_hat, direction = "greater") + +visualize(null_distn) + + shade_p_value(obs_stat = F_hat, direction = "greater") null_distn %>% get_p_value(obs_stat = F_hat, direction = "greater") ``` @@ -371,9 +371,9 @@ null_distn <- fli_small %>% hypothesize(null = "independence") %>% generate(reps = 1000, type = "permute") %>% calculate(stat = "slope") -null_distn %>% - visualize() + - shade_p_value(obs_stat = slope_hat, direction = "two_sided") + +visualize(null_distn) + + shade_p_value(obs_stat = slope_hat, direction = "two_sided") null_distn %>% get_p_value(obs_stat = slope_hat, direction = "two_sided") ``` @@ -394,9 +394,9 @@ null_distn <- fli_small %>% hypothesize(null = "independence") %>% generate(reps = 1000, type = "permute") %>% calculate(stat = "correlation") -null_distn %>% - visualize() + - 
shade_p_value(obs_stat = correlation_hat, direction = "two_sided") + +visualize(null_distn) + + shade_p_value(obs_stat = correlation_hat, direction = "two_sided") null_distn %>% get_p_value(obs_stat = correlation_hat, direction = "two_sided") ``` @@ -420,9 +420,9 @@ null_distn <- fli_small %>% hypothesize(null = "independence") %>% generate(reps = 1000, type = "permute") %>% calculate(stat = "t") -null_distn %>% - visualize() + - shade_p_value(obs_stat = t_hat, direction = "two_sided") + +visualize(null_distn) + + shade_p_value(obs_stat = t_hat, direction = "two_sided") null_distn %>% get_p_value(obs_stat = t_hat, direction = "two_sided") ``` @@ -446,13 +446,13 @@ boot <- fli_small %>% generate(reps = 1000, type = "bootstrap") %>% calculate(stat = "mean") ( percentile_ci <- get_ci(boot) ) -boot %>% - visualize() + - shade_confidence_interval(endpoints = percentile_ci) + +visualize(boot) + + shade_confidence_interval(endpoints = percentile_ci) ( standard_error_ci <- get_ci(boot, type = "se", point_estimate = x_bar) ) -boot %>% - visualize() + - shade_confidence_interval(endpoints = standard_error_ci) + +visualize(boot) + + shade_confidence_interval(endpoints = standard_error_ci) ``` ### One numerical (one mean - standardized) @@ -471,13 +471,13 @@ boot <- fli_small %>% generate(reps = 1000, type = "bootstrap") %>% calculate(stat = "t") ( percentile_ci <- get_ci(boot) ) -boot %>% - visualize() + - shade_confidence_interval(endpoints = percentile_ci) + +visualize(boot) + + shade_confidence_interval(endpoints = percentile_ci) ( standard_error_ci <- get_ci(boot, type = "se", point_estimate = t_hat) ) -boot %>% - visualize() + - shade_confidence_interval(endpoints = standard_error_ci) + +visualize(boot) + + shade_confidence_interval(endpoints = standard_error_ci) ``` @@ -497,13 +497,13 @@ boot <- fli_small %>% generate(reps = 1000, type = "bootstrap") %>% calculate(stat = "prop") ( percentile_ci <- get_ci(boot) ) -boot %>% - visualize() + - shade_confidence_interval(endpoints = percentile_ci) + +visualize(boot) + + shade_confidence_interval(endpoints = percentile_ci) ( standard_error_ci <- get_ci(boot, type = "se", point_estimate = p_hat) ) -boot %>% - visualize() + - shade_confidence_interval(endpoints = standard_error_ci) + +visualize(boot) + + shade_confidence_interval(endpoints = standard_error_ci) ``` ### One categorical variable (standardized proportion $z$) @@ -526,13 +526,13 @@ boot <- fli_small %>% generate(reps = 1000, type = "bootstrap") %>% calculate(stat = "diff in means", order = c("summer", "winter")) ( percentile_ci <- get_ci(boot) ) -boot %>% - visualize() + - shade_confidence_interval(endpoints = percentile_ci) + +visualize(boot) + + shade_confidence_interval(endpoints = percentile_ci) ( standard_error_ci <- get_ci(boot, type = "se", point_estimate = d_hat) ) -boot %>% - visualize() + - shade_confidence_interval(endpoints = standard_error_ci) + +visualize(boot) + + shade_confidence_interval(endpoints = standard_error_ci) ``` ### One numerical variable, one categorical (2 levels) (t) @@ -551,13 +551,13 @@ boot <- fli_small %>% generate(reps = 1000, type = "bootstrap") %>% calculate(stat = "t", order = c("summer", "winter")) ( percentile_ci <- get_ci(boot) ) -boot %>% - visualize() + - shade_confidence_interval(endpoints = percentile_ci) + +visualize(boot) + + shade_confidence_interval(endpoints = percentile_ci) ( standard_error_ci <- get_ci(boot, type = "se", point_estimate = t_hat) ) -boot %>% - visualize() + - shade_confidence_interval(endpoints = standard_error_ci) + 
+visualize(boot) + + shade_confidence_interval(endpoints = standard_error_ci) ``` @@ -577,13 +577,13 @@ boot <- fli_small %>% generate(reps = 1000, type = "bootstrap") %>% calculate(stat = "diff in props", order = c("summer", "winter")) ( percentile_ci <- get_ci(boot) ) -boot %>% - visualize() + - shade_confidence_interval(endpoints = percentile_ci) + +visualize(boot) + + shade_confidence_interval(endpoints = percentile_ci) ( standard_error_ci <- get_ci(boot, type = "se", point_estimate = d_hat) ) -boot %>% - visualize() + - shade_confidence_interval(endpoints = standard_error_ci) + +visualize(boot) + + shade_confidence_interval(endpoints = standard_error_ci) ``` ### Two categorical variables (z) @@ -602,13 +602,13 @@ boot <- fli_small %>% generate(reps = 1000, type = "bootstrap") %>% calculate(stat = "z", order = c("summer", "winter")) ( percentile_ci <- get_ci(boot) ) -boot %>% - visualize() + - shade_confidence_interval(endpoints = percentile_ci) + +visualize(boot) + + shade_confidence_interval(endpoints = percentile_ci) ( standard_error_ci <- get_ci(boot, type = "se", point_estimate = z_hat) ) -boot %>% - visualize() + - shade_confidence_interval(endpoints = standard_error_ci) + +visualize(boot) + + shade_confidence_interval(endpoints = standard_error_ci) ``` @@ -628,13 +628,13 @@ boot <- fli_small %>% generate(reps = 1000, type = "bootstrap") %>% calculate(stat = "slope") ( percentile_ci <- get_ci(boot) ) -boot %>% - visualize() + - shade_confidence_interval(endpoints = percentile_ci) + +visualize(boot) + + shade_confidence_interval(endpoints = percentile_ci) ( standard_error_ci <- get_ci(boot, type = "se", point_estimate = slope_hat) ) -boot %>% - visualize() + - shade_confidence_interval(endpoints = standard_error_ci) + +visualize(boot) + + shade_confidence_interval(endpoints = standard_error_ci) ``` ### Two numerical vars - correlation @@ -653,14 +653,14 @@ boot <- fli_small %>% generate(reps = 1000, type = "bootstrap") %>% calculate(stat = "correlation") ( percentile_ci <- get_ci(boot) ) -boot %>% - visualize() + - shade_confidence_interval(endpoints = percentile_ci) + +visualize(boot) + + shade_confidence_interval(endpoints = percentile_ci) ( standard_error_ci <- get_ci(boot, type = "se", point_estimate = correlation_hat) ) -boot %>% - visualize() + - shade_confidence_interval(endpoints = standard_error_ci) + +visualize(boot) + + shade_confidence_interval(endpoints = standard_error_ci) ``` @@ -682,11 +682,11 @@ boot <- fli_small %>% generate(reps = 1000, type = "bootstrap") %>% calculate(stat = "t") ( percentile_ci <- get_ci(boot) ) -boot %>% - visualize() + - shade_confidence_interval(endpoints = percentile_ci) + +visualize(boot) + + shade_confidence_interval(endpoints = percentile_ci) ( standard_error_ci <- get_ci(boot, type = "se", point_estimate = t_hat) ) -boot %>% - visualize() + - shade_confidence_interval(endpoints = standard_error_ci) + +visualize(boot) + + shade_confidence_interval(endpoints = standard_error_ci) ``` diff --git a/vignettes/two_sample_t.Rmd b/vignettes/two_sample_t.Rmd index a4ef6534..e4a2b100 100755 --- a/vignettes/two_sample_t.Rmd +++ b/vignettes/two_sample_t.Rmd @@ -92,9 +92,9 @@ t_null_perm <- fli_small %>% hypothesize(null = "independence") %>% generate(reps = 1000, type = "permute") %>% calculate(stat = "t", order = c("h1", "h2")) -t_null_perm %>% - visualize() + - shade_p_value(obs_stat = obs_t, direction = "two_sided") + +visualize(t_null_perm) + + shade_p_value(obs_stat = obs_t, direction = "two_sided") ``` ## Calculate the randomization-based 
$p$-value @@ -114,17 +114,16 @@ t_null_theor <- fli_small %>% hypothesize(null = "independence") %>% # generate() ## Not used for theoretical calculate(stat = "t", order = c("h1", "h2")) -t_null_theor %>% - visualize(method = "theoretical") + - shade_p_value(obs_stat = obs_t, direction = "two_sided") + +visualize(t_null_theor, method = "theoretical") + + shade_p_value(obs_stat = obs_t, direction = "two_sided") ``` ## Overlay appropriate $t$ distribution on top of permuted t-statistics ```{r} -t_null_perm %>% - visualize(method = "both") + - shade_p_value(obs_stat = obs_t, direction = "two_sided") +visualize(t_null_perm, method = "both") + + shade_p_value(obs_stat = obs_t, direction = "two_sided") ``` ## Compute theoretical p-value From be1f24537edb62e8b9539b24028d3b3fb7639191 Mon Sep 17 00:00:00 2001 From: Brian Fannin Date: Mon, 24 Sep 2018 16:03:29 -0400 Subject: [PATCH 65/78] Correct spelling of PirateGrunt's lasts name --- DESCRIPTION | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/DESCRIPTION b/DESCRIPTION index 8411bd6b..7e036e33 100755 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -15,7 +15,7 @@ Authors@R: c( person("Neal", "Fultz", email = "nfultz@gmail.com", role = "ctb"), person("Doug", "Friedman", email = "doug.nhp@gmail.com", role = "ctb"), person("Richie", "Cotton", email = "richie@datacamp.com", role = "ctb"), - person("Brian", "Fannon", email = "captain@pirategrunt.com", role = "ctb")) + person("Brian", "Fannin", email = "captain@pirategrunt.com", role = "ctb")) Description: The objective of this package is to perform inference using an expressive statistical grammar that coheres with the tidy design framework. License: CC0 Encoding: UTF-8 From 802068b7ea4833fa0d896a08e5c720abafd6ac48 Mon Sep 17 00:00:00 2001 From: echasnovski Date: Sat, 27 Oct 2018 21:41:15 +0300 Subject: [PATCH 66/78] Update formatting in 'get_p_value.R'. 
--- R/get_p_value.R | 27 ++++++++++++--------------- 1 file changed, 12 insertions(+), 15 deletions(-) diff --git a/R/get_p_value.R b/R/get_p_value.R index f081f98a..d313e94d 100644 --- a/R/get_p_value.R +++ b/R/get_p_value.R @@ -40,8 +40,7 @@ NULL #' @rdname get_p_value #' @export -get_p_value <- function(x, obs_stat, direction){ - +get_p_value <- function(x, obs_stat, direction) { check_type(x, is.data.frame) if(!is_generated(x) & is_hypothesized(x)) { stop_glue( @@ -56,7 +55,8 @@ get_p_value <- function(x, obs_stat, direction){ pvalue <- simulation_based_p_value( x = x, obs_stat = obs_stat, - direction = direction) + direction = direction + ) ## Theoretical-based p-value # Could be more specific @@ -81,26 +81,22 @@ get_pvalue <- function(x, obs_stat, direction) { get_p_value(x = x, obs_stat = obs_stat, direction = direction) } -simulation_based_p_value <- function(x, obs_stat, direction){ - - if(direction %in% c("less", "left")){ +simulation_based_p_value <- function(x, obs_stat, direction) { + if (direction %in% c("less", "left")) { p_value <- x %>% dplyr::summarize(p_value = mean(stat <= obs_stat)) - } - else if(direction %in% c("greater", "right")){ + } else if (direction %in% c("greater", "right")) { p_value <- x %>% dplyr::summarize(p_value = mean(stat >= obs_stat)) - } - else{ + } else { p_value <- x %>% two_sided_p_value(obs_stat = obs_stat) } p_value } -two_sided_p_value <- function(x, obs_stat){ - - if(stats::median(x$stat) >= obs_stat){ +two_sided_p_value <- function(x, obs_stat) { + if (stats::median(x$stat) >= obs_stat) { basic_p_value <- get_percentile(x$stat, obs_stat) + (1 - get_percentile(x$stat, stats::median(x$stat) + stats::median(x$stat) - obs_stat)) @@ -110,13 +106,14 @@ two_sided_p_value <- function(x, obs_stat){ stats::median(x$stat) - obs_stat)) } - if(basic_p_value >= 1) + if (basic_p_value >= 1) { # Catch all if adding both sides produces a number # larger than 1. Should update with test in that # scenario instead of using >= return(tibble::tibble(p_value = 1)) - else + } else { return(tibble::tibble(p_value = basic_p_value)) + } } is_generated <- function(x) { From f10fa5e52ec072162dda3e52feec7dd614e8bd82 Mon Sep 17 00:00:00 2001 From: echasnovski Date: Sat, 27 Oct 2018 22:28:52 +0300 Subject: [PATCH 67/78] Update `get_p_value()` (#205). Details: - Changed method of computing two-sided p-value. - It now returns tibble. --- R/get_p_value.R | 43 ++++++++----------- man/get_p_value.Rd | 2 +- tests/testthat/test-get_p_value.R | 69 ++++++++++--------------------- 3 files changed, 41 insertions(+), 73 deletions(-) diff --git a/R/get_p_value.R b/R/get_p_value.R index d313e94d..7e8119e6 100644 --- a/R/get_p_value.R +++ b/R/get_p_value.R @@ -8,7 +8,7 @@ #' @param direction A character string. Options are `"less"`, `"greater"`, or #' `"two_sided"`. Can also use `"left"`, `"right"`, or `"both"`. #' -#' @return A 1x1 data frame with value between 0 and 1. +#' @return A 1x1 [tibble][tibble::tibble] with value between 0 and 1. #' #' @section Aliases: #' `get_pvalue()` is an alias of `get_p_value()`. 
@@ -83,37 +83,30 @@ get_pvalue <- function(x, obs_stat, direction) { simulation_based_p_value <- function(x, obs_stat, direction) { if (direction %in% c("less", "left")) { - p_value <- x %>% - dplyr::summarize(p_value = mean(stat <= obs_stat)) + pval <- left_p_value(x[["stat"]], obs_stat) } else if (direction %in% c("greater", "right")) { - p_value <- x %>% - dplyr::summarize(p_value = mean(stat >= obs_stat)) + pval <- right_p_value(x[["stat"]], obs_stat) } else { - p_value <- x %>% two_sided_p_value(obs_stat = obs_stat) + pval <- two_sided_p_value(x[["stat"]], obs_stat) } - p_value + tibble::tibble(p_value = pval) } -two_sided_p_value <- function(x, obs_stat) { - if (stats::median(x$stat) >= obs_stat) { - basic_p_value <- get_percentile(x$stat, obs_stat) + - (1 - get_percentile(x$stat, stats::median(x$stat) + - stats::median(x$stat) - obs_stat)) - } else { - basic_p_value <- 1 - get_percentile(x$stat, obs_stat) + - (get_percentile(x$stat, stats::median(x$stat) + - stats::median(x$stat) - obs_stat)) - } +left_p_value <- function(vec, obs_stat) { + mean(vec <= obs_stat) +} - if (basic_p_value >= 1) { - # Catch all if adding both sides produces a number - # larger than 1. Should update with test in that - # scenario instead of using >= - return(tibble::tibble(p_value = 1)) - } else { - return(tibble::tibble(p_value = basic_p_value)) - } +right_p_value <- function(vec, obs_stat) { + mean(vec >= obs_stat) +} + +two_sided_p_value <- function(vec, obs_stat) { + left_pval <- left_p_value(vec, obs_stat) + right_pval <- right_p_value(vec, obs_stat) + raw_res <- 2 * min(left_pval, right_pval) + + min(raw_res, 1) } is_generated <- function(x) { diff --git a/man/get_p_value.Rd b/man/get_p_value.Rd index 83e5cac1..21a7ab73 100644 --- a/man/get_p_value.Rd +++ b/man/get_p_value.Rd @@ -19,7 +19,7 @@ extreme than this).} \code{"two_sided"}. Can also use \code{"left"}, \code{"right"}, or \code{"both"}.} } \value{ -A 1x1 data frame with value between 0 and 1. +A 1x1 \link[tibble:tibble]{tibble} with value between 0 and 1. } \description{ Simulation-based methods are (currently only) supported. 
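For reference, the new two-sided method reduces to doubling the smaller tail probability and capping the result at 1. A minimal standalone sketch of that rule (short assumed names here, not the package internals) on a toy vector of simulated statistics:

```r
# Tail probabilities as in the helpers above, under assumed short names
left_p  <- function(vec, obs) mean(vec <= obs)
right_p <- function(vec, obs) mean(vec >= obs)
two_sided_p <- function(vec, obs) {
  min(2 * min(left_p(vec, obs), right_p(vec, obs)), 1)
}

sim_stats <- c(-3, -2, -1, 0, 1, 2, 3)
left_p(sim_stats, 2)       # 6/7: share of simulated stats at or below 2
right_p(sim_stats, 2)      # 2/7: share at or above 2
two_sided_p(sim_stats, 2)  # 2 * (2/7) = 4/7
two_sided_p(sim_stats, 0)  # capped at 1, since 2 * (4/7) > 1
```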
diff --git a/tests/testthat/test-get_p_value.R b/tests/testthat/test-get_p_value.R index b1d78aec..832466a0 100644 --- a/tests/testthat/test-get_p_value.R +++ b/tests/testthat/test-get_p_value.R @@ -1,60 +1,36 @@ context("get_p_value") set.seed(2018) -test_df <- tibble::tibble(stat = rnorm(100)) +test_df <- tibble::tibble( + stat = sample(c( + -5, -4, -4, -4, -1, -0.5, rep(0, 6), 1, 1, 3.999, 4, 4, 4.001, 5, 5 + )) +) test_that("direction is appropriate", { expect_error(test_df %>% get_p_value(obs_stat = 0.5, direction = "righ")) }) -test_that("get_p_value makes sense", { - expect_silent( - test_df %>% - get_p_value(obs_stat = 0.7, direction = "right") - ) - expect_lt( - iris_calc %>% - get_p_value(obs_stat = 0.1, direction = "right") %>% - dplyr::pull(), - expected = 0.1 - ) - expect_gt( - iris_calc %>% - get_p_value(obs_stat = -0.1, direction = "greater") %>% - dplyr::pull(), - expected = 0.9 - ) +test_that("get_p_value works", { + expect_equal(get_p_value(test_df, 4, "right")[[1]][1], 5/20) + expect_equal(get_p_value(test_df, 4, "left")[[1]][1], 17/20) + expect_equal(get_p_value(test_df, 4, "both")[[1]][1], 10/20) + + expect_equal(get_p_value(test_df, 0, "right")[[1]][1], 14/20) + expect_equal(get_p_value(test_df, 0, "left")[[1]][1], 12/20) + # This is also a check for not returning value more than 1 + expect_equal(get_p_value(test_df, 0, "both")[[1]][1], 1) + + expect_equal(get_p_value(test_df, -3.999, "right")[[1]][1], 16/20) + expect_equal(get_p_value(test_df, -3.999, "left")[[1]][1], 4/20) + expect_equal(get_p_value(test_df, -3.999, "both")[[1]][1], 8/20) + expect_equal( - iris_calc %>% - get_p_value(obs_stat = median(iris_calc$stat), direction = "both") %>% - dplyr::pull(), - expected = 1 - ) - expect_lt( - iris_calc %>% - get_p_value(obs_stat = -0.2, direction = "left") %>% - dplyr::pull(), - expected = 0.02 - ) - expect_gt( - iris_calc %>% - get_p_value(obs_stat = -0.2, direction = "right") %>% - dplyr::pull(), - expected = 0.98 + get_p_value(test_df, 4, "greater"), get_p_value(test_df, 4, "right") ) + expect_equal(get_p_value(test_df, 4, "less"), get_p_value(test_df, 4, "left")) expect_equal( - iris_calc %>% - get_p_value( - obs_stat = median(iris_calc$stat) + 1, direction = "two_sided" - ) %>% - dplyr::pull(), - expected = 0 - ) - expect_error( - iris_calc %>% - get_p_value( - obs_stat = median(iris_calc$stat) + 1, direction = "wrong" - ) + get_p_value(test_df, 4, "two_sided"), get_p_value(test_df, 4, "both") ) }) @@ -69,5 +45,4 @@ test_that("theoretical p-value not supported error", { calculate(stat = "F") %>% get_p_value(obs_stat = obs_F, direction = "right") ) - }) From ba9e05a012a6d7aee3a12ce69163ddbd0979d8ee Mon Sep 17 00:00:00 2001 From: echasnovski Date: Sat, 27 Oct 2018 22:35:03 +0300 Subject: [PATCH 68/78] Mention breaking change of two-sided p-value method in 'NEWS.md'. --- NEWS.md | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/NEWS.md b/NEWS.md index c101e33e..49bc9c27 100755 --- a/NEWS.md +++ b/NEWS.md @@ -1,5 +1,9 @@ # infer 0.3.1.9000 +## Breaking changes + +- Changed method of computing two-sided p-value to a more conventional one. It also makes `get_pvalue()` and `visualize()` more aligned (#205). + ## Deprecation changes - Deprecated `p_value()` (use `get_p_value()` instead) (#180). From 9466153eae0b624e927163c288d6727ef2f1ceb9 Mon Sep 17 00:00:00 2001 From: echasnovski Date: Sat, 27 Oct 2018 22:45:44 +0300 Subject: [PATCH 69/78] Remove `get_percentile()`. 
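`get_percentile()` was only a thin wrapper around `stats::ecdf()`; the remaining use in `mirror_obs_stat()` just reflects the observed statistic through the simulated distribution. A small standalone sketch of that calculation (the same numbers the updated test checks):

```r
# Reflect obs = 4 around the distribution of 1:10, as mirror_obs_stat() now does inline
obs_percentile <- stats::ecdf(1:10)(4)             # 0.4, what get_percentile() used to return
stats::quantile(1:10, probs = 1 - obs_percentile)  # named "60%", value 6.4
```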
--- R/visualize.R | 16 ++++++---------- man/visualize.Rd | 7 ++++--- tests/testthat/test-visualize.R | 4 ++-- 3 files changed, 12 insertions(+), 15 deletions(-) diff --git a/R/visualize.R b/R/visualize.R index 119cce3b..186448d7 100755 --- a/R/visualize.R +++ b/R/visualize.R @@ -538,16 +538,6 @@ check_shade_confidence_interval_args <- function(color, fill) { } } -get_percentile <- function(vector, observation) { - stats::ecdf(vector)(observation) -} - -mirror_obs_stat <- function(vector, observation) { - obs_percentile <- get_percentile(vector, observation) - - stats::quantile(vector, probs = 1 - obs_percentile) -} - short_theory_type <- function(x) { theory_attr <- attr(x, "theory_type") theory_types <- list( @@ -622,6 +612,12 @@ two_tail_data <- function(obs_stat, direction) { } } +mirror_obs_stat <- function(vector, observation) { + obs_percentile <- stats::ecdf(vector)(observation) + + stats::quantile(vector, probs = 1 - obs_percentile) +} + get_viz_method <- function(data) { attr(data, "viz_method") } diff --git a/man/visualize.Rd b/man/visualize.Rd index 56697ee4..5494cb3e 100755 --- a/man/visualize.Rd +++ b/man/visualize.Rd @@ -68,11 +68,12 @@ the theoretical distribution (or both!). \details{ In order to make visualization workflow more straightforward and explicit \code{visualize()} now only should be used to plot statistics directly. -That is why arguments, not related to this task, are deprecated and will be +That is why arguments not related to this task are deprecated and will be removed in a future release of \{infer\}. -To add information related to p-value use \code{\link[=shade_p_value]{shade_p_value()}}. To add -information related to confidence interval use \code{\link[=shade_confidence_interval]{shade_confidence_interval()}}. +To add to plot information related to p-value use \code{\link[=shade_p_value]{shade_p_value()}}. To add +to plot information related to confidence interval use +\code{\link[=shade_confidence_interval]{shade_confidence_interval()}}. } \examples{ # Permutations to create a simulation-based null distribution for diff --git a/tests/testthat/test-visualize.R b/tests/testthat/test-visualize.R index 53222d9f..26c0e750 100644 --- a/tests/testthat/test-visualize.R +++ b/tests/testthat/test-visualize.R @@ -297,8 +297,8 @@ test_that("visualize basic tests", { ) }) -test_that("get_percentile works", { - expect_equal(get_percentile(1:10, 4), 0.4) +test_that("mirror_obs_stat works", { + expect_equal(mirror_obs_stat(1:10, 4), c(`60%` = 6.4)) }) test_that("obs_stat as a data.frame works", { From b71939affd046f063099f6d936d393aa4ca4d940 Mon Sep 17 00:00:00 2001 From: echasnovski Date: Sat, 27 Oct 2018 22:50:15 +0300 Subject: [PATCH 70/78] Fix missed formatting issues in `get_p_value.R`. 
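Besides spacing, the cleanup below drops the intermediate `pvalue` object and the trailing `return(pvalue)`. That is safe because an R function returns the value of its last evaluated expression, and the commented-out theoretical branch after the call does not add one. A tiny illustration (not package code):

```r
# The explicit return() is redundant when the call is already the last expression
f <- function(x) {
  mean(x)  # last evaluated expression, so its value is returned
  # a commented-out alternative here has no effect on the result
}
f(c(1, 2, 3))  # 2
```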
--- R/get_p_value.R | 10 ++-------- 1 file changed, 2 insertions(+), 8 deletions(-) diff --git a/R/get_p_value.R b/R/get_p_value.R index 7e8119e6..0081053d 100644 --- a/R/get_p_value.R +++ b/R/get_p_value.R @@ -42,7 +42,7 @@ NULL #' @export get_p_value <- function(x, obs_stat, direction) { check_type(x, is.data.frame) - if(!is_generated(x) & is_hypothesized(x)) { + if (!is_generated(x) & is_hypothesized(x)) { stop_glue( "Theoretical p-values are not yet supported.", "`x` should be the result of calling `generate()`.", @@ -52,11 +52,7 @@ get_p_value <- function(x, obs_stat, direction) { obs_stat <- check_obs_stat(obs_stat) check_direction(direction) - pvalue <- simulation_based_p_value( - x = x, - obs_stat = obs_stat, - direction = direction - ) + simulation_based_p_value(x = x, obs_stat = obs_stat, direction = direction) ## Theoretical-based p-value # Could be more specific @@ -71,8 +67,6 @@ get_p_value <- function(x, obs_stat, direction) { # obs_stat = obs_stat, # direction = direction) # } - - return(pvalue) } #' @rdname get_p_value From 7f45b2f750e85e0ad7b14aba7973ef248aea5075 Mon Sep 17 00:00:00 2001 From: Andrew Bray Date: Wed, 7 Nov 2018 17:05:21 -0800 Subject: [PATCH 71/78] switch over to publish at gh-pages --- .travis.yml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/.travis.yml b/.travis.yml index d37dc8cc..702ebfa0 100644 --- a/.travis.yml +++ b/.travis.yml @@ -19,7 +19,7 @@ notifications: r_packages: - devtools - + r_github_packages: - hadley/pkgdown - tidymodels/infer @@ -33,8 +33,8 @@ deploy: keep-history: true local-dir: docs github_token: $GITHUBTRAVIS - target-branch: gh-pages-dev -# target-branch: gh-pages +# target-branch: gh-pages-dev + target-branch: gh-pages after_success: - Rscript -e 'covr::codecov()' From c7b29bd574b7275f9dce134fd07828850a967acc Mon Sep 17 00:00:00 2001 From: Andrew Bray Date: Wed, 7 Nov 2018 17:06:11 -0800 Subject: [PATCH 72/78] fix typo --- R/visualize.R | 120 +++++++++++++++++++++++++------------------------- 1 file changed, 60 insertions(+), 60 deletions(-) diff --git a/R/visualize.R b/R/visualize.R index 186448d7..7095a5c0 100755 --- a/R/visualize.R +++ b/R/visualize.R @@ -37,11 +37,11 @@ #' explicit `visualize()` now only should be used to plot statistics directly. #' That is why arguments not related to this task are deprecated and will be #' removed in a future release of \\{infer\\}. -#' +#' #' To add to plot information related to p-value use [shade_p_value()]. To add #' to plot information related to confidence interval use #' [shade_confidence_interval()]. -#' +#' #' @return A ggplot object showing the simulation-based distribution as a #' histogram or bar graph. Also used to show the theoretical curves. #' @@ -97,15 +97,15 @@ visualize <- function(data, bins = 15, method = "simulation", data, bins, method, dens_color, obs_stat, obs_stat_color, pvalue_fill, direction, endpoints, endpoints_color, ci_fill ) - warn_depricated_args(obs_stat, endpoints) + warn_deprecated_args(obs_stat, endpoints) endpoints <- impute_endpoints(endpoints) obs_stat <- impute_obs_stat(obs_stat, direction, endpoints) - + # Add `method` to `data` attributes to enable later possibility of # complicated computation of p-value regions (in case `direction = "both"`) # in `shade_p_value()`. attr(data, "viz_method") <- method - + infer_plot <- ggplot(data) + simulation_layer(data, bins, ...) + theoretical_layer(data, dens_color, ...) 
+ @@ -113,12 +113,12 @@ visualize <- function(data, bins = 15, method = "simulation", shade_p_value( obs_stat, direction, obs_stat_color, pvalue_fill, ... ) - + if (!is.null(direction) && (direction == "between")) { infer_plot <- infer_plot + shade_confidence_interval(endpoints, endpoints_color, ci_fill, ...) } - + infer_plot } @@ -147,14 +147,14 @@ check_visualize_args <- function(data, bins, method, dens_color, "Expecting `endpoints` to be a 1 x 2 data frame or 2 element vector." ) } - + if (!(method %in% c("simulation", "theoretical", "both"))) { stop_glue( 'Provide `method` with one of three options: `"theoretical"`, `"both"`, ', 'or `"simulation"`. `"simulation"` is the default.' ) } - + if (method == "both") { if (!("stat" %in% names(data))) { stop_glue( @@ -162,7 +162,7 @@ check_visualize_args <- function(data, bins, method, dens_color, 'to `visualize(method = "both")`' ) } - + if ( ("replicate" %in% names(data)) && (length(unique(data$replicate)) < 100) ) { @@ -172,18 +172,18 @@ check_visualize_args <- function(data, bins, method, dens_color, ) } } - + if (!is.null(obs_stat) && !is.null(endpoints)) { warning_glue( "Values for both `endpoints` and `obs_stat` were given when only one ", "should be set. Ignoring `obs_stat` values." ) } - + TRUE } -warn_depricated_args <- function(obs_stat, endpoints) { +warn_deprecated_args <- function(obs_stat, endpoints) { if (!is.null(obs_stat)) { warning_glue( "`visualize()` shouldn't be used to plot p-value. Arguments `obs_stat`, ", @@ -191,7 +191,7 @@ warn_depricated_args <- function(obs_stat, endpoints) { "Use `shade_p_value()` instead." ) } - + if (!is.null(endpoints)) { warning_glue( "`visualize()` shouldn't be used to plot confidence interval. Arguments ", @@ -199,7 +199,7 @@ warn_depricated_args <- function(obs_stat, endpoints) { "Use `shade_confidence_interval()` instead." ) } - + TRUE } @@ -211,23 +211,23 @@ impute_endpoints <- function(endpoints) { ) endpoints <- endpoints[1:2] } - + if (is.data.frame(endpoints)) { if ((nrow(endpoints) != 1) || (ncol(endpoints) != 2)) { stop_glue( "Expecting `endpoints` to be a 1 x 2 data frame or 2 element vector." ) } - + endpoints <- unlist(endpoints) } - + endpoints } impute_obs_stat <- function(obs_stat, direction, endpoints) { obs_stat <- check_obs_stat(obs_stat) - + if ( !is.null(direction) && (is.null(obs_stat) + is.null(endpoints) != 1) @@ -237,17 +237,17 @@ impute_obs_stat <- function(obs_stat, direction, endpoints) { "or the observed statistic `obs_stat` to be provided." ) } - + obs_stat } simulation_layer <- function(data, bins, ...) { method <- get_viz_method(data) - + if (method == "theoretical") { return(list()) } - + if (method == "simulation") { if (length(unique(data$stat)) >= 10) { res <- list( @@ -266,21 +266,21 @@ simulation_layer <- function(data, bins, ...) { ) ) } - + res } theoretical_layer <- function(data, dens_color, ...) { method <- get_viz_method(data) - + if (method == "simulation") { return(list()) } - + warn_theoretical_layer(data) - + theory_type <- short_theory_type(data) - + switch( theory_type, t = theory_curve( @@ -302,12 +302,12 @@ theoretical_layer <- function(data, dens_color, ...) { warn_theoretical_layer <- function(data) { method <- get_viz_method(data) - + warning_glue( "Check to make sure the conditions have been met for the theoretical ", "method. {{infer}} currently does not check these for you." 
) - + if ( !is_nuat(data, "stat") && !(attr(data, "stat") %in% c("t", "z", "Chisq", "F")) @@ -329,7 +329,7 @@ warn_theoretical_layer <- function(data) { theory_curve <- function(method, d_fun, q_fun, args_list, dens_color) { if (method == "theoretical") { x_range <- do.call(q_fun, c(p = list(c(0.001, 0.999)), args_list)) - + res <- list( stat_function( data = data.frame(x = x_range), mapping = aes(x), @@ -344,24 +344,24 @@ theory_curve <- function(method, d_fun, q_fun, args_list, dens_color) { ) ) } - + res } title_labels_layer <- function(data) { method <- get_viz_method(data) theory_type <- short_theory_type(data) - + title_string <- switch( method, simulation = "Simulation-Based Null Distribution", theoretical = "Theoretical {theory_type} Null Distribution", both = "Simulation-Based and Theoretical {theory_type} Null Distributions" ) - + x_lab <- switch(method, simulation = "stat", "{theory_type} stat") y_lab <- switch(method, simulation = "count", "density") - + list( ggtitle(glue_null(title_string)), xlab(glue_null(x_lab)), @@ -370,11 +370,11 @@ title_labels_layer <- function(data) { } #' Add information about p-value region(s) -#' +#' #' `shade_p_value()` plots p-value region(s) on top of the [visualize()] output. #' It should be used as \\{ggplot2\\} layer function (see examples). #' `shade_pvalue()` is its alias. -#' +#' #' @param obs_stat A numeric value or 1x1 data frame corresponding to what the #' observed statistic is. #' @param direction A string specifying in which direction the shading should @@ -389,7 +389,7 @@ title_labels_layer <- function(data) { #' #' @return A list of \\{ggplot2\\} objects to be added to the `visualize()` #' output. -#' +#' #' @seealso [shade_confidence_interval()] to add information about confidence #' interval. #' @@ -401,11 +401,11 @@ title_labels_layer <- function(data) { #' generate(reps = 100, type = "permute") %>% #' calculate(stat = "t", order = c("1", "0")) %>% #' visualize(method = "both") -#' +#' #' viz_plot + shade_p_value(1.5, direction = "right") #' viz_plot + shade_p_value(1.5, direction = "both") #' viz_plot + shade_p_value(1.5, direction = NULL) -#' +#' #' @name shade_p_value NULL @@ -415,21 +415,21 @@ shade_p_value <- function(obs_stat, direction, color = "red2", fill = "pink", ...) { obs_stat <- check_obs_stat(obs_stat) check_shade_p_value_args(obs_stat, direction, color, fill) - + res <- list() if (is.null(obs_stat)) { return(res) } - + # Add shading if (!is.null(direction) && !is.null(fill)) { if (direction %in% c("less", "left", "greater", "right")) { tail_data <- one_tail_data(obs_stat, direction) - + res <- c(res, list(geom_tail(tail_data, fill, ...))) } else if (direction %in% c("two_sided", "both")) { tail_data <- two_tail_data(obs_stat, direction) - + res <- c(res, list(geom_tail(tail_data, fill, ...))) } else { warning_glue( @@ -438,7 +438,7 @@ shade_p_value <- function(obs_stat, direction, ) } } - + # Add vertical line at `obs_stat` c( res, list(geom_vline(xintercept = obs_stat, size = 2, color = color, ...)) @@ -458,16 +458,16 @@ check_shade_p_value_args <- function(obs_stat, direction, color, fill) { } check_type(color, is_color_string, "color string") check_type(fill, is_color_string, "color string") - + TRUE } #' Add information about confidence interval -#' +#' #' `shade_confidence_interval()` plots confidence interval region on top of the #' [visualize()] output. It should be used as \\{ggplot2\\} layer function (see #' examples). `shade_ci()` is its alias. 
-#' +#' #' @param endpoints A 2 element vector or a 1 x 2 data frame containing the #' lower and upper values to be plotted. Most useful for visualizing #' conference intervals. @@ -478,7 +478,7 @@ check_shade_p_value_args <- function(obs_stat, direction, color, fill) { #' @param ... Other arguments passed along to \\{ggplot2\\} functions. #' @return A list of \\{ggplot2\\} objects to be added to the `visualize()` #' output. -#' +#' #' @seealso [shade_p_value()] to add information about p-value region. #' #' @examples @@ -489,10 +489,10 @@ check_shade_p_value_args <- function(obs_stat, direction, color, fill) { #' generate(reps = 100, type = "permute") %>% #' calculate(stat = "t", order = c("1", "0")) %>% #' visualize(method = "both") -#' +#' #' viz_plot + shade_confidence_interval(c(-1.5, 1.5)) #' viz_plot + shade_confidence_interval(c(-1.5, 1.5), fill = NULL) -#' +#' #' @name shade_confidence_interval NULL @@ -502,12 +502,12 @@ shade_confidence_interval <- function(endpoints, color = "mediumaquamarine", fill = "turquoise", ...) { endpoints <- impute_endpoints(endpoints) check_shade_confidence_interval_args(color, fill) - + res <- list() if (is.null(endpoints)) { return(res) } - + if (!is.null(fill)) { res <- c( res, list( @@ -521,7 +521,7 @@ shade_confidence_interval <- function(endpoints, color = "mediumaquamarine", ) ) } - + c( res, list(geom_vline(xintercept = endpoints, size = 2, color = color, ...)) ) @@ -546,9 +546,9 @@ short_theory_type <- function(x) { z = c("One sample prop z", "Two sample props z"), `Chi-Square` = c("Chi-square test of indep", "Chi-square Goodness of Fit") ) - + is_type <- vapply(theory_types, function(x) {theory_attr %in% x}, logical(1)) - + names(theory_types)[which(is_type)[1]] } @@ -560,7 +560,7 @@ warn_right_tail_test <- function(direction, stat_name) { "Proceed with caution." ) } - + TRUE } @@ -581,7 +581,7 @@ one_tail_data <- function(obs_stat, direction) { # Needed to warn about incorrect usage of right tail tests. function(data) { warn_right_tail_test(direction, short_theory_type(data)) - + if (direction %in% c("less", "left")) { data.frame(x_min = -Inf, x_max = obs_stat) } else if (direction %in% c("greater", "right")) { @@ -598,13 +598,13 @@ two_tail_data <- function(obs_stat, direction) { # Also needed to warn about incorrect usage of right tail tests. 
function(data) { warn_right_tail_test(direction, short_theory_type(data)) - + if (get_viz_method(data) == "theoretical") { second_border <- -obs_stat } else { second_border <- mirror_obs_stat(data$stat, obs_stat) } - + data.frame( x_min = c(-Inf, max(obs_stat, second_border)), x_max = c(min(obs_stat, second_border), Inf) @@ -614,7 +614,7 @@ two_tail_data <- function(obs_stat, direction) { mirror_obs_stat <- function(vector, observation) { obs_percentile <- stats::ecdf(vector)(observation) - + stats::quantile(vector, probs = 1 - obs_percentile) } From 2c542bce91570f3369b4f422bde155625c9cd7c4 Mon Sep 17 00:00:00 2001 From: Andrew Bray Date: Sun, 11 Nov 2018 16:06:28 -0800 Subject: [PATCH 73/78] bump version for cran release --- NEWS.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/NEWS.md b/NEWS.md index 49bc9c27..f3141d62 100755 --- a/NEWS.md +++ b/NEWS.md @@ -1,4 +1,4 @@ -# infer 0.3.1.9000 +# infer 0.4.0 ## Breaking changes From ffa63d40f73173626ccaecd40e26eb6b25f0f6e3 Mon Sep 17 00:00:00 2001 From: Chester Ismay Date: Sun, 11 Nov 2018 16:12:47 -0800 Subject: [PATCH 74/78] Bump to 0.4.0 --- DESCRIPTION | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/DESCRIPTION b/DESCRIPTION index 7e036e33..12bdc562 100755 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,7 +1,7 @@ Package: infer Type: Package Title: Tidy Statistical Inference -Version: 0.3.1.9000 +Version: 0.4.0 Authors@R: c( person("Andrew", "Bray", email = "abray@reed.edu", role = c("aut", "cre")), person("Chester", "Ismay", email = "chester.ismay@gmail.com", role = "aut"), From 93ded4d59249b439e343ee5f370c32cb8477ecf2 Mon Sep 17 00:00:00 2001 From: echasnovski Date: Tue, 13 Nov 2018 22:16:36 +0200 Subject: [PATCH 75/78] Exclude possible vector conditions in `bootstrap()` (related to #210). --- R/generate.R | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/R/generate.R b/R/generate.R index 5ee1e324..400f5b0b 100755 --- a/R/generate.R +++ b/R/generate.R @@ -99,7 +99,7 @@ bootstrap <- function(x, reps = 1, ...) { # If so, shift the variable chosen to have a mean corresponding # to that specified in `hypothesize` if (!is.null(attr(attr(x, "params"), "names"))){ - if (attr(attr(x, "params"), "names") == "mu") { + if (identical(attr(attr(x, "params"), "names"), "mu")) { col <- as.character(attr(x, "response")) # if (attr(x, "theory_type") != "One sample t") { x[[col]] <- x[[col]] - mean(x[[col]], na.rm = TRUE) + attr(x, "params") @@ -117,7 +117,7 @@ bootstrap <- function(x, reps = 1, ...) { } # Similarly for median - else if (attr(attr(x, "params"), "names") == "med") { + else if (identical(attr(attr(x, "params"), "names"), "med")) { col <- as.character(attr(x, "response")) x[[col]] <- x[[col]] - stats::median(x[[col]], na.rm = TRUE) + attr(x, "params") @@ -128,7 +128,7 @@ bootstrap <- function(x, reps = 1, ...) 
{ # Similarly for sd ## Temporarily removed since this implementation does not scale correctly - # else if (attr(attr(x, "params"), "names") == "sigma") { + # else if (identical(attr(attr(x, "params"), "names"), "sigma")) { # col <- as.character(attr(x, "response")) # x[[col]] <- x[[col]] - # stats::sd(x[[col]], na.rm = TRUE) + attr(x, "params") From c60e759012ad230e63c54d016ac093e2a8e85284 Mon Sep 17 00:00:00 2001 From: Andrew Bray Date: Tue, 13 Nov 2018 15:03:34 -0800 Subject: [PATCH 76/78] remove inst dir --- inst/doc/chisq_test.R | 71 -- inst/doc/chisq_test.Rmd | 135 --- inst/doc/chisq_test.html | 250 ----- inst/doc/flights_examples.R | 272 ----- inst/doc/flights_examples.Rmd | 358 ------- inst/doc/flights_examples.html | 461 --------- inst/doc/mtcars_examples.R | 140 --- inst/doc/mtcars_examples.Rmd | 223 ----- inst/doc/mtcars_examples.html | 468 --------- inst/doc/observed_stat_examples.R | 485 --------- inst/doc/observed_stat_examples.Rmd | 692 ------------- inst/doc/observed_stat_examples.html | 1390 -------------------------- inst/doc/two_sample_t.R | 74 -- inst/doc/two_sample_t.Rmd | 138 --- inst/doc/two_sample_t.html | 239 ----- 15 files changed, 5396 deletions(-) delete mode 100644 inst/doc/chisq_test.R delete mode 100644 inst/doc/chisq_test.Rmd delete mode 100644 inst/doc/chisq_test.html delete mode 100644 inst/doc/flights_examples.R delete mode 100644 inst/doc/flights_examples.Rmd delete mode 100644 inst/doc/flights_examples.html delete mode 100644 inst/doc/mtcars_examples.R delete mode 100644 inst/doc/mtcars_examples.Rmd delete mode 100644 inst/doc/mtcars_examples.html delete mode 100644 inst/doc/observed_stat_examples.R delete mode 100644 inst/doc/observed_stat_examples.Rmd delete mode 100644 inst/doc/observed_stat_examples.html delete mode 100644 inst/doc/two_sample_t.R delete mode 100755 inst/doc/two_sample_t.Rmd delete mode 100644 inst/doc/two_sample_t.html diff --git a/inst/doc/chisq_test.R b/inst/doc/chisq_test.R deleted file mode 100644 index fc1d4f24..00000000 --- a/inst/doc/chisq_test.R +++ /dev/null @@ -1,71 +0,0 @@ -## ----include=FALSE------------------------------------------------------- -knitr::opts_chunk$set(fig.width = 8, fig.height = 3) - -## ----message=FALSE, warning=FALSE---------------------------------------- -library(nycflights13) -library(dplyr) -library(ggplot2) -library(stringr) -library(infer) -set.seed(2017) -fli_small <- flights %>% - na.omit() %>% - sample_n(size = 500) %>% - mutate(season = case_when( - month %in% c(10:12, 1:3) ~ "winter", - month %in% c(4:9) ~ "summer" - )) %>% - mutate(day_hour = case_when( - between(hour, 1, 12) ~ "morning", - between(hour, 13, 24) ~ "not morning" - )) %>% - select(arr_delay, dep_delay, season, - day_hour, origin, carrier) - -## ------------------------------------------------------------------------ -obs_chisq <- fli_small %>% - specify(origin ~ season) %>% # alt: response = origin, explanatory = season - calculate(stat = "Chisq") - -## ------------------------------------------------------------------------ -obs_chisq <- fli_small %>% - chisq_test(formula = origin ~ season) %>% - dplyr::select(statistic) - -## ------------------------------------------------------------------------ -obs_chisq <- fli_small %>% - chisq_stat(formula = origin ~ season) - -## ------------------------------------------------------------------------ -chisq_null_perm <- fli_small %>% - specify(origin ~ season) %>% # alt: response = origin, explanatory = season - hypothesize(null = "independence") %>% - generate(reps = 1000, type 
= "permute") %>% - calculate(stat = "Chisq") - -visualize(chisq_null_perm) + - shade_p_value(obs_stat = obs_chisq, direction = "greater") - -## ------------------------------------------------------------------------ -chisq_null_perm %>% - get_p_value(obs_stat = obs_chisq, direction = "greater") - -## ------------------------------------------------------------------------ -chisq_null_theor <- fli_small %>% - specify(origin ~ season) %>% - hypothesize(null = "independence") %>% - # generate() ## Not used for theoretical - calculate(stat = "Chisq") - -visualize(chisq_null_theor, method = "theoretical") + - shade_p_value(obs_stat = obs_chisq, direction = "right") - -## ------------------------------------------------------------------------ -visualize(chisq_null_perm, method = "both") + - shade_p_value(obs_stat = obs_chisq, direction = "right") - -## ------------------------------------------------------------------------ -fli_small %>% - chisq_test(formula = origin ~ season) %>% - dplyr::pull(p_value) - diff --git a/inst/doc/chisq_test.Rmd b/inst/doc/chisq_test.Rmd deleted file mode 100644 index 2693b8c3..00000000 --- a/inst/doc/chisq_test.Rmd +++ /dev/null @@ -1,135 +0,0 @@ ---- -title: "Chi-squared test example using `nycflights13` `flights` data" -author: "Chester Ismay" -date: "`r Sys.Date()`" -output: - rmarkdown::html_vignette: - df_print: kable -vignette: | - %\VignetteIndexEntry{Chi-squared test flights example} - %\VignetteEngine{knitr::rmarkdown} - %\VignetteEncoding{UTF-8} ---- - -```{r include=FALSE} -knitr::opts_chunk$set(fig.width = 8, fig.height = 3) -``` - -**Note**: The `type` argument in `generate()` is automatically filled based on the entries for `specify()` and -`hypothesize()`. It can be removed throughout the examples that follow. It is left in to reiterate the type of generation process being performed. - -## Data preparation - -```{r message=FALSE, warning=FALSE} -library(nycflights13) -library(dplyr) -library(ggplot2) -library(stringr) -library(infer) -set.seed(2017) -fli_small <- flights %>% - na.omit() %>% - sample_n(size = 500) %>% - mutate(season = case_when( - month %in% c(10:12, 1:3) ~ "winter", - month %in% c(4:9) ~ "summer" - )) %>% - mutate(day_hour = case_when( - between(hour, 1, 12) ~ "morning", - between(hour, 13, 24) ~ "not morning" - )) %>% - select(arr_delay, dep_delay, season, - day_hour, origin, carrier) -``` - -* Two numeric - `arr_delay`, `dep_delay` -* Two categories - - `season` (`"winter"`, `"summer"`), - - `day_hour` (`"morning"`, `"not morning"`) -* Three categories - `origin` (`"EWR"`, `"JFK"`, `"LGA"`) -* Sixteen categories - `carrier` - -*** - -# One numerical variable, one categorical (2 levels) - -## Calculate observed statistic - -The recommended approach is to use `specify() %>% calculate()`: - -```{r} -obs_chisq <- fli_small %>% - specify(origin ~ season) %>% # alt: response = origin, explanatory = season - calculate(stat = "Chisq") -``` - -The observed $\chi^2$ statistic is `r obs_chisq`. - -Or using `chisq_test` in `infer` - -```{r} -obs_chisq <- fli_small %>% - chisq_test(formula = origin ~ season) %>% - dplyr::select(statistic) -``` - -Again, the observed $\chi^2$ statistic is `r obs_chisq`. - -Or using another shortcut function in `infer`: - -```{r} -obs_chisq <- fli_small %>% - chisq_stat(formula = origin ~ season) -``` - -Lastly, the observed $\chi^2$ statistic is `r obs_chisq`. 
- -## Randomization approach to $\chi^2$-statistic - -```{r} -chisq_null_perm <- fli_small %>% - specify(origin ~ season) %>% # alt: response = origin, explanatory = season - hypothesize(null = "independence") %>% - generate(reps = 1000, type = "permute") %>% - calculate(stat = "Chisq") - -visualize(chisq_null_perm) + - shade_p_value(obs_stat = obs_chisq, direction = "greater") -``` - -## Calculate the randomization-based $p$-value - -```{r} -chisq_null_perm %>% - get_p_value(obs_stat = obs_chisq, direction = "greater") -``` - - -## Theoretical distribution - -```{r } -chisq_null_theor <- fli_small %>% - specify(origin ~ season) %>% - hypothesize(null = "independence") %>% - # generate() ## Not used for theoretical - calculate(stat = "Chisq") - -visualize(chisq_null_theor, method = "theoretical") + - shade_p_value(obs_stat = obs_chisq, direction = "right") -``` - -## Overlay appropriate $\chi^2$ distribution on top of permuted statistics - -```{r} -visualize(chisq_null_perm, method = "both") + - shade_p_value(obs_stat = obs_chisq, direction = "right") -``` - -## Compute theoretical p-value - -```{r} -fli_small %>% - chisq_test(formula = origin ~ season) %>% - dplyr::pull(p_value) -``` - diff --git a/inst/doc/chisq_test.html b/inst/doc/chisq_test.html deleted file mode 100644 index b6f3c74b..00000000 --- a/inst/doc/chisq_test.html +++ /dev/null @@ -1,250 +0,0 @@ - - - - - - - - - - - - - - - - -Chi-squared test example using nycflights13 flights data - - - - - - - - - - - - - - - - - -

- - - - - - - - diff --git a/inst/doc/flights_examples.R b/inst/doc/flights_examples.R deleted file mode 100644 index 0d8022e1..00000000 --- a/inst/doc/flights_examples.R +++ /dev/null @@ -1,272 +0,0 @@ -## ----include=FALSE------------------------------------------------------- -knitr::opts_chunk$set(fig.width = 8, fig.height = 5) - -## ----message=FALSE, warning=FALSE---------------------------------------- -library(nycflights13) -library(dplyr) -library(ggplot2) -library(stringr) -library(infer) -set.seed(2017) -fli_small <- flights %>% - na.omit() %>% - sample_n(size = 500) %>% - mutate(season = case_when( - month %in% c(10:12, 1:3) ~ "winter", - month %in% c(4:9) ~ "summer" - )) %>% - mutate(day_hour = case_when( - between(hour, 1, 12) ~ "morning", - between(hour, 13, 24) ~ "not morning" - )) %>% - select(arr_delay, dep_delay, season, - day_hour, origin, carrier) - -## ------------------------------------------------------------------------ -x_bar <- fli_small %>% - summarize(mean(dep_delay)) %>% - pull() - -## ------------------------------------------------------------------------ -null_distn <- fli_small %>% - specify(response = dep_delay) %>% - hypothesize(null = "point", mu = 10) %>% - generate(reps = 1000, type = "bootstrap") %>% - calculate(stat = "mean") -ggplot(data = null_distn, mapping = aes(x = stat)) + - geom_density() + - geom_vline(xintercept = x_bar, color = "red") -null_distn %>% - summarize(p_value = mean(stat >= x_bar) * 2) - -## ------------------------------------------------------------------------ -x_tilde <- fli_small %>% - summarize(median(dep_delay)) %>% - pull() -null_distn <- fli_small %>% - specify(response = dep_delay) %>% - hypothesize(null = "point", med = -1) %>% - generate(reps = 1000, type = "bootstrap") %>% - calculate(stat = "median") -ggplot(null_distn, aes(x = stat)) + - geom_bar() + - geom_vline(xintercept = x_tilde, color = "red") -null_distn %>% - summarize(p_value = mean(stat <= x_tilde) * 2) - -## ------------------------------------------------------------------------ -p_hat <- fli_small %>% - summarize(mean(day_hour == "morning")) %>% - pull() -null_distn <- fli_small %>% - specify(response = day_hour, success = "morning") %>% - hypothesize(null = "point", p = .5) %>% - generate(reps = 1000, type = "simulate") %>% - calculate(stat = "prop") -ggplot(null_distn, aes(x = stat)) + - geom_bar() + - geom_vline(xintercept = p_hat, color = "red") -null_distn %>% - summarize(p_value = mean(stat <= p_hat) * 2) - -## ------------------------------------------------------------------------ -null_distn <- fli_small %>% - mutate(day_hour_logical = (day_hour == "morning")) %>% - specify(response = day_hour_logical, success = "TRUE") %>% - hypothesize(null = "point", p = .5) %>% - generate(reps = 1000, type = "simulate") %>% - calculate(stat = "prop") - -## ------------------------------------------------------------------------ -d_hat <- fli_small %>% - group_by(season) %>% - summarize(prop = mean(day_hour == "morning")) %>% - summarize(diff(prop)) %>% - pull() -null_distn <- fli_small %>% - specify(day_hour ~ season, success = "morning") %>% - hypothesize(null = "independence") %>% - generate(reps = 1000, type = "permute") %>% - calculate(stat = "diff in props", order = c("winter", "summer")) -ggplot(null_distn, aes(x = stat)) + - geom_density() + - geom_vline(xintercept = d_hat, color = "red") -null_distn %>% - summarize(p_value = mean(stat <= d_hat) * 2) %>% - pull() - -## ------------------------------------------------------------------------ 
-Chisq_hat <- fli_small %>% - specify(response = origin) %>% - hypothesize(null = "point", - p = c("EWR" = .33, "JFK" = .33, "LGA" = .34)) %>% - calculate(stat = "Chisq") -null_distn <- fli_small %>% - specify(response = origin) %>% - hypothesize(null = "point", - p = c("EWR" = .33, "JFK" = .33, "LGA" = .34)) %>% - generate(reps = 1000, type = "simulate") %>% - calculate(stat = "Chisq") -ggplot(null_distn, aes(x = stat)) + - geom_density() + - geom_vline(xintercept = pull(Chisq_hat), color = "red") -null_distn %>% - summarize(p_value = mean(stat >= pull(Chisq_hat))) %>% - pull() - -## ------------------------------------------------------------------------ -Chisq_hat <- fli_small %>% - chisq_stat(formula = day_hour ~ origin) -null_distn <- fli_small %>% - specify(day_hour ~ origin, success = "morning") %>% - hypothesize(null = "independence") %>% - generate(reps = 1000, type = "permute") %>% - calculate(stat = "Chisq") -ggplot(null_distn, aes(x = stat)) + - geom_density() + - geom_vline(xintercept = pull(Chisq_hat), color = "red") -null_distn %>% - summarize(p_value = mean(stat >= pull(Chisq_hat))) %>% - pull() - -## ------------------------------------------------------------------------ -d_hat <- fli_small %>% - group_by(season) %>% - summarize(mean_stat = mean(dep_delay)) %>% - # Since summer - winter - summarize(-diff(mean_stat)) %>% - pull() -null_distn <- fli_small %>% - specify(dep_delay ~ season) %>% # alt: response = dep_delay, - # explanatory = season - hypothesize(null = "independence") %>% - generate(reps = 1000, type = "permute") %>% - calculate(stat = "diff in means", order = c("summer", "winter")) -ggplot(null_distn, aes(x = stat)) + - geom_density() + - geom_vline(xintercept = d_hat, color = "red") -null_distn %>% - summarize(p_value = mean(stat <= d_hat) * 2) %>% - pull() - -## ------------------------------------------------------------------------ -d_hat <- fli_small %>% - group_by(season) %>% - summarize(median_stat = median(dep_delay)) %>% - # Since summer - winter - summarize(-diff(median_stat)) %>% - pull() -null_distn <- fli_small %>% - specify(dep_delay ~ season) %>% # alt: response = dep_delay, - # explanatory = season - hypothesize(null = "independence") %>% - generate(reps = 1000, type = "permute") %>% - calculate(stat = "diff in medians", order = c("summer", "winter")) -ggplot(null_distn, aes(x = stat)) + - geom_bar() + - geom_vline(xintercept = d_hat, color = "red") -null_distn %>% - summarize(p_value = mean(stat >= d_hat) * 2) %>% - pull() - -## ------------------------------------------------------------------------ -F_hat <- anova( - aov(formula = arr_delay ~ origin, data = fli_small) - )$`F value`[1] -null_distn <- fli_small %>% - specify(arr_delay ~ origin) %>% # alt: response = arr_delay, - # explanatory = origin - hypothesize(null = "independence") %>% - generate(reps = 1000, type = "permute") %>% - calculate(stat = "F") -ggplot(null_distn, aes(x = stat)) + - geom_density() + - geom_vline(xintercept = F_hat, color = "red") -null_distn %>% - summarize(p_value = mean(stat >= F_hat)) %>% - pull() - -## ------------------------------------------------------------------------ -slope_hat <- lm(arr_delay ~ dep_delay, data = fli_small) %>% - broom::tidy() %>% - filter(term == "dep_delay") %>% - pull(estimate) -null_distn <- fli_small %>% - specify(arr_delay ~ dep_delay) %>% - hypothesize(null = "independence") %>% - generate(reps = 1000, type = "permute") %>% - calculate(stat = "slope") -ggplot(null_distn, aes(x = stat)) + - geom_density() + - 
geom_vline(xintercept = slope_hat, color = "red") -null_distn %>% - summarize(p_value = mean(stat >= slope_hat) * 2) %>% - pull() - -## ------------------------------------------------------------------------ -x_bar <- fli_small %>% - summarize(mean(arr_delay)) %>% - pull() -boot <- fli_small %>% - specify(response = arr_delay) %>% - generate(reps = 1000, type = "bootstrap") %>% - calculate(stat = "mean") %>% - pull() -c(lower = x_bar - 2 * sd(boot), - upper = x_bar + 2 * sd(boot)) - -## ------------------------------------------------------------------------ -p_hat <- fli_small %>% - summarize(mean(day_hour == "morning")) %>% - pull() -boot <- fli_small %>% - specify(response = day_hour, success = "morning") %>% - generate(reps = 1000, type = "bootstrap") %>% - calculate(stat = "prop") %>% - pull() -c(lower = p_hat - 2 * sd(boot), - upper = p_hat + 2 * sd(boot)) - -## ------------------------------------------------------------------------ -d_hat <- fli_small %>% - group_by(season) %>% - summarize(mean_stat = mean(arr_delay)) %>% - # Since summer - winter - summarize(-diff(mean_stat)) %>% - pull() -boot <- fli_small %>% - specify(arr_delay ~ season) %>% - generate(reps = 1000, type = "bootstrap") %>% - calculate(stat = "diff in means", order = c("summer", "winter")) %>% - pull() -c(lower = d_hat - 2 * sd(boot), - upper = d_hat + 2 * sd(boot)) - -## ------------------------------------------------------------------------ -d_hat <- fli_small %>% - group_by(season) %>% - summarize(prop = mean(day_hour == "morning")) %>% - # Since summer - winter - summarize(-diff(prop)) %>% - pull() -boot <- fli_small %>% - specify(day_hour ~ season, success = "morning") %>% - generate(reps = 1000, type = "bootstrap") %>% - calculate(stat = "diff in props", order = c("summer", "winter")) %>% - pull() -c(lower = d_hat - 2 * sd(boot), - upper = d_hat + 2 * sd(boot)) - -## ------------------------------------------------------------------------ -slope_hat <- lm(arr_delay ~ dep_delay, data = fli_small) %>% - broom::tidy() %>% - filter(term == "dep_delay") %>% - pull(estimate) -boot <- fli_small %>% - specify(arr_delay ~ dep_delay) %>% - generate(reps = 1000, type = "bootstrap") %>% - calculate(stat = "slope") %>% - pull() -c(lower = slope_hat - 2 * sd(boot), - upper = slope_hat + 2 * sd(boot)) - diff --git a/inst/doc/flights_examples.Rmd b/inst/doc/flights_examples.Rmd deleted file mode 100644 index 059d2da2..00000000 --- a/inst/doc/flights_examples.Rmd +++ /dev/null @@ -1,358 +0,0 @@ ---- -title: "Randomization Examples using `nycflights13` `flights` data" -author: "Chester Ismay and Andrew Bray" -date: "2018-01-05" -output: - rmarkdown::html_vignette: - df_print: kable -vignette: | - %\VignetteIndexEntry{flights example} - %\VignetteEncoding{UTF-8} - %\VignetteEngine{knitr::rmarkdown} -editor_options: - chunk_output_type: console ---- - -**Note**: The `type` argument in `generate()` is automatically filled based on the entries for `specify()` and -`hypothesize()`. It can be removed throughout the examples that follow. It is left in to reiterate the type of generation process being performed. - -```{r include=FALSE} -knitr::opts_chunk$set(fig.width = 8, fig.height = 5) -``` - -This vignette is designed to show how to use the {infer} package with {dplyr} syntax. It does not show how to calculate observed statistics or p-values using the {infer} package. To see examples of these, check out the "Computation of observed statistics..." vignette instead. 
- -## Data preparation - -```{r message=FALSE, warning=FALSE} -library(nycflights13) -library(dplyr) -library(ggplot2) -library(stringr) -library(infer) -set.seed(2017) -fli_small <- flights %>% - na.omit() %>% - sample_n(size = 500) %>% - mutate(season = case_when( - month %in% c(10:12, 1:3) ~ "winter", - month %in% c(4:9) ~ "summer" - )) %>% - mutate(day_hour = case_when( - between(hour, 1, 12) ~ "morning", - between(hour, 13, 24) ~ "not morning" - )) %>% - select(arr_delay, dep_delay, season, - day_hour, origin, carrier) -``` - -* Two numeric - `arr_delay`, `dep_delay` -* Two categories - - `season` (`"winter"`, `"summer"`), - - `day_hour` (`"morning"`, `"not morning"`) -* Three categories - `origin` (`"EWR"`, `"JFK"`, `"LGA"`) -* Sixteen categories - `carrier` - -*** - -# Hypothesis tests - -### One numerical variable (mean) - -```{r} -x_bar <- fli_small %>% - summarize(mean(dep_delay)) %>% - pull() -``` - -```{r} -null_distn <- fli_small %>% - specify(response = dep_delay) %>% - hypothesize(null = "point", mu = 10) %>% - generate(reps = 1000, type = "bootstrap") %>% - calculate(stat = "mean") -ggplot(data = null_distn, mapping = aes(x = stat)) + - geom_density() + - geom_vline(xintercept = x_bar, color = "red") -null_distn %>% - summarize(p_value = mean(stat >= x_bar) * 2) -``` - -### One numerical variable (median) - -```{r} -x_tilde <- fli_small %>% - summarize(median(dep_delay)) %>% - pull() -null_distn <- fli_small %>% - specify(response = dep_delay) %>% - hypothesize(null = "point", med = -1) %>% - generate(reps = 1000, type = "bootstrap") %>% - calculate(stat = "median") -ggplot(null_distn, aes(x = stat)) + - geom_bar() + - geom_vline(xintercept = x_tilde, color = "red") -null_distn %>% - summarize(p_value = mean(stat <= x_tilde) * 2) -``` - -### One categorical (one proportion) - -```{r} -p_hat <- fli_small %>% - summarize(mean(day_hour == "morning")) %>% - pull() -null_distn <- fli_small %>% - specify(response = day_hour, success = "morning") %>% - hypothesize(null = "point", p = .5) %>% - generate(reps = 1000, type = "simulate") %>% - calculate(stat = "prop") -ggplot(null_distn, aes(x = stat)) + - geom_bar() + - geom_vline(xintercept = p_hat, color = "red") -null_distn %>% - summarize(p_value = mean(stat <= p_hat) * 2) -``` - -Logical variables will be coerced to factors: - -```{r} -null_distn <- fli_small %>% - mutate(day_hour_logical = (day_hour == "morning")) %>% - specify(response = day_hour_logical, success = "TRUE") %>% - hypothesize(null = "point", p = .5) %>% - generate(reps = 1000, type = "simulate") %>% - calculate(stat = "prop") -``` - - -### Two categorical (2 level) variables - -```{r} -d_hat <- fli_small %>% - group_by(season) %>% - summarize(prop = mean(day_hour == "morning")) %>% - summarize(diff(prop)) %>% - pull() -null_distn <- fli_small %>% - specify(day_hour ~ season, success = "morning") %>% - hypothesize(null = "independence") %>% - generate(reps = 1000, type = "permute") %>% - calculate(stat = "diff in props", order = c("winter", "summer")) -ggplot(null_distn, aes(x = stat)) + - geom_density() + - geom_vline(xintercept = d_hat, color = "red") -null_distn %>% - summarize(p_value = mean(stat <= d_hat) * 2) %>% - pull() -``` - -### One categorical (>2 level) - GoF - -```{r} -Chisq_hat <- fli_small %>% - specify(response = origin) %>% - hypothesize(null = "point", - p = c("EWR" = .33, "JFK" = .33, "LGA" = .34)) %>% - calculate(stat = "Chisq") -null_distn <- fli_small %>% - specify(response = origin) %>% - hypothesize(null = "point", - p = c("EWR" = .33, 
"JFK" = .33, "LGA" = .34)) %>% - generate(reps = 1000, type = "simulate") %>% - calculate(stat = "Chisq") -ggplot(null_distn, aes(x = stat)) + - geom_density() + - geom_vline(xintercept = pull(Chisq_hat), color = "red") -null_distn %>% - summarize(p_value = mean(stat >= pull(Chisq_hat))) %>% - pull() -``` - -### Two categorical (>2 level) variables - -```{r} -Chisq_hat <- fli_small %>% - chisq_stat(formula = day_hour ~ origin) -null_distn <- fli_small %>% - specify(day_hour ~ origin, success = "morning") %>% - hypothesize(null = "independence") %>% - generate(reps = 1000, type = "permute") %>% - calculate(stat = "Chisq") -ggplot(null_distn, aes(x = stat)) + - geom_density() + - geom_vline(xintercept = pull(Chisq_hat), color = "red") -null_distn %>% - summarize(p_value = mean(stat >= pull(Chisq_hat))) %>% - pull() -``` - -### One numerical variable, one categorical (2 levels) (diff in means) - -```{r} -d_hat <- fli_small %>% - group_by(season) %>% - summarize(mean_stat = mean(dep_delay)) %>% - # Since summer - winter - summarize(-diff(mean_stat)) %>% - pull() -null_distn <- fli_small %>% - specify(dep_delay ~ season) %>% # alt: response = dep_delay, - # explanatory = season - hypothesize(null = "independence") %>% - generate(reps = 1000, type = "permute") %>% - calculate(stat = "diff in means", order = c("summer", "winter")) -ggplot(null_distn, aes(x = stat)) + - geom_density() + - geom_vline(xintercept = d_hat, color = "red") -null_distn %>% - summarize(p_value = mean(stat <= d_hat) * 2) %>% - pull() -``` - -### One numerical variable, one categorical (2 levels) (diff in medians) - -```{r} -d_hat <- fli_small %>% - group_by(season) %>% - summarize(median_stat = median(dep_delay)) %>% - # Since summer - winter - summarize(-diff(median_stat)) %>% - pull() -null_distn <- fli_small %>% - specify(dep_delay ~ season) %>% # alt: response = dep_delay, - # explanatory = season - hypothesize(null = "independence") %>% - generate(reps = 1000, type = "permute") %>% - calculate(stat = "diff in medians", order = c("summer", "winter")) -ggplot(null_distn, aes(x = stat)) + - geom_bar() + - geom_vline(xintercept = d_hat, color = "red") -null_distn %>% - summarize(p_value = mean(stat >= d_hat) * 2) %>% - pull() -``` - -### One numerical, one categorical (>2 levels) - ANOVA - -```{r} -F_hat <- anova( - aov(formula = arr_delay ~ origin, data = fli_small) - )$`F value`[1] -null_distn <- fli_small %>% - specify(arr_delay ~ origin) %>% # alt: response = arr_delay, - # explanatory = origin - hypothesize(null = "independence") %>% - generate(reps = 1000, type = "permute") %>% - calculate(stat = "F") -ggplot(null_distn, aes(x = stat)) + - geom_density() + - geom_vline(xintercept = F_hat, color = "red") -null_distn %>% - summarize(p_value = mean(stat >= F_hat)) %>% - pull() -``` - -### Two numerical vars - SLR - -```{r} -slope_hat <- lm(arr_delay ~ dep_delay, data = fli_small) %>% - broom::tidy() %>% - filter(term == "dep_delay") %>% - pull(estimate) -null_distn <- fli_small %>% - specify(arr_delay ~ dep_delay) %>% - hypothesize(null = "independence") %>% - generate(reps = 1000, type = "permute") %>% - calculate(stat = "slope") -ggplot(null_distn, aes(x = stat)) + - geom_density() + - geom_vline(xintercept = slope_hat, color = "red") -null_distn %>% - summarize(p_value = mean(stat >= slope_hat) * 2) %>% - pull() -``` - -## Confidence intervals - -### One numerical (one mean) - -```{r} -x_bar <- fli_small %>% - summarize(mean(arr_delay)) %>% - pull() -boot <- fli_small %>% - specify(response = arr_delay) %>% - 
generate(reps = 1000, type = "bootstrap") %>% - calculate(stat = "mean") %>% - pull() -c(lower = x_bar - 2 * sd(boot), - upper = x_bar + 2 * sd(boot)) -``` - -### One categorical (one proportion) - -```{r} -p_hat <- fli_small %>% - summarize(mean(day_hour == "morning")) %>% - pull() -boot <- fli_small %>% - specify(response = day_hour, success = "morning") %>% - generate(reps = 1000, type = "bootstrap") %>% - calculate(stat = "prop") %>% - pull() -c(lower = p_hat - 2 * sd(boot), - upper = p_hat + 2 * sd(boot)) -``` - -### One numerical variable, one categorical (2 levels) (diff in means) - -```{r} -d_hat <- fli_small %>% - group_by(season) %>% - summarize(mean_stat = mean(arr_delay)) %>% - # Since summer - winter - summarize(-diff(mean_stat)) %>% - pull() -boot <- fli_small %>% - specify(arr_delay ~ season) %>% - generate(reps = 1000, type = "bootstrap") %>% - calculate(stat = "diff in means", order = c("summer", "winter")) %>% - pull() -c(lower = d_hat - 2 * sd(boot), - upper = d_hat + 2 * sd(boot)) -``` - -### Two categorical variables (diff in proportions) - -```{r} -d_hat <- fli_small %>% - group_by(season) %>% - summarize(prop = mean(day_hour == "morning")) %>% - # Since summer - winter - summarize(-diff(prop)) %>% - pull() -boot <- fli_small %>% - specify(day_hour ~ season, success = "morning") %>% - generate(reps = 1000, type = "bootstrap") %>% - calculate(stat = "diff in props", order = c("summer", "winter")) %>% - pull() -c(lower = d_hat - 2 * sd(boot), - upper = d_hat + 2 * sd(boot)) -``` - -### Two numerical vars - SLR - -```{r} -slope_hat <- lm(arr_delay ~ dep_delay, data = fli_small) %>% - broom::tidy() %>% - filter(term == "dep_delay") %>% - pull(estimate) -boot <- fli_small %>% - specify(arr_delay ~ dep_delay) %>% - generate(reps = 1000, type = "bootstrap") %>% - calculate(stat = "slope") %>% - pull() -c(lower = slope_hat - 2 * sd(boot), - upper = slope_hat + 2 * sd(boot)) -``` diff --git a/inst/doc/flights_examples.html b/inst/doc/flights_examples.html deleted file mode 100644 index 084b8af7..00000000 --- a/inst/doc/flights_examples.html +++ /dev/null @@ -1,461 +0,0 @@ - - - - - - - - - - - - - - - - -Randomization Examples using nycflights13 flights data - - - - - - - - - - - - - - - - - -

[rendered HTML vignette removed: inst/doc/flights_examples.html, "Randomization Examples using nycflights13 flights data" (Chester Ismay and Andrew Bray, 2018-01-05); the 461 deleted HTML lines are omitted here]
-
- - - - - - - - diff --git a/inst/doc/mtcars_examples.R b/inst/doc/mtcars_examples.R deleted file mode 100644 index 8ac3809b..00000000 --- a/inst/doc/mtcars_examples.R +++ /dev/null @@ -1,140 +0,0 @@ -## ----include=FALSE------------------------------------------------------- -knitr::opts_chunk$set(fig.width = 8, fig.height = 5) - -## ----message=FALSE, warning=FALSE---------------------------------------- -library(infer) -library(dplyr) -mtcars <- mtcars %>% - mutate(cyl = factor(cyl), - vs = factor(vs), - am = factor(am), - gear = factor(gear), - carb = factor(carb)) -# For reproducibility -set.seed(2018) - -## ------------------------------------------------------------------------ -mtcars %>% - specify(response = mpg) %>% # formula alt: mpg ~ NULL - hypothesize(null = "point", mu = 25) %>% - generate(reps = 100, type = "bootstrap") %>% - calculate(stat = "mean") - -## ------------------------------------------------------------------------ -mtcars %>% - specify(response = mpg) %>% # formula alt: mpg ~ NULL - hypothesize(null = "point", med = 26) %>% - generate(reps = 100, type = "bootstrap") %>% - calculate(stat = "median") - -## ------------------------------------------------------------------------ -mtcars %>% - specify(response = am, success = "1") %>% # formula alt: am ~ NULL - hypothesize(null = "point", p = .25) %>% - generate(reps = 100, type = "simulate") %>% - calculate(stat = "prop") - -## ------------------------------------------------------------------------ -mtcars %>% - specify(am ~ vs, success = "1") %>% # alt: response = am, explanatory = vs - hypothesize(null = "independence") %>% - generate(reps = 100, type = "permute") %>% - calculate(stat = "diff in props", order = c("0", "1")) - -## ------------------------------------------------------------------------ -mtcars %>% - specify(cyl ~ NULL) %>% # alt: response = cyl - hypothesize(null = "point", p = c("4" = .5, "6" = .25, "8" = .25)) %>% - generate(reps = 100, type = "simulate") %>% - calculate(stat = "Chisq") - -## ----warning = FALSE----------------------------------------------------- -mtcars %>% - specify(cyl ~ am) %>% # alt: response = cyl, explanatory = am - hypothesize(null = "independence") %>% - generate(reps = 100, type = "permute") %>% - calculate(stat = "Chisq") - -## ------------------------------------------------------------------------ -mtcars %>% - specify(mpg ~ am) %>% # alt: response = mpg, explanatory = am - hypothesize(null = "independence") %>% - generate(reps = 100, type = "permute") %>% - calculate(stat = "diff in means", order = c("0", "1")) - -## ------------------------------------------------------------------------ -mtcars %>% - specify(mpg ~ am) %>% # alt: response = mpg, explanatory = am - hypothesize(null = "independence") %>% - generate(reps = 100, type = "permute") %>% - calculate(stat = "diff in medians", order = c("0", "1")) - -## ------------------------------------------------------------------------ -mtcars %>% - specify(mpg ~ cyl) %>% # alt: response = mpg, explanatory = cyl - hypothesize(null = "independence") %>% - generate(reps = 100, type = "permute") %>% - calculate(stat = "F") - -## ------------------------------------------------------------------------ -mtcars %>% - specify(mpg ~ hp) %>% # alt: response = mpg, explanatory = cyl - hypothesize(null = "independence") %>% - generate(reps = 100, type = "permute") %>% - calculate(stat = "slope") - -## ----eval=FALSE---------------------------------------------------------- -# mtcars %>% -# specify(response = mpg) %>% # 
formula alt: mpg ~ NULL -# hypothesize(null = "point", sigma = 5) %>% -# generate(reps = 100, type = "bootstrap") %>% -# calculate(stat = "sd") - -## ------------------------------------------------------------------------ -mtcars %>% - specify(response = mpg) %>% - generate(reps = 100, type = "bootstrap") %>% - calculate(stat = "mean") - -## ------------------------------------------------------------------------ -mtcars %>% - specify(response = mpg) %>% - generate(reps = 100, type = "bootstrap") %>% - calculate(stat = "median") - -## ------------------------------------------------------------------------ -mtcars %>% - specify(response = mpg) %>% - generate(reps = 100, type = "bootstrap") %>% - calculate(stat = "sd") - -## ------------------------------------------------------------------------ -mtcars %>% - specify(response = am, success = "1") %>% - generate(reps = 100, type = "bootstrap") %>% - calculate(stat = "prop") - -## ------------------------------------------------------------------------ -mtcars %>% - specify(mpg ~ am) %>% - generate(reps = 100, type = "bootstrap") %>% - calculate(stat = "diff in means", order = c("0", "1")) - -## ------------------------------------------------------------------------ -mtcars %>% - specify(am ~ vs, success = "1") %>% - generate(reps = 100, type = "bootstrap") %>% - calculate(stat = "diff in props", order = c("0", "1")) - -## ------------------------------------------------------------------------ -mtcars %>% - specify(mpg ~ hp) %>% - generate(reps = 100, type = "bootstrap") %>% - calculate(stat = "slope") - -## ------------------------------------------------------------------------ -mtcars %>% - specify(mpg ~ hp) %>% - generate(reps = 100, type = "bootstrap") %>% - calculate(stat = "correlation") - diff --git a/inst/doc/mtcars_examples.Rmd b/inst/doc/mtcars_examples.Rmd deleted file mode 100644 index 91bc218c..00000000 --- a/inst/doc/mtcars_examples.Rmd +++ /dev/null @@ -1,223 +0,0 @@ ---- -title: "Examples using `mtcars` data" -author: "Chester Ismay and Andrew Bray" -date: "2018-01-05" -output: - rmarkdown::html_vignette -vignette: | - %\VignetteIndexEntry{mtcars example} - %\VignetteEngine{knitr::rmarkdown} - %\VignetteEncoding{UTF-8} ---- - -```{r include=FALSE} -knitr::opts_chunk$set(fig.width = 8, fig.height = 5) -``` - -**Note**: The `type` argument in `generate()` is automatically filled based on the entries for `specify()` and -`hypothesize()`. It can be removed throughout the examples that follow. It is left in to reiterate the type of generation process being performed. 
- -## Data preparation - -```{r message=FALSE, warning=FALSE} -library(infer) -library(dplyr) -mtcars <- mtcars %>% - mutate(cyl = factor(cyl), - vs = factor(vs), - am = factor(am), - gear = factor(gear), - carb = factor(carb)) -# For reproducibility -set.seed(2018) -``` - -*** - -One numerical variable (mean) - -```{r} -mtcars %>% - specify(response = mpg) %>% # formula alt: mpg ~ NULL - hypothesize(null = "point", mu = 25) %>% - generate(reps = 100, type = "bootstrap") %>% - calculate(stat = "mean") -``` - -One numerical variable (median) - -```{r} -mtcars %>% - specify(response = mpg) %>% # formula alt: mpg ~ NULL - hypothesize(null = "point", med = 26) %>% - generate(reps = 100, type = "bootstrap") %>% - calculate(stat = "median") -``` - -One categorical (2 level) variable - -```{r} -mtcars %>% - specify(response = am, success = "1") %>% # formula alt: am ~ NULL - hypothesize(null = "point", p = .25) %>% - generate(reps = 100, type = "simulate") %>% - calculate(stat = "prop") -``` - -Two categorical (2 level) variables - -```{r} -mtcars %>% - specify(am ~ vs, success = "1") %>% # alt: response = am, explanatory = vs - hypothesize(null = "independence") %>% - generate(reps = 100, type = "permute") %>% - calculate(stat = "diff in props", order = c("0", "1")) -``` - -One categorical (>2 level) - GoF - -```{r} -mtcars %>% - specify(cyl ~ NULL) %>% # alt: response = cyl - hypothesize(null = "point", p = c("4" = .5, "6" = .25, "8" = .25)) %>% - generate(reps = 100, type = "simulate") %>% - calculate(stat = "Chisq") -``` - -Two categorical (>2 level) variables - -```{r warning = FALSE} -mtcars %>% - specify(cyl ~ am) %>% # alt: response = cyl, explanatory = am - hypothesize(null = "independence") %>% - generate(reps = 100, type = "permute") %>% - calculate(stat = "Chisq") -``` - -One numerical variable one categorical (2 levels) (diff in means) - -```{r} -mtcars %>% - specify(mpg ~ am) %>% # alt: response = mpg, explanatory = am - hypothesize(null = "independence") %>% - generate(reps = 100, type = "permute") %>% - calculate(stat = "diff in means", order = c("0", "1")) -``` - -One numerical variable one categorical (2 levels) (diff in medians) - -```{r} -mtcars %>% - specify(mpg ~ am) %>% # alt: response = mpg, explanatory = am - hypothesize(null = "independence") %>% - generate(reps = 100, type = "permute") %>% - calculate(stat = "diff in medians", order = c("0", "1")) -``` - -One numerical one categorical (>2 levels) - ANOVA - -```{r} -mtcars %>% - specify(mpg ~ cyl) %>% # alt: response = mpg, explanatory = cyl - hypothesize(null = "independence") %>% - generate(reps = 100, type = "permute") %>% - calculate(stat = "F") -``` - -Two numerical vars - SLR - -```{r} -mtcars %>% - specify(mpg ~ hp) %>% # alt: response = mpg, explanatory = cyl - hypothesize(null = "independence") %>% - generate(reps = 100, type = "permute") %>% - calculate(stat = "slope") -``` - -One numerical variable (standard deviation) - -**Not currently implemented** - -```{r eval=FALSE} -mtcars %>% - specify(response = mpg) %>% # formula alt: mpg ~ NULL - hypothesize(null = "point", sigma = 5) %>% - generate(reps = 100, type = "bootstrap") %>% - calculate(stat = "sd") -``` - - -### Confidence intervals - -One numerical (one mean) - -```{r} -mtcars %>% - specify(response = mpg) %>% - generate(reps = 100, type = "bootstrap") %>% - calculate(stat = "mean") -``` - -One numerical (one median) - -```{r} -mtcars %>% - specify(response = mpg) %>% - generate(reps = 100, type = "bootstrap") %>% - calculate(stat = "median") -``` - -One 
numerical (standard deviation) - -```{r} -mtcars %>% - specify(response = mpg) %>% - generate(reps = 100, type = "bootstrap") %>% - calculate(stat = "sd") -``` - -One categorical (one proportion) - -```{r} -mtcars %>% - specify(response = am, success = "1") %>% - generate(reps = 100, type = "bootstrap") %>% - calculate(stat = "prop") -``` - -One numerical variable one categorical (2 levels) (diff in means) - -```{r} -mtcars %>% - specify(mpg ~ am) %>% - generate(reps = 100, type = "bootstrap") %>% - calculate(stat = "diff in means", order = c("0", "1")) -``` - -Two categorical variables (diff in proportions) - -```{r} -mtcars %>% - specify(am ~ vs, success = "1") %>% - generate(reps = 100, type = "bootstrap") %>% - calculate(stat = "diff in props", order = c("0", "1")) -``` - -Two numerical vars - SLR - -```{r} -mtcars %>% - specify(mpg ~ hp) %>% - generate(reps = 100, type = "bootstrap") %>% - calculate(stat = "slope") -``` - -Two numerical vars - correlation - -```{r} -mtcars %>% - specify(mpg ~ hp) %>% - generate(reps = 100, type = "bootstrap") %>% - calculate(stat = "correlation") -``` - diff --git a/inst/doc/mtcars_examples.html b/inst/doc/mtcars_examples.html deleted file mode 100644 index 084626ae..00000000 --- a/inst/doc/mtcars_examples.html +++ /dev/null @@ -1,468 +0,0 @@ - - - - - - - - - - - - - - - - -Examples using mtcars data - - - - - - - - - - - - - - - - - -

[rendered HTML vignette removed: inst/doc/mtcars_examples.html, "Examples using mtcars data" (Chester Ismay and Andrew Bray, 2018-01-05); the 468 deleted HTML lines are omitted here]
-
-
- - - - - - - - diff --git a/inst/doc/observed_stat_examples.R b/inst/doc/observed_stat_examples.R deleted file mode 100644 index e4556a1b..00000000 --- a/inst/doc/observed_stat_examples.R +++ /dev/null @@ -1,485 +0,0 @@ -## ----include=FALSE------------------------------------------------------- -knitr::opts_chunk$set(fig.width = 6, fig.height = 3.5) -options(digits = 4) - -## ----message=FALSE, warning=FALSE---------------------------------------- -library(nycflights13) -library(dplyr) -library(ggplot2) -library(stringr) -library(infer) -set.seed(2017) -fli_small <- flights %>% - na.omit() %>% - sample_n(size = 500) %>% - mutate(season = case_when( - month %in% c(10:12, 1:3) ~ "winter", - month %in% c(4:9) ~ "summer" - )) %>% - mutate(day_hour = case_when( - between(hour, 1, 12) ~ "morning", - between(hour, 13, 24) ~ "not morning" - )) %>% - select(arr_delay, dep_delay, season, - day_hour, origin, carrier) - -## ------------------------------------------------------------------------ -( x_bar <- fli_small %>% - specify(response = dep_delay) %>% - calculate(stat = "mean") ) - -## ------------------------------------------------------------------------ -null_distn <- fli_small %>% - specify(response = dep_delay) %>% - hypothesize(null = "point", mu = 10) %>% - generate(reps = 1000) %>% - calculate(stat = "mean") - -visualize(null_distn) + - shade_p_value(obs_stat = x_bar, direction = "two_sided") -null_distn %>% - get_p_value(obs_stat = x_bar, direction = "two_sided") - -## ------------------------------------------------------------------------ -( t_bar <- fli_small %>% - specify(response = dep_delay) %>% - calculate(stat = "t") ) - -## ------------------------------------------------------------------------ -null_distn <- fli_small %>% - specify(response = dep_delay) %>% - hypothesize(null = "point", mu = 8) %>% - generate(reps = 1000) %>% - calculate(stat = "t") - -visualize(null_distn) + - shade_p_value(obs_stat = t_bar, direction = "two_sided") -null_distn %>% - get_p_value(obs_stat = t_bar, direction = "two_sided") - -## ------------------------------------------------------------------------ -( x_tilde <- fli_small %>% - specify(response = dep_delay) %>% - calculate(stat = "median") ) - -## ------------------------------------------------------------------------ -null_distn <- fli_small %>% - specify(response = dep_delay) %>% - hypothesize(null = "point", med = -1) %>% - generate(reps = 1000) %>% - calculate(stat = "median") - -visualize(null_distn) + - shade_p_value(obs_stat = x_tilde, direction = "two_sided") -null_distn %>% - get_p_value(obs_stat = x_tilde, direction = "two_sided") - -## ------------------------------------------------------------------------ -( p_hat <- fli_small %>% - specify(response = day_hour, success = "morning") %>% - calculate(stat = "prop") ) - -## ------------------------------------------------------------------------ -null_distn <- fli_small %>% - specify(response = day_hour, success = "morning") %>% - hypothesize(null = "point", p = .5) %>% - generate(reps = 1000) %>% - calculate(stat = "prop") - -visualize(null_distn) + - shade_p_value(obs_stat = p_hat, direction = "two_sided") -null_distn %>% - get_p_value(obs_stat = p_hat, direction = "two_sided") - -## ------------------------------------------------------------------------ -null_distn <- fli_small %>% - mutate(day_hour_logical = (day_hour == "morning")) %>% - specify(response = day_hour_logical, success = "TRUE") %>% - hypothesize(null = "point", p = .5) %>% - generate(reps = 1000) %>% - 
calculate(stat = "prop") - -## ------------------------------------------------------------------------ -( d_hat <- fli_small %>% - specify(day_hour ~ season, success = "morning") %>% - calculate(stat = "diff in props", order = c("winter", "summer")) ) - -## ------------------------------------------------------------------------ -null_distn <- fli_small %>% - specify(day_hour ~ season, success = "morning") %>% - hypothesize(null = "independence") %>% - generate(reps = 1000) %>% - calculate(stat = "diff in props", order = c("winter", "summer")) - -visualize(null_distn) + - shade_p_value(obs_stat = d_hat, direction = "two_sided") -null_distn %>% - get_p_value(obs_stat = d_hat, direction = "two_sided") - -## ------------------------------------------------------------------------ -( z_hat <- fli_small %>% - specify(day_hour ~ season, success = "morning") %>% - calculate(stat = "z", order = c("winter", "summer")) ) - -## ------------------------------------------------------------------------ -null_distn <- fli_small %>% - specify(day_hour ~ season, success = "morning") %>% - hypothesize(null = "independence") %>% - generate(reps = 1000) %>% - calculate(stat = "z", order = c("winter", "summer")) - -visualize(null_distn) + - shade_p_value(obs_stat = z_hat, direction = "two_sided") -null_distn %>% - get_p_value(obs_stat = z_hat, direction = "two_sided") - -## ------------------------------------------------------------------------ -( Chisq_hat <- fli_small %>% - specify(response = origin) %>% - hypothesize(null = "point", - p = c("EWR" = .33, "JFK" = .33, "LGA" = .34)) %>% - calculate(stat = "Chisq") ) - -## ------------------------------------------------------------------------ -null_distn <- fli_small %>% - specify(response = origin) %>% - hypothesize(null = "point", - p = c("EWR" = .33, "JFK" = .33, "LGA" = .34)) %>% - generate(reps = 1000, type = "simulate") %>% - calculate(stat = "Chisq") - -visualize(null_distn) + - shade_p_value(obs_stat = Chisq_hat, direction = "greater") -null_distn %>% - get_p_value(obs_stat = Chisq_hat, direction = "greater") - -## ------------------------------------------------------------------------ -( Chisq_hat <- fli_small %>% - specify(formula = day_hour ~ origin) %>% - calculate(stat = "Chisq") ) - -## ------------------------------------------------------------------------ -null_distn <- fli_small %>% - specify(day_hour ~ origin) %>% - hypothesize(null = "independence") %>% - generate(reps = 1000, type = "permute") %>% - calculate(stat = "Chisq") - -visualize(null_distn) + - shade_p_value(obs_stat = Chisq_hat, direction = "greater") -null_distn %>% - get_p_value(obs_stat = Chisq_hat, direction = "greater") - -## ------------------------------------------------------------------------ -( d_hat <- fli_small %>% - specify(dep_delay ~ season) %>% - calculate(stat = "diff in means", order = c("summer", "winter")) ) - -## ------------------------------------------------------------------------ -null_distn <- fli_small %>% - specify(dep_delay ~ season) %>% - hypothesize(null = "independence") %>% - generate(reps = 1000, type = "permute") %>% - calculate(stat = "diff in means", order = c("summer", "winter")) - -visualize(null_distn) + - shade_p_value(obs_stat = d_hat, direction = "two_sided") -null_distn %>% - get_p_value(obs_stat = d_hat, direction = "two_sided") - -## ------------------------------------------------------------------------ -( t_hat <- fli_small %>% - specify(dep_delay ~ season) %>% - calculate(stat = "t", order = c("summer", "winter")) ) - -## 
------------------------------------------------------------------------ -null_distn <- fli_small %>% - specify(dep_delay ~ season) %>% - hypothesize(null = "independence") %>% - generate(reps = 1000, type = "permute") %>% - calculate(stat = "t", order = c("summer", "winter")) - -visualize(null_distn) + - shade_p_value(obs_stat = t_hat, direction = "two_sided") -null_distn %>% - get_p_value(obs_stat = t_hat, direction = "two_sided") - -## ------------------------------------------------------------------------ -( d_hat <- fli_small %>% - specify(dep_delay ~ season) %>% - calculate(stat = "diff in medians", order = c("summer", "winter")) ) - -## ------------------------------------------------------------------------ -null_distn <- fli_small %>% - specify(dep_delay ~ season) %>% # alt: response = dep_delay, - # explanatory = season - hypothesize(null = "independence") %>% - generate(reps = 1000, type = "permute") %>% - calculate(stat = "diff in medians", order = c("summer", "winter")) - -visualize(null_distn) + - shade_p_value(obs_stat = d_hat, direction = "two_sided") -null_distn %>% - get_p_value(obs_stat = d_hat, direction = "two_sided") - -## ------------------------------------------------------------------------ -( F_hat <- fli_small %>% - specify(arr_delay ~ origin) %>% - calculate(stat = "F") ) - -## ------------------------------------------------------------------------ -null_distn <- fli_small %>% - specify(arr_delay ~ origin) %>% - hypothesize(null = "independence") %>% - generate(reps = 1000, type = "permute") %>% - calculate(stat = "F") - -visualize(null_distn) + - shade_p_value(obs_stat = F_hat, direction = "greater") -null_distn %>% - get_p_value(obs_stat = F_hat, direction = "greater") - -## ------------------------------------------------------------------------ -( slope_hat <- fli_small %>% - specify(arr_delay ~ dep_delay) %>% - calculate(stat = "slope") ) - -## ------------------------------------------------------------------------ -null_distn <- fli_small %>% - specify(arr_delay ~ dep_delay) %>% - hypothesize(null = "independence") %>% - generate(reps = 1000, type = "permute") %>% - calculate(stat = "slope") - -visualize(null_distn) + - shade_p_value(obs_stat = slope_hat, direction = "two_sided") -null_distn %>% - get_p_value(obs_stat = slope_hat, direction = "two_sided") - -## ------------------------------------------------------------------------ -( correlation_hat <- fli_small %>% - specify(arr_delay ~ dep_delay) %>% - calculate(stat = "correlation") ) - -## ------------------------------------------------------------------------ -null_distn <- fli_small %>% - specify(arr_delay ~ dep_delay) %>% - hypothesize(null = "independence") %>% - generate(reps = 1000, type = "permute") %>% - calculate(stat = "correlation") - -visualize(null_distn) + - shade_p_value(obs_stat = correlation_hat, direction = "two_sided") -null_distn %>% - get_p_value(obs_stat = correlation_hat, direction = "two_sided") - -## ----echo=FALSE, eval=FALSE---------------------------------------------- -# # **Standardized observed stat** -# ( t_hat <- fli_small %>% -# specify(arr_delay ~ dep_delay) %>% -# calculate(stat = "t") ) - -## ----echo=FALSE, eval=FALSE---------------------------------------------- -# null_distn <- fli_small %>% -# specify(arr_delay ~ dep_delay) %>% -# hypothesize(null = "independence") %>% -# generate(reps = 1000, type = "permute") %>% -# calculate(stat = "t") -# -# visualize(null_distn) + -# shade_p_value(obs_stat = t_hat, direction = "two_sided") -# null_distn %>% -# 
get_p_value(obs_stat = t_hat, direction = "two_sided") - -## ------------------------------------------------------------------------ -( x_bar <- fli_small %>% - specify(response = arr_delay) %>% - calculate(stat = "mean") ) - -## ------------------------------------------------------------------------ -boot <- fli_small %>% - specify(response = arr_delay) %>% - generate(reps = 1000, type = "bootstrap") %>% - calculate(stat = "mean") -( percentile_ci <- get_ci(boot) ) - -visualize(boot) + - shade_confidence_interval(endpoints = percentile_ci) -( standard_error_ci <- get_ci(boot, type = "se", point_estimate = x_bar) ) - -visualize(boot) + - shade_confidence_interval(endpoints = standard_error_ci) - -## ------------------------------------------------------------------------ -( t_hat <- fli_small %>% - specify(response = arr_delay) %>% - calculate(stat = "t") ) - -## ------------------------------------------------------------------------ -boot <- fli_small %>% - specify(response = arr_delay) %>% - generate(reps = 1000, type = "bootstrap") %>% - calculate(stat = "t") -( percentile_ci <- get_ci(boot) ) - -visualize(boot) + - shade_confidence_interval(endpoints = percentile_ci) -( standard_error_ci <- get_ci(boot, type = "se", point_estimate = t_hat) ) - -visualize(boot) + - shade_confidence_interval(endpoints = standard_error_ci) - -## ------------------------------------------------------------------------ -( p_hat <- fli_small %>% - specify(response = day_hour, success = "morning") %>% - calculate(stat = "prop") ) - -## ------------------------------------------------------------------------ -boot <- fli_small %>% - specify(response = day_hour, success = "morning") %>% - generate(reps = 1000, type = "bootstrap") %>% - calculate(stat = "prop") -( percentile_ci <- get_ci(boot) ) - -visualize(boot) + - shade_confidence_interval(endpoints = percentile_ci) -( standard_error_ci <- get_ci(boot, type = "se", point_estimate = p_hat) ) - -visualize(boot) + - shade_confidence_interval(endpoints = standard_error_ci) - -## ------------------------------------------------------------------------ -( d_hat <- fli_small %>% - specify(arr_delay ~ season) %>% - calculate(stat = "diff in means", order = c("summer", "winter")) ) - -## ------------------------------------------------------------------------ -boot <- fli_small %>% - specify(arr_delay ~ season) %>% - generate(reps = 1000, type = "bootstrap") %>% - calculate(stat = "diff in means", order = c("summer", "winter")) -( percentile_ci <- get_ci(boot) ) - -visualize(boot) + - shade_confidence_interval(endpoints = percentile_ci) -( standard_error_ci <- get_ci(boot, type = "se", point_estimate = d_hat) ) - -visualize(boot) + - shade_confidence_interval(endpoints = standard_error_ci) - -## ------------------------------------------------------------------------ -( t_hat <- fli_small %>% - specify(arr_delay ~ season) %>% - calculate(stat = "t", order = c("summer", "winter")) ) - -## ------------------------------------------------------------------------ -boot <- fli_small %>% - specify(arr_delay ~ season) %>% - generate(reps = 1000, type = "bootstrap") %>% - calculate(stat = "t", order = c("summer", "winter")) -( percentile_ci <- get_ci(boot) ) - -visualize(boot) + - shade_confidence_interval(endpoints = percentile_ci) -( standard_error_ci <- get_ci(boot, type = "se", point_estimate = t_hat) ) - -visualize(boot) + - shade_confidence_interval(endpoints = standard_error_ci) - -## ------------------------------------------------------------------------ -( d_hat 
<- fli_small %>% - specify(day_hour ~ season, success = "morning") %>% - calculate(stat = "diff in props", order = c("summer", "winter")) ) - -## ------------------------------------------------------------------------ -boot <- fli_small %>% - specify(day_hour ~ season, success = "morning") %>% - generate(reps = 1000, type = "bootstrap") %>% - calculate(stat = "diff in props", order = c("summer", "winter")) -( percentile_ci <- get_ci(boot) ) - -visualize(boot) + - shade_confidence_interval(endpoints = percentile_ci) -( standard_error_ci <- get_ci(boot, type = "se", point_estimate = d_hat) ) - -visualize(boot) + - shade_confidence_interval(endpoints = standard_error_ci) - -## ------------------------------------------------------------------------ -( z_hat <- fli_small %>% - specify(day_hour ~ season, success = "morning") %>% - calculate(stat = "z", order = c("summer", "winter")) ) - -## ------------------------------------------------------------------------ -boot <- fli_small %>% - specify(day_hour ~ season, success = "morning") %>% - generate(reps = 1000, type = "bootstrap") %>% - calculate(stat = "z", order = c("summer", "winter")) -( percentile_ci <- get_ci(boot) ) - -visualize(boot) + - shade_confidence_interval(endpoints = percentile_ci) -( standard_error_ci <- get_ci(boot, type = "se", point_estimate = z_hat) ) - -visualize(boot) + - shade_confidence_interval(endpoints = standard_error_ci) - -## ------------------------------------------------------------------------ -( slope_hat <- fli_small %>% - specify(arr_delay ~ dep_delay) %>% - calculate(stat = "slope") ) - -## ------------------------------------------------------------------------ -boot <- fli_small %>% - specify(arr_delay ~ dep_delay) %>% - generate(reps = 1000, type = "bootstrap") %>% - calculate(stat = "slope") -( percentile_ci <- get_ci(boot) ) - -visualize(boot) + - shade_confidence_interval(endpoints = percentile_ci) -( standard_error_ci <- get_ci(boot, type = "se", point_estimate = slope_hat) ) - -visualize(boot) + - shade_confidence_interval(endpoints = standard_error_ci) - -## ------------------------------------------------------------------------ -( correlation_hat <- fli_small %>% - specify(arr_delay ~ dep_delay) %>% - calculate(stat = "correlation") ) - -## ------------------------------------------------------------------------ -boot <- fli_small %>% - specify(arr_delay ~ dep_delay) %>% - generate(reps = 1000, type = "bootstrap") %>% - calculate(stat = "correlation") -( percentile_ci <- get_ci(boot) ) - -visualize(boot) + - shade_confidence_interval(endpoints = percentile_ci) -( standard_error_ci <- get_ci(boot, type = "se", - point_estimate = correlation_hat) ) - -visualize(boot) + - shade_confidence_interval(endpoints = standard_error_ci) - -## ----eval=FALSE, echo=FALSE---------------------------------------------- -# # **Point estimate** -# ( t_hat <- fli_small %>% -# specify(arr_delay ~ dep_delay) %>% -# calculate(stat = "t") ) - -## ----eval=FALSE, echo=FALSE---------------------------------------------- -# boot <- fli_small %>% -# specify(arr_delay ~ dep_delay) %>% -# generate(reps = 1000, type = "bootstrap") %>% -# calculate(stat = "t") -# ( percentile_ci <- get_ci(boot) ) -# -# visualize(boot) + -# shade_confidence_interval(endpoints = percentile_ci) -# ( standard_error_ci <- get_ci(boot, type = "se", point_estimate = t_hat) ) -# -# visualize(boot) + -# shade_confidence_interval(endpoints = standard_error_ci) - diff --git a/inst/doc/observed_stat_examples.Rmd b/inst/doc/observed_stat_examples.Rmd 
deleted file mode 100644 index 780512b3..00000000 --- a/inst/doc/observed_stat_examples.Rmd +++ /dev/null @@ -1,692 +0,0 @@ ---- -title: "Full infer pipeline examples using `nycflights13` `flights` data" -author: "Chester Ismay" -date: "Updated on 2018-06-14" -output: - rmarkdown::html_vignette: - df_print: kable -vignette: | - %\VignetteIndexEntry{Full infer pipeline examples} - %\VignetteEncoding{UTF-8} - %\VignetteEngine{knitr::rmarkdown} -editor_options: - chunk_output_type: console ---- - - -```{r include=FALSE} -knitr::opts_chunk$set(fig.width = 6, fig.height = 3.5) -options(digits = 4) -``` - -## Data preparation - -```{r message=FALSE, warning=FALSE} -library(nycflights13) -library(dplyr) -library(ggplot2) -library(stringr) -library(infer) -set.seed(2017) -fli_small <- flights %>% - na.omit() %>% - sample_n(size = 500) %>% - mutate(season = case_when( - month %in% c(10:12, 1:3) ~ "winter", - month %in% c(4:9) ~ "summer" - )) %>% - mutate(day_hour = case_when( - between(hour, 1, 12) ~ "morning", - between(hour, 13, 24) ~ "not morning" - )) %>% - select(arr_delay, dep_delay, season, - day_hour, origin, carrier) -``` - -* Two numeric - `arr_delay`, `dep_delay` -* Two categories - - `season` (`"winter"`, `"summer"`), - - `day_hour` (`"morning"`, `"not morning"`) -* Three categories - `origin` (`"EWR"`, `"JFK"`, `"LGA"`) -* Sixteen categories - `carrier` - -*** - -# Hypothesis tests - -### One numerical variable (mean) - -**Observed stat** -```{r} -( x_bar <- fli_small %>% - specify(response = dep_delay) %>% - calculate(stat = "mean") ) -``` - -```{r} -null_distn <- fli_small %>% - specify(response = dep_delay) %>% - hypothesize(null = "point", mu = 10) %>% - generate(reps = 1000) %>% - calculate(stat = "mean") - -visualize(null_distn) + - shade_p_value(obs_stat = x_bar, direction = "two_sided") -null_distn %>% - get_p_value(obs_stat = x_bar, direction = "two_sided") -``` - -### One numerical variable (standardized mean $t$) - -**Observed stat** -```{r} -( t_bar <- fli_small %>% - specify(response = dep_delay) %>% - calculate(stat = "t") ) -``` - -```{r} -null_distn <- fli_small %>% - specify(response = dep_delay) %>% - hypothesize(null = "point", mu = 8) %>% - generate(reps = 1000) %>% - calculate(stat = "t") - -visualize(null_distn) + - shade_p_value(obs_stat = t_bar, direction = "two_sided") -null_distn %>% - get_p_value(obs_stat = t_bar, direction = "two_sided") -``` - - -### One numerical variable (median) - -**Observed stat** - -```{r} -( x_tilde <- fli_small %>% - specify(response = dep_delay) %>% - calculate(stat = "median") ) -``` - -```{r} -null_distn <- fli_small %>% - specify(response = dep_delay) %>% - hypothesize(null = "point", med = -1) %>% - generate(reps = 1000) %>% - calculate(stat = "median") - -visualize(null_distn) + - shade_p_value(obs_stat = x_tilde, direction = "two_sided") -null_distn %>% - get_p_value(obs_stat = x_tilde, direction = "two_sided") -``` - -### One categorical (one proportion) - -**Observed stat** - -```{r} -( p_hat <- fli_small %>% - specify(response = day_hour, success = "morning") %>% - calculate(stat = "prop") ) -``` - -```{r} -null_distn <- fli_small %>% - specify(response = day_hour, success = "morning") %>% - hypothesize(null = "point", p = .5) %>% - generate(reps = 1000) %>% - calculate(stat = "prop") - -visualize(null_distn) + - shade_p_value(obs_stat = p_hat, direction = "two_sided") -null_distn %>% - get_p_value(obs_stat = p_hat, direction = "two_sided") -``` - -Logical variables will be coerced to factors: - -```{r} -null_distn <- 
fli_small %>% - mutate(day_hour_logical = (day_hour == "morning")) %>% - specify(response = day_hour_logical, success = "TRUE") %>% - hypothesize(null = "point", p = .5) %>% - generate(reps = 1000) %>% - calculate(stat = "prop") -``` - -### One categorical variable (standardized proportion $z$) - -Not yet implemented. - -### Two categorical (2 level) variables - -**Observed stat** - -```{r} -( d_hat <- fli_small %>% - specify(day_hour ~ season, success = "morning") %>% - calculate(stat = "diff in props", order = c("winter", "summer")) ) -``` - - -```{r} -null_distn <- fli_small %>% - specify(day_hour ~ season, success = "morning") %>% - hypothesize(null = "independence") %>% - generate(reps = 1000) %>% - calculate(stat = "diff in props", order = c("winter", "summer")) - -visualize(null_distn) + - shade_p_value(obs_stat = d_hat, direction = "two_sided") -null_distn %>% - get_p_value(obs_stat = d_hat, direction = "two_sided") -``` - -### Two categorical (2 level) variables (z) - -**Standardized observed stat** - -```{r} -( z_hat <- fli_small %>% - specify(day_hour ~ season, success = "morning") %>% - calculate(stat = "z", order = c("winter", "summer")) ) -``` - -```{r} -null_distn <- fli_small %>% - specify(day_hour ~ season, success = "morning") %>% - hypothesize(null = "independence") %>% - generate(reps = 1000) %>% - calculate(stat = "z", order = c("winter", "summer")) - -visualize(null_distn) + - shade_p_value(obs_stat = z_hat, direction = "two_sided") -null_distn %>% - get_p_value(obs_stat = z_hat, direction = "two_sided") -``` - -Note the similarities in this plot and the previous one. - -### One categorical (>2 level) - GoF - -**Observed stat** - -Note the need to add in the hypothesized values here to compute the observed statistic. - -```{r} -( Chisq_hat <- fli_small %>% - specify(response = origin) %>% - hypothesize(null = "point", - p = c("EWR" = .33, "JFK" = .33, "LGA" = .34)) %>% - calculate(stat = "Chisq") ) -``` - -```{r} -null_distn <- fli_small %>% - specify(response = origin) %>% - hypothesize(null = "point", - p = c("EWR" = .33, "JFK" = .33, "LGA" = .34)) %>% - generate(reps = 1000, type = "simulate") %>% - calculate(stat = "Chisq") - -visualize(null_distn) + - shade_p_value(obs_stat = Chisq_hat, direction = "greater") -null_distn %>% - get_p_value(obs_stat = Chisq_hat, direction = "greater") -``` - -### Two categorical (>2 level) variables - -**Observed stat** - -```{r} -( Chisq_hat <- fli_small %>% - specify(formula = day_hour ~ origin) %>% - calculate(stat = "Chisq") ) -``` - -```{r} -null_distn <- fli_small %>% - specify(day_hour ~ origin) %>% - hypothesize(null = "independence") %>% - generate(reps = 1000, type = "permute") %>% - calculate(stat = "Chisq") - -visualize(null_distn) + - shade_p_value(obs_stat = Chisq_hat, direction = "greater") -null_distn %>% - get_p_value(obs_stat = Chisq_hat, direction = "greater") -``` - -### One numerical variable, one categorical (2 levels) (diff in means) - -**Observed stat** - -```{r} -( d_hat <- fli_small %>% - specify(dep_delay ~ season) %>% - calculate(stat = "diff in means", order = c("summer", "winter")) ) -``` - -```{r} -null_distn <- fli_small %>% - specify(dep_delay ~ season) %>% - hypothesize(null = "independence") %>% - generate(reps = 1000, type = "permute") %>% - calculate(stat = "diff in means", order = c("summer", "winter")) - -visualize(null_distn) + - shade_p_value(obs_stat = d_hat, direction = "two_sided") -null_distn %>% - get_p_value(obs_stat = d_hat, direction = "two_sided") -``` - -### One numerical variable, 
-### One numerical variable, one categorical (2 levels) (t)
-
-**Standardized observed stat**
-
-```{r}
-( t_hat <- fli_small %>%
-  specify(dep_delay ~ season) %>%
-  calculate(stat = "t", order = c("summer", "winter")) )
-```
-
-```{r}
-null_distn <- fli_small %>%
-  specify(dep_delay ~ season) %>%
-  hypothesize(null = "independence") %>%
-  generate(reps = 1000, type = "permute") %>%
-  calculate(stat = "t", order = c("summer", "winter"))
-
-visualize(null_distn) +
-  shade_p_value(obs_stat = t_hat, direction = "two_sided")
-null_distn %>%
-  get_p_value(obs_stat = t_hat, direction = "two_sided")
-```
-
-Note the similarities in this plot and the previous one.
-
-### One numerical variable, one categorical (2 levels) (diff in medians)
-
-**Observed stat**
-
-```{r}
-( d_hat <- fli_small %>%
-  specify(dep_delay ~ season) %>%
-  calculate(stat = "diff in medians", order = c("summer", "winter")) )
-```
-
-```{r}
-null_distn <- fli_small %>%
-  specify(dep_delay ~ season) %>% # alt: response = dep_delay,
-  # explanatory = season
-  hypothesize(null = "independence") %>%
-  generate(reps = 1000, type = "permute") %>%
-  calculate(stat = "diff in medians", order = c("summer", "winter"))
-
-visualize(null_distn) +
-  shade_p_value(obs_stat = d_hat, direction = "two_sided")
-null_distn %>%
-  get_p_value(obs_stat = d_hat, direction = "two_sided")
-```
-
-### One numerical, one categorical (>2 levels) - ANOVA
-
-**Observed stat**
-
-```{r}
-( F_hat <- fli_small %>%
-  specify(arr_delay ~ origin) %>%
-  calculate(stat = "F") )
-```
-
-```{r}
-null_distn <- fli_small %>%
-  specify(arr_delay ~ origin) %>%
-  hypothesize(null = "independence") %>%
-  generate(reps = 1000, type = "permute") %>%
-  calculate(stat = "F")
-
-visualize(null_distn) +
-  shade_p_value(obs_stat = F_hat, direction = "greater")
-null_distn %>%
-  get_p_value(obs_stat = F_hat, direction = "greater")
-```
-
-### Two numerical vars - SLR
-
-**Observed stat**
-
-```{r}
-( slope_hat <- fli_small %>%
-  specify(arr_delay ~ dep_delay) %>%
-  calculate(stat = "slope") )
-```
-
-```{r}
-null_distn <- fli_small %>%
-  specify(arr_delay ~ dep_delay) %>%
-  hypothesize(null = "independence") %>%
-  generate(reps = 1000, type = "permute") %>%
-  calculate(stat = "slope")
-
-visualize(null_distn) +
-  shade_p_value(obs_stat = slope_hat, direction = "two_sided")
-null_distn %>%
-  get_p_value(obs_stat = slope_hat, direction = "two_sided")
-```
-
-### Two numerical vars - correlation
-
-**Observed stat**
-
-```{r}
-( correlation_hat <- fli_small %>%
-  specify(arr_delay ~ dep_delay) %>%
-  calculate(stat = "correlation") )
-```
-
-```{r}
-null_distn <- fli_small %>%
-  specify(arr_delay ~ dep_delay) %>%
-  hypothesize(null = "independence") %>%
-  generate(reps = 1000, type = "permute") %>%
-  calculate(stat = "correlation")
-
-visualize(null_distn) +
-  shade_p_value(obs_stat = correlation_hat, direction = "two_sided")
-null_distn %>%
-  get_p_value(obs_stat = correlation_hat, direction = "two_sided")
-```
-
-
-### Two numerical vars - SLR (t)
-
-Not currently implemented since $t$ could refer to standardized slope or standardized correlation.
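For readers who want to see the two candidates side by side, the following is a minimal base-R sketch (it is not part of the `{infer}` API); it assumes the `fli_small` data frame built in the vignette's data-preparation step and its `arr_delay` and `dep_delay` columns.

```r
# Candidate 1: the standardized slope, i.e. the t statistic lm() reports
# for the dep_delay coefficient.
slope_t <- summary(lm(arr_delay ~ dep_delay, data = fli_small))$coefficients[
  "dep_delay", "t value"
]

# Candidate 2: the standardized correlation, i.e. the t statistic from a
# correlation test of the same two variables.
cor_t <- unname(cor.test(~ arr_delay + dep_delay, data = fli_small)$statistic)

c(slope = slope_t, correlation = cor_t)
```

Either quantity is a defensible reading of `stat = "t"` for two numerical variables, which is the ambiguity the note above refers to.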
-
-
-```{r echo=FALSE, eval=FALSE}
-# **Standardized observed stat**
-( t_hat <- fli_small %>%
-  specify(arr_delay ~ dep_delay) %>%
-  calculate(stat = "t") )
-```
-
-```{r echo=FALSE, eval=FALSE}
-null_distn <- fli_small %>%
-  specify(arr_delay ~ dep_delay) %>%
-  hypothesize(null = "independence") %>%
-  generate(reps = 1000, type = "permute") %>%
-  calculate(stat = "t")
-
-visualize(null_distn) +
-  shade_p_value(obs_stat = t_hat, direction = "two_sided")
-null_distn %>%
-  get_p_value(obs_stat = t_hat, direction = "two_sided")
-```
-
-
-## Confidence intervals
-
-### One numerical (one mean)
-
-**Point estimate**
-
-```{r}
-( x_bar <- fli_small %>%
-  specify(response = arr_delay) %>%
-  calculate(stat = "mean") )
-```
-
-```{r}
-boot <- fli_small %>%
-  specify(response = arr_delay) %>%
-  generate(reps = 1000, type = "bootstrap") %>%
-  calculate(stat = "mean")
-( percentile_ci <- get_ci(boot) )
-
-visualize(boot) +
-  shade_confidence_interval(endpoints = percentile_ci)
-( standard_error_ci <- get_ci(boot, type = "se", point_estimate = x_bar) )
-
-visualize(boot) +
-  shade_confidence_interval(endpoints = standard_error_ci)
-```
-
-### One numerical (one mean - standardized)
-
-**Point estimate**
-
-```{r}
-( t_hat <- fli_small %>%
-  specify(response = arr_delay) %>%
-  calculate(stat = "t") )
-```
-
-```{r}
-boot <- fli_small %>%
-  specify(response = arr_delay) %>%
-  generate(reps = 1000, type = "bootstrap") %>%
-  calculate(stat = "t")
-( percentile_ci <- get_ci(boot) )
-
-visualize(boot) +
-  shade_confidence_interval(endpoints = percentile_ci)
-( standard_error_ci <- get_ci(boot, type = "se", point_estimate = t_hat) )
-
-visualize(boot) +
-  shade_confidence_interval(endpoints = standard_error_ci)
-```
-
-
-### One categorical (one proportion)
-
-**Point estimate**
-
-```{r}
-( p_hat <- fli_small %>%
-  specify(response = day_hour, success = "morning") %>%
-  calculate(stat = "prop") )
-```
-
-```{r}
-boot <- fli_small %>%
-  specify(response = day_hour, success = "morning") %>%
-  generate(reps = 1000, type = "bootstrap") %>%
-  calculate(stat = "prop")
-( percentile_ci <- get_ci(boot) )
-
-visualize(boot) +
-  shade_confidence_interval(endpoints = percentile_ci)
-( standard_error_ci <- get_ci(boot, type = "se", point_estimate = p_hat) )
-
-visualize(boot) +
-  shade_confidence_interval(endpoints = standard_error_ci)
-```
-
-### One categorical variable (standardized proportion $z$)
-
-Not yet implemented.
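Until that statistic is available, the sketch below shows one way to standardize a sample proportion by hand; it assumes the `fli_small` data from the data-preparation step, and the reference value `p0 <- 0.5` is purely illustrative rather than anything implied by the package.

```r
# z = (p_hat - p0) / sqrt(p0 * (1 - p0) / n): the usual one-sample proportion z.
p0 <- 0.5  # illustrative reference value, not taken from the vignette
p_hat <- fli_small %>%
  specify(response = day_hour, success = "morning") %>%
  calculate(stat = "prop") %>%
  dplyr::pull(stat)
n <- nrow(fli_small)

(z_by_hand <- (p_hat - p0) / sqrt(p0 * (1 - p0) / n))
```

The same standardization could, in principle, be applied to each bootstrap replicate to approximate what a future `stat = "z"` option might return here.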
-
-### One numerical variable, one categorical (2 levels) (diff in means)
-
-**Point estimate**
-
-```{r}
-( d_hat <- fli_small %>%
-  specify(arr_delay ~ season) %>%
-  calculate(stat = "diff in means", order = c("summer", "winter")) )
-```
-
-```{r}
-boot <- fli_small %>%
-  specify(arr_delay ~ season) %>%
-  generate(reps = 1000, type = "bootstrap") %>%
-  calculate(stat = "diff in means", order = c("summer", "winter"))
-( percentile_ci <- get_ci(boot) )
-
-visualize(boot) +
-  shade_confidence_interval(endpoints = percentile_ci)
-( standard_error_ci <- get_ci(boot, type = "se", point_estimate = d_hat) )
-
-visualize(boot) +
-  shade_confidence_interval(endpoints = standard_error_ci)
-```
-
-### One numerical variable, one categorical (2 levels) (t)
-
-**Standardized point estimate**
-
-```{r}
-( t_hat <- fli_small %>%
-  specify(arr_delay ~ season) %>%
-  calculate(stat = "t", order = c("summer", "winter")) )
-```
-
-```{r}
-boot <- fli_small %>%
-  specify(arr_delay ~ season) %>%
-  generate(reps = 1000, type = "bootstrap") %>%
-  calculate(stat = "t", order = c("summer", "winter"))
-( percentile_ci <- get_ci(boot) )
-
-visualize(boot) +
-  shade_confidence_interval(endpoints = percentile_ci)
-( standard_error_ci <- get_ci(boot, type = "se", point_estimate = t_hat) )
-
-visualize(boot) +
-  shade_confidence_interval(endpoints = standard_error_ci)
-```
-
-
-### Two categorical variables (diff in proportions)
-
-**Point estimate**
-
-```{r}
-( d_hat <- fli_small %>%
-  specify(day_hour ~ season, success = "morning") %>%
-  calculate(stat = "diff in props", order = c("summer", "winter")) )
-```
-
-```{r}
-boot <- fli_small %>%
-  specify(day_hour ~ season, success = "morning") %>%
-  generate(reps = 1000, type = "bootstrap") %>%
-  calculate(stat = "diff in props", order = c("summer", "winter"))
-( percentile_ci <- get_ci(boot) )
-
-visualize(boot) +
-  shade_confidence_interval(endpoints = percentile_ci)
-( standard_error_ci <- get_ci(boot, type = "se", point_estimate = d_hat) )
-
-visualize(boot) +
-  shade_confidence_interval(endpoints = standard_error_ci)
-```
-
-### Two categorical variables (z)
-
-**Standardized point estimate**
-
-```{r}
-( z_hat <- fli_small %>%
-  specify(day_hour ~ season, success = "morning") %>%
-  calculate(stat = "z", order = c("summer", "winter")) )
-```
-
-```{r}
-boot <- fli_small %>%
-  specify(day_hour ~ season, success = "morning") %>%
-  generate(reps = 1000, type = "bootstrap") %>%
-  calculate(stat = "z", order = c("summer", "winter"))
-( percentile_ci <- get_ci(boot) )
-
-visualize(boot) +
-  shade_confidence_interval(endpoints = percentile_ci)
-( standard_error_ci <- get_ci(boot, type = "se", point_estimate = z_hat) )
-
-visualize(boot) +
-  shade_confidence_interval(endpoints = standard_error_ci)
-```
-
-
-### Two numerical vars - SLR
-
-**Point estimate**
-
-```{r}
-( slope_hat <- fli_small %>%
-  specify(arr_delay ~ dep_delay) %>%
-  calculate(stat = "slope") )
-```
-
-```{r}
-boot <- fli_small %>%
-  specify(arr_delay ~ dep_delay) %>%
-  generate(reps = 1000, type = "bootstrap") %>%
-  calculate(stat = "slope")
-( percentile_ci <- get_ci(boot) )
-
-visualize(boot) +
-  shade_confidence_interval(endpoints = percentile_ci)
-( standard_error_ci <- get_ci(boot, type = "se", point_estimate = slope_hat) )
-
-visualize(boot) +
-  shade_confidence_interval(endpoints = standard_error_ci)
-```
-
-### Two numerical vars - correlation
-
-**Point estimate**
-
-```{r}
-( correlation_hat <- fli_small %>%
-  specify(arr_delay ~ dep_delay) %>%
-  calculate(stat = "correlation") )
-```
-
-```{r}
-boot <- fli_small %>%
-  specify(arr_delay ~ dep_delay) %>%
-  generate(reps = 1000, type = "bootstrap") %>%
-  calculate(stat = "correlation")
-( percentile_ci <- get_ci(boot) )
-
-visualize(boot) +
-  shade_confidence_interval(endpoints = percentile_ci)
-( standard_error_ci <- get_ci(boot, type = "se",
-                            point_estimate = correlation_hat) )
-
-visualize(boot) +
-  shade_confidence_interval(endpoints = standard_error_ci)
-```
-
-
-### Two numerical vars - t
-
-Not currently implemented since $t$ could refer to standardized slope or standardized correlation.
-
-
-```{r eval=FALSE, echo=FALSE}
-# **Point estimate**
-( t_hat <- fli_small %>%
-  specify(arr_delay ~ dep_delay) %>%
-  calculate(stat = "t") )
-```
-
-```{r eval=FALSE, echo=FALSE}
-boot <- fli_small %>%
-  specify(arr_delay ~ dep_delay) %>%
-  generate(reps = 1000, type = "bootstrap") %>%
-  calculate(stat = "t")
-( percentile_ci <- get_ci(boot) )
-
-visualize(boot) +
-  shade_confidence_interval(endpoints = percentile_ci)
-( standard_error_ci <- get_ci(boot, type = "se", point_estimate = t_hat) )
-
-visualize(boot) +
-  shade_confidence_interval(endpoints = standard_error_ci)
-```
diff --git a/inst/doc/observed_stat_examples.html b/inst/doc/observed_stat_examples.html
deleted file mode 100644
index 0b5a1667..00000000
--- a/inst/doc/observed_stat_examples.html
+++ /dev/null
@@ -1,1390 +0,0 @@
diff --git a/inst/doc/two_sample_t.R b/inst/doc/two_sample_t.R
deleted file mode 100644
index ea06c5c3..00000000
--- a/inst/doc/two_sample_t.R
+++ /dev/null
@@ -1,74 +0,0 @@
-## ----include=FALSE-------------------------------------------------------
-knitr::opts_chunk$set(fig.width = 8, fig.height = 3)
-
-## ----message=FALSE, warning=FALSE----------------------------------------
-library(nycflights13)
-library(dplyr)
-library(stringr)
-library(infer)
-set.seed(2017)
-fli_small <- flights %>%
-  sample_n(size = 500) %>%
-  mutate(half_year = case_when(
-    between(month, 1, 6) ~ "h1",
-    between(month, 7, 12) ~ "h2"
-  )) %>%
-  mutate(day_hour = case_when(
-    between(hour, 1, 12) ~ "morning",
-    between(hour, 13, 24) ~ "not morning"
-  )) %>%
-  select(arr_delay, dep_delay, half_year,
-         day_hour, origin, carrier)
-
-## ------------------------------------------------------------------------
-obs_t <- fli_small %>%
-  specify(arr_delay ~ half_year) %>%
-  calculate(stat = "t", order = c("h1", "h2"))
-
-## ------------------------------------------------------------------------
-obs_t <- fli_small %>%
-  t_test(formula = arr_delay ~ half_year, alternative = "two_sided",
-         order = c("h1", "h2")) %>%
-  dplyr::pull(statistic)
-
-## ------------------------------------------------------------------------
-obs_t <- fli_small %>%
-  t_stat(formula = arr_delay ~ half_year, order = c("h1", "h2"))
-
-## ------------------------------------------------------------------------
-t_null_perm <- fli_small %>%
-  # alt: response = arr_delay, explanatory = half_year
-  specify(arr_delay ~ half_year) %>%
-  hypothesize(null = "independence") %>%
-  generate(reps = 1000, type = "permute") %>%
-  calculate(stat = "t", order = c("h1", "h2"))
-
-visualize(t_null_perm) +
-  shade_p_value(obs_stat = obs_t, direction = "two_sided")
-
-## ------------------------------------------------------------------------
-t_null_perm %>%
-  get_p_value(obs_stat = obs_t, direction = "two_sided")
-
-## ------------------------------------------------------------------------
-t_null_theor <- fli_small %>%
-  # alt: response = arr_delay, explanatory = half_year
-  specify(arr_delay ~ half_year) %>%
-  hypothesize(null = "independence") %>%
-  # generate() ## Not used for theoretical
-  calculate(stat = "t", order = c("h1", "h2"))
-
-visualize(t_null_theor, method = "theoretical") +
-  shade_p_value(obs_stat = obs_t, direction = "two_sided")
-
-## ------------------------------------------------------------------------
-visualize(t_null_perm, method = "both") +
-  shade_p_value(obs_stat = obs_t, direction = "two_sided")
-
-## ------------------------------------------------------------------------
-fli_small %>%
-  t_test(formula = arr_delay ~ half_year,
-         alternative = "two_sided",
-         order = c("h1", "h2")) %>%
-  dplyr::pull(p_value)
-
diff --git a/inst/doc/two_sample_t.Rmd b/inst/doc/two_sample_t.Rmd
deleted file mode 100755
index e4a2b100..00000000
--- a/inst/doc/two_sample_t.Rmd
+++ /dev/null
@@ -1,138 +0,0 @@
----
-title: "Two sample $t$ test example using `nycflights13` `flights` data"
-author: "Chester Ismay"
-date: "`r Sys.Date()`"
-output:
-  rmarkdown::html_vignette:
-    df_print: kable
-vignette: |
-  %\VignetteIndexEntry{Two sample t test flights example}
-  %\VignetteEngine{knitr::rmarkdown}
-  %\VignetteEncoding{UTF-8}
----
-
-```{r include=FALSE}
-knitr::opts_chunk$set(fig.width = 8, fig.height = 3)
-```
-
-**Note**: The `type` argument in `generate()` is automatically filled based on the entries for `specify()` and
-`hypothesize()`. It can be removed throughout the examples that follow. It is left in to reiterate the type of generation process being performed.
-
-## Data preparation
-
-```{r message=FALSE, warning=FALSE}
-library(nycflights13)
-library(dplyr)
-library(stringr)
-library(infer)
-set.seed(2017)
-fli_small <- flights %>%
-  sample_n(size = 500) %>%
-  mutate(half_year = case_when(
-    between(month, 1, 6) ~ "h1",
-    between(month, 7, 12) ~ "h2"
-  )) %>%
-  mutate(day_hour = case_when(
-    between(hour, 1, 12) ~ "morning",
-    between(hour, 13, 24) ~ "not morning"
-  )) %>%
-  select(arr_delay, dep_delay, half_year,
-         day_hour, origin, carrier)
-```
-
-* Two numeric - `arr_delay`, `dep_delay`
-* Two categories
-    - `half_year` (`"h1"`, `"h2"`),
-    - `day_hour` (`"morning"`, `"not morning"`)
-* Three categories - `origin` (`"EWR"`, `"JFK"`, `"LGA"`)
-* Sixteen categories - `carrier`
-
-***
-
-# One numerical variable, one categorical (2 levels)
-
-## Calculate observed statistic
-
-The recommended approach is to use `specify() %>% calculate()`:
-
-```{r}
-obs_t <- fli_small %>%
-  specify(arr_delay ~ half_year) %>%
-  calculate(stat = "t", order = c("h1", "h2"))
-```
-
-The observed $t$ statistic is `r obs_t`.
-
-Or using `t_test` in `infer`
-
-```{r}
-obs_t <- fli_small %>%
-  t_test(formula = arr_delay ~ half_year, alternative = "two_sided",
-         order = c("h1", "h2")) %>%
-  dplyr::pull(statistic)
-```
-
-The observed $t$ statistic is `r obs_t`.
-
-Or using another shortcut function in `infer`:
-
-```{r}
-obs_t <- fli_small %>%
-  t_stat(formula = arr_delay ~ half_year, order = c("h1", "h2"))
-```
-
-The observed $t$ statistic is `r obs_t`.
-
-## Randomization approach to t-statistic
-
-```{r}
-t_null_perm <- fli_small %>%
-  # alt: response = arr_delay, explanatory = half_year
-  specify(arr_delay ~ half_year) %>%
-  hypothesize(null = "independence") %>%
-  generate(reps = 1000, type = "permute") %>%
-  calculate(stat = "t", order = c("h1", "h2"))
-
-visualize(t_null_perm) +
-  shade_p_value(obs_stat = obs_t, direction = "two_sided")
-```
-
-## Calculate the randomization-based $p$-value
-
-```{r}
-t_null_perm %>%
-  get_p_value(obs_stat = obs_t, direction = "two_sided")
-```
-
-
-## Theoretical distribution
-
-```{r }
-t_null_theor <- fli_small %>%
-  # alt: response = arr_delay, explanatory = half_year
-  specify(arr_delay ~ half_year) %>%
-  hypothesize(null = "independence") %>%
-  # generate() ## Not used for theoretical
-  calculate(stat = "t", order = c("h1", "h2"))
-
-visualize(t_null_theor, method = "theoretical") +
-  shade_p_value(obs_stat = obs_t, direction = "two_sided")
-```
-
-## Overlay appropriate $t$ distribution on top of permuted t-statistics
-
-```{r}
-visualize(t_null_perm, method = "both") +
-  shade_p_value(obs_stat = obs_t, direction = "two_sided")
-```
-
-## Compute theoretical p-value
-
-```{r}
-fli_small %>%
-  t_test(formula = arr_delay ~ half_year,
-         alternative = "two_sided",
-         order = c("h1", "h2")) %>%
-  dplyr::pull(p_value)
-```
-
diff --git a/inst/doc/two_sample_t.html b/inst/doc/two_sample_t.html
deleted file mode 100644
index f14c8554..00000000
--- a/inst/doc/two_sample_t.html
+++ /dev/null
@@ -1,239 +0,0 @@
From f326f9d12885f1364a7ac967fb0badf4a2cb37e8 Mon Sep 17 00:00:00 2001
From: andrewpbray
Date: Wed, 14 Nov 2018 17:06:48 -0800
Subject: [PATCH 77/78] add tolerance for expect_equal so that the test
 doesn't fail when run in environments without long doubles.

---
 tests/testthat/test-wrappers.R | 12 +++++++-----
 1 file changed, 7 insertions(+), 5 deletions(-)

diff --git a/tests/testthat/test-wrappers.R b/tests/testthat/test-wrappers.R
index 6f676e75..21d3cc78 100644
--- a/tests/testthat/test-wrappers.R
+++ b/tests/testthat/test-wrappers.R
@@ -29,7 +29,7 @@ test_that("chisq_test works", {
     broom::glance() %>%
     dplyr::select(statistic, chisq_df = parameter, p_value = p.value)
-  expect_equal(new_way, old_way)
+  expect_equal(new_way, old_way, tolerance = 1e-5)
   ## Not implemented
   # expect_silent(
   #   iris3 %>% chisq_test(response = Sepal.Length.Group, explanatory = Species)
@@ -93,7 +93,8 @@ test_that("conf_int argument works", {
         conf_int = FALSE
       )
     ),
-    c("statistic", "t_df", "p_value", "alternative")
+    c("statistic", "t_df", "p_value", "alternative"),
+    tolerance = 1e-5
   )
   expect_equal(
     names(
@@ -103,7 +104,8 @@ test_that("conf_int argument works", {
         conf_int = TRUE
      )
    ),
-    c("statistic", "t_df", "p_value", "alternative", "lower_ci", "upper_ci")
+    c("statistic", "t_df", "p_value", "alternative", "lower_ci", "upper_ci"),
+    tolerance = 1e-5
   )
 
   ci_test <- iris2 %>%
@@ -114,8 +116,8 @@ test_that("conf_int argument works", {
   old_way <- t.test(
     formula = Sepal.Width ~ Species, data = iris2, conf.level = 0.9
   )[["conf.int"]]
-  expect_equal(ci_test$lower_ci[1], old_way[1])
-  expect_equal(ci_test$upper_ci[1], old_way[2])
+  expect_equal(ci_test$lower_ci[1], old_way[1], tolerance = 1e-5)
+  expect_equal(ci_test$upper_ci[1], old_way[2], tolerance = 1e-5)
 
   expect_error(
     iris2 %>%

From 64aad2dd4a1d48e65ada211a57655934dc72c164 Mon Sep 17 00:00:00 2001
From: andrewpbray
Date: Thu, 15 Nov 2018 11:03:21 -0800
Subject: [PATCH 78/78] add cran release

---
 .Rbuildignore | 1 +
 1 file changed, 1 insertion(+)

diff --git a/.Rbuildignore b/.Rbuildignore
index df4589d6..2193547a 100644
--- a/.Rbuildignore
+++ b/.Rbuildignore
@@ -1,3 +1,4 @@
+^CRAN-RELEASE$
 ^.*\.Rproj$
 ^\.Rproj\.user$
 ^README\.Rmd$
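As context for the tolerance change in [PATCH 77/78] above, here is a small, self-contained `testthat` sketch of the mechanism being relied on; the values are illustrative only and are not taken from the package's test suite.

```r
library(testthat)

# Two numerically different routes to the "same" value can disagree in the
# last few digits, particularly on platforms without 80-bit long doubles.
a <- sum(rep(1 / 7, 7))  # accumulates a little rounding error
b <- 1

test_that("an explicit tolerance absorbs platform-dependent rounding", {
  # expect_equal() compares numeric values up to a tolerance; widening it to
  # 1e-5 keeps comparisons like this from failing over floating-point noise.
  expect_equal(a, b, tolerance = 1e-5)
})
```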