diff --git a/.Rbuildignore b/.Rbuildignore index b29a0ee0..f289f8ca 100644 --- a/.Rbuildignore +++ b/.Rbuildignore @@ -4,7 +4,6 @@ ^README\.Rmd$ ^figs$ ^profiles* -^\.travis\.yml$ ^examples* ^codecov\.yml$ ^docs* diff --git a/.github/workflows/check-standard.yaml b/.github/workflows/check-standard.yaml index dc2228e7..e309a13c 100644 --- a/.github/workflows/check-standard.yaml +++ b/.github/workflows/check-standard.yaml @@ -20,10 +20,11 @@ jobs: fail-fast: false matrix: config: + - {os: macOS-latest, r: 'release'} - {os: windows-latest, r: 'release'} - - {os: macOS-latest, r: 'release'} - - {os: macOS-latest, r: 'devel'} - - {os: ubuntu-16.04, r: 'release', rspm: "https://demo.rstudiopm.com/all/__linux__/xenial/latest"} + - {os: windows-latest, r: '3.6'} + - {os: ubuntu-16.04, r: 'devel', rspm: "https://packagemanager.rstudio.com/cran/__linux__/xenial/latest", http-user-agent: "R/4.0.0 (ubuntu-16.04) R (4.0.0 x86_64-pc-linux-gnu x86_64 linux-gnu) on GitHub Actions" } + - {os: ubuntu-16.04, r: 'release', rspm: "https://packagemanager.rstudio.com/cran/__linux__/xenial/latest"} env: R_REMOTES_NO_ERRORS_FROM_WARNINGS: true diff --git a/.travis.yml b/.travis.yml deleted file mode 100644 index cf85d7f3..00000000 --- a/.travis.yml +++ /dev/null @@ -1,61 +0,0 @@ -language: r - -matrix: - include: - - r: oldrel - # Don't use vdiffr on old R release - env: VDIFFR_RUN_TESTS=false - - r: release - - r: devel - -warnings_are_errors: true -sudo: required -cache: packages -latex: false - -env: - global: - - CRAN: http://cran.rstudio.com - # Use vdiffr by default - - VDIFFR_RUN_TESTS: true - -notifications: - email: - on_success: change - on_failure: change - -r_packages: - - devtools - -r_github_packages: - - r-lib/callr - - r-lib/pkgbuild - - hadley/pkgdown - - tidymodels/infer - -before_deploy: - - Rscript -e 'pkgdown::build_site()' - -deploy: - - provider: pages - skip-cleanup: true - keep-history: true - local-dir: docs - github_token: $GITHUBTRAVIS - target-branch: gh-pages - on: - branch: master - - provider: pages - skip-cleanup: true - keep-history: true - local-dir: docs - github_token: $GITHUBTRAVIS - target-branch: gh-pages-dev - on: - branch: develop - - -after_success: - - Rscript -e 'covr::codecov()' - - diff --git a/DESCRIPTION b/DESCRIPTION index 34137b3a..a827615f 100755 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,53 +1,105 @@ -Package: infer Type: Package +Package: infer Title: Tidy Statistical Inference -Version: 0.5.3 -Authors@R: c( - person("Andrew", "Bray", email = "abray@reed.edu", role = c("aut", "cre")), - person("Chester", "Ismay", email = "chester.ismay@gmail.com", role = "aut"), - person("Evgeni", "Chasnovski", email = "evgeni.chasnovski@gmail.com", role = "aut"), - person("Ben", "Baumer", email = "ben.baumer@gmail.com", role = "aut"), - person("Mine", "Cetinkaya-Rundel", email = "mine@stat.duke.edu", role = "aut"), - person("Simon", "Couch", email = "simonpatrickcouch@gmail.com", role = "ctb"), - person("Ted", "Laderas", email = "tedladeras@gmail.com", role = "ctb"), - person("Nick", "Solomon", email = "nick.solomon@datacamp.com", role = "ctb"), - person("Johanna", "Hardin", email = "Jo.Hardin@pomona.edu", role = "ctb"), - person("Albert Y.", "Kim", email = "albert.ys.kim@gmail.com", role = "ctb"), - person("Neal", "Fultz", email = "nfultz@gmail.com", role = "ctb"), - person("Doug", "Friedman", email = "doug.nhp@gmail.com", role = "ctb"), - person("Richie", "Cotton", email = "richie@datacamp.com", role = "ctb"), - person("Brian", "Fannin", email = "captain@pirategrunt.com", 
role = "ctb")) -Description: The objective of this package is to perform inference using an expressive statistical grammar that coheres with the tidy design framework. +Version: 0.5.4 +Authors@R: + c(person(given = "Andrew", + family = "Bray", + role = c("aut", "cre"), + email = "abray@reed.edu"), + person(given = "Chester", + family = "Ismay", + role = "aut", + email = "chester.ismay@gmail.com", + comment = c(ORCID = "0000-0003-2820-2547")), + person(given = "Evgeni", + family = "Chasnovski", + role = "aut", + email = "evgeni.chasnovski@gmail.com", + comment = c(ORCID = "0000-0002-1617-4019")), + person(given = "Ben", + family = "Baumer", + role = "aut", + email = "ben.baumer@gmail.com", + comment = c(ORCID = "0000-0002-3279-0516")), + person(given = "Mine", + family = "Cetinkaya-Rundel", + role = "aut", + email = "mine@stat.duke.edu", + comment = c(ORCID = "0000-0001-6452-2420")), + person(given = "Simon", + family = "Couch", + role = "ctb", + email = "simonpatrickcouch@gmail.com"), + person(given = "Ted", + family = "Laderas", + role = "ctb", + email = "tedladeras@gmail.com", + comment = c(ORCID = "0000-0002-6207-7068")), + person(given = "Nick", + family = "Solomon", + role = "ctb", + email = "nick.solomon@datacamp.com"), + person(given = "Johanna", + family = "Hardin", + role = "ctb", + email = "Jo.Hardin@pomona.edu"), + person(given = "Albert Y.", + family = "Kim", + role = "ctb", + email = "albert.ys.kim@gmail.com", + comment = c(ORCID = "0000-0001-7824-306X")), + person(given = "Neal", + family = "Fultz", + role = "ctb", + email = "nfultz@gmail.com"), + person(given = "Doug", + family = "Friedman", + role = "ctb", + email = "doug.nhp@gmail.com"), + person(given = "Richie", + family = "Cotton", + role = "ctb", + email = "richie@datacamp.com", + comment = c(ORCID = "0000-0003-2504-802X")), + person(given = "Brian", + family = "Fannin", + role = "ctb", + email = "captain@pirategrunt.com")) +Description: The objective of this package is to perform + inference using an expressive statistical grammar that coheres with + the tidy design framework. 
License: CC0 -Encoding: UTF-8 -LazyData: true +URL: https://github.com/tidymodels/infer, + https://infer.tidymodels.org/ +BugReports: https://github.com/tidymodels/infer/issues +Depends: + R (>= 3.5.0) Imports: dplyr (>= 0.7.0), - methods, - tibble, - rlang (>= 0.2.0), ggplot2, - magrittr, glue (>= 1.3.0), grDevices, - purrr -Depends: - R (>= 3.5.0) -Suggests: + magrittr, + methods, + purrr, + rlang (>= 0.2.0), + tibble +Suggests: broom, + covr, devtools (>= 1.12.0), + fs, knitr, - tidyr, - rmarkdown, nycflights13, + rmarkdown, stringr, testthat, - covr, - vdiffr, - fs -URL: https://github.com/tidymodels/infer, - https://infer.netlify.com/ -BugReports: https://github.com/tidymodels/infer/issues + tidyr, + vdiffr +VignetteBuilder: + knitr +Encoding: UTF-8 +LazyData: true Roxygen: list(markdown = TRUE) RoxygenNote: 7.1.1 -VignetteBuilder: knitr diff --git a/NAMESPACE b/NAMESPACE index ebcfda1b..b66dea2f 100755 --- a/NAMESPACE +++ b/NAMESPACE @@ -17,6 +17,7 @@ export(hypothesize) export(p_value) export(prop_test) export(rep_sample_n) +export(rep_slice_sample) export(shade_ci) export(shade_confidence_interval) export(shade_p_value) @@ -28,7 +29,6 @@ export(visualise) export(visualize) importFrom(dplyr,bind_rows) importFrom(dplyr,group_by) -importFrom(dplyr,inner_join) importFrom(dplyr,mutate_if) importFrom(dplyr,n) importFrom(dplyr,one_of) diff --git a/NEWS.md b/NEWS.md index abc8a1f7..8218df7a 100755 --- a/NEWS.md +++ b/NEWS.md @@ -1,3 +1,15 @@ +# infer 0.5.4 + +- `rep_sample_n()` no longer errors when supplied a `prob` argument (#279) +- Added `rep_slice_sample()`, a light wrapper around `rep_sample_n()`, which +more closely resembles `dplyr::slice_sample()` (the function that supersedes +`dplyr::sample_n()`) (#325) +- Added `success`, `correct`, and `z` arguments to `prop_test()` +(#343, #347, #353) +- Implemented observed statistic calculation for the standardized proportion +$z$ statistic (#351, #353) +- Various bug fixes and improvements to documentation and error messages. + # infer 0.5.3 ## Breaking changes @@ -172,7 +184,7 @@ current implementations being They now live in `specify()`. - Updated documentation with examples - Created `pkgdown` site materials - - Deployed to https://infer.netlify.com + - Deployed to https://infer.tidymodels.org/ # infer 0.0.1 diff --git a/R/calculate.R b/R/calculate.R index 2c9e5e91..ed797775 100755 --- a/R/calculate.R +++ b/R/calculate.R @@ -1,10 +1,10 @@ #' Calculate summary statistics #' #' @description -#' -#' Calculates summary statistics from outputs of [generate()] or +#' +#' Calculates summary statistics from outputs of [generate()] or #' [hypothesize()]. -#' +#' #' Learn more in `vignette("infer")`. #' #' @param x The output from [generate()] for computation-based inference or the @@ -23,10 +23,10 @@ #' @return A tibble containing a `stat` column of calculated statistics. #' #' @section Missing levels in small samples: -#' In some cases, when bootstrapping with small samples, some generated -#' bootstrap samples will have only one level of the explanatory variable +#' In some cases, when bootstrapping with small samples, some generated +#' bootstrap samples will have only one level of the explanatory variable #' present. For some test statistics, the calculated statistic in these -#' cases will be NaN. The package will omit non-finite values from +#' cases will be NaN. The package will omit non-finite values from #' visualizations (with a warning) and raise an error in p-value calculations.
#' #' @examples @@ -34,18 +34,18 @@ #' # calculate a null distribution of hours worked per week under #' # the null hypothesis that the mean is 40 #' gss %>% -#' specify(response = hours) %>% -#' hypothesize(null = "point", mu = 40) %>% -#' generate(reps = 200, type = "bootstrap") %>% -#' calculate(stat = "mean") +#' specify(response = hours) %>% +#' hypothesize(null = "point", mu = 40) %>% +#' generate(reps = 200, type = "bootstrap") %>% +#' calculate(stat = "mean") #' #' # calculate a null distribution assuming independence between age #' # of respondent and whether they have a college degree #' gss %>% -#' specify(age ~ college) %>% -#' hypothesize(null = "independence") %>% -#' generate(reps = 200, type = "permute") %>% -#' calculate("diff in means", order = c("degree", "no degree")) +#' specify(age ~ college) %>% +#' hypothesize(null = "independence") %>% +#' generate(reps = 200, type = "permute") %>% +#' calculate("diff in means", order = c("degree", "no degree")) #' #' # More in-depth explanation of how to use the infer package #' \dontrun{ @@ -83,7 +83,7 @@ calculate <- function(x, } else if ( stat %in% c( "mean", "median", "sum", "sd", "prop", "count", "diff in means", - "diff in medians", "diff in props", "slope", "correlation", + "diff in medians", "diff in props", "slope", "correlation", "ratio of props", "odds ratio" ) ) { @@ -92,35 +92,44 @@ calculate <- function(x, "implemented) for `stat` = \"{stat}\". Are you missing ", "a `generate()` step?" ) - } else if (!(stat %in% c("Chisq", "prop", "count")) & - !(stat == "t" & (attr(x, "theory_type") == "One sample t"))) { + } else if ( + !(stat %in% c("Chisq", "prop", "count")) & + !(stat %in% c("t", "z") + & (attr(x, "theory_type") %in% + c("One sample t", "One sample prop z", "Two sample props z")))) { # From `hypothesize()` to `calculate()` # Catch-all if generate was not called -# warning_glue("You unexpectantly went from `hypothesize()` to ", -# "`calculate()` skipping over `generate()`. Your current ", -# "data frame is returned.") + # warning_glue( + # "You unexpectantly went from `hypothesize()` to ", + # "`calculate()` skipping over `generate()`. Your current ", + # "data frame is returned." + # ) return(x) } } - + if ( - (stat %in% c("diff in means", "diff in medians", - "diff in props", "ratio of props", "odds ratio")) || - ( - !is_nuat(x, "theory_type") && - (attr(x, "theory_type") %in% c("Two sample props z", "Two sample t")) - ) + (stat %in% c( + "diff in means", "diff in medians", + "diff in props", "ratio of props", "odds ratio" + )) || + ( + !is_nuat(x, "theory_type") && + (attr(x, "theory_type") %in% c("Two sample props z", "Two sample t")) + ) ) { order <- check_order(x, explanatory_variable(x), order) } if (!( - (stat %in% c("diff in means", "diff in medians", - "diff in props", "ratio of props", "odds ratio")) || - ( - !is_nuat(x, "theory_type") && - attr(x, "theory_type") %in% c("Two sample props z", "Two sample t") - ) + (stat %in% c( + "diff in means", "diff in medians", + "diff in props", "ratio of props", "odds ratio" + )) || + ( + !is_nuat(x, "theory_type") && + attr(x, "theory_type") %in% c("Two sample props z", "Two sample t") + ) )) { if (!is.null(order)) { warning_glue( @@ -140,9 +149,9 @@ calculate <- function(x, "Your choice of `stat` is invalid for the types of variables `specify`ed." 
) } -# else { -# result <- append_infer_class(result) -# } + # else { + # result <- append_infer_class(result) + # } result <- copy_attrs(to = result, from = x) attr(result, "stat") <- stat @@ -162,7 +171,7 @@ calc_impl <- function(type, x, order, ...) { calc_impl_one_f <- function(f) { function(type, x, order, ...) { col <- base::setdiff(names(x), "replicate") - + x %>% dplyr::group_by(replicate) %>% dplyr::summarize(stat = f(!!(sym(col)), ...)) @@ -180,7 +189,7 @@ calc_impl.sd <- calc_impl_one_f(stats::sd) calc_impl_success_f <- function(f, output_name) { function(type, x, order, ...) { col <- base::setdiff(names(x), "replicate") - + ## No longer needed with implementation of `check_point_params()` # if (!is.factor(x[[col]])) { # stop_glue( @@ -188,14 +197,14 @@ calc_impl_success_f <- function(f, output_name) { # "variable is not a factor." # ) # } - + if (is_nuat(x, "success")) { stop_glue( 'To calculate a {output_name}, the `"success"` argument must be ', - 'provided in `specify()`.' + "provided in `specify()`." ) } - + success <- attr(x, "success") x %>% dplyr::group_by(replicate) %>% @@ -204,12 +213,16 @@ calc_impl_success_f <- function(f, output_name) { } calc_impl.prop <- calc_impl_success_f( - f = function(response, success, ...) {mean(response == success, ...)}, + f = function(response, success, ...) { + mean(response == success, ...) + }, output_name = "proportion" ) calc_impl.count <- calc_impl_success_f( - f = function(response, success, ...) {sum(response == success, ...)}, + f = function(response, success, ...) { + sum(response == success, ...) + }, output_name = "count" ) @@ -256,21 +269,26 @@ calc_impl.diff_in_means <- calc_impl_diff_f(mean) calc_impl.diff_in_medians <- calc_impl_diff_f(stats::median) calc_impl.Chisq <- function(type, x, order, ...) { - ## The following could stand to be cleaned up + resp_var <- as.character(attr(x, "response")) if (is_nuat(x, "explanatory")) { # Chi-Square Goodness of Fit if (!is_nuat(x, "params")) { # When `hypothesize()` has been called p_levels <- get_par_levels(x) - x %>% - dplyr::summarize( - stat = suppressWarnings(stats::chisq.test( - # Ensure correct ordering of parameters - table(!!(attr(x, "response")))[p_levels], - p = attr(x, "params") - )$stat + chisq_gof <- function(df) { + chisq <- suppressWarnings(stats::chisq.test( + # Ensure correct ordering of parameters + table(df[[resp_var]])[p_levels], + p = attr(x, "params") )) + + unname(chisq[["statistic"]]) + } + + result <- x %>% + dplyr::nest_by(.key = "data") %>% + dplyr::summarise(stat = chisq_gof(data), .groups = "drop") } else { # Straight from `specify()` stop_glue( @@ -280,61 +298,58 @@ calc_impl.Chisq <- function(type, x, order, ...) { ) } } else { - # This is not matching with chisq.test - # obs_tab <- x %>% - # dplyr::filter(replicate == 1) %>% - # dplyr::ungroup() %>% - # dplyr::select(!!attr(x, "response"), !!(attr(x, "explanatory"))) %>% - # table() - # expected <- outer(rowSums(obs_tab), colSums(obs_tab)) / n - # df_out <- x %>% - # dplyr::summarize( - # stat = sum( - # (table(!!(attr(x, "response")), !!(attr(x, "explanatory"))) - - # expected)^2 / expected, - # ...) 
- # ) - # Chi-Square Test of Independence - result <- x %>% - dplyr::do( - broom::tidy( - suppressWarnings(stats::chisq.test( - table( - .[[as.character(attr(x, "response"))]], - .[[as.character(attr(x, "explanatory"))]] - ) - )) - ) - ) %>% - dplyr::ungroup() + expl_var <- as.character(attr(x, "explanatory")) + chisq_indep <- function(df) { + res <- suppressWarnings(stats::chisq.test( + x = df[[expl_var]], + y = df[[resp_var]] + )) + + res[["statistic"]] + } - if (!is_nuat(x, "generate")) { - result <- result %>% dplyr::select(replicate, stat = statistic) - } else { - result <- result %>% dplyr::select(stat = statistic) + # Warn about possible unused factor levels + if (has_unused_levels(x[[expl_var]])) { + warning_glue("Explanatory variable has unused factor levels.") + } + if (has_unused_levels(x[[resp_var]])) { + warning_glue("Response variable has unused factor levels.") } - copy_attrs( - to = result, from = x, - attrs = c( - "response", "success", "explanatory", "response_type", - "explanatory_type", "distr_param", "distr_param2", "theory_type" - ) - ) + # Compute result + result <- x %>% + dplyr::nest_by(.key = "data") %>% + dplyr::summarise(stat = chisq_indep(data), .groups = "drop") + } + + if (!is_nuat(x, "generate")) { + result <- result %>% dplyr::select(replicate, stat) + } else { + result <- result %>% dplyr::select(stat) } + + copy_attrs( + to = result, from = x, + attrs = c( + "response", "success", "explanatory", "response_type", + "explanatory_type", "distr_param", "distr_param2", "theory_type" + ) + ) } calc_impl.function_of_props <- function(type, x, order, operator, ...) { col <- attr(x, "response") success <- attr(x, "success") - + x %>% dplyr::group_by(replicate, !!attr(x, "explanatory"), .drop = FALSE) %>% dplyr::summarize(prop = mean(!!sym(col) == success, ...)) %>% dplyr::summarize( - stat = operator(prop[!!attr(x, "explanatory") == order[1]], - prop[!!attr(x, "explanatory") == order[2]]) + stat = operator( + prop[!!attr(x, "explanatory") == order[1]], + prop[!!attr(x, "explanatory") == order[2]] + ) ) } @@ -349,14 +364,14 @@ calc_impl.ratio_of_props <- function(type, x, order, ...) { calc_impl.odds_ratio <- function(type, x, order, ...) { col <- attr(x, "response") success <- attr(x, "success") - + x %>% dplyr::group_by(replicate, !!attr(x, "explanatory"), .drop = FALSE) %>% dplyr::summarize(prop = mean(!!sym(col) == success, ...)) %>% dplyr::summarize( prop_1 = prop[!!attr(x, "explanatory") == order[1]], prop_2 = prop[!!attr(x, "explanatory") == order[2]], - stat = (prop_1 / prop_2) / ((1 - prop_1) / (1 - prop_2)) + stat = (prop_1 / prop_2) / ((1 - prop_1) / (1 - prop_2)) ) %>% dplyr::select(stat) } @@ -385,7 +400,7 @@ calc_impl.t <- function(type, x, order, ...) { # (stat == "slope") # ) { # explan_string <- as.character(attr(x, "explanatory")) - # + # # x %>% # dplyr::summarize( # stat = summary(stats::lm( @@ -393,7 +408,7 @@ calc_impl.t <- function(type, x, order, ...) { # ))[["coefficients"]][explan_string, "t value"] # ) # } - # + # # # Standardized correlation # else if ( # (attr(x, "theory_type") == "Slope/correlation with t") && @@ -410,25 +425,25 @@ calc_impl.t <- function(type, x, order, ...) { else if (attr(x, "theory_type") == "One sample t") { # For bootstrap if (!is_hypothesized(x)) { - if (is.null(list(...)$mu)) { message_glue( "No `mu` argument was hypothesized, so the t-test will ", "assume a null hypothesis `mu = 0`." 
) } - + x %>% dplyr::summarize( stat = stats::t.test(!!attr(x, "response"), ...)[["statistic"]] ) - } else { # For hypothesis testing x %>% dplyr::summarize( stat = stats::t.test( - !!attr(x, "response"), mu = attr(x, "params"), ... + !!attr(x, "response"), + mu = attr(x, "params"), + ... )[["statistic"]] ) } @@ -442,8 +457,13 @@ calc_impl.z <- function(type, x, order, ...) { success <- attr(x, "success") x$explan <- factor( - explanatory_variable(x), levels = c(order[1], order[2]) + explanatory_variable(x), + levels = c(order[1], order[2]) ) + + if (!"replicate" %in% colnames(x)) { + x$replicate <- 1L + } aggregated <- x %>% dplyr::group_by(replicate, explan) %>% @@ -463,24 +483,22 @@ calc_impl.z <- function(type, x, order, ...) { denom = sqrt(p_hat * (1 - p_hat) / n1 + p_hat * (1 - p_hat) / n2), stat = diff_prop / denom ) %>% - dplyr::select(-total_suc, -n1, -n2) + dplyr::select(stat) df_out } else if (attr(x, "theory_type") == "One sample prop z") { # One sample proportion - + # When `hypothesize()` has been called success <- attr(x, "success") - - p0 <- attr(x, "params")[1] - num_rows <- nrow(x) / length(unique(x$replicate)) - col <- attr(x, "response") -# if (is.null(success)) { -# success <- quo(get_par_levels(x)[1]) -# } -# Error given instead - + p0 <- unname(attr(x, "params")[1]) + if (!is_generated(x)) { + num_rows <- nrow(x) + } else { + num_rows <- nrow(x) / length(unique(x$replicate)) + } + df_out <- x %>% dplyr::summarize( stat = ( diff --git a/R/get_confidence_interval.R b/R/get_confidence_interval.R index a0e55f67..311f80cf 100644 --- a/R/get_confidence_interval.R +++ b/R/get_confidence_interval.R @@ -2,9 +2,9 @@ #' #' @description #' -#' Compute a confidence interval around a summary statistic. Only -#' simulation-based methods are (currently only) supported. -#' +#' Compute a confidence interval around a summary statistic. Currently, only +#' simulation-based methods are supported. +#' #' Learn more in `vignette("infer")`. #' #' @param x Data frame of calculated statistics or containing attributes of @@ -22,53 +22,64 @@ #' @return A 1 x 2 tibble with 'lower_ci' and 'upper_ci' columns. Values #' correspond to lower and upper bounds of the confidence interval. #' +#' @details +#' A null hypothesis is not required to compute a confidence interval, but +#' including `hypothesize()` in a chain leading to `get_confidence_interval()` +#' will not break anything. This can be useful when computing a confidence +#' interval after previously computing a p-value. +#' #' @section Aliases: #' `get_ci()` is an alias of `get_confidence_interval()`. #' `conf_int()` is a deprecated alias of `get_confidence_interval()`. 
#' #' @examples -#' -#' # find the point estimate---mean number of hours worked per week -#' point_estimate <- gss %>% -#' specify(response = hours) %>% -#' calculate(stat = "mean") %>% -#' dplyr::pull() -#' -#' # starting with the gss dataset -#' gss %>% -#' # ...we're interested in the number of hours worked per week +#' +#' boot_distr <- gss %>% +#' # We're interested in the number of hours worked per week #' specify(response = hours) %>% -#' # hypothesizing that the mean is 40 -#' hypothesize(null = "point", mu = 40) %>% -#' # generating data points for a null distribution +#' # Generate bootstrap samples #' generate(reps = 1000, type = "bootstrap") %>% -#' # finding the null distribution +#' # Calculate mean of each bootstrap sample +#' calculate(stat = "mean") +#' +#' boot_distr %>% +#' # Calculate the confidence interval around the point estimate +#' get_confidence_interval( +#' # At the 95% confidence level; percentile method +#' level = 0.95 +#' ) +#' +#' # For type = "se" or type = "bias-corrected" we need a point estimate +#' sample_mean <- gss %>% +#' specify(response = hours) %>% #' calculate(stat = "mean") %>% -# # calculate the confidence interval around the point estimate +#' dplyr::pull() +#' +#' boot_distr %>% #' get_confidence_interval( -#' point_estimate = point_estimate, -#' # at the 95% confidence level +#' point_estimate = sample_mean, +#' # At the 95% confidence level #' level = 0.95, -#' # using the standard error method +#' # Using the standard error method #' type = "se" #' ) -#' +#' #' # More in-depth explanation of how to use the infer package #' \dontrun{ #' vignette("infer") -#' } -#' +#' } +#' #' @name get_confidence_interval #' @export get_confidence_interval <- function(x, level = 0.95, type = "percentile", point_estimate = NULL) { check_ci_args(x, level, type, point_estimate) - + # Inform if no `level` was explicitly supplied if (!("level" %in% rlang::call_args_names(match.call()))) { message_glue("Using `level = {level}` to compute confidence interval.") } - + switch( type, percentile = ci_percentile(x, level), @@ -82,40 +93,41 @@ get_confidence_interval <- function(x, level = 0.95, type = "percentile", get_ci <- function(x, level = 0.95, type = "percentile", point_estimate = NULL) { get_confidence_interval( - x, level = level, type = type, point_estimate = point_estimate + x, + level = level, type = type, point_estimate = point_estimate ) } ci_percentile <- function(x, level) { ci_vec <- stats::quantile(x[["stat"]], probs = (1 + c(-level, level)) / 2) - + make_ci_df(ci_vec) } ci_se <- function(x, level, point_estimate) { point_estimate <- check_obs_stat(point_estimate) - + multiplier <- stats::qnorm((1 + level) / 2) ci_vec <- point_estimate + c(-multiplier, multiplier) * stats::sd(x[["stat"]]) - + make_ci_df(ci_vec) } ci_bias_corrected <- function(x, level, point_estimate) { point_estimate <- check_obs_stat(point_estimate) - + p <- mean(x[["stat"]] <= point_estimate) - z0 <- stats::qnorm(p) + z0 <- stats::qnorm(p) # z_alpha_2 is z_(alpha/2) z_alpha_2 <- stats::qnorm((1 + c(-level, level)) / 2) - new_probs <- stats::pnorm(2*z0 + z_alpha_2) - + new_probs <- stats::pnorm(2 * z0 + z_alpha_2) + ci_vec <- stats::quantile(x[["stat"]], probs = new_probs) - + make_ci_df(ci_vec) } -check_ci_args <- function(x, level, type, point_estimate){ +check_ci_args <- function(x, level, type, point_estimate) { if (!is.null(point_estimate)) { if (!is.data.frame(point_estimate)) { check_type(point_estimate, is.numeric) @@ -126,7 +138,7 @@ check_ci_args <- function(x, level, 
type, point_estimate){ } check_type(x, is.data.frame) check_type(level, is.numeric) - + if ((level <= 0) || (level >= 1)) { stop_glue("The value of `level` must be between 0 and 1 non-inclusive.") } @@ -139,7 +151,7 @@ check_ci_args <- function(x, level, type, point_estimate){ if ((type %in% c("se", "bias-corrected")) && is.null(point_estimate)) { stop_glue( - 'A numeric value needs to be given for `point_estimate` ', + "A numeric value needs to be given for `point_estimate` ", 'for `type` "se" or "bias-corrected".' ) } diff --git a/R/gss.R b/R/gss.R index 0c1838c9..6039514f 100644 --- a/R/gss.R +++ b/R/gss.R @@ -2,7 +2,8 @@ #' #' The General Social Survey is a high-quality survey which gathers data on #' American society and opinions, conducted since 1972. This data set is a -#' sample of 500 entries from the GSS, including demographic markers and some +#' sample of 500 entries from the GSS, spanning years 1973-2018, +#' including demographic markers and some #' economic variables. Note that this data is included for demonstration only, #' and should not be assumed to provide accurate estimates relating to the GSS. #' However, due to the high quality of the GSS, the unweighted data will diff --git a/R/hypothesize.R b/R/hypothesize.R index 8478f8c4..c76f638d 100755 --- a/R/hypothesize.R +++ b/R/hypothesize.R @@ -75,7 +75,8 @@ hypothesize <- function(x, null, p = NULL, mu = NULL, med = NULL, sigma = NULL) } } ) - append_infer_class(tibble::as_tibble(x)) + res <- append_infer_class(tibble::as_tibble(x)) + copy_attrs(res, x, "params") } is_hypothesized <- function(x){ diff --git a/R/infer.R b/R/infer.R index 9ed38e64..61035576 100755 --- a/R/infer.R +++ b/R/infer.R @@ -1,11 +1,11 @@ #' infer: a grammar for statistical inference #' #' \if{html}{\figure{infer.png}{options: align='right'}} -#' +#' #' The objective of this package is to perform statistical inference using a #' grammar that illustrates the underlying concepts and a format that coheres #' with the tidyverse. -#' +#' #' For an overview of how to use the core functionality, see `vignette("infer")` #' #' @@ -21,8 +21,8 @@ if (getRversion() >= "2.15.1") { "prop", "stat", "value", "x", "y", "..density..", "statistic", ".", "parameter", "p.value", "xmin", "x_min", "xmax", "x_max", "density", "denom", "diff_prop", "group_num", "n1", "n2", "num_suc", "p_hat", - "total_suc", "explan", "probs", "conf.low", "conf.high", "prop_1", - "prop_2" + "total_suc", "explan", "probs", "conf.low", "conf.high", "prop_1", + "prop_2", "data" ) ) } diff --git a/R/rep_sample_n.R b/R/rep_sample_n.R index 19e882c6..ee7323de 100644 --- a/R/rep_sample_n.R +++ b/R/rep_sample_n.R @@ -2,86 +2,93 @@ #' #' @description #' -#' Perform repeated sampling of samples of size n. Useful for creating sampling -#' distributions. +#' These functions extend the functionality of [dplyr::sample_n()] and +#' [dplyr::slice_sample()] by allowing for repeated sampling of data. +#' This operation is especially helpful while creating sampling +#' distributions—see the examples below! #' -#' @param tbl Data frame of population from which to sample. -#' @param size Sample size of each sample. +#' @param tbl,.data Data frame of population from which to sample. +#' @param size,n Sample size of each sample. #' @param replace Should sampling be with replacement? #' @param reps Number of samples of size n = `size` to take. -#' @param prob A vector of probability weights for obtaining the elements of the -#' vector being sampled. 
+#' @param prob,weight_by A vector of sampling weights for each of the rows in +#' `tbl`—must have length equal to `nrow(tbl)`. #' -#' @return A tibble of size `rep` times `size` rows corresponding to `rep` -#' samples of size n = `size` from `tbl`. +#' @return A tibble of size `reps * size` rows corresponding to `reps` +#' samples of size `size` from `tbl`, grouped by `replicate`. #' -#' @examples -#' suppressPackageStartupMessages(library(dplyr)) -#' suppressPackageStartupMessages(library(ggplot2)) +#' @details The [dplyr::sample_n()] function (to which `rep_sample_n()` was +#' originally a supplement) has been superseded by [dplyr::slice_sample()]. +#' `rep_slice_sample()` provides a light wrapper around `rep_sample_n()` that +#' has a more similar interface to `slice_sample()`. #' -#' # A virtual population of N = 10,010, of which 3091 are hurricanes -#' population <- dplyr::storms %>% -#' select(status) +#' @examples +#' library(dplyr) +#' library(ggplot2) #' -#' # Take samples of size n = 50 storms without replacement; do this 1000 times -#' samples <- population %>% +#' # take 1000 samples of size n = 50, without replacement +#' slices <- gss %>% #' rep_sample_n(size = 50, reps = 1000) -#' samples #' -#' # Compute p_hats for all 1000 samples = proportion hurricanes -#' p_hats <- samples %>% +#' slices +#' +#' # compute the proportion of respondents with a college +#' # degree in each replicate +#' p_hats <- slices %>% #' group_by(replicate) %>% -#' summarize(prop_hurricane = mean(status == "hurricane")) -#' p_hats +#' summarize(prop_college = mean(college == "degree")) #' -#' # Plot sampling distribution -#' ggplot(p_hats, aes(x = prop_hurricane)) + +#' # plot sampling distribution +#' ggplot(p_hats, aes(x = prop_college)) + #' geom_density() + -#' labs(x = "p_hat", y = "Number of samples", -#' title = "Sampling distribution of p_hat from 1000 samples of size 50") -#' -#' @importFrom dplyr pull -#' @importFrom dplyr inner_join -#' @importFrom dplyr group_by +#' labs( +#' x = "p_hat", y = "Number of samples", +#' title = "Sampling distribution of p_hat" +#' ) +#' +#' # sampling with probability weights. Note probabilities are automatically +#' # renormalized to sum to 1 +#' library(tibble) +#' df <- tibble( +#' id = 1:5, +#' letter = factor(c("a", "b", "c", "d", "e")) +#' ) +#' rep_sample_n(df, size = 2, reps = 5, prob = c(.5, .4, .3, .2, .1)) #' @export rep_sample_n <- function(tbl, size, replace = FALSE, reps = 1, prob = NULL) { - n <- nrow(tbl) - check_type(tbl, is.data.frame) check_type(size, is.numeric) check_type(replace, is.logical) check_type(reps, is.numeric) if (!is.null(prob)) { check_type(prob, is.numeric) - } - - # assign non-uniform probabilities - # there should be a better way!!
- # prob needs to be nrow(tbl) -- not just number of factor levels - if (!is.null(prob)) { - if (length(prob) != n) { + if (length(prob) != nrow(tbl)) { stop_glue( "The argument `prob` must have length `nrow(tbl)` = {nrow(tbl)}" ) } - - prob <- tibble::tibble(vals = levels(dplyr::pull(tbl, 1))) %>% - dplyr::mutate(probs = prob) %>% - dplyr::inner_join(tbl) %>% - dplyr::select(probs) %>% - dplyr::pull() } + # Generate row indexes for every future replicate (this way it respects + # possibility of `replace = FALSE`) + n <- nrow(tbl) i <- unlist(replicate( reps, sample.int(n, size, replace = replace, prob = prob), simplify = FALSE )) - rep_tbl <- cbind( - replicate = rep(1:reps, rep(size, reps)), - tbl[i, ] - ) - rep_tbl <- tibble::as_tibble(rep_tbl) - names(rep_tbl)[-1] <- names(tbl) - dplyr::group_by(rep_tbl, replicate) + + tbl %>% + dplyr::slice(i) %>% + dplyr::mutate(replicate = rep(seq_len(reps), each = size)) %>% + dplyr::select(replicate, dplyr::everything()) %>% + tibble::as_tibble() %>% + dplyr::group_by(replicate) +} + +#' @rdname rep_sample_n +#' @export +rep_slice_sample <- function(.data, n = 1, replace = FALSE, weight_by = NULL, + reps = 1) { + rep_sample_n(.data, n, replace, reps, weight_by) } diff --git a/R/utils.R b/R/utils.R index 2e96ed99..5880c218 100644 --- a/R/utils.R +++ b/R/utils.R @@ -124,7 +124,7 @@ check_order <- function(x, explanatory_variable, order, in_calculate = TRUE) { unique_ex <- sort(unique(explanatory_variable)) if (length(unique_ex) != 2) { stop_glue( - "Statistic is based on a difference or ratio; the explanatory variable", + "Statistic is based on a difference or ratio; the explanatory variable ", "should have two levels." ) } @@ -308,6 +308,17 @@ check_for_nan <- function(x, context) { } } +has_unused_levels <- function(x) { + if (is.factor(x)) { + present_levels <- unique(as.character(x)) + unused_levels <- setdiff(levels(x), present_levels) + + length(unused_levels) > 0 + } else { + FALSE + } +} + # Helpers for hypothesize() ----------------------------------------------- match_null_hypothesis <- function(null) { @@ -519,4 +530,4 @@ check_for_piped_visualize <- function(...) { } TRUE -} \ No newline at end of file +} diff --git a/R/wrappers.R b/R/wrappers.R index 63a8c365..92718629 100755 --- a/R/wrappers.R +++ b/R/wrappers.R @@ -30,7 +30,7 @@ #' #' @examples #' library(tidyr) -#' +#' #' # t test for number of hours worked per week #' # by college degree status #' gss %>% @@ -40,45 +40,45 @@ #' alternative = "two-sided") #' #' # see vignette("infer") for more explanation of the -#' # intuition behind the infer package, and vignette("t_test") +#' # intuition behind the infer package, and vignette("t_test") #' # for more examples of t-tests using infer #' #' @importFrom rlang f_lhs #' @importFrom rlang f_rhs #' @importFrom stats as.formula #' @export -t_test <- function(x, formula, - response = NULL, +t_test <- function(x, formula, + response = NULL, explanatory = NULL, order = NULL, - alternative = "two-sided", + alternative = "two-sided", mu = 0, conf_int = TRUE, conf_level = 0.95, ...) 
{ check_conf_level(conf_level) - + # convert all character and logical variables to be factor variables x <- tibble::as_tibble(x) %>% mutate_if(is.character, as.factor) %>% mutate_if(is.logical, as.factor) - + # parse response and explanatory variables response <- enquo(response) explanatory <- enquo(explanatory) - x <- parse_variables(x = x, formula = formula, + x <- parse_variables(x = x, formula = formula, response = response, explanatory = explanatory) - + # match with old "dot" syntax in t.test if (alternative %in% c("two-sided", "two_sided", "two sided")) { alternative <- "two.sided" } - + # two sample if (has_explanatory(x)) { # if (!is.null(order)) { - # x[[as.character(attr(x, "explanatory"))]] <- factor(explanatory_variable(x), - # levels = c(order[1], + # x[[as.character(attr(x, "explanatory"))]] <- factor(explanatory_variable(x), + # levels = c(order[1], # order[2]), # ordered = TRUE) # } @@ -100,7 +100,7 @@ t_test <- function(x, formula, conf.level = conf_level) %>% broom::glance() } - + if (conf_int) { results <- prelim %>% dplyr::select( @@ -113,14 +113,14 @@ t_test <- function(x, formula, statistic, t_df = parameter, p_value = p.value, alternative ) } - + results } #' Tidy t-test statistic #' #' @description -#' +#' #' A shortcut wrapper function to get the observed test statistic for a t test. #' #' @param x A data frame that can be coerced into a [tibble][tibble::tibble]. @@ -144,7 +144,7 @@ t_test <- function(x, formula, #' #' @examples #' library(tidyr) -#' +#' #' # t test statistic for true mean number of hours worked #' # per week of 40 #' gss %>% @@ -159,38 +159,38 @@ t_test <- function(x, formula, #' alternative = "two-sided") #' #' @export -t_stat <- function(x, formula, - response = NULL, +t_stat <- function(x, formula, + response = NULL, explanatory = NULL, order = NULL, - alternative = "two-sided", + alternative = "two-sided", mu = 0, conf_int = FALSE, conf_level = 0.95, ...) { check_conf_level(conf_level) - + # convert all character and logical variables to be factor variables x <- tibble::as_tibble(x) %>% mutate_if(is.character, as.factor) %>% mutate_if(is.logical, as.factor) - + # parse response and explanatory variables response <- enquo(response) explanatory <- enquo(explanatory) - x <- parse_variables(x = x, formula = formula, + x <- parse_variables(x = x, formula = formula, response = response, explanatory = explanatory) - + # match with old "dot" syntax in t.test if (alternative %in% c("two-sided", "two_sided", "two sided")) { alternative <- "two.sided" } - + # two sample if (has_explanatory(x)) { # if (!is.null(order)) { - # x[[as.character(attr(x, "explanatory"))]] <- factor(explanatory_variable(x), - # levels = c(order[1], + # x[[as.character(attr(x, "explanatory"))]] <- factor(explanatory_variable(x), + # levels = c(order[1], # order[2]), # ordered = TRUE) # } @@ -212,13 +212,13 @@ t_stat <- function(x, formula, conf.level = conf_level) %>% broom::glance() } - - # removed unnecessary if(conf_int) clause; only the statistic itself + + # removed unnecessary if(conf_int) clause; only the statistic itself # was returned regardless results <- prelim %>% dplyr::select(statistic) %>% pull() - + results } @@ -239,13 +239,13 @@ t_stat <- function(x, formula, #' @param ... Additional arguments for [chisq.test()][stats::chisq.test()]. 
#' #' @examples -#' # chi-squared test of independence for college completion +#' # chi-squared test of independence for college completion #' # status depending on one's self-identified income class #' chisq_test(gss, college ~ finrela) -#' -#' # chi-squared goodness of fit test on whether self-identified +#' +#' # chi-squared goodness of fit test on whether self-identified #' # income class follows a uniform distribution -#' chisq_test(gss, +#' chisq_test(gss, #' response = finrela, #' p = c("far below average" = 1/6, #' "below average" = 1/6, @@ -255,35 +255,35 @@ #' "DK" = 1/6)) #' #' @export -chisq_test <- function(x, formula, response = NULL, +chisq_test <- function(x, formula, response = NULL, explanatory = NULL, ...) { # Parse response and explanatory variables response <- enquo(response) explanatory <- enquo(explanatory) - x <- parse_variables(x = x, formula = formula, + x <- parse_variables(x = x, formula = formula, response = response, explanatory = explanatory) - + if (!(class(response_variable(x)) %in% c("logical", "character", "factor"))) { stop_glue( 'The response variable of `{attr(x, "response")}` is not appropriate\n', "since the response variable is expected to be categorical." ) } - if (has_explanatory(x) && + if (has_explanatory(x) && !(class(explanatory_variable(x)) %in% c("logical", "character", "factor"))) { stop_glue( 'The explanatory variable of `{attr(x, "explanatory")}` is not appropriate\n', "since the explanatory variable is expected to be categorical." ) } - + x <- x %>% select(one_of(c( as.character((attr(x, "response"))), as.character(attr(x, "explanatory")) ))) %>% mutate_if(is.character, as.factor) %>% mutate_if(is.logical, as.factor) - + stats::chisq.test(table(x), ...) %>% broom::glance() %>% dplyr::select(statistic, chisq_df = parameter, p_value = p.value) @@ -307,15 +307,15 @@ chisq_test <- function(x, formula, response = NULL, #' @param ... Additional arguments for [chisq.test()][stats::chisq.test()]. #' #' @examples -#' # chi-squared test statistic for test of independence -#' # of college completion status depending and one's +#' # chi-squared test statistic for test of independence +#' # of college completion status depending on one's #' # self-identified income class #' chisq_stat(gss, college ~ finrela) -#' -#' # chi-squared test statistic for a goodness of fit -#' # test on whether self-identified income class +#' +#' # chi-squared test statistic for a goodness of fit +#' # test on whether self-identified income class #' # follows a uniform distribution -#' chisq_stat(gss, +#' chisq_stat(gss, #' response = finrela, #' p = c("far below average" = 1/6, #' "below average" = 1/6, @@ -325,35 +325,35 @@ chisq_test <- function(x, formula, response = NULL, #' "DK" = 1/6)) #' #' @export -chisq_stat <- function(x, formula, response = NULL, +chisq_stat <- function(x, formula, response = NULL, explanatory = NULL, ...) { # Parse response and explanatory variables response <- enquo(response) explanatory <- enquo(explanatory) - x <- parse_variables(x = x, formula = formula, + x <- parse_variables(x = x, formula = formula, response = response, explanatory = explanatory) - + if (!(class(response_variable(x)) %in% c("logical", "character", "factor"))) { stop_glue( 'The response variable of `{attr(x, "response")}` is not appropriate\n', "since the response variable is expected to be categorical."
) } - if (has_explanatory(x) && + if (has_explanatory(x) && !(class(explanatory_variable(x)) %in% c("logical", "character", "factor"))) { stop_glue( 'The explanatory variable of `{attr(x, "explanatory")}` is not appropriate\n', "since the response variable is expected to be categorical." ) } - + x <- x %>% select(one_of(c( as.character((attr(x, "response"))), as.character(attr(x, "explanatory")) ))) %>% mutate_if(is.character, as.factor) %>% mutate_if(is.logical, as.factor) - + suppressWarnings(stats::chisq.test(table(x), ...)) %>% broom::glance() %>% dplyr::select(statistic) %>% @@ -386,55 +386,85 @@ check_conf_level <- function(conf_level) { #' explanatory variable. Optional. This is an alternative to the formula #' interface. #' @param order A string vector specifying the order in which the proportions -#' should be subtracted, where `order = c("first", "second")` means -#' `"first" - "second"`. Ignored for one-sample tests, and optional for two +#' should be subtracted, where `order = c("first", "second")` means +#' `"first" - "second"`. Ignored for one-sample tests, and optional for two #' sample tests. #' @param alternative Character string giving the direction of the alternative #' hypothesis. Options are `"two-sided"` (default), `"greater"`, or `"less"`. +#' Only used when testing the null that a single proportion equals a given +#' value, or that two proportions are equal; ignored otherwise. #' @param p A numeric vector giving the hypothesized null proportion of #' success for each group. #' @param conf_int A logical value for whether to report the confidence #' interval or not. `TRUE` by default, ignored if `p` is specified for a -#' two-sample test. +#' two-sample test. Only used when testing the null that a single +#' proportion equals a given value, or that two proportions are equal; +#' ignored otherwise. #' @param conf_level A numeric value between 0 and 1. Default value is 0.95. +#' Only used when testing the null that a single proportion equals a given +#' value, or that two proportions are equal; ignored otherwise. +#' @param success The level of `response` that will be considered a success, as +#' a string. Only used when testing the null that a single +#' proportion equals a given value, or that two proportions are equal; +#' ignored otherwise. +#' @param correct A logical indicating whether Yates' continuity correction +#' should be applied where possible. If `z = TRUE`, the `correct` argument will +#' be overwritten as `FALSE`. Otherwise defaults to `correct = TRUE`. +#' @param z A logical value for whether to report the statistic as a standard +#' normal deviate or a Pearson's chi-square statistic. \eqn{z^2} is distributed +#' chi-square with 1 degree of freedom, though note that the user will likely +#' need to turn off Yates' continuity correction by setting `correct = FALSE` +#' to see this connection. #' @param ... Additional arguments for [prop.test()][stats::prop.test()]. 
#' #' @examples -#' # proportion test for difference in proportions of +#' # two-sample proportion test for difference in proportions of #' # college completion by respondent sex -#' prop_test(gss, -#' college ~ sex, +#' prop_test(gss, +#' college ~ sex, #' order = c("female", "male")) -#' -#' # a one-proportion test for hypothesized null +#' +#' # one-sample proportion test for hypothesized null #' # proportion of college completion of .2 #' prop_test(gss, #' college ~ NULL, #' p = .2) +#' +#' # report as a z-statistic rather than chi-square +#' # and specify the success level of the response +#' prop_test(gss, +#' college ~ NULL, +#' success = "degree", +#' p = .2, +#' z = TRUE) #' #' @export -prop_test <- function(x, formula, - response = NULL, +prop_test <- function(x, formula, + response = NULL, explanatory = NULL, p = NULL, order = NULL, - alternative = "two-sided", + alternative = "two-sided", conf_int = TRUE, conf_level = 0.95, + success = NULL, + correct = NULL, + z = FALSE, ...) { # Parse response and explanatory variables response <- enquo(response) explanatory <- enquo(explanatory) - x <- parse_variables(x = x, formula = formula, + x <- parse_variables(x = x, formula = formula, response = response, explanatory = explanatory) - + correct <- if (z) {FALSE} else if (is.null(correct)) {TRUE} else {correct} + if (!(class(response_variable(x)) %in% c("logical", "character", "factor"))) { stop_glue( 'The response variable of `{attr(x, "response")}` is not appropriate\n', "since the response variable is expected to be categorical." ) } - if (has_explanatory(x) && + if (has_explanatory(x) && !(class(explanatory_variable(x)) %in% c("logical", "character", "factor"))) { stop_glue( 'The explanatory variable of `{attr(x, "explanatory")}` is not appropriate\n', @@ -445,68 +475,118 @@ prop_test <- function(x, formula, if (alternative %in% c("two-sided", "two_sided", "two sided")) { alternative <- "two.sided" } - + + # process "success" arg + lvls <- levels(factor(response_variable(x))) + + if (!is.null(success)) { + check_type(success, rlang::is_string) + + if (!(success %in% lvls)) { + stop_glue('{success} is not a valid level of {attr(x, "response")}.') + } + + lvls <- c(success, lvls[lvls != success]) + } else { + success <- lvls[1] + } + # two sample if (has_explanatory(x)) { - + order <- check_order(x, explanatory_variable(x), order, in_calculate = FALSE) - + # make a summary table to supply to prop.test sum_table <- x %>% - select(as.character((attr(x, "response"))), + select(as.character((attr(x, "response"))), as.character(attr(x, "explanatory"))) %>% mutate_if(is.character, as.factor) %>% mutate_if(is.logical, as.factor) %>% table() - - # reorder according to the order argument - sum_table <- sum_table[, order] - + + # reorder according to the order and success arguments + sum_table <- sum_table[lvls, order] + prelim <- stats::prop.test(x = sum_table, alternative = alternative, conf.level = conf_level, p = p, - ...) %>% - broom::glance() + correct = correct, + ...) } else { # one sample - response_tbl <- x %>% - select(as.character((attr(x, "response")))) %>% - mutate_if(is.character, as.factor) %>% - mutate_if(is.logical, as.factor) %>% + response_tbl <- response_variable(x) %>% + factor() %>% + stats::relevel(success) %>% table() - + if (is.null(p)) { message_glue( "No `p` argument was hypothesized, so the test will ", "assume a null hypothesis `p = .5`." ) } - + prelim <- stats::prop.test(x = response_tbl, alternative = alternative, conf.level = conf_level, p = p, - ...) 
%>% - broom::glance() + correct = correct, + ...) + } - - if (conf_int & is.null(p)) { - results <- prelim %>% - dplyr::select(statistic, - chisq_df = parameter, - p_value = p.value, - alternative, - lower_ci = conf.low, - upper_ci = conf.high) - + + if (length(prelim$estimate) <= 2) { + if (conf_int & is.null(p)) { + results <- prelim %>% + broom::glance() %>% + dplyr::select(statistic, + chisq_df = parameter, + p_value = p.value, + alternative, + lower_ci = conf.low, + upper_ci = conf.high) + } else { + results <- prelim %>% + broom::glance() %>% + dplyr::select(statistic, + chisq_df = parameter, + p_value = p.value, + alternative) + } } else { - results <- prelim %>% - dplyr::select(statistic, - chisq_df = parameter, - p_value = p.value, - alternative) + results <- prelim %>% + broom::glance() %>% + dplyr::select(statistic, + chisq_df = parameter, + p_value = p.value) } + if (z) { + results <- calculate_z(x, results, success, p, order) + } + results } +calculate_z <- function(x, results, success, p, order) { + exp <- if (has_explanatory(x)) {attr(x, "explanatory")} else {"NULL"} + + form <- as.formula(paste0(attr(x, "response"), " ~ ", exp)) + + stat <- x %>% + specify(formula = form, success = success) %>% + hypothesize( + null = if (has_explanatory(x)) {"independence"} else {"point"}, + p = if (is.null(p) && !has_explanatory(x)) {.5} else {p} + ) %>% + calculate( + stat = "z", + order = if (has_explanatory(x)) {order} else {NULL} + ) %>% + dplyr::pull() + + results$statistic <- stat + results$chisq_df <- NULL + + results +} diff --git a/README.Rmd b/README.Rmd index 39622b1d..1d6b8426 100755 --- a/README.Rmd +++ b/README.Rmd @@ -13,7 +13,7 @@ output: github_document -[![CRAN_Status_Badge](https://www.r-pkg.org/badges/version/infer)](https://cran.r-project.org/package=infer) [![Travis-CI Build Status](https://travis-ci.org/tidymodels/infer.svg?branch=master)](https://travis-ci.org/tidymodels/infer) [![Coverage Status](https://img.shields.io/codecov/c/github/tidymodels/infer/master.svg)](https://codecov.io/github/tidymodels/infer/?branch=master) +[![CRAN_Status_Badge](https://www.r-pkg.org/badges/version/infer)](https://cran.r-project.org/package=infer) [![Coverage Status](https://img.shields.io/codecov/c/github/tidymodels/infer/master.svg)](https://codecov.io/github/tidymodels/infer/?branch=master) The objective of this package is to perform statistical inference using an expressive statistical grammar that coheres with the `tidyverse` design framework. The package is centered around 4 main verbs, supplemented with many utilities to visualize and extract value from their outputs. @@ -43,8 +43,7 @@ To install the current stable version of `infer` from CRAN: install.packages("infer") ``` - -To install the developmental stable version of `infer`, make sure to install `remotes` first. The `pkgdown` website for the package is at <https://infer.netlify.com>. +To install the developmental stable version of `infer`, make sure to install `remotes` first. The `pkgdown` website for this version is at <https://infer.tidymodels.org/>.
```{r, eval = FALSE} install.packages("remotes") diff --git a/README.md b/README.md index cae55f18..0bac984e 100755 --- a/README.md +++ b/README.md @@ -12,8 +12,6 @@ [![CRAN\_Status\_Badge](https://www.r-pkg.org/badges/version/infer)](https://cran.r-project.org/package=infer) -[![Travis-CI Build -Status](https://travis-ci.org/tidymodels/infer.svg?branch=master)](https://travis-ci.org/tidymodels/infer) [![Coverage Status](https://img.shields.io/codecov/c/github/tidymodels/infer/master.svg)](https://codecov.io/github/tidymodels/infer/?branch=master) @@ -47,8 +45,8 @@ install.packages("infer") ``` To install the developmental stable version of `infer`, make sure to -install `remotes` first. The `pkgdown` website for the package is at -<https://infer.netlify.com>. +install `remotes` first. The `pkgdown` website for this version is at +<https://infer.tidymodels.org/>. ``` r install.packages("remotes") diff --git a/appveyor.yml b/appveyor.yml deleted file mode 100644 index 20c4ab63..00000000 --- a/appveyor.yml +++ /dev/null @@ -1,46 +0,0 @@ -# DO NOT CHANGE the "init" and "install" sections below - -# Download script file from GitHub -init: - ps: | - $ErrorActionPreference = "Stop" - Invoke-WebRequest http://raw.github.com/krlmlr/r-appveyor/master/scripts/appveyor-tool.ps1 -OutFile "..\appveyor-tool.ps1" - Import-Module '..\appveyor-tool.ps1' - -install: - ps: Bootstrap - -# Adapt as necessary starting from here - -environment: - global: - WARNINGS_ARE_ERRORS: 1 - -build_script: - - travis-tool.sh install_deps - -test_script: - - travis-tool.sh run_tests - -on_failure: - - 7z a failure.zip *.Rcheck\* - - appveyor PushArtifact failure.zip - -artifacts: - - path: '*.Rcheck\**\*.log' - name: Logs - - path: '*.Rcheck\**\*.out' - name: Logs - - path: '*.Rcheck\**\*.fail' - name: Logs - - path: '*.Rcheck\**\*.Rout' - name: Logs - - path: '\*_*.tar.gz' - name: Bits - - path: '\*_*.zip' - name: Bits \ No newline at end of file diff --git a/cran-comments.md b/cran-comments.md index cfd718d5..1e4def24 100644 --- a/cran-comments.md +++ b/cran-comments.md @@ -1,17 +1,8 @@ -## infer 0.5.3 - -This release fixes failing CRAN checks related to long-double support. This -submission is a resubmission following an automated pretest failure due to -a warning resulting from the newly released sf package. - -The previous submission was also a resubmission following an automated pretest -failure related to visual testing failures on the most recent R-devel version. - ## Test environments -* local OS X install, R 4.0.2 -* ubuntu 16.04 (on travis-ci), oldrel, release, devel -* win-builder (devel) -* rhub: debian-gcc-devel-nold +* local OS X install, R 3.6.3 +* ubuntu 16.04 (on github actions), release, devel +* windows (on github actions), R 3.6.3, release +* windows (on win-builder), devel ## R CMD check results @@ -19,5 +10,5 @@ failure related to visual testing failures on the most recent R-devel version. ## Reverse dependencies -We checked five reverse dependencies, two of which are on CRAN, with the +We checked six reverse dependencies, five of which are on CRAN, with the remaining on bioconductor, and found no new issues. diff --git a/docs/index.html b/docs/index.html index 9a0e83a6..ba632280 100644 --- a/docs/index.html +++ b/docs/index.html @@ -133,7 +133,7 @@


[The bodies of the diffs for the generated pkgdown site files (docs/index.html, docs/news/index.html, and docs/reference/infer.html) were reduced to plain text in extraction and are not reproducible here. They track the source changes above: the site URL moves from https://infer.netlify.com to https://infer.tidymodels.org/ and the Travis-CI build badge is removed from the rendered pages.]
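The `rep_slice_sample()` wrapper added above is easiest to see in use. A minimal sketch against the interface as defined in this diff, using the `gss` columns from the package's own examples:

```r
library(infer)
library(dplyr)

# rep_slice_sample() mirrors dplyr::slice_sample(): `n` rows per sample,
# `reps` replicates, with optional `replace` and `weight_by`
slices <- gss %>%
  rep_slice_sample(n = 50, reps = 1000)

# the result comes back grouped by `replicate`, so per-sample
# summaries follow directly
slices %>%
  summarize(prop_college = mean(college == "degree"))
```

Because the wrapper forwards `weight_by` to `rep_sample_n()`'s `prob` argument, weighted sampling works the same way in both functions.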
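Similarly, a quick sketch of how the new `prop_test()` arguments fit together, following the `success`, `correct`, and `z` behavior documented above (with `z = TRUE`, `correct` is forced to `FALSE` and the statistic is reported as a standard normal deviate):

```r
library(infer)

# one-sample test against p = .2, reported as a chi-square statistic;
# Yates' correction is disabled to make the z connection visible
prop_test(gss, college ~ NULL, p = .2, success = "degree", correct = FALSE)

# the same test reported as a z-statistic: squaring it recovers the
# uncorrected chi-square statistic above
prop_test(gss, college ~ NULL, p = .2, success = "degree", z = TRUE)
```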
diff --git a/man/calculate.Rd b/man/calculate.Rd index d0570754..2ca23cb5 100755 --- a/man/calculate.Rd +++ b/man/calculate.Rd @@ -52,18 +52,18 @@ visualizations (with a warning) and raise an error in p-value calculations. # calculate a null distribution of hours worked per week under # the null hypothesis that the mean is 40 gss \%>\% - specify(response = hours) \%>\% - hypothesize(null = "point", mu = 40) \%>\% - generate(reps = 200, type = "bootstrap") \%>\% - calculate(stat = "mean") + specify(response = hours) \%>\% + hypothesize(null = "point", mu = 40) \%>\% + generate(reps = 200, type = "bootstrap") \%>\% + calculate(stat = "mean") # calculate a null distribution assuming independence between age # of respondent and whether they have a college degree gss \%>\% - specify(age ~ college) \%>\% - hypothesize(null = "independence") \%>\% - generate(reps = 200, type = "permute") \%>\% - calculate("diff in means", order = c("degree", "no degree")) + specify(age ~ college) \%>\% + hypothesize(null = "independence") \%>\% + generate(reps = 200, type = "permute") \%>\% + calculate("diff in means", order = c("degree", "no degree")) # More in-depth explanation of how to use the infer package \dontrun{ diff --git a/man/chisq_stat.Rd b/man/chisq_stat.Rd index de9076c5..25430061 100644 --- a/man/chisq_stat.Rd +++ b/man/chisq_stat.Rd @@ -29,15 +29,15 @@ test. Uses \link[stats:chisq.test]{chisq.test()}, which applies a continuity correction. } \examples{ -# chi-squared test statistic for test of independence -# of college completion status depending and one's +# chi-squared test statistic for test of independence +# of college completion status depending on one's # self-identified income class chisq_stat(gss, college ~ finrela) -# chi-squared test statistic for a goodness of fit -# test on whether self-identified income class +# chi-squared test statistic for a goodness of fit +# test on whether self-identified income class # follows a uniform distribution -chisq_stat(gss, +chisq_stat(gss, response = finrela, p = c("far below average" = 1/6, "below average" = 1/6, diff --git a/man/chisq_test.Rd b/man/chisq_test.Rd index f1c5b28f..2f3e3f5d 100644 --- a/man/chisq_test.Rd +++ b/man/chisq_test.Rd @@ -25,13 +25,13 @@ A tidier version of \link[stats:chisq.test]{chisq.test()} for goodness of fit tests and tests of independence. } \examples{ -# chi-squared test of independence for college completion +# chi-squared test of independence for college completion # status depending on one's self-identified income class chisq_test(gss, college ~ finrela) -# chi-squared goodness of fit test on whether self-identified +# chi-squared goodness of fit test on whether self-identified # income class follows a uniform distribution -chisq_test(gss, +chisq_test(gss, response = finrela, p = c("far below average" = 1/6, "below average" = 1/6, diff --git a/man/get_confidence_interval.Rd b/man/get_confidence_interval.Rd index f534b35f..d37e70ff 100644 --- a/man/get_confidence_interval.Rd +++ b/man/get_confidence_interval.Rd @@ -35,11 +35,17 @@ A 1 x 2 tibble with 'lower_ci' and 'upper_ci' columns. Values correspond to lower and upper bounds of the confidence interval. } \description{ -Compute a confidence interval around a summary statistic. Only -simulation-based methods are (currently only) supported. +Compute a confidence interval around a summary statistic. Currently, only +simulation-based methods are supported. Learn more in \code{vignette("infer")}.
} +\details{ +A null hypothesis is not required to compute a confidence interval, but +including \code{hypothesize()} in a chain leading to \code{get_confidence_interval()} +will not break anything. This can be useful when computing a confidence +interval after previously computing a p-value. +} \section{Aliases}{ \code{get_ci()} is an alias of \code{get_confidence_interval()}. @@ -48,33 +54,39 @@ Learn more in \code{vignette("infer")}. \examples{ -# find the point estimate---mean number of hours worked per week -point_estimate <- gss \%>\% +boot_distr <- gss \%>\% + # We're interested in the number of hours worked per week specify(response = hours) \%>\% - calculate(stat = "mean") \%>\% - dplyr::pull() + # Generate bootstrap samples + generate(reps = 1000, type = "bootstrap") \%>\% + # Calculate mean of each bootstrap sample + calculate(stat = "mean") + +boot_distr \%>\% + # Calculate the confidence interval around the point estimate + get_confidence_interval( + # At the 95\% confidence level; percentile method + level = 0.95 + ) -# starting with the gss dataset -gss \%>\% - # ...we're interested in the number of hours worked per week +# For type = "se" or type = "bias-corrected" we need a point estimate +sample_mean <- gss \%>\% specify(response = hours) \%>\% - # hypothesizing that the mean is 40 - hypothesize(null = "point", mu = 40) \%>\% - # generating data points for a null distribution - generate(reps = 1000, type = "bootstrap") \%>\% - # finding the null distribution calculate(stat = "mean") \%>\% + dplyr::pull() + +boot_distr \%>\% get_confidence_interval( - point_estimate = point_estimate, - # at the 95\% confidence level + point_estimate = sample_mean, + # At the 95\% confidence level level = 0.95, - # using the standard error method + # Using the standard error method type = "se" ) - + # More in-depth explanation of how to use the infer package \dontrun{ vignette("infer") -} - +} + } diff --git a/man/gss.Rd b/man/gss.Rd index 510a0406..edffe5e2 100644 --- a/man/gss.Rd +++ b/man/gss.Rd @@ -30,7 +30,8 @@ gss \description{ The General Social Survey is a high-quality survey which gathers data on American society and opinions, conducted since 1972. This data set is a -sample of 500 entries from the GSS, including demographic markers and some +sample of 500 entries from the GSS, spanning years 1973-2018, +including demographic markers and some economic variables. Note that this data is included for demonstration only, and should not be assumed to provide accurate estimates relating to the GSS. However, due to the high quality of the GSS, the unweighted data will diff --git a/man/infer.Rd b/man/infer.Rd index ccb56708..d932ff09 100644 --- a/man/infer.Rd +++ b/man/infer.Rd @@ -7,19 +7,19 @@ \title{infer: a grammar for statistical inference} \description{ \if{html}{\figure{infer.png}{options: align='right'}} - +} +\details{ The objective of this package is to perform statistical inference using a grammar that illustrates the underlying concepts and a format that coheres with the tidyverse. 
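The rewritten `get_confidence_interval()` examples above show the percentile and standard-error methods, and the man page notes that `type = "bias-corrected"`, like `type = "se"`, needs a point estimate, but no bias-corrected call is shown. Below is a hedged sketch of that variant, assuming it accepts the same arguments as the `type = "se"` example (object names reuse those from the man page):

```r
library(infer)

boot_distr <- gss %>%
  specify(response = hours) %>%
  generate(reps = 1000, type = "bootstrap") %>%
  calculate(stat = "mean")

sample_mean <- gss %>%
  specify(response = hours) %>%
  calculate(stat = "mean") %>%
  dplyr::pull()

# bias-corrected interval around the same point estimate
boot_distr %>%
  get_confidence_interval(
    point_estimate = sample_mean,
    level = 0.95,
    type = "bias-corrected"
  )
```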
-} -\details{ + For an overview of how to use the core functionality, see \code{vignette("infer")} } \seealso{ Useful links: \itemize{ \item \url{https://github.com/tidymodels/infer} - \item \url{https://infer.netlify.com/} + \item \url{https://infer.tidymodels.org/} \item Report bugs at \url{https://github.com/tidymodels/infer/issues} } @@ -29,22 +29,22 @@ Useful links: Authors: \itemize{ - \item Chester Ismay \email{chester.ismay@gmail.com} - \item Evgeni Chasnovski \email{evgeni.chasnovski@gmail.com} - \item Ben Baumer \email{ben.baumer@gmail.com} - \item Mine Cetinkaya-Rundel \email{mine@stat.duke.edu} + \item Chester Ismay \email{chester.ismay@gmail.com} (\href{https://orcid.org/0000-0003-2820-2547}{ORCID}) + \item Evgeni Chasnovski \email{evgeni.chasnovski@gmail.com} (\href{https://orcid.org/0000-0002-1617-4019}{ORCID}) + \item Ben Baumer \email{ben.baumer@gmail.com} (\href{https://orcid.org/0000-0002-3279-0516}{ORCID}) + \item Mine Cetinkaya-Rundel \email{mine@stat.duke.edu} (\href{https://orcid.org/0000-0001-6452-2420}{ORCID}) } Other contributors: \itemize{ \item Simon Couch \email{simonpatrickcouch@gmail.com} [contributor] - \item Ted Laderas \email{tedladeras@gmail.com} [contributor] + \item Ted Laderas \email{tedladeras@gmail.com} (\href{https://orcid.org/0000-0002-6207-7068}{ORCID}) [contributor] \item Nick Solomon \email{nick.solomon@datacamp.com} [contributor] \item Johanna Hardin \email{Jo.Hardin@pomona.edu} [contributor] - \item Albert Y. Kim \email{albert.ys.kim@gmail.com} [contributor] + \item Albert Y. Kim \email{albert.ys.kim@gmail.com} (\href{https://orcid.org/0000-0001-7824-306X}{ORCID}) [contributor] \item Neal Fultz \email{nfultz@gmail.com} [contributor] \item Doug Friedman \email{doug.nhp@gmail.com} [contributor] - \item Richie Cotton \email{richie@datacamp.com} [contributor] + \item Richie Cotton \email{richie@datacamp.com} (\href{https://orcid.org/0000-0003-2504-802X}{ORCID}) [contributor] \item Brian Fannin \email{captain@pirategrunt.com} [contributor] } diff --git a/man/prop_test.Rd b/man/prop_test.Rd index cb4bb3f9..20c0d947 100644 --- a/man/prop_test.Rd +++ b/man/prop_test.Rd @@ -14,6 +14,9 @@ prop_test( alternative = "two-sided", conf_int = TRUE, conf_level = 0.95, + success = NULL, + correct = NULL, + z = FALSE, ... ) } @@ -41,13 +44,34 @@ should be subtracted, where \code{order = c("first", "second")} means sample tests.} \item{alternative}{Character string giving the direction of the alternative -hypothesis. Options are \code{"two-sided"} (default), \code{"greater"}, or \code{"less"}.} +hypothesis. Options are \code{"two-sided"} (default), \code{"greater"}, or \code{"less"}. +Only used when testing the null that a single proportion equals a given +value, or that two proportions are equal; ignored otherwise.} \item{conf_int}{A logical value for whether to report the confidence interval or not. \code{TRUE} by default, ignored if \code{p} is specified for a -two-sample test.} +two-sample test. Only used when testing the null that a single +proportion equals a given value, or that two proportions are equal; +ignored otherwise.} -\item{conf_level}{A numeric value between 0 and 1. Default value is 0.95.} +\item{conf_level}{A numeric value between 0 and 1. Default value is 0.95. +Only used when testing the null that a single proportion equals a given +value, or that two proportions are equal; ignored otherwise.} + +\item{success}{The level of \code{response} that will be considered a success, as +a string. 
Only used when testing the null that a single +proportion equals a given value, or that two proportions are equal; +ignored otherwise.} + +\item{correct}{A logical indicating whether Yates' continuity correction +should be applied where possible. If \code{z = TRUE}, the \code{correct} argument will +be overridden to \code{FALSE}. Otherwise defaults to \code{correct = TRUE}.} + +\item{z}{A logical value for whether to report the statistic as a standard +normal deviate or a Pearson's chi-square statistic. \eqn{z^2} is distributed +chi-square with 1 degree of freedom, though note that the user will likely +need to turn off Yates' continuity correction by setting \code{correct = FALSE} +to see this connection.} \item{...}{Additional arguments for \link[stats:prop.test]{prop.test()}.} } @@ -56,16 +80,24 @@ A tidier version of \link[stats:prop.test]{prop.test()} for equal or given proportions. } \examples{ -# proportion test for difference in proportions of +# two-sample proportion test for difference in proportions of # college completion by respondent sex -prop_test(gss, - college ~ sex, +prop_test(gss, + college ~ sex, order = c("female", "male")) - -# a one-proportion test for hypothesized null + +# one-sample proportion test for hypothesized null # proportion of college completion of .2 prop_test(gss, college ~ NULL, p = .2) +# report as a z-statistic rather than chi-square +# and specify the success level of the response +prop_test(gss, + college ~ NULL, + success = "degree", + p = .2, + z = TRUE) + } diff --git a/man/rep_sample_n.Rd b/man/rep_sample_n.Rd index 5607d1df..99482370 100644 --- a/man/rep_sample_n.Rd +++ b/man/rep_sample_n.Rd @@ -2,53 +2,71 @@ % Please edit documentation in R/rep_sample_n.R \name{rep_sample_n} \alias{rep_sample_n} +\alias{rep_slice_sample} \title{Perform repeated sampling} \usage{ rep_sample_n(tbl, size, replace = FALSE, reps = 1, prob = NULL) + +rep_slice_sample(.data, n = 1, replace = FALSE, weight_by = NULL, reps = 1) } \arguments{ -\item{tbl}{Data frame of population from which to sample.} +\item{tbl, .data}{Data frame of population from which to sample.} -\item{size}{Sample size of each sample.} +\item{size, n}{Sample size of each sample.} \item{replace}{Should sampling be with replacement?} \item{reps}{Number of samples of size n = \code{size} to take.} -\item{prob}{A vector of probability weights for obtaining the elements of the -vector being sampled.} +\item{prob, weight_by}{A vector of sampling weights for each of the rows in +\code{tbl}—must have length equal to \code{nrow(tbl)}.} } \value{ -A tibble of size \code{rep} times \code{size} rows corresponding to \code{rep} -samples of size n = \code{size} from \code{tbl}. +A tibble of size \code{reps * size} rows corresponding to \code{reps} +samples of size \code{size} from \code{tbl}, grouped by \code{replicate}. } \description{ -Perform repeated sampling of samples of size n. Useful for creating sampling -distributions. +These functions extend the functionality of \code{\link[dplyr:sample_n]{dplyr::sample_n()}} and +\code{\link[dplyr:slice]{dplyr::slice_sample()}} by allowing for repeated sampling of data. +This operation is especially helpful while creating sampling +distributions—see the examples below! +} +\details{ +The \code{\link[dplyr:sample_n]{dplyr::sample_n()}} function (to which \code{rep_sample_n()} was +originally a supplement) has been superseded by \code{\link[dplyr:slice]{dplyr::slice_sample()}}.
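The new `z` argument documentation for `prop_test()` above states that the squared z statistic is the chi-square statistic once Yates' correction is disabled. A quick sketch of that relationship, built from the man-page example and mirroring the check added later in this patch in `tests/testthat/test-wrappers.R`:

```r
library(infer)

# identical one-sample calls, reported on two scales; correct = FALSE drops
# the continuity correction so that the two statistics correspond exactly
chi_res <- prop_test(gss, college ~ NULL, success = "degree",
                     p = .2, correct = FALSE)
z_res <- prop_test(gss, college ~ NULL, success = "degree",
                   p = .2, z = TRUE)

# z^2 recovers the chi-square statistic
all.equal(unname(chi_res$statistic), unname(z_res$statistic)^2)
```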
+\code{rep_slice_sample()} provides a light wrapper around \code{rep_sample_n()} that +has a more similar interface to \code{slice_sample()}. } \examples{ -suppressPackageStartupMessages(library(dplyr)) -suppressPackageStartupMessages(library(ggplot2)) - -# A virtual population of N = 10,010, of which 3091 are hurricanes -population <- dplyr::storms \%>\% - select(status) +library(dplyr) +library(ggplot2) -# Take samples of size n = 50 storms without replacement; do this 1000 times -samples <- population \%>\% +# take 1000 samples of size n = 50, without replacement +slices <- gss \%>\% rep_sample_n(size = 50, reps = 1000) -samples -# Compute p_hats for all 1000 samples = proportion hurricanes -p_hats <- samples \%>\% +slices + +# compute the proportion of respondents with a college +# degree in each replicate +p_hats <- slices \%>\% group_by(replicate) \%>\% - summarize(prop_hurricane = mean(status == "hurricane")) -p_hats + summarize(prop_college = mean(college == "degree")) -# Plot sampling distribution -ggplot(p_hats, aes(x = prop_hurricane)) + +# plot sampling distribution +ggplot(p_hats, aes(x = prop_college)) + geom_density() + - labs(x = "p_hat", y = "Number of samples", - title = "Sampling distribution of p_hat from 1000 samples of size 50") - + labs( + x = "p_hat", y = "Number of samples", + title = "Sampling distribution of p_hat" + ) + +# sampling with probability weights. Note probabilities are automatically +# renormalized to sum to 1 +library(tibble) +df <- tibble( + id = 1:5, + letter = factor(c("a", "b", "c", "d", "e")) +) +rep_sample_n(df, size = 2, reps = 5, prob = c(.5, .4, .3, .2, .1)) } diff --git a/man/t_test.Rd b/man/t_test.Rd index b6f3434d..7a876e1f 100755 --- a/man/t_test.Rd +++ b/man/t_test.Rd @@ -60,7 +60,7 @@ gss \%>\% alternative = "two-sided") # see vignette("infer") for more explanation of the -# intuition behind the infer package, and vignette("t_test") +# intuition behind the infer package, and vignette("t_test") # for more examples of t-tests using infer } diff --git a/tests/testthat/helper-data.R b/tests/testthat/helper-data.R index 663ff5a9..dd6a1868 100644 --- a/tests/testthat/helper-data.R +++ b/tests/testthat/helper-data.R @@ -1,5 +1,10 @@ set.seed(4242) +expect_doppelganger <- function(title, fig, path = NULL, ...) { + testthat::skip_if_not_installed("vdiffr") + vdiffr::expect_doppelganger(title, fig, path = path, ...) 
+} + eps <- if (capabilities("long.double")) {sqrt(.Machine$double.eps)} else {0.01} gss_tbl <- tibble::as_tibble(gss) %>% diff --git a/tests/testthat/test-calculate.R b/tests/testthat/test-calculate.R index 46eb96a3..9ef0ba89 100644 --- a/tests/testthat/test-calculate.R +++ b/tests/testthat/test-calculate.R @@ -142,13 +142,16 @@ test_that("two sample mean-type problems are working", { generate(reps = 10, type = "permute") expect_warning(calculate(gen_gss5a, stat = "diff in means")) expect_silent( - calculate(gen_gss5a, - stat = "diff in means", - order = c("no degree", "degree")) + calculate(gen_gss5a, + stat = "diff in means", + order = c("no degree", "degree") + ) ) expect_warning(calculate(gen_gss5a, stat = "t")) - expect_silent(calculate(gen_gss5a, stat = "t", - order = c("no degree", "degree"))) + expect_silent(calculate(gen_gss5a, + stat = "t", + order = c("no degree", "degree") + )) }) test_that("properties of tibble passed-in are correct", { @@ -168,15 +171,17 @@ test_that("order is working for diff in means", { hypothesize(null = "independence") %>% generate(reps = 10, type = "permute") expect_equal( - nrow(calculate(gen_gss7, - stat = "diff in means", - order = c("no degree", "degree"))), + nrow(calculate(gen_gss7, + stat = "diff in means", + order = c("no degree", "degree") + )), 10 ) expect_equal( - ncol(calculate(gen_gss7, - stat = "diff in means", - order = c("no degree", "degree"))), + ncol(calculate(gen_gss7, + stat = "diff in means", + order = c("no degree", "degree") + )), 2 ) }) @@ -189,13 +194,13 @@ test_that("chi-square matches chisq.test value", { infer_way <- calculate(gen_gss8, stat = "Chisq") # chisq.test way suppressWarnings( - trad_way <- gen_gss8 %>% - dplyr::group_by(replicate) %>% - dplyr::do(broom::tidy( - stats::chisq.test(table(.$sex, .$partyid)) - )) %>% - dplyr::ungroup() %>% - dplyr::select(replicate, stat = statistic) + trad_way <- gen_gss8 %>% + dplyr::group_by(replicate) %>% + dplyr::do(broom::tidy( + stats::chisq.test(table(.$sex, .$partyid)) + )) %>% + dplyr::ungroup() %>% + dplyr::select(replicate, stat = statistic) ) # Equal not including attributes expect_equivalent(infer_way, trad_way) @@ -204,7 +209,7 @@ test_that("chi-square matches chisq.test value", { specify(partyid ~ NULL) %>% hypothesize( null = "point", - p = c("dem" = 1/3, "rep" = 1/3, "ind" = 1/3) + p = c("dem" = 1 / 3, "rep" = 1 / 3, "ind" = 1 / 3) ) %>% generate(reps = 10, type = "simulate") infer_way <- calculate(gen_gss9, stat = "Chisq") @@ -235,6 +240,35 @@ test_that("chi-square matches chisq.test value", { expect_equivalent(infer_way, trad_way) }) +test_that("chi-square works with factors with unused levels", { + test_tbl <- tibble( + x = factor(c("a", "b", "c"), levels = c("a", "b", "c", "d")), + y = factor(c("e", "e", "f")) + ) + + # Unused levels in explanatory variable + expect_warning( + out <- test_tbl %>% + specify(y ~ x) %>% + calculate(stat = "Chisq") %>% + pull(), + "Explanatory.*unused.*levels" + ) + expect_true(!is.na(out)) + + # Unused levels in response variable + test_tbl[["x"]] <- factor(test_tbl[["x"]]) + levels(test_tbl[["y"]]) <- c("e", "f", "g") + expect_warning( + out <- test_tbl %>% + specify(y ~ x) %>% + calculate(stat = "Chisq") %>% + pull(), + "Response.*unused.*levels" + ) + expect_true(!is.na(out)) +}) + test_that("`order` is working", { gen_gss_tbl10 <- gss_tbl %>% specify(hours ~ college) %>% @@ -248,33 +282,40 @@ test_that("`order` is working", { specify(hours ~ college) %>% generate(reps = 10, type = "bootstrap") expect_error( - 
calculate(gen_gss_tbl11, - stat = "diff in medians", - order = "no degree") + calculate(gen_gss_tbl11, + stat = "diff in medians", + order = "no degree" + ) ) expect_error( - calculate(gen_gss_tbl11, - stat = "diff in medians", - order = c(NA, "no degree")) + calculate(gen_gss_tbl11, + stat = "diff in medians", + order = c(NA, "no degree") + ) ) expect_error( - calculate(gen_gss_tbl11, - stat = "diff in medians", - order = c("no degree", "other")) + calculate(gen_gss_tbl11, + stat = "diff in medians", + order = c("no degree", "other") + ) ) expect_silent( - calculate(gen_gss_tbl11, - stat = "diff in medians", - order = c("no degree", "degree")) + calculate(gen_gss_tbl11, + stat = "diff in medians", + order = c("no degree", "degree") + ) ) expect_error( - calculate(gen_gss_tbl11, - stat = "diff in means", - order = c("no degree", "degree", "the last one")) + calculate(gen_gss_tbl11, + stat = "diff in means", + order = c("no degree", "degree", "the last one") + ) ) # order not given - expect_warning(calculate(gen_gss_tbl11, stat = "diff in means"), - "The statistic is based on a difference or ratio") + expect_warning( + calculate(gen_gss_tbl11, stat = "diff in means"), + "The statistic is based on a difference or ratio" + ) }) gen_gss_tbl12 <- gss_tbl %>% @@ -359,14 +400,14 @@ test_that("One sample t hypothesis test is working", { generate(reps = 10) %>% calculate(stat = "t") ) - + expect_message( gss_tbl %>% specify(response = hours) %>% calculate(stat = "t"), "the t-test will assume a null hypothesis" ) - + gss_tbl %>% specify(response = hours) %>% calculate(stat = "t", mu = 1) @@ -401,9 +442,10 @@ test_that("generate not done before calculate", { specify(hours ~ college) %>% hypothesize(null = "independence") attr(gss_tbl_hyp, "generate") <- TRUE - expect_warning(calculate(gss_tbl_hyp, - stat = "t", - order = c("no degree", "degree"))) + expect_warning(calculate(gss_tbl_hyp, + stat = "t", + order = c("no degree", "degree") + )) }) test_that("One sample t bootstrap is working", { @@ -423,9 +465,8 @@ test_that("calculate doesn't depend on order of `p` (#122)", { specify(partyid ~ NULL) %>% hypothesize(null = "point", p = p) %>% generate(reps = 500, type = "simulate") %>% - calculate("Chisq") %>% + calculate("Chisq") %>% get_p_value(obs_stat = 5, direction = "right") - } expect_equal( @@ -466,7 +507,9 @@ test_that("calc_impl.sum works", { test_that("calc_impl_success_f works", { expect_true( is.function(calc_impl_success_f( - f = function(response, success, ...) {mean(response == success, ...)}, + f = function(response, success, ...) { + mean(response == success, ...) 
+ }, output_name = "proportion" )) ) @@ -489,35 +532,53 @@ test_that("calc_impl.count works", { }) -gss_biased <- gss_tbl %>% +gss_biased <- gss_tbl %>% dplyr::filter(!(sex == "male" & college == "no degree" & age < 40)) gss_tbl <- table(gss_biased$sex, gss_biased$college) test_that("calc_impl.odds_ratio works", { - base_odds_ratio <- {(gss_tbl [1,1] * gss_tbl [2,2]) / - (gss_tbl [1,2] * gss_tbl [2,1])} - + base_odds_ratio <- { + (gss_tbl [1, 1] * gss_tbl [2, 2]) / + (gss_tbl [1, 2] * gss_tbl [2, 1]) + } + expect_equal( - gss_biased %>% + gss_biased %>% specify(college ~ sex, success = "degree") %>% calculate(stat = "odds ratio", order = c("female", "male")) %>% dplyr::pull(), expected = base_odds_ratio, - tolerance = eps) + tolerance = eps + ) }) test_that("calc_impl.ratio_of_props works", { - base_ratio_of_props <- {(gss_tbl [1,2] / sum(gss_tbl [1,])) / - (gss_tbl [2,2] / sum(gss_tbl [2,]))} - + base_ratio_of_props <- { + (gss_tbl [1, 2] / sum(gss_tbl [1, ])) / + (gss_tbl [2, 2] / sum(gss_tbl [2, ])) + } + expect_equal( - gss_biased %>% + gss_biased %>% specify(college ~ sex, success = "degree") %>% calculate(stat = "ratio of props", order = c("male", "female")) %>% dplyr::pull(), expected = base_ratio_of_props, - tolerance = eps) - + tolerance = eps + ) }) +test_that("calc_impl.z works for one sample proportions", { + infer_obs_stat <- gss %>% + specify(response = sex, success = "female") %>% + hypothesize(null = "point", p = .5) %>% + calculate(stat = "z") %>% + dplyr::pull() + + base_obs_stat <- + (mean(gss$sex == "female") - .5) / + sqrt(.5^2 / nrow(gss)) + + expect_equal(infer_obs_stat, base_obs_stat, tolerance = eps) +}) diff --git a/tests/testthat/test-rep_sample_n.R b/tests/testthat/test-rep_sample_n.R index 9bdbc4b0..a79bce0f 100644 --- a/tests/testthat/test-rep_sample_n.R +++ b/tests/testthat/test-rep_sample_n.R @@ -2,25 +2,103 @@ context("rep_sample_n") N <- 5 population <- tibble::tibble( - ball_ID = 1:N, + ball_id = 1:N, color = factor(c(rep("red", 3), rep("white", N - 3))) ) -test_that("rep_sample_n works", { - expect_silent(population %>% rep_sample_n(size = 2, reps = 10)) +test_that("rep_sample_n is sensitive to the size argument", { + set.seed(1) + reps <- 10 + s1 <- 2 + s2 <- 3 + + res1 <- population %>% rep_sample_n(size = s1, reps = reps) + res2 <- population %>% rep_sample_n(size = s2, reps = reps) + + expect_equal(ncol(res1), ncol(res2)) + expect_equal(ncol(res1), 3) + + expect_equal(nrow(res1) / s1, nrow(res2) / s2) + expect_equal(nrow(res1), reps * s1) +}) + +test_that("rep_sample_n is sensitive to the reps argument", { + set.seed(1) + r1 <- 10 + r2 <- 5 + size <- 2 + + res1 <- population %>% rep_sample_n(size = size, reps = r1) + res2 <- population %>% rep_sample_n(size = size, reps = r2) + + expect_equal(ncol(res1), ncol(res2)) + expect_equal(ncol(res1), 3) + + expect_equal(nrow(res1) / r1, nrow(res2) / r2) + expect_equal(nrow(res1), r1 * size) +}) + +test_that("rep_sample_n is sensitive to the replace argument", { + set.seed(1) + res1 <- population %>% rep_sample_n(size = 5, reps = 100, replace = TRUE) + + set.seed(1) + res2 <- population %>% rep_sample_n(size = 5, reps = 100, replace = FALSE) + + expect_true(all(res1$replicate == res2$replicate)) + expect_false(all(res1$ball_id == res2$ball_id)) + expect_false(all(res1$color == res2$color)) + + expect_equal(ncol(res1), ncol(res2)) + expect_equal(ncol(res1), 3) + + # Check if there are actually no duplicates in case `replace = FALSE` + no_duplicates <- all(tapply(res2$ball_id, res2$replicate, anyDuplicated) == 
0) + expect_true(no_duplicates) +}) + +test_that("rep_sample_n is sensitive to the prob argument", { + set.seed(1) + res1 <- population %>% + rep_sample_n( + size = 5, + reps = 100, + replace = TRUE, + prob = c(1, rep(0, 4)) + ) + + expect_true(all(res1$ball_id == 1)) + expect_true(all(res1$color == "red")) +}) + +test_that("rep_sample_n errors with bad arguments", { + expect_error( + population %>% + rep_sample_n(size = 2, reps = 10, prob = rep(x = 1 / 5, times = 100)) + ) + expect_error( population %>% - rep_sample_n(size = 2, reps = 10, prob = rep(x = 1/5, times = 100)) + rep_sample_n(size = 2, reps = 10, prob = c(1 / 2, 1 / 2)) ) + expect_error( population %>% - rep_sample_n(size = 2, reps = 10, prob = c(1/2, 1/2)) + rep_sample_n(size = "a lot", reps = 10) ) + expect_error( population %>% - rep_sample_n(size = 2, reps = 10, prob = c(0.25, 1/5, 1/5, 1/5, 0.15)) + rep_sample_n(size = 2, reps = "a lot") ) - - test_rep <- population %>% rep_sample_n(size = 2, reps = 10) - expect_equal(c("replicate", names(population)), names(test_rep)) +}) + +test_that("rep_slice_sample works", { + set.seed(1) + res1 <- rep_sample_n(population, size = 2, reps = 5, prob = rep(1 / N, N)) + + set.seed(1) + res2 <- rep_slice_sample(population, n = 2, reps = 5, weight_by = rep(1 / N, N)) + + expect_equal(res1, res2) }) diff --git a/tests/testthat/test-shade_confidence_interval.R b/tests/testthat/test-shade_confidence_interval.R index 11fc8c9e..828a8b97 100644 --- a/tests/testthat/test-shade_confidence_interval.R +++ b/tests/testthat/test-shade_confidence_interval.R @@ -1,8 +1,5 @@ context("shade_confidence_interval") -library(vdiffr) - - # shade_confidence_interval ----------------------------------------------- test_that("shade_confidence_interval works", { skip_if(getRversion() > "4.0.2") diff --git a/tests/testthat/test-shade_p_value.R b/tests/testthat/test-shade_p_value.R index b5f575b3..42d4e657 100644 --- a/tests/testthat/test-shade_p_value.R +++ b/tests/testthat/test-shade_p_value.R @@ -1,8 +1,5 @@ context("shade_p_value") -library(vdiffr) - - # shade_p_value ----------------------------------------------------------- test_that("shade_p_value works", { skip_if(getRversion() > "4.0.2") diff --git a/tests/testthat/test-visualize.R b/tests/testthat/test-visualize.R index d96ba85a..8d4b8e32 100644 --- a/tests/testthat/test-visualize.R +++ b/tests/testthat/test-visualize.R @@ -1,7 +1,6 @@ context("visualize") library(dplyr) -library(vdiffr) set.seed(42) diff --git a/tests/testthat/test-wrappers.R b/tests/testthat/test-wrappers.R index cf6e3aaf..33c508b2 100644 --- a/tests/testthat/test-wrappers.R +++ b/tests/testthat/test-wrappers.R @@ -7,29 +7,29 @@ test_that("t_test works", { expect_error( gss_tbl %>% t_test(response = "hours", explanatory = "sex") ) - - new_way <- t_test(gss_tbl, - hours ~ sex, + + new_way <- t_test(gss_tbl, + hours ~ sex, order = c("male", "female")) - new_way_alt <- t_test(gss_tbl, + new_way_alt <- t_test(gss_tbl, response = hours, explanatory = sex, order = c("male", "female")) old_way <- t.test(hours ~ sex, data = gss_tbl) %>% broom::glance() %>% - dplyr::select(statistic, t_df = parameter, p_value = p.value, + dplyr::select(statistic, t_df = parameter, p_value = p.value, alternative, lower_ci = conf.low, upper_ci = conf.high) - + expect_equal(new_way, new_way_alt, tolerance = 1e-5) expect_equal(new_way, old_way, tolerance = 1e-5) - + # check that the order argument changes output - new_way2 <- t_test(gss_tbl, - hours ~ sex, - order = c("female", "male")) + new_way2 <- t_test(gss_tbl, 
+ hours ~ sex, + order = c("female", "male")) expect_equal(new_way[["lower_ci"]], -new_way2[["upper_ci"]]) expect_equal(new_way[["statistic"]], -new_way2[["statistic"]]) - + # One Sample new_way <- gss_tbl %>% t_test(hours ~ NULL, mu = 0) @@ -37,20 +37,20 @@ test_that("t_test works", { t_test(response = hours, mu = 0) old_way <- t.test(x = gss_tbl$hours, mu = 0) %>% broom::glance() %>% - dplyr::select(statistic, t_df = parameter, p_value = p.value, + dplyr::select(statistic, t_df = parameter, p_value = p.value, alternative, lower_ci = conf.low, upper_ci = conf.high) - + expect_equal(new_way, new_way_alt, tolerance = 1e-5) expect_equal(new_way, old_way, tolerance = 1e-5) }) test_that("chisq_test works", { # independence - expect_silent(gss_tbl %>% + expect_silent(gss_tbl %>% chisq_test(college ~ partyid)) - new_way <- gss_tbl %>% + new_way <- gss_tbl %>% chisq_test(college ~ partyid) - new_way_alt <- gss_tbl %>% + new_way_alt <- gss_tbl %>% chisq_test(response = college, explanatory = partyid) old_way <- chisq.test(x = table(gss_tbl$partyid, gss_tbl$college)) %>% broom::glance() %>% @@ -58,24 +58,24 @@ test_that("chisq_test works", { expect_equal(new_way, new_way_alt, tolerance = eps) expect_equal(new_way, old_way, tolerance = eps) - + # Goodness of Fit - expect_silent(gss_tbl %>% + expect_silent(gss_tbl %>% chisq_test(response = partyid, p = c(.3, .4, .3))) - new_way <- gss_tbl %>% + new_way <- gss_tbl %>% chisq_test(partyid ~ NULL, p = c(.3, .4, .3)) - new_way_alt <- gss_tbl %>% + new_way_alt <- gss_tbl %>% chisq_test(response = partyid, p = c(.3, .4, .3)) old_way <- chisq.test(x = table(gss_tbl$partyid), p = c(.3, .4, .3)) %>% broom::glance() %>% dplyr::select(statistic, chisq_df = parameter, p_value = p.value) - + expect_equal(new_way, new_way_alt, tolerance = 1e-5) expect_equal(new_way, old_way, tolerance = 1e-5) - + # check that function errors out when response is numeric expect_error(chisq_test(x = gss_tbl, response = age, explanatory = partyid)) - + # check that function errors out when explanatory is numeric expect_error(chisq_test(x = gss_tbl, response = partyid, explanatory = age)) @@ -103,16 +103,16 @@ test_that("_stat functions work", { chisq_stat(partyid ~ NULL) obs_stat_way_alt <- gss_tbl %>% chisq_stat(response = partyid) - + expect_equivalent(dplyr::pull(new_way), obs_stat_way) expect_equivalent(dplyr::pull(new_way), obs_stat_way_alt) - + # robust to the named vector unordered_p <- gss_tbl %>% chisq_test(response = partyid, p = c(.2, .3, .5)) ordered_p <- gss_tbl %>% chisq_test(response = partyid, p = c(ind = .2, rep = .3, dem = .5)) - + expect_equivalent(unordered_p, ordered_p) # Two sample t @@ -129,9 +129,9 @@ test_that("_stat functions work", { t_stat(hours ~ sex, order = c("male", "female")) obs_stat_way_alt <- gss_tbl %>% t_stat(response = hours, - explanatory = sex, + explanatory = sex, order = c("male", "female")) - + expect_equivalent(another_way, obs_stat_way) expect_equivalent(another_way, obs_stat_way_alt) @@ -145,10 +145,10 @@ test_that("_stat functions work", { t_stat(hours ~ NULL) obs_stat_way_alt <- gss_tbl %>% t_stat(response = hours) - + expect_equivalent(another_way, obs_stat_way) expect_equivalent(another_way, obs_stat_way_alt) - + expect_error(chisq_stat(x = gss_tbl, response = age, explanatory = sex)) expect_error(chisq_stat(x = gss_tbl, response = sex, explanatory = age)) }) @@ -156,11 +156,11 @@ test_that("_stat functions work", { test_that("conf_int argument works", { expect_equal( names( - gss_tbl %>% - t_test(hours ~ sex, + gss_tbl %>% +
t_test(hours ~ sex, order = c("male", "female"), conf_int = FALSE) ), - c("statistic", "t_df", "p_value", "alternative"), + c("statistic", "t_df", "p_value", "alternative"), tolerance = 1e-5 ) expect_equal( @@ -196,16 +196,16 @@ test_that("conf_int argument works", { # Check that var.equal produces different results # Thanks for finding this @EllaKaye! - gss_tbl_small <- gss_tbl %>% slice(1:6, 90:100) + gss_tbl_small <- gss_tbl %>% dplyr::slice(1:6, 90:100) no_var_equal <- gss_tbl_small %>% t_stat(hours ~ sex, order = c("female", "male")) - + var_equal <- gss_tbl_small %>% t_stat( hours ~ sex, order = c("female", "male"), var.equal = TRUE - ) + ) expect_false(no_var_equal == var_equal) shortcut_no_var_equal <- gss_tbl_small %>% @@ -222,18 +222,19 @@ test_that("conf_int argument works", { }) # generate some data to test the prop.test wrapper -df <- data.frame(resp = c(rep("c", 450), - rep("d", 50), - rep("c", 400), - rep("d", 100)), - exp = rep(c("a", "b"), each = 500)) +df <- data.frame(resp = c(rep("c", 450), + rep("d", 50), + rep("c", 400), + rep("d", 100)), + exp = rep(c("a", "b"), each = 500), + stringsAsFactors = FALSE) sum_df <- table(df) -bad_df <- data.frame(resp = 1:5, +bad_df <- data.frame(resp = 1:5, exp = letters[1:5]) -bad_df2 <- data.frame(resp = letters[1:5], +bad_df2 <- data.frame(resp = letters[1:5], exp = 1:5) test_that("two sample prop_test works", { @@ -241,43 +242,47 @@ test_that("two sample prop_test works", { # run the tests with default args base <- prop.test(sum_df) infer <- prop_test(df, resp ~ exp, order = c("a", "b")) - + # check that results are same - expect_equal(base[["statistic"]], - infer[["statistic"]], + expect_equal(base[["statistic"]], + infer[["statistic"]], tolerance = .001) - expect_equal(base[["parameter"]], + expect_equal(base[["parameter"]], infer[["chisq_df"]]) - expect_equal(base[["p.value"]], + expect_equal(base[["p.value"]], infer[["p_value"]], tolerance = .001) # expect warning for unspecified order expect_warning(prop_test(df, resp ~ exp)) - + # check that the functions respond to "p" in the same way base2 <- prop.test(sum_df, p = c(.1, .1)) infer2 <- prop_test(df, resp ~ exp, order = c("a", "b"), p = c(.1, .1)) - expect_equal(base2[["statistic"]], - infer2[["statistic"]], + expect_equal(base2[["statistic"]], + infer2[["statistic"]], tolerance = .001) - expect_equal(base2[["parameter"]], + expect_equal(base2[["parameter"]], infer2[["chisq_df"]]) - expect_equal(base2[["p.value"]], + expect_equal(base2[["p.value"]], infer2[["p_value"]], tolerance = .001) - + # check confidence interval argument infer3 <- prop_test(df, resp ~ exp, order = c("a", "b"), conf_int = TRUE) expect_length(infer3, 6) expect_length(infer2, 4) - + # check that the order argument changes output infer4 <- prop_test(df, resp ~ exp, order = c("b", "a"), conf_int = TRUE) - expect_equal(infer4[["lower_ci"]], -infer3[["upper_ci"]]) - + expect_equal(infer4[["lower_ci"]], -infer3[["upper_ci"]], tolerance = .001) + expect_error(prop_test(bad_df, resp ~ exp)) expect_error(prop_test(bad_df2, resp ~ exp)) + + # check that the success argument changes output + infer5 <- prop_test(df, resp ~ exp, order = c("a", "b"), success = "d", conf_int = TRUE) + expect_equal(infer3[["upper_ci"]], -infer5[["lower_ci"]], tolerance = .001) }) # ...and some data for the one sample wrapper @@ -287,31 +292,63 @@ df_1 <- df %>% sum_df_1 <- table(df_1) test_that("one sample prop_test works", { - + # check that results with default args are the same base <- prop.test(sum_df_1) infer <- prop_test(df_1,
resp ~ NULL, p = .5) - expect_equal(base[["statistic"]], - infer[["statistic"]], + expect_equal(base[["statistic"]], + infer[["statistic"]], tolerance = .001) - expect_equal(base[["parameter"]], + expect_equal(base[["parameter"]], infer[["chisq_df"]]) - expect_equal(base[["p.value"]], + expect_equal(base[["p.value"]], infer[["p_value"]], tolerance = .001) - + # check that the functions respond to "p" in the same way base2 <- prop.test(sum_df_1, p = .86) infer2 <- prop_test(df_1, resp ~ NULL, p = .86) - expect_equal(base2[["statistic"]], - infer2[["statistic"]], + expect_equal(base2[["statistic"]], + infer2[["statistic"]], tolerance = .001) - expect_equal(base2[["parameter"]], + expect_equal(base2[["parameter"]], infer2[["chisq_df"]]) - expect_equal(base2[["p.value"]], + expect_equal(base2[["p.value"]], infer2[["p_value"]], tolerance = .001) - + # expect message for unspecified p expect_message(prop_test(df_1, resp ~ NULL)) + + # check that the success argument changes output + infer3 <- prop_test(df_1, resp ~ NULL, p = .2, success = "c") + infer4 <- prop_test(df_1, resp ~ NULL, p = .8, success = "d") + expect_equal(infer3[["chisq_df"]], infer4[["chisq_df"]], tolerance = .001) +}) + +test_that("prop_test output dimensionality is correct", { + infer_1_sample <- prop_test(df, resp ~ NULL, p = .5) + infer_1_sample_z <- prop_test(df, resp ~ NULL, p = .5, z = TRUE) + infer_2_sample <- prop_test(df, resp ~ exp, order = c("a", "b")) + infer_2_sample_no_int <- prop_test(df, resp ~ exp, order = c("a", "b"), + conf_int = FALSE) + + # introduce a third response level + df$resp[c(1:10, 490:510, 990:1000)] <- "e" + + infer_3_sample <- prop_test(df, resp ~ exp, order = c("a", "b")) + + expect_length(infer_1_sample, 4) + expect_length(infer_1_sample, length(infer_1_sample_z) + 1) + expect_length(infer_2_sample, 6) + expect_length(infer_2_sample_no_int, 4) + expect_length(infer_3_sample, 3) +}) + +test_that("prop_test z argument works as expected", { + chi_res <- prop_test(df, resp ~ NULL, p = .5, correct = FALSE) + + z_res <- prop_test(df, resp ~ NULL, p = .5, z = TRUE) + + expect_equal(unname(chi_res$statistic), z_res$statistic^2, tolerance = eps) }) diff --git a/vignettes/infer.Rmd b/vignettes/infer.Rmd index 301abe7e..94623974 100644 --- a/vignettes/infer.Rmd +++ b/vignettes/infer.Rmd @@ -68,11 +68,11 @@ We can see that the `infer` class has been appended on top of the dataframe clas If you're interested in two variables--`age` and `partyid`, for example--you can `specify` their relationship in one of two (equivalent) ways: ```{r specify-two, warning = FALSE, message = FALSE} -# with the named arguments +# as a formula gss %>% specify(age ~ partyid) -# as a formula +# with the named arguments gss %>% specify(response = age, explanatory = partyid) ``` diff --git a/vignettes/observed_stat_examples.Rmd b/vignettes/observed_stat_examples.Rmd index 5f764535..5fb86f44 100644 --- a/vignettes/observed_stat_examples.Rmd +++ b/vignettes/observed_stat_examples.Rmd @@ -229,7 +229,40 @@ null_distn <- gss %>% ### One categorical variable (standardized proportion $z$) -While the standardized proportion $z$ statistic has not yet been implemented in the randomization-based framework, the package supplies a wrapper around `prop.test` to allow for tests of a single proportion on tidy data. 
+Calculating the observed statistic (named `z_hat` here, since `calculate(stat = "z")` returns a standardized statistic rather than a proportion), + +```{r} +z_hat <- gss %>% + specify(response = sex, success = "female") %>% + hypothesize(null = "point", p = .5) %>% + calculate(stat = "z") +``` + +Then, generating the null distribution, + +```{r} +null_distn <- gss %>% + specify(response = sex, success = "female") %>% + hypothesize(null = "point", p = .5) %>% + generate(reps = 1000, type = "simulate") %>% + calculate(stat = "z") +``` + +Visualizing the observed statistic alongside the null distribution, + +```{r} +visualize(null_distn) + + shade_p_value(obs_stat = z_hat, direction = "two-sided") +``` + +Calculating the p-value from the null distribution and observed statistic, + +```{r} +null_distn %>% + get_p_value(obs_stat = z_hat, direction = "two-sided") +``` + +The package also supplies a wrapper around `prop.test` for tests of a single proportion on tidy data. ```{r prop_test_1_grp} prop_test(gss,