Skip to content

Commit

Permalink
Add diff_tables and replace_all_na
Browse files Browse the repository at this point in the history
  • Loading branch information
izaak-jephson committed Jul 26, 2024
1 parent c2f5527 commit e153da0
Show file tree
Hide file tree
Showing 13 changed files with 154 additions and 11 deletions.
1 change: 1 addition & 0 deletions DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ Imports:
readr,
readxl,
rlang,
stats,
stringr,
tidyr,
tidyselect,
Expand Down
3 changes: 3 additions & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,8 @@ export(convert_col_date)
export(convert_date)
export(convert_to_age_band)
export(create_sss_calendar)
export(diff_column)
export(diff_tables)
export(extract_table)
export(financial_year)
export(import_raw_tables)
Expand All @@ -18,6 +20,7 @@ export(make_number_percent)
export(make_string_numeric)
export(make_supp_negative)
export(mround)
export(replace_all_na)
export(round_and_suppress)
export(transpose_data)
importFrom(magrittr,"%>%")
Expand Down
22 changes: 20 additions & 2 deletions R/convert_types.R
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,7 @@ make_string_numeric <- function(value){
#' This function applies [make_string_numeric()] across a whole table.
#' @param table A data frame
#' @param across Columns to convert across. Takes tidyselect specification.
#' #' Defaults to `everything()`
#' Defaults to `everything()`
#' @export

make_all_string_numeric <- function(table, across = tidyselect::everything()) {
Expand Down Expand Up @@ -95,7 +95,7 @@ make_number_percent <- function(value){
#' This function applies [make_number_percent()] across a whole table.
#' @param table A data frame
#' @param across Columns to convert across. Takes tidyselect specification.
#' #' Defaults to `contains("Percentage")`
#' Defaults to `contains("Percentage")`
#' @export

make_all_number_percent <- function(table, across = tidyselect::contains("Percentage")){
Expand All @@ -106,3 +106,21 @@ make_all_number_percent <- function(table, across = tidyselect::contains("Percen
{{across}},
~ purrr::map_vec(.x, ~ make_number_percent(.x))))
}

#' Replace NA Values Across a Table
#'
#' Applies [tidyr::replace_na()] to every selected column of a data frame,
#' substituting `replace` wherever a value is `NA`.
#' @param table A data frame
#' @param replace Value to replace NA with
#' @param across Columns to replace NA in. Takes tidyselect specification.
#' Defaults to `everything()`
#' @return `table` with `NA` replaced by `replace` in the selected columns
#' @export

replace_all_na <- function(table, replace, across = tidyselect::everything()) {
  dplyr::mutate(
    table,
    dplyr::across(
      {{ across }},
      function(values) tidyr::replace_na(values, replace)
    )
  )
}
6 changes: 6 additions & 0 deletions R/import_tables.R
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
#' Extract Publication Table from Raw Import
#'
#' This function takes raw input read from a publication file and extracts the
#' underlying data
#' @param data A data frame read from a publication file
Expand Down Expand Up @@ -28,6 +30,8 @@ extract_table <- function(data) {

}

#' Import Raw Data from Publication Tables
#'
#' This function takes an excel file of publication tables and reads in the data
#' (including titles, headings and notes). Useful for debugging if `import_tables()`
#' is not working as expected.
Expand All @@ -42,6 +46,8 @@ import_raw_tables <- function(filepath) {
purrr::map(dplyr::as_tibble)
}

#' Import and Extract Publication Tables
#'
#' This function takes an excel file of publication tables, reads in the data and
#' extracts the data tables.
#' @param filepath Excel file containing publication tables
Expand Down
62 changes: 62 additions & 0 deletions R/qa_tables.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@
#' Take the Difference of a Matching Column in Two Tables
#'
#' This function returns a vector of differences (`table_1` minus `table_2`)
#' for a specified column in two data frames. Only rows whose first-column
#' value is present in both tables are considered, and rows whose first-column
#' value contains "Total" or "Financial" are dropped. Rows are paired by their
#' first-column value, so the two tables do not need to be in the same order.
#' If a value is repeated in the first column of `table_2`, its first
#' occurrence is used.
#' @param table_1 First table
#' @param table_2 Second table
#' @param column Column to consider, as a name or position (a position is
#' resolved separately in each table)
#' @return A vector of differences with one element per retained row of
#' `table_1`, in `table_1` row order
#' @export


diff_column <- function(table_1, table_2, column) {
  # Keep rows of `table` whose key (first column) also appears in `other`,
  # then drop summary rows labelled "Total" or "Financial".
  keep_shared_rows <- function(table, other) {
    table %>%
      dplyr::filter(dplyr::if_any(1, ~ . %in% other[[1]])) %>%
      dplyr::filter(dplyr::if_any(1, ~ !stringr::str_detect(., "Total"))) %>%
      dplyr::filter(dplyr::if_any(1, ~ !stringr::str_detect(., "Financial")))
  }

  last_time <- keep_shared_rows(table_1, table_2)
  this_time <- keep_shared_rows(table_2, table_1)

  # Resolve `column` to a name in each table so positions are also accepted.
  last_values <- last_time[[colnames(table_1[column])]]
  this_values <- this_time[[colnames(table_2[column])]]

  # Pair rows by key rather than by position: every key kept in `last_time`
  # is also kept in `this_time`, so match() always finds a partner row.
  partner_row <- match(last_time[[1]], this_time[[1]])

  last_values - this_values[partner_row]
}


#' Take the Difference of All Matching Columns in Two Tables
#'
#' This function returns a table of differences (`table_1` minus `table_2`)
#' for every column that is numeric in both data frames. It will only consider
#' rows and columns present in both data sets (row selection is based on the
#' first column), and rows whose first-column value contains "Total" or
#' "Financial" are dropped. Non-numeric columns of `table_1` are carried
#' through unchanged and placed first.
#' @param table_1 First table
#' @param table_2 Second table
#' @return A tibble containing the non-numeric columns of `table_1` followed
#' by one column of differences per shared numeric column
#' @export
diff_tables <- function(table_1, table_2) {
  is_col_numeric <- purrr::map_lgl(table_1, is.numeric)
  other_cols <- colnames(table_1)[!is_col_numeric]

  # Only difference columns that exist and are numeric in both tables;
  # anything else cannot be subtracted.
  numeric_cols <- intersect(
    colnames(table_1)[is_col_numeric],
    colnames(table_2)[purrr::map_lgl(table_2, is.numeric)]
  )

  # Same row selection that diff_column() applies to `table_1`, so the label
  # columns line up with the difference vectors.
  filtered_table <- table_1 %>%
    dplyr::filter(dplyr::if_any(1, ~ . %in% table_2[[1]])) %>%
    dplyr::filter(dplyr::if_any(1, ~ !stringr::str_detect(., "Total"))) %>%
    dplyr::filter(dplyr::if_any(1, ~ !stringr::str_detect(., "Financial")))

  # Nothing to difference: return just the label columns.
  if (length(numeric_cols) == 0) {
    return(dplyr::as_tibble(filtered_table[other_cols]))
  }

  # Call the package's own function directly rather than through `pkg::`,
  # which would resolve to the installed copy of the package.
  diffs <- purrr::map(
    numeric_cols,
    function(col) diff_column(table_1, table_2, col)
  )

  stats::setNames(diffs, numeric_cols) %>%
    dplyr::as_tibble() %>%
    dplyr::bind_cols(filtered_table[other_cols]) %>%
    dplyr::relocate(
      tidyselect::all_of(other_cols),
      .before = tidyselect::everything()
    )
}
20 changes: 20 additions & 0 deletions man/diff_column.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

18 changes: 18 additions & 0 deletions man/diff_tables.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

3 changes: 1 addition & 2 deletions man/extract_table.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

4 changes: 1 addition & 3 deletions man/import_raw_tables.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

3 changes: 1 addition & 2 deletions man/import_tables.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion man/make_all_number_percent.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion man/make_all_string_numeric.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

19 changes: 19 additions & 0 deletions man/replace_all_na.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

0 comments on commit e153da0

Please sign in to comment.