From 6cc89bd5e694b67555d518bb160cacfca40185ee Mon Sep 17 00:00:00 2001
From: seokhoonj <seokhoonj@gmail.com>
Date: Mon, 26 Feb 2024 10:17:47 +0900
Subject: [PATCH] Replace functions are modified

---
 DESCRIPTION                  |  3 +-
 NAMESPACE                    |  2 ++
 R/group.R                    | 55 ------------------------------------
 R/replace.R                  | 51 +++++++++++++++++++++++++++++++++
 R/string.R                   | 28 ------------------
 man/replace_empty_with_na.Rd | 25 ++++++++++++++++
 man/replace_na_with_zero.Rd  | 25 ++++++++++++++++
 7 files changed, 105 insertions(+), 84 deletions(-)
 delete mode 100644 R/group.R
 create mode 100644 R/replace.R
 delete mode 100644 R/string.R
 create mode 100644 man/replace_empty_with_na.Rd
 create mode 100644 man/replace_na_with_zero.Rd

diff --git a/DESCRIPTION b/DESCRIPTION
index 0739bf0..a24dab2 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -20,5 +20,6 @@ LinkingTo:
     Rcpp,
     RcppArmadillo
 Suggests: 
-    testthat (>= 3.0.0)
+    testthat (>= 3.0.0),
+    pryr (>= 0.1.6) 
 Config/testthat/edition: 3
diff --git a/NAMESPACE b/NAMESPACE
index 43355db..e142921 100644
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -34,6 +34,8 @@ export(min_by_rownames)
 export(mkdir)
 export(mostfreq)
 export(paste_list)
+export(replace_empty_with_na)
+export(replace_na_with_zero)
 export(reverse)
 export(rotate)
 export(row_max)
diff --git a/R/group.R b/R/group.R
deleted file mode 100644
index 3e03464..0000000
--- a/R/group.R
+++ /dev/null
@@ -1,55 +0,0 @@
-#' Combine overlapping date ranges
-#'
-#' Combine multiple overlapping date ranges.
-#'
-#' @param df a data.frame with data ranges
-#' @param id_var id column names
-#' @param merge_var column names to be collapsed
-#' @param from_var a column name of start date
-#' @param to_var a column name of end date
-#' @param interval an interval of previous end date to next start date to be combined
-#' @param collapse an optional character string to seperate the result of `merge_var`
-#' @return `data.frame` with no overlapping date ranges.
-#'
-#' @examples
-#' # combine overlapping date ranges
-#' id <- c("A", "A", "B")
-#' work <- c("cleansing", "analysis", "cleansing")
-#' sdate <- as.Date(c("2022-03-01", "2022-03-05", "2022-03-08"))
-#' edate <- as.Date(c("2022-03-06", "2022-03-09", "2022-03-10"))
-#' df <- data.frame(id = id, work = work, sdate = sdate, edate = edate)
-#' combine_overlapping_date_range(df, id, work, sdate, edate, interval = 0)
-#'
-#' @export
-combine_overlapping_date_range <- function(df, id_var, merge_var, from_var, to_var,
-                                           interval = 0, collapse = "|") {
-  old_class <- class(df)
-  set_dt(df)
-  id_var    <- match_cols(df, vapply(substitute(id_var), deparse, "character"))
-  merge_var <- match_cols(df, vapply(substitute(merge_var), deparse, "character"))
-  from_var  <- deparse(substitute(from_var))
-  to_var  <- deparse(substitute(to_var))
-  all_var <- c(id_var, merge_var, from_var, to_var)
-  dt <- df[, .SD, .SDcols = all_var]
-  setnames(dt, c(id_var, merge_var, "from", "to"))
-  setorderv(dt, c(id_var, "from", "to"))
-  set(dt, j = "sub_stay", value = 0)
-  index <- .Call(IndexOverlappingDateRange, dt[, .SD, .SDcols = id_var],
-                 dt$from, dt$to, interval = interval)
-  set(dt, j = "loc", value = index$loc) # group index to combine
-  set(dt, j = "sub", value = index$sub) # days to subtract, if the interval is longer than 0
-  group_var <- c(id_var, "loc")
-  m <- dt[, lapply(.SD, function(x) paste0(unique(x[!is.na(x)]), collapse = collapse)),
-          keyby = group_var, .SDcols = merge_var]
-  from <- to <- sub_stay <- sub <- NULL
-  s <- dt[, list(from = min(from), to = max(to), sub_stay = sum(sub_stay) + sum(sub)),
-          keyby = group_var]
-  z <- m[s, on = group_var]
-  set(z, j = "loc", value = NULL)
-  set(z, j = "stay", value = as.numeric(z$to - z$from + 1 - z$sub_stay))
-  set(z, j = "sub_stay", value = NULL)
-  setnames(z, c(all_var, "stay"))
-  setattr(z, "class", old_class)
-  setattr(df, "class", old_class)
-  return(z)
-}
diff --git a/R/replace.R b/R/replace.R
new file mode 100644
index 0000000..56e876b
--- /dev/null
+++ b/R/replace.R
@@ -0,0 +1,51 @@
+#' Replace NA with zero
+#'
+#' Replace NA_integer_ or NA_real_ values with zero in a memory-efficient way
+#'
+#' @param df a data frame
+#' @return no return value
+#'
+#' @examples
+#' \donttest{df <- data.frame(x = c(1, NA, 3), y = c("A", "B", NA), z = c(NA, 5, NA))
+#' pryr::address(df)
+#' replace_na_with_zero(df)
+#' pryr::address(df)
+#' df}
+#'
+#' @export
+replace_na_with_zero <- function(df) {
+  old_class <- class(df)
+  set_dt(df)
+  class <- sapply(df, class)
+  cols <- names(class)[which(class %in% c("numeric", "integer"))]
+  df[, `:=`((cols), lapply(.SD, function(x) ifelse(is.na(x), 0, x))),
+     .SDcols = cols]
+  setattr(df, "class", old_class)
+  invisible(df)
+}
+
+#' Replace empty with NA
+#'
+#' Replace empty string like "" with NA_character_ in a memory-efficient way
+#'
+#' @param df a data frame
+#' @return no return value
+#'
+#' @examples
+#' \donttest{df <- data.frame(x = c("A", "B", ""), y = c(1, NA, 3), z = c("", "E", ""))
+#' pryr::address(df)
+#' replace_empty_with_na(df)
+#' pryr::address(df)
+#' df}
+#'
+#' @export
+replace_empty_with_na <- function(df) {
+  old_class <- class(df)
+  set_dt(df)
+  class <- sapply(df, class)
+  cols <- names(class)[which(class == "character")]
+  df[, `:=`((cols), lapply(.SD, function(x) ifelse(x == "", NA, x))),
+     .SDcols = cols]
+  setattr(df, "class", old_class)
+  invisible(df)
+}
diff --git a/R/string.R b/R/string.R
deleted file mode 100644
index 7ef2555..0000000
--- a/R/string.R
+++ /dev/null
@@ -1,28 +0,0 @@
-
-# pste_string <- function(x, collapse = "|") paste(x, collapse = collapse)
-# glue_string <- function(x, collapse = "|") paste(unique(x[!is.na(x)]), collapse = collapse)
-# sort_string <- function(x, collapse = "|") paste(sort(unique(x[!is.na(x)])), collapse = collapse)
-# splt_string <- function(x, split = "\\|") {z <- strsplit(x, split = split)[[1L]]; z[!z %in% c(NA, "NA", "")]}
-# srch_string <- function(x) glue_string(paste0(x, "$"))
-# melt_string <- function(x) srch_string(splt_string(pste_string(x)))
-# excl_string <- function(x) paste0('^((?!', x, ').)*$')
-# remv_string <- function(string, x) gsub(string, "", x)
-# pull_string <- function(string, x, ignore.case = TRUE) {
-#   r <- regexpr(string, x, ignore.case = ignore.case, perl = TRUE)
-#   z <- rep(NA, length(x))
-#   z[r != -1] <- regmatches(x, r)
-#   return(z)
-# }
-# pull_string_all <- function(string, x, collapse = "|", ignore.case = TRUE) {
-#   r <- gregexpr(string, x, ignore.case = ignore.case, perl = TRUE)
-#   z <- regmatches(x, r)
-#   sapply(z, function(s) paste(s, collapse = collapse))
-# }
-# pull_excl_part <- function(x, ignore.case = TRUE) {
-#   r <- gregexpr("\\(.*?\\)", x, ignore.case = ignore.case, perl = TRUE)
-#   z <- regmatches(x, r)
-#   gsub("[\\(\\)]", "", z)
-# }
-# pull_excl_term <- function(x, ignore.case = TRUE) {
-#   as_integer(gsub("[0-9].EXCL|EXCL|\\(.*?\\)", "", x, ignore.case = ignore.case, perl = TRUE))
-# }
diff --git a/man/replace_empty_with_na.Rd b/man/replace_empty_with_na.Rd
new file mode 100644
index 0000000..d116d7e
--- /dev/null
+++ b/man/replace_empty_with_na.Rd
@@ -0,0 +1,25 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/replace.R
+\name{replace_empty_with_na}
+\alias{replace_empty_with_na}
+\title{Replace empty with NA}
+\usage{
+replace_empty_with_na(df)
+}
+\arguments{
+\item{df}{a data frame}
+}
+\value{
+no return value
+}
+\description{
+Replace empty string like "" with NA_character_ in a memory-efficient way
+}
+\examples{
+\donttest{df <- data.frame(x = c("A", "B", ""), y = c(1, NA, 3), z = c("", "E", ""))
+pryr::address(df)
+replace_empty_with_na(df)
+pryr::address(df)
+df}
+
+}
diff --git a/man/replace_na_with_zero.Rd b/man/replace_na_with_zero.Rd
new file mode 100644
index 0000000..b5348b9
--- /dev/null
+++ b/man/replace_na_with_zero.Rd
@@ -0,0 +1,25 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/replace.R
+\name{replace_na_with_zero}
+\alias{replace_na_with_zero}
+\title{Replace NA with zero}
+\usage{
+replace_na_with_zero(df)
+}
+\arguments{
+\item{df}{a data frame}
+}
+\value{
+no return value
+}
+\description{
+Replace NA_integer_ or NA_real_ values with zero in a memory-efficient way
+}
+\examples{
+\donttest{df <- data.frame(x = c(1, NA, 3), y = c("A", "B", NA), z = c(NA, 5, NA))
+pryr::address(df)
+replace_na_with_zero(df)
+pryr::address(df)
+df}
+
+}