From 3ec8695b5d88a7ab55cfec254b066b94a9a48bd2 Mon Sep 17 00:00:00 2001 From: Collin Schwantes Date: Wed, 31 Jul 2024 10:57:31 -0600 Subject: [PATCH 1/6] adding function to obfuscate gps points --- R/obfuscate_gps.R | 34 ++++++++++++++++++++++++++++++++++ 1 file changed, 34 insertions(+) create mode 100644 R/obfuscate_gps.R diff --git a/R/obfuscate_gps.R b/R/obfuscate_gps.R new file mode 100644 index 0000000..4d6b928 --- /dev/null +++ b/R/obfuscate_gps.R @@ -0,0 +1,34 @@ +#' Obfuscate GPS +#' +#' This function fuzzes gps points (or any other numeric values) by first adding +#' error then rounding to a certain number of digits. +#' +#' @param x Numeric. Vector of gps points +#' @param precision Integer. Number of digits to keep. See `round` for more details +#' @param fuzz Numeric. Error to introduce to the gps measurements. This is used +#' to generate the random uniform distribution `runif(1,min = -fuzz, max = fuzz)` +#' +#' @return Numeric. A vector of fuzzed and rounded GPS points +#' @export +#' +#' @examples +#' +#' gps_data <- data.frame(lat = c(1.0001, 10.22223, 4.00588), lon = c(2.39595, 4.506930, -60.09999901)) +#' +#' gps_data |> +#' # default obfuscation settings correspont to roughly a 27 by 27 km area +#' dplyr::mutate(fuzzed_lat = obfuscate_gps(lat) %>% +#' # can be made more or less precise by changing the number of decimal points +#' # included or modifying the amount of fuzz (error) introduced +#' dplyr::mutate(fuzzed_lon = obfuscate_gps(lon, precision = 4, fuzz = 0.002)) +#' +obfuscate_gps <- function(x, precision = 2, fuzz = 0.125){ + + # fuzz point + gps_error <- runif(1,min = -fuzz, max = fuzz) + x_fuzz <- x + gps_error + + # round to 2 decimal points + out <- round(x_fuzz,digits = precision) + return(out) +} From f2104f12a2f69d67401e5a8b6c25cf0b5027ae2b Mon Sep 17 00:00:00 2001 From: Collin Schwantes Date: Wed, 31 Jul 2024 10:58:37 -0600 Subject: [PATCH 2/6] Increment version number to 0.3.0 --- DESCRIPTION | 2 +- 1 file changed, 1 
insertion(+), 1 deletion(-) diff --git a/DESCRIPTION b/DESCRIPTION index d6345a9..cec59e3 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,7 +1,7 @@ Package: ohcleandat Type: Package Title: One Health Data Cleaning and Quality Checking Package -Version: 0.2.6 +Version: 0.3.0 Authors@R: c( person("Collin", "Schwantes", email = "schwantes@ecohealthalliance.org", role = c("cre", "aut"), comment = c(ORCID = "0000-0003-4014-4896")), person("Johana", "Teigen", email = "teigen@ecohealthalliance.org", role = "aut", comment = c(ORCID = "0000-0002-6209-2321")), From e570c1fea4d12d53d726a6953e01e9dc0b424516 Mon Sep 17 00:00:00 2001 From: Collin Schwantes Date: Wed, 31 Jul 2024 11:00:19 -0600 Subject: [PATCH 3/6] Add NEWS.md --- NEWS.md | 3 +++ 1 file changed, 3 insertions(+) create mode 100644 NEWS.md diff --git a/NEWS.md b/NEWS.md new file mode 100644 index 0000000..34ede4c --- /dev/null +++ b/NEWS.md @@ -0,0 +1,3 @@ +# ohcleandat 0.3.0 + +* Adding GPS obfuscation function From afb62d74a167390289bfec2ec5af50287dd6ede2 Mon Sep 17 00:00:00 2001 From: Collin Schwantes Date: Wed, 31 Jul 2024 11:21:48 -0600 Subject: [PATCH 4/6] updating documentation --- DESCRIPTION | 2 +- NAMESPACE | 1 + R/obfuscate_gps.R | 5 +++-- man/obfuscate_gps.Rd | 36 ++++++++++++++++++++++++++++++++++++ 4 files changed, 41 insertions(+), 3 deletions(-) create mode 100644 man/obfuscate_gps.Rd diff --git a/DESCRIPTION b/DESCRIPTION index cec59e3..335bb16 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -14,7 +14,7 @@ Description: This package provides useful functions to orchestrate analytics and License: MIT + file LICENSE Encoding: UTF-8 LazyData: true -RoxygenNote: 7.3.2 +RoxygenNote: 7.3.2.9000 Suggests: knitr, rmarkdown diff --git a/NAMESPACE b/NAMESPACE index bfb3aa0..8434ec3 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -20,6 +20,7 @@ export(get_species_letter) export(guess_col_type) export(id_checker) export(make_report_urls) +export(obfuscate_gps) export(othertext_lookup) export(read_excel_all_sheets) 
export(read_googlesheets) diff --git a/R/obfuscate_gps.R b/R/obfuscate_gps.R index 4d6b928..a21987c 100644 --- a/R/obfuscate_gps.R +++ b/R/obfuscate_gps.R @@ -13,7 +13,8 @@ #' #' @examples #' -#' gps_data <- data.frame(lat = c(1.0001, 10.22223, 4.00588), lon = c(2.39595, 4.506930, -60.09999901)) +#' gps_data <- data.frame(lat = c(1.0001, 10.22223, 4.00588), +#' lon = c(2.39595, 4.506930, -60.09999901)) #' #' gps_data |> #' # default obfuscation settings correspont to roughly a 27 by 27 km area @@ -25,7 +26,7 @@ obfuscate_gps <- function(x, precision = 2, fuzz = 0.125){ # fuzz point - gps_error <- runif(1,min = -fuzz, max = fuzz) + gps_error <- stats::runif(1,min = -fuzz, max = fuzz) x_fuzz <- x + gps_error # round to 2 decimal points diff --git a/man/obfuscate_gps.Rd b/man/obfuscate_gps.Rd new file mode 100644 index 0000000..fee01a7 --- /dev/null +++ b/man/obfuscate_gps.Rd @@ -0,0 +1,36 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/obfuscate_gps.R +\name{obfuscate_gps} +\alias{obfuscate_gps} +\title{Obfuscate GPS} +\usage{ +obfuscate_gps(x, precision = 2, fuzz = 0.125) +} +\arguments{ +\item{x}{Numeric. Vector of gps points} + +\item{precision}{Integer. Number of digits to keep. See \code{round} for more details} + +\item{fuzz}{Numeric. Error to introduce to the gps measurements. This is used +to generate the random uniform distribution \code{runif(1,min = -fuzz, max = fuzz)}} +} +\value{ +Numeric. A vector of fuzzed and rounded GPS points +} +\description{ +This function fuzzes gps points (or any other numeric values) by first adding +error then rounding to a certain number of digits. 
+} +\examples{ + +gps_data <- data.frame(lat = c(1.0001, 10.22223, 4.00588), + lon = c(2.39595, 4.506930, -60.09999901)) + +gps_data |> + # default obfuscation settings correspont to roughly a 27 by 27 km area + dplyr::mutate(fuzzed_lat = obfuscate_gps(lat) \%>\% + # can be made more or less precise by changing the number of decimal points + # included or modifying the amount of fuzz (error) introduced + dplyr::mutate(fuzzed_lon = obfuscate_gps(lon, precision = 4, fuzz = 0.002)) + +} From fb3d79772b8491689949390efa70369a3067fe2d Mon Sep 17 00:00:00 2001 From: Collin Schwantes Date: Wed, 31 Jul 2024 11:36:49 -0600 Subject: [PATCH 5/6] converted examples to base r --- R/obfuscate_gps.R | 21 ++++++++++++--------- man/obfuscate_gps.Rd | 19 +++++++++++-------- 2 files changed, 23 insertions(+), 17 deletions(-) diff --git a/R/obfuscate_gps.R b/R/obfuscate_gps.R index a21987c..7d453d9 100644 --- a/R/obfuscate_gps.R +++ b/R/obfuscate_gps.R @@ -13,15 +13,18 @@ #' #' @examples #' -#' gps_data <- data.frame(lat = c(1.0001, 10.22223, 4.00588), -#' lon = c(2.39595, 4.506930, -60.09999901)) -#' -#' gps_data |> -#' # default obfuscation settings correspont to roughly a 27 by 27 km area -#' dplyr::mutate(fuzzed_lat = obfuscate_gps(lat) %>% -#' # can be made more or less precise by changing the number of decimal points -#' # included or modifying the amount of fuzz (error) introduced -#' dplyr::mutate(fuzzed_lon = obfuscate_gps(lon, precision = 4, fuzz = 0.002)) +#' gps_data <- data.frame(lat = c(1.0001, 10.22223, 4.00588), +#' lon = c(2.39595, 4.506930, -60.09999901)) +#' +#' # Default obfuscation settings correspont to roughly a 27 by 27 km area +#' gps_data$lat |> +#' obfuscate_gps() +#' +#' # Obfuscation can be made more or less precise by changing the number of +#' # decimal points included or modifying the amount of fuzz (error) +#' # introduced +#' gps_data$lon |> +#' obfuscate_gps(precision = 4, fuzz = 0.002) #' obfuscate_gps <- function(x, precision = 2, fuzz = 0.125){ 
diff --git a/man/obfuscate_gps.Rd b/man/obfuscate_gps.Rd index fee01a7..61903c2 100644 --- a/man/obfuscate_gps.Rd +++ b/man/obfuscate_gps.Rd @@ -23,14 +23,17 @@ error then rounding to a certain number of digits. } \examples{ -gps_data <- data.frame(lat = c(1.0001, 10.22223, 4.00588), - lon = c(2.39595, 4.506930, -60.09999901)) + gps_data <- data.frame(lat = c(1.0001, 10.22223, 4.00588), + lon = c(2.39595, 4.506930, -60.09999901)) -gps_data |> - # default obfuscation settings correspont to roughly a 27 by 27 km area - dplyr::mutate(fuzzed_lat = obfuscate_gps(lat) \%>\% - # can be made more or less precise by changing the number of decimal points - # included or modifying the amount of fuzz (error) introduced - dplyr::mutate(fuzzed_lon = obfuscate_gps(lon, precision = 4, fuzz = 0.002)) + # Default obfuscation settings correspont to roughly a 27 by 27 km area + gps_data$lat |> + obfuscate_gps() + + # Obfuscation can be made more or less precise by changing the number of + # decimal points included or modifying the amount of fuzz (error) + # introduced + gps_data$lon |> + obfuscate_gps(precision = 4, fuzz = 0.002) } From 9dfe2c9af1904f08aa6620143ab043528395a9c7 Mon Sep 17 00:00:00 2001 From: Collin Schwantes Date: Thu, 1 Aug 2024 16:14:07 -0600 Subject: [PATCH 6/6] making obfuscate_gps more robust --- NAMESPACE | 3 + NEWS.md | 5 +- R/obfuscate_gps.R | 194 +++++++++++++++++++++++++++++++++++++++---- man/get_precision.Rd | 30 +++++++ man/obfuscate_gps.Rd | 71 ++++++++++++---- 5 files changed, 272 insertions(+), 31 deletions(-) create mode 100644 man/get_precision.Rd diff --git a/NAMESPACE b/NAMESPACE index 8434ec3..a3a77b6 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -16,11 +16,14 @@ export(dropbox_upload) export(get_dropbox_val_logs) export(get_odk_form_schema) export(get_odk_responses) +export(get_precision) export(get_species_letter) export(guess_col_type) export(id_checker) export(make_report_urls) export(obfuscate_gps) +export(obfuscate_lat) +export(obfuscate_lon) 
export(othertext_lookup) export(read_excel_all_sheets) export(read_googlesheets) diff --git a/NEWS.md b/NEWS.md index 34ede4c..e14876f 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,3 +1,6 @@ # ohcleandat 0.3.0 -* Adding GPS obfuscation function +* Adding GPS obfuscation function - this function uses two methods to reduce the +accuracy of GPS points. The first is adding some amount of error to the measurement +from a user defined random uniform distribution. The second is by rounding to +remove precision from the measurement. diff --git a/R/obfuscate_gps.R b/R/obfuscate_gps.R index 7d453d9..f1f5bbe 100644 --- a/R/obfuscate_gps.R +++ b/R/obfuscate_gps.R @@ -1,32 +1,99 @@ #' Obfuscate GPS #' -#' This function fuzzes gps points (or any other numeric values) by first adding +#' This function fuzzes gps points by first adding #' error then rounding to a certain number of digits. #' +#' #' @param x Numeric. Vector of gps points -#' @param precision Integer. Number of digits to keep. See `round` for more details -#' @param fuzz Numeric. Error to introduce to the gps measurements. This is used -#' to generate the random uniform distribution `runif(1,min = -fuzz, max = fuzz)` +#' @param precision Integer. Number of digits to keep. See `round` for more +#' details +#' @param fuzz Numeric. Positive number indicating how much error to introduce +#' to the gps measurements. This is used to generate the random uniform +#' distribution `runif(1,min = -fuzz, max = fuzz)` +#' @param type Character. One of "lat" or "lon" #' #' @return Numeric. 
A vector of fuzzed and rounded GPS points #' @export #' #' @examples #' -#' gps_data <- data.frame(lat = c(1.0001, 10.22223, 4.00588), -#' lon = c(2.39595, 4.506930, -60.09999901)) +#' # make data +#' gps_data <- data.frame(lat = c(1.0001, 10.22223, 4.00588), +#' lon = c(2.39595, 4.506930, -60.09999901)) +#' +#' # Default obfuscation settings correspond to roughly a 27 by 27 km area +#' gps_data$lat |> +#' obfuscate_gps(type = "lat") +#' +#' # Obfuscation can be made more or less precise by changing the number of +#' # decimal points included or modifying the amount of fuzz (error) +#' # introduced +#' gps_data$lon |> +#' obfuscate_gps(precision = 4, fuzz = 0.002, type = "lon") +#' +#' ### working at the poles +#' gps_data_poles <- data.frame(lat = c(89.0001, 89.22223, -89.8881), +#' lon = c(2.39595, 4.506930, -60.09999901)) +#' +#' +#' gps_data_poles$lat |> +#' obfuscate_gps(fuzz = 1, type = "lat") +#' +#' +#' ### working at the 180th meridian +#' gps_data_180 <- data.frame(lat = c(2, 3, 4), +#' lon = c(179.39595, -179.506930, -178.09999901)) +#' gps_data_180$lon |> +#' obfuscate_gps(fuzz = 1, type = "lon") #' -#' # Default obfuscation settings correspont to roughly a 27 by 27 km area -#' gps_data$lat |> -#' obfuscate_gps() +#' ### GPS is on the fritz! 
+#' \dontrun{ +#' gps_data_fritz <- data.frame(lat = c(91, -91, 90), +#' lon = c(181.0001, -181.9877, -178.09999901)) +#' gps_data_fritz$lon |> +#' obfuscate_gps(fuzz = 1, type = "lon") #' -#' # Obfuscation can be made more or less precise by changing the number of -#' # decimal points included or modifying the amount of fuzz (error) -#' # introduced -#' gps_data$lon |> -#' obfuscate_gps(precision = 4, fuzz = 0.002) +#' gps_data_fritz$lat |> +#' obfuscate_gps(fuzz = 1, type = "lat") +#' } #' -obfuscate_gps <- function(x, precision = 2, fuzz = 0.125){ +obfuscate_gps <- function(x, precision = 2, fuzz = 0.125, type = c("lat","lon")){ + + ## max precision in your data + # find value in x with most decimal points + data_precision <- get_precision(x,func = min) + + msg_data_precision <- sprintf("The data have a max precision of: %s",data_precision) + message(msg_data_precision) + + ## warning for max shift is fuzz+1e-precision + max_shift <- fuzz+10^-(precision/2) + msg_max_shift <- sprintf("The max shift from the combination of precision and fuzz is: %s degrees",max_shift) + message(msg_max_shift) + + if(max_shift/fuzz > 2){ + message("The majority of the obfuscation is coming from rounding, this + potentially makes re-identification easier") + } + + ## check if obfuscation will have an impact on the data + + + + type <- match.arg(type, c("lat","lon")) + + if(type == "lat"){ + out <- obfuscate_lat(x,precision,fuzz) + } + + if(type == "lon"){ + out <- obfuscate_lon(x,precision,fuzz) + } + + return(out) +} + +obfuscate_point <- function(x, precision = 2, fuzz = 0.125){ # fuzz point gps_error <- stats::runif(1,min = -fuzz, max = fuzz) @@ -36,3 +103,100 @@ obfuscate_gps <- function(x, precision = 2, fuzz = 0.125){ out <- round(x_fuzz,digits = precision) return(out) } + + +#' Obfuscates latitude data +#' +#' @rdname obfuscate_gps +#' +#' @return Numeric vector +#' @export +#' +obfuscate_lat <- function(x, precision = 2, fuzz = 0.125){ + + ## check that fuzz doesnt exceed 
maximum values + if(fuzz > 90){ + stop("fuzz greater than range of latitude on earth") + } + + if(any(x > 90 | x < -90)){ + stop("Latitude is outside the range of latitude on earth") + } + + points <- obfuscate_point(x,precision,fuzz) + + # make sure point is between 90 and -90 + points_in_range <- purrr::map_dbl(points,function(point){ + while(all(point > 90 | point < -90)){ + point <- obfuscate_point(point,precision,fuzz) + } + return(point) + }) + + return(points_in_range) +} + + +#' Obfuscates longitude data +#' +#' @rdname obfuscate_gps +#' +#' @return Numeric vector +#' @export +#' +obfuscate_lon <- function(x, precision = 2, fuzz = 0.125){ + + ## check that fuzz doesnt exceed maximum values + if(fuzz > 180){ + stop("fuzz greater than range of longitude on earth") + } + + if(any(x > 180 | x < -180)){ + stop("Longitude is outside the range of longitude on earth ") + } + + points <- obfuscate_point(x,precision,fuzz) + + ### wrap points near the 180th meridian + points_in_range <- purrr::map_dbl(points,function(point){ + + # if point greater than 180, wrap + if(point > 180){ + difference <- point - 180 + point <- -180 + difference + } + + # if point less than -180, wrap + if(point < -180){ + difference <- -180-point + point <- 180 - difference + } + + return(point) + }) + + return(points_in_range) +} + +#' Get Precision +#' +#' @param x Numeric. Vector of gps points +#' @param func Function. Apply some function to the vector of precisions. 
Default is c so that +#' all values are returned +#' +#' @return output of func - likely a vector +#' @export +#' @author Nathan Layman +#' +#' @examples +#' +#' x <- c(1,100,1.11) +#' get_precision(x,func = min) +#' +#' +get_precision <- function(x,func = c) { + # number of characters with the decimal - number of characters without it + precision <- 10^-(nchar(gsub("\\.", "", as.character(x))) - nchar(as.character(trunc(x)))) + out <- func(precision) + return(out) +} diff --git a/man/get_precision.Rd b/man/get_precision.Rd new file mode 100644 index 0000000..51e356c --- /dev/null +++ b/man/get_precision.Rd @@ -0,0 +1,30 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/obfuscate_gps.R +\name{get_precision} +\alias{get_precision} +\title{Get Precision} +\usage{ +get_precision(x, func = c) +} +\arguments{ +\item{x}{Numeric. Vector of gps points} + +\item{func}{Function. Apply some function to the vector of precisions. Default is c so that +all values are returned} +} +\value{ +output of func - likely a vector +} +\description{ +Get Precision +} +\examples{ + +x <- c(1,100,1.11) +get_precision(x,func = min) + + +} +\author{ +Nathan Layman +} diff --git a/man/obfuscate_gps.Rd b/man/obfuscate_gps.Rd index 61903c2..c69e348 100644 --- a/man/obfuscate_gps.Rd +++ b/man/obfuscate_gps.Rd @@ -2,38 +2,79 @@ % Please edit documentation in R/obfuscate_gps.R \name{obfuscate_gps} \alias{obfuscate_gps} +\alias{obfuscate_lat} +\alias{obfuscate_lon} \title{Obfuscate GPS} \usage{ -obfuscate_gps(x, precision = 2, fuzz = 0.125) +obfuscate_gps(x, precision = 2, fuzz = 0.125, type = c("lat", "lon")) + +obfuscate_lat(x, precision = 2, fuzz = 0.125) + +obfuscate_lon(x, precision = 2, fuzz = 0.125) } \arguments{ \item{x}{Numeric. Vector of gps points} -\item{precision}{Integer. Number of digits to keep. See \code{round} for more details} +\item{precision}{Integer. Number of digits to keep. See \code{round} for more +details} -\item{fuzz}{Numeric. 
Error to introduce to the gps measurements. This is used -to generate the random uniform distribution \code{runif(1,min = -fuzz, max = fuzz)}} +\item{fuzz}{Numeric. Positive number indicating how much error to introduce +to the gps measurements. This is used to generate the random uniform +distribution \code{runif(1,min = -fuzz, max = fuzz)}} + +\item{type}{Character. One of "lat" or "lon"} } \value{ Numeric. A vector of fuzzed and rounded GPS points + +Numeric vector + +Numeric vector } \description{ -This function fuzzes gps points (or any other numeric values) by first adding +This function fuzzes gps points by first adding error then rounding to a certain number of digits. } \examples{ - gps_data <- data.frame(lat = c(1.0001, 10.22223, 4.00588), - lon = c(2.39595, 4.506930, -60.09999901)) +# make data +gps_data <- data.frame(lat = c(1.0001, 10.22223, 4.00588), + lon = c(2.39595, 4.506930, -60.09999901)) + +# Default obfuscation settings correspond to roughly a 27 by 27 km area +gps_data$lat |> + obfuscate_gps(type = "lat") + +# Obfuscation can be made more or less precise by changing the number of +# decimal points included or modifying the amount of fuzz (error) +# introduced +gps_data$lon |> + obfuscate_gps(precision = 4, fuzz = 0.002, type = "lon") - # Default obfuscation settings correspont to roughly a 27 by 27 km area - gps_data$lat |> - obfuscate_gps() +### working at the poles +gps_data_poles <- data.frame(lat = c(89.0001, 89.22223, -89.8881), + lon = c(2.39595, 4.506930, -60.09999901)) - # Obfuscation can be made more or less precise by changing the number of - # decimal points included or modifying the amount of fuzz (error) - # introduced - gps_data$lon |> - obfuscate_gps(precision = 4, fuzz = 0.002) + +gps_data_poles$lat |> + obfuscate_gps(fuzz = 1, type = "lat") + + +### working at the 180th meridian +gps_data_180 <- data.frame(lat = c(2, 3, 4), + lon = c(179.39595, -179.506930, -178.09999901)) +gps_data_180$lon |> + obfuscate_gps(fuzz = 1, type = 
"lon") + +### GPS is on the fritz! +\dontrun{ +gps_data_fritz <- data.frame(lat = c(91, -91, 90), + lon = c(181.0001, -181.9877, -178.09999901)) +gps_data_fritz$lon |> + obfuscate_gps(fuzz = 1, type = "lon") + +gps_data_fritz$lat |> + obfuscate_gps(fuzz = 1, type = "lat") +} }