all names changed from ESmisc package

rOpenSpain · Dec 4, 2017 · 02f28a0 · 02f28a0
commit 02f28a0
Show file tree

Hide file tree

Showing 20 changed files with 663 additions and 0 deletions.
diff --git a/.travis.yml b/.travis.yml
@@ -0,0 +1,5 @@
+# R for travis: see documentation at https://docs.travis-ci.com/user/languages/r
+
+language: R
+sudo: false
+cache: packages
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -0,0 +1,33 @@
+Package: spanish
+Type: Package
+Title: Misc Functions For Spanish Data
+Version: 0.2.0
+Date: 2017-07-01
+Author: person( "Jose Manuel","Vera Oteo", email = "[email protected]",
+                role = c("aut","cre")
+URL: https://github.com/verajosemanuel
+BugReports: https://github.com/verajosemanuel/spanish/issues
+Maintainer: Jose M. Vera <[email protected]>
+Depends: magrittr, xml2
+Description: Translates Spanish spelled-out monetary quantities (character
+    vectors) into their numeric value in Euros. Text must be cleaned
+    beforehand: extraneous words, symbols and cents must be removed. Quantities
+    MUST be written in correct Spanish, because this is not a grammar tool. The
+    upper limit is the millions range.
+    Also geocodes cadastral reference numbers. Source data must be a valid
+    cadastral reference or KML files downloaded from the catastro website.
+    Be careful when geocoding from catastro: you will be banned if many requests
+    are issued in a short period of time. geocode_cadastral() waits 2 seconds
+    between requests.
+License: GPL-3
+Encoding: UTF-8
+LazyData: true
+Collate:
+    'geocode_cadastral.R'
+    'to_number.R'
+    'cadastral_references-data.R'
+    'cantidades-data.R'
+    'spanish.R'
+    'zzz.R'
+RoxygenNote: 6.0.1.9000
+Suggests: testthat, tidyr
diff --git a/NAMESPACE b/NAMESPACE
@@ -0,0 +1,6 @@
+# Generated by roxygen2: do not edit by hand
+
+import(magrittr)
+import(xml2)
+export(geocode_cadastral)
+export(to_number)
diff --git a/R/cadastral_references-data.R b/R/cadastral_references-data.R
@@ -0,0 +1,45 @@
+#' Cadastral references test data
+#'
+#' Randomly selected data from catastro to test geocode_cadastral function
+#'
+#' @docType data
+#'
+#' @usage data(cadastral_references)
+#'
+#' @format A data frame.
+#'
+#' @keywords datasets
+#'
+#' @references Catastro. Ministerio de Hacienda y función pública.
+#' (\href{http://www.catastro.meh.es/}{Catastro})
+#'
+#' @source \href{http://www.catastro.meh.es/}{Sede Electrónica del Catastro}
+#'
+#' @examples
+#' ## source is cadastral reference number ##
+#'
+#' geocode_cadastral("0636105UF3403N", parse_files = FALSE)
+#'
+#' ## Use lapply to geocode cadastral references from dataframe columns.
+#'
+#' cadastral_references$new <- lapply(cadastral_references$cadref1, geocode_cadastral)
+#'
+#' ## separate previously generated "new" data into columns using tidyr
+#'
+#' library(tidyr)
+#' separate(cadastral_references, new, into = c('longitude','latitude'), sep = "," )
+#'
+#' ## source is folder. A loop is needed to process each kml file ##
+#'
+#' \dontrun{
+#' files <- list.files("folder", full.names = T)
+#'
+#' for (f in files) {
+#'  coords <- geocode_cadastral(f, parse_files = TRUE)
+#'  d <- as.data.frame(rbind(d , as.data.frame(coords, stringsAsFactors = F )))
+#' }
+#'
+#'# separate lat/lon into columns if you prefer using tidyr
+#' d <- tidyr::separate(coords, into = c("longitude","latitude"), sep = "," )
+#'}
+"cadastral_references"
diff --git a/R/cantidades-data.R b/R/cantidades-data.R
@@ -0,0 +1,22 @@
+#' Cantidades test data
+#'
+#' Randomly generated spanish spelled monetary integers to test to_number function
+#'
+#' @docType data
+#'
+#' @usage data(cantidades)
+#'
+#' @format A data frame.
+#'
+#' @keywords datasets
+#'
+#'
+#' @examples
+#' to_number("mil trescientos noventa y dos")
+#'
+#'
+#' ## testing provided dataframe: cantidades
+#'
+#' cantidades$var3 <- lapply(cantidades$var2, to_number)
+#'
+"cantidades"
diff --git a/R/geocode_cadastral.R b/R/geocode_cadastral.R
@@ -0,0 +1,90 @@
+#' geocode by longitude and latitude from cadastral references.
+#'
+#' Get longitude/latitude from valid cadastral ref. or kml files from catastro.
+#'
+#' @keywords geocoding, latitude, longitude, cadastre, cadastral reference.
+#' @param x A valid spanish cadastral reference.
+#' @param parse_files Logical. Defaults to FALSE. Set to TRUE if the source is a KML file.
+#' @return A string for longitude/latitude if found. NA if not found.
+#' @section Warning: You may be banned if many requests in short time are made.
+#' @export
+#' @examples
+#' ## source is cadastral reference number ##
+#' \dontrun{
+#' # geocode_cadastral("0636105UF3403N", parse_files = FALSE)
+#'
+#' ##"36.5209422288168,-4.89298751473745"
+#'
+#' ## Use lapply to geocode cadastral references from dataframe columns.
+#'
+#' cadastral_references$new <- lapply(cadastral_references$cadref1, geocode_cadastral)
+#'
+#' ## separate previously generated "new" data into columns using tidyr
+#' 
+#' # library(tidyr)
+#' # separate(cadastral_references, new, into = c('longitude','latitude'), sep = "," )
+#'
+#' ## source is folder. A loop is needed to process each kml file ##
+#'
+#' # files <- list.files("folder", full.names = T)
+#'
+#' # for (f in files) {
+#' #  coords <- geocode_cadastral(f, parse_files = TRUE)
+#' # d <- as.data.frame(rbind(d , as.data.frame(coords, stringsAsFactors = F )))
+#' # }
+#'
+#'# separate lat/lon into columns if you prefer using tidyr
+#' # d <- tidyr::separate(coords, into = c("longitude","latitude"), sep = "," )
+#'}
+
+utils::globalVariables(".")
+
+geocode_cadastral <- function(x, parse_files = FALSE) {
+  # Purpose: obtain the coordinate string for a cadastral reference, either
+  # by querying the catastro lookup URL or by reading a local KML file.
+  #
+  # Args:
+  #   x           Cadastral reference (appended to the lookup URL) or, when
+  #               parse_files is TRUE, the path of a KML file.
+  #   parse_files Logical. FALSE (default) builds the lookup URL from `x`;
+  #               TRUE opens `x` as a binary file connection.
+  #
+  # Returns: the text of every //Point/coordinates node with its last two
+  # characters removed, or NA when no node is found or the document cannot
+  # be read or parsed.
+
+  if (!requireNamespace("xml2", quietly = TRUE)) {
+    stop("xml2 needed for this function to work. Please install it.",
+         call. = FALSE)
+  }
+
+  if (parse_files) {
+    con <- file(x, "rb")
+    # Close the connection on every exit path, including errors.
+    on.exit(close(con), add = TRUE)
+  } else {
+    con <-
+      paste0(
+        "http://ovc.catastro.meh.es/Cartografia/WMS/BuscarParcelaGoogle.aspx?RefCat=",
+        x
+      )
+    # Pause before each remote request so repeated calls are throttled.
+    Sys.sleep(2)
+  }
+
+  # Any read/parse failure yields an empty result (mapped to NA below)
+  # instead of leaving `coords` undefined.
+  coords <- tryCatch(
+    {
+      doc <- xml2::read_xml(con)
+      # Turn the default namespace declaration into a prefixed one;
+      # presumably so the un-prefixed XPath below can match.
+      doc <- xml2::as_xml_document(sub("kml xmlns", "kml xmlns:X", doc))
+      nodes <- xml2::xml_find_all(doc, "//Point/coordinates")
+      # Drop the last two characters of each coordinate string
+      # (NOTE(review): looks like a trailing ",0" altitude -- confirm).
+      gsub(".{2}$", "", xml2::xml_text(nodes))
+    },
+    error = function(e) character(0)
+  )
+
+  if (length(coords) == 0) coords <- NA
+
+  coords
+}
+
diff --git a/R/spanish.R b/R/spanish.R
@@ -0,0 +1,23 @@
+#' spanish: A package for spanish related data functions.
+#'
+#' The spanish package provides two functions:
+#' to_number() and geocode_cadastral()
+#'
+#' @section to_number():
+#' Translate spanish spelled quantities into their integer counterparts.
+#' Translates numbers spelled out in Spanish words into integers.
+#' Text must be cleaned beforehand: remove extraneous words and symbols.
+#' Quantities MUST be written in correct Spanish (this is not a grammar tool).
+#' The upper limit is the millions range. Cents must be removed
+#' (parsing the cents part is on the TODO list).
+#'
+#' @section geocode_cadastral():
+#' Geocode by longitude and latitude from cadastral references.
+#' Get longitude/latitude from valid cadastral ref. or kml files from catastro.
+#'
+#' @section Warning: You may be banned if many requests in short time are made
+#' to catastro. Please be warned.
+#'
+#' @docType package
+#' @name spanish
+NULL
diff --git a/R/to_number.R b/R/to_number.R
@@ -0,0 +1,80 @@
+#' translate spanish spelled quantities into their integer counterparts.
+#'
+#' Translates numbers spelled out in Spanish words into integers.
+#' Text must be cleaned beforehand: remove extraneous words and symbols.
+#' Quantities MUST be written in correct Spanish (this is not a grammar tool).
+#' The upper limit is the millions range. Cents must be removed.
+#'
+#' @keywords money, currency, euros
+#' @export
+#' @param x A spanish spelled number.
+#' @examples
+#' to_number("mil trescientos noventa y dos")
+#'
+#'
+#' ## Example dataframe is provided: cantidades
+#'
+#' cantidades$var3 <- lapply(cantidades$var2, to_number)
+#'
+
+utils::globalVariables(".")
+
+to_number <- function(x) {
+  # Purpose: convert Spanish spelled-out quantities into integers by rewriting
+  # each number word into a fragment of an arithmetic expression and then
+  # evaluating that expression.
+  #
+  # Args:
+  #   x  Character vector of spelled-out quantities (coerced with
+  #      as.character). Each element is converted independently.
+  #
+  # Returns: an integer vector the same length as `x`; NA inputs give NA.
+  # Raises:  an error for text that is not fully recognised.
+
+  # Ordered (pattern, replacement) pairs. Order matters: longer words must be
+  # rewritten before the shorter words they contain (e.g. "doscientos" before
+  # "dos", "millones" before "mil").
+  rules <- matrix(
+    c(
+      "^mil", "1000)+",
+      "once", "+11",
+      "doce", "+12",
+      "trece", "+13",
+      "catorce", "+14",
+      "quince", "+15",
+      "dieciseis", "+16",
+      "diecisiete|diez y siete", "+17",
+      "dieciocho", "+18",
+      "diecinueve", "+19",
+      "veinte|veinti", "+20",
+      "treinta", "+30",
+      "cuarenta", "+40",
+      "cincuenta", "+50",
+      "sesenta", "+60",
+      "setenta", "+70",
+      "ochenta", "+80",
+      "noventa", "+90",
+      "doscientos", "+200",
+      "trescientos", "+300",
+      "cuatrocientos", "+400",
+      "quinientos", "+500",
+      "seiscientos", "+600",
+      "setecientos", "+700",
+      "ochocientos", "+800",
+      "novecientos", "+900",
+      "uno", "+1",
+      "dos", "+2",
+      "tres", "+3",
+      "cuatro", "+4",
+      "cinco", "+5",
+      "seis", "+6",
+      "siete", "+7",
+      "ocho", "+8",
+      "nueve", "+9",
+      "millones", ")*(1000000)+(0",
+      "millon", ")*(1000000)+(0",
+      "mil", ")*(1000)+(0",
+      "ciento", "+100",
+      "cien", "+100",
+      "diez", "+10",
+      "un", "+1",
+      "Y", "",
+      " ", ""
+    ),
+    ncol = 2,
+    byrow = TRUE
+  )
+
+  convert_one <- function(txt) {
+    if (is.na(txt)) {
+      return(NA_integer_)
+    }
+
+    # Fold accented vowels so that "dieciséis", "veintidós" or "millón" hit
+    # the unaccented patterns in the rule table.
+    expr <- chartr(
+      "\u00e1\u00e9\u00ed\u00f3\u00fa\u00c1\u00c9\u00cd\u00d3\u00da",
+      "aeiouAEIOU",
+      txt
+    )
+
+    for (i in seq_len(nrow(rules))) {
+      expr <- gsub(rules[i, 1], rules[i, 2], expr, ignore.case = TRUE)
+    }
+
+    # Wrap in "(0" ... ")" and repair the joints left by the word rewriting.
+    expr <- paste0("(0", expr, ")")
+    expr <- gsub("\\(0\\(", "", expr)
+    expr <- gsub("\\+\\+", "+(", expr)
+    expr <- gsub("\\)\\+\\)", ")", expr)
+
+    # The text is about to be evaluated as R code: accept only digits and
+    # arithmetic symbols so leftover words can never be executed.
+    if (!grepl("^[0-9+*()]*$", expr)) {
+      stop("cannot translate '", txt, "': unrecognised words or symbols.",
+           call. = FALSE)
+    }
+
+    value <- tryCatch(
+      eval(parse(text = expr), envir = baseenv()),
+      error = function(e) {
+        stop("cannot translate '", txt, "': malformed quantity.",
+             call. = FALSE)
+      }
+    )
+
+    as.integer(value)
+  }
+
+  vapply(as.character(x), convert_one, integer(1), USE.NAMES = FALSE)
+}
diff --git a/R/zzz.R b/R/zzz.R
@@ -0,0 +1,20 @@
+.onLoad <- function(libname, pkgname) {
+  # Namespace load hook. Deliberately silent: it emits no startup message and
+  # draws no random numbers, so loading the namespace leaves the console and
+  # the session RNG state untouched. The startup message is emitted by
+  # .onAttach instead.
+  #
+  # libname, pkgname: supplied by R when the hook is called; unused here.
+  invisible()
+}
+
+.onAttach <- function(libname, pkgname) {
+  # Attach hook: occasionally prints the project URL as a startup message.
+  # libname, pkgname: supplied by R when the hook is called; unused here.
+
+  # Stay quiet in non-interactive sessions.
+  if (!interactive()) {
+    return()
+  }
+
+  # Show the message only when the uniform draw is at most 0.1.
+  if (stats::runif(1) > 0.1) {
+    return()
+  }
+
+  project_links <- c(
+    "https://github.com/verajosemanuel/spanish",
+    "https://github.com/verajosemanuel/spanish"
+  )
+  chosen <- sample(project_links, 1)
+  wrapped <- strwrap(chosen)
+  packageStartupMessage(paste(wrapped, collapse = "\n"))
+}