Skip to content

Commit

Permalink
all names changed from ESmisc package
Browse files Browse the repository at this point in the history
  • Loading branch information
verajosemanuel committed Dec 4, 2017
0 parents commit 02f28a0
Show file tree
Hide file tree
Showing 20 changed files with 663 additions and 0 deletions.
5 changes: 5 additions & 0 deletions .travis.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
# R for travis: see documentation at https://docs.travis-ci.com/user/languages/r

language: R
sudo: false
cache: packages
33 changes: 33 additions & 0 deletions DESCRIPTION
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
Package: spanish
Type: Package
Title: Misc Functions For Spanish Data
Version: 0.2.0
Date: 2017-07-01
Author: person( "Jose Manuel","Vera Oteo", email = "[email protected]",
role = c("aut","cre"))
URL: https://github.com/verajosemanuel
BugReports: https://github.com/verajosemanuel/spanish/issues
Maintainer: Jose M. Vera <[email protected]>
Depends: magrittr, xml2
Description: Character vector to numerical translation in Euros from Spanish
spelled monetary quantities. Text must be cleaned beforehand, with extraneous
words, symbols and cents removed. Quantities MUST be written in correct
Spanish, because this isn't a grammar tool. The upper limit is the millions
range.
Geocoding from cadastral reference number. Source data must be a valid
cadastral reference or KML files downloaded from the catastro website.
Be careful when geocoding from catastro: you will be banned if many requests
are issued in a short period of time. geocode_cadastral() waits 2 seconds
between requests.
License: GPL-3
Encoding: UTF-8
LazyData: true
Collate:
'geocode_cadastral.R'
'to_number.R'
'cadastral_references-data.R'
'cantidades-data.R'
'spanish.R'
'zzz.R'
RoxygenNote: 6.0.1.9000
Suggests: testthat, tidyr
6 changes: 6 additions & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
# Generated by roxygen2: do not edit by hand

import(magrittr)
import(xml2)
export(geocode_cadastral)
export(to_number)
45 changes: 45 additions & 0 deletions R/cadastral_references-data.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
#' Cadastral references test data
#'
#' Randomly selected data from catastro to test the geocode_cadastral function.
#'
#' @docType data
#'
#' @usage data(cadastral_references)
#'
#' @format A data frame.
#'
#' @keywords datasets
#'
#' @references Catastro. Ministerio de Hacienda y función pública.
#' (\href{http://www.catastro.meh.es/}{Catastro})
#'
#' @source \href{http://www.catastro.meh.es/}{Sede Electrónica del Catastro}
#'
#' @examples
#' ## source is a cadastral reference number ##
#'
#' geocode_cadastral("0636105UF3403N", parse_files = FALSE)
#'
#' ## Use lapply to geocode cadastral references from data frame columns.
#'
#' cadastral_references$new <- lapply(cadastral_references$cadref1, geocode_cadastral)
#'
#' ## separate the previously generated "new" data into columns using tidyr
#'
#' library(tidyr)
#' separate(cadastral_references, new, into = c('longitude','latitude'), sep = "," )
#'
#' ## source is a folder. A loop is needed to process each kml file ##
#'
#' \dontrun{
#' files <- list.files("folder", full.names = TRUE)
#'
#' d <- NULL
#' for (f in files) {
#' coords <- geocode_cadastral(f, parse_files = TRUE)
#' d <- as.data.frame(rbind(d , as.data.frame(coords, stringsAsFactors = FALSE )))
#' }
#'
#' # separate lat/lon into columns if you prefer using tidyr
#' d <- tidyr::separate(d, coords, into = c("longitude","latitude"), sep = "," )
#' }
"cadastral_references"
22 changes: 22 additions & 0 deletions R/cantidades-data.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
#' Cantidades test data
#'
#' Randomly generated monetary integers spelled out in Spanish, used to test
#' the to_number function.
#'
#' @docType data
#'
#' @usage data(cantidades)
#'
#' @format A data frame.
#'
#' @keywords datasets
#'
#'
#' @examples
#' to_number("mil trescientos noventa y dos")
#'
#'
#' ## testing the provided data frame: cantidades
#'
#' cantidades$var3 <- lapply(cantidades$var2, to_number)
#'
"cantidades"
90 changes: 90 additions & 0 deletions R/geocode_cadastral.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,90 @@
#' geocode by longitude and latitude from cadastral references.
#'
#' Get longitude/latitude from valid cadastral ref. or kml files from catastro.
#'
#' @keywords geocoding, latitude, longitude, cadastre, cadastral reference.
#' @param x A valid spanish cadastral reference.
#' @param parse_files Logical. Defaults to FALSE. Set to TRUE if the source is a KML file.
#' @return A string for longitude/latitude if found. NA if not found.
#' @section Warning: You may be banned if many requests in short time are made.
#' @export
#' @examples
#' ## source is cadastral reference number ##
#' \dontrun{
#' # geocode_cadastral("0636105UF3403N", parse_files = FALSE)
#'
#' ##"36.5209422288168,-4.89298751473745"
#'
#' ## Use lapply to geocode cadastral references from dataframe columns.
#'
#' cadastral_references$new <- lapply(cadastral_references$cadref1, geocode_cadastral)
#'
#' ## separate previously generated "new" data into columns using tidyr
#'
#' # library(tidyr)
#' # separate(cadastral_references, new, into = c('longitude','latitude'), sep = "," )
#'
#' ## source is folder. A loop is needed to process each kml file ##
#'
#' # files <- list.files("folder", full.names = T)
#'
#' # for (f in files) {
#' # coords <- geocode_cadastral(f, parse_files = TRUE)
#' # d <- as.data.frame(rbind(d , as.data.frame(coords, stringsAsFactors = F )))
#' # }
#'
#'# separate lat/lon into columns if you prefer using tidyr
#' # d <- tidyr::separate(coords, into = c("longitude","latitude"), sep = "," )
#'}

utils::globalVariables(".")

geocode_cadastral <- function(x, parse_files = FALSE) {
  # Look up the coordinates of a cadastral reference.
  #
  # Args:
  #   x:           a cadastral reference (parse_files = FALSE) or the path to
  #                a KML file (parse_files = TRUE).
  #   parse_files: logical; TRUE reads `x` as a local KML file instead of
  #                querying the catastro web service.
  #
  # Returns:
  #   A character vector with the text of every //Point/coordinates node,
  #   minus its last two characters, or NA when the source cannot be read,
  #   cannot be parsed, or contains no such node.

  if (!requireNamespace("xml2", quietly = TRUE)) {
    stop("xml2 needed for this function to work. Please install it.",
         call. = FALSE)
  }

  if (parse_files) {
    # An unreadable file yields NA instead of aborting the caller's loop.
    src <- tryCatch(file(x, "rb"), error = function(e) NULL)
    if (is.null(src)) {
      return(NA)
    }
    # Close the connection on every exit path, including parse failures.
    on.exit(close(src), add = TRUE)
  } else {
    src <- paste0(
      "http://ovc.catastro.meh.es/Cartografia/WMS/BuscarParcelaGoogle.aspx?RefCat=",
      x
    )
    # Throttle requests to the remote service.
    Sys.sleep(2)
  }

  coords <- tryCatch(
    {
      kml_text <- as.character(xml2::read_xml(src))
      # Turn the default namespace declaration into a prefixed one so the
      # un-prefixed XPath below matches the KML elements.
      kml_text <- sub("kml xmlns", "kml xmlns:X", kml_text)
      points <- xml2::xml_find_all(
        xml2::as_xml_document(kml_text),
        "//Point/coordinates"
      )
      # Drop the trailing two characters of each coordinate string.
      gsub(".{2}$", "", xml2::xml_text(points))
    },
    # Any download or parse failure is reported as "not found".
    error = function(e) NA
  )

  if (length(coords) == 0) {
    coords <- NA
  }

  coords
}

23 changes: 23 additions & 0 deletions R/spanish.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
#' spanish: A package for Spanish-related data functions.
#'
#' The spanish package provides two functions:
#' to_number() and geocode_cadastral()
#'
#' @section to_number():
#' Translates quantities spelled out in Spanish into their integer
#' counterparts.
#' The text must be cleaned beforehand: remove extraneous words and symbols.
#' Quantities MUST be written in correct Spanish (this is not a grammar tool).
#' The upper limit is the millions range. Cents must be removed
#' (parsing the cents part is on the TODO list).
#'
#' @section geocode_cadastral():
#' Geocode by longitude and latitude from cadastral references.
#' Get longitude/latitude from a valid cadastral reference or from KML files
#' downloaded from catastro.
#'
#' @section Warning: You may be banned if many requests are made to catastro
#' in a short time. Please be warned.
#'
#' @docType package
#' @name spanish
NULL
80 changes: 80 additions & 0 deletions R/to_number.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,80 @@
#' translate spanish spelled quantities into their integer counterparts.
#'
#' Translates numbers spelled out in Spanish words into integers.
#' The text must be cleaned beforehand: remove extraneous words and symbols.
#' Quantities MUST be written in correct Spanish (this is not a grammar tool).
#' The upper limit is the millions range. Cents must be removed.
#'
#' @keywords money, currency, euros
#' @export
#' @param x A spanish spelled number.
#' @examples
#' to_number("mil trescientos noventa y dos")
#'
#'
#' ## Example dataframe is provided: cantidades
#'
#' cantidades$var3 <- lapply(cantidades$var2, to_number)
#'

utils::globalVariables(".")

to_number <- function(x) {
  # Translate quantities spelled out in Spanish into integers.
  #
  # Each number word is rewritten into a fragment of an arithmetic expression
  # ("dos mil quinientos" becomes "(0+2)*(1000)+(0+500)"), which is then
  # evaluated.
  #
  # Args:
  #   x: character vector of Spanish spelled quantities. Accented and
  #      unaccented spellings are both accepted ("millón" / "millon").
  #
  # Returns:
  #   An integer vector the same length as `x`; NA elements stay NA.
  #
  # Raises:
  #   An error when an element contains anything other than number words,
  #   digits, "y" and whitespace.

  # Ordered rewrite rules (pattern, replacement). Order matters: longer words
  # must be consumed before the shorter words they contain.
  rules <- matrix(c(
    # Fold accented vowels so correctly accented Spanish matches the rules.
    "[\u00e1\u00c1]", "a",
    "[\u00e9\u00c9]", "e",
    "[\u00ed\u00cd]", "i",
    "[\u00f3\u00d3]", "o",
    "[\u00fa\u00da]", "u",
    "^mil", "1000)+",
    "once", "+11",
    "doce", "+12",
    "trece", "+13",
    "catorce", "+14",
    "quince", "+15",
    "dieciseis", "+16",
    "diecisiete|diez y siete", "+17",
    "dieciocho", "+18",
    "diecinueve", "+19",
    "veinte|veinti", "+20",
    "treinta", "+30",
    "cuarenta", "+40",
    "cincuenta", "+50",
    "sesenta", "+60",
    "setenta", "+70",
    "ochenta", "+80",
    "noventa", "+90",
    "doscientos", "+200",
    "trescientos", "+300",
    "cuatrocientos", "+400",
    "quinientos", "+500",
    "seiscientos", "+600",
    "setecientos", "+700",
    "ochocientos", "+800",
    "novecientos", "+900",
    "uno", "+1",
    "dos", "+2",
    "tres", "+3",
    "cuatro", "+4",
    "cinco", "+5",
    "seis", "+6",
    "siete", "+7",
    "ocho", "+8",
    "nueve", "+9",
    "millones", ")*(1000000)+(0",
    "millon", ")*(1000000)+(0",
    "mil", ")*(1000)+(0",
    "ciento", "+100",
    "cien", "+100",
    "diez", "+10",
    "un", "+1",
    "Y", "",
    # All whitespace is dropped, not only plain spaces.
    "[[:space:]]", "",
    "^", "(0",
    "$", ")",
    "\\(0\\(", "",
    "\\+\\+", "\\+\\(",
    "\\)\\+\\)", "\\)"
  ), ncol = 2L, byrow = TRUE)

  translate_one <- function(txt) {
    if (is.na(txt)) {
      return(NA_integer_)
    }

    expr <- txt
    for (i in seq_len(nrow(rules))) {
      expr <- gsub(rules[i, 1L], rules[i, 2L], expr, ignore.case = TRUE)
    }

    # Security: the text is evaluated below, so refuse anything that is not
    # plain integer arithmetic. This prevents arbitrary code in `x` from
    # being executed.
    if (!grepl("^[0-9+*()]+$", expr)) {
      stop("'", txt, "' is not a valid spanish spelled number.", call. = FALSE)
    }

    value <- tryCatch(
      eval(parse(text = expr), envir = baseenv()),
      error = function(e) {
        stop("'", txt, "' is not a valid spanish spelled number.",
             call. = FALSE)
      }
    )

    as.integer(value)
  }

  vapply(as.character(x), translate_one, integer(1), USE.NAMES = FALSE)
}
20 changes: 20 additions & 0 deletions R/zzz.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
.onLoad <- function(libname, pkgname) {
  # Namespace load hook, intentionally silent.
  #
  # Args:
  #   libname: library path the package was loaded from (unused).
  #   pkgname: name of the package (unused).
  #
  # Loading a namespace should not print anything or draw random numbers:
  # it also happens when another package imports this one or when a function
  # is called as spanish::fun(). The startup tip is emitted by .onAttach(),
  # so emitting it here as well could show it twice on library(spanish).
  invisible(NULL)
}

.onAttach <- function(libname, pkgname) {
  # Attach hook: in interactive sessions, roughly one attach in ten prints a
  # pointer to the project's repository.
  #
  # Args:
  #   libname: library path the package was attached from (unused).
  #   pkgname: name of the package (unused).
  show_tip <- interactive() && stats::runif(1) <= 0.1
  if (!show_tip) {
    return()
  }

  candidates <- c(
    "https://github.com/verajosemanuel/spanish",
    "https://github.com/verajosemanuel/spanish"
  )
  chosen <- sample(candidates, 1)
  wrapped <- paste(strwrap(chosen), collapse = "\n")
  packageStartupMessage(wrapped)
}
Loading

0 comments on commit 02f28a0

Please sign in to comment.