-
Notifications
You must be signed in to change notification settings - Fork 1
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
all names changed from ESmisc package
- Loading branch information
0 parents
commit 02f28a0
Showing
20 changed files
with
663 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,5 @@ | ||
# R for travis: see documentation at https://docs.travis-ci.com/user/languages/r | ||
|
||
language: R | ||
sudo: false | ||
cache: packages |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,33 @@ | ||
Package: spanish | ||
Type: Package | ||
Title: Misc Functions For Spanish Data | ||
Version: 0.2.0 | ||
Date: 2017-07-01 | ||
Author: person( "Jose Manuel","Vera Oteo", email = "[email protected]", | ||
role = c("aut","cre")) | ||
URL: https://github.com/verajosemanuel | ||
BugReports: https://github.com/verajosemanuel/spanish/issues | ||
Maintainer: Jose M. Vera <[email protected]> | ||
Depends: magrittr, xml2 | ||
Description: Character vector to numerical translation in Euros from Spanish | ||
spelled monetary quantities. Text must be cleaned beforehand, removing | ||
extraneous words, symbols and cents. Quantities MUST be written in correct | ||
Spanish, because this is not a grammar tool. The upper limit is the millions | ||
range. | ||
Geocoding from cadastral reference number. Source data must be a valid | ||
cadastral reference or downloaded kml files from catastro website. | ||
Be careful geocoding from catastro. You will be banned if many requests | ||
are issued in a short period of time. geocode_cadastral() waits 2 seconds | ||
between requests. | ||
License: GPL-3 | ||
Encoding: UTF-8 | ||
LazyData: true | ||
Collate: | ||
'geocode_cadastral.R' | ||
'to_number.R' | ||
'cadastral_references-data.R' | ||
'cantidades-data.R' | ||
'spanish.R' | ||
'zzz.R' | ||
RoxygenNote: 6.0.1.9000 | ||
Suggests: testthat, tidyr |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,6 @@ | ||
# Generated by roxygen2: do not edit by hand | ||
|
||
import(magrittr) | ||
import(xml2) | ||
export(geocode_cadastral) | ||
export(to_number) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,45 @@ | ||
#' Cadastral references test data | ||
#' | ||
#' Randomly selected data from catastro to test geocode_cadastral function | ||
#' | ||
#' @docType data | ||
#' | ||
#' @usage data(cadastral_references) | ||
#' | ||
#' @format A data frame. | ||
#' | ||
#' @keywords datasets | ||
#' | ||
#' @references Catastro. Ministerio de Hacienda y función pública. | ||
#' (\href{http://www.catastro.meh.es/}{Catastro}) | ||
#' | ||
#' @source \href{http://www.catastro.meh.es/}{Sede Electrónica del Catastro} | ||
#' | ||
#' @examples | ||
#' ## source is cadastral reference number ## | ||
#' | ||
#' geocode_cadastral("0636105UF3403N", parse_files = FALSE) | ||
#' | ||
#' ## Use lapply to geocode cadastral references from dataframe columns. | ||
#' | ||
#' cadastral_references$new <- lapply(cadastral_references$cadref1, geocode_cadastral) | ||
#' | ||
#' ## separate previously generated "new" data into columns using tidyr | ||
#' | ||
#' library(tidyr) | ||
#' separate(cadastral_references, new, into = c('longitude','latitude'), sep = "," ) | ||
#' | ||
#' ## source is folder. A loop is needed to process each kml file ## | ||
#' | ||
#' \dontrun{ | ||
#' files <- list.files("folder", full.names = T) | ||
#' | ||
#' for (f in files) { | ||
#' coords <- geocode_cadastral(f, parse_files = TRUE) | ||
#' d <- as.data.frame(rbind(d , as.data.frame(coords, stringsAsFactors = F ))) | ||
#' } | ||
#' | ||
#'# separate lat/lon into columns if you prefer using tidyr | ||
#' d <- tidyr::separate(coords, into = c("longitude","latitude"), sep = "," ) | ||
#'} | ||
"cadastral_references" |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,22 @@ | ||
#' Cantidades test data | ||
#' | ||
#' Randomly generated spanish spelled monetary integers to test to_number function | ||
#' | ||
#' @docType data | ||
#' | ||
#' @usage data(cantidades) | ||
#' | ||
#' @format A data frame. | ||
#' | ||
#' @keywords datasets | ||
#' | ||
#' | ||
#' @examples | ||
#' to_number("mil trescientos noventa y dos") | ||
#' | ||
#' | ||
#' ## testing provided dataframe: cantidades | ||
#' | ||
#' cantidades$var3 <- lapply(cantidades$var2, to_number) | ||
#' | ||
"cantidades" |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,90 @@ | ||
#' geocode by longitude and latitude from cadastral references. | ||
#' | ||
#' Get longitude/latitude from valid cadastral ref. or kml files from catastro. | ||
#' | ||
#' @keywords geocoding, latitude, longitude, cadastre, cadastral reference. | ||
#' @param x A valid spanish cadastral reference. | ||
#' @param parse_files Logical, default FALSE. Set TRUE when x is a KML file path. | ||
#' @return A string for longitude/latitude if found. NA if not found. | ||
#' @section Warning: You may be banned if many requests in short time are made. | ||
#' @export | ||
#' @examples | ||
#' ## source is cadastral reference number ## | ||
#' \dontrun{ | ||
#' # geocode_cadastral("0636105UF3403N", parse_files = FALSE) | ||
#' | ||
#' ##"36.5209422288168,-4.89298751473745" | ||
#' | ||
#' ## Use lapply to geocode cadastral references from dataframe columns. | ||
#' | ||
#' cadastral_references$new <- lapply(cadastral_references$cadref1, geocode_cadastral) | ||
#' | ||
#' ## separate previously generated "new" data into columns using tidyr | ||
#' | ||
#' # library(tidyr) | ||
#' # separate(cadastral_references, new, into = c('longitude','latitude'), sep = "," ) | ||
#' | ||
#' ## source is folder. A loop is needed to process each kml file ## | ||
#' | ||
#' # files <- list.files("folder", full.names = T) | ||
#' | ||
#' # for (f in files) { | ||
#' # coords <- geocode_cadastral(f, parse_files = TRUE) | ||
#' # d <- as.data.frame(rbind(d , as.data.frame(coords, stringsAsFactors = F ))) | ||
#' # } | ||
#' | ||
#'# separate lat/lon into columns if you prefer using tidyr | ||
#' # d <- tidyr::separate(coords, into = c("longitude","latitude"), sep = "," ) | ||
#'} | ||
|
||
utils::globalVariables(".") | ||
|
||
# Look up the coordinates for a cadastral reference, or read them from a KML
# file, and return them as text.
#
# Args:
#   x: when parse_files is FALSE, a cadastral reference that is appended to
#      the catastro lookup URL; when parse_files is TRUE, the path of a file
#      to read.
#   parse_files: TRUE to open x as a local file, FALSE (default) to query the
#      catastro web service. The web path sleeps 2 seconds before each
#      request.
#
# Returns:
#   The text of every //Point/coordinates node with its last two characters
#   removed, or NA when no such node exists or the document cannot be
#   read/parsed (a warning carrying the underlying error is raised then).
geocode_cadastral <- function(x, parse_files = FALSE) {

  if (!requireNamespace("xml2", quietly = TRUE)) {
    stop("xml2 needed for this function to work. Please install it.",
         call. = FALSE)
  }

  if (parse_files) {
    con <- file(x, "rb")
    # Release the connection on every exit path, including errors.
    on.exit(close(con), add = TRUE)
  } else {
    con <- paste0(
      "http://ovc.catastro.meh.es/Cartografia/WMS/BuscarParcelaGoogle.aspx?RefCat=",
      x
    )
    # Throttle web requests.
    Sys.sleep(2)
  }

  coords <- tryCatch(
    {
      kml_text <- as.character(xml2::read_xml(con))
      # Turn the default namespace declaration into a prefixed one;
      # presumably this is what lets the un-prefixed XPath below match.
      kml_text <- sub("kml xmlns", "kml xmlns:X", kml_text)
      nodes <- xml2::xml_find_all(
        xml2::as_xml_document(kml_text),
        "//Point/coordinates"
      )
      # Drop the last two characters of each coordinate string.
      gsub(".{2}$", "", xml2::xml_text(nodes))
    },
    error = function(e) {
      # Previously a failure here left `coords` undefined and the function
      # crashed with "object 'coords' not found"; return NA as documented.
      warning("geocode_cadastral() could not read coordinates: ",
              conditionMessage(e), call. = FALSE)
      NA
    }
  )

  if (length(coords) == 0) {
    coords <- NA
  }

  coords
}
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,23 @@ | ||
#' spanish: A package for spanish related data functions. | ||
#' | ||
#' The spanish package provides two functions: | ||
#' to_number() and geocode_cadastral() | ||
#' | ||
#' @section to_number(): | ||
#' Translate spanish spelled quantities into their integer counterparts. | ||
#' Allows you to translate to integer numerical words spelled in spanish. | ||
#' Text must be previously cleaned & removed extraneous words or symbols. | ||
#' Quantities MUST be written in a correct Spanish (this is not a grammar tool) | ||
#' The upper limit is up to the millions range. Cents must be removed. | ||
#' (in my TODO list to parse cents part) | ||
#' | ||
#' @section geocode_cadastral(): | ||
#' Geocode by longitude and latitude from cadastral references. | ||
#' Get longitude/latitude from valid cadastral ref. or kml files from catastro. | ||
#' | ||
#' @section Warning: You may be banned if many requests in short time are made | ||
#' to catastro. Please be warned. | ||
#' | ||
#' @docType package | ||
#' @name spanish | ||
NULL |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,80 @@ | ||
#' translate spanish spelled quantities into their integer counterparts. | ||
#' | ||
#' Translates numbers spelled out in Spanish words into integers. | ||
#' Text must be cleaned beforehand, removing extraneous words or symbols. | ||
#' Quantities MUST be written in correct Spanish (this is not a grammar tool). | ||
#' The upper limit is the millions range. Cents must be removed. | ||
#' | ||
#' @keywords money, currency, euros | ||
#' @export | ||
#' @param x A spanish spelled number. | ||
#' @examples | ||
#' to_number("mil trescientos noventa y dos") | ||
#' | ||
#' | ||
#' ## Example dataframe is provided: cantidades | ||
#' | ||
#' cantidades$var3 <- lapply(cantidades$var2, to_number) | ||
#' | ||
|
||
utils::globalVariables(".") | ||
|
||
# Translate Spanish spelled-out quantities into integers.
#
# Each input string is rewritten, through an ordered list of regular
# expression substitutions, into an arithmetic expression such as
# "(0+2)*(1000)+(0+300)", which is then evaluated.
#
# Args:
#   x: character vector (anything else is coerced with as.character()) of
#      quantities spelled in Spanish, e.g. "mil trescientos noventa y dos".
#      Acute accents are optional ("dieciseis" and "diecis\u00e9is" both work).
#
# Returns:
#   An integer vector with one element per element of x. NA inputs give NA.
#
# Raises:
#   An error when, after substitution, anything other than digits, "+", "*"
#   and parentheses is left over, i.e. the text contained words this function
#   does not know. Nothing is evaluated in that case.
to_number <- function(x) {

  # Ordered (pattern, replacement) pairs, one pair per row. ORDER MATTERS:
  # longer words must be consumed before the shorter words they contain
  # ("doscientos" before "dos", "millones" before "mil", "uno" before "un").
  rules <- matrix(
    c(
      "^mil", "1000)+",
      "once", "+11",
      "doce", "+12",
      "trece", "+13",
      "catorce", "+14",
      "quince", "+15",
      "dieciseis", "+16",
      "diecisiete|diez y siete", "+17",
      "dieciocho", "+18",
      "diecinueve", "+19",
      "veinte|veinti", "+20",
      "treinta", "+30",
      "cuarenta", "+40",
      "cincuenta", "+50",
      "sesenta", "+60",
      "setenta", "+70",
      "ochenta", "+80",
      "noventa", "+90",
      "doscientos", "+200",
      "trescientos", "+300",
      "cuatrocientos", "+400",
      "quinientos", "+500",
      "seiscientos", "+600",
      "setecientos", "+700",
      "ochocientos", "+800",
      "novecientos", "+900",
      "uno", "+1",
      "dos", "+2",
      "tres", "+3",
      "cuatro", "+4",
      "cinco", "+5",
      "seis", "+6",
      "siete", "+7",
      "ocho", "+8",
      "nueve", "+9",
      "millones", ")*(1000000)+(0",
      "millon", ")*(1000000)+(0",
      "mil", ")*(1000)+(0",
      "ciento", "+100",
      "cien", "+100",
      "diez", "+10",
      "un", "+1",
      "Y", "",
      " ", "",
      "^", "(0",
      "$", ")",
      "\\(0\\(", "",
      "\\+\\+", "\\+\\(",
      "\\)\\+\\)", "\\)"
    ),
    ncol = 2,
    byrow = TRUE
  )

  # Strip acute accents so correctly accented Spanish matches the
  # unaccented patterns above.
  words <- chartr(
    "\u00e1\u00e9\u00ed\u00f3\u00fa\u00c1\u00c9\u00cd\u00d3\u00da",
    "aeiouAEIOU",
    as.character(x)
  )

  # gsub() is vectorised, so every element is rewritten in one pass per rule.
  exprs <- words
  for (i in seq_len(nrow(rules))) {
    exprs <- gsub(rules[i, 1], rules[i, 2], exprs, ignore.case = TRUE)
  }

  vapply(
    seq_along(exprs),
    function(i) {
      expr <- exprs[[i]]
      if (is.na(expr)) {
        return(NA_integer_)
      }
      # Only evaluate pure arithmetic: this blocks arbitrary code from
      # reaching eval() and gives a clear error for unknown words.
      if (!grepl("^[0-9+*()]+$", expr)) {
        stop("to_number() cannot translate \"", words[[i]],
             "\": unrecognised words or symbols.", call. = FALSE)
      }
      as.integer(eval(parse(text = expr), envir = baseenv()))
    },
    integer(1)
  )
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,20 @@ | ||
# Namespace load hook.
#
# Intentionally silent: startup messages belong in .onAttach(), which already
# emits the package tip, so printing one here as well could show it twice and
# would also print when the namespace is merely loaded (e.g. via `::`).
# Staying silent also avoids drawing from the user's RNG stream on load.
#
# Args:
#   libname: library path where the package was found (unused).
#   pkgname: name of the package (unused).
#
# Returns NULL invisibly.
.onLoad <- function(libname, pkgname) {
  invisible(NULL)
}
|
||
# Attach hook: in interactive sessions, roughly one attach in ten prints a
# startup message with the project URL. Non-interactive sessions return
# immediately without touching the random number generator.
#
# Args:
#   libname: library path where the package was found (unused).
#   pkgname: name of the package (unused).
.onAttach <- function(libname, pkgname) {
  # `&&` short-circuits, so runif() is only drawn when interactive.
  show_tip <- interactive() && stats::runif(1) <= 0.1
  if (!show_tip) {
    return()
  }

  candidates <- rep("https://github.com/verajosemanuel/spanish", 2)
  chosen <- sample(candidates, 1)
  wrapped <- paste(strwrap(chosen), collapse = "\n")
  packageStartupMessage(wrapped)
}
Oops, something went wrong.