rOpenSpain · eugenividal · Jan 14, 2025 · Jan 14, 2025 · Jan 14, 2025 · Jan 28, 2025
diff --git a/R/duckdb-helpers.R b/R/duckdb-helpers.R
@@ -21,7 +21,7 @@
 #'   \item{activity_destination}{\code{factor}. The type of activity at the destination location (e.g., 'home', 'other'). \strong{Note:} Only available for district level data.}
 #'   \item{residence_province_ine_code}{\code{factor}. The province of residence for the group of individuals making the trip, encoded according to the INE classification. \strong{Note:} Only available for district level data.}
 #'   \item{residence_province_name}{\code{factor}. The province of residence for the group of individuals making the trip (e.g., 'Cuenca', 'Girona'). \strong{Note:} Only available for district level data.}
-#'   \item{time_slot}{\code{integer}. The time slot (the hour of the day) during which the trip started, represented as an integer (e.g., 0, 1, 2).}
+#'   \item{hour}{\code{integer}. The time slot (the hour of the day) during which the trip started, represented as an integer (e.g., 0, 1, 2).}
 #'   \item{distance}{\code{factor}. The distance category of the trip, represented as a code (e.g., '002-005' for 2-5 km).}
 #'   \item{n_trips}{\code{double}. The number of trips taken within the specified time slot and distance.}
 #'   \item{trips_total_length_km}{\code{double}. The total length of all trips in kilometers for the specified time slot and distance.}

diff --git a/README.md b/README.md
@@ -32,17 +32,21 @@ downloading and formatting Spanish open mobility data released by the
 Ministry of Transport and Sustainable Mobility of Spain (Ministerio de
 Transportes y Movilidad Sostenible (MITMS) 2024).
 
-It supports the two versions of the Spanish mobility data that consists
-of origin-destination matrices and some additional data sets. [The first
-version](https://www.transportes.gob.es/ministerio/proyectos-singulares/estudios-de-movilidad-con-big-data/estudios-de-movilidad-anteriores/covid-19/opendata-movilidad)
-covers data from 2020 and 2021, including the period of the COVID-19
-pandemic. [The second
-version](https://www.transportes.gob.es/ministerio/proyectos-singulares/estudios-de-movilidad-con-big-data/opendata-movilidad)
-contains data from January 2022 onwards and is regularly updated. Both
-versions of the data primarily consist of mobile phone positioning data,
-and include matrices for overnight stays, individual movements, and
-trips of Spanish residents at different geographical levels. See the
-[package website](https://rOpenSpain.github.io/spanishoddata/) and
+It supports the two versions of the Spanish mobility data. [The first
+version (2020 to
+2021)](https://www.transportes.gob.es/ministerio/proyectos-singulares/estudios-de-movilidad-con-big-data/estudios-de-movilidad-anteriores/covid-19/opendata-movilidad),
+covering the period of the COVID-19 pandemic, contains tables detailing
+trip numbers and distances, broken down by origin, destination,
+activity, residence province, time interval, distance interval, and
+date. It also provides tables of individual counts by location and trip
+frequency. [The second version (2022
+onwards)](https://www.transportes.gob.es/ministerio/proyectos-singulares/estudios-de-movilidad-con-big-data/opendata-movilidad)
+improves spatial resolution, adds trips to and from Portugal and France,
+and introduces new fields for study-related activities and
+sociodemographic factors (income, age, and sex) in the
+origin-destination tables, along with additional tables showing
+individual counts by overnight stay location, residence, and date. See
+the [package website](https://rOpenSpain.github.io/spanishoddata/) and
 vignettes for
 [v1](https://rOpenSpain.github.io/spanishoddata/articles/v1-2020-2021-mitma-data-codebook)
 and
@@ -58,12 +62,13 @@ To effectively work with multiple data files, it’s recommended you set
 up a data directory where the package can search for the data and
 download only the files that are not already present.
 
-# Examples of available data
+## Examples of available data
 
 <div id="fig-barcelona-flows">
 
 ![](vignettes/media/flows_plot.svg)
 
+
 Figure 1: Example of the data available through the package: daily flows
 in Barcelona
 
@@ -78,15 +83,16 @@ To create static maps like that see our vignette
 
 ![](https://ropenspain.github.io/spanishoddata/media/spain-folding-flows.gif)
 
+
 Figure 2: Example of the data available through the package: interactive
 daily flows in Spain
 
 </div>
-
 <div id="fig-spain-flows">
 
 ![](https://ropenspain.github.io/spanishoddata/media/barcelona-time.gif)
 
+
 Figure 3: Example of the data available through the package: interactive
 daily flows in Barcelona with time filter
 
@@ -104,9 +110,7 @@ install.packages("spanishoddata")
 ```
 
 <details>
-
 <summary>
-
 Alternative installation and development
 </summary>
 
@@ -167,9 +171,7 @@ The function above will also ensure that the directory is created and
 that you have sufficient permissions to write to it.
 
 <details>
-
 <summary>
-
 Setting data directory for advanced users
 </summary>
 
@@ -238,11 +240,12 @@ package.
 <img src="man/figures/package-functions-overview.svg"
 style="width:78.0%" />
 
+
 Figure 4: The overview of package functions to get the data
 
 </div>
 
-# Showcase
+## Showcase
 
 To run the code in this README we will use the following setup:
 
@@ -276,7 +279,7 @@ metadata
     # ℹ 9,432 more rows
     # ℹ 1 more variable: local_path <chr>
 
-## Zones
+### Zones
 
 Zones can be downloaded as follows:
 
@@ -290,7 +293,7 @@ plot(sf::st_geometry(distritos_wgs84))
 
 ![](man/figures/README-distritos-1.png)
 
-## OD data
+### OD data
 
 ``` r
 od_db <- spod_get(
@@ -308,7 +311,7 @@ class(od_db)
 colnames(od_db)
 ```
 
-     [1] "full_date"                   "time_slot"                  
+     [1] "full_date"                   "hour"                  
      [3] "id_origin"                   "id_destination"             
      [5] "distance"                    "activity_origin"            
      [7] "activity_destination"        "study_possible_origin"      
@@ -326,10 +329,10 @@ aggregation to find the total number trips per hour over the 7 days:
 
 ``` r
 n_per_hour <- od_db |>
-  group_by(date, time_slot) |>
+  group_by(date, hour) |>
   summarise(n = n(), Trips = sum(n_trips)) |>
   collect() |>
-  mutate(Time = lubridate::ymd_h(paste0(date, time_slot, sep = " "))) |>
+  mutate(Time = lubridate::ymd_h(paste0(date, hour, sep = " "))) |>
   mutate(Day = lubridate::wday(Time, label = TRUE))
 n_per_hour |>
   ggplot(aes(x = Time, y = Trips)) +
@@ -342,7 +345,7 @@ n_per_hour |>
 The figure above summarises 925,874,012 trips over the 7 days associated
 with 135,866,524 records.
 
-## `spanishoddata` advantage over accessing the data yourself
+### `spanishoddata` advantage over accessing the data yourself
 
 As we demonstrated above, you can perform very quick analysis using just
 a few lines of code.
@@ -373,7 +376,7 @@ We did all of that for you and present you with a few simple functions
 that get you straight to the data in one line of code, and you are ready
 to run any analysis on it.
 
-# Desire lines
+## Desire lines
 
 We’ll use the same input data to pick out the most important flows in
 Spain, with a focus on longer trips for visualisation:
@@ -467,7 +470,7 @@ ggplot() +
 
 ![](man/figures/README-salamanca-plot-1.png)
 
-# Further information
+## Further information
 
 For more information on the package, see:
 
@@ -494,7 +497,7 @@ For more information on the package, see:
   - [Quickly getting daily aggregated 2022+ data at municipality
     level](https://ropenspain.github.io/spanishoddata/articles/quick-get.html)
 
-## Citation
+### Citation
 
 To cite the `spanishoddata` R package use:
 
@@ -508,6 +511,7 @@ To cite the official website of the mobility study use:
 Ministerio de Transportes y Movilidad Sostenible (MITMS) (2024).
 “Estudio de la movilidad con Big Data (Study of mobility with Big
 Data).”
+
 <https://www.transportes.gob.es/ministerio/proyectos-singulares/estudio-de-movilidad-con-big-data>.
 
 To cite the methodology for 2022 and onwards data use:
@@ -561,16 +565,12 @@ BibTeX:
       url = {https://cdn.mitma.gob.es/portal-web-drupal/covid-19/bigdata/mitma_-_estudio_movilidad_covid-19_informe_metodologico_v3.pdf},
     }
 
-# References
+## References
 
 <!-- metadata for better search engine indexing -->
-
 <!-- should be picked up by pkgdown -->
-
 <!-- update metadata before release with  -->
-
 <!-- cffr::cff_write() -->
-
 <!-- codemetar::write_codemeta(write_minimeta = T) -->
 
 <div id="refs" class="references csl-bib-body hanging-indent"

diff --git a/README.qmd b/README.qmd
@@ -27,11 +27,12 @@ default-image-extension: ""
 
 **spanishoddata** is an R package that provides functions for downloading and formatting Spanish open mobility data released by the Ministry of Transport and Sustainable Mobility of Spain [@mitms_mobility_web].
 
-It supports the two versions of the Spanish mobility data that consists of origin-destination matrices and some additional data sets. [The first version](https://www.transportes.gob.es/ministerio/proyectos-singulares/estudios-de-movilidad-con-big-data/estudios-de-movilidad-anteriores/covid-19/opendata-movilidad) covers data from 2020 and 2021, including the period of the COVID-19 pandemic. [The second version](https://www.transportes.gob.es/ministerio/proyectos-singulares/estudios-de-movilidad-con-big-data/opendata-movilidad) contains data from January 2022 onwards and is regularly updated. Both versions of the data primarily consist of mobile phone positioning data, and include matrices for overnight stays, individual movements, and trips of Spanish residents at different geographical levels. See the [package website](https://rOpenSpain.github.io/spanishoddata/) and vignettes for [v1](https://rOpenSpain.github.io/spanishoddata/articles/v1-2020-2021-mitma-data-codebook) and [v2](https://rOpenSpain.github.io/spanishoddata/articles/v2-2022-onwards-mitma-data-codebook) data for more details.
+It supports the two versions of the Spanish mobility data. [The first version (2020 to 2021)](https://www.transportes.gob.es/ministerio/proyectos-singulares/estudios-de-movilidad-con-big-data/estudios-de-movilidad-anteriores/covid-19/opendata-movilidad), covering the period of the COVID-19 pandemic, contains tables detailing trip numbers and distances, broken down by origin, destination, activity, residence province, time interval, distance interval, and date. It also provides tables of individual counts by location and trip frequency. [The second version (2022 onwards)](https://www.transportes.gob.es/ministerio/proyectos-singulares/estudios-de-movilidad-con-big-data/opendata-movilidad) improves spatial resolution, adds trips to and from Portugal and France, and introduces new fields for study-related activities and sociodemographic factors (income, age, and sex) in the origin-destination tables, along with additional tables showing individual counts by overnight stay location, residence, and date.
+See the [package website](https://rOpenSpain.github.io/spanishoddata/) and vignettes for [v1](https://rOpenSpain.github.io/spanishoddata/articles/v1-2020-2021-mitma-data-codebook) and [v2](https://rOpenSpain.github.io/spanishoddata/articles/v2-2022-onwards-mitma-data-codebook) data for more details.
 
 **spanishoddata** is designed to save time by providing the data in analysis-ready formats. Automating the process of downloading, cleaning, and importing the data can also reduce the risk of errors in the laborious process of data preparation. It also reduces computational resources by using computationally efficient packages behind the scenes. To effectively work with multiple data files, it’s recommended you set up a data directory where the package can search for the data and download only the files that are not already present.
 
-# Examples of available data
+## Examples of available data
 
 ![Example of the data available through the package: daily flows in Barcelona](vignettes/media/flows_plot.svg){#fig-barcelona-flows}
 
@@ -62,7 +63,7 @@ library(spanishoddata)
 
 ![The overview of package functions to get the data](man/figures/package-functions-overview.svg){#fig-overall-flow width="78%"}
 
-# Showcase
+## Showcase
 
 To run the code in this README we will use the following setup:
 
@@ -105,7 +106,7 @@ metadata
 ```
 
 
-## Zones
+### Zones
 
 Zones can be downloaded as follows:
 
@@ -120,7 +121,7 @@ plot(sf::st_geometry(distritos_wgs84))
 
 ![](man/figures/README-distritos-1.png)
 
-## OD data
+### OD data
 
 ```{r}
 od_db <- spod_get(
@@ -141,7 +142,7 @@ colnames(od_db)
 ```
 
 ```
- [1] "full_date"                   "time_slot"                  
+ [1] "full_date"                   "hour"                  
  [3] "id_origin"                   "id_destination"             
  [5] "distance"                    "activity_origin"            
  [7] "activity_destination"        "study_possible_origin"      
@@ -159,10 +160,10 @@ Let's do an aggregation to find the total number trips per hour over the 7 days:
 ```{r}
 #| label: trips-per-hour
 n_per_hour <- od_db |>
-  group_by(date, time_slot) |>
+  group_by(date, hour) |>
   summarise(n = n(), Trips = sum(n_trips)) |>
   collect() |>
-  mutate(Time = lubridate::ymd_h(paste0(date, time_slot, sep = " "))) |>
+  mutate(Time = lubridate::ymd_h(paste0(date, hour, sep = " "))) |>
   mutate(Day = lubridate::wday(Time, label = TRUE))
 n_per_hour |>
   ggplot(aes(x = Time, y = Trips)) +
@@ -175,7 +176,7 @@ n_per_hour |>
 
 The figure above summarises 925,874,012 trips over the 7 days associated with 135,866,524 records.
 
-## `spanishoddata` advantage over accessing the data yourself
+### `spanishoddata` advantage over accessing the data yourself
 
 As we demonstrated above, you can perform very quick analysis using just a few lines of code.
 
@@ -198,7 +199,7 @@ To highlight the benefits of the package, here is how you would do this manually
 We did all of that for you and present you with a few simple functions that get you straight to the data in one line of code, and you are ready to run any analysis on it.
 
 
-# Desire lines
+## Desire lines
 
 We'll use the same input data to pick out the most important flows in Spain, with a focus on longer trips for visualisation:
 
@@ -295,7 +296,7 @@ ggplot() +
 
 ![](man/figures/README-salamanca-plot-1.png)
 
-# Further information
+## Further information
 
 For more information on the package, see:
 
@@ -336,7 +337,7 @@ usethis::use_tidy_description()
 ```
 
 
-## Citation
+### Citation
 
 ```{r}
 #| eval: true
@@ -358,7 +359,7 @@ toBibtex(citation("spanishoddata"))
 
 
 
-# References
+## References
 
 <!-- metadata for better search engine indexing -->
 <!-- should be picked up by pkgdown -->

diff --git a/inst/extdata/sql-queries/v1-od-distritos-clean-csv-view-en.sql b/inst/extdata/sql-queries/v1-od-distritos-clean-csv-view-en.sql
@@ -1,5 +1,6 @@
 CREATE VIEW od_csv_clean AS SELECT
     fecha AS date,
+    periodo AS hour,
     CAST (CASE origen
         WHEN 'externo' THEN 'external'
         ELSE origen
@@ -10,6 +11,7 @@ CREATE VIEW od_csv_clean AS SELECT
         ELSE destino
         END AS ZONES_ENUM)
         AS id_destination,
+    CAST(distancia AS DISTANCE_ENUM) AS distance,
     CAST(CASE actividad_origen
         WHEN 'casa' THEN 'home'
         WHEN 'otros' THEN 'other'
@@ -75,8 +77,6 @@ CREATE VIEW od_csv_clean AS SELECT
         WHEN '51' THEN 'Ceuta'
         WHEN '52' THEN 'Melilla'
         END AS INE_PROV_NAME_ENUM) AS residence_province_name,
-    periodo AS time_slot,
-    CAST(distancia AS DISTANCE_ENUM) AS distance,
     viajes AS n_trips,
     viajes_km AS trips_total_length_km,
     CAST(year AS INTEGER) AS year,

diff --git a/inst/extdata/sql-queries/v1-od-distritos-clean-csv-view-es.sql b/inst/extdata/sql-queries/v1-od-distritos-clean-csv-view-es.sql
@@ -1,5 +1,6 @@
 CREATE VIEW od_csv_clean AS SELECT
     fecha AS date,
+    periodo,
     CAST(origen AS ZONES_ENUM) AS origen,
     CAST(destino AS ZONES_ENUM) AS destino,
     CAST(CASE actividad_origen
@@ -12,6 +13,7 @@ CREATE VIEW od_csv_clean AS SELECT
         WHEN 'otros' THEN 'other'
         WHEN 'trabajo_estudio' THEN 'work_or_study'
         END AS ACTIV_ENUM) AS actividad_destino,
+    CAST(distancia AS DISTANCE_ENUM) AS distancia,
     CAST(residencia AS INE_PROV_CODE_ENUM) AS residencia,
     CAST (CASE residencia
         WHEN '01' THEN 'Araba/Álava'
@@ -67,8 +69,6 @@ CREATE VIEW od_csv_clean AS SELECT
         WHEN '51' THEN 'Ceuta'
         WHEN '52' THEN 'Melilla'
         END AS INE_PROV_NAME_ENUM) AS residencia_nombre,
-    periodo,
-    CAST(distancia AS DISTANCE_ENUM) AS distancia,
     viajes,
     viajes_km,
     CAST(year AS INTEGER) AS ano,

diff --git a/inst/extdata/sql-queries/v1-od-municipios-clean-csv-view-en.sql b/inst/extdata/sql-queries/v1-od-municipios-clean-csv-view-en.sql
@@ -84,7 +84,7 @@ SELECT
         WHEN '51' THEN 'Ceuta'
         WHEN '52' THEN 'Melilla'
         END AS INE_PROV_NAME_ENUM) AS residence_province_name,
-    d.periodo AS time_slot,
+    d.periodo AS hour,
     CAST(d.distancia AS DISTANCE_ENUM) AS distance,
     SUM(d.viajes) AS n_trips,
     SUM(d.viajes_km) AS trips_total_length_km,

diff --git a/inst/extdata/sql-queries/v2-od-distritos-clean-csv-view-en.sql b/inst/extdata/sql-queries/v2-od-distritos-clean-csv-view-en.sql
@@ -1,6 +1,6 @@
 CREATE VIEW od_csv_clean AS SELECT
     fecha AS date,
-    periodo AS time_slot,
+    periodo AS hour,
     CAST (CASE origen
         WHEN 'externo' THEN 'external'
         ELSE origen

diff --git a/inst/extdata/sql-queries/v2-od-gau-clean-csv-view-en.sql b/inst/extdata/sql-queries/v2-od-gau-clean-csv-view-en.sql
@@ -1,6 +1,6 @@
 CREATE VIEW od_csv_clean AS SELECT
     fecha AS date,
-    periodo AS time_slot,
+    periodo AS hour,
     CAST (CASE origen
         WHEN 'externo' THEN 'external'
         ELSE origen