Skip to content

Commit

Permalink
refactor lovac cleaning and filtering
Browse files Browse the repository at this point in the history
Signed-off-by: Raphaël Courivaud <[email protected]>
  • Loading branch information
rcourivaud committed Jan 22, 2025
1 parent fdaca43 commit e9b2af1
Show file tree
Hide file tree
Showing 24 changed files with 281 additions and 18 deletions.
170 changes: 170 additions & 0 deletions analytics/dagster/logs/event.log

Large diffs are not rendered by default.

4 changes: 4 additions & 0 deletions analytics/dbt/macros/lovac/deduplicate_lovac.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
{#
  deduplicate_lovac: emits a QUALIFY clause meant to be appended to a SELECT.
  Keeps a single row per local_id: the one with the highest debutvacance.
  NULLS LAST is spelled out so that a row with a NULL debutvacance is only
  kept when the local_id has no dated row, whatever the engine's default
  NULL ordering is.
  NOTE(review): rows tied on debutvacance are still picked arbitrarily; add a
  tie-breaker column to the ORDER BY if a stable choice is required.
#}
{% macro deduplicate_lovac() %}
QUALIFY
ROW_NUMBER () OVER (PARTITION BY local_id ORDER BY debutvacance DESC NULLS LAST) = 1
{% endmacro %}
32 changes: 28 additions & 4 deletions analytics/dbt/macros/lovac/handle_lovac_different_years.sql
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
{% macro handle_lovac_different_years () %}
{% macro handle_lovac_different_years (new_version=False) %}
cleaned_data AS (
SELECT
annee as data_year,
Expand All @@ -14,13 +14,37 @@ TRY_CAST (debutvacance as INTEGER) as debutvacance,
ccodep,
lpad (ccodep, 2, '0') || lpad (commune, 3, '0') AS geo_code,
nature AS housing_kind,
ff_stoth,
CASE
WHEN potentiel_tlv_thlv = ' ' OR potentiel_tlv_thlv IS NULL THEN false
WHEN potentiel_tlv_thlv = '*' THEN true
ELSE NULL
END as potentiel_tlv_thlv,
TRY_CAST(ff_stoth AS NUMERIC) as living_area,
{# Columns whose source fields are only read when new_version is true.
   The other branch emits typed placeholders so that both branches expose
   the same column names with the same types (NUMERIC / DATE). #}
{% if new_version %}
TRY_CAST(ff_dcntpa AS NUMERIC) as plot_area,
TRY_CAST(ff_jdatnss_1 AS DATE) AS ff_jdatnss_1,
TRY_CAST(ff_jdatnss_2 AS DATE) AS ff_jdatnss_2,
TRY_CAST(ff_jdatnss_3 AS DATE) AS ff_jdatnss_3,
TRY_CAST(ff_jdatnss_4 AS DATE) AS ff_jdatnss_4,
TRY_CAST(ff_jdatnss_5 AS DATE) AS ff_jdatnss_5,
TRY_CAST(ff_jdatnss_6 AS DATE) AS ff_jdatnss_6,

{% else %}
{# NOTE(review): plot_area is 0 (not NULL) in this branch, so sums built on it
   cannot tell "unknown" from "zero" — confirm this is intended. #}
CAST(0 AS NUMERIC) as plot_area,
CAST(NULL AS DATE) AS ff_jdatnss_1,
CAST(NULL AS DATE) AS ff_jdatnss_2,
CAST(NULL AS DATE) AS ff_jdatnss_3,
CAST(NULL AS DATE) AS ff_jdatnss_4,
CAST(NULL AS DATE) AS ff_jdatnss_5,
CAST(NULL AS DATE) AS ff_jdatnss_6,
{% endif %}


source.*

FROM
source
)

SELECT * FROM cleaned_data
QUALIFY
ROW_NUMBER () OVER (PARTITION BY local_id ORDER BY debutvacance DESC) = 1
{% endmacro %}
2 changes: 2 additions & 0 deletions analytics/dbt/models/intermediate/lovac/int_lovac_ex_2019.sql
Original file line number Diff line number Diff line change
@@ -1,2 +1,4 @@
-- Rows of stg_lovac_2019 restricted by filter_lovac (ccthp = True, vacancy = False),
-- then reduced to one row per local_id by deduplicate_lovac.
SELECT *
FROM {{ ref('stg_lovac_2019') }}
{{ filter_lovac(ccthp=True, vacancy=False) }}
{{ deduplicate_lovac() }}

Original file line number Diff line number Diff line change
@@ -1,2 +1,3 @@
-- Rows of stg_lovac_2020 restricted by filter_lovac (ccthp = True, vacancy = False),
-- then reduced to one row per local_id by deduplicate_lovac.
SELECT *
FROM {{ ref('stg_lovac_2020') }}
{{ filter_lovac(ccthp=True, vacancy=False) }}
{{ deduplicate_lovac() }}
Original file line number Diff line number Diff line change
@@ -1,2 +1,3 @@
-- Rows of stg_lovac_2021 restricted by filter_lovac (ccthp = True, vacancy = False),
-- then reduced to one row per local_id by deduplicate_lovac.
SELECT *
FROM {{ ref('stg_lovac_2021') }}
{{ filter_lovac(ccthp=True, vacancy=False) }}
{{ deduplicate_lovac() }}
Original file line number Diff line number Diff line change
@@ -1,2 +1,3 @@
-- Rows of stg_lovac_2022 restricted by filter_lovac (ccthp = True, vacancy = False),
-- then reduced to one row per local_id by deduplicate_lovac.
SELECT *
FROM {{ ref('stg_lovac_2022') }}
{{ filter_lovac(ccthp=True, vacancy=False) }}
{{ deduplicate_lovac() }}
Original file line number Diff line number Diff line change
@@ -1,2 +1,3 @@
-- Rows of stg_lovac_2023 restricted by filter_lovac (ccthp = True, vacancy = False),
-- then reduced to one row per local_id by deduplicate_lovac.
SELECT *
FROM {{ ref('stg_lovac_2023') }}
{{ filter_lovac(ccthp=True, vacancy=False) }}
{{ deduplicate_lovac() }}
Original file line number Diff line number Diff line change
@@ -1,2 +1,3 @@
-- Rows of stg_lovac_2024 restricted by filter_lovac (ccthp = True, vacancy = False),
-- then reduced to one row per local_id by deduplicate_lovac.
SELECT *
FROM {{ ref('stg_lovac_2024') }}
{{ filter_lovac(ccthp=True, vacancy=False) }}
{{ deduplicate_lovac() }}
Original file line number Diff line number Diff line change
@@ -1,2 +1,3 @@
-- Rows of stg_lovac_2019 restricted by filter_lovac (ccthp = True),
-- then reduced to one row per local_id by deduplicate_lovac.
SELECT *
FROM {{ ref('stg_lovac_2019') }}
{{ filter_lovac(ccthp=True) }}
{{ deduplicate_lovac() }}
Original file line number Diff line number Diff line change
@@ -1,2 +1,3 @@
-- Rows of stg_lovac_2020 restricted by filter_lovac (ccthp = True),
-- then reduced to one row per local_id by deduplicate_lovac.
SELECT *
FROM {{ ref('stg_lovac_2020') }}
{{ filter_lovac(ccthp=True) }}
{{ deduplicate_lovac() }}
Original file line number Diff line number Diff line change
@@ -1,2 +1,3 @@
-- Rows of stg_lovac_2021 restricted by filter_lovac (ccthp = True),
-- then reduced to one row per local_id by deduplicate_lovac.
SELECT *
FROM {{ ref('stg_lovac_2021') }}
{{ filter_lovac(ccthp=True) }}
{{ deduplicate_lovac() }}
Original file line number Diff line number Diff line change
@@ -1,2 +1,3 @@
-- Rows of stg_lovac_2022 restricted by filter_lovac (ccthp = True),
-- then reduced to one row per local_id by deduplicate_lovac.
SELECT *
FROM {{ ref('stg_lovac_2022') }}
{{ filter_lovac(ccthp=True) }}
{{ deduplicate_lovac() }}
Original file line number Diff line number Diff line change
@@ -1,2 +1,3 @@
-- Rows of stg_lovac_2023 restricted by filter_lovac (ccthp = False),
-- then reduced to one row per local_id by deduplicate_lovac.
-- NOTE(review): the sibling models for the other years pass ccthp = True;
-- confirm that False is intentional for 2023.
SELECT *
FROM {{ ref('stg_lovac_2023') }}
{{ filter_lovac(ccthp=False) }}
{{ deduplicate_lovac() }}
Original file line number Diff line number Diff line change
@@ -1,2 +1,3 @@
-- Rows of stg_lovac_2024 restricted by filter_lovac (ccthp = True),
-- then reduced to one row per local_id by deduplicate_lovac.
SELECT *
FROM {{ ref('stg_lovac_2024') }}
{{ filter_lovac(ccthp=True) }}
{{ deduplicate_lovac() }}
45 changes: 39 additions & 6 deletions analytics/dbt/models/marts/common/marts_common_morphology.sql
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,9 @@ WITH all_lovac AS (
ff_ccthp,
housing_kind,
aff,
groupe
groupe,
plot_area,
living_area
FROM {{ ref ("stg_lovac_2024") }}
UNION ALL
SELECT
Expand All @@ -20,7 +22,9 @@ WITH all_lovac AS (
ff_ccthp,
housing_kind,
aff,
groupe
groupe,
plot_area,
living_area
FROM {{ ref ("stg_lovac_2023") }}
UNION ALL
SELECT
Expand All @@ -32,7 +36,9 @@ WITH all_lovac AS (
ff_ccthp,
housing_kind,
aff,
groupe
groupe,
plot_area,
living_area
FROM {{ ref ("stg_lovac_2022") }}
UNION ALL
SELECT
Expand All @@ -44,7 +50,9 @@ WITH all_lovac AS (
ff_ccthp,
housing_kind,
aff,
groupe
groupe,
plot_area,
living_area
FROM {{ ref ("stg_lovac_2021") }}
UNION ALL
SELECT
Expand All @@ -56,7 +64,9 @@ WITH all_lovac AS (
ff_ccthp,
housing_kind,
aff,
groupe
groupe,
plot_area,
living_area
FROM {{ ref ("stg_lovac_2020") }}
UNION ALL
SELECT
Expand All @@ -68,7 +78,9 @@ WITH all_lovac AS (
ff_ccthp,
housing_kind,
aff,
groupe
groupe,
plot_area,
living_area
FROM {{ ref ("stg_lovac_2019") }}
),

Expand Down Expand Up @@ -103,6 +115,8 @@ lovac AS (
local_id
, year
, geo_code
, plot_area
, living_area
, CASE
WHEN
(housing_kind IN ('APPART', 'MAISON') AND aff = 'H')
Expand Down Expand Up @@ -152,6 +166,23 @@ lovac_geo_code_year AS (
ELSE 0
END
) AS count_vacant_housing_private_fil_ccthp
, SUM(CASE
WHEN
is_housing = 1
AND is_private = 1
AND is_vacant_fil_ccthp = 1
THEN living_area
ELSE 0
END
) as sum_living_area_vacant_housing_private_fil_ccthp
, SUM(CASE
WHEN
is_housing = 1
AND is_private = 1
AND is_vacant_fil_ccthp = 1
THEN plot_area
ELSE 0
END) as sum_plot_area_vacant_housing_private_fil_ccthp
FROM lovac
GROUP BY year, geo_code
),
Expand Down Expand Up @@ -204,6 +235,8 @@ SELECT
, lovac.count_vacant_housing_private
, lovac.count_vacant_housing_private_fil
, lovac.count_vacant_housing_private_fil_ccthp
, lovac.sum_living_area_vacant_housing_private_fil_ccthp
, lovac.sum_plot_area_vacant_housing_private_fil_ccthp
, ff.count_housing
, ff.count_housing_private
, ff.count_housing_private_rented
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -10,8 +10,9 @@ SELECT
SUM(count_housing) AS count_housing,
SUM(count_housing_private) AS count_housing_private,
SUM(count_housing_private_rented) AS count_housing_private_rented,
SUM(count_housing_production) AS count_housing_production

SUM(count_housing_production) AS count_housing_production,
SUM(sum_living_area_vacant_housing_private_fil_ccthp) as sum_living_area_vacant_housing_private_fil_ccthp,
SUM(sum_plot_area_vacant_housing_private_fil_ccthp) as sum_plot_area_vacant_housing_private_fil_ccthp
FROM {{ ref ('int_production_establishments') }} pe
LEFT JOIN {{ ref ('int_production_establishments_localities') }} pel ON pe.id = pel.establishment_id
LEFT JOIN {{ ref ('marts_common_morphology') }} mcm ON pel.geo_code = mcm.geo_code
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,18 @@ WITH base_data AS (
'count_vacant_housing_private_fil_ccthp' AS count_type,
count_vacant_housing_private_fil_ccthp AS count_value
FROM base_data
-- Each arm of this unpivot must be chained with UNION ALL; without it the
-- SELECT that follows "FROM base_data" is a syntax error.
UNION ALL
-- Living-area sum, exposed through the shared count_type / count_value pair.
SELECT
establishment_id,
year,
'sum_living_area_vacant_housing_private_fil_ccthp' AS count_type,
sum_living_area_vacant_housing_private_fil_ccthp AS count_value
FROM base_data
UNION ALL
-- Plot-area sum, exposed through the shared count_type / count_value pair.
SELECT
establishment_id,
year,
'sum_plot_area_vacant_housing_private_fil_ccthp' AS count_type,
sum_plot_area_vacant_housing_private_fil_ccthp AS count_value
FROM base_data
UNION ALL
SELECT
establishment_id,
Expand Down Expand Up @@ -78,6 +90,12 @@ WITH base_data AS (
WHEN
count_type = 'count_vacant_housing_private_fil'
THEN 'Logements Vacants du Parc Privé (FIL)'
WHEN
count_type = 'sum_living_area_vacant_housing_private_fil_ccthp'
THEN 'Somme des surfaces habitables Vacants du Parc Privé (FIL+CCTHP)'
WHEN
count_type = 'sum_plot_area_vacant_housing_private_fil_ccthp'
THEN 'Somme des surfaces foncières Vacants du Parc Privé (FIL+CCTHP)'
WHEN
count_type = 'count_vacant_housing_private_fil_ccthp'
THEN 'Logements Vacants du Parc Privé (FIL+CCTHP)'
Expand Down
2 changes: 1 addition & 1 deletion analytics/dbt/models/staging/lovac/stg_lovac_2019.sql
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
with source as (
SELECT * FROM {{ source ('duckdb_raw', 'raw_lovac_2019') }}
),
{{ handle_lovac_different_years () }}
{{ handle_lovac_different_years(new_version=False) }}
2 changes: 1 addition & 1 deletion analytics/dbt/models/staging/lovac/stg_lovac_2020.sql
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
with source as (
SELECT * FROM {{ source ('duckdb_raw', 'raw_lovac_2020') }}
),
{{ handle_lovac_different_years () }}
{{ handle_lovac_different_years (new_version=False) }}
2 changes: 1 addition & 1 deletion analytics/dbt/models/staging/lovac/stg_lovac_2021.sql
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
with source as (
SELECT * FROM {{ source ('duckdb_raw', 'raw_lovac_2021') }}
),
{{ handle_lovac_different_years () }}
{{ handle_lovac_different_years (new_version=False) }}
2 changes: 1 addition & 1 deletion analytics/dbt/models/staging/lovac/stg_lovac_2022.sql
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
with source as (
SELECT * FROM {{ source ('duckdb_raw', 'raw_lovac_2022') }}
),
{{ handle_lovac_different_years () }}
{{ handle_lovac_different_years (new_version=False) }}
2 changes: 1 addition & 1 deletion analytics/dbt/models/staging/lovac/stg_lovac_2023.sql
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
with source as (
SELECT * FROM {{ source ('duckdb_raw', 'raw_lovac_2023') }}
),
{{ handle_lovac_different_years () }}
{{ handle_lovac_different_years(new_version=True) }}
2 changes: 1 addition & 1 deletion analytics/dbt/models/staging/lovac/stg_lovac_2024.sql
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
with source as (
SELECT * FROM {{ source ('duckdb_raw', 'raw_lovac_2024') }}
),
{{ handle_lovac_different_years() }}
{{ handle_lovac_different_years(new_version=True) }}

0 comments on commit e9b2af1

Please sign in to comment.