From f51f4cc4b47881af471d2cd22919772edb5a917c Mon Sep 17 00:00:00 2001 From: tkiehn <162969167+tkiehn@users.noreply.github.com> Date: Fri, 28 Jun 2024 11:05:31 +0200 Subject: [PATCH 1/5] add enable_ghost_records_parameter to stages, makes it possible to disable ghost record creation --- macros/staging/bigquery/stage.sql | 9 +++++++-- macros/staging/exasol/stage.sql | 10 +++++++++- macros/staging/postgres/stage.sql | 7 ++++++- macros/staging/redshift/stage.sql | 7 ++++++- macros/staging/snowflake/stage.sql | 7 ++++++- macros/staging/stage.sql | 12 ++++++++++-- macros/staging/synapse/stage.sql | 7 ++++++- 7 files changed, 50 insertions(+), 9 deletions(-) diff --git a/macros/staging/bigquery/stage.sql b/macros/staging/bigquery/stage.sql index 85fd365f..92f14333 100644 --- a/macros/staging/bigquery/stage.sql +++ b/macros/staging/bigquery/stage.sql @@ -9,7 +9,8 @@ sequence, prejoined_columns, missing_columns, - multi_active_config) -%} + multi_active_config, + enable_ghost_records) -%} {% if (source_model is none) and execute %} @@ -427,6 +428,7 @@ hashed_columns AS ( {%- endif -%} {%- endif -%} +{% if enable_ghost_records %} {# Creating Ghost Record for unknown case, based on datatype #} unknown_values AS ( @@ -570,6 +572,7 @@ ghost_records AS ( UNION ALL SELECT * FROM error_values ), +{%- endif -%} {%- if not include_source_columns -%} {% set final_columns_to_select = datavault4dbt.process_columns_to_select(columns_list=final_columns_to_select, exclude_columns_list=source_columns_to_select) %} @@ -584,13 +587,15 @@ columns_to_select AS ( FROM {{ last_cte }} +{%- if enable_ghost_records %} UNION ALL - + SELECT {{ datavault4dbt.print_list(datavault4dbt.escape_column_names(final_columns_to_select)) }} FROM ghost_records +{%- endif %} ) SELECT * FROM columns_to_select diff --git a/macros/staging/exasol/stage.sql b/macros/staging/exasol/stage.sql index 50e6402d..ef5c902a 100644 --- a/macros/staging/exasol/stage.sql +++ b/macros/staging/exasol/stage.sql @@ -7,7 +7,8 @@ sequence, prejoined_columns, missing_columns, - multi_active_config) -%} + multi_active_config, + enable_ghost_records) -%} {% if (source_model is none) and execute %} @@ -404,6 +405,7 @@ hashed_columns AS ( {%- endif -%} {%- endif -%} +{% if enable_ghost_records %} {# Creating Ghost Record for unknown case, based on datatype #} unknown_values AS ( SELECT @@ -542,9 +544,12 @@ ghost_records AS ( UNION ALL SELECT * FROM error_values ), +{%- endif -%} + {%- if not include_source_columns -%} {% set final_columns_to_select = datavault4dbt.process_columns_to_select(columns_list=final_columns_to_select, exclude_columns_list=source_columns_to_select) %} {%- endif -%} + {# Combining the two ghost records with the regular data #} columns_to_select AS ( @@ -553,12 +558,15 @@ columns_to_select AS ( {{ datavault4dbt.print_list(datavault4dbt.escape_column_names(final_columns_to_select)) }} FROM {{ last_cte }} + +{%- if enable_ghost_records -%} UNION ALL SELECT {{ datavault4dbt.print_list(datavault4dbt.escape_column_names(final_columns_to_select)) }} FROM ghost_records +{%- endif -%} ) SELECT * FROM columns_to_select diff --git a/macros/staging/postgres/stage.sql b/macros/staging/postgres/stage.sql index 3a133740..f1bac701 100644 --- a/macros/staging/postgres/stage.sql +++ b/macros/staging/postgres/stage.sql @@ -9,7 +9,8 @@ sequence, prejoined_columns, missing_columns, - multi_active_config) -%} + multi_active_config, + enable_ghost_records) -%} {% if (source_model is none) and execute %} @@ -427,6 +428,7 @@ hashed_columns AS ( {%- endif -%} {%- endif -%} +{%- if enable_ghost_records -%} {# Creating Ghost Record for unknown case, based on datatype #} unknown_values AS ( @@ -570,6 +572,7 @@ ghost_records AS ( UNION ALL SELECT * FROM error_values ), +{%- endif -%} {%- if not include_source_columns -%} {% set final_columns_to_select = datavault4dbt.process_columns_to_select(columns_list=final_columns_to_select, exclude_columns_list=source_columns_to_select) %} @@ -584,6 +587,7 @@ columns_to_select AS ( FROM {{ last_cte }} +{% if enable_ghost_records %} UNION ALL SELECT @@ -591,6 +595,7 @@ columns_to_select AS ( {{ datavault4dbt.print_list(datavault4dbt.escape_column_names(final_columns_to_select)) }} FROM ghost_records +{% endif %} ) SELECT * FROM columns_to_select diff --git a/macros/staging/redshift/stage.sql b/macros/staging/redshift/stage.sql index 262c6e53..2ff61104 100644 --- a/macros/staging/redshift/stage.sql +++ b/macros/staging/redshift/stage.sql @@ -9,7 +9,8 @@ sequence, prejoined_columns, missing_columns, - multi_active_config) -%} + multi_active_config, + enable_ghost_records) -%} {% if (source_model is none) and execute %} @@ -427,6 +428,7 @@ hashed_columns AS ( {%- endif -%} {%- endif -%} +{%- if enable_ghost_records -%} {# Creating Ghost Record for unknown case, based on datatype #} unknown_values AS ( @@ -570,6 +572,7 @@ ghost_records AS ( UNION ALL SELECT * FROM error_values ), +{%- endif -%} {%- if not include_source_columns -%} {% set final_columns_to_select = datavault4dbt.process_columns_to_select(columns_list=final_columns_to_select, exclude_columns_list=source_columns_to_select) %} @@ -584,6 +587,7 @@ columns_to_select AS ( FROM {{ last_cte }} +{%- if enable_ghost_records -%} UNION ALL SELECT @@ -591,6 +595,7 @@ columns_to_select AS ( {{ datavault4dbt.print_list(datavault4dbt.escape_column_names(final_columns_to_select)) }} FROM ghost_records +{%- endif- %} ) SELECT * FROM columns_to_select diff --git a/macros/staging/snowflake/stage.sql b/macros/staging/snowflake/stage.sql index b2b16185..f938ad9b 100644 --- a/macros/staging/snowflake/stage.sql +++ b/macros/staging/snowflake/stage.sql @@ -9,7 +9,8 @@ sequence, prejoined_columns, missing_columns, - multi_active_config) -%} + multi_active_config, + enable_ghost_records) -%} {% if (source_model is none) and execute %} @@ -385,6 +386,7 @@ hashed_columns AS ( {%- endif -%} {%- endif -%} +{% if enable_ghost_records %} {% if not is_incremental() %} {# Creating Ghost Record for unknown case, based on datatype #} unknown_values AS ( @@ -530,6 +532,7 @@ ghost_records AS ( SELECT * FROM error_values ), {%- endif %} +{%- endif %} {%- if not include_source_columns -%} {% set source_columns_to_select = datavault4dbt.process_columns_to_select(columns_list=source_columns_to_select, exclude_columns_list=derived_column_names) %} @@ -545,6 +548,7 @@ columns_to_select AS ( FROM {{ last_cte }} +{% if enable_ghost_records %} {% if not is_incremental() %} UNION ALL @@ -554,6 +558,7 @@ columns_to_select AS ( FROM ghost_records {% endif %} +{% endif %} ) SELECT * FROM columns_to_select diff --git a/macros/staging/stage.sql b/macros/staging/stage.sql index 25aebe8d..7042fdd0 100644 --- a/macros/staging/stage.sql +++ b/macros/staging/stage.sql @@ -98,17 +98,24 @@ {'multi_active_key': ['phonetype', 'company'], This source data comes with two multi-active keys. The combination of those two, the main_hashkey and ldts is unique 'main_hashkey_column': 'hk_contact_h'} inside the source system. + enable_ghost_records::boolean If set to true, the stage will be created with ghost records. By default, ghost records are enabled. Optional Parameter + #} - {%- macro stage(ldts, rsrc, source_model, include_source_columns=true, hashed_columns=none, derived_columns=none, sequence=none, prejoined_columns=none, missing_columns=none, multi_active_config=none) -%} + {%- macro stage(ldts, rsrc, source_model, include_source_columns=true, hashed_columns=none, derived_columns=none, sequence=none, prejoined_columns=none, missing_columns=none, multi_active_config=none, enable_ghost_records=true) -%} {# If include_source_columns is passed but its empty then it is set with the default value (true) #} {%- if include_source_columns is none or include_source_columns == "" -%} {%- set include_source_columns = true -%} {%- endif -%} + {# If enable_ghost_records is passed but its empty then it is set with the default value (true) #} + {%- if enable_ghost_records is none or enable_ghost_records == "" -%} + {%- set enable_ghost_records = true -%} + {%- endif -%} + {{- adapter.dispatch('stage', 'datavault4dbt')(include_source_columns=include_source_columns, ldts=ldts, rsrc=rsrc, @@ -118,6 +125,7 @@ sequence=sequence, prejoined_columns=prejoined_columns, missing_columns=missing_columns, - multi_active_config=multi_active_config) -}} + multi_active_config=multi_active_config, + enable_ghost_records=enable_ghost_records) -}} {%- endmacro -%} diff --git a/macros/staging/synapse/stage.sql b/macros/staging/synapse/stage.sql index 2751b4c9..37b35784 100644 --- a/macros/staging/synapse/stage.sql +++ b/macros/staging/synapse/stage.sql @@ -7,7 +7,8 @@ sequence, prejoined_columns, missing_columns, - multi_active_config) -%} + multi_active_config, + enable_ghost_records) -%} {% if (source_model is none) and execute %} @@ -435,6 +436,7 @@ hashed_columns AS ( {% set processed_hash_columns = datavault4dbt.process_hash_column_excludes(hashed_columns) -%} {%- endif -%} +{% if enable_ghost_records %} {% if not is_incremental() %} {# Creating Ghost Record for unknown case, based on datatype #} unknown_values AS ( @@ -580,6 +582,7 @@ ghost_records AS ( SELECT * FROM error_values ), {%- endif %} +{%- endif %} {%- if not include_source_columns -%} {% set final_columns_to_select = datavault4dbt.process_columns_to_select(columns_list=final_columns_to_select, exclude_columns_list=source_columns_to_select) %} @@ -594,6 +597,7 @@ columns_to_select AS ( FROM {{ last_cte }} +{% if enable_ghost_records %} {% if not is_incremental() %} UNION ALL @@ -603,6 +607,7 @@ columns_to_select AS ( FROM ghost_records {% endif %} +{% endif %} ) SELECT * FROM columns_to_select From db98800635d325959bfecfa1888556ba4b856476 Mon Sep 17 00:00:00 2001 From: tkiehn <162969167+tkiehn@users.noreply.github.com> Date: Fri, 28 Jun 2024 11:10:56 +0200 Subject: [PATCH 2/5] fix wrong endif --- macros/staging/bigquery/stage.sql | 2 +- macros/staging/exasol/stage.sql | 2 +- macros/staging/postgres/stage.sql | 2 +- macros/staging/redshift/stage.sql | 4 ++-- macros/staging/snowflake/stage.sql | 2 +- macros/staging/synapse/stage.sql | 2 +- 6 files changed, 7 insertions(+), 7 deletions(-) diff --git a/macros/staging/bigquery/stage.sql b/macros/staging/bigquery/stage.sql index 92f14333..cd70575d 100644 --- a/macros/staging/bigquery/stage.sql +++ b/macros/staging/bigquery/stage.sql @@ -149,7 +149,7 @@ {%- set source_columns_to_select = only_include_from_source -%} -{%- endif-%} +{%- endif -%} {%- set final_columns_to_select = final_columns_to_select + source_columns_to_select -%} {%- set derived_columns_to_select = datavault4dbt.process_columns_to_select(source_and_derived_column_names, hashed_column_names) | unique | list -%} diff --git a/macros/staging/exasol/stage.sql b/macros/staging/exasol/stage.sql index ef5c902a..789a795b 100644 --- a/macros/staging/exasol/stage.sql +++ b/macros/staging/exasol/stage.sql @@ -143,7 +143,7 @@ {%- set source_columns_to_select = only_include_from_source -%} -{%- endif-%} +{%- endif -%} {%- set final_columns_to_select = final_columns_to_select + source_columns_to_select -%} {%- set derived_columns_to_select = datavault4dbt.process_columns_to_select(source_and_derived_column_names, hashed_column_names) | unique | list -%} diff --git a/macros/staging/postgres/stage.sql b/macros/staging/postgres/stage.sql index f1bac701..f3b8a5c1 100644 --- a/macros/staging/postgres/stage.sql +++ b/macros/staging/postgres/stage.sql @@ -149,7 +149,7 @@ {%- set source_columns_to_select = only_include_from_source -%} -{%- endif-%} +{%- endif -%} {%- set final_columns_to_select = final_columns_to_select + source_columns_to_select -%} {%- set derived_columns_to_select = datavault4dbt.process_columns_to_select(source_and_derived_column_names, hashed_column_names) | unique | list -%} diff --git a/macros/staging/redshift/stage.sql b/macros/staging/redshift/stage.sql index 2ff61104..f96365fe 100644 --- a/macros/staging/redshift/stage.sql +++ b/macros/staging/redshift/stage.sql @@ -149,7 +149,7 @@ {%- set source_columns_to_select = only_include_from_source -%} -{%- endif-%} +{%- endif -%} {%- set final_columns_to_select = final_columns_to_select + source_columns_to_select -%} {%- set derived_columns_to_select = datavault4dbt.process_columns_to_select(source_and_derived_column_names, hashed_column_names) | unique | list -%} @@ -595,7 +595,7 @@ columns_to_select AS ( {{ datavault4dbt.print_list(datavault4dbt.escape_column_names(final_columns_to_select)) }} FROM ghost_records -{%- endif- %} +{%- endif -%} ) SELECT * FROM columns_to_select diff --git a/macros/staging/snowflake/stage.sql b/macros/staging/snowflake/stage.sql index f938ad9b..2f507758 100644 --- a/macros/staging/snowflake/stage.sql +++ b/macros/staging/snowflake/stage.sql @@ -152,7 +152,7 @@ {%- set source_columns_to_select = only_include_from_source -%} {{ log('source_columns_to_select when include_source_columns=false: '~ source_columns_to_select, false) }} -{%- endif-%} +{%- endif -%} {%- set final_columns_to_select = final_columns_to_select + source_columns_to_select -%} {%- set derived_columns_to_select = datavault4dbt.process_columns_to_select(source_and_derived_column_names, hashed_column_names) | unique | list -%} diff --git a/macros/staging/synapse/stage.sql b/macros/staging/synapse/stage.sql index 37b35784..3629b07e 100644 --- a/macros/staging/synapse/stage.sql +++ b/macros/staging/synapse/stage.sql @@ -139,7 +139,7 @@ {%- endif -%} {%- set source_columns_to_select = only_include_from_source -%} -{%- endif-%} +{%- endif -%} {%- if not var('datavault4dbt.include_derived_column_input_columns', true) -%} {%- set exclude_column_names = exclude_column_names + derived_input_columns -%} From 46b8c8ec470deddc98a425044ddf9b70bb818592 Mon Sep 17 00:00:00 2001 From: Tim Kirschke <81677440+tkirschke@users.noreply.github.com> Date: Mon, 2 Sep 2024 16:21:39 +0200 Subject: [PATCH 3/5] Update README.md empty commit to trigger pipeline From 72982ef47641dfa8a3288d4fdf0e1b5005aca372 Mon Sep 17 00:00:00 2001 From: Theo Kiehn <162969167+tkiehn@users.noreply.github.com> Date: Tue, 3 Sep 2024 08:45:40 +0200 Subject: [PATCH 4/5] Trigger Pipeline From 3d858c0e38282bdeaa346dbf4427a543676aaaf7 Mon Sep 17 00:00:00 2001 From: Theo Kiehn <162969167+tkiehn@users.noreply.github.com> Date: Tue, 3 Sep 2024 09:33:24 +0200 Subject: [PATCH 5/5] fix whitespace control --- macros/staging/exasol/stage.sql | 2 +- macros/staging/redshift/stage.sql | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/macros/staging/exasol/stage.sql b/macros/staging/exasol/stage.sql index 18aa40ea..3935599d 100644 --- a/macros/staging/exasol/stage.sql +++ b/macros/staging/exasol/stage.sql @@ -580,7 +580,7 @@ columns_to_select AS ( FROM {{ last_cte }} -{%- if enable_ghost_records -%} +{%- if enable_ghost_records %} UNION ALL SELECT diff --git a/macros/staging/redshift/stage.sql b/macros/staging/redshift/stage.sql index abe1a184..97ed0304 100644 --- a/macros/staging/redshift/stage.sql +++ b/macros/staging/redshift/stage.sql @@ -588,7 +588,7 @@ columns_to_select AS ( FROM {{ last_cte }} -{%- if enable_ghost_records -%} +{%- if enable_ghost_records %} UNION ALL SELECT