From c9eb855028377cf936d8a032ab7d6bd79c740189 Mon Sep 17 00:00:00 2001 From: Matt Garber Date: Mon, 13 Jan 2025 13:32:07 -0500 Subject: [PATCH] Better handling for dedicated schemas --- cumulus_library/actions/cleaner.py | 12 +++++++-- pyproject.toml | 2 +- tests/test_actions.py | 26 ++++++++++++++++++- .../study_dedicated_schema/module2.py | 4 +-- 4 files changed, 37 insertions(+), 7 deletions(-) diff --git a/cumulus_library/actions/cleaner.py b/cumulus_library/actions/cleaner.py index 33b1b56f..982703bf 100644 --- a/cumulus_library/actions/cleaner.py +++ b/cumulus_library/actions/cleaner.py @@ -1,5 +1,6 @@ import sys +import duckdb from rich.progress import Progress, TaskID from cumulus_library import base_utils, databases, enums, errors, study_manifest @@ -27,7 +28,11 @@ def _execute_drop_queries( name=view_table[0], view_or_table=view_table[1] ) with base_utils.query_console_output(verbose, drop_view_table, progress, task): - cursor.execute(drop_view_table) + try: + cursor.execute(drop_view_table) + # did we hit an exception in unit testing on athena-specific quoting? + except duckdb.duckdb.ParserException: + cursor.execute(drop_view_table.replace("`", '"')) def _get_unprotected_stats_view_table( @@ -147,7 +152,10 @@ def clean_study( if dedicated := manifest.get_dedicated_schema(): view_table_list = [ ( - f"`{dedicated}`.`{x[0]}`", + # Athena uses different quoting strategies for drop view statements + # versus drop table statements. -_- + # TODO: Consider moving this logic to a database object? + f'"{dedicated}"."{x[0]}"' if x[1] == "VIEW" else f"`{dedicated}`.`{x[0]}`", x[1], ) for x in view_table_list diff --git a/pyproject.toml b/pyproject.toml index 991a1e3e..76b0b6ca 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -3,7 +3,7 @@ name = "cumulus-library" requires-python = ">= 3.11" dependencies = [ "cumulus-fhir-support >= 1.2", - "duckdb >= 1.1", + "duckdb >= 1.1.3", "Jinja2 > 3", "pandas <3, >=2.1.3", "psmpy <1, >=0.3.13", diff --git a/tests/test_actions.py b/tests/test_actions.py index e963e5b1..607b60a9 100644 --- a/tests/test_actions.py +++ b/tests/test_actions.py @@ -35,7 +35,6 @@ (None, None, None, False, "study_valid__etl_table", does_not_raise()), (None, None, None, False, "study_valid__nlp_table", does_not_raise()), (None, None, None, False, "study_valid__lib_table", does_not_raise()), - (None, None, None, False, "study_valid__lib", does_not_raise()), (None, "foo", "y", False, "foo_table", does_not_raise()), (None, "foo", "n", False, "foo_table", pytest.raises(SystemExit)), (True, None, "y", True, "study_valid__table", does_not_raise()), @@ -103,6 +102,31 @@ def test_clean_study(mock_db_config, verbose, prefix, confirm, stats, target, ra assert ("study_valid__456",) not in remaining_tables +def test_clean_dedicated_schema(mock_db_config): + with mock.patch.object(builtins, "input", lambda _: False): + mock_db_config.schema = "dedicated" + manifest = study_manifest.StudyManifest("./tests/test_data/study_dedicated_schema/") + mock_db_config.db.cursor().execute("CREATE SCHEMA dedicated") + builder.run_protected_table_builder( + config=mock_db_config, + manifest=manifest, + ) + mock_db_config.db.cursor().execute("CREATE TABLE dedicated.table_1 (test int)") + mock_db_config.db.cursor().execute( + "CREATE VIEW dedicated.view_2 AS SELECT * FROM dedicated.table_1" + ) + cleaner.clean_study(config=mock_db_config, manifest=manifest) + remaining_tables = ( + mock_db_config.db.cursor() + .execute("select distinct(table_name) from information_schema.tables") + .fetchall() + ) + print(remaining_tables) + assert (f"{enums.ProtectedTables.TRANSACTIONS.value}",) in remaining_tables + assert ("table_1",) not in remaining_tables + assert ("view_2",) not in remaining_tables + + def test_clean_throws_error_on_missing_params(mock_db_config): with pytest.raises(errors.CumulusLibraryError): cleaner.clean_study(config=mock_db_config, manifest=None) diff --git a/tests/test_data/study_dedicated_schema/module2.py b/tests/test_data/study_dedicated_schema/module2.py index ce4a5eda..d1bee89f 100644 --- a/tests/test_data/study_dedicated_schema/module2.py +++ b/tests/test_data/study_dedicated_schema/module2.py @@ -5,6 +5,4 @@ class ModuleTwoRunner(cumulus_library.BaseTableBuilder): display_text = "module2" def prepare_queries(self, *args, **kwargs): - self.queries.append( - "CREATE TABLE IF NOT EXISTS study_dedicated_schema__table_2 (test int);" - ) + self.queries.append("CREATE VIEW IF NOT EXISTS study_dedicated_schema__view_2 (test int);")