
Commit

fix projects in schemas & multiple bigquery options
xnuinside committed Nov 20, 2021
1 parent f1a1036 commit 4613952
Showing 8 changed files with 176 additions and 10 deletions.
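Taken together, the change lets a BigQuery schema name with a project prefix (`project.dataset` in backticks) parse into separate `project` and `schema_name` fields, and lets a table declare more than one `OPTIONS(...)` block. A minimal usage sketch, based on the tests added in this commit; the import and `run()` arguments follow the library's documented API, and anything beyond what the tests assert is not guaranteed:

```python
# Sketch of what this commit enables, mirroring the new tests below;
# assumes simple-ddl-parser 0.22.4 is installed.
from simple_ddl_parser import DDLParser

# Schema whose backticked name carries a BigQuery project prefix.
schema_ddl = "CREATE SCHEMA IF NOT EXISTS `my.data-cdh-hub`"
result = DDLParser(schema_ddl).run(group_by_type=True, output_mode="bigquery")
# Per test_schema_with_project_name:
# result["schemas"] == [{"project": "my", "schema_name": "data-cdh-hub"}]

# Table with several OPTIONS(...) blocks, now merged into one options list.
table_ddl = """
CREATE TABLE `my.data-cdh-hub-REF-CALENDAR` (calendar_dt DATE)
OPTIONS (location="location")
OPTIONS (description="Calendar table");
"""
parsed = DDLParser(table_ddl).run(group_by_type=True, output_mode="bigquery")
print(parsed["tables"][0]["options"])
# [{'location': '"location"'}, {'description': '"Calendar table"'}]
```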
7 changes: 7 additions & 0 deletions CHANGELOG.txt
@@ -1,3 +1,10 @@
**v0.22.4**
### Fixes:

### BigQuery:
1. Fixed issue with parsing schemas with a project in the name.
2. Added support for multiple OPTIONS() statements.

**v0.22.3**
### Fixes:

7 changes: 7 additions & 0 deletions README.md
@@ -385,6 +385,13 @@ Big thanks for the involving & contribution with test cases with DDL samples & o


## Changelog
**v0.22.4**
### Fixes:

### BigQuery:
1. Fixed issue with parsing schemas with a project in the name.
2. Added support for multiple OPTIONS() statements.

**v0.22.3**
### Fixes:

12 changes: 12 additions & 0 deletions docs/README.rst
@@ -443,6 +443,18 @@ Big thanks for the involving & contribution with test cases with DDL samples & o
Changelog
---------

**v0.22.4**

Fixes:
^^^^^^

BigQuery:
^^^^^^^^^


#. Fixed issue with parsing schemas with a project in the name.
#. Added support for multiple OPTIONS() statements.

**v0.22.3**

Fixes:
2 changes: 1 addition & 1 deletion pyproject.toml
@@ -1,6 +1,6 @@
[tool.poetry]
name = "simple-ddl-parser"
version = "0.22.3"
version = "0.22.4"
description = "Simple DDL Parser to parse SQL & dialects like HQL, TSQL (MSSQL), Oracle, AWS Redshift, Snowflake, MySQL, PostgreSQL, etc ddl files to json/python dict with full information about columns: types, defaults, primary keys, etc.; sequences, alters, custom types & other entities from ddl."
authors = ["Iuliia Volkova <[email protected]>"]
license = "MIT"
13 changes: 12 additions & 1 deletion simple_ddl_parser/dialects/bigquery.py
@@ -1,9 +1,20 @@
class BigQuery:
def p_expression_options(self, p):
"""expr : expr options """
"""expr : expr multiple_options"""
p[0] = p[1]
p[1].update(p[2])

def p_multiple_options(self, p):
"""multiple_options : options
| multiple_options options
"""
print(list(p), "\n")
if len(p) > 2:
p[1]["options"].extend(p[2]["options"])
p[0] = p[1]
else:
p[0] = p[1]

def p_options(self, p):
"""options : OPTIONS LP id_equals RP"""
p_list = list(p)
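The new `multiple_options` rule is left-recursive: the first `OPTIONS(...)` block produces an `{'options': [...]}` dict, and every further block extends that list. The same folding, sketched outside the PLY machinery; the dict shapes mirror what `p_options` returns per block, and the function name is illustrative only:

```python
# Illustrative stand-in for the accumulation done by p_multiple_options;
# each element mimics the {'options': [...]} dict built for one OPTIONS block.
def fold_options(blocks):
    merged = blocks[0]
    for block in blocks[1:]:
        merged["options"].extend(block["options"])
    return merged

blocks = [
    {"options": [{"location": '"location"'}]},
    {"options": [{"description": '"Calendar table"'}]},
    {"options": [{"name": '"path"'}]},
]
print(fold_options(blocks))
# {'options': [{'location': '"location"'}, {'description': '"Calendar table"'}, {'name': '"path"'}]}
```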
30 changes: 23 additions & 7 deletions simple_ddl_parser/dialects/sql.py
@@ -388,18 +388,34 @@ def p_create_schema(self, p: List) -> None:
| create_schema options
"""
p_list = list(p)

p[0] = {}
print(p_list)
# if isinstance(p_list[1], dict):
# p[0] = p_list[1]
# self.set_properties_for_schema_and_database(p, p_list)
auth_ind = None
if isinstance(p_list[1], dict):
p[0] = p_list[1]
self.set_properties_for_schema_and_database(p, p_list)
elif auth in p_list:
auth_ind = p_list.index(auth)
self.set_auth_property_in_schema(p, p_list)
elif isinstance(p_list[-1], dict):
p[0] = {"schema": p_list[-1]["table_name"]}
if p_list[-1].get("schema"):
p[0]["project"] = p_list[-1]["schema"]
else:
p[0] = {"schema_name": p_list[-1]}

print(p_list)
if isinstance(p_list[-1], str):
if auth_ind:
schema_name = p_list[auth_ind - 1]
if schema_name is None:
schema_name = p_list[auth_ind + 1]

else:
schema_name = p_list[-1]

p[0]["schema_name"] = schema_name.replace("`", "")

if len(p_list) > 4 and not auth_ind and "." in p_list:
p[0]["project"] = p_list[-3].replace("`", "")
print(p[0])

def p_create_database(self, p: List) -> None:
"""create_database : CREATE DATABASE id
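For `CREATE SCHEMA`, the handler now strips backticks from the name and, when the statement uses the dotted `project.schema` form, records the token before the dot as `project`. A rough, self-contained sketch of that naming logic (illustrative only; `split_schema_name` is a hypothetical helper, not part of simple_ddl_parser):

```python
# Rough sketch of the backtick stripping and project split performed above.
def split_schema_name(raw: str) -> dict:
    name = raw.replace("`", "")
    if "." in name:
        project, schema_name = name.split(".", 1)
        return {"project": project, "schema_name": schema_name}
    return {"schema_name": name}

print(split_schema_name("`my.data-cdh-hub`"))
# {'project': 'my', 'schema_name': 'data-cdh-hub'}
```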
98 changes: 97 additions & 1 deletion tests/test_bigquery.py
@@ -35,7 +35,7 @@ def test_dataset_in_output():
}

ddl = """
CREATE TABLE mydataset.newtable ( x INT64 )
CREATE TABLE mydataset.newtable ( x INT64 );
"""
result = DDLParser(ddl).run(group_by_type=True, output_mode="bigquery")
assert expected == result
@@ -428,6 +428,7 @@ def test_table_name_with_project_id():
{
"properties": {"options": [{"location": '"project-location"'}]},
"schema_name": "calender",
"project": "project",
}
],
"sequences": [],
@@ -587,6 +588,7 @@ def test_multiple_options():
{
"properties": {"options": [{"location": '"project-location"'}]},
"schema_name": "calender",
"project": "project",
}
],
"sequences": [],
@@ -724,3 +726,97 @@ def test_ars_in_arrays_in_option():
"types": [],
}
assert expected == result


def test_multiple_options_statements():
ddl = """
CREATE TABLE `my.data-cdh-hub-REF-CALENDAR` (
calendar_dt DATE,
calendar_dt_id INT
)
OPTIONS (
location="location"
)
OPTIONS (
description="Calendar table records reference list of calendar dates and related attributes used for reporting."
)
OPTIONS (
name ="path"
)
OPTIONS (
kms_two="path",
two="two two"
)
OPTIONS (
kms_three="path",
three="three",
threethree="three three"
)
OPTIONS (
kms_four="path",
four="four four",
fourin="four four four",
fourlast="four four four four"
);
"""
result = DDLParser(ddl).run(group_by_type=True, output_mode="bigquery")
expected = {
"ddl_properties": [],
"domains": [],
"schemas": [],
"sequences": [],
"tables": [
{
"alter": {},
"checks": [],
"columns": [
{
"check": None,
"default": None,
"name": "calendar_dt",
"nullable": True,
"references": None,
"size": None,
"type": "DATE",
"unique": False,
},
{
"check": None,
"default": None,
"name": "calendar_dt_id",
"nullable": True,
"references": None,
"size": None,
"type": "INT",
"unique": False,
},
],
"dataset": "`my",
"index": [],
"options": [
{"location": '"location"'},
{
"description": '"Calendar table records reference '
"list of calendar dates and related "
'attributes used for reporting."'
},
{"name": '"path"'},
{"kms_two": '"path"'},
{"two": '"two two"'},
{"kms_three": '"path"'},
{"three": '"three"'},
{"threethree": '"three three"'},
{"kms_four": '"path"'},
{"four": '"four four"'},
{"fourin": '"four four four"'},
{"fourlast": '"four four four four"'},
],
"partitioned_by": [],
"primary_key": [],
"table_name": "data-cdh-hub-REF-CALENDAR`",
"tablespace": None,
}
],
"types": [],
}
assert expected == result
17 changes: 17 additions & 0 deletions tests/test_simple_ddl_parser.py
@@ -2312,3 +2312,20 @@ def test_lines_starts_with_statement_keys():
"types": [],
}
assert expected == result


def test_schema_with_project_name():

ddl = """
CREATE SCHEMA IF NOT EXISTS `my.data-cdh-hub`
"""
result = DDLParser(ddl).run(group_by_type=True, output_mode="bigquery")
expected = {
"ddl_properties": [],
"domains": [],
"schemas": [{"project": "my", "schema_name": "data-cdh-hub"}],
"sequences": [],
"tables": [],
"types": [],
}
assert expected == result
