
Commit

fix projects in schemas & multiple bigquery options
xnuinside committed Nov 20, 2021
1 parent f1a1036 commit 4613952
Showing 8 changed files with 176 additions and 10 deletions.
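Taken together, the change lets a BigQuery schema name with a project prefix (`project.dataset` in backticks) parse into separate `project` and `schema_name` fields, and lets a table declare more than one `OPTIONS(...)` block. A minimal usage sketch, based on the tests added in this commit; the import and `run()` arguments follow the library's documented API, and anything beyond what the tests assert is not guaranteed:

```python
# Sketch of what this commit enables, mirroring the new tests below;
# assumes simple-ddl-parser 0.22.4 is installed.
from simple_ddl_parser import DDLParser

# Schema whose backticked name carries a BigQuery project prefix.
schema_ddl = "CREATE SCHEMA IF NOT EXISTS `my.data-cdh-hub`"
result = DDLParser(schema_ddl).run(group_by_type=True, output_mode="bigquery")
# Per test_schema_with_project_name:
# result["schemas"] == [{"project": "my", "schema_name": "data-cdh-hub"}]

# Table with several OPTIONS(...) blocks, now merged into one options list.
table_ddl = """
CREATE TABLE `my.data-cdh-hub-REF-CALENDAR` (calendar_dt DATE)
OPTIONS (location="location")
OPTIONS (description="Calendar table");
"""
parsed = DDLParser(table_ddl).run(group_by_type=True, output_mode="bigquery")
print(parsed["tables"][0]["options"])
# [{'location': '"location"'}, {'description': '"Calendar table"'}]
```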
7 changes: 7 additions & 0 deletions CHANGELOG.txt
@@ -1,3 +1,10 @@
**v0.22.4**
### Fixes:

### BigQuery:
1. Fixed issue with parsing schemas with a project in the name.
2. Added support for multiple OPTIONS() statements.

**v0.22.3**
### Fixes:

7 changes: 7 additions & 0 deletions README.md
@@ -385,6 +385,13 @@ Big thanks for the involving & contribution with test cases with DDL samples & o


## Changelog
**v0.22.4**
### Fixes:

### BigQuery:
1. Fixed issue with parsing schemas with a project in the name.
2. Added support for multiple OPTIONS() statements.

**v0.22.3**
### Fixes:

12 changes: 12 additions & 0 deletions docs/README.rst
@@ -443,6 +443,18 @@ Big thanks for the involving & contribution with test cases with DDL samples & o
Changelog
---------

**v0.22.4**

Fixes:
^^^^^^

BigQuery:
^^^^^^^^^


#. Fixed issue with parsing schemas with a project in the name.
#. Added support for multiple OPTIONS() statements.

**v0.22.3**

Fixes:
2 changes: 1 addition & 1 deletion pyproject.toml
@@ -1,6 +1,6 @@
[tool.poetry]
name = "simple-ddl-parser"
version = "0.22.3"
version = "0.22.4"
description = "Simple DDL Parser to parse SQL & dialects like HQL, TSQL (MSSQL), Oracle, AWS Redshift, Snowflake, MySQL, PostgreSQL, etc ddl files to json/python dict with full information about columns: types, defaults, primary keys, etc.; sequences, alters, custom types & other entities from ddl."
authors = ["Iuliia Volkova <[email protected]>"]
license = "MIT"
13 changes: 12 additions & 1 deletion simple_ddl_parser/dialects/bigquery.py
@@ -1,9 +1,20 @@
class BigQuery:
def p_expression_options(self, p):
"""expr : expr options """
"""expr : expr multiple_options"""
p[0] = p[1]
p[1].update(p[2])

def p_multiple_options(self, p):
"""multiple_options : options
| multiple_options options
"""
print(list(p), "\n")
if len(p) > 2:
p[1]["options"].extend(p[2]["options"])
p[0] = p[1]
else:
p[0] = p[1]

def p_options(self, p):
"""options : OPTIONS LP id_equals RP"""
p_list = list(p)
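The new `multiple_options` rule is left-recursive: the first `OPTIONS(...)` block produces an `{'options': [...]}` dict, and every further block extends that list. The same folding, sketched outside the PLY machinery; the dict shapes mirror what `p_options` returns per block, and the function name is illustrative only:

```python
# Illustrative stand-in for the accumulation done by p_multiple_options;
# each element mimics the {'options': [...]} dict built for one OPTIONS block.
def fold_options(blocks):
    merged = blocks[0]
    for block in blocks[1:]:
        merged["options"].extend(block["options"])
    return merged

blocks = [
    {"options": [{"location": '"location"'}]},
    {"options": [{"description": '"Calendar table"'}]},
    {"options": [{"name": '"path"'}]},
]
print(fold_options(blocks))
# {'options': [{'location': '"location"'}, {'description': '"Calendar table"'}, {'name': '"path"'}]}
```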
30 changes: 23 additions & 7 deletions simple_ddl_parser/dialects/sql.py
@@ -388,18 +388,34 @@ def p_create_schema(self, p: List) -> None:
| create_schema options
"""
p_list = list(p)

p[0] = {}
print(p_list)
# if isinstance(p_list[1], dict):
# p[0] = p_list[1]
# self.set_properties_for_schema_and_database(p, p_list)
auth_ind = None
if isinstance(p_list[1], dict):
p[0] = p_list[1]
self.set_properties_for_schema_and_database(p, p_list)
elif auth in p_list:
auth_ind = p_list.index(auth)
self.set_auth_property_in_schema(p, p_list)
elif isinstance(p_list[-1], dict):
p[0] = {"schema": p_list[-1]["table_name"]}
if p_list[-1].get("schema"):
p[0]["project"] = p_list[-1]["schema"]
else:
p[0] = {"schema_name": p_list[-1]}

print(p_list)
if isinstance(p_list[-1], str):
if auth_ind:
schema_name = p_list[auth_ind - 1]
if schema_name is None:
schema_name = p_list[auth_ind + 1]

else:
schema_name = p_list[-1]

p[0]["schema_name"] = schema_name.replace("`", "")

if len(p_list) > 4 and not auth_ind and "." in p_list:
p[0]["project"] = p_list[-3].replace("`", "")
print(p[0])

def p_create_database(self, p: List) -> None:
"""create_database : CREATE DATABASE id
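For `CREATE SCHEMA`, the handler now strips backticks from the name and, when the statement uses the dotted `project.schema` form, records the token before the dot as `project`. A rough, self-contained sketch of that naming logic (illustrative only; `split_schema_name` is a hypothetical helper, not part of simple_ddl_parser):

```python
# Rough sketch of the backtick stripping and project split performed above.
def split_schema_name(raw: str) -> dict:
    name = raw.replace("`", "")
    if "." in name:
        project, schema_name = name.split(".", 1)
        return {"project": project, "schema_name": schema_name}
    return {"schema_name": name}

print(split_schema_name("`my.data-cdh-hub`"))
# {'project': 'my', 'schema_name': 'data-cdh-hub'}
```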
98 changes: 97 additions & 1 deletion tests/test_bigquery.py
@@ -35,7 +35,7 @@ def test_dataset_in_output():
}

ddl = """
CREATE TABLE mydataset.newtable ( x INT64 )
CREATE TABLE mydataset.newtable ( x INT64 );
"""
result = DDLParser(ddl).run(group_by_type=True, output_mode="bigquery")
assert expected == result
@@ -428,6 +428,7 @@ def test_table_name_with_project_id():
{
"properties": {"options": [{"location": '"project-location"'}]},
"schema_name": "calender",
"project": "project",
}
],
"sequences": [],
@@ -587,6 +588,7 @@ def test_multiple_options():
{
"properties": {"options": [{"location": '"project-location"'}]},
"schema_name": "calender",
"project": "project",
}
],
"sequences": [],
@@ -724,3 +726,97 @@ def test_ars_in_arrays_in_option():
"types": [],
}
assert expected == result


def test_multiple_options_statements():
ddl = """
CREATE TABLE `my.data-cdh-hub-REF-CALENDAR` (
calendar_dt DATE,
calendar_dt_id INT
)
OPTIONS (
location="location"
)
OPTIONS (
description="Calendar table records reference list of calendar dates and related attributes used for reporting."
)
OPTIONS (
name ="path"
)
OPTIONS (
kms_two="path",
two="two two"
)
OPTIONS (
kms_three="path",
three="three",
threethree="three three"
)
OPTIONS (
kms_four="path",
four="four four",
fourin="four four four",
fourlast="four four four four"
);
"""
result = DDLParser(ddl).run(group_by_type=True, output_mode="bigquery")
expected = {
"ddl_properties": [],
"domains": [],
"schemas": [],
"sequences": [],
"tables": [
{
"alter": {},
"checks": [],
"columns": [
{
"check": None,
"default": None,
"name": "calendar_dt",
"nullable": True,
"references": None,
"size": None,
"type": "DATE",
"unique": False,
},
{
"check": None,
"default": None,
"name": "calendar_dt_id",
"nullable": True,
"references": None,
"size": None,
"type": "INT",
"unique": False,
},
],
"dataset": "`my",
"index": [],
"options": [
{"location": '"location"'},
{
"description": '"Calendar table records reference '
"list of calendar dates and related "
'attributes used for reporting."'
},
{"name": '"path"'},
{"kms_two": '"path"'},
{"two": '"two two"'},
{"kms_three": '"path"'},
{"three": '"three"'},
{"threethree": '"three three"'},
{"kms_four": '"path"'},
{"four": '"four four"'},
{"fourin": '"four four four"'},
{"fourlast": '"four four four four"'},
],
"partitioned_by": [],
"primary_key": [],
"table_name": "data-cdh-hub-REF-CALENDAR`",
"tablespace": None,
}
],
"types": [],
}
assert expected == result
17 changes: 17 additions & 0 deletions tests/test_simple_ddl_parser.py
@@ -2312,3 +2312,20 @@ def test_lines_starts_with_statement_keys():
"types": [],
}
assert expected == result


def test_schema_with_project_name():

ddl = """
CREATE SCHEMA IF NOT EXISTS `my.data-cdh-hub`
"""
result = DDLParser(ddl).run(group_by_type=True, output_mode="bigquery")
expected = {
"ddl_properties": [],
"domains": [],
"schemas": [{"project": "my", "schema_name": "data-cdh-hub"}],
"sequences": [],
"tables": [],
"types": [],
}
assert expected == result
