forked from doitintl/bigquery-optimization-queries
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathgenerate_sql_files.py
83 lines (72 loc) · 2.61 KB
/
generate_sql_files.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
import argparse
import os.path
# Names of the directories that hold the SQL files to be processed.
audit_log_directory = "audit_log"
information_schema_directory = "information_schema"

# Every source directory that main() walks, in processing order.
directories = [
    audit_log_directory,
    information_schema_directory,
]
def init_argparse() -> argparse.ArgumentParser:
    """Build the command-line parser for the SQL file generator.

    Positional arguments:
        project: Project name.
        output: Output location.

    Optional arguments:
        --location: Dataset location (default ``region-us``).
        --dataset: Dataset name for audit logs (default ``doitintl-cmp-bq``).

    Returns:
        The configured ``argparse.ArgumentParser``.
    """
    parser = argparse.ArgumentParser(
        # The usage line is written by hand, so it has to list every option
        # the parser accepts -- including --dataset.
        usage=(
            "%(prog)s [--location <dataset-location>] [--dataset <dataset>] "
            "<project> <output-directory>"
        ),
        description="Generate SQL files for specific projects and/or datasets into output directory."
    )
    parser.add_argument(
        'project',
        metavar='project',
        type=str,
        help='Project name'
    )
    parser.add_argument(
        '--location',
        metavar='location',
        type=str,
        help='Dataset location',
        default='region-us'
    )
    parser.add_argument(
        '--dataset',
        metavar='dataset',
        type=str,
        help='Dataset name for audit logs',
        default='doitintl-cmp-bq'
    )
    parser.add_argument(
        'output',
        metavar='output',
        type=str,
        help='Output location'
    )
    return parser
def main() -> None:
    """Generate project-specific copies of the SQL files.

    Parses the command line, then for every directory listed in
    ``directories`` reads each file whose name ends in ``.sql``, replaces the
    ``<project-name>``, ``<dataset-region>`` and ``<dataset>`` placeholders
    with the values given on the command line, and writes the result to
    ``<output>/<directory>/<file name>``.

    Raises:
        OSError: If a source directory cannot be scanned, or a file cannot
            be read or written.
    """
    args = init_argparse().parse_args()

    project_name = args.project
    region = args.location
    dataset = args.dataset
    output_directory = args.output

    # makedirs also creates missing parent directories and, with
    # exist_ok=True, does not fail when the directory is already there.
    os.makedirs(output_directory, exist_ok=True)

    for current_directory in directories:
        output_base_path = os.path.join(output_directory, current_directory)

        # Use scandir as a context manager so the directory handle is
        # released even if processing a file raises.
        with os.scandir(current_directory) as entries:
            for entry in entries:
                # Only grab SQL files: match the extension as a suffix of the
                # file name, so names such as "x.sql.bak" are skipped.
                if not (entry.is_file() and entry.name.endswith('.sql')):
                    continue

                with open(entry.path) as source_file:
                    contents = source_file.read()

                # Perform a replace on the contents
                contents = contents.replace('<project-name>', project_name)
                contents = contents.replace('<dataset-region>', region)
                contents = contents.replace('<dataset>', dataset)

                # Create the per-directory output folder only once there is
                # something to write into it.
                os.makedirs(output_base_path, exist_ok=True)

                # Write the output file out
                output_filename = os.path.join(output_base_path, entry.name)
                with open(output_filename, "w") as output_file:
                    output_file.write(contents)
# Run the generator only when this file is executed as a script.
if __name__ == "__main__":
    main()