-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathcreate_indices.py
executable file
·142 lines (104 loc) · 3.81 KB
/
create_indices.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
#!/usr/bin/python3
import psycopg2
from psycopg2 import extensions as ext
from psycopg2 import sql
import csv
import time
from datetime import date
from datetime import datetime
from project_conf import conf as proj_conf
from data_conf import data_conf
def pg_conn():
conn = psycopg2.connect(proj_conf.conn_str)
return conn
conn = pg_conn()
curs = conn.cursor()
column_names = [c['column_name'] for c in data_conf.multi_selectors]
def column_type(col):
template = """
select data_type from information_schema.columns
where table_name = '{}' and column_name = '{}'
"""
sqlstr = template.format(proj_conf.project_name, col)
curs.execute(sqlstr)
results = curs.fetchone()
if results:
results = results[0]
else:
results = None
return results
def create_foreign_key_table(col):
col_type = column_type(col)
if not col_type:
print("No col found for {}".format(col))
return None
print("CREATING TABLE, %s_tbl" % col)
template = "CREATE TABLE IF NOT EXISTS {}_tbl (id serial primary key, {} {} UNIQUE)"
sqlstr = template.format(col, col, col_type)
curs.execute(sqlstr)
conn.commit()
return True
def uniq_col_vals(col):
template = "SELECT DISTINCT({}) from {}"
sqlstr = template.format(col, proj_conf.project_name)
curs.execute(sqlstr)
uniq_vals = set([i[0] for i in curs.fetchall() ])
return list(uniq_vals)
def populate_table(col):
uniq_vals = uniq_col_vals(col)
table_name = "{}_tbl".format(col)
#special snowflake
#if table_name.startswith('reason_'):
# table_fmt = ext.quote_ident('{}_tbl'.format(col), curs)
# sqlstr = "INSERT INTO %s (SELECT * FROM tow_reason_tbl)" % table_fmt
# curs.execute(sqlstr)
for val in uniq_vals:
if not val and val not in (False, '', 0) : ##ignore empty, but allow False
continue
table_fmt = ext.quote_ident(table_name, curs)
col_fmt = ext.quote_ident(col, curs)
sqlstr = "INSERT INTO %s (%s) values (%%s)" % (table_fmt, col_fmt)
curs.execute(sqlstr, [val])
conn.commit()
return
def swap_col_to_fkeys(col):
#adds new column based on `col`,
#populate it with the foreign key of "old" column
#delete "old" column
#rename new column to old column's name
table_name = proj_conf.project_name
fkey_table = "{}_tbl".format(col)
template = "alter table {} add column IF NOT EXISTS {}_lolnew INTEGER ;"
sqlstr = template.format(proj_conf.project_name, col)
curs.execute(sqlstr)
template = "alter table {} add foreign key ({}_lolnew) references {} (id) ;"
sqlstr = template.format(proj_conf.project_name, col, fkey_table)
curs.execute(sqlstr)
template = "ALTER TABLE {} RENAME COLUMN {} TO {}_old ;"
sqlstr = template.format(proj_conf.project_name, col, col)
curs.execute(sqlstr)
template = "ALTER TABLE {} RENAME COLUMN {}_lolnew TO {} ;"
sqlstr = template.format(proj_conf.project_name, col, col)
curs.execute(sqlstr)
conn.commit()
sqlstr = """
UPDATE {}
SET {} = fk_tbl.id
FROM {} fk_tbl
WHERE fk_tbl.{} = {}.{}_old ;
""".format(proj_conf.project_name, col, fkey_table, col, proj_conf.project_name, col)
curs.execute(sqlstr)
curs.execute("ALTER TABLE {} DROP COLUMN IF EXISTS {}_old".format(proj_conf.project_name, col))
conn.commit()
def process_col(col_name):
print("Creating foreign key table: ", col_name)
success = create_foreign_key_table(col_name)
if not success:
print(":(")
return None
print("Populating foreign key table: ", col_name)
populate_table(col_name)
print("Swapping out columns: ", col_name, "{}_old".format(col_name))
swap_col_to_fkeys(col_name)
for col_name in column_names:
process_col(col_name)