-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathDataConversion.py
234 lines (209 loc) · 9.05 KB
/
DataConversion.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
'''
DataConversion.py
Version: 1.0
Author: Jonathan Hoyle
Last Updated: 11/15/2018
Description: Used to convert exported item JSON files from scrapes into more usable data formats
'''
# ---------------------- BEGIN IMPORTS ----------------------
import json, unicodedata, dataset
# ----------------------- END IMPORTS -----------------------
# ----------------------- BEGIN CLASS -----------------------
# region Base Conversion
# First Conversion class, handles turning the scraped json items into a comprehensive json database
class Conversion(object):
    """Turn a scraped-item JSON export into a dictionary of item records.

    ``convert`` reads a JSON file containing a sequence of item objects and
    returns a dictionary mapping each item's ``name`` to a record with a
    fixed set of keys (see ``_new_record``).
    """

    # Keys copied unchanged from a scraped item into its record when present.
    _COPIED_KEYS = ('image_urls', 'colors', 'image_paths', 'color_data', 'origin',
                    'images', 'skus', 'goto', 'url', 'details', 'sizes')

    # ---------------- BEGIN SHORTCUT METHODS -------------------
    def no_op(self, anything):
        """Do nothing with ``anything``; kept as a placeholder for existing callers."""
        pass
    # ----------------- END SHORTCUT METHODS --------------------

    # --------------- BEGIN BASIC PARSING METHODS ---------------
    def tildeSeparate(self, sep):
        """Return the pieces of ``sep`` split on tildes as a list."""
        return sep.split('~')

    def commaSeparate(self, sep):
        """Return the pieces of ``sep`` split on commas as a list."""
        return sep.split(',')
    # ---------------- END BASIC PARSING METHODS ----------------

    # ---------------- BEGIN HELPER METHODS ---------------------
    @staticmethod
    def _new_record():
        """Return a fresh record holding the default value for every field."""
        # Built anew on every call so records never share list/dict defaults.
        return {'sku': '', 'url': '', 'price': '', 'description': '', 'image_urls': [],
                'colors': [], 'image_paths': [], 'images': [], 'details': [], 'skus': [],
                'goto': [], 'origin': '', 'sizes': [], 'color_data': {}}

    @staticmethod
    def _strip_if_text(value):
        """Return ``value.strip()`` when ``value`` supports it, else ``value`` unchanged."""
        try:
            return value.strip()
        except (AttributeError, TypeError):
            return value

    @staticmethod
    def _clean_description(value):
        """Remove the unwanted character from a description and strip it.

        Falls back to a plain strip, and finally to the untouched value, when
        ``value`` is not text (for example a list of paragraphs).
        """
        # NOTE(review): the character removed here appears as a plain space in
        # this copy of the file, which deletes every space in the description.
        # The original comment calls it a "weird character set" -- confirm
        # whether a non-breaking space (or similar) was intended.
        try:
            return value.replace(' ', '').strip()
        except (AttributeError, TypeError, UnicodeError):
            pass
        try:
            return value.strip()
        except (AttributeError, TypeError):
            return value
    # ----------------- END HELPER METHODS ----------------------

    # ---------------- BEGIN PROGRAM METHODS --------------------
    def convert(self, fileName):
        """Read the scraped items in ``fileName`` and return them keyed by name.

        Parameters:
            fileName: path of a JSON file containing a sequence of item objects.

        Returns:
            A dictionary mapping each item name to its record. Items without a
            usable ``name`` are skipped. Items sharing a name share one record,
            and a later item overwrites the fields it provides. A field missing
            from an item keeps its current (default) value; every field that is
            present is still copied.

        Raises:
            Whatever ``open`` and ``json.load`` raise for an unreadable or
            malformed file.
        """
        # Open and parse the json file
        with open(fileName) as data_file:
            scraped_items = json.load(data_file)

        records = {}
        for item in scraped_items:
            try:
                name = item['name']
                record = records.setdefault(name, self._new_record())
            except (KeyError, TypeError):
                # No name, a non-object entry, or an unhashable name: skip it.
                continue

            if 'description' in item:
                record['description'] = self._clean_description(item['description'])
            if 'price' in item:
                # Prices may be numbers or strings; only text gets stripped.
                record['price'] = self._strip_if_text(item['price'])
            for key in self._COPIED_KEYS:
                if key in item:
                    record[key] = item[key]
            if 'sku' in item:
                record['sku'] = self._strip_if_text(item['sku'])

        # Return the properly formatted json data
        return records
    # ----------------- END PROGRAM METHODS ---------------------
# endregion
# ------------------------ END CLASS ------------------------
# ----------------------- BEGIN CLASS -----------------------
# region SQL Conversion
# Conversion Class that takes a file and uses its data to add/update information in the appropriate database
class SQLConversion(object):
    """Load scraped items from a JSON file and upsert them into the items table.

    Each item's ``colors`` list is split into real colors and size entries
    (see ``_looks_like_size``); one row is upserted per color/size pair,
    with ``['sku']`` passed as the upsert key list.
    """

    # Connect to the database of items
    db = dataset.connect('sqlite:///C:\\Users\\Shipping\\Documents\\Integration Dev\\SquarespaceUploader\\save_data.db')
    # Begin referencing the items table
    table = db['items']
    # Set up the basic json data variable
    jd = None

    # Three-letter entries that are colors rather than sizes.
    _THREE_LETTER_COLORS = ('Sky', 'Red', 'Fig', 'Rio')
    # Characters whose presence marks an entry as a size.
    _DIGITS = '0123456789'

    # ---------------- BEGIN HELPER METHODS ---------------------
    @staticmethod
    def _pad(values, length):
        """Append empty strings to ``values`` in place until it has ``length`` entries."""
        missing = length - len(values)
        if missing > 0:
            values.extend([''] * missing)

    @staticmethod
    def _strip_if_text(value):
        """Return ``value.strip()`` when ``value`` supports it, else ``value`` unchanged."""
        try:
            return value.strip()
        except (AttributeError, TypeError):
            return value

    @staticmethod
    def _clean_description(value):
        """Remove the unwanted character from a description and strip it.

        Falls back to a plain strip, and finally to the untouched value, when
        ``value`` is not text (for example a list of paragraphs).
        """
        # NOTE(review): the character removed here appears as a plain space in
        # this copy of the file, which deletes every space in the description.
        # The original comment calls it a "weird character set" -- confirm
        # whether a non-breaking space (or similar) was intended.
        try:
            return value.replace(' ', '').strip()
        except (AttributeError, TypeError, UnicodeError):
            pass
        try:
            return value.strip()
        except (AttributeError, TypeError):
            return value

    def _looks_like_size(self, entry):
        """Return True when a ``colors`` entry is treated as a size.

        An entry counts as a size when it contains a digit, is one or two
        long, is three long and not one of ``_THREE_LETTER_COLORS``, is four
        long and contains 'XXX', or contains 'Fit'. Entries that support
        neither membership tests nor ``len`` are not sizes.
        """
        try:
            if any(digit in entry for digit in self._DIGITS):
                return True
            length = len(entry)
            if length in (1, 2):
                return True
            if length == 3 and entry not in self._THREE_LETTER_COLORS:
                return True
            if length == 4 and 'XXX' in entry:
                return True
            return 'Fit' in entry
        except TypeError:
            return False
    # ----------------- END HELPER METHODS ----------------------

    # ---------------- BEGIN PROGRAM METHODS --------------------
    def convert(self, file_name):
        """Upsert every color/size combination of every item in ``file_name``.

        Parameters:
            file_name: path of a JSON file containing a sequence of item objects.

        Raises:
            KeyError: when an item lacks one of the required keys (``name``,
                ``colors``, ``skus``, ``details``, ``origin``, ``url``,
                ``image_urls``, ``image_paths``, ``description``, ``price``).
            Whatever ``open`` and ``json.load`` raise for an unreadable or
            malformed file.
        """
        # Open the file and save its converted data
        with open(file_name) as data_file:
            items = json.load(data_file)

        for item in items:
            # NOTE(review): on Python 3 ``encode`` yields bytes, so ``name`` is
            # stored as bytes rather than text -- confirm that is intended.
            name = unicodedata.normalize('NFKD', item['name']).encode('ascii', 'ignore')
            # Basic assignments
            colors = item['colors']
            skus = item['skus']
            details = item['details']
            origin = item['origin']
            url = item['url']
            image_urls = item['image_urls']
            image_paths = item['image_paths']
            # Optional keys default to empty lists
            goto = item.get('goto', [])
            images = item.get('images', [])

            # Make every per-color list at least as long as ``colors`` so that
            # indexing by color position below cannot run past the end.
            color_count = len(colors)
            self._pad(images, color_count)
            self._pad(goto, color_count)
            self._pad(image_urls, color_count)
            self._pad(image_paths, color_count)

            description = self._clean_description(item['description'])
            # Prices may be numbers or strings; only text gets stripped.
            price = self._strip_if_text(item['price'])

            # Lists are flattened to strings so they can be stored in the SQL DB
            if isinstance(details, list):
                details = "\n".join(details)
            if isinstance(description, list):
                description = " ".join(description)

            # Pull the size entries out of the color list (mutates it in place)
            sizes = [entry for entry in colors if self._looks_like_size(entry)]
            for entry in sizes:
                colors.remove(entry)

            # Go through each color and size
            for color in colors:
                # First position of this color in the filtered color list
                position = colors.index(color)
                for size in sizes:
                    # Best effort: a row that cannot be built or written is
                    # skipped so the rest of the import continues.
                    # NOTE(review): the ``image_oath`` column name looks like a
                    # typo for ``image_path``; kept because existing data and
                    # readers may depend on it -- confirm before renaming.
                    try:
                        self.table.upsert(
                            dict(sku=skus[position] + str(size), name=name, origin=origin,
                                 description=description, image_url=image_urls[position],
                                 color=color, image=images[position], price=price,
                                 goto=goto[position], url=url, details=details, size=size,
                                 image_oath=image_paths[position]),
                            ['sku'])
                    except Exception:
                        pass

        self.db.commit()
    # ----------------- END PROGRAM METHODS ---------------------
# endregion
# ------------------------ END CLASS ------------------------