'''a script to merge more public data into the 'main' datasets as we go along
by @peteyreplies
'''
import csv
#files and folders
new_file = '2011-school-universe-data.csv'
base_path = '../RESOURCES/geocen/'
out_path = '../DATADUMP/GeoCen/merged_public_data.csv'
#first, load csvs into lists of dicts
main_data = []
with open(base_path + 'alabama_main_data.csv', newline='') as main_file:
    for m in csv.DictReader(main_file):
        main_data.append(m)

new_data = []
with open(base_path + new_file, newline='') as new_data_file:
    for n in csv.DictReader(new_data_file):
        new_data.append(n)
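
#NOTE on the merge keys (my assumption about the NCES school-universe file):
#LEAID should be the federal district ID that lines up with fedID in the main data,
#and LIBSPE / LIBSUP are taken to be the librarian and library-support-staff counts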
#loop thru muckrock list, pulling in librarian counts for school districts
for d in main_data:
    if d['agency_type'] == 'School District':
        match = [y for y in new_data if y['LEAID'] == d['fedID']]
        if match:
            d['mls-ala'] = match[0]['LIBSPE']
            d['total_librarians'] = int(match[0]['LIBSPE']) + int(match[0]['LIBSUP'])

#then write every entity back out, with columns sorted alphabetically
fieldnames = sorted(set().union(*(d.keys() for d in main_data)))
with open(out_path, 'w', newline='') as f:
    DW = csv.DictWriter(f, fieldnames, restval='')
    DW.writeheader()
    for d in main_data:
        DW.writerow(d)
        print('written data for entity ' + d['agency_name'])