-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathexport_reponse_data.py
138 lines (119 loc) · 5.05 KB
/
export_reponse_data.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
'''a script that talks to the muckrock api and tries to get a lot of
communication data.
first 40 lines or so are ripped right from https://github.com/MuckRock/API-examples/blob/master/export_all_user_requests.py
as of 10/27/2015
rest by @peteyreplies'''
#!/usr/bin/env python2
# -- coding: utf-8 --
import utils
import urllib, os, json, datetime, requests, urlparse
from datetime import datetime, timedelta, date
import time
import csv
from collections import OrderedDict
# Export per-request response statistics for one MuckRock user to CSV.
#
# For every FOIA request filed by the given user this script fetches the
# request and its agency from the API, works out when (and whether) the
# agency first responded, counts responses and attached files, and writes
# one CSV row per request.

# Set MuckRock params.
api_url = utils.API_URL
token = utils.get_api_key()
headers = utils.get_headers(token)
username = raw_input('Username: ')
next_url = api_url + "foia/?user=" + username
current_page = 0

csv_path = '../DATADUMP/GeoCen/response_data.csv'


def _parse_day(day_text):
    """Return the date for a 'YYYY-MM-DD' string."""
    return datetime.strptime(day_text, '%Y-%m-%d').date()


def _first_response_fields(communications, submit_day):
    """Summarise the first agency response among *communications*.

    Returns a ``(fields, any_response_received)`` pair.  ``fields`` always
    has the same four keys; the values stay blank when there is no response,
    including when *communications* is empty.
    """
    fields = {
        'first_response_date': '',
        'first_responder_name': '',
        'requests_until_first_response': '',
        'days_until_first_response': '',
    }
    outgoing_before_response = 0
    for comm in communications:
        if not comm['response']:
            # Not a response: count it and keep looking.
            outgoing_before_response += 1
            continue
        # Only the date part (first 10 characters) of the timestamp is used.
        response_date = comm['date'].encode('utf-8')[0:10]
        fields['first_response_date'] = response_date
        fields['first_responder_name'] = comm['from_who'].encode('utf-8')
        fields['requests_until_first_response'] = outgoing_before_response
        fields['days_until_first_response'] = (
            _parse_day(response_date) - submit_day).days
        return fields, True
    return fields, False


# Open the output once, truncating any prior CSV.  Binary mode is what the
# Python 2 csv module expects; the handle is closed even if a request fails.
with open(csv_path, 'wb') as csv_file:
    writer = None  # created with the first row, which fixes the column order

    while next_url:
        # We use next_url because the API results are paginated.
        r = requests.get(next_url, headers=headers)
        data = r.json()
        next_url = data['next']

        # Measures progress by page, not by result.
        # NOTE(review): assumes 20 results per page -- confirm against API.
        current_page += 1
        total_pages = (data['count'] / 20.0)
        utils.display_progress(current_page, total_pages)

        for result in data['results']:
            request_id = result['id']
            print("Working on request " + str(request_id))

            # Fetch the full request record and convert from JSON to dict.
            request_url = api_url + 'foia/%d/' % request_id
            print(request_url)
            request = requests.get(request_url, headers=headers)
            print(request)
            request_data = request.json()

            # Submission date, kept as text for the CSV and as a date for
            # the day arithmetic below.
            initial_submit_date = request_data['date_submitted'].encode('utf-8')
            submit_day = _parse_day(initial_submit_date)

            # Row for this request, seeded with the request-level data.
            this_entity = {
                'request_id': request_id,
                'request_url': request_url,
                'muckrock_status': request_data['status'].encode('utf-8'),
                'initial_submit_date': initial_submit_date,
            }

            # Agency details.
            agency_url = api_url + 'agency/%d/' % request_data['agency']
            agency = requests.get(agency_url, headers=headers)
            agency_data = agency.json()

            agency_name = agency_data['name'].encode('utf-8')
            agency_types = agency_data['types']
            agency_type = agency_types[0].encode('utf-8') if agency_types else ''
            agency_address = agency_data['address'].encode('utf-8')
            # The zip is taken to be the last whitespace-separated token of
            # the address; blank when the address is empty.
            address_tokens = agency_address.split()
            agency_zip = address_tokens[-1] if address_tokens else ''

            # Fix school name if necessary: drop a trailing " Schools".
            if agency_type == 'School District':
                if agency_name.split()[-1:] == ['Schools']:
                    agency_name = agency_name[:-8]

            this_entity.update({
                'agency_name': agency_name,
                'agency_id': agency_data['id'],
                'agency_jurisdiction': agency_data['jurisdiction'],
                'agency_type': agency_type,
                'agency_address': agency_address,
                'agency_phone': str(agency_data['phone']),
                'agency_zip': agency_zip,
            })

            # Communications: first response, then totals.
            communications = request_data['communications']
            first_response, any_response_received = _first_response_fields(
                communications, submit_day)
            this_entity.update(first_response)

            responses = [c for c in communications if c['response']]
            this_entity['total_communications'] = len(communications)
            this_entity['any_response_received'] = any_response_received
            this_entity['total_num_responses'] = len(responses)
            this_entity['total_files_received'] = sum(
                len(c['files']) for c in responses)

            # Write the row; columns are the row's keys in sorted order and
            # the header is emitted exactly once, before the first row.
            if writer is None:
                writer = csv.DictWriter(csv_file, sorted(this_entity))
                writer.writeheader()
            writer.writerow(this_entity)
            # Flush per row so finished rows survive a crash mid-run.
            csv_file.flush()
            print('written data for entity ' + agency_data['name'].encode('utf-8'))