-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathfriends.py
374 lines (302 loc) · 15.6 KB
/
friends.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
"""
A simple example script that fetches all posts on a user's timeline and sorts them by date.

The first few definitions make it easy to acquire the access token: they provide a small
web server and a TokenHandler class that accept the token from Facebook.

using_posts(1) returns old posts (older than two years), using_posts(2) returns somewhat old
posts (between one and two years old) and using_posts(3) returns new posts.
The second argument of using_posts filters the posts by author: any value other than 'me'
outputs posts from all users that appear in the timeline.
The third argument controls how posts are dated: any value other than 'created' dates a post
by its first user-liked comment (or its first comment when none is liked) instead of by the
created_time of the post.

posts_in_range returns the posts made between two dates. The first argument is the start date
and the second argument is the end date, both in the format 'yyyy-mm-dd'. The third argument
is the same as the second argument of using_posts.

comments_from_post returns the comments of a post, given the post ID; the comments can be
filtered by certain attributes.

comment_counter takes an array of post IDs and counts the number of comments on each post.

comment_hist produces a histogram of comments per post. It takes the same arguments as
using_posts, so it can be used to plot a histogram for any using_posts output.
"""
import facebook
import requests
import BaseHTTPServer
import urllib2
from webbrowser import open_new
import numpy as np
import dateparser as dp
import datetime as dt
import matplotlib.pyplot as plt
# OAuth redirect target; it is sent to Facebook as the redirect_uri argument.
# Plain-localhost variant, kept for reference:
# REDIRECT_URL = 'http://localhost:8080/'
# The author notes that their HOSTS file redirects this domain to localhost:8080
REDIRECT_URL = 'http://this.is.a.real.domain/'
# Port number for the local OAuth callback server.
PORT = 8080
def get_access_token_from_url(url):
    """
    Fetch an access token from Facebook's OAuth endpoint.

    Issues a GET request for ``url`` and decodes the response body as JSON.

    Args:
        url: the Facebook Graph API OAuth URL containing valid client_id,
            redirect_uri, client_secret and code arguments
    Returns:
        the value stored under 'access_token' in the JSON response
    Raises:
        KeyError: if the JSON response has no 'access_token' entry
    """
    response = requests.get(url)
    payload = response.json()
    return payload['access_token']
class HTTPServerHandler(BaseHTTPServer.BaseHTTPRequestHandler, object):
    """
    HTTP Server callbacks to handle Facebook OAuth redirects

    The handler answers the redirect request, pulls the OAuth ``code`` out of
    the query string and exchanges it for an access token, which is stored on
    the server object as ``server.access_token``.
    """

    def __init__(self, request, address, server, a_id, a_secret):
        """
        Args:
            request, address, server: forwarded unchanged to the base handler
            a_id: the Facebook assigned App ID
            a_secret: the Facebook assigned App Secret
        """
        # The base-class constructor processes the request right away, so the
        # credentials have to be in place before it is called.
        self.app_id = a_id
        self.app_secret = a_secret
        super(HTTPServerHandler, self).__init__(request, address, server)

    @staticmethod
    def _extract_auth_code(path):
        """Return the value of the ``code`` query parameter of *path*, or None."""
        query = path.partition('?')[2]
        for pair in query.split('&'):
            key, _, value = pair.partition('=')
            # Match the key exactly: a denied login redirects with
            # ``error_code=...``, which must not be mistaken for a code.
            if key == 'code' and value:
                return value
        return None

    def do_GET(self):
        """
        Answer the OAuth redirect and trade the received code for a token.

        A 200 response is always sent. Only when the request carries a
        ``code`` query parameter is the confirmation page written and
        ``self.server.access_token`` set.
        """
        self.send_response(200)
        self.send_header('Content-type', 'text/html')
        self.end_headers()

        auth_code = self._extract_auth_code(self.path)
        if auth_code is None:
            return

        self.auth_code = auth_code
        self.wfile.write(
            b'<html><h1>You may now close this window.</h1></html>')
        # The code is appended as received (still URL-encoded) because it is
        # placed straight back into a URL.
        token_uri = ('https://graph.facebook.com/v2.6/oauth/'
                     + 'access_token?client_id=' + self.app_id
                     + '&redirect_uri=' + REDIRECT_URL
                     + '&client_secret=' + self.app_secret
                     + '&code=' + self.auth_code)
        self.server.access_token = get_access_token_from_url(token_uri)

    # Disable logging from the HTTP Server
    def log_message(self, format, *args):
        return
class TokenHandler:
    """
    Functions used to handle Facebook oAuth
    """

    def __init__(self, a_id, a_secret):
        """
        Args:
            a_id: The Facebook assigned App ID
            a_secret: The Facebook assigned App Secret
        """
        self._id = a_id
        self._secret = a_secret

    def get_access_token(self):
        """
        Fetches the access key using an HTTP server to handle oAuth
        requests

        Opens the Facebook login dialog in a browser, serves exactly one
        request on 127.0.0.1:PORT and returns the token that the request
        handler stored on the server.

        Returns:
            the access token stored by the handler
        Raises:
            AttributeError: if the handled request did not result in a token
                being stored on the server
        """
        access_uri = ('https://www.facebook.com/dialog/'
                      + 'oauth?client_id=' + self._id + '&redirect_uri='
                      + REDIRECT_URL
                      + "&scope=public_profile,user_friends,user_posts")
        # Bind the listening socket before the browser is opened so that a
        # fast redirect cannot arrive while nothing is listening yet.
        http_server = BaseHTTPServer.HTTPServer(
            ('127.0.0.1', PORT),
            lambda request, address, server: HTTPServerHandler(
                request, address, server, self._id, self._secret))
        try:
            open_new(access_uri)
            http_server.handle_request()
            return http_server.access_token
        finally:
            # Release the port even when no token could be obtained.
            http_server.server_close()
def some_action(friend):
    """ Print the first name of a friend record.

    ``friend`` is looked up with the key 'first_name' and the value found
    there is printed. This is the place to do something else with each
    record if needed. Nothing is returned.
    """
    first_name = friend['first_name']
    print(first_name)
# Credentials of the Facebook app.
# The app secret should be stored elsewhere on a final build.
app_id = '**input your app ID here**'
app_secret = '**input your app secret here**'

# The access token comes first: everything below needs it to read the
# user's data.
auth = TokenHandler(app_id, app_secret)
access_token = auth.get_access_token()

# Look up the name and id of the user.
graph = facebook.GraphAPI(access_token)
name_id = graph.get_object('me')
my_name, my_id = name_id['name'], name_id['id']

# Query strings taken from the Graph API explorer.
# Facebook limits the output of post queries to 100 results.
likes_limit = '1000'
# Field selection shared by the feed/posts queries below.
_post_fields = ('{from,created_time,comments{user_likes.limit('
                + likes_limit + '),from,created_time},type}')
photos_likes = ('?fields=photos.limit(100){likes.limit(' + likes_limit
                + ').order(reverse_chronological)}')
feed_from = '?fields=feed.limit(100)' + _post_fields
posts_from = '?fields=posts.limit(100)' + _post_fields
feed_from_dates = '?fields=feed.limit(100)' + _post_fields
posts_from_dates = '?fields=posts.limit(100)' + _post_fields
posts_reactions = '?fields=feed{reactions}'
user_liked_comments = ('?fields=posts{comments{user_likes.limit('
                       + likes_limit + '),from}}')

# Graph API syntax, breakdown of the GET request:
# 'https://graph.facebook.com/v2.6/' + my_id + ?fields=photos + '&access_token=' + access_token
# i.e. graph host and version + id of the user/post/comment
#      + query for certain connections/attributes + the access token
def posts_in_range(since, until, whose_posts = 'me'):
    """
    Collect the posts made between two dates.

    Args:
        since: start date as a string in the form 'yyyy-mm-dd'
        until: end date as a string in the form 'yyyy-mm-dd'
        whose_posts: 'me' (default) queries the 'posts' edge, i.e. only the
            user's own posts; any other value queries the 'feed' edge, which
            also contains posts made by other users
    Returns:
        a 2-D numpy array with the columns ID, Date and Type. The first row
        is the header ['ID', 'Date', 'Type'], followed by one row per post;
        when no post is found only the header row is returned.
    """
    # 'posts' for the user's own posts, 'feed' for the entire timeline
    f_or_p = 'posts' if whose_posts == 'me' else 'feed'
    url = ('https://graph.facebook.com/v2.6/' + my_id
           + '?fields=' + f_or_p + '.limit(100).since(' + since
           + ').until(' + until
           + '){from,created_time,comments{user_likes.limit(' + likes_limit
           + '),from,created_time},type}' + '&access_token=' + access_token)
    page = requests.get(url).json()

    # Rows are gathered in a list and converted once at the end; stacking
    # onto an array for every post copies the whole array each time.
    rows = [['ID', 'Date', 'Type']]
    # this while loop will keep paginating results
    while True:
        # A KeyError ends the loop: it is raised when there are no more
        # pages, and also when a response or post lacks an expected key.
        try:
            # The first response nests the edge under its name. Pages fetched
            # through paging.next may arrive as the bare edge, so unwrap only
            # when the wrapper is actually present.
            if f_or_p in page:
                page = page[f_or_p]
            for post in page['data']:
                rows.append([post['id'], post['created_time'], post['type']])
            # Attempt to make a request to the next page of data, if it exists.
            page = requests.get(page['paging']['next']).json()
        except KeyError:
            break
    return np.array(rows)
def _checkdate_for_post(post, comment_or_created):
    """Return the datetime by which *post* is sorted into an age bucket."""
    if comment_or_created == 'created':
        # using the post created_time
        return dp.parse(post['created_time'])
    try:
        comments = post['comments']['data']
        # the first user-liked comment decides the date
        for comment in comments:
            if comment['user_likes']:
                return dp.parse(comment['created_time'])
        # no liked comment: use the date of the first comment
        return dp.parse(comments[0]['created_time'])
    except (KeyError, IndexError):
        # no (usable) comments on the post: use the created_time of the post
        return dp.parse(post['created_time'])


def using_posts(input = 1, whose_posts = 'me', comment_or_created = 'created'):
    """
    Collect the user's posts and return those of one age group.

    Args:
        input: selects the age group that is returned. 1 = posts dated more
            than two years ago, 2 = posts dated between one and two years
            ago, 3 = more recent posts.
        whose_posts: 'me' (default) queries the 'posts' edge, i.e. only the
            user's own posts; any other value queries the 'feed' edge, which
            also contains posts made by other users
        comment_or_created: 'created' (default) dates a post by its
            created_time. Any other value dates it by its first user-liked
            comment, falling back to its first comment and then to the
            created_time of the post.
    Returns:
        a 2-D numpy array with the columns ID, Date and Type. The first row
        is the header ['ID', 'Date', 'Type'], followed by one row per post
        of the selected group. For any other value of ``input`` a hint is
        printed and None is returned, without any request being made.
    """
    if input not in (1, 2, 3):
        print("input either 1, 2 or 3")
        return None

    # 'posts' for the user's own posts, 'feed' for the entire timeline
    if whose_posts == 'me':
        f_or_p, query = 'posts', posts_from
    else:
        f_or_p, query = 'feed', feed_from
    # get the first page of posts
    page = requests.get('https://graph.facebook.com/v2.6/' + my_id + query
                        + '&access_token=' + access_token).json()

    # One row list per age group; each starts with the header row.
    old_rows = [['ID', 'Date', 'Type']]
    med_rows = [['ID', 'Date', 'Type']]
    new_rows = [['ID', 'Date', 'Type']]
    # The cutoffs do not depend on the post, so they are parsed only once.
    two_years_ago = dp.parse('2 years ago')
    one_year_ago = dp.parse('1 year ago')

    # this while loop will keep paginating results
    while True:
        # A KeyError ends the loop: it is raised when there are no more
        # pages, and also when a response or post lacks an expected key.
        try:
            # The first response nests the edge under its name. Pages fetched
            # through paging.next may arrive as the bare edge, so unwrap only
            # when the wrapper is actually present.
            if f_or_p in page:
                page = page[f_or_p]
            for post in page['data']:
                checkdate = _checkdate_for_post(post, comment_or_created)
                row = [post['id'], post['created_time'], post['type']]
                if checkdate < two_years_ago:
                    old_rows.append(row)
                elif checkdate < one_year_ago:
                    med_rows.append(row)
                else:
                    new_rows.append(row)
            # Attempt to make a request to the next page of data, if it exists.
            page = requests.get(page['paging']['next']).json()
        except KeyError:
            break

    if input == 1:
        return np.array(old_rows)
    if input == 2:
        return np.array(med_rows)
    return np.array(new_rows)
def comments_from_post(id, filter_user_liked = False, date_cutoff = dt.timedelta(weeks = 300)):
    """
    The only required variable is the Post ID (id), the rest are filters for the comments with
    various default values.

    Args:
        id: ID of the post whose comments are fetched
        filter_user_liked: False means no filter, True means only comments that the user has
            liked are returned
        date_cutoff: the maximum allowed time difference between the post created_time and
            the comment created_time; only comments made within this time difference are
            returned. Either a datetime.timedelta (300 weeks by default) or a string that
            dateparser can parse (e.g. '2 weeks ago'), which is turned into the time span
            between that moment and now.
    Returns:
        a list whose first element is None, followed by the messages of the comments that
        pass the filters
    Raises:
        ValueError: if date_cutoff is a string that cannot be parsed
    """
    # if date cutoff is of a string type parse it before use
    if isinstance(date_cutoff, str):
        cutoff_moment = dp.parse(date_cutoff)
        if cutoff_moment is None:
            raise ValueError('date_cutoff could not be parsed: %r' % (date_cutoff,))
        date_cutoff = dt.datetime.today() - cutoff_moment

    # get comments from the graph
    comments = graph.get_connections(id, 'comments?fields=user_likes,message,created_time')
    # get the created_time of the post
    post_date = graph.get_connections(id, '?fields=created_time')
    # Parsed on first use and then reused for every comment.
    post_created = None
    # initialize output; the leading None is part of the returned list
    comments_out = [None]

    while True:
        # A KeyError ends the loop: it is raised when there are no more
        # pages, and also when a comment lacks an expected key.
        try:
            for comment in comments['data']:
                # filter by likes if required
                if filter_user_liked and (not comment['user_likes']):
                    continue
                if post_created is None:
                    post_created = dp.parse(post_date['created_time'])
                # filter by date if required
                if (dp.parse(comment['created_time']) - post_created) < date_cutoff:
                    comments_out.append(comment['message'])
            # Access the next page of comments if possible. The response has
            # to be decoded before it can be indexed like the first page.
            comments = requests.get(comments['paging']['next']).json()
        except KeyError:
            break
    return comments_out
def comment_counter(ids):
    """
    Count the comments on each of the given posts.

    Args:
        ids: iterable of post IDs (strings)
    Returns:
        a list whose first element is None, followed by the total comment
        count of every post for which the count could be read. Posts whose
        count cannot be read are skipped, so the list may be shorter than
        ``ids``.
    """
    summary = [None]
    for inp in ids:
        # If the post was not made by the current user they will often not
        # have permission to access the comments data; such posts are skipped.
        try:
            response = requests.get(
                'https://graph.facebook.com/v2.6/' + inp
                + '?fields=comments.summary(true)'
                + '&access_token=' + access_token).json()
            summary.append(response['comments']['summary']['total_count'])
        except (KeyError, TypeError, ValueError,
                requests.exceptions.RequestException):
            # missing data, an unusable id, an undecodable body or a failed
            # request: just skip the post
            continue
    return summary
def comment_hist(inpt = 1, from_who = 'me', com_or_created = 'created', show = True):
    """
    Plot a histogram of the number of comments per post.

    The first three arguments are passed to using_posts unchanged, so the
    histogram covers exactly the posts that using_posts returns for them.

    Args:
        inpt: age group selector, see using_posts
        from_who: author filter, see using_posts
        com_or_created: dating method, see using_posts
        show: set to False to suppress the plot when only the total number
            of comments is required
    Returns:
        the total number of comments over all counted posts; 0 when
        using_posts returns nothing for the given arguments
    """
    posts = using_posts(inpt, from_who, com_or_created)
    if posts is None:
        # using_posts returned nothing, so there is nothing to count
        return 0
    # Skip the header row and keep the ID column. atleast_2d keeps the
    # slicing valid when only a header row came back as a 1-D array.
    post_ids = np.atleast_2d(posts)[1:, 0]
    # drop the None placeholder(s) from the counter output
    c = [count for count in comment_counter(post_ids) if count is not None]
    if show:
        plt.hist(c, bins = 7)
        plt.xlabel('Number of Comments')
        plt.ylabel('Frequency')
        plt.show()
    return np.sum(c)