-
Notifications
You must be signed in to change notification settings - Fork 9
/
Copy pathtwitter_analysis.py
33 lines (26 loc) · 998 Bytes
/
twitter_analysis.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
import pandas as pd
import re
from collections import Counter
def find_mentions(tweets: pd.DataFrame) -> None:
    """Flag every tweet whose text contains an @mention.

    Adds (or overwrites) a boolean ``mentions`` column on *tweets* in place:
    ``True`` where the ``text`` column contains ``@`` immediately followed by
    at least one word character, ``False`` otherwise.

    Args:
        tweets: DataFrame with a ``text`` column of tweet bodies.

    Returns:
        None. The frame passed in is modified in place.
    """
    # Operate on the argument rather than a module-level frame, so the
    # function works for whichever DataFrame the caller hands in.
    # Raw string: '\w' in a plain literal is an invalid escape sequence.
    # na=False: rows with missing text count as "no mention", which keeps
    # the column strictly boolean.
    tweets['mentions'] = tweets['text'].str.contains(
        r'[@]\w+', regex=True, na=False
    )
# Load the tweets. `tweets_df` stays a module-level name because the rest of
# the script (and find_mentions) works on this frame.
tweets_df = pd.read_csv('tweets.csv')
# Discard the 'Unnamed: 0' column (the name pandas assigns to a CSV column
# with an empty header, presumably a saved index). errors='ignore' keeps the
# script running when the CSV has no such column.
tweets_df = tweets_df.drop(columns='Unnamed: 0', errors='ignore')
# print(tweets_df.keys())
print(tweets_df.head(5))
# print(tweets_df['text'].head(5))

print('Frequent words')
# Skip missing texts: str.join raises TypeError on NaN values.
all_words = " ".join(tweets_df["text"].dropna().astype(str)).split()
print(Counter(all_words).most_common(100))

# Select tweets that mention a state in either all-upper or all-lower case.
# One combined mask replaces the former DataFrame.append calls (removed in
# pandas 2.0) and no longer duplicates a tweet containing both spellings.
# Rows now keep their original file order. na=False treats missing text as
# "no match" so the mask is always boolean.
tweet_text = tweets_df['text']
tweetsNSW_df = tweets_df[tweet_text.str.contains("NSW|nsw", na=False)]
# print(len(tweetsNSW_df))
tweetsQLD_df = tweets_df[tweet_text.str.contains("qld|QLD", na=False)]
# print(len(tweetsQLD_df))

print('===== LOCATION =====')
print('=== NSW ===')
print(tweetsNSW_df['author_location'])
# The QLD section previously printed a stray '=== NSW ===' header as well.
print('\n===QLD===')
print(tweetsQLD_df['author_location'])

# Adds the boolean 'mentions' column to tweets_df.
find_mentions(tweets_df)
# print(tweets_df.mentions)