-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathmain.py
373 lines (334 loc) · 13.5 KB
/
main.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
import datetime
import json
import os
import readline
import sys
import time
from urllib.parse import urlencode

import requests
from bs4 import BeautifulSoup
# 选择要抓取的房间
def SelectRoom():
print("\n请输入序号以选择要抓取的房间:")
print("1.灌水\t2.动漫\t3.游戏\t4.欧美")
print("5.原神\t6.aph\t7.文野\t8.狂飙")
print("9.星穹\t10.日娱\t11.历史\t12.乘风破浪的姐姐")
number = input()
if number == '1':
print("\n已选择灌水专区。")
print("1.主页\t2.水区:")
number = input()
if number == '1':
# 灌水主页的roomid
Guanshui_roomid = '551e51f8fbe78e6c02be1d89'
return Guanshui_roomid
elif number == '2':
# 灌水水区的subroomid
Guanshui_subroomid = '57a75577a508c960d3cb7b3a'
return Guanshui_subroomid
elif number == '2':
print("\n已选择动漫专区。")
print("1.主页\t2.水区:")
number = input()
if number == '1':
# 动漫主页的roomid
Dongman_roomid = '551e521afbe78e6c02be1d8a'
return Dongman_roomid
elif number == '2':
# 动漫水区的subroomid
Dongman_subroomid = '59702a3bfa1eb084c2d27c40'
return Dongman_subroomid
elif number == '3':
print("\n已选择游戏专区。")
print("1.主页\t2.水区:")
number = input()
if number == '1':
# 游戏主页的roomid
Youxi_roomid = '55a8ba62fbe78e0577201b2e'
return Youxi_roomid
elif number == '2':
# 游戏水区的subroomid
Youxi_subroomid = '57cd3638d92cc5794ae3c3dd'
return Youxi_subroomid
elif number == '4':
print("\n已选择欧美专区。")
print("1.主页\t2.水区:")
number = input()
if number == '1':
# 欧美主页的roomid
Oumei_roomid = '552b5aecfbe78e6853e442af'
return Oumei_roomid
elif number == '2':
# 欧美水区的subroomid
Oumei_subroomid = '5693462afbe78e2538159b61'
return Oumei_subroomid
elif number == '5':
print("\n已选择原神专区。")
print("1.主页\t2.水区:")
number = input()
if number == '1':
# 原神主页的roomid
Yuanshen_roomid = '606589c2dc32c2eb0ec99955'
return Yuanshen_roomid
elif number == '2':
# 原神水区的subroomid
Yuanshen_subroomid = '606589c20de2d061ee2f9805'
return Yuanshen_subroomid
elif number == '6':
print("\n已选择aph专区。")
print("1.主页\t2.水区:")
number = input()
if number == '1':
# aph主页的roomid
aph_roomid = '5528ec8afbe78e4938139f65'
return aph_roomid
elif number == '2':
# aph水区的subroomid
aph_subroomid = '59ef4da6e3d24c4791231b2d'
return aph_subroomid
elif number == '7':
print("\n已选择文野专区。")
print("1.主页\t2.水区:")
number = input()
if number == '1':
# 文野主页的roomid
Wenyi_roomid = '56b9d32afbe78e2d8eaed6ff'
return Wenyi_roomid
elif number == '2':
# 文野水区的subroomid
Wenyi_subroomid = '56bc8bfefbe78e50c60ab267'
return Wenyi_subroomid
elif number == '8':
print("\n已选择狂飙专区。")
print("1.主页\t2.水区:")
number = input()
if number == '1':
# 狂飙主页的roomid
Kuangbiao_roomid = '63ec477f5fbd2b4494193921'
return Kuangbiao_roomid
elif number == '2':
# 狂飙水区的subroomid
Kuangbiao_subroomid = '63ec477f0de2d061eede7b72'
return Kuangbiao_subroomid
elif number == '9':
print("\n已选择星穹专区。")
print("1.主页\t2.水区:")
number = input()
if number == '1':
# 星穹主页的roomid
Xingqiong_roomid = '6454a97fde30c142ef7e21f7'
return Xingqiong_roomid
elif number == '2':
# 星穹水区的subroomid
Xingqiong_subroomid = '6454a97fde30c142ef7e221c'
return Xingqiong_subroomid
elif number == '10':
print("\n已选择日娱专区。")
print("1.主页\t2.水区:")
number = input()
if number == '1':
# 日娱主页的roomid
Riyu_roomid = '56cd2ce0fbe78e12938f115a'
return Riyu_roomid
elif number == '2':
# 日娱水区的subroomid
Riyu_subroomid = '56da5a9efbe78e3333cb2137'
return Riyu_subroomid
elif number == '11':
print("\n已选择历史专区。")
print("1.主页\t2.水区:")
number = input()
if number == '1':
# 历史主页的roomid
Lishi_roomid = '5523e9dcfbe78e51742b25a2'
return Lishi_roomid
elif number == '2':
# 历史水区的subroomid
Lishi_subroomid = '56ed72b2fbe78e533373777d'
return Lishi_subroomid
elif number == '12':
print("\n已选择乘风破浪的姐姐专区")
print("1.主页\t2.水区:")
number = input()
if number == '1':
# 乘风破浪的姐姐主页的roomid
Chengfeng_roomid = '5ee98c199075f3395ef6f1c7'
return Chengfeng_roomid
elif number == '2':
# 乘风破浪的姐姐水区的subroomid
Chengfeng_subroomid = '5ee98c194ead61ceda497545'
return Chengfeng_subroomid
print("\n不存在的专区,程序中止。")
def Login():
print("\n请输入帐号绑定的手机号:")
phone = input()
print("\n请输入密码:")
password = input()
return 'http://www.mrpyq.com/api/account/login?phone=' + phone + '&password=' + password, phone, password
def getAccessToken(login_url, phone, password):
"""@return access_token"""
# 模拟登录获取会话
session = requests.Session()
headers = {
"Content-Type": "application/json",
"Accept": "application/json, text/plain, */*",
"Accept-Encoding": "gzip, deflate",
"Accept-Language": "zh-CN,zh;q=0.9",
"Host": "www.mrpyq.com",
"Origin": "http://web.mrpyq.com",
"Proxy-Connection": "keep-alive",
"Referer": "http://web.mrpyq.com/",
}
# "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/112.0.0.0 Safari/537.36"
# 构造登录请求的表单数据
login_data = {
'phone': phone, # 替换为你的手机号码
'password': password # 替换为你的登录密码
}
# 发送登录请求
response = session.post(login_url, headers=headers, data=login_data)
# 检查登录是否成功
if response.status_code == 200:
# print(response.json())
token = response.json()["access_token"]
return token
else:
print('登录失败')
return None
def getKeywords():
keywords = []
# 用户输入关键词,以回车分隔,若监测到输入为空,则结束
print("\n注意:关键词不要太长,否则可能会出现抓取不到的情况。最多输入200个关键词。")
print("请输入要搜索关键词,以回车分隔。再按一次回车则结束关键词输入:")
while True:
# 为了防止中文输入删除不干净,导入readline模块
keyword = input()
if keyword == '':
break
else:
keywords.append(keyword)
if len(keywords) == 200:
break
return keywords
def getWatingTime():
print("\n请输入抓取间隔时间(秒), 至少20秒:")
# 为了保护服务器,硬性设置间隔至少20秒抓取一次,否则重新输入
while True:
t = input()
if t.isdigit() and int(t) >= 20:
return int(t)
else:
print("请重新输入抓取间隔时间(秒):")
def main():
# 登录
login_url, phone, password = Login()
access_token = getAccessToken(login_url, phone, password)
# 检查登录是否成功
while access_token is None:
print("\n登录失败,请重新登录。")
login_url, phone, password = Login()
access_token = getAccessToken(login_url, phone, password)
print("\n登录成功。")
# 获取关键词
keywords = getKeywords()
# 获取抓取间隔时间
waitingtime = getWatingTime()
# 获取专区
roomid = SelectRoom()
# print(access_token)
# 检查登录是否成功
if access_token:
t_a = 1
t_b = 1
while True:
# url格式 = 'http://www.mrpyq.com/api/feed/feeds_by_room?access_token={token_number}&page=1&t=1688783232199&roomid=55a8ba62fbe78e0577201b2e'
main = 'http://www.mrpyq.com/api/feed/feeds_by_room?'
# 抓取页面
page = 1
# 获取当前时间
current_time = datetime.datetime.now()
# 将时间格式化为所需的日期和时间格式
formatted_time = current_time.strftime("%Y-%m-%d %H:%M:%S")
# 将格式化后的时间转换为时间戳
t = int(
time.mktime(time.strptime(formatted_time,
"%Y-%m-%d %H:%M:%S"))) * 1000
print("\n正在抓取中……请耐心等待……")
# 要修改roomid/subroomid
url = main + 'access_token=' + access_token + '&page=' + str(
page) + '&t=' + str(t) + '&subroomid=' + roomid
response = requests.get(url)
if response.status_code == 200:
data = response.json()
# print(data)
# Extract "name", "content", and "no" values
names = []
contents = []
nos = []
tamestamps = []
items = data.get('items', [])
# print(items)
for item in items:
name = item.get('user', {}).get('name')
content = item.get('content')
no = item.get('no')
# {'create': 1688793477838}
timestamp = item.get('time', {}).get('create')
names.append(name)
contents.append(content)
nos.append(no)
tamestamps.append(timestamp)
# Pair and output the values
for name, content, no, timestamp in zip(
names, contents, nos, tamestamps):
print("Time:", timestamp)
print("Name:", name + " " + str(no))
print("Content:", content)
print("---")
else:
print('\n请求失败!')
# 获取可执行文件所在的路径
# pyinstaller 之后的 exe 文件所在地址,如果直接运行py文件则将exe_dir替换成current_dir = os.path.dirname(os.path.abspath(__file__))
exe_dir = os.path.dirname(sys.executable)
# 构建文件路径
file_path = os.path.join(exe_dir, '所有帖子.txt')
# 打开文件并进行操作
with open(file_path, 'a', encoding='utf-8') as f: # 使用uft-8编码格式
# 逆序遍历
for name, content, no, timestamp in zip(
names[::-1], contents[::-1], nos[::-1],
tamestamps[::-1]):
# 检查时间戳
if timestamp > t_a:
t_a = timestamp
timestamp = timestamp / 1000 # 将毫秒转换为秒
timestamp = datetime.datetime.fromtimestamp(timestamp)
f.write("Time:" + str(timestamp) + '\n')
f.write("Name:" + name + " " + str(no) + '\n')
if content != None:
f.write("Content:" + content + '\n')
f.write("---" + '\n')
file_path = os.path.join(exe_dir, '关键词相关帖子.txt')
# 打开文件并进行操作
with open(file_path, 'a', encoding='utf-8') as f:
# 逆序遍历
for name, content, no, timestamp in zip(
names[::-1], contents[::-1], nos[::-1],
tamestamps[::-1]):
# 检查时间戳
if timestamp > t_b:
t_b = timestamp
timestamp = timestamp / 1000 # 将毫秒转换为秒
timestamp = datetime.datetime.fromtimestamp(timestamp)
# 如果name和content中包含keywords中的任意一个关键词,则写入txt文件
if any(keyword in name for keyword in keywords) or any(
keyword in content for keyword in keywords):
f.write("Time:" + str(timestamp) + '\n')
f.write("Name:" + name + " " + str(no) + '\n')
if content != None:
f.write("Content:" + content + '\n')
f.write("---" + '\n')
print("\n抓取完成!等待" + str(waitingtime) + "秒后继续抓取……")
print("按Ctrl+C可随时退出程序。")
time.sleep(waitingtime)
if __name__ == '__main__':
main()