-
Notifications
You must be signed in to change notification settings - Fork 62
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
drop broken FakeUserAgents lib and switch to relying on https://www.useragents.me (closes #453)
- Loading branch information
Showing
5 changed files
with
104 additions
and
90 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,70 +1,58 @@ | ||
#!/usr/bin/env python | ||
# -*- coding: utf-8 -*- | ||
|
||
# Cached list of recent user-agents grabbed from https://www.useragents.me | ||
# Update the fallback cache by running python hyphe_backend/lib/user_agents.py | ||
|
||
import os | ||
import sys | ||
import random | ||
from fake_useragent import UserAgent, FakeUserAgentError | ||
|
||
USER_AGENTS_CLIENT = None
USER_AGENTS_LIST = []

# Use a live FakeUserAgent client when one can be instantiated; otherwise
# fall back to the user agents listed in the local user_agents.txt file,
# which is loaded into USER_AGENTS_LIST.
directory = os.path.dirname(__file__)
path_to_file = os.path.join(directory, "user_agents.txt")

try:
    # Instantiate the UserAgent client
    USER_AGENTS_CLIENT = UserAgent(cache=False)
except FakeUserAgentError as e:
    print("Error when trying to instantiate a user-agent with FakeUserAgent: %s.\nSwitching to local list" % e)
    # Copy the lines of the local user_agents.txt into USER_AGENTS_LIST
    with open(path_to_file) as f:
        USER_AGENTS_LIST = f.read().splitlines()
|
||
def get_random_user_agent():
    """Return a random user agent string.

    Without a FakeUserAgent client, the string is picked from the
    module-level USER_AGENTS_LIST. With a client, random user agents are
    drawn until one that does not contain "MSIE " comes up.
    """
    if USER_AGENTS_CLIENT is None:
        return random.choice(USER_AGENTS_LIST)

    # Keep drawing until the candidate is not an Internet Explorer one
    while True:
        candidate = USER_AGENTS_CLIENT.random
        if "MSIE " not in candidate:
            return candidate
|
||
def update_user_agents_list():
    """Generate a fresh list of 100 user agents for the local user_agents.txt.

    Exits the process with status 1 when no FakeUserAgent client is
    available.
    """
    path_to_file = os.path.join(os.path.dirname(__file__), "user_agents.txt")

    if USER_AGENTS_CLIENT is None:
        print("Error when trying to update the user-agents list with FakeUserAgent")
        sys.exit(1)

    # Draw random user agents until 100 distinct ones without "MSIE " are
    # collected; the set discards duplicates.
    collected = set()
    while len(collected) < 100:
        candidate = USER_AGENTS_CLIENT.random
        if "MSIE " not in candidate:
            collected.add(candidate)
    new_user_agents_list = sorted(collected)

    print("List of user agents successfully generated")
|
||
# Storing the list into user_agents.txt | ||
import requests | ||
|
||
class UserAgentsList(object):
    """List of browser User-Agent strings with a local cache file fallback.

    The list comes from, in order of priority: the ``agents_list`` argument,
    the https://www.useragents.me API, then the local cache file.
    """

    # Seconds to wait for the API before giving up; without a timeout the
    # HTTP request can block forever on an unresponsive server.
    DOWNLOAD_TIMEOUT = 30

    def __init__(self, agents_list=None, cache_file=None, read_cache=True):
        """Build the list of user agents.

        agents_list: optional ready-made list of User-Agent strings; when it
            is missing or empty the latest list is downloaded instead.
        cache_file: path of the cache file; defaults to user_agents.txt next
            to this module.
        read_cache: when true, fall back on the cache file if no user agent
            could be obtained otherwise.
        """
        # A fresh list per instance: a mutable default would be shared
        self.list = agents_list if agents_list is not None else []
        self.cache = cache_file or os.path.join(os.path.dirname(__file__), "user_agents.txt")

        # Initiate with latest list of UserAgents or fallback with local list
        if not self.list:
            self.download_latest()
        if read_cache and not self.list:
            self.read_cache()

    @staticmethod
    def _is_internet_explorer(user_agent):
        """Tell whether a User-Agent string contains "Trident" or "MSIE "."""
        return "Trident" in user_agent or "MSIE " in user_agent

    def download_latest(self):
        """Replace the list with the one served by the useragents.me API.

        Internet Explorer user agents are left out. Any failure is reported
        on stdout and leaves the current list untouched.
        """
        try:
            json_list = requests.get(
                "https://www.useragents.me/api",
                timeout=self.DOWNLOAD_TIMEOUT
            ).json()
            self.list = [
                ua["ua"]
                for ua in json_list.get("data")
                if not self._is_internet_explorer(ua["ua"])
            ]
        except Exception as e:
            print("WARNING: could not download latest UserAgents list from https://www.useragents.me ; will use a local cached list: %s - %s" % (type(e), e))

    def read_cache(self):
        """Replace the list with the lines of the cache file.

        Any failure is reported on stdout and leaves the current list
        untouched.
        """
        try:
            with open(self.cache) as f:
                self.list = f.read().splitlines()
        except Exception as e:
            print("ERROR: could not read cached list of user agents in file %s: %s - %s" % (self.cache, type(e), e))

    def write_cache(self):
        """Write the list into the cache file, one user agent per line.

        An empty list is never written, so that a failed download cannot
        wipe the existing fallback. Any failure is reported on stdout.
        """
        if not self.list:
            print("ERROR: no user agents to write, leaving cache file %s untouched" % self.cache)
            return
        try:
            with open(self.cache, "w") as user_agents_file:
                for user_agent in self.list:
                    user_agents_file.write("%s\n" % user_agent)
        except Exception as e:
            print("ERROR: could not write list of user agents in cache file %s: %s - %s" % (self.cache, type(e), e))

    def get_random(self):
        """Returns a random user agent from the list.

        Lists downloaded by download_latest exclude IE and Trident user
        agents. Raises IndexError when the list is empty.
        """
        return random.choice(self.list)
|
||
try:
    # Write one user agent per line, replacing the previous file content
    with open(path_to_file, "w") as user_agents_file:
        for user_agent in new_user_agents_list:
            user_agents_file.write("%s\n" % user_agent)
except Exception:
    # Not a bare except: KeyboardInterrupt and SystemExit must propagate
    print("Error writing in user_agents.txt")
|
||
if __name__ == "__main__":
    update_user_agents_list()
    # Rebuild the list without asking for the cache fallback, then save it
    # as the local user_agents.txt backup file
    UserAgentsList(read_cache=False).write_cache()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,22 +1,43 @@ | ||
Mozilla/5.0 (Linux x86_64; rv:91.0) Gecko/20100101 Firefox/91.0 | ||
Mozilla/5.0 (Linux x86_64; rv:99.0) Gecko/20100101 Firefox/99.0 | ||
Mozilla/5.0 (Macintosh; Intel Mac OS X 12.3; rv:91.0) Gecko/20100101 Firefox/91.0 | ||
Mozilla/5.0 (Macintosh; Intel Mac OS X 12.3; rv:99.0) Gecko/20100101 Firefox/99.0 | ||
Mozilla/5.0 (Macintosh; Intel Mac OS X 12_3_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/100.0.4896.75 Safari/537.36 | ||
Mozilla/5.0 (Macintosh; Intel Mac OS X 12_3_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/100.0.4896.75 Safari/537.36 Edg/99.0.1150.36 | ||
Mozilla/5.0 (Macintosh; Intel Mac OS X 12_3_1) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/15.3 Safari/605.1.15 | ||
Mozilla/5.0 (Windows NT 10.0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/100.0.4896.75 Safari/537.36 | ||
Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/100.0.4896.75 Safari/537.36 | ||
Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/100.0.4896.75 Safari/537.36 | ||
Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/100.0.4896.75 Safari/537.36 Edg/99.0.1150.36 | ||
Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:91.0) Gecko/20100101 Firefox/91.0 | ||
Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:99.0) Gecko/20100101 Firefox/99.0 | ||
Mozilla/5.0 (X11; Fedora; Linux x86_64; rv:91.0) Gecko/20100101 Firefox/91.0 | ||
Mozilla/5.0 (X11; Fedora; Linux x86_64; rv:99.0) Gecko/20100101 Firefox/99.0 | ||
Mozilla/5.0 (X11; Linux i686; rv:91.0) Gecko/20100101 Firefox/91.0 | ||
Mozilla/5.0 (X11; Linux i686; rv:99.0) Gecko/20100101 Firefox/99.0 | ||
Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/100.0.4896.75 Safari/537.36 | ||
Mozilla/5.0 (X11; Ubuntu; Linux i686; rv:91.0) Gecko/20100101 Firefox/91.0 | ||
Mozilla/5.0 (X11; Ubuntu; Linux i686; rv:99.0) Gecko/20100101 Firefox/99.0 | ||
Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:91.0) Gecko/20100101 Firefox/91.0 | ||
Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:99.0) Gecko/20100101 Firefox/99.0 | ||
Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/110.0.0.0 Safari/537.36 | ||
Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/110.0.0.0 Safari/537.36 Edg/110.0.1587.63 | ||
Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/110.0.0.0 Safari/537.36 | ||
Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:109.0) Gecko/20100101 Firefox/110.0 | ||
Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/110.0.0.0 Safari/537.36 Edg/110.0.1587.57 | ||
Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/109.0.0.0 Safari/537.36 | ||
Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/16.3 Safari/605.1.15 | ||
Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/109.0.0.0 Safari/537.36 | ||
Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/109.0.0.0 Safari/537.36 OPR/95.0.0.0 | ||
Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/110.0.0.0 Safari/537.36 | ||
Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:109.0) Gecko/20100101 Firefox/110.0 | ||
Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/110.0.0.0 Safari/537.36 Edg/110.0.1587.41 | ||
Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/110.0.0.0 Safari/537.36 Edg/110.0.1587.56 | ||
Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/103.0.5060.53 Safari/537.36 Edg/103.0.1264.37 | ||
Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:102.0) Gecko/20100101 Firefox/102.0 | ||
Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/90.0.4430.85 Safari/537.36 Edg/90.0.818.46 | ||
Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/110.0.0.0 Safari/537.36 Edg/110.0.1587.50 | ||
Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/110.0.0.0 Whale/3.19.166.16 Safari/537.36 | ||
Mozilla/5.0 (X11; Linux x86_64; rv:109.0) Gecko/20100101 Firefox/110.0 | ||
Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/108.0.0.0 Safari/537.36 Edg/108.0.1462.76 | ||
Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/97.0.4692.71 Safari/537.36 | ||
Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/110.0.0.0 Safari/537.36 Edg/110.0.1587.46 | ||
Mozilla/5.0 (Windows NT 6.1; Win64; x64; rv:109.0) Gecko/20100101 Firefox/110.0 | ||
Mozilla/5.0 (Windows NT 6.3; Win64; x64; rv:109.0) Gecko/20100101 Firefox/110.0 | ||
Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/94.0.4606.71 Safari/537.36 Core/1.94.192.400 QQBrowser/11.5.5250.400 | ||
Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/109.0.0.0 Safari/537.36 Edg/109.0.1518.78 | ||
Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/77.0.3865.120 Safari/537.36 | ||
Mozilla/5.0 (Windows NT 6.3; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/109.0.0.0 Safari/537.36 OPR/95.0.0.0 | ||
Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/110.0.0.0 Safari/537.36 Edg/110.0.1587.63 | ||
Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/92.0.4515.131 Safari/537.36 Edg/92.0.902.67 | ||
Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:108.0) Gecko/20100101 Firefox/108.0 | ||
Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/100.0.4896.127 Safari/537.36 | ||
Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/109.0.0.0 Safari/537.36 | ||
Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/87.0.4280.141 Safari/537.36 | ||
Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/64.0.3282.140 Safari/537.36 Edge/18.17763 | ||
Mozilla/5.0 (X11; Linux x86_64; rv:108.0) Gecko/20100101 Firefox/108.0 | ||
Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/108.0.0.0 Safari/537.36 | ||
Mozilla/5.0 (Windows NT 10.0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/110.0.0.0 Safari/537.36 Edg/110.0.1587.63 | ||
Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:109.0) Gecko/20100101 Firefox/111.0 | ||
Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/108.0.0.0 Safari/537.36 Edg/108.0.1462.54 | ||
Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/109.0.0.0 Safari/537.36 Edg/109.0.1518.61 | ||
Mozilla/5.0 (Windows NT 10.0; rv:109.0) Gecko/20100101 Firefox/110.0 | ||
Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/109.0.0.0 Safari/537.36 Edg/109.0.1518.70 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters