Randomization
User-agent (UA) randomization and request delays unified across plugins.
laramies committed Dec 19, 2018
2 parents b1fabec + a4ccea6 commit 1006e05
Showing 28 changed files with 615 additions and 388 deletions.
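The hunks below replace each plugin's hard-coded userAgent string and fixed time.sleep() values with two helpers, getUserAgent() and getDelay(), pulled in via "from discovery.constants import *". The constants module itself is not shown in this excerpt; the following is only a minimal sketch of what such a module would need to provide, with the user-agent list and the delay range as assumptions:

# discovery/constants.py -- illustrative sketch only; the module shipped with
# this commit may use a different user-agent list and delay range.
import random

# A small pool of browser user-agent strings (example values).
user_agents = [
    "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/60.0.3112.113 Safari/537.36",
    "Mozilla/5.0 (Windows NT 10.0; WOW64; Trident/7.0; rv:11.0) like Gecko",
    "Mozilla/5.0 (Windows; U; Windows NT 6.0; en-US; rv:1.9.2) Gecko/20100115 Firefox/3.6",
]


def getUserAgent():
    # Return a different browser identity for each request.
    return random.choice(user_agents)


def getDelay():
    # Return a random pause, in seconds, to sleep between requests so the
    # search engines see less predictable traffic.
    return random.uniform(1, 3)

With these helpers in place, every plugin builds its request headers with getUserAgent() and calls time.sleep(getDelay()) after each request, as the diffs below show.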
14 changes: 6 additions & 8 deletions censysparser.py
@@ -1,12 +1,13 @@
 from bs4 import BeautifulSoup
 import re

+
 class parser:

     def __init__(self, results):
         self.results = results
         self.ipaddresses = []
-        self.soup = BeautifulSoup(results.results,features="html.parser")
+        self.soup = BeautifulSoup(results.results, features="html.parser")
         self.hostnames = []
         self.urls = []
         self.numberofpages = 0
@@ -22,7 +23,7 @@ def search_hostnames(self):

     def search_ipaddresses(self):
         try:
-            ipaddresslist = self.soup.findAll('a','SearchResult__title-text')
+            ipaddresslist = self.soup.findAll('a', 'SearchResult__title-text')
             for ipaddressitem in ipaddresslist:
                 self.ipaddresses.append(ipaddressitem.text.strip())
             return self.ipaddresses
@@ -33,11 +34,8 @@ def search_numberofpages(self):
         try:
             items = self.soup.findAll(href=re.compile("page"))
             for item in items:
-                if (item.text !='next'): #to filter out pagination
-                    self.numberofpages+=1
+                if (item.text != 'next'):  # to filter out pagination
+                    self.numberofpages += 1
             return self.numberofpages
         except Exception as e:
             print("Error occurred: " + str(e))
-
-
-
112 changes: 56 additions & 56 deletions changelog.txt
@@ -1,56 +1,56 @@
Changelog in 2.6:
-----------------
usage() improvement, CameronNemo.
Added Yahoo and Baidu search engines. Thanks to Tatanus
Added check for the existence of Requests library.
Fixed email regex to provide cleaner results. Thanks to Peter McAlpine

Changelog in 2.5:
-----------------


Changelog in 2.4:
------------------
-Fixed Linkedin Parser
-Fixed 123people
-Added Dogpile Search engine (Marcus)
-PEP8 compliant (Mario)
-Fixed XML export (Marcus)
-Expanded TLD list from http://data.iana.org/TLD/tlds-alpha-by-domain.txt (Marcus)
-DNS Bruteforce fixed (Tomas)
-Added Google Custom Search Support - Need API Key to use it.



Changelog in 2.3:
--------------
-Fixed duplicates

Changelog in 2.2:
----------------
-Added Jigsaw (www.jigsaw.com)
-Added 123People (www.123people.com)
-Added limit to google searches as the maximum results we can obtain is 1000
-Removed SET, as service was discontinued by Google
-Fixed parser to remove wrong results like emails starting with @


Changelog in 2.1:
----------------
-DNS Bruteforcer
-DNS Reverse lookups
-DNS TDL Expansion
-SHODAN DB integration
-HTML report
-DNS server selection


Changelog in 2.0:
----------------
-Complete rewrite, more modular and easy to maintain
-New sources (Exalead, Google-Profiles, Bing-Api)
-Time delay between request, to prevent search engines from blocking our IP´s
-You can start the search from the results page that you want, hence you can *resume* a search
-Export to xml
-All search engines harvesting
6 changes: 4 additions & 2 deletions discovery/asksearch.py
@@ -1,6 +1,8 @@
 import myparser
 import re
 import requests
+import time
+from discovery.constants import *

 class search_ask:

@@ -10,18 +12,18 @@ def __init__(self, word, limit):
         self.totalresults = ""
         self.server = "www.ask.com"
         self.hostname = "www.ask.com"
-        self.userAgent = "(Mozilla/5.0 (Windows; U; Windows NT 6.0;en-US; rv:1.9.2) Gecko/20100115 Firefox/3.6"
         self.quantity = "100"
         self.limit = int(limit)
         self.counter = 0

     def do_search(self):
         headers = {
-            'User-agent':self.userAgent
+            'User-agent': getUserAgent()
         }
         url = 'http://' + self.server + '/web?q=%40' + self.word \
             + "&pu=100&page=" + str(self.counter)
         h = requests.get(url=url, headers=headers)
+        time.sleep(getDelay())
         self.results = h.text
         self.totalresults += self.results

6 changes: 3 additions & 3 deletions discovery/baidusearch.py
@@ -1,6 +1,7 @@
 import myparser
 import time
 import requests
+from discovery.constants import *

 class search_baidu:

@@ -9,23 +10,22 @@ def __init__(self, word, limit):
         self.total_results = ""
         self.server = "www.baidu.com"
         self.hostname = "www.baidu.com"
-        self.userAgent = "(Mozilla/5.0 (Windows; U; Windows NT 6.0;en-US; rv:1.9.2) Gecko/20100115 Firefox/3.6"
         self.limit = limit
         self.counter = 0

     def do_search(self):
         url = 'http://' + self.server + "/s?wd=%40" + self.word + "&pn=" + str(self.counter) + "&oq=" + self.word
         headers = {
             'Host': self.hostname,
-            'User-agent': self.userAgent
+            'User-agent': getUserAgent()
         }
         h = requests.get(url=url, headers=headers)
+        time.sleep(getDelay())
         self.total_results += h.text

     def process(self):
         while self.counter <= self.limit and self.counter <= 1000:
             self.do_search()
-            time.sleep(1)
             print("\tSearching " + str(self.counter) + " results...")
             self.counter += 10

12 changes: 6 additions & 6 deletions discovery/bingsearch.py
@@ -2,6 +2,7 @@
 import myparser
 import time
 import requests
+from discovery.constants import *

 class search_bing:

@@ -12,7 +13,6 @@ def __init__(self, word, limit, start):
         self.server = "www.bing.com"
         self.apiserver = "api.search.live.net"
         self.hostname = "www.bing.com"
-        self.userAgent = "(Mozilla/5.0 (Windows; U; Windows NT 6.0;en-US; rv:1.9.2) Gecko/20100115 Firefox/3.6"
         self.quantity = "50"
         self.limit = int(limit)
         self.bingApi = ""
@@ -23,7 +23,7 @@ def do_search(self):
             'Host': self.hostname,
             'Cookie':'SRCHHPGUSR=ADLT=DEMOTE&NRSLT=50',
             'Accept-Language': 'en-us,en',
-            'User-agent': self.userAgent
+            'User-agent': getUserAgent()
         }
         h = requests.get(url=('http://'+self.server + "/search?q=%40" + self.word + "&count=50&first=" + str(self.counter)),headers=headers)
         self.results = h.text
@@ -34,7 +34,7 @@ def do_search_api(self):
             self.word + "&sources=web&web.count=40&web.offset=" + str(self.counter)
         headers = {
             'Host': self.apiserver,
-            'User-agent': self.userAgent
+            'User-agent': getUserAgent()
         }
         h = requests.get(url=url, headers=headers)
         self.results = h.text
@@ -45,7 +45,7 @@ def do_search_vhost(self):
             'Host': self.hostname,
             'Cookie': 'mkt=en-US;ui=en-US;SRCHHPGUSR=NEWWND=0&ADLT=DEMOTE&NRSLT=50',
             'Accept-Language': 'en-us,en',
-            'User-agent': self.userAgent
+            'User-agent': getUserAgent()
         }
         url = 'http://' + self.server + "/search?q=ip:" + self.word + "&go=&count=50&FORM=QBHL&qs=n&first=" + str(self.counter)
         h = requests.get(url=url, headers=headers)
@@ -72,10 +72,10 @@ def process(self, api):
         while (self.counter < self.limit):
             if api == "yes":
                 self.do_search_api()
-                time.sleep(0.3)
+                time.sleep(getDelay())
             else:
                 self.do_search()
-                time.sleep(1)
+                time.sleep(getDelay())
             self.counter += 50
             print("\tSearching " + str(self.counter) + " results...")

37 changes: 12 additions & 25 deletions discovery/censys.py
@@ -1,6 +1,7 @@
-import random
 import requests
 import censysparser
+import time
+from discovery.constants import *

 class search_censys:

@@ -11,45 +12,32 @@ def __init__(self, word):
         self.results = ""
         self.total_results = ""
         self.server = "censys.io"
-        self.userAgent = ["(Mozilla/5.0 (Windows; U; Windows NT 6.0;en-US; rv:1.9.2) Gecko/20100115 Firefox/3.6",
-                          "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/60.0.3112.113 Safari/537.36"
-                          ,("Mozilla/5.0 (Linux; Android 7.0; SM-G892A Build/NRD90M; wv) " +
-                           "AppleWebKit/537.36 (KHTML, like Gecko) Version/4.0 Chrome/60.0.3112.107 Mobile Safari/537.36"),
-                          ("Mozilla/5.0 (Windows Phone 10.0; Android 6.0.1; Microsoft; RM-1152) " +
-                           "AppleWebKit/537.36 (KHTML, like Gecko) Chrome/52.0.2743.116 Mobile Safari/537.36 Edge/15.15254"),
-                          "Mozilla/5.0 (SMART-TV; X11; Linux armv7l) AppleWebKit/537.42 (KHTML, like Gecko) Chromium/25.0.1349.2 Chrome/25.0.1349.2 Safari/537.42"
-                          ,"Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/56.0.2924.87 Safari/537.36 OPR/43.0.2442.991"
-                          ,"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/61.0.3163.100 Safari/537.36 OPR/48.0.2685.52"
-                          ,"Mozilla/5.0 (Windows NT 10.0; WOW64; Trident/7.0; rv:11.0) like Gecko"
-                          ,"Mozilla/5.0 (Windows NT 6.1; WOW64; Trident/7.0; rv:11.0) like Gecko"
-                          ,"Mozilla/5.0 (compatible; MSIE 10.0; Windows NT 6.1; Trident/6.0)"]


     def do_search(self):
         try:
-            headers = {'user-agent': random.choice(self.userAgent),'Accept':'*/*','Referer':self.url}
+            headers = {'user-agent': getUserAgent(), 'Accept': '*/*', 'Referer': self.url}
             response = requests.get(self.url, headers=headers)
-            self.results = response.content
-            print ('-')
+            self.results = response.text
             self.total_results += self.results
-            print ('-')
         except Exception as e:
             print(e)

     def process(self):
-        self.url="https://" + self.server + "/ipv4/_search?q=" + str(self.word) + "&page=1"
+        self.url = "https://" + self.server + "/ipv4/_search?q=" + str(self.word) + "&page=1"
         self.do_search()
-        self.counter=2
+        self.counter = 2
         pages = censysparser.parser(self)
         totalpages = pages.search_numberofpages()
         while self.counter <= totalpages:
             try:
-                self.page =str(self.counter)
-                self.url="https://" + self.server + "/ipv4/_search?q=" + str(self.word) + "&page=" + str(self.page)
-                print("\tSearching Censys results page " + self.page + "...")
+                self.page = str(self.counter)
+                self.url = "https://" + self.server + "/ipv4/_search?q=" + str(self.word) + "&page=" + str(self.page)
+                print("\t - Searching Censys results page " + self.page + "...")
                 self.do_search()
+                time.sleep(getDelay())
             except Exception as e:
                 print("Error occurred: " + str(e))
-            self.counter+=1
+            self.counter += 1

     def get_hostnames(self):
         try:
@@ -64,4 +52,3 @@ def get_ipaddresses(self):
             return ips.search_ipaddresses()
         except Exception as e:
             print("Error occurred: " + str(e))
-