diff --git a/Tests/Parser/Client/fixtures/library.yml b/Tests/Parser/Client/fixtures/library.yml index 1665f00c39..eddfd83c9d 100644 --- a/Tests/Parser/Client/fixtures/library.yml +++ b/Tests/Parser/Client/fixtures/library.yml @@ -743,3 +743,9 @@ type: library name: sqlmap version: 1.8.10.1 +- + user_agent: vimeo.php 3.0.8; (http://developer.vimeo.com/api/docs) + client: + type: library + name: vimeo.php + version: 3.0.8 diff --git a/Tests/Parser/Client/fixtures/mobile_app.yml b/Tests/Parser/Client/fixtures/mobile_app.yml index 3bd5c9f13f..0cf83e2461 100644 --- a/Tests/Parser/Client/fixtures/mobile_app.yml +++ b/Tests/Parser/Client/fixtures/mobile_app.yml @@ -2351,3 +2351,21 @@ type: mobile app name: OpenVAS version: 9.0.3 +- + user_agent: appdb/1.4.4 (com.4sh2812.32u1982378; build:2875; iOS 18.3.0) Alamofire/3.5.0 + client: + type: mobile app + name: appdb + version: 1.4.4 +- + user_agent: Apache/2.2.2 (Fedora) (internal dummy connection) + client: + type: mobile app + name: Apache + version: 2.2.2 +- + user_agent: Apache/2.4.34 (Ubuntu) OpenSSL/1.1.1 (internal dummy connection) + client: + type: mobile app + name: Apache + version: 2.4.34 diff --git a/Tests/fixtures/bots.yml b/Tests/fixtures/bots.yml index de19d367d3..8e821f2eef 100644 --- a/Tests/fixtures/bots.yml +++ b/Tests/fixtures/bots.yml @@ -1705,12 +1705,12 @@ - user_agent: IDG/IT (http://spaziodati.eu/) bot: - name: IDG/IT - category: Search bot - url: https://spaziodati.eu/ + name: IDG + category: Crawler + url: https://www.spaziodati.eu/ producer: name: SpazioDati S.r.l. - url: https://spaziodati.eu/ + url: https://www.spaziodati.eu/ - user_agent: iisbot/1.0 (+http://www.iis.net/iisbot.html) bot: @@ -2618,9 +2618,9 @@ - user_agent: Mozilla/5.0 (compatible; SEOkicks-Robot; +http://www.seokicks.de/robot.html) bot: - name: SEOkicks-Robot + name: SEOkicks category: Crawler - url: http://www.seokicks.de/robot.html + url: https://www.seokicks.de/robot.html producer: name: SEOkicks url: https://www.seokicks.de/ @@ -4337,6 +4337,9 @@ name: SEOkicks category: Crawler url: https://www.seokicks.de/robot.html + producer: + name: SEOkicks + url: https://www.seokicks.de/ - user_agent: Mozilla/5.0 (compatible; Plukkie/1.6; http://www.botje.com/plukkie.htm) bot: @@ -6911,11 +6914,11 @@ - user_agent: IDG/EU (http://spaziodati.eu/) bot: - name: SpazioDati + name: IDG category: Crawler url: https://www.spaziodati.eu/ producer: - name: SpazioDati s.r.l. + name: SpazioDati S.r.l. url: https://www.spaziodati.eu/ - user_agent: GozleBot; http://gozle.com.tm @@ -8455,5 +8458,77 @@ category: Crawler url: https://www.semrush.com/bot/ producer: - name: Semrush Inc. - url: https://www.semrush.com/ + name: Semrush Inc. + url: https://www.semrush.com/ +- + user_agent: LightspeedSystemsCrawler Mozilla/5.0 (Windows; U; MSIE 9.0; Windows NT 9.0; en-US) + bot: + name: LightspeedSystemsCrawler + category: Crawler + url: https://www.lightspeedsystems.com/ + producer: + name: Lightspeed Systems, Inc. + url: https://www.lightspeedsystems.com/ +- + user_agent: Research JLU + bot: + name: Research JLU + category: Crawler + url: https://www.uni-giessen.de/en/research + producer: + name: Justus Liebig University Giessen + url: https://www.uni-giessen.de/en +- + user_agent: hgfAlphaXCrawl/1.0 (+https://www.fim.uni-passau.de/data-science/forschung/open-search) + bot: + name: AlphaXCrawl + category: Crawler + url: https://www.fim.uni-passau.de/en/data-science/research/open-search + producer: + name: University of Passau + url: https://www.uni-passau.de/en/ +- + user_agent: IDG/RU (http://spaziodati.eu/) + bot: + name: IDG + category: Crawler + url: https://www.spaziodati.eu/ + producer: + name: SpazioDati S.r.l. + url: https://www.spaziodati.eu/ +- + user_agent: IDG/UK (http://spaziodati.eu/) + bot: + name: IDG + category: Crawler + url: https://www.spaziodati.eu/ + producer: + name: SpazioDati S.r.l. + url: https://www.spaziodati.eu/ +- + user_agent: Chatwork LinkPreview v1 + bot: + name: Chatwork LinkPreview + category: Service Agent + url: https://go.chatwork.com/en/ + producer: + name: kubell Co., Ltd. + url: https://www.kubell.com/en/ +- + user_agent: WPMU DEV Broken Link Checker Local Engine + bot: + name: WPMU DEV + category: Crawler + url: 'https://wpmudev.com/docs/wpmu-dev-plugins/broken-link-checker/#broken-link-checker-user-agent' + producer: + name: Incsub, LLC. + url: https://incsub.com/ +- + user_agent: WPMU DEV Broken Link Checker Spider + bot: + name: WPMU DEV + category: Crawler + url: 'https://wpmudev.com/docs/wpmu-dev-plugins/broken-link-checker/#broken-link-checker-user-agent' + producer: + name: Incsub, LLC. + url: https://incsub.com/ diff --git a/regexes/bots.yml b/regexes/bots.yml index 0d662b138e..c487d3ab7d 100644 --- a/regexes/bots.yml +++ b/regexes/bots.yml @@ -1510,14 +1510,6 @@ name: 'SEO Engine' url: 'http://www.seoengine.com' -- regex: 'SEOkicks-Robot' - name: 'SEOkicks-Robot' - category: 'Crawler' - url: 'http://www.seokicks.de/robot.html' - producer: - name: 'SEOkicks' - url: 'https://www.seokicks.de/' - - regex: 'seoscanners\.net' name: 'Seoscanners.net' category: 'Crawler' @@ -2430,13 +2422,13 @@ name: 'Effyis Inc' url: 'https://boardreader.com/' -- regex: 'IDG/IT' - name: 'IDG/IT' - category: 'Search bot' - url: 'https://spaziodati.eu/' +- regex: 'IDG/(?:EU|IT|RU|UK)' + name: 'IDG' + category: 'Crawler' + url: 'https://www.spaziodati.eu/' producer: name: 'SpazioDati S.r.l.' - url: 'https://spaziodati.eu/' + url: 'https://www.spaziodati.eu/' - regex: 'Bytespider' name: 'Bytespider' @@ -2825,6 +2817,9 @@ name: 'SEOkicks' category: 'Crawler' url: 'https://www.seokicks.de/robot.html' + producer: + name: 'SEOkicks' + url: 'https://www.seokicks.de/' - regex: 'Plukkie/[\d.]+' name: 'Plukkie' @@ -4195,14 +4190,6 @@ name: 'Senuto Sp. z o.o.' url: 'https://www.senuto.com/' -- regex: 'spaziodati' - name: 'SpazioDati' - category: 'Crawler' - url: 'https://www.spaziodati.eu/' - producer: - name: 'SpazioDati s.r.l.' - url: 'https://www.spaziodati.eu/' - - regex: 'GozleBot' name: 'Gozle' category: 'Crawler' @@ -4933,6 +4920,46 @@ name: 'SiteSell Inc.' url: 'https://www.sitesell.com/' +- regex: 'LightspeedSystemsCrawler' + name: 'LightspeedSystemsCrawler' + category: 'Crawler' + url: 'https://www.lightspeedsystems.com/' + producer: + name: 'Lightspeed Systems, Inc.' + url: 'https://www.lightspeedsystems.com/' + +- regex: 'Research JLU' + name: 'Research JLU' + category: 'Crawler' + url: 'https://www.uni-giessen.de/en/research' + producer: + name: 'Justus Liebig University Giessen' + url: 'https://www.uni-giessen.de/en' + +- regex: '(?:hgf|OS)AlphaXCrawl' + name: 'AlphaXCrawl' + category: 'Crawler' + url: 'https://www.fim.uni-passau.de/en/data-science/research/open-search' + producer: + name: 'University of Passau' + url: 'https://www.uni-passau.de/en/' + +- regex: 'Chatwork LinkPreview' + name: 'Chatwork LinkPreview' + category: 'Service Agent' + url: 'https://go.chatwork.com/en/' + producer: + name: 'kubell Co., Ltd.' + url: 'https://www.kubell.com/en/' + +- regex: 'WPMU DEV' + name: 'WPMU DEV' + category: 'Crawler' + url: 'https://wpmudev.com/docs/wpmu-dev-plugins/broken-link-checker/#broken-link-checker-user-agent' + producer: + name: 'Incsub, LLC.' + url: 'https://incsub.com/' + # Generic bots - regex: 'nuhk|grub-client|Download Demon|SearchExpress|Microsoft URL Control|borg|altavista|dataminr\.com|teoma|oegp|http%20client|htdig|mogimogi|larbin|scrubby|searchsight|semanticdiscovery|snappy|vortex(?!(?: Build|Plus| CM62| HD65))|zeal(?!ot)|dataparksearch|findlinks|BrowserMob|URL2PNG|ZooShot|GomezA|Google SketchUp|Read%20Later|7Siters|centuryb\.o\.t9|InterNaetBoten|EasyBib AutoCite|Bidtellect|tomnomnom/meg|cortex|Re-re Studio|adreview|AHC/|NameOfAgent|Request-Promise|ALittle Client|Hello,? world|wp_is_mobile|0xAbyssalDoesntExist|Anarchy99|^revolt|nvd0rz|xfa1|Hakai|gbrmss|fuck-your-hp|IDBTE4M CODE87|Antoine|Insomania|Hells-Net|b3astmode|Linux Gnu \(cow\)|Test Certificate Info|iplabel|Magellan|TheSafex?Internetx?Search|Searcherx?web|kirkland-signature|LinkChain|survey-security-dot-txt|infrawatch|Time/|r00ts3c-owned-you|nvdorz|Root Slut|NiggaBalls|BotPoke|GlobalWebSearch|xx032_bo9vs83_2a|sslshed|geckotrail|Wordup|Keydrop|^xenu|^(?:chrome|firefox|Abcd|Dark|KvshClient|Node.js|Report Runner|url|Zeus|ZmEu)$' name: 'Generic Bot' diff --git a/regexes/client/libraries.yml b/regexes/client/libraries.yml index c4543e7bbf..df92d5c99f 100644 --- a/regexes/client/libraries.yml +++ b/regexes/client/libraries.yml @@ -659,3 +659,8 @@ name: 'sqlmap' version: '$1' url: 'https://sqlmap.org/' + +- regex: 'vimeo\.php(?: (\d+[.\d]+))?' + name: 'vimeo.php' + version: '$1' + url: 'https://github.com/vimeo/vimeo.php' diff --git a/regexes/client/mobile_apps.yml b/regexes/client/mobile_apps.yml index 12bf859933..9cc5f0d3f8 100644 --- a/regexes/client/mobile_apps.yml +++ b/regexes/client/mobile_apps.yml @@ -2651,6 +2651,16 @@ name: 'OpenVAS' version: '$1' +# appdb (https://appdb.to/) +- regex: 'appdb/([\d.]+)' + name: 'appdb' + version: '$1' + +# Apache (https://www.apache.org/) +- regex: 'Apache/([\d.]+)' + name: 'Apache' + version: '$1' + # Electron generic apps - regex: ' (?!(?:AppleWebKit|brave|Cypress|Franz|Mailspring|Notion|Basecamp|Evernote|catalyst|ramboxpro|BlueMail|BeakerBrowser|Dezor|TweakStyle|Colibri|Polypane|Singlebox|Skye|VibeMate|(?:d|LT|Glass|Sushi|Flash|OhHai)Browser|Sizzy))([a-z0-9]*)(?:-desktop|-electron-app)?/(\d+\.[\d.]+).*Electron/' name: '$1'