
Commit

[User-reported bug fixes & dependency updates]
Minimum Python version is now 3.8.
josephkearney91 committed Nov 14, 2023
1 parent 45f1587 commit 6e0e211
Showing 4 changed files with 85 additions and 85 deletions.
2 changes: 1 addition & 1 deletion scrapeops_scrapy/__init__.py
@@ -1 +1 @@
__version__ = "0.5.3"
__version__ = "0.5.4"
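
For anyone verifying the upgrade, the bumped version string can be read back at runtime; a minimal check (assuming the package is importable as scrapeops_scrapy, as the file path above indicates) is:

import scrapeops_scrapy

# After upgrading, the installed SDK should report the new release.
assert scrapeops_scrapy.__version__ == "0.5.4", scrapeops_scrapy.__version__
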
147 changes: 74 additions & 73 deletions scrapeops_scrapy/core/error_logger.py
@@ -134,79 +134,63 @@ def emit(self, record):
try:

if(record.levelname == "ERROR" or record.levelname == "WARNING" or record.levelname == "CRITICAL"):

errorMessage = record.message
fileAndLine = record.pathname + ', line: ' + str(record.lineno)
dateTime = record.asctime
type = record.levelname
engine = record.name


#covering warnings/probableCause/traceback missing
traceback = 'No traceback available'
probableCause = ''

if record.exc_text is not None:
traceback = record.exc_text
splitTraceback = traceback.split('\n')
probableCause = splitTraceback[len(splitTraceback) - 1]


#covering retrys
if("Gave up retrying <" in record.message):

for retryError in self.retryErrors:
if(retryError in record.message):
method = record.message.split('<')[1].split(' ')[0]
errorMessage = "Error: Gave up retrying " + method + " request - " + retryError
fileAndLine = ''
probableCause = retryError
break

# Deprecation Warnings
if "ScrapyDeprecationWarning:" in record.message and record.message[0] == "/":
splitString = record.message.split("ScrapyDeprecationWarning:")
errorMessage = "ScrapyDeprecationWarning: " + splitString[1]
probableCause = splitString[0]


# "Some Other Error Occurred"
if "Some other error occurred: " in record.message:
splitError = record.message.split(' /')
cleanError = splitError[0].split(">: ")[1]
errorMessage = "Some other error occurred: " + cleanError
probableCause = cleanError
traceback = record.message


# Convert Urls To Domains in Error Messages
urls = re.findall(r'(https?://[^\s]+)', errorMessage)
for url in urls:
domain = DomainNormalizer.get_domain(url)
errorMessage = errorMessage.replace(url, domain)


if errorMessage in self.log_dict:
self.log_dict[errorMessage]['count'] = self.log_dict[errorMessage]['count'] + 1
else:
self.log_dict[errorMessage] = {
'type': type,
'engine': engine,
'name': errorMessage,
'count': 1,
'traceback': traceback,
'message' : probableCause,
'filepath': fileAndLine,
'dateTime': dateTime
}

if(SOPSRequest.HIGH_FREQ_ACC == True):

if(errorMessage in self.log_dict_cumulative):
self.log_dict_cumulative[errorMessage]['count'] = self.log_dict_cumulative[errorMessage]['count'] + 1

if hasattr(record, 'message'):
errorMessage = record.message
fileAndLine = record.pathname + ', line: ' + str(record.lineno)
dateTime = record.asctime
type = record.levelname
engine = record.name


#covering warnings/probableCause/traceback missing
traceback = 'No traceback available'
probableCause = ''

if record.exc_text is not None:
traceback = record.exc_text
splitTraceback = traceback.split('\n')
probableCause = splitTraceback[len(splitTraceback) - 1]


#covering retrys
if("Gave up retrying <" in record.message):

for retryError in self.retryErrors:
if(retryError in record.message):
method = record.message.split('<')[1].split(' ')[0]
errorMessage = "Error: Gave up retrying " + method + " request - " + retryError
fileAndLine = ''
probableCause = retryError
break

# Deprecation Warnings
if "ScrapyDeprecationWarning:" in record.message and record.message[0] == "/":
splitString = record.message.split("ScrapyDeprecationWarning:")
errorMessage = "ScrapyDeprecationWarning: " + splitString[1]
probableCause = splitString[0]


# "Some Other Error Occurred"
if "Some other error occurred: " in record.message:
splitError = record.message.split(' /')
cleanError = splitError[0].split(">: ")[1]
errorMessage = "Some other error occurred: " + cleanError
probableCause = cleanError
traceback = record.message


# Convert Urls To Domains in Error Messages
urls = re.findall(r'(https?://[^\s]+)', errorMessage)
for url in urls:
domain = DomainNormalizer.get_domain(url)
errorMessage = errorMessage.replace(url, domain)


if errorMessage in self.log_dict:
self.log_dict[errorMessage]['count'] = self.log_dict[errorMessage]['count'] + 1
else:

self.log_dict_cumulative[errorMessage] = {
self.log_dict[errorMessage] = {
'type': type,
'engine': engine,
'name': errorMessage,
@@ -215,7 +199,24 @@ def emit(self, record):
'message' : probableCause,
'filepath': fileAndLine,
'dateTime': dateTime
}
}

if(SOPSRequest.HIGH_FREQ_ACC == True):

if(errorMessage in self.log_dict_cumulative):
self.log_dict_cumulative[errorMessage]['count'] = self.log_dict_cumulative[errorMessage]['count'] + 1
else:

self.log_dict_cumulative[errorMessage] = {
'type': type,
'engine': engine,
'name': errorMessage,
'count': 1,
'traceback': traceback,
'message' : probableCause,
'filepath': fileAndLine,
'dateTime': dateTime
}

except Exception as e:
logging.info('Error: Error in error logger')
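
The substantive change above wraps the body of emit() in an "if hasattr(record, 'message'):" guard. LogRecord.message only exists once a formatter has run (Formatter.format() sets record.message from record.getMessage()), so a record reaching this handler unformatted previously raised AttributeError and fell straight into the catch-all except. A minimal sketch of the guarded pattern, using an illustrative handler rather than the SDK's actual class:

import logging

class GuardedTailHandler(logging.Handler):
    """Illustrative only: mirrors the guard added above, not the SDK code."""

    def __init__(self):
        super().__init__()
        self.log_dict = {}

    def emit(self, record):
        try:
            if record.levelname in ("ERROR", "WARNING", "CRITICAL"):
                # Formatter.format() sets record.message; records that were
                # never formatted have no such attribute, so skip them.
                if hasattr(record, 'message'):
                    key = record.message
                    if key in self.log_dict:
                        self.log_dict[key]['count'] += 1
                    else:
                        self.log_dict[key] = {
                            'type': record.levelname,
                            'engine': record.name,
                            'name': key,
                            'count': 1,
                        }
        except Exception:
            logging.info('Error: Error in error logger')

The guard simply skips records whose message attribute has not been populated yet, instead of tripping the exception handler for every such record.
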
4 changes: 2 additions & 2 deletions scrapeops_scrapy/validators/response_validator.py
@@ -110,9 +110,9 @@ def string_check(text, text_check, comparison, text_slice=None):
@staticmethod
def string_slice(text, text_slice):
if text_slice.get('active'):
if text_slice.get('slice_type') == 'first':
if (text_slice.get('slice_type') == 'first') and (len(text) > 0):
return text[:text_slice.get('slice_upper_threshold', len(text))]
if text_slice.get('slice_type') == 'last':
if (text_slice.get('slice_type') == 'last') and (len(text) > 0):
return text[-text_slice.get('slice_lower_threshold', 0)]
if text_slice.get('slice_type') == 'range':
return text[text_slice.get('slice_lower_threshold', 0):text_slice.get('slice_upper_threshold', len(text))]
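
The validator change adds a len(text) > 0 condition to the 'first' and 'last' slice branches, so an empty body never reaches the negative index in the 'last' branch (text[-0] is text[0], which raises IndexError on an empty string). A rough sketch of the guarded helper, assuming the fall-through simply returns text unchanged (the rest of the function body is not shown in this diff):

def string_slice(text, text_slice):
    # Sketch of the guarded logic; the final return is an assumption.
    if text_slice.get('active'):
        if text_slice.get('slice_type') == 'first' and len(text) > 0:
            return text[:text_slice.get('slice_upper_threshold', len(text))]
        if text_slice.get('slice_type') == 'last' and len(text) > 0:
            # Mirrors the original expression: a negative index, i.e. a
            # single trailing character rather than a slice.
            return text[-text_slice.get('slice_lower_threshold', 0)]
        if text_slice.get('slice_type') == 'range':
            return text[text_slice.get('slice_lower_threshold', 0):
                        text_slice.get('slice_upper_threshold', len(text))]
    return text

print(string_slice("", {'active': True, 'slice_type': 'last'}))            # '' (no IndexError)
print(string_slice("<html></html>", {'active': True, 'slice_type': 'first',
                                     'slice_upper_threshold': 6}))         # '<html>'
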
17 changes: 8 additions & 9 deletions setup.py
@@ -1,7 +1,7 @@
from setuptools import setup, find_packages


VERSION = '0.5.3'
VERSION = '0.5.4'
DESCRIPTION = 'Scrapeops Scrapy SDK, is a monitoring tool for your Scrapy spiders.'

setup(name='scrapeops_scrapy',
@@ -14,23 +14,22 @@
url="https://github.com/ScrapeOps/scrapeops-scrapy-sdk",
packages=find_packages(),
install_requires=[
"tld>=0.12.4",
"requests>=2.24.0",
"json5>=0.9.5",
"urllib3>=1.25.10",
"itemadapter>=0.4.0",
"tld>=0.13",
"requests>=2.31.0",
"json5>=0.9.13",
"urllib3>=2.1",
"itemadapter>=0.8.0",
],
classifiers=[
"Programming Language :: Python",
"Programming Language :: Python :: 3",
"Programming Language :: Python :: 3.6",
"Programming Language :: Python :: 3.7",
"Programming Language :: Python :: 3.8",
"Programming Language :: Python :: 3.9",
"Programming Language :: Python :: 3.10",
"Programming Language :: Python :: 3.11",
"License :: OSI Approved :: BSD License",
"Operating System :: OS Independent",
"Intended Audience :: Developers",
],
python_requires=">=3.6",
python_requires=">=3.8",
)
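
The packaging changes raise the supported floor to Python 3.8 and bump the minimum dependency versions (tld 0.13, requests 2.31.0, json5 0.9.13, urllib3 2.1, itemadapter 0.8.0). A quick, stdlib-only way to compare an existing environment against those minimums before upgrading (this only reports what is installed; enforcement is left to pip via install_requires and python_requires):

import sys
from importlib.metadata import PackageNotFoundError, version  # stdlib since Python 3.8

# Minimum versions introduced by this release (taken from install_requires above).
MINIMUMS = {
    "tld": "0.13",
    "requests": "2.31.0",
    "json5": "0.9.13",
    "urllib3": "2.1",
    "itemadapter": "0.8.0",
}

assert sys.version_info >= (3, 8), "scrapeops-scrapy 0.5.4 requires Python 3.8+"

for package, minimum in MINIMUMS.items():
    try:
        print(f"{package}: installed {version(package)} (requires >= {minimum})")
    except PackageNotFoundError:
        print(f"{package}: not installed (requires >= {minimum})")
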
