diff --git a/credsweeper/utils/util.py b/credsweeper/utils/util.py
index f86ffa6c9..1e45fe3fc 100644
--- a/credsweeper/utils/util.py
+++ b/credsweeper/utils/util.py
@@ -481,14 +481,14 @@ def is_mxfile(data: Union[bytes, bytearray]) -> bool:
return False
# A well-formed XML must start from < or a whitespace character
- XML_FIRST_TAG_PATTERN = re.compile(rb"^\s*<([0-9A-Za-z_]{1,256})")
- XML_CLOSE_TAG_PATTERN = re.compile(rb"[0-9A-Za-z_]{1,256}>")
+ XML_FIRST_BRACKET_PATTERN = re.compile(rb"^\s*<")
+ XML_OPENING_TAG_PATTERN = re.compile(rb"<([0-9A-Za-z_]{1,256})")
@staticmethod
def is_xml(data: Union[bytes, bytearray]) -> bool:
"""Used to detect xml format from raw bytes"""
- if isinstance(data, (bytes, bytearray)):
- if first_bracket_match := Util.XML_FIRST_TAG_PATTERN.search(data, 0, MAX_LINE_LENGTH):
+ if isinstance(data, (bytes, bytearray)) and Util.XML_FIRST_BRACKET_PATTERN.search(data, 0, MAX_LINE_LENGTH):
+ if first_bracket_match := Util.XML_OPENING_TAG_PATTERN.search(data, 0, MAX_LINE_LENGTH):
start_pos = first_bracket_match.start()
closing_tag = b"" + first_bracket_match.group(1) + b">"
if start_pos < data.find(closing_tag, start_pos):
diff --git a/tests/utils/test_util.py b/tests/utils/test_util.py
index 4aa919622..a4af145f3 100644
--- a/tests/utils/test_util.py
+++ b/tests/utils/test_util.py
@@ -619,10 +619,13 @@ def test_subtext_p(self):
def test_is_xml_n(self):
self.assertFalse(Util.is_xml(b''))
- self.assertFalse(Util.is_xml(b'!<>'))
- self.assertFalse(Util.is_xml(b'
'))
- self.assertFalse(Util.is_xml(bytearray(b'\n' * MAX_LINE_LENGTH) + bytearray(b'
"))
+ self.assertFalse(Util.is_xml(b"
"))
+ self.assertFalse(Util.is_xml(bytearray(b'\n' * MAX_LINE_LENGTH) + bytearray(b"