class JksScanner(AbstractScanner, ABC):
    """Reports Java KeyStore (JKS) data that can be opened with a well-known default password"""

    def data_scan(
            self,  #
            data_provider: DataContentProvider,  #
            depth: int,  #
            recursive_limit_size: int) -> List[Candidate]:
        """Tries to open the JKS data with each well-known default password

        Args:
            data_provider: supplies the raw keystore bytes and the path/type/info used in the report
            depth: not used by this scanner
            recursive_limit_size: not used by this scanner

        Return:
            a dummy candidate for every probed password that opens the keystore

        """
        candidates = []
        for pw_probe in ["", "changeit", "changeme"]:
            try:
                keystore = jks.KeyStore.loads(data_provider.data, pw_probe, try_decrypt_keys=True)
                # a store that holds private or secret keys is reported differently from a certificate-only store
                finding = "has keys" if keystore.private_keys or keystore.secret_keys else "default password"
                candidates.append(
                    Candidate.get_dummy_candidate(
                        self.config,  #
                        data_provider.file_path,  #
                        data_provider.file_type,  #
                        f"{data_provider.info}:'{pw_probe}' - {finding}"))
            except Exception as jks_exc:
                # best effort: any failure for this probe is only logged and the next password is tried
                logger.debug("%s:%s:%s", data_provider.file_path, pw_probe, jks_exc)
        return candidates


class Pkcs12Scanner(AbstractScanner, ABC):
    """Reports PKCS#12 data that can be opened with a well-known default password"""

    def data_scan(
            self,  #
            data_provider: DataContentProvider,  #
            depth: int,  #
            recursive_limit_size: int) -> List[Candidate]:
        """Tries to open the PKCS#12 data with each well-known default password

        Args:
            data_provider: supplies the raw container bytes and the path/type/info used in the report
            depth: not used by this scanner
            recursive_limit_size: not used by this scanner

        Return:
            a dummy candidate for every probed password that opens the container

        """
        candidates = []
        for pw_probe in [b"", b"changeit", b"changeme"]:
            try:
                # only the private key matters here; the certificates are loaded but not inspected
                private_key, _certificate, _additional_certificates = \
                    cryptography.hazmat.primitives.serialization.pkcs12.load_key_and_certificates(
                        data_provider.data, pw_probe)
                finding = "has keys PKCS12" if private_key else "default password PKCS12"
                candidates.append(
                    Candidate.get_dummy_candidate(
                        self.config,  #
                        data_provider.file_path,  #
                        data_provider.file_type,  #
                        f"{data_provider.info}:'{pw_probe.decode()}' - {finding}"))
            except Exception as pkcs_exc:
                # best effort: any failure for this probe is only logged and the next password is tried
                logger.debug("%s:%s:%s", data_provider.file_path, pw_probe.decode(), pkcs_exc)
        return candidates
f"{data_provider.info}:'{pw_probe.decode()}' - has keys PKCS12") + else: + candidate = Candidate.get_dummy_candidate( + self.config, # + data_provider.file_path, # + data_provider.file_type, # + f"{data_provider.info}:'{pw_probe.decode()}' - default password PKCS12") + candidates.append(candidate) + except Exception as pkcs_exc: + logger.debug(f"{data_provider.file_path}:{pw_probe.decode()}:{pkcs_exc}") + return candidates diff --git a/credsweeper/utils/util.py b/credsweeper/utils/util.py index e512911b0..20d3fc760 100644 --- a/credsweeper/utils/util.py +++ b/credsweeper/utils/util.py @@ -3,6 +3,7 @@ import logging import math import os +import struct import tarfile from dataclasses import dataclass from pathlib import Path @@ -392,6 +393,38 @@ def is_pdf(data: bytes) -> bool: return True return False + @staticmethod + def is_jks(data: bytes) -> bool: + """According https://en.wikipedia.org/wiki/List_of_file_signatures - jks""" + if isinstance(data, bytes) and 4 <= len(data): + if data.startswith(b"\xFE\xED\xFE\xED"): + return True + return False + + @staticmethod + def is_asn1(data: bytes) -> bool: + """Only sequence type 0x30 and size correctness is checked""" + data_length = len(data) + if isinstance(data, bytes) and 4 <= data_length: + # sequence + if 0x30 == data[0]: + # https://www.oss.com/asn1/resources/asn1-made-simple/asn1-quick-reference/basic-encoding-rules.html#Lengths + length = data[1] + byte_len = (0x7F & length) + if 0x80 == length and data.endswith(b"\x00\x00"): + return True + elif 0x80 < length and byte_len < data_length: # additional check + len_bytes = data[2:2 + byte_len] + try: + long_size = struct.unpack(">h", len_bytes) + except struct.error: + long_size = (-1,) # yapf: disable + length = long_size[0] + else: + byte_len = 0 + return data_length == length + 2 + byte_len + return False + @staticmethod def is_elf(data: Union[bytes, bytearray]) -> bool: """According to https://en.wikipedia.org/wiki/Executable_and_Linkable_Format use only 5 
bytes""" diff --git a/requirements.txt b/requirements.txt index cda32a15e..193d70eb4 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,5 +1,6 @@ # Common requirements beautifulsoup4==4.12.2 +cryptography==41.0.4 GitPython==3.1.37 google-auth-oauthlib==1.1.0 humanfriendly==10.0 @@ -16,6 +17,7 @@ whatthepatch==1.0.5 pdfminer.six==20221105 password-strength==0.0.3.post2 python-dateutil==2.8.2 +pyjks==20.0.0 # ML requirements numpy==1.24.4 @@ -45,6 +47,8 @@ types-PyYAML types-requests types-oauthlib types-python-dateutil +types-pyjks types-regex types-humanfriendly yapf + diff --git a/setup.py b/setup.py index 0353ce5e6..10a7bf307 100644 --- a/setup.py +++ b/setup.py @@ -5,6 +5,7 @@ install_requires = [ "beautifulsoup4>=4.11.0", # the lowest version with XMLParsedAsHTMLWarning + "cryptography", # "GitPython", # "google_auth_oauthlib", # "humanfriendly", # @@ -24,6 +25,7 @@ "scikit-learn", # "onnxruntime", # "python-dateutil", # + "pyjks", # ] setuptools.setup( diff --git a/tests/__init__.py b/tests/__init__.py index 7687efa34..e31377623 100644 --- a/tests/__init__.py +++ b/tests/__init__.py @@ -1,7 +1,7 @@ from pathlib import Path # total number of files in test samples -SAMPLES_FILES_COUNT: int = 114 +SAMPLES_FILES_COUNT: int = 117 # credentials count after scan SAMPLES_CRED_COUNT: int = 129 @@ -11,10 +11,10 @@ SAMPLES_POST_CRED_COUNT: int = 122 # with option --doc -SAMPLES_IN_DOC = 80 +SAMPLES_IN_DOC = 83 # archived credentials that are not found without --depth -SAMPLES_IN_DEEP_1 = SAMPLES_POST_CRED_COUNT + 16 +SAMPLES_IN_DEEP_1 = SAMPLES_POST_CRED_COUNT + 19 SAMPLES_IN_DEEP_2 = SAMPLES_IN_DEEP_1 + 16 SAMPLES_IN_DEEP_3 = SAMPLES_IN_DEEP_2 + 3 diff --git a/tests/data/depth_3.json b/tests/data/depth_3.json index fa3683c20..3f04f682b 100644 --- a/tests/data/depth_3.json +++ b/tests/data/depth_3.json @@ -557,6 +557,78 @@ } ] }, + { + "api_validation": "NOT_AVAILABLE", + "ml_validation": "NOT_AVAILABLE", + "ml_probability": null, + "rule": "Dummy candidate", + 
"severity": "info", + "line_data_list": [ + { + "line": "dummy line", + "line_num": 0, + "path": "tests/samples/changeit_crt.jks", + "info": "tests/samples/changeit_crt.jks:'changeit' - default password", + "value": null, + "value_start": -2, + "value_end": -2, + "variable": null, + "entropy_validation": { + "iterator": null, + "entropy": null, + "valid": null + } + } + ] + }, + { + "api_validation": "NOT_AVAILABLE", + "ml_validation": "NOT_AVAILABLE", + "ml_probability": null, + "rule": "Dummy candidate", + "severity": "info", + "line_data_list": [ + { + "line": "dummy line", + "line_num": 0, + "path": "tests/samples/changeit_crt.pkcs12", + "info": "tests/samples/changeit_crt.pkcs12:'changeit' - default password PKCS12", + "value": null, + "value_start": -2, + "value_end": -2, + "variable": null, + "entropy_validation": { + "iterator": null, + "entropy": null, + "valid": null + } + } + ] + }, + { + "api_validation": "NOT_AVAILABLE", + "ml_validation": "NOT_AVAILABLE", + "ml_probability": null, + "rule": "Dummy candidate", + "severity": "info", + "line_data_list": [ + { + "line": "dummy line", + "line_num": 0, + "path": "tests/samples/changeme_key.jks", + "info": "tests/samples/changeme_key.jks:'changeme' - has keys", + "value": null, + "value_start": -2, + "value_end": -2, + "variable": null, + "entropy_validation": { + "iterator": null, + "entropy": null, + "valid": null + } + } + ] + }, { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", diff --git a/tests/data/doc.json b/tests/data/doc.json index 579d4c1e4..e618a5d98 100644 --- a/tests/data/doc.json +++ b/tests/data/doc.json @@ -437,6 +437,78 @@ } ] }, + { + "api_validation": "NOT_AVAILABLE", + "ml_validation": "NOT_AVAILABLE", + "ml_probability": null, + "rule": "Dummy candidate", + "severity": "info", + "line_data_list": [ + { + "line": "dummy line", + "line_num": 0, + "path": "tests/samples/changeit_crt.jks", + "info": "tests/samples/changeit_crt.jks:'changeit' - default password", + 
"value": null, + "value_start": -2, + "value_end": -2, + "variable": null, + "entropy_validation": { + "iterator": null, + "entropy": null, + "valid": null + } + } + ] + }, + { + "api_validation": "NOT_AVAILABLE", + "ml_validation": "NOT_AVAILABLE", + "ml_probability": null, + "rule": "Dummy candidate", + "severity": "info", + "line_data_list": [ + { + "line": "dummy line", + "line_num": 0, + "path": "tests/samples/changeit_crt.pkcs12", + "info": "tests/samples/changeit_crt.pkcs12:'changeit' - default password PKCS12", + "value": null, + "value_start": -2, + "value_end": -2, + "variable": null, + "entropy_validation": { + "iterator": null, + "entropy": null, + "valid": null + } + } + ] + }, + { + "api_validation": "NOT_AVAILABLE", + "ml_validation": "NOT_AVAILABLE", + "ml_probability": null, + "rule": "Dummy candidate", + "severity": "info", + "line_data_list": [ + { + "line": "dummy line", + "line_num": 0, + "path": "tests/samples/changeme_key.jks", + "info": "tests/samples/changeme_key.jks:'changeme' - has keys", + "value": null, + "value_start": -2, + "value_end": -2, + "variable": null, + "entropy_validation": { + "iterator": null, + "entropy": null, + "valid": null + } + } + ] + }, { "api_validation": "NOT_AVAILABLE", "ml_validation": "NOT_AVAILABLE", diff --git a/tests/samples/changeit_crt.jks b/tests/samples/changeit_crt.jks new file mode 100644 index 000000000..be4d66ed3 Binary files /dev/null and b/tests/samples/changeit_crt.jks differ diff --git a/tests/samples/changeit_crt.pkcs12 b/tests/samples/changeit_crt.pkcs12 new file mode 100644 index 000000000..9790e0d4e Binary files /dev/null and b/tests/samples/changeit_crt.pkcs12 differ diff --git a/tests/samples/changeme_key.jks b/tests/samples/changeme_key.jks new file mode 100644 index 000000000..ce3443325 Binary files /dev/null and b/tests/samples/changeme_key.jks differ diff --git a/tests/samples/dummy.jks b/tests/samples/dummy.jks index 16905b157..eb58f4761 100644 Binary files a/tests/samples/dummy.jks 
def test_asn1_p(self):
    """Positive case: the complete decoded blob must be accepted by Util.is_asn1"""
    # base64 of a binary blob; presumably a PKCS#12 container - TODO confirm the origin of the fixture
    based_data = """MIIG8gIBAzCCBpwGCSqGSIb3DQEHAaCCBo0EggaJMIIGhTCCBoEGCSqGSIb3DQEHBqCCBnIwggZu
AgEAMIIGZwYJKoZIhvcNAQcBMGYGCSqGSIb3DQEFDTBZMDgGCSqGSIb3DQEFDDArBBSQgogxffCn
YoDJV4hjhkUGIi5AawICJxACASAwDAYIKoZIhvcNAgkFADAdBglghkgBZQMEASoEEAyAGIiPmdMV
4D+JugQ3YF2AggXw6BEQVUIX/ZlHdXyi59XfNwGN9USUMZH8hMKZkhk8aqxpZu61uigs2jSJQLL4
I8o50VoZVzHveeej24/GLJ8SV+xOS/GoVC55Q+UaKD7ynsQBiheEbihOthPapgqEHyfqd3QOLlcS
SqIDSgTgHVXT37JkS33+vyah/LWNszCXPzwK0nbGZLMUmL9dsFDuKyUEQ6+D/Orif/9Kb2QccqiQ
Lk0uLtAHT26TmjPEZvx5XQ0Ezyu0f4MHikc6B0HoSVACmBHgjajBPPqgBdmoqR6sTkh+0OA9iE8b
KHCCp+MBrZ+yBPD6/bgkhk8O392xtvaaMx3lTWN0R9sM9dv1RRuc34QCbHTI38gEdmxqOSo36rEG
9nu+hMRhZ+eddf55jT+fr0qSOfVbkUAqOQqbcn4/LXZY7r/DEIqn2dX8SaDXKbobZLvDfQpFP04b
xsXNUCcfmLahqUSy3LlqEqouTkt2M/UPeWcMij4pBWhWIjwXxMYvzm/G8A5+FQT6DlgGFAY4/YU3
YG8OznCtkQEjJwi5CtpUAELvYCQHjOccuNNpWShw9Wn3EsmHRn62CFUB7jsiywYP9NXvvL/K2T7N
vvb7c0mJIp4/twazPYDOEAhkO5tZQbpNoXfF9iqEs/XRD4MVXyKeBsNnIIThrCnMZQTCT4pSZWz0
zZd9SLSpbGcuEtC7dtbVQFio5ZGDM0rhopY/sYXmdOsaY5dPfJrKfayq3rESFkH82DIKC+snY2GV
qMOCWllPog/VdvwkD6TMQOAdE9fGuqGx9NYl/x05XS5TAVWKNX07+XtA3u6qN5PBgqAaWTpME5oU
vnARmPaaqys7V28JnEUo+hG/zxjV76repv/sXGvRSaD0lOJmGW8aNpu70iOn1T53BzNEG6cXYMRv
vWWKqFddalWzyKtmx4zHdOHAXZNcDc++k+ZhhVCczmxF0jd3xmJvipzHwutfEXC/A3R77N1qAr8C
I8mPHlo5WnuwyAVGCoZJ0qRHBZie7G97SFOANbkRKYRM6z3Tcbdj9UAH0CdhAHirR+vPQRxTYLKm
2qYjusDwK6+8PgBtN5u0SdrHRTFb/bSByNnLVVQ43P9NzZ9I8lXKfk9FHNV5OBusCLUWtWfiH0h6
NP0Ju6fpw/8jD7iDxZtcmvILaBFBIcXoIuOZxU1jEwxsfAjIvTbbJEfNFayrwiv/kpf55JV6m7Se
FdIhvJKXtmCUe39qGiry3aKyn82uVdz/EBsvux/f2euM1VouooWXWO0s832KkXCIM+J/kQAV0Aaf
VU/ZUqEELw+RCk8l287EdAMhy69w253cHz0RKpxlh8SgAluvpgRWnEzJPeqZzh58/ryu2py8+Wxd
zsND7gqRK8YlEVtbV0ugMoeeXGyALm2LV7CcMWt04ptpg6W8FW/POHDjPK2Non8pOSs0e++BY1sL
tl8jBkXWT1IUb0LPRwo1OCNnOdX4PFRCh/nInihrdOSrmQQZ3Rcm6IMAChr1YcK02mnCvQPVQsXs
1jrUB+TD8axKD/mEcRzrqNaCYJ2e8aSio97FHQyYOtbNC9p8bqPOWxSP4VeIxmg9eJ3SHwTdcDG9
LJxGJp3WvK16xDprZMg4riW5JbZ/66L1Yt6J7FnbCHD8T09e3ApRzzSI2YooaILju8IrLu9TvozA
gU8tVPHEQlbrcQqjStG3eKTiQdP/Dcc2JmKe5qK0a/zPqrU957QB6CgY+4+6n6ekYVSiN3jYCyby
2ow1ucAT4NGvWzziNMWKbhk+C8M6JXiYzzQ1xjz0RrGmIujjJn+iO6+Y+CiaD3SGtvyRxNUJIQP9
8e2sL1CTsBDFz2VluAynNtyebzLqvXzeTo/xS/q94rICJfPderKT5qIrj3JUrqnGHwLG9FfOohIF
sXicQDEvAZd5VTPl8KYa+nqAjvnvtyyJ0h8QA2xnJWzTpYRKNPC75H39xDx14LO2MXFplB6xTBNw
6pMwFxJKvf/toAxWh2N0hJlROdfowJ55sqQaY8xQUQlKC4nTYAdmb3uOR99BTsHKu5kwTTAxMA0G
CWCGSAFlAwQCAQUABCAAzNyx82qxGkeCHyzgCY+uYzHKWSxAOYTh2wWtwtqwrgQUGW8PygmD3Yeu
C5z6Z1bgIfi2awICAicQ"""
    data = base64.b64decode(based_data)
    self.assertTrue(Util.is_asn1(data))

def test_asn1_n(self):
    """Negative case: the same header followed by only a few of the original lines must be rejected"""
    # first two and two later lines of the positive fixture, so the decoded data is much shorter
    based_data = """MIIG8gIBAzCCBpwGCSqGSIb3DQEHAaCCBo0EggaJMIIGhTCCBoEGCSqGSIb3DQEHBqCCBnIwggZu
AgEAMIIGZwYJKoZIhvcNAQcBMGYGCSqGSIb3DQEFDTBZMDgGCSqGSIb3DQEFDDArBBSQgogxffCn
2ow1ucAT4NGvWzziNMWKbhk+C8M6JXiYzzQ1xjz0RrGmIujjJn+iO6+Y+CiaD3SGtvyRxNUJIQP9
C5z6Z1bgIfi2awICAicQ"""
    data = base64.b64decode(based_data)
    self.assertFalse(Util.is_asn1(data))
Util.get_extension("/"))