diff --git a/credsweeper/app.py b/credsweeper/app.py index 8050b1fd4..386374652 100644 --- a/credsweeper/app.py +++ b/credsweeper/app.py @@ -158,12 +158,13 @@ def _use_ml_validation(self) -> bool: logger.info("ML validation is disabled") return False if not self.credential_manager.candidates: - logger.info("Skipping ML validation due to no candidates found") + logger.info("Skip ML validation because no candidates were found") return False for i in self.credential_manager.candidates: if i.use_ml: + # any() or all() is not used to speedup return True - logger.info("Skipp ML validation due no candidates support it") + logger.info("Skip ML validation because no candidates support it") return False # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # diff --git a/tests/test_main.py b/tests/test_main.py index d94b76a02..ac0cbc445 100644 --- a/tests/test_main.py +++ b/tests/test_main.py @@ -366,59 +366,21 @@ def test_multiple_invocation_p(self) -> None: cred_sweeper = CredSweeper() self.assertFalse(cred_sweeper.is_ml_validator_inited) # found candidate is not ML validated - provider = TextContentProvider(SAMPLES_PATH / "small.pdf") + provider = StringContentProvider(["qpF8Q~PCM5MhMoyTFc5TYEomnzRUKim9UJhe8a6E"]) candidates = cred_sweeper.file_scan(provider) self.assertEqual(1, len(candidates)) - self.assertDictEqual( - - {"api_validation": "NOT_AVAILABLE", - "line_data_list": [{ - "entropy_validation": { - "entropy": 4.620007704961091, - "iterator": "BASE64_CHARS", - "valid": True}, - "info": "", - "line": "BT /F1 24 Tf 175 720 Td (qpF8Q~PCM5MhMoyTFc5TYEomnzRUKim9UJhe8a2P)Tj ET", - "line_num": 15, - "path": f"{SAMPLES_PATH}/small.pdf", - "value": "qpF8Q~PCM5MhMoyTFc5TYEomnzRUKim9UJhe8a2P", - "value_end": 65, - "value_start": 25, - "variable": None}], - "ml_probability": None, - "ml_validation": "NOT_AVAILABLE", - "rule": "Azure Secret Value", - "severity": "high"} - , candidates[0].to_json()) + self.assertEqual("Azure Secret Value", candidates[0].rule_name) self.assertFalse(cred_sweeper.is_ml_validator_inited) cred_sweeper.credential_manager.set_credentials(candidates) cred_sweeper.post_processing() self.assertFalse(cred_sweeper.is_ml_validator_inited) # found candidate is ML validated - provider = TextContentProvider(SAMPLES_PATH / "nonce.hs") + + provider = StringContentProvider(['"nonce": "qPRjfoZWaBPH0KbXMCicm5v1VdG5Hj0DUFMHdSxPOiS"']) candidates = cred_sweeper.file_scan(provider) self.assertEqual(1, len(candidates)) - self.assertDictEqual({ - "api_validation": "NOT_AVAILABLE", - "line_data_list": [{ - "entropy_validation": { - "entropy": 4.9260374290200755, - "iterator": "BASE64_CHARS", - "valid": True}, - "info": "", - "line": " \"nonce\": \"qPRjfoZWaBPH0KbXMCicm5v1VdG5Hj0DUFMHdSxPOiA\"", - "line_num": 2, - "path": f"{SAMPLES_PATH}/nonce.hs", - "value": "qPRjfoZWaBPH0KbXMCicm5v1VdG5Hj0DUFMHdSxPOiA", - "value_end": 57, - "value_start": 14, - "variable": "nonce"}], - "ml_probability": None, - "ml_validation": "NOT_AVAILABLE", - "rule": "Nonce", - "severity": "medium"} - , candidates[0].to_json()) + self.assertEqual("Nonce", candidates[0].rule_name) self.assertFalse(cred_sweeper.is_ml_validator_inited) cred_sweeper.credential_manager.set_credentials(candidates) cred_sweeper.post_processing() @@ -427,29 +389,10 @@ def test_multiple_invocation_p(self) -> None: validator_id = id(cred_sweeper.ml_validator) # found candidate is ML validated also - provider = TextContentProvider(SAMPLES_PATH / "password.gradle") + provider = StringContentProvider(["password = Xdj@jcN834b"]) candidates = cred_sweeper.file_scan(provider) self.assertEqual(1, len(candidates)) - self.assertDictEqual({ - "api_validation": "NOT_AVAILABLE", - "line_data_list": [{ - "entropy_validation": { - "entropy": 2.120589933192232, - "iterator": "BASE64_CHARS", - "valid": False}, - "info": "", - "line": "password = \"cackle!\"", - "line_num": 1, - "path": f"{SAMPLES_PATH}/password.gradle", - "value": "cackle!", - "value_end": 19, - "value_start": 12, - "variable": "password"}], - "ml_probability": None, - "ml_validation": "NOT_AVAILABLE", - "rule": "Password", - "severity": "medium"} - , candidates[0].to_json()) + self.assertEqual("Password", candidates[0].rule_name) # the ml_validator still initialized self.assertTrue(cred_sweeper.is_ml_validator_inited) cred_sweeper.credential_manager.set_credentials(candidates)