diff --git a/credsweeper/filters/value_allowlist_check.py b/credsweeper/filters/value_allowlist_check.py index 0db5085a5..be30dedca 100644 --- a/credsweeper/filters/value_allowlist_check.py +++ b/credsweeper/filters/value_allowlist_check.py @@ -11,7 +11,7 @@ class ValueAllowlistCheck(Filter): """Check that patterns from the list is not present in the candidate value.""" ALLOWED = [ - r"ENC\(.*\)", r"ENC\[.*\]", r"\$\{.*\}", r"#\{.*\}", r"\{\{.+\}\}", r"([.a-z0-9]|->)+\(.*\)", r"\*\*\*\*\*" + r"ENC\(.*\)", r"ENC\[.*\]", r"\$\{.*\}", r"#\{.*\}", r"\{\{.+\}\}", r"([.a-z0-9]|->)+\(.*\)", r"\S{0,5}\*{5,}" ] ALLOWED_PATTERN = re.compile( # Util.get_regex_combine_or(ALLOWED), # diff --git a/credsweeper/rules/config.yaml b/credsweeper/rules/config.yaml index 71b7c5b1c..342f0c664 100644 --- a/credsweeper/rules/config.yaml +++ b/credsweeper/rules/config.yaml @@ -5,6 +5,7 @@ - (?P[`'\"]?(?i:token|secret|key|키|암호|암호화|토큰)[`'\"]?)((\s)*[=:](\s)*)(?P[`'\"(])?(?P\S{4,})(?(quote)[)`'\"]) filter_type: - ValueAllowlistCheck + - ValuePatternCheck min_line_len: 10 required_substrings: - token @@ -26,6 +27,7 @@ - (?P[`'\"]?(?i:(?[`'\"(])?(?P\S{4,})(?(quote)[)`'\"]) filter_type: - ValueAllowlistCheck + - ValuePatternCheck min_line_len: 10 required_substrings: - pass @@ -43,9 +45,10 @@ severity: medium type: pattern values: - - (^|(?P(?i:\bip[\s/]+id[\s/]+pw[\s/:]*))|(?P://)|\s)(?P[0-2]?[0-9]{1,2}\.[0-2]?[0-9]{1,2}\.[0-2]?[0-9]{1,2}\.[0-2]?[0-9]{1,2})((?P\s*\()?\s*|(?(variable)[\s,/]+|\s*(?(url)[,]|[,/])\s*))[\w.-]{3,}[\s,/]+(?P(?(lpar)[^)\s/]{4,}|(?(url)[^\s/]{4,}|[^\s]{4,}))) + - (^|(?P(?i:\bip[\s/]+id[\s/]+pw[\s/:]*))|(?P://)|\s)(?P[0-2]?[0-9]{1,2}\.[0-2]?[0-9]{1,2}\.[0-2]?[0-9]{1,2}\.[0-2]?[0-9]{1,2})((?P\s*(\w+\s+)?\()?\s*|(?(variable)[\s,/]+|\s*(?(url)[,]|[,/]))\s*)[\w.-]{3,}[\s,/]+(?P(?(lpar)[^)\s]{4,}|[^\s/]{4,}))(?:\s|[^/]|$) filter_type: - ValueAllowlistCheck + - ValuePatternCheck min_line_len: 10 required_substrings: - "." @@ -59,6 +62,7 @@ - (?P--)?(?P(?i:user\s*)?(?i:id|login|account|root|admin|user|name|wifi|role|host|default|계정|아이디))\s*?(?(ddash)[ =]|[ :=])\s*?(?P\S+) filter_type: - ValueAllowlistCheck + - ValuePatternCheck min_line_len: 10 required_substrings: - pass @@ -79,6 +83,7 @@ filter_type: - ValueAllowlistCheck - ValueDictionaryKeywordCheck + - ValuePatternCheck min_line_len: 10 required_substrings: - pw diff --git a/tests/__init__.py b/tests/__init__.py index 4a0d5fd44..103aa501b 100644 --- a/tests/__init__.py +++ b/tests/__init__.py @@ -11,7 +11,7 @@ SAMPLES_POST_CRED_COUNT: int = 293 # with option --doc -SAMPLES_IN_DOC = 426 +SAMPLES_IN_DOC = 422 # archived credentials that are not found without --depth SAMPLES_IN_DEEP_1 = SAMPLES_POST_CRED_COUNT + 16 diff --git a/tests/data/depth_3.json b/tests/data/depth_3.json index 514f03140..22696a82f 100644 --- a/tests/data/depth_3.json +++ b/tests/data/depth_3.json @@ -2341,13 +2341,13 @@ "severity": "medium", "line_data_list": [ { - "line": "username:xxx pw:ihqSb1Gg", + "line": "username:master pw:ihqSb1Gg", "line_num": 90, "path": "tests/samples/doc_id_pair_passwd_pair", "info": "tests/samples/doc_id_pair_passwd_pair|RAW", "value": "ihqSb1Gg", - "value_start": 16, - "value_end": 24, + "value_start": 19, + "value_end": 27, "variable": "pw", "entropy_validation": { "iterator": "BASE64_CHARS", diff --git a/tests/data/doc.json b/tests/data/doc.json index 5c28311c4..7e5727f9c 100644 --- a/tests/data/doc.json +++ b/tests/data/doc.json @@ -5155,13 +5155,13 @@ "severity": "medium", "line_data_list": [ { - "line": "username:xxx pw:ihqSb1Gg", + "line": "username:master pw:ihqSb1Gg", "line_num": 90, "path": "tests/samples/doc_id_pair_passwd_pair", "info": "tests/samples/doc_id_pair_passwd_pair|RAW", "value": "ihqSb1Gg", - "value_start": 16, - "value_end": 24, + "value_start": 19, + "value_end": 27, "variable": "pw", "entropy_validation": { "iterator": "BASE64_CHARS", @@ -5170,17 +5170,17 @@ } }, { - "line": "username:xxx pw:ihqSb1Gg", + "line": "username:master pw:ihqSb1Gg", "line_num": 90, "path": "tests/samples/doc_id_pair_passwd_pair", "info": "tests/samples/doc_id_pair_passwd_pair|RAW", - "value": "xxx", + "value": "master", "value_start": 9, - "value_end": 12, + "value_end": 15, "variable": "username", "entropy_validation": { - "iterator": "HEX_CHARS", - "entropy": 0.0, + "iterator": "BASE64_CHARS", + "entropy": 2.584962500721156, "valid": false } } @@ -5194,13 +5194,13 @@ "severity": "medium", "line_data_list": [ { - "line": "username:xxx pw:ihqSb1Gg", + "line": "username:master pw:ihqSb1Gg", "line_num": 90, "path": "tests/samples/doc_id_pair_passwd_pair", "info": "tests/samples/doc_id_pair_passwd_pair|RAW", "value": "ihqSb1Gg", - "value_start": 16, - "value_end": 24, + "value_start": 19, + "value_end": 27, "variable": "pw", "entropy_validation": { "iterator": "BASE64_CHARS", @@ -8186,13 +8186,13 @@ "line_num": 14, "path": "tests/samples/doc_various", "info": "tests/samples/doc_various|RAW", - "value": "(master/IhqSb1Gg)", - "value_start": 17, - "value_end": 34, + "value": "IhqSb1Gg", + "value_start": 25, + "value_end": 33, "variable": null, "entropy_validation": { "iterator": "BASE64_CHARS", - "entropy": 3.606584859926771, + "entropy": 3.0, "valid": false } } @@ -8750,13 +8750,13 @@ "line_num": 48, "path": "tests/samples/doc_various", "info": "tests/samples/doc_various|RAW", - "value": "IhqSb1Gg,master", + "value": "IhqSb1Gg,master/IhqSb1Gg", "value_start": 19, - "value_end": 34, + "value_end": 43, "variable": null, "entropy_validation": { "iterator": "BASE64_CHARS", - "entropy": 3.64643122256795, + "entropy": 3.727255729857775, "valid": false } } @@ -8834,45 +8834,6 @@ } ] }, - { - "api_validation": "NOT_AVAILABLE", - "ml_validation": "NOT_AVAILABLE", - "ml_probability": null, - "rule": "ID_PAIR_PASSWD_PAIR", - "severity": "medium", - "line_data_list": [ - { - "line": "ANY_user:xxxx ANY_pwd:IhqSb1Gg", - "line_num": 61, - "path": "tests/samples/doc_various", - "info": "tests/samples/doc_various|RAW", - "value": "IhqSb1Gg", - "value_start": 22, - "value_end": 30, - "variable": "ANY_pwd", - "entropy_validation": { - "iterator": "BASE64_CHARS", - "entropy": 3.0, - "valid": false - } - }, - { - "line": "ANY_user:xxxx ANY_pwd:IhqSb1Gg", - "line_num": 61, - "path": "tests/samples/doc_various", - "info": "tests/samples/doc_various|RAW", - "value": "xxxx", - "value_start": 9, - "value_end": 13, - "variable": "user", - "entropy_validation": { - "iterator": "HEX_CHARS", - "entropy": 0.0, - "valid": false - } - } - ] - }, { "api_validation": "NOT_AVAILABLE", "ml_validation": "NOT_AVAILABLE", @@ -8897,45 +8858,6 @@ } ] }, - { - "api_validation": "NOT_AVAILABLE", - "ml_validation": "NOT_AVAILABLE", - "ml_probability": null, - "rule": "ID_PAIR_PASSWD_PAIR", - "severity": "medium", - "line_data_list": [ - { - "line": "Acount name:xxxx Initial Password:IhqSb1Gg", - "line_num": 62, - "path": "tests/samples/doc_various", - "info": "tests/samples/doc_various|RAW", - "value": "IhqSb1Gg", - "value_start": 34, - "value_end": 42, - "variable": "Password", - "entropy_validation": { - "iterator": "BASE64_CHARS", - "entropy": 3.0, - "valid": false - } - }, - { - "line": "Acount name:xxxx Initial Password:IhqSb1Gg", - "line_num": 62, - "path": "tests/samples/doc_various", - "info": "tests/samples/doc_various|RAW", - "value": "xxxx", - "value_start": 12, - "value_end": 16, - "variable": "name", - "entropy_validation": { - "iterator": "HEX_CHARS", - "entropy": 0.0, - "valid": false - } - } - ] - }, { "api_validation": "NOT_AVAILABLE", "ml_validation": "NOT_AVAILABLE", @@ -9581,45 +9503,6 @@ } ] }, - { - "api_validation": "NOT_AVAILABLE", - "ml_validation": "NOT_AVAILABLE", - "ml_probability": null, - "rule": "ID_PAIR_PASSWD_PAIR", - "severity": "medium", - "line_data_list": [ - { - "line": "ID:gildong.hong@xxxx.net mailto:{1} pw:IhqSb1Gg", - "line_num": 106, - "path": "tests/samples/doc_various", - "info": "tests/samples/doc_various|RAW", - "value": "IhqSb1Gg", - "value_start": 39, - "value_end": 47, - "variable": "pw", - "entropy_validation": { - "iterator": "BASE64_CHARS", - "entropy": 3.0, - "valid": false - } - }, - { - "line": "ID:gildong.hong@xxxx.net mailto:{1} pw:IhqSb1Gg", - "line_num": 106, - "path": "tests/samples/doc_various", - "info": "tests/samples/doc_various|RAW", - "value": "gildong.hong@xxxx.net", - "value_start": 3, - "value_end": 24, - "variable": "ID", - "entropy_validation": { - "iterator": "BASE64_CHARS", - "entropy": 2.8358066002709883, - "valid": false - } - } - ] - }, { "api_validation": "NOT_AVAILABLE", "ml_validation": "NOT_AVAILABLE", @@ -10793,30 +10676,6 @@ } ] }, - { - "api_validation": "NOT_AVAILABLE", - "ml_validation": "NOT_AVAILABLE", - "ml_probability": null, - "rule": "PASSWD_PAIR", - "severity": "medium", - "line_data_list": [ - { - "line": "mypw: KrAcMe12345,", - "line_num": 2, - "path": "tests/samples/passwd.groovy", - "info": "tests/samples/passwd.groovy|RAW", - "value": "KrAcMe12345,", - "value_start": 6, - "value_end": 18, - "variable": "pw", - "entropy_validation": { - "iterator": "BASE64_CHARS", - "entropy": 3.2862156256610597, - "valid": false - } - } - ] - }, { "api_validation": "NOT_AVAILABLE", "ml_validation": "NOT_AVAILABLE", diff --git a/tests/data/ml_threshold_0.json b/tests/data/ml_threshold_0.json index 9a28ec61e..9d04198a4 100644 --- a/tests/data/ml_threshold_0.json +++ b/tests/data/ml_threshold_0.json @@ -2437,13 +2437,13 @@ "severity": "medium", "line_data_list": [ { - "line": "username:xxx pw:ihqSb1Gg", + "line": "username:master pw:ihqSb1Gg", "line_num": 90, "path": "tests/samples/doc_id_pair_passwd_pair", "info": "", "value": "ihqSb1Gg", - "value_start": 16, - "value_end": 24, + "value_start": 19, + "value_end": 27, "variable": "pw", "entropy_validation": { "iterator": "BASE64_CHARS", diff --git a/tests/data/output.json b/tests/data/output.json index d2e2f7d19..a537ea66d 100644 --- a/tests/data/output.json +++ b/tests/data/output.json @@ -2317,13 +2317,13 @@ "severity": "medium", "line_data_list": [ { - "line": "username:xxx pw:ihqSb1Gg", + "line": "username:master pw:ihqSb1Gg", "line_num": 90, "path": "tests/samples/doc_id_pair_passwd_pair", "info": "", "value": "ihqSb1Gg", - "value_start": 16, - "value_end": 24, + "value_start": 19, + "value_end": 27, "variable": "pw", "entropy_validation": { "iterator": "BASE64_CHARS", diff --git a/tests/filters/test_value_allowlist_check.py b/tests/filters/test_value_allowlist_check.py index 9c1374c27..9e50eecf2 100644 --- a/tests/filters/test_value_allowlist_check.py +++ b/tests/filters/test_value_allowlist_check.py @@ -11,9 +11,7 @@ def test_value_allowlist_check_p(self, file_path: pytest.fixture, success_line: line_data = get_line_data(file_path, line=success_line, pattern=LINE_VALUE_PATTERN) assert ValueAllowlistCheck().run(line_data, DUMMY_ANALYSIS_TARGET) is False - @pytest.mark.parametrize("line", [ - "ENC(Crackle123)", - ]) + @pytest.mark.parametrize("line", ["test*****", "ENC(Crackle123)"]) def test_value_allowlist_check_n(self, file_path: pytest.fixture, line: str) -> None: line_data = get_line_data(file_path, line=line, pattern=LINE_VALUE_PATTERN) assert ValueAllowlistCheck().run(line_data, DUMMY_ANALYSIS_TARGET) is True diff --git a/tests/samples/doc_id_pair_passwd_pair b/tests/samples/doc_id_pair_passwd_pair index 10989cef6..2a2b441ad 100644 --- a/tests/samples/doc_id_pair_passwd_pair +++ b/tests/samples/doc_id_pair_passwd_pair @@ -87,7 +87,7 @@ id/pw id:master pw:ihqSb1Gg user:master,pwd:ihqSb1Gg ID master PWD ihqSb1Gg username:master/pw:master -username:xxx pw:ihqSb1Gg +username:master pw:ihqSb1Gg PW:ihqSb1Gg ID:master 아이디:master 비밀번호:ihqSb1Gg ID:master,PW ihqSb1Gg diff --git a/tests/samples/doc_ip_id_password_triple b/tests/samples/doc_ip_id_password_triple index 30ed935d7..08cffcf2f 100644 --- a/tests/samples/doc_ip_id_password_triple +++ b/tests/samples/doc_ip_id_password_triple @@ -4,3 +4,8 @@ IP ID PW 192.168.0.1 master IhqSb1Gg IP/ID/PW 192.168.0.1/master/IhqSb1Gg + +# FALSE +

sirius 192.168.0.1/mnt/user/USBx/\\[Root_Test\\]UseCase/12345678/76125-733_FFB_2038_judgement_day6_238811_6524.apk

+http://192.168.0.1/master/IhqSb1Gg/api +http://192.168.0.1/master/branch diff --git a/tests/test_main.py b/tests/test_main.py index 89a05eaa1..6619cc1a8 100644 --- a/tests/test_main.py +++ b/tests/test_main.py @@ -823,7 +823,9 @@ def test_param_p(self) -> None: # internal parametrized tests to keep items = [(" STP_PASSWORD=qbgomdtpqch \\", "qbgomdtpqch")] for i in items: - content_provider: FilesProvider = TextProvider(["test.template", io.BytesIO(i[0].encode())]) + content_provider: FilesProvider = TextProvider([ + ("test.template", io.BytesIO(i[0].encode())), + ]) cred_sweeper = CredSweeper(ml_threshold=0) cred_sweeper.run(content_provider=content_provider) creds = cred_sweeper.credential_manager.get_credentials()