From f5b12b75a87c818368c0c4a79a393a2fb2ef57ef Mon Sep 17 00:00:00 2001 From: Roman Babenko Date: Fri, 27 Dec 2024 13:51:52 +0200 Subject: [PATCH] Salesforce Credentials (#647) * salesforce * benchmark * skip json check for last symbol 0x0A * style * rollbackBM --- .ci/benchmark.txt | 17 +++++------ .github/workflows/check.yml | 2 +- credsweeper/common/constants.py | 3 -- credsweeper/config/config.py | 3 +- credsweeper/filters/group/group.py | 4 +-- credsweeper/filters/group/token_pattern.py | 7 ++++- .../filters/group/url_credentials_group.py | 2 +- credsweeper/filters/value_pattern_check.py | 8 ++---- credsweeper/rules/config.yaml | 17 +++++++++++ tests/__init__.py | 6 ++-- tests/data/depth_3.json | 28 ++++++++++++++++++- tests/data/doc.json | 28 ++++++++++++++++++- tests/data/ml_threshold.json | 28 ++++++++++++++++++- tests/data/output.json | 28 ++++++++++++++++++- tests/filters/test_value_pattern_check.py | 16 +++++------ tests/samples/salesfoce | 2 ++ 16 files changed, 160 insertions(+), 39 deletions(-) create mode 100644 tests/samples/salesfoce diff --git a/.ci/benchmark.txt b/.ci/benchmark.txt index e01dc52ee..edc4fc91a 100644 --- a/.ci/benchmark.txt +++ b/.ci/benchmark.txt @@ -1,6 +1,6 @@ -META MD5 b33b22ce3adc2141bcf91e4cdd6f1cab -DATA MD5 9ac09dae7d8873d53e1fbf18da2d71c4 -DATA: 16329853 interested lines. MARKUP: 59549 items +META MD5 984f912263c0c337a1672296aa759cbc +DATA MD5 6db3f0cb94aad9db85077fb00a1ae6bf +DATA: 16329853 interested lines. MARKUP: 59550 items FileType FileNumber ValidLines Positives Negatives Templates --------------- ------------ ------------ ----------- ----------- ----------- 193 28288 69 415 90 @@ -86,7 +86,7 @@ FileType FileNumber ValidLines Positives Negatives Templat .jenkinsfile 1 58 2 6 .jinja2 1 64 2 .js 653 532652 512 2450 331 -.json 843 13045846 1074 10011 139 +.json 843 13045846 1076 10012 139 .jsp 13 3202 1 37 .jsx 7 857 19 .jwt 1 1 2 @@ -222,8 +222,8 @@ FileType FileNumber ValidLines Positives Negatives Templat .yml 418 36057 522 910 376 .zsh 6 872 12 .zsh-theme 1 97 1 -TOTAL: 10003 16329853 11856 46611 5084 -credsweeper result_cnt : 11623, lost_cnt : 0, true_cnt : 11391, false_cnt : 232 +TOTAL: 10003 16329853 11858 46612 5084 +credsweeper result_cnt : 11626, lost_cnt : 0, true_cnt : 11393, false_cnt : 233 Rules Positives Negatives Templates Reported TP FP TN FN FPR FNR ACC PRC RCL F1 ------------------------------ ----------- ----------- ----------- ---------- ----- ---- ----- ---- -------- -------- -------- -------- -------- -------- API 130 3166 188 126 125 1 3353 5 0.000298 0.038462 0.998278 0.992063 0.961538 0.976562 @@ -236,7 +236,7 @@ Azure Access Token 19 0 0 1 BASE64 Private Key 12 4 0 12 12 0 4 0 0.000000 0.000000 1.000000 1.000000 1.000000 1.000000 BASE64 encoded PEM Private Key 7 0 0 5 5 0 0 2 0.285714 0.714286 1.000000 0.714286 0.833333 Bitbucket Client ID 19 53 0 75 19 53 0 0 1.000000 0.000000 0.263889 0.263889 1.000000 0.417582 -Bitbucket Client Secret 28 66 1 98 28 67 0 0 1.000000 0.000000 0.294737 0.294737 1.000000 0.455285 +Bitbucket Client Secret 28 67 1 99 28 68 0 0 1.000000 0.000000 0.291667 0.291667 1.000000 0.451613 CMD ConvertTo-SecureString 13 4 0 13 13 0 4 0 0.000000 0.000000 1.000000 1.000000 1.000000 1.000000 CMD Password 21 128 6 20 20 0 134 1 0.000000 0.047619 0.993548 1.000000 0.952381 0.975610 CMD Secret 1 1 0 1 1 0 1 0 0.000000 0.000000 1.000000 1.000000 1.000000 1.000000 @@ -262,6 +262,7 @@ Nonce 93 49 0 9 Other 9 7447 5 0 0 7452 9 0.000000 1.000000 0.998794 0.000000 PEM Private Key 1019 1483 0 1023 1019 4 1479 0 0.002697 0.000000 0.998401 0.996090 1.000000 0.998041 Password 1869 7536 2680 1795 1782 13 10203 87 0.001273 0.046549 0.991725 0.992758 0.953451 0.972707 +Salesforce Credentials 2 0 0 2 2 0 0 0 0.000000 1.000000 1.000000 1.000000 1.000000 Salt 47 76 1 45 45 0 77 2 0.000000 0.042553 0.983871 1.000000 0.957447 0.978261 Secret 1297 1576 802 1292 1288 4 2374 9 0.001682 0.006939 0.996463 0.996904 0.993061 0.994979 Seed 1 6 0 0 0 6 1 0.000000 1.000000 0.857143 0.000000 @@ -272,4 +273,4 @@ Token 644 4170 454 61 Twilio Credentials 30 39 0 30 30 0 39 0 0.000000 0.000000 1.000000 1.000000 1.000000 1.000000 URL Credentials 210 157 215 209 208 1 371 2 0.002688 0.009524 0.994845 0.995215 0.990476 0.992840 UUID 1075 265 0 1074 1073 1 264 2 0.003774 0.001860 0.997761 0.999069 0.998140 0.998604 - 11856 46611 5084 11636 11391 232 46379 465 0.004977 0.039221 0.988079 0.980040 0.960779 0.970314 + 11858 46612 5084 11639 11393 233 46379 465 0.004999 0.039214 0.988062 0.979959 0.960786 0.970278 diff --git a/.github/workflows/check.yml b/.github/workflows/check.yml index ba345ce8c..1eb5250fa 100644 --- a/.github/workflows/check.yml +++ b/.github/workflows/check.yml @@ -49,7 +49,7 @@ jobs: if: ${{ always() && steps.code_checkout.conclusion == 'success' }} run: | n=0 - for f in $(find . -type f -not -wholename '*/.*' -a -not -wholename '*/tests/samples/*' -a -not -wholename '*/corpus/*'); do + for f in $(find . -type f -not -wholename '*/.*' -a -not -wholename '*/tests/samples/*' -a -not -wholename '*/corpus/*' -a -not -wholename '*.json'); do n=$(( 1 + ${n} )) filetype=$(file ${f}) if echo "${filetype}" | grep -q '.*text.*'; then diff --git a/credsweeper/common/constants.py b/credsweeper/common/constants.py index cb8bbd2e2..c2f39b2bb 100644 --- a/credsweeper/common/constants.py +++ b/credsweeper/common/constants.py @@ -170,9 +170,6 @@ class DiffRowType(Enum): # default value for config and ValuePatternCheck DEFAULT_PATTERN_LEN = 4 -# default value for config and ValuePemPatternCheck -DEFAULT_PEM_PATTERN_LEN = 5 - # PEM x509 patterns PEM_BEGIN_PATTERN = "-----BEGIN" PEM_END_PATTERN = "-----END" diff --git a/credsweeper/config/config.py b/credsweeper/config/config.py index f2768d11d..371fe7b1b 100644 --- a/credsweeper/config/config.py +++ b/credsweeper/config/config.py @@ -3,7 +3,7 @@ from humanfriendly import parse_size -from credsweeper.common.constants import Severity, DEFAULT_PATTERN_LEN, DEFAULT_PEM_PATTERN_LEN +from credsweeper.common.constants import Severity, DEFAULT_PATTERN_LEN from credsweeper.utils import Util @@ -46,4 +46,3 @@ def __init__(self, config: Dict[str, Any]) -> None: self.exclude_values = set(line.strip() for line in self.exclude_values) self.pattern_len = config.get("pattern_len", DEFAULT_PATTERN_LEN) - self.pem_pattern_len = config.get("pem_pattern_len", DEFAULT_PEM_PATTERN_LEN) diff --git a/credsweeper/filters/group/group.py b/credsweeper/filters/group/group.py index 42094d054..5229b781c 100644 --- a/credsweeper/filters/group/group.py +++ b/credsweeper/filters/group/group.py @@ -48,7 +48,7 @@ def get_keyword_base_filters(config: Config) -> List[Filter]: ValueTokenCheck(), ] if not config.doc: - filters.extend([ValuePatternCheck(config), ValueNotAllowedPatternCheck()]) + filters.extend([ValuePatternCheck(pattern_len=config.pattern_len), ValueNotAllowedPatternCheck()]) return filters @staticmethod @@ -56,5 +56,5 @@ def get_pattern_base_filters(config: Config) -> List[Filter]: """return base filters for pattern""" return [ # LineSpecificKeyCheck(), # - ValuePatternCheck(config), # + ValuePatternCheck(pattern_len=config.pattern_len), # ] diff --git a/credsweeper/filters/group/token_pattern.py b/credsweeper/filters/group/token_pattern.py index 126732f2b..5a319f36c 100644 --- a/credsweeper/filters/group/token_pattern.py +++ b/credsweeper/filters/group/token_pattern.py @@ -9,4 +9,9 @@ class TokenPattern(Group): def __init__(self, config: Config) -> None: super().__init__(config, GroupType.DEFAULT) - self.filters = [ValueCoupleKeywordCheck(), ValueNumberCheck(), ValueCamelCaseCheck(), ValuePatternCheck(config)] + self.filters = [ + ValueCoupleKeywordCheck(), + ValueNumberCheck(), + ValueCamelCaseCheck(), + ValuePatternCheck(pattern_len=config.pattern_len) + ] diff --git a/credsweeper/filters/group/url_credentials_group.py b/credsweeper/filters/group/url_credentials_group.py index 23aba1d3b..44c982807 100644 --- a/credsweeper/filters/group/url_credentials_group.py +++ b/credsweeper/filters/group/url_credentials_group.py @@ -30,5 +30,5 @@ def __init__(self, config: Config) -> None: ValueNotAllowedPatternCheck(), ValueTokenCheck(), ValueDictionaryValueLengthCheck(min_len=4, max_len=80), - ValuePatternCheck(config) + ValuePatternCheck(pattern_len=config.pattern_len) ] diff --git a/credsweeper/filters/value_pattern_check.py b/credsweeper/filters/value_pattern_check.py index 38144a362..f580f0c78 100644 --- a/credsweeper/filters/value_pattern_check.py +++ b/credsweeper/filters/value_pattern_check.py @@ -1,5 +1,6 @@ import re +from credsweeper.common.constants import DEFAULT_PATTERN_LEN from credsweeper.config import Config from credsweeper.credentials import LineData from credsweeper.file_handler.analysis_target import AnalysisTarget @@ -21,17 +22,14 @@ class ValuePatternCheck(Filter): Default pattern LEN is 4 """ - def __init__(self, config: Config): + def __init__(self, config: Config = None, pattern_len: int = DEFAULT_PATTERN_LEN): """Create ValuePatternCheck with a specific pattern_len to check. Args: config: pattern len to use during check. DEFAULT_PATTERN_LEN by default """ - if 'ValuePemPatternCheck' == self.__class__.__name__: - self.pattern_len = config.pem_pattern_len - else: - self.pattern_len = config.pattern_len + self.pattern_len = pattern_len # use non whitespace symbol pattern self.pattern = re.compile(fr"(\S)\1{{{str(self.pattern_len - 1)},}}") diff --git a/credsweeper/rules/config.yaml b/credsweeper/rules/config.yaml index c11659efa..16651862f 100644 --- a/credsweeper/rules/config.yaml +++ b/credsweeper/rules/config.yaml @@ -1386,3 +1386,20 @@ target: - code - doc + +- name: Salesforce Credentials + severity: medium + confidence: weak + type: pattern + values: + - (?:(?00D[0-9A-Za-z]{9,15}(![.0-9A-Za-z_-]{24,200})?)(?![0-9A-Za-z_-]) + min_line_len: 12 + filter_type: + - ValuePatternCheck(9) + - ValueNumberCheck + - ValueBase64PartCheck + required_substrings: + - 00D + target: + - code + - doc diff --git a/tests/__init__.py b/tests/__init__.py index f42a82d37..5922f44e8 100644 --- a/tests/__init__.py +++ b/tests/__init__.py @@ -1,13 +1,13 @@ from pathlib import Path # total number of files in test samples -SAMPLES_FILES_COUNT = 142 +SAMPLES_FILES_COUNT = 143 # the lowest value of ML threshold is used to display possible lowest values NEGLIGIBLE_ML_THRESHOLD = 0.0001 # credentials count after scan with negligible ML threshold -SAMPLES_CRED_COUNT = 427 +SAMPLES_CRED_COUNT = 428 SAMPLES_CRED_LINE_COUNT = SAMPLES_CRED_COUNT + 19 # Number of filtered credentials with ML @@ -17,7 +17,7 @@ SAMPLES_POST_CRED_COUNT = SAMPLES_CRED_COUNT - ML_FILTERED # with option --doc -SAMPLES_IN_DOC = 673 +SAMPLES_IN_DOC = 674 # archived credentials that are not found without --depth SAMPLES_IN_DEEP_1 = SAMPLES_POST_CRED_COUNT + 35 diff --git a/tests/data/depth_3.json b/tests/data/depth_3.json index f0c4c2c15..d2530ec78 100644 --- a/tests/data/depth_3.json +++ b/tests/data/depth_3.json @@ -10817,6 +10817,32 @@ } ] }, + { + "ml_validation": "NOT_AVAILABLE", + "ml_probability": null, + "rule": "Salesforce Credentials", + "severity": "medium", + "confidence": "weak", + "line_data_list": [ + { + "line": "t_token 00DUI000005AecQ!AR8Amtqc2drcmpoZ3dlanJoMzQ5ODc1OTg0Mzc5NzQ1OCsrKysKLS0tLQp.cm9jLVR5cGU6IDQsRU5DUllwwMzQ1NzY3MzQ1", + "line_num": 2, + "path": "./tests/samples/salesfoce", + "info": "./tests/samples/salesfoce|RAW", + "value": "00DUI000005AecQ!AR8Amtqc2drcmpoZ3dlanJoMzQ5ODc1OTg0Mzc5NzQ1OCsrKysKLS0tLQp.cm9jLVR5cGU6IDQsRU5DUllwwMzQ1NzY3MzQ1", + "value_start": 8, + "value_end": 120, + "variable": null, + "variable_start": -2, + "variable_end": -2, + "entropy_validation": { + "iterator": "BASE64STDPAD_CHARS", + "entropy": 5.076545709916438, + "valid": true + } + } + ] + }, { "ml_validation": "VALIDATED_KEY", "ml_probability": 0.999, @@ -13859,4 +13885,4 @@ } ] } -] +] \ No newline at end of file diff --git a/tests/data/doc.json b/tests/data/doc.json index 3684276cd..26a11f3f4 100644 --- a/tests/data/doc.json +++ b/tests/data/doc.json @@ -17658,6 +17658,32 @@ } ] }, + { + "ml_validation": "NOT_AVAILABLE", + "ml_probability": null, + "rule": "Salesforce Credentials", + "severity": "medium", + "confidence": "weak", + "line_data_list": [ + { + "line": "t_token 00DUI000005AecQ!AR8Amtqc2drcmpoZ3dlanJoMzQ5ODc1OTg0Mzc5NzQ1OCsrKysKLS0tLQp.cm9jLVR5cGU6IDQsRU5DUllwwMzQ1NzY3MzQ1", + "line_num": 2, + "path": "./tests/samples/salesfoce", + "info": "./tests/samples/salesfoce|RAW", + "value": "00DUI000005AecQ!AR8Amtqc2drcmpoZ3dlanJoMzQ5ODc1OTg0Mzc5NzQ1OCsrKysKLS0tLQp.cm9jLVR5cGU6IDQsRU5DUllwwMzQ1NzY3MzQ1", + "value_start": 8, + "value_end": 120, + "variable": null, + "variable_start": -2, + "variable_end": -2, + "entropy_validation": { + "iterator": "BASE64STDPAD_CHARS", + "entropy": 5.076545709916438, + "valid": true + } + } + ] + }, { "ml_validation": "VALIDATED_KEY", "ml_probability": 0.79, @@ -19486,4 +19512,4 @@ } ] } -] +] \ No newline at end of file diff --git a/tests/data/ml_threshold.json b/tests/data/ml_threshold.json index b8d5aa894..82754e4af 100644 --- a/tests/data/ml_threshold.json +++ b/tests/data/ml_threshold.json @@ -10124,6 +10124,32 @@ } ] }, + { + "ml_validation": "NOT_AVAILABLE", + "ml_probability": null, + "rule": "Salesforce Credentials", + "severity": "medium", + "confidence": "weak", + "line_data_list": [ + { + "line": "t_token 00DUI000005AecQ!AR8Amtqc2drcmpoZ3dlanJoMzQ5ODc1OTg0Mzc5NzQ1OCsrKysKLS0tLQp.cm9jLVR5cGU6IDQsRU5DUllwwMzQ1NzY3MzQ1", + "line_num": 2, + "path": "./tests/samples/salesfoce", + "info": "", + "value": "00DUI000005AecQ!AR8Amtqc2drcmpoZ3dlanJoMzQ5ODc1OTg0Mzc5NzQ1OCsrKysKLS0tLQp.cm9jLVR5cGU6IDQsRU5DUllwwMzQ1NzY3MzQ1", + "value_start": 8, + "value_end": 120, + "variable": null, + "variable_start": -2, + "variable_end": -2, + "entropy_validation": { + "iterator": "BASE64STDPAD_CHARS", + "entropy": 5.076545709916438, + "valid": true + } + } + ] + }, { "ml_validation": "VALIDATED_KEY", "ml_probability": 0.999, @@ -11424,4 +11450,4 @@ } ] } -] +] \ No newline at end of file diff --git a/tests/data/output.json b/tests/data/output.json index c2c06a5cd..4fd23b116 100644 --- a/tests/data/output.json +++ b/tests/data/output.json @@ -9370,6 +9370,32 @@ } ] }, + { + "ml_validation": "NOT_AVAILABLE", + "ml_probability": null, + "rule": "Salesforce Credentials", + "severity": "medium", + "confidence": "weak", + "line_data_list": [ + { + "line": "t_token 00DUI000005AecQ!AR8Amtqc2drcmpoZ3dlanJoMzQ5ODc1OTg0Mzc5NzQ1OCsrKysKLS0tLQp.cm9jLVR5cGU6IDQsRU5DUllwwMzQ1NzY3MzQ1", + "line_num": 2, + "path": "./tests/samples/salesfoce", + "info": "", + "value": "00DUI000005AecQ!AR8Amtqc2drcmpoZ3dlanJoMzQ5ODc1OTg0Mzc5NzQ1OCsrKysKLS0tLQp.cm9jLVR5cGU6IDQsRU5DUllwwMzQ1NzY3MzQ1", + "value_start": 8, + "value_end": 120, + "variable": null, + "variable_start": -2, + "variable_end": -2, + "entropy_validation": { + "iterator": "BASE64STDPAD_CHARS", + "entropy": 5.076545709916438, + "valid": true + } + } + ] + }, { "ml_validation": "VALIDATED_KEY", "ml_probability": 0.999, @@ -10592,4 +10618,4 @@ } ] } -] +] \ No newline at end of file diff --git a/tests/filters/test_value_pattern_check.py b/tests/filters/test_value_pattern_check.py index f3c8951a5..bb1b580ca 100644 --- a/tests/filters/test_value_pattern_check.py +++ b/tests/filters/test_value_pattern_check.py @@ -14,35 +14,33 @@ class TestValuePatternCheck(unittest.TestCase): def setUp(self) -> None: self.config = MagicMock(spec=Config) self.config.pattern_len = 4 - self.config.pem_pattern_len = 5 def test_equal_pattern_check_n(self) -> None: self.assertFalse(ValuePatternCheck(self.config).equal_pattern_check("Crackle123")) self.assertFalse(ValuePatternCheck(self.config).equal_pattern_check("IEEE32441")) self.assertFalse(ValuePatternCheck(self.config).equal_pattern_check("Pass...")) - self.assertFalse(ValuePatternCheck(self.config).equal_pattern_check("Pass:\\n Crackle123")) + self.assertFalse(ValuePatternCheck(pattern_len=4).equal_pattern_check("Pass:\\n Crackle123")) def test_equal_pattern_check_p(self) -> None: self.assertTrue(ValuePatternCheck(self.config).equal_pattern_check("AAAABCD")) - self.assertTrue(ValuePatternCheck(self.config).equal_pattern_check("-------BEGIN")) - self.config.pattern_len = 8 - self.assertFalse(ValuePatternCheck(self.config).equal_pattern_check("-------BEGIN")) + self.assertTrue(ValuePatternCheck(pattern_len=4).equal_pattern_check("-------BEGIN")) + self.assertFalse(ValuePatternCheck(pattern_len=8).equal_pattern_check("-------BEGIN")) def test_ascending_pattern_check_n(self) -> None: self.assertFalse(ValuePatternCheck(self.config).ascending_pattern_check("Crackle123")) - self.assertFalse(ValuePatternCheck(self.config).ascending_pattern_check("Crackle987654321")) + self.assertFalse(ValuePatternCheck(pattern_len=4).ascending_pattern_check("Crackle987654321")) def test_ascending_pattern_check_p(self) -> None: self.assertTrue(ValuePatternCheck(self.config).ascending_pattern_check("Crackle1234")) - self.assertTrue(ValuePatternCheck(self.config).ascending_pattern_check("Cracklefgh")) + self.assertTrue(ValuePatternCheck(pattern_len=4).ascending_pattern_check("Cracklefgh")) def test_descending_pattern_check_n(self) -> None: self.assertFalse(ValuePatternCheck(self.config).descending_pattern_check("Crackle321")) - self.assertFalse(ValuePatternCheck(self.config).descending_pattern_check("Crackle123456789")) + self.assertFalse(ValuePatternCheck(pattern_len=4).descending_pattern_check("Crackle123456789")) def test_descending_pattern_check_p(self) -> None: self.assertTrue(ValuePatternCheck(self.config).descending_pattern_check("Crackle4321")) - self.assertTrue(ValuePatternCheck(self.config).descending_pattern_check("Crackledcba")) + self.assertTrue(ValuePatternCheck(pattern_len=4).descending_pattern_check("Crackledcba")) class TestValuePatternCheckFixture: diff --git a/tests/samples/salesfoce b/tests/samples/salesfoce new file mode 100644 index 000000000..a7dbac5ee --- /dev/null +++ b/tests/samples/salesfoce @@ -0,0 +1,2 @@ +f_org_id 00Dee00000000000A5 00D0A53429897F6E +t_token 00DUI000005AecQ!AR8Amtqc2drcmpoZ3dlanJoMzQ5ODc1OTg0Mzc5NzQ1OCsrKysKLS0tLQp.cm9jLVR5cGU6IDQsRU5DUllwwMzQ1NzY3MzQ1