From 8725d6445fb04aab550818087fd8b2549a6f9448 Mon Sep 17 00:00:00 2001 From: Roman Babenko Date: Mon, 16 Dec 2024 12:19:37 +0200 Subject: [PATCH] Add " and ' cases for keyword pattern (#635) * update action test * fix keyword pattern with HTML escape quotes * MailChimp API Key right border --- .github/workflows/action.yml | 5 +- credsweeper/common/keyword_pattern.py | 7 +-- credsweeper/rules/config.yaml | 2 +- tests/__init__.py | 6 +-- tests/data/depth_3.json | 78 +++++++++++++++++++++++++++ tests/data/doc.json | 52 ++++++++++++++++++ tests/data/ml_threshold.json | 78 +++++++++++++++++++++++++++ tests/data/output.json | 78 +++++++++++++++++++++++++++ tests/samples/sample.html | 24 +++++++++ 9 files changed, 320 insertions(+), 10 deletions(-) create mode 100644 tests/samples/sample.html diff --git a/.github/workflows/action.yml b/.github/workflows/action.yml index e3ea37420..581c63ba7 100644 --- a/.github/workflows/action.yml +++ b/.github/workflows/action.yml @@ -26,9 +26,8 @@ jobs: with: ref: ${{ github.event.pull_request.head.sha }} - - name: CredSweeper test - # TODO: change @action to release point with the action - uses: Samsung/CredSweeper@8682ea7d68bf3dfe96e2ea1fad3f04b9b167993b # main + - name: CredSweeper action + uses: Samsung/CredSweeper@v1.9.6 # may be changed to any tag with: # args - arguments to credsweeper tool. See default values in action.yml args: --path ./tests/samples/ --save-json diff --git a/credsweeper/common/keyword_pattern.py b/credsweeper/common/keyword_pattern.py index 4c37d3b1f..3927606c0 100644 --- a/credsweeper/common/keyword_pattern.py +++ b/credsweeper/common/keyword_pattern.py @@ -8,7 +8,7 @@ class KeywordPattern: r"(?P" # there will be inserted a keyword key_right = r")" \ - r"[^%:='\"`<>{?!&]*)[`'\"]*)" # + r"(&(quot|apos);|[^%:='\"`<>{?!&]*)[`'\"]*))" # separator = r"(\s|\\+[tnr])*\]?(\s|\\+[tnr])*" \ r"(?P:( [a-z]{3,9}[?]? )?=|:|=(>|>|\\u0026gt;)|!=|===|==|=|%3d)" \ r"(\s|\\+[tnr])*" @@ -21,15 +21,16 @@ class KeywordPattern: r"([0-9a-z_]{1,32}=)?" \ r")+)?" string_prefix = r"(((b|r|br|rb|u|f|rf|fr|l|@)(?=(\\*[`'\"])))?" - left_quote = r"(?P((?P\\{1,8})?[`'\"]){1,4}))?" + left_quote = r"(?P((?P\\{1,8})?([`'\"]|&(quot|apos);)){1,4}))?" # Authentication scheme ( oauth | basic | bearer | apikey ) precedes to credential auth_keywords = r"( ?(oauth|bot|basic|bearer|apikey|accesskey) )?" value = r"(?P" \ r"(?(value_leftquote)" \ r"(" \ r"(?!(?P=value_leftquote))" \ - r"(?(esq)((?!(?P=esq)['`\"]).)|((?!(?P=value_leftquote)).)))" \ + r"(?(esq)((?!(?P=esq)([`'\"]|&(quot|apos);)).)|((?!(?P=value_leftquote)).)))" \ r"|" \ + r"(?!&(quot|apos);)" \ r"(\\+([ tnr]|[^\s`'\"])|[^\s`'\",;\\])" \ r"){3,8000}" \ r"|(\{[^}]{3,8000}\})" \ diff --git a/credsweeper/rules/config.yaml b/credsweeper/rules/config.yaml index 7bcdc8c0f..6baf16d67 100644 --- a/credsweeper/rules/config.yaml +++ b/credsweeper/rules/config.yaml @@ -368,7 +368,7 @@ confidence: moderate type: pattern values: - - (?:(?[0-9A-Za-z_-]{32}-us[0-9]{1,2}) + - (?:(?[0-9A-Za-z_-]{32}-us[0-9]{1,2})(?![0-9A-Za-z_-]) filter_type: GeneralPattern required_substrings: - -us diff --git a/tests/__init__.py b/tests/__init__.py index 99d18fcaf..4f1fde9c2 100644 --- a/tests/__init__.py +++ b/tests/__init__.py @@ -1,13 +1,13 @@ from pathlib import Path # total number of files in test samples -SAMPLES_FILES_COUNT = 138 +SAMPLES_FILES_COUNT = 139 # the lowest value of ML threshold is used to display possible lowest values NEGLIGIBLE_ML_THRESHOLD = 0.0001 # credentials count after scan with negligible ML threshold -SAMPLES_CRED_COUNT = 425 +SAMPLES_CRED_COUNT = 428 SAMPLES_CRED_LINE_COUNT = SAMPLES_CRED_COUNT + 19 # Number of filtered credentials with ML @@ -17,7 +17,7 @@ SAMPLES_POST_CRED_COUNT = SAMPLES_CRED_COUNT - ML_FILTERED # with option --doc -SAMPLES_IN_DOC = 468 +SAMPLES_IN_DOC = 470 # archived credentials that are not found without --depth SAMPLES_IN_DEEP_1 = SAMPLES_POST_CRED_COUNT + 33 diff --git a/tests/data/depth_3.json b/tests/data/depth_3.json index e10acceed..ab5b0d0e4 100644 --- a/tests/data/depth_3.json +++ b/tests/data/depth_3.json @@ -10687,6 +10687,84 @@ } ] }, + { + "ml_validation": "VALIDATED_KEY", + "ml_probability": 0.96, + "rule": "Token", + "severity": "medium", + "confidence": "moderate", + "line_data_list": [ + { + "line": "TokenRequest", + "line_num": 9, + "path": "./tests/samples/sample.html", + "info": "./tests/samples/sample.html|RAW", + "value": "g1re0g1T0keN3zWx", + "value_start": 40, + "value_end": 56, + "variable": "token", + "variable_start": 28, + "variable_end": 33, + "entropy_validation": { + "iterator": "BASE64_CHARS", + "entropy": 3.5, + "valid": false + } + } + ] + }, + { + "ml_validation": "VALIDATED_KEY", + "ml_probability": 0.999, + "rule": "Password", + "severity": "medium", + "confidence": "moderate", + "line_data_list": [ + { + "line": " placeholder=\"Your password: "g1re0g1Pa5$w0Rd"\"", + "line_num": 16, + "path": "./tests/samples/sample.html", + "info": "./tests/samples/sample.html|RAW", + "value": "g1re0g1Pa5$w0Rd", + "value_start": 38, + "value_end": 53, + "variable": "Your password", + "variable_start": 17, + "variable_end": 30, + "entropy_validation": { + "iterator": "BASE64_CHARS", + "entropy": 3.246431222567951, + "valid": false + } + } + ] + }, + { + "ml_validation": "VALIDATED_KEY", + "ml_probability": 0.998, + "rule": "Password", + "severity": "medium", + "confidence": "moderate", + "line_data_list": [ + { + "line": " ", + "line_num": 21, + "path": "./tests/samples/sample.html", + "info": "./tests/samples/sample.html|RAW", + "value": "g1re0g2Pa5$w0Rd", + "value_start": 39, + "value_end": 54, + "variable": "password", + "variable_start": 30, + "variable_end": 38, + "entropy_validation": { + "iterator": "BASE64_CHARS", + "entropy": 3.379764555901284, + "valid": false + } + } + ] + }, { "ml_validation": "NOT_AVAILABLE", "ml_probability": null, diff --git a/tests/data/doc.json b/tests/data/doc.json index b242989ba..47dd349fe 100644 --- a/tests/data/doc.json +++ b/tests/data/doc.json @@ -12874,6 +12874,58 @@ } ] }, + { + "ml_validation": "NOT_AVAILABLE", + "ml_probability": null, + "rule": "PASSWD_PAIR", + "severity": "medium", + "confidence": "moderate", + "line_data_list": [ + { + "line": "placeholder=\"Your password: "g1re0g1Pa5$w0Rd"\"", + "line_num": 16, + "path": "./tests/samples/sample.html", + "info": "./tests/samples/sample.html|RAW", + "value": ""g1re0g1Pa5$w0Rd"\"", + "value_start": 32, + "value_end": 60, + "variable": "password", + "variable_start": 22, + "variable_end": 30, + "entropy_validation": { + "iterator": "BASE64_CHARS", + "entropy": 3.2772074387595462, + "valid": false + } + } + ] + }, + { + "ml_validation": "NOT_AVAILABLE", + "ml_probability": null, + "rule": "PASSWD_PAIR", + "severity": "medium", + "confidence": "moderate", + "line_data_list": [ + { + "line": "", + "line_num": 21, + "path": "./tests/samples/sample.html", + "info": "./tests/samples/sample.html|RAW", + "value": "g1re0g2Pa5$w0Rd"", + "value_start": 39, + "value_end": 60, + "variable": "password", + "variable_start": 30, + "variable_end": 38, + "entropy_validation": { + "iterator": "BASE36_CHARS", + "entropy": 3.1560513697361983, + "valid": true + } + } + ] + }, { "ml_validation": "NOT_AVAILABLE", "ml_probability": null, diff --git a/tests/data/ml_threshold.json b/tests/data/ml_threshold.json index b7cdb268d..69653ba74 100644 --- a/tests/data/ml_threshold.json +++ b/tests/data/ml_threshold.json @@ -10280,6 +10280,84 @@ } ] }, + { + "ml_validation": "VALIDATED_KEY", + "ml_probability": 0.96, + "rule": "Token", + "severity": "medium", + "confidence": "moderate", + "line_data_list": [ + { + "line": "TokenRequest", + "line_num": 9, + "path": "./tests/samples/sample.html", + "info": "", + "value": "g1re0g1T0keN3zWx", + "value_start": 40, + "value_end": 56, + "variable": "token", + "variable_start": 28, + "variable_end": 33, + "entropy_validation": { + "iterator": "BASE64_CHARS", + "entropy": 3.5, + "valid": false + } + } + ] + }, + { + "ml_validation": "VALIDATED_KEY", + "ml_probability": 0.999, + "rule": "Password", + "severity": "medium", + "confidence": "moderate", + "line_data_list": [ + { + "line": " placeholder=\"Your password: "g1re0g1Pa5$w0Rd"\"", + "line_num": 16, + "path": "./tests/samples/sample.html", + "info": "", + "value": "g1re0g1Pa5$w0Rd", + "value_start": 38, + "value_end": 53, + "variable": "Your password", + "variable_start": 17, + "variable_end": 30, + "entropy_validation": { + "iterator": "BASE64_CHARS", + "entropy": 3.246431222567951, + "valid": false + } + } + ] + }, + { + "ml_validation": "VALIDATED_KEY", + "ml_probability": 0.998, + "rule": "Password", + "severity": "medium", + "confidence": "moderate", + "line_data_list": [ + { + "line": " ", + "line_num": 21, + "path": "./tests/samples/sample.html", + "info": "", + "value": "g1re0g2Pa5$w0Rd", + "value_start": 39, + "value_end": 54, + "variable": "password", + "variable_start": 30, + "variable_end": 38, + "entropy_validation": { + "iterator": "BASE64_CHARS", + "entropy": 3.379764555901284, + "valid": false + } + } + ] + }, { "ml_validation": "VALIDATED_KEY", "ml_probability": 0.672, diff --git a/tests/data/output.json b/tests/data/output.json index 55c5797eb..8135c2645 100644 --- a/tests/data/output.json +++ b/tests/data/output.json @@ -9188,6 +9188,84 @@ } ] }, + { + "ml_validation": "VALIDATED_KEY", + "ml_probability": 0.96, + "rule": "Token", + "severity": "medium", + "confidence": "moderate", + "line_data_list": [ + { + "line": "TokenRequest", + "line_num": 9, + "path": "./tests/samples/sample.html", + "info": "", + "value": "g1re0g1T0keN3zWx", + "value_start": 40, + "value_end": 56, + "variable": "token", + "variable_start": 28, + "variable_end": 33, + "entropy_validation": { + "iterator": "BASE64_CHARS", + "entropy": 3.5, + "valid": false + } + } + ] + }, + { + "ml_validation": "VALIDATED_KEY", + "ml_probability": 0.999, + "rule": "Password", + "severity": "medium", + "confidence": "moderate", + "line_data_list": [ + { + "line": " placeholder=\"Your password: "g1re0g1Pa5$w0Rd"\"", + "line_num": 16, + "path": "./tests/samples/sample.html", + "info": "", + "value": "g1re0g1Pa5$w0Rd", + "value_start": 38, + "value_end": 53, + "variable": "Your password", + "variable_start": 17, + "variable_end": 30, + "entropy_validation": { + "iterator": "BASE64_CHARS", + "entropy": 3.246431222567951, + "valid": false + } + } + ] + }, + { + "ml_validation": "VALIDATED_KEY", + "ml_probability": 0.998, + "rule": "Password", + "severity": "medium", + "confidence": "moderate", + "line_data_list": [ + { + "line": " ", + "line_num": 21, + "path": "./tests/samples/sample.html", + "info": "", + "value": "g1re0g2Pa5$w0Rd", + "value_start": 39, + "value_end": 54, + "variable": "password", + "variable_start": 30, + "variable_end": 38, + "entropy_validation": { + "iterator": "BASE64_CHARS", + "entropy": 3.379764555901284, + "valid": false + } + } + ] + }, { "ml_validation": "VALIDATED_KEY", "ml_probability": 0.672, diff --git a/tests/samples/sample.html b/tests/samples/sample.html new file mode 100644 index 000000000..8e5051c3a --- /dev/null +++ b/tests/samples/sample.html @@ -0,0 +1,24 @@ + + + + + Title + + + +TokenRequest +
+ + + +
+ + + + + \ No newline at end of file