diff --git a/.ci/benchmark.txt b/.ci/benchmark.txt index 42a83f0a4..b05a945cb 100644 --- a/.ci/benchmark.txt +++ b/.ci/benchmark.txt @@ -1,6 +1,6 @@ -META MD5 ef775241a6d575ff10f7220dcfadf5d7 -DATA MD5 51b6d4e4debbd374fc184f2b691e0bb8 -DATA: 16344639 interested lines. MARKUP: 62827 items +META MD5 72b4b7db8a2ffef0f19e802c09032e14 +DATA MD5 abd9c025d5c323af814fbeb33f469c90 +DATA: 16342283 interested lines. MARKUP: 62020 items FileType FileNumber ValidLines Positives Negatives Templates --------------- ------------ ------------ ----------- ----------- ----------- 194 28318 71 418 90 @@ -55,7 +55,7 @@ FileType FileNumber ValidLines Positives Negatives Templat .erb 13 323 27 .erl 4 96 7 .ex 25 4968 5 98 5 -.example 17 1838 76 38 52 +.example 17 1838 77 38 52 .exs 24 4842 8 187 4 .ext 5 211 1 4 2 .fsproj 1 75 1 2 @@ -159,7 +159,7 @@ FileType FileNumber ValidLines Positives Negatives Templat .pyx 2 1094 23 .r 4 62 6 3 1 .rake 2 51 2 -.rb 860 131838 254 3340 615 +.rb 838 129482 281 2529 613 .re 1 31 1 .red 1 159 1 .release 1 13 4 @@ -222,8 +222,8 @@ FileType FileNumber ValidLines Positives Negatives Templat .yml 419 36169 559 889 376 .zsh 6 872 12 .zsh-theme 1 97 1 -TOTAL: 10254 16344639 12227 50501 5104 -credsweeper result_cnt : 11493, lost_cnt : 0, true_cnt : 11314, false_cnt : 179 +TOTAL: 10232 16342283 12255 49690 5102 +credsweeper result_cnt : 11521, lost_cnt : 0, true_cnt : 11342, false_cnt : 179 Rules Positives Negatives Templates Reported TP FP TN FN FPR FNR ACC PRC RCL F1 ------------------------------ ----------- ----------- ----------- ---------- ----- ---- ----- ---- -------- -------- -------- -------- -------- -------- API 130 3166 188 125 123 2 3352 7 0.000596 0.053846 0.997417 0.984000 0.946154 0.964706 @@ -242,7 +242,7 @@ CMD Password 21 128 6 1 CMD Secret 1 1 0 1 1 0 1 0 0.000000 0.000000 1.000000 1.000000 1.000000 1.000000 CMD Token 6 0 0 6 6 0 0 0 0.000000 1.000000 1.000000 1.000000 1.000000 Certificate 24 471 0 20 20 0 471 4 0.000000 0.166667 0.991919 1.000000 0.833333 0.909091 -Credential 93 419 76 94 93 1 494 0 0.002020 0.000000 0.998299 0.989362 1.000000 0.994652 +Credential 91 421 76 92 91 1 496 0 0.002012 0.000000 0.998299 0.989130 1.000000 0.994536 Docker Swarm Token 2 0 0 1 1 0 0 1 0.500000 0.500000 1.000000 0.500000 0.666667 Dropbox App secret 64 139 1 46 35 10 130 29 0.071429 0.453125 0.808824 0.777778 0.546875 0.642202 Facebook Access Token 0 1 0 0 0 1 0 0.000000 1.000000 @@ -259,7 +259,7 @@ Jira / Confluence PAT token 0 4 0 Jira 2FA 15 6 1 12 12 0 7 3 0.000000 0.200000 0.863636 1.000000 0.800000 0.888889 Key 3909 15717 485 3944 3893 51 16151 16 0.003148 0.004093 0.996668 0.987069 0.995907 0.991468 Nonce 91 49 0 89 88 1 48 3 0.020408 0.032967 0.971429 0.988764 0.967033 0.977778 -Other 8 8292 1 0 0 8293 8 0.000000 1.000000 0.999036 0.000000 +Other 8 7445 1 0 0 7446 8 0.000000 1.000000 0.998927 0.000000 PEM Private Key 1019 1483 0 1023 1019 4 1479 0 0.002697 0.000000 0.998401 0.996090 1.000000 0.998041 Password 1869 7535 2680 1776 1758 18 10197 111 0.001762 0.059390 0.989325 0.989865 0.940610 0.964609 Salt 47 76 1 44 44 0 77 3 0.000000 0.063830 0.975806 1.000000 0.936170 0.967033 @@ -268,7 +268,7 @@ Seed 1 6 0 Slack Token 4 1 0 4 4 0 1 0 0.000000 0.000000 1.000000 1.000000 1.000000 1.000000 Tencent WeChat API App ID 6 0 0 6 6 0 0 0 0.000000 1.000000 1.000000 1.000000 1.000000 Token 643 4170 454 616 614 2 4622 29 0.000433 0.045101 0.994114 0.996753 0.954899 0.975377 -Twilio API Key 0 5 2 0 0 7 0 0.000000 1.000000 +Twilio Credentials 30 39 0 30 30 0 39 0 0.000000 0.000000 1.000000 1.000000 1.000000 1.000000 URL Credentials 210 156 216 205 205 0 372 5 0.000000 0.023810 0.991409 1.000000 0.976190 0.987952 UUID 1069 265 0 1068 1067 1 264 2 0.003774 0.001871 0.997751 0.999064 0.998129 0.998596 - 12227 50501 5104 11500 11314 179 50322 913 0.003544 0.074671 0.982592 0.984425 0.925329 0.953963 + 12255 49690 5102 11528 11342 179 49511 913 0.003602 0.074500 0.982371 0.984463 0.925500 0.954071 diff --git a/.github/workflows/benchmark.yml b/.github/workflows/benchmark.yml index 2b5eec52f..6d9eb4ce6 100644 --- a/.github/workflows/benchmark.yml +++ b/.github/workflows/benchmark.yml @@ -31,6 +31,7 @@ jobs: uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 with: repository: Samsung/CredData + ref: main - name: Markup hashing run: | @@ -86,6 +87,7 @@ jobs: uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 with: repository: Samsung/CredData + ref: main - name: Markup hashing run: | @@ -188,6 +190,7 @@ jobs: uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 with: repository: Samsung/CredData + ref: main - name: Markup hashing run: | @@ -375,6 +378,7 @@ jobs: uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 with: repository: Samsung/CredData + ref: main - name: Markup hashing run: | diff --git a/credsweeper/common/morpheme_checklist.txt b/credsweeper/common/morpheme_checklist.txt index 1287b531f..0cea17b7a 100644 --- a/credsweeper/common/morpheme_checklist.txt +++ b/credsweeper/common/morpheme_checklist.txt @@ -210,6 +210,7 @@ batt beac beans beat +beef begin behav behind diff --git a/credsweeper/rules/config.yaml b/credsweeper/rules/config.yaml index 1e9f576f9..4f593c1a6 100644 --- a/credsweeper/rules/config.yaml +++ b/credsweeper/rules/config.yaml @@ -644,15 +644,39 @@ target: - code -- name: Twilio API Key +- name: Twilio Credentials severity: high confidence: moderate type: pattern values: - - (?SK[0-9A-Fa-f]{32})(?![0-9A-Za-z_-]) - filter_type: GeneralPattern + - (?(AC|AD|AL|CA|CF|CL|CN|CR|FW|IP|KS|MM|NO|PK|PN|QU|RE|SC|SD|SK|SM|TR|UT|XE|XR)[0-9A-Fa-f]{32})(?![0-9A-Za-z_-]) + filter_type: TokenPattern required_substrings: + - AC + - AD + - AL + - CA + - CF + - CL + - CN + - CR + - FW + - IP + - KS + - MM + - "NO" + - PK + - PN + - QU + - RE + - SC + - SD - SK + - SM + - TR + - UT + - XE + - XR min_line_len: 34 target: - code diff --git a/tests/__init__.py b/tests/__init__.py index d2fa7e181..3e0bd43a3 100644 --- a/tests/__init__.py +++ b/tests/__init__.py @@ -1,20 +1,20 @@ from pathlib import Path # total number of files in test samples -SAMPLES_FILES_COUNT: int = 139 +SAMPLES_FILES_COUNT: int = 140 # the lowest value of ML threshold is used to display possible lowest values NEGLIGIBLE_ML_THRESHOLD = 0.0001 # credentials count after scan -SAMPLES_CRED_COUNT: int = 410 -SAMPLES_CRED_LINE_COUNT: int = 428 +SAMPLES_CRED_COUNT: int = 411 +SAMPLES_CRED_LINE_COUNT: int = 429 # credentials count after post-processing -SAMPLES_POST_CRED_COUNT: int = 367 +SAMPLES_POST_CRED_COUNT: int = 368 # with option --doc -SAMPLES_IN_DOC = 446 +SAMPLES_IN_DOC = 447 # archived credentials that are not found without --depth SAMPLES_IN_DEEP_1 = SAMPLES_POST_CRED_COUNT + 30 diff --git a/tests/data/depth_3.json b/tests/data/depth_3.json index e365faebf..b28039408 100644 --- a/tests/data/depth_3.json +++ b/tests/data/depth_3.json @@ -12955,18 +12955,18 @@ "api_validation": "NOT_AVAILABLE", "ml_validation": "NOT_AVAILABLE", "ml_probability": null, - "rule": "Twilio API Key", + "rule": "Twilio Credentials", "severity": "high", "confidence": "moderate", "line_data_list": [ { - "line": " \"mk\" : \"SK4D2F64E2A108CD72F648B1984C3B5A13\",", + "line": "SK4D2F64E2A108CD72F648B1984C3B5A13", "line_num": 1, - "path": "./tests/samples/twilio_key.template", - "info": "./tests/samples/twilio_key.template|RAW", + "path": "./tests/samples/twilio_credentials_p", + "info": "./tests/samples/twilio_credentials_p|RAW", "value": "SK4D2F64E2A108CD72F648B1984C3B5A13", - "value_start": 12, - "value_end": 46, + "value_start": 0, + "value_end": 34, "variable": null, "variable_start": -2, "variable_end": -2, @@ -12978,6 +12978,33 @@ } ] }, + { + "api_validation": "NOT_AVAILABLE", + "ml_validation": "NOT_AVAILABLE", + "ml_probability": null, + "rule": "Twilio Credentials", + "severity": "high", + "confidence": "moderate", + "line_data_list": [ + { + "line": "AC4d2f64e2a108cd72f648b1984c3b5a13", + "line_num": 2, + "path": "./tests/samples/twilio_credentials_p", + "info": "./tests/samples/twilio_credentials_p|RAW", + "value": "AC4d2f64e2a108cd72f648b1984c3b5a13", + "value_start": 0, + "value_end": 34, + "variable": null, + "variable_start": -2, + "variable_end": -2, + "entropy_validation": { + "iterator": "BASE36_CHARS", + "entropy": 3.7215926003976607, + "valid": true + } + } + ] + }, { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", diff --git a/tests/data/doc.json b/tests/data/doc.json index 8dbb9fe81..06e86d4b1 100644 --- a/tests/data/doc.json +++ b/tests/data/doc.json @@ -13794,18 +13794,18 @@ "api_validation": "NOT_AVAILABLE", "ml_validation": "NOT_AVAILABLE", "ml_probability": null, - "rule": "Twilio API Key", + "rule": "Twilio Credentials", "severity": "high", "confidence": "moderate", "line_data_list": [ { - "line": "\"mk\" : \"SK4D2F64E2A108CD72F648B1984C3B5A13\",", + "line": "SK4D2F64E2A108CD72F648B1984C3B5A13", "line_num": 1, - "path": "./tests/samples/twilio_key.template", - "info": "./tests/samples/twilio_key.template|RAW", + "path": "./tests/samples/twilio_credentials_p", + "info": "./tests/samples/twilio_credentials_p|RAW", "value": "SK4D2F64E2A108CD72F648B1984C3B5A13", - "value_start": 12, - "value_end": 46, + "value_start": 0, + "value_end": 34, "variable": null, "variable_start": -2, "variable_end": -2, @@ -13817,6 +13817,33 @@ } ] }, + { + "api_validation": "NOT_AVAILABLE", + "ml_validation": "NOT_AVAILABLE", + "ml_probability": null, + "rule": "Twilio Credentials", + "severity": "high", + "confidence": "moderate", + "line_data_list": [ + { + "line": "AC4d2f64e2a108cd72f648b1984c3b5a13", + "line_num": 2, + "path": "./tests/samples/twilio_credentials_p", + "info": "./tests/samples/twilio_credentials_p|RAW", + "value": "AC4d2f64e2a108cd72f648b1984c3b5a13", + "value_start": 0, + "value_end": 34, + "variable": null, + "variable_start": -2, + "variable_end": -2, + "entropy_validation": { + "iterator": "BASE36_CHARS", + "entropy": 3.7215926003976607, + "valid": true + } + } + ] + }, { "api_validation": "NOT_AVAILABLE", "ml_validation": "NOT_AVAILABLE", diff --git a/tests/data/ml_threshold.json b/tests/data/ml_threshold.json index 7a6f3e2e1..5a53ef27d 100644 --- a/tests/data/ml_threshold.json +++ b/tests/data/ml_threshold.json @@ -10920,18 +10920,18 @@ "api_validation": "NOT_AVAILABLE", "ml_validation": "NOT_AVAILABLE", "ml_probability": null, - "rule": "Twilio API Key", + "rule": "Twilio Credentials", "severity": "high", "confidence": "moderate", "line_data_list": [ { - "line": "f2e6bca350950cdcde32e308cfe8364d4191dd2eec8c71a8aef673bdb2fd9158", + "line": "579d9ef4c0aae3cbc1384ca9cd7800e4b0b686607b84121f3584f00e08950f49", "line_num": 1, - "path": "./tests/samples/twilio_key.template", + "path": "./tests/samples/twilio_credentials_p", "info": "", "value": "579d9ef4c0aae3cbc1384ca9cd7800e4b0b686607b84121f3584f00e08950f49", - "value_start": 12, - "value_end": 46, + "value_start": 0, + "value_end": 34, "variable": null, "variable_start": -2, "variable_end": -2, @@ -10943,6 +10943,33 @@ } ] }, + { + "api_validation": "NOT_AVAILABLE", + "ml_validation": "NOT_AVAILABLE", + "ml_probability": null, + "rule": "Twilio Credentials", + "severity": "high", + "confidence": "moderate", + "line_data_list": [ + { + "line": "40f36c54d27048b59e0a4f5cd72565bfa8b370b1544ebfd403b8768eb4f4063f", + "line_num": 2, + "path": "./tests/samples/twilio_credentials_p", + "info": "", + "value": "40f36c54d27048b59e0a4f5cd72565bfa8b370b1544ebfd403b8768eb4f4063f", + "value_start": 0, + "value_end": 34, + "variable": null, + "variable_start": -2, + "variable_end": -2, + "entropy_validation": { + "iterator": "BASE36_CHARS", + "entropy": 3.7215926003976607, + "valid": true + } + } + ] + }, { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", diff --git a/tests/data/output.json b/tests/data/output.json index fcb92d2c8..9e83fc5dc 100644 --- a/tests/data/output.json +++ b/tests/data/output.json @@ -9786,18 +9786,18 @@ "api_validation": "NOT_AVAILABLE", "ml_validation": "NOT_AVAILABLE", "ml_probability": null, - "rule": "Twilio API Key", + "rule": "Twilio Credentials", "severity": "high", "confidence": "moderate", "line_data_list": [ { - "line": " \"mk\" : \"SK4D2F64E2A108CD72F648B1984C3B5A13\",", + "line": "SK4D2F64E2A108CD72F648B1984C3B5A13", "line_num": 1, - "path": "./tests/samples/twilio_key.template", + "path": "./tests/samples/twilio_credentials_p", "info": "", "value": "SK4D2F64E2A108CD72F648B1984C3B5A13", - "value_start": 12, - "value_end": 46, + "value_start": 0, + "value_end": 34, "variable": null, "variable_start": -2, "variable_end": -2, @@ -9809,6 +9809,33 @@ } ] }, + { + "api_validation": "NOT_AVAILABLE", + "ml_validation": "NOT_AVAILABLE", + "ml_probability": null, + "rule": "Twilio Credentials", + "severity": "high", + "confidence": "moderate", + "line_data_list": [ + { + "line": "AC4d2f64e2a108cd72f648b1984c3b5a13", + "line_num": 2, + "path": "./tests/samples/twilio_credentials_p", + "info": "", + "value": "AC4d2f64e2a108cd72f648b1984c3b5a13", + "value_start": 0, + "value_end": 34, + "variable": null, + "variable_start": -2, + "variable_end": -2, + "entropy_validation": { + "iterator": "BASE36_CHARS", + "entropy": 3.7215926003976607, + "valid": true + } + } + ] + }, { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", diff --git a/tests/rules/test_twilio_key.py b/tests/rules/test_twilio_key.py deleted file mode 100644 index 6fa6d3a06..000000000 --- a/tests/rules/test_twilio_key.py +++ /dev/null @@ -1,16 +0,0 @@ -from typing import List - -import pytest - -from .common import BaseTestRule - - -class TestTwilioKey(BaseTestRule): - - @pytest.fixture(params=[["SKABCAEFabcaeaABADEAabadea12145178"]]) - def lines(self, request) -> List[str]: - return request.param - - @pytest.fixture - def rule_name(self) -> str: - return "Twilio API Key" diff --git a/tests/samples/twilio_credentials_n b/tests/samples/twilio_credentials_n new file mode 100644 index 000000000..17e222f38 --- /dev/null +++ b/tests/samples/twilio_credentials_n @@ -0,0 +1,2 @@ +QpQcm9jLVR5cGU6IDQsRU5DU/SK4D2F64E2A108CD72F648Bx984C3B5A13+FSy1JbmZvOiBBRVMtMTI +AC123456E2A108CD72F648B1984C3B5A13 diff --git a/tests/samples/twilio_credentials_p b/tests/samples/twilio_credentials_p new file mode 100644 index 000000000..f0342b318 --- /dev/null +++ b/tests/samples/twilio_credentials_p @@ -0,0 +1,2 @@ +SK4D2F64E2A108CD72F648B1984C3B5A13 +AC4d2f64e2a108cd72f648b1984c3b5a13 diff --git a/tests/samples/twilio_key.template b/tests/samples/twilio_key.template deleted file mode 100644 index 936149ece..000000000 --- a/tests/samples/twilio_key.template +++ /dev/null @@ -1 +0,0 @@ - "mk" : "SK4D2F64E2A108CD72F648B1984C3B5A13", \ No newline at end of file