diff --git a/.ci/benchmark.txt b/.ci/benchmark.txt index 6caa498d7..fa5bd5997 100644 --- a/.ci/benchmark.txt +++ b/.ci/benchmark.txt @@ -1,14 +1,14 @@ -META MD5 5bb0a05fd77c2761b8414bba41103939 -DATA MD5 9e77a2d9f718f175264ab5a386ae86c4 -DATA: 16342283 interested lines. MARKUP: 62022 items +META MD5 d51d1f5107d0906adfd81b9fd6467597 +DATA MD5 5e46a76147ee32073b0d587f80684f86 +DATA: 16329853 interested lines. MARKUP: 59549 items FileType FileNumber ValidLines Positives Negatives Templates --------------- ------------ ------------ ----------- ----------- ----------- - 194 28318 71 418 90 + 193 28288 69 415 90 .1 2 641 2 5 .admx 1 26 1 -.adoc 1 158 13 6 1 +.adoc 1 158 13 5 1 .api 2 118 4 -.asciidoc 96 14471 49 348 27 +.asciidoc 96 14471 49 346 27 .axaml 5 286 5 .backup 1 62 2 1 .bash 2 2158 2 1 @@ -16,9 +16,9 @@ FileType FileNumber ValidLines Positives Negatives Templat .bats 15 2804 14 49 9 .bazel 3 424 8 .build 2 40 3 -.bundle 4 1512 580 +.bundle 4 1512 476 .bzl 3 2503 11 -.c 179 284009 8 944 5 +.c 179 284009 8 774 5 .cc 29 30562 617 1 .cf 3 126 2 1 .cfg 1 385 1 1 @@ -29,13 +29,13 @@ FileType FileNumber ValidLines Positives Negatives Templat .cmd 4 401 2 3 .cnf 8 858 15 36 16 .coffee 1 585 2 -.conf 60 4945 55 64 53 -.config 20 492 15 40 1 -.cpp 15 5688 2 61 +.conf 60 4945 55 63 53 +.config 20 492 7 40 1 +.cpp 14 5489 2 60 .creds 1 10 1 1 .crlf 1 27 1 .crt 2 4979 211 -.cs 268 79532 159 895 95 +.cs 266 79474 153 872 95 .cshtml 5 180 12 .csp 3 379 9 .csproj 1 14 1 @@ -45,17 +45,17 @@ FileType FileNumber ValidLines Positives Negatives Templat .deprecated 1 126 1 .development 1 5 1 .diff 2 2460 8 2 -.dist 5 257 7 13 +.dist 4 227 5 13 .doc 1 2489 3 .dockerfile 1 19 1 -.dot 1 160 6 +.dot 1 160 5 .eex 4 74 8 .ejs 1 13 1 .env 10 136 11 3 17 -.erb 13 323 27 +.erb 13 323 26 .erl 4 96 7 .ex 25 4968 5 98 5 -.example 17 1838 77 38 52 +.example 17 1838 69 38 51 .exs 24 4842 8 187 4 .ext 5 211 1 4 2 .fsproj 1 75 1 2 @@ -63,36 +63,36 @@ FileType FileNumber ValidLines Positives Negatives Templat .gd 1 37 1 .gml 3 3075 16 .gni 3 5017 19 -.go 1080 566476 687 4131 747 +.go 1075 565673 652 4090 748 .golden 5 1168 1 13 29 -.gradle 45 3265 4 90 100 +.gradle 45 3265 2 90 100 .graphql 7 420 13 .graphqls 1 30 1 -.groovy 22 4986 24 215 1 +.groovy 21 4920 24 213 1 .h 9 1958 36 .haml 9 191 17 .hbs 2 54 3 -.hs 14 4140 30 61 5 -.html 53 15327 22 110 18 +.hs 14 4140 28 61 5 +.html 52 15255 18 108 18 .idl 2 777 1 4 .iml 6 699 30 .in 6 2130 6 44 10 .inc 2 56 2 1 .ini 11 1437 26 12 17 -.ipynb 1 134 5 +.ipynb 1 134 6 .j 1 241 4 -.j2 30 5530 6 186 10 -.java 621 134132 368 1365 171 +.j2 30 5530 6 174 10 +.java 613 133184 347 1321 171 .jenkinsfile 1 58 2 6 .jinja2 1 64 2 -.js 659 536413 531 2497 331 -.json 851 13046493 1077 10907 140 -.jsp 13 3202 1 40 +.js 653 532652 512 2450 331 +.json 843 13045846 1074 10011 139 +.jsp 13 3202 1 37 .jsx 7 857 19 .jwt 1 1 2 .key 83 2737 70 14 .ks 1 25 1 -.kt 123 20774 64 383 3 +.kt 122 20664 64 374 3 .l 1 982 1 .las 1 6656 36 .lasso 1 230 7 @@ -105,18 +105,18 @@ FileType FileNumber ValidLines Positives Negatives Templat .libsonnet 2 210 1 11 .list 2 15 2 .lkml 1 43 1 -.lock 24 160912 142 +.lock 24 160912 151 .log 2 199 38 52 .lua 10 1924 37 3 -.m 16 13358 19 161 3 +.m 16 13358 19 157 3 .manifest 3 102 9 6 .markdown 3 139 3 1 .markerb 3 12 3 .marko 1 21 2 -.md 674 149300 741 2388 621 +.md 671 148304 719 2340 614 .mdx 3 549 7 .mjml 1 18 1 -.mjs 22 4424 75 340 +.mjs 22 4424 71 333 .mk 1 5878 13 .ml 1 1856 16 .mlir 2 1596 19 @@ -125,7 +125,7 @@ FileType FileNumber ValidLines Positives Negatives Templat .mqh 1 1023 2 .msg 1 26644 1 1 .mysql 1 36 2 -.ndjson 2 5006 75 243 2 +.ndjson 2 5006 75 234 2 .nix 4 211 12 .nolint 1 2 1 .odd 1 1281 43 @@ -133,9 +133,9 @@ FileType FileNumber ValidLines Positives Negatives Templat .p8 4 64 4 .pan 2 48 4 .patch 4 109405 4 27 -.pbxproj 1 941 2 +.pbxproj 1 941 1 .pem 48 1169 47 8 -.php 371 75710 130 1622 79 +.php 370 75658 114 1451 71 .pl 16 14727 7 33 .pm 3 744 7 .po 3 2994 15 @@ -143,23 +143,23 @@ FileType FileNumber ValidLines Positives Negatives Templat .pony 1 83 4 .postinst 2 354 4 15 .pp 10 563 16 -.ppk 1 45 36 +.ppk 1 45 28 .private 1 15 1 .proj 1 85 5 -.properties 48 1621 58 29 28 +.properties 48 1621 57 29 28 .proto 5 5768 2 49 -.ps1 16 8509 15 74 2 +.ps1 16 8509 15 73 2 .ps1xml 1 5022 1 .pug 2 193 2 .purs 1 69 4 -.pxd 1 150 5 2 -.py 890 291553 681 3303 726 +.pxd 1 150 4 2 +.py 886 290215 665 3236 726 .pyi 4 1361 9 .pyp 1 167 1 .pyx 2 1094 23 -.r 4 62 6 3 1 +.r 4 62 4 2 1 .rake 2 51 2 -.rb 838 129482 281 2529 613 +.rb 834 128817 269 2457 615 .re 1 31 1 .red 1 159 1 .release 1 13 4 @@ -172,71 +172,71 @@ FileType FileNumber ValidLines Positives Negatives Templat .rs 31 9855 2 234 11 .rsc 1 691 1 .rsp 16 7101 20 10 27 -.rst 86 33980 71 323 68 +.rst 86 33980 71 305 68 .rules 1 6 2 .sample 2 25 3 4 4 .sbt 3 570 5 2 -.scala 40 5071 22 101 +.scala 39 5028 22 99 .scss 16 8553 32 1 .secrets 1 11 1 -.sh 143 21525 60 480 24 +.sh 142 21518 57 464 24 .slim 1 153 1 2 .smali 1 775 18 -.snap 3 1708 9 30 2 +.snap 3 1708 9 29 2 .spec 2 332 2 .spin 1 565 1 -.sql 27 6606 126 60 3 -.storyboard 20 1802 341 +.sql 25 6456 124 56 3 +.storyboard 20 1802 339 .strings 20 1240 137 .stub 3 84 6 .sublime-keymap 1 3 1 .sum 37 22854 283 .svg 1 638 12 -.t 9 1767 25 44 13 +.t 9 1767 15 44 13 .td 2 14002 6 .template 19 1633 4 35 11 -.test 2 24 24 1 4 +.test 2 24 22 1 4 .testsettings 1 21 1 10 .tf 21 1377 2 30 2 .tfstate 4 307 26 7 4 .tfvars 1 31 3 2 -.tl 2 2161 161 2 +.tl 2 2161 154 2 .tmpl 5 336 3 9 .token 1 1 3 -.toml 83 2379 53 105 156 +.toml 83 2379 49 103 156 .tpl 1 43 1 -.travis 1 34 4 3 1 -.ts 583 106730 157 1800 203 -.tsx 54 7914 1 114 5 +.travis 1 34 2 3 1 +.ts 581 106648 137 1774 203 +.tsx 54 7914 1 113 5 .ttar 1 452 1 -.txt 440 78102 5287 6354 49 +.txt 266 76254 5144 5094 46 .utf8 1 77 2 .vsixmanifest 1 36 1 .vsmdi 1 6 2 -.vue 50 8736 1 154 1 -.xaml 21 8103 163 -.xcscheme 1 109 6 -.xib 11 503 169 +.vue 50 8736 1 153 1 +.xaml 21 8103 155 +.xcscheme 1 109 4 +.xib 11 503 164 .xsl 1 311 1 -.yaml 137 19004 125 345 42 -.yml 419 36169 559 889 376 +.yaml 136 18591 123 341 42 +.yml 418 36057 522 910 376 .zsh 6 872 12 .zsh-theme 1 97 1 -TOTAL: 10232 16342283 12261 49692 5101 -credsweeper result_cnt : 11521, lost_cnt : 0, true_cnt : 11346, false_cnt : 175 +TOTAL: 10003 16329853 11851 46611 5084 +credsweeper result_cnt : 11610, lost_cnt : 0, true_cnt : 11346, false_cnt : 264 Rules Positives Negatives Templates Reported TP FP TN FN FPR FNR ACC PRC RCL F1 ------------------------------ ----------- ----------- ----------- ---------- ----- ---- ----- ---- -------- -------- -------- -------- -------- -------- API 130 3166 188 125 123 2 3352 7 0.000596 0.053846 0.997417 0.984000 0.946154 0.964706 AWS Client ID 168 21 0 160 160 0 21 8 0.000000 0.047619 0.957672 1.000000 0.952381 0.975610 AWS Multi 82 10 0 84 82 1 9 0 0.100000 0.000000 0.989130 0.987952 1.000000 0.993939 AWS S3 Bucket 67 23 0 92 67 23 0 0 1.000000 0.000000 0.744444 0.744444 1.000000 0.853503 -Atlassian Old PAT token 27 308 3 12 3 8 303 24 0.025723 0.888889 0.905325 0.272727 0.111111 0.157895 +Atlassian Old PAT token 3 7 0 10 3 7 0 0 1.000000 0.000000 0.300000 0.300000 1.000000 0.461538 Auth 417 2739 82 393 390 3 2818 27 0.001063 0.064748 0.990735 0.992366 0.935252 0.962963 Azure Access Token 19 0 0 12 12 0 0 7 0.368421 0.631579 1.000000 0.631579 0.774194 -BASE64 Private Key 7 4 0 7 7 0 4 0 0.000000 0.000000 1.000000 1.000000 1.000000 1.000000 +BASE64 Private Key 12 4 0 12 12 0 4 0 0.000000 0.000000 1.000000 1.000000 1.000000 1.000000 BASE64 encoded PEM Private Key 7 0 0 5 5 0 0 2 0.285714 0.714286 1.000000 0.714286 0.833333 -Bitbucket Client ID 143 2095 9 48 28 19 2085 115 0.009030 0.804196 0.940365 0.595745 0.195804 0.294737 -Bitbucket Client Secret 301 807 10 40 29 11 806 272 0.013464 0.903654 0.746869 0.725000 0.096346 0.170088 +Bitbucket Client ID 19 53 0 75 19 53 0 0 1.000000 0.000000 0.263889 0.263889 1.000000 0.417582 +Bitbucket Client Secret 27 66 1 97 27 67 0 0 1.000000 0.000000 0.287234 0.287234 1.000000 0.446281 CMD ConvertTo-SecureString 13 4 0 13 13 0 4 0 0.000000 0.000000 1.000000 1.000000 1.000000 1.000000 CMD Password 21 128 6 18 18 0 134 3 0.000000 0.142857 0.980645 1.000000 0.857143 0.923077 CMD Secret 1 1 0 1 1 0 1 0 0.000000 0.000000 1.000000 1.000000 1.000000 1.000000 @@ -248,7 +248,7 @@ Dropbox App secret 64 139 1 4 Facebook Access Token 0 1 0 0 0 1 0 0.000000 1.000000 Firebase Domain 6 1 0 7 6 1 0 0 1.000000 0.000000 0.857143 0.857143 1.000000 0.923077 Github Old Token 1 0 0 1 1 0 0 0 0.000000 1.000000 1.000000 1.000000 1.000000 -Gitlab Feed Token 189 751 87 56 44 11 827 145 0.013126 0.767196 0.848101 0.800000 0.232804 0.360656 +Gitlab Feed Token 189 752 87 58 44 11 828 145 0.013111 0.767196 0.848249 0.800000 0.232804 0.360656 Gitlab Incoming Email Token 37 8 0 21 19 2 6 18 0.250000 0.486486 0.555556 0.904762 0.513514 0.655172 Google API Key 12 0 0 12 12 0 0 0 0.000000 1.000000 1.000000 1.000000 1.000000 Google Multi 10 2 0 11 10 1 1 0 0.500000 0.000000 0.916667 0.909091 1.000000 0.952381 @@ -259,7 +259,7 @@ Jira / Confluence PAT token 0 4 0 Jira 2FA 15 6 1 12 12 0 7 3 0.000000 0.200000 0.863636 1.000000 0.800000 0.888889 Key 3909 15717 485 3944 3893 51 16151 16 0.003148 0.004093 0.996668 0.987069 0.995907 0.991468 Nonce 93 49 0 91 90 1 48 3 0.020408 0.032258 0.971831 0.989011 0.967742 0.978261 -Other 8 7445 1 0 0 7446 8 0.000000 1.000000 0.998927 0.000000 +Other 9 7447 5 0 0 7452 9 0.000000 1.000000 0.998794 0.000000 PEM Private Key 1019 1483 0 1023 1019 4 1479 0 0.002697 0.000000 0.998401 0.996090 1.000000 0.998041 Password 1869 7536 2680 1774 1756 18 10198 113 0.001762 0.060460 0.989160 0.989853 0.939540 0.964041 Salt 47 76 1 44 44 0 77 3 0.000000 0.063830 0.975806 1.000000 0.936170 0.967033 @@ -270,5 +270,5 @@ Tencent WeChat API App ID 6 0 0 Token 644 4170 454 617 615 2 4622 29 0.000433 0.045031 0.994115 0.996759 0.954969 0.975416 Twilio Credentials 30 39 0 30 30 0 39 0 0.000000 0.000000 1.000000 1.000000 1.000000 1.000000 URL Credentials 210 157 215 205 205 0 372 5 0.000000 0.023810 0.991409 1.000000 0.976190 0.987952 -UUID 1069 265 0 1068 1067 1 264 2 0.003774 0.001871 0.997751 0.999064 0.998129 0.998596 - 12261 49692 5101 11528 11346 175 49517 915 0.003522 0.074627 0.982406 0.984810 0.925373 0.954167 +UUID 1075 265 0 1074 1073 1 264 2 0.003774 0.001860 0.997761 0.999069 0.998140 0.998604 + 11851 46611 5084 11623 11346 264 46347 505 0.005664 0.042612 0.986846 0.977261 0.957388 0.967222 diff --git a/credsweeper/filters/group/weird_base64_token.py b/credsweeper/filters/group/weird_base64_token.py index 28e583b05..67d0d339c 100644 --- a/credsweeper/filters/group/weird_base64_token.py +++ b/credsweeper/filters/group/weird_base64_token.py @@ -1,7 +1,8 @@ from credsweeper.common.constants import GroupType from credsweeper.config import Config from credsweeper.filters import ValueCoupleKeywordCheck, ValueNotPartEncodedCheck, \ - ValueBase64DataCheck, ValueEntropyBase64Check, ValuePatternCheck, ValueNumberCheck, ValueTokenBase64Check + ValueBase64DataCheck, ValueEntropyBase64Check, ValuePatternCheck, ValueNumberCheck, ValueTokenBase64Check, \ + ValueBase64PartCheck from credsweeper.filters.group import Group @@ -17,5 +18,6 @@ def __init__(self, config: Config) -> None: ValueTokenBase64Check(), ValueEntropyBase64Check(), ValuePatternCheck(config), - ValueNotPartEncodedCheck() + ValueNotPartEncodedCheck(), + ValueBase64PartCheck(), ] diff --git a/credsweeper/filters/value_atlassian_token_check.py b/credsweeper/filters/value_atlassian_token_check.py index b3b093ca3..5e6a97d2f 100644 --- a/credsweeper/filters/value_atlassian_token_check.py +++ b/credsweeper/filters/value_atlassian_token_check.py @@ -63,8 +63,8 @@ def check_atlassian_struct(value: str) -> bool: # there is limit for big integer value: math.log10(1<<64) = 19.265919722494797 if 0 < delimiter_pos <= 20: val = decoded[:delimiter_pos].decode(LATIN_1) - # at least 3 digits in the token - if 100 < int(val): + # at least 4 digits in the token + if 1000 <= int(val): # test for ascii and Shannon entropy - there should be random data data = decoded[delimiter_pos + 1:] return Util.is_ascii_entropy_validate(data) diff --git a/credsweeper/filters/value_base64_part_check.py b/credsweeper/filters/value_base64_part_check.py index 7729d2ded..9e7d0c7d5 100644 --- a/credsweeper/filters/value_base64_part_check.py +++ b/credsweeper/filters/value_base64_part_check.py @@ -1,4 +1,5 @@ import contextlib +import re import statistics from credsweeper.common.constants import Chars @@ -6,6 +7,7 @@ from credsweeper.credentials import LineData from credsweeper.file_handler.analysis_target import AnalysisTarget from credsweeper.filters import Filter +from credsweeper.filters.value_entropy_base64_check import ValueEntropyBase64Check from credsweeper.utils import Util @@ -14,6 +16,9 @@ class ValueBase64PartCheck(Filter): Check that candidate is NOT a part of base64 long line """ + base64_pattern = re.compile(r"^(\\{1,8}[0abfnrtv]|[0-9A-Za-z+/=]){1,4000}") + base64_set = set(Chars.BASE64_CHARS.value) + def __init__(self, config: Config = None) -> None: pass @@ -30,26 +35,66 @@ def run(self, line_data: LineData, target: AnalysisTarget) -> bool: """ with contextlib.suppress(Exception): - if line_data.value_start and line_data.line[line_data.value_start - 1] in ('/', '+'): - if '-' in line_data.value or '_' in line_data.value: - # the value contains url-safe chars, so '/' is a delimiter + line = line_data.line + len_line = len(line) + value = line_data.value + len_value = len(value) + if 0 == line_data.value_start and len_line >= 2 * len_value \ + or 0 < line_data.value_start and line[line_data.value_start - 1] in ('/', '+', '\\', '%') \ + or 0 < line_data.value_end < len_line and line[line_data.value_end] in ('/', '+', '\\', '%'): + + if '-' in value or '_' in value: + # the value contains url-safe chars, so '/' or '+' is a delimiter return False - value_entropy = Util.get_shannon_entropy(line_data.value, Chars.BASE64STD_CHARS.value) - left_start = line_data.value_start - len(line_data.value) + + left_start = line_data.value_start - len_value if 0 > left_start: left_start = 0 - left_entropy = Util.get_shannon_entropy(line_data.line[left_start:line_data.value_start], - Chars.BASE64STD_CHARS.value) - right_end = line_data.value_end + len(line_data.value) - if len(line_data.line) < right_end: - right_end = len(line_data.line) - right_entropy = Util.get_shannon_entropy(line_data.line[line_data.value_end:right_end], - Chars.BASE64STD_CHARS.value) - data = [value_entropy, left_entropy, right_entropy] + right_end = line_data.value_end + len_value + if len_line < right_end: + right_end = len_line + + hunk_size = right_end - left_start + + if hunk_size == 3 * len_value: + # simple analysis for maximal data size + if self.base64_pattern.match(line[left_start:right_end]): + # obvious case: all characters are base64 standard + return True + elif right_end - left_start >= 2 * len_value: + # simple analysis for data too large to yield sensible insights + part_set = set(line[left_start:right_end]) + if not part_set.difference(self.base64_set): + # obvious case: all characters are base64 standard + return True + + left_part = line[left_start:line_data.value_start] + len_left = len(left_part) + right_part = line[line_data.value_end:right_end] + len_right = len(right_part) + + min_entropy_value = ValueEntropyBase64Check.get_min_data_entropy(len_value) + value_entropy = Util.get_shannon_entropy(value, Chars.BASE64STD_CHARS.value) + + if ValueEntropyBase64Check.min_length < len_left: + left_entropy = Util.get_shannon_entropy(left_part, Chars.BASE64STD_CHARS.value) + if len_left < len_value: + left_entropy *= len_value / len_left + else: + left_entropy = min_entropy_value + + if ValueEntropyBase64Check.min_length < len_right: + right_entropy = Util.get_shannon_entropy(right_part, Chars.BASE64STD_CHARS.value) + if len_right < len_value: + left_entropy *= len_right / len_left + else: + right_entropy = min_entropy_value + + data = [left_entropy, value_entropy, right_entropy, min_entropy_value] avg = statistics.mean(data) stdev = statistics.stdev(data, avg) avg_min = avg - 1.1 * stdev - if avg_min < left_entropy and avg_min < right_entropy: + if avg_min <= left_entropy and avg_min <= right_entropy: # high entropy of bound parts looks like a part of base64 long line return True diff --git a/credsweeper/filters/value_entropy_base64_check.py b/credsweeper/filters/value_entropy_base64_check.py index f97741a0d..7e9603cec 100644 --- a/credsweeper/filters/value_entropy_base64_check.py +++ b/credsweeper/filters/value_entropy_base64_check.py @@ -11,6 +11,9 @@ class ValueEntropyBase64Check(Filter): """Check that candidate have Shanon Entropy > 3 (for HEX_CHARS or BASE36_CHARS) or > 4.5 (for BASE64_CHARS).""" + # If the value size is less than this value the entropy evaluation gives an imprecise result + min_length = 12 + def __init__(self, config: Config = None) -> None: pass @@ -25,7 +28,7 @@ def get_min_data_entropy(x: int) -> float: y = 4.1 elif 32 == x: y = 4.4 - elif 12 <= x < 35: + elif ValueEntropyBase64Check.min_length <= x < 35: # logarithm base 2 - slow, but precise. Approximation does not exceed stdev y = 0.77 * math.log2(x) + 0.62 elif 35 <= x < 60: diff --git a/credsweeper/filters/value_not_part_encoded_check.py b/credsweeper/filters/value_not_part_encoded_check.py index 6de327a9f..dd20e4c96 100644 --- a/credsweeper/filters/value_not_part_encoded_check.py +++ b/credsweeper/filters/value_not_part_encoded_check.py @@ -1,6 +1,7 @@ import re from typing import Optional +from credsweeper.common import static_keyword_checklist from credsweeper.config import Config from credsweeper.credentials import LineData from credsweeper.file_handler.analysis_target import AnalysisTarget @@ -29,15 +30,14 @@ def check_line_target_fit(line_data: LineData, target: AnalysisTarget) -> bool: @staticmethod def check_val(line: str, pattern: re.Pattern) -> Optional[bool]: - """Verifies whether the line looks like a pattern""" - match_obj = pattern.match(line) - if match_obj: + """Verifies whether the line looks like a base64 pattern""" + if match_obj := pattern.match(line): val = match_obj.group("val") # not a path-like - if not val.startswith('/'): - return True - # padding sign - if '=' == val[-1]: + if not val.startswith('/') \ + or not static_keyword_checklist.check_morphemes(val.lower(), 2) \ + or '=' == val[-1]: + # padding char is a marker too return True return None diff --git a/credsweeper/filters/value_token_base64_check.py b/credsweeper/filters/value_token_base64_check.py index d8072796e..50c43e63b 100644 --- a/credsweeper/filters/value_token_base64_check.py +++ b/credsweeper/filters/value_token_base64_check.py @@ -26,7 +26,7 @@ def get_min_strength(x: int) -> float: elif x < 40: y = ((0.0000405 * x - 0.004117) * x + 0.141) * x - 0.65 else: - y = 1 + y = 0.9999 return y def run(self, line_data: LineData, target: AnalysisTarget) -> bool: diff --git a/credsweeper/rules/config.yaml b/credsweeper/rules/config.yaml index e3951f9c2..1ab3ec87c 100644 --- a/credsweeper/rules/config.yaml +++ b/credsweeper/rules/config.yaml @@ -54,7 +54,7 @@ confidence: moderate type: pattern values: - - (^|\s|(?P(?i:\bip[\s/]{1,80}id[\s/]{1,80}pw[\s/:]{0,80}))|(?P://))(?P[0-2]?[0-9]{1,2}\.[0-2]?[0-9]{1,2}\.[0-2]?[0-9]{1,2}\.[0-2]?[0-9]{1,2})((\s*\()?|(?(variable)[\s,/]{1,80}|(?(url)[,]|[,/])))\s*\w[\w.-]{3,80}[\s,/]{1,80}(?P(?(url)(?-i:(?P[A-Z])|(?P[a-z])|(?P[0-9_+=~!@#$%^&*;?-])){7,31}(?(a)(?(b)(?(c)(\S|$)|(?!x)x)|(?!x)x)|(?!x)x)|(?-i:(?P[A-Z])|(?P[a-z])|(?P[0-9/_+=~!@#$%^&*;?-])){7,31}(?(e)(?(f)(?(g)(\S|$)|(?!x)x)|(?!x)x)|(?!x)x)))(?:\s|[^/]|$) + - (^|\s|(?P(?i:\bip[\s/]{1,80}id[\s/]{1,80}pw[\s/:]{0,80}))|(?P://))(?P(?(?(url)(?-i:(?P[A-Z])|(?P[a-z])|(?P[0-9_+=~!@#$%^&*;?-])){7,31}(?(a)(?(b)(?(c)(\S|$)|(?!x)x)|(?!x)x)|(?!x)x)|(?-i:(?P[A-Z])|(?P[a-z])|(?P[0-9/_+=~!@#$%^&*;?-])){7,31}(?(e)(?(f)(?(g)(\S|$)|(?!x)x)|(?!x)x)|(?!x)x)))(?:\s|[^/]|$) filter_type: - ValueAllowlistCheck - ValuePatternCheck @@ -129,7 +129,7 @@ confidence: strong type: pattern values: - - (?[0-9A-Fa-f]{8}(-[0-9A-Fa-f]{4}){3}-[0-9A-Fa-f]{12})(?![0-9A-Za-z_-]) + - (?:(?[0-9A-Fa-f]{8}(-[0-9A-Fa-f]{4}){3}-[0-9A-Fa-f]{12})(?![0-9A-Za-z_+-]) min_line_len: 36 required_substrings: - "-" @@ -146,7 +146,7 @@ confidence: moderate type: pattern values: - - (?(ABIA|ACCA|AGPA|AIDA|AIPA|AKIA|ANPA|ANVA|AROA|APKA|ASCA|ASIA)[0-9A-Z]{16,17})(?![0-9A-Za-z_-]) + - (?:(?(ABIA|ACCA|AGPA|AIDA|AIPA|AKIA|ANPA|ANVA|AROA|APKA|ASCA|ASIA)[0-9A-Z]{16,17})(?![0-9A-Za-z_+-]) filter_type: GeneralPattern required_substrings: - A @@ -161,8 +161,8 @@ confidence: moderate type: multi values: - - (?(ABIA|ACCA|AGPA|AIDA|AIPA|AKIA|ANPA|ANVA|AROA|APKA|ASCA|ASIA)[0-9A-Z]{16,17})(?![0-9A-Za-z_-]) - - (?[0-9A-Za-z/+]{35,80})(?![0-9A-Za-z_/+-]) + - (?:(?(ABIA|ACCA|AGPA|AIDA|AIPA|AKIA|ANPA|ANVA|AROA|APKA|ASCA|ASIA)[0-9A-Z]{16,17})(?![0-9A-Za-z_+-]) + - (?:(?[0-9A-Za-z/+]{35,80}) filter_type: - LineSpecificKeyCheck - ValuePatternCheck @@ -180,7 +180,7 @@ confidence: strong type: pattern values: - - (?amzn\.mws\.[0-9a-z]{8}-[0-9a-z]{4}-[0-9a-z]{4}-[0-9a-z]{4}-[0-9a-z]{12})(?![0-9A-Za-z_-]) + - (?:(?amzn\.mws\.[0-9a-z]{8}-[0-9a-z]{4}-[0-9a-z]{4}-[0-9a-z]{4}-[0-9a-z]{12})(?![0-9A-Za-z_-]) filter_type: GeneralPattern required_substrings: - amzn @@ -208,7 +208,7 @@ confidence: moderate type: pattern values: - - (?dt0[A-Za-z]{1}[0-9]{2}\.[0-9A-Z]{24}\.[0-9A-Z]{64})(?![0-9A-Za-z_-]) + - (?:(?dt0[A-Za-z]{1}[0-9]{2}\.[0-9A-Z]{24}\.[0-9A-Z]{64})(?![0-9A-Za-z_-]) filter_type: GeneralPattern required_substrings: - dt0 @@ -222,7 +222,7 @@ confidence: moderate type: pattern values: - - (?EAA[0-9A-Za-z]{80,800}) + - (?:(?EAA[0-9A-Za-z]{80,800}) filter_type: - ValuePatternCheck - ValueBase64PartCheck @@ -238,7 +238,7 @@ confidence: moderate type: pattern values: - - (?[0-9]{12,18}\|[0-9A-Za-z_-]{24,28})(?![0-9A-Za-z_-]) + - (?:(?[0-9]{12,18}\|[0-9A-Za-z_-]{24,28})(?![0-9A-Za-z_+-]) filter_type: GeneralPattern required_substrings: - "|" @@ -270,7 +270,7 @@ confidence: moderate type: pattern values: - - (?AIza[0-9A-Za-z_-]{35})(?![0-9A-Za-z_-]) + - (?:(?AIza[0-9A-Za-z_-]{35}) filter_type: GeneralPattern validations: - GoogleApiKeyValidation @@ -303,7 +303,7 @@ confidence: strong type: pattern values: - - (?GOCSPX-[0-9A-Za-z_-]{28})(?![0-9A-Za-z_-]) + - (?:(?GOCSPX-[0-9A-Za-z_-]{28})(?![0-9A-Za-z_-]) filter_type: GeneralPattern required_substrings: - GOCSPX- @@ -317,7 +317,7 @@ confidence: moderate type: pattern values: - - (?ya29\.[0-9A-Za-z_-]{22,8000}) + - (?:(?ya29\.[0-9A-Za-z_-]{22,8000}) filter_type: GeneralPattern required_substrings: - ya29. @@ -345,7 +345,7 @@ confidence: strong type: pattern values: - - (?IGQVJ[0-9A-Za-z_=-]{100,8000}) + - (?:(?IGQVJ[=0-9A-Za-z_-]{100,8000})(?![=0-9A-Za-z_-]) filter_type: GeneralPattern required_substrings: - IGQVJ @@ -359,7 +359,7 @@ confidence: strong type: pattern values: - - (?eyJ[0-9A-Za-z_+/=-]{15,8000}(\.[0-9A-Za-z_+/=-]{0,8000}){2,16}) + - (?:(?eyJ[=0-9A-Za-z_+/-]{15,8000}(\.[=0-9A-Za-z_+/-]{0,8000}){2,16})(?![=0-9A-Za-z_-]) filter_type: - ValueJsonWebTokenCheck required_substrings: @@ -374,7 +374,7 @@ confidence: moderate type: pattern values: - - (?[0-9A-Za-z_-]{32}-us[0-9]{1,2})(?![0-9A-Za-z_-]) + - (?:(?[0-9A-Za-z_-]{32}-us[0-9]{1,2}) filter_type: GeneralPattern validations: - MailChimpKeyValidation @@ -390,7 +390,7 @@ confidence: moderate type: pattern values: - - (?key-[0-9A-Za-z_-]{32})(?![0-9A-Za-z_-]) + - (?:(?key-[0-9A-Za-z_-]{32})(?![0-9A-Za-z_-]) filter_type: GeneralPattern required_substrings: - key- @@ -461,7 +461,7 @@ confidence: strong type: pattern values: - - (?P\bMII[A-Za-f][0-9A-Za-z/+]{8}(?s:[^!#$&()*\-.:;<=>?@\[\]^_{|}~]{8,8000})) + - (?:(?MII[A-Za-f][0-9A-Za-z/+]{8}(?s:[^!#$&()*\-.:;<=>?@\[\]^_{|}~]{8,8000})) filter_type: - ValueBase64KeyCheck min_line_len: 160 @@ -532,7 +532,7 @@ confidence: strong type: pattern values: - - (?xox[aboprst]\-[0-9A-Za-z-]{10,250}) + - (?:(?xox[aboprst]\-[0-9A-Za-z-]{10,250})(?![0-9A-Za-z_-]) filter_type: GeneralPattern validations: - SlackTokenValidation @@ -592,7 +592,7 @@ confidence: moderate type: pattern values: - - (?EAAA[0-9A-Za-z_-]{60})(?![0-9A-Za-z_-]) + - (?:(?EAAA[0-9A-Za-z_-]{60})(?![0-9A-Za-z_-]) filter_type: - ValuePatternCheck - ValueBase64PartCheck @@ -610,7 +610,7 @@ confidence: strong type: pattern values: - - (?sq0[a-z]{3}-[0-9A-Za-z_-]{22})(?![0-9A-Za-z_-]) + - (?:(?sq0[a-z]{3}-[0-9A-Za-z_-]{22})(?![0-9A-Za-z_-]) filter_type: GeneralPattern validations: - SquareClientIdValidation @@ -626,7 +626,7 @@ confidence: strong type: pattern values: - - (?Psq0csp-[0-9A-Za-z_-]{43})(?![0-9A-Za-z_-]) + - (?:(?sq0csp-[0-9A-Za-z_-]{43})(?![0-9A-Za-z_-]) filter_type: GeneralPattern required_substrings: - sq0csp @@ -654,7 +654,7 @@ confidence: moderate type: pattern values: - - (?(AC|AD|AL|CA|CF|CL|CN|CR|FW|IP|KS|MM|NO|PK|PN|QU|RE|SC|SD|SK|SM|TR|UT|XE|XR)[0-9A-Fa-f]{32})(?![0-9A-Za-z_-]) + - (?:(?(AC|AD|AL|CA|CF|CL|CN|CR|FW|IP|KS|MM|NO|PK|PN|QU|RE|SC|SD|SK|SM|TR|UT|XE|XR)[0-9A-Fa-f]{32})(?![0-9A-Za-z_+-]) filter_type: TokenPattern required_substrings: - AC @@ -692,7 +692,7 @@ confidence: moderate type: pattern values: - - (^|\W|\\[tnr])(?PConvertTo-SecureString(\s\s*-(String|AsPlainText|Force))*)\s\s*(?P(\\?[\"']){1,3})?(?P(?(value_leftquote)[^\"'\\]|[^\s\"'\\]){4,800})(?(value_leftquote)(?P(\\?[\"']){1,3})) + - (?PConvertTo-SecureString(\s\s*-(String|AsPlainText|Force))*)\s\s*(?P(\\?[\"']){1,3})?(?P(?(value_leftquote)[^\"'\\]|[^\s\"'\\]){4,800})(?(value_leftquote)(?P(\\?[\"']){1,3})) filter_type: GeneralKeyword use_ml: true required_substrings: @@ -706,7 +706,7 @@ confidence: moderate type: pattern values: - - (^|\W|\\[tnr])(?P-[A-Za-z_-]*(?i:pass(in|out|word|phrase)))\s\s*(?!-)(?P(\\?[\"']){1,3})?(pass:)?(?!file:|env:|fd:)(?P(?(value_leftquote)[^\"'\\]|[^\s\"'\\]){4,80})(?(value_leftquote)(?P(\\?[\"']){1,3})) + - (^|\W|\\[0abfnrtv]|(%|\\x)[0-9A-Fa-f]{2}|\\[0-7]{3}|\\[Uu]([0-9A-Fa-f]{4}){1,2}|\x1B\[[0-9;]{0,80}m)(?P-[A-Za-z_-]*(?i:pass(in|out|word|phrase)))\s\s*(?!-)(?P(\\?[\"']){1,3})?(pass:)?(?!file:|env:|fd:)(?P(?(value_leftquote)[^\"'\\]|[^\s\"'\\]){4,80})(?(value_leftquote)(?P(\\?[\"']){1,3})) filter_type: GeneralKeyword use_ml: true required_substrings: @@ -720,7 +720,7 @@ confidence: moderate type: pattern values: - - (^|\W|\\[tnr])(?P-[A-Za-z_-]*(?i:token))\s\s*(?!-)(?P(\\?[\"']){1,3})?(?P(?(value_leftquote)[^\"'\\]|[^\s\"'\\]){4,4000})(?(value_leftquote)(?P(\\?[\"']){1,3})) + - (^|\W|\\[0abfnrtv]|(%|\\x)[0-9A-Fa-f]{2}|\\[0-7]{3}|\\[Uu]([0-9A-Fa-f]{4}){1,2}|\x1B\[[0-9;]{0,80}m)(?P-[A-Za-z_-]*(?i:token))\s\s*(?!-)(?P(\\?[\"']){1,3})?(?P(?(value_leftquote)[^\"'\\]|[^\s\"'\\]){4,4000})(?(value_leftquote)(?P(\\?[\"']){1,3})) filter_type: GeneralKeyword use_ml: true required_substrings: @@ -734,7 +734,7 @@ confidence: moderate type: pattern values: - - (^|\W|\\[tnr])(?P-[A-Za-z_-]*(?i:secret)[A-Za-z_-]*)\s\s*(?!-)(?P(\\?[\"']){1,3})?(pass:)?(?!file:|env:|fd:)(?P(?(value_leftquote)[^\"'\\]|[^\s\"'\\]){4,4000})(?(value_leftquote)(?P(\\?[\"']){1,3})) + - (^|\W|\\[0abfnrtv]|(%|\\x)[0-9A-Fa-f]{2}|\\[0-7]{3}|\\[Uu]([0-9A-Fa-f]{4}){1,2}|\x1B\[[0-9;]{0,80}m)(?P-[A-Za-z_-]*(?i:secret)[A-Za-z_-]*)\s\s*(?!-)(?P(\\?[\"']){1,3})?(pass:)?(?!file:|env:|fd:)(?P(?(value_leftquote)[^\"'\\]|[^\s\"'\\]){4,4000})(?(value_leftquote)(?P(\\?[\"']){1,3})) filter_type: GeneralKeyword use_ml: true required_substrings: @@ -790,7 +790,7 @@ confidence: moderate type: pattern values: - - (?P[0-9]{8,10}:[0-9A-Za-z_-]{35})(?![0-9A-Za-z_-]) + - (?:(?[0-9]{8,10}:[0-9A-Za-z_-]{35})(?![0-9A-Za-z_-]) filter_type: GeneralPattern required_substrings: - :AA @@ -804,7 +804,7 @@ confidence: strong type: pattern values: - - (?Ppypi-[0-9A-Za-z_-]{150,8000}) + - (?:(?pypi-[0-9A-Za-z_-]{150,255}) filter_type: GeneralPattern required_substrings: - pypi- @@ -818,7 +818,7 @@ confidence: strong type: pattern values: - - (?gh[pousr]_[0-9A-Za-z_-]{36,255}) + - (?:(?gh[pousr]_[0-9A-Za-z_-]{36,255}) filter_type: - ValueGitHubCheck validations: @@ -839,7 +839,7 @@ confidence: strong type: pattern values: - - (?github_pat_[0-9A-Za-z_]{80,255}) + - (?:(?github_pat_[0-9A-Za-z_]{80,255}) filter_type: GeneralPattern validations: - GithubTokenValidation @@ -855,7 +855,7 @@ confidence: moderate type: pattern values: - - (?[a-z0-9.-]{1,80}\.firebaseio\.com|[a-z0-9.-]{1,80}\.firebaseapp\.com) + - (?:(?[a-z0-9.-]{1,80}\.firebaseio\.com|[a-z0-9.-]{1,80}\.firebaseapp\.com) filter_type: GeneralPattern required_substrings: - .firebase @@ -869,7 +869,7 @@ confidence: moderate type: pattern values: - - (?[a-z0-9.-]{3,63}\.s3\.amazonaws\.com|[a-z0-9.-]{3,63}\.s3-website[.-](eu|ap|us|ca|sa|cn)) + - (?:(?[a-z0-9.-]{3,63}\.s3\.amazonaws\.com|[a-z0-9.-]{3,63}\.s3-website[.-](eu|ap|us|ca|sa|cn)) filter_type: GeneralPattern required_substrings: - .s3-website @@ -926,7 +926,7 @@ confidence: strong type: pattern values: - - (?(cmVmdGtuO[0-9A-Za-z_-]{55}|AKCp[0-9A-Za-z_-]{69}))(?![0-9A-Za-z_-]) + - (?:(?(cmVmdGtuO[0-9A-Za-z_-]{55}|AKCp[0-9A-Za-z_-]{69}))(?![0-9A-Za-z_-]) filter_type: - ValueJfrogTokenCheck required_substrings: @@ -942,7 +942,7 @@ confidence: strong type: pattern values: - - (?eyJ[0-9A-Za-z_=-]{50,500}\.eyJ[0-9A-Za-z_=-]{8,8000}\.[0-9A-Za-z_=-]{18,800}) + - (?:(?eyJ[=0-9A-Za-z_-]{50,500}\.eyJ[=0-9A-Za-z_-]{8,8000}\.[=0-9A-Za-z_-]{18,800}) filter_type: - ValueAzureTokenCheck required_substrings: @@ -957,7 +957,7 @@ confidence: moderate type: pattern values: - - (?[0-9A-Za-z_~.-]{3}8Q~[0-9A-Za-z_~.-]{34})(?![0-9A-Za-z_-]) + - (?:(?[0-9A-Za-z_~.-]{3}8Q~[0-9A-Za-z_~.-]{34})(?![0-9A-Za-z_-]) filter_type: TokenPattern min_line_len: 40 required_substrings: @@ -971,7 +971,7 @@ confidence: strong type: pattern values: - - (?ATBB[0-9A-Za-z]{24}[A-F0-9]{8})(?![0-9A-Za-z_-]) + - (?:(?ATBB[0-9A-Za-z]{24}[A-F0-9]{8})(?![0-9A-Za-z_]) filter_type: - ValueAtlassianTokenCheck min_line_len: 28 @@ -986,7 +986,7 @@ confidence: strong type: pattern values: - - (?ATCTT3xFfGN0[0-9A-Za-z_-]{80,800}(\\?=|%3[dD])[A-F0-9]{8})(?![0-9A-Za-z_-]) + - (?PATCTT3xFfGN0[0-9A-Za-z_-]{80,800}(\\?=|%3[dD])[A-F0-9]{8}) filter_type: - ValueAtlassianTokenCheck min_line_len: 160 @@ -1001,7 +1001,7 @@ confidence: strong type: pattern values: - - (?BBDC-[NMO][ADgjQTwz][0-9A-Za-z_-]{42})(?![0-9A-Za-z_-]) + - (?PBBDC-[MNO][ADQTgjwz][AEIMQUYcgk][012345wxyz][0-9A-Za-z_-]{40}) filter_type: - ValueAtlassianTokenCheck min_line_len: 49 @@ -1016,7 +1016,7 @@ confidence: weak type: pattern values: - - (?[0-9A-Za-z]{18}([0-9A-Za-z]{14})?)(?![0-9A-Za-z.$_/+-]) + - (?:(?[0-9A-Za-z]{18}([0-9A-Za-z]{14})?)(?![=0-9A-Za-z_+-]) filter_type: WeirdBase64Token min_line_len: 18 required_regex: "[0-9A-Za-z_/+-]{15}" @@ -1029,7 +1029,7 @@ confidence: weak type: pattern values: - - (?([0-9A-Za-z_-]{32}){1,2})(?![0-9A-Za-z.$_/+-]) + - (?:(?([0-9A-Za-z_-]{32}){1,2})(?![=0-9A-Za-z_+-]) filter_type: WeirdBase64Token min_line_len: 32 required_regex: "[0-9A-Za-z_/+-]{15}" @@ -1042,10 +1042,9 @@ confidence: strong type: pattern values: - - (?[NMO][ADgjQTwz][0-9A-Za-z_-]{42})(?![0-9A-Za-z_-]) + - (?:(?[MNO][ADQTgjwz][AEIMQUYcgk][012345wxyz][0-9A-Za-z_-]{40})(?![0-9A-Za-z_-]) filter_type: - ValueAtlassianTokenCheck - - ValueBase64PartCheck min_line_len: 44 required_substrings: - M @@ -1061,7 +1060,7 @@ confidence: weak type: pattern values: - - (?[0-9A-Za-z]{24})(?![=0-9A-Za-z.$_/+-]) + - (?:(?[0-9A-Za-z]{24})(?![=.0-9A-Za-z_/+-]) filter_type: WeirdBase64Token min_line_len: 24 required_regex: "[0-9A-Za-z_/+-]{15}" @@ -1074,7 +1073,7 @@ confidence: strong type: pattern values: - - (?ATATT3xFfGF0[0-9A-Za-z_-]{80,800}(\\?=|%3[dD])[A-F0-9]{8})(?![0-9A-Za-z_-]) + - (?PATATT3xFfGF0[0-9A-Za-z_-]{80,800}(\\?=|%3[dD])[A-F0-9]{8}) filter_type: - ValueAtlassianTokenCheck min_line_len: 160 @@ -1089,7 +1088,7 @@ confidence: strong type: pattern values: - - (?do[op]_v1_[a-f0-9]{64})(?![0-9A-Za-z_-]) + - (?:(?do[op]_v1_[a-f0-9]{64})(?![0-9A-Za-z_-]) filter_type: TokenPattern min_line_len: 71 required_substrings: @@ -1104,7 +1103,7 @@ confidence: moderate type: pattern values: - - (?sl.[0-9A-Za-z_-]{135})(?![0-9A-Za-z_-]) + - (?:(?sl.[0-9A-Za-z_-]{135})(?![0-9A-Za-z_-]) filter_type: TokenPattern min_line_len: 138 required_substrings: @@ -1118,7 +1117,7 @@ confidence: moderate type: pattern values: - - (?oy2[a-z0-9]{43})(?![0-9A-Za-z_-]) + - (?:(?oy2[a-z0-9]{43})(?![0-9A-Za-z_-]) filter_type: TokenPattern min_line_len: 46 required_substrings: @@ -1132,7 +1131,7 @@ confidence: strong type: pattern values: - - (?(_gitlab_session=|GR1348941|gl(agent|soat|ffct|p[at]t|oas|cbt|imt|[dfr]t)-)[0-9A-Za-z_-]{20,64})(?![0-9A-Za-z_-]) + - (?:(?(_gitlab_session=|GR1348941|gl(agent|soat|ffct|p[at]t|oas|cbt|imt|[dfr]t)-)[0-9A-Za-z_-]{20,64})(?![0-9A-Za-z_-]) filter_type: - ValuePatternCheck min_line_len: 25 @@ -1159,7 +1158,7 @@ confidence: strong type: pattern values: - - (?eyJ[=0-9A-Za-z_-]{64,360})(?![0-9A-Za-z_-]) + - (?:(?eyJ[=0-9A-Za-z_-]{64,360})(?![=0-9A-Za-z_-]) filter_type: - ValueGrafanaCheck min_line_len: 67 @@ -1174,7 +1173,7 @@ confidence: strong type: pattern values: - - (?glc_eyJ[0-9A-Za-z_-]{80,360})(?![0-9A-Za-z_-]) + - (?:(?glc_eyJ[0-9A-Za-z_-]{80,360})(?![0-9A-Za-z_-]) filter_type: - ValueGrafanaCheck min_line_len: 87 @@ -1189,7 +1188,7 @@ confidence: strong type: pattern values: - - (?glsa_[0-9A-Za-z_-]{32}_[0-9A-Fa-f]{8})(?![0-9A-Za-z_-]) + - (?:(?glsa_[0-9A-Za-z_-]{32}_[0-9A-Fa-f]{8}) min_line_len: 46 filter_type: - ValueGrafanaServiceCheck @@ -1204,7 +1203,7 @@ confidence: weak type: pattern values: - - (?[0-9A-Za-z]{10,12}[B-Za-z0-9]A{10,12}[B-Za-z0-9][0-9A-Za-z]{40,44})(?![=0-9A-Za-z_-]) + - (?:(?[0-9A-Za-z]{10,12}[B-Za-z0-9]A{10,12}[B-Za-z0-9][0-9A-Za-z]{40,44})(?![=0-9A-Za-z_/+-]) filter_type: [] min_line_len: 43 required_substrings: @@ -1218,7 +1217,7 @@ confidence: weak type: pattern values: - - (?[a-z0-9]{15})(?![=0-9A-Za-z_/+-]) + - (?:(?[a-z0-9]{15})(?![=0-9A-Za-z_/+-]) filter_type: WeirdBase36Token min_line_len: 15 required_regex: "[0-9A-Za-z_/+-]{15}" @@ -1231,7 +1230,7 @@ confidence: weak type: pattern values: - - (?[a-z0-9]{24,25})(?![=0-9A-Za-z_/+-]) + - (?:(?[a-z0-9]{24,25})(?![=0-9A-Za-z_/+-]) filter_type: WeirdBase36Token min_line_len: 24 required_regex: "[0-9A-Za-z_/+-]{15}" @@ -1244,7 +1243,7 @@ confidence: weak type: pattern values: - - (?[0-9A-Za-z_-]{20})(?![=0-9A-Za-z_/+-]) + - (?:(?[0-9A-Za-z_-]{20})(?![=0-9A-Za-z_/+-]) filter_type: WeirdBase64Token min_line_len: 20 required_regex: "[0-9A-Za-z_/+-]{15}" @@ -1257,7 +1256,7 @@ confidence: strong type: pattern values: - - (?hv[brs]\.[0-9A-Za-z_-]{80,160}) + - (?:(?hv[brs]\.[0-9A-Za-z_-]{80,160}) filter_type: - ValuePatternCheck - ValueEntropyBase64Check @@ -1275,7 +1274,7 @@ confidence: strong type: pattern values: - - (?[0-9A-Za-z_-]{14}\.atlasv1\.[0-9A-Za-z_-]{67})(?![0-9A-Za-z_-]) + - (?:(?[0-9A-Za-z_-]{14}\.atlasv1\.[0-9A-Za-z_-]{67})(?![0-9A-Za-z_-]) filter_type: - ValuePatternCheck - ValueEntropyBase64Check @@ -1291,7 +1290,7 @@ confidence: weak type: pattern values: - - (?[A-Z2-7]{16})(?![=0-9A-Za-z_/+-]) + - (?:(?[A-Z2-7]{16})(?![=0-9A-Za-z_+-]) filter_type: - ValueCoupleKeywordCheck - ValuePatternCheck @@ -1309,7 +1308,7 @@ confidence: strong type: pattern values: - - (?sk-[0-9A-Za-z_-]{16,32}(T3BlbkFJ|9wZW5BS|PcGVuQU)[0-9A-Za-z_-]{16,32})(?![0-9A-Za-z_-]) + - (?Psk-[0-9A-Za-z_-]{16,32}(T3BlbkFJ|9wZW5BS|PcGVuQU)[0-9A-Za-z_-]{16,32}) min_line_len: 51 filter_type: - ValuePatternCheck @@ -1327,7 +1326,7 @@ confidence: strong type: pattern values: - - (?SWMTKN-1-[0-9a-z]{50}-[0-9a-z]{25})(?![0-9A-Za-z_-]) + - (?PSWMTKN-1-[0-9a-z]{50}-[0-9a-z]{25}) min_line_len: 85 filter_type: - ValueCoupleKeywordCheck @@ -1342,7 +1341,7 @@ confidence: strong type: pattern values: - - (?gsk_[0-9A-Za-z_-]{52})(?![0-9A-Za-z_-]) + - (?:(?gsk_[0-9A-Za-z_-]{52})(?![0-9A-Za-z_-]) min_line_len: 56 filter_type: - ValuePatternCheck @@ -1358,7 +1357,7 @@ confidence: moderate type: pattern values: - - (?hf_[0-9A-Za-z_-]{34})(?![0-9A-Za-z_-]) + - (?:(?hf_[0-9A-Za-z_-]{34})(?![0-9A-Za-z_-]) min_line_len: 37 filter_type: - ValuePatternCheck @@ -1374,7 +1373,7 @@ confidence: strong type: pattern values: - - (?[NMO][ADgjQTwz][0-9A-Za-z_-]{22,26}\.[0-9A-Za-z_-]{6}\.[0-9A-Za-z_-]{30,40})(?![0-9A-Za-z_-]) + - (?:(?[MNO][ADQTgjwz][AEIMQUYcgk][012345wxyz][0-9A-Za-z_-]{20,24}\.[0-9A-Za-z_-]{6}\.[0-9A-Za-z_-]{30,40})(?![0-9A-Za-z_-]) min_line_len: 62 filter_type: - ValueDiscordBotCheck @@ -1392,7 +1391,7 @@ confidence: weak type: pattern values: - - (?wx[0-9a-f]{16})(?![0-9A-Za-z_-]) + - (?:(?wx[0-9a-f]{16})(?![0-9A-Za-z_-]) min_line_len: 18 filter_type: TokenPattern required_substrings: diff --git a/credsweeper/scanner/scanner.py b/credsweeper/scanner/scanner.py index 73a0d984e..cb7054fe7 100644 --- a/credsweeper/scanner/scanner.py +++ b/credsweeper/scanner/scanner.py @@ -70,7 +70,11 @@ def _set_rules_scanners(self, rule_path: Union[None, str, Path]) -> None: rule_templates = Util.yaml_load(rule_path) if rule_templates and isinstance(rule_templates, list): for rule_template in rule_templates: - rule = Rule(self.config, rule_template) + try: + rule = Rule(self.config, rule_template) + except Exception as exc: + logger.error("Rule creation error%s", str(rule_template)) + raise exc if not self._is_available(rule): continue if 0 < rule.min_line_len: diff --git a/tests/__init__.py b/tests/__init__.py index a5436a345..1f8d81222 100644 --- a/tests/__init__.py +++ b/tests/__init__.py @@ -1,14 +1,14 @@ from pathlib import Path # total number of files in test samples -SAMPLES_FILES_COUNT = 140 +SAMPLES_FILES_COUNT = 139 # the lowest value of ML threshold is used to display possible lowest values NEGLIGIBLE_ML_THRESHOLD = 0.0001 # credentials count after scan with negligible ML threshold -SAMPLES_CRED_COUNT = 418 -SAMPLES_CRED_LINE_COUNT = 437 +SAMPLES_CRED_COUNT = 420 +SAMPLES_CRED_LINE_COUNT = SAMPLES_CRED_COUNT + 19 # Number of filtered credentials with ML ML_FILTERED = 43 @@ -17,7 +17,7 @@ SAMPLES_POST_CRED_COUNT = SAMPLES_CRED_COUNT - ML_FILTERED # with option --doc -SAMPLES_IN_DOC = 451 +SAMPLES_IN_DOC = 453 # archived credentials that are not found without --depth SAMPLES_IN_DEEP_1 = SAMPLES_POST_CRED_COUNT + 29 diff --git a/tests/data/depth_3.json b/tests/data/depth_3.json index 3ec461f09..e2a9c9433 100644 --- a/tests/data/depth_3.json +++ b/tests/data/depth_3.json @@ -1529,6 +1529,60 @@ } ] }, + { + "api_validation": "NOT_AVAILABLE", + "ml_validation": "NOT_AVAILABLE", + "ml_probability": null, + "rule": "Bitbucket Client ID", + "severity": "info", + "confidence": "weak", + "line_data_list": [ + { + "line": "bitbucket_client : \"0DIwN2M1NefTgs3Ghr54TMxNzOhFZPhB\"", + "line_num": 1, + "path": "./tests/samples/bitbucket_client_id", + "info": "./tests/samples/bitbucket_client_id|RAW", + "value": "0DIwN2M1NefTgs3Ghr54TMxNzOhFZPhB", + "value_start": 20, + "value_end": 52, + "variable": null, + "variable_start": -2, + "variable_end": -2, + "entropy_validation": { + "iterator": "BASE64_CHARS", + "entropy": 4.577819531114783, + "valid": true + } + } + ] + }, + { + "api_validation": "NOT_AVAILABLE", + "ml_validation": "NOT_AVAILABLE", + "ml_probability": null, + "rule": "Bitbucket Client Secret", + "severity": "info", + "confidence": "weak", + "line_data_list": [ + { + "line": "bitbucket_client : \"0DIwN2M1NefTgs3Ghr54TMxNzOhFZPhB\"", + "line_num": 1, + "path": "./tests/samples/bitbucket_client_id", + "info": "./tests/samples/bitbucket_client_id|RAW", + "value": "0DIwN2M1NefTgs3Ghr54TMxNzOhFZPhB", + "value_start": 20, + "value_end": 52, + "variable": null, + "variable_start": -2, + "variable_end": -2, + "entropy_validation": { + "iterator": "BASE64_CHARS", + "entropy": 4.577819531114783, + "valid": true + } + } + ] + }, { "api_validation": "NOT_AVAILABLE", "ml_validation": "NOT_AVAILABLE", @@ -1539,7 +1593,7 @@ "line_data_list": [ { "line": "bitbucket_client_2 : \"0DIwN2M1NTeGd6S6jU\"", - "line_num": 2, + "line_num": 3, "path": "./tests/samples/bitbucket_client_id", "info": "./tests/samples/bitbucket_client_id|RAW", "value": "0DIwN2M1NTeGd6S6jU", @@ -8340,6 +8394,33 @@ } ] }, + { + "api_validation": "NOT_AVAILABLE", + "ml_validation": "NOT_AVAILABLE", + "ml_probability": null, + "rule": "Jira / Confluence PAT token", + "severity": "high", + "confidence": "strong", + "line_data_list": [ + { + "line": "TP: \"image/png\": \"iVBORx09VIskhxhCe7sh03R1dnENPiB66xQSIZjEYN13vafX/OTI2NjA3NjU1NTI2Oh2DOnASdOHoIhEGyqIuYrdkYaQZ/hZwUteHsmN+z+aoEAAAAvL+Q5FSQGyqIuYrdkYaQZuW1TvI=\\n\",", + "line_num": 2, + "path": "./tests/samples/jira_confluence_pat", + "info": "./tests/samples/jira_confluence_pat|RAW", + "value": "OTI2NjA3NjU1NTI2Oh2DOnASdOHoIhEGyqIuYrdkYaQZ", + "value_start": 67, + "value_end": 111, + "variable": null, + "variable_start": -2, + "variable_end": -2, + "entropy_validation": { + "iterator": "BASE64_CHARS", + "entropy": 4.606936732175321, + "valid": true + } + } + ] + }, { "api_validation": "NOT_AVAILABLE", "ml_validation": "NOT_AVAILABLE", @@ -11413,8 +11494,8 @@ { "line": " \"SQT\": \"EAAAEEPtuW9FnP_CuCV-3DFPB54M7YWBUVEFOWKPRVMWEJYMODIDAEX4FASS64NF\",", "line_num": 1, - "path": "./tests/samples/square_access_token.template", - "info": "./tests/samples/square_access_token.template|RAW", + "path": "./tests/samples/square_access_token", + "info": "./tests/samples/square_access_token|RAW", "value": "EAAAEEPtuW9FnP_CuCV-3DFPB54M7YWBUVEFOWKPRVMWEJYMODIDAEX4FASS64NF", "value_start": 12, "value_end": 76, @@ -11440,8 +11521,8 @@ { "line": "sq0atp-GIREOGICRACKLE12145178", "line_num": 1, - "path": "./tests/samples/square_client_id.toml", - "info": "./tests/samples/square_client_id.toml|RAW", + "path": "./tests/samples/square_client_id", + "info": "./tests/samples/square_client_id|RAW", "value": "sq0atp-GIREOGICRACKLE12145178", "value_start": 0, "value_end": 29, @@ -11465,13 +11546,13 @@ "confidence": "strong", "line_data_list": [ { - "line": "sq0csp-ST2dsfwyVI7Ydj343EY72hb48ehKQINMST2ds_fwyVI", + "line": "20241204_112356 Colored line: \"\u001b[94msq0csp-ST2dsfwyVI7Ydj343EY72hb48ehKQINMST2ds_fwyVI\u001b[0m\";", "line_num": 1, - "path": "./tests/samples/square_oauth_secret.hs", - "info": "./tests/samples/square_oauth_secret.hs|RAW", + "path": "./tests/samples/square_oauth_secret", + "info": "./tests/samples/square_oauth_secret|RAW", "value": "sq0csp-ST2dsfwyVI7Ydj343EY72hb48ehKQINMST2ds_fwyVI", - "value_start": 0, - "value_end": 50, + "value_start": 36, + "value_end": 86, "variable": null, "variable_start": -2, "variable_end": -2, @@ -11483,33 +11564,6 @@ } ] }, - { - "api_validation": "NOT_AVAILABLE", - "ml_validation": "NOT_AVAILABLE", - "ml_probability": null, - "rule": "Square OAuth Secret", - "severity": "high", - "confidence": "strong", - "line_data_list": [ - { - "line": "sq0csp-GIREOGICRACKLEGIREOGICRACKLEGIREOGICRACKLE1", - "line_num": 1, - "path": "./tests/samples/square_secret_key", - "info": "./tests/samples/square_secret_key|RAW", - "value": "sq0csp-GIREOGICRACKLEGIREOGICRACKLEGIREOGICRACKLE1", - "value_start": 0, - "value_end": 50, - "variable": null, - "variable_start": -2, - "variable_end": -2, - "entropy_validation": { - "iterator": "BASE64_CHARS", - "entropy": 3.559610565373458, - "valid": false - } - } - ] - }, { "api_validation": "NOT_AVAILABLE", "ml_validation": "NOT_AVAILABLE", @@ -13517,19 +13571,19 @@ "confidence": "moderate", "line_data_list": [ { - "line": "token : IBOgIBAAJBAJtFfagSXdo0JmX6EdwWGvFMaXeOFY5xOTs3PWsnhRFakQFbAuI769 fdsbkjerfkjrekjnkerjnfkrejnfrejn==", + "line": "token : IBOgIBAAJBAJtFfagSXdo0JmX6EdwWGvFMaXeOFY5xOTs3PWsn hRFakQFbAuI769fdsbkjerfkjrekjnkerjnfkrejnfrejn==", "line_num": 1, "path": "./tests/samples/xml_data.xml", "info": "./tests/samples/xml_data.xml|XML", - "value": "IBOgIBAAJBAJtFfagSXdo0JmX6EdwWGvFMaXeOFY5xOTs3PWsnhRFakQFbAuI769", + "value": "IBOgIBAAJBAJtFfagSXdo0JmX6EdwWGvFMaXeOFY5xOTs3PWsn", "value_start": 8, - "value_end": 72, + "value_end": 58, "variable": "token", "variable_start": 0, "variable_end": 5, "entropy_validation": { "iterator": "BASE64_CHARS", - "entropy": 5.091578664259099, + "entropy": 4.8332696895151095, "valid": true } } @@ -13544,11 +13598,11 @@ "confidence": "moderate", "line_data_list": [ { - "line": "token : \"IBOgIBAAJBAJtFfagSXdo0JmX6EdwWGvFMaXeOFY5xOTs3PWsnhRFakQFbAuI769 fdsbkjerfkjrekjnkerjnfkrejnfrejn==\"", + "line": "token : \"IBOgIBAAJBAJtFfagSXdo0JmX6EdwWGvFMaXeOFY5xOTs3PWsn hRFakQFbAuI769fdsbkjerfkjrekjnkerjnfkrejnfrejn==\"", "line_num": 1, "path": "./tests/samples/xml_data.xml", "info": "./tests/samples/xml_data.xml|XML", - "value": "IBOgIBAAJBAJtFfagSXdo0JmX6EdwWGvFMaXeOFY5xOTs3PWsnhRFakQFbAuI769 fdsbkjerfkjrekjnkerjnfkrejnfrejn==", + "value": "IBOgIBAAJBAJtFfagSXdo0JmX6EdwWGvFMaXeOFY5xOTs3PWsn hRFakQFbAuI769fdsbkjerfkjrekjnkerjnfkrejnfrejn==", "value_start": 9, "value_end": 115, "variable": "token", diff --git a/tests/data/doc.json b/tests/data/doc.json index a74f34fbd..e0a8ac3be 100644 --- a/tests/data/doc.json +++ b/tests/data/doc.json @@ -904,6 +904,60 @@ } ] }, + { + "api_validation": "NOT_AVAILABLE", + "ml_validation": "NOT_AVAILABLE", + "ml_probability": null, + "rule": "Bitbucket Client ID", + "severity": "info", + "confidence": "weak", + "line_data_list": [ + { + "line": "bitbucket_client : \"0DIwN2M1NefTgs3Ghr54TMxNzOhFZPhB\"", + "line_num": 1, + "path": "./tests/samples/bitbucket_client_id", + "info": "./tests/samples/bitbucket_client_id|RAW", + "value": "0DIwN2M1NefTgs3Ghr54TMxNzOhFZPhB", + "value_start": 20, + "value_end": 52, + "variable": null, + "variable_start": -2, + "variable_end": -2, + "entropy_validation": { + "iterator": "BASE64_CHARS", + "entropy": 4.577819531114783, + "valid": true + } + } + ] + }, + { + "api_validation": "NOT_AVAILABLE", + "ml_validation": "NOT_AVAILABLE", + "ml_probability": null, + "rule": "Bitbucket Client Secret", + "severity": "info", + "confidence": "weak", + "line_data_list": [ + { + "line": "bitbucket_client : \"0DIwN2M1NefTgs3Ghr54TMxNzOhFZPhB\"", + "line_num": 1, + "path": "./tests/samples/bitbucket_client_id", + "info": "./tests/samples/bitbucket_client_id|RAW", + "value": "0DIwN2M1NefTgs3Ghr54TMxNzOhFZPhB", + "value_start": 20, + "value_end": 52, + "variable": null, + "variable_start": -2, + "variable_end": -2, + "entropy_validation": { + "iterator": "BASE64_CHARS", + "entropy": 4.577819531114783, + "valid": true + } + } + ] + }, { "api_validation": "NOT_AVAILABLE", "ml_validation": "NOT_AVAILABLE", @@ -914,7 +968,7 @@ "line_data_list": [ { "line": "bitbucket_client_2 : \"0DIwN2M1NTeGd6S6jU\"", - "line_num": 2, + "line_num": 3, "path": "./tests/samples/bitbucket_client_id", "info": "./tests/samples/bitbucket_client_id|RAW", "value": "0DIwN2M1NTeGd6S6jU", @@ -12371,6 +12425,33 @@ } ] }, + { + "api_validation": "NOT_AVAILABLE", + "ml_validation": "NOT_AVAILABLE", + "ml_probability": null, + "rule": "Jira / Confluence PAT token", + "severity": "high", + "confidence": "strong", + "line_data_list": [ + { + "line": "\"image/png\": \"iVBORx09VIskhxhCe7sh03R1dnENPiB66xQSIZjEYN13vafX/OTI2NjA3NjU1NTI2Oh2DOnASdOHoIhEGyqIuYrdkYaQZ/hZwUteHsmN+z+aoEAAAAvL+Q5FSQGyqIuYrdkYaQZuW1TvI=\\n\",", + "line_num": 2, + "path": "./tests/samples/jira_confluence_pat", + "info": "./tests/samples/jira_confluence_pat|RAW", + "value": "OTI2NjA3NjU1NTI2Oh2DOnASdOHoIhEGyqIuYrdkYaQZ", + "value_start": 67, + "value_end": 111, + "variable": null, + "variable_start": -2, + "variable_end": -2, + "entropy_validation": { + "iterator": "BASE64_CHARS", + "entropy": 4.606936732175321, + "valid": true + } + } + ] + }, { "api_validation": "NOT_AVAILABLE", "ml_validation": "NOT_AVAILABLE", @@ -13467,8 +13548,8 @@ { "line": "\"SQT\": \"EAAAEEPtuW9FnP_CuCV-3DFPB54M7YWBUVEFOWKPRVMWEJYMODIDAEX4FASS64NF\",", "line_num": 1, - "path": "./tests/samples/square_access_token.template", - "info": "./tests/samples/square_access_token.template|RAW", + "path": "./tests/samples/square_access_token", + "info": "./tests/samples/square_access_token|RAW", "value": "EAAAEEPtuW9FnP_CuCV-3DFPB54M7YWBUVEFOWKPRVMWEJYMODIDAEX4FASS64NF", "value_start": 12, "value_end": 76, @@ -13494,8 +13575,8 @@ { "line": "sq0atp-GIREOGICRACKLE12145178", "line_num": 1, - "path": "./tests/samples/square_client_id.toml", - "info": "./tests/samples/square_client_id.toml|RAW", + "path": "./tests/samples/square_client_id", + "info": "./tests/samples/square_client_id|RAW", "value": "sq0atp-GIREOGICRACKLE12145178", "value_start": 0, "value_end": 29, @@ -13519,13 +13600,13 @@ "confidence": "strong", "line_data_list": [ { - "line": "sq0csp-ST2dsfwyVI7Ydj343EY72hb48ehKQINMST2ds_fwyVI", + "line": "20241204_112356 Colored line: \"\u001b[94msq0csp-ST2dsfwyVI7Ydj343EY72hb48ehKQINMST2ds_fwyVI\u001b[0m\";", "line_num": 1, - "path": "./tests/samples/square_oauth_secret.hs", - "info": "./tests/samples/square_oauth_secret.hs|RAW", + "path": "./tests/samples/square_oauth_secret", + "info": "./tests/samples/square_oauth_secret|RAW", "value": "sq0csp-ST2dsfwyVI7Ydj343EY72hb48ehKQINMST2ds_fwyVI", - "value_start": 0, - "value_end": 50, + "value_start": 36, + "value_end": 86, "variable": null, "variable_start": -2, "variable_end": -2, @@ -13537,33 +13618,6 @@ } ] }, - { - "api_validation": "NOT_AVAILABLE", - "ml_validation": "NOT_AVAILABLE", - "ml_probability": null, - "rule": "Square OAuth Secret", - "severity": "high", - "confidence": "strong", - "line_data_list": [ - { - "line": "sq0csp-GIREOGICRACKLEGIREOGICRACKLEGIREOGICRACKLE1", - "line_num": 1, - "path": "./tests/samples/square_secret_key", - "info": "./tests/samples/square_secret_key|RAW", - "value": "sq0csp-GIREOGICRACKLEGIREOGICRACKLEGIREOGICRACKLE1", - "value_start": 0, - "value_end": 50, - "variable": null, - "variable_start": -2, - "variable_end": -2, - "entropy_validation": { - "iterator": "BASE64_CHARS", - "entropy": 3.559610565373458, - "valid": false - } - } - ] - }, { "api_validation": "NOT_AVAILABLE", "ml_validation": "NOT_AVAILABLE", @@ -14086,19 +14140,19 @@ "confidence": "moderate", "line_data_list": [ { - "line": "token : IBOgIBAAJBAJtFfagSXdo0JmX6EdwWGvFMaXeOFY5xOTs3PWsnhRFakQFbAuI769 fdsbkjerfkjrekjnkerjnfkrejnfrejn==", + "line": "token : IBOgIBAAJBAJtFfagSXdo0JmX6EdwWGvFMaXeOFY5xOTs3PWsn hRFakQFbAuI769fdsbkjerfkjrekjnkerjnfkrejnfrejn==", "line_num": 1, "path": "./tests/samples/xml_data.xml", "info": "./tests/samples/xml_data.xml|XML", - "value": "IBOgIBAAJBAJtFfagSXdo0JmX6EdwWGvFMaXeOFY5xOTs3PWsnhRFakQFbAuI769", + "value": "IBOgIBAAJBAJtFfagSXdo0JmX6EdwWGvFMaXeOFY5xOTs3PWsn", "value_start": 8, - "value_end": 72, + "value_end": 58, "variable": "token", "variable_start": 0, "variable_end": 5, "entropy_validation": { "iterator": "BASE64_CHARS", - "entropy": 5.091578664259099, + "entropy": 4.8332696895151095, "valid": true } } diff --git a/tests/data/ml_threshold.json b/tests/data/ml_threshold.json index e533c5f6e..108b3b19e 100644 --- a/tests/data/ml_threshold.json +++ b/tests/data/ml_threshold.json @@ -1228,6 +1228,60 @@ } ] }, + { + "api_validation": "NOT_AVAILABLE", + "ml_validation": "NOT_AVAILABLE", + "ml_probability": null, + "rule": "Bitbucket Client ID", + "severity": "info", + "confidence": "weak", + "line_data_list": [ + { + "line": "bitbucket_client : \"0DIwN2M1NefTgs3Ghr54TMxNzOhFZPhB\"", + "line_num": 1, + "path": "./tests/samples/bitbucket_client_id", + "info": "", + "value": "0DIwN2M1NefTgs3Ghr54TMxNzOhFZPhB", + "value_start": 20, + "value_end": 52, + "variable": null, + "variable_start": -2, + "variable_end": -2, + "entropy_validation": { + "iterator": "BASE64_CHARS", + "entropy": 4.577819531114783, + "valid": true + } + } + ] + }, + { + "api_validation": "NOT_AVAILABLE", + "ml_validation": "NOT_AVAILABLE", + "ml_probability": null, + "rule": "Bitbucket Client Secret", + "severity": "info", + "confidence": "weak", + "line_data_list": [ + { + "line": "bitbucket_client : \"0DIwN2M1NefTgs3Ghr54TMxNzOhFZPhB\"", + "line_num": 1, + "path": "./tests/samples/bitbucket_client_id", + "info": "", + "value": "0DIwN2M1NefTgs3Ghr54TMxNzOhFZPhB", + "value_start": 20, + "value_end": 52, + "variable": null, + "variable_start": -2, + "variable_end": -2, + "entropy_validation": { + "iterator": "BASE64_CHARS", + "entropy": 4.577819531114783, + "valid": true + } + } + ] + }, { "api_validation": "NOT_AVAILABLE", "ml_validation": "NOT_AVAILABLE", @@ -1238,7 +1292,7 @@ "line_data_list": [ { "line": "bitbucket_client_2 : \"0DIwN2M1NTeGd6S6jU\"", - "line_num": 2, + "line_num": 3, "path": "./tests/samples/bitbucket_client_id", "info": "", "value": "0DIwN2M1NTeGd6S6jU", @@ -8930,6 +8984,33 @@ } ] }, + { + "api_validation": "NOT_AVAILABLE", + "ml_validation": "NOT_AVAILABLE", + "ml_probability": null, + "rule": "Jira / Confluence PAT token", + "severity": "high", + "confidence": "strong", + "line_data_list": [ + { + "line": "TP: \"image/png\": \"iVBORx09VIskhxhCe7sh03R1dnENPiB66xQSIZjEYN13vafX/OTI2NjA3NjU1NTI2Oh2DOnASdOHoIhEGyqIuYrdkYaQZ/hZwUteHsmN+z+aoEAAAAvL+Q5FSQGyqIuYrdkYaQZuW1TvI=\\n\",", + "line_num": 2, + "path": "./tests/samples/jira_confluence_pat", + "info": "", + "value": "OTI2NjA3NjU1NTI2Oh2DOnASdOHoIhEGyqIuYrdkYaQZ", + "value_start": 67, + "value_end": 111, + "variable": null, + "variable_start": -2, + "variable_end": -2, + "entropy_validation": { + "iterator": "BASE64_CHARS", + "entropy": 4.606936732175321, + "valid": true + } + } + ] + }, { "api_validation": "NOT_AVAILABLE", "ml_validation": "NOT_AVAILABLE", @@ -10701,7 +10782,7 @@ { "line": " \"SQT\": \"EAAAEEPtuW9FnP_CuCV-3DFPB54M7YWBUVEFOWKPRVMWEJYMODIDAEX4FASS64NF\",", "line_num": 1, - "path": "./tests/samples/square_access_token.template", + "path": "./tests/samples/square_access_token", "info": "", "value": "EAAAEEPtuW9FnP_CuCV-3DFPB54M7YWBUVEFOWKPRVMWEJYMODIDAEX4FASS64NF", "value_start": 12, @@ -10728,7 +10809,7 @@ { "line": "sq0atp-GIREOGICRACKLE12145178", "line_num": 1, - "path": "./tests/samples/square_client_id.toml", + "path": "./tests/samples/square_client_id", "info": "", "value": "sq0atp-GIREOGICRACKLE12145178", "value_start": 0, @@ -10753,13 +10834,13 @@ "confidence": "strong", "line_data_list": [ { - "line": "sq0csp-ST2dsfwyVI7Ydj343EY72hb48ehKQINMST2ds_fwyVI", + "line": "20241204_112356 Colored line: \"\u001b[94msq0csp-ST2dsfwyVI7Ydj343EY72hb48ehKQINMST2ds_fwyVI\u001b[0m\";", "line_num": 1, - "path": "./tests/samples/square_oauth_secret.hs", + "path": "./tests/samples/square_oauth_secret", "info": "", "value": "sq0csp-ST2dsfwyVI7Ydj343EY72hb48ehKQINMST2ds_fwyVI", - "value_start": 0, - "value_end": 50, + "value_start": 36, + "value_end": 86, "variable": null, "variable_start": -2, "variable_end": -2, @@ -10771,33 +10852,6 @@ } ] }, - { - "api_validation": "NOT_AVAILABLE", - "ml_validation": "NOT_AVAILABLE", - "ml_probability": null, - "rule": "Square OAuth Secret", - "severity": "high", - "confidence": "strong", - "line_data_list": [ - { - "line": "sq0csp-GIREOGICRACKLEGIREOGICRACKLEGIREOGICRACKLE1", - "line_num": 1, - "path": "./tests/samples/square_secret_key", - "info": "", - "value": "sq0csp-GIREOGICRACKLEGIREOGICRACKLEGIREOGICRACKLE1", - "value_start": 0, - "value_end": 50, - "variable": null, - "variable_start": -2, - "variable_end": -2, - "entropy_validation": { - "iterator": "BASE64_CHARS", - "entropy": 3.559610565373458, - "valid": false - } - } - ] - }, { "api_validation": "NOT_AVAILABLE", "ml_validation": "NOT_AVAILABLE", @@ -11509,11 +11563,11 @@ "confidence": "moderate", "line_data_list": [ { - "line": "token : \"IBOgIBAAJBAJtFfagSXdo0JmX6EdwWGvFMaXeOFY5xOTs3PWsnhRFakQFbAuI769\n fdsbkjerfkjrekjnkerjnfkrejnfrejn==\"", + "line": "token : \"IBOgIBAAJBAJtFfagSXdo0JmX6EdwWGvFMaXeOFY5xOTs3PWsn\n hRFakQFbAuI769fdsbkjerfkjrekjnkerjnfkrejnfrejn==\"", "line_num": 12, "path": "./tests/samples/xml_data.xml", "info": "", - "value": "IBOgIBAAJBAJtFfagSXdo0JmX6EdwWGvFMaXeOFY5xOTs3PWsnhRFakQFbAuI769\n fdsbkjerfkjrekjnkerjnfkrejnfrejn==", + "value": "IBOgIBAAJBAJtFfagSXdo0JmX6EdwWGvFMaXeOFY5xOTs3PWsn\n hRFakQFbAuI769fdsbkjerfkjrekjnkerjnfkrejnfrejn==", "value_start": 9, "value_end": 116, "variable": "token", @@ -11536,19 +11590,19 @@ "confidence": "moderate", "line_data_list": [ { - "line": "token : IBOgIBAAJBAJtFfagSXdo0JmX6EdwWGvFMaXeOFY5xOTs3PWsnhRFakQFbAuI769\n fdsbkjerfkjrekjnkerjnfkrejnfrejn==", + "line": "token : IBOgIBAAJBAJtFfagSXdo0JmX6EdwWGvFMaXeOFY5xOTs3PWsn\n hRFakQFbAuI769fdsbkjerfkjrekjnkerjnfkrejnfrejn==", "line_num": 16, "path": "./tests/samples/xml_data.xml", "info": "", - "value": "IBOgIBAAJBAJtFfagSXdo0JmX6EdwWGvFMaXeOFY5xOTs3PWsnhRFakQFbAuI769", + "value": "IBOgIBAAJBAJtFfagSXdo0JmX6EdwWGvFMaXeOFY5xOTs3PWsn", "value_start": 8, - "value_end": 72, + "value_end": 58, "variable": "token", "variable_start": 0, "variable_end": 5, "entropy_validation": { "iterator": "BASE64_CHARS", - "entropy": 5.091578664259099, + "entropy": 4.8332696895151095, "valid": true } } diff --git a/tests/data/output.json b/tests/data/output.json index 031913184..ee0439b53 100644 --- a/tests/data/output.json +++ b/tests/data/output.json @@ -1201,6 +1201,60 @@ } ] }, + { + "api_validation": "NOT_AVAILABLE", + "ml_validation": "NOT_AVAILABLE", + "ml_probability": null, + "rule": "Bitbucket Client ID", + "severity": "info", + "confidence": "weak", + "line_data_list": [ + { + "line": "bitbucket_client : \"0DIwN2M1NefTgs3Ghr54TMxNzOhFZPhB\"", + "line_num": 1, + "path": "./tests/samples/bitbucket_client_id", + "info": "", + "value": "0DIwN2M1NefTgs3Ghr54TMxNzOhFZPhB", + "value_start": 20, + "value_end": 52, + "variable": null, + "variable_start": -2, + "variable_end": -2, + "entropy_validation": { + "iterator": "BASE64_CHARS", + "entropy": 4.577819531114783, + "valid": true + } + } + ] + }, + { + "api_validation": "NOT_AVAILABLE", + "ml_validation": "NOT_AVAILABLE", + "ml_probability": null, + "rule": "Bitbucket Client Secret", + "severity": "info", + "confidence": "weak", + "line_data_list": [ + { + "line": "bitbucket_client : \"0DIwN2M1NefTgs3Ghr54TMxNzOhFZPhB\"", + "line_num": 1, + "path": "./tests/samples/bitbucket_client_id", + "info": "", + "value": "0DIwN2M1NefTgs3Ghr54TMxNzOhFZPhB", + "value_start": 20, + "value_end": 52, + "variable": null, + "variable_start": -2, + "variable_end": -2, + "entropy_validation": { + "iterator": "BASE64_CHARS", + "entropy": 4.577819531114783, + "valid": true + } + } + ] + }, { "api_validation": "NOT_AVAILABLE", "ml_validation": "NOT_AVAILABLE", @@ -1211,7 +1265,7 @@ "line_data_list": [ { "line": "bitbucket_client_2 : \"0DIwN2M1NTeGd6S6jU\"", - "line_num": 2, + "line_num": 3, "path": "./tests/samples/bitbucket_client_id", "info": "", "value": "0DIwN2M1NTeGd6S6jU", @@ -7877,6 +7931,33 @@ } ] }, + { + "api_validation": "NOT_AVAILABLE", + "ml_validation": "NOT_AVAILABLE", + "ml_probability": null, + "rule": "Jira / Confluence PAT token", + "severity": "high", + "confidence": "strong", + "line_data_list": [ + { + "line": "TP: \"image/png\": \"iVBORx09VIskhxhCe7sh03R1dnENPiB66xQSIZjEYN13vafX/OTI2NjA3NjU1NTI2Oh2DOnASdOHoIhEGyqIuYrdkYaQZ/hZwUteHsmN+z+aoEAAAAvL+Q5FSQGyqIuYrdkYaQZuW1TvI=\\n\",", + "line_num": 2, + "path": "./tests/samples/jira_confluence_pat", + "info": "", + "value": "OTI2NjA3NjU1NTI2Oh2DOnASdOHoIhEGyqIuYrdkYaQZ", + "value_start": 67, + "value_end": 111, + "variable": null, + "variable_start": -2, + "variable_end": -2, + "entropy_validation": { + "iterator": "BASE64_CHARS", + "entropy": 4.606936732175321, + "valid": true + } + } + ] + }, { "api_validation": "NOT_AVAILABLE", "ml_validation": "NOT_AVAILABLE", @@ -9567,7 +9648,7 @@ { "line": " \"SQT\": \"EAAAEEPtuW9FnP_CuCV-3DFPB54M7YWBUVEFOWKPRVMWEJYMODIDAEX4FASS64NF\",", "line_num": 1, - "path": "./tests/samples/square_access_token.template", + "path": "./tests/samples/square_access_token", "info": "", "value": "EAAAEEPtuW9FnP_CuCV-3DFPB54M7YWBUVEFOWKPRVMWEJYMODIDAEX4FASS64NF", "value_start": 12, @@ -9594,7 +9675,7 @@ { "line": "sq0atp-GIREOGICRACKLE12145178", "line_num": 1, - "path": "./tests/samples/square_client_id.toml", + "path": "./tests/samples/square_client_id", "info": "", "value": "sq0atp-GIREOGICRACKLE12145178", "value_start": 0, @@ -9619,13 +9700,13 @@ "confidence": "strong", "line_data_list": [ { - "line": "sq0csp-ST2dsfwyVI7Ydj343EY72hb48ehKQINMST2ds_fwyVI", + "line": "20241204_112356 Colored line: \"\u001b[94msq0csp-ST2dsfwyVI7Ydj343EY72hb48ehKQINMST2ds_fwyVI\u001b[0m\";", "line_num": 1, - "path": "./tests/samples/square_oauth_secret.hs", + "path": "./tests/samples/square_oauth_secret", "info": "", "value": "sq0csp-ST2dsfwyVI7Ydj343EY72hb48ehKQINMST2ds_fwyVI", - "value_start": 0, - "value_end": 50, + "value_start": 36, + "value_end": 86, "variable": null, "variable_start": -2, "variable_end": -2, @@ -9637,33 +9718,6 @@ } ] }, - { - "api_validation": "NOT_AVAILABLE", - "ml_validation": "NOT_AVAILABLE", - "ml_probability": null, - "rule": "Square OAuth Secret", - "severity": "high", - "confidence": "strong", - "line_data_list": [ - { - "line": "sq0csp-GIREOGICRACKLEGIREOGICRACKLEGIREOGICRACKLE1", - "line_num": 1, - "path": "./tests/samples/square_secret_key", - "info": "", - "value": "sq0csp-GIREOGICRACKLEGIREOGICRACKLEGIREOGICRACKLE1", - "value_start": 0, - "value_end": 50, - "variable": null, - "variable_start": -2, - "variable_end": -2, - "entropy_validation": { - "iterator": "BASE64_CHARS", - "entropy": 3.559610565373458, - "valid": false - } - } - ] - }, { "api_validation": "NOT_AVAILABLE", "ml_validation": "NOT_AVAILABLE", @@ -10375,11 +10429,11 @@ "confidence": "moderate", "line_data_list": [ { - "line": "token : \"IBOgIBAAJBAJtFfagSXdo0JmX6EdwWGvFMaXeOFY5xOTs3PWsnhRFakQFbAuI769\n fdsbkjerfkjrekjnkerjnfkrejnfrejn==\"", + "line": "token : \"IBOgIBAAJBAJtFfagSXdo0JmX6EdwWGvFMaXeOFY5xOTs3PWsn\n hRFakQFbAuI769fdsbkjerfkjrekjnkerjnfkrejnfrejn==\"", "line_num": 12, "path": "./tests/samples/xml_data.xml", "info": "", - "value": "IBOgIBAAJBAJtFfagSXdo0JmX6EdwWGvFMaXeOFY5xOTs3PWsnhRFakQFbAuI769\n fdsbkjerfkjrekjnkerjnfkrejnfrejn==", + "value": "IBOgIBAAJBAJtFfagSXdo0JmX6EdwWGvFMaXeOFY5xOTs3PWsn\n hRFakQFbAuI769fdsbkjerfkjrekjnkerjnfkrejnfrejn==", "value_start": 9, "value_end": 116, "variable": "token", @@ -10402,19 +10456,19 @@ "confidence": "moderate", "line_data_list": [ { - "line": "token : IBOgIBAAJBAJtFfagSXdo0JmX6EdwWGvFMaXeOFY5xOTs3PWsnhRFakQFbAuI769\n fdsbkjerfkjrekjnkerjnfkrejnfrejn==", + "line": "token : IBOgIBAAJBAJtFfagSXdo0JmX6EdwWGvFMaXeOFY5xOTs3PWsn\n hRFakQFbAuI769fdsbkjerfkjrekjnkerjnfkrejnfrejn==", "line_num": 16, "path": "./tests/samples/xml_data.xml", "info": "", - "value": "IBOgIBAAJBAJtFfagSXdo0JmX6EdwWGvFMaXeOFY5xOTs3PWsnhRFakQFbAuI769", + "value": "IBOgIBAAJBAJtFfagSXdo0JmX6EdwWGvFMaXeOFY5xOTs3PWsn", "value_start": 8, - "value_end": 72, + "value_end": 58, "variable": "token", "variable_start": 0, "variable_end": 5, "entropy_validation": { "iterator": "BASE64_CHARS", - "entropy": 5.091578664259099, + "entropy": 4.8332696895151095, "valid": true } } diff --git a/tests/filters/test_value_base64_part_check.py b/tests/filters/test_value_base64_part_check.py index 790ca9008..419b9b8ff 100644 --- a/tests/filters/test_value_base64_part_check.py +++ b/tests/filters/test_value_base64_part_check.py @@ -7,27 +7,75 @@ class TestValueBase64PartCheck(unittest.TestCase): - EAA_PATTERN = re.compile(r"(?P\bEAA[0-9A-Za-z]{32})") + EAA_PATTERN = re.compile(r"(?P\bEAA[0-9A-Za-z]+\b)") def test_value_check_n(self) -> None: - line_data = LineData(config=None, - path="dummy", - file_type="", - line="qcE81rS+FJHuvg39lz4T/EAACEb00Kse0BAlGy7KeQ5YnaCEd09Eo" - "se0cBAlGy7KeQ5Yna9CoDsup39tiYdoQ4jH9Coup39tiYdWoQ4jHFZD", - info="", - line_num=1, - line_pos=0, - pattern=TestValueBase64PartCheck.EAA_PATTERN) - self.assertTrue(ValueBase64PartCheck().run(line_data, DUMMY_ANALYSIS_TARGET)) + for line in [ + # left and right boundaries + "qcE81rS+FJHGy7KedoQ4juvg3FZ9lz4T/" + "EAACEb00Kse0BAlGy7KeQ5YnaCEd09Eo" + "+se0cBAlGy7KeQ5Yna9CoDsup39tiYdoQ4jH9Coup39tiYdWoQ4jHFZD", + # only left + "qcE81rS+FJHGy7KedoQ4juvg3FZ9lz4T/" + "EAACEb00Kse0BAlGy7KeQ5YnaCEd09Eo", + # only right + "EAACEb00Kse0BAlGy7KeQ5YnaCEd09Eo" + "/qcE81rS+FJHGy7KedoQ4juvg3FZ9lz4T" + ]: + line_data = LineData(config=None, + path="dummy", + file_type="", + line=line, + info="", + line_num=1, + line_pos=0, + pattern=TestValueBase64PartCheck.EAA_PATTERN) + self.assertTrue(ValueBase64PartCheck().run(line_data, DUMMY_ANALYSIS_TARGET), line) def test_value_check_p(self) -> None: + for line in ["http://meta.test/api/EAACRvAWiwzR8rcXFsLiUH13ybj0tdEa?"]: + line_data = LineData(config=None, + path="dummy", + file_type="", + line=line, + info="", + line_num=1, + line_pos=0, + pattern=TestValueBase64PartCheck.EAA_PATTERN) + self.assertFalse(ValueBase64PartCheck().run(line_data, DUMMY_ANALYSIS_TARGET), line) + + def test_value_jwt_part_p(self) -> None: + value = "GgzlFDwPikM5vUkIT2WOtQxKWceQ4wzV" + line = f"04MjE2MGFkOTFhYzgiLCJlbmMiOiJBMTlwIj.{value}.p9Y0jfEpUq6XHZIlai1oYHbDtx2Nc1k3z7" line_data = LineData(config=None, path="dummy", file_type="", - line="http://meta.test/api/EAACRvAWiwzR8rcXFsLiUH13ybj0tdEa?x=login", + line=line, info="", line_num=1, line_pos=0, - pattern=TestValueBase64PartCheck.EAA_PATTERN) - self.assertFalse(ValueBase64PartCheck().run(line_data, DUMMY_ANALYSIS_TARGET)) + pattern=re.compile(fr"(?P{value})")) + self.assertFalse(ValueBase64PartCheck().run(line_data, DUMMY_ANALYSIS_TARGET), line) + + def test_value_base64_part_n(self) -> None: + for prefix, value, suffix in [ + ("GuBdjqFPQXaaOcxuJ5oLRDC7IxtkpNz1P9CByI/", "eEZQFtJDUtShrP0tTC", + "\\nztg1zgkXhaz7IMxm4SgeuOUFy4mEcAGjQxs7qays"), + ("hj4Ov3rIwAAdHIIAC7ARR4daWuDXZoA41Bk6QJC\\nLwgikiCrNulUp0VYmrLoEE/", "sBY3YlVbQdYgS9ulYJcKyInd8hWQ31TG", + "/SSyz1SRd\\ncp8SD9bAu8SbqX4DWa6tV2XxopsabwQgWqGtJWzYIyuVFvdSuXGaZ"), + ("aWrnS3VQGR0j4mLkKC1NUeljjA77zYyhVbIE0dR%2By7fmaHq7U%2BdegXWGpAZ+/", "4pR32luBFTAtWgUcCv56", + "/p5y30X87Yz1khTIycdgpUW9kY7WdsC9zxoXTvMvWuVV98YyMnSGH2SYE5pwALBIr9QKi"), + ("04MjE2MGFkOTFhYzgiLCJlbmMiOiJBMTlwIj+", "GgzlFDwPikM5vUkIT2WOtQxKWceQ4wzV", + "/p9Y0jfEpUq6XHZIlai1oYHbDtx2Nc1k3z7"), + # ("sha512-PsjRC7REiu/", "xbYcsFHSp5oKpFNnsj", "/52OVb4zPTRK5onXwVF3=="), + ]: + line = ''.join([prefix, value, suffix]) + line_data = LineData(config=None, + path="dummy", + file_type="", + line=line, + info="", + line_num=1, + line_pos=0, + pattern=re.compile(fr"(?P{value})")) + self.assertTrue(ValueBase64PartCheck().run(line_data, DUMMY_ANALYSIS_TARGET), line) diff --git a/tests/filters/test_value_not_part_encoded.py b/tests/filters/test_value_not_part_encoded.py index 4550b6a8d..7db603a49 100644 --- a/tests/filters/test_value_not_part_encoded.py +++ b/tests/filters/test_value_not_part_encoded.py @@ -4,18 +4,36 @@ from credsweeper.credentials import LineData from credsweeper.file_handler.analysis_target import AnalysisTarget from credsweeper.filters import ValueNotPartEncodedCheck -from tests.filters.conftest import LINE_VALUE_PATTERN, DUMMY_ANALYSIS_TARGET, DUMMY_DESCRIPTOR -from tests.test_utils.dummy_line_data import get_line_data +from tests.filters.conftest import LINE_VALUE_PATTERN, DUMMY_DESCRIPTOR class TestValueNotPartEncodedCheck: def test_value_not_part_encoded_p(self, config: pytest.fixture) -> None: + val = 'Q' * 64 + target = AnalysisTarget(0, [val, '/usr/local/host/'], [1, 2], DUMMY_DESCRIPTOR) + line_data = LineData(config, val, 0, 1, "", "", "", LINE_VALUE_PATTERN) + assert ValueNotPartEncodedCheck().run(line_data, target) is False target = AnalysisTarget(0, ["AAA", "BBB"], [1, 2], DUMMY_DESCRIPTOR) line_data = LineData(config, "XXX", 0, 1, "", "", "", LINE_VALUE_PATTERN) assert ValueNotPartEncodedCheck().run(line_data, target) is False def test_value_not_part_encoded_n(self, config: pytest.fixture) -> None: + val = 'Q' * 64 + target = AnalysisTarget(0, [val, '/etc/localhost=='], [1, 2], DUMMY_DESCRIPTOR) + line_data = LineData(config, val, 0, 1, "", "", "", LINE_VALUE_PATTERN) + assert ValueNotPartEncodedCheck().run(line_data, target) is True + val = 'Q' * 64 + target = AnalysisTarget(0, [val, '0123456789ABCDEF'], [1, 2], DUMMY_DESCRIPTOR) + line_data = LineData(config, val, 0, 1, "", "", "", LINE_VALUE_PATTERN) + assert ValueNotPartEncodedCheck().run(line_data, target) is True + val = "/123" + 'Q' * 64 + target = AnalysisTarget(0, [val, '/123456789ABCDE='], [1, 2], DUMMY_DESCRIPTOR) + line_data = LineData(config, val, 0, 1, "", "", "", LINE_VALUE_PATTERN) + assert ValueNotPartEncodedCheck().run(line_data, target) is True + target = AnalysisTarget(1, ['Q' * 64, val, "1234"], [1, 2, 3], DUMMY_DESCRIPTOR) + line_data = LineData(config, val, 1, 2, "", "", "", LINE_VALUE_PATTERN) + assert ValueNotPartEncodedCheck().run(line_data, target) is True target = AnalysisTarget(1, [Chars.BASE64STD_CHARS.value, "XXX"], [1, 2], DUMMY_DESCRIPTOR) line_data = LineData(config, "XXX", 1, 2, "", "", "", LINE_VALUE_PATTERN) assert ValueNotPartEncodedCheck().run(line_data, target) is True diff --git a/tests/samples/bitbucket_client_id b/tests/samples/bitbucket_client_id index fec0a02af..087934fd3 100644 --- a/tests/samples/bitbucket_client_id +++ b/tests/samples/bitbucket_client_id @@ -1,2 +1,8 @@ bitbucket_client : "0DIwN2M1NefTgs3Ghr54TMxNzOhFZPhB" + bitbucket_client_2 : "0DIwN2M1NTeGd6S6jU" + +fp: VJFHj5fr46Qf7h34xa/W6FmGvknPH57eQLJoEjKlZUYn0EGx9EliCdENfzAbtr3e4GnNvQh6X2bLA/lB7TRzoD84oyGkllkMayevQm/dViJbUBQsl7HOa6VwUwugW+o7T3+vPM4y5qqFv1B/fs3Fthl+eKUatccy9qK4xra1Hso= + +fp: zza0dxVlt0/TijfkIXPhSdtdakk9G\nCIpPqrtaOxOx0sEXzS/MuYT4rE3363cXp1yCxqF3dhUP + diff --git a/tests/samples/bitbucket_client_secret b/tests/samples/bitbucket_client_secret index cc1479038..2d179bbee 100644 --- a/tests/samples/bitbucket_client_secret +++ b/tests/samples/bitbucket_client_secret @@ -1,2 +1,9 @@ bitbucket_client_data : "0D13fDM1NkwOhFZ_PhBuW-3keLTMxNzBGlKzZyiFiB-kODIwNDM1NTMxNzkwOhFZ" bitbucket_client_data_v2 : "0sTMxNzkI3fDM1NwOhFZ_PhBuW-3keLB" + +FP case for base64 ValueNotPartEncodedCheck +-----BEGIN PUBLIC KEY----- +EACUDagAEAKBFtea7t5UElfbV1kWH4it9wEAYHKoZIzj0CAQYFK4EcgBnJdAiPHM +/3VTQhmNWSPj4bFiHsGJNlTEUeKCLJJgqw0tjTM3iaPcz8OolQLKUAvQwfYQHsVW +9iL//ygMkJL/wHNyr+TGUdjTvf0O0Kc82iamXSStuno= +-----END PUBLIC KEY----- \ No newline at end of file diff --git a/tests/samples/doc_ip_id_password_triple b/tests/samples/doc_ip_id_password_triple index 18e9fdf34..3b0e0d492 100644 --- a/tests/samples/doc_ip_id_password_triple +++ b/tests/samples/doc_ip_id_password_triple @@ -18,3 +18,4 @@ Service(Standby) ip : 192.168.127.24(23591 port) ip : 192.168.142.42(21345 port) FP# [Wi-Fi HotSpot] 5.2.0.299-WR220224U #Wi-Fi + 10.53.51.17192.168.101.973777주인 FNAT-CC0TG_old diff --git a/tests/samples/jira_confluence_pat b/tests/samples/jira_confluence_pat index 94bf2daf7..ecd24cc6b 100644 --- a/tests/samples/jira_confluence_pat +++ b/tests/samples/jira_confluence_pat @@ -1,2 +1,3 @@ TP: https://www.example.com/api/verification/version2322/token/OTI2NjA3NjU1NTI2Oh2DOnASdOHoIhEGyqIuYrdkYaQZ -FP: "image/png": "iVBORx09VIskhxhCe7sh03R1dnENPiB66xQSIZjEYN13vafX/OTI2NjA3NjU1NTI2Oh2DOnASdOHoIhEGyqIuYrdkYaQZ/hZwUteHsmN+z+aoEAAAAvL+Q5FSQGyqIuYrdkYaQZuW1TvI=\n", +TP: "image/png": "iVBORx09VIskhxhCe7sh03R1dnENPiB66xQSIZjEYN13vafX/OTI2NjA3NjU1NTI2Oh2DOnASdOHoIhEGyqIuYrdkYaQZ/hZwUteHsmN+z+aoEAAAAvL+Q5FSQGyqIuYrdkYaQZuW1TvI=\n", +FP: "image/png": "iVBORx09VIskhxhCe7sh03R1dnENPiB66xQSIZjEYN13vafX/MDAwMToPTwEV1TLGOh2DOnASdOHoIhEGyqIuYrdkYaQZ/hZwUteHsmN+z+aoEAAAAvL+Q5FSQGyqIuYrdkYaQZuW1TvI=\n", diff --git a/tests/samples/square_access_token.template b/tests/samples/square_access_token similarity index 100% rename from tests/samples/square_access_token.template rename to tests/samples/square_access_token diff --git a/tests/samples/square_client_id.toml b/tests/samples/square_client_id similarity index 100% rename from tests/samples/square_client_id.toml rename to tests/samples/square_client_id diff --git a/tests/samples/square_oauth_secret b/tests/samples/square_oauth_secret new file mode 100644 index 000000000..54c3249c8 --- /dev/null +++ b/tests/samples/square_oauth_secret @@ -0,0 +1 @@ +20241204_112356 Colored line: "sq0csp-ST2dsfwyVI7Ydj343EY72hb48ehKQINMST2ds_fwyVI"; diff --git a/tests/samples/square_oauth_secret.hs b/tests/samples/square_oauth_secret.hs deleted file mode 100644 index 98b21f4e1..000000000 --- a/tests/samples/square_oauth_secret.hs +++ /dev/null @@ -1 +0,0 @@ -sq0csp-ST2dsfwyVI7Ydj343EY72hb48ehKQINMST2ds_fwyVI diff --git a/tests/samples/square_secret_key b/tests/samples/square_secret_key deleted file mode 100644 index 7ac817d4d..000000000 --- a/tests/samples/square_secret_key +++ /dev/null @@ -1 +0,0 @@ -sq0csp-GIREOGICRACKLEGIREOGICRACKLEGIREOGICRACKLE1 diff --git a/tests/samples/xml_data.xml b/tests/samples/xml_data.xml index a73adc85a..b37f53bac 100644 --- a/tests/samples/xml_data.xml +++ b/tests/samples/xml_data.xml @@ -10,11 +10,11 @@ EBUWuiacAxz/WKHb9WVp0F/zkRwucJ0Hr2W4P9cP - "IBOgIBAAJBAJtFfagSXdo0JmX6EdwWGvFMaXeOFY5xOTs3PWsnhRFakQFbAuI769 - fdsbkjerfkjrekjnkerjnfkrejnfrejn==" + "IBOgIBAAJBAJtFfagSXdo0JmX6EdwWGvFMaXeOFY5xOTs3PWsn + hRFakQFbAuI769fdsbkjerfkjrekjnkerjnfkrejnfrejn==" - IBOgIBAAJBAJtFfagSXdo0JmX6EdwWGvFMaXeOFY5xOTs3PWsnhRFakQFbAuI769 - fdsbkjerfkjrekjnkerjnfkrejnfrejn== + IBOgIBAAJBAJtFfagSXdo0JmX6EdwWGvFMaXeOFY5xOTs3PWsn + hRFakQFbAuI769fdsbkjerfkjrekjnkerjnfkrejnfrejn== \ No newline at end of file diff --git a/tests/test_main.py b/tests/test_main.py index b4aa3c7ac..7cfb7b25c 100644 --- a/tests/test_main.py +++ b/tests/test_main.py @@ -348,6 +348,23 @@ def test_scan_bytes_n(self) -> None: # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # + def test_colored_line_p(self) -> None: + cred_sweeper = CredSweeper() + for to_scan in [ + "토큰MTAwMDoxVKvgS4Y7K7UIXHqBmV50aWFs5sb2heWGb3dy사용".encode(), + b'\x1b[93mMTAwMDoxVKvgS4Y7K7UIXHqBmV50aWFs5sb2heWGb3dy\x1b[0m', + b'\r\nMTAwMDoxVKvgS4Y7K7UIXHqBmV50aWFs5sb2heWGb3dy\r\n', + b'\tMTAwMDoxVKvgS4Y7K7UIXHqBmV50aWFs5sb2heWGb3dy\n', + b'%3DMTAwMDoxVKvgS4Y7K7UIXHqBmV50aWFs5sb2heWGb3dy%3B', + ]: + provider = ByteContentProvider(to_scan) + results = cred_sweeper.file_scan(provider) + self.assertEqual(1, len(results), to_scan) + self.assertEqual("Jira / Confluence PAT token", results[0].rule_name) + self.assertEqual("MTAwMDoxVKvgS4Y7K7UIXHqBmV50aWFs5sb2heWGb3dy", results[0].line_data_list[0].value) + + # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # + def test_string_content_provider_n(self) -> None: random.seed(42) ascii_chars = string.digits + string.ascii_letters + string.punctuation + ' '