Skip to content

Commit

Permalink
Strict rules for github classic token
Browse files Browse the repository at this point in the history
  • Loading branch information
babenek committed Nov 28, 2023
1 parent 331e963 commit 2b40813
Show file tree
Hide file tree
Showing 24 changed files with 900 additions and 976 deletions.
3 changes: 3 additions & 0 deletions .mypy.ini
Original file line number Diff line number Diff line change
Expand Up @@ -48,3 +48,6 @@ ignore_missing_imports = True

[mypy-docx.*]
ignore_missing_imports = True

[mypy-base62.*]
ignore_missing_imports = True
1 change: 1 addition & 0 deletions credsweeper/common/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -132,6 +132,7 @@ class DiffRowType(Enum):
UTF_8 = "utf_8"
UTF_16 = "utf_16"
LATIN_1 = "latin_1"
ASCII = "ascii"

DEFAULT_ENCODING = UTF_8

Expand Down
4 changes: 2 additions & 2 deletions credsweeper/file_handler/data_content_provider.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
import yaml
from bs4 import BeautifulSoup, Tag, XMLParsedAsHTMLWarning

from credsweeper.common.constants import DEFAULT_ENCODING
from credsweeper.common.constants import DEFAULT_ENCODING, ASCII
from credsweeper.file_handler.analysis_target import AnalysisTarget
from credsweeper.file_handler.content_provider import ContentProvider
from credsweeper.utils import Util
Expand Down Expand Up @@ -343,7 +343,7 @@ def represent_as_encoded(self) -> bool:
return False
try:
self.decoded = base64.b64decode( #
self.data.decode(encoding="ascii", errors="strict"). #
self.data.decode(encoding=ASCII, errors="strict"). #
translate(str.maketrans("", "", string.whitespace)), #
validate=True) #
except Exception as exc:
Expand Down
1 change: 1 addition & 0 deletions credsweeper/filters/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
from credsweeper.filters.value_entropy_base64_check import ValueEntropyBase64Check
from credsweeper.filters.value_file_path_check import ValueFilePathCheck
from credsweeper.filters.value_first_word_check import ValueFirstWordCheck
from credsweeper.filters.value_github_check import ValueGitHubCheck
from credsweeper.filters.value_grafana_check import ValueGrafanaCheck
from credsweeper.filters.value_ip_check import ValueIPCheck
from credsweeper.filters.value_json_web_token_check import ValueJsonWebTokenCheck
Expand Down
43 changes: 43 additions & 0 deletions credsweeper/filters/value_github_check.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
import binascii
import contextlib

import base62

from credsweeper.common.constants import ASCII
from credsweeper.config import Config
from credsweeper.credentials import LineData
from credsweeper.file_handler.analysis_target import AnalysisTarget
from credsweeper.filters import Filter


class ValueGitHubCheck(Filter):
    """GitHub Classic Token validation.

    The checked value is expected to start with ``gh``, have ``_`` at index 3,
    and end with a 6-character base62 checksum. The checksum must equal the
    CRC32 of the characters between the 4-character prefix and the checksum.
    """

    def __init__(self, config: Config = None) -> None:
        pass

    def run(self, line_data: LineData, target: AnalysisTarget) -> bool:
        """Run filter checks on received token which might be structured.

        Args:
            line_data: credential candidate data
            target: multiline target from which line data was obtained

        Return:
            True when the candidate must be filtered out, False when it is kept
            (i.e. the embedded checksum matches the token payload)

        """
        # https://github.blog/2021-04-05-behind-githubs-new-authentication-token-formats/
        if not line_data.value:
            return True
        # Any failure below (value too short to index, non-ASCII payload,
        # characters outside the base62 alphabet) means the value is not a
        # well-formed token, so the candidate falls through and is filtered.
        with contextlib.suppress(Exception):
            if line_data.value.startswith("gh") and '_' == line_data.value[3]:
                token = line_data.value[4:-6]
                data = token.encode(ASCII, errors="strict")
                crc32sum = binascii.crc32(data)
                base62_crc32 = line_data.value[-6:]
                # The checksum is a positional base62 number left-padded with
                # '0' (see the blog post above). base62.decodebytes() must not
                # be used here: it implements pybase62's byte-string encoding,
                # in which a leading '0' is a marker for a run of null bytes
                # rather than a digit, so zero-padded checksums would be
                # decoded to a wrong number and valid tokens rejected.
                # Padding zeros carry no value, so they are stripped before
                # decoding to keep the library from giving them any special
                # meaning.
                crc32sign = base62.decode(base62_crc32.lstrip('0'))
                if crc32sign == crc32sum:
                    return False
        return True
4 changes: 2 additions & 2 deletions credsweeper/filters/value_structured_token_check.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
import binascii
import contextlib

from credsweeper.common.constants import LATIN_1
from credsweeper.common.constants import LATIN_1, ASCII
from credsweeper.config import Config
from credsweeper.credentials import LineData
from credsweeper.file_handler.analysis_target import AnalysisTarget
Expand Down Expand Up @@ -47,7 +47,7 @@ def run(self, line_data: LineData, target: AnalysisTarget) -> bool:
def check_crc32_struct(value: str) -> bool:
"""Returns False if value is valid for bitbucket app password structure 'payload:crc32'"""
crc32 = int(value[28:], 16)
data = value[:28].encode("ascii")
data = value[:28].encode(ASCII)
if crc32 == binascii.crc32(data):
return False
return True
Expand Down
21 changes: 8 additions & 13 deletions credsweeper/rules/config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -527,26 +527,21 @@
- pypi-
min_line_len: 155

- name: Github Token
- name: Github Classic Token
severity: high
type: pattern
values:
- (^|[^.0-9A-Za-z_/+-])(?P<value>(ghr|gho|ghu|ghs)_[\w]{36,255})
filter_type: GeneralPattern
required_substrings:
- gh
min_line_len: 40

- name: Github Personal Access Token
severity: high
type: pattern
values:
- (^|[^.0-9A-Za-z_/+-])(?P<value>ghp_[\w]{36,255})
filter_type: GeneralPattern
- (^|[^.0-9A-Za-z_/+-])(?P<value>gh[pousr]_[0-9A-Za-z_]{36,255})
filter_type:
- ValueGitHubCheck
validations:
- GithubTokenValidation
required_substrings:
- ghp_
- gho_
- ghu_
- ghs_
- ghr_
min_line_len: 40

- name: Github Fine-granted Token
Expand Down
1 change: 1 addition & 0 deletions requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ pdfminer.six==20221105
password-strength==0.0.3.post2
python-dateutil==2.8.2
pyjks==20.0.0
pybase62==1.0.0

# ML requirements
numpy==1.24.4
Expand Down
1 change: 1 addition & 0 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@
"onnxruntime", #
"python-dateutil", #
"pyjks", #
"pybase62", #
]

setuptools.setup(
Expand Down
10 changes: 5 additions & 5 deletions tests/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,17 +4,17 @@
SAMPLES_FILES_COUNT: int = 120

# credentials count after scan
SAMPLES_CRED_COUNT: int = 373
SAMPLES_CRED_LINE_COUNT: int = 385
SAMPLES_CRED_COUNT: int = 372
SAMPLES_CRED_LINE_COUNT: int = 384

# credentials count after post-processing
SAMPLES_POST_CRED_COUNT: int = 287
SAMPLES_POST_CRED_COUNT: int = 286

# with option --doc
SAMPLES_IN_DOC = 428
SAMPLES_IN_DOC = 427

# archived credentials that are not found without --depth
SAMPLES_IN_DEEP_1 = SAMPLES_POST_CRED_COUNT + 24
SAMPLES_IN_DEEP_1 = SAMPLES_POST_CRED_COUNT + 23
SAMPLES_IN_DEEP_2 = SAMPLES_IN_DEEP_1 + 16
SAMPLES_IN_DEEP_3 = SAMPLES_IN_DEEP_2 + 3

Expand Down
Loading

0 comments on commit 2b40813

Please sign in to comment.