Skip to content

Commit

Permalink
removed api_validations
Browse files Browse the repository at this point in the history
  • Loading branch information
babenek committed Dec 12, 2024
1 parent 84a5ed8 commit fc0e34a
Show file tree
Hide file tree
Showing 51 changed files with 26 additions and 3,058 deletions.
2 changes: 0 additions & 2 deletions credsweeper/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,10 +4,8 @@
DataContentProvider, \
TextContentProvider
from credsweeper.ml_model.ml_validator import MlValidator
from credsweeper.validations.apply_validation import ApplyValidation

__all__ = [
'ApplyValidation', #
'ByteContentProvider', #
'ContentProvider', #
'CredSweeper', #
Expand Down
6 changes: 0 additions & 6 deletions credsweeper/__main__.py
Original file line number Diff line number Diff line change
Expand Up @@ -196,11 +196,6 @@ def get_arguments() -> Namespace:
dest="ml_providers",
required=False,
metavar="STR")
parser.add_argument("--api_validation",
help="add credential api validation option to credsweeper pipeline. "
"External API is used to reduce FP for some rule types.",
dest="api_validation",
action="store_true")
parser.add_argument("--jobs",
"-j",
help="number of parallel processes to use (default: 1)",
Expand Down Expand Up @@ -296,7 +291,6 @@ def scan(args: Namespace, content_provider: AbstractProvider, json_filename: Opt

credsweeper = CredSweeper(rule_path=args.rule_path,
config_path=args.config_path,
api_validation=args.api_validation,
json_filename=json_filename,
xlsx_filename=xlsx_filename,
hashed=args.hashed,
Expand Down
23 changes: 1 addition & 22 deletions credsweeper/app.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,6 @@
from credsweeper.file_handler.text_content_provider import TextContentProvider
from credsweeper.scanner import Scanner
from credsweeper.utils import Util
from credsweeper.validations.apply_validation import ApplyValidation

logger = logging.getLogger(__name__)

Expand All @@ -39,7 +38,6 @@ class CredSweeper:
def __init__(self,
rule_path: Union[None, str, Path] = None,
config_path: Optional[str] = None,
api_validation: bool = False,
json_filename: Union[None, str, Path] = None,
xlsx_filename: Union[None, str, Path] = None,
hashed: bool = False,
Expand Down Expand Up @@ -67,8 +65,6 @@ def __init__(self,
validation was the grained candidate model on machine learning
config_path: optional str variable, path of CredSweeper config file
default built-in config is used if None
api_validation: optional boolean variable, specifying the need of
parallel API validation
json_filename: optional string variable, path to save result
to json
xlsx_filename: optional string variable, path to save result
Expand Down Expand Up @@ -97,7 +93,6 @@ def __init__(self,
raise RuntimeError(f"Severity level provided: {severity}"
f" -- must be one of: {' | '.join([i.value for i in Severity])}")
config_dict = self._get_config_dict(config_path=config_path,
api_validation=api_validation,
use_filters=use_filters,
find_by_ext=find_by_ext,
depth=depth,
Expand Down Expand Up @@ -137,7 +132,6 @@ def _get_config_path(config_path: Optional[str]) -> Path:
def _get_config_dict(
self, #
config_path: Optional[str], #
api_validation: bool, #
use_filters: bool, #
find_by_ext: bool, #
depth: int, #
Expand All @@ -147,8 +141,6 @@ def _get_config_dict(
exclude_lines: Optional[List[str]], #
exclude_values: Optional[List[str]]) -> Dict[str, Any]:
config_dict = Util.json_load(self._get_config_path(config_path))
config_dict["validation"] = {}
config_dict["validation"]["api_validation"] = api_validation
config_dict["use_filters"] = use_filters
config_dict["find_by_ext"] = find_by_ext
config_dict["size_limit"] = size_limit
Expand Down Expand Up @@ -268,14 +260,7 @@ def scan(self, content_providers: Sequence[Union[DiffContentProvider, TextConten
def __single_job_scan(self, content_providers: Sequence[Union[DiffContentProvider, TextContentProvider]]) -> None:
"""Performs scan in main thread"""
all_cred = self.files_scan(content_providers)
if self.config.api_validation:
api_validation = ApplyValidation()
for cred in all_cred:
logger.info("Run API Validation")
cred.api_validation = api_validation.validate(cred)
self.credential_manager.add_credential(cred)
else:
self.credential_manager.set_credentials(all_cred)
self.credential_manager.set_credentials(all_cred)

# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #

Expand All @@ -289,8 +274,6 @@ def __multi_jobs_scan(self, content_providers: Sequence[Union[DiffContentProvide
if "SILENCE" == self.__log_level:
logging.addLevelName(60, "SILENCE")
log_kwargs["level"] = self.__log_level
# providers_map: List[Sequence[Union[DiffContentProvider, TextContentProvider]]] = \
# [content_providers[x::self.pool_count] for x in range(self.pool_count)]
with multiprocessing.get_context("spawn").Pool(processes=self.pool_count,
initializer=self.pool_initializer,
initargs=(log_kwargs, )) as pool:
Expand All @@ -299,10 +282,6 @@ def __multi_jobs_scan(self, content_providers: Sequence[Union[DiffContentProvide
for x in range(self.pool_count))):
for cred in scan_results:
self.credential_manager.add_credential(cred)
if self.config.api_validation:
logger.info("Run API Validation")
api_validation = ApplyValidation()
api_validation.validate_credentials(pool, self.credential_manager)
except KeyboardInterrupt:
pool.terminate()
pool.join()
Expand Down
1 change: 0 additions & 1 deletion credsweeper/config/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,6 @@ def __init__(self, config: Dict[str, Any]) -> None:
self.check_for_literals: bool = config["check_for_literals"]
self.not_allowed_path_pattern = re.compile(f"{Util.get_regex_combine_or(self.NOT_ALLOWED_PATH)}",
flags=re.IGNORECASE)
self.api_validation: bool = config["validation"]["api_validation"]
self.use_filters: bool = config["use_filters"]
self.line_data_output: List[str] = config["line_data_output"]
self.candidate_output: List[str] = config["candidate_output"]
Expand Down
17 changes: 0 additions & 17 deletions credsweeper/credentials/candidate.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,6 @@
from credsweeper.common.constants import KeyValidationOption, Severity, Confidence
from credsweeper.config import Config
from credsweeper.credentials.line_data import LineData
from credsweeper.validations.validation import Validation


class Candidate:
Expand All @@ -31,19 +30,15 @@ def __init__(self,
rule_name: str,
severity: Severity,
config: Optional[Config] = None,
validations: List[Validation] = None,
use_ml: bool = False,
confidence: Confidence = Confidence.MODERATE) -> None:
self.line_data_list = line_data_list
self.patterns = patterns
self.rule_name = rule_name
self.severity = severity
self.config = config
self.validations: List[Validation] = validations if validations is not None else []
self.use_ml = use_ml
self.confidence = confidence

self.api_validation = KeyValidationOption.NOT_AVAILABLE
self.ml_validation = KeyValidationOption.NOT_AVAILABLE
self.ml_probability: Optional[float] = None

Expand All @@ -52,7 +47,6 @@ def compare(self, other: 'Candidate') -> bool:
if self.rule_name == other.rule_name \
and self.severity == other.severity \
and self.confidence == other.confidence \
and self.api_validation == other.api_validation \
and self.use_ml == other.use_ml \
and self.ml_validation == other.ml_validation \
and self.ml_probability == other.ml_probability \
Expand All @@ -79,22 +73,12 @@ def _encode(value: Any) -> Any:
else:
return value

def is_api_validation_available(self) -> bool:
    """Check if current credential candidate can be validated with external API.

    The candidate is considered verifiable when its `validations` list is not empty.

    Return:
        True if any validation available, False otherwise

    """
    # an empty list is falsy, so truthiness is enough - no need to compare its length
    return bool(self.validations)

def to_str(self, subtext: bool = False, hashed: bool = False) -> str:
"""Represent candidate with subtext or|and hashed values"""
return f"rule: {self.rule_name}" \
f" | severity: {self.severity.value}" \
f" | confidence: {self.confidence.value}" \
f" | line_data_list: [{', '.join([x.to_str(subtext, hashed) for x in self.line_data_list])}]" \
f" | api_validation: {self.api_validation.name}" \
f" | ml_validation: {self.ml_validation.name}"

def __str__(self):
Expand All @@ -111,7 +95,6 @@ def to_json(self, hashed: bool, subtext: bool) -> Dict:
"""
full_output = {
"api_validation": self.api_validation.name,
"ml_validation": self.ml_validation.name,
"patterns": [pattern.pattern for pattern in self.patterns],
"ml_probability": self.ml_probability,
Expand Down
38 changes: 1 addition & 37 deletions credsweeper/rules/rule.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,13 +4,12 @@
from functools import cached_property
from typing import Dict, List, Optional, Union, Set

from credsweeper import validations, filters
from credsweeper import filters
from credsweeper.common.constants import RuleType, Severity, MAX_LINE_LENGTH, Confidence
from credsweeper.common.keyword_pattern import KeywordPattern
from credsweeper.config import Config
from credsweeper.filters import Filter, group
from credsweeper.filters.group import Group
from credsweeper.validations import Validation

logger = logging.getLogger(__name__)

Expand Down Expand Up @@ -73,7 +72,6 @@ def __init__(self, config: Config, rule_dict: Dict) -> None:
# auxiliary fields
self.__filters = self._init_filters(rule_dict.get(Rule.FILTER_TYPE, []))
self.__use_ml = bool(rule_dict.get(Rule.USE_ML))
self.__validations = self._init_validations(rule_dict.get(Rule.VALIDATIONS))
self.__required_substrings = set(i.strip().lower() for i in rule_dict.get(Rule.REQUIRED_SUBSTRINGS, []))
self.__has_required_substrings = bool(self.__required_substrings)
required_regex = rule_dict.get(Rule.REQUIRED_REGEX)
Expand Down Expand Up @@ -198,40 +196,6 @@ def use_ml(self) -> bool:
"""use_ml getter"""
return self.__use_ml

@cached_property
def validations(self) -> List[Validation]:
    """validations getter

    Return:
        the list kept in the private `__validations` field of the rule

    """
    # cached_property stores the first returned value on the instance and reuses it on later reads
    return self.__validations

def _init_validations(self, validation_names: Union[None, str, List[str]]) -> List[Validation]:
"""Set api validations to the current rule.
All string in `validation_names` should be class names from `credsweeper.validations`
Args:
validation_names: validation names
"""
if not validation_names:
# empty string check to avoid exceptions for getattr
return []
elif isinstance(validation_names, str):
# more convenience way in case of single validator - only one line in YAML
if validation_template := getattr(validations, validation_names, None):
return [validation_template]
elif isinstance(validation_names, list):
_validations: List[Validation] = []
for vn in validation_names:
if validation_template := getattr(validations, vn, None):
_validations.append(validation_template())
else:
break
else:
return _validations
raise ValueError(f"Malformed rule '{self.__rule_name}'."
f" field '{Rule.VALIDATIONS}' has invalid value"
f" '{validation_names}'")

@staticmethod
def _assert_rule_mandatory_fields(rule_template: Dict) -> None:
"""Assert that rule_template have all required fields.
Expand Down
10 changes: 7 additions & 3 deletions credsweeper/scanner/scan_type/scan_type.py
Original file line number Diff line number Diff line change
Expand Up @@ -171,9 +171,13 @@ def _get_candidates(cls, config: Config, rule: Rule, target: AnalysisTarget) ->
for line_data in line_data_list:
if config.exclude_values and line_data.value.strip() in config.exclude_values:
continue

candidate = Candidate([line_data], rule.patterns, rule.rule_name, rule.severity, config,
rule.validations, rule.use_ml, rule.confidence)
candidate = Candidate(line_data_list=[line_data],
patterns=rule.patterns,
rule_name=rule.rule_name,
severity=rule.severity,
config=config,
use_ml=rule.use_ml,
confidence=rule.confidence)
# single pattern with multiple values means all the patterns must matched in target
if 1 < len(rule.patterns) and rule.rule_type in (RuleType.PATTERN, RuleType.KEYWORD):
# additional check whether all patterns match
Expand Down
1 change: 0 additions & 1 deletion credsweeper/secret/config.json
Original file line number Diff line number Diff line change
Expand Up @@ -164,7 +164,6 @@
"rule",
"severity",
"confidence",
"api_validation",
"ml_validation",
"ml_probability",
"line_data_list"
Expand Down
9 changes: 0 additions & 9 deletions credsweeper/validations/__init__.py

This file was deleted.

54 changes: 0 additions & 54 deletions credsweeper/validations/apply_validation.py

This file was deleted.

51 changes: 0 additions & 51 deletions credsweeper/validations/github_token_validation.py

This file was deleted.

Loading

0 comments on commit fc0e34a

Please sign in to comment.