From af55e38b22305c3729f088898b8f256298c7b495 Mon Sep 17 00:00:00 2001 From: Roman Babenko Date: Sat, 21 Oct 2023 14:08:57 +0300 Subject: [PATCH 1/6] parsing docx in --doc mode as text --- .mypy.ini | 3 + credsweeper/config/config.py | 1 + credsweeper/deep_scanner/deep_scanner.py | 3 + credsweeper/deep_scanner/docx_scanner.py | 44 +++++ .../file_handler/file_path_extractor.py | 3 + credsweeper/secret/config.json | 9 +- docs/source/overall_architecture.rst | 6 + requirements.txt | 2 + setup.py | 1 + tests/__init__.py | 6 +- tests/conftest.py | 3 + tests/data/depth_3.json | 168 +++++++++++++++--- tests/data/doc.json | 120 +++++++++++++ .../file_handler/test_file_path_extractor.py | 9 + tests/samples/password.docx | Bin 4306 -> 0 bytes tests/samples/sample.docx | Bin 0 -> 5624 bytes tests/samples/sample.docx.gz | Bin 0 -> 3358 bytes tests/samples/sample.pdf | Bin 19295 -> 23165 bytes tests/samples/sample_bad_empty.docx | Bin 0 -> 2550 bytes tests/samples/small.pdf | Bin 0 -> 628 bytes tests/test_main.py | 19 +- 21 files changed, 358 insertions(+), 39 deletions(-) create mode 100644 credsweeper/deep_scanner/docx_scanner.py delete mode 100644 tests/samples/password.docx create mode 100644 tests/samples/sample.docx create mode 100644 tests/samples/sample.docx.gz create mode 100644 tests/samples/sample_bad_empty.docx create mode 100644 tests/samples/small.pdf diff --git a/.mypy.ini b/.mypy.ini index 38c842392..cf4fd1082 100644 --- a/.mypy.ini +++ b/.mypy.ini @@ -45,3 +45,6 @@ ignore_missing_imports = True [mypy-password_strength.*] ignore_missing_imports = True + +[mypy-docx.*] +ignore_missing_imports = True diff --git a/credsweeper/config/config.py b/credsweeper/config/config.py index 275b49107..f53d9b794 100644 --- a/credsweeper/config/config.py +++ b/credsweeper/config/config.py @@ -19,6 +19,7 @@ def __init__(self, config: Dict[str, Any]) -> None: self.exclude_patterns: List[re.Pattern] = [re.compile(pattern) for pattern in config["exclude"]["pattern"]] 
self.exclude_paths: List[str] = config["exclude"]["path"] self.exclude_containers: List[str] = config["exclude"]["containers"] + self.exclude_documents: List[str] = config["exclude"]["documents"] self.exclude_extensions: List[str] = config["exclude"]["extension"] self.exclude_lines: Set[str] = set(config["exclude"].get("lines", [])) self.exclude_values: Set[str] = set(config["exclude"].get("values", [])) diff --git a/credsweeper/deep_scanner/deep_scanner.py b/credsweeper/deep_scanner/deep_scanner.py index e5db0bd76..16838cccb 100644 --- a/credsweeper/deep_scanner/deep_scanner.py +++ b/credsweeper/deep_scanner/deep_scanner.py @@ -16,6 +16,7 @@ from credsweeper.utils import Util from .byte_scanner import ByteScanner from .bzip2_scanner import Bzip2Scanner +from .docx_scanner import DocxScanner from .encoder_scanner import EncoderScanner from .gzip_scanner import GzipScanner from .html_scanner import HtmlScanner @@ -34,6 +35,7 @@ class DeepScanner( ByteScanner, # Bzip2Scanner, # + DocxScanner, # EncoderScanner, # GzipScanner, # HtmlScanner, # @@ -71,6 +73,7 @@ def get_deep_scanners(data: bytes) -> List[Any]: deep_scanners.append(ZipScanner) # probably, there might be a docx, xlxs and so on. # It might be scanned with text representation in third-party libraries. 
+ deep_scanners.append(DocxScanner) elif Util.is_bzip2(data): deep_scanners.append(Bzip2Scanner) elif Util.is_tar(data): diff --git a/credsweeper/deep_scanner/docx_scanner.py b/credsweeper/deep_scanner/docx_scanner.py new file mode 100644 index 000000000..9592c33c9 --- /dev/null +++ b/credsweeper/deep_scanner/docx_scanner.py @@ -0,0 +1,44 @@ +import io +import logging +from abc import ABC +from typing import List + +import docx + +from credsweeper.credentials import Candidate +from credsweeper.deep_scanner.abstract_scanner import AbstractScanner +from credsweeper.file_handler.data_content_provider import DataContentProvider +from credsweeper.file_handler.string_content_provider import StringContentProvider + +logger = logging.getLogger(__name__) + + +class DocxScanner(AbstractScanner, ABC): + """Implements docx scanning""" + + def data_scan( + self, # + data_provider: DataContentProvider, # + depth: int, # + recursive_limit_size: int) -> List[Candidate]: + """Tries to scan the text of DOCX paragraphs as strings, line by line""" + candidates = [] + + try: + docx_lines = [] + + doc = docx.Document(io.BytesIO(data_provider.data)) + for paragraph in doc.paragraphs: + for line in paragraph.text.splitlines(): + if line: + docx_lines.append(line) + + string_data_provider = StringContentProvider(lines=docx_lines, + file_path=data_provider.file_path, + file_type=data_provider.file_type, + info=f"{data_provider.info}|DOCX") + pdf_candidates = self.scanner.scan(string_data_provider) + candidates.extend(pdf_candidates) + except Exception as docx_exc: + logger.debug(f"{data_provider.file_path}:{docx_exc}") + return candidates diff --git a/credsweeper/file_handler/file_path_extractor.py b/credsweeper/file_handler/file_path_extractor.py index 84fa1213c..ba8dc6f5b 100644 --- a/credsweeper/file_handler/file_path_extractor.py +++ b/credsweeper/file_handler/file_path_extractor.py @@ -143,6 +143,9 @@ def check_exclude_file(config: Config, path: str) -> bool: return True if 
not config.depth and file_extension in config.exclude_containers: return True + # --depth or --doc enables scan for all documents extensions + if not (config.depth or config.doc) and file_extension in config.exclude_documents: + return True return False @staticmethod diff --git a/credsweeper/secret/config.json b/credsweeper/secret/config.json index 6914ac849..bed73ef44 100644 --- a/credsweeper/secret/config.json +++ b/credsweeper/secret/config.json @@ -4,13 +4,15 @@ "containers": [ ".apk", ".bz2", - ".docx", ".gz", - ".pdf", ".tar", - ".xlsx", ".zip" ], + "documents": [ + ".docx", + ".pdf", + ".xlsx" + ], "extension": [ ".7z", ".aac", @@ -71,6 +73,7 @@ "/__pycache__/", "/node_modules/", "/target/", + "/.venv/", "/venv/" ], "lines": [], diff --git a/docs/source/overall_architecture.rst b/docs/source/overall_architecture.rst index 3344454a1..093bdcdcd 100644 --- a/docs/source/overall_architecture.rst +++ b/docs/source/overall_architecture.rst @@ -15,6 +15,7 @@ When paths to scan are entered, get the files in that paths and the files are ex - exclude - pattern: Regex patterns to exclude scan. - containers: Extensions in lower case of container files which might be scan with --depth option + - documents: Extensions in lower case of document files which might be scanned with --doc or --depth option - extension: Extensions in lower case to exclude scan. - path: Paths to exclude scan. - source_ext: List of extensions for scanning categorized as source files. @@ -36,6 +37,11 @@ When paths to scan are entered, get the files in that paths and the files are ex ".zip", ... ], + "documents": [ + ".docx", + ".pdf", + ... 
+ ], "extension": [ ".7z", ".jpg", diff --git a/requirements.txt b/requirements.txt index 0e903ccea..bee75005f 100644 --- a/requirements.txt +++ b/requirements.txt @@ -9,6 +9,7 @@ openpyxl==3.1.2 pandas==2.0.3 # ^ the version supports by python 3.8 PyYAML==6.0.1 +python-docx==1.0.1 requests==2.31.0 schwifty==2023.9.0 typing_extensions==4.8.0 @@ -48,3 +49,4 @@ types-python-dateutil types-regex types-humanfriendly yapf + diff --git a/setup.py b/setup.py index 0353ce5e6..a5f591e8c 100644 --- a/setup.py +++ b/setup.py @@ -15,6 +15,7 @@ "password-strength", # "pdfminer.six", # "PyYAML", # + "python-docx", # "requests", # "scipy", # "schwifty", # diff --git a/tests/__init__.py b/tests/__init__.py index 4a0d5fd44..ffc9dd122 100644 --- a/tests/__init__.py +++ b/tests/__init__.py @@ -1,7 +1,7 @@ from pathlib import Path # total number of files in test samples -SAMPLES_FILES_COUNT: int = 120 +SAMPLES_FILES_COUNT: int = 123 # credentials count after scan SAMPLES_CRED_COUNT: int = 383 @@ -11,10 +11,10 @@ SAMPLES_POST_CRED_COUNT: int = 293 # with option --doc -SAMPLES_IN_DOC = 426 +SAMPLES_IN_DOC = 431 # archived credentials that are not found without --depth -SAMPLES_IN_DEEP_1 = SAMPLES_POST_CRED_COUNT + 16 +SAMPLES_IN_DEEP_1 = SAMPLES_POST_CRED_COUNT + 21 SAMPLES_IN_DEEP_2 = SAMPLES_IN_DEEP_1 + 16 SAMPLES_IN_DEEP_3 = SAMPLES_IN_DEEP_2 + 3 diff --git a/tests/conftest.py b/tests/conftest.py index 49f685146..4f8da811b 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -38,6 +38,9 @@ def config() -> Config: config_dict["validation"]["api_validation"] = False config_dict["use_filters"] = True config_dict["find_by_ext"] = False + config_dict["exclude"]["containers"] = [".gz", ".zip"] + config_dict["exclude"]["documents"] = [".docx", ".pdf"] + config_dict["exclude"]["extension"] = [".jpg", ".bmp"] config_dict["depth"] = 0 config_dict["doc"] = False config_dict["find_by_ext_list"] = [".txt", ".inf"] diff --git a/tests/data/depth_3.json b/tests/data/depth_3.json index 
514f03140..44d55fd7b 100644 --- a/tests/data/depth_3.json +++ b/tests/data/depth_3.json @@ -6266,30 +6266,6 @@ } ] }, - { - "api_validation": "NOT_AVAILABLE", - "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.97709, - "rule": "Password", - "severity": "medium", - "line_data_list": [ - { - "line": "password = Xdj@jcN834b.", - "line_num": 2, - "path": "tests/samples/password.docx", - "info": "tests/samples/password.docx|ZIP|word/document.xml|HTML", - "value": "Xdj@jcN834b.", - "value_start": 11, - "value_end": 23, - "variable": "password", - "entropy_validation": { - "iterator": "BASE64_CHARS", - "entropy": 2.8208020839342964, - "valid": false - } - } - ] - }, { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", @@ -8144,6 +8120,102 @@ } ] }, + { + "api_validation": "NOT_AVAILABLE", + "ml_validation": "VALIDATED_KEY", + "ml_probability": 0.94412, + "rule": "Password", + "severity": "medium", + "line_data_list": [ + { + "line": "{http://schemas.openxmlformats.org/wordprocessingml/2006/main}t : Password = WeR15tr0n6", + "line_num": 1, + "path": "tests/samples/sample.docx", + "info": "tests/samples/sample.docx|ZIP|word/document.xml|XML", + "value": "WeR15tr0n6", + "value_start": 77, + "value_end": 87, + "variable": "Password", + "entropy_validation": { + "iterator": "BASE64_CHARS", + "entropy": 3.321928094887362, + "valid": false + } + } + ] + }, + { + "api_validation": "NOT_AVAILABLE", + "ml_validation": "NOT_AVAILABLE", + "ml_probability": null, + "rule": "Github Token", + "severity": "high", + "line_data_list": [ + { + "line": "ghr_Ku7ikDwqD1Ge2u3Wf1UM3z2SLN230RP1I8Wf", + "line_num": 2, + "path": "tests/samples/sample.docx", + "info": "tests/samples/sample.docx|DOCX", + "value": "ghr_Ku7ikDwqD1Ge2u3Wf1UM3z2SLN230RP1I8Wf", + "value_start": 0, + "value_end": 40, + "variable": null, + "entropy_validation": { + "iterator": "BASE64_CHARS", + "entropy": 4.632263329852917, + "valid": true + } + } + ] + }, + { + "api_validation": "NOT_AVAILABLE", + 
"ml_validation": "VALIDATED_KEY", + "ml_probability": 0.94412, + "rule": "Password", + "severity": "medium", + "line_data_list": [ + { + "line": "{http://schemas.openxmlformats.org/wordprocessingml/2006/main}t : Password = WeR15tr0n6", + "line_num": 1, + "path": "tests/samples/sample.docx.gz", + "info": "tests/samples/sample.docx.gz|GZIP|tests/samples/sample.docx|ZIP|word/document.xml|XML", + "value": "WeR15tr0n6", + "value_start": 77, + "value_end": 87, + "variable": "Password", + "entropy_validation": { + "iterator": "BASE64_CHARS", + "entropy": 3.321928094887362, + "valid": false + } + } + ] + }, + { + "api_validation": "NOT_AVAILABLE", + "ml_validation": "NOT_AVAILABLE", + "ml_probability": null, + "rule": "Github Token", + "severity": "high", + "line_data_list": [ + { + "line": "ghr_Ku7ikDwqD1Ge2u3Wf1UM3z2SLN230RP1I8Wf", + "line_num": 2, + "path": "tests/samples/sample.docx.gz", + "info": "tests/samples/sample.docx.gz|GZIP|tests/samples/sample.docx|DOCX", + "value": "ghr_Ku7ikDwqD1Ge2u3Wf1UM3z2SLN230RP1I8Wf", + "value_start": 0, + "value_end": 40, + "variable": null, + "entropy_validation": { + "iterator": "BASE64_CHARS", + "entropy": 4.632263329852917, + "valid": true + } + } + ] + }, { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", @@ -8192,6 +8264,30 @@ } ] }, + { + "api_validation": "NOT_AVAILABLE", + "ml_validation": "NOT_AVAILABLE", + "ml_probability": null, + "rule": "Github Token", + "severity": "high", + "line_data_list": [ + { + "line": "ghr_Ku7ikDwqD1Ge2u3Wf1UM3z2CLN230RP1I8Vd", + "line_num": 1, + "path": "tests/samples/sample.pdf", + "info": "tests/samples/sample.pdf|PDF:1|RAW", + "value": "ghr_Ku7ikDwqD1Ge2u3Wf1UM3z2CLN230RP1I8Vd", + "value_start": 0, + "value_end": 40, + "variable": null, + "entropy_validation": { + "iterator": "BASE64_CHARS", + "entropy": 4.732263329852917, + "valid": true + } + } + ] + }, { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", @@ -8336,6 +8432,30 @@ } ] }, + { + 
"api_validation": "NOT_AVAILABLE", + "ml_validation": "NOT_AVAILABLE", + "ml_probability": null, + "rule": "Azure Secret Value", + "severity": "high", + "line_data_list": [ + { + "line": "qpF8Q~PCM5MhMoyTFc5TYEomnzRUKim9UJhe8a2P", + "line_num": 1, + "path": "tests/samples/small.pdf", + "info": "tests/samples/small.pdf|PDF:1|RAW", + "value": "qpF8Q~PCM5MhMoyTFc5TYEomnzRUKim9UJhe8a2P", + "value_start": 0, + "value_end": 40, + "variable": null, + "entropy_validation": { + "iterator": "BASE64_CHARS", + "entropy": 4.620007704961091, + "valid": true + } + } + ] + }, { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", diff --git a/tests/data/doc.json b/tests/data/doc.json index 5c28311c4..f19ee0566 100644 --- a/tests/data/doc.json +++ b/tests/data/doc.json @@ -11363,6 +11363,102 @@ } ] }, + { + "api_validation": "NOT_AVAILABLE", + "ml_validation": "NOT_AVAILABLE", + "ml_probability": null, + "rule": "PASSWD_PAIR", + "severity": "medium", + "line_data_list": [ + { + "line": "Password = WeR15tr0n6", + "line_num": 1, + "path": "tests/samples/sample.docx", + "info": "tests/samples/sample.docx|DOCX", + "value": "WeR15tr0n6", + "value_start": 11, + "value_end": 21, + "variable": "Password", + "entropy_validation": { + "iterator": "BASE64_CHARS", + "entropy": 3.321928094887362, + "valid": false + } + } + ] + }, + { + "api_validation": "NOT_AVAILABLE", + "ml_validation": "NOT_AVAILABLE", + "ml_probability": null, + "rule": "Github Token", + "severity": "high", + "line_data_list": [ + { + "line": "ghr_Ku7ikDwqD1Ge2u3Wf1UM3z2SLN230RP1I8Wf", + "line_num": 2, + "path": "tests/samples/sample.docx", + "info": "tests/samples/sample.docx|DOCX", + "value": "ghr_Ku7ikDwqD1Ge2u3Wf1UM3z2SLN230RP1I8Wf", + "value_start": 0, + "value_end": 40, + "variable": null, + "entropy_validation": { + "iterator": "BASE64_CHARS", + "entropy": 4.632263329852917, + "valid": true + } + } + ] + }, + { + "api_validation": "NOT_AVAILABLE", + "ml_validation": "NOT_AVAILABLE", + 
"ml_probability": null, + "rule": "PASSWD_PAIR", + "severity": "medium", + "line_data_list": [ + { + "line": "password = Xdj@jcN834b", + "line_num": 1, + "path": "tests/samples/sample.pdf", + "info": "tests/samples/sample.pdf|PDF", + "value": "Xdj@jcN834b", + "value_start": 11, + "value_end": 22, + "variable": "password", + "entropy_validation": { + "iterator": "BASE64_CHARS", + "entropy": 2.963119653306635, + "valid": false + } + } + ] + }, + { + "api_validation": "NOT_AVAILABLE", + "ml_validation": "NOT_AVAILABLE", + "ml_probability": null, + "rule": "Github Token", + "severity": "high", + "line_data_list": [ + { + "line": "ghr_Ku7ikDwqD1Ge2u3Wf1UM3z2CLN230RP1I8Vd", + "line_num": 3, + "path": "tests/samples/sample.pdf", + "info": "tests/samples/sample.pdf|PDF", + "value": "ghr_Ku7ikDwqD1Ge2u3Wf1UM3z2CLN230RP1I8Vd", + "value_start": 0, + "value_end": 40, + "variable": null, + "entropy_validation": { + "iterator": "BASE64_CHARS", + "entropy": 4.732263329852917, + "valid": true + } + } + ] + }, { "api_validation": "NOT_AVAILABLE", "ml_validation": "NOT_AVAILABLE", @@ -11483,6 +11579,30 @@ } ] }, + { + "api_validation": "NOT_AVAILABLE", + "ml_validation": "NOT_AVAILABLE", + "ml_probability": null, + "rule": "Azure Secret Value", + "severity": "high", + "line_data_list": [ + { + "line": "qpF8Q~PCM5MhMoyTFc5TYEomnzRUKim9UJhe8a2P", + "line_num": 1, + "path": "tests/samples/small.pdf", + "info": "tests/samples/small.pdf|PDF", + "value": "qpF8Q~PCM5MhMoyTFc5TYEomnzRUKim9UJhe8a2P", + "value_start": 0, + "value_end": 40, + "variable": null, + "entropy_validation": { + "iterator": "BASE64_CHARS", + "entropy": 4.620007704961091, + "valid": true + } + } + ] + }, { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", diff --git a/tests/file_handler/test_file_path_extractor.py b/tests/file_handler/test_file_path_extractor.py index 16fb5236c..c3eb35ca4 100644 --- a/tests/file_handler/test_file_path_extractor.py +++ b/tests/file_handler/test_file_path_extractor.py 
@@ -45,15 +45,24 @@ def test_apply_gitignore_n(self) -> None: @pytest.mark.parametrize("file_path", [ "/tmp/test/dummy.p12", + "dummy.docx", + "dummy.pdf", + "dummy.zip", "C:\\Users\\RUNNER~1\\AppData\\Local\\Temp\\tmptjz2p1zk\\test\\dummy.p12", "C:\\Users\\RUNNER~1\\AppData\\Local\\Temp\\tmptjz2p1zk\\TarGet\\dummy.p12", ]) def test_check_exclude_file_p(self, config: Config, file_path: pytest.fixture) -> None: config.find_by_ext = True + config.doc = True + config.depth = 1 assert not FilePathExtractor.check_exclude_file(config, file_path), f"{file_path}" @pytest.mark.parametrize("file_path", [ "dummy.JPG", + "dummy.bmp", + "dummy.docx", + "dummy.pdf", + "dummy.zip", "/tmp/target/dummy.p12", "C:\\Users\\RUNNER~1\\AppData\\Local\\Temp\\tmptjz2p1zk\\target\\dummy.p12", ]) diff --git a/tests/samples/password.docx b/tests/samples/password.docx deleted file mode 100644 index 6d6db3a52ef375600d13248073e819f4949baed2..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 4306 zcmaJ^2{_dK7RHcWc4f#~COg@`u}pTtkbSH%Gzf6cu1U3G%tqUzLCBK9yKc0qAH9}+ax}OJb zt#b^yV~_ImAI8N&+Di}+Z}GuKahHGzMKLbBVX;OtrF$bfnO1@&(~*zt8MPCmJFl!3 zOG$WRvUr>fjJmmrp#6JUM)FgRb12D^S$O0yqQ5x8L?XI}R#4O9rzKpi8bJVSb) zce@SR%*ORSY99lB%Z2KaQ!EV-^Nr!}t4Kmb1pU7mVZfho^mKswczPkl96WvCq5Fn|wE zIV^AaR}p1bdO5@h7#OuVUm}yNa9S@$8g5%d`mk$5Bz*={lrY;RK5~ms!O};@d^x_@ z5`Yyq=7z|}rGQ!m$^m>|I>|+l=+rh&DXY|-`MhbE1O=+ex&~Nl^dao#2dDlm{}Cw0 z$%F=)#0lhiL*(}o;HlE+qqwCI6k>VBFsTc{-i5PQu;G{GPtaBf?O(6N#oM#JLtlT- zv@Sv3dv^f)p^7hzA6$VElnwnzhVhd3Jg-_J4_7bG+I?I}x_;*tuKwQ2U&-B*$C^5? 
z{Cs4LJO@l=s#y9y2o2RO5oJ|;S)60KzoNk&o|x^$;`>?YV2dlxlfaLr%)o1VVj`l6 zKk`F=k{^37F9JQ;hJzk$+{`yVm`U~7h=`23&~^&Lb{r*B{2FC%dAV9xa))aihhnl; zwgMVI<+RJQiCR3|6IHnBXH+VPO_oYf@fIlzT0i__(ro;6O4L_Rg8<092eW%ekdOxhLfM#lo!j9Ek4AU)%;=zR8!kLL)^Nux%64 zBzh^}SBwYssmosDq1agWBuzl84EQ5;y(V0uPgf~(!&^SPfR@=pW>L6Pj?FyUC!>gl zO+AM}>TctFg=bve7{7LV1663SW9QDy^?E;{9u3&XrTcj>NDj>DEDoaY4W*q4AJ_WO z4D7l^eD1V5fS2E*?%A6fWp5Po%Nu&%fCLH>Ji61JCgjP z`7tTudp1VGH`=p}1;)5@3qRl7H#o%%xG-Tta!~O)JWcjaOO?B|YZR~)tH{otG!x;$ zd0Q*B8N#@ZN$DY05>?(W3UQ}R+kF3VqP29puj8OnS>@6Fg->ib*+o&>Sn`joC_*}R z#l&BB;xEhe$8-=B<3~D9CZaG^FL8pKo}i?xxQ>kr)`g=A6RIO6cQlf!({^Gd>a-&u zfCSmY?WpfUQ)wkN!4ghNIf2gNp#dR=k_z56B)%HSyEoOWE`cH|Bsb?i_qyrvu3$tl zq8SED7kv)Iz)GwpzInXN$8h zAvkc_NO7xRTS^RO3syEO3ZHlvyT%%~f2N@TyHZ;#?w^-^WO`gMPycS(sFqBahGcGZ z4+0u{Yht{_kRt4HUCyhE2C2J1f(SO*G~k)Q$}@Zc&VwAj!oLkSLBV1c&u2MZT)wN7 zPHXPR4&0AGzZaPjj8z6QU2r;xS-ebzYL-%p(J@`Sx*gut?GpE&+a4X)vD@0;9_aosSwwR!1^rpy_B zF>B}cmoY`PpJsu!S&i}eqo>6(I-ulr`>*y(I5`6L4k^O1poc?zKA$)^ zR=;2GAxVwVX|PgT{dg4Q)kU(Rg62-`h%O(j?u|3})ceHGH9{sY1kv3ay@37feP*1$ zn`AX<2y`+1;??lSr52I8a3go)o1;d%mm?p4T$d6K*BwolR~i#EUhe>vN}^IRyv7z? 
zOl3NTdmB{>ERRH>{QLA5UxL$vRJNt$ls?^T@>9P}rY@NyS|BOTHX^cN&o<&(k}UKl zj~TmHx@-Ty6dv%N2Yl!#BR0nB~dR#sSc=82$ z#12)ixX84%4DRsO zG_;GpI*n5{yquZNey@Z}i8XC!e-C+UCZjJMB4Sf|nss=sx*r@P^lldCd-P~%TOY>7 zuw9lipwF=}cyGOALw=}4rLuc~De6srexF=W%L?}wy5X!I^e56acwl$W({p_NN1cgm zw6e4tb~BGXEr&qgG>ux%9|<2`E?;~KiT?cQ>ZsvC+O)upAg=Kdk!K#I`BbYb3)^S> zObv6_nyeI3U1-Mpdsrt}!*maFiapJ~io_@hQM0QyIV>Kgwm2-J3_z7(4o$dOdE?6B zE8ew*rtwFz<#b7TO`5r8rST$`fNi?%3>&iXlo#J=3CbALcah~GzOXTre=2Ooe;2ls zr-!ex{WZ5AW~v+t`P)oU^`e$*$Ri)2Sr}&{gYp)LVa~(m)$fwt@qfeB>kIMp<^mb;;^HCer}t)eTA7 z%$x|U+LXqr?3g&Ps#ny*g?X5 zeGwkcKXe0rPtl`YoCZI7+h%NfDF>AK{Oca+78Q`&IV!N)#oScu)-$k0Ys};WJJ*sD z4Vn(0am7&=uF6SOT@(U=%)eU6$*%i_goQ-$Qs1W2q#JpeCb>FsXUfW%(a=QHSG8w* z@+CK;t3x$skAtYZ5p)!}3EI2jdlYN0+N6E84*H)2%tz~(ep>}P3#5iq;4_a?fq$sJzX-?aNls>P+5-CVaW~~AzuI`sMQ9()I9My z`~SwC34a2niZ4XGRk1Y=^nxR;2}=7S^@p`r#~sG@z*6h0R4C4WzS+&#Na5?pbPfp<3@)vJhX|+Pf`JPAzr%25C1868mbYe&dFRQcJ_? zcp6}H8|iUA$&+sTyd~Uk#!Y=#tFAbt(Jgb-9EE!oc24^ulr3(*uILsk*mkyMD)ZPO zGV1J`-L0B^+g4Q|l()z*=WnDU=`0EskD~o1?{WDl)7m99#@qebPtH|#Lj2{v1D<2_ zMc5;#858&(ZQ~-)73aynnG2>m(%)j`PH=1i3ha5ISl=IUK~PKm1GZl{r@Y{F({n{J zz*WImZAFDUr?XfOJ{P^;OPF<4GhHn_Y^9c!X~hPC0Tjg2VM&+QCKJV9A#g3i%6JmU zDX8BvbQ|LdvH3Cjh)I}#aUtmQ&#p)25bKU|AZG6p$2>Vr-VvMch{2A zAW|ZN->&a_j_CUxXRqhEo}w%J)@(6i${e;NJxm4BC2hGbwP;H-}jtgw$1|l zn0L`olREBoF#fz(`u!v?_iBA5XhmblzzbDxzv!MF{1=+4c}A!?9blc#-6kgClhswW zoLPpmx1RZNj0AwfI{#3l#b$HHyz6mgL-6ey_&s<@uF~hV`^8;a9YB?sFMiK^Rn1nD z*#e!0oj)6OT_>fh@)*WlsI%S?JlpcKA2PHnLRMDszI02EIUkQmQ!>6rPs(Z34ylfn z=*l(8_gC_V?VGfp#J$uIo`(&5-DTPFvRaGjZPJU@;J*O`Xt<^2Sf)_Un&?46h7;dE zE9LhvtRE%kST9cRH+5!7NA)KwhsCB(dSmNo5D>079VxA$uRnl;g{AYa1|moQ1B$TJ zaY8sa3s@qYVEm8lY`t|&wVMU0ypIj|yirr6VC(9bmkVkXl!ElAhwhg~i)^Nv=TXyk zwiy(toj0ooGbQc&iiVH822>zI@)mN@1kVhTqCJpiW5nM<9_>O&Wm(UnGuY@vv=R4< zxk^NCZmRoNx9bzNE(%XJbwy)PHH9*rdN9l8I@YI8KE<^s&%>_WF$! 
z9R;|Echp|VafAh}!410!VFyS2Ae+qB^%l3zRV7RWlXhpC^`g(aU&WIYe2fDKyRApQ zC6AtEPW=#Xg1Y0;btXI1LH3kcYYeLL!Sco%h5~Uougyf*$K703>q1bi_4JmEjQ_NK z9(L^n&{!^Ur$v@o=nNm@DXKk}1siNEteHQ0it-mvxjMTb>@dDc?JEPKeHFMcn@>17 z4Izjbstl8mf*eQBRbx9NI&Oz~d$A#RB7>P_w1%b66kmfU$sfKE;N`};MhfD*hB(WW zP;2U(H)nd1v4YQ6?>2721_(4^XuA1+&hQy$wk{ zSW(6N-iawq9 zMC;2)Q@KDFdc?vm=Xi6_K&EVEH$#3Ei zddK>7)R@8LeY&E|8b-4BTwepKh#Cs@$-?6teXI^Tr?iNe786UWZb&>jU6)f%uo@1( zSsX88dXx@(+i%s`Q5uE#f^<*TI-WopA65!9#~C~;#a7w@K7EMiOfj_Zr86|!C!{j z(i4?+?qx4kAKH2r)>o+Rc-==~d;qET=u<&sOZt!4VnF-A9pMBO_)o4xCj?hJn7s?e z75~Yby4`4yfg4V#Eq29flK`99xYFSFuQQa83Qa3@V(qv`b|jWURLEwB+)p;l!Ys+-+!LikH46r#F;uLy$XS zlO3?eW1oAY3wXjFxjJ(w%5pjYomTLhrXec*_Hd8cPA?&%x#~_nNYuM*KCSrL*90xSo>UGu3sQDG{(%FW-kRh>e@EJ4#APN#%4@>~l6{Qv+J!_;ftuGD6g|)UR&h z%%V)guW|{|bW=EUSo@2OWbCg8SGbz-3;}uUmg#aj_5F>a->{NWPlV&KWrV-l3gL<= ze0Beco4^Q6&Y#}KmFa`&P|Ogd?v1+t`!UkYNX4$#h16DRPV~4d6E_<3ET#@@3uQ;| z5zQ@)1w%}ExkR}XA{YD- zBM^x1Ih?+?qDk?P%LY{tzr>j{H7&K}=`aSiG5WJiHNZi;6Bdm zqfLp>8u;CEnY1SZ?n49;MYbXG^ZvvfMUKN0;C3LiRD?~@U%vzJqJQWnSrCP^Y2ceo z*7l<`B}ahtRgH3yZk!IdX3DC-a#A|3*kMeq9xd5ybA|*{-1Iub<)v%kp=1@R6C92~Y2mgmtg0?H9v zxSK}F*B!R&cx@-#9BN30cZ?eG?y58qXd5pi7z-eU)mD2bP|oG?%Jd|}H86OM{$gsB zqi;rQeWB-9VobDO4zOX5L1THB;7=Ks>~B0fyLi~boH0?}cGm(iB?vxydyjlYij-wU zA6~hks}C!jsH~QKFG0JwFp;!PwN`?Xe99V~lkK8=>c8kdv$E6pAxXesGtR3&LnQY8 zB3a+pw*<(pkf&{}zMc)+reD&;l$4%mU{m_yPQ9!9b^sF{d(a~9^<3cKk?0T$fgkmd{B=&K5C=CrVnvUMx zqxnQ#+A*Wjhj(~%Ql7$EuOAGohQ4OElnz-0k7jh)ss}i7kvzo8*>aKrwF$z! 
zA57NVu{)_=pUWL8C<_doINi$W41k_;5x38ZV2$db!yo&*XMUl^f_17oY-R=j>BnG zZtD}98p$Y&Ez=5DQ}Pvfi7D0TNW`h6G0vXu)u1|k`D3kn=KxGNB|P8E zqrk<&(jfU$I8po^P7e_FE)a|Rwm&#@qmH~*3%Wk+1G|sMtjP$KZ}YIX5*^BQ<5u_O zl|f6`N~}9}9fwsE!0b z*(4&KWb1bIsMz>Cr3Z}}d%9m7eLLK)m}(QB@zYF=Dp8@g19(`R0kWz%ieC~VSs+!I zVCYuJK=+I`=@aFpd*dog0pleg?lff_qmXqRd(H0hh8#*$t$E$GRL73{0jZCUKya2d ze4M4H&UnThe6bHFaII`*wOZtu{l=7L!}rtP3c->#EU19H_}h<>UGkL^DkHmv7`arcGi6t9l8XUc6q)I zvnLWH?qY0AhaaHKjwUE5w4MK*B>q!^onbC6*7jCEk`_#--o8bUWagAQ=*TECSSDOT zJ~2a4isKARC65w2fh)vi=3ukCmDDKXa%cV0Uag65P^N^%1MM==uyP@Q%B|F>;IYSB zV#nHf=Kd2+a^%L&Dq@?XvrZ4`l;he0?L%fXizGvH+Kt?9k-X>N2oF7s!wsoEG3V?u z?OKjD%(WYl1Yv){&oe}Hp1Qm%3hOng>3=WN`oDv8MkT_)VMk6jQ0PM?iUPK zC|PxBCCrkvuU#g$LS{$H`4nzHuBTwbyDh4HHb-XGw>y$~2i|jjFK^!TYMz;T{9hYh zyPuM~W;AIJ+lI~$sYVxrvKpF3_q*x|!GD=ew9IUK=LgAdOuHNWAbKARX(WlzcFaOQ z|87U>zp?59huOgde!Q<^3czfr8%%%xGbX(A8X z7TYe-SC&L;v7}&F#Cqc~CFtgEx4-lKl3gEFf3xRe4G^M8`>S-(a5tE9Mtr4EqH-6z-I#tt$gVXSj*G`Bl-$_$X|~o zc_M+0`7+-#YF7z*kv+simXPmssmwCO5oU7ij~1CV#cZ6gpD;Ql`O+6-4=o*BduRRW zvXitd8CLENS4(3ypf$GE!dQncav9v8IIv=TzULP#@j&|fmo60OOJKIwd5wXO*!;Sa znt=Ndve`kVSvxz#^${60vT$iFqC(hPzFhY)W1Ce4Ax$v;JKda78Tk_1{3Ii9_Jx8G zDS|JHbz1rzX*>zot}~py^5zgnjW9-4el33Ga5(mq;AW^Hf|M8kT5p1M(~|I1fDnSW z;6#{XorC|4XMTR|RjsMO&#>5SKjN|)&ZeCR*&J?ww!8fh0aJaYvVJY=QDkB~*j4HK zj6X>f2Si+sxZLtiwp`IsuZgu~jd^G^62%Qsb?SE6 z_tD)QPC5_>hF!`&q7@z=C<4R|j=m#{E$acD?0nxsoj0kjA~f)skM?^$a$=&DP$gxQ z4NWk{3IBvLOsi_Fh|Y%S9^xLv!vW@OiosZ8d^aW~Dj%PPb{NOi!u-MNQ-CNAmpfw} z6R{&;S!jLknOD0Sgq12efkrH5G0Cg{<`vF#J4ov5CJ$>D`Ky!9*)!gd>ly$&%x>n4 zL&X?Z!ioUt=EX+a}^Ehl}3G+5JXJUhr8(A zmn3u7LsAy~Exxp4-Z{PljbyBcX$~`5K-G^ufw%Gm{ol4~!QDRkd{u4!^hS7W$%p}Z zO7Je2cCtUx_fjYDmvp#*7uT@G-u^#-YOcIU~C`9Ky`V zew7Oov)>gj&MGiT`KPd=xB8z+((mdQM*{FH7qs(z~fGv4@pfs6etW^(XTve5

!nRSAJ;yA4INB*Z=?k literal 0 HcmV?d00001 diff --git a/tests/samples/sample.docx.gz b/tests/samples/sample.docx.gz new file mode 100644 index 0000000000000000000000000000000000000000..7c4c560125a8fa2fd51fe9d351742122de65c3d9 GIT binary patch literal 3358 zcmV+(4dL=1iwFq+xGQA<19M?*aBO8RWN%}50PP!1ZzM;xlg%#4hL3~<6p27uqrHWh zo}LeTJhCU+*m1I~*K4h3;{bxCrn_dQ*Zq~M9*?~zQ2qcXKq3+nNI7uH1tAXHkvJeO zfeQjf3Q{BvC>JgWiB})pUHvik*dA|4#7J#-SG{`g)qAhrM_2XE%}bA6)!6Ut_U*mj z{_LF(wy$U!yPnaW*S@_T_!0J_d;Oyj)9=&{J@;<^3GGtndoSV-e)A%p96qT%^4N!$ zG>!jReF*^c15BtB_-hrjR1{WD#xY+tWVb}<0uNdhC!_{ z_7JTFA@-qX7!VId@JL2Rh^&1y!bYQBUotGgQ8i*XD!tVWJVX;W(qB1*WlqbA>PpTV-EN%n+sjI?|LdbatUan}?0QO-p7e$P)1oW!wHF|~KK4RzsTjnV-u8nC zxqZjOJ+lMt$%xhbYKRLYjMrY~XQcG6=?!g@HV=FRN z?E0W()t6Vyp=B-)td`j^(bDqJUa4Dcgs|OSUb0QAzG|qX7~R)}CVFPOR)-%4h}%|o zAU4pR38uhrR`+1J(_U-U5I3vqV?>Y@0eG*yvT7)gO4ptfjdvl?0D>h`1yVYI57&CnKqv-v=fC;ke>C7UQ8OVj$p%r!+{=7MMt?<+dtcN3HqX-$fq$jpV z+e%P|6XJ3Nwq;-!Gs~lfSu>3k7DVdN(gD0jf%Ke+d*%|*xHEx1q)q}jola}hCIUqXWFq6K5 z4XQZx8#dH)H*B+__f@Mu6YgV{Y#)&kh5%BmgN$;98uK$$7ILzDl}5D!OL46-gQMmm zIVwp}iXuu2w;3$87Rgc;BIT!8)bQLhW0}V+s>`q_IEwk!RDSrA@$l0w%me$E-j88X zvg`RQl0sSdDP5^2>s-`wsjiip@nNjXq>xKF$d-){V=2a!oFWCaYJ3<=kC$~R{X)rj zss)#<7=dQ~74!J!+wcAPaZO{_m$FDwUM?hSL|slmCRJ!2q1psZc_l8BMdbK#Qbrb{ zl#1^2DqhqQdY`MP7qj|`%raSn$N84cE5q8}i4ty1SdW?V)B z;6_EbA^%A@31IIxPQup^_=<2jd>jB-7EGl`+w7oby(wW8AaPs5Lvv{bW#JY4%>q1X zHgOJrGlg#z;5z`F!{18b+XZ-QWyMNlq5z58r4smtY2pk#2O7JMzh5eG9787{!@zy5 z6ujLwQD-2*Qz%0*A2HsKGRCCXk%#UFWD@|ma=}u4o#oj>s1esH*NKDNiXJ+VHQq#? 
z;~oKv72Se*k}{uSGSaAvkuqij-$$=c^j+lBlSf+gY$J^vF6kkDuY(Xn8GQxY123L6vYpZrfIW+N{gJRCxaI-@0h zgg$|!w0H5qMj2;=(dMVqe)AJEGRxK|1opTk_g?LH-~95*&mYk=c0CHw~ z2M@k+`vHP1fE9&8HA5b9b3}KKyg}fW(rPT4mPktGo)(=tl%A0kln$7PMYW5GGvri} z%QqBbJb8H=Wt^Fd7J|wd%a=+R6LNYu>`>sLN@t1aLOGE)c~|k3xAw37^D~;pt|Ts~ zZ<;R1Q^X|XDi78w6XJK}60ar>v*qMB-dT(|s#$aPcq_ZA9SV^&^FDL@me3xG`uPU-xOYQILqy4UtP_ zhUF-Kt-uXP4AZDL%!UksG(?t@ra+QC`XGTdrDNmV8Odx(!m*KX9`$w*rA&nSn!bnc znC*zv{Urt=e4@ngf5{akHyV?BHz&)^{>Jpp4f9ppm^Al>=G|M(2aSyUW{uyS3uiF{0^b{!>W)G=C_#?GL~lMmjMym$86!u#OcLP2O>>1g2~YssFLj!o7~mFS za%Ea%K+P6|AeKG=Q4*M{p26--2AMjvFi&-MVaixTvR>D2`oAqpVOr}_4RMu)OXjpg+Pk16^xxo-@pgRMM$c*s0#w&N;2D9K!q)Cp-y50t&yL zl>7-Fak(FRKO>96D2ScPNFWwe7@uNdJn}BKvnB%s1NW7LUX2@XZgNvF6fUQZ9ZcD= zN)ounoNSi{pw#aVFjyRxZU+se3z`w7lmPJY8dD~qvDY*oyNyxtsxhy9D?aCz(LSgO z6Sgr@Fo3j2O$sv8Wg9XbsiH|zjDt3l`t1as$}RFWh)gVTEpQ~;g()G5?c08txw6n1 zx4#ULpbyV84S65qa9bIW8f(!q8!go~<-5C6St8%}C^baclea@?p34jsWPqkG=I42pzIHFhe@S=9!hB!scgNsH_6k>D2r;$e0!3Cdr(GXuBEt4>AeNR{!a&enr==Q<@7UYSM;`s!rCFaiK9?LX z`qdBE-_j2rFN->*aL6b{T>L;KM*NiapX)ybQp)Elm&LiIW6%{pR#Ex7QPxS29BV{pYmgQ1g5e%^qo1S-kprEqlQE z^vqq(F!#dKT6{S4WZsGg-BeWHjZvL$ve{#z8GU3CmK+*A{p_b%B2-W9&1aq2&21I+zrU)LZFSFwHoJ|gqW$m%t#ng$KD^mI7ZvX_*R--7nEB9V oU&yFvZ+%@WzMh$d^zs#<=%=)u%bNCnhZ(|u0q^6uoku>$ld--t)d|=}0$?NROE`WaPLY-25qzr2wrpM+yjL@P*mN zt`w!>?K3{g$TnrDE^DQuIZhD{+_5#$sdi!#0A3umfnRncGQ8j%oGMYrCY`Bq5X~{o z!@)5#{q74ZGb86AEhr5-MMWfHt74X_C+$zC-Ii$#`}k{WNJGBRlz-mb6x?6ba-_;A zx8yR|&lWN+<9cgy0_5cf=TrX*4uW;8*$w!usH{kd9n0pz#001`Z3~&c_=>r~N`xJa zZ2NQHFJk2F^knwSj`GU$L42q*%J#Ie$c^o02 zQhceu9iCkXcUy^3)L_&V`arm^@R8O>EFeU+2kLcZ^?nQ^YSbKQ7!!4YDAGumO zprgZR!L+{*cp*IhkwKvU5{ig`RNyYQo_4el7~%wp4iMlK;1Lr;{}+aD&ZmHYK<(Mt zJ=5J0VOW+2^Vz_N1(byhzgvWP8%c&*Fq|4_xOTF-c5{H8BN$Kv zG@Y+H)-;_jIUb3|TpZak*i5PG(>Hm_7m~!E?uI9u;UhPv=RSM(jl5rqGYPln+BU$` z=z>AK{j&(&OM>t{b*F>YS2s`2Si~7VtVk+Y0P;q!e+;#Vb1+-q=GP|qQ9X_x4DhJXT9yrREBL z2^~pS;pktkKg2HKSU$qD@%0F_bjbs3g5OszdT?`a(%*>(H(ubP*-vmY>*5!$X#rw~ zPdAxy2%xuhn%URsdP%^KyB8Q?KmDUAnOO!zx1H++V@g>hrxd+u8E?+4PEn=O)RUd) 
z)e#}U)X`IS{BP_gnUoqf?t|*dA_|F!d{r_gh1NmUfnkqr=S=0`#5YsQVrVEWX*usr z#pM@>GWKH!Kh$^|I;n?sDpSMH5kT5Vl)j(-doWTYL&>03?W2IiEs>L~%x{G?Uad(h zp=$?Hc9*t)iOlTNep6&KCF#faZRMf7o3-?2&h>1UfQAa zbvj(TI>{VQ@;KVqkvN*(=vnO@&w5%pLT^(75i8{#ngOIuUg5a*SF_ntsubk&HAWQ1 z9y>%8dR4}gwe6kN-Ug$ZW&VSkvKXKmLoi^vvoa;2_-yjf;L#QA#;>K*NqF1<#zn?l zBv~`cJn%`<;qxwPOLYv+2pT~nn@$lg(H$PXDS zAEcvuHM)Df*}e7zQ0u=N?AM}8VU)=jzsY6od)UBs0e(Ha-dhCzVr-hs-4b&X!2P1Cr$GjtFcM!y|2<|2 zI1vl|jd(tng|=m9FRcc}rzTD4~H3 zha8CSY8kku1EMmMLsf@X2)}8tu|!D1^Ja9mQVn0$YN#-hOqRvckH`eQO6Feq;BJ^- z%BLb-qwuN^DGZqXiB#+X)Juov)F|Ngg%>|+mk0`}FSywYkj2j6F_}Q=XH+y8p~~RM zz0_;OzMSXhDxBF2qvGJX8od5kqk)?_*eLZ7_OKzLgAKmi3ep>L6KB*1A8>Q#WM3 z6ZubQ1O@utv#Krc7-h%DBan^>WVPr#*-=&i`Mv?;qClwtg|?U0$ywG#gtP{Qi&a~r zfx#gM-I%nn?t!rZf`-%6h0hwMx^Lbue*MG45)^$;(97ECDi%xxLxS?fw z)QHcOd&^lvr!OO8xx9Snb2_pu2dLp2<<_{BKDQpDNQu{u#8sd?^W~Y6dHNU#f8g#2 zpb_)m{e*M(b(udCMFT}|XpP&k)o)U0{l%6!=Zi>HXmyt60>-^wwB@J|n z26hjspD`qlkU7OTYeXL3(XD-_h3(qU9AEc-?F~B>^F4Z1cX!n&LP*HY@yG^)0q|qz z5IxoP&FNINQYBO?3XH6zyvw{{_)KN+LHTDfueU1dvo=AjjIH6#&{v#Mvy_+yF~M@+ z*SjkV&!1IcLDV+Gb|h&O#8`iX?4eZR{3^(onvu%v(SqrU&Ct8Kh>Rf&QJb)5=_i;k z-E+7ca%v50D^GE#L zj{A|lesTA}ZRc1t-{g80iQSh;S*O0=nqlIWQLqS3kW(LV!xe4ck61 zAzXo7xa_1;afbs+Hw%wdsMo=euy|Jr{+yn!w~0St@IDDWYNg(9R%2|;mS>mPb0G-n z^Hp73vd#}Ib-!+t5!Cv3L1v>Jm+9u>3|%t2!|`{f0LjujsyT(iS(mA$CN(rGumL$~ z!=h!Kk*c96xj>CA3NIc-!78N}>T>I$$@xmb(!L(Pob%u*i#ZyN(1t2Y;M$WUA&V~96DQBm?QnFK5zj(7 zqR?qIfsJtKRAz-bl`?{8f^xasudM`Xf)*1`C?Dl|_0$sJH^Rc>UjveJ0(9#S^I^^* zOjv4oULHc80S#H z#fz^9qLuNb)-fjSEgukti8)RV8_gPh#o0lIML@;TB+k=7hoO?EYF-U zgP>J!i1Gw2uN8y3qM+sG;q|rM_GT8r>2;C(2GVDP(GqlN5~r8DAd$k$M_B$babo)v zAIAqOHn>~Sd*d6(Aqst7L}xvgT;EN!R?=e$XMrt};mm#nEbnrdtbuvox;B8<6cF@b&4m{CBb zbVRG!T82*U3#z92f8?Kjahh33<6d+Gw*zt zuSOOhSC?APNa^Y5w%`{QK(tf$N)z7s-c=%hpT!Qq7K{BdveY$`e`h@XY0Y%yG~xcX z+(6{#90COT@mAceKP=A3`{dK7`pfB*Y9Bs{U5}7R6kyG6p}z5*_4|*XvZL}qsds^P za39|H0p6b%_AA&$9(9 zXyb0R*G5!Q6Zu z^>gl5eqSf^#V$9CGET<3SGL|fN?HSb>Nwl4=ov|=vu)Eq-q8#G=1u2oIyBaDw=mj9 
z0*B#&4se~Xv~7k_u@f%Bd8e-imL@&A!#)MinZ-`t%99?^^x55!UD;nLF2uX%McnEN zD*GU&r--urJI0g*#*l>K-WPb(QcL3ZAj23R;K^{JMOawKOmKY;5ASx9VngofF|Yu% zC-6AsA|%6c7$jWHBM@7?{nFVKd%t3__S@=XJB!Ygi4XcXlMbE!LJhW0Vz}?g8Q(hWlid zb(p_tsLkgns>Ka84>vZ>DX`Ea4%P`cJxCTPLggeV~P$-crCw= z`apCI+86qr$bZojkq?%vj!_0!DYyhd*?O^e@=%EtPQX9oG4oJ7-jF%x@!If2MN>l! zI#%thpSm;?h)`~B(BcI6MxOx_W4&_J`+ltJ?+3#Yqs~OGL!$WoF4=I#&)hPja#JfI zvgZ`kYG=u-;}0ddaNwm9DQGhc8&5+;0u_X%dvvMUH6PogM&VJ9oq|WCQ_*6gG)?XM zi{BH7U_Nz9#t(GPa6MK&OCQb9==(kzY}uxm`*^~)LTNHAZ=9=WxxE~~w3$CJ_pR_6 z)Lf28N~DGp+hgTnPC>gCYT zMU=Nf_H}W1{0rt#h^RBaAhn^X`K~9UVf&jWp>47W>>|AuI|;$V0o#fuf9+02E2Mtx zKeNbhM*s0Cm4TmOYaS24#e3ZBc)h7Y4z-NvHGiYop#G$D2QW^(qj_hx8>*Qh%o%f%u!q2^M;9Rz!v#j{qqXJs!DEQX1Q_?Z9AF-r;(@p}2z1B&y`w&N`JOcYw9SdtUHC{c0G#+;rrUBf-)4*YIbkf>+4ehI{bAy(=qWHtFwcA(~HrB z!MQKBah@9UsLHX6q~ObgoFfN)*_ddx^`nnU+;}C;1l0PUW>XbJ^AhqaZN$uwI-sO9 zlND28OQ*Yym3GY?qSMfduN#NDMl4kG4R#>jaN`({Gt zM|6&Y0VecdjxjD2=xbo!aD+?OZirB7EqUwwch z49rRF(8i{rN1NJ&-uW&qqxRXGjkuo=bl3R%fWpa8P5Jlb!V$Yf<7K`R=1R$F)a=6X z{k$|5BHYPLn%u3A`493oX5odcs(XCkxQTb4!WZiDm|kV8AC1}vb9l`Zf82mqbHYb2 zWoLE2NWM;-9Rv-bSzz#VK*}oy%K$pgB;YzJ=~r+a7$`dy6>fa~FuS1GIQRlB0_jQ| z*wNd**ftvjUEy4*@V=>ktypclHuspcL=X1~b;idyWTXs}U4u;%2^%BM*WEQl?bL10 z?6HiiO(fSYO97o?UGqDLfBWr zWI`*_X+V}^ngeYaU8FJbgqQtuTin{|*#rk#ECRAFmIv-8Rzbr3S09ZWF z7wM|*eCeEtHWt;{wAIxKyFQC_n@vj+za5IZdbTEbcMJE}&lpAnRkXL)ZO4DacDY0`kkbS_tWrt31^nM6K9xcVOM&023f6xJHv)a}x+6aQ^1d9c!}>G% zcSenOe!pfKx>LJ85|xOMul4O5VvuCOvW||vDg+2u!-UCCM+#(cmZpw#Y>J|s&WH!RDXKX~utE#M`h$^dH; z8D0<^!^eE4+p%bk9jTINLk^&rch`xIeqjso@@sJy)5*gu*7mxz-T5*l;7NBeA{~*D z@k!^Ls>NoWyOxZkqd8iX;o|XF9eD(Yt|!R}Yg^np0moPYA)d|O(#E)v>3{h)@u54L8<^qTpGvxqRicNNH@t8N6=hs-|BYn{x6W|Hs2qW?6t6?LD*f;9?e+6 z5vd#1!$u*hJp=OuX6NQe2skqL70QsS**Ebbgv#@Pv*;(0D!6<|x+OXLlUninjKF?~ z2C*p3h4k^b;{uZvpuRR#NiJMXS9=+Yk@Fe1wN2`9PHudga(h`*7?SNJa^kLLx-7sL z#YO6_KiZy#6IqGfhQY(VGHVRpLuTF&t;GoCPBsP~hK6HA+Ua;A?ILf6K1G+D@*FBd zePi0qx|wG@JIPGJ7F=5J@^UkCNaPxD}*%Bd1G@Ch=Aft$h 
ztCV9~j_Gq5^lqaZrORNZbg!fQD3~Kgx#{!)YZhPBlKfe3GDV?FgvAfys$`c4tu4Ra{`x_T6$Q^pgv9ZqOqV~t- ze5I8SWhCWn5pUWDX9_l7@Rw+vsKPY+caT#t926OhuYi&y-68S)2(6@!JIO7eyyKs~c7DM=lef_9gM2F^ia zIbGMFrW^?q^}`(rTM{W92+Q&-7z|?%9NT>#<>&Ttib_S=!>>DjTapK-$M1PcZDxju zJCMy&*N9aq=%thIy`1v7Yt}5KpkIv%be+1*)DmRR`>h7m9+e;jYgb&KdFJM__1Wp* z)i}9&I(5XjOAV;>IheE8_KeGJ78gKalgp5K{>U7X}f?>=PgSu)u+ zwLOBOdmkcDiS3%0U7+msYb8F*t8J7o8*t|x%W8_%!QPb!T^+>wECFE4eAzKhUY+H7 zGUeDhMTzV2d^qH(Qk0CAceS;H*iE^u<>`L9bIFD9{&*?6uV*#=$%FVl4-RyR2H%Tj zKQv|K{H4p|iY}o?xu;=N9d2v9Uz?rRHww6{LTa|HCW61UtaM^i3%l=K_I|&8xe6Fn zF=wO*7dJIHUtMu$jP~zCVUu#`aW=M#^-gsfv>5gE*PMeJ?xYOah^>nNOS-PS(ddZ6 zpTVL<;miJWTtrB0p(*sF?4fRW=3YZC@^4mEQD<&IO~|utct$$x99iu{?2lPVGQfV% zVp3iWEPG|Xt&IVR`(AyCf4OOZmJS5|3JD3op-5J*<4Q%_4$Hz5Z`*ceEv!11SZ;keSKSUn0PY98~cRwtnpkizW=jgSCe3FCGER< z74j3zl6B%xy0>8q4TqA(jJiQK;M7-WbY3ez89;~Mq;jZxiTx2@55&IrWSas4M-c?3 z0t#%n9qd)37i`xKOLC&ELA;OpJISXLJwbOEM|-$8OiLd$2%@?_#L-#r_uETG|&M$n~`jW1=oIpA!lCb$Ex7U61FYK!L73(FX~+V5+T?hv?;YU!={IkP5y zwXJnw_7wJ3pr)~Pv!cc{jrSL@_@<@7VJy+Q!yCcybHFNrq-%hSdrap%IDT|~YxShN zqd?wpb6wy){e!(6Vqw=UE};bV?P!S}I!UlY{PD~vQH`m|A}^m>cmutR)_ke`E4K~p zt{3Te6&rd>79<^7vm8kEcXh(w6UvSg-FL^PSE=m)j>^T|-mm^d z@qx;8-m>V+FB!+a#pZ|Vq%1X&M%0~NhiM%$xf#qE9}^b$Vt&O)$N5F|?mwgO5V>;_j4YQ#nUPi(>JeR0JL#FnW`r3)6=C}R?-9TFXJFb}8Mvay^jLE2ADz`9U zHhp};rp=y$CW3TI&8&n5=rcWSu5xpr&3zH$t!X_}{=Ffa+Ar9#|Duz@>}qbILY*?A zW_|1Khb`Z?8UFrrF?ZAItar}K-zwJ3o708Dyk3ZCN;NBDba?oRAE#xtGxrt`Pdrju z9HCdo`ht;C@bnoruZn7}<9g$p)RWfZ{Z6CcGc$K9;6mq94R+V%(yh_?QOrrZ^U1NG$)|Z2 z$D>5=db95$BERalZa_G_{O4jY+jHrTsC?jf(@{FiC+H%;=g?Le&E?le{);s?0#(e% zTfvue>MblUtKx3e0)hQC>l^u_MYH1DRh#R055 z`CkYR%H4LZ<9Rlk=cr7opD zfIj%Yk{b6{q?Q*larm3eF~^;xqmWdZ)1<0%hXP%&z1#bq-?M?>;kmVL3h-w_w6FzG7wB7!h`h@ z+Vzpvf(YM*KJ*Ln9Z+Xty&lsr=@Z9=d+aQ7pnl{9;P2_kwK62LUHI%R8sJH(+3X=~eHW%OvMOXp^p$ z9<9ugowV0g={O~PV6aqn{T|sl=1leMnfEYdU8Sc>*}L|)K1O=k?<(JdmX=9ke9q5P zGt*T877l`W3lgjp-;{TSCB@cWyK{VLy2fjB^NXF;&dZ6_3TspP%0}vrM)~@jY9*Pb zYYTHETI_Hp4`n5mlxcfmD@&R93t7YER)(PYn2sZ@E8Jg8&0Zfjor4}99^&z*R)X-t 
zQhbfnjpM&J#wHM8%{RXFX>NRKu54r`68fP7_=dgxeTgb4w{C8X7~ZHq_~GHy>pHR3 z<_6Vvdiw$N3;{R0J0d<9%c{hhDcWZwBRZrBqi_Xuv-`b!sB=YOH{* z0wE|qZ~H!QEPKZs__r~Ls%Ml|7C0?EePe0s(^%ufvQsB8H#I*u7ZxUK$Y3PgCbYF+ zB>x&T6^bui=hgh7R7L~Wl4nZG_-#=Q|59X*X;a`2MmlJz5^Qd{i9lYrG+=4TFbyx| zDA(C!;W66r{!xcomU@6I4Rne8gjH>;9q4$=lBgQcr{Kt;Mp-<&Twi1~Nf0ZTXURP^ zH6QoRPLDCM!?RyWX!G0NPDppPNt+VA;%sAqjZ*DycUMoJYjiWykI9nnQ=gj1XV;2K zUC?H0pfNuFdvo2g-LdJQ0) zdRY?36J94YS{BYL(d#FfmqmPk!P*k?{b@ONc&s&_)b4s)eCmsq%V=vs%^&og7|U_i zWa+g-JHLmUddA>JgW)d=$&e1B*AL*!b{gR|3!Rg!P~qXyYM|&wmwfvXmXUD z>un0vfc!%`WkpP*COTD)V%*8BxI1Tn=8h_ufvb-h3?G;`4pagyo^~C)B?!PTd%ZR(uHl&v2biQC@s^S??KG?Shh zW6fC&`jtT)UmJg?*G}o<-j*SI8q_;d1^KG4u~09E_2!( zd>X(eUv;TI`_93rW5FA3HmWsG+c6PPb;o00TeWUu{h{=Y5%kG3$WsF&{>3n)7L(Rz zs0-gA>Py8NrWPBe7b0l4HOp*)vSkma=jGQPf<6sw?Qid&Wa)bePtCb15gbgSRlT{T zf7zJ-^Y!)zdr{U3o7V9hkPXCT){}xQK2RRCO#aG6SB<(hTwC3)Y!3M%-`e-sEcA!$W3%Ya z>8Q2Y%gXn=55&j=wPQ73KVBD3t|YPQcx=?3xF$jK$S^@;rvv{0>(Vq22%KkV$6oUq z{E1$fvq`hzFYo7@h1TOjrATu?%+>I=Aha0J9F!hlbgO>zH66n0Tcs*Ig2e*9`8MF zcJX%`cb|XyN`3JCS?{LX7r@Q)9dLR0b>Qn>?W?<;_ZeCDD+1OVI=2I2zEPQZ1YSJj z3w&59A1I%>UbG>f^}fIRXXZ&=x78s7PR1p*Y2H@A*ARow6f`*+^Hds9ar2D>vryjJ zmz;8BrhYk87orU_NP6pym6u1oEYHWu1O2}VKF9eiqHx^H@}Q#lk<%G)ll&~-&1@rx z|I^z$G3W1zx5B6==D#kmkKuWXehw@I7-QdloD=@^PdEGQkzwmPkmXFHv*|Map?p|8 zA7K7$8P;Pwa4EKcJDS6A@P@46gJ`y`{UpwtSozLj%x^CEM{k-$_ z;cX4@=2lyjV|M(>jPeh`mLD+kaI7)+epKp8ig)e@sHhr~JJ!J&8`ddGHdURNRY#rz_VZN^*aI>WcI!?K&KBZ24%A%#<157@7VybO{u zc4)7_haY6)Te#Zk6cS;)(A?`s-WC1K$oop4(^NrRLqB?l>QWWmhmhV)Sta7d6A`v} zNlX*%c)n1n2#G3`U=ntLhpa~rj9g>zc9bQF%M()P>a2Yl>wd~&EE?VNSGK(b{bRW@ z1v)z9-wT!@5GX(N|6a1pgYc9xFzh43MMNinnhUhAxhe!r;s-z%aay#HAJ%mWcjVW$C15W_BEU|iXYz=(_^ zdSnWeGD5myKLvyDuio4|Kt>RjK?dmJu7iSXS9JI3_ZITV5?Q2Mm#TcK^QZL08v|a> z)cNF=47>f{DnrTj&#)_M?abYb%|B`(L-1@LyJp0U3HwkI$1w(B*k%l_&3vk-!1m04 z4eDfAYeTgD(mw)v{Q;_ekPsqUJAI~Ue~)xugm=rs<4tOVO&d4m#k4cA<}LE7D+61v zBNzM8#3bCw@H9dAY5hRGAy@OV5Z(NZaD%QUcK><9H#+U82~G}As)fhn?*%>V->0`2 zhGN8&bu%@5dd?BEo7wUKEdF?Royo(cJE`6*RIlFb`Mf6Jfw8JED*sjY9IpnpK_u{# 
zK`*W=qKg>7K;yc7bX?dcKHfbEU=ahq9^A`pxo(+`D~iPXh;MgnPJpxm&?KXrVBKjT|qomZ!Uym8XV<`+b3W+}A4N zr5w2m^uH=l9i(F6=j!EokM3k)1$VyJ14%i#S~;SFq@-MZjd)?)g7+D^416K$_dxuVXIA=)V*mF3KXg^GaIs~D zyKw1fv7v)hE!-UyU2I(MN$~p+ZcTe%Qt*4z|79I2fKXC+#>gjz4wAm#{o`rx>LO#| z31^iNf`Xwi2pGZ(7KHNg@^OG+h&2UL8bNgMe_phFyu2`8S{vHGWYBwU1hFDBzz4pc z`tKL5%YQCGdG6``KQfr$eJuZ12D|6U|HycG{)xxK|4%$V!9QbxzZvHH-O>IBQ1Ex( z`MzsVpFL4o^*_IDZdUh^L^DEPni?(KuY5GbGEedGN5U>F$szlQJe1n$Z4 zH$5mA@|!9DBZK|JFci%9`+Mep^aO$54gY_IeE$H1@bmpc4+7!+hcl4-on8Nn2L=B# zolq#`FK2*1dG~a;uy=yH|L%!e_Wt)ZB?$UmXZK&Ge|5s&#H5A(sS9+FqRf50hzRgO zB_+XNL1`&TIZ0`VB%hpsBp3>jmXVZ|;^%|F#L)l0RqhK@0VLyUC2a?{a`f4xc=;ry1m&P|a{SVQy!=B delta 10700 zcma)?1yCGK*YAS_S=`-&%L=Z+3GM{TXSZg44-E)wsR(F{YSOZtU@pEyG)b6-vT7d)vt#%5wMa#e z%BKG-?P5>+S8Z5qF5=6kv^)v(P#6vKUKDP&`ZQBupeNOQkQc46aoI00od ztraAQ;`v3H$+JKRAxXCW5TRoug-*@x#Ti7@;mrQQ82P}zDAG~8k6eORHMHuhrJZxM z3u74RQOxWoA&jP*9slKS4U!qj&+J`libX8-!pYNpS2c$l7%yc&ipJd_}E6nE$VOpq7)iqpgLLC6rd+KO2BSe|z!2u7bgTjQ-&(j0YmX z^E85+yDQYp0pOke-Pg{WV6O3pF}Lq<9Z%g01LC>?v@G0(RELGP7BQ$BB=M&29Ly!U zlQ_wQYdPuAr$qFZ!`hWx3($Sa(3#WVu5LDoNXqi6nV--v68W$MmB(zUd5#?>a|4yh z`AbG`@DqM!iZWOrIdr*o-TjP%Wn?tY*?sJoU9|ft%91iq#3*PPQzQL@0((%dbD+k~33I!x0Zu{d3vTk$o=KRox$Y}d}aHRVZ zTPtZ&2vdk;>Y7{Y%T2*@MfaXXv_J}B?1{D|e zn%P(ot+Jo5k1d(4QMPJom((?uX)N2icJU13Fm(GdUeO0wo5Tl20w)OsjJ3@d^{Y_R zS?(~@dYoqHy*$5pRAlFwgdLcj@d!#BGdjT?AO)ewUuYLX(_7|nzEDIEeSEecnM=0? 
zlR8p&#oxm>i8v&YbN$RNPE9{J6u5>b?({y) zUkfd~^CERV9O*c$?Aj=Jy!`Bmhs0r~7;&kfRT@UhoyR@Hy*n=>E!S#QTK_4-gBxbm zmw_h}Z~m@M;&iHO4i>(D5M`Fk#7$%6I3%4+k*sRbP=k;5-k=$|txZUn&jGW+>67O} zYopzwzCmV|&fRPDSau4XX%o$6|AooLN(Cnwp9Sq`MMdHk65=)hNA^&<-S|NRd80yw zDH$!)pIma(wkc#+zxUY6>(l%87$+`L8#bVwXLHD;f4b_HB20bX(XNQaOJFqo+MTtt z*KNF@;cfOqnlk{u3^a_+6!=_^&U0gD>LYK<4ZH?w!QXqu*Anc1YXLA(G?QUIPQrSc{dJVdwNzcq*H@qkkpY*!3LsxxkrC>&VJ<_GN_roI~c3^~z^1y7wI5 zmYdTqT*|n+t@s^_UWlx=`7H)s4^O=iR#t2D=i!NA4XP)4mWzmsdc z>h^ZbVAjOSi}6@Cl`(&(JcbolRgeP!tD9HC=LcujY{@yF$1Dfs&1gi-sl z`pD=R@^9jde1mbp9~+zI6IgdDfMODgz1r9njV$JlIP1vuU&vERuZoqdSZBEsA*m%J zQQ^y^OE3?rl`UJDq0ERjGSN<8Hub^h!&V8*n5E>ghpM7^QOBKYA*Y11Gn<*rFSL0& zsfBR0Hy@tu#Iisf3uBkN6h6jN%}cOa?;I?b%T7-nI${q!%Ax|)5aQz;&tFB3FXRws z;^OwY_GdD)b+F)Jn{}gP*?^{uy+j;b?yQU0Ghs6#^ApN4mbDE~kIax;%Zi5Pj4}33 zACc)78MOPZPKSZH*H)!WZ@dRxXe#J>m=YJ7$m^>ceAni{|I!&o@^#wBF zl;UTVTYF>`3FvYdeTQNu`LtLhk4LCD3@6nn(Mbf~w6w++N}CWz$ba){D>}~id#j4^ z&e6rb(62lVYOR(xT)Uq=m$}8^PQWVb+61E$;$GQw8S!vzO=+^ua!5;;KeEh>$qNO(1 zf5D}GJ}sj<*dyon8HB^(H#+}vql@I*HlZf>mrlI_8%8-R-yq%9*f+rh(FwB;BJr?j zl+O@)9qA}6WBZ6qJ~Jt)8>(*@Au;NeLG(6lKVRTRON|H$6k17DG1}E1rvr! 
zK}or0sZdGC&0NwKXa%z+5iywAY=LwA*y6ZgNDfU%G`?07MOlg2&E- zc&>3K*Nr31Tt7-nX$jiq2=A*NtOVk%v20Q>wkz~kfn|D7Sj56=af&5I-o9V?EWmiv znsvqRO+h}!GH8~>fw z*8NHaP)5Lv)mZ;05=eDLI&-#sh6!>fbk@)rSpD`h8^3D)c3RiNNsiac>m%Tr>18kt zpJk`|O29#s@{M2ndxp~`Vr?s!@mG;yk;I!oD?n5}nNBdndDe*HNHh0i_KnVf@7mYI zBS6>K#>!~}qA!Txq`k-=a5anVs!OV&Pa<&c>Nc(Kp*mhrM@m>NKTND2nWTM*jG^lm zPhhBW-H$$)T{DP&q+;V9G=a{o;=P7$%iNzsEAURBfZ7B#8?=MGimnM$5!ecmXQnKq zr2AT{-0FF}oG~wk5%nI~_Cg(a10WCBxfen&{ZLuSG1N>v$2HZvxoBSHdnuFn3`^B| zmO6yS+R-OM?+jDcYhmhSB{)NPqcI$KKggXSg&^nfK6qgaEwr&QG{e|CsMCX_l{NSH zjI7RchvCx8`}hlk3WN#PU_j_=-ke2PO8+{S6?deb+znBJK^=&OJi{am0Nqdo$Hach z=Uqqad7i?xZuIs@o+L&+204U9OxXOiknEksmUA`_w~gm3vUh?#H_i>DE+0_p>AM;v zDA~i2o=1Hp5 zdrzzTt{JhmmyEsm{YfVgwl1Axs5o(#`O5%h^)m#3>eS8)hd5}<601A|szIFBrum6X zUc$he3C8CdDEZuHunO)yHT9GE*o!8}7SppSgL}-US!dGM zvol9$4phhPpMFf`eTgIJf#$)Zd^HQ=(BQSDbt8)VaTL`h*t!J8g{DR{R_bDYG6eJY z3`$pHPuJ4A4~73I3T`t3+jTdJ)8e{`{Yg63<$|SU85T0d^t@>z76Th3yRHYFg0fTu zK>^CM;@Zfy^;4d&4fORo@2_oVQ4Sg#w+Fm>&X{BWERy^@jR zBC2=RKEEyk>mTmBsXL!|z5dH5fW)n#Qe$s6NLUyn;LKmk@#84vn21i6?c(Qzsd2R6 z@6MuHoe01~*V_2JcowY9e&Mw-F^@oDVY*(@BHwB$9E+H|35#qMqjki^vy|s_P<~aT2S~ZYCmZv(KOFM4of$Q&_23&-SgjV>u_S zVW=i#Q>s9lNm_IrH+cnUJvr%D))Lf z_#8Vb05$^`{)C0z62Gs{T5|FOsW)!KV1`d8gkPuMu(%_8Y|vPeP(G%S z%zHz7e3MSEgqM}WN7{P!7jmpsJuwe1fQ|PM9fPu|TLR;-QG6K@55rwQe+ZG6Sl1FE z`uUjNIub^^?K{n6ZW%J?LhdQz^tx^`3?+S9hj&uM0;cc4m5iqE6fjPbKRQO?7dJ%~ zmcr^Fs4UH(aQ7Asc_W6Bu{_c9-IeSrYcVk0jObd*!77BqPxqHV#XjfodF|(pk&E3t z2RGh&ldk;m&Zq>dXZ97tx*Z1E3$?wizCiq$lFGgyA{om~kgb09A#|)# zB=3_g{|%d_5Ac+x`dTD+*O`Meb2hG7rw}5KELa;3zw?m<4^WEXkLX8 z``)~tG|zl#;QX){zojoO*$WAe)XQ&z=vXH%9@4gy8p;mu>|~C%T8>wnQ?8R@NxoA< zE=DVjT35phiF%f$kzP!syO=`c4L%n2c9$EU2&ByYp`JI$lM!dQHLM zTi#70B(^0_F_7tS*o?5+xJ+x`rOSF`0_;xqRlDAo@8cjc7lFTdkOkAcMUW=}PiD7G z5E((cQ&0o?CKIlbxRTmS`K53rT!_S5b9QCtZe$Clb!Cu45Gz6U(Alf&Yf2SZ?wgFX z!nEXYAA|Zsb5K^XMq*7RkPA85E@kfy=>Mbb51;Ktk=`#-~@AC3!oK9g|AUyE~=&EWCQkl z=Oaz$^;_z1)Xf~QO@xiyiT87%=?o;fROZUS&a?d`Y8L;3{2%Wk4X&zLOsRcdOzDYV 
z#piI-u)^JAYZ6N&@rLkl&~Kg-ixBJ94F(|^b^zW2)4e!8eJVHJpcIdS)eAp1!|vK1 zalL*k*NDN#l&k_1D;F}GM9ze zxy`&uNlaJLQ6u9aPf6i)M~h1c7TkWexsHb|z(l2qtsOj+=m}zyW2BdiK|f7C>*8v4=IsnKpa?KoV(hRx_+g;?qvB2dmXHlIQ}95hHPO{*%NwO@kt%>L z8nT{+=dIE5P7!l=Fo~>^;iMOnfE%CXMfQu^-6V%D-!b1+ER&P&ukKqXOZb+GfRedOu zS_OyitZ{duT1|Mgi;^HJ2THI-gF>99ZDXmLnh2|{o*Ju`s6^;Z2o?6yMn2WRVM3i$ zKQ=&EX=wvVxLy7<0&}5Y3R~Gzz#?4JGo5H5i%6o(Md>_a$xv^8xx!7iFT5G2^%-Z34k*AGxlD5p!) zu4>dLey|ej=`q8`cCASZCPaOpwBN}O4c5T6rz{r`80>*L33|hWXB^T$gV0r9KN;FC>*TQUD zY_ZWEIt47Xuwk8>-2Uu03vQumhOP78xcC=e$-2UhB8!T6#zj4uz8EWk4Hl%hJ4%#?mpo(p6$hN{NG zzH`FRaZ=WhF-aS?kW5KkEwofg0d=%+j15z|L(UK_Vkb7_ZL!yAnxWm2}#QU(kt}S+bo+S(&A?$CDlM+7%ZzqxrJK45IGT-{fR6VfN8(efxU_AO< z^sdO2%+C@VUlUjq4<;JlMiyT7s!}y-v}dS(KIf3fFsa)v*W#-hi=qDF@f{+xeE;Jt z`4hg`kA(-md8zmiZ)%=J(=h56@eAwn6t7;u_R6n+Qx>PWez*>y7O>#68KD-#QAb!s z5xqbY+O=$X0EAjMHGFIBQ|fXV8lnxH!2gD~`PBd;o*GJs<8-?dSL+E=Hy)n3`XYaF zM)h(*+uhM5a3eZo#|M2jjb_~K9O`FYX2!dHa`)4VsHQ zpUdh4ULlI4WRylR+45T?FQ&($)IJ;)^+U%xvt9Q(Q{f8Ao>!XMtW3H7mE8an1nSyr zsRx5jj3vq{qIiD6DB}l>8H4m-9KF@Iu-lP6{^?twqx-bN+92P0?vdViroz65SlVP) z1yS{+dqgb9QJyygNth7N<)w9E>&j=?MB?SoY6R`3p~5+vp_p>b!W=8dUry>Xn)qQi z!&j^RKetD{{5ccJm;Bh>t|cj?^9eqLI`6=&;@;5^TaH0Ha|APQkUo_x=HK$ZhrKf) z8fm$?v7!y5BtGvowZb?;IjtJgcwaD@|C-2IzvB~*1<$4Yed#=5qK(q3!wxqB19p1V*)gbJ?r zNj`oHv{S3uttT*6XZ-%4@9qzbQ-6+}A_&9&o({R1 z&2@RL#R1+Pfw=)S%@cf=3x;rb19i7iqiF~O)6e>{0@E*$AT*^Te!JX?+5I@X_a_K2s2Fdaplilh zYfE)_y^`CVj^O}ksYKkz`z45J@umqp(vOh2PkN=Q7M@Ji|nPD-**VAF+R0p;(U z_ov@u>Kh}cHvq>eOc(dc5dK}ad2?Pa!_Sjt&-S+F>j33OOfbe_lhlt*wY(+An}#(} zc?W4h-tEbqQSs_JG3R=J_hV$8)4S@NQPkJx zM*aAq!{ab(ZHSU)wKc%BhQELLa(ZfD=~H1xMbBWHY`C2l&#sZl-Qi+0ji~wbXA#ud z9GXf3;-{AoAKOwEetj2h{;71ha>rO}eAR~+8Z8>6S32O^@#=Gg0SqZN723%dSl5r# ztglwpN#tJ9Q;yP@VDUzi;G^}@*v%SZ{V#IsY;ShMU4w<)Z_SXFhldjz*vp3%atqjt z)5WIOelhI7B@HK?N&}bOcXk)n`bM(H8p?m_M0LHG%txOLb=}`o;%;%gwB~jm`es8} zDYX3SA;Nvr=mV`KrymTcSK65dn*U(EZ>7Q>DCJ8e@5(>fM9W39)+Q6^50v#6ojj;L z!n@I9J~eGawxyU>Nrr`0Yde;nNM=`iM^jMye@D}JR?hyX*A+} 
z->F(JabDLXb64m1@i->1zQ63oj{{OFYEO;;Rw_;cODdv>Z)`?H!;%F9%>#i z7Y-}D-`Az5Rt(V$u1y52>?lB3ZHX$^a=*IL?I#xIP&3a4-P`Lp50Nr(4EQrzf1`U_u6yAPT| zh?HqvokMcBGtV9$t*Ca{ z=!8jQ+MX6T0~#@z*-8U=wwtxCU z1o)XHpDc|Q??t*~7IOP4rhiEJg+vPq?l25qh1-U_HdCCWwJ{3U*^Z(MK>O}fN4|lq z5cv%>L&3mMjEF;0^;ppKCWLQDr$K!W5;B|bPxD4M-nHXZPE{H+6(M&7R3~{^!n1(N ztiPIGJ~@hBHN^F#nDQsAc#CF&Uz0^pu|l@3^5jE%;r7c(;fPG{{XN2xh*WIN11FfY zA?Ekrxi7u9qbGkbCgPX8N1I{WKbDNc{z{aeRLHE+(2VzW#wV0|sdg|h`Dan$i0h^L zin;8oObaxeFJc`C4>|e7(&dJp_VR}}XK03k{6gg(MGfEVSdHzOSx4iKibc4FtaGNL zNX=zT4rLYl15sHhOMl0x*@6dUEr*O@BSOD_UiJ;wnF(?+G{mbfD5#ezU3$F?V6yFE zm`7DTf)|Q(IpyB6Ds^zl_>xwzeq9Q`rn_&G&xQ8=AgcIi>_dH2TJI`OeF~ZxhGSi? zD%aD?Zt-cfCk$K{zAkRxOFK_4t8)pla3|DznL0He!4RpTD8r&;9SBswoakhPEuq+W zhS=H{xmIhsBkRFrwiBat23bq9d8XQNcm1?3_dIl&XHC?LwtHGySO+X47PGR8{6h38 zGN_8S9TM$c9m|uS;=Y&T9~&sk+sWFVK%3F|&F{ae8URo7i&c-Nzh;&({Q92s>v7>} z!VWxnv|?C}kgJP+ibfqLtj|3Va~v}UZs%;VCv(u>1;`9{vsTU%v1YT(>Uf-U zEi`i3?`b(_0NV|N7C#y1J}}hRJI-a_WW`FO&gTDWFz)|;Z<{DOXi_MV&6_qm!luu? z8!!2FK*GMPTvUF47$6iz^~!0GdX~TksAA_he_LW(`T);WjSJ}22}B%F6_*m@3!uCfxNy1OlA@_{ zABd+7zeo7Aro`#hvF6-#@EYr{zouHDL4RhALu_QN;_^^M<{g{CsD5*NTq%0EFRvWkXx>OEad z4Ie!x@Vxsod3)+st`XCtAYx3q{BXzAGme498-a8{g- z?kx##60u0@G*p4aB{+WS$Ydllx;%%vJR34~qQn~{F;RLfyh6(Si^~^yH1rb;=#i81 zdQ?QI)PyYazhTex8wa|cz!XA~KPbl^#zcUZ7yM611Pp;Es4&BEfc&idw1NWQ|1aFc z9mvG|hZXrZ+yu=1cmMxOk8txqp3o6mSkka#FbG%X%0D!1j~?+{fq||VIjFx7C*=|; zLbNzFsuol4#FL-QC$%zNt6LbP7~W9Q8O4R#Wbuuf9g&xU^7VGfx6#8NWUHg%v~}rJ%#3O!hYQ zadtCX;Y0BC;rnP;lGwtSv^a}Rx$S7$H9Oms92(V0i092WIE`(D$1eLHVMPvC|b?`+3} z0S@z(W_l#d5xm*;HtxKc`)%G#(a%1>e=;S+bUfSIbQ8XL7n-9%9=Mj2fvI!zlY(Hf zoZ(MQ;k#Uf@CvXzB7_fq0d`|D0sv*59NqtbL9VvW?oO_>V8~x_`WNg6%ni>1Qi5f{ zw1R)CfU*!;FwcJl^B+LcQ!xMZNDJnLU#N@Y|78f&fVw$(xLQEnXu*7NISpQTtOgkj z{0A`l8+4L#^7zvRC~NEL_H_M=Gy*D_{j>b9n9^Uh|JDu!)UtKAhl?2w2XjiX0CrIslS}k_!lUO z2VS5}i}>VrpSC_CSP)L5L-qWxNx^k>nAlW3-0f{0|IF)eT1v^x(V7M7$f>Q#`ma7i zK=3a*E-!eVypVcAq1E@6($9IcOdKQ6&K3d!oAjO#^{=`2W)m#Lvt9wE6zs zj-QwJXwbod6i}w3R_1*uM-665!_jOZ*oN%qJ-D6vuz-5X=wy!;Jlh_Ot~5 
zrSbhqvH!3W;D3tLKQ-MyrKiP!JVo@M;$ND;Q}F*=`@gIfx8Q%<@q*wrdXz9QP{YaT zDINYwc>qw}(aMST&%B==VL*9lS_4`U9%)GlDQPed4-YRyQc94I_sQj_jN|8l@No0U z@<@mQ{@*6FPy0{W$wJBoYGLQ*;Xn)Ila-Z`;O6Is2txRt5>tSOn;#@0DZwKJeu^x= gpp5jNq;hjNb9MK2g<1i?0(|^n03)M}sx07t07@sKE&u=k diff --git a/tests/samples/sample_bad_empty.docx b/tests/samples/sample_bad_empty.docx new file mode 100644 index 0000000000000000000000000000000000000000..3b60e1683a722b3ef3ee0d8f51b67f11f5462802 GIT binary patch literal 2550 zcmb7`L2DC16vrn`HqhqK*pea=MX(@JNGX)wOF zHAMPF`~V(Al!ACJ(h9ye*?BYjW;07#vg9(q{NI0Gex&=XT3ZnK_dR^^QoYF@%c3n_ z?L@;=4%7YcbR?74os&U-FPsoGUe8*G?MT6#uq&0ZgLV3jv3EnE|&fG){<% zvvV6~d#fDmTFVB1v%%yrprXS(ab1Ji)Q1&odgC*|qR%{7ZlgN2fThNO)k*+Xq1Vg> zaaMy^*ytBxWi}`W@~sRAh~cgPm^lwnxS5_gUE}Hc5VbXzp{!xiR5JkElnsadkO$gd zNHcxOCvb_GIx+HB{R!HO&_SUS&HZp-D;IeMeh;WRT}PIqT|?@6u??xwd0zq+ed#IL zwWW>ytg+0Uz(sd@xUM;E?v3W!*QS($9`#`PzAokpep1T{ww(%D3pY2YgC|52{=0z? I=WtYr{}Suux&QzG literal 0 HcmV?d00001 diff --git a/tests/samples/small.pdf b/tests/samples/small.pdf new file mode 100644 index 0000000000000000000000000000000000000000..7b6e0bf5fad1a09ee063e3a89b797b8de5215d2b GIT binary patch literal 628 zcmZ8e!A`31{K>-~}BX@cYG?Z&qfP?*LFd!;RkVfq96}@87NtzTe zZ(H&^YUpETfHz~dfn&9aow5$OLSAVBG*X2==1cT+tqjmWC8-?h6U-R`f-&eaR;p(P z=L+apUSnd%DidF$ESmWoM8#=X7WlhL$)(fd!j#XqA#k8WS*WV>sCJ*qq)XXISYMPN z=ehsy`S3@S3=}zS~=4pCX;L%8HJbINkh9PrPZtSuSX|={L{txczsU`pb literal 0 HcmV?d00001 diff --git a/tests/test_main.py b/tests/test_main.py index 89a05eaa1..c86c352b5 100644 --- a/tests/test_main.py +++ b/tests/test_main.py @@ -463,9 +463,9 @@ def test_pdf_p(self) -> None: cred_sweeper = CredSweeper(depth=33) cred_sweeper.run(content_provider=content_provider) found_credentials = cred_sweeper.credential_manager.get_credentials() - self.assertEqual(2, len(found_credentials)) - self.assertSetEqual({"AWS Client ID", "Password"}, set(i.rule_name for i in found_credentials)) - self.assertSetEqual({"Xdj@jcN834b", "AKIAGIREOGIAWSKEY123"}, + self.assertEqual(3, 
len(found_credentials)) + self.assertSetEqual({"AWS Client ID", "Password", "Github Token"}, set(i.rule_name for i in found_credentials)) + self.assertSetEqual({"Xdj@jcN834b", "AKIAGIREOGIAWSKEY123", "ghr_Ku7ikDwqD1Ge2u3Wf1UM3z2CLN230RP1I8Vd"}, set(i.line_data_list[0].value for i in found_credentials)) # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # @@ -556,19 +556,20 @@ def test_encoded_p(self) -> None: def test_docx_p(self) -> None: # test for finding credentials in docx - content_provider: FilesProvider = TextProvider([SAMPLES_PATH / "password.docx"]) - cred_sweeper = CredSweeper(depth=5) + content_provider: FilesProvider = TextProvider([SAMPLES_PATH / "sample.docx"]) + cred_sweeper = CredSweeper(doc=True) cred_sweeper.run(content_provider=content_provider) found_credentials = cred_sweeper.credential_manager.get_credentials() - self.assertEqual(1, len(found_credentials)) - self.assertEqual("Xdj@jcN834b.", found_credentials[0].line_data_list[0].value) + self.assertEqual(2, len(found_credentials)) + self.assertEqual("WeR15tr0n6", found_credentials[0].line_data_list[0].value) + self.assertEqual("ghr_Ku7ikDwqD1Ge2u3Wf1UM3z2SLN230RP1I8Wf", found_credentials[1].line_data_list[0].value) # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # def test_docx_n(self) -> None: # test docx - no credential should be found without 'depth' - content_provider: FilesProvider = TextProvider([SAMPLES_PATH / "password.docx"]) - cred_sweeper = CredSweeper() + content_provider: FilesProvider = TextProvider([SAMPLES_PATH / "sample.docx"]) + cred_sweeper = CredSweeper(doc=False) cred_sweeper.run(content_provider=content_provider) found_credentials = cred_sweeper.credential_manager.get_credentials() self.assertEqual(0, len(found_credentials)) From 031d582c08d5eece2631099c0a9dd02804b2bb66 Mon Sep 17 00:00:00 2001 From: Roman Babenko Date: Mon, 23 Oct 2023 10:11:30 +0300 Subject: [PATCH 2/6] Apply suggestions from code review --- 
credsweeper/deep_scanner/docx_scanner.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/credsweeper/deep_scanner/docx_scanner.py b/credsweeper/deep_scanner/docx_scanner.py index 9592c33c9..6a7b0792b 100644 --- a/credsweeper/deep_scanner/docx_scanner.py +++ b/credsweeper/deep_scanner/docx_scanner.py @@ -14,14 +14,14 @@ class DocxScanner(AbstractScanner, ABC): - """Implements pdf scanning""" + """Implements docx scanning""" def data_scan( self, # data_provider: DataContentProvider, # depth: int, # recursive_limit_size: int) -> List[Candidate]: - """Tries to scan PDF elements recursively and the whole text on page as strings""" + """Tries to scan DOCX text with splitting by lines""" candidates = [] try: @@ -37,8 +37,8 @@ def data_scan( file_path=data_provider.file_path, file_type=data_provider.file_type, info=f"{data_provider.info}|DOCX") - pdf_candidates = self.scanner.scan(string_data_provider) - candidates.extend(pdf_candidates) + candidates = self.scanner.scan(string_data_provider) + candidates.extend(candidates) except Exception as docx_exc: logger.debug(f"{data_provider.file_path}:{docx_exc}") return candidates From 15857d22047d652a05c4de0fc934ea77977dd0cf Mon Sep 17 00:00:00 2001 From: Roman Babenko Date: Mon, 23 Oct 2023 13:50:23 +0300 Subject: [PATCH 3/6] merge issue fix --- credsweeper/deep_scanner/docx_scanner.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/credsweeper/deep_scanner/docx_scanner.py b/credsweeper/deep_scanner/docx_scanner.py index 6a7b0792b..c40df37b7 100644 --- a/credsweeper/deep_scanner/docx_scanner.py +++ b/credsweeper/deep_scanner/docx_scanner.py @@ -22,10 +22,10 @@ def data_scan( depth: int, # recursive_limit_size: int) -> List[Candidate]: """Tries to scan DOCX text with splitting by lines""" - candidates = [] + candidates: List[Candidate] = [] try: - docx_lines = [] + docx_lines: List[str] = [] doc = docx.Document(io.BytesIO(data_provider.data)) for paragraph in doc.paragraphs: @@ 
-38,7 +38,6 @@ def data_scan( file_type=data_provider.file_type, info=f"{data_provider.info}|DOCX") candidates = self.scanner.scan(string_data_provider) - candidates.extend(candidates) except Exception as docx_exc: logger.debug(f"{data_provider.file_path}:{docx_exc}") return candidates From e644618c45accf866200ed7390ff58ee0768faf2 Mon Sep 17 00:00:00 2001 From: Roman Babenko Date: Thu, 26 Oct 2023 14:49:22 +0300 Subject: [PATCH 4/6] udpdated tests for FilePathExtractor --- credsweeper/secret/config.json | 4 +- .../file_handler/test_file_path_extractor.py | 199 ++++++++++++------ 2 files changed, 140 insertions(+), 63 deletions(-) diff --git a/credsweeper/secret/config.json b/credsweeper/secret/config.json index bed73ef44..fa50bb5f5 100644 --- a/credsweeper/secret/config.json +++ b/credsweeper/secret/config.json @@ -6,12 +6,12 @@ ".bz2", ".gz", ".tar", + ".xlsx", ".zip" ], "documents": [ ".docx", - ".pdf", - ".xlsx" + ".pdf" ], "extension": [ ".7z", diff --git a/tests/file_handler/test_file_path_extractor.py b/tests/file_handler/test_file_path_extractor.py index c3eb35ca4..eb7583876 100644 --- a/tests/file_handler/test_file_path_extractor.py +++ b/tests/file_handler/test_file_path_extractor.py @@ -1,9 +1,11 @@ import os.path +import re import tempfile +import unittest +from typing import List from unittest import mock import git -import pytest from humanfriendly import parse_size from credsweeper.config import Config @@ -11,20 +13,60 @@ from tests import AZ_STRING -class TestFilePathExtractor: +class TestFilePathExtractor(unittest.TestCase): + + def setUp(self): + config_dict = { + "size_limit": None, + "find_by_ext": False, + "find_by_ext_list": [], + "doc": False, + "depth": 0, + "exclude": { + "path": [], + "pattern": [], + "containers": [], + "documents": [], + "extension": [] + }, + "source_ext": [], + "source_quote_ext": [], + "check_for_literals": [], + "validation": {"api_validation": False}, + "use_filters": False, + "line_data_output": [], + 
"candidate_output": [], + "min_keyword_value_length": 0, + "min_pattern_value_length": 0, + } + self.config = Config(config_dict) + + # excluded always not_allowed_path_pattern + self.paths_not = ["dummy.css", "tmp/dummy.css", "c:\\temp\\dummy.css"] + # pattern + self.paths_reg = ["tmp/Magic/dummy.Number", "/tmp/log/MagicNumber.txt"] + # "/.git/" + self.paths_git = ["C:\\.git\\dummy", "./.git/dummy.sample", "~/.git\\dummy.txt"] + # not excluded + self.paths_src = ["dummy.py", "/tmp/dummy.py", "tmp/dummy.py", "C:\\dummy.py", "temp\\dummy.py"] + # not excluded when --depth are set + self.paths_pak = ["dummy.gz", "/tmp/dummy.gz", "tmp/dummy.gz", "C:\\dummy.gz", "temp\\dummy.gz"] + # not excluded when --doc or --depth are set + self.paths_doc = ["dummy.pdf", "/tmp/dummy.pdf", "tmp/dummy.pdf", "C:\\dummy.pdf", "temp\\dummy.pdf"] + # extension to be excluded always + self.paths_ext = ["dummy.so", "dummy.so", "/tmp/dummy.so", "tmp/dummy.so", "C:\\dummy.so", "temp\\dummy.so"] + + def tearDown(self): + del self.config def test_apply_gitignore_p(self) -> None: """Evaluate that code files would be included after filtering with .gitignore""" - files = ["file.py", "src/file.py", "src/dir/file.py"] - filtered_files = FilePathExtractor.apply_gitignore(files) - - assert set(filtered_files) == set(files) + self.assertSetEqual(set(files), set(filtered_files)) def test_apply_gitignore_n(self) -> None: """Evaluate that .gitignore correctly filters out files from project""" - with tempfile.TemporaryDirectory() as tmp_dir: git.Repo.init(tmp_dir) with open(os.path.join(tmp_dir, ".gitignore"), "w") as f: @@ -40,62 +82,97 @@ def test_apply_gitignore_n(self) -> None: ] filtered_files = FilePathExtractor.apply_gitignore(files) - assert len(filtered_files) == 1 - assert filtered_files[0] == os.path.join(tmp_dir, "src", "dir", "file.cpp") - - @pytest.mark.parametrize("file_path", [ - "/tmp/test/dummy.p12", - "dummy.docx", - "dummy.pdf", - "dummy.zip", - 
"C:\\Users\\RUNNER~1\\AppData\\Local\\Temp\\tmptjz2p1zk\\test\\dummy.p12", - "C:\\Users\\RUNNER~1\\AppData\\Local\\Temp\\tmptjz2p1zk\\TarGet\\dummy.p12", - ]) - def test_check_exclude_file_p(self, config: Config, file_path: pytest.fixture) -> None: - config.find_by_ext = True - config.doc = True - config.depth = 1 - assert not FilePathExtractor.check_exclude_file(config, file_path), f"{file_path}" - - @pytest.mark.parametrize("file_path", [ - "dummy.JPG", - "dummy.bmp", - "dummy.docx", - "dummy.pdf", - "dummy.zip", - "/tmp/target/dummy.p12", - "C:\\Users\\RUNNER~1\\AppData\\Local\\Temp\\tmptjz2p1zk\\target\\dummy.p12", - ]) - def test_check_exclude_file_n(self, config: Config, file_path: pytest.fixture) -> None: - config.find_by_ext = True - assert FilePathExtractor.check_exclude_file(config, file_path) - - @pytest.mark.parametrize("file_type", [".inf", ".txt"]) - def test_find_by_ext_file_p(self, config: Config, file_type: pytest.fixture) -> None: - config.find_by_ext = True - assert FilePathExtractor.is_find_by_ext_file(config, file_type) - - @pytest.mark.parametrize("file_type", [".bmp", ".doc"]) - def test_find_by_ext_file_n(self, config: Config, file_type: pytest.fixture) -> None: - assert not FilePathExtractor.is_find_by_ext_file(config, file_type) - config.find_by_ext = False - assert not FilePathExtractor.is_find_by_ext_file(config, file_type) + self.assertEqual(1, len(filtered_files)) + expected_path = os.path.join(tmp_dir, "src", "dir", "file.cpp") + self.assertEqual(expected_path, filtered_files[0]) + + def assert_true_check_exclude_file(self, paths: List[str]): + for i in paths: + self.assertTrue(FilePathExtractor.check_exclude_file(self.config, i), i) + + def assert_false_check_exclude_file(self, paths: List[str]): + for i in paths: + self.assertFalse(FilePathExtractor.check_exclude_file(self.config, i), i) + + def test_check_exclude_file_p(self) -> None: + # matched only not_allowed_path_pattern + self.config.exclude_containers = [".gz"] + 
self.config.exclude_documents = [".pdf"] + self.config.exclude_extensions = [".so"] + self.config.exclude_paths = ["/.git/"] + self.config.exclude_patterns = [re.compile(r".*magic.*number.*")] + self.config.depth = 1 + self.config.doc = False + self.assert_true_check_exclude_file(self.paths_not) + self.assert_true_check_exclude_file(self.paths_reg) + self.assert_true_check_exclude_file(self.paths_git) + self.assert_false_check_exclude_file(self.paths_src) + self.assert_false_check_exclude_file(self.paths_pak) + self.assert_false_check_exclude_file(self.paths_doc) + self.assert_true_check_exclude_file(self.paths_ext) + + # pdf should be not filtered + self.config.depth = 0 + self.config.doc = True + self.assert_true_check_exclude_file(self.paths_not) + self.assert_true_check_exclude_file(self.paths_reg) + self.assert_true_check_exclude_file(self.paths_git) + self.assert_false_check_exclude_file(self.paths_src) + self.assert_true_check_exclude_file(self.paths_pak) + self.assert_false_check_exclude_file(self.paths_doc) + self.assert_true_check_exclude_file(self.paths_ext) + + def test_check_exclude_file_n(self) -> None: + # none of extension are in config, only not_allowed_path_pattern matches + self.assert_true_check_exclude_file(self.paths_not) + self.assert_false_check_exclude_file(self.paths_reg) + self.assert_false_check_exclude_file(self.paths_git) + self.assert_false_check_exclude_file(self.paths_src) + self.assert_false_check_exclude_file(self.paths_pak) + self.assert_false_check_exclude_file(self.paths_doc) + self.assert_false_check_exclude_file(self.paths_ext) + + # matched only exclude_extensions + self.config.exclude_containers = [".gz"] + self.config.exclude_documents = [".pdf"] + self.config.exclude_extensions = [".so"] + self.assert_true_check_exclude_file(self.paths_not) + self.assert_false_check_exclude_file(self.paths_reg) + self.assert_false_check_exclude_file(self.paths_git) + self.assert_false_check_exclude_file(self.paths_src) + 
self.assert_true_check_exclude_file(self.paths_pak) + self.assert_true_check_exclude_file(self.paths_doc) + self.assert_true_check_exclude_file(self.paths_ext) + + def test_find_by_ext_file_p(self) -> None: + self.config.find_by_ext = True + self.config.find_by_ext_list = [".p12", ".jpg"] + self.assertTrue(FilePathExtractor.is_find_by_ext_file(self.config, ".p12")) + self.assertTrue(FilePathExtractor.is_find_by_ext_file(self.config, ".jpg")) + self.assertFalse(FilePathExtractor.is_find_by_ext_file(self.config, ".bmp")) + + def test_find_by_ext_file_n(self) -> None: + self.config.find_by_ext = False + self.config.find_by_ext_list = [".p12", ".bmp"] + self.assertFalse(FilePathExtractor.is_find_by_ext_file(self.config, ".p12")) + self.assertFalse(FilePathExtractor.is_find_by_ext_file(self.config, ".bmp")) + self.assertFalse(FilePathExtractor.is_find_by_ext_file(self.config, ".jpg")) @mock.patch("os.path.getsize") - def test_check_file_size_p(self, mock_getsize, config: Config) -> None: + def test_check_file_size_p(self, mock_getsize) -> None: mock_getsize.return_value = parse_size("11MiB") - config.size_limit = parse_size("10MiB") - assert FilePathExtractor.check_file_size(config, "") + self.config.size_limit = parse_size("10MiB") + self.assertTrue(FilePathExtractor.check_file_size(self.config, "")) @mock.patch("os.path.getsize") - def test_check_file_size_n(self, mock_getsize, config: Config) -> None: + def test_check_file_size_n(self, mock_getsize) -> None: mock_getsize.return_value = parse_size("11MiB") - config.size_limit = None - assert not FilePathExtractor.check_file_size(config, "") - config.size_limit = parse_size("11MiB") - assert not FilePathExtractor.check_file_size(config, "") + self.config.size_limit = None + self.assertFalse(FilePathExtractor.check_file_size(self.config, "")) + self.config.size_limit = parse_size("11MiB") + self.assertFalse(FilePathExtractor.check_file_size(self.config, "")) - def test_skip_symlink_n(self, config: Config) -> None: + def 
test_skip_symlink_n(self) -> None: with tempfile.TemporaryDirectory() as tmp_dir: sub_dir = os.path.join(tmp_dir, "sub_dir") os.mkdir(sub_dir) @@ -112,9 +189,9 @@ def test_skip_symlink_n(self, config: Config) -> None: for root, dirs, files in os.walk(tmp_dir): files_walked.update(files) dirs_walked.update(dirs) - assert dirs_walked == {"sub_dir", "s_dir_link"} - assert files_walked == {"target", "s_link"} + self.assertEqual({"sub_dir", "s_dir_link"}, dirs_walked) + self.assertEqual({"target", "s_link"}, files_walked) - paths = FilePathExtractor.get_file_paths(config, tmp_dir) - assert len(paths) == 1 - assert paths[0] == target_path + paths = FilePathExtractor.get_file_paths(self.config, tmp_dir) + self.assertEqual(1, len(paths)) + self.assertEqual(target_path, paths[0]) From 8af0953af59da045f1bd93d2aac7e1c6ad2b0f3e Mon Sep 17 00:00:00 2001 From: Roman Babenko Date: Thu, 26 Oct 2023 14:51:49 +0300 Subject: [PATCH 5/6] style --- tests/file_handler/test_file_path_extractor.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/tests/file_handler/test_file_path_extractor.py b/tests/file_handler/test_file_path_extractor.py index eb7583876..49bcfaf13 100644 --- a/tests/file_handler/test_file_path_extractor.py +++ b/tests/file_handler/test_file_path_extractor.py @@ -32,7 +32,9 @@ def setUp(self): "source_ext": [], "source_quote_ext": [], "check_for_literals": [], - "validation": {"api_validation": False}, + "validation": { + "api_validation": False + }, "use_filters": False, "line_data_output": [], "candidate_output": [], From bdde41acf61fa010d8672a298374653f817353a3 Mon Sep 17 00:00:00 2001 From: Roman Babenko Date: Thu, 26 Oct 2023 14:55:14 +0300 Subject: [PATCH 6/6] documentation updated --- docs/source/overall_architecture.rst | 2 +- tests/test_main.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/source/overall_architecture.rst b/docs/source/overall_architecture.rst index 093bdcdcd..47412c496 100644 --- 
a/docs/source/overall_architecture.rst +++ b/docs/source/overall_architecture.rst @@ -15,7 +15,7 @@ When paths to scan are entered, get the files in that paths and the files are ex - exclude - pattern: Regex patterns to exclude scan. - containers: Extensions in lower case of container files which might be scan with --depth option - - containers: Extensions in lower case of container files which might be scan with --doc and --depth option + - documents: Extensions in lower case of container files which might be scan with --doc and/or --depth option - extension: Extensions in lower case to exclude scan. - path: Paths to exclude scan. - source_ext: List of extensions for scanning categorized as source files. diff --git a/tests/test_main.py b/tests/test_main.py index c86c352b5..6ac4b999a 100644 --- a/tests/test_main.py +++ b/tests/test_main.py @@ -567,7 +567,7 @@ def test_docx_p(self) -> None: # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # def test_docx_n(self) -> None: - # test docx - no credential should be found without 'depth' + # test docx - no credential should be found without 'doc' content_provider: FilesProvider = TextProvider([SAMPLES_PATH / "sample.docx"]) cred_sweeper = CredSweeper(doc=False) cred_sweeper.run(content_provider=content_provider)