Skip to content

Commit

Permalink
create file extension helper function and reorder zip functions
Browse files Browse the repository at this point in the history
  • Loading branch information
Patterbear committed Nov 6, 2024
1 parent ecccac5 commit 6138f1a
Show file tree
Hide file tree
Showing 2 changed files with 16 additions and 18 deletions.
32 changes: 15 additions & 17 deletions docubleach/bleach.py
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,10 @@
FILESIZE_LIMIT = 209715200


def get_file_extension(file):
    """Return the lower-cased extension of *file* without the leading dot.

    Only the final path component is inspected, so a dot inside a directory
    name is never mistaken for an extension. A name with no extension (for
    example a file literally called ``doc``) yields an empty string rather
    than the name itself, which keeps such files from matching the supported
    format lists by accident.

    Args:
        file: Path or file name as a string.

    Returns:
        The extension in lower case (e.g. ``"docx"``), or ``""`` when the
        file name has no extension.
    """
    # Imported locally so this helper does not depend on how the module
    # imports os / os.path at the top of the file.
    from os.path import splitext

    # splitext() returns the extension including its leading dot (or "").
    return splitext(file)[1][1:].lower()


def unzip_file(file):
rename(file, file + ".zip")

Expand All @@ -72,10 +76,14 @@ def unzip_file(file):
remove(file + ".zip")


def detect_macros(file):
file_type = file.split(".")[-1].lower()
def rezip_file(file):
    """Rebuild *file* as a zip archive from its ``<file>_temp`` directory.

    The directory ``file + "_temp"`` is archived to ``file + ".zip"``, the
    archive is renamed to *file*, and the temporary directory is deleted.

    Args:
        file: Path of the target file as a string.
    """
    extracted_dir = file + "_temp"
    archive_path = file + ".zip"

    # make_archive() appends ".zip" to the base name it is given.
    make_archive(file, "zip", extracted_dir)
    rename(archive_path, file)
    rmtree(extracted_dir)

if file_type in bff_formats:

def detect_macros(file):
if get_file_extension(file) in bff_formats:
with OleFileIO(file, write_mode=False) as ole:
streams = ole.listdir(streams=True)
macro_streams = []
Expand All @@ -99,7 +107,7 @@ def detect_macros(file):


def remove_macros(file, notify=False):
file_type = file.split(".")[-1].lower()
file_type = get_file_extension(file)

if file_type in ooxml_formats:
unzip_file(file)
Expand All @@ -111,7 +119,7 @@ def remove_macros(file, notify=False):


def remove_bff_macros(file, notify):
file_type = file.split(".")[-1].lower()
file_type = get_file_extension(file)
macros_found = False

if file_type == "doc" or file_type == "xls":
Expand Down Expand Up @@ -143,9 +151,7 @@ def remove_bff_macros(file, notify):

def remove_ooxml_macros(file, notify):
macros_found = False
file_type = file.split(".")[-1].lower()

macro_folder = ooxml_macro_folders.get(file_type[:2])
macro_folder = ooxml_macro_folders.get(get_file_extension(file)[:2])

if path.exists(file + f"_temp/{macro_folder}/vbaProject.bin"):
remove(file + f"_temp/{macro_folder}/vbaProject.bin")
Expand All @@ -159,16 +165,8 @@ def remove_ooxml_macros(file, notify):
print("Macros detected and removed.")


def rezip_file(file):
    """Rebuild *file* as a zip archive from its ``<file>_temp`` directory.

    The directory ``file + "_temp"`` is archived to ``file + ".zip"``, the
    archive is renamed to *file*, and the temporary directory is deleted.

    Args:
        file: Path of the target file as a string.
    """
    extracted_dir = file + "_temp"
    archive_path = file + ".zip"

    # make_archive() appends ".zip" to the base name it is given.
    make_archive(file, "zip", extracted_dir)
    rename(archive_path, file)
    rmtree(extracted_dir)


def validate_file(file):
filetype = file.split(".")[-1].lower()

if filetype in ooxml_formats or filetype in bff_formats:
if get_file_extension(file) in ooxml_formats + bff_formats:
if getsize(file) < FILESIZE_LIMIT:
return True
else:
Expand Down
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[tool.poetry]
name = "docubleach"
version = "0.1.0"
version = "0.1.2"
description = "Tool to purge and remove all macro and dynamic content from an MS Office file"
authors = ["Patterbear"]
license = "MIT"
Expand Down

0 comments on commit 6138f1a

Please sign in to comment.