fox-it · Schamper · Jan 20, 2025
diff --git a/dissect/util/compression/__init__.py b/dissect/util/compression/__init__.py
@@ -1,3 +1,5 @@
+from typing import TYPE_CHECKING
+
 from dissect.util.compression import lz4 as lz4_python
 from dissect.util.compression import lzo as lzo_python
 
@@ -16,8 +18,8 @@
 # Note that the pure Python implementation is not a full replacement of the
 # native lz4 Python package: only the decompress() function is implemented.
 try:
-    import lz4.block as lz4
-    import lz4.block as lz4_native
+    import lz4.block as lz4  # type: ignore
+    import lz4.block as lz4_native  # type: ignore
 except ImportError:
     lz4 = lz4_python
     lz4_native = None
@@ -37,12 +39,19 @@
 # Note that the pure Python implementation is not a full replacement of the
 # native lzo Python package: only the decompress() function is implemented.
 try:
-    import lzo
-    import lzo as lzo_native
+    import lzo  # type: ignore
+    import lzo as lzo_native  # type: ignore
 except ImportError:
     lzo = lzo_python
     lzo_native = None
 
+
+from dissect.util.compression import lznt1, lzxpress, lzxpress_huffman, sevenbit
+
+if TYPE_CHECKING:
+    lzo = lzo_python
+    lz4 = lz4_python
+
 __all__ = [
     "lz4",
     "lz4_native",

diff --git a/dissect/util/compression/lz4.py b/dissect/util/compression/lz4.py
@@ -2,7 +2,7 @@
 
 import io
 import struct
-from typing import BinaryIO
+from typing import BinaryIO, cast
 
 from dissect.util.exceptions import CorruptDataError
 
@@ -25,12 +25,12 @@ def _get_length(src: BinaryIO, length: int) -> int:
 
 
 def decompress(
-    src: bytes | BinaryIO,
+    src: bytes | bytearray | memoryview | BinaryIO,
     uncompressed_size: int = -1,
     max_length: int = -1,
     return_bytearray: bool = False,
     return_bytes_read: bool = False,
-) -> bytes | tuple[bytes, int]:
+) -> bytes | bytearray | tuple[bytes | bytearray, int]:
     """LZ4 decompress from a file-like object up to a certain length. Assumes no header.
 
     Args:
@@ -44,7 +44,7 @@ def decompress(
     Returns:
         The decompressed data or a tuple of the decompressed data and the amount of bytes read.
     """
-    if not hasattr(src, "read"):
+    if isinstance(src, (bytes, bytearray, memoryview)):
         src = io.BytesIO(src)
 
     dst = bytearray()
@@ -78,7 +78,7 @@ def decompress(
         if len(read_buf) != 2:
             raise CorruptDataError("Premature EOF")
 
-        (offset,) = struct.unpack("<H", read_buf)
+        (offset,) = cast(tuple[int], struct.unpack("<H", read_buf))
 
         if offset == 0:
             raise CorruptDataError("Offset can't be 0")

diff --git a/dissect/util/compression/lznt1.py b/dissect/util/compression/lznt1.py
@@ -25,7 +25,7 @@ def _get_displacement(offset: int) -> int:
 TAG_MASKS = [(1 << i) for i in range(8)]
 
 
-def decompress(src: bytes | BinaryIO) -> bytes:
+def decompress(src: bytes | bytearray | memoryview | BinaryIO) -> bytes:
     """LZNT1 decompress from a file-like object or bytes.
 
     Args:
@@ -34,7 +34,7 @@ def decompress(src: bytes | BinaryIO) -> bytes:
     Returns:
         The decompressed data.
     """
-    if not hasattr(src, "read"):
+    if isinstance(src, (bytes, bytearray, memoryview)):
         src = io.BytesIO(src)
 
     offset = src.tell()

diff --git a/dissect/util/compression/lzo.py b/dissect/util/compression/lzo.py
@@ -23,7 +23,7 @@ def _read_length(src: BinaryIO, val: int, mask: int) -> int:
     return length + mask + val
 
 
-def decompress(src: bytes | BinaryIO, header: bool = True, buflen: int = -1) -> bytes:
+def decompress(src: bytes | bytearray | memoryview | BinaryIO, header: bool = True, buflen: int = -1) -> bytes:
     """LZO decompress from a file-like object or bytes. Assumes no header.
 
     Arguments are largely compatible with python-lzo API.
@@ -36,7 +36,7 @@ def decompress(src: bytes | BinaryIO, header: bool = True, buflen: int = -1) ->
     Returns:
         The decompressed data.
     """
-    if not hasattr(src, "read"):
+    if isinstance(src, (bytes, bytearray, memoryview)):
         src = io.BytesIO(src)
 
     dst = bytearray()

diff --git a/dissect/util/compression/lzxpress.py b/dissect/util/compression/lzxpress.py
@@ -6,7 +6,7 @@
 from typing import BinaryIO
 
 
-def decompress(src: bytes | BinaryIO) -> bytes:
+def decompress(src: bytes | bytearray | memoryview | BinaryIO) -> bytes:
     """LZXPRESS decompress from a file-like object or bytes.
 
     Args:
@@ -15,7 +15,7 @@ def decompress(src: bytes | BinaryIO) -> bytes:
     Returns:
         The decompressed data.
     """
-    if not hasattr(src, "read"):
+    if isinstance(src, (bytes, bytearray, memoryview)):
         src = io.BytesIO(src)
 
     offset = src.tell()
@@ -41,7 +41,7 @@ def decompress(src: bytes | BinaryIO) -> bytes:
             if src.tell() - offset == size:
                 break
 
-            match = struct.unpack("<H", src.read(2))[0]
+            match: int = struct.unpack("<H", src.read(2))[0]
             match_offset, match_length = divmod(match, 8)
             match_offset += 1
 

diff --git a/dissect/util/compression/lzxpress_huffman.py b/dissect/util/compression/lzxpress_huffman.py
@@ -19,17 +19,17 @@ def _read_16_bit(fh: BinaryIO) -> int:
 class Node:
     __slots__ = ("children", "is_leaf", "symbol")
 
-    def __init__(self, symbol: Symbol | None = None, is_leaf: bool = False):
+    def __init__(self, symbol: int = 0, is_leaf: bool = False):
         self.symbol = symbol
         self.is_leaf = is_leaf
-        self.children = [None, None]
+        self.children: list[Node | None] = [None, None]
 
 
 def _add_leaf(nodes: list[Node], idx: int, mask: int, bits: int) -> int:
     node = nodes[0]
     i = idx + 1
 
-    while bits > 1:
+    while node and bits > 1:
         bits -= 1
         childidx = (mask >> bits) & 1
         if node.children[childidx] is None:
@@ -38,6 +38,7 @@ def _add_leaf(nodes: list[Node], idx: int, mask: int, bits: int) -> int:
             i += 1
         node = node.children[childidx]
 
+    assert node
     node.children[mask & 1] = nodes[idx]
     return i
 
@@ -84,8 +85,9 @@ def _build_tree(buf: bytes) -> Node:
 
 
 class BitString:
+    source: BinaryIO
+
     def __init__(self):
-        self.source = None
         self.mask = 0
         self.bits = 0
 
@@ -114,16 +116,18 @@ def skip(self, n: int) -> None:
             self.mask += _read_16_bit(self.source) << (16 - self.bits)
             self.bits += 16
 
-    def decode(self, root: Node) -> Symbol:
+    def decode(self, root: Node) -> int:
         node = root
-        while not node.is_leaf:
+        while node and not node.is_leaf:
             bit = self.lookup(1)
             self.skip(1)
             node = node.children[bit]
+
+        assert node
         return node.symbol
 
 
-def decompress(src: bytes | BinaryIO) -> bytes:
+def decompress(src: bytes | bytearray | memoryview | BinaryIO) -> bytes:
     """LZXPRESS decompress from a file-like object or bytes.
 
     Decompresses until EOF of the input data.
@@ -134,7 +138,7 @@ def decompress(src: bytes | BinaryIO) -> bytes:
     Returns:
         The decompressed data.
     """
-    if not hasattr(src, "read"):
+    if isinstance(src, (bytes, bytearray, memoryview)):
         src = io.BytesIO(src)
 
     dst = bytearray()

diff --git a/dissect/util/compression/sevenbit.py b/dissect/util/compression/sevenbit.py
@@ -1,10 +1,10 @@
 from __future__ import annotations
 
-from io import BytesIO
+import io
 from typing import BinaryIO
 
 
-def compress(src: bytes | BinaryIO) -> bytes:
+def compress(src: bytes | bytearray | memoryview | BinaryIO) -> bytes:
     """Sevenbit compress from a file-like object or bytes.
 
     Args:
@@ -13,8 +13,8 @@ def compress(src: bytes | BinaryIO) -> bytes:
     Returns:
         The compressed data.
     """
-    if not hasattr(src, "read"):
-        src = BytesIO(src)
+    if isinstance(src, (bytes, bytearray, memoryview)):
+        src = io.BytesIO(src)
 
     dst = bytearray()
 
@@ -39,7 +39,7 @@ def compress(src: bytes | BinaryIO) -> bytes:
     return bytes(dst)
 
 
-def decompress(src: bytes | BinaryIO, wide: bool = False) -> bytes:
+def decompress(src: bytes | bytearray | memoryview | BinaryIO, wide: bool = False) -> bytes:
     """Sevenbit decompress from a file-like object or bytes.
 
     Args:
@@ -48,8 +48,8 @@ def decompress(src: bytes | BinaryIO, wide: bool = False) -> bytes:
     Returns:
         The decompressed data.
     """
-    if not hasattr(src, "read"):
-        src = BytesIO(src)
+    if isinstance(src, (bytes, bytearray, memoryview)):
+        src = io.BytesIO(src)
 
     dst = bytearray()
 

diff --git a/dissect/util/compression/xz.py b/dissect/util/compression/xz.py
@@ -8,7 +8,7 @@
 CRC_SIZE = 4
 
 
-def repair_checksum(fh: BinaryIO) -> BinaryIO:
+def repair_checksum(fh: BinaryIO) -> OverlayStream:
     """Repair CRC32 checksums for all headers in an XZ stream.
 
     FortiOS XZ files have (on purpose) corrupt streams which they read using a modified ``xz`` binary.
@@ -55,7 +55,7 @@ def repair_checksum(fh: BinaryIO) -> BinaryIO:
     # Parse the index
     isize, num_records = _mbi(index[1:])
     index = index[1 + isize : -4]
-    records = []
+    records: list[tuple[int, int]] = []
     for _ in range(num_records):
         if not index:
             raise ValueError("Missing index size")