Skip to content

Commit

Permalink
Add LZO decompression (#14)
Browse files Browse the repository at this point in the history
(DIS-1698)
  • Loading branch information
Schamper authored Dec 15, 2022
1 parent 630f01f commit 7870af8
Show file tree
Hide file tree
Showing 2 changed files with 166 additions and 1 deletion.
115 changes: 115 additions & 0 deletions dissect/util/compression/lzo.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,115 @@
import io
import struct
from typing import BinaryIO, Union


def _count_zeroes(src: BinaryIO):
length = 0
val = src.read(1)[0]
while val == 0:
length += 255
val = src.read(1)[0]
if length > 2**20:
raise ValueError("Too many zeroes")

return length + val


def _copy_block(src: BinaryIO, dst: bytearray, length: int, distance: int, trailing: int):
remaining = length

block = dst[-distance:]
remaining -= len(block)
while remaining > 0:
add = block[:remaining]
remaining -= len(add)
block += add

dst.extend(block[:length])
dst.extend(src.read(trailing))


def decompress(src: Union[bytes, BinaryIO], header: bool = True, buflen: int = -1) -> bytes:
    """LZO1X decompress from a file-like object or bytes.

    Arguments are largely compatible with python-lzo API.

    Args:
        src: File-like object or bytes to decompress.
        header: Whether the 5-byte metadata header precedes the compressed stream: a ``0xF0``/``0xF1``
            marker byte followed by the uncompressed length as a big-endian 32-bit integer.
        buflen: If ``header`` is ``False``, the expected output length in bytes. Decompression stops as soon
            as exactly this many bytes have been produced; with the default of ``-1`` it runs until the
            end-of-stream marker.

    Returns:
        The decompressed data.

    Raises:
        ValueError: If the header marker is invalid or the first stream byte is ``0x10``.
        IndexError: If the input ends where another instruction byte is expected.
    """
    if not hasattr(src, "read"):
        src = io.BytesIO(src)

    dst = bytearray()

    if header:
        byte = src.read(1)[0]
        if byte not in [0xF0, 0xF1]:
            raise ValueError("Invalid header value")
        # unpack() returns a tuple; without [0] the length comparison below could never succeed.
        # The length is read big-endian to match the header python-lzo writes.
        out_len = struct.unpack(">I", src.read(4))[0]
    else:
        out_len = buflen

    # Number of literals copied by the previous instruction: 0..3, or 4 meaning "4 or more".
    # It decides how an instruction byte in the range 0..15 has to be interpreted.
    state = 0

    val = src.read(1)[0]
    if val == 0x10:
        raise ValueError("LZOv1")
    elif val >= 0x12:
        # A first byte above 17 encodes an initial literal run of (byte - 17) bytes.
        initial = val - 0x11
        dst += src.read(initial)
        state = min(initial, 4)
        val = src.read(1)[0]

    while True:
        if val <= 0xF:
            if state == 0:
                # Literal run of at least 4 bytes.
                if val == 0:
                    dst += src.read(_count_zeroes(src) + 18)
                else:
                    dst += src.read(val + 3)
                state = 4
            else:
                h = src.read(1)[0]
                if state == 4:
                    # After a long literal run: 3-byte match from the 2..3 KiB window.
                    length = 3
                    dist = (h << 2) + (val >> 2) + 2049
                else:
                    # After 1..3 literals: 2-byte match within 1 KiB.
                    length = 2
                    dist = (h << 2) + (val >> 2) + 1
                state = val & 3
                _copy_block(src, dst, length, dist, state)
        elif val <= 0x1F:
            if val & 7 == 0:
                length = 9 + _count_zeroes(src)
            else:
                length = (val & 7) + 2
            ds = struct.unpack("<H", src.read(2))[0]
            # Bit 3 of the instruction selects the upper 16 KiB window, i.e. it is worth 1 << 14.
            dist = 16384 + ((val & 8) << 11) + (ds >> 2)
            if dist == 16384:
                # End-of-stream marker.
                break
            state = ds & 3
            _copy_block(src, dst, length, dist, state)
        elif val <= 0x3F:
            length = val & 31
            if length == 0:
                length = _count_zeroes(src) + 31
            length += 2
            ds = struct.unpack("<H", src.read(2))[0]
            dist = 1 + (ds >> 2)
            state = ds & 3
            _copy_block(src, dst, length, dist, state)
        else:
            if val <= 0x7F:
                length = 3 + ((val >> 5) & 1)
            else:
                length = 5 + ((val >> 5) & 3)
            h = src.read(1)[0]
            d = (val >> 2) & 7
            dist = (h << 3) + d + 1
            state = val & 3
            _copy_block(src, dst, length, dist, state)

        if len(dst) == out_len:
            break

        val = src.read(1)[0]

    return bytes(dst)
52 changes: 51 additions & 1 deletion tests/test_compression.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,13 @@
from dissect.util.compression import lz4, lznt1, lzxpress_huffman, lzxpress, sevenbit
import hashlib

from dissect.util.compression import (
lz4,
lznt1,
lzo,
lzxpress,
lzxpress_huffman,
sevenbit,
)


def test_lz4_decompress():
Expand All @@ -20,6 +29,47 @@ def test_lznt1_decompress():
)


def test_lzo_decompress():
    """Decompress two header-less LZO streams and compare against known output."""
    # Short stream: expands to the pattern "abc" repeated 100 times.
    repeated_abc = bytes.fromhex("0361626361626320f314000f616263616263616263616263616263616263110000")
    assert lzo.decompress(repeated_abc, False) == b"abc" * 100

    # Larger stream, decompressed with an explicit output length and verified by its SHA-256 digest.
    compressed = bytes.fromhex(
        "160900a40100400003a83e8e6302003800007104ff4000fc012add00032016dd"
        "00042016dd00052016dd00062016dd00072016dd00082016dd00092016dd000a"
        "2016dd000b2016dd000c2016dd000d2016dd000e2016dd000f2016dd00102016"
        "dd00112016dd00122016dd00132016dd00142016dd00152016dd00162016dd00"
        "172016dd00182016dd00192016dd001a2016dd001b2016dd001c2016dd001d20"
        "16dd001e2016dd001f2016dd00202016dd00212016dd00222016dd00232016dd"
        "00242016dd00252016dd00262016dd00272016dd00282016dd00292016dd002a"
        "2016dd002b2016dd002c2016dd002d2016dd002e2016dd002f2016dd00302016"
        "dd00312016dd00322016dd00332016dd00342016dd00352016dd00362016dd00"
        "372016dd00382016dd00392016dd003a2016dd003b2016dd003c2016dd003d20"
        "16dd003e2016dd003f2016dd00402016dd00412016dd00422016dd00432016dd"
        "00442016dd00452016dd00462016dd00472016dd00482016dd00492016dd004a"
        "2016dd004b2016dd004c2016dd004d2016dd004e2016dd004f2016dd00502016"
        "dd00512016dd00522016dd00532016dd00542016dd00552016dd00562016dd00"
        "572016dd00582016dd00592016dd005a2016dd005b2016dd005c2016dd005d20"
        "16dd005e2016dd005f2016dd00602016dd00612016dd00622016dd00632016dd"
        "00642016dd0065200adf000800ed27dc006001228d57e32501556c29dc00fd0b"
        "f55d04662b5c00307d010031dd004f5d06675c0027ce06c03f3b5e02e4022059"
        "0e00880228dd02115d16682002bc03ff020a00ff8902c75d0669dc0322dc5507"
        "736d616c6c2d66696c652a9500d455046ad404229455016469722f6f045f3639"
        "2a9a00eb4209096bd80422b0526804082d776974682d78617474722a1e077543"
        "080a7c3622cd5d91cd126d9500e0943a110000"
    )
    decompressed = lzo.decompress(compressed, False, 8192)
    digest = hashlib.sha256(decompressed).hexdigest()
    assert digest == "a4d6951085717a9698cd814899d11c931db1d4c0f7ddc3b1cba0f582142d4cf4"


def test_lzxpress_huffman_decompress():
assert (
lzxpress_huffman.decompress(
Expand Down

0 comments on commit 7870af8

Please sign in to comment.