Skip to content

Commit

Permalink
fix(deflate): split some code into new module and fix panic in pad_to…
Browse files Browse the repository at this point in the history
…_bytes from prev commit
  • Loading branch information
oyvindln committed Dec 31, 2024
1 parent 97ee3f1 commit 38b9ef2
Show file tree
Hide file tree
Showing 4 changed files with 128 additions and 115 deletions.
2 changes: 1 addition & 1 deletion miniz_oxide/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ compiler_builtins = { version = '0.1.2', optional = true }
[dev-dependencies]
## Messes with minimum rust version and drags in deps just for running tests
## so just comment out for now and enable manually when needed for enabling benches
# criterion = "0.5"
criterion = "0.5"

[[bench]]
name = "benchmark"
Expand Down
128 changes: 14 additions & 114 deletions miniz_oxide/src/deflate/core.rs
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ use crate::deflate::buffer::{
update_hash, HashBuffers, LocalBuf, LZ_CODE_BUF_SIZE, LZ_DICT_FULL_SIZE, LZ_HASH_BITS,
LZ_HASH_SHIFT, LZ_HASH_SIZE, OUT_BUF_SIZE,
};
use crate::deflate::zlib;
use crate::shared::{update_adler32, HUFFMAN_LENGTH_ORDER, MZ_ADLER32_INIT};
use crate::DataFormat;

Expand All @@ -19,7 +20,7 @@ use crate::DataFormat;
type Result<T, E = Error> = core::result::Result<T, E>;
struct Error {}

const MAX_PROBES_MASK: i32 = 0xFFF;
pub(crate) const MAX_PROBES_MASK: i32 = 0xFFF;

const MAX_SUPPORTED_HUFF_CODESIZE: usize = 32;

Expand Down Expand Up @@ -157,7 +158,7 @@ const BITMASKS: [u32; 17] = [

/// The maximum number of checks for matches in the hash table the compressor will make for each
/// compression level.
const NUM_PROBES: [u32; 11] = [0, 1, 6, 32, 16, 32, 128, 256, 512, 768, 1500];
pub(crate) const NUM_PROBES: [u32; 11] = [0, 1, 6, 32, 16, 32, 128, 256, 512, 768, 1500];

#[derive(Copy, Clone)]
struct SymFreq {
Expand Down Expand Up @@ -303,116 +304,7 @@ const MIN_MATCH_LEN: u8 = 3;
/// The maximum length of a match.
pub(crate) const MAX_MATCH_LEN: usize = 258;

const DEFAULT_FLAGS: u32 = NUM_PROBES[4] | TDEFL_WRITE_ZLIB_HEADER;

mod zlib {
use super::{TDEFL_FORCE_ALL_RAW_BLOCKS, TDEFL_RLE_MATCHES};

const DEFAULT_CM: u8 = 8;
const DEFAULT_CINFO: u8 = 7 << 4;
const _DEFAULT_FDICT: u8 = 0;
const DEFAULT_CMF: u8 = DEFAULT_CM | DEFAULT_CINFO;
// CMF used for RLE (technically it uses a window size of 0 but the lowest that can
// be specified in the header corresponds to a window size of 1 << (0 + 8) aka 256.
const MIN_CMF: u8 = DEFAULT_CM; // | 0
/// The 16-bit value consisting of CMF and FLG must be divisible by this to be valid.
const FCHECK_DIVISOR: u8 = 31;

/// Generate FCHECK from CMF and FLG (without FCKECH )so that they are correct according to the
/// specification, i.e (CMF*256 + FCHK) % 31 = 0.
/// Returns flg with the FCHKECK bits added (any existing FCHECK bits are ignored).
fn add_fcheck(cmf: u8, flg: u8) -> u8 {
let rem = ((usize::from(cmf) * 256) + usize::from(flg)) % usize::from(FCHECK_DIVISOR);

// Clear existing FCHECK if any
let flg = flg & 0b11100000;

// Casting is safe as rem can't overflow since it is a value mod 31
// We can simply add the value to flg as (31 - rem) will never be above 2^5
flg + (FCHECK_DIVISOR - rem as u8)
}

const fn zlib_level_from_flags(flags: u32) -> u8 {
use super::NUM_PROBES;

let num_probes = flags & (super::MAX_PROBES_MASK as u32);
if (flags & super::TDEFL_GREEDY_PARSING_FLAG != 0)
|| (flags & super::TDEFL_RLE_MATCHES != 0)
{
if num_probes <= 1 {
0
} else {
1
}
} else if num_probes >= NUM_PROBES[9] {
3
} else {
2
}
}

const fn cmf_from_flags(flags: u32) -> u8 {
if (flags & TDEFL_RLE_MATCHES == 0) && (flags & TDEFL_FORCE_ALL_RAW_BLOCKS == 0) {
DEFAULT_CMF
// If we are using RLE encoding or no compression the window bits can be set as the
// minimum.
} else {
MIN_CMF
}
}

/// Get the zlib header for the level using the default window size and no
/// dictionary.
fn header_from_level(level: u8, flags: u32) -> [u8; 2] {
let cmf = cmf_from_flags(flags);
[cmf, add_fcheck(cmf, level << 6)]
}

/// Create a zlib header from the given compression flags.
/// Only level is considered.
pub fn header_from_flags(flags: u32) -> [u8; 2] {
let level = zlib_level_from_flags(flags);
header_from_level(level, flags)
}

#[cfg(test)]
mod test {
#[test]
fn zlib() {
use super::super::*;
use super::*;

let test_level = |level, expected| {
let flags = create_comp_flags_from_zip_params(
level,
MZ_DEFAULT_WINDOW_BITS,
CompressionStrategy::Default as i32,
);
assert_eq!(zlib_level_from_flags(flags), expected);
};

assert_eq!(zlib_level_from_flags(DEFAULT_FLAGS), 2);
test_level(0, 0);
test_level(1, 0);
test_level(2, 1);
test_level(3, 1);
for i in 4..=8 {
test_level(i, 2)
}
test_level(9, 3);
test_level(10, 3);
}

#[test]
fn test_header() {
let header = super::header_from_level(3, 0);
assert_eq!(
((usize::from(header[0]) * 256) + usize::from(header[1])) % 31,
0
);
}
}
}
pub(crate) const DEFAULT_FLAGS: u32 = NUM_PROBES[4] | TDEFL_WRITE_ZLIB_HEADER;

#[cfg(test)]
#[inline]
Expand Down Expand Up @@ -695,6 +587,9 @@ struct OutputBufferOxide<'a> {
}

impl OutputBufferOxide<'_> {
/// Write bits to the bit buffer and flush
/// the bit buffer so that any whole bytes are output
/// to the underlying buffer.
fn put_bits(&mut self, bits: u32, len: u32) {
// TODO: Removing this assertion worsens performance
// Need to figure out why
Expand All @@ -711,6 +606,8 @@ impl OutputBufferOxide<'_> {
}

#[inline]
/// Write the provided bits to the bit buffer without flushing
/// anything. Does not check if there is actually space for it.
fn put_bits_no_flush(&mut self, bits: u32, len: u32) {
self.bit_buffer |= bits << self.bits_in;
self.bits_in += len;
Expand All @@ -733,6 +630,8 @@ impl OutputBufferOxide<'_> {
}

#[inline]
/// Pad the bit buffer to a whole byte with
/// zeroes and write that byte to the output buffer.
fn pad_to_bytes(&mut self) {
if self.bits_in != 0 {
let len = 8 - self.bits_in;
Expand Down Expand Up @@ -1364,6 +1263,7 @@ impl DictOxide {
// position to match against.
probe_pos = next_probe_pos & LZ_DICT_SIZE_MASK;

// TODO: This bounds check does not get optimized out
if self.read_as_u16(probe_pos + match_len as usize - 1) == c01 {
break 'found;
}
Expand Down Expand Up @@ -1722,7 +1622,7 @@ fn flush_block(
output.load(saved_buffer);

// Block header.
output.put_bits_no_flush(0, 2);
output.put_bits(0, 2);

// Block length has to start on a byte boundary, so pad.
output.pad_to_bytes();
Expand Down Expand Up @@ -1759,7 +1659,7 @@ fn flush_block(
} else {
// Sync or Full flush.
// Output an empty raw block.
output.put_bits_no_flush(0, 3);
output.put_bits(0, 3);
output.pad_to_bytes();
output.put_bits(0, 16);
output.put_bits(0xFFFF, 16);
Expand Down
1 change: 1 addition & 0 deletions miniz_oxide/src/deflate/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ use crate::alloc::vec::Vec;
mod buffer;
pub mod core;
pub mod stream;
mod zlib;
use self::core::*;

/// How much processing the compressor should do to compress the data.
Expand Down
112 changes: 112 additions & 0 deletions miniz_oxide/src/deflate/zlib.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,112 @@
use crate::deflate::core::deflate_flags::{
TDEFL_FORCE_ALL_RAW_BLOCKS, TDEFL_GREEDY_PARSING_FLAG, TDEFL_RLE_MATCHES,
};

/// Compression method (CM), the low nibble of the CMF byte. RFC 1950 assigns 8 to deflate.
const DEFAULT_CM: u8 = 8;
/// Window size field (CINFO), already shifted into the high nibble of the CMF byte.
/// A CINFO of 7 corresponds to a window size of 1 << (7 + 8).
const DEFAULT_CINFO: u8 = 7 << 4;
/// Preset dictionary flag value. Not referenced anywhere in this file (hence the
/// leading underscore).
const _DEFAULT_FDICT: u8 = 0;
/// CMF byte used for regular compression: deflate combined with the default window size.
const DEFAULT_CMF: u8 = DEFAULT_CM | DEFAULT_CINFO;
// CMF used for RLE (technically it uses a window size of 0, but the lowest that can
// be specified in the header corresponds to a window size of 1 << (0 + 8), aka 256).
const MIN_CMF: u8 = DEFAULT_CM; // | 0
/// The 16-bit value consisting of CMF and FLG must be divisible by this to be valid.
const FCHECK_DIVISOR: u8 = 31;

/// Generate FCHECK from CMF and FLG so that the pair is valid according to the
/// zlib specification, i.e. `(CMF * 256 + FLG) % 31 == 0`.
///
/// * `cmf` - the CMF byte of the header.
/// * `flg` - the FLG byte; only its upper three bits (FDICT and FLEVEL) are used.
///
/// Returns `flg` with the FCHECK bits (the lower five bits) filled in. Any FCHECK
/// bits already present in `flg` are ignored.
#[inline]
fn add_fcheck(cmf: u8, flg: u8) -> u8 {
    // Clear any existing FCHECK bits *before* computing the remainder. Otherwise
    // stale bits would influence the remainder and then be thrown away, leaving a
    // header that is not a multiple of 31.
    let flg = flg & 0b1110_0000;

    let header = (u16::from(cmf) << 8) | u16::from(flg);

    // The cast cannot truncate since the value is a remainder mod 31.
    let rem = (header % u16::from(FCHECK_DIVISOR)) as u8;

    // (31 - rem) is in 1..=31, so it always fits in the five cleared bits and the
    // addition cannot overflow.
    flg + (FCHECK_DIVISOR - rem)
}

/// Derive the two-bit compression level hint stored in the zlib header from the
/// compressor flags.
///
/// * Greedy parsing or RLE matching yields 0 when at most one probe is used, and
///   1 otherwise.
/// * Any other configuration yields 3 once the probe count reaches the level 9
///   entry of `NUM_PROBES`, and 2 below that.
#[inline]
const fn zlib_level_from_flags(flags: u32) -> u8 {
    use crate::deflate::core::NUM_PROBES;

    // The probe count lives in the low bits of the flags.
    let probes = flags & (super::MAX_PROBES_MASK as u32);
    let fast_parsing = (flags & (TDEFL_GREEDY_PARSING_FLAG | TDEFL_RLE_MATCHES)) != 0;

    if !fast_parsing {
        return if probes >= NUM_PROBES[9] { 3 } else { 2 };
    }

    if probes > 1 {
        1
    } else {
        0
    }
}

/// Pick the CMF byte for the given compressor flags.
///
/// When RLE matching or raw-only blocks are requested, the window bits can be set
/// to the minimum (`MIN_CMF`); every other configuration uses `DEFAULT_CMF`.
#[inline]
const fn cmf_from_flags(flags: u32) -> u8 {
    let minimum_window = (flags & (TDEFL_RLE_MATCHES | TDEFL_FORCE_ALL_RAW_BLOCKS)) != 0;

    if minimum_window {
        MIN_CMF
    } else {
        DEFAULT_CMF
    }
}

/// Assemble the two header bytes `[CMF, FLG]` for a zlib stream without a preset
/// dictionary.
///
/// * `level` - level hint placed in the top two bits of FLG.
/// * `flags` - compressor flags, used to choose the CMF byte.
#[inline]
fn header_from_level(level: u8, flags: u32) -> [u8; 2] {
    let cmf = cmf_from_flags(flags);
    // The level occupies the two most significant bits of FLG; the check bits are
    // filled in by `add_fcheck`.
    let flg = add_fcheck(cmf, level << 6);
    [cmf, flg]
}

/// Create the two-byte zlib header for the given compression flags.
///
/// The level hint is derived from the flags with `zlib_level_from_flags`; the same
/// flags are then passed on to `header_from_level`, which also uses them to choose
/// the CMF byte.
#[inline]
pub fn header_from_flags(flags: u32) -> [u8; 2] {
    header_from_level(zlib_level_from_flags(flags), flags)
}

#[cfg(test)]
mod test {
    use crate::shared::MZ_DEFAULT_WINDOW_BITS;

    /// Each compression level must map to the expected zlib header level hint.
    #[test]
    fn zlib() {
        use super::super::*;
        use super::*;

        // (compression level, expected header level hint)
        let expectations = [
            (0, 0),
            (1, 0),
            (2, 1),
            (3, 1),
            (4, 2),
            (5, 2),
            (6, 2),
            (7, 2),
            (8, 2),
            (9, 3),
            (10, 3),
        ];

        assert_eq!(zlib_level_from_flags(DEFAULT_FLAGS), 2);

        for &(level, expected) in expectations.iter() {
            let flags = create_comp_flags_from_zip_params(
                level,
                MZ_DEFAULT_WINDOW_BITS,
                CompressionStrategy::Default as i32,
            );
            assert_eq!(zlib_level_from_flags(flags), expected);
        }
    }

    /// The generated header, read as a big-endian 16-bit value, must be a multiple of 31.
    #[test]
    fn test_header() {
        let [cmf, flg] = super::header_from_level(3, 0);
        let combined = (usize::from(cmf) * 256) + usize::from(flg);
        assert_eq!(combined % 31, 0);
    }
}

0 comments on commit 38b9ef2

Please sign in to comment.