Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Implement generic CRC-32 #24

Merged
merged 1 commit into from
Feb 16, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions zlib-rs/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -18,3 +18,5 @@ libc.workspace = true
[dev-dependencies]
libloading = "0.8.1"
libz-ng-sys = "1.1.12"
crc32fast = "1.3.2"
quickcheck = "1.0.3"
169 changes: 169 additions & 0 deletions zlib-rs/src/crc32.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,169 @@
// Several implementations of CRC-32:
// * A naive byte-granularity approach
// * A word-sized approach that processes a usize word at a time
// * A "braid" implementation that processes a block of N words
// at a time, based on the algorithm in section 4.11 from
// https://github.com/zlib-ng/zlib-ng/blob/develop/doc/crc-doc.1.0.pdf.

// Bit pattern of the CRC-32 generator polynomial.
// We are assuming little-endianness, so the input is processed
// LSB-first and the "reversed" representation of the polynomial is
// the one required; see, e.g.,
// https://en.wikipedia.org/wiki/Cyclic_redundancy_check#Polynomial_representations.
const CRC32_LSB_POLY: usize = 0xedb8_8320;

// Size of a machine word (`usize`) in bytes.
const W: usize = (usize::BITS / 8) as usize;

// Compile-time guard: the rest of this file stores 32-bit CRC values
// inside `usize` words, which only works when a word is at least as
// wide as a `u32`.
const _: () = assert!((u32::BITS / 8) as usize <= W);

// Pre-computed tables for the CRC32 algorithm. Both are produced by
// `build_crc32_table`, a `const fn` used here as a static initializer.
//
// CRC32_BYTE_TABLE corresponds to MulByXPowD from the paper.
// It holds a single 256-entry table (one entry per possible byte value)
// and is the table indexed by `crc32_naive_inner`.
static CRC32_BYTE_TABLE: [[u32; 256]; 1] = build_crc32_table::<256, 1, 1>();
// CRC32_WORD_TABLE is MulWordByXpowD.
// It holds W sub-tables of 256 entries, one per byte position of a
// `usize` word; `crc32_words_inner` pairs sub-table `i` with the `i`-th
// least-significant byte of each word.
static CRC32_WORD_TABLE: [[u32; 256]; W] = build_crc32_table::<256, W, 1>();

// Work-around for not being able to define generic consts or statics:
// an associated const on a type that is generic over N serves the same
// purpose.
// Crc32BraidTable::<N>::TABLE is the generic table for any braid size N.
// The struct has no fields and exists only to carry the const parameter.
struct Crc32BraidTable<const N: usize>;

impl<const N: usize> Crc32BraidTable<N> {
// W sub-tables of 256 entries each, built by `build_crc32_table` with
// the braid size N as its third parameter. `crc32_braid::<N>` indexes
// sub-table `j` with the `j`-th least-significant byte of each word.
const TABLE: [[u32; 256]; W] = build_crc32_table::<256, W, N>();
}

/// Builds `W` CRC-32 lookup tables of `A` entries each.
///
/// This replaces the combination of Multiply and XpowN (polynomial
/// multiplication and exponentiation) described in the paper with a
/// simpler and cheaper bit-by-bit computation that yields identical
/// tables. Being a `const fn`, it is evaluated at compile time when used
/// to initialize a `const` or `static`.
///
/// Entry `[i][j]` is the value `j` after `8 * (W * N - i)` steps of the
/// LSB-first CRC shift register: shift right by one bit and XOR in
/// `CRC32_LSB_POLY` whenever the bit shifted out was set.
///
/// Note that the const parameter `W` shadows the module-level `W` inside
/// this function.
const fn build_crc32_table<const A: usize, const W: usize, const N: usize>() -> [[u32; A]; W] {
    let mut tables = [[0u32; A]; W];

    let mut table_idx = 0;
    while table_idx < W {
        let mut entry = 0;
        while entry < A {
            // Number of single-bit shift steps applied to this entry.
            let total_steps = 8 * (W * N - table_idx);

            let mut value = entry;
            let mut step = 0;
            while step < total_steps {
                let shifted = value >> 1;
                value = if value & 1 != 0 {
                    CRC32_LSB_POLY ^ shifted
                } else {
                    shifted
                };
                step += 1;
            }

            tables[table_idx][entry] = value as u32;
            entry += 1;
        }
        table_idx += 1;
    }

    tables
}

/// Advances the raw CRC state `start` over `data`, one byte at a time.
///
/// No bit inversion is applied here; callers invert the state before and
/// after as needed. Returns the updated state (`start` itself when `data`
/// is empty).
fn crc32_naive_inner(data: &[u8], start: u32) -> u32 {
    let table = &CRC32_BYTE_TABLE[0];

    let mut crc = start;
    for &byte in data {
        // The least-significant byte of the state is combined with the
        // next input byte to select the table entry.
        let [low, ..] = crc.to_le_bytes();
        crc = table[usize::from(low ^ byte)] ^ (crc >> 8);
    }
    crc
}

/// Advances the raw CRC state `start` over `words`, one `usize` word at a
/// time.
///
/// `per_word_crcs[i]`, when present, is an extra partial CRC that is XORed
/// into the state just before word `i` is consumed; indices beyond the end
/// of `per_word_crcs` contribute nothing. Pass `&[]` for a plain
/// word-at-a-time CRC.
///
/// No bit inversion is applied here. Returns the updated state (`start`
/// itself when `words` is empty).
fn crc32_words_inner(words: &[usize], start: u32, per_word_crcs: &[u32]) -> u32 {
    words.iter().enumerate().fold(start, |crc, (i, &word)| {
        let pending = per_word_crcs.get(i).copied().unwrap_or(0);

        // Interpret the word as little-endian so that its least-significant
        // byte is the first byte of the input stream; the CRC state must be
        // XORed into those leading bytes. `from_le` is a no-op on
        // little-endian targets.
        // NOTE(review): the big-endian path has not been exercised on real
        // hardware — confirm on a big-endian target.
        let value = usize::from_le(word) ^ (crc ^ pending) as usize;

        // Iterate the table by reference: passing the static by value to
        // `zip` would copy the entire table for every word.
        let bytes = value.to_le_bytes();
        bytes
            .iter()
            .zip(CRC32_WORD_TABLE.iter())
            .fold(0u32, |acc, (&byte, table)| acc ^ table[usize::from(byte)])
    })
}

/// Computes the CRC-32 of `data` with the byte-at-a-time algorithm.
///
/// `start` is the initial CRC value. It is bit-inverted before processing
/// and the resulting state is bit-inverted again before being returned;
/// the tests in this file check the result against
/// `crc32fast::Hasher::new_with_initial(start)`.
pub fn crc32_naive(data: &[u8], start: u32) -> u32 {
    !crc32_naive_inner(data, !start)
}

/// Computes the CRC-32 of `data`, consuming one `usize` word at a time
/// where alignment allows.
///
/// The input is split into an unaligned prefix, a word-aligned middle and
/// a trailing suffix. The prefix and suffix go through the byte-wise
/// routine, the middle through the word-wise one. `start` is the initial
/// CRC value; it is bit-inverted on entry and the result is inverted again
/// on return.
pub fn crc32_words(data: &[u8], start: u32) -> u32 {
    // SAFETY: every bit pattern is a valid `usize`, so reinterpreting the
    // aligned part of a byte slice as words is sound.
    let (head, body, tail) = unsafe { data.align_to::<usize>() };

    let mut state = !start;
    state = crc32_naive_inner(head, state);
    state = crc32_words_inner(body, state, &[]);
    state = crc32_naive_inner(tail, state);
    !state
}

/// Computes the CRC-32 of `data` with the "braid" algorithm, processing
/// blocks of `N` words at a time while keeping `N` partial CRCs.
///
/// The input is split into an unaligned prefix, a word-aligned middle and
/// a trailing suffix. The prefix and suffix go through the byte-wise
/// routine. Of the middle, every full block of `N` words except the last
/// one is handled by the braided loop; the remaining words are passed to
/// `crc32_words_inner` together with the partial CRCs, which folds them
/// into the final value.
///
/// `start` is the initial CRC value; it is bit-inverted on entry and the
/// result is inverted again on return.
///
/// # Panics
///
/// Panics if `N` is zero, because the partial-CRC array has no first
/// element to seed.
pub fn crc32_braid<const N: usize>(data: &[u8], start: u32) -> u32 {
    // SAFETY: every bit pattern is a valid `usize`, so reinterpreting the
    // aligned part of a byte slice as words is sound.
    let (prefix, words, suffix) = unsafe { data.align_to::<usize>() };
    let crc = crc32_naive_inner(prefix, !start);

    let mut crcs = [0u32; N];
    crcs[0] = crc;

    // The last full block is deliberately left out of the braided loop:
    // the partial CRCs accumulated in `crcs` still have to be rolled into
    // a single value, and `crc32_words_inner` does that through its
    // `per_word_crcs` argument while it consumes the remaining words.
    let braided_blocks = (words.len() / N).saturating_sub(1);
    let (braided, rest) = words.split_at(braided_blocks * N);

    for block in braided.chunks_exact(N) {
        // Load the next N words, decoded as little-endian so that the
        // least-significant byte is the first byte of the stream, and only
        // then XOR in the partial CRCs. `from_le` is a no-op on
        // little-endian targets.
        // NOTE(review): the big-endian path has not been exercised on real
        // hardware — confirm on a big-endian target.
        let mut buffer: [usize; N] =
            core::array::from_fn(|k| usize::from_le(block[k]) ^ crcs[k] as usize);

        crcs.fill(0);
        for j in 0..W {
            // Consume byte `j` of every word in the block with sub-table `j`.
            for (partial, word) in crcs.iter_mut().zip(buffer.iter_mut()) {
                *partial ^= Crc32BraidTable::<N>::TABLE[j][*word & 0xff];
                *word >>= 8;
            }
        }
    }

    // `crcs[0]` becomes the running state; the other entries stay in place
    // and are applied word by word while the remaining words are consumed.
    let crc = core::mem::take(&mut crcs[0]);
    let crc = crc32_words_inner(rest, crc, &crcs);
    let crc = crc32_naive_inner(suffix, crc);
    !crc
}

#[cfg(test)]
mod test {
    use super::*;

    // Reference checksum of `bytes` computed by the `crc32fast` crate,
    // seeded with `start`.
    fn reference_crc32(bytes: &[u8], start: u32) -> u32 {
        let mut hasher = crc32fast::Hasher::new_with_initial(start);
        hasher.update(bytes);
        hasher.finalize()
    }

    // Every implementation must agree with `crc32fast` for arbitrary
    // input bytes and arbitrary initial values.
    quickcheck::quickcheck! {
        fn naive_is_crc32fast(v: Vec<u8>, start: u32) -> bool {
            crc32_naive(&v, start) == reference_crc32(&v, start)
        }

        fn words_is_crc32fast(v: Vec<u8>, start: u32) -> bool {
            crc32_words(&v, start) == reference_crc32(&v, start)
        }

        fn braid_4_is_crc32fast(v: Vec<u8>, start: u32) -> bool {
            crc32_braid::<4>(&v, start) == reference_crc32(&v, start)
        }

        fn braid_5_is_crc32fast(v: Vec<u8>, start: u32) -> bool {
            crc32_braid::<5>(&v, start) == reference_crc32(&v, start)
        }

        fn braid_6_is_crc32fast(v: Vec<u8>, start: u32) -> bool {
            crc32_braid::<6>(&v, start) == reference_crc32(&v, start)
        }
    }
}
1 change: 1 addition & 0 deletions zlib-rs/src/lib.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
mod adler32;
pub mod allocate;
pub mod c_api;
mod crc32;
pub mod deflate;
pub mod inflate;

Expand Down
Loading