Skip to content

Commit

Permalink
feat: add zlib codec (#160)
Browse files Browse the repository at this point in the history
compatible with `numcodecs.zlib`
  • Loading branch information
LDeakin authored Mar 10, 2025
1 parent eec216b commit 25b7dd8
Show file tree
Hide file tree
Showing 22 changed files with 660 additions and 9 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
- Add `[Async]ArrayDlPackExt` traits that add methods to `Array` for `DLPack` tensor interop
- Gated by the `dlpack` feature
- Add missing `Group::async_child_*` methods
- Add `numcodecs.zlib` codec support

### Changed
- **Breaking**: change `ArraySubset::inbounds` to take another subset rather than a shape
Expand Down
1 change: 1 addition & 0 deletions zarrs/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@ pcodec = ["dep:pco"] # Enable the experimental pcodec codec
sharding = [] # Enable the sharding codec
transpose = ["dep:ndarray"] # Enable the transpose codec
zfp = ["dep:zfp-sys"] # Enable the experimental zfp codec
zlib = ["dep:flate2"] # Enable the experimental zlib codec
zstd = ["dep:zstd"] # Enable the zstd codec
ndarray = ["dep:ndarray"] # Adds ndarray utility functions to Array
dlpack =["dep:dlpark"] # Adds dlpack utility functions to Array
Expand Down
2 changes: 2 additions & 0 deletions zarrs/doc/status/codecs.md
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
| | [`zstd`] | Experimental | **zstd** |
| | [`numcodecs.bz2`] | Experimental | bz2 |
| | [`numcodecs.fletcher32`] | Experimental | fletcher32 |
| | [`numcodecs.zlib`] | Experimental | zlib |
| | [`zarrs.gdeflate`] | Experimental | gdeflate |

<sup>\* Bolded feature flags are part of the default set of features.</sup>
Expand Down Expand Up @@ -46,6 +47,7 @@
[`zstd`]: crate::array::codec::bytes_to_bytes::zstd
[`numcodecs.bz2`]: crate::array::codec::bytes_to_bytes::bz2
[`numcodecs.fletcher32`]: crate::array::codec::bytes_to_bytes::fletcher32
[`numcodecs.zlib`]: crate::array::codec::bytes_to_bytes::zlib
[`zarrs.gdeflate`]: crate::array::codec::bytes_to_bytes::gdeflate

**Experimental codecs are recommended for evaluation only**.
Expand Down
7 changes: 7 additions & 0 deletions zarrs/src/array.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1269,6 +1269,13 @@ mod tests {
array_v3_numcodecs("tests/data/v3_zarr_python/array_fletcher32.zarr")
}

/// Runs the shared `array_v3_numcodecs` check against the `array_zlib.zarr` fixture.
#[cfg(feature = "zlib")]
#[test]
#[cfg_attr(miri, ignore)]
fn array_v3_zlib() {
    let fixture = "tests/data/v3_zarr_python/array_zlib.zarr";
    array_v3_numcodecs(fixture)
}

#[cfg(feature = "gzip")]
#[test]
#[cfg_attr(miri, ignore)]
Expand Down
2 changes: 2 additions & 0 deletions zarrs/src/array/codec/bytes_to_bytes.rs
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,8 @@ pub mod fletcher32;
pub mod gdeflate;
#[cfg(feature = "gzip")]
pub mod gzip;
#[cfg(feature = "zlib")]
pub mod zlib;
#[cfg(feature = "zstd")]
pub mod zstd;

Expand Down
204 changes: 204 additions & 0 deletions zarrs/src/array/codec/bytes_to_bytes/zlib.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,204 @@
//! The `zlib` bytes to bytes codec (Experimental).
//!
//! <div class="warning">
//! This codec is experimental and may be incompatible with other Zarr V3 implementations.
//! </div>
//!
//! This codec requires the `zlib` feature, which is disabled by default.
//!
//! ### Compatible Implementations
//! This codec is fully compatible with the `numcodecs.zlib` codec in `zarr-python`.
//!
//! ### Specification
//! - <https://github.com/zarr-developers/zarr-extensions/tree/numcodecs/codecs/numcodecs.zlib>
//! - <https://codec.zarrs.dev/bytes_to_bytes/zlib>
//!
//! ### Codec `name` Aliases (Zarr V3)
//! - `numcodecs.zlib`
//!
//! ### Codec `id` Aliases (Zarr V2)
//! - `zlib`
//!
//! ### Codec `configuration` Example - [`ZlibCodecConfiguration`]:
//! ```rust
//! # let JSON = r#"
//! {
//! "level": 9
//! }
//! # "#;
//! # use zarrs_metadata::codec::zlib::ZlibCodecConfiguration;
//! # serde_json::from_str::<ZlibCodecConfiguration>(JSON).unwrap();
//! ```
mod zlib_codec;
mod zlib_partial_decoder;

use std::sync::Arc;

use crate::{
array::codec::{Codec, CodecPlugin},
metadata::codec::zlib,
metadata::v3::MetadataV3,
plugin::{PluginCreateError, PluginMetadataInvalidError},
};

pub use crate::metadata::codec::zlib::{
ZlibCodecConfiguration, ZlibCodecConfigurationV1, ZlibCompressionLevel,
};

pub use self::zlib_codec::ZlibCodec;

pub use zlib::IDENTIFIER;

// Register the codec.
// Submits a `CodecPlugin` built from `IDENTIFIER`, the identifier predicate
// `is_identifier_zlib`, and the constructor `create_codec_zlib` to `inventory`.
inventory::submit! {
CodecPlugin::new(IDENTIFIER, is_identifier_zlib, create_codec_zlib)
}

/// Returns `true` only when `identifier` is exactly equal to the zlib codec [`IDENTIFIER`].
fn is_identifier_zlib(identifier: &str) -> bool {
    IDENTIFIER == identifier
}

/// Create a `zlib` codec from codec `metadata`.
///
/// # Errors
/// Returns a [`PluginCreateError`] if `metadata` cannot be converted to a
/// [`ZlibCodecConfiguration`], or if [`ZlibCodec::new_with_configuration`] fails.
pub(crate) fn create_codec_zlib(metadata: &MetadataV3) -> Result<Codec, PluginCreateError> {
    let configuration: ZlibCodecConfiguration = match metadata.to_configuration() {
        Ok(configuration) => configuration,
        Err(_) => {
            // The underlying conversion error is discarded; the offending metadata is reported.
            let invalid = PluginMetadataInvalidError::new(IDENTIFIER, "codec", metadata.clone());
            return Err(invalid.into());
        }
    };
    let zlib_codec = ZlibCodec::new_with_configuration(&configuration)?;
    Ok(Codec::BytesToBytes(Arc::new(zlib_codec)))
}

#[cfg(test)]
mod tests {
    use std::{borrow::Cow, sync::Arc};

    use crate::{
        array::{
            codec::{BytesToBytesCodecTraits, CodecOptions},
            ArrayRepresentation, BytesRepresentation, DataType, FillValue,
        },
        array_subset::ArraySubset,
        byte_range::ByteRange,
    };

    use super::*;

    /// A valid codec configuration used by every test in this module.
    const JSON_VALID1: &str = r#"
{
"level": 5
}"#;

    /// Builds the codec under test from [`JSON_VALID1`].
    fn codec_from_json() -> ZlibCodec {
        let configuration: ZlibCodecConfiguration = serde_json::from_str(JSON_VALID1).unwrap();
        ZlibCodec::new_with_configuration(&configuration).unwrap()
    }

    /// A 2x2x2 `UInt16` array representation with a fill value of zero.
    fn uint16_cube() -> ArrayRepresentation {
        ArrayRepresentation::new(vec![2, 2, 2], DataType::UInt16, FillValue::from(0u16)).unwrap()
    }

    /// Reinterprets native-endian bytes as a sequence of `u16` values.
    fn u16s_from_ne_bytes(bytes: &[u8]) -> Vec<u16> {
        bytes
            .chunks_exact(size_of::<u16>())
            .map(|pair| u16::from_ne_bytes(pair.try_into().unwrap()))
            .collect()
    }

    /// Encoding then decoding must reproduce the input bytes.
    #[test]
    #[cfg_attr(miri, ignore)]
    fn codec_zlib_round_trip1() {
        let options = CodecOptions::default();
        let values: Vec<u16> = (0..32).collect();
        let raw = crate::array::transmute_to_bytes_vec(values);
        let bytes_representation = BytesRepresentation::FixedSize(raw.len() as u64);

        let codec = codec_from_json();

        let encoded = codec.encode(Cow::Borrowed(&raw), &options).unwrap();
        let decoded = codec
            .decode(encoded, &bytes_representation, &options)
            .unwrap();
        assert_eq!(raw, decoded.to_vec());
    }

    /// Partially decodes the subset `[0..2, 1..2, 0..1]` of a 2x2x2 array holding `0..8`.
    #[test]
    #[cfg_attr(miri, ignore)]
    fn codec_zlib_partial_decode() {
        let options = CodecOptions::default();
        let representation = uint16_cube();
        let element_size = representation.data_type().fixed_size().unwrap();
        let total_bytes = representation.num_elements_usize() * element_size;
        let bytes_representation = BytesRepresentation::FixedSize(total_bytes as u64);

        let values: Vec<u16> = (0..representation.num_elements() as u16).collect();
        let raw = crate::array::transmute_to_bytes_vec(values);

        let codec = Arc::new(codec_from_json());
        let encoded = codec.encode(Cow::Owned(raw), &options).unwrap();

        let regions: Vec<ByteRange> = ArraySubset::new_with_ranges(&[0..2, 1..2, 0..1])
            .byte_ranges(representation.shape(), element_size)
            .unwrap();
        let input = Arc::new(std::io::Cursor::new(encoded));
        let partial_decoder = codec
            .partial_decoder(input, &bytes_representation, &options)
            .unwrap();
        let decoded = partial_decoder
            .partial_decode_concat(&regions, &options)
            .unwrap()
            .unwrap();

        let decoded: Vec<u16> = u16s_from_ne_bytes(&decoded.to_vec());

        // Elements at indices (0, 1, 0) and (1, 1, 0) of the row-major 0..8 ramp.
        let answer: Vec<u16> = vec![2, 6];
        assert_eq!(answer, decoded);
    }

    /// Async counterpart of `codec_zlib_partial_decode`.
    #[cfg(feature = "async")]
    #[tokio::test]
    #[cfg_attr(miri, ignore)]
    async fn codec_zlib_async_partial_decode() {
        let options = CodecOptions::default();
        let representation = uint16_cube();
        let element_size = representation.data_type().fixed_size().unwrap();
        let total_bytes = representation.num_elements_usize() * element_size;
        let bytes_representation = BytesRepresentation::FixedSize(total_bytes as u64);

        let values: Vec<u16> = (0..representation.num_elements() as u16).collect();
        let raw = crate::array::transmute_to_bytes_vec(values);

        let codec = Arc::new(codec_from_json());
        let encoded = codec.encode(Cow::Owned(raw), &options).unwrap();

        let regions: Vec<ByteRange> = ArraySubset::new_with_ranges(&[0..2, 1..2, 0..1])
            .byte_ranges(representation.shape(), element_size)
            .unwrap();
        let input = Arc::new(std::io::Cursor::new(encoded));
        let partial_decoder = codec
            .async_partial_decoder(input, &bytes_representation, &options)
            .await
            .unwrap();
        let decoded = partial_decoder
            .partial_decode_concat(&regions, &options)
            .await
            .unwrap()
            .unwrap();

        let decoded: Vec<u16> = u16s_from_ne_bytes(&decoded.to_vec());

        // Elements at indices (0, 1, 0) and (1, 1, 0) of the row-major 0..8 ramp.
        let answer: Vec<u16> = vec![2, 6];
        assert_eq!(answer, decoded);
    }
}
Loading

0 comments on commit 25b7dd8

Please sign in to comment.