diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index 45b2f562..0d7a3906 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -30,6 +30,7 @@ jobs:
- uses: Swatinem/rust-cache@v2
- run: cargo build ${{ matrix.features }}
- run: cargo test ${{ matrix.features }}
+ - run: cargo test ${{ matrix.features }} --examples
build_and_test_windows:
runs-on: windows-latest
steps:
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 7ede3710..246bdd11 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -11,6 +11,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
- Add `array:codec::{InvalidBytesLengthError,InvalidArrayShapeError,InvalidNumberOfElementsError,SubsetOutOfBoundsError}`
- Add `ArraySubset::inbounds_shape()` (matches the old `ArraySubset::inbounds` behaviour)
- Add `ArrayBytesFixedDisjointView[CreateError]`
+- Add support for data type extensions with `zarrs_data_type` 0.2.0
+- Add `custom_data_type_fixed_size` and `custom_data_type_variable_size` examples
### Changed
- **Breaking**: change `ArraySubset::inbounds` to take another subset rather than a shape
@@ -36,6 +38,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
- `zarrs_plugin` is re-exported as `zarrs::plugin`
- **Breaking**: `Plugin` is now generic over the creation arguments
- **Breaking**: `StorageTransformerPlugin` now uses a `Plugin`
+- Add `DataTypeExtension` variant to `CodecError`
+- `ArrayCreateError::DataTypeCreateError` now uses a `PluginCreateError` internally
### Fixed
- Fixed reserving one more element than necessary when retrieving `string` or `bytes` array elements
diff --git a/Cargo.toml b/Cargo.toml
index de135bae..fd4e12d5 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -29,7 +29,7 @@ missing_panics_doc = "warn"
missing_errors_doc = "warn"
[workspace.dependencies.zarrs_data_type]
-version = "0.1.0"
+version = "0.2.0"
path = "zarrs_data_type"
[workspace.dependencies.zarrs_metadata]
diff --git a/README.md b/README.md
index 5039724a..2e08ab4c 100644
--- a/README.md
+++ b/README.md
@@ -8,17 +8,26 @@
[](https://codecov.io/gh/LDeakin/zarrs)
[](https://zenodo.org/badge/latestdoi/695021547)
-`zarrs` is a Rust library for the [Zarr] storage format for multidimensional arrays and metadata. It supports [Zarr V3] and a V3 compatible subset of [Zarr V2].
+`zarrs` is a Rust library for the [Zarr] storage format for multidimensional arrays and metadata.
-A changelog can be found [here][CHANGELOG].
-Correctness issues with past versions are [detailed here][correctness_issues].
-
-Developed at the [Department of Materials Physics, Australian National University, Canberra, Australia].
+`zarrs` supports [Zarr V3] and a V3 compatible subset of [Zarr V2].
+It is fully up-to-date and conformant with the Zarr 3.0 specification with support for:
+- all *core extensions* (data types, codecs, chunk grids, chunk key encodings, storage transformers),
+- all accepted [Zarr Enhancement Proposals (ZEPs)](https://zarr.dev/zeps/) and several draft ZEPs:
+ - ZEP 0003: Variable chunking
+ - ZEP 0007: Strings
+- experimental codecs and data types intended for standardisation, and
+- user-defined custom extensions and stores.
> [!TIP]
> If you are a Python user, check out [`zarrs-python`].
> It includes a high-performance codec pipeline for the reference [`zarr-python`] implementation.
+A changelog can be found [here][CHANGELOG].
+Correctness issues with past versions are [detailed here][correctness_issues].
+
+Developed at the [Department of Materials Physics, Australian National University, Canberra, Australia].
+
## Getting Started
- Review the [implementation status] ([zarr version support], [array support], [storage support], and the [`zarrs` ecosystem](#zarrs-ecosystem)).
- Read [The `zarrs` Book].
@@ -93,7 +102,7 @@ println!("{array_ndarray:4}");
- [`zarrs`]: The core library for manipulating Zarr hierarchies.
- [`zarrs_data_type`]: Zarr data types (re-exported as `zarrs::data_type`).
- [`zarrs_metadata`]: Zarr metadata support (re-exported as `zarrs::metadata`).
-- [`zarrs_plugin`]: Zarr plugin support (re-exported as `zarrs::plugin`).
+- [`zarrs_plugin`]: The plugin API for `zarrs` (re-exported as `zarrs::plugin`).
- [`zarrs_storage`]: The storage API for `zarrs` (re-exported as `zarrs::storage`).
### Stores
diff --git a/makefile b/makefile
index 7b84f41a..1febaaf7 100644
--- a/makefile
+++ b/makefile
@@ -7,6 +7,7 @@ build:
test:
cargo +$(TOOLCHAIN) test --all-features
+ cargo +$(TOOLCHAIN) test --all-features --examples
doc: RUSTDOCFLAGS="-D warnings --cfg docsrs"
doc:
diff --git a/zarrs/doc/ecosystem.md b/zarrs/doc/ecosystem.md
index 3d796ecb..7c7994fd 100644
--- a/zarrs/doc/ecosystem.md
+++ b/zarrs/doc/ecosystem.md
@@ -2,7 +2,7 @@
- [`zarrs`]: The core library for manipulating Zarr hierarchies.
- [`zarrs_data_type`]: Zarr data types (re-exported as `zarrs::data_type`).
- [`zarrs_metadata`]: Zarr metadata support (re-exported as `zarrs::metadata`).
-- [`zarrs_plugin`]: Zarr plugin support (re-exported as `zarrs::plugin`).
+- [`zarrs_plugin`]: The plugin API for `zarrs` (re-exported as `zarrs::plugin`).
- [`zarrs_storage`]: The storage API for `zarrs` (re-exported as `zarrs::storage`).
#### Stores
@@ -27,24 +27,24 @@
- Transform arrays: crop, rescale, downsample, gradient magnitude, gaussian, noise filtering, etc.
- Benchmarking tools and performance benchmarks of `zarrs`.
-[`zarrs`]: https://github.com/LDeakin/zarrs/tree/main/zarrs
-[`zarrs_data_type`]: https://github.com/LDeakin/zarrs/tree/main/zarrs_data_type
-[`zarrs_metadata`]: https://github.com/LDeakin/zarrs/tree/main/zarrs_metadata
-[`zarrs_plugin`]: https://github.com/LDeakin/zarrs/tree/main/zarrs_plugin
-[`zarrs_storage`]: https://github.com/LDeakin/zarrs/tree/main/zarrs_storage
-[`zarrs_filesystem`]: https://github.com/LDeakin/zarrs/tree/main/zarrs_filesystem
-[`zarrs_http`]: https://github.com/LDeakin/zarrs/tree/main/zarrs_http
-[`zarrs_object_store`]: https://github.com/LDeakin/zarrs/tree/main/zarrs_object_store
-[`zarrs_opendal`]: https://github.com/LDeakin/zarrs/tree/main/zarrs_opendal
-[`zarrs_zip`]: https://github.com/LDeakin/zarrs/tree/main/zarrs_zip
-[`zarrs_icechunk`]: https://github.com/LDeakin/zarrs_icechunk
-[`zarrs_ffi`]: https://github.com/LDeakin/zarrs_ffi
+[`zarrs`]: https://docs.rs/zarrs/latest/zarrs/
+[`zarrs_data_type`]: https://docs.rs/zarrs_data_type/latest/zarrs_data_type/
+[`zarrs_metadata`]: https://docs.rs/zarrs_metadata/latest/zarrs_metadata/
+[`zarrs_plugin`]: https://docs.rs/zarrs_plugin/latest/zarrs_plugin/
+[`zarrs_storage`]: https://docs.rs/zarrs_storage/latest/zarrs_storage/
+[`zarrs_filesystem`]: https://docs.rs/zarrs_filesystem/latest/zarrs_filesystem/
+[`zarrs_http`]: https://docs.rs/zarrs_http/latest/zarrs_http/
+[`zarrs_object_store`]: https://docs.rs/zarrs_object_store/latest/zarrs_object_store/
+[`zarrs_opendal`]: https://docs.rs/zarrs_opendal/latest/zarrs_opendal/
+[`zarrs_zip`]: https://docs.rs/zarrs_zip/latest/zarrs_zip/
+[`zarrs_icechunk`]: https://docs.rs/zarrs_icechunk/latest/zarrs_icechunk/
+[`zarrs_ffi`]: https://docs.rs/zarrs_ffi/latest/zarrs_ffi/
[`zarrs-python`]: https://github.com/ilan-gold/zarrs-python
[`zarr-python`]: https://github.com/zarr-developers/zarr-python
-[`zarrs_tools`]: https://github.com/LDeakin/zarrs_tools
-[`ome_zarr_metadata`]: https://github.com/LDeakin/rust_ome_zarr_metadata
-[`object_store`]: https://github.com/apache/arrow-rs/tree/main/object_store
-[`opendal`]: https://github.com/apache/OpenDAL
+[`zarrs_tools`]: https://docs.rs/zarrs_tools/latest/zarrs_tools/
+[`ome_zarr_metadata`]: https://docs.rs/ome_zarr_metadata/latest/ome_zarr_metadata/
+[`object_store`]: https://docs.rs/object_store/latest/object_store/
+[`opendal`]: https://docs.rs/opendal/latest/opendal/
[`icechunk`]: https://github.com/earth-mover/icechunk
[OME-Zarr]: https://ngff.openmicroscopy.org/latest/
diff --git a/zarrs/doc/status/data_types.md b/zarrs/doc/status/data_types.md
index 77d35664..158727b5 100644
--- a/zarrs/doc/status/data_types.md
+++ b/zarrs/doc/status/data_types.md
@@ -4,7 +4,7 @@
[r* (raw bits)] | [ZEP0001] | ✓ | | |
| [bfloat16] | [zarr-specs #130] | ✓ | | |
| [string] (experimental) | [ZEP0007 (draft)] | ✓ | | |
-| [dtype_bytes] (experimental) | [ZEP0007 (draft)] | ✓ | | |
+| [bytes](crate::data_type::DataType::Bytes) (experimental) | [ZEP0007 (draft)] | ✓ | | |
† Experimental data types are recommended for evaluation only.
@@ -25,7 +25,6 @@
[bfloat16]: crate::data_type::DataType::BFloat16
[r* (raw bits)]: crate::data_type::DataType::RawBits
[string]: crate::data_type::DataType::String
-[dtype_bytes]: crate::data_type::DataType::Bytes
[ZEP0001]: https://zarr.dev/zeps/accepted/ZEP0001.html
[zarr-specs #130]: https://github.com/zarr-developers/zarr-specs/issues/130
diff --git a/zarrs/examples/custom_data_type_fixed_size.rs b/zarrs/examples/custom_data_type_fixed_size.rs
new file mode 100644
index 00000000..cb5abe05
--- /dev/null
+++ b/zarrs/examples/custom_data_type_fixed_size.rs
@@ -0,0 +1,295 @@
+#![allow(missing_docs)]
+
+use std::{borrow::Cow, sync::Arc};
+
+use num::traits::{FromBytes, ToBytes};
+use serde::{Deserialize, Serialize};
+use zarrs::array::{
+ ArrayBuilder, ArrayBytes, ArrayError, DataTypeSize, Element, ElementOwned, FillValueMetadataV3,
+};
+use zarrs_data_type::{
+ DataType, DataTypeExtension, DataTypeExtensionBytesCodec, DataTypeExtensionBytesCodecError,
+ DataTypeExtensionError, DataTypePlugin, FillValue, IncompatibleFillValueError,
+ IncompatibleFillValueMetadataError,
+};
+use zarrs_metadata::{
+ v3::{MetadataConfiguration, MetadataV3},
+ Endianness,
+};
+use zarrs_plugin::{PluginCreateError, PluginMetadataInvalidError};
+use zarrs_storage::store::MemoryStore;
+
+#[derive(Clone, Copy, Debug, PartialEq, Deserialize, Serialize)]
+struct CustomDataTypeFixedSizeElement { // element type stored in arrays of the custom fixed-size data type
+ x: u64, // written first by the `ToBytes` impl
+ y: f32, // written after `x` by the `ToBytes` impl
+}
+
+type CustomDataTypeFixedSizeMetadata = CustomDataTypeFixedSizeElement; // fill value metadata reuses the element's serde form
+
+type CustomDataTypeFixedSizeBytes = [u8; size_of::<u64>() + size_of::<f32>()]; // packed `x` then `y`: 12 bytes, no padding
+
+impl ToBytes for CustomDataTypeFixedSizeElement { // packed encoding: `x` (8 bytes) followed by `y` (4 bytes)
+    type Bytes = CustomDataTypeFixedSizeBytes;
+
+    fn to_be_bytes(&self) -> Self::Bytes { // each field is written big-endian
+        let mut bytes = [0; size_of::<CustomDataTypeFixedSizeBytes>()]; // sized from the alias, not a magic 12
+        let (x, y) = bytes.split_at_mut(size_of::<u64>()); // split into the `x` and `y` regions
+        x.copy_from_slice(&self.x.to_be_bytes());
+        y.copy_from_slice(&self.y.to_be_bytes());
+        bytes
+    }
+
+    fn to_le_bytes(&self) -> Self::Bytes { // each field is written little-endian
+        let mut bytes = [0; size_of::<CustomDataTypeFixedSizeBytes>()]; // sized from the alias, not a magic 12
+        let (x, y) = bytes.split_at_mut(size_of::<u64>()); // split into the `x` and `y` regions
+        x.copy_from_slice(&self.x.to_le_bytes());
+        y.copy_from_slice(&self.y.to_le_bytes());
+        bytes
+    }
+}
+
+impl FromBytes for CustomDataTypeFixedSizeElement { // inverse of the `ToBytes` layout: `x` (8 bytes) then `y` (4 bytes)
+    type Bytes = CustomDataTypeFixedSizeBytes;
+
+    fn from_be_bytes(bytes: &Self::Bytes) -> Self { // each field is read big-endian
+        let (x, y) = bytes.split_at(size_of::<u64>()); // lengths are fixed by `Self::Bytes`, so the conversions cannot fail
+        CustomDataTypeFixedSizeElement {
+            x: u64::from_be_bytes(x.try_into().expect("x is the leading 8 bytes")),
+            y: f32::from_be_bytes(y.try_into().expect("y is the trailing 4 bytes")),
+        }
+    }
+
+    fn from_le_bytes(bytes: &Self::Bytes) -> Self { // each field is read little-endian
+        let (x, y) = bytes.split_at(size_of::<u64>()); // lengths are fixed by `Self::Bytes`, so the conversions cannot fail
+        CustomDataTypeFixedSizeElement {
+            x: u64::from_le_bytes(x.try_into().expect("x is the leading 8 bytes")),
+            y: f32::from_le_bytes(y.try_into().expect("y is the trailing 4 bytes")),
+        }
+    }
+}
+
+impl Element for CustomDataTypeFixedSizeElement { // conversion from elements to array bytes
+    fn validate_data_type(data_type: &DataType) -> Result<(), ArrayError> { // accept only the custom extension data type
+        (data_type == &DataType::Extension(Arc::new(CustomDataTypeFixedSize)))
+            .then_some(())
+            .ok_or(ArrayError::IncompatibleElementType)
+    }
+
+    fn into_array_bytes<'a>(
+        data_type: &DataType,
+        elements: &'a [Self],
+    ) -> Result<ArrayBytes<'a>, ArrayError> {
+        Self::validate_data_type(data_type)?;
+        let mut bytes: Vec<u8> = // reserve one packed encoding per element
+            Vec::with_capacity(size_of::<CustomDataTypeFixedSizeBytes>() * elements.len());
+        for element in elements {
+            bytes.extend_from_slice(&element.to_ne_bytes()); // native-endian, packed
+        }
+        Ok(ArrayBytes::Fixed(Cow::Owned(bytes)))
+    }
+}
+
+impl ElementOwned for CustomDataTypeFixedSizeElement { // conversion from array bytes back to elements
+    fn from_array_bytes(
+        data_type: &DataType,
+        bytes: ArrayBytes<'_>,
+    ) -> Result<Vec<Self>, ArrayError> {
+        Self::validate_data_type(data_type)?;
+        let bytes = bytes.into_fixed()?; // errors if the bytes are not the fixed-size variant
+        let bytes_len = bytes.len();
+        let mut elements =
+            Vec::with_capacity(bytes_len / size_of::<CustomDataTypeFixedSizeBytes>());
+        for bytes in bytes.chunks_exact(size_of::<CustomDataTypeFixedSizeBytes>()) {
+            elements.push(CustomDataTypeFixedSizeElement::from_ne_bytes(
+                bytes.try_into().expect("chunks_exact yields full-size chunks"),
+            ))
+        }
+        Ok(elements)
+    }
+}
+
+/// The data type for an array of [`CustomDataTypeFixedSizeElement`].
+#[derive(Debug)]
+struct CustomDataTypeFixedSize; // unit struct: carries no state; `configuration` returns the default
+
+const CUSTOM_NAME: &str = "zarrs.test.CustomDataTypeFixedSize"; // name of the data type; `'static` is implied on a const
+
+fn is_custom_dtype(name: &str) -> bool { // name matcher handed to `DataTypePlugin::new`
+    CUSTOM_NAME == name
+}
+
+fn create_custom_dtype(metadata: &MetadataV3) -> Result<DataType, PluginCreateError> { // constructor handed to `DataTypePlugin::new`
+    if metadata.configuration_is_none_or_empty() {
+        Ok(DataType::Extension(Arc::new(CustomDataTypeFixedSize)))
+    } else {
+        Err(PluginMetadataInvalidError::new(CUSTOM_NAME, "data_type", metadata.clone()).into()) // any non-empty configuration is rejected
+    }
+}
+
+inventory::submit! { // submits the plugin below to `inventory`; presumably how `zarrs` finds this data type by name — confirm
+ DataTypePlugin::new(CUSTOM_NAME, is_custom_dtype, create_custom_dtype)
+}
+
+impl DataTypeExtension for CustomDataTypeFixedSize { // name, configuration, fill value conversion, size and codec hook
+    fn name(&self) -> String {
+        CUSTOM_NAME.to_string()
+    }
+
+    fn configuration(&self) -> MetadataConfiguration { // this data type has no configuration
+        MetadataConfiguration::default()
+    }
+
+    fn fill_value(
+        &self,
+        fill_value_metadata: &FillValueMetadataV3,
+    ) -> Result<FillValue, IncompatibleFillValueMetadataError> {
+        let custom_fill_value = match fill_value_metadata {
+            FillValueMetadataV3::Unsupported(value) => serde_json::from_value::<
+                CustomDataTypeFixedSizeMetadata,
+            >(value.clone()) // parse the raw JSON value as the element struct
+            .map_err(|_| {
+                IncompatibleFillValueMetadataError::new(self.name(), fill_value_metadata.clone())
+            })?,
+            _ => Err(IncompatibleFillValueMetadataError::new(
+                self.name(),
+                fill_value_metadata.clone(),
+            ))?, // every other metadata variant is rejected
+        };
+        Ok(FillValue::new(custom_fill_value.to_ne_bytes().to_vec()))
+    }
+
+    fn metadata_fill_value(
+        &self,
+        fill_value: &FillValue,
+    ) -> Result<FillValueMetadataV3, IncompatibleFillValueError> {
+        let fill_value_metadata = CustomDataTypeFixedSizeMetadata::from_ne_bytes(
+            fill_value
+                .as_ne_bytes()
+                .try_into() // fails when the fill value is not exactly one packed element
+                .map_err(|_| IncompatibleFillValueError::new(self.name(), fill_value.clone()))?,
+        );
+        Ok(FillValueMetadataV3::Unsupported(
+            serde_json::to_value(fill_value_metadata).expect("a struct of numbers serializes to JSON"),
+        ))
+    }
+
+    fn size(&self) -> DataTypeSize { // size of the packed encoding, not of the Rust struct
+        DataTypeSize::Fixed(size_of::<CustomDataTypeFixedSizeBytes>())
+    }
+
+    fn codec_bytes(&self) -> Result<&dyn DataTypeExtensionBytesCodec, DataTypeExtensionError> {
+        Ok(self) // this type implements `DataTypeExtensionBytesCodec` itself
+    }
+}
+
+impl DataTypeExtensionBytesCodec for CustomDataTypeFixedSize { // byte-order conversion between native and requested endianness
+    fn encode<'a>(
+        &self,
+        bytes: Cow<'a, [u8]>,
+        endianness: Option<Endianness>,
+    ) -> Result<Cow<'a, [u8]>, DataTypeExtensionBytesCodecError> {
+        if let Some(endianness) = endianness {
+            if endianness != Endianness::native() {
+                let mut bytes = bytes.into_owned(); // re-encode each element in place
+                for bytes in bytes.chunks_exact_mut(size_of::<CustomDataTypeFixedSizeBytes>()) {
+                    let value = CustomDataTypeFixedSizeElement::from_ne_bytes(
+                        &bytes.try_into().expect("chunks_exact_mut yields full-size chunks"),
+                    );
+                    if endianness == Endianness::Little {
+                        bytes.copy_from_slice(&value.to_le_bytes());
+                    } else {
+                        bytes.copy_from_slice(&value.to_be_bytes());
+                    }
+                }
+                Ok(Cow::Owned(bytes))
+            } else {
+                Ok(bytes) // already in the requested byte order: pass through unchanged
+            }
+        } else {
+            Err(DataTypeExtensionBytesCodecError::EndiannessNotSpecified)
+        }
+    }
+
+    fn decode<'a>(
+        &self,
+        bytes: Cow<'a, [u8]>,
+        endianness: Option<Endianness>,
+    ) -> Result<Cow<'a, [u8]>, DataTypeExtensionBytesCodecError> {
+        if let Some(endianness) = endianness {
+            if endianness != Endianness::native() {
+                let mut bytes = bytes.into_owned(); // re-encode each element in place
+                for bytes in bytes.chunks_exact_mut(size_of::<CustomDataTypeFixedSizeBytes>()) {
+                    let value = if endianness == Endianness::Little {
+                        CustomDataTypeFixedSizeElement::from_le_bytes(
+                            &bytes.try_into().expect("chunks_exact_mut yields full-size chunks"),
+                        )
+                    } else {
+                        CustomDataTypeFixedSizeElement::from_be_bytes(
+                            &bytes.try_into().expect("chunks_exact_mut yields full-size chunks"),
+                        )
+                    };
+                    bytes.copy_from_slice(&value.to_ne_bytes());
+                }
+                Ok(Cow::Owned(bytes))
+            } else {
+                Ok(bytes) // stored byte order is already native: pass through unchanged
+            }
+        } else {
+            Err(DataTypeExtensionBytesCodecError::EndiannessNotSpecified)
+        }
+    }
+}
+
+fn main() { // store two custom elements in an in-memory array, then read the whole array back
+    let store = Arc::new(MemoryStore::default());
+    let array_path = "/array";
+    let fill_value = CustomDataTypeFixedSizeElement { x: 1, y: 2.3 };
+    let array = ArrayBuilder::new(
+        vec![4, 1], // array shape
+        DataType::Extension(Arc::new(CustomDataTypeFixedSize)),
+        vec![2, 1].try_into().unwrap(), // regular chunk shape
+        FillValue::new(fill_value.to_ne_bytes().to_vec()),
+    )
+    .array_to_array_codecs(vec![
+        #[cfg(feature = "transpose")]
+        Arc::new(zarrs::array::codec::TransposeCodec::new(
+            zarrs::array::codec::array_to_array::transpose::TransposeOrder::new(&[1, 0]).unwrap(),
+        )),
+    ])
+    .bytes_to_bytes_codecs(vec![
+        #[cfg(feature = "gzip")]
+        Arc::new(zarrs::array::codec::GzipCodec::new(5).unwrap()),
+        #[cfg(feature = "crc32c")]
+        Arc::new(zarrs::array::codec::Crc32cCodec::new()),
+    ])
+    // .storage_transformers(vec![].into())
+    .build(store, array_path)
+    .unwrap();
+    println!(
+        "{}",
+        serde_json::to_string_pretty(array.metadata()).unwrap()
+    );
+
+    let data = [
+        CustomDataTypeFixedSizeElement { x: 3, y: 4.5 },
+        CustomDataTypeFixedSizeElement { x: 6, y: 7.8 },
+    ];
+    array.store_chunk_elements(&[0, 0], &data).unwrap(); // only the first of the two chunks is written
+
+    let data = array
+        .retrieve_array_subset_elements::<CustomDataTypeFixedSizeElement>(&array.subset_all())
+        .unwrap();
+
+    assert_eq!(data[0], CustomDataTypeFixedSizeElement { x: 3, y: 4.5 });
+    assert_eq!(data[1], CustomDataTypeFixedSizeElement { x: 6, y: 7.8 });
+    assert_eq!(data[2], CustomDataTypeFixedSizeElement { x: 1, y: 2.3 }); // unwritten chunk reads back as the fill value
+    assert_eq!(data[3], CustomDataTypeFixedSizeElement { x: 1, y: 2.3 });
+
+    println!("{data:#?}");
+}
+
+#[test]
+fn custom_data_type_fixed_size() { // runs the example's `main` as a test so its assertions are checked
+ main()
+}
diff --git a/zarrs/examples/custom_data_type_variable_size.rs b/zarrs/examples/custom_data_type_variable_size.rs
new file mode 100644
index 00000000..01e900c7
--- /dev/null
+++ b/zarrs/examples/custom_data_type_variable_size.rs
@@ -0,0 +1,215 @@
+#![allow(missing_docs)]
+
+use std::{borrow::Cow, sync::Arc};
+
+use derive_more::Deref;
+use itertools::Itertools;
+use serde::{Deserialize, Serialize};
+use zarrs::array::{
+ ArrayBuilder, ArrayBytes, ArrayError, DataTypeSize, Element, ElementOwned, FillValueMetadataV3,
+ RawBytesOffsets,
+};
+use zarrs_data_type::{
+ DataType, DataTypeExtension, DataTypePlugin, FillValue, IncompatibleFillValueError,
+ IncompatibleFillValueMetadataError,
+};
+use zarrs_metadata::v3::{array::fill_value::FillValueFloat, MetadataConfiguration, MetadataV3};
+use zarrs_plugin::{PluginCreateError, PluginMetadataInvalidError};
+use zarrs_storage::store::MemoryStore;
+
+#[derive(Clone, Copy, Debug, PartialEq, Deserialize, Serialize, Deref)]
+struct CustomDataTypeVariableSizeElement(Option); // newtype over an `Option` (inner type argument not visible here — TODO confirm)
+
+impl From