From dd64131a17730e495b763c9918bdb09d1d6baee2 Mon Sep 17 00:00:00 2001 From: Lachlan Deakin Date: Sun, 2 Mar 2025 17:32:04 +1100 Subject: [PATCH 1/3] feat: add custom data type support --- .github/workflows/ci.yml | 1 + CHANGELOG.md | 4 + Cargo.toml | 2 +- README.md | 2 +- makefile | 1 + zarrs/doc/ecosystem.md | 2 +- zarrs/examples/custom_data_type_fixed_size.rs | 288 +++++++++++++ .../custom_data_type_variable_size.rs | 234 +++++++++++ zarrs/src/array/array_builder.rs | 13 +- zarrs/src/array/array_errors.rs | 6 +- zarrs/src/array/codec.rs | 4 + .../codec/array_to_bytes/bytes/bytes_codec.rs | 16 +- zarrs_data_type/CHANGELOG.md | 13 + zarrs_data_type/Cargo.toml | 6 +- zarrs_data_type/src/data_type.rs | 382 ++++++++++++------ zarrs_data_type/src/data_type_extension.rs | 96 +++++ zarrs_data_type/src/data_type_plugin.rs | 20 + zarrs_data_type/src/lib.rs | 9 +- 18 files changed, 955 insertions(+), 144 deletions(-) create mode 100644 zarrs/examples/custom_data_type_fixed_size.rs create mode 100644 zarrs/examples/custom_data_type_variable_size.rs create mode 100644 zarrs_data_type/src/data_type_extension.rs create mode 100644 zarrs_data_type/src/data_type_plugin.rs diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 45b2f562..0d7a3906 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -30,6 +30,7 @@ jobs: - uses: Swatinem/rust-cache@v2 - run: cargo build ${{ matrix.features }} - run: cargo test ${{ matrix.features }} + - run: cargo test ${{ matrix.features }} --examples build_and_test_windows: runs-on: windows-latest steps: diff --git a/CHANGELOG.md b/CHANGELOG.md index 7ede3710..246bdd11 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -11,6 +11,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - Add `array:codec::{InvalidBytesLengthError,InvalidArrayShapeError,InvalidNumberOfElementsError,SubsetOutOfBoundsError}` - Add `ArraySubset::inbounds_shape()` (matches the old `ArraySubset::inbounds` behaviour) - Add `ArrayBytesFixedDisjointView[CreateError]` +- Add support for data type extensions with `zarrs_data_type` 0.2.0 +- Add `custom_data_type_fixed_size` and `custom_data_type_variable_size` examples ### Changed - **Breaking**: change `ArraySubset::inbounds` to take another subset rather than a shape @@ -36,6 +38,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - `zarrs_plugin` is re-exported as `zarrs::plugin` - **Breaking**: `Plugin` is now generic over the creation arguments - **Breaking**: `StorageTransformerPlugin` now uses a `Plugin` +- Add `DataTypeExtension` variant to `CodecError` +- `ArrayCreateError::DataTypeCreateError` now uses a `PluginCreateError` internally ### Fixed - Fixed reserving one more element than necessary when retrieving `string` or `bytes` array elements diff --git a/Cargo.toml b/Cargo.toml index de135bae..fd4e12d5 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -29,7 +29,7 @@ missing_panics_doc = "warn" missing_errors_doc = "warn" [workspace.dependencies.zarrs_data_type] -version = "0.1.0" +version = "0.2.0" path = "zarrs_data_type" [workspace.dependencies.zarrs_metadata] diff --git a/README.md b/README.md index 5039724a..0c554aab 100644 --- a/README.md +++ b/README.md @@ -93,7 +93,7 @@ println!("{array_ndarray:4}"); - [`zarrs`]: The core library for manipulating Zarr hierarchies. - [`zarrs_data_type`]: Zarr data types (re-exported as `zarrs::data_type`). - [`zarrs_metadata`]: Zarr metadata support (re-exported as `zarrs::metadata`). 
-- [`zarrs_plugin`]: Zarr plugin support (re-exported as `zarrs::plugin`). +- [`zarrs_plugin`]: `zarrs` plugin support (re-exported as `zarrs::plugin`). - [`zarrs_storage`]: The storage API for `zarrs` (re-exported as `zarrs::storage`). ### Stores diff --git a/makefile b/makefile index 7b84f41a..1febaaf7 100644 --- a/makefile +++ b/makefile @@ -7,6 +7,7 @@ build: test: cargo +$(TOOLCHAIN) test --all-features + cargo +$(TOOLCHAIN) test --all-features --examples doc: RUSTDOCFLAGS="-D warnings --cfg docsrs" doc: diff --git a/zarrs/doc/ecosystem.md b/zarrs/doc/ecosystem.md index 3d796ecb..af1c0a2f 100644 --- a/zarrs/doc/ecosystem.md +++ b/zarrs/doc/ecosystem.md @@ -2,7 +2,7 @@ - [`zarrs`]: The core library for manipulating Zarr hierarchies. - [`zarrs_data_type`]: Zarr data types (re-exported as `zarrs::data_type`). - [`zarrs_metadata`]: Zarr metadata support (re-exported as `zarrs::metadata`). -- [`zarrs_plugin`]: Zarr plugin support (re-exported as `zarrs::plugin`). +- [`zarrs_plugin`]: `zarrs` plugin support (re-exported as `zarrs::plugin`). - [`zarrs_storage`]: The storage API for `zarrs` (re-exported as `zarrs::storage`). #### Stores diff --git a/zarrs/examples/custom_data_type_fixed_size.rs b/zarrs/examples/custom_data_type_fixed_size.rs new file mode 100644 index 00000000..87f9067d --- /dev/null +++ b/zarrs/examples/custom_data_type_fixed_size.rs @@ -0,0 +1,288 @@ +#![allow(missing_docs)] + +use std::{borrow::Cow, sync::Arc}; + +use num::traits::{FromBytes, ToBytes}; +use serde::{Deserialize, Serialize}; +use zarrs::array::{ + ArrayBuilder, ArrayBytes, ArrayError, DataTypeSize, Element, ElementOwned, FillValueMetadataV3, +}; +use zarrs_data_type::{ + DataType, DataTypeExtension, DataTypeExtensionError, DataTypePlugin, FillValue, + IncompatibleFillValueError, IncompatibleFillValueMetadataError, +}; +use zarrs_metadata::{ + v3::{MetadataConfiguration, MetadataV3}, + Endianness, +}; +use zarrs_plugin::{PluginCreateError, PluginMetadataInvalidError}; +use zarrs_storage::store::MemoryStore; + +#[derive(Clone, Copy, Debug, PartialEq, Deserialize, Serialize)] +struct CustomDataTypeFixedSizeElement { + x: u64, + y: f32, +} + +type CustomDataTypeFixedSizeMetadata = CustomDataTypeFixedSizeElement; + +type CustomDataTypeFixedSizeBytes = [u8; size_of::() + size_of::()]; + +impl ToBytes for CustomDataTypeFixedSizeElement { + type Bytes = CustomDataTypeFixedSizeBytes; + + fn to_be_bytes(&self) -> Self::Bytes { + let mut bytes = [0; 12]; + let (x, y) = bytes.split_at_mut(size_of::()); + x.copy_from_slice(&self.x.to_be_bytes()); + y.copy_from_slice(&self.y.to_be_bytes()); + bytes + } + + fn to_le_bytes(&self) -> Self::Bytes { + let mut bytes = [0; 12]; + let (x, y) = bytes.split_at_mut(size_of::()); + x.copy_from_slice(&self.x.to_le_bytes()); + y.copy_from_slice(&self.y.to_le_bytes()); + bytes + } +} + +impl FromBytes for CustomDataTypeFixedSizeElement { + type Bytes = CustomDataTypeFixedSizeBytes; + + fn from_be_bytes(bytes: &Self::Bytes) -> Self { + let (x, y) = bytes.split_at(size_of::()); + CustomDataTypeFixedSizeElement { + x: u64::from_be_bytes(unsafe { x.try_into().unwrap_unchecked() }), + y: f32::from_be_bytes(unsafe { y.try_into().unwrap_unchecked() }), + } + } + + fn from_le_bytes(bytes: &Self::Bytes) -> Self { + let (x, y) = bytes.split_at(size_of::()); + CustomDataTypeFixedSizeElement { + x: u64::from_le_bytes(unsafe { x.try_into().unwrap_unchecked() }), + y: f32::from_le_bytes(unsafe { y.try_into().unwrap_unchecked() }), + } + } +} + +impl Element for CustomDataTypeFixedSizeElement { + fn 
validate_data_type(data_type: &DataType) -> Result<(), ArrayError> { + (data_type == &DataType::Extension(Arc::new(CustomDataTypeFixedSize))) + .then_some(()) + .ok_or(ArrayError::IncompatibleElementType) + } + + fn into_array_bytes<'a>( + data_type: &DataType, + elements: &'a [Self], + ) -> Result, ArrayError> { + Self::validate_data_type(data_type)?; + let mut bytes: Vec = + Vec::with_capacity(size_of::() * elements.len()); + for element in elements { + bytes.extend_from_slice(&element.to_ne_bytes()); + } + Ok(ArrayBytes::Fixed(Cow::Owned(bytes))) + } +} + +impl ElementOwned for CustomDataTypeFixedSizeElement { + fn from_array_bytes( + data_type: &DataType, + bytes: ArrayBytes<'_>, + ) -> Result, ArrayError> { + Self::validate_data_type(data_type)?; + let bytes = bytes.into_fixed()?; + let bytes_len = bytes.len(); + let mut elements = + Vec::with_capacity(bytes_len / size_of::()); + for bytes in bytes.chunks_exact(size_of::()) { + elements.push(CustomDataTypeFixedSizeElement::from_ne_bytes(unsafe { + bytes.try_into().unwrap_unchecked() + })) + } + Ok(elements) + } +} + +/// The data type for an array of [`CustomDataTypeFixedSizeElement`]. +#[derive(Debug)] +struct CustomDataTypeFixedSize; + +const CUSTOM_NAME: &'static str = "zarrs.test.CustomDataTypeFixedSize"; + +fn is_custom_dtype(name: &str) -> bool { + name == CUSTOM_NAME +} + +fn create_custom_dtype(metadata: &MetadataV3) -> Result { + if metadata.configuration_is_none_or_empty() { + Ok(DataType::Extension(Arc::new(CustomDataTypeFixedSize))) + } else { + Err(PluginMetadataInvalidError::new(CUSTOM_NAME, "codec", metadata.clone()).into()) + } +} + +inventory::submit! { + DataTypePlugin::new(CUSTOM_NAME, is_custom_dtype, create_custom_dtype) +} + +impl DataTypeExtension for CustomDataTypeFixedSize { + fn name(&self) -> String { + CUSTOM_NAME.to_string() + } + + fn configuration(&self) -> MetadataConfiguration { + MetadataConfiguration::default() + } + + fn fill_value( + &self, + fill_value_metadata: &FillValueMetadataV3, + ) -> Result { + let custom_fill_value = match fill_value_metadata { + FillValueMetadataV3::Unsupported(value) => serde_json::from_value::< + CustomDataTypeFixedSizeMetadata, + >(value.clone()) + .map_err(|_| { + IncompatibleFillValueMetadataError::new(self.name(), fill_value_metadata.clone()) + })?, + _ => Err(IncompatibleFillValueMetadataError::new( + self.name(), + fill_value_metadata.clone(), + ))?, + }; + Ok(FillValue::new(custom_fill_value.to_ne_bytes().to_vec())) + } + + fn metadata_fill_value( + &self, + fill_value: &FillValue, + ) -> Result { + let fill_value_metadata = CustomDataTypeFixedSizeMetadata::from_ne_bytes( + fill_value + .as_ne_bytes() + .try_into() + .map_err(|_| IncompatibleFillValueError::new(self.name(), fill_value.clone()))?, + ); + Ok(FillValueMetadataV3::Unsupported( + serde_json::to_value(fill_value_metadata).unwrap(), + )) + } + + fn size(&self) -> zarrs::array::DataTypeSize { + DataTypeSize::Fixed(size_of::()) + } + + fn encode_bytes<'a>( + &self, + bytes: std::borrow::Cow<'a, [u8]>, + endianness: Option, + ) -> Result, DataTypeExtensionError> { + if let Some(endianness) = endianness { + if endianness != Endianness::native() { + let mut bytes = bytes.into_owned(); + for bytes in bytes.chunks_exact_mut(size_of::()) { + let value = CustomDataTypeFixedSizeElement::from_ne_bytes(&unsafe { + bytes.try_into().unwrap_unchecked() + }); + if endianness == Endianness::Little { + bytes.copy_from_slice(&value.to_le_bytes()); + } else { + bytes.copy_from_slice(&value.to_be_bytes()); + } + } + 
Ok(Cow::Owned(bytes)) + } else { + Ok(bytes) + } + } else { + Err(DataTypeExtensionError::EndiannessNotSpecified) + } + } + + fn decode_bytes<'a>( + &self, + bytes: std::borrow::Cow<'a, [u8]>, + endianness: Option, + ) -> Result, DataTypeExtensionError> { + if let Some(endianness) = endianness { + if endianness != Endianness::native() { + let mut bytes = bytes.into_owned(); + for bytes in bytes.chunks_exact_mut(size_of::() + size_of::()) { + let value = if endianness == Endianness::Little { + CustomDataTypeFixedSizeElement::from_le_bytes(&unsafe { + bytes.try_into().unwrap_unchecked() + }) + } else { + CustomDataTypeFixedSizeElement::from_be_bytes(&unsafe { + bytes.try_into().unwrap_unchecked() + }) + }; + bytes.copy_from_slice(&value.to_ne_bytes()); + } + Ok(Cow::Owned(bytes)) + } else { + Ok(bytes) + } + } else { + Err(DataTypeExtensionError::EndiannessNotSpecified) + } + } +} + +fn main() { + let store = std::sync::Arc::new(MemoryStore::default()); + let array_path = "/array"; + let fill_value = CustomDataTypeFixedSizeElement { x: 1, y: 2.3 }; + let array = ArrayBuilder::new( + vec![4, 1], // array shape + DataType::Extension(Arc::new(CustomDataTypeFixedSize)), + vec![2, 1].try_into().unwrap(), // regular chunk shape + FillValue::new(fill_value.to_ne_bytes().to_vec()), + ) + .array_to_array_codecs(vec![ + #[cfg(feature = "transpose")] + Arc::new(zarrs::array::codec::TransposeCodec::new( + zarrs::array::codec::array_to_array::transpose::TransposeOrder::new(&[1, 0]).unwrap(), + )), + ]) + .bytes_to_bytes_codecs(vec![ + #[cfg(feature = "gzip")] + Arc::new(zarrs::array::codec::GzipCodec::new(5).unwrap()), + #[cfg(feature = "crc32c")] + Arc::new(zarrs::array::codec::Crc32cCodec::new()), + ]) + // .storage_transformers(vec![].into()) + .build(store, array_path) + .unwrap(); + println!( + "{}", + serde_json::to_string_pretty(array.metadata()).unwrap() + ); + + let data = [ + CustomDataTypeFixedSizeElement { x: 3, y: 4.5 }, + CustomDataTypeFixedSizeElement { x: 6, y: 7.8 }, + ]; + array.store_chunk_elements(&[0, 0], &data).unwrap(); + + let data = array + .retrieve_array_subset_elements::(&array.subset_all()) + .unwrap(); + + assert_eq!(data[0], CustomDataTypeFixedSizeElement { x: 3, y: 4.5 }); + assert_eq!(data[1], CustomDataTypeFixedSizeElement { x: 6, y: 7.8 }); + assert_eq!(data[2], CustomDataTypeFixedSizeElement { x: 1, y: 2.3 }); + assert_eq!(data[3], CustomDataTypeFixedSizeElement { x: 1, y: 2.3 }); + + println!("{data:#?}"); +} + +#[test] +fn custom_data_type_fixed_size() { + main() +} diff --git a/zarrs/examples/custom_data_type_variable_size.rs b/zarrs/examples/custom_data_type_variable_size.rs new file mode 100644 index 00000000..003e97d6 --- /dev/null +++ b/zarrs/examples/custom_data_type_variable_size.rs @@ -0,0 +1,234 @@ +#![allow(missing_docs)] + +use std::{borrow::Cow, sync::Arc}; + +use derive_more::Deref; +use itertools::Itertools; +use serde::{Deserialize, Serialize}; +use zarrs::array::{ + ArrayBuilder, ArrayBytes, ArrayError, DataTypeSize, Element, ElementOwned, FillValueMetadataV3, + RawBytesOffsets, +}; +use zarrs_data_type::{ + DataType, DataTypeExtension, DataTypeExtensionError, DataTypePlugin, FillValue, + IncompatibleFillValueError, IncompatibleFillValueMetadataError, +}; +use zarrs_metadata::{ + v3::{array::fill_value::FillValueFloat, MetadataConfiguration, MetadataV3}, + Endianness, +}; +use zarrs_plugin::{PluginCreateError, PluginMetadataInvalidError}; +use zarrs_storage::store::MemoryStore; + +#[derive(Clone, Copy, Debug, PartialEq, Deserialize, Serialize, Deref)] 
+struct CustomDataTypeVariableSizeElement(Option); + +impl From> for CustomDataTypeVariableSizeElement { + fn from(value: Option) -> Self { + Self(value) + } +} + +impl Element for CustomDataTypeVariableSizeElement { + fn validate_data_type(data_type: &DataType) -> Result<(), ArrayError> { + (data_type == &DataType::Extension(Arc::new(CustomDataTypeVariableSize))) + .then_some(()) + .ok_or(ArrayError::IncompatibleElementType) + } + + fn into_array_bytes<'a>( + data_type: &DataType, + elements: &'a [Self], + ) -> Result, ArrayError> { + Self::validate_data_type(data_type)?; + let mut bytes = Vec::new(); + let mut offsets = Vec::with_capacity(elements.len() + 1); + + for element in elements { + offsets.push(bytes.len()); + if let Some(value) = element.0 { + bytes.extend_from_slice(&value.to_le_bytes()); + } + } + offsets.push(bytes.len()); + let offsets = unsafe { + // SAFETY: Constructed correctly above + RawBytesOffsets::new_unchecked(offsets) + }; + Ok(ArrayBytes::Variable(Cow::Owned(bytes), offsets)) + } +} + +impl ElementOwned for CustomDataTypeVariableSizeElement { + fn from_array_bytes( + data_type: &DataType, + bytes: ArrayBytes<'_>, + ) -> Result, ArrayError> { + Self::validate_data_type(data_type)?; + let (bytes, offsets) = bytes.into_variable()?; + + let mut elements = Vec::with_capacity(offsets.len().saturating_sub(1)); + for (curr, next) in offsets.iter().tuple_windows() { + let bytes = &bytes[*curr..*next]; + if let Ok(bytes) = <[u8; 4]>::try_from(bytes) { + let value = f32::from_le_bytes(bytes); + elements.push(CustomDataTypeVariableSizeElement(Some(value))); + } else if bytes.len() == 0 { + elements.push(CustomDataTypeVariableSizeElement(None)); + } else { + panic!() + } + } + + Ok(elements) + } +} + +/// The data type for an array of [`CustomDataTypeVariableSizeElement`]. +#[derive(Debug)] +struct CustomDataTypeVariableSize; + +const CUSTOM_NAME: &'static str = "zarrs.test.CustomDataTypeVariableSize"; + +fn is_custom_dtype(name: &str) -> bool { + name == CUSTOM_NAME +} + +fn create_custom_dtype(metadata: &MetadataV3) -> Result { + if metadata.configuration_is_none_or_empty() { + Ok(DataType::Extension(Arc::new(CustomDataTypeVariableSize))) + } else { + Err(PluginMetadataInvalidError::new(CUSTOM_NAME, "codec", metadata.clone()).into()) + } +} + +inventory::submit! { + DataTypePlugin::new(CUSTOM_NAME, is_custom_dtype, create_custom_dtype) +} + +impl DataTypeExtension for CustomDataTypeVariableSize { + fn name(&self) -> String { + CUSTOM_NAME.to_string() + } + + fn configuration(&self) -> MetadataConfiguration { + MetadataConfiguration::default() + } + + fn fill_value( + &self, + fill_value_metadata: &FillValueMetadataV3, + ) -> Result { + let fill_value = match fill_value_metadata { + FillValueMetadataV3::Float(f) => Ok(f + .to_float::() + .ok_or_else(|| { + IncompatibleFillValueMetadataError::new( + self.name(), + fill_value_metadata.clone(), + ) + })? 
+ .to_ne_bytes() + .to_vec()), + FillValueMetadataV3::Unsupported(serde_json::Value::Null) => Ok(vec![]), + _ => Err(IncompatibleFillValueMetadataError::new( + self.name(), + fill_value_metadata.clone(), + )), + }?; + Ok(FillValue::new(fill_value)) + } + + fn metadata_fill_value( + &self, + fill_value: &FillValue, + ) -> Result { + let fill_value = fill_value.as_ne_bytes(); + if fill_value.len() == 0 { + Ok(FillValueMetadataV3::Unsupported(serde_json::Value::Null)) + } else if fill_value.len() == 4 { + let value = f32::from_ne_bytes(fill_value.try_into().unwrap()); + Ok(FillValueMetadataV3::Float(FillValueFloat::Float( + value as f64, + ))) + } else { + Err(IncompatibleFillValueError::new( + self.name(), + fill_value.into(), + )) + } + } + + fn size(&self) -> zarrs::array::DataTypeSize { + DataTypeSize::Variable + } + + fn encode_bytes<'a>( + &self, + _bytes: Cow<'a, [u8]>, + _endianness: Option, + ) -> Result, DataTypeExtensionError> { + Err(DataTypeExtensionError::BytesCodecUnsupported) + } + + fn decode_bytes<'a>( + &self, + _bytes: Cow<'a, [u8]>, + _endianness: Option, + ) -> Result, DataTypeExtensionError> { + Err(DataTypeExtensionError::BytesCodecUnsupported) + } +} + +fn main() { + let store = std::sync::Arc::new(MemoryStore::default()); + let array_path = "/array"; + let array = ArrayBuilder::new( + vec![4, 1], // array shape + DataType::Extension(Arc::new(CustomDataTypeVariableSize)), + vec![3, 1].try_into().unwrap(), // regular chunk shape + FillValue::from(vec![]), + ) + .array_to_array_codecs(vec![ + #[cfg(feature = "transpose")] + Arc::new(zarrs::array::codec::TransposeCodec::new( + zarrs::array::codec::array_to_array::transpose::TransposeOrder::new(&[1, 0]).unwrap(), + )), + ]) + .bytes_to_bytes_codecs(vec![ + #[cfg(feature = "gzip")] + Arc::new(zarrs::array::codec::GzipCodec::new(5).unwrap()), + #[cfg(feature = "crc32c")] + Arc::new(zarrs::array::codec::Crc32cCodec::new()), + ]) + // .storage_transformers(vec![].into()) + .build(store, array_path) + .unwrap(); + println!( + "{}", + serde_json::to_string_pretty(array.metadata()).unwrap() + ); + + let data = [ + CustomDataTypeVariableSizeElement::from(Some(1.0)), + CustomDataTypeVariableSizeElement::from(None), + CustomDataTypeVariableSizeElement::from(Some(3.0)), + ]; + array.store_chunk_elements(&[0, 0], &data).unwrap(); + + let data = array + .retrieve_array_subset_elements::(&array.subset_all()) + .unwrap(); + + assert_eq!(data[0], CustomDataTypeVariableSizeElement::from(Some(1.0))); + assert_eq!(data[1], CustomDataTypeVariableSizeElement::from(None)); + assert_eq!(data[2], CustomDataTypeVariableSizeElement::from(Some(3.0))); + assert_eq!(data[3], CustomDataTypeVariableSizeElement::from(None)); + + println!("{data:#?}"); +} + +#[test] +fn custom_data_type_variable_size() { + main() +} diff --git a/zarrs/src/array/array_builder.rs b/zarrs/src/array/array_builder.rs index 27452555..1d184a98 100644 --- a/zarrs/src/array/array_builder.rs +++ b/zarrs/src/array/array_builder.rs @@ -1,7 +1,6 @@ use std::sync::Arc; use crate::{ - data_type::IncompatibleFillValueError, metadata::{v3::AdditionalFields, ChunkKeySeparator}, node::NodePath, }; @@ -297,16 +296,6 @@ impl ArrayBuilder { } } - if let Some(data_type_size) = self.data_type.fixed_size() { - if data_type_size != self.fill_value.size() { - return Err(IncompatibleFillValueError::new( - self.data_type.name(), - self.fill_value.clone(), - ) - .into()); - } - } - let codec_chain = CodecChain::new( self.array_to_array_codecs.clone(), self.array_to_bytes_codec.clone(), @@ -318,7 
+307,7 @@ impl ArrayBuilder { self.shape.clone(), self.chunk_grid.create_metadata(), self.data_type.metadata(), - self.data_type.metadata_fill_value(&self.fill_value), + self.data_type.metadata_fill_value(&self.fill_value)?, codec_chain.create_metadatas(), ) .with_attributes(self.attributes.clone()) diff --git a/zarrs/src/array/array_errors.rs b/zarrs/src/array/array_errors.rs index 272aff53..42a07701 100644 --- a/zarrs/src/array/array_errors.rs +++ b/zarrs/src/array/array_errors.rs @@ -2,9 +2,7 @@ use thiserror::Error; use crate::{ array_subset::{ArraySubset, IncompatibleDimensionalityError}, - data_type::{ - IncompatibleFillValueError, IncompatibleFillValueMetadataError, UnsupportedDataTypeError, - }, + data_type::{IncompatibleFillValueError, IncompatibleFillValueMetadataError}, metadata::v3::UnsupportedAdditionalFieldError, node::NodePathError, plugin::PluginCreateError, @@ -24,7 +22,7 @@ pub enum ArrayCreateError { UnsupportedAdditionalFieldError(#[from] UnsupportedAdditionalFieldError), /// Unsupported data type. #[error(transparent)] - DataTypeCreateError(UnsupportedDataTypeError), + DataTypeCreateError(PluginCreateError), /// Invalid fill value. #[error(transparent)] InvalidFillValue(#[from] IncompatibleFillValueError), diff --git a/zarrs/src/array/codec.rs b/zarrs/src/array/codec.rs index 11e47e58..e25fcfa5 100644 --- a/zarrs/src/array/codec.rs +++ b/zarrs/src/array/codec.rs @@ -79,6 +79,7 @@ pub use array_to_array_partial_encoder_default::ArrayToArrayPartialEncoderDefaul mod bytes_partial_encoder_default; pub use bytes_partial_encoder_default::BytesPartialEncoderDefault; +use zarrs_data_type::DataTypeExtensionError; use zarrs_metadata::ArrayShape; use zarrs_plugin::PluginUnsupportedError; @@ -1082,6 +1083,9 @@ pub enum CodecError { /// Variable length array bytes offsets are out of bounds. #[error(transparent)] RawBytesOffsetsOutOfBounds(#[from] RawBytesOffsetsOutOfBoundsError), + /// A data type extension error. 
+ #[error(transparent)] + DataTypeExtension(#[from] DataTypeExtensionError), } impl From<&str> for CodecError { diff --git a/zarrs/src/array/codec/array_to_bytes/bytes/bytes_codec.rs b/zarrs/src/array/codec/array_to_bytes/bytes/bytes_codec.rs index 43685945..55753c97 100644 --- a/zarrs/src/array/codec/array_to_bytes/bytes/bytes_codec.rs +++ b/zarrs/src/array/codec/array_to_bytes/bytes/bytes_codec.rs @@ -2,6 +2,8 @@ use std::sync::Arc; +use zarrs_data_type::DataType; + use crate::{ array::{ codec::{ @@ -156,7 +158,11 @@ impl ArrayToBytesCodecTraits for BytesCodec { decoded_representation.data_type().size(), )?; let bytes = bytes.into_fixed()?; - self.do_encode_or_decode(bytes, decoded_representation) + let bytes_encoded = match decoded_representation.data_type() { + DataType::Extension(ext) => ext.encode_bytes(bytes, self.endian)?, + _ => self.do_encode_or_decode(bytes, decoded_representation)?, + }; + Ok(bytes_encoded) } fn decode<'a>( @@ -165,9 +171,11 @@ impl ArrayToBytesCodecTraits for BytesCodec { decoded_representation: &ChunkRepresentation, _options: &CodecOptions, ) -> Result, CodecError> { - Ok(ArrayBytes::from( - self.do_encode_or_decode(bytes, decoded_representation)?, - )) + let bytes_decoded = match decoded_representation.data_type() { + DataType::Extension(ext) => ext.decode_bytes(bytes, self.endian)?, + _ => self.do_encode_or_decode(bytes, decoded_representation)?, + }; + Ok(ArrayBytes::from(bytes_decoded)) } fn partial_decoder( diff --git a/zarrs_data_type/CHANGELOG.md b/zarrs_data_type/CHANGELOG.md index b84fcc12..b9f61623 100644 --- a/zarrs_data_type/CHANGELOG.md +++ b/zarrs_data_type/CHANGELOG.md @@ -7,8 +7,21 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ## [Unreleased] +### Added +- Add support for data type extensions + - Adds `DataTypeExtension`, `DataTypeExtensionError`, and `DataTypePlugin` + - Add `Extension` variant to `DataType` + ### Changed - Bump `derive_more` to 0.2.0 +- **Breaking**: `DataType::metadata_fill_value` is now fallible +- **Breaking**: `DataType::{identifier,size,fixed_size}()` are no longer `const` +- **Breaking**: `DataType::from_metadata()` now returns a `PluginCreateError` +- **Breaking**: `DataType::metadata_fill_value()` is now fallible + +### Removed +- **Breaking**: Remove `UnsupportedDataTypeError` +- **Breaking**: Remove `DataType.identifier()` ## [0.1.0] - 2025-01-24 diff --git a/zarrs_data_type/Cargo.toml b/zarrs_data_type/Cargo.toml index 01844a5e..41697ad7 100644 --- a/zarrs_data_type/Cargo.toml +++ b/zarrs_data_type/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "zarrs_data_type" -version = "0.1.0" +version = "0.2.0" authors = ["Lachlan Deakin "] edition = "2021" rust-version = "1.77" @@ -22,8 +22,10 @@ all-features = true half = { workspace = true } num = { workspace = true } thiserror = "2.0.0" -derive_more = { version = "2.0.0", features = ["display", "from"] } +derive_more = { version = "2.0.0", features = ["deref", "display", "from"] } zarrs_metadata = { workspace = true } +zarrs_plugin = { workspace = true } +inventory = { workspace = true } [dev-dependencies] serde_json = { version = "1.0.71", features = ["float_roundtrip", "preserve_order"] } diff --git a/zarrs_data_type/src/data_type.rs b/zarrs_data_type/src/data_type.rs index 1d0d64da..6c0240a6 100644 --- a/zarrs_data_type/src/data_type.rs +++ b/zarrs_data_type/src/data_type.rs @@ -2,22 +2,29 @@ //! //! See . 
-use derive_more::From; +use std::{fmt::Debug, mem::discriminant, sync::Arc}; + use half::{bf16, f16}; use thiserror::Error; -use zarrs_metadata::v3::array::{ - data_type::{DataTypeMetadataV3, DataTypeSize}, - fill_value::{ - bfloat16_to_fill_value, float16_to_fill_value, float32_to_fill_value, - float64_to_fill_value, FillValueFloat, FillValueMetadataV3, +use zarrs_metadata::v3::{ + array::{ + data_type::{DataTypeMetadataV3, DataTypeSize}, + fill_value::{ + bfloat16_to_fill_value, float16_to_fill_value, float32_to_fill_value, + float64_to_fill_value, FillValueFloat, FillValueMetadataV3, + }, }, + MetadataConfiguration, MetadataV3, }; +use zarrs_plugin::{PluginCreateError, PluginUnsupportedError}; + +use crate::{DataTypeExtension, DataTypePlugin}; use super::FillValue; /// A data type. -#[derive(Clone, Debug, PartialEq, Eq)] +#[derive(Clone, Debug)] #[non_exhaustive] #[rustfmt::skip] pub enum DataType { @@ -61,18 +68,37 @@ pub enum DataType { /// /// This data type is not standardised in the Zarr V3 specification. Bytes, + /// An extension data type. + Extension(Arc) } -/// An unsupported data type error. -#[derive(Debug, Error, From)] -#[error("unsupported data type {_0}")] -pub struct UnsupportedDataTypeError(String); +impl PartialEq for DataType { + fn eq(&self, other: &Self) -> bool { + match (&self, other) { + (DataType::RawBits(a), DataType::RawBits(b)) => a == b, + (DataType::Extension(a), DataType::Extension(b)) => { + a.name() == b.name() && a.configuration() == b.configuration() + } + _ => discriminant(self) == discriminant(other), + } + } +} + +impl Eq for DataType {} /// A fill value metadata incompatibility error. #[derive(Debug, Error)] #[error("incompatible fill value {1} for data type {0}")] pub struct IncompatibleFillValueMetadataError(String, FillValueMetadataV3); +impl IncompatibleFillValueMetadataError { + /// Create a new [`IncompatibleFillValueMetadataError`]. + #[must_use] + pub fn new(data_type: String, fill_value_metadata: FillValueMetadataV3) -> Self { + Self(data_type, fill_value_metadata) + } +} + /// A fill value incompatibility error. #[derive(Debug, Error)] #[error("incompatible fill value {1} for data type {0}")] @@ -87,39 +113,29 @@ impl IncompatibleFillValueError { } impl DataType { - /// Returns the identifier. - #[must_use] - pub const fn identifier(&self) -> &'static str { - match self { - Self::Bool => "bool", - Self::Int8 => "int8", - Self::Int16 => "int16", - Self::Int32 => "int32", - Self::Int64 => "int64", - Self::UInt8 => "uint8", - Self::UInt16 => "uint16", - Self::UInt32 => "uint32", - Self::UInt64 => "uint64", - Self::Float16 => "float16", - Self::Float32 => "float32", - Self::Float64 => "float64", - Self::BFloat16 => "bfloat16", - Self::Complex64 => "complex64", - Self::Complex128 => "complex128", - Self::RawBits(_usize) => "r*", - Self::String => "string", - Self::Bytes => "bytes", - // Self::Extension(extension) => extension.identifier(), - } - } - /// Returns the name. 
#[must_use] pub fn name(&self) -> String { match self { + Self::Bool => "bool".to_string(), + Self::Int8 => "int8".to_string(), + Self::Int16 => "int16".to_string(), + Self::Int32 => "int32".to_string(), + Self::Int64 => "int64".to_string(), + Self::UInt8 => "uint8".to_string(), + Self::UInt16 => "uint16".to_string(), + Self::UInt32 => "uint32".to_string(), + Self::UInt64 => "uint64".to_string(), + Self::Float16 => "float16".to_string(), + Self::Float32 => "float32".to_string(), + Self::Float64 => "float64".to_string(), + Self::BFloat16 => "bfloat16".to_string(), + Self::Complex64 => "complex64".to_string(), + Self::Complex128 => "complex128".to_string(), Self::RawBits(size) => format!("r{}", size * 8), - // Self::Extension(extension) => extension.name(), - _ => self.identifier().to_string(), + Self::String => "string".to_string(), + Self::Bytes => "bytes".to_string(), + Self::Extension(extension) => extension.name(), } } @@ -145,12 +161,15 @@ impl DataType { Self::RawBits(size) => DataTypeMetadataV3::RawBits(*size), Self::String => DataTypeMetadataV3::String, Self::Bytes => DataTypeMetadataV3::Bytes, + Self::Extension(ext) => DataTypeMetadataV3::from_metadata( + &MetadataV3::new_with_configuration(&ext.name(), ext.configuration().clone()), + ), } } /// Returns the [`DataTypeSize`]. #[must_use] - pub const fn size(&self) -> DataTypeSize { + pub fn size(&self) -> DataTypeSize { match self { Self::Bool | Self::Int8 | Self::UInt8 => DataTypeSize::Fixed(1), Self::Int16 | Self::UInt16 | Self::Float16 | Self::BFloat16 => DataTypeSize::Fixed(2), @@ -159,13 +178,13 @@ impl DataType { Self::Complex128 => DataTypeSize::Fixed(16), Self::RawBits(size) => DataTypeSize::Fixed(*size), Self::String | Self::Bytes => DataTypeSize::Variable, - // Self::Extension(extension) => extension.size(), + Self::Extension(extension) => extension.size(), } } /// Returns the size in bytes of a fixed-size data type, otherwise returns [`None`]. #[must_use] - pub const fn fixed_size(&self) -> Option { + pub fn fixed_size(&self) -> Option { match self.size() { DataTypeSize::Fixed(size) => Some(size), DataTypeSize::Variable => None, @@ -176,8 +195,8 @@ impl DataType { /// /// # Errors /// - /// Returns [`UnsupportedDataTypeError`] if the metadata is invalid or not associated with a registered data type plugin. - pub fn from_metadata(metadata: &DataTypeMetadataV3) -> Result { + /// Returns [`PluginCreateError`] if the metadata is invalid or not associated with a registered data type plugin. 
+ pub fn from_metadata(metadata: &DataTypeMetadataV3) -> Result { match metadata { DataTypeMetadataV3::Bool => Ok(Self::Bool), DataTypeMetadataV3::Int8 => Ok(Self::Int8), @@ -198,9 +217,24 @@ impl DataType { DataTypeMetadataV3::String => Ok(Self::String), DataTypeMetadataV3::Bytes => Ok(Self::Bytes), DataTypeMetadataV3::Unknown(metadata) => { - Err(UnsupportedDataTypeError(metadata.to_string())) + for plugin in inventory::iter:: { + if plugin.match_name(metadata.name()) { + return plugin.create(metadata); + } + } + Err(PluginUnsupportedError::new( + metadata.name().to_string(), + metadata.configuration().cloned(), + "data type".to_string(), + ) + .into()) } - _ => Err(UnsupportedDataTypeError(metadata.to_string())), + _ => Err(PluginUnsupportedError::new( + metadata.name(), + Some(MetadataConfiguration::default()), + "data type".to_string(), + ) + .into()), } } @@ -265,81 +299,139 @@ impl DataType { FillValueMetadataV3::ByteArray(bytes) => Ok(FillValue::new(bytes.clone())), _ => Err(err()), }, + Self::Extension(ext) => ext.fill_value(fill_value), } } /// Create fill value metadata. /// - /// # Panics + /// # Errors /// - /// Panics if the metadata cannot be created from the fill value. - /// This would indicate an implementation error with a data type. - #[must_use] - pub fn metadata_fill_value(&self, fill_value: &FillValue) -> FillValueMetadataV3 { - let bytes = fill_value.as_ne_bytes(); + /// Returns an [`IncompatibleFillValueError`] if the metadata cannot be created from the fill value. + #[allow(clippy::too_many_lines)] + pub fn metadata_fill_value( + &self, + fill_value: &FillValue, + ) -> Result { + let error = || IncompatibleFillValueError::new(self.name(), fill_value.clone()); match self { - Self::Bool => FillValueMetadataV3::Bool(bytes[0] != 0), + Self::Bool => { + let bytes: [u8; 1] = fill_value.as_ne_bytes().try_into().map_err(|_| error())?; + match bytes[0] { + 0 => Ok(FillValueMetadataV3::Bool(false)), + 1 => Ok(FillValueMetadataV3::Bool(true)), + _ => Err(error()), + } + } Self::Int8 => { - FillValueMetadataV3::Int(i64::from(i8::from_ne_bytes(bytes.try_into().unwrap()))) + let bytes: [u8; 1] = fill_value.as_ne_bytes().try_into().map_err(|_| error())?; + Ok(FillValueMetadataV3::Int(i64::from(i8::from_ne_bytes( + bytes, + )))) } Self::Int16 => { - FillValueMetadataV3::Int(i64::from(i16::from_ne_bytes(bytes.try_into().unwrap()))) + let bytes: [u8; 2] = fill_value.as_ne_bytes().try_into().map_err(|_| error())?; + Ok(FillValueMetadataV3::Int(i64::from(i16::from_ne_bytes( + bytes, + )))) } Self::Int32 => { - FillValueMetadataV3::Int(i64::from(i32::from_ne_bytes(bytes.try_into().unwrap()))) + let bytes: [u8; 4] = fill_value.as_ne_bytes().try_into().map_err(|_| error())?; + Ok(FillValueMetadataV3::Int(i64::from(i32::from_ne_bytes( + bytes, + )))) + } + Self::Int64 => { + let bytes: [u8; 8] = fill_value.as_ne_bytes().try_into().map_err(|_| error())?; + Ok(FillValueMetadataV3::Int(i64::from_ne_bytes(bytes))) } - Self::Int64 => FillValueMetadataV3::Int(i64::from_ne_bytes(bytes.try_into().unwrap())), Self::UInt8 => { - FillValueMetadataV3::UInt(u64::from(u8::from_ne_bytes(bytes.try_into().unwrap()))) + let bytes: [u8; 1] = fill_value.as_ne_bytes().try_into().map_err(|_| error())?; + Ok(FillValueMetadataV3::UInt(u64::from(u8::from_ne_bytes( + bytes, + )))) } Self::UInt16 => { - FillValueMetadataV3::UInt(u64::from(u16::from_ne_bytes(bytes.try_into().unwrap()))) + let bytes: [u8; 2] = fill_value.as_ne_bytes().try_into().map_err(|_| error())?; + 
Ok(FillValueMetadataV3::UInt(u64::from(u16::from_ne_bytes( + bytes, + )))) } Self::UInt32 => { - FillValueMetadataV3::UInt(u64::from(u32::from_ne_bytes(bytes.try_into().unwrap()))) + let bytes: [u8; 4] = fill_value.as_ne_bytes().try_into().map_err(|_| error())?; + Ok(FillValueMetadataV3::UInt(u64::from(u32::from_ne_bytes( + bytes, + )))) } Self::UInt64 => { - FillValueMetadataV3::UInt(u64::from_ne_bytes(bytes.try_into().unwrap())) + let bytes: [u8; 8] = fill_value.as_ne_bytes().try_into().map_err(|_| error())?; + Ok(FillValueMetadataV3::UInt(u64::from_ne_bytes(bytes))) } Self::Float16 => { - let fill_value = f16::from_ne_bytes(fill_value.as_ne_bytes().try_into().unwrap()); - FillValueMetadataV3::Float(float16_to_fill_value(fill_value)) + let bytes: [u8; 2] = fill_value.as_ne_bytes().try_into().map_err(|_| error())?; + let fill_value = f16::from_ne_bytes(bytes); + Ok(FillValueMetadataV3::Float(float16_to_fill_value( + fill_value, + ))) + } + Self::Float32 => { + let bytes: [u8; 4] = fill_value.as_ne_bytes().try_into().map_err(|_| error())?; + Ok(FillValueMetadataV3::Float(float32_to_fill_value( + f32::from_ne_bytes(bytes), + ))) + } + Self::Float64 => { + let bytes: [u8; 8] = fill_value.as_ne_bytes().try_into().map_err(|_| error())?; + Ok(FillValueMetadataV3::Float(float64_to_fill_value( + f64::from_ne_bytes(bytes), + ))) } - Self::Float32 => FillValueMetadataV3::Float(float32_to_fill_value(f32::from_ne_bytes( - bytes.try_into().unwrap(), - ))), - Self::Float64 => FillValueMetadataV3::Float(float64_to_fill_value(f64::from_ne_bytes( - bytes.try_into().unwrap(), - ))), Self::BFloat16 => { - let fill_value = bf16::from_ne_bytes(fill_value.as_ne_bytes().try_into().unwrap()); - FillValueMetadataV3::Float(bfloat16_to_fill_value(fill_value)) + let bytes: [u8; 2] = fill_value.as_ne_bytes().try_into().map_err(|_| error())?; + let fill_value = bf16::from_ne_bytes(bytes); + Ok(FillValueMetadataV3::Float(bfloat16_to_fill_value( + fill_value, + ))) } Self::Complex64 => { - let re = f32::from_ne_bytes(bytes[0..4].try_into().unwrap()); - let im = f32::from_ne_bytes(bytes[4..8].try_into().unwrap()); - FillValueMetadataV3::Complex(float32_to_fill_value(re), float32_to_fill_value(im)) + let bytes: &[u8; 8] = fill_value.as_ne_bytes().try_into().map_err(|_| error())?; + let re = f32::from_ne_bytes(unsafe { bytes[0..4].try_into().unwrap_unchecked() }); + let im = f32::from_ne_bytes(unsafe { bytes[4..8].try_into().unwrap_unchecked() }); + Ok(FillValueMetadataV3::Complex( + float32_to_fill_value(re), + float32_to_fill_value(im), + )) } Self::Complex128 => { - let re = f64::from_ne_bytes(bytes[0..8].try_into().unwrap()); - let im = f64::from_ne_bytes(bytes[8..16].try_into().unwrap()); - FillValueMetadataV3::Complex(float64_to_fill_value(re), float64_to_fill_value(im)) + let bytes: &[u8; 16] = fill_value.as_ne_bytes().try_into().map_err(|_| error())?; + let re = f64::from_ne_bytes(unsafe { bytes[0..8].try_into().unwrap_unchecked() }); + let im = f64::from_ne_bytes(unsafe { bytes[8..16].try_into().unwrap_unchecked() }); + Ok(FillValueMetadataV3::Complex( + float64_to_fill_value(re), + float64_to_fill_value(im), + )) } Self::RawBits(size) => { - debug_assert_eq!(fill_value.as_ne_bytes().len(), *size); - FillValueMetadataV3::ByteArray(fill_value.as_ne_bytes().to_vec()) + let bytes = fill_value.as_ne_bytes(); + if bytes.len() == *size { + Ok(FillValueMetadataV3::ByteArray(bytes.to_vec())) + } else { + Err(error()) + } } - // DataType::Extension(extension) => extension.metadata_fill_value(fill_value), - Self::String => 
FillValueMetadataV3::String( - String::from_utf8(fill_value.as_ne_bytes().to_vec()).unwrap(), - ), - Self::Bytes => FillValueMetadataV3::ByteArray(fill_value.as_ne_bytes().to_vec()), + Self::String => Ok(FillValueMetadataV3::String( + String::from_utf8(fill_value.as_ne_bytes().to_vec()).map_err(|_| error())?, + )), + Self::Bytes => Ok(FillValueMetadataV3::ByteArray( + fill_value.as_ne_bytes().to_vec(), + )), + Self::Extension(extension) => extension.metadata_fill_value(fill_value), } } } impl TryFrom for DataType { - type Error = UnsupportedDataTypeError; + type Error = PluginCreateError; fn try_from(metadata: DataTypeMetadataV3) -> Result { Self::from_metadata(&metadata) @@ -367,7 +459,7 @@ mod tests { let metadata: DataTypeMetadataV3 = serde_json::from_str(json).unwrap(); assert_eq!( DataType::from_metadata(&metadata).unwrap_err().to_string(), - "unsupported data type unknown" + "data type unknown is not supported" ); assert!(DataType::try_from(metadata).is_err()); } @@ -384,7 +476,10 @@ mod tests { let metadata = serde_json::from_str::("true").unwrap(); let fill_value = data_type.fill_value_from_metadata(&metadata).unwrap(); assert_eq!(fill_value.as_ne_bytes(), u8::from(true).to_ne_bytes()); - assert_eq!(metadata, data_type.metadata_fill_value(&fill_value)); + assert_eq!( + metadata, + data_type.metadata_fill_value(&fill_value).unwrap() + ); let fillvalue = data_type .fill_value_from_metadata( @@ -405,7 +500,10 @@ mod tests { let metadata = serde_json::from_str::("-7").unwrap(); let fill_value = data_type.fill_value_from_metadata(&metadata).unwrap(); assert_eq!(fill_value.as_ne_bytes(), (-7i8).to_ne_bytes()); - assert_eq!(metadata, data_type.metadata_fill_value(&fill_value)); + assert_eq!( + metadata, + data_type.metadata_fill_value(&fill_value).unwrap() + ); assert_eq!( data_type @@ -429,7 +527,10 @@ mod tests { let metadata = serde_json::from_str::("-7").unwrap(); let fill_value = data_type.fill_value_from_metadata(&metadata).unwrap(); assert_eq!(fill_value.as_ne_bytes(), (-7i16).to_ne_bytes()); - assert_eq!(metadata, data_type.metadata_fill_value(&fill_value)); + assert_eq!( + metadata, + data_type.metadata_fill_value(&fill_value).unwrap() + ); assert_eq!( data_type @@ -453,7 +554,10 @@ mod tests { let metadata = serde_json::from_str::("-7").unwrap(); let fill_value = data_type.fill_value_from_metadata(&metadata).unwrap(); assert_eq!(fill_value.as_ne_bytes(), (-7i32).to_ne_bytes()); - assert_eq!(metadata, data_type.metadata_fill_value(&fill_value)); + assert_eq!( + metadata, + data_type.metadata_fill_value(&fill_value).unwrap() + ); assert_eq!( data_type @@ -477,7 +581,10 @@ mod tests { let metadata = serde_json::from_str::("-7").unwrap(); let fill_value = data_type.fill_value_from_metadata(&metadata).unwrap(); assert_eq!(fill_value.as_ne_bytes(), (-7i64).to_ne_bytes()); - assert_eq!(metadata, data_type.metadata_fill_value(&fill_value)); + assert_eq!( + metadata, + data_type.metadata_fill_value(&fill_value).unwrap() + ); assert_eq!( data_type @@ -501,7 +608,10 @@ mod tests { let metadata = serde_json::from_str::("7").unwrap(); let fill_value = data_type.fill_value_from_metadata(&metadata).unwrap(); assert_eq!(fill_value.as_ne_bytes(), 7u8.to_ne_bytes()); - assert_eq!(metadata, data_type.metadata_fill_value(&fill_value)); + assert_eq!( + metadata, + data_type.metadata_fill_value(&fill_value).unwrap() + ); } #[test] @@ -515,7 +625,10 @@ mod tests { let metadata = serde_json::from_str::("7").unwrap(); let fill_value = data_type.fill_value_from_metadata(&metadata).unwrap(); 
assert_eq!(fill_value.as_ne_bytes(), 7u16.to_ne_bytes()); - assert_eq!(metadata, data_type.metadata_fill_value(&fill_value)); + assert_eq!( + metadata, + data_type.metadata_fill_value(&fill_value).unwrap() + ); } #[test] @@ -529,7 +642,10 @@ mod tests { let metadata = serde_json::from_str::("7").unwrap(); let fill_value = data_type.fill_value_from_metadata(&metadata).unwrap(); assert_eq!(fill_value.as_ne_bytes(), 7u32.to_ne_bytes()); - assert_eq!(metadata, data_type.metadata_fill_value(&fill_value)); + assert_eq!( + metadata, + data_type.metadata_fill_value(&fill_value).unwrap() + ); } #[test] @@ -543,7 +659,10 @@ mod tests { let metadata = serde_json::from_str::("7").unwrap(); let fill_value = data_type.fill_value_from_metadata(&metadata).unwrap(); assert_eq!(fill_value.as_ne_bytes(), 7u64.to_ne_bytes()); - assert_eq!(metadata, data_type.metadata_fill_value(&fill_value)); + assert_eq!( + metadata, + data_type.metadata_fill_value(&fill_value).unwrap() + ); } #[test] @@ -557,7 +676,10 @@ mod tests { let metadata = serde_json::from_str::("-7.0").unwrap(); let fill_value = data_type.fill_value_from_metadata(&metadata).unwrap(); assert_eq!(fill_value.as_ne_bytes(), (-7.0f32).to_ne_bytes()); - assert_eq!(metadata, data_type.metadata_fill_value(&fill_value)); + assert_eq!( + metadata, + data_type.metadata_fill_value(&fill_value).unwrap() + ); assert_eq!( data_type @@ -611,7 +733,10 @@ mod tests { let metadata = serde_json::from_str::("-7.0").unwrap(); let fill_value = data_type.fill_value_from_metadata(&metadata).unwrap(); assert_eq!(fill_value.as_ne_bytes(), (-7.0f64).to_ne_bytes()); - assert_eq!(metadata, data_type.metadata_fill_value(&fill_value)); + assert_eq!( + metadata, + data_type.metadata_fill_value(&fill_value).unwrap() + ); assert_eq!( data_type @@ -663,7 +788,7 @@ mod tests { let metadata: DataTypeMetadataV3 = serde_json::from_str(json).unwrap(); let data_type = DataType::from_metadata(&metadata).unwrap(); assert_eq!(json, serde_json::to_string(&data_type.metadata()).unwrap()); - assert_eq!(data_type.identifier(), "float16"); + assert_eq!(data_type.name(), "float16"); let metadata = serde_json::from_str::("-7.0").unwrap(); let fill_value = data_type.fill_value_from_metadata(&metadata).unwrap(); @@ -671,7 +796,10 @@ mod tests { fill_value.as_ne_bytes(), f16::from_f32_const(-7.0).to_ne_bytes() ); - assert_eq!(metadata, data_type.metadata_fill_value(&fill_value)); + assert_eq!( + metadata, + data_type.metadata_fill_value(&fill_value).unwrap() + ); assert_eq!( data_type @@ -712,7 +840,7 @@ mod tests { let metadata: DataTypeMetadataV3 = serde_json::from_str(json).unwrap(); let data_type = DataType::from_metadata(&metadata).unwrap(); assert_eq!(json, serde_json::to_string(&data_type.metadata()).unwrap()); - assert_eq!(data_type.identifier(), "bfloat16"); + assert_eq!(data_type.name(), "bfloat16"); let metadata = serde_json::from_str::("-7.0").unwrap(); let fill_value = data_type.fill_value_from_metadata(&metadata).unwrap(); @@ -720,7 +848,10 @@ mod tests { fill_value.as_ne_bytes(), bf16::from_f32_const(-7.0).to_ne_bytes() ); - assert_eq!(metadata, data_type.metadata_fill_value(&fill_value)); + assert_eq!( + metadata, + data_type.metadata_fill_value(&fill_value).unwrap() + ); assert_eq!( data_type @@ -784,7 +915,10 @@ mod tests { .copied() .collect::>() ); - assert_eq!(metadata, data_type.metadata_fill_value(&fill_value)); + assert_eq!( + metadata, + data_type.metadata_fill_value(&fill_value).unwrap() + ); } #[test] @@ -807,7 +941,10 @@ mod tests { .copied() .collect::>() ); - 
assert_eq!(metadata, data_type.metadata_fill_value(&fill_value)); + assert_eq!( + metadata, + data_type.metadata_fill_value(&fill_value).unwrap() + ); } #[test] @@ -816,14 +953,16 @@ mod tests { let metadata: DataTypeMetadataV3 = serde_json::from_str(json).unwrap(); let data_type = DataType::from_metadata(&metadata).unwrap(); assert_eq!(json, serde_json::to_string(&data_type.metadata()).unwrap()); - assert_eq!(data_type.identifier(), "r*"); - assert_eq!(data_type.name().as_str(), "r8"); + assert_eq!(data_type.name(), "r8"); assert_eq!(data_type.size(), DataTypeSize::Fixed(1)); let metadata = serde_json::from_str::("[7]").unwrap(); let fill_value = data_type.fill_value_from_metadata(&metadata).unwrap(); assert_eq!(fill_value.as_ne_bytes(), 7u8.to_ne_bytes()); - assert_eq!(metadata, data_type.metadata_fill_value(&fill_value)); + assert_eq!( + metadata, + data_type.metadata_fill_value(&fill_value).unwrap() + ); } #[test] @@ -832,8 +971,7 @@ mod tests { let metadata: DataTypeMetadataV3 = serde_json::from_str(json).unwrap(); let data_type = DataType::from_metadata(&metadata).unwrap(); assert_eq!(json, serde_json::to_string(&data_type.metadata()).unwrap()); - assert_eq!(data_type.identifier(), "r*"); - assert_eq!(data_type.name().as_str(), "r16"); + assert_eq!(data_type.name(), "r16"); assert_eq!(data_type.size(), DataTypeSize::Fixed(2)); let metadata = serde_json::from_str::("[0, 255]").unwrap(); @@ -842,7 +980,10 @@ mod tests { fill_value.as_ne_bytes(), // NOTE: Raw value bytes are always read as-is. &[0u8, 255u8] ); - assert_eq!(metadata, data_type.metadata_fill_value(&fill_value)); + assert_eq!( + metadata, + data_type.metadata_fill_value(&fill_value).unwrap() + ); } #[test] @@ -1096,14 +1237,16 @@ mod tests { let metadata: DataTypeMetadataV3 = serde_json::from_str(json).unwrap(); let data_type = DataType::from_metadata(&metadata).unwrap(); assert_eq!(json, serde_json::to_string(&data_type.metadata()).unwrap()); - assert_eq!(data_type.identifier(), "string"); - assert_eq!(data_type.name().as_str(), "string"); + assert_eq!(data_type.name(), "string"); assert_eq!(data_type.size(), DataTypeSize::Variable); let metadata = serde_json::from_str::(r#""hello world""#).unwrap(); let fill_value = data_type.fill_value_from_metadata(&metadata).unwrap(); assert_eq!(fill_value.as_ne_bytes(), "hello world".as_bytes(),); - assert_eq!(metadata, data_type.metadata_fill_value(&fill_value)); + assert_eq!( + metadata, + data_type.metadata_fill_value(&fill_value).unwrap() + ); let metadata = serde_json::from_str::( r#"[104, 101, 108, 108, 111, 32, 119, 111, 114, 108, 100]"#, @@ -1111,16 +1254,25 @@ mod tests { .unwrap(); let fill_value = data_type.fill_value_from_metadata(&metadata).unwrap(); assert_eq!(fill_value.as_ne_bytes(), "hello world".as_bytes(),); - assert_ne!(metadata, data_type.metadata_fill_value(&fill_value)); // metadata is byte array rep, that is okay + assert_ne!( + metadata, + data_type.metadata_fill_value(&fill_value).unwrap() + ); // metadata is byte array rep, that is okay let metadata = serde_json::from_str::(r#""Infinity""#).unwrap(); let fill_value = data_type.fill_value_from_metadata(&metadata).unwrap(); assert_eq!(fill_value.as_ne_bytes(), "Infinity".as_bytes(),); - assert_ne!(metadata, data_type.metadata_fill_value(&fill_value)); // metadata is float rep, that is okay + assert_ne!( + metadata, + data_type.metadata_fill_value(&fill_value).unwrap() + ); // metadata is float rep, that is okay let metadata = serde_json::from_str::(r#""0x7fc00000""#).unwrap(); let fill_value = 
data_type.fill_value_from_metadata(&metadata).unwrap(); assert_eq!(fill_value.as_ne_bytes(), "0x7fc00000".as_bytes(),); - assert_ne!(metadata, data_type.metadata_fill_value(&fill_value)); // metadata is float rep, that is okay + assert_ne!( + metadata, + data_type.metadata_fill_value(&fill_value).unwrap() + ); // metadata is float rep, that is okay } } diff --git a/zarrs_data_type/src/data_type_extension.rs b/zarrs_data_type/src/data_type_extension.rs new file mode 100644 index 00000000..d721ddd5 --- /dev/null +++ b/zarrs_data_type/src/data_type_extension.rs @@ -0,0 +1,96 @@ +use std::{borrow::Cow, fmt::Debug}; +use zarrs_metadata::{ + v3::{ + array::{data_type::DataTypeSize, fill_value::FillValueMetadataV3}, + MetadataConfiguration, + }, + Endianness, +}; + +use crate::{FillValue, IncompatibleFillValueError, IncompatibleFillValueMetadataError}; + +/// Traits for a data type extension. +/// +/// The size in memory of a data type can differ between the in-memory Rust structure and the [`ArrayBytes`](https://docs.rs/zarrs/latest/zarrs/array/enum.ArrayBytes.html) passed into the codec pipeline. +/// For example, consider a structure that has padding bytes when used in memory in Rust. +/// This can be converted to tightly packed bytes before it is passed into the codec pipeline for encoding, and vice versa for decoding. +/// +/// It is recommended to define a concrete structure representing a single element of a custom data type that implements [`Element`](https://docs.rs/zarrs/latest/zarrs/array/trait.Element.html) and [`ElementOwned`](https://docs.rs/zarrs/latest/zarrs/array/trait.ElementOwned.html). +/// These traits have `into_array_bytes` and `from_array_bytes` methods for this purpose that enable custom data types to be used with the [`Array::{store,retrieve}_*_elements`](https://docs.rs/zarrs/latest/zarrs/array/struct.Array.html) variants. +/// These methods should encode data to and from native endianness if endianness is applicable, unless the endianness should be explicitly fixed. +/// Note that codecs that act on numerical data typically expect the data to be in native endianness. +/// +/// The [`DataTypeExtension::encode_bytes`] and [`DataTypeExtension::decode_bytes`] methods allow a fixed-size custom data type to be encoded with the `bytes` codec with a requested [`Endianness`]. +/// These methods are not invoked for variable-size data types, and can be pass-through for a fixed-size data types that use an explicitly fixed endianness or where endianness is not applicable. +/// +/// A custom data type must also directly handle conversion of fill value metadata to fill value bytes, and vice versa. +pub trait DataTypeExtension: Debug + Send + Sync { + /// The name of the data type. + fn name(&self) -> String; + + /// The configuration of the data type. + fn configuration(&self) -> MetadataConfiguration; + + /// The size of the data type. + /// + /// This size may differ from the size in memory of the data type. + /// It represents the size of elements passing through array to array and array to bytes codecs in the codec pipeline (i.e., after conversion to [`ArrayBytes`](https://docs.rs/zarrs/latest/zarrs/array/enum.ArrayBytes.html))). + fn size(&self) -> DataTypeSize; + + /// Create a fill value from metadata. + /// + /// # Errors + /// Returns [`IncompatibleFillValueMetadataError`] if the fill value is incompatible with the data type. + fn fill_value( + &self, + fill_value_metadata: &FillValueMetadataV3, + ) -> Result; + + /// Create fill value metadata. 
+ /// + /// # Errors + /// Returns an [`IncompatibleFillValueError`] if the metadata cannot be created from the fill value. + fn metadata_fill_value( + &self, + fill_value: &FillValue, + ) -> Result; + + /// Encode the bytes to a specified endianness. + /// + /// This is used internally within the `bytes` codec if the data type is fixed size. + /// + /// Return [`DataTypeExtensionError::BytesCodecUnsupported`] if the codec does not support the `bytes` codec. + /// + /// # Errors + /// Returns a [`DataTypeExtensionError`] if the `bytes` codec is not supported or `endianness` has not been specified. + #[allow(unused_variables)] + fn encode_bytes<'a>( + &self, + bytes: Cow<'a, [u8]>, + endianness: Option, + ) -> Result, DataTypeExtensionError>; + + /// Decode bytes from a specified endianness. + /// + /// This is used internally within the `bytes` codec if the data type is fixed size. + /// + /// Return [`DataTypeExtensionError::BytesCodecUnsupported`] if the codec does not support the `bytes` codec. + /// + /// # Errors + /// Returns a [`DataTypeExtensionError`] if the `bytes` codec is not supported or `endianness` has not been specified. + #[allow(unused_variables)] + fn decode_bytes<'a>( + &self, + bytes: Cow<'a, [u8]>, + endianness: Option, + ) -> Result, DataTypeExtensionError>; +} + +/// A data type error. +#[derive(Debug, thiserror::Error, derive_more::From, derive_more::Display)] +pub enum DataTypeExtensionError { + /// The endianness was not specified, and it is required for this data type extension. + EndiannessNotSpecified, + /// The `bytes` codec is not supported, likely because the data type has a variable length. + BytesCodecUnsupported, +} diff --git a/zarrs_data_type/src/data_type_plugin.rs b/zarrs_data_type/src/data_type_plugin.rs new file mode 100644 index 00000000..20ece06d --- /dev/null +++ b/zarrs_data_type/src/data_type_plugin.rs @@ -0,0 +1,20 @@ +use zarrs_metadata::v3::MetadataV3; +use zarrs_plugin::{Plugin, PluginCreateError}; + +use crate::DataType; + +/// A data type plugin. +#[derive(derive_more::Deref)] +pub struct DataTypePlugin(Plugin); +inventory::collect!(DataTypePlugin); + +impl DataTypePlugin { + /// Create a new [`DataTypePlugin`]. + pub const fn new( + identifier: &'static str, + match_name_fn: fn(name: &str) -> bool, + create_fn: fn(metadata: &MetadataV3) -> Result, + ) -> Self { + Self(Plugin::new(identifier, match_name_fn, create_fn)) + } +} diff --git a/zarrs_data_type/src/lib.rs b/zarrs_data_type/src/lib.rs index b9a4eeaf..2813fb3d 100644 --- a/zarrs_data_type/src/lib.rs +++ b/zarrs_data_type/src/lib.rs @@ -1,10 +1,11 @@ //! [Zarr](https://zarr-specs.readthedocs.io/) data types for the [`zarrs`](https://docs.rs/zarrs/latest/zarrs/index.html) crate. 
mod data_type; +mod data_type_extension; +mod data_type_plugin; mod fill_value; -pub use data_type::{ - DataType, IncompatibleFillValueError, IncompatibleFillValueMetadataError, - UnsupportedDataTypeError, -}; +pub use data_type::{DataType, IncompatibleFillValueError, IncompatibleFillValueMetadataError}; +pub use data_type_extension::{DataTypeExtension, DataTypeExtensionError}; +pub use data_type_plugin::DataTypePlugin; pub use fill_value::FillValue; From 0b7144aec12e79da3e6e44091251504d580413f7 Mon Sep 17 00:00:00 2001 From: Lachlan Deakin Date: Mon, 3 Mar 2025 10:05:58 +1100 Subject: [PATCH 2/3] improve docs and cleanup data type extension trait --- README.md | 21 ++++-- zarrs/doc/ecosystem.md | 34 ++++----- zarrs/doc/status/data_types.md | 3 +- zarrs/examples/custom_data_type_fixed_size.rs | 23 ++++-- .../custom_data_type_variable_size.rs | 25 +------ zarrs/src/array.rs | 10 +-- .../codec/array_to_bytes/bytes/bytes_codec.rs | 12 ++- zarrs/src/lib.rs | 38 +++++++--- zarrs_data_type/CHANGELOG.md | 2 +- zarrs_data_type/src/data_type_extension.rs | 74 ++++++++----------- .../src/data_type_extension_bytes_codec.rs | 40 ++++++++++ zarrs_data_type/src/lib.rs | 4 + 12 files changed, 165 insertions(+), 121 deletions(-) create mode 100644 zarrs_data_type/src/data_type_extension_bytes_codec.rs diff --git a/README.md b/README.md index 0c554aab..2e08ab4c 100644 --- a/README.md +++ b/README.md @@ -8,17 +8,26 @@ [![codecov](https://codecov.io/gh/LDeakin/zarrs/graph/badge.svg?token=OBKJQNAZPP)](https://codecov.io/gh/LDeakin/zarrs) [![DOI](https://zenodo.org/badge/695021547.svg)](https://zenodo.org/badge/latestdoi/695021547) -`zarrs` is a Rust library for the [Zarr] storage format for multidimensional arrays and metadata. It supports [Zarr V3] and a V3 compatible subset of [Zarr V2]. +`zarrs` is a Rust library for the [Zarr] storage format for multidimensional arrays and metadata. -A changelog can be found [here][CHANGELOG]. -Correctness issues with past versions are [detailed here][correctness_issues]. - -Developed at the [Department of Materials Physics, Australian National University, Canberra, Australia]. +`zarrs` supports [Zarr V3] and a V3 compatible subset of [Zarr V2]. +It is fully up-to-date and conformant with the Zarr 3.0 specification with support for: +- all *core extensions* (data types, codecs, chunk grids, chunk key encodings, storage transformers), +- all accepted [Zarr Enhancement Proposals (ZEPs)](https://zarr.dev/zeps/) and several draft ZEPs: + - ZEP 0003: Variable chunking + - ZEP 0007: Strings +- experimental codecs and data types intended for standardisation, and +- user-defined custom extensions and stores. > [!TIP] > If you are a Python user, check out [`zarrs-python`]. > It includes a high-performance codec pipeline for the reference [`zarr-python`] implementation. +A changelog can be found [here][CHANGELOG]. +Correctness issues with past versions are [detailed here][correctness_issues]. + +Developed at the [Department of Materials Physics, Australian National University, Canberra, Australia]. + ## Getting Started - Review the [implementation status] ([zarr version support], [array support], [storage support], and the [`zarrs` ecosystem](#zarrs-ecosystem)). - Read [The `zarrs` Book]. @@ -93,7 +102,7 @@ println!("{array_ndarray:4}"); - [`zarrs`]: The core library for manipulating Zarr hierarchies. - [`zarrs_data_type`]: Zarr data types (re-exported as `zarrs::data_type`). - [`zarrs_metadata`]: Zarr metadata support (re-exported as `zarrs::metadata`). 
-- [`zarrs_plugin`]: `zarrs` plugin support (re-exported as `zarrs::plugin`). +- [`zarrs_plugin`]: The plugin API for `zarrs` (re-exported as `zarrs::plugin`). - [`zarrs_storage`]: The storage API for `zarrs` (re-exported as `zarrs::storage`). ### Stores diff --git a/zarrs/doc/ecosystem.md b/zarrs/doc/ecosystem.md index af1c0a2f..7c7994fd 100644 --- a/zarrs/doc/ecosystem.md +++ b/zarrs/doc/ecosystem.md @@ -2,7 +2,7 @@ - [`zarrs`]: The core library for manipulating Zarr hierarchies. - [`zarrs_data_type`]: Zarr data types (re-exported as `zarrs::data_type`). - [`zarrs_metadata`]: Zarr metadata support (re-exported as `zarrs::metadata`). -- [`zarrs_plugin`]: `zarrs` plugin support (re-exported as `zarrs::plugin`). +- [`zarrs_plugin`]: The plugin API for `zarrs` (re-exported as `zarrs::plugin`). - [`zarrs_storage`]: The storage API for `zarrs` (re-exported as `zarrs::storage`). #### Stores @@ -27,24 +27,24 @@ - Transform arrays: crop, rescale, downsample, gradient magnitude, gaussian, noise filtering, etc. - Benchmarking tools and performance benchmarks of `zarrs`. -[`zarrs`]: https://github.com/LDeakin/zarrs/tree/main/zarrs -[`zarrs_data_type`]: https://github.com/LDeakin/zarrs/tree/main/zarrs_data_type -[`zarrs_metadata`]: https://github.com/LDeakin/zarrs/tree/main/zarrs_metadata -[`zarrs_plugin`]: https://github.com/LDeakin/zarrs/tree/main/zarrs_plugin -[`zarrs_storage`]: https://github.com/LDeakin/zarrs/tree/main/zarrs_storage -[`zarrs_filesystem`]: https://github.com/LDeakin/zarrs/tree/main/zarrs_filesystem -[`zarrs_http`]: https://github.com/LDeakin/zarrs/tree/main/zarrs_http -[`zarrs_object_store`]: https://github.com/LDeakin/zarrs/tree/main/zarrs_object_store -[`zarrs_opendal`]: https://github.com/LDeakin/zarrs/tree/main/zarrs_opendal -[`zarrs_zip`]: https://github.com/LDeakin/zarrs/tree/main/zarrs_zip -[`zarrs_icechunk`]: https://github.com/LDeakin/zarrs_icechunk -[`zarrs_ffi`]: https://github.com/LDeakin/zarrs_ffi +[`zarrs`]: https://docs.rs/zarrs/latest/zarrs/ +[`zarrs_data_type`]: https://docs.rs/zarrs_data_type/latest/zarrs_data_type/ +[`zarrs_metadata`]: https://docs.rs/zarrs_metadata/latest/zarrs_metadata/ +[`zarrs_plugin`]: https://docs.rs/zarrs_plugin/latest/zarrs_plugin/ +[`zarrs_storage`]: https://docs.rs/zarrs_storage/latest/zarrs_storage/ +[`zarrs_filesystem`]: https://docs.rs/zarrs_filesystem/latest/zarrs_filesystem/ +[`zarrs_http`]: https://docs.rs/zarrs_http/latest/zarrs_http/ +[`zarrs_object_store`]: https://docs.rs/zarrs_object_store/latest/zarrs_object_store/ +[`zarrs_opendal`]: https://docs.rs/zarrs_opendal/latest/zarrs_opendal/ +[`zarrs_zip`]: https://docs.rs/zarrs_zip/latest/zarrs_zip/ +[`zarrs_icechunk`]: https://docs.rs/zarrs_icechunk/latest/zarrs_icechunk/ +[`zarrs_ffi`]: https://docs.rs/zarrs_ffi/latest/zarrs_ffi/ [`zarrs-python`]: https://github.com/ilan-gold/zarrs-python [`zarr-python`]: https://github.com/zarr-developers/zarr-python -[`zarrs_tools`]: https://github.com/LDeakin/zarrs_tools -[`ome_zarr_metadata`]: https://github.com/LDeakin/rust_ome_zarr_metadata -[`object_store`]: https://github.com/apache/arrow-rs/tree/main/object_store -[`opendal`]: https://github.com/apache/OpenDAL +[`zarrs_tools`]: https://docs.rs/zarrs_tools/latest/zarrs_tools/ +[`ome_zarr_metadata`]: https://docs.rs/ome_zarr_metadata/latest/ome_zarr_metadata/ +[`object_store`]: https://docs.rs/object_store/latest/object_store/ +[`opendal`]: https://docs.rs/opendal/latest/opendal/ [`icechunk`]: https://github.com/earth-mover/icechunk [OME-Zarr]: https://ngff.openmicroscopy.org/latest/ 
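Before the example diffs below, here is a rough sketch of how a custom data type is expected to register itself through the `DataTypePlugin` inventory introduced in PATCH 1. The `ExampleDataType` struct, the `example.data_type` extension name, and the helper functions are illustrative only (they are not part of this patch), and the sketch assumes the `DataType::Extension` variant wraps an `Arc<dyn DataTypeExtension>` as the new examples do:

```rust
use std::sync::Arc;

use zarrs_data_type::{DataType, DataTypePlugin};
use zarrs_metadata::v3::MetadataV3;
use zarrs_plugin::PluginCreateError;

/// Hypothetical data type; assumed to implement `DataTypeExtension` elsewhere.
#[derive(Debug)]
struct ExampleDataType;

// Match the `data_type` name found in the array metadata.
fn is_example_data_type(name: &str) -> bool {
    name == "example.data_type"
}

// Construct the data type from its metadata.
fn create_example_data_type(_metadata: &MetadataV3) -> Result<DataType, PluginCreateError> {
    Ok(DataType::Extension(Arc::new(ExampleDataType)))
}

// Register the plugin so the data type is discovered when array metadata is parsed.
inventory::submit! {
    DataTypePlugin::new("example.data_type", is_example_data_type, create_example_data_type)
}
```

With a registration like this in place, opening an array whose metadata names `example.data_type` resolves to the custom extension without any changes to `zarrs` itself, which is the flow both new examples exercise.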
diff --git a/zarrs/doc/status/data_types.md b/zarrs/doc/status/data_types.md
index 77d35664..158727b5 100644
--- a/zarrs/doc/status/data_types.md
+++ b/zarrs/doc/status/data_types.md
@@ -4,7 +4,7 @@
 [r* (raw bits)] | [ZEP0001] | ✓ | | |
 [bfloat16] | [zarr-specs #130] | ✓ | | |
 [string] (experimental) | [ZEP0007 (draft)] | ✓ | | |
-| [dtype_bytes] (experimental) | [ZEP0007 (draft)] | ✓ | | |
+| [bytes](crate::data_type::DataType::Bytes) (experimental) | [ZEP0007 (draft)] | ✓ | | |
 
 † Experimental data types are recommended for evaluation only.
 
@@ -25,7 +25,6 @@
 [bfloat16]: crate::data_type::DataType::BFloat16
 [r* (raw bits)]: crate::data_type::DataType::RawBits
 [string]: crate::data_type::DataType::String
-[dtype_bytes]: crate::data_type::DataType::Bytes
 
 [ZEP0001]: https://zarr.dev/zeps/accepted/ZEP0001.html
 [zarr-specs #130]: https://github.com/zarr-developers/zarr-specs/issues/130
diff --git a/zarrs/examples/custom_data_type_fixed_size.rs b/zarrs/examples/custom_data_type_fixed_size.rs
index 87f9067d..cb5abe05 100644
--- a/zarrs/examples/custom_data_type_fixed_size.rs
+++ b/zarrs/examples/custom_data_type_fixed_size.rs
@@ -8,8 +8,9 @@ use zarrs::array::{
     ArrayBuilder, ArrayBytes, ArrayError, DataTypeSize, Element, ElementOwned, FillValueMetadataV3,
 };
 use zarrs_data_type::{
-    DataType, DataTypeExtension, DataTypeExtensionError, DataTypePlugin, FillValue,
-    IncompatibleFillValueError, IncompatibleFillValueMetadataError,
+    DataType, DataTypeExtension, DataTypeExtensionBytesCodec, DataTypeExtensionBytesCodecError,
+    DataTypeExtensionError, DataTypePlugin, FillValue, IncompatibleFillValueError,
+    IncompatibleFillValueMetadataError,
 };
 use zarrs_metadata::{
     v3::{MetadataConfiguration, MetadataV3},
@@ -177,11 +178,17 @@ impl DataTypeExtension for CustomDataTypeFixedSize {
         DataTypeSize::Fixed(size_of::<CustomDataTypeFixedSizeBytes>())
     }
 
-    fn encode_bytes<'a>(
+    fn codec_bytes(&self) -> Result<&dyn DataTypeExtensionBytesCodec, DataTypeExtensionError> {
+        Ok(self)
+    }
+}
+
+impl DataTypeExtensionBytesCodec for CustomDataTypeFixedSize {
+    fn encode<'a>(
         &self,
         bytes: std::borrow::Cow<'a, [u8]>,
         endianness: Option<Endianness>,
-    ) -> Result<Cow<'a, [u8]>, DataTypeExtensionError> {
+    ) -> Result<Cow<'a, [u8]>, DataTypeExtensionBytesCodecError> {
         if let Some(endianness) = endianness {
             if endianness != Endianness::native() {
                 let mut bytes = bytes.into_owned();
@@ -200,15 +207,15 @@ impl DataTypeExtension for CustomDataTypeFixedSize {
                 Ok(bytes)
             }
         } else {
-            Err(DataTypeExtensionError::EndiannessNotSpecified)
+            Err(DataTypeExtensionBytesCodecError::EndiannessNotSpecified)
         }
     }
 
-    fn decode_bytes<'a>(
+    fn decode<'a>(
         &self,
         bytes: std::borrow::Cow<'a, [u8]>,
         endianness: Option<Endianness>,
-    ) -> Result<Cow<'a, [u8]>, DataTypeExtensionError> {
+    ) -> Result<Cow<'a, [u8]>, DataTypeExtensionBytesCodecError> {
         if let Some(endianness) = endianness {
             if endianness != Endianness::native() {
                 let mut bytes = bytes.into_owned();
@@ -229,7 +236,7 @@ impl DataTypeExtension for CustomDataTypeFixedSize {
                 Ok(bytes)
             }
         } else {
-            Err(DataTypeExtensionError::EndiannessNotSpecified)
+            Err(DataTypeExtensionBytesCodecError::EndiannessNotSpecified)
         }
     }
 }
diff --git a/zarrs/examples/custom_data_type_variable_size.rs b/zarrs/examples/custom_data_type_variable_size.rs
index 003e97d6..01e900c7 100644
--- a/zarrs/examples/custom_data_type_variable_size.rs
+++ b/zarrs/examples/custom_data_type_variable_size.rs
@@ -10,13 +10,10 @@ use zarrs::array::{
     RawBytesOffsets,
 };
 use zarrs_data_type::{
-    DataType, DataTypeExtension, DataTypeExtensionError, DataTypePlugin, FillValue,
-    IncompatibleFillValueError, IncompatibleFillValueMetadataError,
-};
-use zarrs_metadata::{
-    v3::{array::fill_value::FillValueFloat, MetadataConfiguration, MetadataV3},
-    Endianness,
+    DataType, DataTypeExtension, DataTypePlugin, FillValue, IncompatibleFillValueError,
+    IncompatibleFillValueMetadataError,
 };
+use zarrs_metadata::v3::{array::fill_value::FillValueFloat, MetadataConfiguration, MetadataV3};
 use zarrs_plugin::{PluginCreateError, PluginMetadataInvalidError};
 use zarrs_storage::store::MemoryStore;
 
@@ -162,22 +159,6 @@ impl DataTypeExtension for CustomDataTypeVariableSize {
     fn size(&self) -> zarrs::array::DataTypeSize {
         DataTypeSize::Variable
     }
-
-    fn encode_bytes<'a>(
-        &self,
-        _bytes: Cow<'a, [u8]>,
-        _endianness: Option<Endianness>,
-    ) -> Result<Cow<'a, [u8]>, DataTypeExtensionError> {
-        Err(DataTypeExtensionError::BytesCodecUnsupported)
-    }
-
-    fn decode_bytes<'a>(
-        &self,
-        _bytes: Cow<'a, [u8]>,
-        _endianness: Option<Endianness>,
-    ) -> Result<Cow<'a, [u8]>, DataTypeExtensionError> {
-        Err(DataTypeExtensionError::BytesCodecUnsupported)
-    }
 }
 
 fn main() {
diff --git a/zarrs/src/array.rs b/zarrs/src/array.rs
index 5df4c6be..bf61dc00 100644
--- a/zarrs/src/array.rs
+++ b/zarrs/src/array.rs
@@ -276,7 +276,7 @@ pub fn chunk_shape_to_array_shape(chunk_shape: &[std::num::NonZeroU64]) -> Array
 /// **Standard [`Array`] retrieve methods do not perform any caching**.
 /// For this reason, retrieving multiple subsets in a chunk with [`retrieve_chunk_subset`](Array::store_chunk_subset) is very inefficient and strongly discouraged.
 /// For example, consider that a compressed chunk may need to be retrieved and decoded in its entirety even if only a small part of the data is needed.
-/// In such situations, prefer to retrieve a partial decoder for a chunk with [`partial_decoder`](Array::partial_decoder) and then retrieve multiple chunk subsets with [`partial_decode`](codec::ArrayPartialDecoderTraits::partial_decode).
+/// In such situations, prefer to initialise a partial decoder for a chunk with [`partial_decoder`](Array::partial_decoder) and then retrieve multiple chunk subsets with [`partial_decode`](codec::ArrayPartialDecoderTraits::partial_decode).
 /// The underlying codec chain will use a cache where efficient to optimise multiple partial decoding requests (see [`CodecChain`]).
 /// Another alternative is to use [Chunk Caching](#chunk-caching).
 ///
@@ -295,10 +295,8 @@ pub fn chunk_shape_to_array_shape(chunk_shape: &[std::num::NonZeroU64]) -> Array
 /// - [`ChunkCacheEncodedLruChunkLimit`]: an encoded chunk cache with a fixed chunk capacity.
 /// - [`ChunkCacheDecodedLruSizeLimit`]: a decoded chunk cache with a fixed size in bytes.
 /// - [`ChunkCacheEncodedLruSizeLimit`]: an encoded chunk cache with a fixed size in bytes.
-/// - [`ChunkCacheDecodedLruChunkLimitThreadLocal`]: a thread-local decoded chunk cache with a fixed chunk capacity (per thread).
-/// - [`ChunkCacheEncodedLruChunkLimitThreadLocal`]: a thread-local encoded chunk cache with a fixed chunk capacity (per thread).
-/// - [`ChunkCacheDecodedLruSizeLimitThreadLocal`]: a thread-local decoded chunk cache with a fixed size in bytes (per thread).
-/// - [`ChunkCacheEncodedLruSizeLimitThreadLocal`]: a thread-local encoded chunk cache with a fixed size in bytes (per thread).
+///
+/// There are also `ThreadLocal`-suffixed variants of all of these caches that maintain a separate cache per thread.
 ///
 /// `zarrs` consumers can create custom caches by implementing the [`ChunkCache`] trait.
 ///
@@ -314,7 +312,7 @@ pub fn chunk_shape_to_array_shape(chunk_shape: &[std::num::NonZeroU64]) -> Array
 /// **Benchmark your algorithm/data.**
 ///
 /// ## Reading Sharded Arrays
-/// The `sharding_indexed` ([`ShardingCodec`](codec::array_to_bytes::sharding)) codec enables multiple sub-chunks ("inner chunks") to be stored in a single chunk ("shard").
+/// The `sharding_indexed` codec ([`ShardingCodec`](codec::array_to_bytes::sharding)) enables multiple sub-chunks ("inner chunks") to be stored in a single chunk ("shard").
 /// With a sharded array, the [`chunk_grid`](Array::chunk_grid) and chunk indices in store/retrieve methods reference the chunks ("shards") of an array.
 ///
 /// The [`ArrayShardedExt`] trait provides additional methods to [`Array`] to query if an array is sharded and retrieve the inner chunk shape.
diff --git a/zarrs/src/array/codec/array_to_bytes/bytes/bytes_codec.rs b/zarrs/src/array/codec/array_to_bytes/bytes/bytes_codec.rs
index 55753c97..c3caf8e1 100644
--- a/zarrs/src/array/codec/array_to_bytes/bytes/bytes_codec.rs
+++ b/zarrs/src/array/codec/array_to_bytes/bytes/bytes_codec.rs
@@ -2,7 +2,7 @@
 
 use std::sync::Arc;
 
-use zarrs_data_type::DataType;
+use zarrs_data_type::{DataType, DataTypeExtensionError};
 
 use crate::{
     array::{
@@ -159,7 +159,10 @@ impl ArrayToBytesCodecTraits for BytesCodec {
         )?;
         let bytes = bytes.into_fixed()?;
         let bytes_encoded = match decoded_representation.data_type() {
-            DataType::Extension(ext) => ext.encode_bytes(bytes, self.endian)?,
+            DataType::Extension(ext) => ext
+                .codec_bytes()?
+                .encode(bytes, self.endian)
+                .map_err(DataTypeExtensionError::from)?,
             _ => self.do_encode_or_decode(bytes, decoded_representation)?,
         };
         Ok(bytes_encoded)
@@ -172,7 +175,10 @@ impl ArrayToBytesCodecTraits for BytesCodec {
         _options: &CodecOptions,
     ) -> Result<ArrayBytes<'a>, CodecError> {
         let bytes_decoded = match decoded_representation.data_type() {
-            DataType::Extension(ext) => ext.decode_bytes(bytes, self.endian)?,
+            DataType::Extension(ext) => ext
+                .codec_bytes()?
+                .decode(bytes, self.endian)
+                .map_err(DataTypeExtensionError::from)?,
             _ => self.do_encode_or_decode(bytes, decoded_representation)?,
         };
         Ok(ArrayBytes::from(bytes_decoded))
diff --git a/zarrs/src/lib.rs b/zarrs/src/lib.rs
index 1f09e32f..e2fb7829 100644
--- a/zarrs/src/lib.rs
+++ b/zarrs/src/lib.rs
@@ -1,13 +1,23 @@
-//! `zarrs` is Rust library for the [Zarr](https://zarr.dev) storage format for multidimensional arrays and metadata. It supports [Zarr V3](https://zarr-specs.readthedocs.io/en/latest/v3/core/v3.0.html) and a [V3 compatible subset](#implementation-status) of [Zarr V2](https://zarr-specs.readthedocs.io/en/latest/v2/v2.0.html).
+//! `zarrs` is a Rust library for the [Zarr](https://zarr.dev) storage format for multidimensional arrays and metadata.
+//!
+//! It supports [Zarr V3](https://zarr-specs.readthedocs.io/en/latest/v3/core/v3.0.html) and a [V3 compatible subset](#implementation-status) of [Zarr V2](https://zarr-specs.readthedocs.io/en/latest/v2/v2.0.html).
+//! It is fully up-to-date and conformant with the Zarr 3.0 specification with support for:
+//! - all *core extensions* (data types, codecs, chunk grids, chunk key encodings, storage transformers),
+//! - all accepted [Zarr Enhancement Proposals (ZEPs)](https://zarr.dev/zeps/) and several draft ZEPs:
+//!   - ZEP 0003: Variable chunking
+//!   - ZEP 0007: Strings
+// TODO: ZEP 0009
+//! - experimental codecs and data types intended for standardisation, and
+//! - user-defined custom extensions and stores.
+//!
+//! 
If you are a Python user, check out [`zarrs-python`](https://github.com/ilan-gold/zarrs-python).
+//! It includes a high-performance codec pipeline for the reference [`zarr-python`](https://github.com/zarr-developers/zarr-python) implementation.
 //!
 //! A changelog can be found [here](https://github.com/LDeakin/zarrs/blob/main/CHANGELOG.md).
 //! Correctness issues with past versions are [detailed here](https://github.com/LDeakin/zarrs/blob/main/doc/correctness_issues.md).
 //!
 //! Developed at the [Department of Materials Physics](https://physics.anu.edu.au/research/mp/), Australian National University, Canberra, Australia.
 //!
-//! If you are a Python user, check out [`zarrs-python`](https://github.com/ilan-gold/zarrs-python).
-//! It includes a high-performance codec pipeline for the reference [`zarr-python`](https://github.com/zarr-developers/zarr-python) implementation.
-//!
 //! ## Getting Started
 //! - Review the [implementation status](#implementation-status), [array support](#array-support), and [storage support](#storage-support).
 //! - Read [The `zarrs` Book].
@@ -65,10 +75,9 @@
 //!
 //! A huge range of storage backends are supported via the [`opendal`] and [`object_store`] crates.
 //! The documentation for the [`zarrs_opendal`] and [`zarrs_object_store`] crates includes version compatibility matrices with `zarrs` and the associated storage backends.
-//! These backends provide more feature complete HTTP stores than [zarrs_http].
+//! These backends provide more feature-complete HTTP stores than [`zarrs_http`].
 //!
-//! [`zarrs_icechunk`] implements the [Icechunk](https://icechunk.io/overview/) transactional storage engine, a storage specification for Zarr.
-//! It supports [`object_store`] stores.
+//! [`zarrs_icechunk`] implements the [Icechunk](https://icechunk.io/overview/) transactional storage engine, a storage specification for Zarr that supports [`object_store`] stores.
 //!
 //! [`opendal`]: https://docs.rs/opendal/latest/opendal/
 //! [`object_store`]: https://docs.rs/object_store/latest/object_store/
@@ -79,7 +88,10 @@
 //!
 //! The [`AsyncToSyncStorageAdapter`](crate::storage::storage_adapter::async_to_sync::AsyncToSyncStorageAdapter) enables some async stores to be used in a sync context.
 //!
+//! A custom store can be developed by implementing the relevant traits in the [`zarrs_storage`] crate.
+//!
 //! ## Examples
+//! ### Create and Read a Zarr Hierarchy
 #![cfg_attr(feature = "ndarray", doc = "```rust")]
 #![cfg_attr(not(feature = "ndarray"), doc = "```rust,ignore")]
 //! # use std::{path::PathBuf, sync::Arc};
@@ -148,13 +160,15 @@
 //! # Ok::<(), Box<dyn std::error::Error>>(())
 //! ```
 //!
-//! Various examples can be found in [zarrs/examples](https://github.com/LDeakin/zarrs/blob/main/zarrs/examples) that show how to
-//! - create and manipulate zarr hierarchies with various stores (sync and async), codecs, etc.
-//! - convert between Zarr V2 and V3.
+//! ### More Examples
+//! Various examples in the [examples](https://github.com/LDeakin/zarrs/blob/main/zarrs/examples) directory demonstrate:
+//! - creating and manipulating Zarr hierarchies with various stores (sync and async), codecs, etc.,
+//! - converting between Zarr V2 and V3, and
+//! - creating custom data types.
 //!
-//! They can be run with `cargo run --example <example_name>`.
+//! Examples can be run with `cargo run --example <example_name>`.
 //! - Some examples require non-default features, which can be enabled with `--all-features` or `--features <feature>`.
-//! 
- Some examples support a `-- --usage-log` argument to print storage API calls during example execution. +//! - Some examples support a `-- --usage-log` argument to print storage API calls during execution. //! //! ## Crate Features //! #### Default diff --git a/zarrs_data_type/CHANGELOG.md b/zarrs_data_type/CHANGELOG.md index b9f61623..3263c35a 100644 --- a/zarrs_data_type/CHANGELOG.md +++ b/zarrs_data_type/CHANGELOG.md @@ -9,7 +9,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### Added - Add support for data type extensions - - Adds `DataTypeExtension`, `DataTypeExtensionError`, and `DataTypePlugin` + - Adds `DataTypeExtension[BytesCodec]`, `DataTypeExtension[BytesCodec]Error`, and `DataTypePlugin` - Add `Extension` variant to `DataType` ### Changed diff --git a/zarrs_data_type/src/data_type_extension.rs b/zarrs_data_type/src/data_type_extension.rs index d721ddd5..0df7bc1b 100644 --- a/zarrs_data_type/src/data_type_extension.rs +++ b/zarrs_data_type/src/data_type_extension.rs @@ -1,26 +1,25 @@ -use std::{borrow::Cow, fmt::Debug}; -use zarrs_metadata::{ - v3::{ - array::{data_type::DataTypeSize, fill_value::FillValueMetadataV3}, - MetadataConfiguration, - }, - Endianness, +use std::fmt::Debug; +use zarrs_metadata::v3::{ + array::{data_type::DataTypeSize, fill_value::FillValueMetadataV3}, + MetadataConfiguration, }; -use crate::{FillValue, IncompatibleFillValueError, IncompatibleFillValueMetadataError}; +use crate::{ + DataTypeExtensionBytesCodec, DataTypeExtensionBytesCodecError, FillValue, + IncompatibleFillValueError, IncompatibleFillValueMetadataError, +}; /// Traits for a data type extension. /// -/// The size in memory of a data type can differ between the in-memory Rust structure and the [`ArrayBytes`](https://docs.rs/zarrs/latest/zarrs/array/enum.ArrayBytes.html) passed into the codec pipeline. -/// For example, consider a structure that has padding bytes when used in memory in Rust. -/// This can be converted to tightly packed bytes before it is passed into the codec pipeline for encoding, and vice versa for decoding. +/// The in-memory size of a data type can differ between its associated Rust structure and the *serialised* [`ArrayBytes`](https://docs.rs/zarrs/latest/zarrs/array/enum.ArrayBytes.html) passed into the codec pipeline. +/// For example, a Rust struct that has padding bytes can be converted to tightly packed bytes before it is passed into the codec pipeline for encoding, and vice versa for decoding. /// /// It is recommended to define a concrete structure representing a single element of a custom data type that implements [`Element`](https://docs.rs/zarrs/latest/zarrs/array/trait.Element.html) and [`ElementOwned`](https://docs.rs/zarrs/latest/zarrs/array/trait.ElementOwned.html). /// These traits have `into_array_bytes` and `from_array_bytes` methods for this purpose that enable custom data types to be used with the [`Array::{store,retrieve}_*_elements`](https://docs.rs/zarrs/latest/zarrs/array/struct.Array.html) variants. /// These methods should encode data to and from native endianness if endianness is applicable, unless the endianness should be explicitly fixed. /// Note that codecs that act on numerical data typically expect the data to be in native endianness. /// -/// The [`DataTypeExtension::encode_bytes`] and [`DataTypeExtension::decode_bytes`] methods allow a fixed-size custom data type to be encoded with the `bytes` codec with a requested [`Endianness`]. 
+/// The [`DataTypeExtensionBytesCodec`] trait's methods allow a fixed-size custom data type to be encoded by the `bytes` codec with a requested endianness.
 /// These methods are not invoked for variable-size data types, and can be pass-through for fixed-size data types that use an explicitly fixed endianness or where endianness is not applicable.
 ///
 /// A custom data type must also directly handle conversion of fill value metadata to fill value bytes, and vice versa.
@@ -34,7 +33,7 @@ pub trait DataTypeExtension: Debug + Send + Sync {
     /// The size of the data type.
     ///
     /// This size may differ from the size in memory of the data type.
-    /// It represents the size of elements passing through array to array and array to bytes codecs in the codec pipeline (i.e., after conversion to [`ArrayBytes`](https://docs.rs/zarrs/latest/zarrs/array/enum.ArrayBytes.html))).
+    /// It represents the size of elements passing through array to array and array to bytes codecs in the codec pipeline (i.e., after conversion to [`ArrayBytes`](https://docs.rs/zarrs/latest/zarrs/array/enum.ArrayBytes.html)).
     fn size(&self) -> DataTypeSize;
 
     /// Create a fill value from metadata.
@@ -55,42 +54,29 @@ pub trait DataTypeExtension: Debug + Send + Sync {
         fill_value: &FillValue,
     ) -> Result<FillValueMetadataV3, IncompatibleFillValueError>;
 
-    /// Encode the bytes to a specified endianness.
-    ///
-    /// This is used internally within the `bytes` codec if the data type is fixed size.
-    ///
-    /// Return [`DataTypeExtensionError::BytesCodecUnsupported`] if the codec does not support the `bytes` codec.
-    ///
-    /// # Errors
-    /// Returns a [`DataTypeExtensionError`] if the `bytes` codec is not supported or `endianness` has not been specified.
-    #[allow(unused_variables)]
-    fn encode_bytes<'a>(
-        &self,
-        bytes: Cow<'a, [u8]>,
-        endianness: Option<Endianness>,
-    ) -> Result<Cow<'a, [u8]>, DataTypeExtensionError>;
-
-    /// Decode bytes from a specified endianness.
+    /// Return [`DataTypeExtensionBytesCodec`] if the data type supports the `bytes` codec.
     ///
-    /// This is used internally within the `bytes` codec if the data type is fixed size.
+    /// Fixed-size data types are expected to support the `bytes` codec, even if bytes pass through it unmodified.
     ///
-    /// Return [`DataTypeExtensionError::BytesCodecUnsupported`] if the codec does not support the `bytes` codec.
+    /// The default implementation returns [`DataTypeExtensionError::CodecUnsupported`].
     ///
     /// # Errors
-    /// Returns a [`DataTypeExtensionError`] if the `bytes` codec is not supported or `endianness` has not been specified.
-    #[allow(unused_variables)]
-    fn decode_bytes<'a>(
-        &self,
-        bytes: Cow<'a, [u8]>,
-        endianness: Option<Endianness>,
-    ) -> Result<Cow<'a, [u8]>, DataTypeExtensionError>;
+    /// Returns [`DataTypeExtensionError::CodecUnsupported`] if the `bytes` codec is unsupported.
+    fn codec_bytes(&self) -> Result<&dyn DataTypeExtensionBytesCodec, DataTypeExtensionError> {
+        Err(DataTypeExtensionError::CodecUnsupported(
+            "bytes".to_string(),
+            self.name(),
+        ))
+    }
 }
 
-/// A data type error.
-#[derive(Debug, thiserror::Error, derive_more::From, derive_more::Display)]
+/// A data type extension error.
+#[derive(Debug, thiserror::Error, derive_more::Display)]
+#[non_exhaustive]
 pub enum DataTypeExtensionError {
-    /// The endianness was not specified, and it is required for this data type extension.
-    EndiannessNotSpecified,
-    /// The `bytes` codec is not supported, likely because the data type has a variable length.
-    BytesCodecUnsupported,
+    /// Codec not supported
+    #[display("The {_0} codec is not supported by the {_1} extension data type")]
+    CodecUnsupported(String, String),
+    /// A `bytes` codec error.
+    BytesCodec(#[from] DataTypeExtensionBytesCodecError),
 }
diff --git a/zarrs_data_type/src/data_type_extension_bytes_codec.rs b/zarrs_data_type/src/data_type_extension_bytes_codec.rs
new file mode 100644
index 00000000..cd2783d1
--- /dev/null
+++ b/zarrs_data_type/src/data_type_extension_bytes_codec.rs
@@ -0,0 +1,40 @@
+use std::borrow::Cow;
+
+use zarrs_metadata::Endianness;
+
+/// Traits for a data type extension supporting the `bytes` codec.
+pub trait DataTypeExtensionBytesCodec {
+    /// Encode the bytes of a fixed-size data type to a specified endianness for the `bytes` codec.
+    ///
+    /// Implementations can return the input bytes unmodified if endianness is not applicable (i.e. the bytes are serialised directly from the in-memory representation).
+    ///
+    /// # Errors
+    /// Returns a [`DataTypeExtensionBytesCodecError`] if `endianness` is [`None`] but must be specified.
+    #[allow(unused_variables)]
+    fn encode<'a>(
+        &self,
+        bytes: Cow<'a, [u8]>,
+        endianness: Option<Endianness>,
+    ) -> Result<Cow<'a, [u8]>, DataTypeExtensionBytesCodecError>;
+
+    /// Decode the bytes of a fixed-size data type from a specified endianness for the `bytes` codec.
+    ///
+    /// This performs the inverse operation of [`encode`](DataTypeExtensionBytesCodec::encode).
+    ///
+    /// # Errors
+    /// Returns a [`DataTypeExtensionBytesCodecError`] if `endianness` is [`None`] but must be specified.
+    #[allow(unused_variables)]
+    fn decode<'a>(
+        &self,
+        bytes: Cow<'a, [u8]>,
+        endianness: Option<Endianness>,
+    ) -> Result<Cow<'a, [u8]>, DataTypeExtensionBytesCodecError>;
+}
+
+/// A data type extension error related to the `bytes` codec.
+#[derive(Debug, thiserror::Error, derive_more::From, derive_more::Display)]
+#[non_exhaustive]
+pub enum DataTypeExtensionBytesCodecError {
+    /// The endianness was not specified, and it is required for this data type extension.
+    EndiannessNotSpecified,
+}
diff --git a/zarrs_data_type/src/lib.rs b/zarrs_data_type/src/lib.rs
index 2813fb3d..cb240ea9 100644
--- a/zarrs_data_type/src/lib.rs
+++ b/zarrs_data_type/src/lib.rs
@@ -2,10 +2,14 @@
 
 mod data_type;
 mod data_type_extension;
+mod data_type_extension_bytes_codec;
 mod data_type_plugin;
 mod fill_value;
 
 pub use data_type::{DataType, IncompatibleFillValueError, IncompatibleFillValueMetadataError};
 pub use data_type_extension::{DataTypeExtension, DataTypeExtensionError};
+pub use data_type_extension_bytes_codec::{
+    DataTypeExtensionBytesCodec, DataTypeExtensionBytesCodecError,
+};
 pub use data_type_plugin::DataTypePlugin;
 pub use fill_value::FillValue;

From 6215a89009b5f3bb69518fe7e8f591dfb85efd20 Mon Sep 17 00:00:00 2001
From: Lachlan Deakin
Date: Mon, 3 Mar 2025 10:12:23 +1100
Subject: [PATCH 3/3] use fields in `DataTypeExtensionError::CodecUnsupported`

---
 zarrs_data_type/src/data_type_extension.rs | 17 +++++++++++------
 1 file changed, 11 insertions(+), 6 deletions(-)

diff --git a/zarrs_data_type/src/data_type_extension.rs b/zarrs_data_type/src/data_type_extension.rs
index 0df7bc1b..1cb6bf1a 100644
--- a/zarrs_data_type/src/data_type_extension.rs
+++ b/zarrs_data_type/src/data_type_extension.rs
@@ -63,10 +63,10 @@ pub trait DataTypeExtension: Debug + Send + Sync {
 
     /// # Errors
     /// Returns [`DataTypeExtensionError::CodecUnsupported`] if the `bytes` codec is unsupported.
fn codec_bytes(&self) -> Result<&dyn DataTypeExtensionBytesCodec, DataTypeExtensionError> { - Err(DataTypeExtensionError::CodecUnsupported( - "bytes".to_string(), - self.name(), - )) + Err(DataTypeExtensionError::CodecUnsupported { + data_type: self.name(), + codec: "bytes".to_string(), + }) } } @@ -75,8 +75,13 @@ pub trait DataTypeExtension: Debug + Send + Sync { #[non_exhaustive] pub enum DataTypeExtensionError { /// Codec not supported - #[display("The {_0} codec is not supported by the {_1} extension data type")] - CodecUnsupported(String, String), + #[display("The {codec} codec is not supported by the {data_type} extension data type")] + CodecUnsupported { + /// The data type name. + data_type: String, + /// The codec name. + codec: String, + }, /// A `bytes` codec error. BytesCodec(#[from] DataTypeExtensionBytesCodecError), }
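Taken together, the final shape of the trait split is easiest to see with a pass-through implementation. The following sketch assumes a hypothetical fixed-size `Opaque16` data type whose serialised bytes have no notion of endianness (it is not part of this patch); its `DataTypeExtension::codec_bytes` would simply return `Ok(self)`, as the fixed-size example does:

```rust
use std::borrow::Cow;

use zarrs_data_type::{DataTypeExtensionBytesCodec, DataTypeExtensionBytesCodecError};
use zarrs_metadata::Endianness;

/// Hypothetical 16-byte opaque data type; its serialised form is endianness-free.
#[derive(Debug)]
struct Opaque16;

impl DataTypeExtensionBytesCodec for Opaque16 {
    fn encode<'a>(
        &self,
        bytes: Cow<'a, [u8]>,
        _endianness: Option<Endianness>,
    ) -> Result<Cow<'a, [u8]>, DataTypeExtensionBytesCodecError> {
        // Endianness is not applicable, so the serialised bytes pass through unmodified.
        Ok(bytes)
    }

    fn decode<'a>(
        &self,
        bytes: Cow<'a, [u8]>,
        _endianness: Option<Endianness>,
    ) -> Result<Cow<'a, [u8]>, DataTypeExtensionBytesCodecError> {
        Ok(bytes)
    }
}
```

A variable-size data type can instead rely on the default `codec_bytes`, and a caller probing for support can match on `DataTypeExtensionError::CodecUnsupported { data_type, codec }` to report which codec was requested for which data type.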