Skip to content

Commit

Permalink
refactor: remove size_[fixed]() and rename Unknown to Extension
Browse files Browse the repository at this point in the history
… in `DataTypeMetadataV3`
  • Loading branch information
LDeakin committed Mar 9, 2025
1 parent 6d3cc2d commit b3367a2
Show file tree
Hide file tree
Showing 4 changed files with 40 additions and 37 deletions.
2 changes: 1 addition & 1 deletion zarrs_data_type/src/data_type.rs
Original file line number Diff line number Diff line change
Expand Up @@ -212,7 +212,7 @@ impl DataType {
DataTypeMetadataV3::RawBits(size) => Ok(Self::RawBits(*size)),
DataTypeMetadataV3::String => Ok(Self::String),
DataTypeMetadataV3::Bytes => Ok(Self::Bytes),
DataTypeMetadataV3::Unknown(metadata) => {
DataTypeMetadataV3::Extension(metadata) => {
for plugin in inventory::iter::<DataTypePlugin> {
if plugin.match_name(metadata.name()) {
return plugin.create(metadata);
Expand Down
4 changes: 4 additions & 0 deletions zarrs_metadata/CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,10 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
- **Breaking**: All functions in `v3::array::fill_value` have been removed
- **Breaking**: `try_as_*()` methods in `FillValueMetadataV3` have been replaced with more extensive `as_*()` methods
- **Breaking**: Remove `fill_value::{HexString,FillValueFloat,FillValueFloatStringNonFinite}`
- **Breaking**: Rename `DataTypeMetadataV3::Unknown` variant to `Extension`

### Removed
- **Breaking**: Remove `DataTypeMetadataV3::{size,fixed_size}()`

## [0.3.6] - 2025-03-02

Expand Down
33 changes: 30 additions & 3 deletions zarrs_metadata/src/v2_to_v3.rs
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ use crate::{
transpose::{TransposeCodecConfigurationV1, TransposeOrder},
zstd::codec_zstd_v2_numcodecs_to_v3,
},
codec::zstd::ZstdCodecConfiguration,
codec::{blosc::BloscShuffleModeNumcodecs, zstd::ZstdCodecConfiguration},
v2::{
array::{
data_type_metadata_v2_to_endianness, ArrayMetadataV2Order, DataTypeMetadataV2,
Expand All @@ -18,7 +18,7 @@ use crate::{
v3::{
array::{
chunk_grid::regular::RegularChunkGridConfiguration,
chunk_key_encoding::v2::V2ChunkKeyEncodingConfiguration,
chunk_key_encoding::v2::V2ChunkKeyEncodingConfiguration, data_type::DataTypeSize,
fill_value::FillValueMetadataV3,
},
ArrayMetadataV3, GroupMetadataV3, MetadataV3,
Expand Down Expand Up @@ -174,7 +174,34 @@ pub fn codec_metadata_v2_to_v3(
let blosc = serde_json::from_value::<BloscCodecConfigurationNumcodecs>(
serde_json::to_value(compressor.configuration())?,
)?;
let configuration = codec_blosc_v2_numcodecs_to_v3(&blosc, data_type.size());

let data_type_size = if blosc.shuffle == BloscShuffleModeNumcodecs::NoShuffle {
// The data type size does not matter
None
} else {
// Special case for known Zarr V2 data types
type M = DataTypeMetadataV3;
match data_type {
M::Bool | M::Int8 | M::UInt8 => Some(DataTypeSize::Fixed(1)),
M::Int16 | M::UInt16 | M::Float16 | M::BFloat16 => {
Some(DataTypeSize::Fixed(2))
}
M::Int32 | M::UInt32 | M::Float32 => Some(DataTypeSize::Fixed(4)),
M::Int64 | M::UInt64 | M::Float64 | M::Complex64 => {
Some(DataTypeSize::Fixed(8))
}
M::Complex128 => Some(DataTypeSize::Fixed(16)),
M::RawBits(size) => Some(DataTypeSize::Fixed(*size)),
M::String | M::Bytes => Some(DataTypeSize::Variable),
M::Extension(_) => {
// In this case the metadata will not match how the data is encoded, but it can still be decoded just fine.
// Resaving the array metadata as v3 will not have optimal blosc encoding parameters
None
}
}
};

let configuration = codec_blosc_v2_numcodecs_to_v3(&blosc, data_type_size);
codecs.push(MetadataV3::new_with_serializable_configuration(
name,
&configuration,
Expand Down
38 changes: 5 additions & 33 deletions zarrs_metadata/src/v3/array/data_type.rs
Original file line number Diff line number Diff line change
Expand Up @@ -45,8 +45,8 @@ pub enum DataTypeMetadataV3 {
String,
/// Variable-sized binary data.
Bytes,
/// An unknown data type.
Unknown(MetadataV3),
/// An unknown extension data type.
Extension(MetadataV3),
}

impl serde::Serialize for DataTypeMetadataV3 {
Expand Down Expand Up @@ -105,47 +105,19 @@ impl DataTypeMetadataV3 {
Self::String => "string".to_string(),
Self::Bytes => "bytes".to_string(),
Self::RawBits(size) => format!("r{}", size * 8),
Self::Unknown(metadata) => metadata.name().to_string(),
Self::Extension(metadata) => metadata.name().to_string(),
}
}

/// Returns the metadata.
///
/// A variant that wraps a [`MetadataV3`] returns a clone of the wrapped value.
/// Every other variant returns a new [`MetadataV3`] built from its name alone.
#[must_use]
pub fn metadata(&self) -> MetadataV3 {
match self {
// These variants already hold their metadata, so hand back a copy of it.
Self::Unknown(metadata) => metadata.clone(),
Self::Extension(metadata) => metadata.clone(),
// All remaining variants are described by their name only.
_ => MetadataV3::new(&self.name()),
}
}

/// Returns the [`DataTypeSize`] of this data type.
///
/// Fixed-size data types are reported as [`DataTypeSize::Fixed`], while `String` and `Bytes`
/// are reported as [`DataTypeSize::Variable`]. Returns [`None`] for an unknown data type.
#[must_use]
pub const fn size(&self) -> Option<DataTypeSize> {
    // Resolve the width of the fixed-size data types; every other case returns early.
    let width = match self {
        Self::Unknown(_) => return None,
        Self::String | Self::Bytes => return Some(DataTypeSize::Variable),
        Self::RawBits(size) => *size,
        Self::Bool | Self::Int8 | Self::UInt8 => 1,
        Self::Int16 | Self::UInt16 | Self::Float16 | Self::BFloat16 => 2,
        Self::Int32 | Self::UInt32 | Self::Float32 => 4,
        Self::Int64 | Self::UInt64 | Self::Float64 | Self::Complex64 => 8,
        Self::Complex128 => 16,
    };
    Some(DataTypeSize::Fixed(width))
}

/// Returns the size in bytes when the data type is known and fixed-size.
///
/// Returns [`None`] for variable-sized data types and for data types whose size is unknown.
#[must_use]
pub const fn fixed_size(&self) -> Option<usize> {
    // Only the `Fixed` case carries a byte count; `Variable` and `None` both map to `None`.
    if let Some(DataTypeSize::Fixed(bytes)) = self.size() {
        Some(bytes)
    } else {
        None
    }
}

/// Create a data type from metadata.
#[must_use]
pub fn from_metadata(metadata: &MetadataV3) -> Self {
Expand Down Expand Up @@ -181,7 +153,7 @@ impl DataTypeMetadataV3 {
}
}

Self::Unknown(metadata.clone())
Self::Extension(metadata.clone())
}
}

Expand Down

0 comments on commit b3367a2

Please sign in to comment.