From cdbc815c4db63b9aec6489c69bfaad9485fc9054 Mon Sep 17 00:00:00 2001 From: Zack Slayton Date: Thu, 6 Jun 2024 13:03:21 -0400 Subject: [PATCH] Version bump to 1.0.0-rc.6, improvements to IonEncoding (#785) * Version bump to 1.0.0-rc.6 * re-export IonEncoding as part of experimental-reader-writer * Adds name(), version() methods to IonEncoding * Raw stream items can now report their encoding * Adds `expect_text` method to Symbol, SymbolRef --- Cargo.toml | 2 +- src/lazy/any_encoding.rs | 54 ++++++++++++++++++++++++++++-- src/lazy/binary/raw/reader.rs | 8 ++--- src/lazy/binary/raw/v1_1/reader.rs | 8 ++--- src/lazy/raw_stream_item.rs | 22 ++++++++++-- src/lazy/system_stream_item.rs | 7 ++-- src/lazy/text/buffer.rs | 12 +++++-- src/lazy/text/raw/reader.rs | 3 +- src/lazy/text/raw/v1_1/reader.rs | 3 +- src/lazy/value.rs | 10 +++++- src/lib.rs | 1 + src/symbol_ref.rs | 10 +++++- src/types/symbol.rs | 2 +- 13 files changed, 116 insertions(+), 26 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 6b7b181c..5f2805d4 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -16,7 +16,7 @@ exclude = [ "**/ion-tests/iontestdata/**", "*.pdf" ] -version = "1.0.0-rc.5" +version = "1.0.0-rc.6" edition = "2021" # We need at least 1.65 for GATs[1] and 1.67 for `ilog`[2] # [1] https://blog.rust-lang.org/2022/11/03/Rust-1.65.0.html diff --git a/src/lazy/any_encoding.rs b/src/lazy/any_encoding.rs index 04afaeba..616ab49b 100644 --- a/src/lazy/any_encoding.rs +++ b/src/lazy/any_encoding.rs @@ -53,7 +53,7 @@ use crate::lazy::text::value::{ LazyRawTextValue_1_0, LazyRawTextValue_1_1, LazyRawTextVersionMarker_1_0, LazyRawTextVersionMarker_1_1, RawTextAnnotationsIterator, }; -use crate::{IonResult, IonType, RawSymbolRef}; +use crate::{Encoding, IonResult, IonType, RawSymbolRef}; use bumpalo::Bump as BumpAllocator; /// An implementation of the `LazyDecoder` trait that can read any encoding of Ion. @@ -89,6 +89,18 @@ pub enum LazyRawAnyVersionMarkerKind<'top> { Binary_1_1(LazyRawBinaryVersionMarker_1_1<'top>), } +impl<'top> LazyRawAnyVersionMarker<'top> { + pub fn encoding(&self) -> IonEncoding { + use crate::lazy::any_encoding::LazyRawAnyVersionMarkerKind::*; + match self.encoding { + Text_1_0(_) => TextEncoding_1_0.encoding(), + Binary_1_0(_) => BinaryEncoding_1_0.encoding(), + Text_1_1(_) => TextEncoding_1_1.encoding(), + Binary_1_1(_) => BinaryEncoding_1_1.encoding(), + } + } +} + impl<'top> HasSpan<'top> for LazyRawAnyVersionMarker<'top> { fn span(&self) -> Span<'top> { use LazyRawAnyVersionMarkerKind::*; @@ -165,6 +177,16 @@ pub enum LazyRawAnyEExpressionKind<'top> { Binary_1_1(Never), // TODO: RawBinaryEExpression_1_1 } +impl<'top> LazyRawAnyEExpression<'top> { + pub fn encoding(&self) -> IonEncoding { + use LazyRawAnyEExpressionKind::*; + match self.encoding { + Text_1_1(_) => TextEncoding_1_1.encoding(), + Binary_1_1(_) => BinaryEncoding_1_1.encoding(), + } + } +} + impl<'top> From> for LazyRawAnyEExpression<'top> { fn from(text_invocation: RawTextEExpression_1_1<'top>) -> Self { LazyRawAnyEExpression { @@ -278,7 +300,7 @@ pub enum RawReaderKind<'data> { Binary_1_1(LazyRawBinaryReader_1_1<'data>), } -#[derive(Default, Copy, Clone)] +#[derive(Default, Debug, Copy, Clone)] #[non_exhaustive] pub enum IonEncoding { // In the absence of a binary IVM, readers must assume Ion 1.0 text data until a @@ -300,6 +322,24 @@ impl IonEncoding { use IonEncoding::*; matches!(*self, Binary_1_0 | Binary_1_1) } + + pub fn name(&self) -> &str { + use IonEncoding::*; + match self { + Text_1_0 => TextEncoding_1_0::name(), + Binary_1_0 => BinaryEncoding_1_0::name(), + Text_1_1 => TextEncoding_1_1::name(), + Binary_1_1 => BinaryEncoding_1_1::name(), + } + } + + pub fn version(&self) -> (u8, u8) { + use IonEncoding::*; + match self { + Text_1_0 | Binary_1_0 => (1, 0), + Text_1_1 | Binary_1_1 => (1, 1), + } + } } impl<'data> From> for LazyRawAnyReader<'data> { @@ -421,6 +461,16 @@ impl<'top> LazyRawAnyValue<'top> { pub fn kind(&self) -> LazyRawValueKind<'top> { self.encoding } + + pub fn encoding(&self) -> IonEncoding { + use LazyRawValueKind::*; + match &self.encoding { + Text_1_0(_) => TextEncoding_1_0.encoding(), + Binary_1_0(_) => BinaryEncoding_1_0.encoding(), + Text_1_1(_) => TextEncoding_1_1.encoding(), + Binary_1_1(_) => BinaryEncoding_1_1.encoding(), + } + } } #[derive(Debug, Copy, Clone)] diff --git a/src/lazy/binary/raw/reader.rs b/src/lazy/binary/raw/reader.rs index da494b1d..341b15c2 100644 --- a/src/lazy/binary/raw/reader.rs +++ b/src/lazy/binary/raw/reader.rs @@ -6,7 +6,7 @@ use crate::lazy::decoder::{Decoder, HasRange, LazyRawFieldExpr, LazyRawReader, R use crate::lazy::encoding::BinaryEncoding_1_0; use crate::lazy::raw_stream_item::{EndPosition, LazyRawStreamItem, RawStreamItem}; use crate::result::IonFailure; -use crate::IonResult; +use crate::{Encoding, IonResult}; use crate::lazy::any_encoding::IonEncoding; use bumpalo::Bump as BumpAllocator; @@ -66,7 +66,7 @@ impl<'data> LazyRawBinaryReader_1_0<'data> { Some(lazy_value) => lazy_value, None => { return Ok(LazyRawStreamItem::::EndOfStream( - EndPosition::new(self.position()), + EndPosition::new(BinaryEncoding_1_0.encoding(), self.position()), )) } }; @@ -83,7 +83,7 @@ impl<'data> LazyRawBinaryReader_1_0<'data> { let mut buffer = self.data.advance_to_next_item()?; if buffer.is_empty() { return Ok(LazyRawStreamItem::::EndOfStream( - EndPosition::new(self.position()), + EndPosition::new(BinaryEncoding_1_0.encoding(), self.position()), )); } // Peek at the first byte in the new buffer view @@ -94,7 +94,7 @@ impl<'data> LazyRawBinaryReader_1_0<'data> { (_, buffer) = buffer.consume_nop_padding(type_descriptor)?; if buffer.is_empty() { return Ok(LazyRawStreamItem::::EndOfStream( - EndPosition::new(buffer.offset()), + EndPosition::new(BinaryEncoding_1_0.encoding(), buffer.offset()), )); } type_descriptor = buffer.peek_type_descriptor()?; diff --git a/src/lazy/binary/raw/v1_1/reader.rs b/src/lazy/binary/raw/v1_1/reader.rs index ade9c969..173d440d 100644 --- a/src/lazy/binary/raw/v1_1/reader.rs +++ b/src/lazy/binary/raw/v1_1/reader.rs @@ -7,7 +7,7 @@ use crate::lazy::encoder::private::Sealed; use crate::lazy::encoding::BinaryEncoding_1_1; use crate::lazy::raw_stream_item::{EndPosition, LazyRawStreamItem, RawStreamItem}; use crate::result::IonFailure; -use crate::IonResult; +use crate::{Encoding, IonResult}; use crate::lazy::any_encoding::IonEncoding; use bumpalo::Bump as BumpAllocator; @@ -62,7 +62,7 @@ impl<'data> LazyRawBinaryReader_1_1<'data> { Some(lazy_value) => lazy_value, None => { return Ok(LazyRawStreamItem::::EndOfStream( - EndPosition::new(self.position()), + EndPosition::new(BinaryEncoding_1_1.encoding(), self.position()), )) } }; @@ -93,7 +93,7 @@ impl<'data> LazyRawBinaryReader_1_1<'data> { let mut buffer = self.advance_to_next_item()?; if buffer.is_empty() { return Ok(LazyRawStreamItem::::EndOfStream( - EndPosition::new(buffer.offset()), + EndPosition::new(BinaryEncoding_1_1.encoding(), buffer.offset()), )); } @@ -102,7 +102,7 @@ impl<'data> LazyRawBinaryReader_1_1<'data> { (_, buffer) = buffer.consume_nop_padding(type_descriptor)?; if buffer.is_empty() { return Ok(LazyRawStreamItem::::EndOfStream( - EndPosition::new(buffer.offset()), + EndPosition::new(BinaryEncoding_1_1.encoding(), buffer.offset()), )); } } diff --git a/src/lazy/raw_stream_item.rs b/src/lazy/raw_stream_item.rs index be497242..98ec6ecf 100644 --- a/src/lazy/raw_stream_item.rs +++ b/src/lazy/raw_stream_item.rs @@ -1,7 +1,7 @@ use crate::lazy::decoder::{Decoder, HasRange, HasSpan}; use crate::lazy::span::Span; use crate::result::IonFailure; -use crate::{IonError, IonResult}; +use crate::{AnyEncoding, IonEncoding, IonError, IonResult}; use std::fmt::Debug; use std::ops::Range; @@ -27,6 +27,17 @@ pub type LazyRawStreamItem<'top, D> = RawStreamItem< ::EExp<'top>, >; +impl<'top> LazyRawStreamItem<'top, AnyEncoding> { + pub fn encoding(&self) -> IonEncoding { + match self { + LazyRawStreamItem::::VersionMarker(m) => m.encoding(), + LazyRawStreamItem::::Value(v) => v.encoding(), + LazyRawStreamItem::::EExpression(e) => e.encoding(), + LazyRawStreamItem::::EndOfStream(eos) => eos.encoding(), + } + } +} + impl HasRange for RawStreamItem { @@ -116,12 +127,17 @@ impl RawStreamItem { /// an `EndOfStream(EndPosition)` variant) to also implement them. #[derive(Debug, Copy, Clone)] pub struct EndPosition { + encoding: IonEncoding, position: usize, } impl EndPosition { - pub(crate) fn new(position: usize) -> Self { - Self { position } + pub(crate) fn new(encoding: IonEncoding, position: usize) -> Self { + Self { encoding, position } + } + + pub fn encoding(&self) -> IonEncoding { + self.encoding } } diff --git a/src/lazy/system_stream_item.rs b/src/lazy/system_stream_item.rs index 3f0f7dae..95bf9507 100644 --- a/src/lazy/system_stream_item.rs +++ b/src/lazy/system_stream_item.rs @@ -73,9 +73,8 @@ impl<'top, D: Decoder> SystemStreamItem<'top, D> { } } - /// Like [`Self::symbol_table`], but returns a [`IonError::Decoding`] if this item is not - /// a symbol table. - pub fn symbol_table(self) -> Option> { + /// If this item is a symbol table, returns `Some(lazy_struct)`. Otherwise, returns `None`. + pub fn as_symbol_table(self) -> Option> { if let Self::SymbolTable(struct_) = self { Some(struct_) } else { @@ -83,7 +82,7 @@ impl<'top, D: Decoder> SystemStreamItem<'top, D> { } } - /// Like [`Self::symbol_table`], but returns a [`IonError::Decoding`] if this item is not + /// Like [`Self::as_symbol_table`], but returns a [`IonError::Decoding`] if this item is not /// a symbol table. pub fn expect_symbol_table(self) -> IonResult> { if let Self::SymbolTable(value) = self { diff --git a/src/lazy/text/buffer.rs b/src/lazy/text/buffer.rs index 9692b78c..ae5073cc 100644 --- a/src/lazy/text/buffer.rs +++ b/src/lazy/text/buffer.rs @@ -43,7 +43,7 @@ use crate::lazy::text::value::{ LazyRawTextValue, LazyRawTextValue_1_0, LazyRawTextValue_1_1, LazyRawTextVersionMarker, }; use crate::result::DecodingError; -use crate::{IonError, IonResult, IonType, TimestampPrecision}; +use crate::{Encoding, IonError, IonResult, IonType, TimestampPrecision}; impl<'a> Debug for TextBufferView<'a> { fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { @@ -558,7 +558,10 @@ impl<'top> TextBufferView<'top> { if input_after_ws.is_empty() { return Ok(( input_after_ws, - RawStreamItem::EndOfStream(EndPosition::new(input_after_ws.offset())), + RawStreamItem::EndOfStream(EndPosition::new( + TextEncoding_1_0.encoding(), + input_after_ws.offset(), + )), )); } // Otherwise, the next item must be an IVM or a value. @@ -581,7 +584,10 @@ impl<'top> TextBufferView<'top> { if input_after_ws.is_empty() { return Ok(( input_after_ws, - RawStreamItem::EndOfStream(EndPosition::new(input_after_ws.offset())), + RawStreamItem::EndOfStream(EndPosition::new( + TextEncoding_1_1.encoding(), + input_after_ws.offset(), + )), )); } // Otherwise, the next item must be an IVM or a value. diff --git a/src/lazy/text/raw/reader.rs b/src/lazy/text/raw/reader.rs index 8a36f879..2262ff49 100644 --- a/src/lazy/text/raw/reader.rs +++ b/src/lazy/text/raw/reader.rs @@ -9,7 +9,7 @@ use crate::lazy::raw_stream_item::{EndPosition, LazyRawStreamItem, RawStreamItem use crate::lazy::text::buffer::TextBufferView; use crate::lazy::text::parse_result::AddContext; use crate::result::IonFailure; -use crate::IonResult; +use crate::{Encoding, IonResult}; /// A text Ion 1.0 reader that yields [`LazyRawStreamItem`]s representing the top level values found /// in the provided input stream. @@ -59,6 +59,7 @@ impl<'data> LazyRawTextReader_1_0<'data> { .with_context("reading whitespace/comments at the top level", input)?; if buffer_after_whitespace.is_empty() { return Ok(RawStreamItem::EndOfStream(EndPosition::new( + TextEncoding_1_0.encoding(), buffer_after_whitespace.offset(), ))); } diff --git a/src/lazy/text/raw/v1_1/reader.rs b/src/lazy/text/raw/v1_1/reader.rs index 0d3ffee3..f333247a 100644 --- a/src/lazy/text/raw/v1_1/reader.rs +++ b/src/lazy/text/raw/v1_1/reader.rs @@ -24,7 +24,7 @@ use crate::lazy::text::matched::{MatchedFieldName, MatchedValue}; use crate::lazy::text::parse_result::{AddContext, ToIteratorOutput}; use crate::lazy::text::value::{LazyRawTextValue_1_1, RawTextAnnotationsIterator}; use crate::result::IonFailure; -use crate::{IonResult, IonType, RawSymbolRef}; +use crate::{Encoding, IonResult, IonType, RawSymbolRef}; pub struct LazyRawTextReader_1_1<'data> { input: &'data [u8], @@ -168,6 +168,7 @@ impl<'data> LazyRawReader<'data, TextEncoding_1_1> for LazyRawTextReader_1_1<'da .with_context("reading v1.1 whitespace/comments at the top level", input)?; if buffer_after_whitespace.is_empty() { return Ok(RawStreamItem::EndOfStream(EndPosition::new( + TextEncoding_1_1.encoding(), buffer_after_whitespace.offset(), ))); } diff --git a/src/lazy/value.rs b/src/lazy/value.rs index 71ecb938..38ba1d06 100644 --- a/src/lazy/value.rs +++ b/src/lazy/value.rs @@ -66,9 +66,17 @@ impl<'top, D: Decoder> LazyValue<'top, D> { LazyValue { expanded_value } } - fn symbol_table(&'top self) -> &'top SymbolTable { + #[cfg(feature = "experimental-tooling-apis")] + pub fn symbol_table(&self) -> &SymbolTable { + self.expanded_value.context.symbol_table + } + + // When the `experimental-tooling-apis` feature is disabled, this method is `pub(crate)` + #[cfg(not(feature = "experimental-tooling-apis"))] + pub(crate) fn symbol_table(&self) -> &SymbolTable { self.expanded_value.context.symbol_table } + /// Returns the [`IonType`] of this value. /// ``` ///# use ion_rs::IonResult; diff --git a/src/lib.rs b/src/lib.rs index f9928bac..9fa8611f 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -213,6 +213,7 @@ macro_rules! v1_x_reader_writer { lazy::r#struct::{LazyStruct, LazyField}, lazy::sequence::{LazyList, LazySExp}, lazy::encoder::value_writer::{ValueWriter, StructWriter, SequenceWriter, EExpWriter}, + lazy::any_encoding::IonEncoding, }; }; } diff --git a/src/symbol_ref.rs b/src/symbol_ref.rs index c88b1867..b2638ac9 100644 --- a/src/symbol_ref.rs +++ b/src/symbol_ref.rs @@ -1,5 +1,6 @@ use crate::raw_symbol_ref::{AsRawSymbolRef, RawSymbolRef}; -use crate::{Str, Symbol}; +use crate::result::IonFailure; +use crate::{IonResult, Str, Symbol}; use std::borrow::Borrow; use std::fmt::{Debug, Formatter}; use std::hash::{Hash, Hasher}; @@ -39,6 +40,13 @@ impl<'a> SymbolRef<'a> { Some(text) => Symbol::owned(Str::from(text)), } } + + pub fn expect_text(&self) -> IonResult<&str> { + match self.text() { + Some(text) => Ok(text), + None => IonResult::decoding_error("symbol has unknown text"), + } + } } impl<'a, A> PartialEq for SymbolRef<'a> diff --git a/src/types/symbol.rs b/src/types/symbol.rs index e5ffb0e6..76247752 100644 --- a/src/types/symbol.rs +++ b/src/types/symbol.rs @@ -114,7 +114,7 @@ impl Symbol { self.text.text() } - pub fn text_or_error(&self) -> IonResult<&str> { + pub fn expect_text(&self) -> IonResult<&str> { match self.text() { Some(text) => Ok(text), None => IonResult::decoding_error("symbol has unknown text"),