From 189dfc3986554c769d09288b88aae0852582d4b3 Mon Sep 17 00:00:00 2001 From: Richard Giliam Date: Thu, 4 Apr 2024 02:42:57 -0700 Subject: [PATCH 01/17] Wire-up base 1.1 binary reader, and add null.null, and IVM support --- examples/lazy_read_all_values.rs | 2 +- src/binary/constants.rs | 11 + src/lazy/any_encoding.rs | 254 +++- src/lazy/binary/encoded_value.rs | 41 +- src/lazy/binary/immutable_buffer.rs | 14 +- src/lazy/binary/raw/annotations_iterator.rs | 1 + src/lazy/binary/raw/mod.rs | 1 + src/lazy/binary/raw/reader.rs | 38 +- src/lazy/binary/raw/sequence.rs | 50 +- src/lazy/binary/raw/struct.rs | 36 +- .../binary/raw/v1_1/annotations_iterator.rs | 32 + src/lazy/binary/raw/v1_1/immutable_buffer.rs | 1150 +++++++++++++++++ src/lazy/binary/raw/v1_1/mod.rs | 11 + src/lazy/binary/raw/v1_1/reader.rs | 151 +++ src/lazy/binary/raw/v1_1/sequence.rs | 152 +++ src/lazy/binary/raw/v1_1/struct.rs | 152 +++ src/lazy/binary/raw/v1_1/type_code.rs | 147 +++ src/lazy/binary/raw/v1_1/type_descriptor.rs | 164 +++ src/lazy/binary/raw/v1_1/value.rs | 252 ++++ src/lazy/binary/raw/value.rs | 45 +- src/lazy/encoding.rs | 46 +- src/lazy/never.rs | 10 +- src/lazy/raw_value_ref.rs | 2 +- src/lazy/struct.rs | 6 +- 24 files changed, 2625 insertions(+), 143 deletions(-) create mode 100644 src/lazy/binary/raw/v1_1/annotations_iterator.rs create mode 100644 src/lazy/binary/raw/v1_1/immutable_buffer.rs create mode 100644 src/lazy/binary/raw/v1_1/mod.rs create mode 100644 src/lazy/binary/raw/v1_1/reader.rs create mode 100644 src/lazy/binary/raw/v1_1/sequence.rs create mode 100644 src/lazy/binary/raw/v1_1/struct.rs create mode 100644 src/lazy/binary/raw/v1_1/type_code.rs create mode 100644 src/lazy/binary/raw/v1_1/type_descriptor.rs create mode 100644 src/lazy/binary/raw/v1_1/value.rs diff --git a/examples/lazy_read_all_values.rs b/examples/lazy_read_all_values.rs index 32a794ba..5e17517e 100644 --- a/examples/lazy_read_all_values.rs +++ b/examples/lazy_read_all_values.rs @@ -18,7 +18,7 @@ mod lazy_reader_example { use memmap::MmapOptions; - use ion_rs::lazy::r#struct::LazyBinaryStruct; + use ion_rs::lazy::r#struct::LazyBinaryStruct_1_0 as LazyBinaryStruct; use ion_rs::lazy::reader::LazyBinaryReader; use ion_rs::lazy::value::LazyBinaryValue; use ion_rs::lazy::value_ref::ValueRef; diff --git a/src/binary/constants.rs b/src/binary/constants.rs index a9e7cef7..4780fa10 100644 --- a/src/binary/constants.rs +++ b/src/binary/constants.rs @@ -12,3 +12,14 @@ pub mod v1_0 { pub const VAR_UINT: u8 = 14; } } + +pub mod v1_1 { + /// Ion Version Marker byte sequence + pub const IVM: [u8; 4] = [0xE0, 0x01, 0x01, 0xEA]; + + /// Constants for interpreting the length (`L`) code of binary values + pub mod length_codes { + pub const NULL: u8 = 15; + pub const VAR_UINT: u8 = 14; + } +} diff --git a/src/lazy/any_encoding.rs b/src/lazy/any_encoding.rs index dc84069a..a6e89f73 100644 --- a/src/lazy/any_encoding.rs +++ b/src/lazy/any_encoding.rs @@ -6,19 +6,21 @@ use std::ops::Range; use bumpalo::Bump as BumpAllocator; use crate::lazy::any_encoding::RawReaderKind::{Binary_1_0, Text_1_0}; -use crate::lazy::binary::raw::annotations_iterator::RawBinaryAnnotationsIterator; -use crate::lazy::binary::raw::r#struct::{LazyRawBinaryStruct, RawBinaryStructIterator}; -use crate::lazy::binary::raw::reader::LazyRawBinaryReader; +use crate::lazy::binary::raw::annotations_iterator::RawBinaryAnnotationsIterator as RawBinaryAnnotationsIterator_1_0; +use crate::lazy::binary::raw::r#struct::{LazyRawBinaryStruct_1_0, RawBinaryStructIterator as RawBinaryStructIterator_1_0}; +use crate::lazy::binary::raw::reader::LazyRawBinaryReader_1_0; use crate::lazy::binary::raw::sequence::{ - LazyRawBinaryList, LazyRawBinarySExp, RawBinarySequenceIterator, + LazyRawBinaryList_1_0, LazyRawBinarySExp_1_0, RawBinarySequenceIterator as RawBinarySequenceIterator_1_0, }; -use crate::lazy::binary::raw::value::LazyRawBinaryValue; +use crate::lazy::binary::raw::value::LazyRawBinaryValue_1_0; use crate::lazy::decoder::private::{LazyContainerPrivate, LazyRawValuePrivate}; use crate::lazy::decoder::{ LazyDecoder, LazyRawFieldExpr, LazyRawReader, LazyRawSequence, LazyRawStruct, LazyRawValue, LazyRawValueExpr, RawFieldExpr, RawValueExpr, }; -use crate::lazy::encoding::{BinaryEncoding_1_0, TextEncoding_1_0, TextEncoding_1_1}; +use crate::lazy::encoding::{ + BinaryEncoding_1_0, BinaryEncoding_1_1, TextEncoding_1_0, TextEncoding_1_1, +}; use crate::lazy::expanded::macro_evaluator::RawEExpression; use crate::lazy::never::Never; use crate::lazy::raw_stream_item::LazyRawStreamItem; @@ -28,9 +30,16 @@ use crate::lazy::text::raw::reader::LazyRawTextReader_1_0; use crate::lazy::text::raw::sequence::{ LazyRawTextList_1_0, LazyRawTextSExp_1_0, RawTextListIterator_1_0, RawTextSExpIterator_1_0, }; +use crate::lazy::binary::raw::v1_1::RawBinaryAnnotationsIterator as RawBinaryAnnotationsIterator_1_1; +use crate::lazy::binary::raw::v1_1::value::LazyRawBinaryValue_1_1; +use crate::lazy::binary::raw::v1_1::r#struct::{LazyRawBinaryStruct_1_1, RawBinaryStructIterator_1_1}; +use crate::lazy::binary::raw::v1_1::reader::LazyRawBinaryReader_1_1; +use crate::lazy::binary::raw::v1_1::sequence::{ + LazyRawBinaryList_1_1, LazyRawBinarySExp_1_1, RawBinarySequenceIterator as RawBinarySequenceIterator_1_1 +}; use crate::lazy::text::raw::v1_1::reader::{ - LazyRawTextList_1_1, LazyRawTextSExp_1_1, LazyRawTextStruct_1_1, MacroIdRef, - RawTextEExpression_1_1, RawTextSequenceCacheIterator_1_1, RawTextStructCacheIterator_1_1, + LazyRawTextList_1_1, LazyRawTextSExp_1_1, LazyRawTextStruct_1_1, MacroIdRef, RawTextEExpression_1_1, + RawTextSequenceCacheIterator_1_1, RawTextStructCacheIterator_1_1, }; use crate::lazy::text::value::{ LazyRawTextValue_1_0, LazyRawTextValue_1_1, RawTextAnnotationsIterator, @@ -67,6 +76,7 @@ enum LazyRawAnyEExpressionKind<'top> { Text_1_0(Never), Binary_1_0(Never), Text_1_1(RawTextEExpression_1_1<'top>), + Binary_1_1(Never), } impl<'top> From> for LazyRawAnyEExpression<'top> { @@ -85,6 +95,7 @@ impl<'top> RawEExpression<'top, AnyEncoding> for LazyRawAnyEExpression<'top> { LazyRawAnyEExpressionKind::Text_1_0(_) => unreachable!("macro in text Ion 1.0"), LazyRawAnyEExpressionKind::Binary_1_0(_) => unreachable!("macro in binary Ion 1.0"), LazyRawAnyEExpressionKind::Text_1_1(ref m) => m.id(), + LazyRawAnyEExpressionKind::Binary_1_1(_) => unimplemented!("macro in binary Ion 1.1 not implemented"), } } @@ -95,6 +106,7 @@ impl<'top> RawEExpression<'top, AnyEncoding> for LazyRawAnyEExpression<'top> { LazyRawAnyEExpressionKind::Text_1_1(m) => LazyRawAnyMacroArgsIterator { encoding: LazyRawAnyMacroArgsIteratorKind::Text_1_1(m.raw_arguments()), }, + LazyRawAnyEExpressionKind::Binary_1_1(_) => unimplemented!("macro in binary Ion 1.0 not implemented"), } } } @@ -145,7 +157,7 @@ impl<'data> LazyRawAnyReader<'data> { match data { &[0xE0, 0x01, 0x00, 0xEA, ..] => RawReaderType::Binary_1_0, - // TODO: Binary Ion 1.1 + &[0xE0, 0x01, 0x01, 0xEA, ..] => RawReaderType::Binary_1_0, _ => RawReaderType::Text_1_0, } } @@ -153,7 +165,8 @@ impl<'data> LazyRawAnyReader<'data> { pub enum RawReaderKind<'data> { Text_1_0(LazyRawTextReader_1_0<'data>), - Binary_1_0(LazyRawBinaryReader<'data>), + Binary_1_0(LazyRawBinaryReader_1_0<'data>), + Binary_1_1(LazyRawBinaryReader_1_1<'data>), } #[derive(Default, Copy, Clone)] @@ -163,7 +176,7 @@ pub enum RawReaderType { #[default] Text_1_0, Binary_1_0, - // TODO: v1.1 + Binary_1_1, } impl<'data> From> for LazyRawAnyReader<'data> { @@ -174,14 +187,22 @@ impl<'data> From> for LazyRawAnyReader<'data> { } } -impl<'data> From> for LazyRawAnyReader<'data> { - fn from(reader: LazyRawBinaryReader<'data>) -> Self { +impl<'data> From> for LazyRawAnyReader<'data> { + fn from(reader: LazyRawBinaryReader_1_0<'data>) -> Self { LazyRawAnyReader { encoding: Binary_1_0(reader), } } } +impl<'data> From> for LazyRawAnyReader<'data> { + fn from(reader: LazyRawBinaryReader_1_1<'data>) -> Self { + LazyRawAnyReader { + encoding: RawReaderKind::Binary_1_1(reader), + } + } +} + impl<'data> LazyRawReader<'data, AnyEncoding> for LazyRawAnyReader<'data> { fn new(data: &'data [u8]) -> Self { let reader_type = Self::detect_encoding(data); @@ -203,7 +224,10 @@ impl<'data> LazyRawReader<'data, AnyEncoding> for LazyRawAnyReader<'data> { LazyRawTextReader_1_0::resume_at_offset(data, offset, ()).into() } RawReaderType::Binary_1_0 => { - LazyRawBinaryReader::resume_at_offset(data, offset, ()).into() + LazyRawBinaryReader_1_0::resume_at_offset(data, offset, ()).into() + } + RawReaderType::Binary_1_1 => { + LazyRawBinaryReader_1_1::resume_at_offset(data, offset, ()).into() } } } @@ -215,9 +239,11 @@ impl<'data> LazyRawReader<'data, AnyEncoding> for LazyRawAnyReader<'data> { where 'data: 'top, { + use RawReaderKind::*; match &mut self.encoding { Text_1_0(r) => Ok(r.next(allocator)?.into()), Binary_1_0(r) => Ok(r.next()?.into()), + Binary_1_1(r) => Ok(r.next()?.into()), } } @@ -227,13 +253,16 @@ impl<'data> LazyRawReader<'data, AnyEncoding> for LazyRawAnyReader<'data> { match &self.encoding { Text_1_0(_) => RawReaderType::Text_1_0, Binary_1_0(_) => RawReaderType::Binary_1_0, + Binary_1_1(_) => RawReaderType::Binary_1_1, } } fn position(&self) -> usize { + use RawReaderKind::*; match &self.encoding { Text_1_0(r) => r.position(), Binary_1_0(r) => r.position(), + Binary_1_1(r) => r.position(), } } } @@ -248,8 +277,9 @@ pub struct LazyRawAnyValue<'top> { #[derive(Debug, Copy, Clone)] pub enum LazyRawValueKind<'top> { Text_1_0(LazyRawTextValue_1_0<'top>), - Binary_1_0(LazyRawBinaryValue<'top>), + Binary_1_0(LazyRawBinaryValue_1_0<'top>), Text_1_1(LazyRawTextValue_1_1<'top>), + Binary_1_1(LazyRawBinaryValue_1_1<'top>), } impl<'top> From> for LazyRawAnyValue<'top> { @@ -260,8 +290,10 @@ impl<'top> From> for LazyRawAnyValue<'top> { } } -impl<'top> From> for LazyRawAnyValue<'top> { - fn from(value: LazyRawBinaryValue<'top>) -> Self { +// TODO: Can we have a function to convert E to LazyRawValueKind? + +impl<'top> From> for LazyRawAnyValue<'top> { + fn from(value: LazyRawBinaryValue_1_0<'top>) -> Self { LazyRawAnyValue { encoding: LazyRawValueKind::Binary_1_0(value), } @@ -276,6 +308,14 @@ impl<'top> From> for LazyRawAnyValue<'top> { } } +impl<'top> From> for LazyRawAnyValue<'top> { + fn from(value: LazyRawBinaryValue_1_1<'top>) -> Self { + LazyRawAnyValue { + encoding: LazyRawValueKind::Binary_1_1(value), + } + } +} + impl<'top> From> for LazyRawValueExpr<'top, AnyEncoding> { fn from(value: LazyRawValueExpr<'top, TextEncoding_1_0>) -> Self { match value { @@ -320,6 +360,22 @@ impl<'top> From> for LazyRawValueExpr<' } } +impl<'top> From> + for LazyRawValueExpr<'top, AnyEncoding> +{ + fn from(value: LazyRawValueExpr<'top, BinaryEncoding_1_1>) -> Self { + match value { + RawValueExpr::ValueLiteral(v) => RawValueExpr::ValueLiteral(v.into()), + RawValueExpr::MacroInvocation(m) => { + let invocation = LazyRawAnyEExpression { + encoding: LazyRawAnyEExpressionKind::Binary_1_1(m), + }; + RawValueExpr::MacroInvocation(invocation) + } + } + } +} + impl<'top> From> for RawValueRef<'top, AnyEncoding> { fn from(value: RawValueRef<'top, TextEncoding_1_0>) -> Self { use RawValueRef::*; @@ -383,6 +439,27 @@ impl<'top> From> for RawValueRef<'top, AnyEn } } +impl<'top> From> for RawValueRef<'top, AnyEncoding> { + fn from(value: RawValueRef<'top, BinaryEncoding_1_1>) -> Self { + use RawValueRef::*; + match value { + Null(ion_type) => Null(ion_type), + Bool(value) => Bool(value), + Int(value) => Int(value), + Float(value) => Float(value), + Decimal(value) => Decimal(value), + Timestamp(value) => Timestamp(value), + String(value) => String(value), + Symbol(value) => Symbol(value), + Blob(value) => Blob(value), + Clob(value) => Clob(value), + SExp(value) => SExp(value.into()), + List(value) => List(value.into()), + Struct(value) => Struct(value.into()), + } + } +} + impl<'top> From> for LazyRawStreamItem<'top, AnyEncoding> { @@ -448,6 +525,27 @@ impl<'top> From> } } +impl<'top> From> + for LazyRawStreamItem<'top, AnyEncoding> +{ + fn from(value: LazyRawStreamItem<'top, BinaryEncoding_1_1>) -> Self { + match value { + LazyRawStreamItem::::VersionMarker(major, minor) => { + LazyRawStreamItem::::VersionMarker(major, minor) + } + LazyRawStreamItem::::Value(value) => { + LazyRawStreamItem::::Value(value.into()) + } + LazyRawStreamItem::::EExpression(_) => { + unreachable!("Ion 1.0 does not support macro invocations") + } + LazyRawStreamItem::::EndOfStream => { + LazyRawStreamItem::::EndOfStream + } + } + } +} + impl<'top> LazyRawValuePrivate<'top> for LazyRawAnyValue<'top> { fn field_name(&self) -> IonResult> { use LazyRawValueKind::*; @@ -455,6 +553,7 @@ impl<'top> LazyRawValuePrivate<'top> for LazyRawAnyValue<'top> { Text_1_0(v) => v.field_name(), Binary_1_0(v) => v.field_name(), Text_1_1(v) => v.field_name(), + Binary_1_1(v) => v.field_name(), } } } @@ -466,6 +565,7 @@ impl<'top> LazyRawValue<'top, AnyEncoding> for LazyRawAnyValue<'top> { Text_1_0(v) => v.ion_type(), Binary_1_0(v) => v.ion_type(), Text_1_1(v) => v.ion_type(), + Binary_1_1(v) => v.ion_type(), } } @@ -475,6 +575,7 @@ impl<'top> LazyRawValue<'top, AnyEncoding> for LazyRawAnyValue<'top> { Text_1_0(v) => v.is_null(), Binary_1_0(v) => v.is_null(), Text_1_1(v) => v.is_null(), + Binary_1_1(v) => v.is_null(), } } @@ -490,6 +591,9 @@ impl<'top> LazyRawValue<'top, AnyEncoding> for LazyRawAnyValue<'top> { Text_1_1(v) => RawAnyAnnotationsIterator { encoding: RawAnnotationsIteratorKind::Text_1_1(v.annotations()), }, + Binary_1_1(v) => RawAnyAnnotationsIterator { + encoding: RawAnnotationsIteratorKind::Binary_1_1(v.annotations()), + }, } } @@ -499,6 +603,7 @@ impl<'top> LazyRawValue<'top, AnyEncoding> for LazyRawAnyValue<'top> { Text_1_0(v) => Ok(v.read()?.into()), Binary_1_0(v) => Ok(v.read()?.into()), Text_1_1(v) => Ok(v.read()?.into()), + Binary_1_1(v) => Ok(v.read()?.into()), } } @@ -508,6 +613,7 @@ impl<'top> LazyRawValue<'top, AnyEncoding> for LazyRawAnyValue<'top> { Text_1_0(v) => v.range(), Binary_1_0(v) => v.range(), Text_1_1(v) => v.range(), + Binary_1_1(v) => v.range(), } } @@ -517,6 +623,7 @@ impl<'top> LazyRawValue<'top, AnyEncoding> for LazyRawAnyValue<'top> { Text_1_0(v) => v.span(), Binary_1_0(v) => v.span(), Text_1_1(v) => v.span(), + Binary_1_1(v) => v.span(), } } } @@ -529,8 +636,9 @@ pub struct RawAnyAnnotationsIterator<'top> { pub enum RawAnnotationsIteratorKind<'top> { Text_1_0(RawTextAnnotationsIterator<'top>), - Binary_1_0(RawBinaryAnnotationsIterator<'top>), + Binary_1_0(RawBinaryAnnotationsIterator_1_0<'top>), Text_1_1(RawTextAnnotationsIterator<'top>), + Binary_1_1(RawBinaryAnnotationsIterator_1_1<'top>), } impl<'top> Iterator for RawAnyAnnotationsIterator<'top> { @@ -541,6 +649,7 @@ impl<'top> Iterator for RawAnyAnnotationsIterator<'top> { RawAnnotationsIteratorKind::Text_1_0(i) => i.next(), RawAnnotationsIteratorKind::Binary_1_0(i) => i.next(), RawAnnotationsIteratorKind::Text_1_1(i) => i.next(), + RawAnnotationsIteratorKind::Binary_1_1(i) => i.next(), } } } @@ -555,8 +664,9 @@ pub struct LazyRawAnyList<'top> { #[derive(Debug, Copy, Clone)] pub enum LazyRawListKind<'top> { Text_1_0(LazyRawTextList_1_0<'top>), - Binary_1_0(LazyRawBinaryList<'top>), + Binary_1_0(LazyRawBinaryList_1_0<'top>), Text_1_1(LazyRawTextList_1_1<'top>), + Binary_1_1(LazyRawBinaryList_1_1<'top>), } impl<'top> LazyContainerPrivate<'top, AnyEncoding> for LazyRawAnyList<'top> { @@ -566,11 +676,14 @@ impl<'top> LazyContainerPrivate<'top, AnyEncoding> for LazyRawAnyList<'top> { encoding: LazyRawListKind::Text_1_0(LazyRawTextList_1_0::from_value(v)), }, LazyRawValueKind::Binary_1_0(v) => LazyRawAnyList { - encoding: LazyRawListKind::Binary_1_0(LazyRawBinaryList::from_value(v)), + encoding: LazyRawListKind::Binary_1_0(LazyRawBinaryList_1_0::from_value(v)), }, LazyRawValueKind::Text_1_1(v) => LazyRawAnyList { encoding: LazyRawListKind::Text_1_1(LazyRawTextList_1_1::from_value(v)), }, + LazyRawValueKind::Binary_1_1(v) => LazyRawAnyList { + encoding: LazyRawListKind::Binary_1_1(LazyRawBinaryList_1_1::from_value(v)), + }, } } } @@ -581,8 +694,9 @@ pub struct RawAnyListIterator<'data> { pub enum RawAnyListIteratorKind<'data> { Text_1_0(RawTextListIterator_1_0<'data>), - Binary_1_0(RawBinarySequenceIterator<'data>), + Binary_1_0(RawBinarySequenceIterator_1_0<'data>), Text_1_1(RawTextSequenceCacheIterator_1_1<'data>), + Binary_1_1(RawBinarySequenceIterator_1_1<'data>), } impl<'data> Iterator for RawAnyListIterator<'data> { @@ -599,6 +713,9 @@ impl<'data> Iterator for RawAnyListIterator<'data> { RawAnyListIteratorKind::Text_1_1(i) => i .next() .map(|value_result| value_result.map(|value| value.into())), + RawAnyListIteratorKind::Binary_1_1(i) => i + .next() + .map(|value_result| value_result.map(|value| value.into())), } } } @@ -615,6 +732,7 @@ impl<'top> LazyRawSequence<'top, AnyEncoding> for LazyRawAnyList<'top> { LazyRawListKind::Text_1_0(s) => s.ion_type(), LazyRawListKind::Binary_1_0(s) => s.ion_type(), LazyRawListKind::Text_1_1(s) => s.ion_type(), + LazyRawListKind::Binary_1_1(s) => s.ion_type(), } } @@ -629,6 +747,9 @@ impl<'top> LazyRawSequence<'top, AnyEncoding> for LazyRawAnyList<'top> { LazyRawListKind::Text_1_1(s) => RawAnyListIterator { encoding: RawAnyListIteratorKind::Text_1_1(s.iter()), }, + LazyRawListKind::Binary_1_1(s) => RawAnyListIterator { + encoding: RawAnyListIteratorKind::Binary_1_1(s.iter()), + }, } } @@ -637,6 +758,7 @@ impl<'top> LazyRawSequence<'top, AnyEncoding> for LazyRawAnyList<'top> { LazyRawListKind::Text_1_0(s) => s.as_value().into(), LazyRawListKind::Binary_1_0(s) => s.as_value().into(), LazyRawListKind::Text_1_1(s) => s.as_value().into(), + LazyRawListKind::Binary_1_1(s) => s.as_value().into(), } } } @@ -649,8 +771,8 @@ impl<'data> From> for LazyRawAnyList<'data> { } } -impl<'data> From> for LazyRawAnyList<'data> { - fn from(value: LazyRawBinaryList<'data>) -> Self { +impl<'data> From> for LazyRawAnyList<'data> { + fn from(value: LazyRawBinaryList_1_0<'data>) -> Self { LazyRawAnyList { encoding: LazyRawListKind::Binary_1_0(value), } @@ -665,6 +787,14 @@ impl<'data> From> for LazyRawAnyList<'data> { } } +impl<'data> From> for LazyRawAnyList<'data> { + fn from(value: LazyRawBinaryList_1_1<'data>) -> Self { + LazyRawAnyList { + encoding: LazyRawListKind::Binary_1_1(value), + } + } +} + // ===== SExps ===== #[derive(Debug, Copy, Clone)] @@ -675,8 +805,9 @@ pub struct LazyRawAnySExp<'data> { #[derive(Debug, Copy, Clone)] pub enum LazyRawSExpKind<'data> { Text_1_0(LazyRawTextSExp_1_0<'data>), - Binary_1_0(LazyRawBinarySExp<'data>), + Binary_1_0(LazyRawBinarySExp_1_0<'data>), Text_1_1(LazyRawTextSExp_1_1<'data>), + Binary_1_1(LazyRawBinarySExp_1_1<'data>), } impl<'data> LazyContainerPrivate<'data, AnyEncoding> for LazyRawAnySExp<'data> { @@ -686,11 +817,14 @@ impl<'data> LazyContainerPrivate<'data, AnyEncoding> for LazyRawAnySExp<'data> { encoding: LazyRawSExpKind::Text_1_0(LazyRawTextSExp_1_0::from_value(v)), }, LazyRawValueKind::Binary_1_0(v) => LazyRawAnySExp { - encoding: LazyRawSExpKind::Binary_1_0(LazyRawBinarySExp::from_value(v)), + encoding: LazyRawSExpKind::Binary_1_0(LazyRawBinarySExp_1_0::from_value(v)), }, LazyRawValueKind::Text_1_1(v) => LazyRawAnySExp { encoding: LazyRawSExpKind::Text_1_1(LazyRawTextSExp_1_1::from_value(v)), }, + LazyRawValueKind::Binary_1_1(v) => LazyRawAnySExp { + encoding: LazyRawSExpKind::Binary_1_1(LazyRawBinarySExp_1_1::from_value(v)), + } } } } @@ -701,8 +835,9 @@ pub struct RawAnySExpIterator<'data> { pub enum RawAnySExpIteratorKind<'data> { Text_1_0(RawTextSExpIterator_1_0<'data>), - Binary_1_0(RawBinarySequenceIterator<'data>), + Binary_1_0(RawBinarySequenceIterator_1_0<'data>), Text_1_1(RawTextSequenceCacheIterator_1_1<'data>), + Binary_1_1(RawBinarySequenceIterator_1_1<'data>), } impl<'data> Iterator for RawAnySExpIterator<'data> { @@ -719,6 +854,9 @@ impl<'data> Iterator for RawAnySExpIterator<'data> { RawAnySExpIteratorKind::Text_1_1(i) => i .next() .map(|value_result| value_result.map(|value| value.into())), + RawAnySExpIteratorKind::Binary_1_1(i) => i + .next() + .map(|value_result| value_result.map(|value| value.into())), } } } @@ -735,6 +873,7 @@ impl<'top> LazyRawSequence<'top, AnyEncoding> for LazyRawAnySExp<'top> { LazyRawSExpKind::Text_1_0(s) => s.ion_type(), LazyRawSExpKind::Binary_1_0(s) => s.ion_type(), LazyRawSExpKind::Text_1_1(s) => s.ion_type(), + LazyRawSExpKind::Binary_1_1(s) => s.ion_type(), } } @@ -749,6 +888,9 @@ impl<'top> LazyRawSequence<'top, AnyEncoding> for LazyRawAnySExp<'top> { LazyRawSExpKind::Text_1_1(s) => RawAnySExpIterator { encoding: RawAnySExpIteratorKind::Text_1_1(s.iter()), }, + LazyRawSExpKind::Binary_1_1(s) => RawAnySExpIterator { + encoding: RawAnySExpIteratorKind::Binary_1_1(s.iter()), + }, } } @@ -757,6 +899,7 @@ impl<'top> LazyRawSequence<'top, AnyEncoding> for LazyRawAnySExp<'top> { LazyRawSExpKind::Text_1_0(s) => (s.as_value()).into(), LazyRawSExpKind::Binary_1_0(s) => (s.as_value()).into(), LazyRawSExpKind::Text_1_1(s) => (s.as_value()).into(), + LazyRawSExpKind::Binary_1_1(s) => (s.as_value()).into(), } } } @@ -769,8 +912,8 @@ impl<'data> From> for LazyRawAnySExp<'data> { } } -impl<'data> From> for LazyRawAnySExp<'data> { - fn from(value: LazyRawBinarySExp<'data>) -> Self { +impl<'data> From> for LazyRawAnySExp<'data> { + fn from(value: LazyRawBinarySExp_1_0<'data>) -> Self { LazyRawAnySExp { encoding: LazyRawSExpKind::Binary_1_0(value), } @@ -785,6 +928,14 @@ impl<'data> From> for LazyRawAnySExp<'data> { } } +impl<'data> From> for LazyRawAnySExp<'data> { + fn from(value: LazyRawBinarySExp_1_1<'data>) -> Self { + LazyRawAnySExp { + encoding: LazyRawSExpKind::Binary_1_1(value), + } + } +} + // ===== Structs ===== #[derive(Debug, Copy, Clone)] @@ -795,8 +946,9 @@ pub struct LazyRawAnyStruct<'data> { #[derive(Debug, Copy, Clone)] pub enum LazyRawStructKind<'data> { Text_1_0(LazyRawTextStruct_1_0<'data>), - Binary_1_0(LazyRawBinaryStruct<'data>), + Binary_1_0(LazyRawBinaryStruct_1_0<'data>), Text_1_1(LazyRawTextStruct_1_1<'data>), + Binary_1_1(LazyRawBinaryStruct_1_1<'data>), } pub struct RawAnyStructIterator<'data> { @@ -805,8 +957,9 @@ pub struct RawAnyStructIterator<'data> { pub enum RawAnyStructIteratorKind<'data> { Text_1_0(RawTextStructIterator_1_0<'data>), - Binary_1_0(RawBinaryStructIterator<'data>), + Binary_1_0(RawBinaryStructIterator_1_0<'data>), Text_1_1(RawTextStructCacheIterator_1_1<'data>), + Binary_1_1(RawBinaryStructIterator_1_1<'data>), } impl<'data> Iterator for RawAnyStructIterator<'data> { @@ -823,6 +976,9 @@ impl<'data> Iterator for RawAnyStructIterator<'data> { RawAnyStructIteratorKind::Text_1_1(i) => i .next() .map(|field_result| field_result.map(|field| field.into())), + RawAnyStructIteratorKind::Binary_1_1(i) => i + .next() + .map(|field_result| field_result.map(|field| field.into())), } } } @@ -875,6 +1031,21 @@ impl<'data> From> } } +impl<'data> From> + for LazyRawFieldExpr<'data, AnyEncoding> +{ + fn from(binary_field: LazyRawFieldExpr<'data, BinaryEncoding_1_1>) -> Self { + let (name, value) = match binary_field { + RawFieldExpr::NameValuePair(name, value) => (name, value), + RawFieldExpr::MacroInvocation(_) => { + unimplemented!("macro invocation in Ion 1.1 binary not implemented") + } + }; + // Convert the binary-encoded value into an any-encoded value + RawFieldExpr::NameValuePair(name, value.into()) + } +} + impl<'data> LazyContainerPrivate<'data, AnyEncoding> for LazyRawAnyStruct<'data> { fn from_value(value: LazyRawAnyValue<'data>) -> Self { match value.encoding { @@ -882,11 +1053,14 @@ impl<'data> LazyContainerPrivate<'data, AnyEncoding> for LazyRawAnyStruct<'data> encoding: LazyRawStructKind::Text_1_0(LazyRawTextStruct_1_0::from_value(v)), }, LazyRawValueKind::Binary_1_0(v) => LazyRawAnyStruct { - encoding: LazyRawStructKind::Binary_1_0(LazyRawBinaryStruct::from_value(v)), + encoding: LazyRawStructKind::Binary_1_0(LazyRawBinaryStruct_1_0::from_value(v)), }, LazyRawValueKind::Text_1_1(v) => LazyRawAnyStruct { encoding: LazyRawStructKind::Text_1_1(LazyRawTextStruct_1_1::from_value(v)), }, + LazyRawValueKind::Binary_1_1(v) => LazyRawAnyStruct { + encoding: LazyRawStructKind::Binary_1_1(LazyRawBinaryStruct_1_1::from_value(v)), + }, } } } @@ -905,6 +1079,9 @@ impl<'top> LazyRawStruct<'top, AnyEncoding> for LazyRawAnyStruct<'top> { LazyRawStructKind::Text_1_1(s) => RawAnyAnnotationsIterator { encoding: RawAnnotationsIteratorKind::Text_1_1(s.annotations()), }, + LazyRawStructKind::Binary_1_1(s) => RawAnyAnnotationsIterator { + encoding: RawAnnotationsIteratorKind::Binary_1_1(s.annotations()), + }, } } @@ -919,6 +1096,9 @@ impl<'top> LazyRawStruct<'top, AnyEncoding> for LazyRawAnyStruct<'top> { LazyRawStructKind::Text_1_1(s) => RawAnyStructIterator { encoding: RawAnyStructIteratorKind::Text_1_1(s.iter()), }, + LazyRawStructKind::Binary_1_1(s) => RawAnyStructIterator { + encoding: RawAnyStructIteratorKind::Binary_1_1(s.iter()), + }, } } } @@ -931,8 +1111,8 @@ impl<'data> From> for LazyRawAnyStruct<'data> { } } -impl<'data> From> for LazyRawAnyStruct<'data> { - fn from(value: LazyRawBinaryStruct<'data>) -> Self { +impl<'data> From> for LazyRawAnyStruct<'data> { + fn from(value: LazyRawBinaryStruct_1_0<'data>) -> Self { LazyRawAnyStruct { encoding: LazyRawStructKind::Binary_1_0(value), } @@ -947,6 +1127,14 @@ impl<'data> From> for LazyRawAnyStruct<'data> { } } +impl<'data> From> for LazyRawAnyStruct<'data> { + fn from(value: LazyRawBinaryStruct_1_1<'data>) -> Self { + LazyRawAnyStruct { + encoding: LazyRawStructKind::Binary_1_1(value), + } + } +} + impl<'data> IntoIterator for LazyRawAnyStruct<'data> { type Item = IonResult>; type IntoIter = RawAnyStructIterator<'data>; diff --git a/src/lazy/binary/encoded_value.rs b/src/lazy/binary/encoded_value.rs index 9b9d72d2..7d9b48e6 100644 --- a/src/lazy/binary/encoded_value.rs +++ b/src/lazy/binary/encoded_value.rs @@ -1,8 +1,39 @@ use crate::binary::non_blocking::type_descriptor::Header; +use crate::lazy::decoder::LazyDecoder; +use crate::lazy::encoding::BinaryEncoding; use crate::types::SymbolId; use crate::IonType; use std::ops::Range; +pub(crate) trait EncodedHeader: Copy { + type TypeCode; + fn ion_type(&self) -> IonType; + fn type_code(&self) -> Self::TypeCode; + fn length_code(&self) -> u8; + + fn is_null(&self) -> bool; +} + +impl EncodedHeader for Header { + type TypeCode = crate::binary::type_code::IonTypeCode; + + fn ion_type(&self) -> IonType { + self.ion_type + } + + fn type_code(&self) -> Self::TypeCode { + self.ion_type_code + } + + fn length_code(&self) -> u8 { + self.length_code + } + + fn is_null(&self) -> bool { + self.is_null() + } +} + /// Represents the type, offset, and length metadata of the various components of an encoded value /// in an input stream. /// @@ -10,7 +41,7 @@ use std::ops::Range; /// allowing a user to re-read (that is: parse) the body of the value as many times as necessary /// without re-parsing its header information each time. #[derive(Clone, Copy, Debug, PartialEq)] -pub(crate) struct EncodedValue { +pub(crate) struct EncodedValue { // If the compiler decides that a value is too large to be moved/copied with inline code, // it will relocate the value using memcpy instead. This can be quite slow by comparison. // @@ -22,7 +53,7 @@ pub(crate) struct EncodedValue { // The type descriptor byte that identified this value; includes the type code, length code, // and IonType. - pub(crate) header: Header, + pub(crate) header: HeaderType, // Each encoded value has up to five components, appearing in the following order: // @@ -69,8 +100,8 @@ pub(crate) struct EncodedValue { pub total_length: usize, } -impl EncodedValue { - pub fn header(&self) -> Header { +impl EncodedValue { + pub fn header(&self) -> HeaderType { self.header } @@ -222,7 +253,7 @@ impl EncodedValue { } pub fn ion_type(&self) -> IonType { - self.header.ion_type + self.header.ion_type() } } diff --git a/src/lazy/binary/immutable_buffer.rs b/src/lazy/binary/immutable_buffer.rs index 177413c2..98f89666 100644 --- a/src/lazy/binary/immutable_buffer.rs +++ b/src/lazy/binary/immutable_buffer.rs @@ -7,7 +7,7 @@ use crate::binary::uint::DecodedUInt; use crate::binary::var_int::VarInt; use crate::binary::var_uint::VarUInt; use crate::lazy::binary::encoded_value::EncodedValue; -use crate::lazy::binary::raw::value::LazyRawBinaryValue; +use crate::lazy::binary::raw::value::LazyRawBinaryValue_1_0; use crate::lazy::encoder::binary::v1_1::flex_int::FlexInt; use crate::lazy::encoder::binary::v1_1::flex_uint::FlexUInt; use crate::result::IonFailure; @@ -609,7 +609,7 @@ impl<'a> ImmutableBuffer<'a> { } /// Reads a field ID and a value from the buffer. - pub(crate) fn peek_field(self) -> IonResult>> { + pub(crate) fn peek_field(self) -> IonResult>> { let mut input = self; if self.is_empty() { // We're at the end of the struct @@ -689,7 +689,7 @@ impl<'a> ImmutableBuffer<'a> { /// Reads a value without a field name from the buffer. This is applicable in lists, s-expressions, /// and at the top level. - pub(crate) fn peek_sequence_value(self) -> IonResult>> { + pub(crate) fn peek_sequence_value(self) -> IonResult>> { if self.is_empty() { return Ok(None); } @@ -711,7 +711,7 @@ impl<'a> ImmutableBuffer<'a> { /// Reads a value from the buffer. The caller must confirm that the buffer is not empty and that /// the next byte (`type_descriptor`) is not a NOP. - fn read_value(self, type_descriptor: TypeDescriptor) -> IonResult> { + fn read_value(self, type_descriptor: TypeDescriptor) -> IonResult> { if type_descriptor.is_annotation_wrapper() { self.read_annotated_value(type_descriptor) } else { @@ -724,7 +724,7 @@ impl<'a> ImmutableBuffer<'a> { fn read_value_without_annotations( self, type_descriptor: TypeDescriptor, - ) -> IonResult> { + ) -> IonResult> { let input = self; let header = type_descriptor .to_header() @@ -758,7 +758,7 @@ impl<'a> ImmutableBuffer<'a> { value_length, total_length, }; - let lazy_value = LazyRawBinaryValue { + let lazy_value = LazyRawBinaryValue_1_0 { encoded_value, // If this value has a field ID or annotations, this will be replaced by the caller. input: self, @@ -771,7 +771,7 @@ impl<'a> ImmutableBuffer<'a> { fn read_annotated_value( self, mut type_descriptor: TypeDescriptor, - ) -> IonResult> { + ) -> IonResult> { let input = self; let (wrapper, input_after_annotations) = input.read_annotations_wrapper(type_descriptor)?; type_descriptor = input_after_annotations.peek_type_descriptor()?; diff --git a/src/lazy/binary/raw/annotations_iterator.rs b/src/lazy/binary/raw/annotations_iterator.rs index 58e6df55..aab3d4f9 100644 --- a/src/lazy/binary/raw/annotations_iterator.rs +++ b/src/lazy/binary/raw/annotations_iterator.rs @@ -1,4 +1,5 @@ use crate::lazy::binary::immutable_buffer::ImmutableBuffer; +use crate::lazy::encoding::BinaryEncoding; use crate::{IonResult, RawSymbolTokenRef}; /// Iterates over a slice of bytes, lazily reading them as a sequence of VarUInt symbol IDs. diff --git a/src/lazy/binary/raw/mod.rs b/src/lazy/binary/raw/mod.rs index 3df82f4d..fc879621 100644 --- a/src/lazy/binary/raw/mod.rs +++ b/src/lazy/binary/raw/mod.rs @@ -2,4 +2,5 @@ pub mod annotations_iterator; pub mod reader; pub mod sequence; pub mod r#struct; +pub mod v1_1; pub mod value; diff --git a/src/lazy/binary/raw/reader.rs b/src/lazy/binary/raw/reader.rs index 1056cb15..4eb5dd4f 100644 --- a/src/lazy/binary/raw/reader.rs +++ b/src/lazy/binary/raw/reader.rs @@ -1,7 +1,9 @@ +#![allow(non_camel_case_types)] + use crate::lazy::binary::immutable_buffer::ImmutableBuffer; -use crate::lazy::binary::raw::value::LazyRawBinaryValue; +use crate::lazy::binary::raw::value::LazyRawBinaryValue_1_0; use crate::lazy::decoder::{LazyDecoder, LazyRawReader}; -use crate::lazy::encoding::BinaryEncoding_1_0; +use crate::lazy::encoding::{BinaryEncoding, BinaryEncoding_1_0}; use crate::lazy::raw_stream_item::{LazyRawStreamItem, RawStreamItem}; use crate::result::IonFailure; use crate::IonResult; @@ -10,13 +12,13 @@ use bumpalo::Bump as BumpAllocator; /// A binary Ion 1.0 reader that yields [`LazyRawBinaryValue`]s representing the top level values found /// in the provided input stream. -pub struct LazyRawBinaryReader<'data> { +pub struct LazyRawBinaryReader_1_0<'data> { data: DataSource<'data>, } -impl<'data> LazyRawBinaryReader<'data> { +impl<'data> LazyRawBinaryReader_1_0<'data> { /// Constructs a `LazyRawReader` positioned at the beginning of the provided input stream. - pub fn new(data: &'data [u8]) -> LazyRawBinaryReader<'data> { + pub fn new(data: &'data [u8]) -> LazyRawBinaryReader_1_0<'data> { Self::new_with_offset(data, 0) } @@ -24,9 +26,9 @@ impl<'data> LazyRawBinaryReader<'data> { /// The provided input stream is itself a slice starting `offset` bytes from the beginning /// of a larger data stream. This offset is used for reporting the absolute (stream-level) /// position of values encountered in `data`. - fn new_with_offset(data: &'data [u8], offset: usize) -> LazyRawBinaryReader<'data> { + fn new_with_offset(data: &'data [u8], offset: usize) -> LazyRawBinaryReader_1_0<'data> { let data = DataSource::new(ImmutableBuffer::new_with_offset(data, offset)); - LazyRawBinaryReader { data } + Self { data } } /// Helper method called by [`Self::next`]. Reads the current stream item as an Ion version @@ -96,13 +98,13 @@ impl<'data> LazyRawBinaryReader<'data> { } } -impl<'data> LazyRawReader<'data, BinaryEncoding_1_0> for LazyRawBinaryReader<'data> { +impl<'data> LazyRawReader<'data, BinaryEncoding_1_0> for LazyRawBinaryReader_1_0<'data> { fn resume_at_offset( data: &'data [u8], offset: usize, _config: ::ReaderSavedState, ) -> Self { - LazyRawBinaryReader { + LazyRawBinaryReader_1_0 { data: DataSource { buffer: ImmutableBuffer::new_with_offset(data, offset), bytes_to_skip: 0, @@ -167,11 +169,11 @@ impl<'data> DataSource<'data> { /// that were consumed. /// If it does not succeed, the `DataSource` remains unchanged. pub(crate) fn try_parse_next< - F: Fn(ImmutableBuffer<'data>) -> IonResult>>, + F: Fn(ImmutableBuffer<'data>) -> IonResult>>, >( &mut self, parser: F, - ) -> IonResult>> { + ) -> IonResult>> { let buffer = self.advance_to_next_item()?; let lazy_value = match parser(buffer) { @@ -190,7 +192,7 @@ impl<'data> DataSource<'data> { #[cfg(test)] mod tests { - use crate::lazy::binary::raw::reader::LazyRawBinaryReader; + use crate::lazy::binary::raw::reader::LazyRawBinaryReader_1_0; use crate::lazy::binary::test_utilities::to_binary_ion; use crate::lazy::raw_stream_item::RawStreamItem; use crate::raw_symbol_token_ref::AsRawSymbolTokenRef; @@ -204,7 +206,7 @@ mod tests { {name:"hi", name: "hello"} "#, )?; - let mut reader = LazyRawBinaryReader::new(data); + let mut reader = LazyRawBinaryReader_1_0::new(data); let _ivm = reader.next()?.expect_ivm()?; let value = reader.next()?.expect_value()?; let lazy_struct = value.read()?.expect_struct()?; @@ -222,7 +224,7 @@ mod tests { [1, true, foo] "#, )?; - let mut reader = LazyRawBinaryReader::new(data); + let mut reader = LazyRawBinaryReader_1_0::new(data); let _ivm = reader.next()?.expect_ivm()?; let _symbol_table = reader.next()?.expect_value()?; let lazy_list = reader.next()?.expect_value()?.read()?.expect_list()?; @@ -266,7 +268,7 @@ mod tests { {name:"hi", name: "hello"} "#, )?; - let mut reader = LazyRawBinaryReader::new(data); + let mut reader = LazyRawBinaryReader_1_0::new(data); loop { use RawStreamItem::*; match reader.next()? { @@ -289,7 +291,7 @@ mod tests { foo::bar::baz::7 "#, )?; - let mut reader = LazyRawBinaryReader::new(data); + let mut reader = LazyRawBinaryReader_1_0::new(data); let _ivm = reader.next()?.expect_ivm()?; // Read annotations from $ion_symbol_table::{...} @@ -324,7 +326,7 @@ mod tests { 0x0f, // null ]; - let mut reader = LazyRawBinaryReader::new(&data); + let mut reader = LazyRawBinaryReader_1_0::new(&data); let _ivm = reader.next()?.expect_ivm()?; assert_eq!( @@ -346,7 +348,7 @@ mod tests { 0x0f, // null ]; - let mut reader = LazyRawBinaryReader::new(&data); + let mut reader = LazyRawBinaryReader_1_0::new(&data); let _ivm = reader.next()?.expect_ivm()?; let _ivm = reader.next()?.expect_ivm()?; diff --git a/src/lazy/binary/raw/sequence.rs b/src/lazy/binary/raw/sequence.rs index 1fb86705..cf47a2fb 100644 --- a/src/lazy/binary/raw/sequence.rs +++ b/src/lazy/binary/raw/sequence.rs @@ -1,32 +1,34 @@ +#![allow(non_camel_case_types)] + use crate::lazy::binary::immutable_buffer::ImmutableBuffer; use crate::lazy::binary::raw::annotations_iterator::RawBinaryAnnotationsIterator; use crate::lazy::binary::raw::reader::DataSource; -use crate::lazy::binary::raw::value::LazyRawBinaryValue; +use crate::lazy::binary::raw::value::LazyRawBinaryValue_1_0; use crate::lazy::decoder::private::LazyContainerPrivate; use crate::lazy::decoder::{LazyRawSequence, LazyRawValueExpr, RawValueExpr}; -use crate::lazy::encoding::BinaryEncoding_1_0; +use crate::lazy::encoding::{BinaryEncoding, BinaryEncoding_1_0}; use crate::{IonResult, IonType}; use std::fmt::{Debug, Formatter}; #[derive(Debug, Copy, Clone)] -pub struct LazyRawBinaryList<'top> { - pub(crate) sequence: LazyRawBinarySequence<'top>, +pub struct LazyRawBinaryList_1_0<'top> { + pub(crate) sequence: LazyRawBinarySequence_1_0<'top>, } #[derive(Debug, Copy, Clone)] -pub struct LazyRawBinarySExp<'top> { - pub(crate) sequence: LazyRawBinarySequence<'top>, +pub struct LazyRawBinarySExp_1_0<'top> { + pub(crate) sequence: LazyRawBinarySequence_1_0<'top>, } -impl<'top> LazyContainerPrivate<'top, BinaryEncoding_1_0> for LazyRawBinaryList<'top> { - fn from_value(value: LazyRawBinaryValue<'top>) -> Self { - LazyRawBinaryList { - sequence: LazyRawBinarySequence { value }, +impl<'top> LazyContainerPrivate<'top, BinaryEncoding_1_0> for LazyRawBinaryList_1_0<'top> { + fn from_value(value: LazyRawBinaryValue_1_0<'top>) -> Self { + LazyRawBinaryList_1_0 { + sequence: LazyRawBinarySequence_1_0 { value }, } } } -impl<'top> LazyRawSequence<'top, BinaryEncoding_1_0> for LazyRawBinaryList<'top> { +impl<'top> LazyRawSequence<'top, BinaryEncoding_1_0> for LazyRawBinaryList_1_0<'top> { type Iterator = RawBinarySequenceIterator<'top>; fn annotations(&self) -> RawBinaryAnnotationsIterator<'top> { @@ -41,20 +43,20 @@ impl<'top> LazyRawSequence<'top, BinaryEncoding_1_0> for LazyRawBinaryList<'top> self.sequence.iter() } - fn as_value(&self) -> LazyRawBinaryValue<'top> { + fn as_value(&self) -> LazyRawBinaryValue_1_0<'top> { self.sequence.value } } -impl<'top> LazyContainerPrivate<'top, BinaryEncoding_1_0> for LazyRawBinarySExp<'top> { - fn from_value(value: LazyRawBinaryValue<'top>) -> Self { - LazyRawBinarySExp { - sequence: LazyRawBinarySequence { value }, +impl<'top> LazyContainerPrivate<'top, BinaryEncoding_1_0> for LazyRawBinarySExp_1_0<'top> { + fn from_value(value: LazyRawBinaryValue_1_0<'top>) -> Self { + LazyRawBinarySExp_1_0 { + sequence: LazyRawBinarySequence_1_0 { value }, } } } -impl<'top> LazyRawSequence<'top, BinaryEncoding_1_0> for LazyRawBinarySExp<'top> { +impl<'top> LazyRawSequence<'top, BinaryEncoding_1_0> for LazyRawBinarySExp_1_0<'top> { type Iterator = RawBinarySequenceIterator<'top>; fn annotations(&self) -> RawBinaryAnnotationsIterator<'top> { @@ -69,17 +71,17 @@ impl<'top> LazyRawSequence<'top, BinaryEncoding_1_0> for LazyRawBinarySExp<'top> self.sequence.iter() } - fn as_value(&self) -> LazyRawBinaryValue<'top> { + fn as_value(&self) -> LazyRawBinaryValue_1_0<'top> { self.sequence.value } } #[derive(Copy, Clone)] -pub struct LazyRawBinarySequence<'top> { - pub(crate) value: LazyRawBinaryValue<'top>, +pub struct LazyRawBinarySequence_1_0<'top> { + pub(crate) value: LazyRawBinaryValue_1_0<'top>, } -impl<'top> LazyRawBinarySequence<'top> { +impl<'top> LazyRawBinarySequence_1_0<'top> { pub fn ion_type(&self) -> IonType { self.value.ion_type() } @@ -92,7 +94,7 @@ impl<'top> LazyRawBinarySequence<'top> { } } -impl<'a, 'top> IntoIterator for &'a LazyRawBinarySequence<'top> { +impl<'a, 'top> IntoIterator for &'a LazyRawBinarySequence_1_0<'top> { type Item = IonResult>; type IntoIter = RawBinarySequenceIterator<'top>; @@ -101,7 +103,7 @@ impl<'a, 'top> IntoIterator for &'a LazyRawBinarySequence<'top> { } } -impl<'a> Debug for LazyRawBinarySequence<'a> { +impl<'a> Debug for LazyRawBinarySequence_1_0<'a> { fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { match self.value.encoded_value.ion_type() { IonType::SExp => { @@ -143,7 +145,7 @@ impl<'top> Iterator for RawBinarySequenceIterator<'top> { fn next(&mut self) -> Option { match self .source - .try_parse_next(ImmutableBuffer::peek_sequence_value) + .try_parse_next(ImmutableBuffer::<'top>::peek_sequence_value) { Ok(Some(value)) => Some(Ok(RawValueExpr::ValueLiteral(value))), Ok(None) => None, diff --git a/src/lazy/binary/raw/struct.rs b/src/lazy/binary/raw/struct.rs index 39cf3702..91fbc20b 100644 --- a/src/lazy/binary/raw/struct.rs +++ b/src/lazy/binary/raw/struct.rs @@ -1,25 +1,27 @@ +#![allow(non_camel_case_types)] + use std::fmt; use std::fmt::{Debug, Formatter}; use crate::lazy::binary::immutable_buffer::ImmutableBuffer; use crate::lazy::binary::raw::annotations_iterator::RawBinaryAnnotationsIterator; use crate::lazy::binary::raw::reader::DataSource; -use crate::lazy::binary::raw::value::LazyRawBinaryValue; +use crate::lazy::binary::raw::value::LazyRawBinaryValue_1_0; use crate::lazy::decoder::private::{ LazyContainerPrivate, LazyRawFieldPrivate, LazyRawValuePrivate, }; use crate::lazy::decoder::{ LazyRawField, LazyRawFieldExpr, LazyRawStruct, RawFieldExpr, RawValueExpr, }; -use crate::lazy::encoding::BinaryEncoding_1_0; +use crate::lazy::encoding::{BinaryEncoding, BinaryEncoding_1_0}; use crate::{IonResult, RawSymbolTokenRef}; #[derive(Copy, Clone)] -pub struct LazyRawBinaryStruct<'top> { - pub(crate) value: LazyRawBinaryValue<'top>, +pub struct LazyRawBinaryStruct_1_0<'top> { + pub(crate) value: LazyRawBinaryValue_1_0<'top>, } -impl<'a, 'top> IntoIterator for &'a LazyRawBinaryStruct<'top> { +impl<'a, 'top> IntoIterator for &'a LazyRawBinaryStruct_1_0<'top> { type Item = IonResult>; type IntoIter = RawBinaryStructIterator<'top>; @@ -28,7 +30,7 @@ impl<'a, 'top> IntoIterator for &'a LazyRawBinaryStruct<'top> { } } -impl<'top> Debug for LazyRawBinaryStruct<'top> { +impl<'top> Debug for LazyRawBinaryStruct_1_0<'top> { fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result { write!(f, "{{")?; for field in self { @@ -41,7 +43,7 @@ impl<'top> Debug for LazyRawBinaryStruct<'top> { } } -impl<'top> LazyRawBinaryStruct<'top> { +impl<'top> LazyRawBinaryStruct_1_0<'top> { fn annotations(&self) -> RawBinaryAnnotationsIterator<'top> { self.value.annotations() } @@ -54,13 +56,13 @@ impl<'top> LazyRawBinaryStruct<'top> { } } -impl<'top> LazyContainerPrivate<'top, BinaryEncoding_1_0> for LazyRawBinaryStruct<'top> { - fn from_value(value: LazyRawBinaryValue<'top>) -> Self { - LazyRawBinaryStruct { value } +impl<'top> LazyContainerPrivate<'top, BinaryEncoding_1_0> for LazyRawBinaryStruct_1_0<'top> { + fn from_value(value: LazyRawBinaryValue_1_0<'top>) -> Self { + LazyRawBinaryStruct_1_0 { value } } } -impl<'top> LazyRawStruct<'top, BinaryEncoding_1_0> for LazyRawBinaryStruct<'top> { +impl<'top> LazyRawStruct<'top, BinaryEncoding_1_0> for LazyRawBinaryStruct_1_0<'top> { type Iterator = RawBinaryStructIterator<'top>; fn annotations(&self) -> RawBinaryAnnotationsIterator<'top> { @@ -101,11 +103,11 @@ impl<'top> Iterator for RawBinaryStructIterator<'top> { #[derive(Copy, Clone)] pub struct LazyRawBinaryField<'top> { - pub(crate) value: LazyRawBinaryValue<'top>, + pub(crate) value: LazyRawBinaryValue_1_0<'top>, } impl<'top> LazyRawBinaryField<'top> { - pub(crate) fn new(value: LazyRawBinaryValue<'top>) -> Self { + pub(crate) fn new(value: LazyRawBinaryValue_1_0<'top>) -> Self { LazyRawBinaryField { value } } @@ -115,17 +117,17 @@ impl<'top> LazyRawBinaryField<'top> { RawSymbolTokenRef::SymbolId(field_id) } - pub fn value(&self) -> LazyRawBinaryValue<'top> { + pub fn value(&self) -> LazyRawBinaryValue_1_0<'top> { self.value } - pub(crate) fn into_value(self) -> LazyRawBinaryValue<'top> { + pub(crate) fn into_value(self) -> LazyRawBinaryValue_1_0<'top> { self.value } } impl<'top> LazyRawFieldPrivate<'top, BinaryEncoding_1_0> for LazyRawBinaryField<'top> { - fn into_value(self) -> LazyRawBinaryValue<'top> { + fn into_value(self) -> LazyRawBinaryValue_1_0<'top> { self.value } } @@ -135,7 +137,7 @@ impl<'top> LazyRawField<'top, BinaryEncoding_1_0> for LazyRawBinaryField<'top> { LazyRawBinaryField::name(self) } - fn value(&self) -> LazyRawBinaryValue<'top> { + fn value(&self) -> LazyRawBinaryValue_1_0<'top> { self.value() } } diff --git a/src/lazy/binary/raw/v1_1/annotations_iterator.rs b/src/lazy/binary/raw/v1_1/annotations_iterator.rs new file mode 100644 index 00000000..d9da1475 --- /dev/null +++ b/src/lazy/binary/raw/v1_1/annotations_iterator.rs @@ -0,0 +1,32 @@ +use crate::lazy::binary::raw::v1_1::immutable_buffer::ImmutableBuffer; +use crate::{IonResult, RawSymbolTokenRef}; + +/// Iterates over a slice of bytes, lazily reading them as a sequence of VarUInt symbol IDs. +pub struct RawBinaryAnnotationsIterator<'a> { + buffer: ImmutableBuffer<'a>, +} + +impl<'a> RawBinaryAnnotationsIterator<'a> { + pub(crate) fn new(buffer: ImmutableBuffer<'a>) -> RawBinaryAnnotationsIterator<'a> { + RawBinaryAnnotationsIterator { buffer } + } +} + +impl<'a> Iterator for RawBinaryAnnotationsIterator<'a> { + type Item = IonResult>; + + fn next(&mut self) -> Option { + if self.buffer.is_empty() { + return None; + } + // TODO: If the VarUInt doesn't end before the annotations sequence does (i.e. the stream is + // malformed, this will surface an `Incomplete` instead of a more descriptive error. + let (var_uint, buffer_after_var_uint) = match self.buffer.read_var_uint() { + Ok(output) => output, + Err(error) => return Some(Err(error)), + }; + let symbol_id = RawSymbolTokenRef::SymbolId(var_uint.value()); + self.buffer = buffer_after_var_uint; + Some(Ok(symbol_id)) + } +} diff --git a/src/lazy/binary/raw/v1_1/immutable_buffer.rs b/src/lazy/binary/raw/v1_1/immutable_buffer.rs new file mode 100644 index 00000000..5bafed85 --- /dev/null +++ b/src/lazy/binary/raw/v1_1/immutable_buffer.rs @@ -0,0 +1,1150 @@ +use crate::binary::constants::v1_1::{length_codes, IVM}; +use crate::binary::int::DecodedInt; +// use crate::binary::non_blocking::type_descriptor::{ +// Header, TypeDescriptor, ION_1_0_TYPE_DESCRIPTORS, +// }; +use crate::binary::uint::DecodedUInt; +use crate::binary::var_int::VarInt; +use crate::binary::var_uint::VarUInt; +use crate::lazy::binary::encoded_value::EncodedValue; +use crate::lazy::binary::raw::v1_1::value::LazyRawBinaryValue_1_1; +use crate::lazy::binary::raw::v1_1::{Header, TypeDescriptor, ION_1_1_TYPE_DESCRIPTORS}; +use crate::lazy::encoder::binary::v1_1::flex_int::FlexInt; +use crate::lazy::encoder::binary::v1_1::flex_uint::FlexUInt; +use crate::result::IonFailure; +use crate::types::UInt; +use crate::{Int, IonError, IonResult, IonType}; +use num_bigint::{BigInt, BigUint, Sign}; +use std::fmt::{Debug, Formatter}; +use std::mem; + +// This limit is used for stack-allocating buffer space to encode/decode UInts. +const UINT_STACK_BUFFER_SIZE: usize = 16; +// This number was chosen somewhat arbitrarily and could be lifted if a use case demands it. +const MAX_UINT_SIZE_IN_BYTES: usize = 2048; + +// This limit is used for stack-allocating buffer space to encode/decode Ints. +const INT_STACK_BUFFER_SIZE: usize = 16; +// This number was chosen somewhat arbitrarily and could be lifted if a use case demands it. +const MAX_INT_SIZE_IN_BYTES: usize = 2048; + +/// A buffer of unsigned bytes that can be cheaply copied and which defines methods for parsing +/// the various encoding elements of a binary Ion stream. +/// +/// Upon success, each parsing method on the `ImmutableBuffer` will return the value that was read +/// and a copy of the `ImmutableBuffer` that starts _after_ the bytes that were parsed. +/// +/// Methods that `peek` at the input stream do not return a copy of the buffer. +#[derive(PartialEq, Clone, Copy)] +pub struct ImmutableBuffer<'a> { + // `data` is a slice of remaining data in the larger input stream. + // `offset` is the position in the overall input stream where that slice begins. + // + // input: 00 01 02 03 04 05 06 07 08 09 + // └────┬────┘ + // data: &[u8] + // offset: 6 + data: &'a [u8], + offset: usize, + + // Each time something is parsed from the buffer successfully, the caller will mark the number + // of bytes that may be skipped the next time `advance_to_next_item` is called. + pub bytes_to_skip: usize, +} + +impl<'a> Debug for ImmutableBuffer<'a> { + fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { + write!(f, "ImmutableBuffer {{")?; + for byte in self.bytes().iter().take(16) { + write!(f, "{:x?} ", *byte)?; + } + write!(f, "}}") + } +} + +pub(crate) type ParseResult<'a, T> = IonResult<(T, ImmutableBuffer<'a>)>; + +impl<'a> ImmutableBuffer<'a> { + /// Constructs a new `ImmutableBuffer` that wraps `data`. + #[inline] + pub fn new(data: &[u8]) -> ImmutableBuffer { + Self::new_with_offset(data, 0) + } + + pub fn new_with_offset(data: &[u8], offset: usize) -> ImmutableBuffer { + ImmutableBuffer { + data, + offset, + bytes_to_skip: 0, + } + } + + /// Returns a slice containing all of the buffer's bytes. + pub fn bytes(&self) -> &[u8] { + self.data + } + + /// Gets a slice from the buffer starting at `offset` and ending at `offset + length`. + /// The caller must check that the buffer contains `length + offset` bytes prior + /// to calling this method. + pub fn bytes_range(&self, offset: usize, length: usize) -> &'a [u8] { + &self.data[offset..offset + length] + } + + /// Like [`Self::bytes_range`] above, but returns an updated copy of the [`ImmutableBuffer`] + /// instead of a `&[u8]`. + pub fn slice(&self, offset: usize, length: usize) -> ImmutableBuffer<'a> { + ImmutableBuffer { + data: self.bytes_range(offset, length), + offset: self.offset + offset, + bytes_to_skip: 0, + } + } + + /// Returns the number of bytes between the start of the original input byte array and the + /// subslice of that byte array that this `ImmutableBuffer` represents. + pub fn offset(&self) -> usize { + self.offset + } + + /// Returns the number of bytes in the buffer. + pub fn len(&self) -> usize { + self.data.len() + } + + /// Returns `true` if there are no bytes in the buffer. Otherwise, returns `false`. + pub fn is_empty(&self) -> bool { + self.data.is_empty() + } + + /// If the buffer is not empty, returns `Some(_)` containing the next byte in the buffer. + /// Otherwise, returns `None`. + pub fn peek_next_byte(&self) -> Option { + self.data.first().copied() + } + + /// If there are at least `n` bytes left in the buffer, returns `Some(_)` containing a slice + /// with the first `n` bytes. Otherwise, returns `None`. + pub fn peek_n_bytes(&self, n: usize) -> Option<&'a [u8]> { + self.data.get(..n) + } + + /// Creates a copy of this `ImmutableBuffer` that begins `num_bytes_to_consume` further into the + /// slice. + #[inline] + pub fn consume(&self, num_bytes_to_consume: usize) -> Self { + // This assertion is always run during testing but is removed in the release build. + debug_assert!(num_bytes_to_consume <= self.len()); + Self { + data: &self.data[num_bytes_to_consume..], + offset: self.offset + num_bytes_to_consume, + bytes_to_skip: 0, + } + } + + /// Reads the first byte in the buffer and returns it as a [TypeDescriptor]. + #[inline] + pub(crate) fn peek_type_descriptor(&self) -> IonResult { + if self.is_empty() { + return IonResult::incomplete("a type descriptor", self.offset()); + } + let next_byte = self.data[0]; + Ok(ION_1_1_TYPE_DESCRIPTORS[next_byte as usize]) + } + + /// Reads the first four bytes in the buffer as an Ion version marker. If it is successful, + /// returns an `Ok(_)` containing a `(major, minor)` version tuple. + /// + /// See: + pub fn read_ivm(self) -> ParseResult<'a, (u8, u8)> { + let bytes = self + .peek_n_bytes(IVM.len()) + .ok_or_else(|| IonError::incomplete("an IVM", self.offset()))?; + + match bytes { + [0xE0, major, minor, 0xEA] => { + let version = (*major, *minor); + Ok((version, self.consume(IVM.len()))) + } + invalid_ivm => IonResult::decoding_error(format!("invalid IVM: {invalid_ivm:?}")), + } + } + + /// Reads a [`FlexInt`] from the buffer. + pub fn read_flex_int(self) -> ParseResult<'a, FlexInt> { + let flex_int = FlexInt::read(self.bytes(), self.offset())?; + let remaining = self.consume(flex_int.size_in_bytes()); + Ok((flex_int, remaining)) + } + + /// Reads a [`FlexUInt`] from the buffer. + #[inline] + pub fn read_flex_uint(self) -> ParseResult<'a, FlexUInt> { + let flex_uint = FlexUInt::read(self.bytes(), self.offset())?; + let remaining = self.consume(flex_uint.size_in_bytes()); + Ok((flex_uint, remaining)) + } + + /// Reads a `VarUInt` encoding primitive from the beginning of the buffer. If it is successful, + /// returns an `Ok(_)` containing its [VarUInt] representation. + /// + /// See: + #[inline] + pub fn read_var_uint(self) -> ParseResult<'a, VarUInt> { + const LOWER_7_BITMASK: u8 = 0b0111_1111; + const HIGHEST_BIT_VALUE: u8 = 0b1000_0000; + + // Reading a `VarUInt` is one of the hottest paths in the binary 1.0 reader. + // Because `VarUInt`s represent struct field names, annotations, and value lengths, + // smaller values are more common than larger values. As an optimization, we have a + // dedicated code path for the decoding of 1- and 2-byte VarUInts. This allows the logic + // for the most common cases to be inlined and the logic for the less common cases + // (including errors) to be a function call. + + let data = self.bytes(); + // The 'fast path' first checks whether we have at least two bytes available. This allows us + // to do a single length check on the fast path. If there's one byte in the buffer that + // happens to be a complete VarUInt (a very rare occurrence), it will still be handled by + // `read_var_uint_slow()`. + if data.len() >= 2 { + let first_byte = data[0]; + let mut magnitude = (LOWER_7_BITMASK & first_byte) as usize; + let num_bytes = if first_byte >= HIGHEST_BIT_VALUE { + 1 + } else { + let second_byte = data[1]; + if second_byte < HIGHEST_BIT_VALUE { + return self.read_var_uint_slow(); + } + let lower_seven = (LOWER_7_BITMASK & second_byte) as usize; + magnitude <<= 7; + magnitude |= lower_seven; + 2 + }; + return Ok((VarUInt::new(magnitude, num_bytes), self.consume(num_bytes))); + } + + // All other VarUInt sizes and error cases (incomplete data, oversized, etc) are handled by + // this more general decoding loop. + self.read_var_uint_slow() + } + + #[cold] + pub fn read_var_uint_slow(self) -> ParseResult<'a, VarUInt> { + const BITS_PER_ENCODED_BYTE: usize = 7; + const STORAGE_SIZE_IN_BITS: usize = mem::size_of::() * 8; + const MAX_ENCODED_SIZE_IN_BYTES: usize = STORAGE_SIZE_IN_BITS / BITS_PER_ENCODED_BYTE; + + const LOWER_7_BITMASK: u8 = 0b0111_1111; + const HIGHEST_BIT_VALUE: u8 = 0b1000_0000; + + let mut magnitude: usize = 0; + let mut encoded_size_in_bytes = 0; + + for byte in self.bytes().iter().copied() { + encoded_size_in_bytes += 1; + magnitude <<= 7; // Shifts 0 to 0 in the first iteration + let lower_seven = (LOWER_7_BITMASK & byte) as usize; + magnitude |= lower_seven; + if byte >= HIGHEST_BIT_VALUE { + // This is the final byte. + // Make sure we haven't exceeded the configured maximum size + if encoded_size_in_bytes > MAX_ENCODED_SIZE_IN_BYTES { + return Self::value_too_large( + "a VarUInt", + encoded_size_in_bytes, + MAX_ENCODED_SIZE_IN_BYTES, + ); + } + return Ok(( + VarUInt::new(magnitude, encoded_size_in_bytes), + self.consume(encoded_size_in_bytes), + )); + } + } + + IonResult::incomplete("a VarUInt", self.offset() + encoded_size_in_bytes) + } + + /// Reads a `VarInt` encoding primitive from the beginning of the buffer. If it is successful, + /// returns an `Ok(_)` containing its [VarInt] representation. + /// + /// See: + pub fn read_var_int(self) -> ParseResult<'a, VarInt> { + const BITS_PER_ENCODED_BYTE: usize = 7; + const STORAGE_SIZE_IN_BITS: usize = mem::size_of::() * 8; + const MAX_ENCODED_SIZE_IN_BYTES: usize = STORAGE_SIZE_IN_BITS / BITS_PER_ENCODED_BYTE; + + const LOWER_6_BITMASK: u8 = 0b0011_1111; + const LOWER_7_BITMASK: u8 = 0b0111_1111; + const HIGHEST_BIT_VALUE: u8 = 0b1000_0000; + + const BITS_PER_BYTE: usize = 8; + const BITS_PER_U64: usize = mem::size_of::() * BITS_PER_BYTE; + + // Unlike VarUInt's encoding, the first byte in a VarInt is a special case because + // bit #6 (0-indexed, from the right) indicates whether the value is positive (0) or + // negative (1). + + if self.is_empty() { + return IonResult::incomplete("a VarInt", self.offset()); + } + let first_byte: u8 = self.peek_next_byte().unwrap(); + let no_more_bytes: bool = first_byte >= 0b1000_0000; // If the first bit is 1, we're done. + let is_negative: bool = (first_byte & 0b0100_0000) == 0b0100_0000; + let sign: i64 = if is_negative { -1 } else { 1 }; + let mut magnitude = (first_byte & 0b0011_1111) as i64; + + if no_more_bytes { + return Ok(( + VarInt::new(magnitude * sign, is_negative, 1), + self.consume(1), + )); + } + + let mut encoded_size_in_bytes = 1; + // Whether we found the terminating byte in this buffer. + let mut terminated = false; + + for byte in self.bytes()[1..].iter().copied() { + let lower_seven = (0b0111_1111 & byte) as i64; + magnitude <<= 7; + magnitude |= lower_seven; + encoded_size_in_bytes += 1; + if byte >= 0b1000_0000 { + terminated = true; + break; + } + } + + if !terminated { + return IonResult::incomplete("a VarInt", self.offset() + encoded_size_in_bytes); + } + + if encoded_size_in_bytes > MAX_ENCODED_SIZE_IN_BYTES { + return IonResult::decoding_error(format!( + "Found a {encoded_size_in_bytes}-byte VarInt. Max supported size is {MAX_ENCODED_SIZE_IN_BYTES} bytes." + )); + } + + Ok(( + VarInt::new(magnitude * sign, is_negative, encoded_size_in_bytes), + self.consume(encoded_size_in_bytes), + )) + } + + /// Reads the first `length` bytes from the buffer as a `UInt` encoding primitive. If it is + /// successful, returns an `Ok(_)` containing its [DecodedUInt] representation. + /// + /// See: + pub fn read_uint(self, length: usize) -> ParseResult<'a, DecodedUInt> { + if length <= mem::size_of::() { + return self.read_small_uint(length); + } + + // The UInt is too large to fit in a u64; read it as a BigUInt instead. + self.read_big_uint(length) + } + + /// Reads the first `length` bytes from the buffer as a `UInt`. The caller must confirm that + /// `length` is small enough to fit in a `u64`. + #[inline] + fn read_small_uint(self, length: usize) -> ParseResult<'a, DecodedUInt> { + let uint_bytes = self + .peek_n_bytes(length) + .ok_or_else(|| IonError::incomplete("a UInt", self.offset()))?; + let magnitude = DecodedUInt::small_uint_from_slice(uint_bytes); + Ok(( + DecodedUInt::new(UInt::from(magnitude), length), + self.consume(length), + )) + } + + /// Reads the first `length` bytes from the buffer as a `UInt`. If `length` is small enough + /// that the value can fit in a `usize`, it is strongly recommended that you use + /// `read_small_uint` instead as it will be much faster. + #[inline(never)] + // This method performs allocations and its generated assembly is rather large. Isolating its + // logic in a separate method that is never inlined keeps `read_uint` (its caller) small enough + // to inline. This is important as `read_uint` is on the hot path for most Ion streams. + fn read_big_uint(self, length: usize) -> ParseResult<'a, DecodedUInt> { + if length > MAX_UINT_SIZE_IN_BYTES { + return Self::value_too_large("a Uint", length, MAX_UINT_SIZE_IN_BYTES); + } + + let uint_bytes = self + .peek_n_bytes(length) + .ok_or_else(|| IonError::incomplete("a UInt", self.offset()))?; + + let magnitude = BigUint::from_bytes_be(uint_bytes); + Ok(( + DecodedUInt::new(UInt::from(magnitude), length), + self.consume(length), + )) + } + + #[inline(never)] + // This method is inline(never) because it is rarely invoked and its allocations/formatting + // compile to a non-trivial number of instructions. + fn value_too_large(label: &str, length: usize, max_length: usize) -> IonResult { + IonResult::decoding_error(format!( + "found {label} that was too large; size = {length}, max size = {max_length}" + )) + } + + /// Reads the first `length` bytes from the buffer as an `Int` encoding primitive. If it is + /// successful, returns an `Ok(_)` containing its [DecodedInt] representation and consumes the + /// source bytes. + /// + /// See: + pub fn read_int(self, length: usize) -> ParseResult<'a, DecodedInt> { + if length == 0 { + return Ok((DecodedInt::new(0, false, 0), self.consume(0))); + } else if length > MAX_INT_SIZE_IN_BYTES { + return IonResult::decoding_error(format!( + "Found a {length}-byte Int. Max supported size is {MAX_INT_SIZE_IN_BYTES} bytes." + )); + } + + let int_bytes = self + .peek_n_bytes(length) + .ok_or_else(|| IonError::incomplete("an Int encoding primitive", self.offset()))?; + + let mut is_negative: bool = false; + + let value: Int = if length <= mem::size_of::() { + // This Int will fit in an i64. + let first_byte: i64 = i64::from(int_bytes[0]); + let sign: i64 = if first_byte & 0b1000_0000 == 0 { + 1 + } else { + is_negative = true; + -1 + }; + let mut magnitude: i64 = first_byte & 0b0111_1111; + for &byte in &int_bytes[1..] { + let byte = i64::from(byte); + magnitude <<= 8; + magnitude |= byte; + } + (sign * magnitude).into() + } else { + // This Int is too big for an i64, we'll need to use a BigInt + let value = if int_bytes[0] & 0b1000_0000 == 0 { + BigInt::from_bytes_be(Sign::Plus, int_bytes) + } else { + is_negative = true; + // The leading sign bit is the only part of the input that can't be considered + // unsigned, big-endian integer bytes. We need to make our own copy of the input + // so we can flip that bit back to a zero before calling `from_bytes_be`. + let mut owned_int_bytes = Vec::from(int_bytes); + owned_int_bytes[0] &= 0b0111_1111; + BigInt::from_bytes_be(Sign::Minus, owned_int_bytes.as_slice()) + }; + + value.into() + }; + Ok(( + DecodedInt::new(value, is_negative, length), + self.consume(length), + )) + } + + /// Attempts to decode an annotations wrapper at the beginning of the buffer and returning + /// its subfields in an [`AnnotationsWrapper`]. + pub fn read_annotations_wrapper( + &self, + type_descriptor: TypeDescriptor, + ) -> ParseResult<'a, AnnotationsWrapper> { + // Consume the first byte; its contents are already in the `type_descriptor` parameter. + let input_after_type_descriptor = self.consume(1); + + // Read the combined length of the annotations sequence and the value that follows it + let (annotations_and_value_length, input_after_combined_length) = + match type_descriptor.length_code { + length_codes::NULL => (0, input_after_type_descriptor), + length_codes::VAR_UINT => { + let (var_uint, input) = input_after_type_descriptor.read_var_uint()?; + (var_uint.value(), input) + } + length => (length as usize, input_after_type_descriptor), + }; + + // Read the length of the annotations sequence + let (annotations_length, input_after_annotations_length) = + input_after_combined_length.read_var_uint()?; + + // Validate that the annotations sequence is not empty. + if annotations_length.value() == 0 { + return IonResult::decoding_error("found an annotations wrapper with no annotations"); + } + + println!( + "Annotations and value lengths: {}", + annotations_and_value_length + ); + println!("Annotations length: {}", annotations_length.size_in_bytes()); + println!( + "Annotations length .. Value: {}", + annotations_length.value() + ); + // Validate that the annotated value is not missing. + let expected_value_length = annotations_and_value_length + - annotations_length.size_in_bytes() + - annotations_length.value(); + + if expected_value_length == 0 { + return IonResult::decoding_error("found an annotation wrapper with no value"); + } + + // Skip over the annotations sequence itself; the reader will return to it if/when the + // reader asks to iterate over those symbol IDs. + let final_input = input_after_annotations_length.consume(annotations_length.value()); + + // Here, `self` is the (immutable) buffer we started with. Comparing it with `input` + // gets us the before-and-after we need to calculate the size of the header. + let annotations_header_length = final_input.offset() - self.offset(); + let annotations_header_length = u8::try_from(annotations_header_length).map_err(|_e| { + IonError::decoding_error("found an annotations header greater than 255 bytes long") + })?; + + let annotations_sequence_length = + u8::try_from(annotations_length.value()).map_err(|_e| { + IonError::decoding_error( + "found an annotations sequence greater than 255 bytes long", + ) + })?; + + let wrapper = AnnotationsWrapper { + header_length: annotations_header_length, + sequence_length: annotations_sequence_length, + expected_value_length, + }; + + Ok((wrapper, final_input)) + } + + /// Reads a `NOP` encoding primitive from the buffer. If it is successful, returns an `Ok(_)` + /// containing the number of bytes that were consumed. + /// + /// See: + #[inline(never)] + // NOP padding is not widely used in Ion 1.0, in part because many writer implementations do not + // expose the ability to write them. As such, this method has been marked `inline(never)` to + // allow the hot path to be better optimized. + pub fn read_nop_pad(self) -> ParseResult<'a, usize> { + let type_descriptor = self.peek_type_descriptor()?; + println!("type descriptor: {:?}", type_descriptor); + // Advance beyond the type descriptor + let remaining = self.consume(1); + // If the type descriptor says we should skip more bytes, skip them. + let (length, remaining) = remaining.read_length(type_descriptor.length_code)?; + if remaining.len() < length.value() { + return IonResult::incomplete("a NOP", remaining.offset()); + } + let remaining = remaining.consume(length.value()); + let total_nop_pad_size = 1 + length.size_in_bytes() + length.value(); + Ok((total_nop_pad_size, remaining)) + } + + /// Calls [`Self::read_nop_pad`] in a loop until the buffer is empty or a type descriptor + /// is encountered that is not a NOP. + #[inline(never)] + // NOP padding is not widely used in Ion 1.0. This method is annotated with `inline(never)` + // to avoid the compiler bloating other methods on the hot path with its rarely used + // instructions. + pub fn consume_nop_padding(self, mut type_descriptor: TypeDescriptor) -> ParseResult<'a, ()> { + let mut buffer = self; + // Skip over any number of NOP regions + while type_descriptor.is_nop() { + let (_, buffer_after_nop) = buffer.read_nop_pad()?; + buffer = buffer_after_nop; + if buffer.is_empty() { + break; + } + type_descriptor = buffer.peek_type_descriptor()? + } + Ok(((), buffer)) + } + + /// Interprets the length code in the provided [Header]; if necessary, will read more bytes + /// from the buffer to interpret as the value's length. If it is successful, returns an `Ok(_)` + /// containing a [VarUInt] representation of the value's length. If no additional bytes were + /// read, the returned `VarUInt`'s `size_in_bytes()` method will return `0`. + pub fn read_value_length(self, header: Header) -> ParseResult<'a, VarUInt> { + use IonType::*; + // Some type-specific `length` field overrides + let length_code = match header.ion_type { + // Null (0x0F) and Boolean (0x10, 0x11) are the only types that don't have/use a `length` + // field; the header contains the complete value. + Null | Bool => 0, + // If a struct has length = 1, its fields are ordered and the actual length follows. + // For the time being, this reader does not have any special handling for this case. + // Use `0xE` (14) as the length code instead so the call to `read_length` below + // consumes a VarUInt. + Struct if header.length_code == 1 => length_codes::VAR_UINT, + // For any other type, use the header's declared length code. + _ => header.length_code, + }; + + // Read the length, potentially consuming a VarUInt in the process. + let (length, remaining) = self.read_length(length_code)?; + + // After we get the length, perform some type-specific validation. + match header.ion_type { + Float => match header.length_code { + 0 | 4 | 8 | 15 => {} + _ => return IonResult::decoding_error("found a float with an illegal length code"), + }, + Timestamp if !header.is_null() && length.value() <= 1 => { + return IonResult::decoding_error("found a timestamp with length <= 1") + } + Struct if header.length_code == 1 && length.value() == 0 => { + return IonResult::decoding_error("found an empty ordered struct") + } + _ => {} + }; + + Ok((length, remaining)) + } + + /// Interprets a type descriptor's `L` nibble (length) in the way used by most Ion types. + /// + /// If `L` is... + /// * `f`: the value is a typed `null` and its length is `0`. + /// * `e`: the length is encoded as a `VarUInt` that follows the type descriptor. + /// * anything else: the `L` represents the actual length. + /// + /// If successful, returns an `Ok(_)` that contains the [VarUInt] representation + /// of the value's length. + pub fn read_length(self, length_code: u8) -> ParseResult<'a, VarUInt> { + let length = match length_code { + length_codes::NULL => VarUInt::new(0, 0), + length_codes::VAR_UINT => return self.read_var_uint(), + magnitude => VarUInt::new(magnitude as usize, 0), + }; + + // If we reach this point, the length was in the header byte and no additional bytes were + // consumed + Ok((length, self)) + } + + /// Reads a field ID and a value from the buffer. + pub(crate) fn peek_field(self) -> IonResult>> { + let mut input = self; + if self.is_empty() { + // We're at the end of the struct + return Ok(None); + } + // Read the field ID + let (mut field_id_var_uint, mut input_after_field_id) = input.read_var_uint()?; + if input_after_field_id.is_empty() { + return IonResult::incomplete( + "found field name but no value", + input_after_field_id.offset(), + ); + } + + let mut type_descriptor = input_after_field_id.peek_type_descriptor()?; + if type_descriptor.is_nop() { + // Read past NOP fields until we find the first one that's an actual value + // or we run out of struct bytes. Note that we read the NOP field(s) from `self` (the + // initial input) rather than `input_after_field_id` because it simplifies + // the logic of `read_struct_field_nop_pad()`, which is very rarely called. + (field_id_var_uint, input_after_field_id) = match input.read_struct_field_nop_pad()? { + None => { + // There are no more fields, we're at the end of the struct. + return Ok(None); + } + Some((nop_length, field_id_var_uint, input_after_field_id)) => { + // Advance `input` beyond the NOP so that when we store it in the value it begins + // with the field ID. + input = input.consume(nop_length); + type_descriptor = input_after_field_id.peek_type_descriptor()?; + (field_id_var_uint, input_after_field_id) + } + }; + } + + let field_id_length = field_id_var_uint.size_in_bytes() as u8; + let field_id = field_id_var_uint.value(); + + let mut value = input_after_field_id.read_value(type_descriptor)?; + value.encoded_value.field_id = Some(field_id); + value.encoded_value.field_id_length = field_id_length; + value.encoded_value.total_length += field_id_length as usize; + value.input = input; + Ok(Some(value)) + } + + #[cold] + /// Consumes (field ID, NOP pad) pairs until a non-NOP value is encountered in field position or + /// the buffer is empty. Returns a buffer starting at the field ID before the non-NOP value. + fn read_struct_field_nop_pad(self) -> IonResult)>> { + let mut input_before_field_id = self; + loop { + if input_before_field_id.is_empty() { + return Ok(None); + } + let (field_id_var_uint, input_after_field_id) = + input_before_field_id.read_var_uint()?; + // If we're out of data (i.e. there's no field value) the struct is incomplete. + if input_after_field_id.is_empty() { + return IonResult::incomplete( + "found a field name but no value", + input_after_field_id.offset(), + ); + } + // Peek at the next value header. If it's a NOP, we need to repeat the process. + if input_after_field_id.peek_type_descriptor()?.is_nop() { + // Consume the NOP to position the buffer at the beginning of the next field ID. + (_, input_before_field_id) = input_after_field_id.read_nop_pad()?; + } else { + // If it isn't a NOP, return the field ID and the buffer slice containing the field + // value. + let nop_length = input_before_field_id.offset() - self.offset(); + return Ok(Some((nop_length, field_id_var_uint, input_after_field_id))); + } + } + } + + /// Reads a value without a field name from the buffer. This is applicable in lists, s-expressions, + /// and at the top level. + pub(crate) fn peek_sequence_value(self) -> IonResult>> { + if self.is_empty() { + return Ok(None); + } + let mut input = self; + let mut type_descriptor = input.peek_type_descriptor()?; + // If we find a NOP... + if type_descriptor.is_nop() { + // ...skip through NOPs until we found the next non-NOP byte. + (_, input) = self.consume_nop_padding(type_descriptor)?; + // If there is no next byte, we're out of values. + if input.is_empty() { + return Ok(None); + } + // Otherwise, there's a value. + type_descriptor = input.peek_type_descriptor()?; + } + Ok(Some(input.read_value(type_descriptor)?)) + } + + /// Reads a value from the buffer. The caller must confirm that the buffer is not empty and that + /// the next byte (`type_descriptor`) is not a NOP. + fn read_value(self, type_descriptor: TypeDescriptor) -> IonResult> { + if type_descriptor.is_annotation_wrapper() { + self.read_annotated_value(type_descriptor) + } else { + self.read_value_without_annotations(type_descriptor) + } + } + + /// Reads a value from the buffer. The caller must confirm that the buffer is not empty and that + /// the next byte (`type_descriptor`) is neither a NOP nor an annotations wrapper. + fn read_value_without_annotations( + self, + type_descriptor: TypeDescriptor, + ) -> IonResult> { + let input = self; + let header = type_descriptor + .to_header() + .ok_or_else(|| IonError::decoding_error("found a non-value in value position"))?; + + let header_offset = input.offset(); + let (length, _) = input.consume(1).read_value_length(header)?; + let length_length = length.size_in_bytes() as u8; + let value_length = length.value(); // ha + let total_length = 1 // Header byte + + length_length as usize + + value_length; + + let encoded_value = EncodedValue { + header, + // If applicable, these are populated by the caller: `peek_field()` + field_id_length: 0, + field_id: None, + // If applicable, these are populated by the caller: `read_annotated_value()` + annotations_header_length: 0, + annotations_sequence_length: 0, + header_offset, + length_length, + value_length, + total_length, + }; + let lazy_value = LazyRawBinaryValue_1_1 { + encoded_value, + // If this value has a field ID or annotations, this will be replaced by the caller. + input: self, + }; + Ok(lazy_value) + } + + /// Reads an annotations wrapper and its associated value from the buffer. The caller must confirm + /// that the next byte in the buffer (`type_descriptor`) begins an annotations wrapper. + fn read_annotated_value( + self, + mut type_descriptor: TypeDescriptor, + ) -> IonResult> { + let input = self; + let (wrapper, input_after_annotations) = input.read_annotations_wrapper(type_descriptor)?; + type_descriptor = input_after_annotations.peek_type_descriptor()?; + + // Confirm that the next byte begins a value, not a NOP or another annotations wrapper. + if type_descriptor.is_annotation_wrapper() { + return IonResult::decoding_error( + "found an annotations wrapper inside an annotations wrapper", + ); + } else if type_descriptor.is_nop() { + return IonResult::decoding_error("found a NOP inside an annotations wrapper"); + } + + let mut lazy_value = + input_after_annotations.read_value_without_annotations(type_descriptor)?; + if wrapper.expected_value_length != lazy_value.encoded_value.total_length() { + return IonResult::decoding_error( + "value length did not match length declared by annotations wrapper", + ); + } + + lazy_value.encoded_value.annotations_header_length = wrapper.header_length; + lazy_value.encoded_value.annotations_sequence_length = wrapper.sequence_length; + lazy_value.encoded_value.total_length += wrapper.header_length as usize; + // Modify the input to include the annotations + lazy_value.input = input; + + Ok(lazy_value) + } + + // DataSource Functionality + + pub(crate) fn advance_to_next_item(&mut self) -> IonResult { + if self.len() < self.bytes_to_skip { + return IonResult::incomplete( + "cannot advance to next item, insufficient data in buffer", + self.offset(), + ); + } + + if self.bytes_to_skip > 0 { + Ok(self.consume(self.bytes_to_skip)) + } else { + Ok(*self) + } + } + + /// Runs the provided parsing function on this DataSource's buffer. + /// If it succeeds, marks the `DataSource` as ready to advance by the 'n' bytes + /// that were consumed. + /// If it does not succeed, the `DataSource` remains unchanged. + pub(crate) fn try_parse_next IonResult>>>( + &mut self, + parser: F, + ) -> IonResult>> { + let buffer = self.advance_to_next_item()?; + + let lazy_value = match parser(buffer) { + Ok(Some(output)) => output, + Ok(None) => return Ok(None), + Err(e) => return Err(e), + }; + + // If the value we read doesn't start where we began reading, there was a NOP. + let num_nop_bytes = lazy_value.input.offset() - buffer.offset(); + self.consume(num_nop_bytes); + self.bytes_to_skip = lazy_value.encoded_value.total_length(); + Ok(Some(lazy_value)) + } +} + +/// Represents the data found in an Ion 1.0 annotations wrapper. +pub struct AnnotationsWrapper { + pub header_length: u8, + pub sequence_length: u8, + pub expected_value_length: usize, +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::IonError; + use num_traits::Num; + + fn input_test>(input: A) { + let input = ImmutableBuffer::new(input.as_ref()); + // We can peek at the first byte... + assert_eq!(input.peek_next_byte(), Some(b'f')); + // ...without modifying the input. Looking at the next 3 bytes still includes 'f'. + assert_eq!(input.peek_n_bytes(3), Some("foo".as_bytes())); + // Advancing the cursor by 1... + let input = input.consume(1); + // ...causes next_byte() to return 'o'. + assert_eq!(input.peek_next_byte(), Some(b'o')); + let input = input.consume(1); + assert_eq!(input.peek_next_byte(), Some(b'o')); + let input = input.consume(1); + assert_eq!(input.peek_n_bytes(2), Some(" b".as_bytes())); + assert_eq!(input.peek_n_bytes(6), Some(" bar b".as_bytes())); + } + + #[test] + fn string_test() { + input_test(String::from("foo bar baz")); + } + + #[test] + fn slice_test() { + input_test("foo bar baz".as_bytes()); + } + + #[test] + fn vec_test() { + input_test(Vec::from("foo bar baz".as_bytes())); + } + + #[test] + fn read_var_uint() -> IonResult<()> { + let buffer = ImmutableBuffer::new(&[0b0111_1001, 0b0000_1111, 0b1000_0001]); + let var_uint = buffer.read_var_uint()?.0; + assert_eq!(3, var_uint.size_in_bytes()); + assert_eq!(1_984_385, var_uint.value()); + Ok(()) + } + + #[test] + fn read_var_uint_zero() -> IonResult<()> { + let buffer = ImmutableBuffer::new(&[0b1000_0000]); + let var_uint = buffer.read_var_uint()?.0; + assert_eq!(var_uint.size_in_bytes(), 1); + assert_eq!(var_uint.value(), 0); + Ok(()) + } + + #[test] + fn read_var_uint_two_bytes_max_value() -> IonResult<()> { + let buffer = ImmutableBuffer::new(&[0b0111_1111, 0b1111_1111]); + let var_uint = buffer.read_var_uint()?.0; + assert_eq!(var_uint.size_in_bytes(), 2); + assert_eq!(var_uint.value(), 16_383); + Ok(()) + } + + #[test] + fn read_incomplete_var_uint() -> IonResult<()> { + let buffer = ImmutableBuffer::new(&[0b0111_1001, 0b0000_1111]); + match buffer.read_var_uint() { + Err(IonError::Incomplete { .. }) => Ok(()), + other => panic!("expected IonError::Incomplete, but found: {other:?}"), + } + } + + #[test] + fn read_var_uint_overflow_detection() { + let buffer = ImmutableBuffer::new(&[ + 0b0111_1111, + 0b0111_1111, + 0b0111_1111, + 0b0111_1111, + 0b0111_1111, + 0b0111_1111, + 0b0111_1111, + 0b0111_1111, + 0b0111_1111, + 0b1111_1111, + ]); + buffer + .read_var_uint() + .expect_err("This should have failed due to overflow."); + } + + #[test] + fn read_var_int_zero() -> IonResult<()> { + let buffer = ImmutableBuffer::new(&[0b1000_0000]); + let var_int = buffer.read_var_int()?.0; + assert_eq!(var_int.size_in_bytes(), 1); + assert_eq!(var_int.value(), 0); + Ok(()) + } + + #[test] + fn read_negative_var_int() -> IonResult<()> { + let buffer = ImmutableBuffer::new(&[0b0111_1001, 0b0000_1111, 0b1000_0001]); + let var_int = buffer.read_var_int()?.0; + assert_eq!(var_int.size_in_bytes(), 3); + assert_eq!(var_int.value(), -935_809); + Ok(()) + } + + #[test] + fn read_positive_var_int() -> IonResult<()> { + let buffer = ImmutableBuffer::new(&[0b0011_1001, 0b0000_1111, 0b1000_0001]); + let var_int = buffer.read_var_int()?.0; + assert_eq!(var_int.size_in_bytes(), 3); + assert_eq!(var_int.value(), 935_809); + Ok(()) + } + + #[test] + fn read_var_int_two_byte_min() -> IonResult<()> { + let buffer = ImmutableBuffer::new(&[0b0111_1111, 0b1111_1111]); + let var_int = buffer.read_var_int()?.0; + assert_eq!(var_int.size_in_bytes(), 2); + assert_eq!(var_int.value(), -8_191); + Ok(()) + } + + #[test] + fn read_var_int_two_byte_max() -> IonResult<()> { + let buffer = ImmutableBuffer::new(&[0b0011_1111, 0b1111_1111]); + let var_int = buffer.read_var_int()?.0; + assert_eq!(var_int.size_in_bytes(), 2); + assert_eq!(var_int.value(), 8_191); + Ok(()) + } + + #[test] + fn read_var_int_overflow_detection() -> IonResult<()> { + let buffer = ImmutableBuffer::new(&[ + 0b0111_1111, + 0b0111_1111, + 0b0111_1111, + 0b0111_1111, + 0b0111_1111, + 0b0111_1111, + 0b0111_1111, + 0b0111_1111, + 0b0111_1111, + 0b1111_1111, + ]); + buffer + .read_var_int() + .expect_err("This should have failed due to overflow."); + Ok(()) + } + + #[test] + fn read_one_byte_uint() -> IonResult<()> { + let buffer = ImmutableBuffer::new(&[0b1000_0000]); + let var_int = buffer.read_uint(buffer.len())?.0; + assert_eq!(var_int.size_in_bytes(), 1); + assert_eq!(var_int.value(), &UInt::from(128u64)); + Ok(()) + } + + #[test] + fn read_two_byte_uint() -> IonResult<()> { + let buffer = ImmutableBuffer::new(&[0b0111_1111, 0b1111_1111]); + let var_int = buffer.read_uint(buffer.len())?.0; + assert_eq!(var_int.size_in_bytes(), 2); + assert_eq!(var_int.value(), &UInt::from(32_767u64)); + Ok(()) + } + + #[test] + fn read_three_byte_uint() -> IonResult<()> { + let buffer = ImmutableBuffer::new(&[0b0011_1100, 0b1000_0111, 0b1000_0001]); + let var_int = buffer.read_uint(buffer.len())?.0; + assert_eq!(var_int.size_in_bytes(), 3); + assert_eq!(var_int.value(), &UInt::from(3_966_849u64)); + Ok(()) + } + + #[test] + fn test_read_ten_byte_uint() -> IonResult<()> { + let data = vec![0xFFu8; 10]; + let buffer = ImmutableBuffer::new(&data); + let uint = buffer.read_uint(buffer.len())?.0; + assert_eq!(uint.size_in_bytes(), 10); + assert_eq!( + uint.value(), + &UInt::from(BigUint::from_str_radix("ffffffffffffffffffff", 16).unwrap()) + ); + Ok(()) + } + + #[test] + fn test_read_uint_too_large() { + let mut buffer = Vec::with_capacity(MAX_UINT_SIZE_IN_BYTES + 1); + buffer.resize(MAX_UINT_SIZE_IN_BYTES + 1, 1); + let buffer = ImmutableBuffer::new(&buffer); + let _uint = buffer + .read_uint(buffer.len()) + .expect_err("This exceeded the configured max UInt size."); + } + + #[test] + fn read_int_negative_zero() -> IonResult<()> { + let buffer = ImmutableBuffer::new(&[0b1000_0000]); // Negative zero + let int = buffer.read_int(buffer.len())?.0; + assert_eq!(int.size_in_bytes(), 1); + assert_eq!(int.value(), &Int::from(0)); + assert!(int.is_negative_zero()); + Ok(()) + } + + #[test] + fn read_int_positive_zero() -> IonResult<()> { + let buffer = ImmutableBuffer::new(&[0b0000_0000]); // Negative zero + let int = buffer.read_int(buffer.len())?.0; + assert_eq!(int.size_in_bytes(), 1); + assert_eq!(int.value(), &Int::from(0)); + assert!(!int.is_negative_zero()); + Ok(()) + } + + #[test] + fn read_int_length_zero() -> IonResult<()> { + let buffer = ImmutableBuffer::new(&[]); // Negative zero + let int = buffer.read_int(buffer.len())?.0; + assert_eq!(int.size_in_bytes(), 0); + assert_eq!(int.value(), &Int::from(0)); + assert!(!int.is_negative_zero()); + Ok(()) + } + + #[test] + fn read_two_byte_negative_int() -> IonResult<()> { + let buffer = ImmutableBuffer::new(&[0b1111_1111, 0b1111_1111]); + let int = buffer.read_int(buffer.len())?.0; + assert_eq!(int.size_in_bytes(), 2); + assert_eq!(int.value(), &Int::from(-32_767i64)); + Ok(()) + } + + #[test] + fn read_two_byte_positive_int() -> IonResult<()> { + let buffer = ImmutableBuffer::new(&[0b0111_1111, 0b1111_1111]); + let int = buffer.read_int(buffer.len())?.0; + assert_eq!(int.size_in_bytes(), 2); + assert_eq!(int.value(), &Int::from(32_767i64)); + Ok(()) + } + + #[test] + fn read_three_byte_negative_int() -> IonResult<()> { + let buffer = ImmutableBuffer::new(&[0b1011_1100, 0b1000_0111, 0b1000_0001]); + let int = buffer.read_int(buffer.len())?.0; + assert_eq!(int.size_in_bytes(), 3); + assert_eq!(int.value(), &Int::from(-3_966_849i64)); + Ok(()) + } + + #[test] + fn read_three_byte_positive_int() -> IonResult<()> { + let buffer = ImmutableBuffer::new(&[0b0011_1100, 0b1000_0111, 0b1000_0001]); + let int = buffer.read_int(buffer.len())?.0; + assert_eq!(int.size_in_bytes(), 3); + assert_eq!(int.value(), &Int::from(3_966_849i64)); + Ok(()) + } + + #[test] + fn read_int_overflow() -> IonResult<()> { + let data = vec![1; MAX_INT_SIZE_IN_BYTES + 1]; + let buffer = ImmutableBuffer::new(&data); // Negative zero + buffer + .read_int(buffer.len()) + .expect_err("This exceeded the configured max Int size."); + Ok(()) + } +} diff --git a/src/lazy/binary/raw/v1_1/mod.rs b/src/lazy/binary/raw/v1_1/mod.rs new file mode 100644 index 00000000..a27fedd4 --- /dev/null +++ b/src/lazy/binary/raw/v1_1/mod.rs @@ -0,0 +1,11 @@ +mod annotations_iterator; +pub use annotations_iterator::*; +pub mod immutable_buffer; +pub mod reader; +pub mod sequence; +pub mod r#struct; +mod type_code; +pub mod value; +pub use type_code::*; +pub mod type_descriptor; +pub use type_descriptor::*; diff --git a/src/lazy/binary/raw/v1_1/reader.rs b/src/lazy/binary/raw/v1_1/reader.rs new file mode 100644 index 00000000..39342806 --- /dev/null +++ b/src/lazy/binary/raw/v1_1/reader.rs @@ -0,0 +1,151 @@ +#![allow(non_camel_case_types)] + +// use crate::lazy::binary::encoded_value::EncodedValue; +// use crate::lazy::raw_value_ref::RawValueRef; +// use crate::lazy::r#struct::LazyStruct; +use crate::lazy::binary::raw::v1_1::annotations_iterator::RawBinaryAnnotationsIterator; +use crate::lazy::binary::raw::v1_1::immutable_buffer::ImmutableBuffer; +use crate::lazy::binary::raw::v1_1::r#struct::RawBinaryStructIterator_1_1 as RawBinaryStructIterator; +use crate::lazy::binary::raw::v1_1::value::LazyRawBinaryValue_1_1; +use crate::lazy::decoder::private::{LazyContainerPrivate, LazyRawValuePrivate}; +use crate::lazy::decoder::{ + LazyDecoder, LazyRawReader, LazyRawSequence, LazyRawStruct, LazyRawValue, LazyRawValueExpr, +}; +use crate::lazy::encoder::private::Sealed; +use crate::lazy::encoding::BinaryEncoding_1_1; +use crate::lazy::raw_stream_item::{LazyRawStreamItem, RawStreamItem}; +use crate::result::IonFailure; +use crate::{IonResult, IonType, RawSymbolTokenRef}; + +use bumpalo::Bump as BumpAllocator; + +pub struct LazyRawBinaryReader_1_1<'data> { + data: ImmutableBuffer<'data>, +} + +impl<'data> LazyRawBinaryReader_1_1<'data> { + fn new(data: &'data [u8]) -> Self { + Self::new_with_offset(data, 0) + } + + fn new_with_offset(data: &'data [u8], offset: usize) -> Self { + let data = ImmutableBuffer::new_with_offset(data, offset); + Self { data } + } + + fn read_ivm<'top>( + &mut self, + buffer: ImmutableBuffer<'data>, + ) -> IonResult> + where + 'data: 'top, + { + let ((major, minor), _buffer_after_ivm) = buffer.read_ivm()?; + if (major, minor) != (1, 1) { + return IonResult::decoding_error(format!( + "unsupported version of Ion: v{}.{}; only 1.1 is supported by this reader", + major, minor, + )); + } + self.data = buffer; + self.data.bytes_to_skip = 4; + Ok(LazyRawStreamItem::::VersionMarker(1, 1)) + } + + fn read_value<'top>( + &mut self, + buffer: ImmutableBuffer<'data>, + ) -> IonResult> + where + 'data: 'top, + { + let lazy_value = match ImmutableBuffer::peek_sequence_value(buffer)? { + Some(lazy_value) => lazy_value, + None => return Ok(LazyRawStreamItem::::EndOfStream), + }; + self.data = buffer; + self.data.bytes_to_skip = lazy_value.encoded_value.total_length(); + Ok(RawStreamItem::Value(lazy_value)) + } + + pub fn next<'top>(&'top mut self) -> IonResult> + where + 'data: 'top, + { + let mut buffer = self.data.advance_to_next_item()?; + if buffer.is_empty() { + return Ok(LazyRawStreamItem::::EndOfStream); + } + + let mut type_descriptor = buffer.peek_type_descriptor()?; + if type_descriptor.is_nop() { + println!("Nop, reading sled"); + (_, buffer) = buffer.consume_nop_padding(type_descriptor)?; + if buffer.is_empty() { + println!("Reached end of stream"); + return Ok(LazyRawStreamItem::::EndOfStream); + } + } + if type_descriptor.is_ivm_start() { + return self.read_ivm(buffer); + } + println!("Reading value"); + self.read_value(buffer) + } +} + +impl<'data> Sealed for LazyRawBinaryReader_1_1<'data> {} + +impl<'data> LazyRawReader<'data, BinaryEncoding_1_1> for LazyRawBinaryReader_1_1<'data> { + fn new(data: &'data [u8]) -> Self { + Self::new(data) + } + + fn next<'top>( + &'top mut self, + allocator: &'top BumpAllocator, + ) -> IonResult> + where + 'data: 'top, + { + self.next() + } + + fn resume_at_offset(data: &'data [u8], offset: usize, _saved_state: ::ReaderSavedState) + -> Self + { + Self::new_with_offset(data, offset) + } + + fn position(&self) -> usize { + self.data.offset() + self.data.bytes_to_skip + } +} + +#[cfg(test)] +mod tests { + use crate::lazy::binary::raw::v1_1::reader::LazyRawBinaryReader_1_1; + use crate::{IonResult, IonType}; + + #[test] + fn nop() -> IonResult<()> { + let data: Vec = vec![ + 0xe0, 0x01, 0x01, 0xea, // IVM + 0xEC, // 1-Byte NOP + 0xEC, 0xEC, // 2-Byte NOP + 0xEC, 0xEC, 0xEC, // 3-Byte Nop + 0xED, 0x05, 0x00, 0x00, // 4-Byte NOP + 0xea, // null.null + ]; + + let mut reader = LazyRawBinaryReader_1_1::new(&data); + let _ivm = reader.next()?.expect_ivm()?; + + assert_eq!( + reader.next()?.expect_value()?.read()?.expect_null()?, + IonType::Null + ); + + Ok(()) + } +} diff --git a/src/lazy/binary/raw/v1_1/sequence.rs b/src/lazy/binary/raw/v1_1/sequence.rs new file mode 100644 index 00000000..f224538f --- /dev/null +++ b/src/lazy/binary/raw/v1_1/sequence.rs @@ -0,0 +1,152 @@ +#![allow(non_camel_case_types)] + +use crate::lazy::binary::raw::v1_1::annotations_iterator::RawBinaryAnnotationsIterator; +use crate::lazy::binary::raw::v1_1::immutable_buffer::ImmutableBuffer; +use crate::lazy::binary::raw::v1_1::value::LazyRawBinaryValue_1_1; +use crate::lazy::decoder::private::LazyContainerPrivate; +use crate::lazy::decoder::{LazyRawSequence, LazyRawValueExpr, RawValueExpr}; +use crate::lazy::encoding::{BinaryEncoding, BinaryEncoding_1_1}; +use crate::{IonResult, IonType}; +use std::fmt::{Debug, Formatter}; + +#[derive(Debug, Copy, Clone)] +pub struct LazyRawBinaryList_1_1<'top> { + pub(crate) sequence: LazyRawBinarySequence_1_1<'top>, +} + +#[derive(Debug, Copy, Clone)] +pub struct LazyRawBinarySExp_1_1<'top> { + pub(crate) sequence: LazyRawBinarySequence_1_1<'top>, +} + +impl<'top> LazyContainerPrivate<'top, BinaryEncoding_1_1> for LazyRawBinaryList_1_1<'top> { + fn from_value(value: LazyRawBinaryValue_1_1<'top>) -> Self { + LazyRawBinaryList_1_1 { + sequence: LazyRawBinarySequence_1_1 { value }, + } + } +} + +impl<'top> LazyRawSequence<'top, BinaryEncoding_1_1> for LazyRawBinaryList_1_1<'top> { + type Iterator = RawBinarySequenceIterator<'top>; + + fn annotations(&self) -> RawBinaryAnnotationsIterator<'top> { + self.sequence.value.annotations() + } + + fn ion_type(&self) -> IonType { + IonType::List + } + + fn iter(&self) -> Self::Iterator { + self.sequence.iter() + } + + fn as_value(&self) -> LazyRawBinaryValue_1_1<'top> { + self.sequence.value + } +} + +impl<'top> LazyContainerPrivate<'top, BinaryEncoding_1_1> for LazyRawBinarySExp_1_1<'top> { + fn from_value(value: LazyRawBinaryValue_1_1<'top>) -> Self { + LazyRawBinarySExp_1_1 { + sequence: LazyRawBinarySequence_1_1 { value }, + } + } +} + +impl<'top> LazyRawSequence<'top, BinaryEncoding_1_1> for LazyRawBinarySExp_1_1<'top> { + type Iterator = RawBinarySequenceIterator<'top>; + + fn annotations(&self) -> RawBinaryAnnotationsIterator<'top> { + self.sequence.value.annotations() + } + + fn ion_type(&self) -> IonType { + IonType::SExp + } + + fn iter(&self) -> Self::Iterator { + self.sequence.iter() + } + + fn as_value(&self) -> LazyRawBinaryValue_1_1<'top> { + self.sequence.value + } +} + +#[derive(Copy, Clone)] +pub struct LazyRawBinarySequence_1_1<'top> { + pub(crate) value: LazyRawBinaryValue_1_1<'top>, +} + +impl<'top> LazyRawBinarySequence_1_1<'top> { + pub fn ion_type(&self) -> IonType { + self.value.ion_type() + } + + pub fn iter(&self) -> RawBinarySequenceIterator<'top> { + // Get as much of the sequence's body as is available in the input buffer. + // Reading a child value may fail as `Incomplete` + let buffer_slice = self.value.available_body(); + RawBinarySequenceIterator::new(buffer_slice) + } +} + +impl<'a, 'top> IntoIterator for &'a LazyRawBinarySequence_1_1<'top> { + type Item = IonResult>; + type IntoIter = RawBinarySequenceIterator<'top>; + + fn into_iter(self) -> Self::IntoIter { + self.iter() + } +} + +impl<'a> Debug for LazyRawBinarySequence_1_1<'a> { + fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { + match self.value.encoded_value.ion_type() { + IonType::SExp => { + write!(f, "(")?; + for value in self { + write!(f, "{:?} ", value?)?; + } + write!(f, ")").unwrap(); + } + IonType::List => { + write!(f, "[")?; + for value in self { + write!(f, "{:?},", value?)?; + } + write!(f, "]").unwrap(); + } + _ => unreachable!("LazyRawSequence is only created for list and sexp"), + } + + Ok(()) + } +} + +pub struct RawBinarySequenceIterator<'top> { + source: ImmutableBuffer<'top>, +} + +impl<'top> RawBinarySequenceIterator<'top> { + pub(crate) fn new(input: ImmutableBuffer<'top>) -> RawBinarySequenceIterator<'top> { + RawBinarySequenceIterator { source: input } + } +} + +impl<'top> Iterator for RawBinarySequenceIterator<'top> { + type Item = IonResult>; + + fn next(&mut self) -> Option { + match self + .source + .try_parse_next(ImmutableBuffer::<'top>::peek_sequence_value) + { + Ok(Some(value)) => Some(Ok(RawValueExpr::ValueLiteral(value))), + Ok(None) => None, + Err(e) => Some(Err(e)), + } + } +} diff --git a/src/lazy/binary/raw/v1_1/struct.rs b/src/lazy/binary/raw/v1_1/struct.rs new file mode 100644 index 00000000..81cc12fc --- /dev/null +++ b/src/lazy/binary/raw/v1_1/struct.rs @@ -0,0 +1,152 @@ +#![allow(non_camel_case_types)] + +use std::fmt; +use std::fmt::{Debug, Formatter}; + +use crate::lazy::binary::raw::v1_1::annotations_iterator::RawBinaryAnnotationsIterator; +use crate::lazy::binary::raw::v1_1::{ + immutable_buffer::ImmutableBuffer, value::LazyRawBinaryValue_1_1, +}; +use crate::lazy::decoder::private::{ + LazyContainerPrivate, LazyRawFieldPrivate, LazyRawValuePrivate, +}; +use crate::lazy::decoder::{ + LazyRawField, LazyRawFieldExpr, LazyRawStruct, LazyRawValue, RawFieldExpr, RawValueExpr, +}; +use crate::lazy::encoding::{BinaryEncoding, BinaryEncoding_1_1}; +use crate::{IonResult, RawSymbolTokenRef}; + +#[derive(Copy, Clone)] +pub struct LazyRawBinaryStruct_1_1<'top> { + pub(crate) value: LazyRawBinaryValue_1_1<'top>, +} + +impl<'a, 'top> IntoIterator for &'a LazyRawBinaryStruct_1_1<'top> { + type Item = IonResult>; + type IntoIter = RawBinaryStructIterator_1_1<'top>; + + fn into_iter(self) -> Self::IntoIter { + self.iter() + } +} + +impl<'top> Debug for LazyRawBinaryStruct_1_1<'top> { + fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result { + write!(f, "{{")?; + for field in self { + let (name, lazy_value) = field?.expect_name_value()?; + let value = lazy_value.read()?; + write!(f, "{:?}:{:?},", name, value)?; + } + write!(f, "}}")?; + Ok(()) + } +} + +impl<'top> LazyRawBinaryStruct_1_1<'top> { + fn annotations(&self) -> RawBinaryAnnotationsIterator<'top> { + self.value.annotations() + } + + pub fn iter(&self) -> RawBinaryStructIterator_1_1<'top> { + // Get as much of the struct's body as is available in the input buffer. + // Reading a child value may fail as `Incomplete` + let buffer_slice = self.value.available_body(); + RawBinaryStructIterator_1_1::new(buffer_slice) + } +} + +impl<'top> LazyContainerPrivate<'top, BinaryEncoding_1_1> for LazyRawBinaryStruct_1_1<'top> { + fn from_value(value: LazyRawBinaryValue_1_1<'top>) -> Self { + LazyRawBinaryStruct_1_1 { value } + } +} + +impl<'top> LazyRawStruct<'top, BinaryEncoding_1_1> for LazyRawBinaryStruct_1_1<'top> { + type Iterator = RawBinaryStructIterator_1_1<'top>; + + fn annotations(&self) -> RawBinaryAnnotationsIterator<'top> { + self.annotations() + } + + fn iter(&self) -> Self::Iterator { + self.iter() + } +} + +pub struct RawBinaryStructIterator_1_1<'top> { + source: ImmutableBuffer<'top>, +} + +impl<'top> RawBinaryStructIterator_1_1<'top> { + pub(crate) fn new(input: ImmutableBuffer<'top>) -> RawBinaryStructIterator_1_1<'top> { + RawBinaryStructIterator_1_1 { source: input } + } +} + +impl<'top> Iterator for RawBinaryStructIterator_1_1<'top> { + type Item = IonResult>; + + fn next(&mut self) -> Option { + match self.source.try_parse_next(ImmutableBuffer::peek_field) { + Ok(Some(lazy_raw_value)) => Some(Ok(RawFieldExpr::NameValuePair( + lazy_raw_value.field_name().unwrap(), + RawValueExpr::ValueLiteral(lazy_raw_value), + ))), + Ok(None) => None, + Err(e) => Some(Err(e)), + } + } +} + +#[derive(Copy, Clone)] +pub struct LazyRawBinaryField_1_1<'top> { + pub(crate) value: LazyRawBinaryValue_1_1<'top>, +} + +impl<'top> LazyRawBinaryField_1_1<'top> { + pub(crate) fn new(value: LazyRawBinaryValue_1_1<'top>) -> Self { + LazyRawBinaryField_1_1 { value } + } + + pub fn name(&self) -> RawSymbolTokenRef<'top> { + // We're in a struct field, the field ID must be populated. + let field_id = self.value.encoded_value.field_id.unwrap(); + RawSymbolTokenRef::SymbolId(field_id) + } + + pub fn value(&self) -> LazyRawBinaryValue_1_1<'top> { + self.value + } + + pub(crate) fn into_value(self) -> LazyRawBinaryValue_1_1<'top> { + self.value + } +} + +impl<'top> LazyRawFieldPrivate<'top, BinaryEncoding_1_1> for LazyRawBinaryField_1_1<'top> { + fn into_value(self) -> LazyRawBinaryValue_1_1<'top> { + self.value + } +} + +impl<'top> LazyRawField<'top, BinaryEncoding_1_1> for LazyRawBinaryField_1_1<'top> { + fn name(&self) -> RawSymbolTokenRef<'top> { + LazyRawBinaryField_1_1::name(self) + } + + fn value(&self) -> LazyRawBinaryValue_1_1<'top> { + self.value() + } +} + +impl<'top> Debug for LazyRawBinaryField_1_1<'top> { + fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result { + write!( + f, + "${}: {:?}", + self.value.encoded_value.field_id.unwrap(), + self.value() + ) + } +} diff --git a/src/lazy/binary/raw/v1_1/type_code.rs b/src/lazy/binary/raw/v1_1/type_code.rs new file mode 100644 index 00000000..6c6ff65d --- /dev/null +++ b/src/lazy/binary/raw/v1_1/type_code.rs @@ -0,0 +1,147 @@ +use crate::IonResult; +use std::convert::TryFrom; + +use crate::result::{IonError, IonFailure}; +use crate::IonType; + +/// Represents the type information found in the header byte of each binary Ion value. +/// While this value can be readily mapped to a user-level [`IonType`], it is a distinct concept. +/// The IonTypeCode enum captures system-level information that is not exposed to end users of the +/// library, including: +/// * Whether the cursor is positioned over whitespace that needs to be skipped. +/// * Whether the integer value being read is positive or negative. +/// * Whether the next type code is reserved. +/// +/// See the +/// [Typed Value Formats](https://amazon-ion.github.io/ion-docs/docs/binary.html#typed-value-formats) +/// section of the spec for more information. +#[derive(Debug, PartialEq, Eq, Copy, Clone)] +pub enum IonTypeCode { + EExpression(bool), // 0x00-0x4F - With, or without, address in the opcode. + Integer, // 0x50-0x58 - Integer up to 8 bytes wide. + Float, // 0x5A-0x5D - + Boolean, // 0x5E-0x5F - + Decimal, // 0x60-0x6F - + Timestamp, // 0x70-0x7F - + String, // 0x80-0x80 - + InlineSymbol, // 0x90-0x9F - + List, // 0xA0-0xAF - + SExpression, // 0xB0-0xBF - + StructEmpty, // 0xC0 - + // reserved + StructSymAddress, // 0xD2-0xDF - + // reserved + StructFlexSym, // 0xD2-0xDF - + IonVersionMarker, // 0xE0 - + + SymbolAddress, // 0xE1-0xE3 - + AnnotationSymAddress, // 0xE4-0xE6 - + AnnotationFlexSym, // 0xE7-0xE9 - + NullNull, // 0xEA - + TypedNull, // 0xEB - + Nop, // 0xEC-0xED - + // Reserved + SystemMacroInvoke, // 0xEF - + // delimited container end + // delimited list start + // delimited s-expression start +} + +impl TryFrom for IonType { + type Error = IonError; + + /// Attempts to convert the system-level IonTypeCode into the corresponding user-level IonType. + fn try_from(ion_type_code: IonTypeCode) -> Result { + use IonTypeCode::*; + let ion_type = match ion_type_code { + NullNull => IonType::Null, + Nop => IonType::Null, + // NullOrNop => IonType::Null, + // Boolean => IonType::Bool, + // PositiveInteger | NegativeInteger => IonType::Int, + // Float => IonType::Float, + // Decimal => IonType::Decimal, + // Timestamp => IonType::Timestamp, + // Symbol => IonType::Symbol, + // String => IonType::String, + // Clob => IonType::Clob, + // Blob => IonType::Blob, + // List => IonType::List, + // SExpression => IonType::SExp, + // Struct => IonType::Struct, + _ => { + return IonResult::decoding_error(format!( + "Attempted to make an IonType from an invalid type code: {ion_type_code:?}" + )); + } + }; + Ok(ion_type) + } +} + +impl TryFrom for IonTypeCode { + type Error = IonError; + + /// Attempts to convert the provided byte into an IonTypeCode. Any value greater than 15 + /// will result in an Error. + fn try_from(type_code: u8) -> Result { + use IonTypeCode::*; + let ion_type_code = match (type_code.overflowing_shr(8).0, type_code & 0x0F) { + (0xE, 0xA) => NullNull, + (0xE, 0xC..=0xD) => Nop, + // 0 => NullOrNop, + // 1 => Boolean, + // 2 => PositiveInteger, + // 3 => NegativeInteger, + // 4 => Float, + // 5 => Decimal, + // 6 => Timestamp, + // 7 => Symbol, + // 8 => String, + // 9 => Clob, + // 10 => Blob, + // 11 => List, + // 12 => SExpression, + // 13 => Struct, + // 14 => AnnotationOrIvm, + // 15 => Reserved, + _ => { + return IonResult::decoding_error(format!( + "{type_code:?} is not implemented, or an invalid type code." + )); + } // _ => { + // return IonResult::decoding_error(format!( + // "{type_code:?} is not a valid header type code." + // )); + // } + }; + Ok(ion_type_code) + } +} + +impl IonTypeCode { + /// Constant function to convert an [`IonTypeCode`] into a `u8`. + pub const fn to_u8(self) -> u8 { + use IonTypeCode::*; + match self { + EExpression(true) => 0x0, + // NullOrNop => 0, + // Boolean => 1, + // PositiveInteger => 2, + // NegativeInteger => 3, + // Float => 4, + // Decimal => 5, + // Timestamp => 6, + // Symbol => 7, + // String => 8, + // Clob => 9, + // Blob => 10, + // List => 11, + // SExpression => 12, + // Struct => 13, + // AnnotationOrIvm => 14, + // Reserved => 15, + _ => todo!(), + } + } +} diff --git a/src/lazy/binary/raw/v1_1/type_descriptor.rs b/src/lazy/binary/raw/v1_1/type_descriptor.rs new file mode 100644 index 00000000..9f98ae12 --- /dev/null +++ b/src/lazy/binary/raw/v1_1/type_descriptor.rs @@ -0,0 +1,164 @@ +use crate::binary::constants::v1_1::length_codes; +use crate::lazy::binary::encoded_value::EncodedHeader; +use crate::lazy::binary::raw::v1_1::IonTypeCode; +use crate::IonType; + +/// Contains all of the information that can be extracted from the one-octet type descriptor +/// found at the beginning of each value, annotations wrapper, IVM, or NOP in a binary Ion stream. +/// For more information, consult the +/// [Typed Value Formats](https://amazon-ion.github.io/ion-docs/docs/binary.html#typed-value-formats) +/// section of the binary Ion spec. +#[derive(Copy, Clone, Debug, PartialEq)] +pub struct TypeDescriptor { + pub ion_type_code: IonTypeCode, + pub ion_type: Option, + pub length_code: u8, +} + +/// A statically defined array of TypeDescriptor that allows a binary reader to map a given +/// byte (`u8`) to a `TypeDescriptor` without having to perform any masking or bitshift operations. +pub(crate) static ION_1_1_TYPE_DESCRIPTORS: &[TypeDescriptor; 256] = &init_type_descriptor_cache(); + +const DEFAULT_HEADER: TypeDescriptor = TypeDescriptor { + ion_type_code: IonTypeCode::Nop, + ion_type: None, + length_code: 0, +}; + +pub(crate) const fn init_type_descriptor_cache() -> [TypeDescriptor; 256] { + let mut jump_table = [DEFAULT_HEADER; 256]; + let mut index: usize = 0; + while index < 256 { + let byte = index as u8; + jump_table[index] = TypeDescriptor::from_byte(byte); + index += 1; + } + jump_table +} + +impl TypeDescriptor { + /// Attempts to parse the provided byte. If the type code is unrecognized or the + /// type code + length code combination is illegal, an error will be returned. + pub const fn from_byte(byte: u8) -> TypeDescriptor { + let (high_nibble, low_nibble) = (byte >> 4, byte & 0x0F); + use IonTypeCode::*; + + let (ion_type_code, length_code) = match (high_nibble, low_nibble) { + (0xE, 0x0) => (IonVersionMarker, 3), + (0xE, 0xA) => (NullNull, 0), + (0xE, 0xC..=0xD) => (Nop, 0), + _ => (Nop, 0), + // 1 => Boolean, + // 2 => PositiveInteger, + // 3 => NegativeInteger, + // 4 => Float, + // 5 => Decimal, + // 6 => Timestamp, + // 7 => Symbol, + // 8 => String, + // 9 => Clob, + // 10 => Blob, + // 11 => List, + // 12 => SExpression, + // 13 => Struct, + // 14 => AnnotationOrIvm, + // 15 => Reserved, + // _ => panic!("type code was larger than a nibble"), + }; + let ion_type = match ion_type_code { + NullNull => Some(IonType::Null), + _ => Some(IonType::Null), + // NullOrNop if length_code == length_codes::NULL => Some(IonType::Null), + // NullOrNop => None, + // Boolean => Some(IonType::Bool), + // PositiveInteger => Some(IonType::Int), + // NegativeInteger => Some(IonType::Int), + // Float => Some(IonType::Float), + // Decimal => Some(IonType::Decimal), + // Timestamp => Some(IonType::Timestamp), + // Symbol => Some(IonType::Symbol), + // String => Some(IonType::String), + // Clob => Some(IonType::Clob), + // Blob => Some(IonType::Blob), + // List => Some(IonType::List), + // SExpression => Some(IonType::SExp), + // Struct => Some(IonType::Struct), + // AnnotationOrIvm => None, + // Reserved => None, + }; + TypeDescriptor { + ion_type, + ion_type_code, + length_code, + } + } + + pub fn is_null(&self) -> bool { + self.ion_type.is_some() && self.length_code == length_codes::NULL + } + + pub fn is_nop(&self) -> bool { + self.ion_type_code == IonTypeCode::Nop + } + + pub fn is_ivm_start(&self) -> bool { + self.ion_type_code == IonTypeCode::IonVersionMarker + // self.ion_type_code == IonTypeCode::AnnotationOrIvm && self.length_code == 0 + } + + pub fn is_annotation_wrapper(&self) -> bool { + false + // self.ion_type_code == IonTypeCode::AnnotationOrIvm && self.length_code > 0 + } + + #[inline] + pub fn to_header(self) -> Option
{ + let ion_type = self.ion_type?; + let header = Header { + ion_type, + ion_type_code: self.ion_type_code, + length_code: self.length_code, + }; + Some(header) + } +} + +/// Represents a `TypeDescriptor` that appears before an Ion value (and not a NOP, IVM, +/// or annotations wrapper). +/// +/// Notably, it stores an `IonType` instead of an `Option`, allowing functions that expect +/// a value header to avoid matching/unwrapping. +#[derive(Copy, Clone, Debug, PartialEq, Eq)] +pub struct Header { + pub ion_type: IonType, + // The only time the `ion_type_code` is required is to distinguish between positive + // and negative integers. + pub ion_type_code: IonTypeCode, + pub length_code: u8, +} + +impl EncodedHeader for Header { + type TypeCode = IonTypeCode; + + fn ion_type(&self) -> IonType { + self.ion_type + } + + fn type_code(&self) -> Self::TypeCode { + self.ion_type_code + } + + fn length_code(&self) -> u8 { + self.length_code + } + + fn is_null(&self) -> bool { + todo!() + } +} + +impl Header { + pub fn is_null(&self) -> bool { + self.ion_type_code == IonTypeCode::NullNull || self.ion_type_code == IonTypeCode::TypedNull + } +} diff --git a/src/lazy/binary/raw/v1_1/value.rs b/src/lazy/binary/raw/v1_1/value.rs new file mode 100644 index 00000000..ff7de6db --- /dev/null +++ b/src/lazy/binary/raw/v1_1/value.rs @@ -0,0 +1,252 @@ +#![allow(non_camel_case_types)] + +use std::ops::Range; + +use crate::{ + lazy::{ + binary::{ + encoded_value::EncodedValue, + raw::{ + v1_1::{ + annotations_iterator::RawBinaryAnnotationsIterator, + immutable_buffer::ImmutableBuffer, Header, + }, + value::ValueParseResult, + }, + }, + decoder::{private::LazyRawValuePrivate, LazyDecoder, LazyRawValue}, + encoding::BinaryEncoding_1_1, + raw_value_ref::RawValueRef, + }, + result::IonFailure, + types::SymbolId, + IonResult, IonType, RawSymbolTokenRef, +}; + +#[derive(Debug, Copy, Clone)] +pub struct LazyRawBinaryValue_1_1<'top> { + pub(crate) encoded_value: EncodedValue
, + pub(crate) input: ImmutableBuffer<'top>, +} + +impl<'top> LazyRawValuePrivate<'top> for LazyRawBinaryValue_1_1<'top> { + fn field_name(&self) -> IonResult> { + if let Some(field_id) = self.encoded_value.field_id { + Ok(RawSymbolTokenRef::SymbolId(field_id)) + } else { + IonResult::illegal_operation( + "requested field name, but value was not in a struct field", + ) + } + } +} + +impl<'top> LazyRawValue<'top, BinaryEncoding_1_1> for LazyRawBinaryValue_1_1<'top> { + fn ion_type(&self) -> IonType { + self.ion_type() + } + + fn is_null(&self) -> bool { + self.is_null() + } + + fn annotations(&self) -> ::AnnotationsIterator<'top> { + self.annotations() + } + + fn read(&self) -> IonResult> { + self.read() + } + + fn range(&self) -> Range { + self.encoded_value.annotated_value_range() + } + + fn span(&self) -> &[u8] { + let range = self.range(); + let local_range = (range.start - self.input.offset())..(range.end - self.input.offset()); + &self.input.bytes()[local_range] + } + +} + +impl<'top> LazyRawBinaryValue_1_1<'top> { + /// Indicates the Ion data type of this value. Calling this method does not require additional + /// parsing of the input stream. + pub fn ion_type(&self) -> IonType { + self.encoded_value.ion_type() + } + + pub fn is_null(&self) -> bool { + self.encoded_value.header().is_null() + } + + /// Returns `true` if this value has a non-empty annotations sequence; otherwise, returns `false`. + fn has_annotations(&self) -> bool { + self.encoded_value.has_annotations() + } + + /// Returns an `ImmutableBuffer` that contains the bytes comprising this value's encoded + /// annotations sequence. + fn annotations_sequence(&self) -> ImmutableBuffer<'top> { + let offset_and_length = self + .encoded_value + .annotations_sequence_offset() + .map(|offset| { + ( + offset, + self.encoded_value.annotations_sequence_length().unwrap(), + ) + }); + let (sequence_offset, sequence_length) = match offset_and_length { + None => { + return self + .input + // A value's binary layout is: + // + // field_id? | annotation_sequence? | type_descriptor | length? | body + // + // If this value has no annotation sequence, then the first byte after the + // field ID is the type descriptor. + // + // If there is no field ID, field_id_length will be zero. + .slice(self.encoded_value.field_id_length as usize, 0); + } + Some(offset_and_length) => offset_and_length, + }; + let local_sequence_offset = sequence_offset - self.input.offset(); + + self.input.slice(local_sequence_offset, sequence_length) + } + + /// Returns an iterator over this value's unresolved annotation symbols. + pub fn annotations(&self) -> RawBinaryAnnotationsIterator<'top> { + RawBinaryAnnotationsIterator::new(self.annotations_sequence()) + } + + /// Reads this value's data, returning it as a [`RawValueRef`]. If this value is a container, + /// calling this method will not read additional data; the `RawValueRef` will provide a + /// [`LazyRawBinarySequence`] or [`LazyStruct`](crate::lazy::struct::LazyStruct) + /// that can be traversed to access the container's contents. + pub fn read(&self) -> ValueParseResult<'top, BinaryEncoding_1_1> { + if self.is_null() { + let raw_value_ref = RawValueRef::Null(self.ion_type()); + return Ok(raw_value_ref); + } + + match self.ion_type() { + IonType::Null => unreachable!("all null types handled above"), + IonType::Bool => self.read_bool(), + IonType::Int => self.read_int(), + IonType::Float => self.read_float(), + IonType::Decimal => self.read_decimal(), + IonType::Timestamp => self.read_timestamp(), + IonType::Symbol => self.read_symbol(), + IonType::String => self.read_string(), + IonType::Clob => self.read_clob(), + IonType::Blob => self.read_blob(), + IonType::List => self.read_list(), + IonType::SExp => self.read_sexp(), + IonType::Struct => self.read_struct(), + } + } + + /// Returns the encoded byte slice representing this value's data. + fn value_body(&self) -> IonResult<&'top [u8]> { + let value_total_length = self.encoded_value.total_length(); + if self.input.len() < value_total_length { + eprintln!("[value_body] Incomplete {:?}", self); + return IonResult::incomplete( + "only part of the requested value is available in the buffer", + self.input.offset(), + ); + } + let value_body_length = self.encoded_value.value_length(); + let value_offset = value_total_length - value_body_length; + Ok(self.input.bytes_range(value_offset, value_body_length)) + } + + /// Returns an [`ImmutableBuffer`] containing whatever bytes of this value's body are currently + /// available. This method is used to construct lazy containers, which are not required to be + /// fully buffered before reading begins. + pub(crate) fn available_body(&self) -> ImmutableBuffer<'top> { + let value_total_length = self.encoded_value.total_length(); + let value_body_length = self.encoded_value.value_length(); + let value_offset = value_total_length - value_body_length; + + let bytes_needed = std::cmp::min(self.input.len() - value_offset, value_body_length); + let buffer_slice = self.input.slice(value_offset, bytes_needed); + buffer_slice + } + + /// If this value is within a struct, returns its associated field name as a `Some(SymbolID)`. + /// Otherwise, returns `None`. + pub(crate) fn field_id(&self) -> Option { + self.encoded_value.field_id + } + + /// Helper method called by [`Self::read`]. Reads the current value as a bool. + fn read_bool(&self) -> ValueParseResult<'top, BinaryEncoding_1_1> { + unimplemented!(); + } + + /// Helper method called by [`Self::read`]. Reads the current value as an int. + fn read_int(&self) -> ValueParseResult<'top, BinaryEncoding_1_1> { + unimplemented!(); + } + + /// Helper method called by [`Self::read`]. Reads the current value as a float. + fn read_float(&self) -> ValueParseResult<'top, BinaryEncoding_1_1> { + unimplemented!(); + } + + /// Helper method called by [`Self::read`]. Reads the current value as a decimal. + fn read_decimal(&self) -> ValueParseResult<'top, BinaryEncoding_1_1> { + unimplemented!(); + } + + /// Helper method called by [`Self::read`]. Reads the current value as a timestamp. + fn read_timestamp(&self) -> ValueParseResult<'top, BinaryEncoding_1_1> { + unimplemented!(); + } + + /// Helper method called by [`Self::read_symbol`]. Reads the current value as a symbol ID. + fn read_symbol_id(&self) -> IonResult { + unimplemented!(); + } + + /// Helper method called by [`Self::read`]. Reads the current value as a symbol. + fn read_symbol(&self) -> ValueParseResult<'top, BinaryEncoding_1_1> { + unimplemented!(); + } + + /// Helper method called by [`Self::read`]. Reads the current value as a string. + fn read_string(&self) -> ValueParseResult<'top, BinaryEncoding_1_1> { + unimplemented!(); + } + + /// Helper method called by [`Self::read`]. Reads the current value as a blob. + fn read_blob(&self) -> ValueParseResult<'top, BinaryEncoding_1_1> { + unimplemented!(); + } + + /// Helper method called by [`Self::read`]. Reads the current value as a clob. + fn read_clob(&self) -> ValueParseResult<'top, BinaryEncoding_1_1> { + unimplemented!(); + } + + /// Helper method called by [`Self::read`]. Reads the current value as an S-expression. + fn read_sexp(&self) -> ValueParseResult<'top, BinaryEncoding_1_1> { + unimplemented!(); + } + + /// Helper method called by [`Self::read`]. Reads the current value as a list. + fn read_list(&self) -> ValueParseResult<'top, BinaryEncoding_1_1> { + unimplemented!(); + } + + /// Helper method called by [`Self::read`]. Reads the current value as a struct. + fn read_struct(&self) -> ValueParseResult<'top, BinaryEncoding_1_1> { + unimplemented!(); + } +} diff --git a/src/lazy/binary/raw/value.rs b/src/lazy/binary/raw/value.rs index 0757c4c1..fdbe6f3f 100644 --- a/src/lazy/binary/raw/value.rs +++ b/src/lazy/binary/raw/value.rs @@ -1,11 +1,14 @@ +#![allow(non_camel_case_types)] + use crate::binary::int::DecodedInt; +use crate::binary::non_blocking::type_descriptor::Header; use crate::binary::uint::DecodedUInt; use crate::lazy::binary::encoded_value::EncodedValue; use crate::lazy::binary::immutable_buffer::ImmutableBuffer; use crate::lazy::binary::raw::annotations_iterator::RawBinaryAnnotationsIterator; -use crate::lazy::binary::raw::r#struct::LazyRawBinaryStruct; +use crate::lazy::binary::raw::r#struct::LazyRawBinaryStruct_1_0; use crate::lazy::binary::raw::sequence::{ - LazyRawBinaryList, LazyRawBinarySExp, LazyRawBinarySequence, + LazyRawBinaryList_1_0, LazyRawBinarySExp_1_0, LazyRawBinarySequence_1_0, }; use crate::lazy::decoder::private::LazyRawValuePrivate; use crate::lazy::decoder::LazyRawValue; @@ -22,7 +25,7 @@ use std::{fmt, mem}; /// A value that has been identified in the input stream but whose data has not yet been read. /// -/// If only part of the value is in the input buffer, calls to [`LazyRawBinaryValue::read`] (which examines +/// If only part of the value is in the input buffer, calls to [`LazyRawBinaryValue_1_0::read`] (which examines /// bytes beyond the value's header) may return [crate::IonError::Incomplete]. /// /// `LazyRawValue`s are "unresolved," which is to say that symbol values, annotations, and @@ -30,24 +33,24 @@ use std::{fmt, mem}; /// includes a text definition for these items whenever one exists, see /// [`crate::lazy::value::LazyValue`]. #[derive(Clone, Copy)] -pub struct LazyRawBinaryValue<'top> { - pub(crate) encoded_value: EncodedValue, +pub struct LazyRawBinaryValue_1_0<'top> { + pub(crate) encoded_value: EncodedValue
, pub(crate) input: ImmutableBuffer<'top>, } -impl<'top> Debug for LazyRawBinaryValue<'top> { +impl<'top> Debug for LazyRawBinaryValue_1_0<'top> { fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result { write!( f, - "LazyRawBinaryValue {{\n val={:?},\n buf={:?}\n}}\n", + "LazyRawBinaryValue_1_0 {{\n val={:?},\n buf={:?}\n}}\n", self.encoded_value, self.input ) } } -type ValueParseResult<'top, F> = IonResult>; +pub type ValueParseResult<'top, F> = IonResult>; -impl<'top> LazyRawValuePrivate<'top> for LazyRawBinaryValue<'top> { +impl<'top> LazyRawValuePrivate<'top> for LazyRawBinaryValue_1_0<'top> { fn field_name(&self) -> IonResult> { if let Some(field_id) = self.encoded_value.field_id { Ok(RawSymbolTokenRef::SymbolId(field_id)) @@ -59,7 +62,7 @@ impl<'top> LazyRawValuePrivate<'top> for LazyRawBinaryValue<'top> { } } -impl<'top> LazyRawValue<'top, BinaryEncoding_1_0> for LazyRawBinaryValue<'top> { +impl<'top> LazyRawValue<'top, BinaryEncoding_1_0> for LazyRawBinaryValue_1_0<'top> { fn ion_type(&self) -> IonType { self.ion_type() } @@ -88,7 +91,7 @@ impl<'top> LazyRawValue<'top, BinaryEncoding_1_0> for LazyRawBinaryValue<'top> { } } -impl<'top> LazyRawBinaryValue<'top> { +impl<'top> LazyRawBinaryValue_1_0<'top> { /// Indicates the Ion data type of this value. Calling this method does not require additional /// parsing of the input stream. pub fn ion_type(&self) -> IonType { @@ -431,12 +434,12 @@ impl<'top> LazyRawBinaryValue<'top> { /// Helper method called by [`Self::read`]. Reads the current value as an S-expression. fn read_sexp(&self) -> ValueParseResult<'top, BinaryEncoding_1_0> { debug_assert!(self.encoded_value.ion_type() == IonType::SExp); - let lazy_value = LazyRawBinaryValue { + let lazy_value = LazyRawBinaryValue_1_0 { encoded_value: self.encoded_value, input: self.input, }; - let lazy_sequence = LazyRawBinarySequence { value: lazy_value }; - let lazy_sexp = LazyRawBinarySExp { + let lazy_sequence = LazyRawBinarySequence_1_0 { value: lazy_value }; + let lazy_sexp = LazyRawBinarySExp_1_0 { sequence: lazy_sequence, }; Ok(RawValueRef::SExp(lazy_sexp)) @@ -445,12 +448,12 @@ impl<'top> LazyRawBinaryValue<'top> { /// Helper method called by [`Self::read`]. Reads the current value as a list. fn read_list(&self) -> ValueParseResult<'top, BinaryEncoding_1_0> { debug_assert!(self.encoded_value.ion_type() == IonType::List); - let lazy_value = LazyRawBinaryValue { + let lazy_value = LazyRawBinaryValue_1_0 { encoded_value: self.encoded_value, input: self.input, }; - let lazy_sequence = LazyRawBinarySequence { value: lazy_value }; - let lazy_list = LazyRawBinaryList { + let lazy_sequence = LazyRawBinarySequence_1_0 { value: lazy_value }; + let lazy_list = LazyRawBinaryList_1_0 { sequence: lazy_sequence, }; Ok(RawValueRef::List(lazy_list)) @@ -459,18 +462,18 @@ impl<'top> LazyRawBinaryValue<'top> { /// Helper method called by [`Self::read`]. Reads the current value as a struct. fn read_struct(&self) -> ValueParseResult<'top, BinaryEncoding_1_0> { debug_assert!(self.encoded_value.ion_type() == IonType::Struct); - let lazy_value = LazyRawBinaryValue { + let lazy_value = LazyRawBinaryValue_1_0 { encoded_value: self.encoded_value, input: self.input, }; - let lazy_struct = LazyRawBinaryStruct { value: lazy_value }; + let lazy_struct = LazyRawBinaryStruct_1_0 { value: lazy_value }; Ok(RawValueRef::Struct(lazy_struct)) } } #[cfg(test)] mod tests { - use crate::lazy::binary::raw::reader::LazyRawBinaryReader; + use crate::lazy::binary::raw::reader::LazyRawBinaryReader_1_0; use crate::lazy::binary::test_utilities::to_binary_ion; use crate::IonResult; @@ -482,7 +485,7 @@ mod tests { foo // binary writer will omit the symtab if we don't use a symbol "#, )?; - let mut reader = LazyRawBinaryReader::new(data); + let mut reader = LazyRawBinaryReader_1_0::new(data); let _ivm = reader.next()?.expect_ivm()?; let value = reader.next()?.expect_value()?; let annotations_sequence = value.annotations_sequence(); diff --git a/src/lazy/encoding.rs b/src/lazy/encoding.rs index 2431b4a6..0b7ddcea 100644 --- a/src/lazy/encoding.rs +++ b/src/lazy/encoding.rs @@ -2,10 +2,17 @@ use crate::lazy::any_encoding::LazyRawAnyValue; use crate::lazy::binary::raw::annotations_iterator::RawBinaryAnnotationsIterator; -use crate::lazy::binary::raw::r#struct::LazyRawBinaryStruct; -use crate::lazy::binary::raw::reader::LazyRawBinaryReader; -use crate::lazy::binary::raw::sequence::{LazyRawBinaryList, LazyRawBinarySExp}; -use crate::lazy::binary::raw::value::LazyRawBinaryValue; +use crate::lazy::binary::raw::r#struct::LazyRawBinaryStruct_1_0; +use crate::lazy::binary::raw::reader::LazyRawBinaryReader_1_0; +use crate::lazy::binary::raw::sequence::{LazyRawBinaryList_1_0, LazyRawBinarySExp_1_0}; +use crate::lazy::binary::raw::v1_1::reader::LazyRawBinaryReader_1_1; +use crate::lazy::binary::raw::v1_1::{ + RawBinaryAnnotationsIterator as RawBinaryAnnotationsIterator_1_1, + r#struct::LazyRawBinaryStruct_1_1, + sequence::{LazyRawBinaryList_1_1, LazyRawBinarySExp_1_1}, + value::LazyRawBinaryValue_1_1, +}; +use crate::lazy::binary::raw::value::LazyRawBinaryValue_1_0; use crate::lazy::decoder::LazyDecoder; use crate::lazy::never::Never; use crate::lazy::text::raw::r#struct::LazyRawTextStruct_1_0; @@ -37,8 +44,8 @@ pub struct BinaryEncoding_1_0; #[derive(Copy, Clone, Debug)] pub struct BinaryEncoding_1_1; -impl BinaryEncoding for BinaryEncoding_1_0 {} -impl BinaryEncoding for BinaryEncoding_1_1 {} +impl<'top> BinaryEncoding<'top> for BinaryEncoding_1_0 {} +impl<'top> BinaryEncoding<'top> for BinaryEncoding_1_1 {} /// The Ion 1.0 text encoding. #[derive(Copy, Clone, Debug)] @@ -70,7 +77,7 @@ impl Encoding for TextEncoding_1_1 { } /// Marker trait for binary encodings of any version. -pub trait BinaryEncoding: Encoding {} +pub trait BinaryEncoding<'top>: Encoding + LazyDecoder {} /// Marker trait for text encodings. pub trait TextEncoding<'top>: @@ -100,12 +107,12 @@ pub trait EncodingWithMacroSupport {} impl EncodingWithMacroSupport for TextEncoding_1_1 {} impl LazyDecoder for BinaryEncoding_1_0 { - type Reader<'data> = LazyRawBinaryReader<'data>; + type Reader<'data> = LazyRawBinaryReader_1_0<'data>; type ReaderSavedState = (); - type Value<'top> = LazyRawBinaryValue<'top>; - type SExp<'top> = LazyRawBinarySExp<'top>; - type List<'top> = LazyRawBinaryList<'top>; - type Struct<'top> = LazyRawBinaryStruct<'top>; + type Value<'top> = LazyRawBinaryValue_1_0<'top>; + type SExp<'top> = LazyRawBinarySExp_1_0<'top>; + type List<'top> = LazyRawBinaryList_1_0<'top>; + type Struct<'top> = LazyRawBinaryStruct_1_0<'top>; type AnnotationsIterator<'top> = RawBinaryAnnotationsIterator<'top>; // Macros are not supported in Ion 1.0 type EExpression<'top> = Never; @@ -134,6 +141,18 @@ impl LazyDecoder for TextEncoding_1_1 { type EExpression<'top> = RawTextEExpression_1_1<'top>; } +impl LazyDecoder for BinaryEncoding_1_1 { + type Reader<'data> = LazyRawBinaryReader_1_1<'data>; + type ReaderSavedState = (); + type Value<'top> = LazyRawBinaryValue_1_1<'top>; + type SExp<'top> = LazyRawBinarySExp_1_1<'top>; + type List<'top> = LazyRawBinaryList_1_1<'top>; + type Struct<'top> = LazyRawBinaryStruct_1_1<'top>; + type AnnotationsIterator<'top> = RawBinaryAnnotationsIterator_1_1<'top>; + // Macros are not supported in Ion 1.0 + type EExpression<'top> = Never; +} + /// Marker trait for types that represent value literals in an Ion stream of some encoding. // This trait is used to provide generic conversion implementation of types used as a // `LazyDecoder::Value` to `ExpandedValueSource`. That is: @@ -148,5 +167,6 @@ pub trait RawValueLiteral {} impl<'top, E: TextEncoding<'top>> RawValueLiteral for MatchedRawTextValue<'top, E> {} impl<'top, E: TextEncoding<'top>> RawValueLiteral for LazyRawTextValue<'top, E> {} -impl<'top> RawValueLiteral for LazyRawBinaryValue<'top> {} +impl<'top> RawValueLiteral for LazyRawBinaryValue_1_0<'top> {} +impl<'top> RawValueLiteral for LazyRawBinaryValue_1_1<'top> {} impl<'top> RawValueLiteral for LazyRawAnyValue<'top> {} diff --git a/src/lazy/never.rs b/src/lazy/never.rs index bc8c1273..b8ce48f5 100644 --- a/src/lazy/never.rs +++ b/src/lazy/never.rs @@ -8,7 +8,7 @@ use crate::lazy::encoder::value_writer::{ use crate::lazy::encoder::write_as_ion::WriteAsIon; use crate::lazy::expanded::macro_evaluator::{MacroExpr, RawEExpression}; use crate::lazy::text::raw::v1_1::reader::MacroIdRef; -use crate::raw_symbol_token_ref::AsRawSymbolTokenRef; +use crate::raw_symbol_token_ref::{RawSymbolTokenRef, AsRawSymbolTokenRef}; use crate::IonResult; /// An uninhabited type that signals to the compiler that related code paths are not reachable. @@ -38,6 +38,14 @@ impl<'top, D: LazyDecoder> From for MacroExpr<'top, D> { } } +impl Iterator for Never { + type Item = IonResult>; + + fn next(&mut self) -> Option { + unreachable!("Never implementation cannot iterate") + } +} + impl AnnotatableValueWriter for Never { type ValueWriter = Never; type AnnotatedValueWriter<'a, SymbolType: AsRawSymbolTokenRef + 'a> = Never where Self: 'a; diff --git a/src/lazy/raw_value_ref.rs b/src/lazy/raw_value_ref.rs index 7c169960..c73c7466 100644 --- a/src/lazy/raw_value_ref.rs +++ b/src/lazy/raw_value_ref.rs @@ -184,7 +184,7 @@ impl<'top, D: LazyDecoder> RawValueRef<'top, D> { #[cfg(test)] mod tests { - use crate::lazy::binary::raw::reader::LazyRawBinaryReader; + use crate::lazy::binary::raw::reader::LazyRawBinaryReader_1_0 as LazyRawBinaryReader; use crate::lazy::binary::test_utilities::to_binary_ion; use crate::{Decimal, IonResult, IonType, RawSymbolTokenRef, Timestamp}; diff --git a/src/lazy/struct.rs b/src/lazy/struct.rs index 2bc4da91..598d3231 100644 --- a/src/lazy/struct.rs +++ b/src/lazy/struct.rs @@ -1,9 +1,11 @@ +#![allow(non_camel_case_types)] + use std::fmt; use std::fmt::{Debug, Formatter}; use crate::element::builders::StructBuilder; use crate::lazy::decoder::LazyDecoder; -use crate::lazy::encoding::BinaryEncoding_1_0; +use crate::lazy::encoding::{BinaryEncoding_1_0, BinaryEncoding_1_1}; use crate::lazy::expanded::r#struct::{ ExpandedStructIterator, LazyExpandedField, LazyExpandedStruct, }; @@ -49,7 +51,7 @@ pub struct LazyStruct<'top, D: LazyDecoder> { pub(crate) expanded_struct: LazyExpandedStruct<'top, D>, } -pub type LazyBinaryStruct<'top> = LazyStruct<'top, BinaryEncoding_1_0>; +pub type LazyBinaryStruct_1_0<'top> = LazyStruct<'top, BinaryEncoding_1_0>; // Best-effort debug formatting for LazyStruct. Any failures that occur during reading will result // in the output being silently truncated. From 8892925cf8ee0f498ab3c7fca4c3420b848654ed Mon Sep 17 00:00:00 2001 From: Richard Giliam Date: Fri, 5 Apr 2024 14:34:48 -0700 Subject: [PATCH 02/17] Remove debug prints, and apply cargo fmt --- src/lazy/any_encoding.rs | 40 ++++++++++++-------- src/lazy/binary/raw/v1_1/immutable_buffer.rs | 10 ----- src/lazy/binary/raw/v1_1/reader.rs | 11 +++--- src/lazy/binary/raw/v1_1/value.rs | 1 - src/lazy/encoding.rs | 2 +- src/lazy/never.rs | 2 +- 6 files changed, 32 insertions(+), 34 deletions(-) diff --git a/src/lazy/any_encoding.rs b/src/lazy/any_encoding.rs index a6e89f73..12b30fdb 100644 --- a/src/lazy/any_encoding.rs +++ b/src/lazy/any_encoding.rs @@ -7,11 +7,24 @@ use bumpalo::Bump as BumpAllocator; use crate::lazy::any_encoding::RawReaderKind::{Binary_1_0, Text_1_0}; use crate::lazy::binary::raw::annotations_iterator::RawBinaryAnnotationsIterator as RawBinaryAnnotationsIterator_1_0; -use crate::lazy::binary::raw::r#struct::{LazyRawBinaryStruct_1_0, RawBinaryStructIterator as RawBinaryStructIterator_1_0}; +use crate::lazy::binary::raw::r#struct::{ + LazyRawBinaryStruct_1_0, RawBinaryStructIterator as RawBinaryStructIterator_1_0, +}; use crate::lazy::binary::raw::reader::LazyRawBinaryReader_1_0; use crate::lazy::binary::raw::sequence::{ - LazyRawBinaryList_1_0, LazyRawBinarySExp_1_0, RawBinarySequenceIterator as RawBinarySequenceIterator_1_0, + LazyRawBinaryList_1_0, LazyRawBinarySExp_1_0, + RawBinarySequenceIterator as RawBinarySequenceIterator_1_0, +}; +use crate::lazy::binary::raw::v1_1::r#struct::{ + LazyRawBinaryStruct_1_1, RawBinaryStructIterator_1_1, }; +use crate::lazy::binary::raw::v1_1::reader::LazyRawBinaryReader_1_1; +use crate::lazy::binary::raw::v1_1::sequence::{ + LazyRawBinaryList_1_1, LazyRawBinarySExp_1_1, + RawBinarySequenceIterator as RawBinarySequenceIterator_1_1, +}; +use crate::lazy::binary::raw::v1_1::value::LazyRawBinaryValue_1_1; +use crate::lazy::binary::raw::v1_1::RawBinaryAnnotationsIterator as RawBinaryAnnotationsIterator_1_1; use crate::lazy::binary::raw::value::LazyRawBinaryValue_1_0; use crate::lazy::decoder::private::{LazyContainerPrivate, LazyRawValuePrivate}; use crate::lazy::decoder::{ @@ -30,16 +43,9 @@ use crate::lazy::text::raw::reader::LazyRawTextReader_1_0; use crate::lazy::text::raw::sequence::{ LazyRawTextList_1_0, LazyRawTextSExp_1_0, RawTextListIterator_1_0, RawTextSExpIterator_1_0, }; -use crate::lazy::binary::raw::v1_1::RawBinaryAnnotationsIterator as RawBinaryAnnotationsIterator_1_1; -use crate::lazy::binary::raw::v1_1::value::LazyRawBinaryValue_1_1; -use crate::lazy::binary::raw::v1_1::r#struct::{LazyRawBinaryStruct_1_1, RawBinaryStructIterator_1_1}; -use crate::lazy::binary::raw::v1_1::reader::LazyRawBinaryReader_1_1; -use crate::lazy::binary::raw::v1_1::sequence::{ - LazyRawBinaryList_1_1, LazyRawBinarySExp_1_1, RawBinarySequenceIterator as RawBinarySequenceIterator_1_1 -}; use crate::lazy::text::raw::v1_1::reader::{ - LazyRawTextList_1_1, LazyRawTextSExp_1_1, LazyRawTextStruct_1_1, MacroIdRef, RawTextEExpression_1_1, - RawTextSequenceCacheIterator_1_1, RawTextStructCacheIterator_1_1, + LazyRawTextList_1_1, LazyRawTextSExp_1_1, LazyRawTextStruct_1_1, MacroIdRef, + RawTextEExpression_1_1, RawTextSequenceCacheIterator_1_1, RawTextStructCacheIterator_1_1, }; use crate::lazy::text::value::{ LazyRawTextValue_1_0, LazyRawTextValue_1_1, RawTextAnnotationsIterator, @@ -95,7 +101,9 @@ impl<'top> RawEExpression<'top, AnyEncoding> for LazyRawAnyEExpression<'top> { LazyRawAnyEExpressionKind::Text_1_0(_) => unreachable!("macro in text Ion 1.0"), LazyRawAnyEExpressionKind::Binary_1_0(_) => unreachable!("macro in binary Ion 1.0"), LazyRawAnyEExpressionKind::Text_1_1(ref m) => m.id(), - LazyRawAnyEExpressionKind::Binary_1_1(_) => unimplemented!("macro in binary Ion 1.1 not implemented"), + LazyRawAnyEExpressionKind::Binary_1_1(_) => { + unimplemented!("macro in binary Ion 1.1 not implemented") + } } } @@ -106,7 +114,9 @@ impl<'top> RawEExpression<'top, AnyEncoding> for LazyRawAnyEExpression<'top> { LazyRawAnyEExpressionKind::Text_1_1(m) => LazyRawAnyMacroArgsIterator { encoding: LazyRawAnyMacroArgsIteratorKind::Text_1_1(m.raw_arguments()), }, - LazyRawAnyEExpressionKind::Binary_1_1(_) => unimplemented!("macro in binary Ion 1.0 not implemented"), + LazyRawAnyEExpressionKind::Binary_1_1(_) => { + unimplemented!("macro in binary Ion 1.0 not implemented") + } } } } @@ -713,7 +723,7 @@ impl<'data> Iterator for RawAnyListIterator<'data> { RawAnyListIteratorKind::Text_1_1(i) => i .next() .map(|value_result| value_result.map(|value| value.into())), - RawAnyListIteratorKind::Binary_1_1(i) => i + RawAnyListIteratorKind::Binary_1_1(i) => i .next() .map(|value_result| value_result.map(|value| value.into())), } @@ -824,7 +834,7 @@ impl<'data> LazyContainerPrivate<'data, AnyEncoding> for LazyRawAnySExp<'data> { }, LazyRawValueKind::Binary_1_1(v) => LazyRawAnySExp { encoding: LazyRawSExpKind::Binary_1_1(LazyRawBinarySExp_1_1::from_value(v)), - } + }, } } } diff --git a/src/lazy/binary/raw/v1_1/immutable_buffer.rs b/src/lazy/binary/raw/v1_1/immutable_buffer.rs index 5bafed85..0cf6c966 100644 --- a/src/lazy/binary/raw/v1_1/immutable_buffer.rs +++ b/src/lazy/binary/raw/v1_1/immutable_buffer.rs @@ -479,15 +479,6 @@ impl<'a> ImmutableBuffer<'a> { return IonResult::decoding_error("found an annotations wrapper with no annotations"); } - println!( - "Annotations and value lengths: {}", - annotations_and_value_length - ); - println!("Annotations length: {}", annotations_length.size_in_bytes()); - println!( - "Annotations length .. Value: {}", - annotations_length.value() - ); // Validate that the annotated value is not missing. let expected_value_length = annotations_and_value_length - annotations_length.size_in_bytes() @@ -534,7 +525,6 @@ impl<'a> ImmutableBuffer<'a> { // allow the hot path to be better optimized. pub fn read_nop_pad(self) -> ParseResult<'a, usize> { let type_descriptor = self.peek_type_descriptor()?; - println!("type descriptor: {:?}", type_descriptor); // Advance beyond the type descriptor let remaining = self.consume(1); // If the type descriptor says we should skip more bytes, skip them. diff --git a/src/lazy/binary/raw/v1_1/reader.rs b/src/lazy/binary/raw/v1_1/reader.rs index 39342806..554c3ff1 100644 --- a/src/lazy/binary/raw/v1_1/reader.rs +++ b/src/lazy/binary/raw/v1_1/reader.rs @@ -79,17 +79,14 @@ impl<'data> LazyRawBinaryReader_1_1<'data> { let mut type_descriptor = buffer.peek_type_descriptor()?; if type_descriptor.is_nop() { - println!("Nop, reading sled"); (_, buffer) = buffer.consume_nop_padding(type_descriptor)?; if buffer.is_empty() { - println!("Reached end of stream"); return Ok(LazyRawStreamItem::::EndOfStream); } } if type_descriptor.is_ivm_start() { return self.read_ivm(buffer); } - println!("Reading value"); self.read_value(buffer) } } @@ -111,9 +108,11 @@ impl<'data> LazyRawReader<'data, BinaryEncoding_1_1> for LazyRawBinaryReader_1_1 self.next() } - fn resume_at_offset(data: &'data [u8], offset: usize, _saved_state: ::ReaderSavedState) - -> Self - { + fn resume_at_offset( + data: &'data [u8], + offset: usize, + _saved_state: ::ReaderSavedState, + ) -> Self { Self::new_with_offset(data, offset) } diff --git a/src/lazy/binary/raw/v1_1/value.rs b/src/lazy/binary/raw/v1_1/value.rs index ff7de6db..774a2e8b 100644 --- a/src/lazy/binary/raw/v1_1/value.rs +++ b/src/lazy/binary/raw/v1_1/value.rs @@ -67,7 +67,6 @@ impl<'top> LazyRawValue<'top, BinaryEncoding_1_1> for LazyRawBinaryValue_1_1<'to let local_range = (range.start - self.input.offset())..(range.end - self.input.offset()); &self.input.bytes()[local_range] } - } impl<'top> LazyRawBinaryValue_1_1<'top> { diff --git a/src/lazy/encoding.rs b/src/lazy/encoding.rs index 0b7ddcea..f6c8fd14 100644 --- a/src/lazy/encoding.rs +++ b/src/lazy/encoding.rs @@ -7,10 +7,10 @@ use crate::lazy::binary::raw::reader::LazyRawBinaryReader_1_0; use crate::lazy::binary::raw::sequence::{LazyRawBinaryList_1_0, LazyRawBinarySExp_1_0}; use crate::lazy::binary::raw::v1_1::reader::LazyRawBinaryReader_1_1; use crate::lazy::binary::raw::v1_1::{ - RawBinaryAnnotationsIterator as RawBinaryAnnotationsIterator_1_1, r#struct::LazyRawBinaryStruct_1_1, sequence::{LazyRawBinaryList_1_1, LazyRawBinarySExp_1_1}, value::LazyRawBinaryValue_1_1, + RawBinaryAnnotationsIterator as RawBinaryAnnotationsIterator_1_1, }; use crate::lazy::binary::raw::value::LazyRawBinaryValue_1_0; use crate::lazy::decoder::LazyDecoder; diff --git a/src/lazy/never.rs b/src/lazy/never.rs index b8ce48f5..52a1b583 100644 --- a/src/lazy/never.rs +++ b/src/lazy/never.rs @@ -8,7 +8,7 @@ use crate::lazy::encoder::value_writer::{ use crate::lazy::encoder::write_as_ion::WriteAsIon; use crate::lazy::expanded::macro_evaluator::{MacroExpr, RawEExpression}; use crate::lazy::text::raw::v1_1::reader::MacroIdRef; -use crate::raw_symbol_token_ref::{RawSymbolTokenRef, AsRawSymbolTokenRef}; +use crate::raw_symbol_token_ref::{AsRawSymbolTokenRef, RawSymbolTokenRef}; use crate::IonResult; /// An uninhabited type that signals to the compiler that related code paths are not reachable. From 83aeb0a28711275c34ccef000d2af6c611dd8b70 Mon Sep 17 00:00:00 2001 From: Richard Giliam Date: Sun, 7 Apr 2024 22:56:04 -0700 Subject: [PATCH 03/17] Fix value lengths --- src/lazy/binary/raw/v1_1/immutable_buffer.rs | 97 +++++++------------- src/lazy/binary/raw/v1_1/reader.rs | 13 +-- src/lazy/binary/raw/v1_1/type_descriptor.rs | 72 ++++++--------- src/lazy/encoder/binary/v1_1/flex_uint.rs | 2 +- 4 files changed, 62 insertions(+), 122 deletions(-) diff --git a/src/lazy/binary/raw/v1_1/immutable_buffer.rs b/src/lazy/binary/raw/v1_1/immutable_buffer.rs index 0cf6c966..f81c9475 100644 --- a/src/lazy/binary/raw/v1_1/immutable_buffer.rs +++ b/src/lazy/binary/raw/v1_1/immutable_buffer.rs @@ -6,9 +6,12 @@ use crate::binary::int::DecodedInt; use crate::binary::uint::DecodedUInt; use crate::binary::var_int::VarInt; use crate::binary::var_uint::VarUInt; -use crate::lazy::binary::encoded_value::EncodedValue; +use crate::lazy::binary::encoded_value::{EncodedHeader, EncodedValue}; use crate::lazy::binary::raw::v1_1::value::LazyRawBinaryValue_1_1; -use crate::lazy::binary::raw::v1_1::{Header, TypeDescriptor, ION_1_1_TYPE_DESCRIPTORS}; +use crate::lazy::binary::raw::v1_1::{ + Header, LengthType, TypeDescriptor, ION_1_1_TYPE_DESCRIPTORS, +}; +use crate::lazy::encoder::binary::v1_1::fixed_int::FixedInt; use crate::lazy::encoder::binary::v1_1::flex_int::FlexInt; use crate::lazy::encoder::binary::v1_1::flex_uint::FlexUInt; use crate::result::IonFailure; @@ -525,16 +528,24 @@ impl<'a> ImmutableBuffer<'a> { // allow the hot path to be better optimized. pub fn read_nop_pad(self) -> ParseResult<'a, usize> { let type_descriptor = self.peek_type_descriptor()?; - // Advance beyond the type descriptor - let remaining = self.consume(1); - // If the type descriptor says we should skip more bytes, skip them. - let (length, remaining) = remaining.read_length(type_descriptor.length_code)?; - if remaining.len() < length.value() { - return IonResult::incomplete("a NOP", remaining.offset()); - } - let remaining = remaining.consume(length.value()); - let total_nop_pad_size = 1 + length.size_in_bytes() + length.value(); - Ok((total_nop_pad_size, remaining)) + + // We need to determine the size of the nop.. + let (size, remaining) = if type_descriptor.length_code == 0xC { + (1, self.consume(1)) + } else if type_descriptor.length_code == 0xD { + // We have a flexuint telling us how long our nop is. + let after_header = self.consume(1); + let (len, rest) = after_header.read_flex_uint()?; + ( + len.value() as usize + len.size_in_bytes(), + rest.consume(len.value() as usize), + ) + } else { + return IonResult::decoding_error("Invalid NOP sub-type"); + }; + + let total_nop_pad_size = 1 + size; + Ok((total_nop_pad_size as usize, remaining)) } /// Calls [`Self::read_nop_pad`] in a loop until the buffer is empty or a type descriptor @@ -561,62 +572,20 @@ impl<'a> ImmutableBuffer<'a> { /// from the buffer to interpret as the value's length. If it is successful, returns an `Ok(_)` /// containing a [VarUInt] representation of the value's length. If no additional bytes were /// read, the returned `VarUInt`'s `size_in_bytes()` method will return `0`. - pub fn read_value_length(self, header: Header) -> ParseResult<'a, VarUInt> { - use IonType::*; - // Some type-specific `length` field overrides - let length_code = match header.ion_type { - // Null (0x0F) and Boolean (0x10, 0x11) are the only types that don't have/use a `length` - // field; the header contains the complete value. - Null | Bool => 0, - // If a struct has length = 1, its fields are ordered and the actual length follows. - // For the time being, this reader does not have any special handling for this case. - // Use `0xE` (14) as the length code instead so the call to `read_length` below - // consumes a VarUInt. - Struct if header.length_code == 1 => length_codes::VAR_UINT, - // For any other type, use the header's declared length code. - _ => header.length_code, - }; - - // Read the length, potentially consuming a VarUInt in the process. - let (length, remaining) = self.read_length(length_code)?; - - // After we get the length, perform some type-specific validation. - match header.ion_type { - Float => match header.length_code { - 0 | 4 | 8 | 15 => {} - _ => return IonResult::decoding_error("found a float with an illegal length code"), - }, - Timestamp if !header.is_null() && length.value() <= 1 => { - return IonResult::decoding_error("found a timestamp with length <= 1") + pub fn read_value_length(self, header: Header) -> ParseResult<'a, FlexUInt> { + let length = match header.length_type() { + LengthType::InHeader(n) => FlexUInt::new(1, n as u64), + LengthType::FlexUIntFollows => { + let (flexuint, _) = self.read_flex_uint()?; + flexuint } - Struct if header.length_code == 1 && length.value() == 0 => { - return IonResult::decoding_error("found an empty ordered struct") - } - _ => {} }; - Ok((length, remaining)) - } + let remaining = self; - /// Interprets a type descriptor's `L` nibble (length) in the way used by most Ion types. - /// - /// If `L` is... - /// * `f`: the value is a typed `null` and its length is `0`. - /// * `e`: the length is encoded as a `VarUInt` that follows the type descriptor. - /// * anything else: the `L` represents the actual length. - /// - /// If successful, returns an `Ok(_)` that contains the [VarUInt] representation - /// of the value's length. - pub fn read_length(self, length_code: u8) -> ParseResult<'a, VarUInt> { - let length = match length_code { - length_codes::NULL => VarUInt::new(0, 0), - length_codes::VAR_UINT => return self.read_var_uint(), - magnitude => VarUInt::new(magnitude as usize, 0), - }; + // TODO: Validate length to ensure it is a reasonable value. - // If we reach this point, the length was in the header byte and no additional bytes were - // consumed - Ok((length, self)) + Ok((length, remaining)) } /// Reads a field ID and a value from the buffer. @@ -744,7 +713,7 @@ impl<'a> ImmutableBuffer<'a> { let header_offset = input.offset(); let (length, _) = input.consume(1).read_value_length(header)?; let length_length = length.size_in_bytes() as u8; - let value_length = length.value(); // ha + let value_length = length.value() as usize; // ha let total_length = 1 // Header byte + length_length as usize + value_length; diff --git a/src/lazy/binary/raw/v1_1/reader.rs b/src/lazy/binary/raw/v1_1/reader.rs index 554c3ff1..98eac5fe 100644 --- a/src/lazy/binary/raw/v1_1/reader.rs +++ b/src/lazy/binary/raw/v1_1/reader.rs @@ -1,21 +1,12 @@ #![allow(non_camel_case_types)] -// use crate::lazy::binary::encoded_value::EncodedValue; -// use crate::lazy::raw_value_ref::RawValueRef; -// use crate::lazy::r#struct::LazyStruct; -use crate::lazy::binary::raw::v1_1::annotations_iterator::RawBinaryAnnotationsIterator; use crate::lazy::binary::raw::v1_1::immutable_buffer::ImmutableBuffer; -use crate::lazy::binary::raw::v1_1::r#struct::RawBinaryStructIterator_1_1 as RawBinaryStructIterator; -use crate::lazy::binary::raw::v1_1::value::LazyRawBinaryValue_1_1; -use crate::lazy::decoder::private::{LazyContainerPrivate, LazyRawValuePrivate}; -use crate::lazy::decoder::{ - LazyDecoder, LazyRawReader, LazyRawSequence, LazyRawStruct, LazyRawValue, LazyRawValueExpr, -}; +use crate::lazy::decoder::{LazyDecoder, LazyRawReader}; use crate::lazy::encoder::private::Sealed; use crate::lazy::encoding::BinaryEncoding_1_1; use crate::lazy::raw_stream_item::{LazyRawStreamItem, RawStreamItem}; use crate::result::IonFailure; -use crate::{IonResult, IonType, RawSymbolTokenRef}; +use crate::IonResult; use bumpalo::Bump as BumpAllocator; diff --git a/src/lazy/binary/raw/v1_1/type_descriptor.rs b/src/lazy/binary/raw/v1_1/type_descriptor.rs index 9f98ae12..3ffbf8e2 100644 --- a/src/lazy/binary/raw/v1_1/type_descriptor.rs +++ b/src/lazy/binary/raw/v1_1/type_descriptor.rs @@ -43,53 +43,23 @@ impl TypeDescriptor { let (high_nibble, low_nibble) = (byte >> 4, byte & 0x0F); use IonTypeCode::*; - let (ion_type_code, length_code) = match (high_nibble, low_nibble) { - (0xE, 0x0) => (IonVersionMarker, 3), - (0xE, 0xA) => (NullNull, 0), - (0xE, 0xC..=0xD) => (Nop, 0), - _ => (Nop, 0), - // 1 => Boolean, - // 2 => PositiveInteger, - // 3 => NegativeInteger, - // 4 => Float, - // 5 => Decimal, - // 6 => Timestamp, - // 7 => Symbol, - // 8 => String, - // 9 => Clob, - // 10 => Blob, - // 11 => List, - // 12 => SExpression, - // 13 => Struct, - // 14 => AnnotationOrIvm, - // 15 => Reserved, - // _ => panic!("type code was larger than a nibble"), + let ion_type_code = match (high_nibble, low_nibble) { + (0xE, 0x0) => IonVersionMarker, + (0xE, 0xA) => NullNull, + (0xE, 0xC..=0xD) => Nop, + _ => Boolean, // Temporary, until everything is implemented to satisfy the LUT. }; let ion_type = match ion_type_code { NullNull => Some(IonType::Null), - _ => Some(IonType::Null), - // NullOrNop if length_code == length_codes::NULL => Some(IonType::Null), - // NullOrNop => None, - // Boolean => Some(IonType::Bool), - // PositiveInteger => Some(IonType::Int), - // NegativeInteger => Some(IonType::Int), - // Float => Some(IonType::Float), - // Decimal => Some(IonType::Decimal), - // Timestamp => Some(IonType::Timestamp), - // Symbol => Some(IonType::Symbol), - // String => Some(IonType::String), - // Clob => Some(IonType::Clob), - // Blob => Some(IonType::Blob), - // List => Some(IonType::List), - // SExpression => Some(IonType::SExp), - // Struct => Some(IonType::Struct), - // AnnotationOrIvm => None, - // Reserved => None, + Nop => None, + IonVersionMarker => None, + Boolean => Some(IonType::Bool), + _ => panic!("the provided ion type code is either not implemented, or invalid"), }; TypeDescriptor { ion_type, ion_type_code, - length_code, + length_code: low_nibble, } } @@ -123,6 +93,11 @@ impl TypeDescriptor { } } +pub enum LengthType { + InHeader(usize), + FlexUIntFollows, +} + /// Represents a `TypeDescriptor` that appears before an Ion value (and not a NOP, IVM, /// or annotations wrapper). /// @@ -137,6 +112,17 @@ pub struct Header { pub length_code: u8, } +impl Header { + pub fn length_type(&self) -> LengthType { + use LengthType::*; + match (self.ion_type_code, self.length_code) { + (IonTypeCode::Nop, 0xC) => InHeader(0), + (IonTypeCode::NullNull, 0xA) => InHeader(0), + _ => FlexUIntFollows, + } + } +} + impl EncodedHeader for Header { type TypeCode = IonTypeCode; @@ -153,12 +139,6 @@ impl EncodedHeader for Header { } fn is_null(&self) -> bool { - todo!() - } -} - -impl Header { - pub fn is_null(&self) -> bool { self.ion_type_code == IonTypeCode::NullNull || self.ion_type_code == IonTypeCode::TypedNull } } diff --git a/src/lazy/encoder/binary/v1_1/flex_uint.rs b/src/lazy/encoder/binary/v1_1/flex_uint.rs index 78484ed9..4d0a336e 100644 --- a/src/lazy/encoder/binary/v1_1/flex_uint.rs +++ b/src/lazy/encoder/binary/v1_1/flex_uint.rs @@ -36,7 +36,7 @@ pub struct FlexUInt { } impl FlexUInt { - fn new(size_in_bytes: usize, value: u64) -> Self { + pub(crate) fn new(size_in_bytes: usize, value: u64) -> Self { Self { value, size_in_bytes, From bfa5415d28d28b2171c32c10a95b99e3f6397401 Mon Sep 17 00:00:00 2001 From: Richard Giliam Date: Sun, 7 Apr 2024 23:34:39 -0700 Subject: [PATCH 04/17] Add import; missed while branching for PRs --- src/lazy/binary/raw/v1_1/value.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/lazy/binary/raw/v1_1/value.rs b/src/lazy/binary/raw/v1_1/value.rs index 774a2e8b..d751204a 100644 --- a/src/lazy/binary/raw/v1_1/value.rs +++ b/src/lazy/binary/raw/v1_1/value.rs @@ -5,7 +5,7 @@ use std::ops::Range; use crate::{ lazy::{ binary::{ - encoded_value::EncodedValue, + encoded_value::{EncodedHeader, EncodedValue}, raw::{ v1_1::{ annotations_iterator::RawBinaryAnnotationsIterator, From 1964a2c224a7a67c528d93e6eaf0098589bfa266 Mon Sep 17 00:00:00 2001 From: Richard Giliam Date: Tue, 9 Apr 2024 02:48:25 -0700 Subject: [PATCH 05/17] Fixed warnings; unused imports and such --- src/lazy/binary/encoded_value.rs | 2 -- src/lazy/binary/raw/annotations_iterator.rs | 1 - src/lazy/binary/raw/reader.rs | 2 +- src/lazy/binary/raw/sequence.rs | 2 +- src/lazy/binary/raw/struct.rs | 2 +- src/lazy/binary/raw/v1_1/immutable_buffer.rs | 8 ++------ src/lazy/binary/raw/v1_1/reader.rs | 4 ++-- src/lazy/binary/raw/v1_1/sequence.rs | 2 +- src/lazy/binary/raw/v1_1/struct.rs | 4 ++-- src/lazy/struct.rs | 2 +- 10 files changed, 11 insertions(+), 18 deletions(-) diff --git a/src/lazy/binary/encoded_value.rs b/src/lazy/binary/encoded_value.rs index 7d9b48e6..7b6c32b0 100644 --- a/src/lazy/binary/encoded_value.rs +++ b/src/lazy/binary/encoded_value.rs @@ -1,6 +1,4 @@ use crate::binary::non_blocking::type_descriptor::Header; -use crate::lazy::decoder::LazyDecoder; -use crate::lazy::encoding::BinaryEncoding; use crate::types::SymbolId; use crate::IonType; use std::ops::Range; diff --git a/src/lazy/binary/raw/annotations_iterator.rs b/src/lazy/binary/raw/annotations_iterator.rs index aab3d4f9..58e6df55 100644 --- a/src/lazy/binary/raw/annotations_iterator.rs +++ b/src/lazy/binary/raw/annotations_iterator.rs @@ -1,5 +1,4 @@ use crate::lazy::binary::immutable_buffer::ImmutableBuffer; -use crate::lazy::encoding::BinaryEncoding; use crate::{IonResult, RawSymbolTokenRef}; /// Iterates over a slice of bytes, lazily reading them as a sequence of VarUInt symbol IDs. diff --git a/src/lazy/binary/raw/reader.rs b/src/lazy/binary/raw/reader.rs index 4eb5dd4f..cd12c77a 100644 --- a/src/lazy/binary/raw/reader.rs +++ b/src/lazy/binary/raw/reader.rs @@ -3,7 +3,7 @@ use crate::lazy::binary::immutable_buffer::ImmutableBuffer; use crate::lazy::binary::raw::value::LazyRawBinaryValue_1_0; use crate::lazy::decoder::{LazyDecoder, LazyRawReader}; -use crate::lazy::encoding::{BinaryEncoding, BinaryEncoding_1_0}; +use crate::lazy::encoding::BinaryEncoding_1_0; use crate::lazy::raw_stream_item::{LazyRawStreamItem, RawStreamItem}; use crate::result::IonFailure; use crate::IonResult; diff --git a/src/lazy/binary/raw/sequence.rs b/src/lazy/binary/raw/sequence.rs index cf47a2fb..f5548c11 100644 --- a/src/lazy/binary/raw/sequence.rs +++ b/src/lazy/binary/raw/sequence.rs @@ -6,7 +6,7 @@ use crate::lazy::binary::raw::reader::DataSource; use crate::lazy::binary::raw::value::LazyRawBinaryValue_1_0; use crate::lazy::decoder::private::LazyContainerPrivate; use crate::lazy::decoder::{LazyRawSequence, LazyRawValueExpr, RawValueExpr}; -use crate::lazy::encoding::{BinaryEncoding, BinaryEncoding_1_0}; +use crate::lazy::encoding::BinaryEncoding_1_0; use crate::{IonResult, IonType}; use std::fmt::{Debug, Formatter}; diff --git a/src/lazy/binary/raw/struct.rs b/src/lazy/binary/raw/struct.rs index 91fbc20b..10e73297 100644 --- a/src/lazy/binary/raw/struct.rs +++ b/src/lazy/binary/raw/struct.rs @@ -13,7 +13,7 @@ use crate::lazy::decoder::private::{ use crate::lazy::decoder::{ LazyRawField, LazyRawFieldExpr, LazyRawStruct, RawFieldExpr, RawValueExpr, }; -use crate::lazy::encoding::{BinaryEncoding, BinaryEncoding_1_0}; +use crate::lazy::encoding::BinaryEncoding_1_0; use crate::{IonResult, RawSymbolTokenRef}; #[derive(Copy, Clone)] diff --git a/src/lazy/binary/raw/v1_1/immutable_buffer.rs b/src/lazy/binary/raw/v1_1/immutable_buffer.rs index f81c9475..93c41e59 100644 --- a/src/lazy/binary/raw/v1_1/immutable_buffer.rs +++ b/src/lazy/binary/raw/v1_1/immutable_buffer.rs @@ -1,22 +1,18 @@ use crate::binary::constants::v1_1::{length_codes, IVM}; use crate::binary::int::DecodedInt; -// use crate::binary::non_blocking::type_descriptor::{ -// Header, TypeDescriptor, ION_1_0_TYPE_DESCRIPTORS, -// }; use crate::binary::uint::DecodedUInt; use crate::binary::var_int::VarInt; use crate::binary::var_uint::VarUInt; -use crate::lazy::binary::encoded_value::{EncodedHeader, EncodedValue}; +use crate::lazy::binary::encoded_value::EncodedValue; use crate::lazy::binary::raw::v1_1::value::LazyRawBinaryValue_1_1; use crate::lazy::binary::raw::v1_1::{ Header, LengthType, TypeDescriptor, ION_1_1_TYPE_DESCRIPTORS, }; -use crate::lazy::encoder::binary::v1_1::fixed_int::FixedInt; use crate::lazy::encoder::binary::v1_1::flex_int::FlexInt; use crate::lazy::encoder::binary::v1_1::flex_uint::FlexUInt; use crate::result::IonFailure; use crate::types::UInt; -use crate::{Int, IonError, IonResult, IonType}; +use crate::{Int, IonError, IonResult}; use num_bigint::{BigInt, BigUint, Sign}; use std::fmt::{Debug, Formatter}; use std::mem; diff --git a/src/lazy/binary/raw/v1_1/reader.rs b/src/lazy/binary/raw/v1_1/reader.rs index 98eac5fe..51781cb4 100644 --- a/src/lazy/binary/raw/v1_1/reader.rs +++ b/src/lazy/binary/raw/v1_1/reader.rs @@ -68,7 +68,7 @@ impl<'data> LazyRawBinaryReader_1_1<'data> { return Ok(LazyRawStreamItem::::EndOfStream); } - let mut type_descriptor = buffer.peek_type_descriptor()?; + let type_descriptor = buffer.peek_type_descriptor()?; if type_descriptor.is_nop() { (_, buffer) = buffer.consume_nop_padding(type_descriptor)?; if buffer.is_empty() { @@ -91,7 +91,7 @@ impl<'data> LazyRawReader<'data, BinaryEncoding_1_1> for LazyRawBinaryReader_1_1 fn next<'top>( &'top mut self, - allocator: &'top BumpAllocator, + _allocator: &'top BumpAllocator, ) -> IonResult> where 'data: 'top, diff --git a/src/lazy/binary/raw/v1_1/sequence.rs b/src/lazy/binary/raw/v1_1/sequence.rs index f224538f..383bb588 100644 --- a/src/lazy/binary/raw/v1_1/sequence.rs +++ b/src/lazy/binary/raw/v1_1/sequence.rs @@ -5,7 +5,7 @@ use crate::lazy::binary::raw::v1_1::immutable_buffer::ImmutableBuffer; use crate::lazy::binary::raw::v1_1::value::LazyRawBinaryValue_1_1; use crate::lazy::decoder::private::LazyContainerPrivate; use crate::lazy::decoder::{LazyRawSequence, LazyRawValueExpr, RawValueExpr}; -use crate::lazy::encoding::{BinaryEncoding, BinaryEncoding_1_1}; +use crate::lazy::encoding::BinaryEncoding_1_1; use crate::{IonResult, IonType}; use std::fmt::{Debug, Formatter}; diff --git a/src/lazy/binary/raw/v1_1/struct.rs b/src/lazy/binary/raw/v1_1/struct.rs index 81cc12fc..59612cae 100644 --- a/src/lazy/binary/raw/v1_1/struct.rs +++ b/src/lazy/binary/raw/v1_1/struct.rs @@ -11,9 +11,9 @@ use crate::lazy::decoder::private::{ LazyContainerPrivate, LazyRawFieldPrivate, LazyRawValuePrivate, }; use crate::lazy::decoder::{ - LazyRawField, LazyRawFieldExpr, LazyRawStruct, LazyRawValue, RawFieldExpr, RawValueExpr, + LazyRawField, LazyRawFieldExpr, LazyRawStruct, RawFieldExpr, RawValueExpr, }; -use crate::lazy::encoding::{BinaryEncoding, BinaryEncoding_1_1}; +use crate::lazy::encoding::BinaryEncoding_1_1; use crate::{IonResult, RawSymbolTokenRef}; #[derive(Copy, Clone)] diff --git a/src/lazy/struct.rs b/src/lazy/struct.rs index 598d3231..1c9f7084 100644 --- a/src/lazy/struct.rs +++ b/src/lazy/struct.rs @@ -5,7 +5,7 @@ use std::fmt::{Debug, Formatter}; use crate::element::builders::StructBuilder; use crate::lazy::decoder::LazyDecoder; -use crate::lazy::encoding::{BinaryEncoding_1_0, BinaryEncoding_1_1}; +use crate::lazy::encoding::BinaryEncoding_1_0; use crate::lazy::expanded::r#struct::{ ExpandedStructIterator, LazyExpandedField, LazyExpandedStruct, }; From f9402a6571592733135db65c06a42c57d7c80c79 Mon Sep 17 00:00:00 2001 From: Richard Giliam Date: Tue, 9 Apr 2024 03:29:08 -0700 Subject: [PATCH 06/17] Remove dead code --- src/lazy/any_encoding.rs | 6 +- .../binary/raw/v1_1/annotations_iterator.rs | 13 +- src/lazy/binary/raw/v1_1/immutable_buffer.rs | 645 +----------------- 3 files changed, 9 insertions(+), 655 deletions(-) diff --git a/src/lazy/any_encoding.rs b/src/lazy/any_encoding.rs index 12b30fdb..fedd7bda 100644 --- a/src/lazy/any_encoding.rs +++ b/src/lazy/any_encoding.rs @@ -115,7 +115,7 @@ impl<'top> RawEExpression<'top, AnyEncoding> for LazyRawAnyEExpression<'top> { encoding: LazyRawAnyMacroArgsIteratorKind::Text_1_1(m.raw_arguments()), }, LazyRawAnyEExpressionKind::Binary_1_1(_) => { - unimplemented!("macro in binary Ion 1.0 not implemented") + unimplemented!("macro in binary Ion 1.1 not implemented") } } } @@ -167,7 +167,7 @@ impl<'data> LazyRawAnyReader<'data> { match data { &[0xE0, 0x01, 0x00, 0xEA, ..] => RawReaderType::Binary_1_0, - &[0xE0, 0x01, 0x01, 0xEA, ..] => RawReaderType::Binary_1_0, + &[0xE0, 0x01, 0x01, 0xEA, ..] => RawReaderType::Binary_1_1, _ => RawReaderType::Text_1_0, } } @@ -300,8 +300,6 @@ impl<'top> From> for LazyRawAnyValue<'top> { } } -// TODO: Can we have a function to convert E to LazyRawValueKind? - impl<'top> From> for LazyRawAnyValue<'top> { fn from(value: LazyRawBinaryValue_1_0<'top>) -> Self { LazyRawAnyValue { diff --git a/src/lazy/binary/raw/v1_1/annotations_iterator.rs b/src/lazy/binary/raw/v1_1/annotations_iterator.rs index d9da1475..80598346 100644 --- a/src/lazy/binary/raw/v1_1/annotations_iterator.rs +++ b/src/lazy/binary/raw/v1_1/annotations_iterator.rs @@ -16,17 +16,6 @@ impl<'a> Iterator for RawBinaryAnnotationsIterator<'a> { type Item = IonResult>; fn next(&mut self) -> Option { - if self.buffer.is_empty() { - return None; - } - // TODO: If the VarUInt doesn't end before the annotations sequence does (i.e. the stream is - // malformed, this will surface an `Incomplete` instead of a more descriptive error. - let (var_uint, buffer_after_var_uint) = match self.buffer.read_var_uint() { - Ok(output) => output, - Err(error) => return Some(Err(error)), - }; - let symbol_id = RawSymbolTokenRef::SymbolId(var_uint.value()); - self.buffer = buffer_after_var_uint; - Some(Ok(symbol_id)) + unimplemented!() } } diff --git a/src/lazy/binary/raw/v1_1/immutable_buffer.rs b/src/lazy/binary/raw/v1_1/immutable_buffer.rs index 93c41e59..f475ba6d 100644 --- a/src/lazy/binary/raw/v1_1/immutable_buffer.rs +++ b/src/lazy/binary/raw/v1_1/immutable_buffer.rs @@ -1,7 +1,4 @@ -use crate::binary::constants::v1_1::{length_codes, IVM}; -use crate::binary::int::DecodedInt; -use crate::binary::uint::DecodedUInt; -use crate::binary::var_int::VarInt; +use crate::binary::constants::v1_1::IVM; use crate::binary::var_uint::VarUInt; use crate::lazy::binary::encoded_value::EncodedValue; use crate::lazy::binary::raw::v1_1::value::LazyRawBinaryValue_1_1; @@ -11,11 +8,8 @@ use crate::lazy::binary::raw::v1_1::{ use crate::lazy::encoder::binary::v1_1::flex_int::FlexInt; use crate::lazy::encoder::binary::v1_1::flex_uint::FlexUInt; use crate::result::IonFailure; -use crate::types::UInt; -use crate::{Int, IonError, IonResult}; -use num_bigint::{BigInt, BigUint, Sign}; +use crate::{IonError, IonResult}; use std::fmt::{Debug, Formatter}; -use std::mem; // This limit is used for stack-allocating buffer space to encode/decode UInts. const UINT_STACK_BUFFER_SIZE: usize = 16; @@ -184,204 +178,6 @@ impl<'a> ImmutableBuffer<'a> { Ok((flex_uint, remaining)) } - /// Reads a `VarUInt` encoding primitive from the beginning of the buffer. If it is successful, - /// returns an `Ok(_)` containing its [VarUInt] representation. - /// - /// See: - #[inline] - pub fn read_var_uint(self) -> ParseResult<'a, VarUInt> { - const LOWER_7_BITMASK: u8 = 0b0111_1111; - const HIGHEST_BIT_VALUE: u8 = 0b1000_0000; - - // Reading a `VarUInt` is one of the hottest paths in the binary 1.0 reader. - // Because `VarUInt`s represent struct field names, annotations, and value lengths, - // smaller values are more common than larger values. As an optimization, we have a - // dedicated code path for the decoding of 1- and 2-byte VarUInts. This allows the logic - // for the most common cases to be inlined and the logic for the less common cases - // (including errors) to be a function call. - - let data = self.bytes(); - // The 'fast path' first checks whether we have at least two bytes available. This allows us - // to do a single length check on the fast path. If there's one byte in the buffer that - // happens to be a complete VarUInt (a very rare occurrence), it will still be handled by - // `read_var_uint_slow()`. - if data.len() >= 2 { - let first_byte = data[0]; - let mut magnitude = (LOWER_7_BITMASK & first_byte) as usize; - let num_bytes = if first_byte >= HIGHEST_BIT_VALUE { - 1 - } else { - let second_byte = data[1]; - if second_byte < HIGHEST_BIT_VALUE { - return self.read_var_uint_slow(); - } - let lower_seven = (LOWER_7_BITMASK & second_byte) as usize; - magnitude <<= 7; - magnitude |= lower_seven; - 2 - }; - return Ok((VarUInt::new(magnitude, num_bytes), self.consume(num_bytes))); - } - - // All other VarUInt sizes and error cases (incomplete data, oversized, etc) are handled by - // this more general decoding loop. - self.read_var_uint_slow() - } - - #[cold] - pub fn read_var_uint_slow(self) -> ParseResult<'a, VarUInt> { - const BITS_PER_ENCODED_BYTE: usize = 7; - const STORAGE_SIZE_IN_BITS: usize = mem::size_of::() * 8; - const MAX_ENCODED_SIZE_IN_BYTES: usize = STORAGE_SIZE_IN_BITS / BITS_PER_ENCODED_BYTE; - - const LOWER_7_BITMASK: u8 = 0b0111_1111; - const HIGHEST_BIT_VALUE: u8 = 0b1000_0000; - - let mut magnitude: usize = 0; - let mut encoded_size_in_bytes = 0; - - for byte in self.bytes().iter().copied() { - encoded_size_in_bytes += 1; - magnitude <<= 7; // Shifts 0 to 0 in the first iteration - let lower_seven = (LOWER_7_BITMASK & byte) as usize; - magnitude |= lower_seven; - if byte >= HIGHEST_BIT_VALUE { - // This is the final byte. - // Make sure we haven't exceeded the configured maximum size - if encoded_size_in_bytes > MAX_ENCODED_SIZE_IN_BYTES { - return Self::value_too_large( - "a VarUInt", - encoded_size_in_bytes, - MAX_ENCODED_SIZE_IN_BYTES, - ); - } - return Ok(( - VarUInt::new(magnitude, encoded_size_in_bytes), - self.consume(encoded_size_in_bytes), - )); - } - } - - IonResult::incomplete("a VarUInt", self.offset() + encoded_size_in_bytes) - } - - /// Reads a `VarInt` encoding primitive from the beginning of the buffer. If it is successful, - /// returns an `Ok(_)` containing its [VarInt] representation. - /// - /// See: - pub fn read_var_int(self) -> ParseResult<'a, VarInt> { - const BITS_PER_ENCODED_BYTE: usize = 7; - const STORAGE_SIZE_IN_BITS: usize = mem::size_of::() * 8; - const MAX_ENCODED_SIZE_IN_BYTES: usize = STORAGE_SIZE_IN_BITS / BITS_PER_ENCODED_BYTE; - - const LOWER_6_BITMASK: u8 = 0b0011_1111; - const LOWER_7_BITMASK: u8 = 0b0111_1111; - const HIGHEST_BIT_VALUE: u8 = 0b1000_0000; - - const BITS_PER_BYTE: usize = 8; - const BITS_PER_U64: usize = mem::size_of::() * BITS_PER_BYTE; - - // Unlike VarUInt's encoding, the first byte in a VarInt is a special case because - // bit #6 (0-indexed, from the right) indicates whether the value is positive (0) or - // negative (1). - - if self.is_empty() { - return IonResult::incomplete("a VarInt", self.offset()); - } - let first_byte: u8 = self.peek_next_byte().unwrap(); - let no_more_bytes: bool = first_byte >= 0b1000_0000; // If the first bit is 1, we're done. - let is_negative: bool = (first_byte & 0b0100_0000) == 0b0100_0000; - let sign: i64 = if is_negative { -1 } else { 1 }; - let mut magnitude = (first_byte & 0b0011_1111) as i64; - - if no_more_bytes { - return Ok(( - VarInt::new(magnitude * sign, is_negative, 1), - self.consume(1), - )); - } - - let mut encoded_size_in_bytes = 1; - // Whether we found the terminating byte in this buffer. - let mut terminated = false; - - for byte in self.bytes()[1..].iter().copied() { - let lower_seven = (0b0111_1111 & byte) as i64; - magnitude <<= 7; - magnitude |= lower_seven; - encoded_size_in_bytes += 1; - if byte >= 0b1000_0000 { - terminated = true; - break; - } - } - - if !terminated { - return IonResult::incomplete("a VarInt", self.offset() + encoded_size_in_bytes); - } - - if encoded_size_in_bytes > MAX_ENCODED_SIZE_IN_BYTES { - return IonResult::decoding_error(format!( - "Found a {encoded_size_in_bytes}-byte VarInt. Max supported size is {MAX_ENCODED_SIZE_IN_BYTES} bytes." - )); - } - - Ok(( - VarInt::new(magnitude * sign, is_negative, encoded_size_in_bytes), - self.consume(encoded_size_in_bytes), - )) - } - - /// Reads the first `length` bytes from the buffer as a `UInt` encoding primitive. If it is - /// successful, returns an `Ok(_)` containing its [DecodedUInt] representation. - /// - /// See: - pub fn read_uint(self, length: usize) -> ParseResult<'a, DecodedUInt> { - if length <= mem::size_of::() { - return self.read_small_uint(length); - } - - // The UInt is too large to fit in a u64; read it as a BigUInt instead. - self.read_big_uint(length) - } - - /// Reads the first `length` bytes from the buffer as a `UInt`. The caller must confirm that - /// `length` is small enough to fit in a `u64`. - #[inline] - fn read_small_uint(self, length: usize) -> ParseResult<'a, DecodedUInt> { - let uint_bytes = self - .peek_n_bytes(length) - .ok_or_else(|| IonError::incomplete("a UInt", self.offset()))?; - let magnitude = DecodedUInt::small_uint_from_slice(uint_bytes); - Ok(( - DecodedUInt::new(UInt::from(magnitude), length), - self.consume(length), - )) - } - - /// Reads the first `length` bytes from the buffer as a `UInt`. If `length` is small enough - /// that the value can fit in a `usize`, it is strongly recommended that you use - /// `read_small_uint` instead as it will be much faster. - #[inline(never)] - // This method performs allocations and its generated assembly is rather large. Isolating its - // logic in a separate method that is never inlined keeps `read_uint` (its caller) small enough - // to inline. This is important as `read_uint` is on the hot path for most Ion streams. - fn read_big_uint(self, length: usize) -> ParseResult<'a, DecodedUInt> { - if length > MAX_UINT_SIZE_IN_BYTES { - return Self::value_too_large("a Uint", length, MAX_UINT_SIZE_IN_BYTES); - } - - let uint_bytes = self - .peek_n_bytes(length) - .ok_or_else(|| IonError::incomplete("a UInt", self.offset()))?; - - let magnitude = BigUint::from_bytes_be(uint_bytes); - Ok(( - DecodedUInt::new(UInt::from(magnitude), length), - self.consume(length), - )) - } - #[inline(never)] // This method is inline(never) because it is rarely invoked and its allocations/formatting // compile to a non-trivial number of instructions. @@ -391,127 +187,13 @@ impl<'a> ImmutableBuffer<'a> { )) } - /// Reads the first `length` bytes from the buffer as an `Int` encoding primitive. If it is - /// successful, returns an `Ok(_)` containing its [DecodedInt] representation and consumes the - /// source bytes. - /// - /// See: - pub fn read_int(self, length: usize) -> ParseResult<'a, DecodedInt> { - if length == 0 { - return Ok((DecodedInt::new(0, false, 0), self.consume(0))); - } else if length > MAX_INT_SIZE_IN_BYTES { - return IonResult::decoding_error(format!( - "Found a {length}-byte Int. Max supported size is {MAX_INT_SIZE_IN_BYTES} bytes." - )); - } - - let int_bytes = self - .peek_n_bytes(length) - .ok_or_else(|| IonError::incomplete("an Int encoding primitive", self.offset()))?; - - let mut is_negative: bool = false; - - let value: Int = if length <= mem::size_of::() { - // This Int will fit in an i64. - let first_byte: i64 = i64::from(int_bytes[0]); - let sign: i64 = if first_byte & 0b1000_0000 == 0 { - 1 - } else { - is_negative = true; - -1 - }; - let mut magnitude: i64 = first_byte & 0b0111_1111; - for &byte in &int_bytes[1..] { - let byte = i64::from(byte); - magnitude <<= 8; - magnitude |= byte; - } - (sign * magnitude).into() - } else { - // This Int is too big for an i64, we'll need to use a BigInt - let value = if int_bytes[0] & 0b1000_0000 == 0 { - BigInt::from_bytes_be(Sign::Plus, int_bytes) - } else { - is_negative = true; - // The leading sign bit is the only part of the input that can't be considered - // unsigned, big-endian integer bytes. We need to make our own copy of the input - // so we can flip that bit back to a zero before calling `from_bytes_be`. - let mut owned_int_bytes = Vec::from(int_bytes); - owned_int_bytes[0] &= 0b0111_1111; - BigInt::from_bytes_be(Sign::Minus, owned_int_bytes.as_slice()) - }; - - value.into() - }; - Ok(( - DecodedInt::new(value, is_negative, length), - self.consume(length), - )) - } - /// Attempts to decode an annotations wrapper at the beginning of the buffer and returning /// its subfields in an [`AnnotationsWrapper`]. pub fn read_annotations_wrapper( &self, - type_descriptor: TypeDescriptor, + _type_descriptor: TypeDescriptor, ) -> ParseResult<'a, AnnotationsWrapper> { - // Consume the first byte; its contents are already in the `type_descriptor` parameter. - let input_after_type_descriptor = self.consume(1); - - // Read the combined length of the annotations sequence and the value that follows it - let (annotations_and_value_length, input_after_combined_length) = - match type_descriptor.length_code { - length_codes::NULL => (0, input_after_type_descriptor), - length_codes::VAR_UINT => { - let (var_uint, input) = input_after_type_descriptor.read_var_uint()?; - (var_uint.value(), input) - } - length => (length as usize, input_after_type_descriptor), - }; - - // Read the length of the annotations sequence - let (annotations_length, input_after_annotations_length) = - input_after_combined_length.read_var_uint()?; - - // Validate that the annotations sequence is not empty. - if annotations_length.value() == 0 { - return IonResult::decoding_error("found an annotations wrapper with no annotations"); - } - - // Validate that the annotated value is not missing. - let expected_value_length = annotations_and_value_length - - annotations_length.size_in_bytes() - - annotations_length.value(); - - if expected_value_length == 0 { - return IonResult::decoding_error("found an annotation wrapper with no value"); - } - - // Skip over the annotations sequence itself; the reader will return to it if/when the - // reader asks to iterate over those symbol IDs. - let final_input = input_after_annotations_length.consume(annotations_length.value()); - - // Here, `self` is the (immutable) buffer we started with. Comparing it with `input` - // gets us the before-and-after we need to calculate the size of the header. - let annotations_header_length = final_input.offset() - self.offset(); - let annotations_header_length = u8::try_from(annotations_header_length).map_err(|_e| { - IonError::decoding_error("found an annotations header greater than 255 bytes long") - })?; - - let annotations_sequence_length = - u8::try_from(annotations_length.value()).map_err(|_e| { - IonError::decoding_error( - "found an annotations sequence greater than 255 bytes long", - ) - })?; - - let wrapper = AnnotationsWrapper { - header_length: annotations_header_length, - sequence_length: annotations_sequence_length, - expected_value_length, - }; - - Ok((wrapper, final_input)) + unimplemented!(); } /// Reads a `NOP` encoding primitive from the buffer. If it is successful, returns an `Ok(_)` @@ -586,81 +268,14 @@ impl<'a> ImmutableBuffer<'a> { /// Reads a field ID and a value from the buffer. pub(crate) fn peek_field(self) -> IonResult>> { - let mut input = self; - if self.is_empty() { - // We're at the end of the struct - return Ok(None); - } - // Read the field ID - let (mut field_id_var_uint, mut input_after_field_id) = input.read_var_uint()?; - if input_after_field_id.is_empty() { - return IonResult::incomplete( - "found field name but no value", - input_after_field_id.offset(), - ); - } - - let mut type_descriptor = input_after_field_id.peek_type_descriptor()?; - if type_descriptor.is_nop() { - // Read past NOP fields until we find the first one that's an actual value - // or we run out of struct bytes. Note that we read the NOP field(s) from `self` (the - // initial input) rather than `input_after_field_id` because it simplifies - // the logic of `read_struct_field_nop_pad()`, which is very rarely called. - (field_id_var_uint, input_after_field_id) = match input.read_struct_field_nop_pad()? { - None => { - // There are no more fields, we're at the end of the struct. - return Ok(None); - } - Some((nop_length, field_id_var_uint, input_after_field_id)) => { - // Advance `input` beyond the NOP so that when we store it in the value it begins - // with the field ID. - input = input.consume(nop_length); - type_descriptor = input_after_field_id.peek_type_descriptor()?; - (field_id_var_uint, input_after_field_id) - } - }; - } - - let field_id_length = field_id_var_uint.size_in_bytes() as u8; - let field_id = field_id_var_uint.value(); - - let mut value = input_after_field_id.read_value(type_descriptor)?; - value.encoded_value.field_id = Some(field_id); - value.encoded_value.field_id_length = field_id_length; - value.encoded_value.total_length += field_id_length as usize; - value.input = input; - Ok(Some(value)) + unimplemented!(); } #[cold] /// Consumes (field ID, NOP pad) pairs until a non-NOP value is encountered in field position or /// the buffer is empty. Returns a buffer starting at the field ID before the non-NOP value. fn read_struct_field_nop_pad(self) -> IonResult)>> { - let mut input_before_field_id = self; - loop { - if input_before_field_id.is_empty() { - return Ok(None); - } - let (field_id_var_uint, input_after_field_id) = - input_before_field_id.read_var_uint()?; - // If we're out of data (i.e. there's no field value) the struct is incomplete. - if input_after_field_id.is_empty() { - return IonResult::incomplete( - "found a field name but no value", - input_after_field_id.offset(), - ); - } - // Peek at the next value header. If it's a NOP, we need to repeat the process. - if input_after_field_id.peek_type_descriptor()?.is_nop() { - // Consume the NOP to position the buffer at the beginning of the next field ID. - (_, input_before_field_id) = input_after_field_id.read_nop_pad()?; - } else { - // If it isn't a NOP, return the field ID and the buffer slice containing the field - // value. - let nop_length = input_before_field_id.offset() - self.offset(); - return Ok(Some((nop_length, field_id_var_uint, input_after_field_id))); - } - } + unimplemented!(); } /// Reads a value without a field name from the buffer. This is applicable in lists, s-expressions, @@ -822,8 +437,6 @@ pub struct AnnotationsWrapper { #[cfg(test)] mod tests { use super::*; - use crate::IonError; - use num_traits::Num; fn input_test>(input: A) { let input = ImmutableBuffer::new(input.as_ref()); @@ -856,250 +469,4 @@ mod tests { fn vec_test() { input_test(Vec::from("foo bar baz".as_bytes())); } - - #[test] - fn read_var_uint() -> IonResult<()> { - let buffer = ImmutableBuffer::new(&[0b0111_1001, 0b0000_1111, 0b1000_0001]); - let var_uint = buffer.read_var_uint()?.0; - assert_eq!(3, var_uint.size_in_bytes()); - assert_eq!(1_984_385, var_uint.value()); - Ok(()) - } - - #[test] - fn read_var_uint_zero() -> IonResult<()> { - let buffer = ImmutableBuffer::new(&[0b1000_0000]); - let var_uint = buffer.read_var_uint()?.0; - assert_eq!(var_uint.size_in_bytes(), 1); - assert_eq!(var_uint.value(), 0); - Ok(()) - } - - #[test] - fn read_var_uint_two_bytes_max_value() -> IonResult<()> { - let buffer = ImmutableBuffer::new(&[0b0111_1111, 0b1111_1111]); - let var_uint = buffer.read_var_uint()?.0; - assert_eq!(var_uint.size_in_bytes(), 2); - assert_eq!(var_uint.value(), 16_383); - Ok(()) - } - - #[test] - fn read_incomplete_var_uint() -> IonResult<()> { - let buffer = ImmutableBuffer::new(&[0b0111_1001, 0b0000_1111]); - match buffer.read_var_uint() { - Err(IonError::Incomplete { .. }) => Ok(()), - other => panic!("expected IonError::Incomplete, but found: {other:?}"), - } - } - - #[test] - fn read_var_uint_overflow_detection() { - let buffer = ImmutableBuffer::new(&[ - 0b0111_1111, - 0b0111_1111, - 0b0111_1111, - 0b0111_1111, - 0b0111_1111, - 0b0111_1111, - 0b0111_1111, - 0b0111_1111, - 0b0111_1111, - 0b1111_1111, - ]); - buffer - .read_var_uint() - .expect_err("This should have failed due to overflow."); - } - - #[test] - fn read_var_int_zero() -> IonResult<()> { - let buffer = ImmutableBuffer::new(&[0b1000_0000]); - let var_int = buffer.read_var_int()?.0; - assert_eq!(var_int.size_in_bytes(), 1); - assert_eq!(var_int.value(), 0); - Ok(()) - } - - #[test] - fn read_negative_var_int() -> IonResult<()> { - let buffer = ImmutableBuffer::new(&[0b0111_1001, 0b0000_1111, 0b1000_0001]); - let var_int = buffer.read_var_int()?.0; - assert_eq!(var_int.size_in_bytes(), 3); - assert_eq!(var_int.value(), -935_809); - Ok(()) - } - - #[test] - fn read_positive_var_int() -> IonResult<()> { - let buffer = ImmutableBuffer::new(&[0b0011_1001, 0b0000_1111, 0b1000_0001]); - let var_int = buffer.read_var_int()?.0; - assert_eq!(var_int.size_in_bytes(), 3); - assert_eq!(var_int.value(), 935_809); - Ok(()) - } - - #[test] - fn read_var_int_two_byte_min() -> IonResult<()> { - let buffer = ImmutableBuffer::new(&[0b0111_1111, 0b1111_1111]); - let var_int = buffer.read_var_int()?.0; - assert_eq!(var_int.size_in_bytes(), 2); - assert_eq!(var_int.value(), -8_191); - Ok(()) - } - - #[test] - fn read_var_int_two_byte_max() -> IonResult<()> { - let buffer = ImmutableBuffer::new(&[0b0011_1111, 0b1111_1111]); - let var_int = buffer.read_var_int()?.0; - assert_eq!(var_int.size_in_bytes(), 2); - assert_eq!(var_int.value(), 8_191); - Ok(()) - } - - #[test] - fn read_var_int_overflow_detection() -> IonResult<()> { - let buffer = ImmutableBuffer::new(&[ - 0b0111_1111, - 0b0111_1111, - 0b0111_1111, - 0b0111_1111, - 0b0111_1111, - 0b0111_1111, - 0b0111_1111, - 0b0111_1111, - 0b0111_1111, - 0b1111_1111, - ]); - buffer - .read_var_int() - .expect_err("This should have failed due to overflow."); - Ok(()) - } - - #[test] - fn read_one_byte_uint() -> IonResult<()> { - let buffer = ImmutableBuffer::new(&[0b1000_0000]); - let var_int = buffer.read_uint(buffer.len())?.0; - assert_eq!(var_int.size_in_bytes(), 1); - assert_eq!(var_int.value(), &UInt::from(128u64)); - Ok(()) - } - - #[test] - fn read_two_byte_uint() -> IonResult<()> { - let buffer = ImmutableBuffer::new(&[0b0111_1111, 0b1111_1111]); - let var_int = buffer.read_uint(buffer.len())?.0; - assert_eq!(var_int.size_in_bytes(), 2); - assert_eq!(var_int.value(), &UInt::from(32_767u64)); - Ok(()) - } - - #[test] - fn read_three_byte_uint() -> IonResult<()> { - let buffer = ImmutableBuffer::new(&[0b0011_1100, 0b1000_0111, 0b1000_0001]); - let var_int = buffer.read_uint(buffer.len())?.0; - assert_eq!(var_int.size_in_bytes(), 3); - assert_eq!(var_int.value(), &UInt::from(3_966_849u64)); - Ok(()) - } - - #[test] - fn test_read_ten_byte_uint() -> IonResult<()> { - let data = vec![0xFFu8; 10]; - let buffer = ImmutableBuffer::new(&data); - let uint = buffer.read_uint(buffer.len())?.0; - assert_eq!(uint.size_in_bytes(), 10); - assert_eq!( - uint.value(), - &UInt::from(BigUint::from_str_radix("ffffffffffffffffffff", 16).unwrap()) - ); - Ok(()) - } - - #[test] - fn test_read_uint_too_large() { - let mut buffer = Vec::with_capacity(MAX_UINT_SIZE_IN_BYTES + 1); - buffer.resize(MAX_UINT_SIZE_IN_BYTES + 1, 1); - let buffer = ImmutableBuffer::new(&buffer); - let _uint = buffer - .read_uint(buffer.len()) - .expect_err("This exceeded the configured max UInt size."); - } - - #[test] - fn read_int_negative_zero() -> IonResult<()> { - let buffer = ImmutableBuffer::new(&[0b1000_0000]); // Negative zero - let int = buffer.read_int(buffer.len())?.0; - assert_eq!(int.size_in_bytes(), 1); - assert_eq!(int.value(), &Int::from(0)); - assert!(int.is_negative_zero()); - Ok(()) - } - - #[test] - fn read_int_positive_zero() -> IonResult<()> { - let buffer = ImmutableBuffer::new(&[0b0000_0000]); // Negative zero - let int = buffer.read_int(buffer.len())?.0; - assert_eq!(int.size_in_bytes(), 1); - assert_eq!(int.value(), &Int::from(0)); - assert!(!int.is_negative_zero()); - Ok(()) - } - - #[test] - fn read_int_length_zero() -> IonResult<()> { - let buffer = ImmutableBuffer::new(&[]); // Negative zero - let int = buffer.read_int(buffer.len())?.0; - assert_eq!(int.size_in_bytes(), 0); - assert_eq!(int.value(), &Int::from(0)); - assert!(!int.is_negative_zero()); - Ok(()) - } - - #[test] - fn read_two_byte_negative_int() -> IonResult<()> { - let buffer = ImmutableBuffer::new(&[0b1111_1111, 0b1111_1111]); - let int = buffer.read_int(buffer.len())?.0; - assert_eq!(int.size_in_bytes(), 2); - assert_eq!(int.value(), &Int::from(-32_767i64)); - Ok(()) - } - - #[test] - fn read_two_byte_positive_int() -> IonResult<()> { - let buffer = ImmutableBuffer::new(&[0b0111_1111, 0b1111_1111]); - let int = buffer.read_int(buffer.len())?.0; - assert_eq!(int.size_in_bytes(), 2); - assert_eq!(int.value(), &Int::from(32_767i64)); - Ok(()) - } - - #[test] - fn read_three_byte_negative_int() -> IonResult<()> { - let buffer = ImmutableBuffer::new(&[0b1011_1100, 0b1000_0111, 0b1000_0001]); - let int = buffer.read_int(buffer.len())?.0; - assert_eq!(int.size_in_bytes(), 3); - assert_eq!(int.value(), &Int::from(-3_966_849i64)); - Ok(()) - } - - #[test] - fn read_three_byte_positive_int() -> IonResult<()> { - let buffer = ImmutableBuffer::new(&[0b0011_1100, 0b1000_0111, 0b1000_0001]); - let int = buffer.read_int(buffer.len())?.0; - assert_eq!(int.size_in_bytes(), 3); - assert_eq!(int.value(), &Int::from(3_966_849i64)); - Ok(()) - } - - #[test] - fn read_int_overflow() -> IonResult<()> { - let data = vec![1; MAX_INT_SIZE_IN_BYTES + 1]; - let buffer = ImmutableBuffer::new(&data); // Negative zero - buffer - .read_int(buffer.len()) - .expect_err("This exceeded the configured max Int size."); - Ok(()) - } } From 982504ebe2f9a1acedfc3f986d4df35000bc7044 Mon Sep 17 00:00:00 2001 From: Richard Giliam Date: Tue, 9 Apr 2024 03:33:46 -0700 Subject: [PATCH 07/17] Removing more dead code --- src/lazy/binary/raw/v1_1/immutable_buffer.rs | 40 +------------------- 1 file changed, 2 insertions(+), 38 deletions(-) diff --git a/src/lazy/binary/raw/v1_1/immutable_buffer.rs b/src/lazy/binary/raw/v1_1/immutable_buffer.rs index f475ba6d..6ddce175 100644 --- a/src/lazy/binary/raw/v1_1/immutable_buffer.rs +++ b/src/lazy/binary/raw/v1_1/immutable_buffer.rs @@ -178,15 +178,6 @@ impl<'a> ImmutableBuffer<'a> { Ok((flex_uint, remaining)) } - #[inline(never)] - // This method is inline(never) because it is rarely invoked and its allocations/formatting - // compile to a non-trivial number of instructions. - fn value_too_large(label: &str, length: usize, max_length: usize) -> IonResult { - IonResult::decoding_error(format!( - "found {label} that was too large; size = {length}, max size = {max_length}" - )) - } - /// Attempts to decode an annotations wrapper at the beginning of the buffer and returning /// its subfields in an [`AnnotationsWrapper`]. pub fn read_annotations_wrapper( @@ -354,36 +345,9 @@ impl<'a> ImmutableBuffer<'a> { /// that the next byte in the buffer (`type_descriptor`) begins an annotations wrapper. fn read_annotated_value( self, - mut type_descriptor: TypeDescriptor, + mut _type_descriptor: TypeDescriptor, ) -> IonResult> { - let input = self; - let (wrapper, input_after_annotations) = input.read_annotations_wrapper(type_descriptor)?; - type_descriptor = input_after_annotations.peek_type_descriptor()?; - - // Confirm that the next byte begins a value, not a NOP or another annotations wrapper. - if type_descriptor.is_annotation_wrapper() { - return IonResult::decoding_error( - "found an annotations wrapper inside an annotations wrapper", - ); - } else if type_descriptor.is_nop() { - return IonResult::decoding_error("found a NOP inside an annotations wrapper"); - } - - let mut lazy_value = - input_after_annotations.read_value_without_annotations(type_descriptor)?; - if wrapper.expected_value_length != lazy_value.encoded_value.total_length() { - return IonResult::decoding_error( - "value length did not match length declared by annotations wrapper", - ); - } - - lazy_value.encoded_value.annotations_header_length = wrapper.header_length; - lazy_value.encoded_value.annotations_sequence_length = wrapper.sequence_length; - lazy_value.encoded_value.total_length += wrapper.header_length as usize; - // Modify the input to include the annotations - lazy_value.input = input; - - Ok(lazy_value) + unimplemented!(); } // DataSource Functionality From b31c2bc2b114c5c9e0aca84070a3a7e408c143bb Mon Sep 17 00:00:00 2001 From: Richard Giliam Date: Tue, 9 Apr 2024 03:36:55 -0700 Subject: [PATCH 08/17] Fix copy-pasta'd error; macros not yet implemented, not unsupported --- src/lazy/any_encoding.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/lazy/any_encoding.rs b/src/lazy/any_encoding.rs index fedd7bda..1efa6e9f 100644 --- a/src/lazy/any_encoding.rs +++ b/src/lazy/any_encoding.rs @@ -545,7 +545,7 @@ impl<'top> From> LazyRawStreamItem::::Value(value.into()) } LazyRawStreamItem::::EExpression(_) => { - unreachable!("Ion 1.0 does not support macro invocations") + unimplemented!("Macro invocations not yet implemented in binary 1.1") } LazyRawStreamItem::::EndOfStream => { LazyRawStreamItem::::EndOfStream From b068d12c24b3bcbbb58d5fd19bdbf15f3696b931 Mon Sep 17 00:00:00 2001 From: Richard Giliam Date: Tue, 9 Apr 2024 03:45:01 -0700 Subject: [PATCH 09/17] Remove changes to Never; ultimately unused --- src/lazy/never.rs | 10 +--------- 1 file changed, 1 insertion(+), 9 deletions(-) diff --git a/src/lazy/never.rs b/src/lazy/never.rs index 52a1b583..bc8c1273 100644 --- a/src/lazy/never.rs +++ b/src/lazy/never.rs @@ -8,7 +8,7 @@ use crate::lazy::encoder::value_writer::{ use crate::lazy::encoder::write_as_ion::WriteAsIon; use crate::lazy::expanded::macro_evaluator::{MacroExpr, RawEExpression}; use crate::lazy::text::raw::v1_1::reader::MacroIdRef; -use crate::raw_symbol_token_ref::{AsRawSymbolTokenRef, RawSymbolTokenRef}; +use crate::raw_symbol_token_ref::AsRawSymbolTokenRef; use crate::IonResult; /// An uninhabited type that signals to the compiler that related code paths are not reachable. @@ -38,14 +38,6 @@ impl<'top, D: LazyDecoder> From for MacroExpr<'top, D> { } } -impl Iterator for Never { - type Item = IonResult>; - - fn next(&mut self) -> Option { - unreachable!("Never implementation cannot iterate") - } -} - impl AnnotatableValueWriter for Never { type ValueWriter = Never; type AnnotatedValueWriter<'a, SymbolType: AsRawSymbolTokenRef + 'a> = Never where Self: 'a; From 17aca975f8bf670e39b89715204d05e17c45d475 Mon Sep 17 00:00:00 2001 From: Richard Giliam Date: Tue, 9 Apr 2024 03:50:17 -0700 Subject: [PATCH 10/17] Remove unneeded constants and fix is_null --- src/binary/constants.rs | 6 ------ src/lazy/binary/raw/v1_1/type_descriptor.rs | 5 +---- 2 files changed, 1 insertion(+), 10 deletions(-) diff --git a/src/binary/constants.rs b/src/binary/constants.rs index 4780fa10..a3624b51 100644 --- a/src/binary/constants.rs +++ b/src/binary/constants.rs @@ -16,10 +16,4 @@ pub mod v1_0 { pub mod v1_1 { /// Ion Version Marker byte sequence pub const IVM: [u8; 4] = [0xE0, 0x01, 0x01, 0xEA]; - - /// Constants for interpreting the length (`L`) code of binary values - pub mod length_codes { - pub const NULL: u8 = 15; - pub const VAR_UINT: u8 = 14; - } } diff --git a/src/lazy/binary/raw/v1_1/type_descriptor.rs b/src/lazy/binary/raw/v1_1/type_descriptor.rs index 3ffbf8e2..02beeb00 100644 --- a/src/lazy/binary/raw/v1_1/type_descriptor.rs +++ b/src/lazy/binary/raw/v1_1/type_descriptor.rs @@ -1,4 +1,3 @@ -use crate::binary::constants::v1_1::length_codes; use crate::lazy::binary::encoded_value::EncodedHeader; use crate::lazy::binary::raw::v1_1::IonTypeCode; use crate::IonType; @@ -64,7 +63,7 @@ impl TypeDescriptor { } pub fn is_null(&self) -> bool { - self.ion_type.is_some() && self.length_code == length_codes::NULL + self.ion_type_code == IonTypeCode::NullNull || self.ion_type_code == IonTypeCode::TypedNull } pub fn is_nop(&self) -> bool { @@ -73,12 +72,10 @@ impl TypeDescriptor { pub fn is_ivm_start(&self) -> bool { self.ion_type_code == IonTypeCode::IonVersionMarker - // self.ion_type_code == IonTypeCode::AnnotationOrIvm && self.length_code == 0 } pub fn is_annotation_wrapper(&self) -> bool { false - // self.ion_type_code == IonTypeCode::AnnotationOrIvm && self.length_code > 0 } #[inline] From 4de4b0fcbc8980d6f3e444fd36fbf5f29893a09f Mon Sep 17 00:00:00 2001 From: Richard Giliam Date: Tue, 9 Apr 2024 03:57:18 -0700 Subject: [PATCH 11/17] Address clippy checks --- src/lazy/any_encoding.rs | 6 +++--- src/lazy/binary/raw/v1_1/immutable_buffer.rs | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/src/lazy/any_encoding.rs b/src/lazy/any_encoding.rs index 1efa6e9f..dfce9621 100644 --- a/src/lazy/any_encoding.rs +++ b/src/lazy/any_encoding.rs @@ -165,9 +165,9 @@ impl<'data> LazyRawAnyReader<'data> { fn detect_encoding(data: &[u8]) -> RawReaderType { const BINARY_1_0_IVM: &[u8] = &[0xEA, 0x01, 0x00, 0xE0]; - match data { - &[0xE0, 0x01, 0x00, 0xEA, ..] => RawReaderType::Binary_1_0, - &[0xE0, 0x01, 0x01, 0xEA, ..] => RawReaderType::Binary_1_1, + match *data { + [0xE0, 0x01, 0x00, 0xEA, ..] => RawReaderType::Binary_1_0, + [0xE0, 0x01, 0x01, 0xEA, ..] => RawReaderType::Binary_1_1, _ => RawReaderType::Text_1_0, } } diff --git a/src/lazy/binary/raw/v1_1/immutable_buffer.rs b/src/lazy/binary/raw/v1_1/immutable_buffer.rs index 6ddce175..ec7c3c39 100644 --- a/src/lazy/binary/raw/v1_1/immutable_buffer.rs +++ b/src/lazy/binary/raw/v1_1/immutable_buffer.rs @@ -214,7 +214,7 @@ impl<'a> ImmutableBuffer<'a> { }; let total_nop_pad_size = 1 + size; - Ok((total_nop_pad_size as usize, remaining)) + Ok((total_nop_pad_size, remaining)) } /// Calls [`Self::read_nop_pad`] in a loop until the buffer is empty or a type descriptor From 53406a794e43a17684c6be386dae28f3a5a5b4fe Mon Sep 17 00:00:00 2001 From: Richard Giliam Date: Tue, 9 Apr 2024 12:15:04 -0700 Subject: [PATCH 12/17] Address rustdoc issues --- src/lazy/binary/encoded_value.rs | 2 +- src/lazy/binary/raw/reader.rs | 2 +- src/lazy/binary/raw/v1_1/value.rs | 4 ++-- src/lazy/binary/raw/value.rs | 2 +- src/lazy/expanded/mod.rs | 2 +- src/lazy/raw_stream_item.rs | 2 +- 6 files changed, 7 insertions(+), 7 deletions(-) diff --git a/src/lazy/binary/encoded_value.rs b/src/lazy/binary/encoded_value.rs index 7b6c32b0..ce3e9ffa 100644 --- a/src/lazy/binary/encoded_value.rs +++ b/src/lazy/binary/encoded_value.rs @@ -35,7 +35,7 @@ impl EncodedHeader for Header { /// Represents the type, offset, and length metadata of the various components of an encoded value /// in an input stream. /// -/// Each [`LazyRawValue`](super::raw::value::LazyRawBinaryValue) contains an `EncodedValue`, +/// Each [`LazyRawValue`](super::raw::value::LazyRawBinaryValue_1_0) contains an `EncodedValue`, /// allowing a user to re-read (that is: parse) the body of the value as many times as necessary /// without re-parsing its header information each time. #[derive(Clone, Copy, Debug, PartialEq)] diff --git a/src/lazy/binary/raw/reader.rs b/src/lazy/binary/raw/reader.rs index cd12c77a..a7e5a986 100644 --- a/src/lazy/binary/raw/reader.rs +++ b/src/lazy/binary/raw/reader.rs @@ -10,7 +10,7 @@ use crate::IonResult; use bumpalo::Bump as BumpAllocator; -/// A binary Ion 1.0 reader that yields [`LazyRawBinaryValue`]s representing the top level values found +/// A binary Ion 1.0 reader that yields [`LazyRawBinaryValue_1_0`]s representing the top level values found /// in the provided input stream. pub struct LazyRawBinaryReader_1_0<'data> { data: DataSource<'data>, diff --git a/src/lazy/binary/raw/v1_1/value.rs b/src/lazy/binary/raw/v1_1/value.rs index d751204a..54f79075 100644 --- a/src/lazy/binary/raw/v1_1/value.rs +++ b/src/lazy/binary/raw/v1_1/value.rs @@ -125,8 +125,8 @@ impl<'top> LazyRawBinaryValue_1_1<'top> { /// Reads this value's data, returning it as a [`RawValueRef`]. If this value is a container, /// calling this method will not read additional data; the `RawValueRef` will provide a - /// [`LazyRawBinarySequence`] or [`LazyStruct`](crate::lazy::struct::LazyStruct) - /// that can be traversed to access the container's contents. + /// [`LazyRawBinarySequence_1_1`](crate::lazy::binary::raw::v1_1::sequence::LazyRawBinarySequence_1_1) + /// or [`LazyStruct`](crate::lazy::struct::LazyStruct) that can be traversed to access the container's contents. pub fn read(&self) -> ValueParseResult<'top, BinaryEncoding_1_1> { if self.is_null() { let raw_value_ref = RawValueRef::Null(self.ion_type()); diff --git a/src/lazy/binary/raw/value.rs b/src/lazy/binary/raw/value.rs index fdbe6f3f..fe8ba9d4 100644 --- a/src/lazy/binary/raw/value.rs +++ b/src/lazy/binary/raw/value.rs @@ -147,7 +147,7 @@ impl<'top> LazyRawBinaryValue_1_0<'top> { /// Reads this value's data, returning it as a [`RawValueRef`]. If this value is a container, /// calling this method will not read additional data; the `RawValueRef` will provide a - /// [`LazyRawBinarySequence`] or [`LazyStruct`](crate::lazy::struct::LazyStruct) + /// [`LazyRawBinarySequence_1_0`] or [`LazyStruct`](crate::lazy::struct::LazyStruct) /// that can be traversed to access the container's contents. pub fn read(&self) -> ValueParseResult<'top, BinaryEncoding_1_0> { if self.is_null() { diff --git a/src/lazy/expanded/mod.rs b/src/lazy/expanded/mod.rs index f8643dd5..a6243074 100644 --- a/src/lazy/expanded/mod.rs +++ b/src/lazy/expanded/mod.rs @@ -114,7 +114,7 @@ pub enum ExpandedStreamItem<'top, D: LazyDecoder> { VersionMarker(u8, u8), /// An Ion value whose data has not yet been read. For more information about how to read its /// data and (in the case of containers) access any nested values, see the documentation - /// for [`LazyRawBinaryValue`](crate::lazy::binary::raw::value::LazyRawBinaryValue). + /// for [`LazyRawBinaryValue`](crate::lazy::binary::raw::value::LazyRawBinaryValue_1_0). Value(LazyExpandedValue<'top, D>), /// The end of the stream EndOfStream, diff --git a/src/lazy/raw_stream_item.rs b/src/lazy/raw_stream_item.rs index 5cef4fc9..62c275d5 100644 --- a/src/lazy/raw_stream_item.rs +++ b/src/lazy/raw_stream_item.rs @@ -11,7 +11,7 @@ pub enum RawStreamItem { VersionMarker(u8, u8), /// An Ion value whose data has not yet been read. For more information about how to read its /// data and (in the case of containers) access any nested values, see the documentation - /// for [`LazyRawBinaryValue`](crate::lazy::binary::raw::value::LazyRawBinaryValue). + /// for [`LazyRawBinaryValue`](crate::lazy::binary::raw::value::LazyRawBinaryValue_1_0). Value(V), /// An Ion 1.1+ macro invocation. Ion 1.0 readers will never return a macro invocation. EExpression(E), From 83fa17e6d7dec1276c8e27c520144f1735b84d21 Mon Sep 17 00:00:00 2001 From: Richard Giliam Date: Tue, 9 Apr 2024 12:44:28 -0700 Subject: [PATCH 13/17] Remove unneeded lifetime --- src/lazy/binary/raw/sequence.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/lazy/binary/raw/sequence.rs b/src/lazy/binary/raw/sequence.rs index f5548c11..2af47ceb 100644 --- a/src/lazy/binary/raw/sequence.rs +++ b/src/lazy/binary/raw/sequence.rs @@ -145,7 +145,7 @@ impl<'top> Iterator for RawBinarySequenceIterator<'top> { fn next(&mut self) -> Option { match self .source - .try_parse_next(ImmutableBuffer::<'top>::peek_sequence_value) + .try_parse_next(ImmutableBuffer::peek_sequence_value) { Ok(Some(value)) => Some(Ok(RawValueExpr::ValueLiteral(value))), Ok(None) => None, From 41c67e2ef4440ae44683b38806a7fbd426510f37 Mon Sep 17 00:00:00 2001 From: Richard Giliam Date: Thu, 11 Apr 2024 04:30:28 -0700 Subject: [PATCH 14/17] Address PR feedback --- src/lazy/any_encoding.rs | 16 +-- src/lazy/binary/raw/sequence.rs | 20 +-- src/lazy/binary/raw/struct.rs | 18 +-- .../binary/raw/v1_1/annotations_iterator.rs | 14 +- src/lazy/binary/raw/v1_1/immutable_buffer.rs | 98 ++++++-------- src/lazy/binary/raw/v1_1/reader.rs | 60 +++++++-- src/lazy/binary/raw/v1_1/sequence.rs | 28 ++-- src/lazy/binary/raw/v1_1/struct.rs | 6 +- src/lazy/binary/raw/v1_1/type_code.rs | 124 +++--------------- src/lazy/binary/raw/v1_1/type_descriptor.rs | 59 ++++----- src/lazy/binary/raw/v1_1/value.rs | 6 +- src/lazy/encoding.rs | 4 +- 12 files changed, 197 insertions(+), 256 deletions(-) diff --git a/src/lazy/any_encoding.rs b/src/lazy/any_encoding.rs index dfce9621..c1d0f260 100644 --- a/src/lazy/any_encoding.rs +++ b/src/lazy/any_encoding.rs @@ -7,24 +7,20 @@ use bumpalo::Bump as BumpAllocator; use crate::lazy::any_encoding::RawReaderKind::{Binary_1_0, Text_1_0}; use crate::lazy::binary::raw::annotations_iterator::RawBinaryAnnotationsIterator as RawBinaryAnnotationsIterator_1_0; -use crate::lazy::binary::raw::r#struct::{ - LazyRawBinaryStruct_1_0, RawBinaryStructIterator as RawBinaryStructIterator_1_0, -}; +use crate::lazy::binary::raw::r#struct::{LazyRawBinaryStruct_1_0, RawBinaryStructIterator_1_0}; use crate::lazy::binary::raw::reader::LazyRawBinaryReader_1_0; use crate::lazy::binary::raw::sequence::{ - LazyRawBinaryList_1_0, LazyRawBinarySExp_1_0, - RawBinarySequenceIterator as RawBinarySequenceIterator_1_0, + LazyRawBinaryList_1_0, LazyRawBinarySExp_1_0, RawBinarySequenceIterator_1_0, }; use crate::lazy::binary::raw::v1_1::r#struct::{ LazyRawBinaryStruct_1_1, RawBinaryStructIterator_1_1, }; use crate::lazy::binary::raw::v1_1::reader::LazyRawBinaryReader_1_1; use crate::lazy::binary::raw::v1_1::sequence::{ - LazyRawBinaryList_1_1, LazyRawBinarySExp_1_1, - RawBinarySequenceIterator as RawBinarySequenceIterator_1_1, + LazyRawBinaryList_1_1, LazyRawBinarySExp_1_1, RawBinarySequenceIterator_1_1, }; use crate::lazy::binary::raw::v1_1::value::LazyRawBinaryValue_1_1; -use crate::lazy::binary::raw::v1_1::RawBinaryAnnotationsIterator as RawBinaryAnnotationsIterator_1_1; +use crate::lazy::binary::raw::v1_1::RawBinaryAnnotationsIterator_1_1; use crate::lazy::binary::raw::value::LazyRawBinaryValue_1_0; use crate::lazy::decoder::private::{LazyContainerPrivate, LazyRawValuePrivate}; use crate::lazy::decoder::{ @@ -102,7 +98,7 @@ impl<'top> RawEExpression<'top, AnyEncoding> for LazyRawAnyEExpression<'top> { LazyRawAnyEExpressionKind::Binary_1_0(_) => unreachable!("macro in binary Ion 1.0"), LazyRawAnyEExpressionKind::Text_1_1(ref m) => m.id(), LazyRawAnyEExpressionKind::Binary_1_1(_) => { - unimplemented!("macro in binary Ion 1.1 not implemented") + todo!("macros in binary Ion 1.1 are not implemented") } } } @@ -115,7 +111,7 @@ impl<'top> RawEExpression<'top, AnyEncoding> for LazyRawAnyEExpression<'top> { encoding: LazyRawAnyMacroArgsIteratorKind::Text_1_1(m.raw_arguments()), }, LazyRawAnyEExpressionKind::Binary_1_1(_) => { - unimplemented!("macro in binary Ion 1.1 not implemented") + todo!("macros in binary Ion 1.1 are not implemented") } } } diff --git a/src/lazy/binary/raw/sequence.rs b/src/lazy/binary/raw/sequence.rs index 2af47ceb..bec9079d 100644 --- a/src/lazy/binary/raw/sequence.rs +++ b/src/lazy/binary/raw/sequence.rs @@ -29,7 +29,7 @@ impl<'top> LazyContainerPrivate<'top, BinaryEncoding_1_0> for LazyRawBinaryList_ } impl<'top> LazyRawSequence<'top, BinaryEncoding_1_0> for LazyRawBinaryList_1_0<'top> { - type Iterator = RawBinarySequenceIterator<'top>; + type Iterator = RawBinarySequenceIterator_1_0<'top>; fn annotations(&self) -> RawBinaryAnnotationsIterator<'top> { self.sequence.value.annotations() @@ -57,7 +57,7 @@ impl<'top> LazyContainerPrivate<'top, BinaryEncoding_1_0> for LazyRawBinarySExp_ } impl<'top> LazyRawSequence<'top, BinaryEncoding_1_0> for LazyRawBinarySExp_1_0<'top> { - type Iterator = RawBinarySequenceIterator<'top>; + type Iterator = RawBinarySequenceIterator_1_0<'top>; fn annotations(&self) -> RawBinaryAnnotationsIterator<'top> { self.sequence.value.annotations() @@ -86,17 +86,17 @@ impl<'top> LazyRawBinarySequence_1_0<'top> { self.value.ion_type() } - pub fn iter(&self) -> RawBinarySequenceIterator<'top> { + pub fn iter(&self) -> RawBinarySequenceIterator_1_0<'top> { // Get as much of the sequence's body as is available in the input buffer. // Reading a child value may fail as `Incomplete` let buffer_slice = self.value.available_body(); - RawBinarySequenceIterator::new(buffer_slice) + RawBinarySequenceIterator_1_0::new(buffer_slice) } } impl<'a, 'top> IntoIterator for &'a LazyRawBinarySequence_1_0<'top> { type Item = IonResult>; - type IntoIter = RawBinarySequenceIterator<'top>; + type IntoIter = RawBinarySequenceIterator_1_0<'top>; fn into_iter(self) -> Self::IntoIter { self.iter() @@ -127,19 +127,19 @@ impl<'a> Debug for LazyRawBinarySequence_1_0<'a> { } } -pub struct RawBinarySequenceIterator<'top> { +pub struct RawBinarySequenceIterator_1_0<'top> { source: DataSource<'top>, } -impl<'top> RawBinarySequenceIterator<'top> { - pub(crate) fn new(input: ImmutableBuffer<'top>) -> RawBinarySequenceIterator<'top> { - RawBinarySequenceIterator { +impl<'top> RawBinarySequenceIterator_1_0<'top> { + pub(crate) fn new(input: ImmutableBuffer<'top>) -> RawBinarySequenceIterator_1_0<'top> { + RawBinarySequenceIterator_1_0 { source: DataSource::new(input), } } } -impl<'top> Iterator for RawBinarySequenceIterator<'top> { +impl<'top> Iterator for RawBinarySequenceIterator_1_0<'top> { type Item = IonResult>; fn next(&mut self) -> Option { diff --git a/src/lazy/binary/raw/struct.rs b/src/lazy/binary/raw/struct.rs index 10e73297..5f58feaa 100644 --- a/src/lazy/binary/raw/struct.rs +++ b/src/lazy/binary/raw/struct.rs @@ -23,7 +23,7 @@ pub struct LazyRawBinaryStruct_1_0<'top> { impl<'a, 'top> IntoIterator for &'a LazyRawBinaryStruct_1_0<'top> { type Item = IonResult>; - type IntoIter = RawBinaryStructIterator<'top>; + type IntoIter = RawBinaryStructIterator_1_0<'top>; fn into_iter(self) -> Self::IntoIter { self.iter() @@ -48,11 +48,11 @@ impl<'top> LazyRawBinaryStruct_1_0<'top> { self.value.annotations() } - pub fn iter(&self) -> RawBinaryStructIterator<'top> { + pub fn iter(&self) -> RawBinaryStructIterator_1_0<'top> { // Get as much of the struct's body as is available in the input buffer. // Reading a child value may fail as `Incomplete` let buffer_slice = self.value.available_body(); - RawBinaryStructIterator::new(buffer_slice) + RawBinaryStructIterator_1_0::new(buffer_slice) } } @@ -63,7 +63,7 @@ impl<'top> LazyContainerPrivate<'top, BinaryEncoding_1_0> for LazyRawBinaryStruc } impl<'top> LazyRawStruct<'top, BinaryEncoding_1_0> for LazyRawBinaryStruct_1_0<'top> { - type Iterator = RawBinaryStructIterator<'top>; + type Iterator = RawBinaryStructIterator_1_0<'top>; fn annotations(&self) -> RawBinaryAnnotationsIterator<'top> { self.annotations() @@ -74,19 +74,19 @@ impl<'top> LazyRawStruct<'top, BinaryEncoding_1_0> for LazyRawBinaryStruct_1_0<' } } -pub struct RawBinaryStructIterator<'top> { +pub struct RawBinaryStructIterator_1_0<'top> { source: DataSource<'top>, } -impl<'top> RawBinaryStructIterator<'top> { - pub(crate) fn new(input: ImmutableBuffer<'top>) -> RawBinaryStructIterator<'top> { - RawBinaryStructIterator { +impl<'top> RawBinaryStructIterator_1_0<'top> { + pub(crate) fn new(input: ImmutableBuffer<'top>) -> RawBinaryStructIterator_1_0<'top> { + RawBinaryStructIterator_1_0 { source: DataSource::new(input), } } } -impl<'top> Iterator for RawBinaryStructIterator<'top> { +impl<'top> Iterator for RawBinaryStructIterator_1_0<'top> { type Item = IonResult>; fn next(&mut self) -> Option { diff --git a/src/lazy/binary/raw/v1_1/annotations_iterator.rs b/src/lazy/binary/raw/v1_1/annotations_iterator.rs index 80598346..8cfcb85d 100644 --- a/src/lazy/binary/raw/v1_1/annotations_iterator.rs +++ b/src/lazy/binary/raw/v1_1/annotations_iterator.rs @@ -1,18 +1,20 @@ +#![allow(non_camel_case_types)] use crate::lazy::binary::raw::v1_1::immutable_buffer::ImmutableBuffer; use crate::{IonResult, RawSymbolTokenRef}; -/// Iterates over a slice of bytes, lazily reading them as a sequence of VarUInt symbol IDs. -pub struct RawBinaryAnnotationsIterator<'a> { +/// Iterates over a slice of bytes, lazily reading them as a sequence of FlexUInt- or +/// FlexSym-encoded symbol IDs. +pub struct RawBinaryAnnotationsIterator_1_1<'a> { buffer: ImmutableBuffer<'a>, } -impl<'a> RawBinaryAnnotationsIterator<'a> { - pub(crate) fn new(buffer: ImmutableBuffer<'a>) -> RawBinaryAnnotationsIterator<'a> { - RawBinaryAnnotationsIterator { buffer } +impl<'a> RawBinaryAnnotationsIterator_1_1<'a> { + pub(crate) fn new(buffer: ImmutableBuffer<'a>) -> RawBinaryAnnotationsIterator_1_1<'a> { + Self { buffer } } } -impl<'a> Iterator for RawBinaryAnnotationsIterator<'a> { +impl<'a> Iterator for RawBinaryAnnotationsIterator_1_1<'a> { type Item = IonResult>; fn next(&mut self) -> Option { diff --git a/src/lazy/binary/raw/v1_1/immutable_buffer.rs b/src/lazy/binary/raw/v1_1/immutable_buffer.rs index ec7c3c39..6d0f7f09 100644 --- a/src/lazy/binary/raw/v1_1/immutable_buffer.rs +++ b/src/lazy/binary/raw/v1_1/immutable_buffer.rs @@ -2,9 +2,7 @@ use crate::binary::constants::v1_1::IVM; use crate::binary::var_uint::VarUInt; use crate::lazy::binary::encoded_value::EncodedValue; use crate::lazy::binary::raw::v1_1::value::LazyRawBinaryValue_1_1; -use crate::lazy::binary::raw::v1_1::{ - Header, LengthType, TypeDescriptor, ION_1_1_TYPE_DESCRIPTORS, -}; +use crate::lazy::binary::raw::v1_1::{Header, LengthType, Opcode, ION_1_1_OPCODES}; use crate::lazy::encoder::binary::v1_1::flex_int::FlexInt; use crate::lazy::encoder::binary::v1_1::flex_uint::FlexUInt; use crate::result::IonFailure; @@ -39,10 +37,6 @@ pub struct ImmutableBuffer<'a> { // offset: 6 data: &'a [u8], offset: usize, - - // Each time something is parsed from the buffer successfully, the caller will mark the number - // of bytes that may be skipped the next time `advance_to_next_item` is called. - pub bytes_to_skip: usize, } impl<'a> Debug for ImmutableBuffer<'a> { @@ -65,11 +59,7 @@ impl<'a> ImmutableBuffer<'a> { } pub fn new_with_offset(data: &[u8], offset: usize) -> ImmutableBuffer { - ImmutableBuffer { - data, - offset, - bytes_to_skip: 0, - } + ImmutableBuffer { data, offset } } /// Returns a slice containing all of the buffer's bytes. @@ -90,7 +80,6 @@ impl<'a> ImmutableBuffer<'a> { ImmutableBuffer { data: self.bytes_range(offset, length), offset: self.offset + offset, - bytes_to_skip: 0, } } @@ -131,18 +120,17 @@ impl<'a> ImmutableBuffer<'a> { Self { data: &self.data[num_bytes_to_consume..], offset: self.offset + num_bytes_to_consume, - bytes_to_skip: 0, } } /// Reads the first byte in the buffer and returns it as a [TypeDescriptor]. #[inline] - pub(crate) fn peek_type_descriptor(&self) -> IonResult { + pub(crate) fn peek_opcode(&self) -> IonResult { if self.is_empty() { - return IonResult::incomplete("a type descriptor", self.offset()); + return IonResult::incomplete("an opcode", self.offset()); } let next_byte = self.data[0]; - Ok(ION_1_1_TYPE_DESCRIPTORS[next_byte as usize]) + Ok(ION_1_1_OPCODES[next_byte as usize]) } /// Reads the first four bytes in the buffer as an Ion version marker. If it is successful, @@ -180,10 +168,7 @@ impl<'a> ImmutableBuffer<'a> { /// Attempts to decode an annotations wrapper at the beginning of the buffer and returning /// its subfields in an [`AnnotationsWrapper`]. - pub fn read_annotations_wrapper( - &self, - _type_descriptor: TypeDescriptor, - ) -> ParseResult<'a, AnnotationsWrapper> { + pub fn read_annotations_wrapper(&self, _opcode: Opcode) -> ParseResult<'a, AnnotationsWrapper> { unimplemented!(); } @@ -196,12 +181,13 @@ impl<'a> ImmutableBuffer<'a> { // expose the ability to write them. As such, this method has been marked `inline(never)` to // allow the hot path to be better optimized. pub fn read_nop_pad(self) -> ParseResult<'a, usize> { - let type_descriptor = self.peek_type_descriptor()?; + let opcode = self.peek_opcode()?; // We need to determine the size of the nop.. - let (size, remaining) = if type_descriptor.length_code == 0xC { - (1, self.consume(1)) - } else if type_descriptor.length_code == 0xD { + let (size, remaining) = if opcode.length_code == 0xC { + // Size 0; the nop is contained entirely within the OpCode. + (0, self.consume(1)) + } else if opcode.length_code == 0xD { // We have a flexuint telling us how long our nop is. let after_header = self.consume(1); let (len, rest) = after_header.read_flex_uint()?; @@ -213,37 +199,37 @@ impl<'a> ImmutableBuffer<'a> { return IonResult::decoding_error("Invalid NOP sub-type"); }; - let total_nop_pad_size = 1 + size; + let total_nop_pad_size = 1 + size; // 1 for OpCode, plus any additional NOP bytes. Ok((total_nop_pad_size, remaining)) } - /// Calls [`Self::read_nop_pad`] in a loop until the buffer is empty or a type descriptor + /// Calls [`Self::read_nop_pad`] in a loop until the buffer is empty or an opcode /// is encountered that is not a NOP. #[inline(never)] // NOP padding is not widely used in Ion 1.0. This method is annotated with `inline(never)` // to avoid the compiler bloating other methods on the hot path with its rarely used // instructions. - pub fn consume_nop_padding(self, mut type_descriptor: TypeDescriptor) -> ParseResult<'a, ()> { + pub fn consume_nop_padding(self, mut opcode: Opcode) -> ParseResult<'a, ()> { let mut buffer = self; // Skip over any number of NOP regions - while type_descriptor.is_nop() { + while opcode.is_nop() { let (_, buffer_after_nop) = buffer.read_nop_pad()?; buffer = buffer_after_nop; if buffer.is_empty() { break; } - type_descriptor = buffer.peek_type_descriptor()? + opcode = buffer.peek_opcode()? } Ok(((), buffer)) } /// Interprets the length code in the provided [Header]; if necessary, will read more bytes /// from the buffer to interpret as the value's length. If it is successful, returns an `Ok(_)` - /// containing a [VarUInt] representation of the value's length. If no additional bytes were - /// read, the returned `VarUInt`'s `size_in_bytes()` method will return `0`. + /// containing a [FlexUInt] representation of the value's length. If no additional bytes were + /// read, the returned `FlexUInt`'s `size_in_bytes()` method will return `0`. pub fn read_value_length(self, header: Header) -> ParseResult<'a, FlexUInt> { let length = match header.length_type() { - LengthType::InHeader(n) => FlexUInt::new(1, n as u64), + LengthType::InOpcode(n) => FlexUInt::new(1, n as u64), LengthType::FlexUIntFollows => { let (flexuint, _) = self.read_flex_uint()?; flexuint @@ -276,7 +262,7 @@ impl<'a> ImmutableBuffer<'a> { return Ok(None); } let mut input = self; - let mut type_descriptor = input.peek_type_descriptor()?; + let mut type_descriptor = input.peek_opcode()?; // If we find a NOP... if type_descriptor.is_nop() { // ...skip through NOPs until we found the next non-NOP byte. @@ -286,14 +272,14 @@ impl<'a> ImmutableBuffer<'a> { return Ok(None); } // Otherwise, there's a value. - type_descriptor = input.peek_type_descriptor()?; + type_descriptor = input.peek_opcode()?; } Ok(Some(input.read_value(type_descriptor)?)) } /// Reads a value from the buffer. The caller must confirm that the buffer is not empty and that /// the next byte (`type_descriptor`) is not a NOP. - fn read_value(self, type_descriptor: TypeDescriptor) -> IonResult> { + fn read_value(self, type_descriptor: Opcode) -> IonResult> { if type_descriptor.is_annotation_wrapper() { self.read_annotated_value(type_descriptor) } else { @@ -305,7 +291,7 @@ impl<'a> ImmutableBuffer<'a> { /// the next byte (`type_descriptor`) is neither a NOP nor an annotations wrapper. fn read_value_without_annotations( self, - type_descriptor: TypeDescriptor, + type_descriptor: Opcode, ) -> IonResult> { let input = self; let header = type_descriptor @@ -345,28 +331,11 @@ impl<'a> ImmutableBuffer<'a> { /// that the next byte in the buffer (`type_descriptor`) begins an annotations wrapper. fn read_annotated_value( self, - mut _type_descriptor: TypeDescriptor, + mut _type_descriptor: Opcode, ) -> IonResult> { unimplemented!(); } - // DataSource Functionality - - pub(crate) fn advance_to_next_item(&mut self) -> IonResult { - if self.len() < self.bytes_to_skip { - return IonResult::incomplete( - "cannot advance to next item, insufficient data in buffer", - self.offset(), - ); - } - - if self.bytes_to_skip > 0 { - Ok(self.consume(self.bytes_to_skip)) - } else { - Ok(*self) - } - } - /// Runs the provided parsing function on this DataSource's buffer. /// If it succeeds, marks the `DataSource` as ready to advance by the 'n' bytes /// that were consumed. @@ -375,7 +344,8 @@ impl<'a> ImmutableBuffer<'a> { &mut self, parser: F, ) -> IonResult>> { - let buffer = self.advance_to_next_item()?; + // let buffer = self.advance_to_next_item()?; + let buffer = *self; let lazy_value = match parser(buffer) { Ok(Some(output)) => output, @@ -386,7 +356,7 @@ impl<'a> ImmutableBuffer<'a> { // If the value we read doesn't start where we began reading, there was a NOP. let num_nop_bytes = lazy_value.input.offset() - buffer.offset(); self.consume(num_nop_bytes); - self.bytes_to_skip = lazy_value.encoded_value.total_length(); + // self.bytes_to_skip = lazy_value.encoded_value.total_length(); Ok(Some(lazy_value)) } } @@ -433,4 +403,18 @@ mod tests { fn vec_test() { input_test(Vec::from("foo bar baz".as_bytes())); } + + #[test] + fn validate_nop_length() { + // read_nop_pad reads a single NOP value, this test ensures that we're tracking the right + // size for these values. + + let buffer = ImmutableBuffer::new(&[0xECu8]); + let (pad_size, _) = buffer.read_nop_pad().expect("unable to read NOP pad"); + assert_eq!(pad_size, 1); + + let buffer = ImmutableBuffer::new(&[0xEDu8, 0x05, 0x00, 0x00]); + let (pad_size, _) = buffer.read_nop_pad().expect("unable to read NOP pad"); + assert_eq!(pad_size, 4); + } } diff --git a/src/lazy/binary/raw/v1_1/reader.rs b/src/lazy/binary/raw/v1_1/reader.rs index 51781cb4..83b5ceaa 100644 --- a/src/lazy/binary/raw/v1_1/reader.rs +++ b/src/lazy/binary/raw/v1_1/reader.rs @@ -1,6 +1,7 @@ #![allow(non_camel_case_types)] use crate::lazy::binary::raw::v1_1::immutable_buffer::ImmutableBuffer; +use crate::lazy::binary::raw::v1_1::value::LazyRawBinaryValue_1_1; use crate::lazy::decoder::{LazyDecoder, LazyRawReader}; use crate::lazy::encoder::private::Sealed; use crate::lazy::encoding::BinaryEncoding_1_1; @@ -12,6 +13,7 @@ use bumpalo::Bump as BumpAllocator; pub struct LazyRawBinaryReader_1_1<'data> { data: ImmutableBuffer<'data>, + bytes_to_skip: usize, // Bytes to skip in order to advance to the next item. } impl<'data> LazyRawBinaryReader_1_1<'data> { @@ -21,7 +23,10 @@ impl<'data> LazyRawBinaryReader_1_1<'data> { fn new_with_offset(data: &'data [u8], offset: usize) -> Self { let data = ImmutableBuffer::new_with_offset(data, offset); - Self { data } + Self { + data, + bytes_to_skip: 0, + } } fn read_ivm<'top>( @@ -39,7 +44,7 @@ impl<'data> LazyRawBinaryReader_1_1<'data> { )); } self.data = buffer; - self.data.bytes_to_skip = 4; + self.bytes_to_skip = 4; Ok(LazyRawStreamItem::::VersionMarker(1, 1)) } @@ -55,20 +60,35 @@ impl<'data> LazyRawBinaryReader_1_1<'data> { None => return Ok(LazyRawStreamItem::::EndOfStream), }; self.data = buffer; - self.data.bytes_to_skip = lazy_value.encoded_value.total_length(); + self.bytes_to_skip = lazy_value.encoded_value.total_length(); Ok(RawStreamItem::Value(lazy_value)) } + fn advance_to_next_item(&self) -> IonResult> { + if self.data.len() < self.bytes_to_skip { + return IonResult::incomplete( + "cannot advance to next item, insufficient data in buffer", + self.data.offset(), + ); + } + + if self.bytes_to_skip > 0 { + Ok(self.data.consume(self.bytes_to_skip)) + } else { + Ok(self.data) + } + } + pub fn next<'top>(&'top mut self) -> IonResult> where 'data: 'top, { - let mut buffer = self.data.advance_to_next_item()?; + let mut buffer = self.advance_to_next_item()?; if buffer.is_empty() { return Ok(LazyRawStreamItem::::EndOfStream); } - let type_descriptor = buffer.peek_type_descriptor()?; + let type_descriptor = buffer.peek_opcode()?; if type_descriptor.is_nop() { (_, buffer) = buffer.consume_nop_padding(type_descriptor)?; if buffer.is_empty() { @@ -80,6 +100,30 @@ impl<'data> LazyRawBinaryReader_1_1<'data> { } self.read_value(buffer) } + + /// Runs the provided parsing function on this reader's buffer. + /// If it succeeds, marks the reader as ready to advance by the 'n' bytes + /// that were consumed. + /// If it does not succeed, the `DataSource` remains unchanged. + pub(crate) fn try_parse_next< + F: Fn(ImmutableBuffer) -> IonResult>>, + >( + &mut self, + parser: F, + ) -> IonResult>> { + let buffer = self.advance_to_next_item()?; + + let lazy_value = match parser(buffer) { + Ok(Some(output)) => output, + Ok(None) => return Ok(None), + Err(e) => return Err(e), + }; + + // If the value we read doesn't start where we began reading, there was a NOP. + // let num_nop_bytes = lazy_value.input.offset() - buffer.offset(); + self.bytes_to_skip = lazy_value.encoded_value.total_length(); + Ok(Some(lazy_value)) + } } impl<'data> Sealed for LazyRawBinaryReader_1_1<'data> {} @@ -108,7 +152,7 @@ impl<'data> LazyRawReader<'data, BinaryEncoding_1_1> for LazyRawBinaryReader_1_1 } fn position(&self) -> usize { - self.data.offset() + self.data.bytes_to_skip + self.data.offset() + self.bytes_to_skip } } @@ -120,12 +164,12 @@ mod tests { #[test] fn nop() -> IonResult<()> { let data: Vec = vec![ - 0xe0, 0x01, 0x01, 0xea, // IVM + 0xE0, 0x01, 0x01, 0xEA, // IVM 0xEC, // 1-Byte NOP 0xEC, 0xEC, // 2-Byte NOP 0xEC, 0xEC, 0xEC, // 3-Byte Nop 0xED, 0x05, 0x00, 0x00, // 4-Byte NOP - 0xea, // null.null + 0xEA, // null.null ]; let mut reader = LazyRawBinaryReader_1_1::new(&data); diff --git a/src/lazy/binary/raw/v1_1/sequence.rs b/src/lazy/binary/raw/v1_1/sequence.rs index 383bb588..5a0f3369 100644 --- a/src/lazy/binary/raw/v1_1/sequence.rs +++ b/src/lazy/binary/raw/v1_1/sequence.rs @@ -1,6 +1,6 @@ #![allow(non_camel_case_types)] -use crate::lazy::binary::raw::v1_1::annotations_iterator::RawBinaryAnnotationsIterator; +use crate::lazy::binary::raw::v1_1::annotations_iterator::RawBinaryAnnotationsIterator_1_1; use crate::lazy::binary::raw::v1_1::immutable_buffer::ImmutableBuffer; use crate::lazy::binary::raw::v1_1::value::LazyRawBinaryValue_1_1; use crate::lazy::decoder::private::LazyContainerPrivate; @@ -28,9 +28,9 @@ impl<'top> LazyContainerPrivate<'top, BinaryEncoding_1_1> for LazyRawBinaryList_ } impl<'top> LazyRawSequence<'top, BinaryEncoding_1_1> for LazyRawBinaryList_1_1<'top> { - type Iterator = RawBinarySequenceIterator<'top>; + type Iterator = RawBinarySequenceIterator_1_1<'top>; - fn annotations(&self) -> RawBinaryAnnotationsIterator<'top> { + fn annotations(&self) -> RawBinaryAnnotationsIterator_1_1<'top> { self.sequence.value.annotations() } @@ -56,9 +56,9 @@ impl<'top> LazyContainerPrivate<'top, BinaryEncoding_1_1> for LazyRawBinarySExp_ } impl<'top> LazyRawSequence<'top, BinaryEncoding_1_1> for LazyRawBinarySExp_1_1<'top> { - type Iterator = RawBinarySequenceIterator<'top>; + type Iterator = RawBinarySequenceIterator_1_1<'top>; - fn annotations(&self) -> RawBinaryAnnotationsIterator<'top> { + fn annotations(&self) -> RawBinaryAnnotationsIterator_1_1<'top> { self.sequence.value.annotations() } @@ -85,17 +85,17 @@ impl<'top> LazyRawBinarySequence_1_1<'top> { self.value.ion_type() } - pub fn iter(&self) -> RawBinarySequenceIterator<'top> { + pub fn iter(&self) -> RawBinarySequenceIterator_1_1<'top> { // Get as much of the sequence's body as is available in the input buffer. // Reading a child value may fail as `Incomplete` let buffer_slice = self.value.available_body(); - RawBinarySequenceIterator::new(buffer_slice) + RawBinarySequenceIterator_1_1::new(buffer_slice) } } impl<'a, 'top> IntoIterator for &'a LazyRawBinarySequence_1_1<'top> { type Item = IonResult>; - type IntoIter = RawBinarySequenceIterator<'top>; + type IntoIter = RawBinarySequenceIterator_1_1<'top>; fn into_iter(self) -> Self::IntoIter { self.iter() @@ -126,23 +126,23 @@ impl<'a> Debug for LazyRawBinarySequence_1_1<'a> { } } -pub struct RawBinarySequenceIterator<'top> { +pub struct RawBinarySequenceIterator_1_1<'top> { source: ImmutableBuffer<'top>, } -impl<'top> RawBinarySequenceIterator<'top> { - pub(crate) fn new(input: ImmutableBuffer<'top>) -> RawBinarySequenceIterator<'top> { - RawBinarySequenceIterator { source: input } +impl<'top> RawBinarySequenceIterator_1_1<'top> { + pub(crate) fn new(input: ImmutableBuffer<'top>) -> RawBinarySequenceIterator_1_1<'top> { + RawBinarySequenceIterator_1_1 { source: input } } } -impl<'top> Iterator for RawBinarySequenceIterator<'top> { +impl<'top> Iterator for RawBinarySequenceIterator_1_1<'top> { type Item = IonResult>; fn next(&mut self) -> Option { match self .source - .try_parse_next(ImmutableBuffer::<'top>::peek_sequence_value) + .try_parse_next(ImmutableBuffer::peek_sequence_value) { Ok(Some(value)) => Some(Ok(RawValueExpr::ValueLiteral(value))), Ok(None) => None, diff --git a/src/lazy/binary/raw/v1_1/struct.rs b/src/lazy/binary/raw/v1_1/struct.rs index 59612cae..89ebd839 100644 --- a/src/lazy/binary/raw/v1_1/struct.rs +++ b/src/lazy/binary/raw/v1_1/struct.rs @@ -3,7 +3,7 @@ use std::fmt; use std::fmt::{Debug, Formatter}; -use crate::lazy::binary::raw::v1_1::annotations_iterator::RawBinaryAnnotationsIterator; +use crate::lazy::binary::raw::v1_1::annotations_iterator::RawBinaryAnnotationsIterator_1_1; use crate::lazy::binary::raw::v1_1::{ immutable_buffer::ImmutableBuffer, value::LazyRawBinaryValue_1_1, }; @@ -44,7 +44,7 @@ impl<'top> Debug for LazyRawBinaryStruct_1_1<'top> { } impl<'top> LazyRawBinaryStruct_1_1<'top> { - fn annotations(&self) -> RawBinaryAnnotationsIterator<'top> { + fn annotations(&self) -> RawBinaryAnnotationsIterator_1_1<'top> { self.value.annotations() } @@ -65,7 +65,7 @@ impl<'top> LazyContainerPrivate<'top, BinaryEncoding_1_1> for LazyRawBinaryStruc impl<'top> LazyRawStruct<'top, BinaryEncoding_1_1> for LazyRawBinaryStruct_1_1<'top> { type Iterator = RawBinaryStructIterator_1_1<'top>; - fn annotations(&self) -> RawBinaryAnnotationsIterator<'top> { + fn annotations(&self) -> RawBinaryAnnotationsIterator_1_1<'top> { self.annotations() } diff --git a/src/lazy/binary/raw/v1_1/type_code.rs b/src/lazy/binary/raw/v1_1/type_code.rs index 6c6ff65d..c086e3d6 100644 --- a/src/lazy/binary/raw/v1_1/type_code.rs +++ b/src/lazy/binary/raw/v1_1/type_code.rs @@ -6,28 +6,26 @@ use crate::IonType; /// Represents the type information found in the header byte of each binary Ion value. /// While this value can be readily mapped to a user-level [`IonType`], it is a distinct concept. -/// The IonTypeCode enum captures system-level information that is not exposed to end users of the +/// The IonOpcode enum captures system-level information that is not exposed to end users of the /// library, including: /// * Whether the cursor is positioned over whitespace that needs to be skipped. /// * Whether the integer value being read is positive or negative. /// * Whether the next type code is reserved. -/// -/// See the -/// [Typed Value Formats](https://amazon-ion.github.io/ion-docs/docs/binary.html#typed-value-formats) -/// section of the spec for more information. #[derive(Debug, PartialEq, Eq, Copy, Clone)] -pub enum IonTypeCode { - EExpression(bool), // 0x00-0x4F - With, or without, address in the opcode. - Integer, // 0x50-0x58 - Integer up to 8 bytes wide. - Float, // 0x5A-0x5D - - Boolean, // 0x5E-0x5F - - Decimal, // 0x60-0x6F - - Timestamp, // 0x70-0x7F - - String, // 0x80-0x80 - - InlineSymbol, // 0x90-0x9F - - List, // 0xA0-0xAF - - SExpression, // 0xB0-0xBF - - StructEmpty, // 0xC0 - +pub enum OpcodeType { + EExpressionWithAddress, // 0x00-0x4F - + EExpressionAddressFollows, // 0x40-0x4F - + + Integer, // 0x50-0x58 - Integer up to 8 bytes wide. + Float, // 0x5A-0x5D - + Boolean, // 0x5E-0x5F - + Decimal, // 0x60-0x6F - + Timestamp, // 0x70-0x7F - + String, // 0x80-0x80 - + InlineSymbol, // 0x90-0x9F - + List, // 0xA0-0xAF - + SExpression, // 0xB0-0xBF - + StructEmpty, // 0xC0 - // reserved StructSymAddress, // 0xD2-0xDF - // reserved @@ -47,101 +45,21 @@ pub enum IonTypeCode { // delimited s-expression start } -impl TryFrom for IonType { +impl TryFrom for IonType { type Error = IonError; - /// Attempts to convert the system-level IonTypeCode into the corresponding user-level IonType. - fn try_from(ion_type_code: IonTypeCode) -> Result { - use IonTypeCode::*; - let ion_type = match ion_type_code { + /// Attempts to convert the system-level IonOpcode into the corresponding user-level IonType. + fn try_from(opcode: OpcodeType) -> Result { + use OpcodeType::*; + let ion_type = match opcode { NullNull => IonType::Null, Nop => IonType::Null, - // NullOrNop => IonType::Null, - // Boolean => IonType::Bool, - // PositiveInteger | NegativeInteger => IonType::Int, - // Float => IonType::Float, - // Decimal => IonType::Decimal, - // Timestamp => IonType::Timestamp, - // Symbol => IonType::Symbol, - // String => IonType::String, - // Clob => IonType::Clob, - // Blob => IonType::Blob, - // List => IonType::List, - // SExpression => IonType::SExp, - // Struct => IonType::Struct, _ => { return IonResult::decoding_error(format!( - "Attempted to make an IonType from an invalid type code: {ion_type_code:?}" + "Attempted to make an IonType from an invalid opcode: {opcode:?}" )); } }; Ok(ion_type) } } - -impl TryFrom for IonTypeCode { - type Error = IonError; - - /// Attempts to convert the provided byte into an IonTypeCode. Any value greater than 15 - /// will result in an Error. - fn try_from(type_code: u8) -> Result { - use IonTypeCode::*; - let ion_type_code = match (type_code.overflowing_shr(8).0, type_code & 0x0F) { - (0xE, 0xA) => NullNull, - (0xE, 0xC..=0xD) => Nop, - // 0 => NullOrNop, - // 1 => Boolean, - // 2 => PositiveInteger, - // 3 => NegativeInteger, - // 4 => Float, - // 5 => Decimal, - // 6 => Timestamp, - // 7 => Symbol, - // 8 => String, - // 9 => Clob, - // 10 => Blob, - // 11 => List, - // 12 => SExpression, - // 13 => Struct, - // 14 => AnnotationOrIvm, - // 15 => Reserved, - _ => { - return IonResult::decoding_error(format!( - "{type_code:?} is not implemented, or an invalid type code." - )); - } // _ => { - // return IonResult::decoding_error(format!( - // "{type_code:?} is not a valid header type code." - // )); - // } - }; - Ok(ion_type_code) - } -} - -impl IonTypeCode { - /// Constant function to convert an [`IonTypeCode`] into a `u8`. - pub const fn to_u8(self) -> u8 { - use IonTypeCode::*; - match self { - EExpression(true) => 0x0, - // NullOrNop => 0, - // Boolean => 1, - // PositiveInteger => 2, - // NegativeInteger => 3, - // Float => 4, - // Decimal => 5, - // Timestamp => 6, - // Symbol => 7, - // String => 8, - // Clob => 9, - // Blob => 10, - // List => 11, - // SExpression => 12, - // Struct => 13, - // AnnotationOrIvm => 14, - // Reserved => 15, - _ => todo!(), - } - } -} diff --git a/src/lazy/binary/raw/v1_1/type_descriptor.rs b/src/lazy/binary/raw/v1_1/type_descriptor.rs index 02beeb00..26b24ae8 100644 --- a/src/lazy/binary/raw/v1_1/type_descriptor.rs +++ b/src/lazy/binary/raw/v1_1/type_descriptor.rs @@ -1,77 +1,74 @@ use crate::lazy::binary::encoded_value::EncodedHeader; -use crate::lazy::binary::raw::v1_1::IonTypeCode; +use crate::lazy::binary::raw::v1_1::OpcodeType; use crate::IonType; -/// Contains all of the information that can be extracted from the one-octet type descriptor +/// Contains all of the information that can be extracted from the one-octet Opcode /// found at the beginning of each value, annotations wrapper, IVM, or NOP in a binary Ion stream. -/// For more information, consult the -/// [Typed Value Formats](https://amazon-ion.github.io/ion-docs/docs/binary.html#typed-value-formats) -/// section of the binary Ion spec. #[derive(Copy, Clone, Debug, PartialEq)] -pub struct TypeDescriptor { - pub ion_type_code: IonTypeCode, +pub struct Opcode { + pub opcode_type: OpcodeType, pub ion_type: Option, pub length_code: u8, } /// A statically defined array of TypeDescriptor that allows a binary reader to map a given /// byte (`u8`) to a `TypeDescriptor` without having to perform any masking or bitshift operations. -pub(crate) static ION_1_1_TYPE_DESCRIPTORS: &[TypeDescriptor; 256] = &init_type_descriptor_cache(); +pub(crate) static ION_1_1_OPCODES: &[Opcode; 256] = &init_opcode_cache(); -const DEFAULT_HEADER: TypeDescriptor = TypeDescriptor { - ion_type_code: IonTypeCode::Nop, +const DEFAULT_HEADER: Opcode = Opcode { + opcode_type: OpcodeType::Nop, ion_type: None, length_code: 0, }; -pub(crate) const fn init_type_descriptor_cache() -> [TypeDescriptor; 256] { +pub(crate) const fn init_opcode_cache() -> [Opcode; 256] { let mut jump_table = [DEFAULT_HEADER; 256]; let mut index: usize = 0; while index < 256 { let byte = index as u8; - jump_table[index] = TypeDescriptor::from_byte(byte); + jump_table[index] = Opcode::from_byte(byte); index += 1; } jump_table } -impl TypeDescriptor { - /// Attempts to parse the provided byte. If the type code is unrecognized or the - /// type code + length code combination is illegal, an error will be returned. - pub const fn from_byte(byte: u8) -> TypeDescriptor { +impl Opcode { + /// Attempts to parse the provided byte. If the opcode is unrecognized or the + /// opcode + length code combination is illegal, an error will be returned. + pub const fn from_byte(byte: u8) -> Opcode { let (high_nibble, low_nibble) = (byte >> 4, byte & 0x0F); - use IonTypeCode::*; + use OpcodeType::*; - let ion_type_code = match (high_nibble, low_nibble) { + let opcode_type = match (high_nibble, low_nibble) { (0xE, 0x0) => IonVersionMarker, (0xE, 0xA) => NullNull, (0xE, 0xC..=0xD) => Nop, _ => Boolean, // Temporary, until everything is implemented to satisfy the LUT. }; - let ion_type = match ion_type_code { + let ion_type = match opcode_type { NullNull => Some(IonType::Null), Nop => None, IonVersionMarker => None, Boolean => Some(IonType::Bool), _ => panic!("the provided ion type code is either not implemented, or invalid"), }; - TypeDescriptor { + Opcode { ion_type, - ion_type_code, + opcode_type, length_code: low_nibble, } } pub fn is_null(&self) -> bool { - self.ion_type_code == IonTypeCode::NullNull || self.ion_type_code == IonTypeCode::TypedNull + self.opcode_type == OpcodeType::NullNull || self.opcode_type == OpcodeType::TypedNull } pub fn is_nop(&self) -> bool { - self.ion_type_code == IonTypeCode::Nop + self.opcode_type == OpcodeType::Nop } pub fn is_ivm_start(&self) -> bool { - self.ion_type_code == IonTypeCode::IonVersionMarker + self.opcode_type == OpcodeType::IonVersionMarker } pub fn is_annotation_wrapper(&self) -> bool { @@ -83,7 +80,7 @@ impl TypeDescriptor { let ion_type = self.ion_type?; let header = Header { ion_type, - ion_type_code: self.ion_type_code, + ion_type_code: self.opcode_type, length_code: self.length_code, }; Some(header) @@ -91,7 +88,7 @@ impl TypeDescriptor { } pub enum LengthType { - InHeader(usize), + InOpcode(u8), FlexUIntFollows, } @@ -105,7 +102,7 @@ pub struct Header { pub ion_type: IonType, // The only time the `ion_type_code` is required is to distinguish between positive // and negative integers. - pub ion_type_code: IonTypeCode, + pub ion_type_code: OpcodeType, pub length_code: u8, } @@ -113,15 +110,15 @@ impl Header { pub fn length_type(&self) -> LengthType { use LengthType::*; match (self.ion_type_code, self.length_code) { - (IonTypeCode::Nop, 0xC) => InHeader(0), - (IonTypeCode::NullNull, 0xA) => InHeader(0), + (OpcodeType::Nop, 0xC) => InOpcode(0), + (OpcodeType::NullNull, 0xA) => InOpcode(0), _ => FlexUIntFollows, } } } impl EncodedHeader for Header { - type TypeCode = IonTypeCode; + type TypeCode = OpcodeType; fn ion_type(&self) -> IonType { self.ion_type @@ -136,6 +133,6 @@ impl EncodedHeader for Header { } fn is_null(&self) -> bool { - self.ion_type_code == IonTypeCode::NullNull || self.ion_type_code == IonTypeCode::TypedNull + self.ion_type_code == OpcodeType::NullNull || self.ion_type_code == OpcodeType::TypedNull } } diff --git a/src/lazy/binary/raw/v1_1/value.rs b/src/lazy/binary/raw/v1_1/value.rs index 54f79075..82843d1e 100644 --- a/src/lazy/binary/raw/v1_1/value.rs +++ b/src/lazy/binary/raw/v1_1/value.rs @@ -8,7 +8,7 @@ use crate::{ encoded_value::{EncodedHeader, EncodedValue}, raw::{ v1_1::{ - annotations_iterator::RawBinaryAnnotationsIterator, + annotations_iterator::RawBinaryAnnotationsIterator_1_1, immutable_buffer::ImmutableBuffer, Header, }, value::ValueParseResult, @@ -119,8 +119,8 @@ impl<'top> LazyRawBinaryValue_1_1<'top> { } /// Returns an iterator over this value's unresolved annotation symbols. - pub fn annotations(&self) -> RawBinaryAnnotationsIterator<'top> { - RawBinaryAnnotationsIterator::new(self.annotations_sequence()) + pub fn annotations(&self) -> RawBinaryAnnotationsIterator_1_1<'top> { + RawBinaryAnnotationsIterator_1_1::new(self.annotations_sequence()) } /// Reads this value's data, returning it as a [`RawValueRef`]. If this value is a container, diff --git a/src/lazy/encoding.rs b/src/lazy/encoding.rs index f6c8fd14..295b923d 100644 --- a/src/lazy/encoding.rs +++ b/src/lazy/encoding.rs @@ -10,7 +10,7 @@ use crate::lazy::binary::raw::v1_1::{ r#struct::LazyRawBinaryStruct_1_1, sequence::{LazyRawBinaryList_1_1, LazyRawBinarySExp_1_1}, value::LazyRawBinaryValue_1_1, - RawBinaryAnnotationsIterator as RawBinaryAnnotationsIterator_1_1, + RawBinaryAnnotationsIterator_1_1, }; use crate::lazy::binary::raw::value::LazyRawBinaryValue_1_0; use crate::lazy::decoder::LazyDecoder; @@ -149,7 +149,7 @@ impl LazyDecoder for BinaryEncoding_1_1 { type List<'top> = LazyRawBinaryList_1_1<'top>; type Struct<'top> = LazyRawBinaryStruct_1_1<'top>; type AnnotationsIterator<'top> = RawBinaryAnnotationsIterator_1_1<'top>; - // Macros are not supported in Ion 1.0 + // TODO: implement macros in 1.1 type EExpression<'top> = Never; } From ace4b53033c9eecff66334a063908586212ba2ee Mon Sep 17 00:00:00 2001 From: Richard Giliam Date: Thu, 11 Apr 2024 04:40:27 -0700 Subject: [PATCH 15/17] Replace unimplemented macros with todo --- src/lazy/any_encoding.rs | 4 +-- .../binary/raw/v1_1/annotations_iterator.rs | 2 +- src/lazy/binary/raw/v1_1/immutable_buffer.rs | 8 +++--- src/lazy/binary/raw/v1_1/value.rs | 26 +++++++++---------- 4 files changed, 20 insertions(+), 20 deletions(-) diff --git a/src/lazy/any_encoding.rs b/src/lazy/any_encoding.rs index c1d0f260..d7875b6c 100644 --- a/src/lazy/any_encoding.rs +++ b/src/lazy/any_encoding.rs @@ -541,7 +541,7 @@ impl<'top> From> LazyRawStreamItem::::Value(value.into()) } LazyRawStreamItem::::EExpression(_) => { - unimplemented!("Macro invocations not yet implemented in binary 1.1") + todo!("Macro invocations not yet implemented in binary 1.1") } LazyRawStreamItem::::EndOfStream => { LazyRawStreamItem::::EndOfStream @@ -1042,7 +1042,7 @@ impl<'data> From> let (name, value) = match binary_field { RawFieldExpr::NameValuePair(name, value) => (name, value), RawFieldExpr::MacroInvocation(_) => { - unimplemented!("macro invocation in Ion 1.1 binary not implemented") + todo!("macro invocation in Ion 1.1 binary not implemented") } }; // Convert the binary-encoded value into an any-encoded value diff --git a/src/lazy/binary/raw/v1_1/annotations_iterator.rs b/src/lazy/binary/raw/v1_1/annotations_iterator.rs index 8cfcb85d..2fe89a6c 100644 --- a/src/lazy/binary/raw/v1_1/annotations_iterator.rs +++ b/src/lazy/binary/raw/v1_1/annotations_iterator.rs @@ -18,6 +18,6 @@ impl<'a> Iterator for RawBinaryAnnotationsIterator_1_1<'a> { type Item = IonResult>; fn next(&mut self) -> Option { - unimplemented!() + todo!() } } diff --git a/src/lazy/binary/raw/v1_1/immutable_buffer.rs b/src/lazy/binary/raw/v1_1/immutable_buffer.rs index 6d0f7f09..9c48dfa8 100644 --- a/src/lazy/binary/raw/v1_1/immutable_buffer.rs +++ b/src/lazy/binary/raw/v1_1/immutable_buffer.rs @@ -169,7 +169,7 @@ impl<'a> ImmutableBuffer<'a> { /// Attempts to decode an annotations wrapper at the beginning of the buffer and returning /// its subfields in an [`AnnotationsWrapper`]. pub fn read_annotations_wrapper(&self, _opcode: Opcode) -> ParseResult<'a, AnnotationsWrapper> { - unimplemented!(); + todo!(); } /// Reads a `NOP` encoding primitive from the buffer. If it is successful, returns an `Ok(_)` @@ -245,14 +245,14 @@ impl<'a> ImmutableBuffer<'a> { /// Reads a field ID and a value from the buffer. pub(crate) fn peek_field(self) -> IonResult>> { - unimplemented!(); + todo!(); } #[cold] /// Consumes (field ID, NOP pad) pairs until a non-NOP value is encountered in field position or /// the buffer is empty. Returns a buffer starting at the field ID before the non-NOP value. fn read_struct_field_nop_pad(self) -> IonResult)>> { - unimplemented!(); + todo!(); } /// Reads a value without a field name from the buffer. This is applicable in lists, s-expressions, @@ -333,7 +333,7 @@ impl<'a> ImmutableBuffer<'a> { self, mut _type_descriptor: Opcode, ) -> IonResult> { - unimplemented!(); + todo!(); } /// Runs the provided parsing function on this DataSource's buffer. diff --git a/src/lazy/binary/raw/v1_1/value.rs b/src/lazy/binary/raw/v1_1/value.rs index 82843d1e..265f9545 100644 --- a/src/lazy/binary/raw/v1_1/value.rs +++ b/src/lazy/binary/raw/v1_1/value.rs @@ -186,66 +186,66 @@ impl<'top> LazyRawBinaryValue_1_1<'top> { /// Helper method called by [`Self::read`]. Reads the current value as a bool. fn read_bool(&self) -> ValueParseResult<'top, BinaryEncoding_1_1> { - unimplemented!(); + todo!(); } /// Helper method called by [`Self::read`]. Reads the current value as an int. fn read_int(&self) -> ValueParseResult<'top, BinaryEncoding_1_1> { - unimplemented!(); + todo!(); } /// Helper method called by [`Self::read`]. Reads the current value as a float. fn read_float(&self) -> ValueParseResult<'top, BinaryEncoding_1_1> { - unimplemented!(); + todo!(); } /// Helper method called by [`Self::read`]. Reads the current value as a decimal. fn read_decimal(&self) -> ValueParseResult<'top, BinaryEncoding_1_1> { - unimplemented!(); + todo!(); } /// Helper method called by [`Self::read`]. Reads the current value as a timestamp. fn read_timestamp(&self) -> ValueParseResult<'top, BinaryEncoding_1_1> { - unimplemented!(); + todo!(); } /// Helper method called by [`Self::read_symbol`]. Reads the current value as a symbol ID. fn read_symbol_id(&self) -> IonResult { - unimplemented!(); + todo!(); } /// Helper method called by [`Self::read`]. Reads the current value as a symbol. fn read_symbol(&self) -> ValueParseResult<'top, BinaryEncoding_1_1> { - unimplemented!(); + todo!(); } /// Helper method called by [`Self::read`]. Reads the current value as a string. fn read_string(&self) -> ValueParseResult<'top, BinaryEncoding_1_1> { - unimplemented!(); + todo!(); } /// Helper method called by [`Self::read`]. Reads the current value as a blob. fn read_blob(&self) -> ValueParseResult<'top, BinaryEncoding_1_1> { - unimplemented!(); + todo!(); } /// Helper method called by [`Self::read`]. Reads the current value as a clob. fn read_clob(&self) -> ValueParseResult<'top, BinaryEncoding_1_1> { - unimplemented!(); + todo!(); } /// Helper method called by [`Self::read`]. Reads the current value as an S-expression. fn read_sexp(&self) -> ValueParseResult<'top, BinaryEncoding_1_1> { - unimplemented!(); + todo!(); } /// Helper method called by [`Self::read`]. Reads the current value as a list. fn read_list(&self) -> ValueParseResult<'top, BinaryEncoding_1_1> { - unimplemented!(); + todo!(); } /// Helper method called by [`Self::read`]. Reads the current value as a struct. fn read_struct(&self) -> ValueParseResult<'top, BinaryEncoding_1_1> { - unimplemented!(); + todo!(); } } From cd885ddd1e65b960341549facc88cb9819abb996 Mon Sep 17 00:00:00 2001 From: Richard Giliam Date: Thu, 11 Apr 2024 04:45:50 -0700 Subject: [PATCH 16/17] Fix rust doc --- src/lazy/binary/raw/v1_1/immutable_buffer.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/lazy/binary/raw/v1_1/immutable_buffer.rs b/src/lazy/binary/raw/v1_1/immutable_buffer.rs index 9c48dfa8..d690c577 100644 --- a/src/lazy/binary/raw/v1_1/immutable_buffer.rs +++ b/src/lazy/binary/raw/v1_1/immutable_buffer.rs @@ -123,7 +123,7 @@ impl<'a> ImmutableBuffer<'a> { } } - /// Reads the first byte in the buffer and returns it as a [TypeDescriptor]. + /// Reads the first byte in the buffer and returns it as an [Opcode]. #[inline] pub(crate) fn peek_opcode(&self) -> IonResult { if self.is_empty() { From 09f316972107e3774c876b1a41b3e680eca68e84 Mon Sep 17 00:00:00 2001 From: Richard Giliam Date: Tue, 16 Apr 2024 16:18:10 -0500 Subject: [PATCH 17/17] Apply suggestions from code review Co-authored-by: Zack Slayton --- src/lazy/binary/raw/v1_1/immutable_buffer.rs | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/lazy/binary/raw/v1_1/immutable_buffer.rs b/src/lazy/binary/raw/v1_1/immutable_buffer.rs index d690c577..1987be32 100644 --- a/src/lazy/binary/raw/v1_1/immutable_buffer.rs +++ b/src/lazy/binary/raw/v1_1/immutable_buffer.rs @@ -344,7 +344,6 @@ impl<'a> ImmutableBuffer<'a> { &mut self, parser: F, ) -> IonResult>> { - // let buffer = self.advance_to_next_item()?; let buffer = *self; let lazy_value = match parser(buffer) { @@ -356,7 +355,6 @@ impl<'a> ImmutableBuffer<'a> { // If the value we read doesn't start where we began reading, there was a NOP. let num_nop_bytes = lazy_value.input.offset() - buffer.offset(); self.consume(num_nop_bytes); - // self.bytes_to_skip = lazy_value.encoded_value.total_length(); Ok(Some(lazy_value)) } }