From 0c2c23c6c0b5fb40dcd828c0fb62bcd358407cab Mon Sep 17 00:00:00 2001 From: Zack Slayton Date: Thu, 7 Nov 2024 09:30:48 -0500 Subject: [PATCH 1/2] Adds binary eexp subfield span accessors --- src/lazy/any_encoding.rs | 4 + src/lazy/binary/raw/v1_1/e_expression.rs | 79 ++++++++++++-------- src/lazy/binary/raw/v1_1/immutable_buffer.rs | 5 ++ src/lazy/span.rs | 20 +++++ 4 files changed, 77 insertions(+), 31 deletions(-) diff --git a/src/lazy/any_encoding.rs b/src/lazy/any_encoding.rs index 187cc997..d95dd570 100644 --- a/src/lazy/any_encoding.rs +++ b/src/lazy/any_encoding.rs @@ -202,6 +202,10 @@ pub enum LazyRawAnyEExpressionKind<'top> { } impl<'top> LazyRawAnyEExpression<'top> { + pub fn kind(&self) -> LazyRawAnyEExpressionKind<'top> { + self.encoding + } + pub fn encoding(&self) -> IonEncoding { use LazyRawAnyEExpressionKind::*; match self.encoding { diff --git a/src/lazy/binary/raw/v1_1/e_expression.rs b/src/lazy/binary/raw/v1_1/e_expression.rs index 82c8140d..3be383c9 100644 --- a/src/lazy/binary/raw/v1_1/e_expression.rs +++ b/src/lazy/binary/raw/v1_1/e_expression.rs @@ -18,32 +18,6 @@ use crate::lazy::text::raw::v1_1::arg_group::{EExpArg, EExpArgExpr}; use crate::lazy::text::raw::v1_1::reader::MacroIdRef; use crate::{try_or_some_err, v1_1, Environment, HasRange, HasSpan, IonResult, Span}; -#[derive(Copy, Clone)] -pub struct BinaryEExpHeader { - // The number of bytes that were used to encode the e-expression's opcode and address. - opcode_and_address_length: u8, - // The number of bytes that were used to encode the e-expression's arg grouping bitmap, if any. 
- bitmap_length: u8, -} - -impl BinaryEExpHeader { - pub fn new(opcode_length: u8, bitmap_length: u8) -> Self { - Self { - opcode_and_address_length: opcode_length, - bitmap_length, - } - } - pub fn address_and_opcode_length(&self) -> usize { - self.opcode_and_address_length as usize - } - pub fn bitmap_length(&self) -> usize { - self.bitmap_length as usize - } - pub fn header_length(&self) -> usize { - self.address_and_opcode_length() + self.bitmap_length() - } -} - /// An e-expression which has been parsed from a binary Ion 1.1 stream. #[derive(Copy, Clone)] pub struct BinaryEExpression_1_1<'top> { @@ -64,11 +38,14 @@ pub struct BinaryEExpression_1_1<'top> { cache: Option<&'top [ValueExpr<'top, BinaryEncoding_1_1>]>, macro_ref: MacroRef<'top>, bitmap_bits: u64, - // The index of `input` at which the bitmap can be found. If there is no bitmap, this index - // will be the beginning of the encoded arguments. + // This index is the first position after the opcode and address. + // If the e-expression has a length prefix, it will begin at this position in `input`. + length_offset: u8, + // This index is the first position after the opcode, address, and length prefix. + // If the e-expression has a bitmap, it will begin at this position in `input`. bitmap_offset: u8, - // The index at which the arguments to the e-expression begin within `input`. This index is - // the first position after the opcode, address, length, and bitmap. + // This index is the first position after the opcode, address, length, and bitmap. + // If the e-expression has arguments, they will begin at this position in `input`. 
args_offset: u8, pub(crate) input: BinaryBuffer<'top>, @@ -79,6 +56,7 @@ impl<'top> BinaryEExpression_1_1<'top> { macro_ref: MacroRef<'top>, bitmap_bits: u64, input: BinaryBuffer<'top>, + length_offset: u8, bitmap_offset: u8, args_offset: u8, ) -> Self { @@ -86,19 +64,58 @@ impl<'top> BinaryEExpression_1_1<'top> { bitmap_bits, input, macro_ref, + length_offset, bitmap_offset, args_offset, cache: None, } } - pub fn with_arg_expr_cache( + pub(crate) fn with_arg_expr_cache( mut self, cache: &'top [ValueExpr<'top, BinaryEncoding_1_1>], ) -> Self { self.cache = Some(cache); self } + + /// Returns a span of bytes representing the opcode and macro address. + /// Depending on the encoding, these may be distinct (for example, the span: `0xF4 0x01`, + /// where the `0xF4` is the opcode and the `0x01` is the `FlexUInt` address) or combined + /// (for example: `0x00` is both an opcode and a macro address). + pub fn opcode_and_address_span(&self) -> Span<'top> { + self.input.slice(0, self.length_offset as usize).into() + } + + /// Returns `true` if this binary e-expression includes a length prefix. + pub fn has_length_prefix(&self) -> bool { + // If these offsets are equal, there are no bytes representing the length. + self.length_offset == self.bitmap_offset + } + + /// Returns a span of bytes representing the length prefix. If there is no length prefix, + /// the returned span will be empty. + pub fn length_prefix_span(&self) -> Span<'top> { + let num_bytes = (self.bitmap_offset - self.length_offset) as usize; + self.input + .slice(self.length_offset as usize, num_bytes) + .into() + } + + /// Returns `true` if this binary e-expression includes an argument encoding bitmap. + pub fn has_bitmap(&self) -> bool { + // If these offsets are equal, there are no bytes representing the length. + self.bitmap_offset == self.args_offset + } + + /// Returns a span of bytes representing the e-expression's argument encoding bitmap. 
+ /// If there is no argument encoding bitmap, the returned span will be empty. + pub fn bitmap_span(&self) -> Span<'top> { + let num_bytes = (self.args_offset - self.bitmap_offset) as usize; + self.input + .slice(self.bitmap_offset as usize, num_bytes) + .into() + } } impl<'top> HasSpan<'top> for &'top BinaryEExpression_1_1<'top> { diff --git a/src/lazy/binary/raw/v1_1/immutable_buffer.rs b/src/lazy/binary/raw/v1_1/immutable_buffer.rs index 487e7e6d..645cf92b 100644 --- a/src/lazy/binary/raw/v1_1/immutable_buffer.rs +++ b/src/lazy/binary/raw/v1_1/immutable_buffer.rs @@ -963,6 +963,9 @@ impl<'a> BinaryBuffer<'a> { MacroRef::new(macro_address, macro_ref), bitmap_bits, matched_eexp_bytes, + // There is no length prefix, so we re-use the bitmap_offset as the first position + // beyond the opcode and address subfields. + bitmap_offset as u8, bitmap_offset as u8, args_offset as u8, ) @@ -996,6 +999,7 @@ impl<'a> BinaryBuffer<'a> { })? .reference(); // Offset from `self`, not offset from the beginning of the stream. 
+ let length_offset = (input_after_address.offset() - self.offset()) as u8; let bitmap_offset = (input_after_length.offset() - self.offset()) as u8; let (bitmap_bits, _input_after_bitmap) = input_after_length.read_eexp_bitmap(macro_ref.signature().bitmap_size_in_bytes())?; @@ -1006,6 +1010,7 @@ impl<'a> BinaryBuffer<'a> { MacroRef::new(macro_address, macro_ref), bitmap_bits, matched_bytes, + length_offset, bitmap_offset, args_offset, ), diff --git a/src/lazy/span.rs b/src/lazy/span.rs index 42126b51..337fff9b 100644 --- a/src/lazy/span.rs +++ b/src/lazy/span.rs @@ -1,3 +1,5 @@ +use crate::lazy::binary::raw::v1_1::immutable_buffer::BinaryBuffer; +use crate::lazy::text::buffer::TextBuffer; use crate::result::IonFailure; use crate::{IonError, IonResult}; use std::ops::Range; @@ -62,3 +64,21 @@ impl<'a> Span<'a> { self.bytes.is_empty() } } + +impl<'a> From<BinaryBuffer<'a>> for Span<'a> { + fn from(value: BinaryBuffer<'a>) -> Self { + Span { + bytes: value.bytes(), + offset: value.offset(), + } + } +} + +impl<'a> From<TextBuffer<'a>> for Span<'a> { + fn from(value: TextBuffer<'a>) -> Self { + Span { + bytes: value.bytes(), + offset: value.offset(), + } + } +} From 817f78ace479fe7f4d17edb2f0e9873803337e40 Mon Sep 17 00:00:00 2001 From: Zack Slayton Date: Thu, 7 Nov 2024 13:20:21 -0500 Subject: [PATCH 2/2] fix typo in `has` methods --- src/lazy/binary/raw/v1_1/e_expression.rs | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/lazy/binary/raw/v1_1/e_expression.rs b/src/lazy/binary/raw/v1_1/e_expression.rs index 3be383c9..1abe2b2d 100644 --- a/src/lazy/binary/raw/v1_1/e_expression.rs +++ b/src/lazy/binary/raw/v1_1/e_expression.rs @@ -90,7 +90,7 @@ impl<'top> BinaryEExpression_1_1<'top> { /// Returns `true` if this binary e-expression includes a length prefix. pub fn has_length_prefix(&self) -> bool { // If these offsets are equal, there are no bytes representing the length. 
- self.length_offset == self.bitmap_offset + self.length_offset != self.bitmap_offset } /// Returns a span of bytes representing the length prefix. If there is no length prefix, @@ -104,8 +104,8 @@ impl<'top> BinaryEExpression_1_1<'top> { /// Returns `true` if this binary e-expression includes an argument encoding bitmap. pub fn has_bitmap(&self) -> bool { - // If these offsets are equal, there are no bytes representing the length. - self.bitmap_offset == self.args_offset + // If these offsets are equal, there are no bytes representing the bitmap. + self.bitmap_offset != self.args_offset } /// Returns a span of bytes representing the e-expression's argument encoding bitmap.