From 57bd39a233ab2f588f3c70b88ee2bc7dbb41cd17 Mon Sep 17 00:00:00 2001 From: Zack Slayton Date: Mon, 16 Dec 2024 14:35:32 -0500 Subject: [PATCH] Support for unqualified macro addresses in TDL --- src/lazy/binary/raw/v1_1/immutable_buffer.rs | 44 +++++---- src/lazy/expanded/compiler.rs | 96 ++++++++++++-------- 2 files changed, 79 insertions(+), 61 deletions(-) diff --git a/src/lazy/binary/raw/v1_1/immutable_buffer.rs b/src/lazy/binary/raw/v1_1/immutable_buffer.rs index 7080f02a..2f39ad50 100644 --- a/src/lazy/binary/raw/v1_1/immutable_buffer.rs +++ b/src/lazy/binary/raw/v1_1/immutable_buffer.rs @@ -51,7 +51,7 @@ pub struct BinaryBuffer<'a> { impl Debug for BinaryBuffer<'_> { fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { - write!(f, "BinaryBuffer {{")?; + write!(f, "BinaryBuffer @ offset={} {{", self.offset)?; for byte in self.bytes().iter().take(16) { write!(f, "{:x?} ", *byte)?; } @@ -244,7 +244,7 @@ impl<'a> BinaryBuffer<'a> { }; if self.len() < size_in_bytes { - return IonResult::incomplete("reading a flex_uint value", self.offset()); + return IonResult::incomplete("a flex_uint value", self.offset()); } // XXX: This *doesn't* slice `self` because FlexUInt::read() is faster if the input // is at least the size of a u64. @@ -544,16 +544,13 @@ impl<'a> BinaryBuffer<'a> { }; if after_name.is_empty() { - return IonResult::incomplete("found field name but no value", after_name.offset()); + return IonResult::incomplete("a struct field value", after_name.offset()); } let (field, after_value) = match after_name.peek_delimited_struct_value()? { (None, after) => { if after.is_empty() { - return IonResult::incomplete( - "found field name but no value", - after.offset(), - ); + return IonResult::incomplete("a struct field value", after.offset()); } buffer = after; continue; // No value for this field, loop to try next field. @@ -653,10 +650,7 @@ impl<'a> BinaryBuffer<'a> { }; if total_length > input.len() { - return IonResult::incomplete( - "the stream ended unexpectedly in the middle of a value", - header_offset, - ); + return IonResult::incomplete("a value", header_offset); } let encoded_value = EncodedValue { @@ -748,7 +742,7 @@ impl<'a> BinaryBuffer<'a> { let sequence_length = flex_uint.value() as usize; if input_after_header.len() < sequence_length { return IonResult::incomplete( - "reading an annotations sequence", + "an annotations sequence", input_after_header.offset(), ); } @@ -835,7 +829,7 @@ impl<'a> BinaryBuffer<'a> { let sequence_length = flex_uint.value() as usize; if input_after_header.len() < sequence_length { return IonResult::incomplete( - "reading an annotations sequence", + "an annotations sequence", input_after_header.offset(), ); } @@ -875,7 +869,7 @@ impl<'a> BinaryBuffer<'a> { ), EExpressionWith12BitAddress => { if self.len() < 2 { - return IonResult::incomplete("parsing a 12-bit e-exp address", self.offset); + return IonResult::incomplete("a 12-bit e-exp address", self.offset); } let bias = ((opcode.byte as usize & 0x0F) << 8) + 64; @@ -885,7 +879,7 @@ impl<'a> BinaryBuffer<'a> { } EExpressionWith20BitAddress => { if self.len() < 3 { - return IonResult::incomplete("parsing a 20-bit e-exp address", self.offset); + return IonResult::incomplete("a 20-bit e-exp address", self.offset); } let bias = ((opcode.byte as usize & 0x0F) << 16) + 4160; let (fixed_uint, input_after_opcode) = self.consume(1).read_fixed_uint(2)?; @@ -897,7 +891,7 @@ impl<'a> BinaryBuffer<'a> { SystemEExpression => { // The next byte is the system macro address; make sure we have another byte available if self.len() < 2 { - return IonResult::incomplete("parsing a system macro address", self.offset); + return IonResult::incomplete("a system macro address", self.offset); } let address = self.bytes()[1] as usize; let system_macro_address = SystemMacroAddress::new(address).ok_or_else(|| { @@ -913,6 +907,7 @@ impl<'a> BinaryBuffer<'a> { } _ => unreachable!("read_e_expression called with invalid opcode"), }; + self.read_eexp_with_id(input_after_address, macro_id) } @@ -934,7 +929,7 @@ impl<'a> BinaryBuffer<'a> { #[inline(never)] || { IonError::decoding_error(format!( - "invocation of macro at unknown ID '{macro_id:?}'" + "invocation of macro at unknown ID '{macro_id:?}', buffer: {self:?}" )) }, )? @@ -1012,6 +1007,9 @@ impl<'a> BinaryBuffer<'a> { let args_length = args_length_flex_uint.value() as usize; let total_length = header_length + args_length; + if self.len() < total_length { + return IonResult::incomplete("a length-prefixed e-expression", self.offset); + } let matched_bytes = self.slice(0, total_length); let macro_ref = self .context @@ -1044,16 +1042,16 @@ impl<'a> BinaryBuffer<'a> { } fn read_eexp_bitmap(self, bitmap_size_in_bytes: usize) -> ParseResult<'a, u64> { - let bitmap_bytes = self.peek_n_bytes(bitmap_size_in_bytes).ok_or_else(|| { - IonError::incomplete("parsing an e-exp arg grouping bitmap", self.offset) - })?; + let bitmap_bytes = self + .peek_n_bytes(bitmap_size_in_bytes) + .ok_or_else(|| IonError::incomplete("an e-exp arg grouping bitmap", self.offset))?; if bitmap_size_in_bytes == 1 { return Ok((bitmap_bytes[0] as u64, self.consume(1))); } let mut buffer = [0u8; size_of::()]; - let bitmap_bytes = self.peek_n_bytes(bitmap_size_in_bytes).ok_or_else(|| { - IonError::incomplete("parsing an e-exp arg grouping bitmap", self.offset) - })?; + let bitmap_bytes = self + .peek_n_bytes(bitmap_size_in_bytes) + .ok_or_else(|| IonError::incomplete("an e-exp arg grouping bitmap", self.offset))?; buffer[..bitmap_size_in_bytes].copy_from_slice(bitmap_bytes); let bitmap_u64 = u64::from_le_bytes(buffer); Ok((bitmap_u64, self.consume(bitmap_size_in_bytes))) diff --git a/src/lazy/expanded/compiler.rs b/src/lazy/expanded/compiler.rs index c8ecd71a..71f28d95 100644 --- a/src/lazy/expanded/compiler.rs +++ b/src/lazy/expanded/compiler.rs @@ -1113,7 +1113,16 @@ impl TemplateCompiler { Some(Err(e)) => return Err(e), Some(Ok(value)) => value, }; - Self::resolve_macro_id_expr(tdl_context, value) + Self::expect_macro_id_expr(tdl_context, value) + } + + fn expect_macro_id_expr( + tdl_context: TdlContext<'_>, + id_expr: LazyValue<'_, D>, + ) -> IonResult> { + Self::resolve_macro_id_expr(tdl_context, id_expr)?.ok_or_else(|| { + IonError::decoding_error(format!("could not resolve macro id {:?}", id_expr)) + }) } /// Given a `LazyValue` that represents a macro ID (name or address), attempts to resolve the @@ -1121,7 +1130,7 @@ impl TemplateCompiler { fn resolve_macro_id_expr( tdl_context: TdlContext<'_>, id_expr: LazyValue<'_, D>, - ) -> IonResult> { + ) -> IonResult>> { let macro_id = match id_expr.read()? { ValueRef::Symbol(s) => { if let Some(name) = s.text() { @@ -1144,27 +1153,20 @@ impl TemplateCompiler { }; let mut annotations = id_expr.annotations(); - if let Some(module_name) = annotations.next().transpose()? { + let maybe_macro = if let Some(module_name) = annotations.next().transpose()? { Self::resolve_qualified_macro_id( tdl_context.context, module_name.expect_text()?, macro_id, ) - .ok_or_else(|| { - IonError::decoding_error(format!( - "macro '{module_name:?}::{macro_id}' has not been defined (yet?)" - )) - }) } else { Self::resolve_unqualified_macro_id( tdl_context.context, tdl_context.pending_macros, macro_id, ) - .ok_or_else(|| { - IonError::decoding_error(format!("macro '{macro_id}' has not been defined (yet?)")) - }) - } + }; + Ok(maybe_macro) } /// Visits all of the arguments to a `(literal ...)` operation, adding them to the `TemplateBody` @@ -1412,44 +1414,62 @@ impl<'top, D: Decoder> TdlSExpKind<'top, D> { } }; - let operation_name = TemplateCompiler::expect_symbol_text("operation name", operation)?; + // In most cases, an expression in this position is a macro ID. Try to resolve it. + if let Some(macro_ref) = TemplateCompiler::resolve_macro_id_expr(tdl_context, operation)? { + return Ok(TdlSExpKind::MacroInvocation(macro_ref, expressions)); + } + + // If look-up fails to resolve to a macro, it might be a special form. + Self::expect_special_form(tdl_context, operation, expressions) + } + fn expect_special_form( + tdl_context: TdlContext<'_>, + operation: LazyValue<'top, D>, + expressions: SExpIterator<'top, D>, + ) -> IonResult> { // TDL-only operations that are not in the system macro table. static SPECIAL_FORM_NAMES: phf::Set<&'static str> = phf_set!("literal", "if_none", "if_some", "if_single", "if_multi"); + let ValueRef::Symbol(operation_name_symbol) = operation.read()? else { + return IonResult::decoding_error(format!("could not resolve macro ID {operation:?}")); + }; + let operation_name = operation_name_symbol + .text() + .ok_or_else(|| IonError::decoding_error("found operation name with no text"))?; + let is_special_form = SPECIAL_FORM_NAMES.contains(operation_name) // If it's qualified to the system namespace, it's a special form. && (operation.annotations().are(["$ion"])? - // Otherwise, if it has no annotations... - || (!first_expr.has_annotations() - // ...and has not been shadowed by a user-defined macro name, it's a special form. - && tdl_context.pending_macros.macro_with_name(operation_name).is_none() - && tdl_context.context.macro_table.macro_with_name(operation_name).is_none())); - - if is_special_form { - let special_form_macro: &Arc = match operation_name { - // The 'literal' operation exists only at compile time... - "literal" => return Ok(TdlSExpKind::Literal(expressions)), - // ...while the cardinality tests are implemented as different flavors of - // the `ConditionalExpansion` macro. - "if_none" => &IF_NONE_MACRO, - "if_some" => &IF_SOME_MACRO, - "if_single" => &IF_SINGLE_MACRO, - "if_multi" => &IF_MULTI_MACRO, - other => unreachable!("unknown name '{}' found in special forms set", other), - }; + // Otherwise, if it has no annotations... + || (!operation.has_annotations() + // ...and has not been shadowed by a user-defined macro name, it's a special form. + && tdl_context.pending_macros.macro_with_name(operation_name).is_none() + && tdl_context.context.macro_table.macro_with_name(operation_name).is_none())); - return Ok(TdlSExpKind::MacroInvocation( - Arc::clone(special_form_macro), - expressions, + if !is_special_form { + return IonResult::decoding_error(format!( + "could not resolve macro ID {operation_name:?}" )); } - // At this point, we know the sexp must be a normal macro invocation. - // Resolve the macro name or address to the macro it represents. - let macro_ref = TemplateCompiler::resolve_macro_id_expr(tdl_context, operation)?; - Ok(TdlSExpKind::MacroInvocation(macro_ref, expressions)) + let special_form_macro: &Arc = match operation_name { + // The 'literal' operation exists only at compile time... + "literal" => return Ok(TdlSExpKind::Literal(expressions)), + // ...while the cardinality tests are implemented as different flavors of + // the `ConditionalExpansion` macro. + "if_none" => &IF_NONE_MACRO, + "if_some" => &IF_SOME_MACRO, + "if_single" => &IF_SINGLE_MACRO, + "if_multi" => &IF_MULTI_MACRO, + other => unreachable!("unknown name '{}' found in special forms set", other), + }; + + Ok(TdlSExpKind::MacroInvocation( + Arc::clone(special_form_macro), + expressions, + )) } }