Skip to content

Commit

Permalink
More tests, comment cleanup
Browse files Browse the repository at this point in the history
  • Loading branch information
zslayton committed Oct 18, 2023
1 parent b02b18c commit d169fa8
Show file tree
Hide file tree
Showing 5 changed files with 102 additions and 136 deletions.
76 changes: 71 additions & 5 deletions src/lazy/expanded/macro_evaluator.rs
Original file line number Diff line number Diff line change
Expand Up @@ -291,14 +291,14 @@ impl<'top, 'data: 'top, D: LazyDecoder<'data>> MacroEvaluator<'top, 'data, D> {
/// Creates a new `Environment` for the given `invocation`.
///
/// This helper function iterates over the argument expressions in the invocation. If an argument
/// expression is a value literal or
/// expression is a value literal or macro invocation, it is added to the new environment as-is.
/// If an argument is a variable reference, it is substituted with the corresponding value literal
/// or macro invocation from the current environment and then added to the new environment.
fn make_new_evaluation_environment(
&mut self,
context: EncodingContext<'top>,
invocation: MacroExpr<'top, 'data, D>,
) -> IonResult<Environment<'top, 'data, D>> {
// Using the current environment, make a new environment for the next invocation.
// TODO: Explain better
let mut args = BumpVec::new_in(context.allocator);
for arg in invocation.arguments(self.environment()) {
args.push(arg?);
Expand Down Expand Up @@ -523,7 +523,7 @@ impl<'top, 'data, D: LazyDecoder<'data>> ValuesExpansion<'top, 'data, D> {
}
}

/// Yields the next [`MacroExpansionStep`] in this macro's evaluation.
/// Yields the next [`ValueExpr`] in this macro's evaluation.
pub fn next(
&mut self,
_context: EncodingContext<'top>,
Expand Down Expand Up @@ -567,7 +567,7 @@ impl<'top, 'data: 'top, D: LazyDecoder<'data>> MakeStringExpansion<'top, 'data,
}
}

/// Yields the next [`MacroExpansionStep`] in this `make_string` macro's evaluation.
/// Yields the next [`ValueExpr`] in this `make_string` macro's evaluation.
pub fn next(
&mut self,
context: EncodingContext<'top>,
Expand Down Expand Up @@ -763,6 +763,72 @@ mod tests {
Ok(())
}

#[test]
fn multiple_top_level_values() -> IonResult<()> {
eval_template_invocation(
"(macro foo () (values 1 2 3 4 5))",
r#"
(:foo)
"#,
r#"
1 2 3 4 5
"#,
)
}

#[test]
fn emit_symbol_table() -> IonResult<()> {
eval_template_invocation(
r#"
(macro lst (symbols)
$ion_symbol_table::{
symbols: symbols
}
)
"#,
r#"
(:lst ["foo", "bar", "baz"]) $10 $11 $12
"#,
r#"
foo bar baz
"#,
)
}

#[test]
fn context_changes_happen_between_top_level_expressions() -> IonResult<()> {
eval_template_invocation(
r#"
(macro lst (symbols)
(values
$ion_symbol_table::{
symbols: symbols
}
)
)
"#,
r#"
$ion_symbol_table::{
symbols: ["foo", "bar"]
}
// These symbols refer to the symtab defined above
$10
$11
// The $10 and $11 here _also_ refer to the symtab above because the
// new LST won't be applied until after this top-level expression.
(:values (:lst ["baz", "quux"]) $10 $11)
// These refer to the new LST
$10 $11
"#,
r#"
foo bar foo bar baz quux
"#,
)
}

#[test]
fn swap() -> IonResult<()> {
eval_template_invocation(
Expand Down
56 changes: 26 additions & 30 deletions src/lazy/expanded/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,6 @@
//! Leaving symbol tokens unresolved is an optimization; annotations, field names, and symbol values
//! that are ignored by the reader do not incur the cost of symbol table resolution.
use std::cell::RefCell;
use std::fmt::{Debug, Formatter};
use std::iter::empty;

Expand Down Expand Up @@ -69,13 +68,12 @@ pub mod e_expression;
pub mod macro_evaluator;
pub mod macro_table;
pub mod sequence;
pub mod stack;
pub mod r#struct;
pub mod template;

/// A collection of resources that can be used to encode or decode Ion values.
/// The `'top` lifetime associated with the [`EncodingContext`] reflects the fact that it can only
/// be used as long as the reader is positioned on the same top level value (i.e. the symbol and
/// be used as long as the reader is positioned on the same top level expression (i.e. the symbol and
/// macro tables are guaranteed not to change).
// It should be possible to loosen this definition of `'top` to include several top level values
// as long as the macro and symbol tables do not change between them, though this would require
Expand Down Expand Up @@ -150,7 +148,7 @@ pub struct LazyExpandingReader<'data, D: LazyDecoder<'data>> {
//
// The `EncodingContext` passed as an argument to each call to `next()` provides a bump allocator
// whose storage is guaranteed to remain available as long as the reader remains on the same
// top-level value. When an e-expression is encountered in the data stream, we can store a
// top-level expression. When an e-expression is encountered in the data stream, we can store a
// MacroEvaluator there until the reader advances to the next top-level expression. However,
// there is not a lifetime we can use that meets our use case; `'data`--the duration of the
// &[u8] from which we're reading--is too long, and `'top`--the duration of the current call
Expand All @@ -163,15 +161,16 @@ pub struct LazyExpandingReader<'data, D: LazyDecoder<'data>> {
//
// Because there is not valid lifetime we can use for the type `*mut MacroEvaluator<'lifetime>`,
// in the field below, we cast away the pointer's type for the purposes of storage and then cast
// it back at dereference time.
// it back at dereference time when a 'top lifetime is available.
evaluator_ptr: Option<*mut ()>,
// A bump allocator that is cleared between top-level expressions.
allocator: BumpAllocator,
// The encoding context can only be changed between top level expressions. This field stores
// changes that need to be applied at the next opportunity.
// TODO: Remove this RefCell when the Polonius borrow checker is available.
// See: https://github.com/rust-lang/rust/issues/70255
pending_lst: RefCell<PendingLst>,
pending_lst: PendingLst,
has_pending_lst_changes: bool,
// TODO: Make the symbol and macro tables traits on `D` such that they can be configured
// statically. Then 1.0 types can use `Never` for the macro table.
symbol_table: SymbolTable,
Expand All @@ -184,15 +183,18 @@ impl<'data, D: LazyDecoder<'data>> LazyExpandingReader<'data, D> {
raw_reader,
evaluator_ptr: None,
allocator: BumpAllocator::new(),
pending_lst: RefCell::new(PendingLst {
has_pending_lst_changes: false,
pending_lst: PendingLst {
is_lst_append: false,
symbols: Vec::new(),
}),
},
symbol_table: SymbolTable::new(),
macro_table: MacroTable::new(),
}
}

// TODO: Remove this when template definitions can be read in from the input stream and
// do not need to be registered with the reader manually.
pub(crate) fn macro_table_mut(&mut self) -> &mut MacroTable {
&mut self.macro_table
}
Expand Down Expand Up @@ -244,33 +246,27 @@ impl<'data, D: LazyDecoder<'data>> LazyExpandingReader<'data, D> {
&self,
symbol_table_value: &LazyExpandedValue<'_, 'data, D>,
) -> IonResult<()> {
LazySystemReader::process_symbol_table(
&mut self.pending_lst.borrow_mut(),
symbol_table_value,
)
let reader: &mut LazyExpandingReader<'data, D> = Self::unsafe_get_mut(self);
reader.has_pending_lst_changes = true;
LazySystemReader::process_symbol_table(&mut reader.pending_lst, symbol_table_value)
}

/// Updates the encoding context with the information stored in the `PendingLst`.
// TODO: This only works on Ion 1.0 symbol tables for now
fn apply_pending_lst(&mut self) {
// `is_empty()` will be true if the last item was not a symbol table OR if it was a symbol
// table but did not define new symbols. In either case, there's nothing for us to do.
let pending_lst: &mut PendingLst = &mut self.pending_lst.borrow_mut();
if pending_lst.symbols.is_empty() {
return;
}

// If the symbol table's `imports` field had a value of `$ion_symbol_table`, then we're
// appending the symbols it defined to the end of our existing local symbol table.
// Otherwise, we need to clear the existing table before appending the new symbols.
if !pending_lst.is_lst_append {
if !self.pending_lst.is_lst_append {
// We're setting the symbols list, not appending to it.
self.symbol_table.reset();
}
// `drain()` empties the pending symbols list
for symbol in pending_lst.symbols.drain(..) {
for symbol in self.pending_lst.symbols.drain(..) {
self.symbol_table.intern_or_add_placeholder(symbol);
}
pending_lst.is_lst_append = false;
self.pending_lst.is_lst_append = false;
self.has_pending_lst_changes = false;
}

/// Inspects a `LazyExpandedValue` to determine whether it is a symbol table or an
Expand All @@ -291,6 +287,7 @@ impl<'data, D: LazyDecoder<'data>> LazyExpandingReader<'data, D> {
}

fn unsafe_get_mut<T>(value: &T) -> &mut T {
#![allow(clippy::mut_from_ref)]
// XXX: This `unsafe` hack is a workaround for a limitation in the borrow checker that
// prevents it from understanding that a mutable borrow in the body of the loop does
// not necessarily need to prevent all other borrows.
Expand All @@ -303,25 +300,22 @@ impl<'data, D: LazyDecoder<'data>> LazyExpandingReader<'data, D> {
// Indeed, using the experimental `-Zpolonius` flag on the nightly compiler allows the
// version of this code without this `unsafe` hack to work. The alternative to the
// hack is wrapping several components inside the expanding reader in something like
// a `RefCell`, which adds a small amount of overhead to each access. Given that the
// `SymbolTable` is on the hot path and that a fix is inbound, I think this use of
// `unsafe` is warranted.
// a `RefCell`, which adds a small amount of overhead to each access. Given that this
// code is the hot path and a fix is inbound, I think this use of `unsafe` is warranted.
//
// SAFETY: This method should only be used for transient access to fields in a loop.
// The reference it produces MUST NOT be persisted across loop iterations or returned.

// Cast the reference to a const pointer
let ptr = value as *const T;
let mut_value = unsafe {
unsafe {
// Cast the const pointer to a mut pointer
let mut_ptr = ptr as *mut T;
// Cast the mut pointer to a mut reference
&mut *mut_ptr
};
}
// ^--- These operations only "exist" at compile time, and should boil away during rustc's
// optimization passes.

mut_value
}

/// This method is invoked just before the reader begins reading the next top-level expression
Expand All @@ -338,7 +332,9 @@ impl<'data, D: LazyDecoder<'data>> LazyExpandingReader<'data, D> {
// This means that nothing is holding a reference to the bump allocator's memory.
let reader = Self::unsafe_get_mut(self);
reader.evaluator_ptr = None;
reader.apply_pending_lst();
if self.has_pending_lst_changes {
reader.apply_pending_lst();
}
reader.allocator.reset();
}

Expand Down
97 changes: 0 additions & 97 deletions src/lazy/expanded/stack.rs

This file was deleted.

6 changes: 2 additions & 4 deletions src/lazy/expanded/template.rs
Original file line number Diff line number Diff line change
Expand Up @@ -271,10 +271,10 @@ impl<'top, 'data: 'top, D: LazyDecoder<'data>> Iterator
.expect("field name must be a literal");
let name_token = match LazyExpandedValue::<D>::from_template(
self.context,
// because the name token must be a literal, the env is irrelevant
Environment::empty(),
TemplateElement::new(self.template, name_element),
)
// because the name token must be a literal, the env is irrelevant
.read()
{
Ok(ExpandedValueRef::Symbol(token)) => token,
Expand Down Expand Up @@ -793,11 +793,9 @@ pub enum TemplateValue {
String(Str),
Clob(Bytes),
Blob(Bytes),
// The number of ensuing TemplateElements that are nested.
// The range of ensuing `TemplateBodyValueExpr`s that belong to this container.
List(ExprRange),
SExp(ExprRange),
// The number of fields in the struct. Each field is made up of a pair of TemplateExpansionSteps,
// a name (always a symbol, never has annotations), and a value (any element).
Struct(ExprRange),
}

Expand Down
Loading

0 comments on commit d169fa8

Please sign in to comment.