Skip to content

Commit

Permalink
Overhaul the text parsers, port from nom to winnow (#892)
Browse files Browse the repository at this point in the history
* Migrated to winnow@0.6
* Inlines some hot parsers
* Removes special cases for incompleteness detection
* `match_value` now uses `dispatch!` instead of `alt`
* Added macro to define mappings from MatchedValue to LazyRawTextValue
* Adds text version-agnostic container parsers
* Removes lots of version-specific container parsing code
* Makes raw text lists, sexps, and structs generic over Ion version
* Removes skip list for incompleteness checking.
* Removes some `unsafe` usages from `StreamingRawReader`
* Makes `IonDataSource::is_streaming()` an associated const
  • Loading branch information
zslayton authored Jan 9, 2025
1 parent 46cc6b2 commit 0943766
Show file tree
Hide file tree
Showing 28 changed files with 2,091 additions and 3,260 deletions.
2 changes: 1 addition & 1 deletion Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,7 @@ compact_str = "0.8.0"
chrono = { version = "0.4", default-features = false, features = ["clock", "std", "wasmbind"] }
delegate = "0.12.0"
thiserror = "1.0"
nom = "7.1.1"
winnow = { version = "0.6", features = ["simd"] }
num-integer = "0.1.44"
num-traits = "0.2"
arrayvec = "0.7"
Expand Down
18 changes: 6 additions & 12 deletions benches/read_many_structs.rs
Original file line number Diff line number Diff line change
Expand Up @@ -47,9 +47,8 @@ fn maximally_compact_1_1_data(num_values: usize) -> TestData_1_1 {

let text_1_1_data = r#"(:event 1670446800245 418 "6" "1" "abc123" (:: "region 4" "2022-12-07T20:59:59.744000Z"))"#.repeat(num_values);

let mut binary_1_1_data = vec![0xE0u8, 0x01, 0x01, 0xEA]; // IVM
#[rustfmt::skip]
let mut binary_1_1_data_body: Vec<u8> = [MacroTable::FIRST_USER_MACRO_ID as u8, // Macro ID
let binary_1_1_data: Vec<u8> = [MacroTable::FIRST_USER_MACRO_ID as u8, // Macro ID
0b10, // [NOTE: `0b`] `parameters*` arg is an arg group
0x66, // 6-byte integer (`timestamp` param)
0x75, 0x5D, 0x63, 0xEE, 0x84, 0x01,
Expand All @@ -73,7 +72,6 @@ fn maximally_compact_1_1_data(num_values: usize) -> TestData_1_1 {
0x39, 0x3A, 0x35, 0x39,
0x2E, 0x37, 0x34, 0x34,
0x30, 0x30, 0x30, 0x5A].repeat(num_values);
binary_1_1_data.append(&mut binary_1_1_data_body);
TestData_1_1 {
name: "maximally compact".to_owned(),
template_definition_text,
Expand Down Expand Up @@ -107,9 +105,8 @@ fn moderately_compact_1_1_data(num_values: usize) -> TestData_1_1 {
"#;

let text_1_1_data = r#"(:event 1670446800245 418 "scheduler-thread-6" "example-client-1" "aws-us-east-5f-abc123" (:: "region 4" "2022-12-07T20:59:59.744000Z"))"#.repeat(num_values);
let mut binary_1_1_data = vec![0xE0u8, 0x01, 0x01, 0xEA]; // IVM
#[rustfmt::skip]
let mut binary_1_1_data_body: Vec<u8> = [MacroTable::FIRST_USER_MACRO_ID as u8, // Macro ID
let binary_1_1_data: Vec<u8> = [MacroTable::FIRST_USER_MACRO_ID as u8, // Macro ID
0b10, // [NOTE: `0b` prefix] `parameters*` arg is an arg group
0x66, // 6-byte integer (`timestamp` param)
0x75, 0x5D, 0x63, 0xEE, 0x84, 0x01,
Expand Down Expand Up @@ -142,7 +139,6 @@ fn moderately_compact_1_1_data(num_values: usize) -> TestData_1_1 {
0x2E, 0x37, 0x34, 0x34,
0x30, 0x30, 0x30, 0x5A].repeat(num_values);

binary_1_1_data.append(&mut binary_1_1_data_body);
TestData_1_1 {
name: "moderately compact".to_owned(),
template_definition_text: template_definition_text.to_owned(),
Expand Down Expand Up @@ -176,9 +172,8 @@ fn length_prefixed_moderately_compact_1_1_data(num_values: usize) -> TestData_1_
"#;

let text_1_1_data = r#"(:event 1670446800245 418 "scheduler-thread-6" "example-client-1" "aws-us-east-5f-abc123" (:: "region 4" "2022-12-07T20:59:59.744000Z"))"#.repeat(num_values);
let mut binary_1_1_data = vec![0xE0u8, 0x01, 0x01, 0xEA]; // IVM
#[rustfmt::skip]
let mut binary_1_1_data_body: Vec<u8> = [0xF5, // LP invocation
let binary_1_1_data: Vec<u8> = [0xF5, // LP invocation
((MacroTable::FIRST_USER_MACRO_ID * 2) + 1) as u8, // Macro ID
0xDF, // Length prefix: FlexUInt 111
0b10, // [NOTE: `0b` prefix] `parameters*` arg is an arg group
Expand Down Expand Up @@ -213,7 +208,6 @@ fn length_prefixed_moderately_compact_1_1_data(num_values: usize) -> TestData_1_
0x2E, 0x37, 0x34, 0x34,
0x30, 0x30, 0x30, 0x5A].repeat(num_values);

binary_1_1_data.append(&mut binary_1_1_data_body);
TestData_1_1 {
name: "moderately compact w/length-prefixed top level".to_owned(),
template_definition_text: template_definition_text.to_owned(),
Expand Down Expand Up @@ -444,12 +438,12 @@ mod benchmark {
b.iter(|| {
// We don't have an API for doing this with the application-level reader yet, so
// for now we use a manually configured context and a raw reader.
let mut reader = LazyRawBinaryReader_1_1::new(binary_1_1_data);
let mut reader = LazyRawBinaryReader_1_1::new(context_ref, binary_1_1_data);
let mut num_top_level_values: usize = 0;
// Skip past the IVM
reader.next(context_ref).unwrap().expect_ivm().unwrap();
reader.next().unwrap().expect_ivm().unwrap();
// Expect every top-level item to be an e-expression.
while let RawStreamItem::EExp(raw_eexp) = reader.next(context_ref).unwrap() {
while let RawStreamItem::EExp(raw_eexp) = reader.next().unwrap() {
num_top_level_values += 1;
// Look up the e-expression's invoked macro ID in the encoding context.
let eexp = raw_eexp.resolve(context_ref).unwrap();
Expand Down
Loading

0 comments on commit 0943766

Please sign in to comment.