diff --git a/ion-schema-tests-runner/tests/ion-schema-tests-2-0.rs b/ion-schema-tests-runner/tests/ion-schema-tests-2-0.rs index e5488c9..1cba148 100644 --- a/ion-schema-tests-runner/tests/ion-schema-tests-2-0.rs +++ b/ion-schema-tests-runner/tests/ion-schema-tests-2-0.rs @@ -5,7 +5,6 @@ ion_schema_tests!( ignored( // Not fully implemented yet. "imports", - "constraints::ordered_elements", // Failing because of https://github.com/amazon-ion/ion-rust/issues/399 "constraints::regex::value_should_be_invalid_for_type_regex_unescaped_newline__2_", ) diff --git a/ion-schema/src/constraint.rs b/ion-schema/src/constraint.rs index 60ae6d4..b5ba280 100644 --- a/ion-schema/src/constraint.rs +++ b/ion-schema/src/constraint.rs @@ -12,7 +12,7 @@ use crate::isl::util::{ }; use crate::isl::IslVersion; use crate::isl_require; -use crate::nfa::{FinalState, NfaBuilder, NfaEvaluation}; +use crate::ordered_elements_nfa::OrderedElementsNfa; use crate::result::{ invalid_schema_error, invalid_schema_error_raw, IonSchemaResult, ValidationResult, }; @@ -32,7 +32,6 @@ use std::fmt::{Display, Formatter}; use std::iter::Peekable; use std::ops::Neg; use std::str::Chars; -use std::sync::Arc; /// Provides validation for schema Constraint pub trait ConstraintValidator { @@ -779,114 +778,40 @@ impl ConstraintValidator for TypeConstraint { /// [ordered_elements]: https://amazon-ion.github.io/ion-schema/docs/isl-1-0/spec#ordered_elements #[derive(Debug, Clone, PartialEq)] pub struct OrderedElementsConstraint { - type_references: Vec, + nfa: OrderedElementsNfa, } impl OrderedElementsConstraint { pub fn new(type_references: Vec) -> Self { - Self { type_references } + let states: Vec<_> = type_references + .into_iter() + // TODO: See if we can potentially add a more informative description. 
+ .map(|ty| (ty, None)) + .collect(); + OrderedElementsConstraint { + nfa: OrderedElementsNfa::new(states), + } } - /// Tries to create an [OrderedElements] constraint from the given Element fn resolve_from_isl_constraint( isl_version: IslVersion, type_references: &[IslVariablyOccurringTypeRef], type_store: &mut TypeStore, pending_types: &mut PendingTypes, ) -> IonSchemaResult { - let resolved_types: Vec = type_references + let resolved_types = type_references .iter() - .map(|t| + .map(|t| { // resolve type references and create variably occurring type reference with occurs range - t.resolve_type_reference(isl_version, type_store, pending_types)) - .collect::>>()?; - - Ok(OrderedElementsConstraint::new(resolved_types)) - } - - // Builds an NFA state machine based on given type_ids. This is a limited form of NFA where state machine is linear and every transition either leads to itself or the next state. - // - // All the states has some transitions between them leading from one state to another or back to itself. - // All the states that have a minimum occurrence of 0 are optional states, meaning those states can lead to another state with 0 occurrence event transitions. - // There are two special cases of transition that need to be handled. - // For any state whose corresponding `type_id` has an `occurs` where: - // * `max >= 2`, that state will have a transition back to itself, allowing for repetition. - // * `min == 0`, that state will have a transition that advances to the next state automatically, making an occurrence of that `type_id` optional. 
- // - // Here is an example of how the built NFA would look like for an `ordered_elements` constraint: - // ```ion - // ordered_elements: [ - // { type: int, occurs: optional }, - // number, - // any - // ] - // ``` - // NFA: - // +--------- 0 -----------+ - // | | - // | V - // I(INITIAL) ----> S1(INTERMEDIATE(0, 1)) -- 1 --> S2(INTERMEDIATE(1, 1)) -- 1 --> S3(INTERMEDIATE(1, 1)) ----> F(FINAL) - // - // Validation: - // Valid input value: `[1, 2, 3]` - // +------------------------------+ - // | event | State Visits | - // +------------------------------+ - // | - | I: 1 | - // | 1 | S1: 1, S2: 1 | - // | 2 | S2: 1, S3: 1 | - // | 3 | S3: 1 | - // | END | F: 1 | - // +------------------------------+ - // - // Invalid input value: `[1, 2]` - // +------------------------------+ - // | event | State Visits | - // +------------------------------+ - // | - | I: 1 | - // | 1 | S1: 1, S2: 1 | - // | 2 | S2: 1, S3: 1 | - // | END | S3: 1 | - // +------------------------------+ - // As shown above visit count for `END` doesn't have final state in it which means the value resulted to be invalid. 
- // - fn build_nfa_from_type_references( - type_ids: &[VariablyOccurringTypeRef], - type_store: &TypeStore, - ) -> NfaEvaluation { - let mut nfa_builder = NfaBuilder::new(); - let mut final_states = HashSet::new(); - for (state_id, variably_occurring_type_reference) in type_ids.iter().enumerate() { - let type_reference = variably_occurring_type_reference.type_ref(); - let (min, max) = variably_occurring_type_reference - .occurs_range() - .inclusive_endpoints(); - - // if the current state is required then that is the only final state till now - if min > 0 { - // remove all previous final states - final_states.clear(); - } - - // add current state as final state to NFA - final_states.insert(FinalState::new(state_id, min, max)); - - if state_id == 0 { - // add a transition to self for initial state - nfa_builder.with_transition(state_id, state_id, type_reference, min, max); - continue; - } - - // add transition to next state - nfa_builder.with_transition(state_id - 1, state_id, type_reference, min, max); - - if max > 1 { - // add a transition to self for states that have max > 1 - nfa_builder.with_transition(state_id, state_id, type_reference, min, max); - } - } + let var_type_ref = t.resolve_type_reference(isl_version, type_store, pending_types); + // TODO: See if we can potentially add a more informative description. 
+ var_type_ref.map(|it| (it, None)) + }) + .collect::>>()?; - NfaEvaluation::new(Arc::new(nfa_builder.build(final_states))) + Ok(OrderedElementsConstraint { + nfa: OrderedElementsNfa::new(resolved_types), + }) } } @@ -899,8 +824,8 @@ impl ConstraintValidator for OrderedElementsConstraint { ) -> ValidationResult { let violations: Vec = vec![]; - let mut element_iter = match value.as_sequence_iter() { - Some(iter) => iter.peekable(), + let element_iter = match value.as_sequence_iter() { + Some(iter) => iter, None => { return Err(Violation::with_violations( "ordered_elements", @@ -919,36 +844,7 @@ impl ConstraintValidator for OrderedElementsConstraint { } }; - // build nfa for validation - let mut nfa_evaluation = OrderedElementsConstraint::build_nfa_from_type_references( - &self.type_references, - type_store, - ); - - if element_iter.peek().is_some() && nfa_evaluation.nfa.get_final_states().is_empty() { - return Err(Violation::with_violations( - "ordered_elements", - ViolationCode::TypeMismatched, - "one or more ordered elements didn't match", - ion_path, - violations, - )); - } - - // use nfa_evaluation for validation - nfa_evaluation.validate_ordered_elements(element_iter, type_store); - - if !nfa_evaluation.has_final_state(type_store) { - return Err(Violation::with_violations( - "ordered_elements", - ViolationCode::TypeMismatched, - "one or more ordered elements didn't match", - ion_path, - violations, - )); - } - - Ok(()) + self.nfa.matches(element_iter, type_store, ion_path) } } diff --git a/ion-schema/src/lib.rs b/ion-schema/src/lib.rs index f653c5a..a6baeb9 100644 --- a/ion-schema/src/lib.rs +++ b/ion-schema/src/lib.rs @@ -29,7 +29,7 @@ pub(crate) mod ion_extension; mod ion_path; mod ion_schema_element; pub mod isl; -mod nfa; +mod ordered_elements_nfa; pub mod result; pub mod schema; pub mod system; diff --git a/ion-schema/src/nfa.rs b/ion-schema/src/nfa.rs deleted file mode 100644 index 6eac1b8..0000000 --- a/ion-schema/src/nfa.rs +++ /dev/null @@ -1,343 +0,0 
@@ -use crate::ion_path::IonPath; -use crate::system::TypeStore; -use crate::type_reference::TypeReference; -use crate::types::TypeValidator; -use crate::IonSchemaElement; -use ion_rs::Element; -use std::collections::{HashMap, HashSet}; -use std::iter::Peekable; -use std::sync::Arc; - -/// Represents an id for a state in NFA -type StateId = usize; - -#[derive(Debug, Clone, PartialEq, Eq, Hash)] -pub struct Transition { - // represents destination state for the transition - destination: StateId, - // represents the type_ref for the destination state - // this will be used to validate if an Ion value can be accepted at destination state or not - type_ref: TypeReference, - // minimum occurrence allowed for destination state - // this will be used to verify if destination state is optional through check min == 0 - // and it will also be used when destination state is same as source state to verify minimum occurrence for the state - min: usize, - // maximum occurrence allowed for destination state - max: usize, -} - -impl Transition { - /// Verify if the given Ion value is valid for the transition's type_ref or not - pub fn is_valid_for_ion_value(&self, element: &Element, type_store: &TypeStore) -> bool { - let schema_element: IonSchemaElement = element.into(); - - match self - .type_ref - .validate(&schema_element, type_store, &mut IonPath::default()) - { - Ok(_) => true, - Err(violation) => false, - } - } - - /// Verifies if a destination state is optional state or not - pub fn is_destination_state_optional(&self) -> bool { - self.min == 0 - } - - /// Verifies if the minimum occurrence requirement is met for given visits count - pub fn allows_exit_after_n_visits(&self, visits: usize) -> bool { - self.min <= visits - } - - /// Verifies if the maximum occurrence requirement is met for given visits count - pub fn allows_n_visits(&self, visits: usize) -> bool { - self.max >= visits - } -} - -/// Represents a final state of NFA -/// A final state is only reached if the 
visits to that state is between the (min, max) occurrence range -#[derive(Debug, Clone, PartialEq, Eq, Hash)] -pub struct FinalState { - state_id: StateId, - min: usize, - max: usize, -} - -impl FinalState { - pub fn new(state_id: StateId, min: usize, max: usize) -> Self { - Self { state_id, min, max } - } -} - -/// Represents an NFA that will eb used by the ordered_elements` constraint in order to validate an Ion Value -#[derive(Default, Debug, Clone, PartialEq)] -pub struct Nfa { - pub(crate) transitions: HashMap>, // represents transitions between states - pub(crate) final_states: HashSet, // represents all the final states for NFA -} - -impl Nfa { - /// Provides all final states for the [Nfa] - pub fn get_final_states(&self) -> HashSet { - self.final_states.to_owned() - } - - // TODO: make get_transitions() less expensive, avoid allocating a new HashSet for each call to the method - /// Provides all the possible transitions for given state - pub fn get_transitions(&self, state_id: StateId) -> HashSet { - self.transitions - .get(&state_id) - .map(|s| s.to_owned()) - .unwrap_or_default() - } -} - -/// Represents the current status of a single run through the NFA: namely, in which state -/// the run is currently positioned and how many times it has visited that state. -/// This will be used by [NfaEvaluation] which uses [Nfa] to evaluate given Ion value. -/// For each step in the NFA's evaluation, each existing [NfaRun] will be cloned once for each possible transition. -#[derive(Debug, Clone, Eq, PartialEq, Hash)] -pub struct NfaRun { - state_id: StateId, - state_visits: usize, -} - -impl NfaRun { - pub fn new(current_state_id: StateId, visits_to_current_state: usize) -> Self { - Self { - state_id: current_state_id, - state_visits: visits_to_current_state, - } - } -} - -/// This is a context which will be used while validating an Ion value for `ordered_elements` constraint using its NFA. 
-/// It stores a set of [NfaRun]s that changes for each element in the ordered elements. i.e. `visits` keeps changing for each element. -/// The final set of `visits` stored in [NfaEvaluation] will be used to determine if we reached the final state or not. -#[derive(Debug, Clone)] -pub struct NfaEvaluation { - pub(crate) visits: HashSet, - pub(crate) nfa: Arc, -} - -impl NfaEvaluation { - pub fn new(nfa: Arc) -> Self { - Self { - visits: { - let mut visits = HashSet::new(); - if !nfa.get_final_states().is_empty() { - visits.insert(NfaRun::new(0, 0)); - } - visits - }, - nfa, - } - } - - /// Verify if referenced [Nfa] for this [NfaEvaluation] has final state in the set of `visits` with correct visit count. - pub fn has_final_state(&self, type_store: &TypeStore) -> bool { - // If the `ordered_elements` had no final states meaning if `ordered_elements` constraint was empty, - // then verify that `visits` is also empty for validation - if self.nfa.get_final_states().is_empty() && self.visits.is_empty() { - return true; - } - - // verify if `visits` contains a final state in it with visit count between (min, max) for that fianl state - self.visits.iter().any(|nfa_run| { - self.nfa.get_final_states().iter().any(|fs| { - fs.state_id == nfa_run.state_id - && nfa_run.state_visits >= fs.min - && nfa_run.state_visits <= fs.max - }) - }) - } - - /// Validates provided ordered elements against referenced [Nfa] - pub fn validate_ordered_elements<'a, T: Iterator>( - &mut self, - mut elements_iter: Peekable, - type_store: &TypeStore, - ) { - // given elements are actually events for the `Nfa` referenced in this `NfaEvaluation`. - // iterate through all elements and update state-visit count(`NfaRun`) for all possible transitions for given element(event). 
- while let Some(element) = elements_iter.next() { - let mut next_states = HashSet::new(); - for nfa_run in self.visits.iter() { - let transitions: HashSet = self.nfa.get_transitions(nfa_run.state_id); - // evaluate all possible transitions for nfa_run - self.evaluate_transitions( - nfa_run, - transitions, - element, - &mut elements_iter, - type_store, - &mut next_states, - ); - } - self.visits = next_states; - } - } - - /// Evaluates given transitions using referenced [Nfa] - /// For each evaluation of a transition it adds next possible states into `next_states` - fn evaluate_transitions<'a, T: Iterator>( - &self, - nfa_run: &NfaRun, - transitions: HashSet, - current_element: &Element, - elements: &mut Peekable, - type_store: &TypeStore, - nfa_runs: &mut HashSet, - ) { - let source_state_id = nfa_run.state_id; - let visits = nfa_run.state_visits; - - // traverse all transitions for source state and verify if we can take the given transition - // Each transition has 3 possibilities: - // - Transition loops back to same state - // - Transition moves to the next state - // - Transition moves to optional state - for transition in transitions { - let destination_state_id = transition.destination; - - // transition which loops back to same state - if destination_state_id == source_state_id { - if !&self.evaluate_transition_to_self( - visits, - &transition, - current_element, - elements, - type_store, - nfa_runs, - ) { - // given ordered elements are invalid because it didn't satisfy required minimum occurrence constraint - return; - } - } else if transition.is_valid_for_ion_value(current_element, type_store) { - // if transition is valid, add destination state to next states - nfa_runs.insert(NfaRun::new(destination_state_id, 1)); - } - - // transition to optional state - // if destination state is optional then add transitions to next states skipping the optional state - self.evaluate_transition_to_optional_state( - visits, - &transition, - current_element, - elements, 
- type_store, - nfa_runs, - ); - } - } - - // This is a helper method that is used by `evaluate_transitions()` to resolve destination states that are optional - // for optional destination states, add transitions to next states skipping the optional state - fn evaluate_transition_to_optional_state<'a, T: Iterator>( - &self, - visits: usize, - transition: &Transition, - element: &Element, - elements: &mut Peekable, - type_store: &TypeStore, - next_states: &mut HashSet, - ) { - let mut destination_states_for_optional_state: HashSet = HashSet::new(); - - if transition.is_destination_state_optional() { - let mut transitions = self.nfa.get_transitions(transition.destination); - - // skip the optional state itself - transitions.remove(transition); - - destination_states_for_optional_state.extend(transitions); - - self.evaluate_transitions( - &NfaRun::new(transition.destination, visits), - destination_states_for_optional_state.to_owned(), - element, - elements, - type_store, - next_states, - ); - } - } - - // this is a helper method used by `evaluate_transitions` to evaluate transitions that loops back to the same state - // this method iterates through elements to satisfy minimum required occurrences for given transition - // It will return false if an invalid Ion value is found which doesn't satisfy minimum occurrence requirement for given transition - // Otherwise it will return true - fn evaluate_transition_to_self<'a, T: Iterator>( - &self, - visits: usize, - transition: &Transition, - element: &Element, - elements: &mut Peekable, - type_store: &TypeStore, - next_states: &mut HashSet, - ) -> bool { - let mut visit_count = visits + 1; - let mut element = element; - if transition.allows_n_visits(visit_count) { - if transition.is_valid_for_ion_value(element, type_store) { - // if transition is valid, add destination state to next states - next_states.insert(NfaRun::new(transition.destination, visit_count)); - } - // iterate through elements for at least minimum n visits 
of the given transition - while !transition.allows_exit_after_n_visits(visit_count) { - element = match elements.next() { - None => { - // if the minimum required occurrences for given transition is not met, - // and if the elements iterator is empty then the given Ion value is invalid - return false; - } - Some(element) => element, - }; - if transition.is_valid_for_ion_value(element, type_store) { - visit_count += 1; - // if transition is valid, add destination state to next states - next_states.insert(NfaRun::new(transition.destination, visit_count)); - } - } - } - true - } -} - -/// Represents a builder for constructing NFA which will be used by `ordered_elements` constraint -pub struct NfaBuilder { - nfa: Nfa, -} - -impl NfaBuilder { - pub fn new() -> NfaBuilder { - NfaBuilder { - nfa: Nfa::default(), - } - } - - pub fn build(mut self, final_states: HashSet) -> Nfa { - self.nfa.final_states = final_states; - self.nfa - } - - pub fn with_transition( - &mut self, - start_id: StateId, - end_id: StateId, - type_ref: TypeReference, - min: usize, - max: usize, - ) { - let end_states = self.nfa.transitions.entry(start_id).or_default(); - - end_states.insert(Transition { - destination: end_id, - type_ref, - min, - max, - }); - } -} diff --git a/ion-schema/src/ordered_elements_nfa.rs b/ion-schema/src/ordered_elements_nfa.rs new file mode 100644 index 0000000..67cd25d --- /dev/null +++ b/ion-schema/src/ordered_elements_nfa.rs @@ -0,0 +1,492 @@ +//! Implementation of a Non-deterministic Finite-state Automaton for the `ordered_elements` constraint. +//! +//! The general idea is this: +//! +//! > The NFA consumes a sequence of input events, one by one. In each step, whenever two or more +//! > transitions are applicable, it "clones" itself into appropriately many copies, each one +//! > following a different transition. If exactly one transition is applicable, it follows that +//! > transition without making any copies. 
If no transition is applicable, the current copy is in a +//! > dead end, and it "dies". If, after consuming the complete input, any of the copies is in an +//! > accept state, the input is accepted, else, it is rejected. [...] +//! > +//! > Keep a set data structure of all states which the NFA might currently be in. On the +//! > consumption of an input event, unite the results of the transition function applied to all +//! > current states to get the set of next states. [...] +//! > +//! > On the consumption of the last input event, if one of the current states is a final state, +//! > the machine accepts the sequence. +//! +//! (Paraphrased from [Nondeterministic_finite_automaton](https://en.wikipedia.org/wiki/Nondeterministic_finite_automaton)) +//! +//! Strictly speaking, our implementation might not be properly finite. For an `ordered_elements` +//! member such as `{ type: int, occurs: range::[0, max] }`, there are infinite possible states +//! since it could accept a sequence of integers that is any length. We avoid having an +//! infinite-sized graph by constructing one state for each member in the `ordered_elements` +//! constraint and storing the min and max possible visits for each state. +//! +//! As we are traversing the state machine graph, we track the current state of the machine as a set +//! of `(state_id, visit_count)` pairs. For something that accepts a theoretically infinite input +//! sequence (as above), this could result in an infinite number of parallel states, but in practice +//! the number of possible states is bounded by the length of the input. +//! +//! Our state machine has the following rules that specify the edges between states. +//! * Every state (type ref in `ordered_elements`) must have an edge to the next state in the list +//! * If a state has a max occurs >1, then it has an edge to itself +//! * If a state `n` has a min occurs of 0, then any states with an edge to state `n` also have +//! 
an edge to state `n+1` (to be able to skip the optional state). +//! +//! For example, suppose we have the following `ordered_elements` constraint: +//! ```ion +//! { +//! ordered_elements: [ +//! /*A*/ bool, +//! /*B*/ { occurs: optional, type: int }, +//! /*C*/ float, +//! /*D*/ { occurs: range::[1, 2], type: string } +//! /*E*/ decimal, +//! ] +//! } +//! ``` +//! * `A` is required, so `Initial` only has a single edge to `A`. +//! * `A` has an edge to `B`, but because `B` is optional, we could skip it and go straight from +//! `A` to `C`. `C` is required, so we cannot skip it and there are no edges from `A` to +//! `D` or anything else past `C`. +//! * `B` has a single edge to `C`. +//! * `C` has a single edge to `D`. +//! * `D` has an edge to `E`, but because `D` has a max occurs that is >1, `D` also has an edge +//! back to itself. +//! * `E` has a single edge to `Final`. +//! +//! Thus, the adjacency list ends up being: +//! ```text +//! Initial -> A +//! A -> B +//! A -> C +//! B -> C +//! C -> D +//! D -> D +//! D -> E +//! E -> Final +//! ``` +//! +//! A corollary of these rules is that set of edges out of state `i` must have destinations that +//! are a contiguous range from either `i` or `i+1` to `j` where `j > i`. This ends up being useful +//! because we can model the set of all edges from a particular state as a `Range` rather than a +//! list of destinations. + +use crate::ion_path::{IonPath, IonPathElement}; +use crate::result::ValidationResult; +use crate::system::TypeStore; +use crate::type_reference::{TypeReference, VariablyOccurringTypeRef}; +use crate::types::TypeValidator; +use crate::violation::{Violation, ViolationCode}; +use crate::IonSchemaElement; +use ion_rs::Element; +use std::cmp::Ordering; +use std::collections::HashSet; +use std::fmt::{Debug, Display, Formatter}; +use std::hash::{Hash, Hasher}; +use std::mem::swap; +use std::ops::RangeInclusive; +use std::vec; + +/// Unique identifier for a particular node/state in the NFA. 
+/// +/// Each [State] has an integer identifier ([StateId]) which must be unique within any +/// [OrderedElementsNfa] instance. This identifier is used for the implementation of traits such as +/// [Ord], [Eq], and [Hash] for types such as [TraversalError]. +type StateId = usize; + +/// In the evaluation of the NFA, used for tracking which states are in the set of possible current states. +type StateVisitCount = (StateId, usize); + +/// Represents an event in the input sequence—it is either some [Element] or the end-of-sequence +/// marker (i.e. [Option::None]). +type ElementOrEndOfSequence<'a> = Option<&'a Element>; + +/// The compiled state machine for an `ordered_elements` constraint. +/// +/// This is represented as a directed, almost a-cyclical graph. The only cycles allowed are +/// loops—i.e. an edge that is connected to the same vertex at both ends. +#[derive(Debug, Clone, PartialEq)] +pub struct OrderedElementsNfa { + /// The vertices/nodes of the state machine. + states: Vec, + /// An adjacency list describing the directed edges between `states`. + /// + /// Because all outgoing edges must have consecutively numbered destinations, we can model the + /// edges as a map of `from_id` (the index in the vec) to a `Range` of `StateId` destinations. + edges: Vec>, + /// Stores the [StateId] of the final state (packaged into a [StateVisitCount]) for convenience. + /// When running the state machine, if the set of current states contains this `StateVisitCount`, + /// we are done evaluating, and the input was accepted by the state machine. + terminal_state: StateVisitCount, +} + +impl OrderedElementsNfa { + /// Constructs an [OrderedElementsNfa] from a [Vec] of pairs of [VariablyOccurringTypeRef] and + /// an optional string description. + /// + /// The description is a human friendly string that goes into the violation messages to describe + /// which entry in the `ordered_elements` constraint is producing the violation. 
If a + /// description is provided, it should be something that is recognizable to users, such as a + /// row/col in the source ISL or a snippet of the source ISL. + /// If no description is provided, the default is ``, where `i` is the index in the + /// `ordered_element` constraint's list of variably occurring type references. + pub fn new(intermediate_states: Vec<(VariablyOccurringTypeRef, Option)>) -> Self { + // "Initial" state is always first—no surprise there. + let mut states = vec![State::Initial]; + + // Construct intermediate states and add to our vec of states. + intermediate_states + .into_iter() + .enumerate() + .for_each(|(i, (var_type_ref, description))| { + let description = + description.unwrap_or_else(|| format!("", i)); + let (min_visits, max_visits) = var_type_ref.occurs_range().inclusive_endpoints(); + let state = IntermediateState { + type_ref: var_type_ref.type_ref(), + min_visits, + max_visits, + description, + }; + + states.push(State::Intermediate(i + 1, state)) + }); + + // This will become the ID of the "Final" state, but it's convenient to wait to add the + // "Final" state until we've determined the edges of the graph. + let max_id = states.len(); + + // Construct an adjacency list to represent the edges of the graph. + let mut edges = vec![]; + for (i, s) in states.iter().enumerate() { + // Loop back to self, if max is > 1 + let min_transition = if s.can_reenter(1) { i } else { i + 1 }; + + // Add transitions forward up to (including) the first type with a min occurs that is greater than 0 + let mut j = i + 1; + while j < max_id { + if !states[j].can_exit(0) { + break; + } + j += 1; + } + edges.push(min_transition..=j) + } + + states.push(State::Final(max_id)); + + // Terminal state is the Final state with a visit count of 1. 
+ let terminal_state: StateVisitCount = (max_id, 1usize); + + OrderedElementsNfa { + states, + edges, + terminal_state, + } + } + + /// Tests an input sequence of [Element] + pub fn matches<'a, I: Iterator>( + &self, + mut iter: I, + type_store: &'a TypeStore, + ion_path: &mut IonPath, + ) -> ValidationResult { + let mut current_state_set: HashSet = HashSet::new(); + let mut new_states: HashSet = HashSet::new(); + + let mut input_index = 0; + current_state_set.insert((0usize, 1usize)); + + // Essentially, for-each input, but we want to capture the `Option::None` at the end of the iterator. + loop { + let element: ElementOrEndOfSequence = iter.next(); + let mut invalid_transitions: HashSet = HashSet::new(); + + ion_path.push(IonPathElement::Index(input_index)); + + // For each state in the set of current states... + for &(from_state_id, num_visits) in ¤t_state_set { + let from_state = &self.states[from_state_id]; + + let edges = if let Some(edges) = self.edges.get(from_state_id) { + // Need to clone the range because strangely &RangeInclusive doesn't + // implement Copy or IntoIterator. + edges.clone() + } else { + // The only state without edges is `Final`, which cannot be exited. + invalid_transitions.insert(TraversalError::CannotExitState(from_state_id)); + break; + }; + + // For each edge out of the current state we are inspecting... + for to_state_id in edges { + let to_state: &State = &self.states[to_state_id]; + + let can_reenter = from_state.can_reenter(num_visits); + let can_exit = from_state.can_exit(num_visits); + let is_loop = to_state_id == from_state_id; + + if !is_loop && !can_exit { + invalid_transitions.insert(TraversalError::CannotExitState(from_state_id)); + // We haven't reached the min_occurs of the current state. Any further + // transitions will also suffer from the same problem. No need to report + // this same problem repeatedly, so we break here. 
+ break; + } + + // TODO: Consider caching the result of this so that *if* there are multiple + // current states that could transition to the same state, we don't end + // up doing the same work multiple times. + let can_enter = to_state.can_enter(element, type_store, ion_path); + + if let Err(violation) = can_enter { + invalid_transitions + .insert(TraversalError::CannotEnterState(to_state_id, violation)); + } else if is_loop && !can_reenter { + invalid_transitions.insert(TraversalError::CannotReEnterState(to_state_id)); + } else { + let new_num_visits = if is_loop { num_visits + 1 } else { 1 }; + new_states.insert((to_state_id, new_num_visits)); + } + } + } + + // There are no valid paths to continue through the graph. + if new_states.is_empty() { + return Err(self.build_violation(element, ion_path, invalid_transitions)); + } + + ion_path.pop(); + + if new_states.contains(&self.terminal_state) { + return Ok(()); + } + + current_state_set.clear(); + swap(&mut current_state_set, &mut new_states); + input_index += 1; + } + } + + /// Build a [Violation] out of the set of [TraversalError]s. 
+ fn build_violation( + &self, + event: ElementOrEndOfSequence, + ion_path: &mut IonPath, + invalid_transitions: HashSet, + ) -> Violation { + let mut reasons: Vec<_> = invalid_transitions.into_iter().collect(); + reasons.sort(); + let reasons = reasons + .into_iter() + .map(|it| match it { + TraversalError::CannotExitState(s) => Violation::new( + "ordered_elements", + ViolationCode::ElementMismatched, + format!("{}: min occurs not reached", &self.states[s]), + ion_path, + ), + TraversalError::CannotReEnterState(s) => Violation::new( + "ordered_elements", + ViolationCode::ElementMismatched, + format!("{}: max occurs already reached", &self.states[s],), + ion_path, + ), + TraversalError::CannotEnterState(s, v) => Violation::with_violations( + "ordered_elements", + ViolationCode::ElementMismatched, + format!("{}: does not match type", &self.states[s]), + ion_path, + vec![v], + ), + }) + .collect(); + + let index = ion_path.pop().unwrap(); + + Violation::with_violations( + "ordered_elements", + ViolationCode::ElementMismatched, + format!( + "input does not match ordered_elements at index {}: {}", + index, + event + .map(Element::to_string) + .unwrap_or_else(|| "".to_string()) + ), + ion_path, + reasons, + ) + } +} + +/// Details for a state that represents one of the [VariablyOccurringTypeReferences] in an +/// `ordered_elements` constraint. +#[derive(Debug, Clone, PartialEq)] +struct IntermediateState { + type_ref: TypeReference, + min_visits: usize, + max_visits: usize, + description: String, +} + +/// Represents a state in the compiled nondeterministic finite automaton. +#[derive(Debug, Clone, PartialEq)] +enum State { + Initial, + Intermediate(StateId, IntermediateState), + Final(StateId), +} + +impl State { + /// The unique integer identifier for this state. 
+ fn id(&self) -> StateId { + match self { + State::Initial => 0usize, + State::Intermediate(id, _) => *id, + State::Final(id) => *id, + } + } + + /// Checks whether the state can be visited more times or if the current path must move to a + /// different state. + fn can_reenter(&self, num_visits: usize) -> bool { + match self { + State::Initial => false, + State::Intermediate(_, s) => num_visits < s.max_visits, + State::Final(_) => false, + } + } + + /// Checks whether the state has been visited enough times to allow exiting that state (as + /// opposed to either looping or dying out). + fn can_exit(&self, num_visits: usize) -> bool { + match self { + State::Initial => true, + State::Intermediate(_, s) => num_visits >= s.min_visits, + State::Final(_) => false, + } + } + + /// Tests whether the `element` is valid for the [TypeReference] of this state. + fn can_enter( + &self, + element: Option<&Element>, + type_store: &TypeStore, + ion_path: &mut IonPath, + ) -> ValidationResult { + match self { + State::Initial => unreachable!("There are no transitions to the initial state."), + State::Intermediate(_, s) => { + if let Some(el) = element { + let t = s.type_ref; + t.validate(&IonSchemaElement::from(el), type_store, ion_path) + } else { + Err(Violation::new( + "ordered_elements", + ViolationCode::ElementMismatched, + "expected another element; found ", + ion_path, + )) + } + } + State::Final(_) => { + if element.is_some() { + Err(Violation::new( + "ordered_elements", + ViolationCode::ElementMismatched, + format!("expected ; found: {}", element.unwrap()), + ion_path, + )) + } else { + Ok(()) + } + } + } + } +} + +impl Display for State { + fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { + let string = match self { + State::Initial => "", + State::Intermediate(i, s) => &s.description, + State::Final(_) => "", + }; + f.write_str(string) + } +} + +/// The reason why a transition (or edge) in the state machine graph cannot be traversed. 
+#[derive(Debug)]
+enum TraversalError {
+    /// The element was rejected by the destination state; carries the resulting violation.
+    CannotEnterState(StateId, Violation),
+    /// The state has not been visited often enough to be left.
+    CannotExitState(StateId),
+    /// The state has already been visited as often as it allows.
+    CannotReEnterState(StateId),
+}
+
+impl TraversalError {
+    /// The identity of this error: the state id followed by a rank for the variant.
+    ///
+    /// Ordering, equality and hashing are all derived from this one key so that the three
+    /// always agree with each other. The id comes first so that sorting groups errors by state.
+    fn key(&self) -> (StateId, u8) {
+        match self {
+            TraversalError::CannotEnterState(id, _) => (*id, 0),
+            TraversalError::CannotExitState(id) => (*id, 1),
+            TraversalError::CannotReEnterState(id) => (*id, 2),
+        }
+    }
+}
+
+impl PartialOrd for TraversalError {
+    fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
+        Some(self.cmp(other))
+    }
+}
+
+impl Ord for TraversalError {
+    /// Orders by state id, then by variant.
+    ///
+    /// The variant tie-break keeps `cmp` consistent with `eq`: two different kinds of error for
+    /// the same state are unequal, so they must not compare as `Ordering::Equal`. It also makes
+    /// the sorted order independent of the order in which the errors were collected.
+    fn cmp(&self, other: &Self) -> Ordering {
+        self.key().cmp(&other.key())
+    }
+}
+
+impl Eq for TraversalError {}
+
+impl PartialEq for TraversalError {
+    /// Two errors are equal when they are the same variant for the same state id.
+    fn eq(&self, other: &Self) -> bool {
+        // It is okay to ignore the violation here because we only consider one event/element at
+        // any given point in the state machine. Since that is the case, if the IDs are the same,
+        // then they must represent the same destination state (type reference), and so the
+        // violations must be equal.
+        self.key() == other.key()
+    }
+}
+
+impl Hash for TraversalError {
+    /// Hashes exactly the data that `eq` compares, so equal errors always hash equally.
+    fn hash<H: Hasher>(&self, state: &mut H) {
+        self.key().hash(state)
+    }
+}
diff --git a/ion-schema/src/violation.rs b/ion-schema/src/violation.rs
index 19ede23..8098752 100644
--- a/ion-schema/src/violation.rs
+++ b/ion-schema/src/violation.rs
@@ -86,12 +86,30 @@ impl Violation {
     }
 }
 
-// TODO: Implement Violation with proper indentation for the nested tree of violations
 impl fmt::Display for Violation {
     fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
-        write!(f, "A validation error occurred: {}", self.message)?;
-        for v in &self.violations {
-            write!(f, " {v}")?;
+        f.write_str(self.message.as_str())?;
+
+        let mut stack = vec![];
+        let mut violations_iter = self.violations.iter();
+        let mut violation = violations_iter.next();
+
+        let mut indent = " ".to_string();
+
+        while let Some(v) = violation {
+            f.write_fmt(format_args!("\n{}- {}", &indent, v.message))?;
+
+            if !v.violations.is_empty() {
+                stack.push(violations_iter);
+                violations_iter = v.violations.iter();
+                indent.push_str(" ");
+            }
+            violation = violations_iter.next();
+            while violation.is_none() && !stack.is_empty() {
+                violations_iter = stack.pop().unwrap();
+                indent.truncate(indent.len() - 2);
+                violation = violations_iter.next();
+            }
         }
         Ok(())
     }