From b6356a5711d815855a840105f424305588c356b1 Mon Sep 17 00:00:00 2001 From: Rob Marrowstone Date: Wed, 8 Jan 2025 10:12:29 -0800 Subject: [PATCH] Add Type Check to Strict Extractor Search paths on field names, indexed elements or wildcards imply an expectation for container types. Before this change, a search path looking for a field would simply not be invoked if the data was not a struct. With this change, users can elect to have an Exception thrown when data does not meet the implied type expectations of their paths. The type check is performed after any callback for the parent itself: that is not known to be mistyped by the paths. It is only implemented for the "strict" matcher and the change to the API reflects that. --- .../amazon/ionpathextraction/FsmMatcher.java | 27 ++++++++++-- .../ionpathextraction/FsmMatcherBuilder.java | 14 +++++-- .../ionpathextraction/FsmPathExtractor.java | 16 +++++++- .../PathExtractorBuilder.java | 26 ++++++++++++ .../ionpathextraction/FsmPathExtractorTest.kt | 41 +++++++++++++++++++ .../ionpathextraction/PathExtractorTest.kt | 2 +- 6 files changed, 116 insertions(+), 10 deletions(-) diff --git a/src/main/java/com/amazon/ionpathextraction/FsmMatcher.java b/src/main/java/com/amazon/ionpathextraction/FsmMatcher.java index dd7b0a4..a26763f 100644 --- a/src/main/java/com/amazon/ionpathextraction/FsmMatcher.java +++ b/src/main/java/com/amazon/ionpathextraction/FsmMatcher.java @@ -27,11 +27,32 @@ abstract class FsmMatcher { */ BiFunction callback; + enum Transitionable { + TERMINAL(false, false), + POSSIBLE(false, true), + MISTYPED(true, false); + + final boolean invalid; + final boolean possible; + + Transitionable(final boolean invalid, final boolean possible) { + this.invalid = invalid; + this.possible = possible; + } + } + /** - * Indicates if there _may_ be transitions to child matchers from the given IonType. + * Indicates if there _may_ be transitions to child matchers from the given IonType, + * or if the given IonType is mistyped for the expected transitions. */ - boolean transitionsFrom(final IonType ionType) { - return IonType.isContainer(ionType); + Transitionable transitionsFrom(final IonType ionType) { + if (IonType.isContainer(ionType)) { + return Transitionable.POSSIBLE; + } + if (ionType == IonType.NULL) { + return Transitionable.TERMINAL; + } + return Transitionable.MISTYPED; } /** diff --git a/src/main/java/com/amazon/ionpathextraction/FsmMatcherBuilder.java b/src/main/java/com/amazon/ionpathextraction/FsmMatcherBuilder.java index 49eecc1..13a7eb7 100644 --- a/src/main/java/com/amazon/ionpathextraction/FsmMatcherBuilder.java +++ b/src/main/java/com/amazon/ionpathextraction/FsmMatcherBuilder.java @@ -214,8 +214,14 @@ private static class FieldMatcher extends FsmMatcher { } @Override - boolean transitionsFrom(final IonType ionType) { - return ionType == IonType.STRUCT; + Transitionable transitionsFrom(final IonType ionType) { + if (ionType == IonType.STRUCT) { + return Transitionable.POSSIBLE; + } + if (ionType == IonType.NULL) { + return Transitionable.TERMINAL; + } + return Transitionable.MISTYPED; } @Override @@ -259,8 +265,8 @@ private static class TerminalMatcher extends FsmMatcher { } @Override - boolean transitionsFrom(final IonType ionType) { - return false; + Transitionable transitionsFrom(final IonType ionType) { + return Transitionable.TERMINAL; } @Override diff --git a/src/main/java/com/amazon/ionpathextraction/FsmPathExtractor.java b/src/main/java/com/amazon/ionpathextraction/FsmPathExtractor.java index 4f4a822..4b6a837 100644 --- a/src/main/java/com/amazon/ionpathextraction/FsmPathExtractor.java +++ b/src/main/java/com/amazon/ionpathextraction/FsmPathExtractor.java @@ -18,6 +18,7 @@ import com.amazon.ion.IonReader; import com.amazon.ion.IonType; +import com.amazon.ionpathextraction.exceptions.PathExtractionException; import com.amazon.ionpathextraction.internal.PathExtractorConfig; import java.util.List; import java.util.function.BiFunction; @@ -35,17 +36,21 @@ */ class FsmPathExtractor implements PathExtractor { private final FsmMatcher rootMatcher; + private final boolean strictTyping; private final PathExtractorConfig config; private FsmPathExtractor( final FsmMatcher rootMatcher, + final boolean strictTyping, final PathExtractorConfig config) { this.rootMatcher = rootMatcher; + this.strictTyping = strictTyping; this.config = config; } static FsmPathExtractor create( final List> searchPaths, + final boolean strictTyping, final PathExtractorConfig config) { FsmMatcherBuilder builder = new FsmMatcherBuilder<>( config.isMatchCaseInsensitive(), @@ -54,7 +59,7 @@ static FsmPathExtractor create( builder.accept(path); } - return new FsmPathExtractor<>(builder.build(), config); + return new FsmPathExtractor<>(builder.build(), strictTyping, config); } @Override @@ -105,7 +110,8 @@ private int matchRecursive( } } - if (child.transitionsFrom(reader.getType())) { + FsmMatcher.Transitionable transitionable = child.transitionsFrom(reader.getType()); + if (transitionable.possible) { reader.stepIn(); int childPos = 0; while (reader.next() != null) { @@ -116,6 +122,12 @@ private int matchRecursive( } } reader.stepOut(); + return 0; + } + + if (transitionable.invalid && strictTyping) { + throw new PathExtractionException( + String.format("IonType %s is not valid for transitions from %s", reader.getType(), child)); } return 0; diff --git a/src/main/java/com/amazon/ionpathextraction/PathExtractorBuilder.java b/src/main/java/com/amazon/ionpathextraction/PathExtractorBuilder.java index 080269d..65b4665 100644 --- a/src/main/java/com/amazon/ionpathextraction/PathExtractorBuilder.java +++ b/src/main/java/com/amazon/ionpathextraction/PathExtractorBuilder.java @@ -31,6 +31,7 @@ public final class PathExtractorBuilder { private static final boolean DEFAULT_MATCH_RELATIVE_PATHS = false; private static final boolean DEFAULT_CASE_INSENSITIVE = false; + private static final boolean DEFAULT_STRICT_TYPING = false; private final List> searchPaths = new ArrayList<>(); private boolean matchRelativePaths; @@ -98,7 +99,22 @@ public PathExtractor build() { * @throws UnsupportedPathExpression if any search path or the paths combined, are not supported. */ public PathExtractor buildStrict() { + return buildStrict(false); + } + + /** + * Instantiate a "strict" path extractor, which also enforces type expectations. + *
+ * Paths that attempt to find named children are only valid on Structs or untyped null. + * Paths that attempt to find indexed (or wildcard) children are only valid on containers or untyped null. + * For backwards compatibility that includes Structs, though they are defined as unordered per the Ion Datamodel. + *
+ * The type check is performed _after_ any callbacks registered for the current path and + * _before_ any child matches are attempted. + */ + public PathExtractor buildStrict(final boolean strictTyping) { return FsmPathExtractor.create(searchPaths, + strictTyping, new PathExtractorConfig(matchRelativePaths, matchCaseInsensitive, matchFieldsCaseInsensitive)); } @@ -265,4 +281,14 @@ public PathExtractorBuilder withSearchPath(final List pathComp return this; } + + /** + * Add a search path by its components, with no annotations matching on the top-level-values. + *
+ * @see PathExtractorBuilder#withSearchPath(List, BiFunction, String[]) + */ + public PathExtractorBuilder withSearchPath(final List pathComponents, + final BiFunction callback) { + return withSearchPath(pathComponents, callback, new String[0]); + } } diff --git a/src/test/kotlin/com/amazon/ionpathextraction/FsmPathExtractorTest.kt b/src/test/kotlin/com/amazon/ionpathextraction/FsmPathExtractorTest.kt index 41deb22..21470b7 100644 --- a/src/test/kotlin/com/amazon/ionpathextraction/FsmPathExtractorTest.kt +++ b/src/test/kotlin/com/amazon/ionpathextraction/FsmPathExtractorTest.kt @@ -1,5 +1,9 @@ package com.amazon.ionpathextraction +import com.amazon.ion.IonReader +import com.amazon.ionpathextraction.exceptions.PathExtractionException +import org.junit.jupiter.api.Assertions.assertEquals +import org.junit.jupiter.api.Test import org.junit.jupiter.api.assertThrows import org.junit.jupiter.params.ParameterizedTest import org.junit.jupiter.params.provider.MethodSource @@ -30,4 +34,41 @@ class FsmPathExtractorTest : PathExtractorTest() { super.testSearchPaths(testCase) } } + + data class TypingTestCase(val searchPath: String, val validity: List, val matchCount: Int) + + @Test + fun testStrictTyping() { + val inputs = listOf("17", "[31]", "(53)", "null", "{ foo: 67 }") + val testCases = listOf( // 17, [31], (53), null, { foo: 67 } + TypingTestCase("()", listOf(true, true, true, true, true), 5), + TypingTestCase("A::()", listOf(true, true, true, true, true), 0), + TypingTestCase("(*)", listOf(false, true, true, true, true), 3), + TypingTestCase("(A::*)", listOf(false, true, true, true, true), 0), + TypingTestCase("(0)", listOf(false, true, true, true, true), 3), + TypingTestCase("(foo)", listOf(false, false, false, true, true), 1)) + + testCases.forEach { testCase -> + var count = 0; + val counter = { _: IonReader -> + count += 1 + 0 + } + val extractor = PathExtractorBuilder.standard() + .withSearchPath(testCase.searchPath, counter) + .buildStrict(true) + + for (j in inputs.indices) { + val ionReader = ION.newReader(inputs[j]) + if (testCase.validity[j]) { + extractor.match(ionReader) + } else { + assertThrows { + extractor.match(ionReader) + } + } + } + assertEquals(testCase.matchCount, count) + } + } } diff --git a/src/test/kotlin/com/amazon/ionpathextraction/PathExtractorTest.kt b/src/test/kotlin/com/amazon/ionpathextraction/PathExtractorTest.kt index a847019..fa3ca5d 100644 --- a/src/test/kotlin/com/amazon/ionpathextraction/PathExtractorTest.kt +++ b/src/test/kotlin/com/amazon/ionpathextraction/PathExtractorTest.kt @@ -33,7 +33,7 @@ import kotlin.test.assertTrue abstract class PathExtractorTest { companion object { - private val ION = IonSystemBuilder.standard().build() + val ION: IonSystem = IonSystemBuilder.standard().build() data class TestCase(val searchPaths: List, val data: String,