Skip to content

Commit

Permalink
Add Type Check to Strict Extractor
Browse files Browse the repository at this point in the history
Search paths on field names, indexed elements or wildcards imply an
expectation for container types. Before this change, a search path
looking for a field would simply not be invoked if the data was not
a struct. With this change, users can elect to have an Exception thrown
when data does not meet the implied type expectations of their paths.

The type check is performed after any callback for the parent itself:
that is not known to be mistyped by the paths.

It is only implemented for the "strict" matcher and the change to the
API reflects that.
  • Loading branch information
rmarrowstone committed Jan 8, 2025
1 parent f78144f commit b6cebc1
Show file tree
Hide file tree
Showing 6 changed files with 105 additions and 10 deletions.
27 changes: 24 additions & 3 deletions src/main/java/com/amazon/ionpathextraction/FsmMatcher.java
Original file line number Diff line number Diff line change
Expand Up @@ -27,11 +27,32 @@ abstract class FsmMatcher<T> {
*/
BiFunction<IonReader, T, Integer> callback;

enum Transitionable {
TERMINAL(false, false),
POSSIBLE(false, true),
MISTYPED(true, false);

final boolean invalid;
final boolean possible;

Transitionable(final boolean invalid, final boolean possible) {
this.invalid = invalid;
this.possible = possible;
}
}

/**
* Indicates if there _may_ be transitions to child matchers from the given IonType.
* Indicates if there _may_ be transitions to child matchers from the given IonType,
* or if the given IonType is mistyped for the expected transitions.
*/
boolean transitionsFrom(final IonType ionType) {
return IonType.isContainer(ionType);
Transitionable transitionsFrom(final IonType ionType) {
if (IonType.isContainer(ionType)) {
return Transitionable.POSSIBLE;
}
if (ionType == IonType.NULL) {
return Transitionable.TERMINAL;
}
return Transitionable.MISTYPED;
}

/**
Expand Down
14 changes: 10 additions & 4 deletions src/main/java/com/amazon/ionpathextraction/FsmMatcherBuilder.java
Original file line number Diff line number Diff line change
Expand Up @@ -214,8 +214,14 @@ private static class FieldMatcher<T> extends FsmMatcher<T> {
}

@Override
boolean transitionsFrom(final IonType ionType) {
return ionType == IonType.STRUCT;
Transitionable transitionsFrom(final IonType ionType) {
if (ionType == IonType.STRUCT) {
return Transitionable.POSSIBLE;
}
if (ionType == IonType.NULL) {
return Transitionable.TERMINAL;
}
return Transitionable.MISTYPED;
}

@Override
Expand Down Expand Up @@ -259,8 +265,8 @@ private static class TerminalMatcher<T> extends FsmMatcher<T> {
}

@Override
boolean transitionsFrom(final IonType ionType) {
return false;
Transitionable transitionsFrom(final IonType ionType) {
return Transitionable.TERMINAL;
}

@Override
Expand Down
16 changes: 14 additions & 2 deletions src/main/java/com/amazon/ionpathextraction/FsmPathExtractor.java
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@

import com.amazon.ion.IonReader;
import com.amazon.ion.IonType;
import com.amazon.ionpathextraction.exceptions.PathExtractionException;
import com.amazon.ionpathextraction.internal.PathExtractorConfig;
import java.util.List;
import java.util.function.BiFunction;
Expand All @@ -35,17 +36,21 @@
*/
class FsmPathExtractor<T> implements PathExtractor<T> {
private final FsmMatcher<T> rootMatcher;
private final boolean strictTyping;
private final PathExtractorConfig config;

private FsmPathExtractor(
final FsmMatcher<T> rootMatcher,
final boolean strictTyping,
final PathExtractorConfig config) {
this.rootMatcher = rootMatcher;
this.strictTyping = strictTyping;
this.config = config;
}

static <U> FsmPathExtractor<U> create(
final List<SearchPath<U>> searchPaths,
final boolean strictTyping,
final PathExtractorConfig config) {
FsmMatcherBuilder<U> builder = new FsmMatcherBuilder<>(
config.isMatchCaseInsensitive(),
Expand All @@ -54,7 +59,7 @@ static <U> FsmPathExtractor<U> create(
builder.accept(path);
}

return new FsmPathExtractor<>(builder.build(), config);
return new FsmPathExtractor<>(builder.build(), strictTyping, config);
}

@Override
Expand Down Expand Up @@ -105,7 +110,8 @@ private int matchRecursive(
}
}

if (child.transitionsFrom(reader.getType())) {
FsmMatcher.Transitionable transitionable = child.transitionsFrom(reader.getType());
if (transitionable.possible) {
reader.stepIn();
int childPos = 0;
while (reader.next() != null) {
Expand All @@ -116,6 +122,12 @@ private int matchRecursive(
}
}
reader.stepOut();
return 0;
}

if (transitionable.invalid && strictTyping) {
throw new PathExtractionException(
String.format("IonType %s is not valid for transitions from %s", reader.getType(), child));
}

return 0;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -99,7 +99,22 @@ public PathExtractor<T> build() {
* @throws UnsupportedPathExpression if any search path or the paths combined, are not supported.
*/
public PathExtractor<T> buildStrict() {
return buildStrict(false);
}

/**
* Instantiate a "strict" path extractor, which also enforces type expectations.
* <br>
* Paths that attempt to find named children are only valid on Structs or untyped null.
* Paths that attempt to find indexed (or wildcard) children are only valid on containers or untyped null.
* For backwards compatibility that includes Structs, though they are defined as unordered per the Ion Datamodel.
* <br>
* The type check is performed _after_ any callbacks registered for the current path and
* _before_ any child matches are attempted.
*/
public PathExtractor<T> buildStrict(final boolean strictTyping) {
return FsmPathExtractor.create(searchPaths,
strictTyping,
new PathExtractorConfig(matchRelativePaths, matchCaseInsensitive, matchFieldsCaseInsensitive));
}

Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,9 @@
package com.amazon.ionpathextraction

import com.amazon.ion.IonReader
import com.amazon.ionpathextraction.exceptions.PathExtractionException
import org.junit.jupiter.api.Assertions.assertEquals
import org.junit.jupiter.api.Test
import org.junit.jupiter.api.assertThrows
import org.junit.jupiter.params.ParameterizedTest
import org.junit.jupiter.params.provider.MethodSource
Expand Down Expand Up @@ -30,4 +34,41 @@ class FsmPathExtractorTest : PathExtractorTest() {
super.testSearchPaths(testCase)
}
}

data class TypingTestCase(val searchPath: String, val validity: List<Boolean>, val matchCount: Int)

@Test
fun testStrictTyping() {
val inputs = listOf("17", "[31]", "(53)", "null", "{ foo: 67 }")
val testCases = listOf( // 17, [31], (53), null, { foo: 67 }
TypingTestCase("()", listOf(true, true, true, true, true), 5),
TypingTestCase("A::()", listOf(true, true, true, true, true), 0),
TypingTestCase("(*)", listOf(false, true, true, true, true), 3),
TypingTestCase("(A::*)", listOf(false, true, true, true, true), 0),
TypingTestCase("(0)", listOf(false, true, true, true, true), 3),
TypingTestCase("(foo)", listOf(false, false, false, true, true), 1))

testCases.forEach { testCase ->
var count = 0;
val counter = { _: IonReader ->
count += 1
0
}
val extractor = PathExtractorBuilder.standard<Any>()
.withSearchPath(testCase.searchPath, counter)
.buildStrict(true)

for (j in inputs.indices) {
val ionReader = ION.newReader(inputs[j])
if (testCase.validity[j]) {
extractor.match(ionReader)
} else {
assertThrows<PathExtractionException> {
extractor.match(ionReader)
}
}
}
assertEquals(testCase.matchCount, count)
}
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ import kotlin.test.assertTrue

abstract class PathExtractorTest {
companion object {
private val ION = IonSystemBuilder.standard().build()
val ION: IonSystem = IonSystemBuilder.standard().build()

data class TestCase(val searchPaths: List<String>,
val data: String,
Expand Down

0 comments on commit b6cebc1

Please sign in to comment.