From 76897d9fdf4403c07eaae60b6bc6551f3928ae00 Mon Sep 17 00:00:00 2001 From: alisa101rs Date: Tue, 24 Dec 2024 15:02:55 +0900 Subject: [PATCH] feat: move comma check to lexer and improve trailing comma reports --- .../serialization/json/JsonParserTest.kt | 7 ++- .../serialization/json/MissingCommaTest.kt | 24 ++++++--- .../serialization/json/TrailingCommaTest.kt | 41 ++++++++++++++ .../json/internal/JsonExceptions.kt | 7 --- .../json/internal/JsonTreeReader.kt | 54 +++++++++---------- .../json/internal/StreamingJsonDecoder.kt | 41 ++++++-------- .../json/internal/lexer/AbstractJsonLexer.kt | 44 +++++++++++---- 7 files changed, 137 insertions(+), 81 deletions(-) diff --git a/formats/json-tests/commonTest/src/kotlinx/serialization/json/JsonParserTest.kt b/formats/json-tests/commonTest/src/kotlinx/serialization/json/JsonParserTest.kt index 94f7052cd4..ae53dd40b4 100644 --- a/formats/json-tests/commonTest/src/kotlinx/serialization/json/JsonParserTest.kt +++ b/formats/json-tests/commonTest/src/kotlinx/serialization/json/JsonParserTest.kt @@ -6,7 +6,6 @@ package kotlinx.serialization.json import kotlinx.serialization.* import kotlinx.serialization.builtins.* -import kotlinx.serialization.json.internal.* import kotlinx.serialization.test.* import kotlin.test.* @@ -79,11 +78,11 @@ class JsonParserTest : JsonTestBase() { fun testTrailingComma() { testTrailingComma("{\"id\":0,}") testTrailingComma("{\"id\":0 ,}") - testTrailingComma("{\"id\":0 , ,}") + testTrailingComma("{\"id\":0 , ,}", message = "Multiple consecutive commas are not allowed in JSON") } - private fun testTrailingComma(content: String) { - assertFailsWithSerialMessage("JsonDecodingException", "Trailing comma before the end of JSON object") { Json.parseToJsonElement(content) } + private fun testTrailingComma(content: String, message: String = "Trailing comma before the end of JSON object") { + assertFailsWithSerialMessage("JsonDecodingException", message) { Json.parseToJsonElement(content) } } @Test diff --git a/formats/json-tests/commonTest/src/kotlinx/serialization/json/MissingCommaTest.kt b/formats/json-tests/commonTest/src/kotlinx/serialization/json/MissingCommaTest.kt index e337c5a669..a3a894dee6 100644 --- a/formats/json-tests/commonTest/src/kotlinx/serialization/json/MissingCommaTest.kt +++ b/formats/json-tests/commonTest/src/kotlinx/serialization/json/MissingCommaTest.kt @@ -26,7 +26,7 @@ class MissingCommaTest : JsonTestBase() { @Test fun missingCommaBetweenFieldsAfterPrimitive() { val message = - "Unexpected JSON token at offset 8: Expected comma after the key-value pair at path: \$.i" + "Unexpected JSON token at offset 8: Expected end of the object or comma at path: \$" val json = """{"i":42 "c":{"i":"string"}}""" assertFailsWithSerialMessage("JsonDecodingException", message) { @@ -37,7 +37,7 @@ class MissingCommaTest : JsonTestBase() { @Test fun missingCommaBetweenFieldsAfterObject() { val message = - "Unexpected JSON token at offset 19: Expected comma after the key-value pair at path: \$.c" + "Unexpected JSON token at offset 19: Expected end of the object or comma at path: \$" val json = """{"c":{"i":"string"}"i":42}""" assertFailsWithSerialMessage("JsonDecodingException", message) { @@ -48,7 +48,7 @@ class MissingCommaTest : JsonTestBase() { @Test fun allowTrailingCommaDoesNotApplyToCommaBetweenFields() { val message = - "Unexpected JSON token at offset 8: Expected comma after the key-value pair at path: \$.i" + "Unexpected JSON token at offset 8: Expected end of the object or comma at path: \$" val json = """{"i":42 "c":{"i":"string"}}""" assertFailsWithSerialMessage("JsonDecodingException", message) { @@ -59,7 +59,7 @@ class MissingCommaTest : JsonTestBase() { @Test fun lenientSerializeDoesNotAllowToSkipCommaBetweenFields() { val message = - "Unexpected JSON token at offset 8: Expected comma after the key-value pair at path: \$.i" + "Unexpected JSON token at offset 8: Expected end of the object or comma at path: \$" val json = """{"i":42 "c":{"i":"string"}}""" assertFailsWithSerialMessage("JsonDecodingException", message) { @@ -69,7 +69,7 @@ class MissingCommaTest : JsonTestBase() { @Test fun missingCommaInDynamicMap() { - val m = "Unexpected JSON token at offset 9: Expected end of the object or comma at path: \$" + val m = "Unexpected JSON token at offset 8: Expected end of the object or comma at path: \$" val json = """{"i":42 "c":{"i":"string"}}""" assertFailsWithSerialMessage("JsonDecodingException", m) { default.parseToJsonElement(json) @@ -78,7 +78,7 @@ class MissingCommaTest : JsonTestBase() { @Test fun missingCommaInArray() { - val m = "Unexpected JSON token at offset 3: Expected end of the array or comma at path: \$[0]" + val m = "Unexpected JSON token at offset 3: Expected end of the array or comma at path: \$" val json = """[1 2 3 4]""" assertFailsWithSerialMessage("JsonDecodingException", m) { @@ -88,11 +88,21 @@ class MissingCommaTest : JsonTestBase() { @Test fun missingCommaInStringMap() { - val m = "Unexpected JSON token at offset 9: Expected comma after the key-value pair at path: \$['a']" + val m = "Unexpected JSON token at offset 9: Expected end of the object or comma at path: \$" val json = """{"a":"1" "b":"2"}""" assertFailsWithSerialMessage("JsonDecodingException", m) { default.decodeFromString>(json) } } + + @Test + fun missingCommaInUnknownKeys() { + val m = "Unexpected JSON token at offset 17: Expected end of the object or comma at path: \$" + val json = """{"i":"1","b":"2" "c":"2"}""" + + assertFailsWithSerialMessage("JsonDecodingException", m) { + lenient.decodeFromString(json) + } + } } \ No newline at end of file diff --git a/formats/json-tests/commonTest/src/kotlinx/serialization/json/TrailingCommaTest.kt b/formats/json-tests/commonTest/src/kotlinx/serialization/json/TrailingCommaTest.kt index 0916de5795..d729349d2e 100644 --- a/formats/json-tests/commonTest/src/kotlinx/serialization/json/TrailingCommaTest.kt +++ b/formats/json-tests/commonTest/src/kotlinx/serialization/json/TrailingCommaTest.kt @@ -125,4 +125,45 @@ class TrailingCommaTest : JsonTestBase() { "wl":{"l":[1, 2, 3,],},}""" assertEquals(Mixed(multipleFields, withMap, withList), tj.decodeFromString(input, mode)) } + + @Test + fun testCommaReportedPosition() = parametrizedTest { mode -> + val sd = """{"a":"1", }""" + + checkSerializationException({ + default.decodeFromString>(sd, mode) + }, { message -> + assertContains( + message, + """Unexpected JSON token at offset 8: Trailing comma before the end of JSON object""" + ) + }) + } + + @Test + fun testMultipleTrailingCommasAreNotAllowedEvenWhenTrailingIsEnabled() = parametrizedTest { mode -> + val sd = """{"a":"1",,}""" + checkSerializationException({ + tj.decodeFromString>(sd, mode) + }, { message -> + assertContains( + message, + """Unexpected JSON token at offset 9: Multiple consecutive commas are not allowed in JSON""" + ) + }) + } + + @Test + fun testMultipleTrailingCommasError() = parametrizedTest { mode -> + val sd = """{"a":"1",,,}""" + + checkSerializationException({ + default.decodeFromString>(sd, mode) + }, { message -> + assertContains( + message, + """Unexpected JSON token at offset 9: Multiple consecutive commas are not allowed in JSON""" + ) + }) + } } diff --git a/formats/json/commonMain/src/kotlinx/serialization/json/internal/JsonExceptions.kt b/formats/json/commonMain/src/kotlinx/serialization/json/internal/JsonExceptions.kt index c885c808ff..0f1c229153 100644 --- a/formats/json/commonMain/src/kotlinx/serialization/json/internal/JsonExceptions.kt +++ b/formats/json/commonMain/src/kotlinx/serialization/json/internal/JsonExceptions.kt @@ -46,13 +46,6 @@ internal fun AbstractJsonLexer.throwInvalidFloatingPointDecoded(result: Number): hint = specialFlowingValuesHint) } -internal fun AbstractJsonLexer.invalidTrailingComma(entity: String = "object"): Nothing { - fail("Trailing comma before the end of JSON $entity", - position = currentPosition - 1, - hint = "Trailing commas are non-complaint JSON and not allowed by default. Use 'allowTrailingComma = true' in 'Json {}' builder to support them." - ) -} - @OptIn(ExperimentalSerializationApi::class) internal fun InvalidKeyKindException(keyDescriptor: SerialDescriptor) = JsonEncodingException( "Value of type '${keyDescriptor.serialName}' can't be used in JSON as a key in the map. " + diff --git a/formats/json/commonMain/src/kotlinx/serialization/json/internal/JsonTreeReader.kt b/formats/json/commonMain/src/kotlinx/serialization/json/internal/JsonTreeReader.kt index 9cb9bb3c56..bcad4cdbb3 100644 --- a/formats/json/commonMain/src/kotlinx/serialization/json/internal/JsonTreeReader.kt +++ b/formats/json/commonMain/src/kotlinx/serialization/json/internal/JsonTreeReader.kt @@ -4,7 +4,7 @@ package kotlinx.serialization.json.internal -import kotlinx.serialization.ExperimentalSerializationApi +import kotlinx.serialization.* import kotlinx.serialization.json.* @OptIn(ExperimentalSerializationApi::class) @@ -24,53 +24,52 @@ internal class JsonTreeReader( readObjectImpl { callRecursive(Unit) } private inline fun readObjectImpl(reader: () -> JsonElement): JsonObject { - var lastToken = lexer.consumeNextToken(TC_BEGIN_OBJ) + lexer.consumeNextToken(TC_BEGIN_OBJ) + lexer.path.pushDescriptor(JsonObjectSerializer.descriptor) + if (lexer.peekNextToken() == TC_COMMA) lexer.fail("Unexpected leading comma") val result = linkedMapOf() while (lexer.canConsumeValue()) { + lexer.path.resetCurrentMapKey() // Read key and value val key = if (isLenient) lexer.consumeStringLenient() else lexer.consumeString() lexer.consumeNextToken(TC_COLON) + + lexer.path.updateCurrentMapKey(key) val element = reader() result[key] = element // Verify the next token - lastToken = lexer.consumeNextToken() - when (lastToken) { - TC_COMMA -> Unit // no-op, can continue with `canConsumeValue` that verifies the token after comma - TC_END_OBJ -> break // `canConsumeValue` can return incorrect result, since it checks token _after_ TC_END_OBJ - else -> lexer.fail("Expected end of the object or comma") - } + lexer.consumeCommaOrPeekEnd( + allowTrailing = trailingCommaAllowed, + expectedEnd = '}', + ) } - // Check for the correct ending - if (lastToken == TC_BEGIN_OBJ) { // Case of empty object - lexer.consumeNextToken(TC_END_OBJ) - } else if (lastToken == TC_COMMA) { // Trailing comma - if (!trailingCommaAllowed) lexer.invalidTrailingComma() - lexer.consumeNextToken(TC_END_OBJ) - } // else unexpected token? + + lexer.consumeNextToken(TC_END_OBJ) + lexer.path.popDescriptor() + return JsonObject(result) } private fun readArray(): JsonElement { - var lastToken = lexer.consumeNextToken() + lexer.consumeNextToken(TC_BEGIN_LIST) + lexer.path.pushDescriptor(JsonArraySerializer.descriptor) // Prohibit leading comma if (lexer.peekNextToken() == TC_COMMA) lexer.fail("Unexpected leading comma") val result = arrayListOf() while (lexer.canConsumeValue()) { + lexer.path.updateDescriptorIndex(result.size) val element = read() result.add(element) - lastToken = lexer.consumeNextToken() - if (lastToken != TC_COMMA) { - lexer.require(lastToken == TC_END_LIST) { "Expected end of the array or comma" } - } - } - // Check for the correct ending - if (lastToken == TC_BEGIN_LIST) { // Case of empty object - lexer.consumeNextToken(TC_END_LIST) - } else if (lastToken == TC_COMMA) { // Trailing comma - if (!trailingCommaAllowed) lexer.invalidTrailingComma("array") - lexer.consumeNextToken(TC_END_LIST) + lexer.consumeCommaOrPeekEnd( + allowTrailing = trailingCommaAllowed, + expectedEnd = ']', + entity = "array" + ) } + lexer.consumeNextToken(TC_END_LIST) + lexer.path.popDescriptor() + return JsonArray(result) } @@ -103,6 +102,7 @@ internal class JsonTreeReader( --stackDepth result } + TC_BEGIN_LIST -> readArray() else -> lexer.fail("Cannot read Json element because of unexpected ${tokenDescription(token)}") } diff --git a/formats/json/commonMain/src/kotlinx/serialization/json/internal/StreamingJsonDecoder.kt b/formats/json/commonMain/src/kotlinx/serialization/json/internal/StreamingJsonDecoder.kt index 2e68da734e..807e2c3970 100644 --- a/formats/json/commonMain/src/kotlinx/serialization/json/internal/StreamingJsonDecoder.kt +++ b/formats/json/commonMain/src/kotlinx/serialization/json/internal/StreamingJsonDecoder.kt @@ -123,7 +123,6 @@ internal open class StreamingJsonDecoder( if (descriptor.elementsCount == 0 && descriptor.ignoreUnknownKeys(json)) { skipLeftoverElements(descriptor) } - if (lexer.tryConsumeComma() && !json.configuration.allowTrailingComma) lexer.invalidTrailingComma("") // First consume the object so we know it's correct lexer.consumeNextToken(mode.end) // Then cleanup the path @@ -185,24 +184,21 @@ internal open class StreamingJsonDecoder( } private fun decodeMapIndex(): Int { - var hasComma = false val decodingKey = currentIndex % 2 != 0 if (decodingKey) { if (currentIndex != -1) { - hasComma = lexer.tryConsumeComma() + lexer.consumeCommaOrPeekEnd( + allowTrailing = json.configuration.allowTrailingComma, + expectedEnd = mode.end + ) } } else { lexer.consumeNextToken(COLON) } return if (lexer.canConsumeValue()) { - if (decodingKey) { - if (currentIndex == -1) lexer.require(!hasComma) { "Unexpected leading comma" } - else lexer.require(hasComma) { "Expected comma after the key-value pair" } - } ++currentIndex } else { - if (hasComma && !json.configuration.allowTrailingComma) lexer.invalidTrailingComma() CompositeDecoder.DECODE_DONE } } @@ -220,19 +216,10 @@ internal open class StreamingJsonDecoder( private fun decodeObjectIndex(descriptor: SerialDescriptor): Int { while (true) { if (currentIndex != -1) { - val next = lexer.peekNextToken() - if (next == TC_END_OBJ) { - currentIndex = CompositeDecoder.DECODE_DONE - return elementMarker?.nextUnmarkedIndex() ?: CompositeDecoder.DECODE_DONE - } - - lexer.require(next == TC_COMMA) { "Expected comma after the key-value pair" } - val commaPosition = lexer.currentPosition - lexer.consumeNextToken() - lexer.require( - configuration.allowTrailingComma || lexer.peekNextToken() != TC_END_OBJ, - position = commaPosition - ) { "Trailing comma before the end of JSON object" } + lexer.consumeCommaOrPeekEnd( + allowTrailing = json.configuration.allowTrailingComma, + expectedEnd = mode.end + ) } if (!lexer.canConsumeValue()) break @@ -270,13 +257,17 @@ internal open class StreamingJsonDecoder( } private fun decodeListIndex(): Int { - // Prohibit leading comma - val hasComma = lexer.tryConsumeComma() + if (currentIndex != -1) { + lexer.consumeCommaOrPeekEnd( + allowTrailing = json.configuration.allowTrailingComma, + expectedEnd = mode.end, + entity = "array" + ) + } + return if (lexer.canConsumeValue()) { - if (currentIndex != -1 && !hasComma) lexer.fail("Expected end of the array or comma") ++currentIndex } else { - if (hasComma && !json.configuration.allowTrailingComma) lexer.invalidTrailingComma("array") CompositeDecoder.DECODE_DONE } } diff --git a/formats/json/commonMain/src/kotlinx/serialization/json/internal/lexer/AbstractJsonLexer.kt b/formats/json/commonMain/src/kotlinx/serialization/json/internal/lexer/AbstractJsonLexer.kt index 5f570a95ec..a26df22322 100644 --- a/formats/json/commonMain/src/kotlinx/serialization/json/internal/lexer/AbstractJsonLexer.kt +++ b/formats/json/commonMain/src/kotlinx/serialization/json/internal/lexer/AbstractJsonLexer.kt @@ -168,17 +168,6 @@ internal abstract class AbstractJsonLexer { abstract fun consumeNextToken(): Byte - fun tryConsumeComma(): Boolean { - val current = skipWhitespaces() - val source = source - if (current >= source.length || current == -1) return false - if (source[current] == ',') { - ++currentPosition - return true - } - return false - } - protected fun isValidValueStart(c: Char): Boolean { return when (c) { '}', ']', ':', ',' -> false @@ -759,4 +748,37 @@ internal abstract class AbstractJsonLexer { currentPosition = snapshot } } + + fun consumeCommaOrPeekEnd( + allowTrailing: Boolean, + expectedEnd: Char, + entity: String = "object", + ) { + val nextToken = peekNextToken() + if (nextToken == charToTokenClass(expectedEnd)) return + if (nextToken != TC_COMMA) { + path.popDescriptor() + fail("Expected end of the $entity or comma") + } + val commaPosition = currentPosition++ // consumes peeked comma + val peeked = peekNextToken() + + if (peeked == charToTokenClass(expectedEnd) && !allowTrailing) { + path.popDescriptor() + + fail( + "Trailing comma before the end of JSON $entity", + position = commaPosition, + hint = "Trailing commas are non-complaint JSON and not allowed by default. Use 'allowTrailingComma = true' in 'Json {}' builder to support them." + ) + } + if (peeked == TC_COMMA) { + path.popDescriptor() + fail( + "Multiple consecutive commas are not allowed in JSON", + position = currentPosition, + hint = "Trailing commas are non-complaint JSON and not allowed by default. Use 'allowTrailingComma = true' in 'Json {}' builder to support them." + ) + } + } }