Skip to content

Commit

Permalink
feat: move comma check to lexer and improve trailing comma reports
Browse files Browse the repository at this point in the history
  • Loading branch information
alisa101rs committed Dec 24, 2024
1 parent 591bc8f commit 76897d9
Show file tree
Hide file tree
Showing 7 changed files with 137 additions and 81 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,6 @@ package kotlinx.serialization.json

import kotlinx.serialization.*
import kotlinx.serialization.builtins.*
import kotlinx.serialization.json.internal.*
import kotlinx.serialization.test.*
import kotlin.test.*

Expand Down Expand Up @@ -79,11 +78,11 @@ class JsonParserTest : JsonTestBase() {
fun testTrailingComma() {
testTrailingComma("{\"id\":0,}")
testTrailingComma("{\"id\":0 ,}")
testTrailingComma("{\"id\":0 , ,}")
testTrailingComma("{\"id\":0 , ,}", message = "Multiple consecutive commas are not allowed in JSON")
}

private fun testTrailingComma(content: String) {
assertFailsWithSerialMessage("JsonDecodingException", "Trailing comma before the end of JSON object") { Json.parseToJsonElement(content) }
private fun testTrailingComma(content: String, message: String = "Trailing comma before the end of JSON object") {
assertFailsWithSerialMessage("JsonDecodingException", message) { Json.parseToJsonElement(content) }
}

@Test
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ class MissingCommaTest : JsonTestBase() {
@Test
fun missingCommaBetweenFieldsAfterPrimitive() {
val message =
"Unexpected JSON token at offset 8: Expected comma after the key-value pair at path: \$.i"
"Unexpected JSON token at offset 8: Expected end of the object or comma at path: \$"
val json = """{"i":42 "c":{"i":"string"}}"""

assertFailsWithSerialMessage("JsonDecodingException", message) {
Expand All @@ -37,7 +37,7 @@ class MissingCommaTest : JsonTestBase() {
@Test
fun missingCommaBetweenFieldsAfterObject() {
val message =
"Unexpected JSON token at offset 19: Expected comma after the key-value pair at path: \$.c"
"Unexpected JSON token at offset 19: Expected end of the object or comma at path: \$"
val json = """{"c":{"i":"string"}"i":42}"""

assertFailsWithSerialMessage("JsonDecodingException", message) {
Expand All @@ -48,7 +48,7 @@ class MissingCommaTest : JsonTestBase() {
@Test
fun allowTrailingCommaDoesNotApplyToCommaBetweenFields() {
val message =
"Unexpected JSON token at offset 8: Expected comma after the key-value pair at path: \$.i"
"Unexpected JSON token at offset 8: Expected end of the object or comma at path: \$"
val json = """{"i":42 "c":{"i":"string"}}"""

assertFailsWithSerialMessage("JsonDecodingException", message) {
Expand All @@ -59,7 +59,7 @@ class MissingCommaTest : JsonTestBase() {
@Test
fun lenientSerializeDoesNotAllowToSkipCommaBetweenFields() {
val message =
"Unexpected JSON token at offset 8: Expected comma after the key-value pair at path: \$.i"
"Unexpected JSON token at offset 8: Expected end of the object or comma at path: \$"
val json = """{"i":42 "c":{"i":"string"}}"""

assertFailsWithSerialMessage("JsonDecodingException", message) {
Expand All @@ -69,7 +69,7 @@ class MissingCommaTest : JsonTestBase() {

@Test
fun missingCommaInDynamicMap() {
val m = "Unexpected JSON token at offset 9: Expected end of the object or comma at path: \$"
val m = "Unexpected JSON token at offset 8: Expected end of the object or comma at path: \$"
val json = """{"i":42 "c":{"i":"string"}}"""
assertFailsWithSerialMessage("JsonDecodingException", m) {
default.parseToJsonElement(json)
Expand All @@ -78,7 +78,7 @@ class MissingCommaTest : JsonTestBase() {

@Test
fun missingCommaInArray() {
val m = "Unexpected JSON token at offset 3: Expected end of the array or comma at path: \$[0]"
val m = "Unexpected JSON token at offset 3: Expected end of the array or comma at path: \$"
val json = """[1 2 3 4]"""

assertFailsWithSerialMessage("JsonDecodingException", m) {
Expand All @@ -88,11 +88,21 @@ class MissingCommaTest : JsonTestBase() {

@Test
fun missingCommaInStringMap() {
val m = "Unexpected JSON token at offset 9: Expected comma after the key-value pair at path: \$['a']"
val m = "Unexpected JSON token at offset 9: Expected end of the object or comma at path: \$"
val json = """{"a":"1" "b":"2"}"""

assertFailsWithSerialMessage("JsonDecodingException", m) {
default.decodeFromString<Map<String, String>>(json)
}
}

@Test
fun missingCommaInUnknownKeys() {
val m = "Unexpected JSON token at offset 17: Expected end of the object or comma at path: \$"
val json = """{"i":"1","b":"2" "c":"2"}"""

assertFailsWithSerialMessage("JsonDecodingException", m) {
lenient.decodeFromString<Child>(json)
}
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -125,4 +125,45 @@ class TrailingCommaTest : JsonTestBase() {
"wl":{"l":[1, 2, 3,],},}"""
assertEquals(Mixed(multipleFields, withMap, withList), tj.decodeFromString(input, mode))
}

@Test
fun testCommaReportedPosition() = parametrizedTest { mode ->
val sd = """{"a":"1", }"""

checkSerializationException({
default.decodeFromString<Map<String, String>>(sd, mode)
}, { message ->
assertContains(
message,
"""Unexpected JSON token at offset 8: Trailing comma before the end of JSON object"""
)
})
}

@Test
fun testMultipleTrailingCommasAreNotAllowedEvenWhenTrailingIsEnabled() = parametrizedTest { mode ->
val sd = """{"a":"1",,}"""
checkSerializationException({
tj.decodeFromString<Map<String, String>>(sd, mode)
}, { message ->
assertContains(
message,
"""Unexpected JSON token at offset 9: Multiple consecutive commas are not allowed in JSON"""
)
})
}

@Test
fun testMultipleTrailingCommasError() = parametrizedTest { mode ->
val sd = """{"a":"1",,,}"""

checkSerializationException({
default.decodeFromString<Map<String, String>>(sd, mode)
}, { message ->
assertContains(
message,
"""Unexpected JSON token at offset 9: Multiple consecutive commas are not allowed in JSON"""
)
})
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -46,13 +46,6 @@ internal fun AbstractJsonLexer.throwInvalidFloatingPointDecoded(result: Number):
hint = specialFlowingValuesHint)
}

internal fun AbstractJsonLexer.invalidTrailingComma(entity: String = "object"): Nothing {
fail("Trailing comma before the end of JSON $entity",
position = currentPosition - 1,
hint = "Trailing commas are non-complaint JSON and not allowed by default. Use 'allowTrailingComma = true' in 'Json {}' builder to support them."
)
}

@OptIn(ExperimentalSerializationApi::class)
internal fun InvalidKeyKindException(keyDescriptor: SerialDescriptor) = JsonEncodingException(
"Value of type '${keyDescriptor.serialName}' can't be used in JSON as a key in the map. " +
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@

package kotlinx.serialization.json.internal

import kotlinx.serialization.ExperimentalSerializationApi
import kotlinx.serialization.*
import kotlinx.serialization.json.*

@OptIn(ExperimentalSerializationApi::class)
Expand All @@ -24,53 +24,52 @@ internal class JsonTreeReader(
readObjectImpl { callRecursive(Unit) }

private inline fun readObjectImpl(reader: () -> JsonElement): JsonObject {
var lastToken = lexer.consumeNextToken(TC_BEGIN_OBJ)
lexer.consumeNextToken(TC_BEGIN_OBJ)
lexer.path.pushDescriptor(JsonObjectSerializer.descriptor)

if (lexer.peekNextToken() == TC_COMMA) lexer.fail("Unexpected leading comma")
val result = linkedMapOf<String, JsonElement>()
while (lexer.canConsumeValue()) {
lexer.path.resetCurrentMapKey()
// Read key and value
val key = if (isLenient) lexer.consumeStringLenient() else lexer.consumeString()
lexer.consumeNextToken(TC_COLON)

lexer.path.updateCurrentMapKey(key)
val element = reader()
result[key] = element
// Verify the next token
lastToken = lexer.consumeNextToken()
when (lastToken) {
TC_COMMA -> Unit // no-op, can continue with `canConsumeValue` that verifies the token after comma
TC_END_OBJ -> break // `canConsumeValue` can return incorrect result, since it checks token _after_ TC_END_OBJ
else -> lexer.fail("Expected end of the object or comma")
}
lexer.consumeCommaOrPeekEnd(
allowTrailing = trailingCommaAllowed,
expectedEnd = '}',
)
}
// Check for the correct ending
if (lastToken == TC_BEGIN_OBJ) { // Case of empty object
lexer.consumeNextToken(TC_END_OBJ)
} else if (lastToken == TC_COMMA) { // Trailing comma
if (!trailingCommaAllowed) lexer.invalidTrailingComma()
lexer.consumeNextToken(TC_END_OBJ)
} // else unexpected token?

lexer.consumeNextToken(TC_END_OBJ)
lexer.path.popDescriptor()

return JsonObject(result)
}

private fun readArray(): JsonElement {
var lastToken = lexer.consumeNextToken()
lexer.consumeNextToken(TC_BEGIN_LIST)
lexer.path.pushDescriptor(JsonArraySerializer.descriptor)
// Prohibit leading comma
if (lexer.peekNextToken() == TC_COMMA) lexer.fail("Unexpected leading comma")
val result = arrayListOf<JsonElement>()
while (lexer.canConsumeValue()) {
lexer.path.updateDescriptorIndex(result.size)
val element = read()
result.add(element)
lastToken = lexer.consumeNextToken()
if (lastToken != TC_COMMA) {
lexer.require(lastToken == TC_END_LIST) { "Expected end of the array or comma" }
}
}
// Check for the correct ending
if (lastToken == TC_BEGIN_LIST) { // Case of empty object
lexer.consumeNextToken(TC_END_LIST)
} else if (lastToken == TC_COMMA) { // Trailing comma
if (!trailingCommaAllowed) lexer.invalidTrailingComma("array")
lexer.consumeNextToken(TC_END_LIST)
lexer.consumeCommaOrPeekEnd(
allowTrailing = trailingCommaAllowed,
expectedEnd = ']',
entity = "array"
)
}
lexer.consumeNextToken(TC_END_LIST)
lexer.path.popDescriptor()

return JsonArray(result)
}

Expand Down Expand Up @@ -103,6 +102,7 @@ internal class JsonTreeReader(
--stackDepth
result
}

TC_BEGIN_LIST -> readArray()
else -> lexer.fail("Cannot read Json element because of unexpected ${tokenDescription(token)}")
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -123,7 +123,6 @@ internal open class StreamingJsonDecoder(
if (descriptor.elementsCount == 0 && descriptor.ignoreUnknownKeys(json)) {
skipLeftoverElements(descriptor)
}
if (lexer.tryConsumeComma() && !json.configuration.allowTrailingComma) lexer.invalidTrailingComma("")
// First consume the object so we know it's correct
lexer.consumeNextToken(mode.end)
// Then cleanup the path
Expand Down Expand Up @@ -185,24 +184,21 @@ internal open class StreamingJsonDecoder(
}

private fun decodeMapIndex(): Int {
var hasComma = false
val decodingKey = currentIndex % 2 != 0
if (decodingKey) {
if (currentIndex != -1) {
hasComma = lexer.tryConsumeComma()
lexer.consumeCommaOrPeekEnd(
allowTrailing = json.configuration.allowTrailingComma,
expectedEnd = mode.end
)
}
} else {
lexer.consumeNextToken(COLON)
}

return if (lexer.canConsumeValue()) {
if (decodingKey) {
if (currentIndex == -1) lexer.require(!hasComma) { "Unexpected leading comma" }
else lexer.require(hasComma) { "Expected comma after the key-value pair" }
}
++currentIndex
} else {
if (hasComma && !json.configuration.allowTrailingComma) lexer.invalidTrailingComma()
CompositeDecoder.DECODE_DONE
}
}
Expand All @@ -220,19 +216,10 @@ internal open class StreamingJsonDecoder(
private fun decodeObjectIndex(descriptor: SerialDescriptor): Int {
while (true) {
if (currentIndex != -1) {
val next = lexer.peekNextToken()
if (next == TC_END_OBJ) {
currentIndex = CompositeDecoder.DECODE_DONE
return elementMarker?.nextUnmarkedIndex() ?: CompositeDecoder.DECODE_DONE
}

lexer.require(next == TC_COMMA) { "Expected comma after the key-value pair" }
val commaPosition = lexer.currentPosition
lexer.consumeNextToken()
lexer.require(
configuration.allowTrailingComma || lexer.peekNextToken() != TC_END_OBJ,
position = commaPosition
) { "Trailing comma before the end of JSON object" }
lexer.consumeCommaOrPeekEnd(
allowTrailing = json.configuration.allowTrailingComma,
expectedEnd = mode.end
)
}

if (!lexer.canConsumeValue()) break
Expand Down Expand Up @@ -270,13 +257,17 @@ internal open class StreamingJsonDecoder(
}

private fun decodeListIndex(): Int {
// Prohibit leading comma
val hasComma = lexer.tryConsumeComma()
if (currentIndex != -1) {
lexer.consumeCommaOrPeekEnd(
allowTrailing = json.configuration.allowTrailingComma,
expectedEnd = mode.end,
entity = "array"
)
}

return if (lexer.canConsumeValue()) {
if (currentIndex != -1 && !hasComma) lexer.fail("Expected end of the array or comma")
++currentIndex
} else {
if (hasComma && !json.configuration.allowTrailingComma) lexer.invalidTrailingComma("array")
CompositeDecoder.DECODE_DONE
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -168,17 +168,6 @@ internal abstract class AbstractJsonLexer {

abstract fun consumeNextToken(): Byte

fun tryConsumeComma(): Boolean {
val current = skipWhitespaces()
val source = source
if (current >= source.length || current == -1) return false
if (source[current] == ',') {
++currentPosition
return true
}
return false
}

protected fun isValidValueStart(c: Char): Boolean {
return when (c) {
'}', ']', ':', ',' -> false
Expand Down Expand Up @@ -759,4 +748,37 @@ internal abstract class AbstractJsonLexer {
currentPosition = snapshot
}
}

fun consumeCommaOrPeekEnd(
allowTrailing: Boolean,
expectedEnd: Char,
entity: String = "object",
) {
val nextToken = peekNextToken()
if (nextToken == charToTokenClass(expectedEnd)) return
if (nextToken != TC_COMMA) {
path.popDescriptor()
fail("Expected end of the $entity or comma")
}
val commaPosition = currentPosition++ // consumes peeked comma
val peeked = peekNextToken()

if (peeked == charToTokenClass(expectedEnd) && !allowTrailing) {
path.popDescriptor()

fail(
"Trailing comma before the end of JSON $entity",
position = commaPosition,
hint = "Trailing commas are non-complaint JSON and not allowed by default. Use 'allowTrailingComma = true' in 'Json {}' builder to support them."
)
}
if (peeked == TC_COMMA) {
path.popDescriptor()
fail(
"Multiple consecutive commas are not allowed in JSON",
position = currentPosition,
hint = "Trailing commas are non-complaint JSON and not allowed by default. Use 'allowTrailingComma = true' in 'Json {}' builder to support them."
)
}
}
}

0 comments on commit 76897d9

Please sign in to comment.