From e9553a0502fc72fbd047e1976714ffb6b37818f1 Mon Sep 17 00:00:00 2001 From: Tim Kuijsten Date: Fri, 4 Nov 2022 01:56:44 +0100 Subject: [PATCH] don't trip over unescaped UTF-8 keys Support UTF-8 in the spirit of jsmn being lean and mean. Don't do any validation or decoding, just make sure unescaped UTF-8 keys are supported. --- jsmn.h | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/jsmn.h b/jsmn.h index 8ac14c1..8172565 100644 --- a/jsmn.h +++ b/jsmn.h @@ -137,11 +137,13 @@ static int jsmn_parse_primitive(jsmn_parser *parser, const char *js, const size_t num_tokens) { jsmntok_t *token; int start; + unsigned char b; start = parser->pos; for (; parser->pos < len && js[parser->pos] != '\0'; parser->pos++) { - switch (js[parser->pos]) { + b = js[parser->pos]; + switch (b) { #ifndef JSMN_STRICT /* In strict mode primitive must be followed by "," or "}" or "]" */ case ':': @@ -158,10 +160,20 @@ static int jsmn_parse_primitive(jsmn_parser *parser, const char *js, /* to quiet a warning from gcc*/ break; } - if (js[parser->pos] < 32 || js[parser->pos] >= 127) { + + if (b < 32) { parser->pos = start; return JSMN_ERROR_INVAL; } + + if (b >= 127 && + (b & (0xc0 | 0x20)) != 0xc0 && + (b & (0xe0 | 0x10)) != 0xe0 && + (b & (0xf0 | 0x08)) != 0xf0 && + (b & (0x80 | 0x40)) != 0x80) { + parser->pos = start; + return JSMN_ERROR_INVAL; + } } #ifdef JSMN_STRICT /* In strict mode primitive must be followed by a comma/object/array */