From 3009f51d203f12bdcb0b87f2b63a4658cf55ffba Mon Sep 17 00:00:00 2001 From: Martin Jambon Date: Wed, 16 Oct 2024 17:27:09 -0700 Subject: [PATCH 1/4] Add test for infix function application (bug if starts with 'e') --- test/corpus/expressions.txt | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/test/corpus/expressions.txt b/test/corpus/expressions.txt index 49cd6fb..64de724 100644 --- a/test/corpus/expressions.txt +++ b/test/corpus/expressions.txt @@ -624,3 +624,22 @@ if (cond1) { (value_argument (string_literal (string_content)))))))))))) + +================================================================================ +Infix syntax for function application +================================================================================ + +1 x 2 +1 e 2 + +--- + +(source_file + (infix_expression + (integer_literal) + (simple_identifier) + (integer_literal)) + (infix_expression + (integer_literal) + (simple_identifier) + (integer_literal))) From 2a0bf8a5eb38b93630b7b2e6222c2e3bbaecfb4d Mon Sep 17 00:00:00 2001 From: Martin Jambon Date: Thu, 17 Oct 2024 17:09:30 -0700 Subject: [PATCH 2/4] Add more tests and a fix for infix operators starting with 'import' --- src/scanner.c | 18 +++++++--- test/corpus/expressions.txt | 71 +++++++++++++++++++++++++++++++++---- 2 files changed, 79 insertions(+), 10 deletions(-) diff --git a/src/scanner.c b/src/scanner.c index 4c9bd78..ad8e12a 100644 --- a/src/scanner.c +++ b/src/scanner.c @@ -236,12 +236,23 @@ static bool scan_whitespace_and_comments(TSLexer *lexer) { return lexer->lookahead != '/'; } +// Test for any identifier character other than the first character. +// This is meant to match the regexp [\p{L}_\p{Nd}] +// as found in '_alpha_identifier' (see grammar.js). +static bool is_word_char(int32_t c) { + return (iswalnum(c) || c == '_'); +} + +// Scan for a fragment of a nonempty alphanumeric identifier or +// alphanumeric keyword (including '_'). static bool scan_for_word(TSLexer *lexer, const char* word, unsigned len) { skip(lexer); for (unsigned i = 0; i < len; ++i) { if (lexer->lookahead != word[i]) return false; skip(lexer); } + // check that the identifier stops here + if (is_word_char(lexer->lookahead)) return false; return true; } @@ -285,10 +296,8 @@ static bool scan_automatic_semicolon(TSLexer *lexer) { if (sameline) { switch (lexer->lookahead) { - // Don't insert a semicolon before an else - case 'e': - return !scan_for_word(lexer, "lse", 3); - + // Insert imaginary semicolon before an 'import' but in front of other + // words or keywords starting with 'i' case 'i': return scan_for_word(lexer, "mport", 5); @@ -297,6 +306,7 @@ static bool scan_automatic_semicolon(TSLexer *lexer) { lexer->mark_end(lexer); return true; + // Don't insert a semicolon in other cases default: return false; } diff --git a/test/corpus/expressions.txt b/test/corpus/expressions.txt index 64de724..4eeaf0c 100644 --- a/test/corpus/expressions.txt +++ b/test/corpus/expressions.txt @@ -625,21 +625,80 @@ if (cond1) { (string_literal (string_content)))))))))))) +================================================================================ +If-else without braces or semicolons or newlines +================================================================================ + +if (true) a else if (true) b else c + +--- + +(source_file + (if_expression + (boolean_literal) + (control_structure_body + (simple_identifier)) + (control_structure_body + (if_expression + (boolean_literal) + (control_structure_body + (simple_identifier)) + (control_structure_body + (simple_identifier)))))) + ================================================================================ Infix syntax for function application ================================================================================ -1 x 2 -1 e 2 +a mul b --- (source_file (infix_expression - (integer_literal) (simple_identifier) - (integer_literal)) + (simple_identifier) + (simple_identifier))) + +================================================================================ +Infix syntax, edge case 1 +================================================================================ + +a exxx b +a ixxx b + +--- + +(source_file (infix_expression - (integer_literal) (simple_identifier) - (integer_literal))) + (simple_identifier) + (simple_identifier)) + (infix_expression + (simple_identifier) + (simple_identifier) + (simple_identifier))) + +================================================================================ +Infix syntax, edge case 2 +================================================================================ + +a else_ b +a import_ b +a imports b + +--- + +(source_file + (infix_expression + (simple_identifier) + (simple_identifier) + (simple_identifier)) + (infix_expression + (simple_identifier) + (simple_identifier) + (simple_identifier)) + (infix_expression + (simple_identifier) + (simple_identifier) + (simple_identifier))) From 3a7b4fc318b8b690863e5e6b2b42e6a0a47c600b Mon Sep 17 00:00:00 2001 From: Martin Jambon Date: Thu, 17 Oct 2024 17:16:44 -0700 Subject: [PATCH 3/4] Rephrase comment --- src/scanner.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/scanner.c b/src/scanner.c index ad8e12a..ca67c24 100644 --- a/src/scanner.c +++ b/src/scanner.c @@ -243,7 +243,7 @@ static bool is_word_char(int32_t c) { return (iswalnum(c) || c == '_'); } -// Scan for a fragment of a nonempty alphanumeric identifier or +// Scan for [the end of] a nonempty alphanumeric identifier or // alphanumeric keyword (including '_'). static bool scan_for_word(TSLexer *lexer, const char* word, unsigned len) { skip(lexer); From 3bd75c5a4f46c9bcb6a0a16b2d12a850deeee6cb Mon Sep 17 00:00:00 2001 From: Martin Jambon Date: Thu, 17 Oct 2024 19:54:56 -0700 Subject: [PATCH 4/4] Fix typo in comment --- src/scanner.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/scanner.c b/src/scanner.c index ca67c24..65539a5 100644 --- a/src/scanner.c +++ b/src/scanner.c @@ -296,8 +296,8 @@ static bool scan_automatic_semicolon(TSLexer *lexer) { if (sameline) { switch (lexer->lookahead) { - // Insert imaginary semicolon before an 'import' but in front of other - // words or keywords starting with 'i' + // Insert imaginary semicolon before an 'import' but not in front + // of other words or keywords starting with 'i' case 'i': return scan_for_word(lexer, "mport", 5);