Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix parsing errors for infix expressions #157

Merged
merged 4 commits into from
Dec 18, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
18 changes: 14 additions & 4 deletions src/scanner.c
Original file line number Diff line number Diff line change
Expand Up @@ -236,12 +236,23 @@ static bool scan_whitespace_and_comments(TSLexer *lexer) {
return lexer->lookahead != '/';
}

// Reports whether `c` may appear in an identifier at any position other
// than the first: an underscore, or a character that iswalnum() classifies
// as alphanumeric.
// Intended to mirror the regexp [\p{L}_\p{Nd}] used by '_alpha_identifier'
// (see grammar.js).
// NOTE(review): iswalnum() is locale-dependent, so its coverage of non-ASCII
// letters and digits may differ from the regexp above — confirm.
static bool is_word_char(int32_t c) {
  if (c == '_') {
    return true;
  }
  return iswalnum(c) != 0;
}

// Scan for the remainder of a nonempty alphanumeric identifier or keyword.
//
// Skips the current lookahead character, then requires the following `len`
// characters to equal `word` exactly. Matching characters are consumed with
// skip(); scanning stops and returns false at the first mismatch.
// Returns true only if the character right after the match is not a word
// character (alphanumeric or '_'), i.e. the identifier really ends there.
static bool scan_for_word(TSLexer *lexer, const char* word, unsigned len) {
  // Step past the leading character before comparing the rest.
  skip(lexer);

  const char *end = word + len;
  for (const char *p = word; p != end; ++p) {
    if (lexer->lookahead != *p) {
      return false;
    }
    skip(lexer);
  }

  // A match only counts when it is not a prefix of a longer identifier.
  return !is_word_char(lexer->lookahead);
}

Expand Down Expand Up @@ -285,10 +296,8 @@ static bool scan_automatic_semicolon(TSLexer *lexer) {

if (sameline) {
switch (lexer->lookahead) {
// Don't insert a semicolon before an else
case 'e':
return !scan_for_word(lexer, "lse", 3);

// Insert imaginary semicolon before an 'import' but not in front
// of other words or keywords starting with 'i'
case 'i':
return scan_for_word(lexer, "mport", 5);

Expand All @@ -297,6 +306,7 @@ static bool scan_automatic_semicolon(TSLexer *lexer) {
lexer->mark_end(lexer);
return true;

// Don't insert a semicolon in other cases
default:
return false;
}
Expand Down
78 changes: 78 additions & 0 deletions test/corpus/expressions.txt
Original file line number Diff line number Diff line change
Expand Up @@ -624,3 +624,81 @@ if (cond1) {
(value_argument
(string_literal
(string_content))))))))))))

================================================================================
If-else without braces or semicolons or newlines
================================================================================

if (true) a else if (true) b else c

---

(source_file
(if_expression
(boolean_literal)
(control_structure_body
(simple_identifier))
(control_structure_body
(if_expression
(boolean_literal)
(control_structure_body
(simple_identifier))
(control_structure_body
(simple_identifier))))))

================================================================================
Infix syntax for function application
================================================================================

a mul b

---

(source_file
(infix_expression
(simple_identifier)
(simple_identifier)
(simple_identifier)))

================================================================================
Infix syntax, edge case 1
================================================================================

a exxx b
a ixxx b

---

(source_file
(infix_expression
(simple_identifier)
(simple_identifier)
(simple_identifier))
(infix_expression
(simple_identifier)
(simple_identifier)
(simple_identifier)))

================================================================================
Infix syntax, edge case 2
================================================================================

a else_ b
a import_ b
a imports b

---

(source_file
(infix_expression
(simple_identifier)
(simple_identifier)
(simple_identifier))
(infix_expression
(simple_identifier)
(simple_identifier)
(simple_identifier))
(infix_expression
(simple_identifier)
(simple_identifier)
(simple_identifier)))