From f27faae58672cff99bcdb1e8ae33e5625826ac77 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ma=C3=ABl=20Nison?= Date: Tue, 1 Oct 2024 23:59:26 +0200 Subject: [PATCH] Adds lookbehind support --- packages/chevrotain/src/scan/reg_exp.ts | 10 +- .../regexp-to-ast/src/base-regexp-visitor.ts | 10 ++ packages/regexp-to-ast/test/parser.spec.ts | 91 +++++++++++++++++++ packages/regexp-to-ast/test/visitor.spec.ts | 25 +++++ packages/regexp-to-ast/types.d.ts | 6 +- 5 files changed, 139 insertions(+), 3 deletions(-) diff --git a/packages/chevrotain/src/scan/reg_exp.ts b/packages/chevrotain/src/scan/reg_exp.ts index 733febf89..c37c74cbe 100644 --- a/packages/chevrotain/src/scan/reg_exp.ts +++ b/packages/chevrotain/src/scan/reg_exp.ts @@ -263,7 +263,7 @@ class CharCodeFinder extends BaseRegExpVisitor { return; } - // switch lookaheads as they do not actually consume any characters thus + // switch lookaheads / lookbehinds as they do not actually consume any characters thus // finding a charCode at lookahead context does not mean that regexp can actually contain it in a match. switch (node.type) { case "Lookahead": @@ -272,7 +272,13 @@ class CharCodeFinder extends BaseRegExpVisitor { case "NegativeLookahead": this.visitNegativeLookahead(node); return; - } + case "Lookbehind": + this.visitLookbehind(node); + return; + case "NegativeLookbehind": + this.visitNegativeLookbehind(node); + return; + } super.visitChildren(node); } diff --git a/packages/regexp-to-ast/src/base-regexp-visitor.ts b/packages/regexp-to-ast/src/base-regexp-visitor.ts index 2da311893..284adbbe8 100644 --- a/packages/regexp-to-ast/src/base-regexp-visitor.ts +++ b/packages/regexp-to-ast/src/base-regexp-visitor.ts @@ -62,6 +62,12 @@ export class BaseRegExpVisitor { case "NegativeLookahead": this.visitNegativeLookahead(node); break; + case "Lookbehind": + this.visitLookbehind(node); + break; + case "NegativeLookbehind": + this.visitNegativeLookbehind(node); + break; case "Character": this.visitCharacter(node); break; @@ -103,6 +109,10 @@ export class BaseRegExpVisitor { public visitNegativeLookahead(node: Assertion): void {} + public visitLookbehind(node: Assertion): void {} + + public visitNegativeLookbehind(node: Assertion): void {} + // atoms public visitCharacter(node: Character): void {} diff --git a/packages/regexp-to-ast/test/parser.spec.ts b/packages/regexp-to-ast/test/parser.spec.ts index f3fdf7744..670ed7f2b 100644 --- a/packages/regexp-to-ast/test/parser.spec.ts +++ b/packages/regexp-to-ast/test/parser.spec.ts @@ -469,6 +469,97 @@ describe("The RegExp to Ast parser", () => { }); }); + it("lookbehind assertion", () => { + const ast = parser.pattern("/a(?<=b)/"); + expect(ast.value).to.deep.equal({ + type: "Disjunction", + loc: { begin: 1, end: 7 }, + value: [ + { + type: "Alternative", + loc: { begin: 1, end: 7 }, + value: [ + { + type: "Character", + loc: { begin: 1, end: 2 }, + value: 97, + }, + { + type: "Lookbehind", + loc: { begin: 2, end: 7 }, + value: { + type: "Disjunction", + loc: { begin: 5, end: 6 }, + value: [ + { + type: "Alternative", + loc: { begin: 5, end: 6 }, + value: [ + { + type: "Character", + loc: { + begin: 5, + end: 6, + }, + value: 98, + }, + ], + }, + ], + }, + }, + ], + }, + ], + }); + }); + + it("lookbehind assertion", () => { + const ast = parser.pattern("/a(? { it("zero or one", () => { const ast = parser.pattern("/a?/"); diff --git a/packages/regexp-to-ast/test/visitor.spec.ts b/packages/regexp-to-ast/test/visitor.spec.ts index 2cbca3c2d..9f3f4d627 100644 --- a/packages/regexp-to-ast/test/visitor.spec.ts +++ b/packages/regexp-to-ast/test/visitor.spec.ts @@ -138,6 +138,31 @@ describe("The regexp AST visitor", () => { new NegativeLookaheadVisitor().visit(ast); }); + + it("Can visit Lookbehind", () => { + const ast = parser.pattern("/a(?<=a|b)/"); + class LookbehindVisitor extends BaseRegExpVisitor { + visitLookbehind(node: Assertion) { + super.visitLookbehind(node); + expect(node.value?.value).to.have.lengthOf(2); + } + } + + new LookbehindVisitor().visit(ast); + }); + + it("Can visit NegativeLookbehind", () => { + const ast = parser.pattern("/a(? { const ast = parser.pattern("/a/"); class CharacterVisitor extends BaseRegExpVisitor { diff --git a/packages/regexp-to-ast/types.d.ts b/packages/regexp-to-ast/types.d.ts index 743245056..af77f8724 100644 --- a/packages/regexp-to-ast/types.d.ts +++ b/packages/regexp-to-ast/types.d.ts @@ -55,7 +55,9 @@ export interface Assertion extends IRegExpAST { | "WordBoundary" | "NonWordBoundary" | "Lookahead" - | "NegativeLookahead"; + | "NegativeLookahead" + | "Lookbehind" + | "NegativeLookbehind"; value?: Disjunction; } @@ -126,6 +128,8 @@ export class BaseRegExpVisitor { visitNonWordBoundary(node: Assertion): void; visitLookahead(node: Assertion): void; visitNegativeLookahead(node: Assertion): void; + visitLookbehind(node: Assertion): void; + visitNegativeLookbehind(node: Assertion): void; visitCharacter(node: Character): void; visitSet(node: Set): void; visitGroup(Node: Group): void;