From 4a16993bec1318b611027fd5f2432e6605da1f85 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Maciej=20Jastrze=CC=A8bski?= Date: Sun, 22 Sep 2024 21:16:17 +0200 Subject: [PATCH] refactor: numeric precedence --- src/constructs/anchors.ts | 8 ++++---- src/constructs/capture.ts | 6 +++--- src/constructs/char-class.ts | 2 +- src/constructs/char-escape.ts | 14 +++++++------- src/constructs/choice-of.ts | 2 +- src/constructs/lookahead.ts | 2 +- src/constructs/lookbehind.ts | 2 +- src/constructs/negative-lookahead.ts | 2 +- src/constructs/negative-lookbehind.ts | 2 +- src/constructs/quantifiers.ts | 6 +++--- src/constructs/repeat.ts | 4 ++-- src/constructs/unicode.ts | 4 ++-- src/encoder.ts | 12 +++++------- src/types.ts | 2 +- 14 files changed, 33 insertions(+), 35 deletions(-) diff --git a/src/constructs/anchors.ts b/src/constructs/anchors.ts index df9f276..e32ca84 100644 --- a/src/constructs/anchors.ts +++ b/src/constructs/anchors.ts @@ -4,7 +4,7 @@ import type { EncodedRegex } from '../types'; * Start of string anchor. Matches the start of of string. In `multiline` mode, also matches immediately following a newline. */ export const startOfString: EncodedRegex = { - precedence: 'atom', + precedence: 0, pattern: '^', }; @@ -12,7 +12,7 @@ export const startOfString: EncodedRegex = { * End of string anchor. Matches the end of a string. In `multiline` mode, also matches immediately preceding a newline. */ export const endOfString: EncodedRegex = { - precedence: 'atom', + precedence: 0, pattern: '$', }; @@ -20,7 +20,7 @@ export const endOfString: EncodedRegex = { * Word boundary anchor. Matches the position where one side is a word character (alphanumeric or underscore) and the other side is a non-word character (anything else). */ export const wordBoundary: EncodedRegex = { - precedence: 'atom', + precedence: 0, pattern: '\\b', }; @@ -28,7 +28,7 @@ export const wordBoundary: EncodedRegex = { * Non-word boundary anchor. Matches the position where both sides are word characters. */ export const nonWordBoundary: EncodedRegex = { - precedence: 'atom', + precedence: 0, pattern: '\\B', }; diff --git a/src/constructs/capture.ts b/src/constructs/capture.ts index 3814866..0db37fd 100644 --- a/src/constructs/capture.ts +++ b/src/constructs/capture.ts @@ -21,13 +21,13 @@ export function capture(sequence: RegexSequence, options?: CaptureOptions): Enco const name = options?.name; if (name) { return { - precedence: 'atom', + precedence: 0, pattern: `(?<${name}>${encode(sequence).pattern})`, }; } return { - precedence: 'atom', + precedence: 0, pattern: `(${encode(sequence).pattern})`, }; } @@ -43,7 +43,7 @@ export function capture(sequence: RegexSequence, options?: CaptureOptions): Enco */ export function ref(name: string): Reference { return { - precedence: 'atom', + precedence: 0, pattern: `\\k<${name}>`, name, }; diff --git a/src/constructs/char-class.ts b/src/constructs/char-class.ts index a2b475d..c81bcb7 100644 --- a/src/constructs/char-class.ts +++ b/src/constructs/char-class.ts @@ -81,7 +81,7 @@ function encodeCharClass( isNegated?: boolean, ): EncodedRegex { return { - precedence: 'atom', + precedence: 0, pattern: `[${isNegated ? '^' : ''}${this.elements.join('')}]`, }; } diff --git a/src/constructs/char-escape.ts b/src/constructs/char-escape.ts index ee2854b..4533e7e 100644 --- a/src/constructs/char-escape.ts +++ b/src/constructs/char-escape.ts @@ -5,7 +5,7 @@ import type { CharacterEscape, EncodedRegex } from '../types'; * Specifically this one is NOT a character escape. */ export const any: EncodedRegex = { - precedence: 'atom', + precedence: 0, pattern: '.', }; @@ -13,7 +13,7 @@ export const any: EncodedRegex = { * Matches any digit (0-9). */ export const digit: CharacterEscape = { - precedence: 'atom', + precedence: 0, pattern: '\\d', elements: ['\\d'], }; @@ -22,7 +22,7 @@ export const digit: CharacterEscape = { * Matches any non-digit (0-9) character. */ export const nonDigit: CharacterEscape = { - precedence: 'atom', + precedence: 0, pattern: '\\D', elements: ['\\D'], }; @@ -31,7 +31,7 @@ export const nonDigit: CharacterEscape = { * Matches any word character (alphanumeric or underscore). */ export const word: CharacterEscape = { - precedence: 'atom', + precedence: 0, pattern: '\\w', elements: ['\\w'], }; @@ -40,7 +40,7 @@ export const word: CharacterEscape = { * Matches any non-word (alphanumeric or underscore) character. */ export const nonWord: CharacterEscape = { - precedence: 'atom', + precedence: 0, pattern: '\\W', elements: ['\\W'], }; @@ -49,7 +49,7 @@ export const nonWord: CharacterEscape = { * Matches any whitespace character (space, tab, newline, etc.). */ export const whitespace: CharacterEscape = { - precedence: 'atom', + precedence: 0, pattern: '\\s', elements: ['\\s'], }; @@ -58,7 +58,7 @@ export const whitespace: CharacterEscape = { * Matches any non-whitespace (space, tab, newline, etc.) character. */ export const nonWhitespace: CharacterEscape = { - precedence: 'atom', + precedence: 0, pattern: '\\S', elements: ['\\S'], }; diff --git a/src/constructs/choice-of.ts b/src/constructs/choice-of.ts index 0434174..170ae0d 100644 --- a/src/constructs/choice-of.ts +++ b/src/constructs/choice-of.ts @@ -18,7 +18,7 @@ export function choiceOf(...alternatives: RegexSequence[]): EncodedRegex { } return { - precedence: 'disjunction', + precedence: 2, pattern: encodedAlternatives.map((n) => n.pattern).join('|'), }; } diff --git a/src/constructs/lookahead.ts b/src/constructs/lookahead.ts index 6180033..d9fb582 100644 --- a/src/constructs/lookahead.ts +++ b/src/constructs/lookahead.ts @@ -17,7 +17,7 @@ import type { EncodedRegex, RegexSequence } from '../types'; */ export function lookahead(sequence: RegexSequence): EncodedRegex { return { - precedence: 'atom', + precedence: 0, pattern: `(?=${encode(sequence).pattern})`, }; } diff --git a/src/constructs/lookbehind.ts b/src/constructs/lookbehind.ts index 9187bed..85f1bc2 100644 --- a/src/constructs/lookbehind.ts +++ b/src/constructs/lookbehind.ts @@ -17,7 +17,7 @@ import type { EncodedRegex, RegexSequence } from '../types'; */ export function lookbehind(sequence: RegexSequence): EncodedRegex { return { - precedence: 'atom', + precedence: 0, pattern: `(?<=${encode(sequence).pattern})`, }; } diff --git a/src/constructs/negative-lookahead.ts b/src/constructs/negative-lookahead.ts index 5694ca6..54e054a 100644 --- a/src/constructs/negative-lookahead.ts +++ b/src/constructs/negative-lookahead.ts @@ -17,7 +17,7 @@ import type { EncodedRegex, RegexSequence } from '../types'; */ export function negativeLookahead(sequence: RegexSequence): EncodedRegex { return { - precedence: 'atom', + precedence: 0, pattern: `(?!${encode(sequence).pattern})`, }; } diff --git a/src/constructs/negative-lookbehind.ts b/src/constructs/negative-lookbehind.ts index b0264f3..debc5f3 100644 --- a/src/constructs/negative-lookbehind.ts +++ b/src/constructs/negative-lookbehind.ts @@ -17,7 +17,7 @@ import type { EncodedRegex, RegexSequence } from '../types'; */ export function negativeLookbehind(sequence: RegexSequence): EncodedRegex { return { - precedence: 'atom', + precedence: 0, pattern: `(? (n.precedence === 'disjunction' ? encodeAtomic(n) : n.pattern)) - .join(''), + precedence: 1, + pattern: encoded.map((n) => (n.precedence === 2 ? encodeAtomic(n) : n.pattern)).join(''), }; } export function encodeAtomic(sequence: RegexSequence): string { const encoded = encode(sequence); - return encoded.precedence === 'atom' ? encoded.pattern : `(?:${encoded.pattern})`; + return encoded.precedence === 0 ? encoded.pattern : `(?:${encoded.pattern})`; } function encodeElement(element: RegexElement): EncodedRegex { @@ -51,7 +49,7 @@ function encodeText(text: string): EncodedRegex { return { // Optimize for single character case - precedence: text.length === 1 ? 'atom' : 'sequence', + precedence: text.length === 1 ? 0 : 1, pattern: escapeText(text), }; } @@ -61,7 +59,7 @@ function encodeRegExp(regexp: RegExp): EncodedRegex { return { // Encode at safe precedence - precedence: isAtomicPattern(pattern) ? 'atom' : 'disjunction', + precedence: isAtomicPattern(pattern) ? 0 : 2, pattern, }; } diff --git a/src/types.ts b/src/types.ts index 4a4b056..61dd143 100644 --- a/src/types.ts +++ b/src/types.ts @@ -28,7 +28,7 @@ export interface EncodedRegex { /** * Precedence of given regex pattern. */ -export type EncodePrecedence = 'atom' | 'sequence' | 'disjunction'; +export type EncodePrecedence = 0 | 1 | 2; /** * Regex patter that can be encoded by calling the `encode` method.