Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

POC: extreme minifications #102

Closed
wants to merge 1 commit into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 4 additions & 4 deletions src/constructs/anchors.ts
Original file line number Diff line number Diff line change
Expand Up @@ -4,31 +4,31 @@ import type { EncodedRegex } from '../types';
* Start of string anchor. Matches the start of a string. In `multiline` mode, also matches immediately following a newline.
*/
export const startOfString: EncodedRegex = {
precedence: 'atom',
precedence: 0,
pattern: '^',
};

/**
* End of string anchor. Matches the end of a string. In `multiline` mode, also matches immediately preceding a newline.
*/
export const endOfString: EncodedRegex = {
precedence: 'atom',
precedence: 0,
pattern: '$',
};

/**
* Word boundary anchor. Matches the position where one side is a word character (alphanumeric or underscore) and the other side is a non-word character (anything else).
*/
export const wordBoundary: EncodedRegex = {
precedence: 'atom',
precedence: 0,
pattern: '\\b',
};

/**
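* Non-word boundary anchor. Matches any position that is not a word boundary, i.e. where the characters on both sides are of the same kind (both word characters or both non-word characters).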
*/
export const nonWordBoundary: EncodedRegex = {
precedence: 'atom',
precedence: 0,
pattern: '\\B',
};

Expand Down
6 changes: 3 additions & 3 deletions src/constructs/capture.ts
Original file line number Diff line number Diff line change
Expand Up @@ -21,13 +21,13 @@ export function capture(sequence: RegexSequence, options?: CaptureOptions): Enco
const name = options?.name;
if (name) {
return {
precedence: 'atom',
precedence: 0,
pattern: `(?<${name}>${encode(sequence).pattern})`,
};
}

return {
precedence: 'atom',
precedence: 0,
pattern: `(${encode(sequence).pattern})`,
};
}
Expand All @@ -43,7 +43,7 @@ export function capture(sequence: RegexSequence, options?: CaptureOptions): Enco
*/
export function ref(name: string): Reference {
return {
precedence: 'atom',
precedence: 0,
pattern: `\\k<${name}>`,
name,
};
Expand Down
2 changes: 1 addition & 1 deletion src/constructs/char-class.ts
Original file line number Diff line number Diff line change
Expand Up @@ -81,7 +81,7 @@ function encodeCharClass(
isNegated?: boolean,
): EncodedRegex {
return {
precedence: 'atom',
precedence: 0,
pattern: `[${isNegated ? '^' : ''}${this.elements.join('')}]`,
};
}
14 changes: 7 additions & 7 deletions src/constructs/char-escape.ts
Original file line number Diff line number Diff line change
Expand Up @@ -5,15 +5,15 @@ import type { CharacterEscape, EncodedRegex } from '../types';
* Specifically this one is NOT a character escape.
*/
export const any: EncodedRegex = {
precedence: 'atom',
precedence: 0,
pattern: '.',
};

/**
* Matches any digit (0-9).
*/
export const digit: CharacterEscape = {
precedence: 'atom',
precedence: 0,
pattern: '\\d',
elements: ['\\d'],
};
Expand All @@ -22,7 +22,7 @@ export const digit: CharacterEscape = {
* Matches any non-digit (0-9) character.
*/
export const nonDigit: CharacterEscape = {
precedence: 'atom',
precedence: 0,
pattern: '\\D',
elements: ['\\D'],
};
Expand All @@ -31,7 +31,7 @@ export const nonDigit: CharacterEscape = {
* Matches any word character (alphanumeric or underscore).
*/
export const word: CharacterEscape = {
precedence: 'atom',
precedence: 0,
pattern: '\\w',
elements: ['\\w'],
};
Expand All @@ -40,7 +40,7 @@ export const word: CharacterEscape = {
* Matches any non-word (alphanumeric or underscore) character.
*/
export const nonWord: CharacterEscape = {
precedence: 'atom',
precedence: 0,
pattern: '\\W',
elements: ['\\W'],
};
Expand All @@ -49,7 +49,7 @@ export const nonWord: CharacterEscape = {
* Matches any whitespace character (space, tab, newline, etc.).
*/
export const whitespace: CharacterEscape = {
precedence: 'atom',
precedence: 0,
pattern: '\\s',
elements: ['\\s'],
};
Expand All @@ -58,7 +58,7 @@ export const whitespace: CharacterEscape = {
* Matches any non-whitespace (space, tab, newline, etc.) character.
*/
export const nonWhitespace: CharacterEscape = {
precedence: 'atom',
precedence: 0,
pattern: '\\S',
elements: ['\\S'],
};
Expand Down
2 changes: 1 addition & 1 deletion src/constructs/choice-of.ts
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ export function choiceOf(...alternatives: RegexSequence[]): EncodedRegex {
}

return {
precedence: 'disjunction',
precedence: 2,
pattern: encodedAlternatives.map((n) => n.pattern).join('|'),
};
}
2 changes: 1 addition & 1 deletion src/constructs/lookahead.ts
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ import type { EncodedRegex, RegexSequence } from '../types';
*/
export function lookahead(sequence: RegexSequence): EncodedRegex {
return {
precedence: 'atom',
precedence: 0,
pattern: `(?=${encode(sequence).pattern})`,
};
}
2 changes: 1 addition & 1 deletion src/constructs/lookbehind.ts
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ import type { EncodedRegex, RegexSequence } from '../types';
*/
export function lookbehind(sequence: RegexSequence): EncodedRegex {
return {
precedence: 'atom',
precedence: 0,
pattern: `(?<=${encode(sequence).pattern})`,
};
}
2 changes: 1 addition & 1 deletion src/constructs/negative-lookahead.ts
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ import type { EncodedRegex, RegexSequence } from '../types';
*/
export function negativeLookahead(sequence: RegexSequence): EncodedRegex {
return {
precedence: 'atom',
precedence: 0,
pattern: `(?!${encode(sequence).pattern})`,
};
}
2 changes: 1 addition & 1 deletion src/constructs/negative-lookbehind.ts
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ import type { EncodedRegex, RegexSequence } from '../types';
*/
export function negativeLookbehind(sequence: RegexSequence): EncodedRegex {
return {
precedence: 'atom',
precedence: 0,
pattern: `(?<!${encode(sequence).pattern})`,
};
}
6 changes: 3 additions & 3 deletions src/constructs/quantifiers.ts
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ export interface QuantifierOptions {
export function zeroOrMore(sequence: RegexSequence, options?: QuantifierOptions): EncodedRegex {
const elements = ensureElements(sequence);
return {
precedence: 'sequence',
precedence: 1,
pattern: `${encodeAtomic(elements)}*${options?.greedy === false ? '?' : ''}`,
};
}
Expand All @@ -29,7 +29,7 @@ export function zeroOrMore(sequence: RegexSequence, options?: QuantifierOptions)
export function oneOrMore(sequence: RegexSequence, options?: QuantifierOptions): EncodedRegex {
const elements = ensureElements(sequence);
return {
precedence: 'sequence',
precedence: 1,
pattern: `${encodeAtomic(elements)}+${options?.greedy === false ? '?' : ''}`,
};
}
Expand All @@ -43,7 +43,7 @@ export function oneOrMore(sequence: RegexSequence, options?: QuantifierOptions):
export function optional(sequence: RegexSequence, options?: QuantifierOptions): EncodedRegex {
const elements = ensureElements(sequence);
return {
precedence: 'sequence',
precedence: 1,
pattern: `${encodeAtomic(elements)}?${options?.greedy === false ? '?' : ''}`,
};
}
4 changes: 2 additions & 2 deletions src/constructs/repeat.ts
Original file line number Diff line number Diff line change
Expand Up @@ -22,13 +22,13 @@ export function repeat(sequence: RegexSequence, options: RepeatOptions): Encoded

if (typeof options === 'number') {
return {
precedence: 'sequence',
precedence: 1,
pattern: `${encodeAtomic(elements)}{${options}}`,
};
}

return {
precedence: 'sequence',
precedence: 1,
pattern: `${encodeAtomic(elements)}{${options.min},${options?.max ?? ''}}${
options.greedy === false ? '?' : ''
}`,
Expand Down
4 changes: 2 additions & 2 deletions src/constructs/unicode.ts
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ export function unicodeChar(codePoint: number): CharacterEscape {
: `\\u{${codePoint.toString(16)}}`; // 1-6 digit hex (requires unicode-aware mode)

return {
precedence: 'atom',
precedence: 0,
pattern: escape,
elements: [escape],
};
Expand All @@ -50,7 +50,7 @@ export function unicodeProperty(property: string, value?: string): CharacterEsca
const escape = `\\p{${property}${value ? `=${value}` : ''}}`;

return {
precedence: 'atom',
precedence: 0,
pattern: escape,
elements: [escape],
};
Expand Down
12 changes: 5 additions & 7 deletions src/encoder.ts
Original file line number Diff line number Diff line change
Expand Up @@ -10,16 +10,14 @@ export function encode(sequence: RegexSequence): EncodedRegex {
}

return {
precedence: 'sequence',
pattern: encoded
.map((n) => (n.precedence === 'disjunction' ? encodeAtomic(n) : n.pattern))
.join(''),
precedence: 1,
pattern: encoded.map((n) => (n.precedence === 2 ? encodeAtomic(n) : n.pattern)).join(''),
};
}

export function encodeAtomic(sequence: RegexSequence): string {
const encoded = encode(sequence);
return encoded.precedence === 'atom' ? encoded.pattern : `(?:${encoded.pattern})`;
return encoded.precedence === 0 ? encoded.pattern : `(?:${encoded.pattern})`;
}

function encodeElement(element: RegexElement): EncodedRegex {
Expand Down Expand Up @@ -51,7 +49,7 @@ function encodeText(text: string): EncodedRegex {

return {
// Optimize for single character case
precedence: text.length === 1 ? 'atom' : 'sequence',
precedence: text.length === 1 ? 0 : 1,
pattern: escapeText(text),
};
}
Expand All @@ -61,7 +59,7 @@ function encodeRegExp(regexp: RegExp): EncodedRegex {

return {
// Encode at safe precedence
precedence: isAtomicPattern(pattern) ? 'atom' : 'disjunction',
precedence: isAtomicPattern(pattern) ? 0 : 2,
pattern,
};
}
Expand Down
2 changes: 1 addition & 1 deletion src/types.ts
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ export interface EncodedRegex {
/**
* Precedence of given regex pattern.
*/
export type EncodePrecedence = 'atom' | 'sequence' | 'disjunction';
export type EncodePrecedence = 0 | 1 | 2;

/**
* Regex pattern that can be encoded by calling the `encode` method.
Expand Down
Loading