Skip to content

Commit

Permalink
Update to parser
Browse files Browse the repository at this point in the history
  • Loading branch information
CosmonautKitten committed Jan 9, 2025
1 parent 5d51ca1 commit 79a6b15
Show file tree
Hide file tree
Showing 2 changed files with 61 additions and 14 deletions.
18 changes: 12 additions & 6 deletions parser/Parser.js
Original file line number Diff line number Diff line change
Expand Up @@ -57,11 +57,17 @@ function ParserWithCallbacks(rules) {
var start = match.index;
var end = start + match[0].length;

tokens.push({
text: match[0].replace(/\x02/g, ''),
ruleName: key,
match: match,
});
// Emit the token(s) for this match. A rule may carry its own `tokenize`
// function; when it does, that function is called with the rule name and
// the regex match, and every token it yields is appended in order.
if (typeof (rules[key].tokenize) === "function") {
for (let token of rules[key].tokenize(key, match)) {
tokens.push(token);
}
} else {
// Default: one token covering the whole match, tagged with the rule name.
// Every \x02 character is removed from the token text.
// NOTE(review): \x02 looks like an internal marker inserted elsewhere — confirm.
tokens.push({
text: match[0].replace(/\x02/g, ''),
ruleName: key,
match: match,
});
}

behind += input.slice(0, end);
behind = behind.slice(-20); // keep max 20 chars of lookbehind
Expand Down Expand Up @@ -116,4 +122,4 @@ if(typeof(module) != 'undefined') {
module.exports = {
ParserWithCallbacks: ParserWithCallbacks
};
}
}
57 changes: 49 additions & 8 deletions parser/rules.js
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,7 @@ function build_rules() {
return m[0].match(/^(mi|sina)\s/) && !startOfPartialSentence(m, behind)
}

function Err(rule, message, category, more_infos) {
function Err(rule, message, category, more_infos, custom_tokenizer) {
this.raw_rule = rule;

if(typeof(rule[0]) === "undefined") {
Expand All @@ -65,6 +65,7 @@ function build_rules() {
this.message = message;
this.category = category;
this.more_infos = more_infos;
this.tokenize = custom_tokenizer;
}

var rules = {
Expand All @@ -79,21 +80,61 @@ function build_rules() {
argumentInitial: new Err(
[
new RegExp(
PARTIAL_SENTENCE_SEPARATOR + '\\s*\\b[aeiou]\\w*'
PARTIAL_SENTENCE_SEPARATOR + '(\\s*)(\\b[aeiou]\\w*\\b)'
),
],
'A sentence must begin with a verb',
'error'
'A phrase must begin with a verb',
'error',
null,
// Custom tokenizer for this rule: splits one regex match into three tokens
// instead of a single token covering the whole match. Each token keeps a
// reference to the full match and has every \x02 character removed from
// its text.
// NOTE(review): the group indices 2/3/4 only line up with the `(\s*)` and
// word groups of this rule's pattern if PARTIAL_SENTENCE_SEPARATOR itself
// contributes exactly two capture groups — confirm against its definition.
// If any of these groups did not participate in the match, `.replace` is
// called on undefined and throws.
(key, match) => {
return [
{
// Labelled as the separator itself, not as an error.
text: match[2].replace(/\x02/g, ''),
ruleName: 'partialSentenceSeparator',
match: match,
},
{
// Presumably the `(\s*)` whitespace between separator and word — verify.
text: match[3].replace(/\x02/g, ''),
ruleName: 'punctuation',
match: match,
},
{
// Only this token carries the rule's own name (`key`).
text: match[4].replace(/\x02/g, ''),
ruleName: key,
match: match,
},
];
}
),

verbNonInitial: new Err(
[
new RegExp(
PARTIAL_SENTENCE_SEPARATOR + '\\s*\\b\\w+\\b\\s+\\b(?![aeioun])\\w*\\b'
PARTIAL_SENTENCE_SEPARATOR + '((\\s*\\b\\w+\\b\\s+)+)' + '(\\b(?![aeioun])\\w*\\b)'
),
],
'The word after the first word following a partial sentence separator must begin with a vowel',
'error'
'A verb must appear at the beginning of a phrase',
'error',
null,
// Custom tokenizer for this rule: splits one regex match into three tokens
// so that only the last one is tagged with the rule's own name. Each token
// keeps a reference to the full match and has every \x02 character removed
// from its text.
// NOTE(review): indices 2/3/5 assume PARTIAL_SENTENCE_SEPARATOR contributes
// exactly two capture groups, making group 3 the outer `((...)+)` run of
// words, group 4 its inner repeated group (deliberately unused here), and
// group 5 the final word — confirm against the separator's definition.
(key, match) => {
return [
{
// Labelled as the separator itself, not as an error.
text: match[2].replace(/\x02/g, ''),
ruleName: 'partialSentenceSeparator',
match: match,
},
{
// Presumably the word(s) and whitespace preceding the final word — verify.
text: match[3].replace(/\x02/g, ''),
ruleName: 'ignore',
match: match,
},
{
// Only this token carries the rule's own name (`key`).
text: match[5].replace(/\x02/g, ''),
ruleName: key,
match: match,
},
];
}
),

startOfText: new Err(
Expand Down Expand Up @@ -155,4 +196,4 @@ if(typeof(module) != 'undefined') {
module.exports = {
build_rules: build_rules,
};
}
}

0 comments on commit 79a6b15

Please sign in to comment.