diff --git a/examples/lisp.js b/examples/lisp.js index 06e289a..a439b79 100644 --- a/examples/lisp.js +++ b/examples/lisp.js @@ -7,15 +7,12 @@ let P = require('../'); /////////////////////////////////////////////////////////////////////// -// A little helper to wrap a parser with optional whitespace. Helper functions -// that take a parser can be passed to the .thru(wrapper) method. -function spaced(parser) { - return P.optWhitespace - .then(parser) - .skip(P.optWhitespace); -} let Lisp = P.createLanguage({ + + // An expression is just any of the other values we make in the language. Note + // that because we're using `.createLanguage` here we can reference other + // parsers off of the argument to our function. `r` is short for `rules` here. Expression: function(r) { return P.alt( r.Symbol, @@ -24,8 +21,8 @@ let Lisp = P.createLanguage({ ); }, -// The basic parsers (usually the ones described via regexp) should have a -// description for error message purposes. + // The basic parsers (usually the ones described via regexp) should have a + // description for error message purposes. Symbol: function() { return P.regexp(/[a-zA-Z_-][a-zA-Z0-9_-]*/) .desc('symbol'); @@ -39,18 +36,19 @@ let Lisp = P.createLanguage({ .desc('number'); }, -// `.then` throws away the first value, and `.skip` throws away the second -// `.value, so we're left with just the `Expression.thru(spaced).many()` part as -// the `.yielded value from this parser. + // `.trim(P.optWhitespace)` removes whitespace from both sides, then `.many()` + // repeats the expression zero or more times. Finally, `.wrap(...)` removes + // the '(' and ')' from both sides of the list. List: function(r) { - return P.string('(') - .then(r.Expression.thru(spaced).many()) - .skip(P.string(')')); + return r.Expression + .trim(P.optWhitespace) + .many() + .wrap(P.string('('), P.string(')')); }, - // Let's remember to throw away whitesapce at the top level of the parser. 
+ // A file in Lisp is generally just zero or more expressions. File: function(r) { - return r.Expression.thru(spaced).many(); + return r.Expression.trim(P.optWhitespace).many(); } }); diff --git a/examples/math.js b/examples/math.js index 7d4df75..c078b41 100644 --- a/examples/math.js +++ b/examples/math.js @@ -17,12 +17,7 @@ let P = require('..'); /////////////////////////////////////////////////////////////////////// -// Returns a new parser that ignores whitespace before and after the parser. -function spaced(parser) { - return P.optWhitespace - .then(parser) - .skip(P.optWhitespace); -} +let _ = P.optWhitespace; // Operators should allow whitespace around them, but not require it. This // helper combines multiple operators together with names. @@ -33,7 +28,7 @@ function spaced(parser) { // whitespace, and gives back the word "Add" or "Sub" instead of the character. function operators(ops) { let keys = Object.keys(ops).sort(); - let ps = keys.map(k => P.string(ops[k]).thru(spaced).result(k)); + let ps = keys.map(k => P.string(ops[k]).trim(_).result(k)); return P.alt.apply(null, ps); } @@ -166,7 +161,7 @@ let tableParser = // keep it in a table instead of nesting it all manually. // This is our version of a math expression. -let MyMath = spaced(tableParser); +let MyMath = tableParser.trim(_); /////////////////////////////////////////////////////////////////////// diff --git a/examples/python-ish.js b/examples/python-ish.js index b1e968e..a89615b 100644 --- a/examples/python-ish.js +++ b/examples/python-ish.js @@ -7,6 +7,29 @@ let P = require('..'); /////////////////////////////////////////////////////////////////////// +// LIMITATIONS: Python allows not only multiline blocks, but inline blocks too. +// +// if x == y: print("nice") +// +// vs. +// +// if x == y: +// print("nice") +// +// This parser only supports the multiline indented form. + +// NOTE: This is a hack and is not recommended. 
Maintaining state throughout +// Parsimmon parsers is not reliable since backtracking may occur, leaving your +// state inaccurate. See the relevant GitHub issue for discussion. +// +// https://github.com/jneen/parsimmon/issues/158 +// +function indentPeek() { + return indentStack[indentStack.length - 1]; +} + +let indentStack = [0]; + let Pythonish = P.createLanguage({ // If this were actually Python, "Block" wouldn't be a statement on its own, // but rather "If" and "While" would be statements that used "Block" inside. @@ -29,18 +52,29 @@ let Pythonish = P.createLanguage({ // indentation in front of it. Block: r => P.seq( - P.string('block:\n'), - P.regexp(/[ ]+/), + P.string('block:\n').then(P.regexp(/[ ]+/)), r.Statement ).chain(args => { // `.chain` is called after a parser succeeds. It returns the next parser // to use for parsing. This allows subsequent parsing to be dependent on // previous text. let [indent, statement] = args; + let indentSize = indent.length; + let currentSize = indentPeek(); + // Indentation must be deeper than the current block context. Otherwise + // you could indent *less* for a block and it would still work. This is + // not how any language I know of works. + if (indentSize <= currentSize) { + return P.fail('more than ' + currentSize + ' spaces'); + } + indentStack.push(indentSize); return P.string(indent) .then(r.Statement) .many() - .map(statements => [statement].concat(statements)); + .map(statements => { + indentStack.pop(); + return [statement].concat(statements); + }); }) .node('Block'), }); @@ -53,9 +87,9 @@ block: b() c() block: - d() - e() - f() + d() + e() + f() block: g() h()