From 160fb123347b697d89874d35c43599f198e78337 Mon Sep 17 00:00:00 2001 From: Mihai Bazon Date: Fri, 17 Sep 2010 18:33:49 +0200 Subject: [PATCH 01/24] git rid of usage of try/finally for doing stuff after return (it's a bit slower) --- lib/parse-js.js | 10 ++++----- lib/process.js | 58 +++++++++++++++++++++++-------------------------- 2 files changed, 31 insertions(+), 37 deletions(-) diff --git a/lib/parse-js.js b/lib/parse-js.js index 6886612a..47bb5f40 100644 --- a/lib/parse-js.js +++ b/lib/parse-js.js @@ -1135,12 +1135,10 @@ function parse($TEXT, strict_mode, embed_tokens) { }; function in_loop(cont) { - try { - ++S.in_loop; - return cont(); - } finally { - --S.in_loop; - } + ++S.in_loop; + var ret = cont(); + --S.in_loop; + return ret; }; return as("toplevel", (function(a){ diff --git a/lib/process.js b/lib/process.js index b072ad32..214fb38d 100644 --- a/lib/process.js +++ b/lib/process.js @@ -200,20 +200,18 @@ function ast_walker(ast) { function walk(ast) { if (ast == null) return null; - try { - stack.push(ast); - var type = ast[0]; - var gen = user[type]; - if (gen) { - var ret = gen.apply(ast, ast.slice(1)); - if (ret != null) - return ret; - } - gen = walkers[type]; - return gen.apply(ast, ast.slice(1)); - } finally { - stack.pop(); + stack.push(ast); + var type = ast[0]; + var gen = user[type]; + if (gen) { + var ret = gen.apply(ast, ast.slice(1)); + if (ret != null) + return ret; } + gen = walkers[type]; + var ret = gen.apply(ast, ast.slice(1)); + stack.pop(); + return ret; }; function with_walkers(walkers, cont){ @@ -222,13 +220,12 @@ function ast_walker(ast) { save[i] = user[i]; user[i] = walkers[i]; } - try { return cont(); } - finally { - for (i in save) if (HOP(save, i)) { - if (!save[i]) delete user[i]; - else user[i] = save[i]; - } + var ret = cont(); + for (i in save) if (HOP(save, i)) { + if (!save[i]) delete user[i]; + else user[i] = save[i]; } + return ret; }; return { @@ -358,14 +355,10 @@ function ast_add_scope(ast) { function with_new_scope(cont) { current_scope = new Scope(current_scope); - try { - var ret = current_scope.body = cont(); - ret.scope = current_scope; - return ret; - } - finally { - current_scope = current_scope.parent; - } + var ret = current_scope.body = cont(); + ret.scope = current_scope; + current_scope = current_scope.parent; + return ret; }; function define(name) { @@ -480,8 +473,10 @@ function ast_mangle(ast, do_toplevel) { for (var i in s.names) if (HOP(s.names, i)) { get_mangled(i, true); } - try { var ret = cont(); ret.scope = s; return ret; } - finally { scope = _scope; }; + var ret = cont(); + ret.scope = s; + scope = _scope; + return ret; }; function _vardefs(defs) { @@ -801,8 +796,9 @@ function gen_code(ast, beautify) { function with_indent(cont, incr) { if (incr == null) incr = 1; indentation += incr; - try { return cont.apply(null, slice(arguments, 1)); } - finally { indentation -= incr; } + var ret = cont.apply(null, slice(arguments, 1)); + indentation -= incr; + return ret; }; function add_spaces(a) { From ddfa6ba879e0b87d1eed1e5f73c097113f126bad Mon Sep 17 00:00:00 2001 From: Mihai Bazon Date: Fri, 17 Sep 2010 21:09:30 +0200 Subject: [PATCH 02/24] moved some utilities/constants in a separate file for easier maintenance --- bin/uglifyjs | 2 +- lib/constants.js | 337 ++++++++++++++++++++++++++++++++++++++++ lib/parse-js.js | 396 +++++++---------------------------------------- lib/process.js | 72 +++------ 4 files changed, 417 insertions(+), 390 deletions(-) create mode 100644 lib/constants.js diff --git a/bin/uglifyjs b/bin/uglifyjs index 59df920a..b052eb26 100755 --- a/bin/uglifyjs +++ b/bin/uglifyjs @@ -25,7 +25,7 @@ var options = { output: true // stdout }; -var args = jsp.slice(process.argv, 2); +var args = process.argv.slice(2); var filename; out: while (args.length > 0) { diff --git a/lib/constants.js b/lib/constants.js new file mode 100644 index 00000000..91679750 --- /dev/null +++ b/lib/constants.js @@ -0,0 +1,337 @@ +/*********************************************************************** + + A JavaScript tokenizer / parser / beautifier / compressor. + + -------------------------------- (C) --------------------------------- + + Author: Mihai Bazon + + http://mihai.bazon.net/blog + + Distributed under a ZLIB license: + + Copyright 2010 (c) Mihai Bazon + + This software is provided 'as-is', without any express or implied + warranty. In no event will the authors be held liable for any + damages arising from the use of this software. + + Permission is granted to anyone to use this software for any + purpose, including commercial applications, and to alter it and + redistribute it freely, subject to the following restrictions: + + 1. The origin of this software must not be misrepresented; you must + not claim that you wrote the original software. If you use this + software in a product, an acknowledgment in the product + documentation would be appreciated but is not required. + + 2. Altered source versions must be plainly marked as such, and must + not be misrepresented as being the original software. + + 3. This notice may not be removed or altered from any source + distribution. + + ***********************************************************************/ + +/* -----[ Utils ]----- */ + +function array_to_hash(a) { + var ret = {}; + for (var i = 0; i < a.length; ++i) + ret[a[i]] = true; + return ret; +}; +exports.array_to_hash = array_to_hash; + +function curry(f) { + var args = slice(arguments, 1); + return function() { return f.apply(this, args.concat(slice(arguments))); }; +}; +exports.curry = curry; + +function prog1(ret) { + if (ret instanceof Function) + ret = ret(); + for (var i = 1, n = arguments.length; --n > 0; ++i) + arguments[i](); + return ret; +}; +exports.prog1 = prog1; + +function slice(a, start) { + return Array.prototype.slice.call(a, start == null ? 0 : start); +}; +exports.slice = slice; + +function characters(str) { + return str.split(""); +}; +exports.characters = characters; + +function member(name, array) { + for (var i = array.length; --i >= 0;) + if (array[i] === name) + return true; + return false; +}; +exports.member = member; + +function HOP(obj, prop) { + return Object.prototype.hasOwnProperty.call(obj, prop); +}; +exports.HOP = HOP; + +function is_alphanumeric_char(ch) { + ch = ch.charCodeAt(0); + return (ch >= 48 && ch <= 57) || + (ch >= 65 && ch <= 90) || + (ch >= 97 && ch <= 122); +}; +exports.is_alphanumeric_char = is_alphanumeric_char; + +function is_identifier_char(ch) { + return is_alphanumeric_char(ch) || ch == "$" || ch == "_"; +}; +exports.is_identifier_char = is_identifier_char; + +function is_identifier(name) { + return /^[a-z_$][a-z0-9_$]*$/i.test(name) && + !HOP(KEYWORDS_ATOM, name) && + !HOP(RESERVED_WORDS, name) && + !HOP(KEYWORDS, name); +}; +exports.is_identifier = is_identifier; + +function is_digit(ch) { + ch = ch.charCodeAt(0); + return ch >= 48 && ch <= 57; +}; +exports.is_digit = is_digit; + +function repeat_string(str, i) { + if (i <= 0) return ""; + if (i == 1) return str; + var d = repeat_string(str, i >> 1); + d += d; + if (i & 1) d += str; + return d; +}; +exports.repeat_string = repeat_string; + +function defaults(args, defs) { + var ret = {}; + if (args === true) + args = {}; + for (var i in defs) if (HOP(defs, i)) { + ret[i] = (args && HOP(args, i)) ? args[i] : defs[i]; + } + return ret; +}; +exports.defaults = defaults; + +/* -----[ Contants ]----- */ + +var KEYWORDS = array_to_hash([ + "break", + "case", + "catch", + "const", + "continue", + "default", + "delete", + "do", + "else", + "finally", + "for", + "function", + "if", + "in", + "instanceof", + "new", + "return", + "switch", + "throw", + "try", + "typeof", + "var", + "void", + "while", + "with", + "NaN" +]); +exports.KEYWORDS = KEYWORDS; + +var RESERVED_WORDS = array_to_hash([ + "abstract", + "boolean", + "byte", + "char", + "class", + "debugger", + "double", + "enum", + "export", + "extends", + "final", + "float", + "goto", + "implements", + "import", + "int", + "interface", + "long", + "native", + "package", + "private", + "protected", + "public", + "short", + "static", + "super", + "synchronized", + "throws", + "transient", + "volatile" +]); +exports.RESERVED_WORDS = RESERVED_WORDS; + +var KEYWORDS_BEFORE_EXPRESSION = array_to_hash([ + "return", + "new", + "delete", + "throw" +]); +exports.KEYWORDS_BEFORE_EXPRESSION = KEYWORDS_BEFORE_EXPRESSION; + +var KEYWORDS_ATOM = array_to_hash([ + "false", + "null", + "true", + "undefined", + "NaN" +]); +exports.KEYWORDS_ATOM = KEYWORDS_ATOM; + +var OPERATOR_CHARS = array_to_hash(characters("+-*&%=<>!?|~^")); +exports.OPERATOR_CHARS = OPERATOR_CHARS; + +var OPERATORS = array_to_hash([ + "in", + "instanceof", + "typeof", + "new", + "void", + "delete", + "++", + "--", + "+", + "-", + "!", + "~", + "&", + "|", + "^", + "*", + "/", + "%", + ">>", + "<<", + ">>>", + "<", + ">", + "<=", + ">=", + "==", + "===", + "!=", + "!==", + "?", + "=", + "+=", + "-=", + "/=", + "*=", + "%=", + ">>=", + "<<=", + ">>>=", + "~=", + "%=", + "|=", + "^=", + "&&", + "||" +]); +exports.OPERATORS = OPERATORS; + +var WHITESPACE_CHARS = array_to_hash(characters(" \n\r\t")); +exports.WHITESPACE_CHARS = WHITESPACE_CHARS; + +var PUNC_BEFORE_EXPRESSION = array_to_hash(characters("[{}(,.;:")); +exports.PUNC_BEFORE_EXPRESSION = PUNC_BEFORE_EXPRESSION; + +var PUNC_CHARS = array_to_hash(characters("[]{}(),;:")); +exports.PUNC_CHARS = PUNC_CHARS; + +var REGEXP_MODIFIERS = array_to_hash(characters("gmsiy")); +exports.REGEXP_MODIFIERS = REGEXP_MODIFIERS; + +var UNARY_PREFIX = array_to_hash([ + "typeof", + "void", + "delete", + "--", + "++", + "!", + "~", + "-", + "+" +]); +exports.UNARY_PREFIX = UNARY_PREFIX; + +var UNARY_POSTFIX = array_to_hash([ "--", "++" ]); +exports.UNARY_POSTFIX = UNARY_POSTFIX; + +var ASSIGNMENT = (function(a, ret, i){ + while (i < a.length) { + ret[a[i]] = a[i].substr(0, a[i].length - 1); + i++; + } + return ret; +})( + ["+=", "-=", "/=", "*=", "%=", ">>=", "<<=", ">>>=", "~=", "%=", "|=", "^="], + { "=": true }, + 0 +); +exports.ASSIGNMENT = ASSIGNMENT; + +var PRECEDENCE = (function(a, ret){ + for (var i = 0, n = 1; i < a.length; ++i, ++n) { + var b = a[i]; + for (var j = 0; j < b.length; ++j) { + ret[b[j]] = n; + } + } + return ret; +})( + [ + ["||"], + ["&&"], + ["|"], + ["^"], + ["&"], + ["==", "===", "!=", "!=="], + ["<", ">", "<=", ">=", "in", "instanceof"], + [">>", "<<", ">>>"], + ["+", "-"], + ["*", "/", "%"] + ], + {} +); +exports.PRECEDENCE = PRECEDENCE; + +var STATEMENTS_WITH_LABELS = array_to_hash([ "for", "do", "while", "switch" ]); +exports.STATEMENTS_WITH_LABELS = STATEMENTS_WITH_LABELS; + +var ATOMIC_START_TOKEN = array_to_hash([ "atom", "num", "string", "regexp", "name" ]); +exports.ATOMIC_START_TOKEN = ATOMIC_START_TOKEN; diff --git a/lib/parse-js.js b/lib/parse-js.js index 47bb5f40..d2e1c188 100644 --- a/lib/parse-js.js +++ b/lib/parse-js.js @@ -50,174 +50,24 @@ ***********************************************************************/ -/* -----[ Tokenizer (constants) ]----- */ - -var KEYWORDS = array_to_hash([ - "break", - "case", - "catch", - "const", - "continue", - "default", - "delete", - "do", - "else", - "finally", - "for", - "function", - "if", - "in", - "instanceof", - "new", - "return", - "switch", - "throw", - "try", - "typeof", - "var", - "void", - "while", - "with", - "NaN" -]); - -var RESERVED_WORDS = array_to_hash([ - "abstract", - "boolean", - "byte", - "char", - "class", - "debugger", - "double", - "enum", - "export", - "extends", - "final", - "float", - "goto", - "implements", - "import", - "int", - "interface", - "long", - "native", - "package", - "private", - "protected", - "public", - "short", - "static", - "super", - "synchronized", - "throws", - "transient", - "volatile" -]); - -var KEYWORDS_BEFORE_EXPRESSION = array_to_hash([ - "return", - "new", - "delete", - "throw" -]); - -var KEYWORDS_ATOM = array_to_hash([ - "false", - "null", - "true", - "undefined", - "NaN" -]); - -var OPERATOR_CHARS = array_to_hash(characters("+-*&%=<>!?|~^")); - -var RE_HEX_NUMBER = /^0x[0-9a-f]+$/i; -var RE_OCT_NUMBER = /^0[0-7]+$/; -var RE_DEC_NUMBER = /^\d*\.?\d*(?:e-?\d*(?:\d\.?|\.?\d)\d*)?$/i; - -var OPERATORS = array_to_hash([ - "in", - "instanceof", - "typeof", - "new", - "void", - "delete", - "++", - "--", - "+", - "-", - "!", - "~", - "&", - "|", - "^", - "*", - "/", - "%", - ">>", - "<<", - ">>>", - "<", - ">", - "<=", - ">=", - "==", - "===", - "!=", - "!==", - "?", - "=", - "+=", - "-=", - "/=", - "*=", - "%=", - ">>=", - "<<=", - ">>>=", - "~=", - "%=", - "|=", - "^=", - "&&", - "||" -]); - -var WHITESPACE_CHARS = array_to_hash(characters(" \n\r\t")); - -var PUNC_BEFORE_EXPRESSION = array_to_hash(characters("[{}(,.;:")); - -var PUNC_CHARS = array_to_hash(characters("[]{}(),;:")); - -var REGEXP_MODIFIERS = array_to_hash(characters("gmsiy")); +var $C = require("./constants"); /* -----[ Tokenizer ]----- */ -function is_alphanumeric_char(ch) { - ch = ch.charCodeAt(0); - return (ch >= 48 && ch <= 57) || - (ch >= 65 && ch <= 90) || - (ch >= 97 && ch <= 122); -}; - -function is_identifier_char(ch) { - return is_alphanumeric_char(ch) || ch == "$" || ch == "_"; -}; - -function is_digit(ch) { - ch = ch.charCodeAt(0); - return ch >= 48 && ch <= 57; -}; - -function parse_js_number(num) { - if (RE_HEX_NUMBER.test(num)) { - return parseInt(num.substr(2), 16); - } else if (RE_OCT_NUMBER.test(num)) { - return parseInt(num.substr(1), 8); - } else if (RE_DEC_NUMBER.test(num)) { - return parseFloat(num); - } -}; +var parse_js_number = (function(){ + var RE_HEX_NUMBER = /^0x[0-9a-f]+$/i; + var RE_OCT_NUMBER = /^0[0-7]+$/; + var RE_DEC_NUMBER = /^\d*\.?\d*(?:e-?\d*(?:\d\.?|\.?\d)\d*)?$/i; + return function(num) { + if (RE_HEX_NUMBER.test(num)) { + return parseInt(num.substr(2), 16); + } else if (RE_OCT_NUMBER.test(num)) { + return parseInt(num.substr(1), 8); + } else if (RE_DEC_NUMBER.test(num)) { + return parseFloat(num); + } + }; +})(); function JS_Parse_Error(message, line, col, pos) { this.message = message; @@ -292,9 +142,9 @@ function tokenizer($TEXT, skip_comments) { }; function token(type, value) { - S.regex_allowed = ((type == "operator" && !HOP(UNARY_POSTFIX, value)) || - (type == "keyword" && HOP(KEYWORDS_BEFORE_EXPRESSION, value)) || - (type == "punc" && HOP(PUNC_BEFORE_EXPRESSION, value))); + S.regex_allowed = ((type == "operator" && !$C.HOP($C.UNARY_POSTFIX, value)) || + (type == "keyword" && $C.HOP($C.KEYWORDS_BEFORE_EXPRESSION, value)) || + (type == "punc" && $C.HOP($C.PUNC_BEFORE_EXPRESSION, value))); var ret = { type : type, value : value, @@ -308,7 +158,7 @@ function tokenizer($TEXT, skip_comments) { }; function skip_whitespace() { - while (HOP(WHITESPACE_CHARS, peek())) + while ($C.HOP($C.WHITESPACE_CHARS, peek())) next(); }; @@ -341,7 +191,7 @@ function tokenizer($TEXT, skip_comments) { return false; } after_e = false; - return is_alphanumeric_char(ch) || ch == "."; + return $C.is_alphanumeric_char(ch) || ch == "."; }); if (prefix) num = prefix + num; @@ -438,7 +288,7 @@ function tokenizer($TEXT, skip_comments) { regexp += ch; } var mods = read_while(function(ch){ - return HOP(REGEXP_MODIFIERS, ch); + return $C.HOP($C.REGEXP_MODIFIERS, ch); }); return token("regexp", [ regexp, mods ]); }); @@ -447,7 +297,7 @@ function tokenizer($TEXT, skip_comments) { function read_operator(prefix) { function grow(op) { var bigger = op + peek(); - if (HOP(OPERATORS, bigger)) { + if ($C.HOP($C.OPERATORS, bigger)) { next(); return grow(bigger); } else { @@ -475,18 +325,18 @@ function tokenizer($TEXT, skip_comments) { function handle_dot() { next(); - return is_digit(peek()) + return $C.is_digit(peek()) ? read_num(".") : token("punc", "."); }; function read_word() { - var word = read_while(is_identifier_char); - return !HOP(KEYWORDS, word) + var word = read_while($C.is_identifier_char); + return !$C.HOP($C.KEYWORDS, word) ? token("name", word) - : HOP(OPERATORS, word) + : $C.HOP($C.OPERATORS, word) ? token("operator", word) - : HOP(KEYWORDS_ATOM, word) + : $C.HOP($C.KEYWORDS_ATOM, word) ? token("atom", word) : token("keyword", word); }; @@ -505,13 +355,13 @@ function tokenizer($TEXT, skip_comments) { start_token(); var ch = peek(); if (!ch) return token("eof"); - if (is_digit(ch)) return read_num(); + if ($C.is_digit(ch)) return read_num(); if (ch == '"' || ch == "'") return read_string(); - if (HOP(PUNC_CHARS, ch)) return token("punc", next()); + if ($C.HOP($C.PUNC_CHARS, ch)) return token("punc", next()); if (ch == ".") return handle_dot(); if (ch == "/") return handle_slash(); - if (HOP(OPERATOR_CHARS, ch)) return read_operator(); - if (is_identifier_char(ch)) return read_word(); + if ($C.HOP($C.OPERATOR_CHARS, ch)) return read_operator(); + if ($C.is_identifier_char(ch)) return read_word(); parse_error("Unexpected character '" + ch + "'"); }; @@ -524,73 +374,9 @@ function tokenizer($TEXT, skip_comments) { }; -/* -----[ Parser (constants) ]----- */ - -var UNARY_PREFIX = array_to_hash([ - "typeof", - "void", - "delete", - "--", - "++", - "!", - "~", - "-", - "+" -]); - -var UNARY_POSTFIX = array_to_hash([ "--", "++" ]); - -var ASSIGNMENT = (function(a, ret, i){ - while (i < a.length) { - ret[a[i]] = a[i].substr(0, a[i].length - 1); - i++; - } - return ret; -})( - ["+=", "-=", "/=", "*=", "%=", ">>=", "<<=", ">>>=", "~=", "%=", "|=", "^="], - { "=": true }, - 0 -); - -var PRECEDENCE = (function(a, ret){ - for (var i = 0, n = 1; i < a.length; ++i, ++n) { - var b = a[i]; - for (var j = 0; j < b.length; ++j) { - ret[b[j]] = n; - } - } - return ret; -})( - [ - ["||"], - ["&&"], - ["|"], - ["^"], - ["&"], - ["==", "===", "!=", "!=="], - ["<", ">", "<=", ">=", "in", "instanceof"], - [">>", "<<", ">>>"], - ["+", "-"], - ["*", "/", "%"] - ], - {} -); - -var STATEMENTS_WITH_LABELS = array_to_hash([ "for", "do", "while", "switch" ]); - -var ATOMIC_START_TOKEN = array_to_hash([ "atom", "num", "string", "regexp", "name" ]); - /* -----[ Parser ]----- */ -function NodeWithToken(str, start, end) { - this.name = str; - this.start = start; - this.end = end; -}; - -NodeWithToken.prototype.toString = function() { return this.name; }; - -function parse($TEXT, strict_mode, embed_tokens) { +function parse($TEXT, strict_mode) { var S = { input: tokenizer($TEXT, true), @@ -664,7 +450,7 @@ function parse($TEXT, strict_mode, embed_tokens) { }; function as() { - return slice(arguments); + return $C.slice(arguments); }; function parenthesised() { @@ -674,18 +460,7 @@ function parse($TEXT, strict_mode, embed_tokens) { return ex; }; - function add_tokens(str, start, end) { - return new NodeWithToken(str, start, end); - }; - - var statement = embed_tokens ? function() { - var start = S.token; - var stmt = $statement(); - stmt[0] = add_tokens(stmt[0], start, prev()); - return stmt; - } : $statement; - - function $statement() { + function statement() { switch (S.token.type) { case "num": case "string": @@ -696,7 +471,7 @@ function parse($TEXT, strict_mode, embed_tokens) { case "name": return is_token(peek(), "punc", ":") - ? labeled_statement(prog1(S.token.value, next, next)) + ? labeled_statement($C.prog1(S.token.value, next, next)) : simple_statement(); case "punc": @@ -714,7 +489,7 @@ function parse($TEXT, strict_mode, embed_tokens) { } case "keyword": - switch (prog1(S.token.value, next)) { + switch ($C.prog1(S.token.value, next)) { case "break": return break_cont("break"); @@ -728,7 +503,7 @@ function parse($TEXT, strict_mode, embed_tokens) { case "do": return (function(body){ expect_token("keyword", "while"); - return as("do", prog1(parenthesised, semicolon), body); + return as("do", $C.prog1(parenthesised, semicolon), body); })(in_loop(statement)); case "for": @@ -748,22 +523,22 @@ function parse($TEXT, strict_mode, embed_tokens) { ? (next(), null) : can_insert_semicolon() ? null - : prog1(expression, semicolon)); + : $C.prog1(expression, semicolon)); case "switch": return as("switch", parenthesised(), switch_block_()); case "throw": - return as("throw", prog1(expression, semicolon)); + return as("throw", $C.prog1(expression, semicolon)); case "try": return try_(); case "var": - return prog1(var_, semicolon); + return $C.prog1(var_, semicolon); case "const": - return prog1(const_, semicolon); + return $C.prog1(const_, semicolon); case "while": return as("while", parenthesised(), in_loop(statement)); @@ -780,14 +555,14 @@ function parse($TEXT, strict_mode, embed_tokens) { function labeled_statement(label) { S.labels.push(label); var start = S.token, stat = statement(); - if (strict_mode && !HOP(STATEMENTS_WITH_LABELS, stat[0])) + if (strict_mode && !$C.HOP($C.STATEMENTS_WITH_LABELS, stat[0])) unexpected(start); S.labels.pop(); return as("label", label, stat); }; function simple_statement() { - return as("stat", prog1(expression, semicolon)); + return as("stat", $C.prog1(expression, semicolon)); }; function break_cont(type) { @@ -796,7 +571,7 @@ function parse($TEXT, strict_mode, embed_tokens) { var name = is("name") ? S.token.value : null; if (name != null) { next(); - if (!member(name, S.labels)) + if (!$C.member(name, S.labels)) croak("Label " + name + " without matching loop or statement"); } semicolon(); @@ -828,7 +603,7 @@ function parse($TEXT, strict_mode, embed_tokens) { }; function function_(in_statement) { - var name = is("name") ? prog1(S.token.value, next) : null; + var name = is("name") ? $C.prog1(S.token.value, next) : null; if (in_statement && !name) unexpected(); expect("("); @@ -874,7 +649,7 @@ function parse($TEXT, strict_mode, embed_tokens) { return a; }; - var switch_block_ = curry(in_loop, function(){ + var switch_block_ = $C.curry(in_loop, function(){ expect("{"); var a = [], cur = null; while (!is("punc", "}")) { @@ -965,16 +740,16 @@ function parse($TEXT, strict_mode, embed_tokens) { next(); return new_(); } - if (is("operator") && HOP(UNARY_PREFIX, S.token.value)) { + if (is("operator") && $C.HOP($C.UNARY_PREFIX, S.token.value)) { return make_unary("unary-prefix", - prog1(S.token.value, next), + $C.prog1(S.token.value, next), expr_atom(allow_calls)); } if (is("punc")) { switch (S.token.value) { case "(": next(); - return subscripts(prog1(expression, curry(expect, ")")), allow_calls); + return subscripts($C.prog1(expression, $C.curry(expect, ")")), allow_calls); case "[": next(); return subscripts(array_(), allow_calls); @@ -988,11 +763,11 @@ function parse($TEXT, strict_mode, embed_tokens) { next(); return subscripts(function_(false), allow_calls); } - if (HOP(ATOMIC_START_TOKEN, S.token.type)) { + if ($C.HOP($C.ATOMIC_START_TOKEN, S.token.type)) { var atom = S.token.type == "regexp" ? as("regexp", S.token.value[0], S.token.value[1]) : as(S.token.type, S.token.value); - return subscripts(prog1(atom, next), allow_calls); + return subscripts($C.prog1(atom, next), allow_calls); } unexpected(); }; @@ -1033,7 +808,7 @@ function parse($TEXT, strict_mode, embed_tokens) { switch (S.token.type) { case "num": case "string": - return prog1(S.token.value, next); + return $C.prog1(S.token.value, next); } return as_name(); }; @@ -1044,7 +819,7 @@ function parse($TEXT, strict_mode, embed_tokens) { case "operator": case "keyword": case "atom": - return prog1(S.token.value, next); + return $C.prog1(S.token.value, next); default: unexpected(); } @@ -1057,15 +832,15 @@ function parse($TEXT, strict_mode, embed_tokens) { } if (is("punc", "[")) { next(); - return subscripts(as("sub", expr, prog1(expression, curry(expect, "]"))), allow_calls); + return subscripts(as("sub", expr, $C.prog1(expression, $C.curry(expect, "]"))), allow_calls); } if (allow_calls && is("punc", "(")) { next(); return subscripts(as("call", expr, expr_list(")")), true); } - if (allow_calls && is("operator") && HOP(UNARY_POSTFIX, S.token.value)) { - return prog1(curry(make_unary, "unary-postfix", S.token.value, expr), - next); + if (allow_calls && is("operator") && $C.HOP($C.UNARY_POSTFIX, S.token.value)) { + return $C.prog1($C.curry(make_unary, "unary-postfix", S.token.value, expr), + next); } return expr; }; @@ -1078,7 +853,7 @@ function parse($TEXT, strict_mode, embed_tokens) { function expr_op(left, min_prec) { var op = is("operator") ? S.token.value : null; - var prec = op != null ? PRECEDENCE[op] : null; + var prec = op != null ? $C.PRECEDENCE[op] : null; if (prec != null && prec > min_prec) { next(); var right = expr_op(expr_atom(true), prec); @@ -1113,10 +888,10 @@ function parse($TEXT, strict_mode, embed_tokens) { if (arguments.length == 0) commas = true; var left = maybe_conditional(commas), val = S.token.value; - if (is("operator") && HOP(ASSIGNMENT, val)) { + if (is("operator") && $C.HOP($C.ASSIGNMENT, val)) { if (is_assignable(left)) { next(); - return as("assign", ASSIGNMENT[val], left, maybe_assign(commas)); + return as("assign", $C.ASSIGNMENT[val], left, maybe_assign(commas)); } croak("Invalid assignment"); } @@ -1149,58 +924,7 @@ function parse($TEXT, strict_mode, embed_tokens) { }; -/* -----[ Utilities ]----- */ - -function curry(f) { - var args = slice(arguments, 1); - return function() { return f.apply(this, args.concat(slice(arguments))); }; -}; - -function prog1(ret) { - if (ret instanceof Function) - ret = ret(); - for (var i = 1, n = arguments.length; --n > 0; ++i) - arguments[i](); - return ret; -}; - -function array_to_hash(a) { - var ret = {}; - for (var i = 0; i < a.length; ++i) - ret[a[i]] = true; - return ret; -}; - -function slice(a, start) { - return Array.prototype.slice.call(a, start == null ? 0 : start); -}; - -function characters(str) { - return str.split(""); -}; - -function member(name, array) { - for (var i = array.length; --i >= 0;) - if (array[i] === name) - return true; - return false; -}; - -function HOP(obj, prop) { - return Object.prototype.hasOwnProperty.call(obj, prop); -}; - /* -----[ Exports ]----- */ exports.tokenizer = tokenizer; exports.parse = parse; -exports.slice = slice; -exports.curry = curry; -exports.member = member; -exports.array_to_hash = array_to_hash; -exports.PRECEDENCE = PRECEDENCE; -exports.KEYWORDS_ATOM = KEYWORDS_ATOM; -exports.RESERVED_WORDS = RESERVED_WORDS; -exports.KEYWORDS = KEYWORDS; -exports.ATOMIC_START_TOKEN = ATOMIC_START_TOKEN; -exports.is_alphanumeric_char = is_alphanumeric_char; diff --git a/lib/process.js b/lib/process.js index 214fb38d..8dc88ea3 100644 --- a/lib/process.js +++ b/lib/process.js @@ -52,10 +52,8 @@ ***********************************************************************/ -var jsp = require("./parse-js"), - slice = jsp.slice, - member = jsp.member, - PRECEDENCE = jsp.PRECEDENCE; +var $C = require("./constants"); +var HOP = $C.HOP; /* -----[ helper for AST traversal ]----- */ @@ -126,7 +124,7 @@ function ast_walker(ast) { return [ "assign", op, walk(lvalue), walk(rvalue) ]; }, "dot": function(expr) { - return [ "dot", walk(expr) ].concat(slice(arguments, 1)); + return [ "dot", walk(expr) ].concat($C.slice(arguments, 1)); }, "call": function(expr, args) { return [ "call", walk(expr), args.map(walk) ]; @@ -182,7 +180,7 @@ function ast_walker(ast) { return [ "stat", walk(stat) ]; }, "seq": function() { - return [ "seq" ].concat(slice(arguments).map(walk)); + return [ "seq" ].concat($C.slice(arguments).map(walk)); }, "label": function(name, block) { return [ "label", name, walk(block) ]; @@ -324,7 +322,7 @@ Scope.prototype = { continue; // I got "do" once. :-/ - if (!is_identifier(m)) + if (!$C.is_identifier(m)) continue; return m; @@ -560,7 +558,7 @@ function ast_mangle(ast, do_toplevel) { ]----- */ function ast_squeeze(ast, options) { - options = defaults(options, { + options = $C.defaults(options, { make_seqs: true }); @@ -622,7 +620,7 @@ function ast_squeeze(ast, options) { } else if (cur[0] == "stat") { prev.push(cur[1]); } else if (cur[0] == "seq") { - prev.push.apply(prev, slice(cur, 1)); + prev.push.apply(prev, $C.slice(cur, 1)); } else { prev = null; ret3.push(cur); @@ -658,7 +656,7 @@ function ast_squeeze(ast, options) { "sub": function(expr, subscript) { if (subscript[0] == "string") { var name = subscript[1]; - if (is_identifier(name)) { + if ($C.is_identifier(name)) { return [ "dot", walk(expr), name ]; } } @@ -764,7 +762,7 @@ function ast_squeeze(ast, options) { /* -----[ re-generate code from the AST ]----- */ -var DOT_CALL_NO_PARENS = jsp.array_to_hash([ +var DOT_CALL_NO_PARENS = $C.array_to_hash([ "name", "array", "string", @@ -775,7 +773,7 @@ var DOT_CALL_NO_PARENS = jsp.array_to_hash([ ]); function gen_code(ast, beautify) { - if (beautify) beautify = defaults(beautify, { + if (beautify) beautify = $C.defaults(beautify, { indent_start : 0, indent_level : 4, quote_keys : false, @@ -789,14 +787,14 @@ function gen_code(ast, beautify) { if (line == null) line = ""; if (beautify) - line = repeat_string(" ", beautify.indent_start + indentation * beautify.indent_level) + line; + line = $C.repeat_string(" ", beautify.indent_start + indentation * beautify.indent_level) + line; return line; }; function with_indent(cont, incr) { if (incr == null) incr = 1; indentation += incr; - var ret = cont.apply(null, slice(arguments, 1)); + var ret = cont.apply(null, $C.slice(arguments, 1)); indentation -= incr; return ret; }; @@ -978,12 +976,12 @@ function gen_code(ast, beautify) { // XXX: I'm pretty sure other cases will bite here. // we need to be smarter. // adding parens all the time is the safest bet. - if (member(lvalue[0], [ "assign", "conditional", "seq" ]) || - lvalue[0] == "binary" && PRECEDENCE[operator] > PRECEDENCE[lvalue[1]]) { + if ($C.member(lvalue[0], [ "assign", "conditional", "seq" ]) || + lvalue[0] == "binary" && $C.PRECEDENCE[operator] > $C.PRECEDENCE[lvalue[1]]) { left = "(" + left + ")"; } - if (member(rvalue[0], [ "assign", "conditional", "seq" ]) || - rvalue[0] == "binary" && PRECEDENCE[operator] >= PRECEDENCE[rvalue[1]]) { + if ($C.member(rvalue[0], [ "assign", "conditional", "seq" ]) || + rvalue[0] == "binary" && $C.PRECEDENCE[operator] >= $C.PRECEDENCE[rvalue[1]]) { right = "(" + right + ")"; } return add_spaces([ left, operator, right ]); @@ -992,7 +990,7 @@ function gen_code(ast, beautify) { var val = make(expr); if (!(HOP(DOT_CALL_NO_PARENS, expr[0]) || expr[0] == "num")) val = "(" + val + ")"; - return operator + (jsp.is_alphanumeric_char(operator.charAt(0)) ? " " : "") + val; + return operator + ($C.is_alphanumeric_char(operator.charAt(0)) ? " " : "") + val; }, "unary-postfix": function(operator, expr) { var val = make(expr); @@ -1012,7 +1010,7 @@ function gen_code(ast, beautify) { return "{" + newline + with_indent(function(){ return props.map(function(p){ var key = p[0], val = make(p[1]); - if (beautify && beautify.quote_keys || !is_identifier(key)) + if (beautify && beautify.quote_keys || !$C.is_identifier(key)) key = make_string(key); return indent(add_spaces(beautify && beautify.space_colon ? [ key, ":", val ] @@ -1031,7 +1029,7 @@ function gen_code(ast, beautify) { return make(stmt).replace(/;*\s*$/, ";"); }, "seq": function() { - return add_commas(slice(arguments).map(make)); + return add_commas($C.slice(arguments).map(make)); }, "label": function(name, block) { return add_spaces([ make_name(name), ":", make(block) ]); @@ -1162,38 +1160,6 @@ function gen_code(ast, beautify) { return make(ast); }; -/* -----[ Utilities ]----- */ - -function repeat_string(str, i) { - if (i <= 0) return ""; - if (i == 1) return str; - var d = repeat_string(str, i >> 1); - d += d; - if (i & 1) d += str; - return d; -}; - -function defaults(args, defs) { - var ret = {}; - if (args === true) - args = {}; - for (var i in defs) if (HOP(defs, i)) { - ret[i] = (args && HOP(args, i)) ? args[i] : defs[i]; - } - return ret; -}; - -function is_identifier(name) { - return /^[a-z_$][a-z0-9_$]*$/i.test(name) && - !HOP(jsp.KEYWORDS_ATOM, name) && - !HOP(jsp.RESERVED_WORDS, name) && - !HOP(jsp.KEYWORDS, name); -}; - -function HOP(obj, prop) { - return Object.prototype.hasOwnProperty.call(obj, prop); -}; - /* -----[ Exports ]----- */ exports.ast_walker = ast_walker; From 3edcbe65589b224fd91670463cd06ca88e57d10c Mon Sep 17 00:00:00 2001 From: Mihai Bazon Date: Fri, 17 Sep 2010 21:25:44 +0200 Subject: [PATCH 03/24] minor --- lib/constants.js | 3 +++ lib/process.js | 11 ++++++----- 2 files changed, 9 insertions(+), 5 deletions(-) diff --git a/lib/constants.js b/lib/constants.js index 91679750..0cb979a9 100644 --- a/lib/constants.js +++ b/lib/constants.js @@ -2,6 +2,9 @@ A JavaScript tokenizer / parser / beautifier / compressor. + This file defines some constants and utility functions that are used + in the parser and code generator. + -------------------------------- (C) --------------------------------- Author: Mihai Bazon diff --git a/lib/process.js b/lib/process.js index 8dc88ea3..d60daf4a 100644 --- a/lib/process.js +++ b/lib/process.js @@ -574,10 +574,6 @@ function ast_squeeze(ast, options) { return block; }; - function _lambda(name, args, body) { - return [ this[0], name, args, tighten(body.map(walk)) ]; - }; - // we get here for blocks that have been already transformed. // this function does two things: // 1. discard useless blocks @@ -652,6 +648,10 @@ function ast_squeeze(ast, options) { return !b || (b[0] == "block" && (!b[1] || b[1].length == 0)); }; + function _lambda(name, args, body) { + return [ this[0], name, args, tighten(body.map(walk)) ]; + }; + return w.with_walkers({ "sub": function(expr, subscript) { if (subscript[0] == "string") { @@ -714,13 +714,14 @@ function ast_squeeze(ast, options) { "switch": function(expr, body) { var last = body.length - 1; return [ "switch", walk(expr), body.map(function(branch, i){ + var ca$e = branch[0] ? walk(branch[0]) : null; var block = tighten(branch[1].map(walk)); if (i == last && block.length > 0) { var node = block[block.length - 1]; if (node[0] == "break" && !node[1]) block.pop(); } - return [ branch[0] ? walk(branch[0]) : null, block ]; + return [ ca$e, block ]; }) ]; }, "function": _lambda, From b078a293e6aec64b0a0a8f263819cb24eaf40fa8 Mon Sep 17 00:00:00 2001 From: Mihai Bazon Date: Fri, 17 Sep 2010 21:54:26 +0200 Subject: [PATCH 04/24] small cleanup --- lib/process.js | 92 +++++++++++++++++++++++++++----------------------- 1 file changed, 50 insertions(+), 42 deletions(-) diff --git a/lib/process.js b/lib/process.js index d60daf4a..0f6ff015 100644 --- a/lib/process.js +++ b/lib/process.js @@ -574,55 +574,63 @@ function ast_squeeze(ast, options) { return block; }; - // we get here for blocks that have been already transformed. - // this function does two things: - // 1. discard useless blocks - // 2. join consecutive var declarations - function tighten(statements) { - var cur, prev; - for (var i = 0, ret1 = []; i < statements.length; ++i) { - cur = statements[i]; - if (cur[0] == "block") { - if (cur[1]) { - ret1.push.apply(ret1, cur[1]); + function stats_to_sequences(statements) { + for (var i = 0, ret = [], prev = null; i < statements.length; ++i) { + var st = statements[i]; + if (!prev) { + if (st[0] == "stat") { + prev = [ "seq", st[1] ]; + ret.push([ "stat", prev ]); + } else { + ret.push(st); } + } else if (st[0] == "stat") { + prev.push(st[1]); + } else if (st[0] == "seq") { + prev.push.apply(prev, $C.slice(st, 1)); } else { - ret1.push(cur); + prev = null; + ret.push(st); } } - prev = null; - for (var i = 0, ret2 = []; i < ret1.length; ++i) { - cur = ret1[i]; - if (prev && ((cur[0] == "var" && prev[0] == "var") || - (cur[0] == "const" && prev[0] == "const"))) { - prev[1] = prev[1].concat(cur[1]); + return ret; + }; + + function join_consecutive_vars(statements) { + for (var i = 0, ret = [], prev = null; i < statements.length; ++i) { + var st = statements[i]; + if (prev && ((st[0] == "var" && prev[0] == "var") || + (st[0] == "const" && prev[0] == "const"))) { + prev[1] = prev[1].concat(st[1]); } else { - ret2.push(cur); - prev = cur; + ret.push(st); + prev = st; } } - if (!options.make_seqs) - return ret2; - prev = null; - for (var i = 0, ret3 = []; i < ret2.length; ++i) { - cur = ret2[i]; - if (!prev) { - if (cur[0] == "stat") { - prev = [ "seq", cur[1] ]; - ret3.push([ "stat", prev ]); - } else { - ret3.push(cur); + return ret; + }; + + function discard_unnecessary_blocks(statements) { + for (var i = 0, ret = []; i < statements.length; ++i) { + var st = statements[i]; + if (st[0] == "block") { + if (st[1]) { + ret.push.apply(ret, st[1]); } - } else if (cur[0] == "stat") { - prev.push(cur[1]); - } else if (cur[0] == "seq") { - prev.push.apply(prev, $C.slice(cur, 1)); } else { - prev = null; - ret3.push(cur); + ret.push(st); } } - return ret3.map(walk); + return ret; + }; + + function tighten(statements) { + statements = statements.map(walk); + statements = join_consecutive_vars(statements); + statements = discard_unnecessary_blocks(statements); + if (options.make_seqs) + statements = stats_to_sequences(statements); + return statements.map(walk); }; function best_of(ast1, ast2) { @@ -649,7 +657,7 @@ function ast_squeeze(ast, options) { }; function _lambda(name, args, body) { - return [ this[0], name, args, tighten(body.map(walk)) ]; + return [ this[0], name, args, tighten(body) ]; }; return w.with_walkers({ @@ -709,13 +717,13 @@ function ast_squeeze(ast, options) { return ret; }, "toplevel": function(body) { - return [ "toplevel", tighten(body.map(walk)) ]; + return [ "toplevel", tighten(body) ]; }, "switch": function(expr, body) { var last = body.length - 1; return [ "switch", walk(expr), body.map(function(branch, i){ var ca$e = branch[0] ? walk(branch[0]) : null; - var block = tighten(branch[1].map(walk)); + var block = tighten(branch[1]); if (i == last && block.length > 0) { var node = block[block.length - 1]; if (node[0] == "break" && !node[1]) @@ -727,7 +735,7 @@ function ast_squeeze(ast, options) { "function": _lambda, "defun": _lambda, "block": function(body) { - if (body) return rmblock([ "block", tighten(body.map(walk)) ]); + if (body) return rmblock([ "block", tighten(body) ]); }, "binary": function(op, left, right) { left = walk(left); From d87d069819aa0e3368e908141dc822590f6321a9 Mon Sep 17 00:00:00 2001 From: Mihai Bazon Date: Sat, 18 Sep 2010 09:29:27 +0200 Subject: [PATCH 05/24] more cleanups (got rid of $C) --- lib/parse-js.js | 112 ++++++++++++++++++++++++++++++------------------ lib/process.js | 53 ++++++++++++++--------- 2 files changed, 102 insertions(+), 63 deletions(-) diff --git a/lib/parse-js.js b/lib/parse-js.js index d2e1c188..6bfd3d2a 100644 --- a/lib/parse-js.js +++ b/lib/parse-js.js @@ -52,6 +52,34 @@ var $C = require("./constants"); +// import stuff we need. + +var ASSIGNMENT = $C.ASSIGNMENT; +var ATOMIC_START_TOKEN = $C.ATOMIC_START_TOKEN; +var HOP = $C.HOP; +var KEYWORDS = $C.KEYWORDS; +var KEYWORDS_ATOM = $C.KEYWORDS_ATOM; +var KEYWORDS_BEFORE_EXPRESSION = $C.KEYWORDS_BEFORE_EXPRESSION; +var OPERATORS = $C.OPERATORS; +var OPERATOR_CHARS = $C.OPERATOR_CHARS; +var PRECEDENCE = $C.PRECEDENCE; +var PUNC_BEFORE_EXPRESSION = $C.PUNC_BEFORE_EXPRESSION; +var PUNC_CHARS = $C.PUNC_CHARS; +var REGEXP_MODIFIERS = $C.REGEXP_MODIFIERS; +var STATEMENTS_WITH_LABELS = $C.STATEMENTS_WITH_LABELS; +var UNARY_POSTFIX = $C.UNARY_POSTFIX; +var UNARY_PREFIX = $C.UNARY_PREFIX; +var WHITESPACE_CHARS = $C.WHITESPACE_CHARS; +var curry = $C.curry; +var is_alphanumeric_char = $C.is_alphanumeric_char; +var is_digit = $C.is_digit; +var is_identifier_char = $C.is_identifier_char; +var member = $C.member; +var prog1 = $C.prog1; +var slice = $C.slice; + +// isn't CommonJS great? + /* -----[ Tokenizer ]----- */ var parse_js_number = (function(){ @@ -142,9 +170,9 @@ function tokenizer($TEXT, skip_comments) { }; function token(type, value) { - S.regex_allowed = ((type == "operator" && !$C.HOP($C.UNARY_POSTFIX, value)) || - (type == "keyword" && $C.HOP($C.KEYWORDS_BEFORE_EXPRESSION, value)) || - (type == "punc" && $C.HOP($C.PUNC_BEFORE_EXPRESSION, value))); + S.regex_allowed = ((type == "operator" && !HOP(UNARY_POSTFIX, value)) || + (type == "keyword" && HOP(KEYWORDS_BEFORE_EXPRESSION, value)) || + (type == "punc" && HOP(PUNC_BEFORE_EXPRESSION, value))); var ret = { type : type, value : value, @@ -158,7 +186,7 @@ function tokenizer($TEXT, skip_comments) { }; function skip_whitespace() { - while ($C.HOP($C.WHITESPACE_CHARS, peek())) + while (HOP(WHITESPACE_CHARS, peek())) next(); }; @@ -191,7 +219,7 @@ function tokenizer($TEXT, skip_comments) { return false; } after_e = false; - return $C.is_alphanumeric_char(ch) || ch == "."; + return is_alphanumeric_char(ch) || ch == "."; }); if (prefix) num = prefix + num; @@ -288,7 +316,7 @@ function tokenizer($TEXT, skip_comments) { regexp += ch; } var mods = read_while(function(ch){ - return $C.HOP($C.REGEXP_MODIFIERS, ch); + return HOP(REGEXP_MODIFIERS, ch); }); return token("regexp", [ regexp, mods ]); }); @@ -297,7 +325,7 @@ function tokenizer($TEXT, skip_comments) { function read_operator(prefix) { function grow(op) { var bigger = op + peek(); - if ($C.HOP($C.OPERATORS, bigger)) { + if (HOP(OPERATORS, bigger)) { next(); return grow(bigger); } else { @@ -325,18 +353,18 @@ function tokenizer($TEXT, skip_comments) { function handle_dot() { next(); - return $C.is_digit(peek()) + return is_digit(peek()) ? read_num(".") : token("punc", "."); }; function read_word() { - var word = read_while($C.is_identifier_char); - return !$C.HOP($C.KEYWORDS, word) + var word = read_while(is_identifier_char); + return !HOP(KEYWORDS, word) ? token("name", word) - : $C.HOP($C.OPERATORS, word) + : HOP(OPERATORS, word) ? token("operator", word) - : $C.HOP($C.KEYWORDS_ATOM, word) + : HOP(KEYWORDS_ATOM, word) ? token("atom", word) : token("keyword", word); }; @@ -355,13 +383,13 @@ function tokenizer($TEXT, skip_comments) { start_token(); var ch = peek(); if (!ch) return token("eof"); - if ($C.is_digit(ch)) return read_num(); + if (is_digit(ch)) return read_num(); if (ch == '"' || ch == "'") return read_string(); - if ($C.HOP($C.PUNC_CHARS, ch)) return token("punc", next()); + if (HOP(PUNC_CHARS, ch)) return token("punc", next()); if (ch == ".") return handle_dot(); if (ch == "/") return handle_slash(); - if ($C.HOP($C.OPERATOR_CHARS, ch)) return read_operator(); - if ($C.is_identifier_char(ch)) return read_word(); + if (HOP(OPERATOR_CHARS, ch)) return read_operator(); + if (is_identifier_char(ch)) return read_word(); parse_error("Unexpected character '" + ch + "'"); }; @@ -450,7 +478,7 @@ function parse($TEXT, strict_mode) { }; function as() { - return $C.slice(arguments); + return slice(arguments); }; function parenthesised() { @@ -471,7 +499,7 @@ function parse($TEXT, strict_mode) { case "name": return is_token(peek(), "punc", ":") - ? labeled_statement($C.prog1(S.token.value, next, next)) + ? labeled_statement(prog1(S.token.value, next, next)) : simple_statement(); case "punc": @@ -489,7 +517,7 @@ function parse($TEXT, strict_mode) { } case "keyword": - switch ($C.prog1(S.token.value, next)) { + switch (prog1(S.token.value, next)) { case "break": return break_cont("break"); @@ -503,7 +531,7 @@ function parse($TEXT, strict_mode) { case "do": return (function(body){ expect_token("keyword", "while"); - return as("do", $C.prog1(parenthesised, semicolon), body); + return as("do", prog1(parenthesised, semicolon), body); })(in_loop(statement)); case "for": @@ -523,22 +551,22 @@ function parse($TEXT, strict_mode) { ? (next(), null) : can_insert_semicolon() ? null - : $C.prog1(expression, semicolon)); + : prog1(expression, semicolon)); case "switch": return as("switch", parenthesised(), switch_block_()); case "throw": - return as("throw", $C.prog1(expression, semicolon)); + return as("throw", prog1(expression, semicolon)); case "try": return try_(); case "var": - return $C.prog1(var_, semicolon); + return prog1(var_, semicolon); case "const": - return $C.prog1(const_, semicolon); + return prog1(const_, semicolon); case "while": return as("while", parenthesised(), in_loop(statement)); @@ -555,14 +583,14 @@ function parse($TEXT, strict_mode) { function labeled_statement(label) { S.labels.push(label); var start = S.token, stat = statement(); - if (strict_mode && !$C.HOP($C.STATEMENTS_WITH_LABELS, stat[0])) + if (strict_mode && !HOP(STATEMENTS_WITH_LABELS, stat[0])) unexpected(start); S.labels.pop(); return as("label", label, stat); }; function simple_statement() { - return as("stat", $C.prog1(expression, semicolon)); + return as("stat", prog1(expression, semicolon)); }; function break_cont(type) { @@ -571,7 +599,7 @@ function parse($TEXT, strict_mode) { var name = is("name") ? S.token.value : null; if (name != null) { next(); - if (!$C.member(name, S.labels)) + if (!member(name, S.labels)) croak("Label " + name + " without matching loop or statement"); } semicolon(); @@ -603,7 +631,7 @@ function parse($TEXT, strict_mode) { }; function function_(in_statement) { - var name = is("name") ? $C.prog1(S.token.value, next) : null; + var name = is("name") ? prog1(S.token.value, next) : null; if (in_statement && !name) unexpected(); expect("("); @@ -649,7 +677,7 @@ function parse($TEXT, strict_mode) { return a; }; - var switch_block_ = $C.curry(in_loop, function(){ + var switch_block_ = curry(in_loop, function(){ expect("{"); var a = [], cur = null; while (!is("punc", "}")) { @@ -740,16 +768,16 @@ function parse($TEXT, strict_mode) { next(); return new_(); } - if (is("operator") && $C.HOP($C.UNARY_PREFIX, S.token.value)) { + if (is("operator") && HOP(UNARY_PREFIX, S.token.value)) { return make_unary("unary-prefix", - $C.prog1(S.token.value, next), + prog1(S.token.value, next), expr_atom(allow_calls)); } if (is("punc")) { switch (S.token.value) { case "(": next(); - return subscripts($C.prog1(expression, $C.curry(expect, ")")), allow_calls); + return subscripts(prog1(expression, curry(expect, ")")), allow_calls); case "[": next(); return subscripts(array_(), allow_calls); @@ -763,11 +791,11 @@ function parse($TEXT, strict_mode) { next(); return subscripts(function_(false), allow_calls); } - if ($C.HOP($C.ATOMIC_START_TOKEN, S.token.type)) { + if (HOP(ATOMIC_START_TOKEN, S.token.type)) { var atom = S.token.type == "regexp" ? as("regexp", S.token.value[0], S.token.value[1]) : as(S.token.type, S.token.value); - return subscripts($C.prog1(atom, next), allow_calls); + return subscripts(prog1(atom, next), allow_calls); } unexpected(); }; @@ -808,7 +836,7 @@ function parse($TEXT, strict_mode) { switch (S.token.type) { case "num": case "string": - return $C.prog1(S.token.value, next); + return prog1(S.token.value, next); } return as_name(); }; @@ -819,7 +847,7 @@ function parse($TEXT, strict_mode) { case "operator": case "keyword": case "atom": - return $C.prog1(S.token.value, next); + return prog1(S.token.value, next); default: unexpected(); } @@ -832,14 +860,14 @@ function parse($TEXT, strict_mode) { } if (is("punc", "[")) { next(); - return subscripts(as("sub", expr, $C.prog1(expression, $C.curry(expect, "]"))), allow_calls); + return subscripts(as("sub", expr, prog1(expression, curry(expect, "]"))), allow_calls); } if (allow_calls && is("punc", "(")) { next(); return subscripts(as("call", expr, expr_list(")")), true); } - if (allow_calls && is("operator") && $C.HOP($C.UNARY_POSTFIX, S.token.value)) { - return $C.prog1($C.curry(make_unary, "unary-postfix", S.token.value, expr), + if (allow_calls && is("operator") && HOP(UNARY_POSTFIX, S.token.value)) { + return prog1(curry(make_unary, "unary-postfix", S.token.value, expr), next); } return expr; @@ -853,7 +881,7 @@ function parse($TEXT, strict_mode) { function expr_op(left, min_prec) { var op = is("operator") ? S.token.value : null; - var prec = op != null ? $C.PRECEDENCE[op] : null; + var prec = op != null ? PRECEDENCE[op] : null; if (prec != null && prec > min_prec) { next(); var right = expr_op(expr_atom(true), prec); @@ -888,10 +916,10 @@ function parse($TEXT, strict_mode) { if (arguments.length == 0) commas = true; var left = maybe_conditional(commas), val = S.token.value; - if (is("operator") && $C.HOP($C.ASSIGNMENT, val)) { + if (is("operator") && HOP(ASSIGNMENT, val)) { if (is_assignable(left)) { next(); - return as("assign", $C.ASSIGNMENT[val], left, maybe_assign(commas)); + return as("assign", ASSIGNMENT[val], left, maybe_assign(commas)); } croak("Invalid assignment"); } diff --git a/lib/process.js b/lib/process.js index 0f6ff015..89bd4ad7 100644 --- a/lib/process.js +++ b/lib/process.js @@ -53,7 +53,18 @@ ***********************************************************************/ var $C = require("./constants"); -var HOP = $C.HOP; + +// "import" things we need. + +var HOP = $C.HOP; +var PRECEDENCE = $C.PRECEDENCE; +var array_to_hash = $C.array_to_hash; +var defaults = $C.defaults; +var is_alphanumeric_char = $C.is_alphanumeric_char; +var is_identifier = $C.is_identifier; +var member = $C.member; +var repeat_string = $C.repeat_string; +var slice = $C.slice; /* -----[ helper for AST traversal ]----- */ @@ -124,7 +135,7 @@ function ast_walker(ast) { return [ "assign", op, walk(lvalue), walk(rvalue) ]; }, "dot": function(expr) { - return [ "dot", walk(expr) ].concat($C.slice(arguments, 1)); + return [ "dot", walk(expr) ].concat(slice(arguments, 1)); }, "call": function(expr, args) { return [ "call", walk(expr), args.map(walk) ]; @@ -180,7 +191,7 @@ function ast_walker(ast) { return [ "stat", walk(stat) ]; }, "seq": function() { - return [ "seq" ].concat($C.slice(arguments).map(walk)); + return [ "seq" ].concat(slice(arguments).map(walk)); }, "label": function(name, block) { return [ "label", name, walk(block) ]; @@ -322,7 +333,7 @@ Scope.prototype = { continue; // I got "do" once. :-/ - if (!$C.is_identifier(m)) + if (!is_identifier(m)) continue; return m; @@ -558,7 +569,7 @@ function ast_mangle(ast, do_toplevel) { ]----- */ function ast_squeeze(ast, options) { - options = $C.defaults(options, { + options = defaults(options, { make_seqs: true }); @@ -587,7 +598,7 @@ function ast_squeeze(ast, options) { } else if (st[0] == "stat") { prev.push(st[1]); } else if (st[0] == "seq") { - prev.push.apply(prev, $C.slice(st, 1)); + prev.push.apply(prev, slice(st, 1)); } else { prev = null; ret.push(st); @@ -626,11 +637,11 @@ function ast_squeeze(ast, options) { function tighten(statements) { statements = statements.map(walk); + if (options.make_seqs) + statements = stats_to_sequences(statements).map(walk); statements = join_consecutive_vars(statements); statements = discard_unnecessary_blocks(statements); - if (options.make_seqs) - statements = stats_to_sequences(statements); - return statements.map(walk); + return statements; }; function best_of(ast1, ast2) { @@ -664,7 +675,7 @@ function ast_squeeze(ast, options) { "sub": function(expr, subscript) { if (subscript[0] == "string") { var name = subscript[1]; - if ($C.is_identifier(name)) { + if (is_identifier(name)) { return [ "dot", walk(expr), name ]; } } @@ -771,7 +782,7 @@ function ast_squeeze(ast, options) { /* -----[ re-generate code from the AST ]----- */ -var DOT_CALL_NO_PARENS = $C.array_to_hash([ +var DOT_CALL_NO_PARENS = array_to_hash([ "name", "array", "string", @@ -782,7 +793,7 @@ var DOT_CALL_NO_PARENS = $C.array_to_hash([ ]); function gen_code(ast, beautify) { - if (beautify) beautify = $C.defaults(beautify, { + if (beautify) beautify = defaults(beautify, { indent_start : 0, indent_level : 4, quote_keys : false, @@ -796,14 +807,14 @@ function gen_code(ast, beautify) { if (line == null) line = ""; if (beautify) - line = $C.repeat_string(" ", beautify.indent_start + indentation * beautify.indent_level) + line; + line = repeat_string(" ", beautify.indent_start + indentation * beautify.indent_level) + line; return line; }; function with_indent(cont, incr) { if (incr == null) incr = 1; indentation += incr; - var ret = cont.apply(null, $C.slice(arguments, 1)); + var ret = cont.apply(null, slice(arguments, 1)); indentation -= incr; return ret; }; @@ -985,12 +996,12 @@ function gen_code(ast, beautify) { // XXX: I'm pretty sure other cases will bite here. // we need to be smarter. // adding parens all the time is the safest bet. - if ($C.member(lvalue[0], [ "assign", "conditional", "seq" ]) || - lvalue[0] == "binary" && $C.PRECEDENCE[operator] > $C.PRECEDENCE[lvalue[1]]) { + if (member(lvalue[0], [ "assign", "conditional", "seq" ]) || + lvalue[0] == "binary" && PRECEDENCE[operator] > PRECEDENCE[lvalue[1]]) { left = "(" + left + ")"; } - if ($C.member(rvalue[0], [ "assign", "conditional", "seq" ]) || - rvalue[0] == "binary" && $C.PRECEDENCE[operator] >= $C.PRECEDENCE[rvalue[1]]) { + if (member(rvalue[0], [ "assign", "conditional", "seq" ]) || + rvalue[0] == "binary" && PRECEDENCE[operator] >= PRECEDENCE[rvalue[1]]) { right = "(" + right + ")"; } return add_spaces([ left, operator, right ]); @@ -999,7 +1010,7 @@ function gen_code(ast, beautify) { var val = make(expr); if (!(HOP(DOT_CALL_NO_PARENS, expr[0]) || expr[0] == "num")) val = "(" + val + ")"; - return operator + ($C.is_alphanumeric_char(operator.charAt(0)) ? " " : "") + val; + return operator + (is_alphanumeric_char(operator.charAt(0)) ? " " : "") + val; }, "unary-postfix": function(operator, expr) { var val = make(expr); @@ -1019,7 +1030,7 @@ function gen_code(ast, beautify) { return "{" + newline + with_indent(function(){ return props.map(function(p){ var key = p[0], val = make(p[1]); - if (beautify && beautify.quote_keys || !$C.is_identifier(key)) + if (beautify && beautify.quote_keys || !is_identifier(key)) key = make_string(key); return indent(add_spaces(beautify && beautify.space_colon ? [ key, ":", val ] @@ -1038,7 +1049,7 @@ function gen_code(ast, beautify) { return make(stmt).replace(/;*\s*$/, ";"); }, "seq": function() { - return add_commas($C.slice(arguments).map(make)); + return add_commas(slice(arguments).map(make)); }, "label": function(name, block) { return add_spaces([ make_name(name), ":", make(block) ]); From 2dd5f439ecbc3ed650f6a1b52e6d33d426ad836c Mon Sep 17 00:00:00 2001 From: Mihai Bazon Date: Sat, 18 Sep 2010 09:49:34 +0200 Subject: [PATCH 06/24] started an OOP interface, so that we can create multiple parsers with possibly different behaviors (macros, here I come!) --- lib/constants.js | 27 +++++++++++++++++++++ lib/parse-js.js | 62 +++++++++++++++++++++++++++++------------------- 2 files changed, 64 insertions(+), 25 deletions(-) diff --git a/lib/constants.js b/lib/constants.js index 0cb979a9..dca42d7c 100644 --- a/lib/constants.js +++ b/lib/constants.js @@ -132,6 +132,33 @@ function defaults(args, defs) { }; exports.defaults = defaults; +function make_deep_copy(src) { + if (src instanceof Array) + return src.map(make_deep_copy); + if (src === null) + return null; + if (src instanceof Function) + return src; + if (src instanceof Date) + return new Date(src); + if (src instanceof Number || typeof src == "number") + return src; + if (src instanceof String || typeof src == "string") + return src; + if (src instanceof Boolean || src === true || src === false) + return src; + if (src instanceof Object) { + if (src.clone instanceof Function) + return src.clone(); + var i, dest = {}; + for (i in src) if (HOP(src, i)) + dest[i] = make_deep_copy(src[i]); + return dest; + } + return src; +}; +exports.make_deep_copy = make_deep_copy; + /* -----[ Contants ]----- */ var KEYWORDS = array_to_hash([ diff --git a/lib/parse-js.js b/lib/parse-js.js index 6bfd3d2a..283ee7a8 100644 --- a/lib/parse-js.js +++ b/lib/parse-js.js @@ -52,31 +52,35 @@ var $C = require("./constants"); +var make_deep_copy = $C.make_deep_copy; + +function ParseJS(){ + // import stuff we need. -var ASSIGNMENT = $C.ASSIGNMENT; -var ATOMIC_START_TOKEN = $C.ATOMIC_START_TOKEN; -var HOP = $C.HOP; -var KEYWORDS = $C.KEYWORDS; -var KEYWORDS_ATOM = $C.KEYWORDS_ATOM; -var KEYWORDS_BEFORE_EXPRESSION = $C.KEYWORDS_BEFORE_EXPRESSION; -var OPERATORS = $C.OPERATORS; -var OPERATOR_CHARS = $C.OPERATOR_CHARS; -var PRECEDENCE = $C.PRECEDENCE; -var PUNC_BEFORE_EXPRESSION = $C.PUNC_BEFORE_EXPRESSION; -var PUNC_CHARS = $C.PUNC_CHARS; -var REGEXP_MODIFIERS = $C.REGEXP_MODIFIERS; -var STATEMENTS_WITH_LABELS = $C.STATEMENTS_WITH_LABELS; -var UNARY_POSTFIX = $C.UNARY_POSTFIX; -var UNARY_PREFIX = $C.UNARY_PREFIX; -var WHITESPACE_CHARS = $C.WHITESPACE_CHARS; -var curry = $C.curry; -var is_alphanumeric_char = $C.is_alphanumeric_char; -var is_digit = $C.is_digit; -var is_identifier_char = $C.is_identifier_char; -var member = $C.member; -var prog1 = $C.prog1; -var slice = $C.slice; +var ASSIGNMENT = make_deep_copy($C.ASSIGNMENT); +var ATOMIC_START_TOKEN = make_deep_copy($C.ATOMIC_START_TOKEN); +var HOP = make_deep_copy($C.HOP); +var KEYWORDS = make_deep_copy($C.KEYWORDS); +var KEYWORDS_ATOM = make_deep_copy($C.KEYWORDS_ATOM); +var KEYWORDS_BEFORE_EXPRESSION = make_deep_copy($C.KEYWORDS_BEFORE_EXPRESSION); +var OPERATORS = make_deep_copy($C.OPERATORS); +var OPERATOR_CHARS = make_deep_copy($C.OPERATOR_CHARS); +var PRECEDENCE = make_deep_copy($C.PRECEDENCE); +var PUNC_BEFORE_EXPRESSION = make_deep_copy($C.PUNC_BEFORE_EXPRESSION); +var PUNC_CHARS = make_deep_copy($C.PUNC_CHARS); +var REGEXP_MODIFIERS = make_deep_copy($C.REGEXP_MODIFIERS); +var STATEMENTS_WITH_LABELS = make_deep_copy($C.STATEMENTS_WITH_LABELS); +var UNARY_POSTFIX = make_deep_copy($C.UNARY_POSTFIX); +var UNARY_PREFIX = make_deep_copy($C.UNARY_PREFIX); +var WHITESPACE_CHARS = make_deep_copy($C.WHITESPACE_CHARS); +var curry = make_deep_copy($C.curry); +var is_alphanumeric_char = make_deep_copy($C.is_alphanumeric_char); +var is_digit = make_deep_copy($C.is_digit); +var is_identifier_char = make_deep_copy($C.is_identifier_char); +var member = make_deep_copy($C.member); +var prog1 = make_deep_copy($C.prog1); +var slice = make_deep_copy($C.slice); // isn't CommonJS great? @@ -952,7 +956,15 @@ function parse($TEXT, strict_mode) { }; +this.tokenizer = tokenizer; +this.parse = parse; + +}; + /* -----[ Exports ]----- */ -exports.tokenizer = tokenizer; -exports.parse = parse; +exports.ParseJS = ParseJS; + +// still support old API +exports.tokenizer = new ParseJS().tokenizer; +exports.parse = new ParseJS().parse; From 85d3aa2f9392d89897830bbce750c3397f88809f Mon Sep 17 00:00:00 2001 From: Mihai Bazon Date: Sat, 18 Sep 2010 12:29:34 +0200 Subject: [PATCH 07/24] discard noop statements --- lib/process.js | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/lib/process.js b/lib/process.js index 89bd4ad7..ff17f5cd 100644 --- a/lib/process.js +++ b/lib/process.js @@ -635,12 +635,29 @@ function ast_squeeze(ast, options) { return ret; }; + function discard_noop_stats(statements) { + for (var i = 0, ret = []; i < statements.length; ++i) { + var st = statements[i]; + if (st[0] == "stat") { + switch (st[1][0]) { + case "string": + case "name": + case "number": + continue; + } + } + ret.push(st); + } + return ret; + }; + function tighten(statements) { statements = statements.map(walk); if (options.make_seqs) statements = stats_to_sequences(statements).map(walk); statements = join_consecutive_vars(statements); statements = discard_unnecessary_blocks(statements); + statements = discard_noop_stats(statements); return statements; }; From 8caac8d89ed7c55931476ef099a1b522e28bbbef Mon Sep 17 00:00:00 2001 From: Mihai Bazon Date: Sat, 18 Sep 2010 14:00:10 +0200 Subject: [PATCH 08/24] minor --- lib/constants.js | 21 +++++++++++---------- 1 file changed, 11 insertions(+), 10 deletions(-) diff --git a/lib/constants.js b/lib/constants.js index dca42d7c..5962af48 100644 --- a/lib/constants.js +++ b/lib/constants.js @@ -133,29 +133,30 @@ function defaults(args, defs) { exports.defaults = defaults; function make_deep_copy(src) { - if (src instanceof Array) - return src.map(make_deep_copy); if (src === null) return null; - if (src instanceof Function) + if (src instanceof Boolean || src === true || src === false) return src; - if (src instanceof Date) - return new Date(src); if (src instanceof Number || typeof src == "number") return src; if (src instanceof String || typeof src == "string") return src; - if (src instanceof Boolean || src === true || src === false) + if (src.clone instanceof Function) + return src.clone(); + if (src instanceof Array) + return src.map(make_deep_copy); + if (src instanceof Function) return src; + if (src instanceof Date) + return new Date(src); if (src instanceof Object) { - if (src.clone instanceof Function) - return src.clone(); var i, dest = {}; - for (i in src) if (HOP(src, i)) + for (i in src) if (HOP(src, i)) { dest[i] = make_deep_copy(src[i]); + } return dest; } - return src; + throw new Error("Cannot clone object: " + src); }; exports.make_deep_copy = make_deep_copy; From 66f31fc0070090c08f1bad38a0d56f7f9159fc23 Mon Sep 17 00:00:00 2001 From: Mihai Bazon Date: Sat, 18 Sep 2010 14:00:31 +0200 Subject: [PATCH 09/24] display comments in the generated code when they are in the AST. --- lib/process.js | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/lib/process.js b/lib/process.js index ff17f5cd..dfe7f0b5 100644 --- a/lib/process.js +++ b/lib/process.js @@ -201,7 +201,9 @@ function ast_walker(ast) { }, "atom": function(name) { return [ "atom", name ]; - } + }, + "comment1": function(name) { return this }, + "comment2": function(name) { return this } }; var user = {}; From 922c63c7d9c69e95021f286463fee43c31dfd8c5 Mon Sep 17 00:00:00 2001 From: Mihai Bazon Date: Sat, 18 Sep 2010 14:01:24 +0200 Subject: [PATCH 10/24] started macro compiler -- I'd say it's halfway there. --- lib/macro-js.js | 93 +++++++++++++++++++++ lib/parse-js.js | 211 +++++++++++++++++++++++++++++++++++------------- 2 files changed, 249 insertions(+), 55 deletions(-) create mode 100644 lib/macro-js.js diff --git a/lib/macro-js.js b/lib/macro-js.js new file mode 100644 index 00000000..fa6c1d28 --- /dev/null +++ b/lib/macro-js.js @@ -0,0 +1,93 @@ +var ParseJS = require("./parse-js").ParseJS; +var pro = require("./process"); +var $C = require("./constants"); +var HOP = $C.HOP; + +function createParser() { + var parser = new ParseJS(); + parser.macros = {}; + parser.define_statement("defmacro", function(PC, HC) { + var name, args = [], body; + //*** read macro name + if (!PC.is("name")) PC.unexpected(); + name = PC.tokval(); + PC.next(); + //*** read arguments list + PC.expect("("); + var first = true; + while (!PC.is("punc", ")")) { + if (first) first = false; else PC.expect(","); + if (!PC.is("name")) PC.unexpected(); + var a = { name: PC.tokval() }; + PC.next(); + if (PC.is("punc", ":")) { + PC.next(); + if (!(PC.is("name") || PC.is("keyword"))) + PC.unexpected(); + switch (a.type = PC.tokval()) { + case "block": + a.reader = PC.block_; + break; + case "name": + a.reader = function() { + if (!PC.is("name")) PC.unexpected(); + return HC.prog1(PC.tokval, PC.next); + }; + break; + default: + PC.unexpected(); + } + PC.next(); + } else { + a.reader = HC.curry(PC.expression, false); + } + args.push(a); + } + PC.next(); // skip closing paren + //*** read body; set in_function so that "return" is allowed. + PC.S.in_function++; + body = PC.block_(); + PC.S.in_function--; + //*** should compile the macro now + return compileMacro(name, args, body, parser); + }); + return parser; +}; + +function compileMacro(name, args, body, parser) { + if (HOP(parser.macros, name)) { + throw new Error("Redefinition of macro '" + name + "'"); + } + var code = pro.gen_code([ "toplevel", [ + [ "defun", + name, + args.map(function(a) { return a.name }), + body ] + ]], { indent_start: 3 }); + var func; + try { func = new Function("return (" + code + ");").call(parser); } catch(ex) { + sys.puts("Error compiling macro '" + name + "'"); + sys.puts(code); + sys.puts(ex.toString()); + throw ex; + } + parser.macros[name] = { + args: args, + func: func + }; + parser.define_call_parser(name, function(PC, HC){ + PC.expect("("); + var first = true, a = []; + while (!PC.is("punc", ")")) { + if (first) first = false; else PC.expect(","); + a.push(args[a.length].reader()); + } + PC.next(); + return func.apply(parser, a); + }); + return [ "comment2", "*** // Macro '" + name + "' compiled as:\n" + code + "\n ***" ]; +}; + +/* -----[ Exports ]----- */ + +exports.createParser = createParser; diff --git a/lib/parse-js.js b/lib/parse-js.js index 283ee7a8..6bf50bd0 100644 --- a/lib/parse-js.js +++ b/lib/parse-js.js @@ -54,6 +54,39 @@ var $C = require("./constants"); var make_deep_copy = $C.make_deep_copy; +var parse_js_number = (function(){ + var RE_HEX_NUMBER = /^0x[0-9a-f]+$/i; + var RE_OCT_NUMBER = /^0[0-7]+$/; + var RE_DEC_NUMBER = /^\d*\.?\d*(?:e-?\d*(?:\d\.?|\.?\d)\d*)?$/i; + return function(num) { + if (RE_HEX_NUMBER.test(num)) { + return parseInt(num.substr(2), 16); + } else if (RE_OCT_NUMBER.test(num)) { + return parseInt(num.substr(1), 8); + } else if (RE_DEC_NUMBER.test(num)) { + return parseFloat(num); + } + }; +})(); + +function js_error(message, line, col, pos) { + var err = new Error(message); + err.type = "ParseJS"; + err.line = line; + err.col = col; + err.pos = pos; + err.toString = function() { + return message + "\nline: " + line + "\ncol: " + col + "\npos: " + pos; + }; + throw err; +}; + +function is_token(token, type, val) { + return token.type == type && (val == null || token.value == val); +}; + +var EX_EOF = {}; + function ParseJS(){ // import stuff we need. @@ -74,6 +107,7 @@ var STATEMENTS_WITH_LABELS = make_deep_copy($C.STATEMENTS_WITH_LABELS); var UNARY_POSTFIX = make_deep_copy($C.UNARY_POSTFIX); var UNARY_PREFIX = make_deep_copy($C.UNARY_PREFIX); var WHITESPACE_CHARS = make_deep_copy($C.WHITESPACE_CHARS); +var RESERVED_WORDS = make_deep_copy($C.RESERVED_WORDS); var curry = make_deep_copy($C.curry); var is_alphanumeric_char = make_deep_copy($C.is_alphanumeric_char); var is_digit = make_deep_copy($C.is_digit); @@ -84,48 +118,10 @@ var slice = make_deep_copy($C.slice); // isn't CommonJS great? -/* -----[ Tokenizer ]----- */ - -var parse_js_number = (function(){ - var RE_HEX_NUMBER = /^0x[0-9a-f]+$/i; - var RE_OCT_NUMBER = /^0[0-7]+$/; - var RE_DEC_NUMBER = /^\d*\.?\d*(?:e-?\d*(?:\d\.?|\.?\d)\d*)?$/i; - return function(num) { - if (RE_HEX_NUMBER.test(num)) { - return parseInt(num.substr(2), 16); - } else if (RE_OCT_NUMBER.test(num)) { - return parseInt(num.substr(1), 8); - } else if (RE_DEC_NUMBER.test(num)) { - return parseFloat(num); - } - }; -})(); - -function JS_Parse_Error(message, line, col, pos) { - this.message = message; - this.line = line; - this.col = col; - this.pos = pos; - try { - ({})(); - } catch(ex) { - this.stack = ex.stack; - }; -}; - -JS_Parse_Error.prototype.toString = function() { - return this.message + " (line: " + this.line + ", col: " + this.col + ", pos: " + this.pos + ")" + "\n\n" + this.stack; -}; - -function js_error(message, line, col, pos) { - throw new JS_Parse_Error(message, line, col, pos); -}; +var CUSTOM_STATEMENT_PARSERS = {}; +var CUSTOM_CALL_PARSERS = {}; -function is_token(token, type, val) { - return token.type == type && (val == null || token.value == val); -}; - -var EX_EOF = {}; +/* -----[ Tokenizer ]----- */ function tokenizer($TEXT, skip_comments) { @@ -157,10 +153,6 @@ function tokenizer($TEXT, skip_comments) { return ch; }; - function eof() { - return !S.peek(); - }; - function find(what, signal_eof) { var pos = S.text.indexOf(what, S.pos); if (signal_eof && pos == -1) throw EX_EOF; @@ -458,14 +450,14 @@ function parse($TEXT, strict_mode) { function unexpected(token) { if (token == null) token = S.token; - token_error(token, "Unexpected token: " + token.type + " (" + token.value + ")"); + token_error(token, "Unexpected token: " + token.type + " \"" + token.value + "\""); }; function expect_token(type, val) { if (is(type, val)) { return next(); } - token_error(S.token, "Unexpected token " + S.token.type + ", expected " + type); + token_error(S.token, "Unexpected token " + S.token.type + " \"" + S.token.value + "\", expected " + type + " \"" + val + "\""); }; function expect(punc) { return expect_token("punc", punc); }; @@ -482,7 +474,9 @@ function parse($TEXT, strict_mode) { }; function as() { - return slice(arguments); + return make_node + ? make_node(parser_helpers, obj_context, slice(arguments)) + : slice(arguments); }; function parenthesised() { @@ -521,7 +515,10 @@ function parse($TEXT, strict_mode) { } case "keyword": - switch (prog1(S.token.value, next)) { + var kw = prog1(S.token.value, next); + if (CUSTOM_STATEMENT_PARSERS[kw]) + return CUSTOM_STATEMENT_PARSERS[kw](parser_helpers, obj_context); + switch (kw) { case "break": return break_cont("break"); @@ -867,12 +864,15 @@ function parse($TEXT, strict_mode) { return subscripts(as("sub", expr, prog1(expression, curry(expect, "]"))), allow_calls); } if (allow_calls && is("punc", "(")) { + if (expr[0] == "name" && CUSTOM_CALL_PARSERS[expr[1]]) { + return CUSTOM_CALL_PARSERS[expr[1]](parser_helpers, obj_context); + } next(); return subscripts(as("call", expr, expr_list(")")), true); } if (allow_calls && is("operator") && HOP(UNARY_POSTFIX, S.token.value)) { return prog1(curry(make_unary, "unary-postfix", S.token.value, expr), - next); + next); } return expr; }; @@ -948,6 +948,53 @@ function parse($TEXT, strict_mode) { return ret; }; + var parser_helpers = { + S : S, + is : is, + peek : peek, + next : next, + prev : prev, + croak : croak, + token_error : token_error, + unexpected : unexpected, + expect_token : expect_token, + expect : expect, + semicolon : semicolon, + as : as, + parenthesised : parenthesised, + statement : statement, + labeled_statement : labeled_statement, + simple_statement : simple_statement, + break_cont : break_cont, + for_ : for_, + function_ : function_, + if_ : if_, + block_ : block_, + switch_block_ : switch_block_, + try_ : try_, + vardefs : vardefs, + var_ : var_, + const_ : const_, + new_ : new_, + expr_atom : expr_atom, + expr_list : expr_list, + array_ : array_, + object_ : object_, + as_property_name : as_property_name, + as_name : as_name, + subscripts : subscripts, + make_unary : make_unary, + expr_op : expr_op, + expr_ops : expr_ops, + maybe_conditional : maybe_conditional, + is_assignable : is_assignable, + maybe_assign : maybe_assign, + expression : expression, + in_loop : in_loop, + token : function() { return S.token }, + tokval : function() { return S.token.value } + }; + return as("toplevel", (function(a){ while (!is("eof")) a.push(statement()); @@ -956,8 +1003,60 @@ function parse($TEXT, strict_mode) { }; -this.tokenizer = tokenizer; -this.parse = parse; + // API + + var obj_context = { + ASSIGNMENT : ASSIGNMENT, + ATOMIC_START_TOKEN : ATOMIC_START_TOKEN, + HOP : HOP, + KEYWORDS : KEYWORDS, + KEYWORDS_ATOM : KEYWORDS_ATOM, + KEYWORDS_BEFORE_EXPRESSION : KEYWORDS_BEFORE_EXPRESSION, + OPERATORS : OPERATORS, + OPERATOR_CHARS : OPERATOR_CHARS, + PRECEDENCE : PRECEDENCE, + PUNC_BEFORE_EXPRESSION : PUNC_BEFORE_EXPRESSION, + PUNC_CHARS : PUNC_CHARS, + REGEXP_MODIFIERS : REGEXP_MODIFIERS, + STATEMENTS_WITH_LABELS : STATEMENTS_WITH_LABELS, + UNARY_POSTFIX : UNARY_POSTFIX, + UNARY_PREFIX : UNARY_PREFIX, + WHITESPACE_CHARS : WHITESPACE_CHARS, + RESERVED_WORDS : RESERVED_WORDS, + curry : curry, + is_alphanumeric_char : is_alphanumeric_char, + is_digit : is_digit, + member : member, + prog1 : prog1, + slice : slice, + + self: this, + tokenizer: tokenizer, + parse: parse + }; + + var make_node = null; + + this.tokenizer = tokenizer; + this.parse = parse; + + this.define_keyword = function(kw) { + KEYWORDS[kw] = kw; + RESERVED_WORDS[kw] = kw; + }; + + this.define_call_parser = function(name, parser) { + CUSTOM_CALL_PARSERS[name] = parser; + }; + + this.define_statement = function(kw, parser) { + this.define_keyword(kw); + CUSTOM_STATEMENT_PARSERS[kw] = parser; + }; + + this.define_mknode = function(func) { + make_node = func; + }; }; @@ -965,6 +1064,8 @@ this.parse = parse; exports.ParseJS = ParseJS; -// still support old API -exports.tokenizer = new ParseJS().tokenizer; -exports.parse = new ParseJS().parse; +// support old API +(function(P){ + exports.tokenizer = P.tokenizer; + exports.parse = P.parse; +})(new ParseJS()); From 86e19e5e7504c647d8d2e8e99eb78952ff2b4104 Mon Sep 17 00:00:00 2001 From: Mihai Bazon Date: Sat, 18 Sep 2010 19:38:55 +0200 Subject: [PATCH 11/24] preparing for custom tokenizers, other small cleanups --- lib/macro-js.js | 100 +++++++++++++++++++++++++----------------------- lib/parse-js.js | 33 ++++++++++++++-- 2 files changed, 81 insertions(+), 52 deletions(-) diff --git a/lib/macro-js.js b/lib/macro-js.js index fa6c1d28..0b72d3d9 100644 --- a/lib/macro-js.js +++ b/lib/macro-js.js @@ -6,64 +6,68 @@ var HOP = $C.HOP; function createParser() { var parser = new ParseJS(); parser.macros = {}; - parser.define_statement("defmacro", function(PC, HC) { - var name, args = [], body; - //*** read macro name + parser.define_statement("defmacro", function(PC, OC) { + var m = readDefMacro(PC, OC); + return compileMacro(m.name, m.args, m.body, parser); + }); + return parser; +}; + +function readDefMacro(PC, OC) { + var name, args = [], body; + //*** read macro name + if (!PC.is("name")) PC.unexpected(); + name = PC.tokval(); + PC.next(); + //*** read arguments list + PC.expect("("); + var first = true; + while (!PC.is("punc", ")")) { + if (first) first = false; else PC.expect(","); if (!PC.is("name")) PC.unexpected(); - name = PC.tokval(); + var a = { name: PC.tokval() }; PC.next(); - //*** read arguments list - PC.expect("("); - var first = true; - while (!PC.is("punc", ")")) { - if (first) first = false; else PC.expect(","); - if (!PC.is("name")) PC.unexpected(); - var a = { name: PC.tokval() }; + if (PC.is("punc", ":")) { PC.next(); - if (PC.is("punc", ":")) { - PC.next(); - if (!(PC.is("name") || PC.is("keyword"))) - PC.unexpected(); - switch (a.type = PC.tokval()) { - case "block": - a.reader = PC.block_; - break; - case "name": - a.reader = function() { - if (!PC.is("name")) PC.unexpected(); - return HC.prog1(PC.tokval, PC.next); - }; - break; - default: - PC.unexpected(); - } - PC.next(); - } else { - a.reader = HC.curry(PC.expression, false); + if (!(PC.is("name") || PC.is("keyword"))) + PC.unexpected(); + switch (a.type = PC.tokval()) { + case "block": + a.reader = PC.block_; + break; + case "name": + a.reader = function() { + if (!PC.is("name")) PC.unexpected(); + return OC.prog1(PC.tokval, PC.next); + }; + break; + default: + PC.unexpected(); } - args.push(a); + PC.next(); + } else { + a.reader = OC.curry(PC.expression, false); } - PC.next(); // skip closing paren - //*** read body; set in_function so that "return" is allowed. - PC.S.in_function++; - body = PC.block_(); - PC.S.in_function--; - //*** should compile the macro now - return compileMacro(name, args, body, parser); - }); - return parser; + args.push(a); + } + PC.next(); // skip closing paren + //*** read body; set in_function so that "return" is allowed. + PC.S.in_function++; + body = PC.block_(); + PC.S.in_function--; + return { name: name, args: args, body: body }; }; function compileMacro(name, args, body, parser) { if (HOP(parser.macros, name)) { throw new Error("Redefinition of macro '" + name + "'"); } - var code = pro.gen_code([ "toplevel", [ - [ "defun", - name, - args.map(function(a) { return a.name }), - body ] - ]], { indent_start: 3 }); + var code = pro.gen_code([ "toplevel", [[ + "defun", + name, + args.map(function(a) { return a.name }), + body + ]]], { indent_start: 3 }); var func; try { func = new Function("return (" + code + ");").call(parser); } catch(ex) { sys.puts("Error compiling macro '" + name + "'"); @@ -75,7 +79,7 @@ function compileMacro(name, args, body, parser) { args: args, func: func }; - parser.define_call_parser(name, function(PC, HC){ + parser.define_call_parser(name, function(PC, OC){ PC.expect("("); var first = true, a = []; while (!PC.is("punc", ")")) { diff --git a/lib/parse-js.js b/lib/parse-js.js index 6bf50bd0..8e714cf9 100644 --- a/lib/parse-js.js +++ b/lib/parse-js.js @@ -389,6 +389,31 @@ function tokenizer($TEXT, skip_comments) { parse_error("Unexpected character '" + ch + "'"); }; + var tokenizer_context = { + S : S, + peek : peek, + next : next, + find : find, + start_token : start_token, + token : token, + skip_whitespace : skip_whitespace, + read_while : read_while, + parse_error : parse_error, + read_num : read_num, + read_escaped_char : read_escaped_char, + hex_bytes : hex_bytes, + read_string : read_string, + read_line_comment : read_line_comment, + read_multiline_comment : read_multiline_comment, + read_regexp : read_regexp, + read_operator : read_operator, + handle_slash : handle_slash, + handle_dot : handle_dot, + read_word : read_word, + with_eof_error : with_eof_error, + next_token : next_token + }; + next_token.context = function(nc) { if (nc) S = nc; return S; @@ -475,7 +500,7 @@ function parse($TEXT, strict_mode) { function as() { return make_node - ? make_node(parser_helpers, obj_context, slice(arguments)) + ? make_node(parser_context, obj_context, slice(arguments)) : slice(arguments); }; @@ -517,7 +542,7 @@ function parse($TEXT, strict_mode) { case "keyword": var kw = prog1(S.token.value, next); if (CUSTOM_STATEMENT_PARSERS[kw]) - return CUSTOM_STATEMENT_PARSERS[kw](parser_helpers, obj_context); + return CUSTOM_STATEMENT_PARSERS[kw](parser_context, obj_context); switch (kw) { case "break": return break_cont("break"); @@ -865,7 +890,7 @@ function parse($TEXT, strict_mode) { } if (allow_calls && is("punc", "(")) { if (expr[0] == "name" && CUSTOM_CALL_PARSERS[expr[1]]) { - return CUSTOM_CALL_PARSERS[expr[1]](parser_helpers, obj_context); + return CUSTOM_CALL_PARSERS[expr[1]](parser_context, obj_context); } next(); return subscripts(as("call", expr, expr_list(")")), true); @@ -948,7 +973,7 @@ function parse($TEXT, strict_mode) { return ret; }; - var parser_helpers = { + var parser_context = { S : S, is : is, peek : peek, From e022cacd032df98ca6dedd25e39113acaa18927e Mon Sep 17 00:00:00 2001 From: Mihai Bazon Date: Sun, 19 Sep 2010 14:41:58 +0200 Subject: [PATCH 12/24] some limited support for quote / code templates --- lib/macro-js.js | 109 +++++++++++++++++++++++++++++++++++++++++++----- lib/parse-js.js | 76 ++++++++++++++++++++++----------- lib/process.js | 5 +++ 3 files changed, 156 insertions(+), 34 deletions(-) diff --git a/lib/macro-js.js b/lib/macro-js.js index 0b72d3d9..2f27fea6 100644 --- a/lib/macro-js.js +++ b/lib/macro-js.js @@ -3,23 +3,91 @@ var pro = require("./process"); var $C = require("./constants"); var HOP = $C.HOP; +function Unquote(ast) { this.ast = ast; }; + +function Symbol(sym) { this.sym = sym; }; + +Symbol.prototype.toString = function() { return this.sym; }; + +function quote_ast(ast) { + if (ast === null) { + return [ "name", "null" ]; + } + else if (typeof ast == "undefined") { + return [ "name", "undefined" ]; + } + else if (ast instanceof Unquote) { + return ast.ast; + } + else if (ast instanceof Array) { + return [ "array", ast.map(quote_ast) ]; + } + else if (typeof ast == "string") { + return [ "string", ast ]; + } + else if (typeof ast == "boolean") { + return [ "name", ast.toString() ]; + } + else if (typeof ast == "number") { + return isNaN(ast) + ? [ "name", "NaN" ] + : [ "num", ast ]; + } + else throw new Error("Unhandled case in quote: " + typeof ast); +}; + function createParser() { var parser = new ParseJS(); + var SYM = 0; + parser.gensym = function() { + return new Symbol("__$$__SYM" + (++SYM)); + }; + parser.symbol = function(name) { + return new Symbol(name); + }; parser.macros = {}; + parser.define_token_reader("`", function(TC, OC) { + TC.next(); + var tok = TC.next_token(); + tok.macro = "quote"; + return tok; + }); + parser.define_token_reader("@", function(TC, OC) { + TC.next(); + var tok = TC.next_token(); + tok.macro = "quote-stmt"; + return tok; + }); + parser.define_token_reader("\\", function(TC, OC) { + TC.next(); + var tok = TC.next_token(); + tok.macro = "unquote"; + return tok; + }); parser.define_statement("defmacro", function(PC, OC) { var m = readDefMacro(PC, OC); return compileMacro(m.name, m.args, m.body, parser); }); + parser.define_token_processor(function(cont, PC, OC){ + var tok = PC.token(); + if (!tok.macro) + return cont(); + switch (tok.macro) { + case "quote": + return quote_ast(cont()); + case "quote-stmt": + return quote_ast(PC.statement()); + case "unquote": + return new Unquote(cont()); + default: + throw new Error("Unsupported macro character: " + tok.macro); + } + }); return parser; }; -function readDefMacro(PC, OC) { - var name, args = [], body; - //*** read macro name - if (!PC.is("name")) PC.unexpected(); - name = PC.tokval(); - PC.next(); - //*** read arguments list +function readMacroArgs(PC, OC) { + var args = []; PC.expect("("); var first = true; while (!PC.is("punc", ")")) { @@ -41,6 +109,16 @@ function readDefMacro(PC, OC) { return OC.prog1(PC.tokval, PC.next); }; break; + case "string": + a.reader = function() { + if (!PC.is("string")) PC.unexpected(); + return OC.prog1(PC.tokval, PC.next); + }; + case "num": + a.reader = function() { + if (!PC.is("num")) PC.unexpected(); + return OC.prog1(PC.tokval, PC.next); + }; default: PC.unexpected(); } @@ -51,9 +129,19 @@ function readDefMacro(PC, OC) { args.push(a); } PC.next(); // skip closing paren + return args; +}; + +function readDefMacro(PC, OC) { + //*** read macro name + if (!PC.is("name")) PC.unexpected(); + var name = PC.tokval(); + PC.next(); + //*** read arguments list + var args = readMacroArgs(PC, OC); //*** read body; set in_function so that "return" is allowed. PC.S.in_function++; - body = PC.block_(); + var body = PC.block_(); PC.S.in_function--; return { name: name, args: args, body: body }; }; @@ -62,12 +150,13 @@ function compileMacro(name, args, body, parser) { if (HOP(parser.macros, name)) { throw new Error("Redefinition of macro '" + name + "'"); } - var code = pro.gen_code([ "toplevel", [[ + var ast = [ "toplevel", [[ "defun", name, args.map(function(a) { return a.name }), body - ]]], { indent_start: 3 }); + ]]]; + var code = pro.gen_code(ast, { indent_start: 3, plainsyms: true }); var func; try { func = new Function("return (" + code + ");").call(parser); } catch(ex) { sys.puts("Error compiling macro '" + name + "'"); diff --git a/lib/parse-js.js b/lib/parse-js.js index 8e714cf9..35eb2dbf 100644 --- a/lib/parse-js.js +++ b/lib/parse-js.js @@ -120,6 +120,8 @@ var slice = make_deep_copy($C.slice); var CUSTOM_STATEMENT_PARSERS = {}; var CUSTOM_CALL_PARSERS = {}; +var CUSTOM_TOKEN_READER = {}; +var CUSTOM_TOKEN_PROCESSOR = null; /* -----[ Tokenizer ]----- */ @@ -376,8 +378,10 @@ function tokenizer($TEXT, skip_comments) { function next_token() { skip_whitespace(); - start_token(); var ch = peek(); + if (CUSTOM_TOKEN_READER[ch]) + return CUSTOM_TOKEN_READER[ch](tokenizer_context, obj_context); + start_token(); if (!ch) return token("eof"); if (is_digit(ch)) return read_num(); if (ch == '"' || ch == "'") return read_string(); @@ -439,6 +443,12 @@ function parse($TEXT, strict_mode) { S.token = next(); + function tokprocess(cont) { + return CUSTOM_TOKEN_PROCESSOR + ? CUSTOM_TOKEN_PROCESSOR(cont, parser_context, obj_context) + : cont(); + }; + function is(type, value) { return is_token(S.token, type, value); }; @@ -657,7 +667,7 @@ function parse($TEXT, strict_mode) { }; function function_(in_statement) { - var name = is("name") ? prog1(S.token.value, next) : null; + var name = is("name") ? tokprocess(curry(prog1, S.token.value, next)) : null; if (in_statement && !name) unexpected(); expect("("); @@ -668,7 +678,7 @@ function parse($TEXT, strict_mode) { while (!is("punc", ")")) { if (first) first = false; else expect(","); if (!is("name")) unexpected(); - a.push(S.token.value); + a.push(tokprocess(function() { return S.token.value })); next(); } next(); @@ -684,7 +694,7 @@ function parse($TEXT, strict_mode) { }; function if_() { - var cond = parenthesised(), body = statement(), belse; + var cond = parenthesised(), body = statement(), belse = null; if (is("keyword", "else")) { next(); belse = statement(); @@ -755,7 +765,11 @@ function parse($TEXT, strict_mode) { for (;;) { if (!is("name")) unexpected(); - var name = S.token.value; + var name = tokprocess(function(){ + if (!is("name")) + unexpected(); + return S.token.value; + }); next(); if (is("operator", "=")) { next(); @@ -799,7 +813,7 @@ function parse($TEXT, strict_mode) { prog1(S.token.value, next), expr_atom(allow_calls)); } - if (is("punc")) { + if (is("punc")) return tokprocess(function(){ switch (S.token.value) { case "(": next(); @@ -812,15 +826,17 @@ function parse($TEXT, strict_mode) { return subscripts(object_(), allow_calls); } unexpected(); - } - if (is("keyword", "function")) { + }); + if (is("keyword", "function")) return tokprocess(function(){ next(); return subscripts(function_(false), allow_calls); - } + }); if (HOP(ATOMIC_START_TOKEN, S.token.type)) { - var atom = S.token.type == "regexp" - ? as("regexp", S.token.value[0], S.token.value[1]) - : as(S.token.type, S.token.value); + var atom = tokprocess(function(){ + return S.token.type == "regexp" + ? as("regexp", S.token.value[0], S.token.value[1]) + : as(S.token.type, S.token.value); + }); return subscripts(prog1(atom, next), allow_calls); } unexpected(); @@ -862,21 +878,23 @@ function parse($TEXT, strict_mode) { switch (S.token.type) { case "num": case "string": - return prog1(S.token.value, next); + return tokprocess(curry(prog1, S.token.value, next)); } return as_name(); }; function as_name() { - switch (S.token.type) { - case "name": - case "operator": - case "keyword": - case "atom": - return prog1(S.token.value, next); - default: - unexpected(); - } + return tokprocess(function(){ + switch (S.token.type) { + case "name": + case "operator": + case "keyword": + case "atom": + return prog1(S.token.value, next); + default: + unexpected(); + } + }); }; function subscripts(expr, allow_calls) { @@ -937,8 +955,10 @@ function parse($TEXT, strict_mode) { }; function is_assignable(expr) { - expr = expr[0]; - return expr == "name" || expr == "dot" || expr == "sub"; + // XXX: to support macro it's necessary not to check this + return true; + // expr = expr[0]; + // return expr == "name" || expr == "dot" || expr == "sub"; }; function maybe_assign(commas) { @@ -1083,6 +1103,14 @@ function parse($TEXT, strict_mode) { make_node = func; }; + this.define_token_reader = function(ch, reader) { + CUSTOM_TOKEN_READER[ch] = reader; + }; + + this.define_token_processor = function(func) { + CUSTOM_TOKEN_PROCESSOR = func; + }; + }; /* -----[ Exports ]----- */ diff --git a/lib/process.js b/lib/process.js index dfe7f0b5..77bb2f86 100644 --- a/lib/process.js +++ b/lib/process.js @@ -1189,6 +1189,11 @@ function gen_code(ast, beautify) { }; function make(node) { + // XXX: this helps us deal with generated names in macros. + { + if (!(node instanceof Array)) + return node.toString(); + } var type = node[0]; var gen = generators[type]; if (!gen) From 46963a09ff775216b0ccbb98e7b2a16f33893b69 Mon Sep 17 00:00:00 2001 From: Mihai Bazon Date: Sun, 19 Sep 2010 15:20:42 +0200 Subject: [PATCH 13/24] normalize AST after macroexpand, other small fixes --- lib/macro-js.js | 71 ++++++++++++++++++++++++++++++++++++++++++++++++- lib/parse-js.js | 8 +++--- lib/process.js | 3 +++ 3 files changed, 78 insertions(+), 4 deletions(-) diff --git a/lib/macro-js.js b/lib/macro-js.js index 2f27fea6..6a5ce518 100644 --- a/lib/macro-js.js +++ b/lib/macro-js.js @@ -83,6 +83,13 @@ function createParser() { throw new Error("Unsupported macro character: " + tok.macro); } }); + // finally, the AST that we build is a bit different from what + // our processors already know, so we include a final step to + // normalize it. + var orig_parse = parser.parse; + parser.parse = function() { + return normalize_ast(orig_parse.apply(this, arguments)); + }; return parser; }; @@ -106,7 +113,7 @@ function readMacroArgs(PC, OC) { case "name": a.reader = function() { if (!PC.is("name")) PC.unexpected(); - return OC.prog1(PC.tokval, PC.next); + return new Symbol(OC.prog1(PC.tokval, PC.next)); }; break; case "string": @@ -181,6 +188,68 @@ function compileMacro(name, args, body, parser) { return [ "comment2", "*** // Macro '" + name + "' compiled as:\n" + code + "\n ***" ]; }; +function replace_symbols(ast) { + if (ast instanceof Array) { + switch (ast[0]) { + case "var": + case "const": + case "object": + ast[1].forEach(function(def){ + if (def[0] instanceof Symbol) + def[0] = def[0].toString(); + if (def[1]) + def[1] = replace_symbols(def[1]); + }); + return ast; + case "function": + case "defun": + if (ast[1] instanceof Symbol) + ast[1] = ast[1].toString(); + ast[2] = ast[2].map(function(name, i){ + return name instanceof Symbol ? name.toString() : name; + }); + ast[3] = ast[3].map(replace_symbols); + return ast; + case "try": + // 0 block, 1 catch: 1.0 ex, 1.1 block, 2 finally + ast[0] = replace_symbols(ast[0]); + if (ast[1]) { + if (ast[1][0] instanceof Symbol) + ast[1][0] = ast[1][0].toString(); + ast[1][1] = ast[1][1].map(replace_symbols); + } + if (ast[2]) { + ast[2] = ast[2].map(replace_symbols); + } + return ast; + default: + for (var i = 0; i < ast.length; ++i) + ast[i] = replace_symbols(ast[i]); + return ast; + } + } else if (ast instanceof Symbol) { + return [ "name", ast.toString() ]; + } + return ast; +}; + +function normalize_ast(ast) { + ast = replace_symbols(ast); + INSPECT(ast); + var w = pro.ast_walker(); + return w.with_walkers({ + "stat": function(expr) { + if (expr[0] == "block") { + if (expr[1] && expr[1].length == 1) + return expr[1][0]; + return expr; + } + } + }, function() { + return w.walk(ast); + }); +}; + /* -----[ Exports ]----- */ exports.createParser = createParser; diff --git a/lib/parse-js.js b/lib/parse-js.js index 35eb2dbf..cba30bb0 100644 --- a/lib/parse-js.js +++ b/lib/parse-js.js @@ -744,9 +744,11 @@ function parse($TEXT, strict_mode) { if (is("keyword", "catch")) { next(); expect("("); - if (!is("name")) - croak("Name expected"); - var name = S.token.value; + var name = tokprocess(function(){ + if (!is("name")) + croak("Name expected"); + return S.token.value; + }); next(); expect(")"); bcatch = [ name, block_() ]; diff --git a/lib/process.js b/lib/process.js index 77bb2f86..b66a31a9 100644 --- a/lib/process.js +++ b/lib/process.js @@ -220,6 +220,9 @@ function ast_walker(ast) { return ret; } gen = walkers[type]; + if (!gen) { + throw new Error("No generator for " + ast); + } var ret = gen.apply(ast, ast.slice(1)); stack.pop(); return ret; From 6a733e0f2ff022cb9cd2d9d6b634514a4f3e7b06 Mon Sep 17 00:00:00 2001 From: Mihai Bazon Date: Sun, 19 Sep 2010 15:40:06 +0200 Subject: [PATCH 14/24] fixed symbol normalization --- lib/macro-js.js | 30 +++++++++++++++++++----------- 1 file changed, 19 insertions(+), 11 deletions(-) diff --git a/lib/macro-js.js b/lib/macro-js.js index 6a5ce518..234ee6bf 100644 --- a/lib/macro-js.js +++ b/lib/macro-js.js @@ -45,6 +45,7 @@ function createParser() { parser.symbol = function(name) { return new Symbol(name); }; + parser.quote = quote_ast; parser.macros = {}; parser.define_token_reader("`", function(TC, OC) { TC.next(); @@ -188,6 +189,18 @@ function compileMacro(name, args, body, parser) { return [ "comment2", "*** // Macro '" + name + "' compiled as:\n" + code + "\n ***" ]; }; +function normalize_symbol(s, wantname) { + if (s instanceof Symbol) { + return wantname + ? [ "name", s.toString() ] + : s.toString(); + } + else if (s instanceof Array && s[0] == "name" && !wantname) { + return s[1]; + } + return s; +}; + function replace_symbols(ast) { if (ast instanceof Array) { switch (ast[0]) { @@ -195,27 +208,22 @@ function replace_symbols(ast) { case "const": case "object": ast[1].forEach(function(def){ - if (def[0] instanceof Symbol) - def[0] = def[0].toString(); + def[0] = normalize_symbol(def[0]); if (def[1]) def[1] = replace_symbols(def[1]); }); return ast; case "function": case "defun": - if (ast[1] instanceof Symbol) - ast[1] = ast[1].toString(); - ast[2] = ast[2].map(function(name, i){ - return name instanceof Symbol ? name.toString() : name; - }); + ast[1] = normalize_symbol(ast[1]); + ast[2] = ast[2].map(function(name){ return normalize_symbol(name) }); ast[3] = ast[3].map(replace_symbols); return ast; case "try": // 0 block, 1 catch: 1.0 ex, 1.1 block, 2 finally ast[0] = replace_symbols(ast[0]); if (ast[1]) { - if (ast[1][0] instanceof Symbol) - ast[1][0] = ast[1][0].toString(); + ast[1][0] = normalize_symbol(ast[1][0]); ast[1][1] = ast[1][1].map(replace_symbols); } if (ast[2]) { @@ -227,8 +235,8 @@ function replace_symbols(ast) { ast[i] = replace_symbols(ast[i]); return ast; } - } else if (ast instanceof Symbol) { - return [ "name", ast.toString() ]; + } else { + return normalize_symbol(ast, true); } return ast; }; From 2745e65b6adfe4d8ed379a1082c553e4426f2c36 Mon Sep 17 00:00:00 2001 From: Mihai Bazon Date: Sun, 19 Sep 2010 15:49:03 +0200 Subject: [PATCH 15/24] some magic to allow us to return quoted statements from a macro without an embedding block --- lib/macro-js.js | 19 ++++++++++++++++++- lib/parse-js.js | 3 +++ 2 files changed, 21 insertions(+), 1 deletion(-) diff --git a/lib/macro-js.js b/lib/macro-js.js index 234ee6bf..525082b1 100644 --- a/lib/macro-js.js +++ b/lib/macro-js.js @@ -247,10 +247,27 @@ function normalize_ast(ast) { var w = pro.ast_walker(); return w.with_walkers({ "stat": function(expr) { - if (expr[0] == "block") { + switch (expr[0]) { + case "block": if (expr[1] && expr[1].length == 1) return expr[1][0]; return expr; + case "break": + case "const": + case "continue": + case "defun": + case "do": + case "for": + case "for-in": + case "if": + case "return": + case "switch": + case "throw": + case "try": + case "var": + case "while": + case "with": + return expr; } } }, function() { diff --git a/lib/parse-js.js b/lib/parse-js.js index cba30bb0..3acf03fe 100644 --- a/lib/parse-js.js +++ b/lib/parse-js.js @@ -841,6 +841,9 @@ function parse($TEXT, strict_mode) { }); return subscripts(prog1(atom, next), allow_calls); } + if (CUSTOM_TOKEN_PROCESSOR) { + return tokprocess(statement); // try to read as statement + } unexpected(); }; From 2d82ca794c0fd8b91974e0095c06e63fb53b28bd Mon Sep 17 00:00:00 2001 From: Mihai Bazon Date: Sun, 19 Sep 2010 16:38:59 +0200 Subject: [PATCH 16/24] macro-expand in a separate step, which allows calling macros from a macro --- lib/macro-js.js | 43 +++++++++++++++++++++++++------------------ lib/process.js | 5 +++++ 2 files changed, 30 insertions(+), 18 deletions(-) diff --git a/lib/macro-js.js b/lib/macro-js.js index 525082b1..541094fa 100644 --- a/lib/macro-js.js +++ b/lib/macro-js.js @@ -19,6 +19,9 @@ function quote_ast(ast) { else if (ast instanceof Unquote) { return ast.ast; } + else if (ast instanceof Symbol) { + return ast; + } else if (ast instanceof Array) { return [ "array", ast.map(quote_ast) ]; } @@ -33,7 +36,7 @@ function quote_ast(ast) { ? [ "name", "NaN" ] : [ "num", ast ]; } - else throw new Error("Unhandled case in quote: " + typeof ast); + else throw new Error("Unhandled case in quote: " + typeof ast + "\n" + sys.inspect(ast, null, null)); }; function createParser() { @@ -84,12 +87,9 @@ function createParser() { throw new Error("Unsupported macro character: " + tok.macro); } }); - // finally, the AST that we build is a bit different from what - // our processors already know, so we include a final step to - // normalize it. var orig_parse = parser.parse; parser.parse = function() { - return normalize_ast(orig_parse.apply(this, arguments)); + return macro_expand(parser, orig_parse.apply(this, arguments)); }; return parser; }; @@ -109,7 +109,9 @@ function readMacroArgs(PC, OC) { PC.unexpected(); switch (a.type = PC.tokval()) { case "block": - a.reader = PC.block_; + a.reader = function() { + return [ "block", PC.block_() ]; + }; break; case "name": a.reader = function() { @@ -117,16 +119,6 @@ function readMacroArgs(PC, OC) { return new Symbol(OC.prog1(PC.tokval, PC.next)); }; break; - case "string": - a.reader = function() { - if (!PC.is("string")) PC.unexpected(); - return OC.prog1(PC.tokval, PC.next); - }; - case "num": - a.reader = function() { - if (!PC.is("num")) PC.unexpected(); - return OC.prog1(PC.tokval, PC.next); - }; default: PC.unexpected(); } @@ -184,11 +176,26 @@ function compileMacro(name, args, body, parser) { a.push(args[a.length].reader()); } PC.next(); - return func.apply(parser, a); + return [ "macro-expand", name, a ]; }); return [ "comment2", "*** // Macro '" + name + "' compiled as:\n" + code + "\n ***" ]; }; +function macro_expand(parser, ast) { + var w = pro.ast_walker(); + return normalize_ast(w.with_walkers({ + "macro-expand": function(macro, args) { + var func = parser.macros[macro].func; + var ret = func.apply(parser, args.map(w.walk)); + ret = replace_symbols(ret); + ret = w.walk(ret); + return ret; + } + }, function() { + return w.walk(ast); + })); +}; + function normalize_symbol(s, wantname) { if (s instanceof Symbol) { return wantname @@ -243,10 +250,10 @@ function replace_symbols(ast) { function normalize_ast(ast) { ast = replace_symbols(ast); - INSPECT(ast); var w = pro.ast_walker(); return w.with_walkers({ "stat": function(expr) { + expr = w.walk(expr); switch (expr[0]) { case "block": if (expr[1] && expr[1].length == 1) diff --git a/lib/process.js b/lib/process.js index b66a31a9..133a87d3 100644 --- a/lib/process.js +++ b/lib/process.js @@ -211,6 +211,11 @@ function ast_walker(ast) { function walk(ast) { if (ast == null) return null; + // XXX: this helps us deal with generated names in macros. + { + if (!(ast instanceof Array)) + return ast; + } stack.push(ast); var type = ast[0]; var gen = user[type]; From 99db7a75d8bb5c6c1a6b73d935230d43f267921e Mon Sep 17 00:00:00 2001 From: Mihai Bazon Date: Sun, 19 Sep 2010 20:21:28 +0200 Subject: [PATCH 17/24] support for defstat --- lib/macro-js.js | 26 ++++++++++++++++++++++++-- lib/parse-js.js | 3 +-- 2 files changed, 25 insertions(+), 4 deletions(-) diff --git a/lib/macro-js.js b/lib/macro-js.js index 541094fa..bb6d9e17 100644 --- a/lib/macro-js.js +++ b/lib/macro-js.js @@ -48,6 +48,9 @@ function createParser() { parser.symbol = function(name) { return new Symbol(name); }; + parser.macro_expand = function(ast) { + return macro_expand(parser, ast); + }; parser.quote = quote_ast; parser.macros = {}; parser.define_token_reader("`", function(TC, OC) { @@ -72,6 +75,22 @@ function createParser() { var m = readDefMacro(PC, OC); return compileMacro(m.name, m.args, m.body, parser); }); + parser.define_statement("defstat", function(PC, OC) { + // what happens here is really quite tricky: if + // immediately after "defstat" you use the new + // statement, it won't be seen as a keyword because + // the token has already been peek()-ed. Hence, we + // use a hack -- passing true to readDefMacro will + // register the new name as a keyword immediately. + var m = readDefMacro(PC, OC, true); + parser.define_statement(m.name, function(PC, OC) { + var a = []; + for (var i = 0; i < m.args.length; ++i) + a[i] = m.args[i].reader(); + return [ "macro-expand", m.name, a ]; + }); + return compileMacro(m.name, m.args, m.body, parser); + }); parser.define_token_processor(function(cont, PC, OC){ var tok = PC.token(); if (!tok.macro) @@ -110,7 +129,7 @@ function readMacroArgs(PC, OC) { switch (a.type = PC.tokval()) { case "block": a.reader = function() { - return [ "block", PC.block_() ]; + return [ "block", [ PC.statement() ] ]; }; break; case "name": @@ -132,10 +151,13 @@ function readMacroArgs(PC, OC) { return args; }; -function readDefMacro(PC, OC) { +function readDefMacro(PC, OC, make_kw) { //*** read macro name if (!PC.is("name")) PC.unexpected(); var name = PC.tokval(); + // this is needed for defstat. + if (make_kw) + OC.self.define_keyword(name); PC.next(); //*** read arguments list var args = readMacroArgs(PC, OC); diff --git a/lib/parse-js.js b/lib/parse-js.js index 3acf03fe..1dfa1ec9 100644 --- a/lib/parse-js.js +++ b/lib/parse-js.js @@ -545,9 +545,8 @@ function parse($TEXT, strict_mode) { case ";": next(); return as("block"); - default: - unexpected(); } + unexpected(); case "keyword": var kw = prog1(S.token.value, next); From db2bec7c3aba4432c51fd32451e283ea46af97f1 Mon Sep 17 00:00:00 2001 From: Mihai Bazon Date: Mon, 20 Sep 2010 07:00:23 +0200 Subject: [PATCH 18/24] minor --- lib/macro-js.js | 44 ++++++++++++++++++++++++-------------------- lib/parse-js.js | 1 + 2 files changed, 25 insertions(+), 20 deletions(-) diff --git a/lib/macro-js.js b/lib/macro-js.js index bb6d9e17..2e647da7 100644 --- a/lib/macro-js.js +++ b/lib/macro-js.js @@ -72,8 +72,8 @@ function createParser() { return tok; }); parser.define_statement("defmacro", function(PC, OC) { - var m = readDefMacro(PC, OC); - return compileMacro(m.name, m.args, m.body, parser); + var m = read_defmacro(PC, OC); + return compile_macro(m.name, m.args, m.body, parser); }); parser.define_statement("defstat", function(PC, OC) { // what happens here is really quite tricky: if @@ -82,14 +82,14 @@ function createParser() { // the token has already been peek()-ed. Hence, we // use a hack -- passing true to readDefMacro will // register the new name as a keyword immediately. - var m = readDefMacro(PC, OC, true); + var m = read_defmacro(PC, OC, true); parser.define_statement(m.name, function(PC, OC) { var a = []; for (var i = 0; i < m.args.length; ++i) a[i] = m.args[i].reader(); return [ "macro-expand", m.name, a ]; }); - return compileMacro(m.name, m.args, m.body, parser); + return compile_macro(m.name, m.args, m.body, parser, true); }); parser.define_token_processor(function(cont, PC, OC){ var tok = PC.token(); @@ -113,7 +113,7 @@ function createParser() { return parser; }; -function readMacroArgs(PC, OC) { +function read_macro_args(PC, OC) { var args = []; PC.expect("("); var first = true; @@ -129,13 +129,15 @@ function readMacroArgs(PC, OC) { switch (a.type = PC.tokval()) { case "block": a.reader = function() { - return [ "block", [ PC.statement() ] ]; + return [ "block", [ PC.tokprocess(PC.statement) ] ]; }; break; case "name": a.reader = function() { if (!PC.is("name")) PC.unexpected(); - return new Symbol(OC.prog1(PC.tokval, PC.next)); + return new Symbol(OC.prog1(function() { + return PC.tokprocess(PC.tokval); + }, PC.next)); }; break; default: @@ -151,7 +153,7 @@ function readMacroArgs(PC, OC) { return args; }; -function readDefMacro(PC, OC, make_kw) { +function read_defmacro(PC, OC, make_kw) { //*** read macro name if (!PC.is("name")) PC.unexpected(); var name = PC.tokval(); @@ -160,7 +162,7 @@ function readDefMacro(PC, OC, make_kw) { OC.self.define_keyword(name); PC.next(); //*** read arguments list - var args = readMacroArgs(PC, OC); + var args = read_macro_args(PC, OC); //*** read body; set in_function so that "return" is allowed. PC.S.in_function++; var body = PC.block_(); @@ -168,7 +170,7 @@ function readDefMacro(PC, OC, make_kw) { return { name: name, args: args, body: body }; }; -function compileMacro(name, args, body, parser) { +function compile_macro(name, args, body, parser, statement_only) { if (HOP(parser.macros, name)) { throw new Error("Redefinition of macro '" + name + "'"); } @@ -190,16 +192,18 @@ function compileMacro(name, args, body, parser) { args: args, func: func }; - parser.define_call_parser(name, function(PC, OC){ - PC.expect("("); - var first = true, a = []; - while (!PC.is("punc", ")")) { - if (first) first = false; else PC.expect(","); - a.push(args[a.length].reader()); - } - PC.next(); - return [ "macro-expand", name, a ]; - }); + if (!statement_only) { + parser.define_call_parser(name, function(PC, OC){ + PC.expect("("); + var first = true, a = []; + while (!PC.is("punc", ")")) { + if (first) first = false; else PC.expect(","); + a.push(args[a.length].reader()); + } + PC.next(); + return [ "macro-expand", name, a ]; + }); + } return [ "comment2", "*** // Macro '" + name + "' compiled as:\n" + code + "\n ***" ]; }; diff --git a/lib/parse-js.js b/lib/parse-js.js index 1dfa1ec9..af9c6a64 100644 --- a/lib/parse-js.js +++ b/lib/parse-js.js @@ -1040,6 +1040,7 @@ function parse($TEXT, strict_mode) { maybe_assign : maybe_assign, expression : expression, in_loop : in_loop, + tokprocess : tokprocess, token : function() { return S.token }, tokval : function() { return S.token.value } }; From 90e862f5248c20d3a281189422f3bb12fdd6cb69 Mon Sep 17 00:00:00 2001 From: Mihai Bazon Date: Mon, 20 Sep 2010 09:51:21 +0200 Subject: [PATCH 19/24] minor --- lib/macro-js.js | 23 +++++++++++++++++------ lib/parse-js.js | 3 ++- 2 files changed, 19 insertions(+), 7 deletions(-) diff --git a/lib/macro-js.js b/lib/macro-js.js index 2e647da7..b5a88cf4 100644 --- a/lib/macro-js.js +++ b/lib/macro-js.js @@ -3,6 +3,9 @@ var pro = require("./process"); var $C = require("./constants"); var HOP = $C.HOP; +var prog1 = $C.prog1; +var curry = $C.curry; + function Unquote(ast) { this.ast = ast; }; function Symbol(sym) { this.sym = sym; }; @@ -68,7 +71,8 @@ function createParser() { parser.define_token_reader("\\", function(TC, OC) { TC.next(); var tok = TC.next_token(); - tok.macro = "unquote"; + if (tok.macro == "quote-stmt") tok.macro = "splice"; + else tok.macro = "unquote"; return tok; }); parser.define_statement("defmacro", function(PC, OC) { @@ -102,6 +106,8 @@ function createParser() { return quote_ast(PC.statement()); case "unquote": return new Unquote(cont()); + // case "splice": + // return new Unquote(cont(), true); default: throw new Error("Unsupported macro character: " + tok.macro); } @@ -128,16 +134,21 @@ function read_macro_args(PC, OC) { PC.unexpected(); switch (a.type = PC.tokval()) { case "block": + a.reader = function() { + return [ "block", [ PC.tokprocess(PC.block_) ] ]; + }; + break; + case "statement": a.reader = function() { return [ "block", [ PC.tokprocess(PC.statement) ] ]; }; break; case "name": a.reader = function() { - if (!PC.is("name")) PC.unexpected(); - return new Symbol(OC.prog1(function() { - return PC.tokprocess(PC.tokval); - }, PC.next)); + return new Symbol(prog1(curry(PC.tokprocess, function(){ + if (!PC.is("name")) PC.unexpected(); + return PC.tokval(); + }), PC.next)); }; break; default: @@ -145,7 +156,7 @@ function read_macro_args(PC, OC) { } PC.next(); } else { - a.reader = OC.curry(PC.expression, false); + a.reader = curry(PC.expression, false); } args.push(a); } diff --git a/lib/parse-js.js b/lib/parse-js.js index af9c6a64..6726da26 100644 --- a/lib/parse-js.js +++ b/lib/parse-js.js @@ -415,7 +415,8 @@ function tokenizer($TEXT, skip_comments) { handle_dot : handle_dot, read_word : read_word, with_eof_error : with_eof_error, - next_token : next_token + next_token : next_token, + is_token : is_token }; next_token.context = function(nc) { From a0101f822d6d3fb39dcf06d101229c392135c522 Mon Sep 17 00:00:00 2001 From: Mihai Bazon Date: Mon, 20 Sep 2010 15:14:53 +0200 Subject: [PATCH 20/24] allow a literal regexp to start a statement --- lib/parse-js.js | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/lib/parse-js.js b/lib/parse-js.js index 6726da26..147cf5ca 100644 --- a/lib/parse-js.js +++ b/lib/parse-js.js @@ -376,7 +376,9 @@ function tokenizer($TEXT, skip_comments) { } }; - function next_token() { + function next_token(force_regexp) { + if (force_regexp) + return read_regexp(); skip_whitespace(); var ch = peek(); if (CUSTOM_TOKEN_READER[ch]) @@ -523,6 +525,10 @@ function parse($TEXT, strict_mode) { }; function statement() { + if (is("operator", "/")) { + S.peeked = null; + S.token = S.input(true); // force regexp + } switch (S.token.type) { case "num": case "string": From c7b83e1be7628eac31ac4b289ee3cbc7d4122f30 Mon Sep 17 00:00:00 2001 From: Mihai Bazon Date: Mon, 20 Sep 2010 19:20:07 +0200 Subject: [PATCH 21/24] looks like comma-expressions should be explicitly forbidden in expressions of the "conditional" (?) operator. --- lib/parse-js.js | 18 +++++++----------- 1 file changed, 7 insertions(+), 11 deletions(-) diff --git a/lib/parse-js.js b/lib/parse-js.js index 147cf5ca..f9117fc1 100644 --- a/lib/parse-js.js +++ b/lib/parse-js.js @@ -952,15 +952,13 @@ function parse($TEXT, strict_mode) { return expr_op(expr_atom(true), 0); }; - function maybe_conditional(commas) { - if (arguments.length == 0) - commas = true; + function maybe_conditional() { var expr = expr_ops(); if (is("operator", "?")) { next(); - var yes = expression(); + var yes = expression(false); expect(":"); - return as("conditional", expr, yes, expression(commas)); + return as("conditional", expr, yes, expression(false)); } return expr; }; @@ -972,14 +970,12 @@ function parse($TEXT, strict_mode) { // return expr == "name" || expr == "dot" || expr == "sub"; }; - function maybe_assign(commas) { - if (arguments.length == 0) - commas = true; - var left = maybe_conditional(commas), val = S.token.value; + function maybe_assign() { + var left = maybe_conditional(), val = S.token.value; if (is("operator") && HOP(ASSIGNMENT, val)) { if (is_assignable(left)) { next(); - return as("assign", ASSIGNMENT[val], left, maybe_assign(commas)); + return as("assign", ASSIGNMENT[val], left, maybe_assign()); } croak("Invalid assignment"); } @@ -989,7 +985,7 @@ function parse($TEXT, strict_mode) { function expression(commas) { if (arguments.length == 0) commas = true; - var expr = maybe_assign(commas); + var expr = maybe_assign(); if (commas && is("punc", ",")) { next(); return as("seq", expr, expression()); From 2035accb1c2de7e1d84316e09a1c1057afce8502 Mon Sep 17 00:00:00 2001 From: Mihai Bazon Date: Tue, 21 Sep 2010 09:41:03 +0200 Subject: [PATCH 22/24] a more elaborate macro args reader --- lib/macro-js.js | 118 ++++++++++++++++++++++++++++++++---------------- 1 file changed, 80 insertions(+), 38 deletions(-) diff --git a/lib/macro-js.js b/lib/macro-js.js index b5a88cf4..26646952 100644 --- a/lib/macro-js.js +++ b/lib/macro-js.js @@ -119,48 +119,90 @@ function createParser() { return parser; }; -function read_macro_args(PC, OC) { +function read_defmacro_args(PC, OC) { var args = []; PC.expect("("); - var first = true; - while (!PC.is("punc", ")")) { - if (first) first = false; else PC.expect(","); - if (!PC.is("name")) PC.unexpected(); - var a = { name: PC.tokval() }; - PC.next(); - if (PC.is("punc", ":")) { - PC.next(); - if (!(PC.is("name") || PC.is("keyword"))) - PC.unexpected(); - switch (a.type = PC.tokval()) { - case "block": - a.reader = function() { - return [ "block", [ PC.tokprocess(PC.block_) ] ]; - }; - break; - case "statement": - a.reader = function() { - return [ "block", [ PC.tokprocess(PC.statement) ] ]; - }; - break; - case "name": - a.reader = function() { - return new Symbol(prog1(curry(PC.tokprocess, function(){ - if (!PC.is("name")) PC.unexpected(); - return PC.tokval(); - }), PC.next)); + get_list(); + function get_list(nested) { + var first = true; + var optional = false; + while (!PC.is("punc", ")")) { + if (!first) PC.expect(","); + if (PC.is("punc", "(")) { + PC.next(); + get_list(true); + } else { + if (!PC.is("name")) PC.unexpected(); + var a = { + name : PC.tokval(), + optional : optional }; - break; - default: - PC.unexpected(); + if (first && nested) { + a.nested = true; + } + PC.next(); + if (PC.is("punc", ":")) { + PC.next(); + if (!(PC.is("name") || PC.is("keyword"))) + PC.unexpected(); + switch (a.type = PC.tokval()) { + case "block": + a.reader = function() { + return [ "block", [ PC.tokprocess(PC.block_) ] ]; + }; + break; + case "statement": + a.reader = function() { + return [ "block", [ PC.tokprocess(PC.statement) ] ]; + }; + break; + case "name": + a.reader = function() { + return new Symbol(prog1(curry(PC.tokprocess, function(){ + if (!PC.is("name")) PC.unexpected(); + return PC.tokval(); + }), PC.next)); + }; + break; + default: + PC.unexpected(); + } + PC.next(); + } else { + a.reader = curry(PC.expression, false); + } + if (PC.is("operator", "*")) { + if (!nested) + throw new Error("&rest can appear only in nested argument list"); + a.rest = true; + a.reader = (function(orig){ + return function() { + var a = [], first = true; + PC.expect("("); + while (!PC.is("punc", ")")) { + if (first) first = false; else PC.expect(","); + a.push(orig()); + } + PC.next(); + return a; + }; + })(a.reader); + PC.next(); + } + else if (PC.is("operator", "?")) { + optional = a.optional = true; + a.reader = (function(orig){ + return function() { + }; + })(a.reader); + PC.next(); + } + args.push(a); } - PC.next(); - } else { - a.reader = curry(PC.expression, false); + first = false; } - args.push(a); - } - PC.next(); // skip closing paren + PC.next(); // skip closing paren + }; return args; }; @@ -173,7 +215,7 @@ function read_defmacro(PC, OC, make_kw) { OC.self.define_keyword(name); PC.next(); //*** read arguments list - var args = read_macro_args(PC, OC); + var args = read_defmacro_args(PC, OC); //*** read body; set in_function so that "return" is allowed. PC.S.in_function++; var body = PC.block_(); From dae8ed2950d156c1c2130a1f8dbd38f9dbeacaa6 Mon Sep 17 00:00:00 2001 From: Mihai Bazon Date: Fri, 24 Sep 2010 14:39:29 +0200 Subject: [PATCH 23/24] minor --- lib/macro-js.js | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/lib/macro-js.js b/lib/macro-js.js index 26646952..1d8de9fa 100644 --- a/lib/macro-js.js +++ b/lib/macro-js.js @@ -227,15 +227,15 @@ function compile_macro(name, args, body, parser, statement_only) { if (HOP(parser.macros, name)) { throw new Error("Redefinition of macro '" + name + "'"); } - var ast = [ "toplevel", [[ + var ast = [ "defun", name, args.map(function(a) { return a.name }), body - ]]]; - var code = pro.gen_code(ast, { indent_start: 3, plainsyms: true }); + ]; + var code = pro.gen_code(ast, { plainsyms: true }); var func; - try { func = new Function("return (" + code + ");").call(parser); } catch(ex) { + try { func = new Function("return " + code).call(parser); } catch(ex) { sys.puts("Error compiling macro '" + name + "'"); sys.puts(code); sys.puts(ex.toString()); @@ -257,7 +257,7 @@ function compile_macro(name, args, body, parser, statement_only) { return [ "macro-expand", name, a ]; }); } - return [ "comment2", "*** // Macro '" + name + "' compiled as:\n" + code + "\n ***" ]; + return [ "comment2", "*** // Macro '" + name + "' compiled as:\n" + code + " ***" ]; }; function macro_expand(parser, ast) { From 7137daaff65129d5759356fc5ba9ef4ac68fd0dd Mon Sep 17 00:00:00 2001 From: Mihai Bazon Date: Sat, 25 Sep 2010 10:15:21 +0200 Subject: [PATCH 24/24] some support for splicing argument list, to define a macro that defines a function --- lib/macro-js.js | 55 +++++++++++++++----- lib/process.js | 5 +- test/macro.js | 21 ++++++++ test/macro/defun.js | 26 ++++++++++ test/macro/literal-obj.js | 17 +++++++ test/macro/test.js | 103 ++++++++++++++++++++++++++++++++++++++ 6 files changed, 213 insertions(+), 14 deletions(-) create mode 100755 test/macro.js create mode 100644 test/macro/defun.js create mode 100644 test/macro/literal-obj.js create mode 100644 test/macro/test.js diff --git a/lib/macro-js.js b/lib/macro-js.js index 1d8de9fa..ae80797c 100644 --- a/lib/macro-js.js +++ b/lib/macro-js.js @@ -6,7 +6,7 @@ var HOP = $C.HOP; var prog1 = $C.prog1; var curry = $C.curry; -function Unquote(ast) { this.ast = ast; }; +function Unquote(ast, splice) { this.ast = ast; this.splice = splice; }; function Symbol(sym) { this.sym = sym; }; @@ -20,7 +20,11 @@ function quote_ast(ast) { return [ "name", "undefined" ]; } else if (ast instanceof Unquote) { - return ast.ast; + if (ast.splice) { + return [ "array", [ [ "string", "splice" ], ast.ast ] ]; + } else { + return ast.ast; + } } else if (ast instanceof Symbol) { return ast; @@ -54,6 +58,9 @@ function createParser() { parser.macro_expand = function(ast) { return macro_expand(parser, ast); }; + parser.splice_list = function(list) { + return [ "splice", list ]; + }; parser.quote = quote_ast; parser.macros = {}; parser.define_token_reader("`", function(TC, OC) { @@ -106,8 +113,8 @@ function createParser() { return quote_ast(PC.statement()); case "unquote": return new Unquote(cont()); - // case "splice": - // return new Unquote(cont(), true); + case "splice": + return new Unquote(cont(), true); default: throw new Error("Unsupported macro character: " + tok.macro); } @@ -122,24 +129,22 @@ function createParser() { function read_defmacro_args(PC, OC) { var args = []; PC.expect("("); - get_list(); - function get_list(nested) { + get_list(0); + function get_list(level) { var first = true; var optional = false; while (!PC.is("punc", ")")) { if (!first) PC.expect(","); if (PC.is("punc", "(")) { PC.next(); - get_list(true); + get_list(level + 1); } else { if (!PC.is("name")) PC.unexpected(); var a = { name : PC.tokval(), optional : optional }; - if (first && nested) { - a.nested = true; - } + a.level = level; PC.next(); if (PC.is("punc", ":")) { PC.next(); @@ -172,8 +177,8 @@ function read_defmacro_args(PC, OC) { a.reader = curry(PC.expression, false); } if (PC.is("operator", "*")) { - if (!nested) - throw new Error("&rest can appear only in nested argument list"); + if (level == 0) + throw new Error("'*' can appear only in nested argument list"); a.rest = true; a.reader = (function(orig){ return function() { @@ -265,10 +270,16 @@ function macro_expand(parser, ast) { return normalize_ast(w.with_walkers({ "macro-expand": function(macro, args) { var func = parser.macros[macro].func; - var ret = func.apply(parser, args.map(w.walk)); + args = args.map(w.walk); + var ret = func.apply(parser, args); ret = replace_symbols(ret); ret = w.walk(ret); return ret; + }, + "-other": function() { + return this; + // if (this[0] instanceof Symbol) + // return this; } }, function() { return w.walk(ast); @@ -302,6 +313,8 @@ function replace_symbols(ast) { case "function": case "defun": ast[1] = normalize_symbol(ast[1]); + if (ast[2][0] instanceof Array) ast[2] = ast[2][0]; + if (ast[2][0] == "splice") ast[2] = ast[2][1]; ast[2] = ast[2].map(function(name){ return normalize_symbol(name) }); ast[3] = ast[3].map(replace_symbols); return ast; @@ -355,6 +368,22 @@ function normalize_ast(ast) { case "with": return expr; } + }, + "splice": function(a) { + if (w.parent()[0] == "stat" && a[0] == "block") { + if (a[1].length > 0 && a[1][0] instanceof Array) { + a[1] = a[1][0]; + } + return a[0]; + } else { + for (var i = 0; i < a.length; ++i) + this[i] = a[i]; + this.length = a.length; + return this; + } + }, + "-other": function() { + return this; } }, function() { return w.walk(ast); diff --git a/lib/process.js b/lib/process.js index 133a87d3..0164e081 100644 --- a/lib/process.js +++ b/lib/process.js @@ -216,8 +216,8 @@ function ast_walker(ast) { if (!(ast instanceof Array)) return ast; } - stack.push(ast); var type = ast[0]; + stack.push(ast); var gen = user[type]; if (gen) { var ret = gen.apply(ast, ast.slice(1)); @@ -225,6 +225,9 @@ function ast_walker(ast) { return ret; } gen = walkers[type]; + if (!gen) { + gen = walkers["-other"] || user["-other"]; + } if (!gen) { throw new Error("No generator for " + ast); } diff --git a/test/macro.js b/test/macro.js new file mode 100755 index 00000000..6a132036 --- /dev/null +++ b/test/macro.js @@ -0,0 +1,21 @@ +#! /usr/bin/env node + +global.sys = require("sys"); +global.INSPECT = INSPECT; +var fs = require("fs"); + +var macrojs = require("../lib/macro-js"); +var pro = require("../lib/process"); +var p = macrojs.createParser(); + +fs.readFile(process.argv[2], function(err, data){ + data = data.toString(); + var ast = p.parse(data); + INSPECT(ast); + // sys.puts(pro.gen_code(pro.ast_squeeze(ast), true)); + sys.puts(pro.gen_code(ast, true)); +}); + +function INSPECT(obj) { + sys.puts(sys.inspect(obj, null, null)); +}; diff --git a/test/macro/defun.js b/test/macro/defun.js new file mode 100644 index 00000000..5814678d --- /dev/null +++ b/test/macro/defun.js @@ -0,0 +1,26 @@ +// -*- espresso -*- + +defmacro ensure_ordered(a:name, b:name) { + var tmp = this.gensym(); + return @if (\a > \b) { + var \tmp = \a; + \a = \b; + \b = \tmp; + }; +} + +defstat defun_region(name:name, (args:name*), b:block) { + var p1 = args[0], p2 = args[1]; + return @function \name (\p1, \p2) { + ensure_ordered(p1, p2); + \@b + }; +} + +// ensure_ordered(foo, bar); + +defun_region foo(start, stop) { + for (var i = start; i <= stop; ++i) { + print(getChar(i)); + } +} diff --git a/test/macro/literal-obj.js b/test/macro/literal-obj.js new file mode 100644 index 00000000..9d9505b4 --- /dev/null +++ b/test/macro/literal-obj.js @@ -0,0 +1,17 @@ +// -*- espresso -*- + +defmacro mkobj(p1, v1, p2, v2, p3, v3) { + var opt = this.gensym(); + var bar = [ "name", "while" ]; + return `{ + \p1: \v1, + \p2: \v2, + \p3: \(this.quote(v3)), + \opt: "And some more", + \bar: "even more" + }; +}; + +mkobj(foo, [ 1, 2, 3 ], + bar, "some string here", + baz, { a: 1, b: 2 }); diff --git a/test/macro/test.js b/test/macro/test.js new file mode 100644 index 00000000..ee8a7f72 --- /dev/null +++ b/test/macro/test.js @@ -0,0 +1,103 @@ +// -*- espresso -*- + +// defmacro test(a:name, b, c:statement) { +// return [ "block", [ [ "binary", "+", [ "name", a ], b ] ].concat(c) ]; +// } + +// test(foo, { parc: "mak" }, { +// var bar = 10; +// check(this.out()); +// }); + +// defmacro order(a:name, b:name) { +// var tmp = this.gensym(); +// return @{ +// if (\a > \b) { +// var \tmp = \a; +// \a = \b; +// \b = \tmp; +// } +// crap(); +// }; +// }; + +defmacro order(a:name, b:name) { + var tmp = this.gensym(); + return `if (\a > \b) { + var \tmp = \a; + \a = \b; + \b = \tmp; + }; +}; + +// defmacro with_orderd(a:name, b:name, c:statement) { +// return @{ +// order(\a, \b); +// \c; +// }; +// }; + +defmacro with_orderd(a:name, b:name, c:statement) { + var tmp = this.gensym(); + return `(function(\a, \b){ + if (\a > \b) { + var \tmp = \a; + \a = \b; + \b = \tmp; + } + \c; + })(\a, \b); +}; + +with_orderd(crap, mak, { + print("Smallest is " + crap); + print("And " + mak + " follows"); + order(mak, crap); + print("Reverse order: " + mak + ", " + crap); +}); + +with_orderd( + foo, bar, + print("order: " + foo + ", " + bar) +); + +// defmacro order(a:name, b:name) { +// var tmp = this.gensym(); +// return `(function(\tmp){ +// \a = \b; +// \b = \tmp; +// })(\a); +// }; + +var foo = 10; +var bar = 20; +order(foo, bar); + +// defmacro qwe (a) { +// var tmp = this.symbol("crapmak"); +// return @{ +// var \tmp = \a; +// ++\tmp; +// }; +// } + +// var a = 5; +// qwe(a); + + +defstat unless(cond, b:statement) { + sys.log("********************************************"); + INSPECT(cond); + return @if (!\cond) \b; +} + +unless (foo + bar < 10) { + crap(); + mak(); +} + +(function(){ + unless (foo + bar < 10) unless (bar) return @if (baz) { + crap(); + } +})();