acorn.js | |
---|---|
Acorn is a tiny, fast JavaScript parser written in JavaScript. Acorn was written by Marijn Haverbeke and released under an MIT license. The Unicode regexps (for identifiers and whitespace) were taken from Esprima by Ariya Hidayat. Git repositories for Acorn are available at
Please use the GitHub bug tracker to report issues. This file defines the main parser interface. The library also comes with an error-tolerant parser and an abstract syntax tree walker, defined in other files. | (function(root, mod) {
if (typeof exports == "object" && typeof module == "object") return mod(exports); // CommonJS
if (typeof define == "function" && define.amd) return define(["exports"], mod); // AMD
mod(root.acorn || (root.acorn = {})); // Plain browser env
})(this, function(exports) {
"use strict";
exports.version = "0.6.0"; |
The main exported interface (under | var options, input, inputLen, sourceFile;
exports.parse = function(inpt, opts) {
input = String(inpt); inputLen = input.length;
setOptions(opts);
initTokenState();
return parseTopLevel(options.program);
}; |
A second optional argument can be given to further configure the parser process. These options are recognized: | var defaultOptions = exports.defaultOptions = { |
| ecmaVersion: 5, |
Turn on | strictSemicolons: false, |
When | allowTrailingCommas: true, |
By default, reserved words are not enforced. Enable
| forbidReserved: false, |
When enabled, a return at the top level is not considered an error. | allowReturnOutsideFunction: false, |
When | locations: false, |
A function can be passed as | onComment: null, |
Nodes have their start and end characters offsets recorded in
| ranges: false, |
It is possible to parse multiple files into a single AST by
passing the tree produced by parsing the first file as
| program: null, |
When | sourceFile: null, |
This value, if given, is stored in every node, whether
| directSourceFile: null
};
// Installs the option set used by the next parse/tokenize run.
//
// `opts` may be omitted. Its own properties are copied into a fresh
// object, so the caller's object is never modified; any option named
// in `defaultOptions` that `opts` does not define itself is then
// filled in from `defaultOptions`. Also caches `sourceFile` (null
// when unset) and selects the keyword predicate matching
// `ecmaVersion`.
function setOptions(opts) {
  var hasOwn = Object.prototype.hasOwnProperty, opt;
  options = {};
  if (opts) {
    for (opt in opts) if (hasOwn.call(opts, opt))
      options[opt] = opts[opt];
  }
  for (opt in defaultOptions) if (!hasOwn.call(options, opt))
    options[opt] = defaultOptions[opt];
  sourceFile = options.sourceFile || null;
  isKeyword = options.ecmaVersion >= 6 ? isEcma6Keyword : isEcma5AndLessKeyword;
}
// The `getLineInfo` function translates a character offset in `input`
// into a `{line, column}` object. Lines are counted from 1; the column
// is the distance from the start of that line. Every line break that
// begins before `offset` is counted.
var getLineInfo = exports.getLineInfo = function(input, offset) {
  var line = 1, lineStart = 0, match;
  for (;;) {
    // `lineBreak` is a shared global regexp, so position it explicitly.
    lineBreak.lastIndex = lineStart;
    match = lineBreak.exec(input);
    if (!match || match.index >= offset) break;
    line += 1;
    lineStart = match.index + match[0].length;
  }
  return {line: line, column: offset - lineStart};
};
Acorn is organized as a tokenizer and a recursive-descent parser.
The | exports.tokenize = function(inpt, opts) {
input = String(inpt); inputLen = input.length;
setOptions(opts);
initTokenState();
var t = {};
function getToken(forceRegexp) {
lastEnd = tokEnd;
readToken(forceRegexp);
t.start = tokStart; t.end = tokEnd;
t.startLoc = tokStartLoc; t.endLoc = tokEndLoc;
t.type = tokType; t.value = tokVal;
return t;
}
getToken.jumpTo = function(pos, reAllowed) {
tokPos = pos;
if (options.locations) {
tokCurLine = 1;
tokLineStart = lineBreak.lastIndex = 0;
var match;
while ((match = lineBreak.exec(input)) && match.index < pos) {
++tokCurLine;
tokLineStart = match.index + match[0].length;
}
}
tokRegexpAllowed = reAllowed;
skipSpace();
};
return getToken;
}; |
State is kept in (closure-)global variables. We already saw the
| |
The current position of the tokenizer in the input. | var tokPos; |
The start and end offsets of the current token. | var tokStart, tokEnd; |
When | var tokStartLoc, tokEndLoc; |
The type and value of the current token. Token types are objects,
named by variables against which they can be compared, and
holding properties that describe them (indicating, for example,
the precedence of an infix operator, and the original name of a
keyword token). The kind of value that's held in | var tokType, tokVal; |
Internal state for the tokenizer. To distinguish between division
operators and regular expressions, it remembers whether the last
token was one that is allowed to be followed by an expression.
(If it is, a slash is probably a regexp, if it isn't it's a
division operator. See the | var tokRegexpAllowed; |
When | var tokCurLine, tokLineStart; |
These store the position of the previous token, which is useful
when finishing a node and assigning its | var lastStart, lastEnd, lastEndLoc; |
This is the parser's state. | var inFunction, labels, strict; |
// This function is used to raise exceptions on parse errors. It takes
// an offset integer (into the current `input`) to indicate the
// location of the error, appends the resolved "(line:column)" to the
// message, and throws a `SyntaxError` carrying `pos`, `loc` and
// `raisedAt` (the tokenizer position when the error was raised).
function raise(pos, message) {
  var loc = getLineInfo(input, pos);
  var err = new SyntaxError(message + " (" + loc.line + ":" + loc.column + ")");
  err.pos = pos;
  err.loc = loc;
  err.raisedAt = tokPos;
  throw err;
}
Reused empty array added for node fields that are always empty. | var empty = []; |
Token types | |
The assignment of fine-grained, information-carrying type objects allows the tokenizer to store the information it has about a token in a way that is very cheap for the parser to look up. | |
All token type variables start with an underscore, to make them easy to recognize. | |
These are the general types. The | var _num = {type: "num"}, _regexp = {type: "regexp"}, _string = {type: "string"};
var _name = {type: "name"}, _eof = {type: "eof"}; |
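Keyword tokens. The `keyword` property holds the keyword's own text. The `beforeExpr` property marks token types that can be followed by an expression, so that a slash after them is read as a regular expression rather than a division (see `finishToken`). `isLoop` marks a keyword as starting a loop.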
| var _break = {keyword: "break"}, _case = {keyword: "case", beforeExpr: true}, _catch = {keyword: "catch"};
var _continue = {keyword: "continue"}, _debugger = {keyword: "debugger"}, _default = {keyword: "default"};
var _do = {keyword: "do", isLoop: true}, _else = {keyword: "else", beforeExpr: true};
var _finally = {keyword: "finally"}, _for = {keyword: "for", isLoop: true}, _function = {keyword: "function"};
var _if = {keyword: "if"}, _return = {keyword: "return", beforeExpr: true}, _switch = {keyword: "switch"};
var _throw = {keyword: "throw", beforeExpr: true}, _try = {keyword: "try"}, _var = {keyword: "var"};
var _let = {keyword: "let"}, _const = {keyword: "const"};
var _while = {keyword: "while", isLoop: true}, _with = {keyword: "with"}, _new = {keyword: "new", beforeExpr: true};
var _this = {keyword: "this"}; |
The keywords that denote values. | var _null = {keyword: "null", atomValue: null}, _true = {keyword: "true", atomValue: true};
var _false = {keyword: "false", atomValue: false}; |
Some keywords are treated as regular operators. | var _in = {keyword: "in", binop: 7, beforeExpr: true}; |
Map keyword names to token types. | var keywordTypes = {"break": _break, "case": _case, "catch": _catch,
"continue": _continue, "debugger": _debugger, "default": _default,
"do": _do, "else": _else, "finally": _finally, "for": _for,
"function": _function, "if": _if, "return": _return, "switch": _switch,
"throw": _throw, "try": _try, "var": _var, "let": _let, "const": _const,
"while": _while, "with": _with,
"null": _null, "true": _true, "false": _false, "new": _new, "in": _in,
"instanceof": {keyword: "instanceof", binop: 7, beforeExpr: true}, "this": _this,
"typeof": {keyword: "typeof", prefix: true, beforeExpr: true},
"void": {keyword: "void", prefix: true, beforeExpr: true},
"delete": {keyword: "delete", prefix: true, beforeExpr: true}}; |
Punctuation token types. Again, the | var _bracketL = {type: "[", beforeExpr: true}, _bracketR = {type: "]"}, _braceL = {type: "{", beforeExpr: true};
var _braceR = {type: "}"}, _parenL = {type: "(", beforeExpr: true}, _parenR = {type: ")"};
var _comma = {type: ",", beforeExpr: true}, _semi = {type: ";", beforeExpr: true};
var _colon = {type: ":", beforeExpr: true}, _dot = {type: "."}, _ellipsis = {type: "..."}, _question = {type: "?", beforeExpr: true}; |
Operators. These carry several kinds of properties to help the parser use them properly (the presence of these properties is what categorizes them as operators).
| var _slash = {binop: 10, beforeExpr: true}, _eq = {isAssign: true, beforeExpr: true};
var _assign = {isAssign: true, beforeExpr: true};
var _incDec = {postfix: true, prefix: true, isUpdate: true}, _prefix = {prefix: true, beforeExpr: true};
var _logicalOR = {binop: 1, beforeExpr: true};
var _logicalAND = {binop: 2, beforeExpr: true};
var _bitwiseOR = {binop: 3, beforeExpr: true};
var _bitwiseXOR = {binop: 4, beforeExpr: true};
var _bitwiseAND = {binop: 5, beforeExpr: true};
var _equality = {binop: 6, beforeExpr: true};
var _relational = {binop: 7, beforeExpr: true};
var _bitShift = {binop: 8, beforeExpr: true};
var _plusMin = {binop: 9, prefix: true, beforeExpr: true};
var _multiplyModulo = {binop: 10, beforeExpr: true}; |
Provide access to the token types for external users of the tokenizer. | exports.tokTypes = {bracketL: _bracketL, bracketR: _bracketR, braceL: _braceL, braceR: _braceR,
parenL: _parenL, parenR: _parenR, comma: _comma, semi: _semi, colon: _colon,
dot: _dot, ellipsis: _ellipsis, question: _question, slash: _slash, eq: _eq,
name: _name, eof: _eof, num: _num, regexp: _regexp, string: _string};
for (var kw in keywordTypes) exports.tokTypes["_" + kw] = keywordTypes[kw]; |
// This is a trick taken from Esprima. To check whether a string is in
// a set, a predicate containing a big ugly `switch` statement is
// generated (with `new Function`, deliberately) from a space-separated
// string of words.
//
// The words are first grouped by length. When there are more than
// three length groups, an outer switch dispatches on `str.length`
// before comparing, to save on comparisons; otherwise a single flat
// switch over all words is produced.
function makePredicate(words) {
  var list = words.split(" ");
  var body = "";
  var groups = [];

  // Bucket the words by length, keeping first-seen order.
  for (var w = 0; w < list.length; ++w) {
    var word = list[w], placed = false;
    for (var g = 0; g < groups.length && !placed; ++g) {
      if (groups[g][0].length == word.length) {
        groups[g].push(word);
        placed = true;
      }
    }
    if (!placed) groups.push([word]);
  }

  // Appends the code that tests `str` against every word in `arr`.
  function emitMatch(arr) {
    if (arr.length == 1) {
      body += "return str === " + JSON.stringify(arr[0]) + ";";
      return;
    }
    body += "switch(str){";
    for (var k = 0; k < arr.length; ++k) body += "case " + JSON.stringify(arr[k]) + ":";
    body += "return true}return false;";
  }

  if (groups.length > 3) {
    // Largest groups first.
    groups.sort(function(a, b) { return b.length - a.length; });
    body += "switch(str.length){";
    for (var c = 0; c < groups.length; ++c) {
      body += "case " + groups[c][0].length + ":";
      emitMatch(groups[c]);
    }
    body += "}";
  } else {
    emitMatch(list);
  }
  return new Function("str", body);
}
The ECMAScript 3 reserved word list. | var isReservedWord3 = makePredicate("abstract boolean byte char class double enum export extends final float goto implements import int interface long native package private protected public short static super synchronized throws transient volatile"); |
ECMAScript 5 reserved words. | var isReservedWord5 = makePredicate("class enum extends super const export import"); |
The additional reserved words in strict mode. | var isStrictReservedWord = makePredicate("implements interface let package private protected public static yield"); |
The forbidden variable names in strict mode. | var isStrictBadIdWord = makePredicate("eval arguments"); |
And the keywords. | var ecma5AndLessKeywords = "break case catch continue debugger default do else finally for function if return switch throw try var while with null true false instanceof typeof void delete new in this";
var isEcma5AndLessKeyword = makePredicate(ecma5AndLessKeywords);
var isEcma6Keyword = makePredicate(ecma5AndLessKeywords + " let const");
var isKeyword = isEcma5AndLessKeyword; |
Character categories | |
Big ugly regular expressions that match characters in the whitespace, identifier, and identifier-start categories. These are only applied when a character is found to actually have a code point above 128. | var nonASCIIwhitespace = /[\u1680\u180e\u2000-\u200a\u202f\u205f\u3000\ufeff]/;
var nonASCIIidentifierStartChars = "\xaa\xb5\xba\xc0-\xd6\xd8-\xf6\xf8-\u02c1\u02c6-\u02d1\u02e0-\u02e4\u02ec\u02ee\u0370-\u0374\u0376\u0377\u037a-\u037d\u0386\u0388-\u038a\u038c\u038e-\u03a1\u03a3-\u03f5\u03f7-\u0481\u048a-\u0527\u0531-\u0556\u0559\u0561-\u0587\u05d0-\u05ea\u05f0-\u05f2\u0620-\u064a\u066e\u066f\u0671-\u06d3\u06d5\u06e5\u06e6\u06ee\u06ef\u06fa-\u06fc\u06ff\u0710\u0712-\u072f\u074d-\u07a5\u07b1\u07ca-\u07ea\u07f4\u07f5\u07fa\u0800-\u0815\u081a\u0824\u0828\u0840-\u0858\u08a0\u08a2-\u08ac\u0904-\u0939\u093d\u0950\u0958-\u0961\u0971-\u0977\u0979-\u097f\u0985-\u098c\u098f\u0990\u0993-\u09a8\u09aa-\u09b0\u09b2\u09b6-\u09b9\u09bd\u09ce\u09dc\u09dd\u09df-\u09e1\u09f0\u09f1\u0a05-\u0a0a\u0a0f\u0a10\u0a13-\u0a28\u0a2a-\u0a30\u0a32\u0a33\u0a35\u0a36\u0a38\u0a39\u0a59-\u0a5c\u0a5e\u0a72-\u0a74\u0a85-\u0a8d\u0a8f-\u0a91\u0a93-\u0aa8\u0aaa-\u0ab0\u0ab2\u0ab3\u0ab5-\u0ab9\u0abd\u0ad0\u0ae0\u0ae1\u0b05-\u0b0c\u0b0f\u0b10\u0b13-\u0b28\u0b2a-\u0b30\u0b32\u0b33\u0b35-\u0b39\u0b3d\u0b5c\u0b5d\u0b5f-\u0b61\u0b71\u0b83\u0b85-\u0b8a\u0b8e-\u0b90\u0b92-\u0b95\u0b99\u0b9a\u0b9c\u0b9e\u0b9f\u0ba3\u0ba4\u0ba8-\u0baa\u0bae-\u0bb9\u0bd0\u0c05-\u0c0c\u0c0e-\u0c10\u0c12-\u0c28\u0c2a-\u0c33\u0c35-\u0c39\u0c3d\u0c58\u0c59\u0c60\u0c61\u0c85-\u0c8c\u0c8e-\u0c90\u0c92-\u0ca8\u0caa-\u0cb3\u0cb5-\u0cb9\u0cbd\u0cde\u0ce0\u0ce1\u0cf1\u0cf2\u0d05-\u0d0c\u0d0e-\u0d10\u0d12-\u0d3a\u0d3d\u0d4e\u0d60\u0d61\u0d7a-\u0d7f\u0d85-\u0d96\u0d9a-\u0db1\u0db3-\u0dbb\u0dbd\u0dc0-\u0dc6\u0e01-\u0e30\u0e32\u0e33\u0e40-\u0e46\u0e81\u0e82\u0e84\u0e87\u0e88\u0e8a\u0e8d\u0e94-\u0e97\u0e99-\u0e9f\u0ea1-\u0ea3\u0ea5\u0ea7\u0eaa\u0eab\u0ead-\u0eb0\u0eb2\u0eb3\u0ebd\u0ec0-\u0ec4\u0ec6\u0edc-\u0edf\u0f00\u0f40-\u0f47\u0f49-\u0f6c\u0f88-\u0f8c\u1000-\u102a\u103f\u1050-\u1055\u105a-\u105d\u1061\u1065\u1066\u106e-\u1070\u1075-\u1081\u108e\u10a0-\u10c5\u10c7\u10cd\u10d0-\u10fa\u10fc-\u1248\u124a-\u124d\u1250-\u1256\u1258\u125a-\u125d\u1260-\u1288\u128a-\u128d\u1290-\u12b0\u12b2-\u12b5\u12b8-\u12be\u12c0\u12c2-\u12c5\u
12c8-\u12d6\u12d8-\u1310\u1312-\u1315\u1318-\u135a\u1380-\u138f\u13a0-\u13f4\u1401-\u166c\u166f-\u167f\u1681-\u169a\u16a0-\u16ea\u16ee-\u16f0\u1700-\u170c\u170e-\u1711\u1720-\u1731\u1740-\u1751\u1760-\u176c\u176e-\u1770\u1780-\u17b3\u17d7\u17dc\u1820-\u1877\u1880-\u18a8\u18aa\u18b0-\u18f5\u1900-\u191c\u1950-\u196d\u1970-\u1974\u1980-\u19ab\u19c1-\u19c7\u1a00-\u1a16\u1a20-\u1a54\u1aa7\u1b05-\u1b33\u1b45-\u1b4b\u1b83-\u1ba0\u1bae\u1baf\u1bba-\u1be5\u1c00-\u1c23\u1c4d-\u1c4f\u1c5a-\u1c7d\u1ce9-\u1cec\u1cee-\u1cf1\u1cf5\u1cf6\u1d00-\u1dbf\u1e00-\u1f15\u1f18-\u1f1d\u1f20-\u1f45\u1f48-\u1f4d\u1f50-\u1f57\u1f59\u1f5b\u1f5d\u1f5f-\u1f7d\u1f80-\u1fb4\u1fb6-\u1fbc\u1fbe\u1fc2-\u1fc4\u1fc6-\u1fcc\u1fd0-\u1fd3\u1fd6-\u1fdb\u1fe0-\u1fec\u1ff2-\u1ff4\u1ff6-\u1ffc\u2071\u207f\u2090-\u209c\u2102\u2107\u210a-\u2113\u2115\u2119-\u211d\u2124\u2126\u2128\u212a-\u212d\u212f-\u2139\u213c-\u213f\u2145-\u2149\u214e\u2160-\u2188\u2c00-\u2c2e\u2c30-\u2c5e\u2c60-\u2ce4\u2ceb-\u2cee\u2cf2\u2cf3\u2d00-\u2d25\u2d27\u2d2d\u2d30-\u2d67\u2d6f\u2d80-\u2d96\u2da0-\u2da6\u2da8-\u2dae\u2db0-\u2db6\u2db8-\u2dbe\u2dc0-\u2dc6\u2dc8-\u2dce\u2dd0-\u2dd6\u2dd8-\u2dde\u2e2f\u3005-\u3007\u3021-\u3029\u3031-\u3035\u3038-\u303c\u3041-\u3096\u309d-\u309f\u30a1-\u30fa\u30fc-\u30ff\u3105-\u312d\u3131-\u318e\u31a0-\u31ba\u31f0-\u31ff\u3400-\u4db5\u4e00-\u9fcc\ua000-\ua48c\ua4d0-\ua4fd\ua500-\ua60c\ua610-\ua61f\ua62a\ua62b\ua640-\ua66e\ua67f-\ua697\ua6a0-\ua6ef\ua717-\ua71f\ua722-\ua788\ua78b-\ua78e\ua790-\ua793\ua7a0-\ua7aa\ua7f8-\ua801\ua803-\ua805\ua807-\ua80a\ua80c-\ua822\ua840-\ua873\ua882-\ua8b3\ua8f2-\ua8f7\ua8fb\ua90a-\ua925\ua930-\ua946\ua960-\ua97c\ua984-\ua9b2\ua9cf\uaa00-\uaa28\uaa40-\uaa42\uaa44-\uaa4b\uaa60-\uaa76\uaa7a\uaa80-\uaaaf\uaab1\uaab5\uaab6\uaab9-\uaabd\uaac0\uaac2\uaadb-\uaadd\uaae0-\uaaea\uaaf2-\uaaf4\uab01-\uab06\uab09-\uab0e\uab11-\uab16\uab20-\uab26\uab28-\uab2e\uabc0-\uabe2\uac00-\ud7a3\ud7b0-\ud7c6\ud7cb-\ud7fb\uf900-\ufa6d\ufa70-\ufad9\ufb00-\ufb06\ufb13-\ufb17\ufb1d\ufb1f-\ufb28\ufb2a
-\ufb36\ufb38-\ufb3c\ufb3e\ufb40\ufb41\ufb43\ufb44\ufb46-\ufbb1\ufbd3-\ufd3d\ufd50-\ufd8f\ufd92-\ufdc7\ufdf0-\ufdfb\ufe70-\ufe74\ufe76-\ufefc\uff21-\uff3a\uff41-\uff5a\uff66-\uffbe\uffc2-\uffc7\uffca-\uffcf\uffd2-\uffd7\uffda-\uffdc";
var nonASCIIidentifierChars = "\u0300-\u036f\u0483-\u0487\u0591-\u05bd\u05bf\u05c1\u05c2\u05c4\u05c5\u05c7\u0610-\u061a\u0620-\u0649\u0672-\u06d3\u06e7-\u06e8\u06fb-\u06fc\u0730-\u074a\u0800-\u0814\u081b-\u0823\u0825-\u0827\u0829-\u082d\u0840-\u0857\u08e4-\u08fe\u0900-\u0903\u093a-\u093c\u093e-\u094f\u0951-\u0957\u0962-\u0963\u0966-\u096f\u0981-\u0983\u09bc\u09be-\u09c4\u09c7\u09c8\u09d7\u09df-\u09e0\u0a01-\u0a03\u0a3c\u0a3e-\u0a42\u0a47\u0a48\u0a4b-\u0a4d\u0a51\u0a66-\u0a71\u0a75\u0a81-\u0a83\u0abc\u0abe-\u0ac5\u0ac7-\u0ac9\u0acb-\u0acd\u0ae2-\u0ae3\u0ae6-\u0aef\u0b01-\u0b03\u0b3c\u0b3e-\u0b44\u0b47\u0b48\u0b4b-\u0b4d\u0b56\u0b57\u0b5f-\u0b60\u0b66-\u0b6f\u0b82\u0bbe-\u0bc2\u0bc6-\u0bc8\u0bca-\u0bcd\u0bd7\u0be6-\u0bef\u0c01-\u0c03\u0c46-\u0c48\u0c4a-\u0c4d\u0c55\u0c56\u0c62-\u0c63\u0c66-\u0c6f\u0c82\u0c83\u0cbc\u0cbe-\u0cc4\u0cc6-\u0cc8\u0cca-\u0ccd\u0cd5\u0cd6\u0ce2-\u0ce3\u0ce6-\u0cef\u0d02\u0d03\u0d46-\u0d48\u0d57\u0d62-\u0d63\u0d66-\u0d6f\u0d82\u0d83\u0dca\u0dcf-\u0dd4\u0dd6\u0dd8-\u0ddf\u0df2\u0df3\u0e34-\u0e3a\u0e40-\u0e45\u0e50-\u0e59\u0eb4-\u0eb9\u0ec8-\u0ecd\u0ed0-\u0ed9\u0f18\u0f19\u0f20-\u0f29\u0f35\u0f37\u0f39\u0f41-\u0f47\u0f71-\u0f84\u0f86-\u0f87\u0f8d-\u0f97\u0f99-\u0fbc\u0fc6\u1000-\u1029\u1040-\u1049\u1067-\u106d\u1071-\u1074\u1082-\u108d\u108f-\u109d\u135d-\u135f\u170e-\u1710\u1720-\u1730\u1740-\u1750\u1772\u1773\u1780-\u17b2\u17dd\u17e0-\u17e9\u180b-\u180d\u1810-\u1819\u1920-\u192b\u1930-\u193b\u1951-\u196d\u19b0-\u19c0\u19c8-\u19c9\u19d0-\u19d9\u1a00-\u1a15\u1a20-\u1a53\u1a60-\u1a7c\u1a7f-\u1a89\u1a90-\u1a99\u1b46-\u1b4b\u1b50-\u1b59\u1b6b-\u1b73\u1bb0-\u1bb9\u1be6-\u1bf3\u1c00-\u1c22\u1c40-\u1c49\u1c5b-\u1c7d\u1cd0-\u1cd2\u1d00-\u1dbe\u1e01-\u1f15\u200c\u200d\u203f\u2040\u2054\u20d0-\u20dc\u20e1\u20e5-\u20f0\u2d81-\u2d96\u2de0-\u2dff\u3021-\u3028\u3099\u309a\ua640-\ua66d\ua674-\ua67d\ua69f\ua6f0-\ua6f1\ua7f8-\ua800\ua806\ua80b\ua823-\ua827\ua880-\ua881\ua8b4-\ua8c4\ua8d0-\ua8d9\ua8f3-\ua8f7\ua900-\ua909\ua926-\ua92d\ua930-\ua945\ua980-\ua983\ua
9b3-\ua9c0\uaa00-\uaa27\uaa40-\uaa41\uaa4c-\uaa4d\uaa50-\uaa59\uaa7b\uaae0-\uaae9\uaaf2-\uaaf3\uabc0-\uabe1\uabec\uabed\uabf0-\uabf9\ufb20-\ufb28\ufe00-\ufe0f\ufe20-\ufe26\ufe33\ufe34\ufe4d-\ufe4f\uff10-\uff19\uff3f";
var nonASCIIidentifierStart = new RegExp("[" + nonASCIIidentifierStartChars + "]");
var nonASCIIidentifier = new RegExp("[" + nonASCIIidentifierStartChars + nonASCIIidentifierChars + "]"); |
Whether a single character denotes a newline. | var newline = /[\n\r\u2028\u2029]/; |
Matches a whole line break (where CRLF is considered a single line break). Used to count lines. | var lineBreak = /\r\n|[\n\r\u2028\u2029]/g; |
// Test whether a given character code starts an identifier: '$', '_',
// an ASCII letter, or (from 0xaa upward) anything matched by
// `nonASCIIidentifierStart`.
var isIdentifierStart = exports.isIdentifierStart = function(code) {
  var isUpper = code >= 65 && code <= 90;
  var isLower = code >= 97 && code <= 122;
  if (isUpper || isLower || code === 36 || code === 95) return true;
  return code >= 0xaa && nonASCIIidentifierStart.test(String.fromCharCode(code));
};
// Test whether a given character is part of an identifier: '$', '_',
// an ASCII letter or digit, or (from 0xaa upward) anything matched by
// `nonASCIIidentifier`.
var isIdentifierChar = exports.isIdentifierChar = function(code) {
  var isDigit = code >= 48 && code <= 57;
  var isUpper = code >= 65 && code <= 90;
  var isLower = code >= 97 && code <= 122;
  if (isDigit || isUpper || isLower || code === 36 || code === 95) return true;
  return code >= 0xaa && nonASCIIidentifier.test(String.fromCharCode(code));
};
Tokenizer | |
// These are used when `options.locations` is on, for the `tokStartLoc`
// and `tokEndLoc` values. A Position snapshots the tokenizer's current
// line and the column of `tokPos` within that line.
function Position() {
  this.line = tokCurLine;
  this.column = tokPos - tokLineStart;
}
// Reset the token state. Used at the start of a parse: rewinds to
// offset 0 on line 1, allows a regexp as the first token, and skips
// any leading whitespace and comments.
function initTokenState() {
  tokPos = 0;
  tokLineStart = 0;
  tokCurLine = 1;
  tokRegexpAllowed = true;
  skipSpace();
}
// Called at the end of every token. Sets `tokEnd` (and `tokEndLoc`
// when locations are on), `tokType`, `tokVal` and `tokRegexpAllowed`,
// and skips the space after the token so the tokenizer is positioned
// at the start of the next one.
function finishToken(type, val) {
  tokEnd = tokPos;
  if (options.locations) {
    tokEndLoc = new Position;
  }
  tokType = type;
  // Whitespace/comments are skipped before the value is stored; this
  // order is kept as-is because `skipSpace` can raise.
  skipSpace();
  tokVal = val;
  tokRegexpAllowed = type.beforeExpr;
}
// Skips a `/* ... */` comment starting at `tokPos`. Raises when the
// comment is never closed. With `options.locations` on, the line
// bookkeeping is advanced past every line break inside the comment.
// If `options.onComment` is set it is called with
// (true, text, start, end, startLoc, endLoc).
function skipBlockComment() {
  var startLoc = options.onComment && options.locations && new Position;
  var start = tokPos;
  tokPos += 2;
  var end = input.indexOf("*/", tokPos);
  if (end === -1) raise(tokPos - 2, "Unterminated comment");
  tokPos = end + 2;
  if (options.locations) {
    lineBreak.lastIndex = start;
    for (var m = lineBreak.exec(input); m && m.index < tokPos; m = lineBreak.exec(input)) {
      ++tokCurLine;
      tokLineStart = m.index + m[0].length;
    }
  }
  if (options.onComment) {
    var text = input.slice(start + 2, end);
    options.onComment(true, text, start, tokPos,
                      startLoc, options.locations && new Position);
  }
}
// Skips a line comment. Two characters at `tokPos` are skipped
// unconditionally, then everything up to (not including) the next
// line terminator or the end of input. If `options.onComment` is set
// it is called with (false, text, start, end, startLoc, endLoc).
function skipLineComment() {
  var start = tokPos;
  var startLoc = options.onComment && options.locations && new Position;
  tokPos += 2;
  var ch = input.charCodeAt(tokPos);
  while (tokPos < inputLen && ch !== 10 && ch !== 13 && ch !== 8232 && ch !== 8233) {
    ch = input.charCodeAt(++tokPos);
  }
  if (options.onComment) {
    var text = input.slice(start + 2, tokPos);
    options.onComment(false, text, start, tokPos,
                      startLoc, options.locations && new Position);
  }
}
// Called at the start of the parse and after every token. Skips
// whitespace and comments, and keeps the line bookkeeping up to date
// when `options.locations` is on. Stops at the first character that
// is neither whitespace nor the start of a comment.
function skipSpace() {
  while (tokPos < inputLen) {
    var ch = input.charCodeAt(tokPos);

    if (ch === 47) { // '/'
      var after = input.charCodeAt(tokPos + 1);
      if (after === 42) skipBlockComment();      // '/*'
      else if (after === 47) skipLineComment();  // '//'
      else break;
      continue;
    }

    if (ch === 10 || ch === 13 || ch === 8232 || ch === 8233) {
      ++tokPos;
      // CRLF counts as a single line break.
      if (ch === 13 && input.charCodeAt(tokPos) === 10) ++tokPos;
      if (options.locations) {
        ++tokCurLine;
        tokLineStart = tokPos;
      }
    } else if (ch === 32 || ch === 160 || (ch > 8 && ch < 14) ||
               (ch >= 5760 && nonASCIIwhitespace.test(String.fromCharCode(ch)))) {
      // ' ', '\xa0', tab / vertical tab / form feed, or other Unicode space.
      ++tokPos;
    } else {
      break;
    }
  }
}
Token reading | |
// The `readToken` function (further down) is what is called to fetch
// the next token. It is somewhat obscure, because it works in
// character codes rather than characters, and because operator parsing
// has been inlined into it. All in the name of speed. Its
// `forceRegexp` parameter makes it read a regular expression
// regardless of `tokRegexpAllowed`.
//
// The `readToken_*` helpers below each handle one family of leading
// characters. This one handles '.': a number like `.5`, an ellipsis
// `...` (only when `ecmaVersion >= 6`), or a plain dot.
function readToken_dot() {
  var next = input.charCodeAt(tokPos + 1);
  if (next >= 48 && next <= 57) return readNumber(true);
  var isEllipsis = options.ecmaVersion >= 6 && next === 46 && // 46 = dot '.'
                   input.charCodeAt(tokPos + 2) === 46;
  tokPos += isEllipsis ? 3 : 1;
  return finishToken(isEllipsis ? _ellipsis : _dot);
}
// Handles '/': the start of a regexp when one is allowed here,
// otherwise the '/=' assignment or the division operator.
function readToken_slash() { // '/'
  if (tokRegexpAllowed) {
    ++tokPos;
    return readRegexp();
  }
  var isAssign = input.charCodeAt(tokPos + 1) === 61; // '/='
  return isAssign ? finishOp(_assign, 2) : finishOp(_slash, 1);
}
// Handles '%' and '*': either the compound assignment ('%=', '*=')
// or the plain binary operator.
function readToken_mult_modulo() { // '%*'
  var followedByEq = input.charCodeAt(tokPos + 1) === 61;
  return followedByEq ? finishOp(_assign, 2) : finishOp(_multiplyModulo, 1);
}
// Handles '|' and '&' (`code` is the character read): the doubled
// logical operator, the compound assignment, or the bitwise operator.
function readToken_pipe_amp(code) { // '|&'
  var isPipe = code === 124;
  var next = input.charCodeAt(tokPos + 1);
  if (next === code) return finishOp(isPipe ? _logicalOR : _logicalAND, 2);
  if (next === 61) return finishOp(_assign, 2);
  return finishOp(isPipe ? _bitwiseOR : _bitwiseAND, 1);
}
// Handles '^': either '^=' or the bitwise XOR operator.
function readToken_caret() { // '^'
  var followedByEq = input.charCodeAt(tokPos + 1) === 61;
  return followedByEq ? finishOp(_assign, 2) : finishOp(_bitwiseXOR, 1);
}
// Handles '+' and '-' (`code` is the character read): '++' / '--',
// the compound assignment, or the plain operator. A `-->` that has a
// line break between the previous token's end and itself is treated
// as a line comment.
function readToken_plus_min(code) { // '+-'
  var next = input.charCodeAt(tokPos + 1);
  if (next === 61) return finishOp(_assign, 2);
  if (next !== code) return finishOp(_plusMin, 1);

  var isArrowComment = next == 45 && input.charCodeAt(tokPos + 2) == 62 &&
                       newline.test(input.slice(lastEnd, tokPos));
  if (!isArrowComment) return finishOp(_incDec, 2);

  // A `-->` line comment
  tokPos += 3;
  skipLineComment();
  skipSpace();
  return readToken();
}
// Handles '<' and '>' (`code` is the character read): shift operators
// and their assignment forms, the `<!--` comment opener, and the
// relational operators.
function readToken_lt_gt(code) { // '<>'
  var next = input.charCodeAt(tokPos + 1);

  if (next === code) {
    // '<<', '>>' or '>>>', optionally followed by '='.
    var shiftLen = (code === 62 && input.charCodeAt(tokPos + 2) === 62) ? 3 : 2;
    if (input.charCodeAt(tokPos + shiftLen) === 61) return finishOp(_assign, shiftLen + 1);
    return finishOp(_bitShift, shiftLen);
  }

  if (code == 60 && next == 33 && input.charCodeAt(tokPos + 2) == 45 &&
      input.charCodeAt(tokPos + 3) == 45) {
    // `<!--` is treated as the start of a line comment.
    tokPos += 4;
    skipLineComment();
    skipSpace();
    return readToken();
  }

  var size = 1;
  if (next === 61) {
    size = input.charCodeAt(tokPos + 2) === 61 ? 3 : 2;
  }
  return finishOp(_relational, size);
}
// Handles '=' and '!' (`code` is the character read): the equality
// operators when followed by '=', otherwise plain assignment ('=') or
// the prefix operator ('!').
function readToken_eq_excl(code) { // '=!'
  if (input.charCodeAt(tokPos + 1) !== 61) {
    return finishOp(code === 61 ? _eq : _prefix, 1);
  }
  var hasThirdEq = input.charCodeAt(tokPos + 2) === 61;
  return finishOp(_equality, hasThirdEq ? 3 : 2);
}
// Dispatches on the character code at `tokPos` for everything that is
// not an identifier. Returns `false` when `code` does not start any
// token handled here; otherwise returns whatever the selected reader
// returns.
function getTokenFromCode(code) {
  switch (code) {
    // The interpretation of a dot depends on whether it is followed
    // by a digit or another two dots.
    case 46: // '.'
      return readToken_dot();

    // Punctuation tokens.
    case 40: // '('
      ++tokPos;
      return finishToken(_parenL);
    case 41: // ')'
      ++tokPos;
      return finishToken(_parenR);
    case 59: // ';'
      ++tokPos;
      return finishToken(_semi);
    case 44: // ','
      ++tokPos;
      return finishToken(_comma);
    case 91: // '['
      ++tokPos;
      return finishToken(_bracketL);
    case 93: // ']'
      ++tokPos;
      return finishToken(_bracketR);
    case 123: // '{'
      ++tokPos;
      return finishToken(_braceL);
    case 125: // '}'
      ++tokPos;
      return finishToken(_braceR);
    case 58: // ':'
      ++tokPos;
      return finishToken(_colon);
    case 63: // '?'
      ++tokPos;
      return finishToken(_question);

    // '0x' is a hexadecimal number.
    case 48: // '0'
      var next = input.charCodeAt(tokPos + 1);
      if (next === 120 || next === 88) return readHexNumber();
      // Not hexadecimal: falls through to the general number case.

    // Anything else beginning with a digit is an integer, octal
    // number, or float.
    case 49: case 50: case 51: case 52: case 53:
    case 54: case 55: case 56: case 57: // 1-9
      return readNumber(false);

    // Quotes produce strings.
    case 34: case 39: // '"', "'"
      return readString(code);

    // Operators are parsed inline in tiny state machines. '=' (61) is
    // often referred to.
    case 47: // '/'
      return readToken_slash();

    case 37: case 42: // '%*'
      return readToken_mult_modulo();

    case 124: case 38: // '|&'
      return readToken_pipe_amp(code);

    case 94: // '^'
      return readToken_caret();

    case 43: case 45: // '+-'
      return readToken_plus_min(code);

    case 60: case 62: // '<>'
      return readToken_lt_gt(code);

    case 61: case 33: // '=!'
      return readToken_eq_excl(code);

    case 126: // '~'
      return finishOp(_prefix, 1);
  }

  return false;
}
// Reads the next token. Normally the token starts at `tokPos`; with
// `forceRegexp` the tokenizer is rewound to just past the current
// token's first character and a regular expression is read from
// there. Raises on a character that cannot start any token.
function readToken(forceRegexp) {
  if (forceRegexp) tokPos = tokStart + 1;
  else tokStart = tokPos;
  if (options.locations) tokStartLoc = new Position;
  if (forceRegexp) return readRegexp();
  if (tokPos >= inputLen) return finishToken(_eof);

  var code = input.charCodeAt(tokPos);
  // Identifier or keyword. '\uXXXX' sequences are allowed in
  // identifiers, so '\' also dispatches to that.
  if (code === 92 /* '\' */ || isIdentifierStart(code)) return readWord();

  var tok = getTokenFromCode(code);
  if (tok !== false) return tok;

  // If we are here, we either found a non-ASCII identifier
  // character, or something that's entirely disallowed.
  var ch = String.fromCharCode(code);
  if (ch === "\\" || nonASCIIidentifierStart.test(ch)) return readWord();
  raise(tokPos, "Unexpected character '" + ch + "'");
  return tok;
}
// Finishes an operator token of `size` characters starting at
// `tokPos`; the operator's source text becomes the token value.
// Returns nothing.
function finishOp(type, size) {
  var end = tokPos + size;
  var text = input.slice(tokPos, end);
  tokPos = end;
  finishToken(type, text);
}
// Parse a regular expression. Some context-awareness is necessary,
// since a '/' inside a '[]' set does not end the expression.
//
// Expects `tokPos` to point just past the opening slash. Raises on an
// unterminated literal, an unrecognized flag, or a pattern that the
// host `RegExp` constructor rejects with a SyntaxError; any other
// exception from the constructor is rethrown unchanged. On success a
// `_regexp` token holding the constructed RegExp is finished.
function readRegexp() {
  var escaped, inClass, start = tokPos;
  for (;;) {
    if (tokPos >= inputLen) raise(start, "Unterminated regular expression");
    var ch = input.charAt(tokPos);
    if (newline.test(ch)) raise(start, "Unterminated regular expression");
    if (!escaped) {
      if (ch === "[") inClass = true;
      else if (ch === "]" && inClass) inClass = false;
      else if (ch === "/" && !inClass) break;
      escaped = ch === "\\";
    } else escaped = false;
    ++tokPos;
  }
  var content = input.slice(start, tokPos);
  ++tokPos; // step over the closing '/'
  // The flags are read with `readWord1`, which also accepts '\uXXXX'
  // escape sequences.
  var mods = readWord1();
  if (mods && !/^[gmsiy]*$/.test(mods)) raise(start, "Invalid regular expression flag");
  var value;
  try {
    value = new RegExp(content, mods);
  } catch (e) {
    if (e instanceof SyntaxError) raise(start, "Error parsing regular expression: " + e.message);
    // Not a parse error. `raise` takes (pos, message), so handing it
    // the error object would replace it with a meaningless
    // SyntaxError; propagate the original instead.
    throw e;
  }
  return finishToken(_regexp, value);
}
// Read an integer in the given radix. Return null if zero digits
// were read, the integer value otherwise. When `len` is given, at
// most `len` digits are read and null is returned unless exactly
// `len` digits were consumed.
function readInt(radix, len) {
  var start = tokPos, total = 0;
  var limit = len == null ? Infinity : len;
  for (var count = 0; count < limit; ++count) {
    var code = input.charCodeAt(tokPos);
    var digit = Infinity; // anything that is not a digit stops the scan
    if (code >= 97) digit = code - 97 + 10; // a
    else if (code >= 65) digit = code - 65 + 10; // A
    else if (code >= 48 && code <= 57) digit = code - 48; // 0-9
    if (digit >= radix) break;
    ++tokPos;
    total = total * radix + digit;
  }
  var consumed = tokPos - start;
  if (consumed === 0 || (len != null && consumed !== len)) return null;
  return total;
}
// Reads a hexadecimal literal. `tokPos` is expected to be at the '0'
// of the prefix. Raises when no hex digit follows the prefix or when
// an identifier start character directly follows the digits.
function readHexNumber() {
  tokPos += 2; // 0x
  var value = readInt(16);
  if (value == null) raise(tokStart + 2, "Expected hexadecimal number");
  var following = input.charCodeAt(tokPos);
  if (isIdentifierStart(following)) raise(tokPos, "Identifier directly after number");
  return finishToken(_num, value);
}
// Read an integer, octal integer, or floating-point number.
// `startsWithDot` is true when the literal begins with '.', in which
// case no integer part is read. A literal with a leading zero and
// more than one character is read as octal; that is rejected in
// strict mode or when it contains an 8 or 9.
function readNumber(startsWithDot) {
  var start = tokPos;
  var leadingZero = input.charCodeAt(start) === 48;
  var isFloat = false;

  if (!startsWithDot && readInt(10) === null) raise(start, "Invalid number");

  if (input.charCodeAt(tokPos) === 46) { // '.'
    ++tokPos;
    readInt(10);
    isFloat = true;
  }

  var marker = input.charCodeAt(tokPos);
  if (marker === 69 || marker === 101) { // 'eE'
    var sign = input.charCodeAt(++tokPos);
    if (sign === 43 || sign === 45) ++tokPos; // '+-'
    if (readInt(10) === null) raise(start, "Invalid number");
    isFloat = true;
  }

  if (isIdentifierStart(input.charCodeAt(tokPos))) raise(tokPos, "Identifier directly after number");

  var text = input.slice(start, tokPos), val;
  if (isFloat) {
    val = parseFloat(text);
  } else if (!leadingZero || text.length === 1) {
    val = parseInt(text, 10);
  } else if (/[89]/.test(text) || strict) {
    raise(start, "Invalid number");
  } else {
    val = parseInt(text, 8);
  }
  return finishToken(_num, val);
}
// Read a string value, interpreting backslash-escapes. `quote` is the
// character code of the opening quote, which `tokPos` points at.
// Raises when the string is not closed before a line terminator or
// the end of input, and for octal escapes in strict mode. Finishes a
// `_string` token whose value is the decoded string.
function readString(quote) {
  tokPos++;
  var out = "";
  for (;;) {
    if (tokPos >= inputLen) raise(tokStart, "Unterminated string constant");
    var ch = input.charCodeAt(tokPos);
    if (ch === quote) {
      ++tokPos;
      return finishToken(_string, out);
    }
    if (ch === 92) { // '\'
      ch = input.charCodeAt(++tokPos);
      // Up to three octal digits, trimmed until the value fits a byte.
      var octal = /^[0-7]+/.exec(input.slice(tokPos, tokPos + 3));
      if (octal) octal = octal[0];
      while (octal && parseInt(octal, 8) > 255) octal = octal.slice(0, -1);
      // A lone "0" is the NUL escape, handled in the switch below.
      if (octal === "0") octal = null;
      ++tokPos;
      if (octal) {
        if (strict) raise(tokPos - 2, "Octal literal in strict mode");
        out += String.fromCharCode(parseInt(octal, 8));
        tokPos += octal.length - 1;
      } else {
        switch (ch) {
        case 110: out += "\n"; break; // 'n' -> '\n'
        case 114: out += "\r"; break; // 'r' -> '\r'
        case 120: out += String.fromCharCode(readHexChar(2)); break; // 'x'
        case 117: out += String.fromCharCode(readHexChar(4)); break; // 'u'
        case 85: out += String.fromCharCode(readHexChar(8)); break; // 'U'
        case 116: out += "\t"; break; // 't' -> '\t'
        case 98: out += "\b"; break; // 'b' -> '\b'
        case 118: out += "\u000b"; break; // 'v' -> '\u000b'
        case 102: out += "\f"; break; // 'f' -> '\f'
        case 48: out += "\0"; break; // 0 -> '\0'
        // Line continuations: a backslash followed by a line break
        // contributes nothing to the value. U+2028 and U+2029 are line
        // terminators too, matching `skipSpace` and `lineBreak`.
        case 13: if (input.charCodeAt(tokPos) === 10) ++tokPos; // '\r\n'
        case 10: case 8232: case 8233: // '\n', '\u2028', '\u2029'
          if (options.locations) { tokLineStart = tokPos; ++tokCurLine; }
          break;
        default: out += String.fromCharCode(ch); break;
        }
      }
    } else {
      // An unescaped line terminator ends the string prematurely.
      if (ch === 13 || ch === 10 || ch === 8232 || ch === 8233) raise(tokStart, "Unterminated string constant");
      out += String.fromCharCode(ch);
      ++tokPos;
    }
  }
}
// Used to read character escape sequences ('\x', '\u', '\U'). Reads
// exactly `len` hexadecimal digits and returns their value; raises if
// fewer are available.
function readHexChar(len) {
  var code = readInt(16, len);
  if (code !== null) return code;
  raise(tokStart, "Bad character escape sequence");
  return code;
}
Used to signal to callers of | var containsEsc; |
// Read an identifier, and return it as a string. Sets `containsEsc`
// to whether the word contained a '\u' escape.
//
// Only builds up the word character-by-character when it actually
// contained an escape, as a micro-optimization.
function readWord1() {
  containsEsc = false;
  var start = tokPos, atStart = true, word;
  for (;; atStart = false) {
    var code = input.charCodeAt(tokPos);
    if (code === 92) { // "\"
      if (!containsEsc) {
        // First escape: capture the plain text read so far.
        word = input.slice(start, tokPos);
        containsEsc = true;
      }
      if (input.charCodeAt(++tokPos) != 117) // "u"
        raise(tokPos, "Expecting Unicode escape sequence \\uXXXX");
      ++tokPos;
      var esc = readHexChar(4);
      var escStr = String.fromCharCode(esc);
      if (!escStr) raise(tokPos - 1, "Invalid Unicode escape");
      // The escaped character must itself be valid at this position.
      var allowed = atStart ? isIdentifierStart(esc) : isIdentifierChar(esc);
      if (!allowed) raise(tokPos - 4, "Invalid Unicode escape");
      word += escStr;
    } else if (isIdentifierChar(code)) {
      if (containsEsc) word += input.charAt(tokPos);
      ++tokPos;
    } else {
      break;
    }
  }
  return containsEsc ? word : input.slice(start, tokPos);
}
Read an identifier or keyword token. Will check for reserved words when necessary. | function readWord() {
var word = readWord1();
var type = _name;
if (!containsEsc && isKeyword(word))
type = keywordTypes[word];
return finishToken(type, word);
} |
Parser | |
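A recursive descent parser operates by defining functions for all
syntactic elements, and recursively calling those, each function
advancing the input stream and returning an AST node. Precedence
of constructs (for example, the fact that `!x[1]` means `!(x[1])` instead of `(!x)[1]`) is handled by the fact that the parser function that parses unary prefix operators is called first, and in turn calls the function that parses `[]` subscripts — that way, it'll receive the node for `x[1]` already parsed, and wraps that in the unary operator node. Acorn uses an operator precedence parser to handle binary operator precedence, because it is much more compact than using the technique outlined above, which uses different, nesting functions to specify precedence, for all of the ten binary precedence levels that JavaScript defines. | |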
Parser utilities | |
Continue to the next token. | function next() {
// Remember where the token being left behind started and ended; `finishNode`
// and `canInsertSemicolon` read these `last*` values.
lastStart = tokStart;
lastEnd = tokEnd;
lastEndLoc = tokEndLoc;
// Advance the tokenizer (defined outside this chunk).
readToken();
} |
Enter strict mode. Re-reads the next token to please pedantic tests ("use strict"; 010; -- should fail). | function setStrict(strct) {
// `strct` becomes the new value of the parser-wide `strict` flag.
strict = strct;
// Rewind to the start of the current token so that it is tokenized again
// under the new setting.
tokPos = tokStart;
if (options.locations) {
// The rewind may have crossed line breaks: step the line bookkeeping back
// one line at a time until `tokLineStart` is no longer past `tokPos`.
while (tokPos < tokLineStart) {
tokLineStart = input.lastIndexOf("\n", tokLineStart - 2) + 1;
--tokCurLine;
}
}
skipSpace();
readToken();
} |
Start an AST node, attaching a start offset. | function Node() {
// `type` and `end` stay null until `finishNode` fills them in; `start` is
// the start offset of the current token.
this.type = null;
this.start = tokStart;
this.end = null;
}
// Make the constructor available on the module's exports object.
exports.Node = Node;
// Location record stored on nodes as `loc` when `options.locations` is on.
// `start` is the current token's start position and `end` is filled in later
// by `finishNode`. A `source` property is only added when `sourceFile` is
// not null.
function SourceLocation() {
  this.start = tokStartLoc;
  this.end = null;
  if (sourceFile === null) return;
  this.source = sourceFile;
}
// Create a node that starts at the current token. Depending on the parser
// options it also receives a `loc` record, a `sourceFile` reference and a
// `range` pair whose end (index 1) is a placeholder until `finishNode` runs.
function startNode() {
var node = new Node();
if (options.locations)
node.loc = new SourceLocation();
if (options.directSourceFile)
node.sourceFile = options.directSourceFile;
if (options.ranges)
node.range = [tokStart, 0];
return node;
} |
Start a node whose start offset information should be based on the start of another node. For example, a binary operator node is only started after its left-hand side has already been parsed. | function startNodeFrom(other) {
var node = new Node();
node.start = other.start;
if (options.locations) {
node.loc = new SourceLocation();
node.loc.start = other.loc.start;
}
if (options.ranges)
node.range = [other.range[0], 0];
return node;
} |
Finish an AST node, adding | function finishNode(node, type) {
// Sets `type`, and ends the node where the previously consumed token ended
// (`lastEnd` / `lastEndLoc`), updating `loc.end` and `range[1]` when those
// options are enabled. Returns the same node.
node.type = type;
node.end = lastEnd;
if (options.locations)
node.loc.end = lastEndLoc;
if (options.ranges)
node.range[1] = lastEnd;
return node;
} |
Test whether a statement node is the string literal | function isUseStrict(stmt) {
return options.ecmaVersion >= 5 && stmt.type === "ExpressionStatement" &&
stmt.expression.type === "Literal" && stmt.expression.value === "use strict";
} |
Predicate that tests whether the next token is of the given type, and if yes, consumes it as a side effect. | function eat(type) {
if (tokType === type) {
next();
return true;
}
} |
Test whether a semicolon can be inserted at the current position. | function canInsertSemicolon() {
return !options.strictSemicolons &&
(tokType === _eof || tokType === _braceR || newline.test(input.slice(lastEnd, tokStart)));
} |
Consume a semicolon, or, failing that, see if we are allowed to pretend that there is a semicolon at this position. | function semicolon() {
if (!eat(_semi) && !canInsertSemicolon()) unexpected();
} |
Expect a token of a given type. If found, consume it, otherwise, raise an unexpected token error. | function expect(type) {
  if (tokType !== type) {
    unexpected();
    return;
  }
  next();
} |
Raise an unexpected token error. | function unexpected() {
// The error is reported at the start of the current token.
raise(tokStart, "Unexpected token");
} |
Verify that a node is an lval — something that can be assigned to. | function checkLVal(expr) {
if (expr.type !== "Identifier" && expr.type !== "MemberExpression")
raise(expr.start, "Assigning to rvalue");
if (strict && expr.type === "Identifier" && isStrictBadIdWord(expr.name))
raise(expr.start, "Assigning to " + expr.name + " in strict mode");
} |
Statement parsing | |
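Parse a program. Initializes the parser, reads any number of
statements, and wraps them in a Program node. Optionally takes a `program` argument; if present, the statements will be appended to its body instead of creating a new node.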
| function parseTopLevel(program) {
lastStart = lastEnd = tokPos;
if (options.locations) lastEndLoc = new Position;
inFunction = strict = null;
labels = [];
readToken();
var node = program || startNode(), first = true;
if (!program) node.body = [];
while (tokType !== _eof) {
var stmt = parseStatement();
node.body.push(stmt);
if (first && isUseStrict(stmt)) setStrict(true);
first = false;
}
return finishNode(node, "Program");
}
// Shared entries for the `labels` stack: the loop and switch statement
// parsers push these while parsing their bodies. They carry a `kind` but no
// `name`.
var loopLabel = {kind: "loop"}, switchLabel = {kind: "switch"}; |
Parse a single statement. If expecting a statement and finding a slash operator, parse a
regular expression literal. This is to handle cases like
| function parseStatement() {
// A `/` or `/=` token at the start of a statement is read again through
// `readToken(true)` (defined outside this chunk; per the description above
// this makes it a regular expression literal).
if (tokType === _slash || tokType === _assign && tokVal == "/=")
readToken(true);
// `node` is started before dispatching so every statement parser receives a
// node positioned at the statement's first token.
var starttype = tokType, node = startNode(); |
Most types of statements are recognized by the keyword they start with. Many are trivial to parse, some require a bit of complexity. | switch (starttype) {
case _break: case _continue: return parseBreakContinueStatement(node, starttype.keyword);
case _debugger: return parseDebuggerStatement(node);
case _do: return parseDoStatement(node);
case _for: return parseForStatement(node);
case _function: return parseFunctionStatement(node);
case _if: return parseIfStatement(node);
case _return: return parseReturnStatement(node);
case _switch: return parseSwitchStatement(node);
case _throw: return parseThrowStatement(node);
case _try: return parseTryStatement(node);
case _var: case _let: case _const: return parseVarStatement(node, starttype.keyword);
case _while: return parseWhileStatement(node);
case _with: return parseWithStatement(node);
case _braceL: return parseBlock(); // no point creating a function for this
case _semi: return parseEmptyStatement(node); |
If the statement does not start with a statement keyword or a brace, it's an ExpressionStatement or LabeledStatement. We simply start parsing an expression, and afterwards, if the next token is a colon and the expression was a simple Identifier node, we switch to interpreting it as a label. | default:
// `maybeName` keeps the first token's value so it can serve as the label
// name if the expression turns out to be a label.
var maybeName = tokVal, expr = parseExpression();
if (starttype === _name && expr.type === "Identifier" && eat(_colon))
return parseLabeledStatement(node, maybeName, expr);
else return parseExpressionStatement(node, expr);
}
}
// Parse a `break` or `continue` statement; `keyword` is the keyword text of
// the starting token. Sets `node.label` to an Identifier node or null and
// raises when there is no enclosing target to jump to.
function parseBreakContinueStatement(node, keyword) {
var isBreak = keyword == "break";
next();
if (eat(_semi) || canInsertSemicolon()) node.label = null;
else if (tokType !== _name) unexpected();
else {
node.label = parseIdent();
semicolon();
} |
Verify that there is an actual destination to break or continue to. | for (var i = 0; i < labels.length; ++i) {
var lab = labels[i];
// Without a label every entry is a candidate; with one, only entries of the
// same name are.
if (node.label == null || lab.name === node.label.name) {
// An entry with a kind is a valid target for `break`; `continue` requires
// the kind to be "loop".
if (lab.kind != null && (isBreak || lab.kind === "loop")) break;
// A labeled `break` may also target a labeled non-loop statement.
if (node.label && isBreak) break;
}
}
// Falling off the end of the loop means no suitable target was found.
if (i === labels.length) raise(node.start, "Unsyntactic " + keyword);
return finishNode(node, isBreak ? "BreakStatement" : "ContinueStatement");
}
// Parse a `debugger` statement: consume the keyword and the (possibly
// inserted) semicolon, then finish `node` as a DebuggerStatement.
function parseDebuggerStatement(node) {
next();
semicolon();
return finishNode(node, "DebuggerStatement");
}
// Parse a `do ... while (test)` statement into a DoWhileStatement node with
// `body` and `test` properties.
function parseDoStatement(node) {
next();
// The loop label is on the stack only while the body is parsed, so that
// unlabeled `break`/`continue` inside it find a target.
labels.push(loopLabel);
node.body = parseStatement();
labels.pop();
expect(_while);
node.test = parseParenExpression();
semicolon();
return finishNode(node, "DoWhileStatement");
}
|
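Disambiguating between a `for` and a `for`/`in` loop is non-trivial. Basically, we have to parse the init `var` statement or expression, disallowing the `in` operator (see the second parameter to `parseExpression`), and then check whether the next token is `in`. When there is no init part (semicolon immediately after the opening parenthesis), it is a regular `for` loop. |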
// Parse the head of a `for` statement up to the point where it is clear
// whether it is a plain `for` or a `for`/`in` loop, then delegate to
// `parseFor` or `parseForIn`. The loop label pushed here is popped by those
// functions once the body has been parsed.
function parseForStatement(node) {
  next();
  labels.push(loopLabel);
  expect(_parenL);
  // `for (;` — no init part, so this is a regular loop.
  if (tokType === _semi) return parseFor(node, null);

  var isDeclaration = tokType === _var || tokType === _let;
  if (!isDeclaration) {
    // Expression init, parsed with the `in` operator disallowed.
    var initExpr = parseExpression(false, true);
    if (!eat(_in)) return parseFor(node, initExpr);
    checkLVal(initExpr);
    return parseForIn(node, initExpr);
  }

  var decl = startNode(), declKind = tokType.keyword;
  next();
  parseVar(decl, true, declKind);
  finishNode(decl, "VariableDeclaration");
  // Only a single declarator may be followed by `in`.
  var isForIn = decl.declarations.length === 1 && eat(_in);
  return isForIn ? parseForIn(node, decl) : parseFor(node, decl);
}
// Parse a function declaration: skip the `function` keyword and let
// `parseFunction` do the rest, with `isStatement` set to true.
function parseFunctionStatement(node) {
next();
return parseFunction(node, true);
}
// Parse an `if` statement into an IfStatement node with `test`,
// `consequent` and `alternate` properties; `alternate` is null when there
// is no `else` branch.
function parseIfStatement(node) {
  next();
  node.test = parseParenExpression();
  node.consequent = parseStatement();
  if (eat(_else)) node.alternate = parseStatement();
  else node.alternate = null;
  return finishNode(node, "IfStatement");
}
// Parse a `return` statement into a ReturnStatement node whose `argument`
// is an expression or null.
function parseReturnStatement(node) {
// Rejected outside a function unless `allowReturnOutsideFunction` is set.
if (!inFunction && !options.allowReturnOutsideFunction)
raise(tokStart, "'return' outside of function");
next(); |
In | if (eat(_semi) || canInsertSemicolon()) node.argument = null;
// Otherwise an argument expression follows, terminated by a semicolon.
else { node.argument = parseExpression(); semicolon(); }
return finishNode(node, "ReturnStatement");
}
// Parse a `switch` statement into a SwitchStatement node with a
// `discriminant` expression and a `cases` array of SwitchCase nodes.
function parseSwitchStatement(node) {
next();
node.discriminant = parseParenExpression();
node.cases = [];
expect(_braceL);
// While the cases are parsed, an unlabeled `break` has a target.
labels.push(switchLabel); |
Statements under must be grouped (by label) in SwitchCase
nodes. | for (var cur, sawDefault; tokType != _braceR;) {
if (tokType === _case || tokType === _default) {
var isCase = tokType === _case;
// A new clause closes the one that was being filled.
if (cur) finishNode(cur, "SwitchCase");
node.cases.push(cur = startNode());
cur.consequent = [];
next();
if (isCase) cur.test = parseExpression();
else {
// `default` clauses have a null test and may appear only once.
if (sawDefault) raise(lastStart, "Multiple default clauses"); sawDefault = true;
cur.test = null;
}
expect(_colon);
} else {
// A statement before the first `case`/`default` is an error.
if (!cur) unexpected();
cur.consequent.push(parseStatement());
}
}
if (cur) finishNode(cur, "SwitchCase");
next(); // Closing brace
labels.pop();
return finishNode(node, "SwitchStatement");
}
// Parse a `throw` statement into a ThrowStatement node whose `argument` is
// the thrown expression. Raises "Illegal newline after throw" when a line
// break separates the `throw` keyword from its argument.
function parseThrowStatement(node) {
  next();
  // `lastEnd` is the end of the `throw` keyword and `tokStart` the start of
  // the following token; a line terminator between them is not allowed.
  if (newline.test(input.slice(lastEnd, tokStart)))
    raise(lastEnd, "Illegal newline after throw");
  node.argument = parseExpression();
  semicolon();
  return finishNode(node, "ThrowStatement");
}
// Parse a `try` statement into a TryStatement node with `block`, `handler`
// (a CatchClause node or null), `guardedHandlers` and `finalizer` (a block
// or null). At least one of `catch` and `finally` must be present.
function parseTryStatement(node) {
  next();
  node.block = parseBlock();
  node.handler = null;
  if (tokType === _catch) {
    var handler = startNode();
    next();
    expect(_parenL);
    var param = handler.param = parseIdent();
    // Strict mode restricts which names the catch parameter may bind.
    if (strict && isStrictBadIdWord(param.name))
      raise(param.start, "Binding " + param.name + " in strict mode");
    expect(_parenR);
    handler.guard = null;
    handler.body = parseBlock();
    node.handler = finishNode(handler, "CatchClause");
  }
  node.guardedHandlers = empty;
  if (eat(_finally)) node.finalizer = parseBlock();
  else node.finalizer = null;
  var hasClause = node.handler || node.finalizer;
  if (!hasClause) raise(node.start, "Missing catch or finally clause");
  return finishNode(node, "TryStatement");
}
// Parse a `var`/`let`/`const` declaration statement. `kind` is the keyword
// text; the declarators are collected by `parseVar` (called with `noIn`
// false).
function parseVarStatement(node, kind) {
next();
parseVar(node, false, kind);
semicolon();
return finishNode(node, "VariableDeclaration");
}
// Parse a `while (test) body` loop into a WhileStatement node.
function parseWhileStatement(node) {
next();
node.test = parseParenExpression();
// The loop label is on the stack only while the body is parsed.
labels.push(loopLabel);
node.body = parseStatement();
labels.pop();
return finishNode(node, "WhileStatement");
}
// Parse a `with (object) body` statement into a WithStatement node. The
// statement is rejected outright when the parser is in strict mode.
function parseWithStatement(node) {
if (strict) raise(tokStart, "'with' in strict mode");
next();
node.object = parseParenExpression();
node.body = parseStatement();
return finishNode(node, "WithStatement");
}
// Parse an empty statement: consume the lone `;` token.
function parseEmptyStatement(node) {
next();
return finishNode(node, "EmptyStatement");
}
// Parse the statement that follows `label:`. `maybeName` is the label text
// and `expr` the Identifier node already parsed for it. Raises when the
// label is already on the label stack.
function parseLabeledStatement(node, maybeName, expr) {
  for (var idx = 0; idx < labels.length; ++idx) {
    var existing = labels[idx];
    if (existing.name === maybeName) raise(expr.start, "Label '" + maybeName + "' is already declared");
  }
  // Record what kind of statement the label is attached to, so that
  // `break`/`continue` with this label can be validated.
  var kind = null;
  if (tokType.isLoop) kind = "loop";
  else if (tokType === _switch) kind = "switch";
  labels.push({name: maybeName, kind: kind});
  node.body = parseStatement();
  labels.pop();
  node.label = expr;
  return finishNode(node, "LabeledStatement");
}
// Wrap an already parsed expression in an ExpressionStatement node and
// consume the terminating (possibly inserted) semicolon.
function parseExpressionStatement(node, expr) {
node.expression = expr;
semicolon();
return finishNode(node, "ExpressionStatement");
} |
Used for constructs like | function parseParenExpression() {
// Parses `( expression )` and returns the inner expression node; the
// parentheses themselves produce no node. Callers in this file use it for
// the heads of `if`, `while`, `do`/`while`, `switch` and `with`.
expect(_parenL);
var val = parseExpression();
expect(_parenR);
return val;
} |
Parse a semicolon-enclosed block of statements, handling | function parseBlock(allowStrict) {
var node = startNode(), first = true, strict = false, oldStrict;
node.body = [];
expect(_braceL);
while (!eat(_braceR)) {
var stmt = parseStatement();
node.body.push(stmt);
if (first && allowStrict && isUseStrict(stmt)) {
oldStrict = strict;
setStrict(strict = true);
}
first = false;
}
if (strict && !oldStrict) setStrict(false);
return finishNode(node, "BlockStatement");
} |
Parse a regular | function parseFor(node, init) {
node.init = init;
expect(_semi);
node.test = tokType === _semi ? null : parseExpression();
expect(_semi);
node.update = tokType === _parenR ? null : parseExpression();
expect(_parenR);
node.body = parseStatement();
labels.pop();
return finishNode(node, "ForStatement");
} |
Parse a | function parseForIn(node, init) {
// Finishes a `for`/`in` loop whose left-hand side (`init`) and `in` keyword
// were already consumed by `parseForStatement`.
node.left = init;
node.right = parseExpression();
expect(_parenR);
node.body = parseStatement();
// Drop the loop label that `parseForStatement` pushed.
labels.pop();
return finishNode(node, "ForInStatement");
} |
Parse a list of variable declarations. | function parseVar(node, noIn, kind) {
node.declarations = [];
node.kind = kind;
for (;;) {
var decl = startNode();
decl.id = parseIdent();
if (strict && isStrictBadIdWord(decl.id.name))
raise(decl.id.start, "Binding " + decl.id.name + " in strict mode");
decl.init = eat(_eq) ? parseExpression(true, noIn) : (kind === _const.keyword ? unexpected() : null);
node.declarations.push(finishNode(decl, "VariableDeclarator"));
if (!eat(_comma)) break;
}
return node;
} |
Expression parsing | |
These nest, from the most general expression type at the top to 'atomic', nondivisible expression types at the bottom. Most of the functions will simply let the function(s) below them parse, and, if the syntactic construct they handle is present, wrap the AST node that the inner parser gave them in another node. | |
Parse a full expression. The arguments are used to forbid comma
sequences (in argument lists, array literals, or object literals)
or the | function parseExpression(noComma, noIn) {
var expr = parseMaybeAssign(noIn);
if (!noComma && tokType === _comma) {
var node = startNodeFrom(expr);
node.expressions = [expr];
while (eat(_comma)) node.expressions.push(parseMaybeAssign(noIn));
return finishNode(node, "SequenceExpression");
}
return expr;
} |
Parse an assignment expression. This includes applications of
operators like | function parseMaybeAssign(noIn) {
var left = parseMaybeConditional(noIn);
if (tokType.isAssign) {
var node = startNodeFrom(left);
node.operator = tokVal;
node.left = left;
next();
node.right = parseMaybeAssign(noIn);
checkLVal(left);
return finishNode(node, "AssignmentExpression");
}
return left;
} |
Parse a ternary conditional ( | function parseMaybeConditional(noIn) {
var expr = parseExprOps(noIn);
if (eat(_question)) {
var node = startNodeFrom(expr);
node.test = expr;
node.consequent = parseExpression(true);
expect(_colon);
node.alternate = parseExpression(true, noIn);
return finishNode(node, "ConditionalExpression");
}
return expr;
} |
Start the precedence parser. | function parseExprOps(noIn) {
// The first operand is parsed here; -1 is passed as the minimum precedence
// for `parseExprOp`.
return parseExprOp(parseMaybeUnary(), -1, noIn);
} |
Parse binary operators with the operator precedence parsing
algorithm. | function parseExprOp(left, minPrec, noIn) {
var prec = tokType.binop;
if (prec != null && (!noIn || tokType !== _in)) {
if (prec > minPrec) {
var node = startNodeFrom(left);
node.left = left;
node.operator = tokVal;
var op = tokType;
next();
node.right = parseExprOp(parseMaybeUnary(), prec, noIn);
var exprNode = finishNode(node, (op === _logicalOR || op === _logicalAND) ? "LogicalExpression" : "BinaryExpression");
return parseExprOp(exprNode, minPrec, noIn);
}
}
return left;
} |
Parse unary operators, both prefix and postfix. | function parseMaybeUnary() {
if (tokType.prefix) {
// Prefix operator: `update` tells `++`/`--` apart from the other unary
// operators.
var node = startNode(), update = tokType.isUpdate;
node.operator = tokVal;
node.prefix = true;
// Tokenizer flag (the tokenizer is outside this chunk); presumably lets a
// `/` right after the operator start a regular expression.
tokRegexpAllowed = true;
next();
node.argument = parseMaybeUnary();
// `++`/`--` need an assignable operand; strict mode forbids `delete` on a
// plain identifier.
if (update) checkLVal(node.argument);
else if (strict && node.operator === "delete" &&
node.argument.type === "Identifier")
raise(node.start, "Deleting local variable in strict mode");
return finishNode(node, update ? "UpdateExpression" : "UnaryExpression");
}
var expr = parseExprSubscripts();
// A postfix operator only applies when no semicolon could be inserted in
// front of it.
while (tokType.postfix && !canInsertSemicolon()) {
var node = startNodeFrom(expr);
node.operator = tokVal;
node.prefix = false;
node.argument = expr;
checkLVal(expr);
next();
expr = finishNode(node, "UpdateExpression");
}
return expr;
} |
Parse call, dot, and | function parseExprSubscripts() {
// Parses an atomic expression and then any subscripts that follow it
// (calls included, since `noCalls` is not passed).
return parseSubscripts(parseExprAtom());
}
function parseSubscripts(base, noCalls) {
if (eat(_dot)) {
var node = startNodeFrom(base);
node.object = base;
node.property = parseIdent(true);
node.computed = false;
return parseSubscripts(finishNode(node, "MemberExpression"), noCalls);
} else if (eat(_bracketL)) {
var node = startNodeFrom(base);
node.object = base;
node.property = parseExpression();
node.computed = true;
expect(_bracketR);
return parseSubscripts(finishNode(node, "MemberExpression"), noCalls);
} else if (!noCalls && eat(_parenL)) {
var node = startNodeFrom(base);
node.callee = base;
node.arguments = parseExprList(_parenR, false);
return parseSubscripts(finishNode(node, "CallExpression"), noCalls);
} else return base;
} |
Parse an atomic expression — either a single token that is an
expression, an expression started by a keyword like | function parseExprAtom() {
// Dispatches on the current token type; any token that cannot start an
// expression ends in `unexpected()`.
switch (tokType) {
case _this:
var node = startNode();
next();
return finishNode(node, "ThisExpression");
case _name:
return parseIdent();
case _num: case _string: case _regexp:
// Literal tokens: `value` is the token value, `raw` the exact source text.
var node = startNode();
node.value = tokVal;
node.raw = input.slice(tokStart, tokEnd);
next();
return finishNode(node, "Literal");
case _null: case _true: case _false:
// Keyword literals take their value from the token type itself.
var node = startNode();
node.value = tokType.atomValue;
node.raw = tokType.keyword;
next();
return finishNode(node, "Literal");
case _parenL:
// Parenthesized expression: no node of its own. The inner node's position
// is widened from the opening parenthesis to the end of the current token
// (which `expect(_parenR)` below requires to be the closing parenthesis).
var tokStartLoc1 = tokStartLoc, tokStart1 = tokStart;
next();
var val = parseExpression();
val.start = tokStart1;
val.end = tokEnd;
if (options.locations) {
val.loc.start = tokStartLoc1;
val.loc.end = tokEndLoc;
}
if (options.ranges)
val.range = [tokStart1, tokEnd];
expect(_parenR);
return val;
case _bracketL:
// Array literal: trailing commas and empty elements are allowed.
var node = startNode();
next();
node.elements = parseExprList(_bracketR, true, true);
return finishNode(node, "ArrayExpression");
case _braceL:
return parseObj();
case _function:
var node = startNode();
next();
return parseFunction(node, false);
case _new:
return parseNew();
default:
unexpected();
}
} |
New's precedence is slightly tricky. It must allow its argument
to be a | function parseNew() {
var node = startNode();
next();
node.callee = parseSubscripts(parseExprAtom(), true);
if (eat(_parenL)) node.arguments = parseExprList(_parenR, false);
else node.arguments = empty;
return finishNode(node, "NewExpression");
} |
Parse an object literal. | function parseObj() {
// `sawGetSet` becomes true once any getter or setter has been parsed; it
// widens the duplicate-key check below to non-strict code.
var node = startNode(), first = true, sawGetSet = false;
node.properties = [];
next();
while (!eat(_braceR)) {
if (!first) {
expect(_comma);
// A trailing comma before `}` is accepted when the option allows it.
if (options.allowTrailingCommas && eat(_braceR)) break;
} else first = false;
var prop = {key: parsePropertyName()}, isGetSet = false, kind;
if (eat(_colon)) {
prop.value = parseExpression(true);
kind = prop.kind = "init";
} else if (options.ecmaVersion >= 5 && prop.key.type === "Identifier" &&
(prop.key.name === "get" || prop.key.name === "set")) {
// `get name() {...}` / `set name(v) {...}`: the word just read was the
// accessor kind, and the real key follows.
isGetSet = sawGetSet = true;
kind = prop.kind = prop.key.name;
prop.key = parsePropertyName();
if (tokType !== _parenL) unexpected();
prop.value = parseFunction(startNode(), false);
} else unexpected(); |
getters and setters are not allowed to clash — either with each other or with an init property — and in strict mode, init properties are also not allowed to be repeated. | if (prop.key.type === "Identifier" && (strict || sawGetSet)) {
for (var i = 0; i < node.properties.length; ++i) {
var other = node.properties[i];
if (other.key.name === prop.key.name) {
var conflict = kind == other.kind || isGetSet && other.kind === "init" ||
kind === "init" && (other.kind === "get" || other.kind === "set");
// Two `init` properties with the same name only conflict in strict mode.
if (conflict && !strict && kind === "init" && other.kind === "init") conflict = false;
if (conflict) raise(prop.key.start, "Redefinition of property");
}
}
}
node.properties.push(prop);
}
return finishNode(node, "ObjectExpression");
}
function parsePropertyName() {
if (tokType === _num || tokType === _string) return parseExprAtom();
return parseIdent(true);
} |
Parse a function declaration or literal (depending on the
| function parseFunction(node, isStatement) {
// The `function` keyword has already been consumed by the caller. A name is
// required for declarations (`isStatement`) and optional for expressions.
if (tokType === _name) node.id = parseIdent();
else if (isStatement) unexpected();
else node.id = null;
node.params = [];
node.rest = null;
expect(_parenL);
for (;;) {
if (eat(_parenR)) {
break;
} else if (options.ecmaVersion >= 6 && eat(_ellipsis)) {
// A rest parameter (ECMAScript 6 and up) must be the last one.
node.rest = parseIdent();
expect(_parenR);
break;
} else {
node.params.push(parseIdent());
if (!eat(_comma)) {
expect(_parenR);
break;
}
}
} |
Start a new scope with regard to labels and the | var oldInFunc = inFunction, oldLabels = labels;
inFunction = true; labels = [];
// `true` lets the body's first statement act as a "use strict" directive.
node.body = parseBlock(true);
inFunction = oldInFunc; labels = oldLabels; |
If this is a strict mode function, verify that argument names
are not repeated, and it does not try to bind the words | if (strict || node.body.body.length && isUseStrict(node.body.body[0])) { |
Negative indices are used to reuse loop body for node.rest and node.id | for (var i = -2, id; i < node.params.length; ++i) {
if (i >= 0) {
id = node.params[i];
} else if (i == -2) {
// i == -2: the rest parameter, if any.
if (node.rest) id = node.rest;
else continue;
} else {
// i == -1: the function's own name, if any.
if (node.id) id = node.id;
else continue;
}
if (isStrictReservedWord(id.name) || isStrictBadIdWord(id.name))
raise(id.start, "Defining '" + id.name + "' in strict mode");
// Regular parameters are also compared against all earlier ones.
if (i >= 0) for (var j = 0; j < i; ++j) if (id.name === node.params[j].name)
raise(id.start, "Argument name clash in strict mode");
}
}
return finishNode(node, isStatement ? "FunctionDeclaration" : "FunctionExpression");
} |
Parses a comma-separated list of expressions, and returns them as
an array. | function parseExprList(close, allowTrailingComma, allowEmpty) {
var elts = [], first = true;
while (!eat(close)) {
if (!first) {
expect(_comma);
if (allowTrailingComma && options.allowTrailingCommas && eat(close)) break;
} else first = false;
if (allowEmpty && tokType === _comma) elts.push(null);
else elts.push(parseExpression(true));
}
return elts;
} |
Parse the next token as an identifier. If | function parseIdent(liberal) {
// With `liberal` set, keyword tokens are accepted as identifier names and
// the reserved-word check is skipped; this is used for property names.
var node = startNode();
// `forbidReserved: "everywhere"` overrides the liberal mode.
if (liberal && options.forbidReserved == "everywhere") liberal = false;
if (tokType === _name) {
// Reserved words are rejected when `forbidReserved` is on (using the word
// list of the configured ECMAScript version) or, in strict mode, when the
// word is strict-reserved. The check is skipped if the token's source text
// contains a backslash, i.e. the name was written with an escape.
if (!liberal &&
(options.forbidReserved &&
(options.ecmaVersion === 3 ? isReservedWord3 : isReservedWord5)(tokVal) ||
strict && isStrictReservedWord(tokVal)) &&
input.slice(tokStart, tokEnd).indexOf("\\") == -1)
raise(tokStart, "The keyword '" + tokVal + "' is reserved");
node.name = tokVal;
} else if (liberal && tokType.keyword) {
node.name = tokType.keyword;
} else {
unexpected();
}
// Tokenizer flag, set before the next token is read (the tokenizer is
// outside this chunk); presumably makes a following `/` a division operator.
tokRegexpAllowed = false;
next();
return finishNode(node, "Identifier");
}
});
|