"""
**********************************************************************

  A RapydScript to JavaScript compiler.
  https://github.com/atsepkov/RapydScript

  -------------------------------- (C) ---------------------------------

                       Author: Alexander Tsepkov
                         <atsepkov@pyjeon.com>
                         http://www.pyjeon.com

  Distributed under BSD license:
    Copyright 2013 (c) Alexander Tsepkov <atsepkov@pyjeon.com>

**********************************************************************
"""

from utils import makePredicate, ParseError
import ast


def characters(str_): return str_.split("")
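# (JS String.split with an empty separator, e.g. characters("+-*") -> ["+", "-", "*"])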

# -------- [ KEYWORDS ] ----------
# 'arguments' - a special JS variable, but it can still be used as a valid name

ES6_KEYWORDS = "async await yield"
COMMON_KEYWORDS = "break case class const continue debugger default do else " +
"finally for if import in new return switch " +
"try void while with " + ES6_KEYWORDS

# some keywords removed in es5/6
JS_KEYWORDS = "enum export extends implements " +
"interface let package private protected " +
"public static this" # super?

# with the `strict_names` compiler option turned off,
# these keywords may be used as names; RS will fix them by appending the suffix `_ϟ`
JS_KEYWORDS_AUTOFIX = "var function instanceof typeof catch delete throw false null true"

RS_KEYWORDS = "as def del elif except " +
"from is nonlocal pass raise til to or and not"

KEYWORDS = [RS_KEYWORDS, COMMON_KEYWORDS].join(" ")
KEYWORDS_ATOM = "False None True"

RESERVED_WORDS = [KEYWORDS_ATOM, KEYWORDS, JS_KEYWORDS].join(" ")

KEYWORDS_BEFORE_EXPRESSION = "return new del raise elif else if"

ALL_KEYWORDS = [RESERVED_WORDS, JS_KEYWORDS_AUTOFIX].join(" ")
ALL_JS_KEYWORDS = [JS_KEYWORDS, JS_KEYWORDS_AUTOFIX, COMMON_KEYWORDS].join(" ")

KEYWORDS = makePredicate(KEYWORDS)
ES6_KEYWORDS = makePredicate(ES6_KEYWORDS)
RESERVED_WORDS = makePredicate(RESERVED_WORDS)
IS_ANY_KEYWORD = makePredicate(ALL_KEYWORDS)
KEYWORDS_BEFORE_EXPRESSION = makePredicate(KEYWORDS_BEFORE_EXPRESSION)
KEYWORDS_ATOM = makePredicate(KEYWORDS_ATOM)
JS_KEYWORDS_AUTOFIX = makePredicate(JS_KEYWORDS_AUTOFIX)
ALL_JS_KEYWORDS = makePredicate(ALL_JS_KEYWORDS)

OPERATOR_CHARS = makePredicate(characters("+-*&%=<>!?|~^@"))

RE_HEX_NUMBER = /^0x[0-9a-f]+$/i
RE_OCT_NUMBER = /^0[0-7]+$/
RE_DEC_NUMBER = /^\d*\.?\d*(?:e[+-]?\d*(?:\d\.?|\.?\d)\d*)?$/i

OPERATORS = makePredicate([
    "in",
    "new",
    "void",
    "del",
    "++",
    "--",
    "+",
    "-",
    "not",
    "~",
    "&",
    "|",
    "^",
    "**",
    "*",
    "/",
    "//",
    "%",
    ">>",
    "<<",
    ">>>",
    "<",
    ">",
    "<=",
    ">=",
    "==",
    "===",
    "is",
    "!=",
    "!==",
    "?",
    "=",
    "+=",
    "-=",
    "/=",
    "//=",
    "*=",
    "%=",
    ">>=",
    "<<=",
    ">>>=",
    "|=",
    "^=",
    "&=",
    "and",
    "or",
    "til",
    "to",
    "@",
    "->"
])

OP_MAP = {
    'or': "||",
    'and': "&&",
    'not': "!",
    'del': "delete",
    'None': "null",
    'is': "==="
}
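
# applied in token() below: e.g. "or" is emitted with value "||", its original
# spelling preserved in the token's _value field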

WHITESPACE_CHARS = makePredicate(characters(" \u00a0\n\r\t\f\u000b\u200b\u180e\u2000\u2001\u2002\u2003\u2004\u2005\u2006\u2007\u2008\u2009\u200a\u202f\u205f\u3000"))

PUNC_BEFORE_EXPRESSION = makePredicate(characters("[{(,.;:"))

PUNC_CHARS = makePredicate(characters("[]{}(),;:"))

REGEXP_MODIFIERS = makePredicate(characters("gmsiy"))

# -----[ Tokenizer ]-----
# regexps adapted from http://xregexp.com/plugins/#unicode
UNICODE = {  # {{{
    letter: RegExp("[\\u0041-\\u005A\\u0061-\\u007A\\u00AA\\u00B5\\u00BA\\u00C0-\\u00D6\\u00D8-\\u00F6\\u00F8-\\u02C1\\u02C6-\\u02D1\\u02E0-\\u02E4\\u02EC\\u02EE\\u0370-\\u0374\\u0376\\u0377\\u037A-\\u037D\\u0386\\u0388-\\u038A\\u038C\\u038E-\\u03A1\\u03A3-\\u03F5\\u03F7-\\u0481\\u048A-\\u0523\\u0531-\\u0556\\u0559\\u0561-\\u0587\\u05D0-\\u05EA\\u05F0-\\u05F2\\u0621-\\u064A\\u066E\\u066F\\u0671-\\u06D3\\u06D5\\u06E5\\u06E6\\u06EE\\u06EF\\u06FA-\\u06FC\\u06FF\\u0710\\u0712-\\u072F\\u074D-\\u07A5\\u07B1\\u07CA-\\u07EA\\u07F4\\u07F5\\u07FA\\u0904-\\u0939\\u093D\\u0950\\u0958-\\u0961\\u0971\\u0972\\u097B-\\u097F\\u0985-\\u098C\\u098F\\u0990\\u0993-\\u09A8\\u09AA-\\u09B0\\u09B2\\u09B6-\\u09B9\\u09BD\\u09CE\\u09DC\\u09DD\\u09DF-\\u09E1\\u09F0\\u09F1\\u0A05-\\u0A0A\\u0A0F\\u0A10\\u0A13-\\u0A28\\u0A2A-\\u0A30\\u0A32\\u0A33\\u0A35\\u0A36\\u0A38\\u0A39\\u0A59-\\u0A5C\\u0A5E\\u0A72-\\u0A74\\u0A85-\\u0A8D\\u0A8F-\\u0A91\\u0A93-\\u0AA8\\u0AAA-\\u0AB0\\u0AB2\\u0AB3\\u0AB5-\\u0AB9\\u0ABD\\u0AD0\\u0AE0\\u0AE1\\u0B05-\\u0B0C\\u0B0F\\u0B10\\u0B13-\\u0B28\\u0B2A-\\u0B30\\u0B32\\u0B33\\u0B35-\\u0B39\\u0B3D\\u0B5C\\u0B5D\\u0B5F-\\u0B61\\u0B71\\u0B83\\u0B85-\\u0B8A\\u0B8E-\\u0B90\\u0B92-\\u0B95\\u0B99\\u0B9A\\u0B9C\\u0B9E\\u0B9F\\u0BA3\\u0BA4\\u0BA8-\\u0BAA\\u0BAE-\\u0BB9\\u0BD0\\u0C05-\\u0C0C\\u0C0E-\\u0C10\\u0C12-\\u0C28\\u0C2A-\\u0C33\\u0C35-\\u0C39\\u0C3D\\u0C58\\u0C59\\u0C60\\u0C61\\u0C85-\\u0C8C\\u0C8E-\\u0C90\\u0C92-\\u0CA8\\u0CAA-\\u0CB3\\u0CB5-\\u0CB9\\u0CBD\\u0CDE\\u0CE0\\u0CE1\\u0D05-\\u0D0C\\u0D0E-\\u0D10\\u0D12-\\u0D28\\u0D2A-\\u0D39\\u0D3D\\u0D60\\u0D61\\u0D7A-\\u0D7F\\u0D85-\\u0D96\\u0D9A-\\u0DB1\\u0DB3-\\u0DBB\\u0DBD\\u0DC0-\\u0DC6\\u0E01-\\u0E30\\u0E32\\u0E33\\u0E40-\\u0E46\\u0E81\\u0E82\\u0E84\\u0E87\\u0E88\\u0E8A\\u0E8D\\u0E94-\\u0E97\\u0E99-\\u0E9F\\u0EA1-\\u0EA3\\u0EA5\\u0EA7\\u0EAA\\u0EAB\\u0EAD-\\u0EB0\\u0EB2\\u0EB3\\u0EBD\\u0EC0-\\u0EC4\\u0EC6\\u0EDC\\u0EDD\\u0F00\\u0F40-\\u0F47\\u0F49-\\u0F6C\\u0F88-\\u0F8B\\u1000-\\u102A\\u103F\\u1050-\\u1055\\u105A-\\u105D\\u1061\\u1065\\u1066\\u106E-\\u1070\\u1075-\\u1081\\u108E\\u10A0-\\u10C5\\u10D0-\\u10FA\\u10FC\\u1100-\\u1159\\u115F-\\u11A2\\u11A8-\\u11F9\\u1200-\\u1248\\u124A-\\u124D\\u1250-\\u1256\\u1258\\u125A-\\u125D\\u1260-\\u1288\\u128A-\\u128D\\u1290-\\u12B0\\u12B2-\\u12B5\\u12B8-\\u12BE\\u12C0\\u12C2-\\u12C5\\u12C8-\\u12D6\\u12D8-\\u1310\\u1312-\\u1315\\u1318-\\u135A\\u1380-\\u138F\\u13A0-\\u13F4\\u1401-\\u166C\\u166F-\\u1676\\u1681-\\u169A\\u16A0-\\u16EA\\u1700-\\u170C\\u170E-\\u1711\\u1720-\\u1731\\u1740-\\u1751\\u1760-\\u176C\\u176E-\\u1770\\u1780-\\u17B3\\u17D7\\u17DC\\u1820-\\u1877\\u1880-\\u18A8\\u18AA\\u1900-\\u191C\\u1950-\\u196D\\u1970-\\u1974\\u1980-\\u19A9\\u19C1-\\u19C7\\u1A00-\\u1A16\\u1B05-\\u1B33\\u1B45-\\u1B4B\\u1B83-\\u1BA0\\u1BAE\\u1BAF\\u1C00-\\u1C23\\u1C4D-\\u1C4F\\u1C5A-\\u1C7D\\u1D00-\\u1DBF\\u1E00-\\u1F15\\u1F18-\\u1F1D\\u1F20-\\u1F45\\u1F48-\\u1F4D\\u1F50-\\u1F57\\u1F59\\u1F5B\\u1F5D\\u1F5F-\\u1F7D\\u1F80-\\u1FB4\\u1FB6-\\u1FBC\\u1FBE\\u1FC2-\\u1FC4\\u1FC6-\\u1FCC\\u1FD0-\\u1FD3\\u1FD6-\\u1FDB\\u1FE0-\\u1FEC\\u1FF2-\\u1FF4\\u1FF6-\\u1FFC\\u2071\\u207F\\u2090-\\u2094\\u2102\\u2107\\u210A-\\u2113\\u2115\\u2119-\\u211D\\u2124\\u2126\\u2128\\u212A-\\u212D\\u212F-\\u2139\\u213C-\\u213F\\u2145-\\u2149\\u214E\\u2183\\u2184\\u2C00-\\u2C2E\\u2C30-\\u2C5E\\u2C60-\\u2C6F\\u2C71-\\u2C7D\\u2C80-\\u2CE4\\u2D00-\\u2D25\\u2D30-\\u2D65\\u2D6F\\u2D80-\\u2D96\\u2DA0-\\u2DA6\\u2DA8-\\u2DAE\\u2DB0-\\u2DB6\\u2DB8-\\u2DBE\\u2DC0-\\u2DC6\\u2DC8-\\u2DCE\\u2DD0-\\u2DD6\\u2DD8-\\u2DDE\\u2E2F\\u3005\\u3006\\u3031-\\u3035\\u303B\\u303C\\u3041-\
\u3096\\u309D-\\u309F\\u30A1-\\u30FA\\u30FC-\\u30FF\\u3105-\\u312D\\u3131-\\u318E\\u31A0-\\u31B7\\u31F0-\\u31FF\\u3400\\u4DB5\\u4E00\\u9FC3\\uA000-\\uA48C\\uA500-\\uA60C\\uA610-\\uA61F\\uA62A\\uA62B\\uA640-\\uA65F\\uA662-\\uA66E\\uA67F-\\uA697\\uA717-\\uA71F\\uA722-\\uA788\\uA78B\\uA78C\\uA7FB-\\uA801\\uA803-\\uA805\\uA807-\\uA80A\\uA80C-\\uA822\\uA840-\\uA873\\uA882-\\uA8B3\\uA90A-\\uA925\\uA930-\\uA946\\uAA00-\\uAA28\\uAA40-\\uAA42\\uAA44-\\uAA4B\\uAC00\\uD7A3\\uF900-\\uFA2D\\uFA30-\\uFA6A\\uFA70-\\uFAD9\\uFB00-\\uFB06\\uFB13-\\uFB17\\uFB1D\\uFB1F-\\uFB28\\uFB2A-\\uFB36\\uFB38-\\uFB3C\\uFB3E\\uFB40\\uFB41\\uFB43\\uFB44\\uFB46-\\uFBB1\\uFBD3-\\uFD3D\\uFD50-\\uFD8F\\uFD92-\\uFDC7\\uFDF0-\\uFDFB\\uFE70-\\uFE74\\uFE76-\\uFEFC\\uFF21-\\uFF3A\\uFF41-\\uFF5A\\uFF66-\\uFFBE\\uFFC2-\\uFFC7\\uFFCA-\\uFFCF\\uFFD2-\\uFFD7\\uFFDA-\\uFFDC]"),
    non_spacing_mark: RegExp("[\\u0300-\\u036F\\u0483-\\u0487\\u0591-\\u05BD\\u05BF\\u05C1\\u05C2\\u05C4\\u05C5\\u05C7\\u0610-\\u061A\\u064B-\\u065E\\u0670\\u06D6-\\u06DC\\u06DF-\\u06E4\\u06E7\\u06E8\\u06EA-\\u06ED\\u0711\\u0730-\\u074A\\u07A6-\\u07B0\\u07EB-\\u07F3\\u0816-\\u0819\\u081B-\\u0823\\u0825-\\u0827\\u0829-\\u082D\\u0900-\\u0902\\u093C\\u0941-\\u0948\\u094D\\u0951-\\u0955\\u0962\\u0963\\u0981\\u09BC\\u09C1-\\u09C4\\u09CD\\u09E2\\u09E3\\u0A01\\u0A02\\u0A3C\\u0A41\\u0A42\\u0A47\\u0A48\\u0A4B-\\u0A4D\\u0A51\\u0A70\\u0A71\\u0A75\\u0A81\\u0A82\\u0ABC\\u0AC1-\\u0AC5\\u0AC7\\u0AC8\\u0ACD\\u0AE2\\u0AE3\\u0B01\\u0B3C\\u0B3F\\u0B41-\\u0B44\\u0B4D\\u0B56\\u0B62\\u0B63\\u0B82\\u0BC0\\u0BCD\\u0C3E-\\u0C40\\u0C46-\\u0C48\\u0C4A-\\u0C4D\\u0C55\\u0C56\\u0C62\\u0C63\\u0CBC\\u0CBF\\u0CC6\\u0CCC\\u0CCD\\u0CE2\\u0CE3\\u0D41-\\u0D44\\u0D4D\\u0D62\\u0D63\\u0DCA\\u0DD2-\\u0DD4\\u0DD6\\u0E31\\u0E34-\\u0E3A\\u0E47-\\u0E4E\\u0EB1\\u0EB4-\\u0EB9\\u0EBB\\u0EBC\\u0EC8-\\u0ECD\\u0F18\\u0F19\\u0F35\\u0F37\\u0F39\\u0F71-\\u0F7E\\u0F80-\\u0F84\\u0F86\\u0F87\\u0F90-\\u0F97\\u0F99-\\u0FBC\\u0FC6\\u102D-\\u1030\\u1032-\\u1037\\u1039\\u103A\\u103D\\u103E\\u1058\\u1059\\u105E-\\u1060\\u1071-\\u1074\\u1082\\u1085\\u1086\\u108D\\u109D\\u135F\\u1712-\\u1714\\u1732-\\u1734\\u1752\\u1753\\u1772\\u1773\\u17B7-\\u17BD\\u17C6\\u17C9-\\u17D3\\u17DD\\u180B-\\u180D\\u18A9\\u1920-\\u1922\\u1927\\u1928\\u1932\\u1939-\\u193B\\u1A17\\u1A18\\u1A56\\u1A58-\\u1A5E\\u1A60\\u1A62\\u1A65-\\u1A6C\\u1A73-\\u1A7C\\u1A7F\\u1B00-\\u1B03\\u1B34\\u1B36-\\u1B3A\\u1B3C\\u1B42\\u1B6B-\\u1B73\\u1B80\\u1B81\\u1BA2-\\u1BA5\\u1BA8\\u1BA9\\u1C2C-\\u1C33\\u1C36\\u1C37\\u1CD0-\\u1CD2\\u1CD4-\\u1CE0\\u1CE2-\\u1CE8\\u1CED\\u1DC0-\\u1DE6\\u1DFD-\\u1DFF\\u20D0-\\u20DC\\u20E1\\u20E5-\\u20F0\\u2CEF-\\u2CF1\\u2DE0-\\u2DFF\\u302A-\\u302F\\u3099\\u309A\\uA66F\\uA67C\\uA67D\\uA6F0\\uA6F1\\uA802\\uA806\\uA80B\\uA825\\uA826\\uA8C4\\uA8E0-\\uA8F1\\uA926-\\uA92D\\uA947-\\uA951\\uA980-\\uA982\\uA9B3\\uA9B6-\\uA9B9\\uA9BC\\uAA29-\\uAA2E\\uAA31\\uAA32\\uAA35\\uAA36\\uAA43\\uAA4C\\uAAB0\\uAAB2-\\uAAB4\\uAAB7\\uAAB8\\uAABE\\uAABF\\uAAC1\\uABE5\\uABE8\\uABED\\uFB1E\\uFE00-\\uFE0F\\uFE20-\\uFE26]"),
    space_combining_mark: RegExp("[\\u0903\\u093E-\\u0940\\u0949-\\u094C\\u094E\\u0982\\u0983\\u09BE-\\u09C0\\u09C7\\u09C8\\u09CB\\u09CC\\u09D7\\u0A03\\u0A3E-\\u0A40\\u0A83\\u0ABE-\\u0AC0\\u0AC9\\u0ACB\\u0ACC\\u0B02\\u0B03\\u0B3E\\u0B40\\u0B47\\u0B48\\u0B4B\\u0B4C\\u0B57\\u0BBE\\u0BBF\\u0BC1\\u0BC2\\u0BC6-\\u0BC8\\u0BCA-\\u0BCC\\u0BD7\\u0C01-\\u0C03\\u0C41-\\u0C44\\u0C82\\u0C83\\u0CBE\\u0CC0-\\u0CC4\\u0CC7\\u0CC8\\u0CCA\\u0CCB\\u0CD5\\u0CD6\\u0D02\\u0D03\\u0D3E-\\u0D40\\u0D46-\\u0D48\\u0D4A-\\u0D4C\\u0D57\\u0D82\\u0D83\\u0DCF-\\u0DD1\\u0DD8-\\u0DDF\\u0DF2\\u0DF3\\u0F3E\\u0F3F\\u0F7F\\u102B\\u102C\\u1031\\u1038\\u103B\\u103C\\u1056\\u1057\\u1062-\\u1064\\u1067-\\u106D\\u1083\\u1084\\u1087-\\u108C\\u108F\\u109A-\\u109C\\u17B6\\u17BE-\\u17C5\\u17C7\\u17C8\\u1923-\\u1926\\u1929-\\u192B\\u1930\\u1931\\u1933-\\u1938\\u19B0-\\u19C0\\u19C8\\u19C9\\u1A19-\\u1A1B\\u1A55\\u1A57\\u1A61\\u1A63\\u1A64\\u1A6D-\\u1A72\\u1B04\\u1B35\\u1B3B\\u1B3D-\\u1B41\\u1B43\\u1B44\\u1B82\\u1BA1\\u1BA6\\u1BA7\\u1BAA\\u1C24-\\u1C2B\\u1C34\\u1C35\\u1CE1\\u1CF2\\uA823\\uA824\\uA827\\uA880\\uA881\\uA8B4-\\uA8C3\\uA952\\uA953\\uA983\\uA9B4\\uA9B5\\uA9BA\\uA9BB\\uA9BD-\\uA9C0\\uAA2F\\uAA30\\uAA33\\uAA34\\uAA4D\\uAA7B\\uABE3\\uABE4\\uABE6\\uABE7\\uABE9\\uABEA\\uABEC]"),
    connector_punctuation: RegExp("[\\u005F\\u203F\\u2040\\u2054\\uFE33\\uFE34\\uFE4D-\\uFE4F\\uFF3F]")
}

IDENTIFIER_PAT = /^[a-z_$][_a-z0-9$]*$/i
STRING_MODIFIERS = 'urfvURFV'
UNARY_POSTFIX = makePredicate([ "--", "++" ])

PRECEDENCE = (def(a, ret):
    for i in range(a.length):
        b = a[i]
        for j in range(b.length):
            ret[b[j]] = i+1
    return ret
)([
    # lowest precedence
    [ "||" ],
    [ "&&" ],
    [ "|" ],
    [ "^" ],
    [ "&" ],
    [ "==", "===", "!=", "!==" ],
    [ "<", ">", "<=", ">=", "in", "instanceof" ],
    [ ">>", "<<", ">>>" ],
    [ "+", "-" ],
    [ "*", "/", "//", "%" ],
    [ "**" ]
    # highest precedence
], {})
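
# e.g. PRECEDENCE["||"] is 1 (lowest) and PRECEDENCE["**"] is 11 (highest)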

def is_letter(code):
    return code >= 97 and code <= 122 or code >= 65 and code <= 90 or code >= 170 and UNICODE.letter.test(String.fromCharCode(code))

def is_digit(code):
    return code >= 48 and code <= 57

def is_alphanumeric_char(code):
    return is_digit(code) or is_letter(code)

def is_unicode_combining_mark(ch):
    return UNICODE.non_spacing_mark.test(ch) or UNICODE.space_combining_mark.test(ch)

def is_unicode_connector_punctuation(ch):
    return UNICODE.connector_punctuation.test(ch)

def is_string_modifier(val):
    # the whole word must consist of string-prefix characters (r/u/f/v)
    for ch in val:
        if ch not in STRING_MODIFIERS:
            return False
    return True

def is_identifier(name):
    return not RESERVED_WORDS(name) and IDENTIFIER_PAT.test(name)

def is_identifier_start(code):
    return code is 36 or code is 95 or is_letter(code)

def is_identifier_char(ch):
    code = ch.charCodeAt(0)
    return is_identifier_start(code) or is_digit(code) or code is 8204 or code is 8205 or is_unicode_combining_mark(ch) or is_unicode_connector_punctuation(ch)

def parse_js_number(num):
    if RE_HEX_NUMBER.test(num):
        return parseInt(num.substr(2), 16)
    elif RE_OCT_NUMBER.test(num):
        return parseInt(num.substr(1), 8)
    elif RE_DEC_NUMBER.test(num):
        return parseFloat(num)
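
# e.g. parse_js_number("0x1F") -> 31, parse_js_number("017") -> 15 and
# parse_js_number("1.5e3") -> 1500; unmatched input yields no value, which
# read_num detects via isNaN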

def is_token(token, type, val):
    type_subtype = type.split(':')
    type = type_subtype[0]
    subtype = type_subtype[1]
    return token.type is type and (subtype ? token.subtype is subtype : True) \
             and (val is None or val is undefined
                  or Array.isArray(val) and val.indexOf(token.value) >= 0
                  or token.value is val)
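
# e.g. is_token(tok, "punc", ":") matches on type and exact value, while
# is_token(tok, "string:r") additionally requires the "r" subtype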


def js_error(message, filename, line, col, pos, is_eof):
    ast.Node.warn("ERROR: {message} [{file}:{line},{col}]", {
        message: message,
        file: filename,
        line: line,
        col: col
    })
    raise ParseError(message, line, col, pos, is_eof, filename)

EX_EOF = {}  # sentinel raised on unexpected end-of-input, caught by with_eof_error

def tokenizer($TEXT, filename):  # {{{
    S = {
        text: $TEXT.replace(/\r\n?|[\n\u2028\u2029]/g, "\n").replace(/\uFEFF/g, ""),
        filename: filename,
        pos: 0,
        tokpos: 0,
        line: 1,
        tokline: 0,
        col: 0,
        tokcol: 0,
        newline_before: False,
        regex_allowed: False,
        comments_before: [],
        whitespace_before: [],
        newblock: False,    # better: rename to newblock_expect
        endblock: False,    # doesn't affect tokenizer logic, can be removed
        cached_whitespace: "",
        prev: undefined,
        in_scope: ["block"],   # "block" - means block indentation scope
                            # in_scope - tracks scopes of []{}() and block:
        comma_expect: False,
        block_expect: False
    }
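    # pos/line/col track the read head; tokpos/tokline/tokcol mark the start
    # of the token currently being read (set by start_token)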
    def peek():
        return S.text.charAt(S.pos)

    def prevChar():
        return S.text.charAt(S.tokpos - 1)

    def next(signal_eof, in_string):
        ch = S.text.charAt(S.pos)
        S.pos += 1
        if signal_eof and not ch:
            raise EX_EOF

        if ch is "\n":
            S.newline_before = S.newline_before or not in_string
            S.line += 1
            S.col = 0
        else:
            S.col += 1
        return ch

    def find(what, signal_eof):
        pos = S.text.indexOf(what, S.pos)
        if signal_eof and pos is -1:
            raise EX_EOF
        return pos

    def start_token():
        S.tokline = S.line
        S.tokcol = S.col
        S.tokpos = S.pos

    def token(full_type, value, is_comment, keep_newline):
        _full_type = full_type.split(':')
        type = _full_type[0]
        subtype = _full_type[1]
        _value = None

        # a "/" that follows one of these token types begins a regexp literal
        # rather than division
        S.regex_allowed = type is "operator" and not UNARY_POSTFIX[value] \
            or type is "keyword" and KEYWORDS_BEFORE_EXPRESSION(value) \
            or type is "punc" and PUNC_BEFORE_EXPRESSION(value)

        # merge "is not" into a single "!==" operator token
        if type is "operator" and value is "is" and S.text.substr(S.pos).trimLeft().substr(0, 4).trimRight() is "not":
            next_token()
            value = "!=="

        if type is "operator" and OP_MAP[value]:
            _value = value
            value = OP_MAP[value]

        ret = {
            type: type,
            subtype: subtype,
            value: value,
            _value: _value,
            line: S.tokline,
            col: S.tokcol,
            pos: S.tokpos,
            endpos: S.pos,
            newline_before: S.newline_before,
            comma_expected: S.comma_expect,
            file: filename
        }
        S.comma_expect = False
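        # block_expect flags that a def (or get/set method) header is being
        # tokenized, so that the upcoming "(" opens a "def" scope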
        if not S.block_expect:
            S.block_expect = (
                S.in_scope[-1] is "{}"
                and (
                    S.prev.type is "keyword" and S.prev.value is "def"
                    and (
                        value is "("
                        or type in ["name", "string", "num"]
                        or full_type is 'operator:keyword'
                    )
                    or S.prev.type is "name" and S.prev.value in ["get", "set"]
                    and (
                        type in ["name", "string", "num"]
                        or full_type is 'operator:keyword'
                    )
                )
                or S.in_scope[-1] is "()" and S.prev.type is "keyword" and S.prev.value is "def"
                and (
                    value is "("
                    or type is "name"
                )
            )
        else:
            # consume the pending flag: note locally that a block was expected
            # for this token, then reset the shared state
            block_expect = True
            S.block_expect = False

        if not is_comment:
            ret.comments_before = S.comments_before
            S.comments_before = []
            # make note of any newlines in the comments that came before
            for i in range(len(ret.comments_before)):
                ret.newline_before = ret.newline_before or ret.comments_before[i].newline_before

        if not keep_newline:
            S.newline_before = False

        top_scope = S.in_scope[-1]
        if type is "punc":
            if value is ":":
                if (top_scope in ["block", "def", "[]"]):
                    if top_scope is "def":
                        S.in_scope.pop()
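                    # open a new block only if nothing but whitespace or a
                    # comment follows the ":" on this line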
                    if (not S.text.substring(S.pos, find("\n")).trim() or not S.text.substring(S.pos, find("#")).trim()):
                        S.newblock = True
                        S.in_scope.push("block")

            elif value is "[":
                if S.prev and S.prev.type is "name":
                    S.in_scope.push("[:]") # prop, index, slice
                else:
                    S.in_scope.push("[]")
            elif value is "(":
                if block_expect or S.block_expect:
                    S.in_scope.push("def")
                    S.block_expect = False
                S.in_scope.push("()")
            elif value is "{" :
                S.in_scope.push("{}")
            # scope exits
            elif value in ["]", ")", "}"]:
                if top_scope[-1] is value:
                    S.in_scope.pop()
                else:
                    console.log('mismatch: ' + value)
            elif value is "endblock":
                ret.value = "}"
                if top_scope is "block":
                    S.in_scope.pop()
                    if S.in_scope[-1] in ["[]", "{}", "()"] :
                        S.comma_expect = True
                else:
                    console.log('mismatch: ' + value)
        S.prev = ast.Token(ret)
        return S.prev

    # transforms leading whitespace into block tokens unless it is part of an
    # array/hash, and skips non-leading whitespace
    def parse_whitespace():
        leading_whitespace = ""
        whitespace_exists = False
        while WHITESPACE_CHARS(peek()):
            whitespace_exists = True
            ch = next()
            if ch is "\n":
                leading_whitespace = ""
            else:
                leading_whitespace += ch
        if peek() is not "#":
            if not whitespace_exists:
                leading_whitespace = S.cached_whitespace
            else:
                S.cached_whitespace = leading_whitespace
            if S.newline_before: # or S.endblock  - S.newline_before is always True if S.endblock is True
                return test_indent_token(leading_whitespace)
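
    # test_indent_token returns 1 on increased indentation (a new block),
    # -1 on decreased indentation (endblock) and 0 when the level is unchanged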

    def test_indent_token(leading_whitespace):
        most_recent = S.whitespace_before[-1] or ""
        S.endblock = False
        if S.in_scope[-1] is 'block' and leading_whitespace is not most_recent:
            if S.newblock and leading_whitespace and leading_whitespace.indexOf(most_recent) is 0:
                # positive indent, new block
                S.newblock = False
                S.whitespace_before.push(leading_whitespace)
                return 1
            elif most_recent and most_recent.indexOf(leading_whitespace) is 0:
                # negative indent, block is ending
                S.endblock = True
                S.whitespace_before.pop()
                return -1
            else:
                # indent mismatch, inconsistent indentation
                TokenizerError("Inconsistent indentation")
        else:
            return 0

    def read_while(pred):
        ret = ""
        i = 0
        while (ch = peek()) and pred(ch, i):
            i += 1
            ret += next()
        return ret

    def TokenizerError(err, is_eof):
        js_error(err, filename, S.tokline, S.tokcol, S.tokpos, is_eof)

    def read_num(prefix):
        has_e = False
        after_e = False
        has_x = False
        has_dot = prefix is "."
        num = read_while(def(ch, i):
            nonlocal after_e
            code = ch.charCodeAt(0)
            tmp_ = code
            if tmp_ is 120 or tmp_ is 88:
                # xX
                return has_x ? False : has_x = True
            elif tmp_ is 101 or tmp_ is 69:
                # eE
                return has_x ? True : (has_e ? False : has_e = after_e = True)
            elif tmp_ is 45:
                # -
                return after_e or i is 0 and not prefix
            elif tmp_ is 43:
                # +
                return after_e
            elif tmp_ is 46:
                # .
                after_e = False
                return not has_dot and not has_x and not has_e ? has_dot = True : False
            return is_alphanumeric_char(code)
        )
        if prefix:
            num = prefix + num

        valid = parse_js_number(num)
        if not isNaN(valid):
            return token("num", valid)
        else:
            TokenizerError("Invalid syntax: " + num)


    def read_escaped_char(in_string, digester):
        digester = digester or def(in_str):
            return next(True, in_str)

        ch = digester(in_string)
        tmp_ = ch.charCodeAt(0)
        if tmp_ is 110:
            return "\n"
        elif tmp_ is 114:
            return "\r"
        elif tmp_ is 116:
            return "\t"
        elif tmp_ is 98:
            return "\b"
        elif tmp_ is 118:
            return "\u000b" # \v
        elif tmp_ is 102:
            return "\f"
        elif tmp_ is 48:
            return "\0"
        elif tmp_ is 120:
            return String.fromCharCode(hex_bytes(2, digester))  # \x
        elif tmp_ is 117:
            return String.fromCharCode(hex_bytes(4, digester))  # \u
        elif tmp_ is 10:
            return ""   # newline
        else:
            return ch

    def hex_bytes(n, digester):
        num = 0
        for i in range(n):
            digit = parseInt(digester(), 16)
            if isNaN(digit):
                TokenizerError("Invalid hex-character pattern in string")
            num = num << 4 | digit
        return num
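
    # e.g. the escape \x41 drives hex_bytes(2, digester) -> 65, i.e. "A"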

    @with_eof_error("Unterminated string constant")
    def read_string(modifier):
        token_type = "string"
        if modifier:
            token_type += ":" + modifier

        quote = next()
        ret = ""
        if peek() is quote:
            # two quotes in a row
            next(True)
            if peek() is quote:
                # multiline string (3 quotes in a row)
                next(True)
                i = find(quote + quote + quote, True) # location of string terminator
                if i is not -1:
                    tmp = S.text.substring(S.pos, i)
                    S.pos = i + 3
                    while tmp.length:
                        # parse escaped characters
                        if tmp[0] is "\\":
                            tmp = tmp[1:]
                            ret += read_escaped_char(True, def():
                                nonlocal tmp
                                ch = tmp[0]
                                tmp = tmp[1:]
                                return ch
                            )
                        else:
                            ret += tmp[0]
                            tmp = tmp[1:]

                    # add traversed lines to the counter
                    find_newlines = ret.match(/\n/g)
                    if find_newlines:
                        S.line += find_newlines.length
                    return token(token_type, ret)
            else:
                return token(token_type, "")

        while True:
            ch = next(True)
            if ch is "\n":
                TokenizerError("End of line while scanning string literal.")

            if ch is "\\":
                if peek() is "\n":
                    # skip newlines if escaped by backslash
                    next(True)
                    continue
                else:
                    ch = read_escaped_char(True)
            elif ch is quote:
                break
            ret += ch
        return token(token_type, ret)
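
    # note: string modifiers survive as the token subtype, e.g. r"..."
    # produces a token of type "string" with subtype "r"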

    def read_line_comment(shebang=False):
        if not shebang:
            next()
        i = find("\n")

        if i is -1:
            ret = S.text.substr(S.pos)
            S.pos = S.text.length
        else:
            ret = S.text.substring(S.pos, i)
            S.pos = i

        return token(shebang ? "shebang" : "comment:line", ret, True)

    @with_eof_error("Unterminated multiline comment")
    def read_multiline_comment():
        next()
        i = find("*/", True)
        text = S.text.substring(S.pos, i)
        a = text.split("\n")
        n = a.length
        # update stream position
        S.pos = i + 2
        S.line += n - 1
        if n > 1:
            S.col = a[n - 1].length
        else:
            S.col += a[n - 1].length
        S.col += 2
        S.newline_before = S.newline_before or "\n" in text
        return token("comment:multiline", text, True)

    def read_name():
        backslash = False
        name = ""
        escaped = False

        while (ch = peek()) is not None:
            if not backslash:
                if ch is "\\":
                    if S.text.charAt(S.pos + 1) is "\n":
                        # stitching lines together
                        S.pos += 2
                        continue
                    else:
                        # actual backslash
                        escaped = backslash = True
                        next()
                elif is_identifier_char(ch):
                    name += next()
                else:
                    break
            else:
                if ch is not "u":
                    TokenizerError("Expecting UnicodeEscapeSequence -- uXXXX")
                ch = read_escaped_char()
                if not is_identifier_char(ch):
                    TokenizerError("Unicode char: " + ch.charCodeAt(0) + " is not valid in identifier")
                name += ch
                backslash = False

        if KEYWORDS(name) and escaped:
            # an escaped word that spells a keyword (e.g. \u0064ef -> "def")
            # gets its first character re-escaped so it stays an identifier
            hex = name.charCodeAt(0).toString(16).toUpperCase()
            name = "\\u" + "0000".substr(hex.length) + hex + name.slice(1)
        return name

    @with_eof_error("Unterminated regular expression")
    def read_regexp(regexp):
        prev_backslash = False

        in_class = False
        verbose_regexp = False
        in_comment = False

        if peek() is '/':
            next(True)
            if peek() is '/':
                verbose_regexp = True
                next(True)
            else: # empty regexp (//)
                mods = read_name()
                return token("regexp", RegExp(regexp, mods))

        while ch = next(True):
            if in_comment:
                if ch is '\n':
                    in_comment = False
                continue

            if prev_backslash:
                regexp += "\\" + ch
                prev_backslash = False
            elif ch is "[":
                in_class = True
                regexp += ch
            elif ch is "]" and in_class:
                in_class = False
                regexp += ch
            elif ch is "/" and not in_class:
                if verbose_regexp:
                    if peek() is not '/':
                        regexp += '\\/'
                        continue
                    next(True)
                    if peek() is not '/':
                        regexp += '\\/\\/'
                        continue
                    next(True)
                break
            elif ch is "\\":
                prev_backslash = True
            elif verbose_regexp and not in_class and ch in ' \n\r\t':
                pass
            elif verbose_regexp and not in_class and ch is '#':
                in_comment = True
            else:
                regexp += ch

        mods = read_name()
        return token("regexp", RegExp(regexp, mods))

    def read_operator(prefix):
        def grow(op):
            if not peek():
                return op

            bigger = op + peek()
            if OPERATORS(bigger):
                next()
                return grow(bigger)
            else:
                return op
        op = grow(prefix or next())
        if op in ["++", "--", "===", "!=="]:
            TokenizerError("Invalid operator «" + op + "»")
        elif op is '->':
            # emit '->' as punctuation; it is scanned here because the
            # tokenizer only reads single-character punc tokens
            return token('punc', op)
        return token("operator:symbol", op)

    def handle_slash():
        next()
        return (S.regex_allowed ? read_regexp("") : read_operator("/"))

    def handle_dot():
        next()
        return (is_digit(peek().charCodeAt(0)) ? read_num(".") : token("punc", "."))

    def read_word():
        word = read_name()
        return (KEYWORDS_ATOM(word) ? token("atom", word) : (
            not KEYWORDS(word) ? token("name", word) : (
                OPERATORS(word) and prevChar() is not "." ? token("operator:keyword", word) : token("keyword", word)
            )
        ))
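
    # dispatch order: atoms (True/False/None) -> "atom"; non-keywords -> "name";
    # word operators like "and"/"is" -> "operator:keyword" unless preceded by
    # "." (attribute access); remaining keywords -> "keyword"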

    def with_eof_error(eof_error):
        return def(cont):
            return def(x):
                try:
                    return cont(x)
                except as ex:
                    if ex is EX_EOF:
                        TokenizerError(eof_error, True)
                    else:
                        raise
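
    # used as a decorator factory (see read_string and read_regexp): the
    # wrapped reader turns an unexpected EX_EOF escape into a descriptive
    # tokenizer error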

    def next_token(force_regexp):
        if force_regexp not in (None, undefined):
            return read_regexp(force_regexp)

        indent = parse_whitespace()
        if indent is -1:
            return token("punc", "endblock", False, True)

        start_token()
        ch = peek()
        if not ch:
            return token("eof")

        code = ch.charCodeAt(0)
        tmp_ = code
        if tmp_ is 34 or tmp_ is 39:    # double-quote (") or single quote (')
            return read_string()
        elif tmp_ is 35:                # pound-sign (#)
            if S.pos is 0 and S.text.charAt(1) is '!':
                # shebang
                return read_line_comment(True)
            regex_allowed = S.regex_allowed
            S.comments_before.push(read_line_comment())
            S.regex_allowed = regex_allowed
            return next_token()
        elif tmp_ is 46:                # dot (.)
            return handle_dot()
        elif tmp_ is 47:                # slash (/)
            return handle_slash()

        if is_digit(code):
            return read_num()

        if PUNC_CHARS(ch):
            return token("punc", next())

        if OPERATOR_CHARS(ch):
            return read_operator()

        if code is 92 and S.text.charAt(S.pos + 1) is "\n":
            # backslash will consume the newline character that follows
            next()
            # backslash
            next()
            # newline
            S.newline_before = False
            return next_token()

        if code is 92 or is_identifier_start(code):
            tok = read_word()
            if peek() in '\'"' and is_string_modifier(tok.value):
                mods = tok.value.toLowerCase()
                string_tok = read_string(mods)
                tok.endpos = string_tok.endpos
                tok.value = string_tok.value
                tok.subtype = string_tok.subtype
                tok.type = string_tok.type
            return tok

        TokenizerError("Unexpected character «" + ch + "»")

    next_token.context = def(nc):
        nonlocal S
        if nc:
            S = nc
        return S

    return next_token
# }}}
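
# a minimal usage sketch (hypothetical input):
#   get_next = tokenizer("a = 1\n", "example.pyj")
#   tok = get_next()    # -> name token "a", then operator "=", then num 1, ...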
