All files / parser tokenizer.js

87.1% Statements 54/62
73.68% Branches 14/19
91.67% Functions 11/12
86.67% Lines 52/60
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149                                516x 25800x           25800x   258x 258x 258x 258x 258x 258x 258x 258x 258x       228x                       257x 257x 257x 257x 257x             1741x 1741x 1741x 1741x 2263x 253x 2010x 88036x 88036x 2010x 2010x 2010x 2010x   2010x 2010x 1488x 1488x 1488x           1488x   522x 522x 522x     522x                                               19x 19x 19x 19x 37x 19x                 4x                   24x 24x       135x 135x      
/**
 * Callback run when a matcher's pattern matches. Receives the tokenizer and
 * the matched text; returning `undefined` discards the match (no token is
 * produced for it).
 */
type TokenCallback = (self: Tokenizer, matched: string) => unknown;

/**
 * Matcher as supplied to the constructor: the pattern source (a string that is
 * compiled with `new RegExp`), the callback, and optionally the token type.
 * Any further elements are carried along but not read by this class.
 */
type TokenMatcherSpec = readonly [string, TokenCallback, ...unknown[]];

/** Matcher after construction: same layout, with the pattern compiled. */
type CompiledTokenMatcher = [RegExp, TokenCallback, ...unknown[]];

/** Cursor snapshot as produced by `save()` and accepted by `reset()`. */
type TokenizerPosition = { index: number, line: number, column: number };

/** Entry appended to `positions` for every token returned by `next()`. */
type TokenPosition = { line: number, column: number, index: number, length: number };

/** Token shape returned by `next()`: `{value}` when untyped, else `[value, type]`. */
type LexedToken = { value: unknown } | [unknown, unknown];

/**
 * Regex-driven tokenizer. Matchers are grouped by scope; only the group of the
 * current scope is consulted, and within a group the first matching pattern
 * wins.
 */
export default class Tokenizer {
    /** Compiled matchers, indexed by scope. */
    tokenMatchers: CompiledTokenMatcher[][];
    /** Matchers of the current scope (`tokenMatchers[scope]`). */
    tokenMatcher: CompiledTokenMatcher[];
    /** Current scope index. */
    scope: number;
    /** Initialised empty; not read by this class (presumably for callbacks). */
    variables: Record<string, unknown>;
    /** Input that has not been consumed yet. */
    code: string;
    /** Initialised empty; not otherwise maintained by this class. */
    originalCode: string;
    /** One entry per token returned by `next()`, in order. */
    positions: TokenPosition[];
    index: number;
    line: number;
    column: number;

    /**
     * Creates a new Tokenizer object.
     * @param {Array<Array<Array>>} tokenMatchers - For each scope, a list of
     *  matchers `[pattern, onSuccess, type]`: the string source of a regex
     *  matching a token, a function returning the token value given the
     *  tokenizer and the matched text, and (optionally) the type of the
     *  returned token. The input arrays are not modified.
     * @param {number} [scope=0] - Starting scope of the lexer
     * @param {string[]} [tokenTypes=[]] - Accepted but currently unused.
     */
    constructor (tokenMatchers: ReadonlyArray<ReadonlyArray<TokenMatcherSpec>>, scope: number = 0, tokenTypes: string[] = []) {
        // Build fresh tuples instead of overwriting the caller's entries, so
        // the same matcher table can be used to construct several tokenizers.
        this.tokenMatchers = tokenMatchers.map(group => group.map(
            ([pattern, onSuccess, ...rest]): CompiledTokenMatcher =>
                [Tokenizer.compilePattern(pattern), onSuccess, ...rest]
        ));
        this.tokenMatcher = this.tokenMatchers[scope];
        this.scope = scope;
        this.variables = {};
        this.code = '';
        this.originalCode = '';
        this.positions = [];
        this.index = 0;
        this.line = 0;
        this.column = 0;
    }

    /**
     * Compiles a pattern source into a regex anchored at the start of input.
     * @param {string} source Pattern source as supplied by the caller.
     * @return {RegExp} The anchored regex.
     */
    private static compilePattern (source: string): RegExp {
        const escapes: Record<string, string> = {
            '/': '/',
            '\r': 'r',
            '\n': 'n',
            '\t': 't'
        };
        const escaped = source
            // Spell slashes and raw CR/LF/TAB characters as escape sequences.
            .replace(/[\/\r\n\t]/g, match => '\\' + escapes[match])
            // A purely alphabetic pattern must not match a prefix of a longer
            // word, so require end of input or a non-word character after it.
            .replace(/^[a-zA-Z]+$/, '$&(?=$|[^a-zA-Z0-9_])');
        // Group the pattern so `^` anchors every alternative, not just the
        // first one of a top-level `a|b`.
        return new RegExp('^(?:' + escaped + ')');
    }

    /**
     * @return {{index: number, line: number, column: number}} Snapshot of the
     *  current cursor, suitable for passing to `reset`.
     */
    save (): TokenizerPosition {
        return {
            index: this.index,
            line: this.line,
            column: this.column
        };
    }

    /**
     * Replaces the remaining input and repositions the cursor. `positions` is
     * left as is.
     * @param {string} code New code.
     * @param {?{index: number, line: number, column: number}} [info=null]
     *  Position info for the start of `code`; any falsy value means index 0,
     *  line 0, column 0.
     */
    reset (code: string, info: TokenizerPosition | null = null) {
        this.code = code;
        const start = info || {index: 0, line: 0, column: 0};
        this.index = start.index;
        this.line = start.line;
        this.column = start.column;
    }

    /**
     * Consumes input up to and including the next token.
     *
     * Matches whose callback returns `undefined` are skipped. If no matcher of
     * the current scope matches, the next single character is returned as an
     * untyped token.
     * @return Next token (`{value}` if the matcher has no type, otherwise
     *  `[value, type]`), or undefined if there are no more tokens.
     */
    next (): LexedToken | undefined {
        let code = this.code;
        while (code.length > 0) {
            // Must be tracked per iteration: a skip earlier in this call must
            // not disable the single-character fallback further down.
            let skipped = false;
            for (const [regex, onSuccess, type] of this.tokenMatcher) {
                const match = regex.exec(code);
                if (!match)
                    continue;
                const matched = match[0],
                    value = onSuccess(this, matched),
                    length = matched.length;
                // An empty match cannot make progress; try the next matcher.
                if (!length)
                    continue;
                this.code = code = code.slice(length);
                this.index += length;
                if (typeof value !== 'undefined') {
                    this.column += length;
                    // Recorded after advancing, i.e. column/index point just
                    // past the token.
                    this.positions.push({
                        line: this.line,
                        column: this.column,
                        index: this.index,
                        length
                    });
                    return typeof type === 'undefined' ? {value} : [value, type];
                }
                // Skipped text advances only `index`; `column` is not touched
                // here (presumably callbacks adjust it, e.g. via `newline`).
                skipped = true;
                break;
            }
            if (!skipped) {
                this.column++;
                this.index++;
                const value = code[0];
                this.code = code = code.slice(1);
                this.positions.push({
                    line: this.line,
                    column: this.column,
                    index: this.index,
                    length: 1
                });
                return {value};
            }
        }
        return undefined;
    }

    /**
     * Tokenizes a given sequence of code
     * @param {string} code - The desired code chunk or string to tokenize
     * @return An array of tokens. A token whose matcher has no type is
     *         `{value}`; otherwise it has the format `[value, TokenType]`,
     *         where `TokenType` is the type given in the matcher.
     */
    tokenize (code: string): LexedToken[] {
        this.reset(code);
        const result: LexedToken[] = [];
        let next: LexedToken | undefined;
        while ((next = this.next()))
            result.push(next);
        return result;
    }

    /**
     * Returns a formatted error message given a token.
     * Currently a placeholder that returns the token unchanged.
     *
     * @param {Token} token The token causing the error.
     * @return The token that was passed in.
     */
    formatError<T> (token: T): T {
        return token;
    }

    /**
     * @param tokenType Token type to look up.
     * @return {boolean} Always false.
     */
    has (tokenType: unknown): boolean {
        return false;
        //too slow since lexer will need to return {type: value} instead of array
        //['integer', 'decimal', 'string', 'identifier'].includes(tokenType);
    }

    /**
     * Switches to another scope; subsequent `next()` calls use its matchers.
     * @param {number} scope Index into the matcher table given at construction.
     */
    begin (scope: number) {
        this.scope = scope;
        this.tokenMatcher = this.tokenMatchers[scope];
    }

    /**
     * Advances the line counter and sets the column.
     * @param {number} [lines=1] Number of lines to add to `line`.
     * @param {number} [column=-1] New value of `column`.
     */
    newline (lines: number = 1, column: number = -1) {
        this.line += lines;
        this.column = column;
    }
}