import { ISourceList, SimpleTextContent } from "@cafetextual/util";
import G from "@cafetextual/util/dist/src/type/G";
import Assert from "@cafetextual/util/dist/src/assert/Assert";

/**
 * Simple line parsing infrastructure.
 *
 * Manages a cursor made of a current line index and a character position
 * within that line, and offers small matching / extraction primitives
 * (`eq`, `la`, `extract`, `toToken`, ...) that move the character position.
 *
 * Lines are normally read through an `ISourceList` (`content`). A legacy
 * code path that reads from a local `allLines` array is still present and is
 * used whenever `content` is not set.
 *
 * NOTE(review): the copy of this file that was reviewed had lost a span of
 * text covering the body of `matchFromList` and the beginning of `text`.
 * Both members below are reconstructions - compare them against version
 * control before relying on them, and restore any other members that may have
 * lived between the two.
 */
export default class BaseSimpleParser {
  // default implementation - maintained locally.
  /** Legacy line storage; only consulted when `content` is not set. */
  protected allLines: string[];
  /** Index of the current line (in `content` or `allLines`). */
  protected currentLineIndex: number;
  /** Line source; when set it takes precedence over `allLines`. */
  protected content: ISourceList;
  /** Text of the current line, or null once `nextLine()` ran out of lines. */
  protected value: string;
  /** Character position inside `value`; -1 once `nextLine()` ran out of lines. */
  protected pos: number;

  static TAB: string = "\t";

  /** Returns the current character position within the current line. */
  getPos(): number {
    return this.pos;
  }

  /**
   * Inits the parser with text.
   *
   * @param value a string value (may have multiple lines separated by "\n"
   *              characters) or an ISourceList
   * @param charOffset character offset to start at within the first line
   * @return returns self for chaining (convenience only)
   */
  init(value: string | ISourceList, charOffset: number = 0): BaseSimpleParser {
    if (G.isStr(value)) {
      // Strings are routed through SimpleTextContent ("stress testing" in the
      // original) instead of being split into `allLines` locally.
      const textContent = new SimpleTextContent();
      this.content = textContent;
      textContent.init(value as string);
    } else {
      this.allLines = null;
      this.content = value as ISourceList;
    }
    this.setPosition(this.content.first(), charOffset);
    return this;
  } // init

  /**
   * Moves the cursor to the given line and character index and reloads the
   * current line text.
   *
   * In legacy (`allLines`) mode both indices are range-checked with
   * `Assert.assert`; in `content` mode they are stored unchecked.
   */
  setPosition(lineIndex: number, charIndex: number = 0): void {
    if (!this.content) {
      Assert.assert(lineIndex < this.allLines.length);
      Assert.assert(charIndex <= this.allLines[lineIndex].length);
    }
    this.currentLineIndex = lineIndex;
    this.pos = charIndex;
    this.refresh();
  } // setPosition

  /** Reloads `value` from the line source for the current line index. */
  refresh(): void {
    this.value = this.content
      ? this.content.indexToValue(this.currentLineIndex)
      : this.allLines[this.currentLineIndex];
  }

  /**
   * Returns the value of the current line (exposed for testing).
   */
  getCurrentLine(): string {
    return this.value;
  }

  /**
   * Current line index (exposed for testing).
   */
  getCurrentLineIndex(): number {
    return this.currentLineIndex;
  }

  /**
   * True if there exists a next line (exposed for testing only).
   *
   * @param v optional prefix; when given (and non-empty) the next line must
   *          also start with it
   */
  hasNextLine(v: string = null): boolean {
    if (this.content) {
      // NOTE(review): this uses content.nextLineIndex() while nextLine() uses
      // content.nextIndex() - confirm both exist on ISourceList and agree.
      const nextIndex =
        this.currentLineIndex >= 0
          ? this.content.nextLineIndex(this.currentLineIndex)
          : -1;
      if (nextIndex < 0) {
        return false;
      }
      return v ? this.content.indexToValue(nextIndex).indexOf(v) === 0 : true;
    }

    if (this.currentLineIndex >= this.allLines.length - 1) {
      return false;
    }
    return v ? this.allLines[this.currentLineIndex + 1].indexOf(v) === 0 : true;
  }

  /**
   * Advances to the next line and resets the character position to 0.
   *
   * @return true if a next line existed; otherwise false, with `value` set to
   *         null and `pos` set to -1 (see `eof()`).
   */
  nextLine(): boolean {
    if (this.content) {
      const nextLineIndex = this.content.nextIndex(this.currentLineIndex);
      if (nextLineIndex >= 0) {
        this.currentLineIndex = nextLineIndex;
        this.value = this.content.indexToValue(nextLineIndex);
        this.pos = 0;
        return true;
      }
      this.currentLineIndex = -1;
      this.value = null;
      this.pos = -1;
      return false;
    }

    if (this.hasNextLine()) {
      Assert.assert(this.currentLineIndex < this.allLines.length);
      this.currentLineIndex++;
      this.value = this.allLines[this.currentLineIndex];
      this.pos = 0;
      return true;
    }

    // no next line - step past the end so eof() reports true
    this.currentLineIndex++;
    this.value = null;
    this.pos = -1;
    return false;
  }

  /** True once the cursor has moved past the last line. */
  eof(): boolean {
    if (this.content) {
      return this.currentLineIndex < 0;
    }
    return this.currentLineIndex >= this.allLines.length;
  }

  /**
   * Skips white space (spaces, tabs, newlines).
   *
   * @param token unused by this base implementation
   * @return true if at least one character was skipped
   */
  ws(token: string = "WS"): boolean {
    const pos0 = this.pos;
    this.skipWS();
    return this.pos > pos0;
  }

  /**
   * Skips spaces and tab characters.
   *
   * @return true if at least one character was skipped
   */
  wst(): boolean {
    const pos0 = this.pos;
    let c = this.next();
    while (c === " " || c === BaseSimpleParser.TAB) {
      this.pos++;
      c = this.next();
    }
    return this.pos > pos0;
  }

  /** Returns the slice `[fromIndex, toIndex)` of the current line. */
  textValue(fromIndex: number, toIndex: number): string {
    return this.value.slice(fromIndex, toIndex);
  }

  /**
   * Matches literal text at the current position, consuming it on success.
   * An empty `str` never matches.
   */
  match(str: string): boolean {
    return str.length > 0 && this.eq(str);
  }

  /**
   * Returns all the rest of the current line and moves the position to the
   * end of the line; returns null when already at the end.
   */
  rest(): string {
    if (this.pos >= this.value.length) {
      return null;
    }
    const text = this.value.substr(this.pos);
    this.pos = this.value.length;
    return text;
  } // rest

  /**
   * Returns the index of the first string in `tokens` that matches at the
   * current position, or -1 when none matches.
   *
   * NOTE(review): the body of this method was lost in the reviewed copy of the
   * file (only the doc comment, the signature and the start of the loop
   * survived). This is a reconstruction - confirm against version control.
   *
   * @param tokens     candidate strings, tried in order
   * @param andConsume when true the matched string is consumed; otherwise the
   *                   position is left unchanged
   */
  matchFromList(tokens: string[], andConsume: boolean): number {
    for (let i = 0; i < tokens.length; i++) {
      const pos0 = this.pos;
      if (this.eq(tokens[i])) {
        if (!andConsume) {
          this.pos = pos0;
        }
        return i;
      }
    }
    return -1;
  }

  /**
   * Extracts text up to (not including) the next space.
   *
   * NOTE(review): only the tail of this method survived in the reviewed copy
   * (`if (this.pos > pos0) return text; return null;`). Its original
   * signature and extraction rule are unknown; the extraction below mirrors
   * the unquoted branch of `qtext()` and must be confirmed against version
   * control.
   *
   * @return the extracted text, or null if nothing was consumed
   */
  text(): string {
    const pos0 = this.pos;
    const text = this.extract(null, " ", false);
    if (this.pos > pos0) {
      return text;
    }
    return null;
  } // text;

  /** Alias for `text()`. */
  id(): string {
    return this.text();
  }

  /**
   * Matches a parenthesised id: "(" followed by text up to the next space or
   * ")" and then a required ")".
   *
   * The four token parameters are not used by this base implementation.
   *
   * @return the text between the parentheses, or null (with the position
   *         restored) when the pattern does not match
   */
  pid(
    token: string = null,
    prefixToken: string = null,
    postfixToken: string = null,
    errorToken: string = null
  ): string {
    const pos0 = this.pos;
    if (this.eq("(")) {
      const idText = this.extract(null, " ", false, ")");
      if (this.eq(")")) {
        // tokenise success here.
        return idText;
      }
      // do error tokenising
    }
    // return null w/o tokenising
    this.pos = pos0;
    return null;
  }

  /**
   * Counts (and consumes) consecutive occurrences of `v` at the current
   * position.
   *
   * @param v     the string to count; an empty or null value yields 0
   * @param token unused by this base implementation
   */
  count(v: string, token: string = ""): number {
    // The original guard was `v.length < 0`, which can never be true.
    if (!v) {
      return 0;
    }
    let count = 0;
    while (this.eq(v)) {
      count++;
    }
    return count;
  } // count

  /**
   * Extracts optionally quoted text.
   *
   * If the text starts with `"` or `'` it is read up to the matching
   * (unescaped) closing quote, which is required; otherwise it is read up to
   * the next space.
   *
   * @param strict require an opening quote
   * @return the extracted text without quotes, or null (position restored when
   *         a quote requirement fails) if nothing matched
   */
  qtext(strict: boolean = false): string {
    const pos0 = this.pos;

    let endChar: string = null;
    if (this.eq('"')) {
      endChar = '"';
    } else if (this.eq("'")) {
      endChar = "'";
    }

    if (strict && !endChar) {
      this.pos = pos0;
      return null;
    }

    const text = this.extract(null, endChar ? endChar : " ", false);

    if (endChar && !this.eq(endChar)) {
      // require closing quote
      this.pos = pos0;
      return null;
    }

    return this.pos > pos0 ? text : null;
  }

  // internal

  /**
   * Look ahead n characters.
   *
   * @param n          number of characters to read
   * @param andConsume when true, a successful look-ahead also advances the
   *                   position
   * @return the next n characters, or null if fewer than n remain on the line
   */
  la(n: number = 1, andConsume: boolean = false): string {
    const out =
      this.value != null && this.pos + n <= this.value.length
        ? this.value.substr(this.pos, n)
        : null;
    if (out && andConsume) {
      this.pos += out.length;
    }
    return out;
  }

  /** Reads and consumes the next n characters (null if fewer remain). */
  private consume(n: number): string {
    return this.la(n, true /*andConsume*/);
  }

  /** True for a single space or tab character. */
  private isWS(c: string): boolean {
    return c === " " || c === "\t";
  }

  /**
   * Returns the index of the first token in `tokens` that matches at the
   * current position, or -1. Does not consume.
   *
   * @param tokens   candidate strings, tried in order
   * @param ignoreWS when true, a matching token that is a single space or tab
   *                 is not reported as a match
   */
  matchTokens(tokens: string[], ignoreWS: boolean = false): number {
    let lookahead: string = null;
    let lastLength = -1;
    for (let i = 0; i < tokens.length; i++) {
      const candidate = tokens[i];
      // Re-read the look-ahead only when the candidate length changes.
      if (lastLength !== candidate.length) {
        lookahead = this.la(candidate.length);
        lastLength = candidate.length;
      }
      if (candidate === lookahead && !(ignoreWS && this.isWS(lookahead))) {
        return i;
      }
    }
    return -1;
  } // matchTokens

  /**
   * Consumes characters until one of `tokens` matches at the current position
   * or the line ends. The matching token itself is not consumed.
   *
   * @param tokens   terminating strings
   * @param quoted   when true, a strictly quoted text (see `qtext(true)`) is
   *                 tried first and returned if non-empty
   * @param ignoreWS forwarded to `matchTokens`
   * @return the consumed text, or null if nothing was consumed
   */
  toToken(
    tokens: string[],
    quoted: boolean = false,
    ignoreWS: boolean = false
  ): string {
    const pos0 = this.pos;

    if (quoted) {
      const quotedText = this.qtext(true /*strict*/);
      if (quotedText) {
        return quotedText;
      }
    }

    // TO_OPTIMIZE - we've already matched the next token
    while (!this.done() && this.matchTokens(tokens, ignoreWS) < 0) {
      this.pos++;
    }

    return this.pos > pos0 ? this.value.substr(pos0, this.pos - pos0) : null;
  } // until

  /**
   * Matches `prefix`, then text up to `suffix`, then `suffix` itself.
   *
   * An empty/null prefix is treated as "no prefix required".
   *
   * NOTE(review): the suffix is passed to `toToken` as-is, so a null suffix
   * throws and an empty suffix yields no text, even though the code afterwards
   * tolerates a missing suffix. Behaviour kept as in the original.
   *
   * @return the text between prefix and suffix; null if the prefix does not
   *         match, or (with the position restored) if the suffix is missing
   */
  tokenWrapped(prefix: string, suffix: string): string {
    const pos0 = this.pos;

    if (prefix && !this.eq(prefix)) {
      return null;
    }

    const out = this.toToken([suffix]);

    if (suffix && !this.eq(suffix)) {
      this.pos = pos0;
      return null;
    }
    return out;
  } // tokenWrapped

  /**
   * Consumes any of `tokens` for as long as one of them matches at the
   * current position.
   *
   * Empty tokens are skipped: they match without advancing and would
   * otherwise loop forever.
   *
   * @param tokens    strings to consume
   * @param allowMany when false, returns right after the first match
   * @return the consumed text, or null if nothing was consumed
   */
  whileTokens(tokens: string[], allowMany: boolean = true): string {
    // lots of ways to optimise this, but probably not performance critical
    const pos0 = this.pos;
    const len = tokens.length;

    let found = true;
    while (!this.done() && found) {
      found = false;
      for (let i = 0; i < len; i++) {
        const tlen = tokens[i].length;
        if (tlen > 0 && this.la(tlen, false) === tokens[i]) {
          this.pos += tlen;
          found = true;
          if (!allowMany) {
            return this.textValue(pos0, this.pos);
          }
        }
      } // iteration over tokens
    }

    return this.pos > pos0 ? this.textValue(pos0, this.pos) : null;
  } // whileTokens

  /**
   * Accumulates characters from the current position until one fails the
   * acceptance rule or the line ends.
   *
   * A character is accepted when it is escaped (see `isEscaped`), or when it
   * passes `exp` (if given) and equals neither `until` nor `suffix` (if
   * given).
   *
   * @param exp         optional per-character test
   * @param until       optional single stop character
   * @param removeUntil when true (and `until` is not null) a following `until`
   *                    is consumed as well
   * @param suffix      optional second single stop character (never consumed)
   * @return the accumulated text ("" if none)
   */
  protected extract(
    exp: RegExp,
    until: string = null,
    removeUntil: boolean = true,
    suffix: string = null
  ): string {
    // TO_OPTIMISE - get the return value by indices rather than copying data
    let result = "";
    let c = this.next();

    // TODO - support multiple character suffixes
    while (
      c &&
      (this.isEscaped() ||
        ((!exp || exp.test(c)) && // TODO - very inefficient mechanism, deprecate
          (!until || c !== until) && // TODO - turn this into ws boolean
          (!suffix || c !== suffix)))
    ) {
      result += c; // copying is handier for debugging than slicing by position
      this.pos++;
      c = this.next();
    }

    if (until != null && removeUntil) {
      this.eq(until);
    }

    return result;
  }

  /** Returns the character at the current position, or '' at end of line. */
  protected next(): string {
    return this.done() ? "" : this.value.charAt(this.pos);
  }

  /**
   * True if the character at the current position is preceded by an odd
   * number of backslashes.
   */
  protected isEscaped(): boolean {
    let count = 0;
    let j = this.pos;
    while (j > 0 && this.value.charAt(j - 1) === "\\") {
      count++;
      j--;
    }
    // is escaped if preceded by an odd number of escape characters
    return count % 2 === 1;
  } // isEscaped

  /**
   * Unconsumed part of the current line (debug only).
   * @private
   */
  get remainder(): string {
    return this.value && !this.eof() && !this.done()
      ? this.value.substr(this.pos, this.value.length)
      : "";
  }

  /** Already consumed part of the current line (debug only). */
  get currentMatch(): string {
    return this.value ? this.value.substr(0, this.pos) : "";
  }

  /** `"<consumed>" <-- "<remaining>"` summary of the cursor (debug only). */
  get matchStatus(): string {
    return '"' + this.currentMatch + '" <-- "' + this.remainder + '"';
  }

  /**
   * Tests whether the text at the current position equals `char` and consumes
   * it if so.
   *
   * @param char         one or more characters to match; an empty string never
   *                     matches
   * @param ignoreEscape when true (default) a match that is escaped (preceded
   *                     by an odd number of backslashes) is rejected
   * @return true if matched (position advanced), false otherwise (position
   *         unchanged)
   */
  eq(char: string, ignoreEscape: boolean = true): boolean {
    const len = char.length;
    if (len === 0 || this.value == null || this.pos + len > this.value.length) {
      return false;
    }
    if (ignoreEscape && this.isEscaped()) {
      return false;
    }
    const matched =
      len === 1
        ? this.value.charAt(this.pos) === char
        : this.value.substr(this.pos, len) === char;
    if (matched) {
      this.pos += len;
    }
    return matched;
  } // eq

  /**
   * Advances past spaces, tabs and newlines.
   *
   * The original compared against the two-character literals '/t' and '/n',
   * which can never equal a single character, so only spaces were skipped.
   */
  protected skipWS(): void {
    while (!this.done()) {
      const c = this.value.charAt(this.pos);
      if (c !== " " && c !== "\t" && c !== "\n") {
        break;
      }
      this.pos++;
    }
  }

  /** Delegates to `content.isBefore(i1, i2)`; requires `content` to be set. */
  isBefore(i1: number, i2: number): boolean {
    return this.content.isBefore(i1, i2);
  }

  /**
   * True when there is nothing left to read on the current line, including
   * the case where there is no current line (`value` is null after
   * `nextLine()` ran out of lines).
   */
  protected done(): boolean {
    return this.value == null || this.pos >= this.value.length;
  }
} // class