import { ISourceList, SimpleTextContent } from "@cafetextual/util";
import G from "@cafetextual/util/dist/src/type/G";
import Assert from "@cafetextual/util/dist/src/assert/Assert";
/**
 * Simple line parsing infrastructure.
 *
 * Manages a cursor made of a current line (`currentLineIndex` / `value`) and a
 * character position inside that line (`pos`), and offers small primitives for
 * matching literals, skipping white space and extracting runs of text.
 *
 * Lines normally come from an `ISourceList` (`content`). A legacy fallback that
 * reads from the local `allLines` array is still present; nothing in this class
 * populates `allLines`, so that path is only live if a subclass fills it in.
 */
export default class BaseSimpleParser
{
    // Default implementation - lines maintained locally (legacy fallback, used only when `content` is unset).
    protected allLines: string[];
    // Index of the current line; the content path uses a negative value to mean "past the end".
    protected currentLineIndex: number;
    protected content: ISourceList;
    // Text of the current line (null once nextLine() has run off the end).
    protected value: string;
    // Character offset of the cursor inside `value`.
    protected pos: number;

    static TAB: string = "\t";

    /** Current character position within the current line. */
    getPos(): number {
        return this.pos;
    }

    /**
     * Initialises the parser with text.
     *
     * @param value a string (wrapped in a SimpleTextContent, which owns any line splitting) or an ISourceList
     * @param charOffset initial character offset within the first line
     * @return returns self for chaining (convenience only)
     */
    init(value: string | ISourceList, charOffset: number = 0): BaseSimpleParser {
        if (G.isStr(value)) {
            // Strings go through SimpleTextContent so that both input kinds share the ISourceList code path.
            const textContent = new SimpleTextContent();
            textContent.init(value as string);
            this.content = textContent;
        } else {
            this.allLines = null;
            this.content = value as ISourceList;
        }
        this.setPosition(this.content.first(), charOffset);
        return this;
    } // init

    /**
     * Moves the cursor to the given line and character offset and reloads `value`.
     * Bounds are only asserted on the legacy `allLines` path.
     */
    setPosition(lineIndex: number, charIndex: number = 0): void {
        if (!this.content) {
            Assert.assert(lineIndex < this.allLines.length);
            Assert.assert(charIndex <= this.allLines[lineIndex].length);
        }
        this.currentLineIndex = lineIndex;
        this.pos = charIndex;
        this.refresh();
    } // setPosition

    /**
     * Reloads `value` from the backing store for the current line index.
     */
    refresh(): void {
        this.value = this.content
            ? this.content.indexToValue(this.currentLineIndex)
            : this.allLines[this.currentLineIndex];
    }

    /**
     * Returns the value of the current line (exposed for testing).
     */
    getCurrentLine(): string {
        return this.value;
    }

    /**
     * Current line index (exposed for testing).
     */
    getCurrentLineIndex(): number {
        return this.currentLineIndex;
    }

    /**
     * True if there exists a next line (exposed for testing only).
     *
     * @param v optional prefix; when given, the next line must also start with it
     */
    hasNextLine(v: string = null): boolean {
        if (this.content) {
            // NOTE(review): this uses content.nextLineIndex() while nextLine() uses content.nextIndex() -
            // confirm against ISourceList that both exist and agree.
            const nextIndex: number = this.currentLineIndex >= 0
                ? this.content.nextLineIndex(this.currentLineIndex)
                : -1;
            if (nextIndex < 0) {
                return false;
            }
            return v ? this.content.indexToValue(nextIndex).startsWith(v) : true;
        }
        if (this.currentLineIndex < (this.allLines.length - 1)) {
            return v ? this.allLines[this.currentLineIndex + 1].startsWith(v) : true;
        }
        return false;
    }

    /**
     * Advances to the next line and resets `pos` to 0.
     *
     * @return false when there is no next line; in that case `value` becomes null and `pos` -1
     */
    nextLine(): boolean {
        if (this.content) {
            const nextLineIndex: number = this.content.nextIndex(this.currentLineIndex);
            if (nextLineIndex >= 0) {
                this.currentLineIndex = nextLineIndex;
                this.value = this.content.indexToValue(nextLineIndex);
                this.pos = 0;
                return true;
            }
            this.currentLineIndex = -1; // negative index is what eof() tests for on this path
            this.value = null;
            this.pos = -1;
            return false;
        }
        if (this.hasNextLine()) {
            Assert.assert(this.currentLineIndex < this.allLines.length);
            this.currentLineIndex++;
            this.value = this.allLines[this.currentLineIndex];
            this.pos = 0;
            return true;
        }
        // no next line - still step the index so that eof() (index >= allLines.length) becomes true
        this.currentLineIndex++;
        this.value = null;
        this.pos = -1;
        return false;
    }

    /** True once nextLine() has moved past the last line. */
    eof(): boolean {
        if (this.content) {
            return this.currentLineIndex < 0;
        }
        return this.currentLineIndex >= this.allLines.length;
    }

    /**
     * Skips white space (space, tab, newline).
     *
     * @param token unused here
     * @return true if at least one character was skipped
     */
    ws(token: string = "WS"): boolean {
        const pos0: number = this.pos;
        this.skipWS();
        return this.pos > pos0;
    }

    /**
     * Skips spaces and tabs only.
     *
     * @return true if at least one character was skipped
     */
    wst(): boolean {
        const pos0: number = this.pos;
        let c: string = this.next();
        while (c === " " || c === BaseSimpleParser.TAB) {
            this.pos++;
            c = this.next();
        }
        return this.pos > pos0;
    }

    /** Slice of the current line between two character indices (end exclusive). */
    textValue(fromIndex: number, toIndex: number): string {
        return this.value.slice(fromIndex, toIndex);
    }

    /**
     * Matches literal text at the cursor and consumes it on success.
     */
    match(str: string): boolean {
        return str.length > 0 && this.eq(str);
    }

    /**
     * Returns all the rest of the line and moves the cursor to its end;
     * null when the cursor is already at the end.
     */
    rest(): string {
        if (this.pos >= this.value.length) {
            return null;
        }
        const text: string = this.value.slice(this.pos);
        this.pos = this.value.length;
        return text;
    } // rest

    /**
     * Returns index of matched string from a vector of strings, or -1 when none matches.
     *
     * NOTE(review): the original body of this method was lost (the source was
     * truncated between a "<" and a ">"). It is reconstructed from the signature
     * and the doc comment: tokens are tried in order with eq(), and the cursor is
     * only left advanced when `andConsume` is true. Confirm against the upstream source.
     *
     * @param tokens candidate literals, tried in order
     * @param andConsume whether a successful match advances the cursor
     */
    matchFromList(tokens: string[], andConsume: boolean): number {
        for (let i = 0; i < tokens.length; i++) {
            const pos0: number = this.pos;
            if (this.eq(tokens[i])) {
                if (!andConsume) {
                    this.pos = pos0;
                }
                return i;
            }
        }
        return -1;
    } // matchFromList

    /**
     * Extracts a run of text up to (not including) the next space.
     *
     * NOTE(review): only the tail of this method survived in the source
     * ("if (pos > pos0) return text; return null"). The extraction call and the
     * optional, unused `token` parameter are reconstructed by analogy with
     * qtext()/pid()/ws() - confirm against the upstream source.
     *
     * @return the extracted text, or null when nothing was consumed
     */
    text(token: string = null): string {
        const pos0: number = this.pos;
        const text: string = this.extract(null, " ", false);
        if (this.pos > pos0) {
            return text;
        }
        return null;
    } // text;

    /** An identifier is currently just text(). */
    id(): string {
        return this.text();
    }

    /**
     * Parses a parenthesised id: "(" + text without spaces + ")".
     * The four token parameters are unused here.
     *
     * @return the text between the parentheses (possibly ""), or null with the cursor restored
     */
    pid(token: string = null, prefixToken: string = null, postfixToken: string = null, errorToken: string = null): string {
        const pos0: number = this.pos;
        if (this.eq('(')) {
            const idText: string = this.extract(null, " ", false, ")");
            if (this.eq(')')) {
                // tokenise success here.
                return idText;
            }
            // do error tokenising
        }
        // return null w/o tokenising
        this.pos = pos0;
        return null;
    }

    /**
     * Consumes consecutive occurrences of `v` and returns how many were found.
     *
     * @param v literal to count; an empty or null value counts as 0
     * @param token unused here
     */
    count(v: string, token: string = ""): number {
        if (!v) {
            return 0;
        }
        let count = 0;
        while (this.eq(v)) {
            count++;
        }
        return count;
    } // count

    /**
     * Extracts optionally quoted text.
     *
     * With an opening ' or " the text runs to the matching closing quote (which is
     * required and consumed); otherwise it runs to the next space.
     *
     * @param strict require an opening quote
     * @return the text without quotes, or null when nothing matched (cursor restored on quote failures)
     */
    qtext(strict: boolean = false): string {
        const pos0: number = this.pos;
        let endChar: string = null;
        if (this.eq('"')) {
            endChar = '"';
        } else if (this.eq("'")) {
            endChar = "'";
        }
        if (strict && !endChar) {
            this.pos = pos0;
            return null;
        }
        const text: string = this.extract(null, (endChar ? endChar : " "), false);
        if (endChar && !this.eq(endChar)) { // require closing quote
            this.pos = pos0;
            return null;
        }
        if (this.pos > pos0) {
            //appendToken(pos0);
            return text;
        }
        return null;
    }

    // internal

    /**
     * Look ahead n characters.
     *
     * @param n number of characters to read (expected to be >= 0)
     * @param andConsume advance the cursor past the returned text
     * @return the next n characters, or null when fewer than n remain
     */
    la(n: number = 1, andConsume: boolean = false): string {
        const out: string = (this.pos + n) <= this.value.length
            ? this.value.slice(this.pos, this.pos + n)
            : null;
        if (out && andConsume) {
            this.pos += out.length;
        }
        return out;
    }

    private consume(n: number): string {
        return this.la(n, true /*andConsume*/);
    }

    private isWS(c: string): boolean {
        return (c === " " || c === "\t");
    }

    /**
     * Tests whether one of the given tokens starts at the cursor - does not consume.
     *
     * @param tokens candidate literals, tried in order
     * @param ignoreWS when true, a token that is a single space or tab never counts as a match
     * @return index of the first matching token, or -1
     */
    matchTokens(tokens: string[], ignoreWS: boolean = false): number {
        let v1: string = null;
        let lastLength = -1;
        for (let i = 0; i < tokens.length; i++) {
            const v0: string = tokens[i];
            // reuse the previous look-ahead when consecutive tokens have the same length
            v1 = (lastLength === v0.length) ? v1 : this.la(v0.length);
            lastLength = v0.length;
            if ((v0 === v1) && !(ignoreWS && this.isWS(v1))) {
                return i;
            }
        }
        return -1;
    } // matchTokens

    /**
     * Consumes characters up to (not including) the first of the given tokens, or to end of line.
     *
     * @param tokens terminating literals
     * @param quoted first try a strictly quoted text (see qtext) and return it if present
     * @param ignoreWS passed through to matchTokens
     * @return the consumed text, or null when nothing was consumed
     */
    toToken(tokens: string[], quoted: boolean = false, ignoreWS: boolean = false): string {
        const pos0: number = this.pos;
        let out: string = null;
        if (quoted) {
            out = this.qtext(true /*strict*/);
            if (out) {
                return out;
            }
        }
        while (!this.done() && this.matchTokens(tokens, ignoreWS) < 0) { // TO_OPTIMIZE - we've already matched the next token, so
            this.pos++;
        }
        if (this.pos > pos0) {
            out = this.value.slice(pos0, this.pos);
        }
        return out;
    } // until

    /**
     * Extracts the text between a prefix and a suffix literal, consuming both.
     *
     * An empty or null prefix means "no prefix required". With an empty or null
     * suffix nothing is captured (toToken stops immediately on an empty token), so
     * the result is null while a matched prefix stays consumed.
     *
     * @return the wrapped text; null when the prefix is missing, or when the suffix is
     *         missing (cursor restored in the latter case)
     */
    tokenWrapped(prefix: string, suffix: string): string {
        const pos0: number = this.pos;
        if (prefix && !this.eq(prefix)) {
            return null;
        }
        // A null suffix is handed on as "" so matchTokens never dereferences null.
        const out: string = this.toToken([suffix ? suffix : ""]);
        if (suffix && !this.eq(suffix)) {
            this.pos = pos0;
            return null;
        }
        return out;
    } // tokenWrapped

    /**
     * Consumes text for as long as one of the given tokens matches at the cursor.
     *
     * @param tokens literals to accept; empty entries are ignored
     * @param allowMany when false, stop after the first matching token
     * @return the consumed text, or null when nothing matched
     */
    whileTokens(tokens: string[], allowMany: boolean = true): string {
        // lots of ways to optimise this, but probably not performance critical
        const pos0: number = this.pos;
        let found = true;
        while (!this.done() && found) {
            found = false;
            for (const token of tokens) {
                if (!token) {
                    continue; // an empty token "matches" without advancing and would loop forever
                }
                if (this.la(token.length, false) === token) {
                    this.pos += token.length;
                    found = true;
                    if (!allowMany) {
                        return this.textValue(pos0, this.pos);
                    }
                }
            } // iteration over tokens
        }
        return this.pos > pos0 ? this.textValue(pos0, this.pos) : null;
    } // whileTokens

    /**
     * Consumes characters one at a time until a stop condition is met.
     *
     * A character preceded by an odd number of backslashes is always consumed.
     * Otherwise consumption stops at the first character that fails `exp`, equals
     * `until`, or equals `suffix` (all single characters).
     *
     * @param exp optional per-character filter (note: RegExp.test is stateful on global/sticky patterns)
     * @param until optional single-character terminator
     * @param removeUntil when true, also consume the terminator if it follows
     * @param suffix optional second single-character terminator, never consumed
     * @return the consumed text ("" when nothing was consumed)
     */
    protected extract(exp: RegExp, until: string = null, removeUntil: boolean = true, suffix: string = null): string {
        let result = ''; // TO_OPTIMISE - optimise to get return value by indices rather than copying data
        let c: string = this.next(); // TODO - support multiple character suffixes
        while (c &&
            (this.isEscaped() ||
                ((!exp || exp.test(c)) &&          // TODO - very inefficient mechanism, deprecate
                 (!until || (c !== until)) &&      // TODO - turn this into ws boolean
                 (!suffix || (c !== suffix)))
            )
        ) {
            result += c; // TODO - optimisation would be to get substring from positions, but this is handier for debugging
            this.pos++;
            c = this.next();
        }
        if (until != null && removeUntil) {
            this.eq(until);
        }
        return result;
    }

    /** Character at the cursor, or '' at end of line. */
    protected next(): string {
        return this.done() ? '' : this.value.charAt(this.pos);
    }

    /** True when the character at the cursor is preceded by an odd number of backslashes. */
    protected isEscaped(): boolean {
        let count = 0;
        let j: number = this.pos;
        while ((j > 0) && (this.value.charAt(j - 1) === "\\")) {
            count++;
            j--;
        }
        return (count % 2) === 1; // is escaped if preceded by an odd number of escape characters
    } // isEscaped

    /**
     * debug only
     * @private
     */
    get remainder(): string {
        return this.value && !this.eof() && !this.done() ? this.value.slice(this.pos) : "";
    }

    /** debug only - the part of the current line before the cursor */
    get currentMatch(): string {
        return this.value ? this.value.substring(0, this.pos) : "";
    }

    /** debug only - consumed and remaining text side by side */
    get matchStatus(): string {
        return '"' + this.currentMatch + '" <-- "' + this.remainder + '"';
    }

    /**
     * Tests whether the literal `char` (one or more characters) starts at the
     * cursor and consumes it if so. An empty literal never matches.
     *
     * @param char literal to compare against
     * @param ignoreEscape when true (default) a literal that is backslash-escaped
     *        at the cursor is NOT matched; pass false to match regardless
     * @return true when matched (cursor advanced), false otherwise (cursor unchanged)
     */
    eq(char: string, ignoreEscape: boolean = true): boolean {
        const len: number = char.length;
        if (len === 0) {
            return false;
        }
        const fits: boolean = (this.pos + len) <= this.value.length;
        if (fits &&
            this.value.slice(this.pos, this.pos + len) === char &&
            !(ignoreEscape && this.isEscaped())) {
            this.pos += len;
            return true;
        }
        return false;
    } // eq

    /** Advances the cursor past spaces, tabs and newlines. */
    protected skipWS(): void {
        while (!this.done()) {
            const c: string = this.value.charAt(this.pos);
            if (c !== ' ' && c !== '\t' && c !== '\n') {
                break;
            }
            this.pos++;
        }
    }

    /** Delegates line-order comparison to the content list (requires `content` to be set). */
    isBefore(i1: number, i2: number): boolean {
        return this.content.isBefore(i1, i2);
    }

    /** True when the cursor is at or beyond the end of the current line. */
    protected done(): boolean {
        return (this.pos >= this.value.length);
    }
} // class