import BaseSimpleParser from "../simpletemplate/BaseSimpleParser";
import IParserDebug from "../parsertooling/IParserDebug";
import GRule, { GElement } from "./grammar/GRule";
import RuleState from "../parsertooling/RuleState";
import { PRuleResult, PElementResult } from "./PObjectMap";
import GErr from "./GErr";
import ParserDebug from "../parsertooling/ParserDebug";
import ElementState from "../parsertooling/ElementState";
import ParserState from "../parsertooling/ParserState";
import ObjectMapper from "./ObjectMapper";
import Assert from "@cafetextual/util/dist/src/assert/Assert";
import SeepParserDefs from "./SeepParserDefs";
/**
* Parses text from a SeepGrammar.
*
*
* Suffers from a rather awkward refactoring from a simple recursive algorithm to facilitate asynchronous
* execution.
*
*
* Notes:
* - the form is a bit awkward, reflecting the process by which it was incrementally refactored from a simpler recursive mechanism to one
* capable of asynchronous execution.
*
* - may be executed either through synchronous recursion, or asynchronously and non-recursively (where an injected IParserDebug controls and may decorate execution)
*
* - the entire stack is encapsulated in state objects, therefore it uses neither closures nor call stack state (as a typical recursive algorithm would)
*
* - by convention a method name with an underscore may be called asynchronously, and may contain other asynchronous points.
*
* Some debugging heuristics therefore:
* o only an async method (i.e. rule_iterate, element_iterate) may call another async method
* o no code may follow a call to an async method, i.e. the call must be immediately followed by a return (this is because the method itself may have engaged an asynchronous break).
* o conversely, any return statement in a method with underscores must be preceded by a call to a method with underscores
*
*
*/
export default class SeepParser extends BaseSimpleParser // implements IParserDebug
{
// Mechanism to inject a debug parser for stress testing.
/**
* Debugger shared by all parsers: the constructor copies this value onto each new
* instance's `debugger` field. Declared without an initialiser.
* @private
*/
static STRESS_TEST_DEBUGGER:IParserDebug
/**
* Creates a parser and installs whatever debugger is currently held in
* SeepParser.STRESS_TEST_DEBUGGER as this instance's debugger.
*/
constructor() {
super();
// pick up the statically injected (stress test) debugger at construction time
this.debugger = SeepParser.STRESS_TEST_DEBUGGER;
}
/**
* When true, _and_done runs the object mapper over the finished rule result.
* Here (defaulting to true) to prevent regressions breaking.
*/
autoObjectMap:boolean = true;
// these are features of the grammar - should be encapsulated in the rule / element / token objects
// for stress test add a (synchronous) debugger.
/** Debugger for this parser instance; set in the constructor from SeepParser.STRESS_TEST_DEBUGGER. */
debugger:IParserDebug;
/** Optional hook: when non-null, rule() calls it with the rule and the content text before parsing starts. */
static registerStressTestParse:Function;
/**
 * Attempts to match a rule, starting a fresh parse state for it.
 *
 * Builds a RuleState, registers rule_iterate as its continuation and hands the state to resume().
 * If a stress-test hook is registered it is told about the rule and the content text first.
 *
 * @param r the rule to match
 * @param asChild forwarded to RuleState.create (original note: a rule may also be matched on a portion of a line if asChild is true)
 * @param indent indentation string forwarded to RuleState.create
 * @param asLineRule forwarded to RuleState.create
 * @param consumeUntilEOF stored on the state; top_rule_done reads it to decide whether leftover text is an error
 * @return the ruleResult held by the state after resume() returns
 */
rule(r:GRule, asChild:boolean = false, indent:string = "", asLineRule:boolean = false, consumeUntilEOF:boolean = true ):PRuleResult {
    // let an optional stress-test hook observe every parse request
    if (SeepParser.registerStressTestParse != null) {
        const contentText = this.content.toString();
        SeepParser.registerStressTestParse(r, contentText);
    }
    const ruleState:RuleState = RuleState.create(this, r, asChild, indent, asLineRule);
    ruleState.consumeUntilEOF = consumeUntilEOF;
    ruleState.continueFn = this.rule_iterate;
    this.resume(ruleState);
    return ruleState.ruleResult;
} // rule
/**
 * Makes sure the state owns a PRuleResult and stamps it with the rule, the source text and
 * the current start position; also records the start position / line on the state itself.
 *
 * Does nothing when the existing result already carries pre-results, which is taken to mean
 * that initialisation has already happened.
 */
private basicRuleInit(state:RuleState):void {
    let result:PRuleResult = state.ruleResult;
    if (result != null) {
        if (result.preResults != null) {
            // kind of hacky - an existing pre-result means we've already initialised
            return;
        }
    } else {
        result = PRuleResult.create();
        state.ruleResult = result;
    }
    result.startLineIndex = this.currentLineIndex;
    result.matchedRule = state.rule;
    result.source = this.value;
    result.startIndex = this.pos;
    state.lastElementMatchedWS = false; // default, to remove
    state.pos0 = this.pos;
    state.line0 = this.currentLineIndex;
} // basicRuleInit
/**
 * Mechanism to add a secondary grammar - i.e. blank lines, or maybe comments.
 *
 * May, currently, match one element (per line). To do otherwise would require asynchronous
 * infrastructure in order to support debug on element matching.
 *
 * Sequence: skip whitespace, then - if the line continues with "//" or "----" - consume the rest
 * of it. If the line is then done, a PElementResult is recorded on the state (as a line secondary
 * result when isLine is true, otherwise as a pre-result), the parser moves to the next line and
 * the state's consumed position and lineRuleLineX are updated. Otherwise pos is reset to 0.
 *
 * Must be invoked at the start of a line (pos == 0); this is asserted.
 *
 * @param state rule state that receives the secondary result
 * @param isLine selects appendLineSecondaryResult (true) or appendPreResult (false)
 * @return true if a line has been matched
 */
private matchSecondaryGrammar(state:RuleState, isLine:boolean = false):boolean {
    if (this.eof()) {
        return false;
    }
    // fails if the line hasn't been correctly invoked
    if (this.pos != 0) {
        console.log(' bug');
    }
    Assert.assert(this.pos == 0);
    this.wst(); // assume 2 - secondary grammar is whitespace only.
    const hasCommentMarker = this.match("//") || this.match('----');
    if (hasCommentMarker) {
        this.rest();
    }
    if (this.done()) {
        const secondary:PElementResult = PElementResult.create();
        state.populateElement(secondary, GErr.NOT_AN_ERROR, state, null, true);
        if (!isLine) {
            state.appendPreResult(secondary);
        } else {
            state.appendLineSecondaryResult(secondary);
        }
        this.nextLine();
        this.advanceRuleConsmeToPosX(state);
        state.lineRuleLineX = this.currentLineIndex;
        return true;
    }
    // something other than secondary grammar is on this line: rewind to the line start
    this.pos = 0;
    return false;
} // matchSecondaryGrammar
/**
* Iterates on the elements of a rule or, once iteration is complete, invokes processing for success or failure.
*
* Flow, as visible in this method:
* 1. applies an element increment left pending by element_done;
* 2. on first entry (elementIndex < 0) initialises the rule result and, for non-child rules, checks
*    for eof, secondary-grammar lines, the required indent and stray whitespace after the indent;
* 3. while elements remain (and, for an OR rule, no result has been found yet) creates an
*    ElementState and calls element_iterate;
* 4. otherwise calls rule_process when the rule result is valid, or rule_fail when it is not.
*
* Every branch delegates to the next step and returns immediately (see the async heuristics in the class notes).
*
* @param state state of the rule being iterated
*/
private rule_iterate(state:RuleState):void {
if (state.incrementPending) {
// element_done deferred the move to the next element until this re-entry
state.incrementElement();
state.incrementPending = false;
}
// 1. iteration hasn't started, so initialise.
if (state.elementIndex < 0) { // initialise
this.basicRuleInit(state); // issue if we've got a pre-line,
// if indent fails to match, we can't match
if (!state.asChild) {
if (this.eof()) {
state.appendError(GErr.EOF, state, state.rule.child(0), false, "eof encountered" );
// ----> rule not matched because of eof
this.rule_fail(state);
return;
}
if (this.content) {
//content.setResult(currentLineIndex, state); // TODO - deprecate
}
// non-child rules always start matching at the beginning of the line
this.pos = 0;
// x. check if this is an empty line
var secondaryGrammarResult:boolean =this.matchSecondaryGrammar(state);
if (secondaryGrammarResult) {
// x.i we've matched a secondary line, appended it as a pre-result and consumed the line; hand rule_iterate to the debugger as the continuation.
this.debug(state, this.rule_iterate, ParserDebug.SECONDARY_PRE_LINE_DONE);
return;
}
if ((state.indent.length > 0) && !this.eq(state.indent)) {
state.appendError(GErr.NO_INDENT, state, state.rule.child(0), false, "indent not matched", null );
// ----> rule not matched because indent not matched
// FIX_THIS - rule is not initialised, so start line index will be wrong
state.ruleInit(this.currentLineIndex);
this.rule_fail(state);
return;
}
// formalise consumption of the indentation
if (state.indent && state.indent.length > 0) {
this.advanceRuleConsmeToPosX(state);
}
// whitespace after the indent is checked unless this is a line rule whose first element has lineAllowWS
if (!(state.asLineRule && state.rule.elements[0].lineAllowWS) ) {
if (this.wst()) {
// note - if we've got ws after required indent, by default this is an error.
// TODO - exception might be if first element includes a '$' indicator (for greedy consumption of ws)
state.appendError(GErr.WS_AFTER_INDENT, state, state.rule.child(0), false, "ws not allowed pos indent" )
state.ruleInit(this.currentLineIndex);
this.rule_fail(state);
return;
}
}
}
state.ruleInit(this.currentLineIndex);
}
// 2. if iteration is in progress, attempt to match the current element
var out:PRuleResult = state.ruleResult;
var elements:Array = state.rule.elements;
// an OR rule stops iterating once it has a result
var doneOnOR:boolean = state.rule.isOR && (state.elementIndex >= 0) && state.hasResult();
if (!doneOnOR && (state.elementIndex < elements.length) ) {
Assert.assert(state.element == elements[state.elementIndex]);
//state.element = elements[state.elementIndex]; // this could be done in state.incrementElement()
//trace(' ------------- extracting ' + state.rule.name + '[' +state.elementIndex + "]: '" + state.element.toString() + "' -------------- (pos=" + pos.toString() +")");
this.advanceRuleConsmeToPosX(state);// used to determine when we've got a zero length match, i.e. ^(var). This is the last known matched position, as it is set before every attempt to match an element
// 3a. create a child element state and attempt to extract
var elementState:ElementState = ElementState.create(state); // TO_OPTIMISE --> this data structure can be reused in iteration
Assert.assert(elementState.parentRuleState == state);
state.currentElementState = elementState;
// (!) ---> add element here
this.element_iterate(elementState);
return;
} // iteration over elements
// 3. no more elements to try (or an OR rule already has its result): succeed or fail on the result's validity
if (doneOnOR || (state.elementIndex >= state.rule.elements.length) ) {
if (state.ruleResult.isValid() ) {
//trace(" <--- rule successful (not including line rules) " + state.rule.name );
this.rule_process(state); // ignoring for the moment iteration on child rules
return;
} else {
//trace(" <--- XXXXX rule unsuccessful " + state.rule.name);
this.rule_fail(state);
return;
}
}
Assert.fail(); // shouldn't reach here
return;
} // rule_iterate
/**
 * Continues rule iteration after an element was skipped or matched, routing through debug()
 * with _and_element_continue as the continuation.
 *
 * @param state the element state that has just finished
 * @param breakpoint must be ELEMENT_SKIP, ELEMENT_MATCH or LINE_ELEMENT_SKIP (asserted)
 */
private element_continue(state:ElementState, breakpoint:number):void {
    const isAllowedBreakpoint = breakpoint == ParserDebug.ELEMENT_SKIP
        || breakpoint == ParserDebug.ELEMENT_MATCH
        || breakpoint == ParserDebug.LINE_ELEMENT_SKIP;
    Assert.assert(isAllowedBreakpoint);
    this.debug(state, this._and_element_continue, breakpoint);
}
/**
 * Continuation used by element_continue: resumes iteration on the rule that owns the element.
 */
private _and_element_continue(state:ElementState):void {
    const owningRule:RuleState = state.parentRuleState;
    this.rule_iterate(owningRule);
}
/**
 * Reports a failed element through debug() with the ELEMENT_FAIL breakpoint, passing
 * _and_element_fail as the continuation.
 */
private element_fail(state:ElementState):void {
    const continuation = this._and_element_fail;
    this.debug(state, continuation, ParserDebug.ELEMENT_FAIL);
}
/**
 * Continuation used by element_fail: fails the rule that owns the element.
 * TODO - the debugger itself might manage this
 */
private _and_element_fail(state:ElementState):void {
    const owningRule:RuleState = state.parentRuleState;
    this.rule_fail(owningRule);
}
/**
* Decides whether, based on the last element match, we need to continue iterating on the rule, or fail.
*
* Steps, as visible in this method:
* - flags a zero-length match (valid result, but neither pos nor line advanced past posX / lineX);
* - for an invalid result, works out whether the element (or line) was optional; optional failures
*   are un-appended (or marked cutpoint-invalid) and iteration continues from the last consumed
*   position, non-optional failures go to element_fail;
* - for a valid result, handles trailing whitespace (including the requireWS check), marks the
*   element increment as pending where appropriate, records the consumed position and continues.
*
* @param estate state of the element that has just been attempted; asserted to be the parent rule's current element state
*/
private element_done(estate:ElementState):void {
var parentState:RuleState = estate.parentRuleState;
Assert.assert( parentState.elementResult == parentState.currentElementState.elementResult );
// abbreviations that do not affect the statelessness of the function
if (estate != parentState.currentElementState) {
Assert.assert(estate == parentState.currentElementState);
}
var elementResult:PElementResult = parentState.elementResult;
var ruleResult:PRuleResult = parentState.ruleResult;
// 1. determine if we've a valid result that consumes zero
// (the negated condition holds only when the result exists, is valid, any child result is valid,
//  and neither pos nor the line index has moved beyond posX / lineX)
if (! ( !elementResult || !elementResult.valid || (elementResult.childResult && !elementResult.childResult.isValid())
|| ( ( this.pos > parentState.posX) || (this.currentLineIndex > parentState.lineX) ) ) ) {
// element has matched, but hasn't consumed any text. Which means there's a tree node matched
// (ie ^(vars) or [^(vars) (var)?]* )
// which means we can't iterate again, as this would risk an infinite loop
parentState.elementMatchIsZeroLength = true;
}
var onLastORIndex:boolean = false; // not supported
var lstate:RuleState = parentState.parentRuleState;
var elementResultIsOptional:boolean = false;
// x. element is not valid, need to decide whether this is a legitimate error, or an optional element
// that can just be removed
var resultValid:boolean = elementResult.isValid(); // TO_OPTIMISE - this will recurse - probably want to cache the value here
if (!resultValid ) {
if (parentState.asLineRule) {
// x.i. if this is a line rule, optionality comes from the parent's current line rule
Assert.assert(parentState.elementIndex == 0); // lineRules constrained to having only a single element
Assert.assert(lstate.currentLineRule.elements.length ==1);
elementResultIsOptional = lstate.currentLineIsOptional();
} else {
var element:GElement = parentState.element;
var rule:GRule = parentState.rule;
// optional if the element allows none, the rule is an OR, or it repeats and has already matched once
elementResultIsOptional = element.allowNone
|| (rule.isOR && !onLastORIndex )
|| (element.allowMany && parentState.foundAtLeastOneValidElement())
}
var cutpointEncountered:boolean = elementResult.errCode == GErr.EXPLICIT_CUTPOINT;
// x.i element is optional, so remove it from the results
if (elementResultIsOptional) {
// ... unless we've encountered a cutpoint, in which case the failure stands
if (cutpointEncountered ) {
console.log("fails on cutpoint: " + elementResult.err);
elementResultIsOptional = false;
} else {
if (parentState.asLineRule) {
lstate.unappendLine(parentState.ruleResult);
} else {
if (parentState.cutpointHit()) {
parentState.markAsCutpointInvalid();
} else {
parentState.unappendElementResult(elementResult);
}
}
}
}
}
if (!resultValid) {
if (!elementResultIsOptional) {
// x. fails, and is non-optional
this.element_fail(estate);
return; // not matched, and not optional, so fail
} else {
this.resetRuleToPosX(parentState); // not (or partially) matched, but optional: set pos back to last successful position
parentState.incrementPending = true;
this.element_continue(estate,/* parentState.asLineRule ? ParserDebug.LINE_ELEMENT_SKIP : */ParserDebug.ELEMENT_SKIP);
return;
}
}
// ... so far element is valid
// 1e. consume whitespace
if ( ( (estate.element.suffixTokenValue == null) && (elementResult.childResult && elementResult.childResult.matchedWS) )
|| this.eof() || this.done() ) {
parentState.lastElementMatchedWS = true; // this is a mechanism to prevent a parent element matching ws twice
} else {
// only look for whitespace if the element allows it, consumed something, and we're still on the rule's line1
var checkForWS:boolean = parentState.element.allowWS && !(parentState.elementMatchIsZeroLength)
&& (parentState.line1 == this.currentLineIndex); // FIX_THIS - this introduces a bug when we just have
// -------> BUG - if we've consumed a child line, and the line has already incremented, then this is wrong
parentState.lastElementMatchedWS = checkForWS ? this.wst() : false;
}
// x. if whitespace is required but was not found, the element can still fail.
var requireWS:boolean = parentState.element.requireWS && !(parentState.elementMatchIsZeroLength);
if (requireWS && !this.eof() && !this.done() && !parentState.lastElementMatchedWS) {
parentState.appendError(GErr.REQUIRE_WS_AFTER_ELEMENT, parentState, parentState.element, false, "expecting whitespace");
// FIX_THIS - optional element that doesn't match ws shouldn't probably continue
if (parentState.asLineRule != false) {
console.log('x')
}
Assert.assert(parentState.asLineRule == false); // need to not be a line rule as we've not got the mechanism to append this as a line rule
this.element_fail(estate);
return;
}
// x. success. increment element in state and continue
// (a repeating element stays on the same index unless the match was zero length)
if (!parentState.element.allowMany || parentState.elementMatchIsZeroLength) {
parentState.incrementPending = true;
}
// x. finalise text consumption by updating the state with the parser position
this.advanceRuleConsmeToPosX(parentState);
// x. success
this.element_continue(estate, ParserDebug.ELEMENT_MATCH);
return;
}
/**
 * Skips whitespace, then asks rest() for whatever text remains.
 *
 * Side effect: when rest() returns a non-null value an UNMATCHED_TRAILING_TEXT error is appended
 * to the state.
 *
 * @return true if trailing text was found (and the error appended), false otherwise
 */
private checkForTrailingText(state:RuleState):boolean {
    this.wst();
    const leftover:string = this.rest();
    if (leftover == null) {
        return false;
    }
    state.appendError(GErr.UNMATCHED_TRAILING_TEXT, state, state.element, false, "unmatched text: " + leftover);
    //trace(" failed to match b/c of unmatched text \"" + leftover + "\"");
    return true;
}
/**
* Called after the elements in a rule have been successfully matched, to
*
* - manage some intricacies about whitespace matching requirements
* - check for dangling content if we've finished the line
* - invoke lineProcess (synchronously, via doLineProcess) if this is a line rule
* (although lineProcess doesn't actually do that much any more)
* - increment the line if necessary (for a line rule or element NL)
* - redirect to line_iterate if the rule has child line rules.
* - otherwise, the rule is successful (rule_success)
*
* Called after a line rule, but *before* child line rules are called.
*
* @param state state of the rule whose elements have all matched
*/
private rule_process(state:RuleState):void {
var out:PRuleResult = state.ruleResult;
out.endIndex = this.pos;
var hasLineRules:boolean = state.rule.lineRules && (state.rule.lineRules.length > 0);
// 1. propagate matched WS
state.ruleResult.matchedWS = state.lastElementMatchedWS; // the value of the last matching element
// 2. Check for dangling content if we've incremented a line
// still on the rule's line1: the line has to be advanced here
var requiresLineIncrement:boolean = (!state.asChild || hasLineRules)
&& (this.currentLineIndex == state.line1);
// already past line1 and lineRuleLineX is still behind the current line
var requiresIncrementBecauseOfElement:boolean = (!state.asChild || hasLineRules)
&& (this.currentLineIndex > state.line1) && (state.lineRuleLineX < this.currentLineIndex)
if (!this.eof() ) {
if (requiresLineIncrement || requiresIncrementBecauseOfElement) { // also - if a child line rule has already advanced
// fail if we have trailing text
if (this.checkForTrailingText(state)) { // this appends an UNMATCHED_TRAILING_TEXT error if trailing text found
this.rule_fail(state); // the rule cannot be considered matched with text left over
return ; // as it's invalid
}
} else {
// doesn't require line increment
}
}
// 3. increment line if necessary
// if we've finished a line rule, process it (synchronously), adding result to parent rule etc.
// - includes the possibility that if this line has failed and is non optional, the line will be reset)
// - recall that if this line has failed (and is non optional), then we're done, as line rules are implicit cut points.
if (state.asLineRule) {
Assert.assert(state.ruleResult.isValid());
this.doLineProcess(state);
}
if (!state.asChild || hasLineRules) { // called if a) top rule, b) a line rule, c) rule /w line rules
// --- how the lineRuleX mechanism works ---
// a) after a line rule (or top rule) is matched, but before line rules are invoked,
// we need to advance the line position - unless a child element itself has a line rule
// so we set the parent rule's lineRuleLineX to current
// b) this rule may still fail if a child line rule fails, however, since a line inserts an implicit cutpoint
// we don't really need to deal with rollback .
//
// c) however, if this line has advanced because of an X --> Y construct, then we will need to explicitly advance to the next line
//
// d) --> is not an implicit cut op at the element level, so the existing element.line0 rollback mechanism will work
// only called if a) top rule, b) or has child rules
if ( requiresLineIncrement || requiresIncrementBecauseOfElement ) { // indicates that a child line rule element has previously incremented the line
this.nextLine();
this.advanceRuleConsmeToPosX(state);
if (state.parentElementState) {
state.parentElementState.parentRuleState.lineRuleLineX = this.currentLineIndex; // this will only propagate to the parent lineRule
} else if (state.parentRuleState) {
state.parentRuleState.lineRuleLineX = this.currentLineIndex;
}
}
// propagate consumed line position to parent
//if (state.parentRuleState) {
// advanceLineRuleX(state.parentRuleState);
//} else if (state.parentElementState && state.parentElementState.parentRuleState) {
// advanceLineRuleX(state.parentElementState.parentRuleState);
//}
}
// X. iterate on line results
if (hasLineRules ) {
state.ruleResult.lineIterationInProgress = true;
this.debug(state, this.line_iterate, ParserDebug.LINE_RULE_MATCH);
return;
} else {
}
//trace(" *** extracted text: \"" + state.ruleResult.contentString() + "\"");
this.rule_success(state);
} // rule_process
// TODO - absolutely no reason to have this here. All the information is in state to be used in rule_done
/**
 * Marks line iteration as finished and reports the successful rule to rule_done.
 *
 * The breakpoint is LINE_RULE_MATCH for a non-child rule, or for a line rule without line rules
 * of its own; every other rule reports RULE_MATCH.
 */
private rule_success(state:RuleState):void {
    state.ruleResult.lineIterationInProgress = false;
    const lineRulesPresent:boolean = state.hasLineRules();
    const reportAsLineMatch = !state.asChild || (state.asLineRule && !lineRulesPresent);
    const breakpoint:number = reportAsLineMatch ? ParserDebug.LINE_RULE_MATCH : ParserDebug.RULE_MATCH;
    //ParseDebugUtil.traceState(state);
    this.rule_done(state, breakpoint);
} // rule_success
/**
 * Continuation used by rule_done for child rules: hands the element that owns the finished
 * child rule to element_child_rule_process.
 */
private _element_child_rule_process(state:RuleState):void {
    const owningElement:ElementState = state.parentElementState;
    this.element_child_rule_process(owningElement);
}
/**
 * Runs lineProcess on the parent rule state of the given (line) rule state.
 *
 * @param state rule state whose parentRuleState is to be processed
 * @return the result of lineProcess on the parent, or true when there is no parent rule state
 */
private doLineProcess(state:RuleState):boolean {
    const owner:RuleState = state.parentRuleState;
    if (!owner) {
        return true;
    }
    // the parent must already be tracking this rule's result as its current line result
    Assert.assert(owner.lineRuleResult == state.ruleResult);
    return this.lineProcess(owner);
} // doLineProcess
/**
 * Rule done, whether it has matched or not. Chooses the continuation by the kind of rule:
 *
 * - child rule of an element: propagates a cutpoint failure and lineRuleLineX upwards, then
 *   continues with _element_child_rule_process;
 * - line rule: propagates lineRuleLineX to the parent rule, then continues with line_parent_iterate;
 * - top rule: continues with _and_done when the breakpoint has a fail bit set, else top_rule_done.
 *
 * @param breakpoint callers in this file pass RULE_MATCH, LINE_RULE_MATCH, RULE_FAIL,
 *                   LINE_RULE_FAIL or PARENT_LINE_DONE
 */
private rule_done(state:RuleState, breakpoint:number):void {
    if (state.asChild) {
        // 1. a simple child rule of an element: process it and proceed with iteration
        Assert.assert(state.parentElementState && !state.asLineRule);
        const owningElement:ElementState = state.parentElementState;
        // propagate cutpoint failure upwards
        if (state.elementResult && state.elementResult.cutpointFailElement) {
            owningElement.elementResult.cutpointFailElement = state.elementResult.cutpointFailElement;
            owningElement.elementResult.valid = false;
        }
        const owningRule:RuleState = state.parentElementState.parentRuleState;
        if (state.lineRuleLineX >= 0) {
            owningRule.lineRuleLineX = state.lineRuleLineX;
        }
        this.debug(state, this._element_child_rule_process, breakpoint);
        return;
    }
    if (state.asLineRule) {
        // 2. a line rule: hand the consumed line position to the parent and iterate on it
        if (state.lineRuleLineX >= 0) {
            state.parentRuleState.lineRuleLineX = state.lineRuleLineX;
        }
        this.debug(state, this.line_parent_iterate, breakpoint);
        return;
    }
    // 3. top rule
    const failMask = ParserDebug.LINE_RULE_FAIL | ParserDebug.RULE_FAIL;
    const continuation = (breakpoint & failMask) ? this._and_done : this.top_rule_done;
    this.debug(state, continuation, breakpoint);
} // rule_done
/**
 * Finishes a successful top-level rule (asserted: neither a child nor a line rule).
 *
 * While not at eof, trailing secondary-grammar lines are matched one at a time; each match goes
 * through debug() with this method as the continuation. If the state asks to consume until eof
 * and text still remains, a TEXT_AFTER_COMPLETION error is appended and parsing stops with
 * DONE_WITH_UNMATCHED_TEXT. Otherwise control passes to _and_done.
 */
top_rule_done(state:RuleState):void {
    Assert.assert( !state.asChild && !state.asLineRule );
    if (!this.eof()) {
        // rule has succeeded, match trailing secondary parser (ie properly managed ws whitespace)
        const matchedSecondaryLine:boolean = this.matchSecondaryGrammar(state, true); // there is no longer a post option
        if (matchedSecondaryLine) {
            this.debug(state, this.top_rule_done, ParserDebug.SECONDARY_LINE_DONE);
            return;
        }
        const hasUnmatchedText = state.consumeUntilEOF && !this.eof() && !this.done();
        if (hasUnmatchedText) {
            //trace(' unexpected text after completion "' + value + '"');
            state.appendError(GErr.TEXT_AFTER_COMPLETION, state, state.element, false, "unexpcted text at eof \"" + this.value + "\"" );
            this.debug(state, null, ParserDebug.DONE_WITH_UNMATCHED_TEXT);
            return;
        }
    }
    // 4. there is no parent, so we're done
    this._and_done(state);
} // top_rule_done
/**
 * Continuation used by rule_done for line rules: resumes line iteration on the parent rule state.
 */
private line_parent_iterate(state:RuleState):void {
    const owner:RuleState = state.parentRuleState;
    this.line_iterate(owner);
} // line_parent_iterate
/**
 * Final step for a top-level rule (asserted: neither a child nor a line rule).
 *
 * When autoObjectMap is set, the object mapper maps values for the rule result (as root); the
 * DONE breakpoint is then reported to debug() with no continuation.
 */
private _and_done(state:RuleState):void {
    Assert.assert(!(state.asChild || state.asLineRule));
    // no parent element or line to return to: this is the top level
    if (this.autoObjectMap) {
        const result:PRuleResult = state.ruleResult;
        this._objectMapper.mapValues(result, state.rule, null, null, true /* isRoot */);
    }
    this.debug(state, null, ParserDebug.DONE);
} // _and_done
/**
* Iterates on the line rules of a given rule.
*
* Called recursively after matching a line.
*
* Flow, as visible in this method:
* 1. consumes one secondary-grammar line if present and re-enters via debug();
* 2. on first entry (lineRuleIndex < 0) either resets pos to 0 or, at eof, fails the rule when a
*    line rule's first element does not have lineAllowNone (otherwise the rule is done);
* 3. moves to the next line rule unless the state reports that no increment is pending;
* 4. creates a child RuleState for the current line rule and iterates on it;
* 5. when at eof or out of line rules, reports PARENT_LINE_DONE to rule_done.
*
* @param state the state of the parent rule.
*/
private line_iterate(state:RuleState):void { // r:PRule, indent:string , parentResult:PRuleResult, parentIsLineRule:boolean):PRuleResult {
// 1. extract a secondary grammar (ie comments, whitespace, stuff we want to keep out of the primary grammar)
var secondaryResult:boolean = this.matchSecondaryGrammar(state, true );// TODO - state is the same state used by the parent rule
if (secondaryResult) {
this.debug(state, this.line_iterate, ParserDebug.SECONDARY_LINE_DONE);
return;
}
// 2. initialise if iteration hasn't begun yet
if (state.lineRuleIndex < 0) {
if (!this.eof() /*&& state.p.eq(state.currentLineIndent) */) {
this.pos = 0;
} else {
// we have no indent (or eof), but if we have non-optional line rules, then fail
if (state.rule.lineRules) {
var lr:GRule
for (lr of state.rule.lineRules) {
// a line rule whose first element lacks lineAllowNone is mandatory
if (!(lr.child(0).lineAllowNone) ) {
state.appendError(GErr.EOF, state, lr.child(0), false, "end of file /w pending line rules ");
this.rule_fail(state);
return;
}
}
}
// if all child rules are optional
this.rule_done(state, ParserDebug.PARENT_LINE_DONE);
return; //
}
state.incrementLineRule();
}
// 3. increment, but remembering that a repeating line rule may need to stay on the same index
if (state.lineIncrementNotPending() ) { // this encapsulates state's cardinality etc
//if (state.lineAllowMany) {
// state.lineCollectionElementIndex++;
//}
} else {
state.incrementLineRule();
}
// 4. perform iteration on line rule
if (!this.eof() && state.lineRuleIndex < state.rule.lineRules.length) {
// note: we're creating a new child state for each line, even if the line rule repeats. Which is unnecessary
var child:RuleState = RuleState.createForLineRuleFromParent(state, state.currentLineRule);
state.lineRuleResult = child.ruleResult;
// --> here we might, plausibly, append the child rule and remove it later if necessary --
Assert.assert(state.currentLineRuleState == child);
Assert.assert(child.rule == state.rule.lineChild(state.lineRuleIndex) );
// parse rule
//trace('> iterating on child rule ' + state.lineRuleIndex.toString() );
Assert.assert(child.parentRuleState == state); // used in rule_done
this.rule_iterate(child);
return;
}
// 5. upon no more rules to iterate upon
if (this.eof() || state.lineRuleIndex >= state.rule.lineRules.length) { // if (!state.currentLineRule == null)
//ParseDebugUtil.traceState(state);
if (!state.ruleResult.isValid()) {
//
//trace(' child line must have failed');
}
this.rule_done(state, ParserDebug.PARENT_LINE_DONE);
return;
}
// shouldn't reach here
Assert.fail();
} // line_iterate
/**
 * TODO - see if this can't be gotten rid of. Most of the functionality is moved to element_process.
 *
 * Called on the parent rule state of a child line rule, after that line rule has been attempted.
 *
 * - valid line result: records that at least one line rule was found;
 * - failed but optional line (lineAllowNone, or lineAllowMany with an earlier match): resets pos
 *   to 0 so iteration can continue;
 * - failed and not optional: resets the rule to its starting line (resetRuleToLine0).
 *
 * In both failure cases lineMatchFailed is set on the state.
 *
 * @return false if the line hasn't been processed - i.e. it has failed but was optional; true otherwise
 */
private lineProcess(state:RuleState):boolean {
    const lineResult = state.lineRuleResult;
    if (lineResult && lineResult.isValid()) {
        // 1. we have a valid line rule matched
        // 1a. check the assumption that the parser constrains a line rule to a single element /w a child rule itself
        // FIX_THIS - far too low level coupling to lower level data structures
        // (note here it's lineRules.length > 0 because we'll have already removed failing elements )
        Assert.assert(lineResult.results.length == 1 ||
            (lineResult.results.length > 1 && state.currentLineRule.lineRules.length > 0 ) );
        state.foundOneLineRule = true;
        return true;
    }
    // 2. match has failed.
    state.lineMatchFailed = true;
    const lineIsOptional = state.lineAllowNone || (state.lineAllowMany && state.foundOneLineRule);
    if (lineIsOptional) {
        // 2a. optional, so we can reset (the line position) and continue
        this.pos = 0; // TODO - move this elsewhere
        return false;
    }
    // 2b. line is not optional, so the entire rule fails to match
    // temporary data integrity check - relax this with --> mechanism
    if (state.line0 != state.line1 && !(state.ruleResult.preResultsLength() > 0)) {
        Assert.assert(state.line0 == state.line1);
    }
    this.resetRuleToLine0(state);
    return true;
}
/**
 * Handles a rule that has failed to match and reports it to rule_done.
 *
 * - plain child rule (asChild and not a line rule): marks the state cutpoint-invalid if a cutpoint
 *   was hit, then reports RULE_FAIL;
 * - top rule or line rule: a line rule is first run through doLineProcess; the breakpoint is
 *   LINE_RULE_FAIL when the result counts as appended (always for a top rule), else RULE_FAIL.
 */
private rule_fail(state:RuleState):void {
    // if parent element could do something here do something here
    state.ruleResult.lineIterationInProgress = false;
    if (state.asChild && !state.asLineRule) {
        if (state.cutpointHit()) {
            state.markAsCutpointInvalid();
        }
        this.rule_done(state, ParserDebug.RULE_FAIL);
        return;
    }
    // result will be appended only if this is a genuine failure (and not merely an unmatched optional line)
    // (which amounts to the implementation of an unmatched line as a #cutpoint)
    const resultAppended:boolean = state.asLineRule ? this.doLineProcess(state) : true;
    const breakpoint:number = resultAppended
        ? ParserDebug.LINE_RULE_FAIL // result appended - which means that the rule the line belongs to has failed.
        : ParserDebug.RULE_FAIL; // not appended
    //rule_done(state, state.asLineRule && resultAppended ? ParserDebug.LINE_RULE_FAIL : ParserDebug.LINE_RULE_SKIP );
    this.rule_done(state, breakpoint);
} // rule_fail
/**
* Helper that maps values to a rule following an output; _and_done calls its mapValues
* on the finished rule result when autoObjectMap is true.
*/
private _objectMapper:ObjectMapper = new ObjectMapper;
/**
 * Returns the union of two token lists.
 *
 * When both lists are given the result is a new array: a copy of v1 (order and any duplicates
 * within v1 preserved) followed by every entry of v2 that is not already present. When either
 * list is missing, the other one is returned as-is (not copied), which may itself be null/undefined.
 *
 * @param v1 first token list, or null/undefined
 * @param v2 second token list, or null/undefined
 * @return the merged list, or whichever input exists
 */
private tempMergeTokens(v1:Array, v2:Array):Array {
    if (!v1 || !v2) {
        return v1 ? v1 : v2
    }
    const merged = v1.slice();
    for (const token of v2) {
        if (merged.indexOf(token) < 0) {
            merged.push(token);
        }
    }
    return merged;
} // tempMergeTokens
/**
 * Iterates on a single element.
 *
 * Steps, in order:
 * - if the element permits a newline and the current line is done, moves to the next line and
 *   requires the parent rule's indent (plus SeepParserDefs.INDENT when elementNLIndent is set);
 * - on the first visit (prefixProcessed false) creates the PElementResult, appends it to the
 *   parent rule state (and, for a line rule, appends the line to the grandparent rule state),
 *   matches the prefix and records the prefix indices; a filterList element that permits a
 *   newline may then wrap to the next line, again subject to the indent check;
 * - at eof sets an EOF error and goes to element_child_rule_process;
 * - with a child rule, creates the child RuleState and continues with rule_iterate via debug();
 * - otherwise matches a leaf (elementMatchLeaf) and continues with element_done via debug().
 *
 * Fix relative to the previous version: the indent check after the prefix tested a variable
 * declared in the earlier newline branch, which is undefined unless that branch also ran, so
 * the indent was silently left unchecked. Each branch now uses its own block-scoped value.
 *
 * @param state state of the element to match
 */
private element_iterate(state:ElementState):void {
    var e:GElement = state.element; // local var for convenience only
    // FIX_THIS - duplication of code. Previously, new lines were only handled at the end of rules
    if (state.element.elementPermitsNL() ) {
        if ( this.done() ) {
            // BUG - this breaks incremental parsing & tokenising.
            this.nextLine();
            const requiredIndent:string = state.parentRuleState.indent + (state.element.elementNLIndent ? SeepParserDefs.INDENT : "");
            if (requiredIndent && !this.eq(requiredIndent)) {
                state.setError( GErr.NO_INDENT, false, "indent not matched");
                this.element_done(state);
                return;
            }
        }
    }
    // 1. match prefix
    if (!state.prefixProcessed) {
        state.elementResult = new PElementResult;
        state.elementResult.element = state.element;
        state.elementResult.startLineIndex = state.line0;
        state.parentRuleState.elementResult = state.elementResult;
        // ----> here we need to append the element
        //ParseDebugUtil.traceState(state);
        state.parentRuleState.appendElementResult(state.elementResult);
        if (state.parentRuleState.asLineRule) {
            //trace(state.parentRuleState.rule.name);
            //trace(state.parentRuleState.parentRuleState.rule.name);
            //state.parentRuleState.parentRuleState.lineRuleResult = state.parentRuleState.ruleResult;
            Assert.assert(state.parentRuleState.parentRuleState.lineRuleResult == state.parentRuleState.ruleResult);
            state.parentRuleState.parentRuleState.appendLine(state.parentRuleState.parentRuleState.lineRuleResult);
        }
        // ---->
        var prefixOK:boolean = this.elementMatchPrefix(state); // --> loops back to iterate_element , or element_...
        state.prefixProcessed = true;
        if (!prefixOK) {
            this.element_done(state);
            return;
        }
        var elementResult:PElementResult = state.elementResult;
        // the prefix index is only recorded when there is a prefix token and this is not the first item of a filter list
        elementResult.tokenPrefixIndex = ( !(state.isFirst() && state.element.filterList)
            && state.element.prefixTokenValue ) ?
            state.pos0 : -1;
        elementResult.startIndex = state.pos0;
        elementResult.endIndex = this.pos;
        if (state.element.filterList && state.element.elementPermitsNL() ) {
            this.wst();
            if (this.done() ) {
                this.nextLine(); // FIX_THIS - duplicates above code
                // the indent for the wrapped line is computed here, so the check no longer depends on the earlier branch
                const wrappedLineIndent:string = state.parentRuleState.indent + (state.element.elementNLIndent ? SeepParserDefs.INDENT : "");
                if (wrappedLineIndent && !this.eq(wrappedLineIndent)) {
                    state.setError(GErr.NO_INDENT, false, "indent not matched" );
                    this.element_done(state);
                    return;
                }
            }
        }
    }
    // 0. can't match if end of file
    if (this.eof()) {
        state.setError(GErr.EOF, false, "eof encountered");
        // FIX_THIS - this is almost certainly the wrong element
        this.element_child_rule_process(state);
        return;
    }
    if (e.childRule) {
        // 2. recurse on child rule
        Assert.assert(e.contentTokens == null);
        if (e.prefixTokenValue) { // allow ws between token and child rule ie X:rule --> " X child content"
            this.wst(); // eventually allow token to specify this ie token: X.: X
        } // else {
        // if (wst()) {
        // state.elementResult = new PElementResult(e, false, "can't have ws here");
        // element_done(state);
        // }
        //}
        state.contentStart = this.pos;
        state.childRuleState = RuleState.createFromParentElement(state);
        // NOTE(review): self-assignment kept as-is; it is a no-op unless childResult is an accessor - confirm intent
        state.childResult = state.childResult;
        state.elementResult.childResult = state.childRuleState.ruleResult;
        Assert.assert(state.parentRuleState == state.childRuleState.parentElementState.parentRuleState);
        this.debug(state.childRuleState, this.rule_iterate, ParserDebug.RULE_BEFORE);
        return;
    }// else {
    // only other option is a simple leaf.
    this.elementMatchLeaf(state);
    this.debug(state, this.element_done, ParserDebug.ELEMENT_LEAF_POST);
    return;
    //}
} // element_iterate
/**
 * Matches a simple leaf element (the path element_iterate takes when it does not
 * descend into a child rule) at the current parser position.
 *
 * While matching it fills in state.contentStart, state.contentEnd, state.suffixStart
 * and state.varStr. On success it ends by calling elementSuccess(state). On failure it
 * records the problem through state.setError(...) and returns early, without calling
 * elementSuccess. This method never calls element_done itself; the caller does that.
 *
 * Three shapes of leaf are handled:
 *   1. "consume all" elements (e.consumeAll()): greedily take text up to the suffix
 *      token, or up to any of the merged next tokens, or to the end of the remaining input.
 *   2. plain variable elements (e.varName set and no filter): content tokens, text up
 *      to a suffix token, or quoted / unquoted free text.
 *   3. anything else falls straight through to the final checks in steps 4 and 5.
 *
 * @param state element state being matched; mutated in place.
 */
private elementMatchLeaf(state:ElementState):void {
var e:GElement = state.element;
if (e.consumeAll()) {
// 1. we've got a (var)$ construct, which greedily consumes everything (including ws) up to the next element token
// a consume-all element is asserted never to be a tree node
Assert.assert(!e.asTreeNode());
// element is suffixed by "$" ==> greedily consume everything until next token
state.contentStart = this.pos;
if (e.suffixTokenValue) {
// 1a. explicit suffix token: read up to it, then require the suffix itself to match
state.varStr = this.toToken([e.suffixTokenValue], false /* consume quotes and all */);
state.contentEnd = this.pos;
if (!this.eq(e.suffixTokenValue)) {
// suffix missing: rewind to where this element started before reporting the error
this.resetElementToPos0(state);
// FIX_THIS - duplicates the below - to clean up when enhancing error mechanism
state.setError(GErr.BAD_SUFFIX, false, "expecting (suffix) character: " + e.suffixTokenValue, state.childResult); // ---------> expecting suffix token
return;
}
} else {
// 1b. no suffix token: stop at any token that may legally follow this element
// TO_OPTIMIZE - cache this
var mergedNextTokens:Array = this.tempMergeTokens(e.possibleNextTokens, e.nextElementTokens);
if (mergedNextTokens) {
state.varStr = this.toToken(mergedNextTokens, false /* allow quotes*/, true /*ignoreWS */);
} else {
state.varStr = this.rest(); // just consume all
}
state.contentEnd = this.pos;
}
} else if ((e.varName) && !e.filterExists) {
// 2. if we have a (var) construct, extract text
// NOTE(review): this test reads the property `e.asTree`, whereas the rest of this method calls
// `e.asTreeNode()` - confirm the two are intentionally different.
if (e.prefixTokenValue && (e.varName && !e.asTree) ) {
this.wst(); // if token allows ws
}
// content start is only recorded here when there are no content tokens
// (per the note in 2a, content tokens are handled during prefix extraction)
if (!e.contentTokens) {
state.contentStart = this.pos;
}
if (e.contentTokens) {
// 2a. token declaration has specified explicit content
// note - content token extraction moved to prefix extraction (even though strictly speaking it's content, not prefix)
state.contentEnd = this.pos;
} else if (e.suffixTokenValue) {
// 2b. given a suffix token, match everything until that (or until a space)
if (!e.asTreeNode()) { // only match content if not tree node
state.varStr = this.toToken([" ", e.suffixTokenValue], true /*allow quotes*/); // TO_OPTIMIZE
}
// NOTE(review): the condition below repeats the enclosing `else if` test, so it looks
// always-true at this point (assuming varName / filterExists are plain properties).
} else if ((e.varName) && !e.filterExists) {
// 2c. basic content extraction, no suffix token
if (!e.asTreeNode() || e.nameIsRef ) {
state.varStr = this.qtext(true); // by default attempt to extract quoted text first
if (!e.possibleNextTokens) {
//trace(' bug - no possible next tokens ');
}
if (state.varStr == null) { // no quoted text, match until next possible tokens
state.varStr = this.toToken(e.possibleNextTokens, true); // ie --var-name // --varName|filter etc (and recall that this will include ws)
}
if (!state.varStr && !e.prefixTokenValue) {
// 2d. no text matched and no prefix token: nothing is expected to have been consumed,
// which the two assertions below check before the failure is reported
Assert.assert(this.pos == state.pos0);
Assert.assert(this.currentLineIndex == state.line0);
state.setError(GErr.NO_TEXT, false, "no text"); // or return null
return;
}
}
}
// whichever of 2a-2c ran, content ends at the current position
state.contentEnd = this.pos;
// 3. extract suffix token (whitespace is skipped before it)
if (e.suffixTokenValue) {
this.wst();
state.suffixStart = this.pos;
if ( !this.eq(e.suffixTokenValue) ) {
// unlike case 1a, the position is not reset here before reporting
state.setError(GErr.BAD_SUFFIX,false, "expecting (suffix) character: " + e.suffixTokenValue);
return;
}
}
} // basic content extraction
// 4. some last minute processing: normalise an empty string to null
state.varStr = state.varStr && state.varStr.length > 0 ? state.varStr : null; // prevent matching ""
// 5. ensure something has matched: text, a prefix token, or a (possibly empty) tree node
if (!state.varStr && !e.prefixTokenValue && !e.asTreeNode()) {
state.setError(GErr.NOTHING_MATCHED, false, "nothing matched" );
return;
}
this.elementSuccess(state);
} // elementMatchLeaf
/**
 * Matches the leading part of an element: its prefix token and, when the element
 * declares them, its explicit content tokens. Finally runs a look-ahead check for a
 * collision with a token that belongs to a following element.
 *
 * On failure the reason is recorded through state.setError(...) and false is returned;
 * this method does not call element_done, so the caller is responsible for finishing
 * the element.
 *
 * @param state element state being matched; contentStart, contentEnd and varStr are
 *              updated when content tokens are processed.
 * @returns true when the prefix (and content tokens, if any) matched and no collision
 *          with an upcoming element token was detected; false otherwise.
 */
private elementMatchPrefix(state:ElementState):boolean {
    var e:GElement = state.element;

    // 1. match token
    // The "list" filter implements, for instance, a CSV: the prefix token (ie ',') does not
    // apply to the first match. It is important that this check precedes the eq() call,
    // because eq() is only reached when the earlier alternatives are falsy.
    var prefixDone:boolean = !e.prefixTokenValue || (state.isFirst() && e.filterList) || this.eq(e.prefixTokenValue, false);

    // 2. failed to match prefix
    if (!prefixDone) {
        state.setError(GErr.BAD_PREFIX, false, "expecting (prefix) character: " + e.prefixTokenValue);
        return false;
    }

    // Stays unassigned when the element declares no content tokens; step 3 relies on that.
    var matchedTokens:string;
    if (e.contentTokens) {
        if (e.prefixTokenValue) {
            this.wst(); // whitespace is skipped between the prefix token and the content tokens
        }
        Assert.assert(!e.childRule); // assumption of content tokens
        state.contentStart = this.pos;
        // TODO ie we want content tokens to match
        if (e.tokenRef.multiTokens) { // [ X, Y , Z]* // match as many tokens as are present
            matchedTokens = this.whileTokens(e.contentTokens, true);
        } else {
            matchedTokens = this.whileTokens(e.contentTokens, false);
        }
        state.varStr = matchedTokens;
        if (matchedTokens == null || matchedTokens.length == 0) {
            // Build the list of expected tokens for the error text. The accumulator must start
            // as an empty string: appending to an unassigned variable would put the literal
            // text "undefined" at the front of the message.
            var missingTokens:string = "";
            for (const token of e.contentTokens) {
                missingTokens += token + " ";
            }
            state.setError(GErr.BAD_CONTENT_TOKENS, false, " expecting content tokens " + missingTokens +"\"");
            return false;
        }
        state.contentEnd = this.pos;
    }

    // 3. Collision look-ahead, only attempted when all of the following hold:
    //    a) no prefix token
    //    b) no content tokens / content tokens not matched
    //    c) precalculated next-element tokens are present
    //    d) no child rule (possible next tokens are not calculated for a hierarchical rule),
    //       unless e.skip2 is set
    //    e) element is not a (leaf) tree element ( ie ^(var) ), as such nodes can have 0 length
    // TO_OPTIMIZE - if e.skipToken was on by default, parsing could potentially be sped up
    // substantially because we would know which patterns cause all child expressions to fail
    if ( !e.prefixTokenValue && !matchedTokens && e.nextElementTokens
            && (!e.childRule || e.skip2) && !e.asTreeNode() ) {
        Assert.assert(!e.suffixTokenValue); // implicit assumption
        var foundNextElementToken:number = this.matchFromList(e.nextElementTokens, false /* don't consume */);
        if (foundNextElementToken >= 0) {
            var offendingToken:string = e.nextElementTokens[foundNextElementToken];
            state.setError(GErr.ELEMENT_TOKEN_COLLISION, false, "failed due to collision /w token \"" + offendingToken +"\"");
            // TO_OPTIMIZE - should be able to pre-construct a reference to the element of the collision token
            // and use this to know where to jump to in the parsing, rather than go through the grammar
            return false;
        }
    }

    // 4. prefix has matched ok
    return true;
} // elementMatchPrefix
/**
 * Continuation that runs once an element's child rule has finished.
 *
 * Copies the child rule's result onto the element state and then either
 *   - reports a failure (rolling the parser back to the element's start position), or
 *   - matches the element's suffix token, if it has one, and marks the element successful.
 * Every path ends by handing the state to element_done.
 *
 * @param state element state whose childRuleState has completed.
 */
private element_child_rule_process(state:ElementState):void {
    state.childResult = state.childRuleState.ruleResult;
    state.contentEnd = this.pos;

    // 1. child rule simply doesn't match: no element result at all
    if (!state.childResult) {
        this.resetElementToPos0(state);
        state.elementResult = null;
        this.element_done(state);
        return;
    }

    // 2. child rule produced a result, but an invalid one
    if (!state.childResult.isValid()) {
        // capture the error text before rewinding the parser position
        const childFailure:string = state.childResult.quickErrorStr();
        this.resetElementToPos0(state);

        if (state.childResult.matchedElements > 0) {
            // 2a. partial match (possibility of keeping this for debug)
            const failedAtCutpoint = state.elementResult.cutpointFailElement;
            if (failedAtCutpoint) {
                const cutpointElement:GElement = failedAtCutpoint.element;
                state.setError(GErr.EXPLICIT_CUTPOINT, false, "cutpoint error: " + cutpointElement.cutpointMsg, state.childResult);
            } else {
                state.setError(GErr.ELEMENT_CHILD_FAILED, false, "composite rule not matched: " + childFailure, state.childResult);
            }
        } else {
            // 2b. no child elements matched at all
            state.setError(GErr.NO_CHILD_ELEMENTS_MATCHED, false, "no child elements matched", state.childResult);
        }
        this.element_done(state);
        return;
    }

    // 3. valid child result: now match the suffix token that follows the child rule
    const suffixToken = state.element.suffixTokenValue;
    if (suffixToken != null) {
        if (state.element.contentTokens) {
            this.wst();
        }
        state.suffixStart = this.pos;
        if (!this.eq(suffixToken)) {
            this.resetElementToPos0(state);
            state.setError(GErr.BAD_SUFFIX, false, "expecting (suffix) character: " + suffixToken, state.childResult);
            this.element_done(state);
            return;
        }
    }

    this.elementSuccess(state);
    this.element_done(state);
} // elementChildRuleProcess
/**
 * Finalises state.elementResult for an element that matched: marks it valid and
 * fills in its index fields and extracted text from the element state.
 *
 * Index fields are set to -1 when the corresponding part is absent. contentEndIndex
 * and endIndex are stored as "position minus one".
 *
 * @param state element state that has just matched; its elementResult must already exist.
 */
private elementSuccess(state:ElementState):void {
    Assert.assert(state.elementResult != null);
    const result:PElementResult = state.elementResult;
    result.element = state.element;
    result.valid = true;

    // --- indexes ---
    // The first match of a "list"-filtered element has no prefix token of its own.
    const isUnprefixedListHead = state.isFirst() && state.element.filterList;
    if (!isUnprefixedListHead && state.element.prefixTokenValue) {
        result.tokenPrefixIndex = state.pos0;
    } else {
        result.tokenPrefixIndex = -1;
    }

    const hasContent = state.contentEnd > state.contentStart;
    result.contentIndex = hasContent ? state.contentStart : -1;
    result.contentEndIndex = hasContent ? state.contentEnd - 1 : -1;
    result.tokenSuffixIndex = state.suffixStart;
    result.endIndex = this.pos > state.pos0 ? this.pos - 1 : -1;

    // startIndex is a convenience value: the first of prefix / content / suffix that is present
    if (result.tokenPrefixIndex >= 0) {
        result.startIndex = result.tokenPrefixIndex;
    } else if (result.contentIndex >= 0) {
        result.startIndex = result.contentIndex;
    } else if (result.tokenSuffixIndex >= 0) {
        result.startIndex = result.tokenSuffixIndex;
    } else {
        result.startIndex = -1;
    }

    // --- content ---
    // childResult, startLineIndex and element are expected to be in place already; only verify them
    Assert.assert(result.childResult == state.childResult);
    Assert.assert(result.startLineIndex == state.line0);
    Assert.assert(result.element == state.element);
    result.varStr = state.varStr;
} // elementSuccess
// ------------- util ----------------
/**
 * Builds a PElementResult from the supplied values and stamps it with a start position
 * taken from the given parser state.
 *
 * @deprecated kept under its "___deprecated___" name.
 * @param errCode     error code stored on the result; must be GErr.NOT_AN_ERROR when valid is true.
 * @param state       element or rule state that supplies the start index.
 * @param element     grammar element the result refers to.
 * @param valid       whether the result represents a successful match.
 * @param err         error text, if any.
 * @param childResult result of a child rule, if any.
 * @returns the newly created element result.
 */
private ___deprecated___createElementResult(errCode:number, state:ParserState, element:GElement, valid:boolean = true, err:string = null, childResult:PRuleResult = null):PElementResult {
    const result:PElementResult = new PElementResult();
    result.element = element;
    result.valid = valid;
    result.err = err;
    result.childResult = childResult;
    result.errCode = errCode;

    // the awkwardness here is that the result is not created at the beginning of the match,
    // so the start index has to be recovered from the state
    if (state.isElementState()) {
        result.startIndex = (state as ElementState).pos0;
    } else {
        const ruleState:RuleState = state as RuleState;
        // prefer the consumed-to position when one has been recorded
        result.startIndex = ruleState.posX >= 0 ? ruleState.posX : ruleState.pos0;
    }
    // either kind of state uses the parser's current line, not the state's line0
    result.startLineIndex = this.currentLineIndex;

    Assert.assert(errCode == GErr.NOT_AN_ERROR || !valid);
    return result;
} // createElementResult
/**
 * Formalised consumption for a rule state: records the parser's current position and
 * line as the state's posX / lineX. Call this when an element - or even just an
 * indentation - has matched, so that a failure of the next element won't backtrack
 * past this point.
 */
private advanceRuleConsmeToPosX(state:RuleState):void {
    const consumedPos = this.pos;
    state.posX = consumedPos;
    const consumedLine = this.currentLineIndex;
    state.lineX = consumedLine;
} // advanceRulePos0
/**
 * Stores the parser's current line index on the rule state as lineRuleLineX.
 */
private advanceLineRuleX(state:RuleState):void {
    state.lineRuleLineX = this.currentLineIndex;
}
/**
 * Rewinds the parser to column 0 of the rule's starting line. Used when an entire
 * line has failed (ie a line rule fails).
 */
private resetRuleToLine0(state:RuleState):void {
    const startOfLine = 0;
    this.setPosition(state.line0, startOfLine);
}
/**
 * Rewinds the parser to the line and position at which the element started.
 * Used when an element has failed.
 */
private resetElementToPos0(state:ElementState):void {
    const { line0, pos0 } = state;
    this.setPosition(line0, pos0);
} // resetPosition
/**
 * Rewinds the parser to the rule's consumed-to point (lineX / posX).
 */
private resetRuleToPosX(state:RuleState):void {
    // disabled sanity checks, kept for reference:
    //Assert.assert(state.line0 == state.lineX);
    //Assert.assert(state.posX == state.pos0);
    const { lineX, posX } = state;
    this.setPosition(lineX, posX);
} // resetPosition
// -------- manage pause for debug -------
// State whose continuation is pending a local resume; set by debug() when the external
// debugger did not pause, and cleared to null when it did.
private _state:ParserState;
// Result of the most recent external debugger call in debug(); false when no debugger is present.
private paused:boolean;
// True while a continuation captured by debug() is waiting to be run by resumeLocal();
// resume() keeps looping while this is set.
private localPause:boolean = false;
/**
 * Breakpoint hook called at each asynchronous point of the parse.
 *
 * Stores the breakpoint id and the continuation on the state, then lets the external
 * debugger (if one is present) decide whether to pause. When it does not pause, the
 * state is cached and flagged for a local resume instead of being continued directly.
 * This keeps recursion depth low: execution unwinds up the call stack and the loop in
 * resume() invokes the continuation.
 *
 * @param state      parser state at the breakpoint.
 * @param done       continuation to run when execution resumes; a plain function
 *                   reference, not a closure.
 * @param breakpoint breakpoint identifier stored on the state.
 * @returns the external debugger's pause decision; false when there is no debugger.
 */
debug(state:ParserState, done:Function, breakpoint:number):boolean {
    state.breakpoint = breakpoint;
    state.continueFn = done;

    // 1. an external debugger, if present, may call for an asynchronous pause
    this.paused = this.debugger ? this.debugger.debug(state) : false;

    if (this.paused) {
        // 2a. the debugger has taken over the pause: don't cache state locally
        this._state = null;
        this.localPause = false;
        return this.paused;
    }

    // 2b. no external pause: keep the state so the resume loop can pick it up.
    // A local pause is always taken here. A selective variant was once sketched as
    // (breakpoint & (ParserDebug.ELEMENT_LEAF_POST | ParserDebug.RULE_BEFORE)) != 0,
    // which would continue immediately via resumeLocal() for other breakpoints.
    this._state = state;
    this.localPause = true;
    return this.paused;
} // debug
/**
 * Runs the continuation of the locally cached state. A local pause must be pending;
 * the flag is cleared before the continuation runs, so the continuation may set it
 * again through debug().
 */
private resumeLocal():void {
    Assert.assert(this.localPause);
    this.localPause = false;

    const pending = this._state;
    if (pending.continueFn == null) {
        return;
    }
    pending.continueFn.call(this, pending);
} // resume
/**
 * Resumes execution from the given parser state.
 *
 * Invokes the state's continuation with this parser as the receiver, then keeps running
 * locally paused continuations (flagged by debug()) until none is pending.
 *
 * @param state state whose continueFn should be invoked first.
 */
resume(state:ParserState):void {
    // Previously written as `this,state.continueFn.call(...)`: a comma-operator typo whose
    // left operand was evaluated and discarded.
    state.continueFn.call(this, state);
    while (this.localPause) {
        this.resumeLocal();
    }
}
// resume
} // class