import BaseSimpleParser from "../simpletemplate/BaseSimpleParser"; import IParserDebug from "../parsertooling/IParserDebug"; import GRule, { GElement } from "./grammar/GRule"; import RuleState from "../parsertooling/RuleState"; import { PRuleResult, PElementResult } from "./PObjectMap"; import GErr from "./GErr"; import ParserDebug from "../parsertooling/ParserDebug"; import ElementState from "../parsertooling/ElementState"; import ParserState from "../parsertooling/ParserState"; import ObjectMapper from "./ObjectMapper"; import Assert from "@cafetextual/util/dist/src/assert/Assert"; import SeepParserDefs from "./SeepParserDefs"; /** * Parses text from a SeepGrammar * *

Suffers from a rather awkward refactoring from a simple recursive algorithm to facilitate asynchronous execution.

*
* Notes:
* - the form is a bit awkward, reflecting the process by which it was incrementally refactored from a simpler recursive mechanism to one
*   capable of asynchronous execution.
*
* - may be executed either through synchronous recursion, or asynchronously and non-recursively (where an injected IParserDebug controls and may decorate execution)
*
* - the entire stack is encapsulated in state objects; it therefore uses neither closures nor call stack state (as a typical recursive algorithm would)
*
* - by convention a method name with an underscore may be called asynchronously, and may contain other asynchronous points.
*
* Some debugging heuristics therefore:
*   o only an async method (ie rule_iterate, element_iterate) may call another async method
*   o no code may follow a call to an async method, ie the call must be immediately followed by a return (this is because the method itself may have engaged an asynchronous break).
*   o conversely, any return statement in a method with underscores must be preceded by a call to a method with underscores
*
*

*/
export default class SeepParser extends BaseSimpleParser // implements IParserDebug
{

    // mechanism to inject a debug parser for stress testing.
    /**
     * Debugger that the constructor copies onto each new instance's `debugger` field.
     * @private
     */
    static STRESS_TEST_DEBUGGER:IParserDebug

    /**
     * Runs the base-class constructor, then installs whatever SeepParser.STRESS_TEST_DEBUGGER
     * currently holds as this instance's debugger.
     */
    constructor() {
        super();
        this.debugger = SeepParser.STRESS_TEST_DEBUGGER;
    }

    /**
     * Here to prevent regressions breaking.
     * When true, _and_done() runs the object mapper over the finished rule result.
     */
    autoObjectMap:boolean = true;

    // there are features of the grammar - should be encapsulated in the rule/ element / token objects

    // for stress test add a (synchronous) debugger.
    debugger:IParserDebug;

    /**
     * Optional hook. When non-null, rule() calls it with the rule and this.content.toString()
     * before any parsing state is created.
     */
    static registerStressTestParse:Function;

    /**
     * attempt to match a rule on a single line.
     *
     *

Also matches a rule on a portion of a line if asChild is true.

*
     * @param r the grammar rule to match
     * @param asChild passed through to RuleState.create
     * @param indent indentation string passed through to RuleState.create; for non-child rules
     *        rule_iterate requires the line to start with it
     * @param asLineRule passed through to RuleState.create
     * @param consumeUntilEOF stored on the state; top_rule_done only reports leftover text as an
     *        error when this is true
     * @return state.ruleResult as it stands once this.resume(state) has returned
     */
    rule(r:GRule, asChild:boolean = false, indent:string = "", asLineRule:boolean = false, consumeUntilEOF:boolean = true ):PRuleResult {

        // optional stress-test hook: report the rule and the full content before parsing
        if (SeepParser.registerStressTestParse != null) {
            SeepParser.registerStressTestParse(r, this.content.toString());
        }

        var state:RuleState = RuleState.create( this, r, asChild, indent, asLineRule)
        state.consumeUntilEOF = consumeUntilEOF;
        // rule_iterate is the entry point of the state machine
        // NOTE(review): resume() is defined outside this view; presumably it invokes continueFn with this parser as receiver - confirm
        state.continueFn = this.rule_iterate;
        this.resume(state);

        return state.ruleResult;
    } // rule

    /**
     * Ensures state.ruleResult exists and records where the rule starts (line index, text index,
     * source text, matched rule) on both the result and the state.
     *
     * Does nothing further if the existing result already carries pre-results, which is taken to
     * mean that initialisation has already happened.
     */
    private basicRuleInit(state:RuleState):void {

        var rule:PRuleResult = state.ruleResult;
        if (rule == null) {
            rule = PRuleResult.create();
            state.ruleResult = rule;
        } else if (rule.preResults != null) {
            // kind of hacky - but if we've already got a preresult, then we've already initialised.
            return;
        }

        rule.startLineIndex = this.currentLineIndex
        rule.matchedRule = state.rule;
        rule.source = this.value;
        rule.startIndex = this.pos;
        state.lastElementMatchedWS = false; // default, to remove

        state.pos0 = this.pos;
        state.line0 = this.currentLineIndex;
    } // basicRuleInit

    /**
     * Mechanism to add a secondary grammar - ie blank lines, or maybe comments.
     *
     * May, currently, match one element (per line). To do otherwise would require asynchronous infrastructure in order to
     * support debug on element matching.
     *
     * A line matches when, after leading whitespace, it is empty or starts with "//" or "----".
     * On a match the line is consumed (nextLine) and the rule's consumed position is advanced;
     * on a miss the position is reset to 0.
     *
     * @param state rule state that receives the secondary result
     * @param isLine when true the result is appended as a line secondary result, otherwise as a pre-result
     * @return true if a line has been matched
     */
    private matchSecondaryGrammar(state:RuleState, isLine:boolean = false):boolean {

        if (this.eof()) {
            return false;
        }

        if (this.pos != 0) {
            console.log(' bug');
        }
        Assert.assert(this.pos == 0); // fails if line hasn't been correctly invoked

        this.wst();

        // assume 2 - secondary grammar is whitespace only.
        if (this.match("//") || this.match('----') ) {
            this.rest();
        }

        if (!this.done() ) {
            // something other than secondary grammar is on this line: rewind to the line start
            this.pos = 0;
            return false;
        }

        var e:PElementResult = PElementResult.create();
        state.populateElement(e, GErr.NOT_AN_ERROR, state, null, true);

        if (isLine) {
            state.appendLineSecondaryResult(e);
        } else {
            state.appendPreResult(e);
        }

        this.nextLine();
        this.advanceRuleConsmeToPosX(state);
        state.lineRuleLineX = this.currentLineIndex; // NOTE - because we're
        return true;
    } // matchSecondaryGrammar

    /**
     * Iterate on element of rule, or if complete, then invoke processing for success, fail or recurse
     *
     * Each call performs exactly one step and then returns: initialise-and-fail, consume one
     * secondary line, start matching the next element, or finish the rule via
     * rule_process / rule_fail.
     */
    private rule_iterate(state:RuleState):void {

        // apply an element increment requested by element_done on the previous step
        if (state.incrementPending) {
            state.incrementElement();
            state.incrementPending = false;
        }

        // 1. iteration hasn't started, so initialise.
        if (state.elementIndex < 0) {

            // initialise
            this.basicRuleInit(state);

            // issue if we've got a pre-line,
            // if indent fails to match, we can't match
            if (!state.asChild) {

                if (this.eof()) {
                    state.appendError(GErr.EOF, state, state.rule.child(0), false, "eof encountered" );
                    // ----> rule not matched because of eof
                    this.rule_fail(state);
                    return;
                }

                if (this.content) {
                    //content.setResult(currentLineIndex, state); // TODO - deprecate
                }

                this.pos = 0;

                // x. check if this is an empty line
                var secondaryGrammarResult:boolean =this.matchSecondaryGrammar(state);
                if (secondaryGrammarResult) {
                    // x.i we've matched a secondary line, append as a pre-result, formalise consumption of line, and iterate back.
                    this.debug(state, this.rule_iterate, ParserDebug.SECONDARY_PRE_LINE_DONE);
                    return;
                }

                if ((state.indent.length > 0) && !this.eq(state.indent)) {
                    state.appendError(GErr.NO_INDENT, state, state.rule.child(0), false, "indent not matched", null );
                    // ----> rule not matched because indent not matched
                    // FIX_THIS - rule is not initialised, so start line index will be wrong
                    state.ruleInit(this.currentLineIndex);
                    this.rule_fail(state);
                    return;
                }

                // formalise consume of indentation
                if (state.indent && state.indent.length > 0) {
                    this.advanceRuleConsmeToPosX(state);
                }

                if (!(state.asLineRule && state.rule.elements[0].lineAllowWS) ) {
                    if (this.wst()) {
                        // note - if we've got ws after required indent, by default this is an error.
                        // TODO - exception might be if first element includes a '$' indicator (for greedy consumption of ws)
                        state.appendError(GErr.WS_AFTER_INDENT, state, state.rule.child(0), false, "ws not allowed pos indent" )
                        state.ruleInit(this.currentLineIndex);
                        this.rule_fail(state);
                        return;
                    }
                }
            }

            state.ruleInit(this.currentLineIndex);
        }

        // 2. if iteration in progress, attempt to match element
        var out:PRuleResult = state.ruleResult;
        var elements:Array = state.rule.elements;

        // an OR rule is finished as soon as one alternative has produced a result
        var doneOnOR:boolean = state.rule.isOR && (state.elementIndex >= 0) && state.hasResult();

        if (!doneOnOR && (state.elementIndex < elements.length) ) {

            Assert.assert(state.element == elements[state.elementIndex]);
            //state.element = elements[state.elementIndex]; // this could be done in state.incrementElement()

            //trace(' ------------- extracting ' + state.rule.name + '[' +state.elementIndex + "]: '" + state.element.toString() + "' -------------- (pos=" + pos.toString() +")");

            this.advanceRuleConsmeToPosX(state);// used to determine when we've got a zero length match, ie ^(var). This is the last known matched position as it is set before every attempt to match an element

            // 3a. create a child element state and attempt to extract
            var elementState:ElementState = ElementState.create(state); // TO_OPTIMISE --> this data structure can be reused in iteration
            Assert.assert(elementState.parentRuleState == state);
            state.currentElementState = elementState;

            // (!) ---> add element here
            this.element_iterate(elementState);
            return;
        } // iteration over elements

        // 3.
        if (doneOnOR || (state.elementIndex >= state.rule.elements.length) ) {

            if (state.ruleResult.isValid() ) {
                //trace(" <--- rule successful (not including line rules) " + state.rule.name );
                this.rule_process(state); // ignoring for the moment iteration on childrules
                return;
            } else {
                //trace(" <--- XXXXX rule unsuccessful " + state.rule.name);
                this.rule_fail(state);
                return;
            }
        }

        Assert.fail(); // shouldn't reach here
        return;
    } // rule_iterate

    /**
     * Hands control back to the parent rule's iteration after an element was matched or skipped.
     *
     * @param breakpoint must be ELEMENT_SKIP, ELEMENT_MATCH or LINE_ELEMENT_SKIP (asserted)
     */
    private element_continue(state:ElementState, breakpoint:number):void {

        Assert.assert(breakpoint == ParserDebug.ELEMENT_SKIP || breakpoint == ParserDebug.ELEMENT_MATCH || breakpoint == ParserDebug.LINE_ELEMENT_SKIP);
        // NOTE(review): debug() is defined outside this view; presumably it invokes the given continuation, possibly deferred by the injected debugger - confirm
        this.debug(state, this._and_element_continue, breakpoint);
    }

    /** Continuation of element_continue: resumes rule_iterate on the element's parent rule state. */
    private _and_element_continue(state:ElementState):void {
        this.rule_iterate(state.parentRuleState);
    }

    /** Reports ELEMENT_FAIL through debug() with _and_element_fail as the continuation. */
    private element_fail(state:ElementState):void {
        this.debug(state, this._and_element_fail, ParserDebug.ELEMENT_FAIL);
    }

    /** Continuation of element_fail: fails the element's parent rule. */
    private _and_element_fail(state:ElementState):void {
        // TODO - the debugger itself might manage this
        this.rule_fail(state.parentRuleState);
    }

    /**
     * Decides whether, based on last element match, we need to continue iterating on the rule, or fail.
*
     * Steps, in order:
     *  - flag a valid match that consumed no text (so the element is not iterated again);
     *  - for an invalid result, decide whether the element was optional; if so remove or
     *    invalidate it, rewind to the last consumed position and continue, otherwise fail;
     *  - for a valid result, handle trailing whitespace (and fail if required whitespace is
     *    missing), then advance the consumed position and continue.
     *
     * @param estate state of the element whose match attempt has just finished; asserted to be
     *        the parent rule's currentElementState
     */
    private element_done(estate:ElementState):void {

        var parentState:RuleState = estate.parentRuleState;

        Assert.assert( parentState.elementResult == parentState.currentElementState.elementResult );

        // abbreviations that do not affect statelessness of function
        if (estate != parentState.currentElementState) {
            Assert.assert(estate == parentState.currentElementState);
        }
        var elementResult:PElementResult = parentState.elementResult;
        var ruleResult:PRuleResult = parentState.ruleResult;

        // 1. determine if we've a valid result that consumes zero
        if (! ( !elementResult
                || !elementResult.valid
                || (elementResult.childResult && !elementResult.childResult.isValid())
                || ( ( this.pos > parentState.posX) || (this.currentLineIndex > parentState.lineX) ) ) ) {

            // element has matched, but hasn't consumed any text. Which means there's a tree node matched
            // (ie ^(vars) or [^(vars) (var)?]* )
            // which means we can't iterate again, as this would risk an infinite loop
            parentState.elementMatchIsZeroLength = true;
        }

        var onLastORIndex:boolean = false; // not supported
        var lstate:RuleState = parentState.parentRuleState;

        var elementResultIsOptional:boolean = false;

        // x. element is not valid, need to decide whether this is a legitimate error, or an optional element
        // that can just be removed
        var resultValid:boolean = elementResult.isValid(); // TO_OPTIMISE - this will recurse - probably want to cache the value here

        if (!resultValid ) {

            if (parentState.asLineRule) {
                // x.i. if this is a line rule
                Assert.assert(parentState.elementIndex == 0); // lineRules constrained to having only a single element
                Assert.assert(lstate.currentLineRule.elements.length ==1);
                elementResultIsOptional = lstate.currentLineIsOptional();

            } else {

                var element:GElement = parentState.element;
                var rule:GRule = parentState.rule;

                elementResultIsOptional = element.allowNone
                    || (rule.isOR && !onLastORIndex )
                    || (element.allowMany && parentState.foundAtLeastOneValidElement())
            }

            var cutpointEncountered:boolean = elementResult.errCode == GErr.EXPLICIT_CUTPOINT;

            // x.i element is optional, so remove it from
            if (elementResultIsOptional) {

                // ... unless we've encountered a cutpoint, in which case,
                if (cutpointEncountered ) {
                    console.log("fails on cutpoint: " + elementResult.err);
                    elementResultIsOptional = false;
                } else {

                    if (parentState.asLineRule) {
                        lstate.unappendLine(parentState.ruleResult);
                    } else {
                        if (parentState.cutpointHit()) {
                            parentState.markAsCutpointInvalid();
                        } else {
                            parentState.unappendElementResult(elementResult);
                        }
                    }
                }
            }
        }

        if (!resultValid) {
            if (!elementResultIsOptional) {
                // x. fails, and is non-optional
                this.element_fail(estate);
                return; // not matched, and not optional, so fail
            } else {
                this.resetRuleToPosX(parentState); // not (or partially) matched, and not optional, set pos back to last successful position
                parentState.incrementPending = true;
                this.element_continue(estate,/* parentState.asLineRule ? ParserDebug.LINE_ELEMENT_SKIP : */ParserDebug.ELEMENT_SKIP);
                return;
            }
        }

        // ... so far element is valid

        // 1e. consume whitespace
        if ( ( (estate.element.suffixTokenValue == null) && (elementResult.childResult && elementResult.childResult.matchedWS) )
                || this.eof()
                || this.done() ) {

            parentState.lastElementMatchedWS = true; // this is a mechanism to prevent a parent element matching ws twice

        } else {

            var checkForWS:boolean = parentState.element.allowWS
                && !(parentState.elementMatchIsZeroLength)
                && (parentState.line1 == this.currentLineIndex); // FIX_THIS - this introduces a bug when we just have

            // -------> BUG - if we've consumed a child line, and the line has already incremented, then is wrong
            parentState.lastElementMatchedWS = checkForWS ? this.wst() : false;
        }

        // x. if we not found, element can still fail.
        var requireWS:boolean = parentState.element.requireWS && !(parentState.elementMatchIsZeroLength);

        if (requireWS && !this.eof() && !this.done() && !parentState.lastElementMatchedWS) {

            parentState.appendError(GErr.REQUIRE_WS_AFTER_ELEMENT, parentState, parentState.element, false, "expecting whitespace");

            // FIX_THIS - optional element that doesn't match ws shouldn't probably continue
            if (parentState.asLineRule != false) {
                console.log('x')
            }
            Assert.assert(parentState.asLineRule == false); // need to not be a line rule as we've not got the mechanism to append this as a line rule

            this.element_fail(estate);
            return;
        }

        // x. success. increment element in state and continue
        if (!parentState.element.allowMany || parentState.elementMatchIsZeroLength) {
            parentState.incrementPending = true;
        }

        // x. finalise text consume by updating the state /w the parser position
        this.advanceRuleConsmeToPosX(parentState);

        // x. success
        this.element_continue(estate, ParserDebug.ELEMENT_MATCH);
        return;
    }

    /**
     * Skips whitespace, then consumes the rest of the line via this.rest().
     *
     * @return true, after appending an UNMATCHED_TRAILING_TEXT error to the state, if rest()
     *         returned something non-null; false otherwise
     */
    private checkForTrailingText(state:RuleState):boolean {

        this.wst();

        var trailingText:string = this.rest();

        if (trailingText != null ) {
            state.appendError(GErr.UNMATCHED_TRAILING_TEXT, state, state.element, false, "unmatched text: " + trailingText);
            //trace(" failed to match b/c of unmatched text \"" + trailingText + "\"");
            return true;
        }
        return false
    }

    /**
     * Called after the elements in a rule have matched successfully, to
     *
     * - manage some intricacies about whitespace matching requirements
     * - check for dangling content if we're finished the line
     * - invoke lineProcess (synchronously) if this is a line rule
     *   (although lineProcess doesn't actually do that much any more)
     * - increment line if necessary (for line rule or element NL)
     * - redirect to line_iterate if the rule has child rules.
     * - otherwise, rule is successful
     *
     * Called after a line rule, but *before* child line rules are called.
     *
     */
    private rule_process(state:RuleState):void {

        var out:PRuleResult = state.ruleResult;
        out.endIndex = this.pos;

        var hasLineRules:boolean = state.rule.lineRules && (state.rule.lineRules.length > 0);

        // 1. propagate matched WS
        state.ruleResult.matchedWS = state.lastElementMatchedWS; // the value of the last matching element

        // 2. Check for dangling content if we've incremented a line
        var requiresLineIncrement:boolean = (!state.asChild || hasLineRules) && (this.currentLineIndex == state.line1);

        var requiresIncrementBecauseOfElement:boolean = (!state.asChild || hasLineRules)
            && (this.currentLineIndex > state.line1)
            && (state.lineRuleLineX < this.currentLineIndex)

        if (!this.eof() ) {

            if (requiresLineIncrement || requiresIncrementBecauseOfElement) { // also - if a child line rule has already advanced

                // fail if we have trailing text
                if (this.checkForTrailingText(state)) { // this appends an UNMATCHED_TRAILING_TEXT error if trailing text found
                    this.rule_fail(state); //
                    return ; // as its invalid
                }
            } else {
                // doesn't require line increment
            }
        }

        // 3. increment line if necessary

        // if we've finished a line rule, process it (synchronously), adding result to parent rule etc.
        // - includes the possibility that if this line has failed and is non optional, the line will be reset)
        // - recall that if this line has failed (and is non optional), then we're done, as line rules are implicit cut points.
        if (state.asLineRule) {
            Assert.assert(state.ruleResult.isValid());
            this.doLineProcess(state);
        }

        if (!state.asChild || hasLineRules) { // called if a) top rule, b) a line rule, c) rule /w line rules

            // --- how the lineRuleX mechanism works ---
            // a) after a line rule (or top rule) is matched, but before line rules are invoked,
            //    we need to advance the line position - unless a child element itself has a line rule
            //    so we set the parent rule's lineRulelineX to current
            // b) this rule may still fail if a child line rule fails, however, since a line inserts an implicit cutpoint
            //    we don't really need to deal with rollback .
            //
            // c) however, if this line has advanced because of an X --> Y construct, then we will need to explicitly advance to the next line
            //
            // d) --> is not an implicit cut op at the element level, so the existing element.line0 rollback mechanism will work

            // only called if a) top rule, b) or has child rules
            if ( requiresLineIncrement || requiresIncrementBecauseOfElement ) { // indicates that a child line rule element has previous incremented the line

                this.nextLine();
                this.advanceRuleConsmeToPosX(state);

                if (state.parentElementState) {
                    state.parentElementState.parentRuleState.lineRuleLineX = this.currentLineIndex; // this will only propagate to the parent lineRule
                } else if (state.parentRuleState) {
                    state.parentRuleState.lineRuleLineX = this.currentLineIndex;
                }
            }

            // propagate consumed line position to parent
            //if (state.parentRuleState) {
            //    advanceLineRuleX(state.parentRuleState);
            //} else if (state.parentElementState && state.parentElementState.parentRuleState) {
            //    advanceLineRuleX(state.parentElementState.parentRuleState);
            //}
        }

        // X. iterate on line results
        if (hasLineRules ) {
            state.ruleResult.lineIterationInProgress = true;
            this.debug(state, this.line_iterate, ParserDebug.LINE_RULE_MATCH);
            return;
        } else {
        }

        //trace(" *** extracted text: \"" + state.ruleResult.contentString() + "\"");
        this.rule_success(state);
    } // rule_process

    // TODO - absolutely no reason to have this here.
// All the information is in state to be used in rule_done
    /**
     * Marks line iteration as finished and forwards to rule_done, using the LINE_RULE_MATCH
     * breakpoint for non-child rules and for line rules without line rules of their own,
     * and RULE_MATCH otherwise.
     */
    private rule_success(state:RuleState):void {

        state.ruleResult.lineIterationInProgress = false;

        var hasLineRules:boolean = state.hasLineRules();

        if (!state.asChild || (state.asLineRule && !hasLineRules)) { // || state.asLineRule
            //ParseDebugUtil.traceState(state);
            this.rule_done(state, ParserDebug.LINE_RULE_MATCH);
            return;
        } else {
            this.rule_done(state, ParserDebug.RULE_MATCH);
        }
    } // rule_success

    /** Continuation used by rule_done for child rules: processes the child result on the parent element state. */
    private _element_child_rule_process(state:RuleState):void {
        this.element_child_rule_process(state.parentElementState);
    }

    /**
     * process the parentRuleState of the rules state.
     *
     * @return the result of lineProcess(parentState) when the state has a parent rule state
     *         (asserted to hold this state's result as its lineRuleResult); true when it has none
     */
    private doLineProcess(state:RuleState):boolean {

        var parentState:RuleState = state.parentRuleState;
        if (parentState) {
            Assert.assert(parentState.lineRuleResult == state.ruleResult )
            // parentState.lineRuleResult = state.ruleResult
            //rule(state.currentLineRule, false, state.indent, true /*asLineRule*/);
            return this.lineProcess(parentState);
        }
        return true;
    } // doLineProcess

    /**
     * rule done, whether it has matched or not.
     *
     * Chooses the continuation from the kind of rule: child rules return to their parent element,
     * line rules return to the parent's line iteration, and top-level rules go to _and_done on a
     * failure breakpoint or to top_rule_done otherwise.
     *
     * @param breakpoint may be RULE_FAIL, LINE_RULE_FAIL , PARENT_LINE_DONE
     *
     */
    private rule_done(state:RuleState, breakpoint:number):void {

        if (state.asChild) {

            // 1. if this is a simple child rule of an element, simply process it and proceed with iteration
            Assert.assert(state.parentElementState && !state.asLineRule);

            var parentElementState:ElementState = state.parentElementState;

            // propagate cutpoint failure upwards
            if (state.elementResult && state.elementResult.cutpointFailElement) {
                parentElementState.elementResult.cutpointFailElement = state.elementResult.cutpointFailElement;
                parentElementState.elementResult.valid = false;
            }

            var parentRuleState:RuleState = state.parentElementState.parentRuleState
            if (state.lineRuleLineX >=0 ) {
                parentRuleState.lineRuleLineX = state.lineRuleLineX;
            }

            this.debug(state, this._element_child_rule_process, breakpoint);
            return;

        } else if (state.asLineRule && state.lineRuleLineX >= 0) {
            state.parentRuleState.lineRuleLineX = state.lineRuleLineX;
        }

        if (state.asLineRule) {
            // 2. if this is a line rule ...
            this.debug(state, this.line_parent_iterate, breakpoint);
            return;
        }

        // NOTE(review): bitwise test - assumes the ParserDebug breakpoint constants are bit flags; confirm
        if ( breakpoint & ( ParserDebug.LINE_RULE_FAIL | ParserDebug.RULE_FAIL) ) {
            this.debug(state, this._and_done, breakpoint);
            return;
        }

        this.debug(state, this.top_rule_done, breakpoint);
    } // rule_done

    /**
     * Finishes a top-level rule (asserted to be neither a child nor a line rule).
     *
     * While not at eof, consumes one trailing secondary-grammar line per call and re-enters itself
     * through debug(). If consumeUntilEOF is set and unconsumed text remains on the current line, a
     * TEXT_AFTER_COMPLETION error is appended and parsing ends with DONE_WITH_UNMATCHED_TEXT.
     * Otherwise hands over to _and_done.
     */
    top_rule_done(state:RuleState):void {

        Assert.assert( !state.asChild && !state.asLineRule );

        if ( !this.eof() ) {

            // rule has succeeded, match trailing secondary parser (ie properly managed ws whitespace)
            var secondaryLineResults:boolean = this.matchSecondaryGrammar(state, true) // there is no longer a post option
            if (secondaryLineResults) {
                this.debug(state, this.top_rule_done, ParserDebug.SECONDARY_LINE_DONE);
                return;
            }

            if (state.consumeUntilEOF && !this.eof()) {

                if (!this.done() ) {
                    //trace(' unexpected text after completion "' + value + '"');
                    state.appendError(GErr.TEXT_AFTER_COMPLETION, state, state.element, false, "unexpcted text at eof \"" + this.value + "\"" );
                    this.debug(state, null, ParserDebug.DONE_WITH_UNMATCHED_TEXT);
                    return;
                }
            }
        }

        // 4. if there is no parent, then we're done
        this._and_done(state);
    } // rule_dones

    /** Continuation used by rule_done for line rules: resumes line iteration on the parent rule state. */
    private line_parent_iterate(state:RuleState):void {
        var parentState:RuleState = state.parentRuleState;
        this.line_iterate(parentState);
    } // line_parent_iterate

    /**
     * Terminal step for a top-level rule: optionally maps values onto the result via the object
     * mapper (when autoObjectMap is true), then signals DONE through debug() with no continuation.
     */
    private _and_done(state:RuleState):void {

        Assert.assert(!state.asChild && !state.asLineRule);

        // 2. to parent element or line, this is the top level
        if (this.autoObjectMap) {
            this._objectMapper.mapValues(state.ruleResult, state.rule, null, null, true /* isRoot */);
        }

        this.debug(state, null, ParserDebug.DONE);
    } // _and_done

    /**
     * Iterate on on the line rules of a given rule.
     *
     *

called recursively after matching a line

*
     * Each call performs one step: consume one secondary-grammar line, start matching the next
     * line rule, or finish through rule_done / rule_fail.
     *
     * @param state the state of the parent rule.
     */
    private line_iterate(state:RuleState):void { // r:PRule, indent:string , parentResult:PRuleResult, parentIsLineRule:boolean):PRuleResult {

        // 1. extract a secondary grammar (ie comments, whitespace, stuff we want to keep out of the primary grammar)
        var secondaryResult:boolean = this.matchSecondaryGrammar(state, true );// TODO - state is the same state used by the parent rule
        if (secondaryResult) {
            this.debug(state, this.line_iterate, ParserDebug.SECONDARY_LINE_DONE);
            return;
        }

        // 2. initialise if iteration hasn't begun yet
        if (state.lineRuleIndex < 0) {

            if (!this.eof() /*&& state.p.eq(state.currentLineIndent) */) {
                this.pos = 0;
            } else {

                // we have no indent (or eof) , but we have non-optional line rules, then fail
                if (state.rule.lineRules) {
                    var lr:GRule
                    for (lr of state.rule.lineRules) {
                        if (!(lr.child(0).lineAllowNone) ) {
                            state.appendError(GErr.EOF, state, lr.child(0), false, "end of file /w pending line rules ");
                            this.rule_fail(state);
                            return;
                        }
                    }
                }

                // if all child rules are optional
                this.rule_done(state, ParserDebug.PARENT_LINE_DONE);
                return; //
            }
            state.incrementLineRule();
        }

        // 3. increment, but remembering that
        if (state.lineIncrementNotPending() ) { // this encapsulates state's cardinality etc
            //if (state.lineAllowMany) {
            //    state.lineCollectionElementIndex++;
            //}
        } else {
            state.incrementLineRule();
        }

        // 4. perform iteration on line rule
        if (!this.eof() && state.lineRuleIndex < state.rule.lineRules.length) {

            // note: we're creating a new child state for each line, even if the line rule repeats. Which is unnecessary
            var child:RuleState = RuleState.createForLineRuleFromParent(state, state.currentLineRule);
            state.lineRuleResult = child.ruleResult;

            // --> here we might, plausibly the child rule, remove it later if necessary --
            Assert.assert(state.currentLineRuleState == child);
            Assert.assert(child.rule == state.rule.lineChild(state.lineRuleIndex) );

            // parse rule
            //trace('> iterating on child rule ' + state.lineRuleIndex.toString() );
            Assert.assert(child.parentRuleState == state); // used in rule_done
            this.rule_iterate(child);
            return;
        }

        // 5. upon no more rules to iterate upon
        if (this.eof() || state.lineRuleIndex >= state.rule.lineRules.length) {
            // if (!state.currentLineRule == null)
            //ParseDebugUtil.traceState(state);
            if (!state.ruleResult.isValid()) {
                // //trace(' child line must have failed');
            }
            this.rule_done(state, ParserDebug.PARENT_LINE_DONE);
            return;
        }

        // shouldn't reach here
        Assert.fail();
    } // lineRule

    /**
     * TODO - see if this can't be gotten rid of. Most of the functionality is moved to element_process.
     * Called on the parent rule state of a child line rule
     *
     * Records on the parent whether its current line rule matched. On an optional failure the
     * line position is reset to 0; on a non-optional failure the rule is rewound to its first line.
     *
     * @return false if the the line hasn't been processed - ie it has failed but was optional.
     */
    private lineProcess(state:RuleState):boolean {

        if (state.lineRuleResult && state.lineRuleResult.isValid()) {

            // 1. we have a valid line rule matched

            // 1a. check assumptions that the parser constrains a line rule to a single element /w a child rule itself
            Assert.assert(state.lineRuleResult.results.length == 1
                || (state.lineRuleResult.results.length > 1 && state.currentLineRule.lineRules.length > 0 ) ); // FIX_THIS - far too low level coupling to lower level data structures
            // (note here it's lineRules.length > 0 because we'll have already removed failing elements )

            state.foundOneLineRule = true;

        } else {

            // 2. match has failed.
            state.lineMatchFailed = true;

            if (state.lineAllowNone || (state.lineAllowMany && state.foundOneLineRule) ) {

                // 2a. line rule has failed, but it's optional so we can reset (the line position) and continue;
                this.pos = 0; // TODO - move this elsewhere
                return false;

            } else {

                // 2b.line is not optional so entire rule fails to match to add error (for debug) and fail.
                if (state.line0 != state.line1) {
                    // temporary data integrity check
                    if (!(state.ruleResult.preResultsLength() > 0)) {
                        Assert.assert(state.line0 == state.line1); // relax this with --> mechanism
                    }
                }

                this.resetRuleToLine0(state);
                return true;
            }
        }
        return true;
    }

    /**
     * Fails a rule and routes to rule_done with the appropriate breakpoint.
     *
     * For non-child rules and line rules the breakpoint is LINE_RULE_FAIL unless doLineProcess
     * reports that the line was merely an unmatched optional one (then RULE_FAIL). Child rules
     * are first marked cutpoint-invalid if a cutpoint was hit, then reported as RULE_FAIL.
     */
    private rule_fail(state:RuleState):void {

        // if parent element could do something here do something here
        state.ruleResult.lineIterationInProgress = false;

        if (!state.asChild || state.asLineRule) {

            var resultAppended:boolean = true;
            if (state.asLineRule) {
                // result will be appended only if this is a genuine failure (and not merely an unmatched optional line)
                // (which amounts to the implementation of an unmatched line as a #cutpoint)
                resultAppended = this.doLineProcess(state);
            }

            this.rule_done(state, resultAppended
                ? ParserDebug.LINE_RULE_FAIL // result appended - which means that the rule the line has failed.
                : ParserDebug.RULE_FAIL );   // not appended
            //rule_done(state, state.asLineRule && resultAppended ? ParserDebug.LINE_RULE_FAIL : ParserDebug.LINE_RULE_SKIP );

        } else {

            if (state.cutpointHit()) {
                state.markAsCutpointInvalid();
            }
            this.rule_done(state, ParserDebug.RULE_FAIL);
        }
    } // rule_fail

    /**
     * Helper that maps values to a rule following an output
     */
    private _objectMapper:ObjectMapper = new ObjectMapper;

    /**
     * returns the union of two token vectors
     *
     * When both inputs are present, returns a new array holding every entry of v1 (in order,
     * duplicates kept) followed by those entries of v2 not already present. When only one is
     * present that array itself is returned (not a copy); when neither is, v2 is returned as-is.
     */
    private tempMergeTokens(v1:Array, v2:Array):Array {

        if (v1 && v2) {
            var token:string;
            var out:Array = [];
            for (token of v1) {
                out.push(token);
            }
            for (token of v2) {
                if (out.indexOf(token) < 0) {
                    out.push(token);
                }
            }
            return out;
        }
        return v1 ? v1 : v2
    } // tempMergeTokens

    /**
     * Iterates on a single element
     *
     * On the first pass for an element (prefix not yet processed) this creates and registers the
     * element result and matches the prefix. It then either starts the child rule (if the element
     * has one) or matches the element as a leaf and reports to element_done.
     */
    private element_iterate(state:ElementState):void {

        var e:GElement = state.element; // local var for convenience only

        // FIX_THIS - duplication of code. Previously, new lines are only done at the end of rules
        if (state.element.elementPermitsNL() ) {
            if ( this.done() ) {
                // BUG - this break incremental parsing & tokenising.
                this.nextLine();
                var requiredIndent:string = state.parentRuleState.indent + (state.element.elementNLIndent ? SeepParserDefs.INDENT : "");
                if (requiredIndent && !this.eq(requiredIndent)) {
                    state.setError( GErr.NO_INDENT, false, "indent not matched");
                    this.element_done(state);
                    return;
                }
            }
        }

        // 1. match prefix
        if (!state.prefixProcessed) {

            state.elementResult = new PElementResult;
            state.elementResult.element = state.element;
            state.elementResult.startLineIndex = state.line0;
            state.parentRuleState.elementResult = state.elementResult;

            // ----> here we need to append the element
            //ParseDebugUtil.traceState(state);
            state.parentRuleState.appendElementResult(state.elementResult);

            if (state.parentRuleState.asLineRule) {
                //trace(state.parentRuleState.rule.name);
                //trace(state.parentRuleState.parentRuleState.rule.name);
                //state.parentRuleState.parentRuleState.lineRuleResult = state.parentRuleState.ruleResult;
                Assert.assert(state.parentRuleState.parentRuleState.lineRuleResult == state.parentRuleState.ruleResult);
                state.parentRuleState.parentRuleState.appendLine(state.parentRuleState.parentRuleState.lineRuleResult);
            }
            // ---->

            var prefixOK:boolean = this.elementMatchPrefix(state); // --> loops back to iterate_element , or element_...
            state.prefixProcessed = true;

            if (!prefixOK) {
                this.element_done(state);
                return;
            }

            var elementResult:PElementResult = state.elementResult;
            elementResult.tokenPrefixIndex = ( !(state.isFirst() && state.element.filterList) && state.element.prefixTokenValue ) ? state.pos0 : -1;
            elementResult.startIndex = state.pos0;
            elementResult.endIndex = this.pos;

            if (state.element.filterList && state.element.elementPermitsNL() ) {
                this.wst();
                if (this.done() ) {
                    this.nextLine();
                    // FIX_THIS - duplicates above code
                    var requiredIndentt:string = state.parentRuleState.indent + (state.element.elementNLIndent ? SeepParserDefs.INDENT : "");
                    // Test the indent computed here. The previous code tested `requiredIndent`, which is
                    // hoisted from the block above and undefined unless that block ran, so this check
                    // was silently skipped.
                    if (requiredIndentt && !this.eq(requiredIndentt)) {
                        state.setError(GErr.NO_INDENT, false, "indent not matched" );
                        this.element_done(state);
                        return;
                    }
                }
            }
        }

        // 0. can't match if end of file
        if (this.eof()) {
            state.setError(GErr.EOF, false, "eof encountered"); // FIX_THIS - this is almost certainly the wrong element
            this.element_child_rule_process(state);
            return;
        }

        if (e.childRule) {

            // 2. recurse on child rule
            Assert.assert(e.contentTokens == null);

            if (e.prefixTokenValue) {
                // allow ws between token and child rule ie X:rule --> " X chidl content"
                this.wst(); // eventually allow token to specify this ie token: X.: X
            } // else {
            //    if (wst()) {
            //        state.elementResult = new PElementResult(e, false, "can't have ws here");
            //        element_done(state);
            //    }
            //}

            state.contentStart = this.pos;
            state.childRuleState = RuleState.createFromParentElement(state);
            // NOTE(review): self-assignment, a no-op unless childResult is an accessor; possibly meant state.childRuleState.ruleResult - confirm before changing
            state.childResult = state.childResult;
            state.elementResult.childResult = state.childRuleState.ruleResult;

            Assert.assert(state.parentRuleState == state.childRuleState.parentElementState.parentRuleState);

            this.debug(state.childRuleState, this.rule_iterate, ParserDebug.RULE_BEFORE);
            return;

        }// else {

        // only other options is simple leaf.
        this.elementMatchLeaf(state);
        this.debug(state, this.element_done, ParserDebug.ELEMENT_LEAF_POST);
        return;
        //}
    } // element_iterate

    /**
     * Matches the content (and suffix token, if any) of an element that has no child rule.
     *
     * Failures are recorded with state.setError and the method simply returns; on success
     * elementSuccess(state) is called. The caller (element_iterate) proceeds to element_done
     * either way.
     */
    private elementMatchLeaf(state:ElementState):void {

        var e:GElement = state.element;

        if (e.consumeAll()) {

            // 1. we've got a (var)$ construct, which greedily consumes everything (including ws) the next element token
            Assert.assert(!e.asTreeNode());

            // element is suffixed by "$" ==> greedily consume everything until next token
            state.contentStart = this.pos;

            if (e.suffixTokenValue) {

                state.varStr = this.toToken([e.suffixTokenValue], false /* consume quotes and all */);
                state.contentEnd = this.pos;

                if (!this.eq(e.suffixTokenValue)) {
                    this.resetElementToPos0(state);
                    // FIX_THIS - duplicates the below - to clean up when enhancing error mechanism
                    state.setError(GErr.BAD_SUFFIX, false, "expecting (suffix) character: " + e.suffixTokenValue, state.childResult);
                    // ---------> expecting suffix token
                    return;
                }

            } else {

                // TO_OPTIMIZE - cache this
                var mergedNextTokens:Array = this.tempMergeTokens(e.possibleNextTokens, e.nextElementTokens);

                if (mergedNextTokens) {
                    state.varStr = this.toToken(mergedNextTokens, false /* allow quotes*/, true /*ignoreWS */);
                } else {
                    state.varStr = this.rest(); // just consume all
                }
                state.contentEnd = this.pos;
            }

        } else if ((e.varName) && !e.filterExists) {

            // 2. if we have a (var) construct, extract text
            if (e.prefixTokenValue && (e.varName && !e.asTree) ) {
                this.wst(); // if token allows ws
            }

            if (!e.contentTokens) {
                state.contentStart = this.pos;
            }

            if (e.contentTokens) {

                // 2a. token declaration has specified explicit content
                // note - content token extraction moved to prefix extraction (event though strictly speaking it's content, not prefix)
                state.contentEnd = this.pos;

            } else if (e.suffixTokenValue) {

                // 2b. given a suffix token, match everything until that
                if (!e.asTreeNode()) { // only match content if not tree node
                    state.varStr = this.toToken([" ", e.suffixTokenValue], true /*allow quotes*/); // TO_OPTIMIZE
                }

            } else if ((e.varName) && !e.filterExists) {

                // 2c. basic content extraction, no suffix token
                if (!e.asTreeNode() || e.nameIsRef ) {

                    state.varStr = this.qtext(true); // by default attempt to extract quoted text first

                    if (!e.possibleNextTokens) {
                        //trace(' bug - no possible next tokens ');
                    }

                    if (state.varStr == null) {
                        // no quoted text, match until next possible tokens
                        state.varStr = this.toToken(e.possibleNextTokens, true); // ie --var-name
                        // --varName|filter etc (and recall that this will include ws)
                    }

                    if (!state.varStr && !e.prefixTokenValue) {
                        // 2d. no text matched
                        Assert.assert(this.pos == state.pos0);
                        Assert.assert(this.currentLineIndex == state.line0);
                        state.setError(GErr.NO_TEXT, false, "no text"); // or return null
                        return;
                    }
                }
            }

            state.contentEnd = this.pos;

            // 3. extract suffix token
            if (e.suffixTokenValue) {
                this.wst();
                state.suffixStart = this.pos;
                if ( !this.eq(e.suffixTokenValue) ) {
                    state.setError(GErr.BAD_SUFFIX,false, "expecting (suffix) character: " + e.suffixTokenValue);
                    return;
                }
            }
        } // basic content extraction

        //4. some last minute processing
        state.varStr = state.varStr && state.varStr.length > 0 ? state.varStr : null; // prevent matching ""

        // 5. ensure something has matched
        if (!state.varStr && !e.prefixTokenValue && !e.asTreeNode()) {
            state.setError(GErr.NOTHING_MATCHED, false, "nothing matched" );
            return;
        }

        this.elementSuccess(state);
    } // elementMatchLeaf

    private elementMatchPrefix(state:ElementState):boolean {

        var e:GElement = state.element;

        // 1. match token
        var prefixDone:boolean = !e.prefixTokenValue || (state.isFirst() && e.filterList) || this.eq(e.prefixTokenValue, false);
        // the "list" filter implements, for instance, a CSV, and just means the the prefix token (ie ',') doesn't apply to the first match.
        // note that it's important that the check for the "list" filter perceeds the eq();

        // 2.
failted to match prefix if (!prefixDone) { // || (!state.prefixDone && done()) ) { state.setError(GErr.BAD_PREFIX, false, "expecting (prefix) character: " + e.prefixTokenValue); return false; } var matchedTokens:string; if (prefixDone && e.contentTokens) { if (e.prefixTokenValue) { this.wst(); } Assert.assert(!e.childRule); // assumption of content tokens state.contentStart = this.pos; // TODO ie we want content tokens to match if (e.tokenRef.multiTokens) { // [ X, Y , Z]* // match as many tokens as are present matchedTokens = this.whileTokens(e.contentTokens, true); } else { matchedTokens = this.whileTokens(e.contentTokens, false); } state.varStr = matchedTokens; if (matchedTokens == null || matchedTokens.length == 0) { var missingTokens:string; if (e.contentTokens) { var t:string for (t of e.contentTokens) { missingTokens += t + " " } } state.setError(GErr.BAD_CONTENT_TOKENS, false, " expecting content tokens " + missingTokens +"\""); //element_done(state); return false; } state.contentEnd = this.pos; } // 3. In some situations, we can check for // a) no prefix token // b) no content tokens / content tokens not matched // c) no child rule (because possible next tokens not calcuated for hierarchical rule) // d) element is not a (leaf) tree element ( ie ^(var) ), as such nodes can have 0 length // d) precalcualted next tokens present // TO_OPTIMIZE - optimization here. 
if e.skipToken was on by default, we could potentially very substantially // speed up parsing b/c we know what patterns will cause all child expressions to fail if ( !e.prefixTokenValue && !matchedTokens && e.nextElementTokens && (!e.childRule || (e.skip2 )) && !e.asTreeNode() ) { // if there's no prefix of content tokens matched, check for collisions /w future tokens Assert.assert(!e.suffixTokenValue); // implicit assumpution var foundNextElementToken:number = this.matchFromList(e.nextElementTokens, false/* don't conumse*/); if (foundNextElementToken >= 0) { var offendingToken:string = e.nextElementTokens[foundNextElementToken]; //trace(" X <---- failed due to collision /w token \"" + offendingToken +"\""); state.setError(GErr.ELEMENT_TOKEN_COLLISION, false, "failed due to collision /w token \"" + offendingToken +"\""); // TO_OPTIMIZE - should be able to pre-construct a reference to the element of the colission token // and use this to know where to jump to in the parsing, rather than go throught the grammar //element_done(state); return false; } } // 1. prefix has matched ok //element_iterate(state); return true; } // elementMatchPrefix private element_child_rule_process(state:ElementState):void { // having processsed a child rule, we need to look at state.childResult, a // and if we have an error, // a) appropriately populate state.elementResult /w an error element // b) roll back parser position state.childResult = state.childRuleState.ruleResult; state.contentEnd = this.pos; if (!state.childResult) { // 1. child rule simply doesn't match. this.resetElementToPos0(state); state.elementResult = null; this.element_done(state); return; } else if (!state.childResult.isValid() ) { // 2. match but not valid var childError:string = state.childResult.quickErrorStr(); this.resetElementToPos0(state); if (state.childResult.matchedElements > 0) { // 2a. 
partial match (possibility of keeping this for debug) if (state.elementResult.cutpointFailElement) { var cutpointElement:GElement = state.elementResult.cutpointFailElement.element; state.setError(GErr.EXPLICIT_CUTPOINT, false, "cutpoint error: " + cutpointElement.cutpointMsg, state.childResult ); // if something has matched then return an error } else { state.setError(GErr.ELEMENT_CHILD_FAILED, false, "composite rule not matched: " + childError, state.childResult ); // if something has matched then return an error } this.element_done(state); return; } else { // 2a. no elements match state.setError(GErr.NO_CHILD_ELEMENTS_MATCHED, false, "no child elements matched", state.childResult); this.element_done(state); return; // but if nothing has matched, return null } } // 3.we have a valid result, now match suffix token following child rule if (state.element.suffixTokenValue != null) { if (state.element.contentTokens) { this.wst(); } state.suffixStart = this.pos; if (!this.eq(state.element.suffixTokenValue)) { //pos = state.pos0; this.resetElementToPos0(state); state.setError(GErr.BAD_SUFFIX, false, "expecting (suffix) character: " + state.element.suffixTokenValue, state.childResult); // ---------> expecting suffix token this.element_done(state); return; } } this.elementSuccess(state); this.element_done(state); } // elementChildRuleProcess private elementSuccess(state:ElementState):void { Assert.assert(state.elementResult != null); var e:PElementResult = state.elementResult; e.element = state.element; e.valid = true; var result:PElementResult = state.elementResult // indexes result.tokenPrefixIndex = ( !(state.isFirst() && state.element.filterList) && state.element.prefixTokenValue ) ? state.pos0 : -1; result.contentIndex = (state.contentEnd > state.contentStart) ? state.contentStart : -1; result.contentEndIndex = (state.contentEnd > state.contentStart) ? state.contentEnd -1 : -1; result.tokenSuffixIndex = state.suffixStart; result.endIndex = (this.pos > state.pos0) ? 
this.pos -1 : -1; result.startIndex = result.tokenPrefixIndex >= 0 ? result.tokenPrefixIndex // a convenience value : result.contentIndex >= 0 ? result.contentIndex : result.tokenSuffixIndex >= 0 ? result.tokenSuffixIndex : -1; // content // result.childResult = state.childResult; Assert.assert(result.childResult == state.childResult); Assert.assert(result.startLineIndex == state.line0); Assert.assert(result.element == state.element); result.varStr = state.varStr; } // elementSuccess // ------------- util ---------------- private ___deprecated___createElementResult(errCode:number, state:ParserState, element:GElement, valid:boolean = true, err:string = null, childResult:PRuleResult = null):PElementResult { var e:PElementResult = new PElementResult(); // element, valid, err, childResult); e.element = element; e.valid = valid; e.err = err; e.childResult = childResult; e.errCode = errCode; // the awkwardnewss here is that we don't create the element at the bedinning of the match, if (state.isElementState()) { // s ElementState) { var estate:ElementState = state as ElementState; e.startIndex = estate.pos0; e.startLineIndex = this.currentLineIndex; } else { var rState:RuleState = state as RuleState; e.startIndex = rState.posX >= 0 ? rState.posX : rState.pos0; e.startLineIndex = this.currentLineIndex // state.line0; } Assert.assert(errCode == GErr.NOT_AN_ERROR || !valid); return e; } // createElementResult /** * Formalised consumption for a state. 
ie when an element - or even an indentation is matched, call this * to ensure that if the next element fails, it won't backtrack */ private advanceRuleConsmeToPosX(state:RuleState):void { state.posX = this.pos; state.lineX = this.currentLineIndex; } // advanceRulePos0 private advanceLineRuleX(state:RuleState):void { state.lineRuleLineX = this.currentLineIndex; } /** * reset position when an entire line has failed (ie line rule fails) */ private resetRuleToLine0(state:RuleState):void { this.setPosition(state.line0, 0); // reset position } /** * when an element has failed */ private resetElementToPos0(state:ElementState):void { this.setPosition(state.line0, state.pos0); } // resetPosition private resetRuleToPosX(state:RuleState):void { //Assert.assert(state.line0 == state.lineX); //Assert.assert(state.posX == state.pos0); this.setPosition(state.lineX, state.posX); } // resetPosition // -------- manage pause for debug ------- private _state:ParserState; private paused:boolean; private localPause:boolean = false; debug(state:ParserState, done:Function, breakpoint:number):boolean { state.breakpoint = breakpoint; state.continueFn = done; // note that this is a simple pointer to function, not a closure // 1. if an external parser is present, it may call for an asynchronous pause if (this.debugger) { this.paused = this.debugger.debug(state); } else { this.paused = false; } // 2. if an external debugger hasn't called for an asynchronous pause, in order to keep the recursion depth // low, we may need to stop execution, and allow the parent execute loop to call the next function. // execution goes up the call stack, and then calls the next function. 
if (!this.paused) { // preserver state to be resumed this._state = state; this.localPause = true; // (breakpoint & ( ParserDebug.ELEMENT_LEAF_POST | ParserDebug.RULE_BEFORE)) != 0 if (!this.localPause) { this.resumeLocal(); } } else { // if debugger has invoked a pause, don't locall cache state this._state = null; this.localPause = false; } return this.paused; } // debug private resumeLocal():void { Assert.assert(this.localPause); this.localPause = false; if (this._state.continueFn != null) { this._state.continueFn.call(this, this._state); } } // resume resume(state:ParserState):void { this,state.continueFn.call(this,state); while (this.localPause) { this.resumeLocal(); } } // resume } // class