atok 0.4.3

Atok

An atok stream

clear

Atok.prototype.clear()

method

Params

  • keep - rules set (default=false)

Reset the tokenizer by clearing its buffer and rules

Source

/**
 * Reset the tokenizer: drop the buffered data, rewind the offsets, forget the
 * grouping state and restore the rule properties to their defaults.
 *
 * @param {boolean} [keepRules=false] when truthy, the rules, the default
 *   handler and the saved rule sets are left untouched
 * @return {Atok} this, for chaining
 */
Atok.prototype.clear = function (keepRules) {
  var encoding = this._encoding

  // -- Public state
  this.buffer = null
  this.length = 0
  this.offset = 0
  this.markedOffset = -1          // Negative: no offset is marked

  // -- Private state
  this._firstRule = null          // Initial rule to be triggered
  this._resetRule = false         // Rule set was changed
  // A decoder only makes sense when an encoding is set
  if (encoding) {
    this._stringDecoder = new StringDecoder(encoding)
  } else {
    this._stringDecoder = null
  }
  this._rulesToResolve = false    // Rules need to be resolved (continue() prop)
  this._rulesToLink = false       // Rules need to be relinked (after a rule set change)

  // -- Grouping state
  this._group = -1
  this._groupStart = 0
  this._groupEnd = 0
  this._groupStartPrev = []

  // -- Rules are only wiped when the caller did not ask to keep them
  if (!keepRules) {
    this._rules = []              // Rules to be checked against
    this._defaultHandler = null   // Matched token default handler
    this._savedRules = {}         // Saved rules
  }

  this.clearProps()

  return this
}

slice

Atok.prototype.slice()

method

Params

  • starting - index
  • ending - index

Extract data from the buffer (Atok#slice)

Source

/**
 * Extract data from the buffer.
 *
 * @param {number} [start] starting index
 * @param {number} [end] ending index
 * @return {string|Buffer|null} the requested part of the buffer, or null
 *   when the tokenizer currently holds no buffer
 */
Atok.prototype.slice = function (start, end) {
  // The buffer is null whenever the tokenizer is empty (e.g. after clear()),
  // in which case there is nothing to slice
  if (this.buffer === null || this.buffer === undefined) return null

  return this.buffer.slice(start, end)
}

flush

Atok.prototype.flush()

method

Terminate the current tokenizing and return the current buffer

Source

/**
 * Terminate the current tokenizing and hand back whatever is buffered.
 *
 * @return {*} the result of Atok#slice() taken before the reset
 */
Atok.prototype.flush = function () {
  // Grab the pending data first: clear() discards the buffer
  var pending = this.slice()

  // Reset the tokenizer state while keeping the rules
  this.clear(true)

  return pending
}

setEncoding

Atok.prototype.setEncoding()

method

Params

  • encoding - to be used

Set the string encoding

Source

/**
 * Set the string encoding.
 *
 * null and undefined (or their string forms) disable the encoding; any other
 * value selects UTF-8, the only supported encoding.
 *
 * @param {string|null} [enc] encoding to be used
 * @return {Atok} this, for chaining
 */
Atok.prototype.setEncoding = function (enc) {
  var requested = String(enc)
  var disabled = requested === 'null' || requested === 'undefined'

  if (disabled) {
    this._encoding = null
    this._stringDecoder = null
  } else {
    // 'UTF-8', 'utf-8', 'utf8' and anything unknown all end up here
    this._encoding = 'UTF-8'
    this._stringDecoder = new StringDecoder(this._encoding)
  }

  return this
}

debug

Atok.prototype.debug()

method

Params

  • toggle - debug mode on and off

Turn debug mode on or off. Emits the [debug] event. The #loadRuleSet method is also put in debug mode. All handlers log their arguments.

Source

/**
 * Turn debug mode on or off.
 *
 * Every defined rule is switched to the requested mode and #loadRuleSet is
 * wrapped so that each call is reported through emit_debug().
 *
 * @param {boolean} flag toggle debug mode on and off
 * @return {Atok} this, for chaining
 */
Atok.prototype.debug = function (flag) {
  var enabled = Boolean(flag)
  var atok = this

  // Already in the requested mode: nothing to do
  if (this.debugMode === enabled) return this
  this.debugMode = enabled

  // Propagate the mode to all defined rules
  this._rulesForEach(function (rule) {
    rule.setDebug(enabled, atok)
  })

  // Install or remove the logging wrapper around one method
  function toggle (method) {
    if (!enabled) {
      // Dropping the instance property restores the prototype method
      delete atok[method]
      return
    }

    var wrapped = atok[method]

    atok[method] = function () {
      atok.emit_debug( 'Atok#', method, arguments )
      return wrapped.apply(atok, arguments)
    }
  }

  toggle('loadRuleSet')

  return this
}

currentRule

Atok.prototype.currentRule()

method

Get the current rule set name

Source

/**
 * Get the current rule set name.
 *
 * @return {*} the currentRule property of the first rule, or null when no
 *   first rule is set
 */
Atok.prototype.currentRule = function () {
  var first = this._firstRule

  if (!first) return null

  return first.currentRule
}
// include("methods_ruleprops.js")

setDefaultHandler

Atok.prototype.setDefaultHandler()

method

Params

  • handler - rules handler function (it is better to name it for debugging)

Set the default handler. Triggered on all subsequently defined rules if the handler is not supplied

Source

/**
 * Set the default handler, used by all subsequently defined rules that do not
 * supply their own.
 *
 * @param {Function} [handler] default handler; anything that is not a
 *   function unsets it
 * @return {Atok} this, for chaining
 */
Atok.prototype.setDefaultHandler = function (handler) {
  if (typeof handler === 'function') {
    this._defaultHandler = handler
  } else {
    this._defaultHandler = null
  }

  return this
}

next

Atok.prototype.next()

method

Params

  • name - of the rule set to load if rule successful
  • index - to start at

Set the rule set to load, and the rule index to start at, when a rule is successful, for all subsequent rules

Source

/**
 * Set the rule set to load next, for all subsequent rules.
 *
 * @param {string} [ruleSet] name of the rule set to load if rule successful;
 *   anything that is not a string unsets it
 * @param {number} [index=0] index to start at
 * @return {Atok} this, for chaining
 */
Atok.prototype.next = function (ruleSet, index) {
  var hasName = typeof ruleSet === 'string'
  var hasIndex = typeof index === 'number'

  this._p_next = hasName ? ruleSet : null
  this._p_nextIndex = hasIndex ? index : 0

  return this
}

ignore

Atok.prototype.ignore()

method

Params

  • flag -

Skip matched data silently for all subsequent rules

Source

/**
 * Skip matched data silently for all subsequent rules.
 *
 * @param {boolean} flag only the boolean true turns the property on
 * @return {Atok} this, for chaining
 */
Atok.prototype.ignore = function (flag) {
  var enabled = flag === true

  this._p_ignore = enabled

  return this
}

quiet

Atok.prototype.quiet()

method

Params

  • flag -

Do not supply matched data to the handler for all subsequent rules. This is used when the token data does not matter but a handler still needs to be called. Faster than standard handler call.

Source

/**
 * Do not supply matched data to the handler for all subsequent rules.
 *
 * @param {boolean} flag only the boolean true turns the property on
 * @return {Atok} this, for chaining
 */
Atok.prototype.quiet = function (flag) {
  var enabled = flag === true

  this._p_quiet = enabled

  return this
}

trimLeft

Atok.prototype.trimLeft()

method

Params

  • flag -

Remove the left matched pattern for all subsequent rules

Source

/**
 * Remove the left matched pattern for all subsequent rules.
 *
 * @param {boolean} flag only the boolean true turns the property on
 * @return {Atok} this, for chaining
 */
Atok.prototype.trimLeft = function (flag) {
  var enabled = flag === true

  this._p_trimLeft = enabled

  return this
}

trimRight

Atok.prototype.trimRight()

method

Params

  • flag -

Remove the right matched pattern for all subsequent rules. If there is only 1 pattern, it is ignored.

Source

/**
 * Remove the right matched pattern for all subsequent rules.
 *
 * @param {boolean} flag only the boolean true turns the property on
 * @return {Atok} this, for chaining
 */
Atok.prototype.trimRight = function (flag) {
  var enabled = flag === true

  this._p_trimRight = enabled

  return this
}

trim

Atok.prototype.trim()

method

Params

  • flag -

Remove the left and right matched patterns for all subsequent rules

Source

/**
 * Remove the left and right matched patterns for all subsequent rules.
 *
 * @param {boolean} flag forwarded to Atok#trimLeft and Atok#trimRight
 * @return {Atok} whatever the chained trimRight() call returns
 */
Atok.prototype.trim = function (flag) {
  var afterLeft = this.trimLeft(flag)

  return afterLeft.trimRight(flag)
}

escape

Atok.prototype.escape()

method

Params

  • flag -

Do not remove the left and right matched patterns for all subsequent rules. The default escape character is \ (backslash); it can be changed by specifying it instead of a Boolean.

Source

/**
 * Set the escape character for all subsequent rules.
 *
 * @param {boolean|string|Buffer} flag true selects the backslash, a non empty
 *   value selects its first character, anything else disables escaping
 * @return {Atok} this, for chaining
 */
Atok.prototype.escape = function (flag) {
  var escapeChar = false

  if (flag === true) {
    // Default escape character
    escapeChar = '\\'
  } else if (flag && flag.length > 0) {
    // Custom escape character: first character of the supplied value
    escapeChar = flag.toString(this._encoding || 'utf8').charAt(0)
  }

  this._p_escape = escapeChar

  return this
}

continue

Atok.prototype.continue()

method

Params

  • number - of rules to skip before continuing
  • when - the rule fails, number of rules to skip before continuing

Continue the rules flow if rule matches at the specified rule index

Source

/**
 * Continue the rules flow if rule matches at the specified rule index.
 *
 * Called without arguments, both jumps are reset. An invalid jump is
 * reported through Atok#_error and leaves the current values untouched.
 *
 * @param {number|string|Function|null} [jump] number of rules to skip
 *   before continuing
 * @param {number|string|Function|null} [jumpOnFail] when the rule fails,
 *   number of rules to skip before continuing
 * @return {Atok} this, for chaining
 */
Atok.prototype.continue = function (jump, jumpOnFail) {
  if (arguments.length === 0) {
    this._p_continue = null
    this._p_continueOnFail = null

    return this
  }

  // A jump is either unset (null) or a number, a string or a function
  function isValidJump (value) {
    if (value === null) return true

    var type = typeof value
    return type === 'number' || type === 'string' || type === 'function'
  }

  if ( !isValidJump(jump) ) {
    this._error( new Error('Atok#continue: Invalid jump (must be an integer/function/string): ' + jump) )
    // Do not store a value that was just reported as invalid
    return this
  }

  if (arguments.length === 1) {
    jumpOnFail = null
  } else if ( !isValidJump(jumpOnFail) ) {
    this._error( new Error('Atok#continue: Invalid jump (must be an integer/function/string): ' + jumpOnFail) )
    return this
  }

  this._p_continue = jump
  this._p_continueOnFail = jumpOnFail

  return this
}

break

Atok.prototype.break()

method

Abort a current rule set. Use continue(-1) to resume at the current subrule.

Source

/**
 * Abort a current rule set, for all subsequent rules.
 *
 * @param {boolean} flag only the boolean true turns the property on
 * @return {Atok} this, for chaining
 */
Atok.prototype.break = function (flag) {
  var enabled = flag === true

  this._p_break = enabled

  return this
}

setProps

Atok.prototype.setProps()

method

Params

  • properties - to be loaded

Restore properties

Source

/**
 * Restore properties.
 *
 * Only names matching an existing own `_p_<name>` property are applied.
 * `continue` and `next` are given as 2 items arrays since each of them is
 * backed by 2 properties.
 *
 * @param {Object} [props] properties to be loaded
 * @return {Atok} this, for chaining
 */
Atok.prototype.setProps = function (props) {
  var source = props || {}
  var atok = this

  Object.keys(source).forEach(function (name) {
    // Unknown properties are silently skipped
    if ( !atok.hasOwnProperty('_p_' + name) ) return

    var value = source[name]

    if (name === 'continue') {
      // [ continue, continueOnFail ]
      atok._p_continue = value[0]
      atok._p_continueOnFail = value[1]
    } else if (name === 'next') {
      // [ next, nextIndex ]
      atok._p_next = value[0]
      atok._p_nextIndex = value[1]
    } else {
      atok[ '_p_' + name ] = value
    }
  })

  return this
}

clearProps

Atok.prototype.clearProps()

method

Reset properties to their default values

Source

/**
 * Reset properties to their default values.
 *
 * @return {Atok} this, for chaining
 */
Atok.prototype.clearProps = function () {
// include("Atok_rule_properties.js")
  // [ property name, default value ]
  var defaults = [
    [ 'ignore', false ]         // Get the token size and skip
  , [ 'quiet', false ]          // Get the token size and call the handler with no data
  , [ 'escape', false ]         // Pattern must not be escaped
  , [ 'trimLeft', true ]        // Remove the left pattern from the token
  , [ 'trimRight', true ]       // Remove the right pattern from the token
  , [ 'next', null ]            // Next rule to load
  , [ 'nextIndex', 0 ]          // Index for the next rule to load
  , [ 'continue', null ]        // Next rule index to load
  , [ 'continueOnFail', null ]  // Next rule index to load when rule fails
  , [ 'break', false ]          // Abort current rule set
  ]

  for (var i = 0, n = defaults.length; i < n; i++) {
    this[ '_p_' + defaults[i][0] ] = defaults[i][1]
  }

  return this
}

getProps

Atok.prototype.getProps()

method

Get the current values of the requested properties (all default properties when no name is given)

Source

/**
 * Get the current values of rule properties.
 *
 * The names are taken from the arguments or, when none is given, from
 * `this._defaultProps`. Names with no matching own `_p_<name>` property are
 * skipped. `continue` and `next` are returned as 2 items arrays.
 *
 * @param {...string} [names] properties to retrieve
 * @return {Object} prototype-less object holding the requested values
 */
Atok.prototype.getProps = function () {
  var names = arguments.length > 0
        ? sliceArguments(arguments, 0)
        : this._defaultProps
  var count = names.length
  // Empty object with no prototype
  var result = Object.create(null)
  var name

  for (var i = 0; i < count; i++) {
    name = names[i]

    // Unknown properties are silently skipped
    if ( !this.hasOwnProperty('_p_' + name) ) continue

    if (name === 'continue') {
      result[ name ] = [ this._p_continue, this._p_continueOnFail ]
    } else if (name === 'next') {
      result[ name ] = [ this._p_next, this._p_nextIndex ]
    } else {
      result[ name ] = this[ '_p_' + name ]
    }
  }

  return result
}
// include("methods_ruleset.js")

addRuleFirst

Atok.prototype.addRuleFirst()

method

Params

  • name - of the rule to be added first
  • rule - item
  • rule - type

Add a rule as the first one

Source

Atok.prototype.addRuleFirst = function (rule,

addRuleBefore

Atok.prototype.addRuleBefore()

method

Params

  • name - of the rule to add before
  • rule - item
  • rule - type

Add a rule before an existing one

Source

Atok.prototype.addRuleBefore = function (existingRule, rule,

addRuleAfter

Atok.prototype.addRuleAfter()

method

Params

  • name - of the rule to add after
  • rule - item
  • rule - type

Add a rule after an existing one

Source

Atok.prototype.addRuleAfter = function (existingRule, rule,

addRule

Atok.prototype.addRule()

method

Params

  • match - at current buffer position (String: expect string, Integer: expect n characters, Array: expect one of the items). If not needed, use ''
  • rule - name/id (if no default handler set, emit a data event) or handler (executed when all matches are valid). If false, the rule is ignored.

Add a rule

Source

Atok.prototype.addRule = function (

removeRule

Atok.prototype.removeRule()

method

Params

  • name - of the rule to be removed

Remove a rule (first instance only)

Source

Atok.prototype.removeRule = function (

clearRule

Atok.prototype.clearRule()

method

Remove all rules

Source

/**
 * Remove all rules.
 *
 * The rule properties are reset too, as well as the default handler and the
 * pending rules resolution flag. Saved rule sets are not touched.
 *
 * @return {Atok} this, for chaining
 */
Atok.prototype.clearRule = function () {
  // Back to the default rule properties
  this.clearProps()

  // Drop the rules themselves...
  this._rules = []
  this._firstRule = null

  // ...and what was attached to them
  this._defaultHandler = null
  this._rulesToResolve = false

  return this
}

saveRuleSet

Atok.prototype.saveRuleSet()

method

Params

  • name - of the rule set

Save all rules and clear them

Source

/**
 * Save the current rules under the given name.
 *
 * The rules are cloned, each clone being given the rule set name, then the
 * saved set is resolved through Atok#_resolveRules.
 *
 * @param {string} name name of the rule set
 * @return {Atok} this, or the result of Atok#_error when no name is supplied
 */
Atok.prototype.saveRuleSet = function (name) {
  var nameMissing = arguments.length === 0 || name === null

  if (nameMissing)
    return this._error( new Error('Atok#saveRuleSet: invalid rule name supplied') )

  // Clone and assign the current rule set name
  function cloneForSet (rule) {
    return rule.clone(name)
  }

  this._savedRules[name] = { rules: this._rules.map(cloneForSet) }

  // Resolve and check continues
  this._resolveRules(name)

  return this
}

loadRuleSet

Atok.prototype.loadRuleSet()

method

Params

  • name - of the rule set
  • index - to start at

Load a rule set

Source

/**
 * Load a rule set.
 *
 * @param {string} name name of the rule set
 * @param {number} [index=0] index of the rule to start at
 * @return {Atok} this, or the result of Atok#_error when the rule set does
 *   not exist
 */
Atok.prototype.loadRuleSet = function (name, index) {
  var saved = this._savedRules[name]

  if (!saved)
    return this._error( new Error('Atok#loadRuleSet: Rule set ' + name + ' not found') )

  var startIndex = typeof index === 'number' ? index : 0

  this._rules = saved.rules
  // Start at the requested rule and flag the rule set change
  this._firstRule = this._rules[startIndex]
  this._resetRule = true

  return this
}

removeRuleSet

Atok.prototype.removeRuleSet()

method

Params

  • name - of the rule set

Delete a rule set

Source

/**
 * Delete a rule set.
 *
 * @param {string} name name of the rule set
 * @return {Atok} this, for chaining
 */
Atok.prototype.removeRuleSet = function (name) {
  var saved = this._savedRules

  // Removing an unknown rule set is a no-op
  delete saved[name]

  return this
}

groupRule

Atok.prototype.groupRule()

method

Params

  • toggle - grouping on/off

Bind rules to the same index

Source

/**
 * Bind rules to the same index.
 *
 * A truthy flag opens a group starting at the next rule to be added; a falsy
 * one closes the group currently opened.
 *
 * @param {boolean} flag toggle grouping on/off
 * @return {Atok} this, for chaining
 */
Atok.prototype.groupRule = function (flag) {
  var list = this._rules
  var total = list.length
  var idx, rule

  // Opening: remember where the enclosing group started
  if (flag) {
    this._group++
    this._groupStartPrev.push(this._groupStart)
    this._groupStart = total

    return this
  }

  // Closing while no group is opened: ignored
  if (this._group < 0) return this

  idx = this._groupStart

  if (total - idx < 2) {
    // A group of 1 or 0 rule is meaningless: ungroup what it contains
    for (; idx < total; idx++) {
      rule = list[idx]
      rule.group = -1
      rule.groupStart = 0
      rule.groupEnd = 0
    }
  } else {
    // Give the index of the last rule to every member of the closing group
    for (; idx < total; idx++) {
      rule = list[idx]
      if (rule.group === this._group) rule.groupEnd = total - 1
    }
  }

  // Back to the enclosing group, if any
  this._group--
  this._groupStart = this._groupStartPrev.pop() || 0
  this._groupEnd = 0

  return this
}

write

Atok.prototype.write()

method

Params

  • data - to be processed

Applies the current rules to the incoming data. When false is returned (the tokenizer is paused), the data is buffered but no processing occurs until the tokenizer is resumed.

Source

/**
 * Applies the current rules to the incoming data.
 *
 * When an encoding is set, Buffers are decoded through the tokenizer's
 * StringDecoder so that a multi-byte character split across two writes is
 * reassembled instead of being corrupted.
 *
 * @param {string|Buffer} data data to be processed
 * @return {boolean} false when the tokenizer is ended or paused (the data is
 *   then buffered but not processed), true when there was nothing to
 *   process, otherwise the result of Atok#_tokenize
 */
Atok.prototype.write = function (data) {
  if (this.ended) {
    this._error( new Error('Atok#write: write after end') )
    return false
  }

  if (!data || data.length === 0) return true

  // Setting the encoding by default when receiving a string
  if ( typeof data === 'string' && !this._encoding ) this.setEncoding('utf-8')

  // Buffer the incoming data...
  if ( this._encoding ) {
    // Strings are used as is; Buffers go through the decoder, which holds
    // back the bytes of an incomplete multi-byte character until the next write
    var chunk = typeof data === 'string'
      ? data
      : Buffer.isBuffer(data)
        ? this._stringDecoder.write(data)
        : data.toString()

    // Only part of a character was received: nothing to tokenize yet
    if (chunk.length === 0) return true

    this.buffer = this.length > 0 ? this.buffer + chunk : chunk
  } else {
    // No encoding: the data is kept as supplied
    this.buffer = this.length > 0 ? this.buffer.concat(data) : data
  }
  this.length = this.buffer.length

  // Check rules resolution (pause __can__ be called before write)
  if (this._rulesToResolve) this._resolveRules() // Does linking too
  // No resolution but linking may be required
  else if (this._rulesToLink) this._linkRules()

  // ... hold on until tokenization completed on the current data set
  // or consume the data
  if (this.paused) {
    this.needDrain = true
    return false
  }

  return this._tokenize()
}

end

Atok.prototype.end()

method

Params

  • data - to be processed

Ends the stream and emit the end event. Any remaining data is passed to the listeners.

Source

/**
 * Ends the stream and emit the end event. Any remaining data is passed to
 * the listeners.
 *
 * @param {string|Buffer} [data] data to be processed before ending
 * @return {Atok} this, for chaining
 */
Atok.prototype.end = function (data) {
  // Process the last piece of data with the ending flag raised
  this.ending = true
  this.write(data)

  // From now on the stream is over
  this.ended = true
  this.ending = false
  this.readable = this.writable = false

  this._end()

  return this
}

pause

Atok.prototype.pause()

method

Pauses the stream - data is buffered until the stream is resumed with Atok#resume()

Source

/**
 * Pauses the stream - data is buffered until the stream is resumed with
 * Atok#resume().
 *
 * @return {Atok} this, for chaining
 */
Atok.prototype.pause = function () {
  var atok = this

  atok.paused = true

  return atok
}

resume

Atok.prototype.resume()

method

Resumes the stream - buffered data is immediately processed

Source

/**
 * Resumes the stream - buffered data is immediately processed.
 *
 * @return {*} the result of Atok#_tokenize
 */
Atok.prototype.resume = function () {
  // The flag must be lowered before tokenizing
  this.paused = false

  var outcome = this._tokenize()

  return outcome
}

destroy

Atok.prototype.destroy()

method

Placeholder for Atok#destroy()

Source

/**
 * Placeholder for Atok#destroy(): flags the stream as neither readable nor
 * writable.
 */
Atok.prototype.destroy = function () {
  this.writable = false
  this.readable = false
}

_end

Atok.prototype._end()

method

End a stream by emitting the end event with remaining data

Source

/**
 * End a stream by emitting the end event with remaining data, then reset the
 * tokenizer.
 *
 * The end event receives the remaining buffer, -1 and the current rule set
 * name as returned by Atok#currentRule().
 */
Atok.prototype._end = function () {
  // currentRule is a method: call it, do not hand the function itself over
  this.emit_end( this.buffer, -1, this.currentRule() )
  this.clear()
}

_done

Atok.prototype._done()

method

End of Atok#write(): emit the drain event if required

Source

/**
 * End of Atok#write(): emit the drain event if required and terminate the
 * stream if it was ended in the meantime.
 *
 * @return {boolean} false when the stream is ended, true otherwise
 */
Atok.prototype._done = function () {
  var mustDrain = this.needDrain

  if (mustDrain) {
    // Lower the flag before notifying the listeners
    this.needDrain = false
    this.emit_drain()
  }

  // Still running: more data is welcome
  if (!this.ended) return true

  this._end()
  return false
}

_tokenize

Atok.prototype._tokenize()

method

The core of Atok. Loops through the rules and check them against the data, calling handler or emitting the data event and branching appropriately.

Source

/**
 * The core of Atok. Loops through the rules and check them against the data,
 * calling handler or emitting the data event and branching appropriately.
 *
 * Starting from this._firstRule, each rule is tested at the current offset.
 * A failed rule leads to its nextFail rule, a matched one to its next rule
 * (or to the new first rule when the handler changed the rule set). Once the
 * loop is over, the consumed part of the buffer is dropped, taking the marked
 * offset into account when there is one.
 *
 * @return {boolean} false when the stream was paused while tokenizing,
 *   otherwise the result of Atok#_done
 */
Atok.prototype._tokenize = function () {
  // NB. Rules and buffer can be reset by the token handler
  var p, props, matched
  var token

  // Start from the rule we stopped at and acknowledge any pending reset
  p = this._firstRule
  this._resetRule = false

  // Loop until the rules or the data are exhausted
  while ( p && this.offset < this.length ) {
    props = p.props

    // Return the size of the matched data (0 is valid!)
    matched = p.test(this.buffer, this.offset)

    // A negative size means the rule did not match
    if ( matched < 0 ) {
      // End of the rule set, end the loop
      if (!p.nextFail) break

      // Next rule exists, carry on
      p = p.nextFail
      continue
    }

    // Is the token to be processed?
    if ( props.ignore ) {
      // Ignored token: no handler call, no data event
      p = p.next
    } else {
      // Emit the data by default, unless the handler is set
      // In quiet mode the token is the size of the data found between the
      // first and last patterns, otherwise it is the data itself
      token = props.quiet
        ? matched - (p.single ? 0 : p.last.length) - p.first.length
        : this.buffer.slice(
            this.offset + p.first.length
          , this.offset + matched - (p.single ? 0 : p.last.length)
          )

      if (p.handler) p.handler(token, p.last.idx, p.type)
      else this.emit_data(token, p.last.idx, p.type)

      // Handler has changed rules, resolve and relink
      if (this._rulesToResolve) this._resolveRules()

      // RuleSet may have be changed by the handler
      if (this._resetRule) {
        this._resetRule = false
        p = this._firstRule
      } else {
        p = p.next
      }
    }

    // Move past the matched data
    this.offset += matched

    // NB. `break()` prevails over `pause()`
    if (props.break) break

    // Hold on if the stream was paused
    if (this.paused) {
      // Remember where to resume from and request a drain event
      this._firstRule = p
      this.needDrain = true
      return false
    }
  }

  // Keep track of the rule we are at
  if (p) this._firstRule = p

  // Truncate the buffer if possible: min(offset, markedOffset)
  if (this.markedOffset < 0) {
    // No marked offset or beyond the current offset
    if (this.offset === this.length) {
      // Everything was consumed: drop the buffer and signal it
      this.offset = 0
      this.buffer = null
      this.length = 0
      this.emit_empty(this.ending)

    } else if (this.offset < this.length) {
      // Keep the unprocessed data only
      this.buffer = this.buffer.slice(this.offset)
      this.length = this.buffer.length
      this.offset = 0

    } else {
      // Can only occurs if offset was manually incremented
      // The excess is kept as the offset in the data to come
      this.offset = this.offset - this.length
      this.buffer = null
      this.length = 0
    }

  } else {
    // Names of the properties holding the largest and smallest offsets,
    // assuming first that markedOffset is the largest one
    var maxOffset = 'markedOffset'
    var minOffset = 'offset'
    var _

    // Swap the names when markedOffset is actually the smallest
    if (this.markedOffset < this.offset) {
      _ = maxOffset
      maxOffset = minOffset
      minOffset = _
    }

    // Same 3 cases as above, driven by the smallest offset while the
    // largest one is shifted by the amount of data removed
    if (this[minOffset] === this.length) {
      this[maxOffset] -= this[minOffset]
      this[minOffset] = 0
      this.buffer = null
      this.length = 0
      this.emit_empty(this.ending)

    } else if (this[minOffset] < this.length) {
      this[maxOffset] -= this[minOffset]
      this.buffer = this.buffer.slice(this[minOffset])
      this.length = this.buffer.length
      this[minOffset] = 0

    } else {
      // Can only occurs if offset was manually incremented
      this[maxOffset] -= this.length
      this[minOffset] -= this.length
      this.buffer = null
      this.length = 0
    }
  }

  // Emit drain / end as required
  return this._done()
}