Atok
An atok stream
clear
Atok.prototype.clear()
method
Params
- keep - rules set (default=false)
Reset the tokenizer by clearing its buffer and rules
Source
/**
 * Reset the tokenizer: buffer, offsets, rule pointers, grouping bookkeeping
 * and rule properties. The rules, default handler and saved rule sets are
 * dropped as well unless `keepRules` is truthy.
 *
 * @param {boolean} [keepRules] keep the rules, default handler and saved rule sets
 * @return {Atok} this
 */
Atok.prototype.clear = function (keepRules) {
  // include("Atok_properties.js")

  // --- Public properties
  this.buffer = null
  this.length = 0
  this.offset = 0
  // Negative means "no marked offset" (see the truncation step of _tokenize)
  this.markedOffset = -1

  // --- Private properties
  // Initial rule to be triggered
  this._firstRule = null
  // Rule set was changed
  this._resetRule = false
  // A decoder only exists while an encoding is set
  if (this._encoding) {
    this._stringDecoder = new StringDecoder(this._encoding)
  } else {
    this._stringDecoder = null
  }
  // Rules need to be resolved (continue() prop)
  this._rulesToResolve = false
  // Rules need to be relinked (after a rule set change)
  this._rulesToLink = false
  // Rule grouping bookkeeping
  this._group = -1
  this._groupStart = 0
  this._groupEnd = 0
  this._groupStartPrev = []

  var dropRules = !keepRules
  if (dropRules) {
    // Rules to be checked against
    this._rules = []
    // Matched token default handler
    this._defaultHandler = null
    // Saved rules
    this._savedRules = {}
  }

  this.clearProps()

  return this
}
slice
Atok.prototype.slice()
method
Params
- starting - index
- ending - index
Extract data from the buffer (Atok#slice)
Source
/**
 * Extract data from the current buffer by delegating to the buffer's own
 * slice() method.
 *
 * @param {number} [start] starting index
 * @param {number} [end] ending index
 * @return {*} whatever the buffer's slice() returns
 */
Atok.prototype.slice = function (start, end) {
  var source = this.buffer
  return source.slice(start, end)
}
flush
Atok.prototype.flush()
method
Terminate the current tokenizing and return the current buffer
Source
/**
 * Terminate the current tokenizing and return the current buffer.
 * The tokenizer is cleared afterwards, but its rules are kept.
 *
 * @return {*} the remaining buffered data, or null when nothing is buffered
 */
Atok.prototype.flush = function () {
  // The buffer is null after clear() or once all data has been consumed:
  // slicing it would throw, so there is simply nothing to return.
  var data = this.buffer == null ? null : this.slice()
  this.clear(true) // Keep rules!
  return data
}
setEncoding
Atok.prototype.setEncoding()
method
Params
- encoding - to be used
Set the string encoding
Source
/**
 * Set the string encoding.
 * `null`/`undefined` (or their string forms) disable string decoding;
 * every other value selects UTF-8.
 *
 * @param {string|null} [enc] encoding to be used
 * @return {Atok} this
 */
Atok.prototype.setEncoding = function (enc) {
  var requested = String(enc)
  var disabled = requested === 'null' || requested === 'undefined'

  // 'UTF-8', 'utf-8', 'utf8' and any other value all end up as UTF-8
  this._encoding = disabled ? null : 'UTF-8'

  if (this._encoding) {
    this._stringDecoder = new StringDecoder(this._encoding)
  } else {
    this._stringDecoder = null
  }

  return this
}
debug
Atok.prototype.debug()
method
Params
- toggle - debug mode on and off
Turn debug mode on or off. Emits the [debug] event. The #loadRuleSet method is also put in debug mode. All handlers log their arguments.
Source
/**
 * Turn debug mode on or off.
 * Every defined rule is switched to the requested mode, and #loadRuleSet is
 * wrapped on the instance so that each call is reported through emit_debug.
 *
 * @param {boolean} [flag] toggle debug mode on and off
 * @return {Atok} this
 */
Atok.prototype.debug = function (flag) {
  var enabled = Boolean(flag)
  var atok = this

  // Already in the requested mode: nothing to do
  if (this.debugMode === enabled) return this

  this.debugMode = enabled

  // Propagate the mode to all defined rules
  this._rulesForEach(function (rule) {
    rule.setDebug(enabled, atok)
  })

  // Methods that get traced while in debug mode
  var traced = [ 'loadRuleSet' ]
  traced.forEach(function (name) {
    if (!enabled) {
      // Drop the instance-level wrapper: the prototype method shows through again
      delete atok[name]
      return
    }

    var original = atok[name]
    atok[name] = function () {
      atok.emit_debug( 'Atok#', name, arguments )
      return original.apply(atok, arguments)
    }
  })

  return this
}
currentRule
Atok.prototype.currentRule()
method
Get the current rule set name
Source
/**
 * Get the current rule set name, as recorded on the rule the tokenizer
 * will start from.
 *
 * @return {*} the `currentRule` value of the first rule, or null when there is no first rule
 */
Atok.prototype.currentRule = function () {
  var rule = this._firstRule
  if (!rule) return null
  return rule.currentRule
}
// include("methods_ruleprops.js")
setDefaultHandler
Atok.prototype.setDefaultHandler()
method
Params
- handler - function: rules handler (it is better to name it for debugging); any non-function value clears the default handler
Set the default handler. Triggered on all subsequently defined rules if the handler is not supplied
Source
/**
 * Set the default handler.
 * Anything that is not a function resets the default handler to null.
 *
 * @param {function} [handler] default rules handler
 * @return {Atok} this
 */
Atok.prototype.setDefaultHandler = function (handler) {
  if (typeof handler === 'function') {
    this._defaultHandler = handler
  } else {
    this._defaultHandler = null
  }
  return this
}
next
Atok.prototype.next()
method
Params
- name - of the rule set to load if rule successful
- index - to start at
Load the named rule set (starting at the given rule index) when a rule matches, for all subsequent rules
Source
/**
 * Set the rule set (and the starting index within it) recorded in the
 * `next` rule properties.
 *
 * @param {string} [ruleSet] name of the rule set; any non-string resets it to null
 * @param {number} [index] index to start at; any non-number resets it to 0
 * @return {Atok} this
 */
Atok.prototype.next = function (ruleSet, index) {
  var name = null
  var start = 0

  if (typeof ruleSet === 'string') name = ruleSet
  if (typeof index === 'number') start = index

  this._p_next = name
  this._p_nextIndex = start

  return this
}
ignore
Atok.prototype.ignore()
method
Params
- flag -
Skip matched data silently for all subsequent rules
Source
/**
 * Skip matched data silently for all subsequent rules.
 *
 * @param {boolean} [flag] only a strict `true` enables the property
 * @return {Atok} this
 */
Atok.prototype.ignore = function (flag) {
  var enabled = false
  if (flag === true) enabled = true
  this._p_ignore = enabled
  return this
}
quiet
Atok.prototype.quiet()
method
Params
- flag -
Do not supply matched data to the handler for all subsequent rules. This is used when the token data does not matter but a handler still needs to be called. Faster than standard handler call.
Source
/**
 * Do not supply matched data to the handler for all subsequent rules.
 *
 * @param {boolean} [flag] only a strict `true` enables the property
 * @return {Atok} this
 */
Atok.prototype.quiet = function (flag) {
  var enabled = false
  if (flag === true) enabled = true
  this._p_quiet = enabled
  return this
}
trimLeft
Atok.prototype.trimLeft()
method
Params
- flag -
Remove the left matched pattern for all subsequent rules
Source
/**
 * Remove the left matched pattern for all subsequent rules.
 *
 * @param {boolean} [flag] only a strict `true` enables the property
 * @return {Atok} this
 */
Atok.prototype.trimLeft = function (flag) {
  var enabled = false
  if (flag === true) enabled = true
  this._p_trimLeft = enabled
  return this
}
trimRight
Atok.prototype.trimRight()
method
Params
- flag -
Remove the right matched pattern for all subsequent rules. If only 1 pattern, it is ignored.
Source
/**
 * Remove the right matched pattern for all subsequent rules.
 *
 * @param {boolean} [flag] only a strict `true` enables the property
 * @return {Atok} this
 */
Atok.prototype.trimRight = function (flag) {
  var enabled = false
  if (flag === true) enabled = true
  this._p_trimRight = enabled
  return this
}
trim
Atok.prototype.trim()
method
Params
- flag -
Remove the left and right matched patterns for all subsequent rules
Source
/**
 * Remove the left and right matched patterns for all subsequent rules.
 * Shorthand for trimLeft(flag) followed by trimRight(flag).
 *
 * @param {boolean} [flag] forwarded to trimLeft() and trimRight()
 * @return {*} the value returned by trimRight()
 */
Atok.prototype.trim = function (flag) {
  var afterLeft = this.trimLeft(flag)
  return afterLeft.trimRight(flag)
}
escape
Atok.prototype.escape()
method
Params
- flag -
Do not remove the left and right matched patterns for all subsequent rules. The default escape character is \ (backslash); it can be changed by specifying it instead of a Boolean.
Source
/**
 * Set the escape character used by all subsequent rules.
 *
 * @param {boolean|string|Buffer} [flag]
 *   - `true`: use the default escape character (backslash)
 *   - a non-empty value with a `length`: use the first character of its string form
 *   - anything else: escaping is disabled (false)
 * @return {Atok} this
 */
Atok.prototype.escape = function (flag) {
  var escapeChar

  if (flag === true) {
    escapeChar = '\\'
  } else if (flag && flag.length > 0) {
    // The encoding argument only matters to types whose toString() accepts one
    escapeChar = flag.toString(this._encoding || 'utf8').charAt(0)
  } else {
    escapeChar = false
  }

  this._p_escape = escapeChar
  return this
}
continue
Atok.prototype.continue()
method
Params
- number - of rules to skip before continuing
- when - the rule fails, number of rules to skip before continuing
Continue the rules flow if rule matches at the specified rule index
Source
/**
 * Continue the rules flow at the specified rule index, for all subsequent
 * rules. Called without arguments, both jumps are reset to null.
 *
 * Invalid jumps are reported through `_error()`. If `_error()` returns
 * instead of throwing, the previously set jumps are left untouched, so an
 * invalid value is never stored.
 *
 * @param {number|string|function|null} [jump] number of rules to skip before continuing
 * @param {number|string|function|null} [jumpOnFail] when the rule fails, number of rules
 *   to skip before continuing (null when omitted)
 * @return {Atok} this
 */
Atok.prototype.continue = function (jump, jumpOnFail) {
  if (arguments.length === 0) {
    this._p_continue = null
    this._p_continueOnFail = null
    return this
  }

  var validType = /(number|string|function)/
  var valid = true

  if ( jump !== null && !validType.test(typeof jump) ) {
    valid = false
    this._error( new Error('Atok#continue: Invalid jump (must be an integer/function/string): ' + jump) )
  }

  if (arguments.length === 1) {
    jumpOnFail = null
  } else if ( jumpOnFail !== null && !validType.test(typeof jumpOnFail) ) {
    valid = false
    this._error( new Error('Atok#continue: Invalid jump (must be an integer/function/string): ' + jumpOnFail) )
  }

  // Do not record anything when at least one of the jumps was rejected
  if (!valid) return this

  this._p_continue = jump
  this._p_continueOnFail = jumpOnFail

  return this
}
break
Atok.prototype.break()
method
Abort a current rule set. Use continue(-1) to resume at the current subrule.
Source
/**
 * Abort the current rule set when a rule matches, for all subsequent rules.
 *
 * @param {boolean} [flag] only a strict `true` enables the property
 * @return {Atok} this
 */
Atok.prototype.break = function (flag) {
  var enabled = false
  if (flag === true) enabled = true
  this._p_break = enabled
  return this
}
setProps
Atok.prototype.setProps()
method
Params
- properties - to be loaded
Restore properties
Source
/**
 * Restore rule properties from an object such as the one built by getProps().
 * Only names that already exist on the instance as own `_p_<name>`
 * properties are applied; the others are silently skipped.
 *
 * @param {Object} [props] properties to be loaded; `continue` and `next`
 *   are expected as 2-item arrays
 * @return {Atok} this
 */
Atok.prototype.setProps = function (props) {
  var atok = this

  Object.keys(props || {}).forEach(function (name) {
    // Unknown property: skip it
    if ( !atok.hasOwnProperty('_p_' + name) ) return

    if (name === 'continue') {
      // Special case: continue has 2 properties
      atok._p_continue = props[ name ][0]
      atok._p_continueOnFail = props[ name ][1]
    } else if (name === 'next') {
      // Special case: next has 2 properties
      atok._p_next = props[ name ][0]
      atok._p_nextIndex = props[ name ][1]
    } else {
      atok[ '_p_' + name ] = props[ name ]
    }
  })

  return this
}
clearProps
Atok.prototype.clearProps()
method
Reset properties to their default values
Source
/**
 * Reset the rule properties to their default values.
 *
 * @return {Atok} this
 */
Atok.prototype.clearProps = function () {
  // include("Atok_rule_properties.js")
  // [ property name, default value ] - applied in this order
  var defaults = [
    [ 'ignore', false ],         // Get the token size and skip
    [ 'quiet', false ],          // Get the token size and call the handler with no data
    [ 'escape', false ],         // Pattern must not be escaped
    [ 'trimLeft', true ],        // Remove the left pattern from the token
    [ 'trimRight', true ],       // Remove the right pattern from the token
    [ 'next', null ],            // Next rule to load
    [ 'nextIndex', 0 ],          // Index for the next rule to load
    [ 'continue', null ],        // Next rule index to load
    [ 'continueOnFail', null ],  // Next rule index to load when rule fails
    [ 'break', false ]           // Abort current rule set
  ]

  for (var i = 0; i < defaults.length; i++) {
    this[ '_p_' + defaults[i][0] ] = defaults[i][1]
  }

  return this
}
getProps
Atok.prototype.getProps()
method
Get the current values of the rule properties: those named in the arguments, or all default properties when called without arguments
Source
/**
 * Get the current values of the rule properties.
 * The property names are taken from the arguments, or from
 * `this._defaultProps` when no argument is given. Names without a matching
 * own `_p_<name>` property on the instance are skipped.
 *
 * @param {...string} [name] names of the properties to retrieve
 * @return {Object} prototype-less object; `continue` and `next` are 2-item arrays
 */
Atok.prototype.getProps = function () {
  var names = arguments.length > 0
    ? sliceArguments(arguments, 0)
    : this._defaultProps
  // Empty object with no prototype
  var result = Object.create(null)
  var total = names.length
  var idx = 0
  var name

  while (idx < total) {
    name = names[idx++]

    // Unknown property: skip it
    if ( !this.hasOwnProperty('_p_' + name) ) continue

    if (name === 'continue') {
      // Special case: continue has 2 properties
      result[ name ] = [ this._p_continue, this._p_continueOnFail ]
    } else if (name === 'next') {
      // Special case: next has 2 properties
      result[ name ] = [ this._p_next, this._p_nextIndex ]
    } else {
      result[ name ] = this[ '_p_' + name ]
    }
  }

  return result
}
// include("methods_ruleset.js")
addRuleFirst
Atok.prototype.addRuleFirst()
method
Params
- name - of the rule to be added first
- rule - item
- rule - type
Add a rule as the first one
Source
Atok.prototype.addRuleFirst = function (rule,
addRuleBefore
Atok.prototype.addRuleBefore()
method
Params
- name - of the rule to add before
- rule - item
- rule - type
Add a rule before an existing one
Source
Atok.prototype.addRuleBefore = function (existingRule, rule,
addRuleAfter
Atok.prototype.addRuleAfter()
method
Params
- name - of the rule to add after
- rule - item
- rule - type
Add a rule after an existing one
Source
Atok.prototype.addRuleAfter = function (existingRule, rule,
addRule
Atok.prototype.addRule()
method
Params
- match - at current buffer position (String: expect string, Integer: expect n characters, Array: expect one of the items). If not needed, use ''
- rule - name/id (if no default handler set, emit a data event) or handler (executed when all matches are valid). If false, the rule is ignored.
Add a rule
Source
Atok.prototype.addRule = function (
removeRule
Atok.prototype.removeRule()
method
Params
- name - of the rule to be removed
Remove a rule (first instance only)
Source
Atok.prototype.removeRule = function (
clearRule
Atok.prototype.clearRule()
method
Remove all rules
Source
/**
 * Remove all rules: the rule properties are reset, and the current rules,
 * first rule and default handler are dropped. Saved rule sets are untouched.
 *
 * @return {Atok} this
 */
Atok.prototype.clearRule = function () {
  var atok = this

  atok.clearProps()

  atok._firstRule = null
  atok._rules = []
  atok._defaultHandler = null
  // Nothing left to resolve
  atok._rulesToResolve = false

  return atok
}
saveRuleSet
Atok.prototype.saveRuleSet()
method
Params
- name - of the rule set
Save all rules and clear them
Source
/**
 * Save the current rules as a named rule set.
 * Each rule is cloned with the rule set name, then the saved set is resolved.
 *
 * @param {string} name name of the rule set
 * @return {Atok} this, or the result of `_error()` when no name (or null) is supplied
 */
Atok.prototype.saveRuleSet = function (name) {
  var missingName = arguments.length === 0 || name === null
  if (missingName) {
    return this._error( new Error('Atok#saveRuleSet: invalid rule name supplied') )
  }

  // Clone and assign the current rule set name
  var clones = this._rules.map(function cloneForRuleSet (rule) {
    return rule.clone(name)
  })
  this._savedRules[name] = { rules: clones }

  // Resolve and check continues
  this._resolveRules(name)

  return this
}
loadRuleSet
Atok.prototype.loadRuleSet()
method
Params
- name - of the rule set
- index - to start at
Load a rule set
Source
/**
 * Load a previously saved rule set and make it the current one.
 *
 * @param {string} name name of the rule set
 * @param {number} [index] index of the rule to start at (0 unless a number is given)
 * @return {Atok} this, or the result of `_error()` when the rule set does not exist
 */
Atok.prototype.loadRuleSet = function (name, index) {
  var saved = this._savedRules[name]
  if (!saved) {
    return this._error( new Error('Atok#loadRuleSet: Rule set ' + name + ' not found') )
  }

  var start = typeof index === 'number' ? index : 0

  this._rules = saved.rules
  // Set the rule index
  this._firstRule = this._rules[start]
  // Signal that the rule set was changed
  this._resetRule = true

  return this
}
removeRuleSet
Atok.prototype.removeRuleSet()
method
Params
- name - of the rule set
Delete a rule set
Source
/**
 * Delete a saved rule set. Unknown names are silently ignored.
 *
 * @param {string} name name of the rule set
 * @return {Atok} this
 */
Atok.prototype.removeRuleSet = function (name) {
  var saved = this._savedRules
  delete saved[name]
  return this
}
groupRule
Atok.prototype.groupRule()
method
Params
- toggle - grouping on/off
Bind rules to the same index
Source
/**
 * Bind rules to the same index.
 * A truthy flag opens a new (possibly nested) group starting at the next
 * rule to be added; a falsy flag closes the group currently open.
 *
 * @param {boolean} [flag] toggle grouping on/off
 * @return {Atok} this
 */
Atok.prototype.groupRule = function (flag) {
  var rules = this._rules
  var idx, total, rule

  if (flag) {
    // Open a group: remember where the enclosing group started
    this._group++
    this._groupStartPrev.push(this._groupStart)
    this._groupStart = rules.length
    return this
  }

  // Ignore invalid groupRule(): no group is open
  if (this._group < 0) return this

  total = rules.length
  // 1 or 0 rule within the group: the group is ignored
  var tooSmall = total - this._groupStart < 2

  for (idx = this._groupStart; idx < total; idx++) {
    rule = rules[idx]
    if (tooSmall) {
      rule.group = -1
      rule.groupStart = 0
      rule.groupEnd = 0
    } else if (rule.group === this._group) {
      // Set the last index of the group to all rules belonging to the current group
      rule.groupEnd = total - 1
    }
  }

  // Back to the enclosing group, if any
  this._group--
  this._groupStart = this._groupStartPrev.pop() || 0
  this._groupEnd = 0

  return this
}
write
Atok.prototype.write()
method
Params
- data - to be processed
Applies the current rules to the incoming data. When false is returned (the tokenizer is paused), the data is buffered but no processing occurs until the tokenizer is resumed.
Source
/**
 * Apply the current rules to the incoming data.
 * When false is returned because the tokenizer is paused, the data is
 * buffered but not processed until the tokenizer is resumed.
 *
 * In string mode (an encoding is set), Buffer chunks go through the string
 * decoder so that a multi-byte character split across two chunks is
 * reassembled instead of being corrupted. In raw mode, Buffer chunks are
 * appended with Buffer.concat().
 *
 * @param {string|Buffer} data data to be processed
 * @return {boolean} false after a write-after-end or when paused, true for
 *   empty data, otherwise the result of `_tokenize()`
 */
Atok.prototype.write = function (data) {
  if (this.ended) {
    this._error( new Error('Atok#write: write after end') )
    return false
  }

  if (!data || data.length === 0) return true

  // Setting the encoding by default when receiving a string
  if ( typeof data === 'string' && !this._encoding ) this.setEncoding('utf-8')

  var pending = this.length > 0

  // Buffer the incoming data...
  // Process strings and Buffers separately
  if (this._encoding) {
    // Only raw bytes need decoding: the decoder holds back an incomplete
    // trailing character until the next chunk completes it
    var chunk = Buffer.isBuffer(data)
      ? this._stringDecoder.write(data)
      : data.toString()
    this.buffer = pending ? this.buffer + chunk : chunk
  } else if (pending) {
    // Buffers have no concat() instance method: use the static one.
    // Anything else keeps relying on its own concat().
    this.buffer = Buffer.isBuffer(this.buffer)
      ? Buffer.concat( [ this.buffer, data ] )
      : this.buffer.concat(data)
  } else {
    this.buffer = data
  }
  this.length = this.buffer.length

  // Check rules resolution (pause __can__ be called before write)
  if (this._rulesToResolve) this._resolveRules() // Does linking too
  // No resolution but linking may be required
  else if (this._rulesToLink) this._linkRules()

  // ... hold on until tokenization completed on the current data set
  // or consume the data
  if (this.paused) {
    this.needDrain = true
    return false
  }

  return this._tokenize()
}
end
Atok.prototype.end()
method
Params
- data - to be processed
Ends the stream and emit the end event. Any remaining data is passed to the listeners.
Source
/**
 * End the stream: process the optional last chunk of data, flag the stream
 * as ended and no longer readable/writable, then hand over to `_end()`.
 *
 * @param {string|Buffer} [data] data to be processed
 * @return {Atok} this
 */
Atok.prototype.end = function (data) {
  var stream = this

  // `ending` is only set while the last chunk is being written
  stream.ending = true
  stream.write(data)
  stream.ended = true
  stream.ending = false

  stream.readable = false
  stream.writable = false

  stream._end()

  return stream
}
pause
Atok.prototype.pause()
method
Pauses the stream - data is buffered until the stream is resumed with Atok#resume()
Source
/**
 * Pause the stream: sets the `paused` flag.
 *
 * @return {Atok} this
 */
Atok.prototype.pause = function () {
  var stream = this
  stream.paused = true
  return stream
}
resume
Atok.prototype.resume()
method
Resumes the stream - buffered data is immediately processed
Source
/**
 * Resume the stream: clears the `paused` flag and immediately processes the
 * buffered data.
 *
 * @return {*} the result of `_tokenize()`
 */
Atok.prototype.resume = function () {
  var stream = this
  stream.paused = false
  var outcome = stream._tokenize()
  return outcome
}
destroy
Atok.prototype.destroy()
method
Placeholder for Atok#destroy()
Source
/**
 * Placeholder for Atok#destroy(): flags the stream as neither readable nor
 * writable. Nothing is returned.
 */
Atok.prototype.destroy = function () {
  var stream = this
  stream.readable = false
  stream.writable = false
}
_end
Atok.prototype._end()
method
End a stream by emitting the end event with remaining data
Source
/**
 * End a stream by emitting the end event with the remaining data, then
 * clear the tokenizer (rules included).
 *
 * The end event receives: the remaining buffer, -1, and the current rule
 * set name.
 */
Atok.prototype._end = function () {
  // currentRule is a method: call it so that listeners get the rule set
  // name rather than the function itself
  this.emit_end( this.buffer, -1, this.currentRule() )
  this.clear()
}
_done
Atok.prototype._done()
method
End of Atok#write(): emit the drain event if required
Source
/**
 * End of Atok#write(): emit the drain event if required, and end the
 * stream if it was flagged as ended.
 *
 * @return {boolean} false when the stream has ended, true otherwise
 */
Atok.prototype._done = function () {
  var drainRequested = this.needDrain
  if (drainRequested) {
    // Reset the flag before emitting
    this.needDrain = false
    this.emit_drain()
  }

  if (!this.ended) return true

  this._end()
  return false
}
_tokenize
Atok.prototype._tokenize()
method
The core of Atok. Loops through the rules and checks them against the data, calling the handler or emitting the data event, and branching appropriately.
Source
/**
 * The core of Atok.
 * Starting at `this._firstRule`, tests the rules against the buffer at
 * `this.offset` until there is no rule left, the buffer is exhausted, a
 * matching rule has its `break` property set, or the tokenizer gets paused.
 * For each match, the rule handler is called (or the data event emitted)
 * unless the rule is flagged as ignored.
 * Once the loop is over, the consumed part of the buffer is dropped, taking
 * `markedOffset` into account when it is set (>= 0).
 *
 * @return {boolean} false when paused during the loop, otherwise the result of `_done()`
 */
Atok.prototype._tokenize = function () {
// NB. Rules and buffer can be reset by the token handler
var p, props, matched
var token
// Start from the rule recorded by the previous run or by loadRuleSet()
p = this._firstRule
this._resetRule = false
while ( p && this.offset < this.length ) {
props = p.props
// Return the size of the matched data (0 is valid!)
matched = p.test(this.buffer, this.offset)
// A negative size means the rule did not match
if ( matched < 0 ) {
// End of the rule set, end the loop
if (!p.nextFail) break
// Next rule exists, carry on
p = p.nextFail
continue
}
// Is the token to be processed?
if ( props.ignore ) {
// Ignored: no handler call, no event, just move on to the next rule
p = p.next
} else {
// Emit the data by default, unless the handler is set
// In quiet mode the token is a size (matched size minus the first and
// last pattern lengths), otherwise it is the matching part of the buffer
// with the first and last patterns left out
token = props.quiet
? matched - (p.single ? 0 : p.last.length) - p.first.length
: this.buffer.slice(
this.offset + p.first.length
, this.offset + matched - (p.single ? 0 : p.last.length)
)
if (p.handler) p.handler(token, p.last.idx, p.type)
else this.emit_data(token, p.last.idx, p.type)
// Handler has changed rules, resolve and relink
if (this._rulesToResolve) this._resolveRules()
// RuleSet may have been changed by the handler
if (this._resetRule) {
this._resetRule = false
p = this._firstRule
} else {
p = p.next
}
}
// The offset is only moved once the handler has run
this.offset += matched
// NB. `break()` prevails over `pause()`
if (props.break) break
// Hold on if the stream was paused
if (this.paused) {
// Remember where to resume from; the buffer is left as is
this._firstRule = p
this.needDrain = true
return false
}
}
// Keep track of the rule we are at
if (p) this._firstRule = p
// Truncate the buffer if possible: min(offset, markedOffset)
if (this.markedOffset < 0) {
// No marked offset or beyond the current offset
if (this.offset === this.length) {
// Everything was consumed: drop the buffer and signal it
this.offset = 0
this.buffer = null
this.length = 0
this.emit_empty(this.ending)
} else if (this.offset < this.length) {
// Partially consumed: only keep the unprocessed data
this.buffer = this.buffer.slice(this.offset)
this.length = this.buffer.length
this.offset = 0
} else {
// Can only occurs if offset was manually incremented
this.offset = this.offset - this.length
this.buffer = null
this.length = 0
}
} else {
// Both offsets are handled by property name, so that the smaller one
// drives the truncation and the larger one is shifted accordingly
var maxOffset = 'markedOffset'
var minOffset = 'offset'
var _
if (this.markedOffset < this.offset) {
// Swap the names: markedOffset is the smaller one
_ = maxOffset
maxOffset = minOffset
minOffset = _
}
if (this[minOffset] === this.length) {
// The smaller offset is at the end of the buffer: drop it all
this[maxOffset] -= this[minOffset]
this[minOffset] = 0
this.buffer = null
this.length = 0
this.emit_empty(this.ending)
} else if (this[minOffset] < this.length) {
// Keep the data from the smaller offset onwards
this[maxOffset] -= this[minOffset]
this.buffer = this.buffer.slice(this[minOffset])
this.length = this.buffer.length
this[minOffset] = 0
} else {
// Can only occurs if offset was manually incremented
this[maxOffset] -= this.length
this[minOffset] -= this.length
this.buffer = null
this.length = 0
}
}
return this._done()
}