fs               = require 'fs'
path             = require 'path'
HOMEDIR          = path.join(__dirname,'..')
LIB_DIR          = if fs.existsSync(path.join(HOMEDIR,'lib-cov')) then path.join(HOMEDIR,'lib-cov') else path.join(HOMEDIR,'lib')
DOMUtil          = require(path.join(LIB_DIR,'dom-util')).DOMUtil
PredicateFactory = require(path.join(LIB_DIR,'predicate-factory')).PredicateFactory

# **Stew** is a DOM selection engine that
# supports the full CSS selector syntax
# as well as CSS selectors extended with
# regular expressions.
#
# Method names that start with `_` are subject
# to change without notice. Other methods may be
# considered a part of the public API.
class Stew

  # **The Stew constructor** accepts an optional `DOMUtil` instance
  # (allowing callers to configure the `DOMUtil` used by `Stew`).
  constructor:(dom_util)->
    # Each instance owns a `PredicateFactory`; the selector-parsing methods
    # use it to build the predicate functions.
    @factory = new PredicateFactory()
    # Prefer the caller-supplied `DOMUtil`; fall back to a default instance
    # when none (or `null`) was passed.
    @dom_util = if dom_util? then dom_util else new DOMUtil()

  # **select** selects nodes from the given `dom`
  # that match the given `selector`.
  #
  # If `selector` is a string, it will be parsed as
  # described in the README. Otherwise `selector`
  # is assumed to be a predicate function
  # (like those generated by `PredicateFactory`).
  #
  # If `dom` is a string, it will be parsed as HTML
  # (using `DOMUtil.parse_html`, which see). If `dom`
  # is a single node, the given `selector` will be
  # applied to it.  If `dom` is an array of nodes,
  # the given `selector` will be applied to each element in turn.
  #
  # This results in an array of matching nodes.
  #
  # If a `callback` is provided, the resulting array is
  # passed to it (assuming the signature
  # `callback(err,nodeset)`).  Otherwise
  # the resulting array is returned by this function.
  #
  # Note that when `dom` is a string, a callback method
  # *must* be provided. (Since our HTML parsing
  # is asynchronous.) When `dom` is an object, the
  # callback method is optional (but will be used
  # when present)
  select:(dom,selector,callback)->
    # A string selector is compiled first; any other value is taken to be a
    # ready-made predicate function.
    predicate = if typeof selector is 'string' then @_parse_selectors(selector) else selector

    # Node (or node-array) input is handled synchronously: the node set is
    # handed to the optional callback and also returned.
    unless typeof dom is 'string'
      matches = @_unguarded_select(dom,predicate)
      callback?(null,matches)
      return matches

    # String input has to be parsed as HTML first, so the result can only be
    # delivered through the callback.
    unless callback?
      throw new Error('When select is invoked on a string object, the `callback(err,nodeset)` parameter is required.')

    @dom_util.parse_html dom, (parse_error, parsed_dom)=>
      if parse_error?
        callback(parse_error)
      else
        callback(null,@_unguarded_select(parsed_dom,predicate))

  # **_unguarded_select** is the "inner" method
  # for `select`.  It assumes `dom` is a node or
  # array of nodes and that `predicate` is a
  # predicate function. It returns an array of
  # matching nodes. (Generally this method
  # will not be directly called by clients.)
  _unguarded_select:(dom,predicate)->
    # Collects every node for which `predicate` is truthy, in the order in
    # which `walk_dom` visits them.
    matches = []
    callbacks =
      visit:(node,parent,path,siblings,sib_index)->
        matches.push(node) if predicate(node,parent,path,siblings,sib_index)
        # Always ask the walker to keep going and to descend into children.
        return { 'continue':true, 'visit_children':true }
    @dom_util.walk_dom(dom, callbacks)
    return matches


  # **select_first** selects the first node in the
  # given `dom` that matches the given `selector`.
  #
  # It behaves exactly like `select` (which see)
  # save that it aborts processing as soon as
  # the first matching node is found, and returns
  # a single node rather than an array of nodes.
  select_first:(dom,selector,callback)->
    # A string selector is compiled first; any other value is taken to be a
    # ready-made predicate function.
    predicate = if typeof selector is 'string' then @_parse_selectors(selector) else selector

    # Node (or node-array) input is handled synchronously: the first match
    # (or `null`) is handed to the optional callback and also returned.
    unless typeof dom is 'string'
      first_match = @_unguarded_select_first(dom,predicate)
      callback?(null,first_match)
      return first_match

    # String input has to be parsed as HTML first, so the result can only be
    # delivered through the callback.
    unless callback?
      throw new Error('When select_first is invoked on a string object, the `callback(err,node)` parameter is required.')

    @dom_util.parse_html dom, (parse_error, parsed_dom)=>
      if parse_error?
        callback(parse_error)
      else
        callback(null,@_unguarded_select_first(parsed_dom,predicate))

  # **_unguarded_select_first** is the "inner" method for `select_first`.
  # (Generally this method will not be directly called by clients.)
  _unguarded_select_first:(dom,predicate)->
    # Returns the first visited node for which `predicate` is truthy, or
    # `null` when no node matches.
    first_match = null
    callbacks =
      visit:(node,parent,path,siblings,sib_index)->
        unless predicate(node,parent,path,siblings,sib_index)
          # No match yet: keep walking and descend into children.
          return { 'continue':true, 'visit_children':true }
        # Match found: remember it and ask the walker to stop.
        first_match = node
        return { 'continue':false, 'visit_children':false }
    @dom_util.walk_dom(dom, callbacks)
    return first_match

  # **_SPLIT_ON_WS_REGEXP** is a regular expression that is
  # used to split a string of CSS selectors into individual
  # selectors. It is similar to `str.split(/\s/)`, but:
  #  - treats "quoted phrases" (and `/regular expressions/`) as a single token
  #  - also splits on the CSS "operators" of `>`, `+`, `,` and `~`
  # (Shout-out to
  # http://stackoverflow.com/questions/2817646/javascript-split-string-on-space-or-on-quotes-to-array
  # from which this expression was originally derived.)
  #
  # The `g` flag makes each `exec` call resume at `lastIndex`, which is how
  # `_split_on_ws_respecting_quotes` walks the input one token at a time.
  #
  # NOTE(review): `~` is not excluded by the first character class, so a `~`
  # that touches other token characters (as in `a~b`) stays inside that token;
  # it is only returned on its own when it is set off by whitespace.
  _SPLIT_ON_WS_REGEXP = /([^\"\/\s,\+>]|(\"[^\"]+\")|(\/[^\/]+\/)|(\[[^\]]*\]))+|[,\+~>]/g

  # **_split_on_ws_respecting_quotes** is used to split a string of
  # CSS selectors into individual selectors.
  _split_on_ws_respecting_quotes:(selector)->
    # Repeatedly applies the global `_SPLIT_ON_WS_REGEXP` to `selector`,
    # gathering the full text of every match until the expression stops
    # matching.
    tokens = []
    loop
      found = _SPLIT_ON_WS_REGEXP.exec(selector)
      break unless found?[0]?
      tokens.push(found[0])
    return tokens

  # **_parse_selectors** accepts a string containing one
  # or more CSS selectors and returns the corresponding
  # predicate (a boolean-valued function that is invoked as
  # `predicate(node,parent,path,siblings,sib_index)`)
  _parse_selectors:(selectors)->
    # Compiles a selector list into a single predicate.
    #
    # `selectors` is either a selector string (which is tokenised with
    # `_split_on_ws_respecting_quotes`) or an already-tokenised array.
    #
    # The token stream is split into comma-separated groups. Within a group,
    # `>`, `+` and `~` combine the previous predicate with the next one, and
    # the remaining predicates form a descendant chain. When there is more
    # than one group the chains are OR-ed together, so `a b, c d` means
    # "`b` within `a`, or `d` within `c`" rather than letting `,` bind only
    # to the two simple selectors next to it.
    #
    # Returns the combined predicate, or an empty array when no selector
    # token was found.
    if typeof selectors is 'string'
      selectors = @_split_on_ws_respecting_quotes(selectors)

    alternatives = []  # one descendant-chain predicate per comma-separated group
    chain        = []  # predicates of the group currently being read
    pending      = null # combinator (`>`, `+` or `~`) waiting for its right-hand side

    # Closes the current group, turning its chain into a single predicate.
    finish_group = =>
      if chain.length > 0
        alternatives.push(@factory.descendant_predicate(chain))
      chain = []
      pending = null

    for token in selectors
      switch token
        when '>', '+', '~'
          pending = token
        when ','
          finish_group()
        else
          predicate = @_parse_selector(token)
          switch pending
            when '>'
              chain.push(@factory.direct_descendant_predicate(chain.pop(), predicate))
            when '+'
              chain.push(@factory.adjacent_sibling_predicate(chain.pop(), predicate))
            when '~'
              chain.push(@factory.preceding_sibling_predicate(chain.pop(), predicate))
            else
              chain.push(predicate)
          pending = null
    finish_group()

    # A single group needs no `or` wrapper, which keeps the result for
    # comma-free selectors exactly what it was before.
    switch alternatives.length
      when 0 then return []
      when 1 then return alternatives[0]
      else return @factory.or_predicate(alternatives)

  # **_CSS_SELECTOR_REGEXP** is a regular expression for parsing an individual CSS selector
  # (which might include a tag name, an ID, one or more classes, one or more attributes and a pseudo class).
  #
  # `"tag#id.class-one.class-two[name~=\"value with spaces\"]".match(_CSS_SELECTOR_REGEXP)`
  #
  # Every part of the expression is optional, so it produces a (possibly empty) match for any
  # input string. The two rows of digits above the expression, read top to bottom, give the
  # number of the capture group that opens at that column.
  #
  #{ TODO: Combine the `id` and `class` rules to make them order-independent? (I think CSS specifies the order, but still.)
  #{############################################################################################################################################################################################################################################################################
  #{                                                                                            11                  1           11  11                  1        112   2    2     2     2     2     22 22       3          33                  3                 3 3           #
  #{                     12                  3            4  56                  7          89  01                  2           34  56                  7        890   1    2     3     4     5     67 89       0          12                  3                 4 5           #
  _CSS_SELECTOR_REGEXP: /((\/[^\/]*\/[gmi]*)|(\*|[\w-]+))?(\#((\/[^\/]*\/[gmi]*)|([\w-]+)))?((\.((\/[^\/]*\/[gmi]*)|([\w-]+)))*)((\[((\/[^\/]*\/[gmi]*)|([\w-]+))(((=)|(~=)|(\|=)|(\*=)|(\^=)|(\$=))(("(([^\\"]|(\\"))*)")|((\/[^\/]*\/[gmi]*)|([\w- :]+))))?\])*)(:([\w-]+))?/ #
  #{                      \-name--------------------------/|\-id-----------------------------/\-class(es)-----------------------/||  \-attr-name-----------------/|\-operator----------------------/\-value-----------------------------------------------/|  | |\-pseudo--/   #
  #{                                                                                                                             ||                               \-operator-and-value---------------------------------------------------------------------/  | |              #
  #{                                                                                                                             |\-attr-clause-([])----------------------------------------------------------------------------------------------------------/ |              #
  #{                                                                                                                             \-attr-clauses-([][]...)-------------------------------------------------------------------------------------------------------/              #
  #{############################################################################################################################################################################################################################################################################

  # Indices of the important captured groups in a `_CSS_SELECTOR_REGEXP` match.
  _NAME         = 1 # tag name: `*`, a bare word or a `/regex/`
  _ID           = 4 # the `#id` clause, including the leading `#`
  _CLASSES      = 8 # the whole run of `.class` clauses (e.g. `.foo.bar`)
  _ATTRIBUTES   = 13 # the whole run of `[...]` attribute clauses
  _PSEUDO_CLASS = 35 # the pseudo-class name, without the leading `:`

  # **_ATTRIBUTE_CLAUSE_REGEXP** is a regular expression used to
  # split one or more `[<name> <op> <value>]` expressions
  # into individual components.
  #
  # It carries the `g` flag, so each `exec` call resumes at `lastIndex` and
  # returns the next `[...]` clause; `_parse_selector` relies on this to step
  # through every clause of a selector. Because the expression object is shared,
  # that `lastIndex` state is shared too.
  #{###########################################################################################################################################################
  #{                                                                          1     1     1     11 11       1          11                  2                  #
  #{                         1  23                  4        567   8    9     0     1     2     34 56       7          89                  0                  #
  _ATTRIBUTE_CLAUSE_REGEXP: /(\[((\/[^\/]*\/[gmi]*)|([\w-]+))(((=)|(~=)|(\|=)|(\*=)|(\^=)|(\$=))(("(([^\\"]|(\\"))*)")|((\/[^\/]*\/[gmi]*)|([\w- :]+))))?\])/g #
  #{                            \-name----------------------/|\-operator-----------------------/\-value-----------------------------------------------/|      #
  #{                                                         \-operator-and-value----------------------------------------------------------------------/      #
  #{###########################################################################################################################################################

  # Indices of the important captured groups in an `_ATTRIBUTE_CLAUSE_REGEXP` match.
  _ATTR_NAME              = 2 # attribute name (bare word or `/regex/`)
  _OPERATOR               = 6 # one of `=`, `~=`, `|=`, `*=`, `^=`, `$=`; unset for a bare `[name]`
  _DEQUOTED_ATTR_VALUE    = 15 # contents of a "double-quoted" value, without the quotes
  _NEVERQUOTED_ATTR_VALUE = 18 # a value written without quotes (bare text or `/regex/`)

  # **_parse_selector** returns a (possibly compound) predicate
  # that matches the provided `selector` (string).
  _parse_selector:(selector)->
    # Builds one predicate for a single compound selector token such as
    # `tag#id.class[attr=value]:first-child`.
    #
    # Each recognised part (tag name, id, classes, attribute clauses and the
    # `first-child` pseudo-class) contributes one clause. Returns the
    # `and_predicate` of those clauses, or an empty array when no clause was
    # recognised. Pseudo-classes other than `first-child` are ignored.
    match = @_CSS_SELECTOR_REGEXP.exec(selector)
    clauses = []

    # Rebuilds the `i` and `m` flags of `regexp` so that they survive when
    # the expression is re-created with an anchor. `g` is left out on
    # purpose: it was never carried over by this method, and it only adds
    # `lastIndex` state to the new expression.
    flags_of = (regexp)->
      flags = ''
      flags += 'i' if regexp.ignoreCase
      flags += 'm' if regexp.multiline
      return flags

    # Turns the value of a `^=` (starts with), `$=` (ends with) or `*=`
    # (contains) clause into the regular expression to match against.
    # Strings are escaped and anchored as the operator requires. Regular
    # expressions are used as given when they already carry the required
    # anchor, and are otherwise re-created with it.
    to_pattern = (value, operator)=>
      if typeof value is 'string'
        escaped = @factory._escape_for_regexp(value)
        switch operator
          when '^=' then return new RegExp("^#{escaped}")
          when '$=' then return new RegExp("#{escaped}$")
          else return new RegExp(escaped)
      switch operator
        when '^='
          return value if /^\^/.test(value.source)
          # Group the source so the anchor applies to every alternative.
          return new RegExp("^(?:#{value.source})", flags_of(value))
        when '$='
          # Only an unescaped trailing `$` counts as an existing anchor; a
          # `$` preceded by an odd number of backslashes is a literal dollar.
          return value if /(?:^|[^\\])(?:\\\\)*\$$/.test(value.source)
          return new RegExp("(?:#{value.source})$", flags_of(value))
        else
          return value

    # The name part.
    if match[_NAME]?
      if match[_NAME] is '*'
        clauses.push(@factory.any_tag_predicate())
      else
        clauses.push(@factory.by_tag_predicate(@_to_string_or_regex(match[_NAME])))

    # The ID part (the captured text includes the leading `#`).
    if match[_ID]?
      clauses.push(@factory.by_id_predicate(@_to_string_or_regex(match[_ID].substring(1))))

    # One or more class parts.
    if match[_CLASSES]?.length > 0         # match[_CLASSES] contains something like `.foo.bar`
      class_names = match[_CLASSES].split('.') # split the string into individual class names
      class_names.shift()                  # and skip the first (empty) token that is included
      for class_name in class_names
        clauses.push(@factory.by_class_predicate(@_to_string_or_regex(class_name)))

    # One or more attribute parts.
    if match[_ATTRIBUTES]?.length > 0 # match[_ATTRIBUTES] contains one or more `[name=value]` (or `[name]`) strings
      clause_regexp = @_ATTRIBUTE_CLAUSE_REGEXP
      # The expression is global and shared, so start from a known position
      # in case an earlier call was interrupted by an exception.
      clause_regexp.lastIndex = 0
      while (attr_match = clause_regexp.exec(match[_ATTRIBUTES]))?
        raw_name  = attr_match[_ATTR_NAME]
        operator  = attr_match[_OPERATOR]
        raw_value = attr_match[_DEQUOTED_ATTR_VALUE] ? attr_match[_NEVERQUOTED_ATTR_VALUE]
        continue unless raw_name?
        name = @_to_string_or_regex(raw_name)
        if not operator?
          # `[name]`: the attribute merely has to exist.
          clauses.push(@factory.by_attr_exists_predicate(name))
        else if raw_value?
          value = @_to_string_or_regex(raw_value)
          switch operator
            when '|='
              clauses.push(@factory.by_attr_value_pipe_equals(name, value))
            when '^=', '$=', '*='
              clauses.push(@factory.by_attr_value_predicate(name, to_pattern(value, operator)))
            when '~='
              # Whitespace-delimited list: pass the delimiter along.
              clauses.push(@factory.by_attr_value_predicate(name, value, /\s+/))
            else
              clauses.push(@factory.by_attr_value_predicate(name, value, null))

    # The pseudo-class part.
    if match[_PSEUDO_CLASS]?
      if match[_PSEUDO_CLASS] is 'first-child'
        clauses.push(@factory.first_child_predicate())

    # Combine them with `and` if needed.
    if clauses.length > 0
      clauses = @factory.and_predicate(clauses)

    return clauses

  # **_to_string_or_regex** converts a string that starts and ends with `/`
  # (with an optional `g`, `m` or `i` suffix) into a regular expression,
  # and otherwise returns the original `str` value.
  _to_string_or_regex:(str)->
    # `/source/flags` (flags drawn from `g`, `m`, `i`) becomes a RegExp;
    # every other string is handed back unchanged.
    delimited = str.match(/^\/(.*)\/([gmi]*)$/)
    return str unless delimited?[1]?
    [whole, source, flags] = delimited
    return new RegExp(source, flags)

# Public API includes `Stew` and `DOMUtil`
# The names are attached to `exports` when that is set, and to `this` otherwise.
# NOTE(review): this assignment also declares `exports` as a variable of this
# file, so the left-hand operand may always be unset here and `this` may be
# what actually receives the names; confirm against the compiled output.
exports = exports ? this
exports.Stew = Stew
exports.DOMUtil = DOMUtil
