noflo = require 'noflo'
url = require 'url'
uri = require 'urijs'
Strip = require 'string'
he = require 'he'
he.encode.options.useNamedReferences = true
langs = require 'langs'

# Strip leading `www.` and `blog.` labels from a hostname so the remainder
# can be used as a readable publisher name.
#
# host - hostname String; when omitted it is treated as the empty string.
#
# Only labels at the very start of the hostname are removed. A plain substring
# replace would also mangle hosts that merely contain the text, turning
# `myblog.example.com` into `myexample.com`.
#
# Returns the cleaned hostname String.
cleanUpHost = (host = '') ->
  host.replace(/^(?:(?:www|blog)\.)+/, '')

# Convert a GitHub Gist object into the intermediate item structure.
#
# original - Gist Object. The fields read here are `html_url`, `description`,
#            `owner` (`login`, `html_url`, `avatar_url`) and `files`.
#
# Every file becomes one `<pre><code>` block holding the HTML-encoded file
# contents. When the file has a `language`, it is exposed as a lower-cased
# `language-*` class on the `<code>` element.
#
# Returns an Object with `url`, `metadata` (typed as 'Code') and `html` keys.
normalizeGist = (original) ->
  # A gist without an owner yields an empty author list instead of a
  # TypeError on `owner.login`
  authors = []
  if original.owner
    authors.push
      name: original.owner.login
      url: original.owner.html_url
      avatar:
        src: original.owner.avatar_url

  item =
    url: original.html_url
    metadata:
      '@type': 'Code'
      title: original.description
      author: authors
      publisher:
        url: 'http://github.com'
        name: 'GitHub'
    html: ''

  for name, file of original.files
    lang = ''
    lang = 'language-' + file.language.toLowerCase() if file.language
    # Files may arrive without `content`; encode an empty string in that case
    # rather than handing `undefined` to the encoder
    item.html += "<pre><code class=\"#{lang}\">#{he.encode(file.content or '')}</code></pre>\n"

  item

# Tidy up an item that already carries HTML content.
#
# original - item Object with an `html` String and an optional `metadata`
#            Object. The item is modified in place.
#
# Steps performed:
# * make sure `metadata` exists and drop its `body` and `path` entries
# * when no description is set, derive one from the tag-stripped HTML,
#   trimmed and cut to 160 characters
# * let `metadata.isBasedOnUrl`, when set, replace the item URL
# * rename every metadata key starting with `_` so that it starts with `@`
#
# Returns the same `original` Object.
normalizePayload = (original) ->
  original.metadata or= {}
  meta = original.metadata
  delete meta[dropped] for dropped in ['body', 'path']

  unless meta.description
    plainText = new Strip(original.html).stripTags().s
    meta.description = plainText.trim().substring 0, 160

  if meta.isBasedOnUrl
    original.url = meta.isBasedOnUrl

  # `_type` becomes `@type`, `_context` becomes `@context`, and so on
  for key, val of meta when key.charAt(0) is '_'
    meta["@#{key.slice(1)}"] = val
    delete meta[key]

  original

# Convert an Embedly extract object into the intermediate item structure.
#
# extract  - extract Object. `description` and `title` are defaulted to ''
#            on the object itself and the Twitter branch may rewrite
#            `description`, so the input is modified in place.
# compress - when truthy the article content is dropped and the item is
#            reduced to a single compact HTML element.
#
# Missing `authors`, `keywords` or `images` lists are treated as empty.
# The provider-specific branches run in order and later ones may overwrite
# the HTML or `@type` chosen by earlier ones.
#
# Returns an Object with `id`, `html`, `title` and `metadata` keys; `title`
# is removed for Twitter and Google+ items.
normalizeEmbedly = (extract, compress) ->
  extract.description = '' unless extract.description
  extract.title = '' unless extract.title
  # A description that merely repeats the title adds nothing
  extract.description = '' if extract.description is extract.title
  # Default the optional lists once so the `.map` / `.length` accesses below
  # cannot throw when a list is absent
  authors = extract.authors or []
  keywords = extract.keywords or []
  images = extract.images or []
  image = ''
  language = langs.where 'name', extract.language
  item =
    id: extract.original_url
    html: extract.content or ''
    title: extract.title
    metadata:
      author: authors.map (author) ->
        name: author.name
        url: author.url
        avatar: {}
      related: extract.related
      publisher:
        url: extract.provider_url
        name: extract.provider_name or extract.provider_display
        favicon: extract.favicon_url
      keywords: keywords.map (keyword) -> keyword.name
      description: extract.description
      # presumably the two-letter language code of the matched entry — confirm against `langs`
      inLanguage: language?['1'] or null

  item.html = '' if compress

  # Provider-based typing
  if extract.provider_display in ['www.youtube.com', 'vimeo.com', 'vine.co']
    item.metadata['@type'] = 'VideoObject'
  if extract.provider_url in ['http://wistia.com']
    item.metadata['@type'] = 'VideoObject'
  if extract.provider_display in ['soundcloud.com']
    item.metadata['@type'] = 'AudioObject'
  if extract.provider_name in ['Google Maps']
    item.metadata['@type'] = 'Place'
  if extract.provider_display in ['medium.com']
    item.metadata['@type'] = 'Article'

  # Media handling: iframe embeds are appended to whatever HTML we have
  if extract.media?.html and extract.media.type in ['video', 'rich'] and extract.media.html.indexOf('iframe') isnt -1
    item.html += extract.media.html

  if extract.provider_display is 'www.facebook.com'
    item.title = ''
    item.metadata.title = ''
    item.metadata.description = extract.description
    item.html = ''
    if images.length > 1
      item.html = "<img alt=\"#{he.encode(extract.description)}\" src=\"#{images[0].url}\">"
      item.metadata['@type'] = 'Photograph'
    else
      item.html = "<blockquote>#{he.encode(extract.description)}</blockquote>"

  if extract.provider_display is 'twitter.com'
    delete item.title
    item.metadata['@type'] = 'Comment'
    if extract.media and extract.media.type is 'photo'
      # Drop the trailing pic.twitter.com link that points at the photo itself
      extract.description = extract.description.replace(/pic\.twitter\.com\/[A-Za-z0-9]+$/g, '').trim()
      item.metadata.description = extract.description
    item.html = "<blockquote>#{he.encode(extract.description)}</blockquote>"

    if extract.media and extract.media.type is 'photo'
      if compress
        item.html = "<img alt=\"#{he.encode(extract.description)}\" src=\"#{extract.media.url}\">"
      else
        item.html += "<img src=\"#{extract.media.url}\">"
      item.metadata['@type'] = 'Photograph'

  if extract.provider_display is 'plus.google.com'
    item.metadata['@type'] = 'Comment'
    # Encode the title like every other branch does before embedding it in HTML
    item.html = "<blockquote>#{he.encode(extract.title)}</blockquote>"

    if images.length > 2
      image = images[0].url
      if compress
        item.html = "<img alt=\"#{he.encode(extract.title)}\" src=\"#{image}\">"
      else
        item.html += "<img src=\"#{image}\">"
      item.metadata['@type'] = 'Photograph'
    delete item.title
    # NOTE(review): `item` has no top-level `description`, so this delete is a
    # no-op; `item.metadata.description` may have been intended — confirm
    delete item.description

  if extract.provider_display is 'www.flickr.com'
    if images.length > 0
      image = images[0].url
      if compress
        item.html = "<img alt=\"#{he.encode(extract.title)}\" src=\"#{image}\">"
      else
        item.html += "<img src=\"#{image}\">"
      item.metadata['@type'] = 'Photograph'
      delete item.description
      delete item.metadata.description

  if extract.provider_display in ['instagram.com', 'www.instagram.com']
    if extract.media?.html and extract.media.type in ['video', 'rich'] and extract.media.html.indexOf('iframe') isnt -1
      item.html = extract.media.html
      delete item.description
      delete item.metadata.description
      item.metadata['@type'] = 'MediaObject'
    else if images.length > 0
      image = images[0].thumbnail_url or images[0].url
      if compress
        item.html = "<img alt=\"#{he.encode(extract.title)}\" src=\"#{image}\">"
      else
        item.html += "<img src=\"#{image}\">"
      item.metadata['@type'] = 'Photograph'
      delete item.description
      delete item.metadata.description

  # Whitelisted sites where we know we are getting an avatar in the pictures array
  if images.length and extract.provider_display in [
    'twitter.com'
    'plus.google.com'
    'vimeo.com'
    'www.youtube.com'
    'www.facebook.com'
  ]
    # Square pictures are taken to be profile pictures
    profiles = images.filter (picture) -> picture.width is picture.height
    unless item.metadata.author.length
      item.metadata.author.push
        avatar: {}
    if profiles.length
      profilePic = profiles[0]
      if extract.provider_display is 'plus.google.com'
        profilePic = profiles[profiles.length - 1]
      item.metadata.author[0].avatar.src = profilePic.url
      item.metadata.author[0].avatar.width = profilePic.width
      item.metadata.author[0].avatar.height = profilePic.height
      if profilePic.colors
        item.metadata.author[0].avatar.colors = profilePic.colors.map (c) -> c.color

  if images.length
    image = images[0].url or images[0].thumbnail_url

  # Fall back to the lead image when nothing else produced any HTML. Use the
  # resolved `image` so that a picture with only a `thumbnail_url` does not
  # end up as src="undefined"
  if image and not compress and not item.html
    item.html += "<img src=\"#{image}\">"

  if compress and not item.html
    imageHtml = ''
    imageHtml = "<img src=\"#{image}\">" if image
    item.html = "<article about=\"#{extract.original_url}\"><h1>#{he.encode(extract.title)}</h1><p>#{he.encode(extract.description)}</p>#{imageHtml}</article>"

  if extract.app_links
    item.metadata.app_links = extract.app_links

  item

# Convert a Facebook profile object into the intermediate item structure.
#
# profile - Object; the fields read here are `link`, `id`, `about`, `cover`,
#           `description`, `name`, `category` and `likes`.
#
# The HTML is assembled from the `about` text (as a heading), the cover image
# and the description, in that order, each only when present. `name`,
# `category` (as a one-element keyword list) and `likes` are copied into the
# metadata when set.
#
# Returns an Object with `id`, `html`, `metadata` (typed as 'Thing') and
# `url` keys; `url` always equals `id`.
normalizeFacebookProfile = (profile) ->
  permalink = profile.link or "https://www.facebook.com/#{profile.id}"

  fragments = []
  fragments.push "<h1>#{he.encode(profile.about)}</h1>\n" if profile.about
  fragments.push "<p><img src=\"#{profile.cover.source}\"></p>\n" if profile.cover
  fragments.push "<p>#{he.encode(profile.description)}</p>\n" if profile.description

  metadata = {'@type': 'Thing'}
  metadata.name = profile.name if profile.name
  metadata.keywords = [profile.category] if profile.category
  metadata.likes = profile.likes if profile.likes

  normalized =
    id: permalink
    html: fragments.join ''
    metadata: metadata
    url: permalink
  normalized

# Convert a Facebook photo object into the intermediate item structure.
#
# photo - Object; the fields read here are `name`, `link`, `id` and `source`.
#         A missing `name` is written back onto the object as ''.
#
# The photo is rendered as a `<figure>` whose caption is the HTML-encoded
# `name`.
#
# Returns an Object with `id`, `html`, `metadata` (typed as 'Photograph') and
# `url` keys; `url` always equals `id`.
normalizeFacebookPhoto = (photo) ->
  photo.name or= ''
  permalink = photo.link or "https://www.facebook.com/#{photo.id}"
  caption = he.encode photo.name

  normalized =
    id: permalink
    html: "<figure><img src=\"#{photo.source}\"><figcaption>#{caption}</figcaption></figure>"
    metadata:
      '@type': 'Photograph'
    url: permalink
  normalized

# Build the NoFlo component that normalizes incoming posts.
#
# Ports:
#   in       - post Object as produced by one of the importers
#   compress - Boolean parameter; reduce the result to a single HTML element
#   out      - normalized item Object
#
# Items that already carry `html` go through `normalizePayload`. Items without
# HTML are routed to a source-specific normalizer based on the fields they
# carry; when none matches, the item is dropped and nothing is sent.
#
# Returns the configured noflo.Component instance.
exports.getComponent = ->
  c = new noflo.Component
  c.icon =  'cogs'
  c.description = 'Normalize post structure to Schema.org article metadata and content'
  c.inPorts.add 'in',
    datatype: 'object'
    description: 'Post instance as arriving from various importers'
  c.inPorts.add 'compress',
    datatype: 'boolean'
    description: 'Whether to compress the results into a single HTML element'
  c.outPorts.add 'out',
    datatype: 'object'

  noflo.helpers.WirePattern c,
    in: 'in'
    params: 'compress'
    out: 'out'
    forwardGroups: true
  , (original, groups, out) ->
    compress = c.params?.compress or false
    unless original.html
      if original.git_pull_url
        original = normalizeGist original
      else if original.provider_display and original.embeds
        original = normalizeEmbedly original, compress
      else if original.id and original.about and original.cover
        original = normalizeFacebookProfile original, compress
      else if original.id and original.source and original.images
        original = normalizeFacebookPhoto original, compress
      else
        # We have no use for items without HTML contents
        return
    else
      original = normalizePayload original, compress

    item =
      id: original.id or original.url
      html: original.html
      metadata: original.metadata or {}

    item.metadata['@context'] = 'http://schema.org'
    # Read the type through `item.metadata`, which is the normalizer's
    # metadata object when there is one and an empty object otherwise
    defaultType = if compress then 'MediaObject' else 'Article'
    item.metadata['@type'] = item.metadata['@type'] or defaultType
    item.metadata.isBasedOnUrl = original.resolved_url or original.url or original.id
    item.metadata.keywords = original.tags if original.tags
    item.metadata.title = original.title if original.title
    item.metadata.author = item.metadata.author or []
    # A plain author name replaces whatever author list was there
    if typeof original.author is 'string'
      item.metadata.author = []
      item.metadata.author.push
        name: original.author
    delete item.metadata.authors
    item.metadata.inLanguage = original.humanLanguage if original.humanLanguage
    item.metadata.datePublished = null

    if item.metadata.publisher?.url
      item.metadata.publisher.domain = uri(item.metadata.publisher.url).hostname()
      item.metadata.publisher.name = cleanUpHost(item.metadata.publisher.domain) unless item.metadata.publisher.name

    # Source URLs containing 'amazonaws.com' are not kept
    if item.metadata.isBasedOnUrl and item.metadata.isBasedOnUrl.indexOf('amazonaws.com') isnt -1
      delete item.metadata.isBasedOnUrl

    # NOTE(review): this overwrites a domain derived from `publisher.url`
    # just above — confirm the source URL is meant to win
    if item.metadata.isBasedOnUrl
      item.metadata.publisher = {} unless item.metadata.publisher
      item.metadata.publisher.domain = uri(item.metadata.isBasedOnUrl).hostname()
      item.metadata.publisher.name = cleanUpHost(item.metadata.publisher.domain) unless item.metadata.publisher.name

    # Prepend the title as a heading unless the content already has one;
    # code items never get a heading
    if not compress and item.metadata.title and original.html.indexOf('<h1') is -1 and item.metadata['@type'] isnt 'Code'
      item.html = "<h1>#{he.encode(item.metadata.title)}</h1>#{item.html}"

    item.options = original.options or {}
    item.options.compress = compress

    out.send item
  c
