# -*- coding: utf-8 -*-
import pytz
import re
import sys
from curses import ascii
from dateutil import parser
from datetime import datetime
from urlparse import urlparse
from xml.sax.saxutils import escape


class DEFAULT:
    """Sentinel object used to distinguish "argument omitted" from None."""

    def __repr__(self):
        return "<default>"


# Shadow the class with its single instance so the sentinel is unique.
DEFAULT = DEFAULT()


def utc_date_to_string(utc_date):
    """Format a UTC datetime as an ISO-8601 string with a 'Z' suffix."""
    iso_8601_utc = "%Y-%m-%dT%H:%M:%S.%fZ"
    return utc_date.strftime(iso_8601_utc)


def string_to_utc_date(from_date=None):
    """Parse a date string into a timezone-aware UTC datetime.

    :param from_date: date string in any format dateutil can parse, or
        None to get the current UTC time.
    :return: timezone-aware datetime in UTC.
    :raises ValueError: if dateutil cannot parse the string.
    """
    if from_date is None:
        return datetime.utcnow().replace(tzinfo=pytz.utc)

    dt = parser.parse(from_date)
    if dt.utcoffset() is None:
        # Naive datetime: assume the wall time is already UTC.
        return dt.replace(tzinfo=pytz.utc)
    # Aware datetime: convert to UTC. The previous code relabelled the
    # wall time as UTC and then ADDED the offset, which shifted the
    # result in the wrong direction (e.g. 18:30+02:00 became 20:30Z
    # instead of 16:30Z).
    return dt.astimezone(pytz.utc)


def string_to_utc_string(from_date):
    """Normalise a date string to the canonical UTC timestamp format."""
    utc_date = string_to_utc_date(from_date)
    return utc_date_to_string(utc_date)


def date_to_rfc2822_string(from_date):
    """Format a datetime as an RFC 2822 date string."""
    rfc2822_fmt = '%a, %d %b %Y %H:%M:%S %z'
    return from_date.strftime(rfc2822_fmt)


def validate_domain_string(domain_string):
    """Validates domain query parameter syntax.

    Examples:
        All the following examples are well formed:
            `Travel`
            `History OR Travel`
            `gs:uk OR gs:mx OR gs:pt`
            `gs:uk or gs:mx or gs:pt`
            `Travel,History`

       The following examples are incorrect:
            `gs:uk gs:mx gs:pt`
            `Travel;History`
            `Travel, History`
            `Travel , History`

    :param domain_string: raw domain query parameter value.
    :return: the validated string, unchanged.
    :raises ValueError: when the syntax is invalid.
    """
    allowed_separator = 'OR'

    # Each comma-separated part is a space-separated run of tokens made
    # of letters and ':' only (raw string so `\s` is a regex escape).
    domain_regex = re.compile(r'^([a-zA-Z:]+\s)*[a-zA-Z:]+$')

    valid_domains = all(domain_regex.match(bit) is not None for bit in domain_string.split(','))

    # Domains sit at even positions, separators at odd ones. An even
    # token count means a dangling separator (e.g. `Travel OR`), which
    # the odd-position check alone would let through.
    tokens = domain_string.split()
    valid_separators = (len(tokens) % 2 == 1 and
                        all(sep.upper() == allowed_separator for sep in tokens[1::2]))

    if not valid_domains or not valid_separators:
        raise ValueError(
            ('Invalid Domain. Domain must contain only characters or spaces.\n'
             'For example: domain=Travel or domain=News,Travel or domain=News OR Travel')
        )

    return domain_string


def validate_rfc3339_date(datestring):
    """Validate that *datestring* is an RFC 3339 timestamp.

    :param datestring: candidate timestamp string.
    :return: the validated string, unchanged.
    :raises ValueError: when the format or the date itself is invalid.
    """
    rfc3339_pattern = (
        r"^(\d\d\d\d)\-(\d\d)\-(\d\d)T"
        r"(\d\d):(\d\d):(\d\d)(\.\d+)?(Z|([+\-])(\d\d):(\d\d))$")

    if re.match(rfc3339_pattern, datestring) is None:
        raise ValueError('Invalid format. Datetime format RFC3339. Example: 2015-08-13T18:30:02Z')

    # dateutil raises ValueError for well-formed but impossible dates,
    # e.g. month 2 day 30 — let that propagate.
    parser.parse(datestring)

    return datestring


def validate_hours_day_format(datestring):
    """Validate a relative time string such as '7d' or '5h'.

    Accepted ranges are 1-100 days ('d') and 1-23 hours ('h').

    :param datestring: candidate relative time string.
    :return: the validated string, unchanged.
    :raises ValueError: when the unit or the range is invalid.
    """
    try:
        quantity = int(datestring[:-1])
        unit = datestring[-1:]

        if unit == 'd' and 1 <= quantity <= 100:
            return datestring
        if unit == 'h' and 1 <= quantity <= 23:
            return datestring
        raise ValueError()
    except ValueError:
        raise ValueError('Invalid format. Range: 1-23h 1-100d. Example: 7d, 5h')


def validate_uri_format(obj_uri, query):
    """Check that the path of *obj_uri* matches *query*.

    :param obj_uri: absolute URI whose path (minus the leading '/') is
        compared against the query.
    :param query: expected path string.
    :return: True when the path matches, False otherwise.
    """
    obj_path = urlparse(obj_uri).path[1:]
    # Mirror a trailing slash onto the expected query so `/foo/` still
    # matches `foo`. endswith() is safe on an empty path, whereas the
    # previous `obj_path[-1]` raised IndexError for URIs with no path.
    if obj_path.endswith('/'):
        query = fix_uri_trailing_slash(query)
    return obj_path == query


def fix_uri_trailing_slash(uri):
    """Return *uri* with a trailing slash appended when it lacks one.

    Uses endswith() rather than `uri[-1]`, which raised IndexError for
    an empty string.
    """
    if not uri.endswith('/'):
        uri += '/'
    return uri


def get_worst_status(status_list):
    """
    Return the worst status based on a status list.

    :param status_list: The list of status, the status can be the string
                        'green', 'yellow' or 'red', invalid status are
                        ignored.
    :return: The worst status from the list (green, yellow or red).
    """
    # Probe in order of decreasing severity; anything unrecognised
    # falls through to 'green'.
    for candidate in ('red', 'yellow'):
        if candidate in status_list:
            return candidate
    return 'green'


##########################################
# Used to deal with invalid xml characters
##########################################
def invalid_xml_remove(text):
    """Strip characters that are illegal in XML 1.0 from *text*.

    # http://stackoverflow.com/questions/1707890/fast-way-to-filter-illegal-xml-unicode-chars-in-python
    """
    # Code-point ranges forbidden by the XML 1.0 specification.
    forbidden_ranges = [
        (0x00, 0x08), (0x0B, 0x1F), (0x7F, 0x84), (0x86, 0x9F),
        (0xD800, 0xDFFF), (0xFDD0, 0xFDDF), (0xFFFE, 0xFFFF),
        (0x1FFFE, 0x1FFFF), (0x2FFFE, 0x2FFFF), (0x3FFFE, 0x3FFFF),
        (0x4FFFE, 0x4FFFF), (0x5FFFE, 0x5FFFF), (0x6FFFE, 0x6FFFF),
        (0x7FFFE, 0x7FFFF), (0x8FFFE, 0x8FFFF), (0x9FFFE, 0x9FFFF),
        (0xAFFFE, 0xAFFFF), (0xBFFFE, 0xBFFFF), (0xCFFFE, 0xCFFFF),
        (0xDFFFE, 0xDFFFF), (0xEFFFE, 0xEFFFF), (0xFFFFE, 0xFFFFF),
        (0x10FFFE, 0x10FFFF)]

    # Skip ranges above sys.maxunicode (narrow Python 2 builds cannot
    # represent code points beyond 0xFFFF).
    character_classes = []
    for low, high in forbidden_ranges:
        if low < sys.maxunicode:
            character_classes.append(u"%s-%s" % (unichr(low), unichr(high)))

    illegal_xml_re = re.compile(u'[%s]' % u''.join(character_classes))
    return illegal_xml_re.sub('', text)


def scrub_literal(value):
    r"""
    Scrubs control characters from the incoming values to remove
    things like form feeds (\f) and line breaks (\n) which might
    cause problems with Jena.
    Data with these characters was found in the Backstage data.

    :param value: string (str/unicode) or number to scrub.
    :return: None for falsy input, the value untouched for integers,
        otherwise a stripped unicode string.
    """
    if not value:
        return
    # Numbers need no scrubbing; hand them back untouched.
    if type(value) in (long, int):
        return value
    # Drop ASCII control characters. iscntrl() already matches every
    # character isctrl() does, so the previous double test
    # (`if not iscntrl(c) if not isctrl(c)`) was redundant.
    n = ''.join(c for c in value if not ascii.iscntrl(c))
    n = n.replace('"', '')
    n = clean_char(n)
    if type(n) != unicode:
        n = unicode(n, errors='replace')
    # Remove the Unicode replacement character. The original used the
    # byte-string literal '\ufffd', which on Python 2 is the six
    # characters backslash-u-f-f-f-d and never matched U+FFFD itself;
    # doing it after the unicode conversion also avoids implicit
    # ASCII decoding of byte strings.
    n = n.replace(u'\ufffd', '')
    return n.strip()


def sanitize_xml_text(text):
    """
    Removes all invalid XML characters and escape al special characters.

    Falsy input (None, empty string) is returned unchanged.
    """
    if not text:
        return text
    cleaned = invalid_xml_remove(text)
    return escape(scrub_literal(cleaned))


def clean_char(char):
    """
    Function for remove invalid XML characters from
    incoming data.

    :param char: string to clean (despite the name, a whole string).
    :return: the string with ANSI escapes, invalid XML characters and
        known problem characters removed or replaced.
    """
    # Get rid of the ctrl characters first.
    # http://stackoverflow.com/questions/1833873/python-regex-escape-characters
    char = re.sub('\x1b[^m]*m', '', char)
    # Clean up invalid xml
    char = invalid_xml_remove(char)
    replacements = [
        (u'\u201c', '\"'),
        (u'\u201d', '\"'),
        (u"\u001B", ' '),  # http://www.fileformat.info/info/unicode/char/1b/index.htm
        (u"\u0019", ' '),  # http://www.fileformat.info/info/unicode/char/19/index.htm
        (u"\u0016", ' '),  # http://www.fileformat.info/info/unicode/char/16/index.htm
        (u"\u001C", ' '),  # http://www.fileformat.info/info/unicode/char/1c/index.htm
        (u"\u0003", ' '),  # http://www.utf8-chartable.de/unicode-utf8-table.pl?utf8=0x
        (u"\u000C", ' ')
    ]
    # Replace every occurrence of each bad character. The previous
    # `if char == rep: return new_char` only fired when the WHOLE
    # string equalled the bad character, so smart quotes etc. embedded
    # in longer text were never replaced.
    for bad, good in replacements:
        char = char.replace(bad, good)
    return char
