# -*- coding: utf-8 -*-
# Enable absolute import so we can import the elasticsearch client library
from __future__ import absolute_import

from datetime import datetime, timedelta

from elasticsearch.helpers import scan
from flask_restful import abort

from werkzeug.exceptions import NotFound

from distribution import app
from distribution.managers import BaseManager
from distribution.utils import string_to_utc_date


class FeedItemManager(BaseManager):
    """
    Manager for the feed items stored in Elasticsearch.

    It normalizes raw request arguments, builds the search request body
    for feed items, counts the matching documents per source and purges
    the documents of a domain published within a date range.
    """
    # Index name, read from the ``ELASTICSEARCH_FEED_INDEX_NAME`` setting
    # and falling back to ``distribution_feed`` when it is not configured.
    index = app.config.get('ELASTICSEARCH_FEED_INDEX_NAME', 'distribution_feed')

    def count_sources(self, *args, **kwargs):
        kwargs["group_by"] = "sources"
        result = self.aggregated_search(*args, **kwargs)
        buckets = result['group_by_sources']['buckets']
        return dict((bucket["key"], bucket["doc_count"]) for bucket in buckets)

    def get_datetime_parameter(self, arguments):

        filter_by_datetime = True

        if self.has_date_variable('meta.last_update', arguments):
            datetime_parameter = 'meta.last_update'
        elif self.has_date_variable('last_modified_datetime', arguments):
            datetime_parameter = 'last_modified_datetime'
        elif self.has_date_variable('publication_datetime', arguments):
            datetime_parameter = 'publication_datetime'
        else:
            datetime_parameter = 'publication_datetime'
            filter_by_datetime = False

        return datetime_parameter, filter_by_datetime

    def build_search_query(self, *args, **kwargs):
        """
        Assemble the Elasticsearch request body for a feed item search.

        Recognised keyword arguments:

        :arg arguments: dict with the raw query arguments. It is modified in
            place: ``parse_query`` may add ``content_type`` to it and the
            permissions are merged into it afterwards.
        :arg permissions: dict with the caller permissions; the
            ``future_content`` flag is read from it
        :arg order: sort direction for the datetime field (``'desc'`` by
            default)
        :arg group_by: optional field name, consumed by
            ``_build_query_aggregation``
        :return: dict with the request body
        """
        filters_without_default = app.config[
            'FEED_ITEM_FILTERS_WITHOUT_DEFAULT_DATETIME']
        sort_order = kwargs.get('order', 'desc')
        permissions = kwargs.get('permissions', {})
        arguments = kwargs.get('arguments', {})
        future_content = permissions.get('future_content', False)

        # Both of these must see the arguments before the permissions are
        # merged into them.
        is_deleted = arguments.get('deleted', False)
        parsed_arguments = self.parse_query(arguments, permissions, future_content)

        arguments.update(permissions)
        datetime_parameter, filter_by_datetime = self.get_datetime_parameter(arguments)
        uses_default_datetime = not any(
            name in arguments for name in filters_without_default)

        deleted_filter = {'term': {'meta.is_deleted': is_deleted}}
        query = {
            'query': {
                'bool': {
                    'must': {'bool': {'must': [], 'must_not': []}},
                    'filter': {'bool': {'must': [deleted_filter], 'must_not': []}},
                },
            },
            'sort': [
                {datetime_parameter: {'order': sort_order, 'unmapped_type': "date"}},
            ],
        }

        self._add_pagination(query, arguments)
        if 'exists_fields' in arguments:
            self._build_exists_fields(query, arguments)

        self._build_query_metadata(query, arguments)
        self._build_query_content(query, parsed_arguments)

        if filter_by_datetime or uses_default_datetime:
            self._build_query_datetime(query, parsed_arguments, datetime_parameter)
            # The range above already bounds publication_datetime when it is
            # the selected field.
            cap_publication = datetime_parameter != 'publication_datetime'
        else:
            cap_publication = True

        if cap_publication and not future_content:
            # Upper bound only: from_range=None drops the lower bound
            self._build_query_datetime(query, parsed_arguments,
                                       'publication_datetime', from_range=None)

        if 'language' in arguments:
            self._build_query_languages(query, parsed_arguments)
        if 'title-abstract' in arguments:
            self._build_query_title_abstract(query, arguments)

        self._build_query_blacklist_source(query, parsed_arguments)
        self._build_query_aggregation(query, **kwargs)
        return query

    def parse_query(self, arguments, permissions, future_content=False):
        """
        Normalize the raw query arguments of a client request.

        The normalized arguments are built according to the following rules:

        For each of ``publication_datetime``, ``last_modified_datetime`` and
        ``meta.last_update`` the arguments ``<field>``, ``<field>__from`` and
        ``<field>__to`` are reduced to ``<field>__from`` and ``<field>__to``.
        Both are always present in the result.

        ``source_blacklist`` is always present and holds the
        ``SOURCE_BLACKLIST`` app setting.

        When ``content_type`` is missing from ``arguments`` it is added (to
        ``arguments`` itself) with the ``FEED_DEFAULT_CONTENT_TYPES`` app
        setting joined by commas.

        Filters named in the ``FEED_ITEM_FILTERS`` app setting are accepted
        both as ``<name>`` and as ``<name>__not``. Only the filters present in
        the raw arguments appear in the result. Their value is a list of
        values separated by a single operator: commas count as ``OR``; if
        `` OR `` appears anywhere the value is split on it, otherwise it is
        split on `` AND ``. Operators cannot be combined, for example:

        Raw:

            {
                'subjects': 'technology OR science OR engineering',
                'locations__not': 'Slovakia',
                'persons': 'Nicolas Dughetti AND Maximiliano Cecilia OR Alvaro Tuso',
                ...
            }

        Normalized:

            {
                'subjects': {
                    'operator': 'or',
                    'values': ['technology', 'science', 'engineering']
                },
                'locations__not': {
                    'operator': 'and',
                    'values': ['Slovakia']
                },
                'persons': {
                    'operator': 'or',
                    'values': ['Nicolas Dughetti AND Maximiliano Cecilia', 'Alvaro Tuso']
                },
                ...
            }

        ``language`` is normalized the same way, always split on `` OR ``.

        When ``permissions`` is not empty its filters and language are
        normalized too and merged into the result with
        ``_merge_with_permissions``.

        :arg arguments: dict with the query arguments
        :arg permissions: dict with the permissions of the caller
        :arg future_content: forwarded to ``_parse_query_datetime``
        :return: dict with the normalized arguments
        """
        normalized = {}

        if 'content_type' not in arguments:
            default_types = app.config['FEED_DEFAULT_CONTENT_TYPES']
            arguments['content_type'] = ','.join(default_types)

        self._parse_query_content(normalized, arguments)
        self._parse_query_languages(normalized, arguments)

        if permissions:
            granted = {}
            self._parse_query_content(granted, permissions)
            self._parse_query_languages(granted, permissions)
            self._merge_with_permissions(normalized, granted)

        datetime_fields = (
            'publication_datetime',
            'last_modified_datetime',
            'meta.last_update',
        )
        for datetime_field in datetime_fields:
            self._parse_query_datetime(normalized, arguments, datetime_field,
                                       future_content)

        normalized['source_blacklist'] = app.config['SOURCE_BLACKLIST']
        return normalized

    def _merge_with_permissions(self, parsed, parsed_permissions):
        for k, v in parsed_permissions.items():
            if k not in parsed:
                parsed[k] = v
            else:
                intersection = set(v['values']) & set(parsed[k]['values'])
                parsed[k] = {
                    'operator': (parsed[k]['operator']
                                 if v['operator'] == 'or' and parsed[k]['operator'] == 'and'
                                 else v['operator']),
                    'values': list(intersection) if intersection else v['values']
                }

    def _parse_query_content(self, parsed, arguments):
        """
        Normalize the content filters found in ``arguments``.

        An argument is a content filter when its name, with ``__not``
        removed, is listed in the ``FEED_ITEM_FILTERS`` app setting.
        Arguments whose value is ``None`` are skipped. Commas in the value
        count as ``OR``; the value is split on `` OR `` when that operator
        is present and on `` AND `` otherwise.

        :arg parsed: dict receiving ``{'values': [...], 'operator': ...}``
            under the original argument name
        :arg arguments: dict with the raw arguments
        """
        known_filters = app.config['FEED_ITEM_FILTERS']

        for name, raw in arguments.items():
            filter_name = name.replace('__not', '')
            if raw is None or filter_name not in known_filters:
                continue

            expression = raw.replace(',', ' OR ')
            if ' OR ' in expression:
                separator = ' OR '
            else:
                separator = ' AND '

            pieces = expression.split(separator)
            parsed[name] = {
                'values': [piece.strip() for piece in pieces],
                'operator': separator.lower().strip(),
            }

    def _parse_query_languages(self, parsed, arguments):
        value = arguments.get('language')
        arg = 'language'
        if value is not None:
            operator = ' OR '
            parsed[arg] = {
                'values': [v.strip() for v in value.split(operator)],
                'operator': operator.lower().strip()
            }

    def _parse_query_datetime(self, parsed, arguments, field, future_content=False):
        from_datetime = arguments.get(field + '__from', None)
        to_datetime = arguments.get(field + '__to', 'now')

        if not from_datetime:
            date_time = arguments.get(field)
            if date_time:
                if 'd' in date_time:
                    time_delta = timedelta(days=int(date_time[:-1]))
                else:
                    time_delta = timedelta(hours=int(date_time[:-1]))
            else:
                time_delta = timedelta(days=self.MAX_DAYS)

            difference = datetime.utcnow() - time_delta
            from_datetime = difference.strftime("%Y-%m-%dT%H:%M:%SZ")
            to_datetime = 'now'

        if not future_content:
            now = string_to_utc_date()
            parsed_from_datetime = string_to_utc_date(from_datetime)
            parsed_to_datetime = string_to_utc_date(to_datetime) if to_datetime != 'now' else now
            if parsed_to_datetime > now or parsed_from_datetime > now:
                abort(400, message='The parameter future_content is invalid')

        parsed[field + '__from'] = from_datetime
        parsed[field + '__to'] = to_datetime

    def _create_match_condition(self, field, value):
        return {
            'match_phrase': {
                field: value
            }
        }

    def _create_multi_match_condition(self, fields, value):
        return {
            'multi_match': {
                'query': value,
                'type': 'phrase',
                'fields': fields
            }
        }

    def _build_query_content(self, query, arguments):
        """
        Add the content filters to the scoring part of the query.

        Only the arguments whose name, with ``__not`` removed, is listed in
        the ``FEED_ITEM_FILTERS`` app setting are used. Arguments containing
        ``__not`` go to ``must_not``, the others to ``must``. With the
        ``and`` operator every condition is added on its own; otherwise the
        conditions are wrapped in one ``should`` group of which at least one
        must match.

        For the fields listed in ``TAXONOMY_TOPIC_TYPES`` each value is
        replaced by a ``should`` group made of the keys of the topics
        returned by ``TaxonomyTopic.manager.get_with_descendants`` plus the
        value itself.

        :arg query: dict with the query being built, modified in place
        :arg arguments: dict with the normalized arguments
        """
        from ..models.taxonomy import TaxonomyTopic
        expand_topic = TaxonomyTopic.manager.get_with_descendants

        def condition_for(name, text):
            # A filter maps either to one Elasticsearch field or to a list
            mapped = app.config['FEED_ITEM_FILTERS'][name]
            if isinstance(mapped, list):
                return self._create_multi_match_condition(mapped, text)
            return self._create_match_condition(mapped, text)

        def any_of(alternatives):
            return {
                'bool': {
                    'should': alternatives,
                    'minimum_should_match': 1,
                }
            }

        for arg in arguments:
            field = arg.replace('__not', '')
            if field not in app.config['FEED_ITEM_FILTERS']:
                continue

            occurrence = 'must_not' if '__not' in arg else 'must'
            parsed_filter = arguments[arg]

            if field in app.config['TAXONOMY_TOPIC_TYPES']:
                conditions = []
                for value in parsed_filter['values']:
                    alternatives = [
                        condition_for(field, topic.key)
                        for topic in expand_topic([value], False)
                    ]
                    alternatives.append(condition_for(field, value))
                    conditions.append(any_of(alternatives))
            else:
                conditions = [
                    condition_for(field, value)
                    for value in parsed_filter['values']
                ]

            target = query['query']['bool']['must']['bool'][occurrence]
            if parsed_filter['operator'] == 'and':
                target.extend(conditions)
            else:
                target.append(any_of(conditions))

    def _build_query_languages(self, query, arguments):
        filtered_query = query['query']['bool']['must']
        target_key = filtered_query['bool']['must']
        conditions = []
        for values in arguments['language']['values']:
            condition = {
                'bool': {
                    'should': [self._create_match_condition('language', values)],
                    'minimum_should_match': 1
                }
            }
            conditions.append(condition)

        target_key.append({
            'bool': {
                'should': conditions,
                'minimum_should_match': 1
            }
        })

    def _build_query_title_abstract(self, query, arguments):
        local_query = query['query']['bool']['must']['bool']['must']
        title_abstract = {
            "dis_max": {
                "queries": [{
                    "match": {
                        "title": {
                            "query": arguments['title-abstract'],
                            "operator": "and"
                        }
                    }
                }, {
                    "match": {
                        "abstract": {
                            "query": arguments['title-abstract'],
                            "operator": "and"
                        }
                    }
                }],
                "tie_breaker": 0.3
            }
        }
        local_query.append(title_abstract)

    def _build_query_metadata(self, query, arguments):
        """
        adds a metadata filter
        :param query: the query
        :param arguments: the value to decide if it is needed to filter
        """
        operator = (
            'must' if 'metadata' in arguments and arguments['metadata'] == 'true' else 'must_not')
        condition = {
            'exists': {
                'field': 'meta.is_metadata'
            }}
        query['query']['bool']['filter']['bool'][operator].append(condition)

    @staticmethod
    def _build_query_datetime(query, arguments, field, from_range='gte', to_range='lte'):
        query_range = {
            'range': {
                field: {}
            }
        }

        if from_range:
            query_range['range'][field][from_range] = arguments[field + '__from']
        if to_range:
            query_range['range'][field][to_range] = arguments[field + '__to']

        query['query']['bool']['filter']['bool']['must'].append(query_range)

    def _build_query_blacklist_source(self, query, arguments):
        blacklist = arguments['source_blacklist']

        if blacklist:
            for source in blacklist:
                query['query']['bool']['must']['bool']['must_not'].append(
                    self._create_match_condition('sources', source))

    def _add_pagination(self, query, arguments):
        page = int(arguments.get('page', 1))
        limit = int(arguments.get('limit', self.MAX_ROWS))

        query['from'] = (page - 1) * limit
        query['size'] = limit

        if (query['from'] + query['size']) > self.MAX_RESULT_WINDOW:
            abort(400, message='Pagination Limit Exceeded')

    def _build_exists_fields(self, query, arguments):
        """
        Exclude if:
            This field has no values.
            This field has no non-null values.
            This field is missing completely.
        """
        fields = arguments['exists_fields'].split(',')
        for field in fields:
            exists_query = {
                'exists': {
                    'field': field
                }
            }

            query['query']['bool']['filter']['bool']['must'].append(
                exists_query)

    @staticmethod
    def _build_query_aggregation(query, **kwargs):
        """
        Very simplistic implementation, returning just the counts of
        each group.

        """
        if "group_by" in kwargs:
            field = kwargs["group_by"]
            key = "group_by_{}".format(field)
            query.update(size=0, aggs={key: {"terms": {"field": field}}})

    def purge(self, domain, lower, upper):
        """
        Delete the items of a domain published within a date range.

        The matching documents are collected with a scan over the index and
        then removed with one bulk request made of ``delete`` actions.

        :arg domain: value matched against ``meta.domain``
        :arg lower: inclusive lower bound of ``publication_datetime``
        :arg upper: inclusive upper bound of ``publication_datetime``
        """
        publication_window = {
            'range': {
                'publication_datetime': {'gte': lower, 'lte': upper}
            }
        }
        domain_match = self._create_match_condition('meta.domain', domain)
        query = {
            'query': {
                'bool': {
                    'must': [publication_window, domain_match]
                }
            }
        }

        hits = scan(self.backend.client, query, index=self.index,
                    doc_type=self.doc_type)
        actions = [
            {
                '_op_type': 'delete',
                '_index': hit['_index'],
                '_type': hit['_type'],
                '_id': hit['_id'],
            }
            for hit in hits
        ]

        msg = 'purge: Purging {} elements for query {}'
        app.logger.debug(msg.format(len(actions), query))
        self.backend.bulk(actions)

    @staticmethod
    def has_date_variable(variable, arguments):
        return variable in arguments or variable + '__from' in arguments
