/*-
 * Copyright (c) 2014-present MongoDB, Inc.
 * Copyright (c) 2008-2014 WiredTiger, Inc.
 *	All rights reserved.
 *
 * See the file LICENSE for redistribution information.
 */

#include "wt_internal.h"

/*
 * __rec_dictionary_skip_search --
 *     Search a dictionary skiplist.
 */
static WT_REC_DICTIONARY *
__rec_dictionary_skip_search(WT_REC_DICTIONARY **head, uint64_t hash)
{
    WT_REC_DICTIONARY **e;
    int i;

    /*
     * Start at the highest skip level, then go as far as possible at each level before stepping
     * down to the next.
     */
    for (i = WT_SKIP_MAXDEPTH - 1, e = &head[i]; i >= 0;) {
        if (*e == NULL) { /* Empty levels */
            --i;
            --e;
            continue;
        }

        /*
         * Return any exact matches: we don't care in what search level we found a match.
         */
        if ((*e)->hash == hash) /* Exact match */
            return (*e);
        if ((*e)->hash > hash) { /* Drop down a level */
            --i;
            --e;
        } else /* Keep going at this level */
            e = &(*e)->next[i];
    }
    return (NULL);
}

/*
 * __rec_dictionary_skip_search_stack --
 *     Search a dictionary skiplist, returning an insert/remove stack.
 */
static void
__rec_dictionary_skip_search_stack(
  WT_REC_DICTIONARY **head, WT_REC_DICTIONARY ***stack, uint64_t hash)
{
    WT_REC_DICTIONARY **e;
    int i;

    /*
     * Start at the highest skip level, then go as far as possible at each level before stepping
     * down to the next.
     */
    for (i = WT_SKIP_MAXDEPTH - 1, e = &head[i]; i >= 0;)
        if (*e == NULL || (*e)->hash > hash)
            stack[i--] = e--; /* Drop down a level */
        else
            e = &(*e)->next[i]; /* Keep going at this level */
}

/*
 * __rec_dictionary_skip_insert --
 *     Insert an entry into the dictionary skip-list.
 */
static void
__rec_dictionary_skip_insert(WT_REC_DICTIONARY **head, WT_REC_DICTIONARY *e, uint64_t hash)
{
    WT_REC_DICTIONARY **stack[WT_SKIP_MAXDEPTH];
    u_int i;

    /* Insert the new entry into the skiplist. */
    __rec_dictionary_skip_search_stack(head, stack, hash);
    for (i = 0; i < e->depth; ++i) {
        e->next[i] = *stack[i];
        *stack[i] = e;
    }
}

/*
 * __wti_rec_dictionary_init --
 *     Allocate and initialize the dictionary.
 */
int
__wti_rec_dictionary_init(WT_SESSION_IMPL *session, WT_RECONCILE *r, u_int slots)
{
    u_int depth, i;

    /* Free any previous dictionary. */
    __wti_rec_dictionary_free(session, r);

    r->dictionary_slots = slots;
    WT_RET(__wt_calloc(session, r->dictionary_slots, sizeof(WT_REC_DICTIONARY *), &r->dictionary));
    for (i = 0; i < r->dictionary_slots; ++i) {
        depth = __wt_skip_choose_depth(session);
        WT_RET(__wt_calloc(session, 1,
          sizeof(WT_REC_DICTIONARY) + depth * sizeof(WT_REC_DICTIONARY *), &r->dictionary[i]));
        r->dictionary[i]->depth = depth;
    }
    return (0);
}

/*
 * __wti_rec_dictionary_free --
 *     Free the dictionary.
 */
void
__wti_rec_dictionary_free(WT_SESSION_IMPL *session, WT_RECONCILE *r)
{
    u_int i;

    if (r->dictionary == NULL)
        return;

    /*
     * We don't correct dictionary_slots when we fail during allocation, but that's OK, the value is
     * either NULL or a memory reference to be free'd.
     */
    for (i = 0; i < r->dictionary_slots; ++i)
        __wt_free(session, r->dictionary[i]);
    __wt_free(session, r->dictionary);
}

/*
 * __wti_rec_dictionary_reset --
 *     Reset the dictionary when reconciliation restarts and when crossing a page boundary (a
 *     potential split).
 */
void
__wti_rec_dictionary_reset(WT_RECONCILE *r)
{
    if (r->dictionary_slots) {
        r->dictionary_next = 0;
        memset(r->dictionary_head, 0, sizeof(r->dictionary_head));
    }
}

/*
 * __wti_rec_dictionary_lookup --
 *     Check the dictionary for a matching value on this page.
 */
int
__wti_rec_dictionary_lookup(
  WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_REC_KV *val, WT_REC_DICTIONARY **dpp)
{
    WT_REC_DICTIONARY *dp, *next;
    uint64_t hash;
    bool match;

    *dpp = NULL;

    /* Search the dictionary, and return any match we find. */
    hash = __wt_hash_city64(val->buf.data, val->buf.size);
    for (dp = __rec_dictionary_skip_search(r->dictionary_head, hash);
         dp != NULL && dp->hash == hash; dp = dp->next[0]) {
        WT_RET(
          __wt_cell_pack_value_match((WT_CELL *)((uint8_t *)r->cur_ptr->image.mem + dp->offset),
            &val->cell, val->buf.data, &match));
        if (match) {
            WT_STAT_DSRC_INCR(session, rec_dictionary);
            *dpp = dp;
            return (0);
        }
    }

    /*
     * We're not doing value replacement in the dictionary. We stop adding new entries if we run out
     * of empty dictionary slots (but continue to use the existing entries). I can't think of any
     * reason a leaf page value is more likely to be seen because it was seen more recently than
     * some other value: if we find working sets where that's not the case, it shouldn't be too
     * difficult to maintain a pointer which is the next dictionary slot to re-use.
     */
    if (r->dictionary_next >= r->dictionary_slots)
        return (0);

    /*
     * Set the hash value, we'll add this entry into the dictionary when we write it into the page's
     * disk image buffer (because that's when we know where on the page it will be written).
     */
    next = r->dictionary[r->dictionary_next++];
    next->offset = 0; /* Not necessary, just cautious. */
    next->hash = hash;
    __rec_dictionary_skip_insert(r->dictionary_head, next, hash);
    *dpp = next;
    return (0);
}
