/*-
 * Copyright (c) 2014-present MongoDB, Inc.
 * Copyright (c) 2008-2014 WiredTiger, Inc.
 *	All rights reserved.
 *
 * See the file LICENSE for redistribution information.
 */

#include "wt_internal.h"

/*
 * Forward declarations: both functions are defined further down in this file but are called by
 * code that appears before their definitions.
 */
static int __curtable_open_indices(WT_CURSOR_TABLE *ctable);
static int __curtable_update(WT_CURSOR *cursor);

/*
 * APPLY_CG --
 *     Invoke the WT_CURSOR method named "f" on every column-group cursor of a table cursor, in
 *     column-group order. The expansion reads and writes a variable named "ret" in the calling
 *     scope: each call's result is folded in with WT_TRET and then checked with
 *     WT_ERR_NOTFOUND_OK(ret, true). NOTE(review): that check presumably tolerates a not-found
 *     result while jumping to the caller's "err" label on any other failure -- confirm against the
 *     macro definition. Callers in this file follow the macro with their own test of "ret" when
 *     not-found must be treated as an error.
 */
#define APPLY_CG(ctable, f)                                                             \
    do {                                                                                \
        WT_CURSOR **__cp;                                                               \
        u_int __i;                                                                      \
        for (__i = 0, __cp = (ctable)->cg_cursors; __i < WT_COLGROUPS((ctable)->table); \
             __i++, __cp++) {                                                           \
            WT_TRET((*__cp)->f(*__cp));                                                 \
            WT_ERR_NOTFOUND_OK(ret, true);                                              \
        }                                                                               \
    } while (0)

/*
 * Cursor type for custom extractor callback.
 *
 * __wt_apply_single_idx builds one of these as a local variable and passes its "iface" member to
 * the index's extractor. The insert method installed on that interface (__curextract_insert) casts
 * the WT_CURSOR pointer back to this structure, so "iface" must remain the first member.
 */
typedef struct {
    WT_CURSOR iface;         /* Cursor interface handed to the extractor */
    WT_CURSOR_TABLE *ctable; /* Table cursor supplying the primary key */
    WT_CURSOR *idxc;         /* Index cursor the operation is applied to */
    int (*f)(WT_CURSOR *);   /* Index cursor method to call once the index key is built */
} WT_CURSOR_EXTRACTOR;

/*
 * __curextract_insert --
 *     Insert method of the cursor given to a custom extractor: turn the key the extractor produced
 *     into a full index key (extracted key followed by the primary key) and run the pending index
 *     operation on the index cursor.
 */
static int
__curextract_insert(WT_CURSOR *cursor)
{
    WT_CURSOR_EXTRACTOR *cextract;
    WT_DECL_RET;
    WT_ITEM extracted, *idxkey, primary_key;
    WT_SESSION_IMPL *session;
    size_t total;
    uint8_t *dest;

    CURSOR_API_CALL(cursor, session, ret, insert, NULL);

    cextract = (WT_CURSOR_EXTRACTOR *)cursor;
    idxkey = &cextract->idxc->key;

    /*
     * The extractor's key carries one trailing padding byte (added so the last column never has to
     * be rewritten); drop it before using the key.
     */
    WT_ITEM_SET(extracted, cursor->key);
    WT_ASSERT(session, extracted.size > 0);
    extracted.size -= 1;

    /* Every primary key column is part of the index key, so the raw primary key is used as is. */
    WT_ERR(__wt_cursor_get_raw_key(cextract->ctable->cg_cursors[0], &primary_key));

    /* Build "extracted key | primary key" in the index cursor's key buffer. */
    total = extracted.size + primary_key.size;
    WT_ERR(__wt_buf_grow(session, idxkey, total));
    dest = (uint8_t *)idxkey->mem;
    memcpy(dest, extracted.data, extracted.size);
    memcpy(dest + extracted.size, primary_key.data, primary_key.size);
    idxkey->size = total;

    /* The index key is set; the index value is empty (it starts clear and is never set). */
    F_SET(cextract->idxc, WT_CURSTD_KEY_EXT | WT_CURSTD_VALUE_EXT);

    /* Run the requested operation against the index. */
    ret = cextract->f(cextract->idxc);

err:
    API_END_RET_STAT(session, ret, cursor_insert);
}

/*
 * __wt_apply_single_idx --
 *     Apply the index cursor method "f" to one index of a table, building the index key either by
 *     projecting it from the column groups or by running the index's custom extractor.
 */
int
__wt_apply_single_idx(WT_SESSION_IMPL *session, WT_INDEX *idx, WT_CURSOR *cur,
  WT_CURSOR_TABLE *ctable, int (*f)(WT_CURSOR *))
{
    WT_CURSOR_STATIC_INIT(iface, __wt_cursor_get_key, /* get-key */
      __wt_cursor_get_value,                          /* get-value */
      __wt_cursor_get_raw_key_value,                  /* get-raw-key-value */
      __wt_cursor_set_key,                            /* set-key */
      __wt_cursor_set_value,                          /* set-value */
      __wti_cursor_compare_notsup,                    /* compare */
      __wti_cursor_equals_notsup,                     /* equals */
      __wt_cursor_notsup,                             /* next */
      __wt_cursor_notsup,                             /* prev */
      __wt_cursor_notsup,                             /* reset */
      __wt_cursor_notsup,                             /* search */
      __wt_cursor_search_near_notsup,                 /* search-near */
      __curextract_insert,                            /* insert */
      __wt_cursor_modify_notsup,                      /* modify */
      __wt_cursor_notsup,                             /* update */
      __wt_cursor_notsup,                             /* remove */
      __wt_cursor_notsup,                             /* reserve */
      __wt_cursor_config_notsup,                      /* reconfigure */
      __wt_cursor_notsup,                             /* largest_key */
      __wt_cursor_config_notsup,                      /* bound */
      __wt_cursor_notsup,                             /* cache */
      __wt_cursor_reopen_notsup,                      /* reopen */
      __wt_cursor_checkpoint_id,                      /* checkpoint ID */
      __wt_cursor_notsup);                            /* close */
    WT_CURSOR_EXTRACTOR extract_cursor;
    WT_DECL_RET;
    WT_ITEM key, value;

    /* Without a custom extractor the index key is a plain projection of the column groups. */
    if (idx->extractor == NULL) {
        WT_RET(__wt_schema_project_merge(
          session, ctable->cg_cursors, idx->key_plan, idx->key_format, &cur->key));

        /* The index key is set; the index value is empty (it starts clear and is never set). */
        F_SET(cur, WT_CURSTD_KEY_EXT | WT_CURSTD_VALUE_EXT);
        WT_RET(f(cur));
        return (0);
    }

    /*
     * Custom extractor: give it a temporary cursor whose insert method finishes the index key and
     * calls "f" on the real index cursor.
     */
    extract_cursor.iface = iface;
    extract_cursor.iface.session = &session->iface;
    extract_cursor.iface.key_format = idx->exkey_format;
    extract_cursor.ctable = ctable;
    extract_cursor.idxc = cur;
    extract_cursor.f = f;

    WT_RET(__wt_cursor_get_raw_key(&ctable->iface, &key));
    WT_RET(__wt_cursor_get_raw_value(&ctable->iface, &value));
    ret =
      idx->extractor->extract(idx->extractor, &session->iface, &key, &value, &extract_cursor.iface);

    /* Release the temporary cursor's key buffer whether or not the extractor succeeded. */
    __wt_buf_free(session, &extract_cursor.iface.key);
    WT_RET(ret);
    return (0);
}

/*
 * __apply_idx --
 *     Run one index cursor method on every index of a table. The method is identified by its byte
 *     offset inside WT_CURSOR; immutable indices are optionally left alone.
 */
static int
__apply_idx(WT_CURSOR_TABLE *ctable, size_t func_off, bool skip_immutable)
{
    WT_CURSOR *idxc;
    WT_INDEX *idx;
    WT_SESSION_IMPL *session;
    u_int i;
    int (*f)(WT_CURSOR *);

    session = CUR2S(ctable);

    for (i = 0; i < ctable->table->nindices; i++) {
        idx = ctable->table->indices[i];
        if (skip_immutable && F_ISSET(idx, WT_INDEX_IMMUTABLE))
            continue;

        idxc = ctable->idx_cursors[i];

        /* Fetch the method pointer stored at the requested offset in this index cursor. */
        f = *(int (**)(WT_CURSOR *))((uint8_t *)idxc + func_off);
        WT_RET(__wt_apply_single_idx(session, idx, idxc, ctable, f));
        WT_RET(idxc->reset(idxc));
    }

    return (0);
}

/*
 * __curtable_get_key --
 *     WT_CURSOR->get_key implementation for tables: the key is read from the primary (first)
 *     column-group cursor, using the table cursor's flags.
 */
static int
__curtable_get_key(WT_CURSOR *cursor, ...)
{
    WT_CURSOR_TABLE *ctable;
    WT_DECL_RET;
    va_list ap;

    ctable = (WT_CURSOR_TABLE *)cursor;

    va_start(ap, cursor);
    ret = __wti_cursor_get_keyv(ctable->cg_cursors[0], cursor->flags, ap);
    va_end(ap);

    return (ret);
}

/*
 * __curtable_get_value --
 *     WT_CURSOR->get_value implementation for tables.
 *
 *     Variable-argument front end: the caller's output arguments are collected into a va_list and
 *     passed to __wt_curtable_get_valuev, whose result is returned through the API exit macro.
 */
static int
__curtable_get_value(WT_CURSOR *cursor, ...)
{
    WT_DECL_RET;
    WT_SESSION_IMPL *session;
    va_list ap;

    /* API entry: sets "session". NOTE(review): presumably may jump to "err" -- confirm. */
    JOINABLE_CURSOR_API_CALL(cursor, session, ret, get_value, NULL);

    va_start(ap, cursor);
    ret = __wt_curtable_get_valuev(cursor, ap);
    va_end(ap);

err:
    API_END_RET_STAT(session, ret, cursor_get_value);
}

/*
 * __curtable_set_key --
 *     WT_CURSOR->set_key implementation for tables: set the key on the primary column-group cursor
 *     and make every other column-group cursor reference the same key.
 */
static void
__curtable_set_key(WT_CURSOR *cursor, ...)
{
    WT_CURSOR *cg, *primary;
    WT_CURSOR_TABLE *ctable;
    WT_DECL_RET;
    u_int i;
    va_list ap;

    ctable = (WT_CURSOR_TABLE *)cursor;
    primary = ctable->cg_cursors[0];

    va_start(ap, cursor);
    ret = __wti_cursor_set_keyv(primary, cursor->flags, ap);
    if (ret != 0)
        WT_IGNORE_RET(__wt_panic(CUR2S(cursor), ret, "failed to set key"));
    va_end(ap);

    /* Nothing to share if the primary did not end up with a key. */
    if (!F_ISSET(primary, WT_CURSTD_KEY_SET))
        return;

    /* The remaining column groups borrow the primary's key rather than copying the bytes. */
    for (i = 1; i < WT_COLGROUPS(ctable->table); i++) {
        cg = ctable->cg_cursors[i];
        cg->recno = primary->recno;
        cg->key.data = primary->key.data;
        cg->key.size = primary->key.size;
        F_SET(cg, WT_CURSTD_KEY_EXT);
    }
}

/*
 * __curtable_set_valuev --
 *     WT_CURSOR->set_value implementation for tables (va_list form).
 *
 *     With a raw or JSON-dump cursor the single WT_ITEM argument is sliced across the column
 *     groups. Otherwise the individual column arguments are packed into the column-group value
 *     buffers. On success every column-group cursor is flagged as having an externally set value;
 *     on failure the error is saved in every column-group cursor and its value-set flags are
 *     cleared. Returns zero or the error code.
 */
static int
__curtable_set_valuev(WT_CURSOR *cursor, va_list ap)
{
    WT_CURSOR **cp;
    WT_CURSOR_TABLE *ctable;
    WT_DECL_RET;
    WT_ITEM *item, *tmp;
    WT_SESSION_IMPL *session;
    u_int i;

    ctable = (WT_CURSOR_TABLE *)cursor;
    JOINABLE_CURSOR_API_CALL(cursor, session, ret, set_value, NULL);

    if (F_ISSET(cursor, WT_CURSOR_RAW_OK | WT_CURSTD_DUMP_JSON)) {
        item = va_arg(ap, WT_ITEM *);
        cursor->value.data = item->data;
        cursor->value.size = item->size;
        ret = __wt_schema_project_slice(
          session, ctable->cg_cursors, ctable->plan, 0, cursor->value_format, &cursor->value);
    } else {
        /*
         * The user may be passing us pointers returned by get_value that point into the buffers we
         * are about to update. Move them aside first.
         */
        for (i = 0, cp = ctable->cg_cursors; i < WT_COLGROUPS(ctable->table); i++, cp++) {
            item = &(*cp)->value;
            if (F_ISSET(*cp, WT_CURSTD_VALUE_SET) && WT_DATA_IN_ITEM(item)) {
                ctable->cg_valcopy[i] = *item;
                item->mem = NULL;
                item->memsize = 0;
            }
        }

        ret = __wt_schema_project_in(session, ctable->cg_cursors, ctable->plan, ap);

        /*
         * Dispose of the buffers moved aside above: if the cursor's value did not get a new
         * buffer, hand the old one back, otherwise free it.
         */
        for (i = 0, cp = ctable->cg_cursors; i < WT_COLGROUPS(ctable->table); i++, cp++) {
            tmp = &ctable->cg_valcopy[i];
            if (tmp->mem != NULL) {
                item = &(*cp)->value;
                if (item->mem == NULL) {
                    item->mem = tmp->mem;
                    item->memsize = tmp->memsize;
                    /*
                     * Ownership went back to the cursor: forget the pointer here. The slot is only
                     * overwritten when a value is moved aside again, so a stale pointer left
                     * behind could be freed by a later call while the cursor still owns it.
                     */
                    tmp->mem = NULL;
                    tmp->memsize = 0;
                } else
                    __wt_free(session, tmp->mem);
            }
        }
    }

    for (i = 0, cp = ctable->cg_cursors; i < WT_COLGROUPS(ctable->table); i++, cp++)
        if (ret == 0)
            F_SET(*cp, WT_CURSTD_VALUE_EXT);
        else {
            (*cp)->saved_err = ret;
            F_CLR(*cp, WT_CURSTD_VALUE_SET);
        }

err:
    API_END_RET(session, ret);
}

/*
 * __curtable_set_value --
 *     WT_CURSOR->set_value implementation for tables.
 *
 *     Variable-argument front end for __curtable_set_valuev. The method returns void, so the
 *     helper's return value is deliberately ignored here; on failure the helper records the error
 *     in each column-group cursor's saved_err field.
 */
static void
__curtable_set_value(WT_CURSOR *cursor, ...)
{
    va_list ap;

    va_start(ap, cursor);
    WT_IGNORE_RET(__curtable_set_valuev(cursor, ap));
    va_end(ap);
}

/*
 * __curtable_compare --
 *     WT_CURSOR->compare implementation for tables: compare the positions of two table cursors by
 *     comparing their primary column-group cursors.
 */
static int
__curtable_compare(WT_CURSOR *a, WT_CURSOR *b, int *cmpp)
{
    WT_CURSOR *primary_a, *primary_b;
    WT_DECL_RET;
    WT_SESSION_IMPL *session;

    JOINABLE_CURSOR_API_CALL(a, session, ret, compare, NULL);

    /* The two cursors must be open on the same underlying object. */
    if (strcmp(a->internal_uri, b->internal_uri) != 0)
        WT_ERR_MSG(session, EINVAL, "comparison method cursors must reference the same object");

    /* Both primaries need a key before the underlying comparison can run. */
    primary_a = WT_CURSOR_PRIMARY(a);
    primary_b = WT_CURSOR_PRIMARY(b);
    WT_ERR(__cursor_checkkey(primary_a));
    WT_ERR(__cursor_checkkey(primary_b));

    ret = primary_a->compare(primary_a, primary_b, cmpp);

err:
    API_END_RET(session, ret);
}

/*
 * __curtable_next --
 *     WT_CURSOR->next method for the table cursor type.
 *
 *     Calls next on every column-group cursor of the table (see APPLY_CG) and returns the
 *     accumulated result.
 */
static int
__curtable_next(WT_CURSOR *cursor)
{
    WT_CURSOR_TABLE *ctable;
    WT_DECL_RET;
    WT_SESSION_IMPL *session;

    ctable = (WT_CURSOR_TABLE *)cursor;
    JOINABLE_CURSOR_API_CALL(cursor, session, ret, next, NULL);
    API_RETRYABLE(session);
    CURSOR_REPOSITION_ENTER(cursor, session);
    /* Step each column-group cursor forward; the status is left in ret. */
    APPLY_CG(ctable, next);

err:
    /* Close the reposition, retryable and API sections opened above, innermost first. */
    CURSOR_REPOSITION_END(cursor, session);
    API_RETRYABLE_END(session, ret);
    API_END_RET(session, ret);
}

/*
 * __curtable_next_random --
 *     WT_CURSOR->next method for table cursors configured with next_random: the primary picks the
 *     record, the other column groups are then positioned on the same key.
 */
static int
__curtable_next_random(WT_CURSOR *cursor)
{
    WT_CURSOR *cg, *primary;
    WT_CURSOR_TABLE *ctable;
    WT_DECL_RET;
    WT_SESSION_IMPL *session;
    u_int i;

    ctable = (WT_CURSOR_TABLE *)cursor;
    JOINABLE_CURSOR_API_CALL(cursor, session, ret, next, NULL);

    /* Only the primary's next is random; it selects the record. */
    primary = ctable->cg_cursors[0];
    WT_ERR(primary->next(primary));

    /* Look the chosen key up in each remaining column group. */
    for (i = 1; i < WT_COLGROUPS(ctable->table); i++) {
        cg = ctable->cg_cursors[i];
        cg->key.data = primary->key.data;
        cg->key.size = primary->key.size;
        cg->recno = primary->recno;
        F_SET(cg, WT_CURSTD_KEY_EXT);
        WT_ERR(cg->search(cg));
    }

err:
    API_END_RET(session, ret);
}

/*
 * __curtable_prev --
 *     WT_CURSOR->prev method for the table cursor type.
 *
 *     Calls prev on every column-group cursor of the table (see APPLY_CG) and returns the
 *     accumulated result.
 */
static int
__curtable_prev(WT_CURSOR *cursor)
{
    WT_CURSOR_TABLE *ctable;
    WT_DECL_RET;
    WT_SESSION_IMPL *session;

    ctable = (WT_CURSOR_TABLE *)cursor;
    JOINABLE_CURSOR_API_CALL(cursor, session, ret, prev, NULL);
    API_RETRYABLE(session);
    CURSOR_REPOSITION_ENTER(cursor, session);
    /* Step each column-group cursor backward; the status is left in ret. */
    APPLY_CG(ctable, prev);

err:
    /* Close the reposition, retryable and API sections opened above, innermost first. */
    CURSOR_REPOSITION_END(cursor, session);
    API_RETRYABLE_END(session, ret);
    API_END_RET(session, ret);
}

/*
 * __curtable_reset --
 *     WT_CURSOR->reset method for the table cursor type: reset every column-group cursor and, for
 *     calls coming from the application, clear their bounds as well.
 */
static int
__curtable_reset(WT_CURSOR *cursor)
{
    WT_CURSOR_TABLE *ctable;
    WT_DECL_RET;
    WT_SESSION_IMPL *session;
    u_int i;

    ctable = (WT_CURSOR_TABLE *)cursor;

    JOINABLE_CURSOR_API_CALL_PREPARE_ALLOWED(cursor, session, reset, NULL);

    APPLY_CG(ctable, reset);

    /*
     * Bounds are cleared only when reset is called from outside the library, which is what the
     * API_USER_ENTRY test detects. Internal callers (cursor->remove, for example) must not lose
     * their bounds as a side effect. For that reason the bounds of each column-group cursor are
     * reset directly here instead of relying on the file cursor's reset method.
     */
    if (API_USER_ENTRY(session)) {
        for (i = 0; i < WT_COLGROUPS(ctable->table); i++)
            __wt_cursor_bound_reset(ctable->cg_cursors[i]);
    }

err:
    API_END_RET(session, ret);
}

/*
 * __curtable_search --
 *     WT_CURSOR->search method for the table cursor type.
 *
 *     Calls search on every column-group cursor of the table (see APPLY_CG) and returns the
 *     accumulated result.
 */
static int
__curtable_search(WT_CURSOR *cursor)
{
    WT_CURSOR_TABLE *ctable;
    WT_DECL_RET;
    WT_SESSION_IMPL *session;

    ctable = (WT_CURSOR_TABLE *)cursor;
    JOINABLE_CURSOR_API_CALL(cursor, session, ret, search, NULL);
    API_RETRYABLE(session);
    CURSOR_REPOSITION_ENTER(cursor, session);
    /* Search each column-group cursor for its current key; the status is left in ret. */
    APPLY_CG(ctable, search);

err:
    /* Close the reposition, retryable and API sections opened above, innermost first. */
    CURSOR_REPOSITION_END(cursor, session);
    API_RETRYABLE_END(session, ret);
    API_END_RET(session, ret);
}

/*
 * __curtable_search_near --
 *     WT_CURSOR->search_near method for the table cursor type.
 *
 *     The primary column-group cursor performs the search-near and reports the match direction
 *     through "exact". Every other column-group cursor is then positioned on the key the primary
 *     landed on with an exact search. Returns zero or an error code.
 */
static int
__curtable_search_near(WT_CURSOR *cursor, int *exact)
{
    WT_CURSOR **cp, *primary;
    WT_CURSOR_TABLE *ctable;
    WT_DECL_RET;
    WT_SESSION_IMPL *session;
    u_int i;

    ctable = (WT_CURSOR_TABLE *)cursor;
    JOINABLE_CURSOR_API_CALL(cursor, session, ret, search_near, NULL);
    API_RETRYABLE(session);
    CURSOR_REPOSITION_ENTER(cursor, session);

    cp = ctable->cg_cursors;
    primary = *cp;
    WT_ERR(primary->search_near(primary, exact));

    /*
     * Position the remaining column groups on the primary's key. The cursor pointer has to advance
     * together with the index, otherwise only the second column group would ever be searched and
     * any further column groups would be left unpositioned.
     */
    for (i = 1, ++cp; i < WT_COLGROUPS(ctable->table); i++, cp++) {
        (*cp)->key.data = primary->key.data;
        (*cp)->key.size = primary->key.size;
        (*cp)->recno = primary->recno;
        F_SET(*cp, WT_CURSTD_KEY_EXT);
        WT_ERR((*cp)->search(*cp));
    }

err:
    CURSOR_REPOSITION_END(cursor, session);
    API_RETRYABLE_END(session, ret);
    API_END_RET(session, ret);
}

/*
 * __curtable_insert --
 *     WT_CURSOR->insert method for the table cursor type.
 *
 *     Inserts the record into the primary column group first, then into the remaining column
 *     groups and all indices. If the primary reports a duplicate key and the table cursor is
 *     configured for overwrite, the operation is carried out as an update instead. Returns zero or
 *     an error code.
 */
static int
__curtable_insert(WT_CURSOR *cursor)
{
    WT_CURSOR **cp, *primary;
    WT_CURSOR_TABLE *ctable;
    WT_DECL_RET;
    WT_SESSION_IMPL *session;
    uint64_t flag_orig;
    u_int i;

    ctable = (WT_CURSOR_TABLE *)cursor;
    JOINABLE_CURSOR_UPDATE_API_CALL(cursor, session, ret, insert);
    WT_ERR(__curtable_open_indices(ctable));

    /* After this, cp refers to the second column-group cursor. */
    cp = ctable->cg_cursors;
    primary = *cp++;

    /*
     * Split out the first insert, it may be allocating a recno.
     *
     * If the table has indices, we also need to know whether this record is replacing an existing
     * record so that the existing index entries can be removed. We discover if this is an overwrite
     * by configuring the primary cursor for no-overwrite, and checking if the insert detects a
     * duplicate key. By default, when insert finds a duplicate, it returns the value it found. We
     * don't want that value to overwrite our own, override that behavior.
     */
    flag_orig = F_MASK(primary, WT_CURSTD_OVERWRITE);
    if (ctable->table->nindices > 0) {
        F_CLR(primary, WT_CURSTD_OVERWRITE);
        F_SET(primary, WT_CURSTD_DUP_NO_VALUE);
    }
    ret = primary->insert(primary);

    /*
     * WT_CURSOR.insert clears the set internally/externally flags but doesn't touch the items. We
     * could make a copy each time for overwrite cursors, but for now we just reset the flags.
     */
    F_SET(primary, flag_orig | WT_CURSTD_KEY_EXT | WT_CURSTD_VALUE_EXT);
    F_CLR(primary, WT_CURSTD_DUP_NO_VALUE);

    /* The overwrite test uses the table cursor's flag; the primary's was cleared temporarily. */
    if (ret == WT_DUPLICATE_KEY && F_ISSET(cursor, WT_CURSTD_OVERWRITE)) {
        WT_ERR(__curtable_update(cursor));

        /*
         * The cursor is no longer positioned. This isn't just cosmetic, without a reset, iteration
         * on this cursor won't start at the beginning/end of the table.
         */
        APPLY_CG(ctable, reset);
    } else {
        /* Any other failure of the primary insert ends the operation. */
        WT_ERR(ret);

        /* Insert into the remaining column groups using the primary's record number. */
        for (i = 1; i < WT_COLGROUPS(ctable->table); i++, cp++) {
            (*cp)->recno = primary->recno;
            WT_ERR((*cp)->insert(*cp));
        }

        /* Add the new record's entries to every index, immutable ones included. */
        WT_ERR(__apply_idx(ctable, offsetof(WT_CURSOR, insert), false));
    }

    /*
     * Insert is the one cursor operation that doesn't end with the cursor pointing to an on-page
     * item (except for column-store appends, where we are returning a key). That is, the
     * application's cursor continues to reference the application's memory after a successful
     * cursor call, which isn't true anywhere else. We don't want to have to explain that scoping
     * corner case, so we reset the application's cursor so it can free the referenced memory and
     * continue on without risking subsequent core dumps.
     */
    F_CLR(primary, WT_CURSTD_KEY_SET | WT_CURSTD_VALUE_SET);
    if (F_ISSET(primary, WT_CURSTD_APPEND))
        F_SET(primary, WT_CURSTD_KEY_EXT);

err:
    CURSOR_UPDATE_API_END(session, ret);
    return (ret);
}

/*
 * __curtable_update --
 *     WT_CURSOR->update method for the table cursor type.
 *
 *     Updates every column group and, when the table has indices, replaces the entries of the
 *     mutable indices (immutable indices are skipped). Returns zero or an error code.
 */
static int
__curtable_update(WT_CURSOR *cursor)
{
    WT_CURSOR_TABLE *ctable;
    WT_DECL_ITEM(value_copy);
    WT_DECL_RET;
    WT_SESSION_IMPL *session;

    ctable = (WT_CURSOR_TABLE *)cursor;
    JOINABLE_CURSOR_UPDATE_API_CALL(cursor, session, ret, update);
    WT_ERR(__curtable_open_indices(ctable));

    /*
     * If the table has indices, first delete any old index keys, then update the primary, then
     * insert the new index keys. This is complicated by the fact that we need the old value to
     * generate the old index keys, so we make a temporary copy of the new value.
     */
    if (ctable->table->nindices > 0) {
        /* Save the new value in a scratch buffer; the search below replaces the cursor values. */
        WT_ERR(__wt_scr_alloc(session, ctable->cg_cursors[0]->value.size, &value_copy));
        WT_ERR(__wt_schema_project_merge(
          session, ctable->cg_cursors, ctable->plan, cursor->value_format, value_copy));
        APPLY_CG(ctable, search);

        /*
         * Remove if search found a key. The search key existing doesn't mean the value wasn't
         * previously removed, anticipate a "does not exist" not-found error from the remove.
         */
        if (ret == 0) {
            WT_ERR_NOTFOUND_OK(__apply_idx(ctable, offsetof(WT_CURSOR, remove), true), false);
            /* Put the saved new value back into the column-group cursors. */
            WT_ERR(__wt_schema_project_slice(
              session, ctable->cg_cursors, ctable->plan, 0, cursor->value_format, value_copy));
        } else
            WT_ERR_NOTFOUND_OK(ret, false);
    }

    /* Update the column groups; here any non-zero status left in ret is an error. */
    APPLY_CG(ctable, update);
    WT_ERR(ret);

    /* Insert the index entries generated from the new value. */
    if (ctable->table->nindices > 0)
        WT_ERR(__apply_idx(ctable, offsetof(WT_CURSOR, insert), true));

err:
    CURSOR_UPDATE_API_END(session, ret);
    __wt_scr_free(session, &value_copy);
    return (ret);
}

/*
 * __curtable_remove --
 *     WT_CURSOR->remove method for the table cursor type.
 *
 *     Removes the record's index entries (when the table has indices) and then the record itself
 *     from every column group. Returns zero or an error code.
 */
static int
__curtable_remove(WT_CURSOR *cursor)
{
    WT_CURSOR *primary;
    WT_CURSOR_TABLE *ctable;
    WT_DECL_RET;
    WT_SESSION_IMPL *session;
    bool positioned;

    ctable = (WT_CURSOR_TABLE *)cursor;
    JOINABLE_CURSOR_REMOVE_API_CALL(cursor, session, ret, NULL);
    WT_ERR(__curtable_open_indices(ctable));

    /* Check if the cursor was positioned. */
    primary = *ctable->cg_cursors;
    positioned = F_ISSET(primary, WT_CURSTD_KEY_INT);

    /* Find the old record so it can be removed from indices */
    if (ctable->table->nindices > 0) {
        APPLY_CG(ctable, search);
        /* A record that cannot be found cannot be removed: fail on any non-zero status. */
        WT_ERR(ret);
        WT_ERR(__apply_idx(ctable, offsetof(WT_CURSOR, remove), false));
    }

    /* Remove the record from every column group. */
    APPLY_CG(ctable, remove);
    WT_ERR(ret);

    /*
     * If the cursor was positioned, it stays positioned with a key but has no value, otherwise,
     * there's no position, key or value. This isn't just cosmetic, without a reset, iteration on
     * this cursor won't start at the beginning/end of the table.
     */
    F_CLR(primary, WT_CURSTD_KEY_SET | WT_CURSTD_VALUE_SET);
    if (positioned)
        F_SET(primary, WT_CURSTD_KEY_INT);
    else
        APPLY_CG(ctable, reset);

err:
    CURSOR_UPDATE_API_END(session, ret);
    return (ret);
}

/*
 * __curtable_reserve --
 *     WT_CURSOR->reserve method for the table cursor type.
 *
 *     Reserves the record in every column group; indices are opened but not otherwise touched. On
 *     success the table cursor is searched again so that it has a value to return.
 */
static int
__curtable_reserve(WT_CURSOR *cursor)
{
    WT_CURSOR_TABLE *ctable;
    WT_DECL_RET;
    WT_SESSION_IMPL *session;

    ctable = (WT_CURSOR_TABLE *)cursor;
    JOINABLE_CURSOR_UPDATE_API_CALL(cursor, session, ret, reserve);

    /*
     * We don't have to open the indices here, but it makes the code similar to other cursor
     * functions, and it's odd for a reserve call to succeed but the subsequent update fail opening
     * indices.
     *
     * Check for a transaction before index open, opening the indices will start a transaction if
     * one isn't running.
     */
    WT_ERR(__wt_txn_context_check(session, true));
    WT_ERR(__curtable_open_indices(ctable));

    /* Reserve in column groups, ignore indices. */
    APPLY_CG(ctable, reserve);

err:
    CURSOR_UPDATE_API_END(session, ret);

    /*
     * The application might do a WT_CURSOR.get_value call when we return, so we need a value and
     * the underlying functions didn't set one up. For various reasons, those functions may not have
     * done a search and any previous value in the cursor might race with WT_CURSOR.reserve (and in
     * cases like LSM, the reserve never encountered the original key). For simplicity, repeat the
     * search here.
     */
    /* The search runs after the API section above has been closed, as a separate cursor call. */
    return (ret == 0 ? cursor->search(cursor) : ret);
}

/*
 * __wt_table_range_truncate --
 *     Truncate of a cursor range, table implementation.
 *
 *     Either the start or the stop cursor in trunc_info may be NULL; the code takes the table from
 *     whichever one is set (the start cursor is preferred). When the table has indices, the range
 *     is first walked record by record to remove the matching index entries, then the walking
 *     cursor is put back on its original key and each column group is range-truncated. Returns
 *     zero or an error code.
 */
int
__wt_table_range_truncate(WT_TRUNCATE_INFO *trunc_info)
{
    WT_CURSOR *wt_start, *wt_stop;
    WT_CURSOR_TABLE *ctable, *start, *stop;
    WT_DECL_ITEM(key);
    WT_DECL_RET;
    WT_ITEM raw;
    WT_SESSION_IMPL *session;
    u_int i;
    int cmp;

    start = (WT_CURSOR_TABLE *)trunc_info->start;
    stop = (WT_CURSOR_TABLE *)trunc_info->stop;
    ctable = (start != NULL) ? start : stop;
    session = CUR2S(ctable);
    wt_start = start == NULL ? NULL : &start->iface;
    wt_stop = stop == NULL ? NULL : &stop->iface;

    /* Open any indices. */
    WT_RET(__curtable_open_indices(ctable));
    WT_RET(__wt_scr_alloc(session, 128, &key));
    WT_STAT_DSRC_INCR(session, cursor_truncate);

    /*
     * Step through the cursor range, removing the index entries.
     *
     * If there are indices, copy the key we're using to step through the cursor range (so we can
     * reset the cursor to its original position), then remove all of the index records in the
     * truncated range. Copy the raw key because the memory is only valid until the cursor moves.
     */
    if (ctable->table->nindices > 0) {
        if (start == NULL) {
            /* No start cursor: walk backward from the stop cursor until prev stops succeeding. */
            WT_ERR(__wt_cursor_get_raw_key(wt_stop, &raw));
            WT_ERR(__wt_buf_set(session, key, raw.data, raw.size));

            do {
                APPLY_CG(stop, search);
                WT_ERR(ret);
                WT_ERR(__apply_idx(stop, offsetof(WT_CURSOR, remove), false));
            } while ((ret = wt_stop->prev(wt_stop)) == 0);
            WT_ERR_NOTFOUND_OK(ret, false);

            /* Put the stop cursor back on the key it started from. */
            __wt_cursor_set_raw_key(wt_stop, key);
            APPLY_CG(stop, search);
        } else {
            /*
             * Walk forward from the start cursor. With a stop cursor the walk ends once the start
             * cursor no longer compares less than it; without one, cmp stays at -1 and the walk
             * ends when next stops succeeding.
             */
            WT_ERR(__wt_cursor_get_raw_key(wt_start, &raw));
            WT_ERR(__wt_buf_set(session, key, raw.data, raw.size));

            cmp = -1;
            do {
                APPLY_CG(start, search);
                WT_ERR(ret);
                WT_ERR(__apply_idx(start, offsetof(WT_CURSOR, remove), false));
                if (stop != NULL)
                    WT_ERR(wt_start->compare(wt_start, wt_stop, &cmp));
            } while (cmp < 0 && (ret = wt_start->next(wt_start)) == 0);
            WT_ERR_NOTFOUND_OK(ret, false);

            /* Put the start cursor back on the key it started from. */
            __wt_cursor_set_raw_key(wt_start, key);
            APPLY_CG(start, search);
        }
    }

    /* Truncate the column groups. */
    for (i = 0; i < WT_COLGROUPS(ctable->table); i++)
        WT_ERR(__wt_range_truncate((start == NULL) ? NULL : start->cg_cursors[i],
          (stop == NULL) ? NULL : stop->cg_cursors[i]));

err:
    __wt_scr_free(session, &key);
    return (ret);
}

/*
 * __curtable_largest_key --
 *     WT_CURSOR->largest_key method for the table cursor type: reset the table cursor, then ask
 *     the primary column-group cursor for its largest key.
 */
static int
__curtable_largest_key(WT_CURSOR *cursor)
{
    WT_CURSOR_TABLE *ctable;
    WT_DECL_RET;
    WT_SESSION_IMPL *session;

    ctable = (WT_CURSOR_TABLE *)cursor;
    JOINABLE_CURSOR_API_CALL(cursor, session, ret, largest_key, NULL);

    /* Start from a clean cursor. */
    WT_ERR(cursor->reset(cursor));
    WT_ERR(ctable->cg_cursors[0]->largest_key(ctable->cg_cursors[0]));

err:
    /* Any failure leaves the cursor reset. */
    if (ret != 0)
        WT_TRET(cursor->reset(cursor));
    API_END_RET(session, ret);
}

/*
 * __curtable_bound --
 *     WT_CURSOR->bound method for the table cursor type: apply the bound configuration to every
 *     column-group cursor, rolling all of them back to the saved bounds if any one fails.
 */
static int
__curtable_bound(WT_CURSOR *cursor, const char *config)
{
    WT_CURSOR *cg;
    WT_CURSOR_BOUNDS_STATE saved_bounds;
    WT_CURSOR_TABLE *ctable;
    WT_DECL_RET;
    WT_SESSION_IMPL *session;
    u_int i;

    WT_CLEAR(saved_bounds);
    ctable = (WT_CURSOR_TABLE *)cursor;
    JOINABLE_CURSOR_API_CALL(cursor, session, ret, bound, NULL);

    /* Remember the primary's current bounds so a failed change can be undone. */
    WT_ERR(__wt_cursor_bounds_save(session, ctable->cg_cursors[0], &saved_bounds));

    /* Apply the new configuration to each column group in turn. */
    for (i = 0; i < WT_COLGROUPS(ctable->table); i++) {
        cg = ctable->cg_cursors[i];
        WT_ERR(cg->bound(cg, config));
    }

err:
    /* On failure, put the saved bounds back on every column-group cursor. */
    if (ret != 0) {
        for (i = 0; i < WT_COLGROUPS(ctable->table); i++)
            WT_TRET(__wt_cursor_bounds_restore(session, ctable->cg_cursors[i], &saved_bounds));
    }

    __wt_scr_free(session, &saved_bounds.lower_bound);
    __wt_scr_free(session, &saved_bounds.upper_bound);

    API_END_RET(session, ret);
}

/*
 * __curtable_close --
 *     WT_CURSOR->close method for the table cursor type: close all column-group and index cursors,
 *     free the memory owned by the table cursor, release the table and close the cursor itself.
 */
static int
__curtable_close(WT_CURSOR *cursor)
{
    WT_CURSOR **cp;
    WT_CURSOR_TABLE *ctable;
    WT_DECL_RET;
    WT_SESSION_IMPL *session;
    u_int i;

    ctable = (WT_CURSOR_TABLE *)cursor;
    JOINABLE_CURSOR_API_CALL_PREPARE_ALLOWED(cursor, session, close, NULL);
err:

    /* Column-group cursors. */
    if (ctable->cg_cursors != NULL) {
        for (i = 0; i < WT_COLGROUPS(ctable->table); i++) {
            cp = &ctable->cg_cursors[i];
            if (*cp == NULL)
                continue;
            WT_TRET((*cp)->close(*cp));
            *cp = NULL;
        }
    }

    /* Index cursors. */
    if (ctable->idx_cursors != NULL) {
        for (i = 0; i < ctable->table->nindices; i++) {
            cp = &ctable->idx_cursors[i];
            if (*cp == NULL)
                continue;
            WT_TRET((*cp)->close(*cp));
            *cp = NULL;
        }
    }

    /* The plan and value format are freed only when they are not the table's own. */
    if (ctable->plan != ctable->table->plan)
        __wt_free(session, ctable->plan);
    if (ctable->cfg != NULL) {
        for (i = 0; ctable->cfg[i] != NULL; ++i)
            __wt_free(session, ctable->cfg[i]);
        __wt_free(session, ctable->cfg);
    }
    if (cursor->value_format != ctable->table->value_format)
        __wt_free(session, cursor->value_format);

    /* The cursor arrays themselves. */
    __wt_free(session, ctable->cg_cursors);
    __wt_free(session, ctable->cg_valcopy);
    __wt_free(session, ctable->idx_cursors);

    WT_TRET(__wt_schema_release_table(session, &ctable->table));

    /* The table owns the URI: drop the reference before the generic cursor close. */
    cursor->internal_uri = NULL;
    __wt_cursor_close(cursor);

    API_END_RET(session, ret);
}

/*
 * __curtable_complete --
 *     Fail with EINVAL unless all of the table's column groups have been created.
 */
static int
__curtable_complete(WT_SESSION_IMPL *session, WT_TABLE *table)
{
    bool complete;

    /* Fast path: no locking needed once the table is known to be complete. */
    if (table->cg_complete)
        return (0);

    /* Otherwise look again while holding the table read lock. */
    WT_WITH_TABLE_READ_LOCK(session, complete = table->cg_complete);
    if (complete)
        return (0);

    WT_RET_MSG(session, EINVAL, "'%s' not available until all column groups are created",
      table->iface.name);
}

/*
 * __curtable_open_colgroups --
 *     Allocate the table cursor's column-group arrays and open one cursor per column group.
 */
static int
__curtable_open_colgroups(WT_CURSOR_TABLE *ctable, const char *cfg_arg[])
{
    WT_SESSION_IMPL *session;
    WT_TABLE *table;
    /*
     * Column-group cursors never inherit dump or readonly from the table cursor. The fourth slot
     * starts empty so the primary keeps any next_random setting; it is filled in after the first
     * open so every later column group has next_random turned off.
     */
    const char *cfg[] = {cfg_arg[0], cfg_arg[1], "dump=\"\",readonly=0", NULL, NULL};
    u_int i;

    session = CUR2S(ctable);
    table = ctable->table;

    /* The table must be fully created before cursors can be opened on it. */
    WT_RET(__curtable_complete(session, table));

    WT_RET(__wt_calloc_def(session, WT_COLGROUPS(table), &ctable->cg_cursors));
    WT_RET(__wt_calloc_def(session, WT_COLGROUPS(table), &ctable->cg_valcopy));

    for (i = 0; i < WT_COLGROUPS(table); i++) {
        WT_RET(__wt_open_cursor(
          session, table->cgroups[i]->source, &ctable->iface, cfg, &ctable->cg_cursors[i]));
        cfg[3] = "next_random=false";
    }
    return (0);
}

/*
 * __curtable_open_indices --
 *     Open cursors on indices for a table cursor.
 *
 *     Does nothing beyond opening the table's indices if the table has no indices or the index
 *     cursors are already open. Fails with ENOTSUP when the primary cursor is a bulk cursor. On
 *     any failure while opening the index cursors, all of them are closed again and the array is
 *     freed. Returns zero or an error code.
 */
static int
__curtable_open_indices(WT_CURSOR_TABLE *ctable)
{
    WT_CURSOR **cp, *primary;
    WT_DECL_RET;
    WT_SESSION_IMPL *session;
    WT_TABLE *table;
    u_int i;

    session = CUR2S(ctable);
    table = ctable->table;

    WT_RET(__wt_schema_open_indices(session, table));
    /* Nothing to do without indices, or when a previous call already opened the cursors. */
    if (table->nindices == 0 || ctable->idx_cursors != NULL)
        return (0);

    /* Check for bulk cursors. */
    primary = *ctable->cg_cursors;
    if (F_ISSET(primary, WT_CURSTD_BULK))
        WT_RET_MSG(session, ENOTSUP, "Bulk load is not supported for tables with indices");

    WT_RET(__wt_calloc_def(session, table->nindices, &ctable->idx_cursors));
    for (i = 0, cp = ctable->idx_cursors; i < table->nindices; i++, cp++)
        WT_ERR(
          __wt_open_cursor(session, table->indices[i]->source, &ctable->iface, ctable->cfg, cp));

    /* The cleanup below is reached only through the "err" label, never by falling through. */
    if (0) {
err:
        /*
         * On failure, we can't leave a subset of the indices open, since the table cursor is
         * already open and will remain open after this call. It's all or nothing, so we need to
         * close them all, and leave things as they were before the first cursor operation.
         *
         * The column group open code does not need to do this. Unlike indices, column groups are
         * opened when the table cursor is opened, and a failure there cannot result in an open
         * table cursor.
         */
        for (i = 0, cp = ctable->idx_cursors; i < table->nindices; i++, cp++)
            if (*cp != NULL) {
                WT_TRET((*cp)->close(*cp));
                *cp = NULL;
            }
        __wt_free(session, ctable->idx_cursors);
    }
    return (ret);
}

/*
 * __wt_curtable_open --
 *     WT_SESSION->open_cursor method for table cursors.
 *
 * The URI must start with "table:" and may be followed by a parenthesized column list (a
 *     projection). For a simple table a cursor is opened directly on the first column group's
 *     source and returned with its public URI replaced by the one passed in. Otherwise a
 *     WT_CURSOR_TABLE is built: column group cursors are opened immediately, and a copy of the
 *     configuration is saved so index cursors can be opened later.
 *
 * The table handle acquired here is released on every failure path: directly while no table cursor
 *     exists yet, and through the cursor's close method afterwards.
 */
int
__wt_curtable_open(WT_SESSION_IMPL *session, const char *uri, WT_CURSOR *owner, const char *cfg[],
  WT_CURSOR **cursorp)
{
    WT_CURSOR_STATIC_INIT(iface, __curtable_get_key, /* get-key */
      __curtable_get_value,                          /* get-value */
      __wti_cursor_get_raw_key_value_notsup,         /* get-raw-key-value */
      __curtable_set_key,                            /* set-key */
      __curtable_set_value,                          /* set-value */
      __curtable_compare,                            /* compare */
      __wt_cursor_equals,                            /* equals */
      __curtable_next,                               /* next */
      __curtable_prev,                               /* prev */
      __curtable_reset,                              /* reset */
      __curtable_search,                             /* search */
      __curtable_search_near,                        /* search-near */
      __curtable_insert,                             /* insert */
      __wt_cursor_modify_notsup,                     /* modify */
      __curtable_update,                             /* update */
      __curtable_remove,                             /* remove */
      __curtable_reserve,                            /* reserve */
      __wt_cursor_reconfigure,                       /* reconfigure */
      __curtable_largest_key,                        /* largest_key */
      __curtable_bound,                              /* bound */
      __wt_cursor_notsup,                            /* cache */
      __wt_cursor_reopen_notsup,                     /* reopen */
      __wt_cursor_checkpoint_id,                     /* checkpoint ID */
      __curtable_close);                             /* close */
    WT_CONFIG_ITEM cval;
    WT_CURSOR *cursor;
    WT_CURSOR_TABLE *ctable;
    WT_DECL_ITEM(tmp);
    WT_DECL_RET;
    WT_TABLE *table;
    size_t size;
    int cfg_cnt;
    const char *columns, *tablename;

    WT_VERIFY_OPAQUE_POINTER(WT_CURSOR_TABLE);

    /* Split the URI into the table name and the optional projection starting at '('. */
    tablename = uri;
    WT_PREFIX_SKIP_REQUIRED(session, tablename, "table:");
    columns = strchr(tablename, '(');
    if (columns == NULL)
        WT_RET(__wt_schema_get_table_uri(session, uri, false, 0, &table));
    else {
        size = WT_PTRDIFF(columns, tablename);
        WT_RET(__wt_schema_get_table(session, tablename, size, false, 0, &table));
    }

    /*
     * Completeness check. We hold the table from here on and no cursor exists yet to release it
     * on close, so it has to be released explicitly if we fail.
     */
    if ((ret = __curtable_complete(session, table)) != 0) {
        WT_TRET(__wt_schema_release_table(session, &table));
        return (ret);
    }

    if (table->is_simple) {
        /* Just return a cursor on the underlying data source. */
        ret = __wt_open_cursor(session, table->cgroups[0]->source, NULL, cfg, cursorp);

        WT_TRET(__wt_schema_release_table(session, &table));
        if (ret == 0) {
            /* Fix up the public URI to match what was passed in. */
            cursor = *cursorp;
            __wt_free(session, cursor->uri);
            WT_TRET(__wt_strdup(session, uri, &cursor->uri));
        }
        return (ret);
    }

    /* As above: until the table cursor exists, a failure must release the table here. */
    if ((ret = __wt_calloc_one(session, &ctable)) != 0) {
        WT_TRET(__wt_schema_release_table(session, &table));
        return (ret);
    }
    cursor = (WT_CURSOR *)ctable;
    *cursor = iface;
    cursor->session = (WT_SESSION *)session;
    cursor->internal_uri = table->iface.name;
    cursor->key_format = table->key_format;
    cursor->value_format = table->value_format;

    /* From this point the cursor's close method releases the table. */
    ctable->table = table;
    ctable->plan = table->plan;

    /* Handle projections: the cursor gets its own value format and plan for the column list. */
    WT_ERR(__wt_scr_alloc(session, 0, &tmp));
    if (columns != NULL) {
        WT_ERR(__wt_struct_reformat(session, table, columns, strlen(columns), NULL, false, tmp));
        WT_ERR(__wt_strndup(session, tmp->data, tmp->size, &cursor->value_format));

        WT_ERR(__wt_buf_init(session, tmp, 0));
        WT_ERR(__wt_struct_plan(session, table, columns, strlen(columns), false, tmp));
        WT_ERR(__wt_strndup(session, tmp->data, tmp->size, &ctable->plan));
    }

    /*
     * Random retrieval cursors only support next, reset and close.
     */
    WT_ERR(__wt_config_gets_def(session, cfg, "next_random", 0, &cval));
    if (cval.val != 0) {
        __wt_cursor_set_notsup(cursor);
        cursor->next = __curtable_next_random;
        cursor->reset = __curtable_reset;
    }

    WT_ERR(__wt_cursor_init(cursor, cursor->internal_uri, owner, cfg, cursorp));

    if (F_ISSET(cursor, WT_CURSTD_DUMP_JSON))
        WT_ERR(__wt_json_column_init(cursor, uri, table->key_format, NULL, &table->colconf));

    /*
     * Open the colgroup cursors immediately: we're going to need them for
     * any operation.  We defer opening index cursors until we need them
     * for an update.  Note that this must come after the call to
     * __wt_cursor_init: the table cursor must already be on the list of
     * session cursors or we can't work out where to put the colgroup
     * cursor(s).
     */
    WT_ERR(__curtable_open_colgroups(ctable, cfg));

    /*
     * We'll need to squirrel away a copy of the cursor configuration for if/when we open indices.
     *
     * cfg[0] is the baseline configuration for the cursor open and we can acquire another copy from
     * the configuration structures, so it would be reasonable not to copy it here: but I'd rather
     * be safe than sorry.
     *
     * cfg[1] is the application configuration.
     *
     * Underlying indices are always opened without dump or readonly; that information is appended
     * to cfg[1] so later "fast" configuration calls (checking only cfg[0] and cfg[1]) work. I don't
     * expect to see more than two configuration strings here, but it's written to compact into two
     * configuration strings, a copy of cfg[0] and the rest in cfg[1].
     */
    WT_ERR(__wt_calloc_def(session, 3, &ctable->cfg));
    WT_ERR(__wt_strdup(session, cfg[0], &ctable->cfg[0]));
    WT_ERR(__wt_buf_set(session, tmp, "", 0));
    for (cfg_cnt = 1; cfg[cfg_cnt] != NULL; ++cfg_cnt)
        WT_ERR(__wt_buf_catfmt(session, tmp, "%s,", cfg[cfg_cnt]));
    WT_ERR(__wt_buf_catfmt(session, tmp, "dump=\"\",readonly=0"));
    WT_ERR(__wt_strdup(session, tmp->data, &ctable->cfg[1]));

    if (0) {
err:
        if (*cursorp != NULL) {
            /*
             * When a dump cursor is opened, then *cursorp, not cursor, is the dump cursor. Close
             * the dump cursor, and the table cursor will be closed as its child.
             */
            cursor = *cursorp;
            *cursorp = NULL;
        }
        WT_TRET(cursor->close(cursor));
    }

    __wt_scr_free(session, &tmp);
    return (ret);
}
