/*-
 * Copyright (c) 2014-present MongoDB, Inc.
 * Copyright (c) 2008-2014 WiredTiger, Inc.
 *	All rights reserved.
 *
 * See the file LICENSE for redistribution information.
 */

#pragma once

/*
 * Variable-length integer encoding.
 * We need up to 64 bits, signed and unsigned.  Further, we want the packed
 * representation to have the same lexicographic ordering as the integer
 * values.  This avoids the need for special-purpose comparison code.
 *
 * Try hard to keep small values small (up to ~2 bytes): that gives the biggest
 * benefit for common cases storing small values.  After that, just encode the
 * length in the first byte: we could squeeze in a couple of extra bits, but
 * the marginal benefit is small, and we want this code to be relatively
 * easy to implement in client code or scripting APIs.
 *
 * First byte  | Next |                        |
 * byte        | bytes| Min Value              | Max Value
 * ------------+------+------------------------+--------------------------------
 * [00 00xxxx] | free | N/A                    | N/A
 * [00 01llll] | llll | -2^64                  | -2^13 - 2^6
 * [00 1xxxxx] | 1    | -2^13 - 2^6            | -2^6 - 1
 * [01 xxxxxx] | 0    | -2^6                   | -1
 * [10 xxxxxx] | 0    | 0                      | 2^6 - 1
 * [11 0xxxxx] | 1    | 2^6                    | 2^13 + 2^6 - 1
 * [11 10llll] | llll | 2^13 + 2^6             | 2^64 - 1
 * [11 11xxxx] | free | N/A                    | N/A
 */

#define NEG_MULTI_MARKER (uint8_t)0x10
#define NEG_2BYTE_MARKER (uint8_t)0x20
#define NEG_1BYTE_MARKER (uint8_t)0x40
#define POS_1BYTE_MARKER (uint8_t)0x80
#define POS_2BYTE_MARKER (uint8_t)0xc0
#define POS_MULTI_MARKER (uint8_t)0xe0

#define NEG_1BYTE_MIN (-(1 << 6))
#define NEG_2BYTE_MIN (-(1 << 13) + NEG_1BYTE_MIN)
#define POS_1BYTE_MAX ((1 << 6) - 1)
#define POS_2BYTE_MAX ((1 << 13) + POS_1BYTE_MAX)

/* Extract bits <start> to <end> from a value (counting from LSB == 0). */
#define GET_BITS(x, start, end) (((uint64_t)(x) & ((1U << (start)) - 1U)) >> (end))

/*
 * Size checks: return ENOMEM if not enough room when writing, EINVAL if the length is wrong when
 * reading (presumably the value is corrupted).
 */
#define WT_SIZE_CHECK_PACK(l, maxl) WT_RET_TEST((maxl) != 0 && (size_t)(l) > (maxl), ENOMEM)
#define WT_SIZE_CHECK_UNPACK(l, maxl) WT_RET_TEST((maxl) != 0 && (size_t)(l) > (maxl), EINVAL)

/* Count the leading zero bytes. */
#if defined(__GNUC__)
#define WT_LEADING_ZEROS(x, i) ((i) = ((x) == 0) ? (int)sizeof(x) : __builtin_clzll(x) >> 3)
#elif defined(_MSC_VER)
#define WT_LEADING_ZEROS(x, i)              \
    do {                                    \
        if ((x) == 0)                       \
            (i) = (int)sizeof(x);           \
        else {                              \
            unsigned long __index;          \
            _BitScanReverse64(&__index, x); \
            __index = 63 ^ __index;         \
            (i) = (int)(__index >> 3);      \
        }                                   \
    } while (0)
#else
#define WT_LEADING_ZEROS(x, i)                         \
    do {                                               \
        uint64_t __x = (x);                            \
        uint64_t __m = (uint64_t)0xff << 56;           \
        for ((i) = 0; !(__x & __m) && (i) != 8; (i)++) \
            __m >>= 8;                                 \
    } while (0)
#endif

/*
 * __wt_vpack_posint --
 *     Packs a positive variable-length integer in the specified location.
 */
static WT_INLINE int
__wt_vpack_posint(uint8_t **pp, size_t maxlen, uint64_t x)
{
    uint8_t *p;
    int len, lz, shift;

    WT_LEADING_ZEROS(x, lz);
    len = (int)sizeof(x) - lz;
    WT_SIZE_CHECK_PACK(len + 1, maxlen);
    p = *pp;

    /* There are four bits we can use in the first byte. */
    *p++ |= (len & 0xf);

    for (shift = (len - 1) << 3; len != 0; --len, shift -= 8)
        *p++ = (uint8_t)(x >> shift);

    *pp = p;
    return (0);
}

/*
 * __wt_vpack_negint --
 *     Packs a negative variable-length integer in the specified location.
 */
static WT_INLINE int
__wt_vpack_negint(uint8_t **pp, size_t maxlen, uint64_t x)
{
    uint8_t *p;
    int len, lz, shift;

    WT_LEADING_ZEROS(~x, lz);
    len = (int)sizeof(x) - lz;
    WT_SIZE_CHECK_PACK(len + 1, maxlen);
    p = *pp;

    /*
     * There are four size bits we can use in the first byte. For negative numbers, we store the
     * number of leading 0xff bytes to maintain ordering (if this is not obvious, it may help to
     * remember that -1 is the largest negative number).
     */
    *p++ |= (lz & 0xf);

    for (shift = (len - 1) << 3; len != 0; shift -= 8, --len)
        *p++ = (uint8_t)(x >> shift);

    *pp = p;
    return (0);
}

/*
 * __wt_vunpack_posint --
 *     Reads a variable-length positive integer from the specified location.
 */
static WT_INLINE int
__wt_vunpack_posint(const uint8_t **pp, size_t maxlen, uint64_t *retp)
{
    uint64_t x;
    uint8_t len;
    const uint8_t *p;

    /* There are four length bits in the first byte. */
    p = *pp;
    len = (*p++ & 0xf);
    WT_SIZE_CHECK_UNPACK(len + 1, maxlen);

    for (x = 0; len != 0; --len)
        x = (x << 8) | *p++;

    *retp = x;
    *pp = p;
    return (0);
}

/*
 * __wt_vunpack_negint --
 *     Reads a variable-length negative integer from the specified location.
 */
static WT_INLINE int
__wt_vunpack_negint(const uint8_t **pp, size_t maxlen, uint64_t *retp)
{
    uint64_t x;
    uint8_t len;
    const uint8_t *p;

    /* There are four length bits in the first byte. */
    p = *pp;
    len = (int)sizeof(x) - (*p++ & 0xf);
    WT_SIZE_CHECK_UNPACK(len + 1, maxlen);

    for (x = UINT64_MAX; len != 0; --len)
        x = (x << 8) | *p++;

    *retp = x;
    *pp = p;
    return (0);
}

/*
 * __wt_vpack_uint --
 *     Variable-sized packing for unsigned integers
 */
static WT_INLINE int
__wt_vpack_uint(uint8_t **pp, size_t maxlen, uint64_t x)
{
    uint8_t *p;

    WT_SIZE_CHECK_PACK(1, maxlen);
    p = *pp;
    if (x <= POS_1BYTE_MAX)
        *p++ = POS_1BYTE_MARKER | GET_BITS(x, 6, 0);
    else if (x <= POS_2BYTE_MAX) {
        WT_SIZE_CHECK_PACK(2, maxlen);
        x -= POS_1BYTE_MAX + 1;
        *p++ = POS_2BYTE_MARKER | GET_BITS(x, 13, 8);
        *p++ = GET_BITS(x, 8, 0);
    } else if (x == POS_2BYTE_MAX + 1) {
        /*
         * This is a special case where we could store the value with just a single byte, but we
         * append a zero byte so that the encoding doesn't get shorter for this one value.
         */
        *p++ = POS_MULTI_MARKER | 0x1;
        *p++ = 0;
    } else {
        x -= POS_2BYTE_MAX + 1;
        *p = POS_MULTI_MARKER;
        return (__wt_vpack_posint(pp, maxlen, x));
    }

    *pp = p;
    return (0);
}

/*
 * __wt_vpack_int --
 *     Variable-sized packing for signed integers
 */
static WT_INLINE int
__wt_vpack_int(uint8_t **pp, size_t maxlen, int64_t x)
{
    uint8_t *p;

    WT_SIZE_CHECK_PACK(1, maxlen);
    p = *pp;
    if (x < NEG_2BYTE_MIN) {
        *p = NEG_MULTI_MARKER;
        return (__wt_vpack_negint(pp, maxlen, (uint64_t)x));
    }
    if (x < NEG_1BYTE_MIN) {
        WT_SIZE_CHECK_PACK(2, maxlen);
        x -= NEG_2BYTE_MIN;
        *p++ = NEG_2BYTE_MARKER | GET_BITS(x, 13, 8);
        *p++ = GET_BITS(x, 8, 0);
    } else if (x < 0) {
        x -= NEG_1BYTE_MIN;
        *p++ = NEG_1BYTE_MARKER | GET_BITS(x, 6, 0);
    } else
        /* For non-negative values, use the unsigned code above. */
        return (__wt_vpack_uint(pp, maxlen, (uint64_t)x));

    *pp = p;
    return (0);
}

/*
 * __wt_vunpack_uint --
 *     Variable-sized unpacking for unsigned integers
 */
static WT_INLINE int
__wt_vunpack_uint(const uint8_t **pp, size_t maxlen, uint64_t *xp)
{
    const uint8_t *p;

    WT_SIZE_CHECK_UNPACK(1, maxlen);
    p = *pp;
    switch (*p & 0xf0) {
    case POS_1BYTE_MARKER:
    case POS_1BYTE_MARKER | 0x10:
    case POS_1BYTE_MARKER | 0x20:
    case POS_1BYTE_MARKER | 0x30:
        *xp = GET_BITS(*p, 6, 0);
        p += 1;
        break;
    case POS_2BYTE_MARKER:
    case POS_2BYTE_MARKER | 0x10:
        WT_SIZE_CHECK_UNPACK(2, maxlen);
        *xp = GET_BITS(*p++, 5, 0) << 8;
        *xp |= *p++;
        *xp += POS_1BYTE_MAX + 1;
        break;
    case POS_MULTI_MARKER:
        WT_RET(__wt_vunpack_posint(pp, maxlen, xp));
        *xp += POS_2BYTE_MAX + 1;
        return (0);
    default:
        return (EINVAL);
    }

    *pp = p;
    return (0);
}

/*
 * __wt_vunpack_int --
 *     Variable-sized packing for signed integers
 */
static WT_INLINE int
__wt_vunpack_int(const uint8_t **pp, size_t maxlen, int64_t *xp)
{
    const uint8_t *p;

    WT_SIZE_CHECK_UNPACK(1, maxlen);
    p = *pp;
    switch (*p & 0xf0) {
    case NEG_MULTI_MARKER:
        WT_RET(__wt_vunpack_negint(pp, maxlen, (uint64_t *)xp));
        return (0);
    case NEG_2BYTE_MARKER:
    case NEG_2BYTE_MARKER | 0x10:
        WT_SIZE_CHECK_UNPACK(2, maxlen);
        *xp = (int64_t)(GET_BITS(*p++, 5, 0) << 8);
        *xp |= *p++;
        *xp += NEG_2BYTE_MIN;
        break;
    case NEG_1BYTE_MARKER:
    case NEG_1BYTE_MARKER | 0x10:
    case NEG_1BYTE_MARKER | 0x20:
    case NEG_1BYTE_MARKER | 0x30:
        *xp = NEG_1BYTE_MIN + (int64_t)GET_BITS(*p, 6, 0);
        p += 1;
        break;
    default:
        /* Identical to the unsigned case. */
        return (__wt_vunpack_uint(pp, maxlen, (uint64_t *)xp));
    }

    *pp = p;
    return (0);
}

/*
 * __wt_vsize_posint --
 *     Return the packed size of a positive variable-length integer.
 */
static WT_INLINE size_t
__wt_vsize_posint(uint64_t x)
{
    int lz;

    WT_LEADING_ZEROS(x, lz);
    return ((size_t)(WT_INTPACK64_MAXSIZE - lz));
}

/*
 * __wt_vsize_negint --
 *     Return the packed size of a negative variable-length integer.
 */
static WT_INLINE size_t
__wt_vsize_negint(uint64_t x)
{
    int lz;

    WT_LEADING_ZEROS(~x, lz);
    return (size_t)(WT_INTPACK64_MAXSIZE - lz);
}

/*
 * __wt_vsize_uint --
 *     Return the packed size of an unsigned integer.
 */
static WT_INLINE size_t
__wt_vsize_uint(uint64_t x)
{
    if (x <= POS_1BYTE_MAX)
        return (1);
    if (x <= POS_2BYTE_MAX + 1)
        return (2);
    x -= POS_2BYTE_MAX + 1;
    return (__wt_vsize_posint(x));
}

/*
 * __wt_vsize_int --
 *     Return the packed size of a signed integer.
 */
static WT_INLINE size_t
__wt_vsize_int(int64_t x)
{
    if (x < NEG_2BYTE_MIN)
        return (__wt_vsize_negint((uint64_t)x));
    if (x < NEG_1BYTE_MIN)
        return (2);
    if (x < 0)
        return (1);
    /* For non-negative values, use the unsigned code above. */
    return (__wt_vsize_uint((uint64_t)x));
}
