/** * @file index.hpp * @author Ash Vardanian * @brief Single-header Vector Search engine. * @date April 26, 2023 */ #ifndef UNUM_USEARCH_HPP #define UNUM_USEARCH_HPP #define USEARCH_VERSION_MAJOR 2 #define USEARCH_VERSION_MINOR 21 #define USEARCH_VERSION_PATCH 4 // Inferring C++ version // https://stackoverflow.com/a/61552074 #if ((defined(_MSVC_LANG) && _MSVC_LANG >= 201703L) || __cplusplus >= 201703L) #define USEARCH_DEFINED_CPP17 #endif #if ((defined(_MSVC_LANG) && _MSVC_LANG >= 202002L) || __cplusplus >= 202002L) #define USEARCH_DEFINED_CPP20 #endif // Inferring target OS: Windows, MacOS, or Linux #if defined(WIN32) || defined(_WIN32) || defined(__WIN32__) || defined(__NT__) #define USEARCH_DEFINED_WINDOWS #elif defined(__APPLE__) && defined(__MACH__) #define USEARCH_DEFINED_APPLE #elif defined(__linux__) #define USEARCH_DEFINED_LINUX #if defined(__ANDROID_API__) #define USEARCH_DEFINED_ANDROID #endif #endif // Inferring the compiler: Clang vs GCC #if defined(__clang__) #define USEARCH_DEFINED_CLANG #elif defined(__GNUC__) #define USEARCH_DEFINED_GCC #endif // The `#pragma region` and `#pragma endregion` are not supported by GCC 12 and older. // But they are supported by GCC 13, all recent Clang versions, and MSVC. #if defined(__GNUC__) && ((__GNUC__ > 13) || (__GNUC__ == 13 && __GNUC_MINOR__ >= 0)) #define USEARCH_USE_PRAGMA_REGION #elif defined(__clang__) || defined(_MSC_VER) #define USEARCH_USE_PRAGMA_REGION #endif // Inferring hardware architecture: x86 vs Arm #if defined(__x86_64__) #define USEARCH_DEFINED_X86 #elif defined(__aarch64__) #define USEARCH_DEFINED_ARM #endif // Inferring hardware bitness: 32 vs 64 // Using compiler predefined macros for is technically safer than including `` and // using the commonly advised `UINTPTR_MAX` trick, as that constant is optional in standard C/C++. // https://stackoverflow.com/a/5273354 // https://en.cppreference.com/w/cpp/types/integer.html #if defined(_WIN64) || defined(__LP64__) || defined(__x86_64__) || defined(__aarch64__) || defined(__powerpc64__) #define USEARCH_64BIT_ENV #else #define USEARCH_32BIT_ENV #endif #if !defined(USEARCH_USE_OPENMP) #define USEARCH_USE_OPENMP 0 #endif // OS-specific includes #if defined(USEARCH_DEFINED_WINDOWS) #define _USE_MATH_DEFINES #define NOMINMAX #include #include // `fstat` for file size #undef NOMINMAX #undef _USE_MATH_DEFINES #else #include // `fallocate` #include // `posix_memalign` #include // `mmap` #include // `fstat` for file size #include // `open`, `close` #endif // STL includes #include // `std::sort_heap` #include // `std::atomic` #include // `std::bitset` #include // `CHAR_BIT` #include // `std::sqrt` #include // `std::memset` #include // `std::reverse_iterator` #include // `std::unique_lock` - replacement candidate #include // `std::default_random_engine` - replacement candidate #include // `std::runtime_exception` #include // `std::thread` #include // `std::pair` // Helper macros for concatenation and stringification #define usearch_concat_helper_m(a, b) a##b #define usearch_concat_m(a, b) usearch_concat_helper_m(a, b) #define usearch_stringify_helper_m(x) #x #define usearch_stringify_m(x) usearch_stringify_helper_m(x) // Prefetching #if defined(USEARCH_DEFINED_GCC) // https://gcc.gnu.org/onlinedocs/gcc/Other-Builtins.html // Zero means we are only going to read from that memory. // Three means high temporal locality and suggests to keep // the data in all layers of cache. #define usearch_prefetch_m(ptr) __builtin_prefetch((void*)(ptr), 0, 3) #elif defined(USEARCH_DEFINED_X86) #define usearch_prefetch_m(ptr) _mm_prefetch((void*)(ptr), _MM_HINT_T0) #else #define usearch_prefetch_m(ptr) #endif // Function profiling #if defined(usearch_defined_x86) #define usearch_profiled_m __attribute__((noinline)) #define usearch_profile_name_m(name) \ __asm__ volatile(".globl " usearch_stringify_m(usearch_concat_m(name, __COUNTER__)) "\n" usearch_stringify_m( \ usearch_concat_m(name, __COUNTER__)) ":") #elif defined(usearch_defined_arm) #define usearch_profiled_m __attribute__((noinline)) #define usearch_profile_name_m(name) \ __asm__ volatile(".global " usearch_stringify_m(usearch_concat_m(name, __COUNTER__)) "\n" usearch_stringify_m( \ usearch_concat_m(name, __COUNTER__)) ":") #else #define usearch_profiled_m #define usearch_profile_name_m(name) #endif // Alignment #if defined(USEARCH_DEFINED_WINDOWS) #define usearch_pack_m #define usearch_align_m __declspec(align(64)) #else #define usearch_pack_m __attribute__((packed)) #define usearch_align_m __attribute__((aligned(64))) #endif // Debugging #if defined(NDEBUG) #define usearch_assert_m(must_be_true, message) #define usearch_noexcept_m noexcept #else #define usearch_assert_m(must_be_true, message) \ if (!(must_be_true)) { \ usearch_raise_runtime_error(message); \ } #define usearch_noexcept_m #endif extern "C" { /// @brief Helper function to simplify debugging - trace just one symbol - `usearch_raise_runtime_error`. /// Assuming the `extern C` block, the name won't be mangled. inline static void usearch_raise_runtime_error(char const* message) { // On Windows we compile with `/EHc` flag, which specifies that functions // with C linkage do not throw C++ exceptions. #if !defined(__cpp_exceptions) || defined(USEARCH_DEFINED_WINDOWS) std::terminate(); #else throw std::runtime_error(message); #endif } } namespace unum { namespace usearch { using byte_t = char; template std::size_t divide_round_up(std::size_t num) noexcept { return (num + multiple_ak - 1) / multiple_ak; } inline std::size_t divide_round_up(std::size_t num, std::size_t denominator) noexcept { return (num + denominator - 1) / denominator; } inline std::size_t ceil2(std::size_t v) noexcept { v--; v |= v >> 1; v |= v >> 2; v |= v >> 4; v |= v >> 8; v |= v >> 16; #ifdef USEARCH_64BIT_ENV v |= v >> 32; #endif v++; return v; } /// @brief Simply dereferencing misaligned pointers can be dangerous. template void misaligned_store(void* ptr, at v) noexcept { static_assert(!std::is_reference::value, "Can't store a reference"); std::memcpy(ptr, &v, sizeof(at)); } /// @brief Simply dereferencing misaligned pointers can be dangerous. template at misaligned_load(void const* ptr) noexcept { static_assert(!std::is_reference::value, "Can't load a reference"); at v; std::memcpy(&v, ptr, sizeof(at)); return v; } /// @brief The `std::exchange` alternative for C++11. template at exchange(at& obj, other_at&& new_value) { at old_value = std::move(obj); obj = std::forward(new_value); return old_value; } #if defined(USEARCH_DEFINED_CPP20) template void destroy_at(at* obj) { std::destroy_at(obj); } template void construct_at(at* obj) { std::construct_at(obj); } #else /// @brief The `std::destroy_at` alternative for C++11. template typename std::enable_if::value>::type destroy_at(at*) {} template typename std::enable_if::value>::type destroy_at(at* obj) { obj->~sfinae_at(); } /// @brief The `std::construct_at` alternative for C++11. template typename std::enable_if::value>::type construct_at(at*) {} template typename std::enable_if::value>::type construct_at(at* obj) { new (obj) at(); } #endif /** * @brief A reference to a misaligned memory location with a specific type. * It is needed to avoid Undefined Behavior when dereferencing addresses * indivisible by `sizeof(at)`. */ template class misaligned_ref_gt { using element_t = at; using mutable_t = typename std::remove_const::type; byte_t* ptr_; public: misaligned_ref_gt(byte_t* ptr) noexcept : ptr_(ptr) {} operator mutable_t() const noexcept { return misaligned_load(ptr_); } misaligned_ref_gt& operator=(mutable_t const& v) noexcept { misaligned_store(ptr_, v); return *this; } void reset(byte_t* ptr) noexcept { ptr_ = ptr; } byte_t* ptr() const noexcept { return ptr_; } }; /** * @brief A pointer to a misaligned memory location with a specific type. * It is needed to avoid Undefined Behavior when dereferencing addresses * indivisible by `sizeof(at)`. */ template class misaligned_ptr_gt { using element_t = at; using mutable_t = typename std::remove_const::type; byte_t* ptr_; public: using iterator_category = std::random_access_iterator_tag; using value_type = element_t; using difference_type = std::ptrdiff_t; using pointer = misaligned_ptr_gt; using reference = misaligned_ref_gt; misaligned_ptr_gt(byte_t* ptr) noexcept : ptr_(ptr) {} reference operator*() const noexcept { return {ptr_}; } reference operator[](std::size_t i) noexcept { return reference(ptr_ + i * sizeof(element_t)); } value_type operator[](std::size_t i) const noexcept { return misaligned_load(ptr_ + i * sizeof(element_t)); } misaligned_ptr_gt& operator++() noexcept { ptr_ += sizeof(element_t); return *this; } misaligned_ptr_gt& operator--() noexcept { ptr_ -= sizeof(element_t); return *this; } misaligned_ptr_gt operator++(int) noexcept { misaligned_ptr_gt tmp = *this; ++(*this); return tmp; } misaligned_ptr_gt operator--(int) noexcept { misaligned_ptr_gt tmp = *this; --(*this); return tmp; } misaligned_ptr_gt operator+(difference_type d) const noexcept { return misaligned_ptr_gt(ptr_ + d * sizeof(element_t)); } misaligned_ptr_gt operator-(difference_type d) const noexcept { return misaligned_ptr_gt(ptr_ - d * sizeof(element_t)); } difference_type operator-(const misaligned_ptr_gt& other) const noexcept { return (ptr_ - other.ptr_) / sizeof(element_t); } misaligned_ptr_gt& operator+=(difference_type d) noexcept { ptr_ += d * sizeof(element_t); return *this; } misaligned_ptr_gt& operator-=(difference_type d) noexcept { ptr_ -= d * sizeof(element_t); return *this; } bool operator==(misaligned_ptr_gt const& other) const noexcept { return ptr_ == other.ptr_; } bool operator!=(misaligned_ptr_gt const& other) const noexcept { return ptr_ != other.ptr_; } bool operator<(misaligned_ptr_gt const& other) const noexcept { return ptr_ < other.ptr_; } bool operator<=(misaligned_ptr_gt const& other) const noexcept { return ptr_ <= other.ptr_; } bool operator>(misaligned_ptr_gt const& other) const noexcept { return ptr_ > other.ptr_; } bool operator>=(misaligned_ptr_gt const& other) const noexcept { return ptr_ >= other.ptr_; } }; /** * @brief Non-owning memory range view, similar to `std::span`, but for C++11. */ template class span_gt { scalar_at* data_; std::size_t size_; public: span_gt() noexcept : data_(nullptr), size_(0u) {} span_gt(scalar_at* begin, scalar_at* end) noexcept : data_(begin), size_(end - begin) {} span_gt(scalar_at* begin, std::size_t size) noexcept : data_(begin), size_(size) {} scalar_at* data() const noexcept { return data_; } std::size_t size() const noexcept { return size_; } scalar_at* begin() const noexcept { return data_; } scalar_at* end() const noexcept { return data_ + size_; } operator scalar_at*() const noexcept { return data(); } }; /** * @brief Similar to `std::vector`, but doesn't support dynamic resizing. * On the bright side, this can't throw exceptions. */ template > class buffer_gt { scalar_at* data_; std::size_t size_; public: buffer_gt() noexcept : data_(nullptr), size_(0u) {} buffer_gt(std::size_t size) noexcept : data_(allocator_at{}.allocate(size)), size_(data_ ? size : 0u) { if (!std::is_trivially_default_constructible::value) for (std::size_t i = 0; i != size_; ++i) construct_at(data_ + i); } ~buffer_gt() noexcept { reset(); } void reset() noexcept { if (!std::is_trivially_destructible::value) for (std::size_t i = 0; i != size_; ++i) unum::usearch::destroy_at(data_ + i); //< Facing some symbol visibility/ambiguity issues allocator_at{}.deallocate(data_, size_); data_ = nullptr; size_ = 0; } scalar_at* data() const noexcept { return data_; } std::size_t size() const noexcept { return size_; } scalar_at* begin() const noexcept { return data_; } scalar_at* end() const noexcept { return data_ + size_; } operator scalar_at*() const noexcept { return data(); } scalar_at& operator[](std::size_t i) noexcept { return data_[i]; } scalar_at const& operator[](std::size_t i) const noexcept { return data_[i]; } explicit operator bool() const noexcept { return data_; } scalar_at* release() noexcept { size_ = 0; return exchange(data_, nullptr); } buffer_gt(buffer_gt const&) = delete; buffer_gt& operator=(buffer_gt const&) = delete; buffer_gt(buffer_gt&& other) noexcept : data_(exchange(other.data_, nullptr)), size_(exchange(other.size_, 0)) {} buffer_gt& operator=(buffer_gt&& other) noexcept { std::swap(data_, other.data_); std::swap(size_, other.size_); return *this; } }; /** * @brief A lightweight error class for handling error messages, * which are expected to be allocated in static memory. */ class error_t { char const* message_{}; public: error_t() noexcept : message_(nullptr) {} error_t(char const* message) noexcept : message_(message) {} error_t& operator=(char const* message) noexcept { message_ = message; return *this; } error_t(error_t const&) = delete; error_t& operator=(error_t const&) = delete; error_t(error_t&& other) noexcept : message_(exchange(other.message_, nullptr)) {} error_t& operator=(error_t&& other) noexcept { std::swap(message_, other.message_); return *this; } /// @brief Checks if there was an error. explicit operator bool() const noexcept { return message_ != nullptr; } /// @brief Returns the error message. char const* what() const noexcept { return message_; } /// @brief Releases the error message, meaning the caller takes ownership. char const* release() noexcept { return exchange(message_, nullptr); } #if defined(__cpp_exceptions) || defined(__EXCEPTIONS) /// @brief Destructor raises an exception if an error was recorded. ~error_t() noexcept(false) { #if defined(USEARCH_DEFINED_CPP17) if (message_ && std::uncaught_exceptions() == 0) #else if (message_ && std::uncaught_exception() == 0) #endif raise(); } /// @brief Throws an exception using to be caught by `try` / `catch`. void raise() noexcept(false) { if (message_) throw std::runtime_error(exchange(message_, nullptr)); } #else /// @brief Destructor terminates if an error was recorded. ~error_t() noexcept { raise(); } /// @brief Terminates if an error was recorded. void raise() noexcept { if (message_) std::terminate(); } #endif }; /** * @brief Similar to `std::expected` in C++23, wraps a statement evaluation result, * or an error. It's used to avoid raising exception, and gracefully propagate * the error. * * @tparam result_at The type of the expected result. */ template struct expected_gt { result_at result; error_t error; operator result_at&() & { error.raise(); return result; } operator result_at&&() && { error.raise(); return std::move(result); } result_at const& operator*() const noexcept { return result; } explicit operator bool() const noexcept { return !error; } expected_gt failed(error_t message) noexcept { error = std::move(message); return std::move(*this); } }; /** * @brief Light-weight bitset implementation to sync nodes updates during graph mutations. * Extends basic functionality with @b atomic operations. */ template > class bitset_gt { using allocator_t = allocator_at; using byte_t = typename allocator_t::value_type; static_assert(sizeof(byte_t) == 1, "Allocator must allocate separate addressable bytes"); using compressed_slot_t = unsigned long; static constexpr std::size_t bits_per_slot() { return sizeof(compressed_slot_t) * CHAR_BIT; } static constexpr compressed_slot_t bits_mask() { return sizeof(compressed_slot_t) * CHAR_BIT - 1; } static constexpr std::size_t slots(std::size_t bits) { return divide_round_up(bits); } compressed_slot_t* slots_{}; /// @brief Number of slots. std::size_t count_{}; public: bitset_gt() noexcept {} ~bitset_gt() noexcept { reset(); } explicit operator bool() const noexcept { return slots_; } void clear() noexcept { if (slots_) std::memset(slots_, 0, count_ * sizeof(compressed_slot_t)); } void reset() noexcept { if (slots_) allocator_t{}.deallocate((byte_t*)slots_, count_ * sizeof(compressed_slot_t)); slots_ = nullptr; count_ = 0; } bitset_gt(std::size_t capacity) noexcept : slots_((compressed_slot_t*)allocator_t{}.allocate(slots(capacity) * sizeof(compressed_slot_t))), count_(slots_ ? slots(capacity) : 0u) { clear(); } bitset_gt(bitset_gt&& other) noexcept { slots_ = exchange(other.slots_, nullptr); count_ = exchange(other.count_, 0); } bitset_gt& operator=(bitset_gt&& other) noexcept { std::swap(slots_, other.slots_); std::swap(count_, other.count_); return *this; } bitset_gt(bitset_gt const&) = delete; bitset_gt& operator=(bitset_gt const&) = delete; inline bool test(std::size_t i) const noexcept { return slots_[i / bits_per_slot()] & (1ul << (i & bits_mask())); } inline bool set(std::size_t i) noexcept { compressed_slot_t& slot = slots_[i / bits_per_slot()]; compressed_slot_t mask{1ul << (i & bits_mask())}; bool value = slot & mask; slot |= mask; return value; } #if defined(USEARCH_DEFINED_WINDOWS) inline bool atomic_set(std::size_t i) noexcept { compressed_slot_t mask{1ul << (i & bits_mask())}; return InterlockedOr((long volatile*)&slots_[i / bits_per_slot()], mask) & mask; } inline void atomic_reset(std::size_t i) noexcept { compressed_slot_t mask{1ul << (i & bits_mask())}; InterlockedAnd((long volatile*)&slots_[i / bits_per_slot()], ~mask); } #else inline bool atomic_set(std::size_t i) noexcept { compressed_slot_t mask{1ul << (i & bits_mask())}; return __atomic_fetch_or(&slots_[i / bits_per_slot()], mask, __ATOMIC_ACQUIRE) & mask; } inline void atomic_reset(std::size_t i) noexcept { compressed_slot_t mask{1ul << (i & bits_mask())}; __atomic_fetch_and(&slots_[i / bits_per_slot()], ~mask, __ATOMIC_RELEASE); } #endif class lock_t { bitset_gt& bitset_; std::size_t bit_offset_; public: inline ~lock_t() noexcept { bitset_.atomic_reset(bit_offset_); } inline lock_t(bitset_gt& bitset, std::size_t bit_offset) noexcept : bitset_(bitset), bit_offset_(bit_offset) { while (bitset_.atomic_set(bit_offset_)) ; } }; inline lock_t lock(std::size_t i) noexcept { return {*this, i}; } }; using bitset_t = bitset_gt<>; /** * @brief Similar to `std::priority_queue`, but allows raw access to underlying * memory, in case you want to shuffle it or sort. Good for collections * from 100s to 10'000s elements. * * In a max-heap, the heap property ensures that the value of each node is greater * than or equal to the values of its children. This means that the largest element * is always at the root of the heap. * * @section Heap Structures * * There are several designs of heaps. Binary heaps are the simplest & most common * variant, that is easy to implement as a succint array. However, they are not the * most efficient for all operations. Most importantly, @b melding (merging) of * two heaps has linear complexity in time. * * +-----------------+---------+-----------+---------+--------------+---------+ * | Operation | find-max| delete-max| insert | increase-key | meld | * +-----------------+---------+-----------+---------+--------------+---------+ * | Binary | Θ(1) | Θ(log n) | O(log n)| O(log n) | Θ(n) | * | Leftist | Θ(1) | Θ(log n) | O(log n)| Θ(log n) | Θ(log n)| * | Binomial | Θ(1) | Θ(log n) | Θ(1) | Θ(log n) | O(log n)| * | Skew binomial | Θ(1) | Θ(log n) | Θ(1) | O(log n) | O(log n)| * | Pairing | Θ(1) | O(log n) | Θ(1) | o(log n) | Θ(1) | * | Rank-pairing | Θ(1) | O(log n) | Θ(1) | Θ(1) | Θ(1) | * | Fibonacci | Θ(1) | O(log n) | Θ(1) | Θ(1) | Θ(1) | * | Strict Fibonacci| Θ(1) | O(log n) | Θ(1) | Θ(1) | Θ(1) | * | Brodal | Θ(1) | Θ(log n) | Θ(1) | Θ(1) | Θ(1) | * | 2–3 heap | Θ(1) | O(log n) | Θ(1) | Θ(1) | O(log n)| * +-----------------+---------+-----------+---------+--------------+---------+ * * It's well known, that improved priority queue structures translate into better * graph-transversal algorithms. For example, Dijkstra's algorithm can be sped up * by using a Fibonacci heap for arbitrary weights. For integer weight bounded * by L, Schrijver reported following time complexities in 2004: * * +------------+-------------------------------------+----------------------------+--------------------------+ * | Weights | Algorithm | Time complexity | Author | * +------------+-------------------------------------+----------------------------+--------------------------+ * | R | | O(V^2 EL) | Ford 1956 | * | R | Bellman–Ford algorithm | O(VE) | Shimbel 1955, Bellman | * | | | | 1958, Moore 1959 | * | R | | O(V^2 log V) | Dantzig 1960 | * | R | Dijkstra's with list | O(V^2) | Leyzorek et al. 1957, | * | | | | Dijkstra 1959... | * | R | Dijkstra's with binary heap | O((E + V) log V) | Johnson 1977 | * | R | Dijkstra's with Fibonacci heap | O(E + V log V) | Fredman & Tarjan 1984, | * | | | | Fredman & Tarjan 1987 | * | R | Quantum Dijkstra | O(√VE log^2 V) | Dürr et al. 2006 | * | R | Dial's algorithm (Dijkstra's using | O(E + LV) | Dial 1969 | * | | a bucket queue with L buckets) | | | * | N | | O(E log log L) | Johnson 1981, Karlsson & | * | | | | Poblete 1983 | * | N | Gabow's algorithm | O(E log_E/V L) | Gabow 1983, Gabow 1985 | * | N | | O(E + V √log L) | Ahuja et al. 1990 | * | N | Thorup | O(E + V log log V) | Thorup 2004 | * +------------+-------------------------------------+----------------------------+--------------------------+ * * Possible improvements: * - Randomized meldable heaps: https://en.wikipedia.org/wiki/Randomized_meldable_heap * - D-ary heaps: https://en.wikipedia.org/wiki/D-ary_heap * - B-heap: https://en.wikipedia.org/wiki/B-heap */ template , // is needed before C++14. typename allocator_at = std::allocator> // class max_heap_gt { public: using element_t = element_at; using comparator_t = comparator_at; using allocator_t = allocator_at; using value_type = element_t; static_assert(std::is_trivially_destructible(), "This heap is designed for trivial structs"); static_assert(std::is_trivially_copy_constructible(), "This heap is designed for trivial structs"); private: element_t* elements_; std::size_t size_; std::size_t capacity_; public: max_heap_gt() noexcept : elements_(nullptr), size_(0), capacity_(0) {} max_heap_gt(max_heap_gt&& other) noexcept : elements_(exchange(other.elements_, nullptr)), size_(exchange(other.size_, 0)), capacity_(exchange(other.capacity_, 0)) {} max_heap_gt& operator=(max_heap_gt&& other) noexcept { std::swap(elements_, other.elements_); std::swap(size_, other.size_); std::swap(capacity_, other.capacity_); return *this; } max_heap_gt(max_heap_gt const&) = delete; max_heap_gt& operator=(max_heap_gt const&) = delete; ~max_heap_gt() noexcept { reset(); } void reset() noexcept { if (elements_) allocator_t{}.deallocate(elements_, capacity_); elements_ = nullptr; capacity_ = 0; size_ = 0; } inline bool empty() const noexcept { return !size_; } inline std::size_t size() const noexcept { return size_; } inline std::size_t capacity() const noexcept { return capacity_; } inline element_t* data() noexcept { return elements_; } inline element_t const* data() const noexcept { return elements_; } inline void clear() noexcept { size_ = 0; } inline void shrink(std::size_t n) noexcept { size_ = (std::min)(n, size_); } /// @brief Selects the largest element in the heap. /// @return Reference to the stored element. inline element_t const& top() const noexcept { return elements_[0]; } /// @brief Invalidates the "max-heap" property, transforming into ascending range. inline void sort_ascending() noexcept { std::sort_heap(elements_, elements_ + size_, &less); } /** * @brief Ensures the heap has enough capacity for the specified number of elements. * @param new_capacity The desired minimum capacity. * @return True if the capacity was successfully increased, false otherwise. */ usearch_profiled_m bool reserve(std::size_t new_capacity) noexcept { usearch_profile_name_m(max_heap_reserve); if (new_capacity < capacity_) return true; new_capacity = ceil2(new_capacity); new_capacity = (std::max)(new_capacity, (std::max)(capacity_ * 2u, 16u)); auto allocator = allocator_t{}; auto new_elements = allocator.allocate(new_capacity); if (!new_elements) return false; if (elements_) { std::memcpy(new_elements, elements_, size_ * sizeof(element_t)); allocator.deallocate(elements_, capacity_); } elements_ = new_elements; capacity_ = new_capacity; return new_elements; } /** * @brief Inserts an element into the heap. * @param element The element to be inserted. * @return True if the element was successfully inserted, false otherwise. */ bool insert(element_t&& element) noexcept { if (!reserve(size_ + 1)) return false; insert_reserved(std::move(element)); return true; } /** * @brief Inserts an element into the heap without reserving additional space. * @param element The element to be inserted. */ usearch_profiled_m void insert_reserved(element_t&& element) noexcept { usearch_profile_name_m(max_heap_insert_reserved); new (&elements_[size_]) element_t(element); size_++; shift_up(size_ - 1); } /** * @brief Inserts multiple elements into the heap. * @param elements Pointer to the elements to be inserted. * @return True if the elements were successfully inserted, false otherwise. */ inline bool insert_many(element_t const* elements) noexcept { // Wikipedia describes a procedure, due to Floyd, which constructs a heap from an array in linear time. // It also mentions a procedure for merging two heaps, of sizes 𝑛 and 𝑘, in time 𝑂(𝑘+log𝑘log𝑛). // Altogether, we can add 𝑘 elements to a heap of length 𝑛 in time 𝑂(𝑘+log𝑘log𝑛): first build a heap containing // 𝑘 elements to be inserted (takes 𝑂(𝑘) time), then merge that with the heap of size 𝑛 (takes 𝑂(𝑘+log𝑘log𝑛) // time). Compare this to repeated insertion, which would run in time 𝑂(𝑘log𝑛). return false; } usearch_profiled_m element_t pop() noexcept { usearch_profile_name_m(max_heap_pop); element_t result = top(); std::swap(elements_[0], elements_[size_ - 1]); size_--; elements_[size_].~element_t(); shift_down(0); return result; } private: static std::size_t parent_idx(std::size_t i) noexcept { return (i - 1u) / 2u; } static std::size_t left_child_idx(std::size_t i) noexcept { return (i * 2u) + 1u; } static std::size_t right_child_idx(std::size_t i) noexcept { return (i * 2u) + 2u; } static bool less(element_t const& a, element_t const& b) noexcept { return comparator_t{}(a, b); } /** * @brief Shifts an element up to maintain the heap property. * This operation is called when a new element is @b added at the end of the heap. * The element is moved up until the heap property is restored. * @param i Index of the element to be shifted up. */ void shift_up(std::size_t i) noexcept { for (; i && less(elements_[parent_idx(i)], elements_[i]); i = parent_idx(i)) std::swap(elements_[parent_idx(i)], elements_[i]); } /** * @brief Shifts an element down to maintain the heap property. * This operation is called when the root element is @b removed and the last element is moved to the root. * The element is moved down until the heap property is restored. * @param i Index of the element to be shifted down. */ void shift_down(std::size_t i) noexcept { std::size_t max_idx = i; std::size_t left = left_child_idx(i); if (left < size_ && less(elements_[max_idx], elements_[left])) max_idx = left; std::size_t right = right_child_idx(i); if (right < size_ && less(elements_[max_idx], elements_[right])) max_idx = right; if (i != max_idx) { std::swap(elements_[i], elements_[max_idx]); shift_down(max_idx); } } }; /** * @brief Similar to `std::priority_queue`, but allows raw access to underlying * memory and always keeps the data sorted. Ideal for small collections * under 128 elements. */ template , // is needed before C++14. typename allocator_at = std::allocator> // class sorted_buffer_gt { public: using element_t = element_at; using comparator_t = comparator_at; using allocator_t = allocator_at; static_assert(std::is_trivially_destructible(), "This heap is designed for trivial structs"); static_assert(std::is_trivially_copy_constructible(), "This heap is designed for trivial structs"); using value_type = element_t; private: element_t* elements_; std::size_t size_; std::size_t capacity_; public: sorted_buffer_gt() noexcept : elements_(nullptr), size_(0), capacity_(0) {} sorted_buffer_gt(sorted_buffer_gt&& other) noexcept : elements_(exchange(other.elements_, nullptr)), size_(exchange(other.size_, 0)), capacity_(exchange(other.capacity_, 0)) {} sorted_buffer_gt& operator=(sorted_buffer_gt&& other) noexcept { std::swap(elements_, other.elements_); std::swap(size_, other.size_); std::swap(capacity_, other.capacity_); return *this; } sorted_buffer_gt(sorted_buffer_gt const&) = delete; sorted_buffer_gt& operator=(sorted_buffer_gt const&) = delete; ~sorted_buffer_gt() noexcept { reset(); } void reset() noexcept { if (elements_) allocator_t{}.deallocate(elements_, capacity_); elements_ = nullptr; capacity_ = 0; size_ = 0; } inline bool empty() const noexcept { return !size_; } inline std::size_t size() const noexcept { return size_; } inline std::size_t capacity() const noexcept { return capacity_; } inline element_t const& top() const noexcept { return elements_[size_ - 1]; } inline void clear() noexcept { size_ = 0; } bool reserve(std::size_t new_capacity) noexcept { if (new_capacity < capacity_) return true; new_capacity = ceil2(new_capacity); new_capacity = (std::max)(new_capacity, (std::max)(capacity_ * 2u, 16u)); auto allocator = allocator_t{}; auto new_elements = allocator.allocate(new_capacity); if (!new_elements) return false; if (size_) std::memcpy(new_elements, elements_, size_ * sizeof(element_t)); if (elements_) allocator.deallocate(elements_, capacity_); elements_ = new_elements; capacity_ = new_capacity; return true; } inline void insert_reserved(element_t&& element) noexcept { std::size_t slot = size_ ? std::lower_bound(elements_, elements_ + size_, element, &less) - elements_ : 0; std::size_t to_move = size_ - slot; element_t* source = elements_ + size_ - 1; for (; to_move; --to_move, --source) source[1] = source[0]; elements_[slot] = element; size_++; } /** * @return `true` if the entry was added, `false` if it wasn't relevant enough. */ inline bool insert(element_t&& element, std::size_t limit) noexcept { std::size_t slot = size_ ? std::lower_bound(elements_, elements_ + size_, element, &less) - elements_ : 0; if (slot == limit) return false; std::size_t to_move = size_ - slot - (size_ == limit); element_t* source = elements_ + size_ - 1 - (size_ == limit); for (; to_move; --to_move, --source) source[1] = source[0]; elements_[slot] = element; size_ += size_ != limit; return true; } inline element_t pop() noexcept { size_--; element_t result = elements_[size_]; elements_[size_].~element_t(); return result; } void sort_ascending() noexcept {} inline void shrink(std::size_t n) noexcept { size_ = (std::min)(n, size_); } inline element_t* data() noexcept { return elements_; } inline element_t const* data() const noexcept { return elements_; } private: static bool less(element_t const& a, element_t const& b) noexcept { return comparator_t{}(a, b); } }; #if defined(USEARCH_DEFINED_WINDOWS) #pragma pack(push, 1) // Pack struct elements on 1-byte alignment #endif /** * @brief Five-byte integer type to address node clouds with over 4B entries. * * 40 bits is enough to address a @b Trillion entries potentially colocated on 1 machine. * At roughly 5 bytes * 20 neighbors + 100 bytes per entry, this translates to 200 TB of data, * which is similar to a single-server capacity of modern NVME arrays. */ class usearch_pack_m uint40_t { unsigned char octets[5]; inline uint40_t& broadcast(unsigned char c) { std::memset(octets, c, 5); return *this; } public: inline uint40_t() noexcept { broadcast(0); } inline uint40_t(std::uint32_t n) noexcept { std::memcpy(&octets, &n, 4); octets[4] = 0; } #ifdef USEARCH_64BIT_ENV inline uint40_t(std::uint64_t n) noexcept { std::memcpy(octets, &n, 5); } #endif uint40_t(uint40_t&&) = default; uint40_t(uint40_t const&) = default; uint40_t& operator=(uint40_t&&) = default; uint40_t& operator=(uint40_t const&) = default; #if defined(USEARCH_DEFINED_CLANG) && defined(USEARCH_DEFINED_APPLE) inline uint40_t(std::size_t n) noexcept { #ifdef USEARCH_64BIT_ENV std::memcpy(octets, &n, 5); #else std::memcpy(octets, &n, 4); octets[4] = 0; #endif // USEARCH_64BIT_ENV } #endif // USEARCH_DEFINED_CLANG && USEARCH_DEFINED_APPLE inline operator std::size_t() const noexcept { std::size_t result = 0; #ifdef USEARCH_64BIT_ENV std::memcpy(&result, octets, 5); #else std::memcpy(&result, octets, 4); #endif return result; } inline static uint40_t max() noexcept { return uint40_t{}.broadcast(0xFF); } inline static uint40_t min() noexcept { return uint40_t{}.broadcast(0); } inline bool operator==(uint40_t const& other) const noexcept { return std::memcmp(octets, other.octets, 5) == 0; } inline bool operator!=(uint40_t const& other) const noexcept { return !(*this == other); } inline bool operator>(uint40_t const& other) const noexcept { return other < *this; } inline bool operator<=(uint40_t const& other) const noexcept { return !(*this > other); } inline bool operator>=(uint40_t const& other) const noexcept { return !(*this < other); } inline bool operator<(uint40_t const& other) const noexcept { for (int i = 0; i < 5; ++i) { if (octets[4 - i] < other.octets[4 - i]) return true; if (octets[4 - i] > other.octets[4 - i]) return false; } return false; } }; #if defined(USEARCH_DEFINED_WINDOWS) #pragma pack(pop) // Reset alignment to default #endif static_assert(sizeof(uint40_t) == 5, "uint40_t must be exactly 5 bytes"); /** * @brief Reflection-helper to get the default "unused" value for a given type. * Needed to initialize hash-sets and bit-sets. */ template struct default_free_value_gt { template ::value>::type* = nullptr> static sfinae_element_at value() noexcept { return std::numeric_limits::max(); } template ::value>::type* = nullptr> static sfinae_element_at value() noexcept { return element_at(); } }; template <> struct default_free_value_gt { static uint40_t value() noexcept { return uint40_t::max(); } }; template element_at default_free_value() { return default_free_value_gt::value(); } /** * @brief Adapter to allow definining arbitrary hash functions for keys and slots. * It's added, as overloading `std::hash` is not recommended by the standard. */ template struct hash_gt { std::size_t operator()(element_at const& element) const noexcept { return std::hash{}(element); } }; template <> struct hash_gt { std::size_t operator()(uint40_t const& element) const noexcept { return std::hash{}(element); } }; /** * @brief Minimalistic hash-set implementation to track visited nodes during graph traversal. * In our primary usecase, its a sparse alternative to a bit-set. * * It doesn't support deletion of separate objects, but supports `clear`-ing all at once. * It expects `reserve` to be called ahead of all insertions, so no resizes are needed. * It also assumes `0xFF...FF` slots to be unused, to simplify the design. * It uses linear probing, the number of slots is always a power of two, and it uses linear-probing * in case of bucket collisions. */ template , typename allocator_at = std::allocator> class growing_hash_set_gt { using element_t = element_at; using hasher_t = hasher_at; using allocator_t = allocator_at; using byte_t = typename allocator_t::value_type; static_assert(sizeof(byte_t) == 1, "Allocator must allocate separate addressable bytes"); element_t* slots_{}; /// @brief Number of slots. std::size_t capacity_{}; /// @brief Number of populated. std::size_t count_{}; hasher_t hasher_{}; public: growing_hash_set_gt() noexcept {} ~growing_hash_set_gt() noexcept { reset(); } explicit operator bool() const noexcept { return slots_; } std::size_t size() const noexcept { return count_; } void clear() noexcept { if (slots_) std::memset((void*)slots_, 0xFF, capacity_ * sizeof(element_t)); count_ = 0; } void reset() noexcept { if (slots_) allocator_t{}.deallocate((byte_t*)slots_, capacity_ * sizeof(element_t)); slots_ = nullptr; capacity_ = 0; count_ = 0; } growing_hash_set_gt(std::size_t capacity) noexcept : slots_((element_t*)allocator_t{}.allocate(ceil2(capacity) * sizeof(element_t))), capacity_(slots_ ? ceil2(capacity) : 0u), count_(0u) { clear(); } growing_hash_set_gt(growing_hash_set_gt&& other) noexcept { slots_ = exchange(other.slots_, nullptr); capacity_ = exchange(other.capacity_, 0); count_ = exchange(other.count_, 0); } growing_hash_set_gt& operator=(growing_hash_set_gt&& other) noexcept { std::swap(slots_, other.slots_); std::swap(capacity_, other.capacity_); std::swap(count_, other.count_); return *this; } growing_hash_set_gt(growing_hash_set_gt const&) = delete; growing_hash_set_gt& operator=(growing_hash_set_gt const&) = delete; /** * @brief Checks if the element is already in the hash-set. * @return `true` if the element is already in the hash-set. */ inline bool test(element_t const& elem) const noexcept { std::size_t index = hasher_(elem) & (capacity_ - 1); while (slots_[index] != default_free_value()) { if (slots_[index] == elem) return true; index = (index + 1) & (capacity_ - 1); } return false; } /** * @brief Inserts an element into the hash-set. * @return Similar to `bitset_gt`, returns the previous value. */ inline bool set(element_t const& elem) noexcept { std::size_t index = hasher_(elem) & (capacity_ - 1); while (slots_[index] != default_free_value()) { // Already exists if (slots_[index] == elem) return true; index = (index + 1) & (capacity_ - 1); } slots_[index] = elem; ++count_; return false; } /** * @brief Extends the capacity of the hash-set. * @return `true` if enough capacity is available, `false` if memory allocation failed. */ bool reserve(std::size_t new_capacity) noexcept { new_capacity = (new_capacity * 5u) / 3u; if (new_capacity <= capacity_) return true; new_capacity = ceil2(new_capacity); element_t* new_slots = (element_t*)allocator_t{}.allocate(new_capacity * sizeof(element_t)); if (!new_slots) return false; std::memset((void*)new_slots, 0xFF, new_capacity * sizeof(element_t)); std::size_t new_count = count_; if (count_) { for (std::size_t old_index = 0; old_index != capacity_; ++old_index) { if (slots_[old_index] == default_free_value()) continue; std::size_t new_index = hasher_(slots_[old_index]) & (new_capacity - 1); while (new_slots[new_index] != default_free_value()) new_index = (new_index + 1) & (new_capacity - 1); new_slots[new_index] = slots_[old_index]; } } reset(); slots_ = new_slots; capacity_ = new_capacity; count_ = new_count; return true; } }; /** * @brief Basic single-threaded @b ring class, used for all kinds of task queues. */ template > // class ring_gt { public: using element_t = element_at; using allocator_t = allocator_at; static_assert(std::is_trivially_destructible(), "This ring is designed for trivial structs"); static_assert(std::is_trivially_copy_constructible(), "This ring is designed for trivial structs"); using value_type = element_t; private: element_t* elements_{}; std::size_t capacity_{}; std::size_t head_{}; std::size_t tail_{}; bool empty_{true}; allocator_t allocator_{}; public: explicit ring_gt(allocator_t const& alloc = allocator_t()) noexcept : allocator_(alloc) {} ring_gt(ring_gt const&) = delete; ring_gt& operator=(ring_gt const&) = delete; ring_gt(ring_gt&& other) noexcept { swap(other); } ring_gt& operator=(ring_gt&& other) noexcept { swap(other); return *this; } void swap(ring_gt& other) noexcept { std::swap(elements_, other.elements_); std::swap(capacity_, other.capacity_); std::swap(head_, other.head_); std::swap(tail_, other.tail_); std::swap(empty_, other.empty_); std::swap(allocator_, other.allocator_); } ~ring_gt() noexcept { reset(); } bool empty() const noexcept { return empty_; } size_t capacity() const noexcept { return capacity_; } size_t size() const noexcept { if (empty_) return 0; else if (head_ > tail_) return head_ - tail_; else return capacity_ - (tail_ - head_); } void clear() noexcept { head_ = 0; tail_ = 0; empty_ = true; } void reset() noexcept { if (elements_) allocator_.deallocate(elements_, capacity_); elements_ = nullptr; capacity_ = 0; head_ = 0; tail_ = 0; empty_ = true; } bool reserve(std::size_t n) noexcept { if (n < size()) return false; // prevent data loss if (n <= capacity()) return true; n = (std::max)(ceil2(n), 64u); element_t* elements = allocator_.allocate(n); if (!elements) return false; std::size_t i = 0; while (try_pop(elements[i])) i++; reset(); elements_ = elements; capacity_ = n; head_ = i; tail_ = 0; empty_ = (i == 0); return true; } void push(element_t const& value) usearch_noexcept_m { usearch_assert_m(capacity() > 0, "Ring buffer is not initialized"); usearch_assert_m(size() < capacity(), "Ring buffer is full"); elements_[head_] = value; head_ = (head_ + 1) % capacity_; empty_ = false; } bool try_push(element_t const& value) noexcept { if (head_ == tail_ && !empty_) return false; // `elements_` is full return push(value); return true; } bool try_pop(element_t& value) noexcept { if (empty_) return false; value = std::move(elements_[tail_]); tail_ = (tail_ + 1) % capacity_; empty_ = head_ == tail_; return true; } element_t const& operator[](std::size_t i) const noexcept { return elements_[(tail_ + i) % capacity_]; } }; /// @brief Number of neighbors per graph node. /// Defaults to 32 in FAISS and 16 in hnswlib. /// > It is called `M` in the paper. constexpr std::size_t default_connectivity() { return 16; } /// @brief Hyper-parameter controlling the quality of indexing. /// Defaults to 40 in FAISS and 200 in hnswlib. /// > It is called `efConstruction` in the paper. constexpr std::size_t default_expansion_add() { return 128; } /// @brief Hyper-parameter controlling the quality of search. /// Defaults to 16 in FAISS and 10 in hnswlib. /// > It is called `ef` in the paper. constexpr std::size_t default_expansion_search() { return 64; } constexpr std::size_t default_allocator_entry_bytes() { return 64; } /** * @brief Configuration settings for the index construction. * Includes the main `::connectivity` parameter (`M` in the paper) * and two expansion factors - for construction and search. */ struct index_config_t { /// @brief Number of neighbors per graph node. /// Defaults to 32 in FAISS and 16 in hnswlib. /// > It is called `M` in the paper. std::size_t connectivity = default_connectivity(); /// @brief Number of neighbors per graph node in base level graph. /// Defaults to double of the other levels, so 64 in FAISS and 32 in hnswlib. /// > It is called `M0` in the paper. std::size_t connectivity_base = default_connectivity() * 2; inline index_config_t() = default; inline index_config_t(std::size_t c, std::size_t cb = 0) noexcept : connectivity(c), connectivity_base(cb) {} /** * @brief Validates the configuration settings, updating them in-place. * @return Error message, if any. */ inline error_t validate() noexcept { if (connectivity == 0) connectivity = default_connectivity(); if (connectivity_base == 0) connectivity_base = connectivity * 2; if (connectivity < 2) return "Connectivity must be at least 2, otherwise the index degenerates into ropes"; if (connectivity_base < connectivity) return "Base layer should be at least as connected as the rest of the graph"; return {}; } /** * @brief Immutable function to check if the configuration is valid. * @return `true` if the configuration is valid. */ inline bool is_valid() const noexcept { return connectivity >= 2 && connectivity_base >= connectivity; } }; /** * @brief Growth settings for the index container. * Includes the upper bound for `::members` capacity, * and the number of read/write threads expected to work with the index. */ struct index_limits_t { /// @brief Maximum number of entries in the index. std::size_t members = 0; /// @brief Max number of threads simultaneously updating entries. std::size_t threads_add = std::thread::hardware_concurrency(); /// @brief Max number of threads simultaneously searching entries. std::size_t threads_search = std::thread::hardware_concurrency(); inline index_limits_t(std::size_t n, std::size_t t) noexcept : members(n), threads_add(t), threads_search(t) {} inline index_limits_t(std::size_t n = 0) noexcept : index_limits_t(n, std::thread::hardware_concurrency()) {} /// @brief Returns the upper limit for the number of threads. inline std::size_t threads() const noexcept { return (std::max)(threads_add, threads_search); } /// @brief Returns the concurrency-level of the index - the minimum of thread counts. inline std::size_t concurrency() const noexcept { return (std::min)(threads_add, threads_search); } }; struct index_update_config_t { /// @brief Hyper-parameter controlling the quality of indexing. /// Defaults to 40 in FAISS and 200 in hnswlib. /// > It is called `efConstruction` in the paper. std::size_t expansion = default_expansion_add(); /// @brief Optional thread identifier for multi-threaded construction. std::size_t thread = 0; }; struct index_search_config_t { /// @brief Hyper-parameter controlling the quality of search. /// Defaults to 16 in FAISS and 10 in hnswlib. /// > It is called `ef` in the paper. std::size_t expansion = default_expansion_search(); /// @brief Optional thread identifier for multi-threaded construction. std::size_t thread = 0; /// @brief Brute-forces exhaustive search over all entries in the index. bool exact = false; }; struct index_cluster_config_t { /// @brief Hyper-parameter controlling the quality of search. /// Defaults to 16 in FAISS and 10 in hnswlib. /// > It is called `ef` in the paper. std::size_t expansion = default_expansion_search(); /// @brief Optional thread identifier for multi-threaded construction. std::size_t thread = 0; }; struct index_copy_config_t {}; struct index_join_config_t { /// @brief Controls maximum number of proposals per man during stable marriage. std::size_t max_proposals = 0; /// @brief Hyper-parameter controlling the quality of search. /// Defaults to 16 in FAISS and 10 in hnswlib. /// > It is called `ef` in the paper. std::size_t expansion = default_expansion_search(); /// @brief Brute-forces exhaustive search over all entries in the index. bool exact = false; }; /// @brief C++17 and newer version deprecate the `std::result_of` template using return_type_gt = #if defined(USEARCH_DEFINED_CPP17) typename std::invoke_result::type; #else typename std::result_of::type; #endif /** * @brief An example of what a USearch-compatible ad-hoc filter would look like. * * A similar function object can be passed to search queries to further filter entries * on their auxiliary properties, such as some categorical keys stored in an external DBMS. */ struct dummy_predicate_t { template constexpr bool operator()(member_at&&) const noexcept { return true; } }; /** * @brief An example of what a USearch-compatible ad-hoc operation on in-flight entries. * * This kind of callbacks is used when the engine is being updated and you want to patch * the entries, while their are still under locks - limiting concurrent access and providing * consistency. */ struct dummy_callback_t { template void operator()(member_at&&) const noexcept {} }; /** * @brief An example of what a USearch-compatible progress-bar should look like. * * This is particularly helpful when handling long-running tasks, like serialization, * saving, and loading from disk, or index-level joins. * The reporter checks return value to continue or stop the process, `false` means need to stop. */ struct dummy_progress_t { inline bool operator()(std::size_t /*processed*/, std::size_t /*total*/) const noexcept { return true; } }; /** * @brief An example of what a USearch-compatible values prefetching mechanism should look like. * * USearch is designed to handle very large datasets, that may not fir into RAM. Fetching from * external memory is very expensive, so we've added a pre-fetching mechanism, that accepts * multiple objects at once, to cache in RAM ahead of the computation. * The received iterators support both `get_slot` and `get_key` operations. * An example usage may look like this: * * template * inline void operator()(member_citerator_like_at, member_citerator_like_at) const noexcept { * for (; begin != end; ++begin) * io_uring_prefetch(offset_in_file(get_key(begin))); * } */ struct dummy_prefetch_t { template inline void operator()(member_citerator_like_at, member_citerator_like_at) const noexcept {} }; /** * @brief An example of what a USearch-compatible executor (thread-pool) should look like. * * It's expected to have `parallel(callback)` API to schedule one task per thread; * an identical `fixed(count, callback)` and `dynamic(count, callback)` overloads that also accepts * the number of tasks, and somehow schedules them between threads; as well as `size()` to * determine the number of available threads. */ struct dummy_executor_t { dummy_executor_t() noexcept {} std::size_t size() const noexcept { return 1; } template void fixed(std::size_t tasks, thread_aware_function_at&& thread_aware_function) noexcept { for (std::size_t task_idx = 0; task_idx != tasks; ++task_idx) thread_aware_function(0, task_idx); } template void dynamic(std::size_t tasks, thread_aware_function_at&& thread_aware_function) noexcept { for (std::size_t task_idx = 0; task_idx != tasks; ++task_idx) if (!thread_aware_function(0, task_idx)) break; } template void parallel(thread_aware_function_at&& thread_aware_function) noexcept { thread_aware_function(0); } }; /** * @brief An example of what a USearch-compatible key-to-key mapping should look like. * * This is particularly helpful for "Semantic Joins", where we map entries of one collection * to entries of another. In asymmetric setups, where A -> B is needed, but B -> A is not, * this can be passed to minimize memory usage. */ struct dummy_key_to_key_mapping_t { struct member_ref_t { template member_ref_t& operator=(key_at&&) noexcept { return *this; } }; template member_ref_t operator[](key_at&&) const noexcept { return {}; } }; /** * @brief Checks if the provided object has a dummy type, emulating an interface, * but performing no real computation. */ template static constexpr bool is_dummy() { using object_t = typename std::remove_all_extents::type; return std::is_same::type, dummy_predicate_t>::value || // std::is_same::type, dummy_callback_t>::value || // std::is_same::type, dummy_progress_t>::value || // std::is_same::type, dummy_prefetch_t>::value || // std::is_same::type, dummy_executor_t>::value || // std::is_same::type, dummy_key_to_key_mapping_t>::value; } template struct has_reset_gt { static_assert(std::integral_constant::value, "Second template parameter needs to be of function type."); }; template struct has_reset_gt { private: template static constexpr auto check(at*) -> typename std::is_same().reset(std::declval()...)), return_at>::type; template static constexpr std::false_type check(...); typedef decltype(check(0)) type; public: static constexpr bool value = type::value; }; /** * @brief Checks if a certain class has a member function called `reset`. */ template constexpr bool has_reset() { return has_reset_gt::value; } struct serialization_result_t { error_t error; explicit operator bool() const noexcept { return !error; } serialization_result_t failed(error_t message) noexcept { error = std::move(message); return std::move(*this); } }; /** * @brief Smart-pointer wrapping the LibC @b `FILE` for binary file @b outputs. * * This class raises no exceptions and corresponds errors through `serialization_result_t`. * The class automatically closes the file when the object is destroyed. */ class output_file_t { char const* path_ = nullptr; std::FILE* file_ = nullptr; public: output_file_t(char const* path) noexcept : path_(path) {} ~output_file_t() noexcept { close(); } output_file_t(output_file_t&& other) noexcept : path_(exchange(other.path_, nullptr)), file_(exchange(other.file_, nullptr)) {} output_file_t& operator=(output_file_t&& other) noexcept { std::swap(path_, other.path_); std::swap(file_, other.file_); return *this; } serialization_result_t open_if_not() noexcept { serialization_result_t result; if (!file_) file_ = std::fopen(path_, "wb"); if (!file_) return result.failed(std::strerror(errno)); return result; } serialization_result_t write(void const* begin, std::size_t length) noexcept { serialization_result_t result; std::size_t written = std::fwrite(begin, length, 1, file_); if (length && !written) return result.failed(std::strerror(errno)); return result; } void close() noexcept { if (file_) std::fclose(exchange(file_, nullptr)); } }; /** * @brief Smart-pointer wrapping the LibC @b `FILE` for binary files @b inputs. * * This class raises no exceptions and corresponds errors through `serialization_result_t`. * The class automatically closes the file when the object is destroyed. */ class input_file_t { char const* path_ = nullptr; std::FILE* file_ = nullptr; public: input_file_t(char const* path) noexcept : path_(path) {} ~input_file_t() noexcept { close(); } input_file_t(input_file_t&& other) noexcept : path_(exchange(other.path_, nullptr)), file_(exchange(other.file_, nullptr)) {} input_file_t& operator=(input_file_t&& other) noexcept { std::swap(path_, other.path_); std::swap(file_, other.file_); return *this; } serialization_result_t open_if_not() noexcept { serialization_result_t result; if (!file_) file_ = std::fopen(path_, "rb"); if (!file_) return result.failed(std::strerror(errno)); return result; } serialization_result_t read(void* begin, std::size_t length) noexcept { serialization_result_t result; std::size_t read = std::fread(begin, length, 1, file_); if (length && !read) { bool reached_eof = std::feof(file_); return result.failed(reached_eof ? "End of file reached!" : std::strerror(errno)); } return result; } void close() noexcept { if (file_) std::fclose(exchange(file_, nullptr)); } explicit operator bool() const noexcept { return file_; } bool seek_to(std::size_t progress) noexcept { return std::fseek(file_, static_cast(progress), SEEK_SET) == 0; } bool seek_to_end() noexcept { return std::fseek(file_, 0L, SEEK_END) == 0; } bool infer_progress(std::size_t& progress) noexcept { long int result = std::ftell(file_); if (result == -1L) return false; progress = static_cast(result); return true; } }; /** * @brief Represents a memory-mapped file or a pre-allocated anonymous memory region. * * This class provides a convenient way to memory-map a file and access its contents as a block of * memory. The class handles platform-specific memory-mapping operations on Windows, Linux, and MacOS. * The class automatically closes the file when the object is destroyed. */ class memory_mapped_file_t { char const* path_{}; /**< The path to the file to be memory-mapped. */ void* ptr_{}; /**< A pointer to the memory-mapping. */ size_t length_{}; /**< The length of the memory-mapped file in bytes. */ #if defined(USEARCH_DEFINED_WINDOWS) HANDLE file_handle_{}; /**< The file handle on Windows. */ HANDLE mapping_handle_{}; /**< The mapping handle on Windows. */ #else int file_descriptor_{}; /**< The file descriptor on Linux and MacOS. */ #endif public: explicit operator bool() const noexcept { return ptr_ != nullptr; } byte_t* data() noexcept { return reinterpret_cast(ptr_); } byte_t const* data() const noexcept { return reinterpret_cast(ptr_); } std::size_t size() const noexcept { return static_cast(length_); } memory_mapped_file_t() noexcept {} memory_mapped_file_t(char const* path) noexcept : path_(path) {} ~memory_mapped_file_t() noexcept { close(); } memory_mapped_file_t(memory_mapped_file_t&& other) noexcept : path_(exchange(other.path_, nullptr)), ptr_(exchange(other.ptr_, nullptr)), length_(exchange(other.length_, 0)), #if defined(USEARCH_DEFINED_WINDOWS) file_handle_(exchange(other.file_handle_, nullptr)), mapping_handle_(exchange(other.mapping_handle_, nullptr)) #else file_descriptor_(exchange(other.file_descriptor_, 0)) #endif { } memory_mapped_file_t(memory_mapped_file_t const&) = delete; memory_mapped_file_t& operator=(memory_mapped_file_t const&) = delete; memory_mapped_file_t(byte_t* data, std::size_t length) noexcept : ptr_(data), length_(length) {} memory_mapped_file_t& operator=(memory_mapped_file_t&& other) noexcept { std::swap(path_, other.path_); std::swap(ptr_, other.ptr_); std::swap(length_, other.length_); #if defined(USEARCH_DEFINED_WINDOWS) std::swap(file_handle_, other.file_handle_); std::swap(mapping_handle_, other.mapping_handle_); #else std::swap(file_descriptor_, other.file_descriptor_); #endif return *this; } serialization_result_t open_if_not() noexcept { serialization_result_t result; if (!path_ || ptr_) return result; #if defined(USEARCH_DEFINED_WINDOWS) HANDLE file_handle = CreateFile(path_, GENERIC_READ, FILE_SHARE_READ, 0, OPEN_EXISTING, FILE_ATTRIBUTE_NORMAL, 0); if (file_handle == INVALID_HANDLE_VALUE) return result.failed("Opening file failed!"); std::size_t file_length = GetFileSize(file_handle, 0); HANDLE mapping_handle = CreateFileMapping(file_handle, 0, PAGE_READONLY, 0, 0, 0); if (mapping_handle == 0) { CloseHandle(file_handle); return result.failed("Mapping file failed!"); } byte_t* file = (byte_t*)MapViewOfFile(mapping_handle, FILE_MAP_READ, 0, 0, file_length); if (file == 0) { CloseHandle(mapping_handle); CloseHandle(file_handle); return result.failed("View the map failed!"); } file_handle_ = file_handle; mapping_handle_ = mapping_handle; ptr_ = file; length_ = file_length; #else #if defined(USEARCH_DEFINED_LINUX) int descriptor = open(path_, O_RDONLY | O_NOATIME); #else int descriptor = open(path_, O_RDONLY); #endif if (descriptor < 0) return result.failed(std::strerror(errno)); // Estimate the file size struct stat file_stat; int fstat_status = fstat(descriptor, &file_stat); if (fstat_status < 0) { ::close(descriptor); return result.failed(std::strerror(errno)); } // Map the entire file byte_t* file = (byte_t*)mmap(NULL, file_stat.st_size, PROT_READ, MAP_SHARED, descriptor, 0); if (file == MAP_FAILED) { ::close(descriptor); return result.failed(std::strerror(errno)); } file_descriptor_ = descriptor; ptr_ = file; length_ = file_stat.st_size; #endif // Platform specific code return result; } void close() noexcept { if (!path_) { ptr_ = nullptr; length_ = 0; return; } #if defined(USEARCH_DEFINED_WINDOWS) UnmapViewOfFile(ptr_); CloseHandle(mapping_handle_); CloseHandle(file_handle_); mapping_handle_ = nullptr; file_handle_ = nullptr; #else munmap(ptr_, length_); ::close(file_descriptor_); file_descriptor_ = 0; #endif ptr_ = nullptr; length_ = 0; } }; /** * @brief Metadata header for the serialized index. * * This structure is very minimalistic by design. It contains no information * about the capacity of the index, so you'll have to `reserve` after loading. * It also contains no info on the metric or key types, so you'll have to store * that information elsewhere, like we do in `index_dense_head_t`. */ struct index_serialized_header_t { std::uint64_t size = 0; std::uint64_t connectivity = 0; std::uint64_t connectivity_base = 0; std::uint64_t max_level = 0; std::uint64_t entry_slot = 0; }; using default_key_t = std::uint64_t; using default_slot_t = std::uint32_t; using default_distance_t = float; template struct member_gt { key_at key; std::size_t slot; }; template inline std::size_t get_slot(member_gt const& m) noexcept { return m.slot; } template inline key_at get_key(member_gt const& m) noexcept { return m.key; } template struct member_cref_gt { misaligned_ref_gt key; std::size_t slot; }; template inline std::size_t get_slot(member_cref_gt const& m) noexcept { return m.slot; } template inline key_at get_key(member_cref_gt const& m) noexcept { return m.key; } template struct member_ref_gt { misaligned_ref_gt key; std::size_t slot; inline operator member_cref_gt() const noexcept { return {key.ptr(), slot}; } }; template inline std::size_t get_slot(member_ref_gt const& m) noexcept { return m.slot; } template inline key_at get_key(member_ref_gt const& m) noexcept { return m.key; } /** * @brief Approximate Nearest Neighbors Search @b index-structure using the * Hierarchical Navigable Small World @b (HNSW) graphs algorithm. * If classical containers store @b Key->Value mappings, this one can * be seen as a network of keys, accelerating approximate @b Value~>Key visited_members. * * Unlike most implementations, this one is generic and can be used for any search, * not just within equi-dimensional vectors. Examples range from Texts to similar Chess * positions, Geo-Spatial Search, and even Graphs. * * @tparam key_at * The type of primary objects stored in the index. * The values, to which those map, are not managed by the same index structure. * * @tparam compressed_slot_at * The smallest unsigned integer type to address indexed elements. * It is used internally to maximize space-efficiency and is generally * up-casted to @b `std::size_t` in public interfaces. * Can be a built-in @b `uint32_t`, `uint64_t`, or our custom @b `uint40_t`. * Which makes the most sense for 4B+ entry indexes. * * @tparam dynamic_allocator_at * Dynamic memory allocator for temporary buffers, visits indicators, and * priority queues, needed during construction and traversals of graphs. * The allocated buffers may be uninitialized. * * @tparam tape_allocator_at * Potentially different memory allocator for primary allocations of nodes and vectors. * It would never `deallocate` separate entries, and would only free all the space at once. * The allocated buffers may be uninitialized. * * @section Features * * - Thread-safe for concurrent construction, search, and updates. * - Doesn't allocate new threads, and reuses the ones its called from. * - Allows storing value externally, managing just the similarity index. * - Joins. * @section Usage * * @subsection Exceptions * * None of the methods throw exceptions in the "Release" compilation mode. * It may only `throw` if your memory ::dynamic_allocator_at or ::metric_at isn't * safe to copy. * * @subsection Serialization * * When serialized, doesn't include any additional metadata. * It is just the multi-level proximity-graph. You may want to store metadata about * the used metric and key types somewhere else. * * @section Implementation Details * * Like every HNSW implementation, USearch builds levels of "Proximity Graphs". * Every added vector forms a node in one or more levels of the graph. * Every node is present in the base level. Every following level contains a smaller * fraction of nodes. During search, the operation starts with the smaller levels * and zooms-in on every following iteration of larger graph traversals. * * Just one memory allocation is performed regardless of the number of levels. * The adjacency lists across all levels are concatenated into that single buffer. * That buffer starts with a "head", that stores the metadata, such as the * tallest "level" of the graph that it belongs to, the external "key", and the * number of "dimensions" in the vector. * * @section Metrics, Predicates and Callbacks * * * @section Smart References and Iterators * * - `member_citerator_t` and `member_iterator_t` have only slots, no indirections. * * - `member_cref_t` and `member_ref_t` contains the `slot` and a reference * to the key. So it passes through 1 level of visited_members in `nodes_`. * Retrieving the key via `get_key` will cause fetching yet another cache line. * * - `member_gt` contains an already prefetched copy of the key. * */ template , // typename tape_allocator_at = dynamic_allocator_at> // class index_gt { public: using distance_t = distance_at; using vector_key_t = key_at; using key_t = vector_key_t; using compressed_slot_t = compressed_slot_at; using dynamic_allocator_t = dynamic_allocator_at; using tape_allocator_t = tape_allocator_at; static_assert(sizeof(vector_key_t) >= sizeof(compressed_slot_t), "Having tiny keys doesn't make sense."); static_assert(std::is_signed::value, "Distance must be a signed type, as we use the unary minus."); using member_cref_t = member_cref_gt; using member_ref_t = member_ref_gt; template class member_iterator_gt { using ref_t = ref_at; using index_t = index_at; friend class index_gt; member_iterator_gt() noexcept {} member_iterator_gt(index_t* index, compressed_slot_t slot) noexcept : index_(index), slot_(slot) {} template ref_t call_key(std::true_type) const noexcept { return ref_t{index_->node_at_(slot_).ckey(), slot_}; } template ref_t call_key(std::false_type) const noexcept { return ref_t{index_->node_at_(slot_).key(), slot_}; } index_t* index_{}; compressed_slot_t slot_{}; public: using iterator_category = std::random_access_iterator_tag; using value_type = ref_t; using difference_type = std::ptrdiff_t; using pointer = void; using reference = ref_t; reference operator*() const noexcept { return call_key<0>(std::is_const()); } vector_key_t key() const noexcept { return index_->node_at_(slot_).ckey(); } friend inline compressed_slot_t get_slot(member_iterator_gt const& it) noexcept { return it.slot_; } friend inline vector_key_t get_key(member_iterator_gt const& it) noexcept { return it.key(); } // clang-format off member_iterator_gt operator++(int) noexcept { return member_iterator_gt(index_, static_cast(static_cast(slot_) + 1)); } member_iterator_gt operator--(int) noexcept { return member_iterator_gt(index_, static_cast(static_cast(slot_) - 1)); } member_iterator_gt operator+(difference_type d) noexcept { return member_iterator_gt(index_, static_cast(static_cast(slot_) + d)); } member_iterator_gt operator-(difference_type d) noexcept { return member_iterator_gt(index_, static_cast(static_cast(slot_) - d)); } member_iterator_gt& operator++() noexcept { slot_ = static_cast(static_cast(slot_) + 1); return *this; } member_iterator_gt& operator--() noexcept { slot_ = static_cast(static_cast(slot_) - 1); return *this; } member_iterator_gt& operator+=(difference_type d) noexcept { slot_ = static_cast(static_cast(slot_) + d); return *this; } member_iterator_gt& operator-=(difference_type d) noexcept { slot_ = static_cast(static_cast(slot_) - d); return *this; } bool operator==(member_iterator_gt const& other) const noexcept { return index_ == other.index_ && slot_ == other.slot_; } bool operator!=(member_iterator_gt const& other) const noexcept { return index_ != other.index_ || slot_ != other.slot_; } // clang-format on }; using member_iterator_t = member_iterator_gt; using member_citerator_t = member_iterator_gt; // STL compatibility: using value_type = vector_key_t; using allocator_type = dynamic_allocator_t; using size_type = std::size_t; using difference_type = std::ptrdiff_t; using reference = member_ref_t; using const_reference = member_cref_t; using pointer = void; using const_pointer = void; using iterator = member_iterator_t; using const_iterator = member_citerator_t; using reverse_iterator = std::reverse_iterator; using reverse_const_iterator = std::reverse_iterator; using dynamic_allocator_traits_t = std::allocator_traits; using byte_t = typename dynamic_allocator_t::value_type; static_assert( // sizeof(byte_t) == 1, // "Primary allocator must allocate separate addressable bytes"); using tape_allocator_traits_t = std::allocator_traits; static_assert( // sizeof(typename tape_allocator_traits_t::value_type) == 1, // "Tape allocator must allocate separate addressable bytes"); private: /** * @brief Integer for the number of node neighbors at a specific level of the * multi-level graph. It's selected to be `std::uint32_t` to improve the * alignment in most common cases. */ using neighbors_count_t = std::uint32_t; using level_t = std::int16_t; /** * @brief How many bytes of memory are needed to form the "head" of the node. */ static constexpr std::size_t node_head_bytes_() { return sizeof(vector_key_t) + sizeof(level_t); } using nodes_mutexes_t = bitset_gt; using visits_hash_set_t = growing_hash_set_gt, dynamic_allocator_t>; struct precomputed_constants_t { double inverse_log_connectivity{}; std::size_t neighbors_bytes{}; std::size_t neighbors_base_bytes{}; }; /// @brief A space-efficient internal data-structure used in graph traversal queues. struct candidate_t { distance_t distance; compressed_slot_t slot; inline bool operator<(candidate_t other) const noexcept { return distance < other.distance; } }; using candidates_view_t = span_gt; using candidates_allocator_t = typename dynamic_allocator_traits_t::template rebind_alloc; using top_candidates_t = sorted_buffer_gt, candidates_allocator_t>; using next_candidates_t = max_heap_gt, candidates_allocator_t>; /** * @brief A loosely-structured handle for every node. One such node is created for every member. * To minimize memory usage and maximize the number of entries per cache-line, it only * stores to pointers. The internal tape starts with a `vector_key_t` @b key, then * a `level_t` for the number of graph @b levels in which this member appears, * then the { `neighbors_count_t`, `compressed_slot_t`, `compressed_slot_t` ... } sequences * for @b each-level. */ class node_t { byte_t* tape_{}; public: explicit node_t(byte_t* tape) noexcept : tape_(tape) {} byte_t* tape() const noexcept { return tape_; } byte_t* neighbors_tape() const noexcept { return tape_ + node_head_bytes_(); } explicit operator bool() const noexcept { return tape_; } node_t() = default; node_t(node_t const&) = default; node_t& operator=(node_t const&) = default; misaligned_ref_gt ckey() const noexcept { return {tape_}; } misaligned_ref_gt ckey() noexcept { return {tape_}; } misaligned_ref_gt key() const noexcept { return {tape_}; } misaligned_ref_gt key() noexcept { return {tape_}; } misaligned_ref_gt level() noexcept { return {tape_ + sizeof(vector_key_t)}; } void key(vector_key_t v) noexcept { return misaligned_store(tape_, v); } void level(level_t v) noexcept { return misaligned_store(tape_ + sizeof(vector_key_t), v); } }; static_assert(std::is_trivially_copy_constructible::value, "Nodes must be light!"); static_assert(std::is_trivially_destructible::value, "Nodes must be light!"); /** * @brief A slice of the node's tape, containing a the list of neighbors * for a node in a single graph level. It's pre-allocated to fit * as many neighbors "slots", as may be needed at the target level, * and starts with a single integer `neighbors_count_t` counter. */ class neighbors_ref_t { byte_t* tape_; static constexpr std::size_t shift(std::size_t i = 0) noexcept { return sizeof(neighbors_count_t) + sizeof(compressed_slot_t) * i; } public: using iterator = misaligned_ptr_gt; using const_iterator = misaligned_ptr_gt; using value_type = compressed_slot_t; neighbors_ref_t(byte_t* tape) noexcept : tape_(tape) {} misaligned_ptr_gt begin() noexcept { return tape_ + shift(); } misaligned_ptr_gt end() noexcept { return begin() + size(); } misaligned_ptr_gt begin() const noexcept { return tape_ + shift(); } misaligned_ptr_gt end() const noexcept { return begin() + size(); } misaligned_ptr_gt cbegin() noexcept { return tape_ + shift(); } misaligned_ptr_gt cend() noexcept { return cbegin() + size(); } compressed_slot_t operator[](std::size_t i) const noexcept { return misaligned_load(tape_ + shift(i)); } std::size_t size() const noexcept { return misaligned_load(tape_); } void clear() noexcept { neighbors_count_t n = misaligned_load(tape_); std::memset(tape_, 0, shift(n)); misaligned_store(tape_, 0); } void push_back(compressed_slot_t slot) noexcept { neighbors_count_t n = misaligned_load(tape_); misaligned_store(tape_ + shift(n), slot); misaligned_store(tape_, n + 1); } template std::size_t erase_if(allow_slot_at&& allow_slot) noexcept { std::size_t old_count = misaligned_load(tape_); std::size_t removed_count = 0; for (std::size_t i = 0; i < old_count; ++i) { compressed_slot_t slot = misaligned_load(tape_ + shift(i)); if (allow_slot(slot)) { removed_count++; } else { misaligned_store(tape_ + shift(i - removed_count), slot); } } misaligned_store(tape_, old_count - removed_count); return removed_count; } }; /** * @brief A package of all kinds of temporary data-structures, that the threads * would reuse to process requests. Similar to having all of those as * separate `thread_local` global variables. */ struct usearch_align_m context_t { top_candidates_t top_candidates{}; top_candidates_t top_for_refine{}; next_candidates_t next_candidates{}; visits_hash_set_t visits{}; std::default_random_engine level_generator{}; std::size_t iteration_cycles{}; std::size_t computed_distances{}; std::size_t computed_distances_in_refines{}; std::size_t computed_distances_in_reverse_refines{}; /// @brief Heterogeneous distance calculation. template // inline distance_t measure(value_at const& first, entry_at const& second, metric_at&& metric) noexcept { static_assert( // std::is_same::value || std::is_same::value, "Unexpected type"); computed_distances++; return metric(first, second); } /// @brief Homogeneous distance calculation. template // inline distance_t measure(entry_at const& first, entry_at const& second, metric_at&& metric) noexcept { static_assert( // std::is_same::value || std::is_same::value, "Unexpected type"); computed_distances++; return metric(first, second); } /// @brief Heterogeneous batch distance calculation. template // inline void measure_batch(value_at const& first, entries_at const& second_entries, metric_at&& metric, candidate_allowed_at&& candidate_allowed, transform_at&& transform, callback_at&& callback) noexcept { using entry_t = typename std::remove_reference::type; metric.batch(first, second_entries, candidate_allowed, transform, [&](entry_t const& entry, distance_t distance) { callback(entry, distance); computed_distances++; }); } }; /// @brief Number of "slots" available for `node_t` objects. Equals to @b `limits_.members`. mutable std::atomic nodes_capacity_{}; /// @brief Number of "slots" already storing non-null nodes. mutable std::atomic nodes_count_{}; index_config_t config_{}; index_limits_t limits_{}; mutable dynamic_allocator_t dynamic_allocator_{}; tape_allocator_t tape_allocator_{}; precomputed_constants_t pre_{}; memory_mapped_file_t viewed_file_{}; /// @brief Controls access to `max_level_` and `entry_slot_`. /// If any thread is updating those values, no other threads can `add()` or `search()`. std::mutex global_mutex_{}; /// @brief The level of the top-most graph in the index. Grows as the logarithm of size, starts from zero. level_t max_level_{}; /// @brief The slot in which the only node of the top-level graph is stored. std::size_t entry_slot_{}; using nodes_allocator_t = typename dynamic_allocator_traits_t::template rebind_alloc; /// @brief C-style array of `node_t` smart-pointers. Use `compressed_slot_t` for indexing. buffer_gt nodes_{}; /// @brief Mutex, that limits concurrent access to `nodes_`. mutable nodes_mutexes_t nodes_mutexes_{}; using contexts_allocator_t = typename dynamic_allocator_traits_t::template rebind_alloc; /// @brief Array of thread-specific buffers for temporary data. mutable buffer_gt contexts_{}; public: std::size_t connectivity() const noexcept { return config_.connectivity; } std::size_t capacity() const noexcept { return nodes_capacity_; } std::size_t size() const noexcept { return nodes_count_; } std::size_t max_level() const noexcept { return nodes_count_ ? static_cast(max_level_) : 0; } index_config_t const& config() const noexcept { return config_; } index_limits_t const& limits() const noexcept { return limits_; } bool is_immutable() const noexcept { return bool(viewed_file_); } explicit operator bool() const noexcept { return config_.is_valid(); } /** * @brief Default index constructor, suitable only for stateless allocators. * @warning Consider `index_gt::make` instead, or explicitly convert to `bool` to check if the index is valid. * @section Exceptions * Doesn't throw, unless the ::dynamic_allocator's and ::tape_allocator's throw on move-construction. */ explicit index_gt( // dynamic_allocator_t dynamic_allocator = {}, tape_allocator_t tape_allocator = {}) noexcept(false) : nodes_capacity_(0u), nodes_count_(0u), config_(), limits_(0, 0), dynamic_allocator_(std::move(dynamic_allocator)), tape_allocator_(std::move(tape_allocator)), pre_(precompute_({})), max_level_(-1), entry_slot_(0u), nodes_(), nodes_mutexes_(), contexts_() {} /** * @brief Default index constructor, suitable only for stateless allocators. * @warning Consider `index_gt::make` instead, or explicitly convert to `bool` to check if the index is valid. * @section Exceptions * Doesn't throw, unless the ::dynamic_allocator's and ::tape_allocator's throw on move-construction. */ explicit index_gt(index_config_t config, dynamic_allocator_t dynamic_allocator = {}, tape_allocator_t tape_allocator = {}) noexcept(false) : index_gt(dynamic_allocator, tape_allocator) { config.validate(); config_ = config; pre_ = precompute_(config); } /** * @brief Clones the structure with the same hyper-parameters, but without contents. */ index_gt fork() noexcept { return index_gt{config_, dynamic_allocator_, tape_allocator_}; } ~index_gt() noexcept { reset(); } index_gt(index_gt&& other) noexcept { swap(other); } index_gt& operator=(index_gt&& other) noexcept { swap(other); return *this; } struct state_result_t { index_gt index; error_t error; explicit operator bool() const noexcept { return !error; } state_result_t failed(error_t message) noexcept { return {std::move(index), std::move(message)}; } operator index_gt&&() && { if (error) usearch_raise_runtime_error(error.what()); return std::move(index); } }; using copy_result_t = state_result_t; /** * @brief The recommended way to initialize the index, as unlike the constructor, * it can fail with an error message, without raising an exception. * * @param[in] config The configuration specs of the index. * @param[in] dynamic_allocator The allocator for temporary buffers and thread contexts, like priority queues. * @param[in] tape_allocator The allocator for the primary allocations of nodes and vectors. */ static state_result_t make( // index_config_t config = {}, dynamic_allocator_t dynamic_allocator = {}, tape_allocator_t tape_allocator = {}) noexcept { state_result_t result; result.error = config.validate(); if (result.error) return result; index_gt index; index.config_ = std::move(config); index.dynamic_allocator_ = std::move(dynamic_allocator); index.tape_allocator_ = std::move(tape_allocator); index.pre_ = precompute_(index.config_); index.nodes_count_ = 0u; index.max_level_ = -1; index.entry_slot_ = 0u; result.index = std::move(index); return result; } /** * @brief The recommended way to copy the index, as unlike the copy-constructor, * it can fail with an error message, without raising an exception. * * @param[in] config The configuration specs for the copy-operation. Currently unused. */ copy_result_t copy(index_copy_config_t config = {}) const noexcept { copy_result_t result; index_gt& other = result.index; other = index_gt(config_, dynamic_allocator_, tape_allocator_); if (!other.reserve(limits_)) return result.failed("Failed to reserve the contexts"); // Now all is left - is to allocate new `node_t` instances and populate // the `other.nodes_` array into it. for (std::size_t i = 0; i != nodes_count_; ++i) other.nodes_[i] = other.node_make_copy_(node_bytes_(nodes_[i])); other.nodes_count_ = nodes_count_.load(); other.max_level_ = max_level_; other.entry_slot_ = entry_slot_; // This controls nothing for now :) (void)config; return result; } member_citerator_t cbegin() const noexcept { return {this, static_cast(0u)}; } member_citerator_t cend() const noexcept { return {this, static_cast(size())}; } member_citerator_t begin() const noexcept { return {this, static_cast(0u)}; } member_citerator_t end() const noexcept { return {this, static_cast(size())}; } member_iterator_t begin() noexcept { return {this, static_cast(0u)}; } member_iterator_t end() noexcept { return {this, static_cast(size())}; } member_ref_t at(compressed_slot_t slot) noexcept { return {nodes_[slot].key(), slot}; } member_cref_t at(compressed_slot_t slot) const noexcept { return {nodes_[slot].ckey(), slot}; } member_iterator_t iterator_at(compressed_slot_t slot) noexcept { return {this, slot}; } member_citerator_t citerator_at(compressed_slot_t slot) const noexcept { return {this, slot}; } dynamic_allocator_t const& dynamic_allocator() const noexcept { return dynamic_allocator_; } tape_allocator_t const& tape_allocator() const noexcept { return tape_allocator_; } #if defined(USEARCH_USE_PRAGMA_REGION) #pragma region Adjusting Configuration #endif /** * @brief Erases all the vectors from the index. * * Will change `size()` to zero, but will keep the same `capacity()`. * Will keep the number of available threads/contexts the same as it was. */ void clear() noexcept { if (!has_reset()) { std::size_t n = nodes_count_; for (std::size_t i = 0; i != n; ++i) node_free_(i); } else tape_allocator_.deallocate(nullptr, 0); nodes_count_ = 0; max_level_ = -1; entry_slot_ = 0u; } /** * @brief Erases all members from index, closing files, and returning RAM to OS. * * Will change both `size()` and `capacity()` to zero. * Will deallocate all threads/contexts. * If the index is memory-mapped - releases the mapping and the descriptor. */ void reset() noexcept { clear(); nodes_ = {}; contexts_ = {}; nodes_mutexes_ = {}; limits_ = index_limits_t{0, 0}; nodes_capacity_ = 0; viewed_file_ = memory_mapped_file_t{}; tape_allocator_ = {}; } /** * @brief Swaps the underlying memory buffers and thread contexts. */ void swap(index_gt& other) noexcept { std::swap(config_, other.config_); std::swap(limits_, other.limits_); std::swap(dynamic_allocator_, other.dynamic_allocator_); std::swap(tape_allocator_, other.tape_allocator_); std::swap(pre_, other.pre_); std::swap(viewed_file_, other.viewed_file_); std::swap(max_level_, other.max_level_); std::swap(entry_slot_, other.entry_slot_); std::swap(nodes_, other.nodes_); std::swap(nodes_mutexes_, other.nodes_mutexes_); std::swap(contexts_, other.contexts_); // Non-atomic parts. std::size_t capacity_copy = nodes_capacity_; std::size_t count_copy = nodes_count_; nodes_capacity_ = other.nodes_capacity_.load(); nodes_count_ = other.nodes_count_.load(); other.nodes_capacity_ = capacity_copy; other.nodes_count_ = count_copy; } /** * @brief Increases the `capacity()` of the index to allow adding more vectors. * @return `true` on success, `false` on memory allocation errors. */ bool try_reserve(index_limits_t limits) usearch_noexcept_m { if (limits.threads_add <= limits_.threads_add // && limits.threads_search <= limits_.threads_search // && limits.members <= limits_.members) return true; // In some cases, we don't want to update the number of members, // just want to make sure that future reserves use the new thread limits. if (!limits.members && !size()) { limits_ = limits; return true; } nodes_mutexes_t new_mutexes(limits.members); buffer_gt new_nodes(limits.members); buffer_gt new_contexts(limits.threads()); if (!new_nodes || !new_contexts || !new_mutexes) return false; // Move the nodes info, and deallocate previous buffers. if (nodes_) std::memcpy(new_nodes.data(), nodes_.data(), sizeof(node_t) * size()); // Pre-reserve the capacity for `top_for_refine`, which always contains at most one more // element than the connectivity factors. std::size_t connectivity_max = (std::max)(config_.connectivity_base, config_.connectivity); for (std::size_t i = 0; i != new_contexts.size(); ++i) if (!new_contexts[i].top_for_refine.reserve(connectivity_max + 1)) return false; limits_ = limits; nodes_capacity_ = limits.members; nodes_ = std::move(new_nodes); contexts_ = std::move(new_contexts); nodes_mutexes_ = std::move(new_mutexes); return true; } /** * @brief Increases the `capacity()` of the index to allow adding more vectors. * @warning Unlike STL, won't throw exceptions on memory allocations, so check the return value. * @return `true` on success, `false` on memory allocation errors. */ bool reserve(index_limits_t limits) usearch_noexcept_m { return try_reserve(limits); } #if defined(USEARCH_USE_PRAGMA_REGION) #pragma endregion #pragma region Construction and Search #endif struct add_result_t { error_t error{}; std::size_t new_size{}; std::size_t visited_members{}; std::size_t computed_distances{}; std::size_t computed_distances_in_refines{}; std::size_t computed_distances_in_reverse_refines{}; compressed_slot_t slot{}; explicit operator bool() const noexcept { return !error; } add_result_t failed(error_t message) noexcept { error = std::move(message); return std::move(*this); } }; /// @brief Describes a matched search result, augmenting `member_cref_t` /// contents with `distance` to the query object. struct match_t { member_cref_t member; distance_t distance; inline match_t() noexcept : member({nullptr, 0}), distance(std::numeric_limits::max()) {} inline match_t(member_cref_t member, distance_t distance) noexcept : member(member), distance(distance) {} inline match_t(match_t&& other) noexcept : member({other.member.key.ptr(), other.member.slot}), distance(other.distance) {} inline match_t(match_t const& other) noexcept : member({other.member.key.ptr(), other.member.slot}), distance(other.distance) {} inline match_t& operator=(match_t const& other) noexcept { member.key.reset(other.member.key.ptr()); member.slot = other.member.slot; distance = other.distance; return *this; } inline match_t& operator=(match_t&& other) noexcept { member.key.reset(other.member.key.ptr()); member.slot = other.member.slot; distance = other.distance; return *this; } }; class search_result_t { node_t const* nodes_{}; top_candidates_t const* top_{}; friend class index_gt; inline search_result_t(index_gt const& index, top_candidates_t const* top) noexcept : nodes_(index.nodes_), top_(top) {} public: /** @brief Number of search results found. */ std::size_t count{}; /** @brief Number of graph nodes traversed. */ std::size_t visited_members{}; /** @brief Number of times the distances were computed. */ std::size_t computed_distances{}; error_t error{}; inline search_result_t() noexcept {} inline search_result_t(search_result_t&&) = default; inline search_result_t& operator=(search_result_t&&) = default; explicit operator bool() const noexcept { return !error; } search_result_t failed(error_t message) noexcept { error = std::move(message); return std::move(*this); } inline operator std::size_t() const noexcept { return count; } inline std::size_t size() const noexcept { return count; } inline bool empty() const noexcept { return !count; } inline match_t operator[](std::size_t i) const noexcept { return at(i); } inline match_t front() const noexcept { return at(0); } inline match_t back() const noexcept { usearch_assert_m(count > 0, "Can't call back() on an empty result set"); return at(count - 1); } inline bool contains(vector_key_t key) const noexcept { for (std::size_t i = 0; i != count; ++i) if (at(i).member.key == key) return true; return false; } inline match_t at(std::size_t i) const noexcept { candidate_t const* top_ordered = top_->data(); candidate_t candidate = top_ordered[i]; node_t node = nodes_[candidate.slot]; return {member_cref_t{node.ckey(), candidate.slot}, candidate.distance}; } /** * @brief Extracts the search results into a user-provided buffer, that unlike `dump_to`, * may already contain some data, so the new and old results are merged together. * @return The number of results stored in the buffer. * @param[in] keys The buffer to store the keys of the search results. * @param[in] distances The buffer to store the distances to the search results. * @param[in] old_count The number of results already stored in the buffers. * @param[in] max_count The maximum number of results that can be stored in the buffers. */ inline std::size_t merge_into( // vector_key_t* keys, distance_t* distances, // std::size_t old_count, std::size_t max_count) const noexcept { std::size_t merged_count = old_count; for (std::size_t i = 0; i != count; ++i) { match_t result = operator[](i); distance_t* merged_end = distances + merged_count; std::size_t offset = std::lower_bound(distances, merged_end, result.distance) - distances; if (offset == max_count) continue; std::size_t count_worse = merged_count - offset - (max_count == merged_count); std::memmove(keys + offset + 1, keys + offset, count_worse * sizeof(vector_key_t)); std::memmove(distances + offset + 1, distances + offset, count_worse * sizeof(distance_t)); keys[offset] = result.member.key; distances[offset] = result.distance; merged_count += merged_count != max_count; } return merged_count; } /** * @brief Extracts the search results into a user-provided buffer. * @return The number of results stored in the buffer. * @param[in] keys The buffer to store the keys of the search results. * @param[in] distances The buffer to store the distances to the search results. */ inline std::size_t dump_to(vector_key_t* keys, distance_t* distances) const noexcept { for (std::size_t i = 0; i != count; ++i) { match_t result = operator[](i); keys[i] = result.member.key; distances[i] = result.distance; } return count; } /** * @brief Extracts the search results into a user-provided buffer. * @return The number of results stored in the buffer. * @param[in] keys The buffer to store the keys of the search results. */ inline std::size_t dump_to(vector_key_t* keys) const noexcept { for (std::size_t i = 0; i != count; ++i) { match_t result = operator[](i); keys[i] = result.member.key; } return count; } /** * @brief Extracts the search results into a user-provided buffer. * @return The number of results stored in the buffer. * @param[in] keys The buffer to store the keys of the search results. * @param[in] distances The buffer to store the distances to the search results. * @param[in] capacity The maximum number of results that can be stored in the buffers. */ inline std::size_t dump_to(vector_key_t* keys, distance_t* distances, std::size_t capacity) const noexcept { std::size_t i = 0; std::size_t initialized_count = (std::min)(count, capacity); for (; i != initialized_count; ++i) { match_t result = operator[](i); keys[i] = result.member.key; distances[i] = result.distance; } for (; i != capacity; ++i) { keys[i] = vector_key_t{}; distances[i] = std::numeric_limits::has_signaling_NaN ? std::numeric_limits::signaling_NaN() : std::numeric_limits::max(); } return initialized_count; } /** * @brief Extracts the search results into a user-provided buffer. * @return The number of results stored in the buffer. * @param[in] keys The buffer to store the keys of the search results. * @param[in] capacity The maximum number of results that can be stored in the buffers. */ inline std::size_t dump_to(vector_key_t* keys, std::size_t capacity) const noexcept { std::size_t i = 0; std::size_t initialized_count = (std::min)(this->count, capacity); for (; i != initialized_count; ++i) { match_t result = operator[](i); keys[i] = result.member.key; } for (; i != capacity; ++i) keys[i] = vector_key_t{}; return initialized_count; } }; struct cluster_result_t { error_t error{}; std::size_t visited_members{}; std::size_t computed_distances{}; match_t cluster{}; explicit operator bool() const noexcept { return !error; } cluster_result_t failed(error_t message) noexcept { error = std::move(message); return std::move(*this); } }; /** * @brief Inserts a new entry into the index. Thread-safe. Supports @b heterogeneous lookups. * Expects needed capacity to be reserved ahead of time: `size() < capacity()`. * * @tparam metric_at * A function responsible for computing the distance @b (dis-similarity) between two objects. * It should be callable into distinctly different scenarios: * - `distance_t operator() (value_at, entry_at)` - from new object to existing entries. * - `distance_t operator() (entry_at, entry_at)` - between existing entries. * Where any possible `entry_at` has both two interfaces: `std::size_t slot()`, `vector_key_t key()`. * * @param[in] key External identifier/name/descriptor for the new entry. * @param[in] value Content that will be compared against other entries to index. * @param[in] metric Callable object measuring distance between ::value and present objects. * @param[in] config Configuration options for this specific operation. * @param[in] callback On-success callback, executed while the `member_ref_t` is still under lock. */ template < // typename value_at, // typename metric_at, // typename callback_at = dummy_callback_t, // typename prefetch_at = dummy_prefetch_t // > add_result_t add( // vector_key_t key, value_at&& value, metric_at&& metric, // index_update_config_t config = {}, // callback_at&& callback = callback_at{}, // prefetch_at&& prefetch = prefetch_at{}) usearch_noexcept_m { add_result_t result; if (is_immutable()) return result.failed("Can't add to an immutable index"); // Make sure we have enough local memory to perform this request context_t& context = contexts_[config.thread]; top_candidates_t& top = context.top_candidates; next_candidates_t& next = context.next_candidates; top.clear(); next.clear(); // The top list needs one more slot than the connectivity of the base level // for the heuristic, that tries to squeeze one more element into saturated list. std::size_t connectivity_max = (std::max)(config_.connectivity_base, config_.connectivity); std::size_t top_limit = (std::max)(connectivity_max + 1, config.expansion); if (!top.reserve(top_limit)) return result.failed("Out of memory!"); if (!next.reserve(config.expansion)) return result.failed("Out of memory!"); // Determining how much memory to allocate for the node depends on the target level std::unique_lock new_level_lock(global_mutex_); level_t max_level_copy = max_level_; // Copy under lock compressed_slot_t entry_slot_copy = static_cast(entry_slot_); // Copy under lock level_t new_target_level = choose_random_level_(context.level_generator); // Make sure we are not overflowing std::size_t capacity = nodes_capacity_.load(); std::size_t old_size = nodes_count_.fetch_add(1); if (old_size >= capacity) { nodes_count_.fetch_sub(1); return result.failed("Reserve capacity ahead of insertions!"); } // Allocate the neighbors node_t new_node = node_make_(key, new_target_level); if (!new_node) { nodes_count_.fetch_sub(1); return result.failed("Out of memory!"); } if (new_target_level <= max_level_copy) new_level_lock.unlock(); nodes_[old_size] = new_node; result.new_size = old_size + 1; compressed_slot_t new_slot = result.slot = static_cast(old_size); callback(at(result.slot)); // Do nothing for the first element if (!old_size) { entry_slot_ = result.slot; max_level_ = new_target_level; return result; } // Pull stats result.computed_distances = context.computed_distances; result.computed_distances_in_refines = context.computed_distances_in_refines; result.computed_distances_in_reverse_refines = context.computed_distances_in_reverse_refines; result.visited_members = context.iteration_cycles; // Go down the level, tracking only the closest match compressed_slot_t closest_slot = search_for_one_( // value, metric, prefetch, // entry_slot_copy, max_level_copy, new_target_level, context); // From `new_target_level` down - perform proper extensive search for (level_t level = (std::min)(new_target_level, max_level_copy); level >= 0; --level) { // TODO: Handle out of memory conditions search_to_insert_(value, metric, prefetch, closest_slot, level, config.expansion, context); candidates_view_t closest_view; { node_lock_t new_lock = node_lock_(new_slot); neighbors_(new_node, level).clear(); closest_view = form_links_to_closest_(metric, new_slot, level, context); closest_slot = closest_view[0].slot; } form_reverse_links_(metric, new_slot, closest_view, value, level, context); } // Normalize stats result.computed_distances = context.computed_distances - result.computed_distances; result.computed_distances_in_refines = context.computed_distances_in_refines - result.computed_distances_in_refines; result.computed_distances_in_reverse_refines = context.computed_distances_in_reverse_refines - result.computed_distances_in_reverse_refines; result.visited_members = context.iteration_cycles - result.visited_members; // Updating the entry point if needed if (new_target_level > max_level_copy) { entry_slot_ = new_slot; max_level_ = new_target_level; } return result; } /** * @brief Update an existing entry. Thread-safe. Supports @b heterogeneous lookups. * * ! It's assumed that different threads aren't updating the same entry at the same time. * ! The state won't be corrupted, but no transactional guarantees are provided and the * ! resulting value & neighbors list may be inconsistent. * * @tparam metric_at * A function responsible for computing the distance @b (dis-similarity) between two objects. * It should be callable into distinctly different scenarios: * - `distance_t operator() (value_at, entry_at)` - from new object to existing entries. * - `distance_t operator() (entry_at, entry_at)` - between existing entries. * For any possible `entry_at` following interfaces will work: * - `std::size_t get_slot(entry_at const &)` * - `vector_key_t get_key(entry_at const &)` * * @param[in] iterator Iterator pointing to an existing entry to be replaced. * @param[in] key External identifier/name/descriptor for the entry. * @param[in] value Content that will be compared against other entries in the index. * @param[in] metric Callable object measuring distance between ::value and present objects. * @param[in] config Configuration options for this specific operation. * @param[in] callback On-success callback, executed while the `member_ref_t` is still under lock. */ template < // typename value_at, // typename metric_at, // typename callback_at = dummy_callback_t, // typename prefetch_at = dummy_prefetch_t // > add_result_t update( // member_iterator_t iterator, // vector_key_t key, // value_at&& value, // metric_at&& metric, // index_update_config_t config = {}, // callback_at&& callback = callback_at{}, // prefetch_at&& prefetch = prefetch_at{}) usearch_noexcept_m { // Someone is gonna fuzz this, so let's make sure we cover the basics if (!config.expansion) config.expansion = default_expansion_add(); usearch_assert_m(!is_immutable(), "Can't add to an immutable index"); add_result_t result; compressed_slot_t updated_slot = iterator.slot_; // Make sure we have enough local memory to perform this request context_t& context = contexts_[config.thread]; top_candidates_t& top = context.top_candidates; next_candidates_t& next = context.next_candidates; top.clear(); next.clear(); // The top list needs one more slot than the connectivity of the base level // for the heuristic, that tries to squeeze one more element into saturated list. std::size_t connectivity_max = (std::max)(config_.connectivity_base, config_.connectivity); std::size_t top_limit = (std::max)(connectivity_max + 1, config.expansion); if (!top.reserve(top_limit)) return result.failed("Out of memory!"); if (!next.reserve(config.expansion)) return result.failed("Out of memory!"); node_t updated_node = node_at_(updated_slot); level_t updated_node_level = updated_node.level(); // Copy entry coordinates under locks level_t max_level_copy; compressed_slot_t entry_slot_copy; { std::unique_lock new_level_lock(global_mutex_); max_level_copy = max_level_; // Copy under lock entry_slot_copy = static_cast(entry_slot_); // Copy under lock } // Pull stats result.computed_distances = context.computed_distances; result.visited_members = context.iteration_cycles; // Go down the level, tracking only the closest match; // It may even be equal to the `updated_slot` compressed_slot_t closest_slot = // If we are updating the entry node itself, it won't contain any neighbors, // so we should traverse a level down to find the closest match. updated_node_level == max_level_copy // ? entry_slot_copy : search_for_one_( // value, metric, prefetch, // entry_slot_copy, max_level_copy, updated_node_level, context); // From `updated_node_level` down - perform proper extensive search for (level_t level = (std::min)(updated_node_level, max_level_copy); level >= 0; --level) { if (!search_to_update_(value, metric, prefetch, closest_slot, updated_slot, level, config.expansion, context)) return result.failed("Out of memory!"); candidates_view_t closest_view; { node_lock_t updated_lock = node_lock_(updated_slot); // TODO: Go through existing neighbors removing reverse links // for (compressed_slot_t slot : neighbors_(updated_node, level)) // remove_link_(slot, updated_slot, level); neighbors_(updated_node, level).clear(); closest_view = form_links_to_closest_(metric, updated_slot, level, context); if (closest_view.size()) closest_slot = closest_view[0].slot; } form_reverse_links_(metric, updated_slot, closest_view, value, level, context); } if (static_cast(updated_node.key()) != key) updated_node.key(key); // Normalize stats result.computed_distances = context.computed_distances - result.computed_distances; result.visited_members = context.iteration_cycles - result.visited_members; result.slot = updated_slot; callback(at(updated_slot)); return result; } /** * @brief Searches for the closest elements to the given ::query. Thread-safe. * * @param[in] query Content that will be compared against other entries in the index. * @param[in] wanted The upper bound for the number of results to return. * @param[in] config Configuration options for this specific operation. * @param[in] predicate Optional filtering predicate for `member_cref_t`. * @return Smart object referencing temporary memory. Valid until next `search()`, `add()`, or `cluster()`. */ template < // typename value_at, // typename metric_at, // typename predicate_at = dummy_predicate_t, // typename prefetch_at = dummy_prefetch_t // > search_result_t search( // value_at&& query, // std::size_t wanted, // metric_at&& metric, // index_search_config_t config = {}, // predicate_at&& predicate = predicate_at{}, // prefetch_at&& prefetch = prefetch_at{}) const usearch_noexcept_m { // Someone is gonna fuzz this, so let's make sure we cover the basics if (!wanted) return search_result_t{}; // Expansion factor set to zero is equivalent to the default value if (!config.expansion) config.expansion = default_expansion_search(); // Using references is cleaner, but would result in UBSan false positives context_t* context_ptr = contexts_.data() ? contexts_.data() + config.thread : nullptr; top_candidates_t* top_ptr = context_ptr ? &context_ptr->top_candidates : nullptr; search_result_t result{*this, top_ptr}; if (!nodes_count_.load(std::memory_order_relaxed)) return result; usearch_assert_m(contexts_.size() > config.thread, "Thread index out of bounds"); context_t& context = *context_ptr; top_candidates_t& top = *top_ptr; // Go down the level, tracking only the closest match result.computed_distances = context.computed_distances; result.visited_members = context.iteration_cycles; if (config.exact) { if (!top.reserve(wanted)) return result.failed("Out of memory!"); search_exact_(query, metric, predicate, wanted, context); } else { next_candidates_t& next = context.next_candidates; std::size_t expansion = (std::max)(config.expansion, wanted); usearch_assert_m(expansion > 0, "Expansion factor can't be a zero!"); if (!next.reserve(expansion)) return result.failed("Out of memory!"); if (!top.reserve(expansion)) return result.failed("Out of memory!"); compressed_slot_t closest_slot = search_for_one_( query, metric, prefetch, static_cast(entry_slot_), max_level_, 0, context); // For bottom layer we need a more optimized procedure if (!search_to_find_in_base_(query, metric, predicate, prefetch, closest_slot, expansion, context)) return result.failed("Out of memory!"); } top.sort_ascending(); top.shrink(wanted); // Normalize stats result.computed_distances = context.computed_distances - result.computed_distances; result.visited_members = context.iteration_cycles - result.visited_members; result.count = top.size(); return result; } /** * @brief Identifies the closest cluster to the given ::query. Thread-safe. * * @param[in] query Content that will be compared against other entries in the index. * @param[in] level The index level to target. Higher means lower resolution. * @param[in] config Configuration options for this specific operation. * @param[in] predicate Optional filtering predicate for `member_cref_t`. * @return Smart object referencing temporary memory. Valid until next `search()`, `add()`, or `cluster()`. */ template < // typename value_at, // typename metric_at, // typename predicate_at = dummy_predicate_t, // typename prefetch_at = dummy_prefetch_t // > cluster_result_t cluster( // value_at&& query, // std::size_t level, // metric_at&& metric, // index_cluster_config_t config = {}, // predicate_at&& predicate = predicate_at{}, // prefetch_at&& prefetch = prefetch_at{}) const noexcept { context_t& context = contexts_[config.thread]; cluster_result_t result; if (!nodes_count_) return result.failed("No clusters to identify"); // Go down the level, tracking only the closest match result.computed_distances = context.computed_distances; result.visited_members = context.iteration_cycles; next_candidates_t& next = context.next_candidates; std::size_t expansion = config.expansion; if (!next.reserve(expansion)) return result.failed("Out of memory!"); result.cluster.member = at(search_for_one_(query, metric, prefetch, static_cast(entry_slot_), max_level_, static_cast(level <= 0 ? 0 : level - 1), context)); result.cluster.distance = context.measure(query, result.cluster.member, metric); // Normalize stats result.computed_distances = context.computed_distances - result.computed_distances; result.visited_members = context.iteration_cycles - result.visited_members; (void)predicate; return result; } #if defined(USEARCH_USE_PRAGMA_REGION) #pragma endregion #pragma region Metadata #endif struct stats_t { std::size_t nodes{}; std::size_t edges{}; std::size_t max_edges{}; std::size_t allocated_bytes{}; }; /** * @brief Aggregates stats on the number of nodes, edges, and memory usage across all levels. */ stats_t stats() const noexcept { stats_t result{}; for (std::size_t i = 0; i != size(); ++i) { node_t node = node_at_(i); std::size_t max_edges = node.level() * config_.connectivity + config_.connectivity_base; std::size_t edges = 0; for (level_t level = 0; level <= node.level(); ++level) edges += neighbors_(node, level).size(); ++result.nodes; result.allocated_bytes += node_bytes_(node).size(); result.edges += edges; result.max_edges += max_edges; } return result; } /** * @brief Aggregates stats on the number of nodes, edges, and memory usage up to a specific level. * * The `level` parameter is zero-based, where `0` is the base level. * For example, `level=1` will include the base level and the first level of connections. */ stats_t stats(std::size_t level) const noexcept { stats_t result{}; std::size_t neighbors_bytes = !level ? pre_.neighbors_base_bytes : pre_.neighbors_bytes; std::size_t max_edges_per_node = !level ? config_.connectivity_base : config_.connectivity; for (std::size_t i = 0; i != size(); ++i) { node_t node = node_at_(i); if (static_cast(node.level()) < level) continue; ++result.nodes; result.edges += neighbors_(node, level).size(); result.allocated_bytes += node_head_bytes_() + neighbors_bytes; } result.max_edges = result.nodes * max_edges_per_node; return result; } /** * @brief Aggregates stats on the number of nodes, edges, and memory usage up to a specific level, * simultaneously exporting the stats for each level into the `stats_per_level` C-style array. * * The `max_level` parameter is zero-based, where `0` is the base level. * For example, `max_level=1` will include the base level and the first level of connections. */ stats_t stats(stats_t* stats_per_level, std::size_t max_level) const noexcept { std::size_t head_bytes = node_head_bytes_(); for (std::size_t i = 0; i != size(); ++i) { node_t node = node_at_(i); stats_per_level[0].nodes++; stats_per_level[0].edges += neighbors_(node, 0).size(); stats_per_level[0].allocated_bytes += pre_.neighbors_base_bytes + head_bytes; level_t node_level = static_cast(node.level()); for (level_t l = 1; l <= (std::min)(node_level, static_cast(max_level)); ++l) { stats_per_level[l].nodes++; stats_per_level[l].edges += neighbors_(node, l).size(); stats_per_level[l].allocated_bytes += pre_.neighbors_bytes; } } // The `max_edges` parameter can be inferred from `nodes` stats_per_level[0].max_edges = stats_per_level[0].nodes * config_.connectivity_base; for (std::size_t l = 1; l <= max_level; ++l) stats_per_level[l].max_edges = stats_per_level[l].nodes * config_.connectivity; // Aggregate stats across levels stats_t result{}; for (std::size_t l = 0; l <= max_level; ++l) result.nodes += stats_per_level[l].nodes, // result.edges += stats_per_level[l].edges, // result.allocated_bytes += stats_per_level[l].allocated_bytes, // result.max_edges += stats_per_level[l].max_edges; // return result; } /** * @brief A relatively accurate lower bound on the amount of memory consumed by the system. * In practice it's error will be below 10%. * * @see `serialized_length` for the length of the binary serialized representation. */ std::size_t memory_usage(std::size_t allocator_entry_bytes = default_allocator_entry_bytes()) const noexcept { std::size_t total = 0; if (!viewed_file_) { stats_t s = stats(); total += s.allocated_bytes; total += s.nodes * allocator_entry_bytes; } // Temporary data-structures, proportional to the number of nodes: total += limits_.members * sizeof(node_t) + allocator_entry_bytes; // Temporary data-structures, proportional to the number of threads: total += limits_.threads() * sizeof(context_t) + allocator_entry_bytes * 3; return total; } std::size_t memory_usage_per_node(level_t level) const noexcept { return node_bytes_(level); } double inverse_log_connectivity() const { return pre_.inverse_log_connectivity; } std::size_t neighbors_base_bytes() const { return pre_.neighbors_base_bytes; } std::size_t neighbors_bytes() const { return pre_.neighbors_bytes; } #if defined(USEARCH_USE_PRAGMA_REGION) #pragma endregion #pragma region Serialization #endif /** * @brief Estimate the binary length (in bytes) of the serialized index. */ std::size_t serialized_length() const noexcept { std::size_t neighbors_length = 0; for (std::size_t i = 0; i != size(); ++i) neighbors_length += node_bytes_(node_at_(i).level()) + sizeof(level_t); return sizeof(index_serialized_header_t) + neighbors_length; } /** * @brief Saves serialized binary index representation to a stream. */ template serialization_result_t save_to_stream(output_callback_at&& output, progress_at&& progress = {}) const noexcept { serialization_result_t result; // Export some basic metadata index_serialized_header_t header; header.size = nodes_count_; header.connectivity = config_.connectivity; header.connectivity_base = config_.connectivity_base; header.max_level = max_level_; header.entry_slot = entry_slot_; if (!output(&header, sizeof(header))) return result.failed("Failed to serialize the header into stream"); // Progress status std::size_t processed = 0; std::size_t const total = 2 * header.size; // Export the number of levels per node // That is both enough to estimate the overall memory consumption, // and to be able to estimate the offsets of every entry in the file. for (std::size_t i = 0; i != header.size; ++i) { node_t node = node_at_(i); level_t level = node.level(); if (!output(&level, sizeof(level))) return result.failed("Failed to serialize into stream"); if (!progress(++processed, total)) return result.failed("Terminated by user"); } // After that dump the nodes themselves for (std::size_t i = 0; i != header.size; ++i) { span_bytes_t node_bytes = node_bytes_(node_at_(i)); if (!output(node_bytes.data(), node_bytes.size())) return result.failed("Failed to serialize into stream"); if (!progress(++processed, total)) return result.failed("Terminated by user"); } return {}; } /** * @brief Symmetric to `save_from_stream`, pulls data from a stream. */ template serialization_result_t load_from_stream(input_callback_at&& input, progress_at&& progress = {}) noexcept { serialization_result_t result; // Remove previously stored objects index_limits_t old_limits = limits_; reset(); // Pull basic metadata index_serialized_header_t header; if (!input(&header, sizeof(header))) return result.failed("Failed to pull the header from the stream"); // We are loading an empty index, no more work to do if (!header.size) { reset(); return result; } // Allocate some dynamic memory to read all the levels using levels_allocator_t = typename dynamic_allocator_traits_t::template rebind_alloc; buffer_gt levels(header.size); if (!levels) return result.failed("Out of memory"); if (!input(levels, header.size * sizeof(level_t))) return result.failed("Failed to pull nodes levels from the stream"); // Submit metadata config_.connectivity = header.connectivity; config_.connectivity_base = header.connectivity_base; error_t error = config_.validate(); if (error) return result.failed(std::move(error)); pre_ = precompute_(config_); index_limits_t limits; limits.members = header.size; limits.threads_add = (std::max)(1, old_limits.threads_add); limits.threads_search = (std::max)(1, old_limits.threads_search); if (!reserve(limits)) { reset(); return result.failed("Out of memory"); } nodes_count_ = header.size; max_level_ = static_cast(header.max_level); entry_slot_ = static_cast(header.entry_slot); // Load the nodes for (std::size_t i = 0; i != header.size; ++i) { span_bytes_t node_bytes = node_malloc_(levels[i]); if (!input(node_bytes.data(), node_bytes.size())) { reset(); return result.failed("Failed to pull nodes from the stream"); } nodes_[i] = node_t{node_bytes.data()}; if (!progress(i + 1, header.size)) return result.failed("Terminated by user"); } return {}; } template serialization_result_t save(char const* file_path, progress_at&& progress = {}) const noexcept { return save(output_file_t(file_path), std::forward(progress)); } template serialization_result_t load(char const* file_path, progress_at&& progress = {}) noexcept { return load(input_file_t(file_path), std::forward(progress)); } /** * @brief Saves serialized binary index representation to a file, generally on disk. */ template serialization_result_t save(output_file_t file, progress_at&& progress = {}) const noexcept { serialization_result_t io_result = file.open_if_not(); if (!io_result) return io_result; serialization_result_t stream_result = save_to_stream( [&](void* buffer, std::size_t length) { io_result = file.write(buffer, length); return !!io_result; }, std::forward(progress)); if (!stream_result) { // Drop generic messages like "end of file reached" in favor // of more specific messages from the stream io_result.error.release(); return stream_result; } return io_result; } /** * @brief Memory-maps the serialized binary index representation from disk, * @b without copying data into RAM, and fetching it on-demand. */ template serialization_result_t save(memory_mapped_file_t file, std::size_t offset = 0, progress_at&& progress = {}) const noexcept { serialization_result_t io_result = file.open_if_not(); if (!io_result) return io_result; serialization_result_t stream_result = save_to_stream( [&](void* buffer, std::size_t length) { if (offset + length > file.size()) return false; std::memcpy(file.data() + offset, buffer, length); offset += length; return true; }, std::forward(progress)); return stream_result; } /** * @brief Loads the serialized binary index representation from disk to RAM. * Adjusts the configuration properties of the constructed index to * match the settings in the file. */ template serialization_result_t load(input_file_t file, progress_at&& progress = {}) noexcept { serialization_result_t io_result = file.open_if_not(); if (!io_result) return io_result; serialization_result_t stream_result = load_from_stream( [&](void* buffer, std::size_t length) { io_result = file.read(buffer, length); return !!io_result; }, std::forward(progress)); if (!stream_result) { // Drop generic messages like "end of file reached" in favor // of more specific messages from the stream io_result.error.release(); return stream_result; } return io_result; } /** * @brief Loads the serialized binary index representation from disk to RAM. * Adjusts the configuration properties of the constructed index to * match the settings in the file. */ template serialization_result_t load(memory_mapped_file_t file, std::size_t offset = 0, progress_at&& progress = {}) noexcept { serialization_result_t io_result = file.open_if_not(); if (!io_result) return io_result; serialization_result_t stream_result = load_from_stream( [&](void* buffer, std::size_t length) { if (offset + length > file.size()) return false; std::memcpy(buffer, file.data() + offset, length); offset += length; return true; }, std::forward(progress)); return stream_result; } /** * @brief Memory-maps the serialized binary index representation from disk, * @b without copying data into RAM, and fetching it on-demand. */ template serialization_result_t view(memory_mapped_file_t file, std::size_t offset = 0, progress_at&& progress = {}) noexcept { // Remove previously stored objects index_limits_t old_limits = limits_; reset(); serialization_result_t result = file.open_if_not(); if (!result) return result; // Pull basic metadata index_serialized_header_t header; if (file.size() - offset < sizeof(header)) return result.failed("File is corrupted and lacks a header"); std::memcpy(&header, file.data() + offset, sizeof(header)); if (!header.size) { reset(); return result; } // Precompute offsets of every node, but before that we need to update the configs // This could have been done with `std::exclusive_scan`, but it's only available from C++17. using offsets_allocator_t = typename dynamic_allocator_traits_t::template rebind_alloc; buffer_gt offsets(header.size); if (!offsets) return result.failed("Out of memory"); config_.connectivity = header.connectivity; config_.connectivity_base = header.connectivity_base; error_t error = config_.validate(); if (error) return result.failed(std::move(error)); pre_ = precompute_(config_); misaligned_ptr_gt levels{(byte_t*)file.data() + offset + sizeof(header)}; offsets[0u] = offset + sizeof(header) + sizeof(level_t) * header.size; for (std::size_t i = 1; i < header.size; ++i) offsets[i] = offsets[i - 1] + node_bytes_(levels[i - 1]); std::size_t total_bytes = offsets[header.size - 1] + node_bytes_(levels[header.size - 1]); if (file.size() < total_bytes) { reset(); return result.failed("File is corrupted and can't fit all the nodes"); } // Submit metadata and reserve memory index_limits_t limits; limits.members = header.size; limits.threads_add = (std::max)(1, old_limits.threads_add); limits.threads_search = (std::max)(1, old_limits.threads_search); if (!reserve(limits)) { reset(); return result.failed("Out of memory"); } nodes_count_ = header.size; max_level_ = static_cast(header.max_level); entry_slot_ = static_cast(header.entry_slot); // Rapidly address all the nodes for (std::size_t i = 0; i != header.size; ++i) { nodes_[i] = node_t{(byte_t*)file.data() + offsets[i]}; if (!progress(i + 1, header.size)) return result.failed("Terminated by user"); } viewed_file_ = std::move(file); return {}; } #if defined(USEARCH_USE_PRAGMA_REGION) #pragma endregion #endif /** * @brief Performs compaction on the whole HNSW index, purging some entries * and links to them, while also generating a more efficient mapping, * putting the more frequently used entries closer together. * * @param[in] values A []-subscriptable object, providing access to the values. * @param[in] metric Callable object measuring distance between any ::values and present objects. * @param[in] slot_transition Callable object to inform changes in slot assignments. * @param[in] executor Thread-pool to execute the job in parallel. * @param[in] progress Callback to report the execution progress. * @param[in] prefetch Callable object to prefetch data into the cache. */ template void compact( // values_at&& values, // metric_at&& metric, // slot_transition_at&& slot_transition, // executor_at&& executor = executor_at{}, // progress_at&& progress = progress_at{}, // prefetch_at&& prefetch = prefetch_at{}) noexcept { // Export all the keys, slots, and levels. // Partition them with the predicate. // Sort the allowed entries in descending order of their level. // Create a new array mapping old slots to the new ones (INT_MAX for deleted items). struct slot_level_t { compressed_slot_t old_slot; compressed_slot_t cluster; level_t level; }; using slot_level_allocator_t = typename dynamic_allocator_traits_t::template rebind_alloc; buffer_gt slots_and_levels(size()); // Progress status std::atomic do_tasks{true}; std::atomic processed{0}; std::size_t const total = 3 * slots_and_levels.size(); // For every bottom level node, determine its parent cluster executor.dynamic(slots_and_levels.size(), [&](std::size_t thread_idx, std::size_t old_slot_as_uint) { context_t& context = contexts_[thread_idx]; compressed_slot_t old_slot = static_cast(old_slot_as_uint); compressed_slot_t cluster = search_for_one_( // values[citerator_at(old_slot)], // metric, prefetch, // static_cast(entry_slot_), max_level_, 0, context); slots_and_levels[old_slot] = {old_slot, cluster, node_at_(old_slot).level()}; ++processed; if (thread_idx == 0) do_tasks = progress(processed.load(), total); return do_tasks.load(); }); if (!do_tasks.load()) return; // Where the actual permutation happens: std::sort(slots_and_levels.begin(), slots_and_levels.end(), [](slot_level_t const& a, slot_level_t const& b) { return a.level == b.level ? a.cluster < b.cluster : a.level > b.level; }); using size_allocator_t = typename dynamic_allocator_traits_t::template rebind_alloc; buffer_gt old_slot_to_new(slots_and_levels.size()); for (std::size_t new_slot = 0; new_slot != slots_and_levels.size(); ++new_slot) old_slot_to_new[slots_and_levels[new_slot].old_slot] = new_slot; // Erase all the incoming links buffer_gt reordered_nodes(slots_and_levels.size()); tape_allocator_t reordered_tape; for (std::size_t new_slot = 0; new_slot != slots_and_levels.size(); ++new_slot) { std::size_t old_slot = slots_and_levels[new_slot].old_slot; node_t old_node = node_at_(old_slot); std::size_t node_bytes = node_bytes_(old_node.level()); byte_t* new_data = (byte_t*)reordered_tape.allocate(node_bytes); node_t new_node{new_data}; std::memcpy(new_data, old_node.tape(), node_bytes); for (level_t level = 0; level <= old_node.level(); ++level) for (misaligned_ref_gt neighbor : neighbors_(new_node, level)) neighbor = static_cast(old_slot_to_new[compressed_slot_t(neighbor)]); reordered_nodes[new_slot] = new_node; if (!progress(++processed, total)) return; } for (std::size_t new_slot = 0; new_slot != slots_and_levels.size(); ++new_slot) { std::size_t old_slot = slots_and_levels[new_slot].old_slot; slot_transition(node_at_(old_slot).ckey(), // static_cast(old_slot), // static_cast(new_slot)); if (!progress(++processed, total)) return; } nodes_ = std::move(reordered_nodes); tape_allocator_ = std::move(reordered_tape); entry_slot_ = old_slot_to_new[entry_slot_]; } /** * @brief Scans the whole collection, removing the links leading towards * banned entries. This essentially isolates some nodes from the rest * of the graph, while keeping their outgoing links, in case the node * is structurally relevant and has a crucial role in the index. * It won't reclaim the memory. * * @param[in] allow_member Predicate to mark nodes for isolation. * @param[in] executor Thread-pool to execute the job in parallel. * @param[in] progress Callback to report the execution progress. */ template < // typename allow_member_at = dummy_predicate_t, // typename executor_at = dummy_executor_t, // typename progress_at = dummy_progress_t // > void isolate( // allow_member_at&& allow_member, // executor_at&& executor = executor_at{}, // progress_at&& progress = progress_at{}) noexcept { // Progress status std::atomic do_tasks{true}; std::atomic processed{0}; // Erase all the incoming links std::size_t nodes_count = size(); executor.dynamic(nodes_count, [&](std::size_t thread_idx, std::size_t node_idx) { node_t node = node_at_(node_idx); for (level_t level = 0; level <= node.level(); ++level) { neighbors_ref_t neighbors = neighbors_(node, level); neighbors.erase_if([&](compressed_slot_t neighbor_slot) { node_t neighbor = node_at_(neighbor_slot); return !allow_member(member_cref_t{neighbor.ckey(), neighbor_slot}); }); } ++processed; if (thread_idx == 0) do_tasks = progress(processed.load(), nodes_count); return do_tasks.load(); }); // At the end report the latest numbers, because the reporter thread may be finished earlier progress(processed.load(), nodes_count); } private: inline static precomputed_constants_t precompute_(index_config_t const& config) noexcept { precomputed_constants_t pre; pre.inverse_log_connectivity = 1.0 / std::log(static_cast(config.connectivity)); pre.neighbors_bytes = config.connectivity * sizeof(compressed_slot_t) + sizeof(neighbors_count_t); pre.neighbors_base_bytes = config.connectivity_base * sizeof(compressed_slot_t) + sizeof(neighbors_count_t); return pre; } using span_bytes_t = span_gt; inline span_bytes_t node_bytes_(node_t node) const noexcept { return {node.tape(), node_bytes_(node.level())}; } inline std::size_t node_bytes_(level_t level) const noexcept { return node_head_bytes_() + node_neighbors_bytes_(level); } inline std::size_t node_neighbors_bytes_(node_t node) const noexcept { return node_neighbors_bytes_(node.level()); } inline std::size_t node_neighbors_bytes_(level_t level) const noexcept { return pre_.neighbors_base_bytes + pre_.neighbors_bytes * level; } span_bytes_t node_malloc_(level_t level) noexcept { std::size_t node_bytes = node_bytes_(level); byte_t* data = (byte_t*)tape_allocator_.allocate(node_bytes); return data ? span_bytes_t{data, node_bytes} : span_bytes_t{}; } node_t node_make_(vector_key_t key, level_t level) noexcept { span_bytes_t node_bytes = node_malloc_(level); if (!node_bytes) return {}; std::memset(node_bytes.data(), 0, node_bytes.size()); node_t node{(byte_t*)node_bytes.data()}; node.key(key); node.level(level); return node; } node_t node_make_copy_(span_bytes_t old_bytes) noexcept { byte_t* data = (byte_t*)tape_allocator_.allocate(old_bytes.size()); if (!data) return {}; std::memcpy(data, old_bytes.data(), old_bytes.size()); return node_t{data}; } void node_free_(std::size_t idx) noexcept { if (viewed_file_) return; node_t& node = nodes_[idx]; tape_allocator_.deallocate(node.tape(), node_bytes_(node).size()); node = node_t{}; } inline node_t node_at_(std::size_t idx) const noexcept { return nodes_[idx]; } inline neighbors_ref_t neighbors_base_(node_t node) const noexcept { return {node.neighbors_tape()}; } inline neighbors_ref_t neighbors_non_base_(node_t node, level_t level) const noexcept { usearch_assert_m(level > 0 && level <= node.level(), "Linking to missing level"); return {node.neighbors_tape() + pre_.neighbors_base_bytes + (level - 1) * pre_.neighbors_bytes}; } inline neighbors_ref_t neighbors_(node_t node, level_t level) const noexcept { return level ? neighbors_non_base_(node, level) : neighbors_base_(node); } struct node_lock_t { nodes_mutexes_t& mutexes; std::size_t slot; inline ~node_lock_t() noexcept { mutexes.atomic_reset(slot); } }; inline node_lock_t node_lock_(std::size_t slot) const noexcept { while (nodes_mutexes_.atomic_set(slot)) ; return {nodes_mutexes_, slot}; } struct optional_node_lock_t { nodes_mutexes_t& mutexes; std::size_t slot; inline ~optional_node_lock_t() noexcept { if (slot != std::numeric_limits::max()) mutexes.atomic_reset(slot); } }; inline optional_node_lock_t optional_node_lock_(std::size_t slot, bool condition) const noexcept { if (condition) { while (nodes_mutexes_.atomic_set(slot)) ; return {nodes_mutexes_, slot}; } else { return {nodes_mutexes_, std::numeric_limits::max()}; } } struct node_conditional_lock_t { nodes_mutexes_t& mutexes; std::size_t slot; inline ~node_conditional_lock_t() noexcept { if (slot != std::numeric_limits::max()) mutexes.atomic_reset(slot); } }; inline node_conditional_lock_t node_try_conditional_lock_(std::size_t slot, bool condition, bool& failed_to_acquire) const noexcept { if (!condition) { failed_to_acquire = false; return {nodes_mutexes_, std::numeric_limits::max()}; } failed_to_acquire = nodes_mutexes_.atomic_set(slot); return {nodes_mutexes_, failed_to_acquire ? std::numeric_limits::max() : slot}; } template candidates_view_t form_links_to_closest_( // metric_at&& metric, std::size_t new_slot, level_t level, context_t& context) usearch_noexcept_m { node_t new_node = node_at_(new_slot); top_candidates_t& top = context.top_candidates; usearch_assert_m(top.size() || !require_non_empty_ak, "No candidates found"); candidates_view_t top_view = refine_(metric, config_.connectivity, top, context, context.computed_distances_in_refines); usearch_assert_m(top_view.size() || !require_non_empty_ak, "This would lead to isolated nodes"); // Outgoing links from `new_slot`: neighbors_ref_t new_neighbors = neighbors_(new_node, level); usearch_assert_m(!new_neighbors.size(), "The newly inserted element should have blank link list"); for (std::size_t idx = 0; idx != top_view.size(); idx++) { usearch_assert_m(!new_neighbors[idx], "Possible memory corruption"); usearch_assert_m(level <= node_at_(top_view[idx].slot).level(), "Linking to missing level"); new_neighbors.push_back(top_view[idx].slot); } return top_view; } template void form_reverse_links_( // metric_at&& metric, compressed_slot_t new_slot, candidates_view_t new_neighbors, value_at&& value, level_t level, context_t& context) usearch_noexcept_m { top_candidates_t& top_for_refine = context.top_for_refine; std::size_t const connectivity_max = level ? config_.connectivity : config_.connectivity_base; // Reverse links from the neighbors: for (auto new_neighbor : new_neighbors) { compressed_slot_t close_slot = new_neighbor.slot; if (close_slot == new_slot) continue; node_lock_t close_lock = node_lock_(close_slot); node_t close_node = node_at_(close_slot); neighbors_ref_t close_header = neighbors_(close_node, level); // The node may have no neighbors only in one case, when it's the first one in the index, // but that is problematic to track in multi-threaded environments, where the order of insertion // is not guaranteed. // usearch_assert_m(close_header.size() || new_slot == 1, "Possible corruption - isolated node"); usearch_assert_m(close_header.size() <= connectivity_max, "Possible corruption - overflow"); usearch_assert_m(close_slot != new_slot, "Self-loops are impossible"); usearch_assert_m(level <= close_node.level(), "Linking to missing level"); // If `new_slot` is already present in the neighboring connections of `close_slot` // then no need to modify any connections or run the heuristics. if (close_header.size() < connectivity_max) { close_header.push_back(new_slot); continue; } top_for_refine.clear(); top_for_refine.insert_reserved({context.measure(value, citerator_at(close_slot), metric), new_slot}); for (compressed_slot_t successor_slot : close_header) top_for_refine.insert_reserved( {context.measure(citerator_at(close_slot), citerator_at(successor_slot), metric), successor_slot}); // Export the results: close_header.clear(); candidates_view_t top_view = refine_(metric, connectivity_max, top_for_refine, context, context.computed_distances_in_reverse_refines); usearch_assert_m(top_view.size(), "This would lead to isolated nodes"); for (std::size_t idx = 0; idx != top_view.size(); idx++) close_header.push_back(top_view[idx].slot); } } level_t choose_random_level_(std::default_random_engine& level_generator) const noexcept { std::uniform_real_distribution distribution(0.0, 1.0); double r = -std::log(distribution(level_generator)) * pre_.inverse_log_connectivity; return (level_t)r; } struct candidates_range_t; class candidates_iterator_t { friend struct candidates_range_t; index_gt const& index_; neighbors_ref_t neighbors_; visits_hash_set_t& visits_; std::size_t current_; candidates_iterator_t& skip_missing() noexcept { if (!visits_.size()) return *this; while (current_ != neighbors_.size()) { compressed_slot_t neighbor_slot = neighbors_[current_]; if (visits_.test(neighbor_slot)) current_++; else break; } return *this; } public: using element_t = compressed_slot_t; using iterator_category = std::forward_iterator_tag; using value_type = element_t; using difference_type = std::ptrdiff_t; using pointer = misaligned_ptr_gt; using reference = misaligned_ref_gt; value_type operator*() const noexcept { return neighbors_[current_]; } candidates_iterator_t(index_gt const& index, neighbors_ref_t neighbors, visits_hash_set_t& visits, std::size_t progress) noexcept : index_(index), neighbors_(neighbors), visits_(visits), current_(progress) {} candidates_iterator_t operator++(int) noexcept { return candidates_iterator_t(index_, neighbors_, visits_, current_ + 1).skip_missing(); } candidates_iterator_t& operator++() noexcept { ++current_; skip_missing(); return *this; } bool operator==(candidates_iterator_t const& other) noexcept { return current_ == other.current_; } bool operator!=(candidates_iterator_t const& other) noexcept { return current_ != other.current_; } vector_key_t key() const noexcept { return index_.node_at_(slot()).key(); } compressed_slot_t slot() const noexcept { return neighbors_[current_]; } friend inline std::size_t get_slot(candidates_iterator_t const& it) noexcept { return it.slot(); } friend inline vector_key_t get_key(candidates_iterator_t const& it) noexcept { return it.key(); } }; struct candidates_range_t { index_gt const& index; neighbors_ref_t neighbors; visits_hash_set_t& visits; candidates_iterator_t begin() const noexcept { return candidates_iterator_t{index, neighbors, visits, 0}.skip_missing(); } candidates_iterator_t end() const noexcept { return {index, neighbors, visits, neighbors.size()}; } }; template compressed_slot_t search_for_one_( // value_at&& query, metric_at&& metric, prefetch_at&& prefetch, // compressed_slot_t closest_slot, level_t begin_level, level_t end_level, context_t& context) const noexcept { visits_hash_set_t& visits = context.visits; visits.clear(); // Optional prefetching if (!is_dummy()) prefetch(citerator_at(closest_slot), citerator_at(closest_slot) + 1); distance_t closest_dist = context.measure(query, citerator_at(closest_slot), metric); for (level_t level = begin_level; level > end_level; --level) { bool changed; do { changed = false; node_lock_t closest_lock = node_lock_(closest_slot); neighbors_ref_t closest_neighbors = neighbors_non_base_(node_at_(closest_slot), level); // Optional prefetching if (!is_dummy()) { candidates_range_t missing_candidates{*this, closest_neighbors, visits}; prefetch(missing_candidates.begin(), missing_candidates.end()); } // Actual traversal for (compressed_slot_t candidate_slot : closest_neighbors) { distance_t candidate_dist = context.measure(query, citerator_at(candidate_slot), metric); if (candidate_dist < closest_dist) { closest_dist = candidate_dist; closest_slot = candidate_slot; changed = true; } } context.iteration_cycles++; } while (changed); } return closest_slot; } /** * @brief Traverses a layer of a graph, to find the best place to insert a new node. * Locks the nodes in the process, assuming other threads are updating neighbors lists. * @return `true` if procedure succeeded, `false` if run out of memory. */ template bool search_to_insert_( // value_at&& query, metric_at&& metric, prefetch_at&& prefetch, // compressed_slot_t start_slot, level_t level, std::size_t top_limit, context_t& context) noexcept { visits_hash_set_t& visits = context.visits; next_candidates_t& next = context.next_candidates; // pop min, push top_candidates_t& top = context.top_candidates; // pop max, push visits.clear(); next.clear(); top.clear(); // At the very least we are going to explore the starting node and its neighbors if (!visits.reserve(config_.connectivity_base + 1u)) return false; // Optional prefetching if (!is_dummy()) prefetch(citerator_at(start_slot), citerator_at(start_slot) + 1); distance_t radius = context.measure(query, citerator_at(start_slot), metric); next.insert_reserved({-radius, start_slot}); top.insert_reserved({radius, start_slot}); visits.set(start_slot); // The primary loop of the graph traversal while (!next.empty()) { candidate_t candidacy = next.top(); if ((-candidacy.distance) > radius && top.size() == top_limit) break; next.pop(); context.iteration_cycles++; compressed_slot_t candidate_slot = candidacy.slot; node_t candidate_ref = node_at_(candidate_slot); node_lock_t candidate_lock = node_lock_(candidate_slot); neighbors_ref_t candidate_neighbors = neighbors_(candidate_ref, level); // Optional prefetching if (!is_dummy()) { candidates_range_t missing_candidates{*this, candidate_neighbors, visits}; prefetch(missing_candidates.begin(), missing_candidates.end()); } // Assume the worst-case when reserving memory if (!visits.reserve(visits.size() + candidate_neighbors.size())) return false; for (compressed_slot_t successor_slot : candidate_neighbors) { if (visits.set(successor_slot)) continue; // We don't access the neighbors of the `successor_slot` node, // so we don't have to lock it. // node_lock_t successor_lock = node_lock_(successor_slot); distance_t successor_dist = context.measure(query, citerator_at(successor_slot), metric); if (top.size() < top_limit || successor_dist < radius) { // This can substantially grow our priority queue: next.insert({-successor_dist, successor_slot}); // This will automatically evict poor matches: top.insert({successor_dist, successor_slot}, top_limit); radius = top.top().distance; } } } return true; } /** * @brief Traverses a layer of a graph, to find the best neighbors list for updated node. * Locks the nodes in the process, assuming other threads are updating neighbors lists. * @return `true` if procedure succeeded, `false` if run out of memory. */ template bool search_to_update_( // value_at&& query, metric_at&& metric, prefetch_at&& prefetch, // compressed_slot_t start_slot, compressed_slot_t updated_slot, level_t level, std::size_t top_limit, context_t& context) noexcept { visits_hash_set_t& visits = context.visits; next_candidates_t& next = context.next_candidates; // pop min, push top_candidates_t& top = context.top_candidates; // pop max, push visits.clear(); next.clear(); top.clear(); // At the very least we are going to explore the starting node and its neighbors if (!visits.reserve(config_.connectivity_base + 1u)) return false; // Optional prefetching if (!is_dummy()) prefetch(citerator_at(start_slot), citerator_at(start_slot) + 1); distance_t radius = context.measure(query, citerator_at(start_slot), metric); next.insert_reserved({-radius, start_slot}); visits.set(start_slot); if (start_slot != updated_slot) top.insert_reserved({radius, start_slot}); // The primary loop of the graph traversal while (!next.empty()) { candidate_t candidacy = next.top(); if ((-candidacy.distance) > radius && top.size() == top_limit) break; next.pop(); context.iteration_cycles++; compressed_slot_t candidate_slot = candidacy.slot; node_t candidate_ref = node_at_(candidate_slot); // The trickiest part of update-heavy workloads is mitigating dead-locks // in connected nodes during traversal. A "good enough" solution would be // to skip concurrent access, assuming the other "close" node is gonna add // this one when forming reverse connections. bool failed_to_acquire = false; node_conditional_lock_t candidate_lock = node_try_conditional_lock_(candidate_slot, updated_slot != candidate_slot, failed_to_acquire); if (failed_to_acquire) continue; auto optional_node_lock = optional_node_lock_(candidate_slot, updated_slot == candidate_slot); neighbors_ref_t candidate_neighbors = neighbors_(candidate_ref, level); // Optional prefetching if (!is_dummy()) { candidates_range_t missing_candidates{*this, candidate_neighbors, visits}; prefetch(missing_candidates.begin(), missing_candidates.end()); } // Assume the worst-case when reserving memory if (!visits.reserve(visits.size() + candidate_neighbors.size())) return false; for (compressed_slot_t successor_slot : candidate_neighbors) { if (visits.set(successor_slot)) continue; // We don't access the neighbors of the `successor_slot` node, // so we don't have to lock it. // node_conditional_lock_t successor_lock = // node_try_conditional_lock_(successor_slot, updated_slot != successor_slot); distance_t successor_dist = context.measure(query, citerator_at(successor_slot), metric); if (top.size() < top_limit || successor_dist < radius) { // This can substantially grow our priority queue: next.insert({-successor_dist, successor_slot}); // This will automatically evict poor matches: if (updated_slot != successor_slot) top.insert({successor_dist, successor_slot}, top_limit); radius = top.top().distance; } } } return true; } /** * @brief Traverses the @b base layer of a graph, to find a close match. * Doesn't lock any nodes, assuming read-only simultaneous access. * @return `true` if procedure succeeded, `false` if run out of memory. */ template bool search_to_find_in_base_( // value_at&& query, metric_at&& metric, predicate_at&& predicate, prefetch_at&& prefetch, // compressed_slot_t start_slot, std::size_t expansion, context_t& context) const usearch_noexcept_m { visits_hash_set_t& visits = context.visits; next_candidates_t& next = context.next_candidates; // pop min, push top_candidates_t& top = context.top_candidates; // pop max, push std::size_t const top_limit = expansion; visits.clear(); next.clear(); top.clear(); if (!visits.reserve(config_.connectivity_base + 1u)) return false; // Optional prefetching if (!is_dummy()) prefetch(citerator_at(start_slot), citerator_at(start_slot) + 1); distance_t radius = context.measure(query, citerator_at(start_slot), metric); usearch_assert_m(next.capacity(), "The `max_heap_gt` must have been reserved in the search entry point"); next.insert_reserved({-radius, start_slot}); visits.set(start_slot); // Don't populate the top list if the predicate is not satisfied if (is_dummy() || predicate(member_cref_t{node_at_(start_slot).ckey(), start_slot})) { usearch_assert_m(top.capacity(), "The `sorted_buffer_gt` must have been reserved in the search entry point"); top.insert_reserved({radius, start_slot}); } while (!next.empty()) { candidate_t candidate = next.top(); if ((-candidate.distance) > radius && top.size() == top_limit) break; next.pop(); context.iteration_cycles++; neighbors_ref_t candidate_neighbors = neighbors_base_(node_at_(candidate.slot)); // Optional prefetching if (!is_dummy()) { candidates_range_t missing_candidates{*this, candidate_neighbors, visits}; prefetch(missing_candidates.begin(), missing_candidates.end()); } // Assume the worst-case when reserving memory if (!visits.reserve(visits.size() + candidate_neighbors.size())) return false; for (compressed_slot_t successor_slot : candidate_neighbors) { if (visits.set(successor_slot)) continue; distance_t successor_dist = context.measure(query, citerator_at(successor_slot), metric); if (top.size() < top_limit || successor_dist < radius) { // This can substantially grow our priority queue: next.insert({-successor_dist, successor_slot}); if (is_dummy() || predicate(member_cref_t{node_at_(successor_slot).ckey(), successor_slot})) { top.insert({successor_dist, successor_slot}, top_limit); radius = top.top().distance; } } } } return true; } /** * @brief Iterates through all members, without actually touching the index. */ template void search_exact_( // value_at&& query, metric_at&& metric, predicate_at&& predicate, // std::size_t count, context_t& context) const noexcept { top_candidates_t& top = context.top_candidates; top.clear(); top.reserve(count); for (std::size_t i = 0; i != size(); ++i) { auto slot = static_cast(i); if (!is_dummy()) if (!predicate(at(slot))) continue; distance_t distance = context.measure(query, citerator_at(slot), metric); top.insert(candidate_t{distance, slot}, count); } } /** * @brief This algorithm from the original paper implements a heuristic, * that massively reduces the number of connections a point has, * to keep only the neighbors, that are from each other. */ template candidates_view_t refine_( // metric_at&& metric, // std::size_t needed, top_candidates_t& top, context_t& context, // std::size_t& refines_counter) const noexcept { // Avoid expensive computation, if the set is already small candidate_t* top_data = top.data(); std::size_t const top_count = top.size(); if (top_count < needed) return {top_data, top_count}; // Sort before processing top.sort_ascending(); std::size_t submitted_count = 1; std::size_t consumed_count = 1; /// Always equal or greater than `submitted_count`. while (submitted_count < needed && consumed_count < top_count) { candidate_t candidate = top_data[consumed_count]; bool good = true; std::size_t idx = 0; for (; idx < submitted_count; idx++) { candidate_t submitted = top_data[idx]; distance_t inter_result_dist = context.measure( // citerator_at(candidate.slot), // citerator_at(submitted.slot), // metric); if (inter_result_dist < candidate.distance) { good = false; break; } } refines_counter += idx; if (good) { top_data[submitted_count] = top_data[consumed_count]; submitted_count++; } consumed_count++; } top.shrink(submitted_count); return {top_data, submitted_count}; } }; struct join_result_t { error_t error{}; std::size_t intersection_size{}; std::size_t engagements{}; std::size_t visited_members{}; std::size_t computed_distances{}; explicit operator bool() const noexcept { return !error; } join_result_t failed(error_t message) noexcept { error = std::move(message); return std::move(*this); } }; /** * @brief Adapts the Male-Optimal Stable Marriage algorithm for unequal sets * to perform fast one-to-one matching between two large collections * of vectors, using approximate nearest neighbors search. * * @param[inout] man_to_woman Container to map ::men keys to ::women. * @param[inout] woman_to_man Container to map ::women keys to ::men. * @param[in] executor Thread-pool to execute the job in parallel. * @param[in] progress Callback to report the execution progress. */ template < // typename men_at, // typename women_at, // typename men_values_at, // typename women_values_at, // typename men_metric_at, // typename women_metric_at, // typename man_to_woman_at = dummy_key_to_key_mapping_t, // typename woman_to_man_at = dummy_key_to_key_mapping_t, // typename executor_at = dummy_executor_t, // typename progress_at = dummy_progress_t // > static join_result_t join( // men_at const& men, // women_at const& women, // men_values_at const& men_values, // women_values_at const& women_values, // men_metric_at&& men_metric, // women_metric_at&& women_metric, // index_join_config_t config = {}, // man_to_woman_at&& man_to_woman = man_to_woman_at{}, // woman_to_man_at&& woman_to_man = woman_to_man_at{}, // executor_at&& executor = executor_at{}, // progress_at&& progress = progress_at{}) noexcept { if (women.size() < men.size()) return unum::usearch::join( // women, men, // women_values, men_values, // std::forward(women_metric), std::forward(men_metric), // config, // std::forward(woman_to_man), // std::forward(man_to_woman), // std::forward(executor), // std::forward(progress)); join_result_t result; // Sanity checks and argument validation: if (&men == &women) return result.failed("Can't join with itself, consider copying"); if (config.max_proposals == 0) config.max_proposals = std::log(men.size()) + executor.size(); using proposals_count_t = std::uint16_t; config.max_proposals = (std::min)(men.size(), config.max_proposals); using distance_t = typename men_at::distance_t; using dynamic_allocator_traits_t = typename men_at::dynamic_allocator_traits_t; using man_key_t = typename men_at::vector_key_t; using woman_key_t = typename women_at::vector_key_t; // Use the `compressed_slot_t` type of the larger collection using compressed_slot_t = typename women_at::compressed_slot_t; using compressed_slot_allocator_t = typename dynamic_allocator_traits_t::template rebind_alloc; using proposals_count_allocator_t = typename dynamic_allocator_traits_t::template rebind_alloc; // Create an atomic queue, as a ring structure, from/to which // free men will be added/pulled. std::mutex free_men_mutex{}; ring_gt free_men; free_men.reserve(men.size()); for (std::size_t i = 0; i != men.size(); ++i) free_men.push(static_cast(i)); // We are gonna need some temporary memory. buffer_gt proposal_counts(men.size()); buffer_gt man_to_woman_slots(men.size()); buffer_gt woman_to_man_slots(women.size()); if (!proposal_counts || !man_to_woman_slots || !woman_to_man_slots) return result.failed("Can't temporary mappings"); compressed_slot_t missing_slot; std::memset((void*)&missing_slot, 0xFF, sizeof(compressed_slot_t)); std::memset((void*)man_to_woman_slots.data(), 0xFF, sizeof(compressed_slot_t) * men.size()); std::memset((void*)woman_to_man_slots.data(), 0xFF, sizeof(compressed_slot_t) * women.size()); std::memset(proposal_counts.data(), 0, sizeof(proposals_count_t) * men.size()); // Define locks, to limit concurrent accesses to `man_to_woman_slots` and `woman_to_man_slots`. bitset_t men_locks(men.size()), women_locks(women.size()); if (!men_locks || !women_locks) return result.failed("Can't allocate locks"); std::atomic rounds{0}; std::atomic engagements{0}; std::atomic computed_distances{0}; std::atomic visited_members{0}; std::atomic atomic_error{nullptr}; // Concurrently process all the men executor.parallel([&](std::size_t thread_idx) { index_search_config_t search_config; search_config.expansion = config.expansion; search_config.exact = config.exact; search_config.thread = thread_idx; compressed_slot_t free_man_slot; // While there exist a free man who still has a woman to propose to. while (!atomic_error.load(std::memory_order_relaxed)) { std::size_t passed_rounds = 0; std::size_t total_rounds = 0; { std::unique_lock pop_lock(free_men_mutex); if (!free_men.try_pop(free_man_slot)) // Primary exit path, we have exhausted the list of candidates break; passed_rounds = ++rounds; total_rounds = passed_rounds + free_men.size(); } if (thread_idx == 0 && !progress(passed_rounds, total_rounds)) { atomic_error.store("Terminated by user"); break; } while (men_locks.atomic_set(free_man_slot)) ; proposals_count_t& free_man_proposals = proposal_counts[free_man_slot]; if (free_man_proposals >= config.max_proposals) continue; // Find the closest woman, to whom this man hasn't proposed yet. ++free_man_proposals; auto candidates = women.search(men_values[free_man_slot], free_man_proposals, women_metric, search_config); visited_members += candidates.visited_members; computed_distances += candidates.computed_distances; if (!candidates) { atomic_error = candidates.error.release(); break; } auto match = candidates.back(); auto woman = match.member; while (women_locks.atomic_set(woman.slot)) ; compressed_slot_t husband_slot = woman_to_man_slots[woman.slot]; bool woman_is_free = husband_slot == missing_slot; if (woman_is_free) { // Engagement man_to_woman_slots[free_man_slot] = static_cast(woman.slot); woman_to_man_slots[woman.slot] = free_man_slot; engagements++; } else { distance_t distance_from_husband = women_metric(women_values[static_cast(woman.slot)], men_values[husband_slot]); distance_t distance_from_candidate = match.distance; if (distance_from_husband > distance_from_candidate) { // Break-up while (men_locks.atomic_set(husband_slot)) ; man_to_woman_slots[husband_slot] = missing_slot; men_locks.atomic_reset(husband_slot); // New Engagement man_to_woman_slots[free_man_slot] = static_cast(woman.slot); woman_to_man_slots[woman.slot] = free_man_slot; engagements++; std::unique_lock push_lock(free_men_mutex); free_men.push(husband_slot); } else { std::unique_lock push_lock(free_men_mutex); free_men.push(free_man_slot); } } men_locks.atomic_reset(free_man_slot); women_locks.atomic_reset(woman.slot); } }); if (atomic_error) return result.failed(atomic_error.load()); // Export the "slots" into keys: std::size_t intersection_size = 0; for (std::size_t man_slot = 0; man_slot != men.size(); ++man_slot) { compressed_slot_t woman_slot = man_to_woman_slots[man_slot]; if (woman_slot != missing_slot) { man_key_t man = men.at(static_cast(man_slot)).key; woman_key_t woman = women.at(woman_slot).key; man_to_woman[man] = woman; woman_to_man[woman] = man; intersection_size++; } } // Export stats result.engagements = engagements; result.intersection_size = intersection_size; result.computed_distances = computed_distances; result.visited_members = visited_members; return result; } } // namespace usearch } // namespace unum #endif