IVF-HNSW
 All Classes Functions Variables Typedefs Pages
IndexIVF_HNSW.h
1 #ifndef IVF_HNSW_LIB_IVF_HNSW_H
2 #define IVF_HNSW_LIB_IVF_HNSW_H
3 
4 #include <iostream>
5 #include <fstream>
6 #include <cstdio>
7 #include <unordered_map>
8 
9 #include <faiss/index_io.h>
10 #include <faiss/Heap.h>
11 #include <faiss/ProductQuantizer.h>
12 #include <faiss/VectorTransform.h>
13 #include <faiss/FaissAssert.h>
14 #include <faiss/utils.h>
15 
16 #include <hnswlib/hnswalg.h>
17 #include "utils.h"
18 
19 namespace ivfhnsw {
43  {
44  typedef uint32_t idx_t;
45 
46  size_t d;
47  size_t nc;
48  size_t code_size;
49 
50  hnswlib::HierarchicalNSW *quantizer;
51 
52  faiss::ProductQuantizer *pq;
53  faiss::ProductQuantizer *norm_pq;
54  faiss::LinearTransform *opq_matrix;
55  bool do_opq;
56 
57  size_t nprobe;
58  size_t max_codes;
59 
60  std::vector<std::vector<idx_t> > ids;
61  std::vector<std::vector<uint8_t> > codes;
62  std::vector<std::vector<uint8_t> > norm_codes;
63 
64  protected:
65  std::vector<float> norms;
66  std::vector<float> centroid_norms;
67 
68  public:
69  explicit IndexIVF_HNSW(size_t dim, size_t ncentroids, size_t bytes_per_code, size_t nbits_per_idx);
70  virtual ~IndexIVF_HNSW();
71 
81  void build_quantizer(const char *path_data, const char *path_info, const char *path_edges,
82  size_t M=16, size_t efConstruction = 500);
83 
91  void assign (size_t n, const float *x, idx_t *labels, size_t k = 1);
92 
103  virtual void search(size_t k, const float *x, float *distances, long *labels);
104 
112  virtual void add_batch(size_t n, const float *x, const idx_t *xids, const idx_t *precomputed_idx = nullptr);
113 
119  virtual void train_pq(size_t n, const float *x);
120 
122  virtual void write(const char *path);
123 
125  virtual void read(const char *path);
126 
128  void compute_centroid_norms();
129 
131  void rotate_quantizer();
132 
133  protected:
135  std::vector<float> precomputed_table;
136 
138  float pq_L2sqr(const uint8_t *code);
139 
140  private:
141  void reconstruct(size_t n, float *x, const float *decoded_residuals, const idx_t *keys);
142  void compute_residuals(size_t n, const float *x, float *residuals, const idx_t *keys);
143  };
144 }
145 #endif //IVF_HNSW_LIB_IVF_HNSW_H
std::vector< std::vector< idx_t > > ids
Inverted lists of indices.
Definition: IndexIVF_HNSW.h:60
void assign(size_t n, const float *x, idx_t *labels, size_t k=1)
Definition: IndexIVF_HNSW.cpp:65
void compute_centroid_norms()
Compute norms of the HNSW vertices.
Definition: IndexIVF_HNSW.cpp:330
virtual void search(size_t k, const float *x, float *distances, long *labels)
Definition: IndexIVF_HNSW.cpp:165
size_t code_size
Code size per vector in bytes.
Definition: IndexIVF_HNSW.h:48
size_t nc
Number of centroids.
Definition: IndexIVF_HNSW.h:47
std::vector< float > precomputed_table
Precomputed table of size pq->M * pq->ksub.
Definition: IndexIVF_HNSW.h:135
size_t d
Vector dimension.
Definition: IndexIVF_HNSW.h:46
faiss::ProductQuantizer * norm_pq
Produces the norm codes of reconstructed base vectors.
Definition: IndexIVF_HNSW.h:53
bool do_opq
Turn on/off OPQ encoding.
Definition: IndexIVF_HNSW.h:55
Definition: IndexIVF_HNSW.h:42
std::vector< float > norms
Squared L2 norms of reconstructed base vectors.
Definition: IndexIVF_HNSW.h:65
virtual void add_batch(size_t n, const float *x, const idx_t *xids, const idx_t *precomputed_idx=nullptr)
Definition: IndexIVF_HNSW.cpp:72
virtual void write(const char *path)
Write index to the path.
Definition: IndexIVF_HNSW.cpp:279
virtual void read(const char *path)
Read index from the path.
Definition: IndexIVF_HNSW.cpp:305
size_t max_codes
Max number of codes to visit to do a query.
Definition: IndexIVF_HNSW.h:58
hnswlib::HierarchicalNSW * quantizer
Quantizer that maps vectors to inverted lists (HNSW [Y.Malkov])
Definition: IndexIVF_HNSW.h:50
std::vector< std::vector< uint8_t > > norm_codes
PQ codes of norms of reconstructed base vectors.
Definition: IndexIVF_HNSW.h:62
faiss::LinearTransform * opq_matrix
Rotation matrix for OPQ encoding.
Definition: IndexIVF_HNSW.h:54
float pq_L2sqr(const uint8_t *code)
Squared L2 distance function for PQ codes.
Definition: IndexIVF_HNSW.cpp:351
void build_quantizer(const char *path_data, const char *path_info, const char *path_edges, size_t M=16, size_t efConstruction=500)
Definition: IndexIVF_HNSW.cpp:38
faiss::ProductQuantizer * pq
Produces the residual codes.
Definition: IndexIVF_HNSW.h:52
std::vector< float > centroid_norms
Squared L2 norms of coarse centroids.
Definition: IndexIVF_HNSW.h:66
void rotate_quantizer()
Rotates the points in the coarse quantizer so that search is correct when OPQ encoding is used.
Definition: IndexIVF_HNSW.cpp:338
size_t nprobe
Number of probes at search time.
Definition: IndexIVF_HNSW.h:57
virtual void train_pq(size_t n, const float *x)
Definition: IndexIVF_HNSW.cpp:219
std::vector< std::vector< uint8_t > > codes
PQ codes of residuals.
Definition: IndexIVF_HNSW.h:61
uint32_t idx_t
All indices are of this type.
Definition: IndexIVF_HNSW.h:44