IVF-HNSW
 All Classes Functions Variables Typedefs Pages
Parser.h
1 #ifndef IVF_HNSW_LIB_PARSER_H
2 #define IVF_HNSW_LIB_PARSER_H
3 
#include <cstdio>
#include <cstdlib>
#include <cstring>
#include <iostream>
6 
7 //==============
8 // Parser Class
9 //==============
/**
 * Command-line option holder and parser.
 *
 * The constructor scans argv for "-flag value" pairs and stores each value
 * in the matching member. Members whose flag never appears keep a defined
 * default: 0 for counts, false for switches and NULL for paths.
 * Unknown flags are skipped without consuming a value, and a flag in the
 * last argv slot (i.e. with no value after it) is ignored.
 * "-h" / "--help", or running with no arguments at all, prints the usage
 * text and terminates the process with exit status 0.
 *
 * Path members point directly into argv; no copies are made.
 */
struct Parser
{
    const char *cmd;                    ///< Main command - argv[0]

    //=================
    // HNSW parameters
    //=================
    size_t M;                           ///< Min number of edges per point
    size_t efConstruction;              ///< Max number of candidate vertices in priority queue to observe during construction

    //=================
    // Data parameters
    //=================
    size_t nb;                          ///< Number of base vectors
    size_t nt;                          ///< Number of learn vectors
    size_t nsubt;                       ///< Number of learn vectors to train (random subset of the learn set)
    size_t nc;                          ///< Number of centroids for HNSW quantizer
    size_t nsubc;                       ///< Number of subcentroids per group
    size_t nq;                          ///< Number of queries
    size_t ngt;                         ///< Number of groundtruth neighbours per query
    size_t d;                           ///< Vector dimension

    //=================
    // PQ parameters
    //=================
    size_t code_size;                   ///< Code size per vector in bytes
    bool do_opq;                        ///< Turn on/off OPQ fine encoding

    //===================
    // Search parameters
    //===================
    size_t k;                           ///< Number of the closest vertices to search
    size_t nprobe;                      ///< Number of probes at query time
    size_t max_codes;                   ///< Max number of codes to visit to do a query
    size_t efSearch;                    ///< Max number of candidate vertices in priority queue to observe during searching
    bool do_pruning;                    ///< Turn on/off pruning in the grouping scheme

    //=======
    // Paths
    //=======
    const char *path_base;              ///< Path to a base set
    const char *path_learn;             ///< Path to a learn set
    const char *path_q;                 ///< Path to queries
    const char *path_gt;                ///< Path to groundtruth
    const char *path_centroids;         ///< Path to coarse centroids

    const char *path_precomputed_idxs;  ///< Path to coarse centroid indices for base points

    const char *path_info;              ///< Path to parameters of HNSW graph
    const char *path_edges;             ///< Path to edges of HNSW graph

    const char *path_pq;                ///< Path to the product quantizer for residuals
    const char *path_opq_matrix;        ///< Path to OPQ rotation matrix for OPQ fine encoding
    const char *path_norm_pq;           ///< Path to the product quantizer for norms of reconstructed base points
    const char *path_index;             ///< Path to the constructed index

    /**
     * Parse the command line.
     *
     * @param argc  number of entries in argv
     * @param argv  argument vector; argv[0] is kept as the program name
     */
    Parser(int argc, char **argv)
        : cmd(argc > 0 ? argv[0] : NULL),
          M(0), efConstruction(0),
          nb(0), nt(0), nsubt(0), nc(0), nsubc(0), nq(0), ngt(0), d(0),
          code_size(0), do_opq(false),
          k(0), nprobe(0), max_codes(0), efSearch(0), do_pruning(false),
          path_base(NULL), path_learn(NULL), path_q(NULL), path_gt(NULL),
          path_centroids(NULL), path_precomputed_idxs(NULL),
          path_info(NULL), path_edges(NULL),
          path_pq(NULL), path_opq_matrix(NULL), path_norm_pq(NULL),
          path_index(NULL)
    {
        if (argc == 1) {
            usage();
        }

        // int index: argc is an int, so this avoids a signed/unsigned comparison.
        for (int i = 1; i < argc; i++) {
            const char *a = argv[i];

            if (same(a, "-h") || same(a, "--help")) {
                usage();
            }

            // Every remaining option needs a value; a flag in the last slot has none.
            if (i == argc - 1) {
                break;
            }

            //=================
            // HNSW parameters
            //=================
            if (same(a, "-M")) read_count(a, argv[++i], M);
            else if (same(a, "-efConstruction")) read_count(a, argv[++i], efConstruction);

            //=================
            // Data parameters
            //=================
            else if (same(a, "-nb")) read_count(a, argv[++i], nb);
            else if (same(a, "-nc")) read_count(a, argv[++i], nc);
            else if (same(a, "-nsubc")) read_count(a, argv[++i], nsubc);
            else if (same(a, "-nt")) read_count(a, argv[++i], nt);
            else if (same(a, "-nsubt")) read_count(a, argv[++i], nsubt);
            else if (same(a, "-nq")) read_count(a, argv[++i], nq);
            else if (same(a, "-ngt")) read_count(a, argv[++i], ngt);
            else if (same(a, "-d")) read_count(a, argv[++i], d);

            //===============
            // PQ parameters
            //===============
            else if (same(a, "-code_size")) read_count(a, argv[++i], code_size);
            else if (same(a, "-opq")) do_opq = same(argv[++i], "on");

            //===================
            // Search parameters
            //===================
            else if (same(a, "-k")) read_count(a, argv[++i], k);
            else if (same(a, "-nprobe")) read_count(a, argv[++i], nprobe);
            else if (same(a, "-max_codes")) read_count(a, argv[++i], max_codes);
            else if (same(a, "-efSearch")) read_count(a, argv[++i], efSearch);
            else if (same(a, "-pruning")) do_pruning = same(argv[++i], "on");

            //=======
            // Paths
            //=======
            else if (same(a, "-path_base")) path_base = argv[++i];
            else if (same(a, "-path_learn")) path_learn = argv[++i];
            else if (same(a, "-path_q")) path_q = argv[++i];
            else if (same(a, "-path_gt")) path_gt = argv[++i];
            else if (same(a, "-path_centroids")) path_centroids = argv[++i];

            // The usage text documents "-path_precomputed_idxs"; the shorter
            // spelling is what older command lines used, so both are accepted.
            else if (same(a, "-path_precomputed_idxs") || same(a, "-path_precomputed_idx"))
                path_precomputed_idxs = argv[++i];

            else if (same(a, "-path_info")) path_info = argv[++i];
            else if (same(a, "-path_edges")) path_edges = argv[++i];

            else if (same(a, "-path_pq")) path_pq = argv[++i];
            else if (same(a, "-path_opq_matrix")) path_opq_matrix = argv[++i];
            else if (same(a, "-path_norm_pq")) path_norm_pq = argv[++i];
            else if (same(a, "-path_index")) path_index = argv[++i];
        }
    }

    /**
     * Print the option summary to stdout and terminate the process with
     * exit status 0. Never returns.
     */
    void usage()
    {
        printf ("Usage: %s [options]\n", cmd);
        printf ("###################\n"
                "# HNSW Parameters #\n"
                "###################\n"
                " -M # Min number of edges per point\n"
                " -efConstruction # Max number of candidate vertices in priority queue to observe during construction\n"
                "###################\n"
                "# Data Parameters #\n"
                "###################\n"
                " -nb # Number of base vectors\n"
                " -nt # Number of learn vectors\n"
                " -nsubt # Number of learn vectors to train (random subset of the learn set)\n"
                " -nc # Number of centroids for HNSW quantizer\n"
                " -nsubc # Number of subcentroids per group\n"
                " -nq # Number of queries\n"
                " -ngt # Number of groundtruth neighbours per query\n"
                " -d # Vector dimension\n"
                "#################\n"
                "# PQ Parameters #\n"
                "#################\n"
                " -code_size # Code size per vector in bytes\n"
                " -opq on/off Turn on/off OPQ compression\n"
                "#####################\n"
                "# Search Parameters #\n"
                "#####################\n"
                " -k # Number of the closest vertices to search\n"
                " -nprobe # Number of probes at query time\n"
                " -max_codes # Max number of codes to visit to do a query\n"
                " -efSearch # Max number of candidate vertices in priority queue to observe during searching\n"
                " -pruning on/off Turn on/off pruning in the grouping scheme\n"
                "#########\n"
                "# Paths #\n"
                "#########\n"
                " -path_base filename Path to a base set\n"
                " -path_learn filename Path to a learn set\n"
                " -path_q filename Path to queries\n"
                " -path_gt filename Path to groundtruth\n"
                " -path_centroids filename Path to coarse centroids\n"
                " \n"
                " -path_precomputed_idxs filename Path to coarse centroid indices for base points\n"
                " \n"
                " -path_info filename Path to parameters of HNSW graph\n"
                " -path_edges filename Path to edges of HNSW graph\n"
                " \n"
                " -path_pq filename Path to the product quantizer for residuals\n"
                " -path_opq_matrix filename Path to the rotation matrix for OPQ compression\n"
                " -path_norm_pq filename Path to the product quantizer for norms of reconstructed base points\n"
                " \n"
                " -path_index filename Path to the constructed index\n"
        );
        exit(0);
    }

private:
    /// True when the two C strings are equal.
    static bool same(const char *lhs, const char *rhs)
    {
        return strcmp(lhs, rhs) == 0;
    }

    /**
     * Parse an unsigned count from text into out.
     * On a failed conversion out is left untouched and a warning naming the
     * offending flag is written to stderr instead of silently dropping it.
     */
    static void read_count(const char *flag, const char *text, size_t &out)
    {
        size_t parsed = 0;
        if (sscanf(text, "%zu", &parsed) == 1) {
            out = parsed;
        } else {
            fprintf(stderr, "Warning: ignoring invalid value '%s' for option %s\n", text, flag);
        }
    }
};
190 
191 #endif //IVF_HNSW_LIB_PARSER_H
bool do_pruning
Turn on/off pruning in the grouping scheme.
Definition: Parser.h:45
const char * path_base
Path to a base set.
Definition: Parser.h:50
size_t nc
Number of centroids for HNSW quantizer.
Definition: Parser.h:26
size_t efSearch
Max number of candidate vertices in priority queue to observe during searching.
Definition: Parser.h:44
size_t nb
Number of base vectors.
Definition: Parser.h:23
size_t nsubc
Number of subcentroids per group.
Definition: Parser.h:27
size_t code_size
Code size per vector in bytes.
Definition: Parser.h:35
Definition: Parser.h:10
const char * path_opq_matrix
Path to OPQ rotation matrix for OPQ fine encoding.
Definition: Parser.h:62
size_t ngt
Number of groundtruth neighbours per query.
Definition: Parser.h:29
const char * path_gt
Path to groundtruth.
Definition: Parser.h:53
const char * path_precomputed_idxs
Path to coarse centroid indices for base points.
Definition: Parser.h:56
size_t nt
Number of learn vectors.
Definition: Parser.h:24
bool do_opq
Turn on/off OPQ fine encoding.
Definition: Parser.h:36
const char * path_edges
Path to edges of HNSW graph.
Definition: Parser.h:59
size_t nprobe
Number of probes at query time.
Definition: Parser.h:42
size_t max_codes
Max number of codes to visit to do a query.
Definition: Parser.h:43
size_t d
Vector dimension.
Definition: Parser.h:30
size_t efConstruction
Max number of candidate vertices in priority queue to observe during construction.
Definition: Parser.h:18
const char * path_pq
Path to the product quantizer for residuals.
Definition: Parser.h:61
size_t k
Number of the closest vertices to search.
Definition: Parser.h:41
const char * path_norm_pq
Path to the product quantizer for norms of reconstructed base points.
Definition: Parser.h:63
const char * path_info
Path to parameters of HNSW graph.
Definition: Parser.h:58
const char * path_learn
Path to a learn set.
Definition: Parser.h:51
const char * path_centroids
Path to coarse centroids.
Definition: Parser.h:54
const char * path_q
Path to queries.
Definition: Parser.h:52
size_t nsubt
Number of learn vectors to train (random subset of the learn set).
Definition: Parser.h:25
size_t M
Min number of edges per point.
Definition: Parser.h:17
const char * path_index
Path to the constructed index.
Definition: Parser.h:64
const char * cmd
Main command (argv[0]).
Definition: Parser.h:12
size_t nq
Number of queries.
Definition: Parser.h:28