forked from facebookresearch/faiss
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathIndexIVF_c.h
185 lines (160 loc) · 6.11 KB
/
IndexIVF_c.h
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
/**
* Copyright (c) Facebook, Inc. and its affiliates.
*
* This source code is licensed under the MIT license found in the
* LICENSE file in the root directory of this source tree.
*/
// Copyright 2004-present Facebook. All Rights Reserved.
// -*- c -*-
#ifndef FAISS_INDEX_IVF_C_H
#define FAISS_INDEX_IVF_C_H
#include "Clustering_c.h"
#include "Index_c.h"
#include "faiss_c.h"
#include "impl/AuxIndexStructures_c.h"
#ifdef __cplusplus
extern "C" {
#endif
FAISS_DECLARE_CLASS_INHERITED(SearchParametersIVF, SearchParameters)
FAISS_DECLARE_DESTRUCTOR(SearchParametersIVF)
FAISS_DECLARE_SEARCH_PARAMETERS_DOWNCAST(SearchParametersIVF)
int faiss_SearchParametersIVF_new(FaissSearchParametersIVF** p_sp);
int faiss_SearchParametersIVF_new_with(
FaissSearchParametersIVF** p_sp,
FaissIDSelector* sel,
size_t nprobe,
size_t max_codes);
FAISS_DECLARE_GETTER(SearchParametersIVF, const FaissIDSelector*, sel)
FAISS_DECLARE_GETTER_SETTER(SearchParametersIVF, size_t, nprobe)
FAISS_DECLARE_GETTER_SETTER(SearchParametersIVF, size_t, max_codes)
/** Index based on a inverted file (IVF)
*
* In the inverted file, the quantizer (an Index instance) provides a
* quantization index for each vector to be added. The quantization
* index maps to a list (aka inverted list or posting list), where the
* id of the vector is then stored.
*
* At search time, the vector to be searched is also quantized, and
* only the list corresponding to the quantization index is
* searched. This speeds up the search by making it
* non-exhaustive. This can be relaxed using multi-probe search: a few
* (nprobe) quantization indices are selected and several inverted
* lists are visited.
*
* Sub-classes implement a post-filtering of the index that refines
* the distance estimation from the query to database vectors.
*/
FAISS_DECLARE_CLASS_INHERITED(IndexIVF, Index)
FAISS_DECLARE_DESTRUCTOR(IndexIVF)
FAISS_DECLARE_INDEX_DOWNCAST(IndexIVF)
/// number of possible key values
FAISS_DECLARE_GETTER(IndexIVF, size_t, nlist)
/// number of probes at query time
FAISS_DECLARE_GETTER_SETTER(IndexIVF, size_t, nprobe)
/// quantizer that maps vectors to inverted lists
FAISS_DECLARE_GETTER(IndexIVF, FaissIndex*, quantizer)
/**
* = 0: use the quantizer as index in a kmeans training
* = 1: just pass on the training set to the train() of the quantizer
* = 2: kmeans training on a flat index + add the centroids to the quantizer
*/
FAISS_DECLARE_GETTER(IndexIVF, char, quantizer_trains_alone)
/// whether object owns the quantizer
FAISS_DECLARE_GETTER_SETTER(IndexIVF, int, own_fields)
/** moves the entries from another dataset to self. On output,
* other is empty. add_id is added to all moved ids (for
* sequential ids, this would be this->ntotal */
int faiss_IndexIVF_merge_from(
FaissIndexIVF* index,
FaissIndexIVF* other,
idx_t add_id);
/** copy a subset of the entries index to the other index
*
* if subset_type == 0: copies ids in [a1, a2)
* if subset_type == 1: copies ids if id % a1 == a2
* if subset_type == 2: copies inverted lists such that a1
* elements are left before and a2 elements are after
*/
int faiss_IndexIVF_copy_subset_to(
const FaissIndexIVF* index,
FaissIndexIVF* other,
int subset_type,
idx_t a1,
idx_t a2);
/** search a set of vectors, that are pre-quantized by the IVF
* quantizer. Fill in the corresponding heaps with the query
* results. search() calls this.
*
* @param n nb of vectors to query
* @param x query vectors, size nx * d
* @param assign coarse quantization indices, size nx * nprobe
* @param centroid_dis
* distances to coarse centroids, size nx * nprobe
* @param distance
* output distances, size n * k
* @param labels output labels, size n * k
* @param store_pairs store inv list index + inv list offset
* instead in upper/lower 32 bit of result,
* instead of ids (used for reranking).
*/
int faiss_IndexIVF_search_preassigned(
const FaissIndexIVF* index,
idx_t n,
const float* x,
idx_t k,
const idx_t* assign,
const float* centroid_dis,
float* distances,
idx_t* labels,
int store_pairs);
size_t faiss_IndexIVF_get_list_size(const FaissIndexIVF* index, size_t list_no);
/** initialize a direct map
*
* @param new_maintain_direct_map if true, create a direct map,
* else clear it
*/
int faiss_IndexIVF_make_direct_map(
FaissIndexIVF* index,
int new_maintain_direct_map);
/** Check the inverted lists' imbalance factor.
*
* 1= perfectly balanced, >1: imbalanced
*/
double faiss_IndexIVF_imbalance_factor(const FaissIndexIVF* index);
/// display some stats about the inverted lists of the index
void faiss_IndexIVF_print_stats(const FaissIndexIVF* index);
/// Get the IDs in an inverted list. IDs are written to `invlist`, which must be
/// large enough
//// to accommodate the full list.
///
/// @param list_no the list ID
/// @param invlist output pointer to a slice of memory, at least as long as the
/// list's size
/// @see faiss_IndexIVF_get_list_size(size_t)
void faiss_IndexIVF_invlists_get_ids(
const FaissIndexIVF* index,
size_t list_no,
idx_t* invlist);
int faiss_IndexIVF_train_encoder(
FaissIndexIVF* index,
idx_t n,
const float* x,
const idx_t* assign);
typedef struct FaissIndexIVFStats {
size_t nq; // nb of queries run
size_t nlist; // nb of inverted lists scanned
size_t ndis; // nb of distances computed
size_t nheap_updates; // nb of times the heap was updated
double quantization_time; // time spent quantizing vectors (in ms)
double search_time; // time spent searching lists (in ms)
} FaissIndexIVFStats;
void faiss_IndexIVFStats_reset(FaissIndexIVFStats* stats);
inline void faiss_IndexIVFStats_init(FaissIndexIVFStats* stats) {
faiss_IndexIVFStats_reset(stats);
}
/// global var that collects all statists
FaissIndexIVFStats* faiss_get_indexIVF_stats();
#ifdef __cplusplus
}
#endif
#endif