mirror of
https://gitee.com/milvus-io/milvus.git
synced 2025-12-30 07:25:37 +08:00
Using elkan K-Means to improve IVF (#2180)
Signed-off-by: shengjun.li <shengjun.li@zilliz.com>
This commit is contained in:
parent
b3aa06fce1
commit
a76e705a92
@ -6,7 +6,6 @@ Please mark all change in change log and use the issue from GitHub
|
||||
|
||||
## Bug
|
||||
- \#1705 Limit the insert data batch size
|
||||
- \#1796 Too much low performance of building index on ubuntu-mysql-version
|
||||
- \#1929 Skip MySQL meta schema field width check
|
||||
- \#1997 Index file missed after compact
|
||||
- \#2073 Fix CheckDBConfigBackendUrl error message
|
||||
@ -28,10 +27,12 @@ Please mark all change in change log and use the issue from GitHub
|
||||
|
||||
## Improvement
|
||||
- \#221 Refactor LOG macro
|
||||
- \#1796 Compile Openblas with source code to improve the performance
|
||||
- \#2039 Support Milvus run on SSE CPUs
|
||||
- \#2149 Merge server_cpu_config.template and server_gpu_config.template
|
||||
- \#2153 Upgrade thirdparty oatpp to v1.0.0
|
||||
- \#2167 Merge log_config.conf with server_config.yaml
|
||||
- \#2178 Using elkan K-Means to improve IVF
|
||||
|
||||
## Task
|
||||
|
||||
|
||||
@ -51,7 +51,6 @@ int sgemv_(const char *trans, FINTEGER *m, FINTEGER *n, float *alpha,
|
||||
namespace faiss {
|
||||
|
||||
|
||||
|
||||
/***************************************************************************
|
||||
* Matrix/vector ops
|
||||
***************************************************************************/
|
||||
@ -353,6 +352,67 @@ static void knn_L2sqr_sse (
|
||||
*/
|
||||
}
|
||||
|
||||
static void elkan_L2_sse (
|
||||
const float * x,
|
||||
const float * y,
|
||||
size_t d, size_t nx, size_t ny,
|
||||
float_maxheap_array_t * res) {
|
||||
|
||||
if (nx == 0 || ny == 0) {
|
||||
return;
|
||||
}
|
||||
|
||||
const size_t bs_y = 1024;
|
||||
float *data = (float *) malloc((bs_y * (bs_y - 1) / 2) * sizeof (float));
|
||||
|
||||
for (size_t j0 = 0; j0 < ny; j0 += bs_y) {
|
||||
size_t j1 = j0 + bs_y;
|
||||
if (j1 > ny) j1 = ny;
|
||||
|
||||
auto Y = [&](size_t i, size_t j) -> float& {
|
||||
assert(i != j);
|
||||
i -= j0, j -= j0;
|
||||
return (i > j) ? data[j + i * (i - 1) / 2] : data[i + j * (j - 1) / 2];
|
||||
};
|
||||
|
||||
#pragma omp parallel for
|
||||
for (size_t i = j0 + 1; i < j1; i++) {
|
||||
const float *y_i = y + i * d;
|
||||
for (size_t j = j0; j < i; j++) {
|
||||
const float *y_j = y + j * d;
|
||||
Y(i, j) = sqrt(fvec_L2sqr(y_i, y_j, d));
|
||||
}
|
||||
}
|
||||
|
||||
#pragma omp parallel for
|
||||
for (size_t i = 0; i < nx; i++) {
|
||||
const float *x_i = x + i * d;
|
||||
|
||||
int64_t ids_i = j0;
|
||||
float val_i = sqrt(fvec_L2sqr(x_i, y + j0 * d, d));
|
||||
float val_i_2 = val_i * 2;
|
||||
for (size_t j = j0 + 1; j < j1; j++) {
|
||||
if (val_i_2 <= Y(ids_i, j)) {
|
||||
continue;
|
||||
}
|
||||
const float *y_j = y + j * d;
|
||||
float disij = sqrt(fvec_L2sqr(x_i, y_j, d));
|
||||
if (disij < val_i) {
|
||||
ids_i = j;
|
||||
val_i = disij;
|
||||
val_i_2 = val_i * 2;
|
||||
}
|
||||
}
|
||||
|
||||
if (j0 == 0 || res->val[i] > val_i) {
|
||||
res->val[i] = val_i;
|
||||
res->ids[i] = ids_i;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
free(data);
|
||||
}
|
||||
|
||||
/** Find the nearest neighbors for nx queries in a set of ny vectors */
|
||||
static void knn_inner_product_blas (
|
||||
@ -608,7 +668,11 @@ void knn_L2sqr (const float * x,
|
||||
float_maxheap_array_t * res,
|
||||
ConcurrentBitsetPtr bitset)
|
||||
{
|
||||
if (d % 4 == 0 && nx < distance_compute_blas_threshold) {
|
||||
if (bitset == nullptr && res->k == 1 && nx >= ny * 2) {
|
||||
// Note: L2 but not L2sqr
|
||||
// usually used in IVF::train
|
||||
elkan_L2_sse(x, y, d, nx, ny, res);
|
||||
} else if (d % 4 == 0 && nx < distance_compute_blas_threshold) {
|
||||
knn_L2sqr_sse (x, y, d, nx, ny, res, bitset);
|
||||
} else {
|
||||
NopDistanceCorrection nop;
|
||||
@ -1004,4 +1068,4 @@ void pairwise_L2sqr (int64_t d,
|
||||
}
|
||||
|
||||
|
||||
} // namespace faiss
|
||||
} // namespace faiss
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user