mirror of
https://gitee.com/milvus-io/milvus.git
synced 2026-01-04 09:52:30 +08:00
* optimize knowhere unittest Signed-off-by: yudong.cai <yudong.cai@zilliz.com> * code clean Signed-off-by: yudong.cai <yudong.cai@zilliz.com> * optimize knowhere util Signed-off-by: yudong.cai <yudong.cai@zilliz.com> * fix clang-format Signed-off-by: yudong.cai <yudong.cai@zilliz.com> * update changelog Signed-off-by: yudong.cai <yudong.cai@zilliz.com> * fix unittest build error Signed-off-by: yudong.cai <yudong.cai@zilliz.com> * retry CI Signed-off-by: yudong.cai <yudong.cai@zilliz.com> Co-authored-by: Jin Hai <hai.jin@zilliz.com>
355 lines
13 KiB
C++
355 lines
13 KiB
C++
// Copyright (C) 2019-2020 Zilliz. All rights reserved.
|
|
//
|
|
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance
|
|
// with the License. You may obtain a copy of the License at
|
|
//
|
|
// http://www.apache.org/licenses/LICENSE-2.0
|
|
//
|
|
// Unless required by applicable law or agreed to in writing, software distributed under the License
|
|
// is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
|
|
// or implied. See the License for the specific language governing permissions and limitations under the License.
|
|
|
|
#include <fiu-control.h>
|
|
#include <fiu-local.h>
|
|
#include <gtest/gtest.h>
|
|
#include <memory>
|
|
|
|
#include "knowhere/common/Exception.h"
|
|
#include "knowhere/index/vector_index/FaissBaseIndex.h"
|
|
#include "knowhere/index/vector_index/IndexNSG.h"
|
|
#include "knowhere/index/vector_index/helpers/IndexParameter.h"
|
|
#ifdef MILVUS_GPU_VERSION
|
|
#include "knowhere/index/vector_index/gpu/IndexGPUIDMAP.h"
|
|
#include "knowhere/index/vector_index/helpers/Cloner.h"
|
|
#include "knowhere/index/vector_index/helpers/FaissGpuResourceMgr.h"
|
|
#endif
|
|
|
|
#include "knowhere/common/Timer.h"
|
|
#include "knowhere/index/vector_index/impl/nsg/NSGIO.h"
|
|
|
|
#include "unittest/utils.h"
|
|
|
|
using ::testing::Combine;
|
|
using ::testing::TestWithParam;
|
|
using ::testing::Values;
|
|
|
|
constexpr int64_t DEVICEID = 0;
|
|
|
|
class NSGInterfaceTest : public DataGen, public ::testing::Test {
|
|
protected:
|
|
void
|
|
SetUp() override {
|
|
#ifdef MILVUS_GPU_VERSION
|
|
int64_t MB = 1024 * 1024;
|
|
milvus::knowhere::FaissGpuResourceMgr::GetInstance().InitDevice(DEVICEID, MB * 200, MB * 600, 1);
|
|
#endif
|
|
Generate(256, 1000000 / 100, 10);
|
|
index_ = std::make_shared<milvus::knowhere::NSG>();
|
|
|
|
train_conf = milvus::knowhere::Config{{milvus::knowhere::meta::DIM, 256},
|
|
{milvus::knowhere::IndexParams::nlist, 163},
|
|
{milvus::knowhere::IndexParams::nprobe, 8},
|
|
{milvus::knowhere::IndexParams::knng, 20},
|
|
{milvus::knowhere::IndexParams::search_length, 40},
|
|
{milvus::knowhere::IndexParams::out_degree, 30},
|
|
{milvus::knowhere::IndexParams::candidate, 100},
|
|
{milvus::knowhere::Metric::TYPE, milvus::knowhere::Metric::L2}};
|
|
|
|
search_conf = milvus::knowhere::Config{
|
|
{milvus::knowhere::meta::TOPK, k},
|
|
{milvus::knowhere::IndexParams::search_length, 30},
|
|
};
|
|
}
|
|
|
|
void
|
|
TearDown() override {
|
|
#ifdef MILVUS_GPU_VERSION
|
|
milvus::knowhere::FaissGpuResourceMgr::GetInstance().Free();
|
|
#endif
|
|
}
|
|
|
|
protected:
|
|
std::shared_ptr<milvus::knowhere::NSG> index_;
|
|
milvus::knowhere::Config train_conf;
|
|
milvus::knowhere::Config search_conf;
|
|
};
|
|
|
|
// Walks the NSG index through its basic life cycle: failure on an untrained index,
// train, query, serialize, load into a second index and query again. Injected
// failures (libfiu) are used to check that Serialize() and Load() throw on error.
TEST_F(NSGInterfaceTest, basic_test) {
    // Use a gtest assertion so the precondition is still checked when NDEBUG is defined.
    ASSERT_FALSE(xb.empty());
    fiu_init(0);

    // untrained index: both querying and serializing are expected to throw
    {
        ASSERT_ANY_THROW(index_->Query(query_dataset, search_conf));
        ASSERT_ANY_THROW(index_->Serialize());
    }

    // train_conf->gpu_id = milvus::knowhere::INVALID_VALUE;
    // auto model_invalid_gpu = index_->Train(base_dataset, train_conf);
    train_conf[milvus::knowhere::meta::DEVICEID] = DEVICEID;
    index_->Train(base_dataset, train_conf);
    auto result = index_->Query(query_dataset, search_conf);
    AssertAnns(result, nq, k);

    auto binaryset = index_->Serialize();
    {
        // With the failure point enabled, Serialize() must throw.
        fiu_enable("NSG.Serialize.throw_exception", 1, nullptr, 0);
        ASSERT_ANY_THROW(index_->Serialize());
        fiu_disable("NSG.Serialize.throw_exception");
    }

    auto new_index = std::make_shared<milvus::knowhere::NSG>();
    new_index->Load(binaryset);
    {
        // With the failure point enabled, Load() must throw.
        fiu_enable("NSG.Load.throw_exception", 1, nullptr, 0);
        ASSERT_ANY_THROW(new_index->Load(binaryset));
        fiu_disable("NSG.Load.throw_exception");
    }

    // Validate the result returned by the reloaded index, so that the
    // serialize/load round trip is actually verified.
    auto new_result = new_index->Query(query_dataset, search_conf);
    AssertAnns(new_result, nq, k);

    ASSERT_EQ(index_->Count(), nb);
    ASSERT_EQ(index_->Dim(), dim);
    // ASSERT_THROW({ index_->Clone(); }, milvus::knowhere::KnowhereException);
    // ASSERT_NO_THROW({
    //    index_->Add(base_dataset, milvus::knowhere::Config());
    //    index_->Seal();
    // });
}
|
|
|
|
// Calls the L2 and inner-product distance functors repeatedly on the first base and
// query vectors and records one timing section per metric. The returned distances are
// discarded; the test makes no assertions.
TEST_F(NSGInterfaceTest, compare_test) {
    constexpr int kRounds = 1000;    // number of Compare() calls per metric
    constexpr int kVectorLen = 256;  // element count passed to Compare()

    milvus::knowhere::impl::DistanceL2 l2_distance;
    milvus::knowhere::impl::DistanceIP ip_distance;

    milvus::knowhere::TimeRecorder recorder("Compare");

    for (int round = 0; round < kRounds; ++round) {
        l2_distance.Compare(xb.data(), xq.data(), kVectorLen);
    }
    recorder.RecordSection("L2");

    for (int round = 0; round < kRounds; ++round) {
        ip_distance.Compare(xb.data(), xq.data(), kVectorLen);
    }
    recorder.RecordSection("IP");
}
|
|
|
|
// Checks that installing a blacklist changes the search results: the index is queried
// once without and once with a bitset in which ids [0, nq) are set, and the first hit
// of every query must differ between the two runs.
TEST_F(NSGInterfaceTest, delete_test) {
    assert(!xb.empty());

    train_conf[milvus::knowhere::meta::DEVICEID] = DEVICEID;
    index_->Train(base_dataset, train_conf);

    // Baseline search without any blacklist.
    auto result = index_->Query(query_dataset, search_conf);
    AssertAnns(result, nq, k);
    auto ids_before = result->Get<int64_t*>(milvus::knowhere::meta::IDS);

    ASSERT_EQ(index_->Count(), nb);
    ASSERT_EQ(index_->Dim(), dim);

    // Set the bits of the first nq ids.
    faiss::ConcurrentBitsetPtr blacklist = std::make_shared<faiss::ConcurrentBitset>(nb);
    for (int id = 0; id < nq; ++id) {
        blacklist->set(id);
    }

    /*
    printf("I=\n");
    for (int i = 0; i < nq; i++) {
        for (int j = 0; j < k; j++) printf("%5ld ", ids_before[i * k + j]);
        printf("\n");
    }*/

    // search xq with delete
    index_->SetBlacklist(blacklist);
    auto result_after = index_->Query(query_dataset, search_conf);
    AssertAnns(result_after, nq, k, CheckMode::CHECK_NOT_EQUAL);
    auto ids_after = result_after->Get<int64_t*>(milvus::knowhere::meta::IDS);

    /*
    printf("I=\n");
    for (int i = 0; i < nq; i++) {
        for (int j = 0; j < k; j++) printf("%5ld ", ids_after[i * k + j]);
        printf("\n");
    }*/

    // First vector deleted: the top hit of each query must have changed.
    for (int q = 0; q < nq; ++q) {
        ASSERT_NE(ids_before[q * k], ids_after[q * k]);
    }

    /*
    // Other results are the same
    for (int i = 0; i < nq; i++) {
        for (int j = 1; j <= k / 2; j++) {
            ASSERT_EQ(ids_before[i * k + j], ids_after[i * k + j - 1]);
        }
    }*/
}
|
|
|
|
// TEST(test, ori_nsg) {
|
|
// // float* p_data = nullptr;
|
|
// size_t rows, dim;
|
|
// char* filename = "/mnt/112d53a6-5592-4360-a33b-7fd789456fce/workspace/Data/sift/sift_base.fvecs";
|
|
// // loads_data(filename, p_data, rows, dim);
|
|
// float* p_data = fvecs_read(filename, &dim, &rows);
|
|
//
|
|
// std::string knng_filename =
|
|
// "/mnt/112d53a6-5592-4360-a33b-7fd789456fce/workspace/Cellar/anns/efanna_graph/tests/sift.1M.50NN.graph";
|
|
// std::vector<std::vector<int64_t>> knng;
|
|
// Load_nns_graph(knng, knng_filename.c_str());
|
|
//
|
|
// // float* search_data = nullptr;
|
|
// size_t nq, search_dim;
|
|
// char* searchfile = "/mnt/112d53a6-5592-4360-a33b-7fd789456fce/workspace/Data/sift/sift_query.fvecs";
|
|
// // loads_data(searchfile, search_data, nq, search_dim);
|
|
// float* search_data = fvecs_read(searchfile, &search_dim, &nq);
|
|
// assert(search_dim == dim);
|
|
//
|
|
// size_t k, nq2;
|
|
// char* gtfile = "/mnt/112d53a6-5592-4360-a33b-7fd789456fce/workspace/Data/sift/sift_groundtruth.ivecs";
|
|
// int* gt_int = ivecs_read(gtfile, &k, &nq2);
|
|
// int64_t* gt = new int64_t[k * nq2];
|
|
// for (int i = 0; i < k * nq2; i++) {
|
|
// gt[i] = gt_int[i];
|
|
// }
|
|
// delete[] gt_int;
|
|
//
|
|
// std::vector<int64_t> store_ids(rows);
|
|
// for (int i = 0; i < rows; ++i) {
|
|
// store_ids[i] = i;
|
|
// }
|
|
//
|
|
// int64_t* I = new int64_t[nq * k];
|
|
// float* D = new float[nq * k];
|
|
//#if 0
|
|
// efanna2e::Parameters params;
|
|
// params.Set<int64_t>("L", 50);
|
|
// params.Set<int64_t>("R", 55);
|
|
// params.Set<int64_t>("C", 300);
|
|
// auto orinsg = std::make_shared<efanna2e::IndexNSG>(dim, rows, efanna2e::Metric::L2, nullptr);
|
|
// orinsg->Load_nn_graph(knng);
|
|
// orinsg->Build(rows, (float*)p_data, params);
|
|
//
|
|
// efanna2e::Parameters paras;
|
|
// paras.Set<unsigned>("L_search", 45);
|
|
// paras.Set<unsigned>("P_search",100);
|
|
// k = 10;
|
|
// std::vector<std::vector<int64_t> > res;
|
|
// for (unsigned i = 0; i < nq; i++) {
|
|
// std::vector<int64_t> tmp(k);
|
|
// orinsg->Search(search_data + i * dim, p_data, k, paras, tmp.data());
|
|
// res.push_back(tmp);
|
|
// }
|
|
// }
|
|
//#else
|
|
// knowhere::algo::BuildParams params;
|
|
// params.search_length = 50;
|
|
// params.out_degree = 55;
|
|
// params.candidate_pool_size = 300;
|
|
// auto nsg = std::make_shared<knowhere::algo::NsgIndex>(dim, rows);
|
|
//#if 1
|
|
// knowhere::FaissGpuResourceMgr::GetInstance().InitDevice(DEVICEID, 1024 * 1024 * 200, 1024 * 1024 * 600, 2);
|
|
// auto dataset = generate_dataset(int64_t(rows), int64_t(dim), p_data, store_ids.data());
|
|
// auto config = std::make_shared<knowhere::IVFCfg>();
|
|
// config->d = dim;
|
|
// config->gpu_id = 0;
|
|
// config->metric_type = knowhere::METRICTYPE::L2;
|
|
// auto preprocess_index = std::make_shared<knowhere::IDMAP>();
|
|
// preprocess_index->Train(config);
|
|
// preprocess_index->AddWithoutId(dataset, config);
|
|
// auto xx = knowhere::cloner::CopyCpuToGpu(preprocess_index, 0, config);
|
|
// auto ss = std::dynamic_pointer_cast<knowhere::GPUIDMAP>(xx);
|
|
//
|
|
// std::vector<std::vector<int64_t>> kng;
|
|
// ss->GenGraph(p_data, 50, kng, config);
|
|
// nsg->SetKnnGraph(kng);
|
|
// knowhere::FaissGpuResourceMgr::GetInstance().Free();
|
|
//#else
|
|
// nsg->SetKnnGraph(knng);
|
|
//#endif
|
|
// nsg->Build_with_ids(rows, (float*)p_data, store_ids.data(), params);
|
|
// knowhere::algo::SearchParams s_params;
|
|
// s_params.search_length = 45;
|
|
// nsg->Search(search_data, nq, dim, k, D, I, s_params);
|
|
//#endif
|
|
//
|
|
// int n_1 = 0, n_10 = 0, n_100 = 0;
|
|
// for (int i = 0; i < nq; i++) {
|
|
// int gt_nn = gt[i * k];
|
|
// for (int j = 0; j < k; j++) {
|
|
// if (I[i * k + j] == gt_nn) {
|
|
// if (j < 1)
|
|
// n_1++;
|
|
// if (j < 10)
|
|
// n_10++;
|
|
// if (j < 100)
|
|
// n_100++;
|
|
// }
|
|
// }
|
|
// }
|
|
// printf("R@1 = %.4f\n", n_1 / float(nq));
|
|
// printf("R@10 = %.4f\n", n_10 / float(nq));
|
|
// printf("R@100 = %.4f\n", n_100 / float(nq));
|
|
//}
|
|
//
|
|
// TEST(testxx, test_idmap){
|
|
// int k = 50;
|
|
// std::string knng_filename =
|
|
// "/mnt/112d53a6-5592-4360-a33b-7fd789456fce/workspace/Cellar/anns/efanna_graph/tests/sift.50NN.graph";
|
|
// std::vector<std::vector<int64_t>> gt_knng;
|
|
// Load_nns_graph(gt_knng, knng_filename.c_str());
|
|
//
|
|
// size_t rows, dim;
|
|
// char* filename =
|
|
// "/mnt/112d53a6-5592-4360-a33b-7fd789456fce/workspace/Cellar/anns/efanna_graph/tests/siftsmall/siftsmall_base.fvecs";
|
|
// float* p_data = fvecs_read(filename, &dim, &rows);
|
|
//
|
|
// std::vector<int64_t> store_ids(rows);
|
|
// for (int i = 0; i < rows; ++i) {
|
|
// store_ids[i] = i;
|
|
// }
|
|
//
|
|
// knowhere::FaissGpuResourceMgr::GetInstance().InitDevice(DEVICEID, 1024 * 1024 * 200, 1024 * 1024 * 600, 2);
|
|
// auto dataset = generate_dataset(int64_t(rows), int64_t(dim), p_data, store_ids.data());
|
|
// auto config = std::make_shared<knowhere::IVFCfg>();
|
|
// config->d = dim;
|
|
// config->gpu_id = 0;
|
|
// config->metric_type = knowhere::METRICTYPE::L2;
|
|
// auto preprocess_index = std::make_shared<knowhere::IDMAP>();
|
|
// preprocess_index->Train(config);
|
|
// preprocess_index->AddWithoutId(dataset, config);
|
|
// auto xx = knowhere::cloner::CopyCpuToGpu(preprocess_index, 0, config);
|
|
// auto ss = std::dynamic_pointer_cast<knowhere::GPUIDMAP>(xx);
|
|
// std::vector<std::vector<int64_t>> idmap_knng;
|
|
// ss->GenGraph(p_data, k, idmap_knng,config);
|
|
// knowhere::FaissGpuResourceMgr::GetInstance().Free();
|
|
//
|
|
// int n_1 = 0, n_10 = 0, n_100 = 0;
|
|
// for (int i = 0; i < rows; i++) {
|
|
// int gt_nn = gt_knng[i][0];
|
|
// int l_n_1 = 0;
|
|
// int l_n_10 = 0;
|
|
// int l_n_100 = 0;
|
|
// for (int j = 0; j < k; j++) {
|
|
// if (idmap_knng[i][j] == gt_nn) {
|
|
// if (j < 1){
|
|
// n_1++;
|
|
// l_n_1++;
|
|
// }
|
|
// if (j < 10){
|
|
// n_10++;
|
|
// l_n_10++;
|
|
// }
|
|
// if (j < 100){
|
|
// n_100++;
|
|
// l_n_100++;
|
|
// }
|
|
//
|
|
// }
|
|
// if ((j == k-1) && (l_n_100 == 0)){
|
|
// std::cout << "error id: " << i << std::endl;
|
|
// }
|
|
// }
|
|
// }
|
|
// printf("R@1 = %.4f\n", n_1 / float(rows));
|
|
// printf("R@10 = %.4f\n", n_10 / float(rows));
|
|
// printf("R@100 = %.4f\n", n_100 / float(rows));
|
|
//}
|