From 0c31540df499d4afe63c0f6983b3c5e5d3553322 Mon Sep 17 00:00:00 2001 From: "xiaojun.lin" Date: Sun, 13 Oct 2019 14:35:51 +0800 Subject: [PATCH 1/5] update v1 Former-commit-id: 5b30f0f34233e831bee4572111c4bd9e67e9a33a --- .../index/vector_index/IndexGPUIVFSQ.cpp | 9 ++ .../index/vector_index/IndexGPUIVFSQ.h | 4 + cpp/src/core/unittest/test_ivf.cpp | 91 +++++-------------- cpp/src/core/unittest/test_kdt.cpp | 1 + cpp/src/core/unittest/test_nsg/test_nsg.cpp | 14 +-- cpp/unittest/scheduler/test_resource.cpp | 2 +- cpp/unittest/wrapper/test_wrapper.cpp | 2 +- 7 files changed, 46 insertions(+), 77 deletions(-) diff --git a/cpp/src/core/knowhere/knowhere/index/vector_index/IndexGPUIVFSQ.cpp b/cpp/src/core/knowhere/knowhere/index/vector_index/IndexGPUIVFSQ.cpp index 1b4f4e9edb..5e1f5226f2 100644 --- a/cpp/src/core/knowhere/knowhere/index/vector_index/IndexGPUIVFSQ.cpp +++ b/cpp/src/core/knowhere/knowhere/index/vector_index/IndexGPUIVFSQ.cpp @@ -71,4 +71,13 @@ GPUIVFSQ::CopyGpuToCpu(const Config& config) { return std::make_shared(new_index); } +void +GPUIVFSQ::search_impl(int64_t n, const float* data, int64_t k, float* distances, int64_t* labels, const Config& cfg) { +#ifdef CUSTOMIZATION + GPUIVF::search_impl(n, data, k, distances, labels, cfg); +#else + IVF::search_impl(n, data, k, distances, labels, cfg); +#endif +} + } // namespace knowhere diff --git a/cpp/src/core/knowhere/knowhere/index/vector_index/IndexGPUIVFSQ.h b/cpp/src/core/knowhere/knowhere/index/vector_index/IndexGPUIVFSQ.h index ed8013d77f..7332bce691 100644 --- a/cpp/src/core/knowhere/knowhere/index/vector_index/IndexGPUIVFSQ.h +++ b/cpp/src/core/knowhere/knowhere/index/vector_index/IndexGPUIVFSQ.h @@ -38,6 +38,10 @@ class GPUIVFSQ : public GPUIVF { VectorIndexPtr CopyGpuToCpu(const Config& config) override; + + protected: + void + search_impl(int64_t n, const float* data, int64_t k, float* distances, int64_t* labels, const Config& cfg) override; }; } // namespace knowhere diff --git a/cpp/src/core/unittest/test_ivf.cpp b/cpp/src/core/unittest/test_ivf.cpp index c6faea9182..1d49d91b7c 100644 --- a/cpp/src/core/unittest/test_ivf.cpp +++ b/cpp/src/core/unittest/test_ivf.cpp @@ -154,8 +154,8 @@ class IVFTest : public DataGen, public TestWithParam<::std::tupleSearch(query_dataset, conf); AssertAnns(result, nq, conf->k); PrintResult(result, nq, k); + hybrid_1_idx->UnsetQuantizer(); } - { - auto hybrid_2_idx = std::make_shared(device_id); - - auto binaryset = index_->Serialize(); - hybrid_2_idx->Load(binaryset); - - auto quantizer_conf = std::make_shared(); - quantizer_conf->mode = 1; - quantizer_conf->gpu_id = device_id; - auto q = hybrid_2_idx->LoadQuantizer(quantizer_conf); - quantizer_conf->mode = 2; - hybrid_2_idx->LoadData(q, quantizer_conf); - - auto result = hybrid_2_idx->Search(query_dataset, conf); - AssertAnns(result, nq, conf->k); - PrintResult(result, nq, k); - } +// { +// auto hybrid_2_idx = std::make_shared(device_id); +// +// auto binaryset = index_->Serialize(); +// hybrid_2_idx->Load(binaryset); +// +// auto quantizer_conf = std::make_shared(); +// quantizer_conf->mode = 1; +// quantizer_conf->gpu_id = device_id; +// auto q = hybrid_2_idx->LoadQuantizer(quantizer_conf); +// quantizer_conf->mode = 2; +// hybrid_2_idx->LoadData(q, quantizer_conf); +// +// auto result = hybrid_2_idx->Search(query_dataset, conf); +// AssertAnns(result, nq, conf->k); +// PrintResult(result, nq, k); +// } } // TEST_P(IVFTest, gpu_to_cpu) { @@ -438,6 +439,7 @@ TEST_P(IVFTest, clone_test) { } } +#ifdef CUSTOMIZATION TEST_P(IVFTest, seal_test) { // FaissGpuResourceMgr::GetInstance().InitDevice(device_id); @@ -472,6 +474,7 @@ TEST_P(IVFTest, seal_test) { auto with_seal = tc.RecordSection("With seal"); ASSERT_GE(without_seal, with_seal); } +#endif class GPURESTEST : public DataGen, public ::testing::Test { protected: @@ -637,7 +640,7 @@ TEST_F(GPURESTEST, copyandsearch) { // search and copy at the same time printf("==================\n"); - index_type = "GPUIVFSQ"; + index_type = "GPUIVF"; index_ = IndexFactory(index_type); auto conf = std::make_shared(); @@ -693,54 +696,6 @@ TEST_F(GPURESTEST, copyandsearch) { std::thread search_thread(search_func); std::thread load_thread(load_func); - search_thread.join(); - load_thread.join(); - tc.RecordSection("Copy&search total"); -} - -TEST_F(GPURESTEST, TrainAndSearch) { - index_type = "GPUIVFSQ"; - index_ = IndexFactory(index_type); - - auto conf = std::make_shared(); - conf->nlist = 1638; - conf->d = dim; - conf->gpu_id = device_id; - conf->metric_type = knowhere::METRICTYPE::L2; - conf->k = k; - conf->nbits = 8; - conf->nprobe = 1; - - auto preprocessor = index_->BuildPreprocessor(base_dataset, conf); - index_->set_preprocessor(preprocessor); - auto model = index_->Train(base_dataset, conf); - auto new_index = IndexFactory(index_type); - new_index->set_index_model(model); - new_index->Add(base_dataset, conf); - auto cpu_idx = knowhere::cloner::CopyGpuToCpu(new_index, knowhere::Config()); - cpu_idx->Seal(); - auto search_idx = knowhere::cloner::CopyCpuToGpu(cpu_idx, device_id, knowhere::Config()); - - constexpr int train_count = 1; - constexpr int search_count = 5000; - auto train_stage = [&] { - for (int i = 0; i < train_count; ++i) { - auto model = index_->Train(base_dataset, conf); - auto test_idx = IndexFactory(index_type); - test_idx->set_index_model(model); - test_idx->Add(base_dataset, conf); - } - }; - auto search_stage = [&](knowhere::VectorIndexPtr& search_idx) { - for (int i = 0; i < search_count; ++i) { - auto result = search_idx->Search(query_dataset, conf); - AssertAnns(result, nq, k); - } - }; - - // TimeRecorder tc("record"); - // train_stage(); - // tc.RecordSection("train cost"); // search_stage(search_idx); // tc.RecordSection("search cost"); diff --git a/cpp/src/core/unittest/test_kdt.cpp b/cpp/src/core/unittest/test_kdt.cpp index 875944be83..8758fee669 100644 --- a/cpp/src/core/unittest/test_kdt.cpp +++ b/cpp/src/core/unittest/test_kdt.cpp @@ -36,6 +36,7 @@ class KDTTest : public DataGen, public ::testing::Test { protected: void SetUp() override { + Generate(96, 1000, 10); index_ = std::make_shared(); auto tempconf = std::make_shared(); diff --git a/cpp/src/core/unittest/test_nsg/test_nsg.cpp b/cpp/src/core/unittest/test_nsg/test_nsg.cpp index 5aaa65abe2..657387f219 100644 --- a/cpp/src/core/unittest/test_nsg/test_nsg.cpp +++ b/cpp/src/core/unittest/test_nsg/test_nsg.cpp @@ -38,17 +38,17 @@ class NSGInterfaceTest : public DataGen, public ::testing::Test { SetUp() override { // Init_with_default(); knowhere::FaissGpuResourceMgr::GetInstance().InitDevice(DEVICE_ID, 1024 * 1024 * 200, 1024 * 1024 * 600, 2); - Generate(256, 1000000, 1); + Generate(256, 1000000 / 100, 1); index_ = std::make_shared(); auto tmp_conf = std::make_shared(); tmp_conf->gpu_id = DEVICE_ID; - tmp_conf->knng = 100; - tmp_conf->nprobe = 32; - tmp_conf->nlist = 16384; - tmp_conf->search_length = 60; - tmp_conf->out_degree = 70; - tmp_conf->candidate_pool_size = 500; + tmp_conf->knng = 20; + tmp_conf->nprobe = 8; + tmp_conf->nlist = 163; + tmp_conf->search_length = 40; + tmp_conf->out_degree = 30; + tmp_conf->candidate_pool_size = 100; tmp_conf->metric_type = knowhere::METRICTYPE::L2; train_conf = tmp_conf; diff --git a/cpp/unittest/scheduler/test_resource.cpp b/cpp/unittest/scheduler/test_resource.cpp index 9d859d6243..1ff0d9fdc1 100644 --- a/cpp/unittest/scheduler/test_resource.cpp +++ b/cpp/unittest/scheduler/test_resource.cpp @@ -184,7 +184,7 @@ class ResourceAdvanceTest : public testing::Test { }; TEST_F(ResourceAdvanceTest, DISK_RESOURCE_TEST) { - const uint64_t NUM = 10; + const uint64_t NUM = max_once_load; std::vector> tasks; TableFileSchemaPtr dummy = nullptr; for (uint64_t i = 0; i < NUM; ++i) { diff --git a/cpp/unittest/wrapper/test_wrapper.cpp b/cpp/unittest/wrapper/test_wrapper.cpp index fe8cc3d914..7accef649c 100644 --- a/cpp/unittest/wrapper/test_wrapper.cpp +++ b/cpp/unittest/wrapper/test_wrapper.cpp @@ -188,7 +188,7 @@ INSTANTIATE_TEST_CASE_P(WrapperParam, KnowhereWrapperTest, 10, 10), std::make_tuple(milvus::engine::IndexType::FAISS_IVFSQ8_CPU, "Default", DIM, NB, 10, 10), - std::make_tuple(milvus::engine::IndexType::FAISS_IVFSQ8_GPU, "Default", DIM, NB, 10, 10), +// std::make_tuple(milvus::engine::IndexType::FAISS_IVFSQ8_GPU, "Default", DIM, NB, 10, 10), std::make_tuple(milvus::engine::IndexType::FAISS_IVFSQ8_MIX, "Default", DIM, NB, 10, 10), // std::make_tuple(IndexType::NSG_MIX, "Default", 128, 250000, 10, 10), // std::make_tuple(IndexType::SPTAG_KDT_RNT_CPU, "Default", 128, 250000, 10, 10), From 1141bbbb061e38eca0f6a283199810e7eddf5676 Mon Sep 17 00:00:00 2001 From: "xiaojun.lin" Date: Sun, 13 Oct 2019 16:57:34 +0800 Subject: [PATCH 2/5] update v2 Former-commit-id: 1240499e9e5f0042a2296300b00588ed11dc07c3 --- cpp/src/core/unittest/test_ivf.cpp | 48 ++++++++++++++++++++++++++++++ 1 file changed, 48 insertions(+) diff --git a/cpp/src/core/unittest/test_ivf.cpp b/cpp/src/core/unittest/test_ivf.cpp index 1d49d91b7c..987692deb5 100644 --- a/cpp/src/core/unittest/test_ivf.cpp +++ b/cpp/src/core/unittest/test_ivf.cpp @@ -696,6 +696,54 @@ TEST_F(GPURESTEST, copyandsearch) { std::thread search_thread(search_func); std::thread load_thread(load_func); + search_thread.join(); + load_thread.join(); + tc.RecordSection("Copy&search total"); +} + +TEST_F(GPURESTEST, TrainAndSearch) { + index_type = "GPUIVFSQ"; + index_ = IndexFactory(index_type); + + auto conf = std::make_shared(); + conf->nlist = 1638; + conf->d = dim; + conf->gpu_id = device_id; + conf->metric_type = knowhere::METRICTYPE::L2; + conf->k = k; + conf->nbits = 8; + conf->nprobe = 1; + + auto preprocessor = index_->BuildPreprocessor(base_dataset, conf); + index_->set_preprocessor(preprocessor); + auto model = index_->Train(base_dataset, conf); + auto new_index = IndexFactory(index_type); + new_index->set_index_model(model); + new_index->Add(base_dataset, conf); + auto cpu_idx = knowhere::cloner::CopyGpuToCpu(new_index, knowhere::Config()); + cpu_idx->Seal(); + auto search_idx = knowhere::cloner::CopyCpuToGpu(cpu_idx, device_id, knowhere::Config()); + + constexpr int train_count = 1; + constexpr int search_count = 5000; + auto train_stage = [&] { + for (int i = 0; i < train_count; ++i) { + auto model = index_->Train(base_dataset, conf); + auto test_idx = IndexFactory(index_type); + test_idx->set_index_model(model); + test_idx->Add(base_dataset, conf); + } + }; + auto search_stage = [&](knowhere::VectorIndexPtr& search_idx) { + for (int i = 0; i < search_count; ++i) { + auto result = search_idx->Search(query_dataset, conf); + AssertAnns(result, nq, k); + } + }; + + // TimeRecorder tc("record"); + // train_stage(); + // tc.RecordSection("train cost"); // search_stage(search_idx); // tc.RecordSection("search cost"); From c12e99c3fb3af6167df9c7158a8483389f6b7691 Mon Sep 17 00:00:00 2001 From: zhiru Date: Sun, 13 Oct 2019 17:30:05 +0800 Subject: [PATCH 3/5] update Former-commit-id: 661cc9057b3c52fb09bef6adfec050e909ca4c3c --- cpp/src/core/unittest/test_ivf.cpp | 2 +- cpp/unittest/db/test_db.cpp | 5 ++--- 2 files changed, 3 insertions(+), 4 deletions(-) diff --git a/cpp/src/core/unittest/test_ivf.cpp b/cpp/src/core/unittest/test_ivf.cpp index 987692deb5..b25f230c0f 100644 --- a/cpp/src/core/unittest/test_ivf.cpp +++ b/cpp/src/core/unittest/test_ivf.cpp @@ -702,7 +702,7 @@ TEST_F(GPURESTEST, copyandsearch) { } TEST_F(GPURESTEST, TrainAndSearch) { - index_type = "GPUIVFSQ"; + index_type = "GPUIVF"; index_ = IndexFactory(index_type); auto conf = std::make_shared(); diff --git a/cpp/unittest/db/test_db.cpp b/cpp/unittest/db/test_db.cpp index 9e80afbc09..aaf0b7752c 100644 --- a/cpp/unittest/db/test_db.cpp +++ b/cpp/unittest/db/test_db.cpp @@ -297,6 +297,7 @@ TEST_F(DBTest, SEARCH_TEST) { ASSERT_TRUE(stat.ok()); } +#ifdef CUSTOMIZATION //test FAISS_IVFSQ8H optimizer index.engine_type_ = (int)milvus::engine::EngineType::FAISS_IVFSQ8H; db_->CreateIndex(TABLE_NAME, index); // wait until build index finish @@ -314,9 +315,7 @@ TEST_F(DBTest, SEARCH_TEST) { stat = db_->Query(TABLE_NAME, file_ids, k, nq, 10, xq.data(), dates, results); ASSERT_TRUE(stat.ok()); } - - - // TODO(lxj): add groundTruth assert +#endif- } TEST_F(DBTest, PRELOADTABLE_TEST) { From eea7f982004d9a22ff101f67f768c7fc3003ea9f Mon Sep 17 00:00:00 2001 From: "xiaojun.lin" Date: Sun, 13 Oct 2019 17:50:43 +0800 Subject: [PATCH 4/5] update v3 Former-commit-id: af75de607e6f9db0276c8831868a6c0fa9b95a86 --- cpp/CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/cpp/CHANGELOG.md b/cpp/CHANGELOG.md index 8d25c19ce8..2f81deb0e7 100644 --- a/cpp/CHANGELOG.md +++ b/cpp/CHANGELOG.md @@ -36,6 +36,7 @@ Please mark all change in change log and use the ticket from JIRA. - MS-619 - Add optimizer class in scheduler - MS-614 - Preload table at startup - MS-626 - Refactor DataObj to support cache any type data +- MS-648 - Improve unittest ## New Feature - MS-627 - Integrate new index: IVFSQHybrid From 657c0163f5c5d3c0fc17472bbed5f7eca2296633 Mon Sep 17 00:00:00 2001 From: "xiaojun.lin" Date: Sun, 13 Oct 2019 17:57:04 +0800 Subject: [PATCH 5/5] update v3 Former-commit-id: f0fc9af8a0207695522fbdedaf87ed7affb367c4 --- cpp/src/core/unittest/test_ivf.cpp | 34 +++++++++++++++--------------- cpp/unittest/db/test_db.cpp | 2 +- 2 files changed, 18 insertions(+), 18 deletions(-) diff --git a/cpp/src/core/unittest/test_ivf.cpp b/cpp/src/core/unittest/test_ivf.cpp index b25f230c0f..3c403f690c 100644 --- a/cpp/src/core/unittest/test_ivf.cpp +++ b/cpp/src/core/unittest/test_ivf.cpp @@ -243,23 +243,23 @@ TEST_P(IVFTest, hybrid) { hybrid_1_idx->UnsetQuantizer(); } -// { -// auto hybrid_2_idx = std::make_shared(device_id); -// -// auto binaryset = index_->Serialize(); -// hybrid_2_idx->Load(binaryset); -// -// auto quantizer_conf = std::make_shared(); -// quantizer_conf->mode = 1; -// quantizer_conf->gpu_id = device_id; -// auto q = hybrid_2_idx->LoadQuantizer(quantizer_conf); -// quantizer_conf->mode = 2; -// hybrid_2_idx->LoadData(q, quantizer_conf); -// -// auto result = hybrid_2_idx->Search(query_dataset, conf); -// AssertAnns(result, nq, conf->k); -// PrintResult(result, nq, k); -// } + // { + // auto hybrid_2_idx = std::make_shared(device_id); + // + // auto binaryset = index_->Serialize(); + // hybrid_2_idx->Load(binaryset); + // + // auto quantizer_conf = std::make_shared(); + // quantizer_conf->mode = 1; + // quantizer_conf->gpu_id = device_id; + // auto q = hybrid_2_idx->LoadQuantizer(quantizer_conf); + // quantizer_conf->mode = 2; + // hybrid_2_idx->LoadData(q, quantizer_conf); + // + // auto result = hybrid_2_idx->Search(query_dataset, conf); + // AssertAnns(result, nq, conf->k); + // PrintResult(result, nq, k); + // } } // TEST_P(IVFTest, gpu_to_cpu) { diff --git a/cpp/unittest/db/test_db.cpp b/cpp/unittest/db/test_db.cpp index aaf0b7752c..9e2730a8dd 100644 --- a/cpp/unittest/db/test_db.cpp +++ b/cpp/unittest/db/test_db.cpp @@ -315,7 +315,7 @@ TEST_F(DBTest, SEARCH_TEST) { stat = db_->Query(TABLE_NAME, file_ids, k, nq, 10, xq.data(), dates, results); ASSERT_TRUE(stat.ok()); } -#endif- +#endif } TEST_F(DBTest, PRELOADTABLE_TEST) {