From 93f2ce45f421ffa08fd0faf19cceca38931908d8 Mon Sep 17 00:00:00 2001 From: "yudong.cai" Date: Fri, 29 Nov 2019 14:13:56 +0800 Subject: [PATCH 01/17] #331 add exception handle when search fail --- .../knowhere/knowhere/index/vector_index/IndexGPUIVF.cpp | 4 +++- core/src/scheduler/task/SearchTask.cpp | 8 +++++++- 2 files changed, 10 insertions(+), 2 deletions(-) diff --git a/core/src/index/knowhere/knowhere/index/vector_index/IndexGPUIVF.cpp b/core/src/index/knowhere/knowhere/index/vector_index/IndexGPUIVF.cpp index 923ca0db56..d496d8085c 100644 --- a/core/src/index/knowhere/knowhere/index/vector_index/IndexGPUIVF.cpp +++ b/core/src/index/knowhere/knowhere/index/vector_index/IndexGPUIVF.cpp @@ -133,9 +133,11 @@ GPUIVF::search_impl(int64_t n, const float* data, int64_t k, float* distances, i device_index->nprobe = search_cfg->nprobe; // assert(device_index->getNumProbes() == search_cfg->nprobe); - { + try { ResScope rs(res_, gpu_id_); device_index->search(n, (float*)data, k, distances, labels); + } catch (faiss::FaissException& e) { + KNOWHERE_THROW_MSG(e.what()); } } else { KNOWHERE_THROW_MSG("Not a GpuIndexIVF type."); diff --git a/core/src/scheduler/task/SearchTask.cpp b/core/src/scheduler/task/SearchTask.cpp index 08bc6525aa..34c8fc4b4b 100644 --- a/core/src/scheduler/task/SearchTask.cpp +++ b/core/src/scheduler/task/SearchTask.cpp @@ -212,7 +212,13 @@ XSearchTask::Execute() { ResMgrInst::GetInstance()->GetResource(path().Last())->type() == ResourceType::CPU) { hybrid = true; } - index_engine_->Search(nq, vectors, topk, nprobe, output_distance.data(), output_ids.data(), hybrid); + Status s = + index_engine_->Search(nq, vectors, topk, nprobe, output_distance.data(), output_ids.data(), hybrid); + if (!s.ok()) { + search_job->GetStatus() = s; + search_job->SearchDone(index_id_); + return; + } double span = rc.RecordSection(hdr + ", do search"); // search_job->AccumSearchCost(span); From 9a467fb1a0b8cbad15d0dcc46afd823c96c59106 Mon Sep 17 00:00:00 2001 From: "yudong.cai" Date: Fri, 29 Nov 2019 14:16:03 +0800 Subject: [PATCH 02/17] #331 update changelog --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index eca7440ac3..1fa66be4c4 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -9,6 +9,7 @@ Please mark all change in change log and use the ticket from JIRA. - \#248 - Reside src/external in thirdparty - \#316 - Some files not merged after vectors added - \#327 - Search does not use GPU when index type is FLAT +- \#331 - Add exception handle when search fail - \#340 - Test cases run failed on 0.6.0 - \#353 - Rename config.h.in to version.h.in - \#374 - sdk_simple return empty result From 3a0cc24d57cd2d05d8b70de2c5a3b9b3f674873e Mon Sep 17 00:00:00 2001 From: "yudong.cai" Date: Fri, 29 Nov 2019 14:31:53 +0800 Subject: [PATCH 03/17] #331 update exception handle when search fail --- .../index/vector_index/IndexGPUIVF.cpp | 11 +-- .../knowhere/index/vector_index/IndexIVF.cpp | 71 ++++++++++--------- 2 files changed, 42 insertions(+), 40 deletions(-) diff --git a/core/src/index/knowhere/knowhere/index/vector_index/IndexGPUIVF.cpp b/core/src/index/knowhere/knowhere/index/vector_index/IndexGPUIVF.cpp index d496d8085c..872f05aa67 100644 --- a/core/src/index/knowhere/knowhere/index/vector_index/IndexGPUIVF.cpp +++ b/core/src/index/knowhere/knowhere/index/vector_index/IndexGPUIVF.cpp @@ -131,14 +131,9 @@ GPUIVF::search_impl(int64_t n, const float* data, int64_t k, float* distances, i if (auto device_index = std::dynamic_pointer_cast(index_)) { auto search_cfg = std::dynamic_pointer_cast(cfg); device_index->nprobe = search_cfg->nprobe; - // assert(device_index->getNumProbes() == search_cfg->nprobe); - - try { - ResScope rs(res_, gpu_id_); - device_index->search(n, (float*)data, k, distances, labels); - } catch (faiss::FaissException& e) { - KNOWHERE_THROW_MSG(e.what()); - } + //assert(device_index->getNumProbes() == search_cfg->nprobe); + ResScope rs(res_, gpu_id_); + device_index->search(n, (float*)data, k, distances, labels); } else { KNOWHERE_THROW_MSG("Not a GpuIndexIVF type."); } diff --git a/core/src/index/knowhere/knowhere/index/vector_index/IndexIVF.cpp b/core/src/index/knowhere/knowhere/index/vector_index/IndexIVF.cpp index f99c6e3972..884aa8c31d 100644 --- a/core/src/index/knowhere/knowhere/index/vector_index/IndexIVF.cpp +++ b/core/src/index/knowhere/knowhere/index/vector_index/IndexIVF.cpp @@ -119,42 +119,49 @@ IVF::Search(const DatasetPtr& dataset, const Config& config) { GETTENSOR(dataset) - auto elems = rows * search_cfg->k; - auto res_ids = (int64_t*)malloc(sizeof(int64_t) * elems); - auto res_dis = (float*)malloc(sizeof(float) * elems); + try { + auto elems = rows * search_cfg->k; + auto res_ids = (int64_t *) malloc(sizeof(int64_t) * elems); + auto res_dis = (float *) malloc(sizeof(float) * elems); - search_impl(rows, (float*)p_data, search_cfg->k, res_dis, res_ids, config); + search_impl(rows, (float *) p_data, search_cfg->k, res_dis, res_ids, config); - // std::stringstream ss_res_id, ss_res_dist; - // for (int i = 0; i < 10; ++i) { - // printf("%llu", res_ids[i]); - // printf("\n"); - // printf("%.6f", res_dis[i]); - // printf("\n"); - // ss_res_id << res_ids[i] << " "; - // ss_res_dist << res_dis[i] << " "; - // } - // std::cout << std::endl << "after search: " << std::endl; - // std::cout << ss_res_id.str() << std::endl; - // std::cout << ss_res_dist.str() << std::endl << std::endl; + // std::stringstream ss_res_id, ss_res_dist; + // for (int i = 0; i < 10; ++i) { + // printf("%llu", res_ids[i]); + // printf("\n"); + // printf("%.6f", res_dis[i]); + // printf("\n"); + // ss_res_id << res_ids[i] << " "; + // ss_res_dist << res_dis[i] << " "; + // } + // std::cout << std::endl << "after search: " << std::endl; + // std::cout << ss_res_id.str() << std::endl; + // std::cout << ss_res_dist.str() << std::endl << std::endl; - // auto id_buf = MakeMutableBufferSmart((uint8_t*)res_ids, sizeof(int64_t) * elems); - // auto dist_buf = MakeMutableBufferSmart((uint8_t*)res_dis, sizeof(float) * elems); - // - // std::vector id_bufs{nullptr, id_buf}; - // std::vector dist_bufs{nullptr, dist_buf}; - // - // auto int64_type = std::make_shared(); - // auto float_type = std::make_shared(); - // - // auto id_array_data = arrow::ArrayData::Make(int64_type, elems, id_bufs); - // auto dist_array_data = arrow::ArrayData::Make(float_type, elems, dist_bufs); - // - // auto ids = std::make_shared>(id_array_data); - // auto dists = std::make_shared>(dist_array_data); - // std::vector array{ids, dists}; + // auto id_buf = MakeMutableBufferSmart((uint8_t*)res_ids, sizeof(int64_t) * elems); + // auto dist_buf = MakeMutableBufferSmart((uint8_t*)res_dis, sizeof(float) * elems); + // + // std::vector id_bufs{nullptr, id_buf}; + // std::vector dist_bufs{nullptr, dist_buf}; + // + // auto int64_type = std::make_shared(); + // auto float_type = std::make_shared(); + // + // auto id_array_data = arrow::ArrayData::Make(int64_type, elems, id_bufs); + // auto dist_array_data = arrow::ArrayData::Make(float_type, elems, dist_bufs); + // + // auto ids = std::make_shared>(id_array_data); + // auto dists = std::make_shared>(dist_array_data); + // std::vector array{ids, dists}; - return std::make_shared((void*)res_ids, (void*)res_dis); + return std::make_shared((void *) res_ids, (void *) res_dis); + + } catch (faiss::FaissException& e) { + KNOWHERE_THROW_MSG(e.what()); + } catch (std::exception& e) { + KNOWHERE_THROW_MSG(e.what()); + } } void From 3af4da6d526799cf04ea1bf81c8ce9a27b6be1bf Mon Sep 17 00:00:00 2001 From: "yudong.cai" Date: Fri, 29 Nov 2019 14:40:42 +0800 Subject: [PATCH 04/17] #331 update clang-format --- .../knowhere/knowhere/index/vector_index/IndexGPUIVF.cpp | 2 +- .../knowhere/knowhere/index/vector_index/IndexIVF.cpp | 9 ++++----- 2 files changed, 5 insertions(+), 6 deletions(-) diff --git a/core/src/index/knowhere/knowhere/index/vector_index/IndexGPUIVF.cpp b/core/src/index/knowhere/knowhere/index/vector_index/IndexGPUIVF.cpp index 872f05aa67..2001dd0511 100644 --- a/core/src/index/knowhere/knowhere/index/vector_index/IndexGPUIVF.cpp +++ b/core/src/index/knowhere/knowhere/index/vector_index/IndexGPUIVF.cpp @@ -131,7 +131,7 @@ GPUIVF::search_impl(int64_t n, const float* data, int64_t k, float* distances, i if (auto device_index = std::dynamic_pointer_cast(index_)) { auto search_cfg = std::dynamic_pointer_cast(cfg); device_index->nprobe = search_cfg->nprobe; - //assert(device_index->getNumProbes() == search_cfg->nprobe); + // assert(device_index->getNumProbes() == search_cfg->nprobe); ResScope rs(res_, gpu_id_); device_index->search(n, (float*)data, k, distances, labels); } else { diff --git a/core/src/index/knowhere/knowhere/index/vector_index/IndexIVF.cpp b/core/src/index/knowhere/knowhere/index/vector_index/IndexIVF.cpp index 884aa8c31d..021819c83e 100644 --- a/core/src/index/knowhere/knowhere/index/vector_index/IndexIVF.cpp +++ b/core/src/index/knowhere/knowhere/index/vector_index/IndexIVF.cpp @@ -121,10 +121,10 @@ IVF::Search(const DatasetPtr& dataset, const Config& config) { try { auto elems = rows * search_cfg->k; - auto res_ids = (int64_t *) malloc(sizeof(int64_t) * elems); - auto res_dis = (float *) malloc(sizeof(float) * elems); + auto res_ids = (int64_t*)malloc(sizeof(int64_t) * elems); + auto res_dis = (float*)malloc(sizeof(float) * elems); - search_impl(rows, (float *) p_data, search_cfg->k, res_dis, res_ids, config); + search_impl(rows, (float*)p_data, search_cfg->k, res_dis, res_ids, config); // std::stringstream ss_res_id, ss_res_dist; // for (int i = 0; i < 10; ++i) { @@ -155,8 +155,7 @@ IVF::Search(const DatasetPtr& dataset, const Config& config) { // auto dists = std::make_shared>(dist_array_data); // std::vector array{ids, dists}; - return std::make_shared((void *) res_ids, (void *) res_dis); - + return std::make_shared((void*)res_ids, (void*)res_dis); } catch (faiss::FaissException& e) { KNOWHERE_THROW_MSG(e.what()); } catch (std::exception& e) { From 4f098c121fa15a27f8ec667baf5326ec3c503570 Mon Sep 17 00:00:00 2001 From: zhenwu Date: Sat, 30 Nov 2019 11:26:17 +0800 Subject: [PATCH 05/17] Update timeout --- tests/milvus_python_test/test_index.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/milvus_python_test/test_index.py b/tests/milvus_python_test/test_index.py index 8ce03b6a61..5172d2588f 100644 --- a/tests/milvus_python_test/test_index.py +++ b/tests/milvus_python_test/test_index.py @@ -18,7 +18,7 @@ index_file_size = 10 vectors = gen_vectors(nb, dim) vectors = sklearn.preprocessing.normalize(vectors, axis=1, norm='l2') vectors = vectors.tolist() -BUILD_TIMEOUT = 60 +BUILD_TIMEOUT = 180 nprobe = 1 tag = "1970-01-01" From dcbe1a0c3f0ed89d0f7439eceb4c2afcf86706d0 Mon Sep 17 00:00:00 2001 From: "yudong.cai" Date: Sat, 30 Nov 2019 11:26:40 +0800 Subject: [PATCH 06/17] #579 when gpu resources disabled, use cpu index instead --- CHANGELOG.md | 1 + core/src/db/engine/ExecutionEngineImpl.cpp | 24 ++++++++++++++-------- 2 files changed, 16 insertions(+), 9 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index e83f996780..90a1358592 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -36,6 +36,7 @@ Please mark all change in change log and use the ticket from JIRA. - \#545 - Avoid dead circle of build index thread when error occurs - \#552 - Server down during building index_type: IVF_PQ using GPU-edition - \#561 - Milvus server should report exception/error message or terminate on mysql metadata backend error +- \#579 - Build index hang in GPU version when gpu_resources disabled - \#599 - Build index log is incorrect - \#602 - Optimizer specify wrong gpu_id - \#606 - No log generated during building index with CPU diff --git a/core/src/db/engine/ExecutionEngineImpl.cpp b/core/src/db/engine/ExecutionEngineImpl.cpp index 9c6a2a0f33..9f2b48642d 100644 --- a/core/src/db/engine/ExecutionEngineImpl.cpp +++ b/core/src/db/engine/ExecutionEngineImpl.cpp @@ -86,6 +86,9 @@ ExecutionEngineImpl::ExecutionEngineImpl(VecIndexPtr index, const std::string& l VecIndexPtr ExecutionEngineImpl::CreatetVecIndex(EngineType type) { + server::Config& config = server::Config::GetInstance(); + bool gpu_resource_enable = true; + config.GetGpuResourceConfigEnable(gpu_resource_enable); std::shared_ptr index; switch (type) { case EngineType::FAISS_IDMAP: { @@ -94,18 +97,20 @@ ExecutionEngineImpl::CreatetVecIndex(EngineType type) { } case EngineType::FAISS_IVFFLAT: { #ifdef MILVUS_GPU_VERSION - index = GetVecIndexFactory(IndexType::FAISS_IVFFLAT_MIX); -#else - index = GetVecIndexFactory(IndexType::FAISS_IVFFLAT_CPU); + if (gpu_resource_enable) + index = GetVecIndexFactory(IndexType::FAISS_IVFFLAT_MIX); + else #endif + index = GetVecIndexFactory(IndexType::FAISS_IVFFLAT_CPU); break; } case EngineType::FAISS_IVFSQ8: { #ifdef MILVUS_GPU_VERSION - index = GetVecIndexFactory(IndexType::FAISS_IVFSQ8_MIX); -#else - index = GetVecIndexFactory(IndexType::FAISS_IVFSQ8_CPU); + if (gpu_resource_enable) + index = GetVecIndexFactory(IndexType::FAISS_IVFSQ8_MIX); + else #endif + index = GetVecIndexFactory(IndexType::FAISS_IVFSQ8_CPU); break; } case EngineType::NSG_MIX: { @@ -120,10 +125,11 @@ ExecutionEngineImpl::CreatetVecIndex(EngineType type) { #endif case EngineType::FAISS_PQ: { #ifdef MILVUS_GPU_VERSION - index = GetVecIndexFactory(IndexType::FAISS_IVFPQ_MIX); -#else - index = GetVecIndexFactory(IndexType::FAISS_IVFPQ_CPU); + if (gpu_resource_enable) + index = GetVecIndexFactory(IndexType::FAISS_IVFPQ_MIX); + else #endif + index = GetVecIndexFactory(IndexType::FAISS_IVFPQ_CPU); break; } case EngineType::SPTAG_KDT: { From c039d0fdb0bc198859d46e8bd61c9c241c57c800 Mon Sep 17 00:00:00 2001 From: zhenwu Date: Sat, 30 Nov 2019 11:58:00 +0800 Subject: [PATCH 07/17] remove multiprocessing case --- tests/milvus_python_test/test_table_count.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tests/milvus_python_test/test_table_count.py b/tests/milvus_python_test/test_table_count.py index 77780c8faa..a96ccb12b3 100644 --- a/tests/milvus_python_test/test_table_count.py +++ b/tests/milvus_python_test/test_table_count.py @@ -327,8 +327,9 @@ class TestTableCountIP: status, res = connect.get_table_row_count(ip_table) assert res == 0 + # TODO: enable @pytest.mark.timeout(60) - def test_table_rows_count_multiprocessing(self, connect, ip_table, args): + def _test_table_rows_count_multiprocessing(self, connect, ip_table, args): ''' target: test table rows_count is correct or not with multiprocess method: create table and add vectors in it, From 525b6df0b6d67ded1384efe668687076cff0a4b6 Mon Sep 17 00:00:00 2001 From: "yudong.cai" Date: Sat, 30 Nov 2019 12:00:47 +0800 Subject: [PATCH 08/17] #579 update index build error message --- core/src/db/DBImpl.cpp | 4 ---- 1 file changed, 4 deletions(-) diff --git a/core/src/db/DBImpl.cpp b/core/src/db/DBImpl.cpp index 67769717c4..e2099739ed 100644 --- a/core/src/db/DBImpl.cpp +++ b/core/src/db/DBImpl.cpp @@ -1033,11 +1033,7 @@ DBImpl::BuildTableIndexRecursively(const std::string& table_id, const TableIndex if (!failed_files.empty()) { std::string msg = "Failed to build index for " + std::to_string(failed_files.size()) + ((failed_files.size() == 1) ? " file" : " files"); -#ifdef MILVUS_GPU_VERSION - msg += ", file size is too large or gpu memory is not enough."; -#else msg += ", please double check index parameters."; -#endif return Status(DB_ERROR, msg); } From de440acaa02ea1366be99ae46babb810b4f6dfb9 Mon Sep 17 00:00:00 2001 From: "yudong.cai" Date: Sat, 30 Nov 2019 14:03:46 +0800 Subject: [PATCH 09/17] #579 fix CPU version build error --- core/src/db/engine/ExecutionEngineImpl.cpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/core/src/db/engine/ExecutionEngineImpl.cpp b/core/src/db/engine/ExecutionEngineImpl.cpp index 9f2b48642d..1189d35b94 100644 --- a/core/src/db/engine/ExecutionEngineImpl.cpp +++ b/core/src/db/engine/ExecutionEngineImpl.cpp @@ -86,9 +86,11 @@ ExecutionEngineImpl::ExecutionEngineImpl(VecIndexPtr index, const std::string& l VecIndexPtr ExecutionEngineImpl::CreatetVecIndex(EngineType type) { +#ifdef MILVUS_GPU_VERSION server::Config& config = server::Config::GetInstance(); bool gpu_resource_enable = true; config.GetGpuResourceConfigEnable(gpu_resource_enable); +#endif std::shared_ptr index; switch (type) { case EngineType::FAISS_IDMAP: { From 8f09b1f2306bd2833dd9ffd2d91efd174f3c9b0e Mon Sep 17 00:00:00 2001 From: "xiaojun.lin" Date: Sat, 30 Nov 2019 17:23:10 +0800 Subject: [PATCH 10/17] solve part of the problem about nsg --- CHANGELOG.md | 1 + .../index/vector_index/IndexGPUIDMAP.cpp | 34 +++ .../index/vector_index/IndexGPUIDMAP.h | 3 + .../index/vector_index/IndexIDMAP.cpp | 20 ++ .../knowhere/index/vector_index/IndexIDMAP.h | 14 ++ .../knowhere/index/vector_index/IndexIVF.cpp | 25 +-- .../knowhere/index/vector_index/IndexIVF.h | 2 +- .../knowhere/index/vector_index/IndexNSG.cpp | 18 +- .../knowhere/index/vector_index/nsg/NSG.cpp | 212 +++++++----------- .../knowhere/index/vector_index/nsg/NSG.h | 5 +- core/src/index/unittest/test_nsg/test_nsg.cpp | 172 ++++++++++++++ core/src/index/unittest/utils.cpp | 69 ++++++ core/src/index/unittest/utils.h | 9 + core/src/wrapper/ConfAdapter.cpp | 7 +- 14 files changed, 436 insertions(+), 155 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 4b86ca0d74..12d679c646 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -27,6 +27,7 @@ Please mark all change in change log and use the ticket from JIRA. - \#527 - faiss benchmark not compatible with faiss 1.6.0 - \#530 - BuildIndex stop when do build index and search simultaneously - \#533 - NSG build failed with MetricType Inner Product +- \#548 - NSG search accuracy is too low ## Feature - \#12 - Pure CPU version for Milvus diff --git a/core/src/index/knowhere/knowhere/index/vector_index/IndexGPUIDMAP.cpp b/core/src/index/knowhere/knowhere/index/vector_index/IndexGPUIDMAP.cpp index 1aded3ddaa..edf42abc8d 100644 --- a/core/src/index/knowhere/knowhere/index/vector_index/IndexGPUIDMAP.cpp +++ b/core/src/index/knowhere/knowhere/index/vector_index/IndexGPUIDMAP.cpp @@ -126,4 +126,38 @@ GPUIDMAP::search_impl(int64_t n, const float* data, int64_t k, float* distances, index_->search(n, (float*)data, k, distances, labels); } +void +GPUIDMAP::GenGraph(float* data, const int64_t& k, Graph& graph, const Config& config) { + int64_t K = k + 1; + auto ntotal = Count(); + + size_t dim = config->d; + auto batch_size = 1000; + auto tail_batch_size = ntotal % batch_size; + auto batch_search_count = ntotal / batch_size; + auto total_search_count = tail_batch_size == 0 ? batch_search_count : batch_search_count + 1; + + std::vector res_dis(K * batch_size); + graph.resize(ntotal); + Graph res_vec(total_search_count); + for (int i = 0; i < total_search_count; ++i) { + auto b_size = (i == (total_search_count - 1)) && tail_batch_size != 0 ? tail_batch_size : batch_size; + + auto& res = res_vec[i]; + res.resize(K * b_size); + + auto xq = data + batch_size * dim * i; + search_impl(b_size, (float*)xq, K, res_dis.data(), res.data(), config); + + for (int j = 0; j < b_size; ++j) { + auto& node = graph[batch_size * i + j]; + node.resize(k); + auto start_pos = j * K + 1; + for (int m = 0, cursor = start_pos; m < k && cursor < start_pos + k; ++m, ++cursor) { + node[m] = res[cursor]; + } + } + } +} + } // namespace knowhere diff --git a/core/src/index/knowhere/knowhere/index/vector_index/IndexGPUIDMAP.h b/core/src/index/knowhere/knowhere/index/vector_index/IndexGPUIDMAP.h index f1dfe2f21a..b9325a9cc1 100644 --- a/core/src/index/knowhere/knowhere/index/vector_index/IndexGPUIDMAP.h +++ b/core/src/index/knowhere/knowhere/index/vector_index/IndexGPUIDMAP.h @@ -47,6 +47,9 @@ class GPUIDMAP : public IDMAP, public GPUIndex { VectorIndexPtr CopyGpuToGpu(const int64_t& device_id, const Config& config) override; + void + GenGraph(float* data, const int64_t& k, Graph& graph, const Config& config); + protected: void search_impl(int64_t n, const float* data, int64_t k, float* distances, int64_t* labels, const Config& cfg) override; diff --git a/core/src/index/knowhere/knowhere/index/vector_index/IndexIDMAP.cpp b/core/src/index/knowhere/knowhere/index/vector_index/IndexIDMAP.cpp index 7aedf98613..351209c10f 100644 --- a/core/src/index/knowhere/knowhere/index/vector_index/IndexIDMAP.cpp +++ b/core/src/index/knowhere/knowhere/index/vector_index/IndexIDMAP.cpp @@ -121,6 +121,26 @@ IDMAP::Add(const DatasetPtr& dataset, const Config& config) { index_->add_with_ids(rows, (float*)p_data, p_ids); } +void +IDMAP::AddWithoutId(const DatasetPtr& dataset, const Config& config) { + if (!index_) { + KNOWHERE_THROW_MSG("index not initialize"); + } + + std::lock_guard lk(mutex_); + GETTENSOR(dataset) + + // TODO: magic here. + auto array = dataset->array()[0]; + + std::vector new_ids(rows); + for (int i = 0; i < rows; ++i) { + new_ids[i] = i; + } + + index_->add_with_ids(rows, (float*)p_data, new_ids.data()); +} + int64_t IDMAP::Count() { return index_->ntotal; diff --git a/core/src/index/knowhere/knowhere/index/vector_index/IndexIDMAP.h b/core/src/index/knowhere/knowhere/index/vector_index/IndexIDMAP.h index 0f66e8fac0..9f1369c7d3 100644 --- a/core/src/index/knowhere/knowhere/index/vector_index/IndexIDMAP.h +++ b/core/src/index/knowhere/knowhere/index/vector_index/IndexIDMAP.h @@ -34,20 +34,31 @@ class IDMAP : public VectorIndex, public FaissBaseIndex { BinarySet Serialize() override; + void Load(const BinarySet& index_binary) override; + void Train(const Config& config); + DatasetPtr Search(const DatasetPtr& dataset, const Config& config) override; + int64_t Count() override; + VectorIndexPtr Clone() override; + int64_t Dimension() override; + void Add(const DatasetPtr& dataset, const Config& config) override; + + void + AddWithoutId(const DatasetPtr& dataset, const Config& config); + VectorIndexPtr CopyCpuToGpu(const int64_t& device_id, const Config& config); void @@ -55,12 +66,15 @@ class IDMAP : public VectorIndex, public FaissBaseIndex { virtual float* GetRawVectors(); + virtual int64_t* GetRawIds(); protected: virtual void search_impl(int64_t n, const float* data, int64_t k, float* distances, int64_t* labels, const Config& cfg); + + protected: std::mutex mutex_; }; diff --git a/core/src/index/knowhere/knowhere/index/vector_index/IndexIVF.cpp b/core/src/index/knowhere/knowhere/index/vector_index/IndexIVF.cpp index 8b734abdc6..a74bddc94c 100644 --- a/core/src/index/knowhere/knowhere/index/vector_index/IndexIVF.cpp +++ b/core/src/index/knowhere/knowhere/index/vector_index/IndexIVF.cpp @@ -189,35 +189,34 @@ IVF::Dimension() { } void -IVF::GenGraph(const int64_t& k, Graph& graph, const DatasetPtr& dataset, const Config& config) { - GETTENSOR(dataset) - +IVF::GenGraph(float* data, const int64_t& k, Graph& graph, const Config& config) { + int64_t K = k + 1; auto ntotal = Count(); - auto batch_size = 100; + size_t dim = config->d; + auto batch_size = 1000; auto tail_batch_size = ntotal % batch_size; auto batch_search_count = ntotal / batch_size; auto total_search_count = tail_batch_size == 0 ? batch_search_count : batch_search_count + 1; - std::vector res_dis(k * batch_size); + std::vector res_dis(K * batch_size); graph.resize(ntotal); Graph res_vec(total_search_count); for (int i = 0; i < total_search_count; ++i) { - auto b_size = i == total_search_count - 1 && tail_batch_size != 0 ? tail_batch_size : batch_size; + auto b_size = (i == (total_search_count - 1)) && tail_batch_size != 0 ? tail_batch_size : batch_size; auto& res = res_vec[i]; - res.resize(k * b_size); + res.resize(K * b_size); - auto xq = p_data + batch_size * dim * i; - search_impl(b_size, (float*)xq, k, res_dis.data(), res.data(), config); + auto xq = data + batch_size * dim * i; + search_impl(b_size, (float*)xq, K, res_dis.data(), res.data(), config); - int tmp = 0; for (int j = 0; j < b_size; ++j) { auto& node = graph[batch_size * i + j]; node.resize(k); - for (int m = 0; m < k && tmp < k * b_size; ++m, ++tmp) { - // TODO(linxj): avoid memcopy here. - node[m] = res[tmp]; + auto start_pos = j * K + 1; + for (int m = 0, cursor = start_pos; m < k && cursor < start_pos + k; ++m, ++cursor) { + node[m] = res[cursor]; } } } diff --git a/core/src/index/knowhere/knowhere/index/vector_index/IndexIVF.h b/core/src/index/knowhere/knowhere/index/vector_index/IndexIVF.h index e064b6f08c..24b006a565 100644 --- a/core/src/index/knowhere/knowhere/index/vector_index/IndexIVF.h +++ b/core/src/index/knowhere/knowhere/index/vector_index/IndexIVF.h @@ -57,7 +57,7 @@ class IVF : public VectorIndex, public FaissBaseIndex { Search(const DatasetPtr& dataset, const Config& config) override; void - GenGraph(const int64_t& k, Graph& graph, const DatasetPtr& dataset, const Config& config); + GenGraph(float* data, const int64_t& k, Graph& graph, const Config& config); BinarySet Serialize() override; diff --git a/core/src/index/knowhere/knowhere/index/vector_index/IndexNSG.cpp b/core/src/index/knowhere/knowhere/index/vector_index/IndexNSG.cpp index 3cf0122233..9571571945 100644 --- a/core/src/index/knowhere/knowhere/index/vector_index/IndexNSG.cpp +++ b/core/src/index/knowhere/knowhere/index/vector_index/IndexNSG.cpp @@ -21,6 +21,8 @@ #include "knowhere/common/Timer.h" #ifdef MILVUS_GPU_VERSION #include "knowhere/index/vector_index/IndexGPUIVF.h" +#include "knowhere/index/vector_index/IndexGPUIDMAP.h" +#include "knowhere/index/vector_index/helpers/Cloner.h" #endif #include "knowhere/index/vector_index/IndexIVF.h" @@ -110,6 +112,7 @@ NSG::Search(const DatasetPtr& dataset, const Config& config) { IndexModelPtr NSG::Train(const DatasetPtr& dataset, const Config& config) { + config->Dump(); auto build_cfg = std::dynamic_pointer_cast(config); if (build_cfg != nullptr) { build_cfg->CheckValid(); // throw exception @@ -117,23 +120,26 @@ NSG::Train(const DatasetPtr& dataset, const Config& config) { // TODO(linxj): dev IndexFactory, support more IndexType #ifdef MILVUS_GPU_VERSION - auto preprocess_index = std::make_shared(build_cfg->gpu_id); +// auto preprocess_index = std::make_shared(build_cfg->gpu_id); #else auto preprocess_index = std::make_shared(); #endif - auto model = preprocess_index->Train(dataset, config); - preprocess_index->set_index_model(model); - preprocess_index->AddWithoutIds(dataset, config); + auto preprocess_index = std::make_shared(); + preprocess_index->Train(config); + preprocess_index->AddWithoutId(dataset, config); + float* raw_data = preprocess_index->GetRawVectors(); + auto xx = cloner::CopyCpuToGpu(preprocess_index, 0, config); + auto ss = std::dynamic_pointer_cast(xx); Graph knng; - preprocess_index->GenGraph(build_cfg->knng, knng, dataset, config); + ss->GenGraph(raw_data, build_cfg->knng, knng, config); + GETTENSOR(dataset) algo::BuildParams b_params; b_params.candidate_pool_size = build_cfg->candidate_pool_size; b_params.out_degree = build_cfg->out_degree; b_params.search_length = build_cfg->search_length; - GETTENSOR(dataset) auto array = dataset->array()[0]; auto p_ids = array->data()->GetValues(1, 0); diff --git a/core/src/index/knowhere/knowhere/index/vector_index/nsg/NSG.cpp b/core/src/index/knowhere/knowhere/index/vector_index/nsg/NSG.cpp index e9e65b1191..f303543259 100644 --- a/core/src/index/knowhere/knowhere/index/vector_index/nsg/NSG.cpp +++ b/core/src/index/knowhere/knowhere/index/vector_index/nsg/NSG.cpp @@ -18,7 +18,6 @@ #include #include #include -#include #include #include #include @@ -29,12 +28,13 @@ #include "knowhere/index/vector_index/nsg/NSG.h" #include "knowhere/index/vector_index/nsg/NSGHelper.h" -// TODO: enable macro //#include namespace knowhere { namespace algo { +unsigned int seed = 100; + NsgIndex::NsgIndex(const size_t& dimension, const size_t& n, METRICTYPE metric) : dimension(dimension), ntotal(n), metric_type(metric) { switch (metric) { @@ -55,8 +55,6 @@ NsgIndex::~NsgIndex() { void NsgIndex::Build_with_ids(size_t nb, const float* data, const int64_t* ids, const BuildParams& parameters) { - TimeRecorder rc("NSG"); - ntotal = nb; ori_data_ = new float[ntotal * dimension]; ids_ = new int64_t[ntotal]; @@ -67,25 +65,17 @@ NsgIndex::Build_with_ids(size_t nb, const float* data, const int64_t* ids, const out_degree = parameters.out_degree; candidate_pool_size = parameters.candidate_pool_size; + TimeRecorder rc("NSG", 1); + InitNavigationPoint(); rc.RecordSection("init"); Link(); rc.RecordSection("Link"); - //>> Debug code - ///// - // int count = 0; - // for (int i = 0; i < ntotal; ++i) { - // count += nsg[i].size(); - //} - ///// - CheckConnectivity(); rc.RecordSection("Connect"); - //>> Debug code - /// int total_degree = 0; for (size_t i = 0; i < ntotal; ++i) { total_degree += nsg[i].size(); @@ -93,9 +83,17 @@ NsgIndex::Build_with_ids(size_t nb, const float* data, const int64_t* ids, const KNOWHERE_LOG_DEBUG << "Graph physical size: " << total_degree * sizeof(node_t) / 1024 / 1024 << "m"; KNOWHERE_LOG_DEBUG << "Average degree: " << total_degree / ntotal; - ///// is_trained = true; + + // Debug code + // for (size_t i = 0; i < ntotal; i++) { + // auto& x = nsg[i]; + // for (size_t j = 0; j < x.size(); j++) { + // std::cout << "id: " << x[j] << std::endl; + // } + // std::cout << std::endl; + // } } void @@ -114,28 +112,22 @@ NsgIndex::InitNavigationPoint() { } // select navigation point - std::vector resset, fullset; - unsigned int seed = 100; + std::vector resset; navigation_point = rand_r(&seed) % ntotal; // random initialize navigating point - - //>> Debug code - ///// - // navigation_point = drand48(); - ///// - GetNeighbors(center, resset, knng); navigation_point = resset[0].id; - //>> Debug code - ///// + // Debug code // std::cout << "ep: " << navigation_point << std::endl; - ///// - - //>> Debug code - ///// + // for (int k = 0; k < resset.size(); ++k) { + // std::cout << "id: " << resset[k].id << ", dis: " << resset[k].distance << std::endl; + // } + // std::cout << std::endl; + // + // std::cout << "ep: " << navigation_point << std::endl; + // // float r1 = distance_->Compare(center, ori_data_ + navigation_point * dimension, dimension); // assert(r1 == resset[0].distance); - ///// } // Specify Link @@ -149,7 +141,9 @@ NsgIndex::GetNeighbors(const float* query, std::vector& resset, std::v // TODO: throw exception here. } - std::vector init_ids; + resset.resize(search_length); + std::vector init_ids(buffer_size); + // std::vector init_ids; { /* @@ -158,25 +152,26 @@ NsgIndex::GetNeighbors(const float* query, std::vector& resset, std::v size_t count = 0; // Get all neighbors - for (size_t i = 0; i < graph[navigation_point].size(); ++i) { - init_ids.push_back(graph[navigation_point][i]); + for (size_t i = 0; i < init_ids.size() && i < graph[navigation_point].size(); ++i) { + // for (size_t i = 0; i < graph[navigation_point].size(); ++i) { + // init_ids.push_back(graph[navigation_point][i]); + init_ids[i] = graph[navigation_point][i]; has_calculated_dist[init_ids[i]] = true; ++count; } - - unsigned int seed = 100; while (count < buffer_size) { node_t id = rand_r(&seed) % ntotal; if (has_calculated_dist[id]) continue; // duplicate id - init_ids.push_back(id); + // init_ids.push_back(id); + init_ids[count] = id; ++count; has_calculated_dist[id] = true; } } { - resset.resize(init_ids.size()); + // resset.resize(init_ids.size()); // init resset and sort by distance for (size_t i = 0; i < init_ids.size(); ++i) { @@ -190,7 +185,7 @@ NsgIndex::GetNeighbors(const float* query, std::vector& resset, std::v float dist = distance_->Compare(ori_data_ + dimension * id, query, dimension); resset[i] = Neighbor(id, dist, false); - ///////////// difference from other GetNeighbors /////////////// + //// difference from other GetNeighbors fullset.push_back(resset[i]); /////////////////////////////////////// } @@ -247,8 +242,10 @@ NsgIndex::GetNeighbors(const float* query, std::vector& resset, std::v // TODO: throw exception here. } - std::vector init_ids; - boost::dynamic_bitset<> has_calculated_dist{ntotal, 0}; // TODO: ? + // std::vector init_ids; + std::vector init_ids(buffer_size); + resset.resize(buffer_size); + boost::dynamic_bitset<> has_calculated_dist{ntotal, 0}; { /* @@ -257,24 +254,26 @@ NsgIndex::GetNeighbors(const float* query, std::vector& resset, std::v size_t count = 0; // Get all neighbors - for (size_t i = 0; i < graph[navigation_point].size(); ++i) { - init_ids.push_back(graph[navigation_point][i]); + for (size_t i = 0; i < init_ids.size() && i < graph[navigation_point].size(); ++i) { + // for (size_t i = 0; i < graph[navigation_point].size(); ++i) { + // init_ids.push_back(graph[navigation_point][i]); + init_ids[i] = graph[navigation_point][i]; has_calculated_dist[init_ids[i]] = true; ++count; } - unsigned int seed = 100; while (count < buffer_size) { node_t id = rand_r(&seed) % ntotal; if (has_calculated_dist[id]) continue; // duplicate id - init_ids.push_back(id); + // init_ids.push_back(id); + init_ids[count] = id; ++count; has_calculated_dist[id] = true; } } { - resset.resize(init_ids.size()); + // resset.resize(init_ids.size()); // init resset and sort by distance for (size_t i = 0; i < init_ids.size(); ++i) { @@ -333,13 +332,15 @@ NsgIndex::GetNeighbors(const float* query, std::vector& resset, std::v void NsgIndex::GetNeighbors(const float* query, std::vector& resset, Graph& graph, SearchParams* params) { - size_t& buffer_size = params ? params->search_length : search_length; + size_t buffer_size = params ? params->search_length : search_length; if (buffer_size > ntotal) { // TODO: throw exception here. } - std::vector init_ids; + // std::vector init_ids; + std::vector init_ids(buffer_size); + resset.resize(buffer_size); boost::dynamic_bitset<> has_calculated_dist{ntotal, 0}; { @@ -349,33 +350,33 @@ NsgIndex::GetNeighbors(const float* query, std::vector& resset, Graph& size_t count = 0; // Get all neighbors - for (size_t i = 0; i < graph[navigation_point].size(); ++i) { - init_ids.push_back(graph[navigation_point][i]); + for (size_t i = 0; i < init_ids.size() && i < graph[navigation_point].size(); ++i) { + // for (size_t i = 0; i < graph[navigation_point].size(); ++i) { + // init_ids.push_back(graph[navigation_point][i]); + init_ids[i] = graph[navigation_point][i]; has_calculated_dist[init_ids[i]] = true; ++count; } - unsigned int seed = 100; while (count < buffer_size) { node_t id = rand_r(&seed) % ntotal; if (has_calculated_dist[id]) continue; // duplicate id - init_ids.push_back(id); + // init_ids.push_back(id); + init_ids[count] = id; ++count; has_calculated_dist[id] = true; } } { - resset.resize(init_ids.size()); + // resset.resize(init_ids.size()); // init resset and sort by distance for (size_t i = 0; i < init_ids.size(); ++i) { node_t id = init_ids[i]; - // assert(id < ntotal); if (id >= static_cast(ntotal)) { KNOWHERE_THROW_MSG("Build Index Error, id > ntotal"); - continue; } float dist = distance_->Compare(ori_data_ + id * dimension, query, dimension); @@ -383,13 +384,6 @@ NsgIndex::GetNeighbors(const float* query, std::vector& resset, Graph& } std::sort(resset.begin(), resset.end()); // sort by distance - //>> Debug code - ///// - // for (int j = 0; j < buffer_size; ++j) { - // std::cout << "resset_id: " << resset[j].id << ", resset_dist: " << resset[j].distance << std::endl; - //} - ///// - // search nearest neighbor size_t cursor = 0; while (cursor < buffer_size) { @@ -410,7 +404,8 @@ NsgIndex::GetNeighbors(const float* query, std::vector& resset, Graph& if (dist >= resset[buffer_size - 1].distance) continue; - ///////////// difference from other GetNeighbors /////////////// + + //// difference from other GetNeighbors Neighbor nn(id, dist, false); /////////////////////////////////////// @@ -440,59 +435,50 @@ NsgIndex::GetNeighbors(const float* query, std::vector& resset, Graph& void NsgIndex::Link() { - auto cut_graph_dist = new float[ntotal * out_degree]; + float* cut_graph_dist = new float[ntotal * out_degree]; nsg.resize(ntotal); #pragma omp parallel { std::vector fullset; std::vector temp; - boost::dynamic_bitset<> flags{ntotal, 0}; // TODO: ? + boost::dynamic_bitset<> flags{ntotal, 0}; #pragma omp for schedule(dynamic, 100) for (size_t n = 0; n < ntotal; ++n) { fullset.clear(); + temp.clear(); flags.reset(); GetNeighbors(ori_data_ + dimension * n, temp, fullset, flags); - - //>> Debug code - ///// - // float r1 = distance_->Compare(ori_data_ + n * dimension, ori_data_ + temp[0].id * dimension, dimension); - // assert(r1 == temp[0].distance); - ///// SyncPrune(n, fullset, flags, cut_graph_dist); } + + // Debug code + // std::cout << "ep: " << 0 << std::endl; + // for (int k = 0; k < fullset.size(); ++k) { + // std::cout << "id: " << fullset[k].id << ", dis: " << fullset[k].distance << std::endl; + // } } - //>> Debug code - ///// - // auto bak_nsg = nsg; - ///// + // Debug code + // for (size_t i = 0; i < ntotal; i++) + // { + // auto& x = nsg[i]; + // for (size_t j=0; j < x.size(); j++) + // { + // std::cout << "id: " << x[j] << std::endl; + // } + // std::cout << std::endl; + // } knng.clear(); - knng.shrink_to_fit(); std::vector mutex_vec(ntotal); - #pragma omp for schedule(dynamic, 100) for (unsigned n = 0; n < ntotal; ++n) { InterInsert(n, mutex_vec, cut_graph_dist); } + delete[] cut_graph_dist; - - //>> Debug code - ///// - // int count = 0; - // for (int i = 0; i < ntotal; ++i) { - // if (bak_nsg[i].size() != nsg[i].size()) { - // //count += nsg[i].size() - bak_nsg[i].size(); - // count += nsg[i].size(); - // } - //} - ///// - - for (size_t i = 0; i < ntotal; ++i) { - nsg[i].shrink_to_fit(); - } } void @@ -654,9 +640,9 @@ NsgIndex::DFS(size_t root, boost::dynamic_bitset<>& has_linked, int64_t& linked_ std::stack s; s.push(root); if (!has_linked[root]) { - linked_count++; // not link - has_linked[root] = true; // link start... + linked_count++; // not link } + has_linked[root] = true; // link start... while (!s.empty()) { size_t next = ntotal + 1; @@ -709,7 +695,6 @@ NsgIndex::FindUnconnectedNode(boost::dynamic_bitset<>& has_linked, int64_t& root } } if (found == 0) { - unsigned int seed = 100; while (true) { // random a linked-node and add unlinked-node as its neighbor size_t rid = rand_r(&seed) % ntotal; if (has_linked[rid]) { @@ -726,7 +711,10 @@ NsgIndex::Search(const float* query, const unsigned& nq, const unsigned& dim, co int64_t* ids, SearchParams& params) { std::vector> resset(nq); - params.search_length = k; + if (k >= 45) { + params.search_length = k; + } + TimeRecorder rc("NsgIndex::search", 1); // TODO(linxj): when to use openmp if (nq <= 4) { @@ -734,7 +722,7 @@ NsgIndex::Search(const float* query, const unsigned& nq, const unsigned& dim, co } else { #pragma omp parallel for for (unsigned int i = 0; i < nq; ++i) { - auto single_query = query + i * dim; + const float* single_query = query + i * dim; GetNeighbors(single_query, resset[i], nsg, ¶ms); } } @@ -759,13 +747,6 @@ NsgIndex::Search(const float* query, const unsigned& nq, const unsigned& dim, co } rc.RecordSection("merge"); - //>> Debug: test single insert - // int x_0 = resset[0].size(); - // for (int l = 0; l < resset[0].size(); ++l) { - // resset[0].pop_back(); - //} - // resset.clear(); - // ProfilerStart("xx.prof"); // std::vector resset; // GetNeighbors(query, resset, nsg, ¶ms); @@ -781,30 +762,5 @@ NsgIndex::SetKnnGraph(Graph& g) { knng = std::move(g); } -// void NsgIndex::GetKnnGraphFromFile() { -// //std::string filename = "sift.1M.50NN.graph"; -// std::string filename = "sift.50NN.graph"; -// -// std::ifstream in(filename, std::ios::binary); -// unsigned k; -// in.read((char *) &k, sizeof(unsigned)); -// in.seekg(0, std::ios::end); -// std::ios::pos_type ss = in.tellg(); -// size_t fsize = (size_t) ss; -// size_t num = (unsigned) (fsize / (k + 1) / 4); -// in.seekg(0, std::ios::beg); -// -// knng.resize(num); -// knng.reserve(num); -// unsigned kk = (k + 3) / 4 * 4; -// for (size_t i = 0; i < num; i++) { -// in.seekg(4, std::ios::cur); -// knng[i].resize(k); -// knng[i].reserve(kk); -// in.read((char *) knng[i].data(), k * sizeof(unsigned)); -// } -// in.close(); -//} - } // namespace algo } // namespace knowhere diff --git a/core/src/index/knowhere/knowhere/index/vector_index/nsg/NSG.h b/core/src/index/knowhere/knowhere/index/vector_index/nsg/NSG.h index 5dd128610f..3dd827466a 100644 --- a/core/src/index/knowhere/knowhere/index/vector_index/nsg/NSG.h +++ b/core/src/index/knowhere/knowhere/index/vector_index/nsg/NSG.h @@ -52,7 +52,7 @@ class NsgIndex { Distance* distance_; float* ori_data_; - int64_t* ids_; // TODO: support different type + int64_t* ids_; Graph nsg; // final graph Graph knng; // reset after build @@ -134,9 +134,6 @@ class NsgIndex { void FindUnconnectedNode(boost::dynamic_bitset<>& flags, int64_t& root); - - // private: - // void GetKnnGraphFromFile(); }; } // namespace algo diff --git a/core/src/index/unittest/test_nsg/test_nsg.cpp b/core/src/index/unittest/test_nsg/test_nsg.cpp index 4722c7e8f6..a5eac12b2a 100644 --- a/core/src/index/unittest/test_nsg/test_nsg.cpp +++ b/core/src/index/unittest/test_nsg/test_nsg.cpp @@ -23,6 +23,8 @@ #include "knowhere/index/vector_index/IndexNSG.h" #ifdef MILVUS_GPU_VERSION #include "knowhere/index/vector_index/helpers/FaissGpuResourceMgr.h" +#include "knowhere/index/vector_index/IndexGPUIDMAP.h" +#include "knowhere/index/vector_index/helpers/Cloner.h" #endif #include "knowhere/common/Timer.h" @@ -113,3 +115,173 @@ TEST_F(NSGInterfaceTest, comparetest) { } tc.RecordSection("IP"); } + +//#include +//TEST(test, ori_nsg) { +// // float* p_data = nullptr; +// size_t rows, dim; +// char* filename = "/mnt/112d53a6-5592-4360-a33b-7fd789456fce/workspace/Data/sift/sift_base.fvecs"; +// // loads_data(filename, p_data, rows, dim); +// float* p_data = fvecs_read(filename, &dim, &rows); +// +// std::string knng_filename = +// "/mnt/112d53a6-5592-4360-a33b-7fd789456fce/workspace/Cellar/anns/efanna_graph/tests/sift.1M.50NN.graph"; +// std::vector> knng; +// Load_nns_graph(knng, knng_filename.c_str()); +// +// // float* search_data = nullptr; +// size_t nq, search_dim; +// char* searchfile = "/mnt/112d53a6-5592-4360-a33b-7fd789456fce/workspace/Data/sift/sift_query.fvecs"; +// // loads_data(searchfile, search_data, nq, search_dim); +// float* search_data = fvecs_read(searchfile, &search_dim, &nq); +// assert(search_dim == dim); +// +// size_t k, nq2; +// char* gtfile = "/mnt/112d53a6-5592-4360-a33b-7fd789456fce/workspace/Data/sift/sift_groundtruth.ivecs"; +// int* gt_int = ivecs_read(gtfile, &k, &nq2); +// int64_t* gt = new int64_t[k * nq2]; +// for (int i = 0; i < k * nq2; i++) { +// gt[i] = gt_int[i]; +// } +// delete[] gt_int; +// +// std::vector store_ids(rows); +// for (int i = 0; i < rows; ++i) { +// store_ids[i] = i; +// } +// +// int64_t* I = new int64_t[nq * k]; +// float* D = new float[nq * k]; +//#if 0 +// efanna2e::Parameters params; +// params.Set("L", 50); +// params.Set("R", 55); +// params.Set("C", 300); +// auto orinsg = std::make_shared(dim, rows, efanna2e::Metric::L2, nullptr); +// orinsg->Load_nn_graph(knng); +// orinsg->Build(rows, (float*)p_data, params); +// +// efanna2e::Parameters paras; +// paras.Set("L_search", 45); +// paras.Set("P_search",100); +// k = 10; +// std::vector > res; +// for (unsigned i = 0; i < nq; i++) { +// std::vector tmp(k); +// orinsg->Search(search_data + i * dim, p_data, k, paras, tmp.data()); +// res.push_back(tmp); +// } +// } +//#else +// knowhere::algo::BuildParams params; +// params.search_length = 50; +// params.out_degree = 55; +// params.candidate_pool_size = 300; +// auto nsg = std::make_shared(dim, rows); +//#if 1 +// knowhere::FaissGpuResourceMgr::GetInstance().InitDevice(DEVICEID, 1024 * 1024 * 200, 1024 * 1024 * 600, 2); +// auto dataset = generate_dataset(int64_t(rows), int64_t(dim), p_data, store_ids.data()); +// auto config = std::make_shared(); +// config->d = dim; +// config->gpu_id = 0; +// config->metric_type = knowhere::METRICTYPE::L2; +// auto preprocess_index = std::make_shared(); +// preprocess_index->Train(config); +// preprocess_index->AddWithoutId(dataset, config); +// auto xx = knowhere::cloner::CopyCpuToGpu(preprocess_index, 0, config); +// auto ss = std::dynamic_pointer_cast(xx); +// +// std::vector> kng; +// ss->GenGraph(p_data, 50, kng, config); +// nsg->SetKnnGraph(kng); +// knowhere::FaissGpuResourceMgr::GetInstance().Free(); +//#else +// nsg->SetKnnGraph(knng); +//#endif +// nsg->Build_with_ids(rows, (float*)p_data, store_ids.data(), params); +// knowhere::algo::SearchParams s_params; +// s_params.search_length = 45; +// nsg->Search(search_data, nq, dim, k, D, I, s_params); +//#endif +// +// int n_1 = 0, n_10 = 0, n_100 = 0; +// for (int i = 0; i < nq; i++) { +// int gt_nn = gt[i * k]; +// for (int j = 0; j < k; j++) { +// if (I[i * k + j] == gt_nn) { +// if (j < 1) +// n_1++; +// if (j < 10) +// n_10++; +// if (j < 100) +// n_100++; +// } +// } +// } +// printf("R@1 = %.4f\n", n_1 / float(nq)); +// printf("R@10 = %.4f\n", n_10 / float(nq)); +// printf("R@100 = %.4f\n", n_100 / float(nq)); +//} +// +//TEST(testxx, test_idmap){ +// int k = 50; +// std::string knng_filename = +// "/mnt/112d53a6-5592-4360-a33b-7fd789456fce/workspace/Cellar/anns/efanna_graph/tests/sift.50NN.graph"; +// std::vector> gt_knng; +// Load_nns_graph(gt_knng, knng_filename.c_str()); +// +// size_t rows, dim; +// char* filename = "/mnt/112d53a6-5592-4360-a33b-7fd789456fce/workspace/Cellar/anns/efanna_graph/tests/siftsmall/siftsmall_base.fvecs"; +// float* p_data = fvecs_read(filename, &dim, &rows); +// +// std::vector store_ids(rows); +// for (int i = 0; i < rows; ++i) { +// store_ids[i] = i; +// } +// +// knowhere::FaissGpuResourceMgr::GetInstance().InitDevice(DEVICEID, 1024 * 1024 * 200, 1024 * 1024 * 600, 2); +// auto dataset = generate_dataset(int64_t(rows), int64_t(dim), p_data, store_ids.data()); +// auto config = std::make_shared(); +// config->d = dim; +// config->gpu_id = 0; +// config->metric_type = knowhere::METRICTYPE::L2; +// auto preprocess_index = std::make_shared(); +// preprocess_index->Train(config); +// preprocess_index->AddWithoutId(dataset, config); +// auto xx = knowhere::cloner::CopyCpuToGpu(preprocess_index, 0, config); +// auto ss = std::dynamic_pointer_cast(xx); +// std::vector> idmap_knng; +// ss->GenGraph(p_data, k, idmap_knng,config); +// knowhere::FaissGpuResourceMgr::GetInstance().Free(); +// +// int n_1 = 0, n_10 = 0, n_100 = 0; +// for (int i = 0; i < rows; i++) { +// int gt_nn = gt_knng[i][0]; +// int l_n_1 = 0; +// int l_n_10 = 0; +// int l_n_100 = 0; +// for (int j = 0; j < k; j++) { +// if (idmap_knng[i][j] == gt_nn) { +// if (j < 1){ +// n_1++; +// l_n_1++; +// } +// if (j < 10){ +// n_10++; +// l_n_10++; +// } +// if (j < 100){ +// n_100++; +// l_n_100++; +// } +// +// } +// if ((j == k-1) && (l_n_100 == 0)){ +// std::cout << "error id: " << i << std::endl; +// } +// } +// } +// printf("R@1 = %.4f\n", n_1 / float(rows)); +// printf("R@10 = %.4f\n", n_10 / float(rows)); +// printf("R@100 = %.4f\n", n_100 / float(rows)); +//} diff --git a/core/src/index/unittest/utils.cpp b/core/src/index/unittest/utils.cpp index 11dad4a8b9..a2ff6fd829 100644 --- a/core/src/index/unittest/utils.cpp +++ b/core/src/index/unittest/utils.cpp @@ -178,3 +178,72 @@ PrintResult(const knowhere::DatasetPtr& result, const int& nq, const int& k) { std::cout << "id\n" << ss_id.str() << std::endl; std::cout << "dist\n" << ss_dist.str() << std::endl; } + +void +Load_nns_graph(std::vector>& final_graph, const char* filename) { + std::vector> knng; + + std::ifstream in(filename, std::ios::binary); + unsigned k; + in.read((char*)&k, sizeof(unsigned)); + in.seekg(0, std::ios::end); + std::ios::pos_type ss = in.tellg(); + size_t fsize = (size_t)ss; + size_t num = (size_t)(fsize / (k + 1) / 4); + in.seekg(0, std::ios::beg); + + knng.resize(num); + knng.reserve(num); + int64_t kk = (k + 3) / 4 * 4; + for (size_t i = 0; i < num; i++) { + in.seekg(4, std::ios::cur); + knng[i].resize(k); + knng[i].reserve(kk); + in.read((char*)knng[i].data(), k * sizeof(unsigned)); + } + in.close(); + + final_graph.resize(knng.size()); + for (int i = 0; i < knng.size(); ++i) { + final_graph[i].resize(knng[i].size()); + for (int j = 0; j < knng[i].size(); ++j) { + final_graph[i][j] = knng[i][j]; + } + } +} + +float* +fvecs_read(const char* fname, size_t* d_out, size_t* n_out) { + FILE* f = fopen(fname, "r"); + if (!f) { + fprintf(stderr, "could not open %s\n", fname); + perror(""); + abort(); + } + int d; + fread(&d, 1, sizeof(int), f); + assert((d > 0 && d < 1000000) || !"unreasonable dimension"); + fseek(f, 0, SEEK_SET); + struct stat st; + fstat(fileno(f), &st); + size_t sz = st.st_size; + assert(sz % ((d + 1) * 4) == 0 || !"weird file size"); + size_t n = sz / ((d + 1) * 4); + + *d_out = d; + *n_out = n; + float* x = new float[n * (d + 1)]; + size_t nr = fread(x, sizeof(float), n * (d + 1), f); + assert(nr == n * (d + 1) || !"could not read whole file"); + + // shift array to remove row headers + for (size_t i = 0; i < n; i++) memmove(x + i * d, x + 1 + i * (d + 1), d * sizeof(*x)); + + fclose(f); + return x; +} + +int* // not very clean, but works as long as sizeof(int) == sizeof(float) +ivecs_read(const char* fname, size_t* d_out, size_t* n_out) { + return (int*)fvecs_read(fname, d_out, n_out); +} diff --git a/core/src/index/unittest/utils.h b/core/src/index/unittest/utils.h index b39cf9ea14..f11ad28163 100644 --- a/core/src/index/unittest/utils.h +++ b/core/src/index/unittest/utils.h @@ -93,3 +93,12 @@ struct FileIOReader { size_t operator()(void* ptr, size_t size); }; + +void +Load_nns_graph(std::vector>& final_graph_, const char* filename); + +float* +fvecs_read(const char* fname, size_t* d_out, size_t* n_out); + +int* +ivecs_read(const char* fname, size_t* d_out, size_t* n_out); \ No newline at end of file diff --git a/core/src/wrapper/ConfAdapter.cpp b/core/src/wrapper/ConfAdapter.cpp index d49747d8f4..7ad1b8b74b 100644 --- a/core/src/wrapper/ConfAdapter.cpp +++ b/core/src/wrapper/ConfAdapter.cpp @@ -201,10 +201,11 @@ NSGConfAdapter::Match(const TempMetaConf& metaconf) { auto scale_factor = round(metaconf.dim / 128.0); scale_factor = scale_factor >= 4 ? 4 : scale_factor; conf->nprobe = int64_t(conf->nlist * 0.01); - conf->knng = 40 + 10 * scale_factor; // the size of knng - conf->search_length = 40 + 5 * scale_factor; +// conf->knng = 40 + 10 * scale_factor; // the size of knng + conf->knng = 50; + conf->search_length = 50 + 5 * scale_factor; conf->out_degree = 50 + 5 * scale_factor; - conf->candidate_pool_size = 200 + 100 * scale_factor; + conf->candidate_pool_size = 300; MatchBase(conf); return conf; } From 31da89d9a286a4bf9b359e085fef7b6cf493333a Mon Sep 17 00:00:00 2001 From: "xiaojun.lin" Date: Sat, 30 Nov 2019 18:07:26 +0800 Subject: [PATCH 11/17] format code --- .../knowhere/knowhere/index/vector_index/IndexGPUIDMAP.h | 1 + .../knowhere/knowhere/index/vector_index/IndexNSG.cpp | 2 +- .../index/knowhere/knowhere/index/vector_index/nsg/NSG.h | 4 ++-- core/src/index/unittest/test_nsg/test_nsg.cpp | 9 +++++---- core/src/index/unittest/utils.h | 2 +- core/src/wrapper/ConfAdapter.cpp | 2 +- 6 files changed, 11 insertions(+), 9 deletions(-) diff --git a/core/src/index/knowhere/knowhere/index/vector_index/IndexGPUIDMAP.h b/core/src/index/knowhere/knowhere/index/vector_index/IndexGPUIDMAP.h index d538f2d0da..31c7039f50 100644 --- a/core/src/index/knowhere/knowhere/index/vector_index/IndexGPUIDMAP.h +++ b/core/src/index/knowhere/knowhere/index/vector_index/IndexGPUIDMAP.h @@ -23,6 +23,7 @@ #include #include +#include namespace knowhere { diff --git a/core/src/index/knowhere/knowhere/index/vector_index/IndexNSG.cpp b/core/src/index/knowhere/knowhere/index/vector_index/IndexNSG.cpp index 661bde3bf3..db8b05f992 100644 --- a/core/src/index/knowhere/knowhere/index/vector_index/IndexNSG.cpp +++ b/core/src/index/knowhere/knowhere/index/vector_index/IndexNSG.cpp @@ -25,8 +25,8 @@ #include "knowhere/index/vector_index/helpers/Cloner.h" #endif -#include "knowhere/index/vector_index/IndexIVF.h" #include "knowhere/index/vector_index/IndexIDMAP.h" +#include "knowhere/index/vector_index/IndexIVF.h" #include "knowhere/index/vector_index/nsg/NSG.h" #include "knowhere/index/vector_index/nsg/NSGIO.h" diff --git a/core/src/index/knowhere/knowhere/index/vector_index/nsg/NSG.h b/core/src/index/knowhere/knowhere/index/vector_index/nsg/NSG.h index 3dd827466a..f4eefb476e 100644 --- a/core/src/index/knowhere/knowhere/index/vector_index/nsg/NSG.h +++ b/core/src/index/knowhere/knowhere/index/vector_index/nsg/NSG.h @@ -53,8 +53,8 @@ class NsgIndex { float* ori_data_; int64_t* ids_; - Graph nsg; // final graph - Graph knng; // reset after build + Graph nsg; // final graph + Graph knng; // reset after build node_t navigation_point; // offset of node in origin data diff --git a/core/src/index/unittest/test_nsg/test_nsg.cpp b/core/src/index/unittest/test_nsg/test_nsg.cpp index 450f5a7723..c67fec0e32 100644 --- a/core/src/index/unittest/test_nsg/test_nsg.cpp +++ b/core/src/index/unittest/test_nsg/test_nsg.cpp @@ -22,9 +22,9 @@ #include "knowhere/index/vector_index/FaissBaseIndex.h" #include "knowhere/index/vector_index/IndexNSG.h" #ifdef MILVUS_GPU_VERSION -#include "knowhere/index/vector_index/helpers/FaissGpuResourceMgr.h" #include "knowhere/index/vector_index/IndexGPUIDMAP.h" #include "knowhere/index/vector_index/helpers/Cloner.h" +#include "knowhere/index/vector_index/helpers/FaissGpuResourceMgr.h" #endif #include "knowhere/common/Timer.h" @@ -120,7 +120,7 @@ TEST_F(NSGInterfaceTest, comparetest) { } //#include -//TEST(test, ori_nsg) { +// TEST(test, ori_nsg) { // // float* p_data = nullptr; // size_t rows, dim; // char* filename = "/mnt/112d53a6-5592-4360-a33b-7fd789456fce/workspace/Data/sift/sift_base.fvecs"; @@ -226,7 +226,7 @@ TEST_F(NSGInterfaceTest, comparetest) { // printf("R@100 = %.4f\n", n_100 / float(nq)); //} // -//TEST(testxx, test_idmap){ +// TEST(testxx, test_idmap){ // int k = 50; // std::string knng_filename = // "/mnt/112d53a6-5592-4360-a33b-7fd789456fce/workspace/Cellar/anns/efanna_graph/tests/sift.50NN.graph"; @@ -234,7 +234,8 @@ TEST_F(NSGInterfaceTest, comparetest) { // Load_nns_graph(gt_knng, knng_filename.c_str()); // // size_t rows, dim; -// char* filename = "/mnt/112d53a6-5592-4360-a33b-7fd789456fce/workspace/Cellar/anns/efanna_graph/tests/siftsmall/siftsmall_base.fvecs"; +// char* filename = +// "/mnt/112d53a6-5592-4360-a33b-7fd789456fce/workspace/Cellar/anns/efanna_graph/tests/siftsmall/siftsmall_base.fvecs"; // float* p_data = fvecs_read(filename, &dim, &rows); // // std::vector store_ids(rows); diff --git a/core/src/index/unittest/utils.h b/core/src/index/unittest/utils.h index f11ad28163..03fd157222 100644 --- a/core/src/index/unittest/utils.h +++ b/core/src/index/unittest/utils.h @@ -101,4 +101,4 @@ float* fvecs_read(const char* fname, size_t* d_out, size_t* n_out); int* -ivecs_read(const char* fname, size_t* d_out, size_t* n_out); \ No newline at end of file +ivecs_read(const char* fname, size_t* d_out, size_t* n_out); diff --git a/core/src/wrapper/ConfAdapter.cpp b/core/src/wrapper/ConfAdapter.cpp index b96f2c80e5..6b1667f9d7 100644 --- a/core/src/wrapper/ConfAdapter.cpp +++ b/core/src/wrapper/ConfAdapter.cpp @@ -204,7 +204,7 @@ NSGConfAdapter::Match(const TempMetaConf& metaconf) { auto scale_factor = round(metaconf.dim / 128.0); scale_factor = scale_factor >= 4 ? 4 : scale_factor; conf->nprobe = int64_t(conf->nlist * 0.01); -// conf->knng = 40 + 10 * scale_factor; // the size of knng + // conf->knng = 40 + 10 * scale_factor; // the size of knng conf->knng = 50; conf->search_length = 50 + 5 * scale_factor; conf->out_degree = 50 + 5 * scale_factor; From 28ca297115c2af6d14b7f9dccf09411031ba20ab Mon Sep 17 00:00:00 2001 From: "xiaojun.lin" Date: Sat, 30 Nov 2019 19:19:01 +0800 Subject: [PATCH 12/17] fix unittest --- core/src/index/unittest/test_nsg/test_nsg.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/core/src/index/unittest/test_nsg/test_nsg.cpp b/core/src/index/unittest/test_nsg/test_nsg.cpp index c67fec0e32..3f7fc0fba5 100644 --- a/core/src/index/unittest/test_nsg/test_nsg.cpp +++ b/core/src/index/unittest/test_nsg/test_nsg.cpp @@ -52,6 +52,7 @@ class NSGInterfaceTest : public DataGen, public ::testing::Test { auto tmp_conf = std::make_shared(); tmp_conf->gpu_id = DEVICEID; + tmp_conf->d = 256; tmp_conf->knng = 20; tmp_conf->nprobe = 8; tmp_conf->nlist = 163; From e4c7115b9d3f9df69f7ee00fe83c1ed04800c7ea Mon Sep 17 00:00:00 2001 From: zhenwu Date: Sat, 30 Nov 2019 19:45:47 +0800 Subject: [PATCH 13/17] Update timeout value --- tests/milvus_python_test/test_index.py | 38 +++++++++++++------------- 1 file changed, 19 insertions(+), 19 deletions(-) diff --git a/tests/milvus_python_test/test_index.py b/tests/milvus_python_test/test_index.py index 5172d2588f..b89f682039 100644 --- a/tests/milvus_python_test/test_index.py +++ b/tests/milvus_python_test/test_index.py @@ -18,7 +18,7 @@ index_file_size = 10 vectors = gen_vectors(nb, dim) vectors = sklearn.preprocessing.normalize(vectors, axis=1, norm='l2') vectors = vectors.tolist() -BUILD_TIMEOUT = 180 +BUILD_TIMEOUT = 300 nprobe = 1 tag = "1970-01-01" @@ -51,27 +51,27 @@ class TestIndexBase: """ @pytest.mark.timeout(BUILD_TIMEOUT) - def test_create_index(self, connect, table, get_index_params): + def test_create_index(self, connect, table, get_simple_index_params): ''' target: test create index interface method: create table and add vectors in it, create index expected: return code equals to 0, and search success ''' - index_params = get_index_params + index_params = get_simple_index_params logging.getLogger().info(index_params) status, ids = connect.add_vectors(table, vectors) status = connect.create_index(table, index_params) assert status.OK() @pytest.mark.timeout(BUILD_TIMEOUT) - def test_create_index_partition(self, connect, table, get_index_params): + def test_create_index_partition(self, connect, table, get_simple_index_params): ''' target: test create index interface method: create table, create partition, and add vectors in it, create index expected: return code equals to 0, and search success ''' partition_name = gen_unique_str() - index_params = get_index_params + index_params = get_simple_index_params logging.getLogger().info(index_params) status = connect.create_partition(table, partition_name, tag) status, ids = connect.add_vectors(table, vectors, partition_tag=tag) @@ -91,13 +91,13 @@ class TestIndexBase: status = dis_connect.create_index(table, index_param) @pytest.mark.timeout(BUILD_TIMEOUT) - def test_create_index_search_with_query_vectors(self, connect, table, get_index_params): + def test_create_index_search_with_query_vectors(self, connect, table, get_simple_index_params): ''' target: test create index interface, search with more query vectors method: create table and add vectors in it, create index expected: return code equals to 0, and search success ''' - index_params = get_index_params + index_params = get_simple_index_params logging.getLogger().info(index_params) status, ids = connect.add_vectors(table, vectors) status = connect.create_index(table, index_params) @@ -291,13 +291,13 @@ class TestIndexBase: ****************************************************************** """ - def test_describe_index(self, connect, table, get_index_params): + def test_describe_index(self, connect, table, get_simple_index_params): ''' target: test describe index interface method: create table and add vectors in it, create index, call describe index expected: return code 0, and index instructure ''' - index_params = get_index_params + index_params = get_simple_index_params logging.getLogger().info(index_params) status, ids = connect.add_vectors(table, vectors) status = connect.create_index(table, index_params) @@ -398,13 +398,13 @@ class TestIndexBase: ****************************************************************** """ - def test_drop_index(self, connect, table, get_index_params): + def test_drop_index(self, connect, table, get_simple_index_params): ''' target: test drop index interface method: create table and add vectors in it, create index, call drop index expected: return code 0, and default index param ''' - index_param = get_index_params + index_param = get_simple_index_params status, ids = connect.add_vectors(table, vectors) status = connect.create_index(table, index_param) assert status.OK() @@ -418,13 +418,13 @@ class TestIndexBase: assert result._table_name == table assert result._index_type == IndexType.FLAT - def test_drop_index_repeatly(self, connect, table, get_index_params): + def test_drop_index_repeatly(self, connect, table, get_simple_index_params): ''' target: test drop index repeatly method: create index, call drop index, and drop again expected: return code 0 ''' - index_param = get_index_params + index_param = get_simple_index_params status, ids = connect.add_vectors(table, vectors) status = connect.create_index(table, index_param) assert status.OK() @@ -560,13 +560,13 @@ class TestIndexIP: """ @pytest.mark.level(2) @pytest.mark.timeout(BUILD_TIMEOUT) - def test_create_index(self, connect, ip_table, get_index_params): + def test_create_index(self, connect, ip_table, get_simple_index_params): ''' target: test create index interface method: create table and add vectors in it, create index expected: return code equals to 0, and search success ''' - index_params = get_index_params + index_params = get_simple_index_params logging.getLogger().info(index_params) status, ids = connect.add_vectors(ip_table, vectors) status = connect.create_index(ip_table, index_params) @@ -576,14 +576,14 @@ class TestIndexIP: assert status.OK() @pytest.mark.timeout(BUILD_TIMEOUT) - def test_create_index_partition(self, connect, ip_table, get_index_params): + def test_create_index_partition(self, connect, ip_table, get_simple_index_params): ''' target: test create index interface method: create table, create partition, and add vectors in it, create index expected: return code equals to 0, and search success ''' partition_name = gen_unique_str() - index_params = get_index_params + index_params = get_simple_index_params logging.getLogger().info(index_params) status = connect.create_partition(ip_table, partition_name, tag) status, ids = connect.add_vectors(ip_table, vectors, partition_tag=tag) @@ -606,13 +606,13 @@ class TestIndexIP: status = dis_connect.create_index(ip_table, index_param) @pytest.mark.timeout(BUILD_TIMEOUT) - def test_create_index_search_with_query_vectors(self, connect, ip_table, get_index_params): + def test_create_index_search_with_query_vectors(self, connect, ip_table, get_simple_index_params): ''' target: test create index interface, search with more query vectors method: create table and add vectors in it, create index expected: return code equals to 0, and search success ''' - index_params = get_index_params + index_params = get_simple_index_params logging.getLogger().info(index_params) status, ids = connect.add_vectors(ip_table, vectors) status = connect.create_index(ip_table, index_params) From c1bd16a55e1e229afe56b58f009a0dd1097ffce7 Mon Sep 17 00:00:00 2001 From: bugfixer Date: Sun, 1 Dec 2019 01:40:11 +0000 Subject: [PATCH 14/17] #631 - FAISS isn't compiled with O3 option --- CHANGELOG.md | 1 + core/src/index/cmake/ThirdPartyPackagesCore.cmake | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index cb082d2772..c7739c5b4a 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -42,6 +42,7 @@ Please mark all change in change log and use the ticket from JIRA. - \#599 - Build index log is incorrect - \#602 - Optimizer specify wrong gpu_id - \#606 - No log generated during building index with CPU +- \#631 - FAISS isn't compiled with O3 option ## Feature - \#12 - Pure CPU version for Milvus diff --git a/core/src/index/cmake/ThirdPartyPackagesCore.cmake b/core/src/index/cmake/ThirdPartyPackagesCore.cmake index 624f1f422e..9c9187d2cc 100644 --- a/core/src/index/cmake/ThirdPartyPackagesCore.cmake +++ b/core/src/index/cmake/ThirdPartyPackagesCore.cmake @@ -708,7 +708,7 @@ macro(build_faiss) set(FAISS_CONFIGURE_ARGS "--prefix=${FAISS_PREFIX}" "CFLAGS=${EP_C_FLAGS}" - "CXXFLAGS=${EP_CXX_FLAGS} -mavx2 -mf16c" + "CXXFLAGS=${EP_CXX_FLAGS} -mavx2 -mf16c -O3" --without-python) if (FAISS_WITH_MKL) From 216a534eff5753b42b12d82fcdbfaad6b72095e4 Mon Sep 17 00:00:00 2001 From: bugfixer Date: Sun, 1 Dec 2019 01:52:17 +0000 Subject: [PATCH 15/17] #634 - FAISS GPU version is compiled with O0 --- CHANGELOG.md | 1 + core/src/index/thirdparty/faiss/makefile.inc.in | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index cb082d2772..684b0c51fc 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -76,6 +76,7 @@ Please mark all change in change log and use the ticket from JIRA. - \#470 - Small raw files should not be build index - \#584 - Intergrate internal FAISS - \#611 - Remove MILVUS_CPU_VERSION +- \#634 - FAISS GPU version is compiled with O0 ## Task diff --git a/core/src/index/thirdparty/faiss/makefile.inc.in b/core/src/index/thirdparty/faiss/makefile.inc.in index 2aaaf3cd19..744ca6d50e 100644 --- a/core/src/index/thirdparty/faiss/makefile.inc.in +++ b/core/src/index/thirdparty/faiss/makefile.inc.in @@ -17,7 +17,7 @@ NVCC = @NVCC@ CUDA_ROOT = @CUDA_PREFIX@ CUDA_ARCH = @CUDA_ARCH@ NVCCFLAGS = -I $(CUDA_ROOT)/targets/x86_64-linux/include/ \ --O0 -g \ +-O3 \ -Xcompiler -fPIC \ -Xcudafe --diag_suppress=unrecognized_attribute \ $(CUDA_ARCH) \ From fef2a9f9ae80b933a500ad8e429b550dc3f1295d Mon Sep 17 00:00:00 2001 From: zhenwu Date: Sun, 1 Dec 2019 10:50:01 +0800 Subject: [PATCH 16/17] Remove some pq cases --- tests/milvus_python_test/test_add_vectors.py | 2 + tests/milvus_python_test/test_index.py | 293 +++++++++--------- .../milvus_python_test/test_search_vectors.py | 12 +- tests/milvus_python_test/test_table.py | 2 + tests/milvus_python_test/test_table_count.py | 2 + 5 files changed, 167 insertions(+), 144 deletions(-) diff --git a/tests/milvus_python_test/test_add_vectors.py b/tests/milvus_python_test/test_add_vectors.py index 7c9d9e691c..5d06a4f43b 100644 --- a/tests/milvus_python_test/test_add_vectors.py +++ b/tests/milvus_python_test/test_add_vectors.py @@ -31,6 +31,8 @@ class TestAddBase: if "internal" not in args: if request.param["index_type"] == IndexType.IVF_SQ8H: pytest.skip("sq8h not support in open source") + if request.param["index_type"] == IndexType.IVF_PQ: + pytest.skip("Skip PQ Temporary") return request.param def test_add_vector_create_table(self, connect, table): diff --git a/tests/milvus_python_test/test_index.py b/tests/milvus_python_test/test_index.py index b89f682039..924aee270e 100644 --- a/tests/milvus_python_test/test_index.py +++ b/tests/milvus_python_test/test_index.py @@ -14,7 +14,7 @@ from utils import * nb = 10000 dim = 128 -index_file_size = 10 +index_file_size = 20 vectors = gen_vectors(nb, dim) vectors = sklearn.preprocessing.normalize(vectors, axis=1, norm='l2') vectors = vectors.tolist() @@ -63,6 +63,18 @@ class TestIndexBase: status = connect.create_index(table, index_params) assert status.OK() + @pytest.mark.timeout(BUILD_TIMEOUT) + def test_create_index_no_vectors(self, connect, table, get_simple_index_params): + ''' + target: test create index interface + method: create table and add vectors in it, create index + expected: return code equals to 0, and search success + ''' + index_params = get_simple_index_params + logging.getLogger().info(index_params) + status = connect.create_index(table, index_params) + assert status.OK() + @pytest.mark.timeout(BUILD_TIMEOUT) def test_create_index_partition(self, connect, table, get_simple_index_params): ''' @@ -72,6 +84,8 @@ class TestIndexBase: ''' partition_name = gen_unique_str() index_params = get_simple_index_params + if index_params["index_type"] == IndexType.IVF_PQ: + pytest.skip("Skip some PQ cases") logging.getLogger().info(index_params) status = connect.create_partition(table, partition_name, tag) status, ids = connect.add_vectors(table, vectors, partition_tag=tag) @@ -242,6 +256,8 @@ class TestIndexBase: expected: return code equals to 0 ''' index_param = get_simple_index_params + if index_param["index_type"] == IndexType.IVF_PQ: + pytest.skip("Skip some PQ cases") status = connect.create_index(table, index_param) status, ids = connect.add_vectors(table, vectors) assert status.OK() @@ -255,6 +271,8 @@ class TestIndexBase: ''' status, ids = connect.add_vectors(table, vectors) index_param = get_simple_index_params + if index_param["index_type"] == IndexType.IVF_PQ: + pytest.skip("Skip some PQ cases") status = connect.create_index(table, index_param) status = connect.create_index(table, index_param) assert status.OK() @@ -291,15 +309,15 @@ class TestIndexBase: ****************************************************************** """ - def test_describe_index(self, connect, table, get_simple_index_params): + def test_describe_index(self, connect, table, get_index_params): ''' target: test describe index interface method: create table and add vectors in it, create index, call describe index expected: return code 0, and index instructure ''' - index_params = get_simple_index_params + index_params = get_index_params logging.getLogger().info(index_params) - status, ids = connect.add_vectors(table, vectors) + # status, ids = connect.add_vectors(table, vectors) status = connect.create_index(table, index_params) status, result = connect.describe_index(table) logging.getLogger().info(result) @@ -325,6 +343,8 @@ class TestIndexBase: 'metric_type': MetricType.L2} connect.create_table(param) index_params = get_simple_index_params + if index_params["index_type"] == IndexType.IVF_PQ: + pytest.skip("Skip some PQ cases") logging.getLogger().info(index_params) status, ids = connect.add_vectors(table_name=table_name, records=vectors) status = connect.create_index(table_name, index_params) @@ -405,7 +425,7 @@ class TestIndexBase: expected: return code 0, and default index param ''' index_param = get_simple_index_params - status, ids = connect.add_vectors(table, vectors) + # status, ids = connect.add_vectors(table, vectors) status = connect.create_index(table, index_param) assert status.OK() status, result = connect.describe_index(table) @@ -425,7 +445,7 @@ class TestIndexBase: expected: return code 0 ''' index_param = get_simple_index_params - status, ids = connect.add_vectors(table, vectors) + # status, ids = connect.add_vectors(table, vectors) status = connect.create_index(table, index_param) assert status.OK() status, result = connect.describe_index(table) @@ -494,10 +514,9 @@ class TestIndexBase: expected: return code 0 ''' index_params = get_simple_index_params - status, ids = connect.add_vectors(table, vectors) + # status, ids = connect.add_vectors(table, vectors) for i in range(2): status = connect.create_index(table, index_params) - assert status.OK() status, result = connect.describe_index(table) logging.getLogger().info(result) @@ -517,7 +536,7 @@ class TestIndexBase: ''' nlist = 16384 index_params = [{"index_type": IndexType.IVFLAT, "nlist": nlist}, {"index_type": IndexType.IVF_SQ8, "nlist": nlist}] - status, ids = connect.add_vectors(table, vectors) + # status, ids = connect.add_vectors(table, vectors) for i in range(2): status = connect.create_index(table, index_params[i]) assert status.OK() @@ -570,10 +589,7 @@ class TestIndexIP: logging.getLogger().info(index_params) status, ids = connect.add_vectors(ip_table, vectors) status = connect.create_index(ip_table, index_params) - if index_params["index_type"] == IndexType.IVF_PQ: - assert not status.OK() - else: - assert status.OK() + assert status.OK() @pytest.mark.timeout(BUILD_TIMEOUT) def test_create_index_partition(self, connect, ip_table, get_simple_index_params): @@ -584,14 +600,13 @@ class TestIndexIP: ''' partition_name = gen_unique_str() index_params = get_simple_index_params + if index_params["index_type"] == IndexType.IVF_PQ: + pytest.skip("Skip some PQ cases") logging.getLogger().info(index_params) status = connect.create_partition(ip_table, partition_name, tag) status, ids = connect.add_vectors(ip_table, vectors, partition_tag=tag) status = connect.create_index(partition_name, index_params) - if index_params["index_type"] == IndexType.IVF_PQ: - assert not status.OK() - else: - assert status.OK() + assert status.OK() @pytest.mark.level(2) def test_create_index_without_connect(self, dis_connect, ip_table): @@ -616,17 +631,16 @@ class TestIndexIP: logging.getLogger().info(index_params) status, ids = connect.add_vectors(ip_table, vectors) status = connect.create_index(ip_table, index_params) + logging.getLogger().info(connect.describe_index(ip_table)) + query_vecs = [vectors[0], vectors[1], vectors[2]] + top_k = 5 + status, result = connect.search_vectors(ip_table, top_k, nprobe, query_vecs) + logging.getLogger().info(result) if index_params["index_type"] == IndexType.IVF_PQ: assert not status.OK() else: assert status.OK() - logging.getLogger().info(connect.describe_index(ip_table)) - query_vecs = [vectors[0], vectors[1], vectors[2]] - top_k = 5 - status, result = connect.search_vectors(ip_table, top_k, nprobe, query_vecs) - logging.getLogger().info(result) - assert status.OK() - assert len(result) == len(query_vecs) + assert len(result) == len(query_vecs) # TODO: enable @pytest.mark.timeout(BUILD_TIMEOUT) @@ -734,6 +748,8 @@ class TestIndexIP: expected: return code equals to 0 ''' index_param = get_simple_index_params + if index_param["index_type"] == IndexType.IVF_PQ: + pytest.skip("Skip some PQ cases") status = connect.create_index(ip_table, index_param) status, ids = connect.add_vectors(ip_table, vectors) assert status.OK() @@ -792,7 +808,7 @@ class TestIndexIP: ''' index_params = get_simple_index_params logging.getLogger().info(index_params) - status, ids = connect.add_vectors(ip_table, vectors) + # status, ids = connect.add_vectors(ip_table, vectors[:5000]) status = connect.create_index(ip_table, index_params) status, result = connect.describe_index(ip_table) logging.getLogger().info(result) @@ -808,6 +824,8 @@ class TestIndexIP: ''' partition_name = gen_unique_str() index_params = get_simple_index_params + if index_params["index_type"] == IndexType.IVF_PQ: + pytest.skip("Skip some PQ cases") logging.getLogger().info(index_params) status = connect.create_partition(ip_table, partition_name, tag) status, ids = connect.add_vectors(ip_table, vectors, partition_tag=tag) @@ -831,6 +849,8 @@ class TestIndexIP: ''' partition_name = gen_unique_str() index_params = get_simple_index_params + if index_params["index_type"] == IndexType.IVF_PQ: + pytest.skip("Skip some PQ cases") logging.getLogger().info(index_params) status = connect.create_partition(ip_table, partition_name, tag) status, ids = connect.add_vectors(ip_table, vectors, partition_tag=tag) @@ -856,6 +876,8 @@ class TestIndexIP: new_partition_name = gen_unique_str() new_tag = "new_tag" index_params = get_simple_index_params + if index_params["index_type"] == IndexType.IVF_PQ: + pytest.skip("Skip some PQ cases") logging.getLogger().info(index_params) status = connect.create_partition(ip_table, partition_name, tag) status = connect.create_partition(ip_table, new_partition_name, new_tag) @@ -892,6 +914,8 @@ class TestIndexIP: 'metric_type': MetricType.IP} connect.create_table(param) index_params = get_simple_index_params + if index_params["index_type"] == IndexType.IVF_PQ: + pytest.skip("Skip some PQ cases") logging.getLogger().info(index_params) status, ids = connect.add_vectors(table_name=table_name, records=vectors) status = connect.create_index(table_name, index_params) @@ -944,28 +968,25 @@ class TestIndexIP: ****************************************************************** """ - def test_drop_index(self, connect, ip_table, get_index_params): + def test_drop_index(self, connect, ip_table, get_simple_index_params): ''' target: test drop index interface method: create table and add vectors in it, create index, call drop index expected: return code 0, and default index param ''' - index_params = get_index_params + index_params = get_simple_index_params status, ids = connect.add_vectors(ip_table, vectors) status = connect.create_index(ip_table, index_params) - if index_params["index_type"] == IndexType.IVF_PQ: - assert not status.OK() - else: - assert status.OK() - status, result = connect.describe_index(ip_table) - logging.getLogger().info(result) - status = connect.drop_index(ip_table) - assert status.OK() - status, result = connect.describe_index(ip_table) - logging.getLogger().info(result) - assert result._nlist == 16384 - assert result._table_name == ip_table - assert result._index_type == IndexType.FLAT + assert status.OK() + status, result = connect.describe_index(ip_table) + logging.getLogger().info(result) + status = connect.drop_index(ip_table) + assert status.OK() + status, result = connect.describe_index(ip_table) + logging.getLogger().info(result) + assert result._nlist == 16384 + assert result._table_name == ip_table + assert result._index_type == IndexType.FLAT def test_drop_index_partition(self, connect, ip_table, get_simple_index_params): ''' @@ -975,22 +996,21 @@ class TestIndexIP: ''' partition_name = gen_unique_str() index_params = get_simple_index_params + if index_params["index_type"] == IndexType.IVF_PQ: + pytest.skip("Skip some PQ cases") status = connect.create_partition(ip_table, partition_name, tag) status, ids = connect.add_vectors(ip_table, vectors, partition_tag=tag) status = connect.create_index(ip_table, index_params) - if index_params["index_type"] == IndexType.IVF_PQ: - assert not status.OK() - else: - assert status.OK() - status, result = connect.describe_index(ip_table) - logging.getLogger().info(result) - status = connect.drop_index(ip_table) - assert status.OK() - status, result = connect.describe_index(ip_table) - logging.getLogger().info(result) - assert result._nlist == 16384 - assert result._table_name == ip_table - assert result._index_type == IndexType.FLAT + assert status.OK() + status, result = connect.describe_index(ip_table) + logging.getLogger().info(result) + status = connect.drop_index(ip_table) + assert status.OK() + status, result = connect.describe_index(ip_table) + logging.getLogger().info(result) + assert result._nlist == 16384 + assert result._table_name == ip_table + assert result._index_type == IndexType.FLAT def test_drop_index_partition_A(self, connect, ip_table, get_simple_index_params): ''' @@ -1000,25 +1020,24 @@ class TestIndexIP: ''' partition_name = gen_unique_str() index_params = get_simple_index_params + if index_params["index_type"] == IndexType.IVF_PQ: + pytest.skip("Skip some PQ cases") status = connect.create_partition(ip_table, partition_name, tag) status, ids = connect.add_vectors(ip_table, vectors, partition_tag=tag) status = connect.create_index(partition_name, index_params) - if index_params["index_type"] == IndexType.IVF_PQ: - assert not status.OK() - else: - assert status.OK() - status = connect.drop_index(ip_table) - assert status.OK() - status, result = connect.describe_index(ip_table) - logging.getLogger().info(result) - assert result._nlist == 16384 - assert result._table_name == ip_table - assert result._index_type == IndexType.FLAT - status, result = connect.describe_index(partition_name) - logging.getLogger().info(result) - assert result._nlist == 16384 - assert result._table_name == partition_name - assert result._index_type == IndexType.FLAT + assert status.OK() + status = connect.drop_index(ip_table) + assert status.OK() + status, result = connect.describe_index(ip_table) + logging.getLogger().info(result) + assert result._nlist == 16384 + assert result._table_name == ip_table + assert result._index_type == IndexType.FLAT + status, result = connect.describe_index(partition_name) + logging.getLogger().info(result) + assert result._nlist == 16384 + assert result._table_name == partition_name + assert result._index_type == IndexType.FLAT def test_drop_index_partition_B(self, connect, ip_table, get_simple_index_params): ''' @@ -1028,25 +1047,24 @@ class TestIndexIP: ''' partition_name = gen_unique_str() index_params = get_simple_index_params + if index_params["index_type"] == IndexType.IVF_PQ: + pytest.skip("Skip some PQ cases") status = connect.create_partition(ip_table, partition_name, tag) status, ids = connect.add_vectors(ip_table, vectors, partition_tag=tag) status = connect.create_index(partition_name, index_params) - if index_params["index_type"] == IndexType.IVF_PQ: - assert not status.OK() - else: - assert status.OK() - status = connect.drop_index(partition_name) - assert status.OK() - status, result = connect.describe_index(ip_table) - logging.getLogger().info(result) - assert result._nlist == 16384 - assert result._table_name == ip_table - assert result._index_type == IndexType.FLAT - status, result = connect.describe_index(partition_name) - logging.getLogger().info(result) - assert result._nlist == 16384 - assert result._table_name == partition_name - assert result._index_type == IndexType.FLAT + assert status.OK() + status = connect.drop_index(partition_name) + assert status.OK() + status, result = connect.describe_index(ip_table) + logging.getLogger().info(result) + assert result._nlist == 16384 + assert result._table_name == ip_table + assert result._index_type == IndexType.FLAT + status, result = connect.describe_index(partition_name) + logging.getLogger().info(result) + assert result._nlist == 16384 + assert result._table_name == partition_name + assert result._index_type == IndexType.FLAT def test_drop_index_partition_C(self, connect, ip_table, get_simple_index_params): ''' @@ -1058,31 +1076,30 @@ class TestIndexIP: new_partition_name = gen_unique_str() new_tag = "new_tag" index_params = get_simple_index_params + if index_params["index_type"] == IndexType.IVF_PQ: + pytest.skip("Skip some PQ cases") status = connect.create_partition(ip_table, partition_name, tag) status = connect.create_partition(ip_table, new_partition_name, new_tag) status, ids = connect.add_vectors(ip_table, vectors) status = connect.create_index(ip_table, index_params) - if index_params["index_type"] == IndexType.IVF_PQ: - assert not status.OK() - else: - assert status.OK() - status = connect.drop_index(new_partition_name) - assert status.OK() - status, result = connect.describe_index(new_partition_name) - logging.getLogger().info(result) - assert result._nlist == 16384 - assert result._table_name == new_partition_name - assert result._index_type == IndexType.FLAT - status, result = connect.describe_index(partition_name) - logging.getLogger().info(result) - assert result._nlist == index_params["nlist"] - assert result._table_name == partition_name - assert result._index_type == index_params["index_type"] - status, result = connect.describe_index(ip_table) - logging.getLogger().info(result) - assert result._nlist == index_params["nlist"] - assert result._table_name == ip_table - assert result._index_type == index_params["index_type"] + assert status.OK() + status = connect.drop_index(new_partition_name) + assert status.OK() + status, result = connect.describe_index(new_partition_name) + logging.getLogger().info(result) + assert result._nlist == 16384 + assert result._table_name == new_partition_name + assert result._index_type == IndexType.FLAT + status, result = connect.describe_index(partition_name) + logging.getLogger().info(result) + assert result._nlist == index_params["nlist"] + assert result._table_name == partition_name + assert result._index_type == index_params["index_type"] + status, result = connect.describe_index(ip_table) + logging.getLogger().info(result) + assert result._nlist == index_params["nlist"] + assert result._table_name == ip_table + assert result._index_type == index_params["index_type"] def test_drop_index_repeatly(self, connect, ip_table, get_simple_index_params): ''' @@ -1091,23 +1108,20 @@ class TestIndexIP: expected: return code 0 ''' index_params = get_simple_index_params - status, ids = connect.add_vectors(ip_table, vectors) + # status, ids = connect.add_vectors(ip_table, vectors) status = connect.create_index(ip_table, index_params) - if index_params["index_type"] == IndexType.IVF_PQ: - assert not status.OK() - else: - assert status.OK() - status, result = connect.describe_index(ip_table) - logging.getLogger().info(result) - status = connect.drop_index(ip_table) - assert status.OK() - status = connect.drop_index(ip_table) - assert status.OK() - status, result = connect.describe_index(ip_table) - logging.getLogger().info(result) - assert result._nlist == 16384 - assert result._table_name == ip_table - assert result._index_type == IndexType.FLAT + assert status.OK() + status, result = connect.describe_index(ip_table) + logging.getLogger().info(result) + status = connect.drop_index(ip_table) + assert status.OK() + status = connect.drop_index(ip_table) + assert status.OK() + status, result = connect.describe_index(ip_table) + logging.getLogger().info(result) + assert result._nlist == 16384 + assert result._table_name == ip_table + assert result._index_type == IndexType.FLAT @pytest.mark.level(2) def test_drop_index_without_connect(self, dis_connect, ip_table): @@ -1145,22 +1159,21 @@ class TestIndexIP: expected: return code 0 ''' index_params = get_simple_index_params + if index_params["index_type"] == IndexType.IVF_PQ: + pytest.skip("Skip some PQ cases") status, ids = connect.add_vectors(ip_table, vectors) for i in range(2): status = connect.create_index(ip_table, index_params) - if index_params["index_type"] == IndexType.IVF_PQ: - assert not status.OK() - else: - assert status.OK() - status, result = connect.describe_index(ip_table) - logging.getLogger().info(result) - status = connect.drop_index(ip_table) - assert status.OK() - status, result = connect.describe_index(ip_table) - logging.getLogger().info(result) - assert result._nlist == 16384 - assert result._table_name == ip_table - assert result._index_type == IndexType.FLAT + assert status.OK() + status, result = connect.describe_index(ip_table) + logging.getLogger().info(result) + status = connect.drop_index(ip_table) + assert status.OK() + status, result = connect.describe_index(ip_table) + logging.getLogger().info(result) + assert result._nlist == 16384 + assert result._table_name == ip_table + assert result._index_type == IndexType.FLAT def test_create_drop_index_repeatly_different_index_params(self, connect, ip_table): ''' @@ -1200,7 +1213,7 @@ class TestIndexTableInvalid(object): def get_table_name(self, request): yield request.param - @pytest.mark.level(2) + @pytest.mark.level(1) def test_create_index_with_invalid_tablename(self, connect, get_table_name): table_name = get_table_name nlist = 16384 @@ -1208,13 +1221,13 @@ class TestIndexTableInvalid(object): status = connect.create_index(table_name, index_param) assert not status.OK() - @pytest.mark.level(2) + @pytest.mark.level(1) def test_describe_index_with_invalid_tablename(self, connect, get_table_name): table_name = get_table_name status, result = connect.describe_index(table_name) assert not status.OK() - @pytest.mark.level(2) + @pytest.mark.level(1) def test_drop_index_with_invalid_tablename(self, connect, get_table_name): table_name = get_table_name status = connect.drop_index(table_name) @@ -1232,13 +1245,13 @@ class TestCreateIndexParamsInvalid(object): def get_index_params(self, request): yield request.param - @pytest.mark.level(2) + @pytest.mark.level(1) def test_create_index_with_invalid_index_params(self, connect, table, get_index_params): index_params = get_index_params index_type = index_params["index_type"] nlist = index_params["nlist"] logging.getLogger().info(index_params) - status, ids = connect.add_vectors(table, vectors) + # status, ids = connect.add_vectors(table, vectors) if (not index_type) or (not nlist) or (not isinstance(index_type, IndexType)) or (not isinstance(nlist, int)): with pytest.raises(Exception) as e: status = connect.create_index(table, index_params) diff --git a/tests/milvus_python_test/test_search_vectors.py b/tests/milvus_python_test/test_search_vectors.py index 7aebc78e31..464a28efea 100644 --- a/tests/milvus_python_test/test_search_vectors.py +++ b/tests/milvus_python_test/test_search_vectors.py @@ -48,6 +48,8 @@ class TestSearchBase: if "internal" not in args: if request.param["index_type"] == IndexType.IVF_SQ8H: pytest.skip("sq8h not support in open source") + if request.param["index_type"] == IndexType.IVF_PQ: + pytest.skip("skip pq case temporary") return request.param @pytest.fixture( @@ -58,6 +60,8 @@ class TestSearchBase: if "internal" not in args: if request.param["index_type"] == IndexType.IVF_SQ8H: pytest.skip("sq8h not support in open source") + if request.param["index_type"] == IndexType.IVF_PQ: + pytest.skip("skip pq case temporary") return request.param """ generate top-k params @@ -89,13 +93,13 @@ class TestSearchBase: else: assert not status.OK() - def test_search_l2_index_params(self, connect, table, get_index_params): + def test_search_l2_index_params(self, connect, table, get_simple_index_params): ''' target: test basic search fuction, all the search params is corrent, test all index params, and build method: search with the given vectors, check the result expected: search status ok, and the length of the result is top_k ''' - index_params = get_index_params + index_params = get_simple_index_params logging.getLogger().info(index_params) vectors, ids = self.init_data(connect, table) status = connect.create_index(table, index_params) @@ -297,14 +301,14 @@ class TestSearchBase: assert result[0][0].distance <= epsilon assert result[1][0].distance <= epsilon - def test_search_ip_index_params(self, connect, ip_table, get_index_params): + def test_search_ip_index_params(self, connect, ip_table, get_simple_index_params): ''' target: test basic search fuction, all the search params is corrent, test all index params, and build method: search with the given vectors, check the result expected: search status ok, and the length of the result is top_k ''' - index_params = get_index_params + index_params = get_simple_index_params logging.getLogger().info(index_params) vectors, ids = self.init_data(connect, ip_table) status = connect.create_index(ip_table, index_params) diff --git a/tests/milvus_python_test/test_table.py b/tests/milvus_python_test/test_table.py index 40b0850859..db1b0c7a08 100644 --- a/tests/milvus_python_test/test_table.py +++ b/tests/milvus_python_test/test_table.py @@ -594,6 +594,8 @@ class TestTable: if "internal" not in args: if request.param["index_type"] == IndexType.IVF_SQ8H: pytest.skip("sq8h not support in open source") + # if request.param["index_type"] == IndexType.IVF_PQ: + # pytest.skip("sq8h not support in open source") return request.param @pytest.mark.level(1) diff --git a/tests/milvus_python_test/test_table_count.py b/tests/milvus_python_test/test_table_count.py index a96ccb12b3..bcff3aca1f 100644 --- a/tests/milvus_python_test/test_table_count.py +++ b/tests/milvus_python_test/test_table_count.py @@ -270,6 +270,8 @@ class TestTableCountIP: if "internal" not in args: if request.param["index_type"] == IndexType.IVF_SQ8H: pytest.skip("sq8h not support in open source") + if request.param["index_type"] == IndexType.IVF_PQ: + pytest.skip("skip pq case temporary") return request.param def test_table_rows_count(self, connect, ip_table, add_vectors_nb): From ba3625677a19d7ab1e2e7c10c6f74287059d2538 Mon Sep 17 00:00:00 2001 From: zhenwu Date: Sun, 1 Dec 2019 12:43:19 +0800 Subject: [PATCH 17/17] assert failed if create index with pq on ip_table --- tests/milvus_python_test/test_index.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/tests/milvus_python_test/test_index.py b/tests/milvus_python_test/test_index.py index 924aee270e..3a3ffc3857 100644 --- a/tests/milvus_python_test/test_index.py +++ b/tests/milvus_python_test/test_index.py @@ -636,10 +636,7 @@ class TestIndexIP: top_k = 5 status, result = connect.search_vectors(ip_table, top_k, nprobe, query_vecs) logging.getLogger().info(result) - if index_params["index_type"] == IndexType.IVF_PQ: - assert not status.OK() - else: - assert status.OK() + assert status.OK() assert len(result) == len(query_vecs) # TODO: enable @@ -975,9 +972,12 @@ class TestIndexIP: expected: return code 0, and default index param ''' index_params = get_simple_index_params - status, ids = connect.add_vectors(ip_table, vectors) + # status, ids = connect.add_vectors(ip_table, vectors) status = connect.create_index(ip_table, index_params) - assert status.OK() + if index_params["index_type"] == IndexType.IVF_PQ: + assert not status.OK() + else: + assert status.OK() status, result = connect.describe_index(ip_table) logging.getLogger().info(result) status = connect.drop_index(ip_table)