From 4b8a72f9aee179733faf82fdfc73bc12564a1090 Mon Sep 17 00:00:00 2001 From: fishpenguin Date: Fri, 29 Nov 2019 11:38:27 +0800 Subject: [PATCH 1/6] NSG build failed using GPU-edition if set gpu_enable false --- CHANGELOG.md | 1 + .../index/knowhere/knowhere/index/vector_index/IndexNSG.cpp | 6 ++++++ 2 files changed, 7 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index ba7561c333..12e7e01cbe 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -33,6 +33,7 @@ Please mark all change in change log and use the ticket from JIRA. - \#533 - NSG build failed with MetricType Inner Product - \#543 - client raise exception in shards when search results is empty - \#545 - Avoid dead circle of build index thread when error occurs +- \#547 - NSG build failed using GPU-edition if set gpu_enable false - \#552 - Server down during building index_type: IVF_PQ using GPU-edition - \#561 - Milvus server should report exception/error message or terminate on mysql metadata backend error - \#599 - Build index log is incorrect diff --git a/core/src/index/knowhere/knowhere/index/vector_index/IndexNSG.cpp b/core/src/index/knowhere/knowhere/index/vector_index/IndexNSG.cpp index 16c0b9172f..9f00c82fd4 100644 --- a/core/src/index/knowhere/knowhere/index/vector_index/IndexNSG.cpp +++ b/core/src/index/knowhere/knowhere/index/vector_index/IndexNSG.cpp @@ -117,7 +117,13 @@ NSG::Train(const DatasetPtr& dataset, const Config& config) { // TODO(linxj): dev IndexFactory, support more IndexType #ifdef MILVUS_GPU_VERSION + auto temp_resource = FaissGpuResourceMgr::GetInstance().GetRes(build_cfg->gpu_id); +#if temp_resource == nullptr + auto preprocess_index = std::make_shared(); +#else auto preprocess_index = std::make_shared(build_cfg->gpu_id); +#endif + #else auto preprocess_index = std::make_shared(); #endif From fd304cf4b43344dfe3d0a161b3e79915d81ad2fb Mon Sep 17 00:00:00 2001 From: fishpenguin Date: Fri, 29 Nov 2019 14:52:08 +0800 Subject: [PATCH 2/6] remove #if --- .../knowhere/index/vector_index/IndexNSG.cpp | 32 +++++++++++-------- 1 file changed, 19 insertions(+), 13 deletions(-) diff --git a/core/src/index/knowhere/knowhere/index/vector_index/IndexNSG.cpp b/core/src/index/knowhere/knowhere/index/vector_index/IndexNSG.cpp index 9f00c82fd4..71660551c1 100644 --- a/core/src/index/knowhere/knowhere/index/vector_index/IndexNSG.cpp +++ b/core/src/index/knowhere/knowhere/index/vector_index/IndexNSG.cpp @@ -116,23 +116,29 @@ NSG::Train(const DatasetPtr& dataset, const Config& config) { } // TODO(linxj): dev IndexFactory, support more IndexType + bool use_gpu = false; #ifdef MILVUS_GPU_VERSION + use_gpu = true; auto temp_resource = FaissGpuResourceMgr::GetInstance().GetRes(build_cfg->gpu_id); -#if temp_resource == nullptr - auto preprocess_index = std::make_shared(); -#else - auto preprocess_index = std::make_shared(build_cfg->gpu_id); + if (temp_resource == nullptr) + use_gpu = false; #endif - -#else - auto preprocess_index = std::make_shared(); -#endif - auto model = preprocess_index->Train(dataset, config); - preprocess_index->set_index_model(model); - preprocess_index->AddWithoutIds(dataset, config); - Graph knng; - preprocess_index->GenGraph(build_cfg->knng, knng, dataset, config); + if (use_gpu) { + auto preprocess_index = std::make_shared(build_cfg->gpu_id); + auto model = preprocess_index->Train(dataset, config); + preprocess_index->set_index_model(model); + preprocess_index->AddWithoutIds(dataset, config); + + preprocess_index->GenGraph(build_cfg->knng, knng, dataset, config); + } else { + auto preprocess_index = std::make_shared(); + auto model = preprocess_index->Train(dataset, config); + preprocess_index->set_index_model(model); + preprocess_index->AddWithoutIds(dataset, config); + + preprocess_index->GenGraph(build_cfg->knng, knng, dataset, config); + } algo::BuildParams b_params; b_params.candidate_pool_size = build_cfg->candidate_pool_size; From 6fcd2a13da5374ab48d43cf579be8519c40b0c40 Mon Sep 17 00:00:00 2001 From: fishpenguin Date: Fri, 29 Nov 2019 15:57:42 +0800 Subject: [PATCH 3/6] fix CPU version bug --- .../knowhere/index/vector_index/IndexNSG.cpp | 32 +++++++++---------- 1 file changed, 16 insertions(+), 16 deletions(-) diff --git a/core/src/index/knowhere/knowhere/index/vector_index/IndexNSG.cpp b/core/src/index/knowhere/knowhere/index/vector_index/IndexNSG.cpp index 71660551c1..8cd98a74d7 100644 --- a/core/src/index/knowhere/knowhere/index/vector_index/IndexNSG.cpp +++ b/core/src/index/knowhere/knowhere/index/vector_index/IndexNSG.cpp @@ -116,29 +116,29 @@ NSG::Train(const DatasetPtr& dataset, const Config& config) { } // TODO(linxj): dev IndexFactory, support more IndexType - bool use_gpu = false; -#ifdef MILVUS_GPU_VERSION - use_gpu = true; - auto temp_resource = FaissGpuResourceMgr::GetInstance().GetRes(build_cfg->gpu_id); - if (temp_resource == nullptr) - use_gpu = false; -#endif Graph knng; - if (use_gpu) { - auto preprocess_index = std::make_shared(build_cfg->gpu_id); - auto model = preprocess_index->Train(dataset, config); - preprocess_index->set_index_model(model); - preprocess_index->AddWithoutIds(dataset, config); - - preprocess_index->GenGraph(build_cfg->knng, knng, dataset, config); - } else { +#ifdef MILVUS_GPU_VERSION + auto temp_resource = FaissGpuResourceMgr::GetInstance().GetRes(build_cfg->gpu_id); + if (temp_resource == nullptr) { auto preprocess_index = std::make_shared(); auto model = preprocess_index->Train(dataset, config); preprocess_index->set_index_model(model); preprocess_index->AddWithoutIds(dataset, config); - + preprocess_index->GenGraph(build_cfg->knng, knng, dataset, config); + } else { + auto preprocess_index = std::make_shared(build_cfg->gpu_id); + auto model = preprocess_index->Train(dataset, config); + preprocess_index->set_index_model(model); + preprocess_index->AddWithoutIds(dataset, config); preprocess_index->GenGraph(build_cfg->knng, knng, dataset, config); } +#else + auto preprocess_index = std::make_shared(); + auto model = preprocess_index->Train(dataset, config); + preprocess_index->set_index_model(model); + preprocess_index->AddWithoutIds(dataset, config); + preprocess_index->GenGraph(build_cfg->knng, knng, dataset, config); +#endif algo::BuildParams b_params; b_params.candidate_pool_size = build_cfg->candidate_pool_size; From fdfb3979181580e3c4a82a52ed7f65f1b7c14c9c Mon Sep 17 00:00:00 2001 From: fishpenguin Date: Fri, 29 Nov 2019 20:17:37 +0800 Subject: [PATCH 4/6] fix test_nsg failed bug --- core/src/db/engine/ExecutionEngineImpl.cpp | 3 +++ .../index/knowhere/knowhere/index/vector_index/IndexNSG.cpp | 3 +-- core/src/wrapper/ConfAdapter.cpp | 2 -- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/core/src/db/engine/ExecutionEngineImpl.cpp b/core/src/db/engine/ExecutionEngineImpl.cpp index 5a3d6e5e2a..ba8a4c34f9 100644 --- a/core/src/db/engine/ExecutionEngineImpl.cpp +++ b/core/src/db/engine/ExecutionEngineImpl.cpp @@ -611,6 +611,9 @@ ExecutionEngineImpl::Init() { server::Config& config = server::Config::GetInstance(); std::vector gpu_ids; Status s = config.GetGpuResourceConfigBuildIndexResources(gpu_ids); + if (!s.ok()) { + gpu_num_ = knowhere::INVALID_VALUE; + } for (auto id : gpu_ids) { if (gpu_num_ == id) { return Status::OK(); diff --git a/core/src/index/knowhere/knowhere/index/vector_index/IndexNSG.cpp b/core/src/index/knowhere/knowhere/index/vector_index/IndexNSG.cpp index 8cd98a74d7..370df76b9b 100644 --- a/core/src/index/knowhere/knowhere/index/vector_index/IndexNSG.cpp +++ b/core/src/index/knowhere/knowhere/index/vector_index/IndexNSG.cpp @@ -118,8 +118,7 @@ NSG::Train(const DatasetPtr& dataset, const Config& config) { // TODO(linxj): dev IndexFactory, support more IndexType Graph knng; #ifdef MILVUS_GPU_VERSION - auto temp_resource = FaissGpuResourceMgr::GetInstance().GetRes(build_cfg->gpu_id); - if (temp_resource == nullptr) { + if (build_cfg->gpu_id == knowhere::INVALID_VALUE) { auto preprocess_index = std::make_shared(); auto model = preprocess_index->Train(dataset, config); preprocess_index->set_index_model(model); diff --git a/core/src/wrapper/ConfAdapter.cpp b/core/src/wrapper/ConfAdapter.cpp index 7644e77ef5..9ee2f060b1 100644 --- a/core/src/wrapper/ConfAdapter.cpp +++ b/core/src/wrapper/ConfAdapter.cpp @@ -39,8 +39,6 @@ void ConfAdapter::MatchBase(knowhere::Config conf) { if (conf->metric_type == knowhere::DEFAULT_TYPE) conf->metric_type = knowhere::METRICTYPE::L2; - if (conf->gpu_id == knowhere::INVALID_VALUE) - conf->gpu_id = 0; } knowhere::Config From f45df1a2419903e391b8d37f0dfc8a2a99da42cd Mon Sep 17 00:00:00 2001 From: fishpenguin Date: Sat, 30 Nov 2019 09:28:24 +0800 Subject: [PATCH 5/6] Remove src/grpc/README.md --- core/src/grpc/README.md | 6 ------ 1 file changed, 6 deletions(-) delete mode 100644 core/src/grpc/README.md diff --git a/core/src/grpc/README.md b/core/src/grpc/README.md deleted file mode 100644 index 6a3fe1157c..0000000000 --- a/core/src/grpc/README.md +++ /dev/null @@ -1,6 +0,0 @@ -We manually change two APIs in "milvus.pb.h": - add_vector_data() - add_row_id_array() - add_ids() - add_distances() -If proto files need be generated again, remember to re-change above APIs. \ No newline at end of file From 10d50d2fb768ae731de6511018aac60a9fd4083d Mon Sep 17 00:00:00 2001 From: fishpenguin Date: Sat, 30 Nov 2019 14:57:35 +0800 Subject: [PATCH 6/6] IVF_PQ search on CPUs when using GPU-version --- core/src/scheduler/SchedInst.h | 4 + .../scheduler/optimizer/FaissIVFPQPass.cpp | 74 +++++++++++++++++++ core/src/scheduler/optimizer/FaissIVFPQPass.h | 58 +++++++++++++++ 3 files changed, 136 insertions(+) create mode 100644 core/src/scheduler/optimizer/FaissIVFPQPass.cpp create mode 100644 core/src/scheduler/optimizer/FaissIVFPQPass.h diff --git a/core/src/scheduler/SchedInst.h b/core/src/scheduler/SchedInst.h index 1e8a7acf2e..6cca377033 100644 --- a/core/src/scheduler/SchedInst.h +++ b/core/src/scheduler/SchedInst.h @@ -25,6 +25,7 @@ #include "optimizer/BuildIndexPass.h" #include "optimizer/FaissFlatPass.h" #include "optimizer/FaissIVFFlatPass.h" +#include "optimizer/FaissIVFPQPass.h" #include "optimizer/FaissIVFSQ8HPass.h" #include "optimizer/FaissIVFSQ8Pass.h" #include "optimizer/FallbackPass.h" @@ -129,7 +130,10 @@ class OptimizerInst { pass_list.push_back(std::make_shared()); pass_list.push_back(std::make_shared()); pass_list.push_back(std::make_shared()); +#ifdef CUSTOMIZATION pass_list.push_back(std::make_shared()); +#endif + pass_list.push_back(std::make_shared()); } #endif pass_list.push_back(std::make_shared()); diff --git a/core/src/scheduler/optimizer/FaissIVFPQPass.cpp b/core/src/scheduler/optimizer/FaissIVFPQPass.cpp new file mode 100644 index 0000000000..f97fec63b4 --- /dev/null +++ b/core/src/scheduler/optimizer/FaissIVFPQPass.cpp @@ -0,0 +1,74 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. +#ifdef MILVUS_GPU_VERSION +#include "scheduler/optimizer/FaissIVFPQPass.h" +#include "cache/GpuCacheMgr.h" +#include "scheduler/SchedInst.h" +#include "scheduler/Utils.h" +#include "scheduler/task/SearchTask.h" +#include "scheduler/tasklabel/SpecResLabel.h" +#include "server/Config.h" +#include "utils/Log.h" + +namespace milvus { +namespace scheduler { + +void +FaissIVFPQPass::Init() { +#ifdef MILVUS_GPU_VERSION + server::Config& config = server::Config::GetInstance(); + Status s = config.GetEngineConfigGpuSearchThreshold(threshold_); + if (!s.ok()) { + threshold_ = std::numeric_limits::max(); + } + s = config.GetGpuResourceConfigSearchResources(gpus); + if (!s.ok()) { + throw; + } +#endif +} + +bool +FaissIVFPQPass::Run(const TaskPtr& task) { + if (task->Type() != TaskType::SearchTask) { + return false; + } + + auto search_task = std::static_pointer_cast(task); + if (search_task->file_->engine_type_ != (int)engine::EngineType::FAISS_PQ) { + return false; + } + + auto search_job = std::static_pointer_cast(search_task->job_.lock()); + ResourcePtr res_ptr; + if (search_job->nq() < threshold_) { + SERVER_LOG_DEBUG << "FaissIVFPQPass: nq < gpu_search_threshold, specify cpu to search!"; + res_ptr = ResMgrInst::GetInstance()->GetResource("cpu"); + } else { + auto best_device_id = count_ % gpus.size(); + SERVER_LOG_DEBUG << "FaissIVFPQPass: nq > gpu_search_threshold, specify gpu" << best_device_id << " to search!"; + count_++; + res_ptr = ResMgrInst::GetInstance()->GetResource(ResourceType::GPU, gpus[best_device_id]); + } + auto label = std::make_shared(res_ptr); + task->label() = label; + return true; +} + +} // namespace scheduler +} // namespace milvus +#endif diff --git a/core/src/scheduler/optimizer/FaissIVFPQPass.h b/core/src/scheduler/optimizer/FaissIVFPQPass.h new file mode 100644 index 0000000000..9225f84b7c --- /dev/null +++ b/core/src/scheduler/optimizer/FaissIVFPQPass.h @@ -0,0 +1,58 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. +#ifdef MILVUS_GPU_VERSION +#pragma once + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "Pass.h" + +namespace milvus { +namespace scheduler { + +class FaissIVFPQPass : public Pass { + public: + FaissIVFPQPass() = default; + + public: + void + Init() override; + + bool + Run(const TaskPtr& task) override; + + private: + int64_t threshold_ = std::numeric_limits::max(); + int64_t count_ = 0; + std::vector gpus; +}; + +using FaissIVFPQPassPtr = std::shared_ptr; + +} // namespace scheduler +} // namespace milvus +#endif