From 7338a044f36a8db69be154780cd53626ea6a50a8 Mon Sep 17 00:00:00 2001 From: wxyu Date: Sun, 13 Oct 2019 16:46:55 +0800 Subject: [PATCH] SQ8H in GPU part3 Former-commit-id: bd95d08bede45255fa10f4d8fdeb8674e435860b --- .../knowhere/index/vector_index/IndexIVF.cpp | 14 +++++ .../index/vector_index/IndexIVFSQHybrid.cpp | 51 ++++++++++++------- .../index/vector_index/IndexIVFSQHybrid.h | 3 ++ cpp/src/db/engine/ExecutionEngineImpl.cpp | 17 ++----- cpp/src/wrapper/VecImpl.cpp | 19 +++++++ cpp/src/wrapper/VecImpl.h | 2 + cpp/src/wrapper/VecIndex.h | 5 ++ 7 files changed, 80 insertions(+), 31 deletions(-) diff --git a/cpp/src/core/knowhere/knowhere/index/vector_index/IndexIVF.cpp b/cpp/src/core/knowhere/knowhere/index/vector_index/IndexIVF.cpp index 510ab46bd6..99dd2e2926 100644 --- a/cpp/src/core/knowhere/knowhere/index/vector_index/IndexIVF.cpp +++ b/cpp/src/core/knowhere/knowhere/index/vector_index/IndexIVF.cpp @@ -115,6 +115,20 @@ IVF::Search(const DatasetPtr& dataset, const Config& config) { search_impl(rows, (float*)p_data, search_cfg->k, res_dis, res_ids, config); +// std::stringstream ss_res_id, ss_res_dist; +// for (int i = 0; i < 10; ++i) { +// printf("%llu", res_ids[i]); +// printf("\n"); +// printf("%.6f", res_dis[i]); +// printf("\n"); +// ss_res_id << res_ids[i] << " "; +// ss_res_dist << res_dis[i] << " "; +// } +// std::cout << std::endl << "after search: " << std::endl; +// std::cout << ss_res_id.str() << std::endl; +// std::cout << ss_res_dist.str() << std::endl << std::endl; + + auto id_buf = MakeMutableBufferSmart((uint8_t*)res_ids, sizeof(int64_t) * elems); auto dist_buf = MakeMutableBufferSmart((uint8_t*)res_dis, sizeof(float) * elems); diff --git a/cpp/src/core/knowhere/knowhere/index/vector_index/IndexIVFSQHybrid.cpp b/cpp/src/core/knowhere/knowhere/index/vector_index/IndexIVFSQHybrid.cpp index c6c9291388..8176ee0b49 100644 --- a/cpp/src/core/knowhere/knowhere/index/vector_index/IndexIVFSQHybrid.cpp +++ b/cpp/src/core/knowhere/knowhere/index/vector_index/IndexIVFSQHybrid.cpp @@ -79,20 +79,8 @@ IVFSQHybrid::CopyGpuToCpu(const Config& config) { VectorIndexPtr IVFSQHybrid::CopyCpuToGpu(const int64_t& device_id, const Config& config) { if (auto res = FaissGpuResourceMgr::GetInstance().GetRes(device_id)) { - ResScope rs(res, device_id, false); - faiss::gpu::GpuClonerOptions option; - option.allInGpu = true; - - faiss::IndexComposition index_composition; - index_composition.index = index_.get(); - index_composition.quantizer = nullptr; - index_composition.mode = 0; // copy all - - auto gpu_index = faiss::gpu::index_cpu_to_gpu(res->faiss_res.get(), device_id, &index_composition, &option); - - std::shared_ptr device_index; - device_index.reset(gpu_index); - return std::make_shared(device_index, device_id, res); + auto p = CopyCpuToGpuWithQuantizer(device_id, config); + return p.first; } else { KNOWHERE_THROW_MSG("CopyCpuToGpu Error, can't get gpu_resource"); } @@ -188,9 +176,10 @@ IVFSQHybrid::LoadData(const knowhere::QuantizerPtr& q, const Config& conf) { KNOWHERE_THROW_MSG("mode only support 2 in this func"); } } - if (quantizer_conf->gpu_id != gpu_id_) { - KNOWHERE_THROW_MSG("quantizer and data must on the same gpu card"); - } +// if (quantizer_conf->gpu_id != gpu_id_) { +// KNOWHERE_THROW_MSG("quantizer and data must on the same gpu card"); +// } + gpu_id_ = quantizer_conf->gpu_id; if (auto res = FaissGpuResourceMgr::GetInstance().GetRes(gpu_id_)) { ResScope rs(res, gpu_id_, false); @@ -216,6 +205,34 @@ IVFSQHybrid::LoadData(const knowhere::QuantizerPtr& q, const Config& conf) { } } +std::pair +IVFSQHybrid::CopyCpuToGpuWithQuantizer(const int64_t& device_id, const Config& config) { + if (auto res = FaissGpuResourceMgr::GetInstance().GetRes(device_id)) { + + ResScope rs(res, device_id, false); + faiss::gpu::GpuClonerOptions option; + option.allInGpu = true; + + faiss::IndexComposition index_composition; + index_composition.index = index_.get(); + index_composition.quantizer = nullptr; + index_composition.mode = 0; // copy all + + auto gpu_index = faiss::gpu::index_cpu_to_gpu(res->faiss_res.get(), device_id, &index_composition, &option); + + std::shared_ptr device_index; + device_index.reset(gpu_index); + auto new_idx = std::make_shared(device_index, device_id, res); + + auto q = std::make_shared(); + q->quantizer = index_composition.quantizer; + q->size = index_composition.quantizer->d * index_composition.quantizer->getNumVecs() * sizeof(float); + return std::make_pair(new_idx, q); + } else { + KNOWHERE_THROW_MSG("CopyCpuToGpu Error, can't get gpu_resource"); + } +} + FaissIVFQuantizer::~FaissIVFQuantizer() { if (quantizer != nullptr) { delete quantizer; diff --git a/cpp/src/core/knowhere/knowhere/index/vector_index/IndexIVFSQHybrid.h b/cpp/src/core/knowhere/knowhere/index/vector_index/IndexIVFSQHybrid.h index d0c58baaf3..cc59940028 100644 --- a/cpp/src/core/knowhere/knowhere/index/vector_index/IndexIVFSQHybrid.h +++ b/cpp/src/core/knowhere/knowhere/index/vector_index/IndexIVFSQHybrid.h @@ -63,6 +63,9 @@ class IVFSQHybrid : public GPUIVFSQ { VectorIndexPtr LoadData(const knowhere::QuantizerPtr& q, const Config& conf); + std::pair + CopyCpuToGpuWithQuantizer(const int64_t& device_id, const Config& config); + IndexModelPtr Train(const DatasetPtr& dataset, const Config& config) override; diff --git a/cpp/src/db/engine/ExecutionEngineImpl.cpp b/cpp/src/db/engine/ExecutionEngineImpl.cpp index c70a5c3b21..5b2c8eb2f3 100644 --- a/cpp/src/db/engine/ExecutionEngineImpl.cpp +++ b/cpp/src/db/engine/ExecutionEngineImpl.cpp @@ -256,27 +256,16 @@ ExecutionEngineImpl::CopyToGpu(uint64_t device_id, bool hybrid) { conf->gpu_id = device_id; if (quantizer) { - std::cout << "cache hit" << std::endl; // cache hit conf->mode = 2; auto new_index = index_->LoadData(quantizer->Data(), conf); index_ = new_index; } else { - std::cout << "cache miss" << std::endl; - // cache hit - // cache miss - if (index_ == nullptr) { - ENGINE_LOG_ERROR << "ExecutionEngineImpl: index is null, failed to copy to gpu"; - return Status(DB_ERROR, "index is null"); - } - conf->mode = 1; - auto q = index_->LoadQuantizer(conf); - conf->mode = 2; - auto new_index = index_->LoadData(q, conf); - index_ = new_index; + auto pair = index_->CopyToGpuWithQuantizer(device_id); + index_ = pair.first; // cache - auto cached_quantizer = std::make_shared(q); + auto cached_quantizer = std::make_shared(pair.second); cache::GpuCacheMgr::GetInstance(device_id)->InsertItem(key, cached_quantizer); } return Status::OK(); diff --git a/cpp/src/wrapper/VecImpl.cpp b/cpp/src/wrapper/VecImpl.cpp index 3ff79690aa..c97900f839 100644 --- a/cpp/src/wrapper/VecImpl.cpp +++ b/cpp/src/wrapper/VecImpl.cpp @@ -332,5 +332,24 @@ IVFHybridIndex::LoadData(const knowhere::QuantizerPtr& q, const Config& conf) { return nullptr; } +std::pair +IVFHybridIndex::CopyToGpuWithQuantizer(const int64_t& device_id, const Config& cfg) { + try { + // TODO(linxj): Hardcode here + if (auto hybrid_idx = std::dynamic_pointer_cast(index_)) { + auto pair = hybrid_idx->CopyCpuToGpuWithQuantizer(device_id, cfg); + auto new_idx = std::make_shared(pair.first, type); + return std::make_pair(new_idx, pair.second); + } else { + WRAPPER_LOG_ERROR << "Hybrid mode not support for index type: " << int(type); + } + } catch (knowhere::KnowhereException& e) { + WRAPPER_LOG_ERROR << e.what(); + } catch (std::exception& e) { + WRAPPER_LOG_ERROR << e.what(); + } + return std::make_pair(nullptr, nullptr); +} + } // namespace engine } // namespace milvus diff --git a/cpp/src/wrapper/VecImpl.h b/cpp/src/wrapper/VecImpl.h index 1f5ca296bb..84b2f11564 100644 --- a/cpp/src/wrapper/VecImpl.h +++ b/cpp/src/wrapper/VecImpl.h @@ -105,6 +105,8 @@ class IVFHybridIndex : public IVFMixIndex { Status UnsetQuantizer() override; + std::pair CopyToGpuWithQuantizer(const int64_t& device_id, + const Config& cfg) override; VecIndexPtr LoadData(const knowhere::QuantizerPtr& q, const Config& conf) override; diff --git a/cpp/src/wrapper/VecIndex.h b/cpp/src/wrapper/VecIndex.h index 55981ef528..e52c26f0bf 100644 --- a/cpp/src/wrapper/VecIndex.h +++ b/cpp/src/wrapper/VecIndex.h @@ -117,6 +117,11 @@ class VecIndex : public cache::DataObj { UnsetQuantizer() { return Status::OK(); } + + virtual std::pair + CopyToGpuWithQuantizer(const int64_t& device_id, const Config& cfg = Config()) { + return std::make_pair(nullptr, nullptr); + } //////////////// private: int64_t size_ = 0;