diff --git a/core/src/db/engine/ExecutionEngineImpl.cpp b/core/src/db/engine/ExecutionEngineImpl.cpp index 862c1026d2..9c411deba1 100644 --- a/core/src/db/engine/ExecutionEngineImpl.cpp +++ b/core/src/db/engine/ExecutionEngineImpl.cpp @@ -22,10 +22,7 @@ #include "utils/CommonUtil.h" #include "utils/Exception.h" #include "utils/Log.h" - #include "knowhere/common/Config.h" -#include "knowhere/common/Exception.h" -#include "knowhere/index/vector_index/IndexIVFSQHybrid.h" #include "scheduler/Utils.h" #include "server/Config.h" #include "wrapper/ConfAdapter.h" @@ -249,6 +246,56 @@ ExecutionEngineImpl::Load(bool to_cache) { Status ExecutionEngineImpl::CopyToGpu(uint64_t device_id, bool hybrid) { if (hybrid) { +#if 1 + const std::string key = location_ + ".quantizer"; + std::vector gpus = scheduler::get_gpu_pool(); + + const int64_t NOT_FOUND = -1; + int64_t device_id = NOT_FOUND; + + // cache hit + { + knowhere::QuantizerPtr quantizer = nullptr; + + for (auto& gpu : gpus) { + auto cache = cache::GpuCacheMgr::GetInstance(gpu); + if (auto cached_quantizer = cache->GetIndex(key)) { + device_id = gpu; + quantizer = std::static_pointer_cast(cached_quantizer)->Data(); + } + } + + if (device_id != NOT_FOUND) { + // cache hit + auto config = std::make_shared(); + config->gpu_id = device_id; + config->mode = 2; + auto new_index = index_->LoadData(quantizer, config); + index_ = new_index; + } + } + + if (device_id == NOT_FOUND) { + // cache miss + std::vector all_free_mem; + for (auto& gpu : gpus) { + auto cache = cache::GpuCacheMgr::GetInstance(gpu); + auto free_mem = cache->CacheCapacity() - cache->CacheUsage(); + all_free_mem.push_back(free_mem); + } + + auto max_e = std::max_element(all_free_mem.begin(), all_free_mem.end()); + auto best_index = std::distance(all_free_mem.begin(), max_e); + device_id = gpus[best_index]; + + auto pair = index_->CopyToGpuWithQuantizer(device_id); + index_ = pair.first; + + // cache + auto cached_quantizer = std::make_shared(pair.second); + cache::GpuCacheMgr::GetInstance(device_id)->InsertItem(key, cached_quantizer); + } +#endif return Status::OK(); } diff --git a/core/src/index/knowhere/knowhere/index/vector_index/FaissBaseIndex.h b/core/src/index/knowhere/knowhere/index/vector_index/FaissBaseIndex.h index f3fceebb88..359af97d90 100644 --- a/core/src/index/knowhere/knowhere/index/vector_index/FaissBaseIndex.h +++ b/core/src/index/knowhere/knowhere/index/vector_index/FaissBaseIndex.h @@ -38,7 +38,7 @@ class FaissBaseIndex { virtual void SealImpl(); - protected: + public: std::shared_ptr index_ = nullptr; }; diff --git a/core/src/index/knowhere/knowhere/index/vector_index/IndexGPUIVF.cpp b/core/src/index/knowhere/knowhere/index/vector_index/IndexGPUIVF.cpp index a5e8f90f34..65938e1630 100644 --- a/core/src/index/knowhere/knowhere/index/vector_index/IndexGPUIVF.cpp +++ b/core/src/index/knowhere/knowhere/index/vector_index/IndexGPUIVF.cpp @@ -15,12 +15,12 @@ // specific language governing permissions and limitations // under the License. -#include -#include +#include + #include #include #include -#include +#include #include "knowhere/adapter/VectorAdapter.h" #include "knowhere/common/Exception.h" @@ -130,13 +130,12 @@ void GPUIVF::search_impl(int64_t n, const float* data, int64_t k, float* distances, int64_t* labels, const Config& cfg) { std::lock_guard lk(mutex_); - // TODO(linxj): gpu index support GenParams if (auto device_index = std::dynamic_pointer_cast(index_)) { auto search_cfg = std::dynamic_pointer_cast(cfg); - device_index->setNumProbes(search_cfg->nprobe); + device_index->nprobe = search_cfg->nprobe; +// assert(device_index->getNumProbes() == search_cfg->nprobe); { - // TODO(linxj): allocate gpu mem ResScope rs(res_, gpu_id_); device_index->search(n, (float*)data, k, distances, labels); } diff --git a/core/src/index/knowhere/knowhere/index/vector_index/IndexGPUIVFPQ.cpp b/core/src/index/knowhere/knowhere/index/vector_index/IndexGPUIVFPQ.cpp index 213141b3ac..9ba8dd0456 100644 --- a/core/src/index/knowhere/knowhere/index/vector_index/IndexGPUIVFPQ.cpp +++ b/core/src/index/knowhere/knowhere/index/vector_index/IndexGPUIVFPQ.cpp @@ -16,8 +16,10 @@ // under the License. #include -#include #include +#include +#include + #include #include "knowhere/adapter/VectorAdapter.h" diff --git a/core/src/index/knowhere/knowhere/index/vector_index/IndexGPUIVFSQ.cpp b/core/src/index/knowhere/knowhere/index/vector_index/IndexGPUIVFSQ.cpp index 5e1f5226f2..fff27cd7db 100644 --- a/core/src/index/knowhere/knowhere/index/vector_index/IndexGPUIVFSQ.cpp +++ b/core/src/index/knowhere/knowhere/index/vector_index/IndexGPUIVFSQ.cpp @@ -15,9 +15,10 @@ // specific language governing permissions and limitations // under the License. -#include +#include +#include + #include -#include #include "knowhere/adapter/VectorAdapter.h" #include "knowhere/common/Exception.h" @@ -71,13 +72,4 @@ GPUIVFSQ::CopyGpuToCpu(const Config& config) { return std::make_shared(new_index); } -void -GPUIVFSQ::search_impl(int64_t n, const float* data, int64_t k, float* distances, int64_t* labels, const Config& cfg) { -#ifdef CUSTOMIZATION - GPUIVF::search_impl(n, data, k, distances, labels, cfg); -#else - IVF::search_impl(n, data, k, distances, labels, cfg); -#endif -} - } // namespace knowhere diff --git a/core/src/index/knowhere/knowhere/index/vector_index/IndexGPUIVFSQ.h b/core/src/index/knowhere/knowhere/index/vector_index/IndexGPUIVFSQ.h index 7332bce691..ed8013d77f 100644 --- a/core/src/index/knowhere/knowhere/index/vector_index/IndexGPUIVFSQ.h +++ b/core/src/index/knowhere/knowhere/index/vector_index/IndexGPUIVFSQ.h @@ -38,10 +38,6 @@ class GPUIVFSQ : public GPUIVF { VectorIndexPtr CopyGpuToCpu(const Config& config) override; - - protected: - void - search_impl(int64_t n, const float* data, int64_t k, float* distances, int64_t* labels, const Config& cfg) override; }; } // namespace knowhere diff --git a/core/src/index/knowhere/knowhere/index/vector_index/IndexIDMAP.cpp b/core/src/index/knowhere/knowhere/index/vector_index/IndexIDMAP.cpp index 2371591b5c..643bb16076 100644 --- a/core/src/index/knowhere/knowhere/index/vector_index/IndexIDMAP.cpp +++ b/core/src/index/knowhere/knowhere/index/vector_index/IndexIDMAP.cpp @@ -15,11 +15,12 @@ // specific language governing permissions and limitations // under the License. -#include #include #include -#include #include +#include +#include + #include #include "knowhere/adapter/VectorAdapter.h" diff --git a/core/src/index/knowhere/knowhere/index/vector_index/IndexIVF.cpp b/core/src/index/knowhere/knowhere/index/vector_index/IndexIVF.cpp index 0c4856f2b6..02708ff5d7 100644 --- a/core/src/index/knowhere/knowhere/index/vector_index/IndexIVF.cpp +++ b/core/src/index/knowhere/knowhere/index/vector_index/IndexIVF.cpp @@ -15,15 +15,11 @@ // specific language governing permissions and limitations // under the License. -#include -#include +#include #include #include #include #include -#include -#include -#include #include #include #include diff --git a/core/src/index/knowhere/knowhere/index/vector_index/IndexIVF.h b/core/src/index/knowhere/knowhere/index/vector_index/IndexIVF.h index ef9982fa30..e064b6f08c 100644 --- a/core/src/index/knowhere/knowhere/index/vector_index/IndexIVF.h +++ b/core/src/index/knowhere/knowhere/index/vector_index/IndexIVF.h @@ -30,7 +30,7 @@ namespace knowhere { using Graph = std::vector>; -class IVF : public VectorIndex, protected FaissBaseIndex { +class IVF : public VectorIndex, public FaissBaseIndex { public: IVF() : FaissBaseIndex(nullptr) { } diff --git a/core/src/index/knowhere/knowhere/index/vector_index/IndexIVFSQ.cpp b/core/src/index/knowhere/knowhere/index/vector_index/IndexIVFSQ.cpp index 063dc63550..80b4c78883 100644 --- a/core/src/index/knowhere/knowhere/index/vector_index/IndexIVFSQ.cpp +++ b/core/src/index/knowhere/knowhere/index/vector_index/IndexIVFSQ.cpp @@ -15,7 +15,8 @@ // specific language governing permissions and limitations // under the License. -#include +#include +#include #include #include "knowhere/adapter/VectorAdapter.h" @@ -56,14 +57,7 @@ IVFSQ::CopyCpuToGpu(const int64_t& device_id, const Config& config) { if (auto res = FaissGpuResourceMgr::GetInstance().GetRes(device_id)) { ResScope rs(res, device_id, false); -#ifdef CUSTOMIZATION - faiss::gpu::GpuClonerOptions option; - option.allInGpu = true; - - auto gpu_index = faiss::gpu::index_cpu_to_gpu(res->faiss_res.get(), device_id, index_.get(), &option); -#else auto gpu_index = faiss::gpu::index_cpu_to_gpu(res->faiss_res.get(), device_id, index_.get()); -#endif std::shared_ptr device_index; device_index.reset(gpu_index); diff --git a/core/src/index/knowhere/knowhere/index/vector_index/IndexIVFSQHybrid.cpp b/core/src/index/knowhere/knowhere/index/vector_index/IndexIVFSQHybrid.cpp index fe5bf0990a..af67722266 100644 --- a/core/src/index/knowhere/knowhere/index/vector_index/IndexIVFSQHybrid.cpp +++ b/core/src/index/knowhere/knowhere/index/vector_index/IndexIVFSQHybrid.cpp @@ -17,19 +17,25 @@ // under the License. #include "knowhere/index/vector_index/IndexIVFSQHybrid.h" -#include -#include "faiss/AutoTune.h" -#include "faiss/gpu/GpuAutoTune.h" -#include "faiss/gpu/GpuIndexIVF.h" #include "knowhere/adapter/VectorAdapter.h" #include "knowhere/common/Exception.h" +#include + +#include +#include +#include + namespace knowhere { #ifdef CUSTOMIZATION +//std::mutex g_mutex; + IndexModelPtr IVFSQHybrid::Train(const DatasetPtr& dataset, const Config& config) { +// std::lock_guard lk(g_mutex); + auto build_cfg = std::dynamic_pointer_cast(config); if (build_cfg != nullptr) { build_cfg->CheckValid(); // throw exception @@ -63,23 +69,25 @@ IVFSQHybrid::Train(const DatasetPtr& dataset, const Config& config) { VectorIndexPtr IVFSQHybrid::CopyGpuToCpu(const Config& config) { + if (gpu_mode == 0) { + return std::make_shared(index_); + } std::lock_guard lk(mutex_); - if (auto device_idx = std::dynamic_pointer_cast(index_)) { faiss::Index* device_index = index_.get(); faiss::Index* host_index = faiss::gpu::index_gpu_to_cpu(device_index); std::shared_ptr new_index; new_index.reset(host_index); return std::make_shared(new_index); - } else { - // TODO(linxj): why? jinhai - return std::make_shared(index_); - } } VectorIndexPtr IVFSQHybrid::CopyCpuToGpu(const int64_t& device_id, const Config& config) { + if (gpu_mode != 0) { + KNOWHERE_THROW_MSG("Not a GpuIndex Type"); + } + if (auto res = FaissGpuResourceMgr::GetInstance().GetRes(device_id)) { ResScope rs(res, device_id, false); faiss::gpu::GpuClonerOptions option; @@ -105,16 +113,26 @@ IVFSQHybrid::LoadImpl(const BinarySet& index_binary) { FaissBaseIndex::LoadImpl(index_binary); // load on cpu auto* ivf_index = dynamic_cast(index_.get()); ivf_index->backup_quantizer(); + gpu_mode = 0; } void IVFSQHybrid::search_impl(int64_t n, const float* data, int64_t k, float* distances, int64_t* labels, const Config& cfg) { +// std::lock_guard lk(g_mutex); +// static int64_t search_count; +// ++search_count; + if (gpu_mode == 2) { GPUIVF::search_impl(n, data, k, distances, labels, cfg); - } else if (gpu_mode == 1) { - ResScope rs(res_, gpu_id_); - IVF::search_impl(n, data, k, distances, labels, cfg); +// index_->search(n, (float*)data, k, distances, labels); + } else if (gpu_mode == 1) { // hybrid + if (auto res = FaissGpuResourceMgr::GetInstance().GetRes(quantizer_gpu_id_)) { + ResScope rs(res, quantizer_gpu_id_, true); + IVF::search_impl(n, data, k, distances, labels, cfg); + } else { + KNOWHERE_THROW_MSG("Hybrid Search Error, can't get gpu: " + std::to_string(quantizer_gpu_id_) + "resource"); + } } else if (gpu_mode == 0) { IVF::search_impl(n, data, k, distances, labels, cfg); } @@ -122,16 +140,18 @@ IVFSQHybrid::search_impl(int64_t n, const float* data, int64_t k, float* distanc QuantizerPtr IVFSQHybrid::LoadQuantizer(const Config& conf) { +// std::lock_guard lk(g_mutex); + auto quantizer_conf = std::dynamic_pointer_cast(conf); if (quantizer_conf != nullptr) { if (quantizer_conf->mode != 1) { KNOWHERE_THROW_MSG("mode only support 1 in this func"); } } - gpu_id_ = quantizer_conf->gpu_id; + auto gpu_id = quantizer_conf->gpu_id; - if (auto res = FaissGpuResourceMgr::GetInstance().GetRes(gpu_id_)) { - ResScope rs(res, gpu_id_, false); + if (auto res = FaissGpuResourceMgr::GetInstance().GetRes(gpu_id)) { + ResScope rs(res, gpu_id, false); faiss::gpu::GpuClonerOptions option; option.allInGpu = true; @@ -148,16 +168,19 @@ IVFSQHybrid::LoadQuantizer(const Config& conf) { auto& q_ptr = index_composition->quantizer; q->size = q_ptr->d * q_ptr->getNumVecs() * sizeof(float); q->quantizer = q_ptr; + q->gpu_id = gpu_id; res_ = res; gpu_mode = 1; return q; } else { - KNOWHERE_THROW_MSG("CopyCpuToGpu Error, can't get gpu: " + std::to_string(gpu_id_) + "resource"); + KNOWHERE_THROW_MSG("CopyCpuToGpu Error, can't get gpu: " + std::to_string(gpu_id) + "resource"); } } void IVFSQHybrid::SetQuantizer(const QuantizerPtr& q) { +// std::lock_guard lk(g_mutex); + auto ivf_quantizer = std::dynamic_pointer_cast(q); if (ivf_quantizer == nullptr) { KNOWHERE_THROW_MSG("Quantizer type error"); @@ -170,20 +193,27 @@ IVFSQHybrid::SetQuantizer(const QuantizerPtr& q) { // delete ivf_index->quantizer; ivf_index->quantizer = ivf_quantizer->quantizer; } + quantizer_gpu_id_ = ivf_quantizer->gpu_id; + gpu_mode = 1; } void IVFSQHybrid::UnsetQuantizer() { +// std::lock_guard lk(g_mutex); + auto* ivf_index = dynamic_cast(index_.get()); if (ivf_index == nullptr) { KNOWHERE_THROW_MSG("Index type error"); } ivf_index->quantizer = nullptr; + quantizer_gpu_id_ = -1; } VectorIndexPtr IVFSQHybrid::LoadData(const knowhere::QuantizerPtr& q, const Config& conf) { +// std::lock_guard lk(g_mutex); + auto quantizer_conf = std::dynamic_pointer_cast(conf); if (quantizer_conf != nullptr) { if (quantizer_conf->mode != 2) { @@ -192,13 +222,11 @@ IVFSQHybrid::LoadData(const knowhere::QuantizerPtr& q, const Config& conf) { } else { KNOWHERE_THROW_MSG("conf error"); } - // if (quantizer_conf->gpu_id != gpu_id_) { - // KNOWHERE_THROW_MSG("quantizer and data must on the same gpu card"); - // } - gpu_id_ = quantizer_conf->gpu_id; - if (auto res = FaissGpuResourceMgr::GetInstance().GetRes(gpu_id_)) { - ResScope rs(res, gpu_id_, false); + auto gpu_id = quantizer_conf->gpu_id; + + if (auto res = FaissGpuResourceMgr::GetInstance().GetRes(gpu_id)) { + ResScope rs(res, gpu_id, false); faiss::gpu::GpuClonerOptions option; option.allInGpu = true; @@ -211,18 +239,20 @@ IVFSQHybrid::LoadData(const knowhere::QuantizerPtr& q, const Config& conf) { index_composition->quantizer = ivf_quantizer->quantizer; index_composition->mode = quantizer_conf->mode; // only 2 - auto gpu_index = faiss::gpu::index_cpu_to_gpu(res->faiss_res.get(), gpu_id_, index_composition, &option); + auto gpu_index = faiss::gpu::index_cpu_to_gpu(res->faiss_res.get(), gpu_id, index_composition, &option); std::shared_ptr new_idx; new_idx.reset(gpu_index); - auto sq_idx = std::make_shared(new_idx, gpu_id_, res); + auto sq_idx = std::make_shared(new_idx, gpu_id, res); return sq_idx; } else { - KNOWHERE_THROW_MSG("CopyCpuToGpu Error, can't get gpu: " + std::to_string(gpu_id_) + "resource"); + KNOWHERE_THROW_MSG("CopyCpuToGpu Error, can't get gpu: " + std::to_string(gpu_id) + "resource"); } } std::pair IVFSQHybrid::CopyCpuToGpuWithQuantizer(const int64_t& device_id, const Config& config) { +// std::lock_guard lk(g_mutex); + if (auto res = FaissGpuResourceMgr::GetInstance().GetRes(device_id)) { ResScope rs(res, device_id, false); faiss::gpu::GpuClonerOptions option; @@ -242,12 +272,29 @@ IVFSQHybrid::CopyCpuToGpuWithQuantizer(const int64_t& device_id, const Config& c auto q = std::make_shared(); q->quantizer = index_composition.quantizer; q->size = index_composition.quantizer->d * index_composition.quantizer->getNumVecs() * sizeof(float); + q->gpu_id = device_id; return std::make_pair(new_idx, q); } else { KNOWHERE_THROW_MSG("CopyCpuToGpu Error, can't get gpu: " + std::to_string(gpu_id_) + "resource"); } } +void +IVFSQHybrid::set_index_model(IndexModelPtr model) { + std::lock_guard lk(mutex_); + + auto host_index = std::static_pointer_cast(model); + if (auto gpures = FaissGpuResourceMgr::GetInstance().GetRes(gpu_id_)) { + ResScope rs(gpures, gpu_id_, false); + auto device_index = faiss::gpu::index_cpu_to_gpu(gpures->faiss_res.get(), gpu_id_, host_index->index_.get()); + index_.reset(device_index); + res_ = gpures; + gpu_mode = 2; + } else { + KNOWHERE_THROW_MSG("load index model error, can't get gpu_resource"); + } +} + FaissIVFQuantizer::~FaissIVFQuantizer() { if (quantizer != nullptr) { delete quantizer; diff --git a/core/src/index/knowhere/knowhere/index/vector_index/IndexIVFSQHybrid.h b/core/src/index/knowhere/knowhere/index/vector_index/IndexIVFSQHybrid.h index f54c61c20f..87cc22931f 100644 --- a/core/src/index/knowhere/knowhere/index/vector_index/IndexIVFSQHybrid.h +++ b/core/src/index/knowhere/knowhere/index/vector_index/IndexIVFSQHybrid.h @@ -18,6 +18,8 @@ #pragma once #include +#include + #include #include @@ -29,6 +31,7 @@ namespace knowhere { #ifdef CUSTOMIZATION struct FaissIVFQuantizer : public Quantizer { faiss::gpu::GpuIndexFlat* quantizer = nullptr; + int64_t gpu_id; ~FaissIVFQuantizer() override; }; @@ -52,6 +55,9 @@ class IVFSQHybrid : public GPUIVFSQ { } public: + void + set_index_model(IndexModelPtr model) override; + QuantizerPtr LoadQuantizer(const Config& conf); @@ -85,6 +91,7 @@ class IVFSQHybrid : public GPUIVFSQ { protected: int64_t gpu_mode = 0; // 0,1,2 + int64_t quantizer_gpu_id_ = -1; }; } // namespace knowhere diff --git a/core/src/index/knowhere/knowhere/index/vector_index/VectorIndex.h b/core/src/index/knowhere/knowhere/index/vector_index/VectorIndex.h index 810c4d2ea4..6509458b7b 100644 --- a/core/src/index/knowhere/knowhere/index/vector_index/VectorIndex.h +++ b/core/src/index/knowhere/knowhere/index/vector_index/VectorIndex.h @@ -48,6 +48,7 @@ class VectorIndex : public Index { virtual void Seal() = 0; + // TODO(linxj): Deprecated virtual VectorIndexPtr Clone() = 0; diff --git a/core/src/index/knowhere/knowhere/index/vector_index/helpers/FaissIO.h b/core/src/index/knowhere/knowhere/index/vector_index/helpers/FaissIO.h index 7cce5bbbac..a7f8f349e1 100644 --- a/core/src/index/knowhere/knowhere/index/vector_index/helpers/FaissIO.h +++ b/core/src/index/knowhere/knowhere/index/vector_index/helpers/FaissIO.h @@ -17,7 +17,7 @@ #pragma once -#include +#include namespace knowhere { diff --git a/core/src/index/unittest/Helper.h b/core/src/index/unittest/Helper.h index d11a484c03..8d4bb0f4ae 100644 --- a/core/src/index/unittest/Helper.h +++ b/core/src/index/unittest/Helper.h @@ -26,7 +26,7 @@ #include "knowhere/index/vector_index/IndexIVFSQ.h" #include "knowhere/index/vector_index/IndexIVFSQHybrid.h" -constexpr int DEVICEID = 0; +int DEVICEID = 0; constexpr int64_t DIM = 128; constexpr int64_t NB = 10000; constexpr int64_t NQ = 10; diff --git a/core/src/index/unittest/test_customized_index.cpp b/core/src/index/unittest/test_customized_index.cpp index 1e0b1d932d..f9b48b8b67 100644 --- a/core/src/index/unittest/test_customized_index.cpp +++ b/core/src/index/unittest/test_customized_index.cpp @@ -16,17 +16,23 @@ // under the License. #include +#include #include "unittest/Helper.h" #include "unittest/utils.h" +#include "knowhere/common/Timer.h" + class SingleIndexTest : public DataGen, public TestGpuIndexBase { protected: void SetUp() override { TestGpuIndexBase::SetUp(); - Generate(DIM, NB, NQ); - k = K; + nb = 1000000; + nq = 1000; + dim = DIM; + Generate(dim, nb, nq); + k = 1000; } void @@ -119,4 +125,114 @@ TEST_F(SingleIndexTest, IVFSQHybrid) { } } +//TEST_F(SingleIndexTest, thread_safe) { +// assert(!xb.empty()); +// +// index_type = "IVFSQHybrid"; +// index_ = IndexFactory(index_type); +// auto base = ParamGenerator::GetInstance().Gen(ParameterType::ivfsq); +// auto conf = std::dynamic_pointer_cast(base); +// conf->nlist = 16384; +// conf->k = k; +// conf->nprobe = 10; +// conf->d = dim; +// auto preprocessor = index_->BuildPreprocessor(base_dataset, conf); +// index_->set_preprocessor(preprocessor); +// +// auto model = index_->Train(base_dataset, conf); +// index_->set_index_model(model); +// index_->Add(base_dataset, conf); +// EXPECT_EQ(index_->Count(), nb); +// EXPECT_EQ(index_->Dimension(), dim); +// +// auto binaryset = index_->Serialize(); +// +// +// +// auto cpu_idx = std::make_shared(DEVICEID); +// cpu_idx->Load(binaryset); +// auto pair = cpu_idx->CopyCpuToGpuWithQuantizer(DEVICEID, conf); +// auto quantizer = pair.second; +// +// auto quantizer_conf = std::make_shared(); +// quantizer_conf->mode = 2; // only copy data +// quantizer_conf->gpu_id = DEVICEID; +// +// auto CopyAllToGpu = [&](int64_t search_count, bool do_search = false) { +// for (int i = 0; i < search_count; ++i) { +// auto gpu_idx = cpu_idx->CopyCpuToGpu(DEVICEID, conf); +// if (do_search) { +// auto result = gpu_idx->Search(query_dataset, conf); +// AssertAnns(result, nq, conf->k); +// } +// } +// }; +// +// auto hybrid_qt_idx = std::make_shared(DEVICEID); +// hybrid_qt_idx->Load(binaryset); +// auto SetQuantizerDoSearch = [&](int64_t search_count) { +// for (int i = 0; i < search_count; ++i) { +// hybrid_qt_idx->SetQuantizer(quantizer); +// auto result = hybrid_qt_idx->Search(query_dataset, conf); +// AssertAnns(result, nq, conf->k); +// // PrintResult(result, nq, k); +// hybrid_qt_idx->UnsetQuantizer(); +// } +// }; +// +// auto hybrid_data_idx = std::make_shared(DEVICEID); +// hybrid_data_idx->Load(binaryset); +// auto LoadDataDoSearch = [&](int64_t search_count, bool do_search = false) { +// for (int i = 0; i < search_count; ++i) { +// auto hybrid_idx = hybrid_data_idx->LoadData(quantizer, quantizer_conf); +// if (do_search) { +// auto result = hybrid_idx->Search(query_dataset, conf); +//// AssertAnns(result, nq, conf->k); +// } +// } +// }; +// +// knowhere::TimeRecorder tc(""); +// CopyAllToGpu(2000/2, false); +// tc.RecordSection("CopyAllToGpu witout search"); +// CopyAllToGpu(400/2, true); +// tc.RecordSection("CopyAllToGpu with search"); +// SetQuantizerDoSearch(6); +// tc.RecordSection("SetQuantizer with search"); +// LoadDataDoSearch(2000/2, false); +// tc.RecordSection("LoadData without search"); +// LoadDataDoSearch(400/2, true); +// tc.RecordSection("LoadData with search"); +// +// { +// std::thread t1(CopyAllToGpu, 2000, false); +// std::thread t2(CopyAllToGpu, 400, true); +// t1.join(); +// t2.join(); +// } +// +// { +// std::thread t1(SetQuantizerDoSearch, 12); +// std::thread t2(CopyAllToGpu, 400, true); +// t1.join(); +// t2.join(); +// } +// +// { +// std::thread t1(SetQuantizerDoSearch, 12); +// std::thread t2(LoadDataDoSearch, 400, true); +// t1.join(); +// t2.join(); +// } +// +// { +// std::thread t1(LoadDataDoSearch, 2000, false); +// std::thread t2(LoadDataDoSearch, 400, true); +// t1.join(); +// t2.join(); +// } +// +//} + + #endif diff --git a/core/src/index/unittest/test_ivf.cpp b/core/src/index/unittest/test_ivf.cpp index fae27b0dd3..3fd3e16d0e 100644 --- a/core/src/index/unittest/test_ivf.cpp +++ b/core/src/index/unittest/test_ivf.cpp @@ -20,19 +20,12 @@ #include #include -#include -#include #include #include "knowhere/common/Exception.h" #include "knowhere/common/Timer.h" #include "knowhere/index/vector_index/IndexGPUIVF.h" -#include "knowhere/index/vector_index/IndexGPUIVFPQ.h" -#include "knowhere/index/vector_index/IndexGPUIVFSQ.h" #include "knowhere/index/vector_index/IndexIVF.h" -#include "knowhere/index/vector_index/IndexIVFPQ.h" -#include "knowhere/index/vector_index/IndexIVFSQ.h" -#include "knowhere/index/vector_index/IndexIVFSQHybrid.h" #include "knowhere/index/vector_index/helpers/Cloner.h" #include "unittest/Helper.h" @@ -51,6 +44,9 @@ class IVFTest : public DataGen, public TestWithParam<::std::tuple gpu_idx{"GPUIVFSQ"}; - auto finder = std::find(gpu_idx.cbegin(), gpu_idx.cend(), index_type); - if (finder != gpu_idx.cend()) { - return knowhere::cloner::CopyCpuToGpu(index_, DEVICEID, knowhere::Config()); - } - return index_; - } - protected: std::string index_type; knowhere::Config conf; @@ -100,8 +86,7 @@ TEST_P(IVFTest, ivf_basic) { EXPECT_EQ(index_->Count(), nb); EXPECT_EQ(index_->Dimension(), dim); - auto new_idx = ChooseTodo(); - auto result = new_idx->Search(query_dataset, conf); + auto result = index_->Search(query_dataset, conf); AssertAnns(result, nq, conf->k); // PrintResult(result, nq, k); } @@ -134,8 +119,7 @@ TEST_P(IVFTest, ivf_serialize) { index_->set_index_model(model); index_->Add(base_dataset, conf); - auto new_idx = ChooseTodo(); - auto result = new_idx->Search(query_dataset, conf); + auto result = index_->Search(query_dataset, conf); AssertAnns(result, nq, conf->k); } @@ -159,8 +143,7 @@ TEST_P(IVFTest, ivf_serialize) { index_->Load(binaryset); EXPECT_EQ(index_->Count(), nb); EXPECT_EQ(index_->Dimension(), dim); - auto new_idx = ChooseTodo(); - auto result = new_idx->Search(query_dataset, conf); + auto result = index_->Search(query_dataset, conf); AssertAnns(result, nq, conf->k); } } @@ -176,8 +159,7 @@ TEST_P(IVFTest, clone_test) { index_->Add(base_dataset, conf); EXPECT_EQ(index_->Count(), nb); EXPECT_EQ(index_->Dimension(), dim); - auto new_idx = ChooseTodo(); - auto result = new_idx->Search(query_dataset, conf); + auto result = index_->Search(query_dataset, conf); AssertAnns(result, nq, conf->k); // PrintResult(result, nq, k); @@ -210,12 +192,6 @@ TEST_P(IVFTest, clone_test) { // } // } - { - if (index_type == "IVFSQHybrid") { - return; - } - } - { // copy from gpu to cpu std::vector support_idx_vec{"GPUIVF", "GPUIVFSQ", "IVFSQHybrid"}; @@ -277,8 +253,7 @@ TEST_P(IVFTest, gpu_seal_test) { index_->Add(base_dataset, conf); EXPECT_EQ(index_->Count(), nb); EXPECT_EQ(index_->Dimension(), dim); - auto new_idx = ChooseTodo(); - auto result = new_idx->Search(query_dataset, conf); + auto result = index_->Search(query_dataset, conf); AssertAnns(result, nq, conf->k); auto cpu_idx = knowhere::cloner::CopyGpuToCpu(index_, knowhere::Config()); diff --git a/core/src/scheduler/SchedInst.h b/core/src/scheduler/SchedInst.h index 60033731ae..b9153d3bc3 100644 --- a/core/src/scheduler/SchedInst.h +++ b/core/src/scheduler/SchedInst.h @@ -94,6 +94,7 @@ class OptimizerInst { std::lock_guard lock(mutex_); if (instance == nullptr) { std::vector pass_list; + pass_list.push_back(std::make_shared()); pass_list.push_back(std::make_shared()); instance = std::make_shared(pass_list); } diff --git a/core/src/scheduler/optimizer/LargeSQ8HPass.cpp b/core/src/scheduler/optimizer/LargeSQ8HPass.cpp index 8368a90000..0d5a81a7b6 100644 --- a/core/src/scheduler/optimizer/LargeSQ8HPass.cpp +++ b/core/src/scheduler/optimizer/LargeSQ8HPass.cpp @@ -26,48 +26,48 @@ namespace milvus { namespace scheduler { -// bool -// LargeSQ8HPass::Run(const TaskPtr& task) { -// if (task->Type() != TaskType::SearchTask) { -// return false; -// } -// -// auto search_task = std::static_pointer_cast(task); -// if (search_task->file_->engine_type_ != (int)engine::EngineType::FAISS_IVFSQ8H) { -// return false; -// } -// -// auto search_job = std::static_pointer_cast(search_task->job_.lock()); -// -// // TODO: future, Index::IVFSQ8H, if nq < threshold set cpu, else set gpu -// if (search_job->nq() < 100) { -// return false; -// } -// -// std::vector gpus = scheduler::get_gpu_pool(); -// std::vector all_free_mem; -// for (auto& gpu : gpus) { -// auto cache = cache::GpuCacheMgr::GetInstance(gpu); -// auto free_mem = cache->CacheCapacity() - cache->CacheUsage(); -// all_free_mem.push_back(free_mem); -// } -// -// auto max_e = std::max_element(all_free_mem.begin(), all_free_mem.end()); -// auto best_index = std::distance(all_free_mem.begin(), max_e); -// auto best_device_id = gpus[best_index]; -// -// ResourcePtr res_ptr = ResMgrInst::GetInstance()->GetResource(ResourceType::GPU, best_device_id); -// if (not res_ptr) { -// SERVER_LOG_ERROR << "GpuResource " << best_device_id << " invalid."; -// // TODO: throw critical error and exit -// return false; -// } -// -// auto label = std::make_shared(std::weak_ptr(res_ptr)); -// task->label() = label; -// -// return true; -// } + bool + LargeSQ8HPass::Run(const TaskPtr& task) { + if (task->Type() != TaskType::SearchTask) { + return false; + } + + auto search_task = std::static_pointer_cast(task); + if (search_task->file_->engine_type_ != (int)engine::EngineType::FAISS_IVFSQ8H) { + return false; + } + + auto search_job = std::static_pointer_cast(search_task->job_.lock()); + + // TODO: future, Index::IVFSQ8H, if nq < threshold set cpu, else set gpu + if (search_job->nq() < 100) { + return false; + } + + std::vector gpus = scheduler::get_gpu_pool(); + std::vector all_free_mem; + for (auto& gpu : gpus) { + auto cache = cache::GpuCacheMgr::GetInstance(gpu); + auto free_mem = cache->CacheCapacity() - cache->CacheUsage(); + all_free_mem.push_back(free_mem); + } + + auto max_e = std::max_element(all_free_mem.begin(), all_free_mem.end()); + auto best_index = std::distance(all_free_mem.begin(), max_e); + auto best_device_id = gpus[best_index]; + + ResourcePtr res_ptr = ResMgrInst::GetInstance()->GetResource(ResourceType::GPU, best_device_id); + if (not res_ptr) { + SERVER_LOG_ERROR << "GpuResource " << best_device_id << " invalid."; + // TODO: throw critical error and exit + return false; + } + + auto label = std::make_shared(std::weak_ptr(res_ptr)); + task->label() = label; + + return true; + } } // namespace scheduler } // namespace milvus diff --git a/core/src/scheduler/optimizer/LargeSQ8HPass.h b/core/src/scheduler/optimizer/LargeSQ8HPass.h index 3335a37cc7..49e658002f 100644 --- a/core/src/scheduler/optimizer/LargeSQ8HPass.h +++ b/core/src/scheduler/optimizer/LargeSQ8HPass.h @@ -37,8 +37,8 @@ class LargeSQ8HPass : public Pass { LargeSQ8HPass() = default; public: - // bool - // Run(const TaskPtr& task) override; + bool + Run(const TaskPtr& task) override; }; using LargeSQ8HPassPtr = std::shared_ptr; diff --git a/core/src/server/DBWrapper.cpp b/core/src/server/DBWrapper.cpp index a5b892ad47..34c8d38faf 100644 --- a/core/src/server/DBWrapper.cpp +++ b/core/src/server/DBWrapper.cpp @@ -15,7 +15,7 @@ // specific language governing permissions and limitations // under the License. -#include +#include #include #include #include diff --git a/core/src/wrapper/VecIndex.h b/core/src/wrapper/VecIndex.h index 05da9ccc03..1729d583ae 100644 --- a/core/src/wrapper/VecIndex.h +++ b/core/src/wrapper/VecIndex.h @@ -70,6 +70,7 @@ class VecIndex : public cache::DataObj { virtual VecIndexPtr CopyToCpu(const Config& cfg = Config()) = 0; + // TODO(linxj): Deprecated virtual VecIndexPtr Clone() = 0; diff --git a/core/unittest/wrapper/test_wrapper.cpp b/core/unittest/wrapper/test_wrapper.cpp index f112fc7e65..2f8fd6fafe 100644 --- a/core/unittest/wrapper/test_wrapper.cpp +++ b/core/unittest/wrapper/test_wrapper.cpp @@ -74,7 +74,7 @@ INSTANTIATE_TEST_CASE_P(WrapperParam, KnowhereWrapperTest, 10, 10), std::make_tuple(milvus::engine::IndexType::FAISS_IVFSQ8_CPU, "Default", DIM, NB, 10, 10), -// std::make_tuple(milvus::engine::IndexType::FAISS_IVFSQ8_GPU, "Default", DIM, NB, 10, 10), + std::make_tuple(milvus::engine::IndexType::FAISS_IVFSQ8_GPU, "Default", DIM, NB, 10, 10), std::make_tuple(milvus::engine::IndexType::FAISS_IVFSQ8_MIX, "Default", DIM, NB, 10, 10), // std::make_tuple(IndexType::NSG_MIX, "Default", 128, 250000, 10, 10), // std::make_tuple(IndexType::SPTAG_KDT_RNT_CPU, "Default", 128, 250000, 10, 10),