Mirror of https://gitee.com/milvus-io/milvus.git, synced 2025-12-30 15:35:33 +08:00.
SQ8H in GPU part3

Former-commit-id: bd95d08bede45255fa10f4d8fdeb8674e435860b
This commit is contained in:
    parent e1e9ffeb46
    commit 7338a044f3
@@ -115,6 +115,20 @@ IVF::Search(const DatasetPtr& dataset, const Config& config) {
     search_impl(rows, (float*)p_data, search_cfg->k, res_dis, res_ids, config);
 
+    // std::stringstream ss_res_id, ss_res_dist;
+    // for (int i = 0; i < 10; ++i) {
+    //     printf("%llu", res_ids[i]);
+    //     printf("\n");
+    //     printf("%.6f", res_dis[i]);
+    //     printf("\n");
+    //     ss_res_id << res_ids[i] << " ";
+    //     ss_res_dist << res_dis[i] << " ";
+    // }
+    // std::cout << std::endl << "after search: " << std::endl;
+    // std::cout << ss_res_id.str() << std::endl;
+    // std::cout << ss_res_dist.str() << std::endl << std::endl;
+
     auto id_buf = MakeMutableBufferSmart((uint8_t*)res_ids, sizeof(int64_t) * elems);
     auto dist_buf = MakeMutableBufferSmart((uint8_t*)res_dis, sizeof(float) * elems);
@@ -79,20 +79,8 @@ IVFSQHybrid::CopyGpuToCpu(const Config& config) {
 VectorIndexPtr
 IVFSQHybrid::CopyCpuToGpu(const int64_t& device_id, const Config& config) {
     if (auto res = FaissGpuResourceMgr::GetInstance().GetRes(device_id)) {
-        ResScope rs(res, device_id, false);
-        faiss::gpu::GpuClonerOptions option;
-        option.allInGpu = true;
-
-        faiss::IndexComposition index_composition;
-        index_composition.index = index_.get();
-        index_composition.quantizer = nullptr;
-        index_composition.mode = 0;  // copy all
-
-        auto gpu_index = faiss::gpu::index_cpu_to_gpu(res->faiss_res.get(), device_id, &index_composition, &option);
-
-        std::shared_ptr<faiss::Index> device_index;
-        device_index.reset(gpu_index);
-        return std::make_shared<IVFSQHybrid>(device_index, device_id, res);
+        auto p = CopyCpuToGpuWithQuantizer(device_id, config);
+        return p.first;
     } else {
         KNOWHERE_THROW_MSG("CopyCpuToGpu Error, can't get gpu_resource");
     }
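Note: after this hunk CopyCpuToGpu is a thin wrapper that delegates to the new CopyCpuToGpuWithQuantizer and keeps only the index half of the returned pair. A minimal stand-alone sketch of that shape; GpuIndex, Quantizer, CopyWithQuantizer and CopyIndexOnly are hypothetical stand-ins, not the knowhere types:

    // Illustrative sketch only; the names below are stand-ins for the
    // knowhere types used in the hunk above.
    #include <memory>
    #include <utility>

    struct GpuIndex {};
    struct Quantizer {};

    // Stand-in for IVFSQHybrid::CopyCpuToGpuWithQuantizer: returns both halves.
    std::pair<std::shared_ptr<GpuIndex>, std::shared_ptr<Quantizer>> CopyWithQuantizer() {
        return {std::make_shared<GpuIndex>(), std::make_shared<Quantizer>()};
    }

    // Stand-in for IVFSQHybrid::CopyCpuToGpu: delegates and drops the quantizer.
    std::shared_ptr<GpuIndex> CopyIndexOnly() {
        auto p = CopyWithQuantizer();
        return p.first;  // p.second (the quantizer) is discarded here
    }

    int main() {
        auto gpu_index = CopyIndexOnly();
        return gpu_index ? 0 : 1;
    }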
@@ -188,9 +176,10 @@ IVFSQHybrid::LoadData(const knowhere::QuantizerPtr& q, const Config& conf) {
             KNOWHERE_THROW_MSG("mode only support 2 in this func");
         }
     }
-    if (quantizer_conf->gpu_id != gpu_id_) {
-        KNOWHERE_THROW_MSG("quantizer and data must on the same gpu card");
-    }
+    // if (quantizer_conf->gpu_id != gpu_id_) {
+    //     KNOWHERE_THROW_MSG("quantizer and data must on the same gpu card");
+    // }
+    gpu_id_ = quantizer_conf->gpu_id;
 
     if (auto res = FaissGpuResourceMgr::GetInstance().GetRes(gpu_id_)) {
         ResScope rs(res, gpu_id_, false);
@@ -216,6 +205,34 @@ IVFSQHybrid::LoadData(const knowhere::QuantizerPtr& q, const Config& conf) {
     }
 }
 
+std::pair<VectorIndexPtr, QuantizerPtr>
+IVFSQHybrid::CopyCpuToGpuWithQuantizer(const int64_t& device_id, const Config& config) {
+    if (auto res = FaissGpuResourceMgr::GetInstance().GetRes(device_id)) {
+
+        ResScope rs(res, device_id, false);
+        faiss::gpu::GpuClonerOptions option;
+        option.allInGpu = true;
+
+        faiss::IndexComposition index_composition;
+        index_composition.index = index_.get();
+        index_composition.quantizer = nullptr;
+        index_composition.mode = 0;  // copy all
+
+        auto gpu_index = faiss::gpu::index_cpu_to_gpu(res->faiss_res.get(), device_id, &index_composition, &option);
+
+        std::shared_ptr<faiss::Index> device_index;
+        device_index.reset(gpu_index);
+        auto new_idx = std::make_shared<IVFSQHybrid>(device_index, device_id, res);
+
+        auto q = std::make_shared<FaissIVFQuantizer>();
+        q->quantizer = index_composition.quantizer;
+        q->size = index_composition.quantizer->d * index_composition.quantizer->getNumVecs() * sizeof(float);
+        return std::make_pair(new_idx, q);
+    } else {
+        KNOWHERE_THROW_MSG("CopyCpuToGpu Error, can't get gpu_resource");
+    }
+}
+
 FaissIVFQuantizer::~FaissIVFQuantizer() {
     if (quantizer != nullptr) {
         delete quantizer;
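Note: the new CopyCpuToGpuWithQuantizer records the quantizer's footprint as dimension × number of stored centroid vectors × sizeof(float). A quick arithmetic sketch of that formula, evaluated for purely hypothetical values (d = 128, 16384 coarse centroids), not numbers taken from the commit:

    // Illustrative only: the byte-size formula used for q->size above,
    // evaluated for assumed values of d and the centroid count.
    #include <cstdio>

    int main() {
        const unsigned long long d = 128;        // vector dimension (assumed)
        const unsigned long long nlist = 16384;  // coarse centroids (assumed)
        const unsigned long long bytes = d * nlist * sizeof(float);
        std::printf("quantizer size: %llu bytes (%.1f MB)\n",
                    bytes, bytes / (1024.0 * 1024.0));  // prints 8388608 bytes (8.0 MB)
        return 0;
    }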
@@ -63,6 +63,9 @@ class IVFSQHybrid : public GPUIVFSQ {
     VectorIndexPtr
     LoadData(const knowhere::QuantizerPtr& q, const Config& conf);
 
+    std::pair<VectorIndexPtr, QuantizerPtr>
+    CopyCpuToGpuWithQuantizer(const int64_t& device_id, const Config& config);
+
     IndexModelPtr
     Train(const DatasetPtr& dataset, const Config& config) override;
 
@@ -256,27 +256,16 @@ ExecutionEngineImpl::CopyToGpu(uint64_t device_id, bool hybrid) {
         conf->gpu_id = device_id;
 
         if (quantizer) {
-            std::cout << "cache hit" << std::endl;
             // cache hit
             conf->mode = 2;
             auto new_index = index_->LoadData(quantizer->Data(), conf);
             index_ = new_index;
         } else {
-            std::cout << "cache miss" << std::endl;
-            // cache hit
-            if (index_ == nullptr) {
-                ENGINE_LOG_ERROR << "ExecutionEngineImpl: index is null, failed to copy to gpu";
-                return Status(DB_ERROR, "index is null");
-            }
-            conf->mode = 1;
-            auto q = index_->LoadQuantizer(conf);
-            conf->mode = 2;
-            auto new_index = index_->LoadData(q, conf);
-            index_ = new_index;
+            // cache miss
+            auto pair = index_->CopyToGpuWithQuantizer(device_id);
+            index_ = pair.first;
 
             // cache
-            auto cached_quantizer = std::make_shared<CachedQuantizer>(q);
+            auto cached_quantizer = std::make_shared<CachedQuantizer>(pair.second);
            cache::GpuCacheMgr::GetInstance(device_id)->InsertItem(key, cached_quantizer);
         }
         return Status::OK();
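Note: with this change the cache-miss path copies the index and its quantizer to GPU in one step and caches only the quantizer half of the returned pair; a later hit reuses the cached quantizer via the LoadData path. A compressed sketch of that flow; Index, Quantizer and the map below are hypothetical stand-ins for the Milvus classes, not their real interfaces:

    // Sketch of the copy-then-cache shape above, with stand-ins for
    // VecIndex, the quantizer type, and GpuCacheMgr.
    #include <map>
    #include <memory>
    #include <string>
    #include <utility>

    struct Quantizer {};
    struct Index {
        std::pair<std::shared_ptr<Index>, std::shared_ptr<Quantizer>>
        CopyToGpuWithQuantizer(int /*device_id*/) {
            return {std::make_shared<Index>(), std::make_shared<Quantizer>()};
        }
        std::shared_ptr<Index> LoadData(const std::shared_ptr<Quantizer>&) {
            return std::make_shared<Index>();
        }
    };

    std::map<std::string, std::shared_ptr<Quantizer>> gpu_quantizer_cache;

    std::shared_ptr<Index> CopyToGpu(std::shared_ptr<Index> index,
                                     const std::string& key, int device_id) {
        if (auto cached = gpu_quantizer_cache[key]) {
            // cache hit: reuse the cached quantizer, copy only the data part
            return index->LoadData(cached);
        }
        // cache miss: copy data and quantizer together, then cache the quantizer
        auto pair = index->CopyToGpuWithQuantizer(device_id);
        gpu_quantizer_cache[key] = pair.second;
        return pair.first;
    }

    int main() {
        auto idx = std::make_shared<Index>();
        idx = CopyToGpu(idx, "index_file_key", 0);  // miss: fills the cache
        idx = CopyToGpu(idx, "index_file_key", 0);  // hit: reuses the quantizer
        return idx ? 0 : 1;
    }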
@@ -332,5 +332,24 @@ IVFHybridIndex::LoadData(const knowhere::QuantizerPtr& q, const Config& conf) {
     return nullptr;
 }
 
+std::pair<VecIndexPtr, knowhere::QuantizerPtr>
+IVFHybridIndex::CopyToGpuWithQuantizer(const int64_t& device_id, const Config& cfg) {
+    try {
+        // TODO(linxj): Hardcode here
+        if (auto hybrid_idx = std::dynamic_pointer_cast<knowhere::IVFSQHybrid>(index_)) {
+            auto pair = hybrid_idx->CopyCpuToGpuWithQuantizer(device_id, cfg);
+            auto new_idx = std::make_shared<IVFHybridIndex>(pair.first, type);
+            return std::make_pair(new_idx, pair.second);
+        } else {
+            WRAPPER_LOG_ERROR << "Hybrid mode not support for index type: " << int(type);
+        }
+    } catch (knowhere::KnowhereException& e) {
+        WRAPPER_LOG_ERROR << e.what();
+    } catch (std::exception& e) {
+        WRAPPER_LOG_ERROR << e.what();
+    }
+    return std::make_pair(nullptr, nullptr);
+}
+
 }  // namespace engine
 }  // namespace milvus
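Note: CopyToGpuWithQuantizer is only meaningful for the hybrid index, so the wrapper probes the concrete type with std::dynamic_pointer_cast before calling the derived-only API. A small self-contained illustration of that guard; Base and Hybrid are hypothetical stand-ins for knowhere::VectorIndex and knowhere::IVFSQHybrid:

    // Illustrative guard pattern with stand-in types.
    #include <iostream>
    #include <memory>

    struct Base { virtual ~Base() = default; };
    struct Hybrid : Base {
        void HybridOnly() { std::cout << "hybrid-only path\n"; }  // like CopyCpuToGpuWithQuantizer
    };

    void Dispatch(const std::shared_ptr<Base>& idx) {
        if (auto hybrid = std::dynamic_pointer_cast<Hybrid>(idx)) {
            hybrid->HybridOnly();  // only taken when the runtime type really is Hybrid
        } else {
            std::cout << "not a hybrid index, fall back\n";
        }
    }

    int main() {
        Dispatch(std::make_shared<Hybrid>());  // takes the hybrid branch
        Dispatch(std::make_shared<Base>());    // takes the fallback branch
        return 0;
    }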
@@ -105,6 +105,8 @@ class IVFHybridIndex : public IVFMixIndex {
 
     Status
     UnsetQuantizer() override;
+    std::pair<VecIndexPtr, knowhere::QuantizerPtr> CopyToGpuWithQuantizer(const int64_t& device_id,
+                                                                          const Config& cfg) override;
 
     VecIndexPtr
     LoadData(const knowhere::QuantizerPtr& q, const Config& conf) override;
 
@@ -117,6 +117,11 @@ class VecIndex : public cache::DataObj {
     UnsetQuantizer() {
         return Status::OK();
     }
 
+    virtual std::pair<VecIndexPtr, knowhere::QuantizerPtr>
+    CopyToGpuWithQuantizer(const int64_t& device_id, const Config& cfg = Config()) {
+        return std::make_pair(nullptr, nullptr);
+    }
+
     ////////////////
  private:
     int64_t size_ = 0;
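Note: the base class ships a do-nothing default that returns {nullptr, nullptr}, so callers can probe for hybrid GPU support by checking the first element of the pair instead of special-casing index types. A sketch of that default-virtual pattern; VecIndexLike and Quantizer are hypothetical stand-ins for the real interfaces:

    // Sketch of the default-virtual pattern added to VecIndex, with stand-in types.
    #include <cstdint>
    #include <iostream>
    #include <memory>
    #include <utility>

    struct Quantizer {};

    struct VecIndexLike {
        virtual ~VecIndexLike() = default;
        // Non-hybrid indexes inherit this default and simply report "unsupported".
        virtual std::pair<std::shared_ptr<VecIndexLike>, std::shared_ptr<Quantizer>>
        CopyToGpuWithQuantizer(int64_t /*device_id*/) {
            return std::make_pair(nullptr, nullptr);
        }
    };

    int main() {
        VecIndexLike plain_index;
        auto pair = plain_index.CopyToGpuWithQuantizer(0);
        if (pair.first == nullptr) {
            std::cout << "this index type has no hybrid GPU copy\n";
        }
        return 0;
    }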