From 7338a044f36a8db69be154780cd53626ea6a50a8 Mon Sep 17 00:00:00 2001
From: wxyu <xy.wang@zilliz.com>
Date: Sun, 13 Oct 2019 16:46:55 +0800
Subject: [PATCH] SQ8H in GPU part3

Former-commit-id: bd95d08bede45255fa10f4d8fdeb8674e435860b
---
 .../knowhere/index/vector_index/IndexIVF.cpp  | 14 +++++
 .../index/vector_index/IndexIVFSQHybrid.cpp   | 51 ++++++++++++-------
 .../index/vector_index/IndexIVFSQHybrid.h     |  3 ++
 cpp/src/db/engine/ExecutionEngineImpl.cpp     | 17 ++-----
 cpp/src/wrapper/VecImpl.cpp                   | 19 +++++++
 cpp/src/wrapper/VecImpl.h                     |  2 +
 cpp/src/wrapper/VecIndex.h                    |  5 ++
 7 files changed, 80 insertions(+), 31 deletions(-)
diff --git a/cpp/src/core/knowhere/knowhere/index/vector_index/IndexIVF.cpp b/cpp/src/core/knowhere/knowhere/index/vector_index/IndexIVF.cpp
index 510ab46bd6..99dd2e2926 100644
--- a/cpp/src/core/knowhere/knowhere/index/vector_index/IndexIVF.cpp
+++ b/cpp/src/core/knowhere/knowhere/index/vector_index/IndexIVF.cpp
@@ -115,6 +115,20 @@ IVF::Search(const DatasetPtr& dataset, const Config& config) {
 
     search_impl(rows, (float*)p_data, search_cfg->k, res_dis, res_ids, config);
 
+//    std::stringstream ss_res_id, ss_res_dist;
+//    for (int i = 0; i < 10; ++i) {
+//        printf("%llu", res_ids[i]);
+//        printf("\n");
+//        printf("%.6f", res_dis[i]);
+//        printf("\n");
+//        ss_res_id << res_ids[i] << " ";
+//        ss_res_dist << res_dis[i] << " ";
+//    }
+//    std::cout << std::endl << "after search: " << std::endl;
+//    std::cout << ss_res_id.str() << std::endl;
+//    std::cout << ss_res_dist.str() << std::endl << std::endl;
+
+
     auto id_buf = MakeMutableBufferSmart((uint8_t*)res_ids, sizeof(int64_t) * elems);
     auto dist_buf = MakeMutableBufferSmart((uint8_t*)res_dis, sizeof(float) * elems);
 
diff --git a/cpp/src/core/knowhere/knowhere/index/vector_index/IndexIVFSQHybrid.cpp b/cpp/src/core/knowhere/knowhere/index/vector_index/IndexIVFSQHybrid.cpp
index c6c9291388..8176ee0b49 100644
--- a/cpp/src/core/knowhere/knowhere/index/vector_index/IndexIVFSQHybrid.cpp
+++ b/cpp/src/core/knowhere/knowhere/index/vector_index/IndexIVFSQHybrid.cpp
@@ -79,20 +79,8 @@ IVFSQHybrid::CopyGpuToCpu(const Config& config) {
 VectorIndexPtr
 IVFSQHybrid::CopyCpuToGpu(const int64_t& device_id, const Config& config) {
     if (auto res = FaissGpuResourceMgr::GetInstance().GetRes(device_id)) {
-        ResScope rs(res, device_id, false);
-        faiss::gpu::GpuClonerOptions option;
-        option.allInGpu = true;
-
-        faiss::IndexComposition index_composition;
-        index_composition.index = index_.get();
-        index_composition.quantizer = nullptr;
-        index_composition.mode = 0;  // copy all
-
-        auto gpu_index = faiss::gpu::index_cpu_to_gpu(res->faiss_res.get(), device_id, &index_composition, &option);
-
-        std::shared_ptr<faiss::Index> device_index;
-        device_index.reset(gpu_index);
-        return std::make_shared<IVFSQHybrid>(device_index, device_id, res);
+        auto p = CopyCpuToGpuWithQuantizer(device_id, config);
+        return p.first;
     } else {
         KNOWHERE_THROW_MSG("CopyCpuToGpu Error, can't get gpu_resource");
     }
@@ -188,9 +176,10 @@ IVFSQHybrid::LoadData(const knowhere::QuantizerPtr& q, const Config& conf) {
             KNOWHERE_THROW_MSG("mode only support 2 in this func");
         }
     }
-    if (quantizer_conf->gpu_id != gpu_id_) {
-        KNOWHERE_THROW_MSG("quantizer and data must on the same gpu card");
-    }
+//    if (quantizer_conf->gpu_id != gpu_id_) {
+//        KNOWHERE_THROW_MSG("quantizer and data must on the same gpu card");
+//    }
+    gpu_id_ = quantizer_conf->gpu_id;
 
     if (auto res = FaissGpuResourceMgr::GetInstance().GetRes(gpu_id_)) {
         ResScope rs(res, gpu_id_, false);
@@ -216,6 +205,34 @@ IVFSQHybrid::LoadData(const knowhere::QuantizerPtr& q, const Config& conf) {
     }
 }
 
+// Clone this CPU index onto GPU `device_id` (mode 0: copy all) and return it together with
+// the quantizer that index_cpu_to_gpu is expected to leave in index_composition.quantizer.
+// Raises via KNOWHERE_THROW_MSG when no GPU resource is available or no quantizer came back.
+std::pair<VectorIndexPtr, QuantizerPtr>
+IVFSQHybrid::CopyCpuToGpuWithQuantizer(const int64_t& device_id, const Config& config) {
+    if (auto res = FaissGpuResourceMgr::GetInstance().GetRes(device_id)) {
+        ResScope rs(res, device_id, false);
+        faiss::gpu::GpuClonerOptions option;
+        option.allInGpu = true;
+        faiss::IndexComposition index_composition;
+        index_composition.index = index_.get();
+        index_composition.quantizer = nullptr;
+        index_composition.mode = 0;  // copy all
+        auto gpu_index = faiss::gpu::index_cpu_to_gpu(res->faiss_res.get(), device_id, &index_composition, &option);
+        std::shared_ptr<faiss::Index> device_index(gpu_index);  // own it before anything below can throw
+        if (index_composition.quantizer == nullptr) {  // the size computation below dereferences it
+            KNOWHERE_THROW_MSG("CopyCpuToGpu Error, quantizer is null after copy");
+        }
+        auto new_idx = std::make_shared<IVFSQHybrid>(device_index, device_id, res);
+        auto q = std::make_shared<FaissIVFQuantizer>();
+        q->quantizer = index_composition.quantizer;
+        q->size = index_composition.quantizer->d * index_composition.quantizer->getNumVecs() * sizeof(float);
+        return std::make_pair(new_idx, q);
+    } else {
+        KNOWHERE_THROW_MSG("CopyCpuToGpu Error, can't get gpu_resource");
+    }
+}
+
 FaissIVFQuantizer::~FaissIVFQuantizer() {
     if (quantizer != nullptr) {
         delete quantizer;
diff --git a/cpp/src/core/knowhere/knowhere/index/vector_index/IndexIVFSQHybrid.h b/cpp/src/core/knowhere/knowhere/index/vector_index/IndexIVFSQHybrid.h
index d0c58baaf3..cc59940028 100644
--- a/cpp/src/core/knowhere/knowhere/index/vector_index/IndexIVFSQHybrid.h
+++ b/cpp/src/core/knowhere/knowhere/index/vector_index/IndexIVFSQHybrid.h
@@ -63,6 +63,9 @@ class IVFSQHybrid : public GPUIVFSQ {
     VectorIndexPtr
     LoadData(const knowhere::QuantizerPtr& q, const Config& conf);
 
+    std::pair<VectorIndexPtr, QuantizerPtr>
+    CopyCpuToGpuWithQuantizer(const int64_t& device_id, const Config& config);  // returns {GPU index, quantizer}
+
     IndexModelPtr
     Train(const DatasetPtr& dataset, const Config& config) override;
 
diff --git a/cpp/src/db/engine/ExecutionEngineImpl.cpp b/cpp/src/db/engine/ExecutionEngineImpl.cpp
index c70a5c3b21..5b2c8eb2f3 100644
--- a/cpp/src/db/engine/ExecutionEngineImpl.cpp
+++ b/cpp/src/db/engine/ExecutionEngineImpl.cpp
@@ -256,27 +256,16 @@ ExecutionEngineImpl::CopyToGpu(uint64_t device_id, bool hybrid) {
         conf->gpu_id = device_id;
 
         if (quantizer) {
-            std::cout << "cache hit" << std::endl;
             // cache hit
             conf->mode = 2;
             auto new_index = index_->LoadData(quantizer->Data(), conf);
             index_ = new_index;
         } else {
-            std::cout << "cache miss" << std::endl;
-            // cache hit
-            // cache miss
-            if (index_ == nullptr) {
-                ENGINE_LOG_ERROR << "ExecutionEngineImpl: index is null, failed to copy to gpu";
-                return Status(DB_ERROR, "index is null");
-            }
-            conf->mode = 1;
-            auto q = index_->LoadQuantizer(conf);
-            conf->mode = 2;
-            auto new_index = index_->LoadData(q, conf);
-            index_ = new_index;
+            auto pair = index_->CopyToGpuWithQuantizer(device_id);
+            index_ = pair.first;
 
             // cache
-            auto cached_quantizer = std::make_shared<CachedQuantizer>(q);
+            auto cached_quantizer = std::make_shared<CachedQuantizer>(pair.second);
             cache::GpuCacheMgr::GetInstance(device_id)->InsertItem(key, cached_quantizer);
         }
         return Status::OK();
diff --git a/cpp/src/wrapper/VecImpl.cpp b/cpp/src/wrapper/VecImpl.cpp
index 3ff79690aa..c97900f839 100644
--- a/cpp/src/wrapper/VecImpl.cpp
+++ b/cpp/src/wrapper/VecImpl.cpp
@@ -332,5 +332,24 @@ IVFHybridIndex::LoadData(const knowhere::QuantizerPtr& q, const Config& conf) {
     return nullptr;
 }
 
+// Copy the index to GPU `device_id`; returns {GPU index wrapper, quantizer}, or {nullptr, nullptr} (logged) on failure.
+std::pair<VecIndexPtr, knowhere::QuantizerPtr>
+IVFHybridIndex::CopyToGpuWithQuantizer(const int64_t& device_id, const Config& cfg) {
+    try {
+        auto sq_hybrid = std::dynamic_pointer_cast<knowhere::IVFSQHybrid>(index_);  // TODO(linxj): Hardcode here
+        if (sq_hybrid == nullptr) {
+            WRAPPER_LOG_ERROR << "Hybrid mode not support for index type: " << int(type);
+        } else {
+            auto gpu_pair = sq_hybrid->CopyCpuToGpuWithQuantizer(device_id, cfg);
+            return std::make_pair(std::make_shared<IVFHybridIndex>(gpu_pair.first, type), gpu_pair.second);
+        }
+    } catch (knowhere::KnowhereException& e) {
+        WRAPPER_LOG_ERROR << e.what();
+    } catch (std::exception& e) {
+        WRAPPER_LOG_ERROR << e.what();
+    }
+    return std::make_pair(nullptr, nullptr);
+}
+
 }  // namespace engine
 }  // namespace milvus
diff --git a/cpp/src/wrapper/VecImpl.h b/cpp/src/wrapper/VecImpl.h
index 1f5ca296bb..84b2f11564 100644
--- a/cpp/src/wrapper/VecImpl.h
+++ b/cpp/src/wrapper/VecImpl.h
@@ -105,6 +105,8 @@ class IVFHybridIndex : public IVFMixIndex {
 
     Status
     UnsetQuantizer() override;
+    std::pair<VecIndexPtr, knowhere::QuantizerPtr>
+    CopyToGpuWithQuantizer(const int64_t& device_id, const Config& cfg) override;
 
     VecIndexPtr
     LoadData(const knowhere::QuantizerPtr& q, const Config& conf) override;
diff --git a/cpp/src/wrapper/VecIndex.h b/cpp/src/wrapper/VecIndex.h
index 55981ef528..e52c26f0bf 100644
--- a/cpp/src/wrapper/VecIndex.h
+++ b/cpp/src/wrapper/VecIndex.h
@@ -117,6 +117,11 @@ class VecIndex : public cache::DataObj {
     UnsetQuantizer() {
         return Status::OK();
     }
+
+    virtual std::pair<VecIndexPtr, knowhere::QuantizerPtr>
+    CopyToGpuWithQuantizer(const int64_t& device_id, const Config& cfg = Config()) {
+        return {nullptr, nullptr};  // base-class default: nothing is copied, both members are null
+    }
     ////////////////
  private:
     int64_t size_ = 0;