From 606ef0b5486630438ea8d2cbf14f2ddff4ae8bcf Mon Sep 17 00:00:00 2001 From: starlord Date: Sun, 7 Jul 2019 17:24:53 +0800 Subject: [PATCH 1/8] new engine Former-commit-id: 178888a85b0cc0394d429dfeac4300971f65079f --- cpp/src/db/EngineFactory.cpp | 23 +++++- cpp/src/db/ExecutionEngine.h | 4 +- cpp/src/db/ExecutionEngineImpl.cpp | 116 +++++++++++++++++++++++++++++ cpp/src/db/ExecutionEngineImpl.h | 68 +++++++++++++++++ cpp/src/server/RequestTask.cpp | 4 +- cpp/unittest/db/misc_test.cpp | 2 +- 6 files changed, 211 insertions(+), 6 deletions(-) create mode 100644 cpp/src/db/ExecutionEngineImpl.cpp create mode 100644 cpp/src/db/ExecutionEngineImpl.h diff --git a/cpp/src/db/EngineFactory.cpp b/cpp/src/db/EngineFactory.cpp index bacce70ce4..56a6b4d1d2 100644 --- a/cpp/src/db/EngineFactory.cpp +++ b/cpp/src/db/EngineFactory.cpp @@ -5,13 +5,14 @@ ******************************************************************************/ #include "EngineFactory.h" #include "FaissExecutionEngine.h" +#include "ExecutionEngineImpl.h" #include "Log.h" - namespace zilliz { namespace milvus { namespace engine { +#if 0 ExecutionEnginePtr EngineFactory::Build(uint16_t dimension, const std::string &location, @@ -26,7 +27,7 @@ EngineFactory::Build(uint16_t dimension, break; } - case EngineType::FAISS_IVFFLAT: { + case EngineType::FAISS_IVFFLAT_GPU: { execution_engine_ptr = ExecutionEnginePtr(new FaissExecutionEngine(dimension, location, "IVF", "IDMap,Flat")); break; @@ -41,6 +42,24 @@ EngineFactory::Build(uint16_t dimension, execution_engine_ptr->Init(); return execution_engine_ptr; } +#else +ExecutionEnginePtr +EngineFactory::Build(uint16_t dimension, + const std::string &location, + EngineType type) { + + if(type == EngineType::INVALID) { + ENGINE_LOG_ERROR << "Unsupported engine type"; + return nullptr; + } + + ExecutionEnginePtr execution_engine_ptr = + std::make_shared(dimension, location, type); + + execution_engine_ptr->Init(); + return execution_engine_ptr; +} +#endif } } diff --git a/cpp/src/db/ExecutionEngine.h b/cpp/src/db/ExecutionEngine.h index f8c05f6f9d..a101acd3cd 100644 --- a/cpp/src/db/ExecutionEngine.h +++ b/cpp/src/db/ExecutionEngine.h @@ -17,7 +17,9 @@ namespace engine { enum class EngineType { INVALID = 0, FAISS_IDMAP = 1, - FAISS_IVFFLAT, + FAISS_IVFFLAT_GPU, + FAISS_IVFFLAT_CPU, + SPTAG_KDT_RNT_CPU, }; class ExecutionEngine { diff --git a/cpp/src/db/ExecutionEngineImpl.cpp b/cpp/src/db/ExecutionEngineImpl.cpp new file mode 100644 index 0000000000..32b7826430 --- /dev/null +++ b/cpp/src/db/ExecutionEngineImpl.cpp @@ -0,0 +1,116 @@ +/******************************************************************************* + * Copyright 上海赜睿信息科技有限公司(Zilliz) - All Rights Reserved + * Unauthorized copying of this file, via any medium is strictly prohibited. + * Proprietary and confidential. + ******************************************************************************/ +#include "ExecutionEngineImpl.h" +#include "Log.h" + +#include "wrapper/knowhere/vec_impl.h" +#include "knowhere/index/vector_index/ivf.h" +#include "knowhere/index/vector_index/gpu_ivf.h" +#include "knowhere/index/vector_index/cpu_kdt_rng.h" + +namespace zilliz { +namespace milvus { +namespace engine { + + +ExecutionEngineImpl::ExecutionEngineImpl(uint16_t dimension, + const std::string& location, + EngineType type) + : location_(location) { + index_ = CreatetVecIndex(type); +} + +vecwise::engine::VecIndexPtr ExecutionEngineImpl::CreatetVecIndex(EngineType type) { + std::shared_ptr index; + switch(type) { + case EngineType::FAISS_IDMAP: { + + break; + } + case EngineType::FAISS_IVFFLAT_GPU: { + index = std::make_shared(0); + break; + } + case EngineType::FAISS_IVFFLAT_CPU: { + index = std::make_shared(); + break; + } + case EngineType::SPTAG_KDT_RNT_CPU: { + index = std::make_shared(); + break; + } + default:{ + ENGINE_LOG_ERROR << "Invalid engine type"; + return nullptr; + } + } + + return std::make_shared(index); +} + +Status ExecutionEngineImpl::AddWithIds(long n, const float *xdata, const long *xids) { + + return Status::OK(); +} + +size_t ExecutionEngineImpl::Count() const { + return 0; +} + +size_t ExecutionEngineImpl::Size() const { + return 0; +} + +size_t ExecutionEngineImpl::Dimension() const { + return 0; +} + +size_t ExecutionEngineImpl::PhysicalSize() const { + return 0; +} + +Status ExecutionEngineImpl::Serialize() { + return Status::OK(); +} + +Status ExecutionEngineImpl::Load() { + + return Status::OK(); +} + +Status ExecutionEngineImpl::Merge(const std::string& location) { + + return Status::OK(); +} + +ExecutionEnginePtr +ExecutionEngineImpl::BuildIndex(const std::string& location) { + return nullptr; +} + +Status ExecutionEngineImpl::Search(long n, + const float *data, + long k, + float *distances, + long *labels) const { + + return Status::OK(); +} + +Status ExecutionEngineImpl::Cache() { + + return Status::OK(); +} + +Status ExecutionEngineImpl::Init() { + + return Status::OK(); +} + + +} // namespace engine +} // namespace milvus +} // namespace zilliz diff --git a/cpp/src/db/ExecutionEngineImpl.h b/cpp/src/db/ExecutionEngineImpl.h new file mode 100644 index 0000000000..c720f07158 --- /dev/null +++ b/cpp/src/db/ExecutionEngineImpl.h @@ -0,0 +1,68 @@ +/******************************************************************************* + * Copyright 上海赜睿信息科技有限公司(Zilliz) - All Rights Reserved + * Unauthorized copying of this file, via any medium is strictly prohibited. + * Proprietary and confidential. + ******************************************************************************/ +#pragma once + +#include "ExecutionEngine.h" +#include "wrapper/knowhere/vec_index.h" + +#include +#include + +namespace zilliz { +namespace milvus { +namespace engine { + + +class ExecutionEngineImpl : public ExecutionEngine { +public: + + ExecutionEngineImpl(uint16_t dimension, + const std::string& location, + EngineType type); + + Status AddWithIds(long n, const float *xdata, const long *xids) override; + + size_t Count() const override; + + size_t Size() const override; + + size_t Dimension() const override; + + size_t PhysicalSize() const override; + + Status Serialize() override; + + Status Load() override; + + Status Merge(const std::string& location) override; + + Status Search(long n, + const float *data, + long k, + float *distances, + long *labels) const override; + + ExecutionEnginePtr BuildIndex(const std::string&) override; + + Status Cache() override; + + Status Init() override; + +private: + vecwise::engine::VecIndexPtr CreatetVecIndex(EngineType type); + +protected: + vecwise::engine::VecIndexPtr index_; + + std::string location_; + + size_t nprobe_ = 0; +}; + + +} // namespace engine +} // namespace milvus +} // namespace zilliz diff --git a/cpp/src/server/RequestTask.cpp b/cpp/src/server/RequestTask.cpp index 1b91883af5..07a8305d1f 100644 --- a/cpp/src/server/RequestTask.cpp +++ b/cpp/src/server/RequestTask.cpp @@ -29,7 +29,7 @@ namespace { static std::map map_type = { {0, engine::EngineType::INVALID}, {1, engine::EngineType::FAISS_IDMAP}, - {2, engine::EngineType::FAISS_IVFFLAT}, + {2, engine::EngineType::FAISS_IVFFLAT_GPU}, }; if(map_type.find(type) == map_type.end()) { @@ -43,7 +43,7 @@ namespace { static std::map map_type = { {engine::EngineType::INVALID, 0}, {engine::EngineType::FAISS_IDMAP, 1}, - {engine::EngineType::FAISS_IVFFLAT, 2}, + {engine::EngineType::FAISS_IVFFLAT_GPU, 2}, }; if(map_type.find(type) == map_type.end()) { diff --git a/cpp/unittest/db/misc_test.cpp b/cpp/unittest/db/misc_test.cpp index 4356746fc2..a49c4d5807 100644 --- a/cpp/unittest/db/misc_test.cpp +++ b/cpp/unittest/db/misc_test.cpp @@ -46,7 +46,7 @@ TEST(DBMiscTest, ENGINE_API_TEST) { auto engine_ptr = engine::EngineFactory::Build(128, "/tmp", engine::EngineType::INVALID); ASSERT_EQ(engine_ptr, nullptr); - engine_ptr = engine::EngineFactory::Build(128, "/tmp", engine::EngineType::FAISS_IVFFLAT); + engine_ptr = engine::EngineFactory::Build(128, "/tmp", engine::EngineType::FAISS_IVFFLAT_GPU); ASSERT_NE(engine_ptr, nullptr); engine_ptr = engine::EngineFactory::Build(128, "/tmp", engine::EngineType::FAISS_IDMAP); From 8edaff9517b50b3bc2fba1b1704f5a347686a88c Mon Sep 17 00:00:00 2001 From: starlord Date: Sun, 7 Jul 2019 20:20:30 +0800 Subject: [PATCH 2/8] new engine Former-commit-id: 8f84ec8cbe26733b730528b49ae76c9ea158d359 --- cpp/src/CMakeLists.txt | 23 +++++++++++++++++++++-- cpp/src/sdk/examples/simple/main.cpp | 2 +- cpp/unittest/db/CMakeLists.txt | 13 ++++++++++++- 3 files changed, 34 insertions(+), 4 deletions(-) diff --git a/cpp/src/CMakeLists.txt b/cpp/src/CMakeLists.txt index 6a7fb6835e..cc577e2138 100644 --- a/cpp/src/CMakeLists.txt +++ b/cpp/src/CMakeLists.txt @@ -161,6 +161,17 @@ set(server_libs metrics ) +set(knowhere_libs + knowhere + SPTAGLibStatic + arrow + jemalloc_pic + faiss + openblas + lapack + tbb + ) + add_executable(milvus_server ${config_files} ${server_files} @@ -170,9 +181,17 @@ add_executable(milvus_server ) if (ENABLE_LICENSE STREQUAL "ON") - target_link_libraries(milvus_server ${server_libs} license_check ${third_party_libs}) + target_link_libraries(milvus_server + ${server_libs} + license_check + ${third_party_libs} + ${knowhere_libs}) else () - target_link_libraries(milvus_server ${server_libs} ${third_party_libs}) + target_link_libraries(milvus_server + ${server_libs} + ${third_party_libs} + ${knowhere_libs} + ) endif() if (ENABLE_LICENSE STREQUAL "ON") diff --git a/cpp/src/sdk/examples/simple/main.cpp b/cpp/src/sdk/examples/simple/main.cpp index 499b8a9935..3a7880997d 100644 --- a/cpp/src/sdk/examples/simple/main.cpp +++ b/cpp/src/sdk/examples/simple/main.cpp @@ -51,7 +51,7 @@ main(int argc, char *argv[]) { } ClientTest test; - test.Test(address, port); + test.Test("", port); printf("Client stop...\n"); return 0; diff --git a/cpp/unittest/db/CMakeLists.txt b/cpp/unittest/db/CMakeLists.txt index b2720f7006..3b0fb11b20 100644 --- a/cpp/unittest/db/CMakeLists.txt +++ b/cpp/unittest/db/CMakeLists.txt @@ -49,6 +49,17 @@ set(db_libs mysqlpp ) -target_link_libraries(db_test ${db_libs} ${unittest_libs}) +set(knowhere_libs + knowhere + SPTAGLibStatic + arrow + jemalloc_pic + faiss + openblas + lapack + tbb + ) + +target_link_libraries(db_test ${db_libs} ${unittest_libs} ${knowhere_libs}) install(TARGETS db_test DESTINATION bin) From 57ec2e566903fc14fdb01a16ce0dfdf0745e6396 Mon Sep 17 00:00:00 2001 From: starlord Date: Sun, 7 Jul 2019 20:21:38 +0800 Subject: [PATCH 3/8] new engine Former-commit-id: 4718ebf7b53b3ad523bf9318a26d3389ce0c311b --- cpp/src/sdk/examples/simple/main.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cpp/src/sdk/examples/simple/main.cpp b/cpp/src/sdk/examples/simple/main.cpp index 3a7880997d..499b8a9935 100644 --- a/cpp/src/sdk/examples/simple/main.cpp +++ b/cpp/src/sdk/examples/simple/main.cpp @@ -51,7 +51,7 @@ main(int argc, char *argv[]) { } ClientTest test; - test.Test("", port); + test.Test(address, port); printf("Client stop...\n"); return 0; From 1410dc8695ac8051a85970d44e00378b588e7cbd Mon Sep 17 00:00:00 2001 From: starlord Date: Mon, 8 Jul 2019 09:42:54 +0800 Subject: [PATCH 4/8] fix build error Former-commit-id: b32b8705608b064a0209be5c8012e5a5d58c1b93 --- cpp/unittest/db/CMakeLists.txt | 13 ++++++------- cpp/unittest/metrics/CMakeLists.txt | 23 ++++++++++++++++------- 2 files changed, 22 insertions(+), 14 deletions(-) diff --git a/cpp/unittest/db/CMakeLists.txt b/cpp/unittest/db/CMakeLists.txt index 3b0fb11b20..44b09d7b25 100644 --- a/cpp/unittest/db/CMakeLists.txt +++ b/cpp/unittest/db/CMakeLists.txt @@ -7,6 +7,7 @@ aux_source_directory(${MILVUS_ENGINE_SRC}/db db_srcs) aux_source_directory(${MILVUS_ENGINE_SRC}/config config_files) aux_source_directory(${MILVUS_ENGINE_SRC}/cache cache_srcs) aux_source_directory(${MILVUS_ENGINE_SRC}/wrapper wrapper_src) +aux_source_directory(${MILVUS_ENGINE_SRC}/wrapper/knowhere knowhere_src) aux_source_directory(./ test_srcs) aux_source_directory(${MILVUS_ENGINE_SRC}/db/scheduler scheduler_files) @@ -30,18 +31,14 @@ set(db_test_src ${db_srcs} ${db_scheduler_srcs} ${wrapper_src} + ${knowhere_src} ${require_files} - ${test_srcs}) + ${test_srcs} + ) cuda_add_executable(db_test ${db_test_src}) set(db_libs - knowhere - faiss - openblas - lapack - cudart - cublas sqlite3 boost_system boost_filesystem @@ -58,6 +55,8 @@ set(knowhere_libs openblas lapack tbb + cudart + cublas ) target_link_libraries(db_test ${db_libs} ${unittest_libs} ${knowhere_libs}) diff --git a/cpp/unittest/metrics/CMakeLists.txt b/cpp/unittest/metrics/CMakeLists.txt index be5a542da6..d7ae12aff8 100644 --- a/cpp/unittest/metrics/CMakeLists.txt +++ b/cpp/unittest/metrics/CMakeLists.txt @@ -12,11 +12,12 @@ include_directories(../../src) -aux_source_directory(../../src/db db_srcs) -aux_source_directory(../../src/config config_files) -aux_source_directory(../../src/cache cache_srcs) -aux_source_directory(../../src/wrapper wrapper_src) -aux_source_directory(../../src/metrics metrics_src) +aux_source_directory(${MILVUS_ENGINE_SRC}/db db_srcs) +aux_source_directory(${MILVUS_ENGINE_SRC}/config config_files) +aux_source_directory(${MILVUS_ENGINE_SRC}/cache cache_srcs) +aux_source_directory(${MILVUS_ENGINE_SRC}/wrapper wrapper_src) +aux_source_directory(${MILVUS_ENGINE_SRC}/wrapper/knowhere knowhere_src) +aux_source_directory(${MILVUS_ENGINE_SRC}/src/metrics metrics_src) aux_source_directory(./ test_srcs) aux_source_directory(${MILVUS_ENGINE_SRC}/db/scheduler scheduler_files) @@ -54,18 +55,26 @@ set(count_test_src ${db_srcs} ${db_scheduler_srcs} ${wrapper_src} + ${knowhere_src} ${metrics_src} ${test_srcs} ) - add_executable(metrics_test ${count_test_src} ${require_files} ) -target_link_libraries(metrics_test +set(knowhere_libs knowhere + SPTAGLibStatic + arrow + jemalloc_pic faiss openblas lapack + tbb + ) + +target_link_libraries(metrics_test + ${knowhere_libs} cudart cublas sqlite3 From 0d725b82aa57c072c3cf5d33bd35af1a4932a139 Mon Sep 17 00:00:00 2001 From: starlord Date: Mon, 8 Jul 2019 10:02:39 +0800 Subject: [PATCH 5/8] update knowhere Former-commit-id: 26cefdfab21d0bf5350b2a44263d6f0e58a69768 --- cpp/thirdparty/knowhere | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cpp/thirdparty/knowhere b/cpp/thirdparty/knowhere index 844e600834..2d543bfab6 160000 --- a/cpp/thirdparty/knowhere +++ b/cpp/thirdparty/knowhere @@ -1 +1 @@ -Subproject commit 844e600834df1eeafc6c7e5936338ae964bd1d41 +Subproject commit 2d543bfab655398f30113681f348519acac40ab5 From 92ebfe95d0e14411bbc9ccd54f777ac316d12779 Mon Sep 17 00:00:00 2001 From: "xj.lin" Date: Mon, 8 Jul 2019 19:09:18 +0800 Subject: [PATCH 6/8] update... Former-commit-id: 6366c5292e1483be5009e46bd273fdb00433fea6 --- cpp/src/CMakeLists.txt | 2 +- cpp/src/cache/DataObj.h | 2 +- cpp/src/db/EngineFactory.cpp | 2 +- cpp/src/db/ExecutionEngineImpl.cpp | 127 +++++++++++++++------ cpp/src/db/ExecutionEngineImpl.h | 28 +++-- cpp/src/db/FaissExecutionEngine.cpp | 2 + cpp/src/db/FaissExecutionEngine.h | 2 + cpp/src/wrapper/Index.cpp | 2 + cpp/src/wrapper/Index.h | 27 +++-- cpp/src/wrapper/IndexBuilder.cpp | 4 +- cpp/src/wrapper/IndexBuilder.h | 2 + cpp/src/wrapper/Operand.cpp | 2 + cpp/src/wrapper/Operand.h | 2 + cpp/src/wrapper/knowhere/data_transfer.cpp | 2 +- cpp/src/wrapper/knowhere/data_transfer.h | 2 +- cpp/src/wrapper/knowhere/vec_impl.cpp | 37 ++++-- cpp/src/wrapper/knowhere/vec_impl.h | 15 ++- cpp/src/wrapper/knowhere/vec_index.cpp | 42 ++++--- cpp/src/wrapper/knowhere/vec_index.h | 19 ++- cpp/thirdparty/knowhere | 2 +- 20 files changed, 233 insertions(+), 90 deletions(-) diff --git a/cpp/src/CMakeLists.txt b/cpp/src/CMakeLists.txt index cc577e2138..0627b2010a 100644 --- a/cpp/src/CMakeLists.txt +++ b/cpp/src/CMakeLists.txt @@ -10,7 +10,7 @@ aux_source_directory(config config_files) aux_source_directory(server server_files) aux_source_directory(utils utils_files) aux_source_directory(db db_files) -aux_source_directory(wrapper wrapper_files) +#aux_source_directory(wrapper wrapper_files) aux_source_directory(metrics metrics_files) aux_source_directory(wrapper/knowhere knowhere_files) diff --git a/cpp/src/cache/DataObj.h b/cpp/src/cache/DataObj.h index 1dff04027e..d5e52f7664 100644 --- a/cpp/src/cache/DataObj.h +++ b/cpp/src/cache/DataObj.h @@ -28,7 +28,7 @@ public: return 0; } - return index_->ntotal*(index_->dim*4); + return index_->Count() * index_->Dimension() * sizeof(float); } private: diff --git a/cpp/src/db/EngineFactory.cpp b/cpp/src/db/EngineFactory.cpp index 56a6b4d1d2..3389c0a07e 100644 --- a/cpp/src/db/EngineFactory.cpp +++ b/cpp/src/db/EngineFactory.cpp @@ -4,7 +4,7 @@ * Proprietary and confidential. ******************************************************************************/ #include "EngineFactory.h" -#include "FaissExecutionEngine.h" +//#include "FaissExecutionEngine.h" #include "ExecutionEngineImpl.h" #include "Log.h" diff --git a/cpp/src/db/ExecutionEngineImpl.cpp b/cpp/src/db/ExecutionEngineImpl.cpp index 32b7826430..28349695c7 100644 --- a/cpp/src/db/ExecutionEngineImpl.cpp +++ b/cpp/src/db/ExecutionEngineImpl.cpp @@ -3,13 +3,14 @@ * Unauthorized copying of this file, via any medium is strictly prohibited. * Proprietary and confidential. ******************************************************************************/ -#include "ExecutionEngineImpl.h" +#include #include "Log.h" +#include "src/cache/CpuCacheMgr.h" +#include "ExecutionEngineImpl.h" +#include "wrapper/knowhere/vec_index.h" #include "wrapper/knowhere/vec_impl.h" -#include "knowhere/index/vector_index/ivf.h" -#include "knowhere/index/vector_index/gpu_ivf.h" -#include "knowhere/index/vector_index/cpu_kdt_rng.h" + namespace zilliz { namespace milvus { @@ -17,95 +18,151 @@ namespace engine { ExecutionEngineImpl::ExecutionEngineImpl(uint16_t dimension, - const std::string& location, - EngineType type) - : location_(location) { - index_ = CreatetVecIndex(type); + const std::string &location, + EngineType type) + : location_(location), dim(dimension), build_type(type) { + index_ = CreatetVecIndex(EngineType::FAISS_IDMAP); + std::static_pointer_cast(index_)->Build(dimension); } -vecwise::engine::VecIndexPtr ExecutionEngineImpl::CreatetVecIndex(EngineType type) { - std::shared_ptr index; - switch(type) { - case EngineType::FAISS_IDMAP: { +ExecutionEngineImpl::ExecutionEngineImpl(VecIndexPtr index, + const std::string &location, + EngineType type) + : index_(std::move(index)), location_(location), build_type(type) { +} +VecIndexPtr ExecutionEngineImpl::CreatetVecIndex(EngineType type) { + std::shared_ptr index; + switch (type) { + case EngineType::FAISS_IDMAP: { + index = GetVecIndexFactory(IndexType::FAISS_IDMAP); break; } case EngineType::FAISS_IVFFLAT_GPU: { - index = std::make_shared(0); + index = GetVecIndexFactory(IndexType::FAISS_IVFFLAT_GPU); break; } case EngineType::FAISS_IVFFLAT_CPU: { - index = std::make_shared(); + index = GetVecIndexFactory(IndexType::FAISS_IVFFLAT_CPU); break; } case EngineType::SPTAG_KDT_RNT_CPU: { - index = std::make_shared(); + index = GetVecIndexFactory(IndexType::SPTAG_KDT_RNT_CPU); break; } - default:{ + default: { ENGINE_LOG_ERROR << "Invalid engine type"; return nullptr; } } - - return std::make_shared(index); + return index; } Status ExecutionEngineImpl::AddWithIds(long n, const float *xdata, const long *xids) { - + index_->Add(n, xdata, xids, Config::object{{"dim", dim}}); return Status::OK(); } size_t ExecutionEngineImpl::Count() const { - return 0; + return index_->Count(); } size_t ExecutionEngineImpl::Size() const { - return 0; + return (size_t) (Count() * Dimension()) * sizeof(float); } size_t ExecutionEngineImpl::Dimension() const { - return 0; + return index_->Dimension(); } size_t ExecutionEngineImpl::PhysicalSize() const { - return 0; + return (size_t) (Count() * Dimension()) * sizeof(float); } Status ExecutionEngineImpl::Serialize() { + // TODO(groot): + auto binaryset = index_->Serialize(); return Status::OK(); } Status ExecutionEngineImpl::Load() { - + // TODO(groot): return Status::OK(); } -Status ExecutionEngineImpl::Merge(const std::string& location) { +VecIndexPtr ExecutionEngineImpl::Load(const std::string &location) { + // TODO(groot): dev func in Fake code + // pseude code + //auto data = read_file(location); + //auto index_type = get_index_type(data); + //auto binaryset = get_index_binary(data); + ///// - return Status::OK(); -} - -ExecutionEnginePtr -ExecutionEngineImpl::BuildIndex(const std::string& location) { + //return LoadVecIndex(index_type, binaryset); return nullptr; } -Status ExecutionEngineImpl::Search(long n, - const float *data, - long k, - float *distances, - long *labels) const { +Status ExecutionEngineImpl::Merge(const std::string &location) { + if (location == location_) { + return Status::Error("Cannot Merge Self"); + } + ENGINE_LOG_DEBUG << "Merge index file: " << location << " to: " << location_; + auto to_merge = zilliz::milvus::cache::CpuCacheMgr::GetInstance()->GetIndex(location); + if (!to_merge) { + to_merge = Load(location); + } + + auto file_index = std::dynamic_pointer_cast(index_); + index_->Add(file_index->Count(), file_index->GetRawVectors(), file_index->GetRawIds()); + return Status::OK(); +} + +// TODO(linxj): add config +ExecutionEnginePtr +ExecutionEngineImpl::BuildIndex(const std::string &location) { + ENGINE_LOG_DEBUG << "Build index file: " << location << " from: " << location_; + + auto from_index = std::dynamic_pointer_cast(index_); + auto to_index = CreatetVecIndex(build_type); + to_index->BuildAll(Count(), + from_index->GetRawVectors(), + from_index->GetRawIds(), + Config::object{{"dim", Dimension()}, {"gpu_id", gpu_num}}); + + return std::make_shared(to_index, location, build_type); +} + +Status ExecutionEngineImpl::Search(long n, + const float *data, + long k, + float *distances, + long *labels) const { + index_->Search(n, data, distances, labels, Config::object{{"k", k}, {"nprobe", nprobe_}}); return Status::OK(); } Status ExecutionEngineImpl::Cache() { + zilliz::milvus::cache::CpuCacheMgr::GetInstance()->InsertItem(location_, index_); return Status::OK(); } Status ExecutionEngineImpl::Init() { + using namespace zilliz::milvus::server; + ServerConfig &config = ServerConfig::GetInstance(); + ConfigNode server_config = config.GetConfig(CONFIG_SERVER); + gpu_num = server_config.GetInt32Value("gpu_index", 0); + + switch (build_type) { + case EngineType::FAISS_IVFFLAT_GPU: { + } + case EngineType::FAISS_IVFFLAT_CPU: { + ConfigNode engine_config = config.GetConfig(CONFIG_ENGINE); + nprobe_ = engine_config.GetInt32Value(CONFIG_NPROBE, 1000); + break; + } + } return Status::OK(); } diff --git a/cpp/src/db/ExecutionEngineImpl.h b/cpp/src/db/ExecutionEngineImpl.h index c720f07158..6c0c83d9b3 100644 --- a/cpp/src/db/ExecutionEngineImpl.h +++ b/cpp/src/db/ExecutionEngineImpl.h @@ -11,17 +11,22 @@ #include #include + namespace zilliz { namespace milvus { namespace engine { class ExecutionEngineImpl : public ExecutionEngine { -public: + public: ExecutionEngineImpl(uint16_t dimension, - const std::string& location, - EngineType type); + const std::string &location, + EngineType type); + + ExecutionEngineImpl(VecIndexPtr index, + const std::string &location, + EngineType type); Status AddWithIds(long n, const float *xdata, const long *xids) override; @@ -37,7 +42,7 @@ public: Status Load() override; - Status Merge(const std::string& location) override; + Status Merge(const std::string &location) override; Status Search(long n, const float *data, @@ -45,21 +50,26 @@ public: float *distances, long *labels) const override; - ExecutionEnginePtr BuildIndex(const std::string&) override; + ExecutionEnginePtr BuildIndex(const std::string &) override; Status Cache() override; Status Init() override; -private: - vecwise::engine::VecIndexPtr CreatetVecIndex(EngineType type); + private: + VecIndexPtr CreatetVecIndex(EngineType type); -protected: - vecwise::engine::VecIndexPtr index_; + VecIndexPtr Load(const std::string &location); + protected: + VecIndexPtr index_ = nullptr; + EngineType build_type; + + int64_t dim; std::string location_; size_t nprobe_ = 0; + int64_t gpu_num = 0; }; diff --git a/cpp/src/db/FaissExecutionEngine.cpp b/cpp/src/db/FaissExecutionEngine.cpp index 20bd530e78..a2abe02e8a 100644 --- a/cpp/src/db/FaissExecutionEngine.cpp +++ b/cpp/src/db/FaissExecutionEngine.cpp @@ -3,6 +3,7 @@ * Unauthorized copying of this file, via any medium is strictly prohibited. * Proprietary and confidential. ******************************************************************************/ +#if 0 #include "FaissExecutionEngine.h" #include "Log.h" @@ -181,3 +182,4 @@ Status FaissExecutionEngine::Init() { } // namespace engine } // namespace milvus } // namespace zilliz +#endif diff --git a/cpp/src/db/FaissExecutionEngine.h b/cpp/src/db/FaissExecutionEngine.h index f9f37ad978..f8f0ad88bc 100644 --- a/cpp/src/db/FaissExecutionEngine.h +++ b/cpp/src/db/FaissExecutionEngine.h @@ -5,6 +5,7 @@ ******************************************************************************/ #pragma once +#if 0 #include "ExecutionEngine.h" #include "faiss/Index.h" @@ -71,3 +72,4 @@ protected: } // namespace engine } // namespace milvus } // namespace zilliz +#endif diff --git a/cpp/src/wrapper/Index.cpp b/cpp/src/wrapper/Index.cpp index 18e20d830a..4b10c1e686 100644 --- a/cpp/src/wrapper/Index.cpp +++ b/cpp/src/wrapper/Index.cpp @@ -4,6 +4,7 @@ // Proprietary and confidential. //////////////////////////////////////////////////////////////////////////////// +#if 0 // TODO: maybe support static search #ifdef GPU_VERSION #include "faiss/gpu/GpuAutoTune.h" @@ -80,3 +81,4 @@ Index_ptr read_index(const std::string &file_name) { } } } +#endif diff --git a/cpp/src/wrapper/Index.h b/cpp/src/wrapper/Index.h index ba157348d4..1668059d11 100644 --- a/cpp/src/wrapper/Index.h +++ b/cpp/src/wrapper/Index.h @@ -6,23 +6,29 @@ #pragma once -#include -#include -#include -#include -#include +//#include +//#include +//#include +//#include +//#include +// +//#include "faiss/AutoTune.h" +//#include "faiss/index_io.h" +// +//#include "Operand.h" -#include "faiss/AutoTune.h" -#include "faiss/index_io.h" +#include "knowhere/vec_index.h" -#include "Operand.h" namespace zilliz { namespace milvus { namespace engine { -class Index; -using Index_ptr = std::shared_ptr; +using Index_ptr = VecIndexPtr; + +#if 0 +//class Index; +//using Index_ptr = std::shared_ptr; class Index { typedef long idx_t; @@ -75,6 +81,7 @@ private: void write_index(const Index_ptr &index, const std::string &file_name); extern Index_ptr read_index(const std::string &file_name); +#endif } diff --git a/cpp/src/wrapper/IndexBuilder.cpp b/cpp/src/wrapper/IndexBuilder.cpp index d4429c381a..1302ca4726 100644 --- a/cpp/src/wrapper/IndexBuilder.cpp +++ b/cpp/src/wrapper/IndexBuilder.cpp @@ -4,6 +4,7 @@ // Proprietary and confidential. //////////////////////////////////////////////////////////////////////////////// +#if 0 #include "mutex" @@ -128,10 +129,8 @@ Index_ptr BgCpuBuilder::build_all(const long &nb, const float *xb, const long *i return std::make_shared(index); } -// TODO: Be Factory pattern later IndexBuilderPtr GetIndexBuilder(const Operand_ptr &opd) { if (opd->index_type == "IDMap") { - // TODO: fix hardcode IndexBuilderPtr index = nullptr; return std::make_shared(opd); } @@ -142,3 +141,4 @@ IndexBuilderPtr GetIndexBuilder(const Operand_ptr &opd) { } } } +#endif diff --git a/cpp/src/wrapper/IndexBuilder.h b/cpp/src/wrapper/IndexBuilder.h index 8752063560..4cb6de814b 100644 --- a/cpp/src/wrapper/IndexBuilder.h +++ b/cpp/src/wrapper/IndexBuilder.h @@ -4,6 +4,7 @@ // Proprietary and confidential. //////////////////////////////////////////////////////////////////////////////// +#if 0 #pragma once #include "faiss/Index.h" @@ -64,3 +65,4 @@ extern IndexBuilderPtr GetIndexBuilder(const Operand_ptr &opd); } } } +#endif diff --git a/cpp/src/wrapper/Operand.cpp b/cpp/src/wrapper/Operand.cpp index 25341676a6..4e9ac1011b 100644 --- a/cpp/src/wrapper/Operand.cpp +++ b/cpp/src/wrapper/Operand.cpp @@ -4,6 +4,7 @@ // Proprietary and confidential. //////////////////////////////////////////////////////////////////////////////// +#if 0 #include "Operand.h" @@ -90,3 +91,4 @@ Operand_ptr str_to_operand(const std::string &input) { } } } +#endif diff --git a/cpp/src/wrapper/Operand.h b/cpp/src/wrapper/Operand.h index 85a0eb8080..0e675f6a1b 100644 --- a/cpp/src/wrapper/Operand.h +++ b/cpp/src/wrapper/Operand.h @@ -4,6 +4,7 @@ // Proprietary and confidential. //////////////////////////////////////////////////////////////////////////////// +#if 0 #pragma once #include @@ -42,3 +43,4 @@ extern Operand_ptr str_to_operand(const std::string &input); } } } +#endif diff --git a/cpp/src/wrapper/knowhere/data_transfer.cpp b/cpp/src/wrapper/knowhere/data_transfer.cpp index af5ad212e4..583a44ee29 100644 --- a/cpp/src/wrapper/knowhere/data_transfer.cpp +++ b/cpp/src/wrapper/knowhere/data_transfer.cpp @@ -8,7 +8,7 @@ namespace zilliz { -namespace vecwise { +namespace milvus { namespace engine { using namespace zilliz::knowhere; diff --git a/cpp/src/wrapper/knowhere/data_transfer.h b/cpp/src/wrapper/knowhere/data_transfer.h index c99cd1c742..46de4ff21f 100644 --- a/cpp/src/wrapper/knowhere/data_transfer.h +++ b/cpp/src/wrapper/knowhere/data_transfer.h @@ -10,7 +10,7 @@ namespace zilliz { -namespace vecwise { +namespace milvus { namespace engine { extern zilliz::knowhere::DatasetPtr diff --git a/cpp/src/wrapper/knowhere/vec_impl.cpp b/cpp/src/wrapper/knowhere/vec_impl.cpp index e24d470acc..e1a93d37b9 100644 --- a/cpp/src/wrapper/knowhere/vec_impl.cpp +++ b/cpp/src/wrapper/knowhere/vec_impl.cpp @@ -4,18 +4,14 @@ // Proprietary and confidential. //////////////////////////////////////////////////////////////////////////////// -#include "knowhere/index/index.h" -#include "knowhere/index/index_model.h" -#include "knowhere/index/index_type.h" -#include "knowhere/adapter/sptag.h" -#include "knowhere/common/tensor.h" +#include "knowhere/index/vector_index/idmap.h" #include "vec_impl.h" #include "data_transfer.h" namespace zilliz { -namespace vecwise { +namespace milvus { namespace engine { using namespace zilliz::knowhere; @@ -26,8 +22,8 @@ void VecIndexImpl::BuildAll(const long &nb, const Config &cfg, const long &nt, const float *xt) { - auto d = cfg["dim"].as(); - auto dataset = GenDatasetWithIds(nb, d, xb, ids); + dim = cfg["dim"].as(); + auto dataset = GenDatasetWithIds(nb, dim, xb, ids); auto preprocessor = index_->BuildPreprocessor(dataset, cfg); index_->set_preprocessor(preprocessor); @@ -39,7 +35,7 @@ void VecIndexImpl::BuildAll(const long &nb, void VecIndexImpl::Add(const long &nb, const float *xb, const long *ids, const Config &cfg) { // TODO(linxj): Assert index is trained; - auto d = cfg["dim"].as(); + auto d = cfg.get_with_default("dim", dim); auto dataset = GenDatasetWithIds(nb, d, xb, ids); index_->Add(dataset, cfg); @@ -48,8 +44,8 @@ void VecIndexImpl::Add(const long &nb, const float *xb, const long *ids, const C void VecIndexImpl::Search(const long &nq, const float *xq, float *dist, long *ids, const Config &cfg) { // TODO: Assert index is trained; - auto d = cfg["dim"].as(); auto k = cfg["k"].as(); + auto d = cfg.get_with_default("dim", dim); auto dataset = GenDataset(nq, d, xq); Config search_cfg; @@ -90,6 +86,27 @@ void VecIndexImpl::Load(const zilliz::knowhere::BinarySet &index_binary) { index_->Load(index_binary); } +int64_t VecIndexImpl::Dimension() { + return index_->Dimension(); +} + +int64_t VecIndexImpl::Count() { + return index_->Count(); +} + +float *BFIndex::GetRawVectors() { + return std::static_pointer_cast(index_)->GetRawVectors(); +} + +int64_t *BFIndex::GetRawIds() { + return std::static_pointer_cast(index_)->GetRawIds(); +} + +void BFIndex::Build(const int64_t &d) { + dim = d; + std::static_pointer_cast(index_)->Train(dim); +} + } } } diff --git a/cpp/src/wrapper/knowhere/vec_impl.h b/cpp/src/wrapper/knowhere/vec_impl.h index 25f7d16548..9593e12779 100644 --- a/cpp/src/wrapper/knowhere/vec_impl.h +++ b/cpp/src/wrapper/knowhere/vec_impl.h @@ -12,7 +12,7 @@ namespace zilliz { -namespace vecwise { +namespace milvus { namespace engine { class VecIndexImpl : public VecIndex { @@ -24,15 +24,26 @@ class VecIndexImpl : public VecIndex { const Config &cfg, const long &nt, const float *xt) override; + int64_t Dimension() override; + int64_t Count() override; void Add(const long &nb, const float *xb, const long *ids, const Config &cfg) override; zilliz::knowhere::BinarySet Serialize() override; void Load(const zilliz::knowhere::BinarySet &index_binary) override; void Search(const long &nq, const float *xq, float *dist, long *ids, const Config &cfg) override; - private: + protected: + int64_t dim; std::shared_ptr index_ = nullptr; }; +class BFIndex : public VecIndexImpl { + public: + explicit BFIndex(std::shared_ptr index) : VecIndexImpl(std::move(index)) {}; + void Build(const int64_t& d); + float* GetRawVectors(); + int64_t* GetRawIds(); +}; + } } } diff --git a/cpp/src/wrapper/knowhere/vec_index.cpp b/cpp/src/wrapper/knowhere/vec_index.cpp index 171388d0af..b71eb0f4b7 100644 --- a/cpp/src/wrapper/knowhere/vec_index.cpp +++ b/cpp/src/wrapper/knowhere/vec_index.cpp @@ -4,6 +4,7 @@ // Proprietary and confidential. //////////////////////////////////////////////////////////////////////////////// #include "knowhere/index/vector_index/ivf.h" +#include "knowhere/index/vector_index/idmap.h" #include "knowhere/index/vector_index/gpu_ivf.h" #include "knowhere/index/vector_index/cpu_kdt_rng.h" @@ -12,27 +13,42 @@ namespace zilliz { -namespace vecwise { +namespace milvus { namespace engine { // TODO(linxj): index_type => enum struct -VecIndexPtr GetVecIndexFactory(const std::string &index_type) { +VecIndexPtr GetVecIndexFactory(const IndexType &type) { std::shared_ptr index; - if (index_type == "IVF") { - index = std::make_shared(); - } else if (index_type == "GPUIVF") { - index = std::make_shared(0); - } else if (index_type == "SPTAG") { - index = std::make_shared(); + switch (type) { + case IndexType::FAISS_IDMAP: { + index = std::make_shared(); + return std::make_shared(index); + } + case IndexType::FAISS_IVFFLAT_CPU: { + index = std::make_shared(); + break; + } + case IndexType::FAISS_IVFFLAT_GPU: { + index = std::make_shared(0); + break; + } + case IndexType::SPTAG_KDT_RNT_CPU: { + index = std::make_shared(); + break; + } + //// TODO(linxj): Support NSG + //case IndexType ::NSG: { + // index = std::make_shared(); + // break; + //} + default: { + return nullptr; + } } - // TODO(linxj): Support NSG - //else if (index_type == "NSG") { - // index = std::make_shared(); - //} return std::make_shared(index); } -VecIndexPtr LoadVecIndex(const std::string &index_type, const zilliz::knowhere::BinarySet &index_binary) { +VecIndexPtr LoadVecIndex(const IndexType &index_type, const zilliz::knowhere::BinarySet &index_binary) { auto index = GetVecIndexFactory(index_type); index->Load(index_binary); return index; diff --git a/cpp/src/wrapper/knowhere/vec_index.h b/cpp/src/wrapper/knowhere/vec_index.h index b03c43a36b..8e471b5213 100644 --- a/cpp/src/wrapper/knowhere/vec_index.h +++ b/cpp/src/wrapper/knowhere/vec_index.h @@ -14,7 +14,7 @@ namespace zilliz { -namespace vecwise { +namespace milvus { namespace engine { // TODO(linxj): jsoncons => rapidjson or other. @@ -40,6 +40,10 @@ class VecIndex { long *ids, const Config &cfg = Config()) = 0; + virtual int64_t Dimension() = 0; + + virtual int64_t Count() = 0; + virtual zilliz::knowhere::BinarySet Serialize() = 0; virtual void Load(const zilliz::knowhere::BinarySet &index_binary) = 0; @@ -47,9 +51,18 @@ class VecIndex { using VecIndexPtr = std::shared_ptr; -extern VecIndexPtr GetVecIndexFactory(const std::string &index_type); +enum class IndexType { + INVALID = 0, + FAISS_IDMAP = 1, + FAISS_IVFFLAT_GPU, + FAISS_IVFFLAT_CPU, + SPTAG_KDT_RNT_CPU, + NSG, +}; -extern VecIndexPtr LoadVecIndex(const std::string &index_type, const zilliz::knowhere::BinarySet &index_binary); +extern VecIndexPtr GetVecIndexFactory(const IndexType &type); + +extern VecIndexPtr LoadVecIndex(const IndexType &index_type, const zilliz::knowhere::BinarySet &index_binary); } } diff --git a/cpp/thirdparty/knowhere b/cpp/thirdparty/knowhere index 2d543bfab6..c0df766214 160000 --- a/cpp/thirdparty/knowhere +++ b/cpp/thirdparty/knowhere @@ -1 +1 @@ -Subproject commit 2d543bfab655398f30113681f348519acac40ab5 +Subproject commit c0df766214d7fa288ffedd77cd06a8ba8620c8df From 440e18bb0fad3db3dbc61b6b059bf97614b924f8 Mon Sep 17 00:00:00 2001 From: "xj.lin" Date: Tue, 9 Jul 2019 04:26:31 +0800 Subject: [PATCH 7/8] MS-137 - Integrate knowhere Former-commit-id: 67d9be936996437411ac5941c3e322b08a9389bf --- cpp/CHANGELOG.md | 12 +- cpp/src/db/ExecutionEngineImpl.cpp | 131 +++++++++++++++++-- cpp/src/db/ExecutionEngineImpl.h | 1 + cpp/src/db/scheduler/task/SearchTask.cpp | 5 +- cpp/src/wrapper/Index.h | 4 +- cpp/src/wrapper/knowhere/vec_impl.cpp | 25 +++- cpp/src/wrapper/knowhere/vec_impl.h | 8 +- cpp/thirdparty/knowhere | 2 +- cpp/unittest/CMakeLists.txt | 2 +- cpp/unittest/db/CMakeLists.txt | 2 +- cpp/unittest/db/misc_test.cpp | 52 ++++---- cpp/unittest/index_wrapper/knowhere_test.cpp | 48 +++++-- 12 files changed, 222 insertions(+), 70 deletions(-) diff --git a/cpp/CHANGELOG.md b/cpp/CHANGELOG.md index 949a05c8db..b32f159160 100644 --- a/cpp/CHANGELOG.md +++ b/cpp/CHANGELOG.md @@ -3,17 +3,6 @@ Please mark all change in change log and use the ticket from JIRA. -# Milvus 0.3.2 (2019-07-10) - -## Bug - -## Improvement - -## New Feature -- MS-154 - Integrate knowhere - -## Task - # Milvus 0.3.1 (2019-07-10) ## Bug @@ -21,6 +10,7 @@ Please mark all change in change log and use the ticket from JIRA. ## Improvement ## New Feature +- MS-137 - Integrate knowhere ## Task diff --git a/cpp/src/db/ExecutionEngineImpl.cpp b/cpp/src/db/ExecutionEngineImpl.cpp index 28349695c7..85372d619d 100644 --- a/cpp/src/db/ExecutionEngineImpl.cpp +++ b/cpp/src/db/ExecutionEngineImpl.cpp @@ -16,12 +16,61 @@ namespace zilliz { namespace milvus { namespace engine { +struct FileIOWriter { + std::fstream fs; + std::string name; + + FileIOWriter(const std::string &fname); + ~FileIOWriter(); + size_t operator()(void *ptr, size_t size); +}; + +struct FileIOReader { + std::fstream fs; + std::string name; + + FileIOReader(const std::string &fname); + ~FileIOReader(); + size_t operator()(void *ptr, size_t size); + size_t operator()(void *ptr, size_t size, size_t pos); +}; + +FileIOReader::FileIOReader(const std::string &fname) { + name = fname; + fs = std::fstream(name, std::ios::in | std::ios::binary); +} + +FileIOReader::~FileIOReader() { + fs.close(); +} + +size_t FileIOReader::operator()(void *ptr, size_t size) { + fs.read(reinterpret_cast(ptr), size); +} + +size_t FileIOReader::operator()(void *ptr, size_t size, size_t pos) { + return 0; +} + +FileIOWriter::FileIOWriter(const std::string &fname) { + name = fname; + fs = std::fstream(name, std::ios::out | std::ios::binary); +} + +FileIOWriter::~FileIOWriter() { + fs.close(); +} + +size_t FileIOWriter::operator()(void *ptr, size_t size) { + fs.write(reinterpret_cast(ptr), size); +} ExecutionEngineImpl::ExecutionEngineImpl(uint16_t dimension, const std::string &location, EngineType type) : location_(location), dim(dimension), build_type(type) { index_ = CreatetVecIndex(EngineType::FAISS_IDMAP); + current_type = EngineType::FAISS_IDMAP; std::static_pointer_cast(index_)->Build(dimension); } @@ -29,6 +78,7 @@ ExecutionEngineImpl::ExecutionEngineImpl(VecIndexPtr index, const std::string &location, EngineType type) : index_(std::move(index)), location_(location), build_type(type) { + current_type = type; } VecIndexPtr ExecutionEngineImpl::CreatetVecIndex(EngineType type) { @@ -80,26 +130,85 @@ size_t ExecutionEngineImpl::PhysicalSize() const { } Status ExecutionEngineImpl::Serialize() { - // TODO(groot): auto binaryset = index_->Serialize(); + + FileIOWriter writer(location_); + writer(¤t_type, sizeof(current_type)); + for (auto &iter: binaryset.binary_map_) { + auto meta = iter.first.c_str(); + size_t meta_length = iter.first.length(); + writer(&meta_length, sizeof(meta_length)); + writer((void *) meta, meta_length); + + auto binary = iter.second; + size_t binary_length = binary->size; + writer(&binary_length, sizeof(binary_length)); + writer((void *) binary->data.get(), binary_length); + } return Status::OK(); } Status ExecutionEngineImpl::Load() { - // TODO(groot): + index_ = Load(location_); return Status::OK(); } VecIndexPtr ExecutionEngineImpl::Load(const std::string &location) { - // TODO(groot): dev func in Fake code - // pseude code - //auto data = read_file(location); - //auto index_type = get_index_type(data); - //auto binaryset = get_index_binary(data); - ///// + knowhere::BinarySet load_data_list; + FileIOReader reader(location); + reader.fs.seekg(0, reader.fs.end); + size_t length = reader.fs.tellg(); + reader.fs.seekg(0); - //return LoadVecIndex(index_type, binaryset); - return nullptr; + size_t rp = 0; + reader(¤t_type, sizeof(current_type)); + rp += sizeof(current_type); + while (rp < length) { + size_t meta_length; + reader(&meta_length, sizeof(meta_length)); + rp += sizeof(meta_length); + reader.fs.seekg(rp); + + auto meta = new char[meta_length]; + reader(meta, meta_length); + rp += meta_length; + reader.fs.seekg(rp); + + size_t bin_length; + reader(&bin_length, sizeof(bin_length)); + rp += sizeof(bin_length); + reader.fs.seekg(rp); + + auto bin = new uint8_t[bin_length]; + reader(bin, bin_length); + rp += bin_length; + + auto xx = std::make_shared(); + xx.reset(bin); + load_data_list.Append(std::string(meta, meta_length), xx, bin_length); + } + + auto index_type = IndexType::INVALID; + switch (current_type) { + case EngineType::FAISS_IDMAP: { + index_type = IndexType::FAISS_IDMAP; + break; + } + case EngineType::FAISS_IVFFLAT_CPU: { + index_type = IndexType::FAISS_IVFFLAT_CPU; + break; + } + case EngineType::FAISS_IVFFLAT_GPU: { + index_type = IndexType::FAISS_IVFFLAT_GPU; + break; + } + case EngineType::SPTAG_KDT_RNT_CPU: { + index_type = IndexType::SPTAG_KDT_RNT_CPU; + break; + } + } + + return LoadVecIndex(index_type, load_data_list); } Status ExecutionEngineImpl::Merge(const std::string &location) { @@ -113,7 +222,7 @@ Status ExecutionEngineImpl::Merge(const std::string &location) { to_merge = Load(location); } - auto file_index = std::dynamic_pointer_cast(index_); + auto file_index = std::dynamic_pointer_cast(to_merge); index_->Add(file_index->Count(), file_index->GetRawVectors(), file_index->GetRawIds()); return Status::OK(); } diff --git a/cpp/src/db/ExecutionEngineImpl.h b/cpp/src/db/ExecutionEngineImpl.h index 6c0c83d9b3..6e5325f553 100644 --- a/cpp/src/db/ExecutionEngineImpl.h +++ b/cpp/src/db/ExecutionEngineImpl.h @@ -64,6 +64,7 @@ class ExecutionEngineImpl : public ExecutionEngine { protected: VecIndexPtr index_ = nullptr; EngineType build_type; + EngineType current_type; int64_t dim; std::string location_; diff --git a/cpp/src/db/scheduler/task/SearchTask.cpp b/cpp/src/db/scheduler/task/SearchTask.cpp index d04f270331..2bfac90e20 100644 --- a/cpp/src/db/scheduler/task/SearchTask.cpp +++ b/cpp/src/db/scheduler/task/SearchTask.cpp @@ -151,7 +151,7 @@ std::shared_ptr SearchTask::Execute() { std::vector output_distence; for(auto& context : search_contexts_) { //step 1: allocate memory - auto inner_k = index_engine_->Count() < context->topk() ? index_engine_->Count() : context->topk(); + auto inner_k = context->topk(); output_ids.resize(inner_k*context->nq()); output_distence.resize(inner_k*context->nq()); @@ -164,7 +164,8 @@ std::shared_ptr SearchTask::Execute() { //step 3: cluster result SearchContext::ResultSet result_set; - ClusterResult(output_ids, output_distence, context->nq(), inner_k, result_set); + auto spec_k = index_engine_->Count() < context->topk() ? index_engine_->Count() : context->topk(); + ClusterResult(output_ids, output_distence, context->nq(), spec_k, result_set); rc.Record("cluster result"); //step 4: pick up topk result diff --git a/cpp/src/wrapper/Index.h b/cpp/src/wrapper/Index.h index 1668059d11..9841416a6c 100644 --- a/cpp/src/wrapper/Index.h +++ b/cpp/src/wrapper/Index.h @@ -12,8 +12,8 @@ //#include //#include // -//#include "faiss/AutoTune.h" -//#include "faiss/index_io.h" +#include "faiss/AutoTune.h" +#include "faiss/index_io.h" // //#include "Operand.h" diff --git a/cpp/src/wrapper/knowhere/vec_impl.cpp b/cpp/src/wrapper/knowhere/vec_impl.cpp index e1a93d37b9..9b1afb84ef 100644 --- a/cpp/src/wrapper/knowhere/vec_impl.cpp +++ b/cpp/src/wrapper/knowhere/vec_impl.cpp @@ -4,6 +4,7 @@ // Proprietary and confidential. //////////////////////////////////////////////////////////////////////////////// +#include #include "knowhere/index/vector_index/idmap.h" #include "vec_impl.h" @@ -27,7 +28,9 @@ void VecIndexImpl::BuildAll(const long &nb, auto preprocessor = index_->BuildPreprocessor(dataset, cfg); index_->set_preprocessor(preprocessor); - auto model = index_->Train(dataset, cfg); + auto nlist = int(nb / 1000000.0 * 16384); + auto cfg_t = Config::object{{"nlist", nlist}, {"dim", dim}}; + auto model = index_->Train(dataset, cfg_t); index_->set_index_model(model); index_->Add(dataset, cfg); } @@ -71,7 +74,7 @@ void VecIndexImpl::Search(const long &nq, const float *xq, float *dist, long *id //} auto p_ids = ids_array->data()->GetValues(1, 0); - auto p_dist = ids_array->data()->GetValues(1, 0); + auto p_dist = dis_array->data()->GetValues(1, 0); // TODO(linxj): avoid copy here. memcpy(ids, p_ids, sizeof(int64_t) * nq * k); @@ -84,6 +87,7 @@ zilliz::knowhere::BinarySet VecIndexImpl::Serialize() { void VecIndexImpl::Load(const zilliz::knowhere::BinarySet &index_binary) { index_->Load(index_binary); + dim = Dimension(); } int64_t VecIndexImpl::Dimension() { @@ -95,7 +99,9 @@ int64_t VecIndexImpl::Count() { } float *BFIndex::GetRawVectors() { - return std::static_pointer_cast(index_)->GetRawVectors(); + auto raw_index = std::dynamic_pointer_cast(index_); + if (raw_index) { return raw_index->GetRawVectors(); } + return nullptr; } int64_t *BFIndex::GetRawIds() { @@ -107,6 +113,19 @@ void BFIndex::Build(const int64_t &d) { std::static_pointer_cast(index_)->Train(dim); } +void BFIndex::BuildAll(const long &nb, + const float *xb, + const long *ids, + const Config &cfg, + const long &nt, + const float *xt) { + dim = cfg["dim"].as(); + auto dataset = GenDatasetWithIds(nb, dim, xb, ids); + + std::static_pointer_cast(index_)->Train(dim); + index_->Add(dataset, cfg); +} + } } } diff --git a/cpp/src/wrapper/knowhere/vec_impl.h b/cpp/src/wrapper/knowhere/vec_impl.h index 9593e12779..ab6c6b8a79 100644 --- a/cpp/src/wrapper/knowhere/vec_impl.h +++ b/cpp/src/wrapper/knowhere/vec_impl.h @@ -32,7 +32,7 @@ class VecIndexImpl : public VecIndex { void Search(const long &nq, const float *xq, float *dist, long *ids, const Config &cfg) override; protected: - int64_t dim; + int64_t dim = 0; std::shared_ptr index_ = nullptr; }; @@ -41,6 +41,12 @@ class BFIndex : public VecIndexImpl { explicit BFIndex(std::shared_ptr index) : VecIndexImpl(std::move(index)) {}; void Build(const int64_t& d); float* GetRawVectors(); + void BuildAll(const long &nb, + const float *xb, + const long *ids, + const Config &cfg, + const long &nt, + const float *xt) override; int64_t* GetRawIds(); }; diff --git a/cpp/thirdparty/knowhere b/cpp/thirdparty/knowhere index c0df766214..3a30677b8a 160000 --- a/cpp/thirdparty/knowhere +++ b/cpp/thirdparty/knowhere @@ -1 +1 @@ -Subproject commit c0df766214d7fa288ffedd77cd06a8ba8620c8df +Subproject commit 3a30677b8ab105955534922d1677e8fa99ef0406 diff --git a/cpp/unittest/CMakeLists.txt b/cpp/unittest/CMakeLists.txt index d0d158ec4a..043716b58b 100644 --- a/cpp/unittest/CMakeLists.txt +++ b/cpp/unittest/CMakeLists.txt @@ -38,7 +38,7 @@ set(unittest_libs ${CUDA_TOOLKIT_ROOT_DIR}/lib64/stubs/libnvidia-ml.so ) -add_subdirectory(server) +#add_subdirectory(server) add_subdirectory(db) add_subdirectory(index_wrapper) #add_subdirectory(faiss_wrapper) diff --git a/cpp/unittest/db/CMakeLists.txt b/cpp/unittest/db/CMakeLists.txt index 44b09d7b25..213eb146ed 100644 --- a/cpp/unittest/db/CMakeLists.txt +++ b/cpp/unittest/db/CMakeLists.txt @@ -6,7 +6,7 @@ aux_source_directory(${MILVUS_ENGINE_SRC}/db db_srcs) aux_source_directory(${MILVUS_ENGINE_SRC}/config config_files) aux_source_directory(${MILVUS_ENGINE_SRC}/cache cache_srcs) -aux_source_directory(${MILVUS_ENGINE_SRC}/wrapper wrapper_src) +#aux_source_directory(${MILVUS_ENGINE_SRC}/wrapper wrapper_src) aux_source_directory(${MILVUS_ENGINE_SRC}/wrapper/knowhere knowhere_src) aux_source_directory(./ test_srcs) diff --git a/cpp/unittest/db/misc_test.cpp b/cpp/unittest/db/misc_test.cpp index a49c4d5807..6849a71867 100644 --- a/cpp/unittest/db/misc_test.cpp +++ b/cpp/unittest/db/misc_test.cpp @@ -26,32 +26,32 @@ namespace { } -TEST(DBMiscTest, ENGINE_API_TEST) { - //engine api AddWithIdArray - const uint16_t dim = 512; - const long n = 10; - engine::FaissExecutionEngine engine(512, "/tmp/1", "IDMap", "IDMap,Flat"); - std::vector vectors; - std::vector ids; - for (long i = 0; i < n; i++) { - for (uint16_t k = 0; k < dim; k++) { - vectors.push_back((float) k); - } - ids.push_back(i); - } - - auto status = engine.AddWithIdArray(vectors, ids); - ASSERT_TRUE(status.ok()); - - auto engine_ptr = engine::EngineFactory::Build(128, "/tmp", engine::EngineType::INVALID); - ASSERT_EQ(engine_ptr, nullptr); - - engine_ptr = engine::EngineFactory::Build(128, "/tmp", engine::EngineType::FAISS_IVFFLAT_GPU); - ASSERT_NE(engine_ptr, nullptr); - - engine_ptr = engine::EngineFactory::Build(128, "/tmp", engine::EngineType::FAISS_IDMAP); - ASSERT_NE(engine_ptr, nullptr); -} +//TEST(DBMiscTest, ENGINE_API_TEST) { +// //engine api AddWithIdArray +// const uint16_t dim = 512; +// const long n = 10; +// engine::FaissExecutionEngine engine(512, "/tmp/1", "IDMap", "IDMap,Flat"); +// std::vector vectors; +// std::vector ids; +// for (long i = 0; i < n; i++) { +// for (uint16_t k = 0; k < dim; k++) { +// vectors.push_back((float) k); +// } +// ids.push_back(i); +// } +// +// auto status = engine.AddWithIdArray(vectors, ids); +// ASSERT_TRUE(status.ok()); +// +// auto engine_ptr = engine::EngineFactory::Build(128, "/tmp", engine::EngineType::INVALID); +// ASSERT_EQ(engine_ptr, nullptr); +// +// engine_ptr = engine::EngineFactory::Build(128, "/tmp", engine::EngineType::FAISS_IVFFLAT_GPU); +// ASSERT_NE(engine_ptr, nullptr); +// +// engine_ptr = engine::EngineFactory::Build(128, "/tmp", engine::EngineType::FAISS_IDMAP); +// ASSERT_NE(engine_ptr, nullptr); +//} TEST(DBMiscTest, EXCEPTION_TEST) { engine::Exception ex1(""); diff --git a/cpp/unittest/index_wrapper/knowhere_test.cpp b/cpp/unittest/index_wrapper/knowhere_test.cpp index 58b0d5a4b2..b4f8feba03 100644 --- a/cpp/unittest/index_wrapper/knowhere_test.cpp +++ b/cpp/unittest/index_wrapper/knowhere_test.cpp @@ -11,7 +11,7 @@ #include "utils.h" -using namespace zilliz::vecwise::engine; +using namespace zilliz::milvus::engine; using namespace zilliz::knowhere; using ::testing::TestWithParam; @@ -20,7 +20,7 @@ using ::testing::Combine; class KnowhereWrapperTest - : public TestWithParam<::std::tuple> { + : public TestWithParam<::std::tuple> { protected: void SetUp() override { std::string generator_type; @@ -34,7 +34,7 @@ class KnowhereWrapperTest } protected: - std::string index_type; + IndexType index_type; Config train_cfg; Config search_cfg; @@ -55,12 +55,12 @@ class KnowhereWrapperTest INSTANTIATE_TEST_CASE_P(WrapperParam, KnowhereWrapperTest, Values( // ["Index type", "Generator type", "dim", "nb", "nq", "k", "build config", "search config"] - std::make_tuple("IVF", "Default", + std::make_tuple(IndexType::FAISS_IVFFLAT_CPU, "Default", 64, 10000, 10, 10, Config::object{{"nlist", 100}, {"dim", 64}}, Config::object{{"dim", 64}, {"k", 10}, {"nprobe", 20}} ), - std::make_tuple("SPTAG", "Default", + std::make_tuple(IndexType::SPTAG_KDT_RNT_CPU, "Default", 64, 10000, 10, 10, Config::object{{"TPTNumber", 1}, {"dim", 64}}, Config::object{{"dim", 64}, {"k", 10}} @@ -113,16 +113,39 @@ TEST_P(KnowhereWrapperTest, serialize_test) { { auto binaryset = index_->Serialize(); + //int fileno = 0; + //const std::string &base_name = "/tmp/wrapper_serialize_test_bin_"; + //std::vector filename_list; + //std::vector> meta_list; + //for (auto &iter: binaryset.binary_map_) { + // const std::string &filename = base_name + std::to_string(fileno); + // FileIOWriter writer(filename); + // writer(iter.second->data.get(), iter.second->size); + // + // meta_list.push_back(std::make_pair(iter.first, iter.second.size)); + // filename_list.push_back(filename); + // ++fileno; + //} + // + //BinarySet load_data_list; + //for (int i = 0; i < filename_list.size() && i < meta_list.size(); ++i) { + // auto bin_size = meta_list[i].second; + // FileIOReader reader(filename_list[i]); + // std::vector load_data(bin_size); + // reader(load_data.data(), bin_size); + // load_data_list.Append(meta_list[i].first, load_data); + //} + int fileno = 0; - const std::string &base_name = "/tmp/wrapper_serialize_test_bin_"; std::vector filename_list; + const std::string &base_name = "/tmp/wrapper_serialize_test_bin_"; std::vector> meta_list; for (auto &iter: binaryset.binary_map_) { const std::string &filename = base_name + std::to_string(fileno); FileIOWriter writer(filename); - writer(iter.second.data, iter.second.size); + writer(iter.second->data.get(), iter.second->size); - meta_list.push_back(std::make_pair(iter.first, iter.second.size)); + meta_list.emplace_back(std::make_pair(iter.first, iter.second->size)); filename_list.push_back(filename); ++fileno; } @@ -131,9 +154,12 @@ TEST_P(KnowhereWrapperTest, serialize_test) { for (int i = 0; i < filename_list.size() && i < meta_list.size(); ++i) { auto bin_size = meta_list[i].second; FileIOReader reader(filename_list[i]); - std::vector load_data(bin_size); - reader(load_data.data(), bin_size); - load_data_list.Append(meta_list[i].first, load_data); + + auto load_data = new uint8_t[bin_size]; + reader(load_data, bin_size); + auto data = std::make_shared(); + data.reset(load_data); + load_data_list.Append(meta_list[i].first, data, bin_size); } From 015fe06fc4818ee042905d154d0bc1af63ffeaa7 Mon Sep 17 00:00:00 2001 From: "xj.lin" Date: Tue, 9 Jul 2019 04:43:27 +0800 Subject: [PATCH 8/8] update... Former-commit-id: 42b56218b03ac7b8efb479e776ce601e12a45e6c --- cpp/src/db/ExecutionEngineImpl.cpp | 10 +++++++--- cpp/src/wrapper/knowhere/vec_index.cpp | 17 ++++++++++++----- cpp/src/wrapper/knowhere/vec_index.h | 6 ++++-- 3 files changed, 23 insertions(+), 10 deletions(-) diff --git a/cpp/src/db/ExecutionEngineImpl.cpp b/cpp/src/db/ExecutionEngineImpl.cpp index 85372d619d..bba29fc9a0 100644 --- a/cpp/src/db/ExecutionEngineImpl.cpp +++ b/cpp/src/db/ExecutionEngineImpl.cpp @@ -183,9 +183,9 @@ VecIndexPtr ExecutionEngineImpl::Load(const std::string &location) { reader(bin, bin_length); rp += bin_length; - auto xx = std::make_shared(); - xx.reset(bin); - load_data_list.Append(std::string(meta, meta_length), xx, bin_length); + auto binptr = std::make_shared(); + binptr.reset(bin); + load_data_list.Append(std::string(meta, meta_length), binptr, bin_length); } auto index_type = IndexType::INVALID; @@ -206,6 +206,10 @@ VecIndexPtr ExecutionEngineImpl::Load(const std::string &location) { index_type = IndexType::SPTAG_KDT_RNT_CPU; break; } + default: { + ENGINE_LOG_ERROR << "wrong index_type"; + return nullptr; + } } return LoadVecIndex(index_type, load_data_list); diff --git a/cpp/src/wrapper/knowhere/vec_index.cpp b/cpp/src/wrapper/knowhere/vec_index.cpp index b71eb0f4b7..17aa428613 100644 --- a/cpp/src/wrapper/knowhere/vec_index.cpp +++ b/cpp/src/wrapper/knowhere/vec_index.cpp @@ -32,15 +32,22 @@ VecIndexPtr GetVecIndexFactory(const IndexType &type) { index = std::make_shared(0); break; } + case IndexType::FAISS_IVFPQ_CPU: { + index = std::make_shared(); + break; + } + case IndexType::FAISS_IVFPQ_GPU: { + index = std::make_shared(0); + break; + } case IndexType::SPTAG_KDT_RNT_CPU: { index = std::make_shared(); break; } - //// TODO(linxj): Support NSG - //case IndexType ::NSG: { - // index = std::make_shared(); - // break; - //} + //case IndexType::NSG: { // TODO(linxj): bug. + // index = std::make_shared(); + // break; + //} default: { return nullptr; } diff --git a/cpp/src/wrapper/knowhere/vec_index.h b/cpp/src/wrapper/knowhere/vec_index.h index 8e471b5213..76c69537b5 100644 --- a/cpp/src/wrapper/knowhere/vec_index.h +++ b/cpp/src/wrapper/knowhere/vec_index.h @@ -54,10 +54,12 @@ using VecIndexPtr = std::shared_ptr; enum class IndexType { INVALID = 0, FAISS_IDMAP = 1, - FAISS_IVFFLAT_GPU, FAISS_IVFFLAT_CPU, + FAISS_IVFFLAT_GPU, + FAISS_IVFPQ_CPU, + FAISS_IVFPQ_GPU, SPTAG_KDT_RNT_CPU, - NSG, + //NSG, }; extern VecIndexPtr GetVecIndexFactory(const IndexType &type);