diff --git a/cpp/CHANGELOG.md b/cpp/CHANGELOG.md index 949a05c8db..b32f159160 100644 --- a/cpp/CHANGELOG.md +++ b/cpp/CHANGELOG.md @@ -3,17 +3,6 @@ Please mark all change in change log and use the ticket from JIRA. -# Milvus 0.3.2 (2019-07-10) - -## Bug - -## Improvement - -## New Feature -- MS-154 - Integrate knowhere - -## Task - # Milvus 0.3.1 (2019-07-10) ## Bug @@ -21,6 +10,7 @@ Please mark all change in change log and use the ticket from JIRA. ## Improvement ## New Feature +- MS-137 - Integrate knowhere ## Task diff --git a/cpp/src/CMakeLists.txt b/cpp/src/CMakeLists.txt index 6a7fb6835e..0627b2010a 100644 --- a/cpp/src/CMakeLists.txt +++ b/cpp/src/CMakeLists.txt @@ -10,7 +10,7 @@ aux_source_directory(config config_files) aux_source_directory(server server_files) aux_source_directory(utils utils_files) aux_source_directory(db db_files) -aux_source_directory(wrapper wrapper_files) +#aux_source_directory(wrapper wrapper_files) aux_source_directory(metrics metrics_files) aux_source_directory(wrapper/knowhere knowhere_files) @@ -161,6 +161,17 @@ set(server_libs metrics ) +set(knowhere_libs + knowhere + SPTAGLibStatic + arrow + jemalloc_pic + faiss + openblas + lapack + tbb + ) + add_executable(milvus_server ${config_files} ${server_files} @@ -170,9 +181,17 @@ add_executable(milvus_server ) if (ENABLE_LICENSE STREQUAL "ON") - target_link_libraries(milvus_server ${server_libs} license_check ${third_party_libs}) + target_link_libraries(milvus_server + ${server_libs} + license_check + ${third_party_libs} + ${knowhere_libs}) else () - target_link_libraries(milvus_server ${server_libs} ${third_party_libs}) + target_link_libraries(milvus_server + ${server_libs} + ${third_party_libs} + ${knowhere_libs} + ) endif() if (ENABLE_LICENSE STREQUAL "ON") diff --git a/cpp/src/cache/DataObj.h b/cpp/src/cache/DataObj.h index 1dff04027e..d5e52f7664 100644 --- a/cpp/src/cache/DataObj.h +++ b/cpp/src/cache/DataObj.h @@ -28,7 +28,7 @@ public: return 0; } - return index_->ntotal*(index_->dim*4); + return index_->Count() * index_->Dimension() * sizeof(float); } private: diff --git a/cpp/src/db/EngineFactory.cpp b/cpp/src/db/EngineFactory.cpp index bacce70ce4..3389c0a07e 100644 --- a/cpp/src/db/EngineFactory.cpp +++ b/cpp/src/db/EngineFactory.cpp @@ -4,14 +4,15 @@ * Proprietary and confidential. ******************************************************************************/ #include "EngineFactory.h" -#include "FaissExecutionEngine.h" +//#include "FaissExecutionEngine.h" +#include "ExecutionEngineImpl.h" #include "Log.h" - namespace zilliz { namespace milvus { namespace engine { +#if 0 ExecutionEnginePtr EngineFactory::Build(uint16_t dimension, const std::string &location, @@ -26,7 +27,7 @@ EngineFactory::Build(uint16_t dimension, break; } - case EngineType::FAISS_IVFFLAT: { + case EngineType::FAISS_IVFFLAT_GPU: { execution_engine_ptr = ExecutionEnginePtr(new FaissExecutionEngine(dimension, location, "IVF", "IDMap,Flat")); break; @@ -41,6 +42,24 @@ EngineFactory::Build(uint16_t dimension, execution_engine_ptr->Init(); return execution_engine_ptr; } +#else +ExecutionEnginePtr +EngineFactory::Build(uint16_t dimension, + const std::string &location, + EngineType type) { + + if(type == EngineType::INVALID) { + ENGINE_LOG_ERROR << "Unsupported engine type"; + return nullptr; + } + + ExecutionEnginePtr execution_engine_ptr = + std::make_shared(dimension, location, type); + + execution_engine_ptr->Init(); + return execution_engine_ptr; +} +#endif } } diff --git a/cpp/src/db/ExecutionEngine.h b/cpp/src/db/ExecutionEngine.h index f8c05f6f9d..a101acd3cd 100644 --- a/cpp/src/db/ExecutionEngine.h +++ b/cpp/src/db/ExecutionEngine.h @@ -17,7 +17,9 @@ namespace engine { enum class EngineType { INVALID = 0, FAISS_IDMAP = 1, - FAISS_IVFFLAT, + FAISS_IVFFLAT_GPU, + FAISS_IVFFLAT_CPU, + SPTAG_KDT_RNT_CPU, }; class ExecutionEngine { diff --git a/cpp/src/db/ExecutionEngineImpl.cpp b/cpp/src/db/ExecutionEngineImpl.cpp new file mode 100644 index 0000000000..bba29fc9a0 --- /dev/null +++ b/cpp/src/db/ExecutionEngineImpl.cpp @@ -0,0 +1,286 @@ +/******************************************************************************* + * Copyright 上海赜睿信息科技有限公司(Zilliz) - All Rights Reserved + * Unauthorized copying of this file, via any medium is strictly prohibited. + * Proprietary and confidential. + ******************************************************************************/ +#include +#include "Log.h" + +#include "src/cache/CpuCacheMgr.h" +#include "ExecutionEngineImpl.h" +#include "wrapper/knowhere/vec_index.h" +#include "wrapper/knowhere/vec_impl.h" + + +namespace zilliz { +namespace milvus { +namespace engine { + +struct FileIOWriter { + std::fstream fs; + std::string name; + + FileIOWriter(const std::string &fname); + ~FileIOWriter(); + size_t operator()(void *ptr, size_t size); +}; + +struct FileIOReader { + std::fstream fs; + std::string name; + + FileIOReader(const std::string &fname); + ~FileIOReader(); + size_t operator()(void *ptr, size_t size); + size_t operator()(void *ptr, size_t size, size_t pos); +}; + +FileIOReader::FileIOReader(const std::string &fname) { + name = fname; + fs = std::fstream(name, std::ios::in | std::ios::binary); +} + +FileIOReader::~FileIOReader() { + fs.close(); +} + +size_t FileIOReader::operator()(void *ptr, size_t size) { + fs.read(reinterpret_cast(ptr), size); +} + +size_t FileIOReader::operator()(void *ptr, size_t size, size_t pos) { + return 0; +} + +FileIOWriter::FileIOWriter(const std::string &fname) { + name = fname; + fs = std::fstream(name, std::ios::out | std::ios::binary); +} + +FileIOWriter::~FileIOWriter() { + fs.close(); +} + +size_t FileIOWriter::operator()(void *ptr, size_t size) { + fs.write(reinterpret_cast(ptr), size); +} + +ExecutionEngineImpl::ExecutionEngineImpl(uint16_t dimension, + const std::string &location, + EngineType type) + : location_(location), dim(dimension), build_type(type) { + index_ = CreatetVecIndex(EngineType::FAISS_IDMAP); + current_type = EngineType::FAISS_IDMAP; + std::static_pointer_cast(index_)->Build(dimension); +} + +ExecutionEngineImpl::ExecutionEngineImpl(VecIndexPtr index, + const std::string &location, + EngineType type) + : index_(std::move(index)), location_(location), build_type(type) { + current_type = type; +} + +VecIndexPtr ExecutionEngineImpl::CreatetVecIndex(EngineType type) { + std::shared_ptr index; + switch (type) { + case EngineType::FAISS_IDMAP: { + index = GetVecIndexFactory(IndexType::FAISS_IDMAP); + break; + } + case EngineType::FAISS_IVFFLAT_GPU: { + index = GetVecIndexFactory(IndexType::FAISS_IVFFLAT_GPU); + break; + } + case EngineType::FAISS_IVFFLAT_CPU: { + index = GetVecIndexFactory(IndexType::FAISS_IVFFLAT_CPU); + break; + } + case EngineType::SPTAG_KDT_RNT_CPU: { + index = GetVecIndexFactory(IndexType::SPTAG_KDT_RNT_CPU); + break; + } + default: { + ENGINE_LOG_ERROR << "Invalid engine type"; + return nullptr; + } + } + return index; +} + +Status ExecutionEngineImpl::AddWithIds(long n, const float *xdata, const long *xids) { + index_->Add(n, xdata, xids, Config::object{{"dim", dim}}); + return Status::OK(); +} + +size_t ExecutionEngineImpl::Count() const { + return index_->Count(); +} + +size_t ExecutionEngineImpl::Size() const { + return (size_t) (Count() * Dimension()) * sizeof(float); +} + +size_t ExecutionEngineImpl::Dimension() const { + return index_->Dimension(); +} + +size_t ExecutionEngineImpl::PhysicalSize() const { + return (size_t) (Count() * Dimension()) * sizeof(float); +} + +Status ExecutionEngineImpl::Serialize() { + auto binaryset = index_->Serialize(); + + FileIOWriter writer(location_); + writer(¤t_type, sizeof(current_type)); + for (auto &iter: binaryset.binary_map_) { + auto meta = iter.first.c_str(); + size_t meta_length = iter.first.length(); + writer(&meta_length, sizeof(meta_length)); + writer((void *) meta, meta_length); + + auto binary = iter.second; + size_t binary_length = binary->size; + writer(&binary_length, sizeof(binary_length)); + writer((void *) binary->data.get(), binary_length); + } + return Status::OK(); +} + +Status ExecutionEngineImpl::Load() { + index_ = Load(location_); + return Status::OK(); +} + +VecIndexPtr ExecutionEngineImpl::Load(const std::string &location) { + knowhere::BinarySet load_data_list; + FileIOReader reader(location); + reader.fs.seekg(0, reader.fs.end); + size_t length = reader.fs.tellg(); + reader.fs.seekg(0); + + size_t rp = 0; + reader(¤t_type, sizeof(current_type)); + rp += sizeof(current_type); + while (rp < length) { + size_t meta_length; + reader(&meta_length, sizeof(meta_length)); + rp += sizeof(meta_length); + reader.fs.seekg(rp); + + auto meta = new char[meta_length]; + reader(meta, meta_length); + rp += meta_length; + reader.fs.seekg(rp); + + size_t bin_length; + reader(&bin_length, sizeof(bin_length)); + rp += sizeof(bin_length); + reader.fs.seekg(rp); + + auto bin = new uint8_t[bin_length]; + reader(bin, bin_length); + rp += bin_length; + + auto binptr = std::make_shared(); + binptr.reset(bin); + load_data_list.Append(std::string(meta, meta_length), binptr, bin_length); + } + + auto index_type = IndexType::INVALID; + switch (current_type) { + case EngineType::FAISS_IDMAP: { + index_type = IndexType::FAISS_IDMAP; + break; + } + case EngineType::FAISS_IVFFLAT_CPU: { + index_type = IndexType::FAISS_IVFFLAT_CPU; + break; + } + case EngineType::FAISS_IVFFLAT_GPU: { + index_type = IndexType::FAISS_IVFFLAT_GPU; + break; + } + case EngineType::SPTAG_KDT_RNT_CPU: { + index_type = IndexType::SPTAG_KDT_RNT_CPU; + break; + } + default: { + ENGINE_LOG_ERROR << "wrong index_type"; + return nullptr; + } + } + + return LoadVecIndex(index_type, load_data_list); +} + +Status ExecutionEngineImpl::Merge(const std::string &location) { + if (location == location_) { + return Status::Error("Cannot Merge Self"); + } + ENGINE_LOG_DEBUG << "Merge index file: " << location << " to: " << location_; + + auto to_merge = zilliz::milvus::cache::CpuCacheMgr::GetInstance()->GetIndex(location); + if (!to_merge) { + to_merge = Load(location); + } + + auto file_index = std::dynamic_pointer_cast(to_merge); + index_->Add(file_index->Count(), file_index->GetRawVectors(), file_index->GetRawIds()); + return Status::OK(); +} + +// TODO(linxj): add config +ExecutionEnginePtr +ExecutionEngineImpl::BuildIndex(const std::string &location) { + ENGINE_LOG_DEBUG << "Build index file: " << location << " from: " << location_; + + auto from_index = std::dynamic_pointer_cast(index_); + auto to_index = CreatetVecIndex(build_type); + to_index->BuildAll(Count(), + from_index->GetRawVectors(), + from_index->GetRawIds(), + Config::object{{"dim", Dimension()}, {"gpu_id", gpu_num}}); + + return std::make_shared(to_index, location, build_type); +} + +Status ExecutionEngineImpl::Search(long n, + const float *data, + long k, + float *distances, + long *labels) const { + index_->Search(n, data, distances, labels, Config::object{{"k", k}, {"nprobe", nprobe_}}); + return Status::OK(); +} + +Status ExecutionEngineImpl::Cache() { + zilliz::milvus::cache::CpuCacheMgr::GetInstance()->InsertItem(location_, index_); + + return Status::OK(); +} + +Status ExecutionEngineImpl::Init() { + using namespace zilliz::milvus::server; + ServerConfig &config = ServerConfig::GetInstance(); + ConfigNode server_config = config.GetConfig(CONFIG_SERVER); + gpu_num = server_config.GetInt32Value("gpu_index", 0); + + switch (build_type) { + case EngineType::FAISS_IVFFLAT_GPU: { + } + case EngineType::FAISS_IVFFLAT_CPU: { + ConfigNode engine_config = config.GetConfig(CONFIG_ENGINE); + nprobe_ = engine_config.GetInt32Value(CONFIG_NPROBE, 1000); + break; + } + } + + return Status::OK(); +} + + +} // namespace engine +} // namespace milvus +} // namespace zilliz diff --git a/cpp/src/db/ExecutionEngineImpl.h b/cpp/src/db/ExecutionEngineImpl.h new file mode 100644 index 0000000000..6e5325f553 --- /dev/null +++ b/cpp/src/db/ExecutionEngineImpl.h @@ -0,0 +1,79 @@ +/******************************************************************************* + * Copyright 上海赜睿信息科技有限公司(Zilliz) - All Rights Reserved + * Unauthorized copying of this file, via any medium is strictly prohibited. + * Proprietary and confidential. + ******************************************************************************/ +#pragma once + +#include "ExecutionEngine.h" +#include "wrapper/knowhere/vec_index.h" + +#include +#include + + +namespace zilliz { +namespace milvus { +namespace engine { + + +class ExecutionEngineImpl : public ExecutionEngine { + public: + + ExecutionEngineImpl(uint16_t dimension, + const std::string &location, + EngineType type); + + ExecutionEngineImpl(VecIndexPtr index, + const std::string &location, + EngineType type); + + Status AddWithIds(long n, const float *xdata, const long *xids) override; + + size_t Count() const override; + + size_t Size() const override; + + size_t Dimension() const override; + + size_t PhysicalSize() const override; + + Status Serialize() override; + + Status Load() override; + + Status Merge(const std::string &location) override; + + Status Search(long n, + const float *data, + long k, + float *distances, + long *labels) const override; + + ExecutionEnginePtr BuildIndex(const std::string &) override; + + Status Cache() override; + + Status Init() override; + + private: + VecIndexPtr CreatetVecIndex(EngineType type); + + VecIndexPtr Load(const std::string &location); + + protected: + VecIndexPtr index_ = nullptr; + EngineType build_type; + EngineType current_type; + + int64_t dim; + std::string location_; + + size_t nprobe_ = 0; + int64_t gpu_num = 0; +}; + + +} // namespace engine +} // namespace milvus +} // namespace zilliz diff --git a/cpp/src/db/FaissExecutionEngine.cpp b/cpp/src/db/FaissExecutionEngine.cpp index 20bd530e78..a2abe02e8a 100644 --- a/cpp/src/db/FaissExecutionEngine.cpp +++ b/cpp/src/db/FaissExecutionEngine.cpp @@ -3,6 +3,7 @@ * Unauthorized copying of this file, via any medium is strictly prohibited. * Proprietary and confidential. ******************************************************************************/ +#if 0 #include "FaissExecutionEngine.h" #include "Log.h" @@ -181,3 +182,4 @@ Status FaissExecutionEngine::Init() { } // namespace engine } // namespace milvus } // namespace zilliz +#endif diff --git a/cpp/src/db/FaissExecutionEngine.h b/cpp/src/db/FaissExecutionEngine.h index f9f37ad978..f8f0ad88bc 100644 --- a/cpp/src/db/FaissExecutionEngine.h +++ b/cpp/src/db/FaissExecutionEngine.h @@ -5,6 +5,7 @@ ******************************************************************************/ #pragma once +#if 0 #include "ExecutionEngine.h" #include "faiss/Index.h" @@ -71,3 +72,4 @@ protected: } // namespace engine } // namespace milvus } // namespace zilliz +#endif diff --git a/cpp/src/db/scheduler/task/SearchTask.cpp b/cpp/src/db/scheduler/task/SearchTask.cpp index d04f270331..2bfac90e20 100644 --- a/cpp/src/db/scheduler/task/SearchTask.cpp +++ b/cpp/src/db/scheduler/task/SearchTask.cpp @@ -151,7 +151,7 @@ std::shared_ptr SearchTask::Execute() { std::vector output_distence; for(auto& context : search_contexts_) { //step 1: allocate memory - auto inner_k = index_engine_->Count() < context->topk() ? index_engine_->Count() : context->topk(); + auto inner_k = context->topk(); output_ids.resize(inner_k*context->nq()); output_distence.resize(inner_k*context->nq()); @@ -164,7 +164,8 @@ std::shared_ptr SearchTask::Execute() { //step 3: cluster result SearchContext::ResultSet result_set; - ClusterResult(output_ids, output_distence, context->nq(), inner_k, result_set); + auto spec_k = index_engine_->Count() < context->topk() ? index_engine_->Count() : context->topk(); + ClusterResult(output_ids, output_distence, context->nq(), spec_k, result_set); rc.Record("cluster result"); //step 4: pick up topk result diff --git a/cpp/src/server/RequestTask.cpp b/cpp/src/server/RequestTask.cpp index 1b91883af5..07a8305d1f 100644 --- a/cpp/src/server/RequestTask.cpp +++ b/cpp/src/server/RequestTask.cpp @@ -29,7 +29,7 @@ namespace { static std::map map_type = { {0, engine::EngineType::INVALID}, {1, engine::EngineType::FAISS_IDMAP}, - {2, engine::EngineType::FAISS_IVFFLAT}, + {2, engine::EngineType::FAISS_IVFFLAT_GPU}, }; if(map_type.find(type) == map_type.end()) { @@ -43,7 +43,7 @@ namespace { static std::map map_type = { {engine::EngineType::INVALID, 0}, {engine::EngineType::FAISS_IDMAP, 1}, - {engine::EngineType::FAISS_IVFFLAT, 2}, + {engine::EngineType::FAISS_IVFFLAT_GPU, 2}, }; if(map_type.find(type) == map_type.end()) { diff --git a/cpp/src/wrapper/Index.cpp b/cpp/src/wrapper/Index.cpp index 18e20d830a..4b10c1e686 100644 --- a/cpp/src/wrapper/Index.cpp +++ b/cpp/src/wrapper/Index.cpp @@ -4,6 +4,7 @@ // Proprietary and confidential. //////////////////////////////////////////////////////////////////////////////// +#if 0 // TODO: maybe support static search #ifdef GPU_VERSION #include "faiss/gpu/GpuAutoTune.h" @@ -80,3 +81,4 @@ Index_ptr read_index(const std::string &file_name) { } } } +#endif diff --git a/cpp/src/wrapper/Index.h b/cpp/src/wrapper/Index.h index ba157348d4..9841416a6c 100644 --- a/cpp/src/wrapper/Index.h +++ b/cpp/src/wrapper/Index.h @@ -6,23 +6,29 @@ #pragma once -#include -#include -#include -#include -#include - +//#include +//#include +//#include +//#include +//#include +// #include "faiss/AutoTune.h" #include "faiss/index_io.h" +// +//#include "Operand.h" + +#include "knowhere/vec_index.h" -#include "Operand.h" namespace zilliz { namespace milvus { namespace engine { -class Index; -using Index_ptr = std::shared_ptr; +using Index_ptr = VecIndexPtr; + +#if 0 +//class Index; +//using Index_ptr = std::shared_ptr; class Index { typedef long idx_t; @@ -75,6 +81,7 @@ private: void write_index(const Index_ptr &index, const std::string &file_name); extern Index_ptr read_index(const std::string &file_name); +#endif } diff --git a/cpp/src/wrapper/IndexBuilder.cpp b/cpp/src/wrapper/IndexBuilder.cpp index d4429c381a..1302ca4726 100644 --- a/cpp/src/wrapper/IndexBuilder.cpp +++ b/cpp/src/wrapper/IndexBuilder.cpp @@ -4,6 +4,7 @@ // Proprietary and confidential. //////////////////////////////////////////////////////////////////////////////// +#if 0 #include "mutex" @@ -128,10 +129,8 @@ Index_ptr BgCpuBuilder::build_all(const long &nb, const float *xb, const long *i return std::make_shared(index); } -// TODO: Be Factory pattern later IndexBuilderPtr GetIndexBuilder(const Operand_ptr &opd) { if (opd->index_type == "IDMap") { - // TODO: fix hardcode IndexBuilderPtr index = nullptr; return std::make_shared(opd); } @@ -142,3 +141,4 @@ IndexBuilderPtr GetIndexBuilder(const Operand_ptr &opd) { } } } +#endif diff --git a/cpp/src/wrapper/IndexBuilder.h b/cpp/src/wrapper/IndexBuilder.h index 8752063560..4cb6de814b 100644 --- a/cpp/src/wrapper/IndexBuilder.h +++ b/cpp/src/wrapper/IndexBuilder.h @@ -4,6 +4,7 @@ // Proprietary and confidential. //////////////////////////////////////////////////////////////////////////////// +#if 0 #pragma once #include "faiss/Index.h" @@ -64,3 +65,4 @@ extern IndexBuilderPtr GetIndexBuilder(const Operand_ptr &opd); } } } +#endif diff --git a/cpp/src/wrapper/Operand.cpp b/cpp/src/wrapper/Operand.cpp index 25341676a6..4e9ac1011b 100644 --- a/cpp/src/wrapper/Operand.cpp +++ b/cpp/src/wrapper/Operand.cpp @@ -4,6 +4,7 @@ // Proprietary and confidential. //////////////////////////////////////////////////////////////////////////////// +#if 0 #include "Operand.h" @@ -90,3 +91,4 @@ Operand_ptr str_to_operand(const std::string &input) { } } } +#endif diff --git a/cpp/src/wrapper/Operand.h b/cpp/src/wrapper/Operand.h index 85a0eb8080..0e675f6a1b 100644 --- a/cpp/src/wrapper/Operand.h +++ b/cpp/src/wrapper/Operand.h @@ -4,6 +4,7 @@ // Proprietary and confidential. //////////////////////////////////////////////////////////////////////////////// +#if 0 #pragma once #include @@ -42,3 +43,4 @@ extern Operand_ptr str_to_operand(const std::string &input); } } } +#endif diff --git a/cpp/src/wrapper/knowhere/data_transfer.cpp b/cpp/src/wrapper/knowhere/data_transfer.cpp index af5ad212e4..583a44ee29 100644 --- a/cpp/src/wrapper/knowhere/data_transfer.cpp +++ b/cpp/src/wrapper/knowhere/data_transfer.cpp @@ -8,7 +8,7 @@ namespace zilliz { -namespace vecwise { +namespace milvus { namespace engine { using namespace zilliz::knowhere; diff --git a/cpp/src/wrapper/knowhere/data_transfer.h b/cpp/src/wrapper/knowhere/data_transfer.h index c99cd1c742..46de4ff21f 100644 --- a/cpp/src/wrapper/knowhere/data_transfer.h +++ b/cpp/src/wrapper/knowhere/data_transfer.h @@ -10,7 +10,7 @@ namespace zilliz { -namespace vecwise { +namespace milvus { namespace engine { extern zilliz::knowhere::DatasetPtr diff --git a/cpp/src/wrapper/knowhere/vec_impl.cpp b/cpp/src/wrapper/knowhere/vec_impl.cpp index e24d470acc..9b1afb84ef 100644 --- a/cpp/src/wrapper/knowhere/vec_impl.cpp +++ b/cpp/src/wrapper/knowhere/vec_impl.cpp @@ -4,18 +4,15 @@ // Proprietary and confidential. //////////////////////////////////////////////////////////////////////////////// -#include "knowhere/index/index.h" -#include "knowhere/index/index_model.h" -#include "knowhere/index/index_type.h" -#include "knowhere/adapter/sptag.h" -#include "knowhere/common/tensor.h" +#include +#include "knowhere/index/vector_index/idmap.h" #include "vec_impl.h" #include "data_transfer.h" namespace zilliz { -namespace vecwise { +namespace milvus { namespace engine { using namespace zilliz::knowhere; @@ -26,12 +23,14 @@ void VecIndexImpl::BuildAll(const long &nb, const Config &cfg, const long &nt, const float *xt) { - auto d = cfg["dim"].as(); - auto dataset = GenDatasetWithIds(nb, d, xb, ids); + dim = cfg["dim"].as(); + auto dataset = GenDatasetWithIds(nb, dim, xb, ids); auto preprocessor = index_->BuildPreprocessor(dataset, cfg); index_->set_preprocessor(preprocessor); - auto model = index_->Train(dataset, cfg); + auto nlist = int(nb / 1000000.0 * 16384); + auto cfg_t = Config::object{{"nlist", nlist}, {"dim", dim}}; + auto model = index_->Train(dataset, cfg_t); index_->set_index_model(model); index_->Add(dataset, cfg); } @@ -39,7 +38,7 @@ void VecIndexImpl::BuildAll(const long &nb, void VecIndexImpl::Add(const long &nb, const float *xb, const long *ids, const Config &cfg) { // TODO(linxj): Assert index is trained; - auto d = cfg["dim"].as(); + auto d = cfg.get_with_default("dim", dim); auto dataset = GenDatasetWithIds(nb, d, xb, ids); index_->Add(dataset, cfg); @@ -48,8 +47,8 @@ void VecIndexImpl::Add(const long &nb, const float *xb, const long *ids, const C void VecIndexImpl::Search(const long &nq, const float *xq, float *dist, long *ids, const Config &cfg) { // TODO: Assert index is trained; - auto d = cfg["dim"].as(); auto k = cfg["k"].as(); + auto d = cfg.get_with_default("dim", dim); auto dataset = GenDataset(nq, d, xq); Config search_cfg; @@ -75,7 +74,7 @@ void VecIndexImpl::Search(const long &nq, const float *xq, float *dist, long *id //} auto p_ids = ids_array->data()->GetValues(1, 0); - auto p_dist = ids_array->data()->GetValues(1, 0); + auto p_dist = dis_array->data()->GetValues(1, 0); // TODO(linxj): avoid copy here. memcpy(ids, p_ids, sizeof(int64_t) * nq * k); @@ -88,6 +87,43 @@ zilliz::knowhere::BinarySet VecIndexImpl::Serialize() { void VecIndexImpl::Load(const zilliz::knowhere::BinarySet &index_binary) { index_->Load(index_binary); + dim = Dimension(); +} + +int64_t VecIndexImpl::Dimension() { + return index_->Dimension(); +} + +int64_t VecIndexImpl::Count() { + return index_->Count(); +} + +float *BFIndex::GetRawVectors() { + auto raw_index = std::dynamic_pointer_cast(index_); + if (raw_index) { return raw_index->GetRawVectors(); } + return nullptr; +} + +int64_t *BFIndex::GetRawIds() { + return std::static_pointer_cast(index_)->GetRawIds(); +} + +void BFIndex::Build(const int64_t &d) { + dim = d; + std::static_pointer_cast(index_)->Train(dim); +} + +void BFIndex::BuildAll(const long &nb, + const float *xb, + const long *ids, + const Config &cfg, + const long &nt, + const float *xt) { + dim = cfg["dim"].as(); + auto dataset = GenDatasetWithIds(nb, dim, xb, ids); + + std::static_pointer_cast(index_)->Train(dim); + index_->Add(dataset, cfg); } } diff --git a/cpp/src/wrapper/knowhere/vec_impl.h b/cpp/src/wrapper/knowhere/vec_impl.h index 25f7d16548..ab6c6b8a79 100644 --- a/cpp/src/wrapper/knowhere/vec_impl.h +++ b/cpp/src/wrapper/knowhere/vec_impl.h @@ -12,7 +12,7 @@ namespace zilliz { -namespace vecwise { +namespace milvus { namespace engine { class VecIndexImpl : public VecIndex { @@ -24,15 +24,32 @@ class VecIndexImpl : public VecIndex { const Config &cfg, const long &nt, const float *xt) override; + int64_t Dimension() override; + int64_t Count() override; void Add(const long &nb, const float *xb, const long *ids, const Config &cfg) override; zilliz::knowhere::BinarySet Serialize() override; void Load(const zilliz::knowhere::BinarySet &index_binary) override; void Search(const long &nq, const float *xq, float *dist, long *ids, const Config &cfg) override; - private: + protected: + int64_t dim = 0; std::shared_ptr index_ = nullptr; }; +class BFIndex : public VecIndexImpl { + public: + explicit BFIndex(std::shared_ptr index) : VecIndexImpl(std::move(index)) {}; + void Build(const int64_t& d); + float* GetRawVectors(); + void BuildAll(const long &nb, + const float *xb, + const long *ids, + const Config &cfg, + const long &nt, + const float *xt) override; + int64_t* GetRawIds(); +}; + } } } diff --git a/cpp/src/wrapper/knowhere/vec_index.cpp b/cpp/src/wrapper/knowhere/vec_index.cpp index 171388d0af..17aa428613 100644 --- a/cpp/src/wrapper/knowhere/vec_index.cpp +++ b/cpp/src/wrapper/knowhere/vec_index.cpp @@ -4,6 +4,7 @@ // Proprietary and confidential. //////////////////////////////////////////////////////////////////////////////// #include "knowhere/index/vector_index/ivf.h" +#include "knowhere/index/vector_index/idmap.h" #include "knowhere/index/vector_index/gpu_ivf.h" #include "knowhere/index/vector_index/cpu_kdt_rng.h" @@ -12,27 +13,49 @@ namespace zilliz { -namespace vecwise { +namespace milvus { namespace engine { // TODO(linxj): index_type => enum struct -VecIndexPtr GetVecIndexFactory(const std::string &index_type) { +VecIndexPtr GetVecIndexFactory(const IndexType &type) { std::shared_ptr index; - if (index_type == "IVF") { - index = std::make_shared(); - } else if (index_type == "GPUIVF") { - index = std::make_shared(0); - } else if (index_type == "SPTAG") { - index = std::make_shared(); + switch (type) { + case IndexType::FAISS_IDMAP: { + index = std::make_shared(); + return std::make_shared(index); + } + case IndexType::FAISS_IVFFLAT_CPU: { + index = std::make_shared(); + break; + } + case IndexType::FAISS_IVFFLAT_GPU: { + index = std::make_shared(0); + break; + } + case IndexType::FAISS_IVFPQ_CPU: { + index = std::make_shared(); + break; + } + case IndexType::FAISS_IVFPQ_GPU: { + index = std::make_shared(0); + break; + } + case IndexType::SPTAG_KDT_RNT_CPU: { + index = std::make_shared(); + break; + } + //case IndexType::NSG: { // TODO(linxj): bug. + // index = std::make_shared(); + // break; + //} + default: { + return nullptr; + } } - // TODO(linxj): Support NSG - //else if (index_type == "NSG") { - // index = std::make_shared(); - //} return std::make_shared(index); } -VecIndexPtr LoadVecIndex(const std::string &index_type, const zilliz::knowhere::BinarySet &index_binary) { +VecIndexPtr LoadVecIndex(const IndexType &index_type, const zilliz::knowhere::BinarySet &index_binary) { auto index = GetVecIndexFactory(index_type); index->Load(index_binary); return index; diff --git a/cpp/src/wrapper/knowhere/vec_index.h b/cpp/src/wrapper/knowhere/vec_index.h index b03c43a36b..76c69537b5 100644 --- a/cpp/src/wrapper/knowhere/vec_index.h +++ b/cpp/src/wrapper/knowhere/vec_index.h @@ -14,7 +14,7 @@ namespace zilliz { -namespace vecwise { +namespace milvus { namespace engine { // TODO(linxj): jsoncons => rapidjson or other. @@ -40,6 +40,10 @@ class VecIndex { long *ids, const Config &cfg = Config()) = 0; + virtual int64_t Dimension() = 0; + + virtual int64_t Count() = 0; + virtual zilliz::knowhere::BinarySet Serialize() = 0; virtual void Load(const zilliz::knowhere::BinarySet &index_binary) = 0; @@ -47,9 +51,20 @@ class VecIndex { using VecIndexPtr = std::shared_ptr; -extern VecIndexPtr GetVecIndexFactory(const std::string &index_type); +enum class IndexType { + INVALID = 0, + FAISS_IDMAP = 1, + FAISS_IVFFLAT_CPU, + FAISS_IVFFLAT_GPU, + FAISS_IVFPQ_CPU, + FAISS_IVFPQ_GPU, + SPTAG_KDT_RNT_CPU, + //NSG, +}; -extern VecIndexPtr LoadVecIndex(const std::string &index_type, const zilliz::knowhere::BinarySet &index_binary); +extern VecIndexPtr GetVecIndexFactory(const IndexType &type); + +extern VecIndexPtr LoadVecIndex(const IndexType &index_type, const zilliz::knowhere::BinarySet &index_binary); } } diff --git a/cpp/thirdparty/knowhere b/cpp/thirdparty/knowhere index 844e600834..3a30677b8a 160000 --- a/cpp/thirdparty/knowhere +++ b/cpp/thirdparty/knowhere @@ -1 +1 @@ -Subproject commit 844e600834df1eeafc6c7e5936338ae964bd1d41 +Subproject commit 3a30677b8ab105955534922d1677e8fa99ef0406 diff --git a/cpp/unittest/CMakeLists.txt b/cpp/unittest/CMakeLists.txt index d0d158ec4a..043716b58b 100644 --- a/cpp/unittest/CMakeLists.txt +++ b/cpp/unittest/CMakeLists.txt @@ -38,7 +38,7 @@ set(unittest_libs ${CUDA_TOOLKIT_ROOT_DIR}/lib64/stubs/libnvidia-ml.so ) -add_subdirectory(server) +#add_subdirectory(server) add_subdirectory(db) add_subdirectory(index_wrapper) #add_subdirectory(faiss_wrapper) diff --git a/cpp/unittest/db/CMakeLists.txt b/cpp/unittest/db/CMakeLists.txt index b2720f7006..213eb146ed 100644 --- a/cpp/unittest/db/CMakeLists.txt +++ b/cpp/unittest/db/CMakeLists.txt @@ -6,7 +6,8 @@ aux_source_directory(${MILVUS_ENGINE_SRC}/db db_srcs) aux_source_directory(${MILVUS_ENGINE_SRC}/config config_files) aux_source_directory(${MILVUS_ENGINE_SRC}/cache cache_srcs) -aux_source_directory(${MILVUS_ENGINE_SRC}/wrapper wrapper_src) +#aux_source_directory(${MILVUS_ENGINE_SRC}/wrapper wrapper_src) +aux_source_directory(${MILVUS_ENGINE_SRC}/wrapper/knowhere knowhere_src) aux_source_directory(./ test_srcs) aux_source_directory(${MILVUS_ENGINE_SRC}/db/scheduler scheduler_files) @@ -30,18 +31,14 @@ set(db_test_src ${db_srcs} ${db_scheduler_srcs} ${wrapper_src} + ${knowhere_src} ${require_files} - ${test_srcs}) + ${test_srcs} + ) cuda_add_executable(db_test ${db_test_src}) set(db_libs - knowhere - faiss - openblas - lapack - cudart - cublas sqlite3 boost_system boost_filesystem @@ -49,6 +46,19 @@ set(db_libs mysqlpp ) -target_link_libraries(db_test ${db_libs} ${unittest_libs}) +set(knowhere_libs + knowhere + SPTAGLibStatic + arrow + jemalloc_pic + faiss + openblas + lapack + tbb + cudart + cublas + ) + +target_link_libraries(db_test ${db_libs} ${unittest_libs} ${knowhere_libs}) install(TARGETS db_test DESTINATION bin) diff --git a/cpp/unittest/db/misc_test.cpp b/cpp/unittest/db/misc_test.cpp index 4356746fc2..6849a71867 100644 --- a/cpp/unittest/db/misc_test.cpp +++ b/cpp/unittest/db/misc_test.cpp @@ -26,32 +26,32 @@ namespace { } -TEST(DBMiscTest, ENGINE_API_TEST) { - //engine api AddWithIdArray - const uint16_t dim = 512; - const long n = 10; - engine::FaissExecutionEngine engine(512, "/tmp/1", "IDMap", "IDMap,Flat"); - std::vector vectors; - std::vector ids; - for (long i = 0; i < n; i++) { - for (uint16_t k = 0; k < dim; k++) { - vectors.push_back((float) k); - } - ids.push_back(i); - } - - auto status = engine.AddWithIdArray(vectors, ids); - ASSERT_TRUE(status.ok()); - - auto engine_ptr = engine::EngineFactory::Build(128, "/tmp", engine::EngineType::INVALID); - ASSERT_EQ(engine_ptr, nullptr); - - engine_ptr = engine::EngineFactory::Build(128, "/tmp", engine::EngineType::FAISS_IVFFLAT); - ASSERT_NE(engine_ptr, nullptr); - - engine_ptr = engine::EngineFactory::Build(128, "/tmp", engine::EngineType::FAISS_IDMAP); - ASSERT_NE(engine_ptr, nullptr); -} +//TEST(DBMiscTest, ENGINE_API_TEST) { +// //engine api AddWithIdArray +// const uint16_t dim = 512; +// const long n = 10; +// engine::FaissExecutionEngine engine(512, "/tmp/1", "IDMap", "IDMap,Flat"); +// std::vector vectors; +// std::vector ids; +// for (long i = 0; i < n; i++) { +// for (uint16_t k = 0; k < dim; k++) { +// vectors.push_back((float) k); +// } +// ids.push_back(i); +// } +// +// auto status = engine.AddWithIdArray(vectors, ids); +// ASSERT_TRUE(status.ok()); +// +// auto engine_ptr = engine::EngineFactory::Build(128, "/tmp", engine::EngineType::INVALID); +// ASSERT_EQ(engine_ptr, nullptr); +// +// engine_ptr = engine::EngineFactory::Build(128, "/tmp", engine::EngineType::FAISS_IVFFLAT_GPU); +// ASSERT_NE(engine_ptr, nullptr); +// +// engine_ptr = engine::EngineFactory::Build(128, "/tmp", engine::EngineType::FAISS_IDMAP); +// ASSERT_NE(engine_ptr, nullptr); +//} TEST(DBMiscTest, EXCEPTION_TEST) { engine::Exception ex1(""); diff --git a/cpp/unittest/index_wrapper/knowhere_test.cpp b/cpp/unittest/index_wrapper/knowhere_test.cpp index 58b0d5a4b2..b4f8feba03 100644 --- a/cpp/unittest/index_wrapper/knowhere_test.cpp +++ b/cpp/unittest/index_wrapper/knowhere_test.cpp @@ -11,7 +11,7 @@ #include "utils.h" -using namespace zilliz::vecwise::engine; +using namespace zilliz::milvus::engine; using namespace zilliz::knowhere; using ::testing::TestWithParam; @@ -20,7 +20,7 @@ using ::testing::Combine; class KnowhereWrapperTest - : public TestWithParam<::std::tuple> { + : public TestWithParam<::std::tuple> { protected: void SetUp() override { std::string generator_type; @@ -34,7 +34,7 @@ class KnowhereWrapperTest } protected: - std::string index_type; + IndexType index_type; Config train_cfg; Config search_cfg; @@ -55,12 +55,12 @@ class KnowhereWrapperTest INSTANTIATE_TEST_CASE_P(WrapperParam, KnowhereWrapperTest, Values( // ["Index type", "Generator type", "dim", "nb", "nq", "k", "build config", "search config"] - std::make_tuple("IVF", "Default", + std::make_tuple(IndexType::FAISS_IVFFLAT_CPU, "Default", 64, 10000, 10, 10, Config::object{{"nlist", 100}, {"dim", 64}}, Config::object{{"dim", 64}, {"k", 10}, {"nprobe", 20}} ), - std::make_tuple("SPTAG", "Default", + std::make_tuple(IndexType::SPTAG_KDT_RNT_CPU, "Default", 64, 10000, 10, 10, Config::object{{"TPTNumber", 1}, {"dim", 64}}, Config::object{{"dim", 64}, {"k", 10}} @@ -113,16 +113,39 @@ TEST_P(KnowhereWrapperTest, serialize_test) { { auto binaryset = index_->Serialize(); + //int fileno = 0; + //const std::string &base_name = "/tmp/wrapper_serialize_test_bin_"; + //std::vector filename_list; + //std::vector> meta_list; + //for (auto &iter: binaryset.binary_map_) { + // const std::string &filename = base_name + std::to_string(fileno); + // FileIOWriter writer(filename); + // writer(iter.second->data.get(), iter.second->size); + // + // meta_list.push_back(std::make_pair(iter.first, iter.second.size)); + // filename_list.push_back(filename); + // ++fileno; + //} + // + //BinarySet load_data_list; + //for (int i = 0; i < filename_list.size() && i < meta_list.size(); ++i) { + // auto bin_size = meta_list[i].second; + // FileIOReader reader(filename_list[i]); + // std::vector load_data(bin_size); + // reader(load_data.data(), bin_size); + // load_data_list.Append(meta_list[i].first, load_data); + //} + int fileno = 0; - const std::string &base_name = "/tmp/wrapper_serialize_test_bin_"; std::vector filename_list; + const std::string &base_name = "/tmp/wrapper_serialize_test_bin_"; std::vector> meta_list; for (auto &iter: binaryset.binary_map_) { const std::string &filename = base_name + std::to_string(fileno); FileIOWriter writer(filename); - writer(iter.second.data, iter.second.size); + writer(iter.second->data.get(), iter.second->size); - meta_list.push_back(std::make_pair(iter.first, iter.second.size)); + meta_list.emplace_back(std::make_pair(iter.first, iter.second->size)); filename_list.push_back(filename); ++fileno; } @@ -131,9 +154,12 @@ TEST_P(KnowhereWrapperTest, serialize_test) { for (int i = 0; i < filename_list.size() && i < meta_list.size(); ++i) { auto bin_size = meta_list[i].second; FileIOReader reader(filename_list[i]); - std::vector load_data(bin_size); - reader(load_data.data(), bin_size); - load_data_list.Append(meta_list[i].first, load_data); + + auto load_data = new uint8_t[bin_size]; + reader(load_data, bin_size); + auto data = std::make_shared(); + data.reset(load_data); + load_data_list.Append(meta_list[i].first, data, bin_size); } diff --git a/cpp/unittest/metrics/CMakeLists.txt b/cpp/unittest/metrics/CMakeLists.txt index be5a542da6..d7ae12aff8 100644 --- a/cpp/unittest/metrics/CMakeLists.txt +++ b/cpp/unittest/metrics/CMakeLists.txt @@ -12,11 +12,12 @@ include_directories(../../src) -aux_source_directory(../../src/db db_srcs) -aux_source_directory(../../src/config config_files) -aux_source_directory(../../src/cache cache_srcs) -aux_source_directory(../../src/wrapper wrapper_src) -aux_source_directory(../../src/metrics metrics_src) +aux_source_directory(${MILVUS_ENGINE_SRC}/db db_srcs) +aux_source_directory(${MILVUS_ENGINE_SRC}/config config_files) +aux_source_directory(${MILVUS_ENGINE_SRC}/cache cache_srcs) +aux_source_directory(${MILVUS_ENGINE_SRC}/wrapper wrapper_src) +aux_source_directory(${MILVUS_ENGINE_SRC}/wrapper/knowhere knowhere_src) +aux_source_directory(${MILVUS_ENGINE_SRC}/src/metrics metrics_src) aux_source_directory(./ test_srcs) aux_source_directory(${MILVUS_ENGINE_SRC}/db/scheduler scheduler_files) @@ -54,18 +55,26 @@ set(count_test_src ${db_srcs} ${db_scheduler_srcs} ${wrapper_src} + ${knowhere_src} ${metrics_src} ${test_srcs} ) - add_executable(metrics_test ${count_test_src} ${require_files} ) -target_link_libraries(metrics_test +set(knowhere_libs knowhere + SPTAGLibStatic + arrow + jemalloc_pic faiss openblas lapack + tbb + ) + +target_link_libraries(metrics_test + ${knowhere_libs} cudart cublas sqlite3