From d36afb4ae98dc9478a5de4ddd3cb1a008886c719 Mon Sep 17 00:00:00 2001 From: "xj.lin" Date: Wed, 26 Jun 2019 17:42:28 +0800 Subject: [PATCH 1/7] 1. update Cmakemodule to support autodownload knowhere 2. add basic knowhere wrapper and unittest Former-commit-id: ed32f9e851e014272bbd37f1554c3541ad0740bf --- cpp/cmake/DefineOptions.cmake | 10 +- cpp/cmake/ThirdPartyPackages.cmake | 57 +++++++++++ cpp/src/CMakeLists.txt | 21 ++-- cpp/src/wrapper/knowhere/data_transfer.h | 29 ++++++ cpp/src/wrapper/knowhere/vec_impl.cpp | 102 +++++++++++++++++++ cpp/src/wrapper/knowhere/vec_impl.h | 37 +++++++ cpp/src/wrapper/knowhere/vec_index.cpp | 31 ++++++ cpp/src/wrapper/knowhere/vec_index.h | 56 ++++++++++ cpp/unittest/CMakeLists.txt | 3 +- cpp/unittest/index_wrapper/CMakeLists.txt | 19 ++++ cpp/unittest/index_wrapper/knowhere_test.cpp | 93 +++++++++++++++++ 11 files changed, 447 insertions(+), 11 deletions(-) create mode 100644 cpp/src/wrapper/knowhere/data_transfer.h create mode 100644 cpp/src/wrapper/knowhere/vec_impl.cpp create mode 100644 cpp/src/wrapper/knowhere/vec_impl.h create mode 100644 cpp/src/wrapper/knowhere/vec_index.cpp create mode 100644 cpp/src/wrapper/knowhere/vec_index.h create mode 100644 cpp/unittest/index_wrapper/CMakeLists.txt create mode 100644 cpp/unittest/index_wrapper/knowhere_test.cpp diff --git a/cpp/cmake/DefineOptions.cmake b/cpp/cmake/DefineOptions.cmake index d95e7c7ed1..5e9cdd6f9b 100644 --- a/cpp/cmake/DefineOptions.cmake +++ b/cpp/cmake/DefineOptions.cmake @@ -68,20 +68,20 @@ define_option(MILVUS_WITH_BZ2 "Build with BZ2 compression" ON) define_option(MILVUS_WITH_EASYLOGGINGPP "Build with Easylogging++ library" ON) -define_option(MILVUS_WITH_FAISS "Build with FAISS library" ON) +define_option(MILVUS_WITH_FAISS "Build with FAISS library" OFF) -define_option(MILVUS_WITH_FAISS_GPU_VERSION "Build with FAISS GPU version" ON) +define_option(MILVUS_WITH_FAISS_GPU_VERSION "Build with FAISS GPU version" OFF) #define_option_string(MILVUS_FAISS_GPU_ARCH 
"Specifying which GPU architectures to build against" # "-gencode=arch=compute_35,code=compute_35 -gencode=arch=compute_52,code=compute_52 -gencode=arch=compute_60,code=compute_60 -gencode=arch=compute_61,code=compute_61") -define_option(MILVUS_WITH_LAPACK "Build with LAPACK library" ON) +define_option(MILVUS_WITH_LAPACK "Build with LAPACK library" OFF) define_option(MILVUS_WITH_LZ4 "Build with lz4 compression" ON) define_option(MILVUS_WITH_JSONCONS "Build with JSONCONS" OFF) -define_option(MILVUS_WITH_OPENBLAS "Build with OpenBLAS library" ON) +define_option(MILVUS_WITH_OPENBLAS "Build with OpenBLAS library" OFF) define_option(MILVUS_WITH_PROMETHEUS "Build with PROMETHEUS library" ON) @@ -99,6 +99,8 @@ define_option(MILVUS_WITH_YAMLCPP "Build with yaml-cpp library" ON) define_option(MILVUS_WITH_ZLIB "Build with zlib compression" ON) +define_option(MILVUS_WITH_KNOWHERE "Build with Knowhere" ON) + if(CMAKE_VERSION VERSION_LESS 3.7) set(MILVUS_WITH_ZSTD_DEFAULT OFF) else() diff --git a/cpp/cmake/ThirdPartyPackages.cmake b/cpp/cmake/ThirdPartyPackages.cmake index cb5f3532fe..72902d5219 100644 --- a/cpp/cmake/ThirdPartyPackages.cmake +++ b/cpp/cmake/ThirdPartyPackages.cmake @@ -23,6 +23,7 @@ set(MILVUS_THIRDPARTY_DEPENDENCIES Easylogging++ FAISS GTest + Knowhere JSONCONS LAPACK Lz4 @@ -58,6 +59,8 @@ macro(build_dependency DEPENDENCY_NAME) build_faiss() elseif("${DEPENDENCY_NAME}" STREQUAL "LAPACK") build_lapack() + elseif("${DEPENDENCY_NAME}" STREQUAL "Knowhere") + build_knowhere() elseif("${DEPENDENCY_NAME}" STREQUAL "Lz4") build_lz4() elseif ("${DEPENDENCY_NAME}" STREQUAL "GTest") @@ -239,6 +242,12 @@ else() set(FAISS_SOURCE_URL "https://github.com/facebookresearch/faiss/archive/${FAISS_VERSION}.tar.gz") endif() +if(DEFINED ENV{MILVUS_KNOWHERE_URL}) + set(KNOWHERE_SOURCE_URL "$ENV{MILVUS_KNOWHERE_URL}") +else() + set(KNOWHERE_SOURCE_URL "${CMAKE_SOURCE_DIR}/thirdparty/knowhere") +endif() + if (DEFINED ENV{MILVUS_GTEST_URL}) set(GTEST_SOURCE_URL 
"$ENV{MILVUS_GTEST_URL}") else () @@ -632,6 +641,54 @@ if(MILVUS_WITH_BZ2) include_directories(SYSTEM "${BZIP2_INCLUDE_DIR}") endif() +# ---------------------------------------------------------------------- +# Knowhere + +macro(build_knowhere) + message(STATUS "Building knowhere from source") + set(KNOWHERE_PREFIX "${CMAKE_CURRENT_BINARY_DIR}/knowhere_ep-prefix/src/knowhere_ep") + set(KNOWHERE_INCLUDE_DIR "${KNOWHERE_PREFIX}/include") + set(KNOWHERE_STATIC_LIB + "${KNOWHERE_PREFIX}/lib/${CMAKE_STATIC_LIBRARY_PREFIX}knowhere${CMAKE_STATIC_LIBRARY_SUFFIX}") + + set(KNOWHERE_CMAKE_ARGS + ${EP_COMMON_CMAKE_ARGS} + "-DCMAKE_INSTALL_PREFIX=${KNOWHERE_PREFIX}" + -DCMAKE_INSTALL_LIBDIR=lib + -DCMAKE_CUDA_COMPILER=/usr/local/cuda/bin/nvcc + -DCMAKE_BUILD_TYPE=Release) + + externalproject_add(knowhere_ep + URL + ${KNOWHERE_SOURCE_URL} + ${EP_LOG_OPTIONS} + CMAKE_ARGS + ${KNOWHERE_CMAKE_ARGS} + BUILD_COMMAND + ${MAKE} + ${MAKE_BUILD_ARGS} + BUILD_BYPRODUCTS + ${KNOWHERE_STATIC_LIB}) + + file(MAKE_DIRECTORY "${KNOWHERE_INCLUDE_DIR}") + add_library(knowhere STATIC IMPORTED) + set_target_properties( + knowhere + PROPERTIES IMPORTED_LOCATION "${KNOWHERE_STATIC_LIB}" + INTERFACE_INCLUDE_DIRECTORIES "${KNOWHERE_INCLUDE_DIR}") + + add_dependencies(knowhere knowhere_ep) +endmacro() + +if(MILVUS_WITH_KNOWHERE) + resolve_dependency(Knowhere) + + get_target_property(KNOWHERE_INCLUDE_DIR knowhere INTERFACE_INCLUDE_DIRECTORIES) + link_directories("${KNOWHERE_PREFIX}/lib") + include_directories(SYSTEM "${KNOWHERE_INCLUDE_DIR}") + include_directories(SYSTEM "${KNOWHERE_INCLUDE_DIR}/SPTAG/AnnService") +endif() + # ---------------------------------------------------------------------- # Easylogging++ diff --git a/cpp/src/CMakeLists.txt b/cpp/src/CMakeLists.txt index e00420b2d1..c0bb8334a4 100644 --- a/cpp/src/CMakeLists.txt +++ b/cpp/src/CMakeLists.txt @@ -12,6 +12,7 @@ aux_source_directory(utils utils_files) aux_source_directory(db db_files) aux_source_directory(wrapper 
wrapper_files) aux_source_directory(metrics metrics_files) +aux_source_directory(wrapper/knowhere knowhere_files) aux_source_directory(db/scheduler scheduler_files) aux_source_directory(db/scheduler/context scheduler_context_files) @@ -50,6 +51,7 @@ set(engine_files ${wrapper_files} # metrics/Metrics.cpp ${metrics_files} + ${knowhere_files} ) set(get_sys_info_files @@ -64,14 +66,18 @@ include_directories("${CUDA_TOOLKIT_ROOT_DIR}/include") include_directories(thrift/gen-cpp) set(third_party_libs + knowhere + SPTAGLibStatic + arrow + jemalloc_pic + faiss +# libgpufaiss.a + openblas + lapack easyloggingpp sqlite thrift yaml-cpp - libgpufaiss.a - faiss - lapack - openblas prometheus-cpp-push prometheus-cpp-pull prometheus-cpp-core @@ -83,6 +89,8 @@ set(third_party_libs snappy zlib zstd + cudart + cublas ${CUDA_TOOLKIT_ROOT_DIR}/lib64/stubs/libnvidia-ml.so ) if (MEGASEARCH_WITH_ARROW STREQUAL "ON") @@ -95,8 +103,8 @@ if (GPU_VERSION STREQUAL "ON") pthread libgomp.a libgfortran.a - cudart - cublas +# cudart +# cublas ${CUDA_TOOLKIT_ROOT_DIR}/lib64/stubs/libnvidia-ml.so ) else() @@ -130,6 +138,7 @@ endif () cuda_add_library(milvus_engine STATIC ${engine_files}) +#cuda_add_library(milvus_engine SHARED ${engine_files}) target_link_libraries(milvus_engine ${engine_libs} ${third_party_libs}) add_library(metrics STATIC ${metrics_files}) diff --git a/cpp/src/wrapper/knowhere/data_transfer.h b/cpp/src/wrapper/knowhere/data_transfer.h new file mode 100644 index 0000000000..e5a9402cff --- /dev/null +++ b/cpp/src/wrapper/knowhere/data_transfer.h @@ -0,0 +1,29 @@ +//////////////////////////////////////////////////////////////////////////////// +// Copyright 上海赜睿信息科技有限公司(Zilliz) - All Rights Reserved +// Unauthorized copying of this file, via any medium is strictly prohibited. +// Proprietary and confidential. 
+//////////////////////////////////////////////////////////////////////////////// + +#pragma once + +#define GENDATASET(n,d,xb,ids)\ + size_t elems = (n) * (d);\ + std::vector shape{n, d};\ + auto tensor = ConstructFloatTensor((uint8_t *) (xb), elems * sizeof(float), shape);\ + std::vector tensors{tensor};\ + std::vector tensor_fields{ConstructFloatField("data")};\ + auto tensor_schema = std::make_shared(tensor_fields);\ + auto id_array = ConstructInt64Array((uint8_t *) (ids), (n) * sizeof(int64_t));\ + std::vector arrays{id_array};\ + std::vector array_fields{ConstructInt64Field("id")};\ + auto array_schema = std::make_shared(tensor_fields);\ + auto dataset = std::make_shared(std::move(arrays), array_schema, std::move(tensors), tensor_schema);\ + +#define GENQUERYDATASET(n,d,xb)\ + size_t elems = (n) * (d);\ + std::vector shape{(n), (d)};\ + auto tensor = ConstructFloatTensor((uint8_t *) (xb), elems * sizeof(float), shape);\ + std::vector tensors{tensor};\ + std::vector tensor_fields{ConstructFloatField("data")};\ + auto tensor_schema = std::make_shared(tensor_fields);\ + auto dataset = std::make_shared(std::move(tensors), tensor_schema);\ diff --git a/cpp/src/wrapper/knowhere/vec_impl.cpp b/cpp/src/wrapper/knowhere/vec_impl.cpp new file mode 100644 index 0000000000..bcb537fda2 --- /dev/null +++ b/cpp/src/wrapper/knowhere/vec_impl.cpp @@ -0,0 +1,102 @@ +//////////////////////////////////////////////////////////////////////////////// +// Copyright 上海赜睿信息科技有限公司(Zilliz) - All Rights Reserved +// Unauthorized copying of this file, via any medium is strictly prohibited. +// Proprietary and confidential. 
+//////////////////////////////////////////////////////////////////////////////// + +#include "knowhere/index/index.h" +#include "knowhere/index/index_model.h" +#include "knowhere/index/index_type.h" +#include "knowhere/adapter/sptag.h" +#include "knowhere/common/tensor.h" + +#include "vec_impl.h" +#include "data_transfer.h" + +//using Index = zilliz::knowhere::Index; +//using IndexModel = zilliz::knowhere::IndexModel; +//using IndexType = zilliz::knowhere::IndexType; +//using IndexPtr = std::shared_ptr; +//using IndexModelPtr = std::shared_ptr; + +namespace zilliz { +namespace vecwise { +namespace engine { + +using namespace zilliz::knowhere; + +void VecIndexImpl::BuildAll(const long &nb, + const float *xb, + const long *ids, + const Config &cfg, + const long &nt, + const float *xt) { + using namespace zilliz::knowhere; + + auto d = cfg["dim"].as(); + GENDATASET(nb, d, xb, ids) + + Config train_cfg; + Config add_cfg; + Config search_cfg; + auto model = index_->Train(dataset, cfg); + index_->set_index_model(model); + index_->Add(dataset, add_cfg); +} + +void VecIndexImpl::Add(const long &nb, const float *xb, const long *ids, const Config &cfg) { + // TODO: Assert index is trained; + + auto d = cfg["dim"].as(); + GENDATASET(nb, d, xb, ids) + + index_->Add(dataset, cfg); +} + +void VecIndexImpl::Search(const long &nq, const float *xq, float *dist, long *ids, const Config &cfg) { + // TODO: Assert index is trained; + + auto d = cfg["dim"].as(); + auto k = cfg["k"].as(); + GENQUERYDATASET(nq, d, xq) + + Config search_cfg; + auto res = index_->Search(dataset, cfg); + auto ids_array = res->array()[0]; + auto dis_array = res->array()[1]; + //{ + // auto& ids = ids_array; + // auto& dists = dis_array; + // std::stringstream ss_id; + // std::stringstream ss_dist; + // for (auto i = 0; i < 10; i++) { + // for (auto j = 0; j < k; ++j) { + // ss_id << *(ids->data()->GetValues(1, i * k + j)) << " "; + // ss_dist << *(dists->data()->GetValues(1, i * k + j)) << " "; + // } + // 
ss_id << std::endl; + // ss_dist << std::endl; + // } + // std::cout << "id\n" << ss_id.str() << std::endl; + // std::cout << "dist\n" << ss_dist.str() << std::endl; + //} + + // TODO: deep copy here. + auto p_ids = ids_array->data()->GetValues(1, 0); + auto p_dist = ids_array->data()->GetValues(1, 0); + + memcpy(ids, p_ids, sizeof(int64_t) * nq * k); + memcpy(dist, p_dist, sizeof(float) * nq * k); +} + +zilliz::knowhere::BinarySet VecIndexImpl::Serialize() { + return index_->Serialize(); +} + +void VecIndexImpl::Load(const zilliz::knowhere::BinarySet &index_binary) { + index_->Load(index_binary); +} + +} +} +} diff --git a/cpp/src/wrapper/knowhere/vec_impl.h b/cpp/src/wrapper/knowhere/vec_impl.h new file mode 100644 index 0000000000..f144dc43a2 --- /dev/null +++ b/cpp/src/wrapper/knowhere/vec_impl.h @@ -0,0 +1,37 @@ +//////////////////////////////////////////////////////////////////////////////// +// Copyright 上海赜睿信息科技有限公司(Zilliz) - All Rights Reserved +// Unauthorized copying of this file, via any medium is strictly prohibited. +// Proprietary and confidential. 
+//////////////////////////////////////////////////////////////////////////////// + +#pragma once + +#include "knowhere/index/vector_index/vector_index.h" +#include "vec_index.h" + + +namespace zilliz { +namespace vecwise { +namespace engine { + +class VecIndexImpl : public VecIndex { + public: + explicit VecIndexImpl(std::shared_ptr index):index_(std::move(index)){}; + void BuildAll(const long &nb, + const float *xb, + const long *ids, + const Config &cfg, + const long &nt, + const float *xt) override; + void Add(const long &nb, const float *xb, const long *ids, const Config &cfg) override; + zilliz::knowhere::BinarySet Serialize() override; + void Load(const zilliz::knowhere::BinarySet &index_binary) override; + void Search(const long &nq, const float *xq, float *dist, long *ids, const Config &cfg) override; + + private: + std::shared_ptr index_ = nullptr; +}; + +} +} +} diff --git a/cpp/src/wrapper/knowhere/vec_index.cpp b/cpp/src/wrapper/knowhere/vec_index.cpp new file mode 100644 index 0000000000..1365836fd9 --- /dev/null +++ b/cpp/src/wrapper/knowhere/vec_index.cpp @@ -0,0 +1,31 @@ +//////////////////////////////////////////////////////////////////////////////// +// Copyright 上海赜睿信息科技有限公司(Zilliz) - All Rights Reserved +// Unauthorized copying of this file, via any medium is strictly prohibited. +// Proprietary and confidential. 
+//////////////////////////////////////////////////////////////////////////////// +#include "knowhere/index/vector_index/ivf.h" +#include "knowhere/index/vector_index/gpu_ivf.h" + +#include "vec_index.h" +#include "vec_impl.h" + + +namespace zilliz { +namespace vecwise { +namespace engine { + +VecIndexPtr GetVecIndexFactory(const std::string &index_type) { + std::shared_ptr index; + if (index_type == "IVF") { + index = std::make_shared(); + } else if (index_type == "GPUIVF") { + index = std::make_shared(); + } + auto ret_index = std::make_shared(index); + //return std::static_pointer_cast(std::make_shared(index)); + return std::make_shared(index); +} + +} +} +} diff --git a/cpp/src/wrapper/knowhere/vec_index.h b/cpp/src/wrapper/knowhere/vec_index.h new file mode 100644 index 0000000000..3094eb336b --- /dev/null +++ b/cpp/src/wrapper/knowhere/vec_index.h @@ -0,0 +1,56 @@ +//////////////////////////////////////////////////////////////////////////////// +// Copyright 上海赜睿信息科技有限公司(Zilliz) - All Rights Reserved +// Unauthorized copying of this file, via any medium is strictly prohibited. +// Proprietary and confidential. 
+//////////////////////////////////////////////////////////////////////////////// + +#pragma once + +#include +#include + +#include "knowhere/common/config.h" +#include "knowhere/common/binary_set.h" + + +namespace zilliz { +namespace vecwise { +namespace engine { + +using Config = zilliz::knowhere::Config; + +class VecIndex { + public: + virtual void BuildAll(const long &nb, + const float *xb, + const long *ids, + const Config &cfg, + const long &nt = 0, + const float *xt = nullptr) = 0; + + virtual void Add(const long &nb, + const float *xb, + const long *ids, + const Config &cfg) = 0; + + virtual void Search(const long &nq, + const float *xq, + float *dist, + long *ids, + const Config &cfg) = 0; + + virtual zilliz::knowhere::BinarySet Serialize() = 0; + + virtual void Load(const zilliz::knowhere::BinarySet &index_binary) = 0; +}; + +using VecIndexPtr = std::shared_ptr; + +extern VecIndexPtr GetVecIndexFactory(const std::string &index_type); + +// TODO +extern VecIndexPtr LoadVecIndex(const zilliz::knowhere::BinarySet &index_binary); + +} +} +} diff --git a/cpp/unittest/CMakeLists.txt b/cpp/unittest/CMakeLists.txt index 2e2b1f91b6..0fd521357d 100644 --- a/cpp/unittest/CMakeLists.txt +++ b/cpp/unittest/CMakeLists.txt @@ -45,7 +45,8 @@ set(unittest_libs add_subdirectory(server) add_subdirectory(db) -add_subdirectory(faiss_wrapper) +#add_subdirectory(faiss_wrapper) +add_subdirectory(index_wrapper) add_subdirectory(license) add_subdirectory(metrics) add_subdirectory(storage) \ No newline at end of file diff --git a/cpp/unittest/index_wrapper/CMakeLists.txt b/cpp/unittest/index_wrapper/CMakeLists.txt new file mode 100644 index 0000000000..5c4b71227c --- /dev/null +++ b/cpp/unittest/index_wrapper/CMakeLists.txt @@ -0,0 +1,19 @@ +include_directories("${CUDA_TOOLKIT_ROOT_DIR}/include") +link_directories("${CUDA_TOOLKIT_ROOT_DIR}/lib64") + +aux_source_directory(${MILVUS_ENGINE_SRC}/wrapper/knowhere knowhere_src) + +set(knowhere_libs + knowhere + SPTAGLibStatic + arrow + 
jemalloc_pic + faiss + openblas + lapack + cudart + cublas + ) + +add_executable(knowhere_test knowhere_test.cpp ${knowhere_src}) +target_link_libraries(knowhere_test ${knowhere_libs} ${unittest_libs}) \ No newline at end of file diff --git a/cpp/unittest/index_wrapper/knowhere_test.cpp b/cpp/unittest/index_wrapper/knowhere_test.cpp new file mode 100644 index 0000000000..caf287340e --- /dev/null +++ b/cpp/unittest/index_wrapper/knowhere_test.cpp @@ -0,0 +1,93 @@ +//////////////////////////////////////////////////////////////////////////////// +// Copyright 上海赜睿信息科技有限公司(Zilliz) - All Rights Reserved +// Unauthorized copying of this file, via any medium is strictly prohibited. +// Proprietary and confidential. +//////////////////////////////////////////////////////////////////////////////// + +#include + +#include + +using namespace zilliz::vecwise::engine; + +TEST(knowhere_test, ivf_test) { + auto d = 128; + auto nt = 1000; + auto nb = 10000; + auto nq = 10; + //{ + // std::vector xb; + // std::vector xt; + // std::vector xq; + // std::vector ids; + // + // //prepare train data + // std::uniform_real_distribution<> dis_xt(-1.0, 1.0); + // std::random_device rd; + // std::mt19937 gen(rd()); + // xt.resize(nt*d); + // for (size_t i = 0; i < nt * d; i++) { + // xt[i] = dis_xt(gen); + // } + // xb.resize(nb*d); + // ids.resize(nb); + // for (size_t i = 0; i < nb * d; i++) { + // xb[i] = dis_xt(gen); + // if (i < nb) { + // ids[i] = i; + // } + // } + // xq.resize(nq*d); + // for (size_t i = 0; i < nq * d; i++) { + // xq[i] = dis_xt(gen); + // } + //} + + auto elems = nb * d; + auto p_data = (float *) malloc(elems * sizeof(float)); + auto p_id = (int64_t *) malloc(elems * sizeof(int64_t)); + assert(p_data != nullptr && p_id != nullptr); + + for (auto i = 0; i < nb; ++i) { + for (auto j = 0; j < d; ++j) { + p_data[i * d + j] = drand48(); + } + p_data[d * i] += i / 1000.; + p_id[i] = i; + } + + auto q_elems = nq * d; + auto q_data = (float *) malloc(q_elems * 
sizeof(float)); + + for (auto i = 0; i < nq; ++i) { + for (auto j = 0; j < d; ++j) { + q_data[i * d + j] = drand48(); + } + q_data[d * i] += i / 1000.; + } + + Config build_cfg = Config::object{ + {"dim", d}, + {"nlist", 100}, + }; + + auto k = 10; + Config search_cfg = Config::object{ + {"dim", d}, + {"k", k}, + }; + + std::vector ret_dist(nq*k); + std::vector ret_ids(nq*k); + + const std::string& index_type = "IVF"; + auto index = GetVecIndexFactory(index_type); + index->BuildAll(nb, p_data, p_id, build_cfg); + + auto add_bin = index->Serialize(); + index->Load(add_bin); + + index->Search(nq, q_data, ret_dist.data(), ret_ids.data(), search_cfg); + + std::cout << "he"; +} From a8b068db5770b28639025ed2b7a3f460b449b4db Mon Sep 17 00:00:00 2001 From: "xj.lin" Date: Wed, 26 Jun 2019 19:14:39 +0800 Subject: [PATCH 2/7] 1. git add submodule knowhere and set default branch 2. Update README.md about how to use git submodule Former-commit-id: fd996485d8ef0b09055c4f3e8c20838ce9fd95ad --- .gitmodules | 4 ++++ cpp/README.md | 3 +++ cpp/thirdparty/knowhere | 1 + 3 files changed, 8 insertions(+) create mode 100644 .gitmodules create mode 160000 cpp/thirdparty/knowhere diff --git a/.gitmodules b/.gitmodules new file mode 100644 index 0000000000..297cf0e592 --- /dev/null +++ b/.gitmodules @@ -0,0 +1,4 @@ +[submodule "cpp/thirdparty/knowhere"] + path = cpp/thirdparty/knowhere + url = git@192.168.1.105:xiaojun.lin/knowhere.git + branch = develop diff --git a/cpp/README.md b/cpp/README.md index 0c7706df23..d7297aecb0 100644 --- a/cpp/README.md +++ b/cpp/README.md @@ -12,6 +12,9 @@ cmake_build/src/milvus_server is the server cmake_build/src/libmilvus_engine.a is the static library + git submodule init + git submodule update + cd [sourcecode path]/cpp ./build.sh -t Debug ./build.sh -t Release diff --git a/cpp/thirdparty/knowhere b/cpp/thirdparty/knowhere new file mode 160000 index 0000000000..291b3b4226 --- /dev/null +++ b/cpp/thirdparty/knowhere @@ -0,0 +1 @@ +Subproject commit 
291b3b422664f2509bab79d5cc63823dedbe903c From 82150885d0b57d45b78f82d0b11e880b54f1e3fb Mon Sep 17 00:00:00 2001 From: "xj.lin" Date: Tue, 2 Jul 2019 18:37:33 +0800 Subject: [PATCH 3/7] update wrapper and wrapper test Former-commit-id: a57d040aae7d8f5ba20c99ea6a0c6220efcaeacd --- cpp/src/wrapper/knowhere/data_transfer.cpp | 48 +++++ cpp/src/wrapper/knowhere/data_transfer.h | 35 ++-- cpp/src/wrapper/knowhere/vec_impl.cpp | 25 +-- cpp/src/wrapper/knowhere/vec_impl.h | 3 +- cpp/src/wrapper/knowhere/vec_index.cpp | 18 +- cpp/src/wrapper/knowhere/vec_index.h | 8 +- cpp/thirdparty/knowhere | 2 +- cpp/unittest/index_wrapper/CMakeLists.txt | 6 +- cpp/unittest/index_wrapper/knowhere_test.cpp | 200 ++++++++++++------- cpp/unittest/index_wrapper/utils.cpp | 81 ++++++++ cpp/unittest/index_wrapper/utils.h | 61 ++++++ 11 files changed, 369 insertions(+), 118 deletions(-) create mode 100644 cpp/src/wrapper/knowhere/data_transfer.cpp create mode 100644 cpp/unittest/index_wrapper/utils.cpp create mode 100644 cpp/unittest/index_wrapper/utils.h diff --git a/cpp/src/wrapper/knowhere/data_transfer.cpp b/cpp/src/wrapper/knowhere/data_transfer.cpp new file mode 100644 index 0000000000..af5ad212e4 --- /dev/null +++ b/cpp/src/wrapper/knowhere/data_transfer.cpp @@ -0,0 +1,48 @@ +//////////////////////////////////////////////////////////////////////////////// +// Copyright 上海赜睿信息科技有限公司(Zilliz) - All Rights Reserved +// Unauthorized copying of this file, via any medium is strictly prohibited. +// Proprietary and confidential. 
+//////////////////////////////////////////////////////////////////////////////// + +#include "data_transfer.h" + + +namespace zilliz { +namespace vecwise { +namespace engine { + +using namespace zilliz::knowhere; + +DatasetPtr +GenDatasetWithIds(const int64_t &nb, const int64_t &dim, const float *xb, const long *ids) { + std::vector shape{nb, dim}; + auto tensor = ConstructFloatTensor((uint8_t *) xb, nb * dim * sizeof(float), shape); + std::vector tensors{tensor}; + std::vector tensor_fields{ConstructFloatField("data")}; + auto tensor_schema = std::make_shared(tensor_fields); + + auto id_array = ConstructInt64Array((uint8_t *) ids, nb * sizeof(int64_t)); + std::vector arrays{id_array}; + std::vector array_fields{ConstructInt64Field("id")}; + auto array_schema = std::make_shared(array_fields); // schema of the "id" array, not of the float tensor + + auto dataset = std::make_shared(std::move(arrays), array_schema, + std::move(tensors), tensor_schema); + return dataset; +} + +DatasetPtr +GenDataset(const int64_t &nb, const int64_t &dim, const float *xb) { + std::vector shape{nb, dim}; + auto tensor = ConstructFloatTensor((uint8_t *) xb, nb * dim * sizeof(float), shape); + std::vector tensors{tensor}; + std::vector tensor_fields{ConstructFloatField("data")}; + auto tensor_schema = std::make_shared(tensor_fields); + + auto dataset = std::make_shared(std::move(tensors), tensor_schema); + return dataset; +} + +} +} +} diff --git a/cpp/src/wrapper/knowhere/data_transfer.h b/cpp/src/wrapper/knowhere/data_transfer.h index e5a9402cff..c99cd1c742 100644 --- a/cpp/src/wrapper/knowhere/data_transfer.h +++ b/cpp/src/wrapper/knowhere/data_transfer.h @@ -6,24 +6,19 @@ #pragma once -#define GENDATASET(n,d,xb,ids)\ - size_t elems = (n) * (d);\ - std::vector shape{n, d};\ - auto tensor = ConstructFloatTensor((uint8_t *) (xb), elems * sizeof(float), shape);\ - std::vector tensors{tensor};\ - std::vector tensor_fields{ConstructFloatField("data")};\ - auto tensor_schema = std::make_shared(tensor_fields);\ - auto id_array = 
ConstructInt64Array((uint8_t *) (ids), (n) * sizeof(int64_t));\ - std::vector arrays{id_array};\ - std::vector array_fields{ConstructInt64Field("id")};\ - auto array_schema = std::make_shared(tensor_fields);\ - auto dataset = std::make_shared(std::move(arrays), array_schema, std::move(tensors), tensor_schema);\ +#include "knowhere/adapter/structure.h" -#define GENQUERYDATASET(n,d,xb)\ - size_t elems = (n) * (d);\ - std::vector shape{(n), (d)};\ - auto tensor = ConstructFloatTensor((uint8_t *) (xb), elems * sizeof(float), shape);\ - std::vector tensors{tensor};\ - std::vector tensor_fields{ConstructFloatField("data")};\ - auto tensor_schema = std::make_shared(tensor_fields);\ - auto dataset = std::make_shared(std::move(tensors), tensor_schema);\ + +namespace zilliz { +namespace vecwise { +namespace engine { + +extern zilliz::knowhere::DatasetPtr +GenDatasetWithIds(const int64_t &nb, const int64_t &dim, const float *xb, const long *ids); + +extern zilliz::knowhere::DatasetPtr +GenDataset(const int64_t &nb, const int64_t &dim, const float *xb); + +} +} +} diff --git a/cpp/src/wrapper/knowhere/vec_impl.cpp b/cpp/src/wrapper/knowhere/vec_impl.cpp index bcb537fda2..e24d470acc 100644 --- a/cpp/src/wrapper/knowhere/vec_impl.cpp +++ b/cpp/src/wrapper/knowhere/vec_impl.cpp @@ -13,11 +13,6 @@ #include "vec_impl.h" #include "data_transfer.h" -//using Index = zilliz::knowhere::Index; -//using IndexModel = zilliz::knowhere::IndexModel; -//using IndexType = zilliz::knowhere::IndexType; -//using IndexPtr = std::shared_ptr; -//using IndexModelPtr = std::shared_ptr; namespace zilliz { namespace vecwise { @@ -31,24 +26,21 @@ void VecIndexImpl::BuildAll(const long &nb, const Config &cfg, const long &nt, const float *xt) { - using namespace zilliz::knowhere; - auto d = cfg["dim"].as(); - GENDATASET(nb, d, xb, ids) + auto dataset = GenDatasetWithIds(nb, d, xb, ids); - Config train_cfg; - Config add_cfg; - Config search_cfg; + auto preprocessor = index_->BuildPreprocessor(dataset, cfg); 
+ index_->set_preprocessor(preprocessor); auto model = index_->Train(dataset, cfg); index_->set_index_model(model); - index_->Add(dataset, add_cfg); + index_->Add(dataset, cfg); } void VecIndexImpl::Add(const long &nb, const float *xb, const long *ids, const Config &cfg) { - // TODO: Assert index is trained; + // TODO(linxj): Assert index is trained; auto d = cfg["dim"].as(); - GENDATASET(nb, d, xb, ids) + auto dataset = GenDatasetWithIds(nb, d, xb, ids); index_->Add(dataset, cfg); } @@ -58,12 +50,13 @@ void VecIndexImpl::Search(const long &nq, const float *xq, float *dist, long *id auto d = cfg["dim"].as(); auto k = cfg["k"].as(); - GENQUERYDATASET(nq, d, xq) + auto dataset = GenDataset(nq, d, xq); Config search_cfg; auto res = index_->Search(dataset, cfg); auto ids_array = res->array()[0]; auto dis_array = res->array()[1]; + //{ // auto& ids = ids_array; // auto& dists = dis_array; @@ -81,10 +74,10 @@ void VecIndexImpl::Search(const long &nq, const float *xq, float *dist, long *id // std::cout << "dist\n" << ss_dist.str() << std::endl; //} - // TODO: deep copy here. auto p_ids = ids_array->data()->GetValues(1, 0); - auto p_dist = ids_array->data()->GetValues(1, 0); + auto p_dist = dis_array->data()->GetValues(1, 0); // distances come from result array [1], not the id array + // TODO(linxj): avoid copy here. 
memcpy(ids, p_ids, sizeof(int64_t) * nq * k); memcpy(dist, p_dist, sizeof(float) * nq * k); } diff --git a/cpp/src/wrapper/knowhere/vec_impl.h b/cpp/src/wrapper/knowhere/vec_impl.h index f144dc43a2..25f7d16548 100644 --- a/cpp/src/wrapper/knowhere/vec_impl.h +++ b/cpp/src/wrapper/knowhere/vec_impl.h @@ -7,6 +7,7 @@ #pragma once #include "knowhere/index/vector_index/vector_index.h" + #include "vec_index.h" @@ -16,7 +17,7 @@ namespace engine { class VecIndexImpl : public VecIndex { public: - explicit VecIndexImpl(std::shared_ptr index):index_(std::move(index)){}; + explicit VecIndexImpl(std::shared_ptr index) : index_(std::move(index)) {}; void BuildAll(const long &nb, const float *xb, const long *ids, diff --git a/cpp/src/wrapper/knowhere/vec_index.cpp b/cpp/src/wrapper/knowhere/vec_index.cpp index 1365836fd9..171388d0af 100644 --- a/cpp/src/wrapper/knowhere/vec_index.cpp +++ b/cpp/src/wrapper/knowhere/vec_index.cpp @@ -5,6 +5,7 @@ //////////////////////////////////////////////////////////////////////////////// #include "knowhere/index/vector_index/ivf.h" #include "knowhere/index/vector_index/gpu_ivf.h" +#include "knowhere/index/vector_index/cpu_kdt_rng.h" #include "vec_index.h" #include "vec_impl.h" @@ -14,18 +15,29 @@ namespace zilliz { namespace vecwise { namespace engine { +// TODO(linxj): index_type => enum struct VecIndexPtr GetVecIndexFactory(const std::string &index_type) { std::shared_ptr index; if (index_type == "IVF") { index = std::make_shared(); } else if (index_type == "GPUIVF") { - index = std::make_shared(); + index = std::make_shared(0); + } else if (index_type == "SPTAG") { + index = std::make_shared(); } - auto ret_index = std::make_shared(index); - //return std::static_pointer_cast(std::make_shared(index)); + // TODO(linxj): Support NSG + //else if (index_type == "NSG") { + // index = std::make_shared(); + //} return std::make_shared(index); } +VecIndexPtr LoadVecIndex(const std::string &index_type, const zilliz::knowhere::BinarySet 
&index_binary) { + auto index = GetVecIndexFactory(index_type); + index->Load(index_binary); + return index; +} + } } } diff --git a/cpp/src/wrapper/knowhere/vec_index.h b/cpp/src/wrapper/knowhere/vec_index.h index 3094eb336b..b03c43a36b 100644 --- a/cpp/src/wrapper/knowhere/vec_index.h +++ b/cpp/src/wrapper/knowhere/vec_index.h @@ -17,6 +17,7 @@ namespace zilliz { namespace vecwise { namespace engine { +// TODO(linxj): jsoncons => rapidjson or other. using Config = zilliz::knowhere::Config; class VecIndex { @@ -31,13 +32,13 @@ class VecIndex { virtual void Add(const long &nb, const float *xb, const long *ids, - const Config &cfg) = 0; + const Config &cfg = Config()) = 0; virtual void Search(const long &nq, const float *xq, float *dist, long *ids, - const Config &cfg) = 0; + const Config &cfg = Config()) = 0; virtual zilliz::knowhere::BinarySet Serialize() = 0; @@ -48,8 +49,7 @@ using VecIndexPtr = std::shared_ptr; extern VecIndexPtr GetVecIndexFactory(const std::string &index_type); -// TODO -extern VecIndexPtr LoadVecIndex(const zilliz::knowhere::BinarySet &index_binary); +extern VecIndexPtr LoadVecIndex(const std::string &index_type, const zilliz::knowhere::BinarySet &index_binary); } } diff --git a/cpp/thirdparty/knowhere b/cpp/thirdparty/knowhere index 291b3b4226..32187bacba 160000 --- a/cpp/thirdparty/knowhere +++ b/cpp/thirdparty/knowhere @@ -1 +1 @@ -Subproject commit 291b3b422664f2509bab79d5cc63823dedbe903c +Subproject commit 32187bacbaac0460676f5f6aa54ad904f5f2b5bc diff --git a/cpp/unittest/index_wrapper/CMakeLists.txt b/cpp/unittest/index_wrapper/CMakeLists.txt index 5c4b71227c..51bd97b575 100644 --- a/cpp/unittest/index_wrapper/CMakeLists.txt +++ b/cpp/unittest/index_wrapper/CMakeLists.txt @@ -3,6 +3,9 @@ link_directories("${CUDA_TOOLKIT_ROOT_DIR}/lib64") aux_source_directory(${MILVUS_ENGINE_SRC}/wrapper/knowhere knowhere_src) +set(helper + utils.cpp) + set(knowhere_libs knowhere SPTAGLibStatic @@ -11,9 +14,10 @@ set(knowhere_libs faiss openblas lapack 
+ tbb cudart cublas ) -add_executable(knowhere_test knowhere_test.cpp ${knowhere_src}) +add_executable(knowhere_test knowhere_test.cpp ${knowhere_src} ${helper}) target_link_libraries(knowhere_test ${knowhere_libs} ${unittest_libs}) \ No newline at end of file diff --git a/cpp/unittest/index_wrapper/knowhere_test.cpp b/cpp/unittest/index_wrapper/knowhere_test.cpp index caf287340e..58b0d5a4b2 100644 --- a/cpp/unittest/index_wrapper/knowhere_test.cpp +++ b/cpp/unittest/index_wrapper/knowhere_test.cpp @@ -8,86 +8,142 @@ #include +#include "utils.h" + + using namespace zilliz::vecwise::engine; +using namespace zilliz::knowhere; -TEST(knowhere_test, ivf_test) { - auto d = 128; - auto nt = 1000; - auto nb = 10000; - auto nq = 10; - //{ - // std::vector xb; - // std::vector xt; - // std::vector xq; - // std::vector ids; - // - // //prepare train data - // std::uniform_real_distribution<> dis_xt(-1.0, 1.0); - // std::random_device rd; - // std::mt19937 gen(rd()); - // xt.resize(nt*d); - // for (size_t i = 0; i < nt * d; i++) { - // xt[i] = dis_xt(gen); - // } - // xb.resize(nb*d); - // ids.resize(nb); - // for (size_t i = 0; i < nb * d; i++) { - // xb[i] = dis_xt(gen); - // if (i < nb) { - // ids[i] = i; - // } - // } - // xq.resize(nq*d); - // for (size_t i = 0; i < nq * d; i++) { - // xq[i] = dis_xt(gen); - // } - //} +using ::testing::TestWithParam; +using ::testing::Values; +using ::testing::Combine; - auto elems = nb * d; - auto p_data = (float *) malloc(elems * sizeof(float)); - auto p_id = (int64_t *) malloc(elems * sizeof(int64_t)); - assert(p_data != nullptr && p_id != nullptr); - for (auto i = 0; i < nb; ++i) { - for (auto j = 0; j < d; ++j) { - p_data[i * d + j] = drand48(); - } - p_data[d * i] += i / 1000.; - p_id[i] = i; +class KnowhereWrapperTest + : public TestWithParam<::std::tuple> { + protected: + void SetUp() override { + std::string generator_type; + std::tie(index_type, generator_type, dim, nb, nq, k, train_cfg, search_cfg) = GetParam(); + + //auto 
generator = GetGenerateFactory(generator_type); + auto generator = std::make_shared(); + generator->GenData(dim, nb, nq, xb, xq, ids, k, gt_ids); + + index_ = GetVecIndexFactory(index_type); } - auto q_elems = nq * d; - auto q_data = (float *) malloc(q_elems * sizeof(float)); + protected: + std::string index_type; + Config train_cfg; + Config search_cfg; - for (auto i = 0; i < nq; ++i) { - for (auto j = 0; j < d; ++j) { - q_data[i * d + j] = drand48(); - } - q_data[d * i] += i / 1000.; + int dim = 64; + int nb = 10000; + int nq = 10; + int k = 10; + std::vector xb; + std::vector xq; + std::vector ids; + + VecIndexPtr index_ = nullptr; + + // Ground Truth + std::vector gt_ids; +}; + +INSTANTIATE_TEST_CASE_P(WrapperParam, KnowhereWrapperTest, + Values( + // ["Index type", "Generator type", "dim", "nb", "nq", "k", "build config", "search config"] + std::make_tuple("IVF", "Default", + 64, 10000, 10, 10, + Config::object{{"nlist", 100}, {"dim", 64}}, + Config::object{{"dim", 64}, {"k", 10}, {"nprobe", 20}} + ), + std::make_tuple("SPTAG", "Default", + 64, 10000, 10, 10, + Config::object{{"TPTNumber", 1}, {"dim", 64}}, + Config::object{{"dim", 64}, {"k", 10}} + ) + ) +); + +void AssertAnns(const std::vector >, + const std::vector &res, + const int &nq, + const int &k) { + EXPECT_EQ(res.size(), nq * k); + + for (auto i = 0; i < nq; i++) { + EXPECT_EQ(gt[i * k], res[i * k]); } - Config build_cfg = Config::object{ - {"dim", d}, - {"nlist", 100}, - }; + int match = 0; + for (int i = 0; i < nq; ++i) { + for (int j = 0; j < k; ++j) { + for (int l = 0; l < k; ++l) { + if (gt[i * nq + j] == res[i * nq + l]) match++; + } + } + } - auto k = 10; - Config search_cfg = Config::object{ - {"dim", d}, - {"k", k}, - }; - - std::vector ret_dist(nq*k); - std::vector ret_ids(nq*k); - - const std::string& index_type = "IVF"; - auto index = GetVecIndexFactory(index_type); - index->BuildAll(nb, p_data, p_id, build_cfg); - - auto add_bin = index->Serialize(); - index->Load(add_bin); - - 
index->Search(nq, q_data, ret_dist.data(), ret_ids.data(), search_cfg); - - std::cout << "he"; + // TODO(linxj): percision check + EXPECT_GT(float(match/nq*k), 0.5); +} + +TEST_P(KnowhereWrapperTest, base_test) { + std::vector res_ids; + float *D = new float[k * nq]; + res_ids.resize(nq * k); + + index_->BuildAll(nb, xb.data(), ids.data(), train_cfg); + index_->Search(nq, xq.data(), D, res_ids.data(), search_cfg); + AssertAnns(gt_ids, res_ids, nq, k); + delete[] D; +} + +TEST_P(KnowhereWrapperTest, serialize_test) { + std::vector res_ids; + float *D = new float[k * nq]; + res_ids.resize(nq * k); + + index_->BuildAll(nb, xb.data(), ids.data(), train_cfg); + index_->Search(nq, xq.data(), D, res_ids.data(), search_cfg); + AssertAnns(gt_ids, res_ids, nq, k); + + { + auto binaryset = index_->Serialize(); + int fileno = 0; + const std::string &base_name = "/tmp/wrapper_serialize_test_bin_"; + std::vector filename_list; + std::vector> meta_list; + for (auto &iter: binaryset.binary_map_) { + const std::string &filename = base_name + std::to_string(fileno); + FileIOWriter writer(filename); + writer(iter.second.data, iter.second.size); + + meta_list.push_back(std::make_pair(iter.first, iter.second.size)); + filename_list.push_back(filename); + ++fileno; + } + + BinarySet load_data_list; + for (int i = 0; i < filename_list.size() && i < meta_list.size(); ++i) { + auto bin_size = meta_list[i].second; + FileIOReader reader(filename_list[i]); + std::vector load_data(bin_size); + reader(load_data.data(), bin_size); + load_data_list.Append(meta_list[i].first, load_data); + } + + + res_ids.clear(); + res_ids.resize(nq * k); + auto new_index = GetVecIndexFactory(index_type); + new_index->Load(load_data_list); + new_index->Search(nq, xq.data(), D, res_ids.data(), search_cfg); + AssertAnns(gt_ids, res_ids, nq, k); + } + + delete[] D; } diff --git a/cpp/unittest/index_wrapper/utils.cpp b/cpp/unittest/index_wrapper/utils.cpp new file mode 100644 index 0000000000..e228ae001d --- 
/dev/null +++ b/cpp/unittest/index_wrapper/utils.cpp @@ -0,0 +1,81 @@ +//////////////////////////////////////////////////////////////////////////////// +// Copyright 上海赜睿信息科技有限公司(Zilliz) - All Rights Reserved +// Unauthorized copying of this file, via any medium is strictly prohibited. +// Proprietary and confidential. +//////////////////////////////////////////////////////////////////////////////// + +#include + +#include "utils.h" + + +DataGenPtr GetGenerateFactory(const std::string &gen_type) { + std::shared_ptr generator; + if (gen_type == "default") { + generator = std::make_shared(); + } + return generator; +} + +void DataGenBase::GenData(const int &dim, const int &nb, const int &nq, + float *xb, float *xq, long *ids, + const int &k, long *gt_ids) { + for (auto i = 0; i < nb; ++i) { + for (auto j = 0; j < dim; ++j) { + //p_data[i * d + j] = float(base + i); + xb[i * dim + j] = drand48(); + } + xb[dim * i] += i / 1000.; + ids[i] = i; + } + for (size_t i = 0; i < nq * dim; ++i) { + xq[i] = xb[i]; + } + + faiss::IndexFlatL2 index(dim); + //index.add_with_ids(nb, xb, ids); + index.add(nb, xb); + float *D = new float[k * nq]; + index.search(nq, xq, k, D, gt_ids); +} + +void DataGenBase::GenData(const int &dim, + const int &nb, + const int &nq, + std::vector &xb, + std::vector &xq, + std::vector &ids, + const int &k, + std::vector >_ids) { + xb.resize(nb * dim); + xq.resize(nq * dim); + ids.resize(nb); + gt_ids.resize(nq * k); + GenData(dim, nb, nq, xb.data(), xq.data(), ids.data(), k, gt_ids.data()); +} + +FileIOReader::FileIOReader(const std::string &fname) { + name = fname; + fs = std::fstream(name, std::ios::in | std::ios::binary); +} + +FileIOReader::~FileIOReader() { + fs.close(); +} + +size_t FileIOReader::operator()(void *ptr, size_t size) { + fs.read(reinterpret_cast(ptr), size); +} + +FileIOWriter::FileIOWriter(const std::string &fname) { + name = fname; + fs = std::fstream(name, std::ios::out | std::ios::binary); +} + +FileIOWriter::~FileIOWriter() { + 
fs.close(); +} + +size_t FileIOWriter::operator()(void *ptr, size_t size) { + fs.write(reinterpret_cast(ptr), size); +} diff --git a/cpp/unittest/index_wrapper/utils.h b/cpp/unittest/index_wrapper/utils.h new file mode 100644 index 0000000000..bbc52a011b --- /dev/null +++ b/cpp/unittest/index_wrapper/utils.h @@ -0,0 +1,61 @@ +//////////////////////////////////////////////////////////////////////////////// +// Copyright 上海赜睿信息科技有限公司(Zilliz) - All Rights Reserved +// Unauthorized copying of this file, via any medium is strictly prohibited. +// Proprietary and confidential. +//////////////////////////////////////////////////////////////////////////////// + +#pragma once + +#include +#include +#include +#include +#include + + +class DataGenBase; + +using DataGenPtr = std::shared_ptr; + +extern DataGenPtr GetGenerateFactory(const std::string &gen_type); + + +class DataGenBase { + public: + virtual void GenData(const int &dim, const int &nb, const int &nq, float *xb, float *xq, long *ids, + const int &k, long *gt_ids); + + virtual void GenData(const int &dim, + const int &nb, + const int &nq, + std::vector &xb, + std::vector &xq, + std::vector &ids, + const int &k, + std::vector >_ids); +}; + + +class SanityCheck : public DataGenBase { + public: + void GenData(const int &dim, const int &nb, const int &nq, float *xb, float *xq, long *ids, + const int &k, long *gt_ids) override; +}; + +struct FileIOWriter { + std::fstream fs; + std::string name; + + FileIOWriter(const std::string &fname); + ~FileIOWriter(); + size_t operator()(void *ptr, size_t size); +}; + +struct FileIOReader { + std::fstream fs; + std::string name; + + FileIOReader(const std::string &fname); + ~FileIOReader(); + size_t operator()(void *ptr, size_t size); +}; From 0224845fb3838301641269634e8fd48116b6af44 Mon Sep 17 00:00:00 2001 From: "xj.lin" Date: Tue, 2 Jul 2019 18:37:33 +0800 Subject: [PATCH 4/7] 1. 
update wrapper and wrapper-test Former-commit-id: 73af1a5b254676e9d4c6360b4f4cb8f79cc47514 --- cpp/src/wrapper/knowhere/data_transfer.cpp | 48 +++++ cpp/src/wrapper/knowhere/data_transfer.h | 35 ++-- cpp/src/wrapper/knowhere/vec_impl.cpp | 25 +-- cpp/src/wrapper/knowhere/vec_impl.h | 3 +- cpp/src/wrapper/knowhere/vec_index.cpp | 18 +- cpp/src/wrapper/knowhere/vec_index.h | 8 +- cpp/thirdparty/knowhere | 2 +- cpp/unittest/index_wrapper/CMakeLists.txt | 6 +- cpp/unittest/index_wrapper/knowhere_test.cpp | 200 ++++++++++++------- cpp/unittest/index_wrapper/utils.cpp | 81 ++++++++ cpp/unittest/index_wrapper/utils.h | 61 ++++++ 11 files changed, 369 insertions(+), 118 deletions(-) create mode 100644 cpp/src/wrapper/knowhere/data_transfer.cpp create mode 100644 cpp/unittest/index_wrapper/utils.cpp create mode 100644 cpp/unittest/index_wrapper/utils.h diff --git a/cpp/src/wrapper/knowhere/data_transfer.cpp b/cpp/src/wrapper/knowhere/data_transfer.cpp new file mode 100644 index 0000000000..af5ad212e4 --- /dev/null +++ b/cpp/src/wrapper/knowhere/data_transfer.cpp @@ -0,0 +1,48 @@ +//////////////////////////////////////////////////////////////////////////////// +// Copyright 上海赜睿信息科技有限公司(Zilliz) - All Rights Reserved +// Unauthorized copying of this file, via any medium is strictly prohibited. +// Proprietary and confidential. 
+//////////////////////////////////////////////////////////////////////////////// + +#include "data_transfer.h" + + +namespace zilliz { +namespace vecwise { +namespace engine { + +using namespace zilliz::knowhere; + +DatasetPtr +GenDatasetWithIds(const int64_t &nb, const int64_t &dim, const float *xb, const long *ids) { + std::vector shape{nb, dim}; + auto tensor = ConstructFloatTensor((uint8_t *) xb, nb * dim * sizeof(float), shape); + std::vector tensors{tensor}; + std::vector tensor_fields{ConstructFloatField("data")}; + auto tensor_schema = std::make_shared(tensor_fields); + + auto id_array = ConstructInt64Array((uint8_t *) ids, nb * sizeof(int64_t)); + std::vector arrays{id_array}; + std::vector array_fields{ConstructInt64Field("id")}; + auto array_schema = std::make_shared(tensor_fields); + + auto dataset = std::make_shared(std::move(arrays), array_schema, + std::move(tensors), tensor_schema); + return dataset; +} + +DatasetPtr +GenDataset(const int64_t &nb, const int64_t &dim, const float *xb) { + std::vector shape{nb, dim}; + auto tensor = ConstructFloatTensor((uint8_t *) xb, nb * dim * sizeof(float), shape); + std::vector tensors{tensor}; + std::vector tensor_fields{ConstructFloatField("data")}; + auto tensor_schema = std::make_shared(tensor_fields); + + auto dataset = std::make_shared(std::move(tensors), tensor_schema); + return dataset; +} + +} +} +} diff --git a/cpp/src/wrapper/knowhere/data_transfer.h b/cpp/src/wrapper/knowhere/data_transfer.h index e5a9402cff..c99cd1c742 100644 --- a/cpp/src/wrapper/knowhere/data_transfer.h +++ b/cpp/src/wrapper/knowhere/data_transfer.h @@ -6,24 +6,19 @@ #pragma once -#define GENDATASET(n,d,xb,ids)\ - size_t elems = (n) * (d);\ - std::vector shape{n, d};\ - auto tensor = ConstructFloatTensor((uint8_t *) (xb), elems * sizeof(float), shape);\ - std::vector tensors{tensor};\ - std::vector tensor_fields{ConstructFloatField("data")};\ - auto tensor_schema = std::make_shared(tensor_fields);\ - auto id_array = 
ConstructInt64Array((uint8_t *) (ids), (n) * sizeof(int64_t));\ - std::vector arrays{id_array};\ - std::vector array_fields{ConstructInt64Field("id")};\ - auto array_schema = std::make_shared(tensor_fields);\ - auto dataset = std::make_shared(std::move(arrays), array_schema, std::move(tensors), tensor_schema);\ +#include "knowhere/adapter/structure.h" -#define GENQUERYDATASET(n,d,xb)\ - size_t elems = (n) * (d);\ - std::vector shape{(n), (d)};\ - auto tensor = ConstructFloatTensor((uint8_t *) (xb), elems * sizeof(float), shape);\ - std::vector tensors{tensor};\ - std::vector tensor_fields{ConstructFloatField("data")};\ - auto tensor_schema = std::make_shared(tensor_fields);\ - auto dataset = std::make_shared(std::move(tensors), tensor_schema);\ + +namespace zilliz { +namespace vecwise { +namespace engine { + +extern zilliz::knowhere::DatasetPtr +GenDatasetWithIds(const int64_t &nb, const int64_t &dim, const float *xb, const long *ids); + +extern zilliz::knowhere::DatasetPtr +GenDataset(const int64_t &nb, const int64_t &dim, const float *xb); + +} +} +} diff --git a/cpp/src/wrapper/knowhere/vec_impl.cpp b/cpp/src/wrapper/knowhere/vec_impl.cpp index bcb537fda2..e24d470acc 100644 --- a/cpp/src/wrapper/knowhere/vec_impl.cpp +++ b/cpp/src/wrapper/knowhere/vec_impl.cpp @@ -13,11 +13,6 @@ #include "vec_impl.h" #include "data_transfer.h" -//using Index = zilliz::knowhere::Index; -//using IndexModel = zilliz::knowhere::IndexModel; -//using IndexType = zilliz::knowhere::IndexType; -//using IndexPtr = std::shared_ptr; -//using IndexModelPtr = std::shared_ptr; namespace zilliz { namespace vecwise { @@ -31,24 +26,21 @@ void VecIndexImpl::BuildAll(const long &nb, const Config &cfg, const long &nt, const float *xt) { - using namespace zilliz::knowhere; - auto d = cfg["dim"].as(); - GENDATASET(nb, d, xb, ids) + auto dataset = GenDatasetWithIds(nb, d, xb, ids); - Config train_cfg; - Config add_cfg; - Config search_cfg; + auto preprocessor = index_->BuildPreprocessor(dataset, cfg); 
+ index_->set_preprocessor(preprocessor); auto model = index_->Train(dataset, cfg); index_->set_index_model(model); - index_->Add(dataset, add_cfg); + index_->Add(dataset, cfg); } void VecIndexImpl::Add(const long &nb, const float *xb, const long *ids, const Config &cfg) { - // TODO: Assert index is trained; + // TODO(linxj): Assert index is trained; auto d = cfg["dim"].as(); - GENDATASET(nb, d, xb, ids) + auto dataset = GenDatasetWithIds(nb, d, xb, ids); index_->Add(dataset, cfg); } @@ -58,12 +50,13 @@ void VecIndexImpl::Search(const long &nq, const float *xq, float *dist, long *id auto d = cfg["dim"].as(); auto k = cfg["k"].as(); - GENQUERYDATASET(nq, d, xq) + auto dataset = GenDataset(nq, d, xq); Config search_cfg; auto res = index_->Search(dataset, cfg); auto ids_array = res->array()[0]; auto dis_array = res->array()[1]; + //{ // auto& ids = ids_array; // auto& dists = dis_array; @@ -81,10 +74,10 @@ void VecIndexImpl::Search(const long &nq, const float *xq, float *dist, long *id // std::cout << "dist\n" << ss_dist.str() << std::endl; //} - // TODO: deep copy here. auto p_ids = ids_array->data()->GetValues(1, 0); auto p_dist = ids_array->data()->GetValues(1, 0); + // TODO(linxj): avoid copy here. 
memcpy(ids, p_ids, sizeof(int64_t) * nq * k); memcpy(dist, p_dist, sizeof(float) * nq * k); } diff --git a/cpp/src/wrapper/knowhere/vec_impl.h b/cpp/src/wrapper/knowhere/vec_impl.h index f144dc43a2..25f7d16548 100644 --- a/cpp/src/wrapper/knowhere/vec_impl.h +++ b/cpp/src/wrapper/knowhere/vec_impl.h @@ -7,6 +7,7 @@ #pragma once #include "knowhere/index/vector_index/vector_index.h" + #include "vec_index.h" @@ -16,7 +17,7 @@ namespace engine { class VecIndexImpl : public VecIndex { public: - explicit VecIndexImpl(std::shared_ptr index):index_(std::move(index)){}; + explicit VecIndexImpl(std::shared_ptr index) : index_(std::move(index)) {}; void BuildAll(const long &nb, const float *xb, const long *ids, diff --git a/cpp/src/wrapper/knowhere/vec_index.cpp b/cpp/src/wrapper/knowhere/vec_index.cpp index 1365836fd9..171388d0af 100644 --- a/cpp/src/wrapper/knowhere/vec_index.cpp +++ b/cpp/src/wrapper/knowhere/vec_index.cpp @@ -5,6 +5,7 @@ //////////////////////////////////////////////////////////////////////////////// #include "knowhere/index/vector_index/ivf.h" #include "knowhere/index/vector_index/gpu_ivf.h" +#include "knowhere/index/vector_index/cpu_kdt_rng.h" #include "vec_index.h" #include "vec_impl.h" @@ -14,18 +15,29 @@ namespace zilliz { namespace vecwise { namespace engine { +// TODO(linxj): index_type => enum struct VecIndexPtr GetVecIndexFactory(const std::string &index_type) { std::shared_ptr index; if (index_type == "IVF") { index = std::make_shared(); } else if (index_type == "GPUIVF") { - index = std::make_shared(); + index = std::make_shared(0); + } else if (index_type == "SPTAG") { + index = std::make_shared(); } - auto ret_index = std::make_shared(index); - //return std::static_pointer_cast(std::make_shared(index)); + // TODO(linxj): Support NSG + //else if (index_type == "NSG") { + // index = std::make_shared(); + //} return std::make_shared(index); } +VecIndexPtr LoadVecIndex(const std::string &index_type, const zilliz::knowhere::BinarySet 
&index_binary) { + auto index = GetVecIndexFactory(index_type); + index->Load(index_binary); + return index; +} + } } } diff --git a/cpp/src/wrapper/knowhere/vec_index.h b/cpp/src/wrapper/knowhere/vec_index.h index 3094eb336b..b03c43a36b 100644 --- a/cpp/src/wrapper/knowhere/vec_index.h +++ b/cpp/src/wrapper/knowhere/vec_index.h @@ -17,6 +17,7 @@ namespace zilliz { namespace vecwise { namespace engine { +// TODO(linxj): jsoncons => rapidjson or other. using Config = zilliz::knowhere::Config; class VecIndex { @@ -31,13 +32,13 @@ class VecIndex { virtual void Add(const long &nb, const float *xb, const long *ids, - const Config &cfg) = 0; + const Config &cfg = Config()) = 0; virtual void Search(const long &nq, const float *xq, float *dist, long *ids, - const Config &cfg) = 0; + const Config &cfg = Config()) = 0; virtual zilliz::knowhere::BinarySet Serialize() = 0; @@ -48,8 +49,7 @@ using VecIndexPtr = std::shared_ptr; extern VecIndexPtr GetVecIndexFactory(const std::string &index_type); -// TODO -extern VecIndexPtr LoadVecIndex(const zilliz::knowhere::BinarySet &index_binary); +extern VecIndexPtr LoadVecIndex(const std::string &index_type, const zilliz::knowhere::BinarySet &index_binary); } } diff --git a/cpp/thirdparty/knowhere b/cpp/thirdparty/knowhere index 291b3b4226..32187bacba 160000 --- a/cpp/thirdparty/knowhere +++ b/cpp/thirdparty/knowhere @@ -1 +1 @@ -Subproject commit 291b3b422664f2509bab79d5cc63823dedbe903c +Subproject commit 32187bacbaac0460676f5f6aa54ad904f5f2b5bc diff --git a/cpp/unittest/index_wrapper/CMakeLists.txt b/cpp/unittest/index_wrapper/CMakeLists.txt index 5c4b71227c..51bd97b575 100644 --- a/cpp/unittest/index_wrapper/CMakeLists.txt +++ b/cpp/unittest/index_wrapper/CMakeLists.txt @@ -3,6 +3,9 @@ link_directories("${CUDA_TOOLKIT_ROOT_DIR}/lib64") aux_source_directory(${MILVUS_ENGINE_SRC}/wrapper/knowhere knowhere_src) +set(helper + utils.cpp) + set(knowhere_libs knowhere SPTAGLibStatic @@ -11,9 +14,10 @@ set(knowhere_libs faiss openblas lapack 
+ tbb cudart cublas ) -add_executable(knowhere_test knowhere_test.cpp ${knowhere_src}) +add_executable(knowhere_test knowhere_test.cpp ${knowhere_src} ${helper}) target_link_libraries(knowhere_test ${knowhere_libs} ${unittest_libs}) \ No newline at end of file diff --git a/cpp/unittest/index_wrapper/knowhere_test.cpp b/cpp/unittest/index_wrapper/knowhere_test.cpp index caf287340e..58b0d5a4b2 100644 --- a/cpp/unittest/index_wrapper/knowhere_test.cpp +++ b/cpp/unittest/index_wrapper/knowhere_test.cpp @@ -8,86 +8,142 @@ #include +#include "utils.h" + + using namespace zilliz::vecwise::engine; +using namespace zilliz::knowhere; -TEST(knowhere_test, ivf_test) { - auto d = 128; - auto nt = 1000; - auto nb = 10000; - auto nq = 10; - //{ - // std::vector xb; - // std::vector xt; - // std::vector xq; - // std::vector ids; - // - // //prepare train data - // std::uniform_real_distribution<> dis_xt(-1.0, 1.0); - // std::random_device rd; - // std::mt19937 gen(rd()); - // xt.resize(nt*d); - // for (size_t i = 0; i < nt * d; i++) { - // xt[i] = dis_xt(gen); - // } - // xb.resize(nb*d); - // ids.resize(nb); - // for (size_t i = 0; i < nb * d; i++) { - // xb[i] = dis_xt(gen); - // if (i < nb) { - // ids[i] = i; - // } - // } - // xq.resize(nq*d); - // for (size_t i = 0; i < nq * d; i++) { - // xq[i] = dis_xt(gen); - // } - //} +using ::testing::TestWithParam; +using ::testing::Values; +using ::testing::Combine; - auto elems = nb * d; - auto p_data = (float *) malloc(elems * sizeof(float)); - auto p_id = (int64_t *) malloc(elems * sizeof(int64_t)); - assert(p_data != nullptr && p_id != nullptr); - for (auto i = 0; i < nb; ++i) { - for (auto j = 0; j < d; ++j) { - p_data[i * d + j] = drand48(); - } - p_data[d * i] += i / 1000.; - p_id[i] = i; +class KnowhereWrapperTest + : public TestWithParam<::std::tuple> { + protected: + void SetUp() override { + std::string generator_type; + std::tie(index_type, generator_type, dim, nb, nq, k, train_cfg, search_cfg) = GetParam(); + + //auto 
generator = GetGenerateFactory(generator_type); + auto generator = std::make_shared(); + generator->GenData(dim, nb, nq, xb, xq, ids, k, gt_ids); + + index_ = GetVecIndexFactory(index_type); } - auto q_elems = nq * d; - auto q_data = (float *) malloc(q_elems * sizeof(float)); + protected: + std::string index_type; + Config train_cfg; + Config search_cfg; - for (auto i = 0; i < nq; ++i) { - for (auto j = 0; j < d; ++j) { - q_data[i * d + j] = drand48(); - } - q_data[d * i] += i / 1000.; + int dim = 64; + int nb = 10000; + int nq = 10; + int k = 10; + std::vector xb; + std::vector xq; + std::vector ids; + + VecIndexPtr index_ = nullptr; + + // Ground Truth + std::vector gt_ids; +}; + +INSTANTIATE_TEST_CASE_P(WrapperParam, KnowhereWrapperTest, + Values( + // ["Index type", "Generator type", "dim", "nb", "nq", "k", "build config", "search config"] + std::make_tuple("IVF", "Default", + 64, 10000, 10, 10, + Config::object{{"nlist", 100}, {"dim", 64}}, + Config::object{{"dim", 64}, {"k", 10}, {"nprobe", 20}} + ), + std::make_tuple("SPTAG", "Default", + 64, 10000, 10, 10, + Config::object{{"TPTNumber", 1}, {"dim", 64}}, + Config::object{{"dim", 64}, {"k", 10}} + ) + ) +); + +void AssertAnns(const std::vector >, + const std::vector &res, + const int &nq, + const int &k) { + EXPECT_EQ(res.size(), nq * k); + + for (auto i = 0; i < nq; i++) { + EXPECT_EQ(gt[i * k], res[i * k]); } - Config build_cfg = Config::object{ - {"dim", d}, - {"nlist", 100}, - }; + int match = 0; + for (int i = 0; i < nq; ++i) { + for (int j = 0; j < k; ++j) { + for (int l = 0; l < k; ++l) { + if (gt[i * nq + j] == res[i * nq + l]) match++; + } + } + } - auto k = 10; - Config search_cfg = Config::object{ - {"dim", d}, - {"k", k}, - }; - - std::vector ret_dist(nq*k); - std::vector ret_ids(nq*k); - - const std::string& index_type = "IVF"; - auto index = GetVecIndexFactory(index_type); - index->BuildAll(nb, p_data, p_id, build_cfg); - - auto add_bin = index->Serialize(); - index->Load(add_bin); - - 
index->Search(nq, q_data, ret_dist.data(), ret_ids.data(), search_cfg); - - std::cout << "he"; + // TODO(linxj): percision check + EXPECT_GT(float(match/nq*k), 0.5); +} + +TEST_P(KnowhereWrapperTest, base_test) { + std::vector res_ids; + float *D = new float[k * nq]; + res_ids.resize(nq * k); + + index_->BuildAll(nb, xb.data(), ids.data(), train_cfg); + index_->Search(nq, xq.data(), D, res_ids.data(), search_cfg); + AssertAnns(gt_ids, res_ids, nq, k); + delete[] D; +} + +TEST_P(KnowhereWrapperTest, serialize_test) { + std::vector res_ids; + float *D = new float[k * nq]; + res_ids.resize(nq * k); + + index_->BuildAll(nb, xb.data(), ids.data(), train_cfg); + index_->Search(nq, xq.data(), D, res_ids.data(), search_cfg); + AssertAnns(gt_ids, res_ids, nq, k); + + { + auto binaryset = index_->Serialize(); + int fileno = 0; + const std::string &base_name = "/tmp/wrapper_serialize_test_bin_"; + std::vector filename_list; + std::vector> meta_list; + for (auto &iter: binaryset.binary_map_) { + const std::string &filename = base_name + std::to_string(fileno); + FileIOWriter writer(filename); + writer(iter.second.data, iter.second.size); + + meta_list.push_back(std::make_pair(iter.first, iter.second.size)); + filename_list.push_back(filename); + ++fileno; + } + + BinarySet load_data_list; + for (int i = 0; i < filename_list.size() && i < meta_list.size(); ++i) { + auto bin_size = meta_list[i].second; + FileIOReader reader(filename_list[i]); + std::vector load_data(bin_size); + reader(load_data.data(), bin_size); + load_data_list.Append(meta_list[i].first, load_data); + } + + + res_ids.clear(); + res_ids.resize(nq * k); + auto new_index = GetVecIndexFactory(index_type); + new_index->Load(load_data_list); + new_index->Search(nq, xq.data(), D, res_ids.data(), search_cfg); + AssertAnns(gt_ids, res_ids, nq, k); + } + + delete[] D; } diff --git a/cpp/unittest/index_wrapper/utils.cpp b/cpp/unittest/index_wrapper/utils.cpp new file mode 100644 index 0000000000..e228ae001d --- 
/dev/null +++ b/cpp/unittest/index_wrapper/utils.cpp @@ -0,0 +1,81 @@ +//////////////////////////////////////////////////////////////////////////////// +// Copyright 上海赜睿信息科技有限公司(Zilliz) - All Rights Reserved +// Unauthorized copying of this file, via any medium is strictly prohibited. +// Proprietary and confidential. +//////////////////////////////////////////////////////////////////////////////// + +#include + +#include "utils.h" + + +DataGenPtr GetGenerateFactory(const std::string &gen_type) { + std::shared_ptr generator; + if (gen_type == "default") { + generator = std::make_shared(); + } + return generator; +} + +void DataGenBase::GenData(const int &dim, const int &nb, const int &nq, + float *xb, float *xq, long *ids, + const int &k, long *gt_ids) { + for (auto i = 0; i < nb; ++i) { + for (auto j = 0; j < dim; ++j) { + //p_data[i * d + j] = float(base + i); + xb[i * dim + j] = drand48(); + } + xb[dim * i] += i / 1000.; + ids[i] = i; + } + for (size_t i = 0; i < nq * dim; ++i) { + xq[i] = xb[i]; + } + + faiss::IndexFlatL2 index(dim); + //index.add_with_ids(nb, xb, ids); + index.add(nb, xb); + float *D = new float[k * nq]; + index.search(nq, xq, k, D, gt_ids); +} + +void DataGenBase::GenData(const int &dim, + const int &nb, + const int &nq, + std::vector &xb, + std::vector &xq, + std::vector &ids, + const int &k, + std::vector >_ids) { + xb.resize(nb * dim); + xq.resize(nq * dim); + ids.resize(nb); + gt_ids.resize(nq * k); + GenData(dim, nb, nq, xb.data(), xq.data(), ids.data(), k, gt_ids.data()); +} + +FileIOReader::FileIOReader(const std::string &fname) { + name = fname; + fs = std::fstream(name, std::ios::in | std::ios::binary); +} + +FileIOReader::~FileIOReader() { + fs.close(); +} + +size_t FileIOReader::operator()(void *ptr, size_t size) { + fs.read(reinterpret_cast(ptr), size); +} + +FileIOWriter::FileIOWriter(const std::string &fname) { + name = fname; + fs = std::fstream(name, std::ios::out | std::ios::binary); +} + +FileIOWriter::~FileIOWriter() { + 
fs.close(); +} + +size_t FileIOWriter::operator()(void *ptr, size_t size) { + fs.write(reinterpret_cast(ptr), size); +} diff --git a/cpp/unittest/index_wrapper/utils.h b/cpp/unittest/index_wrapper/utils.h new file mode 100644 index 0000000000..bbc52a011b --- /dev/null +++ b/cpp/unittest/index_wrapper/utils.h @@ -0,0 +1,61 @@ +//////////////////////////////////////////////////////////////////////////////// +// Copyright 上海赜睿信息科技有限公司(Zilliz) - All Rights Reserved +// Unauthorized copying of this file, via any medium is strictly prohibited. +// Proprietary and confidential. +//////////////////////////////////////////////////////////////////////////////// + +#pragma once + +#include +#include +#include +#include +#include + + +class DataGenBase; + +using DataGenPtr = std::shared_ptr; + +extern DataGenPtr GetGenerateFactory(const std::string &gen_type); + + +class DataGenBase { + public: + virtual void GenData(const int &dim, const int &nb, const int &nq, float *xb, float *xq, long *ids, + const int &k, long *gt_ids); + + virtual void GenData(const int &dim, + const int &nb, + const int &nq, + std::vector &xb, + std::vector &xq, + std::vector &ids, + const int &k, + std::vector >_ids); +}; + + +class SanityCheck : public DataGenBase { + public: + void GenData(const int &dim, const int &nb, const int &nq, float *xb, float *xq, long *ids, + const int &k, long *gt_ids) override; +}; + +struct FileIOWriter { + std::fstream fs; + std::string name; + + FileIOWriter(const std::string &fname); + ~FileIOWriter(); + size_t operator()(void *ptr, size_t size); +}; + +struct FileIOReader { + std::fstream fs; + std::string name; + + FileIOReader(const std::string &fname); + ~FileIOReader(); + size_t operator()(void *ptr, size_t size); +}; From 3d9a0df2bda8434b708656d46ff968616a03fb63 Mon Sep 17 00:00:00 2001 From: "xj.lin" Date: Tue, 2 Jul 2019 19:58:15 +0800 Subject: [PATCH 5/7] update README.md Former-commit-id: e28550bbbf254514b93e318a7aa8798274e5e55d --- cpp/README.md | 9 
++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/cpp/README.md b/cpp/README.md index c54797b03f..d13e6b7fcd 100644 --- a/cpp/README.md +++ b/cpp/README.md @@ -1,13 +1,12 @@ ### Compilation #### Step 1: install necessery tools - Install MySQL centos7 : - yum install gfortran qt4 flex bison mysql-devel + yum install gfortran qt4 flex bison mysql-devel mysql ubuntu16.04 : - sudo apt-get install gfortran qt4-qmake flex bison libmysqlclient-dev + sudo apt-get install gfortran qt4-qmake flex bison libmysqlclient-dev mysql-client If `libmysqlclient_r.so` does not exist after installing MySQL Development Files, you need to create a symbolic link: @@ -56,10 +55,10 @@ If you encounter the following error when building: ### Launch server Set config in cpp/conf/server_config.yaml -Add milvus/bin/lib to LD_LIBRARY_PATH +Add milvus/lib to LD_LIBRARY_PATH ``` -export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/path/to/milvus/bin/lib +export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/path/to/milvus/lib ``` Then launch server with config: From 00acbd9ff62dde53858451e74f4376f213867698 Mon Sep 17 00:00:00 2001 From: "xj.lin" Date: Wed, 3 Jul 2019 11:50:47 +0800 Subject: [PATCH 6/7] update R Former-commit-id: 668ee056c2b0de6a56465be8fb78f910935c0e5c --- cpp/src/CMakeLists.txt | 3 --- 1 file changed, 3 deletions(-) diff --git a/cpp/src/CMakeLists.txt b/cpp/src/CMakeLists.txt index b4d50d4822..1a770ef2ad 100644 --- a/cpp/src/CMakeLists.txt +++ b/cpp/src/CMakeLists.txt @@ -72,7 +72,6 @@ set(third_party_libs arrow jemalloc_pic faiss -# libgpufaiss.a openblas lapack easyloggingpp @@ -105,8 +104,6 @@ if (GPU_VERSION STREQUAL "ON") pthread libgomp.a libgfortran.a -# cudart -# cublas ${CUDA_TOOLKIT_ROOT_DIR}/lib64/stubs/libnvidia-ml.so ) else() From 88110a0d65d7d2d7956b95f49d6f0b45d5bf83c6 Mon Sep 17 00:00:00 2001 From: "xj.lin" Date: Wed, 3 Jul 2019 11:53:42 +0800 Subject: [PATCH 7/7] update ... 
Former-commit-id: 6b721737d082237cd84128228d187f7b0a336d9b --- cpp/cmake/DefineOptions.cmake | 3 --- cpp/src/CMakeLists.txt | 1 - 2 files changed, 4 deletions(-) diff --git a/cpp/cmake/DefineOptions.cmake b/cpp/cmake/DefineOptions.cmake index 1873688a17..82259d2eb5 100644 --- a/cpp/cmake/DefineOptions.cmake +++ b/cpp/cmake/DefineOptions.cmake @@ -72,9 +72,6 @@ define_option(MILVUS_WITH_FAISS "Build with FAISS library" OFF) define_option(MILVUS_WITH_FAISS_GPU_VERSION "Build with FAISS GPU version" OFF) -#define_option_string(MILVUS_FAISS_GPU_ARCH "Specifying which GPU architectures to build against" -# "-gencode=arch=compute_35,code=compute_35 -gencode=arch=compute_52,code=compute_52 -gencode=arch=compute_60,code=compute_60 -gencode=arch=compute_61,code=compute_61") - define_option(MILVUS_WITH_LAPACK "Build with LAPACK library" OFF) define_option(MILVUS_WITH_LZ4 "Build with lz4 compression" ON) diff --git a/cpp/src/CMakeLists.txt b/cpp/src/CMakeLists.txt index 1a770ef2ad..6a7fb6835e 100644 --- a/cpp/src/CMakeLists.txt +++ b/cpp/src/CMakeLists.txt @@ -137,7 +137,6 @@ endif () cuda_add_library(milvus_engine STATIC ${engine_files}) -#cuda_add_library(milvus_engine SHARED ${engine_files}) target_link_libraries(milvus_engine ${engine_libs} ${third_party_libs}) add_library(metrics STATIC ${metrics_files})