From d36afb4ae98dc9478a5de4ddd3cb1a008886c719 Mon Sep 17 00:00:00 2001 From: "xj.lin" Date: Wed, 26 Jun 2019 17:42:28 +0800 Subject: [PATCH 01/19] 1. update Cmakemodule to support autodownload knowhere 2. add basic knowhere wrapper and unittest Former-commit-id: ed32f9e851e014272bbd37f1554c3541ad0740bf --- cpp/cmake/DefineOptions.cmake | 10 +- cpp/cmake/ThirdPartyPackages.cmake | 57 +++++++++++ cpp/src/CMakeLists.txt | 21 ++-- cpp/src/wrapper/knowhere/data_transfer.h | 29 ++++++ cpp/src/wrapper/knowhere/vec_impl.cpp | 102 +++++++++++++++++++ cpp/src/wrapper/knowhere/vec_impl.h | 37 +++++++ cpp/src/wrapper/knowhere/vec_index.cpp | 31 ++++++ cpp/src/wrapper/knowhere/vec_index.h | 56 ++++++++++ cpp/unittest/CMakeLists.txt | 3 +- cpp/unittest/index_wrapper/CMakeLists.txt | 19 ++++ cpp/unittest/index_wrapper/knowhere_test.cpp | 93 +++++++++++++++++ 11 files changed, 447 insertions(+), 11 deletions(-) create mode 100644 cpp/src/wrapper/knowhere/data_transfer.h create mode 100644 cpp/src/wrapper/knowhere/vec_impl.cpp create mode 100644 cpp/src/wrapper/knowhere/vec_impl.h create mode 100644 cpp/src/wrapper/knowhere/vec_index.cpp create mode 100644 cpp/src/wrapper/knowhere/vec_index.h create mode 100644 cpp/unittest/index_wrapper/CMakeLists.txt create mode 100644 cpp/unittest/index_wrapper/knowhere_test.cpp diff --git a/cpp/cmake/DefineOptions.cmake b/cpp/cmake/DefineOptions.cmake index d95e7c7ed1..5e9cdd6f9b 100644 --- a/cpp/cmake/DefineOptions.cmake +++ b/cpp/cmake/DefineOptions.cmake @@ -68,20 +68,20 @@ define_option(MILVUS_WITH_BZ2 "Build with BZ2 compression" ON) define_option(MILVUS_WITH_EASYLOGGINGPP "Build with Easylogging++ library" ON) -define_option(MILVUS_WITH_FAISS "Build with FAISS library" ON) +define_option(MILVUS_WITH_FAISS "Build with FAISS library" OFF) -define_option(MILVUS_WITH_FAISS_GPU_VERSION "Build with FAISS GPU version" ON) +define_option(MILVUS_WITH_FAISS_GPU_VERSION "Build with FAISS GPU version" OFF) #define_option_string(MILVUS_FAISS_GPU_ARCH 
"Specifying which GPU architectures to build against" # "-gencode=arch=compute_35,code=compute_35 -gencode=arch=compute_52,code=compute_52 -gencode=arch=compute_60,code=compute_60 -gencode=arch=compute_61,code=compute_61") -define_option(MILVUS_WITH_LAPACK "Build with LAPACK library" ON) +define_option(MILVUS_WITH_LAPACK "Build with LAPACK library" OFF) define_option(MILVUS_WITH_LZ4 "Build with lz4 compression" ON) define_option(MILVUS_WITH_JSONCONS "Build with JSONCONS" OFF) -define_option(MILVUS_WITH_OPENBLAS "Build with OpenBLAS library" ON) +define_option(MILVUS_WITH_OPENBLAS "Build with OpenBLAS library" OFF) define_option(MILVUS_WITH_PROMETHEUS "Build with PROMETHEUS library" ON) @@ -99,6 +99,8 @@ define_option(MILVUS_WITH_YAMLCPP "Build with yaml-cpp library" ON) define_option(MILVUS_WITH_ZLIB "Build with zlib compression" ON) +define_option(MILVUS_WITH_KNOWHERE "Build with Knowhere" ON) + if(CMAKE_VERSION VERSION_LESS 3.7) set(MILVUS_WITH_ZSTD_DEFAULT OFF) else() diff --git a/cpp/cmake/ThirdPartyPackages.cmake b/cpp/cmake/ThirdPartyPackages.cmake index cb5f3532fe..72902d5219 100644 --- a/cpp/cmake/ThirdPartyPackages.cmake +++ b/cpp/cmake/ThirdPartyPackages.cmake @@ -23,6 +23,7 @@ set(MILVUS_THIRDPARTY_DEPENDENCIES Easylogging++ FAISS GTest + Knowhere JSONCONS LAPACK Lz4 @@ -58,6 +59,8 @@ macro(build_dependency DEPENDENCY_NAME) build_faiss() elseif("${DEPENDENCY_NAME}" STREQUAL "LAPACK") build_lapack() + elseif("${DEPENDENCY_NAME}" STREQUAL "Knowhere") + build_knowhere() elseif("${DEPENDENCY_NAME}" STREQUAL "Lz4") build_lz4() elseif ("${DEPENDENCY_NAME}" STREQUAL "GTest") @@ -239,6 +242,12 @@ else() set(FAISS_SOURCE_URL "https://github.com/facebookresearch/faiss/archive/${FAISS_VERSION}.tar.gz") endif() +if(DEFINED ENV{MILVUS_KNOWHERE_URL}) + set(KNOWHERE_SOURCE_URL "$ENV{MILVUS_KNOWHERE_URL}") +else() + set(KNOWHERE_SOURCE_URL "${CMAKE_SOURCE_DIR}/thirdparty/knowhere") +endif() + if (DEFINED ENV{MILVUS_GTEST_URL}) set(GTEST_SOURCE_URL 
"$ENV{MILVUS_GTEST_URL}") else () @@ -632,6 +641,54 @@ if(MILVUS_WITH_BZ2) include_directories(SYSTEM "${BZIP2_INCLUDE_DIR}") endif() +# ---------------------------------------------------------------------- +# Knowhere + +macro(build_knowhere) + message(STATUS "Building knowhere from source") + set(KNOWHERE_PREFIX "${CMAKE_CURRENT_BINARY_DIR}/knowhere_ep-prefix/src/knowhere_ep") + set(KNOWHERE_INCLUDE_DIR "${KNOWHERE_PREFIX}/include") + set(KNOWHERE_STATIC_LIB + "${KNOWHERE_PREFIX}/lib/${CMAKE_STATIC_LIBRARY_PREFIX}knowhere${CMAKE_STATIC_LIBRARY_SUFFIX}") + + set(KNOWHERE_CMAKE_ARGS + ${EP_COMMON_CMAKE_ARGS} + "-DCMAKE_INSTALL_PREFIX=${KNOWHERE_PREFIX}" + -DCMAKE_INSTALL_LIBDIR=lib + -DCMAKE_CUDA_COMPILER=/usr/local/cuda/bin/nvcc + -DCMAKE_BUILD_TYPE=Release) + + externalproject_add(knowhere_ep + URL + ${KNOWHERE_SOURCE_URL} + ${EP_LOG_OPTIONS} + CMAKE_ARGS + ${KNOWHERE_CMAKE_ARGS} + BUILD_COMMAND + ${MAKE} + ${MAKE_BUILD_ARGS} + BUILD_BYPRODUCTS + ${KNOWHERE_STATIC_LIB}) + + file(MAKE_DIRECTORY "${KNOWHERE_INCLUDE_DIR}") + add_library(knowhere STATIC IMPORTED) + set_target_properties( + knowhere + PROPERTIES IMPORTED_LOCATION "${KNOWHERE_STATIC_LIB}" + INTERFACE_INCLUDE_DIRECTORIES "${KNOWHERE_INCLUDE_DIR}") + + add_dependencies(knowhere knowhere_ep) +endmacro() + +if(MILVUS_WITH_KNOWHERE) + resolve_dependency(Knowhere) + + get_target_property(KNOWHERE_INCLUDE_DIR knowhere INTERFACE_INCLUDE_DIRECTORIES) + link_directories(SYSTEM "${KNOWHERE_PREFIX}/lib") + include_directories(SYSTEM "${KNOWHERE_INCLUDE_DIR}") + include_directories(SYSTEM "${KNOWHERE_INCLUDE_DIR}/SPTAG/AnnService") +endif() + # ---------------------------------------------------------------------- # Easylogging++ diff --git a/cpp/src/CMakeLists.txt b/cpp/src/CMakeLists.txt index e00420b2d1..c0bb8334a4 100644 --- a/cpp/src/CMakeLists.txt +++ b/cpp/src/CMakeLists.txt @@ -12,6 +12,7 @@ aux_source_directory(utils utils_files) aux_source_directory(db db_files) aux_source_directory(wrapper 
wrapper_files) aux_source_directory(metrics metrics_files) +aux_source_directory(wrapper/knowhere knowhere_files) aux_source_directory(db/scheduler scheduler_files) aux_source_directory(db/scheduler/context scheduler_context_files) @@ -50,6 +51,7 @@ set(engine_files ${wrapper_files} # metrics/Metrics.cpp ${metrics_files} + ${knowhere_files} ) set(get_sys_info_files @@ -64,14 +66,18 @@ include_directories("${CUDA_TOOLKIT_ROOT_DIR}/include") include_directories(thrift/gen-cpp) set(third_party_libs + knowhere + SPTAGLibStatic + arrow + jemalloc_pic + faiss +# libgpufaiss.a + openblas + lapack easyloggingpp sqlite thrift yaml-cpp - libgpufaiss.a - faiss - lapack - openblas prometheus-cpp-push prometheus-cpp-pull prometheus-cpp-core @@ -83,6 +89,8 @@ set(third_party_libs snappy zlib zstd + cudart + cublas ${CUDA_TOOLKIT_ROOT_DIR}/lib64/stubs/libnvidia-ml.so ) if (MEGASEARCH_WITH_ARROW STREQUAL "ON") @@ -95,8 +103,8 @@ if (GPU_VERSION STREQUAL "ON") pthread libgomp.a libgfortran.a - cudart - cublas +# cudart +# cublas ${CUDA_TOOLKIT_ROOT_DIR}/lib64/stubs/libnvidia-ml.so ) else() @@ -130,6 +138,7 @@ endif () cuda_add_library(milvus_engine STATIC ${engine_files}) +#cuda_add_library(milvus_engine SHARED ${engine_files}) target_link_libraries(milvus_engine ${engine_libs} ${third_party_libs}) add_library(metrics STATIC ${metrics_files}) diff --git a/cpp/src/wrapper/knowhere/data_transfer.h b/cpp/src/wrapper/knowhere/data_transfer.h new file mode 100644 index 0000000000..e5a9402cff --- /dev/null +++ b/cpp/src/wrapper/knowhere/data_transfer.h @@ -0,0 +1,29 @@ +//////////////////////////////////////////////////////////////////////////////// +// Copyright 上海赜睿信息科技有限公司(Zilliz) - All Rights Reserved +// Unauthorized copying of this file, via any medium is strictly prohibited. +// Proprietary and confidential. 
+//////////////////////////////////////////////////////////////////////////////// + +#pragma once + +#define GENDATASET(n,d,xb,ids)\ + size_t elems = (n) * (d);\ + std::vector shape{n, d};\ + auto tensor = ConstructFloatTensor((uint8_t *) (xb), elems * sizeof(float), shape);\ + std::vector tensors{tensor};\ + std::vector tensor_fields{ConstructFloatField("data")};\ + auto tensor_schema = std::make_shared(tensor_fields);\ + auto id_array = ConstructInt64Array((uint8_t *) (ids), (n) * sizeof(int64_t));\ + std::vector arrays{id_array};\ + std::vector array_fields{ConstructInt64Field("id")};\ + auto array_schema = std::make_shared(array_fields);\ + auto dataset = std::make_shared(std::move(arrays), array_schema, std::move(tensors), tensor_schema);\ + +#define GENQUERYDATASET(n,d,xb)\ + size_t elems = (n) * (d);\ + std::vector shape{(n), (d)};\ + auto tensor = ConstructFloatTensor((uint8_t *) (xb), elems * sizeof(float), shape);\ + std::vector tensors{tensor};\ + std::vector tensor_fields{ConstructFloatField("data")};\ + auto tensor_schema = std::make_shared(tensor_fields);\ + auto dataset = std::make_shared(std::move(tensors), tensor_schema);\ diff --git a/cpp/src/wrapper/knowhere/vec_impl.cpp b/cpp/src/wrapper/knowhere/vec_impl.cpp new file mode 100644 index 0000000000..bcb537fda2 --- /dev/null +++ b/cpp/src/wrapper/knowhere/vec_impl.cpp @@ -0,0 +1,102 @@ +//////////////////////////////////////////////////////////////////////////////// +// Copyright 上海赜睿信息科技有限公司(Zilliz) - All Rights Reserved +// Unauthorized copying of this file, via any medium is strictly prohibited. +// Proprietary and confidential. 
+//////////////////////////////////////////////////////////////////////////////// + +#include "knowhere/index/index.h" +#include "knowhere/index/index_model.h" +#include "knowhere/index/index_type.h" +#include "knowhere/adapter/sptag.h" +#include "knowhere/common/tensor.h" + +#include "vec_impl.h" +#include "data_transfer.h" + +//using Index = zilliz::knowhere::Index; +//using IndexModel = zilliz::knowhere::IndexModel; +//using IndexType = zilliz::knowhere::IndexType; +//using IndexPtr = std::shared_ptr; +//using IndexModelPtr = std::shared_ptr; + +namespace zilliz { +namespace vecwise { +namespace engine { + +using namespace zilliz::knowhere; + +void VecIndexImpl::BuildAll(const long &nb, + const float *xb, + const long *ids, + const Config &cfg, + const long &nt, + const float *xt) { + using namespace zilliz::knowhere; + + auto d = cfg["dim"].as(); + GENDATASET(nb, d, xb, ids) + + Config train_cfg; + Config add_cfg; + Config search_cfg; + auto model = index_->Train(dataset, cfg); + index_->set_index_model(model); + index_->Add(dataset, add_cfg); +} + +void VecIndexImpl::Add(const long &nb, const float *xb, const long *ids, const Config &cfg) { + // TODO: Assert index is trained; + + auto d = cfg["dim"].as(); + GENDATASET(nb, d, xb, ids) + + index_->Add(dataset, cfg); +} + +void VecIndexImpl::Search(const long &nq, const float *xq, float *dist, long *ids, const Config &cfg) { + // TODO: Assert index is trained; + + auto d = cfg["dim"].as(); + auto k = cfg["k"].as(); + GENQUERYDATASET(nq, d, xq) + + Config search_cfg; + auto res = index_->Search(dataset, cfg); + auto ids_array = res->array()[0]; + auto dis_array = res->array()[1]; + //{ + // auto& ids = ids_array; + // auto& dists = dis_array; + // std::stringstream ss_id; + // std::stringstream ss_dist; + // for (auto i = 0; i < 10; i++) { + // for (auto j = 0; j < k; ++j) { + // ss_id << *(ids->data()->GetValues(1, i * k + j)) << " "; + // ss_dist << *(dists->data()->GetValues(1, i * k + j)) << " "; + // } + // 
ss_id << std::endl; + // ss_dist << std::endl; + // } + // std::cout << "id\n" << ss_id.str() << std::endl; + // std::cout << "dist\n" << ss_dist.str() << std::endl; + //} + + // TODO: deep copy here. + auto p_ids = ids_array->data()->GetValues(1, 0); + auto p_dist = dis_array->data()->GetValues(1, 0); // distances come from dis_array (res->array()[1]), not the id array + + memcpy(ids, p_ids, sizeof(int64_t) * nq * k); + memcpy(dist, p_dist, sizeof(float) * nq * k); +} + +zilliz::knowhere::BinarySet VecIndexImpl::Serialize() { + return index_->Serialize(); +} + +void VecIndexImpl::Load(const zilliz::knowhere::BinarySet &index_binary) { + index_->Load(index_binary); +} + +} +} +} diff --git a/cpp/src/wrapper/knowhere/vec_impl.h b/cpp/src/wrapper/knowhere/vec_impl.h new file mode 100644 index 0000000000..f144dc43a2 --- /dev/null +++ b/cpp/src/wrapper/knowhere/vec_impl.h @@ -0,0 +1,37 @@ +//////////////////////////////////////////////////////////////////////////////// +// Copyright 上海赜睿信息科技有限公司(Zilliz) - All Rights Reserved +// Unauthorized copying of this file, via any medium is strictly prohibited. +// Proprietary and confidential. 
+//////////////////////////////////////////////////////////////////////////////// + +#pragma once + +#include "knowhere/index/vector_index/vector_index.h" +#include "vec_index.h" + + +namespace zilliz { +namespace vecwise { +namespace engine { + +class VecIndexImpl : public VecIndex { + public: + explicit VecIndexImpl(std::shared_ptr index):index_(std::move(index)){}; + void BuildAll(const long &nb, + const float *xb, + const long *ids, + const Config &cfg, + const long &nt, + const float *xt) override; + void Add(const long &nb, const float *xb, const long *ids, const Config &cfg) override; + zilliz::knowhere::BinarySet Serialize() override; + void Load(const zilliz::knowhere::BinarySet &index_binary) override; + void Search(const long &nq, const float *xq, float *dist, long *ids, const Config &cfg) override; + + private: + std::shared_ptr index_ = nullptr; +}; + +} +} +} diff --git a/cpp/src/wrapper/knowhere/vec_index.cpp b/cpp/src/wrapper/knowhere/vec_index.cpp new file mode 100644 index 0000000000..1365836fd9 --- /dev/null +++ b/cpp/src/wrapper/knowhere/vec_index.cpp @@ -0,0 +1,31 @@ +//////////////////////////////////////////////////////////////////////////////// +// Copyright 上海赜睿信息科技有限公司(Zilliz) - All Rights Reserved +// Unauthorized copying of this file, via any medium is strictly prohibited. +// Proprietary and confidential. 
+//////////////////////////////////////////////////////////////////////////////// +#include "knowhere/index/vector_index/ivf.h" +#include "knowhere/index/vector_index/gpu_ivf.h" + +#include "vec_index.h" +#include "vec_impl.h" + + +namespace zilliz { +namespace vecwise { +namespace engine { + +VecIndexPtr GetVecIndexFactory(const std::string &index_type) { + std::shared_ptr index; + if (index_type == "IVF") { + index = std::make_shared(); + } else if (index_type == "GPUIVF") { + index = std::make_shared(); + } + auto ret_index = std::make_shared(index); + //return std::static_pointer_cast(std::make_shared(index)); + return std::make_shared(index); +} + +} +} +} diff --git a/cpp/src/wrapper/knowhere/vec_index.h b/cpp/src/wrapper/knowhere/vec_index.h new file mode 100644 index 0000000000..3094eb336b --- /dev/null +++ b/cpp/src/wrapper/knowhere/vec_index.h @@ -0,0 +1,56 @@ +//////////////////////////////////////////////////////////////////////////////// +// Copyright 上海赜睿信息科技有限公司(Zilliz) - All Rights Reserved +// Unauthorized copying of this file, via any medium is strictly prohibited. +// Proprietary and confidential. 
+//////////////////////////////////////////////////////////////////////////////// + +#pragma once + +#include +#include + +#include "knowhere/common/config.h" +#include "knowhere/common/binary_set.h" + + +namespace zilliz { +namespace vecwise { +namespace engine { + +using Config = zilliz::knowhere::Config; + +class VecIndex { + public: + virtual void BuildAll(const long &nb, + const float *xb, + const long *ids, + const Config &cfg, + const long &nt = 0, + const float *xt = nullptr) = 0; + + virtual void Add(const long &nb, + const float *xb, + const long *ids, + const Config &cfg) = 0; + + virtual void Search(const long &nq, + const float *xq, + float *dist, + long *ids, + const Config &cfg) = 0; + + virtual zilliz::knowhere::BinarySet Serialize() = 0; + + virtual void Load(const zilliz::knowhere::BinarySet &index_binary) = 0; +}; + +using VecIndexPtr = std::shared_ptr; + +extern VecIndexPtr GetVecIndexFactory(const std::string &index_type); + +// TODO +extern VecIndexPtr LoadVecIndex(const zilliz::knowhere::BinarySet &index_binary); + +} +} +} diff --git a/cpp/unittest/CMakeLists.txt b/cpp/unittest/CMakeLists.txt index 2e2b1f91b6..0fd521357d 100644 --- a/cpp/unittest/CMakeLists.txt +++ b/cpp/unittest/CMakeLists.txt @@ -45,7 +45,8 @@ set(unittest_libs add_subdirectory(server) add_subdirectory(db) -add_subdirectory(faiss_wrapper) +#add_subdirectory(faiss_wrapper) +add_subdirectory(index_wrapper) add_subdirectory(license) add_subdirectory(metrics) add_subdirectory(storage) \ No newline at end of file diff --git a/cpp/unittest/index_wrapper/CMakeLists.txt b/cpp/unittest/index_wrapper/CMakeLists.txt new file mode 100644 index 0000000000..5c4b71227c --- /dev/null +++ b/cpp/unittest/index_wrapper/CMakeLists.txt @@ -0,0 +1,19 @@ +include_directories("${CUDA_TOOLKIT_ROOT_DIR}/include") +link_directories("${CUDA_TOOLKIT_ROOT_DIR}/lib64") + +aux_source_directory(${MILVUS_ENGINE_SRC}/wrapper/knowhere knowhere_src) + +set(knowhere_libs + knowhere + SPTAGLibStatic + arrow + 
jemalloc_pic + faiss + openblas + lapack + cudart + cublas + ) + +add_executable(knowhere_test knowhere_test.cpp ${knowhere_src}) +target_link_libraries(knowhere_test ${knowhere_libs} ${unittest_libs}) \ No newline at end of file diff --git a/cpp/unittest/index_wrapper/knowhere_test.cpp b/cpp/unittest/index_wrapper/knowhere_test.cpp new file mode 100644 index 0000000000..caf287340e --- /dev/null +++ b/cpp/unittest/index_wrapper/knowhere_test.cpp @@ -0,0 +1,93 @@ +//////////////////////////////////////////////////////////////////////////////// +// Copyright 上海赜睿信息科技有限公司(Zilliz) - All Rights Reserved +// Unauthorized copying of this file, via any medium is strictly prohibited. +// Proprietary and confidential. +//////////////////////////////////////////////////////////////////////////////// + +#include + +#include + +using namespace zilliz::vecwise::engine; + +TEST(knowhere_test, ivf_test) { + auto d = 128; + auto nt = 1000; + auto nb = 10000; + auto nq = 10; + //{ + // std::vector xb; + // std::vector xt; + // std::vector xq; + // std::vector ids; + // + // //prepare train data + // std::uniform_real_distribution<> dis_xt(-1.0, 1.0); + // std::random_device rd; + // std::mt19937 gen(rd()); + // xt.resize(nt*d); + // for (size_t i = 0; i < nt * d; i++) { + // xt[i] = dis_xt(gen); + // } + // xb.resize(nb*d); + // ids.resize(nb); + // for (size_t i = 0; i < nb * d; i++) { + // xb[i] = dis_xt(gen); + // if (i < nb) { + // ids[i] = i; + // } + // } + // xq.resize(nq*d); + // for (size_t i = 0; i < nq * d; i++) { + // xq[i] = dis_xt(gen); + // } + //} + + auto elems = nb * d; + auto p_data = (float *) malloc(elems * sizeof(float)); + auto p_id = (int64_t *) malloc(elems * sizeof(int64_t)); + assert(p_data != nullptr && p_id != nullptr); + + for (auto i = 0; i < nb; ++i) { + for (auto j = 0; j < d; ++j) { + p_data[i * d + j] = drand48(); + } + p_data[d * i] += i / 1000.; + p_id[i] = i; + } + + auto q_elems = nq * d; + auto q_data = (float *) malloc(q_elems * 
sizeof(float)); + + for (auto i = 0; i < nq; ++i) { + for (auto j = 0; j < d; ++j) { + q_data[i * d + j] = drand48(); + } + q_data[d * i] += i / 1000.; + } + + Config build_cfg = Config::object{ + {"dim", d}, + {"nlist", 100}, + }; + + auto k = 10; + Config search_cfg = Config::object{ + {"dim", d}, + {"k", k}, + }; + + std::vector ret_dist(nq*k); + std::vector ret_ids(nq*k); + + const std::string& index_type = "IVF"; + auto index = GetVecIndexFactory(index_type); + index->BuildAll(nb, p_data, p_id, build_cfg); + + auto add_bin = index->Serialize(); + index->Load(add_bin); + + index->Search(nq, q_data, ret_dist.data(), ret_ids.data(), search_cfg); + + std::cout << "he"; +} From a8b068db5770b28639025ed2b7a3f460b449b4db Mon Sep 17 00:00:00 2001 From: "xj.lin" Date: Wed, 26 Jun 2019 19:14:39 +0800 Subject: [PATCH 02/19] 1. git add submodule knowhere and set default branch 2. Update README.md about how to use git submodule Former-commit-id: fd996485d8ef0b09055c4f3e8c20838ce9fd95ad --- .gitmodules | 4 ++++ cpp/README.md | 3 +++ cpp/thirdparty/knowhere | 1 + 3 files changed, 8 insertions(+) create mode 100644 .gitmodules create mode 160000 cpp/thirdparty/knowhere diff --git a/.gitmodules b/.gitmodules new file mode 100644 index 0000000000..297cf0e592 --- /dev/null +++ b/.gitmodules @@ -0,0 +1,4 @@ +[submodule "cpp/thirdparty/knowhere"] + path = cpp/thirdparty/knowhere + url = git@192.168.1.105:xiaojun.lin/knowhere.git + branch = develop diff --git a/cpp/README.md b/cpp/README.md index 0c7706df23..d7297aecb0 100644 --- a/cpp/README.md +++ b/cpp/README.md @@ -12,6 +12,9 @@ cmake_build/src/milvus_server is the server cmake_build/src/libmilvus_engine.a is the static library + git submodule init + git submodule update + cd [sourcecode path]/cpp ./build.sh -t Debug ./build.sh -t Release diff --git a/cpp/thirdparty/knowhere b/cpp/thirdparty/knowhere new file mode 160000 index 0000000000..291b3b4226 --- /dev/null +++ b/cpp/thirdparty/knowhere @@ -0,0 +1 @@ +Subproject commit 
291b3b422664f2509bab79d5cc63823dedbe903c From f30c3273866ed2d036f304b16dfac8c4543d953a Mon Sep 17 00:00:00 2001 From: yu yunfeng Date: Tue, 2 Jul 2019 15:03:21 +0800 Subject: [PATCH 03/19] Update faiss parameter Former-commit-id: a1ae1ac1bc963c18ac8a5281f8aa07ecba50eb94 --- cpp/src/wrapper/Index.cpp | 4 ++++ cpp/src/wrapper/Operand.cpp | 2 +- 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/cpp/src/wrapper/Index.cpp b/cpp/src/wrapper/Index.cpp index 35dd646fdf..c306b65e1a 100644 --- a/cpp/src/wrapper/Index.cpp +++ b/cpp/src/wrapper/Index.cpp @@ -13,6 +13,7 @@ #include "Index.h" #include "faiss/index_io.h" +#include "faiss/IndexIVF.h" namespace zilliz { namespace milvus { @@ -55,6 +56,9 @@ bool Index::add_with_ids(idx_t n, const float *xdata, const long *xids) { bool Index::search(idx_t n, const float *data, idx_t k, float *distances, long *labels) const { try { + if(auto ivf_index = std::dynamic_pointer_cast(index_)) { + ivf_index->nprobe = 100; + } index_->search(n, data, k, distances, labels); } catch (std::exception &e) { diff --git a/cpp/src/wrapper/Operand.cpp b/cpp/src/wrapper/Operand.cpp index 25341676a6..30e31067fd 100644 --- a/cpp/src/wrapper/Operand.cpp +++ b/cpp/src/wrapper/Operand.cpp @@ -39,7 +39,7 @@ string Operand::get_index_type(const int &nb) { } case IVF: { index_str += (ncent != 0 ? 
index_type + std::to_string(ncent) : - index_type + std::to_string(int(nb / 1000000.0 * 16384))); + index_type + std::to_string(int(nb / 1000000.0 * 1638))); break; } case IDMAP: { From 5546de4586a633ae2e1bf2e4df6ca4977b512f58 Mon Sep 17 00:00:00 2001 From: jinhai Date: Tue, 2 Jul 2019 18:17:39 +0800 Subject: [PATCH 04/19] Add Milvus EULA Former-commit-id: 80422eb3dcd1b7eaa64b369a4e558aba968ee4da --- cpp/CMakeLists.txt | 7 ++- cpp/Milvus-EULA-cn.md | 119 ++++++++++++++++++++++++++++++++++++++ cpp/Milvus-EULA-en.md | 129 ++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 254 insertions(+), 1 deletion(-) create mode 100644 cpp/Milvus-EULA-cn.md create mode 100644 cpp/Milvus-EULA-en.md diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index 947759f793..4d8f43b5b9 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -152,7 +152,12 @@ install(FILES conf/log_config.conf DESTINATION conf) - +install(FILES + ./Milvus-EULA-cn.md + ./Milvus-EULA-en.md + DESTINATION + license + ) config_summary() diff --git a/cpp/Milvus-EULA-cn.md b/cpp/Milvus-EULA-cn.md new file mode 100644 index 0000000000..e9a25dcc6c --- /dev/null +++ b/cpp/Milvus-EULA-cn.md @@ -0,0 +1,119 @@ +# **Milvus**终端用户授权许可条款及条件 + +#### 2019-06-30 版 + + + +本条款和条件(下称“本协议”)适用于使用由上海赜睿信息科技有限公司(下称“**ZILLIZ**”)所提供的Milvus产品(参见如下定义) 的用户。 + +**请仔细阅读如下条款:** + +**若您(下称“您”或“用户”)代表某公司或者其他机构使用任何产品时, 您特此陈述您作为该公司或该等其他机构的员工或代理,您有权代表该公司或该等其他机构接受本协议项下所要求的全部条款和条件。** + +**若使用任何产品,您知晓并同意:** + +**(A)您已阅读本协议中所有的条款和条件;** + +**(B)您已理解本协议中所有的条款和条件;** + +**(C)您已同意本协议中所有条款和条件对您具有法律约束力。** + +**如果您不同意本协议所述条款和条件中的任意内容,则可以选择不使用产品的任何部分。** + +**本协议的“生效日期”是指您第一次下载任何产品的日期。** + +1. **产品**,指本协议项下任何 **ZILLIZ** 的Milvus产品和软件,包括: Milvus向量检索数据库Docker版与其相关的升级、更新、故障修复或修改版本(统称“更新软件”)。无论本协议是否另有规定: + + (a)仅Milvus向量检索数据库Docker版是免费授权用户的版本,且ZILLIZ保留收回该授权的权力; + + (b)任何使用或者试用Milvus向量检索数据库Docker版的个人与组织,需要通过support@zilliz.com向ZILLIZ告知个人或者组织的身份、联系方式以及使用Milvus的目的。 + + (c)制作和使用额外的副本仅限于必要的备份目的。 + +2. 
**全部协议**,本协议包括本授权许可条款及条件以及任何[Milvus官方网站](https://milvus.io)展示或者网页链接所附或引用的全部条款。本协议是双方就相关事项达成的完整协议,取代 **ZILLIZ** 与用户之间就本条款相关事项所达成的其他任何协议,无论是口头的还是书面的。 + +3. **使用许可**,**ZILLIZ** 授予用户非排他性的、不可转让的、非可再授权的、可撤回的和有限的许可进行访问和使用第1条所定义的产品,该访问和使用许可仅限于用户内部使用之目的。通过电子下载或其他经许可的来源获得产品的用户均应受限于本协议的内容。 + +4. **许可限制**,除非本协议另有明文规定,否则用户将不被允许: + + (a)修改、翻译或制造产品的衍生作品; + + (b)反向编译、反向工程、破解产品的任何部分或试图发现有关产品的任何源代码、基本理念或运算方法; (c)销售、分派、再授权、出租、出借、出质、提供或另行翻译全部或部分产品; + + (d)制造、获取非法制造的、再版或复制产品; + + (e)删除或更改与产品相关联的任何商标、标志、版权或其他专有标 ; + + (f)不得在没有 **ZILLIZ** 明确书面授权的情况下,使用或许可他人使用产品为第三方提供服务,无论是在产品服务过程中使用或采用分时的方式; + + (g)引起或许可任何其他方进行上述任何一种禁止行为。 + +5. **所有权**,**ZILLIZ** 和用户在本协议项下的许可需明确,**ZILLIZ** 有以下各项的全部权利、所有权和相关利益:(a)产品(包括但不限于,任何更新软件、修订版本或其衍生作品); + + (b)在 **ZILLIZ** 根据本协议提供任何服务的过程中或作为其提供服务的结果,由 **ZILLIZ** 发现、 产生或发展出来的所有的概念、发明、发现、改进、信息、创意作品等; + + (c)前述各项所含的任何知识产权权利。在本协议项下,“知识产权”是指在任何管辖区域经申请和注册获得认可和保护的全部专利、版权、道德权利、商标、商业秘密和任何其他形式的权利。**ZILLIZ** 与用户同意,在受限于法律法规规定及本协议全部条款和条件的前提下,用户拥有使用产品而产生的数据的权利、所有权等相关利益。本协议中无任何默示许可,**ZILLIZ** 保留本协议项下未明确授权的全部权利。除非本协议明确约定,**ZILLIZ** 在本协议下未授予用户任何许可权利,无论是通过暗示、默许或其他方式。 + +6. **保密**,保密信息是指,无论是在本协议生效前或生效后,由 **ZILLIZ** 披露给用户的与本协议或与 **ZILLIZ** 相关的所有信息(无论是以口头、书面或其他有形、无形的形式)。保密信息包括但不限于,商业计划的内容、产品、发明、设计图纸、财务计划、计算机程序、发明、用户信息、战略和其他类似信息。在本协议期限内,除非获得明确许可, 用户需保证保密信息的秘密性,并确保不会使用上述保密信息。用户将采用与保护其自身保密信息的同等谨慎程度(不论在何种情况下均不低于合理的谨慎程度)来保护 **ZILLIZ** 的保密信息,来避免使得保密信息被未经授权的使用和披露。保密信息只供用户根据本协议规定使用产品之目的而使用。此外,用户将: + + (a)除非用户为了根据本协议的规定而使用产品之目的外,不得以任何形式复制、使用或披露保密信息; (b)只向为确保用户可根据本协议使用产品而必需知道该保密信息的员工和顾问披露保密信息,前提是上述员工和顾问已签署了包含保密义务不低于本条所述内容的保密协议。 + + 保密信息不包括下列信息: + + (a) 非因用户过错违反本协议导致已进入公共领域可被第三方获取的; + + (b) 用户能合理证明其在通过 **ZILLIZ** 获得之前已知晓的; + + (c)用户能证明没有使用或参考该保密信息而独立获得的; + + (d)用户从其他无披露限制或无保密义务的第三方获得的。如无另行说明,由用户提供给 **ZILLIZ** 有关产品的任何建议、评论或者其他反馈(统称“反馈信息”)将同样构成保密信息。 + + 此外,**ZILLIZ** 有权使用、披露、复制、许可和利用上述反馈信息,而无需承担任何知识产权负担或其他任何形式的义务或限制。根据相关法律法规,与本协议的履行和用户使用 **ZILLIZ** 产品相关的情况下: + + (a)**ZILLIZ** 同意不会要求用户提供任何个人身份信息; + + (b)用户同意不提供任何个人身份信息给 **ZILLIZ**。 + +7. 
**免责声明**,用户陈述、保证及承诺如下: + + (a)其所有员工和顾问都将遵守本协议的全部条款; + + (b)在履行本协议时将遵守全部可适用的政府部门颁发的法律、法规、规章、命令和其他要求(无论是现行有效还是之后生效的)。 + + 无论本协议是否另有规定,用户将持续对其雇员或顾问的全部作为或不作为承担责任,如同该等作为或不作为系其自身所为。 + + 产品系按照原状或现状提供给用户,不含任何形式的陈述、保证、 承诺或条件。**ZILLIZ** 及其供应商不保证任何产品将无任何故障、错误或漏洞。**ZILLIZ** 和其供应商不为产品的如下内容提供任何陈述和保证(无论是明示或暗示,口头或书面),不论该内容是否依据法律之规定,行业惯例,交易习惯或其他原因而要求的: + + (a)保证适销性; + + (b)保证可适用于任何目的(不论 **ZILLIZ** 是否知晓、应当知晓、被建议或另行得知该目的); + + (c)保证不侵权和拥有全部所有权。用户已明确知悉并同意产品上无任何陈述和保证。此外,鉴于进行入侵和网络攻击的新技术在不断发展,**ZILLIZ** 并不保证产品或产品所使用的系统或网络将免于任何入侵或攻击。 + +8. **损害赔偿**,用户应赔偿、保护或使得 **ZILLIZ** 及其董事、高管、 雇员、供应商、顾问、承包商和代理商(统称为“**ZILLIZ **受保障方”)免受所有现存或潜在的针对 **ZILLIZ** 受保障方因提起请求、诉讼或其他程序而引起的要求其赔偿损害损失、支付费用、罚款、调解、 损失费用等支出(包括但不限于律师费、费用、罚款、利息和垫付款),用户承担上述责任的前提是该请求、诉讼或其他程序,不论是否成功系在如下情况发生时导致、引起的,或以任何形式与下述情况相关: + + (a)任何对本协议的违反(包括但不限于,任何违反用户陈述和保证或约定的情况); + + (b)用户过失或故意产生的过错行为; + + (c)引起争议的数据和信息系在产品的使用过程中产生或收集的。 + +9. **责任限制**,除了 **ZILLIZ** 存在欺诈或故意的过错行为,在任何情况下: + + (a)**ZILLIZ** 都不会赔偿用户或任何第三方的因本协议或产品(包括用户使用或无法使用产品的情况)而遭受的任何利润损失、数 据损失、使用损失、收入损失、商誉损失、任何经营活动的中断,任何其他商业损害或损失,或任何间接的、特殊的、附带的、惩戒性、惩罚性或伴随的损失,不论上述损失系因合同、侵权、严格责任或其他原因而确认的,即使 **ZILLIZ** 已被通知或因其他可能的渠道知晓上述损失发生的可能性; + + (b)**ZILLIZ** 因本协议所需承担的全部赔偿责任不应超过用户已支付或将支付给 **ZILLIZ** 的全部款项总额(若有),多项请求亦不得超过该金额限制。上述限制、排除情况及声明应在相关法律允许的最大范围内得以适用,即便任何补偿无法达到其实质目的。 + +10. **第三方供应商**,产品可能包括由第三方供应商许可提供的软件或其他代码(下称“第三方软件”)。用户已知悉第三方供应商不对产品或其任何部分提供任何陈述和保证,**ZILLIZ** 不承担因产品或用户对第三方软件的使用或不能使用的情况而产生的任何责任。 + +11. **诊断和报告**,用户了解并同意该产品包含诊断功能作为其默认配置。 诊断功能用于收集有关使用环境和产品使用过程中的配置文件、节点数、 软件版本、日志文档和其他信息,并将上述信息报告给 **ZILLIZ** 用于提前识别潜在的支持问题、了解用户的使用环境、并提高产品的使用性能。虽然用户可以选择更改诊断功能来禁用自动定时报告或仅用于报告服务记录,但用户需同意,每季度须至少运行一次诊断功能并将结果报告给**ZILLIZ**。 + +12. **终止**,本协议期限从生效之日起直到 **ZILLIZ** 网站规定的期限终止,除非本协议因用户违反本协议中条款而提前终止。无论本协议是否另有规定,在用户存在违反第3、4、5或7条时,**ZILLIZ**有权立即终止本协议。本协议期满或提前终止时: + + (a)根据本协议所授予给用户的所有权利将立即终止,在此情况下用户应立即停止使用产品; + + (b) 用户应及时将届时仍由其占有的所有保密信息及其副本(包括但不限于产品)交还给 **ZILLIZ**,或根据 **ZILLIZ** 的自行审慎决定及指示, 销毁该等保密信息全部副本,未经 **ZILLIZ** 书面同意,用户不得擅自保留任何由 **ZILLIZ** 提供的保密信息及其副本。 + +13. 
**第三方资源**, **ZILLIZ** 供应的产品可能包括对其他网站、内容或资源的超链接(下称“第三方资源”),且 **ZILLIZ** 此类产品的正常使用可能依赖于第三方资源的可用性。**ZILLIZ** 无法控制任何第三方资源。用户承认并同意,**ZILLIZ** 不就第三方资源的可用性及安全性承担任何责任,也不对该等第三方资源所涉及的或从其中获得的任何广告、产品或其他材料提供保证。用户承认并同意,**ZILLIZ** 不应因第三方资源的可用性及安全性、或用户依赖于第三方资源所涉及的或从其中获得的任何广告、产品或其他材料的完整性、准确性及存续而可能遭受的损失或损害承担任何责任。 + +14. **其他**,本协议全部内容均在中华人民共和国境内履行,受中华人民共和国法律管辖并根据其解释(但不适用相关冲突法的法律条款)。用 **ZILLIZ** 同意与本协议有关的任何争议将向上海市徐汇区人民法院提出,且不可撤销无条件的同意上述法院对因本协议提起的全部诉讼、争议拥有排他的管辖权。一旦确定任何条款无效、非法或无法执行, **ZILLIZ** 保留修改和解释该条款的权利。任何需要发送给用户的通知如公布在 **ZILLIZ** 的网站上则被视为已有效、合法地发送给用户。除了本合同项下应支付款项的义务外,任何一方将不对因不可抗力而导致的无法合理控制的全部或部分未能履行或延迟履行本协议的行为负责, 不可抗力包括但不限于火灾、暴风雨、洪水、地震、内乱、电信中断、 电力中断或其他基础设施的中断、**ZILLIZ** 使用的服务提供商存在问题导致服务中断或终止、罢工、故意毁坏事件、电缆被切断、病毒入侵或其他任意第三方故意或非法的行为引起的其他类似事件。在上述迟延履行情况出现时,可延迟履行协议的时间为因上述原因引起的延迟时间。 本协议另有明确规定外,本协议所要求或认可的通知或通讯均需以书面形式经一方有权代表签署或授权并以直接呈递、隔夜快递,经确认的电子邮件发送,经确认的传真或邮寄挂号信、挂号邮件保留回单等方式送达。对本协议的任何修改、补充或删除或权利放弃,必须通过书面由双方适当授权的代表签署确认后方为有效。任何一方对任何权利或救济的不履行或迟延履行(部分或全部)不构成对该等权利或救济的放弃,也不影响任何其他权利或救济。本协议项下的所有权利主张和救济均可为累积的且不排除本协议中包含的或法律所规定的其他任何权利或救济。 对本协议中任何一项违约责任的豁免或延迟行使任何权利,并不构成对其他后续违约责任的豁免。 \ No newline at end of file diff --git a/cpp/Milvus-EULA-en.md b/cpp/Milvus-EULA-en.md new file mode 100644 index 0000000000..3444dd722b --- /dev/null +++ b/cpp/Milvus-EULA-en.md @@ -0,0 +1,129 @@ +# ZILLIZ End-User License Agreement + +#### Last updated: 2019-06-30 + + + +This End-user License Agreement ("Agreement") is applicable to all users who uses Milvus provided by ZILLIZ company. + +**Please read this agreement carefully before clicking the I Agree button, downloading or using this Application.** + +**If you ("You" or "User") use any product on behalf of a company or other organization, you hereby state that you are an employee or agent of the company or such other institution, and you have the right to represent the company or such institutions to accept all the terms and conditions required under this Agreement. 
** + +**If you use any product, you acknowledge and agree:** + +**(A) You have read all the terms and conditions in the Agreement;** + +**(B) You have understood all the terms and conditions in the Agreement;** + +**(C) You have agreed that all the terms and conditions of this Agreement are legally binding on you.** + +**If you do not agree to any of the terms and conditions set forth in this Agreement, you may choose not to use any part of the product.** + +**This agreement takes effect immediately the first time you download the application**. + +1. **Product**. In this Agreement, it refers to Milvus and other related software products of **ZILLIZ**, including Milvus vector indexing database and its updates, higher versions, maintenance or patch releases ("Updated Software"). + + (a) Only the Docker version of Milvus vector indexing database is granted free to the User. **ZILLIZ** retains the right to revoke this grant; + + (b) Any person or organization that intends to use or try the Docker version of Milvus vector indexing database needs to inform **ZILLIZ** of the personal identity, contact information and purposes of using the Product by sending an email to: support@zilliz.com; + + (c) Making or using additional copies of the Product is restricted to necessary backup purposes. + +2. **Related Agreements**. The Related Agreements include this Agreement and all other related terms and conditions that appear in [Milvus official website](https://milvus.io). This Agreement is the entire and final agreement that replaces all other terms agreed between the User and **ZILLIZ** about issues listed here, oral or written. + +3. **License Grant**. **ZILLIZ** grants You a revocable, non-exclusive, non-transferable limited right to install and use the Application defined above for your personal, non-commercial purposes. Users who use the Application through downloading and other permitted channels are also subject to this Agreement; + +4. 
**Restrictions on Use.** You shall use the Application in accordance with the terms in the Agreement, and shall not: + + (a)Make any modification, translation or derivative work from the Application; + + (b)Decompile, reverse engineer, disassemble, attempt to derive the source code or algorithm of the Application; + + (c)Sell, distribute, license re-granting or provide translation of the whole or part of the Application; + + (d)Use the Application for creating a product, service or software. + + (e)Remove, alter or obscure any proprietary notice, trademark, or copyright of the Company and Application; + + (f)Install or use the Application to provide service to third-party partners, without acquiring formal grant of **ZILLIZ** ; + + (g)Perform or permit any behaviors that might lead to one of the above prohibited actions. + +5. **Ownership**. **ZILLIZ** enjoys the ownership of the following: + + (a)Products (includes but is not restricted to any updated software, patch releases, or derivative products); + + (b)All concepts, innovations, discoveries, improvements, information, or creative products developed and discovered by **ZILLIZ** as a result of or arising out of the service providing process; + + (c)Intellectual property rights of the above mentioned products and innovations. In this Agreement, "Intellectual Property" refers to trademarks, patents, designations of origin, industrial designs and models and copyright. **ZILLIZ** and the User agree that the User enjoy all the rights to use data produced by using the Product, while **ZILLIZ** keeps all other rights not explicitly stated in the Agreement. Unless otherwise stated, **ZILLIZ** has not granted any additional rights to Users, either implied, acquiesced or in other ways. + +6. **Non-disclosure**. Confidential Information refers to any and all information revealed to the User by **ZILLIZ**, either oral or written, tangible or intangible, before or after the Agreement takes effect. 
Confidential information includes but is not restricted to business plans and strategies, product, innovations, design papers, financial plans, computer programs, User information, etc. Within the term of this Agreement, unless granted definite permission, the User shall hold and maintain the Confidential Information in strictest confidence for the sole and exclusive benefit of **ZILLIZ** and using the Product. In addition: + + (a)You shall not copy, use or disclose Confidential Information for purposes other than using the Product agreed in this Agreement; + + (b)You shall carefully restrict access to Confidential Information to employees, contractors, and third parties as is reasonably required and shall require those persons to sign nondisclosure restrictions at least as protective as those in this Agreement. + + Confidential Information does not include: + + (a)Information that can be obtained by third-parties not due to User's violation of the Agreement; + + (b)Information that can be proven to be provided to Users not by **ZILLIZ** ; + + (c) Information that are obtained with no reference to Confidential Information; + + (d)Information the User gets from third-parties that are not subject to non-disclosure agreement. Unless otherwise stated, any comments, suggestions or other feedback ("Feedback Information") about the Product by the User to **ZILLIZ** will also be counted as Confidential Information. + + Furthermore, **ZILLIZ** has the right to use, disclose, copy or use above Feedback Information, and bearing no intellectual property burden or restrictions. According to related laws and regulations, during the fulfillment of this Agreement: + + (a)**ZILLIZ** agree not to require the User to provide any information regarding personal identities; + + (b)The User agree not to provide **ZILLIZ** with any personal information. + +7. **Disclaimer of Warranties**. 
You acknowledge, agree and promise that: + + (a) All employees and consultants will obey all terms in the Agreement; + + (b)Application of the Agreement is subject to all laws, terms, acts, commands and other requirements issued by the government (no matter whether these laws are in effect now or will be effective in the future). + + The User shall be held responsible for all the behaviors in relation to the Application. + + The Application is provided on an "As is" or "As available" basis, and Your use of or reliance on the Application is at your sole risk and discretion. **ZILLIZ** and its partners make no warranty that the Application will meet all Your requirements and expectations. + + **ZILLIZ** and its suppliers hereby disclaim any and all representations, warranties and guaranties regarding the Application, whether expressed, implied or statutory: + + (a)The implied warranty of merchantability; + + (b)Fitness for a particular purpose; + + (c)Non-infringement. + + Furthermore, considering the continuous advancement of Internet hacking and attacking technologies, **ZILLIZ** makes no guarantee that the Application or the systems and Internet it uses will be exempt from any hack or attack. + +8. **Damages and Penalties**. The User shall pay, protect or prevent **ZILLIZ** and its board members, executives, employees, consultants or representative agencies (**ZILLIZ** Protected Party) from any existing or potential damage loss, fees, penalties and other outgoing payments (including but not limited to lawyer fees, fines, interests and advance payment) arising out of legal request, litigation or other processes. The prerequisite condition of the above obligations is that the legal request, litigation or process is caused by any of the following situations: + + (a)Any violation of the Agreement; + + (b)User fault or deliberate behavior; + + (c)Controversial data is produced or collected during the usage of the Product. + +9. **Limitation of Liability**. 
Unless due to deliberate fraud or error from **ZILLIZ**, the terms below are applicable: + + (a)Under no circumstances shall **ZILLIZ** be held liable for any profit loss, data loss, revenue loss, termination of operations, any indirect, special, exemplary or consequential damages arising out of or in connection with Your access or use of the Application; + + (b)Without limiting the generality of the foregoing, **ZILLIZ**'s aggregate liability to You shall not exceed the total amount of money You already paid or will pay to **ZILLIZ** (if any). + +10. **Third-party Suppliers**. The User acknowledges that no statement and guarantee should be expected from Third-party Suppliers about the Product or its components. **ZILLIZ** holds no obligations to the Users' usage of the software provided by third-party Suppliers. + +11. **Diagnosis and Report**. The User knows and agrees that Diagnosis is part of the configuration of the Product. Diagnosis is used to collect the configuration files, node numbers, software version, logs and related information, and send a Report to **ZILLIZ** to recognize potential support problems, get to know User environment, and to enhance product features. Although You can choose to turn off the Diagnosis function of automatic report sending, You shall run the Diagnosis at least once every quarter and send the Report to **ZILLIZ**. + +12. **Termination of Licensing**. This Agreement is valid from the day it takes effect to the termination date defined on the **ZILLIZ** website, unless the User has disobeyed the terms and caused the Agreement to end in advance. Whether or not listed, if the User has violated terms in Clause 3, 4, 5 or 7, **ZILLIZ** may, in its sole and absolute discretion, terminate this License and the rights afforded to You. Upon the expiration or termination of the License: + + (a)All rights afforded to the User based upon this Agreement will be terminated. 
You shall cease use of the Product and uninstall related software; + + (b)The User shall return all confidential information and the copy (including but not restricted to the Product) back to **ZILLIZ**, or destroy all copies of confidential information on permission of **ZILLIZ**. Without the written approval of **ZILLIZ**, the User is not allowed to keep any confidential information or its copy provided by **ZILLIZ**. + +13. **Third-party Resources**. Products supplied by **ZILLIZ** may include hyperlinks to other websites, content or resources ("Third Party Resources"), and the normal use of such products may depend on the availability of third party resources. **ZILLIZ** is unable to control any third-party resources. The User acknowledges and agrees that **ZILLIZ** is not responsible for the availability and security of third-party resources and does not guarantee any advertising, products or other materials that are involved in or derived from such third party resources. The User acknowledges and agrees that **ZILLIZ** shall not bear any liability for loss or damage that may be suffered due to the availability and security of third party resources, or the integrity or accuracy of any advertisements, products or other materials that the User relies on or obtains from third party resources. + +14. **Other**. The entire contents of this Agreement are performed within the territory of the People's Republic of China and are governed by and construed in accordance with the laws of the People's Republic of China (but not applicable to the relevant conflict laws). **ZILLIZ** agrees that any disputes relating to this Agreement will be submitted to the Xuhui District People's Court of Shanghai, and irrevocably and unconditionally agrees that the above courts have exclusive jurisdiction over all litigations and disputes brought about by this Agreement. 
Once it is determined that any provision is invalid, illegal or unenforceable, **ZILLIZ** reserves the right to modify and interpret the terms. Any notice that needs to be sent to the user, if posted on the **ZILLIZ** website, is deemed to have been validly and legally sent to the user. Except for the obligation to pay under this contract, neither party will be liable for failure to perform or delayed performance of this Agreement in whole or in part due to force majeure. The force majeure includes but is not limited to fire, storm, flood , earthquake, civil strife, telecommunications disruption, power outage or other infrastructure disruption, service interruption or termination caused by **ZILLIZ** service provider problems, strikes, intentional destruction events, cable cuts, virus intrusion or any other similar incidents caused by intentional or illegal acts by third parties. In the case of the above-mentioned delayed performance, the delay in fulfilling the agreement may be the delay time due to the above reasons. Unless otherwise stated in this Agreement, notices or communications required or endorsed by this Agreement must be signed or authorized in writing by a party, and delivered by direct delivery, overnight delivery, confirmed email, confirmed fax or by mailing a registered letter, registered mail, and returning the order, etc. Any modification, addition or deletion or waiver of this Agreement must be confirmed by a written confirmation by a suitably authorized representative of both parties. The non-performance or delay in the performance of any right or remedy by any party (partially or wholly) does not constitute a waiver of such rights or remedies, nor does it affect any other rights or remedies. All claims and remedies under this Agreement may be cumulative and do not exclude any other rights or remedies contained in this Agreement or as required by law. 
Exemption from the waiver or delay of any liability for breach of contract in this Agreement does not constitute an exemption from other subsequent breach of contract obligations. \ No newline at end of file From 82150885d0b57d45b78f82d0b11e880b54f1e3fb Mon Sep 17 00:00:00 2001 From: "xj.lin" Date: Tue, 2 Jul 2019 18:37:33 +0800 Subject: [PATCH 05/19] update wrapper and wrapper test Former-commit-id: a57d040aae7d8f5ba20c99ea6a0c6220efcaeacd --- cpp/src/wrapper/knowhere/data_transfer.cpp | 48 +++++ cpp/src/wrapper/knowhere/data_transfer.h | 35 ++-- cpp/src/wrapper/knowhere/vec_impl.cpp | 25 +-- cpp/src/wrapper/knowhere/vec_impl.h | 3 +- cpp/src/wrapper/knowhere/vec_index.cpp | 18 +- cpp/src/wrapper/knowhere/vec_index.h | 8 +- cpp/thirdparty/knowhere | 2 +- cpp/unittest/index_wrapper/CMakeLists.txt | 6 +- cpp/unittest/index_wrapper/knowhere_test.cpp | 200 ++++++++++++------- cpp/unittest/index_wrapper/utils.cpp | 81 ++++++++ cpp/unittest/index_wrapper/utils.h | 61 ++++++ 11 files changed, 369 insertions(+), 118 deletions(-) create mode 100644 cpp/src/wrapper/knowhere/data_transfer.cpp create mode 100644 cpp/unittest/index_wrapper/utils.cpp create mode 100644 cpp/unittest/index_wrapper/utils.h diff --git a/cpp/src/wrapper/knowhere/data_transfer.cpp b/cpp/src/wrapper/knowhere/data_transfer.cpp new file mode 100644 index 0000000000..af5ad212e4 --- /dev/null +++ b/cpp/src/wrapper/knowhere/data_transfer.cpp @@ -0,0 +1,48 @@ +//////////////////////////////////////////////////////////////////////////////// +// Copyright 上海赜睿信息科技有限公司(Zilliz) - All Rights Reserved +// Unauthorized copying of this file, via any medium is strictly prohibited. +// Proprietary and confidential. 
+//////////////////////////////////////////////////////////////////////////////// + +#include "data_transfer.h" + + +namespace zilliz { +namespace vecwise { +namespace engine { + +using namespace zilliz::knowhere; + +DatasetPtr +GenDatasetWithIds(const int64_t &nb, const int64_t &dim, const float *xb, const long *ids) { + std::vector shape{nb, dim}; + auto tensor = ConstructFloatTensor((uint8_t *) xb, nb * dim * sizeof(float), shape); + std::vector tensors{tensor}; + std::vector tensor_fields{ConstructFloatField("data")}; + auto tensor_schema = std::make_shared(tensor_fields); + + auto id_array = ConstructInt64Array((uint8_t *) ids, nb * sizeof(int64_t)); + std::vector arrays{id_array}; + std::vector array_fields{ConstructInt64Field("id")}; + auto array_schema = std::make_shared(tensor_fields); + + auto dataset = std::make_shared(std::move(arrays), array_schema, + std::move(tensors), tensor_schema); + return dataset; +} + +DatasetPtr +GenDataset(const int64_t &nb, const int64_t &dim, const float *xb) { + std::vector shape{nb, dim}; + auto tensor = ConstructFloatTensor((uint8_t *) xb, nb * dim * sizeof(float), shape); + std::vector tensors{tensor}; + std::vector tensor_fields{ConstructFloatField("data")}; + auto tensor_schema = std::make_shared(tensor_fields); + + auto dataset = std::make_shared(std::move(tensors), tensor_schema); + return dataset; +} + +} +} +} diff --git a/cpp/src/wrapper/knowhere/data_transfer.h b/cpp/src/wrapper/knowhere/data_transfer.h index e5a9402cff..c99cd1c742 100644 --- a/cpp/src/wrapper/knowhere/data_transfer.h +++ b/cpp/src/wrapper/knowhere/data_transfer.h @@ -6,24 +6,19 @@ #pragma once -#define GENDATASET(n,d,xb,ids)\ - size_t elems = (n) * (d);\ - std::vector shape{n, d};\ - auto tensor = ConstructFloatTensor((uint8_t *) (xb), elems * sizeof(float), shape);\ - std::vector tensors{tensor};\ - std::vector tensor_fields{ConstructFloatField("data")};\ - auto tensor_schema = std::make_shared(tensor_fields);\ - auto id_array = 
ConstructInt64Array((uint8_t *) (ids), (n) * sizeof(int64_t));\ - std::vector arrays{id_array};\ - std::vector array_fields{ConstructInt64Field("id")};\ - auto array_schema = std::make_shared(tensor_fields);\ - auto dataset = std::make_shared(std::move(arrays), array_schema, std::move(tensors), tensor_schema);\ +#include "knowhere/adapter/structure.h" -#define GENQUERYDATASET(n,d,xb)\ - size_t elems = (n) * (d);\ - std::vector shape{(n), (d)};\ - auto tensor = ConstructFloatTensor((uint8_t *) (xb), elems * sizeof(float), shape);\ - std::vector tensors{tensor};\ - std::vector tensor_fields{ConstructFloatField("data")};\ - auto tensor_schema = std::make_shared(tensor_fields);\ - auto dataset = std::make_shared(std::move(tensors), tensor_schema);\ + +namespace zilliz { +namespace vecwise { +namespace engine { + +extern zilliz::knowhere::DatasetPtr +GenDatasetWithIds(const int64_t &nb, const int64_t &dim, const float *xb, const long *ids); + +extern zilliz::knowhere::DatasetPtr +GenDataset(const int64_t &nb, const int64_t &dim, const float *xb); + +} +} +} diff --git a/cpp/src/wrapper/knowhere/vec_impl.cpp b/cpp/src/wrapper/knowhere/vec_impl.cpp index bcb537fda2..e24d470acc 100644 --- a/cpp/src/wrapper/knowhere/vec_impl.cpp +++ b/cpp/src/wrapper/knowhere/vec_impl.cpp @@ -13,11 +13,6 @@ #include "vec_impl.h" #include "data_transfer.h" -//using Index = zilliz::knowhere::Index; -//using IndexModel = zilliz::knowhere::IndexModel; -//using IndexType = zilliz::knowhere::IndexType; -//using IndexPtr = std::shared_ptr; -//using IndexModelPtr = std::shared_ptr; namespace zilliz { namespace vecwise { @@ -31,24 +26,21 @@ void VecIndexImpl::BuildAll(const long &nb, const Config &cfg, const long &nt, const float *xt) { - using namespace zilliz::knowhere; - auto d = cfg["dim"].as(); - GENDATASET(nb, d, xb, ids) + auto dataset = GenDatasetWithIds(nb, d, xb, ids); - Config train_cfg; - Config add_cfg; - Config search_cfg; + auto preprocessor = index_->BuildPreprocessor(dataset, cfg); 
+ index_->set_preprocessor(preprocessor); auto model = index_->Train(dataset, cfg); index_->set_index_model(model); - index_->Add(dataset, add_cfg); + index_->Add(dataset, cfg); } void VecIndexImpl::Add(const long &nb, const float *xb, const long *ids, const Config &cfg) { - // TODO: Assert index is trained; + // TODO(linxj): Assert index is trained; auto d = cfg["dim"].as(); - GENDATASET(nb, d, xb, ids) + auto dataset = GenDatasetWithIds(nb, d, xb, ids); index_->Add(dataset, cfg); } @@ -58,12 +50,13 @@ void VecIndexImpl::Search(const long &nq, const float *xq, float *dist, long *id auto d = cfg["dim"].as(); auto k = cfg["k"].as(); - GENQUERYDATASET(nq, d, xq) + auto dataset = GenDataset(nq, d, xq); Config search_cfg; auto res = index_->Search(dataset, cfg); auto ids_array = res->array()[0]; auto dis_array = res->array()[1]; + //{ // auto& ids = ids_array; // auto& dists = dis_array; @@ -81,10 +74,10 @@ void VecIndexImpl::Search(const long &nq, const float *xq, float *dist, long *id // std::cout << "dist\n" << ss_dist.str() << std::endl; //} - // TODO: deep copy here. auto p_ids = ids_array->data()->GetValues(1, 0); auto p_dist = ids_array->data()->GetValues(1, 0); + // TODO(linxj): avoid copy here. 
memcpy(ids, p_ids, sizeof(int64_t) * nq * k); memcpy(dist, p_dist, sizeof(float) * nq * k); } diff --git a/cpp/src/wrapper/knowhere/vec_impl.h b/cpp/src/wrapper/knowhere/vec_impl.h index f144dc43a2..25f7d16548 100644 --- a/cpp/src/wrapper/knowhere/vec_impl.h +++ b/cpp/src/wrapper/knowhere/vec_impl.h @@ -7,6 +7,7 @@ #pragma once #include "knowhere/index/vector_index/vector_index.h" + #include "vec_index.h" @@ -16,7 +17,7 @@ namespace engine { class VecIndexImpl : public VecIndex { public: - explicit VecIndexImpl(std::shared_ptr index):index_(std::move(index)){}; + explicit VecIndexImpl(std::shared_ptr index) : index_(std::move(index)) {}; void BuildAll(const long &nb, const float *xb, const long *ids, diff --git a/cpp/src/wrapper/knowhere/vec_index.cpp b/cpp/src/wrapper/knowhere/vec_index.cpp index 1365836fd9..171388d0af 100644 --- a/cpp/src/wrapper/knowhere/vec_index.cpp +++ b/cpp/src/wrapper/knowhere/vec_index.cpp @@ -5,6 +5,7 @@ //////////////////////////////////////////////////////////////////////////////// #include "knowhere/index/vector_index/ivf.h" #include "knowhere/index/vector_index/gpu_ivf.h" +#include "knowhere/index/vector_index/cpu_kdt_rng.h" #include "vec_index.h" #include "vec_impl.h" @@ -14,18 +15,29 @@ namespace zilliz { namespace vecwise { namespace engine { +// TODO(linxj): index_type => enum struct VecIndexPtr GetVecIndexFactory(const std::string &index_type) { std::shared_ptr index; if (index_type == "IVF") { index = std::make_shared(); } else if (index_type == "GPUIVF") { - index = std::make_shared(); + index = std::make_shared(0); + } else if (index_type == "SPTAG") { + index = std::make_shared(); } - auto ret_index = std::make_shared(index); - //return std::static_pointer_cast(std::make_shared(index)); + // TODO(linxj): Support NSG + //else if (index_type == "NSG") { + // index = std::make_shared(); + //} return std::make_shared(index); } +VecIndexPtr LoadVecIndex(const std::string &index_type, const zilliz::knowhere::BinarySet 
&index_binary) { + auto index = GetVecIndexFactory(index_type); + index->Load(index_binary); + return index; +} + } } } diff --git a/cpp/src/wrapper/knowhere/vec_index.h b/cpp/src/wrapper/knowhere/vec_index.h index 3094eb336b..b03c43a36b 100644 --- a/cpp/src/wrapper/knowhere/vec_index.h +++ b/cpp/src/wrapper/knowhere/vec_index.h @@ -17,6 +17,7 @@ namespace zilliz { namespace vecwise { namespace engine { +// TODO(linxj): jsoncons => rapidjson or other. using Config = zilliz::knowhere::Config; class VecIndex { @@ -31,13 +32,13 @@ class VecIndex { virtual void Add(const long &nb, const float *xb, const long *ids, - const Config &cfg) = 0; + const Config &cfg = Config()) = 0; virtual void Search(const long &nq, const float *xq, float *dist, long *ids, - const Config &cfg) = 0; + const Config &cfg = Config()) = 0; virtual zilliz::knowhere::BinarySet Serialize() = 0; @@ -48,8 +49,7 @@ using VecIndexPtr = std::shared_ptr; extern VecIndexPtr GetVecIndexFactory(const std::string &index_type); -// TODO -extern VecIndexPtr LoadVecIndex(const zilliz::knowhere::BinarySet &index_binary); +extern VecIndexPtr LoadVecIndex(const std::string &index_type, const zilliz::knowhere::BinarySet &index_binary); } } diff --git a/cpp/thirdparty/knowhere b/cpp/thirdparty/knowhere index 291b3b4226..32187bacba 160000 --- a/cpp/thirdparty/knowhere +++ b/cpp/thirdparty/knowhere @@ -1 +1 @@ -Subproject commit 291b3b422664f2509bab79d5cc63823dedbe903c +Subproject commit 32187bacbaac0460676f5f6aa54ad904f5f2b5bc diff --git a/cpp/unittest/index_wrapper/CMakeLists.txt b/cpp/unittest/index_wrapper/CMakeLists.txt index 5c4b71227c..51bd97b575 100644 --- a/cpp/unittest/index_wrapper/CMakeLists.txt +++ b/cpp/unittest/index_wrapper/CMakeLists.txt @@ -3,6 +3,9 @@ link_directories("${CUDA_TOOLKIT_ROOT_DIR}/lib64") aux_source_directory(${MILVUS_ENGINE_SRC}/wrapper/knowhere knowhere_src) +set(helper + utils.cpp) + set(knowhere_libs knowhere SPTAGLibStatic @@ -11,9 +14,10 @@ set(knowhere_libs faiss openblas lapack 
+ tbb cudart cublas ) -add_executable(knowhere_test knowhere_test.cpp ${knowhere_src}) +add_executable(knowhere_test knowhere_test.cpp ${knowhere_src} ${helper}) target_link_libraries(knowhere_test ${knowhere_libs} ${unittest_libs}) \ No newline at end of file diff --git a/cpp/unittest/index_wrapper/knowhere_test.cpp b/cpp/unittest/index_wrapper/knowhere_test.cpp index caf287340e..58b0d5a4b2 100644 --- a/cpp/unittest/index_wrapper/knowhere_test.cpp +++ b/cpp/unittest/index_wrapper/knowhere_test.cpp @@ -8,86 +8,142 @@ #include +#include "utils.h" + + using namespace zilliz::vecwise::engine; +using namespace zilliz::knowhere; -TEST(knowhere_test, ivf_test) { - auto d = 128; - auto nt = 1000; - auto nb = 10000; - auto nq = 10; - //{ - // std::vector xb; - // std::vector xt; - // std::vector xq; - // std::vector ids; - // - // //prepare train data - // std::uniform_real_distribution<> dis_xt(-1.0, 1.0); - // std::random_device rd; - // std::mt19937 gen(rd()); - // xt.resize(nt*d); - // for (size_t i = 0; i < nt * d; i++) { - // xt[i] = dis_xt(gen); - // } - // xb.resize(nb*d); - // ids.resize(nb); - // for (size_t i = 0; i < nb * d; i++) { - // xb[i] = dis_xt(gen); - // if (i < nb) { - // ids[i] = i; - // } - // } - // xq.resize(nq*d); - // for (size_t i = 0; i < nq * d; i++) { - // xq[i] = dis_xt(gen); - // } - //} +using ::testing::TestWithParam; +using ::testing::Values; +using ::testing::Combine; - auto elems = nb * d; - auto p_data = (float *) malloc(elems * sizeof(float)); - auto p_id = (int64_t *) malloc(elems * sizeof(int64_t)); - assert(p_data != nullptr && p_id != nullptr); - for (auto i = 0; i < nb; ++i) { - for (auto j = 0; j < d; ++j) { - p_data[i * d + j] = drand48(); - } - p_data[d * i] += i / 1000.; - p_id[i] = i; +class KnowhereWrapperTest + : public TestWithParam<::std::tuple> { + protected: + void SetUp() override { + std::string generator_type; + std::tie(index_type, generator_type, dim, nb, nq, k, train_cfg, search_cfg) = GetParam(); + + //auto 
generator = GetGenerateFactory(generator_type); + auto generator = std::make_shared(); + generator->GenData(dim, nb, nq, xb, xq, ids, k, gt_ids); + + index_ = GetVecIndexFactory(index_type); } - auto q_elems = nq * d; - auto q_data = (float *) malloc(q_elems * sizeof(float)); + protected: + std::string index_type; + Config train_cfg; + Config search_cfg; - for (auto i = 0; i < nq; ++i) { - for (auto j = 0; j < d; ++j) { - q_data[i * d + j] = drand48(); - } - q_data[d * i] += i / 1000.; + int dim = 64; + int nb = 10000; + int nq = 10; + int k = 10; + std::vector xb; + std::vector xq; + std::vector ids; + + VecIndexPtr index_ = nullptr; + + // Ground Truth + std::vector gt_ids; +}; + +INSTANTIATE_TEST_CASE_P(WrapperParam, KnowhereWrapperTest, + Values( + // ["Index type", "Generator type", "dim", "nb", "nq", "k", "build config", "search config"] + std::make_tuple("IVF", "Default", + 64, 10000, 10, 10, + Config::object{{"nlist", 100}, {"dim", 64}}, + Config::object{{"dim", 64}, {"k", 10}, {"nprobe", 20}} + ), + std::make_tuple("SPTAG", "Default", + 64, 10000, 10, 10, + Config::object{{"TPTNumber", 1}, {"dim", 64}}, + Config::object{{"dim", 64}, {"k", 10}} + ) + ) +); + +void AssertAnns(const std::vector >, + const std::vector &res, + const int &nq, + const int &k) { + EXPECT_EQ(res.size(), nq * k); + + for (auto i = 0; i < nq; i++) { + EXPECT_EQ(gt[i * k], res[i * k]); } - Config build_cfg = Config::object{ - {"dim", d}, - {"nlist", 100}, - }; + int match = 0; + for (int i = 0; i < nq; ++i) { + for (int j = 0; j < k; ++j) { + for (int l = 0; l < k; ++l) { + if (gt[i * nq + j] == res[i * nq + l]) match++; + } + } + } - auto k = 10; - Config search_cfg = Config::object{ - {"dim", d}, - {"k", k}, - }; - - std::vector ret_dist(nq*k); - std::vector ret_ids(nq*k); - - const std::string& index_type = "IVF"; - auto index = GetVecIndexFactory(index_type); - index->BuildAll(nb, p_data, p_id, build_cfg); - - auto add_bin = index->Serialize(); - index->Load(add_bin); - - 
index->Search(nq, q_data, ret_dist.data(), ret_ids.data(), search_cfg); - - std::cout << "he"; + // TODO(linxj): percision check + EXPECT_GT(float(match/nq*k), 0.5); +} + +TEST_P(KnowhereWrapperTest, base_test) { + std::vector res_ids; + float *D = new float[k * nq]; + res_ids.resize(nq * k); + + index_->BuildAll(nb, xb.data(), ids.data(), train_cfg); + index_->Search(nq, xq.data(), D, res_ids.data(), search_cfg); + AssertAnns(gt_ids, res_ids, nq, k); + delete[] D; +} + +TEST_P(KnowhereWrapperTest, serialize_test) { + std::vector res_ids; + float *D = new float[k * nq]; + res_ids.resize(nq * k); + + index_->BuildAll(nb, xb.data(), ids.data(), train_cfg); + index_->Search(nq, xq.data(), D, res_ids.data(), search_cfg); + AssertAnns(gt_ids, res_ids, nq, k); + + { + auto binaryset = index_->Serialize(); + int fileno = 0; + const std::string &base_name = "/tmp/wrapper_serialize_test_bin_"; + std::vector filename_list; + std::vector> meta_list; + for (auto &iter: binaryset.binary_map_) { + const std::string &filename = base_name + std::to_string(fileno); + FileIOWriter writer(filename); + writer(iter.second.data, iter.second.size); + + meta_list.push_back(std::make_pair(iter.first, iter.second.size)); + filename_list.push_back(filename); + ++fileno; + } + + BinarySet load_data_list; + for (int i = 0; i < filename_list.size() && i < meta_list.size(); ++i) { + auto bin_size = meta_list[i].second; + FileIOReader reader(filename_list[i]); + std::vector load_data(bin_size); + reader(load_data.data(), bin_size); + load_data_list.Append(meta_list[i].first, load_data); + } + + + res_ids.clear(); + res_ids.resize(nq * k); + auto new_index = GetVecIndexFactory(index_type); + new_index->Load(load_data_list); + new_index->Search(nq, xq.data(), D, res_ids.data(), search_cfg); + AssertAnns(gt_ids, res_ids, nq, k); + } + + delete[] D; } diff --git a/cpp/unittest/index_wrapper/utils.cpp b/cpp/unittest/index_wrapper/utils.cpp new file mode 100644 index 0000000000..e228ae001d --- 
/dev/null +++ b/cpp/unittest/index_wrapper/utils.cpp @@ -0,0 +1,81 @@ +//////////////////////////////////////////////////////////////////////////////// +// Copyright 上海赜睿信息科技有限公司(Zilliz) - All Rights Reserved +// Unauthorized copying of this file, via any medium is strictly prohibited. +// Proprietary and confidential. +//////////////////////////////////////////////////////////////////////////////// + +#include + +#include "utils.h" + + +DataGenPtr GetGenerateFactory(const std::string &gen_type) { + std::shared_ptr generator; + if (gen_type == "default") { + generator = std::make_shared(); + } + return generator; +} + +void DataGenBase::GenData(const int &dim, const int &nb, const int &nq, + float *xb, float *xq, long *ids, + const int &k, long *gt_ids) { + for (auto i = 0; i < nb; ++i) { + for (auto j = 0; j < dim; ++j) { + //p_data[i * d + j] = float(base + i); + xb[i * dim + j] = drand48(); + } + xb[dim * i] += i / 1000.; + ids[i] = i; + } + for (size_t i = 0; i < nq * dim; ++i) { + xq[i] = xb[i]; + } + + faiss::IndexFlatL2 index(dim); + //index.add_with_ids(nb, xb, ids); + index.add(nb, xb); + float *D = new float[k * nq]; + index.search(nq, xq, k, D, gt_ids); +} + +void DataGenBase::GenData(const int &dim, + const int &nb, + const int &nq, + std::vector &xb, + std::vector &xq, + std::vector &ids, + const int &k, + std::vector >_ids) { + xb.resize(nb * dim); + xq.resize(nq * dim); + ids.resize(nb); + gt_ids.resize(nq * k); + GenData(dim, nb, nq, xb.data(), xq.data(), ids.data(), k, gt_ids.data()); +} + +FileIOReader::FileIOReader(const std::string &fname) { + name = fname; + fs = std::fstream(name, std::ios::in | std::ios::binary); +} + +FileIOReader::~FileIOReader() { + fs.close(); +} + +size_t FileIOReader::operator()(void *ptr, size_t size) { + fs.read(reinterpret_cast(ptr), size); +} + +FileIOWriter::FileIOWriter(const std::string &fname) { + name = fname; + fs = std::fstream(name, std::ios::out | std::ios::binary); +} + +FileIOWriter::~FileIOWriter() { + 
fs.close(); +} + +size_t FileIOWriter::operator()(void *ptr, size_t size) { + fs.write(reinterpret_cast(ptr), size); +} diff --git a/cpp/unittest/index_wrapper/utils.h b/cpp/unittest/index_wrapper/utils.h new file mode 100644 index 0000000000..bbc52a011b --- /dev/null +++ b/cpp/unittest/index_wrapper/utils.h @@ -0,0 +1,61 @@ +//////////////////////////////////////////////////////////////////////////////// +// Copyright 上海赜睿信息科技有限公司(Zilliz) - All Rights Reserved +// Unauthorized copying of this file, via any medium is strictly prohibited. +// Proprietary and confidential. +//////////////////////////////////////////////////////////////////////////////// + +#pragma once + +#include +#include +#include +#include +#include + + +class DataGenBase; + +using DataGenPtr = std::shared_ptr; + +extern DataGenPtr GetGenerateFactory(const std::string &gen_type); + + +class DataGenBase { + public: + virtual void GenData(const int &dim, const int &nb, const int &nq, float *xb, float *xq, long *ids, + const int &k, long *gt_ids); + + virtual void GenData(const int &dim, + const int &nb, + const int &nq, + std::vector &xb, + std::vector &xq, + std::vector &ids, + const int &k, + std::vector >_ids); +}; + + +class SanityCheck : public DataGenBase { + public: + void GenData(const int &dim, const int &nb, const int &nq, float *xb, float *xq, long *ids, + const int &k, long *gt_ids) override; +}; + +struct FileIOWriter { + std::fstream fs; + std::string name; + + FileIOWriter(const std::string &fname); + ~FileIOWriter(); + size_t operator()(void *ptr, size_t size); +}; + +struct FileIOReader { + std::fstream fs; + std::string name; + + FileIOReader(const std::string &fname); + ~FileIOReader(); + size_t operator()(void *ptr, size_t size); +}; From 0224845fb3838301641269634e8fd48116b6af44 Mon Sep 17 00:00:00 2001 From: "xj.lin" Date: Tue, 2 Jul 2019 18:37:33 +0800 Subject: [PATCH 06/19] 1. 
update wrapper and wrapper-test Former-commit-id: 73af1a5b254676e9d4c6360b4f4cb8f79cc47514 --- cpp/src/wrapper/knowhere/data_transfer.cpp | 48 +++++ cpp/src/wrapper/knowhere/data_transfer.h | 35 ++-- cpp/src/wrapper/knowhere/vec_impl.cpp | 25 +-- cpp/src/wrapper/knowhere/vec_impl.h | 3 +- cpp/src/wrapper/knowhere/vec_index.cpp | 18 +- cpp/src/wrapper/knowhere/vec_index.h | 8 +- cpp/thirdparty/knowhere | 2 +- cpp/unittest/index_wrapper/CMakeLists.txt | 6 +- cpp/unittest/index_wrapper/knowhere_test.cpp | 200 ++++++++++++------- cpp/unittest/index_wrapper/utils.cpp | 81 ++++++++ cpp/unittest/index_wrapper/utils.h | 61 ++++++ 11 files changed, 369 insertions(+), 118 deletions(-) create mode 100644 cpp/src/wrapper/knowhere/data_transfer.cpp create mode 100644 cpp/unittest/index_wrapper/utils.cpp create mode 100644 cpp/unittest/index_wrapper/utils.h diff --git a/cpp/src/wrapper/knowhere/data_transfer.cpp b/cpp/src/wrapper/knowhere/data_transfer.cpp new file mode 100644 index 0000000000..af5ad212e4 --- /dev/null +++ b/cpp/src/wrapper/knowhere/data_transfer.cpp @@ -0,0 +1,48 @@ +//////////////////////////////////////////////////////////////////////////////// +// Copyright 上海赜睿信息科技有限公司(Zilliz) - All Rights Reserved +// Unauthorized copying of this file, via any medium is strictly prohibited. +// Proprietary and confidential. 
+//////////////////////////////////////////////////////////////////////////////// + +#include "data_transfer.h" + + +namespace zilliz { +namespace vecwise { +namespace engine { + +using namespace zilliz::knowhere; + +DatasetPtr +GenDatasetWithIds(const int64_t &nb, const int64_t &dim, const float *xb, const long *ids) { + std::vector shape{nb, dim}; + auto tensor = ConstructFloatTensor((uint8_t *) xb, nb * dim * sizeof(float), shape); + std::vector tensors{tensor}; + std::vector tensor_fields{ConstructFloatField("data")}; + auto tensor_schema = std::make_shared(tensor_fields); + + auto id_array = ConstructInt64Array((uint8_t *) ids, nb * sizeof(int64_t)); + std::vector arrays{id_array}; + std::vector array_fields{ConstructInt64Field("id")}; + auto array_schema = std::make_shared(tensor_fields); + + auto dataset = std::make_shared(std::move(arrays), array_schema, + std::move(tensors), tensor_schema); + return dataset; +} + +DatasetPtr +GenDataset(const int64_t &nb, const int64_t &dim, const float *xb) { + std::vector shape{nb, dim}; + auto tensor = ConstructFloatTensor((uint8_t *) xb, nb * dim * sizeof(float), shape); + std::vector tensors{tensor}; + std::vector tensor_fields{ConstructFloatField("data")}; + auto tensor_schema = std::make_shared(tensor_fields); + + auto dataset = std::make_shared(std::move(tensors), tensor_schema); + return dataset; +} + +} +} +} diff --git a/cpp/src/wrapper/knowhere/data_transfer.h b/cpp/src/wrapper/knowhere/data_transfer.h index e5a9402cff..c99cd1c742 100644 --- a/cpp/src/wrapper/knowhere/data_transfer.h +++ b/cpp/src/wrapper/knowhere/data_transfer.h @@ -6,24 +6,19 @@ #pragma once -#define GENDATASET(n,d,xb,ids)\ - size_t elems = (n) * (d);\ - std::vector shape{n, d};\ - auto tensor = ConstructFloatTensor((uint8_t *) (xb), elems * sizeof(float), shape);\ - std::vector tensors{tensor};\ - std::vector tensor_fields{ConstructFloatField("data")};\ - auto tensor_schema = std::make_shared(tensor_fields);\ - auto id_array = 
ConstructInt64Array((uint8_t *) (ids), (n) * sizeof(int64_t));\ - std::vector arrays{id_array};\ - std::vector array_fields{ConstructInt64Field("id")};\ - auto array_schema = std::make_shared(tensor_fields);\ - auto dataset = std::make_shared(std::move(arrays), array_schema, std::move(tensors), tensor_schema);\ +#include "knowhere/adapter/structure.h" -#define GENQUERYDATASET(n,d,xb)\ - size_t elems = (n) * (d);\ - std::vector shape{(n), (d)};\ - auto tensor = ConstructFloatTensor((uint8_t *) (xb), elems * sizeof(float), shape);\ - std::vector tensors{tensor};\ - std::vector tensor_fields{ConstructFloatField("data")};\ - auto tensor_schema = std::make_shared(tensor_fields);\ - auto dataset = std::make_shared(std::move(tensors), tensor_schema);\ + +namespace zilliz { +namespace vecwise { +namespace engine { + +extern zilliz::knowhere::DatasetPtr +GenDatasetWithIds(const int64_t &nb, const int64_t &dim, const float *xb, const long *ids); + +extern zilliz::knowhere::DatasetPtr +GenDataset(const int64_t &nb, const int64_t &dim, const float *xb); + +} +} +} diff --git a/cpp/src/wrapper/knowhere/vec_impl.cpp b/cpp/src/wrapper/knowhere/vec_impl.cpp index bcb537fda2..e24d470acc 100644 --- a/cpp/src/wrapper/knowhere/vec_impl.cpp +++ b/cpp/src/wrapper/knowhere/vec_impl.cpp @@ -13,11 +13,6 @@ #include "vec_impl.h" #include "data_transfer.h" -//using Index = zilliz::knowhere::Index; -//using IndexModel = zilliz::knowhere::IndexModel; -//using IndexType = zilliz::knowhere::IndexType; -//using IndexPtr = std::shared_ptr; -//using IndexModelPtr = std::shared_ptr; namespace zilliz { namespace vecwise { @@ -31,24 +26,21 @@ void VecIndexImpl::BuildAll(const long &nb, const Config &cfg, const long &nt, const float *xt) { - using namespace zilliz::knowhere; - auto d = cfg["dim"].as(); - GENDATASET(nb, d, xb, ids) + auto dataset = GenDatasetWithIds(nb, d, xb, ids); - Config train_cfg; - Config add_cfg; - Config search_cfg; + auto preprocessor = index_->BuildPreprocessor(dataset, cfg); 
+ index_->set_preprocessor(preprocessor); auto model = index_->Train(dataset, cfg); index_->set_index_model(model); - index_->Add(dataset, add_cfg); + index_->Add(dataset, cfg); } void VecIndexImpl::Add(const long &nb, const float *xb, const long *ids, const Config &cfg) { - // TODO: Assert index is trained; + // TODO(linxj): Assert index is trained; auto d = cfg["dim"].as(); - GENDATASET(nb, d, xb, ids) + auto dataset = GenDatasetWithIds(nb, d, xb, ids); index_->Add(dataset, cfg); } @@ -58,12 +50,13 @@ void VecIndexImpl::Search(const long &nq, const float *xq, float *dist, long *id auto d = cfg["dim"].as(); auto k = cfg["k"].as(); - GENQUERYDATASET(nq, d, xq) + auto dataset = GenDataset(nq, d, xq); Config search_cfg; auto res = index_->Search(dataset, cfg); auto ids_array = res->array()[0]; auto dis_array = res->array()[1]; + //{ // auto& ids = ids_array; // auto& dists = dis_array; @@ -81,10 +74,10 @@ void VecIndexImpl::Search(const long &nq, const float *xq, float *dist, long *id // std::cout << "dist\n" << ss_dist.str() << std::endl; //} - // TODO: deep copy here. auto p_ids = ids_array->data()->GetValues(1, 0); auto p_dist = ids_array->data()->GetValues(1, 0); + // TODO(linxj): avoid copy here. 
memcpy(ids, p_ids, sizeof(int64_t) * nq * k); memcpy(dist, p_dist, sizeof(float) * nq * k); } diff --git a/cpp/src/wrapper/knowhere/vec_impl.h b/cpp/src/wrapper/knowhere/vec_impl.h index f144dc43a2..25f7d16548 100644 --- a/cpp/src/wrapper/knowhere/vec_impl.h +++ b/cpp/src/wrapper/knowhere/vec_impl.h @@ -7,6 +7,7 @@ #pragma once #include "knowhere/index/vector_index/vector_index.h" + #include "vec_index.h" @@ -16,7 +17,7 @@ namespace engine { class VecIndexImpl : public VecIndex { public: - explicit VecIndexImpl(std::shared_ptr index):index_(std::move(index)){}; + explicit VecIndexImpl(std::shared_ptr index) : index_(std::move(index)) {}; void BuildAll(const long &nb, const float *xb, const long *ids, diff --git a/cpp/src/wrapper/knowhere/vec_index.cpp b/cpp/src/wrapper/knowhere/vec_index.cpp index 1365836fd9..171388d0af 100644 --- a/cpp/src/wrapper/knowhere/vec_index.cpp +++ b/cpp/src/wrapper/knowhere/vec_index.cpp @@ -5,6 +5,7 @@ //////////////////////////////////////////////////////////////////////////////// #include "knowhere/index/vector_index/ivf.h" #include "knowhere/index/vector_index/gpu_ivf.h" +#include "knowhere/index/vector_index/cpu_kdt_rng.h" #include "vec_index.h" #include "vec_impl.h" @@ -14,18 +15,29 @@ namespace zilliz { namespace vecwise { namespace engine { +// TODO(linxj): index_type => enum struct VecIndexPtr GetVecIndexFactory(const std::string &index_type) { std::shared_ptr index; if (index_type == "IVF") { index = std::make_shared(); } else if (index_type == "GPUIVF") { - index = std::make_shared(); + index = std::make_shared(0); + } else if (index_type == "SPTAG") { + index = std::make_shared(); } - auto ret_index = std::make_shared(index); - //return std::static_pointer_cast(std::make_shared(index)); + // TODO(linxj): Support NSG + //else if (index_type == "NSG") { + // index = std::make_shared(); + //} return std::make_shared(index); } +VecIndexPtr LoadVecIndex(const std::string &index_type, const zilliz::knowhere::BinarySet 
&index_binary) { + auto index = GetVecIndexFactory(index_type); + index->Load(index_binary); + return index; +} + } } } diff --git a/cpp/src/wrapper/knowhere/vec_index.h b/cpp/src/wrapper/knowhere/vec_index.h index 3094eb336b..b03c43a36b 100644 --- a/cpp/src/wrapper/knowhere/vec_index.h +++ b/cpp/src/wrapper/knowhere/vec_index.h @@ -17,6 +17,7 @@ namespace zilliz { namespace vecwise { namespace engine { +// TODO(linxj): jsoncons => rapidjson or other. using Config = zilliz::knowhere::Config; class VecIndex { @@ -31,13 +32,13 @@ class VecIndex { virtual void Add(const long &nb, const float *xb, const long *ids, - const Config &cfg) = 0; + const Config &cfg = Config()) = 0; virtual void Search(const long &nq, const float *xq, float *dist, long *ids, - const Config &cfg) = 0; + const Config &cfg = Config()) = 0; virtual zilliz::knowhere::BinarySet Serialize() = 0; @@ -48,8 +49,7 @@ using VecIndexPtr = std::shared_ptr; extern VecIndexPtr GetVecIndexFactory(const std::string &index_type); -// TODO -extern VecIndexPtr LoadVecIndex(const zilliz::knowhere::BinarySet &index_binary); +extern VecIndexPtr LoadVecIndex(const std::string &index_type, const zilliz::knowhere::BinarySet &index_binary); } } diff --git a/cpp/thirdparty/knowhere b/cpp/thirdparty/knowhere index 291b3b4226..32187bacba 160000 --- a/cpp/thirdparty/knowhere +++ b/cpp/thirdparty/knowhere @@ -1 +1 @@ -Subproject commit 291b3b422664f2509bab79d5cc63823dedbe903c +Subproject commit 32187bacbaac0460676f5f6aa54ad904f5f2b5bc diff --git a/cpp/unittest/index_wrapper/CMakeLists.txt b/cpp/unittest/index_wrapper/CMakeLists.txt index 5c4b71227c..51bd97b575 100644 --- a/cpp/unittest/index_wrapper/CMakeLists.txt +++ b/cpp/unittest/index_wrapper/CMakeLists.txt @@ -3,6 +3,9 @@ link_directories("${CUDA_TOOLKIT_ROOT_DIR}/lib64") aux_source_directory(${MILVUS_ENGINE_SRC}/wrapper/knowhere knowhere_src) +set(helper + utils.cpp) + set(knowhere_libs knowhere SPTAGLibStatic @@ -11,9 +14,10 @@ set(knowhere_libs faiss openblas lapack 
+ tbb cudart cublas ) -add_executable(knowhere_test knowhere_test.cpp ${knowhere_src}) +add_executable(knowhere_test knowhere_test.cpp ${knowhere_src} ${helper}) target_link_libraries(knowhere_test ${knowhere_libs} ${unittest_libs}) \ No newline at end of file diff --git a/cpp/unittest/index_wrapper/knowhere_test.cpp b/cpp/unittest/index_wrapper/knowhere_test.cpp index caf287340e..58b0d5a4b2 100644 --- a/cpp/unittest/index_wrapper/knowhere_test.cpp +++ b/cpp/unittest/index_wrapper/knowhere_test.cpp @@ -8,86 +8,142 @@ #include +#include "utils.h" + + using namespace zilliz::vecwise::engine; +using namespace zilliz::knowhere; -TEST(knowhere_test, ivf_test) { - auto d = 128; - auto nt = 1000; - auto nb = 10000; - auto nq = 10; - //{ - // std::vector xb; - // std::vector xt; - // std::vector xq; - // std::vector ids; - // - // //prepare train data - // std::uniform_real_distribution<> dis_xt(-1.0, 1.0); - // std::random_device rd; - // std::mt19937 gen(rd()); - // xt.resize(nt*d); - // for (size_t i = 0; i < nt * d; i++) { - // xt[i] = dis_xt(gen); - // } - // xb.resize(nb*d); - // ids.resize(nb); - // for (size_t i = 0; i < nb * d; i++) { - // xb[i] = dis_xt(gen); - // if (i < nb) { - // ids[i] = i; - // } - // } - // xq.resize(nq*d); - // for (size_t i = 0; i < nq * d; i++) { - // xq[i] = dis_xt(gen); - // } - //} +using ::testing::TestWithParam; +using ::testing::Values; +using ::testing::Combine; - auto elems = nb * d; - auto p_data = (float *) malloc(elems * sizeof(float)); - auto p_id = (int64_t *) malloc(elems * sizeof(int64_t)); - assert(p_data != nullptr && p_id != nullptr); - for (auto i = 0; i < nb; ++i) { - for (auto j = 0; j < d; ++j) { - p_data[i * d + j] = drand48(); - } - p_data[d * i] += i / 1000.; - p_id[i] = i; +class KnowhereWrapperTest + : public TestWithParam<::std::tuple> { + protected: + void SetUp() override { + std::string generator_type; + std::tie(index_type, generator_type, dim, nb, nq, k, train_cfg, search_cfg) = GetParam(); + + //auto 
generator = GetGenerateFactory(generator_type); + auto generator = std::make_shared(); + generator->GenData(dim, nb, nq, xb, xq, ids, k, gt_ids); + + index_ = GetVecIndexFactory(index_type); } - auto q_elems = nq * d; - auto q_data = (float *) malloc(q_elems * sizeof(float)); + protected: + std::string index_type; + Config train_cfg; + Config search_cfg; - for (auto i = 0; i < nq; ++i) { - for (auto j = 0; j < d; ++j) { - q_data[i * d + j] = drand48(); - } - q_data[d * i] += i / 1000.; + int dim = 64; + int nb = 10000; + int nq = 10; + int k = 10; + std::vector xb; + std::vector xq; + std::vector ids; + + VecIndexPtr index_ = nullptr; + + // Ground Truth + std::vector gt_ids; +}; + +INSTANTIATE_TEST_CASE_P(WrapperParam, KnowhereWrapperTest, + Values( + // ["Index type", "Generator type", "dim", "nb", "nq", "k", "build config", "search config"] + std::make_tuple("IVF", "Default", + 64, 10000, 10, 10, + Config::object{{"nlist", 100}, {"dim", 64}}, + Config::object{{"dim", 64}, {"k", 10}, {"nprobe", 20}} + ), + std::make_tuple("SPTAG", "Default", + 64, 10000, 10, 10, + Config::object{{"TPTNumber", 1}, {"dim", 64}}, + Config::object{{"dim", 64}, {"k", 10}} + ) + ) +); + +void AssertAnns(const std::vector >, + const std::vector &res, + const int &nq, + const int &k) { + EXPECT_EQ(res.size(), nq * k); + + for (auto i = 0; i < nq; i++) { + EXPECT_EQ(gt[i * k], res[i * k]); } - Config build_cfg = Config::object{ - {"dim", d}, - {"nlist", 100}, - }; + int match = 0; + for (int i = 0; i < nq; ++i) { + for (int j = 0; j < k; ++j) { + for (int l = 0; l < k; ++l) { + if (gt[i * nq + j] == res[i * nq + l]) match++; + } + } + } - auto k = 10; - Config search_cfg = Config::object{ - {"dim", d}, - {"k", k}, - }; - - std::vector ret_dist(nq*k); - std::vector ret_ids(nq*k); - - const std::string& index_type = "IVF"; - auto index = GetVecIndexFactory(index_type); - index->BuildAll(nb, p_data, p_id, build_cfg); - - auto add_bin = index->Serialize(); - index->Load(add_bin); - - 
index->Search(nq, q_data, ret_dist.data(), ret_ids.data(), search_cfg); - - std::cout << "he"; + // TODO(linxj): percision check + EXPECT_GT(float(match/nq*k), 0.5); +} + +TEST_P(KnowhereWrapperTest, base_test) { + std::vector res_ids; + float *D = new float[k * nq]; + res_ids.resize(nq * k); + + index_->BuildAll(nb, xb.data(), ids.data(), train_cfg); + index_->Search(nq, xq.data(), D, res_ids.data(), search_cfg); + AssertAnns(gt_ids, res_ids, nq, k); + delete[] D; +} + +TEST_P(KnowhereWrapperTest, serialize_test) { + std::vector res_ids; + float *D = new float[k * nq]; + res_ids.resize(nq * k); + + index_->BuildAll(nb, xb.data(), ids.data(), train_cfg); + index_->Search(nq, xq.data(), D, res_ids.data(), search_cfg); + AssertAnns(gt_ids, res_ids, nq, k); + + { + auto binaryset = index_->Serialize(); + int fileno = 0; + const std::string &base_name = "/tmp/wrapper_serialize_test_bin_"; + std::vector filename_list; + std::vector> meta_list; + for (auto &iter: binaryset.binary_map_) { + const std::string &filename = base_name + std::to_string(fileno); + FileIOWriter writer(filename); + writer(iter.second.data, iter.second.size); + + meta_list.push_back(std::make_pair(iter.first, iter.second.size)); + filename_list.push_back(filename); + ++fileno; + } + + BinarySet load_data_list; + for (int i = 0; i < filename_list.size() && i < meta_list.size(); ++i) { + auto bin_size = meta_list[i].second; + FileIOReader reader(filename_list[i]); + std::vector load_data(bin_size); + reader(load_data.data(), bin_size); + load_data_list.Append(meta_list[i].first, load_data); + } + + + res_ids.clear(); + res_ids.resize(nq * k); + auto new_index = GetVecIndexFactory(index_type); + new_index->Load(load_data_list); + new_index->Search(nq, xq.data(), D, res_ids.data(), search_cfg); + AssertAnns(gt_ids, res_ids, nq, k); + } + + delete[] D; } diff --git a/cpp/unittest/index_wrapper/utils.cpp b/cpp/unittest/index_wrapper/utils.cpp new file mode 100644 index 0000000000..e228ae001d --- 
/dev/null +++ b/cpp/unittest/index_wrapper/utils.cpp @@ -0,0 +1,81 @@ +//////////////////////////////////////////////////////////////////////////////// +// Copyright 上海赜睿信息科技有限公司(Zilliz) - All Rights Reserved +// Unauthorized copying of this file, via any medium is strictly prohibited. +// Proprietary and confidential. +//////////////////////////////////////////////////////////////////////////////// + +#include + +#include "utils.h" + + +DataGenPtr GetGenerateFactory(const std::string &gen_type) { + std::shared_ptr generator; + if (gen_type == "default") { + generator = std::make_shared(); + } + return generator; +} + +void DataGenBase::GenData(const int &dim, const int &nb, const int &nq, + float *xb, float *xq, long *ids, + const int &k, long *gt_ids) { + for (auto i = 0; i < nb; ++i) { + for (auto j = 0; j < dim; ++j) { + //p_data[i * d + j] = float(base + i); + xb[i * dim + j] = drand48(); + } + xb[dim * i] += i / 1000.; + ids[i] = i; + } + for (size_t i = 0; i < nq * dim; ++i) { + xq[i] = xb[i]; + } + + faiss::IndexFlatL2 index(dim); + //index.add_with_ids(nb, xb, ids); + index.add(nb, xb); + float *D = new float[k * nq]; + index.search(nq, xq, k, D, gt_ids); +} + +void DataGenBase::GenData(const int &dim, + const int &nb, + const int &nq, + std::vector &xb, + std::vector &xq, + std::vector &ids, + const int &k, + std::vector >_ids) { + xb.resize(nb * dim); + xq.resize(nq * dim); + ids.resize(nb); + gt_ids.resize(nq * k); + GenData(dim, nb, nq, xb.data(), xq.data(), ids.data(), k, gt_ids.data()); +} + +FileIOReader::FileIOReader(const std::string &fname) { + name = fname; + fs = std::fstream(name, std::ios::in | std::ios::binary); +} + +FileIOReader::~FileIOReader() { + fs.close(); +} + +size_t FileIOReader::operator()(void *ptr, size_t size) { + fs.read(reinterpret_cast(ptr), size); +} + +FileIOWriter::FileIOWriter(const std::string &fname) { + name = fname; + fs = std::fstream(name, std::ios::out | std::ios::binary); +} + +FileIOWriter::~FileIOWriter() { + 
fs.close(); +} + +size_t FileIOWriter::operator()(void *ptr, size_t size) { + fs.write(reinterpret_cast(ptr), size); +} diff --git a/cpp/unittest/index_wrapper/utils.h b/cpp/unittest/index_wrapper/utils.h new file mode 100644 index 0000000000..bbc52a011b --- /dev/null +++ b/cpp/unittest/index_wrapper/utils.h @@ -0,0 +1,61 @@ +//////////////////////////////////////////////////////////////////////////////// +// Copyright 上海赜睿信息科技有限公司(Zilliz) - All Rights Reserved +// Unauthorized copying of this file, via any medium is strictly prohibited. +// Proprietary and confidential. +//////////////////////////////////////////////////////////////////////////////// + +#pragma once + +#include +#include +#include +#include +#include + + +class DataGenBase; + +using DataGenPtr = std::shared_ptr; + +extern DataGenPtr GetGenerateFactory(const std::string &gen_type); + + +class DataGenBase { + public: + virtual void GenData(const int &dim, const int &nb, const int &nq, float *xb, float *xq, long *ids, + const int &k, long *gt_ids); + + virtual void GenData(const int &dim, + const int &nb, + const int &nq, + std::vector &xb, + std::vector &xq, + std::vector &ids, + const int &k, + std::vector >_ids); +}; + + +class SanityCheck : public DataGenBase { + public: + void GenData(const int &dim, const int &nb, const int &nq, float *xb, float *xq, long *ids, + const int &k, long *gt_ids) override; +}; + +struct FileIOWriter { + std::fstream fs; + std::string name; + + FileIOWriter(const std::string &fname); + ~FileIOWriter(); + size_t operator()(void *ptr, size_t size); +}; + +struct FileIOReader { + std::fstream fs; + std::string name; + + FileIOReader(const std::string &fname); + ~FileIOReader(); + size_t operator()(void *ptr, size_t size); +}; From 3fda33ac0bba66e9eac5b1162c860fcf0fbe299e Mon Sep 17 00:00:00 2001 From: yu yunfeng Date: Tue, 2 Jul 2019 19:36:06 +0800 Subject: [PATCH 07/19] alter nprobe Former-commit-id: 21cc3f6523580303bf702f7e616bcee7416a053f --- 
cpp/conf/server_config.yaml | 7 +++++-- cpp/src/wrapper/Index.cpp | 31 ++++++++++++++++++++++++++++++- cpp/src/wrapper/Operand.cpp | 2 +- 3 files changed, 36 insertions(+), 4 deletions(-) diff --git a/cpp/conf/server_config.yaml b/cpp/conf/server_config.yaml index 0053ba365a..359f8d86bf 100644 --- a/cpp/conf/server_config.yaml +++ b/cpp/conf/server_config.yaml @@ -5,7 +5,7 @@ server_config: mode: single # milvus deployment type: single, cluster db_config: - db_path: /opt/milvus # milvus data storage path + db_path: /tmp/milvus # milvus data storage path db_backend_url: http://127.0.0.1 # meta database uri index_building_threshold: 1024 # index building trigger threshold, default: 1024, unit: MB archive_disk_threshold: 512 # triger archive action if storage size exceed this value, unit: GB @@ -22,7 +22,10 @@ metric_config: license_config: # license configure - license_path: "/opt/milvus/system.license" # license file path + license_path: "/tmp/milvus/system.license" # license file path cache_config: # cache configure cpu_cache_capacity: 16 # how many memory are used as cache, unit: GB, range: 0 ~ less than total memory + +engine_config: + nprobe: 3000 \ No newline at end of file diff --git a/cpp/src/wrapper/Index.cpp b/cpp/src/wrapper/Index.cpp index c306b65e1a..b051bfb07e 100644 --- a/cpp/src/wrapper/Index.cpp +++ b/cpp/src/wrapper/Index.cpp @@ -14,6 +14,8 @@ #include "Index.h" #include "faiss/index_io.h" #include "faiss/IndexIVF.h" +#include +#include "server/ServerConfig.h" namespace zilliz { namespace milvus { @@ -23,6 +25,32 @@ using std::string; using std::unordered_map; using std::vector; +class Nprobe { + public: + static Nprobe &GetInstance() { + static Nprobe instance; + return instance; + } + + void SelectNprobe() { + using namespace zilliz::milvus::server; + ServerConfig &config = ServerConfig::GetInstance(); + ConfigNode engine_config = config.GetConfig(CONFIG_ENGINE); + nprobe_ = engine_config.GetInt32Value(CONFIG_NPROBE, 1000); + } + + size_t 
GetNprobe() { + return nprobe_; + } + + private: + Nprobe() : nprobe_(1000) { SelectNprobe(); } + + private: + size_t nprobe_; +}; + + Index::Index(const std::shared_ptr &raw_index) { index_ = raw_index; dim = index_->d; @@ -57,7 +85,8 @@ bool Index::add_with_ids(idx_t n, const float *xdata, const long *xids) { bool Index::search(idx_t n, const float *data, idx_t k, float *distances, long *labels) const { try { if(auto ivf_index = std::dynamic_pointer_cast(index_)) { - ivf_index->nprobe = 100; + ivf_index->nprobe = Nprobe::GetInstance().GetNprobe(); + std::cout << "nprobe = " << ivf_index->nprobe << std::endl; } index_->search(n, data, k, distances, labels); } diff --git a/cpp/src/wrapper/Operand.cpp b/cpp/src/wrapper/Operand.cpp index 30e31067fd..25341676a6 100644 --- a/cpp/src/wrapper/Operand.cpp +++ b/cpp/src/wrapper/Operand.cpp @@ -39,7 +39,7 @@ string Operand::get_index_type(const int &nb) { } case IVF: { index_str += (ncent != 0 ? index_type + std::to_string(ncent) : - index_type + std::to_string(int(nb / 1000000.0 * 1638))); + index_type + std::to_string(int(nb / 1000000.0 * 16384))); break; } case IDMAP: { From 3d9a0df2bda8434b708656d46ff968616a03fb63 Mon Sep 17 00:00:00 2001 From: "xj.lin" Date: Tue, 2 Jul 2019 19:58:15 +0800 Subject: [PATCH 08/19] update README.md Former-commit-id: e28550bbbf254514b93e318a7aa8798274e5e55d --- cpp/README.md | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/cpp/README.md b/cpp/README.md index c54797b03f..d13e6b7fcd 100644 --- a/cpp/README.md +++ b/cpp/README.md @@ -1,13 +1,12 @@ ### Compilation #### Step 1: install necessery tools - Install MySQL centos7 : - yum install gfortran qt4 flex bison mysql-devel + yum install gfortran qt4 flex bison mysql-devel mysql ubuntu16.04 : - sudo apt-get install gfortran qt4-qmake flex bison libmysqlclient-dev + sudo apt-get install gfortran qt4-qmake flex bison libmysqlclient-dev mysql-client If `libmysqlclient_r.so` does not exist after installing MySQL 
Development Files, you need to create a symbolic link: @@ -56,10 +55,10 @@ If you encounter the following error when building: ### Launch server Set config in cpp/conf/server_config.yaml -Add milvus/bin/lib to LD_LIBRARY_PATH +Add milvus/lib to LD_LIBRARY_PATH ``` -export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/path/to/milvus/bin/lib +export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/path/to/milvus/lib ``` Then launch server with config: From c480cb8b7f38d581287f72ad52cf77864b609ed5 Mon Sep 17 00:00:00 2001 From: jinhai Date: Tue, 2 Jul 2019 20:06:00 +0800 Subject: [PATCH 09/19] Fix typo Former-commit-id: 1bccdd0999347c1fa76bdef446e9a7a2109ef66a --- cpp/src/main.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cpp/src/main.cpp b/cpp/src/main.cpp index bfdafcc1c3..ec536c2ee2 100644 --- a/cpp/src/main.cpp +++ b/cpp/src/main.cpp @@ -26,7 +26,7 @@ using namespace zilliz::milvus; int main(int argc, char *argv[]) { - std::cout << std::endl << "Welcome to use Milvus by Zillz!" << std::endl; + std::cout << std::endl << "Welcome to use Milvus by Zilliz!" 
<< std::endl; std::cout << "Milvus " << BUILD_TYPE << " version: v" << MILVUS_VERSION << " built at " << BUILD_TIME << std::endl; signal(SIGINT, server::SignalUtil::HandleSignal); From 87af3d632de89fc976d2a6b7045b72ac1f1ca010 Mon Sep 17 00:00:00 2001 From: yu yunfeng Date: Tue, 2 Jul 2019 20:13:10 +0800 Subject: [PATCH 10/19] add engine config Former-commit-id: 326501e3adc70b88fe3b8304ba39920fb11cf2d0 --- cpp/src/server/ServerConfig.h | 3 +++ 1 file changed, 3 insertions(+) diff --git a/cpp/src/server/ServerConfig.h b/cpp/src/server/ServerConfig.h index f337275a46..412581bc1f 100644 --- a/cpp/src/server/ServerConfig.h +++ b/cpp/src/server/ServerConfig.h @@ -43,6 +43,9 @@ static const std::string CONFIG_METRIC_COLLECTOR = "collector"; static const std::string CONFIG_PROMETHEUS = "prometheus_config"; static const std::string CONFIG_METRIC_PROMETHEUS_PORT = "port"; +static const std::string CONFIG_ENGINE = "engine_config"; +static const std::string CONFIG_NPROBE = "nprobe"; + class ServerConfig { public: static ServerConfig &GetInstance(); From 380363f477b7ddb7f3181fa56369bb516ebbc04d Mon Sep 17 00:00:00 2001 From: yu yunfeng Date: Tue, 2 Jul 2019 20:24:41 +0800 Subject: [PATCH 11/19] update Former-commit-id: 99f1f53c6f2b1775f78acf0ba5560a112c68140b --- cpp/src/wrapper/Index.cpp | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/cpp/src/wrapper/Index.cpp b/cpp/src/wrapper/Index.cpp index b051bfb07e..57c462a201 100644 --- a/cpp/src/wrapper/Index.cpp +++ b/cpp/src/wrapper/Index.cpp @@ -14,7 +14,7 @@ #include "Index.h" #include "faiss/index_io.h" #include "faiss/IndexIVF.h" -#include +#include "faiss/IVFlib.h" #include "server/ServerConfig.h" namespace zilliz { @@ -86,7 +86,6 @@ bool Index::search(idx_t n, const float *data, idx_t k, float *distances, long * try { if(auto ivf_index = std::dynamic_pointer_cast(index_)) { ivf_index->nprobe = Nprobe::GetInstance().GetNprobe(); - std::cout << "nprobe = " << ivf_index->nprobe << std::endl; } index_->search(n, 
data, k, distances, labels); } From 4aabe33207c68c84e1c94af7d59033eea935b3da Mon Sep 17 00:00:00 2001 From: yu yunfeng Date: Wed, 3 Jul 2019 11:09:17 +0800 Subject: [PATCH 12/19] update IVF nprobe Former-commit-id: 41aa77c14de1db37cfb1a177ba223de90829e003 --- cpp/conf/server_config.template | 5 ++++- cpp/src/db/EngineFactory.cpp | 34 +++++++++++++++++++++-------- cpp/src/db/ExecutionEngine.h | 2 ++ cpp/src/db/FaissExecutionEngine.cpp | 30 ++++++++++++++++++++++++- cpp/src/db/FaissExecutionEngine.h | 9 ++++---- cpp/src/wrapper/Index.cpp | 29 ------------------------ 6 files changed, 65 insertions(+), 44 deletions(-) diff --git a/cpp/conf/server_config.template b/cpp/conf/server_config.template index c2ed775601..0383e00b53 100644 --- a/cpp/conf/server_config.template +++ b/cpp/conf/server_config.template @@ -30,4 +30,7 @@ license_config: # license configure license_path: "@MILVUS_DB_PATH@/system.license" # license file path cache_config: # cache configure - cpu_cache_capacity: 16 # how many memory are used as cache, unit: GB, range: 0 ~ less than total memory \ No newline at end of file + cpu_cache_capacity: 16 # how many memory are used as cache, unit: GB, range: 0 ~ less than total memory + +engine_config: + nprobe: 10 \ No newline at end of file diff --git a/cpp/src/db/EngineFactory.cpp b/cpp/src/db/EngineFactory.cpp index 26ef639c88..bacce70ce4 100644 --- a/cpp/src/db/EngineFactory.cpp +++ b/cpp/src/db/EngineFactory.cpp @@ -7,23 +7,39 @@ #include "FaissExecutionEngine.h" #include "Log.h" + namespace zilliz { namespace milvus { namespace engine { ExecutionEnginePtr EngineFactory::Build(uint16_t dimension, - const std::string& location, - EngineType type) { - switch(type) { - case EngineType::FAISS_IDMAP: - return ExecutionEnginePtr(new FaissExecutionEngine(dimension, location, "IDMap", "IDMap,Flat")); - case EngineType::FAISS_IVFFLAT: - return ExecutionEnginePtr(new FaissExecutionEngine(dimension, location, "IVF", "IDMap,Flat")); - default: - ENGINE_LOG_ERROR << 
"Unsupportted engine type"; + const std::string &location, + EngineType type) { + + ExecutionEnginePtr execution_engine_ptr; + + switch (type) { + case EngineType::FAISS_IDMAP: { + execution_engine_ptr = + ExecutionEnginePtr(new FaissExecutionEngine(dimension, location, "IDMap", "IDMap,Flat")); + break; + } + + case EngineType::FAISS_IVFFLAT: { + execution_engine_ptr = + ExecutionEnginePtr(new FaissExecutionEngine(dimension, location, "IVF", "IDMap,Flat")); + break; + } + + default: { + ENGINE_LOG_ERROR << "Unsupported engine type"; return nullptr; + } } + + execution_engine_ptr->Init(); + return execution_engine_ptr; } } diff --git a/cpp/src/db/ExecutionEngine.h b/cpp/src/db/ExecutionEngine.h index d2b4d01e67..f8c05f6f9d 100644 --- a/cpp/src/db/ExecutionEngine.h +++ b/cpp/src/db/ExecutionEngine.h @@ -50,6 +50,8 @@ public: virtual std::shared_ptr BuildIndex(const std::string&) = 0; virtual Status Cache() = 0; + + virtual Status Init() = 0; }; using ExecutionEnginePtr = std::shared_ptr; diff --git a/cpp/src/db/FaissExecutionEngine.cpp b/cpp/src/db/FaissExecutionEngine.cpp index 9dfdd978c3..20bd530e78 100644 --- a/cpp/src/db/FaissExecutionEngine.cpp +++ b/cpp/src/db/FaissExecutionEngine.cpp @@ -13,6 +13,7 @@ #include #include #include +#include "faiss/IndexIVF.h" #include "metrics/Metrics.h" @@ -135,7 +136,16 @@ Status FaissExecutionEngine::Search(long n, float *distances, long *labels) const { auto start_time = METRICS_NOW_TIME; - pIndex_->search(n, data, k, distances, labels); + + std::shared_ptr ivf_index = std::dynamic_pointer_cast(pIndex_); + if(ivf_index) { + ENGINE_LOG_DEBUG << "Index type: IVFFLAT nProbe: " << nprobe_; + ivf_index->nprobe = nprobe_; + ivf_index->search(n, data, k, distances, labels); + } else { + pIndex_->search(n, data, k, distances, labels); + } + auto end_time = METRICS_NOW_TIME; auto total_time = METRICS_MICROSECONDS(start_time,end_time); server::Metrics::GetInstance().QueryIndexTypePerSecondSet(build_index_type_, 
double(n)/double(total_time)); @@ -149,6 +159,24 @@ Status FaissExecutionEngine::Cache() { return Status::OK(); } +Status FaissExecutionEngine::Init() { + + if(build_index_type_ == "IVF") { + + using namespace zilliz::milvus::server; + ServerConfig &config = ServerConfig::GetInstance(); + ConfigNode engine_config = config.GetConfig(CONFIG_ENGINE); + nprobe_ = engine_config.GetInt32Value(CONFIG_NPROBE, 1000); + + } else if(build_index_type_ == "IDMap") { + ; + } else { + return Status::Error("Wrong index type: ", build_index_type_); + } + + return Status::OK(); +} + } // namespace engine } // namespace milvus diff --git a/cpp/src/db/FaissExecutionEngine.h b/cpp/src/db/FaissExecutionEngine.h index 5667df34ea..f9f37ad978 100644 --- a/cpp/src/db/FaissExecutionEngine.h +++ b/cpp/src/db/FaissExecutionEngine.h @@ -6,14 +6,11 @@ #pragma once #include "ExecutionEngine.h" +#include "faiss/Index.h" #include #include -namespace faiss { - class Index; -} - namespace zilliz { namespace milvus { namespace engine { @@ -58,12 +55,16 @@ public: Status Cache() override; + Status Init() override; + protected: std::shared_ptr pIndex_; std::string location_; std::string build_index_type_; std::string raw_index_type_; + + size_t nprobe_ = 0; }; diff --git a/cpp/src/wrapper/Index.cpp b/cpp/src/wrapper/Index.cpp index 57c462a201..18e20d830a 100644 --- a/cpp/src/wrapper/Index.cpp +++ b/cpp/src/wrapper/Index.cpp @@ -25,32 +25,6 @@ using std::string; using std::unordered_map; using std::vector; -class Nprobe { - public: - static Nprobe &GetInstance() { - static Nprobe instance; - return instance; - } - - void SelectNprobe() { - using namespace zilliz::milvus::server; - ServerConfig &config = ServerConfig::GetInstance(); - ConfigNode engine_config = config.GetConfig(CONFIG_ENGINE); - nprobe_ = engine_config.GetInt32Value(CONFIG_NPROBE, 1000); - } - - size_t GetNprobe() { - return nprobe_; - } - - private: - Nprobe() : nprobe_(1000) { SelectNprobe(); } - - private: - size_t nprobe_; -}; - - 
Index::Index(const std::shared_ptr &raw_index) { index_ = raw_index; dim = index_->d; @@ -84,9 +58,6 @@ bool Index::add_with_ids(idx_t n, const float *xdata, const long *xids) { bool Index::search(idx_t n, const float *data, idx_t k, float *distances, long *labels) const { try { - if(auto ivf_index = std::dynamic_pointer_cast(index_)) { - ivf_index->nprobe = Nprobe::GetInstance().GetNprobe(); - } index_->search(n, data, k, distances, labels); } catch (std::exception &e) { From 81e13fd1cbd134b40cd0f76461e43fd0ca00faab Mon Sep 17 00:00:00 2001 From: yu yunfeng Date: Wed, 3 Jul 2019 11:18:34 +0800 Subject: [PATCH 13/19] change CHANGELOG Former-commit-id: f1b0bebcca37b9fb33efc3faea4b9db66a9a4a7e --- cpp/CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/cpp/CHANGELOG.md b/cpp/CHANGELOG.md index be89f31d11..bca5826ccb 100644 --- a/cpp/CHANGELOG.md +++ b/cpp/CHANGELOG.md @@ -62,6 +62,7 @@ Please mark all change in change log and use the ticket from JIRA. - MS-105 - Add MySQL - MS-130 - Add prometheus_test - MS-144 - Add nprobe config +- MS-147 - Enable IVF ## Task - MS-74 - Change README.md in cpp From 00acbd9ff62dde53858451e74f4376f213867698 Mon Sep 17 00:00:00 2001 From: "xj.lin" Date: Wed, 3 Jul 2019 11:50:47 +0800 Subject: [PATCH 14/19] update R Former-commit-id: 668ee056c2b0de6a56465be8fb78f910935c0e5c --- cpp/src/CMakeLists.txt | 3 --- 1 file changed, 3 deletions(-) diff --git a/cpp/src/CMakeLists.txt b/cpp/src/CMakeLists.txt index b4d50d4822..1a770ef2ad 100644 --- a/cpp/src/CMakeLists.txt +++ b/cpp/src/CMakeLists.txt @@ -72,7 +72,6 @@ set(third_party_libs arrow jemalloc_pic faiss -# libgpufaiss.a openblas lapack easyloggingpp @@ -105,8 +104,6 @@ if (GPU_VERSION STREQUAL "ON") pthread libgomp.a libgfortran.a -# cudart -# cublas ${CUDA_TOOLKIT_ROOT_DIR}/lib64/stubs/libnvidia-ml.so ) else() From 88110a0d65d7d2d7956b95f49d6f0b45d5bf83c6 Mon Sep 17 00:00:00 2001 From: "xj.lin" Date: Wed, 3 Jul 2019 11:53:42 +0800 Subject: [PATCH 15/19] update ... 
Former-commit-id: 6b721737d082237cd84128228d187f7b0a336d9b --- cpp/cmake/DefineOptions.cmake | 3 --- cpp/src/CMakeLists.txt | 1 - 2 files changed, 4 deletions(-) diff --git a/cpp/cmake/DefineOptions.cmake b/cpp/cmake/DefineOptions.cmake index 1873688a17..82259d2eb5 100644 --- a/cpp/cmake/DefineOptions.cmake +++ b/cpp/cmake/DefineOptions.cmake @@ -72,9 +72,6 @@ define_option(MILVUS_WITH_FAISS "Build with FAISS library" OFF) define_option(MILVUS_WITH_FAISS_GPU_VERSION "Build with FAISS GPU version" OFF) -#define_option_string(MILVUS_FAISS_GPU_ARCH "Specifying which GPU architectures to build against" -# "-gencode=arch=compute_35,code=compute_35 -gencode=arch=compute_52,code=compute_52 -gencode=arch=compute_60,code=compute_60 -gencode=arch=compute_61,code=compute_61") - define_option(MILVUS_WITH_LAPACK "Build with LAPACK library" OFF) define_option(MILVUS_WITH_LZ4 "Build with lz4 compression" ON) diff --git a/cpp/src/CMakeLists.txt b/cpp/src/CMakeLists.txt index 1a770ef2ad..6a7fb6835e 100644 --- a/cpp/src/CMakeLists.txt +++ b/cpp/src/CMakeLists.txt @@ -137,7 +137,6 @@ endif () cuda_add_library(milvus_engine STATIC ${engine_files}) -#cuda_add_library(milvus_engine SHARED ${engine_files}) target_link_libraries(milvus_engine ${engine_libs} ${third_party_libs}) add_library(metrics STATIC ${metrics_files}) From 697427e7ea6691ff5bb5632973d8e28aa7e196d6 Mon Sep 17 00:00:00 2001 From: "peng.xu" Date: Wed, 3 Jul 2019 15:17:28 +0800 Subject: [PATCH 16/19] Revert "Merge branch 'integrate_knowhere-0.3.1' into 'branch-0.3.1'" This reverts merge request !152 Former-commit-id: 739bf84961e2d6daa2ebbe2f45882efd2acaa89f --- .gitmodules | 4 - cpp/CHANGELOG.md | 1 - cpp/README.md | 12 +- cpp/cmake/DefineOptions.cmake | 13 +- cpp/cmake/ThirdPartyPackages.cmake | 57 ------- cpp/src/CMakeLists.txt | 17 +-- cpp/src/wrapper/knowhere/data_transfer.cpp | 48 ------ cpp/src/wrapper/knowhere/data_transfer.h | 24 --- cpp/src/wrapper/knowhere/vec_impl.cpp | 95 ------------ 
cpp/src/wrapper/knowhere/vec_impl.h | 38 ----- cpp/src/wrapper/knowhere/vec_index.cpp | 43 ------ cpp/src/wrapper/knowhere/vec_index.h | 56 ------- cpp/thirdparty/knowhere | 1 - cpp/unittest/CMakeLists.txt | 3 - cpp/unittest/index_wrapper/CMakeLists.txt | 23 --- cpp/unittest/index_wrapper/knowhere_test.cpp | 149 ------------------- cpp/unittest/index_wrapper/utils.cpp | 81 ---------- cpp/unittest/index_wrapper/utils.h | 61 -------- 18 files changed, 18 insertions(+), 708 deletions(-) delete mode 100644 .gitmodules delete mode 100644 cpp/src/wrapper/knowhere/data_transfer.cpp delete mode 100644 cpp/src/wrapper/knowhere/data_transfer.h delete mode 100644 cpp/src/wrapper/knowhere/vec_impl.cpp delete mode 100644 cpp/src/wrapper/knowhere/vec_impl.h delete mode 100644 cpp/src/wrapper/knowhere/vec_index.cpp delete mode 100644 cpp/src/wrapper/knowhere/vec_index.h delete mode 160000 cpp/thirdparty/knowhere delete mode 100644 cpp/unittest/index_wrapper/CMakeLists.txt delete mode 100644 cpp/unittest/index_wrapper/knowhere_test.cpp delete mode 100644 cpp/unittest/index_wrapper/utils.cpp delete mode 100644 cpp/unittest/index_wrapper/utils.h diff --git a/.gitmodules b/.gitmodules deleted file mode 100644 index 297cf0e592..0000000000 --- a/.gitmodules +++ /dev/null @@ -1,4 +0,0 @@ -[submodule "cpp/thirdparty/knowhere"] - path = cpp/thirdparty/knowhere - url = git@192.168.1.105:xiaojun.lin/knowhere.git - branch = develop diff --git a/cpp/CHANGELOG.md b/cpp/CHANGELOG.md index 5d039d6755..bca5826ccb 100644 --- a/cpp/CHANGELOG.md +++ b/cpp/CHANGELOG.md @@ -61,7 +61,6 @@ Please mark all change in change log and use the ticket from JIRA. 
- MS-97 - Add S3 SDK for MinIO Storage - MS-105 - Add MySQL - MS-130 - Add prometheus_test -- MS-143 - Intergrate Knowhere but not activate - MS-144 - Add nprobe config - MS-147 - Enable IVF diff --git a/cpp/README.md b/cpp/README.md index d13e6b7fcd..1b2f507db2 100644 --- a/cpp/README.md +++ b/cpp/README.md @@ -1,12 +1,13 @@ ### Compilation #### Step 1: install necessery tools + Install MySQL centos7 : - yum install gfortran qt4 flex bison mysql-devel mysql + yum install gfortran qt4 flex bison mysql-devel ubuntu16.04 : - sudo apt-get install gfortran qt4-qmake flex bison libmysqlclient-dev mysql-client + sudo apt-get install gfortran qt4-qmake flex bison libmysqlclient-dev If `libmysqlclient_r.so` does not exist after installing MySQL Development Files, you need to create a symbolic link: @@ -20,9 +21,6 @@ cmake_build/src/milvus_server is the server cmake_build/src/libmilvus_engine.a is the static library - git submodule init - git submodule update - cd [sourcecode path]/cpp ./build.sh -t Debug ./build.sh -t Release @@ -55,10 +53,10 @@ If you encounter the following error when building: ### Launch server Set config in cpp/conf/server_config.yaml -Add milvus/lib to LD_LIBRARY_PATH +Add milvus/bin/lib to LD_LIBRARY_PATH ``` -export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/path/to/milvus/lib +export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/path/to/milvus/bin/lib ``` Then launch server with config: diff --git a/cpp/cmake/DefineOptions.cmake b/cpp/cmake/DefineOptions.cmake index 82259d2eb5..147663d0db 100644 --- a/cpp/cmake/DefineOptions.cmake +++ b/cpp/cmake/DefineOptions.cmake @@ -68,17 +68,20 @@ define_option(MILVUS_WITH_BZ2 "Build with BZ2 compression" ON) define_option(MILVUS_WITH_EASYLOGGINGPP "Build with Easylogging++ library" ON) -define_option(MILVUS_WITH_FAISS "Build with FAISS library" OFF) +define_option(MILVUS_WITH_FAISS "Build with FAISS library" ON) -define_option(MILVUS_WITH_FAISS_GPU_VERSION "Build with FAISS GPU version" OFF) 
+define_option(MILVUS_WITH_FAISS_GPU_VERSION "Build with FAISS GPU version" ON) -define_option(MILVUS_WITH_LAPACK "Build with LAPACK library" OFF) +#define_option_string(MILVUS_FAISS_GPU_ARCH "Specifying which GPU architectures to build against" +# "-gencode=arch=compute_35,code=compute_35 -gencode=arch=compute_52,code=compute_52 -gencode=arch=compute_60,code=compute_60 -gencode=arch=compute_61,code=compute_61") + +define_option(MILVUS_WITH_LAPACK "Build with LAPACK library" ON) define_option(MILVUS_WITH_LZ4 "Build with lz4 compression" ON) define_option(MILVUS_WITH_JSONCONS "Build with JSONCONS" OFF) -define_option(MILVUS_WITH_OPENBLAS "Build with OpenBLAS library" OFF) +define_option(MILVUS_WITH_OPENBLAS "Build with OpenBLAS library" ON) define_option(MILVUS_WITH_PROMETHEUS "Build with PROMETHEUS library" ON) @@ -98,8 +101,6 @@ define_option(MILVUS_WITH_YAMLCPP "Build with yaml-cpp library" ON) define_option(MILVUS_WITH_ZLIB "Build with zlib compression" ON) -define_option(MILVUS_WITH_KNOWHERE "Build with Knowhere" ON) - if(CMAKE_VERSION VERSION_LESS 3.7) set(MILVUS_WITH_ZSTD_DEFAULT OFF) else() diff --git a/cpp/cmake/ThirdPartyPackages.cmake b/cpp/cmake/ThirdPartyPackages.cmake index 2c6c61dbf4..9aa3f62124 100644 --- a/cpp/cmake/ThirdPartyPackages.cmake +++ b/cpp/cmake/ThirdPartyPackages.cmake @@ -23,7 +23,6 @@ set(MILVUS_THIRDPARTY_DEPENDENCIES Easylogging++ FAISS GTest - Knowhere JSONCONS LAPACK Lz4 @@ -62,8 +61,6 @@ macro(build_dependency DEPENDENCY_NAME) build_gtest() elseif("${DEPENDENCY_NAME}" STREQUAL "LAPACK") build_lapack() - elseif("${DEPENDENCY_NAME}" STREQUAL "Knowhere") - build_knowhere() elseif("${DEPENDENCY_NAME}" STREQUAL "Lz4") build_lz4() elseif ("${DEPENDENCY_NAME}" STREQUAL "MySQLPP") @@ -245,12 +242,6 @@ else() set(FAISS_SOURCE_URL "https://github.com/facebookresearch/faiss/archive/${FAISS_VERSION}.tar.gz") endif() -if(DEFINED ENV{MILVUS_KNOWHERE_URL}) - set(KNOWHERE_SOURCE_URL "$ENV{MILVUS_KNOWHERE_URL}") -else() - set(KNOWHERE_SOURCE_URL 
"${CMAKE_SOURCE_DIR}/thirdparty/knowhere") -endif() - if (DEFINED ENV{MILVUS_GTEST_URL}) set(GTEST_SOURCE_URL "$ENV{MILVUS_GTEST_URL}") else () @@ -650,54 +641,6 @@ if(MILVUS_WITH_BZ2) include_directories(SYSTEM "${BZIP2_INCLUDE_DIR}") endif() -# ---------------------------------------------------------------------- -# Knowhere - -macro(build_knowhere) - message(STATUS "Building knowhere from source") - set(KNOWHERE_PREFIX "${CMAKE_CURRENT_BINARY_DIR}/knowhere_ep-prefix/src/knowhere_ep") - set(KNOWHERE_INCLUDE_DIR "${KNOWHERE_PREFIX}/include") - set(KNOWHERE_STATIC_LIB - "${KNOWHERE_PREFIX}/lib/${CMAKE_STATIC_LIBRARY_PREFIX}knowhere${CMAKE_STATIC_LIBRARY_SUFFIX}") - - set(KNOWHERE_CMAKE_ARGS - ${EP_COMMON_CMAKE_ARGS} - "-DCMAKE_INSTALL_PREFIX=${KNOWHERE_PREFIX}" - -DCMAKE_INSTALL_LIBDIR=lib - -DCMAKE_CUDA_COMPILER=/usr/local/cuda/bin/nvcc - -DCMAKE_BUILD_TYPE=Release) - - externalproject_add(knowhere_ep - URL - ${KNOWHERE_SOURCE_URL} - ${EP_LOG_OPTIONS} - CMAKE_ARGS - ${KNOWHERE_CMAKE_ARGS} - BUILD_COMMAND - ${MAKE} - ${MAKE_BUILD_ARGS} - BUILD_BYPRODUCTS - ${KNOWHERE_STATIC_LIB}) - - file(MAKE_DIRECTORY "${KNOWHERE_INCLUDE_DIR}") - add_library(knowhere STATIC IMPORTED) - set_target_properties( - knowhere - PROPERTIES IMPORTED_LOCATION "${KNOWHERE_STATIC_LIB}" - INTERFACE_INCLUDE_DIRECTORIES "${KNOWHERE_INCLUDE_DIR}") - - add_dependencies(knowhere knowhere_ep) -endmacro() - -if(MILVUS_WITH_KNOWHERE) - resolve_dependency(Knowhere) - - get_target_property(KNOWHERE_INCLUDE_DIR knowhere INTERFACE_INCLUDE_DIRECTORIES) - link_directories(SYSTEM "${KNOWHERE_PREFIX}/lib") - include_directories(SYSTEM "${KNOWHERE_INCLUDE_DIR}") - include_directories(SYSTEM "${KNOWHERE_INCLUDE_DIR}/SPTAG/AnnService") -endif() - # ---------------------------------------------------------------------- # Easylogging++ diff --git a/cpp/src/CMakeLists.txt b/cpp/src/CMakeLists.txt index 6a7fb6835e..d0029d5175 100644 --- a/cpp/src/CMakeLists.txt +++ b/cpp/src/CMakeLists.txt @@ -12,7 +12,6 @@ 
aux_source_directory(utils utils_files) aux_source_directory(db db_files) aux_source_directory(wrapper wrapper_files) aux_source_directory(metrics metrics_files) -aux_source_directory(wrapper/knowhere knowhere_files) aux_source_directory(db/scheduler scheduler_files) aux_source_directory(db/scheduler/context scheduler_context_files) @@ -51,7 +50,6 @@ set(engine_files ${wrapper_files} # metrics/Metrics.cpp ${metrics_files} - ${knowhere_files} ) set(get_sys_info_files @@ -67,17 +65,14 @@ include_directories(thrift/gen-cpp) include_directories(/usr/include/mysql) set(third_party_libs - knowhere - SPTAGLibStatic - arrow - jemalloc_pic - faiss - openblas - lapack easyloggingpp sqlite thrift yaml-cpp + libgpufaiss.a + faiss + lapack + openblas prometheus-cpp-push prometheus-cpp-pull prometheus-cpp-core @@ -89,8 +84,6 @@ set(third_party_libs snappy zlib zstd - cudart - cublas mysqlpp ${CUDA_TOOLKIT_ROOT_DIR}/lib64/stubs/libnvidia-ml.so ) @@ -104,6 +97,8 @@ if (GPU_VERSION STREQUAL "ON") pthread libgomp.a libgfortran.a + cudart + cublas ${CUDA_TOOLKIT_ROOT_DIR}/lib64/stubs/libnvidia-ml.so ) else() diff --git a/cpp/src/wrapper/knowhere/data_transfer.cpp b/cpp/src/wrapper/knowhere/data_transfer.cpp deleted file mode 100644 index af5ad212e4..0000000000 --- a/cpp/src/wrapper/knowhere/data_transfer.cpp +++ /dev/null @@ -1,48 +0,0 @@ -//////////////////////////////////////////////////////////////////////////////// -// Copyright 上海赜睿信息科技有限公司(Zilliz) - All Rights Reserved -// Unauthorized copying of this file, via any medium is strictly prohibited. -// Proprietary and confidential. 
-//////////////////////////////////////////////////////////////////////////////// - -#include "data_transfer.h" - - -namespace zilliz { -namespace vecwise { -namespace engine { - -using namespace zilliz::knowhere; - -DatasetPtr -GenDatasetWithIds(const int64_t &nb, const int64_t &dim, const float *xb, const long *ids) { - std::vector shape{nb, dim}; - auto tensor = ConstructFloatTensor((uint8_t *) xb, nb * dim * sizeof(float), shape); - std::vector tensors{tensor}; - std::vector tensor_fields{ConstructFloatField("data")}; - auto tensor_schema = std::make_shared(tensor_fields); - - auto id_array = ConstructInt64Array((uint8_t *) ids, nb * sizeof(int64_t)); - std::vector arrays{id_array}; - std::vector array_fields{ConstructInt64Field("id")}; - auto array_schema = std::make_shared(tensor_fields); - - auto dataset = std::make_shared(std::move(arrays), array_schema, - std::move(tensors), tensor_schema); - return dataset; -} - -DatasetPtr -GenDataset(const int64_t &nb, const int64_t &dim, const float *xb) { - std::vector shape{nb, dim}; - auto tensor = ConstructFloatTensor((uint8_t *) xb, nb * dim * sizeof(float), shape); - std::vector tensors{tensor}; - std::vector tensor_fields{ConstructFloatField("data")}; - auto tensor_schema = std::make_shared(tensor_fields); - - auto dataset = std::make_shared(std::move(tensors), tensor_schema); - return dataset; -} - -} -} -} diff --git a/cpp/src/wrapper/knowhere/data_transfer.h b/cpp/src/wrapper/knowhere/data_transfer.h deleted file mode 100644 index c99cd1c742..0000000000 --- a/cpp/src/wrapper/knowhere/data_transfer.h +++ /dev/null @@ -1,24 +0,0 @@ -//////////////////////////////////////////////////////////////////////////////// -// Copyright 上海赜睿信息科技有限公司(Zilliz) - All Rights Reserved -// Unauthorized copying of this file, via any medium is strictly prohibited. -// Proprietary and confidential. 
-//////////////////////////////////////////////////////////////////////////////// - -#pragma once - -#include "knowhere/adapter/structure.h" - - -namespace zilliz { -namespace vecwise { -namespace engine { - -extern zilliz::knowhere::DatasetPtr -GenDatasetWithIds(const int64_t &nb, const int64_t &dim, const float *xb, const long *ids); - -extern zilliz::knowhere::DatasetPtr -GenDataset(const int64_t &nb, const int64_t &dim, const float *xb); - -} -} -} diff --git a/cpp/src/wrapper/knowhere/vec_impl.cpp b/cpp/src/wrapper/knowhere/vec_impl.cpp deleted file mode 100644 index e24d470acc..0000000000 --- a/cpp/src/wrapper/knowhere/vec_impl.cpp +++ /dev/null @@ -1,95 +0,0 @@ -//////////////////////////////////////////////////////////////////////////////// -// Copyright 上海赜睿信息科技有限公司(Zilliz) - All Rights Reserved -// Unauthorized copying of this file, via any medium is strictly prohibited. -// Proprietary and confidential. -//////////////////////////////////////////////////////////////////////////////// - -#include "knowhere/index/index.h" -#include "knowhere/index/index_model.h" -#include "knowhere/index/index_type.h" -#include "knowhere/adapter/sptag.h" -#include "knowhere/common/tensor.h" - -#include "vec_impl.h" -#include "data_transfer.h" - - -namespace zilliz { -namespace vecwise { -namespace engine { - -using namespace zilliz::knowhere; - -void VecIndexImpl::BuildAll(const long &nb, - const float *xb, - const long *ids, - const Config &cfg, - const long &nt, - const float *xt) { - auto d = cfg["dim"].as(); - auto dataset = GenDatasetWithIds(nb, d, xb, ids); - - auto preprocessor = index_->BuildPreprocessor(dataset, cfg); - index_->set_preprocessor(preprocessor); - auto model = index_->Train(dataset, cfg); - index_->set_index_model(model); - index_->Add(dataset, cfg); -} - -void VecIndexImpl::Add(const long &nb, const float *xb, const long *ids, const Config &cfg) { - // TODO(linxj): Assert index is trained; - - auto d = cfg["dim"].as(); - auto dataset = 
GenDatasetWithIds(nb, d, xb, ids); - - index_->Add(dataset, cfg); -} - -void VecIndexImpl::Search(const long &nq, const float *xq, float *dist, long *ids, const Config &cfg) { - // TODO: Assert index is trained; - - auto d = cfg["dim"].as(); - auto k = cfg["k"].as(); - auto dataset = GenDataset(nq, d, xq); - - Config search_cfg; - auto res = index_->Search(dataset, cfg); - auto ids_array = res->array()[0]; - auto dis_array = res->array()[1]; - - //{ - // auto& ids = ids_array; - // auto& dists = dis_array; - // std::stringstream ss_id; - // std::stringstream ss_dist; - // for (auto i = 0; i < 10; i++) { - // for (auto j = 0; j < k; ++j) { - // ss_id << *(ids->data()->GetValues(1, i * k + j)) << " "; - // ss_dist << *(dists->data()->GetValues(1, i * k + j)) << " "; - // } - // ss_id << std::endl; - // ss_dist << std::endl; - // } - // std::cout << "id\n" << ss_id.str() << std::endl; - // std::cout << "dist\n" << ss_dist.str() << std::endl; - //} - - auto p_ids = ids_array->data()->GetValues(1, 0); - auto p_dist = ids_array->data()->GetValues(1, 0); - - // TODO(linxj): avoid copy here. - memcpy(ids, p_ids, sizeof(int64_t) * nq * k); - memcpy(dist, p_dist, sizeof(float) * nq * k); -} - -zilliz::knowhere::BinarySet VecIndexImpl::Serialize() { - return index_->Serialize(); -} - -void VecIndexImpl::Load(const zilliz::knowhere::BinarySet &index_binary) { - index_->Load(index_binary); -} - -} -} -} diff --git a/cpp/src/wrapper/knowhere/vec_impl.h b/cpp/src/wrapper/knowhere/vec_impl.h deleted file mode 100644 index 25f7d16548..0000000000 --- a/cpp/src/wrapper/knowhere/vec_impl.h +++ /dev/null @@ -1,38 +0,0 @@ -//////////////////////////////////////////////////////////////////////////////// -// Copyright 上海赜睿信息科技有限公司(Zilliz) - All Rights Reserved -// Unauthorized copying of this file, via any medium is strictly prohibited. -// Proprietary and confidential. 
-//////////////////////////////////////////////////////////////////////////////// - -#pragma once - -#include "knowhere/index/vector_index/vector_index.h" - -#include "vec_index.h" - - -namespace zilliz { -namespace vecwise { -namespace engine { - -class VecIndexImpl : public VecIndex { - public: - explicit VecIndexImpl(std::shared_ptr index) : index_(std::move(index)) {}; - void BuildAll(const long &nb, - const float *xb, - const long *ids, - const Config &cfg, - const long &nt, - const float *xt) override; - void Add(const long &nb, const float *xb, const long *ids, const Config &cfg) override; - zilliz::knowhere::BinarySet Serialize() override; - void Load(const zilliz::knowhere::BinarySet &index_binary) override; - void Search(const long &nq, const float *xq, float *dist, long *ids, const Config &cfg) override; - - private: - std::shared_ptr index_ = nullptr; -}; - -} -} -} diff --git a/cpp/src/wrapper/knowhere/vec_index.cpp b/cpp/src/wrapper/knowhere/vec_index.cpp deleted file mode 100644 index 171388d0af..0000000000 --- a/cpp/src/wrapper/knowhere/vec_index.cpp +++ /dev/null @@ -1,43 +0,0 @@ -//////////////////////////////////////////////////////////////////////////////// -// Copyright 上海赜睿信息科技有限公司(Zilliz) - All Rights Reserved -// Unauthorized copying of this file, via any medium is strictly prohibited. -// Proprietary and confidential. 
-//////////////////////////////////////////////////////////////////////////////// -#include "knowhere/index/vector_index/ivf.h" -#include "knowhere/index/vector_index/gpu_ivf.h" -#include "knowhere/index/vector_index/cpu_kdt_rng.h" - -#include "vec_index.h" -#include "vec_impl.h" - - -namespace zilliz { -namespace vecwise { -namespace engine { - -// TODO(linxj): index_type => enum struct -VecIndexPtr GetVecIndexFactory(const std::string &index_type) { - std::shared_ptr index; - if (index_type == "IVF") { - index = std::make_shared(); - } else if (index_type == "GPUIVF") { - index = std::make_shared(0); - } else if (index_type == "SPTAG") { - index = std::make_shared(); - } - // TODO(linxj): Support NSG - //else if (index_type == "NSG") { - // index = std::make_shared(); - //} - return std::make_shared(index); -} - -VecIndexPtr LoadVecIndex(const std::string &index_type, const zilliz::knowhere::BinarySet &index_binary) { - auto index = GetVecIndexFactory(index_type); - index->Load(index_binary); - return index; -} - -} -} -} diff --git a/cpp/src/wrapper/knowhere/vec_index.h b/cpp/src/wrapper/knowhere/vec_index.h deleted file mode 100644 index b03c43a36b..0000000000 --- a/cpp/src/wrapper/knowhere/vec_index.h +++ /dev/null @@ -1,56 +0,0 @@ -//////////////////////////////////////////////////////////////////////////////// -// Copyright 上海赜睿信息科技有限公司(Zilliz) - All Rights Reserved -// Unauthorized copying of this file, via any medium is strictly prohibited. -// Proprietary and confidential. -//////////////////////////////////////////////////////////////////////////////// - -#pragma once - -#include -#include - -#include "knowhere/common/config.h" -#include "knowhere/common/binary_set.h" - - -namespace zilliz { -namespace vecwise { -namespace engine { - -// TODO(linxj): jsoncons => rapidjson or other. 
-using Config = zilliz::knowhere::Config; - -class VecIndex { - public: - virtual void BuildAll(const long &nb, - const float *xb, - const long *ids, - const Config &cfg, - const long &nt = 0, - const float *xt = nullptr) = 0; - - virtual void Add(const long &nb, - const float *xb, - const long *ids, - const Config &cfg = Config()) = 0; - - virtual void Search(const long &nq, - const float *xq, - float *dist, - long *ids, - const Config &cfg = Config()) = 0; - - virtual zilliz::knowhere::BinarySet Serialize() = 0; - - virtual void Load(const zilliz::knowhere::BinarySet &index_binary) = 0; -}; - -using VecIndexPtr = std::shared_ptr; - -extern VecIndexPtr GetVecIndexFactory(const std::string &index_type); - -extern VecIndexPtr LoadVecIndex(const std::string &index_type, const zilliz::knowhere::BinarySet &index_binary); - -} -} -} diff --git a/cpp/thirdparty/knowhere b/cpp/thirdparty/knowhere deleted file mode 160000 index 32187bacba..0000000000 --- a/cpp/thirdparty/knowhere +++ /dev/null @@ -1 +0,0 @@ -Subproject commit 32187bacbaac0460676f5f6aa54ad904f5f2b5bc diff --git a/cpp/unittest/CMakeLists.txt b/cpp/unittest/CMakeLists.txt index db327ef10c..38046617ae 100644 --- a/cpp/unittest/CMakeLists.txt +++ b/cpp/unittest/CMakeLists.txt @@ -41,9 +41,6 @@ set(unittest_libs add_subdirectory(server) add_subdirectory(db) -#add_subdirectory(faiss_wrapper) -add_subdirectory(index_wrapper) -add_subdirectory(license) add_subdirectory(faiss_wrapper) #add_subdirectory(license) add_subdirectory(metrics) diff --git a/cpp/unittest/index_wrapper/CMakeLists.txt b/cpp/unittest/index_wrapper/CMakeLists.txt deleted file mode 100644 index 51bd97b575..0000000000 --- a/cpp/unittest/index_wrapper/CMakeLists.txt +++ /dev/null @@ -1,23 +0,0 @@ -include_directories("${CUDA_TOOLKIT_ROOT_DIR}/include") -link_directories("${CUDA_TOOLKIT_ROOT_DIR}/lib64") - -aux_source_directory(${MILVUS_ENGINE_SRC}/wrapper/knowhere knowhere_src) - -set(helper - utils.cpp) - -set(knowhere_libs - knowhere - 
SPTAGLibStatic - arrow - jemalloc_pic - faiss - openblas - lapack - tbb - cudart - cublas - ) - -add_executable(knowhere_test knowhere_test.cpp ${knowhere_src} ${helper}) -target_link_libraries(knowhere_test ${knowhere_libs} ${unittest_libs}) \ No newline at end of file diff --git a/cpp/unittest/index_wrapper/knowhere_test.cpp b/cpp/unittest/index_wrapper/knowhere_test.cpp deleted file mode 100644 index 58b0d5a4b2..0000000000 --- a/cpp/unittest/index_wrapper/knowhere_test.cpp +++ /dev/null @@ -1,149 +0,0 @@ -//////////////////////////////////////////////////////////////////////////////// -// Copyright 上海赜睿信息科技有限公司(Zilliz) - All Rights Reserved -// Unauthorized copying of this file, via any medium is strictly prohibited. -// Proprietary and confidential. -//////////////////////////////////////////////////////////////////////////////// - -#include - -#include - -#include "utils.h" - - -using namespace zilliz::vecwise::engine; -using namespace zilliz::knowhere; - -using ::testing::TestWithParam; -using ::testing::Values; -using ::testing::Combine; - - -class KnowhereWrapperTest - : public TestWithParam<::std::tuple> { - protected: - void SetUp() override { - std::string generator_type; - std::tie(index_type, generator_type, dim, nb, nq, k, train_cfg, search_cfg) = GetParam(); - - //auto generator = GetGenerateFactory(generator_type); - auto generator = std::make_shared(); - generator->GenData(dim, nb, nq, xb, xq, ids, k, gt_ids); - - index_ = GetVecIndexFactory(index_type); - } - - protected: - std::string index_type; - Config train_cfg; - Config search_cfg; - - int dim = 64; - int nb = 10000; - int nq = 10; - int k = 10; - std::vector xb; - std::vector xq; - std::vector ids; - - VecIndexPtr index_ = nullptr; - - // Ground Truth - std::vector gt_ids; -}; - -INSTANTIATE_TEST_CASE_P(WrapperParam, KnowhereWrapperTest, - Values( - // ["Index type", "Generator type", "dim", "nb", "nq", "k", "build config", "search config"] - std::make_tuple("IVF", "Default", - 64, 10000, 
10, 10, - Config::object{{"nlist", 100}, {"dim", 64}}, - Config::object{{"dim", 64}, {"k", 10}, {"nprobe", 20}} - ), - std::make_tuple("SPTAG", "Default", - 64, 10000, 10, 10, - Config::object{{"TPTNumber", 1}, {"dim", 64}}, - Config::object{{"dim", 64}, {"k", 10}} - ) - ) -); - -void AssertAnns(const std::vector >, - const std::vector &res, - const int &nq, - const int &k) { - EXPECT_EQ(res.size(), nq * k); - - for (auto i = 0; i < nq; i++) { - EXPECT_EQ(gt[i * k], res[i * k]); - } - - int match = 0; - for (int i = 0; i < nq; ++i) { - for (int j = 0; j < k; ++j) { - for (int l = 0; l < k; ++l) { - if (gt[i * nq + j] == res[i * nq + l]) match++; - } - } - } - - // TODO(linxj): percision check - EXPECT_GT(float(match/nq*k), 0.5); -} - -TEST_P(KnowhereWrapperTest, base_test) { - std::vector res_ids; - float *D = new float[k * nq]; - res_ids.resize(nq * k); - - index_->BuildAll(nb, xb.data(), ids.data(), train_cfg); - index_->Search(nq, xq.data(), D, res_ids.data(), search_cfg); - AssertAnns(gt_ids, res_ids, nq, k); - delete[] D; -} - -TEST_P(KnowhereWrapperTest, serialize_test) { - std::vector res_ids; - float *D = new float[k * nq]; - res_ids.resize(nq * k); - - index_->BuildAll(nb, xb.data(), ids.data(), train_cfg); - index_->Search(nq, xq.data(), D, res_ids.data(), search_cfg); - AssertAnns(gt_ids, res_ids, nq, k); - - { - auto binaryset = index_->Serialize(); - int fileno = 0; - const std::string &base_name = "/tmp/wrapper_serialize_test_bin_"; - std::vector filename_list; - std::vector> meta_list; - for (auto &iter: binaryset.binary_map_) { - const std::string &filename = base_name + std::to_string(fileno); - FileIOWriter writer(filename); - writer(iter.second.data, iter.second.size); - - meta_list.push_back(std::make_pair(iter.first, iter.second.size)); - filename_list.push_back(filename); - ++fileno; - } - - BinarySet load_data_list; - for (int i = 0; i < filename_list.size() && i < meta_list.size(); ++i) { - auto bin_size = meta_list[i].second; - FileIOReader 
reader(filename_list[i]); - std::vector load_data(bin_size); - reader(load_data.data(), bin_size); - load_data_list.Append(meta_list[i].first, load_data); - } - - - res_ids.clear(); - res_ids.resize(nq * k); - auto new_index = GetVecIndexFactory(index_type); - new_index->Load(load_data_list); - new_index->Search(nq, xq.data(), D, res_ids.data(), search_cfg); - AssertAnns(gt_ids, res_ids, nq, k); - } - - delete[] D; -} diff --git a/cpp/unittest/index_wrapper/utils.cpp b/cpp/unittest/index_wrapper/utils.cpp deleted file mode 100644 index e228ae001d..0000000000 --- a/cpp/unittest/index_wrapper/utils.cpp +++ /dev/null @@ -1,81 +0,0 @@ -//////////////////////////////////////////////////////////////////////////////// -// Copyright 上海赜睿信息科技有限公司(Zilliz) - All Rights Reserved -// Unauthorized copying of this file, via any medium is strictly prohibited. -// Proprietary and confidential. -//////////////////////////////////////////////////////////////////////////////// - -#include - -#include "utils.h" - - -DataGenPtr GetGenerateFactory(const std::string &gen_type) { - std::shared_ptr generator; - if (gen_type == "default") { - generator = std::make_shared(); - } - return generator; -} - -void DataGenBase::GenData(const int &dim, const int &nb, const int &nq, - float *xb, float *xq, long *ids, - const int &k, long *gt_ids) { - for (auto i = 0; i < nb; ++i) { - for (auto j = 0; j < dim; ++j) { - //p_data[i * d + j] = float(base + i); - xb[i * dim + j] = drand48(); - } - xb[dim * i] += i / 1000.; - ids[i] = i; - } - for (size_t i = 0; i < nq * dim; ++i) { - xq[i] = xb[i]; - } - - faiss::IndexFlatL2 index(dim); - //index.add_with_ids(nb, xb, ids); - index.add(nb, xb); - float *D = new float[k * nq]; - index.search(nq, xq, k, D, gt_ids); -} - -void DataGenBase::GenData(const int &dim, - const int &nb, - const int &nq, - std::vector &xb, - std::vector &xq, - std::vector &ids, - const int &k, - std::vector >_ids) { - xb.resize(nb * dim); - xq.resize(nq * dim); - ids.resize(nb); - 
gt_ids.resize(nq * k); - GenData(dim, nb, nq, xb.data(), xq.data(), ids.data(), k, gt_ids.data()); -} - -FileIOReader::FileIOReader(const std::string &fname) { - name = fname; - fs = std::fstream(name, std::ios::in | std::ios::binary); -} - -FileIOReader::~FileIOReader() { - fs.close(); -} - -size_t FileIOReader::operator()(void *ptr, size_t size) { - fs.read(reinterpret_cast(ptr), size); -} - -FileIOWriter::FileIOWriter(const std::string &fname) { - name = fname; - fs = std::fstream(name, std::ios::out | std::ios::binary); -} - -FileIOWriter::~FileIOWriter() { - fs.close(); -} - -size_t FileIOWriter::operator()(void *ptr, size_t size) { - fs.write(reinterpret_cast(ptr), size); -} diff --git a/cpp/unittest/index_wrapper/utils.h b/cpp/unittest/index_wrapper/utils.h deleted file mode 100644 index bbc52a011b..0000000000 --- a/cpp/unittest/index_wrapper/utils.h +++ /dev/null @@ -1,61 +0,0 @@ -//////////////////////////////////////////////////////////////////////////////// -// Copyright 上海赜睿信息科技有限公司(Zilliz) - All Rights Reserved -// Unauthorized copying of this file, via any medium is strictly prohibited. -// Proprietary and confidential. 
-//////////////////////////////////////////////////////////////////////////////// - -#pragma once - -#include -#include -#include -#include -#include - - -class DataGenBase; - -using DataGenPtr = std::shared_ptr; - -extern DataGenPtr GetGenerateFactory(const std::string &gen_type); - - -class DataGenBase { - public: - virtual void GenData(const int &dim, const int &nb, const int &nq, float *xb, float *xq, long *ids, - const int &k, long *gt_ids); - - virtual void GenData(const int &dim, - const int &nb, - const int &nq, - std::vector &xb, - std::vector &xq, - std::vector &ids, - const int &k, - std::vector >_ids); -}; - - -class SanityCheck : public DataGenBase { - public: - void GenData(const int &dim, const int &nb, const int &nq, float *xb, float *xq, long *ids, - const int &k, long *gt_ids) override; -}; - -struct FileIOWriter { - std::fstream fs; - std::string name; - - FileIOWriter(const std::string &fname); - ~FileIOWriter(); - size_t operator()(void *ptr, size_t size); -}; - -struct FileIOReader { - std::fstream fs; - std::string name; - - FileIOReader(const std::string &fname); - ~FileIOReader(); - size_t operator()(void *ptr, size_t size); -}; From ad6e164e19a05c52763229e559f5de68057d829a Mon Sep 17 00:00:00 2001 From: quicksilver Date: Wed, 3 Jul 2019 15:54:15 +0800 Subject: [PATCH 17/19] setting script file permissions to 700 Former-commit-id: c14d9e457795129f684cf9e25baa6e0a9f39ad8f --- cpp/CMakeLists.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index 4d8f43b5b9..c58d63178a 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -145,6 +145,7 @@ configure_file(${CMAKE_CURRENT_SOURCE_DIR}/conf/log_config.template ${CMAKE_CURR install(FILES scripts/start_server.sh scripts/stop_server.sh + FILE_PERMISSIONS OWNER_EXECUTE OWNER_WRITE OWNER_READ DESTINATION scripts) install(FILES From 8b71d45ac70be764d37fb89752684e76f4a019ff Mon Sep 17 00:00:00 2001 From: quicksilver Date: Wed, 3 Jul 2019 16:11:18 +0800 
Subject: [PATCH 18/19] setting script file permissions to 744 Former-commit-id: ba085a62f7a15665cdfa5a481c9470ecdda66d1e --- cpp/CMakeLists.txt | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index c58d63178a..18abb9a72c 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -142,12 +142,12 @@ configure_file(${CMAKE_CURRENT_SOURCE_DIR}/conf/server_config.template ${CMAKE_C configure_file(${CMAKE_CURRENT_SOURCE_DIR}/conf/log_config.template ${CMAKE_CURRENT_SOURCE_DIR}/conf/log_config.conf) #install -install(FILES - scripts/start_server.sh - scripts/stop_server.sh +install(DIRECTORY scripts + DESTINATION scripts FILE_PERMISSIONS OWNER_EXECUTE OWNER_WRITE OWNER_READ - DESTINATION - scripts) + GROUP_EXECUTE GROUP_READ + WORLD_EXECUTE WORLD_READ + FILES_MATCHING PATTERN "*.sh") install(FILES conf/server_config.yaml conf/log_config.conf From 87a1f4838dc79e9b72b99541b697bf145a4a70ac Mon Sep 17 00:00:00 2001 From: quicksilver Date: Wed, 3 Jul 2019 16:35:19 +0800 Subject: [PATCH 19/19] setting script file permissions to 755 Former-commit-id: 421486f834d6c65215fca89572b3327f6dcbcac5 --- cpp/CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index 18abb9a72c..3a0e8900a6 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -142,7 +142,7 @@ configure_file(${CMAKE_CURRENT_SOURCE_DIR}/conf/server_config.template ${CMAKE_C configure_file(${CMAKE_CURRENT_SOURCE_DIR}/conf/log_config.template ${CMAKE_CURRENT_SOURCE_DIR}/conf/log_config.conf) #install -install(DIRECTORY scripts +install(DIRECTORY scripts/ DESTINATION scripts FILE_PERMISSIONS OWNER_EXECUTE OWNER_WRITE OWNER_READ GROUP_EXECUTE GROUP_READ