From f7287d74ea2e8ba734c94740b30932b7482e5965 Mon Sep 17 00:00:00 2001 From: "xj.lin" Date: Thu, 4 Jul 2019 14:47:23 +0800 Subject: [PATCH 01/91] MS-154 1. Add Submodule knowhere and specify branch 2. Integrate Knowhere, update many cmakelist 3. dev knowhere-wrapper 4. dev knowhere-wrapper unittest Former-commit-id: 783a85562b2b5db3b89920b0c68de133a577f245 --- .gitmodules | 4 + cpp/CHANGELOG.md | 11 ++ cpp/README.md | 12 +- cpp/cmake/DefineOptions.cmake | 13 +- cpp/cmake/ThirdPartyPackages.cmake | 57 +++++++ cpp/src/CMakeLists.txt | 17 ++- cpp/src/wrapper/knowhere/data_transfer.cpp | 48 ++++++ cpp/src/wrapper/knowhere/data_transfer.h | 24 +++ cpp/src/wrapper/knowhere/vec_impl.cpp | 95 ++++++++++++ cpp/src/wrapper/knowhere/vec_impl.h | 38 +++++ cpp/src/wrapper/knowhere/vec_index.cpp | 43 ++++++ cpp/src/wrapper/knowhere/vec_index.h | 56 +++++++ cpp/thirdparty/knowhere | 1 + cpp/unittest/CMakeLists.txt | 4 +- cpp/unittest/db/CMakeLists.txt | 4 +- cpp/unittest/index_wrapper/CMakeLists.txt | 23 +++ cpp/unittest/index_wrapper/knowhere_test.cpp | 149 +++++++++++++++++++ cpp/unittest/index_wrapper/utils.cpp | 81 ++++++++++ cpp/unittest/index_wrapper/utils.h | 61 ++++++++ cpp/unittest/metrics/CMakeLists.txt | 4 +- cpp/unittest/server/CMakeLists.txt | 6 +- 21 files changed, 727 insertions(+), 24 deletions(-) create mode 100644 .gitmodules create mode 100644 cpp/src/wrapper/knowhere/data_transfer.cpp create mode 100644 cpp/src/wrapper/knowhere/data_transfer.h create mode 100644 cpp/src/wrapper/knowhere/vec_impl.cpp create mode 100644 cpp/src/wrapper/knowhere/vec_impl.h create mode 100644 cpp/src/wrapper/knowhere/vec_index.cpp create mode 100644 cpp/src/wrapper/knowhere/vec_index.h create mode 160000 cpp/thirdparty/knowhere create mode 100644 cpp/unittest/index_wrapper/CMakeLists.txt create mode 100644 cpp/unittest/index_wrapper/knowhere_test.cpp create mode 100644 cpp/unittest/index_wrapper/utils.cpp create mode 100644 cpp/unittest/index_wrapper/utils.h diff --git a/.gitmodules b/.gitmodules new file mode 100644 index 0000000000..297cf0e592 --- /dev/null +++ b/.gitmodules @@ -0,0 +1,4 @@ +[submodule "cpp/thirdparty/knowhere"] + path = cpp/thirdparty/knowhere + url = git@192.168.1.105:xiaojun.lin/knowhere.git + branch = develop diff --git a/cpp/CHANGELOG.md b/cpp/CHANGELOG.md index bca5826ccb..949a05c8db 100644 --- a/cpp/CHANGELOG.md +++ b/cpp/CHANGELOG.md @@ -3,6 +3,17 @@ Please mark all change in change log and use the ticket from JIRA. 
+# Milvus 0.3.2 (2019-07-10) + +## Bug + +## Improvement + +## New Feature +- MS-154 - Integrate knowhere + +## Task + # Milvus 0.3.1 (2019-07-10) ## Bug diff --git a/cpp/README.md b/cpp/README.md index 1b2f507db2..e656f1b367 100644 --- a/cpp/README.md +++ b/cpp/README.md @@ -1,13 +1,12 @@ ### Compilation #### Step 1: install necessery tools - Install MySQL centos7 : - yum install gfortran qt4 flex bison mysql-devel + yum install gfortran qt4 flex bison mysql-devel mysql ubuntu16.04 : - sudo apt-get install gfortran qt4-qmake flex bison libmysqlclient-dev + sudo apt-get install gfortran qt4-qmake flex bison libmysqlclient-dev mysql-client If `libmysqlclient_r.so` does not exist after installing MySQL Development Files, you need to create a symbolic link: @@ -21,6 +20,9 @@ cmake_build/src/milvus_server is the server cmake_build/src/libmilvus_engine.a is the static library + git submodule init + git submodule update + cd [sourcecode path]/cpp ./build.sh -t Debug ./build.sh -t Release @@ -53,10 +55,10 @@ If you encounter the following error when building: ### Launch server Set config in cpp/conf/server_config.yaml -Add milvus/bin/lib to LD_LIBRARY_PATH +Add milvus/lib to LD_LIBRARY_PATH ``` -export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/path/to/milvus/bin/lib +export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/path/to/milvus/lib ``` Then launch server with config: diff --git a/cpp/cmake/DefineOptions.cmake b/cpp/cmake/DefineOptions.cmake index 147663d0db..82259d2eb5 100644 --- a/cpp/cmake/DefineOptions.cmake +++ b/cpp/cmake/DefineOptions.cmake @@ -68,20 +68,17 @@ define_option(MILVUS_WITH_BZ2 "Build with BZ2 compression" ON) define_option(MILVUS_WITH_EASYLOGGINGPP "Build with Easylogging++ library" ON) -define_option(MILVUS_WITH_FAISS "Build with FAISS library" ON) +define_option(MILVUS_WITH_FAISS "Build with FAISS library" OFF) -define_option(MILVUS_WITH_FAISS_GPU_VERSION "Build with FAISS GPU version" ON) +define_option(MILVUS_WITH_FAISS_GPU_VERSION "Build with FAISS GPU version" OFF) -#define_option_string(MILVUS_FAISS_GPU_ARCH "Specifying which GPU architectures to build against" -# "-gencode=arch=compute_35,code=compute_35 -gencode=arch=compute_52,code=compute_52 -gencode=arch=compute_60,code=compute_60 -gencode=arch=compute_61,code=compute_61") - -define_option(MILVUS_WITH_LAPACK "Build with LAPACK library" ON) +define_option(MILVUS_WITH_LAPACK "Build with LAPACK library" OFF) define_option(MILVUS_WITH_LZ4 "Build with lz4 compression" ON) define_option(MILVUS_WITH_JSONCONS "Build with JSONCONS" OFF) -define_option(MILVUS_WITH_OPENBLAS "Build with OpenBLAS library" ON) +define_option(MILVUS_WITH_OPENBLAS "Build with OpenBLAS library" OFF) define_option(MILVUS_WITH_PROMETHEUS "Build with PROMETHEUS library" ON) @@ -101,6 +98,8 @@ define_option(MILVUS_WITH_YAMLCPP "Build with yaml-cpp library" ON) define_option(MILVUS_WITH_ZLIB "Build with zlib compression" ON) +define_option(MILVUS_WITH_KNOWHERE "Build with Knowhere" ON) + if(CMAKE_VERSION VERSION_LESS 3.7) set(MILVUS_WITH_ZSTD_DEFAULT OFF) else() diff --git a/cpp/cmake/ThirdPartyPackages.cmake b/cpp/cmake/ThirdPartyPackages.cmake index 9aa3f62124..2c6c61dbf4 100644 --- a/cpp/cmake/ThirdPartyPackages.cmake +++ b/cpp/cmake/ThirdPartyPackages.cmake @@ -23,6 +23,7 @@ set(MILVUS_THIRDPARTY_DEPENDENCIES Easylogging++ FAISS GTest + Knowhere JSONCONS LAPACK Lz4 @@ -61,6 +62,8 @@ macro(build_dependency DEPENDENCY_NAME) build_gtest() elseif("${DEPENDENCY_NAME}" STREQUAL "LAPACK") build_lapack() + elseif("${DEPENDENCY_NAME}" STREQUAL "Knowhere") + 
build_knowhere() elseif("${DEPENDENCY_NAME}" STREQUAL "Lz4") build_lz4() elseif ("${DEPENDENCY_NAME}" STREQUAL "MySQLPP") @@ -242,6 +245,12 @@ else() set(FAISS_SOURCE_URL "https://github.com/facebookresearch/faiss/archive/${FAISS_VERSION}.tar.gz") endif() +if(DEFINED ENV{MILVUS_KNOWHERE_URL}) + set(KNOWHERE_SOURCE_URL "$ENV{MILVUS_KNOWHERE_URL}") +else() + set(KNOWHERE_SOURCE_URL "${CMAKE_SOURCE_DIR}/thirdparty/knowhere") +endif() + if (DEFINED ENV{MILVUS_GTEST_URL}) set(GTEST_SOURCE_URL "$ENV{MILVUS_GTEST_URL}") else () @@ -641,6 +650,54 @@ if(MILVUS_WITH_BZ2) include_directories(SYSTEM "${BZIP2_INCLUDE_DIR}") endif() +# ---------------------------------------------------------------------- +# Knowhere + +macro(build_knowhere) + message(STATUS "Building knowhere from source") + set(KNOWHERE_PREFIX "${CMAKE_CURRENT_BINARY_DIR}/knowhere_ep-prefix/src/knowhere_ep") + set(KNOWHERE_INCLUDE_DIR "${KNOWHERE_PREFIX}/include") + set(KNOWHERE_STATIC_LIB + "${KNOWHERE_PREFIX}/lib/${CMAKE_STATIC_LIBRARY_PREFIX}knowhere${CMAKE_STATIC_LIBRARY_SUFFIX}") + + set(KNOWHERE_CMAKE_ARGS + ${EP_COMMON_CMAKE_ARGS} + "-DCMAKE_INSTALL_PREFIX=${KNOWHERE_PREFIX}" + -DCMAKE_INSTALL_LIBDIR=lib + -DCMAKE_CUDA_COMPILER=/usr/local/cuda/bin/nvcc + -DCMAKE_BUILD_TYPE=Release) + + externalproject_add(knowhere_ep + URL + ${KNOWHERE_SOURCE_URL} + ${EP_LOG_OPTIONS} + CMAKE_ARGS + ${KNOWHERE_CMAKE_ARGS} + BUILD_COMMAND + ${MAKE} + ${MAKE_BUILD_ARGS} + BUILD_BYPRODUCTS + ${KNOWHERE_STATIC_LIB}) + + file(MAKE_DIRECTORY "${KNOWHERE_INCLUDE_DIR}") + add_library(knowhere STATIC IMPORTED) + set_target_properties( + knowhere + PROPERTIES IMPORTED_LOCATION "${KNOWHERE_STATIC_LIB}" + INTERFACE_INCLUDE_DIRECTORIES "${KNOWHERE_INCLUDE_DIR}") + + add_dependencies(knowhere knowhere_ep) +endmacro() + +if(MILVUS_WITH_KNOWHERE) + resolve_dependency(Knowhere) + + get_target_property(KNOWHERE_INCLUDE_DIR knowhere INTERFACE_INCLUDE_DIRECTORIES) + link_directories(SYSTEM "${KNOWHERE_PREFIX}/lib") + include_directories(SYSTEM "${KNOWHERE_INCLUDE_DIR}") + include_directories(SYSTEM "${KNOWHERE_INCLUDE_DIR}/SPTAG/AnnService") +endif() + # ---------------------------------------------------------------------- # Easylogging++ diff --git a/cpp/src/CMakeLists.txt b/cpp/src/CMakeLists.txt index d0029d5175..6a7fb6835e 100644 --- a/cpp/src/CMakeLists.txt +++ b/cpp/src/CMakeLists.txt @@ -12,6 +12,7 @@ aux_source_directory(utils utils_files) aux_source_directory(db db_files) aux_source_directory(wrapper wrapper_files) aux_source_directory(metrics metrics_files) +aux_source_directory(wrapper/knowhere knowhere_files) aux_source_directory(db/scheduler scheduler_files) aux_source_directory(db/scheduler/context scheduler_context_files) @@ -50,6 +51,7 @@ set(engine_files ${wrapper_files} # metrics/Metrics.cpp ${metrics_files} + ${knowhere_files} ) set(get_sys_info_files @@ -65,14 +67,17 @@ include_directories(thrift/gen-cpp) include_directories(/usr/include/mysql) set(third_party_libs + knowhere + SPTAGLibStatic + arrow + jemalloc_pic + faiss + openblas + lapack easyloggingpp sqlite thrift yaml-cpp - libgpufaiss.a - faiss - lapack - openblas prometheus-cpp-push prometheus-cpp-pull prometheus-cpp-core @@ -84,6 +89,8 @@ set(third_party_libs snappy zlib zstd + cudart + cublas mysqlpp ${CUDA_TOOLKIT_ROOT_DIR}/lib64/stubs/libnvidia-ml.so ) @@ -97,8 +104,6 @@ if (GPU_VERSION STREQUAL "ON") pthread libgomp.a libgfortran.a - cudart - cublas ${CUDA_TOOLKIT_ROOT_DIR}/lib64/stubs/libnvidia-ml.so ) else() diff --git a/cpp/src/wrapper/knowhere/data_transfer.cpp 
b/cpp/src/wrapper/knowhere/data_transfer.cpp new file mode 100644 index 0000000000..af5ad212e4 --- /dev/null +++ b/cpp/src/wrapper/knowhere/data_transfer.cpp @@ -0,0 +1,48 @@ +//////////////////////////////////////////////////////////////////////////////// +// Copyright 上海赜睿信息科技有限公司(Zilliz) - All Rights Reserved +// Unauthorized copying of this file, via any medium is strictly prohibited. +// Proprietary and confidential. +//////////////////////////////////////////////////////////////////////////////// + +#include "data_transfer.h" + + +namespace zilliz { +namespace vecwise { +namespace engine { + +using namespace zilliz::knowhere; + +DatasetPtr +GenDatasetWithIds(const int64_t &nb, const int64_t &dim, const float *xb, const long *ids) { + std::vector shape{nb, dim}; + auto tensor = ConstructFloatTensor((uint8_t *) xb, nb * dim * sizeof(float), shape); + std::vector tensors{tensor}; + std::vector tensor_fields{ConstructFloatField("data")}; + auto tensor_schema = std::make_shared(tensor_fields); + + auto id_array = ConstructInt64Array((uint8_t *) ids, nb * sizeof(int64_t)); + std::vector arrays{id_array}; + std::vector array_fields{ConstructInt64Field("id")}; + auto array_schema = std::make_shared(tensor_fields); + + auto dataset = std::make_shared(std::move(arrays), array_schema, + std::move(tensors), tensor_schema); + return dataset; +} + +DatasetPtr +GenDataset(const int64_t &nb, const int64_t &dim, const float *xb) { + std::vector shape{nb, dim}; + auto tensor = ConstructFloatTensor((uint8_t *) xb, nb * dim * sizeof(float), shape); + std::vector tensors{tensor}; + std::vector tensor_fields{ConstructFloatField("data")}; + auto tensor_schema = std::make_shared(tensor_fields); + + auto dataset = std::make_shared(std::move(tensors), tensor_schema); + return dataset; +} + +} +} +} diff --git a/cpp/src/wrapper/knowhere/data_transfer.h b/cpp/src/wrapper/knowhere/data_transfer.h new file mode 100644 index 0000000000..c99cd1c742 --- /dev/null +++ b/cpp/src/wrapper/knowhere/data_transfer.h @@ -0,0 +1,24 @@ +//////////////////////////////////////////////////////////////////////////////// +// Copyright 上海赜睿信息科技有限公司(Zilliz) - All Rights Reserved +// Unauthorized copying of this file, via any medium is strictly prohibited. +// Proprietary and confidential. +//////////////////////////////////////////////////////////////////////////////// + +#pragma once + +#include "knowhere/adapter/structure.h" + + +namespace zilliz { +namespace vecwise { +namespace engine { + +extern zilliz::knowhere::DatasetPtr +GenDatasetWithIds(const int64_t &nb, const int64_t &dim, const float *xb, const long *ids); + +extern zilliz::knowhere::DatasetPtr +GenDataset(const int64_t &nb, const int64_t &dim, const float *xb); + +} +} +} diff --git a/cpp/src/wrapper/knowhere/vec_impl.cpp b/cpp/src/wrapper/knowhere/vec_impl.cpp new file mode 100644 index 0000000000..e24d470acc --- /dev/null +++ b/cpp/src/wrapper/knowhere/vec_impl.cpp @@ -0,0 +1,95 @@ +//////////////////////////////////////////////////////////////////////////////// +// Copyright 上海赜睿信息科技有限公司(Zilliz) - All Rights Reserved +// Unauthorized copying of this file, via any medium is strictly prohibited. +// Proprietary and confidential. 
+//////////////////////////////////////////////////////////////////////////////// + +#include "knowhere/index/index.h" +#include "knowhere/index/index_model.h" +#include "knowhere/index/index_type.h" +#include "knowhere/adapter/sptag.h" +#include "knowhere/common/tensor.h" + +#include "vec_impl.h" +#include "data_transfer.h" + + +namespace zilliz { +namespace vecwise { +namespace engine { + +using namespace zilliz::knowhere; + +void VecIndexImpl::BuildAll(const long &nb, + const float *xb, + const long *ids, + const Config &cfg, + const long &nt, + const float *xt) { + auto d = cfg["dim"].as(); + auto dataset = GenDatasetWithIds(nb, d, xb, ids); + + auto preprocessor = index_->BuildPreprocessor(dataset, cfg); + index_->set_preprocessor(preprocessor); + auto model = index_->Train(dataset, cfg); + index_->set_index_model(model); + index_->Add(dataset, cfg); +} + +void VecIndexImpl::Add(const long &nb, const float *xb, const long *ids, const Config &cfg) { + // TODO(linxj): Assert index is trained; + + auto d = cfg["dim"].as(); + auto dataset = GenDatasetWithIds(nb, d, xb, ids); + + index_->Add(dataset, cfg); +} + +void VecIndexImpl::Search(const long &nq, const float *xq, float *dist, long *ids, const Config &cfg) { + // TODO: Assert index is trained; + + auto d = cfg["dim"].as(); + auto k = cfg["k"].as(); + auto dataset = GenDataset(nq, d, xq); + + Config search_cfg; + auto res = index_->Search(dataset, cfg); + auto ids_array = res->array()[0]; + auto dis_array = res->array()[1]; + + //{ + // auto& ids = ids_array; + // auto& dists = dis_array; + // std::stringstream ss_id; + // std::stringstream ss_dist; + // for (auto i = 0; i < 10; i++) { + // for (auto j = 0; j < k; ++j) { + // ss_id << *(ids->data()->GetValues(1, i * k + j)) << " "; + // ss_dist << *(dists->data()->GetValues(1, i * k + j)) << " "; + // } + // ss_id << std::endl; + // ss_dist << std::endl; + // } + // std::cout << "id\n" << ss_id.str() << std::endl; + // std::cout << "dist\n" << ss_dist.str() << std::endl; + //} + + auto p_ids = ids_array->data()->GetValues(1, 0); + auto p_dist = ids_array->data()->GetValues(1, 0); + + // TODO(linxj): avoid copy here. + memcpy(ids, p_ids, sizeof(int64_t) * nq * k); + memcpy(dist, p_dist, sizeof(float) * nq * k); +} + +zilliz::knowhere::BinarySet VecIndexImpl::Serialize() { + return index_->Serialize(); +} + +void VecIndexImpl::Load(const zilliz::knowhere::BinarySet &index_binary) { + index_->Load(index_binary); +} + +} +} +} diff --git a/cpp/src/wrapper/knowhere/vec_impl.h b/cpp/src/wrapper/knowhere/vec_impl.h new file mode 100644 index 0000000000..25f7d16548 --- /dev/null +++ b/cpp/src/wrapper/knowhere/vec_impl.h @@ -0,0 +1,38 @@ +//////////////////////////////////////////////////////////////////////////////// +// Copyright 上海赜睿信息科技有限公司(Zilliz) - All Rights Reserved +// Unauthorized copying of this file, via any medium is strictly prohibited. +// Proprietary and confidential. 
+//////////////////////////////////////////////////////////////////////////////// + +#pragma once + +#include "knowhere/index/vector_index/vector_index.h" + +#include "vec_index.h" + + +namespace zilliz { +namespace vecwise { +namespace engine { + +class VecIndexImpl : public VecIndex { + public: + explicit VecIndexImpl(std::shared_ptr index) : index_(std::move(index)) {}; + void BuildAll(const long &nb, + const float *xb, + const long *ids, + const Config &cfg, + const long &nt, + const float *xt) override; + void Add(const long &nb, const float *xb, const long *ids, const Config &cfg) override; + zilliz::knowhere::BinarySet Serialize() override; + void Load(const zilliz::knowhere::BinarySet &index_binary) override; + void Search(const long &nq, const float *xq, float *dist, long *ids, const Config &cfg) override; + + private: + std::shared_ptr index_ = nullptr; +}; + +} +} +} diff --git a/cpp/src/wrapper/knowhere/vec_index.cpp b/cpp/src/wrapper/knowhere/vec_index.cpp new file mode 100644 index 0000000000..171388d0af --- /dev/null +++ b/cpp/src/wrapper/knowhere/vec_index.cpp @@ -0,0 +1,43 @@ +//////////////////////////////////////////////////////////////////////////////// +// Copyright 上海赜睿信息科技有限公司(Zilliz) - All Rights Reserved +// Unauthorized copying of this file, via any medium is strictly prohibited. +// Proprietary and confidential. +//////////////////////////////////////////////////////////////////////////////// +#include "knowhere/index/vector_index/ivf.h" +#include "knowhere/index/vector_index/gpu_ivf.h" +#include "knowhere/index/vector_index/cpu_kdt_rng.h" + +#include "vec_index.h" +#include "vec_impl.h" + + +namespace zilliz { +namespace vecwise { +namespace engine { + +// TODO(linxj): index_type => enum struct +VecIndexPtr GetVecIndexFactory(const std::string &index_type) { + std::shared_ptr index; + if (index_type == "IVF") { + index = std::make_shared(); + } else if (index_type == "GPUIVF") { + index = std::make_shared(0); + } else if (index_type == "SPTAG") { + index = std::make_shared(); + } + // TODO(linxj): Support NSG + //else if (index_type == "NSG") { + // index = std::make_shared(); + //} + return std::make_shared(index); +} + +VecIndexPtr LoadVecIndex(const std::string &index_type, const zilliz::knowhere::BinarySet &index_binary) { + auto index = GetVecIndexFactory(index_type); + index->Load(index_binary); + return index; +} + +} +} +} diff --git a/cpp/src/wrapper/knowhere/vec_index.h b/cpp/src/wrapper/knowhere/vec_index.h new file mode 100644 index 0000000000..b03c43a36b --- /dev/null +++ b/cpp/src/wrapper/knowhere/vec_index.h @@ -0,0 +1,56 @@ +//////////////////////////////////////////////////////////////////////////////// +// Copyright 上海赜睿信息科技有限公司(Zilliz) - All Rights Reserved +// Unauthorized copying of this file, via any medium is strictly prohibited. +// Proprietary and confidential. +//////////////////////////////////////////////////////////////////////////////// + +#pragma once + +#include +#include + +#include "knowhere/common/config.h" +#include "knowhere/common/binary_set.h" + + +namespace zilliz { +namespace vecwise { +namespace engine { + +// TODO(linxj): jsoncons => rapidjson or other. 
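+// Minimal usage sketch for the interface declared below; the index name and
+// config keys are the ones exercised by knowhere_test.cpp in this patch:
+//   auto index = GetVecIndexFactory("IVF");
+//   index->BuildAll(nb, xb, ids, Config::object{{"nlist", 100}, {"dim", dim}});
+//   index->Search(nq, xq, dist, res_ids,
+//                 Config::object{{"dim", dim}, {"k", k}, {"nprobe", 20}});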
+using Config = zilliz::knowhere::Config; + +class VecIndex { + public: + virtual void BuildAll(const long &nb, + const float *xb, + const long *ids, + const Config &cfg, + const long &nt = 0, + const float *xt = nullptr) = 0; + + virtual void Add(const long &nb, + const float *xb, + const long *ids, + const Config &cfg = Config()) = 0; + + virtual void Search(const long &nq, + const float *xq, + float *dist, + long *ids, + const Config &cfg = Config()) = 0; + + virtual zilliz::knowhere::BinarySet Serialize() = 0; + + virtual void Load(const zilliz::knowhere::BinarySet &index_binary) = 0; +}; + +using VecIndexPtr = std::shared_ptr; + +extern VecIndexPtr GetVecIndexFactory(const std::string &index_type); + +extern VecIndexPtr LoadVecIndex(const std::string &index_type, const zilliz::knowhere::BinarySet &index_binary); + +} +} +} diff --git a/cpp/thirdparty/knowhere b/cpp/thirdparty/knowhere new file mode 160000 index 0000000000..844e600834 --- /dev/null +++ b/cpp/thirdparty/knowhere @@ -0,0 +1 @@ +Subproject commit 844e600834df1eeafc6c7e5936338ae964bd1d41 diff --git a/cpp/unittest/CMakeLists.txt b/cpp/unittest/CMakeLists.txt index 38046617ae..d0d158ec4a 100644 --- a/cpp/unittest/CMakeLists.txt +++ b/cpp/unittest/CMakeLists.txt @@ -29,7 +29,6 @@ set(unittest_libs easyloggingpp pthread metrics - openblas gfortran prometheus-cpp-pull prometheus-cpp-push @@ -41,7 +40,8 @@ set(unittest_libs add_subdirectory(server) add_subdirectory(db) -add_subdirectory(faiss_wrapper) +add_subdirectory(index_wrapper) +#add_subdirectory(faiss_wrapper) #add_subdirectory(license) add_subdirectory(metrics) add_subdirectory(storage) \ No newline at end of file diff --git a/cpp/unittest/db/CMakeLists.txt b/cpp/unittest/db/CMakeLists.txt index 5bae9190f5..b2720f7006 100644 --- a/cpp/unittest/db/CMakeLists.txt +++ b/cpp/unittest/db/CMakeLists.txt @@ -36,8 +36,10 @@ set(db_test_src cuda_add_executable(db_test ${db_test_src}) set(db_libs - libgpufaiss.a + knowhere faiss + openblas + lapack cudart cublas sqlite3 diff --git a/cpp/unittest/index_wrapper/CMakeLists.txt b/cpp/unittest/index_wrapper/CMakeLists.txt new file mode 100644 index 0000000000..51bd97b575 --- /dev/null +++ b/cpp/unittest/index_wrapper/CMakeLists.txt @@ -0,0 +1,23 @@ +include_directories("${CUDA_TOOLKIT_ROOT_DIR}/include") +link_directories("${CUDA_TOOLKIT_ROOT_DIR}/lib64") + +aux_source_directory(${MILVUS_ENGINE_SRC}/wrapper/knowhere knowhere_src) + +set(helper + utils.cpp) + +set(knowhere_libs + knowhere + SPTAGLibStatic + arrow + jemalloc_pic + faiss + openblas + lapack + tbb + cudart + cublas + ) + +add_executable(knowhere_test knowhere_test.cpp ${knowhere_src} ${helper}) +target_link_libraries(knowhere_test ${knowhere_libs} ${unittest_libs}) \ No newline at end of file diff --git a/cpp/unittest/index_wrapper/knowhere_test.cpp b/cpp/unittest/index_wrapper/knowhere_test.cpp new file mode 100644 index 0000000000..58b0d5a4b2 --- /dev/null +++ b/cpp/unittest/index_wrapper/knowhere_test.cpp @@ -0,0 +1,149 @@ +//////////////////////////////////////////////////////////////////////////////// +// Copyright 上海赜睿信息科技有限公司(Zilliz) - All Rights Reserved +// Unauthorized copying of this file, via any medium is strictly prohibited. +// Proprietary and confidential. 
+//////////////////////////////////////////////////////////////////////////////// + +#include + +#include + +#include "utils.h" + + +using namespace zilliz::vecwise::engine; +using namespace zilliz::knowhere; + +using ::testing::TestWithParam; +using ::testing::Values; +using ::testing::Combine; + + +class KnowhereWrapperTest + : public TestWithParam<::std::tuple> { + protected: + void SetUp() override { + std::string generator_type; + std::tie(index_type, generator_type, dim, nb, nq, k, train_cfg, search_cfg) = GetParam(); + + //auto generator = GetGenerateFactory(generator_type); + auto generator = std::make_shared(); + generator->GenData(dim, nb, nq, xb, xq, ids, k, gt_ids); + + index_ = GetVecIndexFactory(index_type); + } + + protected: + std::string index_type; + Config train_cfg; + Config search_cfg; + + int dim = 64; + int nb = 10000; + int nq = 10; + int k = 10; + std::vector xb; + std::vector xq; + std::vector ids; + + VecIndexPtr index_ = nullptr; + + // Ground Truth + std::vector gt_ids; +}; + +INSTANTIATE_TEST_CASE_P(WrapperParam, KnowhereWrapperTest, + Values( + // ["Index type", "Generator type", "dim", "nb", "nq", "k", "build config", "search config"] + std::make_tuple("IVF", "Default", + 64, 10000, 10, 10, + Config::object{{"nlist", 100}, {"dim", 64}}, + Config::object{{"dim", 64}, {"k", 10}, {"nprobe", 20}} + ), + std::make_tuple("SPTAG", "Default", + 64, 10000, 10, 10, + Config::object{{"TPTNumber", 1}, {"dim", 64}}, + Config::object{{"dim", 64}, {"k", 10}} + ) + ) +); + +void AssertAnns(const std::vector >, + const std::vector &res, + const int &nq, + const int &k) { + EXPECT_EQ(res.size(), nq * k); + + for (auto i = 0; i < nq; i++) { + EXPECT_EQ(gt[i * k], res[i * k]); + } + + int match = 0; + for (int i = 0; i < nq; ++i) { + for (int j = 0; j < k; ++j) { + for (int l = 0; l < k; ++l) { + if (gt[i * nq + j] == res[i * nq + l]) match++; + } + } + } + + // TODO(linxj): percision check + EXPECT_GT(float(match/nq*k), 0.5); +} + +TEST_P(KnowhereWrapperTest, base_test) { + std::vector res_ids; + float *D = new float[k * nq]; + res_ids.resize(nq * k); + + index_->BuildAll(nb, xb.data(), ids.data(), train_cfg); + index_->Search(nq, xq.data(), D, res_ids.data(), search_cfg); + AssertAnns(gt_ids, res_ids, nq, k); + delete[] D; +} + +TEST_P(KnowhereWrapperTest, serialize_test) { + std::vector res_ids; + float *D = new float[k * nq]; + res_ids.resize(nq * k); + + index_->BuildAll(nb, xb.data(), ids.data(), train_cfg); + index_->Search(nq, xq.data(), D, res_ids.data(), search_cfg); + AssertAnns(gt_ids, res_ids, nq, k); + + { + auto binaryset = index_->Serialize(); + int fileno = 0; + const std::string &base_name = "/tmp/wrapper_serialize_test_bin_"; + std::vector filename_list; + std::vector> meta_list; + for (auto &iter: binaryset.binary_map_) { + const std::string &filename = base_name + std::to_string(fileno); + FileIOWriter writer(filename); + writer(iter.second.data, iter.second.size); + + meta_list.push_back(std::make_pair(iter.first, iter.second.size)); + filename_list.push_back(filename); + ++fileno; + } + + BinarySet load_data_list; + for (int i = 0; i < filename_list.size() && i < meta_list.size(); ++i) { + auto bin_size = meta_list[i].second; + FileIOReader reader(filename_list[i]); + std::vector load_data(bin_size); + reader(load_data.data(), bin_size); + load_data_list.Append(meta_list[i].first, load_data); + } + + + res_ids.clear(); + res_ids.resize(nq * k); + auto new_index = GetVecIndexFactory(index_type); + new_index->Load(load_data_list); + new_index->Search(nq, 
xq.data(), D, res_ids.data(), search_cfg); + AssertAnns(gt_ids, res_ids, nq, k); + } + + delete[] D; +} diff --git a/cpp/unittest/index_wrapper/utils.cpp b/cpp/unittest/index_wrapper/utils.cpp new file mode 100644 index 0000000000..e228ae001d --- /dev/null +++ b/cpp/unittest/index_wrapper/utils.cpp @@ -0,0 +1,81 @@ +//////////////////////////////////////////////////////////////////////////////// +// Copyright 上海赜睿信息科技有限公司(Zilliz) - All Rights Reserved +// Unauthorized copying of this file, via any medium is strictly prohibited. +// Proprietary and confidential. +//////////////////////////////////////////////////////////////////////////////// + +#include + +#include "utils.h" + + +DataGenPtr GetGenerateFactory(const std::string &gen_type) { + std::shared_ptr generator; + if (gen_type == "default") { + generator = std::make_shared(); + } + return generator; +} + +void DataGenBase::GenData(const int &dim, const int &nb, const int &nq, + float *xb, float *xq, long *ids, + const int &k, long *gt_ids) { + for (auto i = 0; i < nb; ++i) { + for (auto j = 0; j < dim; ++j) { + //p_data[i * d + j] = float(base + i); + xb[i * dim + j] = drand48(); + } + xb[dim * i] += i / 1000.; + ids[i] = i; + } + for (size_t i = 0; i < nq * dim; ++i) { + xq[i] = xb[i]; + } + + faiss::IndexFlatL2 index(dim); + //index.add_with_ids(nb, xb, ids); + index.add(nb, xb); + float *D = new float[k * nq]; + index.search(nq, xq, k, D, gt_ids); +} + +void DataGenBase::GenData(const int &dim, + const int &nb, + const int &nq, + std::vector &xb, + std::vector &xq, + std::vector &ids, + const int &k, + std::vector >_ids) { + xb.resize(nb * dim); + xq.resize(nq * dim); + ids.resize(nb); + gt_ids.resize(nq * k); + GenData(dim, nb, nq, xb.data(), xq.data(), ids.data(), k, gt_ids.data()); +} + +FileIOReader::FileIOReader(const std::string &fname) { + name = fname; + fs = std::fstream(name, std::ios::in | std::ios::binary); +} + +FileIOReader::~FileIOReader() { + fs.close(); +} + +size_t FileIOReader::operator()(void *ptr, size_t size) { + fs.read(reinterpret_cast(ptr), size); +} + +FileIOWriter::FileIOWriter(const std::string &fname) { + name = fname; + fs = std::fstream(name, std::ios::out | std::ios::binary); +} + +FileIOWriter::~FileIOWriter() { + fs.close(); +} + +size_t FileIOWriter::operator()(void *ptr, size_t size) { + fs.write(reinterpret_cast(ptr), size); +} diff --git a/cpp/unittest/index_wrapper/utils.h b/cpp/unittest/index_wrapper/utils.h new file mode 100644 index 0000000000..bbc52a011b --- /dev/null +++ b/cpp/unittest/index_wrapper/utils.h @@ -0,0 +1,61 @@ +//////////////////////////////////////////////////////////////////////////////// +// Copyright 上海赜睿信息科技有限公司(Zilliz) - All Rights Reserved +// Unauthorized copying of this file, via any medium is strictly prohibited. +// Proprietary and confidential. 
+//////////////////////////////////////////////////////////////////////////////// + +#pragma once + +#include +#include +#include +#include +#include + + +class DataGenBase; + +using DataGenPtr = std::shared_ptr; + +extern DataGenPtr GetGenerateFactory(const std::string &gen_type); + + +class DataGenBase { + public: + virtual void GenData(const int &dim, const int &nb, const int &nq, float *xb, float *xq, long *ids, + const int &k, long *gt_ids); + + virtual void GenData(const int &dim, + const int &nb, + const int &nq, + std::vector &xb, + std::vector &xq, + std::vector &ids, + const int &k, + std::vector >_ids); +}; + + +class SanityCheck : public DataGenBase { + public: + void GenData(const int &dim, const int &nb, const int &nq, float *xb, float *xq, long *ids, + const int &k, long *gt_ids) override; +}; + +struct FileIOWriter { + std::fstream fs; + std::string name; + + FileIOWriter(const std::string &fname); + ~FileIOWriter(); + size_t operator()(void *ptr, size_t size); +}; + +struct FileIOReader { + std::fstream fs; + std::string name; + + FileIOReader(const std::string &fname); + ~FileIOReader(); + size_t operator()(void *ptr, size_t size); +}; diff --git a/cpp/unittest/metrics/CMakeLists.txt b/cpp/unittest/metrics/CMakeLists.txt index d31e44c056..be5a542da6 100644 --- a/cpp/unittest/metrics/CMakeLists.txt +++ b/cpp/unittest/metrics/CMakeLists.txt @@ -62,8 +62,10 @@ set(count_test_src add_executable(metrics_test ${count_test_src} ${require_files} ) target_link_libraries(metrics_test - libgpufaiss.a + knowhere faiss + openblas + lapack cudart cublas sqlite3 diff --git a/cpp/unittest/server/CMakeLists.txt b/cpp/unittest/server/CMakeLists.txt index c4112cda9e..94a581d276 100644 --- a/cpp/unittest/server/CMakeLists.txt +++ b/cpp/unittest/server/CMakeLists.txt @@ -32,9 +32,11 @@ cuda_add_executable(server_test ) set(require_libs - stdc++ - libgpufaiss.a + knowhere faiss + openblas + lapack + stdc++ cudart cublas sqlite3 From 172cd21daf644dc523c06af95fd428719ce6a693 Mon Sep 17 00:00:00 2001 From: zhiru Date: Fri, 5 Jul 2019 15:03:40 +0800 Subject: [PATCH 02/91] add mem impl Former-commit-id: 074f1ade11572923ddee2653c26ce6a143001b3c --- cpp/src/db/Constants.h | 20 ++++ cpp/src/db/MemTable.cpp | 51 ++++++++++ cpp/src/db/MemTable.h | 40 ++++++++ cpp/src/db/MemTableFile.cpp | 66 +++++++++++++ cpp/src/db/MemTableFile.h | 44 +++++++++ cpp/src/db/VectorSource.cpp | 60 +++++++++++ cpp/src/db/VectorSource.h | 41 ++++++++ cpp/unittest/db/mem_test.cpp | 187 +++++++++++++++++++++++++++++++++++ 8 files changed, 509 insertions(+) create mode 100644 cpp/src/db/Constants.h create mode 100644 cpp/src/db/MemTable.cpp create mode 100644 cpp/src/db/MemTable.h create mode 100644 cpp/src/db/MemTableFile.cpp create mode 100644 cpp/src/db/MemTableFile.h create mode 100644 cpp/src/db/VectorSource.cpp create mode 100644 cpp/src/db/VectorSource.h create mode 100644 cpp/unittest/db/mem_test.cpp diff --git a/cpp/src/db/Constants.h b/cpp/src/db/Constants.h new file mode 100644 index 0000000000..2bb2e0a064 --- /dev/null +++ b/cpp/src/db/Constants.h @@ -0,0 +1,20 @@ +/******************************************************************************* + * Copyright 上海赜睿信息科技有限公司(Zilliz) - All Rights Reserved + * Unauthorized copying of this file, via any medium is strictly prohibited. + * Proprietary and confidential. 
+ ******************************************************************************/ +#pragma once + +namespace zilliz { +namespace milvus { +namespace engine { + +const size_t K = 1024UL; +const size_t M = K*K; +const size_t MAX_TABLE_FILE_MEM = 128 * M; + +const int VECTOR_TYPE_SIZE = sizeof(float); + +} // namespace engine +} // namespace milvus +} // namespace zilliz diff --git a/cpp/src/db/MemTable.cpp b/cpp/src/db/MemTable.cpp new file mode 100644 index 0000000000..032d479999 --- /dev/null +++ b/cpp/src/db/MemTable.cpp @@ -0,0 +1,51 @@ +#include "MemTable.h" +#include "Log.h" + +namespace zilliz { +namespace milvus { +namespace engine { + +MemTable::MemTable(const std::string& table_id, + const std::shared_ptr& meta) : + table_id_(table_id), + meta_(meta) { + +} + +Status MemTable::Add(VectorSource::Ptr& source) { + while (!source->AllAdded()) { + MemTableFile::Ptr currentMemTableFile; + if (!mem_table_file_stack_.empty()) { + currentMemTableFile = mem_table_file_stack_.top(); + } + Status status; + if (mem_table_file_stack_.empty() || currentMemTableFile->isFull()) { + MemTableFile::Ptr newMemTableFile = std::make_shared(table_id_, meta_); + status = newMemTableFile->Add(source); + if (status.ok()) { + mem_table_file_stack_.push(newMemTableFile); + } + } + else { + status = currentMemTableFile->Add(source); + } + if (!status.ok()) { + std::string errMsg = "MemTable::Add failed: " + status.ToString(); + ENGINE_LOG_ERROR << errMsg; + return Status::Error(errMsg); + } + } + return Status::OK(); +} + +void MemTable::GetCurrentMemTableFile(MemTableFile::Ptr& mem_table_file) { + mem_table_file = mem_table_file_stack_.top(); +} + +size_t MemTable::GetStackSize() { + return mem_table_file_stack_.size(); +} + +} // namespace engine +} // namespace milvus +} // namespace zilliz \ No newline at end of file diff --git a/cpp/src/db/MemTable.h b/cpp/src/db/MemTable.h new file mode 100644 index 0000000000..b9fe4147d8 --- /dev/null +++ b/cpp/src/db/MemTable.h @@ -0,0 +1,40 @@ +#pragma once + +#include "Status.h" +#include "MemTableFile.h" +#include "VectorSource.h" + +#include + +namespace zilliz { +namespace milvus { +namespace engine { + +class MemTable { + +public: + + using Ptr = std::shared_ptr; + using MemTableFileStack = std::stack; + using MetaPtr = meta::Meta::Ptr; + + MemTable(const std::string& table_id, const std::shared_ptr& meta); + + Status Add(VectorSource::Ptr& source); + + void GetCurrentMemTableFile(MemTableFile::Ptr& mem_table_file); + + size_t GetStackSize(); + +private: + const std::string table_id_; + + MemTableFileStack mem_table_file_stack_; + + MetaPtr meta_; + +}; //MemTable + +} // namespace engine +} // namespace milvus +} // namespace zilliz \ No newline at end of file diff --git a/cpp/src/db/MemTableFile.cpp b/cpp/src/db/MemTableFile.cpp new file mode 100644 index 0000000000..26bc0d38e9 --- /dev/null +++ b/cpp/src/db/MemTableFile.cpp @@ -0,0 +1,66 @@ +#include "MemTableFile.h" +#include "Constants.h" +#include "Log.h" + +#include + +namespace zilliz { +namespace milvus { +namespace engine { + +MemTableFile::MemTableFile(const std::string& table_id, + const std::shared_ptr& meta) : + table_id_(table_id), + meta_(meta) { + + current_mem_ = 0; + CreateTableFile(); +} + +Status MemTableFile::CreateTableFile() { + + meta::TableFileSchema table_file_schema; + table_file_schema.table_id_ = table_id_; + auto status = meta_->CreateTableFile(table_file_schema); + if (status.ok()) { + table_file_schema_ = table_file_schema; + } + else { + std::string errMsg = 
"MemTableFile::CreateTableFile failed: " + status.ToString(); + ENGINE_LOG_ERROR << errMsg; + } + return status; +} + +Status MemTableFile::Add(const VectorSource::Ptr& source) { + + size_t singleVectorMemSize = table_file_schema_.dimension_ * VECTOR_TYPE_SIZE; + size_t memLeft = GetMemLeft(); + if (memLeft >= singleVectorMemSize) { + size_t numVectorsToAdd = std::ceil(memLeft / singleVectorMemSize); + size_t numVectorsAdded; + auto status = source->Add(table_file_schema_, numVectorsToAdd, numVectorsAdded); + if (status.ok()) { + current_mem_ += (numVectorsAdded * singleVectorMemSize); + } + return status; + } + return Status::OK(); +} + +size_t MemTableFile::GetCurrentMem() { + return current_mem_; +} + +size_t MemTableFile::GetMemLeft() { + return (MAX_TABLE_FILE_MEM - current_mem_); +} + +bool MemTableFile::isFull() { + size_t singleVectorMemSize = table_file_schema_.dimension_ * VECTOR_TYPE_SIZE; + return (GetMemLeft() < singleVectorMemSize); +} + +} // namespace engine +} // namespace milvus +} // namespace zilliz \ No newline at end of file diff --git a/cpp/src/db/MemTableFile.h b/cpp/src/db/MemTableFile.h new file mode 100644 index 0000000000..1efe4c0bfe --- /dev/null +++ b/cpp/src/db/MemTableFile.h @@ -0,0 +1,44 @@ +#pragma once + +#include "Status.h" +#include "Meta.h" +#include "VectorSource.h" + +namespace zilliz { +namespace milvus { +namespace engine { + +class MemTableFile { + +public: + + using Ptr = std::shared_ptr; + using MetaPtr = meta::Meta::Ptr; + + MemTableFile(const std::string& table_id, const std::shared_ptr& meta); + + Status Add(const VectorSource::Ptr& source); + + size_t GetCurrentMem(); + + size_t GetMemLeft(); + + bool isFull(); + +private: + + Status CreateTableFile(); + + const std::string table_id_; + + meta::TableFileSchema table_file_schema_; + + MetaPtr meta_; + + size_t current_mem_; + +}; //MemTableFile + +} // namespace engine +} // namespace milvus +} // namespace zilliz \ No newline at end of file diff --git a/cpp/src/db/VectorSource.cpp b/cpp/src/db/VectorSource.cpp new file mode 100644 index 0000000000..dff5423c6f --- /dev/null +++ b/cpp/src/db/VectorSource.cpp @@ -0,0 +1,60 @@ +#include "VectorSource.h" +#include "ExecutionEngine.h" +#include "EngineFactory.h" +#include "Log.h" + +namespace zilliz { +namespace milvus { +namespace engine { + + +VectorSource::VectorSource(const size_t &n, + const float *vectors) : + n_(n), + vectors_(vectors), + id_generator_(new SimpleIDGenerator()) { + current_num_vectors_added = 0; +} + +Status VectorSource::Add(const meta::TableFileSchema& table_file_schema, const size_t& num_vectors_to_add, size_t& num_vectors_added) { + + if (table_file_schema.dimension_ <= 0) { + std::string errMsg = "VectorSource::Add: table_file_schema dimension = " + + std::to_string(table_file_schema.dimension_) + ", table_id = " + table_file_schema.table_id_; + ENGINE_LOG_ERROR << errMsg; + return Status::Error(errMsg); + } + ExecutionEnginePtr engine = EngineFactory::Build(table_file_schema.dimension_, + table_file_schema.location_, + (EngineType)table_file_schema.engine_type_); + + num_vectors_added = current_num_vectors_added + num_vectors_to_add <= n_ ? 
num_vectors_to_add : n_ - current_num_vectors_added; + IDNumbers vector_ids_to_add; + id_generator_->GetNextIDNumbers(num_vectors_added, vector_ids_to_add); + Status status = engine->AddWithIds(num_vectors_added, vectors_ + current_num_vectors_added, vector_ids_to_add.data()); + if (status.ok()) { + current_num_vectors_added += num_vectors_added; + vector_ids_.insert(vector_ids_.end(), vector_ids_to_add.begin(), vector_ids_to_add.end()); + } + else { + ENGINE_LOG_ERROR << "VectorSource::Add failed: " + status.ToString(); + } + + return status; +} + +size_t VectorSource::GetNumVectorsAdded() { + return current_num_vectors_added; +} + +bool VectorSource::AllAdded() { + return (current_num_vectors_added == n_); +} + +IDNumbers VectorSource::GetVectorIds() { + return vector_ids_; +} + +} // namespace engine +} // namespace milvus +} // namespace zilliz \ No newline at end of file diff --git a/cpp/src/db/VectorSource.h b/cpp/src/db/VectorSource.h new file mode 100644 index 0000000000..170f3634cf --- /dev/null +++ b/cpp/src/db/VectorSource.h @@ -0,0 +1,41 @@ +#pragma once + +#include "Status.h" +#include "Meta.h" +#include "IDGenerator.h" + +namespace zilliz { +namespace milvus { +namespace engine { + +class VectorSource { + +public: + + using Ptr = std::shared_ptr; + + VectorSource(const size_t& n, const float* vectors); + + Status Add(const meta::TableFileSchema& table_file_schema, const size_t& num_vectors_to_add, size_t& num_vectors_added); + + size_t GetNumVectorsAdded(); + + bool AllAdded(); + + IDNumbers GetVectorIds(); + +private: + + const size_t n_; + const float* vectors_; + IDNumbers vector_ids_; + + size_t current_num_vectors_added; + + IDGenerator* id_generator_; + +}; //VectorSource + +} // namespace engine +} // namespace milvus +} // namespace zilliz \ No newline at end of file diff --git a/cpp/unittest/db/mem_test.cpp b/cpp/unittest/db/mem_test.cpp new file mode 100644 index 0000000000..8418b9cd2d --- /dev/null +++ b/cpp/unittest/db/mem_test.cpp @@ -0,0 +1,187 @@ +#include "gtest/gtest.h" + +#include "db/VectorSource.h" +#include "db/MemTableFile.h" +#include "db/MemTable.h" +#include "utils.h" +#include "db/Factories.h" +#include "db/Constants.h" + +using namespace zilliz::milvus; + +namespace { + + static const std::string TABLE_NAME = "test_group"; + static constexpr int64_t TABLE_DIM = 256; + static constexpr int64_t VECTOR_COUNT = 250000; + static constexpr int64_t INSERT_LOOP = 10000; + + engine::meta::TableSchema BuildTableSchema() { + engine::meta::TableSchema table_info; + table_info.dimension_ = TABLE_DIM; + table_info.table_id_ = TABLE_NAME; + table_info.engine_type_ = (int)engine::EngineType::FAISS_IDMAP; + return table_info; + } + + void BuildVectors(int64_t n, std::vector& vectors) { + vectors.clear(); + vectors.resize(n*TABLE_DIM); + float* data = vectors.data(); + for(int i = 0; i < n; i++) { + for(int j = 0; j < TABLE_DIM; j++) data[TABLE_DIM * i + j] = drand48(); + data[TABLE_DIM * i] += i / 2000.; + } + } +} + +TEST(MEM_TEST, VECTOR_SOURCE_TEST) { + + std::shared_ptr impl_ = engine::DBMetaImplFactory::Build(); + + engine::meta::TableSchema table_schema = BuildTableSchema(); + auto status = impl_->CreateTable(table_schema); + ASSERT_TRUE(status.ok()); + + engine::meta::TableFileSchema table_file_schema; + table_file_schema.table_id_ = TABLE_NAME; + status = impl_->CreateTableFile(table_file_schema); + ASSERT_TRUE(status.ok()); + + int64_t n = 100; + std::vector vectors; + BuildVectors(n, vectors); + + engine::VectorSource source(n, vectors.data()); + + size_t 
num_vectors_added; + status = source.Add(table_file_schema, 50, num_vectors_added); + ASSERT_TRUE(status.ok()); + + ASSERT_EQ(num_vectors_added, 50); + + engine::IDNumbers vector_ids = source.GetVectorIds(); + ASSERT_EQ(vector_ids.size(), 50); + + status = source.Add(table_file_schema, 60, num_vectors_added); + ASSERT_TRUE(status.ok()); + + ASSERT_EQ(num_vectors_added, 50); + + vector_ids = source.GetVectorIds(); + ASSERT_EQ(vector_ids.size(), 100); + +// for (auto& id : vector_ids) { +// std::cout << id << std::endl; +// } + + status = impl_->DropAll(); + ASSERT_TRUE(status.ok()); +} + +TEST(MEM_TEST, MEM_TABLE_FILE_TEST) { + + std::shared_ptr impl_ = engine::DBMetaImplFactory::Build(); + + engine::meta::TableSchema table_schema = BuildTableSchema(); + auto status = impl_->CreateTable(table_schema); + ASSERT_TRUE(status.ok()); + + engine::MemTableFile memTableFile(TABLE_NAME, impl_); + + int64_t n_100 = 100; + std::vector vectors_100; + BuildVectors(n_100, vectors_100); + + engine::VectorSource::Ptr source = std::make_shared(n_100, vectors_100.data()); + + status = memTableFile.Add(source); + ASSERT_TRUE(status.ok()); + +// std::cout << memTableFile.GetCurrentMem() << " " << memTableFile.GetMemLeft() << std::endl; + + engine::IDNumbers vector_ids = source->GetVectorIds(); + ASSERT_EQ(vector_ids.size(), 100); + + size_t singleVectorMem = sizeof(float) * TABLE_DIM; + ASSERT_EQ(memTableFile.GetCurrentMem(), n_100 * singleVectorMem); + + int64_t n_max = engine::MAX_TABLE_FILE_MEM / singleVectorMem; + std::vector vectors_128M; + BuildVectors(n_max, vectors_128M); + + engine::VectorSource::Ptr source_128M = std::make_shared(n_max, vectors_128M.data()); + status = memTableFile.Add(source_128M); + + vector_ids = source_128M->GetVectorIds(); + ASSERT_EQ(vector_ids.size(), n_max - n_100); + + ASSERT_TRUE(memTableFile.isFull()); + + status = impl_->DropAll(); + ASSERT_TRUE(status.ok()); +} + +TEST(MEM_TEST, MEM_TABLE_TEST) { + + std::shared_ptr impl_ = engine::DBMetaImplFactory::Build(); + + engine::meta::TableSchema table_schema = BuildTableSchema(); + auto status = impl_->CreateTable(table_schema); + ASSERT_TRUE(status.ok()); + + int64_t n_100 = 100; + std::vector vectors_100; + BuildVectors(n_100, vectors_100); + + engine::VectorSource::Ptr source_100 = std::make_shared(n_100, vectors_100.data()); + + engine::MemTable memTable(TABLE_NAME, impl_); + + status = memTable.Add(source_100); + ASSERT_TRUE(status.ok()); + + engine::IDNumbers vector_ids = source_100->GetVectorIds(); + ASSERT_EQ(vector_ids.size(), 100); + + engine::MemTableFile::Ptr memTableFile; + memTable.GetCurrentMemTableFile(memTableFile); + size_t singleVectorMem = sizeof(float) * TABLE_DIM; + ASSERT_EQ(memTableFile->GetCurrentMem(), n_100 * singleVectorMem); + + int64_t n_max = engine::MAX_TABLE_FILE_MEM / singleVectorMem; + std::vector vectors_128M; + BuildVectors(n_max, vectors_128M); + + engine::VectorSource::Ptr source_128M = std::make_shared(n_max, vectors_128M.data()); + status = memTable.Add(source_128M); + ASSERT_TRUE(status.ok()); + + vector_ids = source_128M->GetVectorIds(); + ASSERT_EQ(vector_ids.size(), n_max); + + memTable.GetCurrentMemTableFile(memTableFile); + ASSERT_EQ(memTableFile->GetCurrentMem(), n_100 * singleVectorMem); + + ASSERT_EQ(memTable.GetStackSize(), 2); + + int64_t n_1G = 1024000; + std::vector vectors_1G; + BuildVectors(n_1G, vectors_1G); + + engine::VectorSource::Ptr source_1G = std::make_shared(n_1G, vectors_1G.data()); + + status = memTable.Add(source_1G); + ASSERT_TRUE(status.ok()); + + vector_ids 
= source_1G->GetVectorIds(); + ASSERT_EQ(vector_ids.size(), n_1G); + + int expectedStackSize = 2 + std::ceil((n_1G - n_100) * singleVectorMem / engine::MAX_TABLE_FILE_MEM); + ASSERT_EQ(memTable.GetStackSize(), expectedStackSize); + + status = impl_->DropAll(); + ASSERT_TRUE(status.ok()); +} + + From 8f42ef678d577af061f522575b9aa60c844a09f6 Mon Sep 17 00:00:00 2001 From: zhiru Date: Fri, 5 Jul 2019 15:57:49 +0800 Subject: [PATCH 03/91] update Former-commit-id: b5c019432679df7fcdf3aacd0e061ee91ddf9609 --- cpp/src/db/MemTableFile.cpp | 10 ++++++++-- cpp/src/db/MemTableFile.h | 3 +++ cpp/src/db/VectorSource.cpp | 10 +++++----- cpp/src/db/VectorSource.h | 8 +++++++- cpp/unittest/db/mem_test.cpp | 8 ++++++-- 5 files changed, 29 insertions(+), 10 deletions(-) diff --git a/cpp/src/db/MemTableFile.cpp b/cpp/src/db/MemTableFile.cpp index 26bc0d38e9..58b76ab834 100644 --- a/cpp/src/db/MemTableFile.cpp +++ b/cpp/src/db/MemTableFile.cpp @@ -1,6 +1,7 @@ #include "MemTableFile.h" #include "Constants.h" #include "Log.h" +#include "EngineFactory.h" #include @@ -14,7 +15,12 @@ MemTableFile::MemTableFile(const std::string& table_id, meta_(meta) { current_mem_ = 0; - CreateTableFile(); + auto status = CreateTableFile(); + if (status.ok()) { + execution_engine_ = EngineFactory::Build(table_file_schema_.dimension_, + table_file_schema_.location_, + (EngineType)table_file_schema_.engine_type_); + } } Status MemTableFile::CreateTableFile() { @@ -39,7 +45,7 @@ Status MemTableFile::Add(const VectorSource::Ptr& source) { if (memLeft >= singleVectorMemSize) { size_t numVectorsToAdd = std::ceil(memLeft / singleVectorMemSize); size_t numVectorsAdded; - auto status = source->Add(table_file_schema_, numVectorsToAdd, numVectorsAdded); + auto status = source->Add(execution_engine_, table_file_schema_, numVectorsToAdd, numVectorsAdded); if (status.ok()) { current_mem_ += (numVectorsAdded * singleVectorMemSize); } diff --git a/cpp/src/db/MemTableFile.h b/cpp/src/db/MemTableFile.h index 1efe4c0bfe..04f30178ea 100644 --- a/cpp/src/db/MemTableFile.h +++ b/cpp/src/db/MemTableFile.h @@ -3,6 +3,7 @@ #include "Status.h" #include "Meta.h" #include "VectorSource.h" +#include "ExecutionEngine.h" namespace zilliz { namespace milvus { @@ -37,6 +38,8 @@ private: size_t current_mem_; + ExecutionEnginePtr execution_engine_; + }; //MemTableFile } // namespace engine diff --git a/cpp/src/db/VectorSource.cpp b/cpp/src/db/VectorSource.cpp index dff5423c6f..f7cef994fa 100644 --- a/cpp/src/db/VectorSource.cpp +++ b/cpp/src/db/VectorSource.cpp @@ -16,7 +16,10 @@ VectorSource::VectorSource(const size_t &n, current_num_vectors_added = 0; } -Status VectorSource::Add(const meta::TableFileSchema& table_file_schema, const size_t& num_vectors_to_add, size_t& num_vectors_added) { +Status VectorSource::Add(const ExecutionEnginePtr& execution_engine, + const meta::TableFileSchema& table_file_schema, + const size_t& num_vectors_to_add, + size_t& num_vectors_added) { if (table_file_schema.dimension_ <= 0) { std::string errMsg = "VectorSource::Add: table_file_schema dimension = " + @@ -24,14 +27,11 @@ Status VectorSource::Add(const meta::TableFileSchema& table_file_schema, const s ENGINE_LOG_ERROR << errMsg; return Status::Error(errMsg); } - ExecutionEnginePtr engine = EngineFactory::Build(table_file_schema.dimension_, - table_file_schema.location_, - (EngineType)table_file_schema.engine_type_); num_vectors_added = current_num_vectors_added + num_vectors_to_add <= n_ ? 
num_vectors_to_add : n_ - current_num_vectors_added; IDNumbers vector_ids_to_add; id_generator_->GetNextIDNumbers(num_vectors_added, vector_ids_to_add); - Status status = engine->AddWithIds(num_vectors_added, vectors_ + current_num_vectors_added, vector_ids_to_add.data()); + Status status = execution_engine->AddWithIds(num_vectors_added, vectors_ + current_num_vectors_added, vector_ids_to_add.data()); if (status.ok()) { current_num_vectors_added += num_vectors_added; vector_ids_.insert(vector_ids_.end(), vector_ids_to_add.begin(), vector_ids_to_add.end()); diff --git a/cpp/src/db/VectorSource.h b/cpp/src/db/VectorSource.h index 170f3634cf..597eee4ad8 100644 --- a/cpp/src/db/VectorSource.h +++ b/cpp/src/db/VectorSource.h @@ -3,6 +3,7 @@ #include "Status.h" #include "Meta.h" #include "IDGenerator.h" +#include "ExecutionEngine.h" namespace zilliz { namespace milvus { @@ -16,7 +17,10 @@ public: VectorSource(const size_t& n, const float* vectors); - Status Add(const meta::TableFileSchema& table_file_schema, const size_t& num_vectors_to_add, size_t& num_vectors_added); + Status Add(const ExecutionEnginePtr& execution_engine, + const meta::TableFileSchema& table_file_schema, + const size_t& num_vectors_to_add, + size_t& num_vectors_added); size_t GetNumVectorsAdded(); @@ -24,6 +28,8 @@ public: IDNumbers GetVectorIds(); +// Status Serialize(); + private: const size_t n_; diff --git a/cpp/unittest/db/mem_test.cpp b/cpp/unittest/db/mem_test.cpp index 8418b9cd2d..111914f8a9 100644 --- a/cpp/unittest/db/mem_test.cpp +++ b/cpp/unittest/db/mem_test.cpp @@ -6,6 +6,7 @@ #include "utils.h" #include "db/Factories.h" #include "db/Constants.h" +#include "db/EngineFactory.h" using namespace zilliz::milvus; @@ -55,7 +56,10 @@ TEST(MEM_TEST, VECTOR_SOURCE_TEST) { engine::VectorSource source(n, vectors.data()); size_t num_vectors_added; - status = source.Add(table_file_schema, 50, num_vectors_added); + engine::ExecutionEnginePtr execution_engine_ = engine::EngineFactory::Build(table_file_schema.dimension_, + table_file_schema.location_, + (engine::EngineType)table_file_schema.engine_type_); + status = source.Add(execution_engine_, table_file_schema, 50, num_vectors_added); ASSERT_TRUE(status.ok()); ASSERT_EQ(num_vectors_added, 50); @@ -63,7 +67,7 @@ TEST(MEM_TEST, VECTOR_SOURCE_TEST) { engine::IDNumbers vector_ids = source.GetVectorIds(); ASSERT_EQ(vector_ids.size(), 50); - status = source.Add(table_file_schema, 60, num_vectors_added); + status = source.Add(execution_engine_, table_file_schema, 60, num_vectors_added); ASSERT_TRUE(status.ok()); ASSERT_EQ(num_vectors_added, 50); From 9f38b96eddf222c57bb4b1eb6b23edf7d6b16735 Mon Sep 17 00:00:00 2001 From: zhiru Date: Fri, 5 Jul 2019 16:46:15 +0800 Subject: [PATCH 04/91] Implemented add and serialize Former-commit-id: 25fbbc2185efc4b45ea8f4693fea0ba0001d267e --- cpp/src/db/MemTable.cpp | 32 +++++++++++++++++++-------- cpp/src/db/MemTable.h | 10 ++++++--- cpp/src/db/MemTableFile.cpp | 42 +++++++++++++++++++++++++++++++++--- cpp/src/db/MemTableFile.h | 8 +++++-- cpp/src/db/VectorSource.cpp | 12 +++++------ cpp/src/db/VectorSource.h | 2 -- cpp/unittest/db/mem_test.cpp | 11 +++++++--- 7 files changed, 89 insertions(+), 28 deletions(-) diff --git a/cpp/src/db/MemTable.cpp b/cpp/src/db/MemTable.cpp index 032d479999..86554695c8 100644 --- a/cpp/src/db/MemTable.cpp +++ b/cpp/src/db/MemTable.cpp @@ -6,24 +6,26 @@ namespace milvus { namespace engine { MemTable::MemTable(const std::string& table_id, - const std::shared_ptr& meta) : + const std::shared_ptr& meta, + const 
Options& options) : table_id_(table_id), - meta_(meta) { + meta_(meta), + options_(options) { } Status MemTable::Add(VectorSource::Ptr& source) { while (!source->AllAdded()) { MemTableFile::Ptr currentMemTableFile; - if (!mem_table_file_stack_.empty()) { - currentMemTableFile = mem_table_file_stack_.top(); + if (!mem_table_file_list_.empty()) { + currentMemTableFile = mem_table_file_list_.back(); } Status status; - if (mem_table_file_stack_.empty() || currentMemTableFile->isFull()) { - MemTableFile::Ptr newMemTableFile = std::make_shared(table_id_, meta_); + if (mem_table_file_list_.empty() || currentMemTableFile->IsFull()) { + MemTableFile::Ptr newMemTableFile = std::make_shared(table_id_, meta_, options_); status = newMemTableFile->Add(source); if (status.ok()) { - mem_table_file_stack_.push(newMemTableFile); + mem_table_file_list_.emplace_back(newMemTableFile); } } else { @@ -39,11 +41,23 @@ Status MemTable::Add(VectorSource::Ptr& source) { } void MemTable::GetCurrentMemTableFile(MemTableFile::Ptr& mem_table_file) { - mem_table_file = mem_table_file_stack_.top(); + mem_table_file = mem_table_file_list_.back(); } size_t MemTable::GetStackSize() { - return mem_table_file_stack_.size(); + return mem_table_file_list_.size(); +} + +Status MemTable::Serialize() { + for (auto& memTableFile : mem_table_file_list_) { + auto status = memTableFile->Serialize(); + if (!status.ok()) { + std::string errMsg = "MemTable::Serialize failed: " + status.ToString(); + ENGINE_LOG_ERROR << errMsg; + return Status::Error(errMsg); + } + } + return Status::OK(); } } // namespace engine diff --git a/cpp/src/db/MemTable.h b/cpp/src/db/MemTable.h index b9fe4147d8..d5c7cc9e85 100644 --- a/cpp/src/db/MemTable.h +++ b/cpp/src/db/MemTable.h @@ -15,10 +15,10 @@ class MemTable { public: using Ptr = std::shared_ptr; - using MemTableFileStack = std::stack; + using MemTableFileList = std::vector; using MetaPtr = meta::Meta::Ptr; - MemTable(const std::string& table_id, const std::shared_ptr& meta); + MemTable(const std::string& table_id, const std::shared_ptr& meta, const Options& options); Status Add(VectorSource::Ptr& source); @@ -26,13 +26,17 @@ public: size_t GetStackSize(); + Status Serialize(); + private: const std::string table_id_; - MemTableFileStack mem_table_file_stack_; + MemTableFileList mem_table_file_list_; MetaPtr meta_; + Options options_; + }; //MemTable } // namespace engine diff --git a/cpp/src/db/MemTableFile.cpp b/cpp/src/db/MemTableFile.cpp index 58b76ab834..0ff91de00b 100644 --- a/cpp/src/db/MemTableFile.cpp +++ b/cpp/src/db/MemTableFile.cpp @@ -2,6 +2,7 @@ #include "Constants.h" #include "Log.h" #include "EngineFactory.h" +#include "metrics/Metrics.h" #include @@ -10,9 +11,11 @@ namespace milvus { namespace engine { MemTableFile::MemTableFile(const std::string& table_id, - const std::shared_ptr& meta) : + const std::shared_ptr& meta, + const Options& options) : table_id_(table_id), - meta_(meta) { + meta_(meta), + options_(options) { current_mem_ = 0; auto status = CreateTableFile(); @@ -40,6 +43,13 @@ Status MemTableFile::CreateTableFile() { Status MemTableFile::Add(const VectorSource::Ptr& source) { + if (table_file_schema_.dimension_ <= 0) { + std::string errMsg = "MemTableFile::Add: table_file_schema dimension = " + + std::to_string(table_file_schema_.dimension_) + ", table_id = " + table_file_schema_.table_id_; + ENGINE_LOG_ERROR << errMsg; + return Status::Error(errMsg); + } + size_t singleVectorMemSize = table_file_schema_.dimension_ * VECTOR_TYPE_SIZE; size_t memLeft = GetMemLeft(); if 
(memLeft >= singleVectorMemSize) { @@ -62,11 +72,37 @@ size_t MemTableFile::GetMemLeft() { return (MAX_TABLE_FILE_MEM - current_mem_); } -bool MemTableFile::isFull() { +bool MemTableFile::IsFull() { size_t singleVectorMemSize = table_file_schema_.dimension_ * VECTOR_TYPE_SIZE; return (GetMemLeft() < singleVectorMemSize); } +Status MemTableFile::Serialize() { + + auto start_time = METRICS_NOW_TIME; + + auto size = GetCurrentMem(); + + execution_engine_->Serialize(); + auto end_time = METRICS_NOW_TIME; + auto total_time = METRICS_MICROSECONDS(start_time, end_time); + table_file_schema_.size_ = size; + + server::Metrics::GetInstance().DiskStoreIOSpeedGaugeSet((double)size/total_time); + + table_file_schema_.file_type_ = (size >= options_.index_trigger_size) ? + meta::TableFileSchema::TO_INDEX : meta::TableFileSchema::RAW; + + auto status = meta_->UpdateTableFile(table_file_schema_); + + LOG(DEBUG) << "New " << ((table_file_schema_.file_type_ == meta::TableFileSchema::RAW) ? "raw" : "to_index") + << " file " << table_file_schema_.file_id_ << " of size " << (double)size / (double)M << " M"; + + execution_engine_->Cache(); + + return status; +} + } // namespace engine } // namespace milvus } // namespace zilliz \ No newline at end of file diff --git a/cpp/src/db/MemTableFile.h b/cpp/src/db/MemTableFile.h index 04f30178ea..1be0ae78ba 100644 --- a/cpp/src/db/MemTableFile.h +++ b/cpp/src/db/MemTableFile.h @@ -16,7 +16,7 @@ public: using Ptr = std::shared_ptr; using MetaPtr = meta::Meta::Ptr; - MemTableFile(const std::string& table_id, const std::shared_ptr& meta); + MemTableFile(const std::string& table_id, const std::shared_ptr& meta, const Options& options); Status Add(const VectorSource::Ptr& source); @@ -24,7 +24,9 @@ public: size_t GetMemLeft(); - bool isFull(); + bool IsFull(); + + Status Serialize(); private: @@ -36,6 +38,8 @@ private: MetaPtr meta_; + Options options_; + size_t current_mem_; ExecutionEnginePtr execution_engine_; diff --git a/cpp/src/db/VectorSource.cpp b/cpp/src/db/VectorSource.cpp index f7cef994fa..b113b9ad5e 100644 --- a/cpp/src/db/VectorSource.cpp +++ b/cpp/src/db/VectorSource.cpp @@ -2,6 +2,7 @@ #include "ExecutionEngine.h" #include "EngineFactory.h" #include "Log.h" +#include "metrics/Metrics.h" namespace zilliz { namespace milvus { @@ -21,12 +22,7 @@ Status VectorSource::Add(const ExecutionEnginePtr& execution_engine, const size_t& num_vectors_to_add, size_t& num_vectors_added) { - if (table_file_schema.dimension_ <= 0) { - std::string errMsg = "VectorSource::Add: table_file_schema dimension = " + - std::to_string(table_file_schema.dimension_) + ", table_id = " + table_file_schema.table_id_; - ENGINE_LOG_ERROR << errMsg; - return Status::Error(errMsg); - } + auto start_time = METRICS_NOW_TIME; num_vectors_added = current_num_vectors_added + num_vectors_to_add <= n_ ? 
num_vectors_to_add : n_ - current_num_vectors_added; IDNumbers vector_ids_to_add; @@ -40,6 +36,10 @@ Status VectorSource::Add(const ExecutionEnginePtr& execution_engine, ENGINE_LOG_ERROR << "VectorSource::Add failed: " + status.ToString(); } + auto end_time = METRICS_NOW_TIME; + auto total_time = METRICS_MICROSECONDS(start_time, end_time); + server::Metrics::GetInstance().AddVectorsPerSecondGaugeSet(static_cast(n_), static_cast(table_file_schema.dimension_), total_time); + return status; } diff --git a/cpp/src/db/VectorSource.h b/cpp/src/db/VectorSource.h index 597eee4ad8..dec31f39e1 100644 --- a/cpp/src/db/VectorSource.h +++ b/cpp/src/db/VectorSource.h @@ -28,8 +28,6 @@ public: IDNumbers GetVectorIds(); -// Status Serialize(); - private: const size_t n_; diff --git a/cpp/unittest/db/mem_test.cpp b/cpp/unittest/db/mem_test.cpp index 111914f8a9..f68d1eb8e3 100644 --- a/cpp/unittest/db/mem_test.cpp +++ b/cpp/unittest/db/mem_test.cpp @@ -86,12 +86,13 @@ TEST(MEM_TEST, VECTOR_SOURCE_TEST) { TEST(MEM_TEST, MEM_TABLE_FILE_TEST) { std::shared_ptr impl_ = engine::DBMetaImplFactory::Build(); + auto options = engine::OptionsFactory::Build(); engine::meta::TableSchema table_schema = BuildTableSchema(); auto status = impl_->CreateTable(table_schema); ASSERT_TRUE(status.ok()); - engine::MemTableFile memTableFile(TABLE_NAME, impl_); + engine::MemTableFile memTableFile(TABLE_NAME, impl_, options); int64_t n_100 = 100; std::vector vectors_100; @@ -120,7 +121,7 @@ TEST(MEM_TEST, MEM_TABLE_FILE_TEST) { vector_ids = source_128M->GetVectorIds(); ASSERT_EQ(vector_ids.size(), n_max - n_100); - ASSERT_TRUE(memTableFile.isFull()); + ASSERT_TRUE(memTableFile.IsFull()); status = impl_->DropAll(); ASSERT_TRUE(status.ok()); @@ -129,6 +130,7 @@ TEST(MEM_TEST, MEM_TABLE_FILE_TEST) { TEST(MEM_TEST, MEM_TABLE_TEST) { std::shared_ptr impl_ = engine::DBMetaImplFactory::Build(); + auto options = engine::OptionsFactory::Build(); engine::meta::TableSchema table_schema = BuildTableSchema(); auto status = impl_->CreateTable(table_schema); @@ -140,7 +142,7 @@ TEST(MEM_TEST, MEM_TABLE_TEST) { engine::VectorSource::Ptr source_100 = std::make_shared(n_100, vectors_100.data()); - engine::MemTable memTable(TABLE_NAME, impl_); + engine::MemTable memTable(TABLE_NAME, impl_, options); status = memTable.Add(source_100); ASSERT_TRUE(status.ok()); @@ -184,6 +186,9 @@ TEST(MEM_TEST, MEM_TABLE_TEST) { int expectedStackSize = 2 + std::ceil((n_1G - n_100) * singleVectorMem / engine::MAX_TABLE_FILE_MEM); ASSERT_EQ(memTable.GetStackSize(), expectedStackSize); + status = memTable.Serialize(); + ASSERT_TRUE(status.ok()); + status = impl_->DropAll(); ASSERT_TRUE(status.ok()); } From 6a6722a71c3544856a933d432ac9197a90c7e60f Mon Sep 17 00:00:00 2001 From: zhiru Date: Sun, 7 Jul 2019 13:50:39 +0800 Subject: [PATCH 05/91] add mem manager Former-commit-id: c9d77a1d0e9df6679c90fddefee22123cfb0acac --- cpp/src/db/DBImpl.cpp | 3 +- cpp/src/db/DBImpl.h | 4 +- cpp/src/db/Factories.cpp | 11 +++ cpp/src/db/Factories.h | 5 ++ cpp/src/db/MemManager.h | 14 ++-- cpp/src/db/MemManagerAbstract.h | 25 ++++++ cpp/src/db/MemTable.cpp | 10 ++- cpp/src/db/MemTable.h | 6 +- cpp/src/db/NewMemManager.cpp | 92 +++++++++++++++++++++ cpp/src/db/NewMemManager.h | 54 +++++++++++++ cpp/src/db/VectorSource.cpp | 15 +++- cpp/unittest/db/mem_test.cpp | 137 +++++++++++++++++++++++++++++++- 12 files changed, 356 insertions(+), 20 deletions(-) create mode 100644 cpp/src/db/MemManagerAbstract.h create mode 100644 cpp/src/db/NewMemManager.cpp create mode 100644 cpp/src/db/NewMemManager.h 
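
The commit below swaps DBImpl's hard-wired MemManager for the MemManagerAbstract interface, selected through the new MemManagerFactory, and routes inserts through MemTable and MemTableFile instead of the original MemVectors-based manager. A minimal usage sketch of that path follows; it assumes a table already created through the meta layer, and the table name, dimension and vector count are illustrative values, not part of the patch.

```
// Minimal sketch of the insert path added by this patch. Assumes a table
// named "example_table" already exists in the meta store; the name, the
// dimension and the vector count below are illustrative only.
#include "db/Factories.h"           // MemManagerFactory, OptionsFactory
#include "db/MemManagerAbstract.h"  // MemManagerAbstractPtr
#include "db/Meta.h"

#include <memory>
#include <set>
#include <string>
#include <vector>

using namespace zilliz::milvus::engine;

void InsertSketch(const std::shared_ptr<meta::Meta>& meta) {
    Options options = OptionsFactory::Build();
    // The factory hands back the new MemTable/MemTableFile based manager.
    MemManagerAbstractPtr mem_mgr = MemManagerFactory::Build(meta, options);

    const size_t dim = 256, n = 100;
    std::vector<float> vectors(n * dim, 0.5f);  // caller-owned raw vector data
    IDNumbers ids;                              // filled with generated ids on success
    auto status = mem_mgr->InsertVectors("example_table", n, vectors.data(), ids);

    if (status.ok()) {
        // Flush: immutable MemTables are serialized into table files on disk.
        std::set<std::string> flushed_tables;
        mem_mgr->Serialize(flushed_tables);
    }
}
```
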
diff --git a/cpp/src/db/DBImpl.cpp b/cpp/src/db/DBImpl.cpp index 0a1e8651e1..09a7c72201 100644 --- a/cpp/src/db/DBImpl.cpp +++ b/cpp/src/db/DBImpl.cpp @@ -87,8 +87,7 @@ DBImpl::DBImpl(const Options& options) compact_thread_pool_(1, 1), index_thread_pool_(1, 1) { meta_ptr_ = DBMetaImplFactory::Build(options.meta, options.mode); - mem_mgr_ = std::make_shared(meta_ptr_, options_); - // mem_mgr_ = (MemManagerPtr)(new MemManager(meta_ptr_, options_)); + mem_mgr_ = MemManagerFactory::Build(meta_ptr_, options_); if (options.mode != Options::MODE::READ_ONLY) { StartTimerTasks(); } diff --git a/cpp/src/db/DBImpl.h b/cpp/src/db/DBImpl.h index 9dcd174f8b..5601f1a33b 100644 --- a/cpp/src/db/DBImpl.h +++ b/cpp/src/db/DBImpl.h @@ -9,6 +9,7 @@ #include "MemManager.h" #include "Types.h" #include "utils/ThreadPool.h" +#include "MemManagerAbstract.h" #include #include @@ -33,7 +34,6 @@ class Meta; class DBImpl : public DB { public: using MetaPtr = meta::Meta::Ptr; - using MemManagerPtr = typename MemManager::Ptr; explicit DBImpl(const Options &options); @@ -123,7 +123,7 @@ class DBImpl : public DB { std::thread bg_timer_thread_; MetaPtr meta_ptr_; - MemManagerPtr mem_mgr_; + MemManagerAbstractPtr mem_mgr_; server::ThreadPool compact_thread_pool_; std::list> compact_thread_results_; diff --git a/cpp/src/db/Factories.cpp b/cpp/src/db/Factories.cpp index 4b24bd3a1c..d51727cbff 100644 --- a/cpp/src/db/Factories.cpp +++ b/cpp/src/db/Factories.cpp @@ -6,6 +6,8 @@ #include #include "Factories.h" #include "DBImpl.h" +#include "MemManager.h" +#include "NewMemManager.h" #include #include @@ -98,6 +100,15 @@ DB* DBFactory::Build(const Options& options) { return new DBImpl(options); } +MemManagerAbstractPtr MemManagerFactory::Build(const std::shared_ptr& meta, + const Options& options) { + bool useNew = true; + if (useNew) { + return std::make_shared(meta, options); + } + return std::make_shared(meta, options); +} + } // namespace engine } // namespace milvus } // namespace zilliz diff --git a/cpp/src/db/Factories.h b/cpp/src/db/Factories.h index 889922b17a..567bc0a8bc 100644 --- a/cpp/src/db/Factories.h +++ b/cpp/src/db/Factories.h @@ -10,6 +10,7 @@ #include "MySQLMetaImpl.h" #include "Options.h" #include "ExecutionEngine.h" +#include "MemManagerAbstract.h" #include #include @@ -36,6 +37,10 @@ struct DBFactory { static DB* Build(const Options&); }; +struct MemManagerFactory { + static MemManagerAbstractPtr Build(const std::shared_ptr& meta, const Options& options); +}; + } // namespace engine } // namespace milvus } // namespace zilliz diff --git a/cpp/src/db/MemManager.h b/cpp/src/db/MemManager.h index 0ce88d504d..95303889db 100644 --- a/cpp/src/db/MemManager.h +++ b/cpp/src/db/MemManager.h @@ -9,13 +9,13 @@ #include "IDGenerator.h" #include "Status.h" #include "Meta.h" +#include "MemManagerAbstract.h" #include #include #include #include #include -#include namespace zilliz { namespace milvus { @@ -62,7 +62,7 @@ private: -class MemManager { +class MemManager : public MemManagerAbstract { public: using MetaPtr = meta::Meta::Ptr; using MemVectorsPtr = typename MemVectors::Ptr; @@ -71,16 +71,16 @@ public: MemManager(const std::shared_ptr& meta, const Options& options) : meta_(meta), options_(options) {} - MemVectorsPtr GetMemByTable(const std::string& table_id); - Status InsertVectors(const std::string& table_id, - size_t n, const float* vectors, IDNumbers& vector_ids); + size_t n, const float* vectors, IDNumbers& vector_ids) override; - Status Serialize(std::set& table_ids); + Status Serialize(std::set& table_ids) 
override; - Status EraseMemVector(const std::string& table_id); + Status EraseMemVector(const std::string& table_id) override; private: + MemVectorsPtr GetMemByTable(const std::string& table_id); + Status InsertVectorsNoLock(const std::string& table_id, size_t n, const float* vectors, IDNumbers& vector_ids); Status ToImmutable(); diff --git a/cpp/src/db/MemManagerAbstract.h b/cpp/src/db/MemManagerAbstract.h new file mode 100644 index 0000000000..74222df1e8 --- /dev/null +++ b/cpp/src/db/MemManagerAbstract.h @@ -0,0 +1,25 @@ +#pragma once + +#include + +namespace zilliz { +namespace milvus { +namespace engine { + +class MemManagerAbstract { +public: + + virtual Status InsertVectors(const std::string& table_id, + size_t n, const float* vectors, IDNumbers& vector_ids) = 0; + + virtual Status Serialize(std::set& table_ids) = 0; + + virtual Status EraseMemVector(const std::string& table_id) = 0; + +}; // MemManagerAbstract + +using MemManagerAbstractPtr = std::shared_ptr; + +} // namespace engine +} // namespace milvus +} // namespace zilliz \ No newline at end of file diff --git a/cpp/src/db/MemTable.cpp b/cpp/src/db/MemTable.cpp index 86554695c8..b282ad375a 100644 --- a/cpp/src/db/MemTable.cpp +++ b/cpp/src/db/MemTable.cpp @@ -44,7 +44,7 @@ void MemTable::GetCurrentMemTableFile(MemTableFile::Ptr& mem_table_file) { mem_table_file = mem_table_file_list_.back(); } -size_t MemTable::GetStackSize() { +size_t MemTable::GetTableFileCount() { return mem_table_file_list_.size(); } @@ -60,6 +60,14 @@ Status MemTable::Serialize() { return Status::OK(); } +bool MemTable::Empty() { + return mem_table_file_list_.empty(); +} + +std::string MemTable::GetTableId() { + return table_id_; +} + } // namespace engine } // namespace milvus } // namespace zilliz \ No newline at end of file diff --git a/cpp/src/db/MemTable.h b/cpp/src/db/MemTable.h index d5c7cc9e85..e09d6ddac1 100644 --- a/cpp/src/db/MemTable.h +++ b/cpp/src/db/MemTable.h @@ -24,10 +24,14 @@ public: void GetCurrentMemTableFile(MemTableFile::Ptr& mem_table_file); - size_t GetStackSize(); + size_t GetTableFileCount(); Status Serialize(); + bool Empty(); + + std::string GetTableId(); + private: const std::string table_id_; diff --git a/cpp/src/db/NewMemManager.cpp b/cpp/src/db/NewMemManager.cpp new file mode 100644 index 0000000000..19aba68eb7 --- /dev/null +++ b/cpp/src/db/NewMemManager.cpp @@ -0,0 +1,92 @@ +#include "NewMemManager.h" +#include "VectorSource.h" + +namespace zilliz { +namespace milvus { +namespace engine { + +NewMemManager::MemTablePtr NewMemManager::GetMemByTable(const std::string& table_id) { + auto memIt = mem_id_map_.find(table_id); + if (memIt != mem_id_map_.end()) { + return memIt->second; + } + + mem_id_map_[table_id] = std::make_shared(table_id, meta_, options_); + return mem_id_map_[table_id]; +} + +Status NewMemManager::InsertVectors(const std::string& table_id_, + size_t n_, + const float* vectors_, + IDNumbers& vector_ids_) { + + + std::unique_lock lock(mutex_); + + return InsertVectorsNoLock(table_id_, n_, vectors_, vector_ids_); +} + +Status NewMemManager::InsertVectorsNoLock(const std::string& table_id, + size_t n, + const float* vectors, + IDNumbers& vector_ids) { + MemTablePtr mem = GetMemByTable(table_id); + VectorSource::Ptr source = std::make_shared(n, vectors); + + auto status = mem->Add(source); + if (status.ok()) { + vector_ids = source->GetVectorIds(); + } + return status; +} + +Status NewMemManager::ToImmutable() { + std::unique_lock lock(mutex_); + MemIdMap temp_map; + for (auto& kv: mem_id_map_) { + 
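        // Move tables that still hold buffered vectors to the immutable list; empty tables stay in the mutable map.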
if(kv.second->Empty()) { + temp_map.insert(kv); + continue;//empty table, no need to serialize + } + immu_mem_list_.push_back(kv.second); + } + + mem_id_map_.swap(temp_map); + return Status::OK(); +} + +Status NewMemManager::Serialize(std::set& table_ids) { + ToImmutable(); + std::unique_lock lock(serialization_mtx_); + table_ids.clear(); + for (auto& mem : immu_mem_list_) { + mem->Serialize(); + table_ids.insert(mem->GetTableId()); + } + immu_mem_list_.clear(); + return Status::OK(); +} + +Status NewMemManager::EraseMemVector(const std::string& table_id) { + {//erase MemVector from rapid-insert cache + std::unique_lock lock(mutex_); + mem_id_map_.erase(table_id); + } + + {//erase MemVector from serialize cache + std::unique_lock lock(serialization_mtx_); + MemList temp_list; + for (auto& mem : immu_mem_list_) { + if(mem->GetTableId() != table_id) { + temp_list.push_back(mem); + } + } + immu_mem_list_.swap(temp_list); + } + + return Status::OK(); +} + +} // namespace engine +} // namespace milvus +} // namespace zilliz \ No newline at end of file diff --git a/cpp/src/db/NewMemManager.h b/cpp/src/db/NewMemManager.h new file mode 100644 index 0000000000..a5f5a9ca13 --- /dev/null +++ b/cpp/src/db/NewMemManager.h @@ -0,0 +1,54 @@ +#pragma once + +#include "Meta.h" +#include "MemTable.h" +#include "Status.h" +#include "MemManagerAbstract.h" + +#include +#include +#include +#include +#include + +namespace zilliz { +namespace milvus { +namespace engine { + +class NewMemManager : public MemManagerAbstract { +public: + using MetaPtr = meta::Meta::Ptr; + using Ptr = std::shared_ptr; + using MemTablePtr = typename MemTable::Ptr; + + NewMemManager(const std::shared_ptr& meta, const Options& options) + : meta_(meta), options_(options) {} + + Status InsertVectors(const std::string& table_id, + size_t n, const float* vectors, IDNumbers& vector_ids) override; + + Status Serialize(std::set& table_ids) override; + + Status EraseMemVector(const std::string& table_id) override; + +private: + MemTablePtr GetMemByTable(const std::string& table_id); + + Status InsertVectorsNoLock(const std::string& table_id, + size_t n, const float* vectors, IDNumbers& vector_ids); + Status ToImmutable(); + + using MemIdMap = std::map; + using MemList = std::vector; + MemIdMap mem_id_map_; + MemList immu_mem_list_; + MetaPtr meta_; + Options options_; + std::mutex mutex_; + std::mutex serialization_mtx_; +}; // NewMemManager + + +} // namespace engine +} // namespace milvus +} // namespace zilliz \ No newline at end of file diff --git a/cpp/src/db/VectorSource.cpp b/cpp/src/db/VectorSource.cpp index b113b9ad5e..d032be51f6 100644 --- a/cpp/src/db/VectorSource.cpp +++ b/cpp/src/db/VectorSource.cpp @@ -24,13 +24,18 @@ Status VectorSource::Add(const ExecutionEnginePtr& execution_engine, auto start_time = METRICS_NOW_TIME; - num_vectors_added = current_num_vectors_added + num_vectors_to_add <= n_ ? num_vectors_to_add : n_ - current_num_vectors_added; + num_vectors_added = current_num_vectors_added + num_vectors_to_add <= n_ ? 
+ num_vectors_to_add : n_ - current_num_vectors_added; IDNumbers vector_ids_to_add; id_generator_->GetNextIDNumbers(num_vectors_added, vector_ids_to_add); - Status status = execution_engine->AddWithIds(num_vectors_added, vectors_ + current_num_vectors_added, vector_ids_to_add.data()); + Status status = execution_engine->AddWithIds(num_vectors_added, + vectors_ + current_num_vectors_added * table_file_schema.dimension_, + vector_ids_to_add.data()); if (status.ok()) { current_num_vectors_added += num_vectors_added; - vector_ids_.insert(vector_ids_.end(), vector_ids_to_add.begin(), vector_ids_to_add.end()); + vector_ids_.insert(vector_ids_.end(), + std::make_move_iterator(vector_ids_to_add.begin()), + std::make_move_iterator(vector_ids_to_add.end())); } else { ENGINE_LOG_ERROR << "VectorSource::Add failed: " + status.ToString(); @@ -38,7 +43,9 @@ Status VectorSource::Add(const ExecutionEnginePtr& execution_engine, auto end_time = METRICS_NOW_TIME; auto total_time = METRICS_MICROSECONDS(start_time, end_time); - server::Metrics::GetInstance().AddVectorsPerSecondGaugeSet(static_cast(n_), static_cast(table_file_schema.dimension_), total_time); + server::Metrics::GetInstance().AddVectorsPerSecondGaugeSet(static_cast(n_), + static_cast(table_file_schema.dimension_), + total_time); return status; } diff --git a/cpp/unittest/db/mem_test.cpp b/cpp/unittest/db/mem_test.cpp index f68d1eb8e3..915610adcc 100644 --- a/cpp/unittest/db/mem_test.cpp +++ b/cpp/unittest/db/mem_test.cpp @@ -7,6 +7,11 @@ #include "db/Factories.h" #include "db/Constants.h" #include "db/EngineFactory.h" +#include "metrics/Metrics.h" + +#include +#include +#include using namespace zilliz::milvus; @@ -29,6 +34,9 @@ namespace { vectors.clear(); vectors.resize(n*TABLE_DIM); float* data = vectors.data(); +// std::random_device rd; +// std::mt19937 gen(rd()); +// std::uniform_real_distribution<> dis(0.0, 1.0); for(int i = 0; i < n; i++) { for(int j = 0; j < TABLE_DIM; j++) data[TABLE_DIM * i + j] = drand48(); data[TABLE_DIM * i] += i / 2000.; @@ -169,7 +177,7 @@ TEST(MEM_TEST, MEM_TABLE_TEST) { memTable.GetCurrentMemTableFile(memTableFile); ASSERT_EQ(memTableFile->GetCurrentMem(), n_100 * singleVectorMem); - ASSERT_EQ(memTable.GetStackSize(), 2); + ASSERT_EQ(memTable.GetTableFileCount(), 2); int64_t n_1G = 1024000; std::vector vectors_1G; @@ -183,8 +191,8 @@ TEST(MEM_TEST, MEM_TABLE_TEST) { vector_ids = source_1G->GetVectorIds(); ASSERT_EQ(vector_ids.size(), n_1G); - int expectedStackSize = 2 + std::ceil((n_1G - n_100) * singleVectorMem / engine::MAX_TABLE_FILE_MEM); - ASSERT_EQ(memTable.GetStackSize(), expectedStackSize); + int expectedTableFileCount = 2 + std::ceil((n_1G - n_100) * singleVectorMem / engine::MAX_TABLE_FILE_MEM); + ASSERT_EQ(memTable.GetTableFileCount(), expectedTableFileCount); status = memTable.Serialize(); ASSERT_TRUE(status.ok()); @@ -193,4 +201,127 @@ TEST(MEM_TEST, MEM_TABLE_TEST) { ASSERT_TRUE(status.ok()); } +TEST(MEM_TEST, MEM_MANAGER_TEST) { + + auto options = engine::OptionsFactory::Build(); + options.meta.path = "/tmp/milvus_test"; + options.meta.backend_uri = "sqlite://:@:/"; + auto db_ = engine::DBFactory::Build(options); + + engine::meta::TableSchema table_info = BuildTableSchema(); + engine::Status stat = db_->CreateTable(table_info); + + engine::meta::TableSchema table_info_get; + table_info_get.table_id_ = TABLE_NAME; + stat = db_->DescribeTable(table_info_get); + ASSERT_STATS(stat); + ASSERT_EQ(table_info_get.dimension_, TABLE_DIM); + + std::map> search_vectors; +// std::map> vectors_ids_map; + { + 
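        // Insert ~1M vectors, give the background flush a few seconds, then sample
        // vectors and check below that each comes back as its own nearest neighbour.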
engine::IDNumbers vector_ids; + int64_t nb = 1024000; + std::vector xb; + BuildVectors(nb, xb); + engine::Status status = db_->InsertVectors(TABLE_NAME, nb, xb.data(), vector_ids); + ASSERT_TRUE(status.ok()); + +// std::ofstream myfile("mem_test.txt"); +// for (int64_t i = 0; i < nb; ++i) { +// int64_t vector_id = vector_ids[i]; +// std::vector vectors; +// for (int64_t j = 0; j < TABLE_DIM; j++) { +// vectors.emplace_back(xb[i*TABLE_DIM + j]); +//// std::cout << xb[i*TABLE_DIM + j] << std::endl; +// } +// vectors_ids_map[vector_id] = vectors; +// } + + std::this_thread::sleep_for(std::chrono::seconds(3)); + + std::random_device rd; + std::mt19937 gen(rd()); + std::uniform_int_distribution dis(0, nb - 1); + + int64_t numQuery = 1000; + for (int64_t i = 0; i < numQuery; ++i) { + int64_t index = dis(gen); + std::vector search; + for (int64_t j = 0; j < TABLE_DIM; j++) { + search.push_back(xb[index * TABLE_DIM + j]); + } + search_vectors.insert(std::make_pair(vector_ids[index], search)); +// std::cout << "index: " << index << " vector_ids[index]: " << vector_ids[index] << std::endl; + } + +// for (int64_t i = 0; i < nb; i += 100000) { +// std::vector search; +// for (int64_t j = 0; j < TABLE_DIM; j++) { +// search.push_back(xb[i * TABLE_DIM + j]); +// } +// search_vectors.insert(std::make_pair(vector_ids[i], search)); +// } + + } + + int k = 10; + for(auto& pair : search_vectors) { + auto& search = pair.second; + engine::QueryResults results; + stat = db_->Query(TABLE_NAME, k, 1, search.data(), results); + for(int t = 0; t < k; t++) { +// std::cout << "ID=" << results[0][t].first << " DISTANCE=" << results[0][t].second << std::endl; + +// std::cout << vectors_ids_map[results[0][t].first].size() << std::endl; +// for (auto& data : vectors_ids_map[results[0][t].first]) { +// std::cout << data << " "; +// } +// std::cout << std::endl; + } + // std::cout << "results[0][0].first: " << results[0][0].first << " pair.first: " << pair.first << " results[0][0].second: " << results[0][0].second << std::endl; + ASSERT_EQ(results[0][0].first, pair.first); + ASSERT_LT(results[0][0].second, 0.00001); + } + + stat = db_->DropAll(); + ASSERT_TRUE(stat.ok()); + +} + +TEST(MEM_TEST, INSERT_TEST) { + + auto options = engine::OptionsFactory::Build(); + options.meta.path = "/tmp/milvus_test"; + options.meta.backend_uri = "sqlite://:@:/"; + auto db_ = engine::DBFactory::Build(options); + + engine::meta::TableSchema table_info = BuildTableSchema(); + engine::Status stat = db_->CreateTable(table_info); + + engine::meta::TableSchema table_info_get; + table_info_get.table_id_ = TABLE_NAME; + stat = db_->DescribeTable(table_info_get); + ASSERT_STATS(stat); + ASSERT_EQ(table_info_get.dimension_, TABLE_DIM); + + auto start_time = METRICS_NOW_TIME; + + int insert_loop = 1000; + for (int i = 0; i < insert_loop; ++i) { + int64_t nb = 204800; + std::vector xb; + BuildVectors(nb, xb); + engine::IDNumbers vector_ids; + engine::Status status = db_->InsertVectors(TABLE_NAME, nb, xb.data(), vector_ids); + ASSERT_TRUE(status.ok()); + } + auto end_time = METRICS_NOW_TIME; + auto total_time = METRICS_MICROSECONDS(start_time, end_time); + std::cout << "total_time(ms) : " << total_time << std::endl; + + stat = db_->DropAll(); + ASSERT_TRUE(stat.ok()); + +} From 606ef0b5486630438ea8d2cbf14f2ddff4ae8bcf Mon Sep 17 00:00:00 2001 From: starlord Date: Sun, 7 Jul 2019 17:24:53 +0800 Subject: [PATCH 06/91] new engine Former-commit-id: 178888a85b0cc0394d429dfeac4300971f65079f --- cpp/src/db/EngineFactory.cpp | 23 +++++- 
cpp/src/db/ExecutionEngine.h | 4 +- cpp/src/db/ExecutionEngineImpl.cpp | 116 +++++++++++++++++++++++++++++ cpp/src/db/ExecutionEngineImpl.h | 68 +++++++++++++++++ cpp/src/server/RequestTask.cpp | 4 +- cpp/unittest/db/misc_test.cpp | 2 +- 6 files changed, 211 insertions(+), 6 deletions(-) create mode 100644 cpp/src/db/ExecutionEngineImpl.cpp create mode 100644 cpp/src/db/ExecutionEngineImpl.h diff --git a/cpp/src/db/EngineFactory.cpp b/cpp/src/db/EngineFactory.cpp index bacce70ce4..56a6b4d1d2 100644 --- a/cpp/src/db/EngineFactory.cpp +++ b/cpp/src/db/EngineFactory.cpp @@ -5,13 +5,14 @@ ******************************************************************************/ #include "EngineFactory.h" #include "FaissExecutionEngine.h" +#include "ExecutionEngineImpl.h" #include "Log.h" - namespace zilliz { namespace milvus { namespace engine { +#if 0 ExecutionEnginePtr EngineFactory::Build(uint16_t dimension, const std::string &location, @@ -26,7 +27,7 @@ EngineFactory::Build(uint16_t dimension, break; } - case EngineType::FAISS_IVFFLAT: { + case EngineType::FAISS_IVFFLAT_GPU: { execution_engine_ptr = ExecutionEnginePtr(new FaissExecutionEngine(dimension, location, "IVF", "IDMap,Flat")); break; @@ -41,6 +42,24 @@ EngineFactory::Build(uint16_t dimension, execution_engine_ptr->Init(); return execution_engine_ptr; } +#else +ExecutionEnginePtr +EngineFactory::Build(uint16_t dimension, + const std::string &location, + EngineType type) { + + if(type == EngineType::INVALID) { + ENGINE_LOG_ERROR << "Unsupported engine type"; + return nullptr; + } + + ExecutionEnginePtr execution_engine_ptr = + std::make_shared(dimension, location, type); + + execution_engine_ptr->Init(); + return execution_engine_ptr; +} +#endif } } diff --git a/cpp/src/db/ExecutionEngine.h b/cpp/src/db/ExecutionEngine.h index f8c05f6f9d..a101acd3cd 100644 --- a/cpp/src/db/ExecutionEngine.h +++ b/cpp/src/db/ExecutionEngine.h @@ -17,7 +17,9 @@ namespace engine { enum class EngineType { INVALID = 0, FAISS_IDMAP = 1, - FAISS_IVFFLAT, + FAISS_IVFFLAT_GPU, + FAISS_IVFFLAT_CPU, + SPTAG_KDT_RNT_CPU, }; class ExecutionEngine { diff --git a/cpp/src/db/ExecutionEngineImpl.cpp b/cpp/src/db/ExecutionEngineImpl.cpp new file mode 100644 index 0000000000..32b7826430 --- /dev/null +++ b/cpp/src/db/ExecutionEngineImpl.cpp @@ -0,0 +1,116 @@ +/******************************************************************************* + * Copyright 上海赜睿信息科技有限公司(Zilliz) - All Rights Reserved + * Unauthorized copying of this file, via any medium is strictly prohibited. + * Proprietary and confidential. 
+ ******************************************************************************/ +#include "ExecutionEngineImpl.h" +#include "Log.h" + +#include "wrapper/knowhere/vec_impl.h" +#include "knowhere/index/vector_index/ivf.h" +#include "knowhere/index/vector_index/gpu_ivf.h" +#include "knowhere/index/vector_index/cpu_kdt_rng.h" + +namespace zilliz { +namespace milvus { +namespace engine { + + +ExecutionEngineImpl::ExecutionEngineImpl(uint16_t dimension, + const std::string& location, + EngineType type) + : location_(location) { + index_ = CreatetVecIndex(type); +} + +vecwise::engine::VecIndexPtr ExecutionEngineImpl::CreatetVecIndex(EngineType type) { + std::shared_ptr index; + switch(type) { + case EngineType::FAISS_IDMAP: { + + break; + } + case EngineType::FAISS_IVFFLAT_GPU: { + index = std::make_shared(0); + break; + } + case EngineType::FAISS_IVFFLAT_CPU: { + index = std::make_shared(); + break; + } + case EngineType::SPTAG_KDT_RNT_CPU: { + index = std::make_shared(); + break; + } + default:{ + ENGINE_LOG_ERROR << "Invalid engine type"; + return nullptr; + } + } + + return std::make_shared(index); +} + +Status ExecutionEngineImpl::AddWithIds(long n, const float *xdata, const long *xids) { + + return Status::OK(); +} + +size_t ExecutionEngineImpl::Count() const { + return 0; +} + +size_t ExecutionEngineImpl::Size() const { + return 0; +} + +size_t ExecutionEngineImpl::Dimension() const { + return 0; +} + +size_t ExecutionEngineImpl::PhysicalSize() const { + return 0; +} + +Status ExecutionEngineImpl::Serialize() { + return Status::OK(); +} + +Status ExecutionEngineImpl::Load() { + + return Status::OK(); +} + +Status ExecutionEngineImpl::Merge(const std::string& location) { + + return Status::OK(); +} + +ExecutionEnginePtr +ExecutionEngineImpl::BuildIndex(const std::string& location) { + return nullptr; +} + +Status ExecutionEngineImpl::Search(long n, + const float *data, + long k, + float *distances, + long *labels) const { + + return Status::OK(); +} + +Status ExecutionEngineImpl::Cache() { + + return Status::OK(); +} + +Status ExecutionEngineImpl::Init() { + + return Status::OK(); +} + + +} // namespace engine +} // namespace milvus +} // namespace zilliz diff --git a/cpp/src/db/ExecutionEngineImpl.h b/cpp/src/db/ExecutionEngineImpl.h new file mode 100644 index 0000000000..c720f07158 --- /dev/null +++ b/cpp/src/db/ExecutionEngineImpl.h @@ -0,0 +1,68 @@ +/******************************************************************************* + * Copyright 上海赜睿信息科技有限公司(Zilliz) - All Rights Reserved + * Unauthorized copying of this file, via any medium is strictly prohibited. + * Proprietary and confidential. 
+ ******************************************************************************/ +#pragma once + +#include "ExecutionEngine.h" +#include "wrapper/knowhere/vec_index.h" + +#include +#include + +namespace zilliz { +namespace milvus { +namespace engine { + + +class ExecutionEngineImpl : public ExecutionEngine { +public: + + ExecutionEngineImpl(uint16_t dimension, + const std::string& location, + EngineType type); + + Status AddWithIds(long n, const float *xdata, const long *xids) override; + + size_t Count() const override; + + size_t Size() const override; + + size_t Dimension() const override; + + size_t PhysicalSize() const override; + + Status Serialize() override; + + Status Load() override; + + Status Merge(const std::string& location) override; + + Status Search(long n, + const float *data, + long k, + float *distances, + long *labels) const override; + + ExecutionEnginePtr BuildIndex(const std::string&) override; + + Status Cache() override; + + Status Init() override; + +private: + vecwise::engine::VecIndexPtr CreatetVecIndex(EngineType type); + +protected: + vecwise::engine::VecIndexPtr index_; + + std::string location_; + + size_t nprobe_ = 0; +}; + + +} // namespace engine +} // namespace milvus +} // namespace zilliz diff --git a/cpp/src/server/RequestTask.cpp b/cpp/src/server/RequestTask.cpp index 1b91883af5..07a8305d1f 100644 --- a/cpp/src/server/RequestTask.cpp +++ b/cpp/src/server/RequestTask.cpp @@ -29,7 +29,7 @@ namespace { static std::map map_type = { {0, engine::EngineType::INVALID}, {1, engine::EngineType::FAISS_IDMAP}, - {2, engine::EngineType::FAISS_IVFFLAT}, + {2, engine::EngineType::FAISS_IVFFLAT_GPU}, }; if(map_type.find(type) == map_type.end()) { @@ -43,7 +43,7 @@ namespace { static std::map map_type = { {engine::EngineType::INVALID, 0}, {engine::EngineType::FAISS_IDMAP, 1}, - {engine::EngineType::FAISS_IVFFLAT, 2}, + {engine::EngineType::FAISS_IVFFLAT_GPU, 2}, }; if(map_type.find(type) == map_type.end()) { diff --git a/cpp/unittest/db/misc_test.cpp b/cpp/unittest/db/misc_test.cpp index 4356746fc2..a49c4d5807 100644 --- a/cpp/unittest/db/misc_test.cpp +++ b/cpp/unittest/db/misc_test.cpp @@ -46,7 +46,7 @@ TEST(DBMiscTest, ENGINE_API_TEST) { auto engine_ptr = engine::EngineFactory::Build(128, "/tmp", engine::EngineType::INVALID); ASSERT_EQ(engine_ptr, nullptr); - engine_ptr = engine::EngineFactory::Build(128, "/tmp", engine::EngineType::FAISS_IVFFLAT); + engine_ptr = engine::EngineFactory::Build(128, "/tmp", engine::EngineType::FAISS_IVFFLAT_GPU); ASSERT_NE(engine_ptr, nullptr); engine_ptr = engine::EngineFactory::Build(128, "/tmp", engine::EngineType::FAISS_IDMAP); From fedc8adc511d64254de4f4f4f84c3f3892fd58f4 Mon Sep 17 00:00:00 2001 From: jinhai Date: Sun, 7 Jul 2019 19:16:39 +0800 Subject: [PATCH 07/91] MS-176 Add create table parameter check Former-commit-id: 7b42e581b012853673ec4df3423fbea62f61777a --- cpp/src/server/RequestTask.cpp | 60 +++++++++++------ cpp/src/utils/ValidationUtil.cpp | 74 +++++++++++++++++++++ cpp/src/utils/ValidationUtil.h | 20 ++++++ cpp/unittest/CMakeLists.txt | 4 +- cpp/unittest/db/db_tests.cpp | 13 ++-- cpp/unittest/db/mysql_db_test.cpp | 12 ++-- cpp/unittest/db/search_test.cpp | 5 +- cpp/unittest/faiss_wrapper/wrapper_test.cpp | 5 +- cpp/unittest/utils/CMakeLists.txt | 30 +++++++++ cpp/unittest/utils/ValidationUtilTest.cpp | 61 +++++++++++++++++ 10 files changed, 247 insertions(+), 37 deletions(-) create mode 100644 cpp/src/utils/ValidationUtil.cpp create mode 100644 cpp/src/utils/ValidationUtil.h create mode 100644 
cpp/unittest/utils/CMakeLists.txt create mode 100644 cpp/unittest/utils/ValidationUtilTest.cpp diff --git a/cpp/src/server/RequestTask.cpp b/cpp/src/server/RequestTask.cpp index 1b91883af5..f8e617b9d4 100644 --- a/cpp/src/server/RequestTask.cpp +++ b/cpp/src/server/RequestTask.cpp @@ -8,6 +8,7 @@ #include "utils/CommonUtil.h" #include "utils/Log.h" #include "utils/TimeRecorder.h" +#include "utils/ValidationUtil.h" #include "DBWrapper.h" #include "version.h" @@ -133,19 +134,23 @@ BaseTaskPtr CreateTableTask::Create(const thrift::TableSchema& schema) { ServerError CreateTableTask::OnExecute() { TimeRecorder rc("CreateTableTask"); - + try { //step 1: check arguments - if(schema_.table_name.empty()) { - return SetError(SERVER_INVALID_TABLE_NAME, "Empty table name"); - } - if(schema_.dimension <= 0) { - return SetError(SERVER_INVALID_TABLE_DIMENSION, "Invalid table dimension: " + std::to_string(schema_.dimension)); + ServerError res = SERVER_SUCCESS; + res = ValidateTableName(schema_.table_name); + if(res != SERVER_SUCCESS) { + return res; } - engine::EngineType engine_type = EngineType(schema_.index_type); - if(engine_type == engine::EngineType::INVALID) { - return SetError(SERVER_INVALID_INDEX_TYPE, "Invalid index type: " + std::to_string(schema_.index_type)); + res = ValidateTableDimension(schema_.dimension); + if(res != SERVER_SUCCESS) { + return res; + } + + res = ValidateTableIndexType(schema_.index_type); + if(res != SERVER_SUCCESS) { + return res; } //step 2: construct table schema @@ -187,8 +192,10 @@ ServerError DescribeTableTask::OnExecute() { try { //step 1: check arguments - if(table_name_.empty()) { - return SetError(SERVER_INVALID_TABLE_NAME, "Empty table name"); + ServerError res = SERVER_SUCCESS; + res = ValidateTableName(table_name_); + if(res != SERVER_SUCCESS) { + return res; } //step 2: get table info @@ -230,10 +237,11 @@ ServerError HasTableTask::OnExecute() { TimeRecorder rc("HasTableTask"); //step 1: check arguments - if(table_name_.empty()) { - return SetError(SERVER_INVALID_TABLE_NAME, "Empty table name"); + ServerError res = SERVER_SUCCESS; + res = ValidateTableName(table_name_); + if(res != SERVER_SUCCESS) { + return res; } - //step 2: check table existence engine::Status stat = DBWrapper::DB()->HasTable(table_name_, has_table_); if(!stat.ok()) { @@ -264,8 +272,10 @@ ServerError DeleteTableTask::OnExecute() { TimeRecorder rc("DeleteTableTask"); //step 1: check arguments - if (table_name_.empty()) { - return SetError(SERVER_INVALID_TABLE_NAME, "Empty table name"); + ServerError res = SERVER_SUCCESS; + res = ValidateTableName(table_name_); + if(res != SERVER_SUCCESS) { + return res; } //step 2: check table existence @@ -346,8 +356,10 @@ ServerError AddVectorTask::OnExecute() { TimeRecorder rc("AddVectorTask"); //step 1: check arguments - if (table_name_.empty()) { - return SetError(SERVER_INVALID_TABLE_NAME, "Empty table name"); + ServerError res = SERVER_SUCCESS; + res = ValidateTableName(table_name_); + if(res != SERVER_SUCCESS) { + return res; } if(record_array_.empty()) { @@ -435,8 +447,10 @@ ServerError SearchVectorTask::OnExecute() { TimeRecorder rc("SearchVectorTask"); //step 1: check arguments - if (table_name_.empty()) { - return SetError(SERVER_INVALID_TABLE_NAME, "Empty table name"); + ServerError res = SERVER_SUCCESS; + res = ValidateTableName(table_name_); + if(res != SERVER_SUCCESS) { + return res; } if(top_k_ <= 0) { @@ -548,8 +562,10 @@ ServerError GetTableRowCountTask::OnExecute() { TimeRecorder rc("GetTableRowCountTask"); //step 1: check arguments - 
if (table_name_.empty()) { - return SetError(SERVER_INVALID_TABLE_NAME, "Empty table name"); + ServerError res = SERVER_SUCCESS; + res = ValidateTableName(table_name_); + if(res != SERVER_SUCCESS) { + return res; } //step 2: get row count diff --git a/cpp/src/utils/ValidationUtil.cpp b/cpp/src/utils/ValidationUtil.cpp new file mode 100644 index 0000000000..b4bbd3346a --- /dev/null +++ b/cpp/src/utils/ValidationUtil.cpp @@ -0,0 +1,74 @@ +#include +#include "ValidationUtil.h" +#include "Log.h" + + +namespace zilliz { +namespace milvus { +namespace server { + +constexpr size_t table_name_size_limit = 16384; +constexpr int64_t table_dimension_limit = 16384; + +ServerError +ValidateTableName(const std::string &table_name) { + + // Table name shouldn't be empty. + if (table_name.empty()) { + SERVER_LOG_ERROR << "Empty table name"; + return SERVER_INVALID_TABLE_NAME; + } + + // Table name size shouldn't exceed 16384. + if (table_name.size() > table_name_size_limit) { + SERVER_LOG_ERROR << "Table name size exceed the limitation"; + return SERVER_INVALID_TABLE_NAME; + } + + // Table name first character should be underscore or character. + char first_char = table_name[0]; + if (first_char != '_' && std::isalpha(first_char) == 0) { + SERVER_LOG_ERROR << "Table name first character isn't underscore or character: " << first_char; + return SERVER_INVALID_TABLE_NAME; + } + + int64_t table_name_size = table_name.size(); + for (int64_t i = 1; i < table_name_size; ++i) { + char name_char = table_name[i]; + if (name_char != '_' && std::isalnum(name_char) == 0) { + SERVER_LOG_ERROR << "Table name character isn't underscore or alphanumber: " << name_char; + return SERVER_INVALID_TABLE_NAME; + } + } + + return SERVER_SUCCESS; +} + +ServerError +ValidateTableDimension(int64_t dimension) { + if (dimension <= 0 || dimension > table_dimension_limit) { + SERVER_LOG_ERROR << "Table dimension excceed the limitation: " << table_dimension_limit; + return SERVER_INVALID_VECTOR_DIMENSION; + } else { + return SERVER_SUCCESS; + } +} + +ServerError +ValidateTableIndexType(int32_t index_type) { + auto engine_type = engine::EngineType(index_type); + switch (engine_type) { + case engine::EngineType::FAISS_IDMAP: + case engine::EngineType::FAISS_IVFFLAT: { + SERVER_LOG_DEBUG << "Index type: " << index_type; + return SERVER_SUCCESS; + } + default: { + return SERVER_INVALID_INDEX_TYPE; + } + } +} + +} +} +} \ No newline at end of file diff --git a/cpp/src/utils/ValidationUtil.h b/cpp/src/utils/ValidationUtil.h new file mode 100644 index 0000000000..608ac22682 --- /dev/null +++ b/cpp/src/utils/ValidationUtil.h @@ -0,0 +1,20 @@ +#pragma once + +#include "Error.h" + +namespace zilliz { +namespace milvus { +namespace server { + +ServerError +ValidateTableName(const std::string& table_name); + +ServerError +ValidateTableDimension(int64_t dimension); + +ServerError +ValidateTableIndexType(int32_t index_type); + +} +} +} \ No newline at end of file diff --git a/cpp/unittest/CMakeLists.txt b/cpp/unittest/CMakeLists.txt index 38046617ae..62e32f6d1d 100644 --- a/cpp/unittest/CMakeLists.txt +++ b/cpp/unittest/CMakeLists.txt @@ -12,7 +12,6 @@ aux_source_directory(${MILVUS_ENGINE_SRC}/config config_files) set(unittest_srcs ${CMAKE_CURRENT_SOURCE_DIR}/main.cpp) - #${EASYLOGGINGPP_INCLUDE_DIR}/easylogging++.cc) set(require_files ${MILVUS_ENGINE_SRC}/server/ServerConfig.cpp @@ -44,4 +43,5 @@ add_subdirectory(db) add_subdirectory(faiss_wrapper) #add_subdirectory(license) add_subdirectory(metrics) -add_subdirectory(storage) \ No newline at end of 
file +add_subdirectory(storage) +add_subdirectory(utils) \ No newline at end of file diff --git a/cpp/unittest/db/db_tests.cpp b/cpp/unittest/db/db_tests.cpp index bd17081af8..625211cae7 100644 --- a/cpp/unittest/db/db_tests.cpp +++ b/cpp/unittest/db/db_tests.cpp @@ -3,17 +3,20 @@ // Unauthorized copying of this file, via any medium is strictly prohibited. // Proprietary and confidential. //////////////////////////////////////////////////////////////////////////////// -#include -#include -#include -#include - #include "utils.h" #include "db/DB.h" #include "db/DBImpl.h" #include "db/MetaConsts.h" #include "db/Factories.h" +#include +#include + +#include + +#include +#include + using namespace zilliz::milvus; namespace { diff --git a/cpp/unittest/db/mysql_db_test.cpp b/cpp/unittest/db/mysql_db_test.cpp index 7fdb30a204..0e24cacdfd 100644 --- a/cpp/unittest/db/mysql_db_test.cpp +++ b/cpp/unittest/db/mysql_db_test.cpp @@ -3,17 +3,19 @@ // Unauthorized copying of this file, via any medium is strictly prohibited. // Proprietary and confidential. //////////////////////////////////////////////////////////////////////////////// -#include -#include -#include -#include - #include "utils.h" #include "db/DB.h" #include "db/DBImpl.h" #include "db/MetaConsts.h" #include "db/Factories.h" +#include +#include +#include + +#include +#include + using namespace zilliz::milvus; namespace { diff --git a/cpp/unittest/db/search_test.cpp b/cpp/unittest/db/search_test.cpp index db10bcbadf..ce99ea78f7 100644 --- a/cpp/unittest/db/search_test.cpp +++ b/cpp/unittest/db/search_test.cpp @@ -3,10 +3,11 @@ // Unauthorized copying of this file, via any medium is strictly prohibited. // Proprietary and confidential. //////////////////////////////////////////////////////////////////////////////// -#include - #include "db/scheduler/task/SearchTask.h" +#include + +#include #include using namespace zilliz::milvus; diff --git a/cpp/unittest/faiss_wrapper/wrapper_test.cpp b/cpp/unittest/faiss_wrapper/wrapper_test.cpp index 67a6c3cde8..6f4a651a55 100644 --- a/cpp/unittest/faiss_wrapper/wrapper_test.cpp +++ b/cpp/unittest/faiss_wrapper/wrapper_test.cpp @@ -4,12 +4,15 @@ // Proprietary and confidential. //////////////////////////////////////////////////////////////////////////////// -#include + #include "wrapper/Operand.h" #include "wrapper/Index.h" #include "wrapper/IndexBuilder.h" +#include +#include + using namespace zilliz::milvus::engine; diff --git a/cpp/unittest/utils/CMakeLists.txt b/cpp/unittest/utils/CMakeLists.txt new file mode 100644 index 0000000000..a46a3b05e1 --- /dev/null +++ b/cpp/unittest/utils/CMakeLists.txt @@ -0,0 +1,30 @@ +#------------------------------------------------------------------------------- +# Copyright 上海赜睿信息科技有限公司(Zilliz) - All Rights Reserved +# Unauthorized copying of this file, via any medium is strictly prohibited. +# Proprietary and confidential. +#------------------------------------------------------------------------------- + +# Make sure that your call to link_directories takes place before your call to the relevant add_executable. 
+include_directories("${CUDA_TOOLKIT_ROOT_DIR}/include") +link_directories("${CUDA_TOOLKIT_ROOT_DIR}/lib64") + +set(validation_util_src + ${MILVUS_ENGINE_SRC}/utils/ValidationUtil.cpp + ${MILVUS_ENGINE_SRC}/utils/ValidationUtil.h) + +set(validation_util_test_src + ${unittest_srcs} + ${validation_util_src} + ${require_files} + ValidationUtilTest.cpp + ) + +add_executable(valication_util_test + ${validation_util_test_src} + ${config_files}) + +target_link_libraries(valication_util_test + ${unittest_libs} + boost_filesystem) + +install(TARGETS valication_util_test DESTINATION bin) \ No newline at end of file diff --git a/cpp/unittest/utils/ValidationUtilTest.cpp b/cpp/unittest/utils/ValidationUtilTest.cpp new file mode 100644 index 0000000000..095614e325 --- /dev/null +++ b/cpp/unittest/utils/ValidationUtilTest.cpp @@ -0,0 +1,61 @@ +//////////////////////////////////////////////////////////////////////////////// +// Copyright 上海赜睿信息科技有限公司(Zilliz) - All Rights Reserved +// Unauthorized copying of this file, via any medium is strictly prohibited. +// Proprietary and confidential. +//////////////////////////////////////////////////////////////////////////////// +#include + +#include "utils/ValidationUtil.h" +#include "utils/Error.h" + +#include + +using namespace zilliz::milvus::server; + +TEST(ValidationUtilTest, TableNameTest) { + std::string table_name = "Normal123_"; + ServerError res = ValidateTableName(table_name); + ASSERT_EQ(res, SERVER_SUCCESS); + + table_name = "12sds"; + res = ValidateTableName(table_name); + ASSERT_EQ(res, SERVER_INVALID_TABLE_NAME); + + table_name = ""; + res = ValidateTableName(table_name); + ASSERT_EQ(res, SERVER_INVALID_TABLE_NAME); + + table_name = "_asdasd"; + res = ValidateTableName(table_name); + ASSERT_EQ(res, SERVER_SUCCESS); + + table_name = "!@#!@"; + res = ValidateTableName(table_name); + ASSERT_EQ(res, SERVER_INVALID_TABLE_NAME); + + table_name = "中文"; + res = ValidateTableName(table_name); + ASSERT_EQ(res, SERVER_INVALID_TABLE_NAME); + + + table_name = std::string('a', 32768); + res = ValidateTableName(table_name); + ASSERT_EQ(res, SERVER_INVALID_TABLE_NAME); +} + + +TEST(ValidationUtilTest, TableDimensionTest) { + ASSERT_EQ(ValidateTableDimension(-1), SERVER_INVALID_VECTOR_DIMENSION); + ASSERT_EQ(ValidateTableDimension(0), SERVER_INVALID_VECTOR_DIMENSION); + ASSERT_EQ(ValidateTableDimension(16385), SERVER_INVALID_VECTOR_DIMENSION); + ASSERT_EQ(ValidateTableDimension(16384), SERVER_SUCCESS); + ASSERT_EQ(ValidateTableDimension(1), SERVER_SUCCESS); +} + +TEST(ValidationUtilTest, TableIndexTypeTest) { + ASSERT_EQ(ValidateTableIndexType(0), SERVER_INVALID_INDEX_TYPE); + ASSERT_EQ(ValidateTableIndexType(1), SERVER_SUCCESS); + ASSERT_EQ(ValidateTableIndexType(2), SERVER_SUCCESS); + ASSERT_EQ(ValidateTableIndexType(3), SERVER_INVALID_INDEX_TYPE); + ASSERT_EQ(ValidateTableIndexType(4), SERVER_INVALID_INDEX_TYPE); +} From 3c851370ac888b30d2e94f84926c4408e74b6b62 Mon Sep 17 00:00:00 2001 From: jinhai Date: Sun, 7 Jul 2019 19:55:16 +0800 Subject: [PATCH 08/91] MS-176 Update table name length Former-commit-id: 76a6e9ccfe4c148a2e9fea81b98e0375be6e3c0e --- cpp/src/utils/ValidationUtil.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cpp/src/utils/ValidationUtil.cpp b/cpp/src/utils/ValidationUtil.cpp index b4bbd3346a..65cd81e670 100644 --- a/cpp/src/utils/ValidationUtil.cpp +++ b/cpp/src/utils/ValidationUtil.cpp @@ -7,7 +7,7 @@ namespace zilliz { namespace milvus { namespace server { -constexpr size_t table_name_size_limit = 16384; +constexpr 
size_t table_name_size_limit = 255; constexpr int64_t table_dimension_limit = 16384; ServerError From 8edaff9517b50b3bc2fba1b1704f5a347686a88c Mon Sep 17 00:00:00 2001 From: starlord Date: Sun, 7 Jul 2019 20:20:30 +0800 Subject: [PATCH 09/91] new engine Former-commit-id: 8f84ec8cbe26733b730528b49ae76c9ea158d359 --- cpp/src/CMakeLists.txt | 23 +++++++++++++++++++++-- cpp/src/sdk/examples/simple/main.cpp | 2 +- cpp/unittest/db/CMakeLists.txt | 13 ++++++++++++- 3 files changed, 34 insertions(+), 4 deletions(-) diff --git a/cpp/src/CMakeLists.txt b/cpp/src/CMakeLists.txt index 6a7fb6835e..cc577e2138 100644 --- a/cpp/src/CMakeLists.txt +++ b/cpp/src/CMakeLists.txt @@ -161,6 +161,17 @@ set(server_libs metrics ) +set(knowhere_libs + knowhere + SPTAGLibStatic + arrow + jemalloc_pic + faiss + openblas + lapack + tbb + ) + add_executable(milvus_server ${config_files} ${server_files} @@ -170,9 +181,17 @@ add_executable(milvus_server ) if (ENABLE_LICENSE STREQUAL "ON") - target_link_libraries(milvus_server ${server_libs} license_check ${third_party_libs}) + target_link_libraries(milvus_server + ${server_libs} + license_check + ${third_party_libs} + ${knowhere_libs}) else () - target_link_libraries(milvus_server ${server_libs} ${third_party_libs}) + target_link_libraries(milvus_server + ${server_libs} + ${third_party_libs} + ${knowhere_libs} + ) endif() if (ENABLE_LICENSE STREQUAL "ON") diff --git a/cpp/src/sdk/examples/simple/main.cpp b/cpp/src/sdk/examples/simple/main.cpp index 499b8a9935..3a7880997d 100644 --- a/cpp/src/sdk/examples/simple/main.cpp +++ b/cpp/src/sdk/examples/simple/main.cpp @@ -51,7 +51,7 @@ main(int argc, char *argv[]) { } ClientTest test; - test.Test(address, port); + test.Test("", port); printf("Client stop...\n"); return 0; diff --git a/cpp/unittest/db/CMakeLists.txt b/cpp/unittest/db/CMakeLists.txt index b2720f7006..3b0fb11b20 100644 --- a/cpp/unittest/db/CMakeLists.txt +++ b/cpp/unittest/db/CMakeLists.txt @@ -49,6 +49,17 @@ set(db_libs mysqlpp ) -target_link_libraries(db_test ${db_libs} ${unittest_libs}) +set(knowhere_libs + knowhere + SPTAGLibStatic + arrow + jemalloc_pic + faiss + openblas + lapack + tbb + ) + +target_link_libraries(db_test ${db_libs} ${unittest_libs} ${knowhere_libs}) install(TARGETS db_test DESTINATION bin) From 57ec2e566903fc14fdb01a16ce0dfdf0745e6396 Mon Sep 17 00:00:00 2001 From: starlord Date: Sun, 7 Jul 2019 20:21:38 +0800 Subject: [PATCH 10/91] new engine Former-commit-id: 4718ebf7b53b3ad523bf9318a26d3389ce0c311b --- cpp/src/sdk/examples/simple/main.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cpp/src/sdk/examples/simple/main.cpp b/cpp/src/sdk/examples/simple/main.cpp index 3a7880997d..499b8a9935 100644 --- a/cpp/src/sdk/examples/simple/main.cpp +++ b/cpp/src/sdk/examples/simple/main.cpp @@ -51,7 +51,7 @@ main(int argc, char *argv[]) { } ClientTest test; - test.Test("", port); + test.Test(address, port); printf("Client stop...\n"); return 0; From fbb25fa91040f592feaaf764983d4b426a4d4bd4 Mon Sep 17 00:00:00 2001 From: starlord Date: Sun, 7 Jul 2019 20:41:52 +0800 Subject: [PATCH 11/91] date range check Former-commit-id: ff8d7ece23d92d377febdf1eab0b1fbd77048c88 --- cpp/src/server/RequestTask.cpp | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/cpp/src/server/RequestTask.cpp b/cpp/src/server/RequestTask.cpp index f8e617b9d4..2454f3a9ab 100644 --- a/cpp/src/server/RequestTask.cpp +++ b/cpp/src/server/RequestTask.cpp @@ -109,7 +109,13 @@ namespace { } long days = (tt_end > tt_start) ? 
(tt_end - tt_start)/DAY_SECONDS : (tt_start - tt_end)/DAY_SECONDS; - for(long i = 0; i <= days; i++) { + if(days == 0) { + error_code = SERVER_INVALID_TIME_RANGE; + error_msg = "Invalid time range: " + range.start_value + " to " + range.end_value; + return ; + } + + for(long i = 0; i < days; i++) { time_t tt_day = tt_start + DAY_SECONDS*i; tm tm_day; CommonUtil::ConvertTime(tt_day, tm_day); From 1410dc8695ac8051a85970d44e00378b588e7cbd Mon Sep 17 00:00:00 2001 From: starlord Date: Mon, 8 Jul 2019 09:42:54 +0800 Subject: [PATCH 12/91] fix build error Former-commit-id: b32b8705608b064a0209be5c8012e5a5d58c1b93 --- cpp/unittest/db/CMakeLists.txt | 13 ++++++------- cpp/unittest/metrics/CMakeLists.txt | 23 ++++++++++++++++------- 2 files changed, 22 insertions(+), 14 deletions(-) diff --git a/cpp/unittest/db/CMakeLists.txt b/cpp/unittest/db/CMakeLists.txt index 3b0fb11b20..44b09d7b25 100644 --- a/cpp/unittest/db/CMakeLists.txt +++ b/cpp/unittest/db/CMakeLists.txt @@ -7,6 +7,7 @@ aux_source_directory(${MILVUS_ENGINE_SRC}/db db_srcs) aux_source_directory(${MILVUS_ENGINE_SRC}/config config_files) aux_source_directory(${MILVUS_ENGINE_SRC}/cache cache_srcs) aux_source_directory(${MILVUS_ENGINE_SRC}/wrapper wrapper_src) +aux_source_directory(${MILVUS_ENGINE_SRC}/wrapper/knowhere knowhere_src) aux_source_directory(./ test_srcs) aux_source_directory(${MILVUS_ENGINE_SRC}/db/scheduler scheduler_files) @@ -30,18 +31,14 @@ set(db_test_src ${db_srcs} ${db_scheduler_srcs} ${wrapper_src} + ${knowhere_src} ${require_files} - ${test_srcs}) + ${test_srcs} + ) cuda_add_executable(db_test ${db_test_src}) set(db_libs - knowhere - faiss - openblas - lapack - cudart - cublas sqlite3 boost_system boost_filesystem @@ -58,6 +55,8 @@ set(knowhere_libs openblas lapack tbb + cudart + cublas ) target_link_libraries(db_test ${db_libs} ${unittest_libs} ${knowhere_libs}) diff --git a/cpp/unittest/metrics/CMakeLists.txt b/cpp/unittest/metrics/CMakeLists.txt index be5a542da6..d7ae12aff8 100644 --- a/cpp/unittest/metrics/CMakeLists.txt +++ b/cpp/unittest/metrics/CMakeLists.txt @@ -12,11 +12,12 @@ include_directories(../../src) -aux_source_directory(../../src/db db_srcs) -aux_source_directory(../../src/config config_files) -aux_source_directory(../../src/cache cache_srcs) -aux_source_directory(../../src/wrapper wrapper_src) -aux_source_directory(../../src/metrics metrics_src) +aux_source_directory(${MILVUS_ENGINE_SRC}/db db_srcs) +aux_source_directory(${MILVUS_ENGINE_SRC}/config config_files) +aux_source_directory(${MILVUS_ENGINE_SRC}/cache cache_srcs) +aux_source_directory(${MILVUS_ENGINE_SRC}/wrapper wrapper_src) +aux_source_directory(${MILVUS_ENGINE_SRC}/wrapper/knowhere knowhere_src) +aux_source_directory(${MILVUS_ENGINE_SRC}/src/metrics metrics_src) aux_source_directory(./ test_srcs) aux_source_directory(${MILVUS_ENGINE_SRC}/db/scheduler scheduler_files) @@ -54,18 +55,26 @@ set(count_test_src ${db_srcs} ${db_scheduler_srcs} ${wrapper_src} + ${knowhere_src} ${metrics_src} ${test_srcs} ) - add_executable(metrics_test ${count_test_src} ${require_files} ) -target_link_libraries(metrics_test +set(knowhere_libs knowhere + SPTAGLibStatic + arrow + jemalloc_pic faiss openblas lapack + tbb + ) + +target_link_libraries(metrics_test + ${knowhere_libs} cudart cublas sqlite3 From 0d725b82aa57c072c3cf5d33bd35af1a4932a139 Mon Sep 17 00:00:00 2001 From: starlord Date: Mon, 8 Jul 2019 10:02:39 +0800 Subject: [PATCH 13/91] update knowhere Former-commit-id: 26cefdfab21d0bf5350b2a44263d6f0e58a69768 --- cpp/thirdparty/knowhere | 2 +- 1 file 
changed, 1 insertion(+), 1 deletion(-) diff --git a/cpp/thirdparty/knowhere b/cpp/thirdparty/knowhere index 844e600834..2d543bfab6 160000 --- a/cpp/thirdparty/knowhere +++ b/cpp/thirdparty/knowhere @@ -1 +1 @@ -Subproject commit 844e600834df1eeafc6c7e5936338ae964bd1d41 +Subproject commit 2d543bfab655398f30113681f348519acac40ab5 From a951dd14c0e8fb330870dbe7257315b8829f5c9b Mon Sep 17 00:00:00 2001 From: zhiru Date: Mon, 8 Jul 2019 11:14:28 +0800 Subject: [PATCH 14/91] Add new mem manager Former-commit-id: abab1d1c2cf67f49a4d9dcf2304df1abed675dda --- cpp/CHANGELOG.md | 1 + cpp/conf/server_config.template | 4 +- cpp/src/db/Constants.h | 3 + cpp/src/db/MemManager.cpp | 25 ++++++ cpp/src/db/MemManager.h | 6 ++ cpp/src/db/MemManagerAbstract.h | 6 ++ cpp/src/db/MemTable.cpp | 17 +++- cpp/src/db/MemTable.h | 8 +- cpp/src/db/NewMemManager.cpp | 38 +++++++++ cpp/src/db/NewMemManager.h | 6 ++ cpp/src/db/Options.h | 1 + cpp/src/server/DBWrapper.cpp | 8 ++ cpp/src/server/ServerConfig.h | 1 + cpp/unittest/db/mem_test.cpp | 144 +++++++++++++++++++++----------- cpp/unittest/db/utils.cpp | 12 +++ cpp/unittest/db/utils.h | 5 ++ 16 files changed, 231 insertions(+), 54 deletions(-) diff --git a/cpp/CHANGELOG.md b/cpp/CHANGELOG.md index 0f4e480123..fd27d05b9c 100644 --- a/cpp/CHANGELOG.md +++ b/cpp/CHANGELOG.md @@ -18,6 +18,7 @@ Please mark all change in change log and use the ticket from JIRA. - MS-152 - Delete assert in MySQLMetaImpl and change MySQLConnectionPool impl ## New Feature +- MS-180 - Add new mem manager ## Task diff --git a/cpp/conf/server_config.template b/cpp/conf/server_config.template index 0383e00b53..f0cd6d5e52 100644 --- a/cpp/conf/server_config.template +++ b/cpp/conf/server_config.template @@ -2,7 +2,7 @@ server_config: address: 0.0.0.0 port: 19530 # the port milvus listen to, default: 19530, range: 1025 ~ 65534 gpu_index: 0 # the gpu milvus use, default: 0, range: 0 ~ gpu number - 1 - mode: single # milvus deployment type: single, cluster + mode: single # milvus deployment type: single, cluster, read_only db_config: db_path: @MILVUS_DB_PATH@ # milvus data storage path @@ -15,6 +15,8 @@ db_config: index_building_threshold: 1024 # index building trigger threshold, default: 1024, unit: MB archive_disk_threshold: 512 # triger archive action if storage size exceed this value, unit: GB archive_days_threshold: 30 # files older than x days will be archived, unit: day + maximum_memory: 4 # maximum memory allowed, default: 4, unit: GB, should be at least 1 GB. 
+ # the sum of maximum_memory and cpu_cache_capacity should be less than total memory metric_config: is_startup: off # if monitoring start: on, off diff --git a/cpp/src/db/Constants.h b/cpp/src/db/Constants.h index 2bb2e0a064..1ba02b1d55 100644 --- a/cpp/src/db/Constants.h +++ b/cpp/src/db/Constants.h @@ -11,6 +11,9 @@ namespace engine { const size_t K = 1024UL; const size_t M = K*K; +const size_t G = K*M; +const size_t T = K*G; + const size_t MAX_TABLE_FILE_MEM = 128 * M; const int VECTOR_TYPE_SIZE = sizeof(float); diff --git a/cpp/src/db/MemManager.cpp b/cpp/src/db/MemManager.cpp index e36b0c45ba..ba8517cdbd 100644 --- a/cpp/src/db/MemManager.cpp +++ b/cpp/src/db/MemManager.cpp @@ -8,6 +8,7 @@ #include "MetaConsts.h" #include "EngineFactory.h" #include "metrics/Metrics.h" +#include "Log.h" #include #include @@ -128,6 +129,10 @@ Status MemManager::InsertVectorsNoLock(const std::string& table_id, size_t n, const float* vectors, IDNumbers& vector_ids) { + + LOG(DEBUG) << "MemManager::InsertVectorsNoLock: mutable mem = " << GetCurrentMutableMem() << + ", immutable mem = " << GetCurrentImmutableMem() << ", total mem = " << GetCurrentMem(); + MemVectorsPtr mem = GetMemByTable(table_id); if (mem == nullptr) { return Status::NotFound("Group " + table_id + " not found!"); @@ -192,6 +197,26 @@ Status MemManager::EraseMemVector(const std::string& table_id) { return Status::OK(); } +size_t MemManager::GetCurrentMutableMem() { + size_t totalMem = 0; + for (auto& kv : mem_id_map_) { + auto memVector = kv.second; + totalMem += memVector->Size(); + } + return totalMem; +} + +size_t MemManager::GetCurrentImmutableMem() { + size_t totalMem = 0; + for (auto& memVector : immu_mem_list_) { + totalMem += memVector->Size(); + } + return totalMem; +} + +size_t MemManager::GetCurrentMem() { + return GetCurrentMutableMem() + GetCurrentImmutableMem(); +} } // namespace engine } // namespace milvus diff --git a/cpp/src/db/MemManager.h b/cpp/src/db/MemManager.h index 95303889db..e8460c7a6d 100644 --- a/cpp/src/db/MemManager.h +++ b/cpp/src/db/MemManager.h @@ -78,6 +78,12 @@ public: Status EraseMemVector(const std::string& table_id) override; + size_t GetCurrentMutableMem() override; + + size_t GetCurrentImmutableMem() override; + + size_t GetCurrentMem() override; + private: MemVectorsPtr GetMemByTable(const std::string& table_id); diff --git a/cpp/src/db/MemManagerAbstract.h b/cpp/src/db/MemManagerAbstract.h index 74222df1e8..58c73ba6f8 100644 --- a/cpp/src/db/MemManagerAbstract.h +++ b/cpp/src/db/MemManagerAbstract.h @@ -16,6 +16,12 @@ public: virtual Status EraseMemVector(const std::string& table_id) = 0; + virtual size_t GetCurrentMutableMem() = 0; + + virtual size_t GetCurrentImmutableMem() = 0; + + virtual size_t GetCurrentMem() = 0; + }; // MemManagerAbstract using MemManagerAbstractPtr = std::shared_ptr; diff --git a/cpp/src/db/MemTable.cpp b/cpp/src/db/MemTable.cpp index b282ad375a..ba3875fbb5 100644 --- a/cpp/src/db/MemTable.cpp +++ b/cpp/src/db/MemTable.cpp @@ -49,13 +49,15 @@ size_t MemTable::GetTableFileCount() { } Status MemTable::Serialize() { - for (auto& memTableFile : mem_table_file_list_) { - auto status = memTableFile->Serialize(); + for (auto memTableFile = mem_table_file_list_.begin(); memTableFile != mem_table_file_list_.end(); ) { + auto status = (*memTableFile)->Serialize(); if (!status.ok()) { std::string errMsg = "MemTable::Serialize failed: " + status.ToString(); ENGINE_LOG_ERROR << errMsg; return Status::Error(errMsg); } + std::lock_guard lock(mutex_); + memTableFile = 
mem_table_file_list_.erase(memTableFile); } return Status::OK(); } @@ -64,10 +66,19 @@ bool MemTable::Empty() { return mem_table_file_list_.empty(); } -std::string MemTable::GetTableId() { +const std::string& MemTable::GetTableId() const { return table_id_; } +size_t MemTable::GetCurrentMem() { + std::lock_guard lock(mutex_); + size_t totalMem = 0; + for (auto& memTableFile : mem_table_file_list_) { + totalMem += memTableFile->GetCurrentMem(); + } + return totalMem; +} + } // namespace engine } // namespace milvus } // namespace zilliz \ No newline at end of file diff --git a/cpp/src/db/MemTable.h b/cpp/src/db/MemTable.h index e09d6ddac1..9bae932e62 100644 --- a/cpp/src/db/MemTable.h +++ b/cpp/src/db/MemTable.h @@ -4,7 +4,7 @@ #include "MemTableFile.h" #include "VectorSource.h" -#include +#include namespace zilliz { namespace milvus { @@ -30,7 +30,9 @@ public: bool Empty(); - std::string GetTableId(); + const std::string& GetTableId() const; + + size_t GetCurrentMem(); private: const std::string table_id_; @@ -41,6 +43,8 @@ private: Options options_; + std::mutex mutex_; + }; //MemTable } // namespace engine diff --git a/cpp/src/db/NewMemManager.cpp b/cpp/src/db/NewMemManager.cpp index 19aba68eb7..3c78f37101 100644 --- a/cpp/src/db/NewMemManager.cpp +++ b/cpp/src/db/NewMemManager.cpp @@ -1,5 +1,9 @@ #include "NewMemManager.h" #include "VectorSource.h" +#include "Log.h" +#include "Constants.h" + +#include namespace zilliz { namespace milvus { @@ -20,6 +24,9 @@ Status NewMemManager::InsertVectors(const std::string& table_id_, const float* vectors_, IDNumbers& vector_ids_) { + while (GetCurrentMem() > options_.maximum_memory) { + std::this_thread::sleep_for(std::chrono::milliseconds(1)); + } std::unique_lock lock(mutex_); @@ -30,6 +37,10 @@ Status NewMemManager::InsertVectorsNoLock(const std::string& table_id, size_t n, const float* vectors, IDNumbers& vector_ids) { + + LOG(DEBUG) << "NewMemManager::InsertVectorsNoLock: mutable mem = " << GetCurrentMutableMem() << + ", immutable mem = " << GetCurrentImmutableMem() << ", total mem = " << GetCurrentMem(); + MemTablePtr mem = GetMemByTable(table_id); VectorSource::Ptr source = std::make_shared(n, vectors); @@ -64,6 +75,12 @@ Status NewMemManager::Serialize(std::set& table_ids) { table_ids.insert(mem->GetTableId()); } immu_mem_list_.clear(); +// for (auto mem = immu_mem_list_.begin(); mem != immu_mem_list_.end(); ) { +// (*mem)->Serialize(); +// table_ids.insert((*mem)->GetTableId()); +// mem = immu_mem_list_.erase(mem); +// LOG(DEBUG) << "immu_mem_list_ size = " << immu_mem_list_.size(); +// } return Status::OK(); } @@ -87,6 +104,27 @@ Status NewMemManager::EraseMemVector(const std::string& table_id) { return Status::OK(); } +size_t NewMemManager::GetCurrentMutableMem() { + size_t totalMem = 0; + for (auto& kv : mem_id_map_) { + auto memTable = kv.second; + totalMem += memTable->GetCurrentMem(); + } + return totalMem; +} + +size_t NewMemManager::GetCurrentImmutableMem() { + size_t totalMem = 0; + for (auto& memTable : immu_mem_list_) { + totalMem += memTable->GetCurrentMem(); + } + return totalMem; +} + +size_t NewMemManager::GetCurrentMem() { + return GetCurrentMutableMem() + GetCurrentImmutableMem(); +} + } // namespace engine } // namespace milvus } // namespace zilliz \ No newline at end of file diff --git a/cpp/src/db/NewMemManager.h b/cpp/src/db/NewMemManager.h index a5f5a9ca13..9883480404 100644 --- a/cpp/src/db/NewMemManager.h +++ b/cpp/src/db/NewMemManager.h @@ -31,6 +31,12 @@ public: Status EraseMemVector(const std::string& table_id) 
override; + size_t GetCurrentMutableMem() override; + + size_t GetCurrentImmutableMem() override; + + size_t GetCurrentMem() override; + private: MemTablePtr GetMemByTable(const std::string& table_id); diff --git a/cpp/src/db/Options.h b/cpp/src/db/Options.h index 39d0a15019..47bbb45bbc 100644 --- a/cpp/src/db/Options.h +++ b/cpp/src/db/Options.h @@ -61,6 +61,7 @@ struct Options { size_t index_trigger_size = ONE_GB; //unit: byte DBMetaOptions meta; int mode = MODE::SINGLE; + float maximum_memory = 4 * ONE_GB; }; // Options diff --git a/cpp/src/server/DBWrapper.cpp b/cpp/src/server/DBWrapper.cpp index fca15cb65a..bed4440d5e 100644 --- a/cpp/src/server/DBWrapper.cpp +++ b/cpp/src/server/DBWrapper.cpp @@ -23,6 +23,14 @@ DBWrapper::DBWrapper() { if(index_size > 0) {//ensure larger than zero, unit is MB opt.index_trigger_size = (size_t)index_size * engine::ONE_MB; } + float maximum_memory = config.GetFloatValue(CONFIG_MAXMIMUM_MEMORY); + if (maximum_memory > 1.0) { + opt.maximum_memory = maximum_memory * engine::ONE_GB; + } + else { + std::cout << "ERROR: maximum_memory should be at least 1 GB" << std::endl; + kill(0, SIGUSR1); + } ConfigNode& serverConfig = ServerConfig::GetInstance().GetConfig(CONFIG_SERVER); std::string mode = serverConfig.GetValue(CONFIG_CLUSTER_MODE, "single"); diff --git a/cpp/src/server/ServerConfig.h b/cpp/src/server/ServerConfig.h index 0ec04eed8c..b3b95eb8b6 100644 --- a/cpp/src/server/ServerConfig.h +++ b/cpp/src/server/ServerConfig.h @@ -26,6 +26,7 @@ static const std::string CONFIG_DB_PATH = "db_path"; static const std::string CONFIG_DB_INDEX_TRIGGER_SIZE = "index_building_threshold"; static const std::string CONFIG_DB_ARCHIVE_DISK = "archive_disk_threshold"; static const std::string CONFIG_DB_ARCHIVE_DAYS = "archive_days_threshold"; +static const std::string CONFIG_MAXMIMUM_MEMORY = "maximum_memory"; static const std::string CONFIG_LOG = "log_config"; diff --git a/cpp/unittest/db/mem_test.cpp b/cpp/unittest/db/mem_test.cpp index 915610adcc..818c3a6388 100644 --- a/cpp/unittest/db/mem_test.cpp +++ b/cpp/unittest/db/mem_test.cpp @@ -8,6 +8,8 @@ #include "db/Constants.h" #include "db/EngineFactory.h" #include "metrics/Metrics.h" +#include "db/MetaConsts.h" +#include "boost/filesystem.hpp" #include #include @@ -34,9 +36,6 @@ namespace { vectors.clear(); vectors.resize(n*TABLE_DIM); float* data = vectors.data(); -// std::random_device rd; -// std::mt19937 gen(rd()); -// std::uniform_real_distribution<> dis(0.0, 1.0); for(int i = 0; i < n; i++) { for(int j = 0; j < TABLE_DIM; j++) data[TABLE_DIM * i + j] = drand48(); data[TABLE_DIM * i] += i / 2000.; @@ -44,7 +43,7 @@ namespace { } } -TEST(MEM_TEST, VECTOR_SOURCE_TEST) { +TEST_F(NewMemManagerTest, VECTOR_SOURCE_TEST) { std::shared_ptr impl_ = engine::DBMetaImplFactory::Build(); @@ -91,7 +90,7 @@ TEST(MEM_TEST, VECTOR_SOURCE_TEST) { ASSERT_TRUE(status.ok()); } -TEST(MEM_TEST, MEM_TABLE_FILE_TEST) { +TEST_F(NewMemManagerTest, MEM_TABLE_FILE_TEST) { std::shared_ptr impl_ = engine::DBMetaImplFactory::Build(); auto options = engine::OptionsFactory::Build(); @@ -135,7 +134,7 @@ TEST(MEM_TEST, MEM_TABLE_FILE_TEST) { ASSERT_TRUE(status.ok()); } -TEST(MEM_TEST, MEM_TABLE_TEST) { +TEST_F(NewMemManagerTest, MEM_TABLE_TEST) { std::shared_ptr impl_ = engine::DBMetaImplFactory::Build(); auto options = engine::OptionsFactory::Build(); @@ -201,7 +200,7 @@ TEST(MEM_TEST, MEM_TABLE_TEST) { ASSERT_TRUE(status.ok()); } -TEST(MEM_TEST, MEM_MANAGER_TEST) { +TEST_F(NewMemManagerTest, SERIAL_INSERT_SEARCH_TEST) { auto options = 
engine::OptionsFactory::Build(); options.meta.path = "/tmp/milvus_test"; @@ -218,7 +217,6 @@ TEST(MEM_TEST, MEM_MANAGER_TEST) { ASSERT_EQ(table_info_get.dimension_, TABLE_DIM); std::map> search_vectors; -// std::map> vectors_ids_map; { engine::IDNumbers vector_ids; int64_t nb = 1024000; @@ -227,24 +225,13 @@ TEST(MEM_TEST, MEM_MANAGER_TEST) { engine::Status status = db_->InsertVectors(TABLE_NAME, nb, xb.data(), vector_ids); ASSERT_TRUE(status.ok()); -// std::ofstream myfile("mem_test.txt"); -// for (int64_t i = 0; i < nb; ++i) { -// int64_t vector_id = vector_ids[i]; -// std::vector vectors; -// for (int64_t j = 0; j < TABLE_DIM; j++) { -// vectors.emplace_back(xb[i*TABLE_DIM + j]); -//// std::cout << xb[i*TABLE_DIM + j] << std::endl; -// } -// vectors_ids_map[vector_id] = vectors; -// } - std::this_thread::sleep_for(std::chrono::seconds(3)); std::random_device rd; std::mt19937 gen(rd()); std::uniform_int_distribution dis(0, nb - 1); - int64_t numQuery = 1000; + int64_t numQuery = 20; for (int64_t i = 0; i < numQuery; ++i) { int64_t index = dis(gen); std::vector search; @@ -252,17 +239,7 @@ TEST(MEM_TEST, MEM_MANAGER_TEST) { search.push_back(xb[index * TABLE_DIM + j]); } search_vectors.insert(std::make_pair(vector_ids[index], search)); -// std::cout << "index: " << index << " vector_ids[index]: " << vector_ids[index] << std::endl; } - -// for (int64_t i = 0; i < nb; i += 100000) { -// std::vector search; -// for (int64_t j = 0; j < TABLE_DIM; j++) { -// search.push_back(xb[i * TABLE_DIM + j]); -// } -// search_vectors.insert(std::make_pair(vector_ids[i], search)); -// } - } int k = 10; @@ -270,26 +247,16 @@ TEST(MEM_TEST, MEM_MANAGER_TEST) { auto& search = pair.second; engine::QueryResults results; stat = db_->Query(TABLE_NAME, k, 1, search.data(), results); - for(int t = 0; t < k; t++) { -// std::cout << "ID=" << results[0][t].first << " DISTANCE=" << results[0][t].second << std::endl; - -// std::cout << vectors_ids_map[results[0][t].first].size() << std::endl; -// for (auto& data : vectors_ids_map[results[0][t].first]) { -// std::cout << data << " "; -// } -// std::cout << std::endl; - } - // std::cout << "results[0][0].first: " << results[0][0].first << " pair.first: " << pair.first << " results[0][0].second: " << results[0][0].second << std::endl; ASSERT_EQ(results[0][0].first, pair.first); ASSERT_LT(results[0][0].second, 0.00001); } - stat = db_->DropAll(); - ASSERT_TRUE(stat.ok()); + delete db_; + boost::filesystem::remove_all(options.meta.path); } -TEST(MEM_TEST, INSERT_TEST) { +TEST_F(NewMemManagerTest, INSERT_TEST) { auto options = engine::OptionsFactory::Build(); options.meta.path = "/tmp/milvus_test"; @@ -307,9 +274,9 @@ TEST(MEM_TEST, INSERT_TEST) { auto start_time = METRICS_NOW_TIME; - int insert_loop = 1000; + int insert_loop = 20; for (int i = 0; i < insert_loop; ++i) { - int64_t nb = 204800; + int64_t nb = 409600; std::vector xb; BuildVectors(nb, xb); engine::IDNumbers vector_ids; @@ -318,10 +285,91 @@ TEST(MEM_TEST, INSERT_TEST) { } auto end_time = METRICS_NOW_TIME; auto total_time = METRICS_MICROSECONDS(start_time, end_time); - std::cout << "total_time(ms) : " << total_time << std::endl; + LOG(DEBUG) << "total_time spent in INSERT_TEST (ms) : " << total_time; - stat = db_->DropAll(); - ASSERT_TRUE(stat.ok()); + delete db_; + boost::filesystem::remove_all(options.meta.path); } +TEST_F(NewMemManagerTest, CONCURRENT_INSERT_SEARCH_TEST) { + + auto options = engine::OptionsFactory::Build(); + options.meta.path = "/tmp/milvus_test"; + options.meta.backend_uri = "sqlite://:@:/"; 
+ auto db_ = engine::DBFactory::Build(options); + + engine::meta::TableSchema table_info = BuildTableSchema(); + engine::Status stat = db_->CreateTable(table_info); + + engine::meta::TableSchema table_info_get; + table_info_get.table_id_ = TABLE_NAME; + stat = db_->DescribeTable(table_info_get); + ASSERT_STATS(stat); + ASSERT_EQ(table_info_get.dimension_, TABLE_DIM); + + engine::IDNumbers vector_ids; + engine::IDNumbers target_ids; + + int64_t nb = 409600; + std::vector xb; + BuildVectors(nb, xb); + + int64_t qb = 5; + std::vector qxb; + BuildVectors(qb, qxb); + + std::thread search([&]() { + engine::QueryResults results; + int k = 10; + std::this_thread::sleep_for(std::chrono::seconds(2)); + + INIT_TIMER; + std::stringstream ss; + uint64_t count = 0; + uint64_t prev_count = 0; + + for (auto j=0; j<10; ++j) { + ss.str(""); + db_->Size(count); + prev_count = count; + + START_TIMER; + stat = db_->Query(TABLE_NAME, k, qb, qxb.data(), results); + ss << "Search " << j << " With Size " << count/engine::meta::M << " M"; + STOP_TIMER(ss.str()); + + ASSERT_STATS(stat); + for (auto k=0; k= prev_count); + std::this_thread::sleep_for(std::chrono::seconds(1)); + } + }); + + int loop = 20; + + for (auto i=0; iInsertVectors(TABLE_NAME, qb, qxb.data(), target_ids); + ASSERT_EQ(target_ids.size(), qb); + } else { + db_->InsertVectors(TABLE_NAME, nb, xb.data(), vector_ids); + } + std::this_thread::sleep_for(std::chrono::microseconds(1)); + } + + search.join(); + + delete db_; + boost::filesystem::remove_all(options.meta.path); + +}; + diff --git a/cpp/unittest/db/utils.cpp b/cpp/unittest/db/utils.cpp index 70c0712549..ae05c59d3b 100644 --- a/cpp/unittest/db/utils.cpp +++ b/cpp/unittest/db/utils.cpp @@ -106,6 +106,18 @@ zilliz::milvus::engine::Options MySQLDBTest::GetOptions() { return options; } +void NewMemManagerTest::InitLog() { + el::Configurations defaultConf; + defaultConf.setToDefault(); + defaultConf.set(el::Level::Debug, + el::ConfigurationType::Format, "[%thread-%datetime-%level]: %msg (%fbase:%line)"); + el::Loggers::reconfigureLogger("default", defaultConf); +} + +void NewMemManagerTest::SetUp() { + InitLog(); +} + int main(int argc, char **argv) { ::testing::InitGoogleTest(&argc, argv); if (argc > 1) { diff --git a/cpp/unittest/db/utils.h b/cpp/unittest/db/utils.h index 361c24b4be..d06500de5c 100644 --- a/cpp/unittest/db/utils.h +++ b/cpp/unittest/db/utils.h @@ -87,3 +87,8 @@ class MySQLDBTest : public ::testing::Test { protected: zilliz::milvus::engine::Options GetOptions(); }; + +class NewMemManagerTest : public ::testing::Test { + void InitLog(); + virtual void SetUp() override; +}; From ed23b7056ff47a8f798534ecd87f09adda0a50e5 Mon Sep 17 00:00:00 2001 From: zhiru Date: Mon, 8 Jul 2019 15:07:03 +0800 Subject: [PATCH 15/91] update Former-commit-id: 6edbbf6f4bca89c568c71d5e4bd0de1be84e6137 --- cpp/src/db/Constants.h | 6 +- cpp/src/db/Factories.cpp | 19 +++--- cpp/src/db/Factories.h | 9 +-- cpp/src/db/MemManager.cpp | 83 ++++++++++++++------------ cpp/src/db/MemManager.h | 43 +++++++------- cpp/src/db/MemManagerAbstract.h | 11 ++-- cpp/src/db/MemTable.cpp | 66 +++++++++++---------- cpp/src/db/MemTable.h | 13 ++-- cpp/src/db/MemTableFile.cpp | 56 +++++++++--------- cpp/src/db/MemTableFile.h | 9 +-- cpp/src/db/NewMemManager.cpp | 63 ++++++++++---------- cpp/src/db/NewMemManager.h | 23 ++++---- cpp/src/db/VectorSource.cpp | 18 +++--- cpp/src/db/VectorSource.h | 19 +++--- cpp/unittest/db/mem_test.cpp | 101 ++++++++++++++++---------------- cpp/unittest/db/utils.h | 20 +++---- 16 files changed, 284 
insertions(+), 275 deletions(-) diff --git a/cpp/src/db/Constants.h b/cpp/src/db/Constants.h index 1ba02b1d55..055b10ca9a 100644 --- a/cpp/src/db/Constants.h +++ b/cpp/src/db/Constants.h @@ -10,9 +10,9 @@ namespace milvus { namespace engine { const size_t K = 1024UL; -const size_t M = K*K; -const size_t G = K*M; -const size_t T = K*G; +const size_t M = K * K; +const size_t G = K * M; +const size_t T = K * G; const size_t MAX_TABLE_FILE_MEM = 128 * M; diff --git a/cpp/src/db/Factories.cpp b/cpp/src/db/Factories.cpp index d51727cbff..65c7484a50 100644 --- a/cpp/src/db/Factories.cpp +++ b/cpp/src/db/Factories.cpp @@ -22,6 +22,8 @@ namespace zilliz { namespace milvus { namespace engine { +#define USE_NEW_MEM_MANAGER 1 + DBMetaOptions DBMetaOptionsFactory::Build(const std::string& path) { auto p = path; if(p == "") { @@ -74,17 +76,14 @@ std::shared_ptr DBMetaImplFactory::Build(const DBMetaOptions& metaOp if (dialect.find("mysql") != std::string::npos) { ENGINE_LOG_INFO << "Using MySQL"; return std::make_shared(meta::MySQLMetaImpl(metaOptions, mode)); - } - else if (dialect.find("sqlite") != std::string::npos) { + } else if (dialect.find("sqlite") != std::string::npos) { ENGINE_LOG_INFO << "Using SQLite"; return std::make_shared(meta::DBMetaImpl(metaOptions)); - } - else { + } else { ENGINE_LOG_ERROR << "Invalid dialect in URI: dialect = " << dialect; throw InvalidArgumentException("URI dialect is not mysql / sqlite"); } - } - else { + } else { ENGINE_LOG_ERROR << "Wrong URI format: URI = " << uri; throw InvalidArgumentException("Wrong URI format "); } @@ -102,11 +101,11 @@ DB* DBFactory::Build(const Options& options) { MemManagerAbstractPtr MemManagerFactory::Build(const std::shared_ptr& meta, const Options& options) { - bool useNew = true; - if (useNew) { - return std::make_shared(meta, options); - } +#ifdef USE_NEW_MEM_MANAGER + return std::make_shared(meta, options); +#else return std::make_shared(meta, options); +#endif } } // namespace engine diff --git a/cpp/src/db/Factories.h b/cpp/src/db/Factories.h index 567bc0a8bc..8b6e7b100f 100644 --- a/cpp/src/db/Factories.h +++ b/cpp/src/db/Factories.h @@ -15,12 +15,13 @@ #include #include + namespace zilliz { namespace milvus { namespace engine { struct DBMetaOptionsFactory { - static DBMetaOptions Build(const std::string& path = ""); + static DBMetaOptions Build(const std::string &path = ""); }; struct OptionsFactory { @@ -29,16 +30,16 @@ struct OptionsFactory { struct DBMetaImplFactory { static std::shared_ptr Build(); - static std::shared_ptr Build(const DBMetaOptions& metaOptions, const int& mode); + static std::shared_ptr Build(const DBMetaOptions &metaOptions, const int &mode); }; struct DBFactory { static std::shared_ptr Build(); - static DB* Build(const Options&); + static DB *Build(const Options &); }; struct MemManagerFactory { - static MemManagerAbstractPtr Build(const std::shared_ptr& meta, const Options& options); + static MemManagerAbstractPtr Build(const std::shared_ptr &meta, const Options &options); }; } // namespace engine diff --git a/cpp/src/db/MemManager.cpp b/cpp/src/db/MemManager.cpp index ba8517cdbd..dbf0703173 100644 --- a/cpp/src/db/MemManager.cpp +++ b/cpp/src/db/MemManager.cpp @@ -15,22 +15,23 @@ #include #include + namespace zilliz { namespace milvus { namespace engine { -MemVectors::MemVectors(const std::shared_ptr& meta_ptr, - const meta::TableFileSchema& schema, const Options& options) - : meta_(meta_ptr), - options_(options), - schema_(schema), - id_generator_(new SimpleIDGenerator()), - 
active_engine_(EngineFactory::Build(schema_.dimension_, schema_.location_, (EngineType)schema_.engine_type_)) { +MemVectors::MemVectors(const std::shared_ptr &meta_ptr, + const meta::TableFileSchema &schema, const Options &options) + : meta_(meta_ptr), + options_(options), + schema_(schema), + id_generator_(new SimpleIDGenerator()), + active_engine_(EngineFactory::Build(schema_.dimension_, schema_.location_, (EngineType) schema_.engine_type_)) { } -Status MemVectors::Add(size_t n_, const float* vectors_, IDNumbers& vector_ids_) { - if(active_engine_ == nullptr) { +Status MemVectors::Add(size_t n_, const float *vectors_, IDNumbers &vector_ids_) { + if (active_engine_ == nullptr) { return Status::Error("index engine is null"); } @@ -39,13 +40,15 @@ Status MemVectors::Add(size_t n_, const float* vectors_, IDNumbers& vector_ids_) Status status = active_engine_->AddWithIds(n_, vectors_, vector_ids_.data()); auto end_time = METRICS_NOW_TIME; auto total_time = METRICS_MICROSECONDS(start_time, end_time); - server::Metrics::GetInstance().AddVectorsPerSecondGaugeSet(static_cast(n_), static_cast(schema_.dimension_), total_time); + server::Metrics::GetInstance().AddVectorsPerSecondGaugeSet(static_cast(n_), + static_cast(schema_.dimension_), + total_time); return status; } size_t MemVectors::RowCount() const { - if(active_engine_ == nullptr) { + if (active_engine_ == nullptr) { return 0; } @@ -53,15 +56,15 @@ size_t MemVectors::RowCount() const { } size_t MemVectors::Size() const { - if(active_engine_ == nullptr) { + if (active_engine_ == nullptr) { return 0; } return active_engine_->Size(); } -Status MemVectors::Serialize(std::string& table_id) { - if(active_engine_ == nullptr) { +Status MemVectors::Serialize(std::string &table_id) { + if (active_engine_ == nullptr) { return Status::Error("index engine is null"); } @@ -73,15 +76,16 @@ Status MemVectors::Serialize(std::string& table_id) { auto total_time = METRICS_MICROSECONDS(start_time, end_time); schema_.size_ = size; - server::Metrics::GetInstance().DiskStoreIOSpeedGaugeSet(size/total_time); + server::Metrics::GetInstance().DiskStoreIOSpeedGaugeSet(size / total_time); schema_.file_type_ = (size >= options_.index_trigger_size) ? - meta::TableFileSchema::TO_INDEX : meta::TableFileSchema::RAW; + meta::TableFileSchema::TO_INDEX : meta::TableFileSchema::RAW; auto status = meta_->UpdateTableFile(schema_); LOG(DEBUG) << "New " << ((schema_.file_type_ == meta::TableFileSchema::RAW) ? 
"raw" : "to_index") - << " file " << schema_.file_id_ << " of size " << (double)(active_engine_->Size()) / (double)meta::M << " M"; + << " file " << schema_.file_id_ << " of size " << (double) (active_engine_->Size()) / (double) meta::M + << " M"; active_engine_->Cache(); @@ -99,7 +103,7 @@ MemVectors::~MemVectors() { * MemManager */ MemManager::MemVectorsPtr MemManager::GetMemByTable( - const std::string& table_id) { + const std::string &table_id) { auto memIt = mem_id_map_.find(table_id); if (memIt != mem_id_map_.end()) { return memIt->second; @@ -116,22 +120,23 @@ MemManager::MemVectorsPtr MemManager::GetMemByTable( return mem_id_map_[table_id]; } -Status MemManager::InsertVectors(const std::string& table_id_, - size_t n_, - const float* vectors_, - IDNumbers& vector_ids_) { +Status MemManager::InsertVectors(const std::string &table_id_, + size_t n_, + const float *vectors_, + IDNumbers &vector_ids_) { + + LOG(DEBUG) << "MemManager::InsertVectors: mutable mem = " << GetCurrentMutableMem() << + ", immutable mem = " << GetCurrentImmutableMem() << ", total mem = " << GetCurrentMem(); + std::unique_lock lock(mutex_); return InsertVectorsNoLock(table_id_, n_, vectors_, vector_ids_); } -Status MemManager::InsertVectorsNoLock(const std::string& table_id, - size_t n, - const float* vectors, - IDNumbers& vector_ids) { - - LOG(DEBUG) << "MemManager::InsertVectorsNoLock: mutable mem = " << GetCurrentMutableMem() << - ", immutable mem = " << GetCurrentImmutableMem() << ", total mem = " << GetCurrentMem(); +Status MemManager::InsertVectorsNoLock(const std::string &table_id, + size_t n, + const float *vectors, + IDNumbers &vector_ids) { MemVectorsPtr mem = GetMemByTable(table_id); if (mem == nullptr) { @@ -139,7 +144,7 @@ Status MemManager::InsertVectorsNoLock(const std::string& table_id, } //makesure each file size less than index_trigger_size - if(mem->Size() > options_.index_trigger_size) { + if (mem->Size() > options_.index_trigger_size) { std::unique_lock lock(serialization_mtx_); immu_mem_list_.push_back(mem); mem_id_map_.erase(table_id); @@ -152,8 +157,8 @@ Status MemManager::InsertVectorsNoLock(const std::string& table_id, Status MemManager::ToImmutable() { std::unique_lock lock(mutex_); MemIdMap temp_map; - for (auto& kv: mem_id_map_) { - if(kv.second->RowCount() == 0) { + for (auto &kv: mem_id_map_) { + if (kv.second->RowCount() == 0) { temp_map.insert(kv); continue;//empty vector, no need to serialize } @@ -164,12 +169,12 @@ Status MemManager::ToImmutable() { return Status::OK(); } -Status MemManager::Serialize(std::set& table_ids) { +Status MemManager::Serialize(std::set &table_ids) { ToImmutable(); std::unique_lock lock(serialization_mtx_); std::string table_id; table_ids.clear(); - for (auto& mem : immu_mem_list_) { + for (auto &mem : immu_mem_list_) { mem->Serialize(table_id); table_ids.insert(table_id); } @@ -177,7 +182,7 @@ Status MemManager::Serialize(std::set& table_ids) { return Status::OK(); } -Status MemManager::EraseMemVector(const std::string& table_id) { +Status MemManager::EraseMemVector(const std::string &table_id) { {//erase MemVector from rapid-insert cache std::unique_lock lock(mutex_); mem_id_map_.erase(table_id); @@ -186,8 +191,8 @@ Status MemManager::EraseMemVector(const std::string& table_id) { {//erase MemVector from serialize cache std::unique_lock lock(serialization_mtx_); MemList temp_list; - for (auto& mem : immu_mem_list_) { - if(mem->TableId() != table_id) { + for (auto &mem : immu_mem_list_) { + if (mem->TableId() != table_id) { temp_list.push_back(mem); } } 
@@ -199,7 +204,7 @@ Status MemManager::EraseMemVector(const std::string& table_id) { size_t MemManager::GetCurrentMutableMem() { size_t totalMem = 0; - for (auto& kv : mem_id_map_) { + for (auto &kv : mem_id_map_) { auto memVector = kv.second; totalMem += memVector->Size(); } @@ -208,7 +213,7 @@ size_t MemManager::GetCurrentMutableMem() { size_t MemManager::GetCurrentImmutableMem() { size_t totalMem = 0; - for (auto& memVector : immu_mem_list_) { + for (auto &memVector : immu_mem_list_) { totalMem += memVector->Size(); } return totalMem; diff --git a/cpp/src/db/MemManager.h b/cpp/src/db/MemManager.h index e8460c7a6d..5ad3d08b63 100644 --- a/cpp/src/db/MemManager.h +++ b/cpp/src/db/MemManager.h @@ -17,45 +17,46 @@ #include #include + namespace zilliz { namespace milvus { namespace engine { namespace meta { - class Meta; +class Meta; } class MemVectors { -public: + public: using MetaPtr = meta::Meta::Ptr; using Ptr = std::shared_ptr; - explicit MemVectors(const std::shared_ptr&, - const meta::TableFileSchema&, const Options&); + explicit MemVectors(const std::shared_ptr &, + const meta::TableFileSchema &, const Options &); - Status Add(size_t n_, const float* vectors_, IDNumbers& vector_ids_); + Status Add(size_t n_, const float *vectors_, IDNumbers &vector_ids_); size_t RowCount() const; size_t Size() const; - Status Serialize(std::string& table_id); + Status Serialize(std::string &table_id); ~MemVectors(); - const std::string& Location() const { return schema_.location_; } + const std::string &Location() const { return schema_.location_; } std::string TableId() const { return schema_.table_id_; } -private: + private: MemVectors() = delete; - MemVectors(const MemVectors&) = delete; - MemVectors& operator=(const MemVectors&) = delete; + MemVectors(const MemVectors &) = delete; + MemVectors &operator=(const MemVectors &) = delete; MetaPtr meta_; Options options_; meta::TableFileSchema schema_; - IDGenerator* id_generator_; + IDGenerator *id_generator_; ExecutionEnginePtr active_engine_; }; // MemVectors @@ -63,20 +64,20 @@ private: class MemManager : public MemManagerAbstract { -public: + public: using MetaPtr = meta::Meta::Ptr; using MemVectorsPtr = typename MemVectors::Ptr; using Ptr = std::shared_ptr; - MemManager(const std::shared_ptr& meta, const Options& options) + MemManager(const std::shared_ptr &meta, const Options &options) : meta_(meta), options_(options) {} - Status InsertVectors(const std::string& table_id, - size_t n, const float* vectors, IDNumbers& vector_ids) override; + Status InsertVectors(const std::string &table_id, + size_t n, const float *vectors, IDNumbers &vector_ids) override; - Status Serialize(std::set& table_ids) override; + Status Serialize(std::set &table_ids) override; - Status EraseMemVector(const std::string& table_id) override; + Status EraseMemVector(const std::string &table_id) override; size_t GetCurrentMutableMem() override; @@ -84,11 +85,11 @@ public: size_t GetCurrentMem() override; -private: - MemVectorsPtr GetMemByTable(const std::string& table_id); + private: + MemVectorsPtr GetMemByTable(const std::string &table_id); - Status InsertVectorsNoLock(const std::string& table_id, - size_t n, const float* vectors, IDNumbers& vector_ids); + Status InsertVectorsNoLock(const std::string &table_id, + size_t n, const float *vectors, IDNumbers &vector_ids); Status ToImmutable(); using MemIdMap = std::map; diff --git a/cpp/src/db/MemManagerAbstract.h b/cpp/src/db/MemManagerAbstract.h index 58c73ba6f8..943c454e46 100644 --- a/cpp/src/db/MemManagerAbstract.h +++ 
b/cpp/src/db/MemManagerAbstract.h @@ -2,19 +2,20 @@ #include + namespace zilliz { namespace milvus { namespace engine { class MemManagerAbstract { -public: + public: - virtual Status InsertVectors(const std::string& table_id, - size_t n, const float* vectors, IDNumbers& vector_ids) = 0; + virtual Status InsertVectors(const std::string &table_id, + size_t n, const float *vectors, IDNumbers &vector_ids) = 0; - virtual Status Serialize(std::set& table_ids) = 0; + virtual Status Serialize(std::set &table_ids) = 0; - virtual Status EraseMemVector(const std::string& table_id) = 0; + virtual Status EraseMemVector(const std::string &table_id) = 0; virtual size_t GetCurrentMutableMem() = 0; diff --git a/cpp/src/db/MemTable.cpp b/cpp/src/db/MemTable.cpp index ba3875fbb5..e05aa058ac 100644 --- a/cpp/src/db/MemTable.cpp +++ b/cpp/src/db/MemTable.cpp @@ -1,46 +1,50 @@ #include "MemTable.h" #include "Log.h" + namespace zilliz { namespace milvus { namespace engine { -MemTable::MemTable(const std::string& table_id, - const std::shared_ptr& meta, - const Options& options) : - table_id_(table_id), - meta_(meta), - options_(options) { +MemTable::MemTable(const std::string &table_id, + const std::shared_ptr &meta, + const Options &options) : + table_id_(table_id), + meta_(meta), + options_(options) { } -Status MemTable::Add(VectorSource::Ptr& source) { +Status MemTable::Add(VectorSource::Ptr &source) { + while (!source->AllAdded()) { - MemTableFile::Ptr currentMemTableFile; + + MemTableFile::Ptr current_mem_table_file; if (!mem_table_file_list_.empty()) { - currentMemTableFile = mem_table_file_list_.back(); + current_mem_table_file = mem_table_file_list_.back(); } + Status status; - if (mem_table_file_list_.empty() || currentMemTableFile->IsFull()) { - MemTableFile::Ptr newMemTableFile = std::make_shared(table_id_, meta_, options_); - status = newMemTableFile->Add(source); + if (mem_table_file_list_.empty() || current_mem_table_file->IsFull()) { + MemTableFile::Ptr new_mem_table_file = std::make_shared(table_id_, meta_, options_); + status = new_mem_table_file->Add(source); if (status.ok()) { - mem_table_file_list_.emplace_back(newMemTableFile); + mem_table_file_list_.emplace_back(new_mem_table_file); } + } else { + status = current_mem_table_file->Add(source); } - else { - status = currentMemTableFile->Add(source); - } + if (!status.ok()) { - std::string errMsg = "MemTable::Add failed: " + status.ToString(); - ENGINE_LOG_ERROR << errMsg; - return Status::Error(errMsg); + std::string err_msg = "MemTable::Add failed: " + status.ToString(); + ENGINE_LOG_ERROR << err_msg; + return Status::Error(err_msg); } } return Status::OK(); } -void MemTable::GetCurrentMemTableFile(MemTableFile::Ptr& mem_table_file) { +void MemTable::GetCurrentMemTableFile(MemTableFile::Ptr &mem_table_file) { mem_table_file = mem_table_file_list_.back(); } @@ -49,15 +53,15 @@ size_t MemTable::GetTableFileCount() { } Status MemTable::Serialize() { - for (auto memTableFile = mem_table_file_list_.begin(); memTableFile != mem_table_file_list_.end(); ) { - auto status = (*memTableFile)->Serialize(); + for (auto mem_table_file = mem_table_file_list_.begin(); mem_table_file != mem_table_file_list_.end();) { + auto status = (*mem_table_file)->Serialize(); if (!status.ok()) { - std::string errMsg = "MemTable::Serialize failed: " + status.ToString(); - ENGINE_LOG_ERROR << errMsg; - return Status::Error(errMsg); + std::string err_msg = "MemTable::Serialize failed: " + status.ToString(); + ENGINE_LOG_ERROR << err_msg; + return Status::Error(err_msg); } 
std::lock_guard lock(mutex_); - memTableFile = mem_table_file_list_.erase(memTableFile); + mem_table_file = mem_table_file_list_.erase(mem_table_file); } return Status::OK(); } @@ -66,17 +70,17 @@ bool MemTable::Empty() { return mem_table_file_list_.empty(); } -const std::string& MemTable::GetTableId() const { +const std::string &MemTable::GetTableId() const { return table_id_; } size_t MemTable::GetCurrentMem() { std::lock_guard lock(mutex_); - size_t totalMem = 0; - for (auto& memTableFile : mem_table_file_list_) { - totalMem += memTableFile->GetCurrentMem(); + size_t total_mem = 0; + for (auto &mem_table_file : mem_table_file_list_) { + total_mem += mem_table_file->GetCurrentMem(); } - return totalMem; + return total_mem; } } // namespace engine diff --git a/cpp/src/db/MemTable.h b/cpp/src/db/MemTable.h index 9bae932e62..198fcc228a 100644 --- a/cpp/src/db/MemTable.h +++ b/cpp/src/db/MemTable.h @@ -6,23 +6,24 @@ #include + namespace zilliz { namespace milvus { namespace engine { class MemTable { -public: + public: using Ptr = std::shared_ptr; using MemTableFileList = std::vector; using MetaPtr = meta::Meta::Ptr; - MemTable(const std::string& table_id, const std::shared_ptr& meta, const Options& options); + MemTable(const std::string &table_id, const std::shared_ptr &meta, const Options &options); - Status Add(VectorSource::Ptr& source); + Status Add(VectorSource::Ptr &source); - void GetCurrentMemTableFile(MemTableFile::Ptr& mem_table_file); + void GetCurrentMemTableFile(MemTableFile::Ptr &mem_table_file); size_t GetTableFileCount(); @@ -30,11 +31,11 @@ public: bool Empty(); - const std::string& GetTableId() const; + const std::string &GetTableId() const; size_t GetCurrentMem(); -private: + private: const std::string table_id_; MemTableFileList mem_table_file_list_; diff --git a/cpp/src/db/MemTableFile.cpp b/cpp/src/db/MemTableFile.cpp index 0ff91de00b..649a680cf3 100644 --- a/cpp/src/db/MemTableFile.cpp +++ b/cpp/src/db/MemTableFile.cpp @@ -6,23 +6,24 @@ #include + namespace zilliz { namespace milvus { namespace engine { -MemTableFile::MemTableFile(const std::string& table_id, - const std::shared_ptr& meta, - const Options& options) : - table_id_(table_id), - meta_(meta), - options_(options) { +MemTableFile::MemTableFile(const std::string &table_id, + const std::shared_ptr &meta, + const Options &options) : + table_id_(table_id), + meta_(meta), + options_(options) { current_mem_ = 0; auto status = CreateTableFile(); if (status.ok()) { execution_engine_ = EngineFactory::Build(table_file_schema_.dimension_, table_file_schema_.location_, - (EngineType)table_file_schema_.engine_type_); + (EngineType) table_file_schema_.engine_type_); } } @@ -33,31 +34,30 @@ Status MemTableFile::CreateTableFile() { auto status = meta_->CreateTableFile(table_file_schema); if (status.ok()) { table_file_schema_ = table_file_schema; - } - else { - std::string errMsg = "MemTableFile::CreateTableFile failed: " + status.ToString(); - ENGINE_LOG_ERROR << errMsg; + } else { + std::string err_msg = "MemTableFile::CreateTableFile failed: " + status.ToString(); + ENGINE_LOG_ERROR << err_msg; } return status; } -Status MemTableFile::Add(const VectorSource::Ptr& source) { +Status MemTableFile::Add(const VectorSource::Ptr &source) { if (table_file_schema_.dimension_ <= 0) { - std::string errMsg = "MemTableFile::Add: table_file_schema dimension = " + - std::to_string(table_file_schema_.dimension_) + ", table_id = " + table_file_schema_.table_id_; - ENGINE_LOG_ERROR << errMsg; - return Status::Error(errMsg); + std::string 
err_msg = "MemTableFile::Add: table_file_schema dimension = " + + std::to_string(table_file_schema_.dimension_) + ", table_id = " + table_file_schema_.table_id_; + ENGINE_LOG_ERROR << err_msg; + return Status::Error(err_msg); } - size_t singleVectorMemSize = table_file_schema_.dimension_ * VECTOR_TYPE_SIZE; - size_t memLeft = GetMemLeft(); - if (memLeft >= singleVectorMemSize) { - size_t numVectorsToAdd = std::ceil(memLeft / singleVectorMemSize); - size_t numVectorsAdded; - auto status = source->Add(execution_engine_, table_file_schema_, numVectorsToAdd, numVectorsAdded); + size_t single_vector_mem_size = table_file_schema_.dimension_ * VECTOR_TYPE_SIZE; + size_t mem_left = GetMemLeft(); + if (mem_left >= single_vector_mem_size) { + size_t num_vectors_to_add = std::ceil(mem_left / single_vector_mem_size); + size_t num_vectors_added; + auto status = source->Add(execution_engine_, table_file_schema_, num_vectors_to_add, num_vectors_added); if (status.ok()) { - current_mem_ += (numVectorsAdded * singleVectorMemSize); + current_mem_ += (num_vectors_added * single_vector_mem_size); } return status; } @@ -73,8 +73,8 @@ size_t MemTableFile::GetMemLeft() { } bool MemTableFile::IsFull() { - size_t singleVectorMemSize = table_file_schema_.dimension_ * VECTOR_TYPE_SIZE; - return (GetMemLeft() < singleVectorMemSize); + size_t single_vector_mem_size = table_file_schema_.dimension_ * VECTOR_TYPE_SIZE; + return (GetMemLeft() < single_vector_mem_size); } Status MemTableFile::Serialize() { @@ -88,15 +88,15 @@ Status MemTableFile::Serialize() { auto total_time = METRICS_MICROSECONDS(start_time, end_time); table_file_schema_.size_ = size; - server::Metrics::GetInstance().DiskStoreIOSpeedGaugeSet((double)size/total_time); + server::Metrics::GetInstance().DiskStoreIOSpeedGaugeSet((double) size / total_time); table_file_schema_.file_type_ = (size >= options_.index_trigger_size) ? - meta::TableFileSchema::TO_INDEX : meta::TableFileSchema::RAW; + meta::TableFileSchema::TO_INDEX : meta::TableFileSchema::RAW; auto status = meta_->UpdateTableFile(table_file_schema_); LOG(DEBUG) << "New " << ((table_file_schema_.file_type_ == meta::TableFileSchema::RAW) ? 
"raw" : "to_index") - << " file " << table_file_schema_.file_id_ << " of size " << (double)size / (double)M << " M"; + << " file " << table_file_schema_.file_id_ << " of size " << (double) size / (double) M << " M"; execution_engine_->Cache(); diff --git a/cpp/src/db/MemTableFile.h b/cpp/src/db/MemTableFile.h index 1be0ae78ba..4d0011b362 100644 --- a/cpp/src/db/MemTableFile.h +++ b/cpp/src/db/MemTableFile.h @@ -5,20 +5,21 @@ #include "VectorSource.h" #include "ExecutionEngine.h" + namespace zilliz { namespace milvus { namespace engine { class MemTableFile { -public: + public: using Ptr = std::shared_ptr; using MetaPtr = meta::Meta::Ptr; - MemTableFile(const std::string& table_id, const std::shared_ptr& meta, const Options& options); + MemTableFile(const std::string &table_id, const std::shared_ptr &meta, const Options &options); - Status Add(const VectorSource::Ptr& source); + Status Add(const VectorSource::Ptr &source); size_t GetCurrentMem(); @@ -28,7 +29,7 @@ public: Status Serialize(); -private: + private: Status CreateTableFile(); diff --git a/cpp/src/db/NewMemManager.cpp b/cpp/src/db/NewMemManager.cpp index 3c78f37101..b0fcc9d4ae 100644 --- a/cpp/src/db/NewMemManager.cpp +++ b/cpp/src/db/NewMemManager.cpp @@ -5,11 +5,12 @@ #include + namespace zilliz { namespace milvus { namespace engine { -NewMemManager::MemTablePtr NewMemManager::GetMemByTable(const std::string& table_id) { +NewMemManager::MemTablePtr NewMemManager::GetMemByTable(const std::string &table_id) { auto memIt = mem_id_map_.find(table_id); if (memIt != mem_id_map_.end()) { return memIt->second; @@ -19,27 +20,27 @@ NewMemManager::MemTablePtr NewMemManager::GetMemByTable(const std::string& table return mem_id_map_[table_id]; } -Status NewMemManager::InsertVectors(const std::string& table_id_, +Status NewMemManager::InsertVectors(const std::string &table_id_, size_t n_, - const float* vectors_, - IDNumbers& vector_ids_) { + const float *vectors_, + IDNumbers &vector_ids_) { while (GetCurrentMem() > options_.maximum_memory) { std::this_thread::sleep_for(std::chrono::milliseconds(1)); } + LOG(DEBUG) << "NewMemManager::InsertVectors: mutable mem = " << GetCurrentMutableMem() << + ", immutable mem = " << GetCurrentImmutableMem() << ", total mem = " << GetCurrentMem(); + std::unique_lock lock(mutex_); return InsertVectorsNoLock(table_id_, n_, vectors_, vector_ids_); } -Status NewMemManager::InsertVectorsNoLock(const std::string& table_id, +Status NewMemManager::InsertVectorsNoLock(const std::string &table_id, size_t n, - const float* vectors, - IDNumbers& vector_ids) { - - LOG(DEBUG) << "NewMemManager::InsertVectorsNoLock: mutable mem = " << GetCurrentMutableMem() << - ", immutable mem = " << GetCurrentImmutableMem() << ", total mem = " << GetCurrentMem(); + const float *vectors, + IDNumbers &vector_ids) { MemTablePtr mem = GetMemByTable(table_id); VectorSource::Ptr source = std::make_shared(n, vectors); @@ -54,37 +55,33 @@ Status NewMemManager::InsertVectorsNoLock(const std::string& table_id, Status NewMemManager::ToImmutable() { std::unique_lock lock(mutex_); MemIdMap temp_map; - for (auto& kv: mem_id_map_) { - if(kv.second->Empty()) { + for (auto &kv: mem_id_map_) { + if (kv.second->Empty()) { + //empty table, no need to serialize temp_map.insert(kv); - continue;//empty table, no need to serialize + } else { + immu_mem_list_.push_back(kv.second); } - immu_mem_list_.push_back(kv.second); } mem_id_map_.swap(temp_map); return Status::OK(); } -Status NewMemManager::Serialize(std::set& table_ids) { +Status 
NewMemManager::Serialize(std::set &table_ids) { ToImmutable(); std::unique_lock lock(serialization_mtx_); table_ids.clear(); - for (auto& mem : immu_mem_list_) { + for (auto &mem : immu_mem_list_) { mem->Serialize(); table_ids.insert(mem->GetTableId()); } immu_mem_list_.clear(); -// for (auto mem = immu_mem_list_.begin(); mem != immu_mem_list_.end(); ) { -// (*mem)->Serialize(); -// table_ids.insert((*mem)->GetTableId()); -// mem = immu_mem_list_.erase(mem); -// LOG(DEBUG) << "immu_mem_list_ size = " << immu_mem_list_.size(); -// } + return Status::OK(); } -Status NewMemManager::EraseMemVector(const std::string& table_id) { +Status NewMemManager::EraseMemVector(const std::string &table_id) { {//erase MemVector from rapid-insert cache std::unique_lock lock(mutex_); mem_id_map_.erase(table_id); @@ -93,8 +90,8 @@ Status NewMemManager::EraseMemVector(const std::string& table_id) { {//erase MemVector from serialize cache std::unique_lock lock(serialization_mtx_); MemList temp_list; - for (auto& mem : immu_mem_list_) { - if(mem->GetTableId() != table_id) { + for (auto &mem : immu_mem_list_) { + if (mem->GetTableId() != table_id) { temp_list.push_back(mem); } } @@ -105,20 +102,20 @@ Status NewMemManager::EraseMemVector(const std::string& table_id) { } size_t NewMemManager::GetCurrentMutableMem() { - size_t totalMem = 0; - for (auto& kv : mem_id_map_) { + size_t total_mem = 0; + for (auto &kv : mem_id_map_) { auto memTable = kv.second; - totalMem += memTable->GetCurrentMem(); + total_mem += memTable->GetCurrentMem(); } - return totalMem; + return total_mem; } size_t NewMemManager::GetCurrentImmutableMem() { - size_t totalMem = 0; - for (auto& memTable : immu_mem_list_) { - totalMem += memTable->GetCurrentMem(); + size_t total_mem = 0; + for (auto &mem_table : immu_mem_list_) { + total_mem += mem_table->GetCurrentMem(); } - return totalMem; + return total_mem; } size_t NewMemManager::GetCurrentMem() { diff --git a/cpp/src/db/NewMemManager.h b/cpp/src/db/NewMemManager.h index 9883480404..5b933c94ca 100644 --- a/cpp/src/db/NewMemManager.h +++ b/cpp/src/db/NewMemManager.h @@ -11,25 +11,26 @@ #include #include + namespace zilliz { namespace milvus { namespace engine { class NewMemManager : public MemManagerAbstract { -public: + public: using MetaPtr = meta::Meta::Ptr; using Ptr = std::shared_ptr; using MemTablePtr = typename MemTable::Ptr; - NewMemManager(const std::shared_ptr& meta, const Options& options) - : meta_(meta), options_(options) {} + NewMemManager(const std::shared_ptr &meta, const Options &options) + : meta_(meta), options_(options) {} - Status InsertVectors(const std::string& table_id, - size_t n, const float* vectors, IDNumbers& vector_ids) override; + Status InsertVectors(const std::string &table_id, + size_t n, const float *vectors, IDNumbers &vector_ids) override; - Status Serialize(std::set& table_ids) override; + Status Serialize(std::set &table_ids) override; - Status EraseMemVector(const std::string& table_id) override; + Status EraseMemVector(const std::string &table_id) override; size_t GetCurrentMutableMem() override; @@ -37,11 +38,11 @@ public: size_t GetCurrentMem() override; -private: - MemTablePtr GetMemByTable(const std::string& table_id); + private: + MemTablePtr GetMemByTable(const std::string &table_id); - Status InsertVectorsNoLock(const std::string& table_id, - size_t n, const float* vectors, IDNumbers& vector_ids); + Status InsertVectorsNoLock(const std::string &table_id, + size_t n, const float *vectors, IDNumbers &vector_ids); Status ToImmutable(); using MemIdMap 
= std::map; diff --git a/cpp/src/db/VectorSource.cpp b/cpp/src/db/VectorSource.cpp index d032be51f6..74c07ae1f6 100644 --- a/cpp/src/db/VectorSource.cpp +++ b/cpp/src/db/VectorSource.cpp @@ -4,6 +4,7 @@ #include "Log.h" #include "metrics/Metrics.h" + namespace zilliz { namespace milvus { namespace engine { @@ -11,16 +12,16 @@ namespace engine { VectorSource::VectorSource(const size_t &n, const float *vectors) : - n_(n), - vectors_(vectors), - id_generator_(new SimpleIDGenerator()) { + n_(n), + vectors_(vectors), + id_generator_(new SimpleIDGenerator()) { current_num_vectors_added = 0; } -Status VectorSource::Add(const ExecutionEnginePtr& execution_engine, - const meta::TableFileSchema& table_file_schema, - const size_t& num_vectors_to_add, - size_t& num_vectors_added) { +Status VectorSource::Add(const ExecutionEnginePtr &execution_engine, + const meta::TableFileSchema &table_file_schema, + const size_t &num_vectors_to_add, + size_t &num_vectors_added) { auto start_time = METRICS_NOW_TIME; @@ -36,8 +37,7 @@ Status VectorSource::Add(const ExecutionEnginePtr& execution_engine, vector_ids_.insert(vector_ids_.end(), std::make_move_iterator(vector_ids_to_add.begin()), std::make_move_iterator(vector_ids_to_add.end())); - } - else { + } else { ENGINE_LOG_ERROR << "VectorSource::Add failed: " + status.ToString(); } diff --git a/cpp/src/db/VectorSource.h b/cpp/src/db/VectorSource.h index dec31f39e1..7092805a6d 100644 --- a/cpp/src/db/VectorSource.h +++ b/cpp/src/db/VectorSource.h @@ -5,22 +5,23 @@ #include "IDGenerator.h" #include "ExecutionEngine.h" + namespace zilliz { namespace milvus { namespace engine { class VectorSource { -public: + public: using Ptr = std::shared_ptr; - VectorSource(const size_t& n, const float* vectors); + VectorSource(const size_t &n, const float *vectors); - Status Add(const ExecutionEnginePtr& execution_engine, - const meta::TableFileSchema& table_file_schema, - const size_t& num_vectors_to_add, - size_t& num_vectors_added); + Status Add(const ExecutionEnginePtr &execution_engine, + const meta::TableFileSchema &table_file_schema, + const size_t &num_vectors_to_add, + size_t &num_vectors_added); size_t GetNumVectorsAdded(); @@ -28,15 +29,15 @@ public: IDNumbers GetVectorIds(); -private: + private: const size_t n_; - const float* vectors_; + const float *vectors_; IDNumbers vector_ids_; size_t current_num_vectors_added; - IDGenerator* id_generator_; + IDGenerator *id_generator_; }; //VectorSource diff --git a/cpp/unittest/db/mem_test.cpp b/cpp/unittest/db/mem_test.cpp index 818c3a6388..5b7972ec35 100644 --- a/cpp/unittest/db/mem_test.cpp +++ b/cpp/unittest/db/mem_test.cpp @@ -15,33 +15,34 @@ #include #include + using namespace zilliz::milvus; namespace { - static const std::string TABLE_NAME = "test_group"; - static constexpr int64_t TABLE_DIM = 256; - static constexpr int64_t VECTOR_COUNT = 250000; - static constexpr int64_t INSERT_LOOP = 10000; +static const std::string TABLE_NAME = "test_group"; +static constexpr int64_t TABLE_DIM = 256; +static constexpr int64_t VECTOR_COUNT = 250000; +static constexpr int64_t INSERT_LOOP = 10000; - engine::meta::TableSchema BuildTableSchema() { - engine::meta::TableSchema table_info; - table_info.dimension_ = TABLE_DIM; - table_info.table_id_ = TABLE_NAME; - table_info.engine_type_ = (int)engine::EngineType::FAISS_IDMAP; - return table_info; - } +engine::meta::TableSchema BuildTableSchema() { + engine::meta::TableSchema table_info; + table_info.dimension_ = TABLE_DIM; + table_info.table_id_ = TABLE_NAME; + table_info.engine_type_ = 
(int) engine::EngineType::FAISS_IDMAP; + return table_info; +} - void BuildVectors(int64_t n, std::vector& vectors) { - vectors.clear(); - vectors.resize(n*TABLE_DIM); - float* data = vectors.data(); - for(int i = 0; i < n; i++) { - for(int j = 0; j < TABLE_DIM; j++) data[TABLE_DIM * i + j] = drand48(); - data[TABLE_DIM * i] += i / 2000.; - } +void BuildVectors(int64_t n, std::vector &vectors) { + vectors.clear(); + vectors.resize(n * TABLE_DIM); + float *data = vectors.data(); + for (int i = 0; i < n; i++) { + for (int j = 0; j < TABLE_DIM; j++) data[TABLE_DIM * i + j] = drand48(); + data[TABLE_DIM * i] += i / 2000.; } } +} TEST_F(NewMemManagerTest, VECTOR_SOURCE_TEST) { @@ -65,7 +66,7 @@ TEST_F(NewMemManagerTest, VECTOR_SOURCE_TEST) { size_t num_vectors_added; engine::ExecutionEnginePtr execution_engine_ = engine::EngineFactory::Build(table_file_schema.dimension_, table_file_schema.location_, - (engine::EngineType)table_file_schema.engine_type_); + (engine::EngineType) table_file_schema.engine_type_); status = source.Add(execution_engine_, table_file_schema, 50, num_vectors_added); ASSERT_TRUE(status.ok()); @@ -82,10 +83,6 @@ TEST_F(NewMemManagerTest, VECTOR_SOURCE_TEST) { vector_ids = source.GetVectorIds(); ASSERT_EQ(vector_ids.size(), 100); -// for (auto& id : vector_ids) { -// std::cout << id << std::endl; -// } - status = impl_->DropAll(); ASSERT_TRUE(status.ok()); } @@ -99,7 +96,7 @@ TEST_F(NewMemManagerTest, MEM_TABLE_FILE_TEST) { auto status = impl_->CreateTable(table_schema); ASSERT_TRUE(status.ok()); - engine::MemTableFile memTableFile(TABLE_NAME, impl_, options); + engine::MemTableFile mem_table_file(TABLE_NAME, impl_, options); int64_t n_100 = 100; std::vector vectors_100; @@ -107,28 +104,28 @@ TEST_F(NewMemManagerTest, MEM_TABLE_FILE_TEST) { engine::VectorSource::Ptr source = std::make_shared(n_100, vectors_100.data()); - status = memTableFile.Add(source); + status = mem_table_file.Add(source); ASSERT_TRUE(status.ok()); -// std::cout << memTableFile.GetCurrentMem() << " " << memTableFile.GetMemLeft() << std::endl; +// std::cout << mem_table_file.GetCurrentMem() << " " << mem_table_file.GetMemLeft() << std::endl; engine::IDNumbers vector_ids = source->GetVectorIds(); ASSERT_EQ(vector_ids.size(), 100); size_t singleVectorMem = sizeof(float) * TABLE_DIM; - ASSERT_EQ(memTableFile.GetCurrentMem(), n_100 * singleVectorMem); + ASSERT_EQ(mem_table_file.GetCurrentMem(), n_100 * singleVectorMem); int64_t n_max = engine::MAX_TABLE_FILE_MEM / singleVectorMem; std::vector vectors_128M; BuildVectors(n_max, vectors_128M); engine::VectorSource::Ptr source_128M = std::make_shared(n_max, vectors_128M.data()); - status = memTableFile.Add(source_128M); + status = mem_table_file.Add(source_128M); vector_ids = source_128M->GetVectorIds(); ASSERT_EQ(vector_ids.size(), n_max - n_100); - ASSERT_TRUE(memTableFile.IsFull()); + ASSERT_TRUE(mem_table_file.IsFull()); status = impl_->DropAll(); ASSERT_TRUE(status.ok()); @@ -149,34 +146,34 @@ TEST_F(NewMemManagerTest, MEM_TABLE_TEST) { engine::VectorSource::Ptr source_100 = std::make_shared(n_100, vectors_100.data()); - engine::MemTable memTable(TABLE_NAME, impl_, options); + engine::MemTable mem_table(TABLE_NAME, impl_, options); - status = memTable.Add(source_100); + status = mem_table.Add(source_100); ASSERT_TRUE(status.ok()); engine::IDNumbers vector_ids = source_100->GetVectorIds(); ASSERT_EQ(vector_ids.size(), 100); - engine::MemTableFile::Ptr memTableFile; - memTable.GetCurrentMemTableFile(memTableFile); + engine::MemTableFile::Ptr mem_table_file; + 
mem_table.GetCurrentMemTableFile(mem_table_file); size_t singleVectorMem = sizeof(float) * TABLE_DIM; - ASSERT_EQ(memTableFile->GetCurrentMem(), n_100 * singleVectorMem); + ASSERT_EQ(mem_table_file->GetCurrentMem(), n_100 * singleVectorMem); int64_t n_max = engine::MAX_TABLE_FILE_MEM / singleVectorMem; std::vector vectors_128M; BuildVectors(n_max, vectors_128M); engine::VectorSource::Ptr source_128M = std::make_shared(n_max, vectors_128M.data()); - status = memTable.Add(source_128M); + status = mem_table.Add(source_128M); ASSERT_TRUE(status.ok()); vector_ids = source_128M->GetVectorIds(); ASSERT_EQ(vector_ids.size(), n_max); - memTable.GetCurrentMemTableFile(memTableFile); - ASSERT_EQ(memTableFile->GetCurrentMem(), n_100 * singleVectorMem); + mem_table.GetCurrentMemTableFile(mem_table_file); + ASSERT_EQ(mem_table_file->GetCurrentMem(), n_100 * singleVectorMem); - ASSERT_EQ(memTable.GetTableFileCount(), 2); + ASSERT_EQ(mem_table.GetTableFileCount(), 2); int64_t n_1G = 1024000; std::vector vectors_1G; @@ -184,16 +181,16 @@ TEST_F(NewMemManagerTest, MEM_TABLE_TEST) { engine::VectorSource::Ptr source_1G = std::make_shared(n_1G, vectors_1G.data()); - status = memTable.Add(source_1G); + status = mem_table.Add(source_1G); ASSERT_TRUE(status.ok()); vector_ids = source_1G->GetVectorIds(); ASSERT_EQ(vector_ids.size(), n_1G); int expectedTableFileCount = 2 + std::ceil((n_1G - n_100) * singleVectorMem / engine::MAX_TABLE_FILE_MEM); - ASSERT_EQ(memTable.GetTableFileCount(), expectedTableFileCount); + ASSERT_EQ(mem_table.GetTableFileCount(), expectedTableFileCount); - status = memTable.Serialize(); + status = mem_table.Serialize(); ASSERT_TRUE(status.ok()); status = impl_->DropAll(); @@ -216,7 +213,7 @@ TEST_F(NewMemManagerTest, SERIAL_INSERT_SEARCH_TEST) { ASSERT_STATS(stat); ASSERT_EQ(table_info_get.dimension_, TABLE_DIM); - std::map> search_vectors; + std::map> search_vectors; { engine::IDNumbers vector_ids; int64_t nb = 1024000; @@ -231,8 +228,8 @@ TEST_F(NewMemManagerTest, SERIAL_INSERT_SEARCH_TEST) { std::mt19937 gen(rd()); std::uniform_int_distribution dis(0, nb - 1); - int64_t numQuery = 20; - for (int64_t i = 0; i < numQuery; ++i) { + int64_t num_query = 20; + for (int64_t i = 0; i < num_query; ++i) { int64_t index = dis(gen); std::vector search; for (int64_t j = 0; j < TABLE_DIM; j++) { @@ -243,8 +240,8 @@ TEST_F(NewMemManagerTest, SERIAL_INSERT_SEARCH_TEST) { } int k = 10; - for(auto& pair : search_vectors) { - auto& search = pair.second; + for (auto &pair : search_vectors) { + auto &search = pair.second; engine::QueryResults results; stat = db_->Query(TABLE_NAME, k, 1, search.data(), results); ASSERT_EQ(results[0][0].first, pair.first); @@ -329,18 +326,18 @@ TEST_F(NewMemManagerTest, CONCURRENT_INSERT_SEARCH_TEST) { uint64_t count = 0; uint64_t prev_count = 0; - for (auto j=0; j<10; ++j) { + for (auto j = 0; j < 10; ++j) { ss.str(""); db_->Size(count); prev_count = count; START_TIMER; stat = db_->Query(TABLE_NAME, k, qb, qxb.data(), results); - ss << "Search " << j << " With Size " << count/engine::meta::M << " M"; + ss << "Search " << j << " With Size " << count / engine::meta::M << " M"; STOP_TIMER(ss.str()); ASSERT_STATS(stat); - for (auto k=0; kInsertVectors(TABLE_NAME, qb, qxb.data(), target_ids); ASSERT_EQ(target_ids.size(), qb); } else { diff --git a/cpp/unittest/db/utils.h b/cpp/unittest/db/utils.h index d06500de5c..9c126030c2 100644 --- a/cpp/unittest/db/utils.h +++ b/cpp/unittest/db/utils.h @@ -30,7 +30,7 @@ #define STOP_TIMER(name) #endif -void 
ASSERT_STATS(zilliz::milvus::engine::Status& stat); +void ASSERT_STATS(zilliz::milvus::engine::Status &stat); //class TestEnv : public ::testing::Environment { //public: @@ -54,8 +54,8 @@ void ASSERT_STATS(zilliz::milvus::engine::Status& stat); // ::testing::AddGlobalTestEnvironment(new TestEnv); class DBTest : public ::testing::Test { -protected: - zilliz::milvus::engine::DB* db_; + protected: + zilliz::milvus::engine::DB *db_; void InitLog(); virtual void SetUp() override; @@ -64,13 +64,13 @@ protected: }; class DBTest2 : public DBTest { -protected: + protected: virtual zilliz::milvus::engine::Options GetOptions() override; }; class MetaTest : public DBTest { -protected: + protected: std::shared_ptr impl_; virtual void SetUp() override; @@ -78,17 +78,17 @@ protected: }; class MySQLTest : public ::testing::Test { -protected: + protected: // std::shared_ptr impl_; zilliz::milvus::engine::DBMetaOptions getDBMetaOptions(); }; -class MySQLDBTest : public ::testing::Test { -protected: +class MySQLDBTest : public ::testing::Test { + protected: zilliz::milvus::engine::Options GetOptions(); }; -class NewMemManagerTest : public ::testing::Test { +class NewMemManagerTest : public ::testing::Test { void InitLog(); - virtual void SetUp() override; + void SetUp() override; }; From 92ebfe95d0e14411bbc9ccd54f777ac316d12779 Mon Sep 17 00:00:00 2001 From: "xj.lin" Date: Mon, 8 Jul 2019 19:09:18 +0800 Subject: [PATCH 16/91] update... Former-commit-id: 6366c5292e1483be5009e46bd273fdb00433fea6 --- cpp/src/CMakeLists.txt | 2 +- cpp/src/cache/DataObj.h | 2 +- cpp/src/db/EngineFactory.cpp | 2 +- cpp/src/db/ExecutionEngineImpl.cpp | 127 +++++++++++++++------ cpp/src/db/ExecutionEngineImpl.h | 28 +++-- cpp/src/db/FaissExecutionEngine.cpp | 2 + cpp/src/db/FaissExecutionEngine.h | 2 + cpp/src/wrapper/Index.cpp | 2 + cpp/src/wrapper/Index.h | 27 +++-- cpp/src/wrapper/IndexBuilder.cpp | 4 +- cpp/src/wrapper/IndexBuilder.h | 2 + cpp/src/wrapper/Operand.cpp | 2 + cpp/src/wrapper/Operand.h | 2 + cpp/src/wrapper/knowhere/data_transfer.cpp | 2 +- cpp/src/wrapper/knowhere/data_transfer.h | 2 +- cpp/src/wrapper/knowhere/vec_impl.cpp | 37 ++++-- cpp/src/wrapper/knowhere/vec_impl.h | 15 ++- cpp/src/wrapper/knowhere/vec_index.cpp | 42 ++++--- cpp/src/wrapper/knowhere/vec_index.h | 19 ++- cpp/thirdparty/knowhere | 2 +- 20 files changed, 233 insertions(+), 90 deletions(-) diff --git a/cpp/src/CMakeLists.txt b/cpp/src/CMakeLists.txt index cc577e2138..0627b2010a 100644 --- a/cpp/src/CMakeLists.txt +++ b/cpp/src/CMakeLists.txt @@ -10,7 +10,7 @@ aux_source_directory(config config_files) aux_source_directory(server server_files) aux_source_directory(utils utils_files) aux_source_directory(db db_files) -aux_source_directory(wrapper wrapper_files) +#aux_source_directory(wrapper wrapper_files) aux_source_directory(metrics metrics_files) aux_source_directory(wrapper/knowhere knowhere_files) diff --git a/cpp/src/cache/DataObj.h b/cpp/src/cache/DataObj.h index 1dff04027e..d5e52f7664 100644 --- a/cpp/src/cache/DataObj.h +++ b/cpp/src/cache/DataObj.h @@ -28,7 +28,7 @@ public: return 0; } - return index_->ntotal*(index_->dim*4); + return index_->Count() * index_->Dimension() * sizeof(float); } private: diff --git a/cpp/src/db/EngineFactory.cpp b/cpp/src/db/EngineFactory.cpp index 56a6b4d1d2..3389c0a07e 100644 --- a/cpp/src/db/EngineFactory.cpp +++ b/cpp/src/db/EngineFactory.cpp @@ -4,7 +4,7 @@ * Proprietary and confidential. 
******************************************************************************/ #include "EngineFactory.h" -#include "FaissExecutionEngine.h" +//#include "FaissExecutionEngine.h" #include "ExecutionEngineImpl.h" #include "Log.h" diff --git a/cpp/src/db/ExecutionEngineImpl.cpp b/cpp/src/db/ExecutionEngineImpl.cpp index 32b7826430..28349695c7 100644 --- a/cpp/src/db/ExecutionEngineImpl.cpp +++ b/cpp/src/db/ExecutionEngineImpl.cpp @@ -3,13 +3,14 @@ * Unauthorized copying of this file, via any medium is strictly prohibited. * Proprietary and confidential. ******************************************************************************/ -#include "ExecutionEngineImpl.h" +#include #include "Log.h" +#include "src/cache/CpuCacheMgr.h" +#include "ExecutionEngineImpl.h" +#include "wrapper/knowhere/vec_index.h" #include "wrapper/knowhere/vec_impl.h" -#include "knowhere/index/vector_index/ivf.h" -#include "knowhere/index/vector_index/gpu_ivf.h" -#include "knowhere/index/vector_index/cpu_kdt_rng.h" + namespace zilliz { namespace milvus { @@ -17,95 +18,151 @@ namespace engine { ExecutionEngineImpl::ExecutionEngineImpl(uint16_t dimension, - const std::string& location, - EngineType type) - : location_(location) { - index_ = CreatetVecIndex(type); + const std::string &location, + EngineType type) + : location_(location), dim(dimension), build_type(type) { + index_ = CreatetVecIndex(EngineType::FAISS_IDMAP); + std::static_pointer_cast(index_)->Build(dimension); } -vecwise::engine::VecIndexPtr ExecutionEngineImpl::CreatetVecIndex(EngineType type) { - std::shared_ptr index; - switch(type) { - case EngineType::FAISS_IDMAP: { +ExecutionEngineImpl::ExecutionEngineImpl(VecIndexPtr index, + const std::string &location, + EngineType type) + : index_(std::move(index)), location_(location), build_type(type) { +} +VecIndexPtr ExecutionEngineImpl::CreatetVecIndex(EngineType type) { + std::shared_ptr index; + switch (type) { + case EngineType::FAISS_IDMAP: { + index = GetVecIndexFactory(IndexType::FAISS_IDMAP); break; } case EngineType::FAISS_IVFFLAT_GPU: { - index = std::make_shared(0); + index = GetVecIndexFactory(IndexType::FAISS_IVFFLAT_GPU); break; } case EngineType::FAISS_IVFFLAT_CPU: { - index = std::make_shared(); + index = GetVecIndexFactory(IndexType::FAISS_IVFFLAT_CPU); break; } case EngineType::SPTAG_KDT_RNT_CPU: { - index = std::make_shared(); + index = GetVecIndexFactory(IndexType::SPTAG_KDT_RNT_CPU); break; } - default:{ + default: { ENGINE_LOG_ERROR << "Invalid engine type"; return nullptr; } } - - return std::make_shared(index); + return index; } Status ExecutionEngineImpl::AddWithIds(long n, const float *xdata, const long *xids) { - + index_->Add(n, xdata, xids, Config::object{{"dim", dim}}); return Status::OK(); } size_t ExecutionEngineImpl::Count() const { - return 0; + return index_->Count(); } size_t ExecutionEngineImpl::Size() const { - return 0; + return (size_t) (Count() * Dimension()) * sizeof(float); } size_t ExecutionEngineImpl::Dimension() const { - return 0; + return index_->Dimension(); } size_t ExecutionEngineImpl::PhysicalSize() const { - return 0; + return (size_t) (Count() * Dimension()) * sizeof(float); } Status ExecutionEngineImpl::Serialize() { + // TODO(groot): + auto binaryset = index_->Serialize(); return Status::OK(); } Status ExecutionEngineImpl::Load() { - + // TODO(groot): return Status::OK(); } -Status ExecutionEngineImpl::Merge(const std::string& location) { +VecIndexPtr ExecutionEngineImpl::Load(const std::string &location) { + // TODO(groot): dev func in Fake code + // 
pseude code + //auto data = read_file(location); + //auto index_type = get_index_type(data); + //auto binaryset = get_index_binary(data); + ///// - return Status::OK(); -} - -ExecutionEnginePtr -ExecutionEngineImpl::BuildIndex(const std::string& location) { + //return LoadVecIndex(index_type, binaryset); return nullptr; } -Status ExecutionEngineImpl::Search(long n, - const float *data, - long k, - float *distances, - long *labels) const { +Status ExecutionEngineImpl::Merge(const std::string &location) { + if (location == location_) { + return Status::Error("Cannot Merge Self"); + } + ENGINE_LOG_DEBUG << "Merge index file: " << location << " to: " << location_; + auto to_merge = zilliz::milvus::cache::CpuCacheMgr::GetInstance()->GetIndex(location); + if (!to_merge) { + to_merge = Load(location); + } + + auto file_index = std::dynamic_pointer_cast(index_); + index_->Add(file_index->Count(), file_index->GetRawVectors(), file_index->GetRawIds()); + return Status::OK(); +} + +// TODO(linxj): add config +ExecutionEnginePtr +ExecutionEngineImpl::BuildIndex(const std::string &location) { + ENGINE_LOG_DEBUG << "Build index file: " << location << " from: " << location_; + + auto from_index = std::dynamic_pointer_cast(index_); + auto to_index = CreatetVecIndex(build_type); + to_index->BuildAll(Count(), + from_index->GetRawVectors(), + from_index->GetRawIds(), + Config::object{{"dim", Dimension()}, {"gpu_id", gpu_num}}); + + return std::make_shared(to_index, location, build_type); +} + +Status ExecutionEngineImpl::Search(long n, + const float *data, + long k, + float *distances, + long *labels) const { + index_->Search(n, data, distances, labels, Config::object{{"k", k}, {"nprobe", nprobe_}}); return Status::OK(); } Status ExecutionEngineImpl::Cache() { + zilliz::milvus::cache::CpuCacheMgr::GetInstance()->InsertItem(location_, index_); return Status::OK(); } Status ExecutionEngineImpl::Init() { + using namespace zilliz::milvus::server; + ServerConfig &config = ServerConfig::GetInstance(); + ConfigNode server_config = config.GetConfig(CONFIG_SERVER); + gpu_num = server_config.GetInt32Value("gpu_index", 0); + + switch (build_type) { + case EngineType::FAISS_IVFFLAT_GPU: { + } + case EngineType::FAISS_IVFFLAT_CPU: { + ConfigNode engine_config = config.GetConfig(CONFIG_ENGINE); + nprobe_ = engine_config.GetInt32Value(CONFIG_NPROBE, 1000); + break; + } + } return Status::OK(); } diff --git a/cpp/src/db/ExecutionEngineImpl.h b/cpp/src/db/ExecutionEngineImpl.h index c720f07158..6c0c83d9b3 100644 --- a/cpp/src/db/ExecutionEngineImpl.h +++ b/cpp/src/db/ExecutionEngineImpl.h @@ -11,17 +11,22 @@ #include #include + namespace zilliz { namespace milvus { namespace engine { class ExecutionEngineImpl : public ExecutionEngine { -public: + public: ExecutionEngineImpl(uint16_t dimension, - const std::string& location, - EngineType type); + const std::string &location, + EngineType type); + + ExecutionEngineImpl(VecIndexPtr index, + const std::string &location, + EngineType type); Status AddWithIds(long n, const float *xdata, const long *xids) override; @@ -37,7 +42,7 @@ public: Status Load() override; - Status Merge(const std::string& location) override; + Status Merge(const std::string &location) override; Status Search(long n, const float *data, @@ -45,21 +50,26 @@ public: float *distances, long *labels) const override; - ExecutionEnginePtr BuildIndex(const std::string&) override; + ExecutionEnginePtr BuildIndex(const std::string &) override; Status Cache() override; Status Init() override; -private: - 
vecwise::engine::VecIndexPtr CreatetVecIndex(EngineType type); + private: + VecIndexPtr CreatetVecIndex(EngineType type); -protected: - vecwise::engine::VecIndexPtr index_; + VecIndexPtr Load(const std::string &location); + protected: + VecIndexPtr index_ = nullptr; + EngineType build_type; + + int64_t dim; std::string location_; size_t nprobe_ = 0; + int64_t gpu_num = 0; }; diff --git a/cpp/src/db/FaissExecutionEngine.cpp b/cpp/src/db/FaissExecutionEngine.cpp index 20bd530e78..a2abe02e8a 100644 --- a/cpp/src/db/FaissExecutionEngine.cpp +++ b/cpp/src/db/FaissExecutionEngine.cpp @@ -3,6 +3,7 @@ * Unauthorized copying of this file, via any medium is strictly prohibited. * Proprietary and confidential. ******************************************************************************/ +#if 0 #include "FaissExecutionEngine.h" #include "Log.h" @@ -181,3 +182,4 @@ Status FaissExecutionEngine::Init() { } // namespace engine } // namespace milvus } // namespace zilliz +#endif diff --git a/cpp/src/db/FaissExecutionEngine.h b/cpp/src/db/FaissExecutionEngine.h index f9f37ad978..f8f0ad88bc 100644 --- a/cpp/src/db/FaissExecutionEngine.h +++ b/cpp/src/db/FaissExecutionEngine.h @@ -5,6 +5,7 @@ ******************************************************************************/ #pragma once +#if 0 #include "ExecutionEngine.h" #include "faiss/Index.h" @@ -71,3 +72,4 @@ protected: } // namespace engine } // namespace milvus } // namespace zilliz +#endif diff --git a/cpp/src/wrapper/Index.cpp b/cpp/src/wrapper/Index.cpp index 18e20d830a..4b10c1e686 100644 --- a/cpp/src/wrapper/Index.cpp +++ b/cpp/src/wrapper/Index.cpp @@ -4,6 +4,7 @@ // Proprietary and confidential. //////////////////////////////////////////////////////////////////////////////// +#if 0 // TODO: maybe support static search #ifdef GPU_VERSION #include "faiss/gpu/GpuAutoTune.h" @@ -80,3 +81,4 @@ Index_ptr read_index(const std::string &file_name) { } } } +#endif diff --git a/cpp/src/wrapper/Index.h b/cpp/src/wrapper/Index.h index ba157348d4..1668059d11 100644 --- a/cpp/src/wrapper/Index.h +++ b/cpp/src/wrapper/Index.h @@ -6,23 +6,29 @@ #pragma once -#include -#include -#include -#include -#include +//#include +//#include +//#include +//#include +//#include +// +//#include "faiss/AutoTune.h" +//#include "faiss/index_io.h" +// +//#include "Operand.h" -#include "faiss/AutoTune.h" -#include "faiss/index_io.h" +#include "knowhere/vec_index.h" -#include "Operand.h" namespace zilliz { namespace milvus { namespace engine { -class Index; -using Index_ptr = std::shared_ptr; +using Index_ptr = VecIndexPtr; + +#if 0 +//class Index; +//using Index_ptr = std::shared_ptr; class Index { typedef long idx_t; @@ -75,6 +81,7 @@ private: void write_index(const Index_ptr &index, const std::string &file_name); extern Index_ptr read_index(const std::string &file_name); +#endif } diff --git a/cpp/src/wrapper/IndexBuilder.cpp b/cpp/src/wrapper/IndexBuilder.cpp index d4429c381a..1302ca4726 100644 --- a/cpp/src/wrapper/IndexBuilder.cpp +++ b/cpp/src/wrapper/IndexBuilder.cpp @@ -4,6 +4,7 @@ // Proprietary and confidential. 
//////////////////////////////////////////////////////////////////////////////// +#if 0 #include "mutex" @@ -128,10 +129,8 @@ Index_ptr BgCpuBuilder::build_all(const long &nb, const float *xb, const long *i return std::make_shared(index); } -// TODO: Be Factory pattern later IndexBuilderPtr GetIndexBuilder(const Operand_ptr &opd) { if (opd->index_type == "IDMap") { - // TODO: fix hardcode IndexBuilderPtr index = nullptr; return std::make_shared(opd); } @@ -142,3 +141,4 @@ IndexBuilderPtr GetIndexBuilder(const Operand_ptr &opd) { } } } +#endif diff --git a/cpp/src/wrapper/IndexBuilder.h b/cpp/src/wrapper/IndexBuilder.h index 8752063560..4cb6de814b 100644 --- a/cpp/src/wrapper/IndexBuilder.h +++ b/cpp/src/wrapper/IndexBuilder.h @@ -4,6 +4,7 @@ // Proprietary and confidential. //////////////////////////////////////////////////////////////////////////////// +#if 0 #pragma once #include "faiss/Index.h" @@ -64,3 +65,4 @@ extern IndexBuilderPtr GetIndexBuilder(const Operand_ptr &opd); } } } +#endif diff --git a/cpp/src/wrapper/Operand.cpp b/cpp/src/wrapper/Operand.cpp index 25341676a6..4e9ac1011b 100644 --- a/cpp/src/wrapper/Operand.cpp +++ b/cpp/src/wrapper/Operand.cpp @@ -4,6 +4,7 @@ // Proprietary and confidential. //////////////////////////////////////////////////////////////////////////////// +#if 0 #include "Operand.h" @@ -90,3 +91,4 @@ Operand_ptr str_to_operand(const std::string &input) { } } } +#endif diff --git a/cpp/src/wrapper/Operand.h b/cpp/src/wrapper/Operand.h index 85a0eb8080..0e675f6a1b 100644 --- a/cpp/src/wrapper/Operand.h +++ b/cpp/src/wrapper/Operand.h @@ -4,6 +4,7 @@ // Proprietary and confidential. //////////////////////////////////////////////////////////////////////////////// +#if 0 #pragma once #include @@ -42,3 +43,4 @@ extern Operand_ptr str_to_operand(const std::string &input); } } } +#endif diff --git a/cpp/src/wrapper/knowhere/data_transfer.cpp b/cpp/src/wrapper/knowhere/data_transfer.cpp index af5ad212e4..583a44ee29 100644 --- a/cpp/src/wrapper/knowhere/data_transfer.cpp +++ b/cpp/src/wrapper/knowhere/data_transfer.cpp @@ -8,7 +8,7 @@ namespace zilliz { -namespace vecwise { +namespace milvus { namespace engine { using namespace zilliz::knowhere; diff --git a/cpp/src/wrapper/knowhere/data_transfer.h b/cpp/src/wrapper/knowhere/data_transfer.h index c99cd1c742..46de4ff21f 100644 --- a/cpp/src/wrapper/knowhere/data_transfer.h +++ b/cpp/src/wrapper/knowhere/data_transfer.h @@ -10,7 +10,7 @@ namespace zilliz { -namespace vecwise { +namespace milvus { namespace engine { extern zilliz::knowhere::DatasetPtr diff --git a/cpp/src/wrapper/knowhere/vec_impl.cpp b/cpp/src/wrapper/knowhere/vec_impl.cpp index e24d470acc..e1a93d37b9 100644 --- a/cpp/src/wrapper/knowhere/vec_impl.cpp +++ b/cpp/src/wrapper/knowhere/vec_impl.cpp @@ -4,18 +4,14 @@ // Proprietary and confidential. 
//////////////////////////////////////////////////////////////////////////////// -#include "knowhere/index/index.h" -#include "knowhere/index/index_model.h" -#include "knowhere/index/index_type.h" -#include "knowhere/adapter/sptag.h" -#include "knowhere/common/tensor.h" +#include "knowhere/index/vector_index/idmap.h" #include "vec_impl.h" #include "data_transfer.h" namespace zilliz { -namespace vecwise { +namespace milvus { namespace engine { using namespace zilliz::knowhere; @@ -26,8 +22,8 @@ void VecIndexImpl::BuildAll(const long &nb, const Config &cfg, const long &nt, const float *xt) { - auto d = cfg["dim"].as(); - auto dataset = GenDatasetWithIds(nb, d, xb, ids); + dim = cfg["dim"].as(); + auto dataset = GenDatasetWithIds(nb, dim, xb, ids); auto preprocessor = index_->BuildPreprocessor(dataset, cfg); index_->set_preprocessor(preprocessor); @@ -39,7 +35,7 @@ void VecIndexImpl::BuildAll(const long &nb, void VecIndexImpl::Add(const long &nb, const float *xb, const long *ids, const Config &cfg) { // TODO(linxj): Assert index is trained; - auto d = cfg["dim"].as(); + auto d = cfg.get_with_default("dim", dim); auto dataset = GenDatasetWithIds(nb, d, xb, ids); index_->Add(dataset, cfg); @@ -48,8 +44,8 @@ void VecIndexImpl::Add(const long &nb, const float *xb, const long *ids, const C void VecIndexImpl::Search(const long &nq, const float *xq, float *dist, long *ids, const Config &cfg) { // TODO: Assert index is trained; - auto d = cfg["dim"].as(); auto k = cfg["k"].as(); + auto d = cfg.get_with_default("dim", dim); auto dataset = GenDataset(nq, d, xq); Config search_cfg; @@ -90,6 +86,27 @@ void VecIndexImpl::Load(const zilliz::knowhere::BinarySet &index_binary) { index_->Load(index_binary); } +int64_t VecIndexImpl::Dimension() { + return index_->Dimension(); +} + +int64_t VecIndexImpl::Count() { + return index_->Count(); +} + +float *BFIndex::GetRawVectors() { + return std::static_pointer_cast(index_)->GetRawVectors(); +} + +int64_t *BFIndex::GetRawIds() { + return std::static_pointer_cast(index_)->GetRawIds(); +} + +void BFIndex::Build(const int64_t &d) { + dim = d; + std::static_pointer_cast(index_)->Train(dim); +} + } } } diff --git a/cpp/src/wrapper/knowhere/vec_impl.h b/cpp/src/wrapper/knowhere/vec_impl.h index 25f7d16548..9593e12779 100644 --- a/cpp/src/wrapper/knowhere/vec_impl.h +++ b/cpp/src/wrapper/knowhere/vec_impl.h @@ -12,7 +12,7 @@ namespace zilliz { -namespace vecwise { +namespace milvus { namespace engine { class VecIndexImpl : public VecIndex { @@ -24,15 +24,26 @@ class VecIndexImpl : public VecIndex { const Config &cfg, const long &nt, const float *xt) override; + int64_t Dimension() override; + int64_t Count() override; void Add(const long &nb, const float *xb, const long *ids, const Config &cfg) override; zilliz::knowhere::BinarySet Serialize() override; void Load(const zilliz::knowhere::BinarySet &index_binary) override; void Search(const long &nq, const float *xq, float *dist, long *ids, const Config &cfg) override; - private: + protected: + int64_t dim; std::shared_ptr index_ = nullptr; }; +class BFIndex : public VecIndexImpl { + public: + explicit BFIndex(std::shared_ptr index) : VecIndexImpl(std::move(index)) {}; + void Build(const int64_t& d); + float* GetRawVectors(); + int64_t* GetRawIds(); +}; + } } } diff --git a/cpp/src/wrapper/knowhere/vec_index.cpp b/cpp/src/wrapper/knowhere/vec_index.cpp index 171388d0af..b71eb0f4b7 100644 --- a/cpp/src/wrapper/knowhere/vec_index.cpp +++ b/cpp/src/wrapper/knowhere/vec_index.cpp @@ -4,6 +4,7 @@ // Proprietary and confidential. 
//////////////////////////////////////////////////////////////////////////////// #include "knowhere/index/vector_index/ivf.h" +#include "knowhere/index/vector_index/idmap.h" #include "knowhere/index/vector_index/gpu_ivf.h" #include "knowhere/index/vector_index/cpu_kdt_rng.h" @@ -12,27 +13,42 @@ namespace zilliz { -namespace vecwise { +namespace milvus { namespace engine { // TODO(linxj): index_type => enum struct -VecIndexPtr GetVecIndexFactory(const std::string &index_type) { +VecIndexPtr GetVecIndexFactory(const IndexType &type) { std::shared_ptr index; - if (index_type == "IVF") { - index = std::make_shared(); - } else if (index_type == "GPUIVF") { - index = std::make_shared(0); - } else if (index_type == "SPTAG") { - index = std::make_shared(); + switch (type) { + case IndexType::FAISS_IDMAP: { + index = std::make_shared(); + return std::make_shared(index); + } + case IndexType::FAISS_IVFFLAT_CPU: { + index = std::make_shared(); + break; + } + case IndexType::FAISS_IVFFLAT_GPU: { + index = std::make_shared(0); + break; + } + case IndexType::SPTAG_KDT_RNT_CPU: { + index = std::make_shared(); + break; + } + //// TODO(linxj): Support NSG + //case IndexType ::NSG: { + // index = std::make_shared(); + // break; + //} + default: { + return nullptr; + } } - // TODO(linxj): Support NSG - //else if (index_type == "NSG") { - // index = std::make_shared(); - //} return std::make_shared(index); } -VecIndexPtr LoadVecIndex(const std::string &index_type, const zilliz::knowhere::BinarySet &index_binary) { +VecIndexPtr LoadVecIndex(const IndexType &index_type, const zilliz::knowhere::BinarySet &index_binary) { auto index = GetVecIndexFactory(index_type); index->Load(index_binary); return index; diff --git a/cpp/src/wrapper/knowhere/vec_index.h b/cpp/src/wrapper/knowhere/vec_index.h index b03c43a36b..8e471b5213 100644 --- a/cpp/src/wrapper/knowhere/vec_index.h +++ b/cpp/src/wrapper/knowhere/vec_index.h @@ -14,7 +14,7 @@ namespace zilliz { -namespace vecwise { +namespace milvus { namespace engine { // TODO(linxj): jsoncons => rapidjson or other. 
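Taken together, GetVecIndexFactory and the VecIndex interface it returns are meant to be driven roughly as in the following sketch. This is illustrative only and not part of the patch: the include path follows ExecutionEngineImpl.cpp, the signatures follow vec_index.h / vec_impl.h as shown in these diffs, and every size and Config value here is made up.

    #include <vector>

    #include "wrapper/knowhere/vec_index.h"

    using namespace zilliz::milvus::engine;
    using namespace zilliz::knowhere;

    // Build an IVF index through the wrapper, query it, then round-trip it
    // through Serialize()/LoadVecIndex() the way ExecutionEngineImpl does.
    void wrapper_roundtrip_sketch() {
        const long nb = 10000, nq = 5, k = 10;
        const int dim = 64;
        std::vector<float> base(nb * dim, 0.5f), queries(nq * dim, 0.5f);
        std::vector<long> ids(nb);
        for (long i = 0; i < nb; ++i) ids[i] = i;

        auto index = GetVecIndexFactory(IndexType::FAISS_IVFFLAT_CPU);
        index->BuildAll(nb, base.data(), ids.data(),
                        Config::object{{"dim", dim}, {"nlist", 100}},
                        0, nullptr);  // no separate train set

        std::vector<float> distances(nq * k);
        std::vector<long> result_ids(nq * k);
        index->Search(nq, queries.data(), distances.data(), result_ids.data(),
                      Config::object{{"dim", dim}, {"k", k}, {"nprobe", 20}});

        auto binaryset = index->Serialize();
        auto reloaded = LoadVecIndex(IndexType::FAISS_IVFFLAT_CPU, binaryset);
    }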
@@ -40,6 +40,10 @@ class VecIndex { long *ids, const Config &cfg = Config()) = 0; + virtual int64_t Dimension() = 0; + + virtual int64_t Count() = 0; + virtual zilliz::knowhere::BinarySet Serialize() = 0; virtual void Load(const zilliz::knowhere::BinarySet &index_binary) = 0; @@ -47,9 +51,18 @@ class VecIndex { using VecIndexPtr = std::shared_ptr; -extern VecIndexPtr GetVecIndexFactory(const std::string &index_type); +enum class IndexType { + INVALID = 0, + FAISS_IDMAP = 1, + FAISS_IVFFLAT_GPU, + FAISS_IVFFLAT_CPU, + SPTAG_KDT_RNT_CPU, + NSG, +}; -extern VecIndexPtr LoadVecIndex(const std::string &index_type, const zilliz::knowhere::BinarySet &index_binary); +extern VecIndexPtr GetVecIndexFactory(const IndexType &type); + +extern VecIndexPtr LoadVecIndex(const IndexType &index_type, const zilliz::knowhere::BinarySet &index_binary); } } diff --git a/cpp/thirdparty/knowhere b/cpp/thirdparty/knowhere index 2d543bfab6..c0df766214 160000 --- a/cpp/thirdparty/knowhere +++ b/cpp/thirdparty/knowhere @@ -1 +1 @@ -Subproject commit 2d543bfab655398f30113681f348519acac40ab5 +Subproject commit c0df766214d7fa288ffedd77cd06a8ba8620c8df From 440e18bb0fad3db3dbc61b6b059bf97614b924f8 Mon Sep 17 00:00:00 2001 From: "xj.lin" Date: Tue, 9 Jul 2019 04:26:31 +0800 Subject: [PATCH 17/91] MS-137 - Integrate knowhere Former-commit-id: 67d9be936996437411ac5941c3e322b08a9389bf --- cpp/CHANGELOG.md | 12 +- cpp/src/db/ExecutionEngineImpl.cpp | 131 +++++++++++++++++-- cpp/src/db/ExecutionEngineImpl.h | 1 + cpp/src/db/scheduler/task/SearchTask.cpp | 5 +- cpp/src/wrapper/Index.h | 4 +- cpp/src/wrapper/knowhere/vec_impl.cpp | 25 +++- cpp/src/wrapper/knowhere/vec_impl.h | 8 +- cpp/thirdparty/knowhere | 2 +- cpp/unittest/CMakeLists.txt | 2 +- cpp/unittest/db/CMakeLists.txt | 2 +- cpp/unittest/db/misc_test.cpp | 52 ++++---- cpp/unittest/index_wrapper/knowhere_test.cpp | 48 +++++-- 12 files changed, 222 insertions(+), 70 deletions(-) diff --git a/cpp/CHANGELOG.md b/cpp/CHANGELOG.md index 949a05c8db..b32f159160 100644 --- a/cpp/CHANGELOG.md +++ b/cpp/CHANGELOG.md @@ -3,17 +3,6 @@ Please mark all change in change log and use the ticket from JIRA. -# Milvus 0.3.2 (2019-07-10) - -## Bug - -## Improvement - -## New Feature -- MS-154 - Integrate knowhere - -## Task - # Milvus 0.3.1 (2019-07-10) ## Bug @@ -21,6 +10,7 @@ Please mark all change in change log and use the ticket from JIRA. 
## Improvement ## New Feature +- MS-137 - Integrate knowhere ## Task diff --git a/cpp/src/db/ExecutionEngineImpl.cpp b/cpp/src/db/ExecutionEngineImpl.cpp index 28349695c7..85372d619d 100644 --- a/cpp/src/db/ExecutionEngineImpl.cpp +++ b/cpp/src/db/ExecutionEngineImpl.cpp @@ -16,12 +16,61 @@ namespace zilliz { namespace milvus { namespace engine { +struct FileIOWriter { + std::fstream fs; + std::string name; + + FileIOWriter(const std::string &fname); + ~FileIOWriter(); + size_t operator()(void *ptr, size_t size); +}; + +struct FileIOReader { + std::fstream fs; + std::string name; + + FileIOReader(const std::string &fname); + ~FileIOReader(); + size_t operator()(void *ptr, size_t size); + size_t operator()(void *ptr, size_t size, size_t pos); +}; + +FileIOReader::FileIOReader(const std::string &fname) { + name = fname; + fs = std::fstream(name, std::ios::in | std::ios::binary); +} + +FileIOReader::~FileIOReader() { + fs.close(); +} + +size_t FileIOReader::operator()(void *ptr, size_t size) { + fs.read(reinterpret_cast(ptr), size); +} + +size_t FileIOReader::operator()(void *ptr, size_t size, size_t pos) { + return 0; +} + +FileIOWriter::FileIOWriter(const std::string &fname) { + name = fname; + fs = std::fstream(name, std::ios::out | std::ios::binary); +} + +FileIOWriter::~FileIOWriter() { + fs.close(); +} + +size_t FileIOWriter::operator()(void *ptr, size_t size) { + fs.write(reinterpret_cast(ptr), size); +} ExecutionEngineImpl::ExecutionEngineImpl(uint16_t dimension, const std::string &location, EngineType type) : location_(location), dim(dimension), build_type(type) { index_ = CreatetVecIndex(EngineType::FAISS_IDMAP); + current_type = EngineType::FAISS_IDMAP; std::static_pointer_cast(index_)->Build(dimension); } @@ -29,6 +78,7 @@ ExecutionEngineImpl::ExecutionEngineImpl(VecIndexPtr index, const std::string &location, EngineType type) : index_(std::move(index)), location_(location), build_type(type) { + current_type = type; } VecIndexPtr ExecutionEngineImpl::CreatetVecIndex(EngineType type) { @@ -80,26 +130,85 @@ size_t ExecutionEngineImpl::PhysicalSize() const { } Status ExecutionEngineImpl::Serialize() { - // TODO(groot): auto binaryset = index_->Serialize(); + + FileIOWriter writer(location_); + writer(¤t_type, sizeof(current_type)); + for (auto &iter: binaryset.binary_map_) { + auto meta = iter.first.c_str(); + size_t meta_length = iter.first.length(); + writer(&meta_length, sizeof(meta_length)); + writer((void *) meta, meta_length); + + auto binary = iter.second; + size_t binary_length = binary->size; + writer(&binary_length, sizeof(binary_length)); + writer((void *) binary->data.get(), binary_length); + } return Status::OK(); } Status ExecutionEngineImpl::Load() { - // TODO(groot): + index_ = Load(location_); return Status::OK(); } VecIndexPtr ExecutionEngineImpl::Load(const std::string &location) { - // TODO(groot): dev func in Fake code - // pseude code - //auto data = read_file(location); - //auto index_type = get_index_type(data); - //auto binaryset = get_index_binary(data); - ///// + knowhere::BinarySet load_data_list; + FileIOReader reader(location); + reader.fs.seekg(0, reader.fs.end); + size_t length = reader.fs.tellg(); + reader.fs.seekg(0); - //return LoadVecIndex(index_type, binaryset); - return nullptr; + size_t rp = 0; + reader(¤t_type, sizeof(current_type)); + rp += sizeof(current_type); + while (rp < length) { + size_t meta_length; + reader(&meta_length, sizeof(meta_length)); + rp += sizeof(meta_length); + reader.fs.seekg(rp); + + auto meta = new char[meta_length]; + 
reader(meta, meta_length); + rp += meta_length; + reader.fs.seekg(rp); + + size_t bin_length; + reader(&bin_length, sizeof(bin_length)); + rp += sizeof(bin_length); + reader.fs.seekg(rp); + + auto bin = new uint8_t[bin_length]; + reader(bin, bin_length); + rp += bin_length; + + auto xx = std::make_shared(); + xx.reset(bin); + load_data_list.Append(std::string(meta, meta_length), xx, bin_length); + } + + auto index_type = IndexType::INVALID; + switch (current_type) { + case EngineType::FAISS_IDMAP: { + index_type = IndexType::FAISS_IDMAP; + break; + } + case EngineType::FAISS_IVFFLAT_CPU: { + index_type = IndexType::FAISS_IVFFLAT_CPU; + break; + } + case EngineType::FAISS_IVFFLAT_GPU: { + index_type = IndexType::FAISS_IVFFLAT_GPU; + break; + } + case EngineType::SPTAG_KDT_RNT_CPU: { + index_type = IndexType::SPTAG_KDT_RNT_CPU; + break; + } + } + + return LoadVecIndex(index_type, load_data_list); } Status ExecutionEngineImpl::Merge(const std::string &location) { @@ -113,7 +222,7 @@ Status ExecutionEngineImpl::Merge(const std::string &location) { to_merge = Load(location); } - auto file_index = std::dynamic_pointer_cast(index_); + auto file_index = std::dynamic_pointer_cast(to_merge); index_->Add(file_index->Count(), file_index->GetRawVectors(), file_index->GetRawIds()); return Status::OK(); } diff --git a/cpp/src/db/ExecutionEngineImpl.h b/cpp/src/db/ExecutionEngineImpl.h index 6c0c83d9b3..6e5325f553 100644 --- a/cpp/src/db/ExecutionEngineImpl.h +++ b/cpp/src/db/ExecutionEngineImpl.h @@ -64,6 +64,7 @@ class ExecutionEngineImpl : public ExecutionEngine { protected: VecIndexPtr index_ = nullptr; EngineType build_type; + EngineType current_type; int64_t dim; std::string location_; diff --git a/cpp/src/db/scheduler/task/SearchTask.cpp b/cpp/src/db/scheduler/task/SearchTask.cpp index d04f270331..2bfac90e20 100644 --- a/cpp/src/db/scheduler/task/SearchTask.cpp +++ b/cpp/src/db/scheduler/task/SearchTask.cpp @@ -151,7 +151,7 @@ std::shared_ptr SearchTask::Execute() { std::vector output_distence; for(auto& context : search_contexts_) { //step 1: allocate memory - auto inner_k = index_engine_->Count() < context->topk() ? index_engine_->Count() : context->topk(); + auto inner_k = context->topk(); output_ids.resize(inner_k*context->nq()); output_distence.resize(inner_k*context->nq()); @@ -164,7 +164,8 @@ std::shared_ptr SearchTask::Execute() { //step 3: cluster result SearchContext::ResultSet result_set; - ClusterResult(output_ids, output_distence, context->nq(), inner_k, result_set); + auto spec_k = index_engine_->Count() < context->topk() ? index_engine_->Count() : context->topk(); + ClusterResult(output_ids, output_distence, context->nq(), spec_k, result_set); rc.Record("cluster result"); //step 4: pick up topk result diff --git a/cpp/src/wrapper/Index.h b/cpp/src/wrapper/Index.h index 1668059d11..9841416a6c 100644 --- a/cpp/src/wrapper/Index.h +++ b/cpp/src/wrapper/Index.h @@ -12,8 +12,8 @@ //#include //#include // -//#include "faiss/AutoTune.h" -//#include "faiss/index_io.h" +#include "faiss/AutoTune.h" +#include "faiss/index_io.h" // //#include "Operand.h" diff --git a/cpp/src/wrapper/knowhere/vec_impl.cpp b/cpp/src/wrapper/knowhere/vec_impl.cpp index e1a93d37b9..9b1afb84ef 100644 --- a/cpp/src/wrapper/knowhere/vec_impl.cpp +++ b/cpp/src/wrapper/knowhere/vec_impl.cpp @@ -4,6 +4,7 @@ // Proprietary and confidential. 
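For reference, the Serialize() / Load(const std::string &) pair added to ExecutionEngineImpl above defines a small ad-hoc file layout: the raw EngineType value first, then for every entry of the BinarySet a length-prefixed name followed by a length-prefixed blob, with the length fields written as raw host-endian size_t exactly as the FileIOWriter calls do. The following self-contained sketch mirrors that layout; BlobMap, WriteBlobFile and ReadBlobFile are illustrative names only, and int32_t stands in for the raw enum bytes.

    #include <cstdint>
    #include <fstream>
    #include <map>
    #include <string>
    #include <vector>

    // Layout mirrored from ExecutionEngineImpl::Serialize():
    //   [engine type][size_t name_len][name bytes][size_t blob_len][blob bytes]...
    using BlobMap = std::map<std::string, std::vector<uint8_t>>;

    void WriteBlobFile(const std::string &path, int32_t engine_type, const BlobMap &blobs) {
        std::ofstream out(path, std::ios::binary);
        out.write(reinterpret_cast<const char *>(&engine_type), sizeof(engine_type));
        for (const auto &kv : blobs) {
            size_t name_len = kv.first.size();
            size_t blob_len = kv.second.size();
            out.write(reinterpret_cast<const char *>(&name_len), sizeof(name_len));
            out.write(kv.first.data(), name_len);
            out.write(reinterpret_cast<const char *>(&blob_len), sizeof(blob_len));
            out.write(reinterpret_cast<const char *>(kv.second.data()), blob_len);
        }
    }

    BlobMap ReadBlobFile(const std::string &path, int32_t &engine_type) {
        BlobMap blobs;
        std::ifstream in(path, std::ios::binary);
        in.read(reinterpret_cast<char *>(&engine_type), sizeof(engine_type));
        size_t name_len = 0;
        while (in.read(reinterpret_cast<char *>(&name_len), sizeof(name_len))) {
            std::string name(name_len, '\0');
            in.read(&name[0], name_len);
            size_t blob_len = 0;
            in.read(reinterpret_cast<char *>(&blob_len), sizeof(blob_len));
            std::vector<uint8_t> blob(blob_len);
            in.read(reinterpret_cast<char *>(blob.data()), blob_len);
            blobs.emplace(std::move(name), std::move(blob));
        }
        return blobs;
    }

Because the length fields are raw size_t, such files are only portable between builds with the same word size and endianness.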
//////////////////////////////////////////////////////////////////////////////// +#include #include "knowhere/index/vector_index/idmap.h" #include "vec_impl.h" @@ -27,7 +28,9 @@ void VecIndexImpl::BuildAll(const long &nb, auto preprocessor = index_->BuildPreprocessor(dataset, cfg); index_->set_preprocessor(preprocessor); - auto model = index_->Train(dataset, cfg); + auto nlist = int(nb / 1000000.0 * 16384); + auto cfg_t = Config::object{{"nlist", nlist}, {"dim", dim}}; + auto model = index_->Train(dataset, cfg_t); index_->set_index_model(model); index_->Add(dataset, cfg); } @@ -71,7 +74,7 @@ void VecIndexImpl::Search(const long &nq, const float *xq, float *dist, long *id //} auto p_ids = ids_array->data()->GetValues(1, 0); - auto p_dist = ids_array->data()->GetValues(1, 0); + auto p_dist = dis_array->data()->GetValues(1, 0); // TODO(linxj): avoid copy here. memcpy(ids, p_ids, sizeof(int64_t) * nq * k); @@ -84,6 +87,7 @@ zilliz::knowhere::BinarySet VecIndexImpl::Serialize() { void VecIndexImpl::Load(const zilliz::knowhere::BinarySet &index_binary) { index_->Load(index_binary); + dim = Dimension(); } int64_t VecIndexImpl::Dimension() { @@ -95,7 +99,9 @@ int64_t VecIndexImpl::Count() { } float *BFIndex::GetRawVectors() { - return std::static_pointer_cast(index_)->GetRawVectors(); + auto raw_index = std::dynamic_pointer_cast(index_); + if (raw_index) { return raw_index->GetRawVectors(); } + return nullptr; } int64_t *BFIndex::GetRawIds() { @@ -107,6 +113,19 @@ void BFIndex::Build(const int64_t &d) { std::static_pointer_cast(index_)->Train(dim); } +void BFIndex::BuildAll(const long &nb, + const float *xb, + const long *ids, + const Config &cfg, + const long &nt, + const float *xt) { + dim = cfg["dim"].as(); + auto dataset = GenDatasetWithIds(nb, dim, xb, ids); + + std::static_pointer_cast(index_)->Train(dim); + index_->Add(dataset, cfg); +} + } } } diff --git a/cpp/src/wrapper/knowhere/vec_impl.h b/cpp/src/wrapper/knowhere/vec_impl.h index 9593e12779..ab6c6b8a79 100644 --- a/cpp/src/wrapper/knowhere/vec_impl.h +++ b/cpp/src/wrapper/knowhere/vec_impl.h @@ -32,7 +32,7 @@ class VecIndexImpl : public VecIndex { void Search(const long &nq, const float *xq, float *dist, long *ids, const Config &cfg) override; protected: - int64_t dim; + int64_t dim = 0; std::shared_ptr index_ = nullptr; }; @@ -41,6 +41,12 @@ class BFIndex : public VecIndexImpl { explicit BFIndex(std::shared_ptr index) : VecIndexImpl(std::move(index)) {}; void Build(const int64_t& d); float* GetRawVectors(); + void BuildAll(const long &nb, + const float *xb, + const long *ids, + const Config &cfg, + const long &nt, + const float *xt) override; int64_t* GetRawIds(); }; diff --git a/cpp/thirdparty/knowhere b/cpp/thirdparty/knowhere index c0df766214..3a30677b8a 160000 --- a/cpp/thirdparty/knowhere +++ b/cpp/thirdparty/knowhere @@ -1 +1 @@ -Subproject commit c0df766214d7fa288ffedd77cd06a8ba8620c8df +Subproject commit 3a30677b8ab105955534922d1677e8fa99ef0406 diff --git a/cpp/unittest/CMakeLists.txt b/cpp/unittest/CMakeLists.txt index d0d158ec4a..043716b58b 100644 --- a/cpp/unittest/CMakeLists.txt +++ b/cpp/unittest/CMakeLists.txt @@ -38,7 +38,7 @@ set(unittest_libs ${CUDA_TOOLKIT_ROOT_DIR}/lib64/stubs/libnvidia-ml.so ) -add_subdirectory(server) +#add_subdirectory(server) add_subdirectory(db) add_subdirectory(index_wrapper) #add_subdirectory(faiss_wrapper) diff --git a/cpp/unittest/db/CMakeLists.txt b/cpp/unittest/db/CMakeLists.txt index 44b09d7b25..213eb146ed 100644 --- a/cpp/unittest/db/CMakeLists.txt +++ b/cpp/unittest/db/CMakeLists.txt 
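One behavioural detail from the VecIndexImpl::BuildAll change above is easy to miss: the nlist used for training is now derived from the dataset size as nb / 1000000.0 * 16384 (roughly 16384 IVF lists per million base vectors) rather than taken from the caller's config. A tiny standalone illustration of what that evaluates to (values are just worked examples):

    #include <iostream>

    int main() {
        // Same heuristic as the nlist computation in VecIndexImpl::BuildAll above.
        auto nlist_for = [](long nb) { return static_cast<int>(nb / 1000000.0 * 16384); };
        std::cout << nlist_for(10000) << "\n";    // 163   (unit-test scale)
        std::cout << nlist_for(500000) << "\n";   // 8192
        std::cout << nlist_for(1000000) << "\n";  // 16384
        return 0;
    }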
@@ -6,7 +6,7 @@ aux_source_directory(${MILVUS_ENGINE_SRC}/db db_srcs) aux_source_directory(${MILVUS_ENGINE_SRC}/config config_files) aux_source_directory(${MILVUS_ENGINE_SRC}/cache cache_srcs) -aux_source_directory(${MILVUS_ENGINE_SRC}/wrapper wrapper_src) +#aux_source_directory(${MILVUS_ENGINE_SRC}/wrapper wrapper_src) aux_source_directory(${MILVUS_ENGINE_SRC}/wrapper/knowhere knowhere_src) aux_source_directory(./ test_srcs) diff --git a/cpp/unittest/db/misc_test.cpp b/cpp/unittest/db/misc_test.cpp index a49c4d5807..6849a71867 100644 --- a/cpp/unittest/db/misc_test.cpp +++ b/cpp/unittest/db/misc_test.cpp @@ -26,32 +26,32 @@ namespace { } -TEST(DBMiscTest, ENGINE_API_TEST) { - //engine api AddWithIdArray - const uint16_t dim = 512; - const long n = 10; - engine::FaissExecutionEngine engine(512, "/tmp/1", "IDMap", "IDMap,Flat"); - std::vector vectors; - std::vector ids; - for (long i = 0; i < n; i++) { - for (uint16_t k = 0; k < dim; k++) { - vectors.push_back((float) k); - } - ids.push_back(i); - } - - auto status = engine.AddWithIdArray(vectors, ids); - ASSERT_TRUE(status.ok()); - - auto engine_ptr = engine::EngineFactory::Build(128, "/tmp", engine::EngineType::INVALID); - ASSERT_EQ(engine_ptr, nullptr); - - engine_ptr = engine::EngineFactory::Build(128, "/tmp", engine::EngineType::FAISS_IVFFLAT_GPU); - ASSERT_NE(engine_ptr, nullptr); - - engine_ptr = engine::EngineFactory::Build(128, "/tmp", engine::EngineType::FAISS_IDMAP); - ASSERT_NE(engine_ptr, nullptr); -} +//TEST(DBMiscTest, ENGINE_API_TEST) { +// //engine api AddWithIdArray +// const uint16_t dim = 512; +// const long n = 10; +// engine::FaissExecutionEngine engine(512, "/tmp/1", "IDMap", "IDMap,Flat"); +// std::vector vectors; +// std::vector ids; +// for (long i = 0; i < n; i++) { +// for (uint16_t k = 0; k < dim; k++) { +// vectors.push_back((float) k); +// } +// ids.push_back(i); +// } +// +// auto status = engine.AddWithIdArray(vectors, ids); +// ASSERT_TRUE(status.ok()); +// +// auto engine_ptr = engine::EngineFactory::Build(128, "/tmp", engine::EngineType::INVALID); +// ASSERT_EQ(engine_ptr, nullptr); +// +// engine_ptr = engine::EngineFactory::Build(128, "/tmp", engine::EngineType::FAISS_IVFFLAT_GPU); +// ASSERT_NE(engine_ptr, nullptr); +// +// engine_ptr = engine::EngineFactory::Build(128, "/tmp", engine::EngineType::FAISS_IDMAP); +// ASSERT_NE(engine_ptr, nullptr); +//} TEST(DBMiscTest, EXCEPTION_TEST) { engine::Exception ex1(""); diff --git a/cpp/unittest/index_wrapper/knowhere_test.cpp b/cpp/unittest/index_wrapper/knowhere_test.cpp index 58b0d5a4b2..b4f8feba03 100644 --- a/cpp/unittest/index_wrapper/knowhere_test.cpp +++ b/cpp/unittest/index_wrapper/knowhere_test.cpp @@ -11,7 +11,7 @@ #include "utils.h" -using namespace zilliz::vecwise::engine; +using namespace zilliz::milvus::engine; using namespace zilliz::knowhere; using ::testing::TestWithParam; @@ -20,7 +20,7 @@ using ::testing::Combine; class KnowhereWrapperTest - : public TestWithParam<::std::tuple> { + : public TestWithParam<::std::tuple> { protected: void SetUp() override { std::string generator_type; @@ -34,7 +34,7 @@ class KnowhereWrapperTest } protected: - std::string index_type; + IndexType index_type; Config train_cfg; Config search_cfg; @@ -55,12 +55,12 @@ class KnowhereWrapperTest INSTANTIATE_TEST_CASE_P(WrapperParam, KnowhereWrapperTest, Values( // ["Index type", "Generator type", "dim", "nb", "nq", "k", "build config", "search config"] - std::make_tuple("IVF", "Default", + std::make_tuple(IndexType::FAISS_IVFFLAT_CPU, "Default", 64, 10000, 10, 10, 
Config::object{{"nlist", 100}, {"dim", 64}}, Config::object{{"dim", 64}, {"k", 10}, {"nprobe", 20}} ), - std::make_tuple("SPTAG", "Default", + std::make_tuple(IndexType::SPTAG_KDT_RNT_CPU, "Default", 64, 10000, 10, 10, Config::object{{"TPTNumber", 1}, {"dim", 64}}, Config::object{{"dim", 64}, {"k", 10}} @@ -113,16 +113,39 @@ TEST_P(KnowhereWrapperTest, serialize_test) { { auto binaryset = index_->Serialize(); + //int fileno = 0; + //const std::string &base_name = "/tmp/wrapper_serialize_test_bin_"; + //std::vector filename_list; + //std::vector> meta_list; + //for (auto &iter: binaryset.binary_map_) { + // const std::string &filename = base_name + std::to_string(fileno); + // FileIOWriter writer(filename); + // writer(iter.second->data.get(), iter.second->size); + // + // meta_list.push_back(std::make_pair(iter.first, iter.second.size)); + // filename_list.push_back(filename); + // ++fileno; + //} + // + //BinarySet load_data_list; + //for (int i = 0; i < filename_list.size() && i < meta_list.size(); ++i) { + // auto bin_size = meta_list[i].second; + // FileIOReader reader(filename_list[i]); + // std::vector load_data(bin_size); + // reader(load_data.data(), bin_size); + // load_data_list.Append(meta_list[i].first, load_data); + //} + int fileno = 0; - const std::string &base_name = "/tmp/wrapper_serialize_test_bin_"; std::vector filename_list; + const std::string &base_name = "/tmp/wrapper_serialize_test_bin_"; std::vector> meta_list; for (auto &iter: binaryset.binary_map_) { const std::string &filename = base_name + std::to_string(fileno); FileIOWriter writer(filename); - writer(iter.second.data, iter.second.size); + writer(iter.second->data.get(), iter.second->size); - meta_list.push_back(std::make_pair(iter.first, iter.second.size)); + meta_list.emplace_back(std::make_pair(iter.first, iter.second->size)); filename_list.push_back(filename); ++fileno; } @@ -131,9 +154,12 @@ TEST_P(KnowhereWrapperTest, serialize_test) { for (int i = 0; i < filename_list.size() && i < meta_list.size(); ++i) { auto bin_size = meta_list[i].second; FileIOReader reader(filename_list[i]); - std::vector load_data(bin_size); - reader(load_data.data(), bin_size); - load_data_list.Append(meta_list[i].first, load_data); + + auto load_data = new uint8_t[bin_size]; + reader(load_data, bin_size); + auto data = std::make_shared(); + data.reset(load_data); + load_data_list.Append(meta_list[i].first, data, bin_size); } From 015fe06fc4818ee042905d154d0bc1af63ffeaa7 Mon Sep 17 00:00:00 2001 From: "xj.lin" Date: Tue, 9 Jul 2019 04:43:27 +0800 Subject: [PATCH 18/91] update... 
Former-commit-id: 42b56218b03ac7b8efb479e776ce601e12a45e6c --- cpp/src/db/ExecutionEngineImpl.cpp | 10 +++++++--- cpp/src/wrapper/knowhere/vec_index.cpp | 17 ++++++++++++----- cpp/src/wrapper/knowhere/vec_index.h | 6 ++++-- 3 files changed, 23 insertions(+), 10 deletions(-) diff --git a/cpp/src/db/ExecutionEngineImpl.cpp b/cpp/src/db/ExecutionEngineImpl.cpp index 85372d619d..bba29fc9a0 100644 --- a/cpp/src/db/ExecutionEngineImpl.cpp +++ b/cpp/src/db/ExecutionEngineImpl.cpp @@ -183,9 +183,9 @@ VecIndexPtr ExecutionEngineImpl::Load(const std::string &location) { reader(bin, bin_length); rp += bin_length; - auto xx = std::make_shared(); - xx.reset(bin); - load_data_list.Append(std::string(meta, meta_length), xx, bin_length); + auto binptr = std::make_shared(); + binptr.reset(bin); + load_data_list.Append(std::string(meta, meta_length), binptr, bin_length); } auto index_type = IndexType::INVALID; @@ -206,6 +206,10 @@ VecIndexPtr ExecutionEngineImpl::Load(const std::string &location) { index_type = IndexType::SPTAG_KDT_RNT_CPU; break; } + default: { + ENGINE_LOG_ERROR << "wrong index_type"; + return nullptr; + } } return LoadVecIndex(index_type, load_data_list); diff --git a/cpp/src/wrapper/knowhere/vec_index.cpp b/cpp/src/wrapper/knowhere/vec_index.cpp index b71eb0f4b7..17aa428613 100644 --- a/cpp/src/wrapper/knowhere/vec_index.cpp +++ b/cpp/src/wrapper/knowhere/vec_index.cpp @@ -32,15 +32,22 @@ VecIndexPtr GetVecIndexFactory(const IndexType &type) { index = std::make_shared(0); break; } + case IndexType::FAISS_IVFPQ_CPU: { + index = std::make_shared(); + break; + } + case IndexType::FAISS_IVFPQ_GPU: { + index = std::make_shared(0); + break; + } case IndexType::SPTAG_KDT_RNT_CPU: { index = std::make_shared(); break; } - //// TODO(linxj): Support NSG - //case IndexType ::NSG: { - // index = std::make_shared(); - // break; - //} + //case IndexType::NSG: { // TODO(linxj): bug. + // index = std::make_shared(); + // break; + //} default: { return nullptr; } diff --git a/cpp/src/wrapper/knowhere/vec_index.h b/cpp/src/wrapper/knowhere/vec_index.h index 8e471b5213..76c69537b5 100644 --- a/cpp/src/wrapper/knowhere/vec_index.h +++ b/cpp/src/wrapper/knowhere/vec_index.h @@ -54,10 +54,12 @@ using VecIndexPtr = std::shared_ptr; enum class IndexType { INVALID = 0, FAISS_IDMAP = 1, - FAISS_IVFFLAT_GPU, FAISS_IVFFLAT_CPU, + FAISS_IVFFLAT_GPU, + FAISS_IVFPQ_CPU, + FAISS_IVFPQ_GPU, SPTAG_KDT_RNT_CPU, - NSG, + //NSG, }; extern VecIndexPtr GetVecIndexFactory(const IndexType &type); From e8f4a76c9e66c14859e6486aa147866c392c329e Mon Sep 17 00:00:00 2001 From: zhiru Date: Wed, 3 Jul 2019 14:25:02 +0800 Subject: [PATCH 19/91] Disable cleanup if mode is read only Former-commit-id: 6283a6ce33c7f88f84a8686c6659771af1069206 --- cpp/CHANGELOG.md | 1 + cpp/src/db/MySQLMetaImpl.cpp | 6 ++++-- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/cpp/CHANGELOG.md b/cpp/CHANGELOG.md index b32f159160..981b79c293 100644 --- a/cpp/CHANGELOG.md +++ b/cpp/CHANGELOG.md @@ -26,6 +26,7 @@ Please mark all change in change log and use the ticket from JIRA. 
- MS-90 - Fix arch match incorrect on ARM - MS-99 - Fix compilation bug - MS-110 - Avoid huge file size +- MS-148 - Disable cleanup if mode is read only ## Improvement - MS-82 - Update server startup welcome message diff --git a/cpp/src/db/MySQLMetaImpl.cpp b/cpp/src/db/MySQLMetaImpl.cpp index 92bb17168a..20b0cef0c6 100644 --- a/cpp/src/db/MySQLMetaImpl.cpp +++ b/cpp/src/db/MySQLMetaImpl.cpp @@ -162,7 +162,9 @@ namespace meta { ENGINE_LOG_DEBUG << "MySQL connection pool: maximum pool size = " << std::to_string(maxPoolSize); try { - CleanUp(); + if (mode_ != Options::MODE::READ_ONLY) { + CleanUp(); + } { ScopedConnection connectionPtr(*mysql_connection_pool_, safe_grab); @@ -457,7 +459,7 @@ namespace meta { } //Scoped Connection - if (mode_ != Options::MODE::SINGLE) { + if (mode_ == Options::MODE::CLUSTER) { DeleteTableFiles(table_id); } From 9c9aac92b9eaa6870d891fcf1bd0748d2df52ad9 Mon Sep 17 00:00:00 2001 From: zhiru Date: Wed, 3 Jul 2019 14:27:19 +0800 Subject: [PATCH 20/91] Disable cleanup if mode is read only Former-commit-id: 2dfd7154e07274db8a06f280217c1c8ce05c0b33 --- cpp/src/db/MySQLMetaImpl.cpp | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/cpp/src/db/MySQLMetaImpl.cpp b/cpp/src/db/MySQLMetaImpl.cpp index 20b0cef0c6..f2e032dac5 100644 --- a/cpp/src/db/MySQLMetaImpl.cpp +++ b/cpp/src/db/MySQLMetaImpl.cpp @@ -1814,7 +1814,9 @@ namespace meta { MySQLMetaImpl::~MySQLMetaImpl() { // std::lock_guard lock(mysql_mutex); - CleanUp(); + if (mode_ != Options::MODE::READ_ONLY) { + CleanUp(); + } } } // namespace meta From 9e8d7e04ca169eca1ecedb3765fc3b6bb215b392 Mon Sep 17 00:00:00 2001 From: zhiru Date: Wed, 3 Jul 2019 15:48:21 +0800 Subject: [PATCH 21/91] update Former-commit-id: 7fd5d1f36a59fdaedf0e1fe4892c1e0d88180d16 --- cpp/src/server/RequestTask.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/cpp/src/server/RequestTask.cpp b/cpp/src/server/RequestTask.cpp index 07a8305d1f..295b4a0331 100644 --- a/cpp/src/server/RequestTask.cpp +++ b/cpp/src/server/RequestTask.cpp @@ -482,6 +482,7 @@ ServerError SearchVectorTask::OnExecute() { engine::QueryResults results; uint64_t record_count = (uint64_t)record_array_.size(); + SERVER_LOG_DEBUG << file_id_array_ << std::endl; if(file_id_array_.empty()) { stat = DBWrapper::DB()->Query(table_name_, (size_t) top_k_, record_count, vec_f.data(), dates, results); } else { From 849689dd423956ce69812695add1d81eecbc00cc Mon Sep 17 00:00:00 2001 From: zhiru Date: Wed, 3 Jul 2019 16:04:00 +0800 Subject: [PATCH 22/91] update Former-commit-id: 8ea7ae4509eb7c6542ccacd925ee5286c2849e00 --- cpp/src/server/RequestTask.cpp | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/cpp/src/server/RequestTask.cpp b/cpp/src/server/RequestTask.cpp index 295b4a0331..4bda8ac060 100644 --- a/cpp/src/server/RequestTask.cpp +++ b/cpp/src/server/RequestTask.cpp @@ -482,7 +482,11 @@ ServerError SearchVectorTask::OnExecute() { engine::QueryResults results; uint64_t record_count = (uint64_t)record_array_.size(); - SERVER_LOG_DEBUG << file_id_array_ << std::endl; + SERVER_LOG_DEBUG << "file_id_array_: "; + for (auto& file_id : file_id_array_) { + SERVER_LOG_DEBUG << file_id; + } + if(file_id_array_.empty()) { stat = DBWrapper::DB()->Query(table_name_, (size_t) top_k_, record_count, vec_f.data(), dates, results); } else { From 3720a6c3e0f02cf92576c26c55d46c3f7b67f052 Mon Sep 17 00:00:00 2001 From: zhiru Date: Wed, 3 Jul 2019 17:24:36 +0800 Subject: [PATCH 23/91] update Former-commit-id: ce55b27987989dda7c1e2dab1f420256f505820c --- 
cpp/src/db/DBImpl.cpp | 4 ++++ cpp/src/db/MySQLMetaImpl.cpp | 10 ++++++---- cpp/src/server/RequestHandler.cpp | 2 ++ cpp/src/server/RequestTask.cpp | 5 ----- 4 files changed, 12 insertions(+), 9 deletions(-) diff --git a/cpp/src/db/DBImpl.cpp b/cpp/src/db/DBImpl.cpp index 0a1e8651e1..352fcae7d0 100644 --- a/cpp/src/db/DBImpl.cpp +++ b/cpp/src/db/DBImpl.cpp @@ -191,6 +191,10 @@ Status DBImpl::Query(const std::string& table_id, const std::vector return status; } + for (auto& file_schema : files_array) { + ENGINE_LOG_DEBUG << "file_id: " << file_schema.file_id_; + } + if(files_array.empty()) { return Status::Error("Invalid file id"); } diff --git a/cpp/src/db/MySQLMetaImpl.cpp b/cpp/src/db/MySQLMetaImpl.cpp index f2e032dac5..5bef070337 100644 --- a/cpp/src/db/MySQLMetaImpl.cpp +++ b/cpp/src/db/MySQLMetaImpl.cpp @@ -1083,10 +1083,10 @@ namespace meta { // } Query getTableFileQuery = connectionPtr->query(); - getTableFileQuery << "SELECT engine_type, file_id, file_type, size, date " << - "FROM TableFiles " << - "WHERE table_id = " << quote << table_id << " AND " << - "(" << idStr << ");"; + getTableFileQuery << "SELECT id, engine_type, file_id, file_type, size, date " << + "FROM TableFiles " << + "WHERE table_id = " << quote << table_id << " AND " << + "(" << idStr << ");"; ENGINE_LOG_DEBUG << "MySQLMetaImpl::GetTableFiles: " << getTableFileQuery.str(); @@ -1106,6 +1106,8 @@ namespace meta { TableFileSchema file_schema; + file_schema.id_ = resRow["id"]; + file_schema.table_id_ = table_id; file_schema.engine_type_ = resRow["engine_type"]; diff --git a/cpp/src/server/RequestHandler.cpp b/cpp/src/server/RequestHandler.cpp index 037f80e0db..a4dc182c35 100644 --- a/cpp/src/server/RequestHandler.cpp +++ b/cpp/src/server/RequestHandler.cpp @@ -53,6 +53,7 @@ RequestHandler::SearchVector(std::vector &_return, const std::vector &query_record_array, const std::vector &query_range_array, const int64_t topk) { +// SERVER_LOG_DEBUG << "Entering RequestHandler::SearchVector"; BaseTaskPtr task_ptr = SearchVectorTask::Create(table_name, std::vector(), query_record_array, query_range_array, topk, _return); RequestScheduler::ExecTask(task_ptr); @@ -65,6 +66,7 @@ RequestHandler::SearchVectorInFiles(std::vector<::milvus::thrift::TopKQueryResul const std::vector<::milvus::thrift::RowRecord> &query_record_array, const std::vector<::milvus::thrift::Range> &query_range_array, const int64_t topk) { +// SERVER_LOG_DEBUG << "Entering RequestHandler::SearchVectorInFiles. 
file_id_array size = " << std::to_string(file_id_array.size()); BaseTaskPtr task_ptr = SearchVectorTask::Create(table_name, file_id_array, query_record_array, query_range_array, topk, _return); RequestScheduler::ExecTask(task_ptr); diff --git a/cpp/src/server/RequestTask.cpp b/cpp/src/server/RequestTask.cpp index 4bda8ac060..07a8305d1f 100644 --- a/cpp/src/server/RequestTask.cpp +++ b/cpp/src/server/RequestTask.cpp @@ -482,11 +482,6 @@ ServerError SearchVectorTask::OnExecute() { engine::QueryResults results; uint64_t record_count = (uint64_t)record_array_.size(); - SERVER_LOG_DEBUG << "file_id_array_: "; - for (auto& file_id : file_id_array_) { - SERVER_LOG_DEBUG << file_id; - } - if(file_id_array_.empty()) { stat = DBWrapper::DB()->Query(table_name_, (size_t) top_k_, record_count, vec_f.data(), dates, results); } else { From b787b49799d801815dab79dab17e96e279bd6349 Mon Sep 17 00:00:00 2001 From: zhiru Date: Wed, 3 Jul 2019 18:31:45 +0800 Subject: [PATCH 24/91] update Former-commit-id: bb7cb2df588dc30ead17b5d727099fddb5ee5595 --- cpp/src/db/DBImpl.cpp | 4 ---- 1 file changed, 4 deletions(-) diff --git a/cpp/src/db/DBImpl.cpp b/cpp/src/db/DBImpl.cpp index 352fcae7d0..0a1e8651e1 100644 --- a/cpp/src/db/DBImpl.cpp +++ b/cpp/src/db/DBImpl.cpp @@ -191,10 +191,6 @@ Status DBImpl::Query(const std::string& table_id, const std::vector return status; } - for (auto& file_schema : files_array) { - ENGINE_LOG_DEBUG << "file_id: " << file_schema.file_id_; - } - if(files_array.empty()) { return Status::Error("Invalid file id"); } From d87d0977fa0b5bced013e651574669d621bc8fd9 Mon Sep 17 00:00:00 2001 From: zhiru Date: Wed, 3 Jul 2019 18:44:29 +0800 Subject: [PATCH 25/91] update Former-commit-id: 2561c4f4412eb0f52751d1cbc73e47c9b57652d9 --- cpp/CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/cpp/CHANGELOG.md b/cpp/CHANGELOG.md index 981b79c293..c0ea20e9bd 100644 --- a/cpp/CHANGELOG.md +++ b/cpp/CHANGELOG.md @@ -27,6 +27,7 @@ Please mark all change in change log and use the ticket from JIRA. 
- MS-99 - Fix compilation bug - MS-110 - Avoid huge file size - MS-148 - Disable cleanup if mode is read only +- MS-149 - Fixed searching only one index file issue in distributed mode ## Improvement - MS-82 - Update server startup welcome message From 0bc554cf77479a2f09aaf83692abc3668efef70b Mon Sep 17 00:00:00 2001 From: yu yunfeng Date: Wed, 3 Jul 2019 20:07:11 +0800 Subject: [PATCH 26/91] acc test Former-commit-id: 6f4f38ad3c53df2c06cc5a3ae5f57d75139a51c4 --- cpp/src/db/DBMetaImpl.cpp | 14 ++++++++------ cpp/src/db/FaissExecutionEngine.cpp | 1 + 2 files changed, 9 insertions(+), 6 deletions(-) diff --git a/cpp/src/db/DBMetaImpl.cpp b/cpp/src/db/DBMetaImpl.cpp index 8c56c863e7..d13899dca0 100644 --- a/cpp/src/db/DBMetaImpl.cpp +++ b/cpp/src/db/DBMetaImpl.cpp @@ -612,7 +612,8 @@ Status DBMetaImpl::GetTableFiles(const std::string& table_id, TableFilesSchema& table_files) { try { table_files.clear(); - auto files = ConnectorPtr->select(columns(&TableFileSchema::file_id_, + auto files = ConnectorPtr->select(columns(&TableFileSchema::id_, + &TableFileSchema::file_id_, &TableFileSchema::file_type_, &TableFileSchema::size_, &TableFileSchema::date_, @@ -631,11 +632,12 @@ Status DBMetaImpl::GetTableFiles(const std::string& table_id, for (auto &file : files) { TableFileSchema file_schema; file_schema.table_id_ = table_id; - file_schema.file_id_ = std::get<0>(file); - file_schema.file_type_ = std::get<1>(file); - file_schema.size_ = std::get<2>(file); - file_schema.date_ = std::get<3>(file); - file_schema.engine_type_ = std::get<4>(file); + file_schema.id_ = std::get<0>(file); + file_schema.file_id_ = std::get<1>(file); + file_schema.file_type_ = std::get<2>(file); + file_schema.size_ = std::get<3>(file); + file_schema.date_ = std::get<4>(file); + file_schema.engine_type_ = std::get<5>(file); file_schema.dimension_ = table_schema.dimension_; GetTableFilePath(file_schema); diff --git a/cpp/src/db/FaissExecutionEngine.cpp b/cpp/src/db/FaissExecutionEngine.cpp index a2abe02e8a..51572dc3f4 100644 --- a/cpp/src/db/FaissExecutionEngine.cpp +++ b/cpp/src/db/FaissExecutionEngine.cpp @@ -139,6 +139,7 @@ Status FaissExecutionEngine::Search(long n, auto start_time = METRICS_NOW_TIME; std::shared_ptr ivf_index = std::dynamic_pointer_cast(pIndex_); + //ENGINE_LOG_DEBUG << "Index nlist: " << ivf_index->nlist << ", ntotal: "<< ivf_index->ntotal; if(ivf_index) { ENGINE_LOG_DEBUG << "Index type: IVFFLAT nProbe: " << nprobe_; ivf_index->nprobe = nprobe_; From a66c229efda9239f56b8b3701471e420c762be2a Mon Sep 17 00:00:00 2001 From: jinhai Date: Wed, 3 Jul 2019 20:23:21 +0800 Subject: [PATCH 27/91] Remove unused code Former-commit-id: a8b9985902792c6b8c884a284fe0fad1c6515b3a --- cpp/src/server/MilvusServer.cpp | 33 ++++++++++++--------------------- cpp/src/server/ServerConfig.h | 1 - 2 files changed, 12 insertions(+), 22 deletions(-) diff --git a/cpp/src/server/MilvusServer.cpp b/cpp/src/server/MilvusServer.cpp index 322460968f..452ee3af88 100644 --- a/cpp/src/server/MilvusServer.cpp +++ b/cpp/src/server/MilvusServer.cpp @@ -50,7 +50,6 @@ MilvusServer::StartService() { std::string address = server_config.GetValue(CONFIG_SERVER_ADDRESS, "127.0.0.1"); int32_t port = server_config.GetInt32Value(CONFIG_SERVER_PORT, 19530); std::string protocol = server_config.GetValue(CONFIG_SERVER_PROTOCOL, "binary"); - std::string mode = server_config.GetValue(CONFIG_SERVER_MODE, "thread_pool"); try { DBWrapper::DB();//initialize db @@ -68,30 +67,22 @@ MilvusServer::StartService() { } else if (protocol == "compact") { 
protocol_factory.reset(new TCompactProtocolFactory()); } else { - //SERVER_LOG_INFO << "Service protocol: " << protocol << " is not supported currently"; + // SERVER_LOG_INFO << "Service protocol: " << protocol << " is not supported currently"; return; } - std::string mode = "thread_pool"; - if (mode == "simple") { - s_server.reset(new TSimpleServer(processor, server_transport, transport_factory, protocol_factory)); - s_server->serve(); - } else if (mode == "thread_pool") { - stdcxx::shared_ptr threadManager(ThreadManager::newSimpleThreadManager()); - stdcxx::shared_ptr threadFactory(new PosixThreadFactory()); - threadManager->threadFactory(threadFactory); - threadManager->start(); + stdcxx::shared_ptr threadManager(ThreadManager::newSimpleThreadManager()); + stdcxx::shared_ptr threadFactory(new PosixThreadFactory()); + threadManager->threadFactory(threadFactory); + threadManager->start(); + + s_server.reset(new ThreadPoolServer(processor, + server_transport, + transport_factory, + protocol_factory, + threadManager)); + s_server->serve(); - s_server.reset(new ThreadPoolServer(processor, - server_transport, - transport_factory, - protocol_factory, - threadManager)); - s_server->serve(); - } else { - //SERVER_LOG_INFO << "Service mode: " << mode << " is not supported currently"; - return; - } } catch (apache::thrift::TException& ex) { std::cout << "ERROR! " << ex.what() << std::endl; kill(0, SIGUSR1); diff --git a/cpp/src/server/ServerConfig.h b/cpp/src/server/ServerConfig.h index 412581bc1f..0ec04eed8c 100644 --- a/cpp/src/server/ServerConfig.h +++ b/cpp/src/server/ServerConfig.h @@ -18,7 +18,6 @@ static const std::string CONFIG_SERVER = "server_config"; static const std::string CONFIG_SERVER_ADDRESS = "address"; static const std::string CONFIG_SERVER_PORT = "port"; static const std::string CONFIG_SERVER_PROTOCOL = "transfer_protocol"; -static const std::string CONFIG_SERVER_MODE = "server_mode"; static const std::string CONFIG_CLUSTER_MODE = "mode"; static const std::string CONFIG_DB = "db_config"; From e2d3935b60e94eb4171c409a6364487350d9b2c9 Mon Sep 17 00:00:00 2001 From: yu yunfeng Date: Wed, 3 Jul 2019 20:59:36 +0800 Subject: [PATCH 28/91] ADD LOG Former-commit-id: 36614bd9c42406016c746d56482ed987cb9b5bd4 --- cpp/src/db/scheduler/task/SearchTask.cpp | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/cpp/src/db/scheduler/task/SearchTask.cpp b/cpp/src/db/scheduler/task/SearchTask.cpp index 2bfac90e20..141c93eb5f 100644 --- a/cpp/src/db/scheduler/task/SearchTask.cpp +++ b/cpp/src/db/scheduler/task/SearchTask.cpp @@ -168,6 +168,14 @@ std::shared_ptr SearchTask::Execute() { ClusterResult(output_ids, output_distence, context->nq(), spec_k, result_set); rc.Record("cluster result"); + + SERVER_LOG_DEBUG << "Query Result: "; + for(auto& id2score_vector: result_set) { + for(auto& pair: id2score_vector) { + SERVER_LOG_DEBUG << "id: " << pair.first << ", distance: " << pair.second; + } + } + //step 4: pick up topk result TopkResult(result_set, inner_k, context->GetResult()); rc.Record("reduce topk"); From 84e0c5808a0c646139434f2c23abe78241b9d4e8 Mon Sep 17 00:00:00 2001 From: yu yunfeng Date: Thu, 4 Jul 2019 12:41:24 +0800 Subject: [PATCH 29/91] MS-151 Fix topk problem Former-commit-id: 3f9acd4460ebede7883669147194e32f99d72094 --- cpp/src/db/FaissExecutionEngine.cpp | 1 - cpp/src/db/scheduler/task/SearchTask.cpp | 8 -------- 2 files changed, 9 deletions(-) diff --git a/cpp/src/db/FaissExecutionEngine.cpp b/cpp/src/db/FaissExecutionEngine.cpp index 51572dc3f4..a2abe02e8a 100644 --- 
a/cpp/src/db/FaissExecutionEngine.cpp +++ b/cpp/src/db/FaissExecutionEngine.cpp @@ -139,7 +139,6 @@ Status FaissExecutionEngine::Search(long n, auto start_time = METRICS_NOW_TIME; std::shared_ptr ivf_index = std::dynamic_pointer_cast(pIndex_); - //ENGINE_LOG_DEBUG << "Index nlist: " << ivf_index->nlist << ", ntotal: "<< ivf_index->ntotal; if(ivf_index) { ENGINE_LOG_DEBUG << "Index type: IVFFLAT nProbe: " << nprobe_; ivf_index->nprobe = nprobe_; diff --git a/cpp/src/db/scheduler/task/SearchTask.cpp b/cpp/src/db/scheduler/task/SearchTask.cpp index 141c93eb5f..2bfac90e20 100644 --- a/cpp/src/db/scheduler/task/SearchTask.cpp +++ b/cpp/src/db/scheduler/task/SearchTask.cpp @@ -168,14 +168,6 @@ std::shared_ptr SearchTask::Execute() { ClusterResult(output_ids, output_distence, context->nq(), spec_k, result_set); rc.Record("cluster result"); - - SERVER_LOG_DEBUG << "Query Result: "; - for(auto& id2score_vector: result_set) { - for(auto& pair: id2score_vector) { - SERVER_LOG_DEBUG << "id: " << pair.first << ", distance: " << pair.second; - } - } - //step 4: pick up topk result TopkResult(result_set, inner_k, context->GetResult()); rc.Record("reduce topk"); From 1b5953bd0dd583d32ea3e122d261a56a185fc144 Mon Sep 17 00:00:00 2001 From: starlord Date: Thu, 4 Jul 2019 13:02:47 +0800 Subject: [PATCH 30/91] reduce unittest time cost Former-commit-id: 0c97f4623a480616b47f18fb0b72cbc33b393b66 --- cpp/unittest/db/db_tests.cpp | 2 +- cpp/unittest/db/mysql_db_test.cpp | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/cpp/unittest/db/db_tests.cpp b/cpp/unittest/db/db_tests.cpp index d505320e86..bd17081af8 100644 --- a/cpp/unittest/db/db_tests.cpp +++ b/cpp/unittest/db/db_tests.cpp @@ -21,7 +21,7 @@ namespace { static const std::string TABLE_NAME = "test_group"; static constexpr int64_t TABLE_DIM = 256; static constexpr int64_t VECTOR_COUNT = 250000; - static constexpr int64_t INSERT_LOOP = 100000; + static constexpr int64_t INSERT_LOOP = 10000; engine::meta::TableSchema BuildTableSchema() { engine::meta::TableSchema table_info; diff --git a/cpp/unittest/db/mysql_db_test.cpp b/cpp/unittest/db/mysql_db_test.cpp index 907aa8a0c4..7fdb30a204 100644 --- a/cpp/unittest/db/mysql_db_test.cpp +++ b/cpp/unittest/db/mysql_db_test.cpp @@ -21,7 +21,7 @@ namespace { static const std::string TABLE_NAME = "test_group"; static constexpr int64_t TABLE_DIM = 256; static constexpr int64_t VECTOR_COUNT = 250000; - static constexpr int64_t INSERT_LOOP = 100000; + static constexpr int64_t INSERT_LOOP = 10000; engine::meta::TableSchema BuildTableSchema() { engine::meta::TableSchema table_info; From 14e5c8507140df3d57861f76c1d6ad65beb964ad Mon Sep 17 00:00:00 2001 From: zhiru Date: Thu, 4 Jul 2019 14:04:57 +0800 Subject: [PATCH 31/91] update Former-commit-id: f90944d80f2155a4971225fcf61f7b50610aa0fe --- cpp/coverage.sh | 1 + cpp/src/db/MySQLConnectionPool.cpp | 78 +++++++++++++++++++ cpp/src/db/MySQLConnectionPool.h | 75 +++++-------------- cpp/src/db/MySQLMetaImpl.cpp | 116 ++++++++++++++++++++++++----- 4 files changed, 198 insertions(+), 72 deletions(-) create mode 100644 cpp/src/db/MySQLConnectionPool.cpp diff --git a/cpp/coverage.sh b/cpp/coverage.sh index a0a4cfd03f..7a48e5d451 100755 --- a/cpp/coverage.sh +++ b/cpp/coverage.sh @@ -33,6 +33,7 @@ function mysql_exc() mysql_exc "CREATE DATABASE IF NOT EXISTS ${MYSQL_DB_NAME};" mysql_exc "GRANT ALL PRIVILEGES ON ${MYSQL_DB_NAME}.* TO '${MYSQL_USER_NAME}'@'%';" mysql_exc "FLUSH PRIVILEGES;" +mysql_exc "USE ${MYSQL_DB_NAME};" # get baseline ${LCOV_CMD} -c -i -d 
${DIR_GCNO} -o "${FILE_INFO_BASE}" diff --git a/cpp/src/db/MySQLConnectionPool.cpp b/cpp/src/db/MySQLConnectionPool.cpp new file mode 100644 index 0000000000..b43126920e --- /dev/null +++ b/cpp/src/db/MySQLConnectionPool.cpp @@ -0,0 +1,78 @@ +#include "MySQLConnectionPool.h" + +namespace zilliz { +namespace milvus { +namespace engine { +namespace meta { + + // Do a simple form of in-use connection limiting: wait to return + // a connection until there are a reasonably low number in use + // already. Can't do this in create() because we're interested in + // connections actually in use, not those created. Also note that + // we keep our own count; ConnectionPool::size() isn't the same! + mysqlpp::Connection *MySQLConnectionPool::grab() { + while (conns_in_use_ > max_pool_size_) { + sleep(1); + } + + ++conns_in_use_; + return mysqlpp::ConnectionPool::grab(); + } + + // Other half of in-use conn count limit + void MySQLConnectionPool::release(const mysqlpp::Connection *pc) { + mysqlpp::ConnectionPool::release(pc); + + if (conns_in_use_ <= 0) { + ENGINE_LOG_WARNING << "MySQLConnetionPool::release: conns_in_use_ is less than zero. conns_in_use_ = " << conns_in_use_; + } else { + --conns_in_use_; + } + } + + int MySQLConnectionPool::getConnectionsInUse() { + return conns_in_use_; + } + + void MySQLConnectionPool::set_max_idle_time(int max_idle) { + max_idle_time_ = max_idle; + } + + std::string MySQLConnectionPool::getDB() { + return db_; + } + + // Superclass overrides + mysqlpp::Connection *MySQLConnectionPool::create() { + + try { + // Create connection using the parameters we were passed upon + // creation. + mysqlpp::Connection *conn = new mysqlpp::Connection(); + conn->set_option(new mysqlpp::ReconnectOption(true)); + conn->connect(db_.empty() ? 0 : db_.c_str(), + server_.empty() ? 0 : server_.c_str(), + user_.empty() ? 0 : user_.c_str(), + password_.empty() ? 0 : password_.c_str(), + port_); + return conn; + } catch (const mysqlpp::ConnectionFailed& er) { + ENGINE_LOG_ERROR << "Failed to connect to database server" << ": " << er.what(); + return nullptr; + } + } + + void MySQLConnectionPool::destroy(mysqlpp::Connection *cp) { + // Our superclass can't know how we created the Connection, so + // it delegates destruction to us, to be safe. + delete cp; + } + + unsigned int MySQLConnectionPool::max_idle_time() { + return max_idle_time_; + } + +} // namespace meta +} // namespace engine +} // namespace milvus +} // namespace zilliz diff --git a/cpp/src/db/MySQLConnectionPool.h b/cpp/src/db/MySQLConnectionPool.h index 6a763a9729..5112993b94 100644 --- a/cpp/src/db/MySQLConnectionPool.h +++ b/cpp/src/db/MySQLConnectionPool.h @@ -6,6 +6,11 @@ #include "Log.h" +namespace zilliz { +namespace milvus { +namespace engine { +namespace meta { + class MySQLConnectionPool : public mysqlpp::ConnectionPool { public: @@ -21,8 +26,7 @@ public: password_(passWord), server_(serverIp), port_(port), - max_pool_size_(maxPoolSize) - { + max_pool_size_(maxPoolSize) { conns_in_use_ = 0; @@ -35,69 +39,25 @@ public: clear(); } - // Do a simple form of in-use connection limiting: wait to return - // a connection until there are a reasonably low number in use - // already. Can't do this in create() because we're interested in - // connections actually in use, not those created. Also note that - // we keep our own count; ConnectionPool::size() isn't the same! 
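To make the pool accounting above easier to follow in isolation: grab() spins until the number of checked-out connections falls back to the limit and then increments the counter, and release() decrements it, logging a warning on underflow. A reduced standalone model of just that accounting follows; CountingPool is an illustrative stand-in, not the MySQL++ class, and it uses an atomic counter where the original keeps a plain int.

    #include <unistd.h>   // sleep()
    #include <atomic>

    // Standalone model of the in-use accounting in MySQLConnectionPool above.
    class CountingPool {
     public:
        explicit CountingPool(int max_pool_size) : max_pool_size_(max_pool_size) {}

        void grab() {
            // Same spin-and-sleep wait as MySQLConnectionPool::grab(); note that the
            // '>' comparison lets the in-use count reach max_pool_size_ + 1 before
            // later callers start waiting.
            while (conns_in_use_ > max_pool_size_) {
                sleep(1);
            }
            ++conns_in_use_;
        }

        void release() {
            if (conns_in_use_ <= 0) {
                return;  // the original logs ENGINE_LOG_WARNING here
            }
            --conns_in_use_;
        }

        int in_use() const { return conns_in_use_; }

     private:
        std::atomic<int> conns_in_use_{0};
        int max_pool_size_;
    };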
- mysqlpp::Connection* grab() override { - while (conns_in_use_ > max_pool_size_) { - sleep(1); - } - - ++conns_in_use_; - return mysqlpp::ConnectionPool::grab(); - } + mysqlpp::Connection *grab() override; // Other half of in-use conn count limit - void release(const mysqlpp::Connection* pc) override { - mysqlpp::ConnectionPool::release(pc); + void release(const mysqlpp::Connection *pc) override; - if (conns_in_use_ <= 0) { - ENGINE_LOG_WARNING << "MySQLConnetionPool::release: conns_in_use_ is less than zero. conns_in_use_ = " << conns_in_use_ << std::endl; - } - else { - --conns_in_use_; - } - } + int getConnectionsInUse(); - int getConnectionsInUse() { - return conns_in_use_; - } + void set_max_idle_time(int max_idle); - void set_max_idle_time(int max_idle) { - max_idle_time_ = max_idle; - } - - std::string getDB() { - return db_; - } + std::string getDB(); protected: // Superclass overrides - mysqlpp::Connection* create() override { - // Create connection using the parameters we were passed upon - // creation. - mysqlpp::Connection* conn = new mysqlpp::Connection(); - conn->set_option(new mysqlpp::ReconnectOption(true)); - conn->connect(db_.empty() ? 0 : db_.c_str(), - server_.empty() ? 0 : server_.c_str(), - user_.empty() ? 0 : user_.c_str(), - password_.empty() ? 0 : password_.c_str(), - port_); - return conn; - } + mysqlpp::Connection *create() override; - void destroy(mysqlpp::Connection* cp) override { - // Our superclass can't know how we created the Connection, so - // it delegates destruction to us, to be safe. - delete cp; - } + void destroy(mysqlpp::Connection *cp) override; - unsigned int max_idle_time() override { - return max_idle_time_; - } + unsigned int max_idle_time() override; private: // Number of connections currently in use @@ -110,4 +70,9 @@ private: int max_pool_size_; unsigned int max_idle_time_; -}; \ No newline at end of file +}; + +} // namespace meta +} // namespace engine +} // namespace milvus +} // namespace zilliz \ No newline at end of file diff --git a/cpp/src/db/MySQLMetaImpl.cpp b/cpp/src/db/MySQLMetaImpl.cpp index 5bef070337..f32c5b65e2 100644 --- a/cpp/src/db/MySQLMetaImpl.cpp +++ b/cpp/src/db/MySQLMetaImpl.cpp @@ -169,6 +169,10 @@ namespace meta { { ScopedConnection connectionPtr(*mysql_connection_pool_, safe_grab); + if (connectionPtr == nullptr) { + return Status::Error("Failed to connect to database server"); + } + // ENGINE_LOG_DEBUG << "MySQLMetaImpl::Initialize: connections in use = " << mysql_connection_pool_->getConnectionsInUse(); // if (!connectionPtr->connect(dbName, serverAddress, username, password, port)) { // return Status::Error("DB connection failed: ", connectionPtr->error()); @@ -234,9 +238,6 @@ namespace meta { // } else { // return Status::DBTransactionError("Initialization Error", InitializeQuery.error()); // } - } catch (const ConnectionFailed& er) { - ENGINE_LOG_ERROR << "Failed to connect to database server" << ": " << er.what(); - return Status::DBTransactionError("Failed to connect to database server", er.what()); } catch (const BadQuery& er) { // Handle any query errors ENGINE_LOG_ERROR << "QUERY ERROR DURING INITIALIZATION" << ": " << er.what(); @@ -292,6 +293,10 @@ namespace meta { { ScopedConnection connectionPtr(*mysql_connection_pool_, safe_grab); + if (connectionPtr == nullptr) { + return Status::Error("Failed to connect to database server"); + } + // if (mysql_connection_pool_->getConnectionsInUse() <= 0) { // ENGINE_LOG_WARNING << "MySQLMetaImpl::DropPartitionsByDates connection in use = " << 
mysql_connection_pool_->getConnectionsInUse(); // } @@ -335,6 +340,10 @@ namespace meta { { ScopedConnection connectionPtr(*mysql_connection_pool_, safe_grab); + if (connectionPtr == nullptr) { + return Status::Error("Failed to connect to database server"); + } + // if (mysql_connection_pool_->getConnectionsInUse() <= 0) { // ENGINE_LOG_WARNING << "MySQLMetaImpl::CreateTable connection in use = " << mysql_connection_pool_->getConnectionsInUse(); // } @@ -351,7 +360,7 @@ namespace meta { ENGINE_LOG_DEBUG << "MySQLMetaImpl::CreateTable: " << createTableQuery.str(); StoreQueryResult res = createTableQuery.store(); - assert(res && res.num_rows() <= 1); + if (res.num_rows() == 1) { int state = res[0]["state"]; if (TableSchema::TO_DELETE == state) { @@ -438,6 +447,10 @@ namespace meta { { ScopedConnection connectionPtr(*mysql_connection_pool_, safe_grab); + if (connectionPtr == nullptr) { + return Status::Error("Failed to connect to database server"); + } + // if (mysql_connection_pool_->getConnectionsInUse() <= 0) { // ENGINE_LOG_WARNING << "MySQLMetaImpl::DeleteTable connection in use = " << mysql_connection_pool_->getConnectionsInUse(); // } @@ -483,6 +496,10 @@ namespace meta { { ScopedConnection connectionPtr(*mysql_connection_pool_, safe_grab); + if (connectionPtr == nullptr) { + return Status::Error("Failed to connect to database server"); + } + // if (mysql_connection_pool_->getConnectionsInUse() <= 0) { // ENGINE_LOG_WARNING << "MySQLMetaImpl::DeleteTableFiles connection in use = " << mysql_connection_pool_->getConnectionsInUse(); // } @@ -529,6 +546,10 @@ namespace meta { { ScopedConnection connectionPtr(*mysql_connection_pool_, safe_grab); + if (connectionPtr == nullptr) { + return Status::Error("Failed to connect to database server"); + } + // if (mysql_connection_pool_->getConnectionsInUse() <= 0) { // ENGINE_LOG_WARNING << "MySQLMetaImpl::DescribeTable connection in use = " << mysql_connection_pool_->getConnectionsInUse(); // } @@ -544,7 +565,6 @@ namespace meta { res = describeTableQuery.store(); } //Scoped Connection - assert(res && res.num_rows() <= 1); if (res.num_rows() == 1) { const Row& resRow = res[0]; @@ -592,6 +612,10 @@ namespace meta { { ScopedConnection connectionPtr(*mysql_connection_pool_, safe_grab); + if (connectionPtr == nullptr) { + return Status::Error("Failed to connect to database server"); + } + // if (mysql_connection_pool_->getConnectionsInUse() <= 0) { // ENGINE_LOG_WARNING << "MySQLMetaImpl::HasTable connection in use = " << mysql_connection_pool_->getConnectionsInUse(); // } @@ -609,7 +633,6 @@ namespace meta { res = hasTableQuery.store(); } //Scoped Connection - assert(res && res.num_rows() == 1); int check = res[0]["check"]; has_or_not = (check == 1); @@ -639,6 +662,10 @@ namespace meta { { ScopedConnection connectionPtr(*mysql_connection_pool_, safe_grab); + if (connectionPtr == nullptr) { + return Status::Error("Failed to connect to database server"); + } + // if (mysql_connection_pool_->getConnectionsInUse() <= 0) { // ENGINE_LOG_WARNING << "MySQLMetaImpl::AllTables connection in use = " << mysql_connection_pool_->getConnectionsInUse(); // } @@ -726,6 +753,10 @@ namespace meta { { ScopedConnection connectionPtr(*mysql_connection_pool_, safe_grab); + if (connectionPtr == nullptr) { + return Status::Error("Failed to connect to database server"); + } + // if (mysql_connection_pool_->getConnectionsInUse() <= 0) { // ENGINE_LOG_WARNING << "MySQLMetaImpl::CreateTableFile connection in use = " << mysql_connection_pool_->getConnectionsInUse(); // } @@ 
-792,6 +823,10 @@ namespace meta { { ScopedConnection connectionPtr(*mysql_connection_pool_, safe_grab); + if (connectionPtr == nullptr) { + return Status::Error("Failed to connect to database server"); + } + // if (mysql_connection_pool_->getConnectionsInUse() <= 0) { // ENGINE_LOG_WARNING << "MySQLMetaImpl::FilesToIndex connection in use = " << mysql_connection_pool_->getConnectionsInUse(); // } @@ -875,6 +910,9 @@ namespace meta { { ScopedConnection connectionPtr(*mysql_connection_pool_, safe_grab); + if (connectionPtr == nullptr) { + return Status::Error("Failed to connect to database server"); + } // if (mysql_connection_pool_->getConnectionsInUse() <= 0) { // ENGINE_LOG_WARNING << "MySQLMetaImpl::FilesToSearch connection in use = " << mysql_connection_pool_->getConnectionsInUse(); // } @@ -986,6 +1024,10 @@ namespace meta { { ScopedConnection connectionPtr(*mysql_connection_pool_, safe_grab); + if (connectionPtr == nullptr) { + return Status::Error("Failed to connect to database server"); + } + // if (mysql_connection_pool_->getConnectionsInUse() <= 0) { // ENGINE_LOG_WARNING << "MySQLMetaImpl::FilesToMerge connection in use = " << mysql_connection_pool_->getConnectionsInUse(); // } @@ -1078,6 +1120,10 @@ namespace meta { { ScopedConnection connectionPtr(*mysql_connection_pool_, safe_grab); + if (connectionPtr == nullptr) { + return Status::Error("Failed to connect to database server"); + } + // if (mysql_connection_pool_->getConnectionsInUse() <= 0) { // ENGINE_LOG_WARNING << "MySQLMetaImpl::GetTableFiles connection in use = " << mysql_connection_pool_->getConnectionsInUse(); // } @@ -1093,8 +1139,6 @@ namespace meta { res = getTableFileQuery.store(); } //Scoped Connection - assert(res); - TableSchema table_schema; table_schema.table_id_ = table_id; auto status = DescribeTable(table_schema); @@ -1162,6 +1206,10 @@ namespace meta { ScopedConnection connectionPtr(*mysql_connection_pool_, safe_grab); + if (connectionPtr == nullptr) { + return Status::Error("Failed to connect to database server"); + } + // if (mysql_connection_pool_->getConnectionsInUse() <= 0) { // ENGINE_LOG_WARNING << "MySQLMetaImpl::Archive connection in use = " << mysql_connection_pool_->getConnectionsInUse(); // } @@ -1212,6 +1260,10 @@ namespace meta { { ScopedConnection connectionPtr(*mysql_connection_pool_, safe_grab); + if (connectionPtr == nullptr) { + return Status::Error("Failed to connect to database server"); + } + // if (mysql_connection_pool_->getConnectionsInUse() <= 0) { // ENGINE_LOG_WARNING << "MySQLMetaImpl::Size connection in use = " << mysql_connection_pool_->getConnectionsInUse(); // } @@ -1226,7 +1278,6 @@ namespace meta { res = getSizeQuery.store(); } //Scoped Connection - assert(res && res.num_rows() == 1); // if (!res) { //// std::cout << "result is NULL" << std::endl; // return Status::DBTransactionError("QUERY ERROR WHEN RETRIEVING SIZE", getSizeQuery.error()); @@ -1272,6 +1323,10 @@ namespace meta { { ScopedConnection connectionPtr(*mysql_connection_pool_, safe_grab); + if (connectionPtr == nullptr) { + return Status::Error("Failed to connect to database server"); + } + // if (mysql_connection_pool_->getConnectionsInUse() <= 0) { // ENGINE_LOG_WARNING << "MySQLMetaImpl::DiscardFiles connection in use = " << mysql_connection_pool_->getConnectionsInUse(); // } @@ -1288,7 +1343,6 @@ namespace meta { // std::cout << discardFilesQuery.str() << std::endl; StoreQueryResult res = discardFilesQuery.store(); - assert(res); if (res.num_rows() == 0) { return Status::OK(); } @@ -1350,6 +1404,10 @@ 
namespace meta { { ScopedConnection connectionPtr(*mysql_connection_pool_, safe_grab); + if (connectionPtr == nullptr) { + return Status::Error("Failed to connect to database server"); + } + // if (mysql_connection_pool_->getConnectionsInUse() <= 0) { // ENGINE_LOG_WARNING << "MySQLMetaImpl::UpdateTableFile connection in use = " << mysql_connection_pool_->getConnectionsInUse(); // } @@ -1365,7 +1423,6 @@ namespace meta { StoreQueryResult res = updateTableFileQuery.store(); - assert(res && res.num_rows() <= 1); if (res.num_rows() == 1) { int state = res[0]["state"]; if (state == TableSchema::TO_DELETE) { @@ -1432,6 +1489,10 @@ namespace meta { { ScopedConnection connectionPtr(*mysql_connection_pool_, safe_grab); + if (connectionPtr == nullptr) { + return Status::Error("Failed to connect to database server"); + } + // if (mysql_connection_pool_->getConnectionsInUse() <= 0) { // ENGINE_LOG_WARNING << "MySQLMetaImpl::UpdateTableFiles connection in use = " << mysql_connection_pool_->getConnectionsInUse(); // } @@ -1455,7 +1516,6 @@ namespace meta { StoreQueryResult res = updateTableFilesQuery.store(); - assert(res && res.num_rows() == 1); int check = res[0]["check"]; has_tables[file_schema.table_id_] = (check == 1); } @@ -1527,6 +1587,10 @@ namespace meta { ScopedConnection connectionPtr(*mysql_connection_pool_, safe_grab); + if (connectionPtr == nullptr) { + return Status::Error("Failed to connect to database server"); + } + // if (mysql_connection_pool_->getConnectionsInUse() <= 0) { // ENGINE_LOG_WARNING << "MySQLMetaImpl::CleanUpFilesWithTTL: clean table files: connection in use after creating ScopedConnection = " // << mysql_connection_pool_->getConnectionsInUse(); @@ -1542,8 +1606,6 @@ namespace meta { StoreQueryResult res = cleanUpFilesWithTTLQuery.store(); - assert(res); - TableFileSchema table_file; std::vector idsToDelete; @@ -1611,6 +1673,10 @@ namespace meta { ScopedConnection connectionPtr(*mysql_connection_pool_, safe_grab); + if (connectionPtr == nullptr) { + return Status::Error("Failed to connect to database server"); + } + // if (mysql_connection_pool_->getConnectionsInUse() <= 0) { // ENGINE_LOG_WARNING << "MySQLMetaImpl::CleanUpFilesWithTTL: clean tables: connection in use after creating ScopedConnection = " // << mysql_connection_pool_->getConnectionsInUse(); @@ -1624,7 +1690,6 @@ namespace meta { ENGINE_LOG_DEBUG << "MySQLMetaImpl::CleanUpFilesWithTTL: " << cleanUpFilesWithTTLQuery.str(); StoreQueryResult res = cleanUpFilesWithTTLQuery.store(); - assert(res); // std::cout << res.num_rows() << std::endl; if (!res.empty()) { @@ -1677,6 +1742,10 @@ namespace meta { try { ScopedConnection connectionPtr(*mysql_connection_pool_, safe_grab); + if (connectionPtr == nullptr) { + return Status::Error("Failed to connect to database server"); + } + // if (mysql_connection_pool_->getConnectionsInUse() <= 0) { // ENGINE_LOG_WARNING << "MySQLMetaImpl::CleanUp: connection in use = " << mysql_connection_pool_->getConnectionsInUse(); // } @@ -1690,7 +1759,7 @@ namespace meta { ENGINE_LOG_DEBUG << "MySQLMetaImpl::CleanUp: " << cleanUpQuery.str(); StoreQueryResult res = cleanUpQuery.store(); - assert(res); + if (!res.empty()) { ENGINE_LOG_DEBUG << "Remove table file type as NEW"; cleanUpQuery << "DELETE FROM TableFiles WHERE file_type = " << std::to_string(TableFileSchema::NEW) << ";"; @@ -1736,6 +1805,10 @@ namespace meta { { ScopedConnection connectionPtr(*mysql_connection_pool_, safe_grab); + if (connectionPtr == nullptr) { + return Status::Error("Failed to connect to database server"); + 
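            // (This nullptr guard is repeated before every query in MySQLMetaImpl:
            // MySQLConnectionPool::create() now returns nullptr when the connection
            // attempt fails, so the pool can hand back an unusable connection and
            // the caller must fail fast instead of dereferencing it.)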
} + // if (mysql_connection_pool_->getConnectionsInUse() <= 0) { // ENGINE_LOG_WARNING << "MySQLMetaImpl::Count: connection in use = " << mysql_connection_pool_->getConnectionsInUse(); // } @@ -1759,7 +1832,12 @@ namespace meta { result += size; } - assert(table_schema.dimension_ != 0); + if (table_schema.dimension_ <= 0) { + std::stringstream errorMsg; + errorMsg << "MySQLMetaImpl::Count: " << "table dimension = " << std::to_string(table_schema.dimension_) << ", table_id = " << table_id; + ENGINE_LOG_ERROR << errorMsg.str(); + return Status::Error(errorMsg.str()); + } result /= table_schema.dimension_; result /= sizeof(float); @@ -1786,6 +1864,10 @@ namespace meta { ScopedConnection connectionPtr(*mysql_connection_pool_, safe_grab); + if (connectionPtr == nullptr) { + return Status::Error("Failed to connect to database server"); + } + // if (mysql_connection_pool_->getConnectionsInUse() <= 0) { // ENGINE_LOG_WARNING << "MySQLMetaImpl::DropAll: connection in use = " << mysql_connection_pool_->getConnectionsInUse(); // } From 42b2f8f3a290ea843be8ced706d64ca2da9cac44 Mon Sep 17 00:00:00 2001 From: zhiru Date: Thu, 4 Jul 2019 14:20:59 +0800 Subject: [PATCH 32/91] update Former-commit-id: e041205d3137170f8611b083b4aded90202e4fdd --- cpp/CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/cpp/CHANGELOG.md b/cpp/CHANGELOG.md index c0ea20e9bd..f1c58ca7ab 100644 --- a/cpp/CHANGELOG.md +++ b/cpp/CHANGELOG.md @@ -41,6 +41,7 @@ Please mark all change in change log and use the ticket from JIRA. - MS-124 - HasTable interface - MS-126 - Add more error code - MS-128 - Change default db path +- MS-152 - Delete assert in MySQLMetaImpl and change MySQLConnectionPool impl ## New Feature From a464673420553ec2f8759b64c783f1e21176c1db Mon Sep 17 00:00:00 2001 From: zhiru Date: Thu, 4 Jul 2019 15:58:28 +0800 Subject: [PATCH 33/91] fix c_str error when connecting to MySQL Former-commit-id: 055a1e26fb253783f9fdfcc0f6f8206ef29455dd --- cpp/src/db/MySQLMetaImpl.cpp | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/cpp/src/db/MySQLMetaImpl.cpp b/cpp/src/db/MySQLMetaImpl.cpp index f32c5b65e2..8ba7f1d3b6 100644 --- a/cpp/src/db/MySQLMetaImpl.cpp +++ b/cpp/src/db/MySQLMetaImpl.cpp @@ -144,15 +144,15 @@ namespace meta { if (dialect.find("mysql") == std::string::npos) { return Status::Error("URI's dialect is not MySQL"); } - const char* username = pieces_match[2].str().c_str(); - const char* password = pieces_match[3].str().c_str(); - const char* serverAddress = pieces_match[4].str().c_str(); + std::string username = pieces_match[2].str(); + std::string password = pieces_match[3].str(); + std::string serverAddress = pieces_match[4].str(); unsigned int port = 0; if (!pieces_match[5].str().empty()) { port = std::stoi(pieces_match[5].str()); } - const char* dbName = pieces_match[6].str().c_str(); - //std::cout << dbName << " " << serverAddress << " " << username << " " << password << " " << port << std::endl; + std::string dbName = pieces_match[6].str(); +// std::cout << dbName << " " << serverAddress << " " << username << " " << password << " " << port << std::endl; // connectionPtr->set_option(new MultiStatementsOption(true)); // connectionPtr->set_option(new mysqlpp::ReconnectOption(true)); int threadHint = std::thread::hardware_concurrency(); @@ -1753,8 +1753,8 @@ namespace meta { Query cleanUpQuery = connectionPtr->query(); cleanUpQuery << "SELECT table_name " << "FROM information_schema.tables " << - "WHERE table_schema = " << quote << mysql_connection_pool_->getDB() << quote 
<< " " << - "AND table_name = " << quote << "TableFiles" << quote << ";"; + "WHERE table_schema = " << quote << mysql_connection_pool_->getDB() << " " << + "AND table_name = " << quote << "TableFiles" << ";"; ENGINE_LOG_DEBUG << "MySQLMetaImpl::CleanUp: " << cleanUpQuery.str(); From fbbed892fa18a073ea6f133dab5357e5bf06d3ba Mon Sep 17 00:00:00 2001 From: zhiru Date: Thu, 4 Jul 2019 15:58:38 +0800 Subject: [PATCH 34/91] fix c_str error when connecting to MySQL Former-commit-id: 23334aabdfc983f72429f547d813c5341ca352c9 --- cpp/CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/cpp/CHANGELOG.md b/cpp/CHANGELOG.md index f1c58ca7ab..87ff310f7f 100644 --- a/cpp/CHANGELOG.md +++ b/cpp/CHANGELOG.md @@ -28,6 +28,7 @@ Please mark all change in change log and use the ticket from JIRA. - MS-110 - Avoid huge file size - MS-148 - Disable cleanup if mode is read only - MS-149 - Fixed searching only one index file issue in distributed mode +- MS-153 - fix c_str error when connecting to MySQL ## Improvement - MS-82 - Update server startup welcome message From ea7f4e1da6bc1f3d24447c30e775c043741b2a66 Mon Sep 17 00:00:00 2001 From: starlord Date: Thu, 4 Jul 2019 16:58:40 +0800 Subject: [PATCH 35/91] add uiittest for merge result functions Former-commit-id: 7bc823973219f3b0d9b21c5fdc4d78b3adc2a44a --- cpp/CHANGELOG.md | 1 + cpp/src/db/scheduler/context/SearchContext.h | 4 +- cpp/src/db/scheduler/task/SearchTask.cpp | 215 ++++++++++--------- cpp/src/db/scheduler/task/SearchTask.h | 14 ++ cpp/unittest/db/search_test.cpp | 162 ++++++++++++++ 5 files changed, 294 insertions(+), 102 deletions(-) create mode 100644 cpp/unittest/db/search_test.cpp diff --git a/cpp/CHANGELOG.md b/cpp/CHANGELOG.md index 87ff310f7f..9056f19a2e 100644 --- a/cpp/CHANGELOG.md +++ b/cpp/CHANGELOG.md @@ -8,6 +8,7 @@ Please mark all change in change log and use the ticket from JIRA. 
## Bug ## Improvement +- MS-156 - Add unittest for merge result functions ## New Feature - MS-137 - Integrate knowhere diff --git a/cpp/src/db/scheduler/context/SearchContext.h b/cpp/src/db/scheduler/context/SearchContext.h index 1997b80764..e81622eb32 100644 --- a/cpp/src/db/scheduler/context/SearchContext.h +++ b/cpp/src/db/scheduler/context/SearchContext.h @@ -32,8 +32,8 @@ public: using Id2IndexMap = std::unordered_map; const Id2IndexMap& GetIndexMap() const { return map_index_files_; } - using Id2ScoreMap = std::vector>; - using ResultSet = std::vector; + using Id2DistanceMap = std::vector>; + using ResultSet = std::vector; const ResultSet& GetResult() const { return result_; } ResultSet& GetResult() { return result_; } diff --git a/cpp/src/db/scheduler/task/SearchTask.cpp b/cpp/src/db/scheduler/task/SearchTask.cpp index 2bfac90e20..708bcc8708 100644 --- a/cpp/src/db/scheduler/task/SearchTask.cpp +++ b/cpp/src/db/scheduler/task/SearchTask.cpp @@ -13,104 +13,6 @@ namespace milvus { namespace engine { namespace { -void ClusterResult(const std::vector &output_ids, - const std::vector &output_distence, - uint64_t nq, - uint64_t topk, - SearchContext::ResultSet &result_set) { - result_set.clear(); - result_set.reserve(nq); - for (auto i = 0; i < nq; i++) { - SearchContext::Id2ScoreMap id_score; - id_score.reserve(topk); - for (auto k = 0; k < topk; k++) { - uint64_t index = i * topk + k; - if(output_ids[index] < 0) { - continue; - } - id_score.push_back(std::make_pair(output_ids[index], output_distence[index])); - } - result_set.emplace_back(id_score); - } -} - -void MergeResult(SearchContext::Id2ScoreMap &score_src, - SearchContext::Id2ScoreMap &score_target, - uint64_t topk) { - //Note: the score_src and score_target are already arranged by score in ascending order - if(score_src.empty()) { - return; - } - - if(score_target.empty()) { - score_target.swap(score_src); - return; - } - - size_t src_count = score_src.size(); - size_t target_count = score_target.size(); - SearchContext::Id2ScoreMap score_merged; - score_merged.reserve(topk); - size_t src_index = 0, target_index = 0; - while(true) { - //all score_src items are merged, if score_merged.size() still less than topk - //move items from score_target to score_merged until score_merged.size() equal topk - if(src_index >= src_count) { - for(size_t i = target_index; i < target_count && score_merged.size() < topk; ++i) { - score_merged.push_back(score_target[i]); - } - break; - } - - //all score_target items are merged, if score_merged.size() still less than topk - //move items from score_src to score_merged until score_merged.size() equal topk - if(target_index >= target_count) { - for(size_t i = src_index; i < src_count && score_merged.size() < topk; ++i) { - score_merged.push_back(score_src[i]); - } - break; - } - - //compare score, put smallest score to score_merged one by one - auto& src_pair = score_src[src_index]; - auto& target_pair = score_target[target_index]; - if(src_pair.second > target_pair.second) { - score_merged.push_back(target_pair); - target_index++; - } else { - score_merged.push_back(src_pair); - src_index++; - } - - //score_merged.size() already equal topk - if(score_merged.size() >= topk) { - break; - } - } - - score_target.swap(score_merged); -} - -void TopkResult(SearchContext::ResultSet &result_src, - uint64_t topk, - SearchContext::ResultSet &result_target) { - if (result_target.empty()) { - result_target.swap(result_src); - return; - } - - if (result_src.size() != result_target.size()) { - SERVER_LOG_ERROR << 
"Invalid result set"; - return; - } - - for (size_t i = 0; i < result_src.size(); i++) { - SearchContext::Id2ScoreMap &score_src = result_src[i]; - SearchContext::Id2ScoreMap &score_target = result_target[i]; - MergeResult(score_src, score_target, topk); - } -} - void CollectDurationMetrics(int index_type, double total_time) { switch(index_type) { case meta::TableFileSchema::RAW: { @@ -165,11 +67,11 @@ std::shared_ptr SearchTask::Execute() { //step 3: cluster result SearchContext::ResultSet result_set; auto spec_k = index_engine_->Count() < context->topk() ? index_engine_->Count() : context->topk(); - ClusterResult(output_ids, output_distence, context->nq(), spec_k, result_set); + SearchTask::ClusterResult(output_ids, output_distence, context->nq(), spec_k, result_set); rc.Record("cluster result"); //step 4: pick up topk result - TopkResult(result_set, inner_k, context->GetResult()); + SearchTask::TopkResult(result_set, inner_k, context->GetResult()); rc.Record("reduce topk"); } catch (std::exception& ex) { @@ -191,6 +93,119 @@ std::shared_ptr SearchTask::Execute() { return nullptr; } +Status SearchTask::ClusterResult(const std::vector &output_ids, + const std::vector &output_distence, + uint64_t nq, + uint64_t topk, + SearchContext::ResultSet &result_set) { + if(output_ids.size() != nq*topk || output_distence.size() != nq*topk) { + std::string msg = "Invalid id array size: " + std::to_string(output_ids.size()) + + " distance array size: " + std::to_string(output_distence.size()); + SERVER_LOG_ERROR << msg; + return Status::Error(msg); + } + + result_set.clear(); + result_set.reserve(nq); + for (auto i = 0; i < nq; i++) { + SearchContext::Id2DistanceMap id_distance; + id_distance.reserve(topk); + for (auto k = 0; k < topk; k++) { + uint64_t index = i * topk + k; + if(output_ids[index] < 0) { + continue; + } + id_distance.push_back(std::make_pair(output_ids[index], output_distence[index])); + } + result_set.emplace_back(id_distance); + } + + return Status::OK(); +} + +Status SearchTask::MergeResult(SearchContext::Id2DistanceMap &distance_src, + SearchContext::Id2DistanceMap &distance_target, + uint64_t topk) { + //Note: the score_src and score_target are already arranged by score in ascending order + if(distance_src.empty()) { + SERVER_LOG_WARNING << "Empty distance source array"; + return Status::OK(); + } + + if(distance_target.empty()) { + distance_target.swap(distance_src); + return Status::OK(); + } + + size_t src_count = distance_src.size(); + size_t target_count = distance_target.size(); + SearchContext::Id2DistanceMap distance_merged; + distance_merged.reserve(topk); + size_t src_index = 0, target_index = 0; + while(true) { + //all score_src items are merged, if score_merged.size() still less than topk + //move items from score_target to score_merged until score_merged.size() equal topk + if(src_index >= src_count) { + for(size_t i = target_index; i < target_count && distance_merged.size() < topk; ++i) { + distance_merged.push_back(distance_target[i]); + } + break; + } + + //all score_target items are merged, if score_merged.size() still less than topk + //move items from score_src to score_merged until score_merged.size() equal topk + if(target_index >= target_count) { + for(size_t i = src_index; i < src_count && distance_merged.size() < topk; ++i) { + distance_merged.push_back(distance_src[i]); + } + break; + } + + //compare score, put smallest score to score_merged one by one + auto& src_pair = distance_src[src_index]; + auto& target_pair = distance_target[target_index]; + 
if(src_pair.second > target_pair.second) { + distance_merged.push_back(target_pair); + target_index++; + } else { + distance_merged.push_back(src_pair); + src_index++; + } + + //score_merged.size() already equal topk + if(distance_merged.size() >= topk) { + break; + } + } + + distance_target.swap(distance_merged); + + return Status::OK(); +} + +Status SearchTask::TopkResult(SearchContext::ResultSet &result_src, + uint64_t topk, + SearchContext::ResultSet &result_target) { + if (result_target.empty()) { + result_target.swap(result_src); + return Status::OK(); + } + + if (result_src.size() != result_target.size()) { + std::string msg = "Invalid result set size"; + SERVER_LOG_ERROR << msg; + return Status::Error(msg); + } + + for (size_t i = 0; i < result_src.size(); i++) { + SearchContext::Id2DistanceMap &score_src = result_src[i]; + SearchContext::Id2DistanceMap &score_target = result_target[i]; + SearchTask::MergeResult(score_src, score_target, topk); + } + + return Status::OK(); +} + } } } diff --git a/cpp/src/db/scheduler/task/SearchTask.h b/cpp/src/db/scheduler/task/SearchTask.h index 0b3a236ce4..e4f0d872b1 100644 --- a/cpp/src/db/scheduler/task/SearchTask.h +++ b/cpp/src/db/scheduler/task/SearchTask.h @@ -19,6 +19,20 @@ public: virtual std::shared_ptr Execute() override; + static Status ClusterResult(const std::vector &output_ids, + const std::vector &output_distence, + uint64_t nq, + uint64_t topk, + SearchContext::ResultSet &result_set); + + static Status MergeResult(SearchContext::Id2DistanceMap &distance_src, + SearchContext::Id2DistanceMap &distance_target, + uint64_t topk); + + static Status TopkResult(SearchContext::ResultSet &result_src, + uint64_t topk, + SearchContext::ResultSet &result_target); + public: size_t index_id_ = 0; int index_type_ = 0; //for metrics diff --git a/cpp/unittest/db/search_test.cpp b/cpp/unittest/db/search_test.cpp new file mode 100644 index 0000000000..db10bcbadf --- /dev/null +++ b/cpp/unittest/db/search_test.cpp @@ -0,0 +1,162 @@ +//////////////////////////////////////////////////////////////////////////////// +// Copyright 上海赜睿信息科技有限公司(Zilliz) - All Rights Reserved +// Unauthorized copying of this file, via any medium is strictly prohibited. +// Proprietary and confidential. 
+//////////////////////////////////////////////////////////////////////////////// +#include + +#include "db/scheduler/task/SearchTask.h" + +#include + +using namespace zilliz::milvus; + +namespace { + +static constexpr uint64_t NQ = 15; +static constexpr uint64_t TOP_K = 64; + +void BuildResult(uint64_t nq, + uint64_t top_k, + std::vector &output_ids, + std::vector &output_distence) { + output_ids.clear(); + output_ids.resize(nq*top_k); + output_distence.clear(); + output_distence.resize(nq*top_k); + + for(uint64_t i = 0; i < nq; i++) { + for(uint64_t j = 0; j < top_k; j++) { + output_ids[i * top_k + j] = (long)(drand48()*100000); + output_distence[i * top_k + j] = j + drand48(); + } + } +} + +void CheckResult(const engine::SearchContext::Id2DistanceMap& src_1, + const engine::SearchContext::Id2DistanceMap& src_2, + const engine::SearchContext::Id2DistanceMap& target) { + for(uint64_t i = 0; i < target.size() - 1; i++) { + ASSERT_LE(target[i].second, target[i + 1].second); + } + + using ID2DistMap = std::map; + ID2DistMap src_map_1, src_map_2; + for(const auto& pair : src_1) { + src_map_1.insert(pair); + } + for(const auto& pair : src_2) { + src_map_2.insert(pair); + } + + for(const auto& pair : target) { + ASSERT_TRUE(src_map_1.find(pair.first) != src_map_1.end() || src_map_2.find(pair.first) != src_map_2.end()); + + float dist = src_map_1.find(pair.first) != src_map_1.end() ? src_map_1[pair.first] : src_map_2[pair.first]; + ASSERT_LT(fabs(pair.second - dist), std::numeric_limits::epsilon()); + } +} + +} + +TEST(DBSearchTest, TOPK_TEST) { + std::vector target_ids; + std::vector target_distence; + engine::SearchContext::ResultSet src_result; + auto status = engine::SearchTask::ClusterResult(target_ids, target_distence, NQ, TOP_K, src_result); + ASSERT_FALSE(status.ok()); + ASSERT_TRUE(src_result.empty()); + + BuildResult(NQ, TOP_K, target_ids, target_distence); + status = engine::SearchTask::ClusterResult(target_ids, target_distence, NQ, TOP_K, src_result); + ASSERT_TRUE(status.ok()); + ASSERT_EQ(src_result.size(), NQ); + + engine::SearchContext::ResultSet target_result; + status = engine::SearchTask::TopkResult(target_result, TOP_K, target_result); + ASSERT_TRUE(status.ok()); + + status = engine::SearchTask::TopkResult(target_result, TOP_K, src_result); + ASSERT_FALSE(status.ok()); + + status = engine::SearchTask::TopkResult(src_result, TOP_K, target_result); + ASSERT_TRUE(status.ok()); + ASSERT_TRUE(src_result.empty()); + ASSERT_EQ(target_result.size(), NQ); + + std::vector src_ids; + std::vector src_distence; + uint64_t wrong_topk = TOP_K - 10; + BuildResult(NQ, wrong_topk, src_ids, src_distence); + + status = engine::SearchTask::ClusterResult(src_ids, src_distence, NQ, wrong_topk, src_result); + ASSERT_TRUE(status.ok()); + + status = engine::SearchTask::TopkResult(src_result, TOP_K, target_result); + ASSERT_TRUE(status.ok()); + for(uint64_t i = 0; i < NQ; i++) { + ASSERT_EQ(target_result[i].size(), TOP_K); + } + + wrong_topk = TOP_K + 10; + BuildResult(NQ, wrong_topk, src_ids, src_distence); + + status = engine::SearchTask::TopkResult(src_result, TOP_K, target_result); + ASSERT_TRUE(status.ok()); + for(uint64_t i = 0; i < NQ; i++) { + ASSERT_EQ(target_result[i].size(), TOP_K); + } +} + +TEST(DBSearchTest, MERGE_TEST) { + std::vector target_ids; + std::vector target_distence; + std::vector src_ids; + std::vector src_distence; + engine::SearchContext::ResultSet src_result, target_result; + + uint64_t src_count = 5, target_count = 8; + BuildResult(1, src_count, src_ids, src_distence); + 
BuildResult(1, target_count, target_ids, target_distence); + auto status = engine::SearchTask::ClusterResult(src_ids, src_distence, 1, src_count, src_result); + ASSERT_TRUE(status.ok()); + status = engine::SearchTask::ClusterResult(target_ids, target_distence, 1, target_count, target_result); + ASSERT_TRUE(status.ok()); + + { + engine::SearchContext::Id2DistanceMap src = src_result[0]; + engine::SearchContext::Id2DistanceMap target = target_result[0]; + status = engine::SearchTask::MergeResult(src, target, 10); + ASSERT_TRUE(status.ok()); + ASSERT_EQ(target.size(), 10); + CheckResult(src_result[0], target_result[0], target); + } + + { + engine::SearchContext::Id2DistanceMap src = src_result[0]; + engine::SearchContext::Id2DistanceMap target; + status = engine::SearchTask::MergeResult(src, target, 10); + ASSERT_TRUE(status.ok()); + ASSERT_EQ(target.size(), src_count); + ASSERT_TRUE(src.empty()); + CheckResult(src_result[0], target_result[0], target); + } + + { + engine::SearchContext::Id2DistanceMap src = src_result[0]; + engine::SearchContext::Id2DistanceMap target = target_result[0]; + status = engine::SearchTask::MergeResult(src, target, 30); + ASSERT_TRUE(status.ok()); + ASSERT_EQ(target.size(), src_count + target_count); + CheckResult(src_result[0], target_result[0], target); + } + + { + engine::SearchContext::Id2DistanceMap target = src_result[0]; + engine::SearchContext::Id2DistanceMap src = target_result[0]; + status = engine::SearchTask::MergeResult(src, target, 30); + ASSERT_TRUE(status.ok()); + ASSERT_EQ(target.size(), src_count + target_count); + CheckResult(src_result[0], target_result[0], target); + } +} \ No newline at end of file From c383b83215d2447220df58f09482a553beb83abf Mon Sep 17 00:00:00 2001 From: zhiru Date: Thu, 4 Jul 2019 17:15:38 +0800 Subject: [PATCH 36/91] fix changelog Former-commit-id: d4ece4b4807fee38fe17486d0775d870d3fd2bfc --- cpp/CHANGELOG.md | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/cpp/CHANGELOG.md b/cpp/CHANGELOG.md index 9056f19a2e..c7d983bd01 100644 --- a/cpp/CHANGELOG.md +++ b/cpp/CHANGELOG.md @@ -7,9 +7,15 @@ Please mark all change in change log and use the ticket from JIRA. ## Bug +- MS-148 - Disable cleanup if mode is read only +- MS-149 - Fixed searching only one index file issue in distributed mode +- MS-153 - fix c_str error when connecting to MySQL + ## Improvement - MS-156 - Add unittest for merge result functions +- MS-152 - Delete assert in MySQLMetaImpl and change MySQLConnectionPool impl + ## New Feature - MS-137 - Integrate knowhere @@ -27,9 +33,6 @@ Please mark all change in change log and use the ticket from JIRA. - MS-90 - Fix arch match incorrect on ARM - MS-99 - Fix compilation bug - MS-110 - Avoid huge file size -- MS-148 - Disable cleanup if mode is read only -- MS-149 - Fixed searching only one index file issue in distributed mode -- MS-153 - fix c_str error when connecting to MySQL ## Improvement - MS-82 - Update server startup welcome message @@ -43,7 +46,6 @@ Please mark all change in change log and use the ticket from JIRA. 
- MS-124 - HasTable interface - MS-126 - Add more error code - MS-128 - Change default db path -- MS-152 - Delete assert in MySQLMetaImpl and change MySQLConnectionPool impl ## New Feature From 2c3128ea9f2de1502d4d1b14fa61e10b75e0d909 Mon Sep 17 00:00:00 2001 From: zhiru Date: Thu, 4 Jul 2019 17:17:09 +0800 Subject: [PATCH 37/91] fix changelog Former-commit-id: 740326a44c2884f3ba15173c59414bc11e055dc4 --- cpp/CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/cpp/CHANGELOG.md b/cpp/CHANGELOG.md index c7d983bd01..5168b9549b 100644 --- a/cpp/CHANGELOG.md +++ b/cpp/CHANGELOG.md @@ -10,6 +10,7 @@ Please mark all change in change log and use the ticket from JIRA. - MS-148 - Disable cleanup if mode is read only - MS-149 - Fixed searching only one index file issue in distributed mode - MS-153 - fix c_str error when connecting to MySQL +- MS-157 - fix changelog ## Improvement - MS-156 - Add unittest for merge result functions From b0e1f1b3f1ecf858a93f38f58b99a2a43f82fa68 Mon Sep 17 00:00:00 2001 From: zhiru Date: Fri, 5 Jul 2019 15:03:40 +0800 Subject: [PATCH 38/91] add mem impl Former-commit-id: 8aeddae834ddd1289a9ab574bdb050e3e5377e99 --- cpp/src/db/Constants.h | 20 ++++ cpp/src/db/MemTable.cpp | 51 ++++++++++ cpp/src/db/MemTable.h | 40 ++++++++ cpp/src/db/MemTableFile.cpp | 66 +++++++++++++ cpp/src/db/MemTableFile.h | 44 +++++++++ cpp/src/db/VectorSource.cpp | 60 +++++++++++ cpp/src/db/VectorSource.h | 41 ++++++++ cpp/unittest/db/mem_test.cpp | 187 +++++++++++++++++++++++++++++++++++ 8 files changed, 509 insertions(+) create mode 100644 cpp/src/db/Constants.h create mode 100644 cpp/src/db/MemTable.cpp create mode 100644 cpp/src/db/MemTable.h create mode 100644 cpp/src/db/MemTableFile.cpp create mode 100644 cpp/src/db/MemTableFile.h create mode 100644 cpp/src/db/VectorSource.cpp create mode 100644 cpp/src/db/VectorSource.h create mode 100644 cpp/unittest/db/mem_test.cpp diff --git a/cpp/src/db/Constants.h b/cpp/src/db/Constants.h new file mode 100644 index 0000000000..2bb2e0a064 --- /dev/null +++ b/cpp/src/db/Constants.h @@ -0,0 +1,20 @@ +/******************************************************************************* + * Copyright 上海赜睿信息科技有限公司(Zilliz) - All Rights Reserved + * Unauthorized copying of this file, via any medium is strictly prohibited. + * Proprietary and confidential. 
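Since the "add mem impl" commit above carries no description, here is a minimal sketch of how its new pieces are intended to fit together, based only on the interfaces added in this patch series; the include paths and the caller-supplied meta pointer and vector buffer are assumptions for illustration.

    #include <memory>
    #include <string>

    #include "db/MemTable.h"
    #include "db/VectorSource.h"

    using namespace zilliz::milvus;

    size_t InsertSketch(const std::string& table_id,
                        const engine::meta::Meta::Ptr& meta,
                        size_t n, const float* vectors) {
        // VectorSource wraps the raw float buffer and generates vector ids itself.
        engine::VectorSource::Ptr source =
            std::make_shared<engine::VectorSource>(n, vectors);

        // MemTable appends to its newest MemTableFile and opens a new one whenever
        // the current file reaches MAX_TABLE_FILE_MEM (128 MB).
        // (Constructor as introduced in this commit; a later commit in this series
        // also passes engine::Options.)
        engine::MemTable mem_table(table_id, meta);
        auto status = mem_table.Add(source);
        if (!status.ok()) {
            return 0;
        }

        return mem_table.GetStackSize();   // number of MemTableFiles produced
    }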
+ ******************************************************************************/ +#pragma once + +namespace zilliz { +namespace milvus { +namespace engine { + +const size_t K = 1024UL; +const size_t M = K*K; +const size_t MAX_TABLE_FILE_MEM = 128 * M; + +const int VECTOR_TYPE_SIZE = sizeof(float); + +} // namespace engine +} // namespace milvus +} // namespace zilliz diff --git a/cpp/src/db/MemTable.cpp b/cpp/src/db/MemTable.cpp new file mode 100644 index 0000000000..032d479999 --- /dev/null +++ b/cpp/src/db/MemTable.cpp @@ -0,0 +1,51 @@ +#include "MemTable.h" +#include "Log.h" + +namespace zilliz { +namespace milvus { +namespace engine { + +MemTable::MemTable(const std::string& table_id, + const std::shared_ptr& meta) : + table_id_(table_id), + meta_(meta) { + +} + +Status MemTable::Add(VectorSource::Ptr& source) { + while (!source->AllAdded()) { + MemTableFile::Ptr currentMemTableFile; + if (!mem_table_file_stack_.empty()) { + currentMemTableFile = mem_table_file_stack_.top(); + } + Status status; + if (mem_table_file_stack_.empty() || currentMemTableFile->isFull()) { + MemTableFile::Ptr newMemTableFile = std::make_shared(table_id_, meta_); + status = newMemTableFile->Add(source); + if (status.ok()) { + mem_table_file_stack_.push(newMemTableFile); + } + } + else { + status = currentMemTableFile->Add(source); + } + if (!status.ok()) { + std::string errMsg = "MemTable::Add failed: " + status.ToString(); + ENGINE_LOG_ERROR << errMsg; + return Status::Error(errMsg); + } + } + return Status::OK(); +} + +void MemTable::GetCurrentMemTableFile(MemTableFile::Ptr& mem_table_file) { + mem_table_file = mem_table_file_stack_.top(); +} + +size_t MemTable::GetStackSize() { + return mem_table_file_stack_.size(); +} + +} // namespace engine +} // namespace milvus +} // namespace zilliz \ No newline at end of file diff --git a/cpp/src/db/MemTable.h b/cpp/src/db/MemTable.h new file mode 100644 index 0000000000..b9fe4147d8 --- /dev/null +++ b/cpp/src/db/MemTable.h @@ -0,0 +1,40 @@ +#pragma once + +#include "Status.h" +#include "MemTableFile.h" +#include "VectorSource.h" + +#include + +namespace zilliz { +namespace milvus { +namespace engine { + +class MemTable { + +public: + + using Ptr = std::shared_ptr; + using MemTableFileStack = std::stack; + using MetaPtr = meta::Meta::Ptr; + + MemTable(const std::string& table_id, const std::shared_ptr& meta); + + Status Add(VectorSource::Ptr& source); + + void GetCurrentMemTableFile(MemTableFile::Ptr& mem_table_file); + + size_t GetStackSize(); + +private: + const std::string table_id_; + + MemTableFileStack mem_table_file_stack_; + + MetaPtr meta_; + +}; //MemTable + +} // namespace engine +} // namespace milvus +} // namespace zilliz \ No newline at end of file diff --git a/cpp/src/db/MemTableFile.cpp b/cpp/src/db/MemTableFile.cpp new file mode 100644 index 0000000000..26bc0d38e9 --- /dev/null +++ b/cpp/src/db/MemTableFile.cpp @@ -0,0 +1,66 @@ +#include "MemTableFile.h" +#include "Constants.h" +#include "Log.h" + +#include + +namespace zilliz { +namespace milvus { +namespace engine { + +MemTableFile::MemTableFile(const std::string& table_id, + const std::shared_ptr& meta) : + table_id_(table_id), + meta_(meta) { + + current_mem_ = 0; + CreateTableFile(); +} + +Status MemTableFile::CreateTableFile() { + + meta::TableFileSchema table_file_schema; + table_file_schema.table_id_ = table_id_; + auto status = meta_->CreateTableFile(table_file_schema); + if (status.ok()) { + table_file_schema_ = table_file_schema; + } + else { + std::string errMsg = 
"MemTableFile::CreateTableFile failed: " + status.ToString(); + ENGINE_LOG_ERROR << errMsg; + } + return status; +} + +Status MemTableFile::Add(const VectorSource::Ptr& source) { + + size_t singleVectorMemSize = table_file_schema_.dimension_ * VECTOR_TYPE_SIZE; + size_t memLeft = GetMemLeft(); + if (memLeft >= singleVectorMemSize) { + size_t numVectorsToAdd = std::ceil(memLeft / singleVectorMemSize); + size_t numVectorsAdded; + auto status = source->Add(table_file_schema_, numVectorsToAdd, numVectorsAdded); + if (status.ok()) { + current_mem_ += (numVectorsAdded * singleVectorMemSize); + } + return status; + } + return Status::OK(); +} + +size_t MemTableFile::GetCurrentMem() { + return current_mem_; +} + +size_t MemTableFile::GetMemLeft() { + return (MAX_TABLE_FILE_MEM - current_mem_); +} + +bool MemTableFile::isFull() { + size_t singleVectorMemSize = table_file_schema_.dimension_ * VECTOR_TYPE_SIZE; + return (GetMemLeft() < singleVectorMemSize); +} + +} // namespace engine +} // namespace milvus +} // namespace zilliz \ No newline at end of file diff --git a/cpp/src/db/MemTableFile.h b/cpp/src/db/MemTableFile.h new file mode 100644 index 0000000000..1efe4c0bfe --- /dev/null +++ b/cpp/src/db/MemTableFile.h @@ -0,0 +1,44 @@ +#pragma once + +#include "Status.h" +#include "Meta.h" +#include "VectorSource.h" + +namespace zilliz { +namespace milvus { +namespace engine { + +class MemTableFile { + +public: + + using Ptr = std::shared_ptr; + using MetaPtr = meta::Meta::Ptr; + + MemTableFile(const std::string& table_id, const std::shared_ptr& meta); + + Status Add(const VectorSource::Ptr& source); + + size_t GetCurrentMem(); + + size_t GetMemLeft(); + + bool isFull(); + +private: + + Status CreateTableFile(); + + const std::string table_id_; + + meta::TableFileSchema table_file_schema_; + + MetaPtr meta_; + + size_t current_mem_; + +}; //MemTableFile + +} // namespace engine +} // namespace milvus +} // namespace zilliz \ No newline at end of file diff --git a/cpp/src/db/VectorSource.cpp b/cpp/src/db/VectorSource.cpp new file mode 100644 index 0000000000..dff5423c6f --- /dev/null +++ b/cpp/src/db/VectorSource.cpp @@ -0,0 +1,60 @@ +#include "VectorSource.h" +#include "ExecutionEngine.h" +#include "EngineFactory.h" +#include "Log.h" + +namespace zilliz { +namespace milvus { +namespace engine { + + +VectorSource::VectorSource(const size_t &n, + const float *vectors) : + n_(n), + vectors_(vectors), + id_generator_(new SimpleIDGenerator()) { + current_num_vectors_added = 0; +} + +Status VectorSource::Add(const meta::TableFileSchema& table_file_schema, const size_t& num_vectors_to_add, size_t& num_vectors_added) { + + if (table_file_schema.dimension_ <= 0) { + std::string errMsg = "VectorSource::Add: table_file_schema dimension = " + + std::to_string(table_file_schema.dimension_) + ", table_id = " + table_file_schema.table_id_; + ENGINE_LOG_ERROR << errMsg; + return Status::Error(errMsg); + } + ExecutionEnginePtr engine = EngineFactory::Build(table_file_schema.dimension_, + table_file_schema.location_, + (EngineType)table_file_schema.engine_type_); + + num_vectors_added = current_num_vectors_added + num_vectors_to_add <= n_ ? 
num_vectors_to_add : n_ - current_num_vectors_added; + IDNumbers vector_ids_to_add; + id_generator_->GetNextIDNumbers(num_vectors_added, vector_ids_to_add); + Status status = engine->AddWithIds(num_vectors_added, vectors_ + current_num_vectors_added, vector_ids_to_add.data()); + if (status.ok()) { + current_num_vectors_added += num_vectors_added; + vector_ids_.insert(vector_ids_.end(), vector_ids_to_add.begin(), vector_ids_to_add.end()); + } + else { + ENGINE_LOG_ERROR << "VectorSource::Add failed: " + status.ToString(); + } + + return status; +} + +size_t VectorSource::GetNumVectorsAdded() { + return current_num_vectors_added; +} + +bool VectorSource::AllAdded() { + return (current_num_vectors_added == n_); +} + +IDNumbers VectorSource::GetVectorIds() { + return vector_ids_; +} + +} // namespace engine +} // namespace milvus +} // namespace zilliz \ No newline at end of file diff --git a/cpp/src/db/VectorSource.h b/cpp/src/db/VectorSource.h new file mode 100644 index 0000000000..170f3634cf --- /dev/null +++ b/cpp/src/db/VectorSource.h @@ -0,0 +1,41 @@ +#pragma once + +#include "Status.h" +#include "Meta.h" +#include "IDGenerator.h" + +namespace zilliz { +namespace milvus { +namespace engine { + +class VectorSource { + +public: + + using Ptr = std::shared_ptr; + + VectorSource(const size_t& n, const float* vectors); + + Status Add(const meta::TableFileSchema& table_file_schema, const size_t& num_vectors_to_add, size_t& num_vectors_added); + + size_t GetNumVectorsAdded(); + + bool AllAdded(); + + IDNumbers GetVectorIds(); + +private: + + const size_t n_; + const float* vectors_; + IDNumbers vector_ids_; + + size_t current_num_vectors_added; + + IDGenerator* id_generator_; + +}; //VectorSource + +} // namespace engine +} // namespace milvus +} // namespace zilliz \ No newline at end of file diff --git a/cpp/unittest/db/mem_test.cpp b/cpp/unittest/db/mem_test.cpp new file mode 100644 index 0000000000..8418b9cd2d --- /dev/null +++ b/cpp/unittest/db/mem_test.cpp @@ -0,0 +1,187 @@ +#include "gtest/gtest.h" + +#include "db/VectorSource.h" +#include "db/MemTableFile.h" +#include "db/MemTable.h" +#include "utils.h" +#include "db/Factories.h" +#include "db/Constants.h" + +using namespace zilliz::milvus; + +namespace { + + static const std::string TABLE_NAME = "test_group"; + static constexpr int64_t TABLE_DIM = 256; + static constexpr int64_t VECTOR_COUNT = 250000; + static constexpr int64_t INSERT_LOOP = 10000; + + engine::meta::TableSchema BuildTableSchema() { + engine::meta::TableSchema table_info; + table_info.dimension_ = TABLE_DIM; + table_info.table_id_ = TABLE_NAME; + table_info.engine_type_ = (int)engine::EngineType::FAISS_IDMAP; + return table_info; + } + + void BuildVectors(int64_t n, std::vector& vectors) { + vectors.clear(); + vectors.resize(n*TABLE_DIM); + float* data = vectors.data(); + for(int i = 0; i < n; i++) { + for(int j = 0; j < TABLE_DIM; j++) data[TABLE_DIM * i + j] = drand48(); + data[TABLE_DIM * i] += i / 2000.; + } + } +} + +TEST(MEM_TEST, VECTOR_SOURCE_TEST) { + + std::shared_ptr impl_ = engine::DBMetaImplFactory::Build(); + + engine::meta::TableSchema table_schema = BuildTableSchema(); + auto status = impl_->CreateTable(table_schema); + ASSERT_TRUE(status.ok()); + + engine::meta::TableFileSchema table_file_schema; + table_file_schema.table_id_ = TABLE_NAME; + status = impl_->CreateTableFile(table_file_schema); + ASSERT_TRUE(status.ok()); + + int64_t n = 100; + std::vector vectors; + BuildVectors(n, vectors); + + engine::VectorSource source(n, vectors.data()); + + size_t 
num_vectors_added; + status = source.Add(table_file_schema, 50, num_vectors_added); + ASSERT_TRUE(status.ok()); + + ASSERT_EQ(num_vectors_added, 50); + + engine::IDNumbers vector_ids = source.GetVectorIds(); + ASSERT_EQ(vector_ids.size(), 50); + + status = source.Add(table_file_schema, 60, num_vectors_added); + ASSERT_TRUE(status.ok()); + + ASSERT_EQ(num_vectors_added, 50); + + vector_ids = source.GetVectorIds(); + ASSERT_EQ(vector_ids.size(), 100); + +// for (auto& id : vector_ids) { +// std::cout << id << std::endl; +// } + + status = impl_->DropAll(); + ASSERT_TRUE(status.ok()); +} + +TEST(MEM_TEST, MEM_TABLE_FILE_TEST) { + + std::shared_ptr impl_ = engine::DBMetaImplFactory::Build(); + + engine::meta::TableSchema table_schema = BuildTableSchema(); + auto status = impl_->CreateTable(table_schema); + ASSERT_TRUE(status.ok()); + + engine::MemTableFile memTableFile(TABLE_NAME, impl_); + + int64_t n_100 = 100; + std::vector vectors_100; + BuildVectors(n_100, vectors_100); + + engine::VectorSource::Ptr source = std::make_shared(n_100, vectors_100.data()); + + status = memTableFile.Add(source); + ASSERT_TRUE(status.ok()); + +// std::cout << memTableFile.GetCurrentMem() << " " << memTableFile.GetMemLeft() << std::endl; + + engine::IDNumbers vector_ids = source->GetVectorIds(); + ASSERT_EQ(vector_ids.size(), 100); + + size_t singleVectorMem = sizeof(float) * TABLE_DIM; + ASSERT_EQ(memTableFile.GetCurrentMem(), n_100 * singleVectorMem); + + int64_t n_max = engine::MAX_TABLE_FILE_MEM / singleVectorMem; + std::vector vectors_128M; + BuildVectors(n_max, vectors_128M); + + engine::VectorSource::Ptr source_128M = std::make_shared(n_max, vectors_128M.data()); + status = memTableFile.Add(source_128M); + + vector_ids = source_128M->GetVectorIds(); + ASSERT_EQ(vector_ids.size(), n_max - n_100); + + ASSERT_TRUE(memTableFile.isFull()); + + status = impl_->DropAll(); + ASSERT_TRUE(status.ok()); +} + +TEST(MEM_TEST, MEM_TABLE_TEST) { + + std::shared_ptr impl_ = engine::DBMetaImplFactory::Build(); + + engine::meta::TableSchema table_schema = BuildTableSchema(); + auto status = impl_->CreateTable(table_schema); + ASSERT_TRUE(status.ok()); + + int64_t n_100 = 100; + std::vector vectors_100; + BuildVectors(n_100, vectors_100); + + engine::VectorSource::Ptr source_100 = std::make_shared(n_100, vectors_100.data()); + + engine::MemTable memTable(TABLE_NAME, impl_); + + status = memTable.Add(source_100); + ASSERT_TRUE(status.ok()); + + engine::IDNumbers vector_ids = source_100->GetVectorIds(); + ASSERT_EQ(vector_ids.size(), 100); + + engine::MemTableFile::Ptr memTableFile; + memTable.GetCurrentMemTableFile(memTableFile); + size_t singleVectorMem = sizeof(float) * TABLE_DIM; + ASSERT_EQ(memTableFile->GetCurrentMem(), n_100 * singleVectorMem); + + int64_t n_max = engine::MAX_TABLE_FILE_MEM / singleVectorMem; + std::vector vectors_128M; + BuildVectors(n_max, vectors_128M); + + engine::VectorSource::Ptr source_128M = std::make_shared(n_max, vectors_128M.data()); + status = memTable.Add(source_128M); + ASSERT_TRUE(status.ok()); + + vector_ids = source_128M->GetVectorIds(); + ASSERT_EQ(vector_ids.size(), n_max); + + memTable.GetCurrentMemTableFile(memTableFile); + ASSERT_EQ(memTableFile->GetCurrentMem(), n_100 * singleVectorMem); + + ASSERT_EQ(memTable.GetStackSize(), 2); + + int64_t n_1G = 1024000; + std::vector vectors_1G; + BuildVectors(n_1G, vectors_1G); + + engine::VectorSource::Ptr source_1G = std::make_shared(n_1G, vectors_1G.data()); + + status = memTable.Add(source_1G); + ASSERT_TRUE(status.ok()); + + vector_ids 
= source_1G->GetVectorIds(); + ASSERT_EQ(vector_ids.size(), n_1G); + + int expectedStackSize = 2 + std::ceil((n_1G - n_100) * singleVectorMem / engine::MAX_TABLE_FILE_MEM); + ASSERT_EQ(memTable.GetStackSize(), expectedStackSize); + + status = impl_->DropAll(); + ASSERT_TRUE(status.ok()); +} + + From 8b05eec64369aac64b6732e09b903c9903e7b8d4 Mon Sep 17 00:00:00 2001 From: zhiru Date: Fri, 5 Jul 2019 15:57:49 +0800 Subject: [PATCH 39/91] update Former-commit-id: 85c6aeb5f2a63fda4450232cce723f16c98aece6 --- cpp/src/db/MemTableFile.cpp | 10 ++++++++-- cpp/src/db/MemTableFile.h | 3 +++ cpp/src/db/VectorSource.cpp | 10 +++++----- cpp/src/db/VectorSource.h | 8 +++++++- cpp/unittest/db/mem_test.cpp | 8 ++++++-- 5 files changed, 29 insertions(+), 10 deletions(-) diff --git a/cpp/src/db/MemTableFile.cpp b/cpp/src/db/MemTableFile.cpp index 26bc0d38e9..58b76ab834 100644 --- a/cpp/src/db/MemTableFile.cpp +++ b/cpp/src/db/MemTableFile.cpp @@ -1,6 +1,7 @@ #include "MemTableFile.h" #include "Constants.h" #include "Log.h" +#include "EngineFactory.h" #include @@ -14,7 +15,12 @@ MemTableFile::MemTableFile(const std::string& table_id, meta_(meta) { current_mem_ = 0; - CreateTableFile(); + auto status = CreateTableFile(); + if (status.ok()) { + execution_engine_ = EngineFactory::Build(table_file_schema_.dimension_, + table_file_schema_.location_, + (EngineType)table_file_schema_.engine_type_); + } } Status MemTableFile::CreateTableFile() { @@ -39,7 +45,7 @@ Status MemTableFile::Add(const VectorSource::Ptr& source) { if (memLeft >= singleVectorMemSize) { size_t numVectorsToAdd = std::ceil(memLeft / singleVectorMemSize); size_t numVectorsAdded; - auto status = source->Add(table_file_schema_, numVectorsToAdd, numVectorsAdded); + auto status = source->Add(execution_engine_, table_file_schema_, numVectorsToAdd, numVectorsAdded); if (status.ok()) { current_mem_ += (numVectorsAdded * singleVectorMemSize); } diff --git a/cpp/src/db/MemTableFile.h b/cpp/src/db/MemTableFile.h index 1efe4c0bfe..04f30178ea 100644 --- a/cpp/src/db/MemTableFile.h +++ b/cpp/src/db/MemTableFile.h @@ -3,6 +3,7 @@ #include "Status.h" #include "Meta.h" #include "VectorSource.h" +#include "ExecutionEngine.h" namespace zilliz { namespace milvus { @@ -37,6 +38,8 @@ private: size_t current_mem_; + ExecutionEnginePtr execution_engine_; + }; //MemTableFile } // namespace engine diff --git a/cpp/src/db/VectorSource.cpp b/cpp/src/db/VectorSource.cpp index dff5423c6f..f7cef994fa 100644 --- a/cpp/src/db/VectorSource.cpp +++ b/cpp/src/db/VectorSource.cpp @@ -16,7 +16,10 @@ VectorSource::VectorSource(const size_t &n, current_num_vectors_added = 0; } -Status VectorSource::Add(const meta::TableFileSchema& table_file_schema, const size_t& num_vectors_to_add, size_t& num_vectors_added) { +Status VectorSource::Add(const ExecutionEnginePtr& execution_engine, + const meta::TableFileSchema& table_file_schema, + const size_t& num_vectors_to_add, + size_t& num_vectors_added) { if (table_file_schema.dimension_ <= 0) { std::string errMsg = "VectorSource::Add: table_file_schema dimension = " + @@ -24,14 +27,11 @@ Status VectorSource::Add(const meta::TableFileSchema& table_file_schema, const s ENGINE_LOG_ERROR << errMsg; return Status::Error(errMsg); } - ExecutionEnginePtr engine = EngineFactory::Build(table_file_schema.dimension_, - table_file_schema.location_, - (EngineType)table_file_schema.engine_type_); num_vectors_added = current_num_vectors_added + num_vectors_to_add <= n_ ? 
num_vectors_to_add : n_ - current_num_vectors_added; IDNumbers vector_ids_to_add; id_generator_->GetNextIDNumbers(num_vectors_added, vector_ids_to_add); - Status status = engine->AddWithIds(num_vectors_added, vectors_ + current_num_vectors_added, vector_ids_to_add.data()); + Status status = execution_engine->AddWithIds(num_vectors_added, vectors_ + current_num_vectors_added, vector_ids_to_add.data()); if (status.ok()) { current_num_vectors_added += num_vectors_added; vector_ids_.insert(vector_ids_.end(), vector_ids_to_add.begin(), vector_ids_to_add.end()); diff --git a/cpp/src/db/VectorSource.h b/cpp/src/db/VectorSource.h index 170f3634cf..597eee4ad8 100644 --- a/cpp/src/db/VectorSource.h +++ b/cpp/src/db/VectorSource.h @@ -3,6 +3,7 @@ #include "Status.h" #include "Meta.h" #include "IDGenerator.h" +#include "ExecutionEngine.h" namespace zilliz { namespace milvus { @@ -16,7 +17,10 @@ public: VectorSource(const size_t& n, const float* vectors); - Status Add(const meta::TableFileSchema& table_file_schema, const size_t& num_vectors_to_add, size_t& num_vectors_added); + Status Add(const ExecutionEnginePtr& execution_engine, + const meta::TableFileSchema& table_file_schema, + const size_t& num_vectors_to_add, + size_t& num_vectors_added); size_t GetNumVectorsAdded(); @@ -24,6 +28,8 @@ public: IDNumbers GetVectorIds(); +// Status Serialize(); + private: const size_t n_; diff --git a/cpp/unittest/db/mem_test.cpp b/cpp/unittest/db/mem_test.cpp index 8418b9cd2d..111914f8a9 100644 --- a/cpp/unittest/db/mem_test.cpp +++ b/cpp/unittest/db/mem_test.cpp @@ -6,6 +6,7 @@ #include "utils.h" #include "db/Factories.h" #include "db/Constants.h" +#include "db/EngineFactory.h" using namespace zilliz::milvus; @@ -55,7 +56,10 @@ TEST(MEM_TEST, VECTOR_SOURCE_TEST) { engine::VectorSource source(n, vectors.data()); size_t num_vectors_added; - status = source.Add(table_file_schema, 50, num_vectors_added); + engine::ExecutionEnginePtr execution_engine_ = engine::EngineFactory::Build(table_file_schema.dimension_, + table_file_schema.location_, + (engine::EngineType)table_file_schema.engine_type_); + status = source.Add(execution_engine_, table_file_schema, 50, num_vectors_added); ASSERT_TRUE(status.ok()); ASSERT_EQ(num_vectors_added, 50); @@ -63,7 +67,7 @@ TEST(MEM_TEST, VECTOR_SOURCE_TEST) { engine::IDNumbers vector_ids = source.GetVectorIds(); ASSERT_EQ(vector_ids.size(), 50); - status = source.Add(table_file_schema, 60, num_vectors_added); + status = source.Add(execution_engine_, table_file_schema, 60, num_vectors_added); ASSERT_TRUE(status.ok()); ASSERT_EQ(num_vectors_added, 50); From 97d129fc38f0b62db2c722b9be838860d18379fd Mon Sep 17 00:00:00 2001 From: zhiru Date: Fri, 5 Jul 2019 16:46:15 +0800 Subject: [PATCH 40/91] Implemented add and serialize Former-commit-id: 02d0ec1da6c441ff0c05d9933a487886dfbd0f96 --- cpp/src/db/MemTable.cpp | 32 +++++++++++++++++++-------- cpp/src/db/MemTable.h | 10 ++++++--- cpp/src/db/MemTableFile.cpp | 42 +++++++++++++++++++++++++++++++++--- cpp/src/db/MemTableFile.h | 8 +++++-- cpp/src/db/VectorSource.cpp | 12 +++++------ cpp/src/db/VectorSource.h | 2 -- cpp/unittest/db/mem_test.cpp | 11 +++++++--- 7 files changed, 89 insertions(+), 28 deletions(-) diff --git a/cpp/src/db/MemTable.cpp b/cpp/src/db/MemTable.cpp index 032d479999..86554695c8 100644 --- a/cpp/src/db/MemTable.cpp +++ b/cpp/src/db/MemTable.cpp @@ -6,24 +6,26 @@ namespace milvus { namespace engine { MemTable::MemTable(const std::string& table_id, - const std::shared_ptr& meta) : + const std::shared_ptr& meta, + const 
Options& options) : table_id_(table_id), - meta_(meta) { + meta_(meta), + options_(options) { } Status MemTable::Add(VectorSource::Ptr& source) { while (!source->AllAdded()) { MemTableFile::Ptr currentMemTableFile; - if (!mem_table_file_stack_.empty()) { - currentMemTableFile = mem_table_file_stack_.top(); + if (!mem_table_file_list_.empty()) { + currentMemTableFile = mem_table_file_list_.back(); } Status status; - if (mem_table_file_stack_.empty() || currentMemTableFile->isFull()) { - MemTableFile::Ptr newMemTableFile = std::make_shared(table_id_, meta_); + if (mem_table_file_list_.empty() || currentMemTableFile->IsFull()) { + MemTableFile::Ptr newMemTableFile = std::make_shared(table_id_, meta_, options_); status = newMemTableFile->Add(source); if (status.ok()) { - mem_table_file_stack_.push(newMemTableFile); + mem_table_file_list_.emplace_back(newMemTableFile); } } else { @@ -39,11 +41,23 @@ Status MemTable::Add(VectorSource::Ptr& source) { } void MemTable::GetCurrentMemTableFile(MemTableFile::Ptr& mem_table_file) { - mem_table_file = mem_table_file_stack_.top(); + mem_table_file = mem_table_file_list_.back(); } size_t MemTable::GetStackSize() { - return mem_table_file_stack_.size(); + return mem_table_file_list_.size(); +} + +Status MemTable::Serialize() { + for (auto& memTableFile : mem_table_file_list_) { + auto status = memTableFile->Serialize(); + if (!status.ok()) { + std::string errMsg = "MemTable::Serialize failed: " + status.ToString(); + ENGINE_LOG_ERROR << errMsg; + return Status::Error(errMsg); + } + } + return Status::OK(); } } // namespace engine diff --git a/cpp/src/db/MemTable.h b/cpp/src/db/MemTable.h index b9fe4147d8..d5c7cc9e85 100644 --- a/cpp/src/db/MemTable.h +++ b/cpp/src/db/MemTable.h @@ -15,10 +15,10 @@ class MemTable { public: using Ptr = std::shared_ptr; - using MemTableFileStack = std::stack; + using MemTableFileList = std::vector; using MetaPtr = meta::Meta::Ptr; - MemTable(const std::string& table_id, const std::shared_ptr& meta); + MemTable(const std::string& table_id, const std::shared_ptr& meta, const Options& options); Status Add(VectorSource::Ptr& source); @@ -26,13 +26,17 @@ public: size_t GetStackSize(); + Status Serialize(); + private: const std::string table_id_; - MemTableFileStack mem_table_file_stack_; + MemTableFileList mem_table_file_list_; MetaPtr meta_; + Options options_; + }; //MemTable } // namespace engine diff --git a/cpp/src/db/MemTableFile.cpp b/cpp/src/db/MemTableFile.cpp index 58b76ab834..0ff91de00b 100644 --- a/cpp/src/db/MemTableFile.cpp +++ b/cpp/src/db/MemTableFile.cpp @@ -2,6 +2,7 @@ #include "Constants.h" #include "Log.h" #include "EngineFactory.h" +#include "metrics/Metrics.h" #include @@ -10,9 +11,11 @@ namespace milvus { namespace engine { MemTableFile::MemTableFile(const std::string& table_id, - const std::shared_ptr& meta) : + const std::shared_ptr& meta, + const Options& options) : table_id_(table_id), - meta_(meta) { + meta_(meta), + options_(options) { current_mem_ = 0; auto status = CreateTableFile(); @@ -40,6 +43,13 @@ Status MemTableFile::CreateTableFile() { Status MemTableFile::Add(const VectorSource::Ptr& source) { + if (table_file_schema_.dimension_ <= 0) { + std::string errMsg = "MemTableFile::Add: table_file_schema dimension = " + + std::to_string(table_file_schema_.dimension_) + ", table_id = " + table_file_schema_.table_id_; + ENGINE_LOG_ERROR << errMsg; + return Status::Error(errMsg); + } + size_t singleVectorMemSize = table_file_schema_.dimension_ * VECTOR_TYPE_SIZE; size_t memLeft = GetMemLeft(); if 
(memLeft >= singleVectorMemSize) { @@ -62,11 +72,37 @@ size_t MemTableFile::GetMemLeft() { return (MAX_TABLE_FILE_MEM - current_mem_); } -bool MemTableFile::isFull() { +bool MemTableFile::IsFull() { size_t singleVectorMemSize = table_file_schema_.dimension_ * VECTOR_TYPE_SIZE; return (GetMemLeft() < singleVectorMemSize); } +Status MemTableFile::Serialize() { + + auto start_time = METRICS_NOW_TIME; + + auto size = GetCurrentMem(); + + execution_engine_->Serialize(); + auto end_time = METRICS_NOW_TIME; + auto total_time = METRICS_MICROSECONDS(start_time, end_time); + table_file_schema_.size_ = size; + + server::Metrics::GetInstance().DiskStoreIOSpeedGaugeSet((double)size/total_time); + + table_file_schema_.file_type_ = (size >= options_.index_trigger_size) ? + meta::TableFileSchema::TO_INDEX : meta::TableFileSchema::RAW; + + auto status = meta_->UpdateTableFile(table_file_schema_); + + LOG(DEBUG) << "New " << ((table_file_schema_.file_type_ == meta::TableFileSchema::RAW) ? "raw" : "to_index") + << " file " << table_file_schema_.file_id_ << " of size " << (double)size / (double)M << " M"; + + execution_engine_->Cache(); + + return status; +} + } // namespace engine } // namespace milvus } // namespace zilliz \ No newline at end of file diff --git a/cpp/src/db/MemTableFile.h b/cpp/src/db/MemTableFile.h index 04f30178ea..1be0ae78ba 100644 --- a/cpp/src/db/MemTableFile.h +++ b/cpp/src/db/MemTableFile.h @@ -16,7 +16,7 @@ public: using Ptr = std::shared_ptr; using MetaPtr = meta::Meta::Ptr; - MemTableFile(const std::string& table_id, const std::shared_ptr& meta); + MemTableFile(const std::string& table_id, const std::shared_ptr& meta, const Options& options); Status Add(const VectorSource::Ptr& source); @@ -24,7 +24,9 @@ public: size_t GetMemLeft(); - bool isFull(); + bool IsFull(); + + Status Serialize(); private: @@ -36,6 +38,8 @@ private: MetaPtr meta_; + Options options_; + size_t current_mem_; ExecutionEnginePtr execution_engine_; diff --git a/cpp/src/db/VectorSource.cpp b/cpp/src/db/VectorSource.cpp index f7cef994fa..b113b9ad5e 100644 --- a/cpp/src/db/VectorSource.cpp +++ b/cpp/src/db/VectorSource.cpp @@ -2,6 +2,7 @@ #include "ExecutionEngine.h" #include "EngineFactory.h" #include "Log.h" +#include "metrics/Metrics.h" namespace zilliz { namespace milvus { @@ -21,12 +22,7 @@ Status VectorSource::Add(const ExecutionEnginePtr& execution_engine, const size_t& num_vectors_to_add, size_t& num_vectors_added) { - if (table_file_schema.dimension_ <= 0) { - std::string errMsg = "VectorSource::Add: table_file_schema dimension = " + - std::to_string(table_file_schema.dimension_) + ", table_id = " + table_file_schema.table_id_; - ENGINE_LOG_ERROR << errMsg; - return Status::Error(errMsg); - } + auto start_time = METRICS_NOW_TIME; num_vectors_added = current_num_vectors_added + num_vectors_to_add <= n_ ? 
num_vectors_to_add : n_ - current_num_vectors_added; IDNumbers vector_ids_to_add; @@ -40,6 +36,10 @@ Status VectorSource::Add(const ExecutionEnginePtr& execution_engine, ENGINE_LOG_ERROR << "VectorSource::Add failed: " + status.ToString(); } + auto end_time = METRICS_NOW_TIME; + auto total_time = METRICS_MICROSECONDS(start_time, end_time); + server::Metrics::GetInstance().AddVectorsPerSecondGaugeSet(static_cast(n_), static_cast(table_file_schema.dimension_), total_time); + return status; } diff --git a/cpp/src/db/VectorSource.h b/cpp/src/db/VectorSource.h index 597eee4ad8..dec31f39e1 100644 --- a/cpp/src/db/VectorSource.h +++ b/cpp/src/db/VectorSource.h @@ -28,8 +28,6 @@ public: IDNumbers GetVectorIds(); -// Status Serialize(); - private: const size_t n_; diff --git a/cpp/unittest/db/mem_test.cpp b/cpp/unittest/db/mem_test.cpp index 111914f8a9..f68d1eb8e3 100644 --- a/cpp/unittest/db/mem_test.cpp +++ b/cpp/unittest/db/mem_test.cpp @@ -86,12 +86,13 @@ TEST(MEM_TEST, VECTOR_SOURCE_TEST) { TEST(MEM_TEST, MEM_TABLE_FILE_TEST) { std::shared_ptr impl_ = engine::DBMetaImplFactory::Build(); + auto options = engine::OptionsFactory::Build(); engine::meta::TableSchema table_schema = BuildTableSchema(); auto status = impl_->CreateTable(table_schema); ASSERT_TRUE(status.ok()); - engine::MemTableFile memTableFile(TABLE_NAME, impl_); + engine::MemTableFile memTableFile(TABLE_NAME, impl_, options); int64_t n_100 = 100; std::vector vectors_100; @@ -120,7 +121,7 @@ TEST(MEM_TEST, MEM_TABLE_FILE_TEST) { vector_ids = source_128M->GetVectorIds(); ASSERT_EQ(vector_ids.size(), n_max - n_100); - ASSERT_TRUE(memTableFile.isFull()); + ASSERT_TRUE(memTableFile.IsFull()); status = impl_->DropAll(); ASSERT_TRUE(status.ok()); @@ -129,6 +130,7 @@ TEST(MEM_TEST, MEM_TABLE_FILE_TEST) { TEST(MEM_TEST, MEM_TABLE_TEST) { std::shared_ptr impl_ = engine::DBMetaImplFactory::Build(); + auto options = engine::OptionsFactory::Build(); engine::meta::TableSchema table_schema = BuildTableSchema(); auto status = impl_->CreateTable(table_schema); @@ -140,7 +142,7 @@ TEST(MEM_TEST, MEM_TABLE_TEST) { engine::VectorSource::Ptr source_100 = std::make_shared(n_100, vectors_100.data()); - engine::MemTable memTable(TABLE_NAME, impl_); + engine::MemTable memTable(TABLE_NAME, impl_, options); status = memTable.Add(source_100); ASSERT_TRUE(status.ok()); @@ -184,6 +186,9 @@ TEST(MEM_TEST, MEM_TABLE_TEST) { int expectedStackSize = 2 + std::ceil((n_1G - n_100) * singleVectorMem / engine::MAX_TABLE_FILE_MEM); ASSERT_EQ(memTable.GetStackSize(), expectedStackSize); + status = memTable.Serialize(); + ASSERT_TRUE(status.ok()); + status = impl_->DropAll(); ASSERT_TRUE(status.ok()); } From 9bb7ccf4127b9088ff2b860910355299ab2876c6 Mon Sep 17 00:00:00 2001 From: quicksilver Date: Fri, 5 Jul 2019 18:07:40 +0800 Subject: [PATCH 41/91] MS-161 - Add CI / CD Module to Milvus Project Former-commit-id: 55cd0d918a24c97b31f8fb744f9c507c65f819f6 --- CHANGELOGS.md | 1 + ci/function/file_transfer.groovy | 10 + ci/jenkinsfile/cleanup_dev.groovy | 12 + ci/jenkinsfile/deploy2dev.groovy | 11 + ci/jenkinsfile/dev_test.groovy | 17 ++ ci/jenkinsfile/milvus_build.groovy | 17 ++ ci/jenkinsfile/milvus_build_no_ut.groovy | 17 ++ ci/jenkinsfile/packaged_milvus.groovy | 44 ++++ ci/jenkinsfile/packaged_milvus_no_ut.groovy | 26 ++ ci/jenkinsfile/publish_docker.groovy | 31 +++ ci/jenkinsfile/upload_dev_test_out.groovy | 26 ++ ci/main_jenkinsfile | 256 ++++++++++++++++++++ ci/main_jenkinsfile_no_ut | 256 ++++++++++++++++++++ ci/pod_containers/milvus-engine-build.yaml | 13 + 
ci/pod_containers/milvus-testframework.yaml | 13 + ci/pod_containers/publish-docker.yaml | 22 ++ 16 files changed, 772 insertions(+) create mode 100644 ci/function/file_transfer.groovy create mode 100644 ci/jenkinsfile/cleanup_dev.groovy create mode 100644 ci/jenkinsfile/deploy2dev.groovy create mode 100644 ci/jenkinsfile/dev_test.groovy create mode 100644 ci/jenkinsfile/milvus_build.groovy create mode 100644 ci/jenkinsfile/milvus_build_no_ut.groovy create mode 100644 ci/jenkinsfile/packaged_milvus.groovy create mode 100644 ci/jenkinsfile/packaged_milvus_no_ut.groovy create mode 100644 ci/jenkinsfile/publish_docker.groovy create mode 100644 ci/jenkinsfile/upload_dev_test_out.groovy create mode 100644 ci/main_jenkinsfile create mode 100644 ci/main_jenkinsfile_no_ut create mode 100644 ci/pod_containers/milvus-engine-build.yaml create mode 100644 ci/pod_containers/milvus-testframework.yaml create mode 100644 ci/pod_containers/publish-docker.yaml diff --git a/CHANGELOGS.md b/CHANGELOGS.md index a5d7bfec58..def4965a41 100644 --- a/CHANGELOGS.md +++ b/CHANGELOGS.md @@ -15,3 +15,4 @@ Please mark all change in change log and use the ticket from JIRA. ### Task - MS-1 - Add CHANGELOG.md +- MS-161 - Add CI / CD Module to Milvus Project diff --git a/ci/function/file_transfer.groovy b/ci/function/file_transfer.groovy new file mode 100644 index 0000000000..bebae14832 --- /dev/null +++ b/ci/function/file_transfer.groovy @@ -0,0 +1,10 @@ +def FileTransfer (sourceFiles, remoteDirectory, remoteIP, protocol = "ftp", makeEmptyDirs = true) { + if (protocol == "ftp") { + ftpPublisher masterNodeName: '', paramPublish: [parameterName: ''], alwaysPublishFromMaster: false, continueOnError: false, failOnError: true, publishers: [ + [configName: "${remoteIP}", transfers: [ + [asciiMode: false, cleanRemote: false, excludes: '', flatten: false, makeEmptyDirs: "${makeEmptyDirs}", noDefaultExcludes: false, patternSeparator: '[, ]+', remoteDirectory: "${remoteDirectory}", remoteDirectorySDF: false, removePrefix: '', sourceFiles: "${sourceFiles}"]], usePromotionTimestamp: true, useWorkspaceInPromotion: false, verbose: true + ] + ] + } +} +return this diff --git a/ci/jenkinsfile/cleanup_dev.groovy b/ci/jenkinsfile/cleanup_dev.groovy new file mode 100644 index 0000000000..32ee43d3b1 --- /dev/null +++ b/ci/jenkinsfile/cleanup_dev.groovy @@ -0,0 +1,12 @@ +try { + sh "helm del --purge ${env.JOB_NAME}-${env.BUILD_NUMBER}" + + if (currentBuild.result == 'ABORTED') { + throw new hudson.AbortException("Dev Test Aborted !") + } else if (currentBuild.result == 'FAILURE') { + error("Dev Test Failure !") + } +} catch (exc) { + updateGitlabCommitStatus name: 'Cleanup Dev', state: 'failed' + throw exc +} diff --git a/ci/jenkinsfile/deploy2dev.groovy b/ci/jenkinsfile/deploy2dev.groovy new file mode 100644 index 0000000000..6e4a23cfe7 --- /dev/null +++ b/ci/jenkinsfile/deploy2dev.groovy @@ -0,0 +1,11 @@ +try { + sh 'helm init --client-only --skip-refresh' + sh 'helm repo add milvus https://registry.zilliz.com/chartrepo/milvus' + sh 'helm repo update' + sh "helm install --set engine.image.repository=registry.zilliz.com/${PROJECT_NAME}/engine --set engine.image.tag=${DOCKER_VERSION} --set expose.type=clusterIP --name ${env.JOB_NAME}-${env.BUILD_NUMBER} --version 0.3.0 milvus/milvus-gpu" +} catch (exc) { + updateGitlabCommitStatus name: 'Deloy to Dev', state: 'failed' + echo 'Helm running failed!' 
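// purge the partially installed release before rethrowing, so a rerun can reinstall under the same release name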
+ sh "helm del --purge ${env.JOB_NAME}-${env.BUILD_NUMBER}" + throw exc +} diff --git a/ci/jenkinsfile/dev_test.groovy b/ci/jenkinsfile/dev_test.groovy new file mode 100644 index 0000000000..f5808cef40 --- /dev/null +++ b/ci/jenkinsfile/dev_test.groovy @@ -0,0 +1,17 @@ +container('milvus-testframework') { + timeout(time: 10, unit: 'MINUTES') { + gitlabCommitStatus(name: 'Dev Test') { + try { + dir ("${PROJECT_NAME}_test") { + checkout([$class: 'GitSCM', branches: [[name: "${SEMVER}"]], doGenerateSubmoduleConfigurations: false, extensions: [], submoduleCfg: [], userRemoteConfigs: [[credentialsId: "${params.GIT_USER}", url: "git@192.168.1.105:Test/milvus_test.git"]]]) + sh 'python3 -m pip install -r requirements.txt' + sh "pytest . --alluredir=test_out --ip ${env.JOB_NAME}-${env.BUILD_NUMBER}-milvus-gpu-engine.kube-opt.svc.cluster.local" + } + } catch (exc) { + updateGitlabCommitStatus name: 'Dev Test', state: 'failed' + currentBuild.result = 'FAILURE' + echo 'Milvus Test Failed !' + } + } + } +} diff --git a/ci/jenkinsfile/milvus_build.groovy b/ci/jenkinsfile/milvus_build.groovy new file mode 100644 index 0000000000..ed07d2b992 --- /dev/null +++ b/ci/jenkinsfile/milvus_build.groovy @@ -0,0 +1,17 @@ +container('milvus-build-env') { + timeout(time: 20, unit: 'MINUTES') { + gitlabCommitStatus(name: 'Build Engine') { + dir ("milvus_engine") { + try { + checkout([$class: 'GitSCM', branches: [[name: "${SEMVER}"]], doGenerateSubmoduleConfigurations: false, extensions: [[$class: 'SubmoduleOption',disableSubmodules: false,parentCredentials: true,recursiveSubmodules: true,reference: '',trackingSubmodules: false]], submoduleCfg: [], userRemoteConfigs: [[credentialsId: "${params.GIT_USER}", url: "git@192.168.1.105:megasearch/milvus.git"]]]) + dir ("cpp") { + sh "./build.sh -t ${params.BUILD_TYPE} -u -c" + } + } catch (exc) { + updateGitlabCommitStatus name: 'Build Engine', state: 'failed' + throw exc + } + } + } + } +} diff --git a/ci/jenkinsfile/milvus_build_no_ut.groovy b/ci/jenkinsfile/milvus_build_no_ut.groovy new file mode 100644 index 0000000000..02b971de2f --- /dev/null +++ b/ci/jenkinsfile/milvus_build_no_ut.groovy @@ -0,0 +1,17 @@ +container('milvus-build-env') { + timeout(time: 20, unit: 'MINUTES') { + gitlabCommitStatus(name: 'Build Engine') { + dir ("milvus_engine") { + try { + checkout([$class: 'GitSCM', branches: [[name: "${SEMVER}"]], doGenerateSubmoduleConfigurations: false, extensions: [[$class: 'SubmoduleOption',disableSubmodules: false,parentCredentials: true,recursiveSubmodules: true,reference: '',trackingSubmodules: false]], submoduleCfg: [], userRemoteConfigs: [[credentialsId: "${params.GIT_USER}", url: "git@192.168.1.105:megasearch/milvus.git"]]]) + dir ("cpp") { + sh "./build.sh -t ${params.BUILD_TYPE}" + } + } catch (exc) { + updateGitlabCommitStatus name: 'Build Engine', state: 'failed' + throw exc + } + } + } + } +} diff --git a/ci/jenkinsfile/packaged_milvus.groovy b/ci/jenkinsfile/packaged_milvus.groovy new file mode 100644 index 0000000000..407b100589 --- /dev/null +++ b/ci/jenkinsfile/packaged_milvus.groovy @@ -0,0 +1,44 @@ +container('milvus-build-env') { + timeout(time: 5, unit: 'MINUTES') { + dir ("milvus_engine") { + dir ("cpp") { + gitlabCommitStatus(name: 'Packaged Engine') { + if (fileExists('milvus')) { + try { + sh "tar -zcvf ./${PROJECT_NAME}-engine-${PACKAGE_VERSION}.tar.gz ./milvus" + def fileTransfer = load "${env.WORKSPACE}/ci/function/file_transfer.groovy" + fileTransfer.FileTransfer("${PROJECT_NAME}-engine-${PACKAGE_VERSION}.tar.gz", 
"${PROJECT_NAME}/engine/${JOB_NAME}-${BUILD_ID}", 'nas storage') + if (currentBuild.resultIsBetterOrEqualTo('SUCCESS')) { + echo "Download Milvus Engine Binary Viewer \"http://192.168.1.126:8080/${PROJECT_NAME}/engine/${JOB_NAME}-${BUILD_ID}/${PROJECT_NAME}-engine-${PACKAGE_VERSION}.tar.gz\"" + } + } catch (exc) { + updateGitlabCommitStatus name: 'Packaged Engine', state: 'failed' + throw exc + } + } else { + updateGitlabCommitStatus name: 'Packaged Engine', state: 'failed' + error("Milvus binary directory don't exists!") + } + } + + gitlabCommitStatus(name: 'Packaged Engine lcov') { + if (fileExists('lcov_out')) { + try { + def fileTransfer = load "${env.WORKSPACE}/ci/function/file_transfer.groovy" + fileTransfer.FileTransfer("lcov_out/", "${PROJECT_NAME}/lcov/${JOB_NAME}-${BUILD_ID}", 'nas storage') + if (currentBuild.resultIsBetterOrEqualTo('SUCCESS')) { + echo "Milvus lcov out Viewer \"http://192.168.1.126:8080/${PROJECT_NAME}/lcov/${JOB_NAME}-${BUILD_ID}/lcov_out/\"" + } + } catch (exc) { + updateGitlabCommitStatus name: 'Packaged Engine lcov', state: 'failed' + throw exc + } + } else { + updateGitlabCommitStatus name: 'Packaged Engine lcov', state: 'failed' + error("Milvus lcov out directory don't exists!") + } + } + } + } + } +} diff --git a/ci/jenkinsfile/packaged_milvus_no_ut.groovy b/ci/jenkinsfile/packaged_milvus_no_ut.groovy new file mode 100644 index 0000000000..b6c31540a1 --- /dev/null +++ b/ci/jenkinsfile/packaged_milvus_no_ut.groovy @@ -0,0 +1,26 @@ +container('milvus-build-env') { + timeout(time: 5, unit: 'MINUTES') { + dir ("milvus_engine") { + dir ("cpp") { + gitlabCommitStatus(name: 'Packaged Engine') { + if (fileExists('milvus')) { + try { + sh "tar -zcvf ./${PROJECT_NAME}-engine-${PACKAGE_VERSION}.tar.gz ./milvus" + def fileTransfer = load "${env.WORKSPACE}/ci/function/file_transfer.groovy" + fileTransfer.FileTransfer("${PROJECT_NAME}-engine-${PACKAGE_VERSION}.tar.gz", "${PROJECT_NAME}/engine/${JOB_NAME}-${BUILD_ID}", 'nas storage') + if (currentBuild.resultIsBetterOrEqualTo('SUCCESS')) { + echo "Download Milvus Engine Binary Viewer \"http://192.168.1.126:8080/${PROJECT_NAME}/engine/${JOB_NAME}-${BUILD_ID}/${PROJECT_NAME}-engine-${PACKAGE_VERSION}.tar.gz\"" + } + } catch (exc) { + updateGitlabCommitStatus name: 'Packaged Engine', state: 'failed' + throw exc + } + } else { + updateGitlabCommitStatus name: 'Packaged Engine', state: 'failed' + error("Milvus binary directory don't exists!") + } + } + } + } + } +} diff --git a/ci/jenkinsfile/publish_docker.groovy b/ci/jenkinsfile/publish_docker.groovy new file mode 100644 index 0000000000..04f1a8567d --- /dev/null +++ b/ci/jenkinsfile/publish_docker.groovy @@ -0,0 +1,31 @@ +container('publish-docker') { + timeout(time: 15, unit: 'MINUTES') { + gitlabCommitStatus(name: 'Publish Engine Docker') { + try { + dir ("${PROJECT_NAME}_build") { + checkout([$class: 'GitSCM', branches: [[name: "${SEMVER}"]], doGenerateSubmoduleConfigurations: false, extensions: [], submoduleCfg: [], userRemoteConfigs: [[credentialsId: "${params.GIT_USER}", url: "git@192.168.1.105:build/milvus_build.git"]]]) + dir ("docker/deploy/ubuntu16.04/free_version") { + sh "curl -O -u anonymous: ftp://192.168.1.126/data/${PROJECT_NAME}/engine/${JOB_NAME}-${BUILD_ID}/${PROJECT_NAME}-engine-${PACKAGE_VERSION}.tar.gz" + sh "tar zxvf ${PROJECT_NAME}-engine-${PACKAGE_VERSION}.tar.gz" + try { + docker.withRegistry('https://registry.zilliz.com', 'a54e38ef-c424-4ea9-9224-b25fc20e3924') { + def customImage = docker.build("${PROJECT_NAME}/engine:${DOCKER_VERSION}") + 
customImage.push() + } + echo "Docker Pull Command: docker pull registry.zilliz.com/${PROJECT_NAME}/engine:${DOCKER_VERSION}" + } catch (exc) { + updateGitlabCommitStatus name: 'Publish Engine Docker', state: 'canceled' + throw exc + } finally { + sh "docker rmi ${PROJECT_NAME}/engine:${DOCKER_VERSION}" + } + } + } + } catch (exc) { + updateGitlabCommitStatus name: 'Publish Engine Docker', state: 'failed' + echo 'Publish docker failed!' + throw exc + } + } + } +} diff --git a/ci/jenkinsfile/upload_dev_test_out.groovy b/ci/jenkinsfile/upload_dev_test_out.groovy new file mode 100644 index 0000000000..c401b16608 --- /dev/null +++ b/ci/jenkinsfile/upload_dev_test_out.groovy @@ -0,0 +1,26 @@ +container('milvus-testframework') { + timeout(time: 5, unit: 'MINUTES') { + dir ("${PROJECT_NAME}_test") { + gitlabCommitStatus(name: 'Upload Dev Test Out') { + if (fileExists('test_out')) { + try { + def fileTransfer = load "${env.WORKSPACE}/ci/function/file_transfer.groovy" + fileTransfer.FileTransfer("test_out/", "${PROJECT_NAME}/test/${JOB_NAME}-${BUILD_ID}", 'nas storage') + if (currentBuild.resultIsBetterOrEqualTo('SUCCESS')) { + echo "Milvus Dev Test Out Viewer \"ftp://192.168.1.126/data/${PROJECT_NAME}/test/${JOB_NAME}-${BUILD_ID}\"" + } + } catch (hudson.AbortException ae) { + updateGitlabCommitStatus name: 'Upload Dev Test Out', state: 'canceled' + currentBuild.result = 'ABORTED' + } catch (exc) { + updateGitlabCommitStatus name: 'Upload Dev Test Out', state: 'failed' + currentBuild.result = 'FAILURE' + } + } else { + updateGitlabCommitStatus name: 'Upload Dev Test Out', state: 'failed' + echo "Milvus Dev Test Out directory don't exists!" + } + } + } + } +} diff --git a/ci/main_jenkinsfile b/ci/main_jenkinsfile new file mode 100644 index 0000000000..c144c46685 --- /dev/null +++ b/ci/main_jenkinsfile @@ -0,0 +1,256 @@ +pipeline { + agent none + + options { + timestamps() + } + + environment { + PROJECT_NAME = "milvus" + LOWER_BUILD_TYPE = BUILD_TYPE.toLowerCase() + SEMVER = "${env.gitlabSourceBranch == null ? params.ENGINE_BRANCH.substring(params.ENGINE_BRANCH.lastIndexOf('/') + 1) : env.gitlabSourceBranch}" + GITLAB_AFTER_COMMIT = "${env.gitlabAfter == null ? null : env.gitlabAfter}" + SUFFIX_VERSION_NAME = "${env.gitlabAfter == null ? null : env.gitlabAfter.substring(0, 6)}" + DOCKER_VERSION_STR = "${env.gitlabAfter == null ? "${SEMVER}-${LOWER_BUILD_TYPE}" : "${SEMVER}-${LOWER_BUILD_TYPE}-${SUFFIX_VERSION_NAME}"}" + } + + stages { + stage("Ubuntu 16.04") { + environment { + PACKAGE_VERSION = VersionNumber([ + versionNumberString : '${SEMVER}-${LOWER_BUILD_TYPE}-${BUILD_DATE_FORMATTED, "yyyyMMdd"}' + ]); + + DOCKER_VERSION = VersionNumber([ + versionNumberString : '${DOCKER_VERSION_STR}' + ]); + } + + stages { + stage("Run Build") { + agent { + kubernetes { + cloud 'build-kubernetes' + label 'build' + defaultContainer 'jnlp' + containerTemplate { + name 'milvus-build-env' + image 'registry.zilliz.com/milvus/milvus-build-env:v0.10' + ttyEnabled true + command 'cat' + } + } + } + stages { + stage('Build') { + steps { + gitlabCommitStatus(name: 'Build') { + script { + load "${env.WORKSPACE}/ci/jenkinsfile/milvus_build.groovy" + load "${env.WORKSPACE}/ci/jenkinsfile/packaged_milvus.groovy" + } + } + } + } + } + post { + aborted { + script { + updateGitlabCommitStatus name: 'Build', state: 'canceled' + echo "Milvus Build aborted !" + } + } + + failure { + script { + updateGitlabCommitStatus name: 'Build', state: 'failed' + echo "Milvus Build failure !" 
+ } + } + } + } + + stage("Publish docker and helm") { + agent { + kubernetes { + label 'publish' + defaultContainer 'jnlp' + yaml """ +apiVersion: v1 +kind: Pod +metadata: + labels: + app: publish + componet: docker +spec: + containers: + - name: publish-docker + image: registry.zilliz.com/library/zilliz_docker:v1.0.0 + securityContext: + privileged: true + command: + - cat + tty: true + volumeMounts: + - name: docker-sock + mountPath: /var/run/docker.sock + volumes: + - name: docker-sock + hostPath: + path: /var/run/docker.sock +""" + } + } + stages { + stage('Publish Docker') { + steps { + gitlabCommitStatus(name: 'Publish Docker') { + script { + load "${env.WORKSPACE}/ci/jenkinsfile/publish_docker.groovy" + } + } + } + } + } + post { + aborted { + script { + updateGitlabCommitStatus name: 'Publish Docker', state: 'canceled' + echo "Milvus Publish Docker aborted !" + } + } + + failure { + script { + updateGitlabCommitStatus name: 'Publish Docker', state: 'failed' + echo "Milvus Publish Docker failure !" + } + } + } + } + + stage("Deploy to Development") { + stages { + stage("Deploy to Dev") { + agent { + kubernetes { + label 'jenkins-slave' + defaultContainer 'jnlp' + } + } + stages { + stage('Deploy') { + steps { + gitlabCommitStatus(name: 'Deloy to Dev') { + script { + load "${env.WORKSPACE}/ci/jenkinsfile/deploy2dev.groovy" + } + } + } + } + } + post { + aborted { + script { + updateGitlabCommitStatus name: 'Deloy to Dev', state: 'canceled' + echo "Milvus Deloy to Dev aborted !" + } + } + + failure { + script { + updateGitlabCommitStatus name: 'Deloy to Dev', state: 'failed' + echo "Milvus Deloy to Dev failure !" + } + } + } + } + + stage("Dev Test") { + agent { + kubernetes { + label 'test' + defaultContainer 'jnlp' + containerTemplate { + name 'milvus-testframework' + image 'registry.zilliz.com/milvus/milvus-test:v0.1' + ttyEnabled true + command 'cat' + } + } + } + stages { + stage('Test') { + steps { + script { + load "${env.WORKSPACE}/ci/jenkinsfile/dev_test.groovy" + load "${env.WORKSPACE}/ci/jenkinsfile/upload_dev_test_out.groovy" + } + } + } + } + } + + stage ("Cleanup Dev") { + agent { + kubernetes { + label 'jenkins-slave' + defaultContainer 'jnlp' + } + } + stages { + stage('Cleanup') { + steps { + gitlabCommitStatus(name: 'Cleanup Dev') { + script { + load "${env.WORKSPACE}/ci/jenkinsfile/cleanup_dev.groovy" + } + } + } + } + } + post { + aborted { + script { + updateGitlabCommitStatus name: 'Cleanup Dev', state: 'canceled' + echo "Milvus Cleanup Dev aborted !" + } + } + + failure { + script { + updateGitlabCommitStatus name: 'Cleanup Dev', state: 'failed' + echo "Milvus Cleanup Dev failure !" + } + } + } + } + } + } + } + } + } + + post { + success { + script { + updateGitlabCommitStatus name: 'CI/CD', state: 'success' + echo "Milvus CI/CD success !" + } + } + + aborted { + script { + updateGitlabCommitStatus name: 'CI/CD', state: 'canceled' + echo "Milvus CI/CD aborted !" + } + } + + failure { + script { + updateGitlabCommitStatus name: 'CI/CD', state: 'failed' + echo "Milvus CI/CD failure !" + } + } + } +} diff --git a/ci/main_jenkinsfile_no_ut b/ci/main_jenkinsfile_no_ut new file mode 100644 index 0000000000..277ec155a5 --- /dev/null +++ b/ci/main_jenkinsfile_no_ut @@ -0,0 +1,256 @@ +pipeline { + agent none + + options { + timestamps() + } + + environment { + PROJECT_NAME = "milvus" + LOWER_BUILD_TYPE = BUILD_TYPE.toLowerCase() + SEMVER = "${env.gitlabSourceBranch == null ? 
params.ENGINE_BRANCH.substring(params.ENGINE_BRANCH.lastIndexOf('/') + 1) : env.gitlabSourceBranch}" + GITLAB_AFTER_COMMIT = "${env.gitlabAfter == null ? null : env.gitlabAfter}" + SUFFIX_VERSION_NAME = "${env.gitlabAfter == null ? null : env.gitlabAfter.substring(0, 6)}" + DOCKER_VERSION_STR = "${env.gitlabAfter == null ? "${SEMVER}-${LOWER_BUILD_TYPE}" : "${SEMVER}-${LOWER_BUILD_TYPE}-${SUFFIX_VERSION_NAME}"}" + } + + stages { + stage("Ubuntu 16.04") { + environment { + PACKAGE_VERSION = VersionNumber([ + versionNumberString : '${SEMVER}-${LOWER_BUILD_TYPE}-${BUILD_DATE_FORMATTED, "yyyyMMdd"}' + ]); + + DOCKER_VERSION = VersionNumber([ + versionNumberString : '${DOCKER_VERSION_STR}' + ]); + } + + stages { + stage("Run Build") { + agent { + kubernetes { + cloud 'build-kubernetes' + label 'build' + defaultContainer 'jnlp' + containerTemplate { + name 'milvus-build-env' + image 'registry.zilliz.com/milvus/milvus-build-env:v0.10' + ttyEnabled true + command 'cat' + } + } + } + stages { + stage('Build') { + steps { + gitlabCommitStatus(name: 'Build') { + script { + load "${env.WORKSPACE}/ci/jenkinsfile/milvus_build_no_ut.groovy" + load "${env.WORKSPACE}/ci/jenkinsfile/packaged_milvus_no_ut.groovy" + } + } + } + } + } + post { + aborted { + script { + updateGitlabCommitStatus name: 'Build', state: 'canceled' + echo "Milvus Build aborted !" + } + } + + failure { + script { + updateGitlabCommitStatus name: 'Build', state: 'failed' + echo "Milvus Build failure !" + } + } + } + } + + stage("Publish docker and helm") { + agent { + kubernetes { + label 'publish' + defaultContainer 'jnlp' + yaml """ +apiVersion: v1 +kind: Pod +metadata: + labels: + app: publish + componet: docker +spec: + containers: + - name: publish-docker + image: registry.zilliz.com/library/zilliz_docker:v1.0.0 + securityContext: + privileged: true + command: + - cat + tty: true + volumeMounts: + - name: docker-sock + mountPath: /var/run/docker.sock + volumes: + - name: docker-sock + hostPath: + path: /var/run/docker.sock +""" + } + } + stages { + stage('Publish Docker') { + steps { + gitlabCommitStatus(name: 'Publish Docker') { + script { + load "${env.WORKSPACE}/ci/jenkinsfile/publish_docker.groovy" + } + } + } + } + } + post { + aborted { + script { + updateGitlabCommitStatus name: 'Publish Docker', state: 'canceled' + echo "Milvus Publish Docker aborted !" + } + } + + failure { + script { + updateGitlabCommitStatus name: 'Publish Docker', state: 'failed' + echo "Milvus Publish Docker failure !" + } + } + } + } + + stage("Deploy to Development") { + stages { + stage("Deploy to Dev") { + agent { + kubernetes { + label 'jenkins-slave' + defaultContainer 'jnlp' + } + } + stages { + stage('Deploy') { + steps { + gitlabCommitStatus(name: 'Deloy to Dev') { + script { + load "${env.WORKSPACE}/ci/jenkinsfile/deploy2dev.groovy" + } + } + } + } + } + post { + aborted { + script { + updateGitlabCommitStatus name: 'Deloy to Dev', state: 'canceled' + echo "Milvus Deloy to Dev aborted !" + } + } + + failure { + script { + updateGitlabCommitStatus name: 'Deloy to Dev', state: 'failed' + echo "Milvus Deloy to Dev failure !" 
+ } + } + } + } + + stage("Dev Test") { + agent { + kubernetes { + label 'test' + defaultContainer 'jnlp' + containerTemplate { + name 'milvus-testframework' + image 'registry.zilliz.com/milvus/milvus-test:v0.1' + ttyEnabled true + command 'cat' + } + } + } + stages { + stage('Test') { + steps { + script { + load "${env.WORKSPACE}/ci/jenkinsfile/dev_test.groovy" + load "${env.WORKSPACE}/ci/jenkinsfile/upload_dev_test_out.groovy" + } + } + } + } + } + + stage ("Cleanup Dev") { + agent { + kubernetes { + label 'jenkins-slave' + defaultContainer 'jnlp' + } + } + stages { + stage('Cleanup') { + steps { + gitlabCommitStatus(name: 'Cleanup Dev') { + script { + load "${env.WORKSPACE}/ci/jenkinsfile/cleanup_dev.groovy" + } + } + } + } + } + post { + aborted { + script { + updateGitlabCommitStatus name: 'Cleanup Dev', state: 'canceled' + echo "Milvus Cleanup Dev aborted !" + } + } + + failure { + script { + updateGitlabCommitStatus name: 'Cleanup Dev', state: 'failed' + echo "Milvus Cleanup Dev failure !" + } + } + } + } + } + } + } + } + } + + post { + success { + script { + updateGitlabCommitStatus name: 'CI/CD', state: 'success' + echo "Milvus CI/CD success !" + } + } + + aborted { + script { + updateGitlabCommitStatus name: 'CI/CD', state: 'canceled' + echo "Milvus CI/CD aborted !" + } + } + + failure { + script { + updateGitlabCommitStatus name: 'CI/CD', state: 'failed' + echo "Milvus CI/CD failure !" + } + } + } +} diff --git a/ci/pod_containers/milvus-engine-build.yaml b/ci/pod_containers/milvus-engine-build.yaml new file mode 100644 index 0000000000..cd5352ffef --- /dev/null +++ b/ci/pod_containers/milvus-engine-build.yaml @@ -0,0 +1,13 @@ +apiVersion: v1 +kind: Pod +metadata: + labels: + app: milvus + componet: build-env +spec: + containers: + - name: milvus-build-env + image: registry.zilliz.com/milvus/milvus-build-env:v0.9 + command: + - cat + tty: true diff --git a/ci/pod_containers/milvus-testframework.yaml b/ci/pod_containers/milvus-testframework.yaml new file mode 100644 index 0000000000..7a98fbca8e --- /dev/null +++ b/ci/pod_containers/milvus-testframework.yaml @@ -0,0 +1,13 @@ +apiVersion: v1 +kind: Pod +metadata: + labels: + app: milvus + componet: testframework +spec: + containers: + - name: milvus-testframework + image: registry.zilliz.com/milvus/milvus-test:v0.1 + command: + - cat + tty: true diff --git a/ci/pod_containers/publish-docker.yaml b/ci/pod_containers/publish-docker.yaml new file mode 100644 index 0000000000..268afb1331 --- /dev/null +++ b/ci/pod_containers/publish-docker.yaml @@ -0,0 +1,22 @@ +apiVersion: v1 +kind: Pod +metadata: + labels: + app: publish + componet: docker +spec: + containers: + - name: publish-docker + image: registry.zilliz.com/library/zilliz_docker:v1.0.0 + securityContext: + privileged: true + command: + - cat + tty: true + volumeMounts: + - name: docker-sock + mountPath: /var/run/docker.sock + volumes: + - name: docker-sock + hostPath: + path: /var/run/docker.sock From 2ab74af667c3a4c298824c946c467676a810438b Mon Sep 17 00:00:00 2001 From: zhiru Date: Sun, 7 Jul 2019 13:50:39 +0800 Subject: [PATCH 42/91] add mem manager Former-commit-id: db1cb126fd931f260c7ecf710e6de043ea0ebeb9 --- cpp/src/db/DBImpl.cpp | 3 +- cpp/src/db/DBImpl.h | 4 +- cpp/src/db/Factories.cpp | 11 +++ cpp/src/db/Factories.h | 5 ++ cpp/src/db/MemManager.h | 14 ++-- cpp/src/db/MemManagerAbstract.h | 25 ++++++ cpp/src/db/MemTable.cpp | 10 ++- cpp/src/db/MemTable.h | 6 +- cpp/src/db/NewMemManager.cpp | 92 +++++++++++++++++++++ cpp/src/db/NewMemManager.h | 54 +++++++++++++ 
cpp/src/db/VectorSource.cpp | 15 +++- cpp/unittest/db/mem_test.cpp | 137 +++++++++++++++++++++++++++++++- 12 files changed, 356 insertions(+), 20 deletions(-) create mode 100644 cpp/src/db/MemManagerAbstract.h create mode 100644 cpp/src/db/NewMemManager.cpp create mode 100644 cpp/src/db/NewMemManager.h diff --git a/cpp/src/db/DBImpl.cpp b/cpp/src/db/DBImpl.cpp index 0a1e8651e1..09a7c72201 100644 --- a/cpp/src/db/DBImpl.cpp +++ b/cpp/src/db/DBImpl.cpp @@ -87,8 +87,7 @@ DBImpl::DBImpl(const Options& options) compact_thread_pool_(1, 1), index_thread_pool_(1, 1) { meta_ptr_ = DBMetaImplFactory::Build(options.meta, options.mode); - mem_mgr_ = std::make_shared(meta_ptr_, options_); - // mem_mgr_ = (MemManagerPtr)(new MemManager(meta_ptr_, options_)); + mem_mgr_ = MemManagerFactory::Build(meta_ptr_, options_); if (options.mode != Options::MODE::READ_ONLY) { StartTimerTasks(); } diff --git a/cpp/src/db/DBImpl.h b/cpp/src/db/DBImpl.h index 9dcd174f8b..5601f1a33b 100644 --- a/cpp/src/db/DBImpl.h +++ b/cpp/src/db/DBImpl.h @@ -9,6 +9,7 @@ #include "MemManager.h" #include "Types.h" #include "utils/ThreadPool.h" +#include "MemManagerAbstract.h" #include #include @@ -33,7 +34,6 @@ class Meta; class DBImpl : public DB { public: using MetaPtr = meta::Meta::Ptr; - using MemManagerPtr = typename MemManager::Ptr; explicit DBImpl(const Options &options); @@ -123,7 +123,7 @@ class DBImpl : public DB { std::thread bg_timer_thread_; MetaPtr meta_ptr_; - MemManagerPtr mem_mgr_; + MemManagerAbstractPtr mem_mgr_; server::ThreadPool compact_thread_pool_; std::list> compact_thread_results_; diff --git a/cpp/src/db/Factories.cpp b/cpp/src/db/Factories.cpp index 4b24bd3a1c..d51727cbff 100644 --- a/cpp/src/db/Factories.cpp +++ b/cpp/src/db/Factories.cpp @@ -6,6 +6,8 @@ #include #include "Factories.h" #include "DBImpl.h" +#include "MemManager.h" +#include "NewMemManager.h" #include #include @@ -98,6 +100,15 @@ DB* DBFactory::Build(const Options& options) { return new DBImpl(options); } +MemManagerAbstractPtr MemManagerFactory::Build(const std::shared_ptr& meta, + const Options& options) { + bool useNew = true; + if (useNew) { + return std::make_shared(meta, options); + } + return std::make_shared(meta, options); +} + } // namespace engine } // namespace milvus } // namespace zilliz diff --git a/cpp/src/db/Factories.h b/cpp/src/db/Factories.h index 889922b17a..567bc0a8bc 100644 --- a/cpp/src/db/Factories.h +++ b/cpp/src/db/Factories.h @@ -10,6 +10,7 @@ #include "MySQLMetaImpl.h" #include "Options.h" #include "ExecutionEngine.h" +#include "MemManagerAbstract.h" #include #include @@ -36,6 +37,10 @@ struct DBFactory { static DB* Build(const Options&); }; +struct MemManagerFactory { + static MemManagerAbstractPtr Build(const std::shared_ptr& meta, const Options& options); +}; + } // namespace engine } // namespace milvus } // namespace zilliz diff --git a/cpp/src/db/MemManager.h b/cpp/src/db/MemManager.h index 0ce88d504d..95303889db 100644 --- a/cpp/src/db/MemManager.h +++ b/cpp/src/db/MemManager.h @@ -9,13 +9,13 @@ #include "IDGenerator.h" #include "Status.h" #include "Meta.h" +#include "MemManagerAbstract.h" #include #include #include #include #include -#include namespace zilliz { namespace milvus { @@ -62,7 +62,7 @@ private: -class MemManager { +class MemManager : public MemManagerAbstract { public: using MetaPtr = meta::Meta::Ptr; using MemVectorsPtr = typename MemVectors::Ptr; @@ -71,16 +71,16 @@ public: MemManager(const std::shared_ptr& meta, const Options& options) : meta_(meta), options_(options) {} - MemVectorsPtr 
GetMemByTable(const std::string& table_id); - Status InsertVectors(const std::string& table_id, - size_t n, const float* vectors, IDNumbers& vector_ids); + size_t n, const float* vectors, IDNumbers& vector_ids) override; - Status Serialize(std::set& table_ids); + Status Serialize(std::set& table_ids) override; - Status EraseMemVector(const std::string& table_id); + Status EraseMemVector(const std::string& table_id) override; private: + MemVectorsPtr GetMemByTable(const std::string& table_id); + Status InsertVectorsNoLock(const std::string& table_id, size_t n, const float* vectors, IDNumbers& vector_ids); Status ToImmutable(); diff --git a/cpp/src/db/MemManagerAbstract.h b/cpp/src/db/MemManagerAbstract.h new file mode 100644 index 0000000000..74222df1e8 --- /dev/null +++ b/cpp/src/db/MemManagerAbstract.h @@ -0,0 +1,25 @@ +#pragma once + +#include + +namespace zilliz { +namespace milvus { +namespace engine { + +class MemManagerAbstract { +public: + + virtual Status InsertVectors(const std::string& table_id, + size_t n, const float* vectors, IDNumbers& vector_ids) = 0; + + virtual Status Serialize(std::set& table_ids) = 0; + + virtual Status EraseMemVector(const std::string& table_id) = 0; + +}; // MemManagerAbstract + +using MemManagerAbstractPtr = std::shared_ptr; + +} // namespace engine +} // namespace milvus +} // namespace zilliz \ No newline at end of file diff --git a/cpp/src/db/MemTable.cpp b/cpp/src/db/MemTable.cpp index 86554695c8..b282ad375a 100644 --- a/cpp/src/db/MemTable.cpp +++ b/cpp/src/db/MemTable.cpp @@ -44,7 +44,7 @@ void MemTable::GetCurrentMemTableFile(MemTableFile::Ptr& mem_table_file) { mem_table_file = mem_table_file_list_.back(); } -size_t MemTable::GetStackSize() { +size_t MemTable::GetTableFileCount() { return mem_table_file_list_.size(); } @@ -60,6 +60,14 @@ Status MemTable::Serialize() { return Status::OK(); } +bool MemTable::Empty() { + return mem_table_file_list_.empty(); +} + +std::string MemTable::GetTableId() { + return table_id_; +} + } // namespace engine } // namespace milvus } // namespace zilliz \ No newline at end of file diff --git a/cpp/src/db/MemTable.h b/cpp/src/db/MemTable.h index d5c7cc9e85..e09d6ddac1 100644 --- a/cpp/src/db/MemTable.h +++ b/cpp/src/db/MemTable.h @@ -24,10 +24,14 @@ public: void GetCurrentMemTableFile(MemTableFile::Ptr& mem_table_file); - size_t GetStackSize(); + size_t GetTableFileCount(); Status Serialize(); + bool Empty(); + + std::string GetTableId(); + private: const std::string table_id_; diff --git a/cpp/src/db/NewMemManager.cpp b/cpp/src/db/NewMemManager.cpp new file mode 100644 index 0000000000..19aba68eb7 --- /dev/null +++ b/cpp/src/db/NewMemManager.cpp @@ -0,0 +1,92 @@ +#include "NewMemManager.h" +#include "VectorSource.h" + +namespace zilliz { +namespace milvus { +namespace engine { + +NewMemManager::MemTablePtr NewMemManager::GetMemByTable(const std::string& table_id) { + auto memIt = mem_id_map_.find(table_id); + if (memIt != mem_id_map_.end()) { + return memIt->second; + } + + mem_id_map_[table_id] = std::make_shared(table_id, meta_, options_); + return mem_id_map_[table_id]; +} + +Status NewMemManager::InsertVectors(const std::string& table_id_, + size_t n_, + const float* vectors_, + IDNumbers& vector_ids_) { + + + std::unique_lock lock(mutex_); + + return InsertVectorsNoLock(table_id_, n_, vectors_, vector_ids_); +} + +Status NewMemManager::InsertVectorsNoLock(const std::string& table_id, + size_t n, + const float* vectors, + IDNumbers& vector_ids) { + MemTablePtr mem = GetMemByTable(table_id); + 
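// Each table id lazily gets its own MemTable (created in GetMemByTable above); the
// caller's raw buffer is wrapped in a VectorSource below and MemTable::Add spreads it
// across MemTableFiles, assigning the ids that are handed back to the caller.
// Illustrative usage of this manager, assuming the meta/options wiring done by
// MemManagerFactory (sketch only, not the committed code; the table name is made up):
//   NewMemManager mgr(meta, options);
//   IDNumbers ids;
//   mgr.InsertVectors("example_table", n, vectors, ids);  // buffer vectors in memory
//   std::set<std::string> flushed_tables;
//   mgr.Serialize(flushed_tables);                         // flush MemTableFiles to disk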
VectorSource::Ptr source = std::make_shared(n, vectors); + + auto status = mem->Add(source); + if (status.ok()) { + vector_ids = source->GetVectorIds(); + } + return status; +} + +Status NewMemManager::ToImmutable() { + std::unique_lock lock(mutex_); + MemIdMap temp_map; + for (auto& kv: mem_id_map_) { + if(kv.second->Empty()) { + temp_map.insert(kv); + continue;//empty table, no need to serialize + } + immu_mem_list_.push_back(kv.second); + } + + mem_id_map_.swap(temp_map); + return Status::OK(); +} + +Status NewMemManager::Serialize(std::set& table_ids) { + ToImmutable(); + std::unique_lock lock(serialization_mtx_); + table_ids.clear(); + for (auto& mem : immu_mem_list_) { + mem->Serialize(); + table_ids.insert(mem->GetTableId()); + } + immu_mem_list_.clear(); + return Status::OK(); +} + +Status NewMemManager::EraseMemVector(const std::string& table_id) { + {//erase MemVector from rapid-insert cache + std::unique_lock lock(mutex_); + mem_id_map_.erase(table_id); + } + + {//erase MemVector from serialize cache + std::unique_lock lock(serialization_mtx_); + MemList temp_list; + for (auto& mem : immu_mem_list_) { + if(mem->GetTableId() != table_id) { + temp_list.push_back(mem); + } + } + immu_mem_list_.swap(temp_list); + } + + return Status::OK(); +} + +} // namespace engine +} // namespace milvus +} // namespace zilliz \ No newline at end of file diff --git a/cpp/src/db/NewMemManager.h b/cpp/src/db/NewMemManager.h new file mode 100644 index 0000000000..a5f5a9ca13 --- /dev/null +++ b/cpp/src/db/NewMemManager.h @@ -0,0 +1,54 @@ +#pragma once + +#include "Meta.h" +#include "MemTable.h" +#include "Status.h" +#include "MemManagerAbstract.h" + +#include +#include +#include +#include +#include + +namespace zilliz { +namespace milvus { +namespace engine { + +class NewMemManager : public MemManagerAbstract { +public: + using MetaPtr = meta::Meta::Ptr; + using Ptr = std::shared_ptr; + using MemTablePtr = typename MemTable::Ptr; + + NewMemManager(const std::shared_ptr& meta, const Options& options) + : meta_(meta), options_(options) {} + + Status InsertVectors(const std::string& table_id, + size_t n, const float* vectors, IDNumbers& vector_ids) override; + + Status Serialize(std::set& table_ids) override; + + Status EraseMemVector(const std::string& table_id) override; + +private: + MemTablePtr GetMemByTable(const std::string& table_id); + + Status InsertVectorsNoLock(const std::string& table_id, + size_t n, const float* vectors, IDNumbers& vector_ids); + Status ToImmutable(); + + using MemIdMap = std::map; + using MemList = std::vector; + MemIdMap mem_id_map_; + MemList immu_mem_list_; + MetaPtr meta_; + Options options_; + std::mutex mutex_; + std::mutex serialization_mtx_; +}; // NewMemManager + + +} // namespace engine +} // namespace milvus +} // namespace zilliz \ No newline at end of file diff --git a/cpp/src/db/VectorSource.cpp b/cpp/src/db/VectorSource.cpp index b113b9ad5e..d032be51f6 100644 --- a/cpp/src/db/VectorSource.cpp +++ b/cpp/src/db/VectorSource.cpp @@ -24,13 +24,18 @@ Status VectorSource::Add(const ExecutionEnginePtr& execution_engine, auto start_time = METRICS_NOW_TIME; - num_vectors_added = current_num_vectors_added + num_vectors_to_add <= n_ ? num_vectors_to_add : n_ - current_num_vectors_added; + num_vectors_added = current_num_vectors_added + num_vectors_to_add <= n_ ? 
+ num_vectors_to_add : n_ - current_num_vectors_added; IDNumbers vector_ids_to_add; id_generator_->GetNextIDNumbers(num_vectors_added, vector_ids_to_add); - Status status = execution_engine->AddWithIds(num_vectors_added, vectors_ + current_num_vectors_added, vector_ids_to_add.data()); + Status status = execution_engine->AddWithIds(num_vectors_added, + vectors_ + current_num_vectors_added * table_file_schema.dimension_, + vector_ids_to_add.data()); if (status.ok()) { current_num_vectors_added += num_vectors_added; - vector_ids_.insert(vector_ids_.end(), vector_ids_to_add.begin(), vector_ids_to_add.end()); + vector_ids_.insert(vector_ids_.end(), + std::make_move_iterator(vector_ids_to_add.begin()), + std::make_move_iterator(vector_ids_to_add.end())); } else { ENGINE_LOG_ERROR << "VectorSource::Add failed: " + status.ToString(); @@ -38,7 +43,9 @@ Status VectorSource::Add(const ExecutionEnginePtr& execution_engine, auto end_time = METRICS_NOW_TIME; auto total_time = METRICS_MICROSECONDS(start_time, end_time); - server::Metrics::GetInstance().AddVectorsPerSecondGaugeSet(static_cast(n_), static_cast(table_file_schema.dimension_), total_time); + server::Metrics::GetInstance().AddVectorsPerSecondGaugeSet(static_cast(n_), + static_cast(table_file_schema.dimension_), + total_time); return status; } diff --git a/cpp/unittest/db/mem_test.cpp b/cpp/unittest/db/mem_test.cpp index f68d1eb8e3..915610adcc 100644 --- a/cpp/unittest/db/mem_test.cpp +++ b/cpp/unittest/db/mem_test.cpp @@ -7,6 +7,11 @@ #include "db/Factories.h" #include "db/Constants.h" #include "db/EngineFactory.h" +#include "metrics/Metrics.h" + +#include +#include +#include using namespace zilliz::milvus; @@ -29,6 +34,9 @@ namespace { vectors.clear(); vectors.resize(n*TABLE_DIM); float* data = vectors.data(); +// std::random_device rd; +// std::mt19937 gen(rd()); +// std::uniform_real_distribution<> dis(0.0, 1.0); for(int i = 0; i < n; i++) { for(int j = 0; j < TABLE_DIM; j++) data[TABLE_DIM * i + j] = drand48(); data[TABLE_DIM * i] += i / 2000.; @@ -169,7 +177,7 @@ TEST(MEM_TEST, MEM_TABLE_TEST) { memTable.GetCurrentMemTableFile(memTableFile); ASSERT_EQ(memTableFile->GetCurrentMem(), n_100 * singleVectorMem); - ASSERT_EQ(memTable.GetStackSize(), 2); + ASSERT_EQ(memTable.GetTableFileCount(), 2); int64_t n_1G = 1024000; std::vector vectors_1G; @@ -183,8 +191,8 @@ TEST(MEM_TEST, MEM_TABLE_TEST) { vector_ids = source_1G->GetVectorIds(); ASSERT_EQ(vector_ids.size(), n_1G); - int expectedStackSize = 2 + std::ceil((n_1G - n_100) * singleVectorMem / engine::MAX_TABLE_FILE_MEM); - ASSERT_EQ(memTable.GetStackSize(), expectedStackSize); + int expectedTableFileCount = 2 + std::ceil((n_1G - n_100) * singleVectorMem / engine::MAX_TABLE_FILE_MEM); + ASSERT_EQ(memTable.GetTableFileCount(), expectedTableFileCount); status = memTable.Serialize(); ASSERT_TRUE(status.ok()); @@ -193,4 +201,127 @@ TEST(MEM_TEST, MEM_TABLE_TEST) { ASSERT_TRUE(status.ok()); } +TEST(MEM_TEST, MEM_MANAGER_TEST) { + + auto options = engine::OptionsFactory::Build(); + options.meta.path = "/tmp/milvus_test"; + options.meta.backend_uri = "sqlite://:@:/"; + auto db_ = engine::DBFactory::Build(options); + + engine::meta::TableSchema table_info = BuildTableSchema(); + engine::Status stat = db_->CreateTable(table_info); + + engine::meta::TableSchema table_info_get; + table_info_get.table_id_ = TABLE_NAME; + stat = db_->DescribeTable(table_info_get); + ASSERT_STATS(stat); + ASSERT_EQ(table_info_get.dimension_, TABLE_DIM); + + std::map> search_vectors; +// std::map> vectors_ids_map; + { + 
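// This scope inserts roughly one million vectors, sleeps a few seconds so the
// background serialization can flush them, then keeps 1000 randomly chosen
// inserted vectors as exact-match queries for the search checks further down.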
engine::IDNumbers vector_ids; + int64_t nb = 1024000; + std::vector xb; + BuildVectors(nb, xb); + engine::Status status = db_->InsertVectors(TABLE_NAME, nb, xb.data(), vector_ids); + ASSERT_TRUE(status.ok()); + +// std::ofstream myfile("mem_test.txt"); +// for (int64_t i = 0; i < nb; ++i) { +// int64_t vector_id = vector_ids[i]; +// std::vector vectors; +// for (int64_t j = 0; j < TABLE_DIM; j++) { +// vectors.emplace_back(xb[i*TABLE_DIM + j]); +//// std::cout << xb[i*TABLE_DIM + j] << std::endl; +// } +// vectors_ids_map[vector_id] = vectors; +// } + + std::this_thread::sleep_for(std::chrono::seconds(3)); + + std::random_device rd; + std::mt19937 gen(rd()); + std::uniform_int_distribution dis(0, nb - 1); + + int64_t numQuery = 1000; + for (int64_t i = 0; i < numQuery; ++i) { + int64_t index = dis(gen); + std::vector search; + for (int64_t j = 0; j < TABLE_DIM; j++) { + search.push_back(xb[index * TABLE_DIM + j]); + } + search_vectors.insert(std::make_pair(vector_ids[index], search)); +// std::cout << "index: " << index << " vector_ids[index]: " << vector_ids[index] << std::endl; + } + +// for (int64_t i = 0; i < nb; i += 100000) { +// std::vector search; +// for (int64_t j = 0; j < TABLE_DIM; j++) { +// search.push_back(xb[i * TABLE_DIM + j]); +// } +// search_vectors.insert(std::make_pair(vector_ids[i], search)); +// } + + } + + int k = 10; + for(auto& pair : search_vectors) { + auto& search = pair.second; + engine::QueryResults results; + stat = db_->Query(TABLE_NAME, k, 1, search.data(), results); + for(int t = 0; t < k; t++) { +// std::cout << "ID=" << results[0][t].first << " DISTANCE=" << results[0][t].second << std::endl; + +// std::cout << vectors_ids_map[results[0][t].first].size() << std::endl; +// for (auto& data : vectors_ids_map[results[0][t].first]) { +// std::cout << data << " "; +// } +// std::cout << std::endl; + } + // std::cout << "results[0][0].first: " << results[0][0].first << " pair.first: " << pair.first << " results[0][0].second: " << results[0][0].second << std::endl; + ASSERT_EQ(results[0][0].first, pair.first); + ASSERT_LT(results[0][0].second, 0.00001); + } + + stat = db_->DropAll(); + ASSERT_TRUE(stat.ok()); + +} + +TEST(MEM_TEST, INSERT_TEST) { + + auto options = engine::OptionsFactory::Build(); + options.meta.path = "/tmp/milvus_test"; + options.meta.backend_uri = "sqlite://:@:/"; + auto db_ = engine::DBFactory::Build(options); + + engine::meta::TableSchema table_info = BuildTableSchema(); + engine::Status stat = db_->CreateTable(table_info); + + engine::meta::TableSchema table_info_get; + table_info_get.table_id_ = TABLE_NAME; + stat = db_->DescribeTable(table_info_get); + ASSERT_STATS(stat); + ASSERT_EQ(table_info_get.dimension_, TABLE_DIM); + + auto start_time = METRICS_NOW_TIME; + + int insert_loop = 1000; + for (int i = 0; i < insert_loop; ++i) { + int64_t nb = 204800; + std::vector xb; + BuildVectors(nb, xb); + engine::IDNumbers vector_ids; + engine::Status status = db_->InsertVectors(TABLE_NAME, nb, xb.data(), vector_ids); + ASSERT_TRUE(status.ok()); + } + auto end_time = METRICS_NOW_TIME; + auto total_time = METRICS_MICROSECONDS(start_time, end_time); + std::cout << "total_time(ms) : " << total_time << std::endl; + + stat = db_->DropAll(); + ASSERT_TRUE(stat.ok()); + +} From 1db5af5dd2107c803e7b61307e93662a5dca340b Mon Sep 17 00:00:00 2001 From: jinhai Date: Sun, 7 Jul 2019 19:16:39 +0800 Subject: [PATCH 43/91] MS-176 Add create table parameter check Former-commit-id: 99f768f472c297d453310684c13a02edca0ff90a --- cpp/src/server/RequestTask.cpp | 60 
+++++++++++------ cpp/src/utils/ValidationUtil.cpp | 74 +++++++++++++++++++++ cpp/src/utils/ValidationUtil.h | 20 ++++++ cpp/unittest/CMakeLists.txt | 4 +- cpp/unittest/db/db_tests.cpp | 13 ++-- cpp/unittest/db/mysql_db_test.cpp | 12 ++-- cpp/unittest/db/search_test.cpp | 5 +- cpp/unittest/faiss_wrapper/wrapper_test.cpp | 5 +- cpp/unittest/utils/CMakeLists.txt | 30 +++++++++ cpp/unittest/utils/ValidationUtilTest.cpp | 61 +++++++++++++++++ 10 files changed, 247 insertions(+), 37 deletions(-) create mode 100644 cpp/src/utils/ValidationUtil.cpp create mode 100644 cpp/src/utils/ValidationUtil.h create mode 100644 cpp/unittest/utils/CMakeLists.txt create mode 100644 cpp/unittest/utils/ValidationUtilTest.cpp diff --git a/cpp/src/server/RequestTask.cpp b/cpp/src/server/RequestTask.cpp index 07a8305d1f..76da0f728d 100644 --- a/cpp/src/server/RequestTask.cpp +++ b/cpp/src/server/RequestTask.cpp @@ -8,6 +8,7 @@ #include "utils/CommonUtil.h" #include "utils/Log.h" #include "utils/TimeRecorder.h" +#include "utils/ValidationUtil.h" #include "DBWrapper.h" #include "version.h" @@ -133,19 +134,23 @@ BaseTaskPtr CreateTableTask::Create(const thrift::TableSchema& schema) { ServerError CreateTableTask::OnExecute() { TimeRecorder rc("CreateTableTask"); - + try { //step 1: check arguments - if(schema_.table_name.empty()) { - return SetError(SERVER_INVALID_TABLE_NAME, "Empty table name"); - } - if(schema_.dimension <= 0) { - return SetError(SERVER_INVALID_TABLE_DIMENSION, "Invalid table dimension: " + std::to_string(schema_.dimension)); + ServerError res = SERVER_SUCCESS; + res = ValidateTableName(schema_.table_name); + if(res != SERVER_SUCCESS) { + return res; } - engine::EngineType engine_type = EngineType(schema_.index_type); - if(engine_type == engine::EngineType::INVALID) { - return SetError(SERVER_INVALID_INDEX_TYPE, "Invalid index type: " + std::to_string(schema_.index_type)); + res = ValidateTableDimension(schema_.dimension); + if(res != SERVER_SUCCESS) { + return res; + } + + res = ValidateTableIndexType(schema_.index_type); + if(res != SERVER_SUCCESS) { + return res; } //step 2: construct table schema @@ -187,8 +192,10 @@ ServerError DescribeTableTask::OnExecute() { try { //step 1: check arguments - if(table_name_.empty()) { - return SetError(SERVER_INVALID_TABLE_NAME, "Empty table name"); + ServerError res = SERVER_SUCCESS; + res = ValidateTableName(table_name_); + if(res != SERVER_SUCCESS) { + return res; } //step 2: get table info @@ -230,10 +237,11 @@ ServerError HasTableTask::OnExecute() { TimeRecorder rc("HasTableTask"); //step 1: check arguments - if(table_name_.empty()) { - return SetError(SERVER_INVALID_TABLE_NAME, "Empty table name"); + ServerError res = SERVER_SUCCESS; + res = ValidateTableName(table_name_); + if(res != SERVER_SUCCESS) { + return res; } - //step 2: check table existence engine::Status stat = DBWrapper::DB()->HasTable(table_name_, has_table_); if(!stat.ok()) { @@ -264,8 +272,10 @@ ServerError DeleteTableTask::OnExecute() { TimeRecorder rc("DeleteTableTask"); //step 1: check arguments - if (table_name_.empty()) { - return SetError(SERVER_INVALID_TABLE_NAME, "Empty table name"); + ServerError res = SERVER_SUCCESS; + res = ValidateTableName(table_name_); + if(res != SERVER_SUCCESS) { + return res; } //step 2: check table existence @@ -346,8 +356,10 @@ ServerError AddVectorTask::OnExecute() { TimeRecorder rc("AddVectorTask"); //step 1: check arguments - if (table_name_.empty()) { - return SetError(SERVER_INVALID_TABLE_NAME, "Empty table name"); + ServerError res = 
SERVER_SUCCESS; + res = ValidateTableName(table_name_); + if(res != SERVER_SUCCESS) { + return res; } if(record_array_.empty()) { @@ -435,8 +447,10 @@ ServerError SearchVectorTask::OnExecute() { TimeRecorder rc("SearchVectorTask"); //step 1: check arguments - if (table_name_.empty()) { - return SetError(SERVER_INVALID_TABLE_NAME, "Empty table name"); + ServerError res = SERVER_SUCCESS; + res = ValidateTableName(table_name_); + if(res != SERVER_SUCCESS) { + return res; } if(top_k_ <= 0) { @@ -548,8 +562,10 @@ ServerError GetTableRowCountTask::OnExecute() { TimeRecorder rc("GetTableRowCountTask"); //step 1: check arguments - if (table_name_.empty()) { - return SetError(SERVER_INVALID_TABLE_NAME, "Empty table name"); + ServerError res = SERVER_SUCCESS; + res = ValidateTableName(table_name_); + if(res != SERVER_SUCCESS) { + return res; } //step 2: get row count diff --git a/cpp/src/utils/ValidationUtil.cpp b/cpp/src/utils/ValidationUtil.cpp new file mode 100644 index 0000000000..b4bbd3346a --- /dev/null +++ b/cpp/src/utils/ValidationUtil.cpp @@ -0,0 +1,74 @@ +#include +#include "ValidationUtil.h" +#include "Log.h" + + +namespace zilliz { +namespace milvus { +namespace server { + +constexpr size_t table_name_size_limit = 16384; +constexpr int64_t table_dimension_limit = 16384; + +ServerError +ValidateTableName(const std::string &table_name) { + + // Table name shouldn't be empty. + if (table_name.empty()) { + SERVER_LOG_ERROR << "Empty table name"; + return SERVER_INVALID_TABLE_NAME; + } + + // Table name size shouldn't exceed 16384. + if (table_name.size() > table_name_size_limit) { + SERVER_LOG_ERROR << "Table name size exceed the limitation"; + return SERVER_INVALID_TABLE_NAME; + } + + // Table name first character should be underscore or character. + char first_char = table_name[0]; + if (first_char != '_' && std::isalpha(first_char) == 0) { + SERVER_LOG_ERROR << "Table name first character isn't underscore or character: " << first_char; + return SERVER_INVALID_TABLE_NAME; + } + + int64_t table_name_size = table_name.size(); + for (int64_t i = 1; i < table_name_size; ++i) { + char name_char = table_name[i]; + if (name_char != '_' && std::isalnum(name_char) == 0) { + SERVER_LOG_ERROR << "Table name character isn't underscore or alphanumber: " << name_char; + return SERVER_INVALID_TABLE_NAME; + } + } + + return SERVER_SUCCESS; +} + +ServerError +ValidateTableDimension(int64_t dimension) { + if (dimension <= 0 || dimension > table_dimension_limit) { + SERVER_LOG_ERROR << "Table dimension excceed the limitation: " << table_dimension_limit; + return SERVER_INVALID_VECTOR_DIMENSION; + } else { + return SERVER_SUCCESS; + } +} + +ServerError +ValidateTableIndexType(int32_t index_type) { + auto engine_type = engine::EngineType(index_type); + switch (engine_type) { + case engine::EngineType::FAISS_IDMAP: + case engine::EngineType::FAISS_IVFFLAT: { + SERVER_LOG_DEBUG << "Index type: " << index_type; + return SERVER_SUCCESS; + } + default: { + return SERVER_INVALID_INDEX_TYPE; + } + } +} + +} +} +} \ No newline at end of file diff --git a/cpp/src/utils/ValidationUtil.h b/cpp/src/utils/ValidationUtil.h new file mode 100644 index 0000000000..608ac22682 --- /dev/null +++ b/cpp/src/utils/ValidationUtil.h @@ -0,0 +1,20 @@ +#pragma once + +#include "Error.h" + +namespace zilliz { +namespace milvus { +namespace server { + +ServerError +ValidateTableName(const std::string& table_name); + +ServerError +ValidateTableDimension(int64_t dimension); + +ServerError +ValidateTableIndexType(int32_t index_type); + +} +} 
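// How these helpers are intended to be chained (mirrors the checks added to
// CreateTableTask::OnExecute in RequestTask.cpp above; sketch only, "schema" is illustrative):
//   ServerError err = ValidateTableName(schema.table_name);
//   if (err == SERVER_SUCCESS) err = ValidateTableDimension(schema.dimension);
//   if (err == SERVER_SUCCESS) err = ValidateTableIndexType(schema.index_type);
//   if (err != SERVER_SUCCESS) { /* reject the request before it reaches the engine */ }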
+} \ No newline at end of file diff --git a/cpp/unittest/CMakeLists.txt b/cpp/unittest/CMakeLists.txt index 043716b58b..8675bf8735 100644 --- a/cpp/unittest/CMakeLists.txt +++ b/cpp/unittest/CMakeLists.txt @@ -12,7 +12,6 @@ aux_source_directory(${MILVUS_ENGINE_SRC}/config config_files) set(unittest_srcs ${CMAKE_CURRENT_SOURCE_DIR}/main.cpp) - #${EASYLOGGINGPP_INCLUDE_DIR}/easylogging++.cc) set(require_files ${MILVUS_ENGINE_SRC}/server/ServerConfig.cpp @@ -44,4 +43,5 @@ add_subdirectory(index_wrapper) #add_subdirectory(faiss_wrapper) #add_subdirectory(license) add_subdirectory(metrics) -add_subdirectory(storage) \ No newline at end of file +add_subdirectory(storage) +add_subdirectory(utils) \ No newline at end of file diff --git a/cpp/unittest/db/db_tests.cpp b/cpp/unittest/db/db_tests.cpp index bd17081af8..625211cae7 100644 --- a/cpp/unittest/db/db_tests.cpp +++ b/cpp/unittest/db/db_tests.cpp @@ -3,17 +3,20 @@ // Unauthorized copying of this file, via any medium is strictly prohibited. // Proprietary and confidential. //////////////////////////////////////////////////////////////////////////////// -#include -#include -#include -#include - #include "utils.h" #include "db/DB.h" #include "db/DBImpl.h" #include "db/MetaConsts.h" #include "db/Factories.h" +#include +#include + +#include + +#include +#include + using namespace zilliz::milvus; namespace { diff --git a/cpp/unittest/db/mysql_db_test.cpp b/cpp/unittest/db/mysql_db_test.cpp index 7fdb30a204..0e24cacdfd 100644 --- a/cpp/unittest/db/mysql_db_test.cpp +++ b/cpp/unittest/db/mysql_db_test.cpp @@ -3,17 +3,19 @@ // Unauthorized copying of this file, via any medium is strictly prohibited. // Proprietary and confidential. //////////////////////////////////////////////////////////////////////////////// -#include -#include -#include -#include - #include "utils.h" #include "db/DB.h" #include "db/DBImpl.h" #include "db/MetaConsts.h" #include "db/Factories.h" +#include +#include +#include + +#include +#include + using namespace zilliz::milvus; namespace { diff --git a/cpp/unittest/db/search_test.cpp b/cpp/unittest/db/search_test.cpp index db10bcbadf..ce99ea78f7 100644 --- a/cpp/unittest/db/search_test.cpp +++ b/cpp/unittest/db/search_test.cpp @@ -3,10 +3,11 @@ // Unauthorized copying of this file, via any medium is strictly prohibited. // Proprietary and confidential. //////////////////////////////////////////////////////////////////////////////// -#include - #include "db/scheduler/task/SearchTask.h" +#include + +#include #include using namespace zilliz::milvus; diff --git a/cpp/unittest/faiss_wrapper/wrapper_test.cpp b/cpp/unittest/faiss_wrapper/wrapper_test.cpp index 67a6c3cde8..6f4a651a55 100644 --- a/cpp/unittest/faiss_wrapper/wrapper_test.cpp +++ b/cpp/unittest/faiss_wrapper/wrapper_test.cpp @@ -4,12 +4,15 @@ // Proprietary and confidential. //////////////////////////////////////////////////////////////////////////////// -#include + #include "wrapper/Operand.h" #include "wrapper/Index.h" #include "wrapper/IndexBuilder.h" +#include +#include + using namespace zilliz::milvus::engine; diff --git a/cpp/unittest/utils/CMakeLists.txt b/cpp/unittest/utils/CMakeLists.txt new file mode 100644 index 0000000000..a46a3b05e1 --- /dev/null +++ b/cpp/unittest/utils/CMakeLists.txt @@ -0,0 +1,30 @@ +#------------------------------------------------------------------------------- +# Copyright 上海赜睿信息科技有限公司(Zilliz) - All Rights Reserved +# Unauthorized copying of this file, via any medium is strictly prohibited. +# Proprietary and confidential. 
+#------------------------------------------------------------------------------- + +# Make sure that your call to link_directories takes place before your call to the relevant add_executable. +include_directories("${CUDA_TOOLKIT_ROOT_DIR}/include") +link_directories("${CUDA_TOOLKIT_ROOT_DIR}/lib64") + +set(validation_util_src + ${MILVUS_ENGINE_SRC}/utils/ValidationUtil.cpp + ${MILVUS_ENGINE_SRC}/utils/ValidationUtil.h) + +set(validation_util_test_src + ${unittest_srcs} + ${validation_util_src} + ${require_files} + ValidationUtilTest.cpp + ) + +add_executable(validation_util_test + ${validation_util_test_src} + ${config_files}) + +target_link_libraries(validation_util_test + ${unittest_libs} + boost_filesystem) + +install(TARGETS validation_util_test DESTINATION bin) \ No newline at end of file diff --git a/cpp/unittest/utils/ValidationUtilTest.cpp b/cpp/unittest/utils/ValidationUtilTest.cpp new file mode 100644 index 0000000000..095614e325 --- /dev/null +++ b/cpp/unittest/utils/ValidationUtilTest.cpp @@ -0,0 +1,61 @@ +//////////////////////////////////////////////////////////////////////////////// +// Copyright 上海赜睿信息科技有限公司(Zilliz) - All Rights Reserved +// Unauthorized copying of this file, via any medium is strictly prohibited. +// Proprietary and confidential. +//////////////////////////////////////////////////////////////////////////////// +#include + +#include "utils/ValidationUtil.h" +#include "utils/Error.h" + +#include + +using namespace zilliz::milvus::server; + +TEST(ValidationUtilTest, TableNameTest) { + std::string table_name = "Normal123_"; + ServerError res = ValidateTableName(table_name); + ASSERT_EQ(res, SERVER_SUCCESS); + + table_name = "12sds"; + res = ValidateTableName(table_name); + ASSERT_EQ(res, SERVER_INVALID_TABLE_NAME); + + table_name = ""; + res = ValidateTableName(table_name); + ASSERT_EQ(res, SERVER_INVALID_TABLE_NAME); + + table_name = "_asdasd"; + res = ValidateTableName(table_name); + ASSERT_EQ(res, SERVER_SUCCESS); + + table_name = "!@#!@"; + res = ValidateTableName(table_name); + ASSERT_EQ(res, SERVER_INVALID_TABLE_NAME); + + table_name = "中文"; + res = ValidateTableName(table_name); + ASSERT_EQ(res, SERVER_INVALID_TABLE_NAME); + + + table_name = std::string(32768, 'a'); + res = ValidateTableName(table_name); + ASSERT_EQ(res, SERVER_INVALID_TABLE_NAME); +} + + +TEST(ValidationUtilTest, TableDimensionTest) { + ASSERT_EQ(ValidateTableDimension(-1), SERVER_INVALID_VECTOR_DIMENSION); + ASSERT_EQ(ValidateTableDimension(0), SERVER_INVALID_VECTOR_DIMENSION); + ASSERT_EQ(ValidateTableDimension(16385), SERVER_INVALID_VECTOR_DIMENSION); + ASSERT_EQ(ValidateTableDimension(16384), SERVER_SUCCESS); + ASSERT_EQ(ValidateTableDimension(1), SERVER_SUCCESS); +} + +TEST(ValidationUtilTest, TableIndexTypeTest) { + ASSERT_EQ(ValidateTableIndexType(0), SERVER_INVALID_INDEX_TYPE); + ASSERT_EQ(ValidateTableIndexType(1), SERVER_SUCCESS); + ASSERT_EQ(ValidateTableIndexType(2), SERVER_SUCCESS); + ASSERT_EQ(ValidateTableIndexType(3), SERVER_INVALID_INDEX_TYPE); + ASSERT_EQ(ValidateTableIndexType(4), SERVER_INVALID_INDEX_TYPE); +} From ee03d36691937f831f381df11388098cfb6196f6 Mon Sep 17 00:00:00 2001 From: jinhai Date: Sun, 7 Jul 2019 19:55:16 +0800 Subject: [PATCH 44/91] MS-176 Update table name length Former-commit-id: 9280bdd9470730803640f75b962737f3e604501f --- cpp/src/utils/ValidationUtil.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cpp/src/utils/ValidationUtil.cpp b/cpp/src/utils/ValidationUtil.cpp index b4bbd3346a..65cd81e670 100644 ---
a/cpp/src/utils/ValidationUtil.cpp +++ b/cpp/src/utils/ValidationUtil.cpp @@ -7,7 +7,7 @@ namespace zilliz { namespace milvus { namespace server { -constexpr size_t table_name_size_limit = 16384; +constexpr size_t table_name_size_limit = 255; constexpr int64_t table_dimension_limit = 16384; ServerError From 5951755fb926d5ec840abb93a78c910823f7326b Mon Sep 17 00:00:00 2001 From: starlord Date: Sun, 7 Jul 2019 20:41:52 +0800 Subject: [PATCH 45/91] date range check Former-commit-id: 10d00dd0be0ccabf933684add78a5c93bc6f8bce --- cpp/src/server/RequestTask.cpp | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/cpp/src/server/RequestTask.cpp b/cpp/src/server/RequestTask.cpp index 76da0f728d..51d5404107 100644 --- a/cpp/src/server/RequestTask.cpp +++ b/cpp/src/server/RequestTask.cpp @@ -109,7 +109,13 @@ namespace { } long days = (tt_end > tt_start) ? (tt_end - tt_start)/DAY_SECONDS : (tt_start - tt_end)/DAY_SECONDS; - for(long i = 0; i <= days; i++) { + if(days == 0) { + error_code = SERVER_INVALID_TIME_RANGE; + error_msg = "Invalid time range: " + range.start_value + " to " + range.end_value; + return ; + } + + for(long i = 0; i < days; i++) { time_t tt_day = tt_start + DAY_SECONDS*i; tm tm_day; CommonUtil::ConvertTime(tt_day, tm_day); From 18e0881baf282665cfc7dc533ea6459644d5cde0 Mon Sep 17 00:00:00 2001 From: zhiru Date: Mon, 8 Jul 2019 11:14:28 +0800 Subject: [PATCH 46/91] Add new mem manager Former-commit-id: ef46229de4934de34f144502ce6d8551d4ef38db --- cpp/CHANGELOG.md | 1 + cpp/conf/server_config.template | 4 +- cpp/src/db/Constants.h | 3 + cpp/src/db/MemManager.cpp | 25 ++++++ cpp/src/db/MemManager.h | 6 ++ cpp/src/db/MemManagerAbstract.h | 6 ++ cpp/src/db/MemTable.cpp | 17 +++- cpp/src/db/MemTable.h | 8 +- cpp/src/db/NewMemManager.cpp | 38 +++++++++ cpp/src/db/NewMemManager.h | 6 ++ cpp/src/db/Options.h | 1 + cpp/src/server/DBWrapper.cpp | 8 ++ cpp/src/server/ServerConfig.h | 1 + cpp/unittest/db/mem_test.cpp | 144 +++++++++++++++++++++----------- cpp/unittest/db/utils.cpp | 12 +++ cpp/unittest/db/utils.h | 5 ++ 16 files changed, 231 insertions(+), 54 deletions(-) diff --git a/cpp/CHANGELOG.md b/cpp/CHANGELOG.md index 5168b9549b..03ffe14731 100644 --- a/cpp/CHANGELOG.md +++ b/cpp/CHANGELOG.md @@ -19,6 +19,7 @@ Please mark all change in change log and use the ticket from JIRA. ## New Feature - MS-137 - Integrate knowhere +- MS-180 - Add new mem manager ## Task diff --git a/cpp/conf/server_config.template b/cpp/conf/server_config.template index 0383e00b53..f0cd6d5e52 100644 --- a/cpp/conf/server_config.template +++ b/cpp/conf/server_config.template @@ -2,7 +2,7 @@ server_config: address: 0.0.0.0 port: 19530 # the port milvus listen to, default: 19530, range: 1025 ~ 65534 gpu_index: 0 # the gpu milvus use, default: 0, range: 0 ~ gpu number - 1 - mode: single # milvus deployment type: single, cluster + mode: single # milvus deployment type: single, cluster, read_only db_config: db_path: @MILVUS_DB_PATH@ # milvus data storage path @@ -15,6 +15,8 @@ db_config: index_building_threshold: 1024 # index building trigger threshold, default: 1024, unit: MB archive_disk_threshold: 512 # triger archive action if storage size exceed this value, unit: GB archive_days_threshold: 30 # files older than x days will be archived, unit: day + maximum_memory: 4 # maximum memory allowed, default: 4, unit: GB, should be at least 1 GB. 
+ # the sum of maximum_memory and cpu_cache_capacity should be less than total memory metric_config: is_startup: off # if monitoring start: on, off diff --git a/cpp/src/db/Constants.h b/cpp/src/db/Constants.h index 2bb2e0a064..1ba02b1d55 100644 --- a/cpp/src/db/Constants.h +++ b/cpp/src/db/Constants.h @@ -11,6 +11,9 @@ namespace engine { const size_t K = 1024UL; const size_t M = K*K; +const size_t G = K*M; +const size_t T = K*G; + const size_t MAX_TABLE_FILE_MEM = 128 * M; const int VECTOR_TYPE_SIZE = sizeof(float); diff --git a/cpp/src/db/MemManager.cpp b/cpp/src/db/MemManager.cpp index e36b0c45ba..ba8517cdbd 100644 --- a/cpp/src/db/MemManager.cpp +++ b/cpp/src/db/MemManager.cpp @@ -8,6 +8,7 @@ #include "MetaConsts.h" #include "EngineFactory.h" #include "metrics/Metrics.h" +#include "Log.h" #include #include @@ -128,6 +129,10 @@ Status MemManager::InsertVectorsNoLock(const std::string& table_id, size_t n, const float* vectors, IDNumbers& vector_ids) { + + LOG(DEBUG) << "MemManager::InsertVectorsNoLock: mutable mem = " << GetCurrentMutableMem() << + ", immutable mem = " << GetCurrentImmutableMem() << ", total mem = " << GetCurrentMem(); + MemVectorsPtr mem = GetMemByTable(table_id); if (mem == nullptr) { return Status::NotFound("Group " + table_id + " not found!"); @@ -192,6 +197,26 @@ Status MemManager::EraseMemVector(const std::string& table_id) { return Status::OK(); } +size_t MemManager::GetCurrentMutableMem() { + size_t totalMem = 0; + for (auto& kv : mem_id_map_) { + auto memVector = kv.second; + totalMem += memVector->Size(); + } + return totalMem; +} + +size_t MemManager::GetCurrentImmutableMem() { + size_t totalMem = 0; + for (auto& memVector : immu_mem_list_) { + totalMem += memVector->Size(); + } + return totalMem; +} + +size_t MemManager::GetCurrentMem() { + return GetCurrentMutableMem() + GetCurrentImmutableMem(); +} } // namespace engine } // namespace milvus diff --git a/cpp/src/db/MemManager.h b/cpp/src/db/MemManager.h index 95303889db..e8460c7a6d 100644 --- a/cpp/src/db/MemManager.h +++ b/cpp/src/db/MemManager.h @@ -78,6 +78,12 @@ public: Status EraseMemVector(const std::string& table_id) override; + size_t GetCurrentMutableMem() override; + + size_t GetCurrentImmutableMem() override; + + size_t GetCurrentMem() override; + private: MemVectorsPtr GetMemByTable(const std::string& table_id); diff --git a/cpp/src/db/MemManagerAbstract.h b/cpp/src/db/MemManagerAbstract.h index 74222df1e8..58c73ba6f8 100644 --- a/cpp/src/db/MemManagerAbstract.h +++ b/cpp/src/db/MemManagerAbstract.h @@ -16,6 +16,12 @@ public: virtual Status EraseMemVector(const std::string& table_id) = 0; + virtual size_t GetCurrentMutableMem() = 0; + + virtual size_t GetCurrentImmutableMem() = 0; + + virtual size_t GetCurrentMem() = 0; + }; // MemManagerAbstract using MemManagerAbstractPtr = std::shared_ptr; diff --git a/cpp/src/db/MemTable.cpp b/cpp/src/db/MemTable.cpp index b282ad375a..ba3875fbb5 100644 --- a/cpp/src/db/MemTable.cpp +++ b/cpp/src/db/MemTable.cpp @@ -49,13 +49,15 @@ size_t MemTable::GetTableFileCount() { } Status MemTable::Serialize() { - for (auto& memTableFile : mem_table_file_list_) { - auto status = memTableFile->Serialize(); + for (auto memTableFile = mem_table_file_list_.begin(); memTableFile != mem_table_file_list_.end(); ) { + auto status = (*memTableFile)->Serialize(); if (!status.ok()) { std::string errMsg = "MemTable::Serialize failed: " + status.ToString(); ENGINE_LOG_ERROR << errMsg; return Status::Error(errMsg); } + std::lock_guard lock(mutex_); + memTableFile = 
mem_table_file_list_.erase(memTableFile); } return Status::OK(); } @@ -64,10 +66,19 @@ bool MemTable::Empty() { return mem_table_file_list_.empty(); } -std::string MemTable::GetTableId() { +const std::string& MemTable::GetTableId() const { return table_id_; } +size_t MemTable::GetCurrentMem() { + std::lock_guard lock(mutex_); + size_t totalMem = 0; + for (auto& memTableFile : mem_table_file_list_) { + totalMem += memTableFile->GetCurrentMem(); + } + return totalMem; +} + } // namespace engine } // namespace milvus } // namespace zilliz \ No newline at end of file diff --git a/cpp/src/db/MemTable.h b/cpp/src/db/MemTable.h index e09d6ddac1..9bae932e62 100644 --- a/cpp/src/db/MemTable.h +++ b/cpp/src/db/MemTable.h @@ -4,7 +4,7 @@ #include "MemTableFile.h" #include "VectorSource.h" -#include +#include namespace zilliz { namespace milvus { @@ -30,7 +30,9 @@ public: bool Empty(); - std::string GetTableId(); + const std::string& GetTableId() const; + + size_t GetCurrentMem(); private: const std::string table_id_; @@ -41,6 +43,8 @@ private: Options options_; + std::mutex mutex_; + }; //MemTable } // namespace engine diff --git a/cpp/src/db/NewMemManager.cpp b/cpp/src/db/NewMemManager.cpp index 19aba68eb7..3c78f37101 100644 --- a/cpp/src/db/NewMemManager.cpp +++ b/cpp/src/db/NewMemManager.cpp @@ -1,5 +1,9 @@ #include "NewMemManager.h" #include "VectorSource.h" +#include "Log.h" +#include "Constants.h" + +#include namespace zilliz { namespace milvus { @@ -20,6 +24,9 @@ Status NewMemManager::InsertVectors(const std::string& table_id_, const float* vectors_, IDNumbers& vector_ids_) { + while (GetCurrentMem() > options_.maximum_memory) { + std::this_thread::sleep_for(std::chrono::milliseconds(1)); + } std::unique_lock lock(mutex_); @@ -30,6 +37,10 @@ Status NewMemManager::InsertVectorsNoLock(const std::string& table_id, size_t n, const float* vectors, IDNumbers& vector_ids) { + + LOG(DEBUG) << "NewMemManager::InsertVectorsNoLock: mutable mem = " << GetCurrentMutableMem() << + ", immutable mem = " << GetCurrentImmutableMem() << ", total mem = " << GetCurrentMem(); + MemTablePtr mem = GetMemByTable(table_id); VectorSource::Ptr source = std::make_shared(n, vectors); @@ -64,6 +75,12 @@ Status NewMemManager::Serialize(std::set& table_ids) { table_ids.insert(mem->GetTableId()); } immu_mem_list_.clear(); +// for (auto mem = immu_mem_list_.begin(); mem != immu_mem_list_.end(); ) { +// (*mem)->Serialize(); +// table_ids.insert((*mem)->GetTableId()); +// mem = immu_mem_list_.erase(mem); +// LOG(DEBUG) << "immu_mem_list_ size = " << immu_mem_list_.size(); +// } return Status::OK(); } @@ -87,6 +104,27 @@ Status NewMemManager::EraseMemVector(const std::string& table_id) { return Status::OK(); } +size_t NewMemManager::GetCurrentMutableMem() { + size_t totalMem = 0; + for (auto& kv : mem_id_map_) { + auto memTable = kv.second; + totalMem += memTable->GetCurrentMem(); + } + return totalMem; +} + +size_t NewMemManager::GetCurrentImmutableMem() { + size_t totalMem = 0; + for (auto& memTable : immu_mem_list_) { + totalMem += memTable->GetCurrentMem(); + } + return totalMem; +} + +size_t NewMemManager::GetCurrentMem() { + return GetCurrentMutableMem() + GetCurrentImmutableMem(); +} + } // namespace engine } // namespace milvus } // namespace zilliz \ No newline at end of file diff --git a/cpp/src/db/NewMemManager.h b/cpp/src/db/NewMemManager.h index a5f5a9ca13..9883480404 100644 --- a/cpp/src/db/NewMemManager.h +++ b/cpp/src/db/NewMemManager.h @@ -31,6 +31,12 @@ public: Status EraseMemVector(const std::string& table_id) 
override; + size_t GetCurrentMutableMem() override; + + size_t GetCurrentImmutableMem() override; + + size_t GetCurrentMem() override; + private: MemTablePtr GetMemByTable(const std::string& table_id); diff --git a/cpp/src/db/Options.h b/cpp/src/db/Options.h index 39d0a15019..47bbb45bbc 100644 --- a/cpp/src/db/Options.h +++ b/cpp/src/db/Options.h @@ -61,6 +61,7 @@ struct Options { size_t index_trigger_size = ONE_GB; //unit: byte DBMetaOptions meta; int mode = MODE::SINGLE; + float maximum_memory = 4 * ONE_GB; }; // Options diff --git a/cpp/src/server/DBWrapper.cpp b/cpp/src/server/DBWrapper.cpp index fca15cb65a..bed4440d5e 100644 --- a/cpp/src/server/DBWrapper.cpp +++ b/cpp/src/server/DBWrapper.cpp @@ -23,6 +23,14 @@ DBWrapper::DBWrapper() { if(index_size > 0) {//ensure larger than zero, unit is MB opt.index_trigger_size = (size_t)index_size * engine::ONE_MB; } + float maximum_memory = config.GetFloatValue(CONFIG_MAXMIMUM_MEMORY); + if (maximum_memory > 1.0) { + opt.maximum_memory = maximum_memory * engine::ONE_GB; + } + else { + std::cout << "ERROR: maximum_memory should be at least 1 GB" << std::endl; + kill(0, SIGUSR1); + } ConfigNode& serverConfig = ServerConfig::GetInstance().GetConfig(CONFIG_SERVER); std::string mode = serverConfig.GetValue(CONFIG_CLUSTER_MODE, "single"); diff --git a/cpp/src/server/ServerConfig.h b/cpp/src/server/ServerConfig.h index 0ec04eed8c..b3b95eb8b6 100644 --- a/cpp/src/server/ServerConfig.h +++ b/cpp/src/server/ServerConfig.h @@ -26,6 +26,7 @@ static const std::string CONFIG_DB_PATH = "db_path"; static const std::string CONFIG_DB_INDEX_TRIGGER_SIZE = "index_building_threshold"; static const std::string CONFIG_DB_ARCHIVE_DISK = "archive_disk_threshold"; static const std::string CONFIG_DB_ARCHIVE_DAYS = "archive_days_threshold"; +static const std::string CONFIG_MAXMIMUM_MEMORY = "maximum_memory"; static const std::string CONFIG_LOG = "log_config"; diff --git a/cpp/unittest/db/mem_test.cpp b/cpp/unittest/db/mem_test.cpp index 915610adcc..818c3a6388 100644 --- a/cpp/unittest/db/mem_test.cpp +++ b/cpp/unittest/db/mem_test.cpp @@ -8,6 +8,8 @@ #include "db/Constants.h" #include "db/EngineFactory.h" #include "metrics/Metrics.h" +#include "db/MetaConsts.h" +#include "boost/filesystem.hpp" #include #include @@ -34,9 +36,6 @@ namespace { vectors.clear(); vectors.resize(n*TABLE_DIM); float* data = vectors.data(); -// std::random_device rd; -// std::mt19937 gen(rd()); -// std::uniform_real_distribution<> dis(0.0, 1.0); for(int i = 0; i < n; i++) { for(int j = 0; j < TABLE_DIM; j++) data[TABLE_DIM * i + j] = drand48(); data[TABLE_DIM * i] += i / 2000.; @@ -44,7 +43,7 @@ namespace { } } -TEST(MEM_TEST, VECTOR_SOURCE_TEST) { +TEST_F(NewMemManagerTest, VECTOR_SOURCE_TEST) { std::shared_ptr impl_ = engine::DBMetaImplFactory::Build(); @@ -91,7 +90,7 @@ TEST(MEM_TEST, VECTOR_SOURCE_TEST) { ASSERT_TRUE(status.ok()); } -TEST(MEM_TEST, MEM_TABLE_FILE_TEST) { +TEST_F(NewMemManagerTest, MEM_TABLE_FILE_TEST) { std::shared_ptr impl_ = engine::DBMetaImplFactory::Build(); auto options = engine::OptionsFactory::Build(); @@ -135,7 +134,7 @@ TEST(MEM_TEST, MEM_TABLE_FILE_TEST) { ASSERT_TRUE(status.ok()); } -TEST(MEM_TEST, MEM_TABLE_TEST) { +TEST_F(NewMemManagerTest, MEM_TABLE_TEST) { std::shared_ptr impl_ = engine::DBMetaImplFactory::Build(); auto options = engine::OptionsFactory::Build(); @@ -201,7 +200,7 @@ TEST(MEM_TEST, MEM_TABLE_TEST) { ASSERT_TRUE(status.ok()); } -TEST(MEM_TEST, MEM_MANAGER_TEST) { +TEST_F(NewMemManagerTest, SERIAL_INSERT_SEARCH_TEST) { auto options = 
engine::OptionsFactory::Build(); options.meta.path = "/tmp/milvus_test"; @@ -218,7 +217,6 @@ TEST(MEM_TEST, MEM_MANAGER_TEST) { ASSERT_EQ(table_info_get.dimension_, TABLE_DIM); std::map> search_vectors; -// std::map> vectors_ids_map; { engine::IDNumbers vector_ids; int64_t nb = 1024000; @@ -227,24 +225,13 @@ TEST(MEM_TEST, MEM_MANAGER_TEST) { engine::Status status = db_->InsertVectors(TABLE_NAME, nb, xb.data(), vector_ids); ASSERT_TRUE(status.ok()); -// std::ofstream myfile("mem_test.txt"); -// for (int64_t i = 0; i < nb; ++i) { -// int64_t vector_id = vector_ids[i]; -// std::vector vectors; -// for (int64_t j = 0; j < TABLE_DIM; j++) { -// vectors.emplace_back(xb[i*TABLE_DIM + j]); -//// std::cout << xb[i*TABLE_DIM + j] << std::endl; -// } -// vectors_ids_map[vector_id] = vectors; -// } - std::this_thread::sleep_for(std::chrono::seconds(3)); std::random_device rd; std::mt19937 gen(rd()); std::uniform_int_distribution dis(0, nb - 1); - int64_t numQuery = 1000; + int64_t numQuery = 20; for (int64_t i = 0; i < numQuery; ++i) { int64_t index = dis(gen); std::vector search; @@ -252,17 +239,7 @@ TEST(MEM_TEST, MEM_MANAGER_TEST) { search.push_back(xb[index * TABLE_DIM + j]); } search_vectors.insert(std::make_pair(vector_ids[index], search)); -// std::cout << "index: " << index << " vector_ids[index]: " << vector_ids[index] << std::endl; } - -// for (int64_t i = 0; i < nb; i += 100000) { -// std::vector search; -// for (int64_t j = 0; j < TABLE_DIM; j++) { -// search.push_back(xb[i * TABLE_DIM + j]); -// } -// search_vectors.insert(std::make_pair(vector_ids[i], search)); -// } - } int k = 10; @@ -270,26 +247,16 @@ TEST(MEM_TEST, MEM_MANAGER_TEST) { auto& search = pair.second; engine::QueryResults results; stat = db_->Query(TABLE_NAME, k, 1, search.data(), results); - for(int t = 0; t < k; t++) { -// std::cout << "ID=" << results[0][t].first << " DISTANCE=" << results[0][t].second << std::endl; - -// std::cout << vectors_ids_map[results[0][t].first].size() << std::endl; -// for (auto& data : vectors_ids_map[results[0][t].first]) { -// std::cout << data << " "; -// } -// std::cout << std::endl; - } - // std::cout << "results[0][0].first: " << results[0][0].first << " pair.first: " << pair.first << " results[0][0].second: " << results[0][0].second << std::endl; ASSERT_EQ(results[0][0].first, pair.first); ASSERT_LT(results[0][0].second, 0.00001); } - stat = db_->DropAll(); - ASSERT_TRUE(stat.ok()); + delete db_; + boost::filesystem::remove_all(options.meta.path); } -TEST(MEM_TEST, INSERT_TEST) { +TEST_F(NewMemManagerTest, INSERT_TEST) { auto options = engine::OptionsFactory::Build(); options.meta.path = "/tmp/milvus_test"; @@ -307,9 +274,9 @@ TEST(MEM_TEST, INSERT_TEST) { auto start_time = METRICS_NOW_TIME; - int insert_loop = 1000; + int insert_loop = 20; for (int i = 0; i < insert_loop; ++i) { - int64_t nb = 204800; + int64_t nb = 409600; std::vector xb; BuildVectors(nb, xb); engine::IDNumbers vector_ids; @@ -318,10 +285,91 @@ TEST(MEM_TEST, INSERT_TEST) { } auto end_time = METRICS_NOW_TIME; auto total_time = METRICS_MICROSECONDS(start_time, end_time); - std::cout << "total_time(ms) : " << total_time << std::endl; + LOG(DEBUG) << "total_time spent in INSERT_TEST (ms) : " << total_time; - stat = db_->DropAll(); - ASSERT_TRUE(stat.ok()); + delete db_; + boost::filesystem::remove_all(options.meta.path); } +TEST_F(NewMemManagerTest, CONCURRENT_INSERT_SEARCH_TEST) { + + auto options = engine::OptionsFactory::Build(); + options.meta.path = "/tmp/milvus_test"; + options.meta.backend_uri = "sqlite://:@:/"; 
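// Sizing note for the insert batches used by these tests (an illustrative estimate, not part of the
// original test code): with TABLE_DIM = 256 and float vectors, one vector takes 256 * 4 = 1 KB, so a
// batch of nb = 409600 vectors is 409600 KB = 400 MB. Roughly ten un-serialized batches would therefore
// reach the 4 GB default ceiling that NewMemManager::InsertVectors checks (options_.maximum_memory)
// before admitting more data.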
+ auto db_ = engine::DBFactory::Build(options); + + engine::meta::TableSchema table_info = BuildTableSchema(); + engine::Status stat = db_->CreateTable(table_info); + + engine::meta::TableSchema table_info_get; + table_info_get.table_id_ = TABLE_NAME; + stat = db_->DescribeTable(table_info_get); + ASSERT_STATS(stat); + ASSERT_EQ(table_info_get.dimension_, TABLE_DIM); + + engine::IDNumbers vector_ids; + engine::IDNumbers target_ids; + + int64_t nb = 409600; + std::vector xb; + BuildVectors(nb, xb); + + int64_t qb = 5; + std::vector qxb; + BuildVectors(qb, qxb); + + std::thread search([&]() { + engine::QueryResults results; + int k = 10; + std::this_thread::sleep_for(std::chrono::seconds(2)); + + INIT_TIMER; + std::stringstream ss; + uint64_t count = 0; + uint64_t prev_count = 0; + + for (auto j=0; j<10; ++j) { + ss.str(""); + db_->Size(count); + prev_count = count; + + START_TIMER; + stat = db_->Query(TABLE_NAME, k, qb, qxb.data(), results); + ss << "Search " << j << " With Size " << count/engine::meta::M << " M"; + STOP_TIMER(ss.str()); + + ASSERT_STATS(stat); + for (auto k=0; k= prev_count); + std::this_thread::sleep_for(std::chrono::seconds(1)); + } + }); + + int loop = 20; + + for (auto i=0; iInsertVectors(TABLE_NAME, qb, qxb.data(), target_ids); + ASSERT_EQ(target_ids.size(), qb); + } else { + db_->InsertVectors(TABLE_NAME, nb, xb.data(), vector_ids); + } + std::this_thread::sleep_for(std::chrono::microseconds(1)); + } + + search.join(); + + delete db_; + boost::filesystem::remove_all(options.meta.path); + +}; + diff --git a/cpp/unittest/db/utils.cpp b/cpp/unittest/db/utils.cpp index 70c0712549..ae05c59d3b 100644 --- a/cpp/unittest/db/utils.cpp +++ b/cpp/unittest/db/utils.cpp @@ -106,6 +106,18 @@ zilliz::milvus::engine::Options MySQLDBTest::GetOptions() { return options; } +void NewMemManagerTest::InitLog() { + el::Configurations defaultConf; + defaultConf.setToDefault(); + defaultConf.set(el::Level::Debug, + el::ConfigurationType::Format, "[%thread-%datetime-%level]: %msg (%fbase:%line)"); + el::Loggers::reconfigureLogger("default", defaultConf); +} + +void NewMemManagerTest::SetUp() { + InitLog(); +} + int main(int argc, char **argv) { ::testing::InitGoogleTest(&argc, argv); if (argc > 1) { diff --git a/cpp/unittest/db/utils.h b/cpp/unittest/db/utils.h index 361c24b4be..d06500de5c 100644 --- a/cpp/unittest/db/utils.h +++ b/cpp/unittest/db/utils.h @@ -87,3 +87,8 @@ class MySQLDBTest : public ::testing::Test { protected: zilliz::milvus::engine::Options GetOptions(); }; + +class NewMemManagerTest : public ::testing::Test { + void InitLog(); + virtual void SetUp() override; +}; From 7ddd982da37e1936670aca63324db982250b93f0 Mon Sep 17 00:00:00 2001 From: zhiru Date: Mon, 8 Jul 2019 15:07:03 +0800 Subject: [PATCH 47/91] update Former-commit-id: b21bbf134cf07237466575a2ae7afc4420eb33fe --- cpp/src/db/Constants.h | 6 +- cpp/src/db/Factories.cpp | 19 +++--- cpp/src/db/Factories.h | 9 +-- cpp/src/db/MemManager.cpp | 83 ++++++++++++++------------ cpp/src/db/MemManager.h | 43 +++++++------- cpp/src/db/MemManagerAbstract.h | 11 ++-- cpp/src/db/MemTable.cpp | 66 +++++++++++---------- cpp/src/db/MemTable.h | 13 ++-- cpp/src/db/MemTableFile.cpp | 56 +++++++++--------- cpp/src/db/MemTableFile.h | 9 +-- cpp/src/db/NewMemManager.cpp | 63 ++++++++++---------- cpp/src/db/NewMemManager.h | 23 ++++---- cpp/src/db/VectorSource.cpp | 18 +++--- cpp/src/db/VectorSource.h | 19 +++--- cpp/unittest/db/mem_test.cpp | 101 ++++++++++++++++---------------- cpp/unittest/db/utils.h | 20 +++---- 16 files changed, 284 
insertions(+), 275 deletions(-) diff --git a/cpp/src/db/Constants.h b/cpp/src/db/Constants.h index 1ba02b1d55..055b10ca9a 100644 --- a/cpp/src/db/Constants.h +++ b/cpp/src/db/Constants.h @@ -10,9 +10,9 @@ namespace milvus { namespace engine { const size_t K = 1024UL; -const size_t M = K*K; -const size_t G = K*M; -const size_t T = K*G; +const size_t M = K * K; +const size_t G = K * M; +const size_t T = K * G; const size_t MAX_TABLE_FILE_MEM = 128 * M; diff --git a/cpp/src/db/Factories.cpp b/cpp/src/db/Factories.cpp index d51727cbff..65c7484a50 100644 --- a/cpp/src/db/Factories.cpp +++ b/cpp/src/db/Factories.cpp @@ -22,6 +22,8 @@ namespace zilliz { namespace milvus { namespace engine { +#define USE_NEW_MEM_MANAGER 1 + DBMetaOptions DBMetaOptionsFactory::Build(const std::string& path) { auto p = path; if(p == "") { @@ -74,17 +76,14 @@ std::shared_ptr DBMetaImplFactory::Build(const DBMetaOptions& metaOp if (dialect.find("mysql") != std::string::npos) { ENGINE_LOG_INFO << "Using MySQL"; return std::make_shared(meta::MySQLMetaImpl(metaOptions, mode)); - } - else if (dialect.find("sqlite") != std::string::npos) { + } else if (dialect.find("sqlite") != std::string::npos) { ENGINE_LOG_INFO << "Using SQLite"; return std::make_shared(meta::DBMetaImpl(metaOptions)); - } - else { + } else { ENGINE_LOG_ERROR << "Invalid dialect in URI: dialect = " << dialect; throw InvalidArgumentException("URI dialect is not mysql / sqlite"); } - } - else { + } else { ENGINE_LOG_ERROR << "Wrong URI format: URI = " << uri; throw InvalidArgumentException("Wrong URI format "); } @@ -102,11 +101,11 @@ DB* DBFactory::Build(const Options& options) { MemManagerAbstractPtr MemManagerFactory::Build(const std::shared_ptr& meta, const Options& options) { - bool useNew = true; - if (useNew) { - return std::make_shared(meta, options); - } +#ifdef USE_NEW_MEM_MANAGER + return std::make_shared(meta, options); +#else return std::make_shared(meta, options); +#endif } } // namespace engine diff --git a/cpp/src/db/Factories.h b/cpp/src/db/Factories.h index 567bc0a8bc..8b6e7b100f 100644 --- a/cpp/src/db/Factories.h +++ b/cpp/src/db/Factories.h @@ -15,12 +15,13 @@ #include #include + namespace zilliz { namespace milvus { namespace engine { struct DBMetaOptionsFactory { - static DBMetaOptions Build(const std::string& path = ""); + static DBMetaOptions Build(const std::string &path = ""); }; struct OptionsFactory { @@ -29,16 +30,16 @@ struct OptionsFactory { struct DBMetaImplFactory { static std::shared_ptr Build(); - static std::shared_ptr Build(const DBMetaOptions& metaOptions, const int& mode); + static std::shared_ptr Build(const DBMetaOptions &metaOptions, const int &mode); }; struct DBFactory { static std::shared_ptr Build(); - static DB* Build(const Options&); + static DB *Build(const Options &); }; struct MemManagerFactory { - static MemManagerAbstractPtr Build(const std::shared_ptr& meta, const Options& options); + static MemManagerAbstractPtr Build(const std::shared_ptr &meta, const Options &options); }; } // namespace engine diff --git a/cpp/src/db/MemManager.cpp b/cpp/src/db/MemManager.cpp index ba8517cdbd..dbf0703173 100644 --- a/cpp/src/db/MemManager.cpp +++ b/cpp/src/db/MemManager.cpp @@ -15,22 +15,23 @@ #include #include + namespace zilliz { namespace milvus { namespace engine { -MemVectors::MemVectors(const std::shared_ptr& meta_ptr, - const meta::TableFileSchema& schema, const Options& options) - : meta_(meta_ptr), - options_(options), - schema_(schema), - id_generator_(new SimpleIDGenerator()), - 
active_engine_(EngineFactory::Build(schema_.dimension_, schema_.location_, (EngineType)schema_.engine_type_)) { +MemVectors::MemVectors(const std::shared_ptr &meta_ptr, + const meta::TableFileSchema &schema, const Options &options) + : meta_(meta_ptr), + options_(options), + schema_(schema), + id_generator_(new SimpleIDGenerator()), + active_engine_(EngineFactory::Build(schema_.dimension_, schema_.location_, (EngineType) schema_.engine_type_)) { } -Status MemVectors::Add(size_t n_, const float* vectors_, IDNumbers& vector_ids_) { - if(active_engine_ == nullptr) { +Status MemVectors::Add(size_t n_, const float *vectors_, IDNumbers &vector_ids_) { + if (active_engine_ == nullptr) { return Status::Error("index engine is null"); } @@ -39,13 +40,15 @@ Status MemVectors::Add(size_t n_, const float* vectors_, IDNumbers& vector_ids_) Status status = active_engine_->AddWithIds(n_, vectors_, vector_ids_.data()); auto end_time = METRICS_NOW_TIME; auto total_time = METRICS_MICROSECONDS(start_time, end_time); - server::Metrics::GetInstance().AddVectorsPerSecondGaugeSet(static_cast(n_), static_cast(schema_.dimension_), total_time); + server::Metrics::GetInstance().AddVectorsPerSecondGaugeSet(static_cast(n_), + static_cast(schema_.dimension_), + total_time); return status; } size_t MemVectors::RowCount() const { - if(active_engine_ == nullptr) { + if (active_engine_ == nullptr) { return 0; } @@ -53,15 +56,15 @@ size_t MemVectors::RowCount() const { } size_t MemVectors::Size() const { - if(active_engine_ == nullptr) { + if (active_engine_ == nullptr) { return 0; } return active_engine_->Size(); } -Status MemVectors::Serialize(std::string& table_id) { - if(active_engine_ == nullptr) { +Status MemVectors::Serialize(std::string &table_id) { + if (active_engine_ == nullptr) { return Status::Error("index engine is null"); } @@ -73,15 +76,16 @@ Status MemVectors::Serialize(std::string& table_id) { auto total_time = METRICS_MICROSECONDS(start_time, end_time); schema_.size_ = size; - server::Metrics::GetInstance().DiskStoreIOSpeedGaugeSet(size/total_time); + server::Metrics::GetInstance().DiskStoreIOSpeedGaugeSet(size / total_time); schema_.file_type_ = (size >= options_.index_trigger_size) ? - meta::TableFileSchema::TO_INDEX : meta::TableFileSchema::RAW; + meta::TableFileSchema::TO_INDEX : meta::TableFileSchema::RAW; auto status = meta_->UpdateTableFile(schema_); LOG(DEBUG) << "New " << ((schema_.file_type_ == meta::TableFileSchema::RAW) ? 
"raw" : "to_index") - << " file " << schema_.file_id_ << " of size " << (double)(active_engine_->Size()) / (double)meta::M << " M"; + << " file " << schema_.file_id_ << " of size " << (double) (active_engine_->Size()) / (double) meta::M + << " M"; active_engine_->Cache(); @@ -99,7 +103,7 @@ MemVectors::~MemVectors() { * MemManager */ MemManager::MemVectorsPtr MemManager::GetMemByTable( - const std::string& table_id) { + const std::string &table_id) { auto memIt = mem_id_map_.find(table_id); if (memIt != mem_id_map_.end()) { return memIt->second; @@ -116,22 +120,23 @@ MemManager::MemVectorsPtr MemManager::GetMemByTable( return mem_id_map_[table_id]; } -Status MemManager::InsertVectors(const std::string& table_id_, - size_t n_, - const float* vectors_, - IDNumbers& vector_ids_) { +Status MemManager::InsertVectors(const std::string &table_id_, + size_t n_, + const float *vectors_, + IDNumbers &vector_ids_) { + + LOG(DEBUG) << "MemManager::InsertVectors: mutable mem = " << GetCurrentMutableMem() << + ", immutable mem = " << GetCurrentImmutableMem() << ", total mem = " << GetCurrentMem(); + std::unique_lock lock(mutex_); return InsertVectorsNoLock(table_id_, n_, vectors_, vector_ids_); } -Status MemManager::InsertVectorsNoLock(const std::string& table_id, - size_t n, - const float* vectors, - IDNumbers& vector_ids) { - - LOG(DEBUG) << "MemManager::InsertVectorsNoLock: mutable mem = " << GetCurrentMutableMem() << - ", immutable mem = " << GetCurrentImmutableMem() << ", total mem = " << GetCurrentMem(); +Status MemManager::InsertVectorsNoLock(const std::string &table_id, + size_t n, + const float *vectors, + IDNumbers &vector_ids) { MemVectorsPtr mem = GetMemByTable(table_id); if (mem == nullptr) { @@ -139,7 +144,7 @@ Status MemManager::InsertVectorsNoLock(const std::string& table_id, } //makesure each file size less than index_trigger_size - if(mem->Size() > options_.index_trigger_size) { + if (mem->Size() > options_.index_trigger_size) { std::unique_lock lock(serialization_mtx_); immu_mem_list_.push_back(mem); mem_id_map_.erase(table_id); @@ -152,8 +157,8 @@ Status MemManager::InsertVectorsNoLock(const std::string& table_id, Status MemManager::ToImmutable() { std::unique_lock lock(mutex_); MemIdMap temp_map; - for (auto& kv: mem_id_map_) { - if(kv.second->RowCount() == 0) { + for (auto &kv: mem_id_map_) { + if (kv.second->RowCount() == 0) { temp_map.insert(kv); continue;//empty vector, no need to serialize } @@ -164,12 +169,12 @@ Status MemManager::ToImmutable() { return Status::OK(); } -Status MemManager::Serialize(std::set& table_ids) { +Status MemManager::Serialize(std::set &table_ids) { ToImmutable(); std::unique_lock lock(serialization_mtx_); std::string table_id; table_ids.clear(); - for (auto& mem : immu_mem_list_) { + for (auto &mem : immu_mem_list_) { mem->Serialize(table_id); table_ids.insert(table_id); } @@ -177,7 +182,7 @@ Status MemManager::Serialize(std::set& table_ids) { return Status::OK(); } -Status MemManager::EraseMemVector(const std::string& table_id) { +Status MemManager::EraseMemVector(const std::string &table_id) { {//erase MemVector from rapid-insert cache std::unique_lock lock(mutex_); mem_id_map_.erase(table_id); @@ -186,8 +191,8 @@ Status MemManager::EraseMemVector(const std::string& table_id) { {//erase MemVector from serialize cache std::unique_lock lock(serialization_mtx_); MemList temp_list; - for (auto& mem : immu_mem_list_) { - if(mem->TableId() != table_id) { + for (auto &mem : immu_mem_list_) { + if (mem->TableId() != table_id) { temp_list.push_back(mem); } } 
@@ -199,7 +204,7 @@ Status MemManager::EraseMemVector(const std::string& table_id) { size_t MemManager::GetCurrentMutableMem() { size_t totalMem = 0; - for (auto& kv : mem_id_map_) { + for (auto &kv : mem_id_map_) { auto memVector = kv.second; totalMem += memVector->Size(); } @@ -208,7 +213,7 @@ size_t MemManager::GetCurrentMutableMem() { size_t MemManager::GetCurrentImmutableMem() { size_t totalMem = 0; - for (auto& memVector : immu_mem_list_) { + for (auto &memVector : immu_mem_list_) { totalMem += memVector->Size(); } return totalMem; diff --git a/cpp/src/db/MemManager.h b/cpp/src/db/MemManager.h index e8460c7a6d..5ad3d08b63 100644 --- a/cpp/src/db/MemManager.h +++ b/cpp/src/db/MemManager.h @@ -17,45 +17,46 @@ #include #include + namespace zilliz { namespace milvus { namespace engine { namespace meta { - class Meta; +class Meta; } class MemVectors { -public: + public: using MetaPtr = meta::Meta::Ptr; using Ptr = std::shared_ptr; - explicit MemVectors(const std::shared_ptr&, - const meta::TableFileSchema&, const Options&); + explicit MemVectors(const std::shared_ptr &, + const meta::TableFileSchema &, const Options &); - Status Add(size_t n_, const float* vectors_, IDNumbers& vector_ids_); + Status Add(size_t n_, const float *vectors_, IDNumbers &vector_ids_); size_t RowCount() const; size_t Size() const; - Status Serialize(std::string& table_id); + Status Serialize(std::string &table_id); ~MemVectors(); - const std::string& Location() const { return schema_.location_; } + const std::string &Location() const { return schema_.location_; } std::string TableId() const { return schema_.table_id_; } -private: + private: MemVectors() = delete; - MemVectors(const MemVectors&) = delete; - MemVectors& operator=(const MemVectors&) = delete; + MemVectors(const MemVectors &) = delete; + MemVectors &operator=(const MemVectors &) = delete; MetaPtr meta_; Options options_; meta::TableFileSchema schema_; - IDGenerator* id_generator_; + IDGenerator *id_generator_; ExecutionEnginePtr active_engine_; }; // MemVectors @@ -63,20 +64,20 @@ private: class MemManager : public MemManagerAbstract { -public: + public: using MetaPtr = meta::Meta::Ptr; using MemVectorsPtr = typename MemVectors::Ptr; using Ptr = std::shared_ptr; - MemManager(const std::shared_ptr& meta, const Options& options) + MemManager(const std::shared_ptr &meta, const Options &options) : meta_(meta), options_(options) {} - Status InsertVectors(const std::string& table_id, - size_t n, const float* vectors, IDNumbers& vector_ids) override; + Status InsertVectors(const std::string &table_id, + size_t n, const float *vectors, IDNumbers &vector_ids) override; - Status Serialize(std::set& table_ids) override; + Status Serialize(std::set &table_ids) override; - Status EraseMemVector(const std::string& table_id) override; + Status EraseMemVector(const std::string &table_id) override; size_t GetCurrentMutableMem() override; @@ -84,11 +85,11 @@ public: size_t GetCurrentMem() override; -private: - MemVectorsPtr GetMemByTable(const std::string& table_id); + private: + MemVectorsPtr GetMemByTable(const std::string &table_id); - Status InsertVectorsNoLock(const std::string& table_id, - size_t n, const float* vectors, IDNumbers& vector_ids); + Status InsertVectorsNoLock(const std::string &table_id, + size_t n, const float *vectors, IDNumbers &vector_ids); Status ToImmutable(); using MemIdMap = std::map; diff --git a/cpp/src/db/MemManagerAbstract.h b/cpp/src/db/MemManagerAbstract.h index 58c73ba6f8..943c454e46 100644 --- a/cpp/src/db/MemManagerAbstract.h +++ 
b/cpp/src/db/MemManagerAbstract.h @@ -2,19 +2,20 @@ #include + namespace zilliz { namespace milvus { namespace engine { class MemManagerAbstract { -public: + public: - virtual Status InsertVectors(const std::string& table_id, - size_t n, const float* vectors, IDNumbers& vector_ids) = 0; + virtual Status InsertVectors(const std::string &table_id, + size_t n, const float *vectors, IDNumbers &vector_ids) = 0; - virtual Status Serialize(std::set& table_ids) = 0; + virtual Status Serialize(std::set &table_ids) = 0; - virtual Status EraseMemVector(const std::string& table_id) = 0; + virtual Status EraseMemVector(const std::string &table_id) = 0; virtual size_t GetCurrentMutableMem() = 0; diff --git a/cpp/src/db/MemTable.cpp b/cpp/src/db/MemTable.cpp index ba3875fbb5..e05aa058ac 100644 --- a/cpp/src/db/MemTable.cpp +++ b/cpp/src/db/MemTable.cpp @@ -1,46 +1,50 @@ #include "MemTable.h" #include "Log.h" + namespace zilliz { namespace milvus { namespace engine { -MemTable::MemTable(const std::string& table_id, - const std::shared_ptr& meta, - const Options& options) : - table_id_(table_id), - meta_(meta), - options_(options) { +MemTable::MemTable(const std::string &table_id, + const std::shared_ptr &meta, + const Options &options) : + table_id_(table_id), + meta_(meta), + options_(options) { } -Status MemTable::Add(VectorSource::Ptr& source) { +Status MemTable::Add(VectorSource::Ptr &source) { + while (!source->AllAdded()) { - MemTableFile::Ptr currentMemTableFile; + + MemTableFile::Ptr current_mem_table_file; if (!mem_table_file_list_.empty()) { - currentMemTableFile = mem_table_file_list_.back(); + current_mem_table_file = mem_table_file_list_.back(); } + Status status; - if (mem_table_file_list_.empty() || currentMemTableFile->IsFull()) { - MemTableFile::Ptr newMemTableFile = std::make_shared(table_id_, meta_, options_); - status = newMemTableFile->Add(source); + if (mem_table_file_list_.empty() || current_mem_table_file->IsFull()) { + MemTableFile::Ptr new_mem_table_file = std::make_shared(table_id_, meta_, options_); + status = new_mem_table_file->Add(source); if (status.ok()) { - mem_table_file_list_.emplace_back(newMemTableFile); + mem_table_file_list_.emplace_back(new_mem_table_file); } + } else { + status = current_mem_table_file->Add(source); } - else { - status = currentMemTableFile->Add(source); - } + if (!status.ok()) { - std::string errMsg = "MemTable::Add failed: " + status.ToString(); - ENGINE_LOG_ERROR << errMsg; - return Status::Error(errMsg); + std::string err_msg = "MemTable::Add failed: " + status.ToString(); + ENGINE_LOG_ERROR << err_msg; + return Status::Error(err_msg); } } return Status::OK(); } -void MemTable::GetCurrentMemTableFile(MemTableFile::Ptr& mem_table_file) { +void MemTable::GetCurrentMemTableFile(MemTableFile::Ptr &mem_table_file) { mem_table_file = mem_table_file_list_.back(); } @@ -49,15 +53,15 @@ size_t MemTable::GetTableFileCount() { } Status MemTable::Serialize() { - for (auto memTableFile = mem_table_file_list_.begin(); memTableFile != mem_table_file_list_.end(); ) { - auto status = (*memTableFile)->Serialize(); + for (auto mem_table_file = mem_table_file_list_.begin(); mem_table_file != mem_table_file_list_.end();) { + auto status = (*mem_table_file)->Serialize(); if (!status.ok()) { - std::string errMsg = "MemTable::Serialize failed: " + status.ToString(); - ENGINE_LOG_ERROR << errMsg; - return Status::Error(errMsg); + std::string err_msg = "MemTable::Serialize failed: " + status.ToString(); + ENGINE_LOG_ERROR << err_msg; + return Status::Error(err_msg); } 
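// Annotation on the loop below (illustrative sketch, not original patch content): serialized files are
// removed from mem_table_file_list_ while iterating, which is only safe because std::vector::erase
// returns an iterator to the element after the erased one, e.g.
//     for (auto it = files.begin(); it != files.end(); ) {
//         it = files.erase(it);   // advance via the return value, never ++it after an erase
//     }
// The lock_guard taken below keeps the erase consistent with concurrent GetCurrentMem() callers,
// which acquire the same mutex_.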
std::lock_guard lock(mutex_); - memTableFile = mem_table_file_list_.erase(memTableFile); + mem_table_file = mem_table_file_list_.erase(mem_table_file); } return Status::OK(); } @@ -66,17 +70,17 @@ bool MemTable::Empty() { return mem_table_file_list_.empty(); } -const std::string& MemTable::GetTableId() const { +const std::string &MemTable::GetTableId() const { return table_id_; } size_t MemTable::GetCurrentMem() { std::lock_guard lock(mutex_); - size_t totalMem = 0; - for (auto& memTableFile : mem_table_file_list_) { - totalMem += memTableFile->GetCurrentMem(); + size_t total_mem = 0; + for (auto &mem_table_file : mem_table_file_list_) { + total_mem += mem_table_file->GetCurrentMem(); } - return totalMem; + return total_mem; } } // namespace engine diff --git a/cpp/src/db/MemTable.h b/cpp/src/db/MemTable.h index 9bae932e62..198fcc228a 100644 --- a/cpp/src/db/MemTable.h +++ b/cpp/src/db/MemTable.h @@ -6,23 +6,24 @@ #include + namespace zilliz { namespace milvus { namespace engine { class MemTable { -public: + public: using Ptr = std::shared_ptr; using MemTableFileList = std::vector; using MetaPtr = meta::Meta::Ptr; - MemTable(const std::string& table_id, const std::shared_ptr& meta, const Options& options); + MemTable(const std::string &table_id, const std::shared_ptr &meta, const Options &options); - Status Add(VectorSource::Ptr& source); + Status Add(VectorSource::Ptr &source); - void GetCurrentMemTableFile(MemTableFile::Ptr& mem_table_file); + void GetCurrentMemTableFile(MemTableFile::Ptr &mem_table_file); size_t GetTableFileCount(); @@ -30,11 +31,11 @@ public: bool Empty(); - const std::string& GetTableId() const; + const std::string &GetTableId() const; size_t GetCurrentMem(); -private: + private: const std::string table_id_; MemTableFileList mem_table_file_list_; diff --git a/cpp/src/db/MemTableFile.cpp b/cpp/src/db/MemTableFile.cpp index 0ff91de00b..649a680cf3 100644 --- a/cpp/src/db/MemTableFile.cpp +++ b/cpp/src/db/MemTableFile.cpp @@ -6,23 +6,24 @@ #include + namespace zilliz { namespace milvus { namespace engine { -MemTableFile::MemTableFile(const std::string& table_id, - const std::shared_ptr& meta, - const Options& options) : - table_id_(table_id), - meta_(meta), - options_(options) { +MemTableFile::MemTableFile(const std::string &table_id, + const std::shared_ptr &meta, + const Options &options) : + table_id_(table_id), + meta_(meta), + options_(options) { current_mem_ = 0; auto status = CreateTableFile(); if (status.ok()) { execution_engine_ = EngineFactory::Build(table_file_schema_.dimension_, table_file_schema_.location_, - (EngineType)table_file_schema_.engine_type_); + (EngineType) table_file_schema_.engine_type_); } } @@ -33,31 +34,30 @@ Status MemTableFile::CreateTableFile() { auto status = meta_->CreateTableFile(table_file_schema); if (status.ok()) { table_file_schema_ = table_file_schema; - } - else { - std::string errMsg = "MemTableFile::CreateTableFile failed: " + status.ToString(); - ENGINE_LOG_ERROR << errMsg; + } else { + std::string err_msg = "MemTableFile::CreateTableFile failed: " + status.ToString(); + ENGINE_LOG_ERROR << err_msg; } return status; } -Status MemTableFile::Add(const VectorSource::Ptr& source) { +Status MemTableFile::Add(const VectorSource::Ptr &source) { if (table_file_schema_.dimension_ <= 0) { - std::string errMsg = "MemTableFile::Add: table_file_schema dimension = " + - std::to_string(table_file_schema_.dimension_) + ", table_id = " + table_file_schema_.table_id_; - ENGINE_LOG_ERROR << errMsg; - return Status::Error(errMsg); + std::string 
err_msg = "MemTableFile::Add: table_file_schema dimension = " + + std::to_string(table_file_schema_.dimension_) + ", table_id = " + table_file_schema_.table_id_; + ENGINE_LOG_ERROR << err_msg; + return Status::Error(err_msg); } - size_t singleVectorMemSize = table_file_schema_.dimension_ * VECTOR_TYPE_SIZE; - size_t memLeft = GetMemLeft(); - if (memLeft >= singleVectorMemSize) { - size_t numVectorsToAdd = std::ceil(memLeft / singleVectorMemSize); - size_t numVectorsAdded; - auto status = source->Add(execution_engine_, table_file_schema_, numVectorsToAdd, numVectorsAdded); + size_t single_vector_mem_size = table_file_schema_.dimension_ * VECTOR_TYPE_SIZE; + size_t mem_left = GetMemLeft(); + if (mem_left >= single_vector_mem_size) { + size_t num_vectors_to_add = std::ceil(mem_left / single_vector_mem_size); + size_t num_vectors_added; + auto status = source->Add(execution_engine_, table_file_schema_, num_vectors_to_add, num_vectors_added); if (status.ok()) { - current_mem_ += (numVectorsAdded * singleVectorMemSize); + current_mem_ += (num_vectors_added * single_vector_mem_size); } return status; } @@ -73,8 +73,8 @@ size_t MemTableFile::GetMemLeft() { } bool MemTableFile::IsFull() { - size_t singleVectorMemSize = table_file_schema_.dimension_ * VECTOR_TYPE_SIZE; - return (GetMemLeft() < singleVectorMemSize); + size_t single_vector_mem_size = table_file_schema_.dimension_ * VECTOR_TYPE_SIZE; + return (GetMemLeft() < single_vector_mem_size); } Status MemTableFile::Serialize() { @@ -88,15 +88,15 @@ Status MemTableFile::Serialize() { auto total_time = METRICS_MICROSECONDS(start_time, end_time); table_file_schema_.size_ = size; - server::Metrics::GetInstance().DiskStoreIOSpeedGaugeSet((double)size/total_time); + server::Metrics::GetInstance().DiskStoreIOSpeedGaugeSet((double) size / total_time); table_file_schema_.file_type_ = (size >= options_.index_trigger_size) ? - meta::TableFileSchema::TO_INDEX : meta::TableFileSchema::RAW; + meta::TableFileSchema::TO_INDEX : meta::TableFileSchema::RAW; auto status = meta_->UpdateTableFile(table_file_schema_); LOG(DEBUG) << "New " << ((table_file_schema_.file_type_ == meta::TableFileSchema::RAW) ? 
"raw" : "to_index") - << " file " << table_file_schema_.file_id_ << " of size " << (double)size / (double)M << " M"; + << " file " << table_file_schema_.file_id_ << " of size " << (double) size / (double) M << " M"; execution_engine_->Cache(); diff --git a/cpp/src/db/MemTableFile.h b/cpp/src/db/MemTableFile.h index 1be0ae78ba..4d0011b362 100644 --- a/cpp/src/db/MemTableFile.h +++ b/cpp/src/db/MemTableFile.h @@ -5,20 +5,21 @@ #include "VectorSource.h" #include "ExecutionEngine.h" + namespace zilliz { namespace milvus { namespace engine { class MemTableFile { -public: + public: using Ptr = std::shared_ptr; using MetaPtr = meta::Meta::Ptr; - MemTableFile(const std::string& table_id, const std::shared_ptr& meta, const Options& options); + MemTableFile(const std::string &table_id, const std::shared_ptr &meta, const Options &options); - Status Add(const VectorSource::Ptr& source); + Status Add(const VectorSource::Ptr &source); size_t GetCurrentMem(); @@ -28,7 +29,7 @@ public: Status Serialize(); -private: + private: Status CreateTableFile(); diff --git a/cpp/src/db/NewMemManager.cpp b/cpp/src/db/NewMemManager.cpp index 3c78f37101..b0fcc9d4ae 100644 --- a/cpp/src/db/NewMemManager.cpp +++ b/cpp/src/db/NewMemManager.cpp @@ -5,11 +5,12 @@ #include + namespace zilliz { namespace milvus { namespace engine { -NewMemManager::MemTablePtr NewMemManager::GetMemByTable(const std::string& table_id) { +NewMemManager::MemTablePtr NewMemManager::GetMemByTable(const std::string &table_id) { auto memIt = mem_id_map_.find(table_id); if (memIt != mem_id_map_.end()) { return memIt->second; @@ -19,27 +20,27 @@ NewMemManager::MemTablePtr NewMemManager::GetMemByTable(const std::string& table return mem_id_map_[table_id]; } -Status NewMemManager::InsertVectors(const std::string& table_id_, +Status NewMemManager::InsertVectors(const std::string &table_id_, size_t n_, - const float* vectors_, - IDNumbers& vector_ids_) { + const float *vectors_, + IDNumbers &vector_ids_) { while (GetCurrentMem() > options_.maximum_memory) { std::this_thread::sleep_for(std::chrono::milliseconds(1)); } + LOG(DEBUG) << "NewMemManager::InsertVectors: mutable mem = " << GetCurrentMutableMem() << + ", immutable mem = " << GetCurrentImmutableMem() << ", total mem = " << GetCurrentMem(); + std::unique_lock lock(mutex_); return InsertVectorsNoLock(table_id_, n_, vectors_, vector_ids_); } -Status NewMemManager::InsertVectorsNoLock(const std::string& table_id, +Status NewMemManager::InsertVectorsNoLock(const std::string &table_id, size_t n, - const float* vectors, - IDNumbers& vector_ids) { - - LOG(DEBUG) << "NewMemManager::InsertVectorsNoLock: mutable mem = " << GetCurrentMutableMem() << - ", immutable mem = " << GetCurrentImmutableMem() << ", total mem = " << GetCurrentMem(); + const float *vectors, + IDNumbers &vector_ids) { MemTablePtr mem = GetMemByTable(table_id); VectorSource::Ptr source = std::make_shared(n, vectors); @@ -54,37 +55,33 @@ Status NewMemManager::InsertVectorsNoLock(const std::string& table_id, Status NewMemManager::ToImmutable() { std::unique_lock lock(mutex_); MemIdMap temp_map; - for (auto& kv: mem_id_map_) { - if(kv.second->Empty()) { + for (auto &kv: mem_id_map_) { + if (kv.second->Empty()) { + //empty table, no need to serialize temp_map.insert(kv); - continue;//empty table, no need to serialize + } else { + immu_mem_list_.push_back(kv.second); } - immu_mem_list_.push_back(kv.second); } mem_id_map_.swap(temp_map); return Status::OK(); } -Status NewMemManager::Serialize(std::set& table_ids) { +Status 
NewMemManager::Serialize(std::set &table_ids) { ToImmutable(); std::unique_lock lock(serialization_mtx_); table_ids.clear(); - for (auto& mem : immu_mem_list_) { + for (auto &mem : immu_mem_list_) { mem->Serialize(); table_ids.insert(mem->GetTableId()); } immu_mem_list_.clear(); -// for (auto mem = immu_mem_list_.begin(); mem != immu_mem_list_.end(); ) { -// (*mem)->Serialize(); -// table_ids.insert((*mem)->GetTableId()); -// mem = immu_mem_list_.erase(mem); -// LOG(DEBUG) << "immu_mem_list_ size = " << immu_mem_list_.size(); -// } + return Status::OK(); } -Status NewMemManager::EraseMemVector(const std::string& table_id) { +Status NewMemManager::EraseMemVector(const std::string &table_id) { {//erase MemVector from rapid-insert cache std::unique_lock lock(mutex_); mem_id_map_.erase(table_id); @@ -93,8 +90,8 @@ Status NewMemManager::EraseMemVector(const std::string& table_id) { {//erase MemVector from serialize cache std::unique_lock lock(serialization_mtx_); MemList temp_list; - for (auto& mem : immu_mem_list_) { - if(mem->GetTableId() != table_id) { + for (auto &mem : immu_mem_list_) { + if (mem->GetTableId() != table_id) { temp_list.push_back(mem); } } @@ -105,20 +102,20 @@ Status NewMemManager::EraseMemVector(const std::string& table_id) { } size_t NewMemManager::GetCurrentMutableMem() { - size_t totalMem = 0; - for (auto& kv : mem_id_map_) { + size_t total_mem = 0; + for (auto &kv : mem_id_map_) { auto memTable = kv.second; - totalMem += memTable->GetCurrentMem(); + total_mem += memTable->GetCurrentMem(); } - return totalMem; + return total_mem; } size_t NewMemManager::GetCurrentImmutableMem() { - size_t totalMem = 0; - for (auto& memTable : immu_mem_list_) { - totalMem += memTable->GetCurrentMem(); + size_t total_mem = 0; + for (auto &mem_table : immu_mem_list_) { + total_mem += mem_table->GetCurrentMem(); } - return totalMem; + return total_mem; } size_t NewMemManager::GetCurrentMem() { diff --git a/cpp/src/db/NewMemManager.h b/cpp/src/db/NewMemManager.h index 9883480404..5b933c94ca 100644 --- a/cpp/src/db/NewMemManager.h +++ b/cpp/src/db/NewMemManager.h @@ -11,25 +11,26 @@ #include #include + namespace zilliz { namespace milvus { namespace engine { class NewMemManager : public MemManagerAbstract { -public: + public: using MetaPtr = meta::Meta::Ptr; using Ptr = std::shared_ptr; using MemTablePtr = typename MemTable::Ptr; - NewMemManager(const std::shared_ptr& meta, const Options& options) - : meta_(meta), options_(options) {} + NewMemManager(const std::shared_ptr &meta, const Options &options) + : meta_(meta), options_(options) {} - Status InsertVectors(const std::string& table_id, - size_t n, const float* vectors, IDNumbers& vector_ids) override; + Status InsertVectors(const std::string &table_id, + size_t n, const float *vectors, IDNumbers &vector_ids) override; - Status Serialize(std::set& table_ids) override; + Status Serialize(std::set &table_ids) override; - Status EraseMemVector(const std::string& table_id) override; + Status EraseMemVector(const std::string &table_id) override; size_t GetCurrentMutableMem() override; @@ -37,11 +38,11 @@ public: size_t GetCurrentMem() override; -private: - MemTablePtr GetMemByTable(const std::string& table_id); + private: + MemTablePtr GetMemByTable(const std::string &table_id); - Status InsertVectorsNoLock(const std::string& table_id, - size_t n, const float* vectors, IDNumbers& vector_ids); + Status InsertVectorsNoLock(const std::string &table_id, + size_t n, const float *vectors, IDNumbers &vector_ids); Status ToImmutable(); using MemIdMap 
= std::map; diff --git a/cpp/src/db/VectorSource.cpp b/cpp/src/db/VectorSource.cpp index d032be51f6..74c07ae1f6 100644 --- a/cpp/src/db/VectorSource.cpp +++ b/cpp/src/db/VectorSource.cpp @@ -4,6 +4,7 @@ #include "Log.h" #include "metrics/Metrics.h" + namespace zilliz { namespace milvus { namespace engine { @@ -11,16 +12,16 @@ namespace engine { VectorSource::VectorSource(const size_t &n, const float *vectors) : - n_(n), - vectors_(vectors), - id_generator_(new SimpleIDGenerator()) { + n_(n), + vectors_(vectors), + id_generator_(new SimpleIDGenerator()) { current_num_vectors_added = 0; } -Status VectorSource::Add(const ExecutionEnginePtr& execution_engine, - const meta::TableFileSchema& table_file_schema, - const size_t& num_vectors_to_add, - size_t& num_vectors_added) { +Status VectorSource::Add(const ExecutionEnginePtr &execution_engine, + const meta::TableFileSchema &table_file_schema, + const size_t &num_vectors_to_add, + size_t &num_vectors_added) { auto start_time = METRICS_NOW_TIME; @@ -36,8 +37,7 @@ Status VectorSource::Add(const ExecutionEnginePtr& execution_engine, vector_ids_.insert(vector_ids_.end(), std::make_move_iterator(vector_ids_to_add.begin()), std::make_move_iterator(vector_ids_to_add.end())); - } - else { + } else { ENGINE_LOG_ERROR << "VectorSource::Add failed: " + status.ToString(); } diff --git a/cpp/src/db/VectorSource.h b/cpp/src/db/VectorSource.h index dec31f39e1..7092805a6d 100644 --- a/cpp/src/db/VectorSource.h +++ b/cpp/src/db/VectorSource.h @@ -5,22 +5,23 @@ #include "IDGenerator.h" #include "ExecutionEngine.h" + namespace zilliz { namespace milvus { namespace engine { class VectorSource { -public: + public: using Ptr = std::shared_ptr; - VectorSource(const size_t& n, const float* vectors); + VectorSource(const size_t &n, const float *vectors); - Status Add(const ExecutionEnginePtr& execution_engine, - const meta::TableFileSchema& table_file_schema, - const size_t& num_vectors_to_add, - size_t& num_vectors_added); + Status Add(const ExecutionEnginePtr &execution_engine, + const meta::TableFileSchema &table_file_schema, + const size_t &num_vectors_to_add, + size_t &num_vectors_added); size_t GetNumVectorsAdded(); @@ -28,15 +29,15 @@ public: IDNumbers GetVectorIds(); -private: + private: const size_t n_; - const float* vectors_; + const float *vectors_; IDNumbers vector_ids_; size_t current_num_vectors_added; - IDGenerator* id_generator_; + IDGenerator *id_generator_; }; //VectorSource diff --git a/cpp/unittest/db/mem_test.cpp b/cpp/unittest/db/mem_test.cpp index 818c3a6388..5b7972ec35 100644 --- a/cpp/unittest/db/mem_test.cpp +++ b/cpp/unittest/db/mem_test.cpp @@ -15,33 +15,34 @@ #include #include + using namespace zilliz::milvus; namespace { - static const std::string TABLE_NAME = "test_group"; - static constexpr int64_t TABLE_DIM = 256; - static constexpr int64_t VECTOR_COUNT = 250000; - static constexpr int64_t INSERT_LOOP = 10000; +static const std::string TABLE_NAME = "test_group"; +static constexpr int64_t TABLE_DIM = 256; +static constexpr int64_t VECTOR_COUNT = 250000; +static constexpr int64_t INSERT_LOOP = 10000; - engine::meta::TableSchema BuildTableSchema() { - engine::meta::TableSchema table_info; - table_info.dimension_ = TABLE_DIM; - table_info.table_id_ = TABLE_NAME; - table_info.engine_type_ = (int)engine::EngineType::FAISS_IDMAP; - return table_info; - } +engine::meta::TableSchema BuildTableSchema() { + engine::meta::TableSchema table_info; + table_info.dimension_ = TABLE_DIM; + table_info.table_id_ = TABLE_NAME; + table_info.engine_type_ = 
(int) engine::EngineType::FAISS_IDMAP; + return table_info; +} - void BuildVectors(int64_t n, std::vector& vectors) { - vectors.clear(); - vectors.resize(n*TABLE_DIM); - float* data = vectors.data(); - for(int i = 0; i < n; i++) { - for(int j = 0; j < TABLE_DIM; j++) data[TABLE_DIM * i + j] = drand48(); - data[TABLE_DIM * i] += i / 2000.; - } +void BuildVectors(int64_t n, std::vector &vectors) { + vectors.clear(); + vectors.resize(n * TABLE_DIM); + float *data = vectors.data(); + for (int i = 0; i < n; i++) { + for (int j = 0; j < TABLE_DIM; j++) data[TABLE_DIM * i + j] = drand48(); + data[TABLE_DIM * i] += i / 2000.; } } +} TEST_F(NewMemManagerTest, VECTOR_SOURCE_TEST) { @@ -65,7 +66,7 @@ TEST_F(NewMemManagerTest, VECTOR_SOURCE_TEST) { size_t num_vectors_added; engine::ExecutionEnginePtr execution_engine_ = engine::EngineFactory::Build(table_file_schema.dimension_, table_file_schema.location_, - (engine::EngineType)table_file_schema.engine_type_); + (engine::EngineType) table_file_schema.engine_type_); status = source.Add(execution_engine_, table_file_schema, 50, num_vectors_added); ASSERT_TRUE(status.ok()); @@ -82,10 +83,6 @@ TEST_F(NewMemManagerTest, VECTOR_SOURCE_TEST) { vector_ids = source.GetVectorIds(); ASSERT_EQ(vector_ids.size(), 100); -// for (auto& id : vector_ids) { -// std::cout << id << std::endl; -// } - status = impl_->DropAll(); ASSERT_TRUE(status.ok()); } @@ -99,7 +96,7 @@ TEST_F(NewMemManagerTest, MEM_TABLE_FILE_TEST) { auto status = impl_->CreateTable(table_schema); ASSERT_TRUE(status.ok()); - engine::MemTableFile memTableFile(TABLE_NAME, impl_, options); + engine::MemTableFile mem_table_file(TABLE_NAME, impl_, options); int64_t n_100 = 100; std::vector vectors_100; @@ -107,28 +104,28 @@ TEST_F(NewMemManagerTest, MEM_TABLE_FILE_TEST) { engine::VectorSource::Ptr source = std::make_shared(n_100, vectors_100.data()); - status = memTableFile.Add(source); + status = mem_table_file.Add(source); ASSERT_TRUE(status.ok()); -// std::cout << memTableFile.GetCurrentMem() << " " << memTableFile.GetMemLeft() << std::endl; +// std::cout << mem_table_file.GetCurrentMem() << " " << mem_table_file.GetMemLeft() << std::endl; engine::IDNumbers vector_ids = source->GetVectorIds(); ASSERT_EQ(vector_ids.size(), 100); size_t singleVectorMem = sizeof(float) * TABLE_DIM; - ASSERT_EQ(memTableFile.GetCurrentMem(), n_100 * singleVectorMem); + ASSERT_EQ(mem_table_file.GetCurrentMem(), n_100 * singleVectorMem); int64_t n_max = engine::MAX_TABLE_FILE_MEM / singleVectorMem; std::vector vectors_128M; BuildVectors(n_max, vectors_128M); engine::VectorSource::Ptr source_128M = std::make_shared(n_max, vectors_128M.data()); - status = memTableFile.Add(source_128M); + status = mem_table_file.Add(source_128M); vector_ids = source_128M->GetVectorIds(); ASSERT_EQ(vector_ids.size(), n_max - n_100); - ASSERT_TRUE(memTableFile.IsFull()); + ASSERT_TRUE(mem_table_file.IsFull()); status = impl_->DropAll(); ASSERT_TRUE(status.ok()); @@ -149,34 +146,34 @@ TEST_F(NewMemManagerTest, MEM_TABLE_TEST) { engine::VectorSource::Ptr source_100 = std::make_shared(n_100, vectors_100.data()); - engine::MemTable memTable(TABLE_NAME, impl_, options); + engine::MemTable mem_table(TABLE_NAME, impl_, options); - status = memTable.Add(source_100); + status = mem_table.Add(source_100); ASSERT_TRUE(status.ok()); engine::IDNumbers vector_ids = source_100->GetVectorIds(); ASSERT_EQ(vector_ids.size(), 100); - engine::MemTableFile::Ptr memTableFile; - memTable.GetCurrentMemTableFile(memTableFile); + engine::MemTableFile::Ptr mem_table_file; + 
mem_table.GetCurrentMemTableFile(mem_table_file); size_t singleVectorMem = sizeof(float) * TABLE_DIM; - ASSERT_EQ(memTableFile->GetCurrentMem(), n_100 * singleVectorMem); + ASSERT_EQ(mem_table_file->GetCurrentMem(), n_100 * singleVectorMem); int64_t n_max = engine::MAX_TABLE_FILE_MEM / singleVectorMem; std::vector vectors_128M; BuildVectors(n_max, vectors_128M); engine::VectorSource::Ptr source_128M = std::make_shared(n_max, vectors_128M.data()); - status = memTable.Add(source_128M); + status = mem_table.Add(source_128M); ASSERT_TRUE(status.ok()); vector_ids = source_128M->GetVectorIds(); ASSERT_EQ(vector_ids.size(), n_max); - memTable.GetCurrentMemTableFile(memTableFile); - ASSERT_EQ(memTableFile->GetCurrentMem(), n_100 * singleVectorMem); + mem_table.GetCurrentMemTableFile(mem_table_file); + ASSERT_EQ(mem_table_file->GetCurrentMem(), n_100 * singleVectorMem); - ASSERT_EQ(memTable.GetTableFileCount(), 2); + ASSERT_EQ(mem_table.GetTableFileCount(), 2); int64_t n_1G = 1024000; std::vector vectors_1G; @@ -184,16 +181,16 @@ TEST_F(NewMemManagerTest, MEM_TABLE_TEST) { engine::VectorSource::Ptr source_1G = std::make_shared(n_1G, vectors_1G.data()); - status = memTable.Add(source_1G); + status = mem_table.Add(source_1G); ASSERT_TRUE(status.ok()); vector_ids = source_1G->GetVectorIds(); ASSERT_EQ(vector_ids.size(), n_1G); int expectedTableFileCount = 2 + std::ceil((n_1G - n_100) * singleVectorMem / engine::MAX_TABLE_FILE_MEM); - ASSERT_EQ(memTable.GetTableFileCount(), expectedTableFileCount); + ASSERT_EQ(mem_table.GetTableFileCount(), expectedTableFileCount); - status = memTable.Serialize(); + status = mem_table.Serialize(); ASSERT_TRUE(status.ok()); status = impl_->DropAll(); @@ -216,7 +213,7 @@ TEST_F(NewMemManagerTest, SERIAL_INSERT_SEARCH_TEST) { ASSERT_STATS(stat); ASSERT_EQ(table_info_get.dimension_, TABLE_DIM); - std::map> search_vectors; + std::map> search_vectors; { engine::IDNumbers vector_ids; int64_t nb = 1024000; @@ -231,8 +228,8 @@ TEST_F(NewMemManagerTest, SERIAL_INSERT_SEARCH_TEST) { std::mt19937 gen(rd()); std::uniform_int_distribution dis(0, nb - 1); - int64_t numQuery = 20; - for (int64_t i = 0; i < numQuery; ++i) { + int64_t num_query = 20; + for (int64_t i = 0; i < num_query; ++i) { int64_t index = dis(gen); std::vector search; for (int64_t j = 0; j < TABLE_DIM; j++) { @@ -243,8 +240,8 @@ TEST_F(NewMemManagerTest, SERIAL_INSERT_SEARCH_TEST) { } int k = 10; - for(auto& pair : search_vectors) { - auto& search = pair.second; + for (auto &pair : search_vectors) { + auto &search = pair.second; engine::QueryResults results; stat = db_->Query(TABLE_NAME, k, 1, search.data(), results); ASSERT_EQ(results[0][0].first, pair.first); @@ -329,18 +326,18 @@ TEST_F(NewMemManagerTest, CONCURRENT_INSERT_SEARCH_TEST) { uint64_t count = 0; uint64_t prev_count = 0; - for (auto j=0; j<10; ++j) { + for (auto j = 0; j < 10; ++j) { ss.str(""); db_->Size(count); prev_count = count; START_TIMER; stat = db_->Query(TABLE_NAME, k, qb, qxb.data(), results); - ss << "Search " << j << " With Size " << count/engine::meta::M << " M"; + ss << "Search " << j << " With Size " << count / engine::meta::M << " M"; STOP_TIMER(ss.str()); ASSERT_STATS(stat); - for (auto k=0; kInsertVectors(TABLE_NAME, qb, qxb.data(), target_ids); ASSERT_EQ(target_ids.size(), qb); } else { diff --git a/cpp/unittest/db/utils.h b/cpp/unittest/db/utils.h index d06500de5c..9c126030c2 100644 --- a/cpp/unittest/db/utils.h +++ b/cpp/unittest/db/utils.h @@ -30,7 +30,7 @@ #define STOP_TIMER(name) #endif -void 
ASSERT_STATS(zilliz::milvus::engine::Status& stat); +void ASSERT_STATS(zilliz::milvus::engine::Status &stat); //class TestEnv : public ::testing::Environment { //public: @@ -54,8 +54,8 @@ void ASSERT_STATS(zilliz::milvus::engine::Status& stat); // ::testing::AddGlobalTestEnvironment(new TestEnv); class DBTest : public ::testing::Test { -protected: - zilliz::milvus::engine::DB* db_; + protected: + zilliz::milvus::engine::DB *db_; void InitLog(); virtual void SetUp() override; @@ -64,13 +64,13 @@ protected: }; class DBTest2 : public DBTest { -protected: + protected: virtual zilliz::milvus::engine::Options GetOptions() override; }; class MetaTest : public DBTest { -protected: + protected: std::shared_ptr impl_; virtual void SetUp() override; @@ -78,17 +78,17 @@ protected: }; class MySQLTest : public ::testing::Test { -protected: + protected: // std::shared_ptr impl_; zilliz::milvus::engine::DBMetaOptions getDBMetaOptions(); }; -class MySQLDBTest : public ::testing::Test { -protected: +class MySQLDBTest : public ::testing::Test { + protected: zilliz::milvus::engine::Options GetOptions(); }; -class NewMemManagerTest : public ::testing::Test { +class NewMemManagerTest : public ::testing::Test { void InitLog(); - virtual void SetUp() override; + void SetUp() override; }; From 2c0f2a9459045472cddd86be6475dc0083318e3f Mon Sep 17 00:00:00 2001 From: quicksilver Date: Tue, 9 Jul 2019 13:47:25 +0800 Subject: [PATCH 48/91] configure git global user information Former-commit-id: a9a17182a1fa22411c7a3bf0c7bb0505550cb333 --- ci/jenkinsfile/milvus_build.groovy | 2 ++ 1 file changed, 2 insertions(+) diff --git a/ci/jenkinsfile/milvus_build.groovy b/ci/jenkinsfile/milvus_build.groovy index ed07d2b992..243b6dc2da 100644 --- a/ci/jenkinsfile/milvus_build.groovy +++ b/ci/jenkinsfile/milvus_build.groovy @@ -5,6 +5,8 @@ container('milvus-build-env') { try { checkout([$class: 'GitSCM', branches: [[name: "${SEMVER}"]], doGenerateSubmoduleConfigurations: false, extensions: [[$class: 'SubmoduleOption',disableSubmodules: false,parentCredentials: true,recursiveSubmodules: true,reference: '',trackingSubmodules: false]], submoduleCfg: [], userRemoteConfigs: [[credentialsId: "${params.GIT_USER}", url: "git@192.168.1.105:megasearch/milvus.git"]]]) dir ("cpp") { + sh "git config --global user.email \"test@zilliz.com\"" + sh "git config --global user.name \"test\"" sh "./build.sh -t ${params.BUILD_TYPE} -u -c" } } catch (exc) { From de7b3333be5892a19cdaaaf6be06a15bb60c11a0 Mon Sep 17 00:00:00 2001 From: quicksilver Date: Tue, 9 Jul 2019 14:30:00 +0800 Subject: [PATCH 49/91] add nightly_main_jenkinsfile Former-commit-id: 15122904442d76bd2befb5f683a92f3353fca376 --- ci/nightly_main_jenkinsfile | 256 ++++++++++++++++++++++++++++++++++++ 1 file changed, 256 insertions(+) create mode 100644 ci/nightly_main_jenkinsfile diff --git a/ci/nightly_main_jenkinsfile b/ci/nightly_main_jenkinsfile new file mode 100644 index 0000000000..9c54b6b8c0 --- /dev/null +++ b/ci/nightly_main_jenkinsfile @@ -0,0 +1,256 @@ +pipeline { + agent none + + options { + timestamps() + } + + environment { + PROJECT_NAME = "milvus" + LOWER_BUILD_TYPE = BUILD_TYPE.toLowerCase() + SEMVER = "${env.gitlabSourceBranch == null ? params.ENGINE_BRANCH.substring(params.ENGINE_BRANCH.lastIndexOf('/') + 1) : env.gitlabSourceBranch}" + GITLAB_AFTER_COMMIT = "${env.gitlabAfter == null ? null : env.gitlabAfter}" + SUFFIX_VERSION_NAME = "${env.gitlabAfter == null ? null : env.gitlabAfter.substring(0, 6)}" + DOCKER_VERSION_STR = "${env.gitlabAfter == null ? 
"${SEMVER}-${LOWER_BUILD_TYPE}-\$\{BUILD_DATE_FORMATTED\, \"yyyyMMdd\"\}" : "${SEMVER}-${LOWER_BUILD_TYPE}-${SUFFIX_VERSION_NAME}"}" + } + + stages { + stage("Ubuntu 16.04") { + environment { + PACKAGE_VERSION = VersionNumber([ + versionNumberString : '${SEMVER}-${LOWER_BUILD_TYPE}-${BUILD_DATE_FORMATTED, "yyyyMMdd"}' + ]); + + DOCKER_VERSION = VersionNumber([ + versionNumberString : '${DOCKER_VERSION_STR}' + ]); + } + + stages { + stage("Run Build") { + agent { + kubernetes { + cloud 'build-kubernetes' + label 'build' + defaultContainer 'jnlp' + containerTemplate { + name 'milvus-build-env' + image 'registry.zilliz.com/milvus/milvus-build-env:v0.10' + ttyEnabled true + command 'cat' + } + } + } + stages { + stage('Build') { + steps { + gitlabCommitStatus(name: 'Build') { + script { + load "${env.WORKSPACE}/ci/jenkinsfile/milvus_build.groovy" + load "${env.WORKSPACE}/ci/jenkinsfile/packaged_milvus.groovy" + } + } + } + } + } + post { + aborted { + script { + updateGitlabCommitStatus name: 'Build', state: 'canceled' + echo "Milvus Build aborted !" + } + } + + failure { + script { + updateGitlabCommitStatus name: 'Build', state: 'failed' + echo "Milvus Build failure !" + } + } + } + } + + stage("Publish docker and helm") { + agent { + kubernetes { + label 'publish' + defaultContainer 'jnlp' + yaml """ +apiVersion: v1 +kind: Pod +metadata: + labels: + app: publish + componet: docker +spec: + containers: + - name: publish-docker + image: registry.zilliz.com/library/zilliz_docker:v1.0.0 + securityContext: + privileged: true + command: + - cat + tty: true + volumeMounts: + - name: docker-sock + mountPath: /var/run/docker.sock + volumes: + - name: docker-sock + hostPath: + path: /var/run/docker.sock +""" + } + } + stages { + stage('Publish Docker') { + steps { + gitlabCommitStatus(name: 'Publish Docker') { + script { + load "${env.WORKSPACE}/ci/jenkinsfile/publish_docker.groovy" + } + } + } + } + } + post { + aborted { + script { + updateGitlabCommitStatus name: 'Publish Docker', state: 'canceled' + echo "Milvus Publish Docker aborted !" + } + } + + failure { + script { + updateGitlabCommitStatus name: 'Publish Docker', state: 'failed' + echo "Milvus Publish Docker failure !" + } + } + } + } + + stage("Deploy to Development") { + stages { + stage("Deploy to Dev") { + agent { + kubernetes { + label 'jenkins-slave' + defaultContainer 'jnlp' + } + } + stages { + stage('Deploy') { + steps { + gitlabCommitStatus(name: 'Deloy to Dev') { + script { + load "${env.WORKSPACE}/ci/jenkinsfile/deploy2dev.groovy" + } + } + } + } + } + post { + aborted { + script { + updateGitlabCommitStatus name: 'Deloy to Dev', state: 'canceled' + echo "Milvus Deloy to Dev aborted !" + } + } + + failure { + script { + updateGitlabCommitStatus name: 'Deloy to Dev', state: 'failed' + echo "Milvus Deloy to Dev failure !" 
+ } + } + } + } + + stage("Dev Test") { + agent { + kubernetes { + label 'test' + defaultContainer 'jnlp' + containerTemplate { + name 'milvus-testframework' + image 'registry.zilliz.com/milvus/milvus-test:v0.1' + ttyEnabled true + command 'cat' + } + } + } + stages { + stage('Test') { + steps { + script { + load "${env.WORKSPACE}/ci/jenkinsfile/dev_test.groovy" + load "${env.WORKSPACE}/ci/jenkinsfile/upload_dev_test_out.groovy" + } + } + } + } + } + + stage ("Cleanup Dev") { + agent { + kubernetes { + label 'jenkins-slave' + defaultContainer 'jnlp' + } + } + stages { + stage('Cleanup') { + steps { + gitlabCommitStatus(name: 'Cleanup Dev') { + script { + load "${env.WORKSPACE}/ci/jenkinsfile/cleanup_dev.groovy" + } + } + } + } + } + post { + aborted { + script { + updateGitlabCommitStatus name: 'Cleanup Dev', state: 'canceled' + echo "Milvus Cleanup Dev aborted !" + } + } + + failure { + script { + updateGitlabCommitStatus name: 'Cleanup Dev', state: 'failed' + echo "Milvus Cleanup Dev failure !" + } + } + } + } + } + } + } + } + } + + post { + success { + script { + updateGitlabCommitStatus name: 'CI/CD', state: 'success' + echo "Milvus CI/CD success !" + } + } + + aborted { + script { + updateGitlabCommitStatus name: 'CI/CD', state: 'canceled' + echo "Milvus CI/CD aborted !" + } + } + + failure { + script { + updateGitlabCommitStatus name: 'CI/CD', state: 'failed' + echo "Milvus CI/CD failure !" + } + } + } +} From 838b1d776c2acc961ee8209bd6e47c85fd1b40d9 Mon Sep 17 00:00:00 2001 From: quicksilver Date: Tue, 9 Jul 2019 15:06:26 +0800 Subject: [PATCH 50/91] fix nightly_main_jenkinsfile docker version bug Former-commit-id: f7937454b11da5f2c1af63ae0ae221ab25eb1e7a --- ci/nightly_main_jenkinsfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ci/nightly_main_jenkinsfile b/ci/nightly_main_jenkinsfile index 9c54b6b8c0..eff34a5de2 100644 --- a/ci/nightly_main_jenkinsfile +++ b/ci/nightly_main_jenkinsfile @@ -11,7 +11,7 @@ pipeline { SEMVER = "${env.gitlabSourceBranch == null ? params.ENGINE_BRANCH.substring(params.ENGINE_BRANCH.lastIndexOf('/') + 1) : env.gitlabSourceBranch}" GITLAB_AFTER_COMMIT = "${env.gitlabAfter == null ? null : env.gitlabAfter}" SUFFIX_VERSION_NAME = "${env.gitlabAfter == null ? null : env.gitlabAfter.substring(0, 6)}" - DOCKER_VERSION_STR = "${env.gitlabAfter == null ? "${SEMVER}-${LOWER_BUILD_TYPE}-\$\{BUILD_DATE_FORMATTED\, \"yyyyMMdd\"\}" : "${SEMVER}-${LOWER_BUILD_TYPE}-${SUFFIX_VERSION_NAME}"}" + DOCKER_VERSION_STR = "${env.gitlabAfter == null ? 
'${SEMVER}-${LOWER_BUILD_TYPE}-${BUILD_DATE_FORMATTED, \"yyyyMMdd\"}' : '${SEMVER}-${LOWER_BUILD_TYPE}-${SUFFIX_VERSION_NAME}'}"
 }

 stages {

From 94824d55f160b9fcead999f463ff895106540a74 Mon Sep 17 00:00:00 2001
From: "xj.lin"
Date: Tue, 9 Jul 2019 15:09:08 +0800
Subject: [PATCH 51/91] fix build error

Former-commit-id: 83a0e40c4f90a435abe93f46b7e7aefa6b1f2e67
---
 cpp/src/utils/ValidationUtil.cpp | 2 +-
 cpp/thirdparty/knowhere | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/cpp/src/utils/ValidationUtil.cpp b/cpp/src/utils/ValidationUtil.cpp
index 65cd81e670..78d586dad6 100644
--- a/cpp/src/utils/ValidationUtil.cpp
+++ b/cpp/src/utils/ValidationUtil.cpp
@@ -59,7 +59,7 @@ ValidateTableIndexType(int32_t index_type) {
     auto engine_type = engine::EngineType(index_type);
     switch (engine_type) {
         case engine::EngineType::FAISS_IDMAP:
-        case engine::EngineType::FAISS_IVFFLAT: {
+        case engine::EngineType::FAISS_IVFFLAT_GPU: {
             SERVER_LOG_DEBUG << "Index type: " << index_type;
             return SERVER_SUCCESS;
         }
diff --git a/cpp/thirdparty/knowhere b/cpp/thirdparty/knowhere
index 3a30677b8a..3a052d063f 160000
--- a/cpp/thirdparty/knowhere
+++ b/cpp/thirdparty/knowhere
@@ -1 +1 @@
-Subproject commit 3a30677b8ab105955534922d1677e8fa99ef0406
+Subproject commit 3a052d063ff9b360529a4a120f9703f00c603437

From 4502c9e6f6365b9e4e9d7ff06194273120526346 Mon Sep 17 00:00:00 2001
From: "xj.lin"
Date: Tue, 9 Jul 2019 15:50:54 +0800
Subject: [PATCH 52/91] add tbb.so

Former-commit-id: 44a9d9485d3f6de2006979c81fe092fa179f5515
---
 cpp/src/CMakeLists.txt | 1 +
 1 file changed, 1 insertion(+)

diff --git a/cpp/src/CMakeLists.txt b/cpp/src/CMakeLists.txt
index 0627b2010a..7a1442cbef 100644
--- a/cpp/src/CMakeLists.txt
+++ b/cpp/src/CMakeLists.txt
@@ -208,6 +208,7 @@ endif ()
 install(TARGETS milvus_server DESTINATION bin)

 install(FILES
+        ${CMAKE_BINARY_DIR}/knowhere_ep-prefix/src/knowhere_ep/lib/${CMAKE_SHARED_LIBRARY_PREFIX}tbb${CMAKE_SHARED_LIBRARY_SUFFIX}
         ${CMAKE_BINARY_DIR}/mysqlpp_ep-prefix/src/mysqlpp_ep/lib/${CMAKE_SHARED_LIBRARY_PREFIX}mysqlpp${CMAKE_SHARED_LIBRARY_SUFFIX}
         ${CMAKE_BINARY_DIR}/mysqlpp_ep-prefix/src/mysqlpp_ep/lib/${CMAKE_SHARED_LIBRARY_PREFIX}mysqlpp${CMAKE_SHARED_LIBRARY_SUFFIX}.3
         ${CMAKE_BINARY_DIR}/mysqlpp_ep-prefix/src/mysqlpp_ep/lib/${CMAKE_SHARED_LIBRARY_PREFIX}mysqlpp${CMAKE_SHARED_LIBRARY_SUFFIX}.3.2.4

From a6180ccea5b2dc49c5c1e9236b0432cc2d560c5f Mon Sep 17 00:00:00 2001
From: zhiru
Date: Tue, 9 Jul 2019 17:07:52 +0800
Subject: [PATCH 53/91] use env variable to switch mem manager and fix cmake

Former-commit-id: f433a193e4086600d3a0ca2259e1f65f0d7fbc5b
---
 cpp/cmake/ThirdPartyPackages.cmake | 3 ++-
 cpp/src/db/Factories.cpp | 22 ++++++++++++++++------
 cpp/unittest/db/CMakeLists.txt | 2 +-
 cpp/unittest/metrics/CMakeLists.txt | 2 +-
 4 files changed, 20 insertions(+), 9 deletions(-)

diff --git a/cpp/cmake/ThirdPartyPackages.cmake b/cpp/cmake/ThirdPartyPackages.cmake
index 2c6c61dbf4..b4b21b8085 100644
--- a/cpp/cmake/ThirdPartyPackages.cmake
+++ b/cpp/cmake/ThirdPartyPackages.cmake
@@ -664,7 +664,8 @@ macro(build_knowhere)
             ${EP_COMMON_CMAKE_ARGS}
             "-DCMAKE_INSTALL_PREFIX=${KNOWHERE_PREFIX}"
             -DCMAKE_INSTALL_LIBDIR=lib
-            -DCMAKE_CUDA_COMPILER=/usr/local/cuda/bin/nvcc
+            "-DCMAKE_CUDA_COMPILER=${CMAKE_CUDA_COMPILER}"
+            "-DCUDA_TOOLKIT_ROOT_DIR=${CUDA_TOOLKIT_ROOT_DIR}"
             -DCMAKE_BUILD_TYPE=Release)

     externalproject_add(knowhere_ep
diff --git a/cpp/src/db/Factories.cpp b/cpp/src/db/Factories.cpp
index 65c7484a50..3cfb9a6eb3 100644
--- a/cpp/src/db/Factories.cpp
+++ b/cpp/src/db/Factories.cpp
@@ -3,12
+3,14 @@ // Unauthorized copying of this file, via any medium is strictly prohibited. // Proprietary and confidential. //////////////////////////////////////////////////////////////////////////////// -#include + #include "Factories.h" #include "DBImpl.h" #include "MemManager.h" #include "NewMemManager.h" +#include "Exception.h" +#include #include #include #include @@ -16,7 +18,9 @@ #include #include #include -#include "Exception.h" +#include +#include +#include namespace zilliz { namespace milvus { @@ -101,11 +105,17 @@ DB* DBFactory::Build(const Options& options) { MemManagerAbstractPtr MemManagerFactory::Build(const std::shared_ptr& meta, const Options& options) { -#ifdef USE_NEW_MEM_MANAGER + if (const char* env = getenv("MILVUS_USE_OLD_MEM_MANAGER")) { + std::string env_str = env; + std::transform(env_str.begin(), env_str.end(), env_str.begin(), ::toupper); + if (env_str == "ON") { + return std::make_shared(meta, options); + } + else { + return std::make_shared(meta, options); + } + } return std::make_shared(meta, options); -#else - return std::make_shared(meta, options); -#endif } } // namespace engine diff --git a/cpp/unittest/db/CMakeLists.txt b/cpp/unittest/db/CMakeLists.txt index 213eb146ed..2d360a5d62 100644 --- a/cpp/unittest/db/CMakeLists.txt +++ b/cpp/unittest/db/CMakeLists.txt @@ -39,7 +39,7 @@ set(db_test_src cuda_add_executable(db_test ${db_test_src}) set(db_libs - sqlite3 + sqlite boost_system boost_filesystem lz4 diff --git a/cpp/unittest/metrics/CMakeLists.txt b/cpp/unittest/metrics/CMakeLists.txt index d7ae12aff8..e55f99eb3e 100644 --- a/cpp/unittest/metrics/CMakeLists.txt +++ b/cpp/unittest/metrics/CMakeLists.txt @@ -77,7 +77,7 @@ target_link_libraries(metrics_test ${knowhere_libs} cudart cublas - sqlite3 + sqlite boost_system boost_filesystem lz4 From a569f60407ca8ef8ec2d9b8dbc22d924c98caa78 Mon Sep 17 00:00:00 2001 From: zhiru Date: Tue, 9 Jul 2019 17:09:22 +0800 Subject: [PATCH 54/91] use env variable to switch mem manager and fix cmake Former-commit-id: adf9686013de429b842e1bc30860b5ebbe4cad2a --- cpp/CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/cpp/CHANGELOG.md b/cpp/CHANGELOG.md index 03ffe14731..966b584694 100644 --- a/cpp/CHANGELOG.md +++ b/cpp/CHANGELOG.md @@ -11,6 +11,7 @@ Please mark all change in change log and use the ticket from JIRA. 
- MS-149 - Fixed searching only one index file issue in distributed mode - MS-153 - fix c_str error when connecting to MySQL - MS-157 - fix changelog +- MS-190 - use env variable to switch mem manager and fix cmake ## Improvement - MS-156 - Add unittest for merge result functions From d13b244a466fdc2e1fe6bb2dcfedde7baff815b9 Mon Sep 17 00:00:00 2001 From: zhiru Date: Tue, 9 Jul 2019 17:15:21 +0800 Subject: [PATCH 55/91] use env variable to switch mem manager and fix cmake Former-commit-id: 7928c6384045f002fd4bf4367bcf5ab9b5262ac0 --- cpp/.gitignore | 2 +- cpp/src/CMakeLists.txt | 1 + cpp/thirdparty/knowhere | 2 +- 3 files changed, 3 insertions(+), 2 deletions(-) diff --git a/cpp/.gitignore b/cpp/.gitignore index 03149cde32..6c2602d341 100644 --- a/cpp/.gitignore +++ b/cpp/.gitignore @@ -7,4 +7,4 @@ lcov_out/ base.info output.info output_new.info -server.info \ No newline at end of file +server.info diff --git a/cpp/src/CMakeLists.txt b/cpp/src/CMakeLists.txt index 7a1442cbef..81a085b153 100644 --- a/cpp/src/CMakeLists.txt +++ b/cpp/src/CMakeLists.txt @@ -209,6 +209,7 @@ install(TARGETS milvus_server DESTINATION bin) install(FILES ${CMAKE_BINARY_DIR}/knowhere_ep-prefix/src/knowhere_ep/lib/${CMAKE_SHARED_LIBRARY_PREFIX}tbb${CMAKE_SHARED_LIBRARY_SUFFIX} + ${CMAKE_BINARY_DIR}/knowhere_ep-prefix/src/knowhere_ep/lib/${CMAKE_SHARED_LIBRARY_PREFIX}tbb${CMAKE_SHARED_LIBRARY_SUFFIX}.2 ${CMAKE_BINARY_DIR}/mysqlpp_ep-prefix/src/mysqlpp_ep/lib/${CMAKE_SHARED_LIBRARY_PREFIX}mysqlpp${CMAKE_SHARED_LIBRARY_SUFFIX} ${CMAKE_BINARY_DIR}/mysqlpp_ep-prefix/src/mysqlpp_ep/lib/${CMAKE_SHARED_LIBRARY_PREFIX}mysqlpp${CMAKE_SHARED_LIBRARY_SUFFIX}.3 ${CMAKE_BINARY_DIR}/mysqlpp_ep-prefix/src/mysqlpp_ep/lib/${CMAKE_SHARED_LIBRARY_PREFIX}mysqlpp${CMAKE_SHARED_LIBRARY_SUFFIX}.3.2.4 diff --git a/cpp/thirdparty/knowhere b/cpp/thirdparty/knowhere index 3a052d063f..c3123501d6 160000 --- a/cpp/thirdparty/knowhere +++ b/cpp/thirdparty/knowhere @@ -1 +1 @@ -Subproject commit 3a052d063ff9b360529a4a120f9703f00c603437 +Subproject commit c3123501d62f69f9eacaa73ee96c0daeb24620a5 From 2f6799e53b4d7c5f46495982c994a1398cdfe529 Mon Sep 17 00:00:00 2001 From: zhiru Date: Tue, 9 Jul 2019 17:31:02 +0800 Subject: [PATCH 56/91] fix cmake Former-commit-id: bcbf6776c264b9aa59de049201249acff7c8c37e --- cpp/unittest/db/CMakeLists.txt | 4 ++-- cpp/unittest/faiss_wrapper/CMakeLists.txt | 6 +++--- cpp/unittest/license/CMakeLists.txt | 6 +++--- cpp/unittest/metrics/CMakeLists.txt | 4 ++-- cpp/unittest/server/CMakeLists.txt | 6 +++--- cpp/unittest/storage/CMakeLists.txt | 2 +- cpp/unittest/utils/CMakeLists.txt | 2 +- 7 files changed, 15 insertions(+), 15 deletions(-) diff --git a/cpp/unittest/db/CMakeLists.txt b/cpp/unittest/db/CMakeLists.txt index 2d360a5d62..acd13b8cda 100644 --- a/cpp/unittest/db/CMakeLists.txt +++ b/cpp/unittest/db/CMakeLists.txt @@ -40,8 +40,8 @@ cuda_add_executable(db_test ${db_test_src}) set(db_libs sqlite - boost_system - boost_filesystem + boost_system_static + boost_filesystem_static lz4 mysqlpp ) diff --git a/cpp/unittest/faiss_wrapper/CMakeLists.txt b/cpp/unittest/faiss_wrapper/CMakeLists.txt index f044df8d8c..ff2535e3fd 100644 --- a/cpp/unittest/faiss_wrapper/CMakeLists.txt +++ b/cpp/unittest/faiss_wrapper/CMakeLists.txt @@ -22,13 +22,13 @@ add_executable(wrapper_test ${wrapper_test_src}) set(wrapper_libs stdc++ - boost_system - boost_filesystem + boost_system_static + boost_filesystem_static libgpufaiss.a faiss cudart cublas - sqlite3 + sqlite snappy bz2 z diff --git a/cpp/unittest/license/CMakeLists.txt 
b/cpp/unittest/license/CMakeLists.txt index 93af82ed56..39d0ede197 100644 --- a/cpp/unittest/license/CMakeLists.txt +++ b/cpp/unittest/license/CMakeLists.txt @@ -33,11 +33,11 @@ set(db_libs nvidia-ml cudart cublas - boost_system - boost_filesystem + boost_system_static + boost_filesystem_static lz4 crypto - boost_serialization + boost_serialization_static ) target_link_libraries(license_test ${db_libs} ${unittest_libs}) diff --git a/cpp/unittest/metrics/CMakeLists.txt b/cpp/unittest/metrics/CMakeLists.txt index e55f99eb3e..28a438e344 100644 --- a/cpp/unittest/metrics/CMakeLists.txt +++ b/cpp/unittest/metrics/CMakeLists.txt @@ -78,8 +78,8 @@ target_link_libraries(metrics_test cudart cublas sqlite - boost_system - boost_filesystem + boost_system_static + boost_filesystem_static lz4 metrics gtest diff --git a/cpp/unittest/server/CMakeLists.txt b/cpp/unittest/server/CMakeLists.txt index 94a581d276..29a54fddb8 100644 --- a/cpp/unittest/server/CMakeLists.txt +++ b/cpp/unittest/server/CMakeLists.txt @@ -39,9 +39,9 @@ set(require_libs stdc++ cudart cublas - sqlite3 - boost_system - boost_filesystem + sqlite + boost_system_static + boost_filesystem_static snappy z bz2 diff --git a/cpp/unittest/storage/CMakeLists.txt b/cpp/unittest/storage/CMakeLists.txt index 6b4303b70a..d4deaefab8 100644 --- a/cpp/unittest/storage/CMakeLists.txt +++ b/cpp/unittest/storage/CMakeLists.txt @@ -25,7 +25,7 @@ set(s3_client_libs stdc++ aws-cpp-sdk-s3 aws-cpp-sdk-core - boost_filesystem + boost_filesystem_static ) target_link_libraries(s3_test ${s3_client_libs} diff --git a/cpp/unittest/utils/CMakeLists.txt b/cpp/unittest/utils/CMakeLists.txt index a46a3b05e1..61e4459462 100644 --- a/cpp/unittest/utils/CMakeLists.txt +++ b/cpp/unittest/utils/CMakeLists.txt @@ -25,6 +25,6 @@ add_executable(valication_util_test target_link_libraries(valication_util_test ${unittest_libs} - boost_filesystem) + boost_filesystem_static) install(TARGETS valication_util_test DESTINATION bin) \ No newline at end of file From df26e649cd006a6c1a5de1783041a7e11a6fa472 Mon Sep 17 00:00:00 2001 From: zhiru Date: Tue, 9 Jul 2019 17:32:05 +0800 Subject: [PATCH 57/91] update Former-commit-id: 632e7b8205196c234eb85dca72c59f52db82979f --- cpp/src/db/Factories.cpp | 2 -- 1 file changed, 2 deletions(-) diff --git a/cpp/src/db/Factories.cpp b/cpp/src/db/Factories.cpp index 3cfb9a6eb3..442dca2974 100644 --- a/cpp/src/db/Factories.cpp +++ b/cpp/src/db/Factories.cpp @@ -26,8 +26,6 @@ namespace zilliz { namespace milvus { namespace engine { -#define USE_NEW_MEM_MANAGER 1 - DBMetaOptions DBMetaOptionsFactory::Build(const std::string& path) { auto p = path; if(p == "") { From f57ffefa36bb83ff69621785a071fa7bdf60e431 Mon Sep 17 00:00:00 2001 From: zhiru Date: Tue, 9 Jul 2019 21:11:05 +0800 Subject: [PATCH 58/91] update Former-commit-id: c4bd8a4f4c714ad54999e8c7471b1c6179427b62 --- cpp/cmake/ThirdPartyPackages.cmake | 2 +- cpp/unittest/db/CMakeLists.txt | 6 +- cpp/unittest/db/mem_test.cpp | 160 ++++++++++++++--------------- 3 files changed, 85 insertions(+), 83 deletions(-) diff --git a/cpp/cmake/ThirdPartyPackages.cmake b/cpp/cmake/ThirdPartyPackages.cmake index b4b21b8085..8b888519f9 100644 --- a/cpp/cmake/ThirdPartyPackages.cmake +++ b/cpp/cmake/ThirdPartyPackages.cmake @@ -534,7 +534,7 @@ if(MILVUS_BOOST_VENDORED) "" ${EP_LOG_OPTIONS}) set(Boost_INCLUDE_DIR "${BOOST_PREFIX}") - set(Boost_INCLUDE_DIRS "${BOOST_INCLUDE_DIR}") + set(Boost_INCLUDE_DIRS "${Boost_INCLUDE_DIR}") add_dependencies(boost_system_static boost_ep) add_dependencies(boost_filesystem_static 
boost_ep) add_dependencies(boost_serialization_static boost_ep) diff --git a/cpp/unittest/db/CMakeLists.txt b/cpp/unittest/db/CMakeLists.txt index acd13b8cda..1415cef16a 100644 --- a/cpp/unittest/db/CMakeLists.txt +++ b/cpp/unittest/db/CMakeLists.txt @@ -24,6 +24,8 @@ link_directories("/usr/local/cuda/lib64") include_directories(/usr/include/mysql) +#add_definitions(-DBOOST_ERROR_CODE_HEADER_ONLY) + set(db_test_src #${unittest_srcs} ${config_files} @@ -40,8 +42,8 @@ cuda_add_executable(db_test ${db_test_src}) set(db_libs sqlite - boost_system_static boost_filesystem_static + boost_system_static lz4 mysqlpp ) @@ -59,6 +61,6 @@ set(knowhere_libs cublas ) -target_link_libraries(db_test ${db_libs} ${unittest_libs} ${knowhere_libs}) +target_link_libraries(db_test ${knowhere_libs} ${db_libs} ${unittest_libs}) install(TARGETS db_test DESTINATION bin) diff --git a/cpp/unittest/db/mem_test.cpp b/cpp/unittest/db/mem_test.cpp index 5b7972ec35..56ff0dbec6 100644 --- a/cpp/unittest/db/mem_test.cpp +++ b/cpp/unittest/db/mem_test.cpp @@ -289,84 +289,84 @@ TEST_F(NewMemManagerTest, INSERT_TEST) { } -TEST_F(NewMemManagerTest, CONCURRENT_INSERT_SEARCH_TEST) { - - auto options = engine::OptionsFactory::Build(); - options.meta.path = "/tmp/milvus_test"; - options.meta.backend_uri = "sqlite://:@:/"; - auto db_ = engine::DBFactory::Build(options); - - engine::meta::TableSchema table_info = BuildTableSchema(); - engine::Status stat = db_->CreateTable(table_info); - - engine::meta::TableSchema table_info_get; - table_info_get.table_id_ = TABLE_NAME; - stat = db_->DescribeTable(table_info_get); - ASSERT_STATS(stat); - ASSERT_EQ(table_info_get.dimension_, TABLE_DIM); - - engine::IDNumbers vector_ids; - engine::IDNumbers target_ids; - - int64_t nb = 409600; - std::vector xb; - BuildVectors(nb, xb); - - int64_t qb = 5; - std::vector qxb; - BuildVectors(qb, qxb); - - std::thread search([&]() { - engine::QueryResults results; - int k = 10; - std::this_thread::sleep_for(std::chrono::seconds(2)); - - INIT_TIMER; - std::stringstream ss; - uint64_t count = 0; - uint64_t prev_count = 0; - - for (auto j = 0; j < 10; ++j) { - ss.str(""); - db_->Size(count); - prev_count = count; - - START_TIMER; - stat = db_->Query(TABLE_NAME, k, qb, qxb.data(), results); - ss << "Search " << j << " With Size " << count / engine::meta::M << " M"; - STOP_TIMER(ss.str()); - - ASSERT_STATS(stat); - for (auto k = 0; k < qb; ++k) { - ASSERT_EQ(results[k][0].first, target_ids[k]); - ss.str(""); - ss << "Result [" << k << "]:"; - for (auto result : results[k]) { - ss << result.first << " "; - } - /* LOG(DEBUG) << ss.str(); */ - } - ASSERT_TRUE(count >= prev_count); - std::this_thread::sleep_for(std::chrono::seconds(1)); - } - }); - - int loop = 20; - - for (auto i = 0; i < loop; ++i) { - if (i == 0) { - db_->InsertVectors(TABLE_NAME, qb, qxb.data(), target_ids); - ASSERT_EQ(target_ids.size(), qb); - } else { - db_->InsertVectors(TABLE_NAME, nb, xb.data(), vector_ids); - } - std::this_thread::sleep_for(std::chrono::microseconds(1)); - } - - search.join(); - - delete db_; - boost::filesystem::remove_all(options.meta.path); - -}; +//TEST_F(NewMemManagerTest, CONCURRENT_INSERT_SEARCH_TEST) { +// +// auto options = engine::OptionsFactory::Build(); +// options.meta.path = "/tmp/milvus_test"; +// options.meta.backend_uri = "sqlite://:@:/"; +// auto db_ = engine::DBFactory::Build(options); +// +// engine::meta::TableSchema table_info = BuildTableSchema(); +// engine::Status stat = db_->CreateTable(table_info); +// +// engine::meta::TableSchema 
table_info_get; +// table_info_get.table_id_ = TABLE_NAME; +// stat = db_->DescribeTable(table_info_get); +// ASSERT_STATS(stat); +// ASSERT_EQ(table_info_get.dimension_, TABLE_DIM); +// +// engine::IDNumbers vector_ids; +// engine::IDNumbers target_ids; +// +// int64_t nb = 409600; +// std::vector xb; +// BuildVectors(nb, xb); +// +// int64_t qb = 5; +// std::vector qxb; +// BuildVectors(qb, qxb); +// +// std::thread search([&]() { +// engine::QueryResults results; +// int k = 10; +// std::this_thread::sleep_for(std::chrono::seconds(5)); +// +// INIT_TIMER; +// std::stringstream ss; +// uint64_t count = 0; +// uint64_t prev_count = 0; +// +// for (auto j = 0; j < 10; ++j) { +// ss.str(""); +// db_->Size(count); +// prev_count = count; +// +// START_TIMER; +// stat = db_->Query(TABLE_NAME, k, qb, qxb.data(), results); +// ss << "Search " << j << " With Size " << count / engine::meta::M << " M"; +// STOP_TIMER(ss.str()); +// +// ASSERT_STATS(stat); +// for (auto k = 0; k < qb; ++k) { +// ASSERT_EQ(results[k][0].first, target_ids[k]); +// ss.str(""); +// ss << "Result [" << k << "]:"; +// for (auto result : results[k]) { +// ss << result.first << " "; +// } +// /* LOG(DEBUG) << ss.str(); */ +// } +// ASSERT_TRUE(count >= prev_count); +// std::this_thread::sleep_for(std::chrono::seconds(2)); +// } +// }); +// +// int loop = 20; +// +// for (auto i = 0; i < loop; ++i) { +// if (i == 0) { +// db_->InsertVectors(TABLE_NAME, qb, qxb.data(), target_ids); +// ASSERT_EQ(target_ids.size(), qb); +// } else { +// db_->InsertVectors(TABLE_NAME, nb, xb.data(), vector_ids); +// } +// std::this_thread::sleep_for(std::chrono::microseconds(1)); +// } +// +// search.join(); +// +// delete db_; +// boost::filesystem::remove_all(options.meta.path); +// +//}; From 143a172ae89a9eb1b323bdc5ccef270d65622f7d Mon Sep 17 00:00:00 2001 From: zhiru Date: Tue, 9 Jul 2019 21:13:55 +0800 Subject: [PATCH 59/91] update Former-commit-id: 1575fe6ead3c09caf0d5d3fa764ceb461431ab5e --- cpp/unittest/db/mem_test.cpp | 160 +++++++++++++++++------------------ 1 file changed, 80 insertions(+), 80 deletions(-) diff --git a/cpp/unittest/db/mem_test.cpp b/cpp/unittest/db/mem_test.cpp index 56ff0dbec6..5b7972ec35 100644 --- a/cpp/unittest/db/mem_test.cpp +++ b/cpp/unittest/db/mem_test.cpp @@ -289,84 +289,84 @@ TEST_F(NewMemManagerTest, INSERT_TEST) { } -//TEST_F(NewMemManagerTest, CONCURRENT_INSERT_SEARCH_TEST) { -// -// auto options = engine::OptionsFactory::Build(); -// options.meta.path = "/tmp/milvus_test"; -// options.meta.backend_uri = "sqlite://:@:/"; -// auto db_ = engine::DBFactory::Build(options); -// -// engine::meta::TableSchema table_info = BuildTableSchema(); -// engine::Status stat = db_->CreateTable(table_info); -// -// engine::meta::TableSchema table_info_get; -// table_info_get.table_id_ = TABLE_NAME; -// stat = db_->DescribeTable(table_info_get); -// ASSERT_STATS(stat); -// ASSERT_EQ(table_info_get.dimension_, TABLE_DIM); -// -// engine::IDNumbers vector_ids; -// engine::IDNumbers target_ids; -// -// int64_t nb = 409600; -// std::vector xb; -// BuildVectors(nb, xb); -// -// int64_t qb = 5; -// std::vector qxb; -// BuildVectors(qb, qxb); -// -// std::thread search([&]() { -// engine::QueryResults results; -// int k = 10; -// std::this_thread::sleep_for(std::chrono::seconds(5)); -// -// INIT_TIMER; -// std::stringstream ss; -// uint64_t count = 0; -// uint64_t prev_count = 0; -// -// for (auto j = 0; j < 10; ++j) { -// ss.str(""); -// db_->Size(count); -// prev_count = count; -// -// START_TIMER; -// stat = 
db_->Query(TABLE_NAME, k, qb, qxb.data(), results); -// ss << "Search " << j << " With Size " << count / engine::meta::M << " M"; -// STOP_TIMER(ss.str()); -// -// ASSERT_STATS(stat); -// for (auto k = 0; k < qb; ++k) { -// ASSERT_EQ(results[k][0].first, target_ids[k]); -// ss.str(""); -// ss << "Result [" << k << "]:"; -// for (auto result : results[k]) { -// ss << result.first << " "; -// } -// /* LOG(DEBUG) << ss.str(); */ -// } -// ASSERT_TRUE(count >= prev_count); -// std::this_thread::sleep_for(std::chrono::seconds(2)); -// } -// }); -// -// int loop = 20; -// -// for (auto i = 0; i < loop; ++i) { -// if (i == 0) { -// db_->InsertVectors(TABLE_NAME, qb, qxb.data(), target_ids); -// ASSERT_EQ(target_ids.size(), qb); -// } else { -// db_->InsertVectors(TABLE_NAME, nb, xb.data(), vector_ids); -// } -// std::this_thread::sleep_for(std::chrono::microseconds(1)); -// } -// -// search.join(); -// -// delete db_; -// boost::filesystem::remove_all(options.meta.path); -// -//}; +TEST_F(NewMemManagerTest, CONCURRENT_INSERT_SEARCH_TEST) { + + auto options = engine::OptionsFactory::Build(); + options.meta.path = "/tmp/milvus_test"; + options.meta.backend_uri = "sqlite://:@:/"; + auto db_ = engine::DBFactory::Build(options); + + engine::meta::TableSchema table_info = BuildTableSchema(); + engine::Status stat = db_->CreateTable(table_info); + + engine::meta::TableSchema table_info_get; + table_info_get.table_id_ = TABLE_NAME; + stat = db_->DescribeTable(table_info_get); + ASSERT_STATS(stat); + ASSERT_EQ(table_info_get.dimension_, TABLE_DIM); + + engine::IDNumbers vector_ids; + engine::IDNumbers target_ids; + + int64_t nb = 409600; + std::vector xb; + BuildVectors(nb, xb); + + int64_t qb = 5; + std::vector qxb; + BuildVectors(qb, qxb); + + std::thread search([&]() { + engine::QueryResults results; + int k = 10; + std::this_thread::sleep_for(std::chrono::seconds(2)); + + INIT_TIMER; + std::stringstream ss; + uint64_t count = 0; + uint64_t prev_count = 0; + + for (auto j = 0; j < 10; ++j) { + ss.str(""); + db_->Size(count); + prev_count = count; + + START_TIMER; + stat = db_->Query(TABLE_NAME, k, qb, qxb.data(), results); + ss << "Search " << j << " With Size " << count / engine::meta::M << " M"; + STOP_TIMER(ss.str()); + + ASSERT_STATS(stat); + for (auto k = 0; k < qb; ++k) { + ASSERT_EQ(results[k][0].first, target_ids[k]); + ss.str(""); + ss << "Result [" << k << "]:"; + for (auto result : results[k]) { + ss << result.first << " "; + } + /* LOG(DEBUG) << ss.str(); */ + } + ASSERT_TRUE(count >= prev_count); + std::this_thread::sleep_for(std::chrono::seconds(1)); + } + }); + + int loop = 20; + + for (auto i = 0; i < loop; ++i) { + if (i == 0) { + db_->InsertVectors(TABLE_NAME, qb, qxb.data(), target_ids); + ASSERT_EQ(target_ids.size(), qb); + } else { + db_->InsertVectors(TABLE_NAME, nb, xb.data(), vector_ids); + } + std::this_thread::sleep_for(std::chrono::microseconds(1)); + } + + search.join(); + + delete db_; + boost::filesystem::remove_all(options.meta.path); + +}; From 81ab29ed88323879f33041a99edc6da1370c0e6a Mon Sep 17 00:00:00 2001 From: zhiru Date: Tue, 9 Jul 2019 21:15:54 +0800 Subject: [PATCH 60/91] update Former-commit-id: 64463e2d9609f85f1347c39a3b42eba880f35565 --- cpp/unittest/db/CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cpp/unittest/db/CMakeLists.txt b/cpp/unittest/db/CMakeLists.txt index 1415cef16a..60c22d0ee2 100644 --- a/cpp/unittest/db/CMakeLists.txt +++ b/cpp/unittest/db/CMakeLists.txt @@ -42,8 +42,8 @@ cuda_add_executable(db_test 
${db_test_src}) set(db_libs sqlite - boost_filesystem_static boost_system_static + boost_filesystem_static lz4 mysqlpp ) From 739744569cae60e9c66d2c38a60a274be1883114 Mon Sep 17 00:00:00 2001 From: starlord Date: Wed, 10 Jul 2019 14:54:19 +0800 Subject: [PATCH 61/91] fix server test Former-commit-id: 9f3f0b98fb27392407ad0319608034a89a918b33 --- cpp/unittest/CMakeLists.txt | 2 +- cpp/unittest/server/cache_test.cpp | 73 +++++++++++++++++++++++++----- 2 files changed, 62 insertions(+), 13 deletions(-) diff --git a/cpp/unittest/CMakeLists.txt b/cpp/unittest/CMakeLists.txt index 8675bf8735..225aca2c81 100644 --- a/cpp/unittest/CMakeLists.txt +++ b/cpp/unittest/CMakeLists.txt @@ -37,7 +37,7 @@ set(unittest_libs ${CUDA_TOOLKIT_ROOT_DIR}/lib64/stubs/libnvidia-ml.so ) -#add_subdirectory(server) +add_subdirectory(server) add_subdirectory(db) add_subdirectory(index_wrapper) #add_subdirectory(faiss_wrapper) diff --git a/cpp/unittest/server/cache_test.cpp b/cpp/unittest/server/cache_test.cpp index 8c323d81d3..4b40861d0b 100644 --- a/cpp/unittest/server/cache_test.cpp +++ b/cpp/unittest/server/cache_test.cpp @@ -8,6 +8,7 @@ #include "cache/GpuCacheMgr.h" #include "wrapper/Index.h" +#include "wrapper/knowhere/vec_index.h" using namespace zilliz::milvus; @@ -26,6 +27,54 @@ public: } }; +class MockVecIndex : public engine::VecIndex { +public: + virtual void BuildAll(const long &nb, + const float *xb, + const long *ids, + const engine::Config &cfg, + const long &nt = 0, + const float *xt = nullptr) { + + } + + virtual void Add(const long &nb, + const float *xb, + const long *ids, + const engine::Config &cfg = engine::Config()) { + + } + + virtual void Search(const long &nq, + const float *xq, + float *dist, + long *ids, + const engine::Config &cfg = engine::Config()) { + + } + + virtual int64_t Dimension() { + return dimension_; + } + + virtual int64_t Count() { + return ntotal_; + } + + virtual zilliz::knowhere::BinarySet Serialize() { + zilliz::knowhere::BinarySet binset; + return binset; + } + + virtual void Load(const zilliz::knowhere::BinarySet &index_binary) { + + } + +public: + int64_t dimension_ = 512; + int64_t ntotal_ = 0; +}; + } TEST(CacheTest, CPU_CACHE_TEST) { @@ -40,9 +89,9 @@ TEST(CacheTest, CPU_CACHE_TEST) { const int dim = 256; for (int i = 0; i < 20; i++) { - std::shared_ptr raw_index(faiss::index_factory(dim, "IDMap,Flat")); - engine::Index_ptr index = std::make_shared(raw_index); - index->ntotal = 1000000;//less 1G per index + MockVecIndex* mock_index = new MockVecIndex(); + mock_index->ntotal_ = 1000000;//less 1G per index + engine::Index_ptr index(mock_index); cpu_mgr->InsertItem("index_" + std::to_string(i), index); } @@ -65,9 +114,9 @@ TEST(CacheTest, CPU_CACHE_TEST) { g_num = 5; cpu_mgr->SetCapacity(g_num * gbyte); - std::shared_ptr raw_index(faiss::index_factory(dim, "IDMap,Flat")); - engine::Index_ptr index = std::make_shared(raw_index); - index->ntotal = 6000000;//6G less + MockVecIndex* mock_index = new MockVecIndex(); + mock_index->ntotal_ = 6000000;//6G less + engine::Index_ptr index(mock_index); cpu_mgr->InsertItem("index_6g", index); ASSERT_EQ(cpu_mgr->ItemCount(), 0);//data greater than capacity can not be inserted sucessfully @@ -82,9 +131,9 @@ TEST(CacheTest, GPU_CACHE_TEST) { const int dim = 256; for(int i = 0; i < 20; i++) { - std::shared_ptr raw_index(faiss::index_factory(dim, "IDMap,Flat")); - engine::Index_ptr index = std::make_shared(raw_index); - index->ntotal = 1000; + MockVecIndex* mock_index = new MockVecIndex(); + mock_index->ntotal_ = 1000; + 
engine::Index_ptr index(mock_index); cache::DataObjPtr obj = std::make_shared(index); @@ -117,9 +166,9 @@ TEST(CacheTest, INVALID_TEST) { { LessItemCacheMgr mgr; for(int i = 0; i < 20; i++) { - std::shared_ptr raw_index(faiss::index_factory(2, "IDMap,Flat")); - engine::Index_ptr index = std::make_shared(raw_index); - index->ntotal = 2; + MockVecIndex* mock_index = new MockVecIndex(); + mock_index->ntotal_ = 2; + engine::Index_ptr index(mock_index); cache::DataObjPtr obj = std::make_shared(index); mgr.InsertItem("index_" + std::to_string(i), obj); From c6ce5772bad81257263dfd83a0eb37994c220b4a Mon Sep 17 00:00:00 2001 From: "xj.lin" Date: Wed, 10 Jul 2019 16:28:11 +0800 Subject: [PATCH 62/91] fix bad alloc and add idmap Former-commit-id: bd2686574ad9010e33dcf34f3ae45308d5b3c971 --- cpp/src/db/ExecutionEngineImpl.cpp | 163 +++------------- cpp/src/wrapper/knowhere/vec_impl.cpp | 37 ++++ cpp/src/wrapper/knowhere/vec_impl.h | 29 ++- cpp/src/wrapper/knowhere/vec_index.cpp | 123 +++++++++++- cpp/src/wrapper/knowhere/vec_index.h | 27 ++- cpp/thirdparty/knowhere | 2 +- cpp/unittest/index_wrapper/knowhere_test.cpp | 186 +++++++++---------- cpp/unittest/index_wrapper/utils.cpp | 37 +--- cpp/unittest/index_wrapper/utils.h | 32 +--- 9 files changed, 326 insertions(+), 310 deletions(-) diff --git a/cpp/src/db/ExecutionEngineImpl.cpp b/cpp/src/db/ExecutionEngineImpl.cpp index bba29fc9a0..64aabb3777 100644 --- a/cpp/src/db/ExecutionEngineImpl.cpp +++ b/cpp/src/db/ExecutionEngineImpl.cpp @@ -4,6 +4,7 @@ * Proprietary and confidential. ******************************************************************************/ #include +#include #include "Log.h" #include "src/cache/CpuCacheMgr.h" @@ -16,55 +17,6 @@ namespace zilliz { namespace milvus { namespace engine { -struct FileIOWriter { - std::fstream fs; - std::string name; - - FileIOWriter(const std::string &fname); - ~FileIOWriter(); - size_t operator()(void *ptr, size_t size); -}; - -struct FileIOReader { - std::fstream fs; - std::string name; - - FileIOReader(const std::string &fname); - ~FileIOReader(); - size_t operator()(void *ptr, size_t size); - size_t operator()(void *ptr, size_t size, size_t pos); -}; - -FileIOReader::FileIOReader(const std::string &fname) { - name = fname; - fs = std::fstream(name, std::ios::in | std::ios::binary); -} - -FileIOReader::~FileIOReader() { - fs.close(); -} - -size_t FileIOReader::operator()(void *ptr, size_t size) { - fs.read(reinterpret_cast(ptr), size); -} - -size_t FileIOReader::operator()(void *ptr, size_t size, size_t pos) { - return 0; -} - -FileIOWriter::FileIOWriter(const std::string &fname) { - name = fname; - fs = std::fstream(name, std::ios::out | std::ios::binary); -} - -FileIOWriter::~FileIOWriter() { - fs.close(); -} - -size_t FileIOWriter::operator()(void *ptr, size_t size) { - fs.write(reinterpret_cast(ptr), size); -} - ExecutionEngineImpl::ExecutionEngineImpl(uint16_t dimension, const std::string &location, EngineType type) @@ -89,7 +41,7 @@ VecIndexPtr ExecutionEngineImpl::CreatetVecIndex(EngineType type) { break; } case EngineType::FAISS_IVFFLAT_GPU: { - index = GetVecIndexFactory(IndexType::FAISS_IVFFLAT_GPU); + index = GetVecIndexFactory(IndexType::FAISS_IVFFLAT_MIX); break; } case EngineType::FAISS_IVFFLAT_CPU: { @@ -130,91 +82,34 @@ size_t ExecutionEngineImpl::PhysicalSize() const { } Status ExecutionEngineImpl::Serialize() { - auto binaryset = index_->Serialize(); - - FileIOWriter writer(location_); - writer(¤t_type, sizeof(current_type)); - for (auto &iter: binaryset.binary_map_) { - auto meta = 
iter.first.c_str(); - size_t meta_length = iter.first.length(); - writer(&meta_length, sizeof(meta_length)); - writer((void *) meta, meta_length); - - auto binary = iter.second; - size_t binary_length = binary->size; - writer(&binary_length, sizeof(binary_length)); - writer((void *) binary->data.get(), binary_length); - } + write_index(index_, location_); return Status::OK(); } Status ExecutionEngineImpl::Load() { - index_ = Load(location_); + index_ = zilliz::milvus::cache::CpuCacheMgr::GetInstance()->GetIndex(location_); + bool to_cache = false; + auto start_time = METRICS_NOW_TIME; + if (!index_) { + index_ = read_index(location_); + to_cache = true; + ENGINE_LOG_DEBUG << "Disk io from: " << location_; + } + + if (to_cache) { + Cache(); + auto end_time = METRICS_NOW_TIME; + auto total_time = METRICS_MICROSECONDS(start_time, end_time); + + server::Metrics::GetInstance().FaissDiskLoadDurationSecondsHistogramObserve(total_time); + double total_size = Size(); + + server::Metrics::GetInstance().FaissDiskLoadSizeBytesHistogramObserve(total_size); + server::Metrics::GetInstance().FaissDiskLoadIOSpeedGaugeSet(total_size / double(total_time)); + } return Status::OK(); } -VecIndexPtr ExecutionEngineImpl::Load(const std::string &location) { - knowhere::BinarySet load_data_list; - FileIOReader reader(location); - reader.fs.seekg(0, reader.fs.end); - size_t length = reader.fs.tellg(); - reader.fs.seekg(0); - - size_t rp = 0; - reader(¤t_type, sizeof(current_type)); - rp += sizeof(current_type); - while (rp < length) { - size_t meta_length; - reader(&meta_length, sizeof(meta_length)); - rp += sizeof(meta_length); - reader.fs.seekg(rp); - - auto meta = new char[meta_length]; - reader(meta, meta_length); - rp += meta_length; - reader.fs.seekg(rp); - - size_t bin_length; - reader(&bin_length, sizeof(bin_length)); - rp += sizeof(bin_length); - reader.fs.seekg(rp); - - auto bin = new uint8_t[bin_length]; - reader(bin, bin_length); - rp += bin_length; - - auto binptr = std::make_shared(); - binptr.reset(bin); - load_data_list.Append(std::string(meta, meta_length), binptr, bin_length); - } - - auto index_type = IndexType::INVALID; - switch (current_type) { - case EngineType::FAISS_IDMAP: { - index_type = IndexType::FAISS_IDMAP; - break; - } - case EngineType::FAISS_IVFFLAT_CPU: { - index_type = IndexType::FAISS_IVFFLAT_CPU; - break; - } - case EngineType::FAISS_IVFFLAT_GPU: { - index_type = IndexType::FAISS_IVFFLAT_GPU; - break; - } - case EngineType::SPTAG_KDT_RNT_CPU: { - index_type = IndexType::SPTAG_KDT_RNT_CPU; - break; - } - default: { - ENGINE_LOG_ERROR << "wrong index_type"; - return nullptr; - } - } - - return LoadVecIndex(index_type, load_data_list); -} - Status ExecutionEngineImpl::Merge(const std::string &location) { if (location == location_) { return Status::Error("Cannot Merge Self"); @@ -223,15 +118,17 @@ Status ExecutionEngineImpl::Merge(const std::string &location) { auto to_merge = zilliz::milvus::cache::CpuCacheMgr::GetInstance()->GetIndex(location); if (!to_merge) { - to_merge = Load(location); + to_merge = read_index(location); } - auto file_index = std::dynamic_pointer_cast(to_merge); - index_->Add(file_index->Count(), file_index->GetRawVectors(), file_index->GetRawIds()); - return Status::OK(); + if (auto file_index = std::dynamic_pointer_cast(to_merge)) { + index_->Add(file_index->Count(), file_index->GetRawVectors(), file_index->GetRawIds()); + return Status::OK(); + } else { + return Status::Error("file index type is not idmap"); + } } -// TODO(linxj): add config 
ExecutionEnginePtr ExecutionEngineImpl::BuildIndex(const std::string &location) { ENGINE_LOG_DEBUG << "Build index file: " << location << " from: " << location_; diff --git a/cpp/src/wrapper/knowhere/vec_impl.cpp b/cpp/src/wrapper/knowhere/vec_impl.cpp index 9b1afb84ef..d50bfe34da 100644 --- a/cpp/src/wrapper/knowhere/vec_impl.cpp +++ b/cpp/src/wrapper/knowhere/vec_impl.cpp @@ -6,6 +6,7 @@ #include #include "knowhere/index/vector_index/idmap.h" +#include "knowhere/index/vector_index/gpu_ivf.h" #include "vec_impl.h" #include "data_transfer.h" @@ -98,6 +99,10 @@ int64_t VecIndexImpl::Count() { return index_->Count(); } +IndexType VecIndexImpl::GetType() { + return type; +} + float *BFIndex::GetRawVectors() { auto raw_index = std::dynamic_pointer_cast(index_); if (raw_index) { return raw_index->GetRawVectors(); } @@ -126,6 +131,38 @@ void BFIndex::BuildAll(const long &nb, index_->Add(dataset, cfg); } +// TODO(linxj): add lock here. +void IVFMixIndex::BuildAll(const long &nb, + const float *xb, + const long *ids, + const Config &cfg, + const long &nt, + const float *xt) { + dim = cfg["dim"].as(); + auto dataset = GenDatasetWithIds(nb, dim, xb, ids); + + auto preprocessor = index_->BuildPreprocessor(dataset, cfg); + index_->set_preprocessor(preprocessor); + auto nlist = int(nb / 1000000.0 * 16384); + auto cfg_t = Config::object{{"nlist", nlist}, {"dim", dim}}; + auto model = index_->Train(dataset, cfg_t); + index_->set_index_model(model); + index_->Add(dataset, cfg); + + if (auto device_index = std::dynamic_pointer_cast(index_)) { + auto host_index = device_index->Copy_index_gpu_to_cpu(); + index_ = host_index; + } else { + // TODO(linxj): LOG ERROR + } +} + +void IVFMixIndex::Load(const zilliz::knowhere::BinarySet &index_binary) { + index_ = std::make_shared(); + index_->Load(index_binary); + dim = Dimension(); +} + } } } diff --git a/cpp/src/wrapper/knowhere/vec_impl.h b/cpp/src/wrapper/knowhere/vec_impl.h index ab6c6b8a79..1d09a069d2 100644 --- a/cpp/src/wrapper/knowhere/vec_impl.h +++ b/cpp/src/wrapper/knowhere/vec_impl.h @@ -17,13 +17,15 @@ namespace engine { class VecIndexImpl : public VecIndex { public: - explicit VecIndexImpl(std::shared_ptr index) : index_(std::move(index)) {}; + explicit VecIndexImpl(std::shared_ptr index, const IndexType &type) + : index_(std::move(index)), type(type) {}; void BuildAll(const long &nb, const float *xb, const long *ids, const Config &cfg, const long &nt, const float *xt) override; + IndexType GetType() override; int64_t Dimension() override; int64_t Count() override; void Add(const long &nb, const float *xb, const long *ids, const Config &cfg) override; @@ -33,21 +35,36 @@ class VecIndexImpl : public VecIndex { protected: int64_t dim = 0; + IndexType type = IndexType::INVALID; std::shared_ptr index_ = nullptr; }; -class BFIndex : public VecIndexImpl { +class IVFMixIndex : public VecIndexImpl { public: - explicit BFIndex(std::shared_ptr index) : VecIndexImpl(std::move(index)) {}; - void Build(const int64_t& d); - float* GetRawVectors(); + explicit IVFMixIndex(std::shared_ptr index) : VecIndexImpl(std::move(index), + IndexType::FAISS_IVFFLAT_MIX) {}; void BuildAll(const long &nb, const float *xb, const long *ids, const Config &cfg, const long &nt, const float *xt) override; - int64_t* GetRawIds(); + void Load(const zilliz::knowhere::BinarySet &index_binary) override; +}; + +class BFIndex : public VecIndexImpl { + public: + explicit BFIndex(std::shared_ptr index) : VecIndexImpl(std::move(index), + IndexType::FAISS_IDMAP) {}; + void Build(const int64_t 
&d); + float *GetRawVectors(); + void BuildAll(const long &nb, + const float *xb, + const long *ids, + const Config &cfg, + const long &nt, + const float *xt) override; + int64_t *GetRawIds(); }; } diff --git a/cpp/src/wrapper/knowhere/vec_index.cpp b/cpp/src/wrapper/knowhere/vec_index.cpp index 17aa428613..55e1ea4cea 100644 --- a/cpp/src/wrapper/knowhere/vec_index.cpp +++ b/cpp/src/wrapper/knowhere/vec_index.cpp @@ -16,7 +16,56 @@ namespace zilliz { namespace milvus { namespace engine { -// TODO(linxj): index_type => enum struct +struct FileIOWriter { + std::fstream fs; + std::string name; + + FileIOWriter(const std::string &fname); + ~FileIOWriter(); + size_t operator()(void *ptr, size_t size); +}; + +struct FileIOReader { + std::fstream fs; + std::string name; + + FileIOReader(const std::string &fname); + ~FileIOReader(); + size_t operator()(void *ptr, size_t size); + size_t operator()(void *ptr, size_t size, size_t pos); +}; + +FileIOReader::FileIOReader(const std::string &fname) { + name = fname; + fs = std::fstream(name, std::ios::in | std::ios::binary); +} + +FileIOReader::~FileIOReader() { + fs.close(); +} + +size_t FileIOReader::operator()(void *ptr, size_t size) { + fs.read(reinterpret_cast(ptr), size); +} + +size_t FileIOReader::operator()(void *ptr, size_t size, size_t pos) { + return 0; +} + +FileIOWriter::FileIOWriter(const std::string &fname) { + name = fname; + fs = std::fstream(name, std::ios::out | std::ios::binary); +} + +FileIOWriter::~FileIOWriter() { + fs.close(); +} + +size_t FileIOWriter::operator()(void *ptr, size_t size) { + fs.write(reinterpret_cast(ptr), size); +} + + VecIndexPtr GetVecIndexFactory(const IndexType &type) { std::shared_ptr index; switch (type) { @@ -32,6 +81,10 @@ VecIndexPtr GetVecIndexFactory(const IndexType &type) { index = std::make_shared(0); break; } + case IndexType::FAISS_IVFFLAT_MIX: { + index = std::make_shared(0); + return std::make_shared(index); + } case IndexType::FAISS_IVFPQ_CPU: { index = std::make_shared(); break; @@ -44,15 +97,15 @@ VecIndexPtr GetVecIndexFactory(const IndexType &type) { index = std::make_shared(); break; } - //case IndexType::NSG: { // TODO(linxj): bug. - // index = std::make_shared(); - // break; - //} + //case IndexType::NSG: { // TODO(linxj): bug. 
+ // index = std::make_shared(); + // break; + //} default: { return nullptr; } } - return std::make_shared(index); + return std::make_shared(index, type); } VecIndexPtr LoadVecIndex(const IndexType &index_type, const zilliz::knowhere::BinarySet &index_binary) { @@ -61,6 +114,64 @@ VecIndexPtr LoadVecIndex(const IndexType &index_type, const zilliz::knowhere::Bi return index; } +VecIndexPtr read_index(const std::string &location) { + knowhere::BinarySet load_data_list; + FileIOReader reader(location); + reader.fs.seekg(0, reader.fs.end); + size_t length = reader.fs.tellg(); + reader.fs.seekg(0); + + size_t rp = 0; + auto current_type = IndexType::INVALID; + reader(¤t_type, sizeof(current_type)); + rp += sizeof(current_type); + while (rp < length) { + size_t meta_length; + reader(&meta_length, sizeof(meta_length)); + rp += sizeof(meta_length); + reader.fs.seekg(rp); + + auto meta = new char[meta_length]; + reader(meta, meta_length); + rp += meta_length; + reader.fs.seekg(rp); + + size_t bin_length; + reader(&bin_length, sizeof(bin_length)); + rp += sizeof(bin_length); + reader.fs.seekg(rp); + + auto bin = new uint8_t[bin_length]; + reader(bin, bin_length); + rp += bin_length; + + auto binptr = std::make_shared(); + binptr.reset(bin); + load_data_list.Append(std::string(meta, meta_length), binptr, bin_length); + } + + return LoadVecIndex(current_type, load_data_list); +} + +void write_index(VecIndexPtr index, const std::string &location) { + auto binaryset = index->Serialize(); + auto index_type = index->GetType(); + + FileIOWriter writer(location); + writer(&index_type, sizeof(IndexType)); + for (auto &iter: binaryset.binary_map_) { + auto meta = iter.first.c_str(); + size_t meta_length = iter.first.length(); + writer(&meta_length, sizeof(meta_length)); + writer((void *) meta, meta_length); + + auto binary = iter.second; + int64_t binary_length = binary->size; + writer(&binary_length, sizeof(binary_length)); + writer((void *) binary->data.get(), binary_length); + } +} + } } } diff --git a/cpp/src/wrapper/knowhere/vec_index.h b/cpp/src/wrapper/knowhere/vec_index.h index 76c69537b5..a488922d9e 100644 --- a/cpp/src/wrapper/knowhere/vec_index.h +++ b/cpp/src/wrapper/knowhere/vec_index.h @@ -20,6 +20,18 @@ namespace engine { // TODO(linxj): jsoncons => rapidjson or other. 
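write_index() and read_index() above define the whole on-disk format for a serialized index: a raw IndexType tag, followed by one [meta_length][meta bytes][binary_length][binary bytes] record per entry in the Knowhere BinarySet. The demo below writes and re-reads that layout with plain fstreams so the format is easy to see in isolation; the enum, key and payload are stand-ins, not the real wrapper types:

```
// Layout demo for the index file format used by write_index()/read_index().
// IndexType here is a local stand-in with an explicit underlying type.
#include <cstddef>
#include <cstdint>
#include <fstream>
#include <iostream>
#include <string>
#include <vector>

enum class IndexType : int32_t { INVALID = 0, FAISS_IDMAP = 1 };

int main() {
    const std::string path = "/tmp/index_layout_demo";

    {   // write one record, mirroring write_index()
        std::ofstream out(path, std::ios::binary);
        IndexType type = IndexType::FAISS_IDMAP;
        std::string meta = "IVF";                     // BinarySet key
        std::vector<uint8_t> blob = {1, 2, 3, 4};     // serialized index bytes
        size_t meta_len = meta.size();
        int64_t blob_len = static_cast<int64_t>(blob.size());
        out.write(reinterpret_cast<const char *>(&type), sizeof(type));
        out.write(reinterpret_cast<const char *>(&meta_len), sizeof(meta_len));
        out.write(meta.data(), meta_len);
        out.write(reinterpret_cast<const char *>(&blob_len), sizeof(blob_len));
        out.write(reinterpret_cast<const char *>(blob.data()), blob_len);
    }

    // read it back, mirroring read_index()
    std::ifstream in(path, std::ios::binary);
    IndexType type;
    size_t meta_len;
    in.read(reinterpret_cast<char *>(&type), sizeof(type));
    in.read(reinterpret_cast<char *>(&meta_len), sizeof(meta_len));
    std::string meta(meta_len, '\0');
    in.read(&meta[0], meta_len);
    int64_t blob_len;
    in.read(reinterpret_cast<char *>(&blob_len), sizeof(blob_len));
    std::vector<uint8_t> blob(blob_len);
    in.read(reinterpret_cast<char *>(blob.data()), blob_len);
    std::cout << "type=" << static_cast<int>(type) << " meta=" << meta
              << " payload=" << blob.size() << " bytes\n";
}
```

Note that write_index() stores the payload length as int64_t while read_index() reads it back into a size_t; the two widths line up only where size_t is 8 bytes, which holds on the usual 64-bit Linux targets.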
using Config = zilliz::knowhere::Config; +enum class IndexType { + INVALID = 0, + FAISS_IDMAP = 1, + FAISS_IVFFLAT_CPU, + FAISS_IVFFLAT_GPU, + FAISS_IVFFLAT_MIX, // build on gpu and search on cpu + FAISS_IVFPQ_CPU, + FAISS_IVFPQ_GPU, + SPTAG_KDT_RNT_CPU, + //NSG, +}; + class VecIndex { public: virtual void BuildAll(const long &nb, @@ -40,6 +52,8 @@ class VecIndex { long *ids, const Config &cfg = Config()) = 0; + virtual IndexType GetType() = 0; + virtual int64_t Dimension() = 0; virtual int64_t Count() = 0; @@ -51,16 +65,9 @@ class VecIndex { using VecIndexPtr = std::shared_ptr; -enum class IndexType { - INVALID = 0, - FAISS_IDMAP = 1, - FAISS_IVFFLAT_CPU, - FAISS_IVFFLAT_GPU, - FAISS_IVFPQ_CPU, - FAISS_IVFPQ_GPU, - SPTAG_KDT_RNT_CPU, - //NSG, -}; +extern void write_index(VecIndexPtr index, const std::string &location); + +extern VecIndexPtr read_index(const std::string &location); extern VecIndexPtr GetVecIndexFactory(const IndexType &type); diff --git a/cpp/thirdparty/knowhere b/cpp/thirdparty/knowhere index c3123501d6..ca99a6899b 160000 --- a/cpp/thirdparty/knowhere +++ b/cpp/thirdparty/knowhere @@ -1 +1 @@ -Subproject commit c3123501d62f69f9eacaa73ee96c0daeb24620a5 +Subproject commit ca99a6899be4e8a0806452656cf0f2be19d79c1a diff --git a/cpp/unittest/index_wrapper/knowhere_test.cpp b/cpp/unittest/index_wrapper/knowhere_test.cpp index b4f8feba03..30673dba1f 100644 --- a/cpp/unittest/index_wrapper/knowhere_test.cpp +++ b/cpp/unittest/index_wrapper/knowhere_test.cpp @@ -28,11 +28,37 @@ class KnowhereWrapperTest //auto generator = GetGenerateFactory(generator_type); auto generator = std::make_shared(); - generator->GenData(dim, nb, nq, xb, xq, ids, k, gt_ids); + generator->GenData(dim, nb, nq, xb, xq, ids, k, gt_ids, gt_dis); index_ = GetVecIndexFactory(index_type); } + void AssertResult(const std::vector &ids, const std::vector &dis) { + EXPECT_EQ(ids.size(), nq * k); + EXPECT_EQ(dis.size(), nq * k); + + for (auto i = 0; i < nq; i++) { + EXPECT_EQ(ids[i * k], gt_ids[i * k]); + EXPECT_EQ(dis[i * k], gt_dis[i * k]); + } + + int match = 0; + for (int i = 0; i < nq; ++i) { + for (int j = 0; j < k; ++j) { + for (int l = 0; l < k; ++l) { + if (ids[i * nq + j] == gt_ids[i * nq + l]) match++; + } + } + } + + auto precision = float(match) / (nq * k); + EXPECT_GT(precision, 0.5); + std::cout << std::endl << "Precision: " << precision + << ", match: " << match + << ", total: " << nq * k + << std::endl; + } + protected: IndexType index_type; Config train_cfg; @@ -50,126 +76,88 @@ class KnowhereWrapperTest // Ground Truth std::vector gt_ids; + std::vector gt_dis; }; INSTANTIATE_TEST_CASE_P(WrapperParam, KnowhereWrapperTest, Values( - // ["Index type", "Generator type", "dim", "nb", "nq", "k", "build config", "search config"] + //["Index type", "Generator type", "dim", "nb", "nq", "k", "build config", "search config"] std::make_tuple(IndexType::FAISS_IVFFLAT_CPU, "Default", - 64, 10000, 10, 10, + 64, 100000, 10, 10, Config::object{{"nlist", 100}, {"dim", 64}}, - Config::object{{"dim", 64}, {"k", 10}, {"nprobe", 20}} + Config::object{{"dim", 64}, {"k", 10}, {"nprobe", 10}} ), - std::make_tuple(IndexType::SPTAG_KDT_RNT_CPU, "Default", - 64, 10000, 10, 10, - Config::object{{"TPTNumber", 1}, {"dim", 64}}, + //std::make_tuple(IndexType::FAISS_IVFFLAT_GPU, "Default", + // 64, 10000, 10, 10, + // Config::object{{"nlist", 100}, {"dim", 64}}, + // Config::object{{"dim", 64}, {"k", 10}, {"nprobe", 40}} + //), + std::make_tuple(IndexType::FAISS_IVFFLAT_MIX, "Default", + 64, 100000, 10, 10, + 
Config::object{{"nlist", 100}, {"dim", 64}}, + Config::object{{"dim", 64}, {"k", 10}, {"nprobe", 10}} + ), + std::make_tuple(IndexType::FAISS_IDMAP, "Default", + 64, 100000, 10, 10, + Config::object{{"dim", 64}}, Config::object{{"dim", 64}, {"k", 10}} ) + //std::make_tuple(IndexType::SPTAG_KDT_RNT_CPU, "Default", + // 64, 10000, 10, 10, + // Config::object{{"TPTNumber", 1}, {"dim", 64}}, + // Config::object{{"dim", 64}, {"k", 10}} + //) ) ); -void AssertAnns(const std::vector >, - const std::vector &res, - const int &nq, - const int &k) { - EXPECT_EQ(res.size(), nq * k); - - for (auto i = 0; i < nq; i++) { - EXPECT_EQ(gt[i * k], res[i * k]); - } - - int match = 0; - for (int i = 0; i < nq; ++i) { - for (int j = 0; j < k; ++j) { - for (int l = 0; l < k; ++l) { - if (gt[i * nq + j] == res[i * nq + l]) match++; - } - } - } - - // TODO(linxj): percision check - EXPECT_GT(float(match/nq*k), 0.5); -} - TEST_P(KnowhereWrapperTest, base_test) { - std::vector res_ids; - float *D = new float[k * nq]; - res_ids.resize(nq * k); + EXPECT_EQ(index_->GetType(), index_type); + + auto elems = nq * k; + std::vector res_ids(elems); + std::vector res_dis(elems); index_->BuildAll(nb, xb.data(), ids.data(), train_cfg); - index_->Search(nq, xq.data(), D, res_ids.data(), search_cfg); - AssertAnns(gt_ids, res_ids, nq, k); - delete[] D; + index_->Search(nq, xq.data(), res_dis.data(), res_ids.data(), search_cfg); + AssertResult(res_ids, res_dis); } -TEST_P(KnowhereWrapperTest, serialize_test) { - std::vector res_ids; - float *D = new float[k * nq]; - res_ids.resize(nq * k); +TEST_P(KnowhereWrapperTest, serialize) { + EXPECT_EQ(index_->GetType(), index_type); + auto elems = nq * k; + std::vector res_ids(elems); + std::vector res_dis(elems); index_->BuildAll(nb, xb.data(), ids.data(), train_cfg); - index_->Search(nq, xq.data(), D, res_ids.data(), search_cfg); - AssertAnns(gt_ids, res_ids, nq, k); + index_->Search(nq, xq.data(), res_dis.data(), res_ids.data(), search_cfg); + AssertResult(res_ids, res_dis); { - auto binaryset = index_->Serialize(); - //int fileno = 0; - //const std::string &base_name = "/tmp/wrapper_serialize_test_bin_"; - //std::vector filename_list; - //std::vector> meta_list; - //for (auto &iter: binaryset.binary_map_) { - // const std::string &filename = base_name + std::to_string(fileno); - // FileIOWriter writer(filename); - // writer(iter.second->data.get(), iter.second->size); - // - // meta_list.push_back(std::make_pair(iter.first, iter.second.size)); - // filename_list.push_back(filename); - // ++fileno; - //} - // - //BinarySet load_data_list; - //for (int i = 0; i < filename_list.size() && i < meta_list.size(); ++i) { - // auto bin_size = meta_list[i].second; - // FileIOReader reader(filename_list[i]); - // std::vector load_data(bin_size); - // reader(load_data.data(), bin_size); - // load_data_list.Append(meta_list[i].first, load_data); - //} + auto binary = index_->Serialize(); + auto type = index_->GetType(); + auto new_index = GetVecIndexFactory(type); + new_index->Load(binary); + EXPECT_EQ(new_index->Dimension(), index_->Dimension()); + EXPECT_EQ(new_index->Count(), index_->Count()); - int fileno = 0; - std::vector filename_list; - const std::string &base_name = "/tmp/wrapper_serialize_test_bin_"; - std::vector> meta_list; - for (auto &iter: binaryset.binary_map_) { - const std::string &filename = base_name + std::to_string(fileno); - FileIOWriter writer(filename); - writer(iter.second->data.get(), iter.second->size); - - meta_list.emplace_back(std::make_pair(iter.first, 
iter.second->size)); - filename_list.push_back(filename); - ++fileno; - } - - BinarySet load_data_list; - for (int i = 0; i < filename_list.size() && i < meta_list.size(); ++i) { - auto bin_size = meta_list[i].second; - FileIOReader reader(filename_list[i]); - - auto load_data = new uint8_t[bin_size]; - reader(load_data, bin_size); - auto data = std::make_shared(); - data.reset(load_data); - load_data_list.Append(meta_list[i].first, data, bin_size); - } - - - res_ids.clear(); - res_ids.resize(nq * k); - auto new_index = GetVecIndexFactory(index_type); - new_index->Load(load_data_list); - new_index->Search(nq, xq.data(), D, res_ids.data(), search_cfg); - AssertAnns(gt_ids, res_ids, nq, k); + std::vector res_ids(elems); + std::vector res_dis(elems); + new_index->Search(nq, xq.data(), res_dis.data(), res_ids.data(), search_cfg); + AssertResult(res_ids, res_dis); } - delete[] D; + { + std::string file_location = "/tmp/whatever"; + write_index(index_, file_location); + auto new_index = read_index(file_location); + EXPECT_EQ(new_index->GetType(), index_type); + EXPECT_EQ(new_index->Dimension(), index_->Dimension()); + EXPECT_EQ(new_index->Count(), index_->Count()); + + std::vector res_ids(elems); + std::vector res_dis(elems); + new_index->Search(nq, xq.data(), res_dis.data(), res_ids.data(), search_cfg); + AssertResult(res_ids, res_dis); + } } + diff --git a/cpp/unittest/index_wrapper/utils.cpp b/cpp/unittest/index_wrapper/utils.cpp index e228ae001d..ede5dd0485 100644 --- a/cpp/unittest/index_wrapper/utils.cpp +++ b/cpp/unittest/index_wrapper/utils.cpp @@ -19,7 +19,7 @@ DataGenPtr GetGenerateFactory(const std::string &gen_type) { void DataGenBase::GenData(const int &dim, const int &nb, const int &nq, float *xb, float *xq, long *ids, - const int &k, long *gt_ids) { + const int &k, long *gt_ids, float *gt_dis) { for (auto i = 0; i < nb; ++i) { for (auto j = 0; j < dim; ++j) { //p_data[i * d + j] = float(base + i); @@ -35,8 +35,7 @@ void DataGenBase::GenData(const int &dim, const int &nb, const int &nq, faiss::IndexFlatL2 index(dim); //index.add_with_ids(nb, xb, ids); index.add(nb, xb); - float *D = new float[k * nq]; - index.search(nq, xq, k, D, gt_ids); + index.search(nq, xq, k, gt_dis, gt_ids); } void DataGenBase::GenData(const int &dim, @@ -46,36 +45,12 @@ void DataGenBase::GenData(const int &dim, std::vector &xq, std::vector &ids, const int &k, - std::vector >_ids) { + std::vector >_ids, + std::vector >_dis) { xb.resize(nb * dim); xq.resize(nq * dim); ids.resize(nb); gt_ids.resize(nq * k); - GenData(dim, nb, nq, xb.data(), xq.data(), ids.data(), k, gt_ids.data()); -} - -FileIOReader::FileIOReader(const std::string &fname) { - name = fname; - fs = std::fstream(name, std::ios::in | std::ios::binary); -} - -FileIOReader::~FileIOReader() { - fs.close(); -} - -size_t FileIOReader::operator()(void *ptr, size_t size) { - fs.read(reinterpret_cast(ptr), size); -} - -FileIOWriter::FileIOWriter(const std::string &fname) { - name = fname; - fs = std::fstream(name, std::ios::out | std::ios::binary); -} - -FileIOWriter::~FileIOWriter() { - fs.close(); -} - -size_t FileIOWriter::operator()(void *ptr, size_t size) { - fs.write(reinterpret_cast(ptr), size); + gt_dis.resize(nq * k); + GenData(dim, nb, nq, xb.data(), xq.data(), ids.data(), k, gt_ids.data(), gt_dis.data()); } diff --git a/cpp/unittest/index_wrapper/utils.h b/cpp/unittest/index_wrapper/utils.h index bbc52a011b..ce3c428d68 100644 --- a/cpp/unittest/index_wrapper/utils.h +++ b/cpp/unittest/index_wrapper/utils.h @@ -23,7 +23,7 @@ extern DataGenPtr 
GetGenerateFactory(const std::string &gen_type); class DataGenBase { public: virtual void GenData(const int &dim, const int &nb, const int &nq, float *xb, float *xq, long *ids, - const int &k, long *gt_ids); + const int &k, long *gt_ids, float *gt_dis); virtual void GenData(const int &dim, const int &nb, @@ -32,30 +32,14 @@ class DataGenBase { std::vector &xq, std::vector &ids, const int &k, - std::vector >_ids); + std::vector >_ids, + std::vector >_dis); }; -class SanityCheck : public DataGenBase { - public: - void GenData(const int &dim, const int &nb, const int &nq, float *xb, float *xq, long *ids, - const int &k, long *gt_ids) override; -}; +//class SanityCheck : public DataGenBase { +// public: +// void GenData(const int &dim, const int &nb, const int &nq, float *xb, float *xq, long *ids, +// const int &k, long *gt_ids, float *gt_dis) override; +//}; -struct FileIOWriter { - std::fstream fs; - std::string name; - - FileIOWriter(const std::string &fname); - ~FileIOWriter(); - size_t operator()(void *ptr, size_t size); -}; - -struct FileIOReader { - std::fstream fs; - std::string name; - - FileIOReader(const std::string &fname); - ~FileIOReader(); - size_t operator()(void *ptr, size_t size); -}; From 85e8a2b12b64eb1a46be8ed0713147dd0a89bc93 Mon Sep 17 00:00:00 2001 From: "xj.lin" Date: Wed, 10 Jul 2019 16:48:09 +0800 Subject: [PATCH 63/91] fix unittest Former-commit-id: 92e8d283ff8170584f62db8b94f604a8bb094281 --- cpp/unittest/server/cache_test.cpp | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/cpp/unittest/server/cache_test.cpp b/cpp/unittest/server/cache_test.cpp index 4b40861d0b..9424563cad 100644 --- a/cpp/unittest/server/cache_test.cpp +++ b/cpp/unittest/server/cache_test.cpp @@ -38,6 +38,10 @@ public: } + engine::IndexType GetType() override { + return engine::IndexType::INVALID; + } + virtual void Add(const long &nb, const float *xb, const long *ids, From 72ffedebd78b33740db1d80907f6d687b12af51b Mon Sep 17 00:00:00 2001 From: quicksilver Date: Wed, 10 Jul 2019 17:14:28 +0800 Subject: [PATCH 64/91] add email notify function Former-commit-id: b5813c2a1916b72aa23f2026d95fd1023d5406a7 --- ci/jenkinsfile/notify.groovy | 15 +++++++++++++++ ci/main_jenkinsfile | 6 ++++++ 2 files changed, 21 insertions(+) create mode 100644 ci/jenkinsfile/notify.groovy diff --git a/ci/jenkinsfile/notify.groovy b/ci/jenkinsfile/notify.groovy new file mode 100644 index 0000000000..0a257b8cd8 --- /dev/null +++ b/ci/jenkinsfile/notify.groovy @@ -0,0 +1,15 @@ +def notify() { + if (!currentBuild.resultIsBetterOrEqualTo('SUCCESS')) { + // Send an email only if the build status has changed from green/unstable to red + emailext subject: '$DEFAULT_SUBJECT', + body: '$DEFAULT_CONTENT', + recipientProviders: [ + [$class: 'DevelopersRecipientProvider'], + [$class: 'RequesterRecipientProvider'] + ], + replyTo: '$DEFAULT_REPLYTO', + to: '$DEFAULT_RECIPIENTS' + } +} +return this + diff --git a/ci/main_jenkinsfile b/ci/main_jenkinsfile index c144c46685..5c0fde587b 100644 --- a/ci/main_jenkinsfile +++ b/ci/main_jenkinsfile @@ -232,6 +232,12 @@ spec: } post { + always { + script { + def notify = load "${env.WORKSPACE}/ci/jenkinsfile/notify.groovy" + notify.notify() + } + } success { script { updateGitlabCommitStatus name: 'CI/CD', state: 'success' From b4abf92d54670a6bea169343cc28ff9fe304bcb2 Mon Sep 17 00:00:00 2001 From: quicksilver Date: Wed, 10 Jul 2019 17:43:08 +0800 Subject: [PATCH 65/91] send email on master node Former-commit-id: 07518d892cb27efe82ac4d181594a31a67795e79 --- ci/main_jenkinsfile | 6 ++++-- 1 
file changed, 4 insertions(+), 2 deletions(-) diff --git a/ci/main_jenkinsfile b/ci/main_jenkinsfile index 5c0fde587b..2783e84688 100644 --- a/ci/main_jenkinsfile +++ b/ci/main_jenkinsfile @@ -234,8 +234,10 @@ spec: post { always { script { - def notify = load "${env.WORKSPACE}/ci/jenkinsfile/notify.groovy" - notify.notify() + node('master') { + def notify = load "${env.WORKSPACE}/ci/jenkinsfile/notify.groovy" + notify.notify() + } } } success { From 7cabf0f1ead5a2eaff9e987bcf9c8a0e712f711c Mon Sep 17 00:00:00 2001 From: quicksilver Date: Wed, 10 Jul 2019 17:46:29 +0800 Subject: [PATCH 66/91] send email on master node Former-commit-id: b363e112ef2fcd06de24cd203b366696371d80a4 --- ci/main_jenkinsfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ci/main_jenkinsfile b/ci/main_jenkinsfile index 2783e84688..2ab4344c01 100644 --- a/ci/main_jenkinsfile +++ b/ci/main_jenkinsfile @@ -234,7 +234,7 @@ spec: post { always { script { - node('master') { + node { def notify = load "${env.WORKSPACE}/ci/jenkinsfile/notify.groovy" notify.notify() } From e3ec2ac26a37d1d2a39a8b304c2db2a519fe8912 Mon Sep 17 00:00:00 2001 From: quicksilver Date: Wed, 10 Jul 2019 17:50:22 +0800 Subject: [PATCH 67/91] send email on master node Former-commit-id: 2fab82c23cfd1a93e1df61686f54f11352a2c463 --- ci/main_jenkinsfile | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/ci/main_jenkinsfile b/ci/main_jenkinsfile index 2ab4344c01..1c7d8be75c 100644 --- a/ci/main_jenkinsfile +++ b/ci/main_jenkinsfile @@ -234,12 +234,20 @@ spec: post { always { script { - node { - def notify = load "${env.WORKSPACE}/ci/jenkinsfile/notify.groovy" - notify.notify() + if (!currentBuild.resultIsBetterOrEqualTo('SUCCESS')) { + // Send an email only if the build status has changed from green/unstable to red + emailext subject: '$DEFAULT_SUBJECT', + body: '$DEFAULT_CONTENT', + recipientProviders: [ + [$class: 'DevelopersRecipientProvider'], + [$class: 'RequesterRecipientProvider'] + ], + replyTo: '$DEFAULT_REPLYTO', + to: '$DEFAULT_RECIPIENTS' } } } + success { script { updateGitlabCommitStatus name: 'CI/CD', state: 'success' From 027ee4ea9319e76da2d345858eebeb1714aabdd5 Mon Sep 17 00:00:00 2001 From: quicksilver Date: Wed, 10 Jul 2019 17:54:47 +0800 Subject: [PATCH 68/91] MS-202 - Add Milvus Jenkins project email notification Former-commit-id: 951c5314c90dced332c32254c44d4eefef4adb9a --- CHANGELOGS.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOGS.md b/CHANGELOGS.md index def4965a41..d9ef3d4c36 100644 --- a/CHANGELOGS.md +++ b/CHANGELOGS.md @@ -16,3 +16,4 @@ Please mark all change in change log and use the ticket from JIRA. 
- MS-1 - Add CHANGELOG.md - MS-161 - Add CI / CD Module to Milvus Project +- MS-202 - Add Milvus Jenkins project email notification From ad82f4bd7be2d7ec9c002e91b00ff4a18e0c6eb0 Mon Sep 17 00:00:00 2001 From: "xj.lin" Date: Wed, 10 Jul 2019 17:25:11 +0800 Subject: [PATCH 69/91] add test-bin Former-commit-id: 920180954eeba02697bc61e9c9b69a13629b0e90 --- cpp/unittest/index_wrapper/CMakeLists.txt | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/cpp/unittest/index_wrapper/CMakeLists.txt b/cpp/unittest/index_wrapper/CMakeLists.txt index 51bd97b575..1f02464ad9 100644 --- a/cpp/unittest/index_wrapper/CMakeLists.txt +++ b/cpp/unittest/index_wrapper/CMakeLists.txt @@ -20,4 +20,6 @@ set(knowhere_libs ) add_executable(knowhere_test knowhere_test.cpp ${knowhere_src} ${helper}) -target_link_libraries(knowhere_test ${knowhere_libs} ${unittest_libs}) \ No newline at end of file +target_link_libraries(knowhere_test ${knowhere_libs} ${unittest_libs}) + +install(TARGETS knowhere_test DESTINATION bin) \ No newline at end of file From 0a7288c2e31d06e7e13a2c5086e586efdb864ed2 Mon Sep 17 00:00:00 2001 From: quicksilver Date: Fri, 12 Jul 2019 14:26:16 +0800 Subject: [PATCH 70/91] add helm stable repo Former-commit-id: e7958b9d69a0ae9f534e0b2288b92424b398970e --- ci/jenkinsfile/deploy2dev.groovy | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ci/jenkinsfile/deploy2dev.groovy b/ci/jenkinsfile/deploy2dev.groovy index 6e4a23cfe7..47497e1648 100644 --- a/ci/jenkinsfile/deploy2dev.groovy +++ b/ci/jenkinsfile/deploy2dev.groovy @@ -1,5 +1,5 @@ try { - sh 'helm init --client-only --skip-refresh' + sh 'helm init --client-only --skip-refresh --stable-repo-url https://kubernetes.oss-cn-hangzhou.aliyuncs.com/charts' sh 'helm repo add milvus https://registry.zilliz.com/chartrepo/milvus' sh 'helm repo update' sh "helm install --set engine.image.repository=registry.zilliz.com/${PROJECT_NAME}/engine --set engine.image.tag=${DOCKER_VERSION} --set expose.type=clusterIP --name ${env.JOB_NAME}-${env.BUILD_NUMBER} --version 0.3.0 milvus/milvus-gpu" From 46dbce34adcf7eab92f0f402a485f830aea7c1d9 Mon Sep 17 00:00:00 2001 From: starlord Date: Mon, 15 Jul 2019 19:24:20 +0800 Subject: [PATCH 71/91] avoid background merge thread stop Former-commit-id: cc6ff30a93f8f6afdd662b7459dbb0db63ef45d6 --- cpp/src/db/DBImpl.cpp | 5 ++--- cpp/src/db/DBImpl.h | 2 -- 2 files changed, 2 insertions(+), 5 deletions(-) diff --git a/cpp/src/db/DBImpl.cpp b/cpp/src/db/DBImpl.cpp index 09a7c72201..c6365bc38b 100644 --- a/cpp/src/db/DBImpl.cpp +++ b/cpp/src/db/DBImpl.cpp @@ -229,7 +229,6 @@ void DBImpl::BackgroundTimerTask() { Status status; server::SystemInfo::GetInstance().Init(); while (true) { - if (!bg_error_.ok()) break; if (shutting_down_.load(std::memory_order_acquire)){ for(auto& iter : compact_thread_results_) { iter.wait(); @@ -392,7 +391,7 @@ void DBImpl::BackgroundCompaction(std::set table_ids) { for (auto& table_id : table_ids) { status = BackgroundMergeFiles(table_id); if (!status.ok()) { - bg_error_ = status; + ENGINE_LOG_ERROR << "Merge files for table " << table_id << " failed: " << status.ToString(); return; } } @@ -498,7 +497,7 @@ void DBImpl::BackgroundBuildIndex() { /* ENGINE_LOG_DEBUG << "Buiding index for " << file.location; */ status = BuildIndex(file); if (!status.ok()) { - bg_error_ = status; + ENGINE_LOG_ERROR << "Building index for " << file.id_ << " failed: " << status.ToString(); return; } diff --git a/cpp/src/db/DBImpl.h b/cpp/src/db/DBImpl.h index 5601f1a33b..012a445ef1 100644 --- 
a/cpp/src/db/DBImpl.h +++ b/cpp/src/db/DBImpl.h @@ -114,10 +114,8 @@ class DBImpl : public DB { BuildIndex(const meta::TableFileSchema &); private: - const Options options_; - Status bg_error_; std::atomic shutting_down_; std::thread bg_timer_thread_; From d332dadf9b3b5b807eb810e591b264f8cf338bbd Mon Sep 17 00:00:00 2001 From: "xj.lin" Date: Tue, 16 Jul 2019 15:25:33 +0800 Subject: [PATCH 72/91] add some log Former-commit-id: c5432276b5ade9bb1dce687ea1cfe04840078cbb --- cpp/src/db/ExecutionEngineImpl.cpp | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/cpp/src/db/ExecutionEngineImpl.cpp b/cpp/src/db/ExecutionEngineImpl.cpp index 64aabb3777..ead97dfe38 100644 --- a/cpp/src/db/ExecutionEngineImpl.cpp +++ b/cpp/src/db/ExecutionEngineImpl.cpp @@ -37,14 +37,17 @@ VecIndexPtr ExecutionEngineImpl::CreatetVecIndex(EngineType type) { std::shared_ptr index; switch (type) { case EngineType::FAISS_IDMAP: { + ENGINE_LOG_DEBUG << "Build Index: IDMAP"; index = GetVecIndexFactory(IndexType::FAISS_IDMAP); break; } case EngineType::FAISS_IVFFLAT_GPU: { + ENGINE_LOG_DEBUG << "Build Index: IVFMIX"; index = GetVecIndexFactory(IndexType::FAISS_IVFFLAT_MIX); break; } case EngineType::FAISS_IVFFLAT_CPU: { + ENGINE_LOG_DEBUG << "Build Index: IVFCPU"; index = GetVecIndexFactory(IndexType::FAISS_IVFFLAT_CPU); break; } @@ -135,6 +138,7 @@ ExecutionEngineImpl::BuildIndex(const std::string &location) { auto from_index = std::dynamic_pointer_cast(index_); auto to_index = CreatetVecIndex(build_type); + ENGINE_LOG_DEBUG << "Build Params: [gpu_id] " << gpu_num; to_index->BuildAll(Count(), from_index->GetRawVectors(), from_index->GetRawIds(), @@ -148,6 +152,7 @@ Status ExecutionEngineImpl::Search(long n, long k, float *distances, long *labels) const { + ENGINE_LOG_DEBUG << "Search Params: [k] " << k << " [nprobe] " << nprobe_; index_->Search(n, data, distances, labels, Config::object{{"k", k}, {"nprobe", nprobe_}}); return Status::OK(); } @@ -169,7 +174,7 @@ Status ExecutionEngineImpl::Init() { } case EngineType::FAISS_IVFFLAT_CPU: { ConfigNode engine_config = config.GetConfig(CONFIG_ENGINE); - nprobe_ = engine_config.GetInt32Value(CONFIG_NPROBE, 1000); + nprobe_ = engine_config.GetInt32Value(CONFIG_NPROBE, 1); break; } } From 4948e47b67322fb4c91e6df468ee4373788e8416 Mon Sep 17 00:00:00 2001 From: "xj.lin" Date: Wed, 17 Jul 2019 10:54:04 +0800 Subject: [PATCH 73/91] MS-240 add Log Former-commit-id: 0f779db21c49adb3d09b1868ac2ee8de00d35bd0 --- cpp/src/db/DBMetaImpl.cpp | 1 + cpp/src/db/EngineFactory.cpp | 1 + cpp/src/db/ExecutionEngineImpl.cpp | 4 +--- cpp/src/server/RequestTask.cpp | 1 + cpp/src/wrapper/knowhere/vec_impl.cpp | 5 ++++- cpp/src/wrapper/knowhere/wrapper_log.h | 28 ++++++++++++++++++++++++++ 6 files changed, 36 insertions(+), 4 deletions(-) create mode 100644 cpp/src/wrapper/knowhere/wrapper_log.h diff --git a/cpp/src/db/DBMetaImpl.cpp b/cpp/src/db/DBMetaImpl.cpp index d13899dca0..2f88250a42 100644 --- a/cpp/src/db/DBMetaImpl.cpp +++ b/cpp/src/db/DBMetaImpl.cpp @@ -388,6 +388,7 @@ Status DBMetaImpl::CreateTableFile(TableFileSchema &file_schema) { file_schema.created_on_ = utils::GetMicroSecTimeStamp(); file_schema.updated_time_ = file_schema.created_on_; file_schema.engine_type_ = table_schema.engine_type_; + ENGINE_LOG_DEBUG << "CreateTableFile EngineTypee: " << table_schema.engine_type_; GetTableFilePath(file_schema); auto id = ConnectorPtr->insert(file_schema); diff --git a/cpp/src/db/EngineFactory.cpp b/cpp/src/db/EngineFactory.cpp index 3389c0a07e..f05e487f38 100644 --- 
a/cpp/src/db/EngineFactory.cpp +++ b/cpp/src/db/EngineFactory.cpp @@ -53,6 +53,7 @@ EngineFactory::Build(uint16_t dimension, return nullptr; } + ENGINE_LOG_DEBUG << "EngineFactory EngineTypee: " << int(type); ExecutionEnginePtr execution_engine_ptr = std::make_shared(dimension, location, type); diff --git a/cpp/src/db/ExecutionEngineImpl.cpp b/cpp/src/db/ExecutionEngineImpl.cpp index ead97dfe38..5927d09399 100644 --- a/cpp/src/db/ExecutionEngineImpl.cpp +++ b/cpp/src/db/ExecutionEngineImpl.cpp @@ -37,17 +37,14 @@ VecIndexPtr ExecutionEngineImpl::CreatetVecIndex(EngineType type) { std::shared_ptr index; switch (type) { case EngineType::FAISS_IDMAP: { - ENGINE_LOG_DEBUG << "Build Index: IDMAP"; index = GetVecIndexFactory(IndexType::FAISS_IDMAP); break; } case EngineType::FAISS_IVFFLAT_GPU: { - ENGINE_LOG_DEBUG << "Build Index: IVFMIX"; index = GetVecIndexFactory(IndexType::FAISS_IVFFLAT_MIX); break; } case EngineType::FAISS_IVFFLAT_CPU: { - ENGINE_LOG_DEBUG << "Build Index: IVFCPU"; index = GetVecIndexFactory(IndexType::FAISS_IVFFLAT_CPU); break; } @@ -137,6 +134,7 @@ ExecutionEngineImpl::BuildIndex(const std::string &location) { ENGINE_LOG_DEBUG << "Build index file: " << location << " from: " << location_; auto from_index = std::dynamic_pointer_cast(index_); + ENGINE_LOG_DEBUG << "BuildIndex EngineTypee: " << int(build_type); auto to_index = CreatetVecIndex(build_type); ENGINE_LOG_DEBUG << "Build Params: [gpu_id] " << gpu_num; to_index->BuildAll(Count(), diff --git a/cpp/src/server/RequestTask.cpp b/cpp/src/server/RequestTask.cpp index 51d5404107..555be29042 100644 --- a/cpp/src/server/RequestTask.cpp +++ b/cpp/src/server/RequestTask.cpp @@ -155,6 +155,7 @@ ServerError CreateTableTask::OnExecute() { } res = ValidateTableIndexType(schema_.index_type); + SERVER_LOG_DEBUG << "Createtbale EngineTypee: " << schema_.index_type; if(res != SERVER_SUCCESS) { return res; } diff --git a/cpp/src/wrapper/knowhere/vec_impl.cpp b/cpp/src/wrapper/knowhere/vec_impl.cpp index d50bfe34da..4ca48bfe12 100644 --- a/cpp/src/wrapper/knowhere/vec_impl.cpp +++ b/cpp/src/wrapper/knowhere/vec_impl.cpp @@ -10,6 +10,7 @@ #include "vec_impl.h" #include "data_transfer.h" +#include "wrapper_log.h" namespace zilliz { @@ -138,6 +139,8 @@ void IVFMixIndex::BuildAll(const long &nb, const Config &cfg, const long &nt, const float *xt) { + WRAPPER_LOG_DEBUG << "Get Into Build IVFMIX"; + dim = cfg["dim"].as(); auto dataset = GenDatasetWithIds(nb, dim, xb, ids); @@ -153,7 +156,7 @@ void IVFMixIndex::BuildAll(const long &nb, auto host_index = device_index->Copy_index_gpu_to_cpu(); index_ = host_index; } else { - // TODO(linxj): LOG ERROR + WRAPPER_LOG_ERROR << "Build IVFMIXIndex Failed"; } } diff --git a/cpp/src/wrapper/knowhere/wrapper_log.h b/cpp/src/wrapper/knowhere/wrapper_log.h new file mode 100644 index 0000000000..39ca78092b --- /dev/null +++ b/cpp/src/wrapper/knowhere/wrapper_log.h @@ -0,0 +1,28 @@ +//////////////////////////////////////////////////////////////////////////////// +// Copyright 上海赜睿信息科技有限公司(Zilliz) - All Rights Reserved +// Unauthorized copying of this file, via any medium is strictly prohibited. +// Proprietary and confidential. 
+//////////////////////////////////////////////////////////////////////////////// + +#pragma once + +#include + +namespace zilliz { +namespace milvus { +namespace engine { + +#define WRAPPER_DOMAIN_NAME "[WRAPPER] " +#define WRAPPER_ERROR_TEXT "WRAPPER Error:" + +#define WRAPPER_LOG_TRACE LOG(TRACE) << WRAPPER_DOMAIN_NAME +#define WRAPPER_LOG_DEBUG LOG(DEBUG) << WRAPPER_DOMAIN_NAME +#define WRAPPER_LOG_INFO LOG(INFO) << WRAPPER_DOMAIN_NAME +#define WRAPPER_LOG_WARNING LOG(WARNING) << WRAPPER_DOMAIN_NAME +#define WRAPPER_LOG_ERROR LOG(ERROR) << WRAPPER_DOMAIN_NAME +#define WRAPPER_LOG_FATAL LOG(FATAL) << WRAPPER_DOMAIN_NAME + +} +} +} + From b6a3f7657a61164cfefcbcd887ac971e84ca4d99 Mon Sep 17 00:00:00 2001 From: "xj.lin" Date: Wed, 17 Jul 2019 11:40:09 +0800 Subject: [PATCH 74/91] MS-240 fix unitttest build error Former-commit-id: a8b8dfbc46e01ac2651c74a7b41d0298557eef57 --- cpp/unittest/index_wrapper/knowhere_test.cpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/cpp/unittest/index_wrapper/knowhere_test.cpp b/cpp/unittest/index_wrapper/knowhere_test.cpp index 30673dba1f..928d61d00c 100644 --- a/cpp/unittest/index_wrapper/knowhere_test.cpp +++ b/cpp/unittest/index_wrapper/knowhere_test.cpp @@ -5,11 +5,13 @@ //////////////////////////////////////////////////////////////////////////////// #include +#include #include #include "utils.h" +INITIALIZE_EASYLOGGINGPP using namespace zilliz::milvus::engine; using namespace zilliz::knowhere; From d15858e5109ef3bdde65a3396071ad6da8cbf1c0 Mon Sep 17 00:00:00 2001 From: zhiru Date: Wed, 17 Jul 2019 18:51:09 +0800 Subject: [PATCH 75/91] update knowhere and fix bug Former-commit-id: 0b12cc8353bf063ef309b2ffd2bc5f13ebb83634 --- cpp/thirdparty/knowhere | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cpp/thirdparty/knowhere b/cpp/thirdparty/knowhere index ca99a6899b..922081c142 160000 --- a/cpp/thirdparty/knowhere +++ b/cpp/thirdparty/knowhere @@ -1 +1 @@ -Subproject commit ca99a6899be4e8a0806452656cf0f2be19d79c1a +Subproject commit 922081c14234e8ddab8020e07b3a39a593010095 From 606ec47ec5bc4f34b9d208d4dbe0c53ed10254c6 Mon Sep 17 00:00:00 2001 From: zhiru Date: Wed, 17 Jul 2019 18:57:11 +0800 Subject: [PATCH 76/91] update knowhere Former-commit-id: 9ddf5b059cce6c8f5f6e4a8eda38b867992e2621 --- cpp/thirdparty/knowhere | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cpp/thirdparty/knowhere b/cpp/thirdparty/knowhere index 922081c142..afaf652827 160000 --- a/cpp/thirdparty/knowhere +++ b/cpp/thirdparty/knowhere @@ -1 +1 @@ -Subproject commit 922081c14234e8ddab8020e07b3a39a593010095 +Subproject commit afaf65282737514e232bf477aacb2772a4d32d5d From 651f642d3182a5cb8415e6d3221ee022b1bcaf0c Mon Sep 17 00:00:00 2001 From: "xj.lin" Date: Sun, 21 Jul 2019 16:10:26 +0800 Subject: [PATCH 77/91] MS-259 add exception Former-commit-id: 8f6b7b9c2e178146d0db55d14a2b716999d8dc0a --- cpp/src/db/ExecutionEngineImpl.cpp | 71 ++++-- cpp/src/utils/Error.h | 6 + cpp/src/wrapper/knowhere/vec_impl.cpp | 247 ++++++++++++------- cpp/src/wrapper/knowhere/vec_impl.h | 46 ++-- cpp/src/wrapper/knowhere/vec_index.cpp | 39 +-- cpp/src/wrapper/knowhere/vec_index.h | 36 +-- cpp/thirdparty/knowhere | 2 +- cpp/unittest/index_wrapper/knowhere_test.cpp | 4 + 8 files changed, 291 insertions(+), 160 deletions(-) diff --git a/cpp/src/db/ExecutionEngineImpl.cpp b/cpp/src/db/ExecutionEngineImpl.cpp index 5927d09399..63ed00d29e 100644 --- a/cpp/src/db/ExecutionEngineImpl.cpp +++ b/cpp/src/db/ExecutionEngineImpl.cpp @@ -3,6 +3,8 @@ * Unauthorized copying of this file, 
via any medium is strictly prohibited. * Proprietary and confidential. ******************************************************************************/ +#include + #include #include #include "Log.h" @@ -11,6 +13,8 @@ #include "ExecutionEngineImpl.h" #include "wrapper/knowhere/vec_index.h" #include "wrapper/knowhere/vec_impl.h" +#include "knowhere/common/exception.h" +#include "Exception.h" namespace zilliz { @@ -21,9 +25,13 @@ ExecutionEngineImpl::ExecutionEngineImpl(uint16_t dimension, const std::string &location, EngineType type) : location_(location), dim(dimension), build_type(type) { - index_ = CreatetVecIndex(EngineType::FAISS_IDMAP); current_type = EngineType::FAISS_IDMAP; - std::static_pointer_cast(index_)->Build(dimension); + + index_ = CreatetVecIndex(EngineType::FAISS_IDMAP); + if (!index_) throw Exception("Create Empty VecIndex"); + + auto ec = std::static_pointer_cast(index_)->Build(dimension); + if (ec != server::KNOWHERE_SUCCESS) { throw Exception("Build index error"); } } ExecutionEngineImpl::ExecutionEngineImpl(VecIndexPtr index, @@ -61,7 +69,10 @@ VecIndexPtr ExecutionEngineImpl::CreatetVecIndex(EngineType type) { } Status ExecutionEngineImpl::AddWithIds(long n, const float *xdata, const long *xids) { - index_->Add(n, xdata, xids, Config::object{{"dim", dim}}); + auto ec = index_->Add(n, xdata, xids, Config::object{{"dim", dim}}); + if (ec != server::KNOWHERE_SUCCESS) { + return Status::Error("Add error"); + } return Status::OK(); } @@ -82,7 +93,10 @@ size_t ExecutionEngineImpl::PhysicalSize() const { } Status ExecutionEngineImpl::Serialize() { - write_index(index_, location_); + auto ec = write_index(index_, location_); + if (ec != server::KNOWHERE_SUCCESS) { + return Status::Error("Serialize: write to disk error"); + } return Status::OK(); } @@ -91,9 +105,16 @@ Status ExecutionEngineImpl::Load() { bool to_cache = false; auto start_time = METRICS_NOW_TIME; if (!index_) { - index_ = read_index(location_); - to_cache = true; - ENGINE_LOG_DEBUG << "Disk io from: " << location_; + try { + index_ = read_index(location_); + to_cache = true; + ENGINE_LOG_DEBUG << "Disk io from: " << location_; + } catch (knowhere::KnowhereException &e) { + ENGINE_LOG_ERROR << e.what(); + return Status::Error(e.what()); + } catch (std::exception &e) { + return Status::Error(e.what()); + } } if (to_cache) { @@ -118,11 +139,22 @@ Status ExecutionEngineImpl::Merge(const std::string &location) { auto to_merge = zilliz::milvus::cache::CpuCacheMgr::GetInstance()->GetIndex(location); if (!to_merge) { - to_merge = read_index(location); + try { + to_merge = read_index(location); + } catch (knowhere::KnowhereException &e) { + ENGINE_LOG_ERROR << e.what(); + return Status::Error(e.what()); + } catch (std::exception &e) { + return Status::Error(e.what()); + } } if (auto file_index = std::dynamic_pointer_cast(to_merge)) { - index_->Add(file_index->Count(), file_index->GetRawVectors(), file_index->GetRawIds()); + auto ec = index_->Add(file_index->Count(), file_index->GetRawVectors(), file_index->GetRawIds()); + if (ec != server::KNOWHERE_SUCCESS) { + ENGINE_LOG_ERROR << "Merge: Add Error"; + return Status::Error("Merge: Add Error"); + } return Status::OK(); } else { return Status::Error("file index type is not idmap"); @@ -134,13 +166,16 @@ ExecutionEngineImpl::BuildIndex(const std::string &location) { ENGINE_LOG_DEBUG << "Build index file: " << location << " from: " << location_; auto from_index = std::dynamic_pointer_cast(index_); - ENGINE_LOG_DEBUG << "BuildIndex EngineTypee: " << int(build_type); auto 
to_index = CreatetVecIndex(build_type); - ENGINE_LOG_DEBUG << "Build Params: [gpu_id] " << gpu_num; - to_index->BuildAll(Count(), - from_index->GetRawVectors(), - from_index->GetRawIds(), - Config::object{{"dim", Dimension()}, {"gpu_id", gpu_num}}); + if (!to_index) { + throw Exception("Create Empty VecIndex"); + } + + auto ec = to_index->BuildAll(Count(), + from_index->GetRawVectors(), + from_index->GetRawIds(), + Config::object{{"dim", Dimension()}, {"gpu_id", gpu_num}}); + if (ec != server::KNOWHERE_SUCCESS) { throw Exception("Build index error"); } return std::make_shared(to_index, location, build_type); } @@ -151,7 +186,11 @@ Status ExecutionEngineImpl::Search(long n, float *distances, long *labels) const { ENGINE_LOG_DEBUG << "Search Params: [k] " << k << " [nprobe] " << nprobe_; - index_->Search(n, data, distances, labels, Config::object{{"k", k}, {"nprobe", nprobe_}}); + auto ec = index_->Search(n, data, distances, labels, Config::object{{"k", k}, {"nprobe", nprobe_}}); + if (ec != server::KNOWHERE_SUCCESS) { + ENGINE_LOG_ERROR << "Search error"; + return Status::Error("Search: Search Error"); + } return Status::OK(); } diff --git a/cpp/src/utils/Error.h b/cpp/src/utils/Error.h index 8c4da70339..82b22d57e7 100644 --- a/cpp/src/utils/Error.h +++ b/cpp/src/utils/Error.h @@ -54,6 +54,12 @@ constexpr ServerError SERVER_LICENSE_VALIDATION_FAIL = ToGlobalServerErrorCode(5 constexpr ServerError DB_META_TRANSACTION_FAILED = ToGlobalServerErrorCode(1000); +using KnowhereError = int32_t; +constexpr KnowhereError KNOWHERE_SUCCESS = 0; +constexpr KnowhereError KNOWHERE_ERROR = ToGlobalServerErrorCode(1); +constexpr KnowhereError KNOWHERE_INVALID_ARGUMENT = ToGlobalServerErrorCode(2); +constexpr KnowhereError KNOWHERE_UNEXPECTED_ERROR = ToGlobalServerErrorCode(3); + class ServerException : public std::exception { public: ServerException(ServerError error_code, diff --git a/cpp/src/wrapper/knowhere/vec_impl.cpp b/cpp/src/wrapper/knowhere/vec_impl.cpp index 4ca48bfe12..f0bcd30f43 100644 --- a/cpp/src/wrapper/knowhere/vec_impl.cpp +++ b/cpp/src/wrapper/knowhere/vec_impl.cpp @@ -7,6 +7,7 @@ #include #include "knowhere/index/vector_index/idmap.h" #include "knowhere/index/vector_index/gpu_ivf.h" +#include "knowhere/common/exception.h" #include "vec_impl.h" #include "data_transfer.h" @@ -19,77 +20,110 @@ namespace engine { using namespace zilliz::knowhere; -void VecIndexImpl::BuildAll(const long &nb, - const float *xb, - const long *ids, - const Config &cfg, - const long &nt, - const float *xt) { - dim = cfg["dim"].as(); - auto dataset = GenDatasetWithIds(nb, dim, xb, ids); +server::KnowhereError VecIndexImpl::BuildAll(const long &nb, + const float *xb, + const long *ids, + const Config &cfg, + const long &nt, + const float *xt) { + try { + dim = cfg["dim"].as(); + auto dataset = GenDatasetWithIds(nb, dim, xb, ids); - auto preprocessor = index_->BuildPreprocessor(dataset, cfg); - index_->set_preprocessor(preprocessor); - auto nlist = int(nb / 1000000.0 * 16384); - auto cfg_t = Config::object{{"nlist", nlist}, {"dim", dim}}; - auto model = index_->Train(dataset, cfg_t); - index_->set_index_model(model); - index_->Add(dataset, cfg); + auto preprocessor = index_->BuildPreprocessor(dataset, cfg); + index_->set_preprocessor(preprocessor); + auto nlist = int(nb / 1000000.0 * 16384); + auto cfg_t = Config::object{{"nlist", nlist}, {"dim", dim}}; + auto model = index_->Train(dataset, cfg_t); + index_->set_index_model(model); + index_->Add(dataset, cfg); + } catch (KnowhereException &e) { + WRAPPER_LOG_ERROR 
<< e.what(); + return server::KNOWHERE_UNEXPECTED_ERROR; + } catch (jsoncons::json_exception &e) { + WRAPPER_LOG_ERROR << e.what(); + return server::KNOWHERE_INVALID_ARGUMENT; + } catch (std::exception &e) { + WRAPPER_LOG_ERROR << e.what(); + return server::KNOWHERE_ERROR; + } + return server::KNOWHERE_SUCCESS; } -void VecIndexImpl::Add(const long &nb, const float *xb, const long *ids, const Config &cfg) { - // TODO(linxj): Assert index is trained; +server::KnowhereError VecIndexImpl::Add(const long &nb, const float *xb, const long *ids, const Config &cfg) { + try { + auto d = cfg.get_with_default("dim", dim); + auto dataset = GenDatasetWithIds(nb, d, xb, ids); - auto d = cfg.get_with_default("dim", dim); - auto dataset = GenDatasetWithIds(nb, d, xb, ids); - - index_->Add(dataset, cfg); + index_->Add(dataset, cfg); + } catch (KnowhereException &e) { + WRAPPER_LOG_ERROR << e.what(); + return server::KNOWHERE_UNEXPECTED_ERROR; + } catch (jsoncons::json_exception &e) { + WRAPPER_LOG_ERROR << e.what(); + return server::KNOWHERE_INVALID_ARGUMENT; + } catch (std::exception &e) { + WRAPPER_LOG_ERROR << e.what(); + return server::KNOWHERE_ERROR; + } + return server::KNOWHERE_SUCCESS; } -void VecIndexImpl::Search(const long &nq, const float *xq, float *dist, long *ids, const Config &cfg) { - // TODO: Assert index is trained; +server::KnowhereError VecIndexImpl::Search(const long &nq, const float *xq, float *dist, long *ids, const Config &cfg) { + try { + auto k = cfg["k"].as(); + auto d = cfg.get_with_default("dim", dim); + auto dataset = GenDataset(nq, d, xq); - auto k = cfg["k"].as(); - auto d = cfg.get_with_default("dim", dim); - auto dataset = GenDataset(nq, d, xq); + Config search_cfg; + auto res = index_->Search(dataset, cfg); + auto ids_array = res->array()[0]; + auto dis_array = res->array()[1]; - Config search_cfg; - auto res = index_->Search(dataset, cfg); - auto ids_array = res->array()[0]; - auto dis_array = res->array()[1]; + //{ + // auto& ids = ids_array; + // auto& dists = dis_array; + // std::stringstream ss_id; + // std::stringstream ss_dist; + // for (auto i = 0; i < 10; i++) { + // for (auto j = 0; j < k; ++j) { + // ss_id << *(ids->data()->GetValues(1, i * k + j)) << " "; + // ss_dist << *(dists->data()->GetValues(1, i * k + j)) << " "; + // } + // ss_id << std::endl; + // ss_dist << std::endl; + // } + // std::cout << "id\n" << ss_id.str() << std::endl; + // std::cout << "dist\n" << ss_dist.str() << std::endl; + //} - //{ - // auto& ids = ids_array; - // auto& dists = dis_array; - // std::stringstream ss_id; - // std::stringstream ss_dist; - // for (auto i = 0; i < 10; i++) { - // for (auto j = 0; j < k; ++j) { - // ss_id << *(ids->data()->GetValues(1, i * k + j)) << " "; - // ss_dist << *(dists->data()->GetValues(1, i * k + j)) << " "; - // } - // ss_id << std::endl; - // ss_dist << std::endl; - // } - // std::cout << "id\n" << ss_id.str() << std::endl; - // std::cout << "dist\n" << ss_dist.str() << std::endl; - //} + auto p_ids = ids_array->data()->GetValues(1, 0); + auto p_dist = dis_array->data()->GetValues(1, 0); - auto p_ids = ids_array->data()->GetValues(1, 0); - auto p_dist = dis_array->data()->GetValues(1, 0); - - // TODO(linxj): avoid copy here. - memcpy(ids, p_ids, sizeof(int64_t) * nq * k); - memcpy(dist, p_dist, sizeof(float) * nq * k); + // TODO(linxj): avoid copy here. 
+ memcpy(ids, p_ids, sizeof(int64_t) * nq * k); + memcpy(dist, p_dist, sizeof(float) * nq * k); + } catch (KnowhereException &e) { + WRAPPER_LOG_ERROR << e.what(); + return server::KNOWHERE_UNEXPECTED_ERROR; + } catch (jsoncons::json_exception &e) { + WRAPPER_LOG_ERROR << e.what(); + return server::KNOWHERE_INVALID_ARGUMENT; + } catch (std::exception &e) { + WRAPPER_LOG_ERROR << e.what(); + return server::KNOWHERE_ERROR; + } + return server::KNOWHERE_SUCCESS; } zilliz::knowhere::BinarySet VecIndexImpl::Serialize() { return index_->Serialize(); } -void VecIndexImpl::Load(const zilliz::knowhere::BinarySet &index_binary) { +server::KnowhereError VecIndexImpl::Load(const zilliz::knowhere::BinarySet &index_binary) { index_->Load(index_binary); dim = Dimension(); + return server::KNOWHERE_SUCCESS; } int64_t VecIndexImpl::Dimension() { @@ -114,56 +148,91 @@ int64_t *BFIndex::GetRawIds() { return std::static_pointer_cast(index_)->GetRawIds(); } -void BFIndex::Build(const int64_t &d) { - dim = d; - std::static_pointer_cast(index_)->Train(dim); +server::KnowhereError BFIndex::Build(const int64_t &d) { + try { + dim = d; + std::static_pointer_cast(index_)->Train(dim); + } catch (KnowhereException &e) { + WRAPPER_LOG_ERROR << e.what(); + return server::KNOWHERE_UNEXPECTED_ERROR; + } catch (jsoncons::json_exception &e) { + WRAPPER_LOG_ERROR << e.what(); + return server::KNOWHERE_INVALID_ARGUMENT; + } catch (std::exception &e) { + WRAPPER_LOG_ERROR << e.what(); + return server::KNOWHERE_ERROR; + } + return server::KNOWHERE_SUCCESS; } -void BFIndex::BuildAll(const long &nb, - const float *xb, - const long *ids, - const Config &cfg, - const long &nt, - const float *xt) { - dim = cfg["dim"].as(); - auto dataset = GenDatasetWithIds(nb, dim, xb, ids); +server::KnowhereError BFIndex::BuildAll(const long &nb, + const float *xb, + const long *ids, + const Config &cfg, + const long &nt, + const float *xt) { + try { + dim = cfg["dim"].as(); + auto dataset = GenDatasetWithIds(nb, dim, xb, ids); - std::static_pointer_cast(index_)->Train(dim); - index_->Add(dataset, cfg); + std::static_pointer_cast(index_)->Train(dim); + index_->Add(dataset, cfg); + } catch (KnowhereException &e) { + WRAPPER_LOG_ERROR << e.what(); + return server::KNOWHERE_UNEXPECTED_ERROR; + } catch (jsoncons::json_exception &e) { + WRAPPER_LOG_ERROR << e.what(); + return server::KNOWHERE_INVALID_ARGUMENT; + } catch (std::exception &e) { + WRAPPER_LOG_ERROR << e.what(); + return server::KNOWHERE_ERROR; + } + return server::KNOWHERE_SUCCESS; } // TODO(linxj): add lock here. 
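Every wrapper entry point in this commit repeats the same three catch blocks, mapping knowhere::KnowhereException to KNOWHERE_UNEXPECTED_ERROR, jsoncons::json_exception to KNOWHERE_INVALID_ARGUMENT, and any other std::exception to KNOWHERE_ERROR. The helper below is only a sketch of how that mapping could be factored into one place; the error constants and exception types are stand-ins for the ones declared in utils/Error.h and the knowhere/jsoncons headers, and it is not part of this patch:

```
// Sketch: centralise the exception-to-KnowhereError translation used by the
// wrapper methods above. All names here are local stand-ins.
#include <cstdint>
#include <iostream>
#include <stdexcept>

using KnowhereError = int32_t;
constexpr KnowhereError KNOWHERE_SUCCESS = 0;
constexpr KnowhereError KNOWHERE_ERROR = 1;
constexpr KnowhereError KNOWHERE_INVALID_ARGUMENT = 2;
constexpr KnowhereError KNOWHERE_UNEXPECTED_ERROR = 3;

// Stand-ins for knowhere::KnowhereException and jsoncons::json_exception.
struct KnowhereException : std::runtime_error { using std::runtime_error::runtime_error; };
struct json_exception : std::runtime_error { using std::runtime_error::runtime_error; };

template <typename Func>
KnowhereError TranslateExceptions(Func &&body) {
    try {
        body();
        return KNOWHERE_SUCCESS;
    } catch (const KnowhereException &e) {
        std::cerr << "wrapper: " << e.what() << "\n";
        return KNOWHERE_UNEXPECTED_ERROR;    // index-layer failure
    } catch (const json_exception &e) {
        std::cerr << "wrapper: " << e.what() << "\n";
        return KNOWHERE_INVALID_ARGUMENT;    // bad config value
    } catch (const std::exception &e) {
        std::cerr << "wrapper: " << e.what() << "\n";
        return KNOWHERE_ERROR;               // anything else
    }
}

int main() {
    auto ok  = TranslateExceptions([] { /* build / add / search ... */ });
    auto bad = TranslateExceptions([] { throw json_exception("missing dim"); });
    std::cout << ok << " " << bad << "\n";   // prints: 0 2
}
```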
-void IVFMixIndex::BuildAll(const long &nb, - const float *xb, - const long *ids, - const Config &cfg, - const long &nt, - const float *xt) { - WRAPPER_LOG_DEBUG << "Get Into Build IVFMIX"; +server::KnowhereError IVFMixIndex::BuildAll(const long &nb, + const float *xb, + const long *ids, + const Config &cfg, + const long &nt, + const float *xt) { + try { + dim = cfg["dim"].as(); + auto dataset = GenDatasetWithIds(nb, dim, xb, ids); - dim = cfg["dim"].as(); - auto dataset = GenDatasetWithIds(nb, dim, xb, ids); + auto preprocessor = index_->BuildPreprocessor(dataset, cfg); + index_->set_preprocessor(preprocessor); + auto nlist = int(nb / 1000000.0 * 16384); + auto cfg_t = Config::object{{"nlist", nlist}, {"dim", dim}}; + auto model = index_->Train(dataset, cfg_t); + index_->set_index_model(model); + index_->Add(dataset, cfg); - auto preprocessor = index_->BuildPreprocessor(dataset, cfg); - index_->set_preprocessor(preprocessor); - auto nlist = int(nb / 1000000.0 * 16384); - auto cfg_t = Config::object{{"nlist", nlist}, {"dim", dim}}; - auto model = index_->Train(dataset, cfg_t); - index_->set_index_model(model); - index_->Add(dataset, cfg); - - if (auto device_index = std::dynamic_pointer_cast(index_)) { - auto host_index = device_index->Copy_index_gpu_to_cpu(); - index_ = host_index; - } else { - WRAPPER_LOG_ERROR << "Build IVFMIXIndex Failed"; + if (auto device_index = std::dynamic_pointer_cast(index_)) { + auto host_index = device_index->Copy_index_gpu_to_cpu(); + index_ = host_index; + } else { + WRAPPER_LOG_ERROR << "Build IVFMIXIndex Failed"; + } + } catch (KnowhereException &e) { + WRAPPER_LOG_ERROR << e.what(); + return server::KNOWHERE_UNEXPECTED_ERROR; + } catch (jsoncons::json_exception &e) { + WRAPPER_LOG_ERROR << e.what(); + return server::KNOWHERE_INVALID_ARGUMENT; + } catch (std::exception &e) { + WRAPPER_LOG_ERROR << e.what(); + return server::KNOWHERE_ERROR; } + return server::KNOWHERE_SUCCESS; } -void IVFMixIndex::Load(const zilliz::knowhere::BinarySet &index_binary) { +server::KnowhereError IVFMixIndex::Load(const zilliz::knowhere::BinarySet &index_binary) { index_ = std::make_shared(); index_->Load(index_binary); dim = Dimension(); + return server::KNOWHERE_SUCCESS; } } diff --git a/cpp/src/wrapper/knowhere/vec_impl.h b/cpp/src/wrapper/knowhere/vec_impl.h index 1d09a069d2..3d432ff0d8 100644 --- a/cpp/src/wrapper/knowhere/vec_impl.h +++ b/cpp/src/wrapper/knowhere/vec_impl.h @@ -19,19 +19,19 @@ class VecIndexImpl : public VecIndex { public: explicit VecIndexImpl(std::shared_ptr index, const IndexType &type) : index_(std::move(index)), type(type) {}; - void BuildAll(const long &nb, - const float *xb, - const long *ids, - const Config &cfg, - const long &nt, - const float *xt) override; + server::KnowhereError BuildAll(const long &nb, + const float *xb, + const long *ids, + const Config &cfg, + const long &nt, + const float *xt) override; IndexType GetType() override; int64_t Dimension() override; int64_t Count() override; - void Add(const long &nb, const float *xb, const long *ids, const Config &cfg) override; + server::KnowhereError Add(const long &nb, const float *xb, const long *ids, const Config &cfg) override; zilliz::knowhere::BinarySet Serialize() override; - void Load(const zilliz::knowhere::BinarySet &index_binary) override; - void Search(const long &nq, const float *xq, float *dist, long *ids, const Config &cfg) override; + server::KnowhereError Load(const zilliz::knowhere::BinarySet &index_binary) override; + server::KnowhereError Search(const long &nq, const 
float *xq, float *dist, long *ids, const Config &cfg) override; protected: int64_t dim = 0; @@ -43,27 +43,27 @@ class IVFMixIndex : public VecIndexImpl { public: explicit IVFMixIndex(std::shared_ptr index) : VecIndexImpl(std::move(index), IndexType::FAISS_IVFFLAT_MIX) {}; - void BuildAll(const long &nb, - const float *xb, - const long *ids, - const Config &cfg, - const long &nt, - const float *xt) override; - void Load(const zilliz::knowhere::BinarySet &index_binary) override; + server::KnowhereError BuildAll(const long &nb, + const float *xb, + const long *ids, + const Config &cfg, + const long &nt, + const float *xt) override; + server::KnowhereError Load(const zilliz::knowhere::BinarySet &index_binary) override; }; class BFIndex : public VecIndexImpl { public: explicit BFIndex(std::shared_ptr index) : VecIndexImpl(std::move(index), IndexType::FAISS_IDMAP) {}; - void Build(const int64_t &d); + server::KnowhereError Build(const int64_t &d); float *GetRawVectors(); - void BuildAll(const long &nb, - const float *xb, - const long *ids, - const Config &cfg, - const long &nt, - const float *xt) override; + server::KnowhereError BuildAll(const long &nb, + const float *xb, + const long *ids, + const Config &cfg, + const long &nt, + const float *xt) override; int64_t *GetRawIds(); }; diff --git a/cpp/src/wrapper/knowhere/vec_index.cpp b/cpp/src/wrapper/knowhere/vec_index.cpp index 55e1ea4cea..342f10a6b7 100644 --- a/cpp/src/wrapper/knowhere/vec_index.cpp +++ b/cpp/src/wrapper/knowhere/vec_index.cpp @@ -7,9 +7,11 @@ #include "knowhere/index/vector_index/idmap.h" #include "knowhere/index/vector_index/gpu_ivf.h" #include "knowhere/index/vector_index/cpu_kdt_rng.h" +#include "knowhere/common/exception.h" #include "vec_index.h" #include "vec_impl.h" +#include "wrapper_log.h" namespace zilliz { @@ -153,23 +155,32 @@ VecIndexPtr read_index(const std::string &location) { return LoadVecIndex(current_type, load_data_list); } -void write_index(VecIndexPtr index, const std::string &location) { - auto binaryset = index->Serialize(); - auto index_type = index->GetType(); +server::KnowhereError write_index(VecIndexPtr index, const std::string &location) { + try { + auto binaryset = index->Serialize(); + auto index_type = index->GetType(); - FileIOWriter writer(location); - writer(&index_type, sizeof(IndexType)); - for (auto &iter: binaryset.binary_map_) { - auto meta = iter.first.c_str(); - size_t meta_length = iter.first.length(); - writer(&meta_length, sizeof(meta_length)); - writer((void *) meta, meta_length); + FileIOWriter writer(location); + writer(&index_type, sizeof(IndexType)); + for (auto &iter: binaryset.binary_map_) { + auto meta = iter.first.c_str(); + size_t meta_length = iter.first.length(); + writer(&meta_length, sizeof(meta_length)); + writer((void *) meta, meta_length); - auto binary = iter.second; - int64_t binary_length = binary->size; - writer(&binary_length, sizeof(binary_length)); - writer((void *) binary->data.get(), binary_length); + auto binary = iter.second; + int64_t binary_length = binary->size; + writer(&binary_length, sizeof(binary_length)); + writer((void *) binary->data.get(), binary_length); + } + } catch (knowhere::KnowhereException &e) { + WRAPPER_LOG_ERROR << e.what(); + return server::KNOWHERE_UNEXPECTED_ERROR; + } catch (std::exception& e) { + WRAPPER_LOG_ERROR << e.what(); + return server::KNOWHERE_ERROR; } + return server::KNOWHERE_SUCCESS; } } diff --git a/cpp/src/wrapper/knowhere/vec_index.h b/cpp/src/wrapper/knowhere/vec_index.h index a488922d9e..c3f5528652 100644 --- 
a/cpp/src/wrapper/knowhere/vec_index.h +++ b/cpp/src/wrapper/knowhere/vec_index.h @@ -9,6 +9,8 @@ #include #include +#include "utils/Error.h" + #include "knowhere/common/config.h" #include "knowhere/common/binary_set.h" @@ -34,23 +36,23 @@ enum class IndexType { class VecIndex { public: - virtual void BuildAll(const long &nb, - const float *xb, - const long *ids, - const Config &cfg, - const long &nt = 0, - const float *xt = nullptr) = 0; + virtual server::KnowhereError BuildAll(const long &nb, + const float *xb, + const long *ids, + const Config &cfg, + const long &nt = 0, + const float *xt = nullptr) = 0; - virtual void Add(const long &nb, - const float *xb, - const long *ids, - const Config &cfg = Config()) = 0; + virtual server::KnowhereError Add(const long &nb, + const float *xb, + const long *ids, + const Config &cfg = Config()) = 0; - virtual void Search(const long &nq, - const float *xq, - float *dist, - long *ids, - const Config &cfg = Config()) = 0; + virtual server::KnowhereError Search(const long &nq, + const float *xq, + float *dist, + long *ids, + const Config &cfg = Config()) = 0; virtual IndexType GetType() = 0; @@ -60,12 +62,12 @@ class VecIndex { virtual zilliz::knowhere::BinarySet Serialize() = 0; - virtual void Load(const zilliz::knowhere::BinarySet &index_binary) = 0; + virtual server::KnowhereError Load(const zilliz::knowhere::BinarySet &index_binary) = 0; }; using VecIndexPtr = std::shared_ptr; -extern void write_index(VecIndexPtr index, const std::string &location); +extern server::KnowhereError write_index(VecIndexPtr index, const std::string &location); extern VecIndexPtr read_index(const std::string &location); diff --git a/cpp/thirdparty/knowhere b/cpp/thirdparty/knowhere index afaf652827..1a4dc44779 160000 --- a/cpp/thirdparty/knowhere +++ b/cpp/thirdparty/knowhere @@ -1 +1 @@ -Subproject commit afaf65282737514e232bf477aacb2772a4d32d5d +Subproject commit 1a4dc447797d281c3c83255c1b8a7709fc8d7738 diff --git a/cpp/unittest/index_wrapper/knowhere_test.cpp b/cpp/unittest/index_wrapper/knowhere_test.cpp index 928d61d00c..83a4d4404c 100644 --- a/cpp/unittest/index_wrapper/knowhere_test.cpp +++ b/cpp/unittest/index_wrapper/knowhere_test.cpp @@ -163,3 +163,7 @@ TEST_P(KnowhereWrapperTest, serialize) { } } +// TODO(linxj): add exception test +//TEST_P(KnowhereWrapperTest, exception_test) { +//} + From 100bdc0d4a2b6db8aff2b8da05d198939b324400 Mon Sep 17 00:00:00 2001 From: "xj.lin" Date: Sun, 21 Jul 2019 20:47:34 +0800 Subject: [PATCH 78/91] update knowhere version Former-commit-id: 3822bfab27fcaf68ad040615190ab56be725f8b2 --- cpp/thirdparty/knowhere | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cpp/thirdparty/knowhere b/cpp/thirdparty/knowhere index 1a4dc44779..b0b9dd18fa 160000 --- a/cpp/thirdparty/knowhere +++ b/cpp/thirdparty/knowhere @@ -1 +1 @@ -Subproject commit 1a4dc447797d281c3c83255c1b8a7709fc8d7738 +Subproject commit b0b9dd18fadbf9dc0fccaad815e14e578a92993e From 4cff638c30aaca525c3de497cebd228aecb410fa Mon Sep 17 00:00:00 2001 From: "yudong.cai" Date: Sun, 21 Jul 2019 11:57:12 +0800 Subject: [PATCH 79/91] 1. fix bzip2 download url 2. 
update exception Former-commit-id: f9f9c4e49933f3e60c7245a886882ca187722cc7 --- cpp/cmake/ThirdPartyPackages.cmake | 2 +- cpp/unittest/server/cache_test.cpp | 9 +++++---- 2 files changed, 6 insertions(+), 5 deletions(-) diff --git a/cpp/cmake/ThirdPartyPackages.cmake b/cpp/cmake/ThirdPartyPackages.cmake index 8b888519f9..5b5d72f885 100644 --- a/cpp/cmake/ThirdPartyPackages.cmake +++ b/cpp/cmake/ThirdPartyPackages.cmake @@ -230,7 +230,7 @@ endif() if(DEFINED ENV{MILVUS_BZIP2_URL}) set(BZIP2_SOURCE_URL "$ENV{MILVUS_BZIP2_URL}") else() - set(BZIP2_SOURCE_URL "https://fossies.org/linux/misc/bzip2-${BZIP2_VERSION}.tar.gz") + set(BZIP2_SOURCE_URL "https://sourceware.org/pub/bzip2/bzip2-${BZIP2_VERSION}.tar.gz") endif() if(DEFINED ENV{MILVUS_EASYLOGGINGPP_URL}) diff --git a/cpp/unittest/server/cache_test.cpp b/cpp/unittest/server/cache_test.cpp index 9424563cad..d45deb7475 100644 --- a/cpp/unittest/server/cache_test.cpp +++ b/cpp/unittest/server/cache_test.cpp @@ -7,6 +7,7 @@ #include "cache/CpuCacheMgr.h" #include "cache/GpuCacheMgr.h" +#include "utils/Error.h" #include "wrapper/Index.h" #include "wrapper/knowhere/vec_index.h" @@ -29,7 +30,7 @@ public: class MockVecIndex : public engine::VecIndex { public: - virtual void BuildAll(const long &nb, + virtual server::KnowhereError BuildAll(const long &nb, const float *xb, const long *ids, const engine::Config &cfg, @@ -42,14 +43,14 @@ public: return engine::IndexType::INVALID; } - virtual void Add(const long &nb, + virtual server::KnowhereError Add(const long &nb, const float *xb, const long *ids, const engine::Config &cfg = engine::Config()) { } - virtual void Search(const long &nq, + virtual server::KnowhereError Search(const long &nq, const float *xq, float *dist, long *ids, @@ -70,7 +71,7 @@ public: return binset; } - virtual void Load(const zilliz::knowhere::BinarySet &index_binary) { + virtual server::KnowhereError Load(const zilliz::knowhere::BinarySet &index_binary) { } From ba7e3c3dd2966bd1076cb7318e6384fe7a7cd4b0 Mon Sep 17 00:00:00 2001 From: "xj.lin" Date: Mon, 22 Jul 2019 16:37:06 +0800 Subject: [PATCH 80/91] MS-265 IVFSQ Former-commit-id: 6b0638af562ed60bc2252e4430c1018e29ae857c --- cpp/src/db/ExecutionEngineImpl.cpp | 9 +++++++-- cpp/src/wrapper/knowhere/vec_impl.cpp | 14 ++++--------- cpp/src/wrapper/knowhere/vec_impl.h | 5 +++-- cpp/src/wrapper/knowhere/vec_index.cpp | 20 ++++++++++++++++++- cpp/src/wrapper/knowhere/vec_index.h | 3 +++ cpp/unittest/index_wrapper/knowhere_test.cpp | 21 ++++++++++++-------- 6 files changed, 49 insertions(+), 23 deletions(-) diff --git a/cpp/src/db/ExecutionEngineImpl.cpp b/cpp/src/db/ExecutionEngineImpl.cpp index 63ed00d29e..c3b1afc375 100644 --- a/cpp/src/db/ExecutionEngineImpl.cpp +++ b/cpp/src/db/ExecutionEngineImpl.cpp @@ -69,7 +69,7 @@ VecIndexPtr ExecutionEngineImpl::CreatetVecIndex(EngineType type) { } Status ExecutionEngineImpl::AddWithIds(long n, const float *xdata, const long *xids) { - auto ec = index_->Add(n, xdata, xids, Config::object{{"dim", dim}}); + auto ec = index_->Add(n, xdata, xids); if (ec != server::KNOWHERE_SUCCESS) { return Status::Error("Add error"); } @@ -171,10 +171,15 @@ ExecutionEngineImpl::BuildIndex(const std::string &location) { throw Exception("Create Empty VecIndex"); } + Config build_cfg; + build_cfg["dim"] = Dimension(); + build_cfg["gpu_id"] = gpu_num; + AutoGenParams(to_index->GetType(), Count(), build_cfg); + auto ec = to_index->BuildAll(Count(), from_index->GetRawVectors(), from_index->GetRawIds(), - Config::object{{"dim", Dimension()}, {"gpu_id", gpu_num}}); + 
build_cfg); if (ec != server::KNOWHERE_SUCCESS) { throw Exception("Build index error"); } return std::make_shared(to_index, location, build_type); diff --git a/cpp/src/wrapper/knowhere/vec_impl.cpp b/cpp/src/wrapper/knowhere/vec_impl.cpp index f0bcd30f43..63e4d51c26 100644 --- a/cpp/src/wrapper/knowhere/vec_impl.cpp +++ b/cpp/src/wrapper/knowhere/vec_impl.cpp @@ -32,9 +32,7 @@ server::KnowhereError VecIndexImpl::BuildAll(const long &nb, auto preprocessor = index_->BuildPreprocessor(dataset, cfg); index_->set_preprocessor(preprocessor); - auto nlist = int(nb / 1000000.0 * 16384); - auto cfg_t = Config::object{{"nlist", nlist}, {"dim", dim}}; - auto model = index_->Train(dataset, cfg_t); + auto model = index_->Train(dataset, cfg); index_->set_index_model(model); index_->Add(dataset, cfg); } catch (KnowhereException &e) { @@ -52,8 +50,7 @@ server::KnowhereError VecIndexImpl::BuildAll(const long &nb, server::KnowhereError VecIndexImpl::Add(const long &nb, const float *xb, const long *ids, const Config &cfg) { try { - auto d = cfg.get_with_default("dim", dim); - auto dataset = GenDatasetWithIds(nb, d, xb, ids); + auto dataset = GenDatasetWithIds(nb, dim, xb, ids); index_->Add(dataset, cfg); } catch (KnowhereException &e) { @@ -72,8 +69,7 @@ server::KnowhereError VecIndexImpl::Add(const long &nb, const float *xb, const l server::KnowhereError VecIndexImpl::Search(const long &nq, const float *xq, float *dist, long *ids, const Config &cfg) { try { auto k = cfg["k"].as(); - auto d = cfg.get_with_default("dim", dim); - auto dataset = GenDataset(nq, d, xq); + auto dataset = GenDataset(nq, dim, xq); Config search_cfg; auto res = index_->Search(dataset, cfg); @@ -203,9 +199,7 @@ server::KnowhereError IVFMixIndex::BuildAll(const long &nb, auto preprocessor = index_->BuildPreprocessor(dataset, cfg); index_->set_preprocessor(preprocessor); - auto nlist = int(nb / 1000000.0 * 16384); - auto cfg_t = Config::object{{"nlist", nlist}, {"dim", dim}}; - auto model = index_->Train(dataset, cfg_t); + auto model = index_->Train(dataset, cfg); index_->set_index_model(model); index_->Add(dataset, cfg); diff --git a/cpp/src/wrapper/knowhere/vec_impl.h b/cpp/src/wrapper/knowhere/vec_impl.h index 3d432ff0d8..4f20d17b6a 100644 --- a/cpp/src/wrapper/knowhere/vec_impl.h +++ b/cpp/src/wrapper/knowhere/vec_impl.h @@ -41,8 +41,9 @@ class VecIndexImpl : public VecIndex { class IVFMixIndex : public VecIndexImpl { public: - explicit IVFMixIndex(std::shared_ptr index) : VecIndexImpl(std::move(index), - IndexType::FAISS_IVFFLAT_MIX) {}; + explicit IVFMixIndex(std::shared_ptr index, const IndexType &type) + : VecIndexImpl(std::move(index), type) {}; + server::KnowhereError BuildAll(const long &nb, const float *xb, const long *ids, diff --git a/cpp/src/wrapper/knowhere/vec_index.cpp b/cpp/src/wrapper/knowhere/vec_index.cpp index 342f10a6b7..6f5d51a3af 100644 --- a/cpp/src/wrapper/knowhere/vec_index.cpp +++ b/cpp/src/wrapper/knowhere/vec_index.cpp @@ -85,7 +85,7 @@ VecIndexPtr GetVecIndexFactory(const IndexType &type) { } case IndexType::FAISS_IVFFLAT_MIX: { index = std::make_shared(0); - return std::make_shared(index); + return std::make_shared(index, IndexType::FAISS_IVFFLAT_MIX); } case IndexType::FAISS_IVFPQ_CPU: { index = std::make_shared(); @@ -98,6 +98,10 @@ VecIndexPtr GetVecIndexFactory(const IndexType &type) { case IndexType::SPTAG_KDT_RNT_CPU: { index = std::make_shared(); break; + } + case IndexType::FAISS_IVFSQ8_MIX: { + index = std::make_shared(0); + return std::make_shared(index, IndexType::FAISS_IVFSQ8_MIX); } //case 
IndexType::NSG: { // TODO(linxj): bug. // index = std::make_shared(); @@ -183,6 +187,20 @@ server::KnowhereError write_index(VecIndexPtr index, const std::string &location return server::KNOWHERE_SUCCESS; } + +// TODO(linxj): redo here. +void AutoGenParams(const IndexType &type, const long &size, zilliz::knowhere::Config &cfg) { + if (!cfg.contains("nlist")) { cfg["nlist"] = int(size / 1000000.0 * 16384); } + if (!cfg.contains("gpu_id")) { cfg["gpu_id"] = int(0); } + + switch (type) { + case IndexType::FAISS_IVFSQ8_MIX: { + if (!cfg.contains("nbits")) { cfg["nbits"] = int(8); } + break; + } + } +} + } } } diff --git a/cpp/src/wrapper/knowhere/vec_index.h b/cpp/src/wrapper/knowhere/vec_index.h index c3f5528652..ed1451bb04 100644 --- a/cpp/src/wrapper/knowhere/vec_index.h +++ b/cpp/src/wrapper/knowhere/vec_index.h @@ -31,6 +31,7 @@ enum class IndexType { FAISS_IVFPQ_CPU, FAISS_IVFPQ_GPU, SPTAG_KDT_RNT_CPU, + FAISS_IVFSQ8_MIX, //NSG, }; @@ -75,6 +76,8 @@ extern VecIndexPtr GetVecIndexFactory(const IndexType &type); extern VecIndexPtr LoadVecIndex(const IndexType &index_type, const zilliz::knowhere::BinarySet &index_binary); +extern void AutoGenParams(const IndexType& type, const long& size, Config& cfg); + } } } diff --git a/cpp/unittest/index_wrapper/knowhere_test.cpp b/cpp/unittest/index_wrapper/knowhere_test.cpp index 83a4d4404c..bec4c940cf 100644 --- a/cpp/unittest/index_wrapper/knowhere_test.cpp +++ b/cpp/unittest/index_wrapper/knowhere_test.cpp @@ -41,7 +41,7 @@ class KnowhereWrapperTest for (auto i = 0; i < nq; i++) { EXPECT_EQ(ids[i * k], gt_ids[i * k]); - EXPECT_EQ(dis[i * k], gt_dis[i * k]); + //EXPECT_EQ(dis[i * k], gt_dis[i * k]); } int match = 0; @@ -84,11 +84,11 @@ class KnowhereWrapperTest INSTANTIATE_TEST_CASE_P(WrapperParam, KnowhereWrapperTest, Values( //["Index type", "Generator type", "dim", "nb", "nq", "k", "build config", "search config"] - std::make_tuple(IndexType::FAISS_IVFFLAT_CPU, "Default", - 64, 100000, 10, 10, - Config::object{{"nlist", 100}, {"dim", 64}}, - Config::object{{"dim", 64}, {"k", 10}, {"nprobe", 10}} - ), + //std::make_tuple(IndexType::FAISS_IVFFLAT_CPU, "Default", + // 64, 100000, 10, 10, + // Config::object{{"nlist", 100}, {"dim", 64}}, + // Config::object{{"dim", 64}, {"k", 10}, {"nprobe", 10}} + //), //std::make_tuple(IndexType::FAISS_IVFFLAT_GPU, "Default", // 64, 10000, 10, 10, // Config::object{{"nlist", 100}, {"dim", 64}}, @@ -96,13 +96,18 @@ INSTANTIATE_TEST_CASE_P(WrapperParam, KnowhereWrapperTest, //), std::make_tuple(IndexType::FAISS_IVFFLAT_MIX, "Default", 64, 100000, 10, 10, - Config::object{{"nlist", 100}, {"dim", 64}}, - Config::object{{"dim", 64}, {"k", 10}, {"nprobe", 10}} + Config::object{{"nlist", 1000}, {"dim", 64}}, + Config::object{{"dim", 64}, {"k", 10}, {"nprobe", 5}} ), std::make_tuple(IndexType::FAISS_IDMAP, "Default", 64, 100000, 10, 10, Config::object{{"dim", 64}}, Config::object{{"dim", 64}, {"k", 10}} + ), + std::make_tuple(IndexType::FAISS_IVFSQ8_MIX, "Default", + 64, 100000, 10, 10, + Config::object{{"dim", 64}, {"nlist", 1000}, {"nbits", 8}}, + Config::object{{"dim", 64}, {"k", 10}, {"nprobe", 5}} ) //std::make_tuple(IndexType::SPTAG_KDT_RNT_CPU, "Default", // 64, 10000, 10, 10, From 813f5151e8161a2c99300a8a455bc94f94c1c67b Mon Sep 17 00:00:00 2001 From: "xj.lin" Date: Mon, 22 Jul 2019 19:53:44 +0800 Subject: [PATCH 81/91] MS-267 Support Inner Product update.. 
Former-commit-id: 8fdbb39fbdd05853f25c374f437f3ed78a46345d --- cpp/src/db/ExecutionEngineImpl.cpp | 5 ++++- cpp/src/wrapper/knowhere/vec_impl.cpp | 8 ++++---- cpp/src/wrapper/knowhere/vec_impl.h | 2 +- cpp/src/wrapper/knowhere/vec_index.cpp | 3 ++- cpp/thirdparty/knowhere | 2 +- cpp/unittest/index_wrapper/knowhere_test.cpp | 6 +++--- 6 files changed, 15 insertions(+), 11 deletions(-) diff --git a/cpp/src/db/ExecutionEngineImpl.cpp b/cpp/src/db/ExecutionEngineImpl.cpp index c3b1afc375..35f68558c4 100644 --- a/cpp/src/db/ExecutionEngineImpl.cpp +++ b/cpp/src/db/ExecutionEngineImpl.cpp @@ -30,7 +30,10 @@ ExecutionEngineImpl::ExecutionEngineImpl(uint16_t dimension, index_ = CreatetVecIndex(EngineType::FAISS_IDMAP); if (!index_) throw Exception("Create Empty VecIndex"); - auto ec = std::static_pointer_cast(index_)->Build(dimension); + Config build_cfg; + build_cfg["dim"] = dimension; + AutoGenParams(index_->GetType(), 0, build_cfg); + auto ec = std::static_pointer_cast(index_)->Build(build_cfg); if (ec != server::KNOWHERE_SUCCESS) { throw Exception("Build index error"); } } diff --git a/cpp/src/wrapper/knowhere/vec_impl.cpp b/cpp/src/wrapper/knowhere/vec_impl.cpp index 63e4d51c26..7efbd54f0f 100644 --- a/cpp/src/wrapper/knowhere/vec_impl.cpp +++ b/cpp/src/wrapper/knowhere/vec_impl.cpp @@ -144,10 +144,10 @@ int64_t *BFIndex::GetRawIds() { return std::static_pointer_cast(index_)->GetRawIds(); } -server::KnowhereError BFIndex::Build(const int64_t &d) { +server::KnowhereError BFIndex::Build(const Config &cfg) { try { - dim = d; - std::static_pointer_cast(index_)->Train(dim); + dim = cfg["dim"].as(); + std::static_pointer_cast(index_)->Train(cfg); } catch (KnowhereException &e) { WRAPPER_LOG_ERROR << e.what(); return server::KNOWHERE_UNEXPECTED_ERROR; @@ -171,7 +171,7 @@ server::KnowhereError BFIndex::BuildAll(const long &nb, dim = cfg["dim"].as(); auto dataset = GenDatasetWithIds(nb, dim, xb, ids); - std::static_pointer_cast(index_)->Train(dim); + std::static_pointer_cast(index_)->Train(cfg); index_->Add(dataset, cfg); } catch (KnowhereException &e) { WRAPPER_LOG_ERROR << e.what(); diff --git a/cpp/src/wrapper/knowhere/vec_impl.h b/cpp/src/wrapper/knowhere/vec_impl.h index 4f20d17b6a..c4a0e2ac61 100644 --- a/cpp/src/wrapper/knowhere/vec_impl.h +++ b/cpp/src/wrapper/knowhere/vec_impl.h @@ -57,7 +57,7 @@ class BFIndex : public VecIndexImpl { public: explicit BFIndex(std::shared_ptr index) : VecIndexImpl(std::move(index), IndexType::FAISS_IDMAP) {}; - server::KnowhereError Build(const int64_t &d); + server::KnowhereError Build(const Config& cfg); float *GetRawVectors(); server::KnowhereError BuildAll(const long &nb, const float *xb, diff --git a/cpp/src/wrapper/knowhere/vec_index.cpp b/cpp/src/wrapper/knowhere/vec_index.cpp index 6f5d51a3af..65364eb01f 100644 --- a/cpp/src/wrapper/knowhere/vec_index.cpp +++ b/cpp/src/wrapper/knowhere/vec_index.cpp @@ -180,7 +180,7 @@ server::KnowhereError write_index(VecIndexPtr index, const std::string &location } catch (knowhere::KnowhereException &e) { WRAPPER_LOG_ERROR << e.what(); return server::KNOWHERE_UNEXPECTED_ERROR; - } catch (std::exception& e) { + } catch (std::exception &e) { WRAPPER_LOG_ERROR << e.what(); return server::KNOWHERE_ERROR; } @@ -192,6 +192,7 @@ server::KnowhereError write_index(VecIndexPtr index, const std::string &location void AutoGenParams(const IndexType &type, const long &size, zilliz::knowhere::Config &cfg) { if (!cfg.contains("nlist")) { cfg["nlist"] = int(size / 1000000.0 * 16384); } if (!cfg.contains("gpu_id")) { cfg["gpu_id"] = 
int(0); } + if (!cfg.contains("metric_type")) { cfg["metric_type"] = "L2"; } switch (type) { case IndexType::FAISS_IVFSQ8_MIX: { diff --git a/cpp/thirdparty/knowhere b/cpp/thirdparty/knowhere index b0b9dd18fa..f866ac4e29 160000 --- a/cpp/thirdparty/knowhere +++ b/cpp/thirdparty/knowhere @@ -1 +1 @@ -Subproject commit b0b9dd18fadbf9dc0fccaad815e14e578a92993e +Subproject commit f866ac4e297dea477ec591a62679cf5cdd219cc8 diff --git a/cpp/unittest/index_wrapper/knowhere_test.cpp b/cpp/unittest/index_wrapper/knowhere_test.cpp index bec4c940cf..064d6dc911 100644 --- a/cpp/unittest/index_wrapper/knowhere_test.cpp +++ b/cpp/unittest/index_wrapper/knowhere_test.cpp @@ -96,17 +96,17 @@ INSTANTIATE_TEST_CASE_P(WrapperParam, KnowhereWrapperTest, //), std::make_tuple(IndexType::FAISS_IVFFLAT_MIX, "Default", 64, 100000, 10, 10, - Config::object{{"nlist", 1000}, {"dim", 64}}, + Config::object{{"nlist", 1000}, {"dim", 64}, {"metric_type", "L2"}}, Config::object{{"dim", 64}, {"k", 10}, {"nprobe", 5}} ), std::make_tuple(IndexType::FAISS_IDMAP, "Default", 64, 100000, 10, 10, - Config::object{{"dim", 64}}, + Config::object{{"dim", 64}, {"metric_type", "L2"}}, Config::object{{"dim", 64}, {"k", 10}} ), std::make_tuple(IndexType::FAISS_IVFSQ8_MIX, "Default", 64, 100000, 10, 10, - Config::object{{"dim", 64}, {"nlist", 1000}, {"nbits", 8}}, + Config::object{{"dim", 64}, {"nlist", 1000}, {"nbits", 8}, {"metric_type", "L2"}}, Config::object{{"dim", 64}, {"k", 10}, {"nprobe", 5}} ) //std::make_tuple(IndexType::SPTAG_KDT_RNT_CPU, "Default", From 7b3886fd23cae55ee1a79f9ef9995789da051da7 Mon Sep 17 00:00:00 2001 From: "xj.lin" Date: Wed, 24 Jul 2019 10:33:41 +0800 Subject: [PATCH 82/91] MS-267 default support IP Former-commit-id: 64bfb03ea5291604f551530bc65896cc19841a5d --- cpp/src/wrapper/knowhere/vec_index.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cpp/src/wrapper/knowhere/vec_index.cpp b/cpp/src/wrapper/knowhere/vec_index.cpp index 65364eb01f..cc9f808474 100644 --- a/cpp/src/wrapper/knowhere/vec_index.cpp +++ b/cpp/src/wrapper/knowhere/vec_index.cpp @@ -192,7 +192,7 @@ server::KnowhereError write_index(VecIndexPtr index, const std::string &location void AutoGenParams(const IndexType &type, const long &size, zilliz::knowhere::Config &cfg) { if (!cfg.contains("nlist")) { cfg["nlist"] = int(size / 1000000.0 * 16384); } if (!cfg.contains("gpu_id")) { cfg["gpu_id"] = int(0); } - if (!cfg.contains("metric_type")) { cfg["metric_type"] = "L2"; } + if (!cfg.contains("metric_type")) { cfg["metric_type"] = "IP"; } // TODO: remove switch (type) { case IndexType::FAISS_IVFSQ8_MIX: { From d7cbf9404af9f21360a423a981640fbc934067fe Mon Sep 17 00:00:00 2001 From: starlord Date: Wed, 24 Jul 2019 20:00:14 +0800 Subject: [PATCH 83/91] fix unittest unstable case Former-commit-id: 2cc58215ffb327741e90824fecedf8a3073eebcc --- cpp/unittest/db/db_tests.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cpp/unittest/db/db_tests.cpp b/cpp/unittest/db/db_tests.cpp index 0d17ecbb16..396a578181 100644 --- a/cpp/unittest/db/db_tests.cpp +++ b/cpp/unittest/db/db_tests.cpp @@ -217,7 +217,7 @@ TEST_F(DBTest, SEARCH_TEST) { {//search by specify index file engine::meta::DatesT dates; - std::vector file_ids = {"1", "2", "3", "4"}; + std::vector file_ids = {"4", "5"}; engine::QueryResults results; stat = db_->Query(TABLE_NAME, file_ids, k, nq, xq.data(), dates, results); ASSERT_STATS(stat); From f37ae7765abf9888f7ebd733b23cf4875ce56010 Mon Sep 17 00:00:00 2001 From: "xj.lin" Date: Thu, 25 Jul 2019 19:57:12 +0800 
Subject: [PATCH 84/91] MS-137 integrate knowhere... Former-commit-id: 04bd70d29d1da919dbff9c39e0c60ccc6db57d23 --- ci/main_jenkinsfile | 13 ---- ci/nightly_main_jenkinsfile | 96 -------------------------- cpp/src/db/ExecutionEngineImpl.cpp | 13 ++++ cpp/src/db/ExecutionEngineImpl.h | 1 + cpp/src/wrapper/knowhere/vec_index.cpp | 2 +- cpp/thirdparty/knowhere | 2 +- cpp/unittest/db/db_tests.cpp | 15 ++-- 7 files changed, 24 insertions(+), 118 deletions(-) diff --git a/ci/main_jenkinsfile b/ci/main_jenkinsfile index a97edc7cbb..2710e51ffb 100644 --- a/ci/main_jenkinsfile +++ b/ci/main_jenkinsfile @@ -324,18 +324,6 @@ spec: post { always { script { -<<<<<<< HEAD - if (!currentBuild.resultIsBetterOrEqualTo('SUCCESS')) { - // Send an email only if the build status has changed from green/unstable to red - emailext subject: '$DEFAULT_SUBJECT', - body: '$DEFAULT_CONTENT', - recipientProviders: [ - [$class: 'DevelopersRecipientProvider'], - [$class: 'RequesterRecipientProvider'] - ], - replyTo: '$DEFAULT_REPLYTO', - to: '$DEFAULT_RECIPIENTS' -======= if (env.gitlabAfter != null) { if (!currentBuild.resultIsBetterOrEqualTo('SUCCESS')) { // Send an email only if the build status has changed from green/unstable to red @@ -348,7 +336,6 @@ spec: replyTo: '$DEFAULT_REPLYTO', to: '$DEFAULT_RECIPIENTS' } ->>>>>>> branch-0.3.1 } } } diff --git a/ci/nightly_main_jenkinsfile b/ci/nightly_main_jenkinsfile index c0e7a10039..567e70cb48 100644 --- a/ci/nightly_main_jenkinsfile +++ b/ci/nightly_main_jenkinsfile @@ -35,11 +35,7 @@ pipeline { defaultContainer 'jnlp' containerTemplate { name 'milvus-build-env' -<<<<<<< HEAD - image 'registry.zilliz.com/milvus/milvus-build-env:v0.10' -======= image 'registry.zilliz.com/milvus/milvus-build-env:v0.12' ->>>>>>> branch-0.3.1 ttyEnabled true command 'cat' } @@ -134,22 +130,6 @@ spec: } stage("Deploy to Development") { -<<<<<<< HEAD - stages { - stage("Deploy to Dev") { - agent { - kubernetes { - label 'jenkins-slave' - defaultContainer 'jnlp' - } - } - stages { - stage('Deploy') { - steps { - gitlabCommitStatus(name: 'Deloy to Dev') { - script { - load "${env.WORKSPACE}/ci/jenkinsfile/deploy2dev.groovy" -======= parallel { stage("Single Node") { agent { @@ -213,26 +193,12 @@ spec: script { load "${env.WORKSPACE}/ci/jenkinsfile/cleanup_dev.groovy" } ->>>>>>> branch-0.3.1 } } } } } post { -<<<<<<< HEAD - aborted { - script { - updateGitlabCommitStatus name: 'Deloy to Dev', state: 'canceled' - echo "Milvus Deloy to Dev aborted !" - } - } - - failure { - script { - updateGitlabCommitStatus name: 'Deloy to Dev', state: 'failed' - echo "Milvus Deloy to Dev failure !" -======= always { container('milvus-testframework') { script { @@ -253,52 +219,11 @@ spec: failure { script { echo "Milvus Single Node CI/CD failure !" 
->>>>>>> branch-0.3.1 } } } } -<<<<<<< HEAD - stage("Dev Test") { - agent { - kubernetes { - label 'test' - defaultContainer 'jnlp' - containerTemplate { - name 'milvus-testframework' - image 'registry.zilliz.com/milvus/milvus-test:v0.1' - ttyEnabled true - command 'cat' - } - } - } - stages { - stage('Test') { - steps { - script { - load "${env.WORKSPACE}/ci/jenkinsfile/dev_test.groovy" - load "${env.WORKSPACE}/ci/jenkinsfile/upload_dev_test_out.groovy" - } - } - } - } - } - - stage ("Cleanup Dev") { - agent { - kubernetes { - label 'jenkins-slave' - defaultContainer 'jnlp' - } - } - stages { - stage('Cleanup') { - steps { - gitlabCommitStatus(name: 'Cleanup Dev') { - script { - load "${env.WORKSPACE}/ci/jenkinsfile/cleanup_dev.groovy" -======= stage("Cluster") { agent { kubernetes { @@ -360,26 +285,12 @@ spec: script { load "${env.WORKSPACE}/ci/jenkinsfile/cluster_cleanup_dev.groovy" } ->>>>>>> branch-0.3.1 } } } } } post { -<<<<<<< HEAD - aborted { - script { - updateGitlabCommitStatus name: 'Cleanup Dev', state: 'canceled' - echo "Milvus Cleanup Dev aborted !" - } - } - - failure { - script { - updateGitlabCommitStatus name: 'Cleanup Dev', state: 'failed' - echo "Milvus Cleanup Dev failure !" -======= always { container('milvus-testframework') { script { @@ -400,7 +311,6 @@ spec: failure { script { echo "Milvus Cluster CI/CD failure !" ->>>>>>> branch-0.3.1 } } } @@ -412,8 +322,6 @@ spec: } post { -<<<<<<< HEAD -======= always { script { if (!currentBuild.resultIsBetterOrEqualTo('SUCCESS')) { @@ -430,7 +338,6 @@ spec: } } ->>>>>>> branch-0.3.1 success { script { updateGitlabCommitStatus name: 'CI/CD', state: 'success' @@ -453,7 +360,4 @@ spec: } } } -<<<<<<< HEAD -======= ->>>>>>> branch-0.3.1 diff --git a/cpp/src/db/ExecutionEngineImpl.cpp b/cpp/src/db/ExecutionEngineImpl.cpp index 03c9d3d535..f878018dcd 100644 --- a/cpp/src/db/ExecutionEngineImpl.cpp +++ b/cpp/src/db/ExecutionEngineImpl.cpp @@ -22,6 +22,14 @@ namespace zilliz { namespace milvus { namespace engine { +namespace { +std::string GetMetricType() { + server::ServerConfig &config = server::ServerConfig::GetInstance(); + server::ConfigNode engine_config = config.GetConfig(server::CONFIG_ENGINE); + return engine_config.GetValue(server::CONFIG_METRICTYPE, "L2"); +} +} + ExecutionEngineImpl::ExecutionEngineImpl(uint16_t dimension, const std::string &location, EngineType type) @@ -33,6 +41,7 @@ ExecutionEngineImpl::ExecutionEngineImpl(uint16_t dimension, Config build_cfg; build_cfg["dim"] = dimension; + build_cfg["metric_type"] = GetMetricType(); AutoGenParams(index_->GetType(), 0, build_cfg); auto ec = std::static_pointer_cast(index_)->Build(build_cfg); if (ec != server::KNOWHERE_SUCCESS) { throw Exception("Build index error"); } @@ -172,7 +181,9 @@ ExecutionEngineImpl::BuildIndex(const std::string &location) { Config build_cfg; build_cfg["dim"] = Dimension(); + build_cfg["metric_type"] = GetMetricType(); build_cfg["gpu_id"] = gpu_num; + build_cfg["nlist"] = nlist_; AutoGenParams(to_index->GetType(), Count(), build_cfg); auto ec = to_index->BuildAll(Count(), @@ -204,6 +215,7 @@ Status ExecutionEngineImpl::Cache() { return Status::OK(); } +// TODO(linxj): remove. 
Status ExecutionEngineImpl::Init() { using namespace zilliz::milvus::server; ServerConfig &config = ServerConfig::GetInstance(); @@ -215,6 +227,7 @@ Status ExecutionEngineImpl::Init() { case EngineType::FAISS_IVFFLAT: { ConfigNode engine_config = config.GetConfig(CONFIG_ENGINE); nprobe_ = engine_config.GetInt32Value(CONFIG_NPROBE, 1); + nlist_ = engine_config.GetInt32Value(CONFIG_NLIST, 16384); break; } } diff --git a/cpp/src/db/ExecutionEngineImpl.h b/cpp/src/db/ExecutionEngineImpl.h index 1cc6d5282a..12579d7c5d 100644 --- a/cpp/src/db/ExecutionEngineImpl.h +++ b/cpp/src/db/ExecutionEngineImpl.h @@ -70,6 +70,7 @@ class ExecutionEngineImpl : public ExecutionEngine { std::string location_; size_t nprobe_ = 0; + size_t nlist_ = 0; int64_t gpu_num = 0; }; diff --git a/cpp/src/wrapper/knowhere/vec_index.cpp b/cpp/src/wrapper/knowhere/vec_index.cpp index cc9f808474..65364eb01f 100644 --- a/cpp/src/wrapper/knowhere/vec_index.cpp +++ b/cpp/src/wrapper/knowhere/vec_index.cpp @@ -192,7 +192,7 @@ server::KnowhereError write_index(VecIndexPtr index, const std::string &location void AutoGenParams(const IndexType &type, const long &size, zilliz::knowhere::Config &cfg) { if (!cfg.contains("nlist")) { cfg["nlist"] = int(size / 1000000.0 * 16384); } if (!cfg.contains("gpu_id")) { cfg["gpu_id"] = int(0); } - if (!cfg.contains("metric_type")) { cfg["metric_type"] = "IP"; } // TODO: remove + if (!cfg.contains("metric_type")) { cfg["metric_type"] = "L2"; } switch (type) { case IndexType::FAISS_IVFSQ8_MIX: { diff --git a/cpp/thirdparty/knowhere b/cpp/thirdparty/knowhere index f866ac4e29..a9151773f8 160000 --- a/cpp/thirdparty/knowhere +++ b/cpp/thirdparty/knowhere @@ -1 +1 @@ -Subproject commit f866ac4e297dea477ec591a62679cf5cdd219cc8 +Subproject commit a9151773f8119392a87a3052974c47ec890d101d diff --git a/cpp/unittest/db/db_tests.cpp b/cpp/unittest/db/db_tests.cpp index 0d17ecbb16..70f7da43c4 100644 --- a/cpp/unittest/db/db_tests.cpp +++ b/cpp/unittest/db/db_tests.cpp @@ -215,13 +215,14 @@ TEST_F(DBTest, SEARCH_TEST) { ASSERT_STATS(stat); } - {//search by specify index file - engine::meta::DatesT dates; - std::vector file_ids = {"1", "2", "3", "4"}; - engine::QueryResults results; - stat = db_->Query(TABLE_NAME, file_ids, k, nq, xq.data(), dates, results); - ASSERT_STATS(stat); - } + // TODO: FIX HERE + //{//search by specify index file + // engine::meta::DatesT dates; + // std::vector file_ids = {"1", "2", "3", "4"}; + // engine::QueryResults results; + // stat = db_->Query(TABLE_NAME, file_ids, k, nq, xq.data(), dates, results); + // ASSERT_STATS(stat); + //} // TODO(linxj): add groundTruth assert }; From aa771e7c3d9c5f94e98a858fc461cbc80fc1294c Mon Sep 17 00:00:00 2001 From: "xj.lin" Date: Fri, 26 Jul 2019 10:42:28 +0800 Subject: [PATCH 85/91] update knowhere version Former-commit-id: 0457623ff251b8bb925e289df0b9bf0ee097973c --- cpp/thirdparty/knowhere | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cpp/thirdparty/knowhere b/cpp/thirdparty/knowhere index a9151773f8..81b28a753f 160000 --- a/cpp/thirdparty/knowhere +++ b/cpp/thirdparty/knowhere @@ -1 +1 @@ -Subproject commit a9151773f8119392a87a3052974c47ec890d101d +Subproject commit 81b28a753fc47b46364afa7b9414e249c4b2cd75 From 640e9141c6e0a08ccd1e8b35b621f93fe4932e54 Mon Sep 17 00:00:00 2001 From: starlord Date: Fri, 26 Jul 2019 10:54:49 +0800 Subject: [PATCH 86/91] MS-275 Avoid sqlite logic error excetion Former-commit-id: d6ecaf2f32ed6c75370e6ed2bd24779cc18e76ff --- cpp/CHANGELOG.md | 2 ++ cpp/src/db/DBMetaImpl.cpp | 51 
+++++++++++++++++++++++++++++++++- cpp/src/db/DBMetaImpl.h | 3 ++ cpp/src/db/Factories.cpp | 4 +-- cpp/unittest/db/meta_tests.cpp | 4 +-- 5 files changed, 59 insertions(+), 5 deletions(-) diff --git a/cpp/CHANGELOG.md b/cpp/CHANGELOG.md index b326293e27..10fbc904e2 100644 --- a/cpp/CHANGELOG.md +++ b/cpp/CHANGELOG.md @@ -37,6 +37,8 @@ Please mark all change in change log and use the ticket from JIRA. - MS-260 - Refine log - MS-249 - Check machine hardware during initialize - MS-261 - Update faiss version to 1.5.3 and add BUILD_FAISS_WITH_MKL as an option +- MS-266 - Improve topk reduce time by using multi-threads +- MS-275 - Avoid sqlite logic error excetion ## New Feature - MS-180 - Add new mem manager diff --git a/cpp/src/db/DBMetaImpl.cpp b/cpp/src/db/DBMetaImpl.cpp index 56f741c4dc..6149202123 100644 --- a/cpp/src/db/DBMetaImpl.cpp +++ b/cpp/src/db/DBMetaImpl.cpp @@ -109,7 +109,7 @@ Status DBMetaImpl::Initialize() { auto ret = boost::filesystem::create_directory(options_.path); if (!ret) { ENGINE_LOG_ERROR << "Failed to create db directory " << options_.path; - return Status::DBTransactionError("Failed to create db directory", options_.path); + return Status::InvalidDBPath("Failed to create db directory", options_.path); } } @@ -147,6 +147,9 @@ Status DBMetaImpl::DropPartitionsByDates(const std::string &table_id, } } + //multi-threads call sqlite update may get exception('bad logic', etc), so we add a lock here + std::lock_guard meta_lock(meta_mutex_); + ConnectorPtr->update_all( set( c(&TableFileSchema::file_type_) = (int) TableFileSchema::TO_DELETE @@ -167,6 +170,9 @@ Status DBMetaImpl::CreateTable(TableSchema &table_schema) { try { MetricCollector metric; + //multi-threads call sqlite update may get exception('bad logic', etc), so we add a lock here + std::lock_guard meta_lock(meta_mutex_); + if (table_schema.table_id_ == "") { NextTableId(table_schema.table_id_); } else { @@ -190,6 +196,7 @@ Status DBMetaImpl::CreateTable(TableSchema &table_schema) { auto id = ConnectorPtr->insert(table_schema); table_schema.id_ = id; } catch (...) 
{ + ENGINE_LOG_ERROR << "sqlite transaction failed"; return Status::DBTransactionError("Add Table Error"); } @@ -206,6 +213,9 @@ Status DBMetaImpl::DeleteTable(const std::string& table_id) { try { MetricCollector metric; + //multi-threads call sqlite update may get exception('bad logic', etc), so we add a lock here + std::lock_guard meta_lock(meta_mutex_); + //soft delete table auto tables = ConnectorPtr->select(columns(&TableSchema::id_, &TableSchema::files_cnt_, @@ -238,6 +248,9 @@ Status DBMetaImpl::DeleteTableFiles(const std::string& table_id) { try { MetricCollector metric; + //multi-threads call sqlite update may get exception('bad logic', etc), so we add a lock here + std::lock_guard meta_lock(meta_mutex_); + //soft delete table files ConnectorPtr->update_all( set( @@ -383,6 +396,9 @@ Status DBMetaImpl::CreateTableFile(TableFileSchema &file_schema) { file_schema.updated_time_ = file_schema.created_on_; file_schema.engine_type_ = table_schema.engine_type_; + //multi-threads call sqlite update may get exception('bad logic', etc), so we add a lock here + std::lock_guard meta_lock(meta_mutex_); + auto id = ConnectorPtr->insert(file_schema); file_schema.id_ = id; @@ -649,6 +665,9 @@ Status DBMetaImpl::Archive() { long usecs = limit * D_SEC * US_PS; long now = utils::GetMicroSecTimeStamp(); try { + //multi-threads call sqlite update may get exception('bad logic', etc), so we add a lock here + std::lock_guard meta_lock(meta_mutex_); + ConnectorPtr->update_all( set( c(&TableFileSchema::file_type_) = (int) TableFileSchema::TO_DELETE @@ -710,6 +729,9 @@ Status DBMetaImpl::DiscardFiles(long to_discard_size) { try { MetricCollector metric; + //multi-threads call sqlite update may get exception('bad logic', etc), so we add a lock here + std::lock_guard meta_lock(meta_mutex_); + auto commited = ConnectorPtr->transaction([&]() mutable { auto selected = ConnectorPtr->select(columns(&TableFileSchema::id_, &TableFileSchema::size_), @@ -748,6 +770,7 @@ Status DBMetaImpl::DiscardFiles(long to_discard_size) { }); if (!commited) { + ENGINE_LOG_ERROR << "sqlite transaction failed"; return Status::DBTransactionError("Update table file error"); } @@ -763,6 +786,9 @@ Status DBMetaImpl::UpdateTableFile(TableFileSchema &file_schema) { try { MetricCollector metric; + //multi-threads call sqlite update may get exception('bad logic', etc), so we add a lock here + std::lock_guard meta_lock(meta_mutex_); + auto tables = ConnectorPtr->select(columns(&TableSchema::state_), where(c(&TableSchema::table_id_) == file_schema.table_id_)); @@ -784,6 +810,11 @@ Status DBMetaImpl::UpdateTableFile(TableFileSchema &file_schema) { Status DBMetaImpl::UpdateTableFilesToIndex(const std::string& table_id) { try { + MetricCollector metric; + + //multi-threads call sqlite update may get exception('bad logic', etc), so we add a lock here + std::lock_guard meta_lock(meta_mutex_); + ConnectorPtr->update_all( set( c(&TableFileSchema::file_type_) = (int) TableFileSchema::TO_INDEX @@ -803,6 +834,9 @@ Status DBMetaImpl::UpdateTableFiles(TableFilesSchema &files) { try { MetricCollector metric; + //multi-threads call sqlite update may get exception('bad logic', etc), so we add a lock here + std::lock_guard meta_lock(meta_mutex_); + std::map has_tables; for (auto &file : files) { if(has_tables.find(file.table_id_) != has_tables.end()) { @@ -831,6 +865,7 @@ Status DBMetaImpl::UpdateTableFiles(TableFilesSchema &files) { }); if (!commited) { + ENGINE_LOG_ERROR << "sqlite transaction failed"; return Status::DBTransactionError("Update table files 
error"); } @@ -845,6 +880,9 @@ Status DBMetaImpl::CleanUpFilesWithTTL(uint16_t seconds) { try { MetricCollector metric; + //multi-threads call sqlite update may get exception('bad logic', etc), so we add a lock here + std::lock_guard meta_lock(meta_mutex_); + auto files = ConnectorPtr->select(columns(&TableFileSchema::id_, &TableFileSchema::table_id_, &TableFileSchema::file_id_, @@ -873,6 +911,7 @@ Status DBMetaImpl::CleanUpFilesWithTTL(uint16_t seconds) { }); if (!commited) { + ENGINE_LOG_ERROR << "sqlite transaction failed"; return Status::DBTransactionError("Clean files error"); } @@ -883,6 +922,9 @@ Status DBMetaImpl::CleanUpFilesWithTTL(uint16_t seconds) { try { MetricCollector metric; + //multi-threads call sqlite update may get exception('bad logic', etc), so we add a lock here + std::lock_guard meta_lock(meta_mutex_); + auto tables = ConnectorPtr->select(columns(&TableSchema::id_, &TableSchema::table_id_), where(c(&TableSchema::state_) == (int) TableSchema::TO_DELETE)); @@ -897,6 +939,7 @@ Status DBMetaImpl::CleanUpFilesWithTTL(uint16_t seconds) { }); if (!commited) { + ENGINE_LOG_ERROR << "sqlite transaction failed"; return Status::DBTransactionError("Clean files error"); } @@ -909,6 +952,11 @@ Status DBMetaImpl::CleanUpFilesWithTTL(uint16_t seconds) { Status DBMetaImpl::CleanUp() { try { + MetricCollector metric; + + //multi-threads call sqlite update may get exception('bad logic', etc), so we add a lock here + std::lock_guard meta_lock(meta_mutex_); + auto files = ConnectorPtr->select(columns(&TableFileSchema::id_), where(c(&TableFileSchema::file_type_) == (int) TableFileSchema::NEW)); @@ -921,6 +969,7 @@ Status DBMetaImpl::CleanUp() { }); if (!commited) { + ENGINE_LOG_ERROR << "sqlite transaction failed"; return Status::DBTransactionError("Clean files error"); } diff --git a/cpp/src/db/DBMetaImpl.h b/cpp/src/db/DBMetaImpl.h index 6187ad7eae..ada8a9a4ee 100644 --- a/cpp/src/db/DBMetaImpl.h +++ b/cpp/src/db/DBMetaImpl.h @@ -8,6 +8,7 @@ #include "Meta.h" #include "Options.h" +#include namespace zilliz { namespace milvus { @@ -94,6 +95,8 @@ class DBMetaImpl : public Meta { Status Initialize(); const DBMetaOptions options_; + + std::mutex meta_mutex_; }; // DBMetaImpl } // namespace meta diff --git a/cpp/src/db/Factories.cpp b/cpp/src/db/Factories.cpp index 442dca2974..22d4760b9b 100644 --- a/cpp/src/db/Factories.cpp +++ b/cpp/src/db/Factories.cpp @@ -77,10 +77,10 @@ std::shared_ptr DBMetaImplFactory::Build(const DBMetaOptions& metaOp std::transform(dialect.begin(), dialect.end(), dialect.begin(), ::tolower); if (dialect.find("mysql") != std::string::npos) { ENGINE_LOG_INFO << "Using MySQL"; - return std::make_shared(meta::MySQLMetaImpl(metaOptions, mode)); + return std::make_shared(metaOptions, mode); } else if (dialect.find("sqlite") != std::string::npos) { ENGINE_LOG_INFO << "Using SQLite"; - return std::make_shared(meta::DBMetaImpl(metaOptions)); + return std::make_shared(metaOptions); } else { ENGINE_LOG_ERROR << "Invalid dialect in URI: dialect = " << dialect; throw InvalidArgumentException("URI dialect is not mysql / sqlite"); diff --git a/cpp/unittest/db/meta_tests.cpp b/cpp/unittest/db/meta_tests.cpp index 5bce4058b1..5fe9ca4309 100644 --- a/cpp/unittest/db/meta_tests.cpp +++ b/cpp/unittest/db/meta_tests.cpp @@ -113,7 +113,7 @@ TEST_F(MetaTest, ARCHIVE_TEST_DAYS) { ss << "days:" << days_num; options.archive_conf = ArchiveConf("delete", ss.str()); - auto impl = meta::DBMetaImpl(options); + meta::DBMetaImpl impl(options); auto table_id = "meta_test_table"; meta::TableSchema 
table; @@ -163,7 +163,7 @@ TEST_F(MetaTest, ARCHIVE_TEST_DISK) { options.path = "/tmp/milvus_test"; options.archive_conf = ArchiveConf("delete", "disk:11"); - auto impl = meta::DBMetaImpl(options); + meta::DBMetaImpl impl(options); auto table_id = "meta_test_group"; meta::TableSchema table; From bf8f5cca346ae2920facafce85f395301536efd0 Mon Sep 17 00:00:00 2001 From: starlord Date: Fri, 26 Jul 2019 12:26:45 +0800 Subject: [PATCH 87/91] MS-260 Refine log Former-commit-id: a67b4e677a6ad1b46a793119d19a6da85f2441c4 --- cpp/src/db/DBImpl.cpp | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/cpp/src/db/DBImpl.cpp b/cpp/src/db/DBImpl.cpp index 9a27f09b3d..26a0c9f57e 100644 --- a/cpp/src/db/DBImpl.cpp +++ b/cpp/src/db/DBImpl.cpp @@ -418,13 +418,15 @@ Status DBImpl::BackgroundMergeFiles(const std::string& table_id) { bool has_merge = false; for (auto& kv : raw_files) { auto files = kv.second; - if (files.size() <= options_.merge_trigger_number) { + if (files.size() < options_.merge_trigger_number) { + ENGINE_LOG_DEBUG << "Files number not greater equal than merge trigger number, skip merge action"; continue; } has_merge = true; MergeFiles(table_id, kv.first, kv.second); if (shutting_down_.load(std::memory_order_acquire)){ + ENGINE_LOG_DEBUG << "Server will shutdown, skip merge action for table " << table_id; break; } } @@ -442,6 +444,11 @@ void DBImpl::BackgroundCompaction(std::set table_ids) { ENGINE_LOG_ERROR << "Merge files for table " << table_id << " failed: " << status.ToString(); continue;//let other table get chance to merge } + + if (shutting_down_.load(std::memory_order_acquire)){ + ENGINE_LOG_DEBUG << "Server will shutdown, skip merge action"; + break; + } } meta_ptr_->Archive(); @@ -575,6 +582,11 @@ Status DBImpl::BuildIndexByTable(const std::string& table_id) { return status; } ENGINE_LOG_DEBUG << "Sync building index for " << file.id_ << " passed"; + + if (shutting_down_.load(std::memory_order_acquire)){ + ENGINE_LOG_DEBUG << "Server will shutdown, skip build index action for table " << table_id; + break; + } } return status; @@ -595,6 +607,7 @@ void DBImpl::BackgroundBuildIndex() { } if (shutting_down_.load(std::memory_order_acquire)){ + ENGINE_LOG_DEBUG << "Server will shutdown, skip build index action"; break; } } From b5816a386c6eae45a6cead81652131eb9ca8d564 Mon Sep 17 00:00:00 2001 From: starlord Date: Fri, 26 Jul 2019 12:29:36 +0800 Subject: [PATCH 88/91] MS-260 Refine log Former-commit-id: 0fbaa027ff33645bae10ce31ecd9ffa3c17894df --- cpp/src/db/DBImpl.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cpp/src/db/DBImpl.cpp b/cpp/src/db/DBImpl.cpp index 26a0c9f57e..8e634415aa 100644 --- a/cpp/src/db/DBImpl.cpp +++ b/cpp/src/db/DBImpl.cpp @@ -351,7 +351,7 @@ void DBImpl::StartCompactionTask() { Status DBImpl::MergeFiles(const std::string& table_id, const meta::DateT& date, const meta::TableFilesSchema& files) { - ENGINE_LOG_DEBUG << "Merge files for table" << table_id; + ENGINE_LOG_DEBUG << "Merge files for table " << table_id; meta::TableFileSchema table_file; table_file.table_id_ = table_id; From 66119dd20f8c4ebc1bf555c7331ed9455f811263 Mon Sep 17 00:00:00 2001 From: "xj.lin" Date: Fri, 26 Jul 2019 17:45:14 +0800 Subject: [PATCH 89/91] MS-286 fix.. 
Former-commit-id: 056d394c7aff7f05ac5519a626e60e77e576fb63 --- cpp/src/db/DBImpl.cpp | 15 +++- cpp/src/db/DBMetaImpl.cpp | 73 ++++++++++++++++++ cpp/src/db/DBMetaImpl.h | 5 ++ cpp/src/db/Meta.h | 3 + cpp/src/db/MySQLMetaImpl.cpp | 111 ++++++++++++++++++++++++++++ cpp/src/db/MySQLMetaImpl.h | 5 ++ cpp/unittest/db/db_tests.cpp | 15 ++-- cpp/unittest/db/meta_tests.cpp | 11 +++ cpp/unittest/db/mysql_meta_test.cpp | 11 +++ 9 files changed, 237 insertions(+), 12 deletions(-) diff --git a/cpp/src/db/DBImpl.cpp b/cpp/src/db/DBImpl.cpp index 9a27f09b3d..4720e08aeb 100644 --- a/cpp/src/db/DBImpl.cpp +++ b/cpp/src/db/DBImpl.cpp @@ -198,18 +198,25 @@ Status DBImpl::Query(const std::string& table_id, const std::vector ids.push_back(std::stoul(id, &sz)); } - meta::TableFilesSchema files_array; - auto status = meta_ptr_->GetTableFiles(table_id, ids, files_array); + meta::DatePartionedTableFilesSchema files_array; + auto status = meta_ptr_->FilesToSearch(table_id, ids, dates, files_array); if (!status.ok()) { return status; } - if(files_array.empty()) { + meta::TableFilesSchema file_id_array; + for (auto &day_files : files_array) { + for (auto &file : day_files.second) { + file_id_array.push_back(file); + } + } + + if(file_id_array.empty()) { return Status::Error("Invalid file id"); } cache::CpuCacheMgr::GetInstance()->PrintInfo(); //print cache info before query - status = QueryAsync(table_id, files_array, k, nq, vectors, dates, results); + status = QueryAsync(table_id, file_id_array, k, nq, vectors, dates, results); cache::CpuCacheMgr::GetInstance()->PrintInfo(); //print cache info after query return status; } diff --git a/cpp/src/db/DBMetaImpl.cpp b/cpp/src/db/DBMetaImpl.cpp index 56f741c4dc..2cea16e6bb 100644 --- a/cpp/src/db/DBMetaImpl.cpp +++ b/cpp/src/db/DBMetaImpl.cpp @@ -544,6 +544,79 @@ Status DBMetaImpl::FilesToSearch(const std::string &table_id, return Status::OK(); } +Status DBMetaImpl::FilesToSearch(const std::string &table_id, + const std::vector &ids, + const DatesT &partition, + DatePartionedTableFilesSchema &files) { + files.clear(); + MetricCollector metric; + + try { + auto select_columns = columns(&TableFileSchema::id_, + &TableFileSchema::table_id_, + &TableFileSchema::file_id_, + &TableFileSchema::file_type_, + &TableFileSchema::size_, + &TableFileSchema::date_, + &TableFileSchema::engine_type_); + + auto match_tableid = c(&TableFileSchema::table_id_) == table_id; + auto is_raw = c(&TableFileSchema::file_type_) == (int) TableFileSchema::RAW; + auto is_toindex = c(&TableFileSchema::file_type_) == (int) TableFileSchema::TO_INDEX; + auto is_index = c(&TableFileSchema::file_type_) == (int) TableFileSchema::INDEX; + + TableSchema table_schema; + table_schema.table_id_ = table_id; + auto status = DescribeTable(table_schema); + if (!status.ok()) { return status; } + + decltype(ConnectorPtr->select(select_columns)) result; + if (partition.empty() && ids.empty()) { + auto filter = where(match_tableid and (is_raw or is_toindex or is_index)); + result = ConnectorPtr->select(select_columns, filter); + } + else if (partition.empty() && !ids.empty()) { + auto match_fileid = in(&TableFileSchema::id_, ids); + auto filter = where(match_tableid and match_fileid and (is_raw or is_toindex or is_index)); + result = ConnectorPtr->select(select_columns, filter); + } + else if (!partition.empty() && ids.empty()) { + auto match_date = in(&TableFileSchema::date_, partition); + auto filter = where(match_tableid and match_date and (is_raw or is_toindex or is_index)); + result = 
ConnectorPtr->select(select_columns, filter); + } + else if (!partition.empty() && !ids.empty()) { + auto match_fileid = in(&TableFileSchema::id_, ids); + auto match_date = in(&TableFileSchema::date_, partition); + auto filter = where(match_tableid and match_fileid and match_date and (is_raw or is_toindex or is_index)); + result = ConnectorPtr->select(select_columns, filter); + } + + TableFileSchema table_file; + for (auto &file : result) { + table_file.id_ = std::get<0>(file); + table_file.table_id_ = std::get<1>(file); + table_file.file_id_ = std::get<2>(file); + table_file.file_type_ = std::get<3>(file); + table_file.size_ = std::get<4>(file); + table_file.date_ = std::get<5>(file); + table_file.engine_type_ = std::get<6>(file); + table_file.dimension_ = table_schema.dimension_; + utils::GetTableFilePath(options_, table_file); + auto dateItr = files.find(table_file.date_); + if (dateItr == files.end()) { + files[table_file.date_] = TableFilesSchema(); + } + files[table_file.date_].push_back(table_file); + } + + } catch (std::exception &e) { + return HandleException("Encounter exception when iterate index files", e); + } + + return Status::OK(); +} + Status DBMetaImpl::FilesToMerge(const std::string &table_id, DatePartionedTableFilesSchema &files) { files.clear(); diff --git a/cpp/src/db/DBMetaImpl.h b/cpp/src/db/DBMetaImpl.h index 6187ad7eae..a163d450a1 100644 --- a/cpp/src/db/DBMetaImpl.h +++ b/cpp/src/db/DBMetaImpl.h @@ -62,6 +62,11 @@ class DBMetaImpl : public Meta { Status FilesToSearch(const std::string &table_id, const DatesT &partition, DatePartionedTableFilesSchema &files) override; + Status FilesToSearch(const std::string &table_id, + const std::vector &ids, + const DatesT &partition, + DatePartionedTableFilesSchema &files) override; + Status FilesToMerge(const std::string &table_id, DatePartionedTableFilesSchema &files) override; diff --git a/cpp/src/db/Meta.h b/cpp/src/db/Meta.h index 5275605611..7e826f6335 100644 --- a/cpp/src/db/Meta.h +++ b/cpp/src/db/Meta.h @@ -65,6 +65,9 @@ class Meta { virtual Status FilesToSearch(const std::string &table_id, const DatesT &partition, DatePartionedTableFilesSchema &files) = 0; + virtual Status + FilesToSearch(const std::string &table_id, const std::vector &ids, const DatesT &partition, DatePartionedTableFilesSchema &files) = 0; + virtual Status FilesToMerge(const std::string &table_id, DatePartionedTableFilesSchema &files) = 0; diff --git a/cpp/src/db/MySQLMetaImpl.cpp b/cpp/src/db/MySQLMetaImpl.cpp index 14879d81fe..12bfc55c1b 100644 --- a/cpp/src/db/MySQLMetaImpl.cpp +++ b/cpp/src/db/MySQLMetaImpl.cpp @@ -965,6 +965,117 @@ Status MySQLMetaImpl::FilesToSearch(const std::string &table_id, return Status::OK(); } +Status MySQLMetaImpl::FilesToSearch(const std::string &table_id, + const std::vector &ids, + const DatesT &partition, + DatePartionedTableFilesSchema &files) { + + + files.clear(); + + try { + + MetricCollector metric; + + StoreQueryResult res; + + { + ScopedConnection connectionPtr(*mysql_connection_pool_, safe_grab); + + if (connectionPtr == nullptr) { + return Status::Error("Failed to connect to database server"); + } + + Query filesToSearchQuery = connectionPtr->query(); + filesToSearchQuery << "SELECT id, table_id, engine_type, file_id, file_type, size, date " << + "FROM TableFiles " << + "WHERE table_id = " << quote << table_id; + + if (!partition.empty()) { + std::stringstream partitionListSS; + for (auto &date : partition) { + partitionListSS << std::to_string(date) << ", "; + } + std::string partitionListStr = 
partitionListSS.str(); + + partitionListStr = partitionListStr.substr(0, partitionListStr.size() - 2); //remove the last ", " + filesToSearchQuery << " AND " << "date IN (" << partitionListStr << ")"; + } + + if (!ids.empty()) { + std::stringstream idSS; + for (auto &id : ids) { + idSS << "id = " << std::to_string(id) << " OR "; + } + std::string idStr = idSS.str(); + idStr = idStr.substr(0, idStr.size() - 4); //remove the last " OR " + + filesToSearchQuery << " AND " << "(" << idStr << ")"; + + } + // End + filesToSearchQuery << " AND " << + "(file_type = " << std::to_string(TableFileSchema::RAW) << " OR " << + "file_type = " << std::to_string(TableFileSchema::TO_INDEX) << " OR " << + "file_type = " << std::to_string(TableFileSchema::INDEX) << ");"; + + ENGINE_LOG_DEBUG << "MySQLMetaImpl::FilesToSearch: " << filesToSearchQuery.str(); + + res = filesToSearchQuery.store(); + } //Scoped Connection + + TableSchema table_schema; + table_schema.table_id_ = table_id; + auto status = DescribeTable(table_schema); + if (!status.ok()) { + return status; + } + + TableFileSchema table_file; + for (auto &resRow : res) { + + table_file.id_ = resRow["id"]; //implicit conversion + + std::string table_id_str; + resRow["table_id"].to_string(table_id_str); + table_file.table_id_ = table_id_str; + + table_file.engine_type_ = resRow["engine_type"]; + + std::string file_id; + resRow["file_id"].to_string(file_id); + table_file.file_id_ = file_id; + + table_file.file_type_ = resRow["file_type"]; + + table_file.size_ = resRow["size"]; + + table_file.date_ = resRow["date"]; + + table_file.dimension_ = table_schema.dimension_; + + utils::GetTableFilePath(options_, table_file); + + auto dateItr = files.find(table_file.date_); + if (dateItr == files.end()) { + files[table_file.date_] = TableFilesSchema(); + } + + files[table_file.date_].push_back(table_file); + } + } catch (const BadQuery &er) { + // Handle any query errors + ENGINE_LOG_ERROR << "QUERY ERROR WHEN FINDING TABLE FILES TO SEARCH" << ": " << er.what(); + return Status::DBTransactionError("QUERY ERROR WHEN FINDING TABLE FILES TO SEARCH", er.what()); + } catch (const Exception &er) { + // Catch-all for any other MySQL++ exceptions + ENGINE_LOG_ERROR << "GENERAL ERROR WHEN FINDING TABLE FILES TO SEARCH" << ": " << er.what(); + return Status::DBTransactionError("GENERAL ERROR WHEN FINDING TABLE FILES TO SEARCH", er.what()); + } + + return Status::OK(); +} + Status MySQLMetaImpl::FilesToMerge(const std::string &table_id, DatePartionedTableFilesSchema &files) { diff --git a/cpp/src/db/MySQLMetaImpl.h b/cpp/src/db/MySQLMetaImpl.h index 87bc1783c7..7822b99f64 100644 --- a/cpp/src/db/MySQLMetaImpl.h +++ b/cpp/src/db/MySQLMetaImpl.h @@ -53,6 +53,11 @@ class MySQLMetaImpl : public Meta { const DatesT &partition, DatePartionedTableFilesSchema &files) override; + Status FilesToSearch(const std::string &table_id, + const std::vector &ids, + const DatesT &partition, + DatePartionedTableFilesSchema &files) override; + Status FilesToMerge(const std::string &table_id, DatePartionedTableFilesSchema &files) override; diff --git a/cpp/unittest/db/db_tests.cpp b/cpp/unittest/db/db_tests.cpp index 70f7da43c4..81f2279d1c 100644 --- a/cpp/unittest/db/db_tests.cpp +++ b/cpp/unittest/db/db_tests.cpp @@ -215,14 +215,13 @@ TEST_F(DBTest, SEARCH_TEST) { ASSERT_STATS(stat); } - // TODO: FIX HERE - //{//search by specify index file - // engine::meta::DatesT dates; - // std::vector file_ids = {"1", "2", "3", "4"}; - // engine::QueryResults results; - // stat = db_->Query(TABLE_NAME, 
file_ids, k, nq, xq.data(), dates, results); - // ASSERT_STATS(stat); - //} + {//search by specify index file + engine::meta::DatesT dates; + std::vector file_ids = {"4", "5", "6"}; + engine::QueryResults results; + stat = db_->Query(TABLE_NAME, file_ids, k, nq, xq.data(), dates, results); + ASSERT_STATS(stat); + } // TODO(linxj): add groundTruth assert }; diff --git a/cpp/unittest/db/meta_tests.cpp b/cpp/unittest/db/meta_tests.cpp index 5bce4058b1..41b145ee85 100644 --- a/cpp/unittest/db/meta_tests.cpp +++ b/cpp/unittest/db/meta_tests.cpp @@ -269,4 +269,15 @@ TEST_F(MetaTest, TABLE_FILES_TEST) { ASSERT_TRUE(status.ok()); ASSERT_EQ(dated_files[table_file.date_].size(), to_index_files_cnt+raw_files_cnt+index_files_cnt); + + std::vector ids; + status = impl_->FilesToSearch(table_id, ids, meta::DatesT(), dated_files); + ASSERT_TRUE(status.ok()); + ASSERT_EQ(dated_files[table_file.date_].size(), + to_index_files_cnt+raw_files_cnt+index_files_cnt); + + ids.push_back(size_t(9999999999)); + status = impl_->FilesToSearch(table_id, ids, dates, dated_files); + ASSERT_TRUE(status.ok()); + ASSERT_EQ(dated_files[table_file.date_].size(),0); } diff --git a/cpp/unittest/db/mysql_meta_test.cpp b/cpp/unittest/db/mysql_meta_test.cpp index 76d7846362..aead509a2c 100644 --- a/cpp/unittest/db/mysql_meta_test.cpp +++ b/cpp/unittest/db/mysql_meta_test.cpp @@ -328,6 +328,17 @@ TEST_F(MySQLTest, TABLE_FILES_TEST) { ASSERT_EQ(dated_files[table_file.date_].size(), to_index_files_cnt+raw_files_cnt+index_files_cnt); + std::vector ids; + status = impl.FilesToSearch(table_id, ids, meta::DatesT(), dated_files); + ASSERT_TRUE(status.ok()); + ASSERT_EQ(dated_files[table_file.date_].size(), + to_index_files_cnt+raw_files_cnt+index_files_cnt); + + ids.push_back(size_t(9999999999)); + status = impl.FilesToSearch(table_id, ids, dates, dated_files); + ASSERT_TRUE(status.ok()); + ASSERT_EQ(dated_files[table_file.date_].size(),0); + status = impl.DropAll(); ASSERT_TRUE(status.ok()); } From 4fc8921183e69ba6eaad51461522c88fda0f059c Mon Sep 17 00:00:00 2001 From: starlord Date: Fri, 26 Jul 2019 19:31:02 +0800 Subject: [PATCH 90/91] parallel reduce default false Former-commit-id: 863941e425c4c2653367d2da6f13bebcb2b5eb96 --- cpp/src/db/scheduler/task/SearchTask.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cpp/src/db/scheduler/task/SearchTask.cpp b/cpp/src/db/scheduler/task/SearchTask.cpp index e696faaed0..79baeeafe9 100644 --- a/cpp/src/db/scheduler/task/SearchTask.cpp +++ b/cpp/src/db/scheduler/task/SearchTask.cpp @@ -22,7 +22,7 @@ static constexpr size_t PARALLEL_REDUCE_BATCH = 1000; bool NeedParallelReduce(uint64_t nq, uint64_t topk) { server::ServerConfig &config = server::ServerConfig::GetInstance(); server::ConfigNode& db_config = config.GetConfig(server::CONFIG_DB); - bool need_parallel = db_config.GetBoolValue(server::CONFIG_DB_PARALLEL_REDUCE, true); + bool need_parallel = db_config.GetBoolValue(server::CONFIG_DB_PARALLEL_REDUCE, false); if(!need_parallel) { return false; } From 3314d47114bcf3219f5be9e31f8263eea19abc5d Mon Sep 17 00:00:00 2001 From: "xj.lin" Date: Sat, 27 Jul 2019 14:28:27 +0800 Subject: [PATCH 91/91] MS-288 update build stage Former-commit-id: d9e0bfbbbb5060b65311b2f6eb8eadf531b918c2 --- cpp/CHANGELOG.md | 1 + cpp/CMakeLists.txt | 8 ++++++++ cpp/build.sh | 7 +++++++ cpp/cmake/DefineOptions.cmake | 2 +- cpp/src/CMakeLists.txt | 4 ++-- cpp/thirdparty/knowhere | 2 +- 6 files changed, 20 insertions(+), 4 deletions(-) diff --git a/cpp/CHANGELOG.md b/cpp/CHANGELOG.md index 
e6d89ac3e6..2e3c4a8515 100644 --- a/cpp/CHANGELOG.md +++ b/cpp/CHANGELOG.md @@ -21,6 +21,7 @@ Please mark all change in change log and use the ticket from JIRA. - MS-235 - Some test cases random fail - MS-236 - Add MySQLMetaImpl::HasNonIndexFiles - MS-257 - Update bzip2 download url +- MS-288 - Update compile scripts ## Improvement - MS-156 - Add unittest for merge result functions diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index 07b4719790..5082774b02 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -109,6 +109,14 @@ include(ThirdPartyPackages) include_directories(${MILVUS_SOURCE_DIR}) link_directories(${MILVUS_BINARY_DIR}) +if (NOT DEFINED KNOWHERE_BUILD_DIR) + message(FATAL_ERROR "You must set environment variable KNOWHERE_BUILD_DIR") +endif() +message(STATUS "Build with ${KNOWHERE_BUILD_DIR}") +include_directories(${KNOWHERE_BUILD_DIR}/include) +include_directories(${KNOWHERE_BUILD_DIR}/include/SPTAG/AnnService) +link_directories(${KNOWHERE_BUILD_DIR}/lib) + ## Following should be check set(MILVUS_ENGINE_INCLUDE ${PROJECT_SOURCE_DIR}/include) diff --git a/cpp/build.sh b/cpp/build.sh index edfe9305be..584f98cbe3 100755 --- a/cpp/build.sh +++ b/cpp/build.sh @@ -75,6 +75,12 @@ if [[ ! -d cmake_build ]]; then MAKE_CLEAN="ON" fi +# Build Knowhere +KNOWHERE_BUILD_DIR="`pwd`/thirdparty/knowhere_build" +pushd `pwd`/thirdparty/knowhere +./build.sh -t Release -p ${KNOWHERE_BUILD_DIR} +popd + cd cmake_build CUDA_COMPILER=/usr/local/cuda/bin/nvcc @@ -89,6 +95,7 @@ if [[ ${MAKE_CLEAN} == "ON" ]]; then -DMILVUS_DB_PATH=${DB_PATH} \ -DMILVUS_ENABLE_PROFILING=${PROFILING} \ -DBUILD_FAISS_WITH_MKL=${BUILD_FAISS_WITH_MKL} \ + -DKNOWHERE_BUILD_DIR=${KNOWHERE_BUILD_DIR} \ $@ ../" echo ${CMAKE_CMD} diff --git a/cpp/cmake/DefineOptions.cmake b/cpp/cmake/DefineOptions.cmake index cd0aac5df3..af89dccb4b 100644 --- a/cpp/cmake/DefineOptions.cmake +++ b/cpp/cmake/DefineOptions.cmake @@ -96,7 +96,7 @@ define_option(MILVUS_WITH_YAMLCPP "Build with yaml-cpp library" ON) define_option(MILVUS_WITH_ZLIB "Build with zlib compression" ON) -define_option(MILVUS_WITH_KNOWHERE "Build with Knowhere" ON) +define_option(MILVUS_WITH_KNOWHERE "Build with Knowhere" OFF) if(CMAKE_VERSION VERSION_LESS 3.7) set(MILVUS_WITH_ZSTD_DEFAULT OFF) diff --git a/cpp/src/CMakeLists.txt b/cpp/src/CMakeLists.txt index f1e4435b3d..29bb1de3df 100644 --- a/cpp/src/CMakeLists.txt +++ b/cpp/src/CMakeLists.txt @@ -225,8 +225,8 @@ endif () install(TARGETS milvus_server DESTINATION bin) install(FILES - ${CMAKE_BINARY_DIR}/knowhere_ep-prefix/src/knowhere_ep/lib/${CMAKE_SHARED_LIBRARY_PREFIX}tbb${CMAKE_SHARED_LIBRARY_SUFFIX} - ${CMAKE_BINARY_DIR}/knowhere_ep-prefix/src/knowhere_ep/lib/${CMAKE_SHARED_LIBRARY_PREFIX}tbb${CMAKE_SHARED_LIBRARY_SUFFIX}.2 + ${KNOWHERE_BUILD_DIR}/lib/${CMAKE_SHARED_LIBRARY_PREFIX}tbb${CMAKE_SHARED_LIBRARY_SUFFIX} + ${KNOWHERE_BUILD_DIR}/lib/${CMAKE_SHARED_LIBRARY_PREFIX}tbb${CMAKE_SHARED_LIBRARY_SUFFIX}.2 ${CMAKE_BINARY_DIR}/mysqlpp_ep-prefix/src/mysqlpp_ep/lib/${CMAKE_SHARED_LIBRARY_PREFIX}mysqlpp${CMAKE_SHARED_LIBRARY_SUFFIX} ${CMAKE_BINARY_DIR}/mysqlpp_ep-prefix/src/mysqlpp_ep/lib/${CMAKE_SHARED_LIBRARY_PREFIX}mysqlpp${CMAKE_SHARED_LIBRARY_SUFFIX}.3 ${CMAKE_BINARY_DIR}/mysqlpp_ep-prefix/src/mysqlpp_ep/lib/${CMAKE_SHARED_LIBRARY_PREFIX}mysqlpp${CMAKE_SHARED_LIBRARY_SUFFIX}.3.2.4 diff --git a/cpp/thirdparty/knowhere b/cpp/thirdparty/knowhere index 81b28a753f..02550a43b5 160000 --- a/cpp/thirdparty/knowhere +++ b/cpp/thirdparty/knowhere @@ -1 +1 @@ -Subproject commit 
81b28a753fc47b46364afa7b9414e249c4b2cd75 +Subproject commit 02550a43b5146bd7976b8b2b3fc37ca885d1e880
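
Usage note on the refactored wrapper API in the patches above: every VecIndex entry point (BuildAll, Add, Search, Build, Load, write_index) now reports failures through server::KnowhereError instead of returning void, and parameter defaulting moved into AutoGenParams. Below is a minimal caller sketch, not code from the repository: the BuildAndSave helper is hypothetical, and the engine:: qualification and Config type are assumptions taken from how the unit tests and ExecutionEngineImpl invoke the wrapper.

    // Hedged sketch of the post-refactor wrapper API; BuildAndSave is a
    // hypothetical helper, namespaces assumed from the unit tests.
    #include "wrapper/knowhere/vec_index.h"

    using namespace zilliz::milvus;

    server::KnowhereError BuildAndSave(long nb, int dim,
                                       const float *xb, const long *ids,
                                       const std::string &location) {
        auto index = engine::GetVecIndexFactory(engine::IndexType::FAISS_IVFSQ8_MIX);

        engine::Config cfg;
        cfg["dim"] = dim;
        // Fill nlist, gpu_id, metric_type and (for IVFSQ8) nbits when absent.
        engine::AutoGenParams(index->GetType(), nb, cfg);

        // Errors now come back as KnowhereError codes rather than exceptions.
        auto ec = index->BuildAll(nb, xb, ids, cfg);
        if (ec != server::KNOWHERE_SUCCESS) {
            return ec;
        }
        return engine::write_index(index, location);
    }

With the defaults in AutoGenParams, an empty config for 500,000 vectors would come back with nlist = int(500000 / 1000000.0 * 16384) = 8192, gpu_id = 0, metric_type = "L2" (after PATCH 84/91 reverts the earlier "IP" default), and nbits = 8 for FAISS_IVFSQ8_MIX.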
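
For reference, the reworked write_index serializes an index as a flat, length-prefixed stream, which read_index parses back into a BinarySet before calling LoadVecIndex. The sketch below simply restates the writer loop in vec_index.cpp; field widths follow the writer's sizeof calls:

    IndexType  index_type        sizeof(IndexType) bytes
    repeated for each entry in the BinarySet's binary_map_:
        size_t   meta_length     length of the entry's key
        char[]   meta            meta_length bytes (the key itself)
        int64_t  binary_length   size of the entry's data
        byte[]   data            binary_length bytes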