mirror of
https://gitee.com/milvus-io/milvus.git
synced 2025-12-30 15:35:33 +08:00
Merge branch 'integrate_knowhere-0.3.2' into 'branch-0.3.2'
MS-154 See merge request megasearch/milvus!161 Former-commit-id: 0eab9f44ffb05836139eadfc2c2945f158bde434
This commit is contained in:
commit
692feaacdc
4
.gitmodules
vendored
Normal file
4
.gitmodules
vendored
Normal file
@ -0,0 +1,4 @@
|
||||
[submodule "cpp/thirdparty/knowhere"]
|
||||
path = cpp/thirdparty/knowhere
|
||||
url = git@192.168.1.105:xiaojun.lin/knowhere.git
|
||||
branch = develop
|
||||
@ -3,6 +3,17 @@
|
||||
Please mark all changes in the change log and use the ticket from JIRA.
|
||||
|
||||
|
||||
# Milvus 0.3.2 (2019-07-10)
|
||||
|
||||
## Bug
|
||||
|
||||
## Improvement
|
||||
|
||||
## New Feature
|
||||
- MS-154 - Integrate knowhere
|
||||
|
||||
## Task
|
||||
|
||||
# Milvus 0.3.1 (2019-07-10)
|
||||
|
||||
## Bug
|
||||
|
||||
@ -1,13 +1,12 @@
|
||||
### Compilation
|
||||
#### Step 1: install necessary tools
|
||||
|
||||
Install MySQL
|
||||
|
||||
centos7 :
|
||||
yum install gfortran qt4 flex bison mysql-devel
|
||||
yum install gfortran qt4 flex bison mysql-devel mysql
|
||||
|
||||
ubuntu16.04 :
|
||||
sudo apt-get install gfortran qt4-qmake flex bison libmysqlclient-dev
|
||||
sudo apt-get install gfortran qt4-qmake flex bison libmysqlclient-dev mysql-client
|
||||
|
||||
If `libmysqlclient_r.so` does not exist after installing MySQL Development Files, you need to create a symbolic link:
|
||||
|
||||
@ -21,6 +20,9 @@ cmake_build/src/milvus_server is the server
|
||||
|
||||
cmake_build/src/libmilvus_engine.a is the static library
|
||||
|
||||
git submodule init
|
||||
git submodule update
|
||||
|
||||
cd [sourcecode path]/cpp
|
||||
./build.sh -t Debug
|
||||
./build.sh -t Release
|
||||
@ -53,10 +55,10 @@ If you encounter the following error when building:
|
||||
### Launch server
|
||||
Set config in cpp/conf/server_config.yaml
|
||||
|
||||
Add milvus/bin/lib to LD_LIBRARY_PATH
|
||||
Add milvus/lib to LD_LIBRARY_PATH
|
||||
|
||||
```
|
||||
export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/path/to/milvus/bin/lib
|
||||
export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/path/to/milvus/lib
|
||||
```
|
||||
|
||||
Then launch server with config:
|
||||
|
||||
@ -68,20 +68,17 @@ define_option(MILVUS_WITH_BZ2 "Build with BZ2 compression" ON)
|
||||
|
||||
define_option(MILVUS_WITH_EASYLOGGINGPP "Build with Easylogging++ library" ON)
|
||||
|
||||
define_option(MILVUS_WITH_FAISS "Build with FAISS library" ON)
|
||||
define_option(MILVUS_WITH_FAISS "Build with FAISS library" OFF)
|
||||
|
||||
define_option(MILVUS_WITH_FAISS_GPU_VERSION "Build with FAISS GPU version" ON)
|
||||
define_option(MILVUS_WITH_FAISS_GPU_VERSION "Build with FAISS GPU version" OFF)
|
||||
|
||||
#define_option_string(MILVUS_FAISS_GPU_ARCH "Specifying which GPU architectures to build against"
|
||||
# "-gencode=arch=compute_35,code=compute_35 -gencode=arch=compute_52,code=compute_52 -gencode=arch=compute_60,code=compute_60 -gencode=arch=compute_61,code=compute_61")
|
||||
|
||||
define_option(MILVUS_WITH_LAPACK "Build with LAPACK library" ON)
|
||||
define_option(MILVUS_WITH_LAPACK "Build with LAPACK library" OFF)
|
||||
|
||||
define_option(MILVUS_WITH_LZ4 "Build with lz4 compression" ON)
|
||||
|
||||
define_option(MILVUS_WITH_JSONCONS "Build with JSONCONS" OFF)
|
||||
|
||||
define_option(MILVUS_WITH_OPENBLAS "Build with OpenBLAS library" ON)
|
||||
define_option(MILVUS_WITH_OPENBLAS "Build with OpenBLAS library" OFF)
|
||||
|
||||
define_option(MILVUS_WITH_PROMETHEUS "Build with PROMETHEUS library" ON)
|
||||
|
||||
@ -101,6 +98,8 @@ define_option(MILVUS_WITH_YAMLCPP "Build with yaml-cpp library" ON)
|
||||
|
||||
define_option(MILVUS_WITH_ZLIB "Build with zlib compression" ON)
|
||||
|
||||
define_option(MILVUS_WITH_KNOWHERE "Build with Knowhere" ON)
|
||||
|
||||
if(CMAKE_VERSION VERSION_LESS 3.7)
|
||||
set(MILVUS_WITH_ZSTD_DEFAULT OFF)
|
||||
else()
|
||||
|
||||
@ -23,6 +23,7 @@ set(MILVUS_THIRDPARTY_DEPENDENCIES
|
||||
Easylogging++
|
||||
FAISS
|
||||
GTest
|
||||
Knowhere
|
||||
JSONCONS
|
||||
LAPACK
|
||||
Lz4
|
||||
@ -61,6 +62,8 @@ macro(build_dependency DEPENDENCY_NAME)
|
||||
build_gtest()
|
||||
elseif("${DEPENDENCY_NAME}" STREQUAL "LAPACK")
|
||||
build_lapack()
|
||||
elseif("${DEPENDENCY_NAME}" STREQUAL "Knowhere")
|
||||
build_knowhere()
|
||||
elseif("${DEPENDENCY_NAME}" STREQUAL "Lz4")
|
||||
build_lz4()
|
||||
elseif ("${DEPENDENCY_NAME}" STREQUAL "MySQLPP")
|
||||
@ -242,6 +245,12 @@ else()
|
||||
set(FAISS_SOURCE_URL "https://github.com/facebookresearch/faiss/archive/${FAISS_VERSION}.tar.gz")
|
||||
endif()
|
||||
|
||||
if(DEFINED ENV{MILVUS_KNOWHERE_URL})
|
||||
set(KNOWHERE_SOURCE_URL "$ENV{MILVUS_KNOWHERE_URL}")
|
||||
else()
|
||||
set(KNOWHERE_SOURCE_URL "${CMAKE_SOURCE_DIR}/thirdparty/knowhere")
|
||||
endif()
|
||||
|
||||
if (DEFINED ENV{MILVUS_GTEST_URL})
|
||||
set(GTEST_SOURCE_URL "$ENV{MILVUS_GTEST_URL}")
|
||||
else ()
|
||||
@ -641,6 +650,54 @@ if(MILVUS_WITH_BZ2)
|
||||
include_directories(SYSTEM "${BZIP2_INCLUDE_DIR}")
|
||||
endif()
|
||||
|
||||
# ----------------------------------------------------------------------
|
||||
# Knowhere
|
||||
|
||||
# Builds Knowhere from ${KNOWHERE_SOURCE_URL} as the external project
# `knowhere_ep` and exposes the resulting static library as the imported
# target `knowhere`.
macro(build_knowhere)
    message(STATUS "Building knowhere from source")

    # The external project installs into its own source directory.
    set(KNOWHERE_PREFIX "${CMAKE_CURRENT_BINARY_DIR}/knowhere_ep-prefix/src/knowhere_ep")
    set(KNOWHERE_INCLUDE_DIR "${KNOWHERE_PREFIX}/include")
    set(KNOWHERE_STATIC_LIB
        "${KNOWHERE_PREFIX}/lib/${CMAKE_STATIC_LIBRARY_PREFIX}knowhere${CMAKE_STATIC_LIBRARY_SUFFIX}")

    # NOTE(review): the nvcc location and the Release build type are hard-coded
    # here regardless of the parent build's settings - confirm this is intended.
    set(KNOWHERE_CMAKE_ARGS
        ${EP_COMMON_CMAKE_ARGS}
        "-DCMAKE_INSTALL_PREFIX=${KNOWHERE_PREFIX}"
        -DCMAKE_INSTALL_LIBDIR=lib
        -DCMAKE_CUDA_COMPILER=/usr/local/cuda/bin/nvcc
        -DCMAKE_BUILD_TYPE=Release)

    ExternalProject_Add(knowhere_ep
        URL ${KNOWHERE_SOURCE_URL}
        ${EP_LOG_OPTIONS}
        CMAKE_ARGS ${KNOWHERE_CMAKE_ARGS}
        BUILD_COMMAND ${MAKE} ${MAKE_BUILD_ARGS}
        BUILD_BYPRODUCTS ${KNOWHERE_STATIC_LIB})

    # Create the include directory up front; presumably needed because the
    # imported target below references it before the external build has run.
    file(MAKE_DIRECTORY "${KNOWHERE_INCLUDE_DIR}")

    add_library(knowhere STATIC IMPORTED)
    set_target_properties(knowhere PROPERTIES
        IMPORTED_LOCATION "${KNOWHERE_STATIC_LIB}"
        INTERFACE_INCLUDE_DIRECTORIES "${KNOWHERE_INCLUDE_DIR}")

    # Anything linking `knowhere` must wait for the external build.
    add_dependencies(knowhere knowhere_ep)
endmacro()
|
||||
|
||||
# Resolve the Knowhere dependency and make its headers and libraries visible
# to every target defined after this point.
if(MILVUS_WITH_KNOWHERE)
    resolve_dependency(Knowhere)

    get_target_property(KNOWHERE_INCLUDE_DIR knowhere INTERFACE_INCLUDE_DIRECTORIES)

    # link_directories() has no SYSTEM keyword (only include_directories()
    # does); passing it made CMake treat "SYSTEM" as one more directory.
    link_directories("${KNOWHERE_PREFIX}/lib")

    include_directories(SYSTEM "${KNOWHERE_INCLUDE_DIR}")
    # SPTAG headers are included relative to its AnnService directory.
    include_directories(SYSTEM "${KNOWHERE_INCLUDE_DIR}/SPTAG/AnnService")
endif()
|
||||
|
||||
# ----------------------------------------------------------------------
|
||||
# Easylogging++
|
||||
|
||||
|
||||
@ -12,6 +12,7 @@ aux_source_directory(utils utils_files)
|
||||
aux_source_directory(db db_files)
|
||||
aux_source_directory(wrapper wrapper_files)
|
||||
aux_source_directory(metrics metrics_files)
|
||||
aux_source_directory(wrapper/knowhere knowhere_files)
|
||||
|
||||
aux_source_directory(db/scheduler scheduler_files)
|
||||
aux_source_directory(db/scheduler/context scheduler_context_files)
|
||||
@ -50,6 +51,7 @@ set(engine_files
|
||||
${wrapper_files}
|
||||
# metrics/Metrics.cpp
|
||||
${metrics_files}
|
||||
${knowhere_files}
|
||||
)
|
||||
|
||||
set(get_sys_info_files
|
||||
@ -65,14 +67,17 @@ include_directories(thrift/gen-cpp)
|
||||
include_directories(/usr/include/mysql)
|
||||
|
||||
set(third_party_libs
|
||||
knowhere
|
||||
SPTAGLibStatic
|
||||
arrow
|
||||
jemalloc_pic
|
||||
faiss
|
||||
openblas
|
||||
lapack
|
||||
easyloggingpp
|
||||
sqlite
|
||||
thrift
|
||||
yaml-cpp
|
||||
libgpufaiss.a
|
||||
faiss
|
||||
lapack
|
||||
openblas
|
||||
prometheus-cpp-push
|
||||
prometheus-cpp-pull
|
||||
prometheus-cpp-core
|
||||
@ -84,6 +89,8 @@ set(third_party_libs
|
||||
snappy
|
||||
zlib
|
||||
zstd
|
||||
cudart
|
||||
cublas
|
||||
mysqlpp
|
||||
${CUDA_TOOLKIT_ROOT_DIR}/lib64/stubs/libnvidia-ml.so
|
||||
)
|
||||
@ -97,8 +104,6 @@ if (GPU_VERSION STREQUAL "ON")
|
||||
pthread
|
||||
libgomp.a
|
||||
libgfortran.a
|
||||
cudart
|
||||
cublas
|
||||
${CUDA_TOOLKIT_ROOT_DIR}/lib64/stubs/libnvidia-ml.so
|
||||
)
|
||||
else()
|
||||
|
||||
48
cpp/src/wrapper/knowhere/data_transfer.cpp
Normal file
48
cpp/src/wrapper/knowhere/data_transfer.cpp
Normal file
@ -0,0 +1,48 @@
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
// Copyright 上海赜睿信息科技有限公司(Zilliz) - All Rights Reserved
|
||||
// Unauthorized copying of this file, via any medium is strictly prohibited.
|
||||
// Proprietary and confidential.
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
#include "data_transfer.h"
|
||||
|
||||
|
||||
namespace zilliz {
|
||||
namespace vecwise {
|
||||
namespace engine {
|
||||
|
||||
using namespace zilliz::knowhere;
|
||||
|
||||
/// Wrap `nb` vectors of dimension `dim` together with their ids in a knowhere
/// Dataset.
///
/// @param nb   number of vectors in `xb` (and of entries in `ids`)
/// @param dim  dimension of each vector
/// @param xb   row-major float data, `nb * dim` values
/// @param ids  one id per vector
/// @return dataset holding one int64 array field "id" and one float tensor
///         field "data"
///
/// NOTE(review): `ids` is `const long *` but is sized with sizeof(int64_t);
/// this assumes `long` is 64 bits wide - confirm for all target platforms.
DatasetPtr
GenDatasetWithIds(const int64_t &nb, const int64_t &dim, const float *xb, const long *ids) {
    // Tensor part: the raw vectors, shaped (nb, dim).
    std::vector<int64_t> shape{nb, dim};
    auto tensor = ConstructFloatTensor((uint8_t *) xb, nb * dim * sizeof(float), shape);
    std::vector<TensorPtr> tensors{tensor};
    std::vector<FieldPtr> tensor_fields{ConstructFloatField("data")};
    auto tensor_schema = std::make_shared<Schema>(tensor_fields);

    // Array part: the ids.
    auto id_array = ConstructInt64Array((uint8_t *) ids, nb * sizeof(int64_t));
    std::vector<ArrayPtr> arrays{id_array};
    std::vector<FieldPtr> array_fields{ConstructInt64Field("id")};
    // The array schema must describe the id field; it was previously built
    // from tensor_fields, leaving array_fields unused.
    auto array_schema = std::make_shared<Schema>(array_fields);

    auto dataset = std::make_shared<Dataset>(std::move(arrays), array_schema,
                                             std::move(tensors), tensor_schema);
    return dataset;
}
|
||||
|
||||
/// Wrap `nb` vectors of dimension `dim` (no ids) in a knowhere Dataset.
///
/// @param nb   number of vectors in `xb`
/// @param dim  dimension of each vector
/// @param xb   row-major float data, `nb * dim` values
/// @return dataset holding a single float tensor field named "data"
DatasetPtr
GenDataset(const int64_t &nb, const int64_t &dim, const float *xb) {
    std::vector<int64_t> dims{nb, dim};
    const auto byte_count = nb * dim * sizeof(float);

    std::vector<TensorPtr> tensor_list{ConstructFloatTensor((uint8_t *) xb, byte_count, dims)};
    std::vector<FieldPtr> field_list{ConstructFloatField("data")};
    auto schema = std::make_shared<Schema>(field_list);

    auto result = std::make_shared<Dataset>(std::move(tensor_list), schema);
    return result;
}
|
||||
|
||||
}
|
||||
}
|
||||
}
|
||||
24
cpp/src/wrapper/knowhere/data_transfer.h
Normal file
24
cpp/src/wrapper/knowhere/data_transfer.h
Normal file
@ -0,0 +1,24 @@
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
// Copyright 上海赜睿信息科技有限公司(Zilliz) - All Rights Reserved
|
||||
// Unauthorized copying of this file, via any medium is strictly prohibited.
|
||||
// Proprietary and confidential.
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "knowhere/adapter/structure.h"
|
||||
|
||||
|
||||
namespace zilliz {
|
||||
namespace vecwise {
|
||||
namespace engine {
|
||||
|
||||
extern zilliz::knowhere::DatasetPtr
|
||||
GenDatasetWithIds(const int64_t &nb, const int64_t &dim, const float *xb, const long *ids);
|
||||
|
||||
extern zilliz::knowhere::DatasetPtr
|
||||
GenDataset(const int64_t &nb, const int64_t &dim, const float *xb);
|
||||
|
||||
}
|
||||
}
|
||||
}
|
||||
95
cpp/src/wrapper/knowhere/vec_impl.cpp
Normal file
95
cpp/src/wrapper/knowhere/vec_impl.cpp
Normal file
@ -0,0 +1,95 @@
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
// Copyright 上海赜睿信息科技有限公司(Zilliz) - All Rights Reserved
|
||||
// Unauthorized copying of this file, via any medium is strictly prohibited.
|
||||
// Proprietary and confidential.
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
#include "knowhere/index/index.h"
|
||||
#include "knowhere/index/index_model.h"
|
||||
#include "knowhere/index/index_type.h"
|
||||
#include "knowhere/adapter/sptag.h"
|
||||
#include "knowhere/common/tensor.h"
|
||||
|
||||
#include "vec_impl.h"
|
||||
#include "data_transfer.h"
|
||||
|
||||
|
||||
namespace zilliz {
|
||||
namespace vecwise {
|
||||
namespace engine {
|
||||
|
||||
using namespace zilliz::knowhere;
|
||||
|
||||
void VecIndexImpl::BuildAll(const long &nb,
|
||||
const float *xb,
|
||||
const long *ids,
|
||||
const Config &cfg,
|
||||
const long &nt,
|
||||
const float *xt) {
|
||||
auto d = cfg["dim"].as<int>();
|
||||
auto dataset = GenDatasetWithIds(nb, d, xb, ids);
|
||||
|
||||
auto preprocessor = index_->BuildPreprocessor(dataset, cfg);
|
||||
index_->set_preprocessor(preprocessor);
|
||||
auto model = index_->Train(dataset, cfg);
|
||||
index_->set_index_model(model);
|
||||
index_->Add(dataset, cfg);
|
||||
}
|
||||
|
||||
void VecIndexImpl::Add(const long &nb, const float *xb, const long *ids, const Config &cfg) {
|
||||
// TODO(linxj): Assert index is trained;
|
||||
|
||||
auto d = cfg["dim"].as<int>();
|
||||
auto dataset = GenDatasetWithIds(nb, d, xb, ids);
|
||||
|
||||
index_->Add(dataset, cfg);
|
||||
}
|
||||
|
||||
void VecIndexImpl::Search(const long &nq, const float *xq, float *dist, long *ids, const Config &cfg) {
|
||||
// TODO: Assert index is trained;
|
||||
|
||||
auto d = cfg["dim"].as<int>();
|
||||
auto k = cfg["k"].as<int>();
|
||||
auto dataset = GenDataset(nq, d, xq);
|
||||
|
||||
Config search_cfg;
|
||||
auto res = index_->Search(dataset, cfg);
|
||||
auto ids_array = res->array()[0];
|
||||
auto dis_array = res->array()[1];
|
||||
|
||||
//{
|
||||
// auto& ids = ids_array;
|
||||
// auto& dists = dis_array;
|
||||
// std::stringstream ss_id;
|
||||
// std::stringstream ss_dist;
|
||||
// for (auto i = 0; i < 10; i++) {
|
||||
// for (auto j = 0; j < k; ++j) {
|
||||
// ss_id << *(ids->data()->GetValues<int64_t>(1, i * k + j)) << " ";
|
||||
// ss_dist << *(dists->data()->GetValues<float>(1, i * k + j)) << " ";
|
||||
// }
|
||||
// ss_id << std::endl;
|
||||
// ss_dist << std::endl;
|
||||
// }
|
||||
// std::cout << "id\n" << ss_id.str() << std::endl;
|
||||
// std::cout << "dist\n" << ss_dist.str() << std::endl;
|
||||
//}
|
||||
|
||||
auto p_ids = ids_array->data()->GetValues<int64_t>(1, 0);
|
||||
auto p_dist = ids_array->data()->GetValues<float>(1, 0);
|
||||
|
||||
// TODO(linxj): avoid copy here.
|
||||
memcpy(ids, p_ids, sizeof(int64_t) * nq * k);
|
||||
memcpy(dist, p_dist, sizeof(float) * nq * k);
|
||||
}
|
||||
|
||||
/// Serialize the wrapped index by delegating to the underlying knowhere index.
zilliz::knowhere::BinarySet VecIndexImpl::Serialize() {
    auto &backend = *index_;
    return backend.Serialize();
}
|
||||
|
||||
/// Restore the wrapped index from `index_binary` by delegating to the
/// underlying knowhere index.
void VecIndexImpl::Load(const zilliz::knowhere::BinarySet &index_binary) {
    auto &backend = *index_;
    backend.Load(index_binary);
}
|
||||
|
||||
}
|
||||
}
|
||||
}
|
||||
38
cpp/src/wrapper/knowhere/vec_impl.h
Normal file
38
cpp/src/wrapper/knowhere/vec_impl.h
Normal file
@ -0,0 +1,38 @@
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
// Copyright 上海赜睿信息科技有限公司(Zilliz) - All Rights Reserved
|
||||
// Unauthorized copying of this file, via any medium is strictly prohibited.
|
||||
// Proprietary and confidential.
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "knowhere/index/vector_index/vector_index.h"
|
||||
|
||||
#include "vec_index.h"
|
||||
|
||||
|
||||
namespace zilliz {
|
||||
namespace vecwise {
|
||||
namespace engine {
|
||||
|
||||
class VecIndexImpl : public VecIndex {
|
||||
public:
|
||||
explicit VecIndexImpl(std::shared_ptr<zilliz::knowhere::VectorIndex> index) : index_(std::move(index)) {};
|
||||
void BuildAll(const long &nb,
|
||||
const float *xb,
|
||||
const long *ids,
|
||||
const Config &cfg,
|
||||
const long &nt,
|
||||
const float *xt) override;
|
||||
void Add(const long &nb, const float *xb, const long *ids, const Config &cfg) override;
|
||||
zilliz::knowhere::BinarySet Serialize() override;
|
||||
void Load(const zilliz::knowhere::BinarySet &index_binary) override;
|
||||
void Search(const long &nq, const float *xq, float *dist, long *ids, const Config &cfg) override;
|
||||
|
||||
private:
|
||||
std::shared_ptr<zilliz::knowhere::VectorIndex> index_ = nullptr;
|
||||
};
|
||||
|
||||
}
|
||||
}
|
||||
}
|
||||
43
cpp/src/wrapper/knowhere/vec_index.cpp
Normal file
43
cpp/src/wrapper/knowhere/vec_index.cpp
Normal file
@ -0,0 +1,43 @@
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
// Copyright 上海赜睿信息科技有限公司(Zilliz) - All Rights Reserved
|
||||
// Unauthorized copying of this file, via any medium is strictly prohibited.
|
||||
// Proprietary and confidential.
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
#include "knowhere/index/vector_index/ivf.h"
|
||||
#include "knowhere/index/vector_index/gpu_ivf.h"
|
||||
#include "knowhere/index/vector_index/cpu_kdt_rng.h"
|
||||
|
||||
#include "vec_index.h"
|
||||
#include "vec_impl.h"
|
||||
|
||||
|
||||
namespace zilliz {
|
||||
namespace vecwise {
|
||||
namespace engine {
|
||||
|
||||
// TODO(linxj): index_type => enum struct
|
||||
/// Create a VecIndex wrapper around the knowhere index selected by
/// `index_type` ("IVF", "GPUIVF" or "SPTAG"; the comparison is exact).
///
/// NOTE(review): for any other string no backend is created and the returned
/// wrapper holds a null knowhere index - callers should be checked for this.
VecIndexPtr GetVecIndexFactory(const std::string &index_type) {
    std::shared_ptr<zilliz::knowhere::VectorIndex> backend;

    if (index_type == "IVF") {
        backend = std::make_shared<zilliz::knowhere::IVF>();
    } else if (index_type == "GPUIVF") {
        // The GPU variant is constructed with the argument 0.
        backend = std::make_shared<zilliz::knowhere::GPUIVF>(0);
    } else if (index_type == "SPTAG") {
        backend = std::make_shared<zilliz::knowhere::CPUKDTRNG>();
    }
    // TODO(linxj): Support NSG
    //else if (index_type == "NSG") {
    //    backend = std::make_shared<zilliz::knowhere::NSG>();
    //}

    return std::make_shared<VecIndexImpl>(backend);
}
|
||||
|
||||
/// Create an index of the given type via GetVecIndexFactory and load
/// `index_binary` into it.
VecIndexPtr LoadVecIndex(const std::string &index_type, const zilliz::knowhere::BinarySet &index_binary) {
    auto loaded = GetVecIndexFactory(index_type);
    loaded->Load(index_binary);
    return loaded;
}
|
||||
|
||||
}
|
||||
}
|
||||
}
|
||||
56
cpp/src/wrapper/knowhere/vec_index.h
Normal file
56
cpp/src/wrapper/knowhere/vec_index.h
Normal file
@ -0,0 +1,56 @@
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
// Copyright 上海赜睿信息科技有限公司(Zilliz) - All Rights Reserved
|
||||
// Unauthorized copying of this file, via any medium is strictly prohibited.
|
||||
// Proprietary and confidential.
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <string>
|
||||
#include <memory>
|
||||
|
||||
#include "knowhere/common/config.h"
|
||||
#include "knowhere/common/binary_set.h"
|
||||
|
||||
|
||||
namespace zilliz {
|
||||
namespace vecwise {
|
||||
namespace engine {
|
||||
|
||||
// TODO(linxj): jsoncons => rapidjson or other.
|
||||
using Config = zilliz::knowhere::Config;
|
||||
|
||||
class VecIndex {
|
||||
public:
|
||||
virtual void BuildAll(const long &nb,
|
||||
const float *xb,
|
||||
const long *ids,
|
||||
const Config &cfg,
|
||||
const long &nt = 0,
|
||||
const float *xt = nullptr) = 0;
|
||||
|
||||
virtual void Add(const long &nb,
|
||||
const float *xb,
|
||||
const long *ids,
|
||||
const Config &cfg = Config()) = 0;
|
||||
|
||||
virtual void Search(const long &nq,
|
||||
const float *xq,
|
||||
float *dist,
|
||||
long *ids,
|
||||
const Config &cfg = Config()) = 0;
|
||||
|
||||
virtual zilliz::knowhere::BinarySet Serialize() = 0;
|
||||
|
||||
virtual void Load(const zilliz::knowhere::BinarySet &index_binary) = 0;
|
||||
};
|
||||
|
||||
using VecIndexPtr = std::shared_ptr<VecIndex>;
|
||||
|
||||
extern VecIndexPtr GetVecIndexFactory(const std::string &index_type);
|
||||
|
||||
extern VecIndexPtr LoadVecIndex(const std::string &index_type, const zilliz::knowhere::BinarySet &index_binary);
|
||||
|
||||
}
|
||||
}
|
||||
}
|
||||
1
cpp/thirdparty/knowhere
vendored
Submodule
1
cpp/thirdparty/knowhere
vendored
Submodule
@ -0,0 +1 @@
|
||||
Subproject commit 844e600834df1eeafc6c7e5936338ae964bd1d41
|
||||
@ -29,7 +29,6 @@ set(unittest_libs
|
||||
easyloggingpp
|
||||
pthread
|
||||
metrics
|
||||
openblas
|
||||
gfortran
|
||||
prometheus-cpp-pull
|
||||
prometheus-cpp-push
|
||||
@ -41,7 +40,8 @@ set(unittest_libs
|
||||
|
||||
add_subdirectory(server)
|
||||
add_subdirectory(db)
|
||||
add_subdirectory(faiss_wrapper)
|
||||
add_subdirectory(index_wrapper)
|
||||
#add_subdirectory(faiss_wrapper)
|
||||
#add_subdirectory(license)
|
||||
add_subdirectory(metrics)
|
||||
add_subdirectory(storage)
|
||||
@ -36,8 +36,10 @@ set(db_test_src
|
||||
cuda_add_executable(db_test ${db_test_src})
|
||||
|
||||
set(db_libs
|
||||
libgpufaiss.a
|
||||
knowhere
|
||||
faiss
|
||||
openblas
|
||||
lapack
|
||||
cudart
|
||||
cublas
|
||||
sqlite3
|
||||
|
||||
23
cpp/unittest/index_wrapper/CMakeLists.txt
Normal file
23
cpp/unittest/index_wrapper/CMakeLists.txt
Normal file
@ -0,0 +1,23 @@
|
||||
# Unit test executable for the Knowhere index wrapper.
include_directories("${CUDA_TOOLKIT_ROOT_DIR}/include")
link_directories("${CUDA_TOOLKIT_ROOT_DIR}/lib64")

# The wrapper sources are compiled directly into the test binary.
aux_source_directory(${MILVUS_ENGINE_SRC}/wrapper/knowhere knowhere_src)

# Test helpers (data generator, file reader/writer).
set(helper utils.cpp)

set(knowhere_libs
    knowhere
    SPTAGLibStatic
    arrow
    jemalloc_pic
    faiss
    openblas
    lapack
    tbb
    cudart
    cublas
    )

add_executable(knowhere_test
    knowhere_test.cpp
    ${knowhere_src}
    ${helper})
target_link_libraries(knowhere_test
    ${knowhere_libs}
    ${unittest_libs})
|
||||
149
cpp/unittest/index_wrapper/knowhere_test.cpp
Normal file
149
cpp/unittest/index_wrapper/knowhere_test.cpp
Normal file
@ -0,0 +1,149 @@
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
// Copyright 上海赜睿信息科技有限公司(Zilliz) - All Rights Reserved
|
||||
// Unauthorized copying of this file, via any medium is strictly prohibited.
|
||||
// Proprietary and confidential.
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
#include <gtest/gtest.h>
|
||||
|
||||
#include <wrapper/knowhere/vec_index.h>
|
||||
|
||||
#include "utils.h"
|
||||
|
||||
|
||||
using namespace zilliz::vecwise::engine;
|
||||
using namespace zilliz::knowhere;
|
||||
|
||||
using ::testing::TestWithParam;
|
||||
using ::testing::Values;
|
||||
using ::testing::Combine;
|
||||
|
||||
|
||||
class KnowhereWrapperTest
|
||||
: public TestWithParam<::std::tuple<std::string, std::string, int, int, int, int, Config, Config>> {
|
||||
protected:
|
||||
void SetUp() override {
|
||||
std::string generator_type;
|
||||
std::tie(index_type, generator_type, dim, nb, nq, k, train_cfg, search_cfg) = GetParam();
|
||||
|
||||
//auto generator = GetGenerateFactory(generator_type);
|
||||
auto generator = std::make_shared<DataGenBase>();
|
||||
generator->GenData(dim, nb, nq, xb, xq, ids, k, gt_ids);
|
||||
|
||||
index_ = GetVecIndexFactory(index_type);
|
||||
}
|
||||
|
||||
protected:
|
||||
std::string index_type;
|
||||
Config train_cfg;
|
||||
Config search_cfg;
|
||||
|
||||
int dim = 64;
|
||||
int nb = 10000;
|
||||
int nq = 10;
|
||||
int k = 10;
|
||||
std::vector<float> xb;
|
||||
std::vector<float> xq;
|
||||
std::vector<long> ids;
|
||||
|
||||
VecIndexPtr index_ = nullptr;
|
||||
|
||||
// Ground Truth
|
||||
std::vector<long> gt_ids;
|
||||
};
|
||||
|
||||
INSTANTIATE_TEST_CASE_P(WrapperParam, KnowhereWrapperTest,
|
||||
Values(
|
||||
// ["Index type", "Generator type", "dim", "nb", "nq", "k", "build config", "search config"]
|
||||
std::make_tuple("IVF", "Default",
|
||||
64, 10000, 10, 10,
|
||||
Config::object{{"nlist", 100}, {"dim", 64}},
|
||||
Config::object{{"dim", 64}, {"k", 10}, {"nprobe", 20}}
|
||||
),
|
||||
std::make_tuple("SPTAG", "Default",
|
||||
64, 10000, 10, 10,
|
||||
Config::object{{"TPTNumber", 1}, {"dim", 64}},
|
||||
Config::object{{"dim", 64}, {"k", 10}}
|
||||
)
|
||||
)
|
||||
);
|
||||
|
||||
void AssertAnns(const std::vector<long> >,
|
||||
const std::vector<long> &res,
|
||||
const int &nq,
|
||||
const int &k) {
|
||||
EXPECT_EQ(res.size(), nq * k);
|
||||
|
||||
for (auto i = 0; i < nq; i++) {
|
||||
EXPECT_EQ(gt[i * k], res[i * k]);
|
||||
}
|
||||
|
||||
int match = 0;
|
||||
for (int i = 0; i < nq; ++i) {
|
||||
for (int j = 0; j < k; ++j) {
|
||||
for (int l = 0; l < k; ++l) {
|
||||
if (gt[i * nq + j] == res[i * nq + l]) match++;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// TODO(linxj): percision check
|
||||
EXPECT_GT(float(match/nq*k), 0.5);
|
||||
}
|
||||
|
||||
// Build the index, search it and compare the result with the ground truth.
TEST_P(KnowhereWrapperTest, base_test) {
    // Vector-owned output buffers: released even if a call below throws
    // (the distances were previously a raw new[] / delete[] pair).
    std::vector<long> res_ids(nq * k);
    std::vector<float> res_dist(nq * k);

    index_->BuildAll(nb, xb.data(), ids.data(), train_cfg);
    index_->Search(nq, xq.data(), res_dist.data(), res_ids.data(), search_cfg);
    AssertAnns(gt_ids, res_ids, nq, k);
}
|
||||
|
||||
// Build and search an index, round-trip it through files under /tmp, load it
// into a fresh index and check that the search results still match.
TEST_P(KnowhereWrapperTest, serialize_test) {
    // Vector-owned output buffers: released even if a call below throws
    // (the distances were previously a raw new[] / delete[] pair).
    std::vector<long> res_ids(nq * k);
    std::vector<float> res_dist(nq * k);

    index_->BuildAll(nb, xb.data(), ids.data(), train_cfg);
    index_->Search(nq, xq.data(), res_dist.data(), res_ids.data(), search_cfg);
    AssertAnns(gt_ids, res_ids, nq, k);

    {
        // Write every binary of the serialized index to its own file.
        auto binaryset = index_->Serialize();
        int fileno = 0;
        const std::string base_name = "/tmp/wrapper_serialize_test_bin_";
        std::vector<std::string> filename_list;
        std::vector<std::pair<std::string, size_t>> meta_list;  // (binary name, size)
        for (auto &iter : binaryset.binary_map_) {
            const std::string filename = base_name + std::to_string(fileno);
            // The writer goes out of scope at the end of each iteration,
            // closing the file before it is read back below.
            FileIOWriter writer(filename);
            writer(iter.second.data, iter.second.size);

            meta_list.push_back(std::make_pair(iter.first, iter.second.size));
            filename_list.push_back(filename);
            ++fileno;
        }

        // Read the files back into a new BinarySet.
        BinarySet load_data_list;
        for (size_t i = 0; i < filename_list.size() && i < meta_list.size(); ++i) {
            auto bin_size = meta_list[i].second;
            FileIOReader reader(filename_list[i]);
            std::vector<uint8_t> load_data(bin_size);
            reader(load_data.data(), bin_size);
            // NOTE(review): load_data is destroyed at the end of this
            // iteration; confirm that Append() copies the bytes.
            load_data_list.Append(meta_list[i].first, load_data);
        }

        // Search the reloaded index with zeroed result ids.
        res_ids.assign(nq * k, 0);
        auto new_index = GetVecIndexFactory(index_type);
        new_index->Load(load_data_list);
        new_index->Search(nq, xq.data(), res_dist.data(), res_ids.data(), search_cfg);
        AssertAnns(gt_ids, res_ids, nq, k);
    }
}
|
||||
81
cpp/unittest/index_wrapper/utils.cpp
Normal file
81
cpp/unittest/index_wrapper/utils.cpp
Normal file
@ -0,0 +1,81 @@
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
// Copyright 上海赜睿信息科技有限公司(Zilliz) - All Rights Reserved
|
||||
// Unauthorized copying of this file, via any medium is strictly prohibited.
|
||||
// Proprietary and confidential.
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
#include <faiss/IndexFlat.h>
|
||||
|
||||
#include "utils.h"
|
||||
|
||||
|
||||
/// Return a data generator for `gen_type`.
///
/// Only the exact string "default" is recognised; any other value yields a
/// null pointer.
/// NOTE(review): the test parameters in knowhere_test.cpp pass "Default"
/// (capital D), which would not match here.
DataGenPtr GetGenerateFactory(const std::string &gen_type) {
    if (gen_type != "default") {
        return std::shared_ptr<DataGenBase>();
    }
    return std::make_shared<DataGenBase>();
}
|
||||
|
||||
/// Fill caller-provided buffers with random test data and ground truth.
///
/// @param dim     vector dimension
/// @param nb      number of base vectors
/// @param nq      number of query vectors
/// @param xb      out: `nb * dim` floats from drand48(), with i / 1000 added
///                to the first component of vector i
/// @param xq      out: `nq * dim` floats, copied from the start of `xb`
/// @param ids     out: `nb` ids, ids[i] == i
/// @param k       neighbours per query
/// @param gt_ids  out: `nq * k` ground-truth ids from a faiss::IndexFlatL2
///                search over `xb`
///
/// NOTE(review): the queries are read from the front of `xb`, so this assumes
/// nq <= nb - confirm against callers.
void DataGenBase::GenData(const int &dim, const int &nb, const int &nq,
                          float *xb, float *xq, long *ids,
                          const int &k, long *gt_ids) {
    for (auto i = 0; i < nb; ++i) {
        for (auto j = 0; j < dim; ++j) {
            //p_data[i * d + j] = float(base + i);
            xb[i * dim + j] = drand48();
        }
        xb[dim * i] += i / 1000.;
        ids[i] = i;
    }

    // Queries are the first nq base vectors.
    const size_t query_floats = nq * dim;
    for (size_t i = 0; i < query_floats; ++i) {
        xq[i] = xb[i];
    }

    faiss::IndexFlatL2 index(dim);
    //index.add_with_ids(nb, xb, ids);
    index.add(nb, xb);

    // Distances are required by search() but not reported to the caller.
    // A vector owns them so they are freed on return (the previous raw
    // new[] buffer was never deleted).
    std::vector<float> distances(static_cast<size_t>(k) * nq);
    index.search(nq, xq, k, distances.data(), gt_ids);
}
|
||||
|
||||
void DataGenBase::GenData(const int &dim,
|
||||
const int &nb,
|
||||
const int &nq,
|
||||
std::vector<float> &xb,
|
||||
std::vector<float> &xq,
|
||||
std::vector<long> &ids,
|
||||
const int &k,
|
||||
std::vector<long> >_ids) {
|
||||
xb.resize(nb * dim);
|
||||
xq.resize(nq * dim);
|
||||
ids.resize(nb);
|
||||
gt_ids.resize(nq * k);
|
||||
GenData(dim, nb, nq, xb.data(), xq.data(), ids.data(), k, gt_ids.data());
|
||||
}
|
||||
|
||||
FileIOReader::FileIOReader(const std::string &fname) {
|
||||
name = fname;
|
||||
fs = std::fstream(name, std::ios::in | std::ios::binary);
|
||||
}
|
||||
|
||||
FileIOReader::~FileIOReader() {
|
||||
fs.close();
|
||||
}
|
||||
|
||||
size_t FileIOReader::operator()(void *ptr, size_t size) {
|
||||
fs.read(reinterpret_cast<char *>(ptr), size);
|
||||
}
|
||||
|
||||
FileIOWriter::FileIOWriter(const std::string &fname) {
|
||||
name = fname;
|
||||
fs = std::fstream(name, std::ios::out | std::ios::binary);
|
||||
}
|
||||
|
||||
FileIOWriter::~FileIOWriter() {
|
||||
fs.close();
|
||||
}
|
||||
|
||||
size_t FileIOWriter::operator()(void *ptr, size_t size) {
|
||||
fs.write(reinterpret_cast<char *>(ptr), size);
|
||||
}
|
||||
61
cpp/unittest/index_wrapper/utils.h
Normal file
61
cpp/unittest/index_wrapper/utils.h
Normal file
@ -0,0 +1,61 @@
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
// Copyright 上海赜睿信息科技有限公司(Zilliz) - All Rights Reserved
|
||||
// Unauthorized copying of this file, via any medium is strictly prohibited.
|
||||
// Proprietary and confidential.
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <memory>
|
||||
#include <vector>
|
||||
#include <cstdlib>
|
||||
#include <cstdio>
|
||||
#include <fstream>
|
||||
|
||||
|
||||
class DataGenBase;
|
||||
|
||||
using DataGenPtr = std::shared_ptr<DataGenBase>;
|
||||
|
||||
extern DataGenPtr GetGenerateFactory(const std::string &gen_type);
|
||||
|
||||
|
||||
/// Base test-data generator. Produces base vectors, query vectors, ids and
/// ground-truth neighbour ids; subclasses may override the raw-pointer
/// overload.
class DataGenBase {
 public:
    // Polymorphic base: a virtual destructor makes deletion through a
    // DataGenBase pointer well-defined.
    virtual ~DataGenBase() = default;

    /// Fill caller-allocated buffers.
    virtual void GenData(const int &dim, const int &nb, const int &nq, float *xb, float *xq, long *ids,
                         const int &k, long *gt_ids);

    /// Vector-based overload of the generator above.
    virtual void GenData(const int &dim,
                         const int &nb,
                         const int &nq,
                         std::vector<float> &xb,
                         std::vector<float> &xq,
                         std::vector<long> &ids,
                         const int &k,
                         std::vector<long> &gt_ids);
};
|
||||
|
||||
|
||||
class SanityCheck : public DataGenBase {
|
||||
public:
|
||||
void GenData(const int &dim, const int &nb, const int &nq, float *xb, float *xq, long *ids,
|
||||
const int &k, long *gt_ids) override;
|
||||
};
|
||||
|
||||
/// Small helper that writes raw bytes to a file through a std::fstream.
struct FileIOWriter {
    std::fstream fs;    // stream opened by the constructor
    std::string name;   // path the stream was opened with

    /// Opens `fname` for writing.
    FileIOWriter(const std::string &fname);

    ~FileIOWriter();

    /// Writes `size` bytes starting at `ptr`.
    size_t operator()(void *ptr, size_t size);
};
|
||||
|
||||
/// Small helper that reads raw bytes from a file through a std::fstream.
struct FileIOReader {
    std::fstream fs;    // stream opened by the constructor
    std::string name;   // path the stream was opened with

    /// Opens `fname` for reading.
    FileIOReader(const std::string &fname);

    ~FileIOReader();

    /// Reads `size` bytes into `ptr`.
    size_t operator()(void *ptr, size_t size);
};
|
||||
@ -62,8 +62,10 @@ set(count_test_src
|
||||
add_executable(metrics_test ${count_test_src} ${require_files} )
|
||||
|
||||
target_link_libraries(metrics_test
|
||||
libgpufaiss.a
|
||||
knowhere
|
||||
faiss
|
||||
openblas
|
||||
lapack
|
||||
cudart
|
||||
cublas
|
||||
sqlite3
|
||||
|
||||
@ -32,9 +32,11 @@ cuda_add_executable(server_test
|
||||
)
|
||||
|
||||
set(require_libs
|
||||
stdc++
|
||||
libgpufaiss.a
|
||||
knowhere
|
||||
faiss
|
||||
openblas
|
||||
lapack
|
||||
stdc++
|
||||
cudart
|
||||
cublas
|
||||
sqlite3
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user