mirror of
https://gitee.com/milvus-io/milvus.git
synced 2026-02-02 01:06:41 +08:00
Add support for idmap, nsg, sq8
Signed-off-by: dragondriver <jiquan.long@zilliz.com>
This commit is contained in:
parent
09620efadf
commit
2e198e2783
@ -105,6 +105,32 @@ IndexWrapper::parse() {
|
||||
config_[milvus::knowhere::IndexParams::m] = std::stoi(m);
|
||||
}
|
||||
|
||||
/************************** NSG Parameter **************************/
|
||||
if (!config_.contains(milvus::knowhere::IndexParams::knng)) {
|
||||
} else {
|
||||
auto knng = config_[milvus::knowhere::IndexParams::knng].get<std::string>();
|
||||
config_[milvus::knowhere::IndexParams::knng] = std::stoi(knng);
|
||||
}
|
||||
|
||||
if (!config_.contains(milvus::knowhere::IndexParams::search_length)) {
|
||||
} else {
|
||||
auto search_length = config_[milvus::knowhere::IndexParams::search_length].get<std::string>();
|
||||
config_[milvus::knowhere::IndexParams::search_length] = std::stoi(search_length);
|
||||
}
|
||||
|
||||
if (!config_.contains(milvus::knowhere::IndexParams::out_degree)) {
|
||||
} else {
|
||||
auto out_degree = config_[milvus::knowhere::IndexParams::out_degree].get<std::string>();
|
||||
config_[milvus::knowhere::IndexParams::out_degree] = std::stoi(out_degree);
|
||||
}
|
||||
|
||||
if (!config_.contains(milvus::knowhere::IndexParams::candidate)) {
|
||||
} else {
|
||||
auto candidate = config_[milvus::knowhere::IndexParams::candidate].get<std::string>();
|
||||
config_[milvus::knowhere::IndexParams::candidate] = std::stoi(candidate);
|
||||
}
|
||||
|
||||
/************************** Serialize *******************************/
|
||||
if (!config_.contains(milvus::knowhere::INDEX_FILE_SLICE_SIZE_IN_MEGABYTE)) {
|
||||
config_[milvus::knowhere::INDEX_FILE_SLICE_SIZE_IN_MEGABYTE] = 4;
|
||||
} else {
|
||||
@ -132,12 +158,37 @@ IndexWrapper::dim() {
|
||||
void
|
||||
IndexWrapper::BuildWithoutIds(const knowhere::DatasetPtr& dataset) {
|
||||
auto index_type = get_index_type();
|
||||
// if (index_type == milvus::knowhere::IndexEnum::INDEX_FAISS_BIN_IVFFLAT) {
|
||||
// PanicInfo(std::string(index_type) + " doesn't support build without ids yet!");
|
||||
if (is_in_need_id_list(index_type)) {
|
||||
PanicInfo(std::string(index_type) + " doesn't support build without ids yet!");
|
||||
}
|
||||
// if (is_in_need_build_all_list(index_type)) {
|
||||
// index_->BuildAll(dataset, config_);
|
||||
// } else {
|
||||
// index_->Train(dataset, config_);
|
||||
// index_->AddWithoutIds(dataset, config_);
|
||||
// }
|
||||
index_->Train(dataset, config_);
|
||||
index_->AddWithoutIds(dataset, config_);
|
||||
index_->BuildAll(dataset, config_);
|
||||
|
||||
if (is_in_nm_list(index_type)) {
|
||||
StoreRawData(dataset);
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
IndexWrapper::BuildWithIds(const knowhere::DatasetPtr& dataset) {
|
||||
Assert(dataset->data().find(milvus::knowhere::meta::IDS) != dataset->data().end());
|
||||
// index_->Train(dataset, config_);
|
||||
// index_->Add(dataset, config_);
|
||||
index_->BuildAll(dataset, config_);
|
||||
|
||||
if (is_in_nm_list(get_index_type())) {
|
||||
StoreRawData(dataset);
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
IndexWrapper::StoreRawData(const knowhere::DatasetPtr& dataset) {
|
||||
auto index_type = get_index_type();
|
||||
if (is_in_nm_list(index_type)) {
|
||||
auto tensor = dataset->Get<const void*>(milvus::knowhere::meta::TENSOR);
|
||||
auto row_num = dataset->Get<int64_t>(milvus::knowhere::meta::ROWS);
|
||||
@ -153,14 +204,6 @@ IndexWrapper::BuildWithoutIds(const knowhere::DatasetPtr& dataset) {
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
IndexWrapper::BuildWithIds(const knowhere::DatasetPtr& dataset) {
|
||||
Assert(dataset->data().find(milvus::knowhere::meta::IDS) != dataset->data().end());
|
||||
// index_->Train(dataset, config_);
|
||||
// index_->Add(dataset, config_);
|
||||
index_->BuildAll(dataset, config_);
|
||||
}
|
||||
|
||||
/*
|
||||
* brief Return serialized binary set
|
||||
*/
|
||||
|
||||
@ -49,6 +49,9 @@ class IndexWrapper {
|
||||
std::optional<T>
|
||||
get_config_by_name(std::string name);
|
||||
|
||||
void
|
||||
StoreRawData(const knowhere::DatasetPtr& dataset);
|
||||
|
||||
public:
|
||||
void
|
||||
BuildWithIds(const knowhere::DatasetPtr& dataset);
|
||||
|
||||
@ -36,16 +36,47 @@ BIN_List() {
|
||||
return ret;
|
||||
}
|
||||
|
||||
std::vector<std::string>
|
||||
Need_ID_List() {
|
||||
static std::vector<std::string> ret{
|
||||
// milvus::knowhere::IndexEnum::INDEX_FAISS_BIN_IVFFLAT,
|
||||
// milvus::knowhere::IndexEnum::INDEX_NSG
|
||||
};
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
std::vector<std::string>
|
||||
Need_BuildAll_list() {
|
||||
static std::vector<std::string> ret{milvus::knowhere::IndexEnum::INDEX_NSG};
|
||||
return ret;
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
bool
|
||||
is_in_list(const T& t, std::function<std::vector<T>()> list_func) {
|
||||
auto l = list_func();
|
||||
return std::find(l.begin(), l.end(), t) != l.end();
|
||||
}
|
||||
|
||||
bool
|
||||
is_in_bin_list(const milvus::knowhere::IndexType& index_type) {
|
||||
auto bin_list = BIN_List();
|
||||
return std::find(bin_list.begin(), bin_list.end(), index_type) != bin_list.end();
|
||||
return is_in_list<std::string>(index_type, BIN_List);
|
||||
}
|
||||
|
||||
bool
|
||||
is_in_nm_list(const milvus::knowhere::IndexType& index_type) {
|
||||
auto nm_list = NM_List();
|
||||
return std::find(nm_list.begin(), nm_list.end(), index_type) != nm_list.end();
|
||||
return is_in_list<std::string>(index_type, NM_List);
|
||||
}
|
||||
|
||||
bool
|
||||
is_in_need_build_all_list(const milvus::knowhere::IndexType& index_type) {
|
||||
return is_in_list<std::string>(index_type, Need_BuildAll_list);
|
||||
}
|
||||
|
||||
bool
|
||||
is_in_need_id_list(const milvus::knowhere::IndexType& index_type) {
|
||||
return is_in_list<std::string>(index_type, Need_ID_List);
|
||||
}
|
||||
|
||||
} // namespace indexbuilder
|
||||
|
||||
@ -38,7 +38,14 @@ int DEVICEID = 0;
|
||||
namespace {
|
||||
auto
|
||||
generate_conf(const milvus::knowhere::IndexType& index_type, const milvus::knowhere::MetricType& metric_type) {
|
||||
if (index_type == milvus::knowhere::IndexEnum::INDEX_FAISS_IVFPQ) {
|
||||
if (index_type == milvus::knowhere::IndexEnum::INDEX_FAISS_IDMAP) {
|
||||
return milvus::knowhere::Config{
|
||||
{milvus::knowhere::meta::DIM, DIM},
|
||||
// {milvus::knowhere::meta::TOPK, K},
|
||||
{milvus::knowhere::Metric::TYPE, metric_type},
|
||||
{milvus::knowhere::INDEX_FILE_SLICE_SIZE_IN_MEGABYTE, 4},
|
||||
};
|
||||
} else if (index_type == milvus::knowhere::IndexEnum::INDEX_FAISS_IVFPQ) {
|
||||
return milvus::knowhere::Config{
|
||||
{milvus::knowhere::meta::DIM, DIM},
|
||||
// {milvus::knowhere::meta::TOPK, K},
|
||||
@ -55,7 +62,20 @@ generate_conf(const milvus::knowhere::IndexType& index_type, const milvus::knowh
|
||||
// {milvus::knowhere::meta::TOPK, K},
|
||||
{milvus::knowhere::IndexParams::nlist, 100},
|
||||
// {milvus::knowhere::IndexParams::nprobe, 4},
|
||||
{milvus::knowhere::Metric::TYPE, milvus::knowhere::Metric::L2},
|
||||
{milvus::knowhere::Metric::TYPE, metric_type},
|
||||
{milvus::knowhere::INDEX_FILE_SLICE_SIZE_IN_MEGABYTE, 4},
|
||||
#ifdef MILVUS_GPU_VERSION
|
||||
{milvus::knowhere::meta::DEVICEID, DEVICEID},
|
||||
#endif
|
||||
};
|
||||
} else if (index_type == milvus::knowhere::IndexEnum::INDEX_FAISS_IVFSQ8) {
|
||||
return milvus::knowhere::Config{
|
||||
{milvus::knowhere::meta::DIM, DIM},
|
||||
// {milvus::knowhere::meta::TOPK, K},
|
||||
{milvus::knowhere::IndexParams::nlist, 100},
|
||||
// {milvus::knowhere::IndexParams::nprobe, 4},
|
||||
{milvus::knowhere::IndexParams::nbits, 8},
|
||||
{milvus::knowhere::Metric::TYPE, metric_type},
|
||||
{milvus::knowhere::INDEX_FILE_SLICE_SIZE_IN_MEGABYTE, 4},
|
||||
#ifdef MILVUS_GPU_VERSION
|
||||
{milvus::knowhere::meta::DEVICEID, DEVICEID},
|
||||
@ -78,6 +98,15 @@ generate_conf(const milvus::knowhere::IndexType& index_type, const milvus::knowh
|
||||
// {milvus::knowhere::meta::TOPK, K},
|
||||
{milvus::knowhere::Metric::TYPE, metric_type},
|
||||
};
|
||||
} else if (index_type == milvus::knowhere::IndexEnum::INDEX_NSG) {
|
||||
return milvus::knowhere::Config{{milvus::knowhere::meta::DIM, DIM},
|
||||
{milvus::knowhere::IndexParams::nlist, 163},
|
||||
{milvus::knowhere::IndexParams::nprobe, 8},
|
||||
{milvus::knowhere::IndexParams::knng, 20},
|
||||
{milvus::knowhere::IndexParams::search_length, 40},
|
||||
{milvus::knowhere::IndexParams::out_degree, 30},
|
||||
{milvus::knowhere::IndexParams::candidate, 100},
|
||||
{milvus::knowhere::Metric::TYPE, metric_type}};
|
||||
}
|
||||
return milvus::knowhere::Config();
|
||||
}
|
||||
@ -142,11 +171,6 @@ class IndexWrapperTest : public ::testing::TestWithParam<Param> {
|
||||
if (!is_binary) {
|
||||
xb_data = dataset.get_col<float>(0);
|
||||
xb_dataset = milvus::knowhere::GenDataset(NB, DIM, xb_data.data());
|
||||
} else if (index_type == milvus::knowhere::IndexEnum::INDEX_FAISS_BIN_IVFFLAT) {
|
||||
xb_bin_data = dataset.get_col<uint8_t>(0);
|
||||
ids.resize(NB);
|
||||
std::iota(ids.begin(), ids.end(), 0);
|
||||
xb_dataset = milvus::knowhere::GenDataset(NB, DIM, xb_bin_data.data());
|
||||
} else {
|
||||
xb_bin_data = dataset.get_col<uint8_t>(0);
|
||||
xb_dataset = milvus::knowhere::GenDataset(NB, DIM, xb_bin_data.data());
|
||||
@ -335,11 +359,14 @@ TEST(BinIdMapWrapper, Build) {
|
||||
INSTANTIATE_TEST_CASE_P(
|
||||
IndexTypeParameters,
|
||||
IndexWrapperTest,
|
||||
::testing::Values(
|
||||
std::pair(milvus::knowhere::IndexEnum::INDEX_FAISS_IVFPQ, milvus::knowhere::Metric::L2),
|
||||
std::pair(milvus::knowhere::IndexEnum::INDEX_FAISS_IVFFLAT, milvus::knowhere::Metric::L2),
|
||||
std::pair(milvus::knowhere::IndexEnum::INDEX_FAISS_BIN_IVFFLAT, milvus::knowhere::Metric::JACCARD),
|
||||
std::pair(milvus::knowhere::IndexEnum::INDEX_FAISS_BIN_IDMAP, milvus::knowhere::Metric::JACCARD)));
|
||||
::testing::Values(std::pair(milvus::knowhere::IndexEnum::INDEX_FAISS_IDMAP, milvus::knowhere::Metric::L2),
|
||||
std::pair(milvus::knowhere::IndexEnum::INDEX_FAISS_IVFPQ, milvus::knowhere::Metric::L2),
|
||||
std::pair(milvus::knowhere::IndexEnum::INDEX_FAISS_IVFFLAT, milvus::knowhere::Metric::L2),
|
||||
std::pair(milvus::knowhere::IndexEnum::INDEX_FAISS_IVFSQ8, milvus::knowhere::Metric::L2),
|
||||
std::pair(milvus::knowhere::IndexEnum::INDEX_FAISS_BIN_IVFFLAT,
|
||||
milvus::knowhere::Metric::JACCARD),
|
||||
std::pair(milvus::knowhere::IndexEnum::INDEX_FAISS_BIN_IDMAP, milvus::knowhere::Metric::JACCARD),
|
||||
std::pair(milvus::knowhere::IndexEnum::INDEX_NSG, milvus::knowhere::Metric::L2)));
|
||||
|
||||
TEST_P(IndexWrapperTest, Constructor) {
|
||||
auto index =
|
||||
@ -357,7 +384,11 @@ TEST_P(IndexWrapperTest, BuildWithoutIds) {
|
||||
auto index =
|
||||
std::make_unique<milvus::indexbuilder::IndexWrapper>(type_params_str.c_str(), index_params_str.c_str());
|
||||
|
||||
ASSERT_NO_THROW(index->BuildWithoutIds(xb_dataset));
|
||||
if (milvus::indexbuilder::is_in_need_id_list(index_type)) {
|
||||
ASSERT_ANY_THROW(index->BuildWithoutIds(xb_dataset));
|
||||
} else {
|
||||
ASSERT_NO_THROW(index->BuildWithoutIds(xb_dataset));
|
||||
}
|
||||
}
|
||||
|
||||
TEST_P(IndexWrapperTest, Codec) {
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user