From 3a6eff32f437ebb243f69d85f91ee37ad479b172 Mon Sep 17 00:00:00 2001 From: xige-16 Date: Mon, 12 Dec 2022 11:57:21 +0800 Subject: [PATCH] Fix search failed on disk index when search_list equal to limit (#21114) Signed-off-by: xige-16 Signed-off-by: xige-16 --- internal/core/src/index/VectorDiskIndex.cpp | 2 +- internal/core/unittest/test_indexing.cpp | 69 ++++++++++++++++++++- 2 files changed, 69 insertions(+), 2 deletions(-) diff --git a/internal/core/src/index/VectorDiskIndex.cpp b/internal/core/src/index/VectorDiskIndex.cpp index 13da0c6de1..106363268d 100644 --- a/internal/core/src/index/VectorDiskIndex.cpp +++ b/internal/core/src/index/VectorDiskIndex.cpp @@ -112,7 +112,7 @@ VectorDiskAnnIndex::Query(const DatasetPtr dataset, const SearchInfo& search_ AssertInfo(search_list_size.has_value(), "param " + std::string(DISK_ANN_QUERY_LIST) + "is empty"); query_config.search_list_size = search_list_size.value(); - AssertInfo(query_config.search_list_size > topk, "search_list should be greater than topk"); + AssertInfo(query_config.search_list_size >= topk, "search_list should be greater than or equal to topk"); AssertInfo(query_config.search_list_size <= std::max(uint32_t(topk * 10), uint32_t(kSearchListMaxValue1)) && query_config.search_list_size <= uint32_t(kSearchListMaxValue2), "search_list should be less than max(topk*10, 200) and less than 65535"); diff --git a/internal/core/unittest/test_indexing.cpp b/internal/core/unittest/test_indexing.cpp index 5e0a5bc603..605bcd17d5 100644 --- a/internal/core/unittest/test_indexing.cpp +++ b/internal/core/unittest/test_indexing.cpp @@ -422,4 +422,71 @@ TEST_P(IndexTest, BuildAndQuery) { if (!is_binary) { EXPECT_EQ(result->seg_offsets_[0], query_offset); } -} \ No newline at end of file +} + +//#ifdef BUILD_DISK_ANN +// TEST(Indexing, SearchDiskAnnWithInvalidParam) { +// int64_t NB = 10000; +// IndexType index_type = knowhere::IndexEnum::INDEX_DISKANN; +// MetricType metric_type = knowhere::metric::L2; +// 
milvus::index::CreateIndexInfo create_index_info; +// create_index_info.index_type = index_type; +// create_index_info.metric_type = metric_type; +// create_index_info.field_type = milvus::DataType::VECTOR_FLOAT; +// +// StorageConfig storage_config = get_default_storage_config(); +// milvus::storage::FieldDataMeta field_data_meta{1, 2, 3, 100}; +// milvus::storage::IndexMeta index_meta{3, 100, 1000, 1}; +// auto file_manager = +// std::make_shared<milvus::storage::DiskFileManagerImpl>(field_data_meta, index_meta, storage_config); +// auto index = milvus::index::IndexFactory::GetInstance().CreateIndex(create_index_info, file_manager); +// +// auto build_conf = knowhere::Config{ +// {knowhere::meta::METRIC_TYPE, metric_type}, +// {knowhere::meta::DIM, std::to_string(DIM)}, +// {milvus::index::DISK_ANN_MAX_DEGREE, std::to_string(48)}, +// {milvus::index::DISK_ANN_SEARCH_LIST_SIZE, std::to_string(128)}, +// {milvus::index::DISK_ANN_PQ_CODE_BUDGET, std::to_string(0.001)}, +// {milvus::index::DISK_ANN_BUILD_DRAM_BUDGET, std::to_string(2)}, +// }; +// +// // build disk ann index +// auto dataset = GenDataset(NB, metric_type, false); +// std::vector<float> xb_data = dataset.get_col<float>(milvus::FieldId(100)); +// knowhere::DatasetPtr xb_dataset = knowhere::GenDataset(NB, DIM, xb_data.data()); +// ASSERT_NO_THROW(index->BuildWithDataset(xb_dataset, build_conf)); +// +// // serialize and load disk index, disk index can only be searched after loading for now +// auto binary_set = index->Serialize(milvus::Config{}); +// index.reset(); +// // clean local file dir +// file_manager.reset(); +// +// auto new_file_manager = +// std::make_shared<milvus::storage::DiskFileManagerImpl>(field_data_meta, index_meta, storage_config); +// auto new_index = milvus::index::IndexFactory::GetInstance().CreateIndex(create_index_info, new_file_manager); +// auto vec_index = dynamic_cast<milvus::index::VectorIndex*>(new_index.get()); +// std::vector<std::string> index_files; +// for (auto& binary : binary_set.binary_map_) { +// index_files.emplace_back(binary.first); +// } +// auto load_conf = generate_load_conf(index_type, 
metric_type, NB); +// load_conf["index_files"] = index_files; +// vec_index->Load(binary_set, load_conf); +// EXPECT_EQ(vec_index->Count(), NB); +// +// // search disk index with search_list < limit (K - 1), which is invalid and must be rejected +// int query_offset = 100; +// knowhere::DatasetPtr xq_dataset = knowhere::GenDataset(NQ, DIM, xb_data.data() + DIM * query_offset); +// +// milvus::SearchInfo search_info; +// search_info.topk_ = K; +// search_info.metric_type_ = metric_type; +// search_info.search_params_ = milvus::Config{ +// {knowhere::meta::METRIC_TYPE, metric_type}, +// {milvus::index::DISK_ANN_QUERY_LIST, K - 1}, +// }; +// EXPECT_THROW(vec_index->Query(xq_dataset, search_info, nullptr), std::runtime_error); +// // vec_index->Query(xq_dataset, search_info, nullptr); +//} +//#endif