From c5918ffbdb131e6f5b9bf6fca54f98769eac7824 Mon Sep 17 00:00:00 2001 From: Buqian Zheng Date: Thu, 23 May 2024 14:11:42 +0800 Subject: [PATCH] enhance: mark sparse inverted index as mmap-able (#33281) issue: #29419 Signed-off-by: Buqian Zheng --- internal/core/src/index/Index.h | 5 ++++- internal/core/unittest/test_indexing.cpp | 2 +- internal/core/unittest/test_utils/DataGen.h | 12 ++++++++++++ pkg/util/indexparamcheck/index_type.go | 4 +++- 4 files changed, 20 insertions(+), 3 deletions(-) diff --git a/internal/core/src/index/Index.h b/internal/core/src/index/Index.h index 9381ee74ad..2f3da4be14 100644 --- a/internal/core/src/index/Index.h +++ b/internal/core/src/index/Index.h @@ -81,7 +81,10 @@ class IndexBase { index_type_ == knowhere::IndexEnum::INDEX_FAISS_IVFSQ8 || index_type_ == knowhere::IndexEnum::INDEX_FAISS_BIN_IVFFLAT || index_type_ == knowhere::IndexEnum::INDEX_FAISS_IDMAP || - index_type_ == knowhere::IndexEnum::INDEX_FAISS_BIN_IDMAP; + index_type_ == knowhere::IndexEnum::INDEX_FAISS_BIN_IDMAP || + index_type_ == + knowhere::IndexEnum::INDEX_SPARSE_INVERTED_INDEX || + index_type_ == knowhere::IndexEnum::INDEX_SPARSE_WAND; } const IndexType& diff --git a/internal/core/unittest/test_indexing.cpp b/internal/core/unittest/test_indexing.cpp index 4cdf94420b..c02f427736 100644 --- a/internal/core/unittest/test_indexing.cpp +++ b/internal/core/unittest/test_indexing.cpp @@ -570,7 +570,7 @@ TEST_P(IndexTest, Mmap) { load_conf["mmap_filepath"] = "mmap/test_index_mmap_" + index_type; vec_index->Load(milvus::tracer::TraceContext{}, load_conf); EXPECT_EQ(vec_index->Count(), NB); - EXPECT_EQ(vec_index->GetDim(), DIM); + EXPECT_EQ(vec_index->GetDim(), is_sparse ? kTestSparseDim : DIM); milvus::SearchInfo search_info; search_info.topk_ = K; diff --git a/internal/core/unittest/test_utils/DataGen.h b/internal/core/unittest/test_utils/DataGen.h index 7566c63757..283ccbec3c 100644 --- a/internal/core/unittest/test_utils/DataGen.h +++ b/internal/core/unittest/test_utils/DataGen.h @@ -259,6 +259,18 @@ GenerateRandomSparseFloatVector(size_t rows, std::vector> data(rows); + // ensure the actual dim of the entire generated dataset is cols. + data[0][cols - 1] = real_distrib(rng); + --num_elements; + + // Ensure each row has at least one non-zero value + for (size_t i = 0; i < rows; ++i) { + auto col = col_distrib(rng); + float val = real_distrib(rng); + data[i][col] = val; + } + num_elements -= rows; + for (int32_t i = 0; i < num_elements; ++i) { auto row = row_distrib(rng); while (data[row].size() == (size_t)cols) { diff --git a/pkg/util/indexparamcheck/index_type.go b/pkg/util/indexparamcheck/index_type.go index e752057ea4..a20db560bf 100644 --- a/pkg/util/indexparamcheck/index_type.go +++ b/pkg/util/indexparamcheck/index_type.go @@ -57,7 +57,9 @@ func IsMmapSupported(indexType IndexType) bool { indexType == IndexFaissBinIDMap || indexType == IndexFaissBinIvfFlat || indexType == IndexHNSW || - indexType == IndexScaNN + indexType == IndexScaNN || + indexType == IndexSparseInverted || + indexType == IndexSparseWand } func IsDiskIndex(indexType IndexType) bool {