From 017eb9ffe27db5ba88467be46f484cfcb126c43f Mon Sep 17 00:00:00 2001 From: Spade A <71589810+SpadeA-Tang@users.noreply.github.com> Date: Tue, 24 Jun 2025 17:10:42 +0800 Subject: [PATCH] fix: fix some bugs discovered by chaos tests --- cherry pick (#42909) master: https://github.com/milvus-io/milvus/pull/42906 issue: https://github.com/milvus-io/milvus/issues/42870 --------- Signed-off-by: SpadeA --- .../core/src/index/InvertedIndexTantivy.cpp | 6 +-- internal/core/src/index/JsonInvertedIndex.h | 4 +- .../src/index/JsonKeyStatsInvertedIndex.cpp | 8 +-- .../src/index/JsonKeyStatsInvertedIndex.h | 2 +- internal/core/src/index/TextMatchIndex.cpp | 8 +-- internal/core/src/index/TextMatchIndex.h | 2 +- internal/core/src/index/Utils.h | 22 +++++++- .../src/segcore/ChunkedSegmentSealedImpl.cpp | 2 +- .../core/src/segcore/SegmentGrowingImpl.cpp | 4 +- .../core/src/segcore/SegmentSealedImpl.cpp | 2 +- .../core/thirdparty/tantivy/jieba_demo.cpp | 2 +- .../tantivy-binding/src/index_writer.rs | 4 +- .../tantivy-binding/src/index_writer_c.rs | 51 ++++++++++--------- .../tantivy/tantivy-binding/src/util.rs | 4 +- .../core/thirdparty/tantivy/tantivy-wrapper.h | 8 +-- .../core/thirdparty/tantivy/text_demo.cpp | 2 +- internal/core/unittest/test_expr.cpp | 8 +-- internal/core/unittest/test_json_index.cpp | 4 +- .../unittest/test_json_key_stats_index.cpp | 2 +- internal/core/unittest/test_text_match.cpp | 2 +- 20 files changed, 85 insertions(+), 62 deletions(-) diff --git a/internal/core/src/index/InvertedIndexTantivy.cpp b/internal/core/src/index/InvertedIndexTantivy.cpp index ddadcfc41b..9c57d0b077 100644 --- a/internal/core/src/index/InvertedIndexTantivy.cpp +++ b/internal/core/src/index/InvertedIndexTantivy.cpp @@ -254,8 +254,8 @@ InvertedIndexTantivy::Load(milvus::tracer::TraceContext ctx, inverted_index_files.end()); disk_file_manager_->CacheIndexToDisk(inverted_index_files); path_ = prefix; - wrapper_ = std::make_shared(prefix.c_str(), - milvus::index::SetBitset); + wrapper_ 
= std::make_shared( + prefix.c_str(), milvus::index::SetBitsetSealed); } template @@ -482,7 +482,7 @@ InvertedIndexTantivy::BuildWithRawDataForUT(size_t n, static_cast(values), n); } } - wrapper_->create_reader(); + wrapper_->create_reader(milvus::index::SetBitsetSealed); finish(); wrapper_->reload(); } diff --git a/internal/core/src/index/JsonInvertedIndex.h b/internal/core/src/index/JsonInvertedIndex.h index da1e352c11..23d0a8c6a4 100644 --- a/internal/core/src/index/JsonInvertedIndex.h +++ b/internal/core/src/index/JsonInvertedIndex.h @@ -108,8 +108,8 @@ class JsonInvertedIndex : public index::InvertedIndexTantivy { } void - create_reader() { - this->wrapper_->create_reader(); + create_reader(SetBitsetFn set_bitset) { + this->wrapper_->create_reader(set_bitset); } bool diff --git a/internal/core/src/index/JsonKeyStatsInvertedIndex.cpp b/internal/core/src/index/JsonKeyStatsInvertedIndex.cpp index b242d1b3eb..f7a6cad418 100644 --- a/internal/core/src/index/JsonKeyStatsInvertedIndex.cpp +++ b/internal/core/src/index/JsonKeyStatsInvertedIndex.cpp @@ -386,8 +386,8 @@ JsonKeyStatsInvertedIndex::Load(milvus::tracer::TraceContext ctx, disk_file_manager_->CacheJsonKeyIndexToDisk(index_files.value()); AssertInfo( tantivy_index_exist(path_.c_str()), "index not exist: {}", path_); - wrapper_ = std::make_shared(path_.c_str(), - milvus::index::SetBitset); + wrapper_ = std::make_shared( + path_.c_str(), milvus::index::SetBitsetSealed); LOG_INFO("load json key index done for field id:{} with dir:{}", field_id_, path_); @@ -497,8 +497,8 @@ JsonKeyStatsInvertedIndex::Reload() { } void -JsonKeyStatsInvertedIndex::CreateReader() { - wrapper_->create_reader(); +JsonKeyStatsInvertedIndex::CreateReader(SetBitsetFn set_bitset) { + wrapper_->create_reader(set_bitset); } } // namespace milvus::index diff --git a/internal/core/src/index/JsonKeyStatsInvertedIndex.h b/internal/core/src/index/JsonKeyStatsInvertedIndex.h index 9b3b57c777..a60430c0b8 100644 --- 
a/internal/core/src/index/JsonKeyStatsInvertedIndex.h +++ b/internal/core/src/index/JsonKeyStatsInvertedIndex.h @@ -133,7 +133,7 @@ class JsonKeyStatsInvertedIndex : public InvertedIndexTantivy { Reload(); void - CreateReader(); + CreateReader(SetBitsetFn set_bitset); bool has_escape_sequence(const std::string& str) { diff --git a/internal/core/src/index/TextMatchIndex.cpp b/internal/core/src/index/TextMatchIndex.cpp index 6b991d0813..9b29602ab8 100644 --- a/internal/core/src/index/TextMatchIndex.cpp +++ b/internal/core/src/index/TextMatchIndex.cpp @@ -149,8 +149,8 @@ TextMatchIndex::Load(const Config& config) { disk_file_manager_->CacheTextLogToDisk(files_value); AssertInfo( tantivy_index_exist(prefix.c_str()), "index not exist: {}", prefix); - wrapper_ = std::make_shared(prefix.c_str(), - milvus::index::SetBitset); + wrapper_ = std::make_shared( + prefix.c_str(), milvus::index::SetBitsetSealed); } void @@ -269,8 +269,8 @@ TextMatchIndex::Reload() { } void -TextMatchIndex::CreateReader() { - wrapper_->create_reader(); +TextMatchIndex::CreateReader(SetBitsetFn set_bitset) { + wrapper_->create_reader(set_bitset); } void diff --git a/internal/core/src/index/TextMatchIndex.h b/internal/core/src/index/TextMatchIndex.h index 2cc6c98b10..e9d6f9d47c 100644 --- a/internal/core/src/index/TextMatchIndex.h +++ b/internal/core/src/index/TextMatchIndex.h @@ -74,7 +74,7 @@ class TextMatchIndex : public InvertedIndexTantivy { public: void - CreateReader(); + CreateReader(SetBitsetFn set_bitset); void RegisterTokenizer(const char* tokenizer_name, const char* analyzer_params); diff --git a/internal/core/src/index/Utils.h b/internal/core/src/index/Utils.h index bdc9451c42..97067b48ea 100644 --- a/internal/core/src/index/Utils.h +++ b/internal/core/src/index/Utils.h @@ -199,13 +199,31 @@ CheckAndUpdateKnowhereRangeSearchParam(const SearchInfo& search_info, const MetricType& metric_type, knowhere::Json& search_config); -void inline SetBitset(void* bitset, const uint32_t* doc_id, 
uintptr_t n) { +// For a sealed segment, every doc_id is guaranteed to be less than the bitset size, which equals the doc count of tantivy before querying. +void inline SetBitsetSealed(void* bitset, const uint32_t* doc_id, uintptr_t n) { TargetBitmap* bitmap = static_cast(bitset); + const auto bitmap_size = bitmap->size(); for (uintptr_t i = 0; i < n; ++i) { - assert(doc_id[i] < bitmap->size()); + assert(doc_id[i] < bitmap_size); (*bitmap)[doc_id[i]] = true; } } +// For a growing segment, inserts run concurrently with queries, so a doc_id may exceed the bitset size. +void inline SetBitsetGrowing(void* bitset, + const uint32_t* doc_id, + uintptr_t n) { + TargetBitmap* bitmap = static_cast(bitset); + const auto bitmap_size = bitmap->size(); + + for (uintptr_t i = 0; i < n; ++i) { + const auto id = doc_id[i]; + if (id >= bitmap_size) { + // If doc_ids were sorted we could return here, but we deliberately do not rely on that ordering guarantee: skip this id and keep scanning. + continue; + } + (*bitmap)[id] = true; + } +} } // namespace milvus::index diff --git a/internal/core/src/segcore/ChunkedSegmentSealedImpl.cpp b/internal/core/src/segcore/ChunkedSegmentSealedImpl.cpp index 81b0fa5264..0b08fcc1e7 100644 --- a/internal/core/src/segcore/ChunkedSegmentSealedImpl.cpp +++ b/internal/core/src/segcore/ChunkedSegmentSealedImpl.cpp @@ -1523,7 +1523,7 @@ ChunkedSegmentSealedImpl::CreateTextIndex(FieldId field_id) { } // create index reader. - index->CreateReader(); + index->CreateReader(milvus::index::SetBitsetSealed); // release index writer. 
index->Finish(); diff --git a/internal/core/src/segcore/SegmentGrowingImpl.cpp b/internal/core/src/segcore/SegmentGrowingImpl.cpp index fd62caaa00..fc60f6bd28 100644 --- a/internal/core/src/segcore/SegmentGrowingImpl.cpp +++ b/internal/core/src/segcore/SegmentGrowingImpl.cpp @@ -894,7 +894,7 @@ SegmentGrowingImpl::CreateTextIndex(FieldId field_id) { "milvus_tokenizer", field_meta.get_analyzer_params().c_str()); index->Commit(); - index->CreateReader(); + index->CreateReader(milvus::index::SetBitsetGrowing); index->RegisterTokenizer("milvus_tokenizer", field_meta.get_analyzer_params().c_str()); text_indexes_[field_id] = std::move(index); @@ -955,7 +955,7 @@ SegmentGrowingImpl::CreateJSONIndex(FieldId field_id) { JSON_KEY_STATS_COMMIT_INTERVAL, unique_id.c_str()); index->Commit(); - index->CreateReader(); + index->CreateReader(milvus::index::SetBitsetGrowing); json_indexes_[field_id] = std::move(index); } diff --git a/internal/core/src/segcore/SegmentSealedImpl.cpp b/internal/core/src/segcore/SegmentSealedImpl.cpp index 4b282aeafc..daf1487416 100644 --- a/internal/core/src/segcore/SegmentSealedImpl.cpp +++ b/internal/core/src/segcore/SegmentSealedImpl.cpp @@ -2138,7 +2138,7 @@ SegmentSealedImpl::CreateTextIndex(FieldId field_id) { } // create index reader. - index->CreateReader(); + index->CreateReader(milvus::index::SetBitsetSealed); // release index writer. 
index->Finish(); diff --git a/internal/core/thirdparty/tantivy/jieba_demo.cpp b/internal/core/thirdparty/tantivy/jieba_demo.cpp index 0c4428238a..43fe3eed20 100644 --- a/internal/core/thirdparty/tantivy/jieba_demo.cpp +++ b/internal/core/thirdparty/tantivy/jieba_demo.cpp @@ -37,7 +37,7 @@ main(int argc, char* argv[]) { text_index.commit(); } - text_index.create_reader(); + text_index.create_reader(milvus::index::SetBitsetGrowing); text_index.register_tokenizer(tokenizer_name.c_str(), analyzer_params); { diff --git a/internal/core/thirdparty/tantivy/tantivy-binding/src/index_writer.rs b/internal/core/thirdparty/tantivy/tantivy-binding/src/index_writer.rs index dfc7ae8ba7..c1c2fbdba6 100644 --- a/internal/core/thirdparty/tantivy/tantivy-binding/src/index_writer.rs +++ b/internal/core/thirdparty/tantivy/tantivy-binding/src/index_writer.rs @@ -1,3 +1,4 @@ +use core::slice; use std::ffi::CStr; use std::sync::Arc; @@ -10,6 +11,7 @@ use tantivy::schema::{ }; use tantivy::{doc, Document, Index, IndexWriter, SingleSegmentIndexWriter, UserOperation}; +use crate::convert_to_rust_slice; use crate::data_type::TantivyDataType; use crate::error::{Result, TantivyBindingError}; @@ -196,7 +198,7 @@ impl IndexWriterWrapper { .to_str() .map_err(|e| TantivyBindingError::InternalError(e.to_string()))?; let json_offsets = - unsafe { std::slice::from_raw_parts(*json_offsets, *json_offsets_len) }; + unsafe { convert_to_rust_slice!(*json_offsets, *json_offsets_len) }; for offset in json_offsets { batch.push(UserOperation::Add(doc!( diff --git a/internal/core/thirdparty/tantivy/tantivy-binding/src/index_writer_c.rs b/internal/core/thirdparty/tantivy/tantivy-binding/src/index_writer_c.rs index 573f1135cc..09127d56ce 100644 --- a/internal/core/thirdparty/tantivy/tantivy-binding/src/index_writer_c.rs +++ b/internal/core/thirdparty/tantivy/tantivy-binding/src/index_writer_c.rs @@ -11,6 +11,7 @@ use crate::{ util::{create_binding, free_binding}, }; +#[macro_export] macro_rules! 
convert_to_rust_slice { ($arr: expr, $len: expr) => { match $arr { @@ -127,7 +128,7 @@ pub extern "C" fn tantivy_index_add_int8s( offset_begin: i64, ) -> RustResult { let real = ptr as *mut IndexWriterWrapper; - let arr = unsafe { slice::from_raw_parts(array, len) }; + let arr = unsafe { convert_to_rust_slice!(array, len) }; unsafe { execute(arr, offset_begin, IndexWriterWrapper::add_i8, &mut (*real)).into() } } @@ -138,7 +139,7 @@ pub extern "C" fn tantivy_index_add_int8s_by_single_segment_writer( len: usize, ) -> RustResult { let real = ptr as *mut IndexWriterWrapper; - let arr = unsafe { slice::from_raw_parts(array, len) }; + let arr = unsafe { convert_to_rust_slice!(array, len) }; unsafe { execute_by_single_segment_writer( arr, @@ -157,7 +158,7 @@ pub extern "C" fn tantivy_index_add_int16s( offset_begin: i64, ) -> RustResult { let real = ptr as *mut IndexWriterWrapper; - let arr = unsafe { slice::from_raw_parts(array, len) }; + let arr = unsafe { convert_to_rust_slice!(array, len) }; unsafe { execute(arr, offset_begin, IndexWriterWrapper::add_i16, &mut (*real)).into() } } @@ -168,7 +169,7 @@ pub extern "C" fn tantivy_index_add_int16s_by_single_segment_writer( len: usize, ) -> RustResult { let real = ptr as *mut IndexWriterWrapper; - let arr = unsafe { slice::from_raw_parts(array, len) }; + let arr = unsafe { convert_to_rust_slice!(array, len) }; unsafe { execute_by_single_segment_writer( arr, @@ -187,7 +188,7 @@ pub extern "C" fn tantivy_index_add_int32s( offset_begin: i64, ) -> RustResult { let real = ptr as *mut IndexWriterWrapper; - let arr = unsafe { slice::from_raw_parts(array, len) }; + let arr = unsafe { convert_to_rust_slice!(array, len) }; unsafe { execute(arr, offset_begin, IndexWriterWrapper::add_i32, &mut (*real)).into() } } @@ -198,7 +199,7 @@ pub extern "C" fn tantivy_index_add_int32s_by_single_segment_writer( len: usize, ) -> RustResult { let real = ptr as *mut IndexWriterWrapper; - let arr = unsafe { slice::from_raw_parts(array, len) }; + let 
arr = unsafe { convert_to_rust_slice!(array, len) }; unsafe { execute_by_single_segment_writer( arr, @@ -217,7 +218,7 @@ pub extern "C" fn tantivy_index_add_int64s( offset_begin: i64, ) -> RustResult { let real = ptr as *mut IndexWriterWrapper; - let arr = unsafe { slice::from_raw_parts(array, len) }; + let arr = unsafe { convert_to_rust_slice!(array, len) }; unsafe { execute(arr, offset_begin, IndexWriterWrapper::add_i64, &mut (*real)).into() } } @@ -229,7 +230,7 @@ pub extern "C" fn tantivy_index_add_int64s_by_single_segment_writer( len: usize, ) -> RustResult { let real = ptr as *mut IndexWriterWrapper; - let arr = unsafe { slice::from_raw_parts(array, len) }; + let arr = unsafe { convert_to_rust_slice!(array, len) }; unsafe { execute_by_single_segment_writer( @@ -249,7 +250,7 @@ pub extern "C" fn tantivy_index_add_f32s( offset_begin: i64, ) -> RustResult { let real = ptr as *mut IndexWriterWrapper; - let arr = unsafe { slice::from_raw_parts(array, len) }; + let arr = unsafe { convert_to_rust_slice!(array, len) }; unsafe { execute(arr, offset_begin, IndexWriterWrapper::add_f32, &mut (*real)).into() } } @@ -260,7 +261,7 @@ pub extern "C" fn tantivy_index_add_f32s_by_single_segment_writer( len: usize, ) -> RustResult { let real = ptr as *mut IndexWriterWrapper; - let arr = unsafe { slice::from_raw_parts(array, len) }; + let arr = unsafe { convert_to_rust_slice!(array, len) }; unsafe { execute_by_single_segment_writer( arr, @@ -279,7 +280,7 @@ pub extern "C" fn tantivy_index_add_f64s( offset_begin: i64, ) -> RustResult { let real = ptr as *mut IndexWriterWrapper; - let arr = unsafe { slice::from_raw_parts(array, len) }; + let arr = unsafe { convert_to_rust_slice!(array, len) }; unsafe { execute(arr, offset_begin, IndexWriterWrapper::add_f64, &mut (*real)).into() } } @@ -290,7 +291,7 @@ pub extern "C" fn tantivy_index_add_f64s_by_single_segment_writer( len: usize, ) -> RustResult { let real = ptr as *mut IndexWriterWrapper; - let arr = unsafe { 
slice::from_raw_parts(array, len) }; + let arr = unsafe { convert_to_rust_slice!(array, len) }; unsafe { execute_by_single_segment_writer( arr, @@ -309,7 +310,7 @@ pub extern "C" fn tantivy_index_add_bools( offset_begin: i64, ) -> RustResult { let real = ptr as *mut IndexWriterWrapper; - let arr = unsafe { slice::from_raw_parts(array, len) }; + let arr = unsafe { convert_to_rust_slice!(array, len) }; unsafe { execute( arr, @@ -330,9 +331,9 @@ pub extern "C" fn tantivy_index_add_json_key_stats_data_by_batch( len: usize, ) -> RustResult { let real = ptr as *mut IndexWriterWrapper; - let json_offsets_len = unsafe { slice::from_raw_parts(json_offsets_len, len) }; - let json_offsets = unsafe { slice::from_raw_parts(json_offsets, len) }; - let keys = unsafe { slice::from_raw_parts(keys, len) }; + let json_offsets_len = unsafe { convert_to_rust_slice!(json_offsets_len, len) }; + let json_offsets = unsafe { convert_to_rust_slice!(json_offsets, len) }; + let keys = unsafe { convert_to_rust_slice!(keys, len) }; unsafe { (*real) .add_json_key_stats(keys, json_offsets, json_offsets_len) @@ -347,7 +348,7 @@ pub extern "C" fn tantivy_index_add_bools_by_single_segment_writer( len: usize, ) -> RustResult { let real = ptr as *mut IndexWriterWrapper; - let arr = unsafe { slice::from_raw_parts(array, len) }; + let arr = unsafe { convert_to_rust_slice!(array, len) }; unsafe { execute_by_single_segment_writer( arr, @@ -392,7 +393,7 @@ pub extern "C" fn tantivy_index_add_multi_int8s( ) -> RustResult { let real = ptr as *mut IndexWriterWrapper; unsafe { - let arr = slice::from_raw_parts(array, len); + let arr = convert_to_rust_slice!(array, len); (*real).add_multi_i8s(arr, offset).into() } } @@ -419,7 +420,7 @@ pub extern "C" fn tantivy_index_add_multi_int16s( ) -> RustResult { let real = ptr as *mut IndexWriterWrapper; unsafe { - let arr = slice::from_raw_parts(array, len); + let arr = convert_to_rust_slice!(array, len); (*real).add_multi_i16s(arr, offset).into() } } @@ -446,7 +447,7 @@ 
pub extern "C" fn tantivy_index_add_multi_int32s( ) -> RustResult { let real = ptr as *mut IndexWriterWrapper; unsafe { - let arr = slice::from_raw_parts(array, len); + let arr = convert_to_rust_slice!(array, len); (*real).add_multi_i32s(arr, offset).into() } } @@ -473,7 +474,7 @@ pub extern "C" fn tantivy_index_add_multi_int64s( ) -> RustResult { let real = ptr as *mut IndexWriterWrapper; unsafe { - let arr = slice::from_raw_parts(array, len); + let arr = convert_to_rust_slice!(array, len); (*real).add_multi_i64s(arr, offset).into() } } @@ -500,7 +501,7 @@ pub extern "C" fn tantivy_index_add_multi_f32s( ) -> RustResult { let real = ptr as *mut IndexWriterWrapper; unsafe { - let arr = slice::from_raw_parts(array, len); + let arr = convert_to_rust_slice!(array, len); (*real).add_multi_f32s(arr, offset).into() } } @@ -527,7 +528,7 @@ pub extern "C" fn tantivy_index_add_multi_f64s( ) -> RustResult { let real = ptr as *mut IndexWriterWrapper; unsafe { - let arr = slice::from_raw_parts(array, len); + let arr = convert_to_rust_slice!(array, len); (*real).add_multi_f64s(arr, offset).into() } } @@ -554,7 +555,7 @@ pub extern "C" fn tantivy_index_add_multi_bools( ) -> RustResult { let real = ptr as *mut IndexWriterWrapper; unsafe { - let arr = slice::from_raw_parts(array, len); + let arr = convert_to_rust_slice!(array, len); (*real).add_multi_bools(arr, offset).into() } } @@ -581,7 +582,7 @@ pub extern "C" fn tantivy_index_add_multi_keywords( ) -> RustResult { let real = ptr as *mut IndexWriterWrapper; unsafe { - let arr = slice::from_raw_parts(array, len); + let arr = convert_to_rust_slice!(array, len); (*real).add_multi_keywords(arr, offset).into() } } diff --git a/internal/core/thirdparty/tantivy/tantivy-binding/src/util.rs b/internal/core/thirdparty/tantivy/tantivy-binding/src/util.rs index 9326c2effb..cb489f04b8 100644 --- a/internal/core/thirdparty/tantivy/tantivy-binding/src/util.rs +++ b/internal/core/thirdparty/tantivy/tantivy-binding/src/util.rs @@ -1,3 +1,5 @@ 
+use crate::convert_to_rust_slice; +use core::slice; use std::ffi::c_void; use std::ops::Bound; use tantivy::{directory::MmapDirectory, Index}; @@ -31,6 +33,6 @@ pub fn free_binding(ptr: *mut c_void) { #[cfg(test)] pub extern "C" fn set_bitset(bitset: *mut c_void, doc_id: *const u32, len: usize) { let bitset = unsafe { &mut *(bitset as *mut Vec) }; - let docs = unsafe { std::slice::from_raw_parts(doc_id, len) }; + let docs = unsafe { convert_to_rust_slice!(doc_id, len) }; bitset.extend_from_slice(docs); } diff --git a/internal/core/thirdparty/tantivy/tantivy-wrapper.h b/internal/core/thirdparty/tantivy/tantivy-wrapper.h index 10fd055748..b7128262f2 100644 --- a/internal/core/thirdparty/tantivy/tantivy-wrapper.h +++ b/internal/core/thirdparty/tantivy/tantivy-wrapper.h @@ -144,10 +144,10 @@ struct TantivyIndexWrapper { } // create reader. void - create_reader() { + create_reader(SetBitsetFn set_bitset) { if (writer_ != nullptr) { - auto res = RustResultWrapper(tantivy_create_reader_from_writer( - writer_, milvus::index::SetBitset)); + auto res = RustResultWrapper( + tantivy_create_reader_from_writer(writer_, set_bitset)); AssertInfo(res.result_->success, "failed to create reader from writer: {}", res.result_->error); @@ -155,7 +155,7 @@ struct TantivyIndexWrapper { } else if (!path_.empty()) { assert(tantivy_index_exist(path_.c_str())); auto res = RustResultWrapper( - tantivy_load_index(path_.c_str(), milvus::index::SetBitset)); + tantivy_load_index(path_.c_str(), set_bitset)); AssertInfo(res.result_->success, "failed to load index: {}", res.result_->error); diff --git a/internal/core/thirdparty/tantivy/text_demo.cpp b/internal/core/thirdparty/tantivy/text_demo.cpp index c084319f0a..42fad71fb9 100644 --- a/internal/core/thirdparty/tantivy/text_demo.cpp +++ b/internal/core/thirdparty/tantivy/text_demo.cpp @@ -31,7 +31,7 @@ main(int argc, char* argv[]) { text_index.commit(); } - text_index.create_reader(); + text_index.create_reader(milvus::index::SetBitsetSealed); { 
auto result = to_set(text_index.match_query("football")); assert(result.size() == 2); diff --git a/internal/core/unittest/test_expr.cpp b/internal/core/unittest/test_expr.cpp index a4b57feabf..356dc28d34 100644 --- a/internal/core/unittest/test_expr.cpp +++ b/internal/core/unittest/test_expr.cpp @@ -16617,7 +16617,7 @@ TYPED_TEST(JsonIndexTestFixture, TestJsonIndexUnaryExpr) { json_index->BuildWithFieldData({json_field}); json_index->finish(); - json_index->create_reader(); + json_index->create_reader(milvus::index::SetBitsetSealed); load_index_info.field_id = json_fid.get(); load_index_info.field_type = DataType::JSON; @@ -16745,7 +16745,7 @@ TEST(JsonIndexTest, TestJsonNotEqualExpr) { json_index->BuildWithFieldData({json_field, json_field2}); json_index->finish(); - json_index->create_reader(); + json_index->create_reader(milvus::index::SetBitsetSealed); load_index_info.field_id = json_fid.get(); load_index_info.field_type = DataType::JSON; @@ -16845,7 +16845,7 @@ TEST_P(JsonIndexExistsTest, TestExistsExpr) { json_index->BuildWithFieldData({json_field}); json_index->finish(); - json_index->create_reader(); + json_index->create_reader(milvus::index::SetBitsetSealed); load_index_info.field_id = json_fid.get(); load_index_info.field_type = DataType::JSON; @@ -17021,7 +17021,7 @@ TEST_P(JsonIndexBinaryExprTest, TestBinaryRangeExpr) { json_index->BuildWithFieldData({json_field}); json_index->finish(); - json_index->create_reader(); + json_index->create_reader(milvus::index::SetBitsetSealed); load_index_info.field_id = json_fid.get(); load_index_info.field_type = DataType::JSON; diff --git a/internal/core/unittest/test_json_index.cpp b/internal/core/unittest/test_json_index.cpp index 3fa5bcb20d..e7d3731733 100644 --- a/internal/core/unittest/test_json_index.cpp +++ b/internal/core/unittest/test_json_index.cpp @@ -134,7 +134,7 @@ TEST(JsonIndexTest, TestJsonContains) { json_field->add_json_data(jsons); json_index->BuildWithFieldData({json_field}); json_index->finish(); 
- json_index->create_reader(); + json_index->create_reader(milvus::index::SetBitsetSealed); auto segment = segcore::CreateSealedSegment(schema); segcore::LoadIndexInfo load_index_info; @@ -226,7 +226,7 @@ TEST(JsonIndexTest, TestJsonCast) { json_field->add_json_data(jsons); json_index->BuildWithFieldData({json_field}); json_index->finish(); - json_index->create_reader(); + json_index->create_reader(milvus::index::SetBitsetSealed); auto segment = segcore::CreateSealedSegment(schema); segcore::LoadIndexInfo load_index_info; diff --git a/internal/core/unittest/test_json_key_stats_index.cpp b/internal/core/unittest/test_json_key_stats_index.cpp index ab6fe72be9..704225766f 100644 --- a/internal/core/unittest/test_json_key_stats_index.cpp +++ b/internal/core/unittest/test_json_key_stats_index.cpp @@ -561,7 +561,7 @@ TEST(GrowingJsonKeyStatsIndexTest, GrowingIndex) { for (const auto& jsonData : jsonDatas) { jsons.push_back(milvus::Json(simdjson::padded_string(jsonData))); } - index->CreateReader(); + index->CreateReader(milvus::index::SetBitsetGrowing); index->AddJSONDatas(jsonDatas.size(), jsonDatas.data(), nullptr, 0); index->Commit(); index->Reload(); diff --git a/internal/core/unittest/test_text_match.cpp b/internal/core/unittest/test_text_match.cpp index 600bb5d049..787c0f064f 100644 --- a/internal/core/unittest/test_text_match.cpp +++ b/internal/core/unittest/test_text_match.cpp @@ -134,7 +134,7 @@ TEST(TextMatch, Index) { "unique_id", "milvus_tokenizer", "{}"); - index->CreateReader(); + index->CreateReader(milvus::index::SetBitsetGrowing); index->AddText("football, basketball, pingpang", true, 0); index->AddText("", false, 1); index->AddText("swimming, football", true, 2);