fix: Fix text_match bug caused by not adapting to the multi-chunk model (#43303)

https://github.com/milvus-io/milvus/issues/43296

Signed-off-by: luzhang <luzhang@zilliz.com>
Co-authored-by: luzhang <luzhang@zilliz.com>
This commit is contained in:
zhagnlu 2025-07-17 10:32:51 +08:00 committed by GitHub
parent df8ceb123b
commit ee43954534
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
2 changed files with 41 additions and 52 deletions

View File

@ -1198,43 +1198,6 @@ class SegmentExpr : public Expr {
return valid_result;
}
// Evaluates a text-match predicate through the field's text index and returns
// the slice of the result that belongs to the current batch.
//
// On the first call (cached_match_res_ is null) the text index for field_id_
// is fetched from the segment, `func(index, values...)` is run once, and both
// the match bitmap and the index's IsNotNull() bitmap are cached. Later calls
// only slice the cached bitmaps.
//
// @param func    callable invoked as func(index, values...); its result is
//                stored as a TargetBitmap.
// @param values  extra arguments passed through to func.
// @return a ColumnVector built from the match bits and the validity bits of
//         the current batch.
//
// NOTE(review): the batch window is driven by current_data_chunk_pos_.
// Presumably that is only a valid offset into the cached bitmaps when the
// data is not split across several chunks -- confirm for multi-chunk segments.
template <typename FUNC, typename... ValTypes>
VectorPtr
ProcessTextMatchIndex(FUNC func, ValTypes... values) {
    if (cached_match_res_ == nullptr) {
        auto index = segment_->GetTextIndex(field_id_);
        // Take the call result directly: wrapping a function call in
        // std::move gains nothing and prevents copy elision.
        auto res = func(index, values...);
        auto valid_res = index->IsNotNull();
        cached_match_res_ = std::make_shared<TargetBitmap>(std::move(res));
        cached_index_chunk_valid_res_ = std::move(valid_res);
        if (cached_match_res_->size() < active_count_) {
            // Some entities are not visible in the inverted index.
            // This only happens on growing segments, so both cached bitmaps
            // are extended with a tail bitmap covering the missing rows.
            TargetBitmap tail(active_count_ - cached_match_res_->size());
            cached_match_res_->append(tail);
            cached_index_chunk_valid_res_.append(tail);
        }
    }

    // Clamp the batch so it never reads past active_count_.
    // Returns batch size; not sure if we should use the data position.
    auto real_batch_size =
        (current_data_chunk_pos_ + batch_size_ > active_count_)
            ? active_count_ - current_data_chunk_pos_
            : batch_size_;

    TargetBitmap result;
    TargetBitmap valid_result;
    result.append(
        *cached_match_res_, current_data_chunk_pos_, real_batch_size);
    valid_result.append(cached_index_chunk_valid_res_,
                        current_data_chunk_pos_,
                        real_batch_size);

    // Advance the cursor so the next call returns the following batch.
    current_data_chunk_pos_ += real_batch_size;
    return std::make_shared<ColumnVector>(std::move(result),
                                          std::move(valid_result));
}
template <typename T, typename FUNC, typename... ValTypes>
void
ProcessIndexChunksV2(FUNC func, ValTypes... values) {

View File

@ -1928,8 +1928,36 @@ PhyUnaryRangeFilterExpr::ExecTextMatch() {
op_type);
}
};
auto res = ProcessTextMatchIndex(func, query);
return res;
auto real_batch_size = GetNextBatchSize();
if (real_batch_size == 0) {
return nullptr;
}
if (cached_match_res_ == nullptr) {
auto index = segment_->GetTextIndex(field_id_);
auto res = std::move(func(index, query));
auto valid_res = index->IsNotNull();
cached_match_res_ = std::make_shared<TargetBitmap>(std::move(res));
cached_index_chunk_valid_res_ = std::move(valid_res);
if (cached_match_res_->size() < active_count_) {
// Some entities are not visible in the inverted index.
// This only happens on growing segments.
TargetBitmap tail(active_count_ - cached_match_res_->size());
cached_match_res_->append(tail);
cached_index_chunk_valid_res_.append(tail);
}
}
TargetBitmap result;
TargetBitmap valid_result;
result.append(*cached_match_res_, current_data_global_pos_, real_batch_size);
valid_result.append(cached_index_chunk_valid_res_,
current_data_global_pos_,
real_batch_size);
MoveCursor();
return std::make_shared<ColumnVector>(std::move(result),
std::move(valid_result));
};
bool
@ -1949,9 +1977,10 @@ PhyUnaryRangeFilterExpr::ExecNgramMatch() {
}
auto literal = value_arg_.GetValue<std::string>();
TargetBitmap result;
TargetBitmap valid_result;
auto real_batch_size = GetNextBatchSize();
if (real_batch_size == 0) {
return std::nullopt;
}
if (cached_ngram_match_res_ == nullptr) {
auto pinned_index = segment_->GetNgramIndex(field_id_);
@ -1969,19 +1998,16 @@ PhyUnaryRangeFilterExpr::ExecNgramMatch() {
cached_index_chunk_valid_res_ = std::move(valid_res);
}
auto real_batch_size =
(current_data_chunk_pos_ + batch_size_ > active_count_)
? active_count_ - current_data_chunk_pos_
: batch_size_;
TargetBitmap result;
TargetBitmap valid_result;
result.append(
*cached_ngram_match_res_, current_data_chunk_pos_, real_batch_size);
*cached_ngram_match_res_, current_data_global_pos_, real_batch_size);
valid_result.append(cached_index_chunk_valid_res_,
current_data_chunk_pos_,
current_data_global_pos_,
real_batch_size);
current_data_chunk_pos_ += real_batch_size;
return std::optional<VectorPtr>(std::make_shared<ColumnVector>(
std::move(result), std::move(valid_result)));
MoveCursor();
return std::make_shared<ColumnVector>(std::move(result),
std::move(valid_result));
}
} // namespace exec