mirror of
https://gitee.com/milvus-io/milvus.git
synced 2026-01-07 19:31:51 +08:00
fix:fix text_match bug because of not adapting to multi-chunk model (#43303)
https://github.com/milvus-io/milvus/issues/43296 Signed-off-by: luzhang <luzhang@zilliz.com> Co-authored-by: luzhang <luzhang@zilliz.com>
This commit is contained in:
parent
df8ceb123b
commit
ee43954534
@ -1198,43 +1198,6 @@ class SegmentExpr : public Expr {
|
||||
return valid_result;
|
||||
}
|
||||
|
||||
template <typename FUNC, typename... ValTypes>
|
||||
VectorPtr
|
||||
ProcessTextMatchIndex(FUNC func, ValTypes... values) {
|
||||
TargetBitmap result;
|
||||
TargetBitmap valid_result;
|
||||
|
||||
if (cached_match_res_ == nullptr) {
|
||||
auto index = segment_->GetTextIndex(field_id_);
|
||||
auto res = std::move(func(index, values...));
|
||||
auto valid_res = index->IsNotNull();
|
||||
cached_match_res_ = std::make_shared<TargetBitmap>(std::move(res));
|
||||
cached_index_chunk_valid_res_ = std::move(valid_res);
|
||||
if (cached_match_res_->size() < active_count_) {
|
||||
// some entities are not visible in inverted index.
|
||||
// only happend on growing segment.
|
||||
TargetBitmap tail(active_count_ - cached_match_res_->size());
|
||||
cached_match_res_->append(tail);
|
||||
cached_index_chunk_valid_res_.append(tail);
|
||||
}
|
||||
}
|
||||
|
||||
// return batch size, not sure if we should use the data position.
|
||||
auto real_batch_size =
|
||||
(current_data_chunk_pos_ + batch_size_ > active_count_)
|
||||
? active_count_ - current_data_chunk_pos_
|
||||
: batch_size_;
|
||||
result.append(
|
||||
*cached_match_res_, current_data_chunk_pos_, real_batch_size);
|
||||
valid_result.append(cached_index_chunk_valid_res_,
|
||||
current_data_chunk_pos_,
|
||||
real_batch_size);
|
||||
current_data_chunk_pos_ += real_batch_size;
|
||||
|
||||
return std::make_shared<ColumnVector>(std::move(result),
|
||||
std::move(valid_result));
|
||||
}
|
||||
|
||||
template <typename T, typename FUNC, typename... ValTypes>
|
||||
void
|
||||
ProcessIndexChunksV2(FUNC func, ValTypes... values) {
|
||||
|
||||
@ -1928,8 +1928,36 @@ PhyUnaryRangeFilterExpr::ExecTextMatch() {
|
||||
op_type);
|
||||
}
|
||||
};
|
||||
auto res = ProcessTextMatchIndex(func, query);
|
||||
return res;
|
||||
|
||||
auto real_batch_size = GetNextBatchSize();
|
||||
if (real_batch_size == 0) {
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
if (cached_match_res_ == nullptr) {
|
||||
auto index = segment_->GetTextIndex(field_id_);
|
||||
auto res = std::move(func(index, query));
|
||||
auto valid_res = index->IsNotNull();
|
||||
cached_match_res_ = std::make_shared<TargetBitmap>(std::move(res));
|
||||
cached_index_chunk_valid_res_ = std::move(valid_res);
|
||||
if (cached_match_res_->size() < active_count_) {
|
||||
// some entities are not visible in inverted index.
|
||||
// only happens on growing segments.
|
||||
TargetBitmap tail(active_count_ - cached_match_res_->size());
|
||||
cached_match_res_->append(tail);
|
||||
cached_index_chunk_valid_res_.append(tail);
|
||||
}
|
||||
}
|
||||
|
||||
TargetBitmap result;
|
||||
TargetBitmap valid_result;
|
||||
result.append(*cached_match_res_, current_data_global_pos_, real_batch_size);
|
||||
valid_result.append(cached_index_chunk_valid_res_,
|
||||
current_data_global_pos_,
|
||||
real_batch_size);
|
||||
MoveCursor();
|
||||
return std::make_shared<ColumnVector>(std::move(result),
|
||||
std::move(valid_result));
|
||||
};
|
||||
|
||||
bool
|
||||
@ -1949,9 +1977,10 @@ PhyUnaryRangeFilterExpr::ExecNgramMatch() {
|
||||
}
|
||||
|
||||
auto literal = value_arg_.GetValue<std::string>();
|
||||
|
||||
TargetBitmap result;
|
||||
TargetBitmap valid_result;
|
||||
auto real_batch_size = GetNextBatchSize();
|
||||
if (real_batch_size == 0) {
|
||||
return std::nullopt;
|
||||
}
|
||||
|
||||
if (cached_ngram_match_res_ == nullptr) {
|
||||
auto pinned_index = segment_->GetNgramIndex(field_id_);
|
||||
@ -1969,19 +1998,16 @@ PhyUnaryRangeFilterExpr::ExecNgramMatch() {
|
||||
cached_index_chunk_valid_res_ = std::move(valid_res);
|
||||
}
|
||||
|
||||
auto real_batch_size =
|
||||
(current_data_chunk_pos_ + batch_size_ > active_count_)
|
||||
? active_count_ - current_data_chunk_pos_
|
||||
: batch_size_;
|
||||
TargetBitmap result;
|
||||
TargetBitmap valid_result;
|
||||
result.append(
|
||||
*cached_ngram_match_res_, current_data_chunk_pos_, real_batch_size);
|
||||
*cached_ngram_match_res_, current_data_global_pos_, real_batch_size);
|
||||
valid_result.append(cached_index_chunk_valid_res_,
|
||||
current_data_chunk_pos_,
|
||||
current_data_global_pos_,
|
||||
real_batch_size);
|
||||
current_data_chunk_pos_ += real_batch_size;
|
||||
|
||||
return std::optional<VectorPtr>(std::make_shared<ColumnVector>(
|
||||
std::move(result), std::move(valid_result)));
|
||||
MoveCursor();
|
||||
return std::make_shared<ColumnVector>(std::move(result),
|
||||
std::move(valid_result));
|
||||
}
|
||||
|
||||
} // namespace exec
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user