mirror of
https://gitee.com/milvus-io/milvus.git
synced 2025-12-06 09:08:43 +08:00
enhance: pk binary range in sealed segment to use binary search (#45829)
issue: https://github.com/milvus-io/milvus/discussions/44935 pr: https://github.com/milvus-io/milvus/pull/45328 This PR improves the primary-key (pk) binary range operator. --------- Signed-off-by: Buqian Zheng <zhengbuqian@gmail.com>
This commit is contained in:
parent
61cb29904a
commit
6c0a80d8c3
@ -168,6 +168,15 @@ PhyBinaryRangeFilterExpr::Eval(EvalCtx& context, VectorPtr& result) {
|
||||
template <typename T>
|
||||
VectorPtr
|
||||
PhyBinaryRangeFilterExpr::ExecRangeVisitorImpl(EvalCtx& context) {
|
||||
if (!has_offset_input_ && is_pk_field_ &&
|
||||
segment_->type() == SegmentType::Sealed) {
|
||||
if (pk_type_ == DataType::VARCHAR) {
|
||||
return ExecRangeVisitorImplForPk<std::string_view>(context);
|
||||
} else {
|
||||
return ExecRangeVisitorImplForPk<int64_t>(context);
|
||||
}
|
||||
}
|
||||
|
||||
if (SegmentExpr::CanUseIndex() && !has_offset_input_) {
|
||||
return ExecRangeVisitorImplForIndex<T>();
|
||||
} else {
|
||||
@ -865,5 +874,46 @@ PhyBinaryRangeFilterExpr::ExecRangeVisitorImplForArray(EvalCtx& context) {
|
||||
return res_vec;
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
VectorPtr
|
||||
PhyBinaryRangeFilterExpr::ExecRangeVisitorImplForPk(EvalCtx& context) {
|
||||
typedef std::
|
||||
conditional_t<std::is_same_v<T, std::string_view>, std::string, T>
|
||||
PkInnerType;
|
||||
|
||||
if (!arg_inited_) {
|
||||
lower_arg_.SetValue<PkInnerType>(expr_->lower_val_);
|
||||
upper_arg_.SetValue<PkInnerType>(expr_->upper_val_);
|
||||
arg_inited_ = true;
|
||||
}
|
||||
|
||||
auto real_batch_size = GetNextBatchSize();
|
||||
if (real_batch_size == 0) {
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
if (cached_index_chunk_id_ != 0) {
|
||||
cached_index_chunk_id_ = 0;
|
||||
cached_index_chunk_res_ = std::make_shared<TargetBitmap>(active_count_);
|
||||
auto cache_view = cached_index_chunk_res_->view();
|
||||
|
||||
PkType lower_pk = lower_arg_.GetValue<PkInnerType>();
|
||||
PkType upper_pk = upper_arg_.GetValue<PkInnerType>();
|
||||
segment_->pk_binary_range(op_ctx_,
|
||||
lower_pk,
|
||||
expr_->lower_inclusive_,
|
||||
upper_pk,
|
||||
expr_->upper_inclusive_,
|
||||
cache_view);
|
||||
}
|
||||
|
||||
TargetBitmap result;
|
||||
result.append(
|
||||
*cached_index_chunk_res_, current_data_global_pos_, real_batch_size);
|
||||
MoveCursor();
|
||||
return std::make_shared<ColumnVector>(std::move(result),
|
||||
TargetBitmap(real_batch_size, true));
|
||||
}
|
||||
|
||||
} // namespace exec
|
||||
} // namespace milvus
|
||||
|
||||
@ -320,6 +320,10 @@ class PhyBinaryRangeFilterExpr : public SegmentExpr {
|
||||
VectorPtr
|
||||
ExecRangeVisitorImplForArray(EvalCtx& context);
|
||||
|
||||
// Primary-key evaluation path for the binary range filter: resolves the range
// through the segment's pk_binary_range() and returns the result one batch at
// a time. Instantiated with std::string_view for VARCHAR keys and int64_t
// otherwise.
template <typename T>
|
||||
VectorPtr
|
||||
ExecRangeVisitorImplForPk(EvalCtx& context);
|
||||
|
||||
private:
|
||||
std::shared_ptr<const milvus::expr::BinaryRangeFilterExpr> expr_;
|
||||
int64_t overflow_check_pos_{0};
|
||||
|
||||
@ -1441,6 +1441,54 @@ ChunkedSegmentSealedImpl::search_sorted_pk_range(milvus::OpContext* op_ctx,
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
ChunkedSegmentSealedImpl::pk_binary_range(milvus::OpContext* op_ctx,
|
||||
const PkType& lower_pk,
|
||||
bool lower_inclusive,
|
||||
const PkType& upper_pk,
|
||||
bool upper_inclusive,
|
||||
BitsetTypeView& bitset) const {
|
||||
if (!is_sorted_by_pk_) {
|
||||
// For unsorted segments, use the InsertRecord's binary range search
|
||||
insert_record_.search_pk_binary_range(
|
||||
lower_pk, lower_inclusive, upper_pk, upper_inclusive, bitset);
|
||||
return;
|
||||
}
|
||||
|
||||
// For sorted segments, use binary search
|
||||
auto pk_field_id = schema_->get_primary_field_id().value_or(FieldId(-1));
|
||||
AssertInfo(pk_field_id.get() != -1, "Primary key is -1");
|
||||
auto pk_column = get_column(pk_field_id);
|
||||
AssertInfo(pk_column != nullptr, "primary key column not loaded");
|
||||
|
||||
switch (schema_->get_fields().at(pk_field_id).get_data_type()) {
|
||||
case DataType::INT64:
|
||||
search_sorted_pk_binary_range_impl<int64_t>(
|
||||
std::get<int64_t>(lower_pk),
|
||||
lower_inclusive,
|
||||
std::get<int64_t>(upper_pk),
|
||||
upper_inclusive,
|
||||
pk_column,
|
||||
bitset);
|
||||
break;
|
||||
case DataType::VARCHAR:
|
||||
search_sorted_pk_binary_range_impl<std::string>(
|
||||
std::get<std::string>(lower_pk),
|
||||
lower_inclusive,
|
||||
std::get<std::string>(upper_pk),
|
||||
upper_inclusive,
|
||||
pk_column,
|
||||
bitset);
|
||||
break;
|
||||
default:
|
||||
ThrowInfo(
|
||||
DataTypeInvalid,
|
||||
fmt::format(
|
||||
"unsupported type {}",
|
||||
schema_->get_fields().at(pk_field_id).get_data_type()));
|
||||
}
|
||||
}
|
||||
|
||||
std::pair<std::vector<OffsetMap::OffsetType>, bool>
|
||||
ChunkedSegmentSealedImpl::find_first(int64_t limit,
|
||||
const BitsetType& bitset) const {
|
||||
|
||||
@ -226,6 +226,14 @@ class ChunkedSegmentSealedImpl : public SegmentSealed {
|
||||
const PkType& pk,
|
||||
BitsetTypeView& bitset) const;
|
||||
|
||||
// Sets in `bitset` the rows whose primary key lies between lower_pk and
// upper_pk; each bound is included only when its *_inclusive flag is true.
// Overrides the segment interface's pk_binary_range().
void
|
||||
pk_binary_range(milvus::OpContext* op_ctx,
|
||||
const PkType& lower_pk,
|
||||
bool lower_inclusive,
|
||||
const PkType& upper_pk,
|
||||
bool upper_inclusive,
|
||||
BitsetTypeView& bitset) const override;
|
||||
|
||||
std::unique_ptr<DataArray>
|
||||
get_vector(milvus::OpContext* op_ctx,
|
||||
FieldId field_id,
|
||||
@ -420,9 +428,7 @@ class ChunkedSegmentSealedImpl : public SegmentSealed {
|
||||
auto end_idx = pk_column->GetNumRowsUntilChunk(last_chunk_id) +
|
||||
last_in_chunk_offset;
|
||||
|
||||
for (int64_t idx = start_idx; idx <= end_idx; idx++) {
|
||||
bitset[idx] = true;
|
||||
}
|
||||
bitset.set(start_idx, end_idx - start_idx + 1, true);
|
||||
}
|
||||
} else if (op == proto::plan::OpType::GreaterEqual ||
|
||||
op == proto::plan::OpType::GreaterThan) {
|
||||
@ -479,6 +485,80 @@ class ChunkedSegmentSealedImpl : public SegmentSealed {
|
||||
}
|
||||
}
|
||||
|
||||
template <typename PK>
|
||||
void
|
||||
search_sorted_pk_binary_range_impl(
|
||||
const PK& lower_val,
|
||||
bool lower_inclusive,
|
||||
const PK& upper_val,
|
||||
bool upper_inclusive,
|
||||
const std::shared_ptr<ChunkedColumnInterface>& pk_column,
|
||||
BitsetTypeView& bitset) const {
|
||||
const auto num_chunk = pk_column->num_chunks();
|
||||
if (num_chunk == 0) {
|
||||
return;
|
||||
}
|
||||
auto all_chunk_pins = pk_column->GetAllChunks(nullptr);
|
||||
|
||||
// Find the lower bound position (first value >= lower_val or > lower_val)
|
||||
auto [lower_chunk_id, lower_in_chunk_offset, lower_exact_match] =
|
||||
this->pk_lower_bound<PK>(
|
||||
lower_val, pk_column.get(), all_chunk_pins, 0);
|
||||
|
||||
int64_t start_idx = 0;
|
||||
if (lower_chunk_id != -1) {
|
||||
start_idx = pk_column->GetNumRowsUntilChunk(lower_chunk_id) +
|
||||
lower_in_chunk_offset;
|
||||
// If lower_inclusive is false and we found an exact match, skip all equal values
|
||||
if (!lower_inclusive && lower_exact_match) {
|
||||
auto [last_chunk_id, last_in_chunk_offset] =
|
||||
this->find_last_pk_position<PK>(lower_val,
|
||||
pk_column.get(),
|
||||
all_chunk_pins,
|
||||
lower_chunk_id,
|
||||
lower_in_chunk_offset);
|
||||
start_idx = pk_column->GetNumRowsUntilChunk(last_chunk_id) +
|
||||
last_in_chunk_offset + 1;
|
||||
}
|
||||
} else {
|
||||
// lower_val is greater than all values, no results
|
||||
return;
|
||||
}
|
||||
|
||||
// Find the upper bound position (first value >= upper_val or > upper_val)
|
||||
auto [upper_chunk_id, upper_in_chunk_offset, upper_exact_match] =
|
||||
this->pk_lower_bound<PK>(
|
||||
upper_val, pk_column.get(), all_chunk_pins, 0);
|
||||
|
||||
int64_t end_idx = 0;
|
||||
if (upper_chunk_id == -1) {
|
||||
// upper_val is greater than all values, include all from start_idx to end
|
||||
end_idx = bitset.size();
|
||||
} else {
|
||||
// If upper_inclusive is true and we found an exact match, include all equal values
|
||||
if (upper_inclusive && upper_exact_match) {
|
||||
auto [last_chunk_id, last_in_chunk_offset] =
|
||||
this->find_last_pk_position<PK>(upper_val,
|
||||
pk_column.get(),
|
||||
all_chunk_pins,
|
||||
upper_chunk_id,
|
||||
upper_in_chunk_offset);
|
||||
end_idx = pk_column->GetNumRowsUntilChunk(last_chunk_id) +
|
||||
last_in_chunk_offset + 1;
|
||||
} else {
|
||||
// upper_inclusive is false or no exact match
|
||||
// In both cases, end at the position of first value >= upper_val
|
||||
end_idx = pk_column->GetNumRowsUntilChunk(upper_chunk_id) +
|
||||
upper_in_chunk_offset;
|
||||
}
|
||||
}
|
||||
|
||||
// Set bits from start_idx to end_idx - 1
|
||||
if (start_idx < end_idx) {
|
||||
bitset.set(start_idx, end_idx - start_idx, true);
|
||||
}
|
||||
}
|
||||
|
||||
template <typename PK>
|
||||
void
|
||||
search_pks_with_two_pointers_impl(
|
||||
|
||||
@ -514,6 +514,33 @@ class InsertRecordSealed {
|
||||
pk2offset_->find_range(pk, op, bitset, condition);
|
||||
}
|
||||
|
||||
void
|
||||
search_pk_binary_range(const PkType& lower_pk,
|
||||
bool lower_inclusive,
|
||||
const PkType& upper_pk,
|
||||
bool upper_inclusive,
|
||||
BitsetTypeView& bitset) const {
|
||||
auto lower_op = lower_inclusive ? proto::plan::OpType::GreaterEqual
|
||||
: proto::plan::OpType::GreaterThan;
|
||||
auto upper_op = upper_inclusive ? proto::plan::OpType::LessEqual
|
||||
: proto::plan::OpType::LessThan;
|
||||
|
||||
BitsetType upper_result(bitset.size());
|
||||
auto upper_view = upper_result.view();
|
||||
|
||||
// values >= lower_pk (or > lower_pk if not inclusive)
|
||||
pk2offset_->find_range(
|
||||
lower_pk, lower_op, bitset, [](int64_t offset) { return true; });
|
||||
|
||||
// values <= upper_pk (or < upper_pk if not inclusive)
|
||||
pk2offset_->find_range(
|
||||
upper_pk, upper_op, upper_view, [](int64_t offset) {
|
||||
return true;
|
||||
});
|
||||
|
||||
bitset &= upper_result;
|
||||
}
|
||||
|
||||
void
|
||||
insert_pks(milvus::DataType data_type, ChunkedColumnInterface* data) {
|
||||
std::lock_guard lck(shared_mutex_);
|
||||
|
||||
@ -39,6 +39,17 @@ class SegmentGrowing : public SegmentInternalInterface {
|
||||
return SegmentType::Growing;
|
||||
}
|
||||
|
||||
void
|
||||
pk_binary_range(milvus::OpContext* op_ctx,
|
||||
const PkType& lower_pk,
|
||||
bool lower_inclusive,
|
||||
const PkType& upper_pk,
|
||||
bool upper_inclusive,
|
||||
BitsetTypeView& bitset) const override {
|
||||
ThrowInfo(ErrorCode::Unsupported,
|
||||
"pk_binary_range is not supported for growing segment");
|
||||
}
|
||||
|
||||
// virtual int64_t
|
||||
// PreDelete(int64_t size) = 0;
|
||||
|
||||
|
||||
@ -649,6 +649,14 @@ class SegmentInternalInterface : public SegmentInterface {
|
||||
const PkType& pk,
|
||||
BitsetTypeView& bitset) const = 0;
|
||||
|
||||
// Primary-key binary range lookup: implementations report, through `bitset`,
// the rows whose primary key lies between lower_pk and upper_pk, with each
// bound included only when its *_inclusive flag is true. Pure virtual; every
// concrete segment type must provide it (the growing segment's version
// throws ErrorCode::Unsupported).
virtual void
|
||||
pk_binary_range(milvus::OpContext* op_ctx,
|
||||
const PkType& lower_pk,
|
||||
bool lower_inclusive,
|
||||
const PkType& upper_pk,
|
||||
bool upper_inclusive,
|
||||
BitsetTypeView& bitset) const = 0;
|
||||
|
||||
virtual GEOSContextHandle_t
|
||||
get_ctx() const {
|
||||
return ctx_;
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user