mirror of
https://gitee.com/milvus-io/milvus.git
synced 2025-12-06 17:18:35 +08:00
enhance: pk binary range in sealed segment to use binary search (#45829)
issue: https://github.com/milvus-io/milvus/discussions/44935 pr: https://github.com/milvus-io/milvus/pull/45328 This PR improves the primary-key (PK) range operation. --------- Signed-off-by: Buqian Zheng <zhengbuqian@gmail.com>
This commit is contained in:
parent
61cb29904a
commit
6c0a80d8c3
@ -168,6 +168,15 @@ PhyBinaryRangeFilterExpr::Eval(EvalCtx& context, VectorPtr& result) {
|
|||||||
template <typename T>
|
template <typename T>
|
||||||
VectorPtr
|
VectorPtr
|
||||||
PhyBinaryRangeFilterExpr::ExecRangeVisitorImpl(EvalCtx& context) {
|
PhyBinaryRangeFilterExpr::ExecRangeVisitorImpl(EvalCtx& context) {
|
||||||
|
if (!has_offset_input_ && is_pk_field_ &&
|
||||||
|
segment_->type() == SegmentType::Sealed) {
|
||||||
|
if (pk_type_ == DataType::VARCHAR) {
|
||||||
|
return ExecRangeVisitorImplForPk<std::string_view>(context);
|
||||||
|
} else {
|
||||||
|
return ExecRangeVisitorImplForPk<int64_t>(context);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
if (SegmentExpr::CanUseIndex() && !has_offset_input_) {
|
if (SegmentExpr::CanUseIndex() && !has_offset_input_) {
|
||||||
return ExecRangeVisitorImplForIndex<T>();
|
return ExecRangeVisitorImplForIndex<T>();
|
||||||
} else {
|
} else {
|
||||||
@ -865,5 +874,46 @@ PhyBinaryRangeFilterExpr::ExecRangeVisitorImplForArray(EvalCtx& context) {
|
|||||||
return res_vec;
|
return res_vec;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
template <typename T>
|
||||||
|
VectorPtr
|
||||||
|
PhyBinaryRangeFilterExpr::ExecRangeVisitorImplForPk(EvalCtx& context) {
|
||||||
|
typedef std::
|
||||||
|
conditional_t<std::is_same_v<T, std::string_view>, std::string, T>
|
||||||
|
PkInnerType;
|
||||||
|
|
||||||
|
if (!arg_inited_) {
|
||||||
|
lower_arg_.SetValue<PkInnerType>(expr_->lower_val_);
|
||||||
|
upper_arg_.SetValue<PkInnerType>(expr_->upper_val_);
|
||||||
|
arg_inited_ = true;
|
||||||
|
}
|
||||||
|
|
||||||
|
auto real_batch_size = GetNextBatchSize();
|
||||||
|
if (real_batch_size == 0) {
|
||||||
|
return nullptr;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (cached_index_chunk_id_ != 0) {
|
||||||
|
cached_index_chunk_id_ = 0;
|
||||||
|
cached_index_chunk_res_ = std::make_shared<TargetBitmap>(active_count_);
|
||||||
|
auto cache_view = cached_index_chunk_res_->view();
|
||||||
|
|
||||||
|
PkType lower_pk = lower_arg_.GetValue<PkInnerType>();
|
||||||
|
PkType upper_pk = upper_arg_.GetValue<PkInnerType>();
|
||||||
|
segment_->pk_binary_range(op_ctx_,
|
||||||
|
lower_pk,
|
||||||
|
expr_->lower_inclusive_,
|
||||||
|
upper_pk,
|
||||||
|
expr_->upper_inclusive_,
|
||||||
|
cache_view);
|
||||||
|
}
|
||||||
|
|
||||||
|
TargetBitmap result;
|
||||||
|
result.append(
|
||||||
|
*cached_index_chunk_res_, current_data_global_pos_, real_batch_size);
|
||||||
|
MoveCursor();
|
||||||
|
return std::make_shared<ColumnVector>(std::move(result),
|
||||||
|
TargetBitmap(real_batch_size, true));
|
||||||
|
}
|
||||||
|
|
||||||
} // namespace exec
|
} // namespace exec
|
||||||
} // namespace milvus
|
} // namespace milvus
|
||||||
|
|||||||
@ -320,6 +320,10 @@ class PhyBinaryRangeFilterExpr : public SegmentExpr {
|
|||||||
VectorPtr
|
VectorPtr
|
||||||
ExecRangeVisitorImplForArray(EvalCtx& context);
|
ExecRangeVisitorImplForArray(EvalCtx& context);
|
||||||
|
|
||||||
|
// Primary-key range evaluation path. The visible caller selects it only for
// sealed segments on the PK field without offset input, instantiating T as
// std::string_view for VARCHAR keys and int64_t otherwise.
template <typename T>
|
||||||
|
VectorPtr
|
||||||
|
ExecRangeVisitorImplForPk(EvalCtx& context);
|
||||||
|
|
||||||
private:
|
private:
|
||||||
std::shared_ptr<const milvus::expr::BinaryRangeFilterExpr> expr_;
|
std::shared_ptr<const milvus::expr::BinaryRangeFilterExpr> expr_;
|
||||||
int64_t overflow_check_pos_{0};
|
int64_t overflow_check_pos_{0};
|
||||||
|
|||||||
@ -1441,6 +1441,54 @@ ChunkedSegmentSealedImpl::search_sorted_pk_range(milvus::OpContext* op_ctx,
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void
|
||||||
|
ChunkedSegmentSealedImpl::pk_binary_range(milvus::OpContext* op_ctx,
|
||||||
|
const PkType& lower_pk,
|
||||||
|
bool lower_inclusive,
|
||||||
|
const PkType& upper_pk,
|
||||||
|
bool upper_inclusive,
|
||||||
|
BitsetTypeView& bitset) const {
|
||||||
|
if (!is_sorted_by_pk_) {
|
||||||
|
// For unsorted segments, use the InsertRecord's binary range search
|
||||||
|
insert_record_.search_pk_binary_range(
|
||||||
|
lower_pk, lower_inclusive, upper_pk, upper_inclusive, bitset);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
// For sorted segments, use binary search
|
||||||
|
auto pk_field_id = schema_->get_primary_field_id().value_or(FieldId(-1));
|
||||||
|
AssertInfo(pk_field_id.get() != -1, "Primary key is -1");
|
||||||
|
auto pk_column = get_column(pk_field_id);
|
||||||
|
AssertInfo(pk_column != nullptr, "primary key column not loaded");
|
||||||
|
|
||||||
|
switch (schema_->get_fields().at(pk_field_id).get_data_type()) {
|
||||||
|
case DataType::INT64:
|
||||||
|
search_sorted_pk_binary_range_impl<int64_t>(
|
||||||
|
std::get<int64_t>(lower_pk),
|
||||||
|
lower_inclusive,
|
||||||
|
std::get<int64_t>(upper_pk),
|
||||||
|
upper_inclusive,
|
||||||
|
pk_column,
|
||||||
|
bitset);
|
||||||
|
break;
|
||||||
|
case DataType::VARCHAR:
|
||||||
|
search_sorted_pk_binary_range_impl<std::string>(
|
||||||
|
std::get<std::string>(lower_pk),
|
||||||
|
lower_inclusive,
|
||||||
|
std::get<std::string>(upper_pk),
|
||||||
|
upper_inclusive,
|
||||||
|
pk_column,
|
||||||
|
bitset);
|
||||||
|
break;
|
||||||
|
default:
|
||||||
|
ThrowInfo(
|
||||||
|
DataTypeInvalid,
|
||||||
|
fmt::format(
|
||||||
|
"unsupported type {}",
|
||||||
|
schema_->get_fields().at(pk_field_id).get_data_type()));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
std::pair<std::vector<OffsetMap::OffsetType>, bool>
|
std::pair<std::vector<OffsetMap::OffsetType>, bool>
|
||||||
ChunkedSegmentSealedImpl::find_first(int64_t limit,
|
ChunkedSegmentSealedImpl::find_first(int64_t limit,
|
||||||
const BitsetType& bitset) const {
|
const BitsetType& bitset) const {
|
||||||
|
|||||||
@ -226,6 +226,14 @@ class ChunkedSegmentSealedImpl : public SegmentSealed {
|
|||||||
const PkType& pk,
|
const PkType& pk,
|
||||||
BitsetTypeView& bitset) const;
|
BitsetTypeView& bitset) const;
|
||||||
|
|
||||||
|
// Primary-key range lookup for sealed segments: writes into `bitset` the rows
// whose PK falls between lower_pk and upper_pk; each *_inclusive flag states
// whether the corresponding bound itself belongs to the range.
void
|
||||||
|
pk_binary_range(milvus::OpContext* op_ctx,
|
||||||
|
const PkType& lower_pk,
|
||||||
|
bool lower_inclusive,
|
||||||
|
const PkType& upper_pk,
|
||||||
|
bool upper_inclusive,
|
||||||
|
BitsetTypeView& bitset) const override;
|
||||||
|
|
||||||
std::unique_ptr<DataArray>
|
std::unique_ptr<DataArray>
|
||||||
get_vector(milvus::OpContext* op_ctx,
|
get_vector(milvus::OpContext* op_ctx,
|
||||||
FieldId field_id,
|
FieldId field_id,
|
||||||
@ -420,9 +428,7 @@ class ChunkedSegmentSealedImpl : public SegmentSealed {
|
|||||||
auto end_idx = pk_column->GetNumRowsUntilChunk(last_chunk_id) +
|
auto end_idx = pk_column->GetNumRowsUntilChunk(last_chunk_id) +
|
||||||
last_in_chunk_offset;
|
last_in_chunk_offset;
|
||||||
|
|
||||||
for (int64_t idx = start_idx; idx <= end_idx; idx++) {
|
bitset.set(start_idx, end_idx - start_idx + 1, true);
|
||||||
bitset[idx] = true;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
} else if (op == proto::plan::OpType::GreaterEqual ||
|
} else if (op == proto::plan::OpType::GreaterEqual ||
|
||||||
op == proto::plan::OpType::GreaterThan) {
|
op == proto::plan::OpType::GreaterThan) {
|
||||||
@ -479,6 +485,80 @@ class ChunkedSegmentSealedImpl : public SegmentSealed {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
template <typename PK>
|
||||||
|
void
|
||||||
|
search_sorted_pk_binary_range_impl(
|
||||||
|
const PK& lower_val,
|
||||||
|
bool lower_inclusive,
|
||||||
|
const PK& upper_val,
|
||||||
|
bool upper_inclusive,
|
||||||
|
const std::shared_ptr<ChunkedColumnInterface>& pk_column,
|
||||||
|
BitsetTypeView& bitset) const {
|
||||||
|
const auto num_chunk = pk_column->num_chunks();
|
||||||
|
if (num_chunk == 0) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
auto all_chunk_pins = pk_column->GetAllChunks(nullptr);
|
||||||
|
|
||||||
|
// Find the lower bound position (first value >= lower_val or > lower_val)
|
||||||
|
auto [lower_chunk_id, lower_in_chunk_offset, lower_exact_match] =
|
||||||
|
this->pk_lower_bound<PK>(
|
||||||
|
lower_val, pk_column.get(), all_chunk_pins, 0);
|
||||||
|
|
||||||
|
int64_t start_idx = 0;
|
||||||
|
if (lower_chunk_id != -1) {
|
||||||
|
start_idx = pk_column->GetNumRowsUntilChunk(lower_chunk_id) +
|
||||||
|
lower_in_chunk_offset;
|
||||||
|
// If lower_inclusive is false and we found an exact match, skip all equal values
|
||||||
|
if (!lower_inclusive && lower_exact_match) {
|
||||||
|
auto [last_chunk_id, last_in_chunk_offset] =
|
||||||
|
this->find_last_pk_position<PK>(lower_val,
|
||||||
|
pk_column.get(),
|
||||||
|
all_chunk_pins,
|
||||||
|
lower_chunk_id,
|
||||||
|
lower_in_chunk_offset);
|
||||||
|
start_idx = pk_column->GetNumRowsUntilChunk(last_chunk_id) +
|
||||||
|
last_in_chunk_offset + 1;
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
// lower_val is greater than all values, no results
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Find the upper bound position (first value >= upper_val or > upper_val)
|
||||||
|
auto [upper_chunk_id, upper_in_chunk_offset, upper_exact_match] =
|
||||||
|
this->pk_lower_bound<PK>(
|
||||||
|
upper_val, pk_column.get(), all_chunk_pins, 0);
|
||||||
|
|
||||||
|
int64_t end_idx = 0;
|
||||||
|
if (upper_chunk_id == -1) {
|
||||||
|
// upper_val is greater than all values, include all from start_idx to end
|
||||||
|
end_idx = bitset.size();
|
||||||
|
} else {
|
||||||
|
// If upper_inclusive is true and we found an exact match, include all equal values
|
||||||
|
if (upper_inclusive && upper_exact_match) {
|
||||||
|
auto [last_chunk_id, last_in_chunk_offset] =
|
||||||
|
this->find_last_pk_position<PK>(upper_val,
|
||||||
|
pk_column.get(),
|
||||||
|
all_chunk_pins,
|
||||||
|
upper_chunk_id,
|
||||||
|
upper_in_chunk_offset);
|
||||||
|
end_idx = pk_column->GetNumRowsUntilChunk(last_chunk_id) +
|
||||||
|
last_in_chunk_offset + 1;
|
||||||
|
} else {
|
||||||
|
// upper_inclusive is false or no exact match
|
||||||
|
// In both cases, end at the position of first value >= upper_val
|
||||||
|
end_idx = pk_column->GetNumRowsUntilChunk(upper_chunk_id) +
|
||||||
|
upper_in_chunk_offset;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Set bits from start_idx to end_idx - 1
|
||||||
|
if (start_idx < end_idx) {
|
||||||
|
bitset.set(start_idx, end_idx - start_idx, true);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
template <typename PK>
|
template <typename PK>
|
||||||
void
|
void
|
||||||
search_pks_with_two_pointers_impl(
|
search_pks_with_two_pointers_impl(
|
||||||
|
|||||||
@ -514,6 +514,33 @@ class InsertRecordSealed {
|
|||||||
pk2offset_->find_range(pk, op, bitset, condition);
|
pk2offset_->find_range(pk, op, bitset, condition);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void
|
||||||
|
search_pk_binary_range(const PkType& lower_pk,
|
||||||
|
bool lower_inclusive,
|
||||||
|
const PkType& upper_pk,
|
||||||
|
bool upper_inclusive,
|
||||||
|
BitsetTypeView& bitset) const {
|
||||||
|
auto lower_op = lower_inclusive ? proto::plan::OpType::GreaterEqual
|
||||||
|
: proto::plan::OpType::GreaterThan;
|
||||||
|
auto upper_op = upper_inclusive ? proto::plan::OpType::LessEqual
|
||||||
|
: proto::plan::OpType::LessThan;
|
||||||
|
|
||||||
|
BitsetType upper_result(bitset.size());
|
||||||
|
auto upper_view = upper_result.view();
|
||||||
|
|
||||||
|
// values >= lower_pk (or > lower_pk if not inclusive)
|
||||||
|
pk2offset_->find_range(
|
||||||
|
lower_pk, lower_op, bitset, [](int64_t offset) { return true; });
|
||||||
|
|
||||||
|
// values <= upper_pk (or < upper_pk if not inclusive)
|
||||||
|
pk2offset_->find_range(
|
||||||
|
upper_pk, upper_op, upper_view, [](int64_t offset) {
|
||||||
|
return true;
|
||||||
|
});
|
||||||
|
|
||||||
|
bitset &= upper_result;
|
||||||
|
}
|
||||||
|
|
||||||
void
|
void
|
||||||
insert_pks(milvus::DataType data_type, ChunkedColumnInterface* data) {
|
insert_pks(milvus::DataType data_type, ChunkedColumnInterface* data) {
|
||||||
std::lock_guard lck(shared_mutex_);
|
std::lock_guard lck(shared_mutex_);
|
||||||
|
|||||||
@ -39,6 +39,17 @@ class SegmentGrowing : public SegmentInternalInterface {
|
|||||||
return SegmentType::Growing;
|
return SegmentType::Growing;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void
|
||||||
|
pk_binary_range(milvus::OpContext* op_ctx,
|
||||||
|
const PkType& lower_pk,
|
||||||
|
bool lower_inclusive,
|
||||||
|
const PkType& upper_pk,
|
||||||
|
bool upper_inclusive,
|
||||||
|
BitsetTypeView& bitset) const override {
|
||||||
|
ThrowInfo(ErrorCode::Unsupported,
|
||||||
|
"pk_binary_range is not supported for growing segment");
|
||||||
|
}
|
||||||
|
|
||||||
// virtual int64_t
|
// virtual int64_t
|
||||||
// PreDelete(int64_t size) = 0;
|
// PreDelete(int64_t size) = 0;
|
||||||
|
|
||||||
|
|||||||
@ -649,6 +649,14 @@ class SegmentInternalInterface : public SegmentInterface {
|
|||||||
const PkType& pk,
|
const PkType& pk,
|
||||||
BitsetTypeView& bitset) const = 0;
|
BitsetTypeView& bitset) const = 0;
|
||||||
|
|
||||||
|
// Pure-virtual primary-key range query: implementations receive a lower and an
// upper PK bound, an inclusivity flag for each, and a bitset view to write the
// result into.
virtual void
|
||||||
|
pk_binary_range(milvus::OpContext* op_ctx,
|
||||||
|
const PkType& lower_pk,
|
||||||
|
bool lower_inclusive,
|
||||||
|
const PkType& upper_pk,
|
||||||
|
bool upper_inclusive,
|
||||||
|
BitsetTypeView& bitset) const = 0;
|
||||||
|
|
||||||
virtual GEOSContextHandle_t
|
virtual GEOSContextHandle_t
|
||||||
get_ctx() const {
|
get_ctx() const {
|
||||||
return ctx_;
|
return ctx_;
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user