diff --git a/internal/core/src/exec/expression/BinaryRangeExpr.cpp b/internal/core/src/exec/expression/BinaryRangeExpr.cpp index 18e56b38d6..17fa00ae92 100644 --- a/internal/core/src/exec/expression/BinaryRangeExpr.cpp +++ b/internal/core/src/exec/expression/BinaryRangeExpr.cpp @@ -168,6 +168,15 @@ PhyBinaryRangeFilterExpr::Eval(EvalCtx& context, VectorPtr& result) { template VectorPtr PhyBinaryRangeFilterExpr::ExecRangeVisitorImpl(EvalCtx& context) { /* PK fast path: when there is no offset input, the filter is on the primary-key field, and the segment is Sealed, answer through ExecRangeVisitorImplForPk before the CanUseIndex() check below is reached. */ + if (!has_offset_input_ && is_pk_field_ && + segment_->type() == SegmentType::Sealed) { /* NOTE(review): the two branches below read identically in this copy; presumably they differ only in an explicit template argument that did not survive extraction - confirm against the upstream change. */ + if (pk_type_ == DataType::VARCHAR) { + return ExecRangeVisitorImplForPk(context); + } else { + return ExecRangeVisitorImplForPk(context); + } + } + + if (SegmentExpr::CanUseIndex() && !has_offset_input_) { return ExecRangeVisitorImplForIndex(); } else { @@ -865,5 +874,46 @@ PhyBinaryRangeFilterExpr::ExecRangeVisitorImplForArray(EvalCtx& context) { return res_vec; } /* Evaluates one batch of the range filter for a primary-key field. The first call that finds cached_index_chunk_id_ non-zero asks the segment for the PK range and caches the outcome in cached_index_chunk_res_ (constructed from active_count_); every call then copies its own batch window out of that cache. Returns nullptr when GetNextBatchSize() is 0. */ +template +VectorPtr +PhyBinaryRangeFilterExpr::ExecRangeVisitorImplForPk(EvalCtx& context) { + typedef std:: + conditional_t, std::string, T> + PkInnerType; + /* Load the bounds from expr_ into lower_arg_ and upper_arg_; arg_inited_ keeps this from being repeated on later calls. */ + if (!arg_inited_) { + lower_arg_.SetValue(expr_->lower_val_); + upper_arg_.SetValue(expr_->upper_val_); + arg_inited_ = true; + } + + auto real_batch_size = GetNextBatchSize(); + if (real_batch_size == 0) { + return nullptr; + } + /* The id is set to 0 inside the branch, so the segment-wide pk_binary_range call is not repeated once it has run. NOTE(review): this relies on cached_index_chunk_id_ starting out non-zero - confirm at its declaration. */ + if (cached_index_chunk_id_ != 0) { + cached_index_chunk_id_ = 0; + cached_index_chunk_res_ = std::make_shared(active_count_); + auto cache_view = cached_index_chunk_res_->view(); + + PkType lower_pk = lower_arg_.GetValue(); + PkType upper_pk = upper_arg_.GetValue(); + segment_->pk_binary_range(op_ctx_, + lower_pk, + expr_->lower_inclusive_, + upper_pk, + expr_->upper_inclusive_, + cache_view); + } + /* Append real_batch_size bits of the cached bitmap starting at current_data_global_pos_, advance the cursor, and pair the result with TargetBitmap(real_batch_size, true) - presumably an all-valid mask, confirm. */ + TargetBitmap result; + result.append( + *cached_index_chunk_res_, current_data_global_pos_, real_batch_size); + MoveCursor(); + return std::make_shared(std::move(result), + TargetBitmap(real_batch_size, true)); +} + } // namespace exec } // namespace milvus diff --git 
a/internal/core/src/exec/expression/BinaryRangeExpr.h b/internal/core/src/exec/expression/BinaryRangeExpr.h index 0e5c6971f8..21daf95fb6 100644 --- a/internal/core/src/exec/expression/BinaryRangeExpr.h +++ b/internal/core/src/exec/expression/BinaryRangeExpr.h @@ -320,6 +320,10 @@ class PhyBinaryRangeFilterExpr : public SegmentExpr { VectorPtr ExecRangeVisitorImplForArray(EvalCtx& context); /* Batch evaluator used when the range filter targets the primary key; declared next to ExecRangeVisitorImplForArray. */ + template + VectorPtr + ExecRangeVisitorImplForPk(EvalCtx& context); + private: std::shared_ptr expr_; int64_t overflow_check_pos_{0}; diff --git a/internal/core/src/segcore/ChunkedSegmentSealedImpl.cpp b/internal/core/src/segcore/ChunkedSegmentSealedImpl.cpp index d5f9203792..0d65e2ba1b 100644 --- a/internal/core/src/segcore/ChunkedSegmentSealedImpl.cpp +++ b/internal/core/src/segcore/ChunkedSegmentSealedImpl.cpp @@ -1441,6 +1441,54 @@ ChunkedSegmentSealedImpl::search_sorted_pk_range(milvus::OpContext* op_ctx, } } /* Marks in bitset the rows whose primary key lies in the range bounded by lower_pk and upper_pk; the lower_inclusive and upper_inclusive flags say whether each bound itself matches. When is_sorted_by_pk_ is false the work is delegated to insert_record_.search_pk_binary_range; otherwise the PK column is searched through search_sorted_pk_binary_range_impl. Asserts that the schema has a primary field and that its column is loaded, and throws DataTypeInvalid for PK types other than INT64 and VARCHAR. op_ctx is not referenced in this body. */ +void +ChunkedSegmentSealedImpl::pk_binary_range(milvus::OpContext* op_ctx, + const PkType& lower_pk, + bool lower_inclusive, + const PkType& upper_pk, + bool upper_inclusive, + BitsetTypeView& bitset) const { + if (!is_sorted_by_pk_) { + // For unsorted segments, use the InsertRecord's binary range search + insert_record_.search_pk_binary_range( + lower_pk, lower_inclusive, upper_pk, upper_inclusive, bitset); + return; + } + + // For sorted segments, use binary search + auto pk_field_id = schema_->get_primary_field_id().value_or(FieldId(-1)); + AssertInfo(pk_field_id.get() != -1, "Primary key is -1"); + auto pk_column = get_column(pk_field_id); + AssertInfo(pk_column != nullptr, "primary key column not loaded"); + + switch (schema_->get_fields().at(pk_field_id).get_data_type()) { + case DataType::INT64: + search_sorted_pk_binary_range_impl( + std::get(lower_pk), + lower_inclusive, + std::get(upper_pk), + upper_inclusive, + pk_column, + bitset); + break; + case DataType::VARCHAR: + search_sorted_pk_binary_range_impl( + std::get(lower_pk), + lower_inclusive, + 
std::get(upper_pk), + upper_inclusive, + pk_column, + bitset); + break; + default: + ThrowInfo( + DataTypeInvalid, + fmt::format( + "unsupported type {}", + schema_->get_fields().at(pk_field_id).get_data_type())); + } +} + std::pair, bool> ChunkedSegmentSealedImpl::find_first(int64_t limit, const BitsetType& bitset) const { diff --git a/internal/core/src/segcore/ChunkedSegmentSealedImpl.h b/internal/core/src/segcore/ChunkedSegmentSealedImpl.h index 070b2dd6dc..fc022c8418 100644 --- a/internal/core/src/segcore/ChunkedSegmentSealedImpl.h +++ b/internal/core/src/segcore/ChunkedSegmentSealedImpl.h @@ -226,6 +226,14 @@ class ChunkedSegmentSealedImpl : public SegmentSealed { const PkType& pk, BitsetTypeView& bitset) const; /* Override of the primary-key range lookup for this sealed-segment class. */ + void + pk_binary_range(milvus::OpContext* op_ctx, + const PkType& lower_pk, + bool lower_inclusive, + const PkType& upper_pk, + bool upper_inclusive, + BitsetTypeView& bitset) const override; + std::unique_ptr get_vector(milvus::OpContext* op_ctx, FieldId field_id, @@ -420,9 +428,7 @@ class ChunkedSegmentSealedImpl : public SegmentSealed { auto end_idx = pk_column->GetNumRowsUntilChunk(last_chunk_id) + last_in_chunk_offset; - for (int64_t idx = start_idx; idx <= end_idx; idx++) { - bitset[idx] = true; - } /* The removed loop set every bit from start_idx through end_idx inclusive; the replacement passes the same span as a start and a length of end_idx - start_idx + 1. NOTE(review): assumes set(start, count, value) argument order - confirm. */ + bitset.set(start_idx, end_idx - start_idx + 1, true); } } else if (op == proto::plan::OpType::GreaterEqual || op == proto::plan::OpType::GreaterThan) { @@ -479,6 +485,80 @@ class ChunkedSegmentSealedImpl : public SegmentSealed { } } /* Range lookup for a PK column stored in sorted order: derives a start index and an exclusive end index from pk_lower_bound and find_last_pk_position (both defined outside this patch) and sets that span of bitset in one call. Returns without touching bitset when the column has no chunks, when pk_lower_bound reports chunk id -1 for lower_val, or when start_idx is not below end_idx. */ + template + void + search_sorted_pk_binary_range_impl( + const PK& lower_val, + bool lower_inclusive, + const PK& upper_val, + bool upper_inclusive, + const std::shared_ptr& pk_column, + BitsetTypeView& bitset) const { + const auto num_chunk = pk_column->num_chunks(); + if (num_chunk == 0) { + return; + } + auto all_chunk_pins = pk_column->GetAllChunks(nullptr); + + // Find the lower bound position (first value >= lower_val or > lower_val) + auto [lower_chunk_id, lower_in_chunk_offset, lower_exact_match] = + 
this->pk_lower_bound( + lower_val, pk_column.get(), all_chunk_pins, 0); + + int64_t start_idx = 0; + if (lower_chunk_id != -1) { + start_idx = pk_column->GetNumRowsUntilChunk(lower_chunk_id) + + lower_in_chunk_offset; + // If lower_inclusive is false and we found an exact match, skip all equal values + if (!lower_inclusive && lower_exact_match) { + auto [last_chunk_id, last_in_chunk_offset] = + this->find_last_pk_position(lower_val, + pk_column.get(), + all_chunk_pins, + lower_chunk_id, + lower_in_chunk_offset); + start_idx = pk_column->GetNumRowsUntilChunk(last_chunk_id) + + last_in_chunk_offset + 1; + } + } else { + // lower_val is greater than all values, no results + return; + } + + // Find the upper bound position (first value >= upper_val or > upper_val) + auto [upper_chunk_id, upper_in_chunk_offset, upper_exact_match] = + this->pk_lower_bound( + upper_val, pk_column.get(), all_chunk_pins, 0); + /* end_idx is exclusive: the final set call covers end_idx - start_idx bits beginning at start_idx. */ + int64_t end_idx = 0; + if (upper_chunk_id == -1) { + // upper_val is greater than all values, include all from start_idx to end + end_idx = bitset.size(); + } else { + // If upper_inclusive is true and we found an exact match, include all equal values + if (upper_inclusive && upper_exact_match) { + auto [last_chunk_id, last_in_chunk_offset] = + this->find_last_pk_position(upper_val, + pk_column.get(), + all_chunk_pins, + upper_chunk_id, + upper_in_chunk_offset); + end_idx = pk_column->GetNumRowsUntilChunk(last_chunk_id) + + last_in_chunk_offset + 1; + } else { + // upper_inclusive is false or no exact match + // In both cases, end at the position of first value >= upper_val + end_idx = pk_column->GetNumRowsUntilChunk(upper_chunk_id) + + upper_in_chunk_offset; + } + } + + // Set bits from start_idx to end_idx - 1 + if (start_idx < end_idx) { + bitset.set(start_idx, end_idx - start_idx, true); + } + } + template void search_pks_with_two_pointers_impl( diff --git a/internal/core/src/segcore/InsertRecord.h b/internal/core/src/segcore/InsertRecord.h index 
80a34f0ba4..dbb7b6c127 100644 --- a/internal/core/src/segcore/InsertRecord.h +++ b/internal/core/src/segcore/InsertRecord.h @@ -514,6 +514,33 @@ class InsertRecordSealed { pk2offset_->find_range(pk, op, bitset, condition); } /* PK range lookup built from two one-sided find_range calls on pk2offset_: the lower-bound call writes into bitset, the upper-bound call writes into a temporary BitsetType of the same size, and bitset is then AND-ed with the temporary. Both calls pass a condition that returns true for every offset. */ + void + search_pk_binary_range(const PkType& lower_pk, + bool lower_inclusive, + const PkType& upper_pk, + bool upper_inclusive, + BitsetTypeView& bitset) const { + auto lower_op = lower_inclusive ? proto::plan::OpType::GreaterEqual + : proto::plan::OpType::GreaterThan; + auto upper_op = upper_inclusive ? proto::plan::OpType::LessEqual + : proto::plan::OpType::LessThan; + + BitsetType upper_result(bitset.size()); + auto upper_view = upper_result.view(); + + // values >= lower_pk (or > lower_pk if not inclusive) + pk2offset_->find_range( + lower_pk, lower_op, bitset, [](int64_t offset) { return true; }); + + // values <= upper_pk (or < upper_pk if not inclusive) + pk2offset_->find_range( + upper_pk, upper_op, upper_view, [](int64_t offset) { + return true; + }); + + bitset &= upper_result; + } + void insert_pks(milvus::DataType data_type, ChunkedColumnInterface* data) { std::lock_guard lck(shared_mutex_); diff --git a/internal/core/src/segcore/SegmentGrowing.h b/internal/core/src/segcore/SegmentGrowing.h index 06e1813652..7292c53b44 100644 --- a/internal/core/src/segcore/SegmentGrowing.h +++ b/internal/core/src/segcore/SegmentGrowing.h @@ -39,6 +39,17 @@ class SegmentGrowing : public SegmentInternalInterface { return SegmentType::Growing; } /* Growing segments do not provide the PK range lookup; this override unconditionally throws ErrorCode::Unsupported. */ + void + pk_binary_range(milvus::OpContext* op_ctx, + const PkType& lower_pk, + bool lower_inclusive, + const PkType& upper_pk, + bool upper_inclusive, + BitsetTypeView& bitset) const override { + ThrowInfo(ErrorCode::Unsupported, + "pk_binary_range is not supported for growing segment"); + } + // virtual int64_t // PreDelete(int64_t size) = 0; diff --git a/internal/core/src/segcore/SegmentInterface.h b/internal/core/src/segcore/SegmentInterface.h index 7393048914..5ca96784c2 100644 --- 
a/internal/core/src/segcore/SegmentInterface.h +++ b/internal/core/src/segcore/SegmentInterface.h @@ -649,6 +649,14 @@ class SegmentInternalInterface : public SegmentInterface { const PkType& pk, BitsetTypeView& bitset) const = 0; /* Pure-virtual primary-key range lookup: concrete segments receive the two bounds, an inclusive flag for each, and the bitset view that the result is reported through. */ + virtual void + pk_binary_range(milvus::OpContext* op_ctx, + const PkType& lower_pk, + bool lower_inclusive, + const PkType& upper_pk, + bool upper_inclusive, + BitsetTypeView& bitset) const = 0; + virtual GEOSContextHandle_t get_ctx() const { return ctx_;