diff --git a/internal/core/src/exec/expression/TermExpr.cpp b/internal/core/src/exec/expression/TermExpr.cpp index 54895a0f2c..4bf3ab7e42 100644 --- a/internal/core/src/exec/expression/TermExpr.cpp +++ b/internal/core/src/exec/expression/TermExpr.cpp @@ -139,7 +139,6 @@ PhyTermFilterExpr::CanSkipSegment() { if (segment_->type() == SegmentType::Sealed && skip_index.CanSkipBinaryRange(field_id_, 0, min, max, true, true)) { cached_bits_.resize(active_count_, false); - cached_offsets_ = std::make_shared(DataType::INT64, 0); cached_offsets_inited_ = true; return true; } @@ -178,14 +177,9 @@ PhyTermFilterExpr::InitPkCacheOffset() { auto [uids, seg_offsets] = segment_->search_ids(*id_array, query_timestamp_); cached_bits_.resize(active_count_, false); - cached_offsets_ = - std::make_shared(DataType::INT64, seg_offsets.size()); - int64_t* cached_offsets_ptr = (int64_t*)cached_offsets_->GetRawData(); - int i = 0; for (const auto& offset : seg_offsets) { auto _offset = (int64_t)offset.get(); cached_bits_[_offset] = true; - cached_offsets_ptr[i++] = _offset; } cached_offsets_inited_ = true; } @@ -214,7 +208,10 @@ PhyTermFilterExpr::ExecPkTermImpl() { } if (use_cache_offsets_) { - std::vector vecs{res_vec, cached_offsets_}; + auto cache_bits_copy = cached_bits_.clone(); + std::vector vecs{ + res_vec, + std::make_shared(std::move(cache_bits_copy))}; return std::make_shared(vecs); } else { return res_vec; diff --git a/internal/core/src/query/generated/ExecPlanNodeVisitor.h b/internal/core/src/query/generated/ExecPlanNodeVisitor.h index d3b69a388d..96b5d9b2f9 100644 --- a/internal/core/src/query/generated/ExecPlanNodeVisitor.h +++ b/internal/core/src/query/generated/ExecPlanNodeVisitor.h @@ -78,21 +78,6 @@ class ExecPlanNodeVisitor : public PlanNodeVisitor { return ret; } - void - SetExprCacheOffsets(std::vector&& offsets) { - expr_cached_pk_id_offsets_ = std::move(offsets); - } - - void - AddExprCacheOffset(int64_t offset) { - expr_cached_pk_id_offsets_.push_back(offset); - } - - const std::vector& - GetExprCacheOffsets() { - return expr_cached_pk_id_offsets_; - } - void SetExprUsePkIndex(bool use_pk_index) { expr_use_pk_index_ = use_pk_index; @@ -103,29 +88,11 @@ class ExecPlanNodeVisitor : public PlanNodeVisitor { return expr_use_pk_index_; } - void - ExecuteExprNodeInternal( - const std::shared_ptr& plannode, - const milvus::segcore::SegmentInternalInterface* segment, - int64_t active_count, - BitsetType& result, - bool& cache_offset_getted, - std::vector& cache_offset); - void ExecuteExprNode(const std::shared_ptr& plannode, const milvus::segcore::SegmentInternalInterface* segment, int64_t active_count, - BitsetType& result) { - bool get_cache_offset; - std::vector cache_offsets; - ExecuteExprNodeInternal(plannode, - segment, - active_count, - result, - get_cache_offset, - cache_offsets); - } + BitsetType& result); private: template @@ -140,6 +107,5 @@ class ExecPlanNodeVisitor : public PlanNodeVisitor { SearchResultOpt search_result_opt_; RetrieveResultOpt retrieve_result_opt_; bool expr_use_pk_index_ = false; - std::vector expr_cached_pk_id_offsets_; }; } // namespace milvus::query diff --git a/internal/core/src/query/visitors/ExecExprVisitor.cpp b/internal/core/src/query/visitors/ExecExprVisitor.cpp index d0b59873ee..7652e0e436 100644 --- a/internal/core/src/query/visitors/ExecExprVisitor.cpp +++ b/internal/core/src/query/visitors/ExecExprVisitor.cpp @@ -2547,7 +2547,6 @@ ExecExprVisitor::ExecTermVisitorImpl(TermExpr& expr_raw) -> BitsetType { // If enable plan_visitor pk index cache, pass offsets_ to it if (plan_visitor_ != nullptr) { plan_visitor_->SetExprUsePkIndex(true); - plan_visitor_->SetExprCacheOffsets(std::move(cached_offsets)); } AssertInfo(bitset.size() == row_count_, "[ExecExprVisitor]Size of results not equal row count"); diff --git a/internal/core/src/query/visitors/ExecPlanNodeVisitor.cpp b/internal/core/src/query/visitors/ExecPlanNodeVisitor.cpp index c5902fd63e..f3c01beb30 100644 --- a/internal/core/src/query/visitors/ExecPlanNodeVisitor.cpp +++ b/internal/core/src/query/visitors/ExecPlanNodeVisitor.cpp @@ -75,13 +75,11 @@ empty_search_result(int64_t num_queries, SearchInfo& search_info) { } void -ExecPlanNodeVisitor::ExecuteExprNodeInternal( +ExecPlanNodeVisitor::ExecuteExprNode( const std::shared_ptr& plannode, const milvus::segcore::SegmentInternalInterface* segment, int64_t active_count, - BitsetType& bitset_holder, - bool& cache_offset_getted, - std::vector& cache_offset) { + BitsetType& bitset_holder) { bitset_holder.clear(); LOG_DEBUG("plannode: {}, active_count: {}, timestamp: {}", plannode->ToString(), @@ -94,6 +92,7 @@ ExecPlanNodeVisitor::ExecuteExprNodeInternal( auto task = milvus::exec::Task::Create(DEFAULT_TASK_ID, plan, 0, query_context); + bool cache_offset_getted = false; for (;;) { auto result = task->Next(); if (!result) { @@ -115,20 +114,17 @@ ExecPlanNodeVisitor::ExecuteExprNodeInternal( if (!cache_offset_getted) { // offset cache only get once because not support iterator batch - auto cache_offset_vec = + auto cache_bits_vec = std::dynamic_pointer_cast(row->child(1)); - // If get empty cached offsets. mean no record hits in this segment + TargetBitmapView view(cache_bits_vec->GetRawData(), + cache_bits_vec->size()); + // If get empty cached bits. mean no record hits in this segment // no need to get next batch. - if (cache_offset_vec->size() == 0) { + if (view.count() == 0) { bitset_holder.resize(active_count); task->RequestCancel(); break; } - auto cache_offset_vec_ptr = - (int64_t*)(cache_offset_vec->GetRawData()); - for (size_t i = 0; i < cache_offset_vec->size(); ++i) { - cache_offset.push_back(cache_offset_vec_ptr[i]); - } cache_offset_getted = true; } } else { @@ -281,17 +277,12 @@ ExecPlanNodeVisitor::visit(RetrievePlanNode& node) { bitset_holder.resize(active_count); } - // This flag used to indicate whether to get offset from expr module that - // speeds up mvcc filter in the next interface: "timestamp_filter" - bool get_cache_offset = false; std::vector cache_offsets; if (node.filter_plannode_.has_value()) { - ExecuteExprNodeInternal(node.filter_plannode_.value(), - segment, - active_count, - bitset_holder, - get_cache_offset, - cache_offsets); + ExecuteExprNode(node.filter_plannode_.value(), + segment, + active_count, + bitset_holder); bitset_holder.flip(); } @@ -313,16 +304,7 @@ ExecPlanNodeVisitor::visit(RetrievePlanNode& node) { } retrieve_result.total_data_cnt_ = bitset_holder.size(); - bool false_filtered_out = false; - if (get_cache_offset) { - segment->timestamp_filter(bitset_holder, cache_offsets, timestamp_); - } else { - bitset_holder.flip(); - false_filtered_out = true; - segment->timestamp_filter(bitset_holder, timestamp_); - } - auto results_pair = - segment->find_first(node.limit_, bitset_holder, false_filtered_out); + auto results_pair = segment->find_first(node.limit_, bitset_holder); retrieve_result.result_offsets_ = std::move(results_pair.first); retrieve_result.has_more_result = results_pair.second; retrieve_result_opt_ = std::move(retrieve_result); diff --git a/internal/core/src/segcore/InsertRecord.h b/internal/core/src/segcore/InsertRecord.h index d590e3dfb7..c010971e76 100644 --- a/internal/core/src/segcore/InsertRecord.h +++ b/internal/core/src/segcore/InsertRecord.h @@ -63,9 +63,7 @@ class OffsetMap { using OffsetType = int64_t; // TODO: in fact, we can retrieve the pk here. Not sure which way is more efficient. virtual std::pair, bool> - find_first(int64_t limit, - const BitsetType& bitset, - bool false_filtered_out) const = 0; + find_first(int64_t limit, const BitsetType& bitset) const = 0; virtual void clear() = 0; @@ -169,9 +167,7 @@ class OffsetOrderedMap : public OffsetMap { } std::pair, bool> - find_first(int64_t limit, - const BitsetType& bitset, - bool false_filtered_out) const override { + find_first(int64_t limit, const BitsetType& bitset) const override { std::shared_lock lck(mtx_); if (limit == Unlimited || limit == NoLimit) { @@ -180,7 +176,7 @@ class OffsetOrderedMap : public OffsetMap { // TODO: we can't retrieve pk by offset very conveniently. // Selectivity should be done outside. - return find_first_by_index(limit, bitset, false_filtered_out); + return find_first_by_index(limit, bitset); } void @@ -191,15 +187,10 @@ class OffsetOrderedMap : public OffsetMap { private: std::pair, bool> - find_first_by_index(int64_t limit, - const BitsetType& bitset, - bool false_filtered_out) const { + find_first_by_index(int64_t limit, const BitsetType& bitset) const { int64_t hit_num = 0; // avoid counting the number everytime. - int64_t cnt = bitset.count(); auto size = bitset.size(); - if (!false_filtered_out) { - cnt = size - bitset.count(); - } + int64_t cnt = size - bitset.count(); limit = std::min(limit, cnt); std::vector seg_offsets; seg_offsets.reserve(limit); @@ -214,7 +205,7 @@ class OffsetOrderedMap : public OffsetMap { continue; } - if (!(bitset[seg_offset] ^ false_filtered_out)) { + if (!bitset[seg_offset]) { seg_offsets.push_back(seg_offset); hit_num++; // PK hit, no need to continue traversing offsets with the same PK. @@ -346,9 +337,7 @@ class OffsetOrderedArray : public OffsetMap { } std::pair, bool> - find_first(int64_t limit, - const BitsetType& bitset, - bool false_filtered_out) const override { + find_first(int64_t limit, const BitsetType& bitset) const override { check_search(); if (limit == Unlimited || limit == NoLimit) { @@ -357,7 +346,7 @@ class OffsetOrderedArray : public OffsetMap { // TODO: we can't retrieve pk by offset very conveniently. // Selectivity should be done outside. - return find_first_by_index(limit, bitset, false_filtered_out); + return find_first_by_index(limit, bitset); } void @@ -368,15 +357,10 @@ class OffsetOrderedArray : public OffsetMap { private: std::pair, bool> - find_first_by_index(int64_t limit, - const BitsetType& bitset, - bool false_filtered_out) const { + find_first_by_index(int64_t limit, const BitsetType& bitset) const { int64_t hit_num = 0; // avoid counting the number everytime. - int64_t cnt = bitset.count(); auto size = bitset.size(); - if (!false_filtered_out) { - cnt = size - bitset.count(); - } + int64_t cnt = size - bitset.count(); auto more_hit_than_limit = cnt > limit; limit = std::min(limit, cnt); std::vector seg_offsets; @@ -389,7 +373,7 @@ class OffsetOrderedArray : public OffsetMap { continue; } - if (!(bitset[seg_offset] ^ false_filtered_out)) { + if (!bitset[seg_offset]) { seg_offsets.push_back(seg_offset); hit_num++; } diff --git a/internal/core/src/segcore/SegmentGrowingImpl.h b/internal/core/src/segcore/SegmentGrowingImpl.h index 8715ca488f..734ef83bc8 100644 --- a/internal/core/src/segcore/SegmentGrowingImpl.h +++ b/internal/core/src/segcore/SegmentGrowingImpl.h @@ -307,11 +307,8 @@ class SegmentGrowingImpl : public SegmentGrowing { } std::pair, bool> - find_first(int64_t limit, - const BitsetType& bitset, - bool false_filtered_out) const override { - return insert_record_.pk2offset_->find_first( - limit, bitset, false_filtered_out); + find_first(int64_t limit, const BitsetType& bitset) const override { + return insert_record_.pk2offset_->find_first(limit, bitset); } bool diff --git a/internal/core/src/segcore/SegmentInterface.h b/internal/core/src/segcore/SegmentInterface.h index 7d2bbab6dd..845dbade5c 100644 --- a/internal/core/src/segcore/SegmentInterface.h +++ b/internal/core/src/segcore/SegmentInterface.h @@ -340,9 +340,7 @@ class SegmentInternalInterface : public SegmentInterface { * @return All candidates offsets. */ virtual std::pair, bool> - find_first(int64_t limit, - const BitsetType& bitset, - bool false_filtered_out) const = 0; + find_first(int64_t limit, const BitsetType& bitset) const = 0; void FillTargetEntry( diff --git a/internal/core/src/segcore/SegmentSealedImpl.h b/internal/core/src/segcore/SegmentSealedImpl.h index cf4340cc24..d739d69d95 100644 --- a/internal/core/src/segcore/SegmentSealedImpl.h +++ b/internal/core/src/segcore/SegmentSealedImpl.h @@ -151,11 +151,8 @@ class SegmentSealedImpl : public SegmentSealed { const Timestamp* timestamps) override; std::pair, bool> - find_first(int64_t limit, - const BitsetType& bitset, - bool false_filtered_out) const override { - return insert_record_.pk2offset_->find_first( - limit, bitset, false_filtered_out); + find_first(int64_t limit, const BitsetType& bitset) const override { + return insert_record_.pk2offset_->find_first(limit, bitset); } // Calculate: output[i] = Vec[seg_offset[i]] diff --git a/internal/core/unittest/test_offset_ordered_array.cpp b/internal/core/unittest/test_offset_ordered_array.cpp index fd817fbdd5..356cf64073 100644 --- a/internal/core/unittest/test_offset_ordered_array.cpp +++ b/internal/core/unittest/test_offset_ordered_array.cpp @@ -66,7 +66,7 @@ TYPED_TEST_SUITE_P(TypedOffsetOrderedArrayTest); TYPED_TEST_P(TypedOffsetOrderedArrayTest, find_first) { // not sealed. - ASSERT_ANY_THROW(this->map_.find_first(Unlimited, {}, true)); + ASSERT_ANY_THROW(this->map_.find_first(Unlimited, {})); // insert 10 entities. int num = 10; @@ -81,10 +81,8 @@ TYPED_TEST_P(TypedOffsetOrderedArrayTest, find_first) { // all is satisfied. { BitsetType all(num); - all.set(); { - auto [offsets, has_more_res] = - this->map_.find_first(num / 2, all, true); + auto [offsets, has_more_res] = this->map_.find_first(num / 2, all); ASSERT_EQ(num / 2, offsets.size()); ASSERT_TRUE(has_more_res); for (int i = 1; i < offsets.size(); i++) { @@ -93,7 +91,7 @@ TYPED_TEST_P(TypedOffsetOrderedArrayTest, find_first) { } { auto [offsets, has_more_res] = - this->map_.find_first(Unlimited, all, true); + this->map_.find_first(Unlimited, all); ASSERT_EQ(num, offsets.size()); ASSERT_FALSE(has_more_res); for (int i = 1; i < offsets.size(); i++) { @@ -104,10 +102,9 @@ TYPED_TEST_P(TypedOffsetOrderedArrayTest, find_first) { { // corner case, segment offset exceeds the size of bitset. BitsetType all_minus_1(num - 1); - all_minus_1.set(); { auto [offsets, has_more_res] = - this->map_.find_first(num / 2, all_minus_1, true); + this->map_.find_first(num / 2, all_minus_1); ASSERT_EQ(num / 2, offsets.size()); ASSERT_TRUE(has_more_res); for (int i = 1; i < offsets.size(); i++) { @@ -116,7 +113,7 @@ TYPED_TEST_P(TypedOffsetOrderedArrayTest, find_first) { } { auto [offsets, has_more_res] = - this->map_.find_first(Unlimited, all_minus_1, true); + this->map_.find_first(Unlimited, all_minus_1); ASSERT_EQ(all_minus_1.size(), offsets.size()); ASSERT_FALSE(has_more_res); for (int i = 1; i < offsets.size(); i++) { @@ -127,11 +124,11 @@ TYPED_TEST_P(TypedOffsetOrderedArrayTest, find_first) { { // none is satisfied. BitsetType none(num); - none.reset(); - auto result_pair = this->map_.find_first(num / 2, none, true); + none.set(); + auto result_pair = this->map_.find_first(num / 2, none); ASSERT_EQ(0, result_pair.first.size()); ASSERT_FALSE(result_pair.second); - result_pair = this->map_.find_first(NoLimit, none, true); + result_pair = this->map_.find_first(NoLimit, none); ASSERT_EQ(0, result_pair.first.size()); ASSERT_FALSE(result_pair.second); } diff --git a/internal/core/unittest/test_offset_ordered_map.cpp b/internal/core/unittest/test_offset_ordered_map.cpp index 36f4bafc83..0bc1913074 100644 --- a/internal/core/unittest/test_offset_ordered_map.cpp +++ b/internal/core/unittest/test_offset_ordered_map.cpp @@ -62,8 +62,7 @@ TYPED_TEST_SUITE_P(TypedOffsetOrderedMapTest); TYPED_TEST_P(TypedOffsetOrderedMapTest, find_first) { // no data. { - auto [offsets, has_more_res] = - this->map_.find_first(Unlimited, {}, true); + auto [offsets, has_more_res] = this->map_.find_first(Unlimited, {}); ASSERT_EQ(0, offsets.size()); ASSERT_FALSE(has_more_res); } @@ -76,11 +75,10 @@ TYPED_TEST_P(TypedOffsetOrderedMapTest, find_first) { // all is satisfied. BitsetType all(num); - all.set(); + all.reset(); { - auto [offsets, has_more_res] = - this->map_.find_first(num / 2, all, true); + auto [offsets, has_more_res] = this->map_.find_first(num / 2, all); ASSERT_EQ(num / 2, offsets.size()); ASSERT_TRUE(has_more_res); for (int i = 1; i < offsets.size(); i++) { @@ -88,8 +86,7 @@ TYPED_TEST_P(TypedOffsetOrderedMapTest, find_first) { } } { - auto [offsets, has_more_res] = - this->map_.find_first(Unlimited, all, true); + auto [offsets, has_more_res] = this->map_.find_first(Unlimited, all); ASSERT_EQ(num, offsets.size()); ASSERT_FALSE(has_more_res); for (int i = 1; i < offsets.size(); i++) { @@ -99,10 +96,10 @@ TYPED_TEST_P(TypedOffsetOrderedMapTest, find_first) { // corner case, segment offset exceeds the size of bitset. BitsetType all_minus_1(num - 1); - all_minus_1.set(); + all_minus_1.reset(); { auto [offsets, has_more_res] = - this->map_.find_first(num / 2, all_minus_1, true); + this->map_.find_first(num / 2, all_minus_1); ASSERT_EQ(num / 2, offsets.size()); ASSERT_TRUE(has_more_res); for (int i = 1; i < offsets.size(); i++) { @@ -111,7 +108,7 @@ TYPED_TEST_P(TypedOffsetOrderedMapTest, find_first) { } { auto [offsets, has_more_res] = - this->map_.find_first(Unlimited, all_minus_1, true); + this->map_.find_first(Unlimited, all_minus_1); ASSERT_EQ(all_minus_1.size(), offsets.size()); ASSERT_FALSE(has_more_res); for (int i = 1; i < offsets.size(); i++) { @@ -121,16 +118,14 @@ TYPED_TEST_P(TypedOffsetOrderedMapTest, find_first) { // none is satisfied. BitsetType none(num); - none.reset(); + none.set(); { - auto [offsets, has_more_res] = - this->map_.find_first(num / 2, none, true); + auto [offsets, has_more_res] = this->map_.find_first(num / 2, none); ASSERT_TRUE(has_more_res); ASSERT_EQ(0, offsets.size()); } { - auto [offsets, has_more_res] = - this->map_.find_first(NoLimit, none, true); + auto [offsets, has_more_res] = this->map_.find_first(NoLimit, none); ASSERT_TRUE(has_more_res); ASSERT_EQ(0, offsets.size()); }