From 515a939edfd0c6ed6091449db11bcbcd42e4abbd Mon Sep 17 00:00:00 2001 From: Buqian Zheng Date: Fri, 7 Nov 2025 16:07:35 +0800 Subject: [PATCH] enhance: remove obsolete code (#45307) issue: #44452 Signed-off-by: Buqian Zheng --- .../src/segcore/ChunkedSegmentSealedImpl.cpp | 135 ++---------------- .../src/segcore/ChunkedSegmentSealedImpl.h | 18 +-- .../core/src/segcore/SegmentGrowingImpl.cpp | 5 - .../core/src/segcore/SegmentGrowingImpl.h | 10 -- internal/core/src/segcore/SegmentInterface.h | 8 -- internal/core/unittest/test_sealed.cpp | 25 ---- 6 files changed, 13 insertions(+), 188 deletions(-) diff --git a/internal/core/src/segcore/ChunkedSegmentSealedImpl.cpp b/internal/core/src/segcore/ChunkedSegmentSealedImpl.cpp index 6113b0ac81..2c1e68b25d 100644 --- a/internal/core/src/segcore/ChunkedSegmentSealedImpl.cpp +++ b/internal/core/src/segcore/ChunkedSegmentSealedImpl.cpp @@ -1026,18 +1026,6 @@ ChunkedSegmentSealedImpl::check_search(const query::Plan* plan) const { } } -std::vector -ChunkedSegmentSealedImpl::search_pk(milvus::OpContext* op_ctx, - const PkType& pk, - Timestamp timestamp) const { - if (!is_sorted_by_pk_) { - return insert_record_.search_pk(pk, timestamp); - } - return search_sorted_pk(op_ctx, pk, [this, timestamp](int64_t offset) { - return insert_record_.timestamps_[offset] <= timestamp; - }); -} - void ChunkedSegmentSealedImpl::search_pks(BitsetType& bitset, const std::vector& pks) const { @@ -1219,74 +1207,6 @@ ChunkedSegmentSealedImpl::search_batch_pks( } } -template -std::vector -ChunkedSegmentSealedImpl::search_sorted_pk(milvus::OpContext* op_ctx, - const PkType& pk, - Condition condition) const { - auto pk_field_id = schema_->get_primary_field_id().value_or(FieldId(-1)); - AssertInfo(pk_field_id.get() != -1, "Primary key is -1"); - auto pk_column = get_column(pk_field_id); - AssertInfo(pk_column != nullptr, "primary key column not loaded"); - std::vector pk_offsets; - switch (schema_->get_fields().at(pk_field_id).get_data_type()) { - case DataType::INT64: { - auto target = std::get(pk); - // get int64 pks - auto num_chunk = pk_column->num_chunks(); - for (int i = 0; i < num_chunk; ++i) { - auto pw = pk_column->DataOfChunk(op_ctx, i); - auto src = reinterpret_cast(pw.get()); - auto chunk_row_num = pk_column->chunk_row_nums(i); - auto it = std::lower_bound( - src, - src + chunk_row_num, - target, - [](const int64_t& elem, const int64_t& value) { - return elem < value; - }); - auto num_rows_until_chunk = pk_column->GetNumRowsUntilChunk(i); - for (; it != src + chunk_row_num && *it == target; ++it) { - auto offset = it - src + num_rows_until_chunk; - if (condition(offset)) { - pk_offsets.emplace_back(offset); - } - } - } - break; - } - case DataType::VARCHAR: { - auto target = std::get(pk); - // get varchar pks - auto num_chunk = pk_column->num_chunks(); - for (int i = 0; i < num_chunk; ++i) { - // TODO @xiaocai2333, @sunby: chunk need to record the min/max. - auto num_rows_until_chunk = pk_column->GetNumRowsUntilChunk(i); - auto pw = pk_column->GetChunk(op_ctx, i); - auto string_chunk = static_cast(pw.get()); - auto offset = string_chunk->binary_search_string(target); - for (; offset != -1 && offset < string_chunk->RowNums() && - string_chunk->operator[](offset) == target; - ++offset) { - auto segment_offset = offset + num_rows_until_chunk; - if (condition(segment_offset)) { - pk_offsets.emplace_back(segment_offset); - } - } - } - break; - } - default: { - ThrowInfo( - DataTypeInvalid, - fmt::format( - "unsupported type {}", - schema_->get_fields().at(pk_field_id).get_data_type())); - } - } - return pk_offsets; -} - void ChunkedSegmentSealedImpl::pk_range(milvus::OpContext* op_ctx, proto::plan::OpType op, @@ -1297,17 +1217,14 @@ ChunkedSegmentSealedImpl::pk_range(milvus::OpContext* op_ctx, return; } - search_sorted_pk_range( - op_ctx, op, pk, bitset, [](int64_t offset) { return true; }); + search_sorted_pk_range(op_ctx, op, pk, bitset); } -template void ChunkedSegmentSealedImpl::search_sorted_pk_range(milvus::OpContext* op_ctx, proto::plan::OpType op, const PkType& pk, - BitsetTypeView& bitset, - Condition condition) const { + BitsetTypeView& bitset) const { auto pk_field_id = schema_->get_primary_field_id().value_or(FieldId(-1)); AssertInfo(pk_field_id.get() != -1, "Primary key is -1"); auto pk_column = get_column(pk_field_id); @@ -1335,9 +1252,7 @@ ChunkedSegmentSealedImpl::search_sorted_pk_range(milvus::OpContext* op_ctx, pk_column->GetNumRowsUntilChunk(i); for (; it != src + chunk_row_num; ++it) { auto offset = it - src + num_rows_until_chunk; - if (condition(offset)) { - bitset[offset] = true; - } + bitset[offset] = true; } } else if (op == proto::plan::OpType::GreaterThan) { auto it = std::upper_bound( @@ -1351,9 +1266,7 @@ ChunkedSegmentSealedImpl::search_sorted_pk_range(milvus::OpContext* op_ctx, pk_column->GetNumRowsUntilChunk(i); for (; it != src + chunk_row_num; ++it) { auto offset = it - src + num_rows_until_chunk; - if (condition(offset)) { - bitset[offset] = true; - } + bitset[offset] = true; } } else if (op == proto::plan::OpType::LessEqual) { auto it = std::upper_bound( @@ -1370,9 +1283,7 @@ ChunkedSegmentSealedImpl::search_sorted_pk_range(milvus::OpContext* op_ctx, pk_column->GetNumRowsUntilChunk(i); for (auto ptr = src; ptr < it; ++ptr) { auto offset = ptr - src + num_rows_until_chunk; - if (condition(offset)) { - bitset[offset] = true; - } + bitset[offset] = true; } } else if (op == proto::plan::OpType::LessThan) { auto it = @@ -1384,9 +1295,7 @@ ChunkedSegmentSealedImpl::search_sorted_pk_range(milvus::OpContext* op_ctx, pk_column->GetNumRowsUntilChunk(i); for (auto ptr = src; ptr < it; ++ptr) { auto offset = ptr - src + num_rows_until_chunk; - if (condition(offset)) { - bitset[offset] = true; - } + bitset[offset] = true; } } else if (op == proto::plan::OpType::Equal) { auto it = std::lower_bound( @@ -1400,9 +1309,7 @@ ChunkedSegmentSealedImpl::search_sorted_pk_range(milvus::OpContext* op_ctx, pk_column->GetNumRowsUntilChunk(i); for (; it != src + chunk_row_num && *it == target; ++it) { auto offset = it - src + num_rows_until_chunk; - if (condition(offset)) { - bitset[offset] = true; - } + bitset[offset] = true; } if (it != src + chunk_row_num && *it > target) { break; @@ -1430,9 +1337,7 @@ ChunkedSegmentSealedImpl::search_sorted_pk_range(milvus::OpContext* op_ctx, string_chunk->operator[](offset) == target; ++offset) { auto segment_offset = offset + num_rows_until_chunk; - if (condition(segment_offset)) { - bitset[segment_offset] = true; - } + bitset[segment_offset] = true; } if (offset < string_chunk->RowNums() && string_chunk->operator[](offset) > target) { @@ -1442,17 +1347,13 @@ ChunkedSegmentSealedImpl::search_sorted_pk_range(milvus::OpContext* op_ctx, auto offset = string_chunk->lower_bound_string(target); for (; offset < string_chunk->RowNums(); ++offset) { auto segment_offset = offset + num_rows_until_chunk; - if (condition(segment_offset)) { - bitset[segment_offset] = true; - } + bitset[segment_offset] = true; } } else if (op == proto::plan::OpType::GreaterThan) { auto offset = string_chunk->upper_bound_string(target); for (; offset < string_chunk->RowNums(); ++offset) { auto segment_offset = offset + num_rows_until_chunk; - if (condition(segment_offset)) { - bitset[segment_offset] = true; - } + bitset[segment_offset] = true; } } else if (op == proto::plan::OpType::LessEqual) { auto pos = string_chunk->upper_bound_string(target); @@ -1461,9 +1362,7 @@ ChunkedSegmentSealedImpl::search_sorted_pk_range(milvus::OpContext* op_ctx, } for (auto offset = 0; offset < pos; ++offset) { auto segment_offset = offset + num_rows_until_chunk; - if (condition(segment_offset)) { - bitset[segment_offset] = true; - } + bitset[segment_offset] = true; } } else if (op == proto::plan::OpType::LessThan) { auto pos = string_chunk->lower_bound_string(target); @@ -1472,9 +1371,7 @@ ChunkedSegmentSealedImpl::search_sorted_pk_range(milvus::OpContext* op_ctx, } for (auto offset = 0; offset < pos; ++offset) { auto segment_offset = offset + num_rows_until_chunk; - if (condition(segment_offset)) { - bitset[segment_offset] = true; - } + bitset[segment_offset] = true; } } else { ThrowInfo(ErrorCode::Unsupported, @@ -2359,14 +2256,6 @@ ChunkedSegmentSealedImpl::Delete(int64_t size, return SegcoreError::success(); } -std::string -ChunkedSegmentSealedImpl::debug() const { - std::string log_str; - log_str += "Sealed\n"; - log_str += "\n"; - return log_str; -} - void ChunkedSegmentSealedImpl::LoadSegmentMeta( const proto::segcore::LoadSegmentMeta& segment_meta) { diff --git a/internal/core/src/segcore/ChunkedSegmentSealedImpl.h b/internal/core/src/segcore/ChunkedSegmentSealedImpl.h index 4f1a05c1f9..065c055bde 100644 --- a/internal/core/src/segcore/ChunkedSegmentSealedImpl.h +++ b/internal/core/src/segcore/ChunkedSegmentSealedImpl.h @@ -206,30 +206,17 @@ class ChunkedSegmentSealedImpl : public SegmentSealed { const Schema& get_schema() const override; - std::vector - search_pk(milvus::OpContext* op_ctx, - const PkType& pk, - Timestamp timestamp) const override; - - template - std::vector - search_sorted_pk(milvus::OpContext* op_ctx, - const PkType& pk, - Condition condition) const; - void pk_range(milvus::OpContext* op_ctx, proto::plan::OpType op, const PkType& pk, BitsetTypeView& bitset) const override; - template void search_sorted_pk_range(milvus::OpContext* op_ctx, proto::plan::OpType op, const PkType& pk, - BitsetTypeView& bitset, - Condition condition) const; + BitsetTypeView& bitset) const; std::unique_ptr get_vector(milvus::OpContext* op_ctx, @@ -280,9 +267,6 @@ class ChunkedSegmentSealedImpl : public SegmentSealed { int64_t num_rows_until_chunk(FieldId field_id, int64_t chunk_id) const override; - std::string - debug() const override; - SegcoreError Delete(int64_t size, const IdArray* pks, diff --git a/internal/core/src/segcore/SegmentGrowingImpl.cpp b/internal/core/src/segcore/SegmentGrowingImpl.cpp index 3ed1ed87b4..a6980e3944 100644 --- a/internal/core/src/segcore/SegmentGrowingImpl.cpp +++ b/internal/core/src/segcore/SegmentGrowingImpl.cpp @@ -1184,11 +1184,6 @@ SegmentGrowingImpl::search_ids(BitsetType& bitset, } } -std::string -SegmentGrowingImpl::debug() const { - return "Growing\n"; -} - int64_t SegmentGrowingImpl::get_active_count(Timestamp ts) const { auto row_count = this->get_row_count(); diff --git a/internal/core/src/segcore/SegmentGrowingImpl.h b/internal/core/src/segcore/SegmentGrowingImpl.h index b7ee3dbd37..a39baf950c 100644 --- a/internal/core/src/segcore/SegmentGrowingImpl.h +++ b/internal/core/src/segcore/SegmentGrowingImpl.h @@ -68,9 +68,6 @@ class SegmentGrowingImpl : public SegmentGrowing { void LoadFieldData(const LoadFieldDataInfo& info) override; - std::string - debug() const override; - int64_t get_segment_id() const override { return id_; @@ -457,13 +454,6 @@ class SegmentGrowingImpl : public SegmentGrowing { return false; } - std::vector - search_pk(milvus::OpContext* op_ctx, - const PkType& pk, - Timestamp timestamp) const override { - return insert_record_.search_pk(pk, timestamp); - } - void pk_range(milvus::OpContext* op_ctx, proto::plan::OpType op, diff --git a/internal/core/src/segcore/SegmentInterface.h b/internal/core/src/segcore/SegmentInterface.h index 6d572b8c57..34c6a14658 100644 --- a/internal/core/src/segcore/SegmentInterface.h +++ b/internal/core/src/segcore/SegmentInterface.h @@ -333,9 +333,6 @@ class SegmentInternalInterface : public SegmentInterface { virtual bool HasIndex(FieldId field_id) const = 0; - virtual std::string - debug() const = 0; - int64_t get_real_count() const override; @@ -588,11 +585,6 @@ class SegmentInternalInterface : public SegmentInterface { int64_t count, const std::vector& dynamic_field_names) const = 0; - virtual std::vector - search_pk(milvus::OpContext* op_ctx, - const PkType& pk, - Timestamp timestamp) const = 0; - virtual void pk_range(milvus::OpContext* op_ctx, proto::plan::OpType op, diff --git a/internal/core/unittest/test_sealed.cpp b/internal/core/unittest/test_sealed.cpp index 1ec5bc5b24..1b126f6a7f 100644 --- a/internal/core/unittest/test_sealed.cpp +++ b/internal/core/unittest/test_sealed.cpp @@ -2280,31 +2280,6 @@ TEST(Sealed, QueryAllNullableFields) { EXPECT_EQ(float_array_result->valid_data_size(), dataset_size); } -TEST(Sealed, SearchSortedPk) { - auto schema = std::make_shared(); - auto varchar_pk_field = schema->AddDebugField("pk", DataType::VARCHAR); - schema->set_primary_field_id(varchar_pk_field); - auto segment_sealed = CreateSealedSegment( - schema, nullptr, 999, SegcoreConfig::default_config(), true); - auto segment = - dynamic_cast(segment_sealed.get()); - - int64_t dataset_size = 1000; - auto dataset = DataGen(schema, dataset_size, 42, 0, 10); - LoadGeneratedDataIntoSegment(dataset, segment); - - auto pk_values = dataset.get_col(varchar_pk_field); - auto offsets = - segment->search_pk(nullptr, PkType(pk_values[100]), Timestamp(99999)); - EXPECT_EQ(10, offsets.size()); - EXPECT_EQ(100, offsets[0].get()); - - auto offsets2 = - segment->search_pk(nullptr, PkType(pk_values[100]), int64_t(105)); - EXPECT_EQ(6, offsets2.size()); - EXPECT_EQ(100, offsets2[0].get()); -} - using VectorArrayTestParam = std::tuple;