From f033294dc1fa1fd62f589927f8b09a0cd58094f0 Mon Sep 17 00:00:00 2001 From: Chun Han <116052805+MrPresent-Han@users.noreply.github.com> Date: Mon, 4 Aug 2025 11:25:39 +0800 Subject: [PATCH] fix: try to get span raw data for variable length data type(#43544) (#43703) related: #43544 pr: https://github.com/milvus-io/milvus/pull/43705 Signed-off-by: MrPresent-Han Co-authored-by: MrPresent-Han --- internal/core/src/common/Chunk.h | 14 ++++++++++ internal/core/src/exec/expression/Expr.h | 10 ++++--- .../src/exec/expression/JsonContainsExpr.cpp | 1 - internal/core/src/mmap/ChunkedColumn.h | 22 +++++++++++++--- internal/core/src/mmap/Column.h | 25 +++++++++++++++--- .../src/segcore/ChunkedSegmentSealedImpl.cpp | 26 ++++++++++++++++--- .../src/segcore/ChunkedSegmentSealedImpl.h | 13 +++++++--- .../core/src/segcore/SegmentGrowingImpl.cpp | 17 +++++++++--- .../core/src/segcore/SegmentGrowingImpl.h | 13 +++++++--- internal/core/src/segcore/SegmentInterface.h | 21 ++++++++++----- .../core/src/segcore/SegmentSealedImpl.cpp | 23 +++++++++++++--- internal/core/src/segcore/SegmentSealedImpl.h | 13 +++++++--- internal/querynodev2/segments/segment.go | 2 +- 13 files changed, 162 insertions(+), 38 deletions(-) diff --git a/internal/core/src/common/Chunk.h b/internal/core/src/common/Chunk.h index 919afb8120..4ffa1be643 100644 --- a/internal/core/src/common/Chunk.h +++ b/internal/core/src/common/Chunk.h @@ -251,6 +251,20 @@ class ArrayChunk : public Chunk { offsets_ptr); } + std::pair, FixedVector> + ViewsByOffsets(const FixedVector& offsets) { + std::vector views; + FixedVector valid_res; + size_t size = offsets.size(); + views.reserve(size); + valid_res.reserve(size); + for (auto i = 0; i < size; ++i) { + views.emplace_back(View(offsets[i])); + valid_res.emplace_back(isValid(offsets[i])); + } + return {std::move(views), std::move(valid_res)}; + } + std::pair, FixedVector> Views(std::optional> offset_len = std::nullopt) const { diff --git a/internal/core/src/exec/expression/Expr.h b/internal/core/src/exec/expression/Expr.h index ce82122842..8baf0816b4 100644 --- a/internal/core/src/exec/expression/Expr.h +++ b/internal/core/src/exec/expression/Expr.h @@ -500,7 +500,8 @@ class SegmentExpr : public Expr { if (segment_->type() == SegmentType::Sealed) { if (segment_->is_chunked()) { if constexpr (std::is_same_v || - std::is_same_v) { + std::is_same_v || + std::is_same_v) { for (size_t i = 0; i < input->size(); ++i) { int64_t offset = (*input)[i]; auto [chunk_id, chunk_offset] = @@ -557,7 +558,8 @@ class SegmentExpr : public Expr { return input->size(); } else { if constexpr (std::is_same_v || - std::is_same_v) { + std::is_same_v || + std::is_same_v) { return ProcessDataByOffsetsForSealedSeg( func, skip_func, input, res, valid_res, values...); } @@ -619,7 +621,6 @@ class SegmentExpr : public Expr { TargetBitmapView valid_res, ValTypes... values) { int64_t processed_size = 0; - if constexpr (std::is_same_v || std::is_same_v) { if (segment_->type() == SegmentType::Sealed) { @@ -742,7 +743,8 @@ class SegmentExpr : public Expr { } else { const bool* valid_data; if constexpr (std::is_same_v || - std::is_same_v) { + std::is_same_v || + std::is_same_v) { if (segment_->type() == SegmentType::Sealed) { auto batch_views = segment_->get_batch_views( field_id_, i, data_pos, size); diff --git a/internal/core/src/exec/expression/JsonContainsExpr.cpp b/internal/core/src/exec/expression/JsonContainsExpr.cpp index a57dc16e3d..0a73d7ea59 100644 --- a/internal/core/src/exec/expression/JsonContainsExpr.cpp +++ b/internal/core/src/exec/expression/JsonContainsExpr.cpp @@ -25,7 +25,6 @@ void PhyJsonContainsFilterExpr::Eval(EvalCtx& context, VectorPtr& result) { auto input = context.get_offset_input(); SetHasOffsetInput((input != nullptr)); - if (expr_->vals_.empty()) { auto real_batch_size = has_offset_input_ ? context.get_offset_input()->size() diff --git a/internal/core/src/mmap/ChunkedColumn.h b/internal/core/src/mmap/ChunkedColumn.h index dbb53bbe0b..4b882ff85f 100644 --- a/internal/core/src/mmap/ChunkedColumn.h +++ b/internal/core/src/mmap/ChunkedColumn.h @@ -168,12 +168,19 @@ class ChunkedColumnBase : public ColumnBase { } virtual std::pair, FixedVector> - ViewsByOffsets(int64_t chunk_id, - const FixedVector& offsets) const { + StringViewsByOffsets(int64_t chunk_id, + const FixedVector& offsets) const { PanicInfo(ErrorCode::Unsupported, "viewsbyoffsets only supported for VariableColumn"); } + virtual std::pair, FixedVector> + ArrayViewsByOffsets(int64_t chunk_id, + const FixedVector& offsets) const { + PanicInfo(ErrorCode::Unsupported, + "viewsbyoffsets only supported for ArrayColumn"); + } + std::pair GetChunkIDByOffset(int64_t offset) const { AssertInfo(offset < num_rows_, @@ -355,8 +362,8 @@ class ChunkedVariableColumn : public ChunkedColumnBase { } std::pair, FixedVector> - ViewsByOffsets(int64_t chunk_id, - const FixedVector& offsets) const override { + StringViewsByOffsets(int64_t chunk_id, + const FixedVector& offsets) const override { return std::static_pointer_cast(chunks_[chunk_id]) ->ViewsByOffsets(offsets); } @@ -444,5 +451,12 @@ class ChunkedArrayColumn : public ChunkedColumnBase { return std::dynamic_pointer_cast(chunks_[chunk_id]) ->Views(offset_len); } + + std::pair, FixedVector> + ArrayViewsByOffsets(int64_t chunk_id, + const FixedVector& offsets) const override { + return std::dynamic_pointer_cast(chunks_[chunk_id]) + ->ViewsByOffsets(offsets); + } }; } // namespace milvus \ No newline at end of file diff --git a/internal/core/src/mmap/Column.h b/internal/core/src/mmap/Column.h index 06737a5426..2394b87837 100644 --- a/internal/core/src/mmap/Column.h +++ b/internal/core/src/mmap/Column.h @@ -331,11 +331,17 @@ class SingleChunkColumnBase : public ColumnBase { } virtual std::pair, FixedVector> - ViewsByOffsets(const FixedVector& offsets) const { + StringViewsByOffsets(const FixedVector& offsets) const { PanicInfo(ErrorCode::Unsupported, "viewsbyoffsets only supported for VariableColumn"); } + virtual std::pair, FixedVector> + ArrayViewsByOffsets(const FixedVector& offsets) const { + PanicInfo(ErrorCode::Unsupported, + "viewsbyoffsets only supported for ArrayColumn"); + } + virtual std::string_view RawAt(const size_t i) const { PanicInfo(ErrorCode::Unsupported, @@ -719,7 +725,7 @@ class SingleChunkVariableColumn : public SingleChunkColumnBase { } std::pair, FixedVector> - ViewsByOffsets(const FixedVector& offsets) const { + StringViewsByOffsets(const FixedVector& offsets) const { std::vector res; FixedVector valid; res.reserve(offsets.size()); @@ -728,7 +734,7 @@ class SingleChunkVariableColumn : public SingleChunkColumnBase { res.emplace_back(RawAt(offset)); valid.emplace_back(IsValid(offset)); } - return {res, valid}; + return {std::move(res), std::move(valid)}; } [[nodiscard]] std::vector @@ -973,6 +979,19 @@ class SingleChunkArrayColumn : public SingleChunkColumnBase { return {Views(), valid_data_}; } + std::pair, FixedVector> + ArrayViewsByOffsets(const FixedVector& offsets) const override { + std::vector views; + FixedVector valid; + views.reserve(offsets.size()); + valid.reserve(offsets.size()); + for (auto offset : offsets) { + views.emplace_back(this->operator[](offset)); + valid.emplace_back(IsValid(offset)); + } + return {std::move(views), std::move(valid)}; + } + protected: void ConstructViews() { diff --git a/internal/core/src/segcore/ChunkedSegmentSealedImpl.cpp b/internal/core/src/segcore/ChunkedSegmentSealedImpl.cpp index ce89edb2b7..2f32f3608e 100644 --- a/internal/core/src/segcore/ChunkedSegmentSealedImpl.cpp +++ b/internal/core/src/segcore/ChunkedSegmentSealedImpl.cpp @@ -774,7 +774,7 @@ ChunkedSegmentSealedImpl::chunk_string_view_impl( } std::pair, FixedVector> -ChunkedSegmentSealedImpl::chunk_view_by_offsets( +ChunkedSegmentSealedImpl::chunk_string_views_by_offsets( FieldId field_id, int64_t chunk_id, const FixedVector& offsets) const { @@ -783,10 +783,28 @@ ChunkedSegmentSealedImpl::chunk_view_by_offsets( "Can't get bitset element at " + std::to_string(field_id.get())); if (auto it = fields_.find(field_id); it != fields_.end()) { auto& field_data = it->second; - return field_data->ViewsByOffsets(chunk_id, offsets); + return field_data->StringViewsByOffsets(chunk_id, offsets); } - PanicInfo(ErrorCode::UnexpectedError, - "chunk_view_by_offsets only used for variable column field "); + PanicInfo( + ErrorCode::UnexpectedError, + "chunk_string_views_by_offsets only used for variable column field "); +} + +std::pair, FixedVector> +ChunkedSegmentSealedImpl::chunk_array_views_by_offsets( + FieldId field_id, + int64_t chunk_id, + const FixedVector& offsets) const { + std::shared_lock lck(mutex_); + AssertInfo(get_bit(field_data_ready_bitset_, field_id), + "Can't get bitset element at " + std::to_string(field_id.get())); + if (auto it = fields_.find(field_id); it != fields_.end()) { + auto& field_data = it->second; + return field_data->ArrayViewsByOffsets(chunk_id, offsets); + } + PanicInfo( + ErrorCode::UnexpectedError, + "chunk_array_views_by_offsets only used for variable column field "); } const index::IndexBase* diff --git a/internal/core/src/segcore/ChunkedSegmentSealedImpl.h b/internal/core/src/segcore/ChunkedSegmentSealedImpl.h index 1b724ae0b2..1668367187 100644 --- a/internal/core/src/segcore/ChunkedSegmentSealedImpl.h +++ b/internal/core/src/segcore/ChunkedSegmentSealedImpl.h @@ -246,9 +246,16 @@ class ChunkedSegmentSealedImpl : public SegmentSealed { std::optional> offset_len) const override; std::pair, FixedVector> - chunk_view_by_offsets(FieldId field_id, - int64_t chunk_id, - const FixedVector& offsets) const override; + chunk_string_views_by_offsets( + FieldId field_id, + int64_t chunk_id, + const FixedVector& offsets) const override; + + std::pair, FixedVector> + chunk_array_views_by_offsets( + FieldId field_id, + int64_t chunk_id, + const FixedVector& offsets) const override; std::pair> get_chunk_buffer(FieldId field_id, diff --git a/internal/core/src/segcore/SegmentGrowingImpl.cpp b/internal/core/src/segcore/SegmentGrowingImpl.cpp index abee12e993..07bb648c8a 100644 --- a/internal/core/src/segcore/SegmentGrowingImpl.cpp +++ b/internal/core/src/segcore/SegmentGrowingImpl.cpp @@ -419,12 +419,23 @@ SegmentGrowingImpl::chunk_array_view_impl( } std::pair, FixedVector> -SegmentGrowingImpl::chunk_view_by_offsets( +SegmentGrowingImpl::chunk_string_views_by_offsets( FieldId field_id, int64_t chunk_id, const FixedVector& offsets) const { - PanicInfo(ErrorCode::NotImplemented, - "chunk view by offsets not implemented for growing segment"); + PanicInfo( + ErrorCode::NotImplemented, + "chunk string views by offsets not implemented for growing segment"); +} + +std::pair, FixedVector> +SegmentGrowingImpl::chunk_array_views_by_offsets( + FieldId field_id, + int64_t chunk_id, + const FixedVector& offsets) const { + PanicInfo( + ErrorCode::NotImplemented, + "chunk array views by offsets not implemented for growing segment"); } int64_t diff --git a/internal/core/src/segcore/SegmentGrowingImpl.h b/internal/core/src/segcore/SegmentGrowingImpl.h index 2e00535e6a..56f6608762 100644 --- a/internal/core/src/segcore/SegmentGrowingImpl.h +++ b/internal/core/src/segcore/SegmentGrowingImpl.h @@ -376,9 +376,16 @@ class SegmentGrowingImpl : public SegmentGrowing { std::optional> offset_len) const override; std::pair, FixedVector> - chunk_view_by_offsets(FieldId field_id, - int64_t chunk_id, - const FixedVector& offsets) const override; + chunk_string_views_by_offsets( + FieldId field_id, + int64_t chunk_id, + const FixedVector& offsets) const override; + + std::pair, FixedVector> + chunk_array_views_by_offsets( + FieldId field_id, + int64_t chunk_id, + const FixedVector& offsets) const override; std::pair> get_chunk_buffer(FieldId field_id, diff --git a/internal/core/src/segcore/SegmentInterface.h b/internal/core/src/segcore/SegmentInterface.h index e07ef02a0b..9023a36acc 100644 --- a/internal/core/src/segcore/SegmentInterface.h +++ b/internal/core/src/segcore/SegmentInterface.h @@ -212,16 +212,19 @@ class SegmentInternalInterface : public SegmentInterface { PanicInfo(ErrorCode::Unsupported, "get chunk views not supported for growing segment"); } - auto chunk_view = chunk_view_by_offsets(field_id, chunk_id, offsets); if constexpr (std::is_same_v) { - return chunk_view; - } else { + return chunk_string_views_by_offsets(field_id, chunk_id, offsets); + } else if constexpr (std::is_same_v) { + auto chunk_view = + chunk_string_views_by_offsets(field_id, chunk_id, offsets); std::vector res; res.reserve(chunk_view.first.size()); for (const auto& view : chunk_view.first) { res.emplace_back(view); } return {res, chunk_view.second}; + } else if constexpr (std::is_same_v) { + return chunk_array_views_by_offsets(field_id, chunk_id, offsets); } } @@ -476,9 +479,15 @@ class SegmentInternalInterface : public SegmentInterface { int64_t length) const = 0; virtual std::pair, FixedVector> - chunk_view_by_offsets(FieldId field_id, - int64_t chunk_id, - const FixedVector& offsets) const = 0; + chunk_string_views_by_offsets( + FieldId field_id, + int64_t chunk_id, + const FixedVector& offsets) const = 0; + + virtual std::pair, FixedVector> + chunk_array_views_by_offsets(FieldId field_id, + int64_t chunk_id, + const FixedVector& offsets) const = 0; // internal API: return chunk_index in span, support scalar index only virtual const index::IndexBase* diff --git a/internal/core/src/segcore/SegmentSealedImpl.cpp b/internal/core/src/segcore/SegmentSealedImpl.cpp index ae8a2174ca..de2aebdbb5 100644 --- a/internal/core/src/segcore/SegmentSealedImpl.cpp +++ b/internal/core/src/segcore/SegmentSealedImpl.cpp @@ -778,7 +778,7 @@ SegmentSealedImpl::chunk_array_view_impl( } std::pair, FixedVector> -SegmentSealedImpl::chunk_view_by_offsets( +SegmentSealedImpl::chunk_string_views_by_offsets( FieldId field_id, int64_t chunk_id, const FixedVector& offsets) const { @@ -787,10 +787,27 @@ SegmentSealedImpl::chunk_view_by_offsets( "Can't get bitset element at " + std::to_string(field_id.get())); if (auto it = fields_.find(field_id); it != fields_.end()) { auto& field_data = it->second; - return field_data->ViewsByOffsets(offsets); + return field_data->StringViewsByOffsets(offsets); + } + PanicInfo( + ErrorCode::UnexpectedError, + "chunk_string_views_by_offsets only used for variable column field "); +} + +std::pair, FixedVector> +SegmentSealedImpl::chunk_array_views_by_offsets( + FieldId field_id, + int64_t chunk_id, + const FixedVector& offsets) const { + std::shared_lock lck(mutex_); + AssertInfo(get_bit(field_data_ready_bitset_, field_id), + "Can't get bitset element at " + std::to_string(field_id.get())); + if (auto it = fields_.find(field_id); it != fields_.end()) { + auto& field_data = it->second; + return field_data->ArrayViewsByOffsets(offsets); } PanicInfo(ErrorCode::UnexpectedError, - "chunk_view_by_offsets only used for variable column field "); + "chunk_array_views_by_offsets only used for array column field "); } const index::IndexBase* diff --git a/internal/core/src/segcore/SegmentSealedImpl.h b/internal/core/src/segcore/SegmentSealedImpl.h index bcd1b22672..b33208571e 100644 --- a/internal/core/src/segcore/SegmentSealedImpl.h +++ b/internal/core/src/segcore/SegmentSealedImpl.h @@ -246,9 +246,16 @@ class SegmentSealedImpl : public SegmentSealed { std::optional> offset_len) const override; std::pair, FixedVector> - chunk_view_by_offsets(FieldId field_id, - int64_t chunk_id, - const FixedVector& offsets) const override; + chunk_string_views_by_offsets( + FieldId field_id, + int64_t chunk_id, + const FixedVector& offsets) const override; + + std::pair, FixedVector> + chunk_array_views_by_offsets( + FieldId field_id, + int64_t chunk_id, + const FixedVector& offsets) const override; std::pair> get_chunk_buffer(FieldId field_id, diff --git a/internal/querynodev2/segments/segment.go b/internal/querynodev2/segments/segment.go index 63d5f5ba08..66fccf4396 100644 --- a/internal/querynodev2/segments/segment.go +++ b/internal/querynodev2/segments/segment.go @@ -354,7 +354,7 @@ func NewSegment(ctx context.Context, logger.Warn("create segment failed", zap.Error(err)) return nil, err } - log.Info("create segment done") + logger.Info("create segment done") segment := &LocalSegment{ baseSegment: base,