mirror of
https://gitee.com/milvus-io/milvus.git
synced 2026-01-07 19:31:51 +08:00
related: #43544 pr: https://github.com/milvus-io/milvus/pull/43705 Signed-off-by: MrPresent-Han <chun.han@gmail.com> Co-authored-by: MrPresent-Han <chun.han@gmail.com>
This commit is contained in:
parent
1a2871b628
commit
f033294dc1
@ -251,6 +251,20 @@ class ArrayChunk : public Chunk {
|
||||
offsets_ptr);
|
||||
}
|
||||
|
||||
std::pair<std::vector<ArrayView>, FixedVector<bool>>
|
||||
ViewsByOffsets(const FixedVector<int32_t>& offsets) {
|
||||
std::vector<ArrayView> views;
|
||||
FixedVector<bool> valid_res;
|
||||
size_t size = offsets.size();
|
||||
views.reserve(size);
|
||||
valid_res.reserve(size);
|
||||
for (auto i = 0; i < size; ++i) {
|
||||
views.emplace_back(View(offsets[i]));
|
||||
valid_res.emplace_back(isValid(offsets[i]));
|
||||
}
|
||||
return {std::move(views), std::move(valid_res)};
|
||||
}
|
||||
|
||||
std::pair<std::vector<ArrayView>, FixedVector<bool>>
|
||||
Views(std::optional<std::pair<int64_t, int64_t>> offset_len =
|
||||
std::nullopt) const {
|
||||
|
||||
@ -500,7 +500,8 @@ class SegmentExpr : public Expr {
|
||||
if (segment_->type() == SegmentType::Sealed) {
|
||||
if (segment_->is_chunked()) {
|
||||
if constexpr (std::is_same_v<T, std::string_view> ||
|
||||
std::is_same_v<T, Json>) {
|
||||
std::is_same_v<T, Json> ||
|
||||
std::is_same_v<T, ArrayView>) {
|
||||
for (size_t i = 0; i < input->size(); ++i) {
|
||||
int64_t offset = (*input)[i];
|
||||
auto [chunk_id, chunk_offset] =
|
||||
@ -557,7 +558,8 @@ class SegmentExpr : public Expr {
|
||||
return input->size();
|
||||
} else {
|
||||
if constexpr (std::is_same_v<T, std::string_view> ||
|
||||
std::is_same_v<T, Json>) {
|
||||
std::is_same_v<T, Json> ||
|
||||
std::is_same_v<T, ArrayView>) {
|
||||
return ProcessDataByOffsetsForSealedSeg<T>(
|
||||
func, skip_func, input, res, valid_res, values...);
|
||||
}
|
||||
@ -619,7 +621,6 @@ class SegmentExpr : public Expr {
|
||||
TargetBitmapView valid_res,
|
||||
ValTypes... values) {
|
||||
int64_t processed_size = 0;
|
||||
|
||||
if constexpr (std::is_same_v<T, std::string_view> ||
|
||||
std::is_same_v<T, Json>) {
|
||||
if (segment_->type() == SegmentType::Sealed) {
|
||||
@ -742,7 +743,8 @@ class SegmentExpr : public Expr {
|
||||
} else {
|
||||
const bool* valid_data;
|
||||
if constexpr (std::is_same_v<T, std::string_view> ||
|
||||
std::is_same_v<T, Json>) {
|
||||
std::is_same_v<T, Json> ||
|
||||
std::is_same_v<T, ArrayView>) {
|
||||
if (segment_->type() == SegmentType::Sealed) {
|
||||
auto batch_views = segment_->get_batch_views<T>(
|
||||
field_id_, i, data_pos, size);
|
||||
|
||||
@ -25,7 +25,6 @@ void
|
||||
PhyJsonContainsFilterExpr::Eval(EvalCtx& context, VectorPtr& result) {
|
||||
auto input = context.get_offset_input();
|
||||
SetHasOffsetInput((input != nullptr));
|
||||
|
||||
if (expr_->vals_.empty()) {
|
||||
auto real_batch_size = has_offset_input_
|
||||
? context.get_offset_input()->size()
|
||||
|
||||
@ -168,12 +168,19 @@ class ChunkedColumnBase : public ColumnBase {
|
||||
}
|
||||
|
||||
virtual std::pair<std::vector<std::string_view>, FixedVector<bool>>
|
||||
ViewsByOffsets(int64_t chunk_id,
|
||||
const FixedVector<int32_t>& offsets) const {
|
||||
StringViewsByOffsets(int64_t chunk_id,
|
||||
const FixedVector<int32_t>& offsets) const {
|
||||
PanicInfo(ErrorCode::Unsupported,
|
||||
"viewsbyoffsets only supported for VariableColumn");
|
||||
}
|
||||
|
||||
virtual std::pair<std::vector<ArrayView>, FixedVector<bool>>
|
||||
ArrayViewsByOffsets(int64_t chunk_id,
|
||||
const FixedVector<int32_t>& offsets) const {
|
||||
PanicInfo(ErrorCode::Unsupported,
|
||||
"viewsbyoffsets only supported for ArrayColumn");
|
||||
}
|
||||
|
||||
std::pair<size_t, size_t>
|
||||
GetChunkIDByOffset(int64_t offset) const {
|
||||
AssertInfo(offset < num_rows_,
|
||||
@ -355,8 +362,8 @@ class ChunkedVariableColumn : public ChunkedColumnBase {
|
||||
}
|
||||
|
||||
std::pair<std::vector<std::string_view>, FixedVector<bool>>
|
||||
ViewsByOffsets(int64_t chunk_id,
|
||||
const FixedVector<int32_t>& offsets) const override {
|
||||
StringViewsByOffsets(int64_t chunk_id,
|
||||
const FixedVector<int32_t>& offsets) const override {
|
||||
return std::static_pointer_cast<StringChunk>(chunks_[chunk_id])
|
||||
->ViewsByOffsets(offsets);
|
||||
}
|
||||
@ -444,5 +451,12 @@ class ChunkedArrayColumn : public ChunkedColumnBase {
|
||||
return std::dynamic_pointer_cast<ArrayChunk>(chunks_[chunk_id])
|
||||
->Views(offset_len);
|
||||
}
|
||||
|
||||
std::pair<std::vector<ArrayView>, FixedVector<bool>>
|
||||
ArrayViewsByOffsets(int64_t chunk_id,
|
||||
const FixedVector<int32_t>& offsets) const override {
|
||||
return std::dynamic_pointer_cast<ArrayChunk>(chunks_[chunk_id])
|
||||
->ViewsByOffsets(offsets);
|
||||
}
|
||||
};
|
||||
} // namespace milvus
|
||||
@ -331,11 +331,17 @@ class SingleChunkColumnBase : public ColumnBase {
|
||||
}
|
||||
|
||||
virtual std::pair<std::vector<std::string_view>, FixedVector<bool>>
|
||||
ViewsByOffsets(const FixedVector<int32_t>& offsets) const {
|
||||
StringViewsByOffsets(const FixedVector<int32_t>& offsets) const {
|
||||
PanicInfo(ErrorCode::Unsupported,
|
||||
"viewsbyoffsets only supported for VariableColumn");
|
||||
}
|
||||
|
||||
virtual std::pair<std::vector<ArrayView>, FixedVector<bool>>
|
||||
ArrayViewsByOffsets(const FixedVector<int32_t>& offsets) const {
|
||||
PanicInfo(ErrorCode::Unsupported,
|
||||
"viewsbyoffsets only supported for ArrayColumn");
|
||||
}
|
||||
|
||||
virtual std::string_view
|
||||
RawAt(const size_t i) const {
|
||||
PanicInfo(ErrorCode::Unsupported,
|
||||
@ -719,7 +725,7 @@ class SingleChunkVariableColumn : public SingleChunkColumnBase {
|
||||
}
|
||||
|
||||
std::pair<std::vector<std::string_view>, FixedVector<bool>>
|
||||
ViewsByOffsets(const FixedVector<int32_t>& offsets) const {
|
||||
StringViewsByOffsets(const FixedVector<int32_t>& offsets) const {
|
||||
std::vector<std::string_view> res;
|
||||
FixedVector<bool> valid;
|
||||
res.reserve(offsets.size());
|
||||
@ -728,7 +734,7 @@ class SingleChunkVariableColumn : public SingleChunkColumnBase {
|
||||
res.emplace_back(RawAt(offset));
|
||||
valid.emplace_back(IsValid(offset));
|
||||
}
|
||||
return {res, valid};
|
||||
return {std::move(res), std::move(valid)};
|
||||
}
|
||||
|
||||
[[nodiscard]] std::vector<ViewType>
|
||||
@ -973,6 +979,19 @@ class SingleChunkArrayColumn : public SingleChunkColumnBase {
|
||||
return {Views(), valid_data_};
|
||||
}
|
||||
|
||||
std::pair<std::vector<ArrayView>, FixedVector<bool>>
|
||||
ArrayViewsByOffsets(const FixedVector<int32_t>& offsets) const override {
|
||||
std::vector<ArrayView> views;
|
||||
FixedVector<bool> valid;
|
||||
views.reserve(offsets.size());
|
||||
valid.reserve(offsets.size());
|
||||
for (auto offset : offsets) {
|
||||
views.emplace_back(this->operator[](offset));
|
||||
valid.emplace_back(IsValid(offset));
|
||||
}
|
||||
return {std::move(views), std::move(valid)};
|
||||
}
|
||||
|
||||
protected:
|
||||
void
|
||||
ConstructViews() {
|
||||
|
||||
@ -774,7 +774,7 @@ ChunkedSegmentSealedImpl::chunk_string_view_impl(
|
||||
}
|
||||
|
||||
std::pair<std::vector<std::string_view>, FixedVector<bool>>
|
||||
ChunkedSegmentSealedImpl::chunk_view_by_offsets(
|
||||
ChunkedSegmentSealedImpl::chunk_string_views_by_offsets(
|
||||
FieldId field_id,
|
||||
int64_t chunk_id,
|
||||
const FixedVector<int32_t>& offsets) const {
|
||||
@ -783,10 +783,28 @@ ChunkedSegmentSealedImpl::chunk_view_by_offsets(
|
||||
"Can't get bitset element at " + std::to_string(field_id.get()));
|
||||
if (auto it = fields_.find(field_id); it != fields_.end()) {
|
||||
auto& field_data = it->second;
|
||||
return field_data->ViewsByOffsets(chunk_id, offsets);
|
||||
return field_data->StringViewsByOffsets(chunk_id, offsets);
|
||||
}
|
||||
PanicInfo(ErrorCode::UnexpectedError,
|
||||
"chunk_view_by_offsets only used for variable column field ");
|
||||
PanicInfo(
|
||||
ErrorCode::UnexpectedError,
|
||||
"chunk_string_views_by_offsets only used for variable column field ");
|
||||
}
|
||||
|
||||
std::pair<std::vector<ArrayView>, FixedVector<bool>>
|
||||
ChunkedSegmentSealedImpl::chunk_array_views_by_offsets(
|
||||
FieldId field_id,
|
||||
int64_t chunk_id,
|
||||
const FixedVector<int32_t>& offsets) const {
|
||||
std::shared_lock lck(mutex_);
|
||||
AssertInfo(get_bit(field_data_ready_bitset_, field_id),
|
||||
"Can't get bitset element at " + std::to_string(field_id.get()));
|
||||
if (auto it = fields_.find(field_id); it != fields_.end()) {
|
||||
auto& field_data = it->second;
|
||||
return field_data->ArrayViewsByOffsets(chunk_id, offsets);
|
||||
}
|
||||
PanicInfo(
|
||||
ErrorCode::UnexpectedError,
|
||||
"chunk_array_views_by_offsets only used for variable column field ");
|
||||
}
|
||||
|
||||
const index::IndexBase*
|
||||
|
||||
@ -246,9 +246,16 @@ class ChunkedSegmentSealedImpl : public SegmentSealed {
|
||||
std::optional<std::pair<int64_t, int64_t>> offset_len) const override;
|
||||
|
||||
std::pair<std::vector<std::string_view>, FixedVector<bool>>
|
||||
chunk_view_by_offsets(FieldId field_id,
|
||||
int64_t chunk_id,
|
||||
const FixedVector<int32_t>& offsets) const override;
|
||||
chunk_string_views_by_offsets(
|
||||
FieldId field_id,
|
||||
int64_t chunk_id,
|
||||
const FixedVector<int32_t>& offsets) const override;
|
||||
|
||||
// Override that returns, for the given field, chunk id and offsets, a vector
// of ArrayView paired with a FixedVector<bool>.
// NOTE(review): the bool vector presumably carries one validity flag per
// returned view -- confirm against the out-of-line definition.
std::pair<std::vector<ArrayView>, FixedVector<bool>>
|
||||
chunk_array_views_by_offsets(
|
||||
FieldId field_id,
|
||||
int64_t chunk_id,
|
||||
const FixedVector<int32_t>& offsets) const override;
|
||||
|
||||
std::pair<BufferView, FixedVector<bool>>
|
||||
get_chunk_buffer(FieldId field_id,
|
||||
|
||||
@ -419,12 +419,23 @@ SegmentGrowingImpl::chunk_array_view_impl(
|
||||
}
|
||||
|
||||
std::pair<std::vector<std::string_view>, FixedVector<bool>>
|
||||
SegmentGrowingImpl::chunk_view_by_offsets(
|
||||
SegmentGrowingImpl::chunk_string_views_by_offsets(
|
||||
FieldId field_id,
|
||||
int64_t chunk_id,
|
||||
const FixedVector<int32_t>& offsets) const {
|
||||
PanicInfo(ErrorCode::NotImplemented,
|
||||
"chunk view by offsets not implemented for growing segment");
|
||||
PanicInfo(
|
||||
ErrorCode::NotImplemented,
|
||||
"chunk string views by offsets not implemented for growing segment");
|
||||
}
|
||||
|
||||
std::pair<std::vector<ArrayView>, FixedVector<bool>>
|
||||
SegmentGrowingImpl::chunk_array_views_by_offsets(
|
||||
FieldId field_id,
|
||||
int64_t chunk_id,
|
||||
const FixedVector<int32_t>& offsets) const {
|
||||
PanicInfo(
|
||||
ErrorCode::NotImplemented,
|
||||
"chunk array views by offsets not implemented for growing segment");
|
||||
}
|
||||
|
||||
int64_t
|
||||
|
||||
@ -376,9 +376,16 @@ class SegmentGrowingImpl : public SegmentGrowing {
|
||||
std::optional<std::pair<int64_t, int64_t>> offset_len) const override;
|
||||
|
||||
std::pair<std::vector<std::string_view>, FixedVector<bool>>
|
||||
chunk_view_by_offsets(FieldId field_id,
|
||||
int64_t chunk_id,
|
||||
const FixedVector<int32_t>& offsets) const override;
|
||||
chunk_string_views_by_offsets(
|
||||
FieldId field_id,
|
||||
int64_t chunk_id,
|
||||
const FixedVector<int32_t>& offsets) const override;
|
||||
|
||||
// Override of the array-view-by-offsets accessor for growing segments.
// The out-of-line definition shown in this listing does not return a value:
// it calls PanicInfo with ErrorCode::NotImplemented.
std::pair<std::vector<ArrayView>, FixedVector<bool>>
|
||||
chunk_array_views_by_offsets(
|
||||
FieldId field_id,
|
||||
int64_t chunk_id,
|
||||
const FixedVector<int32_t>& offsets) const override;
|
||||
|
||||
std::pair<BufferView, FixedVector<bool>>
|
||||
get_chunk_buffer(FieldId field_id,
|
||||
|
||||
@ -212,16 +212,19 @@ class SegmentInternalInterface : public SegmentInterface {
|
||||
PanicInfo(ErrorCode::Unsupported,
|
||||
"get chunk views not supported for growing segment");
|
||||
}
|
||||
auto chunk_view = chunk_view_by_offsets(field_id, chunk_id, offsets);
|
||||
if constexpr (std::is_same_v<ViewType, std::string_view>) {
|
||||
return chunk_view;
|
||||
} else {
|
||||
return chunk_string_views_by_offsets(field_id, chunk_id, offsets);
|
||||
} else if constexpr (std::is_same_v<ViewType, Json>) {
|
||||
auto chunk_view =
|
||||
chunk_string_views_by_offsets(field_id, chunk_id, offsets);
|
||||
std::vector<ViewType> res;
|
||||
res.reserve(chunk_view.first.size());
|
||||
for (const auto& view : chunk_view.first) {
|
||||
res.emplace_back(view);
|
||||
}
|
||||
return {res, chunk_view.second};
|
||||
} else if constexpr (std::is_same_v<ViewType, ArrayView>) {
|
||||
return chunk_array_views_by_offsets(field_id, chunk_id, offsets);
|
||||
}
|
||||
}
|
||||
|
||||
@ -476,9 +479,15 @@ class SegmentInternalInterface : public SegmentInterface {
|
||||
int64_t length) const = 0;
|
||||
|
||||
virtual std::pair<std::vector<std::string_view>, FixedVector<bool>>
|
||||
chunk_view_by_offsets(FieldId field_id,
|
||||
int64_t chunk_id,
|
||||
const FixedVector<int32_t>& offsets) const = 0;
|
||||
chunk_string_views_by_offsets(
|
||||
FieldId field_id,
|
||||
int64_t chunk_id,
|
||||
const FixedVector<int32_t>& offsets) const = 0;
|
||||
|
||||
// Pure-virtual hook: implementations return, for the given field, chunk id
// and offsets, a vector of ArrayView paired with a FixedVector<bool>.
// NOTE(review): the bool vector presumably carries one validity flag per
// returned view -- confirm against the implementations.
virtual std::pair<std::vector<ArrayView>, FixedVector<bool>>
|
||||
chunk_array_views_by_offsets(FieldId field_id,
|
||||
int64_t chunk_id,
|
||||
const FixedVector<int32_t>& offsets) const = 0;
|
||||
|
||||
// internal API: return chunk_index in span, support scalar index only
|
||||
virtual const index::IndexBase*
|
||||
|
||||
@ -778,7 +778,7 @@ SegmentSealedImpl::chunk_array_view_impl(
|
||||
}
|
||||
|
||||
std::pair<std::vector<std::string_view>, FixedVector<bool>>
|
||||
SegmentSealedImpl::chunk_view_by_offsets(
|
||||
SegmentSealedImpl::chunk_string_views_by_offsets(
|
||||
FieldId field_id,
|
||||
int64_t chunk_id,
|
||||
const FixedVector<int32_t>& offsets) const {
|
||||
@ -787,10 +787,27 @@ SegmentSealedImpl::chunk_view_by_offsets(
|
||||
"Can't get bitset element at " + std::to_string(field_id.get()));
|
||||
if (auto it = fields_.find(field_id); it != fields_.end()) {
|
||||
auto& field_data = it->second;
|
||||
return field_data->ViewsByOffsets(offsets);
|
||||
return field_data->StringViewsByOffsets(offsets);
|
||||
}
|
||||
PanicInfo(
|
||||
ErrorCode::UnexpectedError,
|
||||
"chunk_string_views_by_offsets only used for variable column field ");
|
||||
}
|
||||
|
||||
std::pair<std::vector<ArrayView>, FixedVector<bool>>
|
||||
SegmentSealedImpl::chunk_array_views_by_offsets(
|
||||
FieldId field_id,
|
||||
int64_t chunk_id,
|
||||
const FixedVector<int32_t>& offsets) const {
|
||||
std::shared_lock lck(mutex_);
|
||||
AssertInfo(get_bit(field_data_ready_bitset_, field_id),
|
||||
"Can't get bitset element at " + std::to_string(field_id.get()));
|
||||
if (auto it = fields_.find(field_id); it != fields_.end()) {
|
||||
auto& field_data = it->second;
|
||||
return field_data->ArrayViewsByOffsets(offsets);
|
||||
}
|
||||
PanicInfo(ErrorCode::UnexpectedError,
|
||||
"chunk_view_by_offsets only used for variable column field ");
|
||||
"chunk_array_views_by_offsets only used for array column field ");
|
||||
}
|
||||
|
||||
const index::IndexBase*
|
||||
|
||||
@ -246,9 +246,16 @@ class SegmentSealedImpl : public SegmentSealed {
|
||||
std::optional<std::pair<int64_t, int64_t>> offset_len) const override;
|
||||
|
||||
std::pair<std::vector<std::string_view>, FixedVector<bool>>
|
||||
chunk_view_by_offsets(FieldId field_id,
|
||||
int64_t chunk_id,
|
||||
const FixedVector<int32_t>& offsets) const override;
|
||||
chunk_string_views_by_offsets(
|
||||
FieldId field_id,
|
||||
int64_t chunk_id,
|
||||
const FixedVector<int32_t>& offsets) const override;
|
||||
|
||||
// Override that returns, for the given field, chunk id and offsets, a vector
// of ArrayView paired with a FixedVector<bool>.
// NOTE(review): the bool vector presumably carries one validity flag per
// returned view -- confirm against the out-of-line definition.
std::pair<std::vector<ArrayView>, FixedVector<bool>>
|
||||
chunk_array_views_by_offsets(
|
||||
FieldId field_id,
|
||||
int64_t chunk_id,
|
||||
const FixedVector<int32_t>& offsets) const override;
|
||||
|
||||
std::pair<BufferView, FixedVector<bool>>
|
||||
get_chunk_buffer(FieldId field_id,
|
||||
|
||||
@ -354,7 +354,7 @@ func NewSegment(ctx context.Context,
|
||||
logger.Warn("create segment failed", zap.Error(err))
|
||||
return nil, err
|
||||
}
|
||||
log.Info("create segment done")
|
||||
logger.Info("create segment done")
|
||||
|
||||
segment := &LocalSegment{
|
||||
baseSegment: base,
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user