mirror of
https://gitee.com/milvus-io/milvus.git
synced 2026-01-07 19:31:51 +08:00
enhance: add ScalarFieldProto& overload to avoid unnecessary copies (#45743)
1. Array.h: Add an output_data(ScalarFieldProto&) overload to both the Array and ArrayView classes. 2. Use std::string_view instead of std::string for the VARCHAR and GEOMETRY types to avoid extra string copies. 3. Call Reserve(length_) before writing to proto objects to reduce memory reallocations. A simple test shows that these optimizations improve Array-of-VARCHAR bulk_subscript performance by 20%. issue: https://github.com/milvus-io/milvus/issues/45679 Signed-off-by: Buqian Zheng <zhengbuqian@gmail.com>
This commit is contained in:
parent
f51fcc09ae
commit
e00ad1098f
@ -296,11 +296,12 @@ class Array {
|
||||
return offsets_ptr_.get();
|
||||
}
|
||||
|
||||
ScalarFieldProto
|
||||
output_data() const {
|
||||
ScalarFieldProto data_array;
|
||||
void
|
||||
output_data(ScalarFieldProto& data_array) const {
|
||||
switch (element_type_) {
|
||||
case DataType::BOOL: {
|
||||
data_array.mutable_bool_data()->mutable_data()->Reserve(
|
||||
length_);
|
||||
for (int j = 0; j < length_; ++j) {
|
||||
auto element = get_data<bool>(j);
|
||||
data_array.mutable_bool_data()->add_data(element);
|
||||
@ -310,6 +311,7 @@ class Array {
|
||||
case DataType::INT8:
|
||||
case DataType::INT16:
|
||||
case DataType::INT32: {
|
||||
data_array.mutable_int_data()->mutable_data()->Reserve(length_);
|
||||
for (int j = 0; j < length_; ++j) {
|
||||
auto element = get_data<int>(j);
|
||||
data_array.mutable_int_data()->add_data(element);
|
||||
@ -317,6 +319,8 @@ class Array {
|
||||
break;
|
||||
}
|
||||
case DataType::INT64: {
|
||||
data_array.mutable_long_data()->mutable_data()->Reserve(
|
||||
length_);
|
||||
for (int j = 0; j < length_; ++j) {
|
||||
auto element = get_data<int64_t>(j);
|
||||
data_array.mutable_long_data()->add_data(element);
|
||||
@ -325,13 +329,18 @@ class Array {
|
||||
}
|
||||
case DataType::STRING:
|
||||
case DataType::VARCHAR: {
|
||||
data_array.mutable_string_data()->mutable_data()->Reserve(
|
||||
length_);
|
||||
for (int j = 0; j < length_; ++j) {
|
||||
auto element = get_data<std::string>(j);
|
||||
data_array.mutable_string_data()->add_data(element);
|
||||
auto element = get_data<std::string_view>(j);
|
||||
data_array.mutable_string_data()->add_data(element.data(),
|
||||
element.size());
|
||||
}
|
||||
break;
|
||||
}
|
||||
case DataType::FLOAT: {
|
||||
data_array.mutable_float_data()->mutable_data()->Reserve(
|
||||
length_);
|
||||
for (int j = 0; j < length_; ++j) {
|
||||
auto element = get_data<float>(j);
|
||||
data_array.mutable_float_data()->add_data(element);
|
||||
@ -339,6 +348,8 @@ class Array {
|
||||
break;
|
||||
}
|
||||
case DataType::DOUBLE: {
|
||||
data_array.mutable_double_data()->mutable_data()->Reserve(
|
||||
length_);
|
||||
for (int j = 0; j < length_; ++j) {
|
||||
auto element = get_data<double>(j);
|
||||
data_array.mutable_double_data()->add_data(element);
|
||||
@ -346,9 +357,12 @@ class Array {
|
||||
break;
|
||||
}
|
||||
case DataType::GEOMETRY: {
|
||||
data_array.mutable_geometry_data()->mutable_data()->Reserve(
|
||||
length_);
|
||||
for (int j = 0; j < length_; ++j) {
|
||||
auto element = get_data<std::string>(j);
|
||||
data_array.mutable_geometry_data()->add_data(element);
|
||||
auto element = get_data<std::string_view>(j);
|
||||
data_array.mutable_geometry_data()->add_data(
|
||||
element.data(), element.size());
|
||||
}
|
||||
break;
|
||||
}
|
||||
@ -356,6 +370,12 @@ class Array {
|
||||
// empty array
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Convenience overload: creates a local ScalarFieldProto, fills it by
// delegating to output_data(ScalarFieldProto&), and returns it by value.
ScalarFieldProto
|
||||
output_data() const {
|
||||
ScalarFieldProto data_array;
|
||||
output_data(data_array);
|
||||
return data_array;
|
||||
}
|
||||
|
||||
@ -541,11 +561,12 @@ class ArrayView {
|
||||
return reinterpret_cast<T*>(data_)[index];
|
||||
}
|
||||
|
||||
ScalarFieldProto
|
||||
output_data() const {
|
||||
ScalarFieldProto data_array;
|
||||
void
|
||||
output_data(ScalarFieldProto& data_array) const {
|
||||
switch (element_type_) {
|
||||
case DataType::BOOL: {
|
||||
data_array.mutable_bool_data()->mutable_data()->Reserve(
|
||||
length_);
|
||||
for (int j = 0; j < length_; ++j) {
|
||||
auto element = get_data<bool>(j);
|
||||
data_array.mutable_bool_data()->add_data(element);
|
||||
@ -555,6 +576,7 @@ class ArrayView {
|
||||
case DataType::INT8:
|
||||
case DataType::INT16:
|
||||
case DataType::INT32: {
|
||||
data_array.mutable_int_data()->mutable_data()->Reserve(length_);
|
||||
for (int j = 0; j < length_; ++j) {
|
||||
auto element = get_data<int>(j);
|
||||
data_array.mutable_int_data()->add_data(element);
|
||||
@ -562,6 +584,8 @@ class ArrayView {
|
||||
break;
|
||||
}
|
||||
case DataType::INT64: {
|
||||
data_array.mutable_long_data()->mutable_data()->Reserve(
|
||||
length_);
|
||||
for (int j = 0; j < length_; ++j) {
|
||||
auto element = get_data<int64_t>(j);
|
||||
data_array.mutable_long_data()->add_data(element);
|
||||
@ -570,13 +594,18 @@ class ArrayView {
|
||||
}
|
||||
case DataType::STRING:
|
||||
case DataType::VARCHAR: {
|
||||
data_array.mutable_string_data()->mutable_data()->Reserve(
|
||||
length_);
|
||||
for (int j = 0; j < length_; ++j) {
|
||||
auto element = get_data<std::string>(j);
|
||||
data_array.mutable_string_data()->add_data(element);
|
||||
auto element = get_data<std::string_view>(j);
|
||||
data_array.mutable_string_data()->add_data(element.data(),
|
||||
element.size());
|
||||
}
|
||||
break;
|
||||
}
|
||||
case DataType::FLOAT: {
|
||||
data_array.mutable_float_data()->mutable_data()->Reserve(
|
||||
length_);
|
||||
for (int j = 0; j < length_; ++j) {
|
||||
auto element = get_data<float>(j);
|
||||
data_array.mutable_float_data()->add_data(element);
|
||||
@ -584,6 +613,8 @@ class ArrayView {
|
||||
break;
|
||||
}
|
||||
case DataType::DOUBLE: {
|
||||
data_array.mutable_double_data()->mutable_data()->Reserve(
|
||||
length_);
|
||||
for (int j = 0; j < length_; ++j) {
|
||||
auto element = get_data<double>(j);
|
||||
data_array.mutable_double_data()->add_data(element);
|
||||
@ -591,9 +622,12 @@ class ArrayView {
|
||||
break;
|
||||
}
|
||||
case DataType::GEOMETRY: {
|
||||
data_array.mutable_geometry_data()->mutable_data()->Reserve(
|
||||
length_);
|
||||
for (int j = 0; j < length_; ++j) {
|
||||
auto element = get_data<std::string>(j);
|
||||
data_array.mutable_geometry_data()->add_data(element);
|
||||
auto element = get_data<std::string_view>(j);
|
||||
data_array.mutable_geometry_data()->add_data(
|
||||
element.data(), element.size());
|
||||
}
|
||||
break;
|
||||
}
|
||||
@ -601,6 +635,12 @@ class ArrayView {
|
||||
// empty array
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Convenience overload: creates a local ScalarFieldProto, fills it by
// delegating to output_data(ScalarFieldProto&), and returns it by value.
ScalarFieldProto
|
||||
output_data() const {
|
||||
ScalarFieldProto data_array;
|
||||
output_data(data_array);
|
||||
return data_array;
|
||||
}
|
||||
|
||||
|
||||
@ -629,16 +629,15 @@ class ChunkedArrayColumn : public ChunkedColumnBase {
|
||||
|
||||
void
|
||||
BulkArrayAt(milvus::OpContext* op_ctx,
|
||||
std::function<void(ScalarFieldProto&&, size_t)> fn,
|
||||
std::function<void(const ArrayView&, size_t)> fn,
|
||||
const int64_t* offsets,
|
||||
int64_t count) const override {
|
||||
auto [cids, offsets_in_chunk] = ToChunkIdAndOffset(offsets, count);
|
||||
auto ca = SemiInlineGet(slot_->PinCells(op_ctx, cids));
|
||||
for (int64_t i = 0; i < count; i++) {
|
||||
auto array = static_cast<ArrayChunk*>(ca->get_cell_of(cids[i]))
|
||||
->View(offsets_in_chunk[i])
|
||||
.output_data();
|
||||
fn(std::move(array), i);
|
||||
auto view = static_cast<ArrayChunk*>(ca->get_cell_of(cids[i]))
|
||||
->View(offsets_in_chunk[i]);
|
||||
fn(view, i);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@ -626,7 +626,7 @@ class ProxyChunkColumn : public ChunkedColumnInterface {
|
||||
|
||||
void
|
||||
BulkArrayAt(milvus::OpContext* op_ctx,
|
||||
std::function<void(ScalarFieldProto&&, size_t)> fn,
|
||||
std::function<void(const ArrayView&, size_t)> fn,
|
||||
const int64_t* offsets,
|
||||
int64_t count) const override {
|
||||
if (!IsChunkedArrayColumnDataType(data_type_)) {
|
||||
@ -639,10 +639,9 @@ class ProxyChunkColumn : public ChunkedColumnInterface {
|
||||
for (int64_t i = 0; i < count; i++) {
|
||||
auto* group_chunk = ca->get_cell_of(cids[i]);
|
||||
auto chunk = group_chunk->GetChunk(field_id_);
|
||||
auto array = static_cast<ArrayChunk*>(chunk.get())
|
||||
->View(offsets_in_chunk[i])
|
||||
.output_data();
|
||||
fn(std::move(array), i);
|
||||
auto view = static_cast<ArrayChunk*>(chunk.get())
|
||||
->View(offsets_in_chunk[i]);
|
||||
fn(view, i);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@ -187,7 +187,7 @@ class ChunkedColumnInterface {
|
||||
|
||||
virtual void
|
||||
BulkArrayAt(milvus::OpContext* op_ctx,
|
||||
std::function<void(ScalarFieldProto&&, size_t)> fn,
|
||||
std::function<void(const ArrayView&, size_t)> fn,
|
||||
const int64_t* offsets,
|
||||
int64_t count) const {
|
||||
ThrowInfo(ErrorCode::Unsupported,
|
||||
|
||||
@ -1678,8 +1678,8 @@ ChunkedSegmentSealedImpl::bulk_subscript_array_impl(
|
||||
google::protobuf::RepeatedPtrField<T>* dst) {
|
||||
column->BulkArrayAt(
|
||||
op_ctx,
|
||||
[dst](ScalarFieldProto&& array, size_t i) {
|
||||
dst->at(i) = std::move(array);
|
||||
[dst](const ArrayView& view, size_t i) {
|
||||
view.output_data(dst->at(i));
|
||||
},
|
||||
seg_offsets,
|
||||
count);
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user