From 005d178a0ebae8e35d27a33afa1896c724dc383a Mon Sep 17 00:00:00 2001 From: yah01 Date: Mon, 20 Mar 2023 10:19:56 +0800 Subject: [PATCH] Optimize performance of insert & query & search (#22829) - Reduce 1x copy of inserting int8/int16 into growing segment - Reduce 1x copy of retrieving primary keys - Reduce 1x copy of inserting/loading/deleting/filtering primary keys - Reduce 1x copy of reducing string results Signed-off-by: yah01 --- internal/core/src/segcore/ConcurrentVector.cpp | 8 ++++---- internal/core/src/segcore/SegmentInterface.cpp | 7 ++++--- internal/core/src/segcore/Utils.cpp | 6 +++--- 3 files changed, 11 insertions(+), 10 deletions(-) diff --git a/internal/core/src/segcore/ConcurrentVector.cpp b/internal/core/src/segcore/ConcurrentVector.cpp index 32cee42b83..88ef77be05 100644 --- a/internal/core/src/segcore/ConcurrentVector.cpp +++ b/internal/core/src/segcore/ConcurrentVector.cpp @@ -39,13 +39,13 @@ VectorBase::set_data_raw(ssize_t element_offset, element_count); } case DataType::INT8: { - auto src_data = data->scalars().int_data().data(); + auto& src_data = data->scalars().int_data().data(); std::vector data_raw(src_data.size()); std::copy_n(src_data.data(), src_data.size(), data_raw.data()); return set_data_raw(element_offset, data_raw.data(), element_count); } case DataType::INT16: { - auto src_data = data->scalars().int_data().data(); + auto& src_data = data->scalars().int_data().data(); std::vector data_raw(src_data.size()); std::copy_n(src_data.data(), src_data.size(), data_raw.data()); return set_data_raw(element_offset, data_raw.data(), element_count); @@ -104,13 +104,13 @@ VectorBase::fill_chunk_data(ssize_t element_count, element_count); } case DataType::INT8: { - auto src_data = data->scalars().int_data().data(); + auto& src_data = data->scalars().int_data().data(); std::vector data_raw(src_data.size()); std::copy_n(src_data.data(), src_data.size(), data_raw.data()); return fill_chunk_data(data_raw.data(), element_count); } case DataType::INT16: { - auto src_data = data->scalars().int_data().data(); + auto& src_data = data->scalars().int_data().data(); std::vector data_raw(src_data.size()); std::copy_n(src_data.data(), src_data.size(), data_raw.data()); return fill_chunk_data(data_raw.data(), element_count); diff --git a/internal/core/src/segcore/SegmentInterface.cpp b/internal/core/src/segcore/SegmentInterface.cpp index 6cf7df460c..ba99745609 100644 --- a/internal/core/src/segcore/SegmentInterface.cpp +++ b/internal/core/src/segcore/SegmentInterface.cpp @@ -133,16 +133,17 @@ SegmentInternalInterface::Retrieve(const query::RetrievePlan* plan, switch (field_meta.get_data_type()) { case DataType::INT64: { auto int_ids = ids->mutable_int_id(); - auto src_data = col_data->scalars().long_data(); + auto& src_data = col_data->scalars().long_data(); int_ids->mutable_data()->Add(src_data.data().begin(), src_data.data().end()); break; } case DataType::VARCHAR: { auto str_ids = ids->mutable_str_id(); - auto src_data = col_data->scalars().string_data(); - for (auto i = 0; i < src_data.data_size(); ++i) + auto& src_data = col_data->scalars().string_data(); + for (auto i = 0; i < src_data.data_size(); ++i) { *(str_ids->mutable_data()->Add()) = src_data.data(i); + } break; } default: { diff --git a/internal/core/src/segcore/Utils.cpp b/internal/core/src/segcore/Utils.cpp index d3081bbce1..89cfcc57e3 100644 --- a/internal/core/src/segcore/Utils.cpp +++ b/internal/core/src/segcore/Utils.cpp @@ -25,7 +25,7 @@ ParsePksFromFieldData(std::vector& pks, const DataArray& data) { break; } case DataType::VARCHAR: { - auto src_data = data.scalars().string_data().data(); + auto& src_data = data.scalars().string_data().data(); std::copy(src_data.begin(), src_data.end(), pks.begin()); break; } @@ -47,7 +47,7 @@ ParsePksFromIDs(std::vector& pks, break; } case DataType::VARCHAR: { - auto source_data = data.str_id().data(); + auto& source_data = data.str_id().data(); std::copy(source_data.begin(), source_data.end(), pks.begin()); break; } @@ -363,7 +363,7 @@ MergeDataArray( continue; } case DataType::VARCHAR: { - auto data = src_field_data->scalars().string_data(); + auto& data = src_field_data->scalars().string_data(); auto obj = scalar_array->mutable_string_data(); *(obj->mutable_data()->Add()) = data.data(src_offset); continue;