From 6bbed3b019ff82fea2311d63598fdfdba49e5f03 Mon Sep 17 00:00:00 2001 From: congqixia Date: Thu, 10 Jul 2025 21:26:48 +0800 Subject: [PATCH] fix: [AddField] Add shared_lock for insert prevent race (#43229) Related to #43113 When schema change happens, insert shall not happen, otherwise: - Data race may happen causing insertion failure - Inconsistent data schema This PR add shared_lock prevent this data race. --------- Signed-off-by: Congqi Xia --- internal/core/src/segcore/ChunkedSegmentSealedImpl.cpp | 9 +++++---- internal/core/src/segcore/SegmentGrowingImpl.cpp | 7 ++++++- internal/core/src/segcore/SegmentInterface.h | 4 ++++ 3 files changed, 15 insertions(+), 5 deletions(-) diff --git a/internal/core/src/segcore/ChunkedSegmentSealedImpl.cpp b/internal/core/src/segcore/ChunkedSegmentSealedImpl.cpp index 8d64e26294..caea5c5cf4 100644 --- a/internal/core/src/segcore/ChunkedSegmentSealedImpl.cpp +++ b/internal/core/src/segcore/ChunkedSegmentSealedImpl.cpp @@ -357,10 +357,11 @@ ChunkedSegmentSealedImpl::load_field_data_internal( auto field_id = FieldId(id); - auto field_data_info = FieldDataInfo(field_id.get(), - num_rows, - load_info.mmap_dir_path, - schema_->ShouldLoadField(field_id)); + auto field_data_info = + FieldDataInfo(field_id.get(), + num_rows, + load_info.mmap_dir_path, + schema_->ShouldLoadField(field_id)); LOG_INFO("segment {} loads field {} with num_rows {}, sorted by pk {}", this->get_segment_id(), field_id.get(), diff --git a/internal/core/src/segcore/SegmentGrowingImpl.cpp b/internal/core/src/segcore/SegmentGrowingImpl.cpp index 6b326a14f1..77d6a24b06 100644 --- a/internal/core/src/segcore/SegmentGrowingImpl.cpp +++ b/internal/core/src/segcore/SegmentGrowingImpl.cpp @@ -90,6 +90,11 @@ SegmentGrowingImpl::Insert(int64_t reserved_offset, InsertRecordProto* insert_record_proto) { AssertInfo(insert_record_proto->num_rows() == num_rows, "Entities_raw count not equal to insert size"); + // protect schema being changed during insert + // schema change cannot happends during insertion, + // otherwise, there might be some data not following new schema + std::shared_lock lck(sch_mutex_); + // step 1: check insert data if valid std::unordered_map field_id_to_offset; int64_t field_offset = 0; @@ -1263,7 +1268,7 @@ SegmentGrowingImpl::LazyCheckSchema(SchemaPtr sch) { void SegmentGrowingImpl::Reopen(SchemaPtr sch) { - std::unique_lock lck(mutex_); + std::unique_lock lck(sch_mutex_); auto absent_fields = sch->AbsentFields(*schema_); diff --git a/internal/core/src/segcore/SegmentInterface.h b/internal/core/src/segcore/SegmentInterface.h index 33d4f55727..1471839b46 100644 --- a/internal/core/src/segcore/SegmentInterface.h +++ b/internal/core/src/segcore/SegmentInterface.h @@ -13,6 +13,7 @@ #include #include +#include #include #include #include @@ -546,6 +547,9 @@ class SegmentInternalInterface : public SegmentInterface { search_pk(const PkType& pk, Timestamp timestamp) const = 0; protected: + // mutex protecting rw options on schema_ + std::shared_mutex sch_mutex_; + mutable std::shared_mutex mutex_; // fieldID -> std::pair std::unordered_map>