From 8780e12570c9f7c777eda1a37e774ad4dc895a34 Mon Sep 17 00:00:00 2001 From: congqixia Date: Wed, 10 Dec 2025 16:25:13 +0800 Subject: [PATCH] fix: use assertion instead of modifying schema under shared lock (#46242) Related to #46225 Replace the heterogeneous insert data handling logic that modified schema_ while holding a shared lock with an assertion. The previous implementation had a concurrency bug where schema modification operations were performed under a shared_lock, which violates mutex semantics and can lead to data races. Issue: #46225 reported two problems: 1. Schema modification under shared_lock (not exclusive lock) 2. Access to schema_ not protected by mutex in growing segment The removed code attempted to handle "added fields" by: - Adding new field to schema (schema_->AddField) - Appending field metadata to insert_record_ - Setting default data for existing rows All these write operations were performed while holding only a shared_lock, which is incorrect since shared_locks are meant for read-only operations. This fix replaces the unsafe modification with an assertion that fails if an unexpected new field is encountered in a growing segment with existing data. The proper handling of schema changes should go through the Reopen() path which correctly acquires a unique_lock before modifying schema_. Signed-off-by: Congqi Xia --- .../core/src/segcore/SegmentGrowingImpl.cpp | 24 ++++--------------- 1 file changed, 4 insertions(+), 20 deletions(-) diff --git a/internal/core/src/segcore/SegmentGrowingImpl.cpp b/internal/core/src/segcore/SegmentGrowingImpl.cpp index ce2f285eaf..de65712b93 100644 --- a/internal/core/src/segcore/SegmentGrowingImpl.cpp +++ b/internal/core/src/segcore/SegmentGrowingImpl.cpp @@ -108,26 +108,10 @@ SegmentGrowingImpl::Insert(int64_t reserved_offset, auto field_id = FieldId(field.field_id()); AssertInfo(!field_id_to_offset.count(field_id), "duplicate field data"); field_id_to_offset.emplace(field_id, field_offset++); - // may be added field, add the null if has existed data - if (exist_rows > 0 && !insert_record_.is_data_exist(field_id)) { - LOG_WARN( - "heterogeneous insert data found for segment {}, field id {}, " - "data type {}", - id_, - field_id.get(), - field.type()); - schema_->AddField(FieldName(field.field_name()), - field_id, - DataType(field.type()), - true, - std::nullopt); - auto field_meta = schema_->get_fields().at(field_id); - insert_record_.append_field_meta( - field_id, field_meta, size_per_chunk(), mmap_descriptor_); - auto data = bulk_subscript_not_exist_field(field_meta, exist_rows); - insert_record_.get_data_base(field_id)->set_data_raw( - 0, exist_rows, data.get(), field_meta); - } + AssertInfo(exist_rows == 0 || insert_record_.is_data_exist(field_id), + "unexpected new field in growing segment {}, field id {}", + id_, + field.field_id()); } // segment have latest schema while insert used old one