fix: correct index_has_raw_data logic for fielddata loading (#46117)

Related to #46098
This fix addresses a bug where the segment loader incorrectly determined
whether scalar fields have raw data in their indexes, leading to
unnecessary field data loading or skipping indexed raw data retrieval.

- Build `field_ids` vector that handles both single field and column
group cases (when `child_fields_size() > 0`)
- Move the mmap setting and index_has_raw_data checks before the skip
decision, iterating over the correctly built `field_ids`
- Fix the boolean AND logic in both `Load()` and `LoadColumnGroup()` to
properly check if ALL fields in the group have raw data in their indexes

This bug was hiding the root cause of issue #46098, where QueryNode
panics when outputting timestamptz data from scalar index with raw data.

Signed-off-by: Congqi Xia <congqi.xia@zilliz.com>
This commit is contained in:
congqixia 2025-12-05 17:47:12 +08:00 committed by GitHub
parent 141547d8a8
commit 8e82631282
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

View File

@ -407,9 +407,9 @@ ChunkedSegmentSealedImpl::LoadColumnGroup(
is_vector = true;
}
std::shared_lock lck(mutex_);
if (index_has_raw_data_.find(field_id) != index_has_raw_data_.end()) {
index_has_rawdata =
index_has_raw_data_.at(field_id) && index_has_rawdata;
auto iter = index_has_raw_data_.find(field_id);
if (iter != index_has_raw_data_.end()) {
index_has_rawdata = index_has_rawdata && iter->second;
} else {
index_has_rawdata = false;
}
@ -2973,22 +2973,58 @@ ChunkedSegmentSealedImpl::Load(milvus::tracer::TraceContext& trace_ctx) {
segment_load_info_.storageversion();
const auto& field_binlog = segment_load_info_.binlog_paths(i);
auto field_id = FieldId(field_binlog.fieldid());
std::vector<FieldId> field_ids;
// when child fields specified, field id is group id, child field ids are actual id values here
if (field_binlog.child_fields_size() > 0) {
field_ids.reserve(field_binlog.child_fields_size());
for (auto field_id : field_binlog.child_fields()) {
field_ids.emplace_back(field_id);
}
} else {
field_ids.emplace_back(field_binlog.fieldid());
}
bool index_has_raw_data = true;
bool has_mmap_setting = false;
bool mmap_enabled = false;
bool is_vector = false;
for (const auto& child_field_id : field_ids) {
auto& field_meta = schema_->operator[](child_field_id);
if (IsVectorDataType(field_meta.get_data_type())) {
is_vector = true;
}
// if field has mmap setting, use it
// - mmap setting at collection level, then all field are the same
// - mmap setting at field level, we define that as long as one field shall be mmap, then whole group shall be mmaped
auto [field_has_setting, field_mmap_enabled] =
schema_->MmapEnabled(child_field_id);
has_mmap_setting = has_mmap_setting || field_has_setting;
mmap_enabled = mmap_enabled || field_mmap_enabled;
auto iter = index_has_raw_data_.find(child_field_id);
if (iter != index_has_raw_data_.end()) {
index_has_raw_data = index_has_raw_data && iter->second;
} else {
index_has_raw_data = false;
}
}
auto group_id = field_binlog.fieldid();
// Skip if this field has an index with raw data
auto iter = index_has_raw_data_.find(field_id);
if (iter != index_has_raw_data_.end() && iter->second) {
if (index_has_raw_data) {
LOG_INFO(
"Skip loading binlog for segment {} field {} because index "
"Skip loading fielddata for segment {} group {} because index "
"has raw data",
id_,
field_id.get());
group_id);
continue;
}
// Build FieldBinlogInfo
FieldBinlogInfo field_binlog_info;
field_binlog_info.field_id = field_id.get();
field_binlog_info.field_id = group_id;
// Calculate total row count and collect binlog paths
int64_t total_entries = 0;
@ -3004,24 +3040,6 @@ ChunkedSegmentSealedImpl::Load(milvus::tracer::TraceContext& trace_ctx) {
}
field_binlog_info.row_count = total_entries;
bool has_mmap_setting = false;
bool mmap_enabled = false;
bool is_vector = false;
for (const auto& child_field_id : field_binlog.child_fields()) {
auto& field_meta = schema_->operator[](FieldId(child_field_id));
if (IsVectorDataType(field_meta.get_data_type())) {
is_vector = true;
}
// if field has mmap setting, use it
// - mmap setting at collection level, then all field are the same
// - mmap setting at field level, we define that as long as one field shall be mmap, then whole group shall be mmaped
auto [field_has_setting, field_mmap_enabled] =
schema_->MmapEnabled(field_id);
has_mmap_setting = has_mmap_setting || field_has_setting;
mmap_enabled = mmap_enabled || field_mmap_enabled;
}
auto& mmap_config = storage::MmapManager::GetInstance().GetMmapConfig();
auto global_use_mmap = is_vector
? mmap_config.GetVectorFieldEnableMmap()
@ -3030,9 +3048,9 @@ ChunkedSegmentSealedImpl::Load(milvus::tracer::TraceContext& trace_ctx) {
has_mmap_setting ? mmap_enabled : global_use_mmap;
// Store in map
load_field_data_info.field_infos[field_id.get()] = field_binlog_info;
load_field_data_info.field_infos[group_id] = field_binlog_info;
field_data_to_load[field_id] = load_field_data_info;
field_data_to_load[FieldId(group_id)] = load_field_data_info;
}
// Step 4: Load field data for non-indexed fields