mirror of
https://gitee.com/milvus-io/milvus.git
synced 2025-12-07 01:28:27 +08:00
enhance: [AddField] Add log for segcore segment schema change (#43215)
Related to #39178 This PR add logs for segment schema change operations. Also fixes the nit comments from PR #42490 --------- Signed-off-by: Congqi Xia <congqi.xia@zilliz.com>
This commit is contained in:
parent
7f8c5c9bb8
commit
f027eea545
@ -302,7 +302,7 @@ class Schema {
|
|||||||
}
|
}
|
||||||
|
|
||||||
bool
|
bool
|
||||||
ShallLoadField(FieldId field_id) {
|
ShouldLoadField(FieldId field_id) {
|
||||||
return load_fields_.empty() || load_fields_.count(field_id) > 0;
|
return load_fields_.empty() || load_fields_.count(field_id) > 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@ -32,11 +32,11 @@ struct FieldDataInfo {
|
|||||||
FieldDataInfo(int64_t field_id,
|
FieldDataInfo(int64_t field_id,
|
||||||
size_t row_count,
|
size_t row_count,
|
||||||
std::string mmap_dir_path = "",
|
std::string mmap_dir_path = "",
|
||||||
bool in_list = false)
|
bool in_load_list = false)
|
||||||
: field_id(field_id),
|
: field_id(field_id),
|
||||||
row_count(row_count),
|
row_count(row_count),
|
||||||
mmap_dir_path(std::move(mmap_dir_path)),
|
mmap_dir_path(std::move(mmap_dir_path)),
|
||||||
in_load_list(in_list) {
|
in_load_list(in_load_list) {
|
||||||
arrow_reader_channel = std::make_shared<ArrowReaderChannel>();
|
arrow_reader_channel = std::make_shared<ArrowReaderChannel>();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@ -293,7 +293,7 @@ ChunkedSegmentSealedImpl::load_column_group_data_internal(
|
|||||||
for (int i = 0; i < field_id_list.size(); ++i) {
|
for (int i = 0; i < field_id_list.size(); ++i) {
|
||||||
milvus_field_ids.push_back(FieldId(field_id_list.Get(i)));
|
milvus_field_ids.push_back(FieldId(field_id_list.Get(i)));
|
||||||
merged_in_load_list = merged_in_load_list ||
|
merged_in_load_list = merged_in_load_list ||
|
||||||
schema_->ShallLoadField(milvus_field_ids[i]);
|
schema_->ShouldLoadField(milvus_field_ids[i]);
|
||||||
}
|
}
|
||||||
|
|
||||||
auto column_group_info = FieldDataInfo(column_group_id.get(),
|
auto column_group_info = FieldDataInfo(column_group_id.get(),
|
||||||
@ -360,7 +360,7 @@ ChunkedSegmentSealedImpl::load_field_data_internal(
|
|||||||
auto field_data_info = FieldDataInfo(field_id.get(),
|
auto field_data_info = FieldDataInfo(field_id.get(),
|
||||||
num_rows,
|
num_rows,
|
||||||
load_info.mmap_dir_path,
|
load_info.mmap_dir_path,
|
||||||
schema_->ShallLoadField(field_id));
|
schema_->ShouldLoadField(field_id));
|
||||||
LOG_INFO("segment {} loads field {} with num_rows {}, sorted by pk {}",
|
LOG_INFO("segment {} loads field {} with num_rows {}, sorted by pk {}",
|
||||||
this->get_segment_id(),
|
this->get_segment_id(),
|
||||||
field_id.get(),
|
field_id.get(),
|
||||||
@ -1892,6 +1892,12 @@ ChunkedSegmentSealedImpl::RemoveFieldFile(const FieldId field_id) {
|
|||||||
void
|
void
|
||||||
ChunkedSegmentSealedImpl::LazyCheckSchema(SchemaPtr sch) {
|
ChunkedSegmentSealedImpl::LazyCheckSchema(SchemaPtr sch) {
|
||||||
if (sch->get_schema_version() > schema_->get_schema_version()) {
|
if (sch->get_schema_version() > schema_->get_schema_version()) {
|
||||||
|
LOG_INFO(
|
||||||
|
"lazy check schema segment {} found newer schema version, current "
|
||||||
|
"schema version {}, new schema version {}",
|
||||||
|
id_,
|
||||||
|
schema_->get_schema_version(),
|
||||||
|
sch->get_schema_version());
|
||||||
Reopen(sch);
|
Reopen(sch);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -1917,8 +1923,9 @@ ChunkedSegmentSealedImpl::load_field_data_common(
|
|||||||
}
|
}
|
||||||
|
|
||||||
if (!enable_mmap) {
|
if (!enable_mmap) {
|
||||||
if (!is_proxy_column || is_proxy_column &&
|
if (!is_proxy_column ||
|
||||||
field_id.get() != DEFAULT_SHORT_COLUMN_GROUP_ID) {
|
is_proxy_column &&
|
||||||
|
field_id.get() != DEFAULT_SHORT_COLUMN_GROUP_ID) {
|
||||||
stats_.mem_size += column->DataByteSize();
|
stats_.mem_size += column->DataByteSize();
|
||||||
}
|
}
|
||||||
if (!IsVariableDataType(data_type) || IsStringDataType(data_type)) {
|
if (!IsVariableDataType(data_type) || IsStringDataType(data_type)) {
|
||||||
@ -2017,9 +2024,14 @@ ChunkedSegmentSealedImpl::FinishLoad() {
|
|||||||
|
|
||||||
void
|
void
|
||||||
ChunkedSegmentSealedImpl::fill_empty_field(const FieldMeta& field_meta) {
|
ChunkedSegmentSealedImpl::fill_empty_field(const FieldMeta& field_meta) {
|
||||||
|
auto field_id = field_meta.get_id();
|
||||||
|
LOG_INFO("start fill empty field {} (data type {}) for sealed segment {}",
|
||||||
|
field_meta.get_data_type(),
|
||||||
|
field_id.get(),
|
||||||
|
id_);
|
||||||
int64_t size = num_rows_.value();
|
int64_t size = num_rows_.value();
|
||||||
AssertInfo(size > 0, "Chunked Sealed segment must have more than 0 row");
|
AssertInfo(size > 0, "Chunked Sealed segment must have more than 0 row");
|
||||||
auto field_data_info = FieldDataInfo(field_meta.get_id().get(), size, "");
|
auto field_data_info = FieldDataInfo(field_id.get(), size, "");
|
||||||
std::unique_ptr<Translator<milvus::Chunk>> translator =
|
std::unique_ptr<Translator<milvus::Chunk>> translator =
|
||||||
std::make_unique<storagev1translator::DefaultValueChunkTranslator>(
|
std::make_unique<storagev1translator::DefaultValueChunkTranslator>(
|
||||||
get_segment_id(), field_meta, field_data_info, false);
|
get_segment_id(), field_meta, field_data_info, false);
|
||||||
@ -2053,9 +2065,13 @@ ChunkedSegmentSealedImpl::fill_empty_field(const FieldMeta& field_meta) {
|
|||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
auto field_id = field_meta.get_id();
|
|
||||||
fields_.emplace(field_id, column);
|
fields_.emplace(field_id, column);
|
||||||
set_bit(field_data_ready_bitset_, field_id, true);
|
set_bit(field_data_ready_bitset_, field_id, true);
|
||||||
|
LOG_INFO("fill empty field {} (data type {}) for growing segment {} done",
|
||||||
|
field_meta.get_data_type(),
|
||||||
|
field_id.get(),
|
||||||
|
id_);
|
||||||
}
|
}
|
||||||
|
|
||||||
} // namespace milvus::segcore
|
} // namespace milvus::segcore
|
||||||
|
|||||||
@ -479,10 +479,6 @@ CreateSealedSegment(
|
|||||||
const SegcoreConfig& segcore_config = SegcoreConfig::default_config(),
|
const SegcoreConfig& segcore_config = SegcoreConfig::default_config(),
|
||||||
bool is_sorted_by_pk = false) {
|
bool is_sorted_by_pk = false) {
|
||||||
return std::make_unique<ChunkedSegmentSealedImpl>(
|
return std::make_unique<ChunkedSegmentSealedImpl>(
|
||||||
schema,
|
schema, index_meta, segcore_config, segment_id, is_sorted_by_pk);
|
||||||
index_meta,
|
|
||||||
segcore_config,
|
|
||||||
segment_id,
|
|
||||||
is_sorted_by_pk);
|
|
||||||
}
|
}
|
||||||
} // namespace milvus::segcore
|
} // namespace milvus::segcore
|
||||||
|
|||||||
@ -101,6 +101,12 @@ SegmentGrowingImpl::Insert(int64_t reserved_offset,
|
|||||||
field_id_to_offset.emplace(field_id, field_offset++);
|
field_id_to_offset.emplace(field_id, field_offset++);
|
||||||
// may be added field, add the null if has existed data
|
// may be added field, add the null if has existed data
|
||||||
if (exist_rows > 0 && !insert_record_.is_data_exist(field_id)) {
|
if (exist_rows > 0 && !insert_record_.is_data_exist(field_id)) {
|
||||||
|
LOG_WARN(
|
||||||
|
"heterogeneous insert data found for segment {}, field id {}, "
|
||||||
|
"data type {}",
|
||||||
|
id_,
|
||||||
|
field_id.get(),
|
||||||
|
field.type());
|
||||||
schema_->AddField(FieldName(field.field_name()),
|
schema_->AddField(FieldName(field.field_name()),
|
||||||
field_id,
|
field_id,
|
||||||
DataType(field.type()),
|
DataType(field.type()),
|
||||||
@ -124,6 +130,13 @@ SegmentGrowingImpl::Insert(int64_t reserved_offset,
|
|||||||
if (field_id_to_offset.count(field_id) > 0) {
|
if (field_id_to_offset.count(field_id) > 0) {
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
LOG_INFO(
|
||||||
|
"schema newer than insert data found for segment {}, attach empty "
|
||||||
|
"field data"
|
||||||
|
"not exist field {}, data type {}",
|
||||||
|
id_,
|
||||||
|
field_id.get(),
|
||||||
|
field_meta.get_data_type());
|
||||||
auto data = bulk_subscript_not_exist_field(field_meta, num_rows);
|
auto data = bulk_subscript_not_exist_field(field_meta, num_rows);
|
||||||
insert_record_proto->add_fields_data()->CopyFrom(*data);
|
insert_record_proto->add_fields_data()->CopyFrom(*data);
|
||||||
field_id_to_offset.emplace(field_id, field_offset++);
|
field_id_to_offset.emplace(field_id, field_offset++);
|
||||||
@ -500,8 +513,10 @@ SegmentGrowingImpl::load_column_group_data_internal(
|
|||||||
auto field_data = storage::CreateFieldData(
|
auto field_data = storage::CreateFieldData(
|
||||||
data_type,
|
data_type,
|
||||||
field.second.is_nullable(),
|
field.second.is_nullable(),
|
||||||
IsVectorDataType(data_type) && !IsSparseFloatVectorDataType(data_type) ? field.second.get_dim()
|
IsVectorDataType(data_type) &&
|
||||||
: 1,
|
!IsSparseFloatVectorDataType(data_type)
|
||||||
|
? field.second.get_dim()
|
||||||
|
: 1,
|
||||||
batch_num_rows);
|
batch_num_rows);
|
||||||
field_data->FillFieldData(table->column(i));
|
field_data->FillFieldData(table->column(i));
|
||||||
field_data_map[FieldId(field_id)].push_back(field_data);
|
field_data_map[FieldId(field_id)].push_back(field_data);
|
||||||
@ -1236,6 +1251,12 @@ SegmentGrowingImpl::BulkGetJsonData(
|
|||||||
void
|
void
|
||||||
SegmentGrowingImpl::LazyCheckSchema(SchemaPtr sch) {
|
SegmentGrowingImpl::LazyCheckSchema(SchemaPtr sch) {
|
||||||
if (sch->get_schema_version() > schema_->get_schema_version()) {
|
if (sch->get_schema_version() > schema_->get_schema_version()) {
|
||||||
|
LOG_INFO(
|
||||||
|
"lazy check schema segment {} found newer schema version, current "
|
||||||
|
"schema version {}, new schema version {}",
|
||||||
|
id_,
|
||||||
|
schema_->get_schema_version(),
|
||||||
|
sch->get_schema_version());
|
||||||
Reopen(sch);
|
Reopen(sch);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -1271,6 +1292,10 @@ SegmentGrowingImpl::FinishLoad() {
|
|||||||
void
|
void
|
||||||
SegmentGrowingImpl::fill_empty_field(const FieldMeta& field_meta) {
|
SegmentGrowingImpl::fill_empty_field(const FieldMeta& field_meta) {
|
||||||
auto field_id = field_meta.get_id();
|
auto field_id = field_meta.get_id();
|
||||||
|
LOG_INFO("start fill empty field {} (data type {}) for growing segment {}",
|
||||||
|
field_meta.get_data_type(),
|
||||||
|
field_id.get(),
|
||||||
|
id_);
|
||||||
// append meta only needed when schema is old
|
// append meta only needed when schema is old
|
||||||
// loading old segment with new schema will have meta appended
|
// loading old segment with new schema will have meta appended
|
||||||
if (!insert_record_.is_data_exist(field_id)) {
|
if (!insert_record_.is_data_exist(field_id)) {
|
||||||
@ -1286,9 +1311,10 @@ SegmentGrowingImpl::fill_empty_field(const FieldMeta& field_meta) {
|
|||||||
insert_record_.get_data_base(field_id)->set_data_raw(
|
insert_record_.get_data_base(field_id)->set_data_raw(
|
||||||
0, total_row_num, data.get(), field_meta);
|
0, total_row_num, data.get(), field_meta);
|
||||||
|
|
||||||
LOG_INFO("Growing segment {} fill empty field {} done",
|
LOG_INFO("fill empty field {} (data type {}) for growing segment {} done",
|
||||||
this->get_segment_id(),
|
field_meta.get_data_type(),
|
||||||
field_meta.get_id().get());
|
field_id.get(),
|
||||||
|
id_);
|
||||||
}
|
}
|
||||||
|
|
||||||
} // namespace milvus::segcore
|
} // namespace milvus::segcore
|
||||||
|
|||||||
@ -97,6 +97,11 @@ func (m *collectionManager) PutOrRef(collectionID int64, schema *schemapb.Collec
|
|||||||
collection.schema.Store(schema)
|
collection.schema.Store(schema)
|
||||||
collection.ccollection.UpdateSchema(schema, loadMeta.GetSchemaVersion())
|
collection.ccollection.UpdateSchema(schema, loadMeta.GetSchemaVersion())
|
||||||
collection.schemaVersion = loadMeta.GetSchemaVersion()
|
collection.schemaVersion = loadMeta.GetSchemaVersion()
|
||||||
|
log.Info("update collection schema",
|
||||||
|
zap.Int64("collectionID", collectionID),
|
||||||
|
zap.Uint64("schemaVersion", loadMeta.GetSchemaVersion()),
|
||||||
|
zap.Any("schema", schema),
|
||||||
|
)
|
||||||
}
|
}
|
||||||
collection.Ref(1)
|
collection.Ref(1)
|
||||||
return nil
|
return nil
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user