enhance: add NewSegmentWithLoadInfo API to support segment self-managed loading (#45061)

This commit introduces the foundation for enabling segments to manage
their own loading process by passing load information during segment
creation.

Changes:

C++ Layer:
- Add NewSegmentWithLoadInfo() C API to create segments with serialized
load info
- Add SetLoadInfo() method to SegmentInterface for storing load
information
- Refactor segment creation logic into shared CreateSegment() helper
function
- Add comprehensive documentation for the new API

Go Layer:
- Extend CreateCSegmentRequest to support optional LoadInfo field
- Update segment creation in querynode to pass SegmentLoadInfo when
available
- Add ConvertToSegcoreSegmentLoadInfo() and helper converters for proto
translation

Proto Definitions:
- Add segcorepb.SegmentLoadInfo message with essential loading metadata
- Add supporting messages: Binlog, FieldBinlog, FieldIndexInfo,
TextIndexStats, JsonKeyStats
- Remove dependency on data_coord.proto by creating segcore-specific
definitions

Testing:
- Add comprehensive unit tests for proto conversion functions
- Test edge cases including nil inputs, empty data, and nil array/map
elements

This is the first step toward issue #45060 - enabling segments to
autonomously manage their loading process, which will:
- Clarify responsibilities between Go and C++ layers
- Reduce cross-language call overhead
- Enable precise resource management at the C++ level
- Support better integration with caching layer
- Enable proactive schema evolution handling

Related to #45060

---------

Signed-off-by: Congqi Xia <congqi.xia@zilliz.com>
This commit is contained in:
congqixia 2025-10-27 15:28:12 +08:00 committed by GitHub
parent dabbae0386
commit 36a887b38b
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
8 changed files with 1663 additions and 150 deletions

View File

@ -197,6 +197,9 @@ class SegmentInterface {
// currently it's used to sync field data list with updated schema.
virtual void
FinishLoad() = 0;
// Stores the serialized load metadata for this segment so the segment can
// drive its own loading (see NewSegmentWithLoadInfo in the C API).
virtual void
SetLoadInfo(const milvus::proto::segcore::SegmentLoadInfo& load_info) = 0;
};
// internal API for DSL calculation
@ -381,6 +384,12 @@ class SegmentInternalInterface : public SegmentInterface {
FieldId field_id,
const std::string& nested_path) const override;
// Stores a copy of the load metadata in load_info_.
// NOTE(review): the assignment is not guarded by mutex_ or sch_mutex_;
// this assumes SetLoadInfo is called before the segment is shared across
// threads — confirm with the call sites.
virtual void
SetLoadInfo(
    const milvus::proto::segcore::SegmentLoadInfo& load_info) override {
    load_info_ = load_info;
}
public:
// `query_offsets` is not null only for vector array (embedding list) search
// where it denotes the number of vectors in each embedding list. The length
@ -599,6 +608,8 @@ class SegmentInternalInterface : public SegmentInterface {
// mutex protecting rw options on schema_
std::shared_mutex sch_mutex_;
// Load metadata set via SetLoadInfo(); empty (default-constructed) when the
// segment was created through plain NewSegment().
milvus::proto::segcore::SegmentLoadInfo load_info_;
mutable std::shared_mutex mutex_;
// fieldID -> std::pair<num_rows, avg_size>
std::unordered_map<FieldId, std::pair<int64_t, int64_t>>

View File

@ -14,6 +14,8 @@
#include <memory>
#include <limits>
#include "common/EasyAssert.h"
#include "common/common_type_c.h"
#include "pb/cgo_msg.pb.h"
#include "pb/index_cgo_msg.pb.h"
@ -27,6 +29,7 @@
#include "log/Log.h"
#include "mmap/Types.h"
#include "monitor/scope_metric.h"
#include "pb/segcore.pb.h"
#include "segcore/Collection.h"
#include "segcore/SegcoreConfig.h"
#include "segcore/SegmentGrowingImpl.h"
@ -45,6 +48,48 @@
#include "common/GeometryCache.h"
////////////////////////////// common interfaces //////////////////////////////
/**
 * @brief Create a segment instance for the given collection.
 *
 * Shared construction helper used by both NewSegment and
 * NewSegmentWithLoadInfo.
 *
 * @param col The collection the segment belongs to.
 * @param seg_type The kind of segment to build (Growing, Sealed, Indexing).
 * @param segment_id The ID assigned to the new segment.
 * @param is_sorted_by_pk Whether sealed-segment data is sorted by primary key
 *                        (ignored for growing segments).
 * @return A unique pointer owning the newly created segment.
 * @throws milvus::UnexpectedError for an unknown segment type.
 */
std::unique_ptr<milvus::segcore::SegmentInterface>
CreateSegment(milvus::segcore::Collection* col,
              SegmentType seg_type,
              int64_t segment_id,
              bool is_sorted_by_pk) {
    const auto& config = milvus::segcore::SegcoreConfig::default_config();
    std::unique_ptr<milvus::segcore::SegmentInterface> created;
    if (seg_type == Growing) {
        created = milvus::segcore::CreateGrowingSegment(
            col->get_schema(), col->get_index_meta(), segment_id, config);
    } else if (seg_type == Sealed || seg_type == Indexing) {
        // Sealed and Indexing segments share the same concrete implementation.
        created = milvus::segcore::CreateSealedSegment(col->get_schema(),
                                                       col->get_index_meta(),
                                                       segment_id,
                                                       config,
                                                       is_sorted_by_pk);
    } else {
        ThrowInfo(milvus::UnexpectedError, "invalid segment type: {}", seg_type);
    }
    return created;
}
// Creates a new segment of the requested type for `collection` and hands
// ownership back through `newSegment`.
//
// NOTE: the rendered diff for this function interleaved the removed switch
// body with the added CreateSegment() call, leaving a duplicate `segment`
// declaration and an orphaned `default:` label; this is the coherent
// post-refactor form (all construction logic lives in CreateSegment()).
//
// @param collection: The collection that this segment belongs to
// @param seg_type: Type of the segment (growing, sealed, etc.)
// @param segment_id: Unique identifier for this segment
// @param newSegment: Output parameter for the created segment interface
// @param is_sorted_by_pk: Whether the segment data is sorted by primary key
// @return CStatus indicating success or failure
CStatus
NewSegment(CCollection collection,
           SegmentType seg_type,
           int64_t segment_id,
           CSegmentInterface* newSegment,
           bool is_sorted_by_pk) {
    SCOPE_CGO_CALL_METRIC();
    try {
        auto col = static_cast<milvus::segcore::Collection*>(collection);
        auto segment =
            CreateSegment(col, seg_type, segment_id, is_sorted_by_pk);
        // Ownership transfers to the caller; released via DeleteSegment().
        *newSegment = segment.release();
        return milvus::SuccessCStatus();
    } catch (std::exception& e) {
        return milvus::FailureCStatus(&e);
    }
}
CStatus
NewSegmentWithLoadInfo(CCollection collection,
SegmentType seg_type,
int64_t segment_id,
CSegmentInterface* newSegment,
bool is_sorted_by_pk,
const uint8_t* load_info_blob,
const int64_t load_info_length) {
SCOPE_CGO_CALL_METRIC();
try {
AssertInfo(load_info_blob, "load info is null");
milvus::proto::segcore::SegmentLoadInfo load_info;
auto suc = load_info.ParseFromArray(load_info_blob, load_info_length);
AssertInfo(suc, "unmarshal load info failed");
auto col = static_cast<milvus::segcore::Collection*>(collection);
auto segment =
CreateSegment(col, seg_type, segment_id, is_sorted_by_pk);
segment->SetLoadInfo(load_info);
*newSegment = segment.release();
return milvus::SuccessCStatus();
} catch (std::exception& e) {

View File

@ -36,6 +36,27 @@ NewSegment(CCollection collection,
CSegmentInterface* newSegment,
bool is_sorted_by_pk);
// Create a new segment with pre-loaded segment information.
// This function creates a segment and initializes it with serialized load info,
// which can include precomputed metadata, statistics, or configuration data.
//
// @param collection: The collection that this segment belongs to
// @param seg_type: Type of the segment (growing, sealed, etc.)
// @param segment_id: Unique identifier for this segment
// @param newSegment: Output parameter for the created segment interface
// @param is_sorted_by_pk: Whether the segment data is sorted by primary key
// @param load_info_blob: Serialized load information blob. Must be non-null
//                        and contain a serialized
//                        milvus.proto.segcore.SegmentLoadInfo message;
//                        a null or unparsable blob yields a failure status.
// @param load_info_length: Length of the load_info_blob in bytes
// @return CStatus indicating success or failure
CStatus
NewSegmentWithLoadInfo(CCollection collection,
                       SegmentType seg_type,
                       int64_t segment_id,
                       CSegmentInterface* newSegment,
                       bool is_sorted_by_pk,
                       const uint8_t* load_info_blob,
                       const int64_t load_info_length);

// Destroys a segment previously created by NewSegment or
// NewSegmentWithLoadInfo.
void
DeleteSegment(CSegmentInterface c_segment);

View File

@ -370,6 +370,7 @@ func NewSegment(ctx context.Context,
SegmentID: loadInfo.GetSegmentID(),
SegmentType: segmentType,
IsSorted: loadInfo.GetIsSorted(),
LoadInfo: loadInfo,
})
return nil, err
}).Await(); err != nil {

View File

@ -23,6 +23,9 @@ import (
"github.com/milvus-io/milvus-proto/go-api/v2/commonpb"
"github.com/milvus-io/milvus/internal/storage"
"github.com/milvus-io/milvus/internal/util/cgo"
"github.com/milvus-io/milvus/pkg/v2/proto/datapb"
"github.com/milvus-io/milvus/pkg/v2/proto/querypb"
"github.com/milvus-io/milvus/pkg/v2/proto/segcorepb"
"github.com/milvus-io/milvus/pkg/v2/util/merr"
)
@ -42,6 +45,7 @@ type CreateCSegmentRequest struct {
SegmentID int64
SegmentType SegmentType
IsSorted bool
LoadInfo *querypb.SegmentLoadInfo
}
func (req *CreateCSegmentRequest) getCSegmentType() C.SegmentType {
@ -60,7 +64,17 @@ func (req *CreateCSegmentRequest) getCSegmentType() C.SegmentType {
// CreateCSegment creates a segment from a CreateCSegmentRequest.
func CreateCSegment(req *CreateCSegmentRequest) (CSegment, error) {
var ptr C.CSegmentInterface
status := C.NewSegment(req.Collection.rawPointer(), req.getCSegmentType(), C.int64_t(req.SegmentID), &ptr, C.bool(req.IsSorted))
var status C.CStatus
if req.LoadInfo != nil {
loadInfoBlob, err := proto.Marshal(req.LoadInfo)
if err != nil {
return nil, err
}
status = C.NewSegmentWithLoadInfo(req.Collection.rawPointer(), req.getCSegmentType(), C.int64_t(req.SegmentID), &ptr, C.bool(req.IsSorted), (*C.uint8_t)(unsafe.Pointer(&loadInfoBlob[0])), C.int64_t(len(loadInfoBlob)))
} else {
status = C.NewSegment(req.Collection.rawPointer(), req.getCSegmentType(), C.int64_t(req.SegmentID), &ptr, C.bool(req.IsSorted))
}
if err := ConsumeCStatusIntoError(&status); err != nil {
return nil, err
}
@ -317,3 +331,160 @@ func (s *cSegmentImpl) DropJSONIndex(ctx context.Context, fieldID int64, nestedP
func (s *cSegmentImpl) Release() {
C.DeleteSegment(s.ptr)
}
// ConvertToSegcoreSegmentLoadInfo converts querypb.SegmentLoadInfo to segcorepb.SegmentLoadInfo.
// segcorepb.SegmentLoadInfo is a trimmed-down mirror that does not depend on
// data_coord.proto; fields such as start_position, delta_position, and level
// are intentionally dropped. A nil input yields a nil output.
func ConvertToSegcoreSegmentLoadInfo(src *querypb.SegmentLoadInfo) *segcorepb.SegmentLoadInfo {
	if src == nil {
		return nil
	}

	dst := &segcorepb.SegmentLoadInfo{}
	// Scalar metadata copies straight across.
	dst.SegmentID = src.GetSegmentID()
	dst.PartitionID = src.GetPartitionID()
	dst.CollectionID = src.GetCollectionID()
	dst.DbID = src.GetDbID()
	dst.FlushTime = src.GetFlushTime()
	dst.NumOfRows = src.GetNumOfRows()
	dst.CompactionFrom = src.GetCompactionFrom()
	dst.SegmentSize = src.GetSegmentSize()
	dst.InsertChannel = src.GetInsertChannel()
	dst.ReadableVersion = src.GetReadableVersion()
	dst.StorageVersion = src.GetStorageVersion()
	dst.IsSorted = src.GetIsSorted()
	dst.Priority = src.GetPriority()
	// Nested messages need per-type converters because the generated Go
	// types differ between the datapb/querypb and segcorepb packages.
	dst.BinlogPaths = convertFieldBinlogs(src.GetBinlogPaths())
	dst.Statslogs = convertFieldBinlogs(src.GetStatslogs())
	dst.Deltalogs = convertFieldBinlogs(src.GetDeltalogs())
	dst.IndexInfos = convertFieldIndexInfos(src.GetIndexInfos())
	dst.TextStatsLogs = convertTextIndexStats(src.GetTextStatsLogs())
	dst.Bm25Logs = convertFieldBinlogs(src.GetBm25Logs())
	dst.JsonKeyStatsLogs = convertJSONKeyStats(src.GetJsonKeyStatsLogs())
	return dst
}
// convertFieldBinlogs maps datapb.FieldBinlog entries onto their segcorepb
// equivalents. A nil input slice yields nil; nil elements are dropped.
func convertFieldBinlogs(src []*datapb.FieldBinlog) []*segcorepb.FieldBinlog {
	if src == nil {
		return nil
	}
	converted := make([]*segcorepb.FieldBinlog, 0, len(src))
	for _, item := range src {
		if item == nil {
			continue
		}
		entry := &segcorepb.FieldBinlog{
			FieldID:     item.GetFieldID(),
			Binlogs:     convertBinlogs(item.GetBinlogs()),
			ChildFields: item.GetChildFields(),
		}
		converted = append(converted, entry)
	}
	return converted
}
// convertBinlogs maps datapb.Binlog entries onto segcorepb.Binlog.
// A nil input slice yields nil; nil elements are dropped.
func convertBinlogs(src []*datapb.Binlog) []*segcorepb.Binlog {
	if src == nil {
		return nil
	}
	out := make([]*segcorepb.Binlog, 0, len(src))
	for _, log := range src {
		if log == nil {
			continue
		}
		converted := &segcorepb.Binlog{
			EntriesNum:    log.GetEntriesNum(),
			TimestampFrom: log.GetTimestampFrom(),
			TimestampTo:   log.GetTimestampTo(),
			LogPath:       log.GetLogPath(),
			LogSize:       log.GetLogSize(),
			LogID:         log.GetLogID(),
			MemorySize:    log.GetMemorySize(),
		}
		out = append(out, converted)
	}
	return out
}
// convertFieldIndexInfos maps querypb.FieldIndexInfo entries onto
// segcorepb.FieldIndexInfo. A nil input slice yields nil; nil elements are
// dropped.
func convertFieldIndexInfos(src []*querypb.FieldIndexInfo) []*segcorepb.FieldIndexInfo {
	if src == nil {
		return nil
	}
	out := make([]*segcorepb.FieldIndexInfo, 0, len(src))
	for _, info := range src {
		if info == nil {
			continue
		}
		converted := &segcorepb.FieldIndexInfo{
			FieldID:             info.GetFieldID(),
			EnableIndex:         info.GetEnableIndex(),
			IndexName:           info.GetIndexName(),
			IndexID:             info.GetIndexID(),
			BuildID:             info.GetBuildID(),
			IndexParams:         info.GetIndexParams(),
			IndexFilePaths:      info.GetIndexFilePaths(),
			IndexSize:           info.GetIndexSize(),
			IndexVersion:        info.GetIndexVersion(),
			NumRows:             info.GetNumRows(),
			CurrentIndexVersion: info.GetCurrentIndexVersion(),
			IndexStoreVersion:   info.GetIndexStoreVersion(),
		}
		out = append(out, converted)
	}
	return out
}
// convertTextIndexStats maps datapb.TextIndexStats values onto
// segcorepb.TextIndexStats, keyed by the same field ID. A nil input map
// yields nil; entries with nil values are dropped.
func convertTextIndexStats(src map[int64]*datapb.TextIndexStats) map[int64]*segcorepb.TextIndexStats {
	if src == nil {
		return nil
	}
	out := make(map[int64]*segcorepb.TextIndexStats, len(src))
	for fieldID, stats := range src {
		if stats == nil {
			continue
		}
		out[fieldID] = &segcorepb.TextIndexStats{
			FieldID:    stats.GetFieldID(),
			Version:    stats.GetVersion(),
			Files:      stats.GetFiles(),
			LogSize:    stats.GetLogSize(),
			MemorySize: stats.GetMemorySize(),
			BuildID:    stats.GetBuildID(),
		}
	}
	return out
}
// convertJSONKeyStats maps datapb.JsonKeyStats values onto
// segcorepb.JsonKeyStats, keyed by the same field ID. A nil input map yields
// nil; entries with nil values are dropped.
func convertJSONKeyStats(src map[int64]*datapb.JsonKeyStats) map[int64]*segcorepb.JsonKeyStats {
	if src == nil {
		return nil
	}
	out := make(map[int64]*segcorepb.JsonKeyStats, len(src))
	for fieldID, stats := range src {
		if stats == nil {
			continue
		}
		out[fieldID] = &segcorepb.JsonKeyStats{
			FieldID:                stats.GetFieldID(),
			Version:                stats.GetVersion(),
			Files:                  stats.GetFiles(),
			LogSize:                stats.GetLogSize(),
			MemorySize:             stats.GetMemorySize(),
			BuildID:                stats.GetBuildID(),
			JsonKeyStatsDataFormat: stats.GetJsonKeyStatsDataFormat(),
		}
	}
	return out
}

View File

@ -8,12 +8,15 @@ import (
"github.com/stretchr/testify/assert"
"google.golang.org/protobuf/proto"
"github.com/milvus-io/milvus-proto/go-api/v2/commonpb"
"github.com/milvus-io/milvus-proto/go-api/v2/schemapb"
"github.com/milvus-io/milvus/internal/mocks/util/mock_segcore"
"github.com/milvus-io/milvus/internal/storage"
"github.com/milvus-io/milvus/internal/util/initcore"
"github.com/milvus-io/milvus/internal/util/segcore"
"github.com/milvus-io/milvus/pkg/v2/proto/datapb"
"github.com/milvus-io/milvus/pkg/v2/proto/planpb"
"github.com/milvus-io/milvus/pkg/v2/proto/querypb"
"github.com/milvus-io/milvus/pkg/v2/proto/segcorepb"
"github.com/milvus-io/milvus/pkg/v2/util/paramtable"
"github.com/milvus-io/milvus/pkg/v2/util/typeutil"
@ -139,3 +142,260 @@ func assertEqualCount(
assert.NotNil(t, retrieveResult2)
retrieveResult2.Release()
}
// TestConvertToSegcoreSegmentLoadInfo exercises ConvertToSegcoreSegmentLoadInfo
// and (via its output) the unexported helper converters. It covers four cases:
// a nil input, the zero-value input, a fully populated input where every field
// is checked against the source, and inputs whose slices/maps contain nil
// elements (which the converters must skip).
func TestConvertToSegcoreSegmentLoadInfo(t *testing.T) {
	// nil in, nil out — no zero-value struct is fabricated.
	t.Run("nil input", func(t *testing.T) {
		result := segcore.ConvertToSegcoreSegmentLoadInfo(nil)
		assert.Nil(t, result)
	})

	// A non-nil but empty source still produces a non-nil result with
	// zero-valued fields.
	t.Run("empty input", func(t *testing.T) {
		src := &querypb.SegmentLoadInfo{}
		result := segcore.ConvertToSegcoreSegmentLoadInfo(src)
		assert.NotNil(t, result)
		assert.Equal(t, int64(0), result.SegmentID)
		assert.Equal(t, int64(0), result.PartitionID)
		assert.Equal(t, int64(0), result.CollectionID)
	})

	t.Run("full conversion", func(t *testing.T) {
		// Create source querypb.SegmentLoadInfo with all fields populated
		src := &querypb.SegmentLoadInfo{
			SegmentID:    1001,
			PartitionID:  2001,
			CollectionID: 3001,
			DbID:         4001,
			FlushTime:    5001,
			BinlogPaths: []*datapb.FieldBinlog{
				{
					FieldID: 100,
					Binlogs: []*datapb.Binlog{
						{
							EntriesNum:    10,
							TimestampFrom: 1000,
							TimestampTo:   2000,
							LogPath:       "/path/to/binlog",
							LogSize:       1024,
							LogID:         9001,
							MemorySize:    2048,
						},
					},
					ChildFields: []int64{101, 102},
				},
			},
			NumOfRows: 1000,
			Statslogs: []*datapb.FieldBinlog{
				{
					FieldID: 200,
					Binlogs: []*datapb.Binlog{
						{
							EntriesNum:    5,
							TimestampFrom: 1500,
							TimestampTo:   2500,
							LogPath:       "/path/to/statslog",
							LogSize:       512,
							LogID:         9002,
							MemorySize:    1024,
						},
					},
				},
			},
			Deltalogs: []*datapb.FieldBinlog{
				{
					FieldID: 300,
					Binlogs: []*datapb.Binlog{
						{
							EntriesNum:    3,
							TimestampFrom: 2000,
							TimestampTo:   3000,
							LogPath:       "/path/to/deltalog",
							LogSize:       256,
							LogID:         9003,
							MemorySize:    512,
						},
					},
				},
			},
			CompactionFrom: []int64{8001, 8002},
			IndexInfos: []*querypb.FieldIndexInfo{
				{
					FieldID:             100,
					EnableIndex:         true,
					IndexName:           "test_index",
					IndexID:             7001,
					BuildID:             7002,
					IndexParams:         []*commonpb.KeyValuePair{{Key: "index_type", Value: "HNSW"}},
					IndexFilePaths:      []string{"/path/to/index"},
					IndexSize:           4096,
					IndexVersion:        1,
					NumRows:             1000,
					CurrentIndexVersion: 2,
					IndexStoreVersion:   3,
				},
			},
			SegmentSize:     8192,
			InsertChannel:   "insert_channel_1",
			ReadableVersion: 6001,
			StorageVersion:  7001,
			IsSorted:        true,
			TextStatsLogs: map[int64]*datapb.TextIndexStats{
				400: {
					FieldID:    400,
					Version:    1,
					Files:      []string{"/path/to/text/stats1", "/path/to/text/stats2"},
					LogSize:    2048,
					MemorySize: 4096,
					BuildID:    9101,
				},
			},
			Bm25Logs: []*datapb.FieldBinlog{
				{
					FieldID: 500,
					Binlogs: []*datapb.Binlog{
						{
							EntriesNum:    7,
							TimestampFrom: 3000,
							TimestampTo:   4000,
							LogPath:       "/path/to/bm25log",
							LogSize:       768,
							LogID:         9004,
							MemorySize:    1536,
						},
					},
				},
			},
			JsonKeyStatsLogs: map[int64]*datapb.JsonKeyStats{
				600: {
					FieldID:                600,
					Version:                2,
					Files:                  []string{"/path/to/json/stats"},
					LogSize:                1024,
					MemorySize:             2048,
					BuildID:                9201,
					JsonKeyStatsDataFormat: 1,
				},
			},
			Priority: commonpb.LoadPriority_HIGH,
		}

		// Convert to segcorepb.SegmentLoadInfo
		result := segcore.ConvertToSegcoreSegmentLoadInfo(src)

		// Validate basic fields
		assert.NotNil(t, result)
		assert.Equal(t, src.SegmentID, result.SegmentID)
		assert.Equal(t, src.PartitionID, result.PartitionID)
		assert.Equal(t, src.CollectionID, result.CollectionID)
		assert.Equal(t, src.DbID, result.DbID)
		assert.Equal(t, src.FlushTime, result.FlushTime)
		assert.Equal(t, src.NumOfRows, result.NumOfRows)
		assert.Equal(t, src.SegmentSize, result.SegmentSize)
		assert.Equal(t, src.InsertChannel, result.InsertChannel)
		assert.Equal(t, src.ReadableVersion, result.ReadableVersion)
		assert.Equal(t, src.StorageVersion, result.StorageVersion)
		assert.Equal(t, src.IsSorted, result.IsSorted)
		assert.Equal(t, src.Priority, result.Priority)
		assert.Equal(t, src.CompactionFrom, result.CompactionFrom)

		// Validate BinlogPaths conversion
		assert.Equal(t, len(src.BinlogPaths), len(result.BinlogPaths))
		assert.Equal(t, src.BinlogPaths[0].FieldID, result.BinlogPaths[0].FieldID)
		assert.Equal(t, len(src.BinlogPaths[0].Binlogs), len(result.BinlogPaths[0].Binlogs))
		assert.Equal(t, src.BinlogPaths[0].Binlogs[0].EntriesNum, result.BinlogPaths[0].Binlogs[0].EntriesNum)
		assert.Equal(t, src.BinlogPaths[0].Binlogs[0].TimestampFrom, result.BinlogPaths[0].Binlogs[0].TimestampFrom)
		assert.Equal(t, src.BinlogPaths[0].Binlogs[0].TimestampTo, result.BinlogPaths[0].Binlogs[0].TimestampTo)
		assert.Equal(t, src.BinlogPaths[0].Binlogs[0].LogPath, result.BinlogPaths[0].Binlogs[0].LogPath)
		assert.Equal(t, src.BinlogPaths[0].Binlogs[0].LogSize, result.BinlogPaths[0].Binlogs[0].LogSize)
		assert.Equal(t, src.BinlogPaths[0].Binlogs[0].LogID, result.BinlogPaths[0].Binlogs[0].LogID)
		assert.Equal(t, src.BinlogPaths[0].Binlogs[0].MemorySize, result.BinlogPaths[0].Binlogs[0].MemorySize)
		assert.Equal(t, src.BinlogPaths[0].ChildFields, result.BinlogPaths[0].ChildFields)

		// Validate Statslogs conversion
		assert.Equal(t, len(src.Statslogs), len(result.Statslogs))
		assert.Equal(t, src.Statslogs[0].FieldID, result.Statslogs[0].FieldID)

		// Validate Deltalogs conversion
		assert.Equal(t, len(src.Deltalogs), len(result.Deltalogs))
		assert.Equal(t, src.Deltalogs[0].FieldID, result.Deltalogs[0].FieldID)

		// Validate IndexInfos conversion
		assert.Equal(t, len(src.IndexInfos), len(result.IndexInfos))
		assert.Equal(t, src.IndexInfos[0].FieldID, result.IndexInfos[0].FieldID)
		assert.Equal(t, src.IndexInfos[0].EnableIndex, result.IndexInfos[0].EnableIndex)
		assert.Equal(t, src.IndexInfos[0].IndexName, result.IndexInfos[0].IndexName)
		assert.Equal(t, src.IndexInfos[0].IndexID, result.IndexInfos[0].IndexID)
		assert.Equal(t, src.IndexInfos[0].BuildID, result.IndexInfos[0].BuildID)
		assert.Equal(t, len(src.IndexInfos[0].IndexParams), len(result.IndexInfos[0].IndexParams))
		assert.Equal(t, src.IndexInfos[0].IndexFilePaths, result.IndexInfos[0].IndexFilePaths)
		assert.Equal(t, src.IndexInfos[0].IndexSize, result.IndexInfos[0].IndexSize)
		assert.Equal(t, src.IndexInfos[0].IndexVersion, result.IndexInfos[0].IndexVersion)
		assert.Equal(t, src.IndexInfos[0].NumRows, result.IndexInfos[0].NumRows)
		assert.Equal(t, src.IndexInfos[0].CurrentIndexVersion, result.IndexInfos[0].CurrentIndexVersion)
		assert.Equal(t, src.IndexInfos[0].IndexStoreVersion, result.IndexInfos[0].IndexStoreVersion)

		// Validate TextStatsLogs conversion
		assert.Equal(t, len(src.TextStatsLogs), len(result.TextStatsLogs))
		textStats := result.TextStatsLogs[400]
		assert.NotNil(t, textStats)
		assert.Equal(t, src.TextStatsLogs[400].FieldID, textStats.FieldID)
		assert.Equal(t, src.TextStatsLogs[400].Version, textStats.Version)
		assert.Equal(t, src.TextStatsLogs[400].Files, textStats.Files)
		assert.Equal(t, src.TextStatsLogs[400].LogSize, textStats.LogSize)
		assert.Equal(t, src.TextStatsLogs[400].MemorySize, textStats.MemorySize)
		assert.Equal(t, src.TextStatsLogs[400].BuildID, textStats.BuildID)

		// Validate Bm25Logs conversion
		assert.Equal(t, len(src.Bm25Logs), len(result.Bm25Logs))
		assert.Equal(t, src.Bm25Logs[0].FieldID, result.Bm25Logs[0].FieldID)

		// Validate JsonKeyStatsLogs conversion
		assert.Equal(t, len(src.JsonKeyStatsLogs), len(result.JsonKeyStatsLogs))
		jsonStats := result.JsonKeyStatsLogs[600]
		assert.NotNil(t, jsonStats)
		assert.Equal(t, src.JsonKeyStatsLogs[600].FieldID, jsonStats.FieldID)
		assert.Equal(t, src.JsonKeyStatsLogs[600].Version, jsonStats.Version)
		assert.Equal(t, src.JsonKeyStatsLogs[600].Files, jsonStats.Files)
		assert.Equal(t, src.JsonKeyStatsLogs[600].LogSize, jsonStats.LogSize)
		assert.Equal(t, src.JsonKeyStatsLogs[600].MemorySize, jsonStats.MemorySize)
		assert.Equal(t, src.JsonKeyStatsLogs[600].BuildID, jsonStats.BuildID)
		assert.Equal(t, src.JsonKeyStatsLogs[600].JsonKeyStatsDataFormat, jsonStats.JsonKeyStatsDataFormat)
	})

	// Nil slice elements and nil map values must be skipped, not copied
	// through as nils and not cause a panic.
	t.Run("nil elements in arrays and maps", func(t *testing.T) {
		src := &querypb.SegmentLoadInfo{
			SegmentID: 1001,
			BinlogPaths: []*datapb.FieldBinlog{
				nil, // nil element should be skipped
				{FieldID: 100},
			},
			Statslogs: []*datapb.FieldBinlog{
				nil,
			},
			IndexInfos: []*querypb.FieldIndexInfo{
				nil,
				{FieldID: 200},
			},
			TextStatsLogs: map[int64]*datapb.TextIndexStats{
				100: nil, // nil value should be skipped
				200: {FieldID: 200},
			},
			JsonKeyStatsLogs: map[int64]*datapb.JsonKeyStats{
				300: nil,
				400: {FieldID: 400},
			},
		}

		result := segcore.ConvertToSegcoreSegmentLoadInfo(src)
		assert.NotNil(t, result)
		assert.Equal(t, 1, len(result.BinlogPaths))
		assert.Equal(t, int64(100), result.BinlogPaths[0].FieldID)
		assert.Equal(t, 0, len(result.Statslogs))
		assert.Equal(t, 1, len(result.IndexInfos))
		assert.Equal(t, int64(200), result.IndexInfos[0].FieldID)
		assert.Equal(t, 1, len(result.TextStatsLogs))
		assert.NotNil(t, result.TextStatsLogs[200])
		assert.Equal(t, 1, len(result.JsonKeyStatsLogs))
		assert.NotNil(t, result.JsonKeyStatsLogs[400])
	})
}

View File

@ -5,6 +5,41 @@ option go_package = "github.com/milvus-io/milvus/pkg/v2/proto/segcorepb";
import "schema.proto";
import "common.proto";
// Binlog describes a single binlog file: its entry count, the timestamp
// range it covers, and path/size/ID bookkeeping. Segcore-local mirror of
// datapb.Binlog so this file does not depend on data_coord.proto.
message Binlog {
  int64 entries_num = 1;
  uint64 timestamp_from = 2;
  uint64 timestamp_to = 3;
  string log_path = 4;
  int64 log_size = 5;
  int64 logID = 6;
  int64 memory_size = 7;
}
// FieldBinlog groups the binlog files that belong to a single field.
// Segcore-local mirror of datapb.FieldBinlog.
message FieldBinlog {
  int64 fieldID = 1;
  repeated Binlog binlogs = 2;
  repeated int64 child_fields = 3;
}
// TextIndexStats carries the text-index build artifacts for one field.
// Segcore-local mirror of datapb.TextIndexStats.
message TextIndexStats {
  int64 fieldID = 1;
  int64 version = 2;
  repeated string files = 3;
  int64 log_size = 4;
  int64 memory_size = 5;
  int64 buildID = 6;
}
// JsonKeyStats carries the JSON-key-statistics build artifacts for one field.
// Segcore-local mirror of datapb.JsonKeyStats.
message JsonKeyStats {
  int64 fieldID = 1;
  int64 version = 2;
  repeated string files = 3;
  int64 log_size = 4;
  int64 memory_size = 5;
  int64 buildID = 6;
  int64 json_key_stats_data_format = 7;
}
message RetrieveResults {
schema.IDs ids = 1;
repeated int64 offset = 2;
@ -46,3 +81,41 @@ message CollectionIndexMeta {
int64 maxIndexRowCount = 1;
repeated FieldIndexMeta index_metas = 2;
}
// FieldIndexInfo describes a built index on a field: identity, build
// artifacts, parameters, and versioning. Segcore-local mirror of
// querypb.FieldIndexInfo without data_coord.proto dependencies.
message FieldIndexInfo {
  int64 fieldID = 1;
  bool enable_index = 2 [deprecated = true];
  string index_name = 3;
  int64 indexID = 4;
  int64 buildID = 5;
  repeated common.KeyValuePair index_params = 6;
  repeated string index_file_paths = 7;
  int64 index_size = 8;
  int64 index_version = 9;
  int64 num_rows = 10;
  int32 current_index_version = 11;
  int64 index_store_version = 12;
}
// SegmentLoadInfo is the segcore-local load descriptor passed to
// NewSegmentWithLoadInfo. It mirrors querypb.SegmentLoadInfo but keeps only
// the metadata the C++ layer needs, avoiding a dependency on
// data_coord.proto (fields such as start_position, delta_position, and
// level are intentionally omitted).
message SegmentLoadInfo {
  int64 segmentID = 1;
  int64 partitionID = 2;
  int64 collectionID = 3;
  int64 dbID = 4;
  int64 flush_time = 5;
  repeated FieldBinlog binlog_paths = 6;
  int64 num_of_rows = 7;
  repeated FieldBinlog statslogs = 8;
  repeated FieldBinlog deltalogs = 9;
  repeated int64 compactionFrom = 10; // segmentIDs compacted from
  repeated FieldIndexInfo index_infos = 11;
  int64 segment_size = 12 [deprecated = true];
  string insert_channel = 13;
  int64 readableVersion = 14;
  int64 storageVersion = 15;
  bool is_sorted = 16;
  // Keyed by field ID.
  map<int64, TextIndexStats> textStatsLogs = 17;
  repeated FieldBinlog bm25logs = 18;
  // Keyed by field ID.
  map<int64, JsonKeyStats> jsonKeyStatsLogs = 19;
  common.LoadPriority priority = 20;
}

File diff suppressed because it is too large Load Diff