enhance: add NewSegmentWithLoadInfo API to support segment self-managed loading (#45061)
This commit introduces the foundation for enabling segments to manage their own loading process by passing load information during segment creation.

Changes:

C++ Layer:
- Add NewSegmentWithLoadInfo() C API to create segments with serialized load info
- Add SetLoadInfo() method to SegmentInterface for storing load information
- Refactor segment creation logic into shared CreateSegment() helper function
- Add comprehensive documentation for the new API

Go Layer:
- Extend CreateCSegmentRequest to support optional LoadInfo field
- Update segment creation in querynode to pass SegmentLoadInfo when available
- Add ConvertToSegcoreSegmentLoadInfo() and helper converters for proto translation

Proto Definitions:
- Add segcorepb.SegmentLoadInfo message with essential loading metadata
- Add supporting messages: Binlog, FieldBinlog, FieldIndexInfo, TextIndexStats, JsonKeyStats
- Remove dependency on data_coord.proto by creating segcore-specific definitions

Testing:
- Add comprehensive unit tests for proto conversion functions
- Test edge cases including nil inputs, empty data, and nil array/map elements

This is the first step toward issue #45060 - enabling segments to autonomously manage their loading process, which will:
- Clarify responsibilities between Go and C++ layers
- Reduce cross-language call overhead
- Enable precise resource management at the C++ level
- Support better integration with caching layer
- Enable proactive schema evolution handling

Related to #45060

---------

Signed-off-by: Congqi Xia <congqi.xia@zilliz.com>
This commit is contained in: parent dabbae0386 · commit 36a887b38b
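For orientation, here is a minimal sketch of the creation path this commit adds on the Go side, assuming a valid *segcore.CCollection, a segment type constant, and a *querypb.SegmentLoadInfo taken from the load request (these inputs are placeholders, not code from the commit). A non-nil LoadInfo makes CreateCSegment route through the new NewSegmentWithLoadInfo C API, as the diff below shows:

// Hedged sketch; col, segmentType, and loadInfo are assumed inputs.
// Field names follow the CreateCSegmentRequest struct in this diff.
func createSelfLoadingSegment(col *segcore.CCollection, segmentType segcore.SegmentType, loadInfo *querypb.SegmentLoadInfo) (segcore.CSegment, error) {
	return segcore.CreateCSegment(&segcore.CreateCSegmentRequest{
		Collection:  col,
		SegmentID:   loadInfo.GetSegmentID(),
		SegmentType: segmentType,
		IsSorted:    loadInfo.GetIsSorted(),
		LoadInfo:    loadInfo, // non-nil: marshaled and handed to C.NewSegmentWithLoadInfo
	})
}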
@@ -197,6 +197,9 @@ class SegmentInterface {
    // currently it's used to sync field data list with updated schema.
    virtual void
    FinishLoad() = 0;

    virtual void
    SetLoadInfo(const milvus::proto::segcore::SegmentLoadInfo& load_info) = 0;
};

// internal API for DSL calculation
@@ -381,6 +384,12 @@ class SegmentInternalInterface : public SegmentInterface {
        FieldId field_id,
        const std::string& nested_path) const override;

    virtual void
    SetLoadInfo(
        const milvus::proto::segcore::SegmentLoadInfo& load_info) override {
        load_info_ = load_info;
    }

 public:
    // `query_offsets` is not null only for vector array (embedding list) search
    // where it denotes the number of vectors in each embedding list. The length
@@ -599,6 +608,8 @@ class SegmentInternalInterface : public SegmentInterface {
    // mutex protecting rw options on schema_
    std::shared_mutex sch_mutex_;

    milvus::proto::segcore::SegmentLoadInfo load_info_;

    mutable std::shared_mutex mutex_;
    // fieldID -> std::pair<num_rows, avg_size>
    std::unordered_map<FieldId, std::pair<int64_t, int64_t>>
@@ -14,6 +14,8 @@
 #include <memory>
 #include <limits>

 #include "common/EasyAssert.h"
 #include "common/common_type_c.h"
 #include "pb/cgo_msg.pb.h"
 #include "pb/index_cgo_msg.pb.h"

@@ -27,6 +29,7 @@
 #include "log/Log.h"
 #include "mmap/Types.h"
 #include "monitor/scope_metric.h"
 #include "pb/segcore.pb.h"
 #include "segcore/Collection.h"
 #include "segcore/SegcoreConfig.h"
 #include "segcore/SegmentGrowingImpl.h"

@@ -45,6 +48,48 @@
 #include "common/GeometryCache.h"

////////////////////////////// common interfaces //////////////////////////////

/**
 * @brief Create a segment from a collection.
 * @param col The collection to create the segment from.
 * @param seg_type The type of segment to create.
 * @param segment_id The ID of the segment to create.
 * @param is_sorted_by_pk Whether the data in the sealed segment is sorted by primary key.
 * @return A unique pointer to a SegmentInterface object.
 */
std::unique_ptr<milvus::segcore::SegmentInterface>
CreateSegment(milvus::segcore::Collection* col,
              SegmentType seg_type,
              int64_t segment_id,
              bool is_sorted_by_pk) {
    std::unique_ptr<milvus::segcore::SegmentInterface> segment;
    switch (seg_type) {
        case Growing: {
            auto seg = milvus::segcore::CreateGrowingSegment(
                col->get_schema(),
                col->get_index_meta(),
                segment_id,
                milvus::segcore::SegcoreConfig::default_config());
            segment = std::move(seg);
            break;
        }
        case Sealed:
        case Indexing:
            segment = milvus::segcore::CreateSealedSegment(
                col->get_schema(),
                col->get_index_meta(),
                segment_id,
                milvus::segcore::SegcoreConfig::default_config(),
                is_sorted_by_pk);
            break;

        default:
            ThrowInfo(
                milvus::UnexpectedError, "invalid segment type: {}", seg_type);
    }
    return segment;
}

CStatus
NewSegment(CCollection collection,
           SegmentType seg_type,
@@ -56,33 +101,37 @@ NewSegment(CCollection collection,
     try {
         auto col = static_cast<milvus::segcore::Collection*>(collection);

-        std::unique_ptr<milvus::segcore::SegmentInterface> segment;
-        switch (seg_type) {
-            case Growing: {
-                auto seg = milvus::segcore::CreateGrowingSegment(
-                    col->get_schema(),
-                    col->get_index_meta(),
-                    segment_id,
-                    milvus::segcore::SegcoreConfig::default_config());
-                segment = std::move(seg);
-                break;
-            }
-            case Sealed:
-            case Indexing:
-                segment = milvus::segcore::CreateSealedSegment(
-                    col->get_schema(),
-                    col->get_index_meta(),
-                    segment_id,
-                    milvus::segcore::SegcoreConfig::default_config(),
-                    is_sorted_by_pk);
-                break;
+        auto segment =
+            CreateSegment(col, seg_type, segment_id, is_sorted_by_pk);

-            default:
-                ThrowInfo(milvus::UnexpectedError,
-                          "invalid segment type: {}",
-                          seg_type);
-        }
         *newSegment = segment.release();
         return milvus::SuccessCStatus();
     } catch (std::exception& e) {
         return milvus::FailureCStatus(&e);
     }
 }

+CStatus
+NewSegmentWithLoadInfo(CCollection collection,
+                       SegmentType seg_type,
+                       int64_t segment_id,
+                       CSegmentInterface* newSegment,
+                       bool is_sorted_by_pk,
+                       const uint8_t* load_info_blob,
+                       const int64_t load_info_length) {
+    SCOPE_CGO_CALL_METRIC();
+
+    try {
+        AssertInfo(load_info_blob, "load info is null");
+        milvus::proto::segcore::SegmentLoadInfo load_info;
+        auto suc = load_info.ParseFromArray(load_info_blob, load_info_length);
+        AssertInfo(suc, "unmarshal load info failed");
+
+        auto col = static_cast<milvus::segcore::Collection*>(collection);
+
+        auto segment =
+            CreateSegment(col, seg_type, segment_id, is_sorted_by_pk);
+        segment->SetLoadInfo(load_info);
+        *newSegment = segment.release();
+        return milvus::SuccessCStatus();
+    } catch (std::exception& e) {
@@ -36,6 +36,27 @@ NewSegment(CCollection collection,
           CSegmentInterface* newSegment,
           bool is_sorted_by_pk);

// Create a new segment with pre-loaded segment information.
// This function creates a segment and initializes it with serialized load info,
// which can include precomputed metadata, statistics, or configuration data.
//
// @param collection: The collection that this segment belongs to
// @param seg_type: Type of the segment (growing, sealed, etc.)
// @param segment_id: Unique identifier for this segment
// @param newSegment: Output parameter for the created segment interface
// @param is_sorted_by_pk: Whether the segment data is sorted by primary key
// @param load_info_blob: Serialized load information blob
// @param load_info_length: Length of the load_info_blob in bytes
// @return CStatus indicating success or failure
CStatus
NewSegmentWithLoadInfo(CCollection collection,
                       SegmentType seg_type,
                       int64_t segment_id,
                       CSegmentInterface* newSegment,
                       bool is_sorted_by_pk,
                       const uint8_t* load_info_blob,
                       const int64_t load_info_length);

void
DeleteSegment(CSegmentInterface c_segment);
@@ -370,6 +370,7 @@ func NewSegment(ctx context.Context,
			SegmentID:   loadInfo.GetSegmentID(),
			SegmentType: segmentType,
			IsSorted:    loadInfo.GetIsSorted(),
			LoadInfo:    loadInfo,
		})
		return nil, err
	}).Await(); err != nil {
@@ -23,6 +23,9 @@ import (
	"github.com/milvus-io/milvus-proto/go-api/v2/commonpb"
	"github.com/milvus-io/milvus/internal/storage"
	"github.com/milvus-io/milvus/internal/util/cgo"
	"github.com/milvus-io/milvus/pkg/v2/proto/datapb"
	"github.com/milvus-io/milvus/pkg/v2/proto/querypb"
	"github.com/milvus-io/milvus/pkg/v2/proto/segcorepb"
	"github.com/milvus-io/milvus/pkg/v2/util/merr"
)

@@ -42,6 +45,7 @@ type CreateCSegmentRequest struct {
	SegmentID   int64
	SegmentType SegmentType
	IsSorted    bool
	LoadInfo    *querypb.SegmentLoadInfo
}

func (req *CreateCSegmentRequest) getCSegmentType() C.SegmentType {
@@ -60,7 +64,17 @@ func (req *CreateCSegmentRequest) getCSegmentType() C.SegmentType {
 // CreateCSegment creates a segment from a CreateCSegmentRequest.
 func CreateCSegment(req *CreateCSegmentRequest) (CSegment, error) {
 	var ptr C.CSegmentInterface
-	status := C.NewSegment(req.Collection.rawPointer(), req.getCSegmentType(), C.int64_t(req.SegmentID), &ptr, C.bool(req.IsSorted))
+	var status C.CStatus
+	if req.LoadInfo != nil {
+		loadInfoBlob, err := proto.Marshal(req.LoadInfo)
+		if err != nil {
+			return nil, err
+		}
+
+		status = C.NewSegmentWithLoadInfo(req.Collection.rawPointer(), req.getCSegmentType(), C.int64_t(req.SegmentID), &ptr, C.bool(req.IsSorted), (*C.uint8_t)(unsafe.Pointer(&loadInfoBlob[0])), C.int64_t(len(loadInfoBlob)))
+	} else {
+		status = C.NewSegment(req.Collection.rawPointer(), req.getCSegmentType(), C.int64_t(req.SegmentID), &ptr, C.bool(req.IsSorted))
+	}
 	if err := ConsumeCStatusIntoError(&status); err != nil {
 		return nil, err
 	}
@@ -317,3 +331,160 @@ func (s *cSegmentImpl) DropJSONIndex(ctx context.Context, fieldID int64, nestedP
func (s *cSegmentImpl) Release() {
	C.DeleteSegment(s.ptr)
}

// ConvertToSegcoreSegmentLoadInfo converts querypb.SegmentLoadInfo to segcorepb.SegmentLoadInfo.
// This function is needed because segcorepb.SegmentLoadInfo is a simplified version that doesn't
// depend on data_coord.proto and excludes fields like start_position, delta_position, and level.
func ConvertToSegcoreSegmentLoadInfo(src *querypb.SegmentLoadInfo) *segcorepb.SegmentLoadInfo {
	if src == nil {
		return nil
	}

	return &segcorepb.SegmentLoadInfo{
		SegmentID:        src.GetSegmentID(),
		PartitionID:      src.GetPartitionID(),
		CollectionID:     src.GetCollectionID(),
		DbID:             src.GetDbID(),
		FlushTime:        src.GetFlushTime(),
		BinlogPaths:      convertFieldBinlogs(src.GetBinlogPaths()),
		NumOfRows:        src.GetNumOfRows(),
		Statslogs:        convertFieldBinlogs(src.GetStatslogs()),
		Deltalogs:        convertFieldBinlogs(src.GetDeltalogs()),
		CompactionFrom:   src.GetCompactionFrom(),
		IndexInfos:       convertFieldIndexInfos(src.GetIndexInfos()),
		SegmentSize:      src.GetSegmentSize(),
		InsertChannel:    src.GetInsertChannel(),
		ReadableVersion:  src.GetReadableVersion(),
		StorageVersion:   src.GetStorageVersion(),
		IsSorted:         src.GetIsSorted(),
		TextStatsLogs:    convertTextIndexStats(src.GetTextStatsLogs()),
		Bm25Logs:         convertFieldBinlogs(src.GetBm25Logs()),
		JsonKeyStatsLogs: convertJSONKeyStats(src.GetJsonKeyStatsLogs()),
		Priority:         src.GetPriority(),
	}
}

// convertFieldBinlogs converts datapb.FieldBinlog to segcorepb.FieldBinlog.
func convertFieldBinlogs(src []*datapb.FieldBinlog) []*segcorepb.FieldBinlog {
	if src == nil {
		return nil
	}

	result := make([]*segcorepb.FieldBinlog, 0, len(src))
	for _, fb := range src {
		if fb == nil {
			continue
		}

		result = append(result, &segcorepb.FieldBinlog{
			FieldID:     fb.GetFieldID(),
			Binlogs:     convertBinlogs(fb.GetBinlogs()),
			ChildFields: fb.GetChildFields(),
		})
	}
	return result
}

// convertBinlogs converts datapb.Binlog to segcorepb.Binlog.
func convertBinlogs(src []*datapb.Binlog) []*segcorepb.Binlog {
	if src == nil {
		return nil
	}

	result := make([]*segcorepb.Binlog, 0, len(src))
	for _, b := range src {
		if b == nil {
			continue
		}

		result = append(result, &segcorepb.Binlog{
			EntriesNum:    b.GetEntriesNum(),
			TimestampFrom: b.GetTimestampFrom(),
			TimestampTo:   b.GetTimestampTo(),
			LogPath:       b.GetLogPath(),
			LogSize:       b.GetLogSize(),
			LogID:         b.GetLogID(),
			MemorySize:    b.GetMemorySize(),
		})
	}
	return result
}

// convertFieldIndexInfos converts querypb.FieldIndexInfo to segcorepb.FieldIndexInfo.
func convertFieldIndexInfos(src []*querypb.FieldIndexInfo) []*segcorepb.FieldIndexInfo {
	if src == nil {
		return nil
	}

	result := make([]*segcorepb.FieldIndexInfo, 0, len(src))
	for _, fii := range src {
		if fii == nil {
			continue
		}

		result = append(result, &segcorepb.FieldIndexInfo{
			FieldID:             fii.GetFieldID(),
			EnableIndex:         fii.GetEnableIndex(),
			IndexName:           fii.GetIndexName(),
			IndexID:             fii.GetIndexID(),
			BuildID:             fii.GetBuildID(),
			IndexParams:         fii.GetIndexParams(),
			IndexFilePaths:      fii.GetIndexFilePaths(),
			IndexSize:           fii.GetIndexSize(),
			IndexVersion:        fii.GetIndexVersion(),
			NumRows:             fii.GetNumRows(),
			CurrentIndexVersion: fii.GetCurrentIndexVersion(),
			IndexStoreVersion:   fii.GetIndexStoreVersion(),
		})
	}
	return result
}

// convertTextIndexStats converts datapb.TextIndexStats to segcorepb.TextIndexStats.
func convertTextIndexStats(src map[int64]*datapb.TextIndexStats) map[int64]*segcorepb.TextIndexStats {
	if src == nil {
		return nil
	}

	result := make(map[int64]*segcorepb.TextIndexStats, len(src))
	for k, v := range src {
		if v == nil {
			continue
		}

		result[k] = &segcorepb.TextIndexStats{
			FieldID:    v.GetFieldID(),
			Version:    v.GetVersion(),
			Files:      v.GetFiles(),
			LogSize:    v.GetLogSize(),
			MemorySize: v.GetMemorySize(),
			BuildID:    v.GetBuildID(),
		}
	}
	return result
}

// convertJSONKeyStats converts datapb.JsonKeyStats to segcorepb.JsonKeyStats.
func convertJSONKeyStats(src map[int64]*datapb.JsonKeyStats) map[int64]*segcorepb.JsonKeyStats {
	if src == nil {
		return nil
	}

	result := make(map[int64]*segcorepb.JsonKeyStats, len(src))
	for k, v := range src {
		if v == nil {
			continue
		}

		result[k] = &segcorepb.JsonKeyStats{
			FieldID:                v.GetFieldID(),
			Version:                v.GetVersion(),
			Files:                  v.GetFiles(),
			LogSize:                v.GetLogSize(),
			MemorySize:             v.GetMemorySize(),
			BuildID:                v.GetBuildID(),
			JsonKeyStatsDataFormat: v.GetJsonKeyStatsDataFormat(),
		}
	}
	return result
}
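A brief usage sketch of the converter above (values invented; packages as imported in this file). The nil-skipping behavior shown in the comments follows directly from the helper loops, which drop nil slice elements and nil map values so the C++ side never receives null entries:

// Hedged sketch: one nil binlog entry is dropped during conversion.
src := &querypb.SegmentLoadInfo{
	SegmentID: 1001,
	BinlogPaths: []*datapb.FieldBinlog{
		nil,            // skipped by convertFieldBinlogs
		{FieldID: 100}, // kept
	},
}
dst := segcore.ConvertToSegcoreSegmentLoadInfo(src)
// len(dst.GetBinlogPaths()) == 1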
@@ -8,12 +8,15 @@ import (
	"github.com/stretchr/testify/assert"
	"google.golang.org/protobuf/proto"

	"github.com/milvus-io/milvus-proto/go-api/v2/commonpb"
	"github.com/milvus-io/milvus-proto/go-api/v2/schemapb"
	"github.com/milvus-io/milvus/internal/mocks/util/mock_segcore"
	"github.com/milvus-io/milvus/internal/storage"
	"github.com/milvus-io/milvus/internal/util/initcore"
	"github.com/milvus-io/milvus/internal/util/segcore"
	"github.com/milvus-io/milvus/pkg/v2/proto/datapb"
	"github.com/milvus-io/milvus/pkg/v2/proto/planpb"
	"github.com/milvus-io/milvus/pkg/v2/proto/querypb"
	"github.com/milvus-io/milvus/pkg/v2/proto/segcorepb"
	"github.com/milvus-io/milvus/pkg/v2/util/paramtable"
	"github.com/milvus-io/milvus/pkg/v2/util/typeutil"
@@ -139,3 +142,260 @@ func assertEqualCount(
	assert.NotNil(t, retrieveResult2)
	retrieveResult2.Release()
}

func TestConvertToSegcoreSegmentLoadInfo(t *testing.T) {
	t.Run("nil input", func(t *testing.T) {
		result := segcore.ConvertToSegcoreSegmentLoadInfo(nil)
		assert.Nil(t, result)
	})

	t.Run("empty input", func(t *testing.T) {
		src := &querypb.SegmentLoadInfo{}
		result := segcore.ConvertToSegcoreSegmentLoadInfo(src)
		assert.NotNil(t, result)
		assert.Equal(t, int64(0), result.SegmentID)
		assert.Equal(t, int64(0), result.PartitionID)
		assert.Equal(t, int64(0), result.CollectionID)
	})

	t.Run("full conversion", func(t *testing.T) {
		// Create source querypb.SegmentLoadInfo with all fields populated
		src := &querypb.SegmentLoadInfo{
			SegmentID:    1001,
			PartitionID:  2001,
			CollectionID: 3001,
			DbID:         4001,
			FlushTime:    5001,
			BinlogPaths: []*datapb.FieldBinlog{
				{
					FieldID: 100,
					Binlogs: []*datapb.Binlog{
						{
							EntriesNum:    10,
							TimestampFrom: 1000,
							TimestampTo:   2000,
							LogPath:       "/path/to/binlog",
							LogSize:       1024,
							LogID:         9001,
							MemorySize:    2048,
						},
					},
					ChildFields: []int64{101, 102},
				},
			},
			NumOfRows: 1000,
			Statslogs: []*datapb.FieldBinlog{
				{
					FieldID: 200,
					Binlogs: []*datapb.Binlog{
						{
							EntriesNum:    5,
							TimestampFrom: 1500,
							TimestampTo:   2500,
							LogPath:       "/path/to/statslog",
							LogSize:       512,
							LogID:         9002,
							MemorySize:    1024,
						},
					},
				},
			},
			Deltalogs: []*datapb.FieldBinlog{
				{
					FieldID: 300,
					Binlogs: []*datapb.Binlog{
						{
							EntriesNum:    3,
							TimestampFrom: 2000,
							TimestampTo:   3000,
							LogPath:       "/path/to/deltalog",
							LogSize:       256,
							LogID:         9003,
							MemorySize:    512,
						},
					},
				},
			},
			CompactionFrom: []int64{8001, 8002},
			IndexInfos: []*querypb.FieldIndexInfo{
				{
					FieldID:             100,
					EnableIndex:         true,
					IndexName:           "test_index",
					IndexID:             7001,
					BuildID:             7002,
					IndexParams:         []*commonpb.KeyValuePair{{Key: "index_type", Value: "HNSW"}},
					IndexFilePaths:      []string{"/path/to/index"},
					IndexSize:           4096,
					IndexVersion:        1,
					NumRows:             1000,
					CurrentIndexVersion: 2,
					IndexStoreVersion:   3,
				},
			},
			SegmentSize:     8192,
			InsertChannel:   "insert_channel_1",
			ReadableVersion: 6001,
			StorageVersion:  7001,
			IsSorted:        true,
			TextStatsLogs: map[int64]*datapb.TextIndexStats{
				400: {
					FieldID:    400,
					Version:    1,
					Files:      []string{"/path/to/text/stats1", "/path/to/text/stats2"},
					LogSize:    2048,
					MemorySize: 4096,
					BuildID:    9101,
				},
			},
			Bm25Logs: []*datapb.FieldBinlog{
				{
					FieldID: 500,
					Binlogs: []*datapb.Binlog{
						{
							EntriesNum:    7,
							TimestampFrom: 3000,
							TimestampTo:   4000,
							LogPath:       "/path/to/bm25log",
							LogSize:       768,
							LogID:         9004,
							MemorySize:    1536,
						},
					},
				},
			},
			JsonKeyStatsLogs: map[int64]*datapb.JsonKeyStats{
				600: {
					FieldID:                600,
					Version:                2,
					Files:                  []string{"/path/to/json/stats"},
					LogSize:                1024,
					MemorySize:             2048,
					BuildID:                9201,
					JsonKeyStatsDataFormat: 1,
				},
			},
			Priority: commonpb.LoadPriority_HIGH,
		}

		// Convert to segcorepb.SegmentLoadInfo
		result := segcore.ConvertToSegcoreSegmentLoadInfo(src)

		// Validate basic fields
		assert.NotNil(t, result)
		assert.Equal(t, src.SegmentID, result.SegmentID)
		assert.Equal(t, src.PartitionID, result.PartitionID)
		assert.Equal(t, src.CollectionID, result.CollectionID)
		assert.Equal(t, src.DbID, result.DbID)
		assert.Equal(t, src.FlushTime, result.FlushTime)
		assert.Equal(t, src.NumOfRows, result.NumOfRows)
		assert.Equal(t, src.SegmentSize, result.SegmentSize)
		assert.Equal(t, src.InsertChannel, result.InsertChannel)
		assert.Equal(t, src.ReadableVersion, result.ReadableVersion)
		assert.Equal(t, src.StorageVersion, result.StorageVersion)
		assert.Equal(t, src.IsSorted, result.IsSorted)
		assert.Equal(t, src.Priority, result.Priority)
		assert.Equal(t, src.CompactionFrom, result.CompactionFrom)

		// Validate BinlogPaths conversion
		assert.Equal(t, len(src.BinlogPaths), len(result.BinlogPaths))
		assert.Equal(t, src.BinlogPaths[0].FieldID, result.BinlogPaths[0].FieldID)
		assert.Equal(t, len(src.BinlogPaths[0].Binlogs), len(result.BinlogPaths[0].Binlogs))
		assert.Equal(t, src.BinlogPaths[0].Binlogs[0].EntriesNum, result.BinlogPaths[0].Binlogs[0].EntriesNum)
		assert.Equal(t, src.BinlogPaths[0].Binlogs[0].TimestampFrom, result.BinlogPaths[0].Binlogs[0].TimestampFrom)
		assert.Equal(t, src.BinlogPaths[0].Binlogs[0].TimestampTo, result.BinlogPaths[0].Binlogs[0].TimestampTo)
		assert.Equal(t, src.BinlogPaths[0].Binlogs[0].LogPath, result.BinlogPaths[0].Binlogs[0].LogPath)
		assert.Equal(t, src.BinlogPaths[0].Binlogs[0].LogSize, result.BinlogPaths[0].Binlogs[0].LogSize)
		assert.Equal(t, src.BinlogPaths[0].Binlogs[0].LogID, result.BinlogPaths[0].Binlogs[0].LogID)
		assert.Equal(t, src.BinlogPaths[0].Binlogs[0].MemorySize, result.BinlogPaths[0].Binlogs[0].MemorySize)
		assert.Equal(t, src.BinlogPaths[0].ChildFields, result.BinlogPaths[0].ChildFields)

		// Validate Statslogs conversion
		assert.Equal(t, len(src.Statslogs), len(result.Statslogs))
		assert.Equal(t, src.Statslogs[0].FieldID, result.Statslogs[0].FieldID)

		// Validate Deltalogs conversion
		assert.Equal(t, len(src.Deltalogs), len(result.Deltalogs))
		assert.Equal(t, src.Deltalogs[0].FieldID, result.Deltalogs[0].FieldID)

		// Validate IndexInfos conversion
		assert.Equal(t, len(src.IndexInfos), len(result.IndexInfos))
		assert.Equal(t, src.IndexInfos[0].FieldID, result.IndexInfos[0].FieldID)
		assert.Equal(t, src.IndexInfos[0].EnableIndex, result.IndexInfos[0].EnableIndex)
		assert.Equal(t, src.IndexInfos[0].IndexName, result.IndexInfos[0].IndexName)
		assert.Equal(t, src.IndexInfos[0].IndexID, result.IndexInfos[0].IndexID)
		assert.Equal(t, src.IndexInfos[0].BuildID, result.IndexInfos[0].BuildID)
		assert.Equal(t, len(src.IndexInfos[0].IndexParams), len(result.IndexInfos[0].IndexParams))
		assert.Equal(t, src.IndexInfos[0].IndexFilePaths, result.IndexInfos[0].IndexFilePaths)
		assert.Equal(t, src.IndexInfos[0].IndexSize, result.IndexInfos[0].IndexSize)
		assert.Equal(t, src.IndexInfos[0].IndexVersion, result.IndexInfos[0].IndexVersion)
		assert.Equal(t, src.IndexInfos[0].NumRows, result.IndexInfos[0].NumRows)
		assert.Equal(t, src.IndexInfos[0].CurrentIndexVersion, result.IndexInfos[0].CurrentIndexVersion)
		assert.Equal(t, src.IndexInfos[0].IndexStoreVersion, result.IndexInfos[0].IndexStoreVersion)

		// Validate TextStatsLogs conversion
		assert.Equal(t, len(src.TextStatsLogs), len(result.TextStatsLogs))
		textStats := result.TextStatsLogs[400]
		assert.NotNil(t, textStats)
		assert.Equal(t, src.TextStatsLogs[400].FieldID, textStats.FieldID)
		assert.Equal(t, src.TextStatsLogs[400].Version, textStats.Version)
		assert.Equal(t, src.TextStatsLogs[400].Files, textStats.Files)
		assert.Equal(t, src.TextStatsLogs[400].LogSize, textStats.LogSize)
		assert.Equal(t, src.TextStatsLogs[400].MemorySize, textStats.MemorySize)
		assert.Equal(t, src.TextStatsLogs[400].BuildID, textStats.BuildID)

		// Validate Bm25Logs conversion
		assert.Equal(t, len(src.Bm25Logs), len(result.Bm25Logs))
		assert.Equal(t, src.Bm25Logs[0].FieldID, result.Bm25Logs[0].FieldID)

		// Validate JsonKeyStatsLogs conversion
		assert.Equal(t, len(src.JsonKeyStatsLogs), len(result.JsonKeyStatsLogs))
		jsonStats := result.JsonKeyStatsLogs[600]
		assert.NotNil(t, jsonStats)
		assert.Equal(t, src.JsonKeyStatsLogs[600].FieldID, jsonStats.FieldID)
		assert.Equal(t, src.JsonKeyStatsLogs[600].Version, jsonStats.Version)
		assert.Equal(t, src.JsonKeyStatsLogs[600].Files, jsonStats.Files)
		assert.Equal(t, src.JsonKeyStatsLogs[600].LogSize, jsonStats.LogSize)
		assert.Equal(t, src.JsonKeyStatsLogs[600].MemorySize, jsonStats.MemorySize)
		assert.Equal(t, src.JsonKeyStatsLogs[600].BuildID, jsonStats.BuildID)
		assert.Equal(t, src.JsonKeyStatsLogs[600].JsonKeyStatsDataFormat, jsonStats.JsonKeyStatsDataFormat)
	})

	t.Run("nil elements in arrays and maps", func(t *testing.T) {
		src := &querypb.SegmentLoadInfo{
			SegmentID: 1001,
			BinlogPaths: []*datapb.FieldBinlog{
				nil, // nil element should be skipped
				{FieldID: 100},
			},
			Statslogs: []*datapb.FieldBinlog{
				nil,
			},
			IndexInfos: []*querypb.FieldIndexInfo{
				nil,
				{FieldID: 200},
			},
			TextStatsLogs: map[int64]*datapb.TextIndexStats{
				100: nil, // nil value should be skipped
				200: {FieldID: 200},
			},
			JsonKeyStatsLogs: map[int64]*datapb.JsonKeyStats{
				300: nil,
				400: {FieldID: 400},
			},
		}

		result := segcore.ConvertToSegcoreSegmentLoadInfo(src)

		assert.NotNil(t, result)
		assert.Equal(t, 1, len(result.BinlogPaths))
		assert.Equal(t, int64(100), result.BinlogPaths[0].FieldID)
		assert.Equal(t, 0, len(result.Statslogs))
		assert.Equal(t, 1, len(result.IndexInfos))
		assert.Equal(t, int64(200), result.IndexInfos[0].FieldID)
		assert.Equal(t, 1, len(result.TextStatsLogs))
		assert.NotNil(t, result.TextStatsLogs[200])
		assert.Equal(t, 1, len(result.JsonKeyStatsLogs))
		assert.NotNil(t, result.JsonKeyStatsLogs[400])
	})
}
@@ -5,6 +5,41 @@ option go_package = "github.com/milvus-io/milvus/pkg/v2/proto/segcorepb";

import "schema.proto";
import "common.proto";

message Binlog {
  int64 entries_num = 1;
  uint64 timestamp_from = 2;
  uint64 timestamp_to = 3;
  string log_path = 4;
  int64 log_size = 5;
  int64 logID = 6;
  int64 memory_size = 7;
}

message FieldBinlog {
  int64 fieldID = 1;
  repeated Binlog binlogs = 2;
  repeated int64 child_fields = 3;
}

message TextIndexStats {
  int64 fieldID = 1;
  int64 version = 2;
  repeated string files = 3;
  int64 log_size = 4;
  int64 memory_size = 5;
  int64 buildID = 6;
}

message JsonKeyStats {
  int64 fieldID = 1;
  int64 version = 2;
  repeated string files = 3;
  int64 log_size = 4;
  int64 memory_size = 5;
  int64 buildID = 6;
  int64 json_key_stats_data_format = 7;
}

message RetrieveResults {
  schema.IDs ids = 1;
  repeated int64 offset = 2;
@@ -46,3 +81,41 @@ message CollectionIndexMeta {
  int64 maxIndexRowCount = 1;
  repeated FieldIndexMeta index_metas = 2;
}

message FieldIndexInfo {
  int64 fieldID = 1;
  bool enable_index = 2 [deprecated = true];
  string index_name = 3;
  int64 indexID = 4;
  int64 buildID = 5;
  repeated common.KeyValuePair index_params = 6;
  repeated string index_file_paths = 7;
  int64 index_size = 8;
  int64 index_version = 9;
  int64 num_rows = 10;
  int32 current_index_version = 11;
  int64 index_store_version = 12;
}

message SegmentLoadInfo {
  int64 segmentID = 1;
  int64 partitionID = 2;
  int64 collectionID = 3;
  int64 dbID = 4;
  int64 flush_time = 5;
  repeated FieldBinlog binlog_paths = 6;
  int64 num_of_rows = 7;
  repeated FieldBinlog statslogs = 8;
  repeated FieldBinlog deltalogs = 9;
  repeated int64 compactionFrom = 10; // segmentIDs compacted from
  repeated FieldIndexInfo index_infos = 11;
  int64 segment_size = 12 [deprecated = true];
  string insert_channel = 13;
  int64 readableVersion = 14;
  int64 storageVersion = 15;
  bool is_sorted = 16;
  map<int64, TextIndexStats> textStatsLogs = 17;
  repeated FieldBinlog bm25logs = 18;
  map<int64, JsonKeyStats> jsonKeyStatsLogs = 19;
  common.LoadPriority priority = 20;
}
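To make the blob contract concrete, here is a minimal sketch (placeholder values; generated field names as used by the Go conversion code above) of producing the load_info_blob that NewSegmentWithLoadInfo parses with ParseFromArray on the C++ side:

// Hedged sketch: marshal a segcorepb.SegmentLoadInfo into the byte blob
// passed as load_info_blob / load_info_length to the C API.
// Assumes: "google.golang.org/protobuf/proto" and
// "github.com/milvus-io/milvus/pkg/v2/proto/segcorepb" are imported.
info := &segcorepb.SegmentLoadInfo{
	SegmentID:    1001,
	PartitionID:  2001,
	CollectionID: 3001,
	NumOfRows:    1000,
	IsSorted:     true,
}
blob, err := proto.Marshal(info)
if err != nil {
	panic(err)
}
_ = blob // blob and int64(len(blob)) form the C API's blob arguments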
(One file diff suppressed because it is too large.)