enhance: add NewSegmentWithLoadInfo API to support segment self-managed loading (#45061)
This commit introduces the foundation for enabling segments to manage their own loading process by passing load information during segment creation.

Changes:

C++ Layer:
- Add NewSegmentWithLoadInfo() C API to create segments with serialized load info
- Add SetLoadInfo() method to SegmentInterface for storing load information
- Refactor segment creation logic into a shared CreateSegment() helper function
- Add comprehensive documentation for the new API

Go Layer:
- Extend CreateCSegmentRequest with an optional LoadInfo field
- Update segment creation in querynode to pass SegmentLoadInfo when available
- Add ConvertToSegcoreSegmentLoadInfo() and helper converters for proto translation

Proto Definitions:
- Add segcorepb.SegmentLoadInfo message with essential loading metadata
- Add supporting messages: Binlog, FieldBinlog, FieldIndexInfo, TextIndexStats, JsonKeyStats
- Remove the dependency on data_coord.proto by creating segcore-specific definitions

Testing:
- Add comprehensive unit tests for the proto conversion functions
- Test edge cases including nil inputs, empty data, and nil array/map elements

This is the first step toward issue #45060 - enabling segments to autonomously manage their loading process, which will:
- Clarify responsibilities between the Go and C++ layers
- Reduce cross-language call overhead
- Enable precise resource management at the C++ level
- Support better integration with the caching layer
- Enable proactive schema evolution handling

Related to #45060

---------

Signed-off-by: Congqi Xia <congqi.xia@zilliz.com>
Parent: dabbae0386
Commit: 36a887b38b
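For orientation before the diff: a minimal sketch of the creation path this commit enables, where a caller hands load info to segment creation in one step. Only CreateCSegmentRequest, its new LoadInfo field, and CreateCSegment come from the diff below; the helper name, the collection wrapper type, and the segment-type constant are assumptions, not part of the commit.

// Hypothetical caller sketch (not part of this commit).
package main

import (
	"github.com/milvus-io/milvus/internal/util/segcore"
	"github.com/milvus-io/milvus/pkg/v2/proto/querypb"
)

// newSealedSegmentWithLoadInfo creates a sealed segment and hands it its load
// info in a single call. `col` is an already-created collection wrapper; the
// segment-type constant name is assumed.
func newSealedSegmentWithLoadInfo(col *segcore.CCollection, loadInfo *querypb.SegmentLoadInfo) (segcore.CSegment, error) {
	return segcore.CreateCSegment(&segcore.CreateCSegmentRequest{
		Collection:  col,
		SegmentID:   loadInfo.GetSegmentID(),
		SegmentType: segcore.SegmentTypeSealed, // assumed constant name
		IsSorted:    loadInfo.GetIsSorted(),
		// With LoadInfo set, CreateCSegment marshals it and calls the new
		// NewSegmentWithLoadInfo C API; with LoadInfo nil it falls back to
		// the plain NewSegment path.
		LoadInfo: loadInfo,
	})
}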
@@ -197,6 +197,9 @@ class SegmentInterface {
     // currently it's used to sync field data list with updated schema.
     virtual void
     FinishLoad() = 0;
+
+    virtual void
+    SetLoadInfo(const milvus::proto::segcore::SegmentLoadInfo& load_info) = 0;
 };
 
     // internal API for DSL calculation
@@ -381,6 +384,12 @@ class SegmentInternalInterface : public SegmentInterface {
         FieldId field_id,
         const std::string& nested_path) const override;
 
+    virtual void
+    SetLoadInfo(
+        const milvus::proto::segcore::SegmentLoadInfo& load_info) override {
+        load_info_ = load_info;
+    }
+
  public:
     // `query_offsets` is not null only for vector array (embedding list) search
     // where it denotes the number of vectors in each embedding list. The length
@@ -599,6 +608,8 @@ class SegmentInternalInterface : public SegmentInterface {
     // mutex protecting rw options on schema_
     std::shared_mutex sch_mutex_;
 
+    milvus::proto::segcore::SegmentLoadInfo load_info_;
+
     mutable std::shared_mutex mutex_;
     // fieldID -> std::pair<num_rows, avg_size>
     std::unordered_map<FieldId, std::pair<int64_t, int64_t>>
@@ -14,6 +14,8 @@
 #include <memory>
 #include <limits>
 
+#include "common/EasyAssert.h"
+#include "common/common_type_c.h"
 #include "pb/cgo_msg.pb.h"
 #include "pb/index_cgo_msg.pb.h"
 
@@ -27,6 +29,7 @@
 #include "log/Log.h"
 #include "mmap/Types.h"
 #include "monitor/scope_metric.h"
+#include "pb/segcore.pb.h"
 #include "segcore/Collection.h"
 #include "segcore/SegcoreConfig.h"
 #include "segcore/SegmentGrowingImpl.h"
@@ -45,6 +48,48 @@
 #include "common/GeometryCache.h"
 
 ////////////////////////////// common interfaces //////////////////////////////
 
+/**
+ * @brief Create a segment from a collection.
+ * @param col The collection to create the segment from.
+ * @param seg_type The type of segment to create.
+ * @param segment_id The ID of the segment to create.
+ * @param is_sorted_by_pk Whether the data in the sealed segment is sorted by primary key.
+ * @return A unique pointer to a SegmentInterface object.
+ */
+std::unique_ptr<milvus::segcore::SegmentInterface>
+CreateSegment(milvus::segcore::Collection* col,
+              SegmentType seg_type,
+              int64_t segment_id,
+              bool is_sorted_by_pk) {
+    std::unique_ptr<milvus::segcore::SegmentInterface> segment;
+    switch (seg_type) {
+        case Growing: {
+            auto seg = milvus::segcore::CreateGrowingSegment(
+                col->get_schema(),
+                col->get_index_meta(),
+                segment_id,
+                milvus::segcore::SegcoreConfig::default_config());
+            segment = std::move(seg);
+            break;
+        }
+        case Sealed:
+        case Indexing:
+            segment = milvus::segcore::CreateSealedSegment(
+                col->get_schema(),
+                col->get_index_meta(),
+                segment_id,
+                milvus::segcore::SegcoreConfig::default_config(),
+                is_sorted_by_pk);
+            break;
+
+        default:
+            ThrowInfo(
+                milvus::UnexpectedError, "invalid segment type: {}", seg_type);
+    }
+    return segment;
+}
+
 CStatus
 NewSegment(CCollection collection,
            SegmentType seg_type,
@@ -56,33 +101,37 @@ NewSegment(CCollection collection,
     try {
         auto col = static_cast<milvus::segcore::Collection*>(collection);
 
-        std::unique_ptr<milvus::segcore::SegmentInterface> segment;
-        switch (seg_type) {
-            case Growing: {
-                auto seg = milvus::segcore::CreateGrowingSegment(
-                    col->get_schema(),
-                    col->get_index_meta(),
-                    segment_id,
-                    milvus::segcore::SegcoreConfig::default_config());
-                segment = std::move(seg);
-                break;
-            }
-            case Sealed:
-            case Indexing:
-                segment = milvus::segcore::CreateSealedSegment(
-                    col->get_schema(),
-                    col->get_index_meta(),
-                    segment_id,
-                    milvus::segcore::SegcoreConfig::default_config(),
-                    is_sorted_by_pk);
-                break;
-
-            default:
-                ThrowInfo(milvus::UnexpectedError,
-                          "invalid segment type: {}",
-                          seg_type);
-        }
+        auto segment =
+            CreateSegment(col, seg_type, segment_id, is_sorted_by_pk);
+
+        *newSegment = segment.release();
+        return milvus::SuccessCStatus();
+    } catch (std::exception& e) {
+        return milvus::FailureCStatus(&e);
+    }
+}
+
+CStatus
+NewSegmentWithLoadInfo(CCollection collection,
+                       SegmentType seg_type,
+                       int64_t segment_id,
+                       CSegmentInterface* newSegment,
+                       bool is_sorted_by_pk,
+                       const uint8_t* load_info_blob,
+                       const int64_t load_info_length) {
+    SCOPE_CGO_CALL_METRIC();
+
+    try {
+        AssertInfo(load_info_blob, "load info is null");
+        milvus::proto::segcore::SegmentLoadInfo load_info;
+        auto suc = load_info.ParseFromArray(load_info_blob, load_info_length);
+        AssertInfo(suc, "unmarshal load info failed");
+
+        auto col = static_cast<milvus::segcore::Collection*>(collection);
+
+        auto segment =
+            CreateSegment(col, seg_type, segment_id, is_sorted_by_pk);
+        segment->SetLoadInfo(load_info);
 
         *newSegment = segment.release();
         return milvus::SuccessCStatus();
     } catch (std::exception& e) {
@@ -36,6 +36,27 @@ NewSegment(CCollection collection,
            CSegmentInterface* newSegment,
            bool is_sorted_by_pk);
 
+// Create a new segment with pre-loaded segment information.
+// This function creates a segment and initializes it with serialized load info,
+// which can include precomputed metadata, statistics, or configuration data.
+//
+// @param collection: The collection that this segment belongs to
+// @param seg_type: Type of the segment (growing, sealed, etc.)
+// @param segment_id: Unique identifier for this segment
+// @param newSegment: Output parameter for the created segment interface
+// @param is_sorted_by_pk: Whether the segment data is sorted by primary key
+// @param load_info_blob: Serialized load information blob
+// @param load_info_length: Length of the load_info_blob in bytes
+// @return CStatus indicating success or failure
+CStatus
+NewSegmentWithLoadInfo(CCollection collection,
+                       SegmentType seg_type,
+                       int64_t segment_id,
+                       CSegmentInterface* newSegment,
+                       bool is_sorted_by_pk,
+                       const uint8_t* load_info_blob,
+                       const int64_t load_info_length);
+
 void
 DeleteSegment(CSegmentInterface c_segment);
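The load_info_blob contract is plain protobuf: the caller serializes a SegmentLoadInfo message and passes the byte pointer and length across the boundary. A minimal Go sketch of producing such a blob, assuming the segcorepb message generated from the proto added in this commit; the field values are illustrative:

package main

import (
	"google.golang.org/protobuf/proto"

	"github.com/milvus-io/milvus/pkg/v2/proto/segcorepb"
)

// makeLoadInfoBlob builds the blob consumed by NewSegmentWithLoadInfo: a
// protobuf-encoded SegmentLoadInfo. A pointer to its first byte and len(blob)
// map to the load_info_blob / load_info_length parameters of the C API.
func makeLoadInfoBlob() ([]byte, error) {
	info := &segcorepb.SegmentLoadInfo{
		SegmentID:    1001, // illustrative values
		CollectionID: 3001,
		NumOfRows:    1000,
		IsSorted:     true,
	}
	return proto.Marshal(info)
}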
@@ -370,6 +370,7 @@ func NewSegment(ctx context.Context,
 			SegmentID:   loadInfo.GetSegmentID(),
 			SegmentType: segmentType,
 			IsSorted:    loadInfo.GetIsSorted(),
+			LoadInfo:    loadInfo,
 		})
 		return nil, err
 	}).Await(); err != nil {
@@ -23,6 +23,9 @@ import (
 	"github.com/milvus-io/milvus-proto/go-api/v2/commonpb"
 	"github.com/milvus-io/milvus/internal/storage"
 	"github.com/milvus-io/milvus/internal/util/cgo"
+	"github.com/milvus-io/milvus/pkg/v2/proto/datapb"
+	"github.com/milvus-io/milvus/pkg/v2/proto/querypb"
+	"github.com/milvus-io/milvus/pkg/v2/proto/segcorepb"
 	"github.com/milvus-io/milvus/pkg/v2/util/merr"
 )
@@ -42,6 +45,7 @@ type CreateCSegmentRequest struct {
 	SegmentID   int64
 	SegmentType SegmentType
 	IsSorted    bool
+	LoadInfo    *querypb.SegmentLoadInfo
 }
 
 func (req *CreateCSegmentRequest) getCSegmentType() C.SegmentType {
@@ -60,7 +64,17 @@ func (req *CreateCSegmentRequest) getCSegmentType() C.SegmentType {
 // CreateCSegment creates a segment from a CreateCSegmentRequest.
 func CreateCSegment(req *CreateCSegmentRequest) (CSegment, error) {
 	var ptr C.CSegmentInterface
-	status := C.NewSegment(req.Collection.rawPointer(), req.getCSegmentType(), C.int64_t(req.SegmentID), &ptr, C.bool(req.IsSorted))
+	var status C.CStatus
+	if req.LoadInfo != nil {
+		loadInfoBlob, err := proto.Marshal(req.LoadInfo)
+		if err != nil {
+			return nil, err
+		}
+
+		status = C.NewSegmentWithLoadInfo(req.Collection.rawPointer(), req.getCSegmentType(), C.int64_t(req.SegmentID), &ptr, C.bool(req.IsSorted), (*C.uint8_t)(unsafe.Pointer(&loadInfoBlob[0])), C.int64_t(len(loadInfoBlob)))
+	} else {
+		status = C.NewSegment(req.Collection.rawPointer(), req.getCSegmentType(), C.int64_t(req.SegmentID), &ptr, C.bool(req.IsSorted))
+	}
 	if err := ConsumeCStatusIntoError(&status); err != nil {
 		return nil, err
 	}
@@ -317,3 +331,160 @@ func (s *cSegmentImpl) DropJSONIndex(ctx context.Context, fieldID int64, nestedP
 func (s *cSegmentImpl) Release() {
 	C.DeleteSegment(s.ptr)
 }
+
+// ConvertToSegcoreSegmentLoadInfo converts querypb.SegmentLoadInfo to segcorepb.SegmentLoadInfo.
+// This function is needed because segcorepb.SegmentLoadInfo is a simplified version that doesn't
+// depend on data_coord.proto and excludes fields like start_position, delta_position, and level.
+func ConvertToSegcoreSegmentLoadInfo(src *querypb.SegmentLoadInfo) *segcorepb.SegmentLoadInfo {
+	if src == nil {
+		return nil
+	}
+
+	return &segcorepb.SegmentLoadInfo{
+		SegmentID:        src.GetSegmentID(),
+		PartitionID:      src.GetPartitionID(),
+		CollectionID:     src.GetCollectionID(),
+		DbID:             src.GetDbID(),
+		FlushTime:        src.GetFlushTime(),
+		BinlogPaths:      convertFieldBinlogs(src.GetBinlogPaths()),
+		NumOfRows:        src.GetNumOfRows(),
+		Statslogs:        convertFieldBinlogs(src.GetStatslogs()),
+		Deltalogs:        convertFieldBinlogs(src.GetDeltalogs()),
+		CompactionFrom:   src.GetCompactionFrom(),
+		IndexInfos:       convertFieldIndexInfos(src.GetIndexInfos()),
+		SegmentSize:      src.GetSegmentSize(),
+		InsertChannel:    src.GetInsertChannel(),
+		ReadableVersion:  src.GetReadableVersion(),
+		StorageVersion:   src.GetStorageVersion(),
+		IsSorted:         src.GetIsSorted(),
+		TextStatsLogs:    convertTextIndexStats(src.GetTextStatsLogs()),
+		Bm25Logs:         convertFieldBinlogs(src.GetBm25Logs()),
+		JsonKeyStatsLogs: convertJSONKeyStats(src.GetJsonKeyStatsLogs()),
+		Priority:         src.GetPriority(),
+	}
+}
+
+// convertFieldBinlogs converts datapb.FieldBinlog to segcorepb.FieldBinlog.
+func convertFieldBinlogs(src []*datapb.FieldBinlog) []*segcorepb.FieldBinlog {
+	if src == nil {
+		return nil
+	}
+
+	result := make([]*segcorepb.FieldBinlog, 0, len(src))
+	for _, fb := range src {
+		if fb == nil {
+			continue
+		}
+
+		result = append(result, &segcorepb.FieldBinlog{
+			FieldID:     fb.GetFieldID(),
+			Binlogs:     convertBinlogs(fb.GetBinlogs()),
+			ChildFields: fb.GetChildFields(),
+		})
+	}
+	return result
+}
+
+// convertBinlogs converts datapb.Binlog to segcorepb.Binlog.
+func convertBinlogs(src []*datapb.Binlog) []*segcorepb.Binlog {
+	if src == nil {
+		return nil
+	}
+
+	result := make([]*segcorepb.Binlog, 0, len(src))
+	for _, b := range src {
+		if b == nil {
+			continue
+		}
+
+		result = append(result, &segcorepb.Binlog{
+			EntriesNum:    b.GetEntriesNum(),
+			TimestampFrom: b.GetTimestampFrom(),
+			TimestampTo:   b.GetTimestampTo(),
+			LogPath:       b.GetLogPath(),
+			LogSize:       b.GetLogSize(),
+			LogID:         b.GetLogID(),
+			MemorySize:    b.GetMemorySize(),
+		})
+	}
+	return result
+}
+
+// convertFieldIndexInfos converts querypb.FieldIndexInfo to segcorepb.FieldIndexInfo.
+func convertFieldIndexInfos(src []*querypb.FieldIndexInfo) []*segcorepb.FieldIndexInfo {
+	if src == nil {
+		return nil
+	}
+
+	result := make([]*segcorepb.FieldIndexInfo, 0, len(src))
+	for _, fii := range src {
+		if fii == nil {
+			continue
+		}
+
+		result = append(result, &segcorepb.FieldIndexInfo{
+			FieldID:             fii.GetFieldID(),
+			EnableIndex:         fii.GetEnableIndex(),
+			IndexName:           fii.GetIndexName(),
+			IndexID:             fii.GetIndexID(),
+			BuildID:             fii.GetBuildID(),
+			IndexParams:         fii.GetIndexParams(),
+			IndexFilePaths:      fii.GetIndexFilePaths(),
+			IndexSize:           fii.GetIndexSize(),
+			IndexVersion:        fii.GetIndexVersion(),
+			NumRows:             fii.GetNumRows(),
+			CurrentIndexVersion: fii.GetCurrentIndexVersion(),
+			IndexStoreVersion:   fii.GetIndexStoreVersion(),
+		})
+	}
+	return result
+}
+
+// convertTextIndexStats converts datapb.TextIndexStats to segcorepb.TextIndexStats.
+func convertTextIndexStats(src map[int64]*datapb.TextIndexStats) map[int64]*segcorepb.TextIndexStats {
+	if src == nil {
+		return nil
+	}
+
+	result := make(map[int64]*segcorepb.TextIndexStats, len(src))
+	for k, v := range src {
+		if v == nil {
+			continue
+		}
+
+		result[k] = &segcorepb.TextIndexStats{
+			FieldID:    v.GetFieldID(),
+			Version:    v.GetVersion(),
+			Files:      v.GetFiles(),
+			LogSize:    v.GetLogSize(),
+			MemorySize: v.GetMemorySize(),
+			BuildID:    v.GetBuildID(),
+		}
+	}
+	return result
+}
+
+// convertJSONKeyStats converts datapb.JsonKeyStats to segcorepb.JsonKeyStats.
+func convertJSONKeyStats(src map[int64]*datapb.JsonKeyStats) map[int64]*segcorepb.JsonKeyStats {
+	if src == nil {
+		return nil
+	}
+
+	result := make(map[int64]*segcorepb.JsonKeyStats, len(src))
+	for k, v := range src {
+		if v == nil {
+			continue
+		}
+
+		result[k] = &segcorepb.JsonKeyStats{
+			FieldID:                v.GetFieldID(),
+			Version:                v.GetVersion(),
+			Files:                  v.GetFiles(),
+			LogSize:                v.GetLogSize(),
+			MemorySize:             v.GetMemorySize(),
+			BuildID:                v.GetBuildID(),
+			JsonKeyStatsDataFormat: v.GetJsonKeyStatsDataFormat(),
+		}
+	}
+	return result
+}
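A short usage sketch of the converter added above: it is nil-safe at the top level and silently drops nil slice and map elements, exactly the behavior the tests below exercise. The values here are illustrative.

package main

import (
	"fmt"

	"github.com/milvus-io/milvus/internal/util/segcore"
	"github.com/milvus-io/milvus/pkg/v2/proto/datapb"
	"github.com/milvus-io/milvus/pkg/v2/proto/querypb"
)

func main() {
	src := &querypb.SegmentLoadInfo{
		SegmentID: 1001,
		BinlogPaths: []*datapb.FieldBinlog{
			nil,            // dropped by the converter
			{FieldID: 100}, // kept
		},
	}
	dst := segcore.ConvertToSegcoreSegmentLoadInfo(src)
	fmt.Println(dst.GetSegmentID(), len(dst.GetBinlogPaths())) // 1001 1
}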
@@ -8,12 +8,15 @@ import (
 	"github.com/stretchr/testify/assert"
 	"google.golang.org/protobuf/proto"
 
+	"github.com/milvus-io/milvus-proto/go-api/v2/commonpb"
 	"github.com/milvus-io/milvus-proto/go-api/v2/schemapb"
 	"github.com/milvus-io/milvus/internal/mocks/util/mock_segcore"
 	"github.com/milvus-io/milvus/internal/storage"
 	"github.com/milvus-io/milvus/internal/util/initcore"
 	"github.com/milvus-io/milvus/internal/util/segcore"
+	"github.com/milvus-io/milvus/pkg/v2/proto/datapb"
 	"github.com/milvus-io/milvus/pkg/v2/proto/planpb"
+	"github.com/milvus-io/milvus/pkg/v2/proto/querypb"
 	"github.com/milvus-io/milvus/pkg/v2/proto/segcorepb"
 	"github.com/milvus-io/milvus/pkg/v2/util/paramtable"
 	"github.com/milvus-io/milvus/pkg/v2/util/typeutil"
@@ -139,3 +142,260 @@ func assertEqualCount(
 	assert.NotNil(t, retrieveResult2)
 	retrieveResult2.Release()
 }
+
+func TestConvertToSegcoreSegmentLoadInfo(t *testing.T) {
+	t.Run("nil input", func(t *testing.T) {
+		result := segcore.ConvertToSegcoreSegmentLoadInfo(nil)
+		assert.Nil(t, result)
+	})
+
+	t.Run("empty input", func(t *testing.T) {
+		src := &querypb.SegmentLoadInfo{}
+		result := segcore.ConvertToSegcoreSegmentLoadInfo(src)
+		assert.NotNil(t, result)
+		assert.Equal(t, int64(0), result.SegmentID)
+		assert.Equal(t, int64(0), result.PartitionID)
+		assert.Equal(t, int64(0), result.CollectionID)
+	})
+
+	t.Run("full conversion", func(t *testing.T) {
+		// Create source querypb.SegmentLoadInfo with all fields populated
+		src := &querypb.SegmentLoadInfo{
+			SegmentID:    1001,
+			PartitionID:  2001,
+			CollectionID: 3001,
+			DbID:         4001,
+			FlushTime:    5001,
+			BinlogPaths: []*datapb.FieldBinlog{
+				{
+					FieldID: 100,
+					Binlogs: []*datapb.Binlog{
+						{
+							EntriesNum:    10,
+							TimestampFrom: 1000,
+							TimestampTo:   2000,
+							LogPath:       "/path/to/binlog",
+							LogSize:       1024,
+							LogID:         9001,
+							MemorySize:    2048,
+						},
+					},
+					ChildFields: []int64{101, 102},
+				},
+			},
+			NumOfRows: 1000,
+			Statslogs: []*datapb.FieldBinlog{
+				{
+					FieldID: 200,
+					Binlogs: []*datapb.Binlog{
+						{
+							EntriesNum:    5,
+							TimestampFrom: 1500,
+							TimestampTo:   2500,
+							LogPath:       "/path/to/statslog",
+							LogSize:       512,
+							LogID:         9002,
+							MemorySize:    1024,
+						},
+					},
+				},
+			},
+			Deltalogs: []*datapb.FieldBinlog{
+				{
+					FieldID: 300,
+					Binlogs: []*datapb.Binlog{
+						{
+							EntriesNum:    3,
+							TimestampFrom: 2000,
+							TimestampTo:   3000,
+							LogPath:       "/path/to/deltalog",
+							LogSize:       256,
+							LogID:         9003,
+							MemorySize:    512,
+						},
+					},
+				},
+			},
+			CompactionFrom: []int64{8001, 8002},
+			IndexInfos: []*querypb.FieldIndexInfo{
+				{
+					FieldID:             100,
+					EnableIndex:         true,
+					IndexName:           "test_index",
+					IndexID:             7001,
+					BuildID:             7002,
+					IndexParams:         []*commonpb.KeyValuePair{{Key: "index_type", Value: "HNSW"}},
+					IndexFilePaths:      []string{"/path/to/index"},
+					IndexSize:           4096,
+					IndexVersion:        1,
+					NumRows:             1000,
+					CurrentIndexVersion: 2,
+					IndexStoreVersion:   3,
+				},
+			},
+			SegmentSize:     8192,
+			InsertChannel:   "insert_channel_1",
+			ReadableVersion: 6001,
+			StorageVersion:  7001,
+			IsSorted:        true,
+			TextStatsLogs: map[int64]*datapb.TextIndexStats{
+				400: {
+					FieldID:    400,
+					Version:    1,
+					Files:      []string{"/path/to/text/stats1", "/path/to/text/stats2"},
+					LogSize:    2048,
+					MemorySize: 4096,
+					BuildID:    9101,
+				},
+			},
+			Bm25Logs: []*datapb.FieldBinlog{
+				{
+					FieldID: 500,
+					Binlogs: []*datapb.Binlog{
+						{
+							EntriesNum:    7,
+							TimestampFrom: 3000,
+							TimestampTo:   4000,
+							LogPath:       "/path/to/bm25log",
+							LogSize:       768,
+							LogID:         9004,
+							MemorySize:    1536,
+						},
+					},
+				},
+			},
+			JsonKeyStatsLogs: map[int64]*datapb.JsonKeyStats{
+				600: {
+					FieldID:                600,
+					Version:                2,
+					Files:                  []string{"/path/to/json/stats"},
+					LogSize:                1024,
+					MemorySize:             2048,
+					BuildID:                9201,
+					JsonKeyStatsDataFormat: 1,
+				},
+			},
+			Priority: commonpb.LoadPriority_HIGH,
+		}
+
+		// Convert to segcorepb.SegmentLoadInfo
+		result := segcore.ConvertToSegcoreSegmentLoadInfo(src)
+
+		// Validate basic fields
+		assert.NotNil(t, result)
+		assert.Equal(t, src.SegmentID, result.SegmentID)
+		assert.Equal(t, src.PartitionID, result.PartitionID)
+		assert.Equal(t, src.CollectionID, result.CollectionID)
+		assert.Equal(t, src.DbID, result.DbID)
+		assert.Equal(t, src.FlushTime, result.FlushTime)
+		assert.Equal(t, src.NumOfRows, result.NumOfRows)
+		assert.Equal(t, src.SegmentSize, result.SegmentSize)
+		assert.Equal(t, src.InsertChannel, result.InsertChannel)
+		assert.Equal(t, src.ReadableVersion, result.ReadableVersion)
+		assert.Equal(t, src.StorageVersion, result.StorageVersion)
+		assert.Equal(t, src.IsSorted, result.IsSorted)
+		assert.Equal(t, src.Priority, result.Priority)
+		assert.Equal(t, src.CompactionFrom, result.CompactionFrom)
+
+		// Validate BinlogPaths conversion
+		assert.Equal(t, len(src.BinlogPaths), len(result.BinlogPaths))
+		assert.Equal(t, src.BinlogPaths[0].FieldID, result.BinlogPaths[0].FieldID)
+		assert.Equal(t, len(src.BinlogPaths[0].Binlogs), len(result.BinlogPaths[0].Binlogs))
+		assert.Equal(t, src.BinlogPaths[0].Binlogs[0].EntriesNum, result.BinlogPaths[0].Binlogs[0].EntriesNum)
+		assert.Equal(t, src.BinlogPaths[0].Binlogs[0].TimestampFrom, result.BinlogPaths[0].Binlogs[0].TimestampFrom)
+		assert.Equal(t, src.BinlogPaths[0].Binlogs[0].TimestampTo, result.BinlogPaths[0].Binlogs[0].TimestampTo)
+		assert.Equal(t, src.BinlogPaths[0].Binlogs[0].LogPath, result.BinlogPaths[0].Binlogs[0].LogPath)
+		assert.Equal(t, src.BinlogPaths[0].Binlogs[0].LogSize, result.BinlogPaths[0].Binlogs[0].LogSize)
+		assert.Equal(t, src.BinlogPaths[0].Binlogs[0].LogID, result.BinlogPaths[0].Binlogs[0].LogID)
+		assert.Equal(t, src.BinlogPaths[0].Binlogs[0].MemorySize, result.BinlogPaths[0].Binlogs[0].MemorySize)
+		assert.Equal(t, src.BinlogPaths[0].ChildFields, result.BinlogPaths[0].ChildFields)
+
+		// Validate Statslogs conversion
+		assert.Equal(t, len(src.Statslogs), len(result.Statslogs))
+		assert.Equal(t, src.Statslogs[0].FieldID, result.Statslogs[0].FieldID)
+
+		// Validate Deltalogs conversion
+		assert.Equal(t, len(src.Deltalogs), len(result.Deltalogs))
+		assert.Equal(t, src.Deltalogs[0].FieldID, result.Deltalogs[0].FieldID)
+
+		// Validate IndexInfos conversion
+		assert.Equal(t, len(src.IndexInfos), len(result.IndexInfos))
+		assert.Equal(t, src.IndexInfos[0].FieldID, result.IndexInfos[0].FieldID)
+		assert.Equal(t, src.IndexInfos[0].EnableIndex, result.IndexInfos[0].EnableIndex)
+		assert.Equal(t, src.IndexInfos[0].IndexName, result.IndexInfos[0].IndexName)
+		assert.Equal(t, src.IndexInfos[0].IndexID, result.IndexInfos[0].IndexID)
+		assert.Equal(t, src.IndexInfos[0].BuildID, result.IndexInfos[0].BuildID)
+		assert.Equal(t, len(src.IndexInfos[0].IndexParams), len(result.IndexInfos[0].IndexParams))
+		assert.Equal(t, src.IndexInfos[0].IndexFilePaths, result.IndexInfos[0].IndexFilePaths)
+		assert.Equal(t, src.IndexInfos[0].IndexSize, result.IndexInfos[0].IndexSize)
+		assert.Equal(t, src.IndexInfos[0].IndexVersion, result.IndexInfos[0].IndexVersion)
+		assert.Equal(t, src.IndexInfos[0].NumRows, result.IndexInfos[0].NumRows)
+		assert.Equal(t, src.IndexInfos[0].CurrentIndexVersion, result.IndexInfos[0].CurrentIndexVersion)
+		assert.Equal(t, src.IndexInfos[0].IndexStoreVersion, result.IndexInfos[0].IndexStoreVersion)
+
+		// Validate TextStatsLogs conversion
+		assert.Equal(t, len(src.TextStatsLogs), len(result.TextStatsLogs))
+		textStats := result.TextStatsLogs[400]
+		assert.NotNil(t, textStats)
+		assert.Equal(t, src.TextStatsLogs[400].FieldID, textStats.FieldID)
+		assert.Equal(t, src.TextStatsLogs[400].Version, textStats.Version)
+		assert.Equal(t, src.TextStatsLogs[400].Files, textStats.Files)
+		assert.Equal(t, src.TextStatsLogs[400].LogSize, textStats.LogSize)
+		assert.Equal(t, src.TextStatsLogs[400].MemorySize, textStats.MemorySize)
+		assert.Equal(t, src.TextStatsLogs[400].BuildID, textStats.BuildID)
+
+		// Validate Bm25Logs conversion
+		assert.Equal(t, len(src.Bm25Logs), len(result.Bm25Logs))
+		assert.Equal(t, src.Bm25Logs[0].FieldID, result.Bm25Logs[0].FieldID)
+
+		// Validate JsonKeyStatsLogs conversion
+		assert.Equal(t, len(src.JsonKeyStatsLogs), len(result.JsonKeyStatsLogs))
+		jsonStats := result.JsonKeyStatsLogs[600]
+		assert.NotNil(t, jsonStats)
+		assert.Equal(t, src.JsonKeyStatsLogs[600].FieldID, jsonStats.FieldID)
+		assert.Equal(t, src.JsonKeyStatsLogs[600].Version, jsonStats.Version)
+		assert.Equal(t, src.JsonKeyStatsLogs[600].Files, jsonStats.Files)
+		assert.Equal(t, src.JsonKeyStatsLogs[600].LogSize, jsonStats.LogSize)
+		assert.Equal(t, src.JsonKeyStatsLogs[600].MemorySize, jsonStats.MemorySize)
+		assert.Equal(t, src.JsonKeyStatsLogs[600].BuildID, jsonStats.BuildID)
+		assert.Equal(t, src.JsonKeyStatsLogs[600].JsonKeyStatsDataFormat, jsonStats.JsonKeyStatsDataFormat)
+	})
+
+	t.Run("nil elements in arrays and maps", func(t *testing.T) {
+		src := &querypb.SegmentLoadInfo{
+			SegmentID: 1001,
+			BinlogPaths: []*datapb.FieldBinlog{
+				nil, // nil element should be skipped
+				{FieldID: 100},
+			},
+			Statslogs: []*datapb.FieldBinlog{
+				nil,
+			},
+			IndexInfos: []*querypb.FieldIndexInfo{
+				nil,
+				{FieldID: 200},
+			},
+			TextStatsLogs: map[int64]*datapb.TextIndexStats{
+				100: nil, // nil value should be skipped
+				200: {FieldID: 200},
+			},
+			JsonKeyStatsLogs: map[int64]*datapb.JsonKeyStats{
+				300: nil,
+				400: {FieldID: 400},
+			},
+		}
+
+		result := segcore.ConvertToSegcoreSegmentLoadInfo(src)
+
+		assert.NotNil(t, result)
+		assert.Equal(t, 1, len(result.BinlogPaths))
+		assert.Equal(t, int64(100), result.BinlogPaths[0].FieldID)
+		assert.Equal(t, 0, len(result.Statslogs))
+		assert.Equal(t, 1, len(result.IndexInfos))
+		assert.Equal(t, int64(200), result.IndexInfos[0].FieldID)
+		assert.Equal(t, 1, len(result.TextStatsLogs))
+		assert.NotNil(t, result.TextStatsLogs[200])
+		assert.Equal(t, 1, len(result.JsonKeyStatsLogs))
+		assert.NotNil(t, result.JsonKeyStatsLogs[400])
+	})
+}
@@ -5,6 +5,41 @@ option go_package = "github.com/milvus-io/milvus/pkg/v2/proto/segcorepb";
 import "schema.proto";
 import "common.proto";
 
+message Binlog {
+  int64 entries_num = 1;
+  uint64 timestamp_from = 2;
+  uint64 timestamp_to = 3;
+  string log_path = 4;
+  int64 log_size = 5;
+  int64 logID = 6;
+  int64 memory_size = 7;
+}
+
+message FieldBinlog {
+  int64 fieldID = 1;
+  repeated Binlog binlogs = 2;
+  repeated int64 child_fields = 3;
+}
+
+message TextIndexStats {
+  int64 fieldID = 1;
+  int64 version = 2;
+  repeated string files = 3;
+  int64 log_size = 4;
+  int64 memory_size = 5;
+  int64 buildID = 6;
+}
+
+message JsonKeyStats {
+  int64 fieldID = 1;
+  int64 version = 2;
+  repeated string files = 3;
+  int64 log_size = 4;
+  int64 memory_size = 5;
+  int64 buildID = 6;
+  int64 json_key_stats_data_format = 7;
+}
+
 message RetrieveResults {
   schema.IDs ids = 1;
   repeated int64 offset = 2;
@@ -46,3 +81,41 @@ message CollectionIndexMeta {
   int64 maxIndexRowCount = 1;
   repeated FieldIndexMeta index_metas = 2;
 }
+
+message FieldIndexInfo {
+  int64 fieldID = 1;
+  bool enable_index = 2 [deprecated = true];
+  string index_name = 3;
+  int64 indexID = 4;
+  int64 buildID = 5;
+  repeated common.KeyValuePair index_params = 6;
+  repeated string index_file_paths = 7;
+  int64 index_size = 8;
+  int64 index_version = 9;
+  int64 num_rows = 10;
+  int32 current_index_version = 11;
+  int64 index_store_version = 12;
+}
+
+message SegmentLoadInfo {
+  int64 segmentID = 1;
+  int64 partitionID = 2;
+  int64 collectionID = 3;
+  int64 dbID = 4;
+  int64 flush_time = 5;
+  repeated FieldBinlog binlog_paths = 6;
+  int64 num_of_rows = 7;
+  repeated FieldBinlog statslogs = 8;
+  repeated FieldBinlog deltalogs = 9;
+  repeated int64 compactionFrom = 10;  // segmentIDs compacted from
+  repeated FieldIndexInfo index_infos = 11;
+  int64 segment_size = 12 [deprecated = true];
+  string insert_channel = 13;
+  int64 readableVersion = 14;
+  int64 storageVersion = 15;
+  bool is_sorted = 16;
+  map<int64, TextIndexStats> textStatsLogs = 17;
+  repeated FieldBinlog bm25logs = 18;
+  map<int64, JsonKeyStats> jsonKeyStatsLogs = 19;
+  common.LoadPriority priority = 20;
+}
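To see the new messages in motion, a minimal round-trip sketch in Go: it serializes a segcorepb.SegmentLoadInfo and parses it back, the Go-side analogue of the ParseFromArray call in NewSegmentWithLoadInfo. Field values are illustrative.

package main

import (
	"fmt"
	"log"

	"google.golang.org/protobuf/proto"

	"github.com/milvus-io/milvus/pkg/v2/proto/segcorepb"
)

func main() {
	in := &segcorepb.SegmentLoadInfo{
		SegmentID: 1001, // illustrative values
		IsSorted:  true,
		BinlogPaths: []*segcorepb.FieldBinlog{
			{FieldID: 100, Binlogs: []*segcorepb.Binlog{{LogID: 9001, EntriesNum: 10}}},
		},
	}
	blob, err := proto.Marshal(in) // what crosses the cgo boundary
	if err != nil {
		log.Fatal(err)
	}
	out := &segcorepb.SegmentLoadInfo{}
	if err := proto.Unmarshal(blob, out); err != nil { // C++ side: ParseFromArray
		log.Fatal(err)
	}
	fmt.Println(out.GetSegmentID(), out.GetIsSorted()) // 1001 true
}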
File diff suppressed because it is too large