Zhen Ye a86b6f2a54
enhance: extend the stats manage at streaming shard manager for L0 (#43371)
issue: #42416

- Rename the InsertMetric into ModifiedMetric.
- Add L0 control configuration.
- Add some L0 current state collect.

Signed-off-by: chyezh <chyezh@outlook.com>
2025-08-18 20:41:46 +08:00

183 lines
5.9 KiB
Go

package utils
import (
"fmt"
"math"
"time"
"github.com/milvus-io/milvus/pkg/v2/common"
"github.com/milvus-io/milvus/pkg/v2/proto/datapb"
"github.com/milvus-io/milvus/pkg/v2/proto/streamingpb"
)
// PartitionUniqueKey identifies a partition by the pair of its collection ID
// and its partition ID.
type PartitionUniqueKey struct {
	// CollectionID is the ID of the collection the partition is in.
	CollectionID int64
	// PartitionID is the ID of the partition; -1 means all partitions,
	// see common.AllPartitionsID.
	PartitionID int64
}
// IsAllPartitions reports whether the key addresses every partition of the
// collection, i.e. whether its PartitionID equals common.AllPartitionsID.
func (k *PartitionUniqueKey) IsAllPartitions() bool {
	partitionID := k.PartitionID
	return partitionID == common.AllPartitionsID
}
// SegmentBelongs describes where a segment belongs: the channels, the
// collection and the partition it is attached to.
type SegmentBelongs struct {
	// PChannel is the name of the pchannel the segment belongs to.
	PChannel string
	// VChannel is the name of the vchannel the segment belongs to.
	VChannel string
	// CollectionID is the ID of the collection that owns the segment.
	CollectionID int64
	// PartitionID is the ID of the partition that owns the segment.
	PartitionID int64
	// SegmentID is the ID of the segment itself.
	SegmentID int64
}
// PartitionUniqueKey builds the PartitionUniqueKey of the partition this
// segment belongs to, from the segment's collection and partition IDs.
func (s *SegmentBelongs) PartitionUniqueKey() PartitionUniqueKey {
	var key PartitionUniqueKey
	key.CollectionID = s.CollectionID
	key.PartitionID = s.PartitionID
	return key
}
// SegmentStats holds the usage statistics tracked for a single segment.
type SegmentStats struct {
	// Modified accumulates the rows and binary size already assigned to the
	// segment.
	Modified ModifiedMetrics
	// MaxRows is the maximum number of rows that should be assigned to the
	// segment; it is a fixed value once the segment is transferred into
	// growing.
	MaxRows uint64
	// MaxBinarySize is the maximum binary size that should be assigned to the
	// segment; it is a fixed value once the segment is transferred into
	// growing.
	MaxBinarySize uint64
	// CreateTime is the creation time of the segment. It is fixed when the
	// segment is created and is not a tso.
	CreateTime time.Time
	// LastModifiedTime is the time of the last write to the segment. It is a
	// local time, not a tso.
	LastModifiedTime time.Time
	// BinLogCounter counts the binlogs of the segment (equal to the binlog
	// file count of the primary key). It is updated asynchronously and is not
	// a real-time value.
	BinLogCounter uint64
	// BinLogFileCounter counts the binlog files of the segment. It is updated
	// asynchronously and is not a real-time value.
	BinLogFileCounter uint64
	// ReachLimit records that the segment has reached its limit at least once.
	ReachLimit bool
	// Level is the level of the segment.
	Level datapb.SegmentLevel
}
// NewSegmentStatFromProto converts a persisted segment assignment stat into
// its in-memory SegmentStats form.
//
// A nil statProto yields nil. A Legacy level is treated as L1; the function
// panics if the resulting level is neither L0 nor L1. A zero MaxRows in the
// proto is replaced by math.MaxUint64. ReachLimit and BinLogFileCounter are
// not read from the proto and are left at their zero values.
func NewSegmentStatFromProto(statProto *streamingpb.SegmentAssignmentStat) *SegmentStats {
	if statProto == nil {
		return nil
	}

	// Resolve the effective level first so an invalid one panics before
	// anything else is built.
	level := statProto.Level
	if level == datapb.SegmentLevel_Legacy {
		level = datapb.SegmentLevel_L1
	}
	switch level {
	case datapb.SegmentLevel_L0, datapb.SegmentLevel_L1:
		// supported levels
	default:
		panic(fmt.Sprintf("invalid level: %s", level))
	}

	stat := &SegmentStats{
		Modified: ModifiedMetrics{
			Rows:       statProto.ModifiedRows,
			BinarySize: statProto.ModifiedBinarySize,
		},
		MaxRows:          math.MaxUint64,
		MaxBinarySize:    statProto.MaxBinarySize,
		CreateTime:       time.Unix(statProto.CreateTimestamp, 0),
		BinLogCounter:    statProto.BinlogCounter,
		LastModifiedTime: time.Unix(statProto.LastModifiedTimestamp, 0),
		Level:            level,
	}
	// Only a non-zero MaxRows from the proto overrides the MaxUint64 default.
	if statProto.MaxRows != 0 {
		stat.MaxRows = statProto.MaxRows
	}
	return stat
}
// NewProtoFromSegmentStat converts an in-memory SegmentStats into its proto
// form. A nil stat yields nil. Times are stored as Unix seconds.
// BinLogFileCounter and ReachLimit are not written to the proto.
func NewProtoFromSegmentStat(stat *SegmentStats) *streamingpb.SegmentAssignmentStat {
	if stat == nil {
		return nil
	}

	proto := new(streamingpb.SegmentAssignmentStat)
	proto.MaxRows = stat.MaxRows
	proto.MaxBinarySize = stat.MaxBinarySize
	proto.ModifiedRows = stat.Modified.Rows
	proto.ModifiedBinarySize = stat.Modified.BinarySize
	proto.CreateTimestamp = stat.CreateTime.Unix()
	proto.BinlogCounter = stat.BinLogCounter
	proto.LastModifiedTimestamp = stat.LastModifiedTime.Unix()
	proto.Level = stat.Level
	return proto
}
// AllocRows tries to reserve room for m on the segment.
//
// It returns true when both the binary size and the row count of m fit into
// the remaining capacity; in that case m is added to Modified and
// LastModifiedTime is set to the current local time. Otherwise it returns
// false and, if the segment already holds some data, marks the segment as
// having reached its limit.
func (s *SegmentStats) AllocRows(m ModifiedMetrics) bool {
	fits := m.BinarySize <= s.BinaryCanBeAssign() && m.Rows <= s.RowsCanBeAssign()
	if fits {
		s.Modified.Collect(m)
		s.LastModifiedTime = time.Now()
		return true
	}

	// A segment that already holds data and cannot take this request is
	// considered full; an empty segment is left unmarked.
	if s.Modified.BinarySize > 0 {
		s.ReachLimit = true
	}
	return false
}
// BinaryCanBeAssign returns the remaining binary size that can still be
// assigned to the segment, i.e. MaxBinarySize minus the binary size already
// recorded in Modified.
//
// The result saturates at zero: when the modified binary size has reached or
// exceeded MaxBinarySize, 0 is returned instead of letting the unsigned
// subtraction wrap around to a huge capacity.
func (s *SegmentStats) BinaryCanBeAssign() uint64 {
	if s.Modified.BinarySize >= s.MaxBinarySize {
		return 0
	}
	return s.MaxBinarySize - s.Modified.BinarySize
}
// RowsCanBeAssign returns the number of rows that can still be assigned to
// the segment, i.e. MaxRows minus the rows already recorded in Modified.
//
// The result saturates at zero: when the modified row count has reached or
// exceeded MaxRows, 0 is returned instead of letting the unsigned subtraction
// wrap around to a huge capacity.
func (s *SegmentStats) RowsCanBeAssign() uint64 {
	if s.Modified.Rows >= s.MaxRows {
		return 0
	}
	return s.MaxRows - s.Modified.Rows
}
// ShouldBeSealed reports whether the segment should be sealed, which is the
// case once the segment has been marked as having reached its limit.
func (s *SegmentStats) ShouldBeSealed() bool {
	reached := s.ReachLimit
	return reached
}
// IsEmpty reports whether no rows have been recorded for the segment yet.
// Only the modified row count is inspected; the binary size is not.
func (s *SegmentStats) IsEmpty() bool {
	rows := s.Modified.Rows
	return rows == 0
}
// UpdateOnSync applies the increments reported by a sync operation to the
// binlog counter and the binlog file counter of the segment.
func (s *SegmentStats) UpdateOnSync(f SyncOperationMetrics) {
	s.BinLogCounter = s.BinLogCounter + f.BinLogCounterIncr
	s.BinLogFileCounter = s.BinLogFileCounter + f.BinLogFileCounterIncr
}
// Copy returns a pointer to a new SegmentStats holding a copy of the
// receiver's fields; changing the copy's fields does not affect the receiver.
func (s *SegmentStats) Copy() *SegmentStats {
	dup := new(SegmentStats)
	*dup = *s
	return dup
}
// ModifiedMetrics carries the amount of data touched by insert/delete
// operations.
type ModifiedMetrics struct {
	// Rows is the number of rows.
	Rows uint64
	// BinarySize is the binary size.
	BinarySize uint64
}
// IsZero reports whether both the row count and the binary size are zero.
func (m *ModifiedMetrics) IsZero() bool {
	if m.Rows != 0 {
		return false
	}
	return m.BinarySize == 0
}
// Collect adds the rows and binary size of other onto m.
func (m *ModifiedMetrics) Collect(other ModifiedMetrics) {
	m.Rows = m.Rows + other.Rows
	m.BinarySize = m.BinarySize + other.BinarySize
}
// Subtract removes the rows and binary size of other from m.
//
// It panics if either field of other is larger than the corresponding field
// of m, since the unsigned result would otherwise wrap around. Both checks
// run before any field is modified, so m is left untouched when it panics.
func (m *ModifiedMetrics) Subtract(other ModifiedMetrics) {
	if m.Rows < other.Rows {
		panic(fmt.Sprintf("rows cannot be less than zero, current: %d, target: %d", m.Rows, other.Rows))
	}
	if m.BinarySize < other.BinarySize {
		// Report the binary sizes here, not the row counts.
		panic(fmt.Sprintf("binary size cannot be less than zero, current: %d, target: %d", m.BinarySize, other.BinarySize))
	}
	m.Rows -= other.Rows
	m.BinarySize -= other.BinarySize
}
// SyncOperationMetrics carries the counter increments produced by one sync
// operation.
type SyncOperationMetrics struct {
	// BinLogCounterIncr is the increment of the binlog counter.
	BinLogCounterIncr uint64
	// BinLogFileCounterIncr is the increment of the binlog file counter.
	BinLogFileCounterIncr uint64
}