Add flush monitor and unit test (#5622)

Signed-off-by: Congqi Xia <congqi.xia@zilliz.com>
congqixia 2021-06-07 11:50:51 +08:00 committed by zhenshan.cao
parent ac19711d74
commit e57e2f77de
2 changed files with 270 additions and 0 deletions


@@ -0,0 +1,144 @@
// Copyright (C) 2019-2020 Zilliz. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software distributed under the License
// is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
// or implied. See the License for the specific language governing permissions and limitations under the License.

package dataservice

import (
    "sort"
    "time"

    "github.com/milvus-io/milvus/internal/proto/commonpb"
    "github.com/milvus-io/milvus/internal/proto/datapb"
    "github.com/milvus-io/milvus/internal/proto/internalpb"
)

// flushMonitor checks segments and channels against pluggable flush policies.
type flushMonitor struct {
    meta          *meta
    segmentPolicy SegmentFlushPolicy
    channelPolicy ChannelFlushPolicy
}

// SegmentFlushPolicy checks a segment's size and returns whether the segment needs to be flushed.
type SegmentFlushPolicy func(*datapb.SegmentInfo) bool

// ChannelFlushPolicy checks the segments inside a single VChannel and returns the IDs of the segments that need to be flushed.
type ChannelFlushPolicy func(string, []*datapb.SegmentInfo, *internalpb.MsgPosition) []UniqueID

// emptyFlushMonitor returns a flush monitor with no policies set.
func emptyFlushMonitor(meta *meta) flushMonitor {
    return flushMonitor{
        meta: meta,
    }
}

// defaultFlushMonitor generates an auto flusher with default policies.
func defaultFlushMonitor(meta *meta) flushMonitor {
    return flushMonitor{
        meta: meta,
        // segmentPolicy: estSegmentSizePolicy(1024, 1024*1024*1536), // 1024 bytes per row, 1.5 GiB limit
        channelPolicy: channelSizeEpochPolicy(1024, uint64(time.Hour)),
    }
}

// CheckSegments applies the segment policy to each segment and returns the IDs of those that need to be flushed.
func (f flushMonitor) CheckSegments(segments []*datapb.SegmentInfo) []UniqueID {
    if f.segmentPolicy == nil {
        return []UniqueID{}
    }
    result := make([]UniqueID, 0, len(segments))
    for _, segment := range segments {
        if f.segmentPolicy(segment) {
            result = append(result, segment.ID)
        }
    }
    return result
}

// CheckChannels applies both policies to the growing segments of each channel and returns the IDs of segments to flush.
func (f flushMonitor) CheckChannels(channels []string, latest *internalpb.MsgPosition) []UniqueID {
    segHits := make(map[UniqueID]struct{})
    for _, channel := range channels {
        segments := f.meta.GetSegmentsByChannel(channel)
        growingSegments := make([]*datapb.SegmentInfo, 0, len(segments))
        for _, segment := range segments {
            if segment.State != commonpb.SegmentState_Growing {
                continue
            }
            growingSegments = append(growingSegments, segment)
            if f.segmentPolicy != nil && f.segmentPolicy(segment) {
                segHits[segment.ID] = struct{}{}
            }
        }
        if f.channelPolicy != nil {
            hits := f.channelPolicy(channel, growingSegments, latest)
            for _, hit := range hits {
                segHits[hit] = struct{}{}
            }
        }
    }
    result := make([]UniqueID, 0, len(segHits))
    for segID := range segHits {
        result = append(result, segID)
    }
    return result
}

// estSegmentSizePolicy flushes a segment once its estimated size (NumOfRows * rowSize) exceeds limit.
func estSegmentSizePolicy(rowSize, limit int64) SegmentFlushPolicy {
    return func(seg *datapb.SegmentInfo) bool {
        if seg == nil {
            return false
        }
        return seg.NumOfRows*rowSize > limit
    }
}

// channelSizeEpochPolicy flushes the oldest segments when a channel holds more than segmentMax segments,
// and any segment whose DML position lags the latest position by more than epochDuration.
func channelSizeEpochPolicy(segmentMax int, epochDuration uint64) ChannelFlushPolicy {
    return func(channel string, segments []*datapb.SegmentInfo, latest *internalpb.MsgPosition) []UniqueID {
        if len(segments) < segmentMax && latest == nil {
            return []UniqueID{}
        }
        sortSegmentsByDmlPos(segments)
        result := []UniqueID{}
        overflow := len(segments) - segmentMax
        for idx, segment := range segments {
            if idx < overflow {
                result = append(result, segment.ID)
                continue
            }
            if latest != nil {
                if segment.DmlPosition == nil || latest.Timestamp-segment.DmlPosition.Timestamp > epochDuration {
                    result = append(result, segment.ID)
                    continue
                }
            }
            break
        }
        return result
    }
}

// sortSegmentsByDmlPos sorts segments by DML position timestamp in ascending order, with nil positions first.
func sortSegmentsByDmlPos(segments []*datapb.SegmentInfo) {
    sort.Slice(segments, func(i, j int) bool {
        if segments[i].DmlPosition == nil {
            return true
        }
        if segments[j].DmlPosition == nil {
            return false
        }
        return segments[i].DmlPosition.Timestamp < segments[j].DmlPosition.Timestamp
    })
}
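
For orientation, a minimal caller-side sketch of how the two entry points compose; the helper name checkFlushCandidates, the channel name, and the latestPos argument are assumptions for illustration, while defaultFlushMonitor, CheckSegments, CheckChannels, and GetSegmentsByChannel come from the file above.

// checkFlushCandidates is a hypothetical caller, not part of this commit.
// It collects segment IDs flagged by either the segment or channel policy.
func checkFlushCandidates(meta *meta, latestPos *internalpb.MsgPosition) []UniqueID {
    fm := defaultFlushMonitor(meta)

    // Segment-level check: the default monitor leaves segmentPolicy nil,
    // so this returns an empty slice until a SegmentFlushPolicy is assigned.
    toFlush := fm.CheckSegments(meta.GetSegmentsByChannel("dml-channel-1"))

    // Channel-level check: flags segments that overflow the per-channel
    // count limit or whose DML position lags latestPos by over an epoch.
    return append(toFlush, fm.CheckChannels([]string{"dml-channel-1"}, latestPos)...)
}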


@@ -0,0 +1,126 @@
// Copyright (C) 2019-2020 Zilliz. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software distributed under the License
// is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
// or implied. See the License for the specific language governing permissions and limitations under the License.

package dataservice

import (
    "testing"
    "time"

    "github.com/milvus-io/milvus/internal/proto/datapb"
    "github.com/milvus-io/milvus/internal/proto/internalpb"
    "github.com/stretchr/testify/assert"
)

func TestFlushMonitor(t *testing.T) {
    const collID = UniqueID(0)
    const partID0 = UniqueID(100)
    const partID1 = UniqueID(101)
    const channelName = "c1"

    mockAllocator := newMockAllocator()
    meta, err := newMemoryMeta(mockAllocator)
    assert.Nil(t, err)

    testSchema := newTestSchema()
    collInfo := &datapb.CollectionInfo{
        ID:         collID,
        Schema:     testSchema,
        Partitions: []UniqueID{partID0, partID1},
    }
    meta.AddCollection(collInfo)

    // create seg0 for partition0, seg0/seg1 for partition1
    segID0_0, err := mockAllocator.allocID()
    assert.Nil(t, err)
    segInfo0_0, err := BuildSegment(collID, partID0, segID0_0, channelName)
    assert.Nil(t, err)
    segID1_0, err := mockAllocator.allocID()
    assert.Nil(t, err)
    segInfo1_0, err := BuildSegment(collID, partID1, segID1_0, channelName)
    assert.Nil(t, err)
    segID1_1, err := mockAllocator.allocID()
    assert.Nil(t, err)
    segInfo1_1, err := BuildSegment(collID, partID1, segID1_1, channelName)
    assert.Nil(t, err)

    // check AddSegment: adding the same segment twice must fail
    err = meta.AddSegment(segInfo0_0)
    assert.Nil(t, err)
    err = meta.AddSegment(segInfo0_0)
    assert.NotNil(t, err)
    err = meta.AddSegment(segInfo1_0)
    assert.Nil(t, err)
    err = meta.AddSegment(segInfo1_1)
    assert.Nil(t, err)

    t.Run("Test empty flush monitor", func(t *testing.T) {
        fm := emptyFlushMonitor(meta)
        ids := fm.CheckSegments([]*datapb.SegmentInfo{})
        assert.Equal(t, 0, len(ids))
        ids = fm.CheckChannels([]string{channelName}, nil)
        assert.Equal(t, 0, len(ids))
    })

    t.Run("Test custom segment policy", func(t *testing.T) {
        fm := emptyFlushMonitor(meta)
        fm.segmentPolicy = estSegmentSizePolicy(1024*1024, 1024*1024*2) // row size 1 MiB, limit 2 MiB
        segID3Rows, err := mockAllocator.allocID()
        assert.Nil(t, err)
        segInfo3Rows, err := BuildSegment(collID, partID1, segID3Rows, channelName)
        assert.Nil(t, err)
        segInfo3Rows.NumOfRows = 3 // estimated size 3 MiB exceeds the 2 MiB limit
        ids := fm.CheckSegments([]*datapb.SegmentInfo{segInfo3Rows})
        if assert.Equal(t, 1, len(ids)) {
            assert.Equal(t, segID3Rows, ids[0])
        }
    })

    t.Run("Test custom channel policy", func(t *testing.T) {
        const channelName2 = `ch2`
        fm := emptyFlushMonitor(meta)
        fm.channelPolicy = channelSizeEpochPolicy(100, uint64(time.Hour))
        for i := 0; i < 100; i++ {
            segID, err := mockAllocator.allocID()
            assert.Nil(t, err)
            seg, err := BuildSegment(collID, partID0, segID, channelName2)
            assert.Nil(t, err)
            seg.DmlPosition = &internalpb.MsgPosition{
                Timestamp: uint64(i + 1),
            }
            err = meta.AddSegment(seg)
            assert.Nil(t, err)
        }
        // 100 segments do not overflow segmentMax and latest is nil, so nothing is flagged
        ids := fm.CheckChannels([]string{channelName2}, nil)
        assert.Equal(t, 0, len(ids))

        exSegID, err := mockAllocator.allocID()
        assert.Nil(t, err)
        seg, err := BuildSegment(collID, partID0, exSegID, channelName2)
        assert.Nil(t, err)
        seg.DmlPosition = &internalpb.MsgPosition{
            Timestamp: uint64(0), // the oldest
        }
        err = meta.AddSegment(seg)
        assert.Nil(t, err)

        // 101 segments overflow segmentMax by one; the oldest is flagged
        ids = fm.CheckChannels([]string{channelName2}, nil)
        if assert.Equal(t, 1, len(ids)) {
            assert.Equal(t, exSegID, ids[0])
        }

        // with latest at time.Hour+5, segments with timestamps 0..4 lag by more than an hour
        ids = fm.CheckChannels([]string{channelName2}, &internalpb.MsgPosition{Timestamp: uint64(time.Hour + 5)})
        assert.Equal(t, 5, len(ids))
    })
}
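
As a closing note, any function matching the policy signatures can be plugged in the same way the tests assign estSegmentSizePolicy; the sketch below is a hypothetical policy, not part of this commit, that flushes a segment once it holds a fixed number of rows.

// rowCountPolicy is a hypothetical SegmentFlushPolicy that fires once a
// segment holds at least maxRows rows.
func rowCountPolicy(maxRows int64) SegmentFlushPolicy {
    return func(seg *datapb.SegmentInfo) bool {
        return seg != nil && seg.NumOfRows >= maxRows
    }
}

// Wired exactly like the custom policies in the tests above:
//   fm := emptyFlushMonitor(meta)
//   fm.segmentPolicy = rowCountPolicy(1024)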