diff --git a/internal/dataservice/flush_monitor.go b/internal/dataservice/flush_monitor.go
new file mode 100644
index 0000000000..f63c9b4214
--- /dev/null
+++ b/internal/dataservice/flush_monitor.go
@@ -0,0 +1,151 @@
+// Copyright (C) 2019-2020 Zilliz. All rights reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software distributed under the License
+// is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
+// or implied. See the License for the specific language governing permissions and limitations under the License.
+
+package dataservice
+
+import (
+	"sort"
+	"time"
+
+	"github.com/milvus-io/milvus/internal/proto/commonpb"
+	"github.com/milvus-io/milvus/internal/proto/datapb"
+	"github.com/milvus-io/milvus/internal/proto/internalpb"
+)
+
+// flushMonitor bundles the segment-level and channel-level flush policies evaluated against meta
+type flushMonitor struct {
+	meta          *meta
+	segmentPolicy SegmentFlushPolicy
+	channelPolicy ChannelFlushPolicy
+}
+
+// SegmentFlushPolicy checks segment size and returns whether segment needs to be flushed
+type SegmentFlushPolicy func(*datapb.SegmentInfo) bool
+
+// ChannelFlushPolicy checks segments inside single Vchannel count and returns segment ids needs to be flushed
+type ChannelFlushPolicy func(string, []*datapb.SegmentInfo, *internalpb.MsgPosition) []UniqueID
+
+// emptyFlushMonitor returns empty flush monitor
+func emptyFlushMonitor(meta *meta) flushMonitor {
+	return flushMonitor{
+		meta: meta,
+	}
+}
+
+// defaultFlushMonitor generates auto flusher with default policies
+func defaultFlushMonitor(meta *meta) flushMonitor {
+	return flushMonitor{
+		meta: meta,
+		// segmentPolicy: estSegmentSizePolicy(1024, 1024*1024*1536), // row 1024 byte, limit 1.5GiB
+		channelPolicy: channelSizeEpochPolicy(1024, uint64(time.Hour)),
+	}
+}
+
+// CheckSegments check segment sizes
+func (f flushMonitor) CheckSegments(segments []*datapb.SegmentInfo) []UniqueID {
+	if f.segmentPolicy == nil {
+		return []UniqueID{}
+	}
+	result := make([]UniqueID, 0, len(segments))
+	for _, segment := range segments {
+		if f.segmentPolicy(segment) {
+			result = append(result, segment.ID)
+		}
+	}
+	return result
+}
+
+// CheckChannels check channels changed
+func (f flushMonitor) CheckChannels(channels []string, latest *internalpb.MsgPosition) []UniqueID {
+	segHits := make(map[UniqueID]struct{})
+	for _, channel := range channels {
+		segments := f.meta.GetSegmentsByChannel(channel)
+
+		growingSegments := make([]*datapb.SegmentInfo, 0, len(segments))
+		for _, segment := range segments {
+			if segment.State != commonpb.SegmentState_Growing {
+				continue
+			}
+			growingSegments = append(growingSegments, segment)
+			if f.segmentPolicy != nil && f.segmentPolicy(segment) {
+				segHits[segment.ID] = struct{}{}
+			}
+		}
+		if f.channelPolicy != nil {
+			hits := f.channelPolicy(channel, growingSegments, latest)
+			for _, hit := range hits {
+				segHits[hit] = struct{}{}
+			}
+		}
+	}
+
+	result := make([]UniqueID, 0, len(segHits))
+	for segID := range segHits {
+		result = append(result, segID)
+	}
+
+	return result
+}
+
+// estSegmentSizePolicy flushes a segment when its estimated size (NumOfRows * rowSize) exceeds limit
+func estSegmentSizePolicy(rowSize, limit int64) SegmentFlushPolicy {
+	return func(seg *datapb.SegmentInfo) bool {
+		if seg == nil {
+			return false
+		}
+		if seg.NumOfRows*rowSize > limit {
+			return true
+		}
+		return false
+	}
+}
+
+// channelSizeEpochPolicy flushes the oldest segments exceeding segmentMax per channel, plus
+// segments whose dml position lags the latest position by more than epochDuration
+func channelSizeEpochPolicy(segmentMax int, epochDuration uint64) ChannelFlushPolicy {
+	return func(channel string, segments []*datapb.SegmentInfo, latest *internalpb.MsgPosition) []UniqueID {
+		if len(segments) < segmentMax && latest == nil {
+			return []UniqueID{}
+		}
+		sortSegmentsByDmlPos(segments)
+		result := []UniqueID{}
+		overflow := len(segments) - segmentMax
+		for idx, segment := range segments {
+			if idx < overflow {
+				result = append(result, segment.ID)
+				continue
+			}
+			if latest != nil {
+				// use the configured epochDuration (was hard-coded time.Hour, leaving the parameter dead);
+				// the Timestamp comparison guards against uint64 underflow when segment is newer than latest
+				if segment.DmlPosition == nil || (latest.Timestamp > segment.DmlPosition.Timestamp && latest.Timestamp-segment.DmlPosition.Timestamp > epochDuration) {
+					result = append(result, segment.ID)
+					continue
+				}
+			}
+			break
+		}
+		return result
+	}
+}
+
+// sortSegmentsByDmlPos sorts segments ascending by dml position timestamp, nil positions first
+func sortSegmentsByDmlPos(segments []*datapb.SegmentInfo) {
+	sort.Slice(segments, func(i, j int) bool {
+		if segments[i].DmlPosition == nil {
+			return segments[j].DmlPosition != nil
+		}
+		if segments[j].DmlPosition == nil {
+			return false
+		}
+		return segments[i].DmlPosition.Timestamp < segments[j].DmlPosition.Timestamp
+	})
+}
diff --git a/internal/dataservice/flush_monitor_test.go b/internal/dataservice/flush_monitor_test.go
new file mode 100644
index 0000000000..247d482259
--- /dev/null
+++ b/internal/dataservice/flush_monitor_test.go
@@ -0,0 +1,126 @@
+// Copyright (C) 2019-2020 Zilliz. All rights reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software distributed under the License
+// is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
+// or implied. See the License for the specific language governing permissions and limitations under the License.
+
+package dataservice
+
+import (
+	"testing"
+	"time"
+
+	"github.com/milvus-io/milvus/internal/proto/datapb"
+	"github.com/milvus-io/milvus/internal/proto/internalpb"
+	"github.com/stretchr/testify/assert"
+)
+
+func TestFlushMonitor(t *testing.T) {
+	const collID = UniqueID(0)
+	const partID0 = UniqueID(100)
+	const partID1 = UniqueID(101)
+	const channelName = "c1"
+
+	mockAllocator := newMockAllocator()
+	meta, err := newMemoryMeta(mockAllocator)
+	assert.Nil(t, err)
+
+	testSchema := newTestSchema()
+	collInfo := &datapb.CollectionInfo{
+		ID:         collID,
+		Schema:     testSchema,
+		Partitions: []UniqueID{partID0, partID1},
+	}
+
+	meta.AddCollection(collInfo)
+
+	// create seg0 for partition0, seg0/seg1 for partition1
+	segID0_0, err := mockAllocator.allocID()
+	assert.Nil(t, err)
+	segInfo0_0, err := BuildSegment(collID, partID0, segID0_0, channelName)
+	assert.Nil(t, err)
+	segID1_0, err := mockAllocator.allocID()
+	assert.Nil(t, err)
+	segInfo1_0, err := BuildSegment(collID, partID1, segID1_0, channelName)
+	assert.Nil(t, err)
+	segID1_1, err := mockAllocator.allocID()
+	assert.Nil(t, err)
+	segInfo1_1, err := BuildSegment(collID, partID1, segID1_1, channelName)
+	assert.Nil(t, err)
+
+	// check AddSegment
+	err = meta.AddSegment(segInfo0_0)
+	assert.Nil(t, err)
+	err = meta.AddSegment(segInfo0_0)
+	assert.NotNil(t, err)
+	err = meta.AddSegment(segInfo1_0)
+	assert.Nil(t, err)
+	err = meta.AddSegment(segInfo1_1)
+	assert.Nil(t, err)
+
+	t.Run("Test empty flush monitor", func(t *testing.T) {
+		fm := emptyFlushMonitor(meta)
+		ids := fm.CheckSegments([]*datapb.SegmentInfo{})
+		assert.Equal(t, 0, len(ids))
+
+		ids = fm.CheckChannels([]string{channelName}, nil)
+		assert.Equal(t, 0, len(ids))
+	})
+
+	t.Run("Test custom segment policy", func(t *testing.T) {
+		fm := emptyFlushMonitor(meta)
+		fm.segmentPolicy = estSegmentSizePolicy(1024*1024, 1024*1024*2) // row size 1Mib Limit 2 MB
+		segID3Rows, err := mockAllocator.allocID()
+		assert.Nil(t, err)
+		segInfo3Rows, err := BuildSegment(collID, partID1, segID3Rows, channelName)
+		assert.Nil(t, err) // check err before dereferencing segInfo3Rows
+		segInfo3Rows.NumOfRows = 3
+
+		ids := fm.CheckSegments([]*datapb.SegmentInfo{segInfo3Rows})
+		if assert.Equal(t, 1, len(ids)) {
+			assert.Equal(t, segID3Rows, ids[0])
+		}
+	})
+
+	t.Run("Test custom channel policy", func(t *testing.T) {
+		const channelName2 = `ch2`
+		fm := emptyFlushMonitor(meta)
+		fm.channelPolicy = channelSizeEpochPolicy(100, uint64(time.Hour))
+
+		for i := 0; i < 100; i++ {
+			segID, err := mockAllocator.allocID()
+			assert.Nil(t, err)
+			seg, err := BuildSegment(collID, partID0, segID, channelName2)
+			assert.Nil(t, err)
+			seg.DmlPosition = &internalpb.MsgPosition{
+				Timestamp: uint64(i + 1),
+			}
+			assert.Nil(t, meta.AddSegment(seg))
+		}
+
+		ids := fm.CheckChannels([]string{channelName2}, nil)
+		assert.Equal(t, 0, len(ids))
+
+		exSegID, err := mockAllocator.allocID()
+		assert.Nil(t, err)
+		seg, err := BuildSegment(collID, partID0, exSegID, channelName2)
+		assert.Nil(t, err)
+		seg.DmlPosition = &internalpb.MsgPosition{
+			Timestamp: uint64(0), // the oldest
+		}
+		assert.Nil(t, meta.AddSegment(seg))
+
+		ids = fm.CheckChannels([]string{channelName2}, nil)
+		if assert.Equal(t, 1, len(ids)) {
+			assert.Equal(t, exSegID, ids[0])
+		}
+
+		ids = fm.CheckChannels([]string{channelName2}, &internalpb.MsgPosition{Timestamp: uint64(time.Hour + 5)})
+		assert.Equal(t, 5, len(ids))
+	})
+}