Add flush monitor and unit test (#5622)

Signed-off-by: Congqi Xia <congqi.xia@zilliz.com>
congqixia 2021-06-07 11:50:51 +08:00 committed by zhenshan.cao
parent ac19711d74
commit e57e2f77de
2 changed files with 270 additions and 0 deletions


@@ -0,0 +1,144 @@
// Copyright (C) 2019-2020 Zilliz. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software distributed under the License
// is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
// or implied. See the License for the specific language governing permissions and limitations under the License.

package dataservice

import (
    "sort"
    "time"

    "github.com/milvus-io/milvus/internal/proto/commonpb"
    "github.com/milvus-io/milvus/internal/proto/datapb"
    "github.com/milvus-io/milvus/internal/proto/internalpb"
)

// flushMonitor checks segments and channels against pluggable flush policies.
type flushMonitor struct {
    meta          *meta
    segmentPolicy SegmentFlushPolicy
    channelPolicy ChannelFlushPolicy
}

// SegmentFlushPolicy checks a segment's size and returns whether the segment needs to be flushed.
type SegmentFlushPolicy func(*datapb.SegmentInfo) bool

// ChannelFlushPolicy checks the segments inside a single VChannel and returns the IDs of the segments that need to be flushed.
type ChannelFlushPolicy func(string, []*datapb.SegmentInfo, *internalpb.MsgPosition) []UniqueID

// emptyFlushMonitor returns a flush monitor with no policies set.
func emptyFlushMonitor(meta *meta) flushMonitor {
    return flushMonitor{
        meta: meta,
    }
}

// defaultFlushMonitor generates an auto flusher with default policies.
func defaultFlushMonitor(meta *meta) flushMonitor {
    return flushMonitor{
        meta: meta,
        // segmentPolicy: estSegmentSizePolicy(1024, 1024*1024*1536), // 1024 bytes per row, 1.5 GiB limit
        channelPolicy: channelSizeEpochPolicy(1024, uint64(time.Hour)),
    }
}

// CheckSegments applies the segment policy to each segment and returns the IDs of those that need to be flushed.
func (f flushMonitor) CheckSegments(segments []*datapb.SegmentInfo) []UniqueID {
    if f.segmentPolicy == nil {
        return []UniqueID{}
    }
    result := make([]UniqueID, 0, len(segments))
    for _, segment := range segments {
        if f.segmentPolicy(segment) {
            result = append(result, segment.ID)
        }
    }
    return result
}

// CheckChannels applies both policies to the growing segments of each channel and returns the IDs of segments to flush.
func (f flushMonitor) CheckChannels(channels []string, latest *internalpb.MsgPosition) []UniqueID {
    segHits := make(map[UniqueID]struct{})
    for _, channel := range channels {
        segments := f.meta.GetSegmentsByChannel(channel)
        growingSegments := make([]*datapb.SegmentInfo, 0, len(segments))
        for _, segment := range segments {
            if segment.State != commonpb.SegmentState_Growing {
                continue
            }
            growingSegments = append(growingSegments, segment)
            if f.segmentPolicy != nil && f.segmentPolicy(segment) {
                segHits[segment.ID] = struct{}{}
            }
        }
        if f.channelPolicy != nil {
            hits := f.channelPolicy(channel, growingSegments, latest)
            for _, hit := range hits {
                segHits[hit] = struct{}{}
            }
        }
    }
    result := make([]UniqueID, 0, len(segHits))
    for segID := range segHits {
        result = append(result, segID)
    }
    return result
}

// estSegmentSizePolicy flushes a segment once its estimated size (NumOfRows * rowSize) exceeds limit.
func estSegmentSizePolicy(rowSize, limit int64) SegmentFlushPolicy {
    return func(seg *datapb.SegmentInfo) bool {
        if seg == nil {
            return false
        }
        return seg.NumOfRows*rowSize > limit
    }
}

// channelSizeEpochPolicy flushes the oldest segments when a channel holds more than segmentMax segments,
// and any segment whose DML position lags the latest position by more than epochDuration.
func channelSizeEpochPolicy(segmentMax int, epochDuration uint64) ChannelFlushPolicy {
    return func(channel string, segments []*datapb.SegmentInfo, latest *internalpb.MsgPosition) []UniqueID {
        if len(segments) < segmentMax && latest == nil {
            return []UniqueID{}
        }
        sortSegmentsByDmlPos(segments)
        result := []UniqueID{}
        overflow := len(segments) - segmentMax
        for idx, segment := range segments {
            if idx < overflow {
                result = append(result, segment.ID)
                continue
            }
            if latest != nil {
                if segment.DmlPosition == nil || latest.Timestamp-segment.DmlPosition.Timestamp > epochDuration {
                    result = append(result, segment.ID)
                    continue
                }
            }
            break
        }
        return result
    }
}

// sortSegmentsByDmlPos sorts segments by DML position timestamp in ascending order, with nil positions first.
func sortSegmentsByDmlPos(segments []*datapb.SegmentInfo) {
    sort.Slice(segments, func(i, j int) bool {
        if segments[i].DmlPosition == nil {
            return true
        }
        if segments[j].DmlPosition == nil {
            return false
        }
        return segments[i].DmlPosition.Timestamp < segments[j].DmlPosition.Timestamp
    })
}
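
For orientation, a minimal caller-side sketch of how the two entry points compose; the helper name checkFlushCandidates, the channel name, and the latestPos argument are assumptions for illustration, while defaultFlushMonitor, CheckSegments, CheckChannels, and GetSegmentsByChannel come from the file above.

// checkFlushCandidates is a hypothetical caller, not part of this commit.
// It collects segment IDs flagged by either the segment or channel policy.
func checkFlushCandidates(meta *meta, latestPos *internalpb.MsgPosition) []UniqueID {
    fm := defaultFlushMonitor(meta)

    // Segment-level check: the default monitor leaves segmentPolicy nil,
    // so this returns an empty slice until a SegmentFlushPolicy is assigned.
    toFlush := fm.CheckSegments(meta.GetSegmentsByChannel("dml-channel-1"))

    // Channel-level check: flags segments that overflow the per-channel
    // count limit or whose DML position lags latestPos by over an epoch.
    return append(toFlush, fm.CheckChannels([]string{"dml-channel-1"}, latestPos)...)
}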


@@ -0,0 +1,126 @@
// Copyright (C) 2019-2020 Zilliz. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software distributed under the License
// is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
// or implied. See the License for the specific language governing permissions and limitations under the License.

package dataservice

import (
    "testing"
    "time"

    "github.com/milvus-io/milvus/internal/proto/datapb"
    "github.com/milvus-io/milvus/internal/proto/internalpb"
    "github.com/stretchr/testify/assert"
)

func TestFlushMonitor(t *testing.T) {
    const collID = UniqueID(0)
    const partID0 = UniqueID(100)
    const partID1 = UniqueID(101)
    const channelName = "c1"

    mockAllocator := newMockAllocator()
    meta, err := newMemoryMeta(mockAllocator)
    assert.Nil(t, err)

    testSchema := newTestSchema()
    collInfo := &datapb.CollectionInfo{
        ID:         collID,
        Schema:     testSchema,
        Partitions: []UniqueID{partID0, partID1},
    }
    meta.AddCollection(collInfo)

    // create seg0 for partition0, seg0/seg1 for partition1
    segID0_0, err := mockAllocator.allocID()
    assert.Nil(t, err)
    segInfo0_0, err := BuildSegment(collID, partID0, segID0_0, channelName)
    assert.Nil(t, err)
    segID1_0, err := mockAllocator.allocID()
    assert.Nil(t, err)
    segInfo1_0, err := BuildSegment(collID, partID1, segID1_0, channelName)
    assert.Nil(t, err)
    segID1_1, err := mockAllocator.allocID()
    assert.Nil(t, err)
    segInfo1_1, err := BuildSegment(collID, partID1, segID1_1, channelName)
    assert.Nil(t, err)

    // check AddSegment: adding the same segment twice must fail
    err = meta.AddSegment(segInfo0_0)
    assert.Nil(t, err)
    err = meta.AddSegment(segInfo0_0)
    assert.NotNil(t, err)
    err = meta.AddSegment(segInfo1_0)
    assert.Nil(t, err)
    err = meta.AddSegment(segInfo1_1)
    assert.Nil(t, err)

    t.Run("Test empty flush monitor", func(t *testing.T) {
        fm := emptyFlushMonitor(meta)
        ids := fm.CheckSegments([]*datapb.SegmentInfo{})
        assert.Equal(t, 0, len(ids))
        ids = fm.CheckChannels([]string{channelName}, nil)
        assert.Equal(t, 0, len(ids))
    })

    t.Run("Test custom segment policy", func(t *testing.T) {
        fm := emptyFlushMonitor(meta)
        fm.segmentPolicy = estSegmentSizePolicy(1024*1024, 1024*1024*2) // row size 1 MiB, limit 2 MiB
        segID3Rows, err := mockAllocator.allocID()
        assert.Nil(t, err)
        segInfo3Rows, err := BuildSegment(collID, partID1, segID3Rows, channelName)
        assert.Nil(t, err)
        segInfo3Rows.NumOfRows = 3 // estimated size 3 MiB exceeds the 2 MiB limit
        ids := fm.CheckSegments([]*datapb.SegmentInfo{segInfo3Rows})
        if assert.Equal(t, 1, len(ids)) {
            assert.Equal(t, segID3Rows, ids[0])
        }
    })

    t.Run("Test custom channel policy", func(t *testing.T) {
        const channelName2 = `ch2`
        fm := emptyFlushMonitor(meta)
        fm.channelPolicy = channelSizeEpochPolicy(100, uint64(time.Hour))
        for i := 0; i < 100; i++ {
            segID, err := mockAllocator.allocID()
            assert.Nil(t, err)
            seg, err := BuildSegment(collID, partID0, segID, channelName2)
            assert.Nil(t, err)
            seg.DmlPosition = &internalpb.MsgPosition{
                Timestamp: uint64(i + 1),
            }
            err = meta.AddSegment(seg)
            assert.Nil(t, err)
        }
        // 100 segments do not overflow segmentMax and latest is nil, so nothing is flagged
        ids := fm.CheckChannels([]string{channelName2}, nil)
        assert.Equal(t, 0, len(ids))

        exSegID, err := mockAllocator.allocID()
        assert.Nil(t, err)
        seg, err := BuildSegment(collID, partID0, exSegID, channelName2)
        assert.Nil(t, err)
        seg.DmlPosition = &internalpb.MsgPosition{
            Timestamp: uint64(0), // the oldest
        }
        err = meta.AddSegment(seg)
        assert.Nil(t, err)

        // 101 segments overflow segmentMax by one; the oldest is flagged
        ids = fm.CheckChannels([]string{channelName2}, nil)
        if assert.Equal(t, 1, len(ids)) {
            assert.Equal(t, exSegID, ids[0])
        }

        // with latest at time.Hour+5, segments with timestamps 0..4 lag by more than an hour
        ids = fm.CheckChannels([]string{channelName2}, &internalpb.MsgPosition{Timestamp: uint64(time.Hour + 5)})
        assert.Equal(t, 5, len(ids))
    })
}
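
As a closing note, any function matching the policy signatures can be plugged in the same way the tests assign estSegmentSizePolicy; the sketch below is a hypothetical policy, not part of this commit, that flushes a segment once it holds a fixed number of rows.

// rowCountPolicy is a hypothetical SegmentFlushPolicy that fires once a
// segment holds at least maxRows rows.
func rowCountPolicy(maxRows int64) SegmentFlushPolicy {
    return func(seg *datapb.SegmentInfo) bool {
        return seg != nil && seg.NumOfRows >= maxRows
    }
}

// Wired exactly like the custom policies in the tests above:
//   fm := emptyFlushMonitor(meta)
//   fm.segmentPolicy = rowCountPolicy(1024)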