mirror of
https://gitee.com/milvus-io/milvus.git
synced 2025-12-30 23:45:28 +08:00
Add flush monitor and unit test (#5622)
Signed-off-by: Congqi Xia <congqi.xia@zilliz.com>
This commit is contained in:
parent
ac19711d74
commit
e57e2f77de
144
internal/dataservice/flush_monitor.go
Normal file
144
internal/dataservice/flush_monitor.go
Normal file
@ -0,0 +1,144 @@
|
||||
// Copyright (C) 2019-2020 Zilliz. All rights reserved.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance
|
||||
// with the License. You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software distributed under the License
|
||||
// is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
|
||||
// or implied. See the License for the specific language governing permissions and limitations under the License.
|
||||
|
||||
package dataservice
|
||||
|
||||
import (
|
||||
"sort"
|
||||
"time"
|
||||
|
||||
"github.com/milvus-io/milvus/internal/proto/commonpb"
|
||||
"github.com/milvus-io/milvus/internal/proto/datapb"
|
||||
"github.com/milvus-io/milvus/internal/proto/internalpb"
|
||||
)
|
||||
|
||||
type flushMonitor struct {
|
||||
meta *meta
|
||||
segmentPolicy SegmentFlushPolicy
|
||||
channelPolicy ChannelFlushPolicy
|
||||
}
|
||||
|
||||
// SegmentFlushPolicy checks segment size and returns whether segment needs to be flushed
|
||||
type SegmentFlushPolicy func(*datapb.SegmentInfo) bool
|
||||
|
||||
// ChannelFlushPolicy checks segments inside single Vchannel count and returns segment ids needs to be flushed
|
||||
type ChannelFlushPolicy func(string, []*datapb.SegmentInfo, *internalpb.MsgPosition) []UniqueID
|
||||
|
||||
// emptyFlushMonitor returns empty flush montior
|
||||
func emptyFlushMonitor(meta *meta) flushMonitor {
|
||||
return flushMonitor{
|
||||
meta: meta,
|
||||
}
|
||||
}
|
||||
|
||||
// defaultFlushMonitor generates auto flusher with default policies
|
||||
func defaultFlushMonitor(meta *meta) flushMonitor {
|
||||
return flushMonitor{
|
||||
meta: meta,
|
||||
// segmentPolicy: estSegmentSizePolicy(1024, 1024*1024*1536), // row 1024 byte, limit 1.5GiB
|
||||
channelPolicy: channelSizeEpochPolicy(1024, uint64(time.Hour)),
|
||||
}
|
||||
}
|
||||
|
||||
// CheckSegments check segemnt sizes
|
||||
func (f flushMonitor) CheckSegments(segments []*datapb.SegmentInfo) []UniqueID {
|
||||
if f.segmentPolicy == nil {
|
||||
return []UniqueID{}
|
||||
}
|
||||
result := make([]UniqueID, 0, len(segments))
|
||||
for _, segment := range segments {
|
||||
if f.segmentPolicy(segment) {
|
||||
result = append(result, segment.ID)
|
||||
}
|
||||
}
|
||||
return result
|
||||
}
|
||||
|
||||
// CheckChannels check channels changed
|
||||
func (f flushMonitor) CheckChannels(channels []string, latest *internalpb.MsgPosition) []UniqueID {
|
||||
segHits := make(map[UniqueID]struct{})
|
||||
for _, channel := range channels {
|
||||
segments := f.meta.GetSegmentsByChannel(channel)
|
||||
|
||||
growingSegments := make([]*datapb.SegmentInfo, 0, len(segments))
|
||||
for _, segment := range segments {
|
||||
if segment.State != commonpb.SegmentState_Growing {
|
||||
continue
|
||||
}
|
||||
growingSegments = append(growingSegments, segment)
|
||||
if f.segmentPolicy != nil && f.segmentPolicy(segment) {
|
||||
segHits[segment.ID] = struct{}{}
|
||||
}
|
||||
}
|
||||
if f.channelPolicy != nil {
|
||||
hits := f.channelPolicy(channel, growingSegments, latest)
|
||||
for _, hit := range hits {
|
||||
segHits[hit] = struct{}{}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
result := make([]UniqueID, 0, len(segHits))
|
||||
for segID := range segHits {
|
||||
result = append(result, segID)
|
||||
}
|
||||
|
||||
return result
|
||||
}
|
||||
|
||||
func estSegmentSizePolicy(rowSize, limit int64) SegmentFlushPolicy {
|
||||
return func(seg *datapb.SegmentInfo) bool {
|
||||
if seg == nil {
|
||||
return false
|
||||
}
|
||||
if seg.NumOfRows*rowSize > limit {
|
||||
return true
|
||||
}
|
||||
return false
|
||||
}
|
||||
}
|
||||
|
||||
func channelSizeEpochPolicy(segmentMax int, epochDuration uint64) ChannelFlushPolicy {
|
||||
return func(channel string, segments []*datapb.SegmentInfo, latest *internalpb.MsgPosition) []UniqueID {
|
||||
if len(segments) < segmentMax && latest == nil {
|
||||
return []UniqueID{}
|
||||
}
|
||||
sortSegmentsByDmlPos(segments)
|
||||
result := []UniqueID{}
|
||||
overflow := len(segments) - segmentMax
|
||||
for idx, segment := range segments {
|
||||
if idx < overflow {
|
||||
result = append(result, segment.ID)
|
||||
continue
|
||||
}
|
||||
if latest != nil {
|
||||
if segment.DmlPosition == nil || latest.Timestamp-segment.DmlPosition.Timestamp > uint64(time.Hour) {
|
||||
result = append(result, segment.ID)
|
||||
continue
|
||||
}
|
||||
}
|
||||
break
|
||||
}
|
||||
return result
|
||||
}
|
||||
}
|
||||
|
||||
func sortSegmentsByDmlPos(segments []*datapb.SegmentInfo) {
|
||||
sort.Slice(segments, func(i, j int) bool {
|
||||
if segments[i].DmlPosition == nil {
|
||||
return true
|
||||
}
|
||||
if segments[j].DmlPosition == nil {
|
||||
return false
|
||||
}
|
||||
return segments[i].DmlPosition.Timestamp < segments[j].DmlPosition.Timestamp
|
||||
})
|
||||
}
|
||||
126
internal/dataservice/flush_monitor_test.go
Normal file
126
internal/dataservice/flush_monitor_test.go
Normal file
@ -0,0 +1,126 @@
|
||||
// Copyright (C) 2019-2020 Zilliz. All rights reserved.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance
|
||||
// with the License. You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software distributed under the License
|
||||
// is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
|
||||
// or implied. See the License for the specific language governing permissions and limitations under the License.
|
||||
|
||||
package dataservice
|
||||
|
||||
import (
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
"github.com/milvus-io/milvus/internal/proto/datapb"
|
||||
"github.com/milvus-io/milvus/internal/proto/internalpb"
|
||||
"github.com/stretchr/testify/assert"
|
||||
)
|
||||
|
||||
func TestFlushMonitor(t *testing.T) {
|
||||
const collID = UniqueID(0)
|
||||
const partID0 = UniqueID(100)
|
||||
const partID1 = UniqueID(101)
|
||||
const channelName = "c1"
|
||||
|
||||
mockAllocator := newMockAllocator()
|
||||
meta, err := newMemoryMeta(mockAllocator)
|
||||
assert.Nil(t, err)
|
||||
|
||||
testSchema := newTestSchema()
|
||||
collInfo := &datapb.CollectionInfo{
|
||||
ID: collID,
|
||||
Schema: testSchema,
|
||||
Partitions: []UniqueID{partID0, partID1},
|
||||
}
|
||||
|
||||
meta.AddCollection(collInfo)
|
||||
|
||||
// create seg0 for partition0, seg0/seg1 for partition1
|
||||
segID0_0, err := mockAllocator.allocID()
|
||||
assert.Nil(t, err)
|
||||
segInfo0_0, err := BuildSegment(collID, partID0, segID0_0, channelName)
|
||||
assert.Nil(t, err)
|
||||
segID1_0, err := mockAllocator.allocID()
|
||||
assert.Nil(t, err)
|
||||
segInfo1_0, err := BuildSegment(collID, partID1, segID1_0, channelName)
|
||||
assert.Nil(t, err)
|
||||
segID1_1, err := mockAllocator.allocID()
|
||||
assert.Nil(t, err)
|
||||
segInfo1_1, err := BuildSegment(collID, partID1, segID1_1, channelName)
|
||||
assert.Nil(t, err)
|
||||
|
||||
// check AddSegment
|
||||
err = meta.AddSegment(segInfo0_0)
|
||||
assert.Nil(t, err)
|
||||
err = meta.AddSegment(segInfo0_0)
|
||||
assert.NotNil(t, err)
|
||||
err = meta.AddSegment(segInfo1_0)
|
||||
assert.Nil(t, err)
|
||||
err = meta.AddSegment(segInfo1_1)
|
||||
assert.Nil(t, err)
|
||||
|
||||
t.Run("Test empty flush monitor", func(t *testing.T) {
|
||||
fm := emptyFlushMonitor(meta)
|
||||
ids := fm.CheckSegments([]*datapb.SegmentInfo{})
|
||||
assert.Equal(t, 0, len(ids))
|
||||
|
||||
ids = fm.CheckChannels([]string{channelName}, nil)
|
||||
assert.Equal(t, 0, len(ids))
|
||||
})
|
||||
|
||||
t.Run("Test custom segment policy", func(t *testing.T) {
|
||||
fm := emptyFlushMonitor(meta)
|
||||
fm.segmentPolicy = estSegmentSizePolicy(1024*1024, 1024*1024*2) // row size 1Mib Limit 2 MB
|
||||
segID3Rows, err := mockAllocator.allocID()
|
||||
assert.Nil(t, err)
|
||||
segInfo3Rows, err := BuildSegment(collID, partID1, segID3Rows, channelName)
|
||||
segInfo3Rows.NumOfRows = 3
|
||||
assert.Nil(t, err)
|
||||
|
||||
ids := fm.CheckSegments([]*datapb.SegmentInfo{segInfo3Rows})
|
||||
if assert.Equal(t, 1, len(ids)) {
|
||||
assert.Equal(t, segID3Rows, ids[0])
|
||||
}
|
||||
})
|
||||
|
||||
t.Run("Test custom channel policy", func(t *testing.T) {
|
||||
const channelName2 = `ch2`
|
||||
fm := emptyFlushMonitor(meta)
|
||||
fm.channelPolicy = channelSizeEpochPolicy(100, uint64(time.Hour))
|
||||
|
||||
for i := 0; i < 100; i++ {
|
||||
segID, err := mockAllocator.allocID()
|
||||
assert.Nil(t, err)
|
||||
seg, err := BuildSegment(collID, partID0, segID, channelName2)
|
||||
assert.Nil(t, err)
|
||||
seg.DmlPosition = &internalpb.MsgPosition{
|
||||
Timestamp: uint64(i + 1),
|
||||
}
|
||||
meta.AddSegment(seg)
|
||||
}
|
||||
|
||||
ids := fm.CheckChannels([]string{channelName2}, nil)
|
||||
assert.Equal(t, 0, len(ids))
|
||||
|
||||
exSegID, err := mockAllocator.allocID()
|
||||
assert.Nil(t, err)
|
||||
seg, err := BuildSegment(collID, partID0, exSegID, channelName2)
|
||||
assert.Nil(t, err)
|
||||
seg.DmlPosition = &internalpb.MsgPosition{
|
||||
Timestamp: uint64(0), // the oldest
|
||||
}
|
||||
meta.AddSegment(seg)
|
||||
|
||||
ids = fm.CheckChannels([]string{channelName2}, nil)
|
||||
if assert.Equal(t, 1, len(ids)) {
|
||||
assert.Equal(t, exSegID, ids[0])
|
||||
}
|
||||
|
||||
ids = fm.CheckChannels([]string{channelName2}, &internalpb.MsgPosition{Timestamp: uint64(time.Hour + 5)})
|
||||
assert.Equal(t, 5, len(ids))
|
||||
})
|
||||
}
|
||||
Loading…
x
Reference in New Issue
Block a user