mirror of
https://gitee.com/milvus-io/milvus.git
synced 2026-01-02 00:45:30 +08:00
* Remove redundant session startup Signed-off-by: sunby <bingyi.sun@zilliz.com> * Register datanode after start success Signed-off-by: sunby <bingyi.sun@zilliz.com> * fix meta snap shot Signed-off-by: yefu.chen <yefu.chen@zilliz.com> * fix datanode message stream channel Signed-off-by: yangxuan <xuan.yang@zilliz.com> * Fix bugs when drop empty collection Signed-off-by: sunby <bingyi.sun@zilliz.com> * Fix bug of getting pchan statistics from task scheduler Signed-off-by: dragondriver <jiquan.long@zilliz.com> * Fix i/dist/dataservice test code Signed-off-by: Congqi Xia <congqi.xia@zilliz.com> * Fix epoch lifetime not applied Signed-off-by: Congqi Xia <congqi.xia@zilliz.com> * fix datanode flowgraph dd node Signed-off-by: yangxuan <xuan.yang@zilliz.com> * Fix handle datanode timetick bug Signed-off-by: sunby <bingyi.sun@zilliz.com> * Remove repack function of dml stream Signed-off-by: dragondriver <jiquan.long@zilliz.com> * fix proxynode Signed-off-by: yefu.chen <yefu.chen@zilliz.com> * Apply extended seal policy Signed-off-by: Congqi Xia <congqi.xia@zilliz.com> * add check for time tick Signed-off-by: yefu.chen <yefu.chen@zilliz.com> * fix check Signed-off-by: yefu.chen <yefu.chen@zilliz.com> * Fix the repack function of dml stream Signed-off-by: dragondriver <jiquan.long@zilliz.com> * Fix the bug when send statistics of pchan Signed-off-by: dragondriver <jiquan.long@zilliz.com> * Fix the repack function when craete dml stream Signed-off-by: dragondriver <jiquan.long@zilliz.com> * fix bugs Signed-off-by: yefu.chen <yefu.chen@zilliz.com> * fix describe collection Signed-off-by: yefu.chen <yefu.chen@zilliz.com> * Fix bug when send timestamp statistics Signed-off-by: dragondriver <jiquan.long@zilliz.com> * fix data node Signed-off-by: yefu.chen <yefu.chen@zilliz.com> * Add length check before flush request Signed-off-by: Congqi Xia <congqi.xia@zilliz.com> * add log for data node Signed-off-by: yefu.chen <yefu.chen@zilliz.com> * Fix SaveBinlog bugs Signed-off-by: sunby <bingyi.sun@zilliz.com> * Add more log in datanode Signed-off-by: yangxuan <xuan.yang@zilliz.com> * Put SegmentState.Flushing as the last one in enum to fit the client Signed-off-by: sunby <bingyi.sun@zilliz.com> * Fix params in GetInsertBinlogPaths Signed-off-by: sunby <bingyi.sun@zilliz.com> * Rename policy Signed-off-by: sunby <bingyi.sun@zilliz.com> * Remove unused ddl functions and fields Signed-off-by: Congqi Xia <congqi.xia@zilliz.com> * Remove pchan when drop collection Signed-off-by: dragondriver <jiquan.long@zilliz.com> * Add balanced assignment policy Signed-off-by: sunby <bingyi.sun@zilliz.com> * fix master ut Signed-off-by: yefu.chen <yefu.chen@zilliz.com> * Add lock in session manager Signed-off-by: Congqi Xia <congqi.xia@zilliz.com> * add log for debug Signed-off-by: yefu.chen <yefu.chen@zilliz.com> * Fix some logic bug and typo Signed-off-by: Congqi Xia <congqi.xia@zilliz.com> * Fix recover bugs Signed-off-by: sunby <bingyi.sun@zilliz.com> * Get collection scheme of a specific timestamp Signed-off-by: yangxuan <xuan.yang@zilliz.com> * Change CheckPoint to SegmentInfo in VchannelInfo Signed-off-by: sunby <bingyi.sun@zilliz.com> * Recover Unflushed segment numOfRows Signed-off-by: yangxuan <xuan.yang@zilliz.com> * Fix dataservice unit tests Signed-off-by: sunby <bingyi.sun@zilliz.com> Co-authored-by: yefu.chen <yefu.chen@zilliz.com> Co-authored-by: yangxuan <xuan.yang@zilliz.com> Co-authored-by: dragondriver <jiquan.long@zilliz.com> Co-authored-by: Congqi Xia <congqi.xia@zilliz.com>
151 lines
4.6 KiB
Go
151 lines
4.6 KiB
Go
// Copyright (C) 2019-2020 Zilliz. All rights reserved.
|
|
//
|
|
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance
|
|
// with the License. You may obtain a copy of the License at
|
|
//
|
|
// http://www.apache.org/licenses/LICENSE-2.0
|
|
//
|
|
// Unless required by applicable law or agreed to in writing, software distributed under the License
|
|
// is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
|
|
// or implied. See the License for the specific language governing permissions and limitations under the License.
|
|
|
|
package dataservice
|
|
|
|
import (
|
|
"sort"
|
|
"time"
|
|
|
|
"github.com/milvus-io/milvus/internal/proto/commonpb"
|
|
"github.com/milvus-io/milvus/internal/proto/datapb"
|
|
"github.com/milvus-io/milvus/internal/proto/internalpb"
|
|
)
|
|
|
|
// flushMonitor check segments / channels meet the provided flush policy
|
|
type flushMonitor struct {
|
|
meta *meta
|
|
segmentPolicy SegmentFlushPolicy
|
|
channelPolicy ChannelFlushPolicy
|
|
}
|
|
|
|
// SegmentFlushPolicy checks segment size and returns whether segment needs to be flushed
|
|
type SegmentFlushPolicy func(*datapb.SegmentInfo) bool
|
|
|
|
// ChannelFlushPolicy checks segments inside single Vchannel count and returns segment ids needs to be flushed
|
|
type ChannelFlushPolicy func(string, []*datapb.SegmentInfo, *internalpb.MsgPosition) []UniqueID
|
|
|
|
// emptyFlushMonitor returns empty flush montior
|
|
func emptyFlushMonitor(meta *meta) flushMonitor {
|
|
return flushMonitor{
|
|
meta: meta,
|
|
}
|
|
}
|
|
|
|
// defaultFlushMonitor generates auto flusher with default policies
|
|
func defaultFlushMonitor(meta *meta) flushMonitor {
|
|
return flushMonitor{
|
|
meta: meta,
|
|
// segmentPolicy: estSegmentSizePolicy(1024, 1024*1024*1536), // row 1024 byte, limit 1.5GiB
|
|
channelPolicy: channelSizeEpochPolicy(1024, uint64(time.Hour)),
|
|
}
|
|
}
|
|
|
|
// CheckSegments check segments meet flush policy, returns segment id needs to flush
|
|
func (f flushMonitor) CheckSegments(segments []*datapb.SegmentInfo) []UniqueID {
|
|
if f.segmentPolicy == nil {
|
|
return []UniqueID{}
|
|
}
|
|
result := make([]UniqueID, 0, len(segments))
|
|
for _, segment := range segments {
|
|
if f.segmentPolicy(segment) {
|
|
result = append(result, segment.ID)
|
|
}
|
|
}
|
|
return result
|
|
}
|
|
|
|
// CheckChannels check channels changed, apply `ChannelPolicy`
|
|
func (f flushMonitor) CheckChannels(channels []string, latest *internalpb.MsgPosition) []UniqueID {
|
|
segHits := make(map[UniqueID]struct{})
|
|
for _, channel := range channels {
|
|
segments := f.meta.GetSegmentsByChannel(channel)
|
|
|
|
growingSegments := make([]*datapb.SegmentInfo, 0, len(segments))
|
|
for _, segment := range segments {
|
|
if segment.State != commonpb.SegmentState_Growing {
|
|
continue
|
|
}
|
|
growingSegments = append(growingSegments, segment)
|
|
if f.segmentPolicy != nil && f.segmentPolicy(segment) {
|
|
segHits[segment.ID] = struct{}{}
|
|
}
|
|
}
|
|
if f.channelPolicy != nil {
|
|
hits := f.channelPolicy(channel, growingSegments, latest)
|
|
for _, hit := range hits {
|
|
segHits[hit] = struct{}{}
|
|
}
|
|
}
|
|
}
|
|
|
|
result := make([]UniqueID, 0, len(segHits))
|
|
for segID := range segHits {
|
|
result = append(result, segID)
|
|
}
|
|
|
|
return result
|
|
}
|
|
|
|
// deprecated
|
|
func estSegmentSizePolicy(rowSize, limit int64) SegmentFlushPolicy {
|
|
return func(seg *datapb.SegmentInfo) bool {
|
|
if seg == nil {
|
|
return false
|
|
}
|
|
if seg.NumOfRows*rowSize > limit {
|
|
return true
|
|
}
|
|
return false
|
|
}
|
|
}
|
|
|
|
// channelSizeEpochPolicy policy check channel sizes and segment life time
|
|
// segmentMax is the max number of segment allowed in the channel
|
|
// epochDuration is the max live time segment has
|
|
func channelSizeEpochPolicy(segmentMax int, epochDuration uint64) ChannelFlushPolicy {
|
|
return func(channel string, segments []*datapb.SegmentInfo, latest *internalpb.MsgPosition) []UniqueID {
|
|
if len(segments) < segmentMax && latest == nil {
|
|
return []UniqueID{}
|
|
}
|
|
sortSegmentsByDmlPos(segments)
|
|
result := []UniqueID{}
|
|
overflow := len(segments) - segmentMax
|
|
for idx, segment := range segments {
|
|
if idx < overflow {
|
|
result = append(result, segment.ID)
|
|
continue
|
|
}
|
|
if latest != nil {
|
|
if segment.DmlPosition == nil || latest.Timestamp-segment.DmlPosition.Timestamp > epochDuration {
|
|
result = append(result, segment.ID)
|
|
continue
|
|
}
|
|
}
|
|
break
|
|
}
|
|
return result
|
|
}
|
|
}
|
|
|
|
// sortSegmentsByDmlPos sorts input segments in ascending order by `DmlPosition.Timestamp`, nil value is less than 0
|
|
func sortSegmentsByDmlPos(segments []*datapb.SegmentInfo) {
|
|
sort.Slice(segments, func(i, j int) bool {
|
|
if segments[i].DmlPosition == nil {
|
|
return true
|
|
}
|
|
if segments[j].DmlPosition == nil {
|
|
return false
|
|
}
|
|
return segments[i].DmlPosition.Timestamp < segments[j].DmlPosition.Timestamp
|
|
})
|
|
}
|