milvus/internal/datanode/flow_graph_dd_node.go
sunby 189ac881f3 Fix bugs (#5676)
* Remove redundant session startup

Signed-off-by: sunby <bingyi.sun@zilliz.com>

* Register datanode after start success

Signed-off-by: sunby <bingyi.sun@zilliz.com>

* fix meta snap shot

Signed-off-by: yefu.chen <yefu.chen@zilliz.com>

* fix datanode message stream channel

Signed-off-by: yangxuan <xuan.yang@zilliz.com>

* Fix bugs when drop empty collection

Signed-off-by: sunby <bingyi.sun@zilliz.com>

* Fix bug of getting pchan statistics from task scheduler

Signed-off-by: dragondriver <jiquan.long@zilliz.com>

* Fix i/dist/dataservice test code

Signed-off-by: Congqi Xia <congqi.xia@zilliz.com>

* Fix epoch lifetime not applied

Signed-off-by: Congqi Xia <congqi.xia@zilliz.com>

* fix datanode flowgraph dd node

Signed-off-by: yangxuan <xuan.yang@zilliz.com>

* Fix handle datanode timetick bug

Signed-off-by: sunby <bingyi.sun@zilliz.com>

* Remove repack function of dml stream

Signed-off-by: dragondriver <jiquan.long@zilliz.com>

* fix proxynode

Signed-off-by: yefu.chen <yefu.chen@zilliz.com>

* Apply extended seal policy

Signed-off-by: Congqi Xia <congqi.xia@zilliz.com>

* add check for time tick

Signed-off-by: yefu.chen <yefu.chen@zilliz.com>

* fix check

Signed-off-by: yefu.chen <yefu.chen@zilliz.com>

* Fix the repack function of dml stream

Signed-off-by: dragondriver <jiquan.long@zilliz.com>

* Fix the bug when send statistics of pchan

Signed-off-by: dragondriver <jiquan.long@zilliz.com>

* Fix the repack function when craete dml stream

Signed-off-by: dragondriver <jiquan.long@zilliz.com>

* fix bugs

Signed-off-by: yefu.chen <yefu.chen@zilliz.com>

* fix describe collection

Signed-off-by: yefu.chen <yefu.chen@zilliz.com>

* Fix bug when send timestamp statistics

Signed-off-by: dragondriver <jiquan.long@zilliz.com>

* fix data node

Signed-off-by: yefu.chen <yefu.chen@zilliz.com>

* Add length check before flush request

Signed-off-by: Congqi Xia <congqi.xia@zilliz.com>

* add log for data node

Signed-off-by: yefu.chen <yefu.chen@zilliz.com>

* Fix SaveBinlog bugs

Signed-off-by: sunby <bingyi.sun@zilliz.com>

* Add more log in datanode

Signed-off-by: yangxuan <xuan.yang@zilliz.com>

* Put SegmentState.Flushing as the last one in enum to fit the client

Signed-off-by: sunby <bingyi.sun@zilliz.com>

* Fix params in GetInsertBinlogPaths

Signed-off-by: sunby <bingyi.sun@zilliz.com>

* Rename policy

Signed-off-by: sunby <bingyi.sun@zilliz.com>

* Remove unused ddl functions and fields

Signed-off-by: Congqi Xia <congqi.xia@zilliz.com>

* Remove pchan when drop collection

Signed-off-by: dragondriver <jiquan.long@zilliz.com>

* Add balanced assignment policy

Signed-off-by: sunby <bingyi.sun@zilliz.com>

* fix master ut

Signed-off-by: yefu.chen <yefu.chen@zilliz.com>

* Add lock in session manager

Signed-off-by: Congqi Xia <congqi.xia@zilliz.com>

* add log for debug

Signed-off-by: yefu.chen <yefu.chen@zilliz.com>

* Fix some logic bug and typo

Signed-off-by: Congqi Xia <congqi.xia@zilliz.com>

* Fix recover bugs

Signed-off-by: sunby <bingyi.sun@zilliz.com>

* Get collection scheme of a specific timestamp

Signed-off-by: yangxuan <xuan.yang@zilliz.com>

* Change CheckPoint to SegmentInfo in VchannelInfo

Signed-off-by: sunby <bingyi.sun@zilliz.com>

* Recover Unflushed segment numOfRows

Signed-off-by: yangxuan <xuan.yang@zilliz.com>

* Fix dataservice unit tests

Signed-off-by: sunby <bingyi.sun@zilliz.com>

Co-authored-by: yefu.chen <yefu.chen@zilliz.com>
Co-authored-by: yangxuan <xuan.yang@zilliz.com>
Co-authored-by: dragondriver <jiquan.long@zilliz.com>
Co-authored-by: Congqi Xia <congqi.xia@zilliz.com>
2021-06-15 16:06:11 +08:00

153 lines
4.0 KiB
Go

// Copyright (C) 2019-2020 Zilliz. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software distributed under the License
// is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
// or implied. See the License for the specific language governing permissions and limitations under the License.
package datanode
import (
"sync"
"go.uber.org/zap"
"github.com/milvus-io/milvus/internal/log"
"github.com/milvus-io/milvus/internal/msgstream"
"github.com/milvus-io/milvus/internal/proto/commonpb"
"github.com/milvus-io/milvus/internal/proto/datapb"
"github.com/milvus-io/milvus/internal/proto/internalpb"
"github.com/milvus-io/milvus/internal/util/flowgraph"
)
type ddNode struct {
BaseNode
clearSignal chan<- UniqueID
collectionID UniqueID
mu sync.RWMutex
seg2SegInfo map[UniqueID]*datapb.SegmentInfo // Segment ID to UnFlushed Segment
vchanInfo *datapb.VchannelInfo
}
func (ddn *ddNode) Name() string {
return "ddNode"
}
func (ddn *ddNode) Operate(in []flowgraph.Msg) []flowgraph.Msg {
// log.Debug("DDNode Operating")
if len(in) != 1 {
log.Error("Invalid operate message input in ddNode", zap.Int("input length", len(in)))
// TODO: add error handling
}
if len(in) == 0 {
return []flowgraph.Msg{}
}
msMsg, ok := in[0].(*MsgStreamMsg)
if !ok {
log.Error("type assertion failed for MsgStreamMsg")
// TODO: add error handling
}
if msMsg == nil {
return []Msg{}
}
var iMsg = insertMsg{
insertMessages: make([]*msgstream.InsertMsg, 0),
timeRange: TimeRange{
timestampMin: msMsg.TimestampMin(),
timestampMax: msMsg.TimestampMax(),
},
startPositions: make([]*internalpb.MsgPosition, 0),
endPositions: make([]*internalpb.MsgPosition, 0),
}
for _, msg := range msMsg.TsMessages() {
switch msg.Type() {
case commonpb.MsgType_DropCollection:
if msg.(*msgstream.DropCollectionMsg).GetCollectionID() == ddn.collectionID {
ddn.clearSignal <- ddn.collectionID
log.Info("Destroying current flowgraph")
}
case commonpb.MsgType_Insert:
log.Debug("DDNode with insert messages")
if msg.EndTs() < FilterThreshold {
log.Info("Filtering Insert Messages",
zap.Uint64("Message endts", msg.EndTs()),
zap.Uint64("FilterThreshold", FilterThreshold),
)
resMsg := ddn.filterFlushedSegmentInsertMessages(msg.(*msgstream.InsertMsg))
if resMsg != nil {
iMsg.insertMessages = append(iMsg.insertMessages, resMsg)
}
}
iMsg.insertMessages = append(iMsg.insertMessages, msg.(*msgstream.InsertMsg))
}
}
iMsg.startPositions = append(iMsg.startPositions, msMsg.StartPositions()...)
iMsg.endPositions = append(iMsg.endPositions, msMsg.EndPositions()...)
var res Msg = &iMsg
return []Msg{res}
}
func (ddn *ddNode) filterFlushedSegmentInsertMessages(msg *msgstream.InsertMsg) *msgstream.InsertMsg {
if ddn.isFlushed(msg.GetSegmentID()) {
return nil
}
ddn.mu.Lock()
if si, ok := ddn.seg2SegInfo[msg.GetSegmentID()]; ok {
if msg.EndTs() > si.GetDmlPosition().GetTimestamp() {
delete(ddn.seg2SegInfo, msg.GetSegmentID())
return nil
}
}
ddn.mu.Unlock()
return msg
}
func (ddn *ddNode) isFlushed(segmentID UniqueID) bool {
ddn.mu.Lock()
defer ddn.mu.Unlock()
for _, id := range ddn.vchanInfo.GetFlushedSegments() {
if id == segmentID {
return true
}
}
return false
}
func newDDNode(clearSignal chan<- UniqueID, collID UniqueID, vchanInfo *datapb.VchannelInfo) *ddNode {
baseNode := BaseNode{}
baseNode.SetMaxParallelism(Params.FlowGraphMaxQueueLength)
si := make(map[UniqueID]*datapb.SegmentInfo)
for _, us := range vchanInfo.GetUnflushedSegments() {
si[us.GetID()] = us
}
return &ddNode{
BaseNode: baseNode,
clearSignal: clearSignal,
collectionID: collID,
seg2SegInfo: si,
vchanInfo: vchanInfo,
}
}