mirror of
https://gitee.com/milvus-io/milvus.git
synced 2025-12-06 17:18:35 +08:00
fix: Correct the update logic of timerecorder (#34339)
Correct the update logic of the timerecorder in the flowgraph to avoid the false failure "some node(s) haven't received input".
Issue: https://github.com/milvus-io/milvus/issues/34337
Signed-off-by: bigsheeper <yihao.dai@zilliz.com>
This commit is contained in:
parent
7611128e57
commit
0b404bff22
@ -76,7 +76,7 @@ type ddNode struct {
|
|||||||
|
|
||||||
// Name returns node name, implementing flowgraph.Node
|
// Name returns node name, implementing flowgraph.Node
|
||||||
func (ddn *ddNode) Name() string {
|
func (ddn *ddNode) Name() string {
|
||||||
return fmt.Sprintf("ddNode-%d-%s", ddn.collectionID, ddn.vChannelName)
|
return fmt.Sprintf("ddNode-%s", ddn.vChannelName)
|
||||||
}
|
}
|
||||||
|
|
||||||
func (ddn *ddNode) IsValidInMsg(in []Msg) bool {
|
func (ddn *ddNode) IsValidInMsg(in []Msg) bool {
|
||||||
|
|||||||
@ -83,7 +83,7 @@ func TestFlowGraph_DDNode_newDDNode(t *testing.T) {
|
|||||||
require.NoError(t, err)
|
require.NoError(t, err)
|
||||||
require.NotNil(t, ddNode)
|
require.NotNil(t, ddNode)
|
||||||
|
|
||||||
assert.Equal(t, fmt.Sprintf("ddNode-%d-%s", ddNode.collectionID, ddNode.vChannelName), ddNode.Name())
|
assert.Equal(t, fmt.Sprintf("ddNode-%s", ddNode.vChannelName), ddNode.Name())
|
||||||
|
|
||||||
assert.Equal(t, len(test.inSealedSegs), len(ddNode.sealedSegInfo))
|
assert.Equal(t, len(test.inSealedSegs), len(ddNode.sealedSegInfo))
|
||||||
assert.Equal(t, len(test.inGrowingSegs), len(ddNode.growingSegInfo))
|
assert.Equal(t, len(test.inGrowingSegs), len(ddNode.growingSegInfo))
|
||||||
|
|||||||
@ -62,7 +62,7 @@ func newDmInputNode(initCtx context.Context, dispatcherClient msgdispatcher.Clie
|
|||||||
log.Info("datanode consume successfully when register to msgDispatcher")
|
log.Info("datanode consume successfully when register to msgDispatcher")
|
||||||
}
|
}
|
||||||
|
|
||||||
name := fmt.Sprintf("dmInputNode-data-%d-%s", dmNodeConfig.collectionID, dmNodeConfig.vChannelName)
|
name := fmt.Sprintf("dmInputNode-data-%s", dmNodeConfig.vChannelName)
|
||||||
node := flowgraph.NewInputNode(
|
node := flowgraph.NewInputNode(
|
||||||
input,
|
input,
|
||||||
name,
|
name,
|
||||||
|
|||||||
@ -2,6 +2,7 @@ package pipeline
|
|||||||
|
|
||||||
import (
|
import (
|
||||||
"context"
|
"context"
|
||||||
|
"fmt"
|
||||||
|
|
||||||
"github.com/golang/protobuf/proto"
|
"github.com/golang/protobuf/proto"
|
||||||
"github.com/samber/lo"
|
"github.com/samber/lo"
|
||||||
@ -27,6 +28,11 @@ type writeNode struct {
|
|||||||
metacache metacache.MetaCache
|
metacache metacache.MetaCache
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Name returns node name, implementing flowgraph.Node
|
||||||
|
func (wNode *writeNode) Name() string {
|
||||||
|
return fmt.Sprintf("writeNode-%s", wNode.channelName)
|
||||||
|
}
|
||||||
|
|
||||||
func (wNode *writeNode) Operate(in []Msg) []Msg {
|
func (wNode *writeNode) Operate(in []Msg) []Msg {
|
||||||
fgMsg := in[0].(*FlowGraphMsg)
|
fgMsg := in[0].(*FlowGraphMsg)
|
||||||
|
|
||||||
|
|||||||
@ -75,27 +75,23 @@ func (nodeCtxManager *nodeCtxManager) Start() {
|
|||||||
// in dmInputNode, message from mq to channel, alloc goroutines
|
// in dmInputNode, message from mq to channel, alloc goroutines
|
||||||
// limit the goroutines in other node to prevent huge goroutines numbers
|
// limit the goroutines in other node to prevent huge goroutines numbers
|
||||||
nodeCtxManager.closeWg.Add(1)
|
nodeCtxManager.closeWg.Add(1)
|
||||||
go nodeCtxManager.workNodeStart()
|
curNode := nodeCtxManager.inputNodeCtx
|
||||||
}
|
|
||||||
|
|
||||||
func (nodeCtxManager *nodeCtxManager) workNodeStart() {
|
|
||||||
defer nodeCtxManager.closeWg.Done()
|
|
||||||
inputNode := nodeCtxManager.inputNodeCtx
|
|
||||||
curNode := inputNode
|
|
||||||
// tt checker start
|
// tt checker start
|
||||||
var checker *timerecord.Checker
|
|
||||||
if enableTtChecker {
|
if enableTtChecker {
|
||||||
manager := timerecord.GetCheckerManger("fgNode", nodeCtxTtInterval, func(list []string) {
|
manager := timerecord.GetCheckerManger("fgNode", nodeCtxTtInterval, func(list []string) {
|
||||||
log.Warn("some node(s) haven't received input", zap.Strings("list", list), zap.Duration("duration ", nodeCtxTtInterval))
|
log.Warn("some node(s) haven't received input", zap.Strings("list", list), zap.Duration("duration ", nodeCtxTtInterval))
|
||||||
})
|
})
|
||||||
for curNode != nil {
|
for curNode != nil {
|
||||||
name := fmt.Sprintf("nodeCtxTtChecker-%s", curNode.node.Name())
|
name := fmt.Sprintf("nodeCtxTtChecker-%s", curNode.node.Name())
|
||||||
checker = timerecord.NewChecker(name, manager)
|
curNode.checker = timerecord.NewChecker(name, manager)
|
||||||
curNode = curNode.downstream
|
curNode = curNode.downstream
|
||||||
defer checker.Close()
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
go nodeCtxManager.workNodeStart()
|
||||||
|
}
|
||||||
|
|
||||||
|
func (nodeCtxManager *nodeCtxManager) workNodeStart() {
|
||||||
|
defer nodeCtxManager.closeWg.Done()
|
||||||
for {
|
for {
|
||||||
select {
|
select {
|
||||||
case <-nodeCtxManager.closeCh:
|
case <-nodeCtxManager.closeCh:
|
||||||
@ -105,7 +101,8 @@ func (nodeCtxManager *nodeCtxManager) workNodeStart() {
|
|||||||
// 2. invoke node.Operate
|
// 2. invoke node.Operate
|
||||||
// 3. deliver the Operate result to downstream nodes
|
// 3. deliver the Operate result to downstream nodes
|
||||||
default:
|
default:
|
||||||
curNode = inputNode
|
inputNode := nodeCtxManager.inputNodeCtx
|
||||||
|
curNode := inputNode
|
||||||
for curNode != nil {
|
for curNode != nil {
|
||||||
// inputs from inputsMessages for Operate
|
// inputs from inputsMessages for Operate
|
||||||
var input, output []Msg
|
var input, output []Msg
|
||||||
@ -137,8 +134,8 @@ func (nodeCtxManager *nodeCtxManager) workNodeStart() {
|
|||||||
if curNode.downstream != nil {
|
if curNode.downstream != nil {
|
||||||
curNode.downstream.inputChannel <- output
|
curNode.downstream.inputChannel <- output
|
||||||
}
|
}
|
||||||
if enableTtChecker {
|
if enableTtChecker && curNode.checker != nil {
|
||||||
checker.Check()
|
curNode.checker.Check()
|
||||||
}
|
}
|
||||||
curNode = curNode.downstream
|
curNode = curNode.downstream
|
||||||
}
|
}
|
||||||
@ -157,6 +154,7 @@ type nodeCtx struct {
|
|||||||
node Node
|
node Node
|
||||||
inputChannel chan []Msg
|
inputChannel chan []Msg
|
||||||
downstream *nodeCtx
|
downstream *nodeCtx
|
||||||
|
checker *timerecord.Checker
|
||||||
|
|
||||||
blockMutex sync.RWMutex
|
blockMutex sync.RWMutex
|
||||||
}
|
}
|
||||||
@ -192,6 +190,9 @@ func (nodeCtx *nodeCtx) Close() {
|
|||||||
if nodeCtx.node.IsInputNode() {
|
if nodeCtx.node.IsInputNode() {
|
||||||
for nodeCtx != nil {
|
for nodeCtx != nil {
|
||||||
nodeCtx.node.Close()
|
nodeCtx.node.Close()
|
||||||
|
if nodeCtx.checker != nil {
|
||||||
|
nodeCtx.checker.Close()
|
||||||
|
}
|
||||||
log.Debug("flow graph node closed", zap.String("nodeName", nodeCtx.node.Name()))
|
log.Debug("flow graph node closed", zap.String("nodeName", nodeCtx.node.Name()))
|
||||||
nodeCtx = nodeCtx.downstream
|
nodeCtx = nodeCtx.downstream
|
||||||
}
|
}
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user