fix: panic with datanode negative wait group counter (#30136)

issue: #29170
pr: #30135

Signed-off-by: chyezh <chyezh@outlook.com>
This commit is contained in:
chyezh 2024-01-30 18:07:03 +08:00 committed by GitHub
parent 21c944beaa
commit 3e994242d6
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
2 changed files with 8 additions and 4 deletions

View File

@ -357,9 +357,8 @@ func (node *DataNode) Start() error {
node.timeTickSender.start()
}
node.stopWaiter.Add(1)
// Start node watch node
go node.StartWatchChannels(node.ctx)
node.startWatchChannelsAtBackground(node.ctx)
node.stopWaiter.Add(1)
go node.flowgraphManager.start(&node.stopWaiter)

View File

@ -38,6 +38,11 @@ import (
const retryWatchInterval = 20 * time.Second
// startWatchChannelsAtBackground registers one unit on node.stopWaiter and then
// runs StartWatchChannels in a new goroutine with the given ctx.
//
// StartWatchChannels defers node.stopWaiter.Done(), so every launch of it must
// be paired with exactly one Add(1). Doing the Add here, before the goroutine
// is spawned, keeps that pairing in one place for both the initial start and
// the restarts triggered from inside the watch loop.
func (node *DataNode) startWatchChannelsAtBackground(ctx context.Context) {
// Add must happen before the goroutine starts so its deferred Done cannot run first.
node.stopWaiter.Add(1)
go node.StartWatchChannels(ctx)
}
// StartWatchChannels starts a loop that watches channel allocation status via kv (etcd for now).
func (node *DataNode) StartWatchChannels(ctx context.Context) {
defer node.stopWaiter.Done()
@ -61,7 +66,7 @@ func (node *DataNode) StartWatchChannels(ctx context.Context) {
case event, ok := <-evtChan:
if !ok {
log.Warn("datanode failed to watch channel, return")
go node.StartWatchChannels(ctx)
node.startWatchChannelsAtBackground(ctx)
return
}
@ -69,7 +74,7 @@ func (node *DataNode) StartWatchChannels(ctx context.Context) {
log.Warn("datanode watch channel canceled", zap.Error(event.Err()))
// https://github.com/etcd-io/etcd/issues/8980
if event.Err() == v3rpc.ErrCompacted {
go node.StartWatchChannels(ctx)
node.startWatchChannelsAtBackground(ctx)
return
}
// if watch loop return due to event canceled, the datanode is not functional anymore