mirror of
https://gitee.com/milvus-io/milvus.git
synced 2025-12-08 01:58:34 +08:00
fix: remove the streamingnode checking when loading segment (#45859)
issue: #43117 If we enable this check when loading segments, every segment must always be loaded through a streamingnode rather than a 2.5 querynode, which causes some search and query requests to fail during an upgrade. Otherwise (with the check disabled), some search and query results may be wrong during an upgrade. We choose to disable this check for now to keep search and query available while upgrading. Also see pr: #43346 Signed-off-by: chyezh <chyezh@outlook.com>
This commit is contained in:
parent
31976d8adb
commit
4f080bd3a0
@ -365,27 +365,6 @@ func (m *ChannelDistManager) GetShardLeader(channelName string, replica *Replica
|
||||
candidatesServiceable := candidates.IsServiceable()
|
||||
channelServiceable := channel.IsServiceable()
|
||||
|
||||
candidateIsStreamingNode := m.checkIfStreamingNode(candidates.Node)
|
||||
channelIsStreamingNode := m.checkIfStreamingNode(channel.Node)
|
||||
logger.Debug("check whether stream node is serviceable",
|
||||
zap.Bool("candidatesServiceable", candidatesServiceable),
|
||||
zap.Bool("channelServiceable", channelServiceable),
|
||||
zap.Bool("candidateIsStreamingNode", candidateIsStreamingNode),
|
||||
zap.Bool("channelIsStreamingNode", channelIsStreamingNode))
|
||||
|
||||
if channelIsStreamingNode && !candidateIsStreamingNode {
|
||||
// When upgrading from 2.5 to 2.6, the delegator leader may not locate at streaming node.
|
||||
// We always use the streaming node as the delegator leader to avoid the delete data lost when loading segment.
|
||||
logger.Debug("set delegator on stream node to candidate shard leader", zap.Int64("node", channel.Node),
|
||||
zap.Int64("channel version", channel.Version))
|
||||
candidates = channel
|
||||
} else if !channelIsStreamingNode && candidateIsStreamingNode {
|
||||
// When downgrading from 2.6 to 2.5, the delegator leader may locate at non-streaming node.
|
||||
// We always use the non-streaming node as the delegator leader to avoid the delete data lost when loading segment.
|
||||
logger.Debug("found delegator which is not on stream node", zap.Int64("node", channel.Node),
|
||||
zap.Int64("channel version", channel.Version))
|
||||
continue
|
||||
} else {
|
||||
updateNeeded := false
|
||||
switch {
|
||||
case !candidatesServiceable && channelServiceable:
|
||||
@ -407,7 +386,6 @@ func (m *ChannelDistManager) GetShardLeader(channelName string, replica *Replica
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
if candidates != nil {
|
||||
logger.Debug("final", zap.Any("candidates", candidates),
|
||||
zap.Int64("candidates version", candidates.Version),
|
||||
@ -416,17 +394,6 @@ func (m *ChannelDistManager) GetShardLeader(channelName string, replica *Replica
|
||||
return candidates
|
||||
}
|
||||
|
||||
// checkIfStreamingNode checks if the node is a streaming node.
|
||||
// Because the session of streaming node and embedded query node are different,
|
||||
// So we need to check if the node is a streaming node from the query node session but not streaming node session to avoid the wrong check result.
|
||||
func (m *ChannelDistManager) checkIfStreamingNode(nodeID int64) bool {
|
||||
node := m.nodeManager.Get(nodeID)
|
||||
if node == nil {
|
||||
return false
|
||||
}
|
||||
return node.IsEmbeddedQueryNodeInStreamingNode() || node.IsInStandalone()
|
||||
}
|
||||
|
||||
func (m *ChannelDistManager) GetChannelDist(collectionID int64) []*metricsinfo.DmChannel {
|
||||
m.rwmutex.RLock()
|
||||
defer m.rwmutex.RUnlock()
|
||||
|
||||
@ -24,7 +24,6 @@ import (
|
||||
|
||||
"github.com/milvus-io/milvus/internal/coordinator/snmanager"
|
||||
"github.com/milvus-io/milvus/internal/querycoordv2/session"
|
||||
"github.com/milvus-io/milvus/internal/util/sessionutil"
|
||||
"github.com/milvus-io/milvus/pkg/v2/proto/datapb"
|
||||
"github.com/milvus-io/milvus/pkg/v2/proto/querypb"
|
||||
"github.com/milvus-io/milvus/pkg/v2/util/metricsinfo"
|
||||
@ -344,25 +343,6 @@ func (suite *ChannelDistManagerSuite) TestGetShardLeader() {
|
||||
// Test nonexistent channel
|
||||
leader = dist.GetShardLeader("nonexistent", replica)
|
||||
suite.Nil(leader)
|
||||
|
||||
// Test streaming node
|
||||
nodeManager.Add(session.NewNodeInfo(session.ImmutableNodeInfo{
|
||||
NodeID: 4,
|
||||
Address: "localhost:1",
|
||||
Hostname: "localhost",
|
||||
Labels: map[string]string{sessionutil.LabelStreamingNodeEmbeddedQueryNode: "1"},
|
||||
}))
|
||||
channel1Node4 := suite.channels["dmc0"].Clone()
|
||||
channel1Node4.Node = 4
|
||||
channel1Node4.Version = 3
|
||||
channel1Node4.View.Status.Serviceable = false
|
||||
dist.Update(4, channel1Node4)
|
||||
|
||||
leader = dist.GetShardLeader("dmc0", replica)
|
||||
suite.NotNil(leader)
|
||||
suite.Equal(int64(4), leader.Node)
|
||||
suite.Equal(int64(3), leader.Version)
|
||||
suite.False(leader.IsServiceable())
|
||||
}
|
||||
|
||||
func TestGetChannelDistJSON(t *testing.T) {
|
||||
|
||||
@ -31,12 +31,10 @@ import (
|
||||
|
||||
"github.com/milvus-io/milvus-proto/go-api/v2/commonpb"
|
||||
"github.com/milvus-io/milvus-proto/go-api/v2/milvuspb"
|
||||
"github.com/milvus-io/milvus/internal/coordinator/snmanager"
|
||||
"github.com/milvus-io/milvus/internal/querycoordv2/meta"
|
||||
. "github.com/milvus-io/milvus/internal/querycoordv2/params"
|
||||
"github.com/milvus-io/milvus/internal/querycoordv2/session"
|
||||
"github.com/milvus-io/milvus/internal/querycoordv2/utils"
|
||||
"github.com/milvus-io/milvus/internal/util/streamingutil"
|
||||
"github.com/milvus-io/milvus/pkg/v2/common"
|
||||
"github.com/milvus-io/milvus/pkg/v2/log"
|
||||
"github.com/milvus-io/milvus/pkg/v2/proto/datapb"
|
||||
@ -243,11 +241,6 @@ func (ex *Executor) loadSegment(task *SegmentTask, step int) error {
|
||||
return err
|
||||
}
|
||||
|
||||
if err := ex.checkIfShardLeaderIsStreamingNode(view); err != nil {
|
||||
log.Warn("shard leader is not a streamingnode, skip load segment", zap.Error(err))
|
||||
return err
|
||||
}
|
||||
|
||||
log = log.With(zap.Int64("shardLeader", view.Node))
|
||||
|
||||
// NOTE: for balance segment task, expected load and release execution on the same shard leader
|
||||
@ -270,25 +263,29 @@ func (ex *Executor) loadSegment(task *SegmentTask, step int) error {
|
||||
return nil
|
||||
}
|
||||
|
||||
// checkIfShardLeaderIsStreamingNode checks if the shard leader is a streamingnode.
|
||||
// If we enable following checking when loading segments,
|
||||
// 1. all segment should always be loaded by streamingnode but not 2.5 querynode, make some search and query failure when upgrading.
|
||||
// Otherwise, some search and query result will be wrong when upgrading.
|
||||
// We choose to disable this checking for now to promise available search and query when upgrading.
|
||||
//
|
||||
// Because the L0 management at 2.6 and 2.5 is different, so when upgrading mixcoord,
|
||||
// the new mixcoord will make a wrong plan when balancing a segment from one query node to another by 2.5 delegator.
|
||||
// We need to balance the 2.5 delegator to 2.6 delegator before balancing any segment by 2.6 mixcoord.
|
||||
func (ex *Executor) checkIfShardLeaderIsStreamingNode(view *meta.DmChannel) error {
|
||||
if !streamingutil.IsStreamingServiceEnabled() {
|
||||
return nil
|
||||
}
|
||||
|
||||
node := ex.nodeMgr.Get(view.Node)
|
||||
if node == nil {
|
||||
return merr.WrapErrServiceInternal(fmt.Sprintf("node %d is not found", view.Node))
|
||||
}
|
||||
nodes := snmanager.StaticStreamingNodeManager.GetStreamingQueryNodeIDs()
|
||||
if !nodes.Contain(view.Node) {
|
||||
return merr.WrapErrServiceInternal(fmt.Sprintf("channel %s at node %d is not working at streamingnode, skip load segment", view.GetChannelName(), view.Node))
|
||||
}
|
||||
return nil
|
||||
}
|
||||
// func (ex *Executor) checkIfShardLeaderIsStreamingNode(view *meta.DmChannel) error {
|
||||
// if !streamingutil.IsStreamingServiceEnabled() {
|
||||
// return nil
|
||||
// }
|
||||
//
|
||||
// node := ex.nodeMgr.Get(view.Node)
|
||||
// if node == nil {
|
||||
// return merr.WrapErrServiceInternal(fmt.Sprintf("node %d is not found", view.Node))
|
||||
// }
|
||||
// nodes := snmanager.StaticStreamingNodeManager.GetStreamingQueryNodeIDs()
|
||||
// if !nodes.Contain(view.Node) {
|
||||
// return merr.WrapErrServiceInternal(fmt.Sprintf("channel %s at node %d is not working at streamingnode, skip load segment", view.GetChannelName(), view.Node))
|
||||
// }
|
||||
// return nil
|
||||
// }
|
||||
|
||||
func (ex *Executor) releaseSegment(task *SegmentTask, step int) {
|
||||
defer ex.removeTask(task, step)
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user