mirror of
https://gitee.com/milvus-io/milvus.git
synced 2025-12-07 17:48:29 +08:00
Fix bug for IndexNode pod kill test (#18177)
Signed-off-by: Cai.Zhang <cai.zhang@zilliz.com>
This commit is contained in:
parent
b9d7027522
commit
a47a414dce
@ -166,6 +166,7 @@ func TestIndexBuilder(t *testing.T) {
|
||||
Err: false,
|
||||
},
|
||||
nodeManager: &NodeManager{
|
||||
ctx: ctx,
|
||||
nodeClients: map[UniqueID]types.IndexNode{
|
||||
4: &indexnode.Mock{
|
||||
Err: false,
|
||||
@ -295,7 +296,9 @@ func TestIndexBuilder_Error(t *testing.T) {
|
||||
Fail: false,
|
||||
Err: false,
|
||||
},
|
||||
nodeManager: &NodeManager{},
|
||||
nodeManager: &NodeManager{
|
||||
ctx: ctx,
|
||||
},
|
||||
}
|
||||
mt := &metaTable{
|
||||
indexBuildID2Meta: map[UniqueID]*Meta{
|
||||
@ -340,6 +343,7 @@ func TestIndexBuilder_Error(t *testing.T) {
|
||||
Err: false,
|
||||
},
|
||||
nodeManager: &NodeManager{
|
||||
ctx: ctx,
|
||||
nodeClients: map[UniqueID]types.IndexNode{
|
||||
1: &indexnode.Mock{
|
||||
Err: false,
|
||||
@ -391,6 +395,7 @@ func TestIndexBuilder_Error(t *testing.T) {
|
||||
Err: true,
|
||||
},
|
||||
nodeManager: &NodeManager{
|
||||
ctx: ctx,
|
||||
nodeClients: map[UniqueID]types.IndexNode{
|
||||
1: &indexnode.Mock{
|
||||
Err: false,
|
||||
|
||||
@ -20,17 +20,14 @@ import (
|
||||
"context"
|
||||
"sync"
|
||||
|
||||
"github.com/milvus-io/milvus/internal/proto/commonpb"
|
||||
"github.com/milvus-io/milvus/internal/proto/indexpb"
|
||||
|
||||
"github.com/milvus-io/milvus/internal/metrics"
|
||||
|
||||
"go.uber.org/zap"
|
||||
|
||||
grpcindexnodeclient "github.com/milvus-io/milvus/internal/distributed/indexnode/client"
|
||||
"github.com/milvus-io/milvus/internal/log"
|
||||
"github.com/milvus-io/milvus/internal/metrics"
|
||||
"github.com/milvus-io/milvus/internal/proto/commonpb"
|
||||
"github.com/milvus-io/milvus/internal/proto/indexpb"
|
||||
"github.com/milvus-io/milvus/internal/proto/milvuspb"
|
||||
"github.com/milvus-io/milvus/internal/types"
|
||||
"go.uber.org/zap"
|
||||
)
|
||||
|
||||
// NodeManager is used by IndexCoord to manage the client of IndexNode.
|
||||
@ -56,7 +53,7 @@ func NewNodeManager(ctx context.Context) *NodeManager {
|
||||
// setClient sets IndexNode client to node manager.
|
||||
func (nm *NodeManager) setClient(nodeID UniqueID, client types.IndexNode) error {
|
||||
log.Debug("IndexCoord NodeManager setClient", zap.Int64("nodeID", nodeID))
|
||||
defer log.Debug("IndexNode NodeManager setclient success", zap.Any("nodeID", nodeID))
|
||||
defer log.Debug("IndexNode NodeManager setClient success", zap.Any("nodeID", nodeID))
|
||||
item := &PQItem{
|
||||
key: nodeID,
|
||||
priority: 0,
|
||||
@ -105,32 +102,71 @@ func (nm *NodeManager) AddNode(nodeID UniqueID, address string) error {
|
||||
|
||||
// PeekClient peeks the client with the least load.
|
||||
func (nm *NodeManager) PeekClient(meta *Meta) (UniqueID, types.IndexNode) {
|
||||
nm.lock.RLock()
|
||||
defer nm.lock.RUnlock()
|
||||
allClients := nm.GetAllClients()
|
||||
|
||||
if len(nm.nodeClients) == 0 {
|
||||
if len(allClients) == 0 {
|
||||
log.Error("there is no IndexNode online")
|
||||
return -1, nil
|
||||
}
|
||||
for nodeID, client := range nm.nodeClients {
|
||||
resp, err := client.GetTaskSlots(nm.ctx, &indexpb.GetTaskSlotsRequest{})
|
||||
|
||||
// Note: In order to quickly end other goroutines, an error is returned when the client is successfully selected
|
||||
ctx, cancel := context.WithCancel(nm.ctx)
|
||||
var (
|
||||
peekNodeID = UniqueID(0)
|
||||
nodeMutex = sync.Mutex{}
|
||||
wg = sync.WaitGroup{}
|
||||
)
|
||||
|
||||
for nodeID, client := range allClients {
|
||||
nodeID := nodeID
|
||||
client := client
|
||||
wg.Add(1)
|
||||
go func() {
|
||||
defer wg.Done()
|
||||
resp, err := client.GetTaskSlots(ctx, &indexpb.GetTaskSlotsRequest{})
|
||||
if err != nil {
|
||||
log.Warn("get IndexNode slots failed", zap.Int64("nodeID", nodeID), zap.Error(err))
|
||||
continue
|
||||
return
|
||||
}
|
||||
if resp.Status.ErrorCode != commonpb.ErrorCode_Success {
|
||||
log.Warn("get IndexNode slots failed", zap.Int64("nodeID", nodeID),
|
||||
zap.String("reason", resp.Status.Reason))
|
||||
continue
|
||||
return
|
||||
}
|
||||
if resp.Slots > 0 {
|
||||
return nodeID, client
|
||||
nodeMutex.Lock()
|
||||
defer nodeMutex.Unlock()
|
||||
log.Info("peek client success", zap.Int64("nodeID", nodeID))
|
||||
if peekNodeID == 0 {
|
||||
peekNodeID = nodeID
|
||||
}
|
||||
cancel()
|
||||
// Note: In order to quickly end other goroutines, an error is returned when the client is successfully selected
|
||||
return
|
||||
}
|
||||
}()
|
||||
}
|
||||
wg.Wait()
|
||||
cancel()
|
||||
if peekNodeID != 0 {
|
||||
return peekNodeID, allClients[peekNodeID]
|
||||
}
|
||||
|
||||
return 0, nil
|
||||
}
|
||||
|
||||
func (nm *NodeManager) GetAllClients() map[UniqueID]types.IndexNode {
|
||||
nm.lock.RLock()
|
||||
defer nm.lock.RUnlock()
|
||||
|
||||
allClients := make(map[UniqueID]types.IndexNode, len(nm.nodeClients))
|
||||
for nodeID, client := range nm.nodeClients {
|
||||
allClients[nodeID] = client
|
||||
}
|
||||
|
||||
return allClients
|
||||
}
|
||||
|
||||
// indexNodeGetMetricsResponse record the metrics information of IndexNode.
|
||||
type indexNodeGetMetricsResponse struct {
|
||||
resp *milvuspb.GetMetricsResponse
|
||||
|
||||
@ -18,15 +18,19 @@ package indexcoord
|
||||
|
||||
import (
|
||||
"context"
|
||||
"errors"
|
||||
"testing"
|
||||
|
||||
"github.com/milvus-io/milvus/internal/indexnode"
|
||||
"github.com/milvus-io/milvus/internal/proto/commonpb"
|
||||
"github.com/milvus-io/milvus/internal/proto/indexpb"
|
||||
"github.com/milvus-io/milvus/internal/proto/schemapb"
|
||||
"github.com/milvus-io/milvus/internal/types"
|
||||
"github.com/stretchr/testify/assert"
|
||||
)
|
||||
|
||||
func TestNodeManager_PeekClient(t *testing.T) {
|
||||
t.Run("success", func(t *testing.T) {
|
||||
nm := NewNodeManager(context.Background())
|
||||
meta := &Meta{
|
||||
indexMeta: &indexpb.IndexMeta{
|
||||
@ -54,4 +58,105 @@ func TestNodeManager_PeekClient(t *testing.T) {
|
||||
nodeID2, client2 := nm.PeekClient(meta)
|
||||
assert.Equal(t, int64(0), nodeID2)
|
||||
assert.Nil(t, client2)
|
||||
})
|
||||
|
||||
t.Run("multiple unavailable IndexNode", func(t *testing.T) {
|
||||
nm := &NodeManager{
|
||||
ctx: context.TODO(),
|
||||
nodeClients: map[UniqueID]types.IndexNode{
|
||||
1: &indexnode.MockIndexNode{
|
||||
GetTaskSlotsMock: func(ctx context.Context, req *indexpb.GetTaskSlotsRequest) (*indexpb.GetTaskSlotsResponse, error) {
|
||||
return &indexpb.GetTaskSlotsResponse{
|
||||
Status: &commonpb.Status{
|
||||
ErrorCode: commonpb.ErrorCode_UnexpectedError,
|
||||
},
|
||||
}, errors.New("error")
|
||||
},
|
||||
},
|
||||
2: &indexnode.MockIndexNode{
|
||||
GetTaskSlotsMock: func(ctx context.Context, req *indexpb.GetTaskSlotsRequest) (*indexpb.GetTaskSlotsResponse, error) {
|
||||
return &indexpb.GetTaskSlotsResponse{
|
||||
Status: &commonpb.Status{
|
||||
ErrorCode: commonpb.ErrorCode_UnexpectedError,
|
||||
},
|
||||
}, errors.New("error")
|
||||
},
|
||||
},
|
||||
3: &indexnode.MockIndexNode{
|
||||
GetTaskSlotsMock: func(ctx context.Context, req *indexpb.GetTaskSlotsRequest) (*indexpb.GetTaskSlotsResponse, error) {
|
||||
return &indexpb.GetTaskSlotsResponse{
|
||||
Status: &commonpb.Status{
|
||||
ErrorCode: commonpb.ErrorCode_UnexpectedError,
|
||||
},
|
||||
}, errors.New("error")
|
||||
},
|
||||
},
|
||||
4: &indexnode.MockIndexNode{
|
||||
GetTaskSlotsMock: func(ctx context.Context, req *indexpb.GetTaskSlotsRequest) (*indexpb.GetTaskSlotsResponse, error) {
|
||||
return &indexpb.GetTaskSlotsResponse{
|
||||
Status: &commonpb.Status{
|
||||
ErrorCode: commonpb.ErrorCode_UnexpectedError,
|
||||
},
|
||||
}, errors.New("error")
|
||||
},
|
||||
},
|
||||
5: &indexnode.MockIndexNode{
|
||||
GetTaskSlotsMock: func(ctx context.Context, req *indexpb.GetTaskSlotsRequest) (*indexpb.GetTaskSlotsResponse, error) {
|
||||
return &indexpb.GetTaskSlotsResponse{
|
||||
Status: &commonpb.Status{
|
||||
ErrorCode: commonpb.ErrorCode_UnexpectedError,
|
||||
Reason: "fail reason",
|
||||
},
|
||||
}, nil
|
||||
},
|
||||
},
|
||||
6: &indexnode.MockIndexNode{
|
||||
GetTaskSlotsMock: func(ctx context.Context, req *indexpb.GetTaskSlotsRequest) (*indexpb.GetTaskSlotsResponse, error) {
|
||||
return &indexpb.GetTaskSlotsResponse{
|
||||
Status: &commonpb.Status{
|
||||
ErrorCode: commonpb.ErrorCode_UnexpectedError,
|
||||
Reason: "fail reason",
|
||||
},
|
||||
}, nil
|
||||
},
|
||||
},
|
||||
7: &indexnode.MockIndexNode{
|
||||
GetTaskSlotsMock: func(ctx context.Context, req *indexpb.GetTaskSlotsRequest) (*indexpb.GetTaskSlotsResponse, error) {
|
||||
return &indexpb.GetTaskSlotsResponse{
|
||||
Status: &commonpb.Status{
|
||||
ErrorCode: commonpb.ErrorCode_UnexpectedError,
|
||||
Reason: "fail reason",
|
||||
},
|
||||
}, nil
|
||||
},
|
||||
},
|
||||
8: &indexnode.MockIndexNode{
|
||||
GetTaskSlotsMock: func(ctx context.Context, req *indexpb.GetTaskSlotsRequest) (*indexpb.GetTaskSlotsResponse, error) {
|
||||
return &indexpb.GetTaskSlotsResponse{
|
||||
Slots: 1,
|
||||
Status: &commonpb.Status{
|
||||
ErrorCode: commonpb.ErrorCode_Success,
|
||||
Reason: "",
|
||||
},
|
||||
}, nil
|
||||
},
|
||||
},
|
||||
9: &indexnode.MockIndexNode{
|
||||
GetTaskSlotsMock: func(ctx context.Context, req *indexpb.GetTaskSlotsRequest) (*indexpb.GetTaskSlotsResponse, error) {
|
||||
return &indexpb.GetTaskSlotsResponse{
|
||||
Slots: 10,
|
||||
Status: &commonpb.Status{
|
||||
ErrorCode: commonpb.ErrorCode_Success,
|
||||
Reason: "",
|
||||
},
|
||||
}, nil
|
||||
},
|
||||
},
|
||||
},
|
||||
}
|
||||
|
||||
nodeID, client := nm.PeekClient(&Meta{})
|
||||
assert.NotNil(t, client)
|
||||
assert.Contains(t, []UniqueID{8, 9}, nodeID)
|
||||
})
|
||||
}
|
||||
|
||||
@ -22,6 +22,8 @@ import (
|
||||
"fmt"
|
||||
"sync"
|
||||
|
||||
"github.com/milvus-io/milvus/internal/types"
|
||||
|
||||
"go.uber.org/zap"
|
||||
|
||||
"github.com/golang/protobuf/proto"
|
||||
@ -39,6 +41,7 @@ import (
|
||||
)
|
||||
|
||||
// Mock is an alternative to IndexNode, it will return specific results based on specific parameters.
|
||||
// Deprecated, use MockIndexNode
|
||||
type Mock struct {
|
||||
Build bool
|
||||
Failure bool
|
||||
@ -386,3 +389,23 @@ func getMockSystemInfoMetrics(
|
||||
ComponentName: metricsinfo.ConstructComponentName(typeutil.IndexNodeRole, Params.IndexNodeCfg.GetNodeID()),
|
||||
}, nil
|
||||
}
|
||||
|
||||
type MockIndexNode struct {
|
||||
types.IndexNode
|
||||
|
||||
CreateIndexMock func(ctx context.Context, req *indexpb.CreateIndexRequest) (*commonpb.Status, error)
|
||||
GetTaskSlotsMock func(ctx context.Context, req *indexpb.GetTaskSlotsRequest) (*indexpb.GetTaskSlotsResponse, error)
|
||||
GetMetricsMock func(ctx context.Context, req *milvuspb.GetMetricsRequest) (*milvuspb.GetMetricsResponse, error)
|
||||
}
|
||||
|
||||
func (min *MockIndexNode) CreateIndex(ctx context.Context, req *indexpb.CreateIndexRequest) (*commonpb.Status, error) {
|
||||
return min.CreateIndexMock(ctx, req)
|
||||
}
|
||||
|
||||
func (min *MockIndexNode) GetTaskSlots(ctx context.Context, req *indexpb.GetTaskSlotsRequest) (*indexpb.GetTaskSlotsResponse, error) {
|
||||
return min.GetTaskSlotsMock(ctx, req)
|
||||
}
|
||||
|
||||
func (min *MockIndexNode) GetMetrics(ctx context.Context, req *milvuspb.GetMetricsRequest) (*milvuspb.GetMetricsResponse, error) {
|
||||
return min.GetMetricsMock(ctx, req)
|
||||
}
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user