enhance: Make querycoordv2 collection observer task driven (#32441)
See also #32440

- Add loadTask in collection observer
- For load collection/partitions, the load task shall time out as a whole
- Change the related load job constructors

Signed-off-by: Congqi Xia <congqi.xia@zilliz.com>
Commit d7ff1bbe5c, parent 8442098457
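The change replaces the observer's per-tick scan of every collection in meta with explicit load tasks that the load jobs register. Below is a minimal, self-contained sketch of that task-driven pattern; Observer, LoadTask, and the sync.Map-based key scheme here are simplified stand-ins, not the actual Milvus types.

    package main

    import (
        "fmt"
        "sync"
    )

    type LoadType int

    const (
        LoadCollection LoadType = iota
        LoadPartition
    )

    // LoadTask mirrors the shape of the observer's task record in this commit.
    type LoadTask struct {
        LoadType     LoadType
        CollectionID int64
        PartitionIDs []int64
    }

    // Observer checks progress/timeout per registered task instead of
    // scanning every collection in meta on each tick.
    type Observer struct {
        tasks sync.Map // task key (trace ID or fallback) -> LoadTask
    }

    // Register records an in-flight load so the observer can drive it.
    func (o *Observer) Register(key string, task LoadTask) {
        o.tasks.Store(key, task)
    }

    // Observe visits only in-flight tasks; completed tasks are removed, so
    // the working set shrinks as loads finish.
    func (o *Observer) Observe(done func(LoadTask) bool) {
        o.tasks.Range(func(k, v any) bool {
            task := v.(LoadTask)
            if done(task) {
                o.tasks.Delete(k)
                fmt.Printf("task %v finished: collection %d\n", k, task.CollectionID)
            }
            return true
        })
    }

    func main() {
        ob := &Observer{}
        ob.Register("trace-1", LoadTask{LoadType: LoadCollection, CollectionID: 100})
        ob.Observe(func(t LoadTask) bool { return true }) // pretend the load completed
    }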
internal/querycoordv2/job/job_load.go

@@ -50,6 +50,7 @@ type LoadCollectionJob struct {
     cluster            session.Cluster
     targetMgr          *meta.TargetManager
     targetObserver     *observers.TargetObserver
+    collectionObserver *observers.CollectionObserver
     nodeMgr            *session.NodeManager
 }
 
@@ -62,6 +63,7 @@ func NewLoadCollectionJob(
     cluster session.Cluster,
     targetMgr *meta.TargetManager,
     targetObserver *observers.TargetObserver,
+    collectionObserver *observers.CollectionObserver,
     nodeMgr *session.NodeManager,
 ) *LoadCollectionJob {
     return &LoadCollectionJob{
@@ -74,6 +76,7 @@ func NewLoadCollectionJob(
         cluster:            cluster,
         targetMgr:          targetMgr,
         targetObserver:     targetObserver,
+        collectionObserver: collectionObserver,
         nodeMgr:            nodeMgr,
     }
 }
@@ -184,7 +187,7 @@ func (job *LoadCollectionJob) Execute() error {
         }
     })
 
-    _, sp := otel.Tracer(typeutil.QueryCoordRole).Start(job.ctx, "LoadCollection", trace.WithNewRoot())
+    ctx, sp := otel.Tracer(typeutil.QueryCoordRole).Start(job.ctx, "LoadCollection", trace.WithNewRoot())
     collection := &meta.Collection{
         CollectionLoadInfo: &querypb.CollectionLoadInfo{
             CollectionID: req.GetCollectionID(),
@@ -214,6 +217,9 @@ func (job *LoadCollectionJob) Execute() error {
     }
     job.undo.IsTargetUpdated = true
 
+    // 6. register load task into collection observer
+    job.collectionObserver.LoadCollection(ctx, req.GetCollectionID())
+
     return nil
 }
 
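The job now starts a root span and hands its context to the observer, which keys the task by that span's trace ID. A hedged sketch of the key derivation follows; it uses the real go.opentelemetry.io/otel/trace API, but loadTaskKey is an illustrative helper, and the fallback format mirrors the one in collection_observer.go further down.

    package main

    import (
        "context"
        "fmt"

        "go.opentelemetry.io/otel/trace"
    )

    // loadTaskKey derives the observer's task key from the caller's span:
    // the trace ID when one is recorded, otherwise a stable synthetic key.
    func loadTaskKey(ctx context.Context, collectionID int64) string {
        traceID := trace.SpanFromContext(ctx).SpanContext().TraceID()
        if traceID.IsValid() {
            return traceID.String()
        }
        // No recorded span (e.g. during recovery at startup): fall back.
        return fmt.Sprintf("LoadCollection_%d", collectionID)
    }

    func main() {
        // context.Background() carries no span, so the fallback key is returned.
        fmt.Println(loadTaskKey(context.Background(), 100))
    }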
@@ -234,6 +240,7 @@ type LoadPartitionJob struct {
     cluster            session.Cluster
     targetMgr          *meta.TargetManager
     targetObserver     *observers.TargetObserver
+    collectionObserver *observers.CollectionObserver
     nodeMgr            *session.NodeManager
 }
 
@@ -246,6 +253,7 @@ func NewLoadPartitionJob(
     cluster session.Cluster,
     targetMgr *meta.TargetManager,
     targetObserver *observers.TargetObserver,
+    collectionObserver *observers.CollectionObserver,
     nodeMgr *session.NodeManager,
 ) *LoadPartitionJob {
     return &LoadPartitionJob{
@@ -258,6 +266,7 @@ func NewLoadPartitionJob(
         cluster:            cluster,
         targetMgr:          targetMgr,
         targetObserver:     targetObserver,
+        collectionObserver: collectionObserver,
         nodeMgr:            nodeMgr,
     }
 }
@@ -360,10 +369,10 @@ func (job *LoadPartitionJob) Execute() error {
             CreatedAt: time.Now(),
         }
     })
+    ctx, sp := otel.Tracer(typeutil.QueryCoordRole).Start(job.ctx, "LoadPartition", trace.WithNewRoot())
     if !job.meta.CollectionManager.Exist(req.GetCollectionID()) {
         job.undo.IsNewCollection = true
 
-        _, sp := otel.Tracer(typeutil.QueryCoordRole).Start(job.ctx, "LoadPartition", trace.WithNewRoot())
         collection := &meta.Collection{
             CollectionLoadInfo: &querypb.CollectionLoadInfo{
                 CollectionID: req.GetCollectionID(),
@@ -399,6 +408,8 @@ func (job *LoadPartitionJob) Execute() error {
     }
     job.undo.IsTargetUpdated = true
 
+    job.collectionObserver.LoadPartitions(ctx, req.GetCollectionID(), lackPartitionIDs)
+
     return nil
 }
 
internal/querycoordv2/job/job_test.go

@@ -67,6 +67,7 @@ type JobSuite struct {
     cluster            *session.MockCluster
     targetMgr          *meta.TargetManager
     targetObserver     *observers.TargetObserver
+    collectionObserver *observers.CollectionObserver
     broker             *meta.MockBroker
     nodeMgr            *session.NodeManager
     checkerController  *checkers.CheckerController
@@ -192,6 +193,13 @@ func (suite *JobSuite) SetupTest() {
     suite.meta.HandleNodeUp(3000)
 
     suite.checkerController = &checkers.CheckerController{}
+    suite.collectionObserver = observers.NewCollectionObserver(
+        suite.dist,
+        suite.meta,
+        suite.targetMgr,
+        suite.targetObserver,
+        suite.checkerController,
+    )
 }
 
 func (suite *JobSuite) TearDownTest() {
@@ -231,6 +239,7 @@ func (suite *JobSuite) TestLoadCollection() {
             suite.cluster,
             suite.targetMgr,
             suite.targetObserver,
+            suite.collectionObserver,
             suite.nodeMgr,
         )
         suite.scheduler.Add(job)
@@ -258,6 +267,7 @@ func (suite *JobSuite) TestLoadCollection() {
             suite.cluster,
             suite.targetMgr,
             suite.targetObserver,
+            suite.collectionObserver,
             suite.nodeMgr,
         )
         suite.scheduler.Add(job)
@@ -283,6 +293,7 @@ func (suite *JobSuite) TestLoadCollection() {
             suite.cluster,
             suite.targetMgr,
             suite.targetObserver,
+            suite.collectionObserver,
             suite.nodeMgr,
         )
         suite.scheduler.Add(job)
@@ -310,6 +321,7 @@ func (suite *JobSuite) TestLoadCollection() {
             suite.cluster,
             suite.targetMgr,
             suite.targetObserver,
+            suite.collectionObserver,
             suite.nodeMgr,
         )
         suite.scheduler.Add(job)
@@ -345,6 +357,7 @@ func (suite *JobSuite) TestLoadCollection() {
             suite.cluster,
             suite.targetMgr,
             suite.targetObserver,
+            suite.collectionObserver,
             suite.nodeMgr,
         )
         suite.scheduler.Add(job)
@@ -366,6 +379,7 @@ func (suite *JobSuite) TestLoadCollection() {
             suite.cluster,
             suite.targetMgr,
             suite.targetObserver,
+            suite.collectionObserver,
             suite.nodeMgr,
         )
         suite.scheduler.Add(job)
@@ -395,6 +409,7 @@ func (suite *JobSuite) TestLoadCollectionWithReplicas() {
             suite.cluster,
             suite.targetMgr,
             suite.targetObserver,
+            suite.collectionObserver,
             suite.nodeMgr,
         )
         suite.scheduler.Add(job)
@@ -427,6 +442,7 @@ func (suite *JobSuite) TestLoadCollectionWithDiffIndex() {
             suite.cluster,
             suite.targetMgr,
             suite.targetObserver,
+            suite.collectionObserver,
             suite.nodeMgr,
         )
         suite.scheduler.Add(job)
@@ -457,6 +473,7 @@ func (suite *JobSuite) TestLoadCollectionWithDiffIndex() {
             suite.cluster,
             suite.targetMgr,
             suite.targetObserver,
+            suite.collectionObserver,
             suite.nodeMgr,
         )
         suite.scheduler.Add(job)
@@ -488,6 +505,7 @@ func (suite *JobSuite) TestLoadPartition() {
             suite.cluster,
             suite.targetMgr,
             suite.targetObserver,
+            suite.collectionObserver,
             suite.nodeMgr,
         )
         suite.scheduler.Add(job)
@@ -518,6 +536,7 @@ func (suite *JobSuite) TestLoadPartition() {
             suite.cluster,
             suite.targetMgr,
             suite.targetObserver,
+            suite.collectionObserver,
             suite.nodeMgr,
         )
         suite.scheduler.Add(job)
@@ -545,6 +564,7 @@ func (suite *JobSuite) TestLoadPartition() {
             suite.cluster,
             suite.targetMgr,
             suite.targetObserver,
+            suite.collectionObserver,
             suite.nodeMgr,
         )
         suite.scheduler.Add(job)
@@ -572,6 +592,7 @@ func (suite *JobSuite) TestLoadPartition() {
             suite.cluster,
             suite.targetMgr,
             suite.targetObserver,
+            suite.collectionObserver,
             suite.nodeMgr,
         )
         suite.scheduler.Add(job)
@@ -598,6 +619,7 @@ func (suite *JobSuite) TestLoadPartition() {
             suite.cluster,
             suite.targetMgr,
             suite.targetObserver,
+            suite.collectionObserver,
             suite.nodeMgr,
         )
         suite.scheduler.Add(job)
@@ -633,6 +655,7 @@ func (suite *JobSuite) TestLoadPartition() {
             suite.cluster,
             suite.targetMgr,
             suite.targetObserver,
+            suite.collectionObserver,
             suite.nodeMgr,
         )
         suite.scheduler.Add(job)
@@ -655,6 +678,7 @@ func (suite *JobSuite) TestLoadPartition() {
             suite.cluster,
             suite.targetMgr,
             suite.targetObserver,
+            suite.collectionObserver,
             suite.nodeMgr,
         )
         suite.scheduler.Add(job)
@@ -682,6 +706,7 @@ func (suite *JobSuite) TestDynamicLoad() {
             suite.cluster,
             suite.targetMgr,
             suite.targetObserver,
+            suite.collectionObserver,
             suite.nodeMgr,
         )
         return job
@@ -700,6 +725,7 @@ func (suite *JobSuite) TestDynamicLoad() {
             suite.cluster,
             suite.targetMgr,
             suite.targetObserver,
+            suite.collectionObserver,
             suite.nodeMgr,
         )
         return job
@@ -799,6 +825,7 @@ func (suite *JobSuite) TestLoadPartitionWithReplicas() {
             suite.cluster,
             suite.targetMgr,
             suite.targetObserver,
+            suite.collectionObserver,
             suite.nodeMgr,
         )
         suite.scheduler.Add(job)
@@ -832,6 +859,7 @@ func (suite *JobSuite) TestLoadPartitionWithDiffIndex() {
             suite.cluster,
             suite.targetMgr,
             suite.targetObserver,
+            suite.collectionObserver,
             suite.nodeMgr,
         )
         suite.scheduler.Add(job)
@@ -864,6 +892,7 @@ func (suite *JobSuite) TestLoadPartitionWithDiffIndex() {
             suite.cluster,
             suite.targetMgr,
             suite.targetObserver,
+            suite.collectionObserver,
             suite.nodeMgr,
         )
         suite.scheduler.Add(job)
@@ -891,6 +920,7 @@ func (suite *JobSuite) TestReleaseCollection() {
             suite.cluster,
             suite.targetMgr,
             suite.targetObserver,
 
             suite.checkerController,
         )
         suite.scheduler.Add(job)
@@ -1133,6 +1163,7 @@ func (suite *JobSuite) TestLoadCollectionStoreFailed() {
             suite.cluster,
             suite.targetMgr,
             suite.targetObserver,
+            suite.collectionObserver,
             suite.nodeMgr,
         )
         suite.scheduler.Add(job)
@@ -1174,6 +1205,7 @@ func (suite *JobSuite) TestLoadPartitionStoreFailed() {
             suite.cluster,
             suite.targetMgr,
             suite.targetObserver,
+            suite.collectionObserver,
             suite.nodeMgr,
         )
         suite.scheduler.Add(job)
@@ -1201,6 +1233,7 @@ func (suite *JobSuite) TestLoadCreateReplicaFailed() {
             suite.cluster,
             suite.targetMgr,
             suite.targetObserver,
+            suite.collectionObserver,
             suite.nodeMgr,
         )
         suite.scheduler.Add(job)
@@ -1229,6 +1262,7 @@ func (suite *JobSuite) TestCallLoadPartitionFailed() {
             suite.cluster,
             suite.targetMgr,
             suite.targetObserver,
+            suite.collectionObserver,
             suite.nodeMgr,
         )
         suite.scheduler.Add(loadCollectionJob)
@@ -1249,6 +1283,7 @@ func (suite *JobSuite) TestCallLoadPartitionFailed() {
             suite.cluster,
             suite.targetMgr,
             suite.targetObserver,
+            suite.collectionObserver,
             suite.nodeMgr,
         )
         suite.scheduler.Add(loadPartitionJob)
@@ -1275,6 +1310,7 @@ func (suite *JobSuite) TestCallLoadPartitionFailed() {
             suite.cluster,
             suite.targetMgr,
             suite.targetObserver,
+            suite.collectionObserver,
             suite.nodeMgr,
         )
         suite.scheduler.Add(loadCollectionJob)
@@ -1294,6 +1330,7 @@ func (suite *JobSuite) TestCallLoadPartitionFailed() {
             suite.cluster,
             suite.targetMgr,
             suite.targetObserver,
+            suite.collectionObserver,
             suite.nodeMgr,
         )
         suite.scheduler.Add(loadPartitionJob)
@@ -1436,6 +1473,7 @@ func (suite *JobSuite) loadAll() {
             suite.cluster,
             suite.targetMgr,
             suite.targetObserver,
+            suite.collectionObserver,
             suite.nodeMgr,
         )
         suite.scheduler.Add(job)
@@ -1460,6 +1498,7 @@ func (suite *JobSuite) loadAll() {
             suite.cluster,
             suite.targetMgr,
             suite.targetObserver,
+            suite.collectionObserver,
             suite.nodeMgr,
         )
         suite.scheduler.Add(job)
internal/querycoordv2/observers/collection_observer.go

@@ -23,6 +23,7 @@ import (
     "time"
 
     "github.com/samber/lo"
+    "go.opentelemetry.io/otel/trace"
     "go.uber.org/zap"
 
     "github.com/milvus-io/milvus/internal/proto/querypb"
@@ -32,6 +33,7 @@ import (
     "github.com/milvus-io/milvus/internal/querycoordv2/utils"
     "github.com/milvus-io/milvus/pkg/eventlog"
     "github.com/milvus-io/milvus/pkg/log"
+    "github.com/milvus-io/milvus/pkg/util/typeutil"
 )
 
 type CollectionObserver struct {
@@ -45,9 +47,17 @@ type CollectionObserver struct {
     checkerController    *checkers.CheckerController
     partitionLoadedCount map[int64]int
 
+    loadTasks *typeutil.ConcurrentMap[string, LoadTask]
+
     stopOnce sync.Once
 }
 
+type LoadTask struct {
+    LoadType     querypb.LoadType
+    CollectionID int64
+    PartitionIDs []int64
+}
+
 func NewCollectionObserver(
     dist *meta.DistributionManager,
     meta *meta.Meta,
@@ -55,14 +65,23 @@ func NewCollectionObserver(
     targetObserver *TargetObserver,
     checherController *checkers.CheckerController,
 ) *CollectionObserver {
-    return &CollectionObserver{
+    ob := &CollectionObserver{
         dist:                 dist,
         meta:                 meta,
         targetMgr:            targetMgr,
         targetObserver:       targetObserver,
         checkerController:    checherController,
         partitionLoadedCount: make(map[int64]int),
+        loadTasks:            typeutil.NewConcurrentMap[string, LoadTask](),
     }
+
+    // Add load task for collection recovery
+    collections := meta.GetAllCollections()
+    for _, collection := range collections {
+        ob.LoadCollection(context.Background(), collection.GetCollectionID())
+    }
+
+    return ob
 }
 
 func (ob *CollectionObserver) Start() {
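NewCollectionObserver now re-registers a load task for every collection already present in meta, so loads that were in flight before a QueryCoord restart keep being observed. A small illustrative sketch of that recovery step follows; Observer and NewObserver are stand-ins, not the Milvus types.

    package main

    import "fmt"

    // Observer here is a stand-in: on construction it re-registers a task
    // for every collection already recorded in meta.
    type Observer struct {
        tasks map[string]int64 // task key -> collection ID
    }

    func NewObserver(existingCollections []int64) *Observer {
        ob := &Observer{tasks: make(map[string]int64)}
        for _, id := range existingCollections {
            // After a restart there is no live span, so recovered tasks get
            // the fallback key form shown in the diff above.
            ob.tasks[fmt.Sprintf("LoadCollection_%d", id)] = id
        }
        return ob
    }

    func main() {
        ob := NewObserver([]int64{100, 101})
        fmt.Println(len(ob.tasks)) // 2
    }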
@@ -98,52 +117,105 @@ func (ob *CollectionObserver) Stop() {
     })
 }
 
+func (ob *CollectionObserver) LoadCollection(ctx context.Context, collectionID int64) {
+    span := trace.SpanFromContext(ctx)
+
+    traceID := span.SpanContext().TraceID()
+    key := traceID.String()
+
+    if !traceID.IsValid() {
+        key = fmt.Sprintf("LoadCollection_%d", collectionID)
+    }
+
+    ob.loadTasks.Insert(key, LoadTask{LoadType: querypb.LoadType_LoadCollection, CollectionID: collectionID})
+}
+
+func (ob *CollectionObserver) LoadPartitions(ctx context.Context, collectionID int64, partitionIDs []int64) {
+    span := trace.SpanFromContext(ctx)
+
+    traceID := span.SpanContext().TraceID()
+    key := traceID.String()
+    if !traceID.IsValid() {
+        key = fmt.Sprintf("LoadPartition_%d_%v", collectionID, partitionIDs)
+    }
+
+    ob.loadTasks.Insert(key, LoadTask{LoadType: querypb.LoadType_LoadPartition, CollectionID: collectionID, PartitionIDs: partitionIDs})
+}
+
 func (ob *CollectionObserver) Observe(ctx context.Context) {
     ob.observeTimeout()
     ob.observeLoadStatus(ctx)
 }
 
 func (ob *CollectionObserver) observeTimeout() {
-    collections := ob.meta.CollectionManager.GetAllCollections()
-    for _, collection := range collections {
-        if collection.GetStatus() != querypb.LoadStatus_Loading ||
-            time.Now().Before(collection.UpdatedAt.Add(Params.QueryCoordCfg.LoadTimeoutSeconds.GetAsDuration(time.Second))) {
-            continue
+    ob.loadTasks.Range(func(traceID string, task LoadTask) bool {
+        collection := ob.meta.CollectionManager.GetCollection(task.CollectionID)
+        // collection released
+        if collection == nil {
+            log.Info("Load Collection Task canceled, collection removed from meta", zap.Int64("collectionID", task.CollectionID), zap.String("traceID", traceID))
+            ob.loadTasks.Remove(traceID)
+            return true
         }
 
+        switch task.LoadType {
+        case querypb.LoadType_LoadCollection:
+            if collection.GetStatus() == querypb.LoadStatus_Loading &&
+                time.Now().After(collection.UpdatedAt.Add(Params.QueryCoordCfg.LoadTimeoutSeconds.GetAsDuration(time.Second))) {
                 log.Info("load collection timeout, cancel it",
                     zap.Int64("collectionID", collection.GetCollectionID()),
                     zap.Duration("loadTime", time.Since(collection.CreatedAt)))
                 ob.meta.CollectionManager.RemoveCollection(collection.GetCollectionID())
                 ob.meta.ReplicaManager.RemoveCollection(collection.GetCollectionID())
                 ob.targetMgr.RemoveCollection(collection.GetCollectionID())
+                ob.loadTasks.Remove(traceID)
+            }
+        case querypb.LoadType_LoadPartition:
+            partitionIDs := typeutil.NewSet(task.PartitionIDs...)
+            partitions := ob.meta.GetPartitionsByCollection(task.CollectionID)
+            partitions = lo.Filter(partitions, func(partition *meta.Partition, _ int) bool {
+                return partitionIDs.Contain(partition.GetPartitionID())
+            })
+
+            // all partition released
+            if len(partitions) == 0 {
+                log.Info("Load Partitions Task canceled, collection removed from meta",
+                    zap.Int64("collectionID", task.CollectionID),
+                    zap.Int64s("partitionIDs", task.PartitionIDs),
+                    zap.String("traceID", traceID))
+                ob.loadTasks.Remove(traceID)
+                return true
             }
 
-    partitions := utils.GroupPartitionsByCollection(ob.meta.CollectionManager.GetAllPartitions())
-    for collection, partitions := range partitions {
+            working := false
             for _, partition := range partitions {
-                if partition.GetStatus() != querypb.LoadStatus_Loading ||
-                    time.Now().Before(partition.UpdatedAt.Add(Params.QueryCoordCfg.LoadTimeoutSeconds.GetAsDuration(time.Second))) {
-                    continue
+                if time.Now().Before(partition.UpdatedAt.Add(Params.QueryCoordCfg.LoadTimeoutSeconds.GetAsDuration(time.Second))) {
+                    working = true
+                    break
                 }
             }
-            log.Info("load partition timeout, cancel it",
-                zap.Int64("collectionID", collection),
-                zap.Int64("partitionID", partition.GetPartitionID()),
-                zap.Duration("loadTime", time.Since(partition.CreatedAt)))
-            ob.meta.CollectionManager.RemovePartition(collection, partition.GetPartitionID())
+            // only all partitions timeout means task timeout
+            if !working {
+                log.Info("load partitions timeout, cancel it",
+                    zap.Int64("collectionID", task.CollectionID),
+                    zap.Int64s("partitionIDs", task.PartitionIDs))
+                for _, partition := range partitions {
+                    ob.meta.CollectionManager.RemovePartition(partition.CollectionID, partition.GetPartitionID())
                     ob.targetMgr.RemovePartition(partition.GetCollectionID(), partition.GetPartitionID())
                 }
-            // all partition timeout, remove collection
-            if len(ob.meta.CollectionManager.GetPartitionsByCollection(collection)) == 0 {
-                log.Info("collection timeout due to all partition removed", zap.Int64("collection", collection))
 
-                ob.meta.CollectionManager.RemoveCollection(collection)
-                ob.meta.ReplicaManager.RemoveCollection(collection)
-                ob.targetMgr.RemoveCollection(collection)
+                // all partition timeout, remove collection
+                if len(ob.meta.CollectionManager.GetPartitionsByCollection(task.CollectionID)) == 0 {
+                    log.Info("collection timeout due to all partition removed", zap.Int64("collection", task.CollectionID))
+
+                    ob.meta.CollectionManager.RemoveCollection(task.CollectionID)
+                    ob.meta.ReplicaManager.RemoveCollection(task.CollectionID)
+                    ob.targetMgr.RemoveCollection(task.CollectionID)
+                }
             }
         }
-    }
+
+        return true
+    })
 }
 
 func (ob *CollectionObserver) readyToObserve(collectionID int64) bool {
     metaExist := (ob.meta.GetCollection(collectionID) != nil)
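Per the PR description, a partition load task now times out as a whole: a single partition still inside its timeout window keeps the entire task alive, and cleanup happens only when every tracked partition has exceeded the deadline. A simplified sketch of that rule; Partition and taskTimedOut are illustrative stand-ins.

    package main

    import (
        "fmt"
        "time"
    )

    type Partition struct {
        PartitionID int64
        UpdatedAt   time.Time
    }

    // taskTimedOut reports true only if no partition was updated within the
    // timeout window, i.e. one still-progressing partition keeps the task alive.
    func taskTimedOut(partitions []Partition, timeout time.Duration) bool {
        for _, p := range partitions {
            if time.Now().Before(p.UpdatedAt.Add(timeout)) {
                return false // at least one partition is still within its window
            }
        }
        return len(partitions) > 0
    }

    func main() {
        now := time.Now()
        parts := []Partition{
            {PartitionID: 1, UpdatedAt: now.Add(-2 * time.Hour)},
            {PartitionID: 2, UpdatedAt: now}, // fresh update keeps the task alive
        }
        fmt.Println(taskTimedOut(parts, time.Hour)) // false
    }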
@@ -153,8 +225,28 @@ func (ob *CollectionObserver) readyToObserve(collectionID int64) bool {
 }
 
 func (ob *CollectionObserver) observeLoadStatus(ctx context.Context) {
-    partitions := ob.meta.CollectionManager.GetAllPartitions()
     loading := false
+    ob.loadTasks.Range(func(traceID string, task LoadTask) bool {
+        loading = true
+
+        collection := ob.meta.CollectionManager.GetCollection(task.CollectionID)
+        if collection == nil {
+            return true
+        }
+
+        var partitions []*meta.Partition
+        switch task.LoadType {
+        case querypb.LoadType_LoadCollection:
+            partitions = ob.meta.GetPartitionsByCollection(task.CollectionID)
+        case querypb.LoadType_LoadPartition:
+            partitionIDs := typeutil.NewSet[int64](task.PartitionIDs...)
+            partitions = ob.meta.GetPartitionsByCollection(task.CollectionID)
+            partitions = lo.Filter(partitions, func(partition *meta.Partition, _ int) bool {
+                return partitionIDs.Contain(partition.GetPartitionID())
+            })
+        }
+
+        loaded := true
         for _, partition := range partitions {
             if partition.LoadPercentage == 100 {
                 continue
@@ -162,9 +254,25 @@ func (ob *CollectionObserver) observeLoadStatus(ctx context.Context) {
             if ob.readyToObserve(partition.CollectionID) {
                 replicaNum := ob.meta.GetReplicaNumber(partition.GetCollectionID())
                 ob.observePartitionLoadStatus(ctx, partition, replicaNum)
-                loading = true
             }
+            partition = ob.meta.GetPartition(partition.PartitionID)
+            if partition.LoadPercentage != 100 {
+                loaded = false
+            }
         }
+        // all partition loaded, finish task
+        if len(partitions) > 0 && loaded {
+            log.Info("Load task finish",
+                zap.String("traceID", traceID),
+                zap.Int64("collectionID", task.CollectionID),
+                zap.Int64s("partitionIDs", task.PartitionIDs),
+                zap.Stringer("loadType", task.LoadType))
+            ob.loadTasks.Remove(traceID)
+        }
+
+        return true
+    })
 
     // trigger check logic when loading collections/partitions
     if loading {
         ob.checkerController.Check()
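observeLoadStatus finishes a task only once every partition it tracks reports 100% progress, and an empty partition list never finishes the task. A reduced sketch of that completion check; the types are stand-ins.

    package main

    import "fmt"

    type Partition struct {
        PartitionID    int64
        LoadPercentage int32
    }

    // taskLoaded mirrors the completion rule: all tracked partitions must be
    // fully loaded, and at least one partition must exist.
    func taskLoaded(partitions []Partition) bool {
        for _, p := range partitions {
            if p.LoadPercentage != 100 {
                return false
            }
        }
        return len(partitions) > 0
    }

    func main() {
        fmt.Println(taskLoaded([]Partition{{1, 100}, {2, 100}})) // true
        fmt.Println(taskLoaded([]Partition{{1, 100}, {2, 40}}))  // false
    }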
internal/querycoordv2/observers/collection_observer_test.go

@@ -17,6 +17,7 @@
 package observers
 
 import (
+    "context"
     "testing"
     "time"
 
@@ -441,6 +442,8 @@ func (suite *CollectionObserverSuite) load(collection int64) {
 
     suite.broker.EXPECT().GetRecoveryInfoV2(mock.Anything, collection).Return(dmChannels, allSegments, nil)
     suite.targetMgr.UpdateCollectionNextTarget(collection)
+
+    suite.ob.LoadCollection(context.Background(), collection)
 }
 
 func TestCollectionObserver(t *testing.T) {
internal/querycoordv2/services.go

@@ -232,6 +232,7 @@ func (s *Server) LoadCollection(ctx context.Context, req *querypb.LoadCollectionRequest)
         s.cluster,
         s.targetMgr,
         s.targetObserver,
+        s.collectionObserver,
         s.nodeMgr,
     )
     s.jobScheduler.Add(loadJob)
@@ -332,6 +333,7 @@ func (s *Server) LoadPartitions(ctx context.Context, req *querypb.LoadPartitionsRequest)
         s.cluster,
         s.targetMgr,
         s.targetObserver,
+        s.collectionObserver,
         s.nodeMgr,
     )
     s.jobScheduler.Add(loadJob)
internal/querycoordv2/services_test.go

@@ -77,6 +77,7 @@ type ServiceSuite struct {
     targetMgr          *meta.TargetManager
     broker             *meta.MockBroker
     targetObserver     *observers.TargetObserver
+    collectionObserver *observers.CollectionObserver
     cluster            *session.MockCluster
     nodeMgr            *session.NodeManager
     jobScheduler       *job.Scheduler
@@ -177,6 +178,15 @@ func (suite *ServiceSuite) SetupTest() {
     suite.distMgr = meta.NewDistributionManager()
     suite.distController = dist.NewMockController(suite.T())
 
+    suite.collectionObserver = observers.NewCollectionObserver(
+        suite.dist,
+        suite.meta,
+        suite.targetMgr,
+        suite.targetObserver,
+        &checkers.CheckerController{},
+    )
+    suite.collectionObserver.Start()
+
     suite.server = &Server{
         kv:    suite.kv,
         store: suite.store,
@@ -187,6 +197,7 @@ func (suite *ServiceSuite) SetupTest() {
         targetMgr:          suite.targetMgr,
         broker:             suite.broker,
         targetObserver:     suite.targetObserver,
+        collectionObserver: suite.collectionObserver,
         nodeMgr:            suite.nodeMgr,
         cluster:            suite.cluster,
         jobScheduler:       suite.jobScheduler,
@@ -195,13 +206,6 @@ func (suite *ServiceSuite) SetupTest() {
         distController: suite.distController,
         ctx:            context.Background(),
     }
-    suite.server.collectionObserver = observers.NewCollectionObserver(
-        suite.server.dist,
-        suite.server.meta,
-        suite.server.targetMgr,
-        suite.targetObserver,
-        &checkers.CheckerController{},
-    )
 
     suite.server.UpdateStateCode(commonpb.StateCode_Healthy)
 }
@@ -1802,6 +1806,7 @@ func (suite *ServiceSuite) loadAll() {
             suite.cluster,
             suite.targetMgr,
             suite.targetObserver,
+            suite.collectionObserver,
             suite.nodeMgr,
         )
         suite.jobScheduler.Add(job)
@@ -1826,6 +1831,7 @@ func (suite *ServiceSuite) loadAll() {
             suite.cluster,
             suite.targetMgr,
             suite.targetObserver,
+            suite.collectionObserver,
             suite.nodeMgr,
         )
         suite.jobScheduler.Add(job)