fix false load failure for long unserviceable period (#26813) (#26818)

Signed-off-by: MrPresent-Han <chun.han@zilliz.com>
MrPresent-Han 2023-09-06 12:57:15 +08:00 committed by GitHub
parent 7e2fa55440
commit 528948559f
3 changed files with 23 additions and 10 deletions


@@ -577,6 +577,12 @@ func (sd *shardDelegator) Close() {
 func NewShardDelegator(collectionID UniqueID, replicaID UniqueID, channel string, version int64,
 	workerManager cluster.Manager, manager *segments.Manager, tsafeManager tsafe.Manager, loader segments.Loader,
 	factory msgstream.Factory, startTs uint64) (ShardDelegator, error) {
+	log := log.With(zap.Int64("collectionID", collectionID),
+		zap.Int64("replicaID", replicaID),
+		zap.String("channel", channel),
+		zap.Int64("version", version),
+		zap.Uint64("startTs", startTs),
+	)
 	collection := manager.Collection.Get(collectionID)
 	if collection == nil {
@@ -584,7 +590,7 @@ func NewShardDelegator(collectionID UniqueID, replicaID UniqueID, channel string
 	}
 	maxSegmentDeleteBuffer := paramtable.Get().QueryNodeCfg.MaxSegmentDeleteBuffer.GetAsInt64()
-	log.Info("Init delte cache", zap.Int64("maxSegmentCacheBuffer", maxSegmentDeleteBuffer), zap.Time("startTime", tsoutil.PhysicalTime(startTs)))
+	log.Info("Init delta cache", zap.Int64("maxSegmentCacheBuffer", maxSegmentDeleteBuffer), zap.Time("startTime", tsoutil.PhysicalTime(startTs)))
 	sd := &shardDelegator{
 		collectionID: collectionID,
@@ -607,5 +613,6 @@ func NewShardDelegator(collectionID UniqueID, replicaID UniqueID, channel string
 	sd.tsCond = sync.NewCond(&m)
 	sd.wg.Add(1)
 	go sd.watchTSafe()
+	log.Info("finish build new shardDelegator")
 	return sd, nil
 }
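
Note: the hunks above derive a child logger carrying the delegator's identity, so the new "finish build new shardDelegator" message, and every other log line in the constructor, is tagged with the same fields. Below is a minimal, self-contained sketch of that pattern written against plain go.uber.org/zap rather than the log package this file imports; the field values are made up for illustration.

package main

import "go.uber.org/zap"

func main() {
	base := zap.NewExample()
	defer base.Sync()

	// Mirror log := log.With(...) from NewShardDelegator: attach the shard
	// identity once so later messages do not repeat the fields.
	// The field names follow the diff; the values are illustrative only.
	log := base.With(
		zap.Int64("collectionID", 100),
		zap.Int64("replicaID", 1),
		zap.String("channel", "by-dev-rootcoord-dml_0"),
		zap.Int64("version", 1),
		zap.Uint64("startTs", 449000000000000000),
	)

	log.Info("Init delta cache", zap.Int64("maxSegmentCacheBuffer", 64))
	log.Info("finish build new shardDelegator")
}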


@@ -302,6 +302,14 @@ func (loader *segmentLoader) notifyLoadFinish(segments ...*querypb.SegmentLoadIn
 // requestResource requests memory & storage to load segments,
 // returns the memory usage, disk usage and concurrency with the gained memory.
 func (loader *segmentLoader) requestResource(ctx context.Context, infos ...*querypb.SegmentLoadInfo) (LoadResource, int, error) {
+	resource := LoadResource{}
+	// we need to deal with empty infos case separately,
+	// because the following judgement for requested resources are based on current status and static config
+	// which may block empty-load operations by accident
+	if len(infos) == 0 {
+		return resource, 0, nil
+	}
 	segmentIDs := lo.Map(infos, func(info *querypb.SegmentLoadInfo, _ int) int64 {
 		return info.GetSegmentID()
 	})
@@ -309,8 +317,6 @@ func (loader *segmentLoader) requestResource(ctx context.Context, infos ...*quer
 		zap.Int64s("segmentIDs", segmentIDs),
 	)
-	resource := LoadResource{}
 	loader.mut.Lock()
 	defer loader.mut.Unlock()
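
Note: the guard added above addresses the false load failure named in the commit title. As the added comment explains, the resource judgement later in requestResource is based on current status and static config, so a node that has been unserviceable and saturated for a long period could reject even a request that loads nothing. The following is a self-contained sketch, not the Milvus implementation; the types, thresholds, and watermark check are invented to illustrate the failure mode and the early return.

package main

import (
	"errors"
	"fmt"
)

// loadResource and loader are stand-ins for Milvus's LoadResource and
// segmentLoader; the fields and the watermark check are illustrative only.
type loadResource struct {
	MemoryUsage uint64
}

type loader struct {
	memoryUsed  uint64
	memoryLimit uint64
}

func (l *loader) requestResource(segmentSizes ...uint64) (loadResource, error) {
	resource := loadResource{}
	// Deal with the empty case separately: requesting nothing must always
	// succeed, no matter how full the node currently is.
	if len(segmentSizes) == 0 {
		return resource, nil
	}
	// A status-based admission check: reject when current usage is already
	// over the watermark. Without the guard above, this kind of judgement
	// would also block empty (no-op) load requests by accident.
	if l.memoryUsed >= l.memoryLimit*9/10 {
		return loadResource{}, errors.New("insufficient memory to load segments")
	}
	for _, size := range segmentSizes {
		resource.MemoryUsage += size
	}
	l.memoryUsed += resource.MemoryUsage
	return resource, nil
}

func main() {
	l := &loader{memoryUsed: 950, memoryLimit: 1000}
	if _, err := l.requestResource(); err != nil {
		fmt.Println("unexpected false failure:", err)
		return
	}
	fmt.Println("empty load request succeeds even while the node is saturated")
}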


@@ -1179,17 +1179,17 @@ func (node *QueryNode) GetDataDistribution(ctx context.Context, req *querypb.Get
 	channelVersionInfos := make([]*querypb.ChannelVersionInfo, 0)
 	leaderViews := make([]*querypb.LeaderView, 0)
-	node.delegators.Range(func(key string, value delegator.ShardDelegator) bool {
-		if !value.Serviceable() {
+	node.delegators.Range(func(key string, delegator delegator.ShardDelegator) bool {
+		if !delegator.Serviceable() {
 			return true
 		}
 		channelVersionInfos = append(channelVersionInfos, &querypb.ChannelVersionInfo{
 			Channel: key,
-			Collection: value.Collection(),
-			Version: value.Version(),
+			Collection: delegator.Collection(),
+			Version: delegator.Version(),
 		})
-		sealed, growing := value.GetSegmentInfo(false)
+		sealed, growing := delegator.GetSegmentInfo(false)
 		sealedSegments := make(map[int64]*querypb.SegmentDist)
 		for _, item := range sealed {
 			for _, segment := range item.Segments {
@@ -1212,11 +1212,11 @@ func (node *QueryNode) GetDataDistribution(ctx context.Context, req *querypb.Get
 		}
 		leaderViews = append(leaderViews, &querypb.LeaderView{
-			Collection: value.Collection(),
+			Collection: delegator.Collection(),
 			Channel: key,
 			SegmentDist: sealedSegments,
 			GrowingSegments: growingSegments,
-			TargetVersion: value.GetTargetVersion(),
+			TargetVersion: delegator.GetTargetVersion(),
 		})
 		return true
 	})
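
Note: the hunks above only rename the Range callback parameter from value to delegator and keep the existing skip of non-serviceable delegators; behavior is unchanged. A minimal sketch of that iteration pattern follows, using the standard sync.Map instead of Milvus's typed concurrent map, with invented types. The key detail is that returning true skips the current channel but keeps iterating, while returning false would stop reporting distribution for the remaining channels.

package main

import (
	"fmt"
	"sync"
)

// shardDelegator is a stand-in for the delegator used in GetDataDistribution;
// only the fields needed for this sketch are modeled.
type shardDelegator struct {
	collection  int64
	version     int64
	serviceable bool
}

func (sd *shardDelegator) Serviceable() bool { return sd.serviceable }

func main() {
	var delegators sync.Map
	delegators.Store("dml-channel-0", &shardDelegator{collection: 100, version: 2, serviceable: true})
	delegators.Store("dml-channel-1", &shardDelegator{collection: 100, version: 1, serviceable: false})

	delegators.Range(func(key, value any) bool {
		channel := key.(string)
		delegator := value.(*shardDelegator)
		if !delegator.Serviceable() {
			// Skip this channel but keep iterating over the rest.
			return true
		}
		fmt.Printf("channel=%s collection=%d version=%d\n",
			channel, delegator.collection, delegator.version)
		return true
	})
}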