diff --git a/internal/querycoordv2/dist/dist_handler.go b/internal/querycoordv2/dist/dist_handler.go index dbb0d6a316..8d0f557883 100644 --- a/internal/querycoordv2/dist/dist_handler.go +++ b/internal/querycoordv2/dist/dist_handler.go @@ -35,12 +35,7 @@ import ( "github.com/milvus-io/milvus/pkg/log" "github.com/milvus-io/milvus/pkg/util/commonpbutil" "github.com/milvus-io/milvus/pkg/util/merr" -) - -const ( - distReqTimeout = 3 * time.Second - heartBeatLagBehindWarn = 3 * time.Second - maxFailureTimes = 3 + "github.com/milvus-io/milvus/pkg/util/paramtable" ) type distHandler struct { @@ -96,7 +91,7 @@ func (dh *distHandler) handleDistResp(resp *querypb.GetDataDistributionResponse) session.WithSegmentCnt(len(resp.GetSegments())), session.WithChannelCnt(len(resp.GetChannels())), ) - if time.Since(node.LastHeartbeat()) > heartBeatLagBehindWarn { + if time.Since(node.LastHeartbeat()) > paramtable.Get().QueryCoordCfg.HeartBeatWarningLag.GetAsDuration(time.Millisecond) { log.Warn("node last heart beat time lag too behind", zap.Time("now", time.Now()), zap.Time("lastHeartBeatTime", node.LastHeartbeat()), zap.Int64("nodeID", node.ID())) } @@ -226,7 +221,7 @@ func (dh *distHandler) getDistribution(ctx context.Context) (*querypb.GetDataDis channels[channel.GetChannelName()] = targetChannel.GetSeekPosition() } - ctx, cancel := context.WithTimeout(ctx, distReqTimeout) + ctx, cancel := context.WithTimeout(ctx, paramtable.Get().QueryCoordCfg.DistributionRequestTimeout.GetAsDuration(time.Millisecond)) defer cancel() resp, err := dh.client.GetDataDistribution(ctx, dh.nodeID, &querypb.GetDataDistributionRequest{ Base: commonpbutil.NewMsgBase( diff --git a/pkg/util/paramtable/component_param.go b/pkg/util/paramtable/component_param.go index 3a991b9f31..cb6d39da77 100644 --- a/pkg/util/paramtable/component_param.go +++ b/pkg/util/paramtable/component_param.go @@ -1358,6 +1358,9 @@ type queryCoordConfig struct { HeartbeatAvailableInterval ParamItem `refreshable:"true"` LoadTimeoutSeconds ParamItem `refreshable:"true"` + DistributionRequestTimeout ParamItem `refreshable:"true"` + HeartBeatWarningLag ParamItem `refreshable:"true"` + // Deprecated: Since 2.2.2, QueryCoord do not use HandOff logic anymore CheckHandoffInterval ParamItem `refreshable:"true"` EnableActiveStandby ParamItem `refreshable:"false"` @@ -1738,6 +1741,24 @@ func (p *queryCoordConfig) init(base *BaseTable) { Export: true, } p.CheckNodeSessionInterval.Init(base.mgr) + + p.DistributionRequestTimeout = ParamItem{ + Key: "queryCoord.distRequestTimeout", + Version: "2.3.6", + DefaultValue: "5000", + Doc: "the request timeout for querycoord fetching data distribution from querynodes, in milliseconds", + Export: true, + } + p.DistributionRequestTimeout.Init(base.mgr) + + p.HeartBeatWarningLag = ParamItem{ + Key: "queryCoord.heatbeatWarningLag", + Version: "2.3.6", + DefaultValue: "5000", + Doc: "the lag value for querycoord report warning when last heatbeat is too old, in milliseconds", + Export: true, + } + p.HeartBeatWarningLag.Init(base.mgr) } // /////////////////////////////////////////////////////////////////////////////