From bd44bd5ae2ce3660f1a5699a1307305f07bf565f Mon Sep 17 00:00:00 2001 From: Chun Han <116052805+MrPresent-Han@users.noreply.github.com> Date: Wed, 27 Mar 2024 16:13:10 +0800 Subject: [PATCH] enhance: add default value config for segment prune filterRatio(#31003) (#31580) related: #31003 Signed-off-by: MrPresent-Han --- internal/querynodev2/delegator/delegator.go | 5 +++-- .../querynodev2/delegator/segment_pruner.go | 5 +++-- .../delegator/segment_pruner_test.go | 19 +++++++++++-------- pkg/util/paramtable/component_param.go | 8 ++++++++ 4 files changed, 25 insertions(+), 12 deletions(-) diff --git a/internal/querynodev2/delegator/delegator.go b/internal/querynodev2/delegator/delegator.go index 1acd76684f..8912e397ff 100644 --- a/internal/querynodev2/delegator/delegator.go +++ b/internal/querynodev2/delegator/delegator.go @@ -210,7 +210,8 @@ func (sd *shardDelegator) search(ctx context.Context, req *querypb.SearchRequest return nil, err } if paramtable.Get().QueryNodeCfg.EnableSegmentPrune.GetAsBool() { - PruneSegments(ctx, sd.partitionStats, req.GetReq(), nil, sd.collection.Schema(), sealed, PruneInfo{filterRatio: defaultFilterRatio}) + PruneSegments(ctx, sd.partitionStats, req.GetReq(), nil, sd.collection.Schema(), sealed, + PruneInfo{filterRatio: paramtable.Get().QueryNodeCfg.DefaultSegmentFilterRatio.GetAsFloat()}) } tasks, err := organizeSubTask(ctx, req, sealed, growing, sd, sd.modifySearchRequest) @@ -504,7 +505,7 @@ func (sd *shardDelegator) Query(ctx context.Context, req *querypb.QueryRequest) } if paramtable.Get().QueryNodeCfg.EnableSegmentPrune.GetAsBool() { - PruneSegments(ctx, sd.partitionStats, nil, req.GetReq(), sd.collection.Schema(), sealed, PruneInfo{defaultFilterRatio}) + PruneSegments(ctx, sd.partitionStats, nil, req.GetReq(), sd.collection.Schema(), sealed, PruneInfo{paramtable.Get().QueryNodeCfg.DefaultSegmentFilterRatio.GetAsFloat()}) } sealedNum := lo.SumBy(sealed, func(item SnapshotItem) int { return len(item.Segments) }) diff --git 
a/internal/querynodev2/delegator/segment_pruner.go b/internal/querynodev2/delegator/segment_pruner.go index 7b6bd9acbb..93f6c9650d 100644 --- a/internal/querynodev2/delegator/segment_pruner.go +++ b/internal/querynodev2/delegator/segment_pruner.go @@ -102,9 +102,10 @@ func PruneSegments(ctx context.Context, item.Segments = newSegments sealedSegments[idx] = item } - log.Debug("Pruned segment for search/query", - zap.Int("pruned_segment_num", len(filteredSegments)), + log.RatedInfo(30, "Pruned segment for search/query", + zap.Int("filtered_segment_num[excluded]", len(filteredSegments)), zap.Int("total_segment_num", totalSegNum), + zap.Float32("filtered_rate", float32(len(filteredSegments))/float32(totalSegNum)), ) } } diff --git a/internal/querynodev2/delegator/segment_pruner_test.go b/internal/querynodev2/delegator/segment_pruner_test.go index cdfeb8a304..d57d4d6852 100644 --- a/internal/querynodev2/delegator/segment_pruner_test.go +++ b/internal/querynodev2/delegator/segment_pruner_test.go @@ -14,6 +14,7 @@ import ( "github.com/milvus-io/milvus/internal/storage" "github.com/milvus-io/milvus/internal/util/clustering" "github.com/milvus-io/milvus/internal/util/testutil" + "github.com/milvus-io/milvus/pkg/util/paramtable" "github.com/milvus-io/milvus/pkg/util/typeutil" ) @@ -227,6 +228,7 @@ func (sps *SegmentPrunerSuite) SetupForClustering(clusterKeyFieldName string, func (sps *SegmentPrunerSuite) TestPruneSegmentsByScalarIntField() { sps.SetupForClustering("age", schemapb.DataType_Int32) + paramtable.Init() targetPartitions := make([]UniqueID, 0) targetPartitions = append(targetPartitions, sps.targetPartition) { @@ -242,7 +244,7 @@ func (sps *SegmentPrunerSuite) TestPruneSegmentsByScalarIntField() { SerializedExprPlan: serializedPlan, PartitionIDs: targetPartitions, } - PruneSegments(context.TODO(), sps.partitionStats, nil, queryReq, sps.schema, testSegments, PruneInfo{defaultFilterRatio}) + PruneSegments(context.TODO(), sps.partitionStats, nil, queryReq, sps.schema, 
testSegments, PruneInfo{paramtable.Get().QueryNodeCfg.DefaultSegmentFilterRatio.GetAsFloat()}) sps.Equal(2, len(testSegments[0].Segments)) sps.Equal(0, len(testSegments[1].Segments)) } @@ -259,7 +261,7 @@ func (sps *SegmentPrunerSuite) TestPruneSegmentsByScalarIntField() { SerializedExprPlan: serializedPlan, PartitionIDs: targetPartitions, } - PruneSegments(context.TODO(), sps.partitionStats, nil, queryReq, sps.schema, testSegments, PruneInfo{defaultFilterRatio}) + PruneSegments(context.TODO(), sps.partitionStats, nil, queryReq, sps.schema, testSegments, PruneInfo{paramtable.Get().QueryNodeCfg.DefaultSegmentFilterRatio.GetAsFloat()}) sps.Equal(0, len(testSegments[0].Segments)) sps.Equal(2, len(testSegments[1].Segments)) } @@ -276,7 +278,7 @@ func (sps *SegmentPrunerSuite) TestPruneSegmentsByScalarIntField() { SerializedExprPlan: serializedPlan, PartitionIDs: targetPartitions, } - PruneSegments(context.TODO(), sps.partitionStats, nil, queryReq, sps.schema, testSegments, PruneInfo{defaultFilterRatio}) + PruneSegments(context.TODO(), sps.partitionStats, nil, queryReq, sps.schema, testSegments, PruneInfo{paramtable.Get().QueryNodeCfg.DefaultSegmentFilterRatio.GetAsFloat()}) sps.Equal(2, len(testSegments[0].Segments)) sps.Equal(2, len(testSegments[1].Segments)) } @@ -293,7 +295,7 @@ func (sps *SegmentPrunerSuite) TestPruneSegmentsByScalarIntField() { SerializedExprPlan: serializedPlan, PartitionIDs: targetPartitions, } - PruneSegments(context.TODO(), sps.partitionStats, nil, queryReq, sps.schema, testSegments, PruneInfo{defaultFilterRatio}) + PruneSegments(context.TODO(), sps.partitionStats, nil, queryReq, sps.schema, testSegments, PruneInfo{paramtable.Get().QueryNodeCfg.DefaultSegmentFilterRatio.GetAsFloat()}) sps.Equal(0, len(testSegments[0].Segments)) sps.Equal(1, len(testSegments[1].Segments)) } @@ -301,6 +303,7 @@ func (sps *SegmentPrunerSuite) TestPruneSegmentsByScalarIntField() { func (sps *SegmentPrunerSuite) TestPruneSegmentsByScalarStrField() { 
sps.SetupForClustering("info", schemapb.DataType_VarChar) + paramtable.Init() targetPartitions := make([]UniqueID, 0) targetPartitions = append(targetPartitions, sps.targetPartition) { @@ -316,7 +319,7 @@ func (sps *SegmentPrunerSuite) TestPruneSegmentsByScalarStrField() { SerializedExprPlan: serializedPlan, PartitionIDs: targetPartitions, } - PruneSegments(context.TODO(), sps.partitionStats, nil, queryReq, sps.schema, testSegments, PruneInfo{defaultFilterRatio}) + PruneSegments(context.TODO(), sps.partitionStats, nil, queryReq, sps.schema, testSegments, PruneInfo{paramtable.Get().QueryNodeCfg.DefaultSegmentFilterRatio.GetAsFloat()}) sps.Equal(0, len(testSegments[0].Segments)) sps.Equal(0, len(testSegments[1].Segments)) // there should be no segments fulfilling the info=="rag" @@ -334,7 +337,7 @@ func (sps *SegmentPrunerSuite) TestPruneSegmentsByScalarStrField() { SerializedExprPlan: serializedPlan, PartitionIDs: targetPartitions, } - PruneSegments(context.TODO(), sps.partitionStats, nil, queryReq, sps.schema, testSegments, PruneInfo{defaultFilterRatio}) + PruneSegments(context.TODO(), sps.partitionStats, nil, queryReq, sps.schema, testSegments, PruneInfo{paramtable.Get().QueryNodeCfg.DefaultSegmentFilterRatio.GetAsFloat()}) sps.Equal(0, len(testSegments[0].Segments)) sps.Equal(1, len(testSegments[1].Segments)) // there should be no segments fulfilling the info=="rag" @@ -352,7 +355,7 @@ func (sps *SegmentPrunerSuite) TestPruneSegmentsByScalarStrField() { SerializedExprPlan: serializedPlan, PartitionIDs: targetPartitions, } - PruneSegments(context.TODO(), sps.partitionStats, nil, queryReq, sps.schema, testSegments, PruneInfo{defaultFilterRatio}) + PruneSegments(context.TODO(), sps.partitionStats, nil, queryReq, sps.schema, testSegments, PruneInfo{paramtable.Get().QueryNodeCfg.DefaultSegmentFilterRatio.GetAsFloat()}) sps.Equal(2, len(testSegments[0].Segments)) sps.Equal(1, len(testSegments[1].Segments)) // there should be no segments fulfilling the info=="rag" @@ 
-376,8 +379,8 @@ func vector2Placeholder(vectors [][]float32) *commonpb.PlaceholderValue { } func (sps *SegmentPrunerSuite) TestPruneSegmentsByVectorField() { + paramtable.Init() sps.SetupForClustering("vec", schemapb.DataType_FloatVector) - vector1 := []float32{0.8877872002188053, 0.6131822285635065, 0.8476814632326242, 0.6645877829359371, 0.9962627712600025, 0.8976183052440327, 0.41941169325798844, 0.7554387854258499} vector2 := []float32{0.8644394874390322, 0.023327886647378615, 0.08330118483461302, 0.7068040179963112, 0.6983994910799851, 0.5562075958994153, 0.3288536247938002, 0.07077341010237759} vectors := [][]float32{vector1, vector2} diff --git a/pkg/util/paramtable/component_param.go b/pkg/util/paramtable/component_param.go index d3c16b3005..93bc45f515 100644 --- a/pkg/util/paramtable/component_param.go +++ b/pkg/util/paramtable/component_param.go @@ -1995,6 +1995,7 @@ type queryNodeConfig struct { MemoryIndexLoadPredictMemoryUsageFactor ParamItem `refreshable:"true"` EnableSegmentPrune ParamItem `refreshable:"false"` + DefaultSegmentFilterRatio ParamItem `refreshable:"false"` } func (p *queryNodeConfig) init(base *BaseTable) { @@ -2475,6 +2476,13 @@ Max read concurrency must greater than or equal to 1, and less than or equal to Doc: "use partition prune function on shard delegator", } p.EnableSegmentPrune.Init(base.mgr) + p.DefaultSegmentFilterRatio = ParamItem{ + Key: "queryNode.defaultSegmentFilterRatio", + Version: "2.4.0", + DefaultValue: "0.5", + Doc: "filter ratio used for pruning segments when searching", + } + p.DefaultSegmentFilterRatio.Init(base.mgr) } // /////////////////////////////////////////////////////////////////////////////