mirror of
https://gitee.com/milvus-io/milvus.git
synced 2026-01-07 19:31:51 +08:00
related: #36380 <!-- This is an auto-generated comment: release notes by coderabbit.ai --> - Core invariant: aggregation is centralized and schema-aware — all aggregate functions are created via the exec Aggregate registry (milvus::exec::Aggregate) and validated by ValidateAggFieldType, use a single in-memory accumulator layout (Accumulator/RowContainer) and grouping primitives (GroupingSet, HashTable, VectorHasher), ensuring consistent typing, null semantics and offsets across planner → exec → reducer conversion paths (toAggregateInfo, Aggregate::create, GroupingSet, AggResult converters). - Removed / simplified logic: removed ad‑hoc count/group-by and reducer code (CountNode/PhyCountNode, GroupByNode/PhyGroupByNode, cntReducer and its tests) and consolidated into a unified AggregationNode → PhyAggregationNode + GroupingSet + HashTable execution path and centralized reducers (MilvusAggReducer, InternalAggReducer, SegcoreAggReducer). AVG now implemented compositionally (SUM + COUNT) rather than a bespoke operator, eliminating duplicate implementations. - Why this does NOT cause data loss or regressions: existing data-access and serialization paths are preserved and explicitly validated — bulk_subscript / bulk_script_field_data and FieldData creation are used for output materialization; converters (InternalResult2AggResult ↔ AggResult2internalResult, SegcoreResults2AggResult ↔ AggResult2segcoreResult) enforce shape/type/row-count validation; proxy and plan-level checks (MatchAggregationExpression, translateOutputFields, ValidateAggFieldType, translateGroupByFieldIds) reject unsupported inputs (ARRAY/JSON, unsupported datatypes) early. Empty-result generation and explicit error returns guard against silent corruption. 
- New capability and scope: end-to-end GROUP BY and aggregation support added across the stack — proto (plan.proto, RetrieveRequest fields group_by_field_ids/aggregates), planner nodes (AggregationNode, ProjectNode, SearchGroupByNode), exec operators (PhyAggregationNode, PhyProjectNode) and aggregation core (Aggregate implementations: Sum/Count/Min/Max, SimpleNumericAggregate, RowContainer, GroupingSet, HashTable) plus proxy/querynode reducers and tests — enabling grouped and global aggregation (sum, count, min, max, avg via sum+count) with schema-aware validation and reduction. <!-- end of auto-generated comment: release notes by coderabbit.ai --> Signed-off-by: MrPresent-Han <chun.han@gmail.com> Co-authored-by: MrPresent-Han <chun.han@gmail.com>
66 lines
2.4 KiB
Go
package proxy
|
|
|
|
import (
|
|
"context"
|
|
"fmt"
|
|
|
|
"github.com/milvus-io/milvus-proto/go-api/v2/milvuspb"
|
|
"github.com/milvus-io/milvus-proto/go-api/v2/schemapb"
|
|
"github.com/milvus-io/milvus/internal/agg"
|
|
"github.com/milvus-io/milvus/pkg/v2/proto/internalpb"
|
|
"github.com/milvus-io/milvus/pkg/v2/proto/planpb"
|
|
"github.com/milvus-io/milvus/pkg/v2/util/merr"
|
|
)
|
|
|
|
// MilvusAggReducer reduces per-querynode aggregation results into a single
// client-facing milvuspb.QueryResults. Composite aggregates (AVG) are
// materialized from their SUM and COUNT parts via the output field map.
type MilvusAggReducer struct {
	// groupAggReducer merges group-by/aggregation rows across shard results.
	groupAggReducer *agg.GroupAggReducer
	// outputMap maps each requested output field to the index(es) of the
	// reduced field data it is produced from (one index for plain
	// aggregates/group-by columns, two — sum and count — for AVG).
	outputMap *agg.AggregationFieldMap
}
|
|
|
|
func NewMilvusAggReducer(groupByFieldIds []int64, aggregates []*planpb.Aggregate,
|
|
outputMap *agg.AggregationFieldMap, groupLimit int64, schema *schemapb.CollectionSchema,
|
|
) *MilvusAggReducer {
|
|
// must ensure outputMap is not nil outside
|
|
// Default groupLimit to -1 (no limit) if groupLimit <= 0
|
|
if groupLimit <= 0 {
|
|
groupLimit = -1
|
|
}
|
|
return &MilvusAggReducer{
|
|
agg.NewGroupAggReducer(groupByFieldIds, aggregates, groupLimit, schema),
|
|
outputMap,
|
|
}
|
|
}
|
|
|
|
func (reducer *MilvusAggReducer) Reduce(results []*internalpb.RetrieveResults) (*milvuspb.QueryResults, error) {
|
|
reducedAggRes, err := reducer.groupAggReducer.Reduce(context.Background(), agg.InternalResult2AggResult(results))
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
fieldCount := reducer.outputMap.Count()
|
|
reOrganizedFieldDatas := make([]*schemapb.FieldData, fieldCount)
|
|
reducedFieldDatas := reducedAggRes.GetFieldDatas()
|
|
for i := 0; i < fieldCount; i++ {
|
|
indices := reducer.outputMap.IndexesAt(i)
|
|
if len(indices) == 0 {
|
|
return nil, fmt.Errorf("no indices found for output field at index %d", i)
|
|
} else if len(indices) == 1 {
|
|
// Single index: direct copy (non-avg aggregation or group-by field)
|
|
reOrganizedFieldDatas[i] = reducedFieldDatas[indices[0]]
|
|
reOrganizedFieldDatas[i].FieldName = reducer.outputMap.NameAt(i)
|
|
} else if len(indices) == 2 {
|
|
// Two indices: avg aggregation (sum and count)
|
|
sumFieldData := reducedFieldDatas[indices[0]]
|
|
countFieldData := reducedFieldDatas[indices[1]]
|
|
avgFieldData, err := agg.ComputeAvgFromSumAndCount(sumFieldData, countFieldData)
|
|
if err != nil {
|
|
return nil, fmt.Errorf("failed to compute avg for field %s: %w", reducer.outputMap.NameAt(i), err)
|
|
}
|
|
avgFieldData.FieldName = reducer.outputMap.NameAt(i)
|
|
reOrganizedFieldDatas[i] = avgFieldData
|
|
} else {
|
|
return nil, fmt.Errorf("unexpected number of indices (%d) for output field at index %d, expected 1 or 2", len(indices), i)
|
|
}
|
|
}
|
|
return &milvuspb.QueryResults{FieldsData: reOrganizedFieldDatas, Status: merr.Success()}, nil
|
|
}
|