milvus/internal/storagecommon/column_group_splitter.go
congqixia 1e3ec42e54
fix: Check child fields len instead of nil (#44405)
Related to #44398

---------

Signed-off-by: Congqi Xia <congqi.xia@zilliz.com>
2025-09-17 11:36:00 +08:00

84 lines
2.8 KiB
Go

// Licensed to the LF AI & Data foundation under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package storagecommon
import (
"fmt"
"sort"
"github.com/milvus-io/milvus-proto/go-api/v2/schemapb"
"github.com/milvus-io/milvus/pkg/v2/util/paramtable"
"github.com/milvus-io/milvus/pkg/v2/util/typeutil"
)
const (
// column group id for short columns
DefaultShortColumnGroupID = 0
)
type ColumnGroup struct {
GroupID typeutil.UniqueID
Columns []int // column indices
Fields []int64
}
func (cg ColumnGroup) String() string {
return fmt.Sprintf("[GroupID: %d, ColumnIndices: %v, Fields: %v]", cg.GroupID, cg.Columns, cg.Fields)
}
func SplitColumns(fields []*schemapb.FieldSchema, stats map[int64]ColumnStats, policies ...ColumnGroupSplitPolicy) []ColumnGroup {
split := newCurrentSplit(fields, stats)
for _, policy := range policies {
split = policy.Split(split)
}
sort.Slice(split.outputGroups, func(i, j int) bool {
return split.outputGroups[i].GroupID < split.outputGroups[j].GroupID
})
return split.outputGroups
}
func DefaultPolicies() []ColumnGroupSplitPolicy {
paramtable.Init()
result := make([]ColumnGroupSplitPolicy, 0, 4)
if paramtable.Get().CommonCfg.Stv2SplitSystemColumn.GetAsBool() {
result = append(result, NewSystemColumnPolicy(paramtable.Get().CommonCfg.Stv2SystemColumnIncludePK.GetAsBool(),
paramtable.Get().CommonCfg.Stv2SystemColumnIncludePartitionKey.GetAsBool(),
paramtable.Get().CommonCfg.Stv2SystemColumnIncludeClusteringKey.GetAsBool()))
}
if paramtable.Get().CommonCfg.Stv2SplitByAvgSize.GetAsBool() {
result = append(result, NewAvgSizePolicy(paramtable.Get().CommonCfg.Stv2SplitAvgSizeThreshold.GetAsInt64()))
}
result = append(result,
NewSelectedDataTypePolicy(),
NewRemanentShortPolicy(-1))
return result
}
func IsVectorDataType(dataType schemapb.DataType) bool {
switch dataType {
case schemapb.DataType_BinaryVector,
schemapb.DataType_Float16Vector,
schemapb.DataType_BFloat16Vector,
schemapb.DataType_Int8Vector,
schemapb.DataType_FloatVector,
schemapb.DataType_SparseFloatVector,
schemapb.DataType_ArrayOfVector:
return true
}
return false
}