mirror of
https://gitee.com/milvus-io/milvus.git
synced 2025-12-06 17:18:35 +08:00
After this PR is merged, insert, upsert, index building, query, and search are supported on the added field. These operations can only be performed on the added field once the add-field request has completed, which is a synchronous operation. Compaction will be supported in the next PR. #39718 --------- Signed-off-by: lixinguo <xinguo.li@zilliz.com> Co-authored-by: lixinguo <xinguo.li@zilliz.com>
462 lines
14 KiB
Go
462 lines
14 KiB
Go
// Licensed to the LF AI & Data foundation under one
|
|
// or more contributor license agreements. See the NOTICE file
|
|
// distributed with this work for additional information
|
|
// regarding copyright ownership. The ASF licenses this file
|
|
// to you under the Apache License, Version 2.0 (the
|
|
// "License"); you may not use this file except in compliance
|
|
// with the License. You may obtain a copy of the License at
|
|
//
|
|
// http://www.apache.org/licenses/LICENSE-2.0
|
|
//
|
|
// Unless required by applicable law or agreed to in writing, software
|
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
// See the License for the specific language governing permissions and
|
|
// limitations under the License.
|
|
|
|
package common
|
|
|
|
import (
|
|
"encoding/binary"
|
|
"fmt"
|
|
"strconv"
|
|
"strings"
|
|
|
|
"github.com/cockroachdb/errors"
|
|
"github.com/samber/lo"
|
|
"go.uber.org/zap"
|
|
|
|
"github.com/milvus-io/milvus-proto/go-api/v2/commonpb"
|
|
"github.com/milvus-io/milvus-proto/go-api/v2/schemapb"
|
|
"github.com/milvus-io/milvus/pkg/v2/log"
|
|
)
|
|
|
|
// system field id:
|
|
// 0: unique row id
|
|
// 1: timestamp
|
|
// 100: first user field id
|
|
// 101: second user field id
|
|
// 102: ...
|
|
|
|
const (
	// StartOfUserFieldID is the first field ID handed out to user-defined
	// fields; IDs below it are treated as system fields.
	StartOfUserFieldID = 100

	// StartOfUserFunctionID is the first ID handed out to user-defined functions.
	StartOfUserFunctionID = 100

	// RowIDField is the field ID reserved by the system for the RowID field.
	RowIDField = 0

	// TimeStampField is the field ID reserved by the system for the Timestamp field.
	TimeStampField = 1

	// RowIDFieldName is the name of the RowID field.
	RowIDFieldName = "RowID"

	// TimeStampFieldName is the name of the Timestamp field.
	TimeStampFieldName = "Timestamp"

	// MetaFieldName is the name of the dynamic-schema field.
	MetaFieldName = "$meta"

	// DefaultShardsNum is the default number of shards used when creating a collection.
	DefaultShardsNum int32 = 1

	// DefaultPartitionsWithPartitionKey is the default number of partitions
	// when a partition key is used.
	DefaultPartitionsWithPartitionKey int64 = 16

	// InvalidPartitionID marks an unspecified partition; it is set when the
	// partition name is empty.
	InvalidPartitionID int64 = -1

	// AllPartitionsID indicates that data applies to all partitions.
	AllPartitionsID int64 = -1

	// InvalidFieldID marks a field that does not exist; it is set when the
	// field is not found.
	InvalidFieldID int64 = -1

	// NotRegisteredID means the node is not registered into etcd.
	NotRegisteredID int64 = -1

	// InvalidNodeID marks a node that is not valid in a querycoord replica or
	// shard cluster.
	InvalidNodeID int64 = -1

	// SystemFieldsNum is the number of system-reserved fields (RowID and Timestamp).
	SystemFieldsNum int64 = 2
)
|
|
|
|
// Scalar index engine version bounds.
// NOTE(review): presumably the oldest accepted and the current version of the
// scalar index engine — confirm against the index build path.
const (
	MinimalScalarIndexEngineVersion int32 = 0
	CurrentScalarIndexEngineVersion int32 = 1
)
|
|
|
|
// Endian is the byte order used by Milvus; it is a variable set to binary.LittleEndian.
|
|
// Milvus uses little endian by default.
|
|
var Endian = binary.LittleEndian
|
|
|
|
// Storage path segments.
const (
	// SegmentInsertLogPath is the storage path constant for segment insert binlogs.
	SegmentInsertLogPath = "insert_log"

	// SegmentDeltaLogPath is the storage path constant for segment delta logs.
	SegmentDeltaLogPath = "delta_log"

	// SegmentStatslogPath is the storage path constant for segment stats logs.
	SegmentStatslogPath = "stats_log"

	// SegmentIndexPath is the storage path constant for segment index files.
	SegmentIndexPath = "index_files"

	// SegmentBm25LogPath is the storage path constant for bm25 statistics.
	SegmentBm25LogPath = "bm25_stats"

	// PartitionStatsPath is the storage path constant for partition stats files.
	PartitionStatsPath = "part_stats"

	// AnalyzeStatsPath is the storage path constant for analyze results.
	AnalyzeStatsPath = "analyze_stats"

	// OffsetMapping and Centroids are further path/name constants.
	// NOTE(review): presumably file names under the analyze stats path — confirm.
	OffsetMapping = "offset_mapping"
	Centroids     = "centroids"

	// TextIndexPath is the storage path constant for text indexes.
	TextIndexPath = "text_log"
)
|
|
|
|
// Search, Index parameter keys
|
|
const (
	// Keys carried with search requests.
	TopKKey         = "topk"
	SearchParamKey  = "search_param"
	SegmentNumKey   = "segment_num"
	WithFilterKey   = "with_filter"
	DataTypeKey     = "data_type"
	ChannelNumKey   = "channel_num"
	WithOptimizeKey = "with_optimize"
	CollectionKey   = "collection"
	RecallEvalKey   = "recall_eval"

	// Keys found in index parameters and field type parameters.
	IndexParamsKey = "params"
	IndexTypeKey   = "index_type"
	MetricTypeKey  = "metric_type"
	DimKey         = "dim"
	MaxLengthKey   = "max_length"
	MaxCapacityKey = "max_capacity"

	DropRatioBuildKey = "drop_ratio_build"

	IsSparseKey = "is_sparse"
	// AutoIndexName is a value rather than a key.
	// NOTE(review): presumably the index type name used in auto index mode — confirm.
	AutoIndexName             = "AUTOINDEX"
	BitmapCardinalityLimitKey = "bitmap_cardinality_limit"
	IgnoreGrowing             = "ignore_growing"
	ConsistencyLevel          = "consistency_level"
	HintsKey                  = "hints"

	// Keys describing JSON path indexes.
	JSONCastTypeKey = "json_cast_type"
	JSONPathKey     = "json_path"
)
|
|
|
|
// Doc-in-doc-out
|
|
const (
	// EnableAnalyzerKey is the parameter key "enable_analyzer".
	EnableAnalyzerKey = "enable_analyzer"
	// AnalyzerParamKey is the parameter key "analyzer_params".
	AnalyzerParamKey = "analyzer_params"
)
|
|
|
|
// Collection properties key
|
|
|
|
const (
	// Collection lifecycle properties.
	CollectionTTLConfigKey      = "collection.ttl.seconds"
	CollectionAutoCompactionKey = "collection.autocompaction.enabled"

	// Collection-level rate limit properties.
	CollectionInsertRateMaxKey   = "collection.insertRate.max.mb"
	CollectionInsertRateMinKey   = "collection.insertRate.min.mb"
	CollectionUpsertRateMaxKey   = "collection.upsertRate.max.mb"
	CollectionUpsertRateMinKey   = "collection.upsertRate.min.mb"
	CollectionDeleteRateMaxKey   = "collection.deleteRate.max.mb"
	CollectionDeleteRateMinKey   = "collection.deleteRate.min.mb"
	CollectionBulkLoadRateMaxKey = "collection.bulkLoadRate.max.mb"
	CollectionBulkLoadRateMinKey = "collection.bulkLoadRate.min.mb"
	CollectionQueryRateMaxKey    = "collection.queryRate.max.qps"
	CollectionQueryRateMinKey    = "collection.queryRate.min.qps"
	CollectionSearchRateMaxKey   = "collection.searchRate.max.vps"
	CollectionSearchRateMinKey   = "collection.searchRate.min.vps"
	CollectionDiskQuotaKey       = "collection.diskProtection.diskQuota.mb"

	// Partition-level disk quota property.
	PartitionDiskQuotaKey = "partition.diskProtection.diskQuota.mb"

	// Database-level properties.
	DatabaseReplicaNumber       = "database.replica.number"
	DatabaseResourceGroups      = "database.resource_groups"
	DatabaseDiskQuotaKey        = "database.diskQuota.mb"
	DatabaseMaxCollectionsKey   = "database.max.collections"
	DatabaseForceDenyWritingKey = "database.force.deny.writing"
	DatabaseForceDenyReadingKey = "database.force.deny.reading"

	// Database-level DDL deny switches; DatabaseForceDenyDDLKey covers all DDL.
	DatabaseForceDenyDDLKey           = "database.force.deny.ddl"
	DatabaseForceDenyCollectionDDLKey = "database.force.deny.collectionDDL"
	DatabaseForceDenyPartitionDDLKey  = "database.force.deny.partitionDDL"
	DatabaseForceDenyIndexDDLKey      = "database.force.deny.index"
	DatabaseForceDenyFlushDDLKey      = "database.force.deny.flush"
	DatabaseForceDenyCompactionDDLKey = "database.force.deny.compaction"

	// Collection-level load properties.
	CollectionReplicaNumber  = "collection.replica.number"
	CollectionResourceGroups = "collection.resource_groups"
)
|
|
|
|
// common properties
|
|
const (
	// MmapEnabledKey is the property key checked by the mmap helper functions.
	MmapEnabledKey = "mmap.enabled"
	// LazyLoadEnableKey is the property key checked by the lazy-load helpers.
	LazyLoadEnableKey = "lazyload.enabled"
	// PartitionKeyIsolationKey is the property key for partition key isolation.
	PartitionKeyIsolationKey = "partitionkey.isolation"
	// FieldSkipLoadKey is the type-param key that marks a field as skipped on load.
	FieldSkipLoadKey = "field.skipLoad"
	// IndexOffsetCacheEnabledKey is the property key "indexoffsetcache.enabled".
	IndexOffsetCacheEnabledKey = "indexoffsetcache.enabled"
	// ReplicateIDKey is the property key holding the replicate ID.
	ReplicateIDKey = "replicate.id"
	// ReplicateEndTSKey is the property key holding the replicate end timestamp.
	ReplicateEndTSKey = "replicate.endTS"
)
|
|
|
|
// PropertiesKey is the typed string key "properties".
const PropertiesKey string = "properties"

// TraceIDKey is the typed string key "uber-trace-id".
const TraceIDKey string = "uber-trace-id"
|
|
|
|
func IsSystemField(fieldID int64) bool {
|
|
return fieldID < StartOfUserFieldID
|
|
}
|
|
|
|
func IsMmapDataEnabled(kvs ...*commonpb.KeyValuePair) (bool, bool) {
|
|
for _, kv := range kvs {
|
|
if kv.Key == MmapEnabledKey {
|
|
enable, _ := strconv.ParseBool(kv.Value)
|
|
return enable, true
|
|
}
|
|
}
|
|
return false, false
|
|
}
|
|
|
|
func IsMmapIndexEnabled(kvs ...*commonpb.KeyValuePair) (bool, bool) {
|
|
for _, kv := range kvs {
|
|
if kv.Key == MmapEnabledKey {
|
|
enable, _ := strconv.ParseBool(kv.Value)
|
|
return enable, true
|
|
}
|
|
}
|
|
return false, false
|
|
}
|
|
|
|
func GetIndexType(indexParams []*commonpb.KeyValuePair) string {
|
|
for _, param := range indexParams {
|
|
if param.Key == IndexTypeKey {
|
|
return param.Value
|
|
}
|
|
}
|
|
log.Warn("IndexType not found in indexParams")
|
|
return ""
|
|
}
|
|
|
|
func FieldHasMmapKey(schema *schemapb.CollectionSchema, fieldID int64) bool {
|
|
for _, field := range schema.GetFields() {
|
|
if field.GetFieldID() == fieldID {
|
|
for _, kv := range field.GetTypeParams() {
|
|
if kv.Key == MmapEnabledKey {
|
|
return true
|
|
}
|
|
}
|
|
return false
|
|
}
|
|
}
|
|
return false
|
|
}
|
|
|
|
func HasLazyload(props []*commonpb.KeyValuePair) bool {
|
|
for _, kv := range props {
|
|
if kv.Key == LazyLoadEnableKey {
|
|
return true
|
|
}
|
|
}
|
|
return false
|
|
}
|
|
|
|
func IsCollectionLazyLoadEnabled(kvs ...*commonpb.KeyValuePair) bool {
|
|
for _, kv := range kvs {
|
|
if kv.Key == LazyLoadEnableKey && strings.ToLower(kv.Value) == "true" {
|
|
return true
|
|
}
|
|
}
|
|
return false
|
|
}
|
|
|
|
func IsPartitionKeyIsolationKvEnabled(kvs ...*commonpb.KeyValuePair) (bool, error) {
|
|
for _, kv := range kvs {
|
|
if kv.Key == PartitionKeyIsolationKey {
|
|
val, err := strconv.ParseBool(strings.ToLower(kv.Value))
|
|
if err != nil {
|
|
return false, errors.Wrap(err, "failed to parse partition key isolation")
|
|
}
|
|
return val, nil
|
|
}
|
|
}
|
|
return false, nil
|
|
}
|
|
|
|
func IsPartitionKeyIsolationPropEnabled(props map[string]string) (bool, error) {
|
|
val, ok := props[PartitionKeyIsolationKey]
|
|
if !ok {
|
|
return false, nil
|
|
}
|
|
iso, parseErr := strconv.ParseBool(val)
|
|
if parseErr != nil {
|
|
return false, errors.Wrap(parseErr, "failed to parse partition key isolation property")
|
|
}
|
|
return iso, nil
|
|
}
|
|
|
|
// LatestRevision is the magic number used to watch the latest revision.
const LatestRevision int64 = -1
|
|
|
|
func DatabaseLevelReplicaNumber(kvs []*commonpb.KeyValuePair) (int64, error) {
|
|
for _, kv := range kvs {
|
|
if kv.Key == DatabaseReplicaNumber {
|
|
replicaNum, err := strconv.ParseInt(kv.Value, 10, 64)
|
|
if err != nil {
|
|
return 0, fmt.Errorf("invalid database property: [key=%s] [value=%s]", kv.Key, kv.Value)
|
|
}
|
|
|
|
return replicaNum, nil
|
|
}
|
|
}
|
|
|
|
return 0, fmt.Errorf("database property not found: %s", DatabaseReplicaNumber)
|
|
}
|
|
|
|
func DatabaseLevelResourceGroups(kvs []*commonpb.KeyValuePair) ([]string, error) {
|
|
for _, kv := range kvs {
|
|
if kv.Key == DatabaseResourceGroups {
|
|
invalidPropValue := fmt.Errorf("invalid database property: [key=%s] [value=%s]", kv.Key, kv.Value)
|
|
if len(kv.Value) == 0 {
|
|
return nil, invalidPropValue
|
|
}
|
|
|
|
rgs := strings.Split(kv.Value, ",")
|
|
if len(rgs) == 0 {
|
|
return nil, invalidPropValue
|
|
}
|
|
|
|
return lo.Map(rgs, func(rg string, _ int) string { return strings.TrimSpace(rg) }), nil
|
|
}
|
|
}
|
|
|
|
return nil, fmt.Errorf("database property not found: %s", DatabaseResourceGroups)
|
|
}
|
|
|
|
func CollectionLevelReplicaNumber(kvs []*commonpb.KeyValuePair) (int64, error) {
|
|
for _, kv := range kvs {
|
|
if kv.Key == CollectionReplicaNumber {
|
|
replicaNum, err := strconv.ParseInt(kv.Value, 10, 64)
|
|
if err != nil {
|
|
return 0, fmt.Errorf("invalid collection property: [key=%s] [value=%s]", kv.Key, kv.Value)
|
|
}
|
|
|
|
return replicaNum, nil
|
|
}
|
|
}
|
|
|
|
return 0, fmt.Errorf("collection property not found: %s", CollectionReplicaNumber)
|
|
}
|
|
|
|
func CollectionLevelResourceGroups(kvs []*commonpb.KeyValuePair) ([]string, error) {
|
|
for _, kv := range kvs {
|
|
if kv.Key == CollectionResourceGroups {
|
|
invalidPropValue := fmt.Errorf("invalid collection property: [key=%s] [value=%s]", kv.Key, kv.Value)
|
|
if len(kv.Value) == 0 {
|
|
return nil, invalidPropValue
|
|
}
|
|
|
|
rgs := strings.Split(kv.Value, ",")
|
|
if len(rgs) == 0 {
|
|
return nil, invalidPropValue
|
|
}
|
|
|
|
return lo.Map(rgs, func(rg string, _ int) string { return strings.TrimSpace(rg) }), nil
|
|
}
|
|
}
|
|
|
|
return nil, fmt.Errorf("collection property not found: %s", CollectionReplicaNumber)
|
|
}
|
|
|
|
// GetCollectionLoadFields returns the load field ids according to the type params.
|
|
func GetCollectionLoadFields(schema *schemapb.CollectionSchema, skipDynamicField bool) []int64 {
|
|
fields := lo.FilterMap(schema.GetFields(), func(field *schemapb.FieldSchema, _ int) (int64, bool) {
|
|
// skip system field
|
|
if IsSystemField(field.GetFieldID()) {
|
|
return field.GetFieldID(), false
|
|
}
|
|
// skip dynamic field if specified
|
|
if field.IsDynamic && skipDynamicField {
|
|
return field.GetFieldID(), false
|
|
}
|
|
|
|
v, err := ShouldFieldBeLoaded(field.GetTypeParams())
|
|
if err != nil {
|
|
log.Warn("type param parse skip load failed", zap.Error(err))
|
|
// if configuration cannot be parsed, ignore it and load field
|
|
return field.GetFieldID(), true
|
|
}
|
|
return field.GetFieldID(), v
|
|
})
|
|
// empty fields list means all fields will be loaded
|
|
if len(fields) == len(schema.GetFields())-int(SystemFieldsNum) {
|
|
return []int64{}
|
|
}
|
|
return fields
|
|
}
|
|
|
|
func ShouldFieldBeLoaded(kvs []*commonpb.KeyValuePair) (bool, error) {
|
|
for _, kv := range kvs {
|
|
if kv.GetKey() == FieldSkipLoadKey {
|
|
val, err := strconv.ParseBool(kv.GetValue())
|
|
return !val, err
|
|
}
|
|
}
|
|
return true, nil
|
|
}
|
|
|
|
func IsReplicateEnabled(kvs []*commonpb.KeyValuePair) (bool, bool) {
|
|
replicateID, ok := GetReplicateID(kvs)
|
|
return replicateID != "", ok
|
|
}
|
|
|
|
func GetReplicateID(kvs []*commonpb.KeyValuePair) (string, bool) {
|
|
for _, kv := range kvs {
|
|
if kv.GetKey() == ReplicateIDKey {
|
|
return kv.GetValue(), true
|
|
}
|
|
}
|
|
return "", false
|
|
}
|
|
|
|
func GetReplicateEndTS(kvs []*commonpb.KeyValuePair) (uint64, bool) {
|
|
for _, kv := range kvs {
|
|
if kv.GetKey() == ReplicateEndTSKey {
|
|
ts, err := strconv.ParseUint(kv.GetValue(), 10, 64)
|
|
if err != nil {
|
|
log.Warn("parse replicate end ts failed", zap.Error(err), zap.Stack("stack"))
|
|
return 0, false
|
|
}
|
|
return ts, true
|
|
}
|
|
}
|
|
return 0, false
|
|
}
|
|
|
|
func ValidateAutoIndexMmapConfig(autoIndexConfigEnable, isVectorField bool, indexParams map[string]string) error {
|
|
if !autoIndexConfigEnable {
|
|
return nil
|
|
}
|
|
|
|
_, ok := indexParams[MmapEnabledKey]
|
|
if ok && isVectorField {
|
|
return fmt.Errorf("mmap index is not supported to config for the collection in auto index mode")
|
|
}
|
|
return nil
|
|
}
|