mirror of
https://gitee.com/milvus-io/milvus.git
synced 2025-12-07 17:48:29 +08:00
after the pr merged, we can support to insert, upsert, build index, query, search in the added field. can only do the above operates in added field after add field request complete, which is a sync operate. compact will be supported in the next pr. #39718 --------- Signed-off-by: lixinguo <xinguo.li@zilliz.com> Co-authored-by: lixinguo <xinguo.li@zilliz.com>
2356 lines
78 KiB
Go
2356 lines
78 KiB
Go
// Licensed to the LF AI & Data foundation under one
|
|
// or more contributor license agreements. See the NOTICE file
|
|
// distributed with this work for additional information
|
|
// regarding copyright ownership. The ASF licenses this file
|
|
// to you under the Apache License, Version 2.0 (the
|
|
// "License"); you may not use this file except in compliance
|
|
// with the License. You may obtain a copy of the License at
|
|
//
|
|
// http://www.apache.org/licenses/LICENSE-2.0
|
|
//
|
|
// Unless required by applicable law or agreed to in writing, software
|
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
// See the License for the specific language governing permissions and
|
|
// limitations under the License.
|
|
|
|
package proxy
|
|
|
|
import (
|
|
"context"
|
|
"fmt"
|
|
"reflect"
|
|
"strconv"
|
|
"strings"
|
|
"time"
|
|
"unicode/utf8"
|
|
|
|
"github.com/cockroachdb/errors"
|
|
"github.com/samber/lo"
|
|
"go.uber.org/zap"
|
|
"golang.org/x/crypto/bcrypt"
|
|
"google.golang.org/grpc/metadata"
|
|
"google.golang.org/protobuf/proto"
|
|
|
|
"github.com/milvus-io/milvus-proto/go-api/v2/commonpb"
|
|
"github.com/milvus-io/milvus-proto/go-api/v2/milvuspb"
|
|
"github.com/milvus-io/milvus-proto/go-api/v2/schemapb"
|
|
"github.com/milvus-io/milvus/internal/json"
|
|
"github.com/milvus-io/milvus/internal/parser/planparserv2"
|
|
"github.com/milvus-io/milvus/internal/types"
|
|
"github.com/milvus-io/milvus/internal/util/ctokenizer"
|
|
"github.com/milvus-io/milvus/internal/util/function"
|
|
"github.com/milvus-io/milvus/internal/util/hookutil"
|
|
"github.com/milvus-io/milvus/internal/util/indexparamcheck"
|
|
typeutil2 "github.com/milvus-io/milvus/internal/util/typeutil"
|
|
"github.com/milvus-io/milvus/pkg/v2/common"
|
|
"github.com/milvus-io/milvus/pkg/v2/log"
|
|
"github.com/milvus-io/milvus/pkg/v2/mq/msgstream"
|
|
"github.com/milvus-io/milvus/pkg/v2/proto/internalpb"
|
|
"github.com/milvus-io/milvus/pkg/v2/proto/planpb"
|
|
"github.com/milvus-io/milvus/pkg/v2/proto/querypb"
|
|
"github.com/milvus-io/milvus/pkg/v2/util"
|
|
"github.com/milvus-io/milvus/pkg/v2/util/commonpbutil"
|
|
"github.com/milvus-io/milvus/pkg/v2/util/contextutil"
|
|
"github.com/milvus-io/milvus/pkg/v2/util/crypto"
|
|
"github.com/milvus-io/milvus/pkg/v2/util/funcutil"
|
|
"github.com/milvus-io/milvus/pkg/v2/util/merr"
|
|
"github.com/milvus-io/milvus/pkg/v2/util/metric"
|
|
"github.com/milvus-io/milvus/pkg/v2/util/paramtable"
|
|
"github.com/milvus-io/milvus/pkg/v2/util/tsoutil"
|
|
"github.com/milvus-io/milvus/pkg/v2/util/typeutil"
|
|
)
|
|
|
|
const (
|
|
strongTS = 0
|
|
boundedTS = 2
|
|
|
|
// enableMultipleVectorFields indicates whether to enable multiple vector fields.
|
|
enableMultipleVectorFields = true
|
|
|
|
defaultMaxArrayCapacity = 4096
|
|
|
|
defaultMaxSearchRequest = 1024
|
|
|
|
// DefaultArithmeticIndexType name of default index type for scalar field
|
|
DefaultArithmeticIndexType = indexparamcheck.IndexINVERTED
|
|
|
|
// DefaultStringIndexType name of default index type for varChar/string field
|
|
DefaultStringIndexType = indexparamcheck.IndexINVERTED
|
|
|
|
defaultRRFParamsValue = 60
|
|
maxRRFParamsValue = 16384
|
|
)
|
|
|
|
var logger = log.L().WithOptions(zap.Fields(zap.String("role", typeutil.ProxyRole)))
|
|
|
|
// isAlpha check if c is alpha.
|
|
func isAlpha(c uint8) bool {
|
|
if (c < 'A' || c > 'Z') && (c < 'a' || c > 'z') {
|
|
return false
|
|
}
|
|
return true
|
|
}
|
|
|
|
// isNumber check if c is a number.
|
|
func isNumber(c uint8) bool {
|
|
if c < '0' || c > '9' {
|
|
return false
|
|
}
|
|
return true
|
|
}
|
|
|
|
func validateMaxQueryResultWindow(offset int64, limit int64) error {
|
|
if offset < 0 {
|
|
return fmt.Errorf("%s [%d] is invalid, should be gte than 0", OffsetKey, offset)
|
|
}
|
|
if limit <= 0 {
|
|
return fmt.Errorf("%s [%d] is invalid, should be greater than 0", LimitKey, limit)
|
|
}
|
|
|
|
depth := offset + limit
|
|
maxQueryResultWindow := Params.QuotaConfig.MaxQueryResultWindow.GetAsInt64()
|
|
if depth <= 0 || depth > maxQueryResultWindow {
|
|
return fmt.Errorf("(offset+limit) should be in range [1, %d], but got %d", maxQueryResultWindow, depth)
|
|
}
|
|
return nil
|
|
}
|
|
|
|
func validateLimit(limit int64) error {
|
|
topKLimit := Params.QuotaConfig.TopKLimit.GetAsInt64()
|
|
if limit <= 0 || limit > topKLimit {
|
|
return fmt.Errorf("it should be in range [1, %d], but got %d", topKLimit, limit)
|
|
}
|
|
return nil
|
|
}
|
|
|
|
func validateNQLimit(limit int64) error {
|
|
nqLimit := Params.QuotaConfig.NQLimit.GetAsInt64()
|
|
if limit <= 0 || limit > nqLimit {
|
|
return fmt.Errorf("nq (number of search vector per search request) should be in range [1, %d], but got %d", nqLimit, limit)
|
|
}
|
|
return nil
|
|
}
|
|
|
|
func validateCollectionNameOrAlias(entity, entityType string) error {
|
|
entity = strings.TrimSpace(entity)
|
|
|
|
if entity == "" {
|
|
return merr.WrapErrParameterInvalidMsg("collection %s should not be empty", entityType)
|
|
}
|
|
|
|
invalidMsg := fmt.Sprintf("Invalid collection %s: %s. ", entityType, entity)
|
|
if len(entity) > Params.ProxyCfg.MaxNameLength.GetAsInt() {
|
|
return merr.WrapErrParameterInvalidMsg("%s the length of a collection %s must be less than %s characters", invalidMsg, entityType,
|
|
Params.ProxyCfg.MaxNameLength.GetValue())
|
|
}
|
|
|
|
firstChar := entity[0]
|
|
if firstChar != '_' && !isAlpha(firstChar) {
|
|
return merr.WrapErrParameterInvalidMsg("%s the first character of a collection %s must be an underscore or letter", invalidMsg, entityType)
|
|
}
|
|
|
|
for i := 1; i < len(entity); i++ {
|
|
c := entity[i]
|
|
if c != '_' && !isAlpha(c) && !isNumber(c) {
|
|
return merr.WrapErrParameterInvalidMsg("%s collection %s can only contain numbers, letters and underscores", invalidMsg, entityType)
|
|
}
|
|
}
|
|
return nil
|
|
}
|
|
|
|
func ValidatePrivilegeGroupName(groupName string) error {
|
|
if groupName == "" {
|
|
return merr.WrapErrPrivilegeGroupNameInvalid("privilege group name should not be empty")
|
|
}
|
|
|
|
if len(groupName) > Params.ProxyCfg.MaxNameLength.GetAsInt() {
|
|
return merr.WrapErrPrivilegeGroupNameInvalid(
|
|
"the length of a privilege group name %s must be less than %s characters", groupName, Params.ProxyCfg.MaxNameLength.GetValue())
|
|
}
|
|
|
|
firstChar := groupName[0]
|
|
if firstChar != '_' && !isAlpha(firstChar) {
|
|
return merr.WrapErrPrivilegeGroupNameInvalid(
|
|
"the first character of a privilege group name %s must be an underscore or letter", groupName)
|
|
}
|
|
|
|
for i := 1; i < len(groupName); i++ {
|
|
c := groupName[i]
|
|
if c != '_' && !isAlpha(c) && !isNumber(c) {
|
|
return merr.WrapErrParameterInvalidMsg(
|
|
"privilege group name %s can only contain numbers, letters and underscores", groupName)
|
|
}
|
|
}
|
|
return nil
|
|
}
|
|
|
|
func ValidateResourceGroupName(entity string) error {
|
|
if entity == "" {
|
|
return errors.New("resource group name couldn't be empty")
|
|
}
|
|
|
|
invalidMsg := fmt.Sprintf("Invalid resource group name %s.", entity)
|
|
if len(entity) > Params.ProxyCfg.MaxNameLength.GetAsInt() {
|
|
return merr.WrapErrParameterInvalidMsg("%s the length of a resource group name must be less than %s characters",
|
|
invalidMsg, Params.ProxyCfg.MaxNameLength.GetValue())
|
|
}
|
|
|
|
firstChar := entity[0]
|
|
if firstChar != '_' && !isAlpha(firstChar) {
|
|
return merr.WrapErrParameterInvalidMsg("%s the first character of a resource group name must be an underscore or letter", invalidMsg)
|
|
}
|
|
|
|
for i := 1; i < len(entity); i++ {
|
|
c := entity[i]
|
|
if c != '_' && !isAlpha(c) && !isNumber(c) {
|
|
return merr.WrapErrParameterInvalidMsg("%s resource group name can only contain numbers, letters and underscores", invalidMsg)
|
|
}
|
|
}
|
|
return nil
|
|
}
|
|
|
|
func ValidateDatabaseName(dbName string) error {
|
|
if dbName == "" {
|
|
return merr.WrapErrDatabaseNameInvalid(dbName, "database name couldn't be empty")
|
|
}
|
|
|
|
if len(dbName) > Params.ProxyCfg.MaxNameLength.GetAsInt() {
|
|
return merr.WrapErrDatabaseNameInvalid(dbName,
|
|
fmt.Sprintf("the length of a database name must be less than %d characters", Params.ProxyCfg.MaxNameLength.GetAsInt()))
|
|
}
|
|
|
|
firstChar := dbName[0]
|
|
if firstChar != '_' && !isAlpha(firstChar) {
|
|
return merr.WrapErrDatabaseNameInvalid(dbName,
|
|
"the first character of a database name must be an underscore or letter")
|
|
}
|
|
|
|
for i := 1; i < len(dbName); i++ {
|
|
c := dbName[i]
|
|
if c != '_' && !isAlpha(c) && !isNumber(c) {
|
|
return merr.WrapErrDatabaseNameInvalid(dbName,
|
|
"database name can only contain numbers, letters and underscores")
|
|
}
|
|
}
|
|
return nil
|
|
}
|
|
|
|
// ValidateCollectionAlias returns true if collAlias is a valid alias name for collection, otherwise returns false.
|
|
func ValidateCollectionAlias(collAlias string) error {
|
|
return validateCollectionNameOrAlias(collAlias, "alias")
|
|
}
|
|
|
|
func validateCollectionName(collName string) error {
|
|
return validateCollectionNameOrAlias(collName, "name")
|
|
}
|
|
|
|
func validatePartitionTag(partitionTag string, strictCheck bool) error {
|
|
partitionTag = strings.TrimSpace(partitionTag)
|
|
|
|
invalidMsg := "Invalid partition name: " + partitionTag + ". "
|
|
if partitionTag == "" {
|
|
msg := invalidMsg + "Partition name should not be empty."
|
|
return errors.New(msg)
|
|
}
|
|
if len(partitionTag) > Params.ProxyCfg.MaxNameLength.GetAsInt() {
|
|
msg := invalidMsg + "The length of a partition name must be less than " + Params.ProxyCfg.MaxNameLength.GetValue() + " characters."
|
|
return errors.New(msg)
|
|
}
|
|
|
|
if strictCheck {
|
|
firstChar := partitionTag[0]
|
|
if firstChar != '_' && !isAlpha(firstChar) && !isNumber(firstChar) {
|
|
msg := invalidMsg + "The first character of a partition name must be an underscore or letter."
|
|
return errors.New(msg)
|
|
}
|
|
|
|
tagSize := len(partitionTag)
|
|
for i := 1; i < tagSize; i++ {
|
|
c := partitionTag[i]
|
|
if c != '_' && !isAlpha(c) && !isNumber(c) && c != '-' {
|
|
msg := invalidMsg + "Partition name can only contain numbers, letters and underscores."
|
|
return errors.New(msg)
|
|
}
|
|
}
|
|
}
|
|
|
|
return nil
|
|
}
|
|
|
|
func validateFieldName(fieldName string) error {
|
|
fieldName = strings.TrimSpace(fieldName)
|
|
|
|
if fieldName == "" {
|
|
return merr.WrapErrFieldNameInvalid(fieldName, "field name should not be empty")
|
|
}
|
|
|
|
invalidMsg := "Invalid field name: " + fieldName + ". "
|
|
if len(fieldName) > Params.ProxyCfg.MaxNameLength.GetAsInt() {
|
|
msg := invalidMsg + "The length of a field name must be less than " + Params.ProxyCfg.MaxNameLength.GetValue() + " characters."
|
|
return merr.WrapErrFieldNameInvalid(fieldName, msg)
|
|
}
|
|
|
|
firstChar := fieldName[0]
|
|
if firstChar != '_' && !isAlpha(firstChar) {
|
|
msg := invalidMsg + "The first character of a field name must be an underscore or letter."
|
|
return merr.WrapErrFieldNameInvalid(fieldName, msg)
|
|
}
|
|
|
|
fieldNameSize := len(fieldName)
|
|
for i := 1; i < fieldNameSize; i++ {
|
|
c := fieldName[i]
|
|
if c != '_' && !isAlpha(c) && !isNumber(c) {
|
|
msg := invalidMsg + "Field name can only contain numbers, letters, and underscores."
|
|
return merr.WrapErrFieldNameInvalid(fieldName, msg)
|
|
}
|
|
}
|
|
if _, ok := common.FieldNameKeywords[fieldName]; ok {
|
|
msg := invalidMsg + fmt.Sprintf("%s is keyword in milvus.", fieldName)
|
|
return merr.WrapErrFieldNameInvalid(fieldName, msg)
|
|
}
|
|
return nil
|
|
}
|
|
|
|
func validateDimension(field *schemapb.FieldSchema) error {
|
|
exist := false
|
|
var dim int64
|
|
for _, param := range field.TypeParams {
|
|
if param.Key == common.DimKey {
|
|
exist = true
|
|
tmp, err := strconv.ParseInt(param.Value, 10, 64)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
dim = tmp
|
|
break
|
|
}
|
|
}
|
|
if typeutil.IsSparseFloatVectorType(field.DataType) {
|
|
if exist {
|
|
return fmt.Errorf("dim should not be specified for sparse vector field %s(%d)", field.GetName(), field.FieldID)
|
|
}
|
|
return nil
|
|
}
|
|
if !exist {
|
|
return errors.Newf("dimension is not defined in field type params of field %s, check type param `dim` for vector field", field.GetName())
|
|
}
|
|
|
|
if dim <= 1 {
|
|
return fmt.Errorf("invalid dimension: %d. should be in range 2 ~ %d", dim, Params.ProxyCfg.MaxDimension.GetAsInt())
|
|
}
|
|
|
|
if typeutil.IsFloatVectorType(field.DataType) {
|
|
if dim > Params.ProxyCfg.MaxDimension.GetAsInt64() {
|
|
return fmt.Errorf("invalid dimension: %d of field %s. float vector dimension should be in range 2 ~ %d", dim, field.GetName(), Params.ProxyCfg.MaxDimension.GetAsInt())
|
|
}
|
|
} else {
|
|
if dim%8 != 0 {
|
|
return fmt.Errorf("invalid dimension: %d of field %s. binary vector dimension should be multiple of 8. ", dim, field.GetName())
|
|
}
|
|
if dim > Params.ProxyCfg.MaxDimension.GetAsInt64()*8 {
|
|
return fmt.Errorf("invalid dimension: %d of field %s. binary vector dimension should be in range 2 ~ %d", dim, field.GetName(), Params.ProxyCfg.MaxDimension.GetAsInt()*8)
|
|
}
|
|
}
|
|
return nil
|
|
}
|
|
|
|
func validateMaxLengthPerRow(collectionName string, field *schemapb.FieldSchema) error {
|
|
exist := false
|
|
for _, param := range field.TypeParams {
|
|
if param.Key != common.MaxLengthKey {
|
|
continue
|
|
}
|
|
|
|
maxLengthPerRow, err := strconv.ParseInt(param.Value, 10, 64)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
|
|
var defaultMaxLength int64
|
|
if field.DataType == schemapb.DataType_Text {
|
|
defaultMaxLength = Params.ProxyCfg.MaxTextLength.GetAsInt64()
|
|
} else {
|
|
defaultMaxLength = Params.ProxyCfg.MaxVarCharLength.GetAsInt64()
|
|
}
|
|
|
|
if maxLengthPerRow > defaultMaxLength || maxLengthPerRow <= 0 {
|
|
return merr.WrapErrParameterInvalidMsg("the maximum length specified for the field(%s) should be in (0, %d], but got %d instead", field.GetName(), defaultMaxLength, maxLengthPerRow)
|
|
}
|
|
exist = true
|
|
}
|
|
// if not exist type params max_length, return error
|
|
if !exist {
|
|
return fmt.Errorf("type param(max_length) should be specified for the field(%s) of collection %s", field.GetName(), collectionName)
|
|
}
|
|
|
|
return nil
|
|
}
|
|
|
|
func validateMaxCapacityPerRow(collectionName string, field *schemapb.FieldSchema) error {
|
|
exist := false
|
|
for _, param := range field.TypeParams {
|
|
if param.Key != common.MaxCapacityKey {
|
|
continue
|
|
}
|
|
|
|
maxCapacityPerRow, err := strconv.ParseInt(param.Value, 10, 64)
|
|
if err != nil {
|
|
return fmt.Errorf("the value for %s of field %s must be an integer", common.MaxCapacityKey, field.GetName())
|
|
}
|
|
if maxCapacityPerRow > defaultMaxArrayCapacity || maxCapacityPerRow <= 0 {
|
|
return fmt.Errorf("the maximum capacity specified for a Array should be in (0, 4096]")
|
|
}
|
|
exist = true
|
|
}
|
|
// if not exist type params max_length, return error
|
|
if !exist {
|
|
return fmt.Errorf("type param(max_capacity) should be specified for array field %s of collection %s", field.GetName(), collectionName)
|
|
}
|
|
|
|
return nil
|
|
}
|
|
|
|
func validateVectorFieldMetricType(field *schemapb.FieldSchema) error {
|
|
if !typeutil.IsVectorType(field.DataType) {
|
|
return nil
|
|
}
|
|
for _, params := range field.IndexParams {
|
|
if params.Key == common.MetricTypeKey {
|
|
return nil
|
|
}
|
|
}
|
|
return fmt.Errorf(`index param "metric_type" is not specified for index float vector %s`, field.GetName())
|
|
}
|
|
|
|
func validateDuplicatedFieldName(fields []*schemapb.FieldSchema) error {
|
|
names := make(map[string]bool)
|
|
for _, field := range fields {
|
|
_, ok := names[field.Name]
|
|
if ok {
|
|
return errors.Newf("duplicated field name %s found", field.GetName())
|
|
}
|
|
names[field.Name] = true
|
|
}
|
|
return nil
|
|
}
|
|
|
|
func validateElementType(dataType schemapb.DataType) error {
|
|
switch dataType {
|
|
case schemapb.DataType_Bool, schemapb.DataType_Int8, schemapb.DataType_Int16, schemapb.DataType_Int32,
|
|
schemapb.DataType_Int64, schemapb.DataType_Float, schemapb.DataType_Double, schemapb.DataType_VarChar:
|
|
return nil
|
|
case schemapb.DataType_String:
|
|
return errors.New("string data type not supported yet, please use VarChar type instead")
|
|
case schemapb.DataType_None:
|
|
return errors.New("element data type None is not valid")
|
|
}
|
|
return fmt.Errorf("element type %s is not supported", dataType.String())
|
|
}
|
|
|
|
func validateFieldType(schema *schemapb.CollectionSchema) error {
|
|
for _, field := range schema.GetFields() {
|
|
switch field.GetDataType() {
|
|
case schemapb.DataType_String:
|
|
return errors.New("string data type not supported yet, please use VarChar type instead")
|
|
case schemapb.DataType_None:
|
|
return errors.New("data type None is not valid")
|
|
case schemapb.DataType_Array:
|
|
if err := validateElementType(field.GetElementType()); err != nil {
|
|
return err
|
|
}
|
|
}
|
|
}
|
|
return nil
|
|
}
|
|
|
|
// ValidateFieldAutoID call after validatePrimaryKey
|
|
func ValidateFieldAutoID(coll *schemapb.CollectionSchema) error {
|
|
idx := -1
|
|
for i, field := range coll.Fields {
|
|
if field.AutoID {
|
|
if idx != -1 {
|
|
return fmt.Errorf("only one field can speficy AutoID with true, field name = %s, %s", coll.Fields[idx].Name, field.Name)
|
|
}
|
|
idx = i
|
|
if !field.IsPrimaryKey {
|
|
return fmt.Errorf("only primary field can speficy AutoID with true, field name = %s", field.Name)
|
|
}
|
|
}
|
|
}
|
|
return nil
|
|
}
|
|
|
|
func ValidateField(field *schemapb.FieldSchema, schema *schemapb.CollectionSchema) error {
|
|
// validate field name
|
|
var err error
|
|
if err := validateFieldName(field.Name); err != nil {
|
|
return err
|
|
}
|
|
// validate dense vector field type parameters
|
|
isVectorType := typeutil.IsVectorType(field.DataType)
|
|
if isVectorType {
|
|
err = validateDimension(field)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
}
|
|
// valid max length per row parameters
|
|
// if max_length not specified, return error
|
|
if field.DataType == schemapb.DataType_VarChar ||
|
|
(field.GetDataType() == schemapb.DataType_Array && field.GetElementType() == schemapb.DataType_VarChar) {
|
|
err = validateMaxLengthPerRow(schema.Name, field)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
}
|
|
// valid max capacity for array per row parameters
|
|
// if max_capacity not specified, return error
|
|
if field.DataType == schemapb.DataType_Array {
|
|
if err = validateMaxCapacityPerRow(schema.Name, field); err != nil {
|
|
return err
|
|
}
|
|
}
|
|
// TODO should remove the index params in the field schema
|
|
indexParams := funcutil.KeyValuePair2Map(field.GetIndexParams())
|
|
if err = ValidateAutoIndexMmapConfig(isVectorType, indexParams); err != nil {
|
|
return err
|
|
}
|
|
|
|
if err := ctokenizer.ValidateTextSchema(field, wasBm25FunctionInputField(schema, field)); err != nil {
|
|
return err
|
|
}
|
|
return nil
|
|
}
|
|
|
|
func validatePrimaryKey(coll *schemapb.CollectionSchema) error {
|
|
idx := -1
|
|
for i, field := range coll.Fields {
|
|
if field.IsPrimaryKey {
|
|
if idx != -1 {
|
|
return fmt.Errorf("there are more than one primary key, field name = %s, %s", coll.Fields[idx].Name, field.Name)
|
|
}
|
|
|
|
// The type of the primary key field can only be int64 and varchar
|
|
if field.DataType != schemapb.DataType_Int64 && field.DataType != schemapb.DataType_VarChar {
|
|
return errors.New("the data type of primary key should be Int64 or VarChar")
|
|
}
|
|
|
|
// varchar field do not support autoID
|
|
// If autoID is required, it is recommended to use int64 field as the primary key
|
|
//if field.DataType == schemapb.DataType_VarChar {
|
|
// if field.AutoID {
|
|
// return fmt.Errorf("autoID is not supported when the VarChar field is the primary key")
|
|
// }
|
|
//}
|
|
|
|
idx = i
|
|
}
|
|
}
|
|
if idx == -1 {
|
|
return errors.New("primary key is not specified")
|
|
}
|
|
return nil
|
|
}
|
|
|
|
func validateDynamicField(coll *schemapb.CollectionSchema) error {
|
|
for _, field := range coll.Fields {
|
|
if field.IsDynamic {
|
|
return fmt.Errorf("cannot explicitly set a field as a dynamic field")
|
|
}
|
|
}
|
|
return nil
|
|
}
|
|
|
|
// RepeatedKeyValToMap transfer the kv pairs to map.
|
|
func RepeatedKeyValToMap(kvPairs []*commonpb.KeyValuePair) (map[string]string, error) {
|
|
resMap := make(map[string]string)
|
|
for _, kv := range kvPairs {
|
|
_, ok := resMap[kv.Key]
|
|
if ok {
|
|
return nil, fmt.Errorf("duplicated param key: %s", kv.Key)
|
|
}
|
|
resMap[kv.Key] = kv.Value
|
|
}
|
|
return resMap, nil
|
|
}
|
|
|
|
// isVector check if dataType belongs to vector type.
|
|
func isVector(dataType schemapb.DataType) (bool, error) {
|
|
switch dataType {
|
|
case schemapb.DataType_Bool, schemapb.DataType_Int8,
|
|
schemapb.DataType_Int16, schemapb.DataType_Int32,
|
|
schemapb.DataType_Int64,
|
|
schemapb.DataType_Float, schemapb.DataType_Double:
|
|
return false, nil
|
|
|
|
case schemapb.DataType_FloatVector, schemapb.DataType_BinaryVector, schemapb.DataType_Float16Vector, schemapb.DataType_BFloat16Vector, schemapb.DataType_SparseFloatVector:
|
|
return true, nil
|
|
}
|
|
|
|
return false, fmt.Errorf("invalid data type: %d", dataType)
|
|
}
|
|
|
|
func validateMetricType(dataType schemapb.DataType, metricTypeStrRaw string) error {
|
|
metricTypeStr := strings.ToUpper(metricTypeStrRaw)
|
|
switch metricTypeStr {
|
|
case metric.L2, metric.IP, metric.COSINE:
|
|
if typeutil.IsFloatVectorType(dataType) {
|
|
return nil
|
|
}
|
|
case metric.JACCARD, metric.HAMMING, metric.SUBSTRUCTURE, metric.SUPERSTRUCTURE:
|
|
if dataType == schemapb.DataType_BinaryVector {
|
|
return nil
|
|
}
|
|
}
|
|
return fmt.Errorf("data_type %s mismatch with metric_type %s", dataType.String(), metricTypeStrRaw)
|
|
}
|
|
|
|
func validateSchema(coll *schemapb.CollectionSchema) error {
|
|
autoID := coll.AutoID
|
|
primaryIdx := -1
|
|
idMap := make(map[int64]int) // fieldId -> idx
|
|
nameMap := make(map[string]int) // name -> idx
|
|
for idx, field := range coll.Fields {
|
|
// check system field
|
|
if field.FieldID < 100 {
|
|
// System Fields, not injected yet
|
|
return fmt.Errorf("fieldID(%d) that is less than 100 is reserved for system fields: %s", field.FieldID, field.Name)
|
|
}
|
|
|
|
// primary key detector
|
|
if field.IsPrimaryKey {
|
|
if autoID {
|
|
return fmt.Errorf("autoId forbids primary key")
|
|
} else if primaryIdx != -1 {
|
|
return fmt.Errorf("there are more than one primary key, field name = %s, %s", coll.Fields[primaryIdx].Name, field.Name)
|
|
}
|
|
if field.DataType != schemapb.DataType_Int64 {
|
|
return fmt.Errorf("type of primary key should be int64")
|
|
}
|
|
primaryIdx = idx
|
|
}
|
|
// check unique
|
|
elemIdx, ok := idMap[field.FieldID]
|
|
if ok {
|
|
return fmt.Errorf("duplicate field ids: %d", coll.Fields[elemIdx].FieldID)
|
|
}
|
|
idMap[field.FieldID] = idx
|
|
elemIdx, ok = nameMap[field.Name]
|
|
if ok {
|
|
return fmt.Errorf("duplicate field names: %s", coll.Fields[elemIdx].Name)
|
|
}
|
|
nameMap[field.Name] = idx
|
|
|
|
isVec, err3 := isVector(field.DataType)
|
|
if err3 != nil {
|
|
return err3
|
|
}
|
|
|
|
if isVec {
|
|
indexKv, err1 := RepeatedKeyValToMap(field.IndexParams)
|
|
if err1 != nil {
|
|
return err1
|
|
}
|
|
typeKv, err2 := RepeatedKeyValToMap(field.TypeParams)
|
|
if err2 != nil {
|
|
return err2
|
|
}
|
|
if !typeutil.IsSparseFloatVectorType(field.DataType) {
|
|
dimStr, ok := typeKv[common.DimKey]
|
|
if !ok {
|
|
return fmt.Errorf("dim not found in type_params for vector field %s(%d)", field.Name, field.FieldID)
|
|
}
|
|
dim, err := strconv.Atoi(dimStr)
|
|
if err != nil || dim < 0 {
|
|
return fmt.Errorf("invalid dim; %s", dimStr)
|
|
}
|
|
}
|
|
|
|
metricTypeStr, ok := indexKv[common.MetricTypeKey]
|
|
if ok {
|
|
err4 := validateMetricType(field.DataType, metricTypeStr)
|
|
if err4 != nil {
|
|
return err4
|
|
}
|
|
}
|
|
// in C++, default type will be specified
|
|
// do nothing
|
|
} else {
|
|
if len(field.IndexParams) != 0 {
|
|
return fmt.Errorf("index params is not empty for scalar field: %s(%d)", field.Name, field.FieldID)
|
|
}
|
|
if len(field.TypeParams) != 0 {
|
|
return fmt.Errorf("type params is not empty for scalar field: %s(%d)", field.Name, field.FieldID)
|
|
}
|
|
}
|
|
}
|
|
|
|
if !autoID && primaryIdx == -1 {
|
|
return fmt.Errorf("primary key is required for non autoid mode")
|
|
}
|
|
|
|
return nil
|
|
}
|
|
|
|
func validateFunction(coll *schemapb.CollectionSchema) error {
|
|
nameMap := lo.SliceToMap(coll.GetFields(), func(field *schemapb.FieldSchema) (string, *schemapb.FieldSchema) {
|
|
return field.GetName(), field
|
|
})
|
|
usedOutputField := typeutil.NewSet[string]()
|
|
usedFunctionName := typeutil.NewSet[string]()
|
|
|
|
for _, function := range coll.GetFunctions() {
|
|
if err := checkFunctionBasicParams(function); err != nil {
|
|
return err
|
|
}
|
|
|
|
if usedFunctionName.Contain(function.GetName()) {
|
|
return fmt.Errorf("duplicate function name: %s", function.GetName())
|
|
}
|
|
|
|
usedFunctionName.Insert(function.GetName())
|
|
inputFields := []*schemapb.FieldSchema{}
|
|
for _, name := range function.GetInputFieldNames() {
|
|
inputField, ok := nameMap[name]
|
|
if !ok {
|
|
return fmt.Errorf("function input field not found: %s", name)
|
|
}
|
|
if inputField.GetNullable() {
|
|
return fmt.Errorf("function input field cannot be nullable: function %s, field %s", function.GetName(), inputField.GetName())
|
|
}
|
|
inputFields = append(inputFields, inputField)
|
|
}
|
|
|
|
if err := checkFunctionInputField(function, inputFields); err != nil {
|
|
return err
|
|
}
|
|
|
|
outputFields := make([]*schemapb.FieldSchema, len(function.GetOutputFieldNames()))
|
|
for i, name := range function.GetOutputFieldNames() {
|
|
outputField, ok := nameMap[name]
|
|
if !ok {
|
|
return fmt.Errorf("function output field not found: %s", name)
|
|
}
|
|
|
|
if outputField.GetIsPrimaryKey() {
|
|
return fmt.Errorf("function output field cannot be primary key: function %s, field %s", function.GetName(), outputField.GetName())
|
|
}
|
|
|
|
if outputField.GetIsPartitionKey() || outputField.GetIsClusteringKey() {
|
|
return fmt.Errorf("function output field cannot be partition key or clustering key: function %s, field %s", function.GetName(), outputField.GetName())
|
|
}
|
|
|
|
if outputField.GetNullable() {
|
|
return fmt.Errorf("function output field cannot be nullable: function %s, field %s", function.GetName(), outputField.GetName())
|
|
}
|
|
|
|
outputField.IsFunctionOutput = true
|
|
outputFields[i] = outputField
|
|
if usedOutputField.Contain(name) {
|
|
return fmt.Errorf("duplicate function output field: function %s, field %s", function.GetName(), name)
|
|
}
|
|
usedOutputField.Insert(name)
|
|
}
|
|
|
|
if err := checkFunctionOutputField(function, outputFields); err != nil {
|
|
return err
|
|
}
|
|
}
|
|
|
|
if err := function.ValidateFunctions(coll); err != nil {
|
|
return err
|
|
}
|
|
return nil
|
|
}
|
|
|
|
func checkFunctionOutputField(fSchema *schemapb.FunctionSchema, fields []*schemapb.FieldSchema) error {
|
|
switch fSchema.GetType() {
|
|
case schemapb.FunctionType_BM25:
|
|
if len(fields) != 1 {
|
|
return fmt.Errorf("BM25 function only need 1 output field, but got %d", len(fields))
|
|
}
|
|
|
|
if !typeutil.IsSparseFloatVectorType(fields[0].GetDataType()) {
|
|
return fmt.Errorf("BM25 function output field must be a SparseFloatVector field, but got %s", fields[0].DataType.String())
|
|
}
|
|
case schemapb.FunctionType_TextEmbedding:
|
|
if err := function.TextEmbeddingOutputsCheck(fields); err != nil {
|
|
return err
|
|
}
|
|
default:
|
|
return fmt.Errorf("check output field for unknown function type")
|
|
}
|
|
return nil
|
|
}
|
|
|
|
func wasBm25FunctionInputField(coll *schemapb.CollectionSchema, field *schemapb.FieldSchema) bool {
|
|
for _, fun := range coll.GetFunctions() {
|
|
if fun.GetType() == schemapb.FunctionType_BM25 && field.GetName() == fun.GetInputFieldNames()[0] {
|
|
return true
|
|
}
|
|
}
|
|
return false
|
|
}
|
|
|
|
func checkFunctionInputField(function *schemapb.FunctionSchema, fields []*schemapb.FieldSchema) error {
|
|
switch function.GetType() {
|
|
case schemapb.FunctionType_BM25:
|
|
if len(fields) != 1 || (fields[0].DataType != schemapb.DataType_VarChar && fields[0].DataType != schemapb.DataType_Text) {
|
|
return fmt.Errorf("BM25 function input field must be a VARCHAR/TEXT field, got %d field with type %s",
|
|
len(fields), fields[0].DataType.String())
|
|
}
|
|
h := typeutil.CreateFieldSchemaHelper(fields[0])
|
|
if !h.EnableAnalyzer() {
|
|
return fmt.Errorf("BM25 function input field must set enable_analyzer to true")
|
|
}
|
|
case schemapb.FunctionType_TextEmbedding:
|
|
if len(fields) != 1 || (fields[0].DataType != schemapb.DataType_VarChar && fields[0].DataType != schemapb.DataType_Text) {
|
|
return fmt.Errorf("TextEmbedding function input field must be a VARCHAR/TEXT field")
|
|
}
|
|
default:
|
|
return fmt.Errorf("check input field with unknown function type")
|
|
}
|
|
return nil
|
|
}
|
|
|
|
func checkFunctionBasicParams(function *schemapb.FunctionSchema) error {
|
|
if function.GetName() == "" {
|
|
return fmt.Errorf("function name cannot be empty")
|
|
}
|
|
if len(function.GetInputFieldNames()) == 0 {
|
|
return fmt.Errorf("function input field names cannot be empty, function: %s", function.GetName())
|
|
}
|
|
if len(function.GetOutputFieldNames()) == 0 {
|
|
return fmt.Errorf("function output field names cannot be empty, function: %s", function.GetName())
|
|
}
|
|
for _, input := range function.GetInputFieldNames() {
|
|
if input == "" {
|
|
return fmt.Errorf("function input field name cannot be empty string, function: %s", function.GetName())
|
|
}
|
|
// if input occurs more than once, error
|
|
if lo.Count(function.GetInputFieldNames(), input) > 1 {
|
|
return fmt.Errorf("each function input field should be used exactly once in the same function, function: %s, input field: %s", function.GetName(), input)
|
|
}
|
|
}
|
|
for _, output := range function.GetOutputFieldNames() {
|
|
if output == "" {
|
|
return fmt.Errorf("function output field name cannot be empty string, function: %s", function.GetName())
|
|
}
|
|
if lo.Count(function.GetInputFieldNames(), output) > 0 {
|
|
return fmt.Errorf("a single field cannot be both input and output in the same function, function: %s, field: %s", function.GetName(), output)
|
|
}
|
|
if lo.Count(function.GetOutputFieldNames(), output) > 1 {
|
|
return fmt.Errorf("each function output field should be used exactly once in the same function, function: %s, output field: %s", function.GetName(), output)
|
|
}
|
|
}
|
|
switch function.GetType() {
|
|
case schemapb.FunctionType_BM25:
|
|
if len(function.GetParams()) != 0 {
|
|
return fmt.Errorf("BM25 function accepts no params")
|
|
}
|
|
case schemapb.FunctionType_TextEmbedding:
|
|
if len(function.GetParams()) == 0 {
|
|
return fmt.Errorf("TextEmbedding function accepts no params")
|
|
}
|
|
default:
|
|
return fmt.Errorf("check function params with unknown function type")
|
|
}
|
|
return nil
|
|
}
|
|
|
|
// validateMultipleVectorFields check if schema has multiple vector fields.
|
|
func validateMultipleVectorFields(schema *schemapb.CollectionSchema) error {
|
|
vecExist := false
|
|
var vecName string
|
|
|
|
for i := range schema.Fields {
|
|
name := schema.Fields[i].Name
|
|
dType := schema.Fields[i].DataType
|
|
isVec := typeutil.IsVectorType(dType)
|
|
if isVec && vecExist && !enableMultipleVectorFields {
|
|
return fmt.Errorf(
|
|
"multiple vector fields is not supported, fields name: %s, %s",
|
|
vecName,
|
|
name,
|
|
)
|
|
} else if isVec {
|
|
vecExist = true
|
|
vecName = name
|
|
}
|
|
}
|
|
|
|
return nil
|
|
}
|
|
|
|
func validateLoadFieldsList(schema *schemapb.CollectionSchema) error {
|
|
var vectorCnt int
|
|
for _, field := range schema.Fields {
|
|
shouldLoad, err := common.ShouldFieldBeLoaded(field.GetTypeParams())
|
|
if err != nil {
|
|
return err
|
|
}
|
|
// shoud load field, skip other check
|
|
if shouldLoad {
|
|
if typeutil.IsVectorType(field.GetDataType()) {
|
|
vectorCnt++
|
|
}
|
|
continue
|
|
}
|
|
|
|
if field.IsPrimaryKey {
|
|
return merr.WrapErrParameterInvalidMsg("Primary key field %s cannot skip loading", field.GetName())
|
|
}
|
|
|
|
if field.IsPartitionKey {
|
|
return merr.WrapErrParameterInvalidMsg("Partition Key field %s cannot skip loading", field.GetName())
|
|
}
|
|
|
|
if field.IsClusteringKey {
|
|
return merr.WrapErrParameterInvalidMsg("Clustering Key field %s cannot skip loading", field.GetName())
|
|
}
|
|
}
|
|
|
|
if vectorCnt == 0 {
|
|
return merr.WrapErrParameterInvalidMsg("cannot config all vector field(s) skip loading")
|
|
}
|
|
|
|
return nil
|
|
}
|
|
|
|
// parsePrimaryFieldData2IDs get IDs to fill grpc result, for example insert request, delete request etc.
|
|
func parsePrimaryFieldData2IDs(fieldData *schemapb.FieldData) (*schemapb.IDs, error) {
|
|
primaryData := &schemapb.IDs{}
|
|
switch fieldData.Field.(type) {
|
|
case *schemapb.FieldData_Scalars:
|
|
scalarField := fieldData.GetScalars()
|
|
switch scalarField.Data.(type) {
|
|
case *schemapb.ScalarField_LongData:
|
|
primaryData.IdField = &schemapb.IDs_IntId{
|
|
IntId: scalarField.GetLongData(),
|
|
}
|
|
case *schemapb.ScalarField_StringData:
|
|
primaryData.IdField = &schemapb.IDs_StrId{
|
|
StrId: scalarField.GetStringData(),
|
|
}
|
|
default:
|
|
return nil, merr.WrapErrParameterInvalidMsg("currently only support DataType Int64 or VarChar as PrimaryField")
|
|
}
|
|
default:
|
|
return nil, merr.WrapErrParameterInvalidMsg("currently not support vector field as PrimaryField")
|
|
}
|
|
|
|
return primaryData, nil
|
|
}
|
|
|
|
// autoGenPrimaryFieldData generate primary data when autoID == true
|
|
func autoGenPrimaryFieldData(fieldSchema *schemapb.FieldSchema, data interface{}) (*schemapb.FieldData, error) {
|
|
var fieldData schemapb.FieldData
|
|
fieldData.FieldName = fieldSchema.Name
|
|
fieldData.Type = fieldSchema.DataType
|
|
switch data := data.(type) {
|
|
case []int64:
|
|
switch fieldData.Type {
|
|
case schemapb.DataType_Int64:
|
|
fieldData.Field = &schemapb.FieldData_Scalars{
|
|
Scalars: &schemapb.ScalarField{
|
|
Data: &schemapb.ScalarField_LongData{
|
|
LongData: &schemapb.LongArray{
|
|
Data: data,
|
|
},
|
|
},
|
|
},
|
|
}
|
|
case schemapb.DataType_VarChar:
|
|
strIDs := make([]string, len(data))
|
|
for i, v := range data {
|
|
strIDs[i] = strconv.FormatInt(v, 10)
|
|
}
|
|
fieldData.Field = &schemapb.FieldData_Scalars{
|
|
Scalars: &schemapb.ScalarField{
|
|
Data: &schemapb.ScalarField_StringData{
|
|
StringData: &schemapb.StringArray{
|
|
Data: strIDs,
|
|
},
|
|
},
|
|
},
|
|
}
|
|
default:
|
|
return nil, errors.New("currently only support autoID for int64 and varchar PrimaryField")
|
|
}
|
|
default:
|
|
return nil, errors.New("currently only int64 is supported as the data source for the autoID of a PrimaryField")
|
|
}
|
|
|
|
return &fieldData, nil
|
|
}
|
|
|
|
func autoGenDynamicFieldData(data [][]byte) *schemapb.FieldData {
|
|
return &schemapb.FieldData{
|
|
FieldName: common.MetaFieldName,
|
|
Type: schemapb.DataType_JSON,
|
|
Field: &schemapb.FieldData_Scalars{
|
|
Scalars: &schemapb.ScalarField{
|
|
Data: &schemapb.ScalarField_JsonData{
|
|
JsonData: &schemapb.JSONArray{
|
|
Data: data,
|
|
},
|
|
},
|
|
},
|
|
},
|
|
IsDynamic: true,
|
|
}
|
|
}
|
|
|
|
// fillFieldPropertiesBySchema set fieldID to fieldData according FieldSchemas
|
|
func fillFieldPropertiesBySchema(columns []*schemapb.FieldData, schema *schemapb.CollectionSchema) error {
|
|
fieldName2Schema := make(map[string]*schemapb.FieldSchema)
|
|
|
|
expectColumnNum := 0
|
|
for _, field := range schema.GetFields() {
|
|
fieldName2Schema[field.Name] = field
|
|
if !IsBM25FunctionOutputField(field, schema) {
|
|
expectColumnNum++
|
|
}
|
|
}
|
|
|
|
if len(columns) != expectColumnNum {
|
|
return fmt.Errorf("len(columns) mismatch the expectColumnNum, expectColumnNum: %d, len(columns): %d",
|
|
expectColumnNum, len(columns))
|
|
}
|
|
|
|
for _, fieldData := range columns {
|
|
if fieldSchema, ok := fieldName2Schema[fieldData.FieldName]; ok {
|
|
fieldData.FieldId = fieldSchema.FieldID
|
|
fieldData.Type = fieldSchema.DataType
|
|
|
|
// Set the ElementType because it may not be set in the insert request.
|
|
if fieldData.Type == schemapb.DataType_Array {
|
|
fd, ok := fieldData.Field.(*schemapb.FieldData_Scalars)
|
|
if !ok {
|
|
return fmt.Errorf("field convert FieldData_Scalars fail in fieldData, fieldName: %s,"+
|
|
" collectionName:%s", fieldData.FieldName, schema.Name)
|
|
}
|
|
fd.Scalars.GetArrayData().ElementType = fieldSchema.ElementType
|
|
}
|
|
} else {
|
|
return fmt.Errorf("fieldName %v not exist in collection schema", fieldData.FieldName)
|
|
}
|
|
}
|
|
|
|
return nil
|
|
}
|
|
|
|
func ValidateUsername(username string) error {
|
|
username = strings.TrimSpace(username)
|
|
|
|
if username == "" {
|
|
return merr.WrapErrParameterInvalidMsg("username must be not empty")
|
|
}
|
|
|
|
if len(username) > Params.ProxyCfg.MaxUsernameLength.GetAsInt() {
|
|
return merr.WrapErrParameterInvalidMsg("invalid username %s with length %d, the length of username must be less than %d", username, len(username), Params.ProxyCfg.MaxUsernameLength.GetValue())
|
|
}
|
|
|
|
firstChar := username[0]
|
|
if !isAlpha(firstChar) {
|
|
return merr.WrapErrParameterInvalidMsg("invalid user name %s, the first character must be a letter, but got %s", username, string(firstChar))
|
|
}
|
|
|
|
usernameSize := len(username)
|
|
for i := 1; i < usernameSize; i++ {
|
|
c := username[i]
|
|
if c != '_' && !isAlpha(c) && !isNumber(c) {
|
|
return merr.WrapErrParameterInvalidMsg("invalid user name %s, username must contain only numbers, letters and underscores, but got %s", username, c)
|
|
}
|
|
}
|
|
return nil
|
|
}
|
|
|
|
func ValidatePassword(password string) error {
|
|
if len(password) < Params.ProxyCfg.MinPasswordLength.GetAsInt() || len(password) > Params.ProxyCfg.MaxPasswordLength.GetAsInt() {
|
|
return merr.WrapErrParameterInvalidRange(Params.ProxyCfg.MinPasswordLength.GetAsInt(),
|
|
Params.ProxyCfg.MaxPasswordLength.GetAsInt(),
|
|
len(password), "invalid password length")
|
|
}
|
|
return nil
|
|
}
|
|
|
|
func ReplaceID2Name(oldStr string, id int64, name string) string {
|
|
return strings.ReplaceAll(oldStr, strconv.FormatInt(id, 10), name)
|
|
}
|
|
|
|
func parseGuaranteeTsFromConsistency(ts, tMax typeutil.Timestamp, consistency commonpb.ConsistencyLevel) typeutil.Timestamp {
|
|
switch consistency {
|
|
case commonpb.ConsistencyLevel_Strong:
|
|
ts = tMax
|
|
case commonpb.ConsistencyLevel_Bounded:
|
|
ratio := Params.CommonCfg.GracefulTime.GetAsDuration(time.Millisecond)
|
|
ts = tsoutil.AddPhysicalDurationOnTs(tMax, -ratio)
|
|
case commonpb.ConsistencyLevel_Eventually:
|
|
ts = 1
|
|
}
|
|
return ts
|
|
}
|
|
|
|
func parseGuaranteeTs(ts, tMax typeutil.Timestamp) typeutil.Timestamp {
|
|
switch ts {
|
|
case strongTS:
|
|
ts = tMax
|
|
case boundedTS:
|
|
ratio := Params.CommonCfg.GracefulTime.GetAsDuration(time.Millisecond)
|
|
ts = tsoutil.AddPhysicalDurationOnTs(tMax, -ratio)
|
|
}
|
|
return ts
|
|
}
|
|
|
|
func getMaxMvccTsFromChannels(channelsTs map[string]uint64, beginTs typeutil.Timestamp) typeutil.Timestamp {
|
|
maxTs := typeutil.Timestamp(0)
|
|
for _, ts := range channelsTs {
|
|
if ts > maxTs {
|
|
maxTs = ts
|
|
}
|
|
}
|
|
|
|
if maxTs == 0 {
|
|
log.Warn("no channel ts found, use beginTs instead")
|
|
return beginTs
|
|
}
|
|
|
|
return maxTs
|
|
}
|
|
|
|
func validateName(entity string, nameType string) error {
|
|
entity = strings.TrimSpace(entity)
|
|
|
|
if entity == "" {
|
|
return merr.WrapErrParameterInvalid("not empty", entity, nameType+" should be not empty")
|
|
}
|
|
|
|
if len(entity) > Params.ProxyCfg.MaxNameLength.GetAsInt() {
|
|
return merr.WrapErrParameterInvalidRange(0,
|
|
Params.ProxyCfg.MaxNameLength.GetAsInt(),
|
|
len(entity),
|
|
fmt.Sprintf("the length of %s must be not greater than limit", nameType))
|
|
}
|
|
|
|
firstChar := entity[0]
|
|
if firstChar != '_' && !isAlpha(firstChar) {
|
|
return merr.WrapErrParameterInvalid('_',
|
|
firstChar,
|
|
fmt.Sprintf("the first character of %s must be an underscore or letter", nameType))
|
|
}
|
|
|
|
for i := 1; i < len(entity); i++ {
|
|
c := entity[i]
|
|
if c != '_' && c != '$' && !isAlpha(c) && !isNumber(c) {
|
|
return merr.WrapErrParameterInvalidMsg("%s can only contain numbers, letters, dollars and underscores, found %c at %d", nameType, c, i)
|
|
}
|
|
}
|
|
return nil
|
|
}
|
|
|
|
func ValidateRoleName(entity string) error {
|
|
return validateName(entity, "role name")
|
|
}
|
|
|
|
func IsDefaultRole(roleName string) bool {
|
|
for _, defaultRole := range util.DefaultRoles {
|
|
if defaultRole == roleName {
|
|
return true
|
|
}
|
|
}
|
|
return false
|
|
}
|
|
|
|
func ValidateObjectName(entity string) error {
|
|
if util.IsAnyWord(entity) {
|
|
return nil
|
|
}
|
|
return validateName(entity, "object name")
|
|
}
|
|
|
|
func ValidateCollectionName(entity string) error {
|
|
if util.IsAnyWord(entity) {
|
|
return nil
|
|
}
|
|
return validateName(entity, "collection name")
|
|
}
|
|
|
|
func ValidateObjectType(entity string) error {
|
|
return validateName(entity, "ObjectType")
|
|
}
|
|
|
|
func ValidatePrivilege(entity string) error {
|
|
if util.IsAnyWord(entity) {
|
|
return nil
|
|
}
|
|
return validateName(entity, "Privilege")
|
|
}
|
|
|
|
func GetCurUserFromContext(ctx context.Context) (string, error) {
|
|
return contextutil.GetCurUserFromContext(ctx)
|
|
}
|
|
|
|
func GetCurUserFromContextOrDefault(ctx context.Context) string {
|
|
username, _ := GetCurUserFromContext(ctx)
|
|
return username
|
|
}
|
|
|
|
func GetCurDBNameFromContextOrDefault(ctx context.Context) string {
|
|
md, ok := metadata.FromIncomingContext(ctx)
|
|
if !ok {
|
|
return util.DefaultDBName
|
|
}
|
|
dbNameData := md[strings.ToLower(util.HeaderDBName)]
|
|
if len(dbNameData) < 1 || dbNameData[0] == "" {
|
|
return util.DefaultDBName
|
|
}
|
|
return dbNameData[0]
|
|
}
|
|
|
|
func NewContextWithMetadata(ctx context.Context, username string, dbName string) context.Context {
|
|
dbKey := strings.ToLower(util.HeaderDBName)
|
|
if dbName != "" {
|
|
ctx = contextutil.AppendToIncomingContext(ctx, dbKey, dbName)
|
|
}
|
|
if username != "" {
|
|
originValue := fmt.Sprintf("%s%s%s", username, util.CredentialSeperator, username)
|
|
authKey := strings.ToLower(util.HeaderAuthorize)
|
|
authValue := crypto.Base64Encode(originValue)
|
|
ctx = contextutil.AppendToIncomingContext(ctx, authKey, authValue)
|
|
}
|
|
return ctx
|
|
}
|
|
|
|
func AppendUserInfoForRPC(ctx context.Context) context.Context {
|
|
curUser, _ := GetCurUserFromContext(ctx)
|
|
if curUser != "" {
|
|
originValue := fmt.Sprintf("%s%s%s", curUser, util.CredentialSeperator, curUser)
|
|
authKey := strings.ToLower(util.HeaderAuthorize)
|
|
authValue := crypto.Base64Encode(originValue)
|
|
ctx = metadata.AppendToOutgoingContext(ctx, authKey, authValue)
|
|
}
|
|
return ctx
|
|
}
|
|
|
|
func GetRole(username string) ([]string, error) {
|
|
if globalMetaCache == nil {
|
|
return []string{}, merr.WrapErrServiceUnavailable("internal: Milvus Proxy is not ready yet. please wait")
|
|
}
|
|
return globalMetaCache.GetUserRole(username), nil
|
|
}
|
|
|
|
func PasswordVerify(ctx context.Context, username, rawPwd string) bool {
|
|
return passwordVerify(ctx, username, rawPwd, globalMetaCache)
|
|
}
|
|
|
|
func VerifyAPIKey(rawToken string) (string, error) {
|
|
hoo := hookutil.GetHook()
|
|
user, err := hoo.VerifyAPIKey(rawToken)
|
|
if err != nil {
|
|
log.Warn("fail to verify apikey", zap.String("api_key", rawToken), zap.Error(err))
|
|
return "", merr.WrapErrParameterInvalidMsg("invalid apikey: [%s]", rawToken)
|
|
}
|
|
return user, nil
|
|
}
|
|
|
|
// PasswordVerify verify password
|
|
func passwordVerify(ctx context.Context, username, rawPwd string, globalMetaCache Cache) bool {
|
|
// it represents the cache miss if Sha256Password is empty within credInfo, which shall be updated first connection.
|
|
// meanwhile, generating Sha256Password depends on raw password and encrypted password will not cache.
|
|
credInfo, err := globalMetaCache.GetCredentialInfo(ctx, username)
|
|
if err != nil {
|
|
log.Ctx(ctx).Error("found no credential", zap.String("username", username), zap.Error(err))
|
|
return false
|
|
}
|
|
|
|
// hit cache
|
|
sha256Pwd := crypto.SHA256(rawPwd, credInfo.Username)
|
|
if credInfo.Sha256Password != "" {
|
|
return sha256Pwd == credInfo.Sha256Password
|
|
}
|
|
|
|
// miss cache, verify against encrypted password from etcd
|
|
if err := bcrypt.CompareHashAndPassword([]byte(credInfo.EncryptedPassword), []byte(rawPwd)); err != nil {
|
|
log.Ctx(ctx).Error("Verify password failed", zap.Error(err))
|
|
return false
|
|
}
|
|
|
|
// update cache after miss cache
|
|
credInfo.Sha256Password = sha256Pwd
|
|
log.Ctx(ctx).Debug("get credential miss cache, update cache with", zap.Any("credential", credInfo))
|
|
globalMetaCache.UpdateCredential(credInfo)
|
|
return true
|
|
}
|
|
|
|
func translatePkOutputFields(schema *schemapb.CollectionSchema) ([]string, []int64) {
|
|
pkNames := []string{}
|
|
fieldIDs := []int64{}
|
|
for _, field := range schema.Fields {
|
|
if field.IsPrimaryKey {
|
|
pkNames = append(pkNames, field.GetName())
|
|
fieldIDs = append(fieldIDs, field.GetFieldID())
|
|
}
|
|
}
|
|
return pkNames, fieldIDs
|
|
}
|
|
|
|
func recallCal[T string | int64](results []T, gts []T) float32 {
|
|
hit := 0
|
|
total := 0
|
|
for _, r := range results {
|
|
total++
|
|
for _, gt := range gts {
|
|
if r == gt {
|
|
hit++
|
|
break
|
|
}
|
|
}
|
|
}
|
|
return float32(hit) / float32(total)
|
|
}
|
|
|
|
func computeRecall(results *schemapb.SearchResultData, gts *schemapb.SearchResultData) error {
|
|
if results.GetNumQueries() != gts.GetNumQueries() {
|
|
return fmt.Errorf("num of queries is inconsistent between search results(%d) and ground truth(%d)", results.GetNumQueries(), gts.GetNumQueries())
|
|
}
|
|
|
|
switch results.GetIds().GetIdField().(type) {
|
|
case *schemapb.IDs_IntId:
|
|
switch gts.GetIds().GetIdField().(type) {
|
|
case *schemapb.IDs_IntId:
|
|
currentResultIndex := int64(0)
|
|
currentGTIndex := int64(0)
|
|
recalls := make([]float32, 0, results.GetNumQueries())
|
|
for i := 0; i < int(results.GetNumQueries()); i++ {
|
|
currentResultTopk := results.GetTopks()[i]
|
|
currentGTTopk := gts.GetTopks()[i]
|
|
recalls = append(recalls, recallCal(results.GetIds().GetIntId().GetData()[currentResultIndex:currentResultIndex+currentResultTopk],
|
|
gts.GetIds().GetIntId().GetData()[currentGTIndex:currentGTIndex+currentGTTopk]))
|
|
currentResultIndex += currentResultTopk
|
|
currentGTIndex += currentGTTopk
|
|
}
|
|
results.Recalls = recalls
|
|
return nil
|
|
case *schemapb.IDs_StrId:
|
|
return fmt.Errorf("pk type is inconsistent between search results(int64) and ground truth(string)")
|
|
default:
|
|
return fmt.Errorf("unsupported pk type")
|
|
}
|
|
|
|
case *schemapb.IDs_StrId:
|
|
switch gts.GetIds().GetIdField().(type) {
|
|
case *schemapb.IDs_StrId:
|
|
currentResultIndex := int64(0)
|
|
currentGTIndex := int64(0)
|
|
recalls := make([]float32, 0, results.GetNumQueries())
|
|
for i := 0; i < int(results.GetNumQueries()); i++ {
|
|
currentResultTopk := results.GetTopks()[i]
|
|
currentGTTopk := gts.GetTopks()[i]
|
|
recalls = append(recalls, recallCal(results.GetIds().GetStrId().GetData()[currentResultIndex:currentResultIndex+currentResultTopk],
|
|
gts.GetIds().GetStrId().GetData()[currentGTIndex:currentGTIndex+currentGTTopk]))
|
|
currentResultIndex += currentResultTopk
|
|
currentGTIndex += currentGTTopk
|
|
}
|
|
results.Recalls = recalls
|
|
return nil
|
|
case *schemapb.IDs_IntId:
|
|
return fmt.Errorf("pk type is inconsistent between search results(string) and ground truth(int64)")
|
|
default:
|
|
return fmt.Errorf("unsupported pk type")
|
|
}
|
|
default:
|
|
return fmt.Errorf("unsupported pk type")
|
|
}
|
|
}
|
|
|
|
// Support wildcard in output fields:
|
|
//
|
|
// "*" - all fields
|
|
//
|
|
// For example, A and B are scalar fields, C and D are vector fields, duplicated fields will automatically be removed.
|
|
//
|
|
// output_fields=["*"] ==> [A,B,C,D]
|
|
// output_fields=["*",A] ==> [A,B,C,D]
|
|
// output_fields=["*",C] ==> [A,B,C,D]
|
|
//
|
|
// 4th return value is true if user requested pk field explicitly or using wildcard.
|
|
// if removePkField is true, pk field will not be include in the first(resultFieldNames)/second(userOutputFields)
|
|
// return value.
|
|
func translateOutputFields(outputFields []string, schema *schemaInfo, removePkField bool) ([]string, []string, []string, bool, error) {
|
|
var primaryFieldName string
|
|
var dynamicField *schemapb.FieldSchema
|
|
allFieldNameMap := make(map[string]*schemapb.FieldSchema)
|
|
resultFieldNameMap := make(map[string]bool)
|
|
resultFieldNames := make([]string, 0)
|
|
userOutputFieldsMap := make(map[string]bool)
|
|
userOutputFields := make([]string, 0)
|
|
userDynamicFieldsMap := make(map[string]bool)
|
|
userDynamicFields := make([]string, 0)
|
|
useAllDyncamicFields := false
|
|
for _, field := range schema.Fields {
|
|
if field.IsPrimaryKey {
|
|
primaryFieldName = field.Name
|
|
}
|
|
if field.IsDynamic {
|
|
dynamicField = field
|
|
}
|
|
allFieldNameMap[field.Name] = field
|
|
}
|
|
|
|
userRequestedPkFieldExplicitly := false
|
|
|
|
for _, outputFieldName := range outputFields {
|
|
outputFieldName = strings.TrimSpace(outputFieldName)
|
|
if outputFieldName == primaryFieldName {
|
|
userRequestedPkFieldExplicitly = true
|
|
}
|
|
if outputFieldName == "*" {
|
|
userRequestedPkFieldExplicitly = true
|
|
for fieldName, field := range allFieldNameMap {
|
|
// skip Cold field and fields that can't be output
|
|
if schema.IsFieldLoaded(field.GetFieldID()) && schema.CanRetrieveRawFieldData(field) {
|
|
resultFieldNameMap[fieldName] = true
|
|
userOutputFieldsMap[fieldName] = true
|
|
}
|
|
}
|
|
useAllDyncamicFields = true
|
|
} else {
|
|
if field, ok := allFieldNameMap[outputFieldName]; ok {
|
|
if !schema.CanRetrieveRawFieldData(field) {
|
|
return nil, nil, nil, false, fmt.Errorf("not allowed to retrieve raw data of field %s", outputFieldName)
|
|
}
|
|
if schema.IsFieldLoaded(field.GetFieldID()) {
|
|
resultFieldNameMap[outputFieldName] = true
|
|
userOutputFieldsMap[outputFieldName] = true
|
|
} else {
|
|
return nil, nil, nil, false, fmt.Errorf("field %s is not loaded", outputFieldName)
|
|
}
|
|
} else {
|
|
if schema.EnableDynamicField {
|
|
if schema.IsFieldLoaded(dynamicField.GetFieldID()) {
|
|
schemaH, err := typeutil.CreateSchemaHelper(schema.CollectionSchema)
|
|
if err != nil {
|
|
return nil, nil, nil, false, err
|
|
}
|
|
err = planparserv2.ParseIdentifier(schemaH, outputFieldName, func(expr *planpb.Expr) error {
|
|
if len(expr.GetColumnExpr().GetInfo().GetNestedPath()) == 1 &&
|
|
expr.GetColumnExpr().GetInfo().GetNestedPath()[0] == outputFieldName {
|
|
return nil
|
|
}
|
|
return fmt.Errorf("not support getting subkeys of json field yet")
|
|
})
|
|
if err != nil {
|
|
log.Info("parse output field name failed", zap.String("field name", outputFieldName))
|
|
return nil, nil, nil, false, fmt.Errorf("parse output field name failed: %s", outputFieldName)
|
|
}
|
|
resultFieldNameMap[common.MetaFieldName] = true
|
|
userOutputFieldsMap[outputFieldName] = true
|
|
userDynamicFieldsMap[outputFieldName] = true
|
|
} else {
|
|
// TODO after cold field be able to fetched with chunk cache, this check shall be removed
|
|
return nil, nil, nil, false, fmt.Errorf("field %s cannot be returned since dynamic field not loaded", outputFieldName)
|
|
}
|
|
} else {
|
|
return nil, nil, nil, false, fmt.Errorf("field %s not exist", outputFieldName)
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
if removePkField {
|
|
delete(resultFieldNameMap, primaryFieldName)
|
|
delete(userOutputFieldsMap, primaryFieldName)
|
|
}
|
|
|
|
for fieldName := range resultFieldNameMap {
|
|
resultFieldNames = append(resultFieldNames, fieldName)
|
|
}
|
|
for fieldName := range userOutputFieldsMap {
|
|
userOutputFields = append(userOutputFields, fieldName)
|
|
}
|
|
if !useAllDyncamicFields {
|
|
for fieldName := range userDynamicFieldsMap {
|
|
userDynamicFields = append(userDynamicFields, fieldName)
|
|
}
|
|
}
|
|
|
|
return resultFieldNames, userOutputFields, userDynamicFields, userRequestedPkFieldExplicitly, nil
|
|
}
|
|
|
|
func validateIndexName(indexName string) error {
|
|
indexName = strings.TrimSpace(indexName)
|
|
|
|
if indexName == "" {
|
|
return nil
|
|
}
|
|
invalidMsg := "Invalid index name: " + indexName + ". "
|
|
if len(indexName) > Params.ProxyCfg.MaxNameLength.GetAsInt() {
|
|
msg := invalidMsg + "The length of a index name must be less than " + Params.ProxyCfg.MaxNameLength.GetValue() + " characters."
|
|
return errors.New(msg)
|
|
}
|
|
|
|
firstChar := indexName[0]
|
|
if firstChar != '_' && !isAlpha(firstChar) {
|
|
msg := invalidMsg + "The first character of a index name must be an underscore or letter."
|
|
return errors.New(msg)
|
|
}
|
|
|
|
indexNameSize := len(indexName)
|
|
for i := 1; i < indexNameSize; i++ {
|
|
c := indexName[i]
|
|
if c != '_' && !isAlpha(c) && !isNumber(c) {
|
|
msg := invalidMsg + "Index name can only contain numbers, letters, and underscores."
|
|
return errors.New(msg)
|
|
}
|
|
}
|
|
return nil
|
|
}
|
|
|
|
func isCollectionLoaded(ctx context.Context, qc types.QueryCoordClient, collID int64) (bool, error) {
|
|
// get all loading collections
|
|
resp, err := qc.ShowCollections(ctx, &querypb.ShowCollectionsRequest{
|
|
CollectionIDs: nil,
|
|
})
|
|
if err != nil {
|
|
return false, err
|
|
}
|
|
if resp.GetStatus().GetErrorCode() != commonpb.ErrorCode_Success {
|
|
return false, merr.Error(resp.GetStatus())
|
|
}
|
|
|
|
for _, loadedCollID := range resp.GetCollectionIDs() {
|
|
if collID == loadedCollID {
|
|
return true, nil
|
|
}
|
|
}
|
|
return false, nil
|
|
}
|
|
|
|
func isPartitionLoaded(ctx context.Context, qc types.QueryCoordClient, collID int64, partID int64) (bool, error) {
|
|
// get all loading collections
|
|
resp, err := qc.ShowPartitions(ctx, &querypb.ShowPartitionsRequest{
|
|
CollectionID: collID,
|
|
PartitionIDs: []int64{partID},
|
|
})
|
|
if err := merr.CheckRPCCall(resp, err); err != nil {
|
|
// qc returns error if partition not loaded
|
|
if errors.Is(err, merr.ErrPartitionNotLoaded) {
|
|
return false, nil
|
|
}
|
|
return false, err
|
|
}
|
|
|
|
return true, nil
|
|
}
|
|
|
|
func checkFieldsDataBySchema(schema *schemapb.CollectionSchema, insertMsg *msgstream.InsertMsg, inInsert bool) error {
|
|
log := log.With(zap.String("collection", schema.GetName()))
|
|
primaryKeyNum := 0
|
|
autoGenFieldNum := 0
|
|
|
|
dataNameSet := typeutil.NewSet[string]()
|
|
for _, data := range insertMsg.FieldsData {
|
|
fieldName := data.GetFieldName()
|
|
if dataNameSet.Contain(fieldName) {
|
|
return merr.WrapErrParameterInvalidMsg("duplicated field %s found", fieldName)
|
|
}
|
|
dataNameSet.Insert(fieldName)
|
|
}
|
|
|
|
for _, fieldSchema := range schema.Fields {
|
|
if fieldSchema.AutoID && !fieldSchema.IsPrimaryKey {
|
|
log.Warn("not primary key field, but set autoID true", zap.String("field", fieldSchema.GetName()))
|
|
return merr.WrapErrParameterInvalidMsg("only primary key could be with AutoID enabled")
|
|
}
|
|
|
|
if fieldSchema.IsPrimaryKey {
|
|
primaryKeyNum++
|
|
}
|
|
if fieldSchema.GetDefaultValue() != nil && fieldSchema.IsPrimaryKey {
|
|
return merr.WrapErrParameterInvalidMsg("primary key can't be with default value")
|
|
}
|
|
if (fieldSchema.IsPrimaryKey && fieldSchema.AutoID && !Params.ProxyCfg.SkipAutoIDCheck.GetAsBool() && inInsert) || IsBM25FunctionOutputField(fieldSchema, schema) {
|
|
// when inInsert, no need to pass when pk is autoid and SkipAutoIDCheck is false
|
|
autoGenFieldNum++
|
|
}
|
|
if _, ok := dataNameSet[fieldSchema.GetName()]; !ok {
|
|
if (fieldSchema.IsPrimaryKey && fieldSchema.AutoID && !Params.ProxyCfg.SkipAutoIDCheck.GetAsBool() && inInsert) || IsBM25FunctionOutputField(fieldSchema, schema) {
|
|
// autoGenField
|
|
continue
|
|
}
|
|
|
|
if fieldSchema.GetDefaultValue() == nil && !fieldSchema.GetNullable() {
|
|
log.Warn("no corresponding fieldData pass in", zap.String("fieldSchema", fieldSchema.GetName()))
|
|
return merr.WrapErrParameterInvalidMsg("fieldSchema(%s) has no corresponding fieldData pass in", fieldSchema.GetName())
|
|
}
|
|
// when use default_value or has set Nullable
|
|
// it's ok that no corresponding fieldData found
|
|
dataToAppend := &schemapb.FieldData{
|
|
Type: fieldSchema.GetDataType(),
|
|
FieldName: fieldSchema.GetName(),
|
|
}
|
|
insertMsg.FieldsData = append(insertMsg.FieldsData, dataToAppend)
|
|
}
|
|
}
|
|
|
|
if primaryKeyNum > 1 {
|
|
log.Warn("more than 1 primary keys not supported",
|
|
zap.Int64("primaryKeyNum", int64(primaryKeyNum)))
|
|
return merr.WrapErrParameterInvalidMsg("more than 1 primary keys not supported, got %d", primaryKeyNum)
|
|
}
|
|
expectedNum := len(schema.Fields)
|
|
actualNum := len(insertMsg.FieldsData) + autoGenFieldNum
|
|
|
|
if expectedNum != actualNum {
|
|
log.Warn("the number of fields is not the same as needed", zap.Int("expected", expectedNum), zap.Int("actual", actualNum))
|
|
return merr.WrapErrParameterInvalid(expectedNum, actualNum, "more fieldData has pass in")
|
|
}
|
|
|
|
return nil
|
|
}
|
|
|
|
func checkPrimaryFieldData(schema *schemapb.CollectionSchema, insertMsg *msgstream.InsertMsg) (*schemapb.IDs, error) {
|
|
log := log.With(zap.String("collectionName", insertMsg.CollectionName))
|
|
rowNums := uint32(insertMsg.NRows())
|
|
// TODO(dragondriver): in fact, NumRows is not trustable, we should check all input fields
|
|
if insertMsg.NRows() <= 0 {
|
|
return nil, merr.WrapErrParameterInvalid("invalid num_rows", fmt.Sprint(rowNums), "num_rows should be greater than 0")
|
|
}
|
|
|
|
if err := checkFieldsDataBySchema(schema, insertMsg, true); err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
primaryFieldSchema, err := typeutil.GetPrimaryFieldSchema(schema)
|
|
if err != nil {
|
|
log.Error("get primary field schema failed", zap.Any("schema", schema), zap.Error(err))
|
|
return nil, err
|
|
}
|
|
if primaryFieldSchema.GetNullable() {
|
|
return nil, merr.WrapErrParameterInvalidMsg("primary field not support null")
|
|
}
|
|
var primaryFieldData *schemapb.FieldData
|
|
// when checkPrimaryFieldData in insert
|
|
|
|
skipAutoIDCheck := Params.ProxyCfg.SkipAutoIDCheck.GetAsBool() &&
|
|
primaryFieldSchema.AutoID &&
|
|
typeutil.IsPrimaryFieldDataExist(insertMsg.GetFieldsData(), primaryFieldSchema)
|
|
|
|
if !primaryFieldSchema.AutoID || skipAutoIDCheck {
|
|
primaryFieldData, err = typeutil.GetPrimaryFieldData(insertMsg.GetFieldsData(), primaryFieldSchema)
|
|
if err != nil {
|
|
log.Info("get primary field data failed", zap.Error(err))
|
|
return nil, err
|
|
}
|
|
} else {
|
|
// check primary key data not exist
|
|
if typeutil.IsPrimaryFieldDataExist(insertMsg.GetFieldsData(), primaryFieldSchema) {
|
|
return nil, merr.WrapErrParameterInvalidMsg(fmt.Sprintf("can not assign primary field data when auto id enabled %v", primaryFieldSchema.Name))
|
|
}
|
|
// if autoID == true, currently support autoID for int64 and varchar PrimaryField
|
|
primaryFieldData, err = autoGenPrimaryFieldData(primaryFieldSchema, insertMsg.GetRowIDs())
|
|
if err != nil {
|
|
log.Info("generate primary field data failed when autoID == true", zap.Error(err))
|
|
return nil, err
|
|
}
|
|
// if autoID == true, set the primary field data
|
|
// insertMsg.fieldsData need append primaryFieldData
|
|
insertMsg.FieldsData = append(insertMsg.FieldsData, primaryFieldData)
|
|
}
|
|
|
|
// parse primaryFieldData to result.IDs, and as returned primary keys
|
|
ids, err := parsePrimaryFieldData2IDs(primaryFieldData)
|
|
if err != nil {
|
|
log.Warn("parse primary field data to IDs failed", zap.Error(err))
|
|
return nil, err
|
|
}
|
|
|
|
return ids, nil
|
|
}
|
|
|
|
// for some varchar with analzyer
|
|
// we need check char format before insert it to message queue
|
|
// now only support utf-8
|
|
func checkInputUtf8Compatiable(schema *schemapb.CollectionSchema, insertMsg *msgstream.InsertMsg) error {
|
|
checkeFields := lo.FilterMap(schema.GetFields(), func(field *schemapb.FieldSchema, _ int) (int64, bool) {
|
|
if field.DataType == schemapb.DataType_VarChar {
|
|
return field.GetFieldID(), true
|
|
}
|
|
|
|
if field.DataType != schemapb.DataType_Text {
|
|
return 0, false
|
|
}
|
|
|
|
for _, kv := range field.GetTypeParams() {
|
|
if kv.Key == common.EnableAnalyzerKey {
|
|
return field.GetFieldID(), true
|
|
}
|
|
}
|
|
return 0, false
|
|
})
|
|
|
|
if len(checkeFields) == 0 {
|
|
return nil
|
|
}
|
|
|
|
for _, fieldData := range insertMsg.FieldsData {
|
|
if !lo.Contains(checkeFields, fieldData.GetFieldId()) {
|
|
continue
|
|
}
|
|
|
|
strData := fieldData.GetScalars().GetStringData()
|
|
for row, data := range strData.GetData() {
|
|
ok := utf8.ValidString(data)
|
|
if !ok {
|
|
log.Warn("string field data not utf-8 format", zap.String("messageVersion", strData.ProtoReflect().Descriptor().Syntax().GoString()))
|
|
return merr.WrapErrAsInputError(fmt.Errorf("input with analyzer should be utf-8 format, but row: %d not utf-8 format. data: %s", row, data))
|
|
}
|
|
}
|
|
}
|
|
return nil
|
|
}
|
|
|
|
func checkUpsertPrimaryFieldData(schema *schemapb.CollectionSchema, insertMsg *msgstream.InsertMsg) (*schemapb.IDs, *schemapb.IDs, error) {
|
|
log := log.With(zap.String("collectionName", insertMsg.CollectionName))
|
|
rowNums := uint32(insertMsg.NRows())
|
|
// TODO(dragondriver): in fact, NumRows is not trustable, we should check all input fields
|
|
if insertMsg.NRows() <= 0 {
|
|
return nil, nil, merr.WrapErrParameterInvalid("invalid num_rows", fmt.Sprint(rowNums), "num_rows should be greater than 0")
|
|
}
|
|
|
|
if err := checkFieldsDataBySchema(schema, insertMsg, false); err != nil {
|
|
return nil, nil, err
|
|
}
|
|
|
|
primaryFieldSchema, err := typeutil.GetPrimaryFieldSchema(schema)
|
|
if err != nil {
|
|
log.Error("get primary field schema failed", zap.Any("schema", schema), zap.Error(err))
|
|
return nil, nil, err
|
|
}
|
|
if primaryFieldSchema.GetNullable() {
|
|
return nil, nil, merr.WrapErrParameterInvalidMsg("primary field not support null")
|
|
}
|
|
// get primaryFieldData whether autoID is true or not
|
|
var primaryFieldData *schemapb.FieldData
|
|
var newPrimaryFieldData *schemapb.FieldData
|
|
|
|
primaryFieldID := primaryFieldSchema.FieldID
|
|
primaryFieldName := primaryFieldSchema.Name
|
|
for i, field := range insertMsg.GetFieldsData() {
|
|
if field.FieldId == primaryFieldID || field.FieldName == primaryFieldName {
|
|
primaryFieldData = field
|
|
if primaryFieldSchema.AutoID {
|
|
// use the passed pk as new pk when autoID == false
|
|
// automatic generate pk as new pk wehen autoID == true
|
|
newPrimaryFieldData, err = autoGenPrimaryFieldData(primaryFieldSchema, insertMsg.GetRowIDs())
|
|
if err != nil {
|
|
log.Info("generate new primary field data failed when upsert", zap.Error(err))
|
|
return nil, nil, err
|
|
}
|
|
insertMsg.FieldsData = append(insertMsg.GetFieldsData()[:i], insertMsg.GetFieldsData()[i+1:]...)
|
|
insertMsg.FieldsData = append(insertMsg.FieldsData, newPrimaryFieldData)
|
|
}
|
|
break
|
|
}
|
|
}
|
|
// must assign primary field data when upsert
|
|
if primaryFieldData == nil {
|
|
return nil, nil, merr.WrapErrParameterInvalidMsg(fmt.Sprintf("must assign pk when upsert, primary field: %v", primaryFieldName))
|
|
}
|
|
|
|
// parse primaryFieldData to result.IDs, and as returned primary keys
|
|
ids, err := parsePrimaryFieldData2IDs(primaryFieldData)
|
|
if err != nil {
|
|
log.Warn("parse primary field data to IDs failed", zap.Error(err))
|
|
return nil, nil, err
|
|
}
|
|
if !primaryFieldSchema.GetAutoID() {
|
|
return ids, ids, nil
|
|
}
|
|
newIDs, err := parsePrimaryFieldData2IDs(newPrimaryFieldData)
|
|
if err != nil {
|
|
log.Warn("parse primary field data to IDs failed", zap.Error(err))
|
|
return nil, nil, err
|
|
}
|
|
return newIDs, ids, nil
|
|
}
|
|
|
|
func getPartitionKeyFieldData(fieldSchema *schemapb.FieldSchema, insertMsg *msgstream.InsertMsg) (*schemapb.FieldData, error) {
|
|
if len(insertMsg.GetPartitionName()) > 0 && !Params.ProxyCfg.SkipPartitionKeyCheck.GetAsBool() {
|
|
return nil, errors.New("not support manually specifying the partition names if partition key mode is used")
|
|
}
|
|
|
|
for _, fieldData := range insertMsg.GetFieldsData() {
|
|
if fieldData.GetFieldId() == fieldSchema.GetFieldID() {
|
|
return fieldData, nil
|
|
}
|
|
}
|
|
|
|
return nil, errors.New("partition key not specify when insert")
|
|
}
|
|
|
|
func getCollectionProgress(
|
|
ctx context.Context,
|
|
queryCoord types.QueryCoordClient,
|
|
msgBase *commonpb.MsgBase,
|
|
collectionID int64,
|
|
) (loadProgress int64, refreshProgress int64, err error) {
|
|
resp, err := queryCoord.ShowCollections(ctx, &querypb.ShowCollectionsRequest{
|
|
Base: commonpbutil.UpdateMsgBase(
|
|
msgBase,
|
|
commonpbutil.WithMsgType(commonpb.MsgType_ShowCollections),
|
|
),
|
|
CollectionIDs: []int64{collectionID},
|
|
})
|
|
if err != nil {
|
|
log.Ctx(ctx).Warn("fail to show collections",
|
|
zap.Int64("collectionID", collectionID),
|
|
zap.Error(err),
|
|
)
|
|
return
|
|
}
|
|
|
|
err = merr.Error(resp.GetStatus())
|
|
if err != nil {
|
|
log.Ctx(ctx).Warn("fail to show collections",
|
|
zap.Int64("collectionID", collectionID),
|
|
zap.Error(err))
|
|
return
|
|
}
|
|
|
|
loadProgress = resp.GetInMemoryPercentages()[0]
|
|
if len(resp.GetRefreshProgress()) > 0 { // Compatibility for new Proxy with old QueryCoord
|
|
refreshProgress = resp.GetRefreshProgress()[0]
|
|
}
|
|
|
|
return
|
|
}
|
|
|
|
func getPartitionProgress(
|
|
ctx context.Context,
|
|
queryCoord types.QueryCoordClient,
|
|
msgBase *commonpb.MsgBase,
|
|
partitionNames []string,
|
|
collectionName string,
|
|
collectionID int64,
|
|
dbName string,
|
|
) (loadProgress int64, refreshProgress int64, err error) {
|
|
IDs2Names := make(map[int64]string)
|
|
partitionIDs := make([]int64, 0)
|
|
for _, partitionName := range partitionNames {
|
|
var partitionID int64
|
|
partitionID, err = globalMetaCache.GetPartitionID(ctx, dbName, collectionName, partitionName)
|
|
if err != nil {
|
|
return
|
|
}
|
|
IDs2Names[partitionID] = partitionName
|
|
partitionIDs = append(partitionIDs, partitionID)
|
|
}
|
|
|
|
var resp *querypb.ShowPartitionsResponse
|
|
resp, err = queryCoord.ShowPartitions(ctx, &querypb.ShowPartitionsRequest{
|
|
Base: commonpbutil.UpdateMsgBase(
|
|
msgBase,
|
|
commonpbutil.WithMsgType(commonpb.MsgType_ShowPartitions),
|
|
),
|
|
CollectionID: collectionID,
|
|
PartitionIDs: partitionIDs,
|
|
})
|
|
if err != nil {
|
|
log.Ctx(ctx).Warn("fail to show partitions", zap.Int64("collection_id", collectionID),
|
|
zap.String("collection_name", collectionName),
|
|
zap.Strings("partition_names", partitionNames),
|
|
zap.Error(err))
|
|
return
|
|
}
|
|
|
|
err = merr.Error(resp.GetStatus())
|
|
if err != nil {
|
|
err = merr.Error(resp.GetStatus())
|
|
log.Ctx(ctx).Warn("fail to show partitions",
|
|
zap.String("collectionName", collectionName),
|
|
zap.Strings("partitionNames", partitionNames),
|
|
zap.Error(err))
|
|
return
|
|
}
|
|
|
|
for _, p := range resp.InMemoryPercentages {
|
|
loadProgress += p
|
|
}
|
|
loadProgress /= int64(len(partitionIDs))
|
|
|
|
if len(resp.GetRefreshProgress()) > 0 { // Compatibility for new Proxy with old QueryCoord
|
|
refreshProgress = resp.GetRefreshProgress()[0]
|
|
}
|
|
|
|
return
|
|
}
|
|
|
|
func isPartitionKeyMode(ctx context.Context, dbName string, colName string) (bool, error) {
|
|
colSchema, err := globalMetaCache.GetCollectionSchema(ctx, dbName, colName)
|
|
if err != nil {
|
|
return false, err
|
|
}
|
|
|
|
for _, fieldSchema := range colSchema.GetFields() {
|
|
if fieldSchema.IsPartitionKey {
|
|
return true, nil
|
|
}
|
|
}
|
|
|
|
return false, nil
|
|
}
|
|
|
|
func hasPartitionKeyModeField(schema *schemapb.CollectionSchema) bool {
|
|
for _, fieldSchema := range schema.GetFields() {
|
|
if fieldSchema.IsPartitionKey {
|
|
return true
|
|
}
|
|
}
|
|
return false
|
|
}
|
|
|
|
// getDefaultPartitionsInPartitionKeyMode only used in partition key mode
|
|
func getDefaultPartitionsInPartitionKeyMode(ctx context.Context, dbName string, collectionName string) ([]string, error) {
|
|
partitions, err := globalMetaCache.GetPartitions(ctx, dbName, collectionName)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
// Make sure the order of the partition names got every time is the same
|
|
partitionNames, _, err := typeutil.RearrangePartitionsForPartitionKey(partitions)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
return partitionNames, nil
|
|
}
|
|
|
|
func assignChannelsByPK(pks *schemapb.IDs, channelNames []string, insertMsg *msgstream.InsertMsg) map[string][]int {
|
|
insertMsg.HashValues = typeutil.HashPK2Channels(pks, channelNames)
|
|
|
|
// groupedHashKeys represents the dmChannel index
|
|
channel2RowOffsets := make(map[string][]int) // channelName to count
|
|
// assert len(it.hashValues) < maxInt
|
|
for offset, channelID := range insertMsg.HashValues {
|
|
channelName := channelNames[channelID]
|
|
if _, ok := channel2RowOffsets[channelName]; !ok {
|
|
channel2RowOffsets[channelName] = []int{}
|
|
}
|
|
channel2RowOffsets[channelName] = append(channel2RowOffsets[channelName], offset)
|
|
}
|
|
|
|
return channel2RowOffsets
|
|
}
|
|
|
|
func assignPartitionKeys(ctx context.Context, dbName string, collName string, keys []*planpb.GenericValue) ([]string, error) {
|
|
partitionNames, err := globalMetaCache.GetPartitionsIndex(ctx, dbName, collName)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
schema, err := globalMetaCache.GetCollectionSchema(ctx, dbName, collName)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
partitionKeyFieldSchema, err := typeutil.GetPartitionKeyFieldSchema(schema.CollectionSchema)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
hashedPartitionNames, err := typeutil2.HashKey2Partitions(partitionKeyFieldSchema, keys, partitionNames)
|
|
return hashedPartitionNames, err
|
|
}
|
|
|
|
func ErrWithLog(logger *log.MLogger, msg string, err error) error {
|
|
wrapErr := errors.Wrap(err, msg)
|
|
if logger != nil {
|
|
logger.Warn(msg, zap.Error(err))
|
|
return wrapErr
|
|
}
|
|
log.Warn(msg, zap.Error(err))
|
|
return wrapErr
|
|
}
|
|
|
|
func verifyDynamicFieldData(schema *schemapb.CollectionSchema, insertMsg *msgstream.InsertMsg) error {
|
|
for _, field := range insertMsg.FieldsData {
|
|
if field.GetFieldName() == common.MetaFieldName {
|
|
if !schema.EnableDynamicField {
|
|
return fmt.Errorf("without dynamic schema enabled, the field name cannot be set to %s", common.MetaFieldName)
|
|
}
|
|
for _, rowData := range field.GetScalars().GetJsonData().GetData() {
|
|
jsonData := make(map[string]interface{})
|
|
if err := json.Unmarshal(rowData, &jsonData); err != nil {
|
|
log.Info("insert invalid dynamic data, milvus only support json map",
|
|
zap.ByteString("data", rowData),
|
|
zap.Error(err),
|
|
)
|
|
return merr.WrapErrIoFailedReason(err.Error())
|
|
}
|
|
if _, ok := jsonData[common.MetaFieldName]; ok {
|
|
return fmt.Errorf("cannot set json key to: %s", common.MetaFieldName)
|
|
}
|
|
for _, f := range schema.GetFields() {
|
|
if _, ok := jsonData[f.GetName()]; ok {
|
|
log.Info("dynamic field name include the static field name", zap.String("fieldName", f.GetName()))
|
|
return fmt.Errorf("dynamic field name cannot include the static field name: %s", f.GetName())
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
return nil
|
|
}
|
|
|
|
func checkDynamicFieldData(schema *schemapb.CollectionSchema, insertMsg *msgstream.InsertMsg) error {
|
|
for _, data := range insertMsg.FieldsData {
|
|
if data.IsDynamic {
|
|
data.FieldName = common.MetaFieldName
|
|
return verifyDynamicFieldData(schema, insertMsg)
|
|
}
|
|
}
|
|
defaultData := make([][]byte, insertMsg.NRows())
|
|
for i := range defaultData {
|
|
defaultData[i] = []byte("{}")
|
|
}
|
|
dynamicData := autoGenDynamicFieldData(defaultData)
|
|
insertMsg.FieldsData = append(insertMsg.FieldsData, dynamicData)
|
|
return nil
|
|
}
|
|
|
|
func SendReplicateMessagePack(ctx context.Context, replicateMsgStream msgstream.MsgStream, request interface{ GetBase() *commonpb.MsgBase }) {
|
|
if replicateMsgStream == nil || request == nil {
|
|
log.Ctx(ctx).Warn("replicate msg stream or request is nil", zap.Any("request", request))
|
|
return
|
|
}
|
|
msgBase := request.GetBase()
|
|
ts := msgBase.GetTimestamp()
|
|
if msgBase.GetReplicateInfo().GetIsReplicate() {
|
|
ts = msgBase.GetReplicateInfo().GetMsgTimestamp()
|
|
}
|
|
getBaseMsg := func(ctx context.Context, ts uint64) msgstream.BaseMsg {
|
|
return msgstream.BaseMsg{
|
|
Ctx: ctx,
|
|
HashValues: []uint32{0},
|
|
BeginTimestamp: ts,
|
|
EndTimestamp: ts,
|
|
}
|
|
}
|
|
|
|
var tsMsg msgstream.TsMsg
|
|
switch r := request.(type) {
|
|
case *milvuspb.CreateDatabaseRequest:
|
|
tsMsg = &msgstream.CreateDatabaseMsg{
|
|
BaseMsg: getBaseMsg(ctx, ts),
|
|
CreateDatabaseRequest: r,
|
|
}
|
|
case *milvuspb.DropDatabaseRequest:
|
|
tsMsg = &msgstream.DropDatabaseMsg{
|
|
BaseMsg: getBaseMsg(ctx, ts),
|
|
DropDatabaseRequest: r,
|
|
}
|
|
case *milvuspb.AlterDatabaseRequest:
|
|
tsMsg = &msgstream.AlterDatabaseMsg{
|
|
BaseMsg: getBaseMsg(ctx, ts),
|
|
AlterDatabaseRequest: r,
|
|
}
|
|
case *milvuspb.FlushRequest:
|
|
tsMsg = &msgstream.FlushMsg{
|
|
BaseMsg: getBaseMsg(ctx, ts),
|
|
FlushRequest: r,
|
|
}
|
|
case *milvuspb.LoadCollectionRequest:
|
|
tsMsg = &msgstream.LoadCollectionMsg{
|
|
BaseMsg: getBaseMsg(ctx, ts),
|
|
LoadCollectionRequest: r,
|
|
}
|
|
case *milvuspb.ReleaseCollectionRequest:
|
|
tsMsg = &msgstream.ReleaseCollectionMsg{
|
|
BaseMsg: getBaseMsg(ctx, ts),
|
|
ReleaseCollectionRequest: r,
|
|
}
|
|
case *milvuspb.CreateIndexRequest:
|
|
tsMsg = &msgstream.CreateIndexMsg{
|
|
BaseMsg: getBaseMsg(ctx, ts),
|
|
CreateIndexRequest: r,
|
|
}
|
|
case *milvuspb.DropIndexRequest:
|
|
tsMsg = &msgstream.DropIndexMsg{
|
|
BaseMsg: getBaseMsg(ctx, ts),
|
|
DropIndexRequest: r,
|
|
}
|
|
case *milvuspb.LoadPartitionsRequest:
|
|
tsMsg = &msgstream.LoadPartitionsMsg{
|
|
BaseMsg: getBaseMsg(ctx, ts),
|
|
LoadPartitionsRequest: r,
|
|
}
|
|
case *milvuspb.ReleasePartitionsRequest:
|
|
tsMsg = &msgstream.ReleasePartitionsMsg{
|
|
BaseMsg: getBaseMsg(ctx, ts),
|
|
ReleasePartitionsRequest: r,
|
|
}
|
|
case *milvuspb.AlterIndexRequest:
|
|
tsMsg = &msgstream.AlterIndexMsg{
|
|
BaseMsg: getBaseMsg(ctx, ts),
|
|
AlterIndexRequest: r,
|
|
}
|
|
case *milvuspb.CreateCredentialRequest:
|
|
tsMsg = &msgstream.CreateUserMsg{
|
|
BaseMsg: getBaseMsg(ctx, ts),
|
|
CreateCredentialRequest: r,
|
|
}
|
|
case *milvuspb.UpdateCredentialRequest:
|
|
tsMsg = &msgstream.UpdateUserMsg{
|
|
BaseMsg: getBaseMsg(ctx, ts),
|
|
UpdateCredentialRequest: r,
|
|
}
|
|
case *milvuspb.DeleteCredentialRequest:
|
|
tsMsg = &msgstream.DeleteUserMsg{
|
|
BaseMsg: getBaseMsg(ctx, ts),
|
|
DeleteCredentialRequest: r,
|
|
}
|
|
case *milvuspb.CreateRoleRequest:
|
|
tsMsg = &msgstream.CreateRoleMsg{
|
|
BaseMsg: getBaseMsg(ctx, ts),
|
|
CreateRoleRequest: r,
|
|
}
|
|
case *milvuspb.DropRoleRequest:
|
|
tsMsg = &msgstream.DropRoleMsg{
|
|
BaseMsg: getBaseMsg(ctx, ts),
|
|
DropRoleRequest: r,
|
|
}
|
|
case *milvuspb.OperateUserRoleRequest:
|
|
tsMsg = &msgstream.OperateUserRoleMsg{
|
|
BaseMsg: getBaseMsg(ctx, ts),
|
|
OperateUserRoleRequest: r,
|
|
}
|
|
case *milvuspb.OperatePrivilegeRequest:
|
|
tsMsg = &msgstream.OperatePrivilegeMsg{
|
|
BaseMsg: getBaseMsg(ctx, ts),
|
|
OperatePrivilegeRequest: r,
|
|
}
|
|
default:
|
|
log.Warn("unknown request", zap.Any("request", request))
|
|
return
|
|
}
|
|
msgPack := &msgstream.MsgPack{
|
|
BeginTs: ts,
|
|
EndTs: ts,
|
|
Msgs: []msgstream.TsMsg{tsMsg},
|
|
}
|
|
msgErr := replicateMsgStream.Produce(ctx, msgPack)
|
|
// ignore the error if the msg stream failed to produce the msg,
|
|
// because it can be manually fixed in this error
|
|
if msgErr != nil {
|
|
log.Warn("send replicate msg failed", zap.Any("pack", msgPack), zap.Error(msgErr))
|
|
}
|
|
}
|
|
|
|
func GetCachedCollectionSchema(ctx context.Context, dbName string, colName string) (*schemaInfo, error) {
|
|
if globalMetaCache != nil {
|
|
return globalMetaCache.GetCollectionSchema(ctx, dbName, colName)
|
|
}
|
|
return nil, merr.WrapErrServiceNotReady(paramtable.GetRole(), paramtable.GetNodeID(), "initialization")
|
|
}
|
|
|
|
func CheckDatabase(ctx context.Context, dbName string) bool {
|
|
if globalMetaCache != nil {
|
|
return globalMetaCache.HasDatabase(ctx, dbName)
|
|
}
|
|
return false
|
|
}
|
|
|
|
func SetReportValue(status *commonpb.Status, value int) {
|
|
if value <= 0 {
|
|
return
|
|
}
|
|
if !merr.Ok(status) {
|
|
return
|
|
}
|
|
if status.ExtraInfo == nil {
|
|
status.ExtraInfo = make(map[string]string)
|
|
}
|
|
status.ExtraInfo["report_value"] = strconv.Itoa(value)
|
|
}
|
|
|
|
func GetCostValue(status *commonpb.Status) int {
|
|
if status == nil || status.ExtraInfo == nil {
|
|
return 0
|
|
}
|
|
value, err := strconv.Atoi(status.ExtraInfo["report_value"])
|
|
if err != nil {
|
|
return 0
|
|
}
|
|
return value
|
|
}
|
|
|
|
// GetRequestInfo returns collection name and rateType of request and return tokens needed.
|
|
func GetRequestInfo(ctx context.Context, req proto.Message) (int64, map[int64][]int64, internalpb.RateType, int, error) {
|
|
switch r := req.(type) {
|
|
case *milvuspb.InsertRequest:
|
|
dbID, collToPartIDs, err := getCollectionAndPartitionID(ctx, req.(reqPartName))
|
|
return dbID, collToPartIDs, internalpb.RateType_DMLInsert, proto.Size(r), err
|
|
case *milvuspb.UpsertRequest:
|
|
dbID, collToPartIDs, err := getCollectionAndPartitionID(ctx, req.(reqPartName))
|
|
return dbID, collToPartIDs, internalpb.RateType_DMLInsert, proto.Size(r), err
|
|
case *milvuspb.DeleteRequest:
|
|
dbID, collToPartIDs, err := getCollectionAndPartitionID(ctx, req.(reqPartName))
|
|
return dbID, collToPartIDs, internalpb.RateType_DMLDelete, proto.Size(r), err
|
|
case *milvuspb.ImportRequest:
|
|
dbID, collToPartIDs, err := getCollectionAndPartitionID(ctx, req.(reqPartName))
|
|
return dbID, collToPartIDs, internalpb.RateType_DMLBulkLoad, proto.Size(r), err
|
|
case *milvuspb.SearchRequest:
|
|
dbID, collToPartIDs, err := getCollectionAndPartitionIDs(ctx, req.(reqPartNames))
|
|
return dbID, collToPartIDs, internalpb.RateType_DQLSearch, int(r.GetNq()), err
|
|
case *milvuspb.QueryRequest:
|
|
dbID, collToPartIDs, err := getCollectionAndPartitionIDs(ctx, req.(reqPartNames))
|
|
return dbID, collToPartIDs, internalpb.RateType_DQLQuery, 1, err // think of the query request's nq as 1
|
|
case *milvuspb.CreateCollectionRequest:
|
|
dbID, collToPartIDs := getCollectionID(req.(reqCollName))
|
|
return dbID, collToPartIDs, internalpb.RateType_DDLCollection, 1, nil
|
|
case *milvuspb.DropCollectionRequest:
|
|
dbID, collToPartIDs := getCollectionID(req.(reqCollName))
|
|
return dbID, collToPartIDs, internalpb.RateType_DDLCollection, 1, nil
|
|
case *milvuspb.LoadCollectionRequest:
|
|
dbID, collToPartIDs := getCollectionID(req.(reqCollName))
|
|
return dbID, collToPartIDs, internalpb.RateType_DDLCollection, 1, nil
|
|
case *milvuspb.ReleaseCollectionRequest:
|
|
dbID, collToPartIDs := getCollectionID(req.(reqCollName))
|
|
return dbID, collToPartIDs, internalpb.RateType_DDLCollection, 1, nil
|
|
case *milvuspb.CreatePartitionRequest:
|
|
dbID, collToPartIDs := getCollectionID(req.(reqCollName))
|
|
return dbID, collToPartIDs, internalpb.RateType_DDLPartition, 1, nil
|
|
case *milvuspb.DropPartitionRequest:
|
|
dbID, collToPartIDs := getCollectionID(req.(reqCollName))
|
|
return dbID, collToPartIDs, internalpb.RateType_DDLPartition, 1, nil
|
|
case *milvuspb.LoadPartitionsRequest:
|
|
dbID, collToPartIDs := getCollectionID(req.(reqCollName))
|
|
return dbID, collToPartIDs, internalpb.RateType_DDLPartition, 1, nil
|
|
case *milvuspb.ReleasePartitionsRequest:
|
|
dbID, collToPartIDs := getCollectionID(req.(reqCollName))
|
|
return dbID, collToPartIDs, internalpb.RateType_DDLPartition, 1, nil
|
|
case *milvuspb.CreateIndexRequest:
|
|
dbID, collToPartIDs := getCollectionID(req.(reqCollName))
|
|
return dbID, collToPartIDs, internalpb.RateType_DDLIndex, 1, nil
|
|
case *milvuspb.DropIndexRequest:
|
|
dbID, collToPartIDs := getCollectionID(req.(reqCollName))
|
|
return dbID, collToPartIDs, internalpb.RateType_DDLIndex, 1, nil
|
|
case *milvuspb.FlushRequest:
|
|
db, err := globalMetaCache.GetDatabaseInfo(ctx, r.GetDbName())
|
|
if err != nil {
|
|
return util.InvalidDBID, map[int64][]int64{}, 0, 0, err
|
|
}
|
|
|
|
collToPartIDs := make(map[int64][]int64, 0)
|
|
for _, collectionName := range r.GetCollectionNames() {
|
|
collectionID, err := globalMetaCache.GetCollectionID(ctx, r.GetDbName(), collectionName)
|
|
if err != nil {
|
|
return util.InvalidDBID, map[int64][]int64{}, 0, 0, err
|
|
}
|
|
collToPartIDs[collectionID] = []int64{}
|
|
}
|
|
return db.dbID, collToPartIDs, internalpb.RateType_DDLFlush, 1, nil
|
|
case *milvuspb.ManualCompactionRequest:
|
|
dbName := GetCurDBNameFromContextOrDefault(ctx)
|
|
dbInfo, err := globalMetaCache.GetDatabaseInfo(ctx, dbName)
|
|
if err != nil {
|
|
return util.InvalidDBID, map[int64][]int64{}, 0, 0, err
|
|
}
|
|
return dbInfo.dbID, map[int64][]int64{
|
|
r.GetCollectionID(): {},
|
|
}, internalpb.RateType_DDLCompaction, 1, nil
|
|
case *milvuspb.CreateDatabaseRequest:
|
|
log.Info("rate limiter CreateDatabaseRequest")
|
|
return util.InvalidDBID, map[int64][]int64{}, internalpb.RateType_DDLDB, 1, nil
|
|
case *milvuspb.DropDatabaseRequest:
|
|
log.Info("rate limiter DropDatabaseRequest")
|
|
return util.InvalidDBID, map[int64][]int64{}, internalpb.RateType_DDLDB, 1, nil
|
|
case *milvuspb.AlterDatabaseRequest:
|
|
return util.InvalidDBID, map[int64][]int64{}, internalpb.RateType_DDLDB, 1, nil
|
|
default: // TODO: support more request
|
|
if req == nil {
|
|
return util.InvalidDBID, map[int64][]int64{}, 0, 0, fmt.Errorf("null request")
|
|
}
|
|
log.RatedWarn(60, "not supported request type for rate limiter", zap.String("type", reflect.TypeOf(req).String()))
|
|
return util.InvalidDBID, map[int64][]int64{}, 0, 0, nil
|
|
}
|
|
}
|
|
|
|
// GetFailedResponse returns failed response.
|
|
func GetFailedResponse(req any, err error) any {
|
|
switch req.(type) {
|
|
case *milvuspb.InsertRequest, *milvuspb.DeleteRequest, *milvuspb.UpsertRequest:
|
|
return failedMutationResult(err)
|
|
case *milvuspb.ImportRequest:
|
|
return &milvuspb.ImportResponse{
|
|
Status: merr.Status(err),
|
|
}
|
|
case *milvuspb.SearchRequest:
|
|
return &milvuspb.SearchResults{
|
|
Status: merr.Status(err),
|
|
}
|
|
case *milvuspb.QueryRequest:
|
|
return &milvuspb.QueryResults{
|
|
Status: merr.Status(err),
|
|
}
|
|
case *milvuspb.CreateCollectionRequest, *milvuspb.DropCollectionRequest,
|
|
*milvuspb.LoadCollectionRequest, *milvuspb.ReleaseCollectionRequest,
|
|
*milvuspb.CreatePartitionRequest, *milvuspb.DropPartitionRequest,
|
|
*milvuspb.LoadPartitionsRequest, *milvuspb.ReleasePartitionsRequest,
|
|
*milvuspb.CreateIndexRequest, *milvuspb.DropIndexRequest,
|
|
*milvuspb.CreateDatabaseRequest, *milvuspb.DropDatabaseRequest,
|
|
*milvuspb.AlterDatabaseRequest:
|
|
return merr.Status(err)
|
|
case *milvuspb.FlushRequest:
|
|
return &milvuspb.FlushResponse{
|
|
Status: merr.Status(err),
|
|
}
|
|
case *milvuspb.ManualCompactionRequest:
|
|
return &milvuspb.ManualCompactionResponse{
|
|
Status: merr.Status(err),
|
|
}
|
|
}
|
|
return nil
|
|
}
|
|
|
|
func GetReplicateID(ctx context.Context, database, collectionName string) (string, error) {
|
|
if globalMetaCache == nil {
|
|
return "", merr.WrapErrServiceUnavailable("internal: Milvus Proxy is not ready yet. please wait")
|
|
}
|
|
colInfo, err := globalMetaCache.GetCollectionInfo(ctx, database, collectionName, 0)
|
|
if err != nil {
|
|
return "", err
|
|
}
|
|
if colInfo.replicateID != "" {
|
|
return colInfo.replicateID, nil
|
|
}
|
|
dbInfo, err := globalMetaCache.GetDatabaseInfo(ctx, database)
|
|
if err != nil {
|
|
return "", err
|
|
}
|
|
replicateID, _ := common.GetReplicateID(dbInfo.properties)
|
|
return replicateID, nil
|
|
}
|
|
|
|
func IsBM25FunctionOutputField(field *schemapb.FieldSchema, collSchema *schemapb.CollectionSchema) bool {
|
|
if !(field.GetIsFunctionOutput() && field.GetDataType() == schemapb.DataType_SparseFloatVector) {
|
|
return false
|
|
}
|
|
|
|
for _, fSchema := range collSchema.Functions {
|
|
if fSchema.Type == schemapb.FunctionType_BM25 {
|
|
if len(fSchema.OutputFieldNames) != 0 && field.Name == fSchema.OutputFieldNames[0] {
|
|
return true
|
|
}
|
|
if len(fSchema.OutputFieldIds) != 0 && field.FieldID == fSchema.OutputFieldIds[0] {
|
|
return true
|
|
}
|
|
}
|
|
}
|
|
return false
|
|
}
|