From 1f43be4a3cd46b6ea6b3a0c4e90077c4f050f400 Mon Sep 17 00:00:00 2001 From: "cai.zhang" Date: Thu, 21 Mar 2024 10:37:08 +0800 Subject: [PATCH] enhance: Support auto index for scalar index (#31255) issue: #29309 reopen pr : #29310 Signed-off-by: Cai Zhang --- internal/proxy/task_index.go | 61 ++++---- internal/proxy/task_index_test.go | 146 +++++++++++++++++- internal/proxy/task_test.go | 2 +- internal/proxy/util.go | 16 +- .../indexparamcheck/auto_index_checker.go | 22 +++ pkg/util/indexparamcheck/conf_adapter_mgr.go | 7 + pkg/util/indexparamcheck/index_type.go | 7 + pkg/util/indexparamcheck/inverted_checker.go | 28 ++++ .../indexparamcheck/inverted_checker_test.go | 25 +++ .../indexparamcheck/scalar_index_checker.go | 7 +- .../scalar_index_checker_test.go | 5 +- pkg/util/indexparamcheck/stl_sort_checker.go | 28 ++++ .../indexparamcheck/stl_sort_checker_test.go | 22 +++ pkg/util/indexparamcheck/trie_checker.go | 28 ++++ pkg/util/indexparamcheck/trie_checker_test.go | 23 +++ pkg/util/paramtable/autoindex_param.go | 57 +++++++ pkg/util/paramtable/autoindex_param_test.go | 23 +++ tests/python_client/testcases/test_index.py | 19 +-- 18 files changed, 459 insertions(+), 67 deletions(-) create mode 100644 pkg/util/indexparamcheck/auto_index_checker.go create mode 100644 pkg/util/indexparamcheck/inverted_checker.go create mode 100644 pkg/util/indexparamcheck/inverted_checker_test.go create mode 100644 pkg/util/indexparamcheck/stl_sort_checker.go create mode 100644 pkg/util/indexparamcheck/stl_sort_checker_test.go create mode 100644 pkg/util/indexparamcheck/trie_checker.go create mode 100644 pkg/util/indexparamcheck/trie_checker_test.go diff --git a/internal/proxy/task_index.go b/internal/proxy/task_index.go index 3eb1f756c6..5bc7f1c838 100644 --- a/internal/proxy/task_index.go +++ b/internal/proxy/task_index.go @@ -144,36 +144,29 @@ func (cit *createIndexTask) parseIndexParams() error { indexParamsMap[kv.Key] = kv.Value } } + + specifyIndexType, exist := 
indexParamsMap[common.IndexTypeKey] + if exist && specifyIndexType != "" { + _, err := indexparamcheck.GetIndexCheckerMgrInstance().GetChecker(specifyIndexType) + if err != nil { + log.Ctx(cit.ctx).Warn("Failed to get index checker", zap.String(common.IndexTypeKey, specifyIndexType)) + return merr.WrapErrParameterInvalid("valid index", fmt.Sprintf("invalid index type: %s", specifyIndexType)) + } + } + if !isVecIndex { specifyIndexType, exist := indexParamsMap[common.IndexTypeKey] - if cit.fieldSchema.DataType == schemapb.DataType_VarChar { - if !exist { - indexParamsMap[common.IndexTypeKey] = DefaultStringIndexType + if Params.AutoIndexConfig.ScalarAutoIndexEnable.GetAsBool() || specifyIndexType == AutoIndexName || !exist { + if typeutil.IsArithmetic(cit.fieldSchema.DataType) { + indexParamsMap[common.IndexTypeKey] = Params.AutoIndexConfig.ScalarNumericIndexType.GetValue() + } else if typeutil.IsStringType(cit.fieldSchema.DataType) { + indexParamsMap[common.IndexTypeKey] = Params.AutoIndexConfig.ScalarVarcharIndexType.GetValue() + } else if typeutil.IsBoolType(cit.fieldSchema.DataType) { + indexParamsMap[common.IndexTypeKey] = Params.AutoIndexConfig.ScalarBoolIndexType.GetValue() + } else { + return merr.WrapErrParameterInvalid("supported field", + fmt.Sprintf("create auto index on %s field is not supported", cit.fieldSchema.DataType.String())) } - - if exist && !validateStringIndexType(specifyIndexType) { - return merr.WrapErrParameterInvalid(DefaultStringIndexType, specifyIndexType, "index type not match") - } - } else if typeutil.IsArithmetic(cit.fieldSchema.DataType) { - if !exist { - indexParamsMap[common.IndexTypeKey] = DefaultArithmeticIndexType - } - - if exist && !validateArithmeticIndexType(specifyIndexType) { - return merr.WrapErrParameterInvalid(DefaultArithmeticIndexType, specifyIndexType, "index type not match") - } - } else if typeutil.IsBoolType(cit.fieldSchema.DataType) { - if !exist { - return merr.WrapErrParameterInvalidMsg("no index type 
specified") - } - if specifyIndexType != InvertedIndexType { - return merr.WrapErrParameterInvalidMsg("index type (%s) not supported for boolean, supported: %s", - specifyIndexType, InvertedIndexType) - } - } else { - return merr.WrapErrParameterInvalid("supported field", - fmt.Sprintf("create index on %s field", cit.fieldSchema.DataType.String()), - fmt.Sprintf("create index on %s field is not supported", cit.fieldSchema.DataType.String())) } } else { specifyIndexType, exist := indexParamsMap[common.IndexTypeKey] @@ -263,12 +256,13 @@ func (cit *createIndexTask) parseIndexParams() error { return fmt.Errorf("only IP is the supported metric type for sparse index") } } - - err := checkTrain(cit.fieldSchema, indexParamsMap) - if err != nil { - return err - } } + + err := checkTrain(cit.fieldSchema, indexParamsMap) + if err != nil { + return merr.WrapErrParameterInvalid("valid index params", "invalid index params", err.Error()) + } + typeParams := cit.fieldSchema.GetTypeParams() typeParamsMap := make(map[string]string) for _, pair := range typeParams { @@ -337,9 +331,6 @@ func fillDimension(field *schemapb.FieldSchema, indexParams map[string]string) e func checkTrain(field *schemapb.FieldSchema, indexParams map[string]string) error { indexType := indexParams[common.IndexTypeKey] - if !isVectorType(field.GetDataType()) { - return indexparamcheck.CheckIndexValid(field.GetDataType(), indexType, indexParams) - } checker, err := indexparamcheck.GetIndexCheckerMgrInstance().GetChecker(indexType) if err != nil { diff --git a/internal/proxy/task_index_test.go b/internal/proxy/task_index_test.go index e365ca88d4..20840c2848 100644 --- a/internal/proxy/task_index_test.go +++ b/internal/proxy/task_index_test.go @@ -593,6 +593,24 @@ func Test_parseIndexParams(t *testing.T) { assert.NoError(t, err) }) + t.Run("create index on VarChar field without index type", func(t *testing.T) { + cit := &createIndexTask{ + req: &milvuspb.CreateIndexRequest{ + ExtraParams: 
[]*commonpb.KeyValuePair{}, + IndexName: "", + }, + fieldSchema: &schemapb.FieldSchema{ + FieldID: 101, + Name: "FieldID", + IsPrimaryKey: false, + DataType: schemapb.DataType_VarChar, + }, + } + err := cit.parseIndexParams() + assert.NoError(t, err) + assert.Equal(t, cit.newIndexParams, []*commonpb.KeyValuePair{{Key: common.IndexTypeKey, Value: DefaultStringIndexType}}) + }) + t.Run("create index on Arithmetic field", func(t *testing.T) { cit := &createIndexTask{ req: &milvuspb.CreateIndexRequest{ @@ -615,6 +633,24 @@ func Test_parseIndexParams(t *testing.T) { assert.NoError(t, err) }) + t.Run("create index on Arithmetic field without index type", func(t *testing.T) { + cit := &createIndexTask{ + req: &milvuspb.CreateIndexRequest{ + ExtraParams: []*commonpb.KeyValuePair{}, + IndexName: "", + }, + fieldSchema: &schemapb.FieldSchema{ + FieldID: 101, + Name: "FieldID", + IsPrimaryKey: false, + DataType: schemapb.DataType_Int64, + }, + } + err := cit.parseIndexParams() + assert.NoError(t, err) + assert.Equal(t, cit.newIndexParams, []*commonpb.KeyValuePair{{Key: common.IndexTypeKey, Value: DefaultArithmeticIndexType}}) + }) + // Compatible with the old version <= 2.3.0 t.Run("create marisa-trie index on VarChar field", func(t *testing.T) { cit := &createIndexTask{ @@ -763,7 +799,7 @@ func Test_parseIndexParams(t *testing.T) { }, } err := cit4.parseIndexParams() - assert.ErrorIs(t, err, merr.ErrParameterInvalid) + assert.Error(t, err) cit5 := &createIndexTask{ Condition: nil, @@ -808,7 +844,113 @@ func Test_parseIndexParams(t *testing.T) { }, } err = cit5.parseIndexParams() - assert.ErrorIs(t, err, merr.ErrParameterInvalid) + assert.Error(t, err) + }) + + t.Run("enable scalar auto index", func(t *testing.T) { + err := Params.Save(Params.AutoIndexConfig.ScalarAutoIndexEnable.Key, "true") + assert.NoError(t, err) + + cit := &createIndexTask{ + Condition: nil, + req: &milvuspb.CreateIndexRequest{ + ExtraParams: []*commonpb.KeyValuePair{ + { + Key: common.IndexTypeKey, + 
Value: "", + }, + }, + IndexName: "", + }, + fieldSchema: &schemapb.FieldSchema{ + FieldID: 101, + Name: "FieldID", + IsPrimaryKey: false, + Description: "field no.1", + DataType: schemapb.DataType_Int64, + }, + } + + err = cit.parseIndexParams() + assert.NoError(t, err) + assert.Equal(t, cit.newIndexParams, []*commonpb.KeyValuePair{{Key: common.IndexTypeKey, Value: DefaultArithmeticIndexType}}) + }) + + t.Run("create auto index on numeric field", func(t *testing.T) { + cit := &createIndexTask{ + Condition: nil, + req: &milvuspb.CreateIndexRequest{ + ExtraParams: []*commonpb.KeyValuePair{ + { + Key: common.IndexTypeKey, + Value: AutoIndexName, + }, + }, + IndexName: "", + }, + fieldSchema: &schemapb.FieldSchema{ + FieldID: 101, + Name: "FieldID", + IsPrimaryKey: false, + Description: "field no.1", + DataType: schemapb.DataType_Int64, + }, + } + + err := cit.parseIndexParams() + assert.NoError(t, err) + assert.Equal(t, cit.newIndexParams, []*commonpb.KeyValuePair{{Key: common.IndexTypeKey, Value: DefaultArithmeticIndexType}}) + }) + + t.Run("create auto index on varchar field", func(t *testing.T) { + cit := &createIndexTask{ + Condition: nil, + req: &milvuspb.CreateIndexRequest{ + ExtraParams: []*commonpb.KeyValuePair{ + { + Key: common.IndexTypeKey, + Value: AutoIndexName, + }, + }, + IndexName: "", + }, + fieldSchema: &schemapb.FieldSchema{ + FieldID: 101, + Name: "FieldID", + IsPrimaryKey: false, + Description: "field no.1", + DataType: schemapb.DataType_VarChar, + }, + } + + err := cit.parseIndexParams() + assert.NoError(t, err) + assert.Equal(t, cit.newIndexParams, []*commonpb.KeyValuePair{{Key: common.IndexTypeKey, Value: DefaultStringIndexType}}) + }) + + t.Run("create auto index on json field", func(t *testing.T) { + cit := &createIndexTask{ + Condition: nil, + req: &milvuspb.CreateIndexRequest{ + ExtraParams: []*commonpb.KeyValuePair{ + { + Key: common.IndexTypeKey, + Value: AutoIndexName, + }, + }, + IndexName: "", + }, + fieldSchema: 
&schemapb.FieldSchema{ + FieldID: 101, + Name: "FieldID", + IsPrimaryKey: false, + Description: "field no.1", + DataType: schemapb.DataType_JSON, + }, + } + + err := cit.parseIndexParams() + assert.Error(t, err) }) } diff --git a/internal/proxy/task_test.go b/internal/proxy/task_test.go index 18b9faf0e6..c3802bd7c4 100644 --- a/internal/proxy/task_test.go +++ b/internal/proxy/task_test.go @@ -2209,7 +2209,7 @@ func Test_checkTrain(t *testing.T) { m := map[string]string{ common.IndexTypeKey: "scalar", } - assert.NoError(t, checkTrain(f, m)) + assert.Error(t, checkTrain(f, m)) }) t.Run("dimension mismatch", func(t *testing.T) { diff --git a/internal/proxy/util.go b/internal/proxy/util.go index 1aaac133ea..9961a7d7fb 100644 --- a/internal/proxy/util.go +++ b/internal/proxy/util.go @@ -65,12 +65,10 @@ const ( defaultMaxSearchRequest = 1024 // DefaultArithmeticIndexType name of default index type for scalar field - DefaultArithmeticIndexType = "STL_SORT" + DefaultArithmeticIndexType = "INVERTED" // DefaultStringIndexType name of default index type for varChar/string field - DefaultStringIndexType = "Trie" - - InvertedIndexType = "INVERTED" + DefaultStringIndexType = "INVERTED" defaultRRFParamsValue = 60 maxRRFParamsValue = 16384 @@ -258,16 +256,6 @@ func validatePartitionTag(partitionTag string, strictCheck bool) error { return nil } -func validateStringIndexType(indexType string) bool { - // compatible with the index type marisa-trie of attu versions prior to 2.3.0 - return indexType == DefaultStringIndexType || indexType == "marisa-trie" || indexType == InvertedIndexType -} - -func validateArithmeticIndexType(indexType string) bool { - // compatible with the index type Asceneding of attu versions prior to 2.3.0 - return indexType == DefaultArithmeticIndexType || indexType == "Asceneding" || indexType == InvertedIndexType -} - func validateFieldName(fieldName string) error { fieldName = strings.TrimSpace(fieldName) diff --git 
a/pkg/util/indexparamcheck/auto_index_checker.go b/pkg/util/indexparamcheck/auto_index_checker.go new file mode 100644 index 0000000000..9f960d9669 --- /dev/null +++ b/pkg/util/indexparamcheck/auto_index_checker.go @@ -0,0 +1,22 @@ +package indexparamcheck + +import ( + "github.com/milvus-io/milvus-proto/go-api/v2/schemapb" +) + +// AUTOINDEXChecker is the checker for the AUTOINDEX index type; it accepts any params and any data type. +type AUTOINDEXChecker struct { + baseChecker +} + +func (c *AUTOINDEXChecker) CheckTrain(params map[string]string) error { + return nil +} + +func (c *AUTOINDEXChecker) CheckValidDataType(dType schemapb.DataType) error { + return nil +} + +func newAUTOINDEXChecker() *AUTOINDEXChecker { + return &AUTOINDEXChecker{} +} diff --git a/pkg/util/indexparamcheck/conf_adapter_mgr.go b/pkg/util/indexparamcheck/conf_adapter_mgr.go index f62fbe1bf8..9fdc1a1af6 100644 --- a/pkg/util/indexparamcheck/conf_adapter_mgr.go +++ b/pkg/util/indexparamcheck/conf_adapter_mgr.go @@ -60,6 +60,13 @@ func (mgr *indexCheckerMgrImpl) registerIndexChecker() { // WAND doesn't have more index params than sparse inverted index, thus // using the same checker. 
mgr.checkers[IndexSparseWand] = newSparseInvertedIndexChecker() + mgr.checkers[IndexINVERTED] = newINVERTEDChecker() + mgr.checkers[IndexSTLSORT] = newSTLSORTChecker() + mgr.checkers["Asceneding"] = newSTLSORTChecker() + mgr.checkers[IndexTRIE] = newTRIEChecker() + mgr.checkers[IndexTrie] = newTRIEChecker() + mgr.checkers["marisa-trie"] = newTRIEChecker() + mgr.checkers[AutoIndex] = newAUTOINDEXChecker() } func newIndexCheckerMgr() *indexCheckerMgrImpl { diff --git a/pkg/util/indexparamcheck/index_type.go b/pkg/util/indexparamcheck/index_type.go index f559e0cabc..7b24202f02 100644 --- a/pkg/util/indexparamcheck/index_type.go +++ b/pkg/util/indexparamcheck/index_type.go @@ -32,6 +32,13 @@ const ( IndexDISKANN IndexType = "DISKANN" IndexSparseInverted IndexType = "SPARSE_INVERTED_INDEX" IndexSparseWand IndexType = "SPARSE_WAND" + IndexINVERTED IndexType = "INVERTED" + + IndexSTLSORT IndexType = "STL_SORT" + IndexTRIE IndexType = "TRIE" + IndexTrie IndexType = "Trie" + + AutoIndex IndexType = "AUTOINDEX" ) func IsGpuIndex(indexType IndexType) bool { diff --git a/pkg/util/indexparamcheck/inverted_checker.go b/pkg/util/indexparamcheck/inverted_checker.go new file mode 100644 index 0000000000..b15549cd4b --- /dev/null +++ b/pkg/util/indexparamcheck/inverted_checker.go @@ -0,0 +1,28 @@ +package indexparamcheck + +import ( + "fmt" + + "github.com/milvus-io/milvus-proto/go-api/v2/schemapb" + "github.com/milvus-io/milvus/pkg/util/typeutil" +) + +// INVERTEDChecker checks if a INVERTED index can be built. 
+type INVERTEDChecker struct { + scalarIndexChecker +} + +func (c *INVERTEDChecker) CheckTrain(params map[string]string) error { + return c.scalarIndexChecker.CheckTrain(params) +} + +func (c *INVERTEDChecker) CheckValidDataType(dType schemapb.DataType) error { + if !typeutil.IsBoolType(dType) && !typeutil.IsArithmetic(dType) && !typeutil.IsStringType(dType) { + return fmt.Errorf("INVERTED are not supported on %s field", dType.String()) + } + return nil +} + +func newINVERTEDChecker() *INVERTEDChecker { + return &INVERTEDChecker{} +} diff --git a/pkg/util/indexparamcheck/inverted_checker_test.go b/pkg/util/indexparamcheck/inverted_checker_test.go new file mode 100644 index 0000000000..afe41f89f1 --- /dev/null +++ b/pkg/util/indexparamcheck/inverted_checker_test.go @@ -0,0 +1,25 @@ +package indexparamcheck + +import ( + "testing" + + "github.com/stretchr/testify/assert" + + "github.com/milvus-io/milvus-proto/go-api/v2/schemapb" +) + +func Test_INVERTEDIndexChecker(t *testing.T) { + c := newINVERTEDChecker() + + assert.NoError(t, c.CheckTrain(map[string]string{})) + + assert.NoError(t, c.CheckValidDataType(schemapb.DataType_VarChar)) + assert.NoError(t, c.CheckValidDataType(schemapb.DataType_String)) + assert.NoError(t, c.CheckValidDataType(schemapb.DataType_Bool)) + assert.NoError(t, c.CheckValidDataType(schemapb.DataType_Int64)) + assert.NoError(t, c.CheckValidDataType(schemapb.DataType_Float)) + + assert.Error(t, c.CheckValidDataType(schemapb.DataType_JSON)) + assert.Error(t, c.CheckValidDataType(schemapb.DataType_Array)) + assert.Error(t, c.CheckValidDataType(schemapb.DataType_FloatVector)) +} diff --git a/pkg/util/indexparamcheck/scalar_index_checker.go b/pkg/util/indexparamcheck/scalar_index_checker.go index 6b736ecdd7..9c372f4034 100644 --- a/pkg/util/indexparamcheck/scalar_index_checker.go +++ b/pkg/util/indexparamcheck/scalar_index_checker.go @@ -1,8 +1,9 @@ package indexparamcheck -import "github.com/milvus-io/milvus-proto/go-api/v2/schemapb" +type 
scalarIndexChecker struct { + baseChecker +} -// TODO: check index parameters according to the index type & data type. -func CheckIndexValid(dType schemapb.DataType, indexType IndexType, indexParams map[string]string) error { +func (c scalarIndexChecker) CheckTrain(params map[string]string) error { return nil } diff --git a/pkg/util/indexparamcheck/scalar_index_checker_test.go b/pkg/util/indexparamcheck/scalar_index_checker_test.go index 3289cd00b2..eb3ae669e2 100644 --- a/pkg/util/indexparamcheck/scalar_index_checker_test.go +++ b/pkg/util/indexparamcheck/scalar_index_checker_test.go @@ -4,10 +4,9 @@ import ( "testing" "github.com/stretchr/testify/assert" - - "github.com/milvus-io/milvus-proto/go-api/v2/schemapb" ) func TestCheckIndexValid(t *testing.T) { - assert.NoError(t, CheckIndexValid(schemapb.DataType_Int64, "inverted_index", nil)) + scalarIndexChecker := &scalarIndexChecker{} + assert.NoError(t, scalarIndexChecker.CheckTrain(map[string]string{})) } diff --git a/pkg/util/indexparamcheck/stl_sort_checker.go b/pkg/util/indexparamcheck/stl_sort_checker.go new file mode 100644 index 0000000000..f0b152cef9 --- /dev/null +++ b/pkg/util/indexparamcheck/stl_sort_checker.go @@ -0,0 +1,28 @@ +package indexparamcheck + +import ( + "fmt" + + "github.com/milvus-io/milvus-proto/go-api/v2/schemapb" + "github.com/milvus-io/milvus/pkg/util/typeutil" +) + +// STLSORTChecker checks if a STL_SORT index can be built. 
+type STLSORTChecker struct { + scalarIndexChecker +} + +func (c *STLSORTChecker) CheckTrain(params map[string]string) error { + return c.scalarIndexChecker.CheckTrain(params) +} + +func (c *STLSORTChecker) CheckValidDataType(dType schemapb.DataType) error { + if !typeutil.IsArithmetic(dType) { + return fmt.Errorf("STL_SORT are only supported on numeric field") + } + return nil +} + +func newSTLSORTChecker() *STLSORTChecker { + return &STLSORTChecker{} +} diff --git a/pkg/util/indexparamcheck/stl_sort_checker_test.go b/pkg/util/indexparamcheck/stl_sort_checker_test.go new file mode 100644 index 0000000000..a4af0c51e6 --- /dev/null +++ b/pkg/util/indexparamcheck/stl_sort_checker_test.go @@ -0,0 +1,22 @@ +package indexparamcheck + +import ( + "testing" + + "github.com/stretchr/testify/assert" + + "github.com/milvus-io/milvus-proto/go-api/v2/schemapb" +) + +func Test_STLSORTIndexChecker(t *testing.T) { + c := newSTLSORTChecker() + + assert.NoError(t, c.CheckTrain(map[string]string{})) + + assert.NoError(t, c.CheckValidDataType(schemapb.DataType_Int64)) + assert.NoError(t, c.CheckValidDataType(schemapb.DataType_Float)) + + assert.Error(t, c.CheckValidDataType(schemapb.DataType_Bool)) + assert.Error(t, c.CheckValidDataType(schemapb.DataType_VarChar)) + assert.Error(t, c.CheckValidDataType(schemapb.DataType_JSON)) +} diff --git a/pkg/util/indexparamcheck/trie_checker.go b/pkg/util/indexparamcheck/trie_checker.go new file mode 100644 index 0000000000..1c63fdc366 --- /dev/null +++ b/pkg/util/indexparamcheck/trie_checker.go @@ -0,0 +1,28 @@ +package indexparamcheck + +import ( + "fmt" + + "github.com/milvus-io/milvus-proto/go-api/v2/schemapb" + "github.com/milvus-io/milvus/pkg/util/typeutil" +) + +// TRIEChecker checks if a TRIE index can be built. 
+type TRIEChecker struct { + scalarIndexChecker +} + +func (c *TRIEChecker) CheckTrain(params map[string]string) error { + return c.scalarIndexChecker.CheckTrain(params) +} + +func (c *TRIEChecker) CheckValidDataType(dType schemapb.DataType) error { + if !typeutil.IsStringType(dType) { + return fmt.Errorf("TRIE are only supported on varchar field") + } + return nil +} + +func newTRIEChecker() *TRIEChecker { + return &TRIEChecker{} +} diff --git a/pkg/util/indexparamcheck/trie_checker_test.go b/pkg/util/indexparamcheck/trie_checker_test.go new file mode 100644 index 0000000000..25c6313ea8 --- /dev/null +++ b/pkg/util/indexparamcheck/trie_checker_test.go @@ -0,0 +1,23 @@ +package indexparamcheck + +import ( + "testing" + + "github.com/stretchr/testify/assert" + + "github.com/milvus-io/milvus-proto/go-api/v2/schemapb" +) + +func Test_TrieIndexChecker(t *testing.T) { + c := newTRIEChecker() + + assert.NoError(t, c.CheckTrain(map[string]string{})) + + assert.NoError(t, c.CheckValidDataType(schemapb.DataType_VarChar)) + assert.NoError(t, c.CheckValidDataType(schemapb.DataType_String)) + + assert.Error(t, c.CheckValidDataType(schemapb.DataType_Bool)) + assert.Error(t, c.CheckValidDataType(schemapb.DataType_Int64)) + assert.Error(t, c.CheckValidDataType(schemapb.DataType_Float)) + assert.Error(t, c.CheckValidDataType(schemapb.DataType_JSON)) +} diff --git a/pkg/util/paramtable/autoindex_param.go b/pkg/util/paramtable/autoindex_param.go index 7273325dae..a08e0eb466 100644 --- a/pkg/util/paramtable/autoindex_param.go +++ b/pkg/util/paramtable/autoindex_param.go @@ -38,6 +38,12 @@ type autoIndexConfig struct { AutoIndexTypeName ParamItem `refreshable:"true"` AutoIndexSearchConfig ParamItem `refreshable:"true"` AutoIndexTuningConfig ParamGroup `refreshable:"true"` + + ScalarAutoIndexEnable ParamItem `refreshable:"true"` + ScalarAutoIndexParams ParamItem `refreshable:"true"` + ScalarNumericIndexType ParamItem `refreshable:"true"` + ScalarVarcharIndexType ParamItem 
`refreshable:"true"` + ScalarBoolIndexType ParamItem `refreshable:"true"` } func (p *autoIndexConfig) init(base *BaseTable) { @@ -107,6 +113,57 @@ func (p *autoIndexConfig) init(base *BaseTable) { p.AutoIndexTuningConfig.Init(base.mgr) p.panicIfNotValidAndSetDefaultMetricType(base.mgr) + + p.ScalarAutoIndexEnable = ParamItem{ + Key: "scalarAutoIndex.enable", + Version: "2.3.4", + DefaultValue: "false", + PanicIfEmpty: true, + } + p.ScalarAutoIndexEnable.Init(base.mgr) + + p.ScalarAutoIndexParams = ParamItem{ + Key: "scalarAutoIndex.params.build", + Version: "2.3.4", + DefaultValue: `{"numeric": "INVERTED","varchar": "INVERTED","bool": "INVERTED"}`, + } + p.ScalarAutoIndexParams.Init(base.mgr) + + p.ScalarNumericIndexType = ParamItem{ + Version: "2.4.0", + Formatter: func(v string) string { + m := p.ScalarAutoIndexParams.GetAsJSONMap() + if m == nil { + return "" + } + return m["numeric"] + }, + } + p.ScalarNumericIndexType.Init(base.mgr) + + p.ScalarVarcharIndexType = ParamItem{ + Version: "2.4.0", + Formatter: func(v string) string { + m := p.ScalarAutoIndexParams.GetAsJSONMap() + if m == nil { + return "" + } + return m["varchar"] + }, + } + p.ScalarVarcharIndexType.Init(base.mgr) + + p.ScalarBoolIndexType = ParamItem{ + Version: "2.4.0", + Formatter: func(v string) string { + m := p.ScalarAutoIndexParams.GetAsJSONMap() + if m == nil { + return "" + } + return m["bool"] + }, + } + p.ScalarBoolIndexType.Init(base.mgr) } func (p *autoIndexConfig) panicIfNotValidAndSetDefaultMetricType(mgr *config.Manager) { diff --git a/pkg/util/paramtable/autoindex_param_test.go b/pkg/util/paramtable/autoindex_param_test.go index 83520ad3ef..50e239952b 100644 --- a/pkg/util/paramtable/autoindex_param_test.go +++ b/pkg/util/paramtable/autoindex_param_test.go @@ -224,3 +224,26 @@ func Test_autoIndexConfig_panicIfNotValid(t *testing.T) { assert.Equal(t, indexparamcheck.BinaryVectorDefaultMetricType, metricType) }) } + +func TestScalarAutoIndexParams_build(t *testing.T) { + var 
CParams ComponentParam + bt := NewBaseTable(SkipRemote(true)) + CParams.Init(bt) + + t.Run("parse scalar auto index param success", func(t *testing.T) { + var err error + map1 := map[string]any{ + "numeric": "STL_SORT", + "varchar": "TRIE", + "bool": "INVERTED", + } + var jsonStrBytes []byte + jsonStrBytes, err = json.Marshal(map1) + assert.NoError(t, err) + err = bt.Save(CParams.AutoIndexConfig.ScalarAutoIndexParams.Key, string(jsonStrBytes)) + assert.NoError(t, err) + assert.Equal(t, "STL_SORT", CParams.AutoIndexConfig.ScalarNumericIndexType.GetValue()) + assert.Equal(t, "TRIE", CParams.AutoIndexConfig.ScalarVarcharIndexType.GetValue()) + assert.Equal(t, "INVERTED", CParams.AutoIndexConfig.ScalarBoolIndexType.GetValue()) + }) +} diff --git a/tests/python_client/testcases/test_index.py b/tests/python_client/testcases/test_index.py index f937aa96ab..110f0e27dc 100644 --- a/tests/python_client/testcases/test_index.py +++ b/tests/python_client/testcases/test_index.py @@ -113,7 +113,7 @@ class TestIndexParams(TestcaseBase): msg = "invalid index type" self.index_wrap.init_index(collection_w.collection, default_field_name, index_params, check_task=CheckTasks.err_res, - check_items={ct.err_code: 65535, ct.err_msg: msg}) + check_items={ct.err_code: 1100, ct.err_msg: msg}) @pytest.mark.tags(CaseLabel.L1) def test_index_type_not_supported(self): @@ -1271,7 +1271,7 @@ class TestNewIndexBinary(TestcaseBase): binary_index_params = {'index_type': 'BIN_IVF_FLAT', 'metric_type': 'L2', 'params': {'nlist': 64}} collection_w.create_index(default_binary_vec_field_name, binary_index_params, index_name=binary_field_name, check_task=CheckTasks.err_res, - check_items={ct.err_code: 65535, + check_items={ct.err_code: 1100, ct.err_msg: "metric type L2 not found or not supported, supported: " "[HAMMING JACCARD SUBSTRUCTURE SUPERSTRUCTURE]"}) @@ -1441,7 +1441,7 @@ class TestIndexInvalid(TestcaseBase): index_annoy = {"index_type": "ANNOY", "params": {"n_trees": n_trees}, "metric_type": "L2"} 
collection_w.create_index("float_vector", index_annoy, check_task=CheckTasks.err_res, - check_items={"err_code": 65535, + check_items={"err_code": 1100, "err_msg": "invalid index type: ANNOY"}) @pytest.mark.tags(CaseLabel.L1) @@ -1453,6 +1453,7 @@ class TestIndexInvalid(TestcaseBase): """ collection_w, _, _, insert_ids = self.init_collection_general(prefix, True, dim=ct.default_dim, is_index=False)[0:4] + # create index on JSON/Array field is not supported collection_w.create_index(ct.default_json_field_name, index_params=ct.default_flat_index, check_task=CheckTasks.err_res, check_items={ct.err_code: 1100, @@ -1471,8 +1472,8 @@ class TestIndexInvalid(TestcaseBase): scalar_index_params = {"index_type": scalar_index} collection_w.create_index(ct.default_float_vec_field_name, index_params=scalar_index_params, check_task=CheckTasks.err_res, - check_items={ct.err_code: 65535, - ct.err_msg: f"invalid index type: {scalar_index}"}) + check_items={ct.err_code: 1100, + ct.err_msg: f"invalid index params"}) @pytest.mark.tags(CaseLabel.L1) def test_create_scalar_index_on_binary_vector_field(self, scalar_index): @@ -1485,7 +1486,7 @@ class TestIndexInvalid(TestcaseBase): scalar_index_params = {"index_type": scalar_index} collection_w.create_index(ct.default_binary_vec_field_name, index_params=scalar_index_params, check_task=CheckTasks.err_res, - check_items={ct.err_code: 65535, + check_items={ct.err_code: 1100, ct.err_msg: f"invalid index type: {scalar_index}"}) @pytest.mark.tags(CaseLabel.L1) @@ -2172,7 +2173,7 @@ class TestIndexDiskann(TestcaseBase): collection_w.insert(data=df) collection_w.create_index(default_binary_vec_field_name, ct.default_diskann_index, index_name=binary_field_name, check_task=CheckTasks.err_res, - check_items={ct.err_code: 65535, + check_items={ct.err_code: 1100, ct.err_msg: "float or float16 vector are only supported"}) @pytest.mark.tags(CaseLabel.L2) @@ -2330,7 +2331,7 @@ class TestScaNNIndex(TestcaseBase): """ collection_w = 
self.init_collection_general(prefix, is_index=False)[0] index_params = {"index_type": "SCANN", "metric_type": "L2", "params": {"nlist": nlist}} - error = {ct.err_code: 65535, ct.err_msg: "nlist out of range: [1, 65536]"} + error = {ct.err_code: 1100, ct.err_msg: "nlist out of range: [1, 65536]"} collection_w.create_index(default_field_name, index_params, check_task=CheckTasks.err_res, check_items=error) @@ -2344,7 +2345,7 @@ class TestScaNNIndex(TestcaseBase): """ collection_w = self.init_collection_general(prefix, is_index=False, dim=dim)[0] index_params = {"index_type": "SCANN", "metric_type": "L2", "params": {"nlist": 1024}} - error = {ct.err_code: 65535, + error = {ct.err_code: 1100, ct.err_msg: f"dimension must be able to be divided by 2, dimension: {dim}"} collection_w.create_index(default_field_name, index_params, check_task=CheckTasks.err_res, check_items=error)