From 58a71115994c1df3c6f9af01acf7644f62ea4f69 Mon Sep 17 00:00:00 2001 From: foxspy Date: Thu, 6 Jun 2024 10:33:52 +0800 Subject: [PATCH] enhance: [cherry-pick] add autoindex mapping for binary/sparse datatype (#33625) issue: #22837 pr: #33624 Signed-off-by: xianliang.li --- .../core/thirdparty/knowhere/CMakeLists.txt | 2 +- internal/proxy/task_index.go | 18 +++- internal/proxy/task_index_test.go | 89 +++++++++++++++++++ pkg/util/indexparamcheck/hnsw_checker.go | 4 +- pkg/util/indexparamcheck/hnsw_checker_test.go | 2 +- pkg/util/indexparams/index_params.go | 4 + pkg/util/indexparams/index_params_test.go | 9 +- pkg/util/paramtable/autoindex_param.go | 25 ++++++ pkg/util/paramtable/autoindex_param_test.go | 50 +++++++++++ 9 files changed, 195 insertions(+), 8 deletions(-) diff --git a/internal/core/thirdparty/knowhere/CMakeLists.txt b/internal/core/thirdparty/knowhere/CMakeLists.txt index 3c5a4b5a9e..3f8cf52615 100644 --- a/internal/core/thirdparty/knowhere/CMakeLists.txt +++ b/internal/core/thirdparty/knowhere/CMakeLists.txt @@ -12,7 +12,7 @@ #------------------------------------------------------------------------------- # Update KNOWHERE_VERSION for the first occurrence -set( KNOWHERE_VERSION v2.3.4 ) +set( KNOWHERE_VERSION v2.3.5 ) set( GIT_REPOSITORY "https://github.com/zilliztech/knowhere.git") message(STATUS "Knowhere repo: ${GIT_REPOSITORY}") message(STATUS "Knowhere version: ${KNOWHERE_VERSION}") diff --git a/internal/proxy/task_index.go b/internal/proxy/task_index.go index 5925391c20..b8e0f88af3 100644 --- a/internal/proxy/task_index.go +++ b/internal/proxy/task_index.go @@ -177,9 +177,21 @@ func (cit *createIndexTask) parseIndexParams() error { metricType, metricTypeExist := indexParamsMap[common.MetricTypeKey] - // override params by autoindex - for k, v := range Params.AutoIndexConfig.IndexParams.GetAsJSONMap() { - indexParamsMap[k] = v + if typeutil.IsDenseFloatVectorType(cit.fieldSchema.DataType) { + // override float vector index params by autoindex + 
for k, v := range Params.AutoIndexConfig.IndexParams.GetAsJSONMap() { + indexParamsMap[k] = v + } + } else if typeutil.IsSparseFloatVectorType(cit.fieldSchema.DataType) { + // override sparse float vector index params by autoindex + for k, v := range Params.AutoIndexConfig.SparseIndexParams.GetAsJSONMap() { + indexParamsMap[k] = v + } + } else if typeutil.IsBinaryVectorType(cit.fieldSchema.DataType) { + // override binary vector index params by autoindex + for k, v := range Params.AutoIndexConfig.BinaryIndexParams.GetAsJSONMap() { + indexParamsMap[k] = v + } } if metricTypeExist { diff --git a/internal/proxy/task_index_test.go b/internal/proxy/task_index_test.go index 20840c2848..c5f315ec24 100644 --- a/internal/proxy/task_index_test.go +++ b/internal/proxy/task_index_test.go @@ -963,6 +963,95 @@ func Test_wrapUserIndexParams(t *testing.T) { assert.Equal(t, "L2", params[1].Value) } +func Test_parseIndexParams_AutoIndex_WithType(t *testing.T) { + paramtable.Init() + mgr := config.NewManager() + mgr.SetConfig("autoIndex.enable", "true") + Params.AutoIndexConfig.Enable.Init(mgr) + + mgr.SetConfig("autoIndex.params.build", `{"M": 30,"efConstruction": 360,"index_type": "HNSW"}`) + mgr.SetConfig("autoIndex.params.sparsebuild", `{"drop_ratio_build": 0.2, "index_type": "SPARSE_INVERTED_INDEX"}`) + mgr.SetConfig("autoIndex.params.binarybuild", `{"nlist": 1024, "index_type": "BIN_IVF_FLAT"}`) + Params.AutoIndexConfig.IndexParams.Init(mgr) + Params.AutoIndexConfig.SparseIndexParams.Init(mgr) + Params.AutoIndexConfig.BinaryIndexParams.Init(mgr) + + floatFieldSchema := &schemapb.FieldSchema{ + DataType: schemapb.DataType_FloatVector, + TypeParams: []*commonpb.KeyValuePair{ + {Key: common.DimKey, Value: "128"}, + }, + } + sparseFloatFieldSchema := &schemapb.FieldSchema{ + DataType: schemapb.DataType_SparseFloatVector, + TypeParams: []*commonpb.KeyValuePair{ + {Key: common.DimKey, Value: "64"}, + }, + } + binaryFieldSchema := &schemapb.FieldSchema{ + DataType: 
schemapb.DataType_BinaryVector, + TypeParams: []*commonpb.KeyValuePair{ + {Key: common.DimKey, Value: "4096"}, + }, + } + + t.Run("case 1, float vector parameters", func(t *testing.T) { + task := &createIndexTask{ + fieldSchema: floatFieldSchema, + req: &milvuspb.CreateIndexRequest{ + ExtraParams: []*commonpb.KeyValuePair{ + {Key: common.MetricTypeKey, Value: "L2"}, + }, + }, + } + err := task.parseIndexParams() + assert.NoError(t, err) + assert.ElementsMatch(t, []*commonpb.KeyValuePair{ + {Key: common.IndexTypeKey, Value: "HNSW"}, + {Key: common.MetricTypeKey, Value: "L2"}, + {Key: "M", Value: "30"}, + {Key: "efConstruction", Value: "360"}, + }, task.newIndexParams) + }) + + t.Run("case 2, sparse vector parameters", func(t *testing.T) { + Params.AutoIndexConfig.IndexParams.Init(mgr) + task := &createIndexTask{ + fieldSchema: sparseFloatFieldSchema, + req: &milvuspb.CreateIndexRequest{ + ExtraParams: []*commonpb.KeyValuePair{ + {Key: common.MetricTypeKey, Value: "IP"}, + }, + }, + } + err := task.parseIndexParams() + assert.NoError(t, err) + assert.ElementsMatch(t, []*commonpb.KeyValuePair{ + {Key: common.IndexTypeKey, Value: "SPARSE_INVERTED_INDEX"}, + {Key: common.MetricTypeKey, Value: "IP"}, + {Key: "drop_ratio_build", Value: "0.2"}, + }, task.newIndexParams) + }) + + t.Run("case 3, binary vector parameters", func(t *testing.T) { + task := &createIndexTask{ + fieldSchema: binaryFieldSchema, + req: &milvuspb.CreateIndexRequest{ + ExtraParams: []*commonpb.KeyValuePair{ + {Key: common.MetricTypeKey, Value: "JACCARD"}, + }, + }, + } + err := task.parseIndexParams() + assert.NoError(t, err) + assert.ElementsMatch(t, []*commonpb.KeyValuePair{ + {Key: common.IndexTypeKey, Value: "BIN_IVF_FLAT"}, + {Key: common.MetricTypeKey, Value: "JACCARD"}, + {Key: "nlist", Value: "1024"}, + }, task.newIndexParams) + }) +} + func Test_parseIndexParams_AutoIndex(t *testing.T) { paramtable.Init() mgr := config.NewManager() diff --git a/pkg/util/indexparamcheck/hnsw_checker.go 
b/pkg/util/indexparamcheck/hnsw_checker.go index 56146e2468..c407bbbfbb 100644 --- a/pkg/util/indexparamcheck/hnsw_checker.go +++ b/pkg/util/indexparamcheck/hnsw_checker.go @@ -33,8 +33,8 @@ func (c hnswChecker) CheckTrain(params map[string]string) error { func (c hnswChecker) CheckValidDataType(dType schemapb.DataType) error { // TODO(SPARSE) we'll add sparse vector support in HNSW later in cardinal - if !typeutil.IsDenseFloatVectorType(dType) { - return fmt.Errorf("HNSW only support float vector data type") + if !(typeutil.IsDenseFloatVectorType(dType) || typeutil.IsBinaryVectorType(dType)) { + return fmt.Errorf("HNSW only support float vector or bin data type") } return nil } diff --git a/pkg/util/indexparamcheck/hnsw_checker_test.go b/pkg/util/indexparamcheck/hnsw_checker_test.go index e2e374e0c9..bcb7c482a1 100644 --- a/pkg/util/indexparamcheck/hnsw_checker_test.go +++ b/pkg/util/indexparamcheck/hnsw_checker_test.go @@ -158,7 +158,7 @@ func Test_hnswChecker_CheckValidDataType(t *testing.T) { }, { dType: schemapb.DataType_BinaryVector, - errIsNil: false, + errIsNil: true, }, } diff --git a/pkg/util/indexparams/index_params.go b/pkg/util/indexparams/index_params.go index 4e2281e0b5..d3d2433591 100644 --- a/pkg/util/indexparams/index_params.go +++ b/pkg/util/indexparams/index_params.go @@ -362,6 +362,10 @@ func AppendPrepareLoadParams(params *paramtable.ComponentParam, indexParams map[ for k, v := range params.AutoIndexConfig.PrepareParams.GetAsJSONMap() { indexParams[k] = v } + + for k, v := range params.AutoIndexConfig.LoadAdaptParams.GetAsJSONMap() { + indexParams[k] = v + } } return nil } diff --git a/pkg/util/indexparams/index_params_test.go b/pkg/util/indexparams/index_params_test.go index 9b9030ff30..2051833b30 100644 --- a/pkg/util/indexparams/index_params_test.go +++ b/pkg/util/indexparams/index_params_test.go @@ -583,7 +583,7 @@ func TestBigDataIndex_parse(t *testing.T) { } func TestAppendPrepareInfo_parse(t *testing.T) { - t.Run("parse prepare info", 
func(t *testing.T) { + t.Run("parse load info", func(t *testing.T) { var params paramtable.ComponentParam params.Init(paramtable.NewBaseTable(paramtable.SkipRemote(true))) params.Save(params.AutoIndexConfig.Enable.Key, "true") @@ -593,9 +593,16 @@ func TestAppendPrepareInfo_parse(t *testing.T) { assert.NoError(t, err) params.Save(params.AutoIndexConfig.PrepareParams.Key, string(str)) + mapString2 := make(map[string]string) + mapString2["key2"] = "value2" + str2, err2 := json.Marshal(mapString2) + assert.NoError(t, err2) + params.Save(params.AutoIndexConfig.LoadAdaptParams.Key, string(str2)) + resultMapString := make(map[string]string) err = AppendPrepareLoadParams(&params, resultMapString) assert.NoError(t, err) assert.Equal(t, resultMapString["key1"], "value1") + assert.Equal(t, resultMapString["key2"], "value2") }) } diff --git a/pkg/util/paramtable/autoindex_param.go b/pkg/util/paramtable/autoindex_param.go index ff840fcc4e..accd710d75 100644 --- a/pkg/util/paramtable/autoindex_param.go +++ b/pkg/util/paramtable/autoindex_param.go @@ -32,7 +32,10 @@ type autoIndexConfig struct { EnableOptimize ParamItem `refreshable:"true"` IndexParams ParamItem `refreshable:"true"` + SparseIndexParams ParamItem `refreshable:"true"` + BinaryIndexParams ParamItem `refreshable:"true"` PrepareParams ParamItem `refreshable:"true"` + LoadAdaptParams ParamItem `refreshable:"true"` ExtraParams ParamItem `refreshable:"true"` IndexType ParamItem `refreshable:"true"` AutoIndexTypeName ParamItem `refreshable:"true"` @@ -71,12 +74,34 @@ func (p *autoIndexConfig) init(base *BaseTable) { } p.IndexParams.Init(base.mgr) + p.SparseIndexParams = ParamItem{ + Key: "autoIndex.params.sparse.build", + Version: "2.4.5", + DefaultValue: `{"index_type": "SPARSE_INVERTED_INDEX", "metric_type": "IP"}`, + Export: true, + } + p.SparseIndexParams.Init(base.mgr) + + p.BinaryIndexParams = ParamItem{ + Key: "autoIndex.params.binary.build", + Version: "2.4.5", + DefaultValue: `{"nlist": 1024, "index_type": 
"BIN_IVF_FLAT", "metric_type": "JACCARD"}`, + Export: true, + } + p.BinaryIndexParams.Init(base.mgr) + p.PrepareParams = ParamItem{ Key: "autoIndex.params.prepare", Version: "2.3.2", } p.PrepareParams.Init(base.mgr) + p.LoadAdaptParams = ParamItem{ + Key: "autoIndex.params.load", + Version: "2.4.5", + } + p.LoadAdaptParams.Init(base.mgr) + p.ExtraParams = ParamItem{ Key: "autoIndex.params.extra", Version: "2.2.0", diff --git a/pkg/util/paramtable/autoindex_param_test.go b/pkg/util/paramtable/autoindex_param_test.go index 50e239952b..4670a62ae8 100644 --- a/pkg/util/paramtable/autoindex_param_test.go +++ b/pkg/util/paramtable/autoindex_param_test.go @@ -66,6 +66,56 @@ func TestAutoIndexParams_build(t *testing.T) { assert.Equal(t, strconv.Itoa(map2["nlist"].(int)), CParams.AutoIndexConfig.IndexParams.GetAsJSONMap()["nlist"]) }) + t.Run("test parseSparseBuildParams success", func(t *testing.T) { + // Params := CParams.AutoIndexConfig + // buildParams := make([string]interface) + var err error + map1 := map[string]any{ + IndexTypeKey: "SPARSE_INVERTED_INDEX", + "drop_ratio_build": 0.1, + } + var jsonStrBytes []byte + jsonStrBytes, err = json.Marshal(map1) + assert.NoError(t, err) + bt.Save(CParams.AutoIndexConfig.SparseIndexParams.Key, string(jsonStrBytes)) + assert.Equal(t, "SPARSE_INVERTED_INDEX", CParams.AutoIndexConfig.SparseIndexParams.GetAsJSONMap()[IndexTypeKey]) + assert.Equal(t, "0.1", CParams.AutoIndexConfig.SparseIndexParams.GetAsJSONMap()["drop_ratio_build"]) + + map2 := map[string]interface{}{ + IndexTypeKey: "SPARSE_WAND", + "drop_ratio_build": 0.2, + } + jsonStrBytes, err = json.Marshal(map2) + assert.NoError(t, err) + bt.Save(CParams.AutoIndexConfig.SparseIndexParams.Key, string(jsonStrBytes)) + assert.Equal(t, "SPARSE_WAND", CParams.AutoIndexConfig.SparseIndexParams.GetAsJSONMap()[IndexTypeKey]) + assert.Equal(t, "0.2", CParams.AutoIndexConfig.SparseIndexParams.GetAsJSONMap()["drop_ratio_build"]) + }) + + t.Run("test parseBinaryParams success", func(t 
*testing.T) { + // Params := CParams.AutoIndexConfig + // buildParams := make([string]interface) + var err error + map1 := map[string]any{ + IndexTypeKey: "BIN_IVF_FLAT", + "nlist": 768, + } + var jsonStrBytes []byte + jsonStrBytes, err = json.Marshal(map1) + assert.NoError(t, err) + bt.Save(CParams.AutoIndexConfig.BinaryIndexParams.Key, string(jsonStrBytes)) + assert.Equal(t, "BIN_IVF_FLAT", CParams.AutoIndexConfig.BinaryIndexParams.GetAsJSONMap()[IndexTypeKey]) + assert.Equal(t, strconv.Itoa(map1["nlist"].(int)), CParams.AutoIndexConfig.BinaryIndexParams.GetAsJSONMap()["nlist"]) + + map2 := map[string]interface{}{ + IndexTypeKey: "BIN_FLAT", + } + jsonStrBytes, err = json.Marshal(map2) + assert.NoError(t, err) + bt.Save(CParams.AutoIndexConfig.BinaryIndexParams.Key, string(jsonStrBytes)) + assert.Equal(t, "BIN_FLAT", CParams.AutoIndexConfig.BinaryIndexParams.GetAsJSONMap()[IndexTypeKey]) + }) + t.Run("test parsePrepareParams success", func(t *testing.T) { var err error map1 := map[string]any{