enhance: add autoindex config for deduplication case (#44186)

Signed-off-by: cqy123456 <qianya.cheng@zilliz.com>
This commit is contained in:
cqy123456 2025-09-03 17:19:53 +08:00 committed by GitHub
parent 03c46e686f
commit d50b365375
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
4 changed files with 57 additions and 19 deletions

View File

@ -270,9 +270,20 @@ func (cit *createIndexTask) parseIndexParams(ctx context.Context) error {
indexParamsMap[k] = v
}
} else if typeutil.IsBinaryVectorType(cit.fieldSchema.DataType) {
// override binary vector index params by autoindex
for k, v := range Params.AutoIndexConfig.BinaryIndexParams.GetAsJSONMap() {
indexParamsMap[k] = v
if metricTypeExist && funcutil.SliceContain(indexparamcheck.DeduplicateMetrics, metricType) {
if !Params.AutoIndexConfig.EnableDeduplicateIndex.GetAsBool() {
log.Ctx(ctx).Warn("Deduplicate index is not enabled, but metric type is deduplicate.")
return merr.WrapErrParameterInvalidMsg("Deduplicate index is not enabled, but metric type is deduplicate.")
}
// override binary vector index params by autoindex deduplicate params
for k, v := range Params.AutoIndexConfig.DeduplicateIndexParams.GetAsJSONMap() {
indexParamsMap[k] = v
}
} else {
// override binary vector index params by autoindex
for k, v := range Params.AutoIndexConfig.BinaryIndexParams.GetAsJSONMap() {
indexParamsMap[k] = v
}
}
} else if typeutil.IsIntVectorType(cit.fieldSchema.DataType) {
// override int vector index params by autoindex
@ -295,6 +306,7 @@ func (cit *createIndexTask) parseIndexParams(ctx context.Context) error {
}
log.Ctx(ctx).Info("AutoIndex triggered", fields...)
}
metricType, metricTypeExist := indexParamsMap[common.MetricTypeKey]
handle := func(numberParams int, autoIndexConfig map[string]string) error {
// empty case.
@ -306,8 +318,6 @@ func (cit *createIndexTask) parseIndexParams(ctx context.Context) error {
return nil
}
metricType, metricTypeExist := indexParamsMap[common.MetricTypeKey]
if len(indexParamsMap) > numberParams+1 {
return errors.New("only metric type can be passed when use AutoIndex")
}
@ -337,10 +347,17 @@ func (cit *createIndexTask) parseIndexParams(ctx context.Context) error {
(typeutil.IsArrayOfVectorType(cit.fieldSchema.DataType) && typeutil.IsSparseFloatVectorType(cit.fieldSchema.ElementType)) {
// override sparse float vector index params by autoindex
config = Params.AutoIndexConfig.SparseIndexParams.GetAsJSONMap()
} else if typeutil.IsBinaryVectorType(cit.fieldSchema.DataType) ||
(typeutil.IsArrayOfVectorType(cit.fieldSchema.DataType) && typeutil.IsBinaryVectorType(cit.fieldSchema.ElementType)) {
// override binary vector index params by autoindex
config = Params.AutoIndexConfig.BinaryIndexParams.GetAsJSONMap()
} else if typeutil.IsBinaryVectorType(cit.fieldSchema.DataType) {
if metricTypeExist && funcutil.SliceContain(indexparamcheck.DeduplicateMetrics, metricType) {
if !Params.AutoIndexConfig.EnableDeduplicateIndex.GetAsBool() {
log.Ctx(ctx).Warn("Deduplicate index is not enabled, but metric type is deduplicate.")
return merr.WrapErrParameterInvalidMsg("Deduplicate index is not enabled, but metric type is deduplicate.")
}
config = Params.AutoIndexConfig.DeduplicateIndexParams.GetAsJSONMap()
} else {
// override binary vector index params by autoindex
config = Params.AutoIndexConfig.BinaryIndexParams.GetAsJSONMap()
}
} else if typeutil.IsIntVectorType(cit.fieldSchema.DataType) ||
(typeutil.IsArrayOfVectorType(cit.fieldSchema.DataType) && typeutil.IsIntVectorType(cit.fieldSchema.ElementType)) {
// override int vector index params by autoindex

View File

@ -69,6 +69,7 @@ var (
supportDimPerSubQuantizer = []int{32, 28, 24, 20, 16, 12, 10, 8, 6, 4, 3, 2, 1} // const
supportSubQuantizer = []int{96, 64, 56, 48, 40, 32, 28, 24, 20, 16, 12, 8, 4, 3, 2, 1} // const
SparseMetrics = []string{metric.IP, metric.BM25} // const
DeduplicateMetrics = []string{metric.MHJACCARD} // const
)
const (

View File

@ -93,6 +93,7 @@ func CheckAutoIndexConfig() {
autoIndexCfg := &paramtable.Get().AutoIndexConfig
CheckAutoIndexHelper(autoIndexCfg.IndexParams.Key, autoIndexCfg.IndexParams.GetAsJSONMap(), schemapb.DataType_FloatVector)
CheckAutoIndexHelper(autoIndexCfg.BinaryIndexParams.Key, autoIndexCfg.BinaryIndexParams.GetAsJSONMap(), schemapb.DataType_BinaryVector)
// Validate the deduplicate autoindex build params under their own config key,
// pairing the key with the map it labels as the neighbouring calls do for
// their params (previously BinaryIndexParams.Key was passed here).
CheckAutoIndexHelper(autoIndexCfg.DeduplicateIndexParams.Key, autoIndexCfg.DeduplicateIndexParams.GetAsJSONMap(), schemapb.DataType_BinaryVector)
CheckAutoIndexHelper(autoIndexCfg.SparseIndexParams.Key, autoIndexCfg.SparseIndexParams.GetAsJSONMap(), schemapb.DataType_SparseFloatVector)
}

View File

@ -35,16 +35,18 @@ type AutoIndexConfig struct {
EnableOptimize ParamItem `refreshable:"true"`
EnableResultLimitCheck ParamItem `refreshable:"true"`
IndexParams ParamItem `refreshable:"true"`
SparseIndexParams ParamItem `refreshable:"true"`
BinaryIndexParams ParamItem `refreshable:"true"`
PrepareParams ParamItem `refreshable:"true"`
LoadAdaptParams ParamItem `refreshable:"true"`
ExtraParams ParamItem `refreshable:"true"`
IndexType ParamItem `refreshable:"true"`
AutoIndexTypeName ParamItem `refreshable:"true"`
AutoIndexSearchConfig ParamItem `refreshable:"true"`
AutoIndexTuningConfig ParamGroup `refreshable:"true"`
IndexParams ParamItem `refreshable:"true"`
SparseIndexParams ParamItem `refreshable:"true"`
BinaryIndexParams ParamItem `refreshable:"true"`
DeduplicateIndexParams ParamItem `refreshable:"true"`
EnableDeduplicateIndex ParamItem `refreshable:"true"`
PrepareParams ParamItem `refreshable:"true"`
LoadAdaptParams ParamItem `refreshable:"true"`
ExtraParams ParamItem `refreshable:"true"`
IndexType ParamItem `refreshable:"true"`
AutoIndexTypeName ParamItem `refreshable:"true"`
AutoIndexSearchConfig ParamItem `refreshable:"true"`
AutoIndexTuningConfig ParamGroup `refreshable:"true"`
ScalarAutoIndexEnable ParamItem `refreshable:"true"`
ScalarAutoIndexParams ParamItem `refreshable:"true"`
@ -108,6 +110,23 @@ func (p *AutoIndexConfig) init(base *BaseTable) {
}
p.BinaryIndexParams.Init(base.mgr)
p.DeduplicateIndexParams = ParamItem{
Key: "autoIndex.params.deduplicate.build",
Version: "2.5.18",
DefaultValue: `{"index_type": "MINHASH_LSH", "metric_type": "MHJACCARD"}`,
Formatter: GetBuildParamFormatter(BinaryVectorDefaultMetricType, "autoIndex.params.deduplicate.build"),
Export: true,
}
p.DeduplicateIndexParams.Init(base.mgr)
p.EnableDeduplicateIndex = ParamItem{
Key: "autoIndex.params.deduplicate.enable",
Version: "2.5.18",
DefaultValue: "false",
PanicIfEmpty: false,
}
p.EnableDeduplicateIndex.Init(base.mgr)
p.PrepareParams = ParamItem{
Key: "autoIndex.params.prepare",
Version: "2.3.2",