mirror of
https://gitee.com/milvus-io/milvus.git
synced 2025-12-06 17:18:35 +08:00
enhance:[2.5]minhash support and add autoindex config (#44015)
master pr: https://github.com/milvus-io/milvus/pull/44186 Signed-off-by: cqy123456 <qianya.cheng@zilliz.com>
This commit is contained in:
parent
d658b6f50a
commit
c17ce3cf90
@ -32,6 +32,7 @@ const (
|
||||
SUBSTRUCTURE MetricType = "SUBSTRUCTURE"
|
||||
SUPERSTRUCTURE MetricType = "SUPERSTRUCTURE"
|
||||
BM25 MetricType = "BM25"
|
||||
MHJACCARD MetricType = "MHJACCARD"
|
||||
)
|
||||
|
||||
// CompactionState enum type for compaction state
|
||||
|
||||
@ -178,7 +178,8 @@ inline bool
|
||||
PositivelyRelated(const knowhere::MetricType& metric_type) {
|
||||
return IsMetricType(metric_type, knowhere::metric::IP) ||
|
||||
IsMetricType(metric_type, knowhere::metric::COSINE) ||
|
||||
IsMetricType(metric_type, knowhere::metric::BM25);
|
||||
IsMetricType(metric_type, knowhere::metric::BM25) ||
|
||||
IsMetricType(metric_type, knowhere::metric::MHJACCARD);
|
||||
}
|
||||
|
||||
inline std::string
|
||||
|
||||
@ -272,11 +272,22 @@ func (cit *createIndexTask) parseIndexParams(ctx context.Context) error {
|
||||
indexParamsMap[k] = v
|
||||
}
|
||||
} else if typeutil.IsBinaryVectorType(cit.fieldSchema.DataType) {
|
||||
if metricTypeExist && funcutil.SliceContain(indexparamcheck.DeduplicateMetrics, metricType) {
|
||||
if !Params.AutoIndexConfig.EnableDeduplicateIndex.GetAsBool() {
|
||||
log.Ctx(ctx).Warn("Deduplicate index is not enabled, but metric type is deduplicate.")
|
||||
return merr.WrapErrParameterInvalidMsg("Deduplicate index is not enabled, but metric type is deduplicate.")
|
||||
}
|
||||
// override binary vector index params by autoindex deduplicate params
|
||||
for k, v := range Params.AutoIndexConfig.DeduplicateIndexParams.GetAsJSONMap() {
|
||||
indexParamsMap[k] = v
|
||||
}
|
||||
} else {
|
||||
// override binary vector index params by autoindex
|
||||
for k, v := range Params.AutoIndexConfig.BinaryIndexParams.GetAsJSONMap() {
|
||||
indexParamsMap[k] = v
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if metricTypeExist {
|
||||
// make the users' metric type first class citizen.
|
||||
@ -292,6 +303,7 @@ func (cit *createIndexTask) parseIndexParams(ctx context.Context) error {
|
||||
}
|
||||
log.Ctx(ctx).Info("AutoIndex triggered", fields...)
|
||||
}
|
||||
metricType, metricTypeExist := indexParamsMap[common.MetricTypeKey]
|
||||
|
||||
handle := func(numberParams int, autoIndexConfig map[string]string) error {
|
||||
// empty case.
|
||||
@ -303,8 +315,6 @@ func (cit *createIndexTask) parseIndexParams(ctx context.Context) error {
|
||||
return nil
|
||||
}
|
||||
|
||||
metricType, metricTypeExist := indexParamsMap[common.MetricTypeKey]
|
||||
|
||||
if len(indexParamsMap) > numberParams+1 {
|
||||
return errors.New("only metric type can be passed when use AutoIndex")
|
||||
}
|
||||
@ -333,9 +343,17 @@ func (cit *createIndexTask) parseIndexParams(ctx context.Context) error {
|
||||
// override sparse float vector index params by autoindex
|
||||
config = Params.AutoIndexConfig.SparseIndexParams.GetAsJSONMap()
|
||||
} else if typeutil.IsBinaryVectorType(cit.fieldSchema.DataType) {
|
||||
if metricTypeExist && funcutil.SliceContain(indexparamcheck.DeduplicateMetrics, metricType) {
|
||||
if !Params.AutoIndexConfig.EnableDeduplicateIndex.GetAsBool() {
|
||||
log.Ctx(ctx).Warn("Deduplicate index is not enabled, but metric type is deduplicate.")
|
||||
return merr.WrapErrParameterInvalidMsg("Deduplicate index is not enabled, but metric type is deduplicate.")
|
||||
}
|
||||
config = Params.AutoIndexConfig.DeduplicateIndexParams.GetAsJSONMap()
|
||||
} else {
|
||||
// override binary vector index params by autoindex
|
||||
config = Params.AutoIndexConfig.BinaryIndexParams.GetAsJSONMap()
|
||||
}
|
||||
}
|
||||
if !exist {
|
||||
if err := handle(0, config); err != nil {
|
||||
return err
|
||||
|
||||
@ -710,7 +710,7 @@ func validateMetricType(dataType schemapb.DataType, metricTypeStrRaw string) err
|
||||
if typeutil.IsFloatVectorType(dataType) {
|
||||
return nil
|
||||
}
|
||||
case metric.JACCARD, metric.HAMMING, metric.SUBSTRUCTURE, metric.SUPERSTRUCTURE:
|
||||
case metric.JACCARD, metric.HAMMING, metric.SUBSTRUCTURE, metric.SUPERSTRUCTURE, metric.MHJACCARD:
|
||||
if dataType == schemapb.DataType_BinaryVector {
|
||||
return nil
|
||||
}
|
||||
|
||||
@ -53,7 +53,7 @@ const (
|
||||
|
||||
var (
|
||||
FloatVectorMetrics = []string{metric.L2, metric.IP, metric.COSINE} // const
|
||||
BinaryVectorMetrics = []string{metric.HAMMING, metric.JACCARD, metric.SUBSTRUCTURE, metric.SUPERSTRUCTURE} // const
|
||||
BinaryVectorMetrics = []string{metric.HAMMING, metric.JACCARD, metric.SUBSTRUCTURE, metric.SUPERSTRUCTURE, metric.MHJACCARD} // const
|
||||
)
|
||||
|
||||
// BinIDMapMetrics is a set of all metric types supported for binary vector.
|
||||
@ -66,6 +66,7 @@ var (
|
||||
supportDimPerSubQuantizer = []int{32, 28, 24, 20, 16, 12, 10, 8, 6, 4, 3, 2, 1} // const
|
||||
supportSubQuantizer = []int{96, 64, 56, 48, 40, 32, 28, 24, 20, 16, 12, 8, 4, 3, 2, 1} // const
|
||||
SparseMetrics = []string{metric.IP, metric.BM25} // const
|
||||
DeduplicateMetrics = []string{metric.MHJACCARD} // const
|
||||
)
|
||||
|
||||
const (
|
||||
|
||||
@ -93,6 +93,7 @@ func CheckAutoIndexConfig() {
|
||||
autoIndexCfg := &paramtable.Get().AutoIndexConfig
|
||||
CheckAutoIndexHelper(autoIndexCfg.IndexParams.Key, autoIndexCfg.IndexParams.GetAsJSONMap(), schemapb.DataType_FloatVector)
|
||||
CheckAutoIndexHelper(autoIndexCfg.BinaryIndexParams.Key, autoIndexCfg.BinaryIndexParams.GetAsJSONMap(), schemapb.DataType_BinaryVector)
|
||||
CheckAutoIndexHelper(autoIndexCfg.BinaryIndexParams.Key, autoIndexCfg.DeduplicateIndexParams.GetAsJSONMap(), schemapb.DataType_BinaryVector)
|
||||
CheckAutoIndexHelper(autoIndexCfg.SparseIndexParams.Key, autoIndexCfg.SparseIndexParams.GetAsJSONMap(), schemapb.DataType_SparseFloatVector)
|
||||
}
|
||||
|
||||
|
||||
@ -31,6 +31,9 @@ const (
|
||||
// JACCARD represents jaccard distance
|
||||
JACCARD MetricType = "JACCARD"
|
||||
|
||||
// MHJACCARD represents jaccard distance of minhash vector
|
||||
MHJACCARD MetricType = "MHJACCARD"
|
||||
|
||||
// SUBSTRUCTURE represents substructure distance
|
||||
SUBSTRUCTURE MetricType = "SUBSTRUCTURE"
|
||||
|
||||
|
||||
@ -21,5 +21,5 @@ import "strings"
|
||||
// PositivelyRelated reports whether metricType is IP, COSINE, BM25 or MHJACCARD, ignoring case.
|
||||
func PositivelyRelated(metricType string) bool {
|
||||
mUpper := strings.ToUpper(metricType)
|
||||
return mUpper == strings.ToUpper(IP) || mUpper == strings.ToUpper(COSINE) || mUpper == strings.ToUpper(BM25)
|
||||
return mUpper == strings.ToUpper(IP) || mUpper == strings.ToUpper(COSINE) || mUpper == strings.ToUpper(BM25) || mUpper == strings.ToUpper(MHJACCARD)
|
||||
}
|
||||
|
||||
@ -38,6 +38,8 @@ type AutoIndexConfig struct {
|
||||
IndexParams ParamItem `refreshable:"true"`
|
||||
SparseIndexParams ParamItem `refreshable:"true"`
|
||||
BinaryIndexParams ParamItem `refreshable:"true"`
|
||||
DeduplicateIndexParams ParamItem `refreshable:"true"`
|
||||
EnableDeduplicateIndex ParamItem `refreshable:"true"`
|
||||
PrepareParams ParamItem `refreshable:"true"`
|
||||
LoadAdaptParams ParamItem `refreshable:"true"`
|
||||
ExtraParams ParamItem `refreshable:"true"`
|
||||
@ -108,6 +110,23 @@ func (p *AutoIndexConfig) init(base *BaseTable) {
|
||||
}
|
||||
p.BinaryIndexParams.Init(base.mgr)
|
||||
|
||||
p.DeduplicateIndexParams = ParamItem{
|
||||
Key: "autoIndex.params.deduplicate.build",
|
||||
Version: "2.5.18",
|
||||
DefaultValue: `{"index_type": "MINHASH_LSH", "metric_type": "MHJACCARD"}`,
|
||||
Formatter: GetBuildParamFormatter(BinaryVectorDefaultMetricType, "autoIndex.params.deduplicate.build"),
|
||||
Export: true,
|
||||
}
|
||||
p.DeduplicateIndexParams.Init(base.mgr)
|
||||
|
||||
p.EnableDeduplicateIndex = ParamItem{
|
||||
Key: "autoIndex.params.deduplicate.enable",
|
||||
Version: "2.5.18",
|
||||
DefaultValue: "false",
|
||||
PanicIfEmpty: false,
|
||||
}
|
||||
p.EnableDeduplicateIndex.Init(base.mgr)
|
||||
|
||||
p.PrepareParams = ParamItem{
|
||||
Key: "autoIndex.params.prepare",
|
||||
Version: "2.3.2",
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user