enhance: change autoindex default metric type (#34261)

issue: #34304 
Cosine is more widely used with float vectors, and cosine and Hamming
distance are 'metrics' that have good geometric properties.

Signed-off-by: chasingegg <chao.gao@zilliz.com>
This commit is contained in:
Gao 2024-07-08 19:52:24 +08:00 committed by GitHub
parent 686a212d8b
commit ae6d6f91e6
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
10 changed files with 98 additions and 25 deletions

View File

@ -187,11 +187,15 @@ func checkParams(fieldIndex *model.Index, req *indexpb.CreateIndexRequest) bool
return false return false
} }
useAutoIndex := false
userIndexParamsWithoutMmapKey := make([]*commonpb.KeyValuePair, 0) userIndexParamsWithoutMmapKey := make([]*commonpb.KeyValuePair, 0)
for _, param := range fieldIndex.UserIndexParams { for _, param := range fieldIndex.UserIndexParams {
if param.Key == common.MmapEnabledKey { if param.Key == common.MmapEnabledKey {
continue continue
} }
if param.Key == common.IndexTypeKey && param.Value == common.AutoIndexName {
useAutoIndex = true
}
userIndexParamsWithoutMmapKey = append(userIndexParamsWithoutMmapKey, param) userIndexParamsWithoutMmapKey = append(userIndexParamsWithoutMmapKey, param)
} }
@ -200,9 +204,24 @@ func checkParams(fieldIndex *model.Index, req *indexpb.CreateIndexRequest) bool
} }
for _, param1 := range userIndexParamsWithoutMmapKey { for _, param1 := range userIndexParamsWithoutMmapKey {
exist := false exist := false
for _, param2 := range req.GetUserIndexParams() { for i, param2 := range req.GetUserIndexParams() {
if param2.Key == param1.Key && param2.Value == param1.Value { if param2.Key == param1.Key && param2.Value == param1.Value {
exist = true exist = true
} else if param1.Key == common.MetricTypeKey && param2.Key == param1.Key && useAutoIndex && !req.GetUserAutoindexMetricTypeSpecified() {
// When users use autoindex, the metric type is the only thing they can specify.
// If they do not specify a metric type, the autoindex default metric type is used.
// When the autoindex default config has been upgraded, keep the metric type already stored for the existing index, for compatibility.
// Warning: this overwrites the metric type in the request.
log.Warn("user not specify autoindex metric type, autoindex config has changed, use old metric for compatibility",
zap.String("old metric type", param1.Value), zap.String("new metric type", param2.Value))
req.GetUserIndexParams()[i].Value = param1.Value
for j, param := range req.GetIndexParams() {
if param.Key == common.MetricTypeKey {
req.GetIndexParams()[j].Value = param1.Value
break
}
}
exist = true
} }
} }
if !exist { if !exist {

View File

@ -95,6 +95,20 @@ func TestMeta_CanCreateIndex(t *testing.T) {
Key: common.IndexTypeKey, Key: common.IndexTypeKey,
Value: "FLAT", Value: "FLAT",
}, },
{
Key: common.MetricTypeKey,
Value: "L2",
},
}
userIndexParams = []*commonpb.KeyValuePair{
{
Key: common.IndexTypeKey,
Value: common.AutoIndexName,
},
{
Key: common.MetricTypeKey,
Value: "L2",
},
} }
) )
@ -114,7 +128,7 @@ func TestMeta_CanCreateIndex(t *testing.T) {
IndexParams: indexParams, IndexParams: indexParams,
Timestamp: 0, Timestamp: 0,
IsAutoIndex: false, IsAutoIndex: false,
UserIndexParams: indexParams, UserIndexParams: userIndexParams,
} }
t.Run("can create index", func(t *testing.T) { t.Run("can create index", func(t *testing.T) {
@ -132,7 +146,7 @@ func TestMeta_CanCreateIndex(t *testing.T) {
TypeParams: typeParams, TypeParams: typeParams,
IndexParams: indexParams, IndexParams: indexParams,
IsAutoIndex: false, IsAutoIndex: false,
UserIndexParams: indexParams, UserIndexParams: userIndexParams,
} }
err = m.CreateIndex(index) err = m.CreateIndex(index)
@ -166,6 +180,32 @@ func TestMeta_CanCreateIndex(t *testing.T) {
assert.Error(t, err) assert.Error(t, err)
assert.Equal(t, int64(0), tmpIndexID) assert.Equal(t, int64(0), tmpIndexID)
req.IndexParams = []*commonpb.KeyValuePair{{Key: common.IndexTypeKey, Value: "FLAT"}, {Key: common.MetricTypeKey, Value: "COSINE"}}
req.UserIndexParams = req.IndexParams
tmpIndexID, err = m.CanCreateIndex(req)
assert.Error(t, err)
assert.Equal(t, int64(0), tmpIndexID)
// when we use autoindex, it is possible autoindex changes default metric type
// if user does not specify metric type, we should follow the very first autoindex config
req.IndexParams = []*commonpb.KeyValuePair{{Key: common.IndexTypeKey, Value: "FLAT"}, {Key: common.MetricTypeKey, Value: "COSINE"}}
req.UserIndexParams = []*commonpb.KeyValuePair{{Key: common.IndexTypeKey, Value: "AUTOINDEX"}, {Key: common.MetricTypeKey, Value: "COSINE"}}
req.UserAutoindexMetricTypeSpecified = false
tmpIndexID, err = m.CanCreateIndex(req)
assert.NoError(t, err)
assert.Equal(t, indexID, tmpIndexID)
// req should follow the meta
assert.Equal(t, "L2", req.GetUserIndexParams()[1].Value)
assert.Equal(t, "L2", req.GetIndexParams()[1].Value)
// if the user specified the metric type with autoindex, the index param change comes from the user, so return an error
req.IndexParams = []*commonpb.KeyValuePair{{Key: common.IndexTypeKey, Value: "FLAT"}, {Key: common.MetricTypeKey, Value: "COSINE"}}
req.UserIndexParams = []*commonpb.KeyValuePair{{Key: common.IndexTypeKey, Value: "AUTOINDEX"}, {Key: common.MetricTypeKey, Value: "COSINE"}}
req.UserAutoindexMetricTypeSpecified = true
tmpIndexID, err = m.CanCreateIndex(req)
assert.Error(t, err)
assert.Equal(t, int64(0), tmpIndexID)
req.IndexParams = indexParams req.IndexParams = indexParams
req.UserIndexParams = indexParams req.UserIndexParams = indexParams
req.FieldID++ req.FieldID++

View File

@ -163,6 +163,7 @@ message CreateIndexRequest {
uint64 timestamp = 6; uint64 timestamp = 6;
bool is_auto_index = 7; bool is_auto_index = 7;
repeated common.KeyValuePair user_index_params = 8; repeated common.KeyValuePair user_index_params = 8;
bool user_autoindex_metric_type_specified = 9;
} }
message AlterIndexRequest { message AlterIndexRequest {

View File

@ -49,7 +49,7 @@ const (
GetIndexStateTaskName = "GetIndexStateTask" GetIndexStateTaskName = "GetIndexStateTask"
GetIndexBuildProgressTaskName = "GetIndexBuildProgressTask" GetIndexBuildProgressTaskName = "GetIndexBuildProgressTask"
AutoIndexName = "AUTOINDEX" AutoIndexName = common.AutoIndexName
DimKey = common.DimKey DimKey = common.DimKey
IsSparseKey = common.IsSparseKey IsSparseKey = common.IsSparseKey
) )
@ -70,8 +70,9 @@ type createIndexTask struct {
newTypeParams []*commonpb.KeyValuePair newTypeParams []*commonpb.KeyValuePair
newExtraParams []*commonpb.KeyValuePair newExtraParams []*commonpb.KeyValuePair
collectionID UniqueID collectionID UniqueID
fieldSchema *schemapb.FieldSchema fieldSchema *schemapb.FieldSchema
userAutoIndexMetricTypeSpecified bool
} }
func (cit *createIndexTask) TraceCtx() context.Context { func (cit *createIndexTask) TraceCtx() context.Context {
@ -198,6 +199,7 @@ func (cit *createIndexTask) parseIndexParams() error {
if metricTypeExist { if metricTypeExist {
// make the users' metric type first class citizen. // make the users' metric type first class citizen.
indexParamsMap[common.MetricTypeKey] = metricType indexParamsMap[common.MetricTypeKey] = metricType
cit.userAutoIndexMetricTypeSpecified = true
} }
} else { // behavior change after 2.2.9, adapt autoindex logic here. } else { // behavior change after 2.2.9, adapt autoindex logic here.
useAutoIndex := func(autoIndexConfig map[string]string) { useAutoIndex := func(autoIndexConfig map[string]string) {
@ -235,6 +237,7 @@ func (cit *createIndexTask) parseIndexParams() error {
useAutoIndex(autoIndexConfig) useAutoIndex(autoIndexConfig)
// make the users' metric type first class citizen. // make the users' metric type first class citizen.
indexParamsMap[common.MetricTypeKey] = metricType indexParamsMap[common.MetricTypeKey] = metricType
cit.userAutoIndexMetricTypeSpecified = true
} }
return nil return nil
@ -451,14 +454,15 @@ func (cit *createIndexTask) Execute(ctx context.Context) error {
var err error var err error
req := &indexpb.CreateIndexRequest{ req := &indexpb.CreateIndexRequest{
CollectionID: cit.collectionID, CollectionID: cit.collectionID,
FieldID: cit.fieldSchema.GetFieldID(), FieldID: cit.fieldSchema.GetFieldID(),
IndexName: cit.req.GetIndexName(), IndexName: cit.req.GetIndexName(),
TypeParams: cit.newTypeParams, TypeParams: cit.newTypeParams,
IndexParams: cit.newIndexParams, IndexParams: cit.newIndexParams,
IsAutoIndex: cit.isAutoIndex, IsAutoIndex: cit.isAutoIndex,
UserIndexParams: cit.newExtraParams, UserIndexParams: cit.newExtraParams,
Timestamp: cit.BeginTs(), Timestamp: cit.BeginTs(),
UserAutoindexMetricTypeSpecified: cit.userAutoIndexMetricTypeSpecified,
} }
cit.result, err = cit.datacoord.CreateIndex(ctx, req) cit.result, err = cit.datacoord.CreateIndex(ctx, req)
if err != nil { if err != nil {

View File

@ -1006,6 +1006,7 @@ func Test_parseIndexParams_AutoIndex_WithType(t *testing.T) {
} }
err := task.parseIndexParams() err := task.parseIndexParams()
assert.NoError(t, err) assert.NoError(t, err)
assert.True(t, task.userAutoIndexMetricTypeSpecified)
assert.ElementsMatch(t, []*commonpb.KeyValuePair{ assert.ElementsMatch(t, []*commonpb.KeyValuePair{
{Key: common.IndexTypeKey, Value: "HNSW"}, {Key: common.IndexTypeKey, Value: "HNSW"},
{Key: common.MetricTypeKey, Value: "L2"}, {Key: common.MetricTypeKey, Value: "L2"},
@ -1026,6 +1027,7 @@ func Test_parseIndexParams_AutoIndex_WithType(t *testing.T) {
} }
err := task.parseIndexParams() err := task.parseIndexParams()
assert.NoError(t, err) assert.NoError(t, err)
assert.True(t, task.userAutoIndexMetricTypeSpecified)
assert.ElementsMatch(t, []*commonpb.KeyValuePair{ assert.ElementsMatch(t, []*commonpb.KeyValuePair{
{Key: common.IndexTypeKey, Value: "SPARSE_INVERTED_INDEX"}, {Key: common.IndexTypeKey, Value: "SPARSE_INVERTED_INDEX"},
{Key: common.MetricTypeKey, Value: "IP"}, {Key: common.MetricTypeKey, Value: "IP"},
@ -1044,6 +1046,7 @@ func Test_parseIndexParams_AutoIndex_WithType(t *testing.T) {
} }
err := task.parseIndexParams() err := task.parseIndexParams()
assert.NoError(t, err) assert.NoError(t, err)
assert.True(t, task.userAutoIndexMetricTypeSpecified)
assert.ElementsMatch(t, []*commonpb.KeyValuePair{ assert.ElementsMatch(t, []*commonpb.KeyValuePair{
{Key: common.IndexTypeKey, Value: "BIN_IVF_FLAT"}, {Key: common.IndexTypeKey, Value: "BIN_IVF_FLAT"},
{Key: common.MetricTypeKey, Value: "JACCARD"}, {Key: common.MetricTypeKey, Value: "JACCARD"},
@ -1093,6 +1096,7 @@ func Test_parseIndexParams_AutoIndex(t *testing.T) {
} }
err := task.parseIndexParams() err := task.parseIndexParams()
assert.NoError(t, err) assert.NoError(t, err)
assert.False(t, task.userAutoIndexMetricTypeSpecified)
assert.ElementsMatch(t, []*commonpb.KeyValuePair{ assert.ElementsMatch(t, []*commonpb.KeyValuePair{
{Key: common.IndexTypeKey, Value: AutoIndexName}, {Key: common.IndexTypeKey, Value: AutoIndexName},
{Key: common.MetricTypeKey, Value: autoIndexConfigBinary[common.MetricTypeKey]}, {Key: common.MetricTypeKey, Value: autoIndexConfigBinary[common.MetricTypeKey]},
@ -1108,6 +1112,7 @@ func Test_parseIndexParams_AutoIndex(t *testing.T) {
} }
err := task.parseIndexParams() err := task.parseIndexParams()
assert.NoError(t, err) assert.NoError(t, err)
assert.False(t, task.userAutoIndexMetricTypeSpecified)
assert.ElementsMatch(t, []*commonpb.KeyValuePair{ assert.ElementsMatch(t, []*commonpb.KeyValuePair{
{Key: common.IndexTypeKey, Value: AutoIndexName}, {Key: common.IndexTypeKey, Value: AutoIndexName},
{Key: common.MetricTypeKey, Value: autoIndexConfigSparse[common.MetricTypeKey]}, {Key: common.MetricTypeKey, Value: autoIndexConfigSparse[common.MetricTypeKey]},
@ -1123,6 +1128,7 @@ func Test_parseIndexParams_AutoIndex(t *testing.T) {
} }
err := task.parseIndexParams() err := task.parseIndexParams()
assert.NoError(t, err) assert.NoError(t, err)
assert.False(t, task.userAutoIndexMetricTypeSpecified)
assert.ElementsMatch(t, []*commonpb.KeyValuePair{ assert.ElementsMatch(t, []*commonpb.KeyValuePair{
{Key: common.IndexTypeKey, Value: AutoIndexName}, {Key: common.IndexTypeKey, Value: AutoIndexName},
{Key: common.MetricTypeKey, Value: autoIndexConfig[common.MetricTypeKey]}, {Key: common.MetricTypeKey, Value: autoIndexConfig[common.MetricTypeKey]},
@ -1140,6 +1146,7 @@ func Test_parseIndexParams_AutoIndex(t *testing.T) {
} }
err := task.parseIndexParams() err := task.parseIndexParams()
assert.NoError(t, err) assert.NoError(t, err)
assert.True(t, task.userAutoIndexMetricTypeSpecified)
assert.ElementsMatch(t, []*commonpb.KeyValuePair{ assert.ElementsMatch(t, []*commonpb.KeyValuePair{
{Key: common.IndexTypeKey, Value: AutoIndexName}, {Key: common.IndexTypeKey, Value: AutoIndexName},
{Key: common.MetricTypeKey, Value: "L2"}, {Key: common.MetricTypeKey, Value: "L2"},

View File

@ -121,6 +121,7 @@ const (
BitmapCardinalityLimitKey = "bitmap_cardinality_limit" BitmapCardinalityLimitKey = "bitmap_cardinality_limit"
IsSparseKey = "is_sparse" IsSparseKey = "is_sparse"
AutoIndexName = "AUTOINDEX"
) )
// Collection properties key // Collection properties key

View File

@ -64,7 +64,7 @@ var (
) )
const ( const (
FloatVectorDefaultMetricType = metric.IP FloatVectorDefaultMetricType = metric.COSINE
SparseFloatVectorDefaultMetricType = metric.IP SparseFloatVectorDefaultMetricType = metric.IP
BinaryVectorDefaultMetricType = metric.JACCARD BinaryVectorDefaultMetricType = metric.HAMMING
) )

View File

@ -180,15 +180,15 @@ func Test_hnswChecker_SetDefaultMetricType(t *testing.T) {
}{ }{
{ {
dType: schemapb.DataType_FloatVector, dType: schemapb.DataType_FloatVector,
metricType: metric.IP, metricType: metric.COSINE,
}, },
{ {
dType: schemapb.DataType_Float16Vector, dType: schemapb.DataType_Float16Vector,
metricType: metric.IP, metricType: metric.COSINE,
}, },
{ {
dType: schemapb.DataType_BFloat16Vector, dType: schemapb.DataType_BFloat16Vector,
metricType: metric.IP, metricType: metric.COSINE,
}, },
{ {
dType: schemapb.DataType_SparseFloatVector, dType: schemapb.DataType_SparseFloatVector,
@ -196,7 +196,7 @@ func Test_hnswChecker_SetDefaultMetricType(t *testing.T) {
}, },
{ {
dType: schemapb.DataType_BinaryVector, dType: schemapb.DataType_BinaryVector,
metricType: metric.JACCARD, metricType: metric.HAMMING,
}, },
} }

View File

@ -70,7 +70,7 @@ func (p *autoIndexConfig) init(base *BaseTable) {
p.IndexParams = ParamItem{ p.IndexParams = ParamItem{
Key: "autoIndex.params.build", Key: "autoIndex.params.build",
Version: "2.2.0", Version: "2.2.0",
DefaultValue: `{"M": 18,"efConstruction": 240,"index_type": "HNSW", "metric_type": "IP"}`, DefaultValue: `{"M": 18,"efConstruction": 240,"index_type": "HNSW", "metric_type": "COSINE"}`,
Export: true, Export: true,
} }
p.IndexParams.Init(base.mgr) p.IndexParams.Init(base.mgr)
@ -86,7 +86,7 @@ func (p *autoIndexConfig) init(base *BaseTable) {
p.BinaryIndexParams = ParamItem{ p.BinaryIndexParams = ParamItem{
Key: "autoIndex.params.binary.build", Key: "autoIndex.params.binary.build",
Version: "2.4.5", Version: "2.4.5",
DefaultValue: `{"nlist": 1024, "index_type": "BIN_IVF_FLAT", "metric_type": "JACCARD"}`, DefaultValue: `{"nlist": 1024, "index_type": "BIN_IVF_FLAT", "metric_type": "HAMMING"}`,
Export: true, Export: true,
} }
p.BinaryIndexParams.Init(base.mgr) p.BinaryIndexParams.Init(base.mgr)

View File

@ -22,7 +22,8 @@ prefix = "index"
default_schema = cf.gen_default_collection_schema() default_schema = cf.gen_default_collection_schema()
default_field_name = ct.default_float_vec_field_name default_field_name = ct.default_float_vec_field_name
default_index_params = ct.default_index default_index_params = ct.default_index
default_autoindex_params = {"index_type": "AUTOINDEX", "metric_type": "IP"} default_autoindex_params = {"index_type": "AUTOINDEX", "metric_type": "COSINE"}
default_sparse_autoindex_params = {"index_type": "AUTOINDEX", "metric_type": "IP"}
# copied from pymilvus # copied from pymilvus
uid = "test_index" uid = "test_index"
@ -2118,7 +2119,7 @@ class TestAutoIndex(TestcaseBase):
""" """
collection_w = self.init_collection_general(prefix, is_binary=True, is_index=False)[0] collection_w = self.init_collection_general(prefix, is_binary=True, is_index=False)[0]
collection_w.create_index(binary_field_name, {}) collection_w.create_index(binary_field_name, {})
assert collection_w.index()[0].params == {'index_type': 'AUTOINDEX', 'metric_type': 'JACCARD'} assert collection_w.index()[0].params == {'index_type': 'AUTOINDEX', 'metric_type': 'HAMMING'}
@pytest.mark.tags(CaseLabel.L1) @pytest.mark.tags(CaseLabel.L1)
def test_create_autoindex_on_all_vector_type(self): def test_create_autoindex_on_all_vector_type(self):
@ -2141,7 +2142,7 @@ class TestAutoIndex(TestcaseBase):
collection_w.index(index_name="bf16")[0].params.items()) collection_w.index(index_name="bf16")[0].params.items())
collection_w.create_index("sparse", index_name="sparse") collection_w.create_index("sparse", index_name="sparse")
assert all(item in default_autoindex_params.items() for item in assert all(item in default_sparse_autoindex_params.items() for item in
collection_w.index(index_name="sparse")[0].params.items()) collection_w.index(index_name="sparse")[0].params.items())