fix: support configuring the index offset cache and fix creating the same index again (#35985)

#35971

Signed-off-by: luzhang <luzhang@zilliz.com>
Co-authored-by: luzhang <luzhang@zilliz.com>
This commit is contained in:
zhagnlu 2024-09-08 18:23:05 +08:00 committed by GitHub
parent 91d23ecbe1
commit 208c8a2328
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
7 changed files with 70 additions and 8 deletions

View File

@ -425,6 +425,7 @@ queryNode:
requestResourceRetryInterval: 2000 # retry interval in milliseconds for waiting request resource for lazy load, 2s by default requestResourceRetryInterval: 2000 # retry interval in milliseconds for waiting request resource for lazy load, 2s by default
maxRetryTimes: 1 # max retry times for lazy load, 1 by default maxRetryTimes: 1 # max retry times for lazy load, 1 by default
maxEvictPerRetry: 1 # max evict count for lazy load, 1 by default maxEvictPerRetry: 1 # max evict count for lazy load, 1 by default
indexOffsetCacheEnabled: false # enable index offset cache for some scalar indexes, now is just for bitmap index, enable this param can improve performance for retrieving raw data from index
grouping: grouping:
enabled: true enabled: true
maxNQ: 1000 maxNQ: 1000

View File

@ -38,6 +38,7 @@ import (
"github.com/milvus-io/milvus/pkg/log" "github.com/milvus-io/milvus/pkg/log"
"github.com/milvus-io/milvus/pkg/metrics" "github.com/milvus-io/milvus/pkg/metrics"
"github.com/milvus-io/milvus/pkg/util/indexparamcheck" "github.com/milvus-io/milvus/pkg/util/indexparamcheck"
"github.com/milvus-io/milvus/pkg/util/indexparams"
"github.com/milvus-io/milvus/pkg/util/timerecord" "github.com/milvus-io/milvus/pkg/util/timerecord"
"github.com/milvus-io/milvus/pkg/util/typeutil" "github.com/milvus-io/milvus/pkg/util/typeutil"
) )
@ -191,21 +192,21 @@ func checkParams(fieldIndex *model.Index, req *indexpb.CreateIndexRequest) bool
} }
useAutoIndex := false useAutoIndex := false
userIndexParamsWithoutMmapKey := make([]*commonpb.KeyValuePair, 0) userIndexParamsWithoutConfigableKey := make([]*commonpb.KeyValuePair, 0)
for _, param := range fieldIndex.UserIndexParams { for _, param := range fieldIndex.UserIndexParams {
if param.Key == common.MmapEnabledKey { if indexparams.IsConfigableIndexParam(param.Key) {
continue continue
} }
if param.Key == common.IndexTypeKey && param.Value == common.AutoIndexName { if param.Key == common.IndexTypeKey && param.Value == common.AutoIndexName {
useAutoIndex = true useAutoIndex = true
} }
userIndexParamsWithoutMmapKey = append(userIndexParamsWithoutMmapKey, param) userIndexParamsWithoutConfigableKey = append(userIndexParamsWithoutConfigableKey, param)
} }
if len(userIndexParamsWithoutMmapKey) != len(req.GetUserIndexParams()) { if len(userIndexParamsWithoutConfigableKey) != len(req.GetUserIndexParams()) {
return false return false
} }
for _, param1 := range userIndexParamsWithoutMmapKey { for _, param1 := range userIndexParamsWithoutConfigableKey {
exist := false exist := false
for i, param2 := range req.GetUserIndexParams() { for i, param2 := range req.GetUserIndexParams() {
if param2.Key == param1.Key && param2.Value == param1.Value { if param2.Key == param1.Key && param2.Value == param1.Value {

View File

@ -183,6 +183,7 @@ func (s *Server) CreateIndex(ctx context.Context, req *indexpb.CreateIndexReques
zap.String("IndexName", req.GetIndexName()), zap.Int64("fieldID", req.GetFieldID()), zap.String("IndexName", req.GetIndexName()), zap.Int64("fieldID", req.GetFieldID()),
zap.Any("TypeParams", req.GetTypeParams()), zap.Any("TypeParams", req.GetTypeParams()),
zap.Any("IndexParams", req.GetIndexParams()), zap.Any("IndexParams", req.GetIndexParams()),
zap.Any("UserIndexParams", req.GetUserIndexParams()),
) )
if err := merr.CheckHealthy(s.GetStateCode()); err != nil { if err := merr.CheckHealthy(s.GetStateCode()); err != nil {
@ -343,7 +344,7 @@ func (s *Server) AlterIndex(ctx context.Context, req *indexpb.AlterIndexRequest)
// update index params // update index params
newIndexParams := UpdateParams(index, index.IndexParams, req.GetParams()) newIndexParams := UpdateParams(index, index.IndexParams, req.GetParams())
log.Info("alter index user index params", log.Info("alter index index params",
zap.String("indexName", index.IndexName), zap.String("indexName", index.IndexName),
zap.Any("params", newIndexParams), zap.Any("params", newIndexParams),
) )

View File

@ -1116,6 +1116,11 @@ func (s *LocalSegment) LoadIndex(ctx context.Context, indexInfo *querypb.FieldIn
} }
} }
// set whether enable offset cache for bitmap index
if indexParams["index_type"] == indexparamcheck.IndexBitmap {
indexparams.SetBitmapIndexLoadParams(paramtable.Get(), indexParams)
}
if err := indexparams.AppendPrepareLoadParams(paramtable.Get(), indexParams); err != nil { if err := indexparams.AppendPrepareLoadParams(paramtable.Get(), indexParams); err != nil {
return err return err
} }

View File

@ -299,6 +299,14 @@ func SetDiskIndexBuildParams(indexParams map[string]string, fieldDataSize int64)
return nil return nil
} }
// SetBitmapIndexLoadParams makes sure the offset-cache switch is present in
// indexParams before a bitmap index is loaded. A value that is already set on
// the index is left untouched; otherwise the key is populated from
// params.QueryNodeCfg.IndexOffsetCacheEnabled.
func SetBitmapIndexLoadParams(params *paramtable.ComponentParam, indexParams map[string]string) {
	if _, configured := indexParams[common.IndexOffsetCacheEnabledKey]; !configured {
		// No per-index setting: fall back to the QueryNode config value.
		indexParams[common.IndexOffsetCacheEnabledKey] = params.QueryNodeCfg.IndexOffsetCacheEnabled.GetValue()
	}
}
// SetDiskIndexLoadParams set disk index load params with ratio params on queryNode // SetDiskIndexLoadParams set disk index load params with ratio params on queryNode
// QueryNode calculates load params with ratio params and cpu count... // QueryNode calculates load params with ratio params and cpu count...
func SetDiskIndexLoadParams(params *paramtable.ComponentParam, indexParams map[string]string, numRows int64) error { func SetDiskIndexLoadParams(params *paramtable.ComponentParam, indexParams map[string]string, numRows int64) error {

View File

@ -2358,6 +2358,8 @@ type queryNodeConfig struct {
LazyLoadMaxRetryTimes ParamItem `refreshable:"true"` LazyLoadMaxRetryTimes ParamItem `refreshable:"true"`
LazyLoadMaxEvictPerRetry ParamItem `refreshable:"true"` LazyLoadMaxEvictPerRetry ParamItem `refreshable:"true"`
IndexOffsetCacheEnabled ParamItem `refreshable:"true"`
// chunk cache // chunk cache
ReadAheadPolicy ParamItem `refreshable:"false"` ReadAheadPolicy ParamItem `refreshable:"false"`
ChunkCacheWarmingUp ParamItem `refreshable:"true"` ChunkCacheWarmingUp ParamItem `refreshable:"true"`
@ -2864,6 +2866,16 @@ Max read concurrency must greater than or equal to 1, and less than or equal to
} }
p.EnableDisk.Init(base.mgr) p.EnableDisk.Init(base.mgr)
p.IndexOffsetCacheEnabled = ParamItem{
Key: "queryNode.indexOffsetCacheEnabled",
Version: "2.5.0",
DefaultValue: "false",
Doc: "enable index offset cache for some scalar indexes, now is just for bitmap index," +
" enable this param can improve performance for retrieving raw data from index",
Export: true,
}
p.IndexOffsetCacheEnabled.Init(base.mgr)
p.DiskCapacityLimit = ParamItem{ p.DiskCapacityLimit = ParamItem{
Key: "LOCAL_STORAGE_SIZE", Key: "LOCAL_STORAGE_SIZE",
Version: "2.2.0", Version: "2.2.0",

View File

@ -2703,6 +2703,40 @@ class TestQueryString(TestcaseBase):
collection_w.query(expression, output_fields=output_fields, collection_w.query(expression, output_fields=output_fields,
check_task=CheckTasks.check_query_results, check_items={exp_res: res}) check_task=CheckTasks.check_query_results, check_items={exp_res: res})
@pytest.mark.tags(CaseLabel.L1)
def test_bitmap_alter_offset_cache_param(self):
"""
target: test a BITMAP index on the varchar field while toggling 'indexoffsetcache.enabled' through alter_index
method: 1. create a vector index and a BITMAP index, load, and record the row count of a prefix query
2. release, set 'indexoffsetcache.enabled' to True, create the same index again, load and query again
3. repeat step 2 with 'indexoffsetcache.enabled' set to False
expected: create index and load succeed, and the query returns the same number of rows every time
"""
collection_w, vectors = self.init_collection_general(prefix, insert_data=True,is_index=False,
primary_field=default_int_field_name)[0:2]
collection_w.create_index(ct.default_float_vec_field_name, default_index_params, index_name="test_vec")
collection_w.create_index("varchar", index_name="bitmap_offset_cache", index_params={"index_type": "BITMAP"})
time.sleep(1)
collection_w.load()
# baseline: row count of the prefix query before the offset-cache param is altered
expression = 'varchar like "0%"'
result , _ = collection_w.query(expression, output_fields=['varchar'])
res_len = len(result)
collection_w.release()
# enable the offset cache on the existing index, then issue the identical create_index request again
# NOTE(review): presumably the repeated create_index must be accepted as "same index" even though
# the stored params now carry the offset-cache key -- confirm against the server-side param check
collection_w.alter_index("bitmap_offset_cache", {'indexoffsetcache.enabled': True})
collection_w.create_index("varchar", index_name="bitmap_offset_cache", index_params={"index_type": "BITMAP"})
collection_w.load()
expression = 'varchar like "0%"'
result , _ = collection_w.query(expression, output_fields=['varchar'])
res_len_new = len(result)
# the query result count must not change when the offset cache is enabled
assert res_len_new == res_len
collection_w.release()
# disable the offset cache again and repeat the same create/load/query round
collection_w.alter_index("bitmap_offset_cache", {'indexoffsetcache.enabled': False})
collection_w.create_index("varchar", index_name="bitmap_offset_cache", index_params={"index_type": "BITMAP"})
collection_w.load()
expression = 'varchar like "0%"'
result , _ = collection_w.query(expression, output_fields=['varchar'])
res_len_new = len(result)
# the query result count must not change when the offset cache is disabled
assert res_len_new == res_len
collection_w.release()
@pytest.mark.tags(CaseLabel.L1) @pytest.mark.tags(CaseLabel.L1)
def test_query_string_expr_with_prefixes_auto_index(self): def test_query_string_expr_with_prefixes_auto_index(self):
""" """
@ -2736,7 +2770,7 @@ class TestQueryString(TestcaseBase):
primary_field=default_int_field_name)[0:2] primary_field=default_int_field_name)[0:2]
collection_w.create_index(ct.default_float_vec_field_name, default_index_params, index_name="query_expr_pre_index") collection_w.create_index(ct.default_float_vec_field_name, default_index_params, index_name="query_expr_pre_index")
collection_w.create_index("varchar", index_name="bitmap_auto_index") collection_w.create_index("varchar", index_name="bitmap_auto_index", index_params={"index_type": "BITMAP"})
time.sleep(1) time.sleep(1)
collection_w.load() collection_w.load()
expression = 'varchar like "0%"' expression = 'varchar like "0%"'
@ -2782,7 +2816,7 @@ class TestQueryString(TestcaseBase):
primary_field=default_int_field_name)[0:2] primary_field=default_int_field_name)[0:2]
collection_w.create_index(ct.default_float_vec_field_name, default_index_params, index_name="query_expr_pre_index") collection_w.create_index(ct.default_float_vec_field_name, default_index_params, index_name="query_expr_pre_index")
collection_w.create_index("varchar", index_name="bitmap_auto_index") collection_w.create_index("varchar", index_name="bitmap_auto_index", index_params={"index_type": "BITMAP"})
time.sleep(1) time.sleep(1)
collection_w.load() collection_w.load()
expression = 'varchar like "%0%"' expression = 'varchar like "%0%"'