milvus/internal/util/indexparamcheck/conf_adapter_mgr.go
Spade A 26ec841feb
feat: optimize Like query with n-gram (#41803)
Ref #42053

This is the first PR for optimizing `LIKE` with ngram inverted index.
Now, only VARCHAR data type is supported and only InnerMatch LIKE
(%xxx%) query is supported.


How to use it:
```
milvus_client = MilvusClient("http://localhost:19530")
schema = milvus_client.create_schema()
...
schema.add_field("content_ngram", DataType.VARCHAR, max_length=10000)
...
index_params = milvus_client.prepare_index_params()
index_params.add_index(field_name="content_ngram", index_type="NGRAM", index_name="ngram_index", min_gram=2, max_gram=3)
milvus_client.create_collection(COLLECTION_NAME, ...)
```

min_gram and max_gram control how we tokenize the documents. For
example, for min_gram=2 and max_gram=4, we will tokenize each document
with 2-gram, 3-gram and 4-gram.

---------

Signed-off-by: SpadeA <tangchenjie1210@gmail.com>
Signed-off-by: SpadeA-Tang <tangchenjie1210@gmail.com>
2025-07-01 10:08:44 +08:00

81 lines
2.5 KiB
Go

// Licensed to the LF AI & Data foundation under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package indexparamcheck
import (
"sync"
"github.com/cockroachdb/errors"
"github.com/milvus-io/milvus/internal/util/vecindexmgr"
)
// IndexCheckerMgr resolves an index type name to the IndexChecker
// responsible for it.
type IndexCheckerMgr interface {
// GetChecker returns the IndexChecker for indexType. A non-nil error means
// no checker was returned for that index type.
GetChecker(indexType string) (IndexChecker, error)
}
// indexCheckerMgrImpl implements IndexCheckerMgr with a lazily populated
// registry of checkers.
type indexCheckerMgrImpl struct {
// checkers maps an index type to the checker registered for it.
checkers map[IndexType]IndexChecker
// once ensures registerIndexChecker runs a single time, on the first
// GetChecker call.
once sync.Once
}
// GetChecker returns the checker registered for indexType.
//
// The registry is filled on the first call. Every index type that the vector
// index manager reports as a vector index shares the single checker stored
// under IndexVector; any other type is looked up by its own name. An error is
// returned when no checker is registered for indexType.
func (mgr *indexCheckerMgrImpl) GetChecker(indexType string) (IndexChecker, error) {
	// Lazily populate the registry exactly once per manager.
	mgr.once.Do(mgr.registerIndexChecker)

	// Vector indexes are all validated by one unified checker.
	isVector := vecindexmgr.GetVecIndexMgrInstance().IsVecIndex(indexType)
	if isVector {
		return mgr.checkers[IndexVector], nil
	}

	checker, found := mgr.checkers[indexType]
	if !found {
		return nil, errors.New("Can not find index: " + indexType + " , please check")
	}
	return checker, nil
}
// registerIndexChecker fills mgr.checkers with one checker per supported
// index type name. GetChecker invokes it through mgr.once; it writes into
// mgr.checkers, so the map must already be allocated (newIndexCheckerMgr does
// that).
func (mgr *indexCheckerMgrImpl) registerIndexChecker() {
	register := func(name IndexType, checker IndexChecker) {
		mgr.checkers[name] = checker
	}

	// Unified checker shared by all vector index types.
	register(IndexVector, newVecIndexChecker())

	// Scalar index checkers. Several names map to the same kind of checker.
	register(IndexINVERTED, newINVERTEDChecker())
	register(IndexSTLSORT, newSTLSORTChecker())
	// NOTE(review): "Asceneding" looks like a misspelling of "Ascending"; it is
	// a runtime lookup key, so it is kept verbatim — confirm whether it is a
	// legacy name that callers still send.
	register("Asceneding", newSTLSORTChecker())
	register(IndexTRIE, newTRIEChecker())
	register(IndexTrie, newTRIEChecker())
	register(IndexBitmap, newBITMAPChecker())
	register(IndexHybrid, newHYBRIDChecker())
	register("marisa-trie", newTRIEChecker())
	register(AutoIndex, newAUTOINDEXChecker())
	register(IndexNGRAM, newNgramIndexChecker())
}
// newIndexCheckerMgr builds a manager with an empty, allocated registry.
// The checkers themselves are registered later, on the first GetChecker call.
func newIndexCheckerMgr() *indexCheckerMgrImpl {
	mgr := new(indexCheckerMgrImpl)
	mgr.checkers = map[IndexType]IndexChecker{}
	return mgr
}
var (
	// indexCheckerMgr is the package-wide manager returned by
	// GetIndexCheckerMgrInstance.
	indexCheckerMgr IndexCheckerMgr
	// getIndexCheckerMgrOnce guards the one-time creation of indexCheckerMgr.
	getIndexCheckerMgrOnce sync.Once
)
// GetIndexCheckerMgrInstance returns the package-wide IndexCheckerMgr,
// creating it on the first call.
func GetIndexCheckerMgrInstance() IndexCheckerMgr {
	createMgr := func() {
		indexCheckerMgr = newIndexCheckerMgr()
	}
	getIndexCheckerMgrOnce.Do(createMgr)
	return indexCheckerMgr
}