mirror of
https://gitee.com/milvus-io/milvus.git
synced 2025-12-07 01:28:27 +08:00
Ref #42053

This is the first PR for optimizing `LIKE` with an ngram inverted index. For now, only the VARCHAR data type is supported, and only InnerMatch LIKE (%xxx%) queries are supported.

How to use it:
```
milvus_client = MilvusClient("http://localhost:19530")
schema = milvus_client.create_schema()
...
schema.add_field("content_ngram", DataType.VARCHAR, max_length=10000)
...
index_params = milvus_client.prepare_index_params()
index_params.add_index(field_name="content_ngram", index_type="NGRAM", index_name="ngram_index", min_gram=2, max_gram=3)
milvus_client.create_collection(COLLECTION_NAME, ...)
```

min_gram and max_gram control how we tokenize the documents. For example, with min_gram=2 and max_gram=4, each document is tokenized into 2-grams, 3-grams and 4-grams.

---------

Signed-off-by: SpadeA <tangchenjie1210@gmail.com>
Signed-off-by: SpadeA-Tang <tangchenjie1210@gmail.com>
81 lines
2.5 KiB
Go
81 lines
2.5 KiB
Go
// Licensed to the LF AI & Data foundation under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
|
|
|
|
package indexparamcheck
|
|
|
|
import (
|
|
"sync"
|
|
|
|
"github.com/cockroachdb/errors"
|
|
|
|
"github.com/milvus-io/milvus/internal/util/vecindexmgr"
|
|
)
|
|
|
|
type IndexCheckerMgr interface {
|
|
GetChecker(indexType string) (IndexChecker, error)
|
|
}
|
|
|
|
// indexCheckerMgrImpl implements IndexChecker.
|
|
type indexCheckerMgrImpl struct {
|
|
checkers map[IndexType]IndexChecker
|
|
once sync.Once
|
|
}
|
|
|
|
func (mgr *indexCheckerMgrImpl) GetChecker(indexType string) (IndexChecker, error) {
|
|
mgr.once.Do(mgr.registerIndexChecker)
|
|
// Unify the vector index checker
|
|
if vecindexmgr.GetVecIndexMgrInstance().IsVecIndex(indexType) {
|
|
return mgr.checkers[IndexVector], nil
|
|
}
|
|
adapter, ok := mgr.checkers[indexType]
|
|
if ok {
|
|
return adapter, nil
|
|
}
|
|
return nil, errors.New("Can not find index: " + indexType + " , please check")
|
|
}
|
|
|
|
func (mgr *indexCheckerMgrImpl) registerIndexChecker() {
|
|
mgr.checkers[IndexVector] = newVecIndexChecker()
|
|
mgr.checkers[IndexINVERTED] = newINVERTEDChecker()
|
|
mgr.checkers[IndexSTLSORT] = newSTLSORTChecker()
|
|
mgr.checkers["Asceneding"] = newSTLSORTChecker()
|
|
mgr.checkers[IndexTRIE] = newTRIEChecker()
|
|
mgr.checkers[IndexTrie] = newTRIEChecker()
|
|
mgr.checkers[IndexBitmap] = newBITMAPChecker()
|
|
mgr.checkers[IndexHybrid] = newHYBRIDChecker()
|
|
mgr.checkers["marisa-trie"] = newTRIEChecker()
|
|
mgr.checkers[AutoIndex] = newAUTOINDEXChecker()
|
|
mgr.checkers[IndexNGRAM] = newNgramIndexChecker()
|
|
}
|
|
|
|
func newIndexCheckerMgr() *indexCheckerMgrImpl {
|
|
return &indexCheckerMgrImpl{
|
|
checkers: make(map[IndexType]IndexChecker),
|
|
}
|
|
}
|
|
|
|
var indexCheckerMgr IndexCheckerMgr
|
|
|
|
var getIndexCheckerMgrOnce sync.Once
|
|
|
|
// GetIndexCheckerMgrInstance gets the instance of IndexCheckerMgr.
|
|
func GetIndexCheckerMgrInstance() IndexCheckerMgr {
|
|
getIndexCheckerMgrOnce.Do(func() {
|
|
indexCheckerMgr = newIndexCheckerMgr()
|
|
})
|
|
return indexCheckerMgr
|
|
}
|