mirror of
https://gitee.com/milvus-io/milvus.git
synced 2025-12-06 17:18:35 +08:00
Ref https://github.com/milvus-io/milvus/issues/42148

This PR supports creating an index on a vector array field (currently only for element type `DataType.FLOAT_VECTOR`) and searching on it. The only index type supported in this PR is `EMB_LIST_HNSW`, and the only supported metric type is `MAX_SIM`.

Usage:

```python
milvus_client = MilvusClient("xxx:19530")
schema = milvus_client.create_schema(enable_dynamic_field=True, auto_id=True)
...
struct_schema = milvus_client.create_struct_array_field_schema("struct_array_field")
...
struct_schema.add_field("struct_float_vec", DataType.ARRAY_OF_VECTOR, element_type=DataType.FLOAT_VECTOR, dim=128, max_capacity=1000)
...
schema.add_struct_array_field(struct_schema)
index_params = milvus_client.prepare_index_params()
index_params.add_index(field_name="struct_float_vec", index_type="EMB_LIST_HNSW", metric_type="MAX_SIM", index_params={"nlist": 128})
...
milvus_client.create_index(COLLECTION_NAME, schema=schema, index_params=index_params)
```

Note: This PR uses `Lims` to pass the offsets of each vector array to knowhere. The vectors of multiple vector arrays are concatenated, so the offsets are needed to specify which vectors belong to which vector array.

---------

Signed-off-by: SpadeA <tangchenjie1210@gmail.com>
Signed-off-by: SpadeA-Tang <tangchenjie1210@gmail.com>
82 lines
2.2 KiB
Go
82 lines
2.2 KiB
Go
// Licensed to the LF AI & Data foundation under one
|
|
// or more contributor license agreements. See the NOTICE file
|
|
// distributed with this work for additional information
|
|
// regarding copyright ownership. The ASF licenses this file
|
|
// to you under the Apache License, Version 2.0 (the
|
|
// "License"); you may not use this file except in compliance
|
|
// with the License. You may obtain a copy of the License at
|
|
//
|
|
// http://www.apache.org/licenses/LICENSE-2.0
|
|
//
|
|
// Unless required by applicable law or agreed to in writing, software
|
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
// See the License for the specific language governing permissions and
|
|
// limitations under the License.
|
|
|
|
package proxy
|
|
|
|
/*
|
|
#cgo pkg-config: milvus_core
|
|
#include "segcore/check_vec_index_c.h"
|
|
#include <stdlib.h>
|
|
*/
|
|
import "C"
|
|
|
|
import (
|
|
"runtime"
|
|
"sync"
|
|
"unsafe"
|
|
|
|
"go.uber.org/atomic"
|
|
"go.uber.org/zap"
|
|
|
|
"github.com/milvus-io/milvus-proto/go-api/v2/schemapb"
|
|
"github.com/milvus-io/milvus/pkg/v2/log"
|
|
"github.com/milvus-io/milvus/pkg/v2/util/conc"
|
|
"github.com/milvus-io/milvus/pkg/v2/util/hardware"
|
|
)
|
|
|
|
var (
|
|
dp atomic.Pointer[conc.Pool[any]]
|
|
dynOnce sync.Once
|
|
)
|
|
|
|
func initDynamicPool() {
|
|
dynOnce.Do(func() {
|
|
pool := conc.NewPool[any](
|
|
hardware.GetCPUNum(),
|
|
conc.WithPreAlloc(false),
|
|
conc.WithDisablePurge(false),
|
|
conc.WithPreHandler(runtime.LockOSThread), // lock os thread for cgo thread disposal
|
|
)
|
|
|
|
dp.Store(pool)
|
|
log.Info("init dynamicPool done", zap.Int("size", hardware.GetCPUNum()))
|
|
})
|
|
}
|
|
|
|
// GetDynamicPool returns the singleton pool for dynamic cgo operations.
|
|
func GetDynamicPool() *conc.Pool[any] {
|
|
initDynamicPool()
|
|
return dp.Load()
|
|
}
|
|
|
|
func CheckVecIndexWithDataTypeExist(name string, dataType schemapb.DataType, elementType schemapb.DataType) bool {
|
|
isEmbeddingList := dataType == schemapb.DataType_ArrayOfVector
|
|
if isEmbeddingList {
|
|
dataType = elementType
|
|
}
|
|
|
|
var result bool
|
|
GetDynamicPool().Submit(func() (any, error) {
|
|
cIndexName := C.CString(name)
|
|
cType := uint32(dataType)
|
|
defer C.free(unsafe.Pointer(cIndexName))
|
|
result = bool(C.CheckVecIndexWithDataType(cIndexName, cType, C.bool(isEmbeddingList)))
|
|
return nil, nil
|
|
}).Await()
|
|
|
|
return result
|
|
}
|