mirror of
https://gitee.com/milvus-io/milvus.git
synced 2025-12-06 17:18:35 +08:00
Ref https://github.com/milvus-io/milvus/issues/42148 This PR adds support for creating an index on a vector array (currently only for `DataType.FLOAT_VECTOR`) and searching on it. The only index type supported in this PR is `EMB_LIST_HNSW`, and the only metric type is `MAX_SIM`. Usage: ```python milvus_client = MilvusClient("xxx:19530") schema = milvus_client.create_schema(enable_dynamic_field=True, auto_id=True) ... struct_schema = milvus_client.create_struct_array_field_schema("struct_array_field") ... struct_schema.add_field("struct_float_vec", DataType.ARRAY_OF_VECTOR, element_type=DataType.FLOAT_VECTOR, dim=128, max_capacity=1000) ... schema.add_struct_array_field(struct_schema) index_params = milvus_client.prepare_index_params() index_params.add_index(field_name="struct_float_vec", index_type="EMB_LIST_HNSW", metric_type="MAX_SIM", index_params={"nlist": 128}) ... milvus_client.create_index(COLLECTION_NAME, schema=schema, index_params=index_params) ``` Note: This PR uses `Lims` to convey the offsets of the vector arrays to knowhere: the vectors of multiple vector arrays are concatenated, so offsets are needed to specify which vectors belong to which vector array. --------- Signed-off-by: SpadeA <tangchenjie1210@gmail.com> Signed-off-by: SpadeA-Tang <tangchenjie1210@gmail.com>
131 lines
4.4 KiB
Go
131 lines
4.4 KiB
Go
package indexparamcheck
|
|
|
|
/*
|
|
#cgo pkg-config: milvus_core
|
|
|
|
#include <stdlib.h> // free
|
|
#include "segcore/vector_index_c.h"
|
|
*/
|
|
import "C"
|
|
|
|
import (
|
|
"fmt"
|
|
"math"
|
|
"unsafe"
|
|
|
|
"github.com/cockroachdb/errors"
|
|
"google.golang.org/protobuf/proto"
|
|
|
|
"github.com/milvus-io/milvus-proto/go-api/v2/commonpb"
|
|
"github.com/milvus-io/milvus-proto/go-api/v2/schemapb"
|
|
"github.com/milvus-io/milvus/internal/util/vecindexmgr"
|
|
"github.com/milvus-io/milvus/pkg/v2/common"
|
|
"github.com/milvus-io/milvus/pkg/v2/proto/indexcgopb"
|
|
"github.com/milvus-io/milvus/pkg/v2/util/paramtable"
|
|
"github.com/milvus-io/milvus/pkg/v2/util/typeutil"
|
|
)
|
|
|
|
type vecIndexChecker struct {
|
|
baseChecker
|
|
}
|
|
|
|
// HandleCStatus deals with the error returned from CGO
|
|
func HandleCStatus(status *C.CStatus) error {
|
|
if status.error_code == 0 {
|
|
return nil
|
|
}
|
|
errorMsg := C.GoString(status.error_msg)
|
|
defer C.free(unsafe.Pointer(status.error_msg))
|
|
|
|
return fmt.Errorf("%s", errorMsg)
|
|
}
|
|
|
|
func (c vecIndexChecker) StaticCheck(dataType schemapb.DataType, elementType schemapb.DataType, params map[string]string) error {
|
|
if typeutil.IsDenseFloatVectorType(dataType) {
|
|
if !CheckStrByValues(params, Metric, FloatVectorMetrics) {
|
|
return fmt.Errorf("metric type %s not found or not supported, supported: %v", params[Metric], FloatVectorMetrics)
|
|
}
|
|
} else if typeutil.IsSparseFloatVectorType(dataType) {
|
|
if !CheckStrByValues(params, Metric, SparseMetrics) {
|
|
return fmt.Errorf("metric type not found or not supported, supported: %v", SparseMetrics)
|
|
}
|
|
} else if typeutil.IsBinaryVectorType(dataType) {
|
|
if !CheckStrByValues(params, Metric, BinaryVectorMetrics) {
|
|
return fmt.Errorf("metric type %s not found or not supported, supported: %v", params[Metric], BinaryVectorMetrics)
|
|
}
|
|
} else if typeutil.IsIntVectorType(dataType) {
|
|
if !CheckStrByValues(params, Metric, IntVectorMetrics) {
|
|
return fmt.Errorf("metric type %s not found or not supported, supported: %v", params[Metric], IntVectorMetrics)
|
|
}
|
|
} else if typeutil.IsArrayOfVectorType(dataType) {
|
|
if !CheckStrByValues(params, Metric, EmbListMetrics) {
|
|
return fmt.Errorf("metric type %s not found or not supported, supported: %v", params[Metric], EmbListMetrics)
|
|
}
|
|
}
|
|
|
|
indexType, exist := params[common.IndexTypeKey]
|
|
|
|
if !exist {
|
|
return errors.New("no indexType is specified")
|
|
}
|
|
|
|
if !vecindexmgr.GetVecIndexMgrInstance().IsVecIndex(indexType) {
|
|
return fmt.Errorf("indexType %s is not supported", indexType)
|
|
}
|
|
|
|
protoIndexParams := &indexcgopb.IndexParams{
|
|
Params: make([]*commonpb.KeyValuePair, 0),
|
|
}
|
|
|
|
for key, value := range params {
|
|
protoIndexParams.Params = append(protoIndexParams.Params, &commonpb.KeyValuePair{Key: key, Value: value})
|
|
}
|
|
|
|
indexParamsBlob, err := proto.Marshal(protoIndexParams)
|
|
if err != nil {
|
|
return fmt.Errorf("failed to marshal index params: %s", err)
|
|
}
|
|
|
|
var status C.CStatus
|
|
|
|
cIndexType := C.CString(indexType)
|
|
cDataType := uint32(dataType)
|
|
cElementType := uint32(elementType)
|
|
status = C.ValidateIndexParams(cIndexType, cDataType, cElementType, (*C.uint8_t)(unsafe.Pointer(&indexParamsBlob[0])), (C.uint64_t)(len(indexParamsBlob)))
|
|
C.free(unsafe.Pointer(cIndexType))
|
|
|
|
return HandleCStatus(&status)
|
|
}
|
|
|
|
func (c vecIndexChecker) CheckTrain(dataType schemapb.DataType, elementType schemapb.DataType, params map[string]string) error {
|
|
if err := c.StaticCheck(dataType, elementType, params); err != nil {
|
|
return err
|
|
}
|
|
|
|
if typeutil.IsFixDimVectorType(dataType) || (typeutil.IsArrayOfVectorType(dataType) && typeutil.IsFixDimVectorType(elementType)) {
|
|
if !CheckIntByRange(params, DIM, 1, math.MaxInt) {
|
|
return errors.New("failed to check vector dimension, should be larger than 0 and smaller than math.MaxInt")
|
|
}
|
|
}
|
|
|
|
return c.baseChecker.CheckTrain(dataType, elementType, params)
|
|
}
|
|
|
|
func (c vecIndexChecker) CheckValidDataType(indexType IndexType, field *schemapb.FieldSchema) error {
|
|
if !typeutil.IsVectorType(field.GetDataType()) {
|
|
return fmt.Errorf("index %s only supports vector data type", indexType)
|
|
}
|
|
if !vecindexmgr.GetVecIndexMgrInstance().IsDataTypeSupport(indexType, field.GetDataType(), field.GetElementType()) {
|
|
return fmt.Errorf("index %s do not support data type: %s", indexType, schemapb.DataType_name[int32(field.GetDataType())])
|
|
}
|
|
return nil
|
|
}
|
|
|
|
func (c vecIndexChecker) SetDefaultMetricTypeIfNotExist(dType schemapb.DataType, params map[string]string) {
|
|
paramtable.SetDefaultMetricTypeIfNotExist(dType, params)
|
|
}
|
|
|
|
func newVecIndexChecker() IndexChecker {
|
|
return &vecIndexChecker{}
|
|
}
|