milvus/internal/util/indexparamcheck/vector_index_checker.go
Spade A d6a428e880
feat: impl StructArray -- support create index for vector array (embedding list) and search on it (#43726)
Ref https://github.com/milvus-io/milvus/issues/42148

This PR adds support for creating an index on a vector array field
(currently only with element type `DataType.FLOAT_VECTOR`) and for
searching on it. The only index type supported in this PR is
`EMB_LIST_HNSW`, and the only supported metric type is `MAX_SIM`.

The way to use it:
```python
milvus_client = MilvusClient("xxx:19530")
schema = milvus_client.create_schema(enable_dynamic_field=True, auto_id=True)
...
struct_schema = milvus_client.create_struct_array_field_schema("struct_array_field")
...
struct_schema.add_field("struct_float_vec", DataType.ARRAY_OF_VECTOR, element_type=DataType.FLOAT_VECTOR, dim=128, max_capacity=1000)
...
schema.add_struct_array_field(struct_schema)
index_params = milvus_client.prepare_index_params()
index_params.add_index(field_name="struct_float_vec", index_type="EMB_LIST_HNSW", metric_type="MAX_SIM", index_params={"nlist": 128})
...
milvus_client.create_index(COLLECTION_NAME, schema=schema, index_params=index_params)
```

Note: This PR uses `Lims` to convey the offsets of each vector array to
knowhere. The vectors of multiple vector arrays are concatenated, so the
offsets are needed to specify which vectors belong to which vector array.

---------

Signed-off-by: SpadeA <tangchenjie1210@gmail.com>
Signed-off-by: SpadeA-Tang <tangchenjie1210@gmail.com>
2025-08-20 10:27:46 +08:00

131 lines
4.4 KiB
Go

package indexparamcheck
/*
#cgo pkg-config: milvus_core
#include <stdlib.h> // free
#include "segcore/vector_index_c.h"
*/
import "C"
import (
"fmt"
"math"
"unsafe"
"github.com/cockroachdb/errors"
"google.golang.org/protobuf/proto"
"github.com/milvus-io/milvus-proto/go-api/v2/commonpb"
"github.com/milvus-io/milvus-proto/go-api/v2/schemapb"
"github.com/milvus-io/milvus/internal/util/vecindexmgr"
"github.com/milvus-io/milvus/pkg/v2/common"
"github.com/milvus-io/milvus/pkg/v2/proto/indexcgopb"
"github.com/milvus-io/milvus/pkg/v2/util/paramtable"
"github.com/milvus-io/milvus/pkg/v2/util/typeutil"
)
// vecIndexChecker checks index parameters for vector fields.
//
// It embeds baseChecker; CheckTrain runs the vector-specific checks first and
// then delegates to baseChecker.CheckTrain.
type vecIndexChecker struct {
baseChecker
}
// HandleCStatus converts a status returned from CGO into a Go error.
//
// A status whose error_code is 0 yields nil. Otherwise the C error message is
// copied into a Go string, the C buffer is released with C.free, and the
// message is returned as an error.
func HandleCStatus(status *C.CStatus) error {
	if status.error_code == 0 {
		return nil
	}

	// Copy the message into Go memory before releasing the C allocation.
	msg := C.GoString(status.error_msg)
	C.free(unsafe.Pointer(status.error_msg))

	return fmt.Errorf("%s", msg)
}
// StaticCheck validates the index parameters of a vector index.
//
// The checks run in this order and the first failure is returned:
//  1. The metric type in params must be one of the metrics allowed for
//     dataType (dense float, sparse float, binary, int, or array-of-vector).
//     Data types matching none of these families skip the metric check.
//  2. params must contain an index type, and that index type must be
//     registered as a vector index.
//  3. The params are serialized to an indexcgopb.IndexParams message and
//     handed to the native ValidateIndexParams routine together with the
//     index type, data type and element type.
//
// It returns nil when every check passes.
func (c vecIndexChecker) StaticCheck(dataType schemapb.DataType, elementType schemapb.DataType, params map[string]string) error {
	switch {
	case typeutil.IsDenseFloatVectorType(dataType):
		if !CheckStrByValues(params, Metric, FloatVectorMetrics) {
			return fmt.Errorf("metric type %s not found or not supported, supported: %v", params[Metric], FloatVectorMetrics)
		}
	case typeutil.IsSparseFloatVectorType(dataType):
		if !CheckStrByValues(params, Metric, SparseMetrics) {
			// Report the rejected metric, consistent with the other branches.
			return fmt.Errorf("metric type %s not found or not supported, supported: %v", params[Metric], SparseMetrics)
		}
	case typeutil.IsBinaryVectorType(dataType):
		if !CheckStrByValues(params, Metric, BinaryVectorMetrics) {
			return fmt.Errorf("metric type %s not found or not supported, supported: %v", params[Metric], BinaryVectorMetrics)
		}
	case typeutil.IsIntVectorType(dataType):
		if !CheckStrByValues(params, Metric, IntVectorMetrics) {
			return fmt.Errorf("metric type %s not found or not supported, supported: %v", params[Metric], IntVectorMetrics)
		}
	case typeutil.IsArrayOfVectorType(dataType):
		if !CheckStrByValues(params, Metric, EmbListMetrics) {
			return fmt.Errorf("metric type %s not found or not supported, supported: %v", params[Metric], EmbListMetrics)
		}
	}

	indexType, exist := params[common.IndexTypeKey]
	if !exist {
		return errors.New("no indexType is specified")
	}

	if !vecindexmgr.GetVecIndexMgrInstance().IsVecIndex(indexType) {
		return fmt.Errorf("indexType %s is not supported", indexType)
	}

	protoIndexParams := &indexcgopb.IndexParams{
		Params: make([]*commonpb.KeyValuePair, 0, len(params)),
	}
	for key, value := range params {
		protoIndexParams.Params = append(protoIndexParams.Params, &commonpb.KeyValuePair{Key: key, Value: value})
	}

	indexParamsBlob, err := proto.Marshal(protoIndexParams)
	if err != nil {
		return fmt.Errorf("failed to marshal index params: %w", err)
	}

	// The C string is allocated on the C heap and must be released by us.
	cIndexType := C.CString(indexType)
	defer C.free(unsafe.Pointer(cIndexType))

	cDataType := uint32(dataType)
	cElementType := uint32(elementType)

	// params holds at least the index type entry at this point, so the
	// serialized blob is non-empty and taking the address of its first byte
	// is safe.
	status := C.ValidateIndexParams(cIndexType, cDataType, cElementType, (*C.uint8_t)(unsafe.Pointer(&indexParamsBlob[0])), (C.uint64_t)(len(indexParamsBlob)))
	return HandleCStatus(&status)
}
// CheckTrain validates params before an index is trained.
//
// It first runs StaticCheck. Then, when dataType is a fixed-dimension vector
// type, or an array of vectors whose elementType is a fixed-dimension vector
// type, it requires the DIM parameter to pass CheckIntByRange with bounds 1 and
// math.MaxInt. Finally it delegates to baseChecker.CheckTrain.
func (c vecIndexChecker) CheckTrain(dataType schemapb.DataType, elementType schemapb.DataType, params map[string]string) error {
	err := c.StaticCheck(dataType, elementType, params)
	if err != nil {
		return err
	}

	// A dimension is needed for fixed-dimension vectors, whether they are
	// stored directly or as the elements of a vector array.
	needsDim := typeutil.IsFixDimVectorType(dataType)
	if !needsDim && typeutil.IsArrayOfVectorType(dataType) {
		needsDim = typeutil.IsFixDimVectorType(elementType)
	}

	if needsDim && !CheckIntByRange(params, DIM, 1, math.MaxInt) {
		return errors.New("failed to check vector dimension, should be larger than 0 and smaller than math.MaxInt")
	}

	return c.baseChecker.CheckTrain(dataType, elementType, params)
}
// CheckValidDataType reports whether indexType may be built on field.
//
// It returns an error when the field's data type is not a vector type, or when
// the vector index manager reports that indexType does not support the
// field's data type / element type combination. Otherwise it returns nil.
func (c vecIndexChecker) CheckValidDataType(indexType IndexType, field *schemapb.FieldSchema) error {
	dataType := field.GetDataType()
	if !typeutil.IsVectorType(dataType) {
		return fmt.Errorf("index %s only supports vector data type", indexType)
	}

	mgr := vecindexmgr.GetVecIndexMgrInstance()
	if mgr.IsDataTypeSupport(indexType, dataType, field.GetElementType()) {
		return nil
	}

	return fmt.Errorf("index %s do not support data type: %s", indexType, schemapb.DataType_name[int32(dataType)])
}
// SetDefaultMetricTypeIfNotExist forwards dType and params unchanged to
// paramtable.SetDefaultMetricTypeIfNotExist.
// NOTE(review): presumably this writes a default metric type into params when
// none is set for the given data type — confirm against paramtable.
func (c vecIndexChecker) SetDefaultMetricTypeIfNotExist(dType schemapb.DataType, params map[string]string) {
paramtable.SetDefaultMetricTypeIfNotExist(dType, params)
}
// newVecIndexChecker returns a pointer to a zero-value vecIndexChecker as an
// IndexChecker.
func newVecIndexChecker() IndexChecker {
	checker := new(vecIndexChecker)
	return checker
}