mirror of
https://gitee.com/milvus-io/milvus.git
synced 2025-12-07 01:28:27 +08:00
Ref https://github.com/milvus-io/milvus/issues/42148

This PR adds support for creating an index on a vector array (currently only for `DataType.FLOAT_VECTOR`) and for searching on it. The only index type supported in this PR is `EMB_LIST_HNSW`, and the only metric type is `MAX_SIM`.

Usage:

```python
milvus_client = MilvusClient("xxx:19530")
schema = milvus_client.create_schema(enable_dynamic_field=True, auto_id=True)
...
struct_schema = milvus_client.create_struct_array_field_schema("struct_array_field")
...
struct_schema.add_field("struct_float_vec", DataType.ARRAY_OF_VECTOR, element_type=DataType.FLOAT_VECTOR, dim=128, max_capacity=1000)
...
schema.add_struct_array_field(struct_schema)
index_params = milvus_client.prepare_index_params()
index_params.add_index(field_name="struct_float_vec", index_type="EMB_LIST_HNSW", metric_type="MAX_SIM", index_params={"nlist": 128})
...
milvus_client.create_index(COLLECTION_NAME, schema=schema, index_params=index_params)
```

Note: This PR uses `Lims` to convey the offsets of the vector arrays to knowhere: the vectors of multiple vector arrays are concatenated, so offsets are needed to specify which vectors belong to which vector array.

---------

Signed-off-by: SpadeA <tangchenjie1210@gmail.com>
Signed-off-by: SpadeA-Tang <tangchenjie1210@gmail.com>
62 lines
2.0 KiB
Go
62 lines
2.0 KiB
Go
package indexparamcheck
|
|
|
|
import (
|
|
"fmt"
|
|
|
|
"github.com/samber/lo"
|
|
|
|
"github.com/milvus-io/milvus-proto/go-api/v2/schemapb"
|
|
"github.com/milvus-io/milvus/pkg/v2/common"
|
|
"github.com/milvus-io/milvus/pkg/v2/util/merr"
|
|
"github.com/milvus-io/milvus/pkg/v2/util/typeutil"
|
|
)
|
|
|
|
// INVERTEDChecker checks if a INVERTED index can be built.
|
|
type INVERTEDChecker struct {
|
|
scalarIndexChecker
|
|
}
|
|
|
|
// validJSONCastTypes enumerates the json_cast_type values that
// INVERTEDChecker.CheckTrain accepts for an index on a JSON field.
var validJSONCastTypes = []string{
	"BOOL",
	"DOUBLE",
	"VARCHAR",
	"ARRAY_BOOL",
	"ARRAY_DOUBLE",
	"ARRAY_VARCHAR",
	"JSON",
}
|
|
|
|
// validJSONCastFunctions enumerates the known json_cast_function names.
// NOTE(review): not referenced by the code visible in this file; presumably
// consumed elsewhere in the package — confirm before removing.
var validJSONCastFunctions = []string{
	"STRING_TO_DOUBLE",
}
|
|
|
|
func (c *INVERTEDChecker) CheckTrain(dataType schemapb.DataType, elementType schemapb.DataType, params map[string]string) error {
|
|
// check json index params
|
|
isJSONIndex := typeutil.IsJSONType(dataType)
|
|
if isJSONIndex {
|
|
castType, exist := params[common.JSONCastTypeKey]
|
|
if !exist {
|
|
return merr.WrapErrParameterMissing(common.JSONCastTypeKey, "json index must specify cast type")
|
|
}
|
|
|
|
if !lo.Contains(validJSONCastTypes, castType) {
|
|
return merr.WrapErrParameterInvalidMsg("json_cast_type %v is not supported", castType)
|
|
}
|
|
castFunction, exist := params[common.JSONCastFunctionKey]
|
|
if exist {
|
|
switch castFunction {
|
|
case "STRING_TO_DOUBLE":
|
|
if castType != "DOUBLE" {
|
|
return merr.WrapErrParameterInvalidMsg("json_cast_function %v is not supported for json_cast_type %v", castFunction, castType)
|
|
}
|
|
default:
|
|
return merr.WrapErrParameterInvalidMsg("json_cast_function %v is not supported", castFunction)
|
|
}
|
|
}
|
|
}
|
|
return c.scalarIndexChecker.CheckTrain(dataType, elementType, params)
|
|
}
|
|
|
|
func (c *INVERTEDChecker) CheckValidDataType(indexType IndexType, field *schemapb.FieldSchema) error {
|
|
dType := field.GetDataType()
|
|
if !typeutil.IsBoolType(dType) && !typeutil.IsArithmetic(dType) && !typeutil.IsStringType(dType) &&
|
|
!typeutil.IsArrayType(dType) && !typeutil.IsJSONType(dType) {
|
|
return fmt.Errorf("INVERTED are not supported on %s field", dType.String())
|
|
}
|
|
return nil
|
|
}
|
|
|
|
func newINVERTEDChecker() *INVERTEDChecker {
|
|
return &INVERTEDChecker{}
|
|
}
|