Spade A d6a428e880
feat: impl StructArray -- support create index for vector array (embedding list) and search on it (#43726)
Ref https://github.com/milvus-io/milvus/issues/42148

This PR adds support for creating an index on a vector array (currently only
for `DataType.FLOAT_VECTOR`) and for searching on it.
The only index type supported in this PR is `EMB_LIST_HNSW`, and the only
supported metric type is `MAX_SIM`.

The way to use it:
```python
milvus_client = MilvusClient("xxx:19530")
schema = milvus_client.create_schema(enable_dynamic_field=True, auto_id=True)
...
struct_schema = milvus_client.create_struct_array_field_schema("struct_array_field")
...
struct_schema.add_field("struct_float_vec", DataType.ARRAY_OF_VECTOR, element_type=DataType.FLOAT_VECTOR, dim=128, max_capacity=1000)
...
schema.add_struct_array_field(struct_schema)
index_params = milvus_client.prepare_index_params()
index_params.add_index(field_name="struct_float_vec", index_type="EMB_LIST_HNSW", metric_type="MAX_SIM", index_params={"nlist": 128})
...
milvus_client.create_index(COLLECTION_NAME, schema=schema, index_params=index_params)
```

Note: This PR uses `Lims` to convey the offsets of the vector arrays to
knowhere. The vectors of multiple vector arrays are concatenated, so the
offsets are needed to specify which vectors belong to which vector array.

---------

Signed-off-by: SpadeA <tangchenjie1210@gmail.com>
Signed-off-by: SpadeA-Tang <tangchenjie1210@gmail.com>
2025-08-20 10:27:46 +08:00

102 lines
3.3 KiB
Go

// Licensed to the LF AI & Data foundation under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package indexparamcheck
import (
"fmt"
"strconv"
"github.com/milvus-io/milvus-proto/go-api/v2/schemapb"
"github.com/milvus-io/milvus/pkg/v2/common"
"github.com/milvus-io/milvus/pkg/v2/util/funcutil"
"github.com/milvus-io/milvus/pkg/v2/util/paramtable"
)
// CheckIntByRange reports whether params[key] holds a base-10 integer that
// lies within the closed interval [min, max].
//
// It returns false when:
//  1. key is absent from params, or
//  2. the stored string cannot be parsed by strconv.Atoi, or
//  3. the parsed number is below min or above max.
//
// In every other case it returns true.
func CheckIntByRange(params map[string]string, key string, min, max int) bool {
	if raw, found := params[key]; found {
		// Only a successfully parsed value is compared against the bounds.
		if n, err := strconv.Atoi(raw); err == nil {
			return min <= n && n <= max
		}
	}
	return false
}
// CheckStrByValues reports whether the string stored under key in params is
// one of the entries of container.
//
// It returns false when:
//  1. key is absent from params, or
//  2. funcutil.SliceContain does not find the stored value in container.
//
// In every other case it returns true.
func CheckStrByValues(params map[string]string, key string, container []string) bool {
	if candidate, present := params[key]; present {
		return funcutil.SliceContain(container, candidate)
	}
	return false
}
// errOutOfRange builds the error returned when x falls outside the range
// bounded by lb and ub. All three values are rendered with the %v verb, so
// any type may be passed.
func errOutOfRange(x, lb, ub interface{}) error {
	const format = "%v out of range: [%v, %v]"
	return fmt.Errorf(format, x, lb, ub)
}
// setDefaultIfNotExist stores defaultValue under key in params unless the key
// is already present. An existing entry is left untouched, even when its
// value is the empty string.
func setDefaultIfNotExist(params map[string]string, key string, defaultValue string) {
	if _, present := params[key]; present {
		return
	}
	params[key] = defaultValue
}
// CheckAutoIndexHelper validates one auto-index parameter map and panics if
// it is unusable.
//
// key names the configuration item being checked and is used only to prefix
// the panic message. m is the parameter map; it must contain
// common.IndexTypeKey. dtype is the data type handed to the checker's
// StaticCheck.
//
// The function panics when:
//  1. m has no entry for common.IndexTypeKey, or
//  2. the index checker manager has no checker for that index type, or
//  3. the checker's StaticCheck rejects the parameters.
func CheckAutoIndexHelper(key string, m map[string]string, dtype schemapb.DataType) {
	indexType, found := m[common.IndexTypeKey]
	if !found {
		panic(fmt.Sprintf("%s invalid, index type not found", key))
	}

	checker, lookupErr := GetIndexCheckerMgrInstance().GetChecker(indexType)
	if lookupErr != nil {
		panic(fmt.Sprintf("%s invalid, unsupported index type: %s", key, indexType))
	}

	checkErr := checker.StaticCheck(dtype, schemapb.DataType_None, m)
	if checkErr != nil {
		panic(fmt.Sprintf("%s invalid, parameters invalid, error: %s", key, checkErr.Error()))
	}
}
// CheckAutoIndexConfig validates the auto-index parameters held in the
// global param table. It checks, in order, the float-vector, binary-vector
// and sparse-float-vector index parameters by passing each one to
// CheckAutoIndexHelper, which panics on the first invalid entry.
func CheckAutoIndexConfig() {
	cfg := &paramtable.Get().AutoIndexConfig

	floatParams := cfg.IndexParams.GetAsJSONMap()
	CheckAutoIndexHelper(cfg.IndexParams.Key, floatParams, schemapb.DataType_FloatVector)

	binaryParams := cfg.BinaryIndexParams.GetAsJSONMap()
	CheckAutoIndexHelper(cfg.BinaryIndexParams.Key, binaryParams, schemapb.DataType_BinaryVector)

	sparseParams := cfg.SparseIndexParams.GetAsJSONMap()
	CheckAutoIndexHelper(cfg.SparseIndexParams.Key, sparseParams, schemapb.DataType_SparseFloatVector)
}
// ValidateParamTable runs the parameter-table validations defined in this
// package. At present that is only CheckAutoIndexConfig, so this function
// panics if the auto-index configuration is invalid.
func ValidateParamTable() {
CheckAutoIndexConfig()
}