milvus/internal/core/src/common/VectorTrait.h
Spade A d6a428e880
feat: impl StructArray -- support create index for vector array (embedding list) and search on it (#43726)
Ref https://github.com/milvus-io/milvus/issues/42148

This PR adds support for creating an index on a vector array (for now, only
with `DataType.FLOAT_VECTOR` elements) and for searching on it.
The only index type supported in this PR is `EMB_LIST_HNSW`, and the only
supported metric type is `MAX_SIM`.

The way to use it:
```python
milvus_client = MilvusClient("xxx:19530")
schema = milvus_client.create_schema(enable_dynamic_field=True, auto_id=True)
...
struct_schema = milvus_client.create_struct_array_field_schema("struct_array_field")
...
struct_schema.add_field("struct_float_vec", DataType.ARRAY_OF_VECTOR, element_type=DataType.FLOAT_VECTOR, dim=128, max_capacity=1000)
...
schema.add_struct_array_field(struct_schema)
index_params = milvus_client.prepare_index_params()
index_params.add_index(field_name="struct_float_vec", index_type="EMB_LIST_HNSW", metric_type="MAX_SIM", index_params={"nlist": 128})
...
milvus_client.create_index(COLLECTION_NAME, schema=schema, index_params=index_params)
```

Note: This PR uses `Lims` to convey the offsets of the vector arrays to
knowhere. There, the vectors of multiple vector arrays are concatenated, so
the offsets are needed to specify which vectors belong to which vector array.

---------

Signed-off-by: SpadeA <tangchenjie1210@gmail.com>
Signed-off-by: SpadeA-Tang <tangchenjie1210@gmail.com>
2025-08-20 10:27:46 +08:00

181 lines
7.0 KiB
C++

// Licensed to the LF AI & Data foundation under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <string>
#include <type_traits>
#include "Array.h"
#include "Types.h"
#include "common/type_c.h"
#include "pb/common.pb.h"
#include "pb/plan.pb.h"
#include "pb/schema.pb.h"
namespace milvus {
// Expands to a `using elem_type = ...;` alias naming the element type of the
// vector trait called `TraitType` at the expansion site (so `TraitType` must
// be visible there). The listed traits are distinct types, hence at most one
// comparison matches and the order of the checks does not affect the result.
// A trait that matches none of the comparisons resolves to
// milvus::BinaryVector::embedded_type.
#define GET_ELEM_TYPE_FOR_VECTOR_TRAIT                               \
    using elem_type = std::conditional_t<                            \
        std::is_same_v<TraitType, milvus::FloatVector>,              \
        milvus::FloatVector::embedded_type,                          \
        std::conditional_t<                                          \
            std::is_same_v<TraitType, milvus::Float16Vector>,        \
            milvus::Float16Vector::embedded_type,                    \
            std::conditional_t<                                      \
                std::is_same_v<TraitType, milvus::BFloat16Vector>,   \
                milvus::BFloat16Vector::embedded_type,               \
                std::conditional_t<                                  \
                    std::is_same_v<TraitType, milvus::Int8Vector>,   \
                    milvus::Int8Vector::embedded_type,               \
                    std::conditional_t<                              \
                        std::is_same_v<TraitType,                    \
                                       milvus::EmbListFloatVector>,  \
                        milvus::EmbListFloatVector::embedded_type,   \
                        milvus::BinaryVector::embedded_type>>>>>;
// Expands to a declaration of `schema_data_type`, initialised with the
// `schema_data_type` constant of the vector trait called `TraitType` at the
// expansion site (so `TraitType` must be visible there).
//
// EmbListFloatVector is matched explicitly, in line with
// GET_ELEM_TYPE_FOR_VECTOR_TRAIT; without that branch an embedding-list trait
// would silently pick up the BinaryVector value. A trait that matches none of
// the comparisons still resolves to milvus::BinaryVector::schema_data_type.
#define GET_SCHEMA_DATA_TYPE_FOR_VECTOR_TRAIT                      \
    auto schema_data_type =                                        \
        std::is_same_v<TraitType, milvus::EmbListFloatVector>      \
            ? milvus::EmbListFloatVector::schema_data_type         \
        : std::is_same_v<TraitType, milvus::FloatVector>           \
            ? milvus::FloatVector::schema_data_type                \
        : std::is_same_v<TraitType, milvus::Float16Vector>         \
            ? milvus::Float16Vector::schema_data_type              \
        : std::is_same_v<TraitType, milvus::BFloat16Vector>        \
            ? milvus::BFloat16Vector::schema_data_type             \
        : std::is_same_v<TraitType, milvus::Int8Vector>            \
            ? milvus::Int8Vector::schema_data_type                 \
            : milvus::BinaryVector::schema_data_type;
// Common base class of the vector trait types declared in this header.
class VectorTrait {
 public:
    // Compile-time flag telling whether a trait describes an embedding list.
    // The base answer is false; EmbListFloatVector declares its own version
    // that returns true.
    static constexpr bool is_embedding_list() { return false; }
};
// Trait for vectors whose elements are `float`. It bundles the element type
// with the matching enumerator of every type enum used for this vector kind.
class FloatVector : public VectorTrait {
 public:
    // Element representation.
    using embedded_type = float;
    // NOTE(review): dim_factor looks like the number of dimensions carried by
    // one embedded_type element (1 here, 8 for BinaryVector) -- confirm
    // against the users of this constant.
    static constexpr int32_t dim_factor = 1;

    // Enumerators from the protobuf-generated schema / plan / common enums.
    static constexpr auto schema_data_type =
        proto::schema::DataType::FloatVector;
    static constexpr auto vector_type = proto::plan::VectorType::FloatVector;
    static constexpr auto placeholder_type =
        proto::common::PlaceholderType::FloatVector;

    // Enumerators from DataType and CDataType.
    static constexpr auto data_type = DataType::VECTOR_FLOAT;
    static constexpr auto c_data_type = CDataType::FloatVector;
};
// Trait for binary vectors stored in `uint8_t` elements. It bundles the
// element type with the matching enumerator of every type enum used for this
// vector kind.
class BinaryVector : public VectorTrait {
 public:
    // Element representation.
    using embedded_type = uint8_t;
    // NOTE(review): 8 presumably reflects eight one-bit dimensions packed into
    // each uint8_t element -- confirm against the users of this constant.
    static constexpr int32_t dim_factor = 8;

    // Enumerators from the protobuf-generated schema / plan / common enums.
    static constexpr auto schema_data_type =
        proto::schema::DataType::BinaryVector;
    static constexpr auto vector_type = proto::plan::VectorType::BinaryVector;
    static constexpr auto placeholder_type =
        proto::common::PlaceholderType::BinaryVector;

    // Enumerators from DataType and CDataType.
    static constexpr auto data_type = DataType::VECTOR_BINARY;
    static constexpr auto c_data_type = CDataType::BinaryVector;
};
// Trait for vectors whose elements are `float16`. It bundles the element type
// with the matching enumerator of every type enum used for this vector kind.
class Float16Vector : public VectorTrait {
 public:
    // Element representation (`float16` is declared outside this header).
    using embedded_type = float16;
    static constexpr int32_t dim_factor = 1;

    // Enumerators from the protobuf-generated schema / plan / common enums.
    static constexpr auto schema_data_type =
        proto::schema::DataType::Float16Vector;
    static constexpr auto vector_type = proto::plan::VectorType::Float16Vector;
    static constexpr auto placeholder_type =
        proto::common::PlaceholderType::Float16Vector;

    // Enumerators from DataType and CDataType.
    static constexpr auto data_type = DataType::VECTOR_FLOAT16;
    static constexpr auto c_data_type = CDataType::Float16Vector;
};
// Trait for vectors whose elements are `bfloat16`. It bundles the element
// type with the matching enumerator of every type enum used for this vector
// kind.
class BFloat16Vector : public VectorTrait {
 public:
    // Element representation (`bfloat16` is declared outside this header).
    using embedded_type = bfloat16;
    static constexpr int32_t dim_factor = 1;

    // Enumerators from the protobuf-generated schema / plan / common enums.
    static constexpr auto schema_data_type =
        proto::schema::DataType::BFloat16Vector;
    static constexpr auto vector_type = proto::plan::VectorType::BFloat16Vector;
    static constexpr auto placeholder_type =
        proto::common::PlaceholderType::BFloat16Vector;

    // Enumerators from DataType and CDataType.
    static constexpr auto data_type = DataType::VECTOR_BFLOAT16;
    static constexpr auto c_data_type = CDataType::BFloat16Vector;
};
// Trait for sparse float vectors. It bundles the `float` element type with
// the matching enumerator of every type enum used for this vector kind.
// Note that neither GET_ELEM_TYPE_FOR_VECTOR_TRAIT nor
// GET_SCHEMA_DATA_TYPE_FOR_VECTOR_TRAIT has a branch for this trait.
class SparseFloatVector : public VectorTrait {
 public:
    // Element representation.
    using embedded_type = float;
    static constexpr int32_t dim_factor = 1;

    // Enumerators from the protobuf-generated schema / plan / common enums.
    static constexpr auto schema_data_type =
        proto::schema::DataType::SparseFloatVector;
    static constexpr auto vector_type =
        proto::plan::VectorType::SparseFloatVector;
    static constexpr auto placeholder_type =
        proto::common::PlaceholderType::SparseFloatVector;

    // Enumerators from DataType and CDataType.
    static constexpr auto data_type = DataType::VECTOR_SPARSE_FLOAT;
    static constexpr auto c_data_type = CDataType::SparseFloatVector;
};
// Trait for vectors whose elements are `int8`. It bundles the element type
// with the matching enumerator of every type enum used for this vector kind.
class Int8Vector : public VectorTrait {
 public:
    // Element representation (`int8` is declared outside this header).
    using embedded_type = int8;
    static constexpr int32_t dim_factor = 1;

    // Enumerators from the protobuf-generated schema / plan / common enums.
    static constexpr auto schema_data_type =
        proto::schema::DataType::Int8Vector;
    static constexpr auto vector_type = proto::plan::VectorType::Int8Vector;
    static constexpr auto placeholder_type =
        proto::common::PlaceholderType::Int8Vector;

    // Enumerators from DataType and CDataType.
    static constexpr auto data_type = DataType::VECTOR_INT8;
    static constexpr auto c_data_type = CDataType::Int8Vector;
};
// Trait for an embedding list of float vectors (data type VECTOR_ARRAY). It
// bundles the `float` element type with the matching enumerator of every type
// enum used for this vector kind, and is the one trait in this header that
// reports itself as an embedding list.
class EmbListFloatVector : public VectorTrait {
 public:
    // Declared with the same name as VectorTrait::is_embedding_list(), so
    // calls made through this trait type resolve here and yield true.
    static constexpr bool is_embedding_list() { return true; }

    // Element representation.
    using embedded_type = float;
    static constexpr int32_t dim_factor = 1;

    // Enumerators from the protobuf-generated schema / plan / common enums.
    static constexpr auto schema_data_type =
        proto::schema::DataType::ArrayOfVector;
    static constexpr auto vector_type =
        proto::plan::VectorType::EmbListFloatVector;
    static constexpr auto placeholder_type =
        proto::common::PlaceholderType::EmbListFloatVector;

    // Enumerators from DataType and CDataType.
    static constexpr auto data_type = DataType::VECTOR_ARRAY;
    static constexpr auto c_data_type = CDataType::VectorArray;
};
// Empty tag types used to pick an overload at compile time.
struct FundamentalTag {};
struct StringTag {};

// Maps a type T to its dispatch tag: exactly `std::string` selects StringTag,
// every other type (including cv-qualified or reference variants of
// std::string) selects FundamentalTag.
template <typename T>
struct TagDispatchTrait {
    using Tag = std::conditional_t<std::is_same_v<T, std::string>,
                                   StringTag,
                                   FundamentalTag>;
};
} // namespace milvus