enhance: support readAt interface for remote input stream (#43997)

#42032 

Also, fix the cacheoptfield method to work in storagev2.
Also, change the sparse-related interface for the knowhere version bump
#43974.
Also includes https://github.com/milvus-io/milvus/pull/44046 for the lost
metric issue.

---------

Signed-off-by: chasingegg <chao.gao@zilliz.com>
Signed-off-by: marcelo.chen <marcelo.chen@zilliz.com>
Signed-off-by: Congqi Xia <congqi.xia@zilliz.com>
Co-authored-by: marcelo.chen <marcelo.chen@zilliz.com>
Co-authored-by: Congqi Xia <congqi.xia@zilliz.com>
This commit is contained in:
Gao 2025-08-26 11:19:58 +08:00 committed by GitHub
parent 8934c18792
commit e97a618630
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
66 changed files with 304 additions and 189 deletions

View File

@ -500,7 +500,7 @@ class SparseFloatVectorChunk : public Chunk {
reinterpret_cast<uint64_t*>(data + null_bitmap_bytes_num); reinterpret_cast<uint64_t*>(data + null_bitmap_bytes_num);
for (int i = 0; i < row_nums; i++) { for (int i = 0; i < row_nums; i++) {
vec_[i] = {(offsets_ptr[i + 1] - offsets_ptr[i]) / vec_[i] = {(offsets_ptr[i + 1] - offsets_ptr[i]) /
knowhere::sparse::SparseRow<float>::element_size(), knowhere::sparse::SparseRow<sparseValueType>::element_size(),
reinterpret_cast<uint8_t*>(data + offsets_ptr[i]), reinterpret_cast<uint8_t*>(data + offsets_ptr[i]),
false}; false};
dim_ = std::max(dim_, vec_[i].dim()); dim_ = std::max(dim_, vec_[i].dim());
@ -519,7 +519,7 @@ class SparseFloatVectorChunk : public Chunk {
} }
// only for test // only for test
std::vector<knowhere::sparse::SparseRow<float>>& std::vector<knowhere::sparse::SparseRow<sparseValueType>>&
Vec() { Vec() {
return vec_; return vec_;
} }
@ -531,6 +531,6 @@ class SparseFloatVectorChunk : public Chunk {
private: private:
int64_t dim_ = 0; int64_t dim_ = 0;
std::vector<knowhere::sparse::SparseRow<float>> vec_; std::vector<knowhere::sparse::SparseRow<sparseValueType>> vec_;
}; };
} // namespace milvus } // namespace milvus

View File

@ -447,7 +447,7 @@ create_chunk_writer(const FieldMeta& field_meta, Args&&... args) {
field_meta.get_element_type(), field_meta.get_element_type(),
std::forward<Args>(args)..., std::forward<Args>(args)...,
nullable); nullable);
case milvus::DataType::VECTOR_SPARSE_FLOAT: case milvus::DataType::VECTOR_SPARSE_U32_F32:
return std::make_shared<SparseFloatVectorChunkWriter>( return std::make_shared<SparseFloatVectorChunkWriter>(
std::forward<Args>(args)..., nullable); std::forward<Args>(args)..., nullable);
case milvus::DataType::VECTOR_ARRAY: case milvus::DataType::VECTOR_ARRAY:

View File

@ -284,11 +284,11 @@ FieldDataImpl<Type, is_type_entire_row>::FillFieldData(
array); array);
return FillFieldData(array_info.first, array_info.second); return FillFieldData(array_info.first, array_info.second);
} }
case DataType::VECTOR_SPARSE_FLOAT: { case DataType::VECTOR_SPARSE_U32_F32: {
AssertInfo(array->type()->id() == arrow::Type::type::BINARY, AssertInfo(array->type()->id() == arrow::Type::type::BINARY,
"inconsistent data type"); "inconsistent data type");
auto arr = std::dynamic_pointer_cast<arrow::BinaryArray>(array); auto arr = std::dynamic_pointer_cast<arrow::BinaryArray>(array);
std::vector<knowhere::sparse::SparseRow<float>> values; std::vector<knowhere::sparse::SparseRow<sparseValueType>> values;
for (size_t index = 0; index < element_count; ++index) { for (size_t index = 0; index < element_count; ++index) {
auto view = arr->GetString(index); auto view = arr->GetString(index);
values.push_back( values.push_back(
@ -460,7 +460,7 @@ template class FieldDataImpl<int8_t, false>;
template class FieldDataImpl<float, false>; template class FieldDataImpl<float, false>;
template class FieldDataImpl<float16, false>; template class FieldDataImpl<float16, false>;
template class FieldDataImpl<bfloat16, false>; template class FieldDataImpl<bfloat16, false>;
template class FieldDataImpl<knowhere::sparse::SparseRow<float>, true>; template class FieldDataImpl<knowhere::sparse::SparseRow<sparseValueType>, true>;
template class FieldDataImpl<VectorArray, true>; template class FieldDataImpl<VectorArray, true>;
FieldDataPtr FieldDataPtr

View File

@ -723,14 +723,14 @@ class FieldDataJsonImpl : public FieldDataImpl<Json, true> {
}; };
class FieldDataSparseVectorImpl class FieldDataSparseVectorImpl
: public FieldDataImpl<knowhere::sparse::SparseRow<float>, true> { : public FieldDataImpl<knowhere::sparse::SparseRow<sparseValueType>, true> {
public: public:
explicit FieldDataSparseVectorImpl(DataType data_type, explicit FieldDataSparseVectorImpl(DataType data_type,
int64_t total_num_rows = 0) int64_t total_num_rows = 0)
: FieldDataImpl<knowhere::sparse::SparseRow<float>, true>( : FieldDataImpl<knowhere::sparse::SparseRow<sparseValueType>, true>(
/*dim=*/1, data_type, false, total_num_rows), /*dim=*/1, data_type, false, total_num_rows),
vec_dim_(0) { vec_dim_(0) {
AssertInfo(data_type == DataType::VECTOR_SPARSE_FLOAT, AssertInfo(data_type == DataType::VECTOR_SPARSE_U32_F32,
"invalid data type for sparse vector"); "invalid data type for sparse vector");
} }
@ -753,7 +753,7 @@ class FieldDataSparseVectorImpl
} }
// source is a pointer to element_count of // source is a pointer to element_count of
// knowhere::sparse::SparseRow<float> // knowhere::sparse::SparseRow<sparseValueType>
void void
FillFieldData(const void* source, ssize_t element_count) override { FillFieldData(const void* source, ssize_t element_count) override {
if (element_count == 0) { if (element_count == 0) {
@ -765,7 +765,7 @@ class FieldDataSparseVectorImpl
resize_field_data(length_ + element_count); resize_field_data(length_ + element_count);
} }
auto ptr = auto ptr =
static_cast<const knowhere::sparse::SparseRow<float>*>(source); static_cast<const knowhere::sparse::SparseRow<sparseValueType>*>(source);
for (int64_t i = 0; i < element_count; ++i) { for (int64_t i = 0; i < element_count; ++i) {
auto& row = ptr[i]; auto& row = ptr[i];
vec_dim_ = std::max(vec_dim_, row.dim()); vec_dim_ = std::max(vec_dim_, row.dim());
@ -774,7 +774,7 @@ class FieldDataSparseVectorImpl
length_ += element_count; length_ += element_count;
} }
// each binary in array is a knowhere::sparse::SparseRow<float> // each binary in array is a knowhere::sparse::SparseRow<sparseValueType>
void void
FillFieldData(const std::shared_ptr<arrow::BinaryArray>& array) override { FillFieldData(const std::shared_ptr<arrow::BinaryArray>& array) override {
auto n = array->length(); auto n = array->length();

View File

@ -37,7 +37,7 @@ constexpr bool IsScalar =
template <typename T> template <typename T>
constexpr bool IsSparse = std::is_same_v<T, SparseFloatVector> || constexpr bool IsSparse = std::is_same_v<T, SparseFloatVector> ||
std::is_same_v<T, knowhere::sparse::SparseRow<float>>; std::is_same_v<T, knowhere::sparse::SparseRow<sparseValueType>>;
template <typename T> template <typename T>
constexpr bool IsVariableType = constexpr bool IsVariableType =
@ -52,7 +52,7 @@ template <typename T>
constexpr bool IsVariableTypeSupportInChunk = constexpr bool IsVariableTypeSupportInChunk =
std::is_same_v<T, std::string> || std::is_same_v<T, Array> || std::is_same_v<T, std::string> || std::is_same_v<T, Array> ||
std::is_same_v<T, Json> || std::is_same_v<T, Json> ||
std::is_same_v<T, knowhere::sparse::SparseRow<float>>; std::is_same_v<T, knowhere::sparse::SparseRow<sparseValueType>>;
template <typename T> template <typename T>
using ChunkViewType = std::conditional_t< using ChunkViewType = std::conditional_t<

View File

@ -63,6 +63,7 @@ using float16 = knowhere::fp16;
using bfloat16 = knowhere::bf16; using bfloat16 = knowhere::bf16;
using bin1 = knowhere::bin1; using bin1 = knowhere::bin1;
using int8 = knowhere::int8; using int8 = knowhere::int8;
using sparse_u32_f32 = knowhere::sparse_u32_f32;
// See also: https://github.com/milvus-io/milvus-proto/blob/master/proto/schema.proto // See also: https://github.com/milvus-io/milvus-proto/blob/master/proto/schema.proto
enum class DataType { enum class DataType {
@ -91,7 +92,7 @@ enum class DataType {
VECTOR_FLOAT = 101, VECTOR_FLOAT = 101,
VECTOR_FLOAT16 = 102, VECTOR_FLOAT16 = 102,
VECTOR_BFLOAT16 = 103, VECTOR_BFLOAT16 = 103,
VECTOR_SPARSE_FLOAT = 104, VECTOR_SPARSE_U32_F32 = 104,
VECTOR_INT8 = 105, VECTOR_INT8 = 105,
VECTOR_ARRAY = 106, VECTOR_ARRAY = 106,
}; };
@ -139,7 +140,7 @@ GetDataTypeSize(DataType data_type, int dim = 1) {
return sizeof(bfloat16) * dim; return sizeof(bfloat16) * dim;
case DataType::VECTOR_INT8: case DataType::VECTOR_INT8:
return sizeof(int8) * dim; return sizeof(int8) * dim;
// Not supporting variable length types(such as VECTOR_SPARSE_FLOAT and // Not supporting variable length types(such as VECTOR_SPARSE_U32_F32 and
// VARCHAR) here intentionally. We can't easily estimate the size of // VARCHAR) here intentionally. We can't easily estimate the size of
// them. Caller of this method must handle this case themselves and must // them. Caller of this method must handle this case themselves and must
// not pass variable length types to this method. // not pass variable length types to this method.
@ -184,7 +185,7 @@ GetArrowDataType(DataType data_type, int dim = 1) {
case DataType::VECTOR_FLOAT16: case DataType::VECTOR_FLOAT16:
case DataType::VECTOR_BFLOAT16: case DataType::VECTOR_BFLOAT16:
return arrow::fixed_size_binary(dim * 2); return arrow::fixed_size_binary(dim * 2);
case DataType::VECTOR_SPARSE_FLOAT: case DataType::VECTOR_SPARSE_U32_F32:
return arrow::binary(); return arrow::binary();
case DataType::VECTOR_INT8: case DataType::VECTOR_INT8:
return arrow::fixed_size_binary(dim); return arrow::fixed_size_binary(dim);
@ -244,8 +245,8 @@ GetDataTypeName(DataType data_type) {
return "vector_float16"; return "vector_float16";
case DataType::VECTOR_BFLOAT16: case DataType::VECTOR_BFLOAT16:
return "vector_bfloat16"; return "vector_bfloat16";
case DataType::VECTOR_SPARSE_FLOAT: case DataType::VECTOR_SPARSE_U32_F32:
return "vector_sparse_float"; return "VECTOR_SPARSE_U32_F32";
case DataType::VECTOR_INT8: case DataType::VECTOR_INT8:
return "vector_int8"; return "vector_int8";
case DataType::VECTOR_ARRAY: case DataType::VECTOR_ARRAY:
@ -386,7 +387,7 @@ IsDenseFloatVectorDataType(DataType data_type) {
inline bool inline bool
IsSparseFloatVectorDataType(DataType data_type) { IsSparseFloatVectorDataType(DataType data_type) {
return data_type == DataType::VECTOR_SPARSE_FLOAT; return data_type == DataType::VECTOR_SPARSE_U32_F32;
} }
inline bool inline bool
@ -749,8 +750,8 @@ struct fmt::formatter<milvus::DataType> : formatter<string_view> {
case milvus::DataType::VECTOR_BFLOAT16: case milvus::DataType::VECTOR_BFLOAT16:
name = "VECTOR_BFLOAT16"; name = "VECTOR_BFLOAT16";
break; break;
case milvus::DataType::VECTOR_SPARSE_FLOAT: case milvus::DataType::VECTOR_SPARSE_U32_F32:
name = "VECTOR_SPARSE_FLOAT"; name = "VECTOR_SPARSE_U32_F32";
break; break;
case milvus::DataType::VECTOR_INT8: case milvus::DataType::VECTOR_INT8:
name = "VECTOR_INT8"; name = "VECTOR_INT8";

View File

@ -43,6 +43,7 @@ namespace milvus {
(data_array->vectors().type##_vector().data()) (data_array->vectors().type##_vector().data())
using CheckDataValid = std::function<bool(size_t)>; using CheckDataValid = std::function<bool(size_t)>;
using sparseValueType = typename knowhere::sparse_u32_f32::ValueType;
inline DatasetPtr inline DatasetPtr
GenDataset(const int64_t nb, const int64_t dim, const void* xb) { GenDataset(const int64_t nb, const int64_t dim, const void* xb) {
@ -245,17 +246,17 @@ EscapeBraces(const std::string& input) {
return result; return result;
} }
inline knowhere::sparse::SparseRow<float> inline knowhere::sparse::SparseRow<sparseValueType>
CopyAndWrapSparseRow(const void* data, CopyAndWrapSparseRow(const void* data,
size_t size, size_t size,
const bool validate = false) { const bool validate = false) {
size_t num_elements = size_t num_elements =
size / knowhere::sparse::SparseRow<float>::element_size(); size / knowhere::sparse::SparseRow<sparseValueType>::element_size();
knowhere::sparse::SparseRow<float> row(num_elements); knowhere::sparse::SparseRow<sparseValueType> row(num_elements);
std::memcpy(row.data(), data, size); std::memcpy(row.data(), data, size);
if (validate) { if (validate) {
AssertInfo( AssertInfo(
size % knowhere::sparse::SparseRow<float>::element_size() == 0, size % knowhere::sparse::SparseRow<sparseValueType>::element_size() == 0,
"Invalid size for sparse row data"); "Invalid size for sparse row data");
for (size_t i = 0; i < num_elements; ++i) { for (size_t i = 0; i < num_elements; ++i) {
auto element = row[i]; auto element = row[i];
@ -276,17 +277,17 @@ CopyAndWrapSparseRow(const void* data,
// Iterable is a list of bytes, each is a byte array representation of a single // Iterable is a list of bytes, each is a byte array representation of a single
// sparse float row. This helper function converts such byte arrays into a list // sparse float row. This helper function converts such byte arrays into a list
// of knowhere::sparse::SparseRow<float>. The resulting list is a deep copy of // of knowhere::sparse::SparseRow<sparseValueType>. The resulting list is a deep copy of
// the source data. // the source data.
// //
// Here in segcore we validate the sparse row data only for search requests, // Here in segcore we validate the sparse row data only for search requests,
// as the insert/upsert data are already validated in go code. // as the insert/upsert data are already validated in go code.
template <typename Iterable> template <typename Iterable>
std::unique_ptr<knowhere::sparse::SparseRow<float>[]> std::unique_ptr<knowhere::sparse::SparseRow<sparseValueType>[]>
SparseBytesToRows(const Iterable& rows, const bool validate = false) { SparseBytesToRows(const Iterable& rows, const bool validate = false) {
AssertInfo(rows.size() > 0, "at least 1 sparse row should be provided"); AssertInfo(rows.size() > 0, "at least 1 sparse row should be provided");
auto res = auto res =
std::make_unique<knowhere::sparse::SparseRow<float>[]>(rows.size()); std::make_unique<knowhere::sparse::SparseRow<sparseValueType>[]>(rows.size());
for (size_t i = 0; i < rows.size(); ++i) { for (size_t i = 0; i < rows.size(); ++i) {
res[i] = std::move( res[i] = std::move(
CopyAndWrapSparseRow(rows[i].data(), rows[i].size(), validate)); CopyAndWrapSparseRow(rows[i].data(), rows[i].size(), validate));
@ -294,11 +295,11 @@ SparseBytesToRows(const Iterable& rows, const bool validate = false) {
return res; return res;
} }
// SparseRowsToProto converts a list of knowhere::sparse::SparseRow<float> to // SparseRowsToProto converts a list of knowhere::sparse::SparseRow<sparseValueType> to
// a milvus::proto::schema::SparseFloatArray. The resulting proto is a deep copy // a milvus::proto::schema::SparseFloatArray. The resulting proto is a deep copy
// of the source data. source(i) returns the i-th row to be copied. // of the source data. source(i) returns the i-th row to be copied.
inline void SparseRowsToProto( inline void SparseRowsToProto(
const std::function<const knowhere::sparse::SparseRow<float>*(size_t)>& const std::function<const knowhere::sparse::SparseRow<sparseValueType>*(size_t)>&
source, source,
int64_t rows, int64_t rows,
milvus::proto::schema::SparseFloatArray* proto) { milvus::proto::schema::SparseFloatArray* proto) {

View File

@ -122,7 +122,7 @@ class SparseFloatVector : public VectorTrait {
public: public:
using embedded_type = float; using embedded_type = float;
static constexpr int32_t dim_factor = 1; static constexpr int32_t dim_factor = 1;
static constexpr auto data_type = DataType::VECTOR_SPARSE_FLOAT; static constexpr auto data_type = DataType::VECTOR_SPARSE_U32_F32;
static constexpr auto c_data_type = CDataType::SparseFloatVector; static constexpr auto c_data_type = CDataType::SparseFloatVector;
static constexpr auto schema_data_type = static constexpr auto schema_data_type =
proto::schema::DataType::SparseFloatVector; proto::schema::DataType::SparseFloatVector;

View File

@ -93,6 +93,11 @@ KnowhereInitSearchThreadPool(const uint32_t num_threads) {
} }
} }
void
KnowhereInitFetchThreadPool(const uint32_t num_threads) {
knowhere::KnowhereConfig::SetFetchThreadPoolSize(num_threads);
}
void void
KnowhereInitGPUMemoryPool(const uint32_t init_size, const uint32_t max_size) { KnowhereInitGPUMemoryPool(const uint32_t init_size, const uint32_t max_size) {
if (init_size == 0 && max_size == 0) { if (init_size == 0 && max_size == 0) {

View File

@ -35,6 +35,9 @@ KnowhereInitBuildThreadPool(const uint32_t);
void void
KnowhereInitSearchThreadPool(const uint32_t); KnowhereInitSearchThreadPool(const uint32_t);
void
KnowhereInitFetchThreadPool(const uint32_t);
int32_t int32_t
GetMinimalIndexVersion(); GetMinimalIndexVersion();

View File

@ -184,9 +184,9 @@ IndexFactory::VecIndexLoadResource(
knowhere::IndexStaticFaced<knowhere::bf16>::HasRawData( knowhere::IndexStaticFaced<knowhere::bf16>::HasRawData(
index_type, index_version, config); index_type, index_version, config);
break; break;
case milvus::DataType::VECTOR_SPARSE_FLOAT: case milvus::DataType::VECTOR_SPARSE_U32_F32:
resource = knowhere::IndexStaticFaced< resource = knowhere::IndexStaticFaced<
knowhere::fp32>::EstimateLoadResource(index_type, knowhere::sparse_u32_f32>::EstimateLoadResource(index_type,
index_version, index_version,
index_size_gb, index_size_gb,
config); config);
@ -516,8 +516,8 @@ IndexFactory::CreateVectorIndex(
return std::make_unique<VectorDiskAnnIndex<bin1>>( return std::make_unique<VectorDiskAnnIndex<bin1>>(
index_type, metric_type, version, file_manager_context); index_type, metric_type, version, file_manager_context);
} }
case DataType::VECTOR_SPARSE_FLOAT: { case DataType::VECTOR_SPARSE_U32_F32: {
return std::make_unique<VectorDiskAnnIndex<float>>( return std::make_unique<VectorDiskAnnIndex<sparse_u32_f32>>(
index_type, metric_type, version, file_manager_context); index_type, metric_type, version, file_manager_context);
} }
case DataType::VECTOR_ARRAY: { case DataType::VECTOR_ARRAY: {
@ -537,8 +537,7 @@ IndexFactory::CreateVectorIndex(
} }
} else { // create mem index } else { // create mem index
switch (data_type) { switch (data_type) {
case DataType::VECTOR_FLOAT: case DataType::VECTOR_FLOAT: {
case DataType::VECTOR_SPARSE_FLOAT: {
return std::make_unique<VectorMemIndex<float>>( return std::make_unique<VectorMemIndex<float>>(
DataType::NONE, DataType::NONE,
index_type, index_type,
@ -547,6 +546,15 @@ IndexFactory::CreateVectorIndex(
use_knowhere_build_pool, use_knowhere_build_pool,
file_manager_context); file_manager_context);
} }
case DataType::VECTOR_SPARSE_U32_F32: {
return std::make_unique<VectorMemIndex<sparse_u32_f32>>(
DataType::NONE,
index_type,
metric_type,
version,
use_knowhere_build_pool,
file_manager_context);
}
case DataType::VECTOR_BINARY: { case DataType::VECTOR_BINARY: {
return std::make_unique<VectorMemIndex<bin1>>( return std::make_unique<VectorMemIndex<bin1>>(
DataType::NONE, DataType::NONE,
@ -596,11 +604,19 @@ IndexFactory::CreateVectorIndex(
version, version,
use_knowhere_build_pool, use_knowhere_build_pool,
file_manager_context); file_manager_context);
case DataType::VECTOR_SPARSE_U32_F32:
return std::make_unique<VectorMemIndex<sparse_u32_f32>>(
element_type,
index_type,
metric_type,
version,
use_knowhere_build_pool,
file_manager_context);
default: default:
ThrowInfo(NotImplemented, ThrowInfo(NotImplemented,
fmt::format("not implemented data type to " fmt::format("not implemented data type to "
"build mem index: {}", "build mem index: {}",
data_type)); element_type));
} }
} }
default: default:

View File

@ -168,7 +168,7 @@ VectorDiskAnnIndex<T>::Build(const Config& config) {
index_.IsAdditionalScalarSupported( index_.IsAdditionalScalarSupported(
is_partition_key_isolation.value_or(false))) { is_partition_key_isolation.value_or(false))) {
build_config[VEC_OPT_FIELDS_PATH] = build_config[VEC_OPT_FIELDS_PATH] =
file_manager_->CacheOptFieldToDisk(opt_fields.value()); file_manager_->CacheOptFieldToDisk(config);
// `partition_key_isolation` is already in the config, so it falls through // `partition_key_isolation` is already in the config, so it falls through
// into the index Build call directly // into the index Build call directly
} }
@ -415,5 +415,6 @@ template class VectorDiskAnnIndex<float>;
template class VectorDiskAnnIndex<float16>; template class VectorDiskAnnIndex<float16>;
template class VectorDiskAnnIndex<bfloat16>; template class VectorDiskAnnIndex<bfloat16>;
template class VectorDiskAnnIndex<bin1>; template class VectorDiskAnnIndex<bin1>;
template class VectorDiskAnnIndex<sparse_u32_f32>;
} // namespace milvus::index } // namespace milvus::index

View File

@ -80,7 +80,7 @@ class VectorDiskAnnIndex : public VectorIndex {
std::vector<uint8_t> std::vector<uint8_t>
GetVector(const DatasetPtr dataset) const override; GetVector(const DatasetPtr dataset) const override;
std::unique_ptr<const knowhere::sparse::SparseRow<float>[]> std::unique_ptr<const knowhere::sparse::SparseRow<sparseValueType>[]>
GetSparseVector(const DatasetPtr dataset) const override { GetSparseVector(const DatasetPtr dataset) const override {
ThrowInfo(ErrorCode::Unsupported, ThrowInfo(ErrorCode::Unsupported,
"get sparse vector not supported for disk index"); "get sparse vector not supported for disk index");

View File

@ -76,7 +76,7 @@ class VectorIndex : public IndexBase {
virtual std::vector<uint8_t> virtual std::vector<uint8_t>
GetVector(const DatasetPtr dataset) const = 0; GetVector(const DatasetPtr dataset) const = 0;
virtual std::unique_ptr<const knowhere::sparse::SparseRow<float>[]> virtual std::unique_ptr<const knowhere::sparse::SparseRow<sparseValueType>[]>
GetSparseVector(const DatasetPtr dataset) const = 0; GetSparseVector(const DatasetPtr dataset) const = 0;
IndexType IndexType

View File

@ -426,10 +426,10 @@ VectorMemIndex<T>::Build(const Config& config) {
field_data) field_data)
->Dim()); ->Dim());
} }
std::vector<knowhere::sparse::SparseRow<float>> vec(total_rows); std::vector<knowhere::sparse::SparseRow<sparseValueType>> vec(total_rows);
int64_t offset = 0; int64_t offset = 0;
for (auto field_data : field_datas) { for (auto field_data : field_datas) {
auto ptr = static_cast<const knowhere::sparse::SparseRow<float>*>( auto ptr = static_cast<const knowhere::sparse::SparseRow<sparseValueType>*>(
field_data->Data()); field_data->Data());
AssertInfo(ptr, "failed to cast field data to sparse rows"); AssertInfo(ptr, "failed to cast field data to sparse rows");
for (size_t i = 0; i < field_data->Length(); ++i) { for (size_t i = 0; i < field_data->Length(); ++i) {
@ -570,7 +570,7 @@ VectorMemIndex<T>::GetVector(const DatasetPtr dataset) const {
} }
template <typename T> template <typename T>
std::unique_ptr<const knowhere::sparse::SparseRow<float>[]> std::unique_ptr<const knowhere::sparse::SparseRow<sparseValueType>[]>
VectorMemIndex<T>::GetSparseVector(const DatasetPtr dataset) const { VectorMemIndex<T>::GetSparseVector(const DatasetPtr dataset) const {
auto res = index_.GetVectorByIds(dataset); auto res = index_.GetVectorByIds(dataset);
if (!res.has_value()) { if (!res.has_value()) {
@ -579,8 +579,8 @@ VectorMemIndex<T>::GetSparseVector(const DatasetPtr dataset) const {
} }
// release and transfer ownership to the result unique ptr. // release and transfer ownership to the result unique ptr.
res.value()->SetIsOwner(false); res.value()->SetIsOwner(false);
return std::unique_ptr<const knowhere::sparse::SparseRow<float>[]>( return std::unique_ptr<const knowhere::sparse::SparseRow<sparseValueType>[]>(
static_cast<const knowhere::sparse::SparseRow<float>*>( static_cast<const knowhere::sparse::SparseRow<sparseValueType>*>(
res.value()->GetTensor())); res.value()->GetTensor()));
} }
@ -751,5 +751,6 @@ template class VectorMemIndex<bin1>;
template class VectorMemIndex<float16>; template class VectorMemIndex<float16>;
template class VectorMemIndex<bfloat16>; template class VectorMemIndex<bfloat16>;
template class VectorMemIndex<int8>; template class VectorMemIndex<int8>;
template class VectorMemIndex<sparse_u32_f32>;
} // namespace milvus::index } // namespace milvus::index

View File

@ -87,7 +87,7 @@ class VectorMemIndex : public VectorIndex {
std::vector<uint8_t> std::vector<uint8_t>
GetVector(const DatasetPtr dataset) const override; GetVector(const DatasetPtr dataset) const override;
std::unique_ptr<const knowhere::sparse::SparseRow<float>[]> std::unique_ptr<const knowhere::sparse::SparseRow<sparseValueType>[]>
GetSparseVector(const DatasetPtr dataset) const override; GetSparseVector(const DatasetPtr dataset) const override;
IndexStatsPtr IndexStatsPtr

View File

@ -68,7 +68,7 @@ class IndexFactory {
case DataType::VECTOR_FLOAT16: case DataType::VECTOR_FLOAT16:
case DataType::VECTOR_BFLOAT16: case DataType::VECTOR_BFLOAT16:
case DataType::VECTOR_BINARY: case DataType::VECTOR_BINARY:
case DataType::VECTOR_SPARSE_FLOAT: case DataType::VECTOR_SPARSE_U32_F32:
case DataType::VECTOR_INT8: case DataType::VECTOR_INT8:
case DataType::VECTOR_ARRAY: case DataType::VECTOR_ARRAY:
return std::make_unique<VecIndexCreator>(type, config, context); return std::make_unique<VecIndexCreator>(type, config, context);

View File

@ -134,8 +134,8 @@ VariableLengthChunk<std::string>::set(
// Template specialization for sparse vector // Template specialization for sparse vector
template <> template <>
inline void inline void
VariableLengthChunk<knowhere::sparse::SparseRow<float>>::set( VariableLengthChunk<knowhere::sparse::SparseRow<sparseValueType>>::set(
const knowhere::sparse::SparseRow<float>* src, const knowhere::sparse::SparseRow<sparseValueType>* src,
uint32_t begin, uint32_t begin,
uint32_t length, uint32_t length,
const std::optional<CheckDataValid>& check_data_valid) { const std::optional<CheckDataValid>& check_data_valid) {
@ -158,7 +158,7 @@ VariableLengthChunk<knowhere::sparse::SparseRow<float>>::set(
uint8_t* data_ptr = buf + offset; uint8_t* data_ptr = buf + offset;
std::memcpy(data_ptr, (uint8_t*)src[i].data(), data_size); std::memcpy(data_ptr, (uint8_t*)src[i].data(), data_size);
data_[i + begin] = data_[i + begin] =
knowhere::sparse::SparseRow<float>(src[i].size(), data_ptr, false); knowhere::sparse::SparseRow<sparseValueType>(src[i].size(), data_ptr, false);
offset += data_size; offset += data_size;
} }
} }

View File

@ -16,9 +16,9 @@
char* char*
GetCoreMetrics() { GetCoreMetrics() {
auto str = milvus::monitor::prometheusClient->GetMetrics(); auto str = milvus::monitor::getPrometheusClient().GetMetrics();
auto len = str.length(); auto len = str.length();
char* res = (char*)malloc(len + 1); char* res = static_cast<char*>(malloc(len + 1));
memcpy(res, str.data(), len); memcpy(res, str.data(), len);
res[len] = '\0'; res[len] = '\0';
return res; return res;

View File

@ -27,10 +27,11 @@ const prometheus::Histogram::BucketBoundaries cgoCallDurationbuckets = {
// One histogram per function name (label) // One histogram per function name (label)
static inline prometheus::Histogram& static inline prometheus::Histogram&
GetHistogram(std::string&& func) { GetHistogram(std::string&& func) {
static auto& hist_family = prometheus::BuildHistogram() static auto& hist_family =
prometheus::BuildHistogram()
.Name("milvus_cgocall_duration_seconds") .Name("milvus_cgocall_duration_seconds")
.Help("Duration of cgo-exposed functions") .Help("Duration of cgo-exposed functions")
.Register(prometheusClient->GetRegistry()); .Register(getPrometheusClient().GetRegistry());
// default buckets: [0.005, 0.01, ..., 1.0] // default buckets: [0.005, 0.01, ..., 1.0]
return hist_family.Add({{"func", func}}, cgoCallDurationbuckets); return hist_family.Add({{"func", func}}, cgoCallDurationbuckets);

View File

@ -23,6 +23,7 @@
#include "common/Json.h" #include "common/Json.h"
#include "common/Consts.h" #include "common/Consts.h"
#include "common/Schema.h" #include "common/Schema.h"
#include "common/Utils.h"
namespace milvus::query { namespace milvus::query {
@ -80,7 +81,7 @@ struct Placeholder {
// only one of blob_ and sparse_matrix_ should be set. blob_ is used for // only one of blob_ and sparse_matrix_ should be set. blob_ is used for
// dense vector search and sparse_matrix_ is for sparse vector search. // dense vector search and sparse_matrix_ is for sparse vector search.
aligned_vector<char> blob_; aligned_vector<char> blob_;
std::unique_ptr<knowhere::sparse::SparseRow<float>[]> sparse_matrix_; std::unique_ptr<knowhere::sparse::SparseRow<sparseValueType>[]> sparse_matrix_;
// offsets for embedding list // offsets for embedding list
aligned_vector<size_t> lims_; aligned_vector<size_t> lims_;

View File

@ -106,7 +106,7 @@ PrepareBFDataSet(const dataset::SearchDataset& query_ds,
query_dataset->SetRows(query_ds.query_lims[query_ds.num_queries]); query_dataset->SetRows(query_ds.query_lims[query_ds.num_queries]);
} }
if (data_type == DataType::VECTOR_SPARSE_FLOAT) { if (data_type == DataType::VECTOR_SPARSE_U32_F32) {
base_dataset->SetIsSparse(true); base_dataset->SetIsSparse(true);
query_dataset->SetIsSparse(true); query_dataset->SetIsSparse(true);
} }
@ -168,9 +168,9 @@ BruteForceSearch(const dataset::SearchDataset& query_ds,
} else if (data_type == DataType::VECTOR_BINARY) { } else if (data_type == DataType::VECTOR_BINARY) {
res = knowhere::BruteForce::RangeSearch<bin1>( res = knowhere::BruteForce::RangeSearch<bin1>(
base_dataset, query_dataset, search_cfg, bitset); base_dataset, query_dataset, search_cfg, bitset);
} else if (data_type == DataType::VECTOR_SPARSE_FLOAT) { } else if (data_type == DataType::VECTOR_SPARSE_U32_F32) {
res = knowhere::BruteForce::RangeSearch< res = knowhere::BruteForce::RangeSearch<
knowhere::sparse::SparseRow<float>>( knowhere::sparse::SparseRow<sparseValueType>>(
base_dataset, query_dataset, search_cfg, bitset); base_dataset, query_dataset, search_cfg, bitset);
} else if (data_type == DataType::VECTOR_INT8) { } else if (data_type == DataType::VECTOR_INT8) {
res = knowhere::BruteForce::RangeSearch<int8>( res = knowhere::BruteForce::RangeSearch<int8>(
@ -229,7 +229,7 @@ BruteForceSearch(const dataset::SearchDataset& query_ds,
sub_result.mutable_distances().data(), sub_result.mutable_distances().data(),
search_cfg, search_cfg,
bitset); bitset);
} else if (data_type == DataType::VECTOR_SPARSE_FLOAT) { } else if (data_type == DataType::VECTOR_SPARSE_U32_F32) {
stat = knowhere::BruteForce::SearchSparseWithBuf( stat = knowhere::BruteForce::SearchSparseWithBuf(
base_dataset, base_dataset,
query_dataset, query_dataset,
@ -279,9 +279,9 @@ DispatchBruteForceIteratorByDataType(const knowhere::DataSetPtr& base_dataset,
case DataType::VECTOR_BFLOAT16: case DataType::VECTOR_BFLOAT16:
return knowhere::BruteForce::AnnIterator<bfloat16>( return knowhere::BruteForce::AnnIterator<bfloat16>(
base_dataset, query_dataset, config, bitset); base_dataset, query_dataset, config, bitset);
case DataType::VECTOR_SPARSE_FLOAT: case DataType::VECTOR_SPARSE_U32_F32:
return knowhere::BruteForce::AnnIterator< return knowhere::BruteForce::AnnIterator<
knowhere::sparse::SparseRow<float>>( knowhere::sparse::SparseRow<sparseValueType>>(
base_dataset, query_dataset, config, bitset); base_dataset, query_dataset, config, bitset);
case DataType::VECTOR_INT8: case DataType::VECTOR_INT8:
return knowhere::BruteForce::AnnIterator<int8>( return knowhere::BruteForce::AnnIterator<int8>(

View File

@ -38,13 +38,13 @@ FloatSegmentIndexSearch(const segcore::SegmentGrowingImpl& segment,
auto vecfield_id = info.field_id_; auto vecfield_id = info.field_id_;
auto& field = schema[vecfield_id]; auto& field = schema[vecfield_id];
auto is_sparse = field.get_data_type() == DataType::VECTOR_SPARSE_FLOAT; auto is_sparse = field.get_data_type() == DataType::VECTOR_SPARSE_U32_F32;
// TODO(SPARSE): see todo in PlanImpl.h::PlaceHolder. // TODO(SPARSE): see todo in PlanImpl.h::PlaceHolder.
auto dim = is_sparse ? 0 : field.get_dim(); auto dim = is_sparse ? 0 : field.get_dim();
AssertInfo(IsVectorDataType(field.get_data_type()), AssertInfo(IsVectorDataType(field.get_data_type()),
"[FloatSearch]Field data type isn't VECTOR_FLOAT, " "[FloatSearch]Field data type isn't VECTOR_FLOAT, "
"VECTOR_FLOAT16, VECTOR_BFLOAT16 or VECTOR_SPARSE_FLOAT"); "VECTOR_FLOAT16, VECTOR_BFLOAT16 or VECTOR_SPARSE_U32_F32");
dataset::SearchDataset search_dataset{info.metric_type_, dataset::SearchDataset search_dataset{info.metric_type_,
num_queries, num_queries,
info.topk_, info.topk_,
@ -119,7 +119,7 @@ SearchOnGrowing(const segcore::SegmentGrowingImpl& segment,
} }
SubSearchResult final_qr(num_queries, topk, metric_type, round_decimal); SubSearchResult final_qr(num_queries, topk, metric_type, round_decimal);
// TODO(SPARSE): see todo in PlanImpl.h::PlaceHolder. // TODO(SPARSE): see todo in PlanImpl.h::PlaceHolder.
auto dim = field.get_data_type() == DataType::VECTOR_SPARSE_FLOAT auto dim = field.get_data_type() == DataType::VECTOR_SPARSE_U32_F32
? 0 ? 0
: field.get_dim(); : field.get_dim();
dataset::SearchDataset search_dataset{metric_type, dataset::SearchDataset search_dataset{metric_type,

View File

@ -40,7 +40,7 @@ SearchOnSealedIndex(const Schema& schema,
auto field_id = search_info.field_id_; auto field_id = search_info.field_id_;
auto& field = schema[field_id]; auto& field = schema[field_id];
auto is_sparse = field.get_data_type() == DataType::VECTOR_SPARSE_FLOAT; auto is_sparse = field.get_data_type() == DataType::VECTOR_SPARSE_U32_F32;
// TODO(SPARSE): see todo in PlanImpl.h::PlaceHolder. // TODO(SPARSE): see todo in PlanImpl.h::PlaceHolder.
auto dim = is_sparse ? 0 : field.get_dim(); auto dim = is_sparse ? 0 : field.get_dim();
@ -115,7 +115,7 @@ SearchOnSealedColumn(const Schema& schema,
auto data_type = field.get_data_type(); auto data_type = field.get_data_type();
auto element_type = field.get_element_type(); auto element_type = field.get_element_type();
// TODO(SPARSE): see todo in PlanImpl.h::PlaceHolder. // TODO(SPARSE): see todo in PlanImpl.h::PlaceHolder.
auto dim = data_type == DataType::VECTOR_SPARSE_FLOAT ? 0 : field.get_dim(); auto dim = data_type == DataType::VECTOR_SPARSE_U32_F32 ? 0 : field.get_dim();
query::dataset::SearchDataset query_dataset{search_info.metric_type_, query::dataset::SearchDataset query_dataset{search_info.metric_type_,
num_queries, num_queries,

View File

@ -813,7 +813,7 @@ ChunkedSegmentSealedImpl::get_vector(FieldId field_id,
if (has_raw_data) { if (has_raw_data) {
// If index has raw data, get vector from memory. // If index has raw data, get vector from memory.
auto ids_ds = GenIdsDataset(count, ids); auto ids_ds = GenIdsDataset(count, ids);
if (field_meta.get_data_type() == DataType::VECTOR_SPARSE_FLOAT) { if (field_meta.get_data_type() == DataType::VECTOR_SPARSE_U32_F32) {
auto res = vec_index->GetSparseVector(ids_ds); auto res = vec_index->GetSparseVector(ids_ds);
return segcore::CreateVectorDataArrayFrom( return segcore::CreateVectorDataArrayFrom(
res.get(), count, field_meta); res.get(), count, field_meta);
@ -1752,7 +1752,7 @@ ChunkedSegmentSealedImpl::get_raw_data(FieldId field_id,
ret->mutable_vectors()->mutable_int8_vector()->data()); ret->mutable_vectors()->mutable_int8_vector()->data());
break; break;
} }
case DataType::VECTOR_SPARSE_FLOAT: { case DataType::VECTOR_SPARSE_U32_F32: {
auto dst = ret->mutable_vectors()->mutable_sparse_float_vector(); auto dst = ret->mutable_vectors()->mutable_sparse_float_vector();
int64_t max_dim = 0; int64_t max_dim = 0;
column->BulkValueAt( column->BulkValueAt(
@ -1761,7 +1761,7 @@ ChunkedSegmentSealedImpl::get_raw_data(FieldId field_id,
auto row = auto row =
offset != INVALID_SEG_OFFSET offset != INVALID_SEG_OFFSET
? static_cast< ? static_cast<
const knowhere::sparse::SparseRow<float>*>( const knowhere::sparse::SparseRow<sparseValueType>*>(
static_cast<const void*>(value)) static_cast<const void*>(value))
: nullptr; : nullptr;
if (row == nullptr) { if (row == nullptr) {
@ -2108,7 +2108,7 @@ ChunkedSegmentSealedImpl::generate_interim_index(const FieldId field_id,
auto& index_params = field_index_meta.GetIndexParams(); auto& index_params = field_index_meta.GetIndexParams();
bool is_sparse = bool is_sparse =
field_meta.get_data_type() == DataType::VECTOR_SPARSE_FLOAT; field_meta.get_data_type() == DataType::VECTOR_SPARSE_U32_F32;
bool enable_growing_mmap = storage::MmapManager::GetInstance() bool enable_growing_mmap = storage::MmapManager::GetInstance()
.GetMmapConfig() .GetMmapConfig()

View File

@ -37,7 +37,7 @@ VectorBase::set_data_raw(ssize_t element_offset,
return set_data_raw( return set_data_raw(
element_offset, VEC_FIELD_DATA(data, bfloat16), element_count); element_offset, VEC_FIELD_DATA(data, bfloat16), element_count);
} else if (field_meta.get_data_type() == } else if (field_meta.get_data_type() ==
DataType::VECTOR_SPARSE_FLOAT) { DataType::VECTOR_SPARSE_U32_F32) {
return set_data_raw( return set_data_raw(
element_offset, element_offset,
SparseBytesToRows( SparseBytesToRows(

View File

@ -504,13 +504,13 @@ class ConcurrentVector<VectorArray>
template <> template <>
class ConcurrentVector<SparseFloatVector> class ConcurrentVector<SparseFloatVector>
: public ConcurrentVectorImpl<knowhere::sparse::SparseRow<float>, true> { : public ConcurrentVectorImpl<knowhere::sparse::SparseRow<sparseValueType>, true> {
public: public:
explicit ConcurrentVector( explicit ConcurrentVector(
int64_t size_per_chunk, int64_t size_per_chunk,
storage::MmapChunkDescriptorPtr mmap_descriptor = nullptr, storage::MmapChunkDescriptorPtr mmap_descriptor = nullptr,
ThreadSafeValidDataPtr valid_data_ptr = nullptr) ThreadSafeValidDataPtr valid_data_ptr = nullptr)
: ConcurrentVectorImpl<knowhere::sparse::SparseRow<float>, : ConcurrentVectorImpl<knowhere::sparse::SparseRow<sparseValueType>,
true>::ConcurrentVectorImpl(1, true>::ConcurrentVectorImpl(1,
size_per_chunk, size_per_chunk,
std::move( std::move(
@ -524,11 +524,11 @@ class ConcurrentVector<SparseFloatVector>
const void* source, const void* source,
ssize_t element_count) override { ssize_t element_count) override {
auto* src = auto* src =
static_cast<const knowhere::sparse::SparseRow<float>*>(source); static_cast<const knowhere::sparse::SparseRow<sparseValueType>*>(source);
for (int i = 0; i < element_count; ++i) { for (int i = 0; i < element_count; ++i) {
dim_ = std::max(dim_, src[i].dim()); dim_ = std::max(dim_, src[i].dim());
} }
ConcurrentVectorImpl<knowhere::sparse::SparseRow<float>, ConcurrentVectorImpl<knowhere::sparse::SparseRow<sparseValueType>,
true>::set_data_raw(element_offset, true>::set_data_raw(element_offset,
source, source,
element_count); element_count);

View File

@ -46,7 +46,7 @@ void
VectorFieldIndexing::recreate_index(DataType data_type, VectorFieldIndexing::recreate_index(DataType data_type,
const VectorBase* field_raw_data) { const VectorBase* field_raw_data) {
if (IsSparseFloatVectorDataType(data_type)) { if (IsSparseFloatVectorDataType(data_type)) {
index_ = std::make_unique<index::VectorMemIndex<float>>( index_ = std::make_unique<index::VectorMemIndex<sparse_u32_f32>>(
DataType::NONE, DataType::NONE,
config_->GetIndexType(), config_->GetIndexType(),
config_->GetMetricType(), config_->GetMetricType(),
@ -150,7 +150,7 @@ VectorFieldIndexing::AppendSegmentIndexSparse(int64_t reserved_offset,
auto dim = source->Dim(); auto dim = source->Dim();
while (total_rows > 0) { while (total_rows > 0) {
auto mat = static_cast<const knowhere::sparse::SparseRow<float>*>( auto mat = static_cast<const knowhere::sparse::SparseRow<sparseValueType>*>(
source->get_chunk_data(chunk_id)); source->get_chunk_data(chunk_id));
auto rows = std::min(source->get_size_per_chunk(), total_rows); auto rows = std::min(source->get_size_per_chunk(), total_rows);
auto dataset = knowhere::GenDataSet(rows, dim, mat); auto dataset = knowhere::GenDataSet(rows, dim, mat);
@ -336,7 +336,7 @@ CreateIndex(const FieldMeta& field_meta,
field_meta.get_data_type() == DataType::VECTOR_FLOAT16 || field_meta.get_data_type() == DataType::VECTOR_FLOAT16 ||
field_meta.get_data_type() == DataType::VECTOR_BFLOAT16 || field_meta.get_data_type() == DataType::VECTOR_BFLOAT16 ||
field_meta.get_data_type() == DataType::VECTOR_INT8 || field_meta.get_data_type() == DataType::VECTOR_INT8 ||
field_meta.get_data_type() == DataType::VECTOR_SPARSE_FLOAT) { field_meta.get_data_type() == DataType::VECTOR_SPARSE_U32_F32) {
return std::make_unique<VectorFieldIndexing>(field_meta, return std::make_unique<VectorFieldIndexing>(field_meta,
field_index_meta, field_index_meta,
segment_max_row_count, segment_max_row_count,

View File

@ -345,7 +345,7 @@ class IndexingRecord {
size, size,
field_raw_data, field_raw_data,
stream_data->vectors().bfloat16_vector().data()); stream_data->vectors().bfloat16_vector().data());
} else if (type == DataType::VECTOR_SPARSE_FLOAT) { } else if (type == DataType::VECTOR_SPARSE_U32_F32) {
auto data = SparseBytesToRows( auto data = SparseBytesToRows(
stream_data->vectors().sparse_float_vector().contents()); stream_data->vectors().sparse_float_vector().contents());
indexing->AppendSegmentIndexSparse( indexing->AppendSegmentIndexSparse(
@ -378,7 +378,7 @@ class IndexingRecord {
auto vec_base = record.get_data_base(fieldId); auto vec_base = record.get_data_base(fieldId);
indexing->AppendSegmentIndexDense( indexing->AppendSegmentIndexDense(
reserved_offset, size, vec_base, data->Data()); reserved_offset, size, vec_base, data->Data());
} else if (type == DataType::VECTOR_SPARSE_FLOAT) { } else if (type == DataType::VECTOR_SPARSE_U32_F32) {
auto vec_base = record.get_data_base(fieldId); auto vec_base = record.get_data_base(fieldId);
indexing->AppendSegmentIndexSparse( indexing->AppendSegmentIndexSparse(
reserved_offset, reserved_offset,
@ -406,7 +406,7 @@ class IndexingRecord {
if (data_type == DataType::VECTOR_FLOAT || if (data_type == DataType::VECTOR_FLOAT ||
data_type == DataType::VECTOR_FLOAT16 || data_type == DataType::VECTOR_FLOAT16 ||
data_type == DataType::VECTOR_BFLOAT16 || data_type == DataType::VECTOR_BFLOAT16 ||
data_type == DataType::VECTOR_SPARSE_FLOAT) { data_type == DataType::VECTOR_SPARSE_U32_F32) {
indexing->GetDataFromIndex( indexing->GetDataFromIndex(
seg_offsets, count, element_size, output_raw); seg_offsets, count, element_size, output_raw);
} }

View File

@ -699,7 +699,7 @@ struct InsertRecord<false> : public InsertRecord<true> {
dense_vec_mmap_descriptor); dense_vec_mmap_descriptor);
return; return;
} else if (field_meta.get_data_type() == } else if (field_meta.get_data_type() ==
DataType::VECTOR_SPARSE_FLOAT) { DataType::VECTOR_SPARSE_U32_F32) {
this->append_data<SparseFloatVector>( this->append_data<SparseFloatVector>(
field_id, size_per_chunk, vec_mmap_descriptor); field_id, size_per_chunk, vec_mmap_descriptor);
return; return;

View File

@ -782,7 +782,7 @@ SegmentGrowingImpl::bulk_subscript(FieldId field_id,
count, count,
result->mutable_vectors()->mutable_bfloat16_vector()->data()); result->mutable_vectors()->mutable_bfloat16_vector()->data());
} else if (field_meta.get_data_type() == } else if (field_meta.get_data_type() ==
DataType::VECTOR_SPARSE_FLOAT) { DataType::VECTOR_SPARSE_U32_F32) {
bulk_subscript_sparse_float_vector_impl( bulk_subscript_sparse_float_vector_impl(
field_id, field_id,
(const ConcurrentVector<SparseFloatVector>*)vec_ptr, (const ConcurrentVector<SparseFloatVector>*)vec_ptr,

View File

@ -210,7 +210,7 @@ GetRawDataSizeOfDataArray(const DataArray* data,
break; break;
} }
case DataType::VECTOR_SPARSE_FLOAT: { case DataType::VECTOR_SPARSE_U32_F32: {
// TODO(SPARSE, size) // TODO(SPARSE, size)
result += data->vectors().sparse_float_vector().ByteSizeLong(); result += data->vectors().sparse_float_vector().ByteSizeLong();
break; break;
@ -342,7 +342,7 @@ CreateEmptyVectorDataArray(int64_t count, const FieldMeta& field_meta) {
auto vector_array = data_array->mutable_vectors(); auto vector_array = data_array->mutable_vectors();
auto dim = 0; auto dim = 0;
if (data_type != DataType::VECTOR_SPARSE_FLOAT) { if (data_type != DataType::VECTOR_SPARSE_U32_F32) {
dim = field_meta.get_dim(); dim = field_meta.get_dim();
vector_array->set_dim(dim); vector_array->set_dim(dim);
} }
@ -373,7 +373,7 @@ CreateEmptyVectorDataArray(int64_t count, const FieldMeta& field_meta) {
obj->resize(length * sizeof(bfloat16)); obj->resize(length * sizeof(bfloat16));
break; break;
} }
case DataType::VECTOR_SPARSE_FLOAT: { case DataType::VECTOR_SPARSE_U32_F32: {
// does nothing here // does nothing here
break; break;
} }
@ -544,11 +544,11 @@ CreateVectorDataArrayFrom(const void* data_raw,
obj->assign(data, length * sizeof(bfloat16)); obj->assign(data, length * sizeof(bfloat16));
break; break;
} }
case DataType::VECTOR_SPARSE_FLOAT: { case DataType::VECTOR_SPARSE_U32_F32: {
SparseRowsToProto( SparseRowsToProto(
[&](size_t i) { [&](size_t i) {
return reinterpret_cast< return reinterpret_cast<
const knowhere::sparse::SparseRow<float>*>( const knowhere::sparse::SparseRow<sparseValueType>*>(
data_raw) + data_raw) +
i; i;
}, },
@ -655,7 +655,7 @@ MergeDataArray(std::vector<MergeBase>& merge_bases,
auto obj = vector_array->mutable_binary_vector(); auto obj = vector_array->mutable_binary_vector();
obj->assign(data + src_offset * num_bytes, num_bytes); obj->assign(data + src_offset * num_bytes, num_bytes);
} else if (field_meta.get_data_type() == } else if (field_meta.get_data_type() ==
DataType::VECTOR_SPARSE_FLOAT) { DataType::VECTOR_SPARSE_U32_F32) {
auto src = src_field_data->vectors().sparse_float_vector(); auto src = src_field_data->vectors().sparse_float_vector();
auto dst = vector_array->mutable_sparse_float_vector(); auto dst = vector_array->mutable_sparse_float_vector();
if (src.dim() > dst->dim()) { if (src.dim() > dst->dim()) {

View File

@ -123,6 +123,11 @@ SegcoreSetKnowhereSearchThreadPoolNum(const uint32_t num_threads) {
milvus::config::KnowhereInitSearchThreadPool(num_threads); milvus::config::KnowhereInitSearchThreadPool(num_threads);
} }
// C-linkage entry point that hands `num_threads` unchanged to
// milvus::config::KnowhereInitFetchThreadPool and returns nothing.
// NOTE(review): presumably sizes Knowhere's "fetch" thread pool, in the same
// way the neighbouring Search setter calls KnowhereInitSearchThreadPool --
// confirm against the config implementation.
extern "C" void
SegcoreSetKnowhereFetchThreadPoolNum(const uint32_t num_threads) {
milvus::config::KnowhereInitFetchThreadPool(num_threads);
}
extern "C" void extern "C" void
SegcoreSetKnowhereGpuMemoryPoolSize(const uint32_t init_size, SegcoreSetKnowhereGpuMemoryPoolSize(const uint32_t init_size,
const uint32_t max_size) { const uint32_t max_size) {

View File

@ -71,6 +71,9 @@ SegcoreSetKnowhereBuildThreadPoolNum(const uint32_t num_threads);
void void
SegcoreSetKnowhereSearchThreadPoolNum(const uint32_t num_threads); SegcoreSetKnowhereSearchThreadPoolNum(const uint32_t num_threads);
// Declaration of the fetch-thread-pool setter; takes the thread count as an
// unsigned 32-bit value and returns nothing.
// NOTE(review): the definition forwards `num_threads` to
// milvus::config::KnowhereInitFetchThreadPool -- presumably this must be called
// during initialization like the Build/Search setters; confirm with callers.
void
SegcoreSetKnowhereFetchThreadPoolNum(const uint32_t num_threads);
void void
SegcoreSetKnowhereGpuMemoryPoolSize(const uint32_t init_size, SegcoreSetKnowhereGpuMemoryPoolSize(const uint32_t init_size,
const uint32_t max_size); const uint32_t max_size);

View File

@ -105,7 +105,8 @@ InterimSealedIndexTranslator::get_cells(
false); false);
} }
} else { } else {
vec_index = std::make_unique<index::VectorMemIndex<float>>( // sparse vector case
vec_index = std::make_unique<index::VectorMemIndex<sparse_u32_f32>>(
DataType::NONE, DataType::NONE,
index_type_, index_type_,
metric_type_, metric_type_,

View File

@ -75,9 +75,9 @@ ValidateIndexParams(const char* index_type,
knowhere::Version::GetCurrentVersion().VersionNumber(), knowhere::Version::GetCurrentVersion().VersionNumber(),
json, json,
error_msg); error_msg);
} else if (dataType == milvus::DataType::VECTOR_SPARSE_FLOAT) { } else if (dataType == milvus::DataType::VECTOR_SPARSE_U32_F32) {
status = status =
knowhere::IndexStaticFaced<knowhere::fp32>::ConfigCheck( knowhere::IndexStaticFaced<knowhere::sparse_u32_f32>::ConfigCheck(
index_type, index_type,
knowhere::Version::GetCurrentVersion().VersionNumber(), knowhere::Version::GetCurrentVersion().VersionNumber(),
json, json,

View File

@ -476,7 +476,7 @@ DiskFileManagerImpl::cache_raw_data_to_disk_common(
GetFieldDataMeta().segment_id, GetFieldDataMeta().segment_id,
GetFieldDataMeta().field_id) + GetFieldDataMeta().field_id) +
"raw_data"; "raw_data";
if (dt == milvus::DataType::VECTOR_SPARSE_FLOAT) { if (dt == milvus::DataType::VECTOR_SPARSE_U32_F32) {
local_data_path += ".sparse_u32_f32"; local_data_path += ".sparse_u32_f32";
} }
local_chunk_manager->CreateFile(local_data_path); local_chunk_manager->CreateFile(local_data_path);
@ -484,13 +484,13 @@ DiskFileManagerImpl::cache_raw_data_to_disk_common(
init_file_info(data_type); init_file_info(data_type);
file_created = true; file_created = true;
} }
if (data_type == milvus::DataType::VECTOR_SPARSE_FLOAT) { if (data_type == milvus::DataType::VECTOR_SPARSE_U32_F32) {
dim = dim =
(uint32_t)(std::dynamic_pointer_cast<FieldData<SparseFloatVector>>( (uint32_t)(std::dynamic_pointer_cast<FieldData<SparseFloatVector>>(
field_data) field_data)
->Dim()); ->Dim());
auto sparse_rows = auto sparse_rows =
static_cast<const knowhere::sparse::SparseRow<float>*>( static_cast<const knowhere::sparse::SparseRow<sparseValueType>*>(
field_data->Data()); field_data->Data());
for (size_t i = 0; i < field_data->Length(); ++i) { for (size_t i = 0; i < field_data->Length(); ++i) {
auto row = sparse_rows[i]; auto row = sparse_rows[i];
@ -620,9 +620,11 @@ WriteOptFieldIvfDataImpl(
// Do not write to disk if there is only one value // Do not write to disk if there is only one value
if (mp.size() <= 1) { if (mp.size() <= 1) {
LOG_INFO("There are only one category, skip caching to local disk");
return false; return false;
} }
LOG_INFO("Get opt fields with {} categories", mp.size());
local_chunk_manager->Write(local_data_path, local_chunk_manager->Write(local_data_path,
write_offset, write_offset,
const_cast<int64_t*>(&field_id), const_cast<int64_t*>(&field_id),
@ -712,7 +714,31 @@ WriteOptFieldsIvfMeta(
} }
std::string std::string
DiskFileManagerImpl::CacheOptFieldToDisk(OptFieldT& fields_map) { DiskFileManagerImpl::CacheOptFieldToDisk(const Config& config) {
auto storage_version =
index::GetValueFromConfig<int64_t>(config, STORAGE_VERSION_KEY)
.value_or(0);
auto opt_fields =
index::GetValueFromConfig<OptFieldT>(config, VEC_OPT_FIELDS);
if (!opt_fields.has_value()) {
return "";
}
std::vector<std::vector<std::string>> remote_files_storage_v2;
if (storage_version == STORAGE_V2) {
auto segment_insert_files =
index::GetValueFromConfig<std::vector<std::vector<std::string>>>(
config, SEGMENT_INSERT_FILES_KEY);
AssertInfo(segment_insert_files.has_value(),
"segment insert files is empty when build index while "
"caching opt fields");
remote_files_storage_v2 = segment_insert_files.value();
for (auto& remote_files : remote_files_storage_v2) {
SortByPath(remote_files);
}
}
auto fields_map = opt_fields.value();
const uint32_t num_of_fields = fields_map.size(); const uint32_t num_of_fields = fields_map.size();
if (0 == num_of_fields) { if (0 == num_of_fields) {
return ""; return "";
@ -737,6 +763,13 @@ DiskFileManagerImpl::CacheOptFieldToDisk(OptFieldT& fields_map) {
std::unordered_set<int64_t> actual_field_ids; std::unordered_set<int64_t> actual_field_ids;
for (auto& [field_id, tup] : fields_map) { for (auto& [field_id, tup] : fields_map) {
const auto& field_type = std::get<1>(tup); const auto& field_type = std::get<1>(tup);
std::vector<FieldDataPtr> field_datas;
// fetch scalar data from storage v2
if (storage_version == STORAGE_V2) {
field_datas = GetFieldDatasFromStorageV2(
remote_files_storage_v2, field_id, field_type, 1, fs_);
} else { // original way
auto& field_paths = std::get<2>(tup); auto& field_paths = std::get<2>(tup);
if (0 == field_paths.size()) { if (0 == field_paths.size()) {
LOG_WARN("optional field {} has no data", field_id); LOG_WARN("optional field {} has no data", field_id);
@ -744,8 +777,8 @@ DiskFileManagerImpl::CacheOptFieldToDisk(OptFieldT& fields_map) {
} }
SortByPath(field_paths); SortByPath(field_paths);
std::vector<FieldDataPtr> field_datas = field_datas = FetchFieldData(rcm_.get(), field_paths);
FetchFieldData(rcm_.get(), field_paths); }
if (WriteOptFieldIvfData(field_type, if (WriteOptFieldIvfData(field_type,
field_id, field_id,
@ -934,6 +967,8 @@ template std::string
DiskFileManagerImpl::CacheRawDataToDisk<bfloat16>(const Config& config); DiskFileManagerImpl::CacheRawDataToDisk<bfloat16>(const Config& config);
template std::string template std::string
DiskFileManagerImpl::CacheRawDataToDisk<bin1>(const Config& config); DiskFileManagerImpl::CacheRawDataToDisk<bin1>(const Config& config);
template std::string
DiskFileManagerImpl::CacheRawDataToDisk<sparse_u32_f32>(const Config& config);
std::string std::string
DiskFileManagerImpl::GetRemoteIndexFilePrefixV2() const { DiskFileManagerImpl::GetRemoteIndexFilePrefixV2() const {

View File

@ -158,7 +158,7 @@ class DiskFileManagerImpl : public FileManagerImpl {
CacheRawDataToDisk(const Config& config); CacheRawDataToDisk(const Config& config);
std::string std::string
CacheOptFieldToDisk(OptFieldT& fields_map); CacheOptFieldToDisk(const Config& config);
std::string std::string
GetRemoteIndexPrefix() const { GetRemoteIndexPrefix() const {

View File

@ -300,11 +300,11 @@ BaseEventData::Serialize() {
} }
break; break;
} }
case DataType::VECTOR_SPARSE_FLOAT: { case DataType::VECTOR_SPARSE_U32_F32: {
for (size_t offset = 0; offset < field_data->get_num_rows(); for (size_t offset = 0; offset < field_data->get_num_rows();
++offset) { ++offset) {
auto row = auto row =
static_cast<const knowhere::sparse::SparseRow<float>*>( static_cast<const knowhere::sparse::SparseRow<sparseValueType>*>(
field_data->RawValue(offset)); field_data->RawValue(offset));
payload_writer->add_one_binary_payload( payload_writer->add_one_binary_payload(
static_cast<const uint8_t*>(row->data()), static_cast<const uint8_t*>(row->data()),

View File

@ -32,7 +32,7 @@ PayloadWriter::PayloadWriter(const DataType column_type, bool nullable)
// create payload writer for vector data type // create payload writer for vector data type
PayloadWriter::PayloadWriter(const DataType column_type, int dim, bool nullable) PayloadWriter::PayloadWriter(const DataType column_type, int dim, bool nullable)
: column_type_(column_type), nullable_(nullable) { : column_type_(column_type), nullable_(nullable) {
AssertInfo(column_type != DataType::VECTOR_SPARSE_FLOAT, AssertInfo(column_type != DataType::VECTOR_SPARSE_U32_F32,
"PayloadWriter for Sparse Float Vector should be created " "PayloadWriter for Sparse Float Vector should be created "
"using the constructor without dimension"); "using the constructor without dimension");
AssertInfo(nullable == false, "only scalcar type support null now"); AssertInfo(nullable == false, "only scalcar type support null now");

View File

@ -20,6 +20,13 @@ RemoteInputStream::Read(void* data, size_t size) {
return static_cast<size_t>(status.ValueOrDie()); return static_cast<size_t>(status.ValueOrDie());
} }
// Reads through `remote_file_->ReadAt`, asserts that the call succeeded, and
// returns the value it produced converted to size_t.
//
// Note the argument order: this method takes (data, offset, size), while the
// underlying call is made as (offset, size, data).
//
// NOTE(review): presumably `remote_file_` is an Arrow random-access file and
// the returned value is the number of bytes actually read, which may be less
// than `size` -- confirm against the Arrow API.
size_t
RemoteInputStream::ReadAt(void* data, size_t offset, size_t size) {
auto status = remote_file_->ReadAt(offset, size, data);
// A failed read trips the assertion instead of being returned to the caller.
AssertInfo(status.ok(), "Failed to read from input stream");
// Only reached when ok() held above, so extracting the value is safe here.
return static_cast<size_t>(status.ValueOrDie());
}
size_t size_t
RemoteInputStream::Read(int fd, size_t size) { RemoteInputStream::Read(int fd, size_t size) {
size_t read_batch_size = size_t read_batch_size =

View File

@ -29,6 +29,9 @@ class RemoteInputStream : public milvus::InputStream {
size_t size_t
Read(void* data, size_t size) override; Read(void* data, size_t size) override;
// Overrides the base-class ReadAt; parameters are the destination buffer,
// an offset, and a size, in that order.
// NOTE(review): presumably reads up to `size` bytes starting at byte `offset`
// into `data` and returns the count read -- confirm against InputStream.
size_t
ReadAt(void* data, size_t offset, size_t size) override;
size_t size_t
Read(int fd, size_t size) override; Read(int fd, size_t size) override;

View File

@ -206,7 +206,7 @@ AddPayloadToArrowBuilder(std::shared_ptr<arrow::ArrayBuilder> builder,
add_vector_payload(builder, const_cast<uint8_t*>(raw_data), length); add_vector_payload(builder, const_cast<uint8_t*>(raw_data), length);
break; break;
} }
case DataType::VECTOR_SPARSE_FLOAT: { case DataType::VECTOR_SPARSE_U32_F32: {
ThrowInfo(DataTypeInvalid, ThrowInfo(DataTypeInvalid,
"Sparse Float Vector payload should be added by calling " "Sparse Float Vector payload should be added by calling "
"add_one_binary_payload", "add_one_binary_payload",
@ -287,7 +287,7 @@ CreateArrowBuilder(DataType data_type) {
return std::make_shared<arrow::BinaryBuilder>(); return std::make_shared<arrow::BinaryBuilder>();
} }
// sparse float vector doesn't require a dim // sparse float vector doesn't require a dim
case DataType::VECTOR_SPARSE_FLOAT: { case DataType::VECTOR_SPARSE_U32_F32: {
return std::make_shared<arrow::BinaryBuilder>(); return std::make_shared<arrow::BinaryBuilder>();
} }
default: { default: {
@ -416,7 +416,7 @@ CreateArrowSchema(DataType data_type, bool nullable) {
{arrow::field("val", arrow::binary(), nullable)}); {arrow::field("val", arrow::binary(), nullable)});
} }
// sparse float vector doesn't require a dim // sparse float vector doesn't require a dim
case DataType::VECTOR_SPARSE_FLOAT: { case DataType::VECTOR_SPARSE_U32_F32: {
return arrow::schema( return arrow::schema(
{arrow::field("val", arrow::binary(), nullable)}); {arrow::field("val", arrow::binary(), nullable)});
} }
@ -456,7 +456,7 @@ CreateArrowSchema(DataType data_type, int dim, bool nullable) {
arrow::fixed_size_binary(dim * sizeof(bfloat16)), arrow::fixed_size_binary(dim * sizeof(bfloat16)),
nullable)}); nullable)});
} }
case DataType::VECTOR_SPARSE_FLOAT: { case DataType::VECTOR_SPARSE_U32_F32: {
return arrow::schema( return arrow::schema(
{arrow::field("val", arrow::binary(), nullable)}); {arrow::field("val", arrow::binary(), nullable)});
} }
@ -490,7 +490,7 @@ GetDimensionFromFileMetaData(const parquet::ColumnDescriptor* schema,
case DataType::VECTOR_BFLOAT16: { case DataType::VECTOR_BFLOAT16: {
return schema->type_length() / sizeof(bfloat16); return schema->type_length() / sizeof(bfloat16);
} }
case DataType::VECTOR_SPARSE_FLOAT: { case DataType::VECTOR_SPARSE_U32_F32: {
ThrowInfo(DataTypeInvalid, ThrowInfo(DataTypeInvalid,
fmt::format("GetDimensionFromFileMetaData should not be " fmt::format("GetDimensionFromFileMetaData should not be "
"called for sparse vector")); "called for sparse vector"));
@ -971,7 +971,7 @@ CreateFieldData(const DataType& type,
case DataType::VECTOR_BFLOAT16: case DataType::VECTOR_BFLOAT16:
return std::make_shared<FieldData<BFloat16Vector>>( return std::make_shared<FieldData<BFloat16Vector>>(
dim, type, total_num_rows); dim, type, total_num_rows);
case DataType::VECTOR_SPARSE_FLOAT: case DataType::VECTOR_SPARSE_U32_F32:
return std::make_shared<FieldData<SparseFloatVector>>( return std::make_shared<FieldData<SparseFloatVector>>(
type, total_num_rows); type, total_num_rows);
case DataType::VECTOR_INT8: case DataType::VECTOR_INT8:

View File

@ -14,7 +14,7 @@
# Update KNOWHERE_VERSION for the first occurrence # Update KNOWHERE_VERSION for the first occurrence
milvus_add_pkg_config("knowhere") milvus_add_pkg_config("knowhere")
set_property(DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR} PROPERTY INCLUDE_DIRECTORIES "") set_property(DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR} PROPERTY INCLUDE_DIRECTORIES "")
set( KNOWHERE_VERSION v2.6.1-rc ) set( KNOWHERE_VERSION v2.6.1 )
set( GIT_REPOSITORY "https://github.com/zilliztech/knowhere.git") set( GIT_REPOSITORY "https://github.com/zilliztech/knowhere.git")
message(STATUS "Knowhere repo: ${GIT_REPOSITORY}") message(STATUS "Knowhere repo: ${GIT_REPOSITORY}")

View File

@ -13,7 +13,7 @@
milvus_add_pkg_config("milvus-common") milvus_add_pkg_config("milvus-common")
set_property(DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR} PROPERTY INCLUDE_DIRECTORIES "") set_property(DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR} PROPERTY INCLUDE_DIRECTORIES "")
set( MILVUS-COMMON-VERSION 41fa9b1 ) set( MILVUS-COMMON-VERSION 5770e40 )
set( GIT_REPOSITORY "https://github.com/zilliztech/milvus-common.git") set( GIT_REPOSITORY "https://github.com/zilliztech/milvus-common.git")
message(STATUS "milvus-common repo: ${GIT_REPOSITORY}") message(STATUS "milvus-common repo: ${GIT_REPOSITORY}")

View File

@ -29,7 +29,7 @@ INSTANTIATE_TEST_SUITE_P(
ExprAlwaysTrueParameters, ExprAlwaysTrueParameters,
ExprAlwaysTrueTest, ExprAlwaysTrueTest,
::testing::Values(milvus::DataType::VECTOR_FLOAT, ::testing::Values(milvus::DataType::VECTOR_FLOAT,
milvus::DataType::VECTOR_SPARSE_FLOAT)); milvus::DataType::VECTOR_SPARSE_U32_F32));
TEST_P(ExprAlwaysTrueTest, AlwaysTrue) { TEST_P(ExprAlwaysTrueTest, AlwaysTrue) {
using namespace milvus; using namespace milvus;

View File

@ -27,8 +27,8 @@ using namespace milvus::query;
namespace { namespace {
std::vector<int> std::vector<int>
SearchRef(const knowhere::sparse::SparseRow<float>* base, SearchRef(const knowhere::sparse::SparseRow<milvus::sparseValueType>* base,
const knowhere::sparse::SparseRow<float>& query, const knowhere::sparse::SparseRow<milvus::sparseValueType>& query,
int nb, int nb,
int topk) { int topk) {
std::vector<std::tuple<float, int>> res; std::vector<std::tuple<float, int>> res;
@ -51,8 +51,8 @@ SearchRef(const knowhere::sparse::SparseRow<float>* base,
} }
std::vector<int> std::vector<int>
RangeSearchRef(const knowhere::sparse::SparseRow<float>* base, RangeSearchRef(const knowhere::sparse::SparseRow<milvus::sparseValueType>* base,
const knowhere::sparse::SparseRow<float>& query, const knowhere::sparse::SparseRow<milvus::sparseValueType>& query,
int nb, int nb,
float radius, float radius,
float range_filter, float range_filter,
@ -113,7 +113,7 @@ class TestSparseFloatSearchBruteForce : public ::testing::Test {
search_info, search_info,
index_info, index_info,
bitset_view, bitset_view,
DataType::VECTOR_SPARSE_FLOAT, DataType::VECTOR_SPARSE_U32_F32,
DataType::NONE)); DataType::NONE));
return; return;
} }
@ -122,7 +122,7 @@ class TestSparseFloatSearchBruteForce : public ::testing::Test {
search_info, search_info,
index_info, index_info,
bitset_view, bitset_view,
DataType::VECTOR_SPARSE_FLOAT, DataType::VECTOR_SPARSE_U32_F32,
DataType::NONE); DataType::NONE);
for (int i = 0; i < nq; i++) { for (int i = 0; i < nq; i++) {
auto ref = SearchRef(base.get(), *(query.get() + i), nb, topk); auto ref = SearchRef(base.get(), *(query.get() + i), nb, topk);
@ -137,7 +137,7 @@ class TestSparseFloatSearchBruteForce : public ::testing::Test {
search_info, search_info,
index_info, index_info,
bitset_view, bitset_view,
DataType::VECTOR_SPARSE_FLOAT, DataType::VECTOR_SPARSE_U32_F32,
DataType::NONE); DataType::NONE);
for (int i = 0; i < nq; i++) { for (int i = 0; i < nq; i++) {
auto ref = RangeSearchRef( auto ref = RangeSearchRef(
@ -152,7 +152,7 @@ class TestSparseFloatSearchBruteForce : public ::testing::Test {
search_info, search_info,
index_info, index_info,
bitset_view, bitset_view,
DataType::VECTOR_SPARSE_FLOAT); DataType::VECTOR_SPARSE_U32_F32);
auto iterators = result3.chunk_iterators(); auto iterators = result3.chunk_iterators();
for (int i = 0; i < nq; i++) { for (int i = 0; i < nq; i++) {
auto it = iterators[i]; auto it = iterators[i];

View File

@ -91,7 +91,7 @@ class BinlogIndexTest : public ::testing::TestWithParam<Param> {
} else { } else {
intermin_index_has_raw_data = true; intermin_index_has_raw_data = true;
} }
} else if (data_type == DataType::VECTOR_SPARSE_FLOAT) { } else if (data_type == DataType::VECTOR_SPARSE_U32_F32) {
auto sparse_vecs = GenerateRandomSparseFloatVector(data_n); auto sparse_vecs = GenerateRandomSparseFloatVector(data_n);
vec_field_data->FillFieldData(sparse_vecs.get(), data_n); vec_field_data->FillFieldData(sparse_vecs.get(), data_n);
data_d = std::dynamic_pointer_cast< data_d = std::dynamic_pointer_cast<
@ -190,12 +190,12 @@ INSTANTIATE_TEST_SUITE_P(
knowhere::IndexEnum:: knowhere::IndexEnum::
INDEX_FAISS_SCANN_DVR), // intermin index not has data INDEX_FAISS_SCANN_DVR), // intermin index not has data
std::make_tuple( std::make_tuple(
DataType::VECTOR_SPARSE_FLOAT, DataType::VECTOR_SPARSE_U32_F32,
knowhere::metric::IP, knowhere::metric::IP,
knowhere::IndexEnum:: knowhere::IndexEnum::
INDEX_SPARSE_INVERTED_INDEX, //intermin index not has data INDEX_SPARSE_INVERTED_INDEX, //intermin index not has data
std::nullopt), std::nullopt),
std::make_tuple(DataType::VECTOR_SPARSE_FLOAT, std::make_tuple(DataType::VECTOR_SPARSE_U32_F32,
knowhere::metric::IP, knowhere::metric::IP,
knowhere::IndexEnum:: knowhere::IndexEnum::
INDEX_SPARSE_WAND, // intermin index not has data INDEX_SPARSE_WAND, // intermin index not has data

View File

@ -568,7 +568,7 @@ TEST(chunk, test_sparse_float) {
auto vecs = milvus::segcore::GenerateRandomSparseFloatVector( auto vecs = milvus::segcore::GenerateRandomSparseFloatVector(
n_rows, kTestSparseDim, kTestSparseVectorDensity); n_rows, kTestSparseDim, kTestSparseVectorDensity);
auto field_data = milvus::storage::CreateFieldData( auto field_data = milvus::storage::CreateFieldData(
storage::DataType::VECTOR_SPARSE_FLOAT, false, kTestSparseDim, n_rows); storage::DataType::VECTOR_SPARSE_U32_F32, false, kTestSparseDim, n_rows);
field_data->FillFieldData(vecs.get(), n_rows); field_data->FillFieldData(vecs.get(), n_rows);
storage::InsertEventData event_data; storage::InsertEventData event_data;
@ -593,7 +593,7 @@ TEST(chunk, test_sparse_float) {
FieldMeta field_meta(FieldName("a"), FieldMeta field_meta(FieldName("a"),
milvus::FieldId(1), milvus::FieldId(1),
DataType::VECTOR_SPARSE_FLOAT, DataType::VECTOR_SPARSE_U32_F32,
kTestSparseDim, kTestSparseDim,
"IP", "IP",
false, false,

View File

@ -71,7 +71,7 @@ TEST_F(ChunkVectorTest, FillDataWithMmap) {
auto bf16_vec = schema->AddDebugField( auto bf16_vec = schema->AddDebugField(
"bf16_vec", DataType::VECTOR_BFLOAT16, 128, metric_type); "bf16_vec", DataType::VECTOR_BFLOAT16, 128, metric_type);
auto sparse_vec = schema->AddDebugField( auto sparse_vec = schema->AddDebugField(
"sparse_vec", DataType::VECTOR_SPARSE_FLOAT, 128, metric_type); "sparse_vec", DataType::VECTOR_SPARSE_U32_F32, 128, metric_type);
auto int8_vec = schema->AddDebugField( auto int8_vec = schema->AddDebugField(
"int8_vec", DataType::VECTOR_INT8, 128, metric_type); "int8_vec", DataType::VECTOR_INT8, 128, metric_type);
schema->set_primary_field_id(int64_field); schema->set_primary_field_id(int64_field);
@ -200,7 +200,7 @@ TEST_F(ChunkVectorTest, FillDataWithMmap) {
auto fp16_vec_gt = dataset.get_col<float16>(fp16_vec); auto fp16_vec_gt = dataset.get_col<float16>(fp16_vec);
auto bf16_vec_gt = dataset.get_col<bfloat16>(bf16_vec); auto bf16_vec_gt = dataset.get_col<bfloat16>(bf16_vec);
auto sparse_vec_gt = auto sparse_vec_gt =
dataset.get_col<knowhere::sparse::SparseRow<float>>(sparse_vec); dataset.get_col<knowhere::sparse::SparseRow<milvus::sparseValueType>>(sparse_vec);
auto int8_vec_gt = dataset.get_col<int8>(int8_vec); auto int8_vec_gt = dataset.get_col<int8>(int8_vec);
for (size_t i = 0; i < num_inserted; ++i) { for (size_t i = 0; i < num_inserted; ++i) {
@ -234,7 +234,7 @@ INSTANTIATE_TEST_SUITE_P(IsSparse, ChunkVectorTest, ::testing::Bool());
TEST_P(ChunkVectorTest, SearchWithMmap) { TEST_P(ChunkVectorTest, SearchWithMmap) {
auto is_sparse = GetParam(); auto is_sparse = GetParam();
auto data_type = auto data_type =
is_sparse ? DataType::VECTOR_SPARSE_FLOAT : DataType::VECTOR_FLOAT; is_sparse ? DataType::VECTOR_SPARSE_U32_F32 : DataType::VECTOR_FLOAT;
auto schema = std::make_shared<Schema>(); auto schema = std::make_shared<Schema>();
auto pk = schema->AddDebugField("pk", DataType::INT64); auto pk = schema->AddDebugField("pk", DataType::INT64);
auto random = schema->AddDebugField("random", DataType::DOUBLE); auto random = schema->AddDebugField("random", DataType::DOUBLE);

View File

@ -591,7 +591,7 @@ TEST(storage, InsertDataSparseFloat) {
auto vecs = milvus::segcore::GenerateRandomSparseFloatVector( auto vecs = milvus::segcore::GenerateRandomSparseFloatVector(
n_rows, kTestSparseDim, kTestSparseVectorDensity); n_rows, kTestSparseDim, kTestSparseVectorDensity);
auto field_data = milvus::storage::CreateFieldData( auto field_data = milvus::storage::CreateFieldData(
storage::DataType::VECTOR_SPARSE_FLOAT, false, kTestSparseDim, n_rows); storage::DataType::VECTOR_SPARSE_U32_F32, false, kTestSparseDim, n_rows);
field_data->FillFieldData(vecs.get(), n_rows); field_data->FillFieldData(vecs.get(), n_rows);
auto payload_reader = auto payload_reader =
@ -611,10 +611,10 @@ TEST(storage, InsertDataSparseFloat) {
std::make_pair(Timestamp(0), Timestamp(100))); std::make_pair(Timestamp(0), Timestamp(100)));
auto new_payload = new_insert_data->GetFieldData(); auto new_payload = new_insert_data->GetFieldData();
ASSERT_TRUE(new_payload->get_data_type() == ASSERT_TRUE(new_payload->get_data_type() ==
storage::DataType::VECTOR_SPARSE_FLOAT); storage::DataType::VECTOR_SPARSE_U32_F32);
ASSERT_EQ(new_payload->get_num_rows(), n_rows); ASSERT_EQ(new_payload->get_num_rows(), n_rows);
ASSERT_EQ(new_payload->get_null_count(), 0); ASSERT_EQ(new_payload->get_null_count(), 0);
auto new_data = static_cast<const knowhere::sparse::SparseRow<float>*>( auto new_data = static_cast<const knowhere::sparse::SparseRow<milvus::sparseValueType>*>(
new_payload->Data()); new_payload->Data());
for (auto i = 0; i < n_rows; ++i) { for (auto i = 0; i < n_rows; ++i) {

View File

@ -455,16 +455,20 @@ TEST_F(DiskAnnFileManagerTest, CacheOptFieldToDiskOptFieldMoreThanOne) {
PrepareOptionalField<DataType::INT64>(file_manager, insert_file_path); PrepareOptionalField<DataType::INT64>(file_manager, insert_file_path);
opt_fields[kOptFieldId + 1] = { opt_fields[kOptFieldId + 1] = {
kOptFieldName + "second", DataType::INT64, {insert_file_path}}; kOptFieldName + "second", DataType::INT64, {insert_file_path}};
EXPECT_THROW(file_manager->CacheOptFieldToDisk(opt_fields), SegcoreError); milvus::Config config;
config[VEC_OPT_FIELDS] = opt_fields;
EXPECT_THROW(file_manager->CacheOptFieldToDisk(config), SegcoreError);
} }
TEST_F(DiskAnnFileManagerTest, CacheOptFieldToDiskSpaceCorrect) { TEST_F(DiskAnnFileManagerTest, CacheOptFieldToDiskSpaceCorrect) {
auto file_manager = CreateFileManager(cm_); auto file_manager = CreateFileManager(cm_);
const auto insert_file_path = const auto insert_file_path =
PrepareInsertData<DataType::INT64, int64_t>(kOptFieldDataRange); PrepareInsertData<DataType::INT64, int64_t>(kOptFieldDataRange);
auto opt_fileds = auto opt_fields =
PrepareOptionalField<DataType::INT64>(file_manager, insert_file_path); PrepareOptionalField<DataType::INT64>(file_manager, insert_file_path);
auto res = file_manager->CacheOptFieldToDisk(opt_fileds); milvus::Config config;
config[VEC_OPT_FIELDS] = opt_fields;
auto res = file_manager->CacheOptFieldToDisk(config);
ASSERT_FALSE(res.empty()); ASSERT_FALSE(res.empty());
CheckOptFieldCorrectness(res); CheckOptFieldCorrectness(res);
} }
@ -475,7 +479,9 @@ TEST_F(DiskAnnFileManagerTest, CacheOptFieldToDiskSpaceCorrect) {
auto insert_file_path = PrepareInsertData<TYPE, NATIVE_TYPE>(RANGE); \ auto insert_file_path = PrepareInsertData<TYPE, NATIVE_TYPE>(RANGE); \
auto opt_fields = \ auto opt_fields = \
PrepareOptionalField<TYPE>(file_manager, insert_file_path); \ PrepareOptionalField<TYPE>(file_manager, insert_file_path); \
auto res = file_manager->CacheOptFieldToDisk(opt_fields); \ milvus::Config config; \
config[VEC_OPT_FIELDS] = opt_fields; \
auto res = file_manager->CacheOptFieldToDisk(config); \
ASSERT_FALSE(res.empty()); \ ASSERT_FALSE(res.empty()); \
CheckOptFieldCorrectness(res, RANGE); \ CheckOptFieldCorrectness(res, RANGE); \
}; };
@ -496,9 +502,11 @@ TEST_F(DiskAnnFileManagerTest, CacheOptFieldToDiskOnlyOneCategory) {
{ {
const auto insert_file_path = const auto insert_file_path =
PrepareInsertData<DataType::INT64, int64_t>(1); PrepareInsertData<DataType::INT64, int64_t>(1);
auto opt_fileds = PrepareOptionalField<DataType::INT64>( auto opt_fields = PrepareOptionalField<DataType::INT64>(
file_manager, insert_file_path); file_manager, insert_file_path);
auto res = file_manager->CacheOptFieldToDisk(opt_fileds); milvus::Config config;
config[VEC_OPT_FIELDS] = opt_fields;
auto res = file_manager->CacheOptFieldToDisk(config);
ASSERT_TRUE(res.empty()); ASSERT_TRUE(res.empty());
} }
} }

View File

@ -105,7 +105,7 @@ class TaskTest : public testing::TestWithParam<DataType> {
INSTANTIATE_TEST_SUITE_P(TaskTestSuite, INSTANTIATE_TEST_SUITE_P(TaskTestSuite,
TaskTest, TaskTest,
::testing::Values(DataType::VECTOR_FLOAT, ::testing::Values(DataType::VECTOR_FLOAT,
DataType::VECTOR_SPARSE_FLOAT)); DataType::VECTOR_SPARSE_U32_F32));
TEST_P(TaskTest, RegisterFunction) { TEST_P(TaskTest, RegisterFunction) {
milvus::exec::expression::FunctionFactory& factory = milvus::exec::expression::FunctionFactory& factory =

View File

@ -95,7 +95,7 @@ INSTANTIATE_TEST_SUITE_P(
std::make_tuple(std::pair(milvus::DataType::VECTOR_FLOAT, std::make_tuple(std::pair(milvus::DataType::VECTOR_FLOAT,
knowhere::metric::L2), knowhere::metric::L2),
false), false),
std::make_tuple(std::pair(milvus::DataType::VECTOR_SPARSE_FLOAT, std::make_tuple(std::pair(milvus::DataType::VECTOR_SPARSE_U32_F32,
knowhere::metric::IP), knowhere::metric::IP),
false), false),
std::make_tuple(std::pair(milvus::DataType::VECTOR_BINARY, std::make_tuple(std::pair(milvus::DataType::VECTOR_BINARY,
@ -104,7 +104,7 @@ INSTANTIATE_TEST_SUITE_P(
std::make_tuple(std::pair(milvus::DataType::VECTOR_FLOAT, std::make_tuple(std::pair(milvus::DataType::VECTOR_FLOAT,
knowhere::metric::L2), knowhere::metric::L2),
true), true),
std::make_tuple(std::pair(milvus::DataType::VECTOR_SPARSE_FLOAT, std::make_tuple(std::pair(milvus::DataType::VECTOR_SPARSE_U32_F32,
knowhere::metric::IP), knowhere::metric::IP),
true), true),
std::make_tuple(std::pair(milvus::DataType::VECTOR_BINARY, std::make_tuple(std::pair(milvus::DataType::VECTOR_BINARY,

View File

@ -109,7 +109,7 @@ class GrowingTest
} else if (index_type == } else if (index_type ==
knowhere::IndexEnum::INDEX_SPARSE_INVERTED_INDEX || knowhere::IndexEnum::INDEX_SPARSE_INVERTED_INDEX ||
index_type == knowhere::IndexEnum::INDEX_SPARSE_WAND) { index_type == knowhere::IndexEnum::INDEX_SPARSE_WAND) {
data_type = DataType::VECTOR_SPARSE_FLOAT; data_type = DataType::VECTOR_SPARSE_U32_F32;
} else { } else {
ASSERT_TRUE(false); ASSERT_TRUE(false);
} }
@ -242,7 +242,7 @@ TEST_P(GrowingTest, FillData) {
if (data_type == DataType::VECTOR_FLOAT) { if (data_type == DataType::VECTOR_FLOAT) {
EXPECT_EQ(vec_result->vectors().float_vector().data_size(), EXPECT_EQ(vec_result->vectors().float_vector().data_size(),
num_inserted * dim); num_inserted * dim);
} else if (data_type == DataType::VECTOR_SPARSE_FLOAT) { } else if (data_type == DataType::VECTOR_SPARSE_U32_F32) {
EXPECT_EQ( EXPECT_EQ(
vec_result->vectors().sparse_float_vector().contents_size(), vec_result->vectors().sparse_float_vector().contents_size(),
num_inserted); num_inserted);

View File

@ -41,7 +41,7 @@ class GrowingIndexTest : public ::testing::TestWithParam<Param> {
metric_type = std::get<2>(param); metric_type = std::get<2>(param);
dense_vec_intermin_index_type = std::get<3>(param); dense_vec_intermin_index_type = std::get<3>(param);
dense_refine_type = std::get<4>(param); dense_refine_type = std::get<4>(param);
if (data_type == DataType::VECTOR_SPARSE_FLOAT) { if (data_type == DataType::VECTOR_SPARSE_U32_F32) {
is_sparse = true; is_sparse = true;
if (metric_type == knowhere::metric::IP) { if (metric_type == knowhere::metric::IP) {
intermin_index_with_raw_data = true; intermin_index_with_raw_data = true;
@ -108,7 +108,7 @@ INSTANTIATE_TEST_SUITE_P(
SparseIndexTypeParameters, SparseIndexTypeParameters,
GrowingIndexTest, GrowingIndexTest,
::testing::Combine( ::testing::Combine(
::testing::Values(DataType::VECTOR_SPARSE_FLOAT), ::testing::Values(DataType::VECTOR_SPARSE_U32_F32),
// VecIndexConfig will convert INDEX_SPARSE_INVERTED_INDEX/ // VecIndexConfig will convert INDEX_SPARSE_INVERTED_INDEX/
// INDEX_SPARSE_WAND to INDEX_SPARSE_INVERTED_INDEX_CC/ // INDEX_SPARSE_WAND to INDEX_SPARSE_INVERTED_INDEX_CC/
// INDEX_SPARSE_WAND_CC, thus no need to use _CC version here. // INDEX_SPARSE_WAND_CC, thus no need to use _CC version here.
@ -409,7 +409,7 @@ TEST_P(GrowingIndexTest, AddWithoutBuildPool) {
} }
EXPECT_EQ(index->Count(), (add_cont + 1) * N); EXPECT_EQ(index->Count(), (add_cont + 1) * N);
} else if (is_sparse) { } else if (is_sparse) {
auto index = std::make_unique<milvus::index::VectorMemIndex<float>>( auto index = std::make_unique<milvus::index::VectorMemIndex<sparse_u32_f32>>(
DataType::NONE, DataType::NONE,
index_type, index_type,
metric_type, metric_type,
@ -417,7 +417,7 @@ TEST_P(GrowingIndexTest, AddWithoutBuildPool) {
false, false,
milvus::storage::FileManagerContext()); milvus::storage::FileManagerContext());
auto sparse_data = auto sparse_data =
dataset.get_col<knowhere::sparse::SparseRow<float>>(vec); dataset.get_col<knowhere::sparse::SparseRow<milvus::sparseValueType>>(vec);
index->BuildWithDataset( index->BuildWithDataset(
knowhere::GenDataSet(N, dim, sparse_data.data()), build_config); knowhere::GenDataSet(N, dim, sparse_data.data()), build_config);
for (int i = 0; i < add_cont; i++) { for (int i = 0; i < add_cont; i++) {
@ -560,14 +560,14 @@ TEST_P(GrowingIndexTest, GetVector) {
} }
} }
} else if (is_sparse) { } else if (is_sparse) {
// GetVector for VECTOR_SPARSE_FLOAT // GetVector for VECTOR_SPARSE_U32_F32
int64_t per_batch = 5000; int64_t per_batch = 5000;
int64_t n_batch = 20; int64_t n_batch = 20;
int64_t dim = 128; int64_t dim = 128;
for (int64_t i = 0; i < n_batch; i++) { for (int64_t i = 0; i < n_batch; i++) {
auto dataset = DataGen(schema, per_batch); auto dataset = DataGen(schema, per_batch);
auto fakevec = auto fakevec =
dataset.get_col<knowhere::sparse::SparseRow<float>>(vec); dataset.get_col<knowhere::sparse::SparseRow<milvus::sparseValueType>>(vec);
auto offset = segment->PreInsert(per_batch); auto offset = segment->PreInsert(per_batch);
segment->Insert(offset, segment->Insert(offset,
per_batch, per_batch,

View File

@ -68,7 +68,7 @@ TestVecIndex() {
status = BuildBinaryVecIndex(index, NB * DIM / 8, xb_data.data()); status = BuildBinaryVecIndex(index, NB * DIM / 8, xb_data.data());
} else if (std::is_same_v<TraitType, milvus::SparseFloatVector>) { } else if (std::is_same_v<TraitType, milvus::SparseFloatVector>) {
auto xb_data = auto xb_data =
dataset.template get_col<knowhere::sparse::SparseRow<float>>( dataset.template get_col<knowhere::sparse::SparseRow<milvus::sparseValueType>>(
milvus::FieldId(100)); milvus::FieldId(100));
status = BuildSparseFloatVecIndex( status = BuildSparseFloatVecIndex(
index, index,

View File

@ -70,9 +70,9 @@ class IndexWrapperTest : public ::testing::TestWithParam<Param> {
DataType::VECTOR_BINARY}, DataType::VECTOR_BINARY},
{knowhere::IndexEnum::INDEX_HNSW, DataType::VECTOR_FLOAT}, {knowhere::IndexEnum::INDEX_HNSW, DataType::VECTOR_FLOAT},
{knowhere::IndexEnum::INDEX_SPARSE_INVERTED_INDEX, {knowhere::IndexEnum::INDEX_SPARSE_INVERTED_INDEX,
DataType::VECTOR_SPARSE_FLOAT}, DataType::VECTOR_SPARSE_U32_F32},
{knowhere::IndexEnum::INDEX_SPARSE_WAND, {knowhere::IndexEnum::INDEX_SPARSE_WAND,
DataType::VECTOR_SPARSE_FLOAT}, DataType::VECTOR_SPARSE_U32_F32},
}; };
vec_field_data_type = index_to_vec_type[index_type]; vec_field_data_type = index_to_vec_type[index_type];
@ -132,9 +132,9 @@ TEST_P(IndexWrapperTest, BuildAndQuery) {
auto bin_vecs = dataset.get_col<uint8_t>(milvus::FieldId(100)); auto bin_vecs = dataset.get_col<uint8_t>(milvus::FieldId(100));
xb_dataset = knowhere::GenDataSet(NB, DIM, bin_vecs.data()); xb_dataset = knowhere::GenDataSet(NB, DIM, bin_vecs.data());
ASSERT_NO_THROW(index->Build(xb_dataset)); ASSERT_NO_THROW(index->Build(xb_dataset));
} else if (vec_field_data_type == DataType::VECTOR_SPARSE_FLOAT) { } else if (vec_field_data_type == DataType::VECTOR_SPARSE_U32_F32) {
auto dataset = GenFieldData(NB, metric_type, vec_field_data_type); auto dataset = GenFieldData(NB, metric_type, vec_field_data_type);
auto sparse_vecs = dataset.get_col<knowhere::sparse::SparseRow<float>>( auto sparse_vecs = dataset.get_col<knowhere::sparse::SparseRow<milvus::sparseValueType>>(
milvus::FieldId(100)); milvus::FieldId(100));
xb_dataset = xb_dataset =
knowhere::GenDataSet(NB, kTestSparseDim, sparse_vecs.data()); knowhere::GenDataSet(NB, kTestSparseDim, sparse_vecs.data());
@ -159,7 +159,7 @@ TEST_P(IndexWrapperTest, BuildAndQuery) {
vec_field_data_type, config, file_manager_context); vec_field_data_type, config, file_manager_context);
auto vec_index = auto vec_index =
static_cast<milvus::indexbuilder::VecIndexCreator*>(copy_index.get()); static_cast<milvus::indexbuilder::VecIndexCreator*>(copy_index.get());
if (vec_field_data_type != DataType::VECTOR_SPARSE_FLOAT) { if (vec_field_data_type != DataType::VECTOR_SPARSE_U32_F32) {
ASSERT_EQ(vec_index->dim(), DIM); ASSERT_EQ(vec_index->dim(), DIM);
} }
@ -177,9 +177,9 @@ TEST_P(IndexWrapperTest, BuildAndQuery) {
auto xq_dataset = auto xq_dataset =
knowhere::GenDataSet(NQ, DIM, xb_data.data() + DIM * query_offset); knowhere::GenDataSet(NQ, DIM, xb_data.data() + DIM * query_offset);
result = vec_index->Query(xq_dataset, search_info, nullptr); result = vec_index->Query(xq_dataset, search_info, nullptr);
} else if (vec_field_data_type == DataType::VECTOR_SPARSE_FLOAT) { } else if (vec_field_data_type == DataType::VECTOR_SPARSE_U32_F32) {
auto dataset = GenFieldData(NQ, metric_type, vec_field_data_type); auto dataset = GenFieldData(NQ, metric_type, vec_field_data_type);
auto xb_data = dataset.get_col<knowhere::sparse::SparseRow<float>>( auto xb_data = dataset.get_col<knowhere::sparse::SparseRow<milvus::sparseValueType>>(
milvus::FieldId(100)); milvus::FieldId(100));
auto xq_dataset = auto xq_dataset =
knowhere::GenDataSet(NQ, kTestSparseDim, xb_data.data()); knowhere::GenDataSet(NQ, kTestSparseDim, xb_data.data());

View File

@ -331,7 +331,7 @@ class IndexTest : public ::testing::TestWithParam<Param> {
if (index_type == knowhere::IndexEnum::INDEX_SPARSE_INVERTED_INDEX || if (index_type == knowhere::IndexEnum::INDEX_SPARSE_INVERTED_INDEX ||
index_type == knowhere::IndexEnum::INDEX_SPARSE_WAND) { index_type == knowhere::IndexEnum::INDEX_SPARSE_WAND) {
is_sparse = true; is_sparse = true;
vec_field_data_type = milvus::DataType::VECTOR_SPARSE_FLOAT; vec_field_data_type = milvus::DataType::VECTOR_SPARSE_U32_F32;
} else if (IsBinaryVectorMetricType(metric_type)) { } else if (IsBinaryVectorMetricType(metric_type)) {
is_binary = true; is_binary = true;
vec_field_data_type = milvus::DataType::VECTOR_BINARY; vec_field_data_type = milvus::DataType::VECTOR_BINARY;
@ -349,7 +349,7 @@ class IndexTest : public ::testing::TestWithParam<Param> {
} else if (is_sparse) { } else if (is_sparse) {
// sparse vector // sparse vector
xb_sparse_data = xb_sparse_data =
dataset.get_col<knowhere::sparse::SparseRow<float>>( dataset.get_col<knowhere::sparse::SparseRow<milvus::sparseValueType>>(
milvus::FieldId(100)); milvus::FieldId(100));
xb_dataset = xb_dataset =
knowhere::GenDataSet(NB, kTestSparseDim, xb_sparse_data.data()); knowhere::GenDataSet(NB, kTestSparseDim, xb_sparse_data.data());
@ -382,7 +382,7 @@ class IndexTest : public ::testing::TestWithParam<Param> {
knowhere::DataSetPtr xb_dataset; knowhere::DataSetPtr xb_dataset;
FixedVector<float> xb_data; FixedVector<float> xb_data;
FixedVector<uint8_t> xb_bin_data; FixedVector<uint8_t> xb_bin_data;
FixedVector<knowhere::sparse::SparseRow<float>> xb_sparse_data; FixedVector<knowhere::sparse::SparseRow<milvus::sparseValueType>> xb_sparse_data;
knowhere::DataSetPtr xq_dataset; knowhere::DataSetPtr xq_dataset;
int64_t query_offset = 100; int64_t query_offset = 100;
int64_t NB = 3000; // will be updated to 27000 for mmap+hnsw int64_t NB = 3000; // will be updated to 27000 for mmap+hnsw
@ -686,7 +686,7 @@ TEST_P(IndexTest, GetVector_EmptySparseVector) {
} }
NB = 3; NB = 3;
std::vector<knowhere::sparse::SparseRow<float>> vec; std::vector<knowhere::sparse::SparseRow<milvus::sparseValueType>> vec;
vec.reserve(NB); vec.reserve(NB);
vec.emplace_back(2); vec.emplace_back(2);
vec[0].set_at(0, 1, 1.0); vec[0].set_at(0, 1, 1.0);

View File

@ -47,8 +47,8 @@ class IndexLoadTest : public ::testing::TestWithParam<Param> {
data_type = milvus::DataType::VECTOR_FLOAT16; data_type = milvus::DataType::VECTOR_FLOAT16;
} else if (field_type == "vector_binary") { } else if (field_type == "vector_binary") {
data_type = milvus::DataType::VECTOR_BINARY; data_type = milvus::DataType::VECTOR_BINARY;
} else if (field_type == "vector_sparse_float") { } else if (field_type == "VECTOR_SPARSE_U32_F32") {
data_type = milvus::DataType::VECTOR_SPARSE_FLOAT; data_type = milvus::DataType::VECTOR_SPARSE_U32_F32;
} else if (field_type == "vector_int8") { } else if (field_type == "vector_int8") {
data_type = milvus::DataType::VECTOR_INT8; data_type = milvus::DataType::VECTOR_INT8;
} else if (field_type == "array") { } else if (field_type == "array") {

View File

@ -46,7 +46,7 @@ class RetrieveTest : public ::testing::TestWithParam<Param> {
INSTANTIATE_TEST_SUITE_P(RetrieveTest, INSTANTIATE_TEST_SUITE_P(RetrieveTest,
RetrieveTest, RetrieveTest,
::testing::Values(DataType::VECTOR_FLOAT, ::testing::Values(DataType::VECTOR_FLOAT,
DataType::VECTOR_SPARSE_FLOAT)); DataType::VECTOR_SPARSE_U32_F32));
TEST_P(RetrieveTest, AutoID) { TEST_P(RetrieveTest, AutoID) {
auto schema = std::make_shared<Schema>(); auto schema = std::make_shared<Schema>();
@ -422,7 +422,7 @@ TEST_P(RetrieveTest, LargeTimestamp) {
Assert(field_data.vectors().float_vector().data_size() == Assert(field_data.vectors().float_vector().data_size() ==
target_num * DIM); target_num * DIM);
} }
if (DataType(field_data.type()) == DataType::VECTOR_SPARSE_FLOAT) { if (DataType(field_data.type()) == DataType::VECTOR_SPARSE_U32_F32) {
Assert(field_data.vectors() Assert(field_data.vectors()
.sparse_float_vector() .sparse_float_vector()
.contents_size() == target_num); .contents_size() == target_num);

View File

@ -97,8 +97,8 @@ TEST(GetArrowDataTypeTest, VECTOR_BFLOAT16) {
ASSERT_TRUE(result->Equals(arrow::fixed_size_binary(dim * 2))); ASSERT_TRUE(result->Equals(arrow::fixed_size_binary(dim * 2)));
} }
TEST(GetArrowDataTypeTest, VECTOR_SPARSE_FLOAT) { TEST(GetArrowDataTypeTest, VECTOR_SPARSE_U32_F32) {
auto result = GetArrowDataType(DataType::VECTOR_SPARSE_FLOAT); auto result = GetArrowDataType(DataType::VECTOR_SPARSE_U32_F32);
ASSERT_TRUE(result->Equals(arrow::binary())); ASSERT_TRUE(result->Equals(arrow::binary()));
} }

View File

@ -114,7 +114,7 @@ struct GeneratedData {
} else { } else {
if (field_meta.is_vector() && if (field_meta.is_vector() &&
field_meta.get_data_type() != field_meta.get_data_type() !=
DataType::VECTOR_SPARSE_FLOAT) { DataType::VECTOR_SPARSE_U32_F32) {
if (field_meta.get_data_type() == DataType::VECTOR_FLOAT) { if (field_meta.get_data_type() == DataType::VECTOR_FLOAT) {
int len = raw_->num_rows() * field_meta.get_dim(); int len = raw_->num_rows() * field_meta.get_dim();
ret.resize(len); ret.resize(len);
@ -164,7 +164,7 @@ struct GeneratedData {
} }
if constexpr (std::is_same_v< if constexpr (std::is_same_v<
T, T,
knowhere::sparse::SparseRow<float>>) { knowhere::sparse::SparseRow<milvus::sparseValueType>>) {
auto sparse_float_array = auto sparse_float_array =
target_field_data.vectors().sparse_float_vector(); target_field_data.vectors().sparse_float_vector();
auto rows = auto rows =
@ -301,7 +301,7 @@ struct GeneratedData {
int array_len); int array_len);
}; };
inline std::unique_ptr<knowhere::sparse::SparseRow<float>[]> inline std::unique_ptr<knowhere::sparse::SparseRow<milvus::sparseValueType>[]>
GenerateRandomSparseFloatVector(size_t rows, GenerateRandomSparseFloatVector(size_t rows,
size_t cols = kTestSparseDim, size_t cols = kTestSparseDim,
float density = kTestSparseVectorDensity, float density = kTestSparseVectorDensity,
@ -340,13 +340,13 @@ GenerateRandomSparseFloatVector(size_t rows,
data[row][col] = val; data[row][col] = val;
} }
auto tensor = std::make_unique<knowhere::sparse::SparseRow<float>[]>(rows); auto tensor = std::make_unique<knowhere::sparse::SparseRow<milvus::sparseValueType>[]>(rows);
for (int32_t i = 0; i < rows; ++i) { for (int32_t i = 0; i < rows; ++i) {
if (data[i].size() == 0) { if (data[i].size() == 0) {
continue; continue;
} }
knowhere::sparse::SparseRow<float> row(data[i].size()); knowhere::sparse::SparseRow<milvus::sparseValueType> row(data[i].size());
size_t j = 0; size_t j = 0;
for (auto& [idx, val] : data[i]) { for (auto& [idx, val] : data[i]) {
row.set_at(j++, idx, val); row.set_at(j++, idx, val);
@ -544,7 +544,7 @@ DataGen(SchemaPtr schema,
insert_cols(data, N, field_meta, random_valid); insert_cols(data, N, field_meta, random_valid);
break; break;
} }
case DataType::VECTOR_SPARSE_FLOAT: { case DataType::VECTOR_SPARSE_U32_F32: {
auto res = GenerateRandomSparseFloatVector( auto res = GenerateRandomSparseFloatVector(
N, kTestSparseDim, kTestSparseVectorDensity, seed); N, kTestSparseDim, kTestSparseVectorDensity, seed);
auto array = milvus::segcore::CreateDataArrayFrom( auto array = milvus::segcore::CreateDataArrayFrom(
@ -595,7 +595,7 @@ DataGen(SchemaPtr schema,
obj->assign(data, length * sizeof(float16)); obj->assign(data, length * sizeof(float16));
break; break;
} }
case DataType::VECTOR_SPARSE_FLOAT: case DataType::VECTOR_SPARSE_U32_F32:
ThrowInfo(DataTypeInvalid, "not implemented"); ThrowInfo(DataTypeInvalid, "not implemented");
break; break;
case DataType::VECTOR_BFLOAT16: { case DataType::VECTOR_BFLOAT16: {
@ -1195,10 +1195,10 @@ CreateFieldDataFromDataArray(ssize_t raw_count,
createFieldData(raw_data, DataType::VECTOR_BFLOAT16, dim); createFieldData(raw_data, DataType::VECTOR_BFLOAT16, dim);
break; break;
} }
case DataType::VECTOR_SPARSE_FLOAT: { case DataType::VECTOR_SPARSE_U32_F32: {
auto sparse_float_array = data->vectors().sparse_float_vector(); auto sparse_float_array = data->vectors().sparse_float_vector();
auto rows = SparseBytesToRows(sparse_float_array.contents()); auto rows = SparseBytesToRows(sparse_float_array.contents());
createFieldData(rows.get(), DataType::VECTOR_SPARSE_FLOAT, 0); createFieldData(rows.get(), DataType::VECTOR_SPARSE_U32_F32, 0);
break; break;
} }
case DataType::VECTOR_INT8: { case DataType::VECTOR_INT8: {

View File

@ -234,7 +234,7 @@ GenFieldData(int64_t N,
schema->AddDebugField( schema->AddDebugField(
"fakevec", "fakevec",
data_type, data_type,
(data_type != milvus::DataType::VECTOR_SPARSE_FLOAT ? dim : 0), (data_type != milvus::DataType::VECTOR_SPARSE_U32_F32 ? dim : 0),
metric_type); metric_type);
return milvus::segcore::DataGen(schema, N); return milvus::segcore::DataGen(schema, N);
} }

View File

@ -259,6 +259,9 @@ func (node *QueryNode) InitSegcore() error {
cKnowhereThreadPoolSize := C.uint32_t(paramtable.Get().QueryNodeCfg.KnowhereThreadPoolSize.GetAsUint32()) cKnowhereThreadPoolSize := C.uint32_t(paramtable.Get().QueryNodeCfg.KnowhereThreadPoolSize.GetAsUint32())
C.SegcoreSetKnowhereSearchThreadPoolNum(cKnowhereThreadPoolSize) C.SegcoreSetKnowhereSearchThreadPoolNum(cKnowhereThreadPoolSize)
// Pass the configured knowhere fetch thread pool size (read from the query
// node config as a uint32) down to segcore through the cgo setter.
cKnowhereFetchThreadPoolSize := C.uint32_t(paramtable.Get().QueryNodeCfg.KnowhereFetchThreadPoolSize.GetAsUint32())
C.SegcoreSetKnowhereFetchThreadPoolNum(cKnowhereFetchThreadPoolSize)
// override segcore SIMD type // override segcore SIMD type
cSimdType := C.CString(paramtable.Get().CommonCfg.SimdType.GetValue()) cSimdType := C.CString(paramtable.Get().CommonCfg.SimdType.GetValue())
C.SegcoreSetSimdType(cSimdType) C.SegcoreSetSimdType(cSimdType)

View File

@ -2879,6 +2879,7 @@ type queryNodeConfig struct {
StatsPublishInterval ParamItem `refreshable:"true"` StatsPublishInterval ParamItem `refreshable:"true"`
// segcore // segcore
KnowhereFetchThreadPoolSize ParamItem `refreshable:"false"`
KnowhereThreadPoolSize ParamItem `refreshable:"false"` KnowhereThreadPoolSize ParamItem `refreshable:"false"`
ChunkRows ParamItem `refreshable:"false"` ChunkRows ParamItem `refreshable:"false"`
EnableInterminSegmentIndex ParamItem `refreshable:"false"` EnableInterminSegmentIndex ParamItem `refreshable:"false"`
@ -3322,6 +3323,25 @@ If set to 0, time based eviction is disabled.`,
} }
p.KnowhereThreadPoolSize.Init(base.mgr) p.KnowhereThreadPoolSize.Init(base.mgr)
// KnowhereFetchThreadPoolSize is configured as a ratio, not an absolute size:
// the Formatter clamps the raw value to [1, 32] and multiplies it by
// hardware.GetCPUNum(), so reading the item yields the final thread count.
p.KnowhereFetchThreadPoolSize = ParamItem{
	Key:          "queryNode.segcore.knowhereFetchThreadPoolNumRatio",
	Version:      "2.6.0",
	DefaultValue: "4",
	Formatter: func(v string) string {
		factor := getAsInt64(v)
		// Out-of-range ratios are clamped rather than rejected.
		if factor <= 0 {
			factor = 1
		} else if factor > 32 {
			factor = 32
		}
		knowhereFetchThreadPoolSize := uint32(hardware.GetCPUNum()) * uint32(factor)
		return strconv.FormatUint(uint64(knowhereFetchThreadPoolSize), 10)
	},
	Doc:    "The number of threads in knowhere's fetch thread pool for object storage. The pool size is the number of CPUs multiplied by knowhereFetchThreadPoolNumRatio, which is clamped to [1, 32].",
	Export: false,
}
p.KnowhereFetchThreadPoolSize.Init(base.mgr)
p.ChunkRows = ParamItem{ p.ChunkRows = ParamItem{
Key: "queryNode.segcore.chunkRows", Key: "queryNode.segcore.chunkRows",
Version: "2.0.0", Version: "2.0.0",