enhance: support readAt interface for remote input stream (#43997)
Related to #42032. Also fixes the CacheOptFieldToDisk method to work with storage v2, changes the sparse-related interfaces for the knowhere version bump (#43974), and includes https://github.com/milvus-io/milvus/pull/44046 to fix the lost-metrics issue.
---------
Signed-off-by: chasingegg <chao.gao@zilliz.com>
Signed-off-by: marcelo.chen <marcelo.chen@zilliz.com>
Signed-off-by: Congqi Xia <congqi.xia@zilliz.com>
Co-authored-by: marcelo.chen <marcelo.chen@zilliz.com>
Co-authored-by: Congqi Xia <congqi.xia@zilliz.com>
This commit is contained in:
parent 8934c18792 → commit e97a618630
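The bulk of the sparse-interface change below is a type rename: segcore now refers to knowhere's sparse u32/f32 type directly instead of hard-coding float, so every knowhere::sparse::SparseRow<float> becomes knowhere::sparse::SparseRow<sparseValueType>. A minimal sketch of the two aliases the diff introduces and how call sites migrate (the extra SparseRowT alias is illustrative, not part of the commit; knowhere headers are assumed to be included):

    namespace milvus {
    // added in Types.h: Milvus-side name for knowhere's <uint32 index, float value> sparse type
    using sparse_u32_f32 = knowhere::sparse_u32_f32;
    // added in Utils.h: value type carried by each sparse element (float today, no longer hard-coded)
    using sparseValueType = typename knowhere::sparse_u32_f32::ValueType;

    // Call sites migrate mechanically:
    //   before: knowhere::sparse::SparseRow<float>
    //   after:  knowhere::sparse::SparseRow<sparseValueType>
    using SparseRowT = knowhere::sparse::SparseRow<sparseValueType>;  // illustrative alias only
    }  // namespace milvus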
@@ -500,7 +500,7 @@ class SparseFloatVectorChunk : public Chunk {
             reinterpret_cast<uint64_t*>(data + null_bitmap_bytes_num);
         for (int i = 0; i < row_nums; i++) {
             vec_[i] = {(offsets_ptr[i + 1] - offsets_ptr[i]) /
-                           knowhere::sparse::SparseRow<float>::element_size(),
+                           knowhere::sparse::SparseRow<sparseValueType>::element_size(),
                        reinterpret_cast<uint8_t*>(data + offsets_ptr[i]),
                        false};
             dim_ = std::max(dim_, vec_[i].dim());
@@ -519,7 +519,7 @@ class SparseFloatVectorChunk : public Chunk {
     }

     // only for test
-    std::vector<knowhere::sparse::SparseRow<float>>&
+    std::vector<knowhere::sparse::SparseRow<sparseValueType>>&
     Vec() {
         return vec_;
     }
@@ -531,6 +531,6 @@ class SparseFloatVectorChunk : public Chunk {

  private:
     int64_t dim_ = 0;
-    std::vector<knowhere::sparse::SparseRow<float>> vec_;
+    std::vector<knowhere::sparse::SparseRow<sparseValueType>> vec_;
 };
 }  // namespace milvus
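For context on the arithmetic above: the chunk stores all sparse rows back to back and keeps a byte offset per row, so the element count of row i is its byte span divided by the fixed per-element size (one index plus one value). A small standalone sketch of that computation, assuming the conventional 4-byte index / 4-byte value layout (the real code takes the size from SparseRow<sparseValueType>::element_size()):

    #include <cstddef>
    #include <cstdint>
    #include <vector>

    // Hypothetical helper: recover per-row element counts from row byte offsets
    // (offsets has row_nums + 1 entries, last one equals the total byte length).
    std::vector<size_t>
    RowElementCounts(const std::vector<uint64_t>& offsets) {
        constexpr size_t kElementSize = sizeof(uint32_t) + sizeof(float);  // assumed u32 index + f32 value
        std::vector<size_t> counts;
        counts.reserve(offsets.empty() ? 0 : offsets.size() - 1);
        for (size_t i = 0; i + 1 < offsets.size(); ++i) {
            counts.push_back((offsets[i + 1] - offsets[i]) / kElementSize);
        }
        return counts;
    }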
@@ -447,7 +447,7 @@ create_chunk_writer(const FieldMeta& field_meta, Args&&... args) {
                 field_meta.get_element_type(),
                 std::forward<Args>(args)...,
                 nullable);
-        case milvus::DataType::VECTOR_SPARSE_FLOAT:
+        case milvus::DataType::VECTOR_SPARSE_U32_F32:
             return std::make_shared<SparseFloatVectorChunkWriter>(
                 std::forward<Args>(args)..., nullable);
         case milvus::DataType::VECTOR_ARRAY:
@@ -284,11 +284,11 @@ FieldDataImpl<Type, is_type_entire_row>::FillFieldData(
                 array);
             return FillFieldData(array_info.first, array_info.second);
         }
-        case DataType::VECTOR_SPARSE_FLOAT: {
+        case DataType::VECTOR_SPARSE_U32_F32: {
             AssertInfo(array->type()->id() == arrow::Type::type::BINARY,
                        "inconsistent data type");
             auto arr = std::dynamic_pointer_cast<arrow::BinaryArray>(array);
-            std::vector<knowhere::sparse::SparseRow<float>> values;
+            std::vector<knowhere::sparse::SparseRow<sparseValueType>> values;
             for (size_t index = 0; index < element_count; ++index) {
                 auto view = arr->GetString(index);
                 values.push_back(
@@ -460,7 +460,7 @@ template class FieldDataImpl<int8_t, false>;
 template class FieldDataImpl<float, false>;
 template class FieldDataImpl<float16, false>;
 template class FieldDataImpl<bfloat16, false>;
-template class FieldDataImpl<knowhere::sparse::SparseRow<float>, true>;
+template class FieldDataImpl<knowhere::sparse::SparseRow<sparseValueType>, true>;
 template class FieldDataImpl<VectorArray, true>;

 FieldDataPtr
@@ -723,14 +723,14 @@ class FieldDataJsonImpl : public FieldDataImpl<Json, true> {
 };

 class FieldDataSparseVectorImpl
-    : public FieldDataImpl<knowhere::sparse::SparseRow<float>, true> {
+    : public FieldDataImpl<knowhere::sparse::SparseRow<sparseValueType>, true> {
  public:
     explicit FieldDataSparseVectorImpl(DataType data_type,
                                        int64_t total_num_rows = 0)
-        : FieldDataImpl<knowhere::sparse::SparseRow<float>, true>(
+        : FieldDataImpl<knowhere::sparse::SparseRow<sparseValueType>, true>(
               /*dim=*/1, data_type, false, total_num_rows),
           vec_dim_(0) {
-        AssertInfo(data_type == DataType::VECTOR_SPARSE_FLOAT,
+        AssertInfo(data_type == DataType::VECTOR_SPARSE_U32_F32,
                    "invalid data type for sparse vector");
     }

@@ -753,7 +753,7 @@ class FieldDataSparseVectorImpl
     }

     // source is a pointer to element_count of
-    // knowhere::sparse::SparseRow<float>
+    // knowhere::sparse::SparseRow<sparseValueType>
     void
     FillFieldData(const void* source, ssize_t element_count) override {
         if (element_count == 0) {
@@ -765,7 +765,7 @@ class FieldDataSparseVectorImpl
             resize_field_data(length_ + element_count);
         }
         auto ptr =
-            static_cast<const knowhere::sparse::SparseRow<float>*>(source);
+            static_cast<const knowhere::sparse::SparseRow<sparseValueType>*>(source);
         for (int64_t i = 0; i < element_count; ++i) {
             auto& row = ptr[i];
             vec_dim_ = std::max(vec_dim_, row.dim());
@@ -774,7 +774,7 @@ class FieldDataSparseVectorImpl
         length_ += element_count;
     }

-    // each binary in array is a knowhere::sparse::SparseRow<float>
+    // each binary in array is a knowhere::sparse::SparseRow<sparseValueType>
     void
     FillFieldData(const std::shared_ptr<arrow::BinaryArray>& array) override {
         auto n = array->length();
@@ -37,7 +37,7 @@ constexpr bool IsScalar =

 template <typename T>
 constexpr bool IsSparse = std::is_same_v<T, SparseFloatVector> ||
-                          std::is_same_v<T, knowhere::sparse::SparseRow<float>>;
+                          std::is_same_v<T, knowhere::sparse::SparseRow<sparseValueType>>;

 template <typename T>
 constexpr bool IsVariableType =
@@ -52,7 +52,7 @@ template <typename T>
 constexpr bool IsVariableTypeSupportInChunk =
     std::is_same_v<T, std::string> || std::is_same_v<T, Array> ||
     std::is_same_v<T, Json> ||
-    std::is_same_v<T, knowhere::sparse::SparseRow<float>>;
+    std::is_same_v<T, knowhere::sparse::SparseRow<sparseValueType>>;

 template <typename T>
 using ChunkViewType = std::conditional_t<
@@ -63,6 +63,7 @@ using float16 = knowhere::fp16;
 using bfloat16 = knowhere::bf16;
 using bin1 = knowhere::bin1;
 using int8 = knowhere::int8;
+using sparse_u32_f32 = knowhere::sparse_u32_f32;

 // See also: https://github.com/milvus-io/milvus-proto/blob/master/proto/schema.proto
 enum class DataType {
@@ -91,7 +92,7 @@ enum class DataType {
     VECTOR_FLOAT = 101,
     VECTOR_FLOAT16 = 102,
     VECTOR_BFLOAT16 = 103,
-    VECTOR_SPARSE_FLOAT = 104,
+    VECTOR_SPARSE_U32_F32 = 104,
     VECTOR_INT8 = 105,
     VECTOR_ARRAY = 106,
 };
@@ -139,7 +140,7 @@ GetDataTypeSize(DataType data_type, int dim = 1) {
             return sizeof(bfloat16) * dim;
         case DataType::VECTOR_INT8:
             return sizeof(int8) * dim;
-        // Not supporting variable length types(such as VECTOR_SPARSE_FLOAT and
+        // Not supporting variable length types(such as VECTOR_SPARSE_U32_F32 and
         // VARCHAR) here intentionally. We can't easily estimate the size of
         // them. Caller of this method must handle this case themselves and must
         // not pass variable length types to this method.
@@ -184,7 +185,7 @@ GetArrowDataType(DataType data_type, int dim = 1) {
         case DataType::VECTOR_FLOAT16:
         case DataType::VECTOR_BFLOAT16:
             return arrow::fixed_size_binary(dim * 2);
-        case DataType::VECTOR_SPARSE_FLOAT:
+        case DataType::VECTOR_SPARSE_U32_F32:
             return arrow::binary();
         case DataType::VECTOR_INT8:
             return arrow::fixed_size_binary(dim);
@@ -244,8 +245,8 @@ GetDataTypeName(DataType data_type) {
             return "vector_float16";
         case DataType::VECTOR_BFLOAT16:
             return "vector_bfloat16";
-        case DataType::VECTOR_SPARSE_FLOAT:
-            return "vector_sparse_float";
+        case DataType::VECTOR_SPARSE_U32_F32:
+            return "VECTOR_SPARSE_U32_F32";
         case DataType::VECTOR_INT8:
             return "vector_int8";
         case DataType::VECTOR_ARRAY:
@@ -386,7 +387,7 @@ IsDenseFloatVectorDataType(DataType data_type) {

 inline bool
 IsSparseFloatVectorDataType(DataType data_type) {
-    return data_type == DataType::VECTOR_SPARSE_FLOAT;
+    return data_type == DataType::VECTOR_SPARSE_U32_F32;
 }

 inline bool
@@ -749,8 +750,8 @@ struct fmt::formatter<milvus::DataType> : formatter<string_view> {
         case milvus::DataType::VECTOR_BFLOAT16:
             name = "VECTOR_BFLOAT16";
             break;
-        case milvus::DataType::VECTOR_SPARSE_FLOAT:
-            name = "VECTOR_SPARSE_FLOAT";
+        case milvus::DataType::VECTOR_SPARSE_U32_F32:
+            name = "VECTOR_SPARSE_U32_F32";
             break;
         case milvus::DataType::VECTOR_INT8:
             name = "VECTOR_INT8";
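The enum value keeps its numeric id (104), so only call sites that spell the name change; helpers such as IsSparseFloatVectorDataType and GetArrowDataType behave as before. A small usage sketch (function and header names as they appear in the hunks above; the wrapper function is only for illustration):

    #include "common/Types.h"  // include path assumed

    void SparseTypeRoundTrip() {                                   // hypothetical helper, illustration only
        auto dt = milvus::DataType::VECTOR_SPARSE_U32_F32;         // was VECTOR_SPARSE_FLOAT, still value 104
        bool is_sparse = milvus::IsSparseFloatVectorDataType(dt);  // true: the predicate keeps its old name
        auto arrow_type = milvus::GetArrowDataType(dt);            // arrow::binary(): rows are variable length
        auto name = milvus::GetDataTypeName(dt);                   // "VECTOR_SPARSE_U32_F32"
        (void)is_sparse; (void)arrow_type; (void)name;
    }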
@@ -43,6 +43,7 @@ namespace milvus {
     (data_array->vectors().type##_vector().data())

 using CheckDataValid = std::function<bool(size_t)>;
+using sparseValueType = typename knowhere::sparse_u32_f32::ValueType;

 inline DatasetPtr
 GenDataset(const int64_t nb, const int64_t dim, const void* xb) {
@@ -245,17 +246,17 @@ EscapeBraces(const std::string& input) {
     return result;
 }

-inline knowhere::sparse::SparseRow<float>
+inline knowhere::sparse::SparseRow<sparseValueType>
 CopyAndWrapSparseRow(const void* data,
                      size_t size,
                      const bool validate = false) {
     size_t num_elements =
-        size / knowhere::sparse::SparseRow<float>::element_size();
-    knowhere::sparse::SparseRow<float> row(num_elements);
+        size / knowhere::sparse::SparseRow<sparseValueType>::element_size();
+    knowhere::sparse::SparseRow<sparseValueType> row(num_elements);
     std::memcpy(row.data(), data, size);
     if (validate) {
         AssertInfo(
-            size % knowhere::sparse::SparseRow<float>::element_size() == 0,
+            size % knowhere::sparse::SparseRow<sparseValueType>::element_size() == 0,
             "Invalid size for sparse row data");
         for (size_t i = 0; i < num_elements; ++i) {
             auto element = row[i];
@@ -276,17 +277,17 @@ CopyAndWrapSparseRow(const void* data,

 // Iterable is a list of bytes, each is a byte array representation of a single
 // sparse float row. This helper function converts such byte arrays into a list
-// of knowhere::sparse::SparseRow<float>. The resulting list is a deep copy of
+// of knowhere::sparse::SparseRow<sparseValueType>. The resulting list is a deep copy of
 // the source data.
 //
 // Here in segcore we validate the sparse row data only for search requests,
 // as the insert/upsert data are already validated in go code.
 template <typename Iterable>
-std::unique_ptr<knowhere::sparse::SparseRow<float>[]>
+std::unique_ptr<knowhere::sparse::SparseRow<sparseValueType>[]>
 SparseBytesToRows(const Iterable& rows, const bool validate = false) {
     AssertInfo(rows.size() > 0, "at least 1 sparse row should be provided");
     auto res =
-        std::make_unique<knowhere::sparse::SparseRow<float>[]>(rows.size());
+        std::make_unique<knowhere::sparse::SparseRow<sparseValueType>[]>(rows.size());
     for (size_t i = 0; i < rows.size(); ++i) {
         res[i] = std::move(
             CopyAndWrapSparseRow(rows[i].data(), rows[i].size(), validate));
@@ -294,11 +295,11 @@ SparseBytesToRows(const Iterable& rows, const bool validate = false) {
     return res;
 }

-// SparseRowsToProto converts a list of knowhere::sparse::SparseRow<float> to
+// SparseRowsToProto converts a list of knowhere::sparse::SparseRow<sparseValueType> to
 // a milvus::proto::schema::SparseFloatArray. The resulting proto is a deep copy
 // of the source data. source(i) returns the i-th row to be copied.
 inline void SparseRowsToProto(
-    const std::function<const knowhere::sparse::SparseRow<float>*(size_t)>&
+    const std::function<const knowhere::sparse::SparseRow<sparseValueType>*(size_t)>&
         source,
     int64_t rows,
     milvus::proto::schema::SparseFloatArray* proto) {
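The byte-array representation referred to above is a flat sequence of (index, value) pairs, and element_size() is the size of one such pair; that is why CopyAndWrapSparseRow derives the element count from the byte size. A standalone sketch of producing that layout from plain C++ values (the u32-index/f32-value packing is an assumption implied by the type name, not copied from the commit):

    #include <cstdint>
    #include <cstring>
    #include <utility>
    #include <vector>

    // Hypothetical helper: build the byte buffer consumed by CopyAndWrapSparseRow / SparseBytesToRows.
    std::vector<uint8_t>
    MakeSparseRowBytes(const std::vector<std::pair<uint32_t, float>>& elems) {
        std::vector<uint8_t> bytes(elems.size() * (sizeof(uint32_t) + sizeof(float)));
        size_t off = 0;
        for (const auto& [idx, val] : elems) {
            std::memcpy(bytes.data() + off, &idx, sizeof(idx));  // 4-byte index
            off += sizeof(idx);
            std::memcpy(bytes.data() + off, &val, sizeof(val));  // 4-byte value
            off += sizeof(val);
        }
        return bytes;  // size is a multiple of SparseRow<sparseValueType>::element_size()
    }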
@@ -122,7 +122,7 @@ class SparseFloatVector : public VectorTrait {
  public:
     using embedded_type = float;
     static constexpr int32_t dim_factor = 1;
-    static constexpr auto data_type = DataType::VECTOR_SPARSE_FLOAT;
+    static constexpr auto data_type = DataType::VECTOR_SPARSE_U32_F32;
     static constexpr auto c_data_type = CDataType::SparseFloatVector;
     static constexpr auto schema_data_type =
         proto::schema::DataType::SparseFloatVector;
@@ -93,6 +93,11 @@ KnowhereInitSearchThreadPool(const uint32_t num_threads) {
     }
 }

+void
+KnowhereInitFetchThreadPool(const uint32_t num_threads) {
+    knowhere::KnowhereConfig::SetFetchThreadPoolSize(num_threads);
+}
+
 void
 KnowhereInitGPUMemoryPool(const uint32_t init_size, const uint32_t max_size) {
     if (init_size == 0 && max_size == 0) {
@@ -35,6 +35,9 @@ KnowhereInitBuildThreadPool(const uint32_t);
 void
 KnowhereInitSearchThreadPool(const uint32_t);

+void
+KnowhereInitFetchThreadPool(const uint32_t);
+
 int32_t
 GetMinimalIndexVersion();

@@ -184,9 +184,9 @@ IndexFactory::VecIndexLoadResource(
                 knowhere::IndexStaticFaced<knowhere::bf16>::HasRawData(
                     index_type, index_version, config);
             break;
-        case milvus::DataType::VECTOR_SPARSE_FLOAT:
+        case milvus::DataType::VECTOR_SPARSE_U32_F32:
             resource = knowhere::IndexStaticFaced<
-                knowhere::fp32>::EstimateLoadResource(index_type,
+                knowhere::sparse_u32_f32>::EstimateLoadResource(index_type,
                                                       index_version,
                                                       index_size_gb,
                                                       config);
@@ -516,8 +516,8 @@ IndexFactory::CreateVectorIndex(
                 return std::make_unique<VectorDiskAnnIndex<bin1>>(
                     index_type, metric_type, version, file_manager_context);
             }
-            case DataType::VECTOR_SPARSE_FLOAT: {
-                return std::make_unique<VectorDiskAnnIndex<float>>(
+            case DataType::VECTOR_SPARSE_U32_F32: {
+                return std::make_unique<VectorDiskAnnIndex<sparse_u32_f32>>(
                     index_type, metric_type, version, file_manager_context);
             }
             case DataType::VECTOR_ARRAY: {
@@ -537,8 +537,7 @@ IndexFactory::CreateVectorIndex(
         }
     } else {  // create mem index
         switch (data_type) {
-            case DataType::VECTOR_FLOAT:
-            case DataType::VECTOR_SPARSE_FLOAT: {
+            case DataType::VECTOR_FLOAT: {
                 return std::make_unique<VectorMemIndex<float>>(
                     DataType::NONE,
                     index_type,
@@ -547,6 +546,15 @@ IndexFactory::CreateVectorIndex(
                     use_knowhere_build_pool,
                     file_manager_context);
             }
+            case DataType::VECTOR_SPARSE_U32_F32: {
+                return std::make_unique<VectorMemIndex<sparse_u32_f32>>(
+                    DataType::NONE,
+                    index_type,
+                    metric_type,
+                    version,
+                    use_knowhere_build_pool,
+                    file_manager_context);
+            }
             case DataType::VECTOR_BINARY: {
                 return std::make_unique<VectorMemIndex<bin1>>(
                     DataType::NONE,
@@ -596,11 +604,19 @@ IndexFactory::CreateVectorIndex(
                     version,
                     use_knowhere_build_pool,
                     file_manager_context);
+            case DataType::VECTOR_SPARSE_U32_F32:
+                return std::make_unique<VectorMemIndex<sparse_u32_f32>>(
+                    element_type,
+                    index_type,
+                    metric_type,
+                    version,
+                    use_knowhere_build_pool,
+                    file_manager_context);
             default:
                 ThrowInfo(NotImplemented,
                           fmt::format("not implemented data type to "
                                       "build mem index: {}",
-                                      data_type));
+                                      element_type));
         }
     }
     default:
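Before this change the memory-index path reused VectorMemIndex<float> for both dense float and sparse vectors; now the sparse case gets its own VectorMemIndex<sparse_u32_f32> instantiation (the matching explicit template instantiations are added further down in this diff). A self-contained sketch of the dispatch pattern involved, using stand-in types rather than the real Milvus classes:

    #include <memory>
    #include <stdexcept>

    enum class VecType { FLOAT_DENSE, SPARSE_U32_F32 };   // stand-ins for the milvus DataType values

    struct IndexBase { virtual ~IndexBase() = default; };
    template <typename T> struct MemIndex : IndexBase {}; // stand-in for VectorMemIndex<T>

    struct DenseFloat {};    // plays the role of float
    struct SparseU32F32 {};  // plays the role of sparse_u32_f32

    std::unique_ptr<IndexBase>
    CreateMemIndex(VecType t) {
        switch (t) {
            case VecType::FLOAT_DENSE:
                return std::make_unique<MemIndex<DenseFloat>>();
            case VecType::SPARSE_U32_F32:  // new: sparse selects its own instantiation
                return std::make_unique<MemIndex<SparseU32F32>>();
        }
        throw std::runtime_error("unsupported vector type");
    }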
@@ -168,7 +168,7 @@ VectorDiskAnnIndex<T>::Build(const Config& config) {
         index_.IsAdditionalScalarSupported(
             is_partition_key_isolation.value_or(false))) {
         build_config[VEC_OPT_FIELDS_PATH] =
-            file_manager_->CacheOptFieldToDisk(opt_fields.value());
+            file_manager_->CacheOptFieldToDisk(config);
         // `partition_key_isolation` is already in the config, so it falls through
         // into the index Build call directly
     }
@@ -415,5 +415,6 @@ template class VectorDiskAnnIndex<float>;
 template class VectorDiskAnnIndex<float16>;
 template class VectorDiskAnnIndex<bfloat16>;
 template class VectorDiskAnnIndex<bin1>;
+template class VectorDiskAnnIndex<sparse_u32_f32>;

 }  // namespace milvus::index
@@ -80,7 +80,7 @@ class VectorDiskAnnIndex : public VectorIndex {
     std::vector<uint8_t>
     GetVector(const DatasetPtr dataset) const override;

-    std::unique_ptr<const knowhere::sparse::SparseRow<float>[]>
+    std::unique_ptr<const knowhere::sparse::SparseRow<sparseValueType>[]>
     GetSparseVector(const DatasetPtr dataset) const override {
         ThrowInfo(ErrorCode::Unsupported,
                   "get sparse vector not supported for disk index");
@@ -76,7 +76,7 @@ class VectorIndex : public IndexBase {
     virtual std::vector<uint8_t>
     GetVector(const DatasetPtr dataset) const = 0;

-    virtual std::unique_ptr<const knowhere::sparse::SparseRow<float>[]>
+    virtual std::unique_ptr<const knowhere::sparse::SparseRow<sparseValueType>[]>
     GetSparseVector(const DatasetPtr dataset) const = 0;

     IndexType
@@ -426,10 +426,10 @@ VectorMemIndex<T>::Build(const Config& config) {
                     field_data)
                     ->Dim());
         }
-        std::vector<knowhere::sparse::SparseRow<float>> vec(total_rows);
+        std::vector<knowhere::sparse::SparseRow<sparseValueType>> vec(total_rows);
         int64_t offset = 0;
         for (auto field_data : field_datas) {
-            auto ptr = static_cast<const knowhere::sparse::SparseRow<float>*>(
+            auto ptr = static_cast<const knowhere::sparse::SparseRow<sparseValueType>*>(
                 field_data->Data());
             AssertInfo(ptr, "failed to cast field data to sparse rows");
             for (size_t i = 0; i < field_data->Length(); ++i) {
@@ -570,7 +570,7 @@ VectorMemIndex<T>::GetVector(const DatasetPtr dataset) const {
 }

 template <typename T>
-std::unique_ptr<const knowhere::sparse::SparseRow<float>[]>
+std::unique_ptr<const knowhere::sparse::SparseRow<sparseValueType>[]>
 VectorMemIndex<T>::GetSparseVector(const DatasetPtr dataset) const {
     auto res = index_.GetVectorByIds(dataset);
     if (!res.has_value()) {
@@ -579,8 +579,8 @@ VectorMemIndex<T>::GetSparseVector(const DatasetPtr dataset) const {
     }
     // release and transfer ownership to the result unique ptr.
     res.value()->SetIsOwner(false);
-    return std::unique_ptr<const knowhere::sparse::SparseRow<float>[]>(
-        static_cast<const knowhere::sparse::SparseRow<float>*>(
+    return std::unique_ptr<const knowhere::sparse::SparseRow<sparseValueType>[]>(
+        static_cast<const knowhere::sparse::SparseRow<sparseValueType>*>(
             res.value()->GetTensor()));
 }

@@ -751,5 +751,6 @@ template class VectorMemIndex<bin1>;
 template class VectorMemIndex<float16>;
 template class VectorMemIndex<bfloat16>;
 template class VectorMemIndex<int8>;
+template class VectorMemIndex<sparse_u32_f32>;

 }  // namespace milvus::index
@@ -87,7 +87,7 @@ class VectorMemIndex : public VectorIndex {
     std::vector<uint8_t>
     GetVector(const DatasetPtr dataset) const override;

-    std::unique_ptr<const knowhere::sparse::SparseRow<float>[]>
+    std::unique_ptr<const knowhere::sparse::SparseRow<sparseValueType>[]>
     GetSparseVector(const DatasetPtr dataset) const override;

     IndexStatsPtr
@@ -68,7 +68,7 @@ class IndexFactory {
             case DataType::VECTOR_FLOAT16:
             case DataType::VECTOR_BFLOAT16:
             case DataType::VECTOR_BINARY:
-            case DataType::VECTOR_SPARSE_FLOAT:
+            case DataType::VECTOR_SPARSE_U32_F32:
             case DataType::VECTOR_INT8:
             case DataType::VECTOR_ARRAY:
                 return std::make_unique<VecIndexCreator>(type, config, context);
@@ -134,8 +134,8 @@ VariableLengthChunk<std::string>::set(
 // Template specialization for sparse vector
 template <>
 inline void
-VariableLengthChunk<knowhere::sparse::SparseRow<float>>::set(
-    const knowhere::sparse::SparseRow<float>* src,
+VariableLengthChunk<knowhere::sparse::SparseRow<sparseValueType>>::set(
+    const knowhere::sparse::SparseRow<sparseValueType>* src,
     uint32_t begin,
     uint32_t length,
     const std::optional<CheckDataValid>& check_data_valid) {
@@ -158,7 +158,7 @@ VariableLengthChunk<knowhere::sparse::SparseRow<float>>::set(
         uint8_t* data_ptr = buf + offset;
         std::memcpy(data_ptr, (uint8_t*)src[i].data(), data_size);
         data_[i + begin] =
-            knowhere::sparse::SparseRow<float>(src[i].size(), data_ptr, false);
+            knowhere::sparse::SparseRow<sparseValueType>(src[i].size(), data_ptr, false);
         offset += data_size;
     }
 }
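In the hunk above the chunk copies each row's bytes into its own buffer and then wraps them in a SparseRow whose third constructor argument is false, i.e. a non-owning view: the chunk buffer remains the owner and the row only points into it. A minimal sketch of that non-owning wrap (constructor shape taken from the hunk; header path and helper name are assumptions):

    #include <cstddef>
    #include <cstdint>
    #include <vector>
    #include "knowhere/sparse_utils.h"  // header path assumed

    using Row = knowhere::sparse::SparseRow<float>;  // spelled SparseRow<sparseValueType> in segcore

    // Hypothetical helper: view element_count elements starting at `offset` inside a chunk buffer.
    Row WrapRowView(std::vector<uint8_t>& chunk_buf, size_t offset, size_t element_count) {
        // own_data=false: the chunk buffer keeps ownership, the row is just a view into it.
        return Row(element_count, chunk_buf.data() + offset, /*own_data=*/false);
    }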
@@ -16,9 +16,9 @@

 char*
 GetCoreMetrics() {
-    auto str = milvus::monitor::prometheusClient->GetMetrics();
+    auto str = milvus::monitor::getPrometheusClient().GetMetrics();
     auto len = str.length();
-    char* res = (char*)malloc(len + 1);
+    char* res = static_cast<char*>(malloc(len + 1));
     memcpy(res, str.data(), len);
     res[len] = '\0';
     return res;
@@ -27,10 +27,11 @@ const prometheus::Histogram::BucketBoundaries cgoCallDurationbuckets = {
 // One histogram per function name (label)
 static inline prometheus::Histogram&
 GetHistogram(std::string&& func) {
-    static auto& hist_family = prometheus::BuildHistogram()
+    static auto& hist_family =
+        prometheus::BuildHistogram()
             .Name("milvus_cgocall_duration_seconds")
             .Help("Duration of cgo-exposed functions")
-            .Register(prometheusClient->GetRegistry());
+            .Register(getPrometheusClient().GetRegistry());

     // default buckets: [0.005, 0.01, ..., 1.0]
     return hist_family.Add({{"func", func}}, cgoCallDurationbuckets);
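The metrics fix replaces the global prometheusClient pointer with a getPrometheusClient() accessor, which sidesteps static-initialization-order problems where a histogram family could be registered against a client that was not constructed yet (the lost-metric symptom referenced in the commit message). A minimal sketch of such a function-local-static accessor; this is an assumed shape, not the actual Milvus implementation:

    #include <prometheus/registry.h>

    class PrometheusClient {  // stand-in for milvus::monitor::PrometheusClient
     public:
        prometheus::Registry& GetRegistry() { return registry_; }
     private:
        prometheus::Registry registry_;
    };

    PrometheusClient& getPrometheusClient() {
        static PrometheusClient client;  // constructed on first use, so registration order is safe
        return client;
    }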
@@ -23,6 +23,7 @@
 #include "common/Json.h"
 #include "common/Consts.h"
 #include "common/Schema.h"
+#include "common/Utils.h"

 namespace milvus::query {

@@ -80,7 +81,7 @@ struct Placeholder {
     // only one of blob_ and sparse_matrix_ should be set. blob_ is used for
     // dense vector search and sparse_matrix_ is for sparse vector search.
     aligned_vector<char> blob_;
-    std::unique_ptr<knowhere::sparse::SparseRow<float>[]> sparse_matrix_;
+    std::unique_ptr<knowhere::sparse::SparseRow<sparseValueType>[]> sparse_matrix_;
     // offsets for embedding list
     aligned_vector<size_t> lims_;

@@ -106,7 +106,7 @@ PrepareBFDataSet(const dataset::SearchDataset& query_ds,
         query_dataset->SetRows(query_ds.query_lims[query_ds.num_queries]);
     }

-    if (data_type == DataType::VECTOR_SPARSE_FLOAT) {
+    if (data_type == DataType::VECTOR_SPARSE_U32_F32) {
         base_dataset->SetIsSparse(true);
         query_dataset->SetIsSparse(true);
     }
@@ -168,9 +168,9 @@ BruteForceSearch(const dataset::SearchDataset& query_ds,
     } else if (data_type == DataType::VECTOR_BINARY) {
         res = knowhere::BruteForce::RangeSearch<bin1>(
             base_dataset, query_dataset, search_cfg, bitset);
-    } else if (data_type == DataType::VECTOR_SPARSE_FLOAT) {
+    } else if (data_type == DataType::VECTOR_SPARSE_U32_F32) {
         res = knowhere::BruteForce::RangeSearch<
-            knowhere::sparse::SparseRow<float>>(
+            knowhere::sparse::SparseRow<sparseValueType>>(
             base_dataset, query_dataset, search_cfg, bitset);
     } else if (data_type == DataType::VECTOR_INT8) {
         res = knowhere::BruteForce::RangeSearch<int8>(
@@ -229,7 +229,7 @@ BruteForceSearch(const dataset::SearchDataset& query_ds,
             sub_result.mutable_distances().data(),
             search_cfg,
             bitset);
-    } else if (data_type == DataType::VECTOR_SPARSE_FLOAT) {
+    } else if (data_type == DataType::VECTOR_SPARSE_U32_F32) {
         stat = knowhere::BruteForce::SearchSparseWithBuf(
             base_dataset,
             query_dataset,
@@ -279,9 +279,9 @@ DispatchBruteForceIteratorByDataType(const knowhere::DataSetPtr& base_dataset,
         case DataType::VECTOR_BFLOAT16:
             return knowhere::BruteForce::AnnIterator<bfloat16>(
                 base_dataset, query_dataset, config, bitset);
-        case DataType::VECTOR_SPARSE_FLOAT:
+        case DataType::VECTOR_SPARSE_U32_F32:
             return knowhere::BruteForce::AnnIterator<
-                knowhere::sparse::SparseRow<float>>(
+                knowhere::sparse::SparseRow<sparseValueType>>(
                 base_dataset, query_dataset, config, bitset);
         case DataType::VECTOR_INT8:
             return knowhere::BruteForce::AnnIterator<int8>(
@@ -38,13 +38,13 @@ FloatSegmentIndexSearch(const segcore::SegmentGrowingImpl& segment,

     auto vecfield_id = info.field_id_;
     auto& field = schema[vecfield_id];
-    auto is_sparse = field.get_data_type() == DataType::VECTOR_SPARSE_FLOAT;
+    auto is_sparse = field.get_data_type() == DataType::VECTOR_SPARSE_U32_F32;
     // TODO(SPARSE): see todo in PlanImpl.h::PlaceHolder.
     auto dim = is_sparse ? 0 : field.get_dim();

     AssertInfo(IsVectorDataType(field.get_data_type()),
                "[FloatSearch]Field data type isn't VECTOR_FLOAT, "
-               "VECTOR_FLOAT16, VECTOR_BFLOAT16 or VECTOR_SPARSE_FLOAT");
+               "VECTOR_FLOAT16, VECTOR_BFLOAT16 or VECTOR_SPARSE_U32_F32");
     dataset::SearchDataset search_dataset{info.metric_type_,
                                           num_queries,
                                           info.topk_,
@@ -119,7 +119,7 @@ SearchOnGrowing(const segcore::SegmentGrowingImpl& segment,
     }
     SubSearchResult final_qr(num_queries, topk, metric_type, round_decimal);
     // TODO(SPARSE): see todo in PlanImpl.h::PlaceHolder.
-    auto dim = field.get_data_type() == DataType::VECTOR_SPARSE_FLOAT
+    auto dim = field.get_data_type() == DataType::VECTOR_SPARSE_U32_F32
                    ? 0
                    : field.get_dim();
     dataset::SearchDataset search_dataset{metric_type,
@@ -40,7 +40,7 @@ SearchOnSealedIndex(const Schema& schema,

     auto field_id = search_info.field_id_;
     auto& field = schema[field_id];
-    auto is_sparse = field.get_data_type() == DataType::VECTOR_SPARSE_FLOAT;
+    auto is_sparse = field.get_data_type() == DataType::VECTOR_SPARSE_U32_F32;
     // TODO(SPARSE): see todo in PlanImpl.h::PlaceHolder.
     auto dim = is_sparse ? 0 : field.get_dim();

@@ -115,7 +115,7 @@ SearchOnSealedColumn(const Schema& schema,
     auto data_type = field.get_data_type();
     auto element_type = field.get_element_type();
     // TODO(SPARSE): see todo in PlanImpl.h::PlaceHolder.
-    auto dim = data_type == DataType::VECTOR_SPARSE_FLOAT ? 0 : field.get_dim();
+    auto dim = data_type == DataType::VECTOR_SPARSE_U32_F32 ? 0 : field.get_dim();

     query::dataset::SearchDataset query_dataset{search_info.metric_type_,
                                                 num_queries,
@@ -813,7 +813,7 @@ ChunkedSegmentSealedImpl::get_vector(FieldId field_id,
     if (has_raw_data) {
         // If index has raw data, get vector from memory.
         auto ids_ds = GenIdsDataset(count, ids);
-        if (field_meta.get_data_type() == DataType::VECTOR_SPARSE_FLOAT) {
+        if (field_meta.get_data_type() == DataType::VECTOR_SPARSE_U32_F32) {
             auto res = vec_index->GetSparseVector(ids_ds);
             return segcore::CreateVectorDataArrayFrom(
                 res.get(), count, field_meta);
@@ -1752,7 +1752,7 @@ ChunkedSegmentSealedImpl::get_raw_data(FieldId field_id,
                 ret->mutable_vectors()->mutable_int8_vector()->data());
             break;
         }
-        case DataType::VECTOR_SPARSE_FLOAT: {
+        case DataType::VECTOR_SPARSE_U32_F32: {
             auto dst = ret->mutable_vectors()->mutable_sparse_float_vector();
             int64_t max_dim = 0;
             column->BulkValueAt(
@@ -1761,7 +1761,7 @@ ChunkedSegmentSealedImpl::get_raw_data(FieldId field_id,
                     auto row =
                         offset != INVALID_SEG_OFFSET
                             ? static_cast<
-                                  const knowhere::sparse::SparseRow<float>*>(
+                                  const knowhere::sparse::SparseRow<sparseValueType>*>(
                                   static_cast<const void*>(value))
                             : nullptr;
                     if (row == nullptr) {
@@ -2108,7 +2108,7 @@ ChunkedSegmentSealedImpl::generate_interim_index(const FieldId field_id,
     auto& index_params = field_index_meta.GetIndexParams();

     bool is_sparse =
-        field_meta.get_data_type() == DataType::VECTOR_SPARSE_FLOAT;
+        field_meta.get_data_type() == DataType::VECTOR_SPARSE_U32_F32;

     bool enable_growing_mmap = storage::MmapManager::GetInstance()
                                    .GetMmapConfig()
@@ -37,7 +37,7 @@ VectorBase::set_data_raw(ssize_t element_offset,
         return set_data_raw(
             element_offset, VEC_FIELD_DATA(data, bfloat16), element_count);
     } else if (field_meta.get_data_type() ==
-               DataType::VECTOR_SPARSE_FLOAT) {
+               DataType::VECTOR_SPARSE_U32_F32) {
         return set_data_raw(
             element_offset,
             SparseBytesToRows(
@@ -504,13 +504,13 @@ class ConcurrentVector<VectorArray>

 template <>
 class ConcurrentVector<SparseFloatVector>
-    : public ConcurrentVectorImpl<knowhere::sparse::SparseRow<float>, true> {
+    : public ConcurrentVectorImpl<knowhere::sparse::SparseRow<sparseValueType>, true> {
  public:
     explicit ConcurrentVector(
         int64_t size_per_chunk,
         storage::MmapChunkDescriptorPtr mmap_descriptor = nullptr,
         ThreadSafeValidDataPtr valid_data_ptr = nullptr)
-        : ConcurrentVectorImpl<knowhere::sparse::SparseRow<float>,
+        : ConcurrentVectorImpl<knowhere::sparse::SparseRow<sparseValueType>,
                                true>::ConcurrentVectorImpl(1,
                                                            size_per_chunk,
                                                            std::move(
@@ -524,11 +524,11 @@ class ConcurrentVector<SparseFloatVector>
                   const void* source,
                   ssize_t element_count) override {
         auto* src =
-            static_cast<const knowhere::sparse::SparseRow<float>*>(source);
+            static_cast<const knowhere::sparse::SparseRow<sparseValueType>*>(source);
         for (int i = 0; i < element_count; ++i) {
             dim_ = std::max(dim_, src[i].dim());
         }
-        ConcurrentVectorImpl<knowhere::sparse::SparseRow<float>,
+        ConcurrentVectorImpl<knowhere::sparse::SparseRow<sparseValueType>,
                              true>::set_data_raw(element_offset,
                                                  source,
                                                  element_count);
@@ -46,7 +46,7 @@ void
 VectorFieldIndexing::recreate_index(DataType data_type,
                                     const VectorBase* field_raw_data) {
     if (IsSparseFloatVectorDataType(data_type)) {
-        index_ = std::make_unique<index::VectorMemIndex<float>>(
+        index_ = std::make_unique<index::VectorMemIndex<sparse_u32_f32>>(
             DataType::NONE,
             config_->GetIndexType(),
             config_->GetMetricType(),
@@ -150,7 +150,7 @@ VectorFieldIndexing::AppendSegmentIndexSparse(int64_t reserved_offset,
     auto dim = source->Dim();

     while (total_rows > 0) {
-        auto mat = static_cast<const knowhere::sparse::SparseRow<float>*>(
+        auto mat = static_cast<const knowhere::sparse::SparseRow<sparseValueType>*>(
             source->get_chunk_data(chunk_id));
         auto rows = std::min(source->get_size_per_chunk(), total_rows);
         auto dataset = knowhere::GenDataSet(rows, dim, mat);
@@ -336,7 +336,7 @@ CreateIndex(const FieldMeta& field_meta,
         field_meta.get_data_type() == DataType::VECTOR_FLOAT16 ||
         field_meta.get_data_type() == DataType::VECTOR_BFLOAT16 ||
         field_meta.get_data_type() == DataType::VECTOR_INT8 ||
-        field_meta.get_data_type() == DataType::VECTOR_SPARSE_FLOAT) {
+        field_meta.get_data_type() == DataType::VECTOR_SPARSE_U32_F32) {
         return std::make_unique<VectorFieldIndexing>(field_meta,
                                                      field_index_meta,
                                                      segment_max_row_count,
@@ -345,7 +345,7 @@ class IndexingRecord {
                     size,
                     field_raw_data,
                     stream_data->vectors().bfloat16_vector().data());
-            } else if (type == DataType::VECTOR_SPARSE_FLOAT) {
+            } else if (type == DataType::VECTOR_SPARSE_U32_F32) {
                 auto data = SparseBytesToRows(
                     stream_data->vectors().sparse_float_vector().contents());
                 indexing->AppendSegmentIndexSparse(
@@ -378,7 +378,7 @@ class IndexingRecord {
                 auto vec_base = record.get_data_base(fieldId);
                 indexing->AppendSegmentIndexDense(
                     reserved_offset, size, vec_base, data->Data());
-            } else if (type == DataType::VECTOR_SPARSE_FLOAT) {
+            } else if (type == DataType::VECTOR_SPARSE_U32_F32) {
                 auto vec_base = record.get_data_base(fieldId);
                 indexing->AppendSegmentIndexSparse(
                     reserved_offset,
@@ -406,7 +406,7 @@ class IndexingRecord {
             if (data_type == DataType::VECTOR_FLOAT ||
                 data_type == DataType::VECTOR_FLOAT16 ||
                 data_type == DataType::VECTOR_BFLOAT16 ||
-                data_type == DataType::VECTOR_SPARSE_FLOAT) {
+                data_type == DataType::VECTOR_SPARSE_U32_F32) {
                 indexing->GetDataFromIndex(
                     seg_offsets, count, element_size, output_raw);
             }
@@ -699,7 +699,7 @@ struct InsertRecord<false> : public InsertRecord<true> {
                 dense_vec_mmap_descriptor);
             return;
         } else if (field_meta.get_data_type() ==
-                   DataType::VECTOR_SPARSE_FLOAT) {
+                   DataType::VECTOR_SPARSE_U32_F32) {
             this->append_data<SparseFloatVector>(
                 field_id, size_per_chunk, vec_mmap_descriptor);
             return;
@@ -782,7 +782,7 @@ SegmentGrowingImpl::bulk_subscript(FieldId field_id,
             count,
             result->mutable_vectors()->mutable_bfloat16_vector()->data());
     } else if (field_meta.get_data_type() ==
-               DataType::VECTOR_SPARSE_FLOAT) {
+               DataType::VECTOR_SPARSE_U32_F32) {
         bulk_subscript_sparse_float_vector_impl(
             field_id,
             (const ConcurrentVector<SparseFloatVector>*)vec_ptr,
@@ -210,7 +210,7 @@ GetRawDataSizeOfDataArray(const DataArray* data,

             break;
         }
-        case DataType::VECTOR_SPARSE_FLOAT: {
+        case DataType::VECTOR_SPARSE_U32_F32: {
             // TODO(SPARSE, size)
             result += data->vectors().sparse_float_vector().ByteSizeLong();
             break;
@@ -342,7 +342,7 @@ CreateEmptyVectorDataArray(int64_t count, const FieldMeta& field_meta) {

     auto vector_array = data_array->mutable_vectors();
     auto dim = 0;
-    if (data_type != DataType::VECTOR_SPARSE_FLOAT) {
+    if (data_type != DataType::VECTOR_SPARSE_U32_F32) {
         dim = field_meta.get_dim();
         vector_array->set_dim(dim);
     }
@@ -373,7 +373,7 @@ CreateEmptyVectorDataArray(int64_t count, const FieldMeta& field_meta) {
             obj->resize(length * sizeof(bfloat16));
             break;
         }
-        case DataType::VECTOR_SPARSE_FLOAT: {
+        case DataType::VECTOR_SPARSE_U32_F32: {
             // does nothing here
             break;
         }
@@ -544,11 +544,11 @@ CreateVectorDataArrayFrom(const void* data_raw,
             obj->assign(data, length * sizeof(bfloat16));
             break;
         }
-        case DataType::VECTOR_SPARSE_FLOAT: {
+        case DataType::VECTOR_SPARSE_U32_F32: {
             SparseRowsToProto(
                 [&](size_t i) {
                     return reinterpret_cast<
-                               const knowhere::sparse::SparseRow<float>*>(
+                               const knowhere::sparse::SparseRow<sparseValueType>*>(
                                data_raw) +
                            i;
                 },
@@ -655,7 +655,7 @@ MergeDataArray(std::vector<MergeBase>& merge_bases,
             auto obj = vector_array->mutable_binary_vector();
             obj->assign(data + src_offset * num_bytes, num_bytes);
         } else if (field_meta.get_data_type() ==
-                   DataType::VECTOR_SPARSE_FLOAT) {
+                   DataType::VECTOR_SPARSE_U32_F32) {
             auto src = src_field_data->vectors().sparse_float_vector();
             auto dst = vector_array->mutable_sparse_float_vector();
             if (src.dim() > dst->dim()) {
@@ -123,6 +123,11 @@ SegcoreSetKnowhereSearchThreadPoolNum(const uint32_t num_threads) {
     milvus::config::KnowhereInitSearchThreadPool(num_threads);
 }

+extern "C" void
+SegcoreSetKnowhereFetchThreadPoolNum(const uint32_t num_threads) {
+    milvus::config::KnowhereInitFetchThreadPool(num_threads);
+}
+
 extern "C" void
 SegcoreSetKnowhereGpuMemoryPoolSize(const uint32_t init_size,
                                     const uint32_t max_size) {
@@ -71,6 +71,9 @@ SegcoreSetKnowhereBuildThreadPoolNum(const uint32_t num_threads);
 void
 SegcoreSetKnowhereSearchThreadPoolNum(const uint32_t num_threads);

+void
+SegcoreSetKnowhereFetchThreadPoolNum(const uint32_t num_threads);
+
 void
 SegcoreSetKnowhereGpuMemoryPoolSize(const uint32_t init_size,
                                     const uint32_t max_size);
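Together with the KnowhereInitFetchThreadPool plumbing earlier in the diff, this exposes the knowhere fetch-pool size through a C-linkage symbol, so the Go side can set it via cgo the same way it already sets the build and search pool sizes. A hedged sketch of the C++ call chain at startup (function names are from the diff; the init routine and the value used are illustrative):

    #include <cstdint>

    // Declared in segcore_init_c.h (added above); forwards to
    // milvus::config::KnowhereInitFetchThreadPool, which calls
    // knowhere::KnowhereConfig::SetFetchThreadPoolSize.
    extern "C" void SegcoreSetKnowhereFetchThreadPoolNum(const uint32_t num_threads);

    void InitKnowhereFetchPool() {                 // hypothetical init routine
        const uint32_t fetch_threads = 8;          // illustrative value, e.g. read from the node config
        SegcoreSetKnowhereFetchThreadPoolNum(fetch_threads);
    }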
@@ -105,7 +105,8 @@ InterimSealedIndexTranslator::get_cells(
                 false);
         }
     } else {
-        vec_index = std::make_unique<index::VectorMemIndex<float>>(
+        // sparse vector case
+        vec_index = std::make_unique<index::VectorMemIndex<sparse_u32_f32>>(
             DataType::NONE,
             index_type_,
             metric_type_,
@@ -75,9 +75,9 @@ ValidateIndexParams(const char* index_type,
                 knowhere::Version::GetCurrentVersion().VersionNumber(),
                 json,
                 error_msg);
-    } else if (dataType == milvus::DataType::VECTOR_SPARSE_FLOAT) {
+    } else if (dataType == milvus::DataType::VECTOR_SPARSE_U32_F32) {
         status =
-            knowhere::IndexStaticFaced<knowhere::fp32>::ConfigCheck(
+            knowhere::IndexStaticFaced<knowhere::sparse_u32_f32>::ConfigCheck(
                 index_type,
                 knowhere::Version::GetCurrentVersion().VersionNumber(),
                 json,
@ -476,7 +476,7 @@ DiskFileManagerImpl::cache_raw_data_to_disk_common(
|
|||||||
GetFieldDataMeta().segment_id,
|
GetFieldDataMeta().segment_id,
|
||||||
GetFieldDataMeta().field_id) +
|
GetFieldDataMeta().field_id) +
|
||||||
"raw_data";
|
"raw_data";
|
||||||
if (dt == milvus::DataType::VECTOR_SPARSE_FLOAT) {
|
if (dt == milvus::DataType::VECTOR_SPARSE_U32_F32) {
|
||||||
local_data_path += ".sparse_u32_f32";
|
local_data_path += ".sparse_u32_f32";
|
||||||
}
|
}
|
||||||
local_chunk_manager->CreateFile(local_data_path);
|
local_chunk_manager->CreateFile(local_data_path);
|
||||||
@ -484,13 +484,13 @@ DiskFileManagerImpl::cache_raw_data_to_disk_common(
|
|||||||
init_file_info(data_type);
|
init_file_info(data_type);
|
||||||
file_created = true;
|
file_created = true;
|
||||||
}
|
}
|
||||||
if (data_type == milvus::DataType::VECTOR_SPARSE_FLOAT) {
|
if (data_type == milvus::DataType::VECTOR_SPARSE_U32_F32) {
|
||||||
dim =
|
dim =
|
||||||
(uint32_t)(std::dynamic_pointer_cast<FieldData<SparseFloatVector>>(
|
(uint32_t)(std::dynamic_pointer_cast<FieldData<SparseFloatVector>>(
|
||||||
field_data)
|
field_data)
|
||||||
->Dim());
|
->Dim());
|
||||||
auto sparse_rows =
|
auto sparse_rows =
|
||||||
static_cast<const knowhere::sparse::SparseRow<float>*>(
|
static_cast<const knowhere::sparse::SparseRow<sparseValueType>*>(
|
||||||
field_data->Data());
|
field_data->Data());
|
||||||
for (size_t i = 0; i < field_data->Length(); ++i) {
|
for (size_t i = 0; i < field_data->Length(); ++i) {
|
||||||
auto row = sparse_rows[i];
|
auto row = sparse_rows[i];
|
||||||

@@ -620,9 +620,11 @@ WriteOptFieldIvfDataImpl(
 
     // Do not write to disk if there is only one value
     if (mp.size() <= 1) {
+        LOG_INFO("There are only one category, skip caching to local disk");
         return false;
     }
 
+    LOG_INFO("Get opt fields with {} categories", mp.size());
     local_chunk_manager->Write(local_data_path,
                                write_offset,
                                const_cast<int64_t*>(&field_id),

@@ -712,7 +714,31 @@ WriteOptFieldsIvfMeta(
 }
 
 std::string
-DiskFileManagerImpl::CacheOptFieldToDisk(OptFieldT& fields_map) {
+DiskFileManagerImpl::CacheOptFieldToDisk(const Config& config) {
+    auto storage_version =
+        index::GetValueFromConfig<int64_t>(config, STORAGE_VERSION_KEY)
+            .value_or(0);
+    auto opt_fields =
+        index::GetValueFromConfig<OptFieldT>(config, VEC_OPT_FIELDS);
+    if (!opt_fields.has_value()) {
+        return "";
+    }
+
+    std::vector<std::vector<std::string>> remote_files_storage_v2;
+    if (storage_version == STORAGE_V2) {
+        auto segment_insert_files =
+            index::GetValueFromConfig<std::vector<std::vector<std::string>>>(
+                config, SEGMENT_INSERT_FILES_KEY);
+        AssertInfo(segment_insert_files.has_value(),
+                   "segment insert files is empty when build index while "
+                   "caching opt fields");
+        remote_files_storage_v2 = segment_insert_files.value();
+        for (auto& remote_files : remote_files_storage_v2) {
+            SortByPath(remote_files);
+        }
+    }
+
+    auto fields_map = opt_fields.value();
     const uint32_t num_of_fields = fields_map.size();
     if (0 == num_of_fields) {
         return "";

@@ -737,6 +763,13 @@ DiskFileManagerImpl::CacheOptFieldToDisk(OptFieldT& fields_map) {
     std::unordered_set<int64_t> actual_field_ids;
     for (auto& [field_id, tup] : fields_map) {
         const auto& field_type = std::get<1>(tup);
+
+        std::vector<FieldDataPtr> field_datas;
+        // fetch scalar data from storage v2
+        if (storage_version == STORAGE_V2) {
+            field_datas = GetFieldDatasFromStorageV2(
+                remote_files_storage_v2, field_id, field_type, 1, fs_);
+        } else {  // original way
         auto& field_paths = std::get<2>(tup);
         if (0 == field_paths.size()) {
             LOG_WARN("optional field {} has no data", field_id);

@@ -744,8 +777,8 @@ DiskFileManagerImpl::CacheOptFieldToDisk(OptFieldT& fields_map) {
         }
 
         SortByPath(field_paths);
-        std::vector<FieldDataPtr> field_datas =
-            FetchFieldData(rcm_.get(), field_paths);
+            field_datas = FetchFieldData(rcm_.get(), field_paths);
+        }
 
         if (WriteOptFieldIvfData(field_type,
                                  field_id,

@@ -934,6 +967,8 @@ template std::string
 DiskFileManagerImpl::CacheRawDataToDisk<bfloat16>(const Config& config);
 template std::string
 DiskFileManagerImpl::CacheRawDataToDisk<bin1>(const Config& config);
+template std::string
+DiskFileManagerImpl::CacheRawDataToDisk<sparse_u32_f32>(const Config& config);
 
 std::string
 DiskFileManagerImpl::GetRemoteIndexFilePrefixV2() const {

@@ -158,7 +158,7 @@ class DiskFileManagerImpl : public FileManagerImpl {
     CacheRawDataToDisk(const Config& config);
 
     std::string
-    CacheOptFieldToDisk(OptFieldT& fields_map);
+    CacheOptFieldToDisk(const Config& config);
 
     std::string
     GetRemoteIndexPrefix() const {
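With the signature change above, callers no longer hand CacheOptFieldToDisk the optional-field map directly; everything travels through the build Config. A hedged sketch of the calling convention, using only the keys that appear in this diff (VEC_OPT_FIELDS, STORAGE_VERSION_KEY, SEGMENT_INSERT_FILES_KEY); the surrounding variables (opt_fields, segment_insert_files, file_manager) are hypothetical placeholders, not code from the patch.

// Sketch only, assuming the keys shown in this diff.
milvus::Config config;
config[VEC_OPT_FIELDS] = opt_fields;                      // OptFieldT map, as before
config[STORAGE_VERSION_KEY] = int64_t(STORAGE_V2);        // omit to fall back to the v1 path
config[SEGMENT_INSERT_FILES_KEY] = segment_insert_files;  // only consulted for storage v2
auto local_ivf_path = file_manager->CacheOptFieldToDisk(config);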

@@ -300,11 +300,11 @@ BaseEventData::Serialize() {
             }
             break;
         }
-        case DataType::VECTOR_SPARSE_FLOAT: {
+        case DataType::VECTOR_SPARSE_U32_F32: {
             for (size_t offset = 0; offset < field_data->get_num_rows();
                  ++offset) {
                 auto row =
-                    static_cast<const knowhere::sparse::SparseRow<float>*>(
+                    static_cast<const knowhere::sparse::SparseRow<sparseValueType>*>(
                         field_data->RawValue(offset));
                 payload_writer->add_one_binary_payload(
                     static_cast<const uint8_t*>(row->data()),

@@ -32,7 +32,7 @@ PayloadWriter::PayloadWriter(const DataType column_type, bool nullable)
 // create payload writer for vector data type
 PayloadWriter::PayloadWriter(const DataType column_type, int dim, bool nullable)
     : column_type_(column_type), nullable_(nullable) {
-    AssertInfo(column_type != DataType::VECTOR_SPARSE_FLOAT,
+    AssertInfo(column_type != DataType::VECTOR_SPARSE_U32_F32,
                "PayloadWriter for Sparse Float Vector should be created "
                "using the constructor without dimension");
     AssertInfo(nullable == false, "only scalcar type support null now");

@@ -20,6 +20,13 @@ RemoteInputStream::Read(void* data, size_t size) {
     return static_cast<size_t>(status.ValueOrDie());
 }
 
+size_t
+RemoteInputStream::ReadAt(void* data, size_t offset, size_t size) {
+    auto status = remote_file_->ReadAt(offset, size, data);
+    AssertInfo(status.ok(), "Failed to read from input stream");
+    return static_cast<size_t>(status.ValueOrDie());
+}
+
 size_t
 RemoteInputStream::Read(int fd, size_t size) {
     size_t read_batch_size =

@@ -29,6 +29,9 @@ class RemoteInputStream : public milvus::InputStream {
     size_t
     Read(void* data, size_t size) override;
 
+    size_t
+    ReadAt(void* data, size_t offset, size_t size) override;
+
     size_t
     Read(int fd, size_t size) override;
 
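The new ReadAt override gives RemoteInputStream a positional read: it delegates to the underlying Arrow file's ReadAt, so a caller can pull a byte range at an absolute offset without seeking first. A usage sketch follows; the stream setup, offsets, and sizes are hypothetical and only illustrate the signature added above.

// `stream` is assumed to be a RemoteInputStream wrapping an Arrow
// RandomAccessFile opened on a remote object (setup omitted in this fragment).
std::vector<uint8_t> buf(64);
// Read 64 bytes starting at absolute offset 128. Unlike Read(), the offset is
// passed explicitly, which is what enables offset-addressed access and, per
// Arrow's ReadAt contract, concurrent range reads on the same file.
size_t n = stream->ReadAt(buf.data(), /*offset=*/128, /*size=*/64);
AssertInfo(n == buf.size(), "short read from remote input stream");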

@@ -206,7 +206,7 @@ AddPayloadToArrowBuilder(std::shared_ptr<arrow::ArrayBuilder> builder,
             add_vector_payload(builder, const_cast<uint8_t*>(raw_data), length);
             break;
         }
-        case DataType::VECTOR_SPARSE_FLOAT: {
+        case DataType::VECTOR_SPARSE_U32_F32: {
             ThrowInfo(DataTypeInvalid,
                       "Sparse Float Vector payload should be added by calling "
                       "add_one_binary_payload",

@@ -287,7 +287,7 @@ CreateArrowBuilder(DataType data_type) {
             return std::make_shared<arrow::BinaryBuilder>();
         }
         // sparse float vector doesn't require a dim
-        case DataType::VECTOR_SPARSE_FLOAT: {
+        case DataType::VECTOR_SPARSE_U32_F32: {
             return std::make_shared<arrow::BinaryBuilder>();
         }
         default: {

@@ -416,7 +416,7 @@ CreateArrowSchema(DataType data_type, bool nullable) {
                 {arrow::field("val", arrow::binary(), nullable)});
         }
         // sparse float vector doesn't require a dim
-        case DataType::VECTOR_SPARSE_FLOAT: {
+        case DataType::VECTOR_SPARSE_U32_F32: {
             return arrow::schema(
                 {arrow::field("val", arrow::binary(), nullable)});
         }

@@ -456,7 +456,7 @@ CreateArrowSchema(DataType data_type, int dim, bool nullable) {
                 arrow::fixed_size_binary(dim * sizeof(bfloat16)),
                 nullable)});
         }
-        case DataType::VECTOR_SPARSE_FLOAT: {
+        case DataType::VECTOR_SPARSE_U32_F32: {
             return arrow::schema(
                 {arrow::field("val", arrow::binary(), nullable)});
         }

@@ -490,7 +490,7 @@ GetDimensionFromFileMetaData(const parquet::ColumnDescriptor* schema,
         case DataType::VECTOR_BFLOAT16: {
             return schema->type_length() / sizeof(bfloat16);
         }
-        case DataType::VECTOR_SPARSE_FLOAT: {
+        case DataType::VECTOR_SPARSE_U32_F32: {
             ThrowInfo(DataTypeInvalid,
                       fmt::format("GetDimensionFromFileMetaData should not be "
                                   "called for sparse vector"));

@@ -971,7 +971,7 @@ CreateFieldData(const DataType& type,
         case DataType::VECTOR_BFLOAT16:
             return std::make_shared<FieldData<BFloat16Vector>>(
                 dim, type, total_num_rows);
-        case DataType::VECTOR_SPARSE_FLOAT:
+        case DataType::VECTOR_SPARSE_U32_F32:
             return std::make_shared<FieldData<SparseFloatVector>>(
                 type, total_num_rows);
         case DataType::VECTOR_INT8:

@@ -14,7 +14,7 @@
 # Update KNOWHERE_VERSION for the first occurrence
 milvus_add_pkg_config("knowhere")
 set_property(DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR} PROPERTY INCLUDE_DIRECTORIES "")
-set( KNOWHERE_VERSION v2.6.1-rc )
+set( KNOWHERE_VERSION v2.6.1 )
 set( GIT_REPOSITORY "https://github.com/zilliztech/knowhere.git")
 
 message(STATUS "Knowhere repo: ${GIT_REPOSITORY}")

@@ -13,7 +13,7 @@
 
 milvus_add_pkg_config("milvus-common")
 set_property(DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR} PROPERTY INCLUDE_DIRECTORIES "")
-set( MILVUS-COMMON-VERSION 41fa9b1 )
+set( MILVUS-COMMON-VERSION 5770e40 )
 set( GIT_REPOSITORY "https://github.com/zilliztech/milvus-common.git")
 
 message(STATUS "milvus-common repo: ${GIT_REPOSITORY}")

@@ -29,7 +29,7 @@ INSTANTIATE_TEST_SUITE_P(
     ExprAlwaysTrueParameters,
     ExprAlwaysTrueTest,
     ::testing::Values(milvus::DataType::VECTOR_FLOAT,
-                      milvus::DataType::VECTOR_SPARSE_FLOAT));
+                      milvus::DataType::VECTOR_SPARSE_U32_F32));
 
 TEST_P(ExprAlwaysTrueTest, AlwaysTrue) {
     using namespace milvus;

@@ -27,8 +27,8 @@ using namespace milvus::query;
 namespace {
 
 std::vector<int>
-SearchRef(const knowhere::sparse::SparseRow<float>* base,
-          const knowhere::sparse::SparseRow<float>& query,
+SearchRef(const knowhere::sparse::SparseRow<milvus::sparseValueType>* base,
+          const knowhere::sparse::SparseRow<milvus::sparseValueType>& query,
           int nb,
           int topk) {
     std::vector<std::tuple<float, int>> res;

@@ -51,8 +51,8 @@ SearchRef(const knowhere::sparse::SparseRow<float>* base,
 }
 
 std::vector<int>
-RangeSearchRef(const knowhere::sparse::SparseRow<float>* base,
-               const knowhere::sparse::SparseRow<float>& query,
+RangeSearchRef(const knowhere::sparse::SparseRow<milvus::sparseValueType>* base,
+               const knowhere::sparse::SparseRow<milvus::sparseValueType>& query,
               int nb,
               float radius,
               float range_filter,

@@ -113,7 +113,7 @@ class TestSparseFloatSearchBruteForce : public ::testing::Test {
                 search_info,
                 index_info,
                 bitset_view,
-                DataType::VECTOR_SPARSE_FLOAT,
+                DataType::VECTOR_SPARSE_U32_F32,
                 DataType::NONE));
             return;
         }

@@ -122,7 +122,7 @@ class TestSparseFloatSearchBruteForce : public ::testing::Test {
             search_info,
             index_info,
             bitset_view,
-            DataType::VECTOR_SPARSE_FLOAT,
+            DataType::VECTOR_SPARSE_U32_F32,
             DataType::NONE);
         for (int i = 0; i < nq; i++) {
             auto ref = SearchRef(base.get(), *(query.get() + i), nb, topk);

@@ -137,7 +137,7 @@ class TestSparseFloatSearchBruteForce : public ::testing::Test {
             search_info,
             index_info,
             bitset_view,
-            DataType::VECTOR_SPARSE_FLOAT,
+            DataType::VECTOR_SPARSE_U32_F32,
             DataType::NONE);
         for (int i = 0; i < nq; i++) {
             auto ref = RangeSearchRef(

@@ -152,7 +152,7 @@ class TestSparseFloatSearchBruteForce : public ::testing::Test {
             search_info,
             index_info,
             bitset_view,
-            DataType::VECTOR_SPARSE_FLOAT);
+            DataType::VECTOR_SPARSE_U32_F32);
         auto iterators = result3.chunk_iterators();
         for (int i = 0; i < nq; i++) {
             auto it = iterators[i];

@@ -91,7 +91,7 @@ class BinlogIndexTest : public ::testing::TestWithParam<Param> {
             } else {
                 intermin_index_has_raw_data = true;
             }
-        } else if (data_type == DataType::VECTOR_SPARSE_FLOAT) {
+        } else if (data_type == DataType::VECTOR_SPARSE_U32_F32) {
             auto sparse_vecs = GenerateRandomSparseFloatVector(data_n);
             vec_field_data->FillFieldData(sparse_vecs.get(), data_n);
             data_d = std::dynamic_pointer_cast<

@@ -190,12 +190,12 @@ INSTANTIATE_TEST_SUITE_P(
                         knowhere::IndexEnum::
                             INDEX_FAISS_SCANN_DVR),  // intermin index not has data
         std::make_tuple(
-            DataType::VECTOR_SPARSE_FLOAT,
+            DataType::VECTOR_SPARSE_U32_F32,
             knowhere::metric::IP,
             knowhere::IndexEnum::
                 INDEX_SPARSE_INVERTED_INDEX,  //intermin index not has data
             std::nullopt),
-        std::make_tuple(DataType::VECTOR_SPARSE_FLOAT,
+        std::make_tuple(DataType::VECTOR_SPARSE_U32_F32,
                         knowhere::metric::IP,
                         knowhere::IndexEnum::
                             INDEX_SPARSE_WAND,  // intermin index not has data

@@ -568,7 +568,7 @@ TEST(chunk, test_sparse_float) {
     auto vecs = milvus::segcore::GenerateRandomSparseFloatVector(
         n_rows, kTestSparseDim, kTestSparseVectorDensity);
     auto field_data = milvus::storage::CreateFieldData(
-        storage::DataType::VECTOR_SPARSE_FLOAT, false, kTestSparseDim, n_rows);
+        storage::DataType::VECTOR_SPARSE_U32_F32, false, kTestSparseDim, n_rows);
     field_data->FillFieldData(vecs.get(), n_rows);
 
     storage::InsertEventData event_data;

@@ -593,7 +593,7 @@ TEST(chunk, test_sparse_float) {
 
     FieldMeta field_meta(FieldName("a"),
                          milvus::FieldId(1),
-                         DataType::VECTOR_SPARSE_FLOAT,
+                         DataType::VECTOR_SPARSE_U32_F32,
                          kTestSparseDim,
                          "IP",
                          false,

@@ -71,7 +71,7 @@ TEST_F(ChunkVectorTest, FillDataWithMmap) {
     auto bf16_vec = schema->AddDebugField(
         "bf16_vec", DataType::VECTOR_BFLOAT16, 128, metric_type);
     auto sparse_vec = schema->AddDebugField(
-        "sparse_vec", DataType::VECTOR_SPARSE_FLOAT, 128, metric_type);
+        "sparse_vec", DataType::VECTOR_SPARSE_U32_F32, 128, metric_type);
     auto int8_vec = schema->AddDebugField(
         "int8_vec", DataType::VECTOR_INT8, 128, metric_type);
     schema->set_primary_field_id(int64_field);

@@ -200,7 +200,7 @@ TEST_F(ChunkVectorTest, FillDataWithMmap) {
     auto fp16_vec_gt = dataset.get_col<float16>(fp16_vec);
     auto bf16_vec_gt = dataset.get_col<bfloat16>(bf16_vec);
     auto sparse_vec_gt =
-        dataset.get_col<knowhere::sparse::SparseRow<float>>(sparse_vec);
+        dataset.get_col<knowhere::sparse::SparseRow<milvus::sparseValueType>>(sparse_vec);
     auto int8_vec_gt = dataset.get_col<int8>(int8_vec);
 
     for (size_t i = 0; i < num_inserted; ++i) {

@@ -234,7 +234,7 @@ INSTANTIATE_TEST_SUITE_P(IsSparse, ChunkVectorTest, ::testing::Bool());
 TEST_P(ChunkVectorTest, SearchWithMmap) {
     auto is_sparse = GetParam();
     auto data_type =
-        is_sparse ? DataType::VECTOR_SPARSE_FLOAT : DataType::VECTOR_FLOAT;
+        is_sparse ? DataType::VECTOR_SPARSE_U32_F32 : DataType::VECTOR_FLOAT;
     auto schema = std::make_shared<Schema>();
     auto pk = schema->AddDebugField("pk", DataType::INT64);
     auto random = schema->AddDebugField("random", DataType::DOUBLE);

@@ -591,7 +591,7 @@ TEST(storage, InsertDataSparseFloat) {
     auto vecs = milvus::segcore::GenerateRandomSparseFloatVector(
         n_rows, kTestSparseDim, kTestSparseVectorDensity);
     auto field_data = milvus::storage::CreateFieldData(
-        storage::DataType::VECTOR_SPARSE_FLOAT, false, kTestSparseDim, n_rows);
+        storage::DataType::VECTOR_SPARSE_U32_F32, false, kTestSparseDim, n_rows);
     field_data->FillFieldData(vecs.get(), n_rows);
 
     auto payload_reader =

@@ -611,10 +611,10 @@ TEST(storage, InsertDataSparseFloat) {
                               std::make_pair(Timestamp(0), Timestamp(100)));
     auto new_payload = new_insert_data->GetFieldData();
     ASSERT_TRUE(new_payload->get_data_type() ==
-                storage::DataType::VECTOR_SPARSE_FLOAT);
+                storage::DataType::VECTOR_SPARSE_U32_F32);
     ASSERT_EQ(new_payload->get_num_rows(), n_rows);
     ASSERT_EQ(new_payload->get_null_count(), 0);
-    auto new_data = static_cast<const knowhere::sparse::SparseRow<float>*>(
+    auto new_data = static_cast<const knowhere::sparse::SparseRow<milvus::sparseValueType>*>(
         new_payload->Data());
 
     for (auto i = 0; i < n_rows; ++i) {

@@ -455,16 +455,20 @@ TEST_F(DiskAnnFileManagerTest, CacheOptFieldToDiskOptFieldMoreThanOne) {
         PrepareOptionalField<DataType::INT64>(file_manager, insert_file_path);
     opt_fields[kOptFieldId + 1] = {
         kOptFieldName + "second", DataType::INT64, {insert_file_path}};
-    EXPECT_THROW(file_manager->CacheOptFieldToDisk(opt_fields), SegcoreError);
+    milvus::Config config;
+    config[VEC_OPT_FIELDS] = opt_fields;
+    EXPECT_THROW(file_manager->CacheOptFieldToDisk(config), SegcoreError);
 }
 
 TEST_F(DiskAnnFileManagerTest, CacheOptFieldToDiskSpaceCorrect) {
     auto file_manager = CreateFileManager(cm_);
     const auto insert_file_path =
         PrepareInsertData<DataType::INT64, int64_t>(kOptFieldDataRange);
-    auto opt_fileds =
+    auto opt_fields =
         PrepareOptionalField<DataType::INT64>(file_manager, insert_file_path);
-    auto res = file_manager->CacheOptFieldToDisk(opt_fileds);
+    milvus::Config config;
+    config[VEC_OPT_FIELDS] = opt_fields;
+    auto res = file_manager->CacheOptFieldToDisk(config);
     ASSERT_FALSE(res.empty());
     CheckOptFieldCorrectness(res);
 }

@@ -475,7 +479,9 @@ TEST_F(DiskAnnFileManagerTest, CacheOptFieldToDiskSpaceCorrect) {
     auto insert_file_path = PrepareInsertData<TYPE, NATIVE_TYPE>(RANGE); \
     auto opt_fields = \
         PrepareOptionalField<TYPE>(file_manager, insert_file_path); \
-    auto res = file_manager->CacheOptFieldToDisk(opt_fields); \
+    milvus::Config config; \
+    config[VEC_OPT_FIELDS] = opt_fields; \
+    auto res = file_manager->CacheOptFieldToDisk(config); \
     ASSERT_FALSE(res.empty()); \
     CheckOptFieldCorrectness(res, RANGE); \
 };

@@ -496,9 +502,11 @@ TEST_F(DiskAnnFileManagerTest, CacheOptFieldToDiskOnlyOneCategory) {
     {
         const auto insert_file_path =
             PrepareInsertData<DataType::INT64, int64_t>(1);
-        auto opt_fileds = PrepareOptionalField<DataType::INT64>(
+        auto opt_fields = PrepareOptionalField<DataType::INT64>(
            file_manager, insert_file_path);
-        auto res = file_manager->CacheOptFieldToDisk(opt_fileds);
+        milvus::Config config;
+        config[VEC_OPT_FIELDS] = opt_fields;
+        auto res = file_manager->CacheOptFieldToDisk(config);
         ASSERT_TRUE(res.empty());
     }
 }

@@ -105,7 +105,7 @@ class TaskTest : public testing::TestWithParam<DataType> {
 INSTANTIATE_TEST_SUITE_P(TaskTestSuite,
                          TaskTest,
                          ::testing::Values(DataType::VECTOR_FLOAT,
-                                           DataType::VECTOR_SPARSE_FLOAT));
+                                           DataType::VECTOR_SPARSE_U32_F32));
 
 TEST_P(TaskTest, RegisterFunction) {
     milvus::exec::expression::FunctionFactory& factory =

@@ -95,7 +95,7 @@ INSTANTIATE_TEST_SUITE_P(
         std::make_tuple(std::pair(milvus::DataType::VECTOR_FLOAT,
                                   knowhere::metric::L2),
                         false),
-        std::make_tuple(std::pair(milvus::DataType::VECTOR_SPARSE_FLOAT,
+        std::make_tuple(std::pair(milvus::DataType::VECTOR_SPARSE_U32_F32,
                                   knowhere::metric::IP),
                         false),
         std::make_tuple(std::pair(milvus::DataType::VECTOR_BINARY,

@@ -104,7 +104,7 @@ INSTANTIATE_TEST_SUITE_P(
         std::make_tuple(std::pair(milvus::DataType::VECTOR_FLOAT,
                                   knowhere::metric::L2),
                         true),
-        std::make_tuple(std::pair(milvus::DataType::VECTOR_SPARSE_FLOAT,
+        std::make_tuple(std::pair(milvus::DataType::VECTOR_SPARSE_U32_F32,
                                   knowhere::metric::IP),
                         true),
         std::make_tuple(std::pair(milvus::DataType::VECTOR_BINARY,

@@ -109,7 +109,7 @@ class GrowingTest
         } else if (index_type ==
                        knowhere::IndexEnum::INDEX_SPARSE_INVERTED_INDEX ||
                    index_type == knowhere::IndexEnum::INDEX_SPARSE_WAND) {
-            data_type = DataType::VECTOR_SPARSE_FLOAT;
+            data_type = DataType::VECTOR_SPARSE_U32_F32;
         } else {
             ASSERT_TRUE(false);
         }

@@ -242,7 +242,7 @@ TEST_P(GrowingTest, FillData) {
         if (data_type == DataType::VECTOR_FLOAT) {
             EXPECT_EQ(vec_result->vectors().float_vector().data_size(),
                       num_inserted * dim);
-        } else if (data_type == DataType::VECTOR_SPARSE_FLOAT) {
+        } else if (data_type == DataType::VECTOR_SPARSE_U32_F32) {
             EXPECT_EQ(
                 vec_result->vectors().sparse_float_vector().contents_size(),
                 num_inserted);

@@ -41,7 +41,7 @@ class GrowingIndexTest : public ::testing::TestWithParam<Param> {
         metric_type = std::get<2>(param);
         dense_vec_intermin_index_type = std::get<3>(param);
         dense_refine_type = std::get<4>(param);
-        if (data_type == DataType::VECTOR_SPARSE_FLOAT) {
+        if (data_type == DataType::VECTOR_SPARSE_U32_F32) {
             is_sparse = true;
             if (metric_type == knowhere::metric::IP) {
                 intermin_index_with_raw_data = true;

@@ -108,7 +108,7 @@ INSTANTIATE_TEST_SUITE_P(
     SparseIndexTypeParameters,
     GrowingIndexTest,
     ::testing::Combine(
-        ::testing::Values(DataType::VECTOR_SPARSE_FLOAT),
+        ::testing::Values(DataType::VECTOR_SPARSE_U32_F32),
         // VecIndexConfig will convert INDEX_SPARSE_INVERTED_INDEX/
         // INDEX_SPARSE_WAND to INDEX_SPARSE_INVERTED_INDEX_CC/
         // INDEX_SPARSE_WAND_CC, thus no need to use _CC version here.

@@ -409,7 +409,7 @@ TEST_P(GrowingIndexTest, AddWithoutBuildPool) {
         }
         EXPECT_EQ(index->Count(), (add_cont + 1) * N);
     } else if (is_sparse) {
-        auto index = std::make_unique<milvus::index::VectorMemIndex<float>>(
+        auto index = std::make_unique<milvus::index::VectorMemIndex<sparse_u32_f32>>(
             DataType::NONE,
             index_type,
             metric_type,

@@ -417,7 +417,7 @@ TEST_P(GrowingIndexTest, AddWithoutBuildPool) {
             false,
             milvus::storage::FileManagerContext());
         auto sparse_data =
-            dataset.get_col<knowhere::sparse::SparseRow<float>>(vec);
+            dataset.get_col<knowhere::sparse::SparseRow<milvus::sparseValueType>>(vec);
         index->BuildWithDataset(
             knowhere::GenDataSet(N, dim, sparse_data.data()), build_config);
         for (int i = 0; i < add_cont; i++) {

@@ -560,14 +560,14 @@ TEST_P(GrowingIndexTest, GetVector) {
             }
         }
     } else if (is_sparse) {
-        // GetVector for VECTOR_SPARSE_FLOAT
+        // GetVector for VECTOR_SPARSE_U32_F32
         int64_t per_batch = 5000;
         int64_t n_batch = 20;
         int64_t dim = 128;
         for (int64_t i = 0; i < n_batch; i++) {
             auto dataset = DataGen(schema, per_batch);
             auto fakevec =
-                dataset.get_col<knowhere::sparse::SparseRow<float>>(vec);
+                dataset.get_col<knowhere::sparse::SparseRow<milvus::sparseValueType>>(vec);
             auto offset = segment->PreInsert(per_batch);
             segment->Insert(offset,
                             per_batch,

@@ -68,7 +68,7 @@ TestVecIndex() {
         status = BuildBinaryVecIndex(index, NB * DIM / 8, xb_data.data());
     } else if (std::is_same_v<TraitType, milvus::SparseFloatVector>) {
         auto xb_data =
-            dataset.template get_col<knowhere::sparse::SparseRow<float>>(
+            dataset.template get_col<knowhere::sparse::SparseRow<milvus::sparseValueType>>(
                 milvus::FieldId(100));
         status = BuildSparseFloatVecIndex(
             index,

@@ -70,9 +70,9 @@ class IndexWrapperTest : public ::testing::TestWithParam<Param> {
              DataType::VECTOR_BINARY},
             {knowhere::IndexEnum::INDEX_HNSW, DataType::VECTOR_FLOAT},
             {knowhere::IndexEnum::INDEX_SPARSE_INVERTED_INDEX,
-             DataType::VECTOR_SPARSE_FLOAT},
+             DataType::VECTOR_SPARSE_U32_F32},
             {knowhere::IndexEnum::INDEX_SPARSE_WAND,
-             DataType::VECTOR_SPARSE_FLOAT},
+             DataType::VECTOR_SPARSE_U32_F32},
         };
 
         vec_field_data_type = index_to_vec_type[index_type];

@@ -132,9 +132,9 @@ TEST_P(IndexWrapperTest, BuildAndQuery) {
         auto bin_vecs = dataset.get_col<uint8_t>(milvus::FieldId(100));
         xb_dataset = knowhere::GenDataSet(NB, DIM, bin_vecs.data());
         ASSERT_NO_THROW(index->Build(xb_dataset));
-    } else if (vec_field_data_type == DataType::VECTOR_SPARSE_FLOAT) {
+    } else if (vec_field_data_type == DataType::VECTOR_SPARSE_U32_F32) {
         auto dataset = GenFieldData(NB, metric_type, vec_field_data_type);
-        auto sparse_vecs = dataset.get_col<knowhere::sparse::SparseRow<float>>(
+        auto sparse_vecs = dataset.get_col<knowhere::sparse::SparseRow<milvus::sparseValueType>>(
             milvus::FieldId(100));
         xb_dataset =
             knowhere::GenDataSet(NB, kTestSparseDim, sparse_vecs.data());

@@ -159,7 +159,7 @@ TEST_P(IndexWrapperTest, BuildAndQuery) {
         vec_field_data_type, config, file_manager_context);
     auto vec_index =
         static_cast<milvus::indexbuilder::VecIndexCreator*>(copy_index.get());
-    if (vec_field_data_type != DataType::VECTOR_SPARSE_FLOAT) {
+    if (vec_field_data_type != DataType::VECTOR_SPARSE_U32_F32) {
         ASSERT_EQ(vec_index->dim(), DIM);
     }
 

@@ -177,9 +177,9 @@ TEST_P(IndexWrapperTest, BuildAndQuery) {
         auto xq_dataset =
             knowhere::GenDataSet(NQ, DIM, xb_data.data() + DIM * query_offset);
         result = vec_index->Query(xq_dataset, search_info, nullptr);
-    } else if (vec_field_data_type == DataType::VECTOR_SPARSE_FLOAT) {
+    } else if (vec_field_data_type == DataType::VECTOR_SPARSE_U32_F32) {
         auto dataset = GenFieldData(NQ, metric_type, vec_field_data_type);
-        auto xb_data = dataset.get_col<knowhere::sparse::SparseRow<float>>(
+        auto xb_data = dataset.get_col<knowhere::sparse::SparseRow<milvus::sparseValueType>>(
             milvus::FieldId(100));
         auto xq_dataset =
             knowhere::GenDataSet(NQ, kTestSparseDim, xb_data.data());

@@ -331,7 +331,7 @@ class IndexTest : public ::testing::TestWithParam<Param> {
         if (index_type == knowhere::IndexEnum::INDEX_SPARSE_INVERTED_INDEX ||
             index_type == knowhere::IndexEnum::INDEX_SPARSE_WAND) {
             is_sparse = true;
-            vec_field_data_type = milvus::DataType::VECTOR_SPARSE_FLOAT;
+            vec_field_data_type = milvus::DataType::VECTOR_SPARSE_U32_F32;
         } else if (IsBinaryVectorMetricType(metric_type)) {
             is_binary = true;
             vec_field_data_type = milvus::DataType::VECTOR_BINARY;

@@ -349,7 +349,7 @@ class IndexTest : public ::testing::TestWithParam<Param> {
         } else if (is_sparse) {
             // sparse vector
             xb_sparse_data =
-                dataset.get_col<knowhere::sparse::SparseRow<float>>(
+                dataset.get_col<knowhere::sparse::SparseRow<milvus::sparseValueType>>(
                     milvus::FieldId(100));
             xb_dataset =
                 knowhere::GenDataSet(NB, kTestSparseDim, xb_sparse_data.data());

@@ -382,7 +382,7 @@ class IndexTest : public ::testing::TestWithParam<Param> {
     knowhere::DataSetPtr xb_dataset;
     FixedVector<float> xb_data;
     FixedVector<uint8_t> xb_bin_data;
-    FixedVector<knowhere::sparse::SparseRow<float>> xb_sparse_data;
+    FixedVector<knowhere::sparse::SparseRow<milvus::sparseValueType>> xb_sparse_data;
     knowhere::DataSetPtr xq_dataset;
     int64_t query_offset = 100;
     int64_t NB = 3000;  // will be updated to 27000 for mmap+hnsw

@@ -686,7 +686,7 @@ TEST_P(IndexTest, GetVector_EmptySparseVector) {
     }
     NB = 3;
 
-    std::vector<knowhere::sparse::SparseRow<float>> vec;
+    std::vector<knowhere::sparse::SparseRow<milvus::sparseValueType>> vec;
     vec.reserve(NB);
     vec.emplace_back(2);
     vec[0].set_at(0, 1, 1.0);

@@ -47,8 +47,8 @@ class IndexLoadTest : public ::testing::TestWithParam<Param> {
             data_type = milvus::DataType::VECTOR_FLOAT16;
         } else if (field_type == "vector_binary") {
             data_type = milvus::DataType::VECTOR_BINARY;
-        } else if (field_type == "vector_sparse_float") {
-            data_type = milvus::DataType::VECTOR_SPARSE_FLOAT;
+        } else if (field_type == "VECTOR_SPARSE_U32_F32") {
+            data_type = milvus::DataType::VECTOR_SPARSE_U32_F32;
         } else if (field_type == "vector_int8") {
             data_type = milvus::DataType::VECTOR_INT8;
         } else if (field_type == "array") {

@@ -46,7 +46,7 @@ class RetrieveTest : public ::testing::TestWithParam<Param> {
 INSTANTIATE_TEST_SUITE_P(RetrieveTest,
                          RetrieveTest,
                          ::testing::Values(DataType::VECTOR_FLOAT,
-                                           DataType::VECTOR_SPARSE_FLOAT));
+                                           DataType::VECTOR_SPARSE_U32_F32));
 
 TEST_P(RetrieveTest, AutoID) {
     auto schema = std::make_shared<Schema>();

@@ -422,7 +422,7 @@ TEST_P(RetrieveTest, LargeTimestamp) {
             Assert(field_data.vectors().float_vector().data_size() ==
                    target_num * DIM);
         }
-        if (DataType(field_data.type()) == DataType::VECTOR_SPARSE_FLOAT) {
+        if (DataType(field_data.type()) == DataType::VECTOR_SPARSE_U32_F32) {
             Assert(field_data.vectors()
                        .sparse_float_vector()
                        .contents_size() == target_num);

@@ -97,8 +97,8 @@ TEST(GetArrowDataTypeTest, VECTOR_BFLOAT16) {
     ASSERT_TRUE(result->Equals(arrow::fixed_size_binary(dim * 2)));
 }
 
-TEST(GetArrowDataTypeTest, VECTOR_SPARSE_FLOAT) {
-    auto result = GetArrowDataType(DataType::VECTOR_SPARSE_FLOAT);
+TEST(GetArrowDataTypeTest, VECTOR_SPARSE_U32_F32) {
+    auto result = GetArrowDataType(DataType::VECTOR_SPARSE_U32_F32);
     ASSERT_TRUE(result->Equals(arrow::binary()));
 }
 

@@ -114,7 +114,7 @@ struct GeneratedData {
             } else {
                 if (field_meta.is_vector() &&
                     field_meta.get_data_type() !=
-                        DataType::VECTOR_SPARSE_FLOAT) {
+                        DataType::VECTOR_SPARSE_U32_F32) {
                     if (field_meta.get_data_type() == DataType::VECTOR_FLOAT) {
                         int len = raw_->num_rows() * field_meta.get_dim();
                         ret.resize(len);

@@ -164,7 +164,7 @@ struct GeneratedData {
             }
             if constexpr (std::is_same_v<
                               T,
-                              knowhere::sparse::SparseRow<float>>) {
+                              knowhere::sparse::SparseRow<milvus::sparseValueType>>) {
                 auto sparse_float_array =
                     target_field_data.vectors().sparse_float_vector();
                 auto rows =

@@ -301,7 +301,7 @@ struct GeneratedData {
                     int array_len);
 };
 
-inline std::unique_ptr<knowhere::sparse::SparseRow<float>[]>
+inline std::unique_ptr<knowhere::sparse::SparseRow<milvus::sparseValueType>[]>
 GenerateRandomSparseFloatVector(size_t rows,
                                 size_t cols = kTestSparseDim,
                                 float density = kTestSparseVectorDensity,

@@ -340,13 +340,13 @@ GenerateRandomSparseFloatVector(size_t rows,
             data[row][col] = val;
         }
 
-    auto tensor = std::make_unique<knowhere::sparse::SparseRow<float>[]>(rows);
+    auto tensor = std::make_unique<knowhere::sparse::SparseRow<milvus::sparseValueType>[]>(rows);
 
     for (int32_t i = 0; i < rows; ++i) {
         if (data[i].size() == 0) {
            continue;
        }
-        knowhere::sparse::SparseRow<float> row(data[i].size());
+        knowhere::sparse::SparseRow<milvus::sparseValueType> row(data[i].size());
         size_t j = 0;
         for (auto& [idx, val] : data[i]) {
             row.set_at(j++, idx, val);
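As a reference for the renamed type, the row-construction pattern these generators and tests rely on is unchanged: a SparseRow is sized by its number of non-zero entries and populated with set_at(position, index, value). A small sketch (the indices and values are arbitrary; milvus::sparseValueType is assumed to alias float, matching the U32_F32 naming):

// Build one sparse row with two non-zero (index, value) entries.
knowhere::sparse::SparseRow<milvus::sparseValueType> row(2);
row.set_at(0, /*index=*/1, /*value=*/1.0f);
row.set_at(1, /*index=*/100, /*value=*/0.5f);
// Arrays of such rows are what GenerateRandomSparseFloatVector returns and what
// knowhere::GenDataSet(nb, dim, rows.data()) consumes in the index tests above.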

@@ -544,7 +544,7 @@ DataGen(SchemaPtr schema,
                 insert_cols(data, N, field_meta, random_valid);
                 break;
             }
-            case DataType::VECTOR_SPARSE_FLOAT: {
+            case DataType::VECTOR_SPARSE_U32_F32: {
                 auto res = GenerateRandomSparseFloatVector(
                     N, kTestSparseDim, kTestSparseVectorDensity, seed);
                 auto array = milvus::segcore::CreateDataArrayFrom(

@@ -595,7 +595,7 @@ DataGen(SchemaPtr schema,
                 obj->assign(data, length * sizeof(float16));
                 break;
             }
-            case DataType::VECTOR_SPARSE_FLOAT:
+            case DataType::VECTOR_SPARSE_U32_F32:
                 ThrowInfo(DataTypeInvalid, "not implemented");
                 break;
             case DataType::VECTOR_BFLOAT16: {

@@ -1195,10 +1195,10 @@ CreateFieldDataFromDataArray(ssize_t raw_count,
             createFieldData(raw_data, DataType::VECTOR_BFLOAT16, dim);
             break;
         }
-        case DataType::VECTOR_SPARSE_FLOAT: {
+        case DataType::VECTOR_SPARSE_U32_F32: {
             auto sparse_float_array = data->vectors().sparse_float_vector();
             auto rows = SparseBytesToRows(sparse_float_array.contents());
-            createFieldData(rows.get(), DataType::VECTOR_SPARSE_FLOAT, 0);
+            createFieldData(rows.get(), DataType::VECTOR_SPARSE_U32_F32, 0);
             break;
         }
         case DataType::VECTOR_INT8: {

@@ -234,7 +234,7 @@ GenFieldData(int64_t N,
     schema->AddDebugField(
         "fakevec",
         data_type,
-        (data_type != milvus::DataType::VECTOR_SPARSE_FLOAT ? dim : 0),
+        (data_type != milvus::DataType::VECTOR_SPARSE_U32_F32 ? dim : 0),
         metric_type);
     return milvus::segcore::DataGen(schema, N);
 }

@@ -259,6 +259,9 @@ func (node *QueryNode) InitSegcore() error {
 	cKnowhereThreadPoolSize := C.uint32_t(paramtable.Get().QueryNodeCfg.KnowhereThreadPoolSize.GetAsUint32())
 	C.SegcoreSetKnowhereSearchThreadPoolNum(cKnowhereThreadPoolSize)
 
+	cKnowhereFetchThreadPoolSize := C.uint32_t(paramtable.Get().QueryNodeCfg.KnowhereFetchThreadPoolSize.GetAsUint32())
+	C.SegcoreSetKnowhereFetchThreadPoolNum(cKnowhereFetchThreadPoolSize)
+
 	// override segcore SIMD type
 	cSimdType := C.CString(paramtable.Get().CommonCfg.SimdType.GetValue())
 	C.SegcoreSetSimdType(cSimdType)

@@ -2879,6 +2879,7 @@ type queryNodeConfig struct {
 	StatsPublishInterval ParamItem `refreshable:"true"`
 
 	// segcore
+	KnowhereFetchThreadPoolSize ParamItem `refreshable:"false"`
 	KnowhereThreadPoolSize      ParamItem `refreshable:"false"`
 	ChunkRows                   ParamItem `refreshable:"false"`
 	EnableInterminSegmentIndex  ParamItem `refreshable:"false"`

@@ -3322,6 +3323,25 @@ If set to 0, time based eviction is disabled.`,
 	}
 	p.KnowhereThreadPoolSize.Init(base.mgr)
 
+	p.KnowhereFetchThreadPoolSize = ParamItem{
+		Key:          "queryNode.segcore.knowhereFetchThreadPoolNumRatio",
+		Version:      "2.6.0",
+		DefaultValue: "4",
+		Formatter: func(v string) string {
+			factor := getAsInt64(v)
+			if factor <= 0 {
+				factor = 1
+			} else if factor > 32 {
+				factor = 32
+			}
+			knowhereFetchThreadPoolSize := uint32(hardware.GetCPUNum()) * uint32(factor)
+			return strconv.FormatUint(uint64(knowhereFetchThreadPoolSize), 10)
+		},
+		Doc:    "The number of threads in knowhere's fetch thread pool for object storage. The pool size will multiply with knowhereThreadPoolNumRatio([1, 32])",
+		Export: false,
+	}
+	p.KnowhereFetchThreadPoolSize.Init(base.mgr)
+
 	p.ChunkRows = ParamItem{
 		Key:     "queryNode.segcore.chunkRows",
 		Version: "2.0.0",
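Worked example of the formatter above (an illustration, not part of the patch): on a 16-core query node with the default ratio of 4, the knowhere fetch pool is sized at 16 x 4 = 64 threads; a configured ratio of 0 or below is clamped to 1 (16 threads in this example), and anything above 32 is clamped to 32 (512 threads).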