mirror of
https://gitee.com/milvus-io/milvus.git
synced 2026-01-07 19:31:51 +08:00
enhance: add sparse_u32_f32 data type for sparse vector (#43974)
issue: #43973 Signed-off-by: marcelo.chen <marcelo.chen@zilliz.com>
This commit is contained in:
parent
da156981c6
commit
e13e19cd2c
@ -528,7 +528,7 @@ class SparseFloatVectorChunk : public Chunk {
|
||||
reinterpret_cast<uint64_t*>(data + null_bitmap_bytes_num);
|
||||
for (int i = 0; i < row_nums; i++) {
|
||||
vec_[i] = {(offsets_ptr[i + 1] - offsets_ptr[i]) /
|
||||
knowhere::sparse::SparseRow<sparseValueType>::element_size(),
|
||||
knowhere::sparse::SparseRow<SparseValueType>::element_size(),
|
||||
reinterpret_cast<uint8_t*>(data + offsets_ptr[i]),
|
||||
false};
|
||||
dim_ = std::max(dim_, vec_[i].dim());
|
||||
@ -547,7 +547,7 @@ class SparseFloatVectorChunk : public Chunk {
|
||||
}
|
||||
|
||||
// only for test
|
||||
std::vector<knowhere::sparse::SparseRow<sparseValueType>>&
|
||||
std::vector<knowhere::sparse::SparseRow<SparseValueType>>&
|
||||
Vec() {
|
||||
return vec_;
|
||||
}
|
||||
@ -559,6 +559,6 @@ class SparseFloatVectorChunk : public Chunk {
|
||||
|
||||
private:
|
||||
int64_t dim_ = 0;
|
||||
std::vector<knowhere::sparse::SparseRow<sparseValueType>> vec_;
|
||||
std::vector<knowhere::sparse::SparseRow<SparseValueType>> vec_;
|
||||
};
|
||||
} // namespace milvus
|
||||
@ -300,7 +300,7 @@ FieldDataImpl<Type, is_type_entire_row>::FillFieldData(
|
||||
AssertInfo(array->type()->id() == arrow::Type::type::BINARY,
|
||||
"inconsistent data type");
|
||||
auto arr = std::dynamic_pointer_cast<arrow::BinaryArray>(array);
|
||||
std::vector<knowhere::sparse::SparseRow<sparseValueType>> values;
|
||||
std::vector<knowhere::sparse::SparseRow<SparseValueType>> values;
|
||||
for (size_t index = 0; index < element_count; ++index) {
|
||||
auto view = arr->GetString(index);
|
||||
values.push_back(
|
||||
@ -483,7 +483,7 @@ template class FieldDataImpl<int8_t, false>;
|
||||
template class FieldDataImpl<float, false>;
|
||||
template class FieldDataImpl<float16, false>;
|
||||
template class FieldDataImpl<bfloat16, false>;
|
||||
template class FieldDataImpl<knowhere::sparse::SparseRow<sparseValueType>, true>;
|
||||
template class FieldDataImpl<knowhere::sparse::SparseRow<SparseValueType>, true>;
|
||||
template class FieldDataImpl<VectorArray, true>;
|
||||
|
||||
FieldDataPtr
|
||||
|
||||
@ -723,11 +723,11 @@ class FieldDataJsonImpl : public FieldDataImpl<Json, true> {
|
||||
};
|
||||
|
||||
class FieldDataSparseVectorImpl
|
||||
: public FieldDataImpl<knowhere::sparse::SparseRow<sparseValueType>, true> {
|
||||
: public FieldDataImpl<knowhere::sparse::SparseRow<SparseValueType>, true> {
|
||||
public:
|
||||
explicit FieldDataSparseVectorImpl(DataType data_type,
|
||||
int64_t total_num_rows = 0)
|
||||
: FieldDataImpl<knowhere::sparse::SparseRow<sparseValueType>, true>(
|
||||
: FieldDataImpl<knowhere::sparse::SparseRow<SparseValueType>, true>(
|
||||
/*dim=*/1, data_type, false, total_num_rows),
|
||||
vec_dim_(0) {
|
||||
AssertInfo(data_type == DataType::VECTOR_SPARSE_U32_F32,
|
||||
@ -753,7 +753,7 @@ class FieldDataSparseVectorImpl
|
||||
}
|
||||
|
||||
// source is a pointer to element_count of
|
||||
// knowhere::sparse::SparseRow<sparseValueType>
|
||||
// knowhere::sparse::SparseRow<SparseValueType>
|
||||
void
|
||||
FillFieldData(const void* source, ssize_t element_count) override {
|
||||
if (element_count == 0) {
|
||||
@ -765,7 +765,7 @@ class FieldDataSparseVectorImpl
|
||||
resize_field_data(length_ + element_count);
|
||||
}
|
||||
auto ptr =
|
||||
static_cast<const knowhere::sparse::SparseRow<sparseValueType>*>(source);
|
||||
static_cast<const knowhere::sparse::SparseRow<SparseValueType>*>(source);
|
||||
for (int64_t i = 0; i < element_count; ++i) {
|
||||
auto& row = ptr[i];
|
||||
vec_dim_ = std::max(vec_dim_, row.dim());
|
||||
@ -774,7 +774,7 @@ class FieldDataSparseVectorImpl
|
||||
length_ += element_count;
|
||||
}
|
||||
|
||||
// each binary in array is a knowhere::sparse::SparseRow<sparseValueType>
|
||||
// each binary in array is a knowhere::sparse::SparseRow<SparseValueType>
|
||||
void
|
||||
FillFieldData(const std::shared_ptr<arrow::BinaryArray>& array) override {
|
||||
auto n = array->length();
|
||||
|
||||
@ -37,7 +37,7 @@ constexpr bool IsScalar =
|
||||
|
||||
template <typename T>
|
||||
constexpr bool IsSparse = std::is_same_v<T, SparseFloatVector> ||
|
||||
std::is_same_v<T, knowhere::sparse::SparseRow<sparseValueType>>;
|
||||
std::is_same_v<T, knowhere::sparse::SparseRow<SparseValueType>>;
|
||||
|
||||
template <typename T>
|
||||
constexpr bool IsVariableType =
|
||||
@ -52,7 +52,7 @@ template <typename T>
|
||||
constexpr bool IsVariableTypeSupportInChunk =
|
||||
std::is_same_v<T, std::string> || std::is_same_v<T, Array> ||
|
||||
std::is_same_v<T, Json> ||
|
||||
std::is_same_v<T, knowhere::sparse::SparseRow<sparseValueType>>;
|
||||
std::is_same_v<T, knowhere::sparse::SparseRow<SparseValueType>>;
|
||||
|
||||
template <typename T>
|
||||
using ChunkViewType = std::conditional_t<
|
||||
|
||||
@ -43,7 +43,7 @@ namespace milvus {
|
||||
(data_array->vectors().type##_vector().data())
|
||||
|
||||
using CheckDataValid = std::function<bool(size_t)>;
|
||||
using sparseValueType = typename knowhere::sparse_u32_f32::ValueType;
|
||||
using SparseValueType = typename knowhere::sparse_u32_f32::ValueType;
|
||||
|
||||
inline DatasetPtr
|
||||
GenDataset(const int64_t nb, const int64_t dim, const void* xb) {
|
||||
@ -246,17 +246,17 @@ EscapeBraces(const std::string& input) {
|
||||
return result;
|
||||
}
|
||||
|
||||
inline knowhere::sparse::SparseRow<sparseValueType>
|
||||
inline knowhere::sparse::SparseRow<SparseValueType>
|
||||
CopyAndWrapSparseRow(const void* data,
|
||||
size_t size,
|
||||
const bool validate = false) {
|
||||
size_t num_elements =
|
||||
size / knowhere::sparse::SparseRow<sparseValueType>::element_size();
|
||||
knowhere::sparse::SparseRow<sparseValueType> row(num_elements);
|
||||
size / knowhere::sparse::SparseRow<SparseValueType>::element_size();
|
||||
knowhere::sparse::SparseRow<SparseValueType> row(num_elements);
|
||||
std::memcpy(row.data(), data, size);
|
||||
if (validate) {
|
||||
AssertInfo(
|
||||
size % knowhere::sparse::SparseRow<sparseValueType>::element_size() == 0,
|
||||
size % knowhere::sparse::SparseRow<SparseValueType>::element_size() == 0,
|
||||
"Invalid size for sparse row data");
|
||||
for (size_t i = 0; i < num_elements; ++i) {
|
||||
auto element = row[i];
|
||||
@ -277,17 +277,17 @@ CopyAndWrapSparseRow(const void* data,
|
||||
|
||||
// Iterable is a list of bytes, each is a byte array representation of a single
|
||||
// sparse float row. This helper function converts such byte arrays into a list
|
||||
// of knowhere::sparse::SparseRow<sparseValueType>. The resulting list is a deep copy of
|
||||
// of knowhere::sparse::SparseRow<SparseValueType>. The resulting list is a deep copy of
|
||||
// the source data.
|
||||
//
|
||||
// Here in segcore we validate the sparse row data only for search requests,
|
||||
// as the insert/upsert data are already validated in go code.
|
||||
template <typename Iterable>
|
||||
std::unique_ptr<knowhere::sparse::SparseRow<sparseValueType>[]>
|
||||
std::unique_ptr<knowhere::sparse::SparseRow<SparseValueType>[]>
|
||||
SparseBytesToRows(const Iterable& rows, const bool validate = false) {
|
||||
AssertInfo(rows.size() > 0, "at least 1 sparse row should be provided");
|
||||
auto res =
|
||||
std::make_unique<knowhere::sparse::SparseRow<sparseValueType>[]>(rows.size());
|
||||
std::make_unique<knowhere::sparse::SparseRow<SparseValueType>[]>(rows.size());
|
||||
for (size_t i = 0; i < rows.size(); ++i) {
|
||||
res[i] = std::move(
|
||||
CopyAndWrapSparseRow(rows[i].data(), rows[i].size(), validate));
|
||||
@ -295,11 +295,11 @@ SparseBytesToRows(const Iterable& rows, const bool validate = false) {
|
||||
return res;
|
||||
}
|
||||
|
||||
// SparseRowsToProto converts a list of knowhere::sparse::SparseRow<sparseValueType> to
|
||||
// SparseRowsToProto converts a list of knowhere::sparse::SparseRow<SparseValueType> to
|
||||
// a milvus::proto::schema::SparseFloatArray. The resulting proto is a deep copy
|
||||
// of the source data. source(i) returns the i-th row to be copied.
|
||||
inline void SparseRowsToProto(
|
||||
const std::function<const knowhere::sparse::SparseRow<sparseValueType>*(size_t)>&
|
||||
const std::function<const knowhere::sparse::SparseRow<SparseValueType>*(size_t)>&
|
||||
source,
|
||||
int64_t rows,
|
||||
milvus::proto::schema::SparseFloatArray* proto) {
|
||||
|
||||
@ -604,14 +604,6 @@ IndexFactory::CreateVectorIndex(
|
||||
version,
|
||||
use_knowhere_build_pool,
|
||||
file_manager_context);
|
||||
case DataType::VECTOR_SPARSE_U32_F32:
|
||||
return std::make_unique<VectorMemIndex<sparse_u32_f32>>(
|
||||
element_type,
|
||||
index_type,
|
||||
metric_type,
|
||||
version,
|
||||
use_knowhere_build_pool,
|
||||
file_manager_context);
|
||||
default:
|
||||
ThrowInfo(NotImplemented,
|
||||
fmt::format("not implemented data type to "
|
||||
|
||||
@ -80,7 +80,7 @@ class VectorDiskAnnIndex : public VectorIndex {
|
||||
std::vector<uint8_t>
|
||||
GetVector(const DatasetPtr dataset) const override;
|
||||
|
||||
std::unique_ptr<const knowhere::sparse::SparseRow<sparseValueType>[]>
|
||||
std::unique_ptr<const knowhere::sparse::SparseRow<SparseValueType>[]>
|
||||
GetSparseVector(const DatasetPtr dataset) const override {
|
||||
ThrowInfo(ErrorCode::Unsupported,
|
||||
"get sparse vector not supported for disk index");
|
||||
|
||||
@ -76,7 +76,7 @@ class VectorIndex : public IndexBase {
|
||||
virtual std::vector<uint8_t>
|
||||
GetVector(const DatasetPtr dataset) const = 0;
|
||||
|
||||
virtual std::unique_ptr<const knowhere::sparse::SparseRow<sparseValueType>[]>
|
||||
virtual std::unique_ptr<const knowhere::sparse::SparseRow<SparseValueType>[]>
|
||||
GetSparseVector(const DatasetPtr dataset) const = 0;
|
||||
|
||||
IndexType
|
||||
|
||||
@ -427,10 +427,10 @@ VectorMemIndex<T>::Build(const Config& config) {
|
||||
field_data)
|
||||
->Dim());
|
||||
}
|
||||
std::vector<knowhere::sparse::SparseRow<sparseValueType>> vec(total_rows);
|
||||
std::vector<knowhere::sparse::SparseRow<SparseValueType>> vec(total_rows);
|
||||
int64_t offset = 0;
|
||||
for (auto field_data : field_datas) {
|
||||
auto ptr = static_cast<const knowhere::sparse::SparseRow<sparseValueType>*>(
|
||||
auto ptr = static_cast<const knowhere::sparse::SparseRow<SparseValueType>*>(
|
||||
field_data->Data());
|
||||
AssertInfo(ptr, "failed to cast field data to sparse rows");
|
||||
for (size_t i = 0; i < field_data->Length(); ++i) {
|
||||
@ -571,7 +571,7 @@ VectorMemIndex<T>::GetVector(const DatasetPtr dataset) const {
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
std::unique_ptr<const knowhere::sparse::SparseRow<sparseValueType>[]>
|
||||
std::unique_ptr<const knowhere::sparse::SparseRow<SparseValueType>[]>
|
||||
VectorMemIndex<T>::GetSparseVector(const DatasetPtr dataset) const {
|
||||
auto res = index_.GetVectorByIds(dataset);
|
||||
if (!res.has_value()) {
|
||||
@ -580,8 +580,8 @@ VectorMemIndex<T>::GetSparseVector(const DatasetPtr dataset) const {
|
||||
}
|
||||
// release and transfer ownership to the result unique ptr.
|
||||
res.value()->SetIsOwner(false);
|
||||
return std::unique_ptr<const knowhere::sparse::SparseRow<sparseValueType>[]>(
|
||||
static_cast<const knowhere::sparse::SparseRow<sparseValueType>*>(
|
||||
return std::unique_ptr<const knowhere::sparse::SparseRow<SparseValueType>[]>(
|
||||
static_cast<const knowhere::sparse::SparseRow<SparseValueType>*>(
|
||||
res.value()->GetTensor()));
|
||||
}
|
||||
|
||||
|
||||
@ -87,7 +87,7 @@ class VectorMemIndex : public VectorIndex {
|
||||
std::vector<uint8_t>
|
||||
GetVector(const DatasetPtr dataset) const override;
|
||||
|
||||
std::unique_ptr<const knowhere::sparse::SparseRow<sparseValueType>[]>
|
||||
std::unique_ptr<const knowhere::sparse::SparseRow<SparseValueType>[]>
|
||||
GetSparseVector(const DatasetPtr dataset) const override;
|
||||
|
||||
IndexStatsPtr
|
||||
|
||||
@ -134,8 +134,8 @@ VariableLengthChunk<std::string>::set(
|
||||
// Template specialization for sparse vector
|
||||
template <>
|
||||
inline void
|
||||
VariableLengthChunk<knowhere::sparse::SparseRow<sparseValueType>>::set(
|
||||
const knowhere::sparse::SparseRow<sparseValueType>* src,
|
||||
VariableLengthChunk<knowhere::sparse::SparseRow<SparseValueType>>::set(
|
||||
const knowhere::sparse::SparseRow<SparseValueType>* src,
|
||||
uint32_t begin,
|
||||
uint32_t length,
|
||||
const std::optional<CheckDataValid>& check_data_valid) {
|
||||
@ -158,7 +158,7 @@ VariableLengthChunk<knowhere::sparse::SparseRow<sparseValueType>>::set(
|
||||
uint8_t* data_ptr = buf + offset;
|
||||
std::memcpy(data_ptr, (uint8_t*)src[i].data(), data_size);
|
||||
data_[i + begin] =
|
||||
knowhere::sparse::SparseRow<sparseValueType>(src[i].size(), data_ptr, false);
|
||||
knowhere::sparse::SparseRow<SparseValueType>(src[i].size(), data_ptr, false);
|
||||
offset += data_size;
|
||||
}
|
||||
}
|
||||
|
||||
@ -81,7 +81,7 @@ struct Placeholder {
|
||||
// only one of blob_ and sparse_matrix_ should be set. blob_ is used for
|
||||
// dense vector search and sparse_matrix_ is for sparse vector search.
|
||||
aligned_vector<char> blob_;
|
||||
std::unique_ptr<knowhere::sparse::SparseRow<sparseValueType>[]> sparse_matrix_;
|
||||
std::unique_ptr<knowhere::sparse::SparseRow<SparseValueType>[]> sparse_matrix_;
|
||||
// offsets for embedding list
|
||||
aligned_vector<size_t> lims_;
|
||||
|
||||
|
||||
@ -170,7 +170,7 @@ BruteForceSearch(const dataset::SearchDataset& query_ds,
|
||||
base_dataset, query_dataset, search_cfg, bitset);
|
||||
} else if (data_type == DataType::VECTOR_SPARSE_U32_F32) {
|
||||
res = knowhere::BruteForce::RangeSearch<
|
||||
knowhere::sparse::SparseRow<sparseValueType>>(
|
||||
knowhere::sparse::SparseRow<SparseValueType>>(
|
||||
base_dataset, query_dataset, search_cfg, bitset);
|
||||
} else if (data_type == DataType::VECTOR_INT8) {
|
||||
res = knowhere::BruteForce::RangeSearch<int8>(
|
||||
@ -281,7 +281,7 @@ DispatchBruteForceIteratorByDataType(const knowhere::DataSetPtr& base_dataset,
|
||||
base_dataset, query_dataset, config, bitset);
|
||||
case DataType::VECTOR_SPARSE_U32_F32:
|
||||
return knowhere::BruteForce::AnnIterator<
|
||||
knowhere::sparse::SparseRow<sparseValueType>>(
|
||||
knowhere::sparse::SparseRow<SparseValueType>>(
|
||||
base_dataset, query_dataset, config, bitset);
|
||||
case DataType::VECTOR_INT8:
|
||||
return knowhere::BruteForce::AnnIterator<int8>(
|
||||
|
||||
@ -1797,7 +1797,7 @@ ChunkedSegmentSealedImpl::get_raw_data(FieldId field_id,
|
||||
auto row =
|
||||
offset != INVALID_SEG_OFFSET
|
||||
? static_cast<
|
||||
const knowhere::sparse::SparseRow<sparseValueType>*>(
|
||||
const knowhere::sparse::SparseRow<SparseValueType>*>(
|
||||
static_cast<const void*>(value))
|
||||
: nullptr;
|
||||
if (row == nullptr) {
|
||||
|
||||
@ -504,13 +504,13 @@ class ConcurrentVector<VectorArray>
|
||||
|
||||
template <>
|
||||
class ConcurrentVector<SparseFloatVector>
|
||||
: public ConcurrentVectorImpl<knowhere::sparse::SparseRow<sparseValueType>, true> {
|
||||
: public ConcurrentVectorImpl<knowhere::sparse::SparseRow<SparseValueType>, true> {
|
||||
public:
|
||||
explicit ConcurrentVector(
|
||||
int64_t size_per_chunk,
|
||||
storage::MmapChunkDescriptorPtr mmap_descriptor = nullptr,
|
||||
ThreadSafeValidDataPtr valid_data_ptr = nullptr)
|
||||
: ConcurrentVectorImpl<knowhere::sparse::SparseRow<sparseValueType>,
|
||||
: ConcurrentVectorImpl<knowhere::sparse::SparseRow<SparseValueType>,
|
||||
true>::ConcurrentVectorImpl(1,
|
||||
size_per_chunk,
|
||||
std::move(
|
||||
@ -524,11 +524,11 @@ class ConcurrentVector<SparseFloatVector>
|
||||
const void* source,
|
||||
ssize_t element_count) override {
|
||||
auto* src =
|
||||
static_cast<const knowhere::sparse::SparseRow<sparseValueType>*>(source);
|
||||
static_cast<const knowhere::sparse::SparseRow<SparseValueType>*>(source);
|
||||
for (int i = 0; i < element_count; ++i) {
|
||||
dim_ = std::max(dim_, src[i].dim());
|
||||
}
|
||||
ConcurrentVectorImpl<knowhere::sparse::SparseRow<sparseValueType>,
|
||||
ConcurrentVectorImpl<knowhere::sparse::SparseRow<SparseValueType>,
|
||||
true>::set_data_raw(element_offset,
|
||||
source,
|
||||
element_count);
|
||||
|
||||
@ -150,7 +150,7 @@ VectorFieldIndexing::AppendSegmentIndexSparse(int64_t reserved_offset,
|
||||
auto dim = source->Dim();
|
||||
|
||||
while (total_rows > 0) {
|
||||
auto mat = static_cast<const knowhere::sparse::SparseRow<sparseValueType>*>(
|
||||
auto mat = static_cast<const knowhere::sparse::SparseRow<SparseValueType>*>(
|
||||
source->get_chunk_data(chunk_id));
|
||||
auto rows = std::min(source->get_size_per_chunk(), total_rows);
|
||||
auto dataset = knowhere::GenDataSet(rows, dim, mat);
|
||||
|
||||
@ -567,7 +567,7 @@ CreateVectorDataArrayFrom(const void* data_raw,
|
||||
SparseRowsToProto(
|
||||
[&](size_t i) {
|
||||
return reinterpret_cast<
|
||||
const knowhere::sparse::SparseRow<sparseValueType>*>(
|
||||
const knowhere::sparse::SparseRow<SparseValueType>*>(
|
||||
data_raw) +
|
||||
i;
|
||||
},
|
||||
|
||||
@ -492,7 +492,7 @@ DiskFileManagerImpl::cache_raw_data_to_disk_common(
|
||||
field_data)
|
||||
->Dim());
|
||||
auto sparse_rows =
|
||||
static_cast<const knowhere::sparse::SparseRow<sparseValueType>*>(
|
||||
static_cast<const knowhere::sparse::SparseRow<SparseValueType>*>(
|
||||
field_data->Data());
|
||||
for (size_t i = 0; i < field_data->Length(); ++i) {
|
||||
auto row = sparse_rows[i];
|
||||
|
||||
@ -315,7 +315,7 @@ BaseEventData::Serialize() {
|
||||
for (size_t offset = 0; offset < field_data->get_num_rows();
|
||||
++offset) {
|
||||
auto row =
|
||||
static_cast<const knowhere::sparse::SparseRow<sparseValueType>*>(
|
||||
static_cast<const knowhere::sparse::SparseRow<SparseValueType>*>(
|
||||
field_data->RawValue(offset));
|
||||
payload_writer->add_one_binary_payload(
|
||||
static_cast<const uint8_t*>(row->data()),
|
||||
|
||||
@ -27,8 +27,8 @@ using namespace milvus::query;
|
||||
namespace {
|
||||
|
||||
std::vector<int>
|
||||
SearchRef(const knowhere::sparse::SparseRow<milvus::sparseValueType>* base,
|
||||
const knowhere::sparse::SparseRow<milvus::sparseValueType>& query,
|
||||
SearchRef(const knowhere::sparse::SparseRow<milvus::SparseValueType>* base,
|
||||
const knowhere::sparse::SparseRow<milvus::SparseValueType>& query,
|
||||
int nb,
|
||||
int topk) {
|
||||
std::vector<std::tuple<float, int>> res;
|
||||
@ -51,8 +51,8 @@ SearchRef(const knowhere::sparse::SparseRow<milvus::sparseValueType>* base,
|
||||
}
|
||||
|
||||
std::vector<int>
|
||||
RangeSearchRef(const knowhere::sparse::SparseRow<milvus::sparseValueType>* base,
|
||||
const knowhere::sparse::SparseRow<milvus::sparseValueType>& query,
|
||||
RangeSearchRef(const knowhere::sparse::SparseRow<milvus::SparseValueType>* base,
|
||||
const knowhere::sparse::SparseRow<milvus::SparseValueType>& query,
|
||||
int nb,
|
||||
float radius,
|
||||
float range_filter,
|
||||
|
||||
@ -206,7 +206,7 @@ TEST_F(ChunkVectorTest, FillDataWithMmap) {
|
||||
auto fp16_vec_gt = dataset.get_col<float16>(fp16_vec);
|
||||
auto bf16_vec_gt = dataset.get_col<bfloat16>(bf16_vec);
|
||||
auto sparse_vec_gt =
|
||||
dataset.get_col<knowhere::sparse::SparseRow<milvus::sparseValueType>>(sparse_vec);
|
||||
dataset.get_col<knowhere::sparse::SparseRow<milvus::SparseValueType>>(sparse_vec);
|
||||
auto int8_vec_gt = dataset.get_col<int8>(int8_vec);
|
||||
|
||||
for (size_t i = 0; i < num_inserted; ++i) {
|
||||
|
||||
@ -680,7 +680,7 @@ TEST(storage, InsertDataSparseFloat) {
|
||||
storage::DataType::VECTOR_SPARSE_U32_F32);
|
||||
ASSERT_EQ(new_payload->get_num_rows(), n_rows);
|
||||
ASSERT_EQ(new_payload->get_null_count(), 0);
|
||||
auto new_data = static_cast<const knowhere::sparse::SparseRow<milvus::sparseValueType>*>(
|
||||
auto new_data = static_cast<const knowhere::sparse::SparseRow<milvus::SparseValueType>*>(
|
||||
new_payload->Data());
|
||||
|
||||
for (auto i = 0; i < n_rows; ++i) {
|
||||
|
||||
@ -417,7 +417,7 @@ TEST_P(GrowingIndexTest, AddWithoutBuildPool) {
|
||||
false,
|
||||
milvus::storage::FileManagerContext());
|
||||
auto sparse_data =
|
||||
dataset.get_col<knowhere::sparse::SparseRow<milvus::sparseValueType>>(vec);
|
||||
dataset.get_col<knowhere::sparse::SparseRow<milvus::SparseValueType>>(vec);
|
||||
index->BuildWithDataset(
|
||||
knowhere::GenDataSet(N, dim, sparse_data.data()), build_config);
|
||||
for (int i = 0; i < add_cont; i++) {
|
||||
@ -567,7 +567,7 @@ TEST_P(GrowingIndexTest, GetVector) {
|
||||
for (int64_t i = 0; i < n_batch; i++) {
|
||||
auto dataset = DataGen(schema, per_batch);
|
||||
auto fakevec =
|
||||
dataset.get_col<knowhere::sparse::SparseRow<milvus::sparseValueType>>(vec);
|
||||
dataset.get_col<knowhere::sparse::SparseRow<milvus::SparseValueType>>(vec);
|
||||
auto offset = segment->PreInsert(per_batch);
|
||||
segment->Insert(offset,
|
||||
per_batch,
|
||||
|
||||
@ -68,7 +68,7 @@ TestVecIndex() {
|
||||
status = BuildBinaryVecIndex(index, NB * DIM / 8, xb_data.data());
|
||||
} else if (std::is_same_v<TraitType, milvus::SparseFloatVector>) {
|
||||
auto xb_data =
|
||||
dataset.template get_col<knowhere::sparse::SparseRow<milvus::sparseValueType>>(
|
||||
dataset.template get_col<knowhere::sparse::SparseRow<milvus::SparseValueType>>(
|
||||
milvus::FieldId(100));
|
||||
status = BuildSparseFloatVecIndex(
|
||||
index,
|
||||
|
||||
@ -134,7 +134,7 @@ TEST_P(IndexWrapperTest, BuildAndQuery) {
|
||||
ASSERT_NO_THROW(index->Build(xb_dataset));
|
||||
} else if (vec_field_data_type == DataType::VECTOR_SPARSE_U32_F32) {
|
||||
auto dataset = GenFieldData(NB, metric_type, vec_field_data_type);
|
||||
auto sparse_vecs = dataset.get_col<knowhere::sparse::SparseRow<milvus::sparseValueType>>(
|
||||
auto sparse_vecs = dataset.get_col<knowhere::sparse::SparseRow<milvus::SparseValueType>>(
|
||||
milvus::FieldId(100));
|
||||
xb_dataset =
|
||||
knowhere::GenDataSet(NB, kTestSparseDim, sparse_vecs.data());
|
||||
@ -179,7 +179,7 @@ TEST_P(IndexWrapperTest, BuildAndQuery) {
|
||||
result = vec_index->Query(xq_dataset, search_info, nullptr);
|
||||
} else if (vec_field_data_type == DataType::VECTOR_SPARSE_U32_F32) {
|
||||
auto dataset = GenFieldData(NQ, metric_type, vec_field_data_type);
|
||||
auto xb_data = dataset.get_col<knowhere::sparse::SparseRow<milvus::sparseValueType>>(
|
||||
auto xb_data = dataset.get_col<knowhere::sparse::SparseRow<milvus::SparseValueType>>(
|
||||
milvus::FieldId(100));
|
||||
auto xq_dataset =
|
||||
knowhere::GenDataSet(NQ, kTestSparseDim, xb_data.data());
|
||||
|
||||
@ -349,7 +349,7 @@ class IndexTest : public ::testing::TestWithParam<Param> {
|
||||
} else if (is_sparse) {
|
||||
// sparse vector
|
||||
xb_sparse_data =
|
||||
dataset.get_col<knowhere::sparse::SparseRow<milvus::sparseValueType>>(
|
||||
dataset.get_col<knowhere::sparse::SparseRow<milvus::SparseValueType>>(
|
||||
milvus::FieldId(100));
|
||||
xb_dataset =
|
||||
knowhere::GenDataSet(NB, kTestSparseDim, xb_sparse_data.data());
|
||||
@ -382,7 +382,7 @@ class IndexTest : public ::testing::TestWithParam<Param> {
|
||||
knowhere::DataSetPtr xb_dataset;
|
||||
FixedVector<float> xb_data;
|
||||
FixedVector<uint8_t> xb_bin_data;
|
||||
FixedVector<knowhere::sparse::SparseRow<milvus::sparseValueType>> xb_sparse_data;
|
||||
FixedVector<knowhere::sparse::SparseRow<milvus::SparseValueType>> xb_sparse_data;
|
||||
knowhere::DataSetPtr xq_dataset;
|
||||
int64_t query_offset = 100;
|
||||
int64_t NB = 3000; // will be updated to 27000 for mmap+hnsw
|
||||
@ -686,7 +686,7 @@ TEST_P(IndexTest, GetVector_EmptySparseVector) {
|
||||
}
|
||||
NB = 3;
|
||||
|
||||
std::vector<knowhere::sparse::SparseRow<milvus::sparseValueType>> vec;
|
||||
std::vector<knowhere::sparse::SparseRow<milvus::SparseValueType>> vec;
|
||||
vec.reserve(NB);
|
||||
vec.emplace_back(2);
|
||||
vec[0].set_at(0, 1, 1.0);
|
||||
|
||||
@ -165,7 +165,7 @@ struct GeneratedData {
|
||||
}
|
||||
if constexpr (std::is_same_v<
|
||||
T,
|
||||
knowhere::sparse::SparseRow<milvus::sparseValueType>>) {
|
||||
knowhere::sparse::SparseRow<milvus::SparseValueType>>) {
|
||||
auto sparse_float_array =
|
||||
target_field_data.vectors().sparse_float_vector();
|
||||
auto rows =
|
||||
@ -311,7 +311,7 @@ struct GeneratedData {
|
||||
int array_len);
|
||||
};
|
||||
|
||||
inline std::unique_ptr<knowhere::sparse::SparseRow<milvus::sparseValueType>[]>
|
||||
inline std::unique_ptr<knowhere::sparse::SparseRow<milvus::SparseValueType>[]>
|
||||
GenerateRandomSparseFloatVector(size_t rows,
|
||||
size_t cols = kTestSparseDim,
|
||||
float density = kTestSparseVectorDensity,
|
||||
@ -350,13 +350,13 @@ GenerateRandomSparseFloatVector(size_t rows,
|
||||
data[row][col] = val;
|
||||
}
|
||||
|
||||
auto tensor = std::make_unique<knowhere::sparse::SparseRow<milvus::sparseValueType>[]>(rows);
|
||||
auto tensor = std::make_unique<knowhere::sparse::SparseRow<milvus::SparseValueType>[]>(rows);
|
||||
|
||||
for (int32_t i = 0; i < rows; ++i) {
|
||||
if (data[i].size() == 0) {
|
||||
continue;
|
||||
}
|
||||
knowhere::sparse::SparseRow<milvus::sparseValueType> row(data[i].size());
|
||||
knowhere::sparse::SparseRow<milvus::SparseValueType> row(data[i].size());
|
||||
size_t j = 0;
|
||||
for (auto& [idx, val] : data[i]) {
|
||||
row.set_at(j++, idx, val);
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user