mirror of
https://gitee.com/milvus-io/milvus.git
synced 2025-12-29 06:55:27 +08:00
Merge pull request #335 from fishpenguin/0.6.0-yk
memory usage increased slowly during searching vectors
This commit is contained in:
commit
db75a9aeff
@ -4,6 +4,7 @@ Please mark all change in change log and use the ticket from JIRA.
|
||||
# Milvus 0.6.0 (TODO)
|
||||
|
||||
## Bug
|
||||
- \#228 - memory usage increased slowly during searching vectors
|
||||
- \#246 - Exclude src/external folder from code coverage for Jenkins CI
|
||||
- \#248 - Reside src/external in thirdparty
|
||||
|
||||
|
||||
@ -89,34 +89,35 @@ ConvertToDataset(std::vector<SPTAG::QueryResult> query_results) {
|
||||
}
|
||||
}
|
||||
|
||||
auto id_buf = MakeMutableBufferSmart((uint8_t*)p_id, sizeof(int64_t) * elems);
|
||||
auto dist_buf = MakeMutableBufferSmart((uint8_t*)p_dist, sizeof(float) * elems);
|
||||
|
||||
// TODO: magic
|
||||
std::vector<BufferPtr> id_bufs{nullptr, id_buf};
|
||||
std::vector<BufferPtr> dist_bufs{nullptr, dist_buf};
|
||||
|
||||
auto int64_type = std::make_shared<arrow::Int64Type>();
|
||||
auto float_type = std::make_shared<arrow::FloatType>();
|
||||
|
||||
auto id_array_data = arrow::ArrayData::Make(int64_type, elems, id_bufs);
|
||||
auto dist_array_data = arrow::ArrayData::Make(float_type, elems, dist_bufs);
|
||||
// auto id_array_data = std::make_shared<ArrayData>(int64_type, sizeof(int64_t) * elems, id_bufs);
|
||||
// auto dist_array_data = std::make_shared<ArrayData>(float_type, sizeof(float) * elems, dist_bufs);
|
||||
|
||||
// auto ids = ConstructInt64Array((uint8_t*)p_id, sizeof(int64_t) * elems);
|
||||
// auto dists = ConstructFloatArray((uint8_t*)p_dist, sizeof(float) * elems);
|
||||
|
||||
auto ids = std::make_shared<NumericArray<arrow::Int64Type>>(id_array_data);
|
||||
auto dists = std::make_shared<NumericArray<arrow::FloatType>>(dist_array_data);
|
||||
std::vector<ArrayPtr> array{ids, dists};
|
||||
|
||||
auto field_id = std::make_shared<Field>("id", std::make_shared<arrow::Int64Type>());
|
||||
auto field_dist = std::make_shared<Field>("dist", std::make_shared<arrow::FloatType>());
|
||||
std::vector<FieldPtr> fields{field_id, field_dist};
|
||||
auto schema = std::make_shared<Schema>(fields);
|
||||
|
||||
return std::make_shared<Dataset>(array, schema);
|
||||
// auto id_buf = MakeMutableBufferSmart((uint8_t*)p_id, sizeof(int64_t) * elems);
|
||||
// auto dist_buf = MakeMutableBufferSmart((uint8_t*)p_dist, sizeof(float) * elems);
|
||||
//
|
||||
// // TODO: magic
|
||||
// std::vector<BufferPtr> id_bufs{nullptr, id_buf};
|
||||
// std::vector<BufferPtr> dist_bufs{nullptr, dist_buf};
|
||||
//
|
||||
// auto int64_type = std::make_shared<arrow::Int64Type>();
|
||||
// auto float_type = std::make_shared<arrow::FloatType>();
|
||||
//
|
||||
// auto id_array_data = arrow::ArrayData::Make(int64_type, elems, id_bufs);
|
||||
// auto dist_array_data = arrow::ArrayData::Make(float_type, elems, dist_bufs);
|
||||
// // auto id_array_data = std::make_shared<ArrayData>(int64_type, sizeof(int64_t) * elems, id_bufs);
|
||||
// // auto dist_array_data = std::make_shared<ArrayData>(float_type, sizeof(float) * elems, dist_bufs);
|
||||
//
|
||||
// // auto ids = ConstructInt64Array((uint8_t*)p_id, sizeof(int64_t) * elems);
|
||||
// // auto dists = ConstructFloatArray((uint8_t*)p_dist, sizeof(float) * elems);
|
||||
//
|
||||
// auto ids = std::make_shared<NumericArray<arrow::Int64Type>>(id_array_data);
|
||||
// auto dists = std::make_shared<NumericArray<arrow::FloatType>>(dist_array_data);
|
||||
// std::vector<ArrayPtr> array{ids, dists};
|
||||
//
|
||||
// auto field_id = std::make_shared<Field>("id", std::make_shared<arrow::Int64Type>());
|
||||
// auto field_dist = std::make_shared<Field>("dist", std::make_shared<arrow::FloatType>());
|
||||
// std::vector<FieldPtr> fields{field_id, field_dist};
|
||||
// auto schema = std::make_shared<Schema>(fields);
|
||||
//
|
||||
// return std::make_shared<Dataset>(array, schema);
|
||||
return std::make_shared<Dataset>((void*)p_id, (void*)p_dist);
|
||||
}
|
||||
|
||||
} // namespace knowhere
|
||||
|
||||
@ -54,6 +54,9 @@ class Dataset {
|
||||
: tensor_(std::move(tensor)), tensor_schema_(std::move(tensor_schema)) {
|
||||
}
|
||||
|
||||
// Builds a Dataset that wraps two raw result buffers: `ids` and `dists`.
// The pointers are stored as-is (no copy, no allocation here). This excerpt
// shows no release of them inside Dataset; in this change the caller
// (VecIndexImpl::Search) calls free() on both after copying the results out.
// NOTE(review): the other members are not set by this constructor — confirm
// callers of array()/tensor() never receive a Dataset built this way.
Dataset(void* ids, void* dists) : ids_(ids), dists_(dists) {
|
||||
}
|
||||
|
||||
Dataset(const Dataset&) = delete;
|
||||
Dataset&
|
||||
operator=(const Dataset&) = delete;
|
||||
@ -128,6 +131,16 @@ class Dataset {
|
||||
tensor_schema_ = std::move(tensor_schema);
|
||||
}
|
||||
|
||||
// Returns the raw ids pointer stored by the Dataset(void*, void*) constructor.
// The value is handed back untyped; callers in this change cast it to
// int64_t* before reading, and VecIndexImpl::Search passes it to free().
// NOTE(review): presumably unset for Datasets built through the other
// constructors — confirm before dereferencing.
void*
|
||||
ids() {
|
||||
return ids_;
|
||||
}
|
||||
|
||||
// Returns the raw distances pointer stored by the Dataset(void*, void*)
// constructor (member `dists_`). The value is handed back untyped; callers in
// this change cast it to float* before reading, and VecIndexImpl::Search
// passes it to free().
// NOTE(review): presumably unset for Datasets built through the other
// constructors — confirm before dereferencing.
void*
|
||||
dist() {
|
||||
return dists_;
|
||||
}
|
||||
|
||||
// const Config &
|
||||
// meta() const { return meta_; }
|
||||
|
||||
@ -141,6 +154,9 @@ class Dataset {
|
||||
SchemaPtr array_schema_;
|
||||
std::vector<TensorPtr> tensor_;
|
||||
SchemaPtr tensor_schema_;
|
||||
// TODO(yukun): use smart pointers for ids_/dists_ instead of raw void*
|
||||
void* ids_;
|
||||
void* dists_;
|
||||
// Config meta_;
|
||||
};
|
||||
|
||||
|
||||
@ -80,23 +80,24 @@ IDMAP::Search(const DatasetPtr& dataset, const Config& config) {
|
||||
|
||||
search_impl(rows, (float*)p_data, config->k, res_dis, res_ids, Config());
|
||||
|
||||
auto id_buf = MakeMutableBufferSmart((uint8_t*)res_ids, sizeof(int64_t) * elems);
|
||||
auto dist_buf = MakeMutableBufferSmart((uint8_t*)res_dis, sizeof(float) * elems);
|
||||
|
||||
std::vector<BufferPtr> id_bufs{nullptr, id_buf};
|
||||
std::vector<BufferPtr> dist_bufs{nullptr, dist_buf};
|
||||
|
||||
auto int64_type = std::make_shared<arrow::Int64Type>();
|
||||
auto float_type = std::make_shared<arrow::FloatType>();
|
||||
|
||||
auto id_array_data = arrow::ArrayData::Make(int64_type, elems, id_bufs);
|
||||
auto dist_array_data = arrow::ArrayData::Make(float_type, elems, dist_bufs);
|
||||
|
||||
auto ids = std::make_shared<NumericArray<arrow::Int64Type>>(id_array_data);
|
||||
auto dists = std::make_shared<NumericArray<arrow::FloatType>>(dist_array_data);
|
||||
std::vector<ArrayPtr> array{ids, dists};
|
||||
|
||||
return std::make_shared<Dataset>(array, nullptr);
|
||||
// auto id_buf = MakeMutableBufferSmart((uint8_t*)res_ids, sizeof(int64_t) * elems);
|
||||
// auto dist_buf = MakeMutableBufferSmart((uint8_t*)res_dis, sizeof(float) * elems);
|
||||
//
|
||||
// std::vector<BufferPtr> id_bufs{nullptr, id_buf};
|
||||
// std::vector<BufferPtr> dist_bufs{nullptr, dist_buf};
|
||||
//
|
||||
// auto int64_type = std::make_shared<arrow::Int64Type>();
|
||||
// auto float_type = std::make_shared<arrow::FloatType>();
|
||||
//
|
||||
// auto id_array_data = arrow::ArrayData::Make(int64_type, elems, id_bufs);
|
||||
// auto dist_array_data = arrow::ArrayData::Make(float_type, elems, dist_bufs);
|
||||
//
|
||||
// auto ids = std::make_shared<NumericArray<arrow::Int64Type>>(id_array_data);
|
||||
// auto dists = std::make_shared<NumericArray<arrow::FloatType>>(dist_array_data);
|
||||
// std::vector<ArrayPtr> array{ids, dists};
|
||||
//
|
||||
// return std::make_shared<Dataset>(array, nullptr);
|
||||
return std::make_shared<Dataset>((void*)res_ids, (void*)res_dis);
|
||||
}
|
||||
|
||||
void
|
||||
|
||||
@ -139,23 +139,23 @@ IVF::Search(const DatasetPtr& dataset, const Config& config) {
|
||||
// std::cout << ss_res_id.str() << std::endl;
|
||||
// std::cout << ss_res_dist.str() << std::endl << std::endl;
|
||||
|
||||
auto id_buf = MakeMutableBufferSmart((uint8_t*)res_ids, sizeof(int64_t) * elems);
|
||||
auto dist_buf = MakeMutableBufferSmart((uint8_t*)res_dis, sizeof(float) * elems);
|
||||
// auto id_buf = MakeMutableBufferSmart((uint8_t*)res_ids, sizeof(int64_t) * elems);
|
||||
// auto dist_buf = MakeMutableBufferSmart((uint8_t*)res_dis, sizeof(float) * elems);
|
||||
//
|
||||
// std::vector<BufferPtr> id_bufs{nullptr, id_buf};
|
||||
// std::vector<BufferPtr> dist_bufs{nullptr, dist_buf};
|
||||
//
|
||||
// auto int64_type = std::make_shared<arrow::Int64Type>();
|
||||
// auto float_type = std::make_shared<arrow::FloatType>();
|
||||
//
|
||||
// auto id_array_data = arrow::ArrayData::Make(int64_type, elems, id_bufs);
|
||||
// auto dist_array_data = arrow::ArrayData::Make(float_type, elems, dist_bufs);
|
||||
//
|
||||
// auto ids = std::make_shared<NumericArray<arrow::Int64Type>>(id_array_data);
|
||||
// auto dists = std::make_shared<NumericArray<arrow::FloatType>>(dist_array_data);
|
||||
// std::vector<ArrayPtr> array{ids, dists};
|
||||
|
||||
std::vector<BufferPtr> id_bufs{nullptr, id_buf};
|
||||
std::vector<BufferPtr> dist_bufs{nullptr, dist_buf};
|
||||
|
||||
auto int64_type = std::make_shared<arrow::Int64Type>();
|
||||
auto float_type = std::make_shared<arrow::FloatType>();
|
||||
|
||||
auto id_array_data = arrow::ArrayData::Make(int64_type, elems, id_bufs);
|
||||
auto dist_array_data = arrow::ArrayData::Make(float_type, elems, dist_bufs);
|
||||
|
||||
auto ids = std::make_shared<NumericArray<arrow::Int64Type>>(id_array_data);
|
||||
auto dists = std::make_shared<NumericArray<arrow::FloatType>>(dist_array_data);
|
||||
std::vector<ArrayPtr> array{ids, dists};
|
||||
|
||||
return std::make_shared<Dataset>(array, nullptr);
|
||||
return std::make_shared<Dataset>((void*)res_ids, (void*)res_dis);
|
||||
}
|
||||
|
||||
void
|
||||
|
||||
@ -88,23 +88,24 @@ NSG::Search(const DatasetPtr& dataset, const Config& config) {
|
||||
s_params.search_length = build_cfg->search_length;
|
||||
index_->Search((float*)p_data, rows, dim, build_cfg->k, res_dis, res_ids, s_params);
|
||||
|
||||
auto id_buf = MakeMutableBufferSmart((uint8_t*)res_ids, sizeof(int64_t) * elems);
|
||||
auto dist_buf = MakeMutableBufferSmart((uint8_t*)res_dis, sizeof(float) * elems);
|
||||
// auto id_buf = MakeMutableBufferSmart((uint8_t*)res_ids, sizeof(int64_t) * elems);
|
||||
// auto dist_buf = MakeMutableBufferSmart((uint8_t*)res_dis, sizeof(float) * elems);
|
||||
|
||||
std::vector<BufferPtr> id_bufs{nullptr, id_buf};
|
||||
std::vector<BufferPtr> dist_bufs{nullptr, dist_buf};
|
||||
|
||||
auto int64_type = std::make_shared<arrow::Int64Type>();
|
||||
auto float_type = std::make_shared<arrow::FloatType>();
|
||||
|
||||
auto id_array_data = arrow::ArrayData::Make(int64_type, elems, id_bufs);
|
||||
auto dist_array_data = arrow::ArrayData::Make(float_type, elems, dist_bufs);
|
||||
|
||||
auto ids = std::make_shared<NumericArray<arrow::Int64Type>>(id_array_data);
|
||||
auto dists = std::make_shared<NumericArray<arrow::FloatType>>(dist_array_data);
|
||||
std::vector<ArrayPtr> array{ids, dists};
|
||||
|
||||
return std::make_shared<Dataset>(array, nullptr);
|
||||
// std::vector<BufferPtr> id_bufs{nullptr, id_buf};
|
||||
// std::vector<BufferPtr> dist_bufs{nullptr, dist_buf};
|
||||
//
|
||||
// auto int64_type = std::make_shared<arrow::Int64Type>();
|
||||
// auto float_type = std::make_shared<arrow::FloatType>();
|
||||
//
|
||||
// auto id_array_data = arrow::ArrayData::Make(int64_type, elems, id_bufs);
|
||||
// auto dist_array_data = arrow::ArrayData::Make(float_type, elems, dist_bufs);
|
||||
//
|
||||
// auto ids = std::make_shared<NumericArray<arrow::Int64Type>>(id_array_data);
|
||||
// auto dists = std::make_shared<NumericArray<arrow::FloatType>>(dist_array_data);
|
||||
// std::vector<ArrayPtr> array{ids, dists};
|
||||
//
|
||||
// return std::make_shared<Dataset>(array, nullptr);
|
||||
return std::make_shared<Dataset>((void*)res_ids, (void*)res_dis);
|
||||
}
|
||||
|
||||
IndexModelPtr
|
||||
|
||||
@ -181,11 +181,13 @@ TEST_P(IVFTest, clone_test) {
|
||||
// PrintResult(result, nq, k);
|
||||
|
||||
auto AssertEqual = [&](knowhere::DatasetPtr p1, knowhere::DatasetPtr p2) {
|
||||
auto ids_p1 = p1->array()[0];
|
||||
auto ids_p2 = p2->array()[0];
|
||||
auto ids_p1 = p1->ids();
|
||||
auto ids_p2 = p2->ids();
|
||||
|
||||
for (int i = 0; i < nq * k; ++i) {
|
||||
EXPECT_EQ(*(ids_p2->data()->GetValues<int64_t>(1, i)), *(ids_p1->data()->GetValues<int64_t>(1, i)));
|
||||
EXPECT_EQ(*((int64_t*)(ids_p2) + i), *((int64_t*)(ids_p1) + i));
|
||||
// EXPECT_EQ(*(ids_p2->data()->GetValues<int64_t>(1, i)), *(ids_p1->data()->GetValues<int64_t>(1,
|
||||
// i)));
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
@ -66,15 +66,19 @@ TEST_F(KDTTest, kdt_basic) {
|
||||
AssertAnns(result, nq, k);
|
||||
|
||||
{
|
||||
auto ids = result->array()[0];
|
||||
auto dists = result->array()[1];
|
||||
// auto ids = result->array()[0];
|
||||
// auto dists = result->array()[1];
|
||||
auto ids = result->ids();
|
||||
auto dists = result->dist();
|
||||
|
||||
std::stringstream ss_id;
|
||||
std::stringstream ss_dist;
|
||||
for (auto i = 0; i < nq; i++) {
|
||||
for (auto j = 0; j < k; ++j) {
|
||||
ss_id << *ids->data()->GetValues<int64_t>(1, i * k + j) << " ";
|
||||
ss_dist << *dists->data()->GetValues<float>(1, i * k + j) << " ";
|
||||
ss_id << *((int64_t*)(ids) + i * k + j) << " ";
|
||||
ss_dist << *((float*)(dists) + i * k + j) << " ";
|
||||
// ss_id << *ids->data()->GetValues<int64_t>(1, i * k + j) << " ";
|
||||
// ss_dist << *dists->data()->GetValues<float>(1, i * k + j) << " ";
|
||||
}
|
||||
ss_id << std::endl;
|
||||
ss_dist << std::endl;
|
||||
|
||||
@ -151,9 +151,10 @@ generate_query_dataset(int64_t nb, int64_t dim, float* xb) {
|
||||
|
||||
void
|
||||
AssertAnns(const knowhere::DatasetPtr& result, const int& nq, const int& k) {
|
||||
auto ids = result->array()[0];
|
||||
auto ids = result->ids();
|
||||
for (auto i = 0; i < nq; i++) {
|
||||
EXPECT_EQ(i, *(ids->data()->GetValues<int64_t>(1, i * k)));
|
||||
EXPECT_EQ(i, *((int64_t*)(ids) + i * k));
|
||||
// EXPECT_EQ(i, *(ids->data()->GetValues<int64_t>(1, i * k)));
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@ -84,8 +84,8 @@ VecIndexImpl::Search(const int64_t& nq, const float* xq, float* dist, int64_t* i
|
||||
Config search_cfg = cfg;
|
||||
|
||||
auto res = index_->Search(dataset, search_cfg);
|
||||
auto ids_array = res->array()[0];
|
||||
auto dis_array = res->array()[1];
|
||||
// auto ids_array = res->array()[0];
|
||||
// auto dis_array = res->array()[1];
|
||||
|
||||
//{
|
||||
// auto& ids = ids_array;
|
||||
@ -104,12 +104,14 @@ VecIndexImpl::Search(const int64_t& nq, const float* xq, float* dist, int64_t* i
|
||||
// std::cout << "dist\n" << ss_dist.str() << std::endl;
|
||||
//}
|
||||
|
||||
auto p_ids = ids_array->data()->GetValues<int64_t>(1, 0);
|
||||
auto p_dist = dis_array->data()->GetValues<float>(1, 0);
|
||||
// auto p_ids = ids_array->data()->GetValues<int64_t>(1, 0);
|
||||
// auto p_dist = dis_array->data()->GetValues<float>(1, 0);
|
||||
|
||||
// TODO(linxj): avoid copy here.
|
||||
memcpy(ids, p_ids, sizeof(int64_t) * nq * k);
|
||||
memcpy(dist, p_dist, sizeof(float) * nq * k);
|
||||
memcpy(ids, res->ids(), sizeof(int64_t) * nq * k);
|
||||
memcpy(dist, res->dist(), sizeof(float) * nq * k);
|
||||
free(res->ids());
|
||||
free(res->dist());
|
||||
} catch (knowhere::KnowhereException& e) {
|
||||
WRAPPER_LOG_ERROR << e.what();
|
||||
return Status(KNOWHERE_UNEXPECTED_ERROR, e.what());
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user