Merge pull request #335 from fishpenguin/0.6.0-yk

memory usage increased slowly during searching vectors
This commit is contained in:
Jin Hai 2019-11-15 14:25:26 +08:00 committed by GitHub
commit db75a9aeff
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
10 changed files with 121 additions and 92 deletions

View File

@ -4,6 +4,7 @@ Please mark all change in change log and use the ticket from JIRA.
# Milvus 0.6.0 (TODO)
## Bug
- \#228 - Memory usage increases slowly while searching vectors
- \#246 - Exclude src/external folder from code coverage for Jenkins CI
- \#248 - Move src/external into thirdparty

View File

@ -89,34 +89,35 @@ ConvertToDataset(std::vector<SPTAG::QueryResult> query_results) {
}
}
auto id_buf = MakeMutableBufferSmart((uint8_t*)p_id, sizeof(int64_t) * elems);
auto dist_buf = MakeMutableBufferSmart((uint8_t*)p_dist, sizeof(float) * elems);
// TODO: magic
std::vector<BufferPtr> id_bufs{nullptr, id_buf};
std::vector<BufferPtr> dist_bufs{nullptr, dist_buf};
auto int64_type = std::make_shared<arrow::Int64Type>();
auto float_type = std::make_shared<arrow::FloatType>();
auto id_array_data = arrow::ArrayData::Make(int64_type, elems, id_bufs);
auto dist_array_data = arrow::ArrayData::Make(float_type, elems, dist_bufs);
// auto id_array_data = std::make_shared<ArrayData>(int64_type, sizeof(int64_t) * elems, id_bufs);
// auto dist_array_data = std::make_shared<ArrayData>(float_type, sizeof(float) * elems, dist_bufs);
// auto ids = ConstructInt64Array((uint8_t*)p_id, sizeof(int64_t) * elems);
// auto dists = ConstructFloatArray((uint8_t*)p_dist, sizeof(float) * elems);
auto ids = std::make_shared<NumericArray<arrow::Int64Type>>(id_array_data);
auto dists = std::make_shared<NumericArray<arrow::FloatType>>(dist_array_data);
std::vector<ArrayPtr> array{ids, dists};
auto field_id = std::make_shared<Field>("id", std::make_shared<arrow::Int64Type>());
auto field_dist = std::make_shared<Field>("dist", std::make_shared<arrow::FloatType>());
std::vector<FieldPtr> fields{field_id, field_dist};
auto schema = std::make_shared<Schema>(fields);
return std::make_shared<Dataset>(array, schema);
// auto id_buf = MakeMutableBufferSmart((uint8_t*)p_id, sizeof(int64_t) * elems);
// auto dist_buf = MakeMutableBufferSmart((uint8_t*)p_dist, sizeof(float) * elems);
//
// // TODO: magic
// std::vector<BufferPtr> id_bufs{nullptr, id_buf};
// std::vector<BufferPtr> dist_bufs{nullptr, dist_buf};
//
// auto int64_type = std::make_shared<arrow::Int64Type>();
// auto float_type = std::make_shared<arrow::FloatType>();
//
// auto id_array_data = arrow::ArrayData::Make(int64_type, elems, id_bufs);
// auto dist_array_data = arrow::ArrayData::Make(float_type, elems, dist_bufs);
// // auto id_array_data = std::make_shared<ArrayData>(int64_type, sizeof(int64_t) * elems, id_bufs);
// // auto dist_array_data = std::make_shared<ArrayData>(float_type, sizeof(float) * elems, dist_bufs);
//
// // auto ids = ConstructInt64Array((uint8_t*)p_id, sizeof(int64_t) * elems);
// // auto dists = ConstructFloatArray((uint8_t*)p_dist, sizeof(float) * elems);
//
// auto ids = std::make_shared<NumericArray<arrow::Int64Type>>(id_array_data);
// auto dists = std::make_shared<NumericArray<arrow::FloatType>>(dist_array_data);
// std::vector<ArrayPtr> array{ids, dists};
//
// auto field_id = std::make_shared<Field>("id", std::make_shared<arrow::Int64Type>());
// auto field_dist = std::make_shared<Field>("dist", std::make_shared<arrow::FloatType>());
// std::vector<FieldPtr> fields{field_id, field_dist};
// auto schema = std::make_shared<Schema>(fields);
//
// return std::make_shared<Dataset>(array, schema);
return std::make_shared<Dataset>((void*)p_id, (void*)p_dist);
}
} // namespace knowhere

View File

@ -54,6 +54,9 @@ class Dataset {
: tensor_(std::move(tensor)), tensor_schema_(std::move(tensor_schema)) {
}
// Builds a Dataset that carries two raw search-result buffers: `ids` and `dists`.
// The pointers are stored as-is (no copy is made here).
// NOTE(review): nothing visible in this chunk releases them from inside Dataset;
// VecIndexImpl::Search calls free() on both after copying the results out, so
// they are presumably malloc-allocated and caller-owned -- confirm for other callers.
Dataset(void* ids, void* dists) : ids_(ids), dists_(dists) {
}
Dataset(const Dataset&) = delete;
Dataset&
operator=(const Dataset&) = delete;
@ -128,6 +131,16 @@ class Dataset {
tensor_schema_ = std::move(tensor_schema);
}
// Returns the raw id buffer pointer stored by the (void* ids, void* dists)
// constructor. Callers in this change cast it to int64_t* before reading.
// NOTE(review): the value when the Dataset was built through another
// constructor is not visible here -- confirm before dereferencing.
void*
ids() {
return ids_;
}
// Returns the raw distance buffer pointer stored by the (void* ids, void* dists)
// constructor. Callers in this change cast it to float* before reading.
// NOTE(review): the value when the Dataset was built through another
// constructor is not visible here -- confirm before dereferencing.
void*
dist() {
return dists_;
}
// const Config &
// meta() const { return meta_; }
@ -141,6 +154,9 @@ class Dataset {
SchemaPtr array_schema_;
std::vector<TensorPtr> tensor_;
SchemaPtr tensor_schema_;
// TODO(yukun): hold these buffers in smart pointers instead of raw void* so ownership is explicit
void* ids_;
void* dists_;
// Config meta_;
};

View File

@ -80,23 +80,24 @@ IDMAP::Search(const DatasetPtr& dataset, const Config& config) {
search_impl(rows, (float*)p_data, config->k, res_dis, res_ids, Config());
auto id_buf = MakeMutableBufferSmart((uint8_t*)res_ids, sizeof(int64_t) * elems);
auto dist_buf = MakeMutableBufferSmart((uint8_t*)res_dis, sizeof(float) * elems);
std::vector<BufferPtr> id_bufs{nullptr, id_buf};
std::vector<BufferPtr> dist_bufs{nullptr, dist_buf};
auto int64_type = std::make_shared<arrow::Int64Type>();
auto float_type = std::make_shared<arrow::FloatType>();
auto id_array_data = arrow::ArrayData::Make(int64_type, elems, id_bufs);
auto dist_array_data = arrow::ArrayData::Make(float_type, elems, dist_bufs);
auto ids = std::make_shared<NumericArray<arrow::Int64Type>>(id_array_data);
auto dists = std::make_shared<NumericArray<arrow::FloatType>>(dist_array_data);
std::vector<ArrayPtr> array{ids, dists};
return std::make_shared<Dataset>(array, nullptr);
// auto id_buf = MakeMutableBufferSmart((uint8_t*)res_ids, sizeof(int64_t) * elems);
// auto dist_buf = MakeMutableBufferSmart((uint8_t*)res_dis, sizeof(float) * elems);
//
// std::vector<BufferPtr> id_bufs{nullptr, id_buf};
// std::vector<BufferPtr> dist_bufs{nullptr, dist_buf};
//
// auto int64_type = std::make_shared<arrow::Int64Type>();
// auto float_type = std::make_shared<arrow::FloatType>();
//
// auto id_array_data = arrow::ArrayData::Make(int64_type, elems, id_bufs);
// auto dist_array_data = arrow::ArrayData::Make(float_type, elems, dist_bufs);
//
// auto ids = std::make_shared<NumericArray<arrow::Int64Type>>(id_array_data);
// auto dists = std::make_shared<NumericArray<arrow::FloatType>>(dist_array_data);
// std::vector<ArrayPtr> array{ids, dists};
//
// return std::make_shared<Dataset>(array, nullptr);
return std::make_shared<Dataset>((void*)res_ids, (void*)res_dis);
}
void

View File

@ -139,23 +139,23 @@ IVF::Search(const DatasetPtr& dataset, const Config& config) {
// std::cout << ss_res_id.str() << std::endl;
// std::cout << ss_res_dist.str() << std::endl << std::endl;
auto id_buf = MakeMutableBufferSmart((uint8_t*)res_ids, sizeof(int64_t) * elems);
auto dist_buf = MakeMutableBufferSmart((uint8_t*)res_dis, sizeof(float) * elems);
// auto id_buf = MakeMutableBufferSmart((uint8_t*)res_ids, sizeof(int64_t) * elems);
// auto dist_buf = MakeMutableBufferSmart((uint8_t*)res_dis, sizeof(float) * elems);
//
// std::vector<BufferPtr> id_bufs{nullptr, id_buf};
// std::vector<BufferPtr> dist_bufs{nullptr, dist_buf};
//
// auto int64_type = std::make_shared<arrow::Int64Type>();
// auto float_type = std::make_shared<arrow::FloatType>();
//
// auto id_array_data = arrow::ArrayData::Make(int64_type, elems, id_bufs);
// auto dist_array_data = arrow::ArrayData::Make(float_type, elems, dist_bufs);
//
// auto ids = std::make_shared<NumericArray<arrow::Int64Type>>(id_array_data);
// auto dists = std::make_shared<NumericArray<arrow::FloatType>>(dist_array_data);
// std::vector<ArrayPtr> array{ids, dists};
std::vector<BufferPtr> id_bufs{nullptr, id_buf};
std::vector<BufferPtr> dist_bufs{nullptr, dist_buf};
auto int64_type = std::make_shared<arrow::Int64Type>();
auto float_type = std::make_shared<arrow::FloatType>();
auto id_array_data = arrow::ArrayData::Make(int64_type, elems, id_bufs);
auto dist_array_data = arrow::ArrayData::Make(float_type, elems, dist_bufs);
auto ids = std::make_shared<NumericArray<arrow::Int64Type>>(id_array_data);
auto dists = std::make_shared<NumericArray<arrow::FloatType>>(dist_array_data);
std::vector<ArrayPtr> array{ids, dists};
return std::make_shared<Dataset>(array, nullptr);
return std::make_shared<Dataset>((void*)res_ids, (void*)res_dis);
}
void

View File

@ -88,23 +88,24 @@ NSG::Search(const DatasetPtr& dataset, const Config& config) {
s_params.search_length = build_cfg->search_length;
index_->Search((float*)p_data, rows, dim, build_cfg->k, res_dis, res_ids, s_params);
auto id_buf = MakeMutableBufferSmart((uint8_t*)res_ids, sizeof(int64_t) * elems);
auto dist_buf = MakeMutableBufferSmart((uint8_t*)res_dis, sizeof(float) * elems);
// auto id_buf = MakeMutableBufferSmart((uint8_t*)res_ids, sizeof(int64_t) * elems);
// auto dist_buf = MakeMutableBufferSmart((uint8_t*)res_dis, sizeof(float) * elems);
std::vector<BufferPtr> id_bufs{nullptr, id_buf};
std::vector<BufferPtr> dist_bufs{nullptr, dist_buf};
auto int64_type = std::make_shared<arrow::Int64Type>();
auto float_type = std::make_shared<arrow::FloatType>();
auto id_array_data = arrow::ArrayData::Make(int64_type, elems, id_bufs);
auto dist_array_data = arrow::ArrayData::Make(float_type, elems, dist_bufs);
auto ids = std::make_shared<NumericArray<arrow::Int64Type>>(id_array_data);
auto dists = std::make_shared<NumericArray<arrow::FloatType>>(dist_array_data);
std::vector<ArrayPtr> array{ids, dists};
return std::make_shared<Dataset>(array, nullptr);
// std::vector<BufferPtr> id_bufs{nullptr, id_buf};
// std::vector<BufferPtr> dist_bufs{nullptr, dist_buf};
//
// auto int64_type = std::make_shared<arrow::Int64Type>();
// auto float_type = std::make_shared<arrow::FloatType>();
//
// auto id_array_data = arrow::ArrayData::Make(int64_type, elems, id_bufs);
// auto dist_array_data = arrow::ArrayData::Make(float_type, elems, dist_bufs);
//
// auto ids = std::make_shared<NumericArray<arrow::Int64Type>>(id_array_data);
// auto dists = std::make_shared<NumericArray<arrow::FloatType>>(dist_array_data);
// std::vector<ArrayPtr> array{ids, dists};
//
// return std::make_shared<Dataset>(array, nullptr);
return std::make_shared<Dataset>((void*)res_ids, (void*)res_dis);
}
IndexModelPtr

View File

@ -181,11 +181,13 @@ TEST_P(IVFTest, clone_test) {
// PrintResult(result, nq, k);
auto AssertEqual = [&](knowhere::DatasetPtr p1, knowhere::DatasetPtr p2) {
auto ids_p1 = p1->array()[0];
auto ids_p2 = p2->array()[0];
auto ids_p1 = p1->ids();
auto ids_p2 = p2->ids();
for (int i = 0; i < nq * k; ++i) {
EXPECT_EQ(*(ids_p2->data()->GetValues<int64_t>(1, i)), *(ids_p1->data()->GetValues<int64_t>(1, i)));
EXPECT_EQ(*((int64_t*)(ids_p2) + i), *((int64_t*)(ids_p1) + i));
// EXPECT_EQ(*(ids_p2->data()->GetValues<int64_t>(1, i)), *(ids_p1->data()->GetValues<int64_t>(1,
// i)));
}
};

View File

@ -66,15 +66,19 @@ TEST_F(KDTTest, kdt_basic) {
AssertAnns(result, nq, k);
{
auto ids = result->array()[0];
auto dists = result->array()[1];
// auto ids = result->array()[0];
// auto dists = result->array()[1];
auto ids = result->ids();
auto dists = result->dist();
std::stringstream ss_id;
std::stringstream ss_dist;
for (auto i = 0; i < nq; i++) {
for (auto j = 0; j < k; ++j) {
ss_id << *ids->data()->GetValues<int64_t>(1, i * k + j) << " ";
ss_dist << *dists->data()->GetValues<float>(1, i * k + j) << " ";
ss_id << *((int64_t*)(ids) + i * k + j) << " ";
ss_dist << *((float*)(dists) + i * k + j) << " ";
// ss_id << *ids->data()->GetValues<int64_t>(1, i * k + j) << " ";
// ss_dist << *dists->data()->GetValues<float>(1, i * k + j) << " ";
}
ss_id << std::endl;
ss_dist << std::endl;

View File

@ -151,9 +151,10 @@ generate_query_dataset(int64_t nb, int64_t dim, float* xb) {
void
AssertAnns(const knowhere::DatasetPtr& result, const int& nq, const int& k) {
auto ids = result->array()[0];
auto ids = result->ids();
for (auto i = 0; i < nq; i++) {
EXPECT_EQ(i, *(ids->data()->GetValues<int64_t>(1, i * k)));
EXPECT_EQ(i, *((int64_t*)(ids) + i * k));
// EXPECT_EQ(i, *(ids->data()->GetValues<int64_t>(1, i * k)));
}
}

View File

@ -84,8 +84,8 @@ VecIndexImpl::Search(const int64_t& nq, const float* xq, float* dist, int64_t* i
Config search_cfg = cfg;
auto res = index_->Search(dataset, search_cfg);
auto ids_array = res->array()[0];
auto dis_array = res->array()[1];
// auto ids_array = res->array()[0];
// auto dis_array = res->array()[1];
//{
// auto& ids = ids_array;
@ -104,12 +104,14 @@ VecIndexImpl::Search(const int64_t& nq, const float* xq, float* dist, int64_t* i
// std::cout << "dist\n" << ss_dist.str() << std::endl;
//}
auto p_ids = ids_array->data()->GetValues<int64_t>(1, 0);
auto p_dist = dis_array->data()->GetValues<float>(1, 0);
// auto p_ids = ids_array->data()->GetValues<int64_t>(1, 0);
// auto p_dist = dis_array->data()->GetValues<float>(1, 0);
// TODO(linxj): avoid copy here.
memcpy(ids, p_ids, sizeof(int64_t) * nq * k);
memcpy(dist, p_dist, sizeof(float) * nq * k);
memcpy(ids, res->ids(), sizeof(int64_t) * nq * k);
memcpy(dist, res->dist(), sizeof(float) * nq * k);
free(res->ids());
free(res->dist());
} catch (knowhere::KnowhereException& e) {
WRAPPER_LOG_ERROR << e.what();
return Status(KNOWHERE_UNEXPECTED_ERROR, e.what());