From e77fd12286dc7e37708f839acf15579aba91cc09 Mon Sep 17 00:00:00 2001 From: feisiyicl <64510805+feisiyicl@users.noreply.github.com> Date: Thu, 30 Apr 2020 10:03:54 +0800 Subject: [PATCH] Feisiyicl 2190 memory usage (#2191) * rename MILVUS_ENABLE_PROFILING to ENABLE_CPU_PROFILING, and add ENABLE_MEM_PROFILING Signed-off-by: feisiyicl <7764126@qq.com> * optimize ReadOnlyArrayInvertedLists Signed-off-by: feisiyicl <7764126@qq.com> * update changelog Signed-off-by: feisiyicl <7764126@qq.com> * retry ci Signed-off-by: feisiyicl <7764126@qq.com> * fix build issue Signed-off-by: feisiyicl <7764126@qq.com> --- CHANGELOG.md | 2 + core/src/CMakeLists.txt | 3 +- .../knowhere/index/vector_index/IndexIVF.cpp | 2 - .../helpers/FaissGpuResourceMgr.cpp | 2 +- .../index/thirdparty/faiss/InvertedLists.cpp | 71 +++++++++++-------- .../hybrid_request/HybridSearchRequest.cpp | 4 +- .../hybrid_request/InsertEntityRequest.cpp | 6 +- .../server/delivery/request/InsertRequest.cpp | 6 +- .../delivery/request/SearchByIDRequest.cpp | 6 +- .../server/delivery/request/SearchRequest.cpp | 6 +- core/src/utils/CommonUtil.cpp | 2 +- core/src/utils/CommonUtil.h | 2 +- 12 files changed, 64 insertions(+), 48 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index cf9b828278..de2fb6bd2d 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -35,9 +35,11 @@ Please mark all change in change log and use the issue from GitHub - \#2167 Merge log_config.conf with server_config.yaml - \#2173 Check storage permission - \#2178 Using elkan K-Means to improve IVF +- \#2190 Fix memory usage is twice of index size when using GPU searching ## Task + # Milvus 0.8.0 (2020-04-15) ## Bug diff --git a/core/src/CMakeLists.txt b/core/src/CMakeLists.txt index 7e78560cab..7beab7d455 100644 --- a/core/src/CMakeLists.txt +++ b/core/src/CMakeLists.txt @@ -212,9 +212,10 @@ if (MILVUS_GPU_VERSION) ) endif () -if (MILVUS_ENABLE_PROFILING) +if (ENABLE_CPU_PROFILING OR ENABLE_MEM_PROFILING) set(third_party_libs ${third_party_libs} gperftools + tcmalloc libunwind ) endif () diff --git a/core/src/index/knowhere/knowhere/index/vector_index/IndexIVF.cpp b/core/src/index/knowhere/knowhere/index/vector_index/IndexIVF.cpp index c5f315334d..2779f3ac93 100644 --- a/core/src/index/knowhere/knowhere/index/vector_index/IndexIVF.cpp +++ b/core/src/index/knowhere/knowhere/index/vector_index/IndexIVF.cpp @@ -310,8 +310,6 @@ IVF::SealImpl() { faiss::Index* index = index_.get(); auto idx = dynamic_cast(index); if (idx != nullptr) { - // To be deleted - LOG_KNOWHERE_DEBUG_ << "Test before to_readonly: IVF READONLY " << std::boolalpha << idx->is_readonly(); idx->to_readonly(); } #endif diff --git a/core/src/index/knowhere/knowhere/index/vector_index/helpers/FaissGpuResourceMgr.cpp b/core/src/index/knowhere/knowhere/index/vector_index/helpers/FaissGpuResourceMgr.cpp index b315f538c9..10b047d53d 100644 --- a/core/src/index/knowhere/knowhere/index/vector_index/helpers/FaissGpuResourceMgr.cpp +++ b/core/src/index/knowhere/knowhere/index/vector_index/helpers/FaissGpuResourceMgr.cpp @@ -124,7 +124,7 @@ void FaissGpuResourceMgr::Dump() { for (auto& item : idle_map_) { auto& bq = item.second; - // std::cout << "DEVICEID: " << item.first << ", resource count:" << bq.Size(); + LOG_KNOWHERE_DEBUG_ << "DEVICEID: " << item.first << ", resource count:" << bq.Size(); } } diff --git a/core/src/index/thirdparty/faiss/InvertedLists.cpp b/core/src/index/thirdparty/faiss/InvertedLists.cpp index 8636b89efd..e126081e45 100644 --- a/core/src/index/thirdparty/faiss/InvertedLists.cpp +++ b/core/src/index/thirdparty/faiss/InvertedLists.cpp @@ -29,25 +29,32 @@ namespace faiss { */ PageLockMemory::PageLockMemory(size_t size) : nbytes(size) { - CUDA_VERIFY(cudaHostAlloc(&data, size, 0)); + auto err = cudaHostAlloc(&(this->data), size, 0); + if (err) { + FAISS_THROW_MSG("Fail to alloc page lock memory " + std::to_string(size)); + } } PageLockMemory::~PageLockMemory() { - CUDA_VERIFY(cudaFreeHost((void*)data)); + CUDA_VERIFY(cudaFreeHost((void*)(this->data))); } PageLockMemory::PageLockMemory(const PageLockMemory& other) { - CUDA_VERIFY(cudaHostAlloc(&data, other.nbytes, 0)); - memcpy(data, other.data, other.nbytes); - nbytes = other.nbytes; + auto err = cudaHostAlloc(&(this->data), other.nbytes, 0); + if (err) { + FAISS_THROW_MSG("Fail to alloc page lock memory " + std::to_string(other.nbytes)); + } + memcpy(this->data, other.data, other.nbytes); + this->nbytes = other.nbytes; } PageLockMemory::PageLockMemory(PageLockMemory &&other) { - data = other.data; - nbytes = other.nbytes; + this->data = other.data; + this->nbytes = other.nbytes; other.data = nullptr; other.nbytes = 0; } + } #endif @@ -274,34 +281,42 @@ ReadOnlyArrayInvertedLists::ReadOnlyArrayInvertedLists(size_t nlist, ReadOnlyArrayInvertedLists::ReadOnlyArrayInvertedLists(const ArrayInvertedLists& other) : InvertedLists (other.nlist, other.code_size) { -#ifndef USE_CPU - std::vector readonly_codes; - std::vector readonly_ids; -#endif - readonly_length.reserve(nlist); + readonly_length.resize(nlist); + readonly_offset.resize(nlist); size_t offset = 0; - for (auto& list_ids : other.ids) { - readonly_length.emplace_back(list_ids.size()); - readonly_offset.emplace_back(offset); + for (auto i = 0; i < other.ids.size(); i++) { + auto& list_ids = other.ids[i]; + readonly_length[i] = list_ids.size(); + readonly_offset[i] = offset; offset += list_ids.size(); - readonly_ids.insert(readonly_ids.end(), list_ids.begin(), list_ids.end()); } - for(auto& list_codes : other.codes) { +#ifdef USE_CPU + for (auto i = 0; i < other.ids.size(); i++) { + auto& list_ids = other.ids[i]; + readonly_ids.insert(readonly_ids.end(), list_ids.begin(), list_ids.end()); + + auto& list_codes = other.codes[i]; readonly_codes.insert(readonly_codes.end(), list_codes.begin(), list_codes.end()); } +#else + size_t ids_size = offset * sizeof(idx_t); + size_t codes_size = offset * (this->code_size) * sizeof(uint8_t); + pin_readonly_codes = std::make_shared(codes_size); + pin_readonly_ids = std::make_shared(ids_size); -#ifndef USE_CPU - // convert to page-lock memory - { - size_t size = readonly_codes.size() * sizeof(uint8_t); - pin_readonly_codes = std::make_shared(size); - memcpy(pin_readonly_codes->data, readonly_codes.data(), size); - } - { - size_t size = readonly_ids.size() * sizeof(idx_t); - pin_readonly_ids = std::make_shared(size); - memcpy(pin_readonly_ids->data, readonly_ids.data(), size); + offset = 0; + for (auto i = 0; i < other.ids.size(); i++) { + auto& list_ids = other.ids[i]; + auto& list_codes = other.codes[i]; + + uint8_t* ids_ptr = (uint8_t*)(pin_readonly_ids->data) + offset * sizeof(idx_t); + memcpy(ids_ptr, list_ids.data(), list_ids.size() * sizeof(idx_t)); + + uint8_t* codes_ptr = (uint8_t*)(pin_readonly_codes->data) + offset * (this->code_size) * sizeof(uint8_t); + memcpy(codes_ptr, list_codes.data(), list_codes.size() * sizeof(uint8_t)); + + offset += list_ids.size(); } #endif diff --git a/core/src/server/delivery/hybrid_request/HybridSearchRequest.cpp b/core/src/server/delivery/hybrid_request/HybridSearchRequest.cpp index a392e9e6e6..7e768b277d 100644 --- a/core/src/server/delivery/hybrid_request/HybridSearchRequest.cpp +++ b/core/src/server/delivery/hybrid_request/HybridSearchRequest.cpp @@ -23,7 +23,7 @@ #include #include #include -#ifdef MILVUS_ENABLE_PROFILING +#ifdef ENABLE_CPU_PROFILING #include #endif @@ -98,7 +98,7 @@ HybridSearchRequest::OnExecute() { status = DBWrapper::DB()->HybridQuery(context_, collection_name_, partition_list_, hybrid_search_contxt_, general_query_, attr_type, nq, result_ids, result_distances); -#ifdef MILVUS_ENABLE_PROFILING +#ifdef ENABLE_CPU_PROFILING ProfilerStop(); #endif diff --git a/core/src/server/delivery/hybrid_request/InsertEntityRequest.cpp b/core/src/server/delivery/hybrid_request/InsertEntityRequest.cpp index 14ac042ad5..4c619974e1 100644 --- a/core/src/server/delivery/hybrid_request/InsertEntityRequest.cpp +++ b/core/src/server/delivery/hybrid_request/InsertEntityRequest.cpp @@ -23,7 +23,7 @@ #include #include #include -#ifdef MILVUS_ENABLE_PROFILING +#ifdef ENABLE_CPU_PROFILING #include #endif @@ -123,7 +123,7 @@ InsertEntityRequest::OnExecute() { rc.RecordSection("check validation"); -#ifdef MILVUS_ENABLE_PROFILING +#ifdef ENABLE_CPU_PROFILING std::string fname = "/tmp/insert_" + CommonUtil::GetCurrentTimeStr() + ".profiling"; ProfilerStart(fname.c_str()); #endif @@ -162,7 +162,7 @@ InsertEntityRequest::OnExecute() { : collection_schema.flag_ |= engine::meta::FLAG_MASK_NO_USERID; status = DBWrapper::DB()->UpdateCollectionFlag(collection_name_, collection_schema.flag_); -#ifdef MILVUS_ENABLE_PROFILING +#ifdef ENABLE_CPU_PROFILING ProfilerStop(); #endif diff --git a/core/src/server/delivery/request/InsertRequest.cpp b/core/src/server/delivery/request/InsertRequest.cpp index 96c822c699..89d091abd7 100644 --- a/core/src/server/delivery/request/InsertRequest.cpp +++ b/core/src/server/delivery/request/InsertRequest.cpp @@ -22,7 +22,7 @@ #include #include -#ifdef MILVUS_ENABLE_PROFILING +#ifdef ENABLE_CPU_PROFILING #include #endif @@ -124,7 +124,7 @@ InsertRequest::OnExecute() { rc.RecordSection("check validation"); -#ifdef MILVUS_ENABLE_PROFILING +#ifdef ENABLE_CPU_PROFILING std::string fname = "/tmp/insert_" + CommonUtil::GetCurrentTimeStr() + ".profiling"; ProfilerStart(fname.c_str()); #endif @@ -167,7 +167,7 @@ InsertRequest::OnExecute() { : collection_schema.flag_ |= engine::meta::FLAG_MASK_NO_USERID; status = DBWrapper::DB()->UpdateCollectionFlag(collection_name_, collection_schema.flag_); -#ifdef MILVUS_ENABLE_PROFILING +#ifdef ENABLE_CPU_PROFILING ProfilerStop(); #endif diff --git a/core/src/server/delivery/request/SearchByIDRequest.cpp b/core/src/server/delivery/request/SearchByIDRequest.cpp index 88806f8ba6..5984d49f7a 100644 --- a/core/src/server/delivery/request/SearchByIDRequest.cpp +++ b/core/src/server/delivery/request/SearchByIDRequest.cpp @@ -26,7 +26,7 @@ #include "utils/TimeRecorder.h" #include "utils/ValidationUtil.h" -#ifdef MILVUS_ENABLE_PROFILING +#ifdef ENABLE_CPU_PROFILING #include #endif @@ -122,7 +122,7 @@ SearchByIDRequest::OnExecute() { engine::ResultIds result_ids; engine::ResultDistances result_distances; -#ifdef MILVUS_ENABLE_PROFILING +#ifdef ENABLE_CPU_PROFILING std::string fname = "/tmp/search_by_id_" + CommonUtil::GetCurrentTimeStr() + ".profiling"; ProfilerStart(fname.c_str()); #endif @@ -132,7 +132,7 @@ SearchByIDRequest::OnExecute() { status = DBWrapper::DB()->QueryByIDs(context_, collection_name_, partition_list_, (size_t)topk_, extra_params_, id_array_, result_ids, result_distances); -#ifdef MILVUS_ENABLE_PROFILING +#ifdef ENABLE_CPU_PROFILING ProfilerStop(); #endif diff --git a/core/src/server/delivery/request/SearchRequest.cpp b/core/src/server/delivery/request/SearchRequest.cpp index 48af1c86e7..55f66f3431 100644 --- a/core/src/server/delivery/request/SearchRequest.cpp +++ b/core/src/server/delivery/request/SearchRequest.cpp @@ -22,7 +22,7 @@ #include "utils/TimeRecorder.h" #include "utils/ValidationUtil.h" -#ifdef MILVUS_ENABLE_PROFILING +#ifdef ENABLE_CPU_PROFILING #include #endif @@ -136,7 +136,7 @@ SearchRequest::OnExecute() { rc.RecordSection("check validation"); // step 7: search vectors -#ifdef MILVUS_ENABLE_PROFILING +#ifdef ENABLE_CPU_PROFILING std::string fname = "/tmp/search_" + CommonUtil::GetCurrentTimeStr() + ".profiling"; ProfilerStart(fname.c_str()); #endif @@ -154,7 +154,7 @@ SearchRequest::OnExecute() { rc.RecordSection("query vectors from engine"); -#ifdef MILVUS_ENABLE_PROFILING +#ifdef ENABLE_CPU_PROFILING ProfilerStop(); #endif fiu_do_on("SearchRequest.OnExecute.query_fail", status = Status(milvus::SERVER_UNEXPECTED_ERROR, "")); diff --git a/core/src/utils/CommonUtil.cpp b/core/src/utils/CommonUtil.cpp index 39c510f562..f79babbb4c 100644 --- a/core/src/utils/CommonUtil.cpp +++ b/core/src/utils/CommonUtil.cpp @@ -228,7 +228,7 @@ CommonUtil::ConvertTime(tm time_struct, time_t& time_integer) { time_integer = mktime(&time_struct); } -#ifdef MILVUS_ENABLE_PROFILING +#ifdef ENABLE_CPU_PROFILING std::string CommonUtil::GetCurrentTimeStr() { time_t tt; diff --git a/core/src/utils/CommonUtil.h b/core/src/utils/CommonUtil.h index 402168b276..26d039b89f 100644 --- a/core/src/utils/CommonUtil.h +++ b/core/src/utils/CommonUtil.h @@ -51,7 +51,7 @@ class CommonUtil { static void ConvertTime(tm time_struct, time_t& time_integer); -#ifdef MILVUS_ENABLE_PROFILING +#ifdef ENABLE_CPU_PROFILING static std::string GetCurrentTimeStr(); #endif