From fd0bf04cf87b4209613983e0fd28fc534c564b59 Mon Sep 17 00:00:00 2001 From: jielinxu <52057195+jielinxu@users.noreply.github.com> Date: Fri, 20 Sep 2019 18:32:17 +0800 Subject: [PATCH 001/149] Update README.md --- README.md | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index f496cdd884..69470bd0b2 100644 --- a/README.md +++ b/README.md @@ -1 +1,2 @@ -# Milvus is coming soon! \ No newline at end of file +![LICENSE](https://img.shields.io/badge/license-Apache--2.0-brightgreen) +# Milvus is coming soon! From c804b91e782f2802a73b3a5f57f25313144eb7f7 Mon Sep 17 00:00:00 2001 From: jielinxu <52057195+jielinxu@users.noreply.github.com> Date: Fri, 20 Sep 2019 18:37:57 +0800 Subject: [PATCH 002/149] Update README.md --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 69470bd0b2..5249aace41 100644 --- a/README.md +++ b/README.md @@ -1,2 +1,2 @@ -![LICENSE](https://img.shields.io/badge/license-Apache--2.0-brightgreen) +![LICENSE](https://img.shields.io/badge/license-Apache--2.0-brightgreen.svg) # Milvus is coming soon! From c85d7feb8db4c6bf60a8fb4f880995105149da7a Mon Sep 17 00:00:00 2001 From: jielinxu <52057195+jielinxu@users.noreply.github.com> Date: Sun, 29 Sep 2019 15:49:34 +0800 Subject: [PATCH 003/149] Update README.md --- README.md | 1 + 1 file changed, 1 insertion(+) diff --git a/README.md b/README.md index 5249aace41..17928c6538 100644 --- a/README.md +++ b/README.md @@ -1,2 +1,3 @@ ![LICENSE](https://img.shields.io/badge/license-Apache--2.0-brightgreen.svg) +![Language](https://img.shields.io/badge/language-C%2B%2B-blue.svg) # Milvus is coming soon! From 1f1511d1ccc9906951c7bdac97a474c3216d876d Mon Sep 17 00:00:00 2001 From: jielinxu <52057195+jielinxu@users.noreply.github.com> Date: Sun, 29 Sep 2019 15:51:13 +0800 Subject: [PATCH 004/149] Update README.md --- README.md | 1 + 1 file changed, 1 insertion(+) diff --git a/README.md b/README.md index 17928c6538..5163824f0a 100644 --- a/README.md +++ b/README.md @@ -1,3 +1,4 @@ ![LICENSE](https://img.shields.io/badge/license-Apache--2.0-brightgreen.svg) ![Language](https://img.shields.io/badge/language-C%2B%2B-blue.svg) +![Release](https://img.shields.io/badge/Release-v0.5.0-orange) # Milvus is coming soon! From ef4bce71b4bc32dc1944c77bda35d0c56cfcbc0a Mon Sep 17 00:00:00 2001 From: jielinxu <52057195+jielinxu@users.noreply.github.com> Date: Sun, 29 Sep 2019 16:05:25 +0800 Subject: [PATCH 005/149] Update README.md --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 5163824f0a..eec486fdb2 100644 --- a/README.md +++ b/README.md @@ -1,4 +1,4 @@ ![LICENSE](https://img.shields.io/badge/license-Apache--2.0-brightgreen.svg) ![Language](https://img.shields.io/badge/language-C%2B%2B-blue.svg) -![Release](https://img.shields.io/badge/Release-v0.5.0-orange) +![Release](https://img.shields.io/badge/Release-v0.5.0-orange.svg) # Milvus is coming soon! 
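Patches 001 through 005 build up the README badge row one shields.io badge at a time. For orientation, a shields.io static badge packs its three fields into the URL path as label-message-color, and a doubled dash escapes a literal dash inside a field, which is why the license badge reads "Apache--2.0" but renders as "Apache-2.0". The general pattern (field names here are illustrative) is:

    https://img.shields.io/badge/LABEL-MESSAGE-COLOR[.svg]
    ![LICENSE](https://img.shields.io/badge/license-Apache--2.0-brightgreen.svg)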
From 16f34710cea79850582bcb946e0ad1c5e9398c49 Mon Sep 17 00:00:00 2001 From: jielinxu <52057195+jielinxu@users.noreply.github.com> Date: Sun, 29 Sep 2019 16:06:13 +0800 Subject: [PATCH 006/149] Update README.md --- README.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/README.md b/README.md index eec486fdb2..ff9007e353 100644 --- a/README.md +++ b/README.md @@ -1,4 +1,6 @@ ![LICENSE](https://img.shields.io/badge/license-Apache--2.0-brightgreen.svg) ![Language](https://img.shields.io/badge/language-C%2B%2B-blue.svg) ![Release](https://img.shields.io/badge/Release-v0.5.0-orange.svg) +![Release date](https://img.shields.io/badge/release date-October-yellowgreen) + # Milvus is coming soon! From 45c970a78369042acdca003b9b0a2d1062ff9833 Mon Sep 17 00:00:00 2001 From: jielinxu <52057195+jielinxu@users.noreply.github.com> Date: Sun, 29 Sep 2019 16:06:44 +0800 Subject: [PATCH 007/149] Update README.md --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index ff9007e353..cdedb2a096 100644 --- a/README.md +++ b/README.md @@ -1,6 +1,6 @@ ![LICENSE](https://img.shields.io/badge/license-Apache--2.0-brightgreen.svg) ![Language](https://img.shields.io/badge/language-C%2B%2B-blue.svg) ![Release](https://img.shields.io/badge/Release-v0.5.0-orange.svg) -![Release date](https://img.shields.io/badge/release date-October-yellowgreen) +![Releasedate](https://img.shields.io/badge/release date-October-yellowgreen.svg) # Milvus is coming soon! From 6a879914e04a3961f4f3edbee92b8b123a4b6fc9 Mon Sep 17 00:00:00 2001 From: jielinxu <52057195+jielinxu@users.noreply.github.com> Date: Sun, 29 Sep 2019 16:07:34 +0800 Subject: [PATCH 008/149] Update README.md --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index cdedb2a096..1aba7639f2 100644 --- a/README.md +++ b/README.md @@ -1,6 +1,6 @@ ![LICENSE](https://img.shields.io/badge/license-Apache--2.0-brightgreen.svg) ![Language](https://img.shields.io/badge/language-C%2B%2B-blue.svg) ![Release](https://img.shields.io/badge/Release-v0.5.0-orange.svg) -![Releasedate](https://img.shields.io/badge/release date-October-yellowgreen.svg) +![Release date](https://img.shields.io/badge/release date-October-yellowgreen.svg) # Milvus is coming soon! From 428d52970ac7d7d926874036ffcaec429d640938 Mon Sep 17 00:00:00 2001 From: jielinxu <52057195+jielinxu@users.noreply.github.com> Date: Sun, 29 Sep 2019 16:08:58 +0800 Subject: [PATCH 009/149] Update README.md --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 1aba7639f2..13c7f1137c 100644 --- a/README.md +++ b/README.md @@ -1,6 +1,6 @@ ![LICENSE](https://img.shields.io/badge/license-Apache--2.0-brightgreen.svg) ![Language](https://img.shields.io/badge/language-C%2B%2B-blue.svg) ![Release](https://img.shields.io/badge/Release-v0.5.0-orange.svg) -![Release date](https://img.shields.io/badge/release date-October-yellowgreen.svg) +![Release date](https://img.shields.io/badge/release__date-October-yellowgreen) # Milvus is coming soon! 
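Patches 006 through 009 iterate on the "release date" badge because its label contains a space: a raw space in the badge path yields an invalid image URL, so Markdown renderers drop the badge. Under the usual shields.io escaping rules (stated here as an assumption, mirroring the doubled-dash rule above), a single underscore renders as a space while a double underscore renders as a literal underscore:

    ![Release date](https://img.shields.io/badge/release_date-October-yellowgreen.svg)    label renders as "release date"
    ![Release date](https://img.shields.io/badge/release__date-October-yellowgreen)       label renders as "release_date"

If those rules hold, the final form chosen in patch 009 shows a literal underscore rather than the intended space.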
From fbffc78b9e9a36432011bb7b69bf5be221840c24 Mon Sep 17 00:00:00 2001 From: JinHai-CN Date: Fri, 18 Oct 2019 17:06:48 +0800 Subject: [PATCH 010/149] Update FAISS package to 0.3.0 Former-commit-id: d8baa122f0a9e2e02709d50bc7d4a5105cab7f2f --- core/src/index/cmake/ThirdPartyPackagesCore.cmake | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/core/src/index/cmake/ThirdPartyPackagesCore.cmake b/core/src/index/cmake/ThirdPartyPackagesCore.cmake index ee1d88ee32..2dd619f7bc 100644 --- a/core/src/index/cmake/ThirdPartyPackagesCore.cmake +++ b/core/src/index/cmake/ThirdPartyPackagesCore.cmake @@ -243,7 +243,8 @@ if(CUSTOMIZATION) # set(FAISS_MD5 "57da9c4f599cc8fa4260488b1c96e1cc") # commit-id 6dbdf75987c34a2c853bd172ea0d384feea8358c branch-0.2.0 # set(FAISS_MD5 "21deb1c708490ca40ecb899122c01403") # commit-id 643e48f479637fd947e7b93fa4ca72b38ecc9a39 branch-0.2.0 # set(FAISS_MD5 "072db398351cca6e88f52d743bbb9fa0") # commit-id 3a2344d04744166af41ef1a74449d68a315bfe17 branch-0.2.1 - set(FAISS_MD5 "c89ea8e655f5cdf58f42486f13614714") # commit-id 9c28a1cbb88f41fa03b03d7204106201ad33276b branch-0.2.1 + # set(FAISS_MD5 "c89ea8e655f5cdf58f42486f13614714") # commit-id 9c28a1cbb88f41fa03b03d7204106201ad33276b branch-0.2.1 + set(FAISS_MD5 "f3b2ce3364c3fa7febd3aa7fdd0fe380") # commit-id 694e03458e6b69ce8a62502f71f69a614af5af8f branch-0.3.0 endif() else() set(FAISS_SOURCE_URL "https://github.com/facebookresearch/faiss/archive/v1.5.3.tar.gz") From 6572e6bf25cf3908b4c27c5812dbd15290380c4b Mon Sep 17 00:00:00 2001 From: "xiaojun.lin" Date: Sat, 19 Oct 2019 17:49:28 +0800 Subject: [PATCH 011/149] MS-671 Fix HybridIndex Crash MS-670 Adapt to faiss-1.6 Former-commit-id: 12300b390b6d300dc460f0729a368a240c972a80 --- core/src/db/engine/ExecutionEngineImpl.cpp | 53 +++++++- .../index/vector_index/FaissBaseIndex.h | 2 +- .../index/vector_index/IndexGPUIVF.cpp | 11 +- .../index/vector_index/IndexGPUIVFPQ.cpp | 4 +- .../index/vector_index/IndexGPUIVFSQ.cpp | 14 +- .../index/vector_index/IndexGPUIVFSQ.h | 4 - .../index/vector_index/IndexIDMAP.cpp | 5 +- .../knowhere/index/vector_index/IndexIVF.cpp | 6 +- .../knowhere/index/vector_index/IndexIVF.h | 2 +- .../index/vector_index/IndexIVFSQ.cpp | 10 +- .../index/vector_index/IndexIVFSQHybrid.cpp | 97 ++++++++++---- .../index/vector_index/IndexIVFSQHybrid.h | 7 + .../knowhere/index/vector_index/VectorIndex.h | 1 + .../index/vector_index/helpers/FaissIO.h | 2 +- core/src/index/unittest/Helper.h | 2 +- .../index/unittest/test_customized_index.cpp | 120 +++++++++++++++++- core/src/index/unittest/test_ivf.cpp | 41 ++---- core/src/scheduler/SchedInst.h | 1 + .../src/scheduler/optimizer/LargeSQ8HPass.cpp | 84 ++++++------ core/src/scheduler/optimizer/LargeSQ8HPass.h | 4 +- core/src/server/DBWrapper.cpp | 2 +- core/src/wrapper/VecIndex.h | 1 + core/unittest/wrapper/test_wrapper.cpp | 2 +- 23 files changed, 325 insertions(+), 150 deletions(-) diff --git a/core/src/db/engine/ExecutionEngineImpl.cpp b/core/src/db/engine/ExecutionEngineImpl.cpp index 862c1026d2..9c411deba1 100644 --- a/core/src/db/engine/ExecutionEngineImpl.cpp +++ b/core/src/db/engine/ExecutionEngineImpl.cpp @@ -22,10 +22,7 @@ #include "utils/CommonUtil.h" #include "utils/Exception.h" #include "utils/Log.h" - #include "knowhere/common/Config.h" -#include "knowhere/common/Exception.h" -#include "knowhere/index/vector_index/IndexIVFSQHybrid.h" #include "scheduler/Utils.h" #include "server/Config.h" #include "wrapper/ConfAdapter.h" @@ -249,6 +246,56 @@ ExecutionEngineImpl::Load(bool to_cache) { Status 
ExecutionEngineImpl::CopyToGpu(uint64_t device_id, bool hybrid) { if (hybrid) { +#if 1 + const std::string key = location_ + ".quantizer"; + std::vector gpus = scheduler::get_gpu_pool(); + + const int64_t NOT_FOUND = -1; + int64_t device_id = NOT_FOUND; + + // cache hit + { + knowhere::QuantizerPtr quantizer = nullptr; + + for (auto& gpu : gpus) { + auto cache = cache::GpuCacheMgr::GetInstance(gpu); + if (auto cached_quantizer = cache->GetIndex(key)) { + device_id = gpu; + quantizer = std::static_pointer_cast(cached_quantizer)->Data(); + } + } + + if (device_id != NOT_FOUND) { + // cache hit + auto config = std::make_shared(); + config->gpu_id = device_id; + config->mode = 2; + auto new_index = index_->LoadData(quantizer, config); + index_ = new_index; + } + } + + if (device_id == NOT_FOUND) { + // cache miss + std::vector all_free_mem; + for (auto& gpu : gpus) { + auto cache = cache::GpuCacheMgr::GetInstance(gpu); + auto free_mem = cache->CacheCapacity() - cache->CacheUsage(); + all_free_mem.push_back(free_mem); + } + + auto max_e = std::max_element(all_free_mem.begin(), all_free_mem.end()); + auto best_index = std::distance(all_free_mem.begin(), max_e); + device_id = gpus[best_index]; + + auto pair = index_->CopyToGpuWithQuantizer(device_id); + index_ = pair.first; + + // cache + auto cached_quantizer = std::make_shared(pair.second); + cache::GpuCacheMgr::GetInstance(device_id)->InsertItem(key, cached_quantizer); + } +#endif return Status::OK(); } diff --git a/core/src/index/knowhere/knowhere/index/vector_index/FaissBaseIndex.h b/core/src/index/knowhere/knowhere/index/vector_index/FaissBaseIndex.h index f3fceebb88..359af97d90 100644 --- a/core/src/index/knowhere/knowhere/index/vector_index/FaissBaseIndex.h +++ b/core/src/index/knowhere/knowhere/index/vector_index/FaissBaseIndex.h @@ -38,7 +38,7 @@ class FaissBaseIndex { virtual void SealImpl(); - protected: + public: std::shared_ptr index_ = nullptr; }; diff --git a/core/src/index/knowhere/knowhere/index/vector_index/IndexGPUIVF.cpp b/core/src/index/knowhere/knowhere/index/vector_index/IndexGPUIVF.cpp index a5e8f90f34..65938e1630 100644 --- a/core/src/index/knowhere/knowhere/index/vector_index/IndexGPUIVF.cpp +++ b/core/src/index/knowhere/knowhere/index/vector_index/IndexGPUIVF.cpp @@ -15,12 +15,12 @@ // specific language governing permissions and limitations // under the License. -#include -#include +#include + #include #include #include -#include +#include #include "knowhere/adapter/VectorAdapter.h" #include "knowhere/common/Exception.h" @@ -130,13 +130,12 @@ void GPUIVF::search_impl(int64_t n, const float* data, int64_t k, float* distances, int64_t* labels, const Config& cfg) { std::lock_guard lk(mutex_); - // TODO(linxj): gpu index support GenParams if (auto device_index = std::dynamic_pointer_cast(index_)) { auto search_cfg = std::dynamic_pointer_cast(cfg); - device_index->setNumProbes(search_cfg->nprobe); + device_index->nprobe = search_cfg->nprobe; +// assert(device_index->getNumProbes() == search_cfg->nprobe); { - // TODO(linxj): allocate gpu mem ResScope rs(res_, gpu_id_); device_index->search(n, (float*)data, k, distances, labels); } diff --git a/core/src/index/knowhere/knowhere/index/vector_index/IndexGPUIVFPQ.cpp b/core/src/index/knowhere/knowhere/index/vector_index/IndexGPUIVFPQ.cpp index 213141b3ac..9ba8dd0456 100644 --- a/core/src/index/knowhere/knowhere/index/vector_index/IndexGPUIVFPQ.cpp +++ b/core/src/index/knowhere/knowhere/index/vector_index/IndexGPUIVFPQ.cpp @@ -16,8 +16,10 @@ // under the License. 
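// The hybrid branch of ExecutionEngineImpl::CopyToGpu above does two things: on a
// cache hit it reuses a quantizer already resident in some GPU's cache (LoadData
// with mode = 2), and on a miss it places index plus quantizer on the GPU with the
// most free cache memory, then caches the quantizer under location_ + ".quantizer".
// A minimal, self-contained sketch of the device-selection step follows; the
// GpuCache struct and PickLeastLoadedGpu are illustrative stand-ins, not the real
// cache::GpuCacheMgr API:

#include <algorithm>
#include <cstdint>
#include <iterator>
#include <vector>

struct GpuCache {             // hypothetical stand-in for cache::GpuCacheMgr
    int64_t capacity;         // CacheCapacity()
    int64_t usage;            // CacheUsage()
};

// Returns the id of the GPU whose cache has the most free room.
// Assumes gpu_ids and caches are parallel vectors and non-empty.
int64_t PickLeastLoadedGpu(const std::vector<int64_t>& gpu_ids,
                           const std::vector<GpuCache>& caches) {
    std::vector<int64_t> all_free_mem;
    for (const auto& cache : caches) {
        all_free_mem.push_back(cache.capacity - cache.usage);
    }
    auto max_e = std::max_element(all_free_mem.begin(), all_free_mem.end());
    auto best_index = std::distance(all_free_mem.begin(), max_e);
    return gpu_ids[best_index];
}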
#include -#include #include +#include +#include + #include #include "knowhere/adapter/VectorAdapter.h" diff --git a/core/src/index/knowhere/knowhere/index/vector_index/IndexGPUIVFSQ.cpp b/core/src/index/knowhere/knowhere/index/vector_index/IndexGPUIVFSQ.cpp index 5e1f5226f2..fff27cd7db 100644 --- a/core/src/index/knowhere/knowhere/index/vector_index/IndexGPUIVFSQ.cpp +++ b/core/src/index/knowhere/knowhere/index/vector_index/IndexGPUIVFSQ.cpp @@ -15,9 +15,10 @@ // specific language governing permissions and limitations // under the License. -#include +#include +#include + #include -#include #include "knowhere/adapter/VectorAdapter.h" #include "knowhere/common/Exception.h" @@ -71,13 +72,4 @@ GPUIVFSQ::CopyGpuToCpu(const Config& config) { return std::make_shared(new_index); } -void -GPUIVFSQ::search_impl(int64_t n, const float* data, int64_t k, float* distances, int64_t* labels, const Config& cfg) { -#ifdef CUSTOMIZATION - GPUIVF::search_impl(n, data, k, distances, labels, cfg); -#else - IVF::search_impl(n, data, k, distances, labels, cfg); -#endif -} - } // namespace knowhere diff --git a/core/src/index/knowhere/knowhere/index/vector_index/IndexGPUIVFSQ.h b/core/src/index/knowhere/knowhere/index/vector_index/IndexGPUIVFSQ.h index 7332bce691..ed8013d77f 100644 --- a/core/src/index/knowhere/knowhere/index/vector_index/IndexGPUIVFSQ.h +++ b/core/src/index/knowhere/knowhere/index/vector_index/IndexGPUIVFSQ.h @@ -38,10 +38,6 @@ class GPUIVFSQ : public GPUIVF { VectorIndexPtr CopyGpuToCpu(const Config& config) override; - - protected: - void - search_impl(int64_t n, const float* data, int64_t k, float* distances, int64_t* labels, const Config& cfg) override; }; } // namespace knowhere diff --git a/core/src/index/knowhere/knowhere/index/vector_index/IndexIDMAP.cpp b/core/src/index/knowhere/knowhere/index/vector_index/IndexIDMAP.cpp index 2371591b5c..643bb16076 100644 --- a/core/src/index/knowhere/knowhere/index/vector_index/IndexIDMAP.cpp +++ b/core/src/index/knowhere/knowhere/index/vector_index/IndexIDMAP.cpp @@ -15,11 +15,12 @@ // specific language governing permissions and limitations // under the License. -#include #include #include -#include #include +#include +#include + #include #include "knowhere/adapter/VectorAdapter.h" diff --git a/core/src/index/knowhere/knowhere/index/vector_index/IndexIVF.cpp b/core/src/index/knowhere/knowhere/index/vector_index/IndexIVF.cpp index 0c4856f2b6..02708ff5d7 100644 --- a/core/src/index/knowhere/knowhere/index/vector_index/IndexIVF.cpp +++ b/core/src/index/knowhere/knowhere/index/vector_index/IndexIVF.cpp @@ -15,15 +15,11 @@ // specific language governing permissions and limitations // under the License. 
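// Patch 011 also tracks the faiss 1.6 API in GPUIVF::search_impl (see the
// IndexGPUIVF.cpp hunk above): the setter call is gone and nprobe is assigned
// directly on the GPU index, roughly
//
//     device_index->nprobe = search_cfg->nprobe;  // was: device_index->setNumProbes(...)
//
// where device_index is the faiss::gpu::GpuIndexIVF obtained by dynamic cast.
// GPUIVFSQ likewise drops its own search_impl override and now inherits the
// GPUIVF implementation unconditionally.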
-#include -#include +#include #include #include #include #include -#include -#include -#include #include #include #include diff --git a/core/src/index/knowhere/knowhere/index/vector_index/IndexIVF.h b/core/src/index/knowhere/knowhere/index/vector_index/IndexIVF.h index ef9982fa30..e064b6f08c 100644 --- a/core/src/index/knowhere/knowhere/index/vector_index/IndexIVF.h +++ b/core/src/index/knowhere/knowhere/index/vector_index/IndexIVF.h @@ -30,7 +30,7 @@ namespace knowhere { using Graph = std::vector>; -class IVF : public VectorIndex, protected FaissBaseIndex { +class IVF : public VectorIndex, public FaissBaseIndex { public: IVF() : FaissBaseIndex(nullptr) { } diff --git a/core/src/index/knowhere/knowhere/index/vector_index/IndexIVFSQ.cpp b/core/src/index/knowhere/knowhere/index/vector_index/IndexIVFSQ.cpp index 063dc63550..80b4c78883 100644 --- a/core/src/index/knowhere/knowhere/index/vector_index/IndexIVFSQ.cpp +++ b/core/src/index/knowhere/knowhere/index/vector_index/IndexIVFSQ.cpp @@ -15,7 +15,8 @@ // specific language governing permissions and limitations // under the License. -#include +#include +#include #include #include "knowhere/adapter/VectorAdapter.h" @@ -56,14 +57,7 @@ IVFSQ::CopyCpuToGpu(const int64_t& device_id, const Config& config) { if (auto res = FaissGpuResourceMgr::GetInstance().GetRes(device_id)) { ResScope rs(res, device_id, false); -#ifdef CUSTOMIZATION - faiss::gpu::GpuClonerOptions option; - option.allInGpu = true; - - auto gpu_index = faiss::gpu::index_cpu_to_gpu(res->faiss_res.get(), device_id, index_.get(), &option); -#else auto gpu_index = faiss::gpu::index_cpu_to_gpu(res->faiss_res.get(), device_id, index_.get()); -#endif std::shared_ptr device_index; device_index.reset(gpu_index); diff --git a/core/src/index/knowhere/knowhere/index/vector_index/IndexIVFSQHybrid.cpp b/core/src/index/knowhere/knowhere/index/vector_index/IndexIVFSQHybrid.cpp index fe5bf0990a..af67722266 100644 --- a/core/src/index/knowhere/knowhere/index/vector_index/IndexIVFSQHybrid.cpp +++ b/core/src/index/knowhere/knowhere/index/vector_index/IndexIVFSQHybrid.cpp @@ -17,19 +17,25 @@ // under the License. #include "knowhere/index/vector_index/IndexIVFSQHybrid.h" -#include -#include "faiss/AutoTune.h" -#include "faiss/gpu/GpuAutoTune.h" -#include "faiss/gpu/GpuIndexIVF.h" #include "knowhere/adapter/VectorAdapter.h" #include "knowhere/common/Exception.h" +#include + +#include +#include +#include + namespace knowhere { #ifdef CUSTOMIZATION +//std::mutex g_mutex; + IndexModelPtr IVFSQHybrid::Train(const DatasetPtr& dataset, const Config& config) { +// std::lock_guard lk(g_mutex); + auto build_cfg = std::dynamic_pointer_cast(config); if (build_cfg != nullptr) { build_cfg->CheckValid(); // throw exception @@ -63,23 +69,25 @@ IVFSQHybrid::Train(const DatasetPtr& dataset, const Config& config) { VectorIndexPtr IVFSQHybrid::CopyGpuToCpu(const Config& config) { + if (gpu_mode == 0) { + return std::make_shared(index_); + } std::lock_guard lk(mutex_); - if (auto device_idx = std::dynamic_pointer_cast(index_)) { faiss::Index* device_index = index_.get(); faiss::Index* host_index = faiss::gpu::index_gpu_to_cpu(device_index); std::shared_ptr new_index; new_index.reset(host_index); return std::make_shared(new_index); - } else { - // TODO(linxj): why? 
jinhai - return std::make_shared(index_); - } } VectorIndexPtr IVFSQHybrid::CopyCpuToGpu(const int64_t& device_id, const Config& config) { + if (gpu_mode != 0) { + KNOWHERE_THROW_MSG("Not a GpuIndex Type"); + } + if (auto res = FaissGpuResourceMgr::GetInstance().GetRes(device_id)) { ResScope rs(res, device_id, false); faiss::gpu::GpuClonerOptions option; @@ -105,16 +113,26 @@ IVFSQHybrid::LoadImpl(const BinarySet& index_binary) { FaissBaseIndex::LoadImpl(index_binary); // load on cpu auto* ivf_index = dynamic_cast(index_.get()); ivf_index->backup_quantizer(); + gpu_mode = 0; } void IVFSQHybrid::search_impl(int64_t n, const float* data, int64_t k, float* distances, int64_t* labels, const Config& cfg) { +// std::lock_guard lk(g_mutex); +// static int64_t search_count; +// ++search_count; + if (gpu_mode == 2) { GPUIVF::search_impl(n, data, k, distances, labels, cfg); - } else if (gpu_mode == 1) { - ResScope rs(res_, gpu_id_); - IVF::search_impl(n, data, k, distances, labels, cfg); +// index_->search(n, (float*)data, k, distances, labels); + } else if (gpu_mode == 1) { // hybrid + if (auto res = FaissGpuResourceMgr::GetInstance().GetRes(quantizer_gpu_id_)) { + ResScope rs(res, quantizer_gpu_id_, true); + IVF::search_impl(n, data, k, distances, labels, cfg); + } else { + KNOWHERE_THROW_MSG("Hybrid Search Error, can't get gpu: " + std::to_string(quantizer_gpu_id_) + "resource"); + } } else if (gpu_mode == 0) { IVF::search_impl(n, data, k, distances, labels, cfg); } @@ -122,16 +140,18 @@ IVFSQHybrid::search_impl(int64_t n, const float* data, int64_t k, float* distanc QuantizerPtr IVFSQHybrid::LoadQuantizer(const Config& conf) { +// std::lock_guard lk(g_mutex); + auto quantizer_conf = std::dynamic_pointer_cast(conf); if (quantizer_conf != nullptr) { if (quantizer_conf->mode != 1) { KNOWHERE_THROW_MSG("mode only support 1 in this func"); } } - gpu_id_ = quantizer_conf->gpu_id; + auto gpu_id = quantizer_conf->gpu_id; - if (auto res = FaissGpuResourceMgr::GetInstance().GetRes(gpu_id_)) { - ResScope rs(res, gpu_id_, false); + if (auto res = FaissGpuResourceMgr::GetInstance().GetRes(gpu_id)) { + ResScope rs(res, gpu_id, false); faiss::gpu::GpuClonerOptions option; option.allInGpu = true; @@ -148,16 +168,19 @@ IVFSQHybrid::LoadQuantizer(const Config& conf) { auto& q_ptr = index_composition->quantizer; q->size = q_ptr->d * q_ptr->getNumVecs() * sizeof(float); q->quantizer = q_ptr; + q->gpu_id = gpu_id; res_ = res; gpu_mode = 1; return q; } else { - KNOWHERE_THROW_MSG("CopyCpuToGpu Error, can't get gpu: " + std::to_string(gpu_id_) + "resource"); + KNOWHERE_THROW_MSG("CopyCpuToGpu Error, can't get gpu: " + std::to_string(gpu_id) + "resource"); } } void IVFSQHybrid::SetQuantizer(const QuantizerPtr& q) { +// std::lock_guard lk(g_mutex); + auto ivf_quantizer = std::dynamic_pointer_cast(q); if (ivf_quantizer == nullptr) { KNOWHERE_THROW_MSG("Quantizer type error"); @@ -170,20 +193,27 @@ IVFSQHybrid::SetQuantizer(const QuantizerPtr& q) { // delete ivf_index->quantizer; ivf_index->quantizer = ivf_quantizer->quantizer; } + quantizer_gpu_id_ = ivf_quantizer->gpu_id; + gpu_mode = 1; } void IVFSQHybrid::UnsetQuantizer() { +// std::lock_guard lk(g_mutex); + auto* ivf_index = dynamic_cast(index_.get()); if (ivf_index == nullptr) { KNOWHERE_THROW_MSG("Index type error"); } ivf_index->quantizer = nullptr; + quantizer_gpu_id_ = -1; } VectorIndexPtr IVFSQHybrid::LoadData(const knowhere::QuantizerPtr& q, const Config& conf) { +// std::lock_guard lk(g_mutex); + auto quantizer_conf = std::dynamic_pointer_cast(conf); if 
(quantizer_conf != nullptr) { if (quantizer_conf->mode != 2) { @@ -192,13 +222,11 @@ IVFSQHybrid::LoadData(const knowhere::QuantizerPtr& q, const Config& conf) { } else { KNOWHERE_THROW_MSG("conf error"); } - // if (quantizer_conf->gpu_id != gpu_id_) { - // KNOWHERE_THROW_MSG("quantizer and data must on the same gpu card"); - // } - gpu_id_ = quantizer_conf->gpu_id; - if (auto res = FaissGpuResourceMgr::GetInstance().GetRes(gpu_id_)) { - ResScope rs(res, gpu_id_, false); + auto gpu_id = quantizer_conf->gpu_id; + + if (auto res = FaissGpuResourceMgr::GetInstance().GetRes(gpu_id)) { + ResScope rs(res, gpu_id, false); faiss::gpu::GpuClonerOptions option; option.allInGpu = true; @@ -211,18 +239,20 @@ IVFSQHybrid::LoadData(const knowhere::QuantizerPtr& q, const Config& conf) { index_composition->quantizer = ivf_quantizer->quantizer; index_composition->mode = quantizer_conf->mode; // only 2 - auto gpu_index = faiss::gpu::index_cpu_to_gpu(res->faiss_res.get(), gpu_id_, index_composition, &option); + auto gpu_index = faiss::gpu::index_cpu_to_gpu(res->faiss_res.get(), gpu_id, index_composition, &option); std::shared_ptr new_idx; new_idx.reset(gpu_index); - auto sq_idx = std::make_shared(new_idx, gpu_id_, res); + auto sq_idx = std::make_shared(new_idx, gpu_id, res); return sq_idx; } else { - KNOWHERE_THROW_MSG("CopyCpuToGpu Error, can't get gpu: " + std::to_string(gpu_id_) + "resource"); + KNOWHERE_THROW_MSG("CopyCpuToGpu Error, can't get gpu: " + std::to_string(gpu_id) + "resource"); } } std::pair IVFSQHybrid::CopyCpuToGpuWithQuantizer(const int64_t& device_id, const Config& config) { +// std::lock_guard lk(g_mutex); + if (auto res = FaissGpuResourceMgr::GetInstance().GetRes(device_id)) { ResScope rs(res, device_id, false); faiss::gpu::GpuClonerOptions option; @@ -242,12 +272,29 @@ IVFSQHybrid::CopyCpuToGpuWithQuantizer(const int64_t& device_id, const Config& c auto q = std::make_shared(); q->quantizer = index_composition.quantizer; q->size = index_composition.quantizer->d * index_composition.quantizer->getNumVecs() * sizeof(float); + q->gpu_id = device_id; return std::make_pair(new_idx, q); } else { KNOWHERE_THROW_MSG("CopyCpuToGpu Error, can't get gpu: " + std::to_string(gpu_id_) + "resource"); } } +void +IVFSQHybrid::set_index_model(IndexModelPtr model) { + std::lock_guard lk(mutex_); + + auto host_index = std::static_pointer_cast(model); + if (auto gpures = FaissGpuResourceMgr::GetInstance().GetRes(gpu_id_)) { + ResScope rs(gpures, gpu_id_, false); + auto device_index = faiss::gpu::index_cpu_to_gpu(gpures->faiss_res.get(), gpu_id_, host_index->index_.get()); + index_.reset(device_index); + res_ = gpures; + gpu_mode = 2; + } else { + KNOWHERE_THROW_MSG("load index model error, can't get gpu_resource"); + } +} + FaissIVFQuantizer::~FaissIVFQuantizer() { if (quantizer != nullptr) { delete quantizer; diff --git a/core/src/index/knowhere/knowhere/index/vector_index/IndexIVFSQHybrid.h b/core/src/index/knowhere/knowhere/index/vector_index/IndexIVFSQHybrid.h index f54c61c20f..87cc22931f 100644 --- a/core/src/index/knowhere/knowhere/index/vector_index/IndexIVFSQHybrid.h +++ b/core/src/index/knowhere/knowhere/index/vector_index/IndexIVFSQHybrid.h @@ -18,6 +18,8 @@ #pragma once #include +#include + #include #include @@ -29,6 +31,7 @@ namespace knowhere { #ifdef CUSTOMIZATION struct FaissIVFQuantizer : public Quantizer { faiss::gpu::GpuIndexFlat* quantizer = nullptr; + int64_t gpu_id; ~FaissIVFQuantizer() override; }; @@ -52,6 +55,9 @@ class IVFSQHybrid : public GPUIVFSQ { } public: + void + 
set_index_model(IndexModelPtr model) override; + QuantizerPtr LoadQuantizer(const Config& conf); @@ -85,6 +91,7 @@ class IVFSQHybrid : public GPUIVFSQ { protected: int64_t gpu_mode = 0; // 0,1,2 + int64_t quantizer_gpu_id_ = -1; }; } // namespace knowhere diff --git a/core/src/index/knowhere/knowhere/index/vector_index/VectorIndex.h b/core/src/index/knowhere/knowhere/index/vector_index/VectorIndex.h index 810c4d2ea4..6509458b7b 100644 --- a/core/src/index/knowhere/knowhere/index/vector_index/VectorIndex.h +++ b/core/src/index/knowhere/knowhere/index/vector_index/VectorIndex.h @@ -48,6 +48,7 @@ class VectorIndex : public Index { virtual void Seal() = 0; + // TODO(linxj): Deprecated virtual VectorIndexPtr Clone() = 0; diff --git a/core/src/index/knowhere/knowhere/index/vector_index/helpers/FaissIO.h b/core/src/index/knowhere/knowhere/index/vector_index/helpers/FaissIO.h index 7cce5bbbac..a7f8f349e1 100644 --- a/core/src/index/knowhere/knowhere/index/vector_index/helpers/FaissIO.h +++ b/core/src/index/knowhere/knowhere/index/vector_index/helpers/FaissIO.h @@ -17,7 +17,7 @@ #pragma once -#include +#include namespace knowhere { diff --git a/core/src/index/unittest/Helper.h b/core/src/index/unittest/Helper.h index d11a484c03..8d4bb0f4ae 100644 --- a/core/src/index/unittest/Helper.h +++ b/core/src/index/unittest/Helper.h @@ -26,7 +26,7 @@ #include "knowhere/index/vector_index/IndexIVFSQ.h" #include "knowhere/index/vector_index/IndexIVFSQHybrid.h" -constexpr int DEVICEID = 0; +int DEVICEID = 0; constexpr int64_t DIM = 128; constexpr int64_t NB = 10000; constexpr int64_t NQ = 10; diff --git a/core/src/index/unittest/test_customized_index.cpp b/core/src/index/unittest/test_customized_index.cpp index 1e0b1d932d..f9b48b8b67 100644 --- a/core/src/index/unittest/test_customized_index.cpp +++ b/core/src/index/unittest/test_customized_index.cpp @@ -16,17 +16,23 @@ // under the License. 
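// IVFSQHybrid now keys its behaviour off gpu_mode: 0 means fully on CPU, 1 means
// hybrid with only the coarse quantizer resident on quantizer_gpu_id_, and 2 means
// fully on GPU. A condensed, compilable sketch of the search dispatch introduced
// in IndexIVFSQHybrid.cpp above; the three helpers are stand-ins for
// IVF::search_impl, GPUIVF::search_impl and the FaissGpuResourceMgr/ResScope
// acquisition, not real APIs:

#include <cstdint>
#include <stdexcept>
#include <string>

enum class HybridMode : int64_t { Cpu = 0, QuantizerOnGpu = 1, AllOnGpu = 2 };

inline void cpu_search() {}                       // stand-in for IVF::search_impl
inline void gpu_search() {}                       // stand-in for GPUIVF::search_impl
inline bool acquire_gpu(int64_t /*device_id*/) {  // stand-in for GetRes + ResScope
    return true;
}

inline void hybrid_search(HybridMode mode, int64_t quantizer_gpu_id) {
    if (mode == HybridMode::AllOnGpu) {
        gpu_search();
    } else if (mode == HybridMode::QuantizerOnGpu) {
        if (!acquire_gpu(quantizer_gpu_id)) {
            throw std::runtime_error("Hybrid Search Error, can't get gpu: " +
                                     std::to_string(quantizer_gpu_id) + " resource");
        }
        cpu_search();   // CPU search, but the coarse quantizer lives on the GPU
    } else {
        cpu_search();   // plain CPU path
    }
}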
#include +#include #include "unittest/Helper.h" #include "unittest/utils.h" +#include "knowhere/common/Timer.h" + class SingleIndexTest : public DataGen, public TestGpuIndexBase { protected: void SetUp() override { TestGpuIndexBase::SetUp(); - Generate(DIM, NB, NQ); - k = K; + nb = 1000000; + nq = 1000; + dim = DIM; + Generate(dim, nb, nq); + k = 1000; } void @@ -119,4 +125,114 @@ TEST_F(SingleIndexTest, IVFSQHybrid) { } } +//TEST_F(SingleIndexTest, thread_safe) { +// assert(!xb.empty()); +// +// index_type = "IVFSQHybrid"; +// index_ = IndexFactory(index_type); +// auto base = ParamGenerator::GetInstance().Gen(ParameterType::ivfsq); +// auto conf = std::dynamic_pointer_cast(base); +// conf->nlist = 16384; +// conf->k = k; +// conf->nprobe = 10; +// conf->d = dim; +// auto preprocessor = index_->BuildPreprocessor(base_dataset, conf); +// index_->set_preprocessor(preprocessor); +// +// auto model = index_->Train(base_dataset, conf); +// index_->set_index_model(model); +// index_->Add(base_dataset, conf); +// EXPECT_EQ(index_->Count(), nb); +// EXPECT_EQ(index_->Dimension(), dim); +// +// auto binaryset = index_->Serialize(); +// +// +// +// auto cpu_idx = std::make_shared(DEVICEID); +// cpu_idx->Load(binaryset); +// auto pair = cpu_idx->CopyCpuToGpuWithQuantizer(DEVICEID, conf); +// auto quantizer = pair.second; +// +// auto quantizer_conf = std::make_shared(); +// quantizer_conf->mode = 2; // only copy data +// quantizer_conf->gpu_id = DEVICEID; +// +// auto CopyAllToGpu = [&](int64_t search_count, bool do_search = false) { +// for (int i = 0; i < search_count; ++i) { +// auto gpu_idx = cpu_idx->CopyCpuToGpu(DEVICEID, conf); +// if (do_search) { +// auto result = gpu_idx->Search(query_dataset, conf); +// AssertAnns(result, nq, conf->k); +// } +// } +// }; +// +// auto hybrid_qt_idx = std::make_shared(DEVICEID); +// hybrid_qt_idx->Load(binaryset); +// auto SetQuantizerDoSearch = [&](int64_t search_count) { +// for (int i = 0; i < search_count; ++i) { +// hybrid_qt_idx->SetQuantizer(quantizer); +// auto result = hybrid_qt_idx->Search(query_dataset, conf); +// AssertAnns(result, nq, conf->k); +// // PrintResult(result, nq, k); +// hybrid_qt_idx->UnsetQuantizer(); +// } +// }; +// +// auto hybrid_data_idx = std::make_shared(DEVICEID); +// hybrid_data_idx->Load(binaryset); +// auto LoadDataDoSearch = [&](int64_t search_count, bool do_search = false) { +// for (int i = 0; i < search_count; ++i) { +// auto hybrid_idx = hybrid_data_idx->LoadData(quantizer, quantizer_conf); +// if (do_search) { +// auto result = hybrid_idx->Search(query_dataset, conf); +//// AssertAnns(result, nq, conf->k); +// } +// } +// }; +// +// knowhere::TimeRecorder tc(""); +// CopyAllToGpu(2000/2, false); +// tc.RecordSection("CopyAllToGpu witout search"); +// CopyAllToGpu(400/2, true); +// tc.RecordSection("CopyAllToGpu with search"); +// SetQuantizerDoSearch(6); +// tc.RecordSection("SetQuantizer with search"); +// LoadDataDoSearch(2000/2, false); +// tc.RecordSection("LoadData without search"); +// LoadDataDoSearch(400/2, true); +// tc.RecordSection("LoadData with search"); +// +// { +// std::thread t1(CopyAllToGpu, 2000, false); +// std::thread t2(CopyAllToGpu, 400, true); +// t1.join(); +// t2.join(); +// } +// +// { +// std::thread t1(SetQuantizerDoSearch, 12); +// std::thread t2(CopyAllToGpu, 400, true); +// t1.join(); +// t2.join(); +// } +// +// { +// std::thread t1(SetQuantizerDoSearch, 12); +// std::thread t2(LoadDataDoSearch, 400, true); +// t1.join(); +// t2.join(); +// } +// +// { +// std::thread 
t1(LoadDataDoSearch, 2000, false); +// std::thread t2(LoadDataDoSearch, 400, true); +// t1.join(); +// t2.join(); +// } +// +//} + + #endif diff --git a/core/src/index/unittest/test_ivf.cpp b/core/src/index/unittest/test_ivf.cpp index fae27b0dd3..3fd3e16d0e 100644 --- a/core/src/index/unittest/test_ivf.cpp +++ b/core/src/index/unittest/test_ivf.cpp @@ -20,19 +20,12 @@ #include #include -#include -#include #include #include "knowhere/common/Exception.h" #include "knowhere/common/Timer.h" #include "knowhere/index/vector_index/IndexGPUIVF.h" -#include "knowhere/index/vector_index/IndexGPUIVFPQ.h" -#include "knowhere/index/vector_index/IndexGPUIVFSQ.h" #include "knowhere/index/vector_index/IndexIVF.h" -#include "knowhere/index/vector_index/IndexIVFPQ.h" -#include "knowhere/index/vector_index/IndexIVFSQ.h" -#include "knowhere/index/vector_index/IndexIVFSQHybrid.h" #include "knowhere/index/vector_index/helpers/Cloner.h" #include "unittest/Helper.h" @@ -51,6 +44,9 @@ class IVFTest : public DataGen, public TestWithParam<::std::tuple gpu_idx{"GPUIVFSQ"}; - auto finder = std::find(gpu_idx.cbegin(), gpu_idx.cend(), index_type); - if (finder != gpu_idx.cend()) { - return knowhere::cloner::CopyCpuToGpu(index_, DEVICEID, knowhere::Config()); - } - return index_; - } - protected: std::string index_type; knowhere::Config conf; @@ -100,8 +86,7 @@ TEST_P(IVFTest, ivf_basic) { EXPECT_EQ(index_->Count(), nb); EXPECT_EQ(index_->Dimension(), dim); - auto new_idx = ChooseTodo(); - auto result = new_idx->Search(query_dataset, conf); + auto result = index_->Search(query_dataset, conf); AssertAnns(result, nq, conf->k); // PrintResult(result, nq, k); } @@ -134,8 +119,7 @@ TEST_P(IVFTest, ivf_serialize) { index_->set_index_model(model); index_->Add(base_dataset, conf); - auto new_idx = ChooseTodo(); - auto result = new_idx->Search(query_dataset, conf); + auto result = index_->Search(query_dataset, conf); AssertAnns(result, nq, conf->k); } @@ -159,8 +143,7 @@ TEST_P(IVFTest, ivf_serialize) { index_->Load(binaryset); EXPECT_EQ(index_->Count(), nb); EXPECT_EQ(index_->Dimension(), dim); - auto new_idx = ChooseTodo(); - auto result = new_idx->Search(query_dataset, conf); + auto result = index_->Search(query_dataset, conf); AssertAnns(result, nq, conf->k); } } @@ -176,8 +159,7 @@ TEST_P(IVFTest, clone_test) { index_->Add(base_dataset, conf); EXPECT_EQ(index_->Count(), nb); EXPECT_EQ(index_->Dimension(), dim); - auto new_idx = ChooseTodo(); - auto result = new_idx->Search(query_dataset, conf); + auto result = index_->Search(query_dataset, conf); AssertAnns(result, nq, conf->k); // PrintResult(result, nq, k); @@ -210,12 +192,6 @@ TEST_P(IVFTest, clone_test) { // } // } - { - if (index_type == "IVFSQHybrid") { - return; - } - } - { // copy from gpu to cpu std::vector support_idx_vec{"GPUIVF", "GPUIVFSQ", "IVFSQHybrid"}; @@ -277,8 +253,7 @@ TEST_P(IVFTest, gpu_seal_test) { index_->Add(base_dataset, conf); EXPECT_EQ(index_->Count(), nb); EXPECT_EQ(index_->Dimension(), dim); - auto new_idx = ChooseTodo(); - auto result = new_idx->Search(query_dataset, conf); + auto result = index_->Search(query_dataset, conf); AssertAnns(result, nq, conf->k); auto cpu_idx = knowhere::cloner::CopyGpuToCpu(index_, knowhere::Config()); diff --git a/core/src/scheduler/SchedInst.h b/core/src/scheduler/SchedInst.h index 60033731ae..b9153d3bc3 100644 --- a/core/src/scheduler/SchedInst.h +++ b/core/src/scheduler/SchedInst.h @@ -94,6 +94,7 @@ class OptimizerInst { std::lock_guard lock(mutex_); if (instance == nullptr) { std::vector pass_list; + 
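// (Context for the hunk above and the LargeSQ8HPass.cpp hunk below: patch 011
// re-enables LargeSQ8HPass and registers it with the scheduler here. Once active,
// the pass leaves non-search tasks and non-IVFSQ8H engines alone, keeps small
// queries where they are via
//
//     if (search_job->nq() < 100) return false;
//
// and otherwise labels the task for the GPU whose cache reports the most free
// memory, using the same max_element/distance selection sketched earlier.)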
pass_list.push_back(std::make_shared()); pass_list.push_back(std::make_shared()); instance = std::make_shared(pass_list); } diff --git a/core/src/scheduler/optimizer/LargeSQ8HPass.cpp b/core/src/scheduler/optimizer/LargeSQ8HPass.cpp index 8368a90000..0d5a81a7b6 100644 --- a/core/src/scheduler/optimizer/LargeSQ8HPass.cpp +++ b/core/src/scheduler/optimizer/LargeSQ8HPass.cpp @@ -26,48 +26,48 @@ namespace milvus { namespace scheduler { -// bool -// LargeSQ8HPass::Run(const TaskPtr& task) { -// if (task->Type() != TaskType::SearchTask) { -// return false; -// } -// -// auto search_task = std::static_pointer_cast(task); -// if (search_task->file_->engine_type_ != (int)engine::EngineType::FAISS_IVFSQ8H) { -// return false; -// } -// -// auto search_job = std::static_pointer_cast(search_task->job_.lock()); -// -// // TODO: future, Index::IVFSQ8H, if nq < threshold set cpu, else set gpu -// if (search_job->nq() < 100) { -// return false; -// } -// -// std::vector gpus = scheduler::get_gpu_pool(); -// std::vector all_free_mem; -// for (auto& gpu : gpus) { -// auto cache = cache::GpuCacheMgr::GetInstance(gpu); -// auto free_mem = cache->CacheCapacity() - cache->CacheUsage(); -// all_free_mem.push_back(free_mem); -// } -// -// auto max_e = std::max_element(all_free_mem.begin(), all_free_mem.end()); -// auto best_index = std::distance(all_free_mem.begin(), max_e); -// auto best_device_id = gpus[best_index]; -// -// ResourcePtr res_ptr = ResMgrInst::GetInstance()->GetResource(ResourceType::GPU, best_device_id); -// if (not res_ptr) { -// SERVER_LOG_ERROR << "GpuResource " << best_device_id << " invalid."; -// // TODO: throw critical error and exit -// return false; -// } -// -// auto label = std::make_shared(std::weak_ptr(res_ptr)); -// task->label() = label; -// -// return true; -// } + bool + LargeSQ8HPass::Run(const TaskPtr& task) { + if (task->Type() != TaskType::SearchTask) { + return false; + } + + auto search_task = std::static_pointer_cast(task); + if (search_task->file_->engine_type_ != (int)engine::EngineType::FAISS_IVFSQ8H) { + return false; + } + + auto search_job = std::static_pointer_cast(search_task->job_.lock()); + + // TODO: future, Index::IVFSQ8H, if nq < threshold set cpu, else set gpu + if (search_job->nq() < 100) { + return false; + } + + std::vector gpus = scheduler::get_gpu_pool(); + std::vector all_free_mem; + for (auto& gpu : gpus) { + auto cache = cache::GpuCacheMgr::GetInstance(gpu); + auto free_mem = cache->CacheCapacity() - cache->CacheUsage(); + all_free_mem.push_back(free_mem); + } + + auto max_e = std::max_element(all_free_mem.begin(), all_free_mem.end()); + auto best_index = std::distance(all_free_mem.begin(), max_e); + auto best_device_id = gpus[best_index]; + + ResourcePtr res_ptr = ResMgrInst::GetInstance()->GetResource(ResourceType::GPU, best_device_id); + if (not res_ptr) { + SERVER_LOG_ERROR << "GpuResource " << best_device_id << " invalid."; + // TODO: throw critical error and exit + return false; + } + + auto label = std::make_shared(std::weak_ptr(res_ptr)); + task->label() = label; + + return true; + } } // namespace scheduler } // namespace milvus diff --git a/core/src/scheduler/optimizer/LargeSQ8HPass.h b/core/src/scheduler/optimizer/LargeSQ8HPass.h index 3335a37cc7..49e658002f 100644 --- a/core/src/scheduler/optimizer/LargeSQ8HPass.h +++ b/core/src/scheduler/optimizer/LargeSQ8HPass.h @@ -37,8 +37,8 @@ class LargeSQ8HPass : public Pass { LargeSQ8HPass() = default; public: - // bool - // Run(const TaskPtr& task) override; + bool + Run(const TaskPtr& task) 
override; }; using LargeSQ8HPassPtr = std::shared_ptr; diff --git a/core/src/server/DBWrapper.cpp b/core/src/server/DBWrapper.cpp index a5b892ad47..34c8d38faf 100644 --- a/core/src/server/DBWrapper.cpp +++ b/core/src/server/DBWrapper.cpp @@ -15,7 +15,7 @@ // specific language governing permissions and limitations // under the License. -#include +#include #include #include #include diff --git a/core/src/wrapper/VecIndex.h b/core/src/wrapper/VecIndex.h index 05da9ccc03..1729d583ae 100644 --- a/core/src/wrapper/VecIndex.h +++ b/core/src/wrapper/VecIndex.h @@ -70,6 +70,7 @@ class VecIndex : public cache::DataObj { virtual VecIndexPtr CopyToCpu(const Config& cfg = Config()) = 0; + // TODO(linxj): Deprecated virtual VecIndexPtr Clone() = 0; diff --git a/core/unittest/wrapper/test_wrapper.cpp b/core/unittest/wrapper/test_wrapper.cpp index f112fc7e65..2f8fd6fafe 100644 --- a/core/unittest/wrapper/test_wrapper.cpp +++ b/core/unittest/wrapper/test_wrapper.cpp @@ -74,7 +74,7 @@ INSTANTIATE_TEST_CASE_P(WrapperParam, KnowhereWrapperTest, 10, 10), std::make_tuple(milvus::engine::IndexType::FAISS_IVFSQ8_CPU, "Default", DIM, NB, 10, 10), -// std::make_tuple(milvus::engine::IndexType::FAISS_IVFSQ8_GPU, "Default", DIM, NB, 10, 10), + std::make_tuple(milvus::engine::IndexType::FAISS_IVFSQ8_GPU, "Default", DIM, NB, 10, 10), std::make_tuple(milvus::engine::IndexType::FAISS_IVFSQ8_MIX, "Default", DIM, NB, 10, 10), // std::make_tuple(IndexType::NSG_MIX, "Default", 128, 250000, 10, 10), // std::make_tuple(IndexType::SPTAG_KDT_RNT_CPU, "Default", 128, 250000, 10, 10), From f23e8e2143caac8c5fbb4b1332f0124d159919ac Mon Sep 17 00:00:00 2001 From: "xiaojun.lin" Date: Sat, 19 Oct 2019 17:51:59 +0800 Subject: [PATCH 012/149] clang-format... Former-commit-id: 3bd5e246b34d8ca7800dbc6ce0e784dfa26ce18a --- core/src/db/engine/ExecutionEngineImpl.cpp | 6 ++-- .../index/vector_index/IndexGPUIVF.cpp | 4 +-- .../index/vector_index/IndexGPUIVFPQ.cpp | 2 +- .../index/vector_index/IndexGPUIVFSQ.cpp | 2 +- .../index/vector_index/IndexIDMAP.cpp | 4 +-- .../knowhere/index/vector_index/IndexIVF.cpp | 2 +- .../index/vector_index/IndexIVFSQ.cpp | 2 +- .../index/vector_index/IndexIVFSQHybrid.cpp | 36 +++++++++---------- .../index/vector_index/IndexIVFSQHybrid.h | 2 +- .../index/unittest/test_customized_index.cpp | 3 +- core/src/index/unittest/test_ivf.cpp | 6 ++-- .../src/scheduler/optimizer/LargeSQ8HPass.cpp | 6 ++-- 12 files changed, 37 insertions(+), 38 deletions(-) diff --git a/core/src/db/engine/ExecutionEngineImpl.cpp b/core/src/db/engine/ExecutionEngineImpl.cpp index 9c411deba1..aeb17203ef 100644 --- a/core/src/db/engine/ExecutionEngineImpl.cpp +++ b/core/src/db/engine/ExecutionEngineImpl.cpp @@ -18,13 +18,13 @@ #include "db/engine/ExecutionEngineImpl.h" #include "cache/CpuCacheMgr.h" #include "cache/GpuCacheMgr.h" +#include "knowhere/common/Config.h" #include "metrics/Metrics.h" +#include "scheduler/Utils.h" +#include "server/Config.h" #include "utils/CommonUtil.h" #include "utils/Exception.h" #include "utils/Log.h" -#include "knowhere/common/Config.h" -#include "scheduler/Utils.h" -#include "server/Config.h" #include "wrapper/ConfAdapter.h" #include "wrapper/ConfAdapterMgr.h" #include "wrapper/VecImpl.h" diff --git a/core/src/index/knowhere/knowhere/index/vector_index/IndexGPUIVF.cpp b/core/src/index/knowhere/knowhere/index/vector_index/IndexGPUIVF.cpp index 65938e1630..a26f947181 100644 --- a/core/src/index/knowhere/knowhere/index/vector_index/IndexGPUIVF.cpp +++ 
b/core/src/index/knowhere/knowhere/index/vector_index/IndexGPUIVF.cpp @@ -17,10 +17,10 @@ #include +#include #include #include #include -#include #include "knowhere/adapter/VectorAdapter.h" #include "knowhere/common/Exception.h" @@ -133,7 +133,7 @@ GPUIVF::search_impl(int64_t n, const float* data, int64_t k, float* distances, i if (auto device_index = std::dynamic_pointer_cast(index_)) { auto search_cfg = std::dynamic_pointer_cast(cfg); device_index->nprobe = search_cfg->nprobe; -// assert(device_index->getNumProbes() == search_cfg->nprobe); + // assert(device_index->getNumProbes() == search_cfg->nprobe); { ResScope rs(res_, gpu_id_); diff --git a/core/src/index/knowhere/knowhere/index/vector_index/IndexGPUIVFPQ.cpp b/core/src/index/knowhere/knowhere/index/vector_index/IndexGPUIVFPQ.cpp index 9ba8dd0456..b027539c37 100644 --- a/core/src/index/knowhere/knowhere/index/vector_index/IndexGPUIVFPQ.cpp +++ b/core/src/index/knowhere/knowhere/index/vector_index/IndexGPUIVFPQ.cpp @@ -16,9 +16,9 @@ // under the License. #include +#include #include #include -#include #include diff --git a/core/src/index/knowhere/knowhere/index/vector_index/IndexGPUIVFSQ.cpp b/core/src/index/knowhere/knowhere/index/vector_index/IndexGPUIVFSQ.cpp index fff27cd7db..941f9adc48 100644 --- a/core/src/index/knowhere/knowhere/index/vector_index/IndexGPUIVFSQ.cpp +++ b/core/src/index/knowhere/knowhere/index/vector_index/IndexGPUIVFSQ.cpp @@ -15,8 +15,8 @@ // specific language governing permissions and limitations // under the License. -#include #include +#include #include diff --git a/core/src/index/knowhere/knowhere/index/vector_index/IndexIDMAP.cpp b/core/src/index/knowhere/knowhere/index/vector_index/IndexIDMAP.cpp index 643bb16076..f926951736 100644 --- a/core/src/index/knowhere/knowhere/index/vector_index/IndexIDMAP.cpp +++ b/core/src/index/knowhere/knowhere/index/vector_index/IndexIDMAP.cpp @@ -17,9 +17,9 @@ #include #include -#include -#include #include +#include +#include #include diff --git a/core/src/index/knowhere/knowhere/index/vector_index/IndexIVF.cpp b/core/src/index/knowhere/knowhere/index/vector_index/IndexIVF.cpp index 02708ff5d7..73f5c4164b 100644 --- a/core/src/index/knowhere/knowhere/index/vector_index/IndexIVF.cpp +++ b/core/src/index/knowhere/knowhere/index/vector_index/IndexIVF.cpp @@ -15,11 +15,11 @@ // specific language governing permissions and limitations // under the License. -#include #include #include #include #include +#include #include #include #include diff --git a/core/src/index/knowhere/knowhere/index/vector_index/IndexIVFSQ.cpp b/core/src/index/knowhere/knowhere/index/vector_index/IndexIVFSQ.cpp index 80b4c78883..6e9a1d94da 100644 --- a/core/src/index/knowhere/knowhere/index/vector_index/IndexIVFSQ.cpp +++ b/core/src/index/knowhere/knowhere/index/vector_index/IndexIVFSQ.cpp @@ -15,8 +15,8 @@ // specific language governing permissions and limitations // under the License. 
-#include #include +#include #include #include "knowhere/adapter/VectorAdapter.h" diff --git a/core/src/index/knowhere/knowhere/index/vector_index/IndexIVFSQHybrid.cpp b/core/src/index/knowhere/knowhere/index/vector_index/IndexIVFSQHybrid.cpp index af67722266..7b229db21e 100644 --- a/core/src/index/knowhere/knowhere/index/vector_index/IndexIVFSQHybrid.cpp +++ b/core/src/index/knowhere/knowhere/index/vector_index/IndexIVFSQHybrid.cpp @@ -22,19 +22,19 @@ #include +#include #include #include -#include namespace knowhere { #ifdef CUSTOMIZATION -//std::mutex g_mutex; +// std::mutex g_mutex; IndexModelPtr IVFSQHybrid::Train(const DatasetPtr& dataset, const Config& config) { -// std::lock_guard lk(g_mutex); + // std::lock_guard lk(g_mutex); auto build_cfg = std::dynamic_pointer_cast(config); if (build_cfg != nullptr) { @@ -74,12 +74,12 @@ IVFSQHybrid::CopyGpuToCpu(const Config& config) { } std::lock_guard lk(mutex_); - faiss::Index* device_index = index_.get(); - faiss::Index* host_index = faiss::gpu::index_gpu_to_cpu(device_index); + faiss::Index* device_index = index_.get(); + faiss::Index* host_index = faiss::gpu::index_gpu_to_cpu(device_index); - std::shared_ptr new_index; - new_index.reset(host_index); - return std::make_shared(new_index); + std::shared_ptr new_index; + new_index.reset(host_index); + return std::make_shared(new_index); } VectorIndexPtr @@ -119,14 +119,14 @@ IVFSQHybrid::LoadImpl(const BinarySet& index_binary) { void IVFSQHybrid::search_impl(int64_t n, const float* data, int64_t k, float* distances, int64_t* labels, const Config& cfg) { -// std::lock_guard lk(g_mutex); -// static int64_t search_count; -// ++search_count; + // std::lock_guard lk(g_mutex); + // static int64_t search_count; + // ++search_count; if (gpu_mode == 2) { GPUIVF::search_impl(n, data, k, distances, labels, cfg); -// index_->search(n, (float*)data, k, distances, labels); - } else if (gpu_mode == 1) { // hybrid + // index_->search(n, (float*)data, k, distances, labels); + } else if (gpu_mode == 1) { // hybrid if (auto res = FaissGpuResourceMgr::GetInstance().GetRes(quantizer_gpu_id_)) { ResScope rs(res, quantizer_gpu_id_, true); IVF::search_impl(n, data, k, distances, labels, cfg); @@ -140,7 +140,7 @@ IVFSQHybrid::search_impl(int64_t n, const float* data, int64_t k, float* distanc QuantizerPtr IVFSQHybrid::LoadQuantizer(const Config& conf) { -// std::lock_guard lk(g_mutex); + // std::lock_guard lk(g_mutex); auto quantizer_conf = std::dynamic_pointer_cast(conf); if (quantizer_conf != nullptr) { @@ -179,7 +179,7 @@ IVFSQHybrid::LoadQuantizer(const Config& conf) { void IVFSQHybrid::SetQuantizer(const QuantizerPtr& q) { -// std::lock_guard lk(g_mutex); + // std::lock_guard lk(g_mutex); auto ivf_quantizer = std::dynamic_pointer_cast(q); if (ivf_quantizer == nullptr) { @@ -199,7 +199,7 @@ IVFSQHybrid::SetQuantizer(const QuantizerPtr& q) { void IVFSQHybrid::UnsetQuantizer() { -// std::lock_guard lk(g_mutex); + // std::lock_guard lk(g_mutex); auto* ivf_index = dynamic_cast(index_.get()); if (ivf_index == nullptr) { @@ -212,7 +212,7 @@ IVFSQHybrid::UnsetQuantizer() { VectorIndexPtr IVFSQHybrid::LoadData(const knowhere::QuantizerPtr& q, const Config& conf) { -// std::lock_guard lk(g_mutex); + // std::lock_guard lk(g_mutex); auto quantizer_conf = std::dynamic_pointer_cast(conf); if (quantizer_conf != nullptr) { @@ -251,7 +251,7 @@ IVFSQHybrid::LoadData(const knowhere::QuantizerPtr& q, const Config& conf) { std::pair IVFSQHybrid::CopyCpuToGpuWithQuantizer(const int64_t& device_id, const Config& config) { -// 
std::lock_guard lk(g_mutex); + // std::lock_guard lk(g_mutex); if (auto res = FaissGpuResourceMgr::GetInstance().GetRes(device_id)) { ResScope rs(res, device_id, false); diff --git a/core/src/index/knowhere/knowhere/index/vector_index/IndexIVFSQHybrid.h b/core/src/index/knowhere/knowhere/index/vector_index/IndexIVFSQHybrid.h index 87cc22931f..d2a3be6c39 100644 --- a/core/src/index/knowhere/knowhere/index/vector_index/IndexIVFSQHybrid.h +++ b/core/src/index/knowhere/knowhere/index/vector_index/IndexIVFSQHybrid.h @@ -17,8 +17,8 @@ #pragma once -#include #include +#include #include #include diff --git a/core/src/index/unittest/test_customized_index.cpp b/core/src/index/unittest/test_customized_index.cpp index f9b48b8b67..346e8e3d93 100644 --- a/core/src/index/unittest/test_customized_index.cpp +++ b/core/src/index/unittest/test_customized_index.cpp @@ -125,7 +125,7 @@ TEST_F(SingleIndexTest, IVFSQHybrid) { } } -//TEST_F(SingleIndexTest, thread_safe) { +// TEST_F(SingleIndexTest, thread_safe) { // assert(!xb.empty()); // // index_type = "IVFSQHybrid"; @@ -234,5 +234,4 @@ TEST_F(SingleIndexTest, IVFSQHybrid) { // //} - #endif diff --git a/core/src/index/unittest/test_ivf.cpp b/core/src/index/unittest/test_ivf.cpp index 3fd3e16d0e..20addc82bb 100644 --- a/core/src/index/unittest/test_ivf.cpp +++ b/core/src/index/unittest/test_ivf.cpp @@ -44,9 +44,9 @@ class IVFTest : public DataGen, public TestWithParam<::std::tupleType() != TaskType::SearchTask) { return false; } @@ -67,7 +67,7 @@ namespace scheduler { task->label() = label; return true; - } +} } // namespace scheduler } // namespace milvus From 6a005b9f714a71284a6fcfb5018948b4b34cab5a Mon Sep 17 00:00:00 2001 From: JinHai-CN Date: Sat, 19 Oct 2019 20:33:55 +0800 Subject: [PATCH 013/149] update ci files Former-commit-id: 1dc3c1153a1e3f2d21bf7981c4349abc29b305ee --- ci/jenkinsfile/milvus_build.groovy | 2 +- ci/jenkinsfile/milvus_build_no_ut.groovy | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/ci/jenkinsfile/milvus_build.groovy b/ci/jenkinsfile/milvus_build.groovy index 11f8c51d33..e7341988b2 100644 --- a/ci/jenkinsfile/milvus_build.groovy +++ b/ci/jenkinsfile/milvus_build.groovy @@ -14,7 +14,7 @@ container('milvus-build-env') { sh "export JFROG_ARTFACTORY_URL='${params.JFROG_ARTFACTORY_URL}' \ && export JFROG_USER_NAME='${USERNAME}' \ && export JFROG_PASSWORD='${PASSWORD}' \ - && export FAISS_URL='http://192.168.1.105:6060/jinhai/faiss/-/archive/branch-0.2.1/faiss-branch-0.2.1.tar.gz' \ + && export FAISS_URL='http://192.168.1.105:6060/jinhai/faiss/-/archive/branch-0.3.0/faiss-branch-0.3.0.tar.gz' \ && ./build.sh -t ${params.BUILD_TYPE} -d /opt/milvus -j -u -c" sh "./coverage.sh -u root -p 123456 -t 192.168.1.194" diff --git a/ci/jenkinsfile/milvus_build_no_ut.groovy b/ci/jenkinsfile/milvus_build_no_ut.groovy index 1dd3361106..3f221b8947 100644 --- a/ci/jenkinsfile/milvus_build_no_ut.groovy +++ b/ci/jenkinsfile/milvus_build_no_ut.groovy @@ -14,7 +14,7 @@ container('milvus-build-env') { sh "export JFROG_ARTFACTORY_URL='${params.JFROG_ARTFACTORY_URL}' \ && export JFROG_USER_NAME='${USERNAME}' \ && export JFROG_PASSWORD='${PASSWORD}' \ - && export FAISS_URL='http://192.168.1.105:6060/jinhai/faiss/-/archive/branch-0.2.1/faiss-branch-0.2.1.tar.gz' \ + && export FAISS_URL='http://192.168.1.105:6060/jinhai/faiss/-/archive/branch-0.3.0/faiss-branch-0.3.0.tar.gz' \ && ./build.sh -t ${params.BUILD_TYPE} -j -d /opt/milvus" } } From cdb28ceaaea5c5af642f2d1c9403566bfafd0acf Mon Sep 17 00:00:00 2001 From: JinHai-CN Date: Sat, 19 Oct 2019 
21:07:17 +0800 Subject: [PATCH 014/149] Update ci files Former-commit-id: 7972762f8bf8607d8f77e07a3226f08a24981308 --- core/src/index/thirdparty/versions.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/core/src/index/thirdparty/versions.txt b/core/src/index/thirdparty/versions.txt index 9ee845f1e3..a2b16414c2 100644 --- a/core/src/index/thirdparty/versions.txt +++ b/core/src/index/thirdparty/versions.txt @@ -3,4 +3,4 @@ BOOST_VERSION=1.70.0 GTEST_VERSION=1.8.1 LAPACK_VERSION=v3.8.0 OPENBLAS_VERSION=v0.3.6 -FAISS_VERSION=branch-0.2.1 \ No newline at end of file +FAISS_VERSION=branch-0.3.0 \ No newline at end of file From d9138e19601b36aeed78c385179d4530a83b4f3d Mon Sep 17 00:00:00 2001 From: starlord Date: Tue, 22 Oct 2019 18:40:42 +0800 Subject: [PATCH 015/149] #80 Print version information into log during server start Former-commit-id: 874acccc6856b18b9f5992a2ee7bf14382f0f3c6 --- CHANGELOG.md | 1 + core/src/server/Server.cpp | 4 ++++ 2 files changed, 5 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 9b897c608d..2d94687930 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,6 +7,7 @@ Please mark all change in change log and use the ticket from JIRA. ## Bug ## Improvement - \#64 - Improvement dump function in scheduler +- \#80 - Print version information into log during server start ## Feature ## Task diff --git a/core/src/server/Server.cpp b/core/src/server/Server.cpp index 465ed62ddf..4933af72b7 100644 --- a/core/src/server/Server.cpp +++ b/core/src/server/Server.cpp @@ -19,6 +19,7 @@ #include #include +#include "../../version.h" #include "metrics/Metrics.h" #include "scheduler/SchedInst.h" #include "server/Config.h" @@ -180,6 +181,9 @@ Server::Start() { InitLog(log_config_file_); + // print version information + SERVER_LOG_INFO << "Milvus " << BUILD_TYPE << " version: v" << MILVUS_VERSION << ", built at " << BUILD_TIME; + server::Metrics::GetInstance().Init(); server::SystemInfo::GetInstance().Init(); From 4f3fa67be30ca1420e515f4fa5d3ac6476c85158 Mon Sep 17 00:00:00 2001 From: wxyu Date: Tue, 22 Oct 2019 19:11:17 +0800 Subject: [PATCH 016/149] Move easyloggingpp into external directory Former-commit-id: f2392522699d094720b92e5ee281973e3835bb18 --- CHANGELOG.md | 1 + ci/jenkins/scripts/coverage.sh | 4 ++-- core/coverage.sh | 4 ++-- core/src/CMakeLists.txt | 8 ++++++++ .../{utils => external/easyloggingpp}/easylogging++.cc | 0 .../{utils => external/easyloggingpp}/easylogging++.h | 0 core/src/index/knowhere/knowhere/common/Log.h | 2 +- core/src/index/unittest/CMakeLists.txt | 2 +- core/src/main.cpp | 2 +- core/src/utils/Log.h | 2 +- core/src/utils/LogUtil.h | 2 +- core/unittest/CMakeLists.txt | 10 +++++++++- core/unittest/main.cpp | 2 +- core/unittest/wrapper/CMakeLists.txt | 2 +- core/unittest/wrapper/test_wrapper.cpp | 2 +- 15 files changed, 30 insertions(+), 13 deletions(-) rename core/src/{utils => external/easyloggingpp}/easylogging++.cc (100%) rename core/src/{utils => external/easyloggingpp}/easylogging++.h (100%) diff --git a/CHANGELOG.md b/CHANGELOG.md index 9b897c608d..6ce6403028 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,6 +7,7 @@ Please mark all change in change log and use the ticket from JIRA. 
## Bug ## Improvement - \#64 - Improvement dump function in scheduler +- \#82 - Move easyloggingpp into "external" directory ## Feature ## Task diff --git a/ci/jenkins/scripts/coverage.sh b/ci/jenkins/scripts/coverage.sh index ecbb2dfbe9..dca8c63982 100755 --- a/ci/jenkins/scripts/coverage.sh +++ b/ci/jenkins/scripts/coverage.sh @@ -131,8 +131,8 @@ ${LCOV_CMD} -r "${FILE_INFO_OUTPUT}" -o "${FILE_INFO_OUTPUT_NEW}" \ "*/src/server/Server.cpp" \ "*/src/server/DBWrapper.cpp" \ "*/src/server/grpc_impl/GrpcServer.cpp" \ - "*/src/utils/easylogging++.h" \ - "*/src/utils/easylogging++.cc" + "*/src/external/easyloggingpp/easylogging++.h" \ + "*/src/external/easyloggingpp/easylogging++.cc" # gen html report # ${LCOV_GEN_CMD} "${FILE_INFO_OUTPUT_NEW}" --output-directory ${DIR_LCOV_OUTPUT}/ diff --git a/core/coverage.sh b/core/coverage.sh index 74f9f4219d..6db1e18d39 100755 --- a/core/coverage.sh +++ b/core/coverage.sh @@ -121,8 +121,8 @@ ${LCOV_CMD} -r "${FILE_INFO_OUTPUT}" -o "${FILE_INFO_OUTPUT_NEW}" \ "*/src/server/Server.cpp" \ "*/src/server/DBWrapper.cpp" \ "*/src/server/grpc_impl/GrpcServer.cpp" \ - "*/src/utils/easylogging++.h" \ - "*/src/utils/easylogging++.cc" + "*/src/external/easyloggingpp/easylogging++.h" \ + "*/src/external/easyloggingpp/easylogging++.cc" # gen html report ${LCOV_GEN_CMD} "${FILE_INFO_OUTPUT_NEW}" --output-directory ${DIR_LCOV_OUTPUT}/ diff --git a/core/src/CMakeLists.txt b/core/src/CMakeLists.txt index b0228bd090..d086955078 100644 --- a/core/src/CMakeLists.txt +++ b/core/src/CMakeLists.txt @@ -64,6 +64,13 @@ set(scheduler_files ${scheduler_task_files} ) +aux_source_directory(${MILVUS_ENGINE_SRC}/external/easyloggingpp external_easyloggingpp_files) +aux_source_directory(${MILVUS_ENGINE_SRC}/external/nlohmann external_nlohmann_files) +set(external_files + ${external_easyloggingpp_files} + ${external_nlohmann_files} + ) + aux_source_directory(${MILVUS_ENGINE_SRC}/server server_files) aux_source_directory(${MILVUS_ENGINE_SRC}/server/grpc_impl grpc_server_files) aux_source_directory(${MILVUS_ENGINE_SRC}/utils utils_files) @@ -77,6 +84,7 @@ set(engine_files ${db_insert_files} ${db_meta_files} ${metrics_files} + ${external_files} ${utils_files} ${wrapper_files} ) diff --git a/core/src/utils/easylogging++.cc b/core/src/external/easyloggingpp/easylogging++.cc similarity index 100% rename from core/src/utils/easylogging++.cc rename to core/src/external/easyloggingpp/easylogging++.cc diff --git a/core/src/utils/easylogging++.h b/core/src/external/easyloggingpp/easylogging++.h similarity index 100% rename from core/src/utils/easylogging++.h rename to core/src/external/easyloggingpp/easylogging++.h diff --git a/core/src/index/knowhere/knowhere/common/Log.h b/core/src/index/knowhere/knowhere/common/Log.h index 222d03d73e..369e7143af 100644 --- a/core/src/index/knowhere/knowhere/common/Log.h +++ b/core/src/index/knowhere/knowhere/common/Log.h @@ -17,7 +17,7 @@ #pragma once -#include "utils/easylogging++.h" +#include "external/easyloggingpp/easylogging++.h" namespace knowhere { diff --git a/core/src/index/unittest/CMakeLists.txt b/core/src/index/unittest/CMakeLists.txt index 8a5e089486..f840b28e28 100644 --- a/core/src/index/unittest/CMakeLists.txt +++ b/core/src/index/unittest/CMakeLists.txt @@ -20,7 +20,7 @@ set(basic_libs ) set(util_srcs - ${MILVUS_ENGINE_SRC}/utils/easylogging++.cc + ${MILVUS_ENGINE_SRC}/external/easyloggingpp/easylogging++.cc ${INDEX_SOURCE_DIR}/knowhere/knowhere/index/vector_index/helpers/FaissGpuResourceMgr.cpp 
${INDEX_SOURCE_DIR}/knowhere/knowhere/index/vector_index/helpers/FaissIO.cpp ${INDEX_SOURCE_DIR}/knowhere/knowhere/index/vector_index/helpers/IndexParameter.cpp diff --git a/core/src/main.cpp b/core/src/main.cpp index d1c9ba6dfd..d60f26d702 100644 --- a/core/src/main.cpp +++ b/core/src/main.cpp @@ -23,11 +23,11 @@ #include #include "../version.h" +#include "external/easyloggingpp/easylogging++.h" #include "metrics/Metrics.h" #include "server/Server.h" #include "utils/CommonUtil.h" #include "utils/SignalUtil.h" -#include "utils/easylogging++.h" INITIALIZE_EASYLOGGINGPP diff --git a/core/src/utils/Log.h b/core/src/utils/Log.h index 1dd116367a..4aa3146b01 100644 --- a/core/src/utils/Log.h +++ b/core/src/utils/Log.h @@ -17,7 +17,7 @@ #pragma once -#include "utils/easylogging++.h" +#include "external/easyloggingpp/easylogging++.h" namespace milvus { diff --git a/core/src/utils/LogUtil.h b/core/src/utils/LogUtil.h index 9926939442..7e5afd087a 100644 --- a/core/src/utils/LogUtil.h +++ b/core/src/utils/LogUtil.h @@ -17,8 +17,8 @@ #pragma once +#include "external/easyloggingpp/easylogging++.h" #include "utils/Status.h" -#include "utils/easylogging++.h" #include #include diff --git a/core/unittest/CMakeLists.txt b/core/unittest/CMakeLists.txt index 258fd76a8e..aae7fb8d7f 100644 --- a/core/unittest/CMakeLists.txt +++ b/core/unittest/CMakeLists.txt @@ -60,6 +60,13 @@ set(scheduler_files ${scheduler_optimizer_files} ) +aux_source_directory(${MILVUS_ENGINE_SRC}/external/easyloggingpp external_easyloggingpp_files) +aux_source_directory(${MILVUS_ENGINE_SRC}/external/nlohmann external_nlohmann_files) +set(external_files + ${external_easyloggingpp_files} + ${external_nlohmann_files} + ) + aux_source_directory(${MILVUS_ENGINE_SRC}/server server_files) aux_source_directory(${MILVUS_ENGINE_SRC}/server/grpc_impl grpc_server_files) aux_source_directory(${MILVUS_ENGINE_SRC}/utils utils_files) @@ -74,7 +81,7 @@ set(helper_files ${MILVUS_ENGINE_SRC}/utils/TimeRecorder.cpp ${MILVUS_ENGINE_SRC}/utils/Status.cpp ${MILVUS_ENGINE_SRC}/utils/ValidationUtil.cpp - ${MILVUS_ENGINE_SRC}/utils/easylogging++.cc + ${MILVUS_ENGINE_SRC}/external/easyloggingpp/easylogging++.cc ) set(common_files @@ -85,6 +92,7 @@ set(common_files ${db_insert_files} ${db_meta_files} ${metrics_files} + ${external_files} ${scheduler_files} ${wrapper_files} ${helper_files} diff --git a/core/unittest/main.cpp b/core/unittest/main.cpp index d17cf9da58..2cd0624f7b 100644 --- a/core/unittest/main.cpp +++ b/core/unittest/main.cpp @@ -18,7 +18,7 @@ #include #include -#include "utils/easylogging++.h" +#include "external/easyloggingpp/easylogging++.h" INITIALIZE_EASYLOGGINGPP diff --git a/core/unittest/wrapper/CMakeLists.txt b/core/unittest/wrapper/CMakeLists.txt index 156d89b241..a8015f8d34 100644 --- a/core/unittest/wrapper/CMakeLists.txt +++ b/core/unittest/wrapper/CMakeLists.txt @@ -26,7 +26,7 @@ set(wrapper_files set(util_files utils.cpp - ${MILVUS_ENGINE_SRC}/utils/easylogging++.cc + ${MILVUS_ENGINE_SRC}/external/easyloggingpp/easylogging++.cc ${MILVUS_ENGINE_SRC}/utils/Status.cpp ) diff --git a/core/unittest/wrapper/test_wrapper.cpp b/core/unittest/wrapper/test_wrapper.cpp index f112fc7e65..1ec98ccb5d 100644 --- a/core/unittest/wrapper/test_wrapper.cpp +++ b/core/unittest/wrapper/test_wrapper.cpp @@ -15,7 +15,7 @@ // specific language governing permissions and limitations // under the License. 
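Both executables touched by this move, the server's main.cpp and the unit-test runner, keep a single INITIALIZE_EASYLOGGINGPP expansion while switching to the relocated header. A minimal sketch of that contract, assuming only stock easylogging++ macros (START_EASYLOGGINGPP is optional and only parses command-line logging options):

// easylogging++ requires INITIALIZE_EASYLOGGINGPP in exactly one translation
// unit per binary; every other file only includes the header
#include "external/easyloggingpp/easylogging++.h"

INITIALIZE_EASYLOGGINGPP

int main(int argc, char** argv) {
    START_EASYLOGGINGPP(argc, argv);  // optional: forwards --logging-flags style options
    LOG(INFO) << "logger ready";
    return 0;
}
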
-#include "utils/easylogging++.h" +#include "external/easyloggingpp/easylogging++.h" #include "wrapper/VecIndex.h" #include "knowhere/index/vector_index/helpers/FaissGpuResourceMgr.h" #include "knowhere/index/vector_index/helpers/IndexParameter.h" From 9f8bcb2698014b55cb038c540560b587f43f1098 Mon Sep 17 00:00:00 2001 From: wxyu Date: Tue, 22 Oct 2019 19:56:39 +0800 Subject: [PATCH 017/149] update lint exclusions Former-commit-id: 84ebefc29597fd228c4efb9f27e485b56ae9e413 --- core/build-support/lint_exclusions.txt | 1 + core/coverage.sh | 3 ++- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/core/build-support/lint_exclusions.txt b/core/build-support/lint_exclusions.txt index 226db75a43..2be060f121 100644 --- a/core/build-support/lint_exclusions.txt +++ b/core/build-support/lint_exclusions.txt @@ -6,4 +6,5 @@ *easylogging++* *SqliteMetaImpl.cpp *src/grpc* +*src/external* *milvus/include* \ No newline at end of file diff --git a/core/coverage.sh b/core/coverage.sh index 6db1e18d39..5792af5ec2 100755 --- a/core/coverage.sh +++ b/core/coverage.sh @@ -122,7 +122,8 @@ ${LCOV_CMD} -r "${FILE_INFO_OUTPUT}" -o "${FILE_INFO_OUTPUT_NEW}" \ "*/src/server/DBWrapper.cpp" \ "*/src/server/grpc_impl/GrpcServer.cpp" \ "*/src/external/easyloggingpp/easylogging++.h" \ - "*/src/external/easyloggingpp/easylogging++.cc" + "*/src/external/easyloggingpp/easylogging++.cc" \ + "*/src/external/*" # gen html report ${LCOV_GEN_CMD} "${FILE_INFO_OUTPUT_NEW}" --output-directory ${DIR_LCOV_OUTPUT}/ From 13d7b1971a7e2b12f88491f7a1b96fd188d374f4 Mon Sep 17 00:00:00 2001 From: zhenwu Date: Wed, 23 Oct 2019 09:53:33 +0800 Subject: [PATCH 018/149] Update test nq size Former-commit-id: 933de17077aa4027a827f01f30278cab6e8b8434 --- tests/milvus_python_test/test_index.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/milvus_python_test/test_index.py b/tests/milvus_python_test/test_index.py index 435a547855..9e9f0830ac 100644 --- a/tests/milvus_python_test/test_index.py +++ b/tests/milvus_python_test/test_index.py @@ -11,7 +11,7 @@ import numpy from milvus import Milvus, IndexType, MetricType from utils import * -nb = 100000 +nb = 10000 dim = 128 index_file_size = 10 vectors = gen_vectors(nb, dim) From dead8ee68867935e2eb8f7357ad7a6e0911143e3 Mon Sep 17 00:00:00 2001 From: zhenwu Date: Wed, 23 Oct 2019 14:33:01 +0800 Subject: [PATCH 019/149] add exit code in coverage.sh Former-commit-id: 27a834232561cbda1360cd245f73d89db5d48750 --- core/coverage.sh | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/core/coverage.sh b/core/coverage.sh index 74f9f4219d..b3c2e96eed 100755 --- a/core/coverage.sh +++ b/core/coverage.sh @@ -99,6 +99,7 @@ for test in `ls ${DIR_UNITTEST}`; do if [ $? -ne 0 ]; then echo ${args} echo ${DIR_UNITTEST}/${test} "run failed" + exit -1 fi done @@ -123,6 +124,10 @@ ${LCOV_CMD} -r "${FILE_INFO_OUTPUT}" -o "${FILE_INFO_OUTPUT_NEW}" \ "*/src/server/grpc_impl/GrpcServer.cpp" \ "*/src/utils/easylogging++.h" \ "*/src/utils/easylogging++.cc" +if [ $? 
-ne 0 ]; then + echo "generate ${FILE_INFO_OUTPUT_NEW} failed" + exit -2 +fi # gen html report ${LCOV_GEN_CMD} "${FILE_INFO_OUTPUT_NEW}" --output-directory ${DIR_LCOV_OUTPUT}/ From 07cc917c5612e18173c6d6dce7de34054394e2d2 Mon Sep 17 00:00:00 2001 From: zhenwu Date: Wed, 23 Oct 2019 14:52:12 +0800 Subject: [PATCH 020/149] update coverage timeout && add coverage exit code Former-commit-id: 0c623d2348e73dc0c1df24f96098c4c5568e73ba --- ci/jenkins/jenkinsfile/coverage.groovy | 2 +- ci/jenkins/scripts/coverage.sh | 6 ++++++ 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/ci/jenkins/jenkinsfile/coverage.groovy b/ci/jenkins/jenkinsfile/coverage.groovy index 7c3b16c029..ff2e3e6fa2 100644 --- a/ci/jenkins/jenkinsfile/coverage.groovy +++ b/ci/jenkins/jenkinsfile/coverage.groovy @@ -1,4 +1,4 @@ -timeout(time: 60, unit: 'MINUTES') { +timeout(time: 30, unit: 'MINUTES') { dir ("ci/jenkins/scripts") { sh "./coverage.sh -o /opt/milvus -u root -p 123456 -t \$POD_IP" // Set some env variables so codecov detection script works correctly diff --git a/ci/jenkins/scripts/coverage.sh b/ci/jenkins/scripts/coverage.sh index ecbb2dfbe9..dd52df442c 100755 --- a/ci/jenkins/scripts/coverage.sh +++ b/ci/jenkins/scripts/coverage.sh @@ -109,6 +109,7 @@ for test in `ls ${DIR_UNITTEST}`; do if [ $? -ne 0 ]; then echo ${args} echo ${DIR_UNITTEST}/${test} "run failed" + exit -1 fi done @@ -134,5 +135,10 @@ ${LCOV_CMD} -r "${FILE_INFO_OUTPUT}" -o "${FILE_INFO_OUTPUT_NEW}" \ "*/src/utils/easylogging++.h" \ "*/src/utils/easylogging++.cc" +if [ $? -ne 0 ]; then + echo "gen ${FILE_INFO_OUTPUT_NEW} failed" + exit -2 +fi + # gen html report # ${LCOV_GEN_CMD} "${FILE_INFO_OUTPUT_NEW}" --output-directory ${DIR_LCOV_OUTPUT}/ From e7876f6343ce77a6b6a4a02a471be59bd6163f70 Mon Sep 17 00:00:00 2001 From: starlord Date: Wed, 23 Oct 2019 15:05:22 +0800 Subject: [PATCH 021/149] avoid relative header path Former-commit-id: d4a01b720ac7011e17076b7c36a20af0ebe2b58a --- core/CMakeLists.txt | 2 +- core/src/main.cpp | 2 +- core/src/sdk/grpc/ClientProxy.cpp | 2 +- core/src/server/Server.cpp | 2 +- core/src/server/grpc_impl/GrpcRequestTask.cpp | 2 +- core/{ => src}/version.h.macro | 0 core/unittest/db/CMakeLists.txt | 7 --- core/unittest/db/appendix/log_config.conf | 27 ---------- core/unittest/db/appendix/server_config.yaml | 37 ------------- core/unittest/db/test_db.cpp | 6 +-- core/unittest/db/utils.cpp | 53 ++++++++++++++++++- core/unittest/db/utils.h | 3 ++ core/unittest/server/test_config.cpp | 16 +++--- core/unittest/server/test_rpc.cpp | 38 ++++++------- 14 files changed, 90 insertions(+), 107 deletions(-) rename core/{ => src}/version.h.macro (100%) delete mode 100644 core/unittest/db/appendix/log_config.conf delete mode 100644 core/unittest/db/appendix/server_config.yaml diff --git a/core/CMakeLists.txt b/core/CMakeLists.txt index 5915006ca1..51cb2270a1 100644 --- a/core/CMakeLists.txt +++ b/core/CMakeLists.txt @@ -71,7 +71,7 @@ if(MILVUS_VERSION_MAJOR STREQUAL "" endif() message(STATUS "Build version = ${MILVUS_VERSION}") -configure_file(${CMAKE_CURRENT_SOURCE_DIR}/version.h.macro ${CMAKE_CURRENT_SOURCE_DIR}/version.h) +configure_file(${CMAKE_CURRENT_SOURCE_DIR}/src/version.h.macro ${CMAKE_CURRENT_SOURCE_DIR}/src/version.h) message(STATUS "Milvus version: " "${MILVUS_VERSION_MAJOR}.${MILVUS_VERSION_MINOR}.${MILVUS_VERSION_PATCH} " diff --git a/core/src/main.cpp b/core/src/main.cpp index d60f26d702..85f91469bb 100644 --- a/core/src/main.cpp +++ b/core/src/main.cpp @@ -22,10 +22,10 @@ #include #include -#include "../version.h" 
#include "external/easyloggingpp/easylogging++.h" #include "metrics/Metrics.h" #include "server/Server.h" +#include "src/version.h" #include "utils/CommonUtil.h" #include "utils/SignalUtil.h" diff --git a/core/src/sdk/grpc/ClientProxy.cpp b/core/src/sdk/grpc/ClientProxy.cpp index 7e1955b04b..c726cfc532 100644 --- a/core/src/sdk/grpc/ClientProxy.cpp +++ b/core/src/sdk/grpc/ClientProxy.cpp @@ -16,8 +16,8 @@ // under the License. #include "sdk/grpc/ClientProxy.h" -#include "../../../version.h" #include "grpc/gen-milvus/milvus.grpc.pb.h" +#include "src/version.h" #include #include diff --git a/core/src/server/Server.cpp b/core/src/server/Server.cpp index 4933af72b7..5676504722 100644 --- a/core/src/server/Server.cpp +++ b/core/src/server/Server.cpp @@ -19,13 +19,13 @@ #include #include -#include "../../version.h" #include "metrics/Metrics.h" #include "scheduler/SchedInst.h" #include "server/Config.h" #include "server/DBWrapper.h" #include "server/Server.h" #include "server/grpc_impl/GrpcServer.h" +#include "src/version.h" #include "utils/Log.h" #include "utils/LogUtil.h" #include "utils/SignalUtil.h" diff --git a/core/src/server/grpc_impl/GrpcRequestTask.cpp b/core/src/server/grpc_impl/GrpcRequestTask.cpp index 86ff23b3d0..be1fca0186 100644 --- a/core/src/server/grpc_impl/GrpcRequestTask.cpp +++ b/core/src/server/grpc_impl/GrpcRequestTask.cpp @@ -23,12 +23,12 @@ #include //#include -#include "../../../version.h" #include "GrpcServer.h" #include "db/Utils.h" #include "scheduler/SchedInst.h" #include "server/DBWrapper.h" #include "server/Server.h" +#include "src/version.h" #include "utils/CommonUtil.h" #include "utils/Log.h" #include "utils/TimeRecorder.h" diff --git a/core/version.h.macro b/core/src/version.h.macro similarity index 100% rename from core/version.h.macro rename to core/src/version.h.macro diff --git a/core/unittest/db/CMakeLists.txt b/core/unittest/db/CMakeLists.txt index 4bce9f35b3..3954dd8656 100644 --- a/core/unittest/db/CMakeLists.txt +++ b/core/unittest/db/CMakeLists.txt @@ -31,12 +31,5 @@ target_link_libraries(test_db install(TARGETS test_db DESTINATION unittest) -configure_file(appendix/server_config.yaml - "${CMAKE_CURRENT_BINARY_DIR}/milvus/conf/server_config.yaml" - COPYONLY) - -configure_file(appendix/log_config.conf - "${CMAKE_CURRENT_BINARY_DIR}/milvus/conf/log_config.conf" - COPYONLY) diff --git a/core/unittest/db/appendix/log_config.conf b/core/unittest/db/appendix/log_config.conf deleted file mode 100644 index 0a3e0d21af..0000000000 --- a/core/unittest/db/appendix/log_config.conf +++ /dev/null @@ -1,27 +0,0 @@ -* GLOBAL: - FORMAT = "%datetime | %level | %logger | %msg" - FILENAME = "/tmp/milvus/logs/milvus-%datetime{%y-%M-%d-%H:%m}-global.log" - ENABLED = true - TO_FILE = true - TO_STANDARD_OUTPUT = false - SUBSECOND_PRECISION = 3 - PERFORMANCE_TRACKING = false - MAX_LOG_FILE_SIZE = 209715200 ## Throw log files away after 200MB -* DEBUG: - FILENAME = "/tmp/milvus/logs/milvus-%datetime{%y-%M-%d-%H:%m}-debug.log" - ENABLED = true -* WARNING: - FILENAME = "/tmp/milvus/logs/milvus-%datetime{%y-%M-%d-%H:%m}-warning.log" -* TRACE: - FILENAME = "/tmp/milvus/logs/milvus-%datetime{%y-%M-%d-%H:%m}-trace.log" -* VERBOSE: - FORMAT = "%datetime{%d/%M/%y} | %level-%vlevel | %msg" - TO_FILE = false - TO_STANDARD_OUTPUT = false -## Error logs -* ERROR: - ENABLED = true - FILENAME = "/tmp/milvus/logs/milvus-%datetime{%y-%M-%d-%H:%m}-error.log" -* FATAL: - ENABLED = true - FILENAME = "/tmp/milvus/logs/milvus-%datetime{%y-%M-%d-%H:%m}-fatal.log" diff --git 
a/core/unittest/db/appendix/server_config.yaml b/core/unittest/db/appendix/server_config.yaml deleted file mode 100644 index f92b2f1a18..0000000000 --- a/core/unittest/db/appendix/server_config.yaml +++ /dev/null @@ -1,37 +0,0 @@ -# All the following configurations are default values. - -server_config: - address: 0.0.0.0 # milvus server ip address (IPv4) - port: 19530 # port range: 1025 ~ 65534 - deploy_mode: single # deployment type: single, cluster_readonly, cluster_writable - time_zone: UTC+8 - -db_config: - primary_path: /tmp/milvus # path used to store data and meta - secondary_path: # path used to store data only, split by semicolon - - backend_url: sqlite://:@:/ # URI format: dialect://username:password@host:port/database - # Keep 'dialect://:@:/', and replace other texts with real values. - # Replace 'dialect' with 'mysql' or 'sqlite' - - insert_buffer_size: 4 # GB, maximum insert buffer size allowed - build_index_gpu: 0 # gpu id used for building index - -metric_config: - enable_monitor: false # enable monitoring or not - collector: prometheus # prometheus - prometheus_config: - port: 8080 # port prometheus used to fetch metrics - -cache_config: - cpu_mem_capacity: 16 # GB, CPU memory used for cache - cpu_mem_threshold: 0.85 # percentage of data kept when cache cleanup triggered - cache_insert_data: false # whether load inserted data into cache - -engine_config: - blas_threshold: 20 - -resource_config: - resource_pool: - - cpu - - gpu0 diff --git a/core/unittest/db/test_db.cpp b/core/unittest/db/test_db.cpp index 5e6ecc2ac4..b869d17388 100644 --- a/core/unittest/db/test_db.cpp +++ b/core/unittest/db/test_db.cpp @@ -33,8 +33,6 @@ namespace { -static const char *CONFIG_FILE_PATH = "./milvus/conf/server_config.yaml"; - static const char *TABLE_NAME = "test_group"; static constexpr int64_t TABLE_DIM = 256; static constexpr int64_t VECTOR_COUNT = 25000; @@ -232,8 +230,10 @@ TEST_F(DBTest, DB_TEST) { } TEST_F(DBTest, SEARCH_TEST) { + std::string config_path(CONFIG_PATH); + config_path += CONFIG_FILE; milvus::server::Config &config = milvus::server::Config::GetInstance(); - milvus::Status s = config.LoadConfigFile(CONFIG_FILE_PATH); + milvus::Status s = config.LoadConfigFile(config_path); milvus::engine::meta::TableSchema table_info = BuildTableSchema(); auto stat = db_->CreateTable(table_info); diff --git a/core/unittest/db/utils.cpp b/core/unittest/db/utils.cpp index 67beeba36f..8903ce14ea 100644 --- a/core/unittest/db/utils.cpp +++ b/core/unittest/db/utils.cpp @@ -28,11 +28,59 @@ #include "db/DBFactory.h" #include "db/Options.h" #include "knowhere/index/vector_index/helpers/FaissGpuResourceMgr.h" +#include "utils/CommonUtil.h" INITIALIZE_EASYLOGGINGPP namespace { +static const char + * CONFIG_STR = "# All the following configurations are default values.\n" + "\n" + "server_config:\n" + " address: 0.0.0.0 # milvus server ip address (IPv4)\n" + " port: 19530 # port range: 1025 ~ 65534\n" + " deploy_mode: single \n" + " time_zone: UTC+8\n" + "\n" + "db_config:\n" + " primary_path: /tmp/milvus # path used to store data and meta\n" + " secondary_path: # path used to store data only, split by semicolon\n" + "\n" + " backend_url: sqlite://:@:/ \n" + " \n" + " # Replace 'dialect' with 'mysql' or 'sqlite'\n" + "\n" + " insert_buffer_size: 4 # GB, maximum insert buffer size allowed\n" + "\n" + "metric_config:\n" + " enable_monitor: false # enable monitoring or not\n" + " collector: prometheus # prometheus\n" + " prometheus_config:\n" + " port: 8080 # port prometheus used to fetch metrics\n" + 
"\n" + "cache_config:\n" + " cpu_mem_capacity: 16 # GB, CPU memory used for cache\n" + " cpu_mem_threshold: 0.85 # percentage of data kept when cache cleanup triggered\n" + " cache_insert_data: false # whether load inserted data into cache\n" + "\n" + "engine_config:\n" + " blas_threshold: 20\n" + "\n" + "resource_config:\n" + " resource_pool:\n" + " - gpu0\n" + " index_build_device: gpu0 # GPU used for building index"; + +void +WriteToFile(const std::string& file_path, const char* content) { + std::fstream fs(file_path.c_str(), std::ios_base::out); + + //write data to file + fs << content; + fs.close(); +} + class DBTestEnvironment : public ::testing::Environment { public: explicit DBTestEnvironment(const std::string& uri) @@ -84,7 +132,7 @@ BaseTest::TearDown() { milvus::engine::DBOptions BaseTest::GetOptions() { auto options = milvus::engine::DBFactory::BuildOption(); - options.meta_.path_ = "/tmp/milvus_test"; + options.meta_.path_ = CONFIG_PATH; options.meta_.backend_uri_ = "sqlite://:@:/"; return options; } @@ -111,6 +159,9 @@ DBTest::SetUp() { auto options = GetOptions(); db_ = milvus::engine::DBFactory::Build(options); + + std::string config_path(options.meta_.path_ + CONFIG_FILE); + WriteToFile(config_path, CONFIG_STR); } void diff --git a/core/unittest/db/utils.h b/core/unittest/db/utils.h index 8da160dc92..94735e4886 100644 --- a/core/unittest/db/utils.h +++ b/core/unittest/db/utils.h @@ -42,6 +42,9 @@ #define STOP_TIMER(name) #endif +static const char *CONFIG_PATH = "/tmp/milvus_test"; +static const char *CONFIG_FILE = "/server_config.yaml"; + class BaseTest : public ::testing::Test { protected: void InitLog(); diff --git a/core/unittest/server/test_config.cpp b/core/unittest/server/test_config.cpp index f3adf8a2c3..76230cbcc3 100644 --- a/core/unittest/server/test_config.cpp +++ b/core/unittest/server/test_config.cpp @@ -33,13 +33,13 @@ static constexpr uint64_t GB = MB * 1024; } // namespace TEST_F(ConfigTest, CONFIG_TEST) { - milvus::server::ConfigMgr *config_mgr = milvus::server::YamlConfigMgr::GetInstance(); + milvus::server::ConfigMgr* config_mgr = milvus::server::YamlConfigMgr::GetInstance(); milvus::Status s = config_mgr->LoadConfigFile(""); ASSERT_FALSE(s.ok()); std::string config_path(CONFIG_PATH); - s = config_mgr->LoadConfigFile(config_path+ INVALID_CONFIG_FILE); + s = config_mgr->LoadConfigFile(config_path + INVALID_CONFIG_FILE); ASSERT_FALSE(s.ok()); s = config_mgr->LoadConfigFile(config_path + VALID_CONFIG_FILE); @@ -47,11 +47,11 @@ TEST_F(ConfigTest, CONFIG_TEST) { config_mgr->Print(); - milvus::server::ConfigNode &root_config = config_mgr->GetRootNode(); - milvus::server::ConfigNode &server_config = root_config.GetChild("server_config"); - milvus::server::ConfigNode &db_config = root_config.GetChild("db_config"); - milvus::server::ConfigNode &metric_config = root_config.GetChild("metric_config"); - milvus::server::ConfigNode &cache_config = root_config.GetChild("cache_config"); + milvus::server::ConfigNode& root_config = config_mgr->GetRootNode(); + milvus::server::ConfigNode& server_config = root_config.GetChild("server_config"); + milvus::server::ConfigNode& db_config = root_config.GetChild("db_config"); + milvus::server::ConfigNode& metric_config = root_config.GetChild("metric_config"); + milvus::server::ConfigNode& cache_config = root_config.GetChild("cache_config"); milvus::server::ConfigNode invalid_config = root_config.GetChild("invalid_config"); auto valus = invalid_config.GetSequence("not_exist"); float ff = invalid_config.GetFloatValue("not_exist", 
3.0); @@ -100,7 +100,7 @@ TEST_F(ConfigTest, CONFIG_TEST) { TEST_F(ConfigTest, SERVER_CONFIG_TEST) { std::string config_path(CONFIG_PATH); - milvus::server::Config &config = milvus::server::Config::GetInstance(); + milvus::server::Config& config = milvus::server::Config::GetInstance(); milvus::Status s = config.LoadConfigFile(config_path + VALID_CONFIG_FILE); ASSERT_TRUE(s.ok()); diff --git a/core/unittest/server/test_rpc.cpp b/core/unittest/server/test_rpc.cpp index 09a56699ea..ebbcd810c1 100644 --- a/core/unittest/server/test_rpc.cpp +++ b/core/unittest/server/test_rpc.cpp @@ -23,7 +23,7 @@ #include "server/grpc_impl/GrpcRequestHandler.h" #include "server/grpc_impl/GrpcRequestScheduler.h" #include "server/grpc_impl/GrpcRequestTask.h" -#include "../version.h" +#include "src/version.h" #include "grpc/gen-milvus/milvus.grpc.pb.h" #include "grpc/gen-status/status.pb.h" @@ -36,7 +36,7 @@ namespace { -static const char *TABLE_NAME = "test_grpc"; +static const char* TABLE_NAME = "test_grpc"; static constexpr int64_t TABLE_DIM = 256; static constexpr int64_t INDEX_FILE_SIZE = 1024; static constexpr int64_t VECTOR_COUNT = 1000; @@ -109,7 +109,7 @@ class RpcHandlerTest : public testing::Test { void BuildVectors(int64_t from, int64_t to, - std::vector> &vector_record_array) { + std::vector>& vector_record_array) { if (to <= from) { return; } @@ -119,7 +119,7 @@ BuildVectors(int64_t from, int64_t to, std::vector record; record.resize(TABLE_DIM); for (int64_t i = 0; i < TABLE_DIM; i++) { - record[i] = (float) (k % (i + 1)); + record[i] = (float)(k % (i + 1)); } vector_record_array.emplace_back(record); @@ -136,7 +136,7 @@ CurrentTmDate(int64_t offset_day = 0) { gmtime_r(&tt, &t); std::string str = std::to_string(t.tm_year + 1900) + "-" + std::to_string(t.tm_mon + 1) - + "-" + std::to_string(t.tm_mday); + + "-" + std::to_string(t.tm_mday); return str; } @@ -200,8 +200,8 @@ TEST_F(RpcHandlerTest, INSERT_TEST) { std::vector> record_array; BuildVectors(0, VECTOR_COUNT, record_array); ::milvus::grpc::VectorIds vector_ids; - for (auto &record : record_array) { - ::milvus::grpc::RowRecord *grpc_record = request.add_row_record_array(); + for (auto& record : record_array) { + ::milvus::grpc::RowRecord* grpc_record = request.add_row_record_array(); for (size_t i = 0; i < record.size(); i++) { grpc_record->add_vector_data(record[i]); } @@ -239,8 +239,8 @@ TEST_F(RpcHandlerTest, SEARCH_TEST) { std::vector> record_array; BuildVectors(0, VECTOR_COUNT, record_array); ::milvus::grpc::InsertParam insert_param; - for (auto &record : record_array) { - ::milvus::grpc::RowRecord *grpc_record = insert_param.add_row_record_array(); + for (auto& record : record_array) { + ::milvus::grpc::RowRecord* grpc_record = insert_param.add_row_record_array(); for (size_t i = 0; i < record.size(); i++) { grpc_record->add_vector_data(record[i]); } @@ -252,16 +252,16 @@ TEST_F(RpcHandlerTest, SEARCH_TEST) { sleep(7); BuildVectors(0, 10, record_array); - for (auto &record : record_array) { - ::milvus::grpc::RowRecord *row_record = request.add_query_record_array(); - for (auto &rec : record) { + for (auto& record : record_array) { + ::milvus::grpc::RowRecord* row_record = request.add_query_record_array(); + for (auto& rec : record) { row_record->add_vector_data(rec); } } handler->Search(&context, &request, &response); //test search with range - ::milvus::grpc::Range *range = request.mutable_query_range_array()->Add(); + ::milvus::grpc::Range* range = request.mutable_query_range_array()->Add(); range->set_start_value(CurrentTmDate(-2)); 
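// note: offsets -2 and -3 put the start date *after* the end date, i.e. this
// first range is reversed (start is more recent than end), so the Search call
// below probes the handler's date-range checking rather than a normal lookup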
range->set_end_value(CurrentTmDate(-3)); handler->Search(&context, &request, &response); @@ -273,7 +273,7 @@ TEST_F(RpcHandlerTest, SEARCH_TEST) { handler->Search(&context, &request, &response); ::milvus::grpc::SearchInFilesParam search_in_files_param; - std::string *file_id = search_in_files_param.add_file_id_array(); + std::string* file_id = search_in_files_param.add_file_id_array(); *file_id = "test_tbl"; handler->SearchInFiles(&context, &search_in_files_param, &response); } @@ -323,8 +323,8 @@ TEST_F(RpcHandlerTest, TABLES_TEST) { //test empty row record handler->Insert(&context, &request, &vector_ids); - for (auto &record : record_array) { - ::milvus::grpc::RowRecord *grpc_record = request.add_row_record_array(); + for (auto& record : record_array) { + ::milvus::grpc::RowRecord* grpc_record = request.add_row_record_array(); for (size_t i = 0; i < record.size(); i++) { grpc_record->add_vector_data(record[i]); } @@ -341,7 +341,7 @@ TEST_F(RpcHandlerTest, TABLES_TEST) { request.clear_row_record_array(); vector_ids.clear_vector_id_array(); for (uint64_t i = 0; i < 10; ++i) { - ::milvus::grpc::RowRecord *grpc_record = request.add_row_record_array(); + ::milvus::grpc::RowRecord* grpc_record = request.add_row_record_array(); for (size_t j = 0; j < 10; j++) { grpc_record->add_vector_data(record_array[i][j]); } @@ -431,12 +431,12 @@ class DummyTask : public milvus::server::grpc::GrpcBaseTask { } static milvus::server::grpc::BaseTaskPtr - Create(std::string &dummy) { + Create(std::string& dummy) { return std::shared_ptr(new DummyTask(dummy)); } public: - explicit DummyTask(std::string &dummy) : GrpcBaseTask(dummy) { + explicit DummyTask(std::string& dummy) : GrpcBaseTask(dummy) { } }; From f410219fc29c54cf1dcb19f193ab91cea299f6f2 Mon Sep 17 00:00:00 2001 From: zhenwu Date: Wed, 23 Oct 2019 16:43:08 +0800 Subject: [PATCH 022/149] update utils.py Former-commit-id: 837591d04e95503c6e66f3960866949f628224e4 --- tests/milvus_python_test/utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/milvus_python_test/utils.py b/tests/milvus_python_test/utils.py index 806af62f57..007bff9c75 100644 --- a/tests/milvus_python_test/utils.py +++ b/tests/milvus_python_test/utils.py @@ -475,7 +475,7 @@ if __name__ == "__main__": table = "test" - file_name = '/poc/yuncong/ann_1000m/query.npy' + file_name = 'query.npy' data = np.load(file_name) vectors = data[0:nq].tolist() # print(vectors) From edcb0bc5e1e375d9345f822cccea53af110a9ac4 Mon Sep 17 00:00:00 2001 From: wxyu Date: Wed, 23 Oct 2019 17:12:36 +0800 Subject: [PATCH 023/149] Improvement dump function in scheduler Former-commit-id: b7c550a7fb4e05c66afa217f435b401d9cae5613 --- core/src/scheduler/JobMgr.cpp | 9 ++++++ core/src/scheduler/JobMgr.h | 6 +++- core/src/scheduler/ResourceMgr.cpp | 28 +++++++++-------- core/src/scheduler/ResourceMgr.h | 7 +++-- core/src/scheduler/Scheduler.cpp | 10 ++++-- core/src/scheduler/Scheduler.h | 11 +++---- core/src/scheduler/TaskTable.cpp | 6 ++-- core/src/scheduler/TaskTable.h | 6 ++-- core/src/scheduler/interface/interfaces.h | 2 +- core/src/scheduler/job/BuildIndexJob.cpp | 8 +++++ core/src/scheduler/job/BuildIndexJob.h | 3 ++ core/src/scheduler/job/DeleteJob.cpp | 10 ++++++ core/src/scheduler/job/DeleteJob.h | 3 ++ core/src/scheduler/job/Job.h | 4 ++- core/src/scheduler/job/SearchJob.cpp | 10 ++++++ core/src/scheduler/job/SearchJob.h | 3 ++ core/src/scheduler/resource/CpuResource.cpp | 2 +- core/src/scheduler/resource/CpuResource.h | 5 --- core/src/scheduler/resource/DiskResource.h | 5 --- 
core/src/scheduler/resource/GpuResource.cpp | 2 +- core/src/scheduler/resource/GpuResource.h | 5 --- core/src/scheduler/resource/Node.cpp | 22 ++++++++----- core/src/scheduler/resource/Node.h | 7 +++-- core/src/scheduler/resource/Resource.cpp | 34 +++++++++++++++++++++ core/src/scheduler/resource/Resource.h | 6 ++-- core/src/scheduler/resource/TestResource.h | 5 --- 26 files changed, 148 insertions(+), 71 deletions(-) diff --git a/core/src/scheduler/JobMgr.cpp b/core/src/scheduler/JobMgr.cpp index 70f1352a5c..e7b15a8185 100644 --- a/core/src/scheduler/JobMgr.cpp +++ b/core/src/scheduler/JobMgr.cpp @@ -49,6 +49,15 @@ JobMgr::Stop() { } } +json +JobMgr::Dump() const { + json ret{ + {"running", running_}, + {"event_queue_length", queue_.size()}, + }; + return ret; +} + void JobMgr::Put(const JobPtr& job) { { diff --git a/core/src/scheduler/JobMgr.h b/core/src/scheduler/JobMgr.h index b4c706d359..fbd6c0ee45 100644 --- a/core/src/scheduler/JobMgr.h +++ b/core/src/scheduler/JobMgr.h @@ -28,13 +28,14 @@ #include #include "ResourceMgr.h" +#include "interface/interfaces.h" #include "job/Job.h" #include "task/Task.h" namespace milvus { namespace scheduler { -class JobMgr { +class JobMgr : public interface::dumpable { public: explicit JobMgr(ResourceMgrPtr res_mgr); @@ -44,6 +45,9 @@ class JobMgr { void Stop(); + json + Dump() const override; + public: void Put(const JobPtr& job); diff --git a/core/src/scheduler/ResourceMgr.cpp b/core/src/scheduler/ResourceMgr.cpp index 6e839062ef..383ad89c4e 100644 --- a/core/src/scheduler/ResourceMgr.cpp +++ b/core/src/scheduler/ResourceMgr.cpp @@ -170,16 +170,20 @@ ResourceMgr::GetNumGpuResource() const { return num; } -std::string -ResourceMgr::Dump() { - std::stringstream ss; - ss << "ResourceMgr contains " << resources_.size() << " resources." 
<< std::endl; - - for (auto& res : resources_) { - ss << res->Dump(); +json +ResourceMgr::Dump() const { + json resources{}; + for (auto &res : resources_) { + resources.push_back(res->Dump()); } - - return ss.str(); + json ret{ + {"number_of_resource", resources_.size()}, + {"number_of_disk_resource", disk_resources_.size()}, + {"number_of_cpu_resource", cpu_resources_.size()}, + {"number_of_gpu_resource", gpu_resources_.size()}, + {"resources", resources}, + }; + return ret; } std::string @@ -187,9 +191,9 @@ ResourceMgr::DumpTaskTables() { std::stringstream ss; ss << ">>>>>>>>>>>>>>>ResourceMgr::DumpTaskTable<<<<<<<<<<<<<<<" << std::endl; for (auto& resource : resources_) { - ss << resource->Dump() << std::endl; - ss << resource->task_table().Dump(); - ss << resource->Dump() << std::endl << std::endl; + ss << resource->name() << std::endl; + ss << resource->task_table().Dump().dump(); + ss << resource->name() << std::endl << std::endl; } return ss.str(); } diff --git a/core/src/scheduler/ResourceMgr.h b/core/src/scheduler/ResourceMgr.h index 7a8e1ca4ca..4d2361fb3d 100644 --- a/core/src/scheduler/ResourceMgr.h +++ b/core/src/scheduler/ResourceMgr.h @@ -25,13 +25,14 @@ #include #include +#include "interface/interfaces.h" #include "resource/Resource.h" #include "utils/Log.h" namespace milvus { namespace scheduler { -class ResourceMgr { +class ResourceMgr : public interface::dumpable { public: ResourceMgr() = default; @@ -103,8 +104,8 @@ class ResourceMgr { public: /******** Utility Functions ********/ - std::string - Dump(); + json + Dump() const override; std::string DumpTaskTables(); diff --git a/core/src/scheduler/Scheduler.cpp b/core/src/scheduler/Scheduler.cpp index 19197b4168..fef5cc1a95 100644 --- a/core/src/scheduler/Scheduler.cpp +++ b/core/src/scheduler/Scheduler.cpp @@ -66,9 +66,13 @@ Scheduler::PostEvent(const EventPtr& event) { event_cv_.notify_one(); } -std::string -Scheduler::Dump() { - return std::string(); +json +Scheduler::Dump() const { + json ret{ + {"running", running_}, + {"event_queue_length", event_queue_.size()}, + }; + return ret; } void diff --git a/core/src/scheduler/Scheduler.h b/core/src/scheduler/Scheduler.h index 5b222cc41a..8d9ea83794 100644 --- a/core/src/scheduler/Scheduler.h +++ b/core/src/scheduler/Scheduler.h @@ -25,14 +25,14 @@ #include #include "ResourceMgr.h" +#include "interface/interfaces.h" #include "resource/Resource.h" #include "utils/Log.h" namespace milvus { namespace scheduler { -// TODO(wxyu): refactor, not friendly to unittest, logical in framework code -class Scheduler { +class Scheduler : public interface::dumpable { public: explicit Scheduler(ResourceMgrWPtr res_mgr); @@ -57,11 +57,8 @@ class Scheduler { void PostEvent(const EventPtr& event); - /* - * Dump as string; - */ - std::string - Dump(); + json + Dump() const override; private: /******** Events ********/ diff --git a/core/src/scheduler/TaskTable.cpp b/core/src/scheduler/TaskTable.cpp index cad7ce3a74..d0e6c1c38b 100644 --- a/core/src/scheduler/TaskTable.cpp +++ b/core/src/scheduler/TaskTable.cpp @@ -53,7 +53,7 @@ ToString(TaskTableItemState state) { } json -TaskTimestamp::Dump() { +TaskTimestamp::Dump() const { json ret{ {"start", start}, {"load", load}, {"loaded", loaded}, {"execute", execute}, {"executed", executed}, {"move", move}, {"moved", moved}, {"finish", finish}, @@ -141,7 +141,7 @@ TaskTableItem::Moved() { } json -TaskTableItem::Dump() { +TaskTableItem::Dump() const { json ret{ {"id", id}, {"task", (int64_t)task.get()}, @@ -263,7 +263,7 @@ TaskTable::Get(uint64_t index) 
{ //} json -TaskTable::Dump() { +TaskTable::Dump() const { json ret; for (auto& item : table_) { ret.push_back(item->Dump()); diff --git a/core/src/scheduler/TaskTable.h b/core/src/scheduler/TaskTable.h index 307528fffb..a9d00043c2 100644 --- a/core/src/scheduler/TaskTable.h +++ b/core/src/scheduler/TaskTable.h @@ -54,7 +54,7 @@ struct TaskTimestamp : public interface::dumpable { uint64_t finish = 0; json - Dump() override; + Dump() const override; }; struct TaskTableItem : public interface::dumpable { @@ -92,7 +92,7 @@ struct TaskTableItem : public interface::dumpable { Moved(); json - Dump() override; + Dump() const override; }; using TaskTableItemPtr = std::shared_ptr; @@ -245,7 +245,7 @@ class TaskTable : public interface::dumpable { * Dump; */ json - Dump() override; + Dump() const override; private: std::uint64_t id_ = 0; diff --git a/core/src/scheduler/interface/interfaces.h b/core/src/scheduler/interface/interfaces.h index 68e5af8cdb..9920e4f80a 100644 --- a/core/src/scheduler/interface/interfaces.h +++ b/core/src/scheduler/interface/interfaces.h @@ -37,7 +37,7 @@ struct dumpable { } virtual json - Dump() = 0; + Dump() const = 0; }; } // namespace interface diff --git a/core/src/scheduler/job/BuildIndexJob.cpp b/core/src/scheduler/job/BuildIndexJob.cpp index 423121c5fb..39c08b6b51 100644 --- a/core/src/scheduler/job/BuildIndexJob.cpp +++ b/core/src/scheduler/job/BuildIndexJob.cpp @@ -54,5 +54,13 @@ BuildIndexJob::BuildIndexDone(size_t to_index_id) { SERVER_LOG_DEBUG << "BuildIndexJob " << id() << " finish index file: " << to_index_id; } +json +BuildIndexJob::Dump() const { + json ret{ + {"number_of_to_index_file", to_index_files_.size()}, + }; + return ret; +} + } // namespace scheduler } // namespace milvus diff --git a/core/src/scheduler/job/BuildIndexJob.h b/core/src/scheduler/job/BuildIndexJob.h index b6ca462537..e3450ee048 100644 --- a/core/src/scheduler/job/BuildIndexJob.h +++ b/core/src/scheduler/job/BuildIndexJob.h @@ -53,6 +53,9 @@ class BuildIndexJob : public Job { void BuildIndexDone(size_t to_index_id); + json + Dump() const override; + public: Status& GetStatus() { diff --git a/core/src/scheduler/job/DeleteJob.cpp b/core/src/scheduler/job/DeleteJob.cpp index 96a6bb1817..04a9557177 100644 --- a/core/src/scheduler/job/DeleteJob.cpp +++ b/core/src/scheduler/job/DeleteJob.cpp @@ -45,5 +45,15 @@ DeleteJob::ResourceDone() { cv_.notify_one(); } +json +DeleteJob::Dump() const { + json ret{ + {"table_id", table_id_}, + {"number_of_resource", num_resource_}, + {"number_of_done", done_resource}, + }; + return ret; +} + } // namespace scheduler } // namespace milvus diff --git a/core/src/scheduler/job/DeleteJob.h b/core/src/scheduler/job/DeleteJob.h index 4ac48f6913..93e5aa40cc 100644 --- a/core/src/scheduler/job/DeleteJob.h +++ b/core/src/scheduler/job/DeleteJob.h @@ -44,6 +44,9 @@ class DeleteJob : public Job { void ResourceDone(); + json + Dump() const override; + public: std::string table_id() const { diff --git a/core/src/scheduler/job/Job.h b/core/src/scheduler/job/Job.h index 5fe645363f..709db8cffc 100644 --- a/core/src/scheduler/job/Job.h +++ b/core/src/scheduler/job/Job.h @@ -27,6 +27,8 @@ #include #include +#include "scheduler/interface/interfaces.h" + namespace milvus { namespace scheduler { @@ -39,7 +41,7 @@ enum class JobType { using JobId = std::uint64_t; -class Job { +class Job : public interface::dumpable { public: inline JobId id() const { diff --git a/core/src/scheduler/job/SearchJob.cpp b/core/src/scheduler/job/SearchJob.cpp index 518e3111c0..1143e33add 100644 
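Every Dump() override introduced above follows the same shape: implement interface::dumpable, return an nlohmann::json object (the JSON library vendored under src/external/nlohmann), and let callers stringify with .dump(). A compilable sketch of the pattern, with invented member names standing in for the real scheduler state:

#include <nlohmann/json.hpp>

#include <iostream>
#include <string>
#include <vector>

using json = nlohmann::json;

struct dumpable {
    virtual ~dumpable() = default;
    virtual json Dump() const = 0;
};

// stand-in for a scheduler resource; these fields are illustrative,
// not the actual Resource members
struct DemoResource : dumpable {
    std::string name_{"gpu0"};
    uint64_t total_task_{42};

    json Dump() const override {
        return json{{"name", name_}, {"total_tasks", total_task_}};
    }
};

int main() {
    // aggregation mirrors ResourceMgr::Dump(): child dumps collect into an
    // array, then summary counters wrap the array
    std::vector<DemoResource> resources(2);
    json children;
    for (const auto& r : resources) {
        children.push_back(r.Dump());
    }
    json ret{{"number_of_resource", resources.size()}, {"resources", children}};
    std::cout << ret.dump(2) << std::endl;
    return 0;
}
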
--- a/core/src/scheduler/job/SearchJob.cpp +++ b/core/src/scheduler/job/SearchJob.cpp @@ -63,5 +63,15 @@ SearchJob::GetStatus() { return status_; } +json +SearchJob::Dump() const { + json ret{ + {"topk", topk_}, + {"nq", nq_}, + {"nprobe", nprobe_}, + }; + return ret; +} + } // namespace scheduler } // namespace milvus diff --git a/core/src/scheduler/job/SearchJob.h b/core/src/scheduler/job/SearchJob.h index fb2d87d876..6c2bd7eea9 100644 --- a/core/src/scheduler/job/SearchJob.h +++ b/core/src/scheduler/job/SearchJob.h @@ -61,6 +61,9 @@ class SearchJob : public Job { Status& GetStatus(); + json + Dump() const override; + public: uint64_t topk() const { diff --git a/core/src/scheduler/resource/CpuResource.cpp b/core/src/scheduler/resource/CpuResource.cpp index 500737a829..eb43a863cc 100644 --- a/core/src/scheduler/resource/CpuResource.cpp +++ b/core/src/scheduler/resource/CpuResource.cpp @@ -24,7 +24,7 @@ namespace scheduler { std::ostream& operator<<(std::ostream& out, const CpuResource& resource) { - out << resource.Dump(); + out << resource.Dump().dump(); return out; } diff --git a/core/src/scheduler/resource/CpuResource.h b/core/src/scheduler/resource/CpuResource.h index e3e4fc383f..10cd88ea2d 100644 --- a/core/src/scheduler/resource/CpuResource.h +++ b/core/src/scheduler/resource/CpuResource.h @@ -28,11 +28,6 @@ class CpuResource : public Resource { public: explicit CpuResource(std::string name, uint64_t device_id, bool enable_loader, bool enable_executor); - inline std::string - Dump() const override { - return ""; - } - friend std::ostream& operator<<(std::ostream& out, const CpuResource& resource); diff --git a/core/src/scheduler/resource/DiskResource.h b/core/src/scheduler/resource/DiskResource.h index 2346cd115a..384e44b4f2 100644 --- a/core/src/scheduler/resource/DiskResource.h +++ b/core/src/scheduler/resource/DiskResource.h @@ -28,11 +28,6 @@ class DiskResource : public Resource { public: explicit DiskResource(std::string name, uint64_t device_id, bool enable_loader, bool enable_executor); - inline std::string - Dump() const override { - return ""; - } - friend std::ostream& operator<<(std::ostream& out, const DiskResource& resource); diff --git a/core/src/scheduler/resource/GpuResource.cpp b/core/src/scheduler/resource/GpuResource.cpp index 20ed73e38c..f6363ff01d 100644 --- a/core/src/scheduler/resource/GpuResource.cpp +++ b/core/src/scheduler/resource/GpuResource.cpp @@ -22,7 +22,7 @@ namespace scheduler { std::ostream& operator<<(std::ostream& out, const GpuResource& resource) { - out << resource.Dump(); + out << resource.Dump().dump(); return out; } diff --git a/core/src/scheduler/resource/GpuResource.h b/core/src/scheduler/resource/GpuResource.h index e0df03d5a7..86b3b6658c 100644 --- a/core/src/scheduler/resource/GpuResource.h +++ b/core/src/scheduler/resource/GpuResource.h @@ -29,11 +29,6 @@ class GpuResource : public Resource { public: explicit GpuResource(std::string name, uint64_t device_id, bool enable_loader, bool enable_executor); - inline std::string - Dump() const override { - return ""; - } - friend std::ostream& operator<<(std::ostream& out, const GpuResource& resource); diff --git a/core/src/scheduler/resource/Node.cpp b/core/src/scheduler/resource/Node.cpp index 5401c36441..9621c4324e 100644 --- a/core/src/scheduler/resource/Node.cpp +++ b/core/src/scheduler/resource/Node.cpp @@ -38,15 +38,21 @@ Node::GetNeighbours() { return ret; } -std::string -Node::Dump() { - std::stringstream ss; - ss << "::neighbours:" << std::endl; - for (auto& neighbour : neighbours_) 
{ - ss << "\t" << std::endl; +json +Node::Dump() const { + json neighbours; + for (auto & neighbour : neighbours_) { + json n; + n["id"] = neighbour.first; + n["connection"] = neighbour.second.connection.Dump(); + neighbours.push_back(n); } - return ss.str(); + + json ret{ + {"id", id_}, + {"neighbours", neighbours}, + }; + return ret; } void diff --git a/core/src/scheduler/resource/Node.h b/core/src/scheduler/resource/Node.h index 071ee9bab8..4539c8c86a 100644 --- a/core/src/scheduler/resource/Node.h +++ b/core/src/scheduler/resource/Node.h @@ -24,6 +24,7 @@ #include "Connection.h" #include "scheduler/TaskTable.h" +#include "scheduler/interface/interfaces.h" namespace milvus { namespace scheduler { @@ -41,7 +42,7 @@ struct Neighbour { }; // TODO(lxj): return type void -> Status -class Node { +class Node : public interface::dumpable { public: Node(); @@ -52,8 +53,8 @@ class Node { GetNeighbours(); public: - std::string - Dump(); + json + Dump() const override; private: std::mutex mutex_; diff --git a/core/src/scheduler/resource/Resource.cpp b/core/src/scheduler/resource/Resource.cpp index 59fd22b5d6..aac1953845 100644 --- a/core/src/scheduler/resource/Resource.cpp +++ b/core/src/scheduler/resource/Resource.cpp @@ -32,6 +32,24 @@ operator<<(std::ostream& out, const Resource& resource) { return out; } +std::string +ToString(ResourceType type) { + switch (type) { + case ResourceType::DISK: { + return "DISK"; + } + case ResourceType::CPU: { + return "CPU"; + } + case ResourceType::GPU: { + return "GPU"; + } + default: { + return "UNKNOWN"; + } + } +} + Resource::Resource(std::string name, ResourceType type, uint64_t device_id, bool enable_loader, bool enable_executor) : name_(std::move(name)), type_(type), @@ -89,6 +107,22 @@ Resource::WakeupExecutor() { exec_cv_.notify_one(); } +json +Resource::Dump() const { + json ret{ + {"device_id", device_id_}, + {"name", name_}, + {"type", ToString(type_)}, + {"task_average_cost", TaskAvgCost()}, + {"task_total_cost", total_cost_}, + {"total_tasks", total_task_}, + {"running", running_}, + {"enable_loader", enable_loader_}, + {"enable_executor", enable_executor_}, + }; + return ret; +} + uint64_t Resource::NumOfTaskToExec() { uint64_t count = 0; diff --git a/core/src/scheduler/resource/Resource.h b/core/src/scheduler/resource/Resource.h index c9026f13b6..c797e13de8 100644 --- a/core/src/scheduler/resource/Resource.h +++ b/core/src/scheduler/resource/Resource.h @@ -77,10 +77,8 @@ class Resource : public Node, public std::enable_shared_from_this { subscriber_ = std::move(subscriber); } - inline virtual std::string - Dump() const { - return ""; - } + json + Dump() const override; public: inline std::string diff --git a/core/src/scheduler/resource/TestResource.h b/core/src/scheduler/resource/TestResource.h index 9bbc5a54d0..4e4e148d6f 100644 --- a/core/src/scheduler/resource/TestResource.h +++ b/core/src/scheduler/resource/TestResource.h @@ -29,11 +29,6 @@ class TestResource : public Resource { public: explicit TestResource(std::string name, uint64_t device_id, bool enable_loader, bool enable_executor); - inline std::string - Dump() const override { - return ""; - } - friend std::ostream& operator<<(std::ostream& out, const TestResource& resource); From 72ae6f59cc484c4d67fd424f155ab335c141093e Mon Sep 17 00:00:00 2001 From: wxyu Date: Wed, 23 Oct 2019 17:14:50 +0800 Subject: [PATCH 024/149] clang-format Former-commit-id: 0ef52c892fdff56800f509cecf010f95c6e7e421 --- core/src/scheduler/ResourceMgr.cpp | 2 +- core/src/scheduler/resource/Node.cpp | 2 +- 
core/src/scheduler/resource/Resource.cpp | 4 +--- 3 files changed, 3 insertions(+), 5 deletions(-) diff --git a/core/src/scheduler/ResourceMgr.cpp b/core/src/scheduler/ResourceMgr.cpp index 383ad89c4e..187cd97bc7 100644 --- a/core/src/scheduler/ResourceMgr.cpp +++ b/core/src/scheduler/ResourceMgr.cpp @@ -173,7 +173,7 @@ ResourceMgr::GetNumGpuResource() const { json ResourceMgr::Dump() const { json resources{}; - for (auto &res : resources_) { + for (auto& res : resources_) { resources.push_back(res->Dump()); } json ret{ diff --git a/core/src/scheduler/resource/Node.cpp b/core/src/scheduler/resource/Node.cpp index 9621c4324e..dcf03a321c 100644 --- a/core/src/scheduler/resource/Node.cpp +++ b/core/src/scheduler/resource/Node.cpp @@ -41,7 +41,7 @@ Node::GetNeighbours() { json Node::Dump() const { json neighbours; - for (auto & neighbour : neighbours_) { + for (auto& neighbour : neighbours_) { json n; n["id"] = neighbour.first; n["connection"] = neighbour.second.connection.Dump(); diff --git a/core/src/scheduler/resource/Resource.cpp b/core/src/scheduler/resource/Resource.cpp index aac1953845..1cd4cde609 100644 --- a/core/src/scheduler/resource/Resource.cpp +++ b/core/src/scheduler/resource/Resource.cpp @@ -44,9 +44,7 @@ ToString(ResourceType type) { case ResourceType::GPU: { return "GPU"; } - default: { - return "UNKNOWN"; - } + default: { return "UNKNOWN"; } } } From 635434126c8b7a6372ee0a46a653cdc517ecdf5d Mon Sep 17 00:00:00 2001 From: "yudong.cai" Date: Wed, 23 Oct 2019 17:44:57 +0800 Subject: [PATCH 025/149] #89 add faiss benchmark Former-commit-id: 12f9741900e36bb22cd8b7839f16174fd5d0c6f9 --- core/src/index/unittest/CMakeLists.txt | 1 + .../unittest/faiss_benchmark/CMakeLists.txt | 24 + .../faiss_benchmark/faiss_benchmark_test.cpp | 546 ++++++++++++++++++ 3 files changed, 571 insertions(+) create mode 100644 core/src/index/unittest/faiss_benchmark/CMakeLists.txt create mode 100644 core/src/index/unittest/faiss_benchmark/faiss_benchmark_test.cpp diff --git a/core/src/index/unittest/CMakeLists.txt b/core/src/index/unittest/CMakeLists.txt index f840b28e28..2e84908cd7 100644 --- a/core/src/index/unittest/CMakeLists.txt +++ b/core/src/index/unittest/CMakeLists.txt @@ -86,5 +86,6 @@ install(TARGETS test_gpuresource DESTINATION unittest) install(TARGETS test_customized_index DESTINATION unittest) #add_subdirectory(faiss_ori) +#add_subdirectory(faiss_benchmark) add_subdirectory(test_nsg) diff --git a/core/src/index/unittest/faiss_benchmark/CMakeLists.txt b/core/src/index/unittest/faiss_benchmark/CMakeLists.txt new file mode 100644 index 0000000000..556364b68a --- /dev/null +++ b/core/src/index/unittest/faiss_benchmark/CMakeLists.txt @@ -0,0 +1,24 @@ +include_directories(${INDEX_SOURCE_DIR}/thirdparty) +include_directories(${INDEX_SOURCE_DIR}/include) +include_directories(/usr/local/cuda/include) +include_directories(/usr/local/hdf5/include) + +link_directories(/usr/local/cuda/lib64) +link_directories(/usr/local/hdf5/lib) + +set(unittest_libs + gtest gmock gtest_main gmock_main) + +set(depend_libs + faiss openblas lapack hdf5 + arrow ${ARROW_PREFIX}/lib/libjemalloc_pic.a + ) + +set(basic_libs + cudart cublas + gomp gfortran pthread + ) + +add_executable(test_faiss_benchmark faiss_benchmark_test.cpp) +target_link_libraries(test_faiss_benchmark ${depend_libs} ${unittest_libs} ${basic_libs}) +install(TARGETS test_faiss_benchmark DESTINATION unittest) diff --git a/core/src/index/unittest/faiss_benchmark/faiss_benchmark_test.cpp b/core/src/index/unittest/faiss_benchmark/faiss_benchmark_test.cpp new 
file mode 100644 index 0000000000..5ece23c7aa --- /dev/null +++ b/core/src/index/unittest/faiss_benchmark/faiss_benchmark_test.cpp @@ -0,0 +1,546 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#include + +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +#include +#include +#include +#include +#include + +/***************************************************** + * To run this test, please download the HDF5 from + * https://support.hdfgroup.org/ftp/HDF5/releases/ + * and install it to /usr/local/hdf5 . + *****************************************************/ + +double elapsed() { + struct timeval tv; + gettimeofday(&tv, nullptr); + return tv.tv_sec + tv.tv_usec * 1e-6; +} + +void* hdf5_read(const char *file_name, + const char *dataset_name, + H5T_class_t dataset_class, + size_t &d_out, + size_t &n_out) { + hid_t file, dataset, datatype, dataspace, memspace; + H5T_class_t t_class; /* data type class */ + H5T_order_t order; /* data order */ + size_t size; /* size of the data element stored in file */ + hsize_t dimsm[3]; /* memory space dimensions */ + hsize_t dims_out[2]; /* dataset dimensions */ + hsize_t count[2]; /* size of the hyperslab in the file */ + hsize_t offset[2]; /* hyperslab offset in the file */ + hsize_t count_out[3]; /* size of the hyperslab in memory */ + hsize_t offset_out[3]; /* hyperslab offset in memory */ + int rank; + void* data_out; /* output buffer */ + + /* Open the file and the dataset. */ + file = H5Fopen(file_name, H5F_ACC_RDONLY, H5P_DEFAULT); + dataset = H5Dopen2(file, dataset_name, H5P_DEFAULT); + + /* + * Get datatype and dataspace handles and then query + * dataset class, order, size, rank and dimensions. + */ + datatype = H5Dget_type(dataset); /* datatype handle */ + t_class = H5Tget_class(datatype); + assert(t_class == dataset_class || !"Illegal dataset class type"); + + order = H5Tget_order(datatype); + switch (order) { + case H5T_ORDER_LE: + printf("Little endian order \n"); + break; + case H5T_ORDER_BE: + printf("Big endian order \n"); + break; + default: + printf("Illegal endian order \n"); + break; + } + + size = H5Tget_size(datatype); + printf("Data size is %d \n", (int)size); + + dataspace = H5Dget_space(dataset); /* dataspace handle */ + rank = H5Sget_simple_extent_ndims(dataspace); + H5Sget_simple_extent_dims(dataspace, dims_out, NULL); + n_out = dims_out[0]; + d_out = dims_out[1]; + printf("rank %d, dimensions %lu x %lu \n", rank, n_out, d_out); + + /* Define hyperslab in the dataset. */ + offset[0] = offset[1] = 0; + count[0] = dims_out[0]; + count[1] = dims_out[1]; + H5Sselect_hyperslab(dataspace, H5S_SELECT_SET, offset, NULL, count, NULL); + + /* Define the memory dataspace. 
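The memory space below is rank 3 with a trailing singleton dimension, sized to match the file-side hyperslab selected above; the memory hyperslab that follows mirrors that selection element for element.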
*/ + dimsm[0] = dims_out[0]; + dimsm[1] = dims_out[1]; + dimsm[2] = 1; + memspace = H5Screate_simple(3, dimsm, NULL); + + /* Define memory hyperslab. */ + offset_out[0] = offset_out[1] = offset_out[2] = 0; + count_out[0] = dims_out[0]; + count_out[1] = dims_out[1]; + count_out[2] = 1; + H5Sselect_hyperslab(memspace, H5S_SELECT_SET, offset_out, NULL, count_out, NULL); + + /* Read data from hyperslab in the file into the hyperslab in memory and display. */ + switch (t_class) { + case H5T_INTEGER: + data_out = new int[dims_out[0] * dims_out[1]]; + H5Dread(dataset, H5T_NATIVE_INT, memspace, dataspace, H5P_DEFAULT, data_out); + break; + case H5T_FLOAT: + data_out = new float[dims_out[0] * dims_out[1]]; + H5Dread(dataset, H5T_NATIVE_FLOAT, memspace, dataspace, H5P_DEFAULT, data_out); + break; + default: + printf("Illegal dataset class type\n"); + break; + } + + /* Close/release resources. */ + H5Tclose(datatype); + H5Dclose(dataset); + H5Sclose(dataspace); + H5Sclose(memspace); + H5Fclose(file); + + return data_out; +} + +std::string get_index_file_name(const std::string& ann_test_name, + const std::string& index_key, + int32_t data_loops) { + size_t pos = index_key.find_first_of(',', 0); + std::string file_name = ann_test_name; + file_name = file_name + "_" + index_key.substr(0, pos) + "_" + index_key.substr(pos+1); + file_name = file_name + "_" + std::to_string(data_loops) + ".index"; + return file_name; +} + +bool parse_ann_test_name(const std::string& ann_test_name, + size_t &dim, + faiss::MetricType &metric_type) { + size_t pos1, pos2; + + if (ann_test_name.empty()) return false; + + pos1 = ann_test_name.find_first_of('-', 0); + if (pos1 == std::string::npos) return false; + pos2 = ann_test_name.find_first_of('-', pos1 + 1); + if (pos2 == std::string::npos) return false; + + dim = std::stoi(ann_test_name.substr(pos1+1, pos2-pos1-1)); + std::string metric_str = ann_test_name.substr(pos2+1); + if (metric_str == "angular") { + metric_type = faiss::METRIC_INNER_PRODUCT; + } else if (metric_str == "euclidean") { + metric_type = faiss::METRIC_L2; + } else { + return false; + } + + return true; +} + +void test_ann_hdf5(const std::string& ann_test_name, + const std::string& index_key, + int32_t index_add_loops, + const std::vector& nprobes) { + double t0 = elapsed(); + + const std::string ann_file_name = ann_test_name + ".hdf5"; + + faiss::MetricType metric_type; + size_t dim; + + if (!parse_ann_test_name(ann_test_name, dim, metric_type)) { + printf("Invalid ann test name: %s\n", ann_test_name.c_str()); + return; + } + + faiss::Index * index; + size_t d; + + std::string index_file_name = get_index_file_name(ann_test_name, index_key, index_add_loops); + try { + index = faiss::read_index(index_file_name.c_str()); + d = dim; + } + catch (...) 
{ + printf("Cannot read index file: %s\n", index_file_name.c_str()); + + printf ("[%.3f s] Loading train set\n", elapsed() - t0); + + size_t nb; + float *xb = (float*)hdf5_read(ann_file_name.c_str(), "train", H5T_FLOAT, d, nb); + assert(d == dim || !"dataset does not have correct dimension"); + + printf ("[%.3f s] Preparing index \"%s\" d=%ld\n", + elapsed() - t0, index_key.c_str(), d); + + index = faiss::index_factory(d, index_key.c_str(), metric_type); + + printf ("[%.3f s] Training on %ld vectors\n", elapsed() - t0, nb); + + index->train(nb, xb); + + printf ("[%.3f s] Loading database\n", elapsed() - t0); + + // add index multiple times to get ~1G data set + for (int i = 0; i < index_add_loops; i++) { + printf ("[%.3f s] Indexing database, size %ld*%ld\n", elapsed() - t0, nb, d); + index->add(nb, xb); + } + + faiss::write_index(index, index_file_name.c_str()); + + delete [] xb; + } + + size_t nq; + float *xq; + { + printf ("[%.3f s] Loading queries\n", elapsed() - t0); + + size_t d2; + xq = (float*)hdf5_read(ann_file_name.c_str(), "test", H5T_FLOAT, d2, nq); + assert(d == d2 || !"query does not have same dimension as train set"); + } + + size_t k; // nb of results per query in the GT + faiss::Index::idx_t *gt; // nq * k matrix of ground-truth nearest-neighbors + { + printf ("[%.3f s] Loading ground truth for %ld queries\n", elapsed() - t0, nq); + + // load ground-truth and convert int to long + size_t nq2; + int *gt_int = (int*)hdf5_read(ann_file_name.c_str(), "neighbors", H5T_INTEGER, k, nq2); + assert(nq2 == nq || !"incorrect nb of ground truth entries"); + + gt = new faiss::Index::idx_t[k * nq]; + for(int i = 0; i < k * nq; i++) { + gt[i] = gt_int[i]; + } + delete [] gt_int; + } + + for (auto nprobe : nprobes) { + + faiss::ParameterSpace params; + + printf ("[%.3f s] Setting parameter configuration 'nprobe=%lu' on index\n", elapsed() - t0, nprobe); + + std::string nprobe_str = "nprobe=" + std::to_string(nprobe); + params.set_index_parameters(index, nprobe_str.c_str()); + + // output buffers +#if 1 + const size_t NQ = 1000, K = 1000; + faiss::Index::idx_t *I = new faiss::Index::idx_t[NQ * K]; + float *D = new float[NQ * K]; + + printf ("\n%s | %s | nprobe=%lu\n", ann_test_name.c_str(), index_key.c_str(), nprobe); + printf ("====================================================\n"); + for (size_t t_nq = 10; t_nq <= NQ; t_nq *= 10) { // nq = {10, 100, 1000} + for (size_t t_k = 100; t_k <= K; t_k *= 10) { // k = {100, 1000} + double t_start = elapsed(), t_end; + + index->search(t_nq, xq, t_k, D, I); + + t_end = elapsed(); + + // k = 100 for ground truth + int hit = 0; + for (int i = 0; i < t_nq; i++) { + // count the num of results exist in ground truth result set + // consider: each result replicates DATA_LOOPS times + for (int j_c = 0; j_c < k; j_c++) { + int r_c = I[i * t_k + j_c]; + for (int j_g = 0; j_g < k/index_add_loops; j_g++) { + if (gt[i * k + j_g] == r_c) { + hit++; + continue; + } + } + } + } + printf("nq = %4ld, k = %4ld, elapse = %fs, R@ = %.4f\n", + t_nq, t_k, (t_end - t_start), (hit / float(t_nq * k / index_add_loops))); + } + } + printf ("====================================================\n"); +#else + printf ("[%.3f s] Perform a search on %ld queries\n", elapsed() - t0, nq); + + faiss::Index::idx_t *I = new faiss::Index::idx_t[nq * k]; + float *D = new float[nq * k]; + + index->search(nq, xq, k, D, I); + + printf ("[%.3f s] Compute recalls\n", elapsed() - t0); + + // evaluate result by hand. 
+        int n_1 = 0, n_10 = 0, n_100 = 0;
+        for(int i = 0; i < nq; i++) {
+            int gt_nn = gt[i * k];
+            for(int j = 0; j < k; j++) {
+                if (I[i * k + j] == gt_nn) {
+                    if(j < 1) n_1++;
+                    if(j < 10) n_10++;
+                    if(j < 100) n_100++;
+                }
+            }
+        }
+        printf("R@1 = %.4f\n", n_1 / float(nq));
+        printf("R@10 = %.4f\n", n_10 / float(nq));
+        printf("R@100 = %.4f\n", n_100 / float(nq));
+#endif
+
+        printf ("[%.3f s] Search test done\n\n", elapsed() - t0);
+
+        delete [] I;
+        delete [] D;
+    }
+
+    delete [] xq;
+    delete [] gt;
+    delete index;
+}
+
+#ifdef CUSTOMIZATION
+void test_ivfsq8h_gpu(const std::string& ann_test_name,
+                      int32_t index_add_loops,
+                      const std::vector<size_t>& nprobes) {
+    double t0 = elapsed();
+
+    const std::string ann_file_name = ann_test_name + ".hdf5";
+
+    faiss::MetricType metric_type;
+    size_t dim;
+
+    if (!parse_ann_test_name(ann_test_name, dim, metric_type)) {
+        printf("Invalid ann test name: %s\n", ann_test_name.c_str());
+        return;
+    }
+
+    faiss::distance_compute_blas_threshold = 800;
+    faiss::gpu::StandardGpuResources res;
+
+    const std::string index_key = "IVF16384,SQ8Hybrid";
+
+    faiss::Index* cpu_index = nullptr;
+    size_t d;
+
+    std::string index_file_name = get_index_file_name(ann_test_name, index_key, index_add_loops);
+    try {
+        cpu_index = faiss::read_index(index_file_name.c_str());
+        d = dim;
+    }
+    catch (...) {
+        printf("Cannot read index file: %s\n", index_file_name.c_str());
+
+        printf ("[%.3f s] Loading train set\n", elapsed() - t0);
+
+        size_t nb;
+        float *xb = (float*)hdf5_read(ann_file_name.c_str(), "train", H5T_FLOAT, d, nb);
+        assert(d == dim || !"dataset does not have correct dimension");
+
+        printf ("[%.3f s] Preparing index \"%s\" d=%ld\n", elapsed() - t0, index_key.c_str(), d);
+
+        faiss::Index *ori_index = faiss::index_factory(d, index_key.c_str(), metric_type);
+
+        auto device_index = faiss::gpu::index_cpu_to_gpu(&res, 0, ori_index);
+
+        printf ("[%.3f s] Training on %ld vectors\n", elapsed() - t0, nb);
+
+        device_index->train(nb, xb);
+
+        printf ("[%.3f s] Loading database\n", elapsed() - t0);
+
+        for (int i = 0; i < index_add_loops; i++) {
+            printf ("[%.3f s] Indexing database, size %ld*%ld\n", elapsed() - t0, nb, d);
+            device_index->add(nb, xb);
+        }
+
+        cpu_index = faiss::gpu::index_gpu_to_cpu(device_index);
+        faiss::write_index(cpu_index, index_file_name.c_str());
+
+        delete []xb;
+    }
+
+    faiss::IndexIVF *cpu_ivf_index = dynamic_cast<faiss::IndexIVF*>(cpu_index);
+    if(cpu_ivf_index != nullptr) {
+        cpu_ivf_index->to_readonly();
+    }
+
+    faiss::gpu::GpuClonerOptions option;
+    option.allInGpu = true;
+
+    faiss::IndexComposition index_composition;
+    index_composition.index = cpu_index;
+    index_composition.quantizer = nullptr;
+    index_composition.mode = 1;
+
+    auto index = faiss::gpu::index_cpu_to_gpu(&res, 0, &index_composition, &option);
+    delete index;
+
+    size_t nq;
+    float *xq;
+    {
+        printf ("[%.3f s] Loading queries\n", elapsed() - t0);
+
+        size_t d2;
+        xq = (float*)hdf5_read(ann_file_name.c_str(), "test", H5T_FLOAT, d2, nq);
+        assert(d == d2 || !"query does not have same dimension as train set");
+    }
+
+    size_t k;
+    faiss::Index::idx_t *gt;
+    {
+        printf ("[%.3f s] Loading ground truth for %ld queries\n", elapsed() - t0, nq);
+
+        size_t nq2;
+        int *gt_int = (int*)hdf5_read(ann_file_name.c_str(), "neighbors", H5T_INTEGER, k, nq2);
+        assert(nq2 == nq || !"incorrect nb of ground truth entries");
+
+        gt = new faiss::Index::idx_t[k * nq];
+        for (unsigned long i = 0; i < k * nq; ++i) {
+            gt[i] = gt_int[i];
+        }
+        delete []gt_int;
+    }
+
+    for (auto nprobe : nprobes) {
+        printf ("[%.3f s] Setting parameter configuration 'nprobe=%lu' on index\n",
+                elapsed() - t0, nprobe);
+
+        auto ivf_index = dynamic_cast<faiss::IndexIVF*>(cpu_index);
+        ivf_index->nprobe = nprobe;
+
+        auto is_gpu_flat_index = dynamic_cast<faiss::gpu::GpuIndexFlat*>(ivf_index->quantizer);
+        if(is_gpu_flat_index == nullptr) {
+            delete ivf_index->quantizer;
+            ivf_index->quantizer = index_composition.quantizer;
+        }
+
+        const size_t NQ = 1000, K = 1000;
+        long *I = new faiss::Index::idx_t[NQ * K];
+        float *D = new float[NQ * K];
+
+        printf ("\n%s %ld\n", index_key.c_str(), nprobe);
+        printf ("\n%s | %s | nprobe=%lu\n", ann_test_name.c_str(), index_key.c_str(), nprobe);
+        printf ("====================================================\n");
+
+        for (size_t t_nq = 10; t_nq <= NQ; t_nq *= 10) {    // nq = {10, 100, 1000}
+            for (size_t t_k = 100; t_k <= K; t_k *= 10) {   // k = {100, 1000}
+                double t_start = elapsed(), t_end;
+
+                cpu_index->search(t_nq, xq, t_k, D, I);
+
+                t_end = elapsed();
+
+                // k = 100 for ground truth
+                int hit = 0;
+                for (unsigned long i = 0; i < t_nq; i++) {
+                    // count the num of results exist in ground truth result set
+                    // consider: each result replicates DATA_LOOPS times
+                    for (unsigned long j_c = 0; j_c < k; j_c++) {
+                        int r_c = I[i * t_k + j_c];
+                        for (unsigned long j_g = 0; j_g < k/index_add_loops; j_g++) {
+                            if (gt[i * k + j_g] == r_c) {
+                                hit++;
+                                continue;
+                            }
+                        }
+                    }
+                }
+                printf("nq = %4ld, k = %4ld, elapse = %fs, R@ = %.4f\n",
+                       t_nq, t_k, (t_end - t_start), (hit / float(t_nq * k / index_add_loops)));
+            }
+        }
+        printf ("====================================================\n");
+
+        printf ("[%.3f s] Search test done\n\n", elapsed() - t0);
+
+        delete [] I;
+        delete [] D;
+    }
+
+    delete [] xq;
+    delete [] gt;
+    delete cpu_index;
+}
+#endif
+
+/************************************************************************************
+ * https://github.com/erikbern/ann-benchmarks
+ *
+ * Dataset    Dimensions  Train_size  Test_size  Neighbors  Distance   Download
+ * Fashion-
+ *   MNIST    784         60,000      10,000     100        Euclidean  HDF5 (217MB)
+ * GIST       960         1,000,000   1,000      100        Euclidean  HDF5 (3.6GB)
+ * GloVe      100         1,183,514   10,000     100        Angular    HDF5 (463MB)
+ * GloVe      200         1,183,514   10,000     100        Angular    HDF5 (918MB)
+ * MNIST      784         60,000      10,000     100        Euclidean  HDF5 (217MB)
+ * NYTimes    256         290,000     10,000     100        Angular    HDF5 (301MB)
+ * SIFT       128         1,000,000   10,000     100        Euclidean  HDF5 (501MB)
+*************************************************************************************/
+
+TEST(FAISSTEST, sift1m_L2) {
+    test_ann_hdf5("sift-128-euclidean", "IVF4096,Flat", 2, {8, 128});
+    test_ann_hdf5("sift-128-euclidean", "IVF16384,SQ8", 2, {8, 128});
+    test_ann_hdf5("sift-128-euclidean", "IVF16384,SQ8Hybrid", 2, {8, 128});
+#ifdef CUSTOMIZATION
+    test_ivfsq8h_gpu("sift-128-euclidean", 2, {8, 128});
+#endif
+
+    test_ann_hdf5("glove-200-angular", "IVF4096,Flat", 1, {8, 128});
+    test_ann_hdf5("glove-200-angular", "IVF16384,SQ8", 1, {8, 128});
+    test_ann_hdf5("glove-200-angular", "IVF16384,SQ8Hybrid", 1, {8, 128});
+#ifdef CUSTOMIZATION
+    test_ivfsq8h_gpu("glove-200-angular", 2, {128, 1024});
+#endif
+}

From 52ca4c4ae2578e270ca4b41a6f8f1f4e1b2a68d9 Mon Sep 17 00:00:00 2001
From: "yudong.cai"
Date: Wed, 23 Oct 2019 17:46:27 +0800
Subject: [PATCH 026/149] update test parameter

Former-commit-id: 0871f4f0acd87280b5327caf4f14dae5b0d5e165
---
 .../src/index/unittest/faiss_benchmark/faiss_benchmark_test.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/core/src/index/unittest/faiss_benchmark/faiss_benchmark_test.cpp b/core/src/index/unittest/faiss_benchmark/faiss_benchmark_test.cpp
index
5ece23c7aa..5f787a415c 100644 --- a/core/src/index/unittest/faiss_benchmark/faiss_benchmark_test.cpp +++ b/core/src/index/unittest/faiss_benchmark/faiss_benchmark_test.cpp @@ -540,7 +540,7 @@ TEST(FAISSTEST, sift1m_L2) { test_ann_hdf5("glove-200-angular", "IVF16384,SQ8", 1, {8, 128}); test_ann_hdf5("glove-200-angular", "IVF16384,SQ8Hybrid", 1, {8, 128}); #ifdef CUSTOMIZATION - test_ivfsq8h_gpu("glove-200-angular", 2, {128, 1024}); + test_ivfsq8h_gpu("glove-200-angular", 2, {8, 128}); #endif } From 4c7d590eb22323d1ef0f6a474a1f332d5c4ebd66 Mon Sep 17 00:00:00 2001 From: "yudong.cai" Date: Wed, 23 Oct 2019 18:24:35 +0800 Subject: [PATCH 027/149] 9 update unittest name Former-commit-id: 4bd87de1ceee872b9de13c5e5c65bab7291eb9c7 --- .../index/unittest/faiss_benchmark/faiss_benchmark_test.cpp | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/core/src/index/unittest/faiss_benchmark/faiss_benchmark_test.cpp b/core/src/index/unittest/faiss_benchmark/faiss_benchmark_test.cpp index 5f787a415c..f1dc060825 100644 --- a/core/src/index/unittest/faiss_benchmark/faiss_benchmark_test.cpp +++ b/core/src/index/unittest/faiss_benchmark/faiss_benchmark_test.cpp @@ -469,10 +469,8 @@ void test_ivfsq8h_gpu(const std::string& ann_test_name, long *I = new faiss::Index::idx_t[NQ * K]; float *D = new float[NQ * K]; - printf ("\n%s %ld\n", index_key.c_str(), nprobe); printf ("\n%s | %s | nprobe=%lu\n", ann_test_name.c_str(), index_key.c_str(), nprobe); printf ("====================================================\n"); - for (size_t t_nq = 10; t_nq <= NQ; t_nq *= 10) { // nq = {10, 100, 1000} for (size_t t_k = 100; t_k <= K; t_k *= 10) { // k = {100, 1000} double t_start = elapsed(), t_end; @@ -528,7 +526,7 @@ void test_ivfsq8h_gpu(const std::string& ann_test_name, * SIFT 128 1,000,000 10,000 100 Euclidean HDF5 (501MB) *************************************************************************************/ -TEST(FAISSTEST, sift1m_L2) { +TEST(FAISSTEST, BENCHMARK) { test_ann_hdf5("sift-128-euclidean", "IVF4096,Flat", 2, {8, 128}); test_ann_hdf5("sift-128-euclidean", "IVF16384,SQ8", 2, {8, 128}); test_ann_hdf5("sift-128-euclidean", "IVF16384,SQ8Hybrid", 2, {8, 128}); From 82ddcf8340b84a1eb2f4ea0209554a006f4dac6a Mon Sep 17 00:00:00 2001 From: "yudong.cai" Date: Wed, 23 Oct 2019 18:39:43 +0800 Subject: [PATCH 028/149] #89 display quant/search time Former-commit-id: 563141ab22274ca0e3e84253df8a79af58c16eca --- .../faiss_benchmark/faiss_benchmark_test.cpp | 28 +++++++++++++------ 1 file changed, 20 insertions(+), 8 deletions(-) diff --git a/core/src/index/unittest/faiss_benchmark/faiss_benchmark_test.cpp b/core/src/index/unittest/faiss_benchmark/faiss_benchmark_test.cpp index f1dc060825..5d63d63003 100644 --- a/core/src/index/unittest/faiss_benchmark/faiss_benchmark_test.cpp +++ b/core/src/index/unittest/faiss_benchmark/faiss_benchmark_test.cpp @@ -282,9 +282,12 @@ void test_ann_hdf5(const std::string& ann_test_name, float *D = new float[NQ * K]; printf ("\n%s | %s | nprobe=%lu\n", ann_test_name.c_str(), index_key.c_str(), nprobe); - printf ("====================================================\n"); + printf ("============================================================================================\n"); for (size_t t_nq = 10; t_nq <= NQ; t_nq *= 10) { // nq = {10, 100, 1000} for (size_t t_k = 100; t_k <= K; t_k *= 10) { // k = {100, 1000} + faiss::indexIVF_stats.quantization_time = 0.0; + faiss::indexIVF_stats.search_time = 0.0; + double t_start = elapsed(), t_end; index->search(t_nq, xq, t_k, D, I); @@ 
-306,11 +309,14 @@ void test_ann_hdf5(const std::string& ann_test_name, } } } - printf("nq = %4ld, k = %4ld, elapse = %fs, R@ = %.4f\n", - t_nq, t_k, (t_end - t_start), (hit / float(t_nq * k / index_add_loops))); + printf("nq = %4ld, k = %4ld, elapse = %fs (quant = %fs, search = %fs), R@ = %.4f\n", + t_nq, t_k, (t_end - t_start), + faiss::indexIVF_stats.quantization_time / 1000, + faiss::indexIVF_stats.search_time / 1000, + (hit / float(t_nq * k / index_add_loops))); } } - printf ("====================================================\n"); + printf ("============================================================================================\n"); #else printf ("[%.3f s] Perform a search on %ld queries\n", elapsed() - t0, nq); @@ -470,9 +476,12 @@ void test_ivfsq8h_gpu(const std::string& ann_test_name, float *D = new float[NQ * K]; printf ("\n%s | %s | nprobe=%lu\n", ann_test_name.c_str(), index_key.c_str(), nprobe); - printf ("====================================================\n"); + printf ("============================================================================================\n"); for (size_t t_nq = 10; t_nq <= NQ; t_nq *= 10) { // nq = {10, 100, 1000} for (size_t t_k = 100; t_k <= K; t_k *= 10) { // k = {100, 1000} + faiss::indexIVF_stats.quantization_time = 0.0; + faiss::indexIVF_stats.search_time = 0.0; + double t_start = elapsed(), t_end; cpu_index->search(t_nq, xq, t_k, D, I); @@ -494,11 +503,14 @@ void test_ivfsq8h_gpu(const std::string& ann_test_name, } } } - printf("nq = %4ld, k = %4ld, elapse = %fs, R@ = %.4f\n", - t_nq, t_k, (t_end - t_start), (hit / float(t_nq * k / index_add_loops))); + printf("nq = %4ld, k = %4ld, elapse = %fs (quant = %fs, search = %fs), R@ = %.4f\n", + t_nq, t_k, (t_end - t_start), + faiss::indexIVF_stats.quantization_time / 1000, + faiss::indexIVF_stats.search_time / 1000, + (hit / float(t_nq * k / index_add_loops))); } } - printf ("====================================================\n"); + printf ("============================================================================================\n"); printf ("[%.3f s] Search test done\n\n", elapsed() - t0); From e25caa572cc295ba55ca82c3eebc53c2c47e32d5 Mon Sep 17 00:00:00 2001 From: starlord Date: Wed, 23 Oct 2019 20:19:45 +0800 Subject: [PATCH 029/149] remove unused files of wrapper test Former-commit-id: b2b30f0d09085207c70d386e19c3c1fb6bbc0e2f --- core/unittest/wrapper/CMakeLists.txt | 8 -- .../unittest/wrapper/appendix/log_config.conf | 27 ---- .../wrapper/appendix/server_config.yaml | 37 ------ core/unittest/wrapper/test_knowhere.cpp | 16 +-- core/unittest/wrapper/utils.cpp | 125 +++++++++++++----- core/unittest/wrapper/utils.h | 10 ++ 6 files changed, 111 insertions(+), 112 deletions(-) delete mode 100644 core/unittest/wrapper/appendix/log_config.conf delete mode 100644 core/unittest/wrapper/appendix/server_config.yaml diff --git a/core/unittest/wrapper/CMakeLists.txt b/core/unittest/wrapper/CMakeLists.txt index a8015f8d34..ef145a9f50 100644 --- a/core/unittest/wrapper/CMakeLists.txt +++ b/core/unittest/wrapper/CMakeLists.txt @@ -41,11 +41,3 @@ target_link_libraries(test_wrapper ${unittest_libs}) install(TARGETS test_wrapper DESTINATION unittest) - -configure_file(appendix/server_config.yaml - "${CMAKE_CURRENT_BINARY_DIR}/milvus/conf/server_config.yaml" - COPYONLY) - -configure_file(appendix/log_config.conf - "${CMAKE_CURRENT_BINARY_DIR}/milvus/conf/log_config.conf" - COPYONLY) \ No newline at end of file diff --git a/core/unittest/wrapper/appendix/log_config.conf 
b/core/unittest/wrapper/appendix/log_config.conf deleted file mode 100644 index 0a3e0d21af..0000000000 --- a/core/unittest/wrapper/appendix/log_config.conf +++ /dev/null @@ -1,27 +0,0 @@ -* GLOBAL: - FORMAT = "%datetime | %level | %logger | %msg" - FILENAME = "/tmp/milvus/logs/milvus-%datetime{%y-%M-%d-%H:%m}-global.log" - ENABLED = true - TO_FILE = true - TO_STANDARD_OUTPUT = false - SUBSECOND_PRECISION = 3 - PERFORMANCE_TRACKING = false - MAX_LOG_FILE_SIZE = 209715200 ## Throw log files away after 200MB -* DEBUG: - FILENAME = "/tmp/milvus/logs/milvus-%datetime{%y-%M-%d-%H:%m}-debug.log" - ENABLED = true -* WARNING: - FILENAME = "/tmp/milvus/logs/milvus-%datetime{%y-%M-%d-%H:%m}-warning.log" -* TRACE: - FILENAME = "/tmp/milvus/logs/milvus-%datetime{%y-%M-%d-%H:%m}-trace.log" -* VERBOSE: - FORMAT = "%datetime{%d/%M/%y} | %level-%vlevel | %msg" - TO_FILE = false - TO_STANDARD_OUTPUT = false -## Error logs -* ERROR: - ENABLED = true - FILENAME = "/tmp/milvus/logs/milvus-%datetime{%y-%M-%d-%H:%m}-error.log" -* FATAL: - ENABLED = true - FILENAME = "/tmp/milvus/logs/milvus-%datetime{%y-%M-%d-%H:%m}-fatal.log" diff --git a/core/unittest/wrapper/appendix/server_config.yaml b/core/unittest/wrapper/appendix/server_config.yaml deleted file mode 100644 index f92b2f1a18..0000000000 --- a/core/unittest/wrapper/appendix/server_config.yaml +++ /dev/null @@ -1,37 +0,0 @@ -# All the following configurations are default values. - -server_config: - address: 0.0.0.0 # milvus server ip address (IPv4) - port: 19530 # port range: 1025 ~ 65534 - deploy_mode: single # deployment type: single, cluster_readonly, cluster_writable - time_zone: UTC+8 - -db_config: - primary_path: /tmp/milvus # path used to store data and meta - secondary_path: # path used to store data only, split by semicolon - - backend_url: sqlite://:@:/ # URI format: dialect://username:password@host:port/database - # Keep 'dialect://:@:/', and replace other texts with real values. - # Replace 'dialect' with 'mysql' or 'sqlite' - - insert_buffer_size: 4 # GB, maximum insert buffer size allowed - build_index_gpu: 0 # gpu id used for building index - -metric_config: - enable_monitor: false # enable monitoring or not - collector: prometheus # prometheus - prometheus_config: - port: 8080 # port prometheus used to fetch metrics - -cache_config: - cpu_mem_capacity: 16 # GB, CPU memory used for cache - cpu_mem_threshold: 0.85 # percentage of data kept when cache cleanup triggered - cache_insert_data: false # whether load inserted data into cache - -engine_config: - blas_threshold: 20 - -resource_config: - resource_pool: - - cpu - - gpu0 diff --git a/core/unittest/wrapper/test_knowhere.cpp b/core/unittest/wrapper/test_knowhere.cpp index e9b93fb63e..455fdbdebe 100644 --- a/core/unittest/wrapper/test_knowhere.cpp +++ b/core/unittest/wrapper/test_knowhere.cpp @@ -16,20 +16,16 @@ // under the License. 
 #include "wrapper/KnowhereResource.h"
+#include "wrapper/utils.h"
 #include "server/Config.h"
 #include <gtest/gtest.h>

-namespace {
-
-static const char* CONFIG_FILE_PATH = "./milvus/conf/server_config.yaml";
-static const char* LOG_FILE_PATH = "./milvus/conf/log_config.conf";
-
-} // namespace
-
-TEST(KnowhereTest, KNOWHERE_RESOURCE_TEST) {
-    milvus::server::Config &config = milvus::server::Config::GetInstance();
-    milvus::Status s = config.LoadConfigFile(CONFIG_FILE_PATH);
+TEST_F(KnowhereTest, KNOWHERE_RESOURCE_TEST) {
+    std::string config_path(CONFIG_PATH);
+    config_path += CONFIG_FILE;
+    milvus::server::Config& config = milvus::server::Config::GetInstance();
+    milvus::Status s = config.LoadConfigFile(config_path);
     ASSERT_TRUE(s.ok());

     milvus::engine::KnowhereResource::Initialize();
diff --git a/core/unittest/wrapper/utils.cpp b/core/unittest/wrapper/utils.cpp
index 445b7a2de6..6204ac0c05 100644
--- a/core/unittest/wrapper/utils.cpp
+++ b/core/unittest/wrapper/utils.cpp
@@ -18,13 +18,78 @@
 #include
 #include
+#include <fstream>
 #include "wrapper/utils.h"
+#include "utils/CommonUtil.h"
+
+namespace {
+static const char
+    * CONFIG_STR = "# All the following configurations are default values.\n"
+                   "\n"
+                   "server_config:\n"
+                   "  address: 0.0.0.0          # milvus server ip address (IPv4)\n"
+                   "  port: 19530               # port range: 1025 ~ 65534\n"
+                   "  deploy_mode: single \n"
+                   "  time_zone: UTC+8\n"
+                   "\n"
+                   "db_config:\n"
+                   "  primary_path: /tmp/milvus # path used to store data and meta\n"
+                   "  secondary_path:           # path used to store data only, split by semicolon\n"
+                   "\n"
+                   "  backend_url: sqlite://:@:/ # URI format: dialect://username:password@host:port/database\n"
+                   "  \n"
+                   "  # Replace 'dialect' with 'mysql' or 'sqlite'\n"
+                   "\n"
+                   "  insert_buffer_size: 4     # GB, maximum insert buffer size allowed\n"
+                   "\n"
+                   "metric_config:\n"
+                   "  enable_monitor: false     # enable monitoring or not\n"
+                   "  collector: prometheus     # prometheus\n"
+                   "  prometheus_config:\n"
+                   "    port: 8080              # port prometheus used to fetch metrics\n"
+                   "\n"
+                   "cache_config:\n"
+                   "  cpu_mem_capacity: 16      # GB, CPU memory used for cache\n"
+                   "  cpu_mem_threshold: 0.85   # percentage of data kept when cache cleanup triggered\n"
+                   "  cache_insert_data: false  # whether load inserted data into cache\n"
+                   "\n"
+                   "engine_config:\n"
+                   "  blas_threshold: 20\n"
+                   "\n"
+                   "resource_config:\n"
+                   "  resource_pool:\n"
+                   "    - gpu0\n"
+                   "  index_build_device: gpu0  # GPU used for building index";

 void
-DataGenBase::GenData(const int &dim, const int &nb, const int &nq,
-                     float *xb, float *xq, int64_t *ids,
-                     const int &k, int64_t *gt_ids, float *gt_dis) {
+WriteToFile(const std::string& file_path, const char* content) {
+    std::fstream fs(file_path.c_str(), std::ios_base::out);
+
+    //write data to file
+    fs << content;
+    fs.close();
+}
+
+} // namespace
+
+void
+KnowhereTest::SetUp() {
+    std::string config_path(CONFIG_PATH);
+    milvus::server::CommonUtil::CreateDirectory(config_path);
+    WriteToFile(config_path + CONFIG_FILE, CONFIG_STR);
+}
+
+void
+KnowhereTest::TearDown() {
+    std::string config_path(CONFIG_PATH);
+    milvus::server::CommonUtil::DeleteDirectory(config_path);
+}
+
+void
+DataGenBase::GenData(const int& dim, const int& nb, const int& nq,
+                     float* xb, float* xq, int64_t* ids,
+                     const int& k, int64_t* gt_ids, float* gt_dis) {
     for (auto i = 0; i < nb; ++i) {
         for (auto j = 0; j < dim; ++j) {
             //p_data[i * d + j] = float(base + i);
@@ -44,15 +109,15 @@ DataGenBase::GenData(const int &dim,
 }

 void
-DataGenBase::GenData(const int &dim,
-                     const int &nb,
-                     const int &nq,
-                     std::vector<float> &xb,
-                     std::vector<float> &xq,
-                     std::vector<int64_t> &ids,
-                     const int &k,
-                     std::vector<int64_t> &gt_ids,
-                     std::vector<float> &gt_dis) {
+DataGenBase::GenData(const int& dim,
+                     const int& nb,
+                     const int& nq,
+                     std::vector<float>& xb,
+                     std::vector<float>& xq,
+                     std::vector<int64_t>& ids,
+                     const int& k,
+                     std::vector<int64_t>& gt_ids,
+                     std::vector<float>& gt_dis) {
     xb.resize(nb * dim);
     xq.resize(nq * dim);
     ids.resize(nb);
@@ -63,27 +128,27 @@ DataGenBase::GenData(const int &dim,

 void
 DataGenBase::AssertResult(const std::vector<int64_t>& ids, const std::vector<float>& dis) {
-        EXPECT_EQ(ids.size(), nq * k);
-        EXPECT_EQ(dis.size(), nq * k);
+    EXPECT_EQ(ids.size(), nq * k);
+    EXPECT_EQ(dis.size(), nq * k);

-        for (auto i = 0; i < nq; i++) {
-            EXPECT_EQ(ids[i * k], gt_ids[i * k]);
-            //EXPECT_EQ(dis[i * k], gt_dis[i * k]);
-        }
+    for (auto i = 0; i < nq; i++) {
+        EXPECT_EQ(ids[i * k], gt_ids[i * k]);
+        //EXPECT_EQ(dis[i * k], gt_dis[i * k]);
+    }

-        int match = 0;
-        for (int i = 0; i < nq; ++i) {
-            for (int j = 0; j < k; ++j) {
-                for (int l = 0; l < k; ++l) {
-                    if (ids[i * nq + j] == gt_ids[i * nq + l]) match++;
-                }
+    int match = 0;
+    for (int i = 0; i < nq; ++i) {
+        for (int j = 0; j < k; ++j) {
+            for (int l = 0; l < k; ++l) {
+                if (ids[i * nq + j] == gt_ids[i * nq + l]) match++;
             }
         }
+    }

-        auto precision = float(match) / (nq * k);
-        EXPECT_GT(precision, 0.5);
-        std::cout << std::endl << "Precision: " << precision
-                  << ", match: " << match
-                  << ", total: " << nq * k
-                  << std::endl;
+    auto precision = float(match) / (nq * k);
+    EXPECT_GT(precision, 0.5);
+    std::cout << std::endl << "Precision: " << precision
+              << ", match: " << match
+              << ", total: " << nq * k
+              << std::endl;
 }
diff --git a/core/unittest/wrapper/utils.h b/core/unittest/wrapper/utils.h
index 5a614543c9..0b9e422152 100644
--- a/core/unittest/wrapper/utils.h
+++ b/core/unittest/wrapper/utils.h
@@ -18,6 +18,7 @@

 #pragma once

+#include <gtest/gtest.h>
 #include
 #include
 #include
@@ -40,6 +41,15 @@ constexpr int64_t PINMEM = 1024 * 1024 * 200;
 constexpr int64_t TEMPMEM = 1024 * 1024 * 300;
 constexpr int64_t RESNUM = 2;

+static const char *CONFIG_PATH = "/tmp/milvus_test";
+static const char *CONFIG_FILE = "/server_config.yaml";
+
+class KnowhereTest : public ::testing::Test {
+ protected:
+    void SetUp() override;
+    void TearDown() override;
+};
+
 class DataGenBase {
  public:
    virtual void GenData(const int& dim, const int& nb, const int& nq, float* xb, float* xq, int64_t* ids,

From a6279d2acb1992b2fb53930e084ca3b5c7e77ee5 Mon Sep 17 00:00:00 2001
From: Zhiru Zhu
Date: Wed, 23 Oct 2019 21:45:48 +0800
Subject: [PATCH 030/149] Speed up CMake build process

Former-commit-id: 2edb2ae26d93ed9b7a8d25b89d30152e60257250
---
 CHANGELOG.md                                  |   3 +-
 core/cmake/DefineOptions.cmake                |  23 +-
 core/cmake/ThirdPartyPackages.cmake           | 707 +++---------------
 core/src/CMakeLists.txt                       |   8 +-
 .../index/cmake/ThirdPartyPackagesCore.cmake  |  23 +-
 core/src/sdk/CMakeLists.txt                   |   3 -
 core/thirdparty/versions.txt                  |   7 +-
 core/ubuntu_build_deps.sh                     |   3 +-
 core/unittest/CMakeLists.txt                  |   1 -
 core/unittest/server/CMakeLists.txt           |   3 -
 10 files changed, 125 insertions(+), 656 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 0efa5cebcc..ebad4e2f32 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -6,8 +6,9 @@ Please mark all change in change log and use the ticket from JIRA.
## Bug ## Improvement -- \#64 - Improvement dump function in scheduler +- \#64 - Improvement dump function in - \#82 - Move easyloggingpp into "external" directory +- \#92 - Speed up CMake build process ## Feature ## Task diff --git a/core/cmake/DefineOptions.cmake b/core/cmake/DefineOptions.cmake index 7aae177f0b..167b6e9d66 100644 --- a/core/cmake/DefineOptions.cmake +++ b/core/cmake/DefineOptions.cmake @@ -55,21 +55,10 @@ define_option_string(MILVUS_DEPENDENCY_SOURCE define_option(MILVUS_VERBOSE_THIRDPARTY_BUILD "Show output from ExternalProjects rather than just logging to files" ON) -define_option(MILVUS_BOOST_VENDORED "Use vendored Boost instead of existing Boost. \ -Note that this requires linking Boost statically" OFF) - -define_option(MILVUS_BOOST_HEADER_ONLY "Use only BOOST headers" OFF) - -define_option(MILVUS_WITH_BZ2 "Build with BZ2 compression" ON) - define_option(MILVUS_WITH_EASYLOGGINGPP "Build with Easylogging++ library" ON) -define_option(MILVUS_WITH_LZ4 "Build with lz4 compression" ON) - define_option(MILVUS_WITH_PROMETHEUS "Build with PROMETHEUS library" ON) -define_option(MILVUS_WITH_SNAPPY "Build with Snappy compression" ON) - define_option(MILVUS_WITH_SQLITE "Build with SQLite library" ON) define_option(MILVUS_WITH_SQLITE_ORM "Build with SQLite ORM library" ON) @@ -78,16 +67,6 @@ define_option(MILVUS_WITH_MYSQLPP "Build with MySQL++" ON) define_option(MILVUS_WITH_YAMLCPP "Build with yaml-cpp library" ON) -define_option(MILVUS_WITH_ZLIB "Build with zlib compression" ON) - -if(CMAKE_VERSION VERSION_LESS 3.7) - set(MILVUS_WITH_ZSTD_DEFAULT OFF) -else() - # ExternalProject_Add(SOURCE_SUBDIR) is available since CMake 3.7. - set(MILVUS_WITH_ZSTD_DEFAULT ON) -endif() -define_option(MILVUS_WITH_ZSTD "Build with zstd compression" ${MILVUS_WITH_ZSTD_DEFAULT}) - if (MILVUS_ENABLE_PROFILING STREQUAL "ON") define_option(MILVUS_WITH_LIBUNWIND "Build with libunwind" ON) define_option(MILVUS_WITH_GPERFTOOLS "Build with gperftools" ON) @@ -95,6 +74,8 @@ endif() define_option(MILVUS_WITH_GRPC "Build with GRPC" ON) +define_option(MILVUS_WITH_ZLIB "Build with zlib compression" ON) + #---------------------------------------------------------------------- if(MSVC) set_option_category("MSVC") diff --git a/core/cmake/ThirdPartyPackages.cmake b/core/cmake/ThirdPartyPackages.cmake index ade57c06ad..d0057d3c22 100644 --- a/core/cmake/ThirdPartyPackages.cmake +++ b/core/cmake/ThirdPartyPackages.cmake @@ -16,21 +16,16 @@ set(MILVUS_THIRDPARTY_DEPENDENCIES - BOOST - BZip2 GTest - Lz4 MySQLPP Prometheus - Snappy SQLite SQLite_ORM yaml-cpp - ZLIB - ZSTD libunwind gperftools - GRPC) + GRPC + ZLIB) message(STATUS "Using ${MILVUS_DEPENDENCY_SOURCE} approach to find dependencies") @@ -42,34 +37,26 @@ foreach(DEPENDENCY ${MILVUS_THIRDPARTY_DEPENDENCIES}) endforeach() macro(build_dependency DEPENDENCY_NAME) - if("${DEPENDENCY_NAME}" STREQUAL "BZip2") - build_bzip2() - elseif ("${DEPENDENCY_NAME}" STREQUAL "GTest") + if ("${DEPENDENCY_NAME}" STREQUAL "GTest") build_gtest() - elseif("${DEPENDENCY_NAME}" STREQUAL "Lz4") - build_lz4() elseif ("${DEPENDENCY_NAME}" STREQUAL "MySQLPP") build_mysqlpp() elseif ("${DEPENDENCY_NAME}" STREQUAL "Prometheus") build_prometheus() - elseif ("${DEPENDENCY_NAME}" STREQUAL "Snappy") - build_snappy() elseif ("${DEPENDENCY_NAME}" STREQUAL "SQLite") build_sqlite() elseif ("${DEPENDENCY_NAME}" STREQUAL "SQLite_ORM") build_sqlite_orm() elseif("${DEPENDENCY_NAME}" STREQUAL "yaml-cpp") build_yamlcpp() - elseif("${DEPENDENCY_NAME}" STREQUAL "ZLIB") - build_zlib() - 
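+    # builders for BZip2/Lz4/Snappy/ZSTD were removed above; only the dependencies
+    # still listed in MILVUS_THIRDPARTY_DEPENDENCIES are built from source here.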
elseif("${DEPENDENCY_NAME}" STREQUAL "ZSTD") - build_zstd() elseif("${DEPENDENCY_NAME}" STREQUAL "libunwind") build_libunwind() elseif("${DEPENDENCY_NAME}" STREQUAL "gperftools") build_gperftools() elseif("${DEPENDENCY_NAME}" STREQUAL "GRPC") build_grpc() + elseif("${DEPENDENCY_NAME}" STREQUAL "ZLIB") + build_zlib() else() message(FATAL_ERROR "Unknown thirdparty dependency to build: ${DEPENDENCY_NAME}") endif () @@ -263,23 +250,6 @@ foreach(_VERSION_ENTRY ${TOOLCHAIN_VERSIONS_TXT}) set(${_LIB_NAME} "${_LIB_VERSION}") endforeach() -if(DEFINED ENV{MILVUS_BOOST_URL}) - set(BOOST_SOURCE_URL "$ENV{MILVUS_BOOST_URL}") -else() - string(REPLACE "." "_" BOOST_VERSION_UNDERSCORES ${BOOST_VERSION}) - set(BOOST_SOURCE_URL - "https://nchc.dl.sourceforge.net/project/boost/boost/${BOOST_VERSION}/boost_${BOOST_VERSION_UNDERSCORES}.tar.gz") - #"https://dl.bintray.com/boostorg/release/${BOOST_VERSION}/source/boost_${BOOST_VERSION_UNDERSCORES}.tar.gz") -endif() -set(BOOST_MD5 "fea771fe8176828fabf9c09242ee8c26") - -if(DEFINED ENV{MILVUS_BZIP2_URL}) - set(BZIP2_SOURCE_URL "$ENV{MILVUS_BZIP2_URL}") -else() - set(BZIP2_SOURCE_URL "https://sourceware.org/pub/bzip2/bzip2-${BZIP2_VERSION}.tar.gz") -endif() -set(BZIP2_MD5 "00b516f4704d4a7cb50a1d97e6e8e15b") - if (DEFINED ENV{MILVUS_GTEST_URL}) set(GTEST_SOURCE_URL "$ENV{MILVUS_GTEST_URL}") else () @@ -288,13 +258,6 @@ else () endif() set(GTEST_MD5 "2e6fbeb6a91310a16efe181886c59596") -if(DEFINED ENV{MILVUS_LZ4_URL}) - set(LZ4_SOURCE_URL "$ENV{MILVUS_LZ4_URL}") -else() - set(LZ4_SOURCE_URL "https://github.com/lz4/lz4/archive/${LZ4_VERSION}.tar.gz") -endif() -set(LZ4_MD5 "a80f28f2a2e5fe59ebfe8407f793da22") - if(DEFINED ENV{MILVUS_MYSQLPP_URL}) set(MYSQLPP_SOURCE_URL "$ENV{MILVUS_MYSQLPP_URL}") else() @@ -309,14 +272,6 @@ else () https://github.com/jupp0r/prometheus-cpp.git) endif() -if(DEFINED ENV{MILVUS_SNAPPY_URL}) - set(SNAPPY_SOURCE_URL "$ENV{MILVUS_SNAPPY_URL}") -else() - set(SNAPPY_SOURCE_URL - "https://github.com/google/snappy/archive/${SNAPPY_VERSION}.tar.gz") -endif() -set(SNAPPY_MD5 "ee9086291c9ae8deb4dac5e0b85bf54a") - if(DEFINED ENV{MILVUS_SQLITE_URL}) set(SQLITE_SOURCE_URL "$ENV{MILVUS_SQLITE_URL}") else() @@ -329,7 +284,6 @@ if(DEFINED ENV{MILVUS_SQLITE_ORM_URL}) set(SQLITE_ORM_SOURCE_URL "$ENV{MILVUS_SQLITE_ORM_URL}") else() set(SQLITE_ORM_SOURCE_URL -# "http://192.168.1.105:6060/Test/sqlite_orm/-/archive/master/sqlite_orm-master.zip") "https://github.com/fnc12/sqlite_orm/archive/${SQLITE_ORM_VERSION}.zip") endif() set(SQLITE_ORM_MD5 "ba9a405a8a1421c093aa8ce988ff8598") @@ -341,20 +295,6 @@ else() endif() set(YAMLCPP_MD5 "5b943e9af0060d0811148b037449ef82") -if(DEFINED ENV{MILVUS_ZLIB_URL}) - set(ZLIB_SOURCE_URL "$ENV{MILVUS_ZLIB_URL}") -else() - set(ZLIB_SOURCE_URL "https://github.com/madler/zlib/archive/${ZLIB_VERSION}.tar.gz") -endif() -set(ZLIB_MD5 "0095d2d2d1f3442ce1318336637b695f") - -if(DEFINED ENV{MILVUS_ZSTD_URL}) - set(ZSTD_SOURCE_URL "$ENV{MILVUS_ZSTD_URL}") -else() - set(ZSTD_SOURCE_URL "https://github.com/facebook/zstd/archive/${ZSTD_VERSION}.tar.gz") -endif() -set(ZSTD_MD5 "340c837db48354f8d5eafe74c6077120") - if(DEFINED ENV{MILVUS_LIBUNWIND_URL}) set(LIBUNWIND_SOURCE_URL "$ENV{MILVUS_LIBUNWIND_URL}") else() @@ -379,202 +319,12 @@ else() endif() set(GRPC_MD5 "0362ba219f59432c530070b5f5c3df73") - -# ---------------------------------------------------------------------- -# Add Boost dependencies (code adapted from Apache Kudu (incubating)) - -set(Boost_USE_MULTITHREADED ON) -set(Boost_ADDITIONAL_VERSIONS - "1.70.0" - "1.70" - "1.69.0" - "1.69" 
- "1.68.0" - "1.68" - "1.67.0" - "1.67" - "1.66.0" - "1.66" - "1.65.0" - "1.65" - "1.64.0" - "1.64" - "1.63.0" - "1.63" - "1.62.0" - "1.61" - "1.61.0" - "1.62" - "1.60.0" - "1.60") - -if(MILVUS_BOOST_VENDORED) - set(BOOST_PREFIX "${CMAKE_CURRENT_BINARY_DIR}/boost_ep-prefix/src/boost_ep") - set(BOOST_LIB_DIR "${BOOST_PREFIX}/stage/lib") - set(BOOST_BUILD_LINK "static") - set(BOOST_STATIC_SYSTEM_LIBRARY - "${BOOST_LIB_DIR}/${CMAKE_STATIC_LIBRARY_PREFIX}boost_system${CMAKE_STATIC_LIBRARY_SUFFIX}" - ) - set(BOOST_STATIC_FILESYSTEM_LIBRARY - "${BOOST_LIB_DIR}/${CMAKE_STATIC_LIBRARY_PREFIX}boost_filesystem${CMAKE_STATIC_LIBRARY_SUFFIX}" - ) - set(BOOST_STATIC_SERIALIZATION_LIBRARY - "${BOOST_LIB_DIR}/${CMAKE_STATIC_LIBRARY_PREFIX}boost_serialization${CMAKE_STATIC_LIBRARY_SUFFIX}" - ) - set(BOOST_SYSTEM_LIBRARY boost_system_static) - set(BOOST_FILESYSTEM_LIBRARY boost_filesystem_static) - set(BOOST_SERIALIZATION_LIBRARY boost_serialization_static) - - if(MILVUS_BOOST_HEADER_ONLY) - set(BOOST_BUILD_PRODUCTS) - set(BOOST_CONFIGURE_COMMAND "") - set(BOOST_BUILD_COMMAND "") - else() - set(BOOST_BUILD_PRODUCTS ${BOOST_STATIC_SYSTEM_LIBRARY} - ${BOOST_STATIC_FILESYSTEM_LIBRARY} ${BOOST_STATIC_SERIALIZATION_LIBRARY}) - set(BOOST_CONFIGURE_COMMAND "./bootstrap.sh" "--prefix=${BOOST_PREFIX}" - "--with-libraries=filesystem,serialization,system") - if("${CMAKE_BUILD_TYPE}" STREQUAL "DEBUG") - set(BOOST_BUILD_VARIANT "debug") - else() - set(BOOST_BUILD_VARIANT "release") - endif() - set(BOOST_BUILD_COMMAND - "./b2" - "link=${BOOST_BUILD_LINK}" - "variant=${BOOST_BUILD_VARIANT}" - "cxxflags=-fPIC") - - add_thirdparty_lib(boost_system STATIC_LIB "${BOOST_STATIC_SYSTEM_LIBRARY}") - - add_thirdparty_lib(boost_filesystem STATIC_LIB "${BOOST_STATIC_FILESYSTEM_LIBRARY}") - - add_thirdparty_lib(boost_serialization STATIC_LIB "${BOOST_STATIC_SERIALIZATION_LIBRARY}") - - set(MILVUS_BOOST_LIBS ${BOOST_SYSTEM_LIBRARY} ${BOOST_FILESYSTEM_LIBRARY} ${BOOST_STATIC_SERIALIZATION_LIBRARY}) - endif() - externalproject_add(boost_ep - URL - ${BOOST_SOURCE_URL} - BUILD_BYPRODUCTS - ${BOOST_BUILD_PRODUCTS} - BUILD_IN_SOURCE - 1 - CONFIGURE_COMMAND - ${BOOST_CONFIGURE_COMMAND} - BUILD_COMMAND - ${BOOST_BUILD_COMMAND} - INSTALL_COMMAND - "" - ${EP_LOG_OPTIONS}) - - - set(Boost_INCLUDE_DIR "${BOOST_PREFIX}") - set(Boost_INCLUDE_DIRS "${Boost_INCLUDE_DIR}") - add_dependencies(boost_system_static boost_ep) - add_dependencies(boost_filesystem_static boost_ep) - add_dependencies(boost_serialization_static boost_ep) - -endif() - -include_directories(SYSTEM ${Boost_INCLUDE_DIR}) -link_directories(SYSTEM ${BOOST_LIB_DIR}) - -# ---------------------------------------------------------------------- -# bzip2 - -macro(build_bzip2) - message(STATUS "Building BZip2-${BZIP2_VERSION} from source") - set(BZIP2_PREFIX "${CMAKE_CURRENT_BINARY_DIR}/bzip2_ep-prefix/src/bzip2_ep") - set(BZIP2_INCLUDE_DIR "${BZIP2_PREFIX}/include") - set(BZIP2_STATIC_LIB - "${BZIP2_PREFIX}/lib/${CMAKE_STATIC_LIBRARY_PREFIX}bz2${CMAKE_STATIC_LIBRARY_SUFFIX}") - - if(USE_JFROG_CACHE STREQUAL "ON") - set(BZIP2_CACHE_PACKAGE_NAME "bzip2_${BZIP2_MD5}.tar.gz") - set(BZIP2_CACHE_URL "${JFROG_ARTFACTORY_CACHE_URL}/${BZIP2_CACHE_PACKAGE_NAME}") - set(BZIP2_CACHE_PACKAGE_PATH "${THIRDPARTY_PACKAGE_CACHE}/${BZIP2_CACHE_PACKAGE_NAME}") - - execute_process(COMMAND wget -q --method HEAD ${BZIP2_CACHE_URL} RESULT_VARIABLE return_code) - message(STATUS "Check the remote cache file ${BZIP2_CACHE_URL}. 
return code = ${return_code}") - if (NOT return_code EQUAL 0) - externalproject_add(bzip2_ep - ${EP_LOG_OPTIONS} - CONFIGURE_COMMAND - "" - BUILD_IN_SOURCE - 1 - BUILD_COMMAND - ${MAKE} - ${MAKE_BUILD_ARGS} - CFLAGS=${EP_C_FLAGS} - INSTALL_COMMAND - ${MAKE} - install - PREFIX=${BZIP2_PREFIX} - CFLAGS=${EP_C_FLAGS} - INSTALL_DIR - ${BZIP2_PREFIX} - URL - ${BZIP2_SOURCE_URL} - BUILD_BYPRODUCTS - "${BZIP2_STATIC_LIB}") - - ExternalProject_Create_Cache(bzip2_ep ${BZIP2_CACHE_PACKAGE_PATH} "${CMAKE_CURRENT_BINARY_DIR}/bzip2_ep-prefix" ${JFROG_USER_NAME} ${JFROG_PASSWORD} ${BZIP2_CACHE_URL}) - else() - file(DOWNLOAD ${BZIP2_CACHE_URL} ${BZIP2_CACHE_PACKAGE_PATH} STATUS status) - list(GET status 0 status_code) - message(STATUS "DOWNLOADING FROM ${BZIP2_CACHE_URL} TO ${BZIP2_CACHE_PACKAGE_PATH}. STATUS = ${status_code}") - if (status_code EQUAL 0) - ExternalProject_Use_Cache(bzip2_ep ${BZIP2_CACHE_PACKAGE_PATH} ${CMAKE_CURRENT_BINARY_DIR}) - endif() - endif() - else() - externalproject_add(bzip2_ep - ${EP_LOG_OPTIONS} - CONFIGURE_COMMAND - "" - BUILD_IN_SOURCE - 1 - BUILD_COMMAND - ${MAKE} - ${MAKE_BUILD_ARGS} - CFLAGS=${EP_C_FLAGS} - INSTALL_COMMAND - ${MAKE} - install - PREFIX=${BZIP2_PREFIX} - CFLAGS=${EP_C_FLAGS} - INSTALL_DIR - ${BZIP2_PREFIX} - URL - ${BZIP2_SOURCE_URL} - BUILD_BYPRODUCTS - "${BZIP2_STATIC_LIB}") - endif() - - file(MAKE_DIRECTORY "${BZIP2_INCLUDE_DIR}") - add_library(bzip2 STATIC IMPORTED) - set_target_properties( - bzip2 - PROPERTIES IMPORTED_LOCATION "${BZIP2_STATIC_LIB}" - INTERFACE_INCLUDE_DIRECTORIES "${BZIP2_INCLUDE_DIR}") - - add_dependencies(bzip2 bzip2_ep) -endmacro() - -if(MILVUS_WITH_BZ2) - resolve_dependency(BZip2) - - if(NOT TARGET bzip2) - add_library(bzip2 UNKNOWN IMPORTED) - set_target_properties(bzip2 - PROPERTIES IMPORTED_LOCATION "${BZIP2_LIBRARIES}" - INTERFACE_INCLUDE_DIRECTORIES "${BZIP2_INCLUDE_DIR}") - endif() - link_directories(SYSTEM ${BZIP2_PREFIX}/lib/) - include_directories(SYSTEM "${BZIP2_INCLUDE_DIR}") +if(DEFINED ENV{MILVUS_ZLIB_URL}) + set(ZLIB_SOURCE_URL "$ENV{MILVUS_ZLIB_URL}") +else() + set(ZLIB_SOURCE_URL "https://github.com/madler/zlib/archive/${ZLIB_VERSION}.tar.gz") endif() +set(ZLIB_MD5 "0095d2d2d1f3442ce1318336637b695f") # ---------------------------------------------------------------------- # Google gtest @@ -689,95 +439,6 @@ if (MILVUS_BUILD_TESTS) include_directories(SYSTEM ${GTEST_INCLUDE_DIR}) endif() -# ---------------------------------------------------------------------- -# lz4 - -macro(build_lz4) - message(STATUS "Building lz4-${LZ4_VERSION} from source") - set(LZ4_BUILD_DIR "${CMAKE_CURRENT_BINARY_DIR}/lz4_ep-prefix/src/lz4_ep") - set(LZ4_PREFIX "${CMAKE_CURRENT_BINARY_DIR}/lz4_ep-prefix/") - - set(LZ4_STATIC_LIB "${LZ4_BUILD_DIR}/lib/liblz4.a") - set(LZ4_BUILD_COMMAND BUILD_COMMAND ${MAKE} ${MAKE_BUILD_ARGS} CFLAGS=${EP_C_FLAGS}) - - # We need to copy the header in lib to directory outside of the build - if(USE_JFROG_CACHE STREQUAL "ON") - set(LZ4_CACHE_PACKAGE_NAME "lz4_${LZ4_MD5}.tar.gz") - set(LZ4_CACHE_URL "${JFROG_ARTFACTORY_CACHE_URL}/${LZ4_CACHE_PACKAGE_NAME}") - set(LZ4_CACHE_PACKAGE_PATH "${THIRDPARTY_PACKAGE_CACHE}/${LZ4_CACHE_PACKAGE_NAME}") - - execute_process(COMMAND wget -q --method HEAD ${LZ4_CACHE_URL} RESULT_VARIABLE return_code) - message(STATUS "Check the remote file ${LZ4_CACHE_URL}. 
return code = ${return_code}") - if (NOT return_code EQUAL 0) - externalproject_add(lz4_ep - URL - ${LZ4_SOURCE_URL} - ${EP_LOG_OPTIONS} - UPDATE_COMMAND - ${CMAKE_COMMAND} - -E - copy_directory - "${LZ4_BUILD_DIR}/lib" - "${LZ4_PREFIX}/include" - ${LZ4_PATCH_COMMAND} - CONFIGURE_COMMAND - "" - INSTALL_COMMAND - "" - BINARY_DIR - ${LZ4_BUILD_DIR} - BUILD_BYPRODUCTS - ${LZ4_STATIC_LIB} - ${LZ4_BUILD_COMMAND}) - - ExternalProject_Create_Cache(lz4_ep ${LZ4_CACHE_PACKAGE_PATH} "${CMAKE_CURRENT_BINARY_DIR}/lz4_ep-prefix" ${JFROG_USER_NAME} ${JFROG_PASSWORD} ${LZ4_CACHE_URL}) - else() - file(DOWNLOAD ${LZ4_CACHE_URL} ${LZ4_CACHE_PACKAGE_PATH} STATUS status) - list(GET status 0 status_code) - message(STATUS "DOWNLOADING FROM ${LZ4_CACHE_URL} TO ${LZ4_CACHE_PACKAGE_PATH}. STATUS = ${status_code}") - if (status_code EQUAL 0) - ExternalProject_Use_Cache(lz4_ep ${LZ4_CACHE_PACKAGE_PATH} ${CMAKE_CURRENT_BINARY_DIR}) - endif() - endif() - else() - externalproject_add(lz4_ep - URL - ${LZ4_SOURCE_URL} - ${EP_LOG_OPTIONS} - UPDATE_COMMAND - ${CMAKE_COMMAND} - -E - copy_directory - "${LZ4_BUILD_DIR}/lib" - "${LZ4_PREFIX}/include" - ${LZ4_PATCH_COMMAND} - CONFIGURE_COMMAND - "" - INSTALL_COMMAND - "" - BINARY_DIR - ${LZ4_BUILD_DIR} - BUILD_BYPRODUCTS - ${LZ4_STATIC_LIB} - ${LZ4_BUILD_COMMAND}) - endif() - - file(MAKE_DIRECTORY "${LZ4_PREFIX}/include") - add_library(lz4 STATIC IMPORTED) - set_target_properties(lz4 - PROPERTIES IMPORTED_LOCATION "${LZ4_STATIC_LIB}" - INTERFACE_INCLUDE_DIRECTORIES "${LZ4_PREFIX}/include") - add_dependencies(lz4 lz4_ep) -endmacro() - -if(MILVUS_WITH_LZ4) - resolve_dependency(Lz4) - - get_target_property(LZ4_INCLUDE_DIR lz4 INTERFACE_INCLUDE_DIRECTORIES) - link_directories(SYSTEM ${LZ4_BUILD_DIR}/lib/) - include_directories(SYSTEM ${LZ4_INCLUDE_DIR}) -endif() - # ---------------------------------------------------------------------- # MySQL++ @@ -996,93 +657,6 @@ if(MILVUS_WITH_PROMETHEUS) endif() -# ---------------------------------------------------------------------- -# Snappy - -macro(build_snappy) - message(STATUS "Building snappy-${SNAPPY_VERSION} from source") - set(SNAPPY_PREFIX "${CMAKE_CURRENT_BINARY_DIR}/snappy_ep-prefix/src/snappy_ep") - set(SNAPPY_INCLUDE_DIRS "${SNAPPY_PREFIX}/include") - set(SNAPPY_STATIC_LIB_NAME snappy) - set(SNAPPY_STATIC_LIB - "${SNAPPY_PREFIX}/lib/${CMAKE_STATIC_LIBRARY_PREFIX}${SNAPPY_STATIC_LIB_NAME}${CMAKE_STATIC_LIBRARY_SUFFIX}" - ) - - set(SNAPPY_CMAKE_ARGS - ${EP_COMMON_CMAKE_ARGS} - -DCMAKE_INSTALL_LIBDIR=lib - -DSNAPPY_BUILD_TESTS=OFF - "-DCMAKE_INSTALL_PREFIX=${SNAPPY_PREFIX}") - - if(USE_JFROG_CACHE STREQUAL "ON") - set(SNAPPY_CACHE_PACKAGE_NAME "snappy_${SNAPPY_MD5}.tar.gz") - set(SNAPPY_CACHE_URL "${JFROG_ARTFACTORY_CACHE_URL}/${SNAPPY_CACHE_PACKAGE_NAME}") - set(SNAPPY_CACHE_PACKAGE_PATH "${THIRDPARTY_PACKAGE_CACHE}/${SNAPPY_CACHE_PACKAGE_NAME}") - - execute_process(COMMAND wget -q --method HEAD ${SNAPPY_CACHE_URL} RESULT_VARIABLE return_code) - message(STATUS "Check the remote file ${SNAPPY_CACHE_URL}. 
return code = ${return_code}") - if (NOT return_code EQUAL 0) - externalproject_add(snappy_ep - ${EP_LOG_OPTIONS} - BUILD_COMMAND - ${MAKE} - ${MAKE_BUILD_ARGS} - BUILD_IN_SOURCE - 1 - INSTALL_DIR - ${SNAPPY_PREFIX} - URL - ${SNAPPY_SOURCE_URL} - CMAKE_ARGS - ${SNAPPY_CMAKE_ARGS} - BUILD_BYPRODUCTS - "${SNAPPY_STATIC_LIB}") - - ExternalProject_Create_Cache(snappy_ep ${SNAPPY_CACHE_PACKAGE_PATH} "${CMAKE_CURRENT_BINARY_DIR}/snappy_ep-prefix" ${JFROG_USER_NAME} ${JFROG_PASSWORD} ${SNAPPY_CACHE_URL}) - else() - file(DOWNLOAD ${SNAPPY_CACHE_URL} ${SNAPPY_CACHE_PACKAGE_PATH} STATUS status) - list(GET status 0 status_code) - message(STATUS "DOWNLOADING FROM ${SNAPPY_CACHE_URL} TO ${SNAPPY_CACHE_PACKAGE_PATH}. STATUS = ${status_code}") - if (status_code EQUAL 0) - ExternalProject_Use_Cache(snappy_ep ${SNAPPY_CACHE_PACKAGE_PATH} ${CMAKE_CURRENT_BINARY_DIR}) - endif() - endif() - else() - externalproject_add(snappy_ep - ${EP_LOG_OPTIONS} - BUILD_COMMAND - ${MAKE} - ${MAKE_BUILD_ARGS} - BUILD_IN_SOURCE - 1 - INSTALL_DIR - ${SNAPPY_PREFIX} - URL - ${SNAPPY_SOURCE_URL} - CMAKE_ARGS - ${SNAPPY_CMAKE_ARGS} - BUILD_BYPRODUCTS - "${SNAPPY_STATIC_LIB}") - endif() - - file(MAKE_DIRECTORY "${SNAPPY_INCLUDE_DIR}") - add_library(snappy STATIC IMPORTED) - set_target_properties(snappy - PROPERTIES IMPORTED_LOCATION "${SNAPPY_STATIC_LIB}" - INTERFACE_INCLUDE_DIRECTORIES - "${SNAPPY_INCLUDE_DIR}") - add_dependencies(snappy snappy_ep) -endmacro() - -if(MILVUS_WITH_SNAPPY) - - resolve_dependency(Snappy) - - get_target_property(SNAPPY_INCLUDE_DIRS snappy INTERFACE_INCLUDE_DIRECTORIES) - link_directories(SYSTEM ${SNAPPY_PREFIX}/lib/) - include_directories(SYSTEM ${SNAPPY_INCLUDE_DIRS}) -endif() - # ---------------------------------------------------------------------- # SQLite @@ -1265,176 +839,6 @@ if(MILVUS_WITH_YAMLCPP) include_directories(SYSTEM ${YAMLCPP_INCLUDE_DIR}) endif() -# ---------------------------------------------------------------------- -# zlib - -macro(build_zlib) - message(STATUS "Building ZLIB-${ZLIB_VERSION} from source") - set(ZLIB_PREFIX "${CMAKE_CURRENT_BINARY_DIR}/zlib_ep-prefix/src/zlib_ep") - set(ZLIB_STATIC_LIB_NAME libz.a) - set(ZLIB_STATIC_LIB "${ZLIB_PREFIX}/lib/${ZLIB_STATIC_LIB_NAME}") - set(ZLIB_INCLUDE_DIR "${ZLIB_PREFIX}/include") - set(ZLIB_CMAKE_ARGS ${EP_COMMON_CMAKE_ARGS} "-DCMAKE_INSTALL_PREFIX=${ZLIB_PREFIX}" - -DBUILD_SHARED_LIBS=OFF) - - if(USE_JFROG_CACHE STREQUAL "ON") - set(ZLIB_CACHE_PACKAGE_NAME "zlib_${ZLIB_MD5}.tar.gz") - set(ZLIB_CACHE_URL "${JFROG_ARTFACTORY_CACHE_URL}/${ZLIB_CACHE_PACKAGE_NAME}") - set(ZLIB_CACHE_PACKAGE_PATH "${THIRDPARTY_PACKAGE_CACHE}/${ZLIB_CACHE_PACKAGE_NAME}") - - execute_process(COMMAND wget -q --method HEAD ${ZLIB_CACHE_URL} RESULT_VARIABLE return_code) - message(STATUS "Check the remote file ${ZLIB_CACHE_URL}. return code = ${return_code}") - if (NOT return_code EQUAL 0) - externalproject_add(zlib_ep - URL - ${ZLIB_SOURCE_URL} - ${EP_LOG_OPTIONS} - BUILD_COMMAND - ${MAKE} - ${MAKE_BUILD_ARGS} - BUILD_BYPRODUCTS - "${ZLIB_STATIC_LIB}" - CMAKE_ARGS - ${ZLIB_CMAKE_ARGS}) - - ExternalProject_Create_Cache(zlib_ep ${ZLIB_CACHE_PACKAGE_PATH} "${CMAKE_CURRENT_BINARY_DIR}/zlib_ep-prefix" ${JFROG_USER_NAME} ${JFROG_PASSWORD} ${ZLIB_CACHE_URL}) - else() - file(DOWNLOAD ${ZLIB_CACHE_URL} ${ZLIB_CACHE_PACKAGE_PATH} STATUS status) - list(GET status 0 status_code) - message(STATUS "DOWNLOADING FROM ${ZLIB_CACHE_URL} TO ${ZLIB_CACHE_PACKAGE_PATH}. 
STATUS = ${status_code}") - if (status_code EQUAL 0) - ExternalProject_Use_Cache(zlib_ep ${ZLIB_CACHE_PACKAGE_PATH} ${CMAKE_CURRENT_BINARY_DIR}) - endif() - endif() - else() - externalproject_add(zlib_ep - URL - ${ZLIB_SOURCE_URL} - ${EP_LOG_OPTIONS} - BUILD_COMMAND - ${MAKE} - ${MAKE_BUILD_ARGS} - BUILD_BYPRODUCTS - "${ZLIB_STATIC_LIB}" - CMAKE_ARGS - ${ZLIB_CMAKE_ARGS}) - endif() - - file(MAKE_DIRECTORY "${ZLIB_INCLUDE_DIR}") - add_library(zlib STATIC IMPORTED) - set_target_properties(zlib - PROPERTIES IMPORTED_LOCATION "${ZLIB_STATIC_LIB}" - INTERFACE_INCLUDE_DIRECTORIES "${ZLIB_INCLUDE_DIR}") - - add_dependencies(zlib zlib_ep) -endmacro() - -if(MILVUS_WITH_ZLIB) - resolve_dependency(ZLIB) - - get_target_property(ZLIB_INCLUDE_DIR zlib INTERFACE_INCLUDE_DIRECTORIES) - include_directories(SYSTEM ${ZLIB_INCLUDE_DIR}) -endif() - -# ---------------------------------------------------------------------- -# zstd - -macro(build_zstd) - message(STATUS "Building zstd-${ZSTD_VERSION} from source") - set(ZSTD_PREFIX "${CMAKE_CURRENT_BINARY_DIR}/zstd_ep-prefix/src/zstd_ep") - - set(ZSTD_CMAKE_ARGS - ${EP_COMMON_TOOLCHAIN} - "-DCMAKE_INSTALL_PREFIX=${ZSTD_PREFIX}" - -DCMAKE_BUILD_TYPE=${CMAKE_BUILD_TYPE} - -DCMAKE_INSTALL_LIBDIR=lib #${CMAKE_INSTALL_LIBDIR} - -DZSTD_BUILD_PROGRAMS=off - -DZSTD_BUILD_SHARED=off - -DZSTD_BUILD_STATIC=on - -DZSTD_MULTITHREAD_SUPPORT=off) - - - set(ZSTD_STATIC_LIB "${ZSTD_PREFIX}/lib/libzstd.a") - set(ZSTD_INCLUDE_DIR "${ZSTD_PREFIX}/include") - set(ZSTD_CMAKE_ARGS - ${ZSTD_CMAKE_ARGS} - -DCMAKE_C_COMPILER=${CMAKE_C_COMPILER} - -DCMAKE_CXX_COMPILER=${CMAKE_CXX_COMPILER} - -DCMAKE_C_FLAGS=${EP_C_FLAGS} - -DCMAKE_CXX_FLAGS=${EP_CXX_FLAGS}) - - if(CMAKE_VERSION VERSION_LESS 3.7) - message(FATAL_ERROR "Building zstd using ExternalProject requires at least CMake 3.7") - endif() - - if(USE_JFROG_CACHE STREQUAL "ON") - set(ZSTD_CACHE_PACKAGE_NAME "zstd_${ZSTD_MD5}.tar.gz") - set(ZSTD_CACHE_URL "${JFROG_ARTFACTORY_CACHE_URL}/${ZSTD_CACHE_PACKAGE_NAME}") - set(ZSTD_CACHE_PACKAGE_PATH "${THIRDPARTY_PACKAGE_CACHE}/${ZSTD_CACHE_PACKAGE_NAME}") - - execute_process(COMMAND wget -q --method HEAD ${ZSTD_CACHE_URL} RESULT_VARIABLE return_code) - message(STATUS "Check the remote file ${ZSTD_CACHE_URL}. return code = ${return_code}") - if (NOT return_code EQUAL 0) - externalproject_add(zstd_ep - ${EP_LOG_OPTIONS} - CMAKE_ARGS - ${ZSTD_CMAKE_ARGS} - SOURCE_SUBDIR - "build/cmake" - BUILD_COMMAND - ${MAKE} - ${MAKE_BUILD_ARGS} - INSTALL_DIR - ${ZSTD_PREFIX} - URL - ${ZSTD_SOURCE_URL} - BUILD_BYPRODUCTS - "${ZSTD_STATIC_LIB}") - - ExternalProject_Create_Cache(zstd_ep ${ZSTD_CACHE_PACKAGE_PATH} "${CMAKE_CURRENT_BINARY_DIR}/zstd_ep-prefix" ${JFROG_USER_NAME} ${JFROG_PASSWORD} ${ZSTD_CACHE_URL}) - else() - file(DOWNLOAD ${ZSTD_CACHE_URL} ${ZSTD_CACHE_PACKAGE_PATH} STATUS status) - list(GET status 0 status_code) - message(STATUS "DOWNLOADING FROM ${ZSTD_CACHE_URL} TO ${ZSTD_CACHE_PACKAGE_PATH}. 
STATUS = ${status_code}") - if (status_code EQUAL 0) - ExternalProject_Use_Cache(zstd_ep ${ZSTD_CACHE_PACKAGE_PATH} ${CMAKE_CURRENT_BINARY_DIR}) - endif() - endif() - else() - externalproject_add(zstd_ep - ${EP_LOG_OPTIONS} - CMAKE_ARGS - ${ZSTD_CMAKE_ARGS} - SOURCE_SUBDIR - "build/cmake" - BUILD_COMMAND - ${MAKE} - ${MAKE_BUILD_ARGS} - INSTALL_DIR - ${ZSTD_PREFIX} - URL - ${ZSTD_SOURCE_URL} - BUILD_BYPRODUCTS - "${ZSTD_STATIC_LIB}") - endif() - - file(MAKE_DIRECTORY "${ZSTD_INCLUDE_DIR}") - add_library(zstd STATIC IMPORTED) - set_target_properties(zstd - PROPERTIES IMPORTED_LOCATION "${ZSTD_STATIC_LIB}" - INTERFACE_INCLUDE_DIRECTORIES "${ZSTD_INCLUDE_DIR}") - - add_dependencies(zstd zstd_ep) -endmacro() - -if(MILVUS_WITH_ZSTD) - resolve_dependency(ZSTD) - - get_target_property(ZSTD_INCLUDE_DIR zstd INTERFACE_INCLUDE_DIRECTORIES) - link_directories(SYSTEM ${ZSTD_PREFIX}/lib) - include_directories(SYSTEM ${ZSTD_INCLUDE_DIR}) -endif() - # ---------------------------------------------------------------------- # libunwind @@ -1637,6 +1041,8 @@ macro(build_grpc) ${GRPC_PROTOBUF_STATIC_LIB} ${GRPC_PROTOC_STATIC_LIB}) + ExternalProject_Add_StepDependencies(grpc_ep build zlib_ep) + ExternalProject_Create_Cache(grpc_ep ${GRPC_CACHE_PACKAGE_PATH} "${CMAKE_CURRENT_BINARY_DIR}/grpc_ep-prefix" ${JFROG_USER_NAME} ${JFROG_PASSWORD} ${GRPC_CACHE_URL}) else() file(DOWNLOAD ${GRPC_CACHE_URL} ${GRPC_CACHE_PACKAGE_PATH} STATUS status) @@ -1665,6 +1071,9 @@ macro(build_grpc) ${GRPCPP_CHANNELZ_STATIC_LIB} ${GRPC_PROTOBUF_STATIC_LIB} ${GRPC_PROTOC_STATIC_LIB}) + + ExternalProject_Add_StepDependencies(grpc_ep build zlib_ep) + endif() file(MAKE_DIRECTORY "${GRPC_INCLUDE_DIR}") @@ -1672,25 +1081,30 @@ macro(build_grpc) add_library(grpc STATIC IMPORTED) set_target_properties(grpc PROPERTIES IMPORTED_LOCATION "${GRPC_STATIC_LIB}" - INTERFACE_INCLUDE_DIRECTORIES "${GRPC_INCLUDE_DIR}") + INTERFACE_INCLUDE_DIRECTORIES "${GRPC_INCLUDE_DIR}" + INTERFACE_LINK_LIBRARIES "zlib" ) add_library(grpc++ STATIC IMPORTED) set_target_properties(grpc++ PROPERTIES IMPORTED_LOCATION "${GRPC++_STATIC_LIB}" - INTERFACE_INCLUDE_DIRECTORIES "${GRPC_INCLUDE_DIR}") + INTERFACE_INCLUDE_DIRECTORIES "${GRPC_INCLUDE_DIR}" + INTERFACE_LINK_LIBRARIES "zlib" ) add_library(grpcpp_channelz STATIC IMPORTED) set_target_properties(grpcpp_channelz PROPERTIES IMPORTED_LOCATION "${GRPCPP_CHANNELZ_STATIC_LIB}" - INTERFACE_INCLUDE_DIRECTORIES "${GRPC_INCLUDE_DIR}") + INTERFACE_INCLUDE_DIRECTORIES "${GRPC_INCLUDE_DIR}" + INTERFACE_LINK_LIBRARIES "zlib" ) add_library(grpc_protobuf STATIC IMPORTED) set_target_properties(grpc_protobuf - PROPERTIES IMPORTED_LOCATION "${GRPC_PROTOBUF_STATIC_LIB}") + PROPERTIES IMPORTED_LOCATION "${GRPC_PROTOBUF_STATIC_LIB}" + INTERFACE_LINK_LIBRARIES "zlib" ) add_library(grpc_protoc STATIC IMPORTED) set_target_properties(grpc_protoc - PROPERTIES IMPORTED_LOCATION "${GRPC_PROTOC_STATIC_LIB}") + PROPERTIES IMPORTED_LOCATION "${GRPC_PROTOC_STATIC_LIB}" + INTERFACE_LINK_LIBRARIES "zlib" ) add_dependencies(grpc grpc_ep) add_dependencies(grpc++ grpc_ep) @@ -1710,3 +1124,74 @@ if(MILVUS_WITH_GRPC) include_directories(SYSTEM ${GRPC_THIRD_PARTY_DIR}/protobuf/src) link_directories(SYSTEM ${GRPC_PROTOBUF_LIB_DIR}) endif() + +# ---------------------------------------------------------------------- +# zlib + +macro(build_zlib) + message(STATUS "Building ZLIB-${ZLIB_VERSION} from source") + set(ZLIB_PREFIX "${CMAKE_CURRENT_BINARY_DIR}/zlib_ep-prefix/src/zlib_ep") + set(ZLIB_STATIC_LIB_NAME libz.a) + set(ZLIB_STATIC_LIB 
"${ZLIB_PREFIX}/lib/${ZLIB_STATIC_LIB_NAME}") + set(ZLIB_INCLUDE_DIR "${ZLIB_PREFIX}/include") + set(ZLIB_CMAKE_ARGS ${EP_COMMON_CMAKE_ARGS} "-DCMAKE_INSTALL_PREFIX=${ZLIB_PREFIX}" + -DBUILD_SHARED_LIBS=OFF) + + if(USE_JFROG_CACHE STREQUAL "ON") + set(ZLIB_CACHE_PACKAGE_NAME "zlib_${ZLIB_MD5}.tar.gz") + set(ZLIB_CACHE_URL "${JFROG_ARTFACTORY_CACHE_URL}/${ZLIB_CACHE_PACKAGE_NAME}") + set(ZLIB_CACHE_PACKAGE_PATH "${THIRDPARTY_PACKAGE_CACHE}/${ZLIB_CACHE_PACKAGE_NAME}") + + execute_process(COMMAND wget -q --method HEAD ${ZLIB_CACHE_URL} RESULT_VARIABLE return_code) + message(STATUS "Check the remote file ${ZLIB_CACHE_URL}. return code = ${return_code}") + if (NOT return_code EQUAL 0) + externalproject_add(zlib_ep + URL + ${ZLIB_SOURCE_URL} + ${EP_LOG_OPTIONS} + BUILD_COMMAND + ${MAKE} + ${MAKE_BUILD_ARGS} + BUILD_BYPRODUCTS + "${ZLIB_STATIC_LIB}" + CMAKE_ARGS + ${ZLIB_CMAKE_ARGS}) + + ExternalProject_Create_Cache(zlib_ep ${ZLIB_CACHE_PACKAGE_PATH} "${CMAKE_CURRENT_BINARY_DIR}/zlib_ep-prefix" ${JFROG_USER_NAME} ${JFROG_PASSWORD} ${ZLIB_CACHE_URL}) + else() + file(DOWNLOAD ${ZLIB_CACHE_URL} ${ZLIB_CACHE_PACKAGE_PATH} STATUS status) + list(GET status 0 status_code) + message(STATUS "DOWNLOADING FROM ${ZLIB_CACHE_URL} TO ${ZLIB_CACHE_PACKAGE_PATH}. STATUS = ${status_code}") + if (status_code EQUAL 0) + ExternalProject_Use_Cache(zlib_ep ${ZLIB_CACHE_PACKAGE_PATH} ${CMAKE_CURRENT_BINARY_DIR}) + endif() + endif() + else() + externalproject_add(zlib_ep + URL + ${ZLIB_SOURCE_URL} + ${EP_LOG_OPTIONS} + BUILD_COMMAND + ${MAKE} + ${MAKE_BUILD_ARGS} + BUILD_BYPRODUCTS + "${ZLIB_STATIC_LIB}" + CMAKE_ARGS + ${ZLIB_CMAKE_ARGS}) + endif() + + file(MAKE_DIRECTORY "${ZLIB_INCLUDE_DIR}") + add_library(zlib STATIC IMPORTED) + set_target_properties(zlib + PROPERTIES IMPORTED_LOCATION "${ZLIB_STATIC_LIB}" + INTERFACE_INCLUDE_DIRECTORIES "${ZLIB_INCLUDE_DIR}") + + add_dependencies(zlib zlib_ep) +endmacro() + +if(MILVUS_WITH_ZLIB) + resolve_dependency(ZLIB) + + get_target_property(ZLIB_INCLUDE_DIR zlib INTERFACE_INCLUDE_DIRECTORIES) + include_directories(SYSTEM ${ZLIB_INCLUDE_DIR}) +endif() diff --git a/core/src/CMakeLists.txt b/core/src/CMakeLists.txt index d086955078..ae3a458987 100644 --- a/core/src/CMakeLists.txt +++ b/core/src/CMakeLists.txt @@ -120,14 +120,10 @@ set(third_party_libs ${client_grpc_lib} yaml-cpp ${prometheus_lib} - ${boost_lib} - bzip2 - lz4 - snappy - zlib - zstd ${cuda_lib} mysqlpp + zlib + ${boost_lib} ) if (MILVUS_ENABLE_PROFILING STREQUAL "ON") diff --git a/core/src/index/cmake/ThirdPartyPackagesCore.cmake b/core/src/index/cmake/ThirdPartyPackagesCore.cmake index 7e9bb0b671..0712966d9c 100644 --- a/core/src/index/cmake/ThirdPartyPackagesCore.cmake +++ b/core/src/index/cmake/ThirdPartyPackagesCore.cmake @@ -299,12 +299,29 @@ macro(build_arrow) ${EP_COMMON_CMAKE_ARGS} -DARROW_BUILD_STATIC=ON -DARROW_BUILD_SHARED=OFF - -DARROW_PARQUET=OFF -DARROW_USE_GLOG=OFF -DCMAKE_INSTALL_PREFIX=${ARROW_PREFIX} - "-DCMAKE_LIBRARY_PATH=${CUDA_TOOLKIT_ROOT_DIR}/lib64/stubs" + -DARROW_CUDA=OFF + -DARROW_FLIGHT=OFF + -DARROW_GANDIVA=OFF + -DARROW_GANDIVA_JAVA=OFF + -DARROW_HDFS=OFF + -DARROW_HIVESERVER2=OFF + -DARROW_ORC=OFF + -DARROW_PARQUET=OFF + -DARROW_PLASMA=OFF + -DARROW_PLASMA_JAVA_CLIENT=OFF + -DARROW_PYTHON=OFF + -DARROW_WITH_BZ2=OFF + -DARROW_WITH_ZLIB=OFF + -DARROW_WITH_LZ4=OFF + -DARROW_WITH_SNAPPY=OFF + -DARROW_WITH_ZSTD=OFF + -DARROW_WITH_BROTLI=OFF -DCMAKE_BUILD_TYPE=Release - -DARROW_DEPENDENCY_SOURCE=BUNDLED) #Build all arrow dependencies from source instead of calling find_package first + 
-DARROW_DEPENDENCY_SOURCE=BUNDLED #Build all arrow dependencies from source instead of calling find_package first + -DBOOST_SOURCE=AUTO #try to find BOOST in the system default locations and build from source if not found + ) if(USE_JFROG_CACHE STREQUAL "ON") diff --git a/core/src/sdk/CMakeLists.txt b/core/src/sdk/CMakeLists.txt index a2991a49b4..c68712d34c 100644 --- a/core/src/sdk/CMakeLists.txt +++ b/core/src/sdk/CMakeLists.txt @@ -30,9 +30,6 @@ add_library(milvus_sdk STATIC target_link_libraries(milvus_sdk ${client_grpc_lib} - bzip2 - lz4 - snappy zlib ) diff --git a/core/thirdparty/versions.txt b/core/thirdparty/versions.txt index ec270c0670..4faaf119e4 100644 --- a/core/thirdparty/versions.txt +++ b/core/thirdparty/versions.txt @@ -1,18 +1,13 @@ -BOOST_VERSION=1.70.0 -BZIP2_VERSION=1.0.6 EASYLOGGINGPP_VERSION=v9.96.7 GTEST_VERSION=1.8.1 -LZ4_VERSION=v1.9.1 MYSQLPP_VERSION=3.2.4 PROMETHEUS_VERSION=v0.7.0 -SNAPPY_VERSION=1.1.7 SQLITE_VERSION=3280000 SQLITE_ORM_VERSION=master YAMLCPP_VERSION=0.6.2 -ZLIB_VERSION=v1.2.11 -ZSTD_VERSION=v1.4.0 LIBUNWIND_VERSION=1.3.1 GPERFTOOLS_VERSION=2.7 GRPC_VERSION=master +ZLIB_VERSION=v1.2.11 # vim: set filetype=sh: diff --git a/core/ubuntu_build_deps.sh b/core/ubuntu_build_deps.sh index ed9eb9dee5..e454a147ac 100755 --- a/core/ubuntu_build_deps.sh +++ b/core/ubuntu_build_deps.sh @@ -1,5 +1,6 @@ #!/bin/bash -sudo apt-get install -y gfortran libmysqlclient-dev mysql-client libcurl4-openssl-dev libboost-system-dev libboost-filesystem-dev libboost-serialization-dev +sudo apt-get install -y gfortran libmysqlclient-dev mysql-client libcurl4-openssl-dev libboost-system-dev \ +libboost-filesystem-dev libboost-serialization-dev libboost-regex-dev sudo ln -s /usr/lib/x86_64-linux-gnu/libmysqlclient.so /usr/lib/x86_64-linux-gnu/libmysqlclient_r.so diff --git a/core/unittest/CMakeLists.txt b/core/unittest/CMakeLists.txt index aae7fb8d7f..62b5bdf256 100644 --- a/core/unittest/CMakeLists.txt +++ b/core/unittest/CMakeLists.txt @@ -102,7 +102,6 @@ set(unittest_libs sqlite libboost_system.a libboost_filesystem.a - lz4 mysqlpp yaml-cpp gtest diff --git a/core/unittest/server/CMakeLists.txt b/core/unittest/server/CMakeLists.txt index 180dcfa6d5..1f89de8d3f 100644 --- a/core/unittest/server/CMakeLists.txt +++ b/core/unittest/server/CMakeLists.txt @@ -59,9 +59,6 @@ set(client_grpc_lib target_link_libraries(test_server knowhere stdc++ - snappy - bz2 - zstd ${client_grpc_lib} ${unittest_libs} ) From 581c662b61c99a94f4969c9b56c7fc1c0e3654f2 Mon Sep 17 00:00:00 2001 From: zhenwu Date: Thu, 24 Oct 2019 00:52:33 +0800 Subject: [PATCH 031/149] remove sq8h Former-commit-id: 31deed25ae121396fe8352efad36609452ac6c01 --- tests/milvus_python_test/test_add_vectors.py | 8 ++++---- tests/milvus_python_test/test_index.py | 10 ++++++++-- 2 files changed, 12 insertions(+), 6 deletions(-) diff --git a/tests/milvus_python_test/test_add_vectors.py b/tests/milvus_python_test/test_add_vectors.py index 51c12dcd87..e223eaa5f6 100644 --- a/tests/milvus_python_test/test_add_vectors.py +++ b/tests/milvus_python_test/test_add_vectors.py @@ -573,7 +573,7 @@ class TestAddBase: nq = 100 vectors = gen_vectors(nq, dim) table_list = [] - for i in range(50): + for i in range(20): table_name = gen_unique_str('test_add_vector_multi_tables') table_list.append(table_name) param = {'table_name': table_name, @@ -581,9 +581,9 @@ class TestAddBase: 'index_file_size': index_file_size, 'metric_type': MetricType.L2} connect.create_table(param) - time.sleep(2) - for j in range(10): - for i in range(50): + time.sleep(5) 
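+        # give the 20 freshly created tables a moment to settle before the insert rounds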
+ for j in range(5): + for i in range(20): status, ids = connect.add_vectors(table_name=table_list[i], records=vectors) assert status.OK() diff --git a/tests/milvus_python_test/test_index.py b/tests/milvus_python_test/test_index.py index 9e9f0830ac..76774c0cf9 100644 --- a/tests/milvus_python_test/test_index.py +++ b/tests/milvus_python_test/test_index.py @@ -37,7 +37,10 @@ class TestIndexBase: params=gen_simple_index_params() ) def get_simple_index_params(self, request): - yield request.param + if "internal" not in args: + if request.param["index_type"] == IndexType.IVF_SQ8H: + pytest.skip("sq8h not support in open source") + return request.param """ ****************************************************************** @@ -515,7 +518,10 @@ class TestIndexIP: params=gen_simple_index_params() ) def get_simple_index_params(self, request): - yield request.param + if "internal" not in args: + if request.param["index_type"] == IndexType.IVF_SQ8H: + pytest.skip("sq8h not support in open source") + return request.param """ ****************************************************************** From c01107e2c7cb40d65ea49e2527ecafc5f99f0695 Mon Sep 17 00:00:00 2001 From: zhenwu Date: Thu, 24 Oct 2019 01:00:20 +0800 Subject: [PATCH 032/149] re-define case level Former-commit-id: 6c2ae8329c1ea9e22f0cba75d6603987a874b8c8 --- tests/milvus_python_test/test_add_vectors.py | 4 +++- tests/milvus_python_test/test_index.py | 9 +++++---- 2 files changed, 8 insertions(+), 5 deletions(-) diff --git a/tests/milvus_python_test/test_add_vectors.py b/tests/milvus_python_test/test_add_vectors.py index e223eaa5f6..e33328625a 100644 --- a/tests/milvus_python_test/test_add_vectors.py +++ b/tests/milvus_python_test/test_add_vectors.py @@ -407,6 +407,7 @@ class TestAddBase: def get_vector_id(self, request): yield request.param + @pytest.mark.level(2) def test_add_vectors_ids_invalid(self, connect, table, get_vector_id): ''' target: test add vectors in table, use customize ids, which are not int64 @@ -974,6 +975,7 @@ class TestAddIP: def get_vector_id(self, request): yield request.param + @pytest.mark.level(2) def test_add_vectors_ids_invalid(self, connect, ip_table, get_vector_id): ''' target: test add vectors in table, use customize ids, which are not int64 @@ -1223,7 +1225,7 @@ class TestAddTableVectorsInvalid(object): with pytest.raises(Exception) as e: status, result = connect.add_vectors(table, tmp_single_vector) - @pytest.mark.level(1) + @pytest.mark.level(2) def test_add_vectors_with_invalid_vectors(self, connect, table, gen_vector): tmp_vectors = copy.deepcopy(self.vectors) tmp_vectors[1][1] = gen_vector diff --git a/tests/milvus_python_test/test_index.py b/tests/milvus_python_test/test_index.py index 76774c0cf9..e4c8848d63 100644 --- a/tests/milvus_python_test/test_index.py +++ b/tests/milvus_python_test/test_index.py @@ -528,7 +528,7 @@ class TestIndexIP: The following cases are used to test `create_index` function ****************************************************************** """ - + @pytest.mark.level(2) @pytest.mark.timeout(BUILD_TIMEOUT) def test_create_index(self, connect, ip_table, get_index_params): ''' @@ -563,6 +563,7 @@ class TestIndexIP: logging.getLogger().info(index_params) status, ids = connect.add_vectors(ip_table, vectors) status = connect.create_index(ip_table, index_params) + assert status.OK() logging.getLogger().info(connect.describe_index(ip_table)) query_vecs = [vectors[0], vectors[1], vectors[2]] top_k = 5 @@ -933,19 +934,19 @@ class TestIndexTableInvalid(object): def get_table_name(self, 
request): yield request.param - # @pytest.mark.level(1) + @pytest.mark.level(2) def test_create_index_with_invalid_tablename(self, connect, get_table_name): table_name = get_table_name status = connect.create_index(table_name, random.choice(gen_index_params())) assert not status.OK() - # @pytest.mark.level(1) + @pytest.mark.level(2) def test_describe_index_with_invalid_tablename(self, connect, get_table_name): table_name = get_table_name status, result = connect.describe_index(table_name) assert not status.OK() - # @pytest.mark.level(1) + @pytest.mark.level(2) def test_drop_index_with_invalid_tablename(self, connect, get_table_name): table_name = get_table_name status = connect.drop_index(table_name) From ec022c330d264ec6fcbd28ef28e613b4d3804b45 Mon Sep 17 00:00:00 2001 From: "yudong.cai" Date: Thu, 24 Oct 2019 09:33:09 +0800 Subject: [PATCH 033/149] #89 update unittest Former-commit-id: f9b518f2961f3c7da30a76a53a49da8403208a0b --- .../index/unittest/faiss_benchmark/faiss_benchmark_test.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/core/src/index/unittest/faiss_benchmark/faiss_benchmark_test.cpp b/core/src/index/unittest/faiss_benchmark/faiss_benchmark_test.cpp index 5d63d63003..3d60574231 100644 --- a/core/src/index/unittest/faiss_benchmark/faiss_benchmark_test.cpp +++ b/core/src/index/unittest/faiss_benchmark/faiss_benchmark_test.cpp @@ -541,15 +541,15 @@ void test_ivfsq8h_gpu(const std::string& ann_test_name, TEST(FAISSTEST, BENCHMARK) { test_ann_hdf5("sift-128-euclidean", "IVF4096,Flat", 2, {8, 128}); test_ann_hdf5("sift-128-euclidean", "IVF16384,SQ8", 2, {8, 128}); - test_ann_hdf5("sift-128-euclidean", "IVF16384,SQ8Hybrid", 2, {8, 128}); #ifdef CUSTOMIZATION + test_ann_hdf5("sift-128-euclidean", "IVF16384,SQ8Hybrid", 2, {8, 128}); test_ivfsq8h_gpu("sift-128-euclidean", 2, {8, 128}); #endif test_ann_hdf5("glove-200-angular", "IVF4096,Flat", 1, {8, 128}); test_ann_hdf5("glove-200-angular", "IVF16384,SQ8", 1, {8, 128}); - test_ann_hdf5("glove-200-angular", "IVF16384,SQ8Hybrid", 1, {8, 128}); #ifdef CUSTOMIZATION + test_ann_hdf5("glove-200-angular", "IVF16384,SQ8Hybrid", 1, {8, 128}); test_ivfsq8h_gpu("glove-200-angular", 2, {8, 128}); #endif } From 4f5906b9bc6316523f2de708b2d3e6cb448aec2d Mon Sep 17 00:00:00 2001 From: "yudong.cai" Date: Thu, 24 Oct 2019 09:44:01 +0800 Subject: [PATCH 034/149] #89 update SQ8Hybrid-gpu log Former-commit-id: c2e70121ee65ed044c059ac3948b3412353e829e --- .../src/index/unittest/faiss_benchmark/faiss_benchmark_test.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/core/src/index/unittest/faiss_benchmark/faiss_benchmark_test.cpp b/core/src/index/unittest/faiss_benchmark/faiss_benchmark_test.cpp index 3d60574231..0c7cb97807 100644 --- a/core/src/index/unittest/faiss_benchmark/faiss_benchmark_test.cpp +++ b/core/src/index/unittest/faiss_benchmark/faiss_benchmark_test.cpp @@ -475,7 +475,7 @@ void test_ivfsq8h_gpu(const std::string& ann_test_name, long *I = new faiss::Index::idx_t[NQ * K]; float *D = new float[NQ * K]; - printf ("\n%s | %s | nprobe=%lu\n", ann_test_name.c_str(), index_key.c_str(), nprobe); + printf ("\n%s | %s-gpu | nprobe=%lu\n", ann_test_name.c_str(), index_key.c_str(), nprobe); printf ("============================================================================================\n"); for (size_t t_nq = 10; t_nq <= NQ; t_nq *= 10) { // nq = {10, 100, 1000} for (size_t t_k = 100; t_k <= K; t_k *= 10) { // k = {100, 1000} From d3d6077eb2daedb92b1a33a2e281e6445afd548f Mon Sep 17 00:00:00 2001 From: 
"yudong.cai" Date: Thu, 24 Oct 2019 10:24:02 +0800 Subject: [PATCH 035/149] #89 update unittest parameter Former-commit-id: 4692890b67109edefbd0cc0a0a5a628f6433306d --- .../src/index/unittest/faiss_benchmark/faiss_benchmark_test.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/core/src/index/unittest/faiss_benchmark/faiss_benchmark_test.cpp b/core/src/index/unittest/faiss_benchmark/faiss_benchmark_test.cpp index 0c7cb97807..ed00e74a98 100644 --- a/core/src/index/unittest/faiss_benchmark/faiss_benchmark_test.cpp +++ b/core/src/index/unittest/faiss_benchmark/faiss_benchmark_test.cpp @@ -550,7 +550,7 @@ TEST(FAISSTEST, BENCHMARK) { test_ann_hdf5("glove-200-angular", "IVF16384,SQ8", 1, {8, 128}); #ifdef CUSTOMIZATION test_ann_hdf5("glove-200-angular", "IVF16384,SQ8Hybrid", 1, {8, 128}); - test_ivfsq8h_gpu("glove-200-angular", 2, {8, 128}); + test_ivfsq8h_gpu("glove-200-angular", 1, {8, 128}); #endif } From f7a7f9b7da16cade2fa96b4701d4f913ad325a78 Mon Sep 17 00:00:00 2001 From: starlord Date: Thu, 24 Oct 2019 10:43:14 +0800 Subject: [PATCH 036/149] ignore easylogging files Former-commit-id: 0b609d729f3914e8d57b2f5a7d0baa38ad92cc2f --- core/coverage.sh | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/core/coverage.sh b/core/coverage.sh index 5792af5ec2..e47e720ce5 100755 --- a/core/coverage.sh +++ b/core/coverage.sh @@ -99,6 +99,7 @@ for test in `ls ${DIR_UNITTEST}`; do if [ $? -ne 0 ]; then echo ${args} echo ${DIR_UNITTEST}/${test} "run failed" + exit -1 fi done @@ -121,8 +122,8 @@ ${LCOV_CMD} -r "${FILE_INFO_OUTPUT}" -o "${FILE_INFO_OUTPUT_NEW}" \ "*/src/server/Server.cpp" \ "*/src/server/DBWrapper.cpp" \ "*/src/server/grpc_impl/GrpcServer.cpp" \ - "*/src/external/easyloggingpp/easylogging++.h" \ - "*/src/external/easyloggingpp/easylogging++.cc" \ + "*/easylogging++.h" \ + "*/easylogging++.cc" \ "*/src/external/*" # gen html report From 460bbf2782394107c4f539edae7c1b46eb1c53df Mon Sep 17 00:00:00 2001 From: Zhiru Zhu Date: Thu, 24 Oct 2019 10:49:02 +0800 Subject: [PATCH 037/149] Update CHANGELOG.md Former-commit-id: 47da01a8c9193284097d22eb35c79b377f20253d --- CHANGELOG.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index ebad4e2f32..bf91805d39 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -6,7 +6,7 @@ Please mark all change in change log and use the ticket from JIRA. 
## Bug ## Improvement -- \#64 - Improvement dump function in +- \#64 - Improvement dump function in scheduler - \#82 - Move easyloggingpp into "external" directory - \#92 - Speed up CMake build process From 65b46de1ac7629948cd27a2ba00c5872b48b2f8b Mon Sep 17 00:00:00 2001 From: "yudong.cai" Date: Thu, 24 Oct 2019 10:56:12 +0800 Subject: [PATCH 038/149] #89 code format Former-commit-id: ced158f26d9c18e38c7afb84ad17fdb6f9057259 --- .../faiss_benchmark/faiss_benchmark_test.cpp | 282 +++++++++--------- 1 file changed, 136 insertions(+), 146 deletions(-) diff --git a/core/src/index/unittest/faiss_benchmark/faiss_benchmark_test.cpp b/core/src/index/unittest/faiss_benchmark/faiss_benchmark_test.cpp index ed00e74a98..d1db0e9049 100644 --- a/core/src/index/unittest/faiss_benchmark/faiss_benchmark_test.cpp +++ b/core/src/index/unittest/faiss_benchmark/faiss_benchmark_test.cpp @@ -17,28 +17,28 @@ #include +#include #include #include #include -#include #include #include #include #include -#include #include #include +#include #include #include #include -#include #include #include #include #include +#include /***************************************************** * To run this test, please download the HDF5 from @@ -46,29 +46,27 @@ * and install it to /usr/local/hdf5 . *****************************************************/ -double elapsed() { +double +elapsed() { struct timeval tv; gettimeofday(&tv, nullptr); return tv.tv_sec + tv.tv_usec * 1e-6; } -void* hdf5_read(const char *file_name, - const char *dataset_name, - H5T_class_t dataset_class, - size_t &d_out, - size_t &n_out) { - hid_t file, dataset, datatype, dataspace, memspace; - H5T_class_t t_class; /* data type class */ - H5T_order_t order; /* data order */ - size_t size; /* size of the data element stored in file */ - hsize_t dimsm[3]; /* memory space dimensions */ - hsize_t dims_out[2]; /* dataset dimensions */ - hsize_t count[2]; /* size of the hyperslab in the file */ - hsize_t offset[2]; /* hyperslab offset in the file */ - hsize_t count_out[3]; /* size of the hyperslab in memory */ - hsize_t offset_out[3]; /* hyperslab offset in memory */ - int rank; - void* data_out; /* output buffer */ +void* +hdf5_read(const char* file_name, const char* dataset_name, H5T_class_t dataset_class, size_t& d_out, size_t& n_out) { + hid_t file, dataset, datatype, dataspace, memspace; + H5T_class_t t_class; /* data type class */ + H5T_order_t order; /* data order */ + size_t size; /* size of the data element stored in file */ + hsize_t dimsm[3]; /* memory space dimensions */ + hsize_t dims_out[2]; /* dataset dimensions */ + hsize_t count[2]; /* size of the hyperslab in the file */ + hsize_t offset[2]; /* hyperslab offset in the file */ + hsize_t count_out[3]; /* size of the hyperslab in memory */ + hsize_t offset_out[3]; /* hyperslab offset in memory */ + int rank; + void* data_out; /* output buffer */ /* Open the file and the dataset. */ file = H5Fopen(file_name, H5F_ACC_RDONLY, H5P_DEFAULT); @@ -78,7 +76,7 @@ void* hdf5_read(const char *file_name, * Get datatype and dataspace handles and then query * dataset class, order, size, rank and dimensions. 
*/ - datatype = H5Dget_type(dataset); /* datatype handle */ + datatype = H5Dget_type(dataset); /* datatype handle */ t_class = H5Tget_class(datatype); assert(t_class == dataset_class || !"Illegal dataset class type"); @@ -95,11 +93,11 @@ void* hdf5_read(const char *file_name, break; } - size = H5Tget_size(datatype); + size = H5Tget_size(datatype); printf("Data size is %d \n", (int)size); - dataspace = H5Dget_space(dataset); /* dataspace handle */ - rank = H5Sget_simple_extent_ndims(dataspace); + dataspace = H5Dget_space(dataset); /* dataspace handle */ + rank = H5Sget_simple_extent_ndims(dataspace); H5Sget_simple_extent_dims(dataspace, dims_out, NULL); n_out = dims_out[0]; d_out = dims_out[1]; @@ -107,8 +105,8 @@ void* hdf5_read(const char *file_name, /* Define hyperslab in the dataset. */ offset[0] = offset[1] = 0; - count[0] = dims_out[0]; - count[1] = dims_out[1]; + count[0] = dims_out[0]; + count[1] = dims_out[1]; H5Sselect_hyperslab(dataspace, H5S_SELECT_SET, offset, NULL, count, NULL); /* Define the memory dataspace. */ @@ -119,9 +117,9 @@ void* hdf5_read(const char *file_name, /* Define memory hyperslab. */ offset_out[0] = offset_out[1] = offset_out[2] = 0; - count_out[0] = dims_out[0]; - count_out[1] = dims_out[1]; - count_out[2] = 1; + count_out[0] = dims_out[0]; + count_out[1] = dims_out[1]; + count_out[2] = 1; H5Sselect_hyperslab(memspace, H5S_SELECT_SET, offset_out, NULL, count_out, NULL); /* Read data from hyperslab in the file into the hyperslab in memory and display. */ @@ -149,30 +147,31 @@ void* hdf5_read(const char *file_name, return data_out; } -std::string get_index_file_name(const std::string& ann_test_name, - const std::string& index_key, - int32_t data_loops) { +std::string +get_index_file_name(const std::string& ann_test_name, const std::string& index_key, int32_t data_loops) { size_t pos = index_key.find_first_of(',', 0); std::string file_name = ann_test_name; - file_name = file_name + "_" + index_key.substr(0, pos) + "_" + index_key.substr(pos+1); + file_name = file_name + "_" + index_key.substr(0, pos) + "_" + index_key.substr(pos + 1); file_name = file_name + "_" + std::to_string(data_loops) + ".index"; return file_name; } -bool parse_ann_test_name(const std::string& ann_test_name, - size_t &dim, - faiss::MetricType &metric_type) { +bool +parse_ann_test_name(const std::string& ann_test_name, size_t& dim, faiss::MetricType& metric_type) { size_t pos1, pos2; - if (ann_test_name.empty()) return false; + if (ann_test_name.empty()) + return false; pos1 = ann_test_name.find_first_of('-', 0); - if (pos1 == std::string::npos) return false; + if (pos1 == std::string::npos) + return false; pos2 = ann_test_name.find_first_of('-', pos1 + 1); - if (pos2 == std::string::npos) return false; + if (pos2 == std::string::npos) + return false; - dim = std::stoi(ann_test_name.substr(pos1+1, pos2-pos1-1)); - std::string metric_str = ann_test_name.substr(pos2+1); + dim = std::stoi(ann_test_name.substr(pos1 + 1, pos2 - pos1 - 1)); + std::string metric_str = ann_test_name.substr(pos2 + 1); if (metric_str == "angular") { metric_type = faiss::METRIC_INNER_PRODUCT; } else if (metric_str == "euclidean") { @@ -184,10 +183,9 @@ bool parse_ann_test_name(const std::string& ann_test_name, return true; } -void test_ann_hdf5(const std::string& ann_test_name, - const std::string& index_key, - int32_t index_add_loops, - const std::vector& nprobes) { +void +test_ann_hdf5(const std::string& ann_test_name, const std::string& index_key, int32_t index_add_loops, + const std::vector& nprobes) { double t0 
= elapsed(); const std::string ann_file_name = ann_test_name + ".hdf5"; @@ -200,77 +198,74 @@ void test_ann_hdf5(const std::string& ann_test_name, return; } - faiss::Index * index; + faiss::Index* index; size_t d; std::string index_file_name = get_index_file_name(ann_test_name, index_key, index_add_loops); try { index = faiss::read_index(index_file_name.c_str()); d = dim; - } - catch (...) { + } catch (...) { printf("Cannot read index file: %s\n", index_file_name.c_str()); - printf ("[%.3f s] Loading train set\n", elapsed() - t0); + printf("[%.3f s] Loading train set\n", elapsed() - t0); size_t nb; - float *xb = (float*)hdf5_read(ann_file_name.c_str(), "train", H5T_FLOAT, d, nb); + float* xb = (float*)hdf5_read(ann_file_name.c_str(), "train", H5T_FLOAT, d, nb); assert(d == dim || !"dataset does not have correct dimension"); - printf ("[%.3f s] Preparing index \"%s\" d=%ld\n", - elapsed() - t0, index_key.c_str(), d); + printf("[%.3f s] Preparing index \"%s\" d=%ld\n", elapsed() - t0, index_key.c_str(), d); index = faiss::index_factory(d, index_key.c_str(), metric_type); - printf ("[%.3f s] Training on %ld vectors\n", elapsed() - t0, nb); + printf("[%.3f s] Training on %ld vectors\n", elapsed() - t0, nb); index->train(nb, xb); - printf ("[%.3f s] Loading database\n", elapsed() - t0); + printf("[%.3f s] Loading database\n", elapsed() - t0); // add index multiple times to get ~1G data set for (int i = 0; i < index_add_loops; i++) { - printf ("[%.3f s] Indexing database, size %ld*%ld\n", elapsed() - t0, nb, d); + printf("[%.3f s] Indexing database, size %ld*%ld\n", elapsed() - t0, nb, d); index->add(nb, xb); } faiss::write_index(index, index_file_name.c_str()); - delete [] xb; + delete[] xb; } size_t nq; - float *xq; + float* xq; { - printf ("[%.3f s] Loading queries\n", elapsed() - t0); + printf("[%.3f s] Loading queries\n", elapsed() - t0); size_t d2; xq = (float*)hdf5_read(ann_file_name.c_str(), "test", H5T_FLOAT, d2, nq); assert(d == d2 || !"query does not have same dimension as train set"); } - size_t k; // nb of results per query in the GT - faiss::Index::idx_t *gt; // nq * k matrix of ground-truth nearest-neighbors + size_t k; // nb of results per query in the GT + faiss::Index::idx_t* gt; // nq * k matrix of ground-truth nearest-neighbors { - printf ("[%.3f s] Loading ground truth for %ld queries\n", elapsed() - t0, nq); + printf("[%.3f s] Loading ground truth for %ld queries\n", elapsed() - t0, nq); // load ground-truth and convert int to long size_t nq2; - int *gt_int = (int*)hdf5_read(ann_file_name.c_str(), "neighbors", H5T_INTEGER, k, nq2); + int* gt_int = (int*)hdf5_read(ann_file_name.c_str(), "neighbors", H5T_INTEGER, k, nq2); assert(nq2 == nq || !"incorrect nb of ground truth entries"); gt = new faiss::Index::idx_t[k * nq]; - for(int i = 0; i < k * nq; i++) { + for (int i = 0; i < k * nq; i++) { gt[i] = gt_int[i]; } - delete [] gt_int; + delete[] gt_int; } for (auto nprobe : nprobes) { - faiss::ParameterSpace params; - printf ("[%.3f s] Setting parameter configuration 'nprobe=%lu' on index\n", elapsed() - t0, nprobe); + printf("[%.3f s] Setting parameter configuration 'nprobe=%lu' on index\n", elapsed() - t0, nprobe); std::string nprobe_str = "nprobe=" + std::to_string(nprobe); params.set_index_parameters(index, nprobe_str.c_str()); @@ -278,13 +273,13 @@ void test_ann_hdf5(const std::string& ann_test_name, // output buffers #if 1 const size_t NQ = 1000, K = 1000; - faiss::Index::idx_t *I = new faiss::Index::idx_t[NQ * K]; - float *D = new float[NQ * K]; + faiss::Index::idx_t* I = 
new faiss::Index::idx_t[NQ * K]; + float* D = new float[NQ * K]; - printf ("\n%s | %s | nprobe=%lu\n", ann_test_name.c_str(), index_key.c_str(), nprobe); - printf ("============================================================================================\n"); - for (size_t t_nq = 10; t_nq <= NQ; t_nq *= 10) { // nq = {10, 100, 1000} - for (size_t t_k = 100; t_k <= K; t_k *= 10) { // k = {100, 1000} + printf("\n%s | %s | nprobe=%lu\n", ann_test_name.c_str(), index_key.c_str(), nprobe); + printf("============================================================================================\n"); + for (size_t t_nq = 10; t_nq <= NQ; t_nq *= 10) { // nq = {10, 100, 1000} + for (size_t t_k = 100; t_k <= K; t_k *= 10) { // k = {100, 1000} faiss::indexIVF_stats.quantization_time = 0.0; faiss::indexIVF_stats.search_time = 0.0; @@ -301,7 +296,7 @@ void test_ann_hdf5(const std::string& ann_test_name, // consider: each result replicates DATA_LOOPS times for (int j_c = 0; j_c < k; j_c++) { int r_c = I[i * t_k + j_c]; - for (int j_g = 0; j_g < k/index_add_loops; j_g++) { + for (int j_g = 0; j_g < k / index_add_loops; j_g++) { if (gt[i * k + j_g] == r_c) { hit++; continue; @@ -309,33 +304,34 @@ void test_ann_hdf5(const std::string& ann_test_name, } } } - printf("nq = %4ld, k = %4ld, elapse = %fs (quant = %fs, search = %fs), R@ = %.4f\n", - t_nq, t_k, (t_end - t_start), - faiss::indexIVF_stats.quantization_time / 1000, - faiss::indexIVF_stats.search_time / 1000, - (hit / float(t_nq * k / index_add_loops))); + printf("nq = %4ld, k = %4ld, elapse = %fs (quant = %fs, search = %fs), R@ = %.4f\n", t_nq, t_k, + (t_end - t_start), faiss::indexIVF_stats.quantization_time / 1000, + faiss::indexIVF_stats.search_time / 1000, (hit / float(t_nq * k / index_add_loops))); } } - printf ("============================================================================================\n"); + printf("============================================================================================\n"); #else - printf ("[%.3f s] Perform a search on %ld queries\n", elapsed() - t0, nq); + printf("[%.3f s] Perform a search on %ld queries\n", elapsed() - t0, nq); - faiss::Index::idx_t *I = new faiss::Index::idx_t[nq * k]; - float *D = new float[nq * k]; + faiss::Index::idx_t* I = new faiss::Index::idx_t[nq * k]; + float* D = new float[nq * k]; index->search(nq, xq, k, D, I); - printf ("[%.3f s] Compute recalls\n", elapsed() - t0); + printf("[%.3f s] Compute recalls\n", elapsed() - t0); // evaluate result by hand. 
int n_1 = 0, n_10 = 0, n_100 = 0; - for(int i = 0; i < nq; i++) { + for (int i = 0; i < nq; i++) { int gt_nn = gt[i * k]; - for(int j = 0; j < k; j++) { + for (int j = 0; j < k; j++) { if (I[i * k + j] == gt_nn) { - if(j < 1) n_1++; - if(j < 10) n_10++; - if(j < 100) n_100++; + if (j < 1) + n_1++; + if (j < 10) + n_10++; + if (j < 100) + n_100++; } } } @@ -344,21 +340,20 @@ void test_ann_hdf5(const std::string& ann_test_name, printf("R@100 = %.4f\n", n_100 / float(nq)); #endif - printf ("[%.3f s] Search test done\n\n", elapsed() - t0); + printf("[%.3f s] Search test done\n\n", elapsed() - t0); - delete [] I; - delete [] D; + delete[] I; + delete[] D; } - delete [] xq; - delete [] gt; + delete[] xq; + delete[] gt; delete index; } #ifdef CUSTOMIZATION -void test_ivfsq8h_gpu(const std::string& ann_test_name, - int32_t index_add_loops, - const std::vector& nprobes){ +void +test_ivfsq8h_gpu(const std::string& ann_test_name, int32_t index_add_loops, const std::vector& nprobes) { double t0 = elapsed(); const std::string ann_file_name = ann_test_name + ".hdf5"; @@ -380,44 +375,43 @@ void test_ivfsq8h_gpu(const std::string& ann_test_name, size_t d; std::string index_file_name = get_index_file_name(ann_test_name, index_key, index_add_loops); - try{ + try { cpu_index = faiss::read_index(index_file_name.c_str()); d = dim; - } - catch (...){ + } catch (...) { printf("Cannot read index file: %s\n", index_file_name.c_str()); - printf ("[%.3f s] Loading train set\n", elapsed() - t0); + printf("[%.3f s] Loading train set\n", elapsed() - t0); - size_t nb; - float *xb = (float*)hdf5_read(ann_file_name.c_str(), "train", H5T_FLOAT, d, nb); + size_t nb; + float* xb = (float*)hdf5_read(ann_file_name.c_str(), "train", H5T_FLOAT, d, nb); assert(d == dim || !"dataset does not have correct dimension"); - printf ("[%.3f s] Preparing index \"%s\" d=%ld\n", elapsed() - t0, index_key.c_str(), d); + printf("[%.3f s] Preparing index \"%s\" d=%ld\n", elapsed() - t0, index_key.c_str(), d); - faiss::Index *ori_index = faiss::index_factory(d, index_key.c_str(), metric_type); + faiss::Index* ori_index = faiss::index_factory(d, index_key.c_str(), metric_type); auto device_index = faiss::gpu::index_cpu_to_gpu(&res, 0, ori_index); - printf ("[%.3f s] Training on %ld vectors\n", elapsed() - t0, nb); + printf("[%.3f s] Training on %ld vectors\n", elapsed() - t0, nb); device_index->train(nb, xb); - printf ("[%.3f s] Loading database\n", elapsed() - t0); + printf("[%.3f s] Loading database\n", elapsed() - t0); for (int i = 0; i < index_add_loops; i++) { - printf ("[%.3f s] Indexing database, size %ld*%ld\n", elapsed() - t0, nb, d); + printf("[%.3f s] Indexing database, size %ld*%ld\n", elapsed() - t0, nb, d); device_index->add(nb, xb); } cpu_index = faiss::gpu::index_gpu_to_cpu(device_index); faiss::write_index(cpu_index, index_file_name.c_str()); - delete []xb; + delete[] xb; } - faiss::IndexIVF *cpu_ivf_index = dynamic_cast(cpu_index); - if(cpu_ivf_index != nullptr) { + faiss::IndexIVF* cpu_ivf_index = dynamic_cast(cpu_index); + if (cpu_ivf_index != nullptr) { cpu_ivf_index->to_readonly(); } @@ -433,9 +427,9 @@ void test_ivfsq8h_gpu(const std::string& ann_test_name, delete index; size_t nq; - float *xq; + float* xq; { - printf ("[%.3f s] Loading queries\n", elapsed() - t0); + printf("[%.3f s] Loading queries\n", elapsed() - t0); size_t d2; xq = (float*)hdf5_read(ann_file_name.c_str(), "test", H5T_FLOAT, d2, nq); @@ -443,42 +437,41 @@ void test_ivfsq8h_gpu(const std::string& ann_test_name, } size_t k; - faiss::Index::idx_t *gt; + 
faiss::Index::idx_t* gt; { - printf ("[%.3f s] Loading ground truth for %ld queries\n", elapsed() - t0, nq); + printf("[%.3f s] Loading ground truth for %ld queries\n", elapsed() - t0, nq); size_t nq2; - int *gt_int = (int*)hdf5_read(ann_file_name.c_str(), "neighbors", H5T_INTEGER, k, nq2); + int* gt_int = (int*)hdf5_read(ann_file_name.c_str(), "neighbors", H5T_INTEGER, k, nq2); assert(nq2 == nq || !"incorrect nb of ground truth entries"); gt = new faiss::Index::idx_t[k * nq]; for (unsigned long i = 0; i < k * nq; ++i) { gt[i] = gt_int[i]; } - delete []gt_int; + delete[] gt_int; } - for (auto nprobe : nprobes){ - printf ("[%.3f s] Setting parameter configuration 'nprobe=%lu' on index\n", - elapsed() - t0, nprobe); + for (auto nprobe : nprobes) { + printf("[%.3f s] Setting parameter configuration 'nprobe=%lu' on index\n", elapsed() - t0, nprobe); - auto ivf_index = dynamic_cast(cpu_index); + auto ivf_index = dynamic_cast(cpu_index); ivf_index->nprobe = nprobe; auto is_gpu_flat_index = dynamic_cast(ivf_index->quantizer); - if(is_gpu_flat_index == nullptr) { + if (is_gpu_flat_index == nullptr) { delete ivf_index->quantizer; ivf_index->quantizer = index_composition.quantizer; } const size_t NQ = 1000, K = 1000; - long *I = new faiss::Index::idx_t[NQ * K]; - float *D = new float[NQ * K]; + long* I = new faiss::Index::idx_t[NQ * K]; + float* D = new float[NQ * K]; - printf ("\n%s | %s-gpu | nprobe=%lu\n", ann_test_name.c_str(), index_key.c_str(), nprobe); - printf ("============================================================================================\n"); - for (size_t t_nq = 10; t_nq <= NQ; t_nq *= 10) { // nq = {10, 100, 1000} - for (size_t t_k = 100; t_k <= K; t_k *= 10) { // k = {100, 1000} + printf("\n%s | %s-gpu | nprobe=%lu\n", ann_test_name.c_str(), index_key.c_str(), nprobe); + printf("============================================================================================\n"); + for (size_t t_nq = 10; t_nq <= NQ; t_nq *= 10) { // nq = {10, 100, 1000} + for (size_t t_k = 100; t_k <= K; t_k *= 10) { // k = {100, 1000} faiss::indexIVF_stats.quantization_time = 0.0; faiss::indexIVF_stats.search_time = 0.0; @@ -495,7 +488,7 @@ void test_ivfsq8h_gpu(const std::string& ann_test_name, // consider: each result replicates DATA_LOOPS times for (unsigned long j_c = 0; j_c < k; j_c++) { int r_c = I[i * t_k + j_c]; - for (unsigned long j_g = 0; j_g < k/index_add_loops; j_g++) { + for (unsigned long j_g = 0; j_g < k / index_add_loops; j_g++) { if (gt[i * k + j_g] == r_c) { hit++; continue; @@ -503,23 +496,21 @@ void test_ivfsq8h_gpu(const std::string& ann_test_name, } } } - printf("nq = %4ld, k = %4ld, elapse = %fs (quant = %fs, search = %fs), R@ = %.4f\n", - t_nq, t_k, (t_end - t_start), - faiss::indexIVF_stats.quantization_time / 1000, - faiss::indexIVF_stats.search_time / 1000, - (hit / float(t_nq * k / index_add_loops))); + printf("nq = %4ld, k = %4ld, elapse = %fs (quant = %fs, search = %fs), R@ = %.4f\n", t_nq, t_k, + (t_end - t_start), faiss::indexIVF_stats.quantization_time / 1000, + faiss::indexIVF_stats.search_time / 1000, (hit / float(t_nq * k / index_add_loops))); } } - printf ("============================================================================================\n"); + printf("============================================================================================\n"); - printf ("[%.3f s] Search test done\n\n", elapsed() - t0); + printf("[%.3f s] Search test done\n\n", elapsed() - t0); - delete [] I; - delete [] D; + delete[] I; + delete[] D; } - delete [] xq; - 
delete [] gt; + delete[] xq; + delete[] gt; delete cpu_index; } #endif @@ -536,21 +527,20 @@ void test_ivfsq8h_gpu(const std::string& ann_test_name, * MNIST 784 60,000 10,000 100 Euclidean HDF5 (217MB) * NYTimes 256 290,000 10,000 100 Angular HDF5 (301MB) * SIFT 128 1,000,000 10,000 100 Euclidean HDF5 (501MB) -*************************************************************************************/ + *************************************************************************************/ TEST(FAISSTEST, BENCHMARK) { - test_ann_hdf5("sift-128-euclidean", "IVF4096,Flat", 2, {8, 128}); - test_ann_hdf5("sift-128-euclidean", "IVF16384,SQ8", 2, {8, 128}); + test_ann_hdf5("sift-128-euclidean", "IVF4096,Flat", 2, {8, 128}); + test_ann_hdf5("sift-128-euclidean", "IVF16384,SQ8", 2, {8, 128}); #ifdef CUSTOMIZATION test_ann_hdf5("sift-128-euclidean", "IVF16384,SQ8Hybrid", 2, {8, 128}); test_ivfsq8h_gpu("sift-128-euclidean", 2, {8, 128}); #endif - test_ann_hdf5("glove-200-angular", "IVF4096,Flat", 1, {8, 128}); - test_ann_hdf5("glove-200-angular", "IVF16384,SQ8", 1, {8, 128}); + test_ann_hdf5("glove-200-angular", "IVF4096,Flat", 1, {8, 128}); + test_ann_hdf5("glove-200-angular", "IVF16384,SQ8", 1, {8, 128}); #ifdef CUSTOMIZATION test_ann_hdf5("glove-200-angular", "IVF16384,SQ8Hybrid", 1, {8, 128}); test_ivfsq8h_gpu("glove-200-angular", 1, {8, 128}); #endif } - From 82a271943c92995213e1a3c71f32cacdd028c7ec Mon Sep 17 00:00:00 2001 From: zhenwu Date: Thu, 24 Oct 2019 16:05:14 +0800 Subject: [PATCH 039/149] Update tests timeout Former-commit-id: 15c28be882db3cc2fda3bc1520b277c144c44558 --- ci/jenkins/jenkinsfile/singleDevTest.groovy | 2 +- tests/milvus_python_test/test_table.py | 2 ++ 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/ci/jenkins/jenkinsfile/singleDevTest.groovy b/ci/jenkins/jenkinsfile/singleDevTest.groovy index ae57ffd42b..44f6361835 100644 --- a/ci/jenkins/jenkinsfile/singleDevTest.groovy +++ b/ci/jenkins/jenkinsfile/singleDevTest.groovy @@ -1,4 +1,4 @@ -timeout(time: 30, unit: 'MINUTES') { +timeout(time: 60, unit: 'MINUTES') { dir ("tests/milvus_python_test") { sh 'python3 -m pip install -r requirements.txt' sh "pytest . 
--alluredir=\"test_out/dev/single/sqlite\" --level=1 --ip ${env.PIPELINE_NAME}-${env.BUILD_NUMBER}-single-gpu-milvus-gpu-engine.milvus.svc.cluster.local" diff --git a/tests/milvus_python_test/test_table.py b/tests/milvus_python_test/test_table.py index eb538281ed..934f3c2f9f 100644 --- a/tests/milvus_python_test/test_table.py +++ b/tests/milvus_python_test/test_table.py @@ -656,6 +656,7 @@ class TestTableInvalid(object): def get_table_name(self, request): yield request.param + @pytest.mark.level(2) def test_create_table_with_invalid_tablename(self, connect, get_table_name): table_name = get_table_name param = {'table_name': table_name, @@ -691,6 +692,7 @@ class TestCreateTableDimInvalid(object): def get_dim(self, request): yield request.param + @pytest.mark.level(2) @pytest.mark.timeout(5) def test_create_table_with_invalid_dimension(self, connect, get_dim): dimension = get_dim From 80682fc766d9ea5cb25c054a3f745c1b2500e535 Mon Sep 17 00:00:00 2001 From: zhenwu Date: Thu, 24 Oct 2019 18:04:34 +0800 Subject: [PATCH 040/149] fix test case Former-commit-id: 99aef46da1dda4e750f445f1de03d3f3701ebeec --- tests/milvus_python_test/test_index.py | 114 ++++++++++++++++--------- 1 file changed, 72 insertions(+), 42 deletions(-) diff --git a/tests/milvus_python_test/test_index.py b/tests/milvus_python_test/test_index.py index e4c8848d63..47b0db64e3 100644 --- a/tests/milvus_python_test/test_index.py +++ b/tests/milvus_python_test/test_index.py @@ -36,7 +36,7 @@ class TestIndexBase: scope="function", params=gen_simple_index_params() ) - def get_simple_index_params(self, request): + def get_simple_index_params(self, request, args): if "internal" not in args: if request.param["index_type"] == IndexType.IVF_SQ8H: pytest.skip("sq8h not support in open source") @@ -68,8 +68,10 @@ class TestIndexBase: method: create table and add vectors in it, check if added successfully expected: raise exception ''' + nlist = 16384 + index_param = {"index_type": IndexType.IVF_SQ8, "nlist": nlist} with pytest.raises(Exception) as e: - status = dis_connect.create_index(table, random.choice(gen_index_params())) + status = dis_connect.create_index(table, index_param) @pytest.mark.timeout(BUILD_TIMEOUT) def test_create_index_search_with_query_vectors(self, connect, table, get_index_params): @@ -182,12 +184,14 @@ class TestIndexBase: def test_create_index_table_not_existed(self, connect): ''' target: test create index interface when table name not existed - method: create table and add vectors in it, create index with an random table_name + method: create table and add vectors in it, create index , make sure the table name not in index expected: return code not equals to 0, create index failed ''' table_name = gen_unique_str(self.__class__.__name__) - status = connect.create_index(table_name, random.choice(gen_index_params())) + nlist = 16384 + index_param = {"index_type": IndexType.IVF_SQ8, "nlist": nlist} + status = connect.create_index(table_name, index_param) assert not status.OK() def test_create_index_table_None(self, connect): @@ -197,8 +201,10 @@ class TestIndexBase: expected: return code not equals to 0, create index failed ''' table_name = None + nlist = 16384 + index_param = {"index_type": IndexType.IVF_SQ8, "nlist": nlist} with pytest.raises(Exception) as e: - status = connect.create_index(table_name, random.choice(gen_index_params())) + status = connect.create_index(table_name, index_param) def test_create_index_no_vectors(self, connect, table): ''' @@ -206,7 +212,9 @@ class TestIndexBase: method: create table and 
add no vectors in it, and then create index expected: return code equals to 0 ''' - status = connect.create_index(table, random.choice(gen_index_params())) + nlist = 16384 + index_param = {"index_type": IndexType.IVF_SQ8, "nlist": nlist} + status = connect.create_index(table, index_param) assert status.OK() @pytest.mark.timeout(BUILD_TIMEOUT) @@ -216,7 +224,9 @@ class TestIndexBase: method: create table and add no vectors in it, and then create index, add vectors in it expected: return code equals to 0 ''' - status = connect.create_index(table, random.choice(gen_index_params())) + nlist = 16384 + index_param = {"index_type": IndexType.IVF_SQ8, "nlist": nlist} + status = connect.create_index(table, index_param) status, ids = connect.add_vectors(table, vectors) assert status.OK() @@ -227,11 +237,12 @@ class TestIndexBase: method: create index after index have been built expected: return code success, and search ok ''' + nlist = 16384 status, ids = connect.add_vectors(table, vectors) - index_params = random.choice(gen_index_params()) + index_param = {"index_type": IndexType.IVF_SQ8, "nlist": nlist} # index_params = get_index_params - status = connect.create_index(table, index_params) - status = connect.create_index(table, index_params) + status = connect.create_index(table, index_param) + status = connect.create_index(table, index_param) assert status.OK() query_vec = [vectors[0]] top_k = 1 @@ -246,16 +257,19 @@ class TestIndexBase: method: create another index with different index_params after index have been built expected: return code 0, and describe index result equals with the second index params ''' + nlist = 16384 status, ids = connect.add_vectors(table, vectors) - index_params = random.sample(gen_index_params(), 2) + index_type_1 = IndexType.IVF_SQ8 + index_type_2 = IndexType.IVFLAT + index_params = [{"index_type": index_type_1, "nlist": nlist}, {"index_type": index_type_2, "nlist": nlist}] logging.getLogger().info(index_params) - status = connect.create_index(table, index_params[0]) - status = connect.create_index(table, index_params[1]) - assert status.OK() + for index_param in index_params: + status = connect.create_index(table, index_param) + assert status.OK() status, result = connect.describe_index(table) - assert result._nlist == index_params[1]["nlist"] + assert result._nlist == nlist assert result._table_name == table - assert result._index_type == index_params[1]["index_type"] + assert result._index_type == index_type_2 """ ****************************************************************** @@ -331,7 +345,7 @@ class TestIndexBase: def test_describe_index_table_not_existed(self, connect): ''' target: test describe index interface when table name not existed - method: create table and add vectors in it, create index with an random table_name + method: create table and add vectors in it, create index , make sure the table name not in index expected: return code not equals to 0, describe index failed ''' @@ -352,7 +366,7 @@ class TestIndexBase: def test_describe_index_not_create(self, connect, table): ''' target: test describe index interface when index not created - method: create table and add vectors in it, create index with an random table_name + method: create table and add vectors in it, create index , make sure the table name not in index expected: return code not equals to 0, describe index failed ''' @@ -425,7 +439,7 @@ class TestIndexBase: def test_drop_index_table_not_existed(self, connect): ''' target: test drop index interface when table name not existed - method: 
create table and add vectors in it, create index with an random table_name + method: create table and add vectors in it, create index , make sure the table name not in index, and then drop it expected: return code not equals to 0, drop index failed ''' @@ -449,8 +463,8 @@ class TestIndexBase: method: create table and add vectors in it, create index expected: return code not equals to 0, drop index failed ''' - index_params = random.choice(gen_index_params()) - logging.getLogger().info(index_params) + nlist = 16384 + index_param = {"index_type": IndexType.IVF_SQ8, "nlist": nlist} status, ids = connect.add_vectors(table, vectors) status, result = connect.describe_index(table) logging.getLogger().info(result) @@ -486,7 +500,8 @@ class TestIndexBase: method: create index, drop index, four times, each tme use different index_params to create index expected: return code 0 ''' - index_params = random.sample(gen_index_params(), 2) + nlist = 16384 + index_params = [{"index_type": IndexType.IVFLAT, "nlist": nlist}, {"index_type": IndexType.IVF_SQ8, "nlist": nlist}] status, ids = connect.add_vectors(table, vectors) for i in range(2): status = connect.create_index(table, index_params[i]) @@ -517,7 +532,7 @@ class TestIndexIP: scope="function", params=gen_simple_index_params() ) - def get_simple_index_params(self, request): + def get_simple_index_params(self, request, args): if "internal" not in args: if request.param["index_type"] == IndexType.IVF_SQ8H: pytest.skip("sq8h not support in open source") @@ -549,8 +564,10 @@ class TestIndexIP: method: create table and add vectors in it, check if added successfully expected: raise exception ''' + nlist = 16384 + index_param = {"index_type": IndexType.IVF_SQ8, "nlist": nlist} with pytest.raises(Exception) as e: - status = dis_connect.create_index(ip_table, random.choice(gen_index_params())) + status = dis_connect.create_index(ip_table, index_param) @pytest.mark.timeout(BUILD_TIMEOUT) def test_create_index_search_with_query_vectors(self, connect, ip_table, get_index_params): @@ -665,7 +682,9 @@ class TestIndexIP: method: create table and add no vectors in it, and then create index expected: return code equals to 0 ''' - status = connect.create_index(ip_table, random.choice(gen_index_params())) + nlist = 16384 + index_param = {"index_type": IndexType.IVF_SQ8, "nlist": nlist} + status = connect.create_index(ip_table, index_param) assert status.OK() @pytest.mark.timeout(BUILD_TIMEOUT) @@ -675,7 +694,9 @@ class TestIndexIP: method: create table and add no vectors in it, and then create index, add vectors in it expected: return code equals to 0 ''' - status = connect.create_index(ip_table, random.choice(gen_index_params())) + nlist = 16384 + index_param = {"index_type": IndexType.IVF_SQ8, "nlist": nlist} + status = connect.create_index(ip_table, index_param) status, ids = connect.add_vectors(ip_table, vectors) assert status.OK() @@ -686,11 +707,11 @@ class TestIndexIP: method: create index after index have been built expected: return code success, and search ok ''' + nlist = 16384 status, ids = connect.add_vectors(ip_table, vectors) - index_params = random.choice(gen_index_params()) - # index_params = get_index_params - status = connect.create_index(ip_table, index_params) - status = connect.create_index(ip_table, index_params) + index_param = {"index_type": IndexType.IVF_SQ8, "nlist": nlist} + status = connect.create_index(ip_table, index_param) + status = connect.create_index(ip_table, index_param) assert status.OK() query_vec = [vectors[0]] top_k = 1 @@ -705,16 
+726,19 @@ class TestIndexIP: method: create another index with different index_params after index have been built expected: return code 0, and describe index result equals with the second index params ''' + nlist = 16384 status, ids = connect.add_vectors(ip_table, vectors) - index_params = random.sample(gen_index_params(), 2) + index_type_1 = IndexType.IVF_SQ8 + index_type_2 = IndexType.IVFLAT + index_params = [{"index_type": index_type_1, "nlist": nlist}, {"index_type": index_type_2, "nlist": nlist}] logging.getLogger().info(index_params) - status = connect.create_index(ip_table, index_params[0]) - status = connect.create_index(ip_table, index_params[1]) - assert status.OK() + for index_param in index_params: + status = connect.create_index(ip_table, index_param) + assert status.OK() status, result = connect.describe_index(ip_table) - assert result._nlist == index_params[1]["nlist"] + assert result._nlist == nlist assert result._table_name == ip_table - assert result._index_type == index_params[1]["index_type"] + assert result._index_type == index_type_2 """ ****************************************************************** @@ -790,7 +814,7 @@ class TestIndexIP: def test_describe_index_not_create(self, connect, ip_table): ''' target: test describe index interface when index not created - method: create table and add vectors in it, create index with an random table_name + method: create table and add vectors in it, create index , make sure the table name not in index expected: return code not equals to 0, describe index failed ''' @@ -857,8 +881,10 @@ class TestIndexIP: method: drop index, and check if drop successfully expected: raise exception ''' + nlist = 16384 + index_param = {"index_type": IndexType.IVFLAT, "nlist": nlist} with pytest.raises(Exception) as e: - status = dis_connect.drop_index(ip_table, random.choice(gen_index_params())) + status = dis_connect.drop_index(ip_table, index_param) def test_drop_index_table_not_create(self, connect, ip_table): ''' @@ -866,8 +892,9 @@ class TestIndexIP: method: create table and add vectors in it, create index expected: return code not equals to 0, drop index failed ''' - index_params = random.choice(gen_index_params()) - logging.getLogger().info(index_params) + nlist = 16384 + index_param = {"index_type": IndexType.IVF_SQ8, "nlist": nlist} + logging.getLogger().info(index_param) status, ids = connect.add_vectors(ip_table, vectors) status, result = connect.describe_index(ip_table) logging.getLogger().info(result) @@ -903,7 +930,8 @@ class TestIndexIP: method: create index, drop index, four times, each tme use different index_params to create index expected: return code 0 ''' - index_params = random.sample(gen_index_params(), 2) + nlist = 16384 + index_params = [{"index_type": IndexType.IVFLAT, "nlist": nlist}, {"index_type": IndexType.IVF_SQ8, "nlist": nlist}] status, ids = connect.add_vectors(ip_table, vectors) for i in range(2): status = connect.create_index(ip_table, index_params[i]) @@ -937,7 +965,9 @@ class TestIndexTableInvalid(object): @pytest.mark.level(2) def test_create_index_with_invalid_tablename(self, connect, get_table_name): table_name = get_table_name - status = connect.create_index(table_name, random.choice(gen_index_params())) + nlist = 16384 + index_param = {"index_type": IndexType.IVF_SQ8, "nlist": nlist} + status = connect.create_index(table_name, index_param) assert not status.OK() @pytest.mark.level(2) From 8f9b4715b05bcfe42ee0bae0c7fcd6595b66e52e Mon Sep 17 00:00:00 2001 From: milvus-ci-robot Date: Thu, 24 Oct 2019 
22:32:48 +0800 Subject: [PATCH 041/149] Add ELK for Jenkins CI Former-commit-id: 2ba8fbbd356fbb05eaf6305d10cdade6e78843ce --- ci/jenkins/jenkinsfile/deploySingle2Dev.groovy | 2 +- ci/jenkins/jenkinsfile/singleDevTest.groovy | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/ci/jenkins/jenkinsfile/deploySingle2Dev.groovy b/ci/jenkins/jenkinsfile/deploySingle2Dev.groovy index 2ab13486a6..718b74778e 100644 --- a/ci/jenkins/jenkinsfile/deploySingle2Dev.groovy +++ b/ci/jenkins/jenkinsfile/deploySingle2Dev.groovy @@ -4,7 +4,7 @@ try { dir ('milvus-helm') { checkout([$class: 'GitSCM', branches: [[name: "0.5.0"]], doGenerateSubmoduleConfigurations: false, extensions: [], submoduleCfg: [], userRemoteConfigs: [[credentialsId: "${params.GIT_CREDENTIALS_ID}", url: "https://github.com/milvus-io/milvus-helm.git", name: 'origin', refspec: "+refs/heads/0.5.0:refs/remotes/origin/0.5.0"]]]) dir ("milvus-gpu") { - sh "helm install --wait --timeout 300 --set engine.image.tag=${DOCKER_VERSION} --set expose.type=clusterIP --name ${env.PIPELINE_NAME}-${env.BUILD_NUMBER}-single-gpu -f ci/values.yaml --namespace milvus ." + sh "helm install --wait --timeout 300 --set engine.image.tag=${DOCKER_VERSION} --set expose.type=clusterIP --name ${env.PIPELINE_NAME}-${env.BUILD_NUMBER}-single-gpu -f ci/values.yaml -f ci/filebeat/values.yaml --namespace milvus ." } } } catch (exc) { diff --git a/ci/jenkins/jenkinsfile/singleDevTest.groovy b/ci/jenkins/jenkinsfile/singleDevTest.groovy index 44f6361835..adfadc9271 100644 --- a/ci/jenkins/jenkinsfile/singleDevTest.groovy +++ b/ci/jenkins/jenkinsfile/singleDevTest.groovy @@ -13,7 +13,7 @@ timeout(time: 60, unit: 'MINUTES') { } dir ("milvus-helm") { dir ("milvus-gpu") { - sh "helm install --wait --timeout 300 --set engine.image.tag=${DOCKER_VERSION} --set expose.type=clusterIP --name ${env.PIPELINE_NAME}-${env.BUILD_NUMBER}-single-gpu -f ci/db_backend/mysql_values.yaml --namespace milvus ." + sh "helm install --wait --timeout 300 --set engine.image.tag=${DOCKER_VERSION} --set expose.type=clusterIP --name ${env.PIPELINE_NAME}-${env.BUILD_NUMBER}-single-gpu -f ci/db_backend/mysql_values.yaml -f ci/filebeat/values.yaml --namespace milvus ." 
} } dir ("tests/milvus_python_test") { From 408b81f613c361bd922718f0a9d32960f32d228e Mon Sep 17 00:00:00 2001 From: zhenwu Date: Fri, 25 Oct 2019 16:14:51 +0800 Subject: [PATCH 042/149] fix test case for open-version Former-commit-id: 16427e4b2bfbe63c2d1bea4aaaa68a41fd8e1622 --- tests/milvus_python_test/requirements.txt | 4 +- tests/milvus_python_test/test_add_vectors.py | 133 ++++++++++-------- tests/milvus_python_test/test_index.py | 29 ++-- tests/milvus_python_test/test_mix.py | 17 ++- .../milvus_python_test/test_search_vectors.py | 32 +++-- tests/milvus_python_test/test_table.py | 39 ++--- tests/milvus_python_test/test_table_count.py | 38 +++-- tests/milvus_python_test/utils.py | 7 +- 8 files changed, 160 insertions(+), 139 deletions(-) diff --git a/tests/milvus_python_test/requirements.txt b/tests/milvus_python_test/requirements.txt index 4bdecd6033..c8fc02c096 100644 --- a/tests/milvus_python_test/requirements.txt +++ b/tests/milvus_python_test/requirements.txt @@ -17,9 +17,9 @@ allure-pytest==2.7.0 pytest-print==0.1.2 pytest-level==0.1.1 six==1.12.0 -thrift==0.11.0 typed-ast==1.3.5 wcwidth==0.1.7 wrapt==1.11.1 zipp==0.5.1 -pymilvus-test>=0.2.0 +scikit-learn>=0.19.1 +pymilvus-test>=0.2.0 \ No newline at end of file diff --git a/tests/milvus_python_test/test_add_vectors.py b/tests/milvus_python_test/test_add_vectors.py index e33328625a..f9f7f7d4ca 100644 --- a/tests/milvus_python_test/test_add_vectors.py +++ b/tests/milvus_python_test/test_add_vectors.py @@ -16,9 +16,6 @@ ADD_TIMEOUT = 60 nprobe = 1 epsilon = 0.0001 -index_params = random.choice(gen_index_params()) -logging.getLogger().info(index_params) - class TestAddBase: """ @@ -26,6 +23,15 @@ class TestAddBase: The following cases are used to test `add_vectors / index / search / delete` mixed function ****************************************************************** """ + @pytest.fixture( + scope="function", + params=gen_simple_index_params() + ) + def get_simple_index_params(self, request, args): + if "internal" not in args: + if request.param["index_type"] == IndexType.IVF_SQ8H: + pytest.skip("sq8h not support in open source") + return request.param def test_add_vector_create_table(self, connect, table): ''' @@ -71,7 +77,7 @@ class TestAddBase: method: delete table_2 and add vector to table_1 expected: status ok ''' - param = {'table_name': 'test_delete_table_add_vector_another', + param = {'table_name': gen_unique_str(), 'dimension': dim, 'index_file_size': index_file_size, 'metric_type': MetricType.L2} @@ -79,7 +85,6 @@ class TestAddBase: status = connect.delete_table(table) vector = gen_single_vector(dim) status, ids = connect.add_vectors(param['table_name'], vector) - connect.delete_table(param['table_name']) assert status.OK() @pytest.mark.timeout(ADD_TIMEOUT) @@ -101,14 +106,13 @@ class TestAddBase: method: add vector and delete table expected: status ok ''' - param = {'table_name': 'test_add_vector_delete_another_table', + param = {'table_name': gen_unique_str(), 'dimension': dim, 'index_file_size': index_file_size, 'metric_type': MetricType.L2} status = connect.create_table(param) vector = gen_single_vector(dim) status, ids = connect.add_vectors(table, vector) - status = connect.delete_table(param['table_name']) assert status.OK() @pytest.mark.timeout(ADD_TIMEOUT) @@ -131,7 +135,7 @@ class TestAddBase: method: add vector , sleep, and delete table expected: status ok ''' - param = {'table_name': 'test_add_vector_sleep_delete_another_table', + param = {'table_name': gen_unique_str(), 'dimension': dim, 'index_file_size': 
index_file_size, 'metric_type': MetricType.L2} @@ -143,86 +147,91 @@ class TestAddBase: assert status.OK() @pytest.mark.timeout(ADD_TIMEOUT) - def test_create_index_add_vector(self, connect, table): + def test_create_index_add_vector(self, connect, table, get_simple_index_params): ''' target: test add vector after build index method: build index and add vector expected: status ok ''' - status = connect.create_index(table, index_params) + index_param = get_simple_index_params + status = connect.create_index(table, index_param) vector = gen_single_vector(dim) status, ids = connect.add_vectors(table, vector) assert status.OK() @pytest.mark.timeout(ADD_TIMEOUT) - def test_create_index_add_vector_another(self, connect, table): + def test_create_index_add_vector_another(self, connect, table, get_simple_index_params): ''' target: test add vector to table_2 after build index for table_1 method: build index and add vector expected: status ok ''' - param = {'table_name': 'test_create_index_add_vector_another', + index_param = get_simple_index_params + param = {'table_name': gen_unique_str(), 'dimension': dim, 'index_file_size': index_file_size, 'metric_type': MetricType.L2} status = connect.create_table(param) - status = connect.create_index(table, index_params) + status = connect.create_index(table, index_param) vector = gen_single_vector(dim) status, ids = connect.add_vectors(table, vector) connect.delete_table(param['table_name']) assert status.OK() @pytest.mark.timeout(ADD_TIMEOUT) - def test_add_vector_create_index(self, connect, table): + def test_add_vector_create_index(self, connect, table, get_simple_index_params): ''' target: test build index add after vector method: add vector and build index expected: status ok ''' + index_param = get_simple_index_params vector = gen_single_vector(dim) status, ids = connect.add_vectors(table, vector) - status = connect.create_index(table, index_params) + status = connect.create_index(table, index_param) assert status.OK() @pytest.mark.timeout(ADD_TIMEOUT) - def test_add_vector_create_index_another(self, connect, table): + def test_add_vector_create_index_another(self, connect, table, get_simple_index_params): ''' target: test add vector to table_2 after build index for table_1 method: build index and add vector expected: status ok ''' - param = {'table_name': 'test_add_vector_create_index_another', + index_param = get_simple_index_params + param = {'table_name': gen_unique_str(), 'dimension': dim, 'index_file_size': index_file_size, 'metric_type': MetricType.L2} status = connect.create_table(param) vector = gen_single_vector(dim) status, ids = connect.add_vectors(table, vector) - status = connect.create_index(param['table_name'], index_params) - connect.delete_table(param['table_name']) + status = connect.create_index(param['table_name'], index_param) assert status.OK() @pytest.mark.timeout(ADD_TIMEOUT) - def test_add_vector_sleep_create_index(self, connect, table): + def test_add_vector_sleep_create_index(self, connect, table, get_simple_index_params): ''' target: test build index add after vector for a while method: add vector and build index expected: status ok ''' + index_param = get_simple_index_params vector = gen_single_vector(dim) status, ids = connect.add_vectors(table, vector) time.sleep(1) - status = connect.create_index(table, index_params) + status = connect.create_index(table, index_param) assert status.OK() @pytest.mark.timeout(ADD_TIMEOUT) - def test_add_vector_sleep_create_index_another(self, connect, table): + def 
test_add_vector_sleep_create_index_another(self, connect, table, get_simple_index_params): ''' target: test add vector to table_2 after build index for table_1 for a while method: build index and add vector expected: status ok ''' - param = {'table_name': 'test_add_vector_sleep_create_index_another', + index_param = get_simple_index_params + param = {'table_name': gen_unique_str(), 'dimension': dim, 'index_file_size': index_file_size, 'metric_type': MetricType.L2} @@ -230,8 +239,7 @@ class TestAddBase: vector = gen_single_vector(dim) status, ids = connect.add_vectors(table, vector) time.sleep(1) - status = connect.create_index(param['table_name'], index_params) - connect.delete_table(param['table_name']) + status = connect.create_index(param['table_name'], index_param) assert status.OK() @pytest.mark.timeout(ADD_TIMEOUT) @@ -253,7 +261,7 @@ class TestAddBase: method: search table and add vector expected: status ok ''' - param = {'table_name': 'test_search_vector_add_vector_another', + param = {'table_name': gen_unique_str(), 'dimension': dim, 'index_file_size': index_file_size, 'metric_type': MetricType.L2} @@ -261,7 +269,6 @@ class TestAddBase: vector = gen_single_vector(dim) status, result = connect.search_vectors(table, 1, nprobe, vector) status, ids = connect.add_vectors(param['table_name'], vector) - connect.delete_table(param['table_name']) assert status.OK() @pytest.mark.timeout(ADD_TIMEOUT) @@ -283,7 +290,7 @@ class TestAddBase: method: search table and add vector expected: status ok ''' - param = {'table_name': 'test_add_vector_search_vector_another', + param = {'table_name': gen_unique_str(), 'dimension': dim, 'index_file_size': index_file_size, 'metric_type': MetricType.L2} @@ -291,7 +298,6 @@ class TestAddBase: vector = gen_single_vector(dim) status, ids = connect.add_vectors(table, vector) status, result = connect.search_vectors(param['table_name'], 1, nprobe, vector) - connect.delete_table(param['table_name']) assert status.OK() @pytest.mark.timeout(ADD_TIMEOUT) @@ -314,7 +320,7 @@ class TestAddBase: method: search table , sleep, and add vector expected: status ok ''' - param = {'table_name': 'test_add_vector_sleep_search_vector_another', + param = {'table_name': gen_unique_str(), 'dimension': dim, 'index_file_size': index_file_size, 'metric_type': MetricType.L2} @@ -323,7 +329,6 @@ class TestAddBase: status, ids = connect.add_vectors(table, vector) time.sleep(1) status, result = connect.search_vectors(param['table_name'], 1, nprobe, vector) - connect.delete_table(param['table_name']) assert status.OK() """ @@ -594,6 +599,15 @@ class TestAddIP: The following cases are used to test `add_vectors / index / search / delete` mixed function ****************************************************************** """ + @pytest.fixture( + scope="function", + params=gen_simple_index_params() + ) + def get_simple_index_params(self, request, args): + if "internal" not in args: + if request.param["index_type"] == IndexType.IVF_SQ8H: + pytest.skip("sq8h not support in open source") + return request.param def test_add_vector_create_table(self, connect, ip_table): ''' @@ -639,7 +653,7 @@ class TestAddIP: method: delete table_2 and add vector to table_1 expected: status ok ''' - param = {'table_name': 'test_delete_table_add_vector_another', + param = {'table_name': gen_unique_str(), 'dimension': dim, 'index_file_size': index_file_size, 'metric_type': MetricType.L2} @@ -647,7 +661,6 @@ class TestAddIP: status = connect.delete_table(ip_table) vector = gen_single_vector(dim) status, ids = 
connect.add_vectors(param['table_name'], vector) - connect.delete_table(param['table_name']) assert status.OK() @pytest.mark.timeout(ADD_TIMEOUT) @@ -699,7 +712,7 @@ class TestAddIP: method: add vector , sleep, and delete table expected: status ok ''' - param = {'table_name': 'test_add_vector_sleep_delete_another_table', + param = {'table_name': gen_unique_str(), 'dimension': dim, 'index_file_size': index_file_size, 'metric_type': MetricType.L2} @@ -711,86 +724,90 @@ class TestAddIP: assert status.OK() @pytest.mark.timeout(ADD_TIMEOUT) - def test_create_index_add_vector(self, connect, ip_table): + def test_create_index_add_vector(self, connect, ip_table, get_simple_index_params): ''' target: test add vector after build index method: build index and add vector expected: status ok ''' - status = connect.create_index(ip_table, index_params) + index_param = get_simple_index_params + status = connect.create_index(ip_table, index_param) vector = gen_single_vector(dim) status, ids = connect.add_vectors(ip_table, vector) assert status.OK() @pytest.mark.timeout(ADD_TIMEOUT) - def test_create_index_add_vector_another(self, connect, ip_table): + def test_create_index_add_vector_another(self, connect, ip_table, get_simple_index_params): ''' target: test add vector to table_2 after build index for table_1 method: build index and add vector expected: status ok ''' - param = {'table_name': 'test_create_index_add_vector_another', + index_param = get_simple_index_params + param = {'table_name': gen_unique_str(), 'dimension': dim, 'index_file_size': index_file_size, 'metric_type': MetricType.L2} status = connect.create_table(param) - status = connect.create_index(ip_table, index_params) + status = connect.create_index(ip_table, index_param) vector = gen_single_vector(dim) status, ids = connect.add_vectors(ip_table, vector) - connect.delete_table(param['table_name']) assert status.OK() @pytest.mark.timeout(ADD_TIMEOUT) - def test_add_vector_create_index(self, connect, ip_table): + def test_add_vector_create_index(self, connect, ip_table, get_simple_index_params): ''' target: test build index add after vector method: add vector and build index expected: status ok ''' + index_param = get_simple_index_params vector = gen_single_vector(dim) status, ids = connect.add_vectors(ip_table, vector) - status = connect.create_index(ip_table, index_params) + status = connect.create_index(ip_table, index_param) assert status.OK() @pytest.mark.timeout(ADD_TIMEOUT) - def test_add_vector_create_index_another(self, connect, ip_table): + def test_add_vector_create_index_another(self, connect, ip_table, get_simple_index_params): ''' target: test add vector to table_2 after build index for table_1 method: build index and add vector expected: status ok ''' - param = {'table_name': 'test_add_vector_create_index_another', + index_param = get_simple_index_params + param = {'table_name': gen_unique_str(), 'dimension': dim, 'index_file_size': index_file_size, 'metric_type': MetricType.L2} status = connect.create_table(param) vector = gen_single_vector(dim) status, ids = connect.add_vectors(ip_table, vector) - status = connect.create_index(param['table_name'], index_params) - connect.delete_table(param['table_name']) + status = connect.create_index(param['table_name'], index_param) assert status.OK() @pytest.mark.timeout(ADD_TIMEOUT) - def test_add_vector_sleep_create_index(self, connect, ip_table): + def test_add_vector_sleep_create_index(self, connect, ip_table, get_simple_index_params): ''' target: test build index add after vector 
for a while method: add vector and build index expected: status ok ''' + index_param = get_simple_index_params vector = gen_single_vector(dim) status, ids = connect.add_vectors(ip_table, vector) time.sleep(1) - status = connect.create_index(ip_table, index_params) + status = connect.create_index(ip_table, index_param) assert status.OK() @pytest.mark.timeout(ADD_TIMEOUT) - def test_add_vector_sleep_create_index_another(self, connect, ip_table): + def test_add_vector_sleep_create_index_another(self, connect, ip_table, get_simple_index_params): ''' target: test add vector to table_2 after build index for table_1 for a while method: build index and add vector expected: status ok ''' - param = {'table_name': 'test_add_vector_sleep_create_index_another', + index_param = get_simple_index_params + param = {'table_name': gen_unique_str(), 'dimension': dim, 'index_file_size': index_file_size, 'metric_type': MetricType.L2} @@ -798,8 +815,7 @@ class TestAddIP: vector = gen_single_vector(dim) status, ids = connect.add_vectors(ip_table, vector) time.sleep(1) - status = connect.create_index(param['table_name'], index_params) - connect.delete_table(param['table_name']) + status = connect.create_index(param['table_name'], index_param) assert status.OK() @pytest.mark.timeout(ADD_TIMEOUT) @@ -821,7 +837,7 @@ class TestAddIP: method: search table and add vector expected: status ok ''' - param = {'table_name': 'test_search_vector_add_vector_another', + param = {'table_name': gen_unique_str(), 'dimension': dim, 'index_file_size': index_file_size, 'metric_type': MetricType.L2} @@ -829,7 +845,6 @@ class TestAddIP: vector = gen_single_vector(dim) status, result = connect.search_vectors(ip_table, 1, nprobe, vector) status, ids = connect.add_vectors(param['table_name'], vector) - connect.delete_table(param['table_name']) assert status.OK() @pytest.mark.timeout(ADD_TIMEOUT) @@ -851,7 +866,7 @@ class TestAddIP: method: search table and add vector expected: status ok ''' - param = {'table_name': 'test_add_vector_search_vector_another', + param = {'table_name': gen_unique_str(), 'dimension': dim, 'index_file_size': index_file_size, 'metric_type': MetricType.L2} @@ -859,7 +874,6 @@ class TestAddIP: vector = gen_single_vector(dim) status, ids = connect.add_vectors(ip_table, vector) status, result = connect.search_vectors(param['table_name'], 1, nprobe, vector) - connect.delete_table(param['table_name']) assert status.OK() @pytest.mark.timeout(ADD_TIMEOUT) @@ -882,7 +896,7 @@ class TestAddIP: method: search table , sleep, and add vector expected: status ok ''' - param = {'table_name': 'test_add_vector_sleep_search_vector_another', + param = {'table_name': gen_unique_str(), 'dimension': dim, 'index_file_size': index_file_size, 'metric_type': MetricType.L2} @@ -891,7 +905,6 @@ class TestAddIP: status, ids = connect.add_vectors(ip_table, vector) time.sleep(1) status, result = connect.search_vectors(param['table_name'], 1, nprobe, vector) - connect.delete_table(param['table_name']) assert status.OK() """ @@ -1130,7 +1143,7 @@ class TestAddIP: nq = 100 vectors = gen_vectors(nq, dim) table_list = [] - for i in range(50): + for i in range(20): table_name = gen_unique_str('test_add_vector_multi_tables') table_list.append(table_name) param = {'table_name': table_name, @@ -1140,7 +1153,7 @@ class TestAddIP: connect.create_table(param) time.sleep(2) for j in range(10): - for i in range(50): + for i in range(20): status, ids = connect.add_vectors(table_name=table_list[i], records=vectors) assert status.OK() diff --git 
a/tests/milvus_python_test/test_index.py b/tests/milvus_python_test/test_index.py index 47b0db64e3..65716d45aa 100644 --- a/tests/milvus_python_test/test_index.py +++ b/tests/milvus_python_test/test_index.py @@ -8,6 +8,7 @@ import pdb import threading from multiprocessing import Pool, Process import numpy +import sklearn.preprocessing from milvus import Milvus, IndexType, MetricType from utils import * @@ -15,7 +16,7 @@ nb = 10000 dim = 128 index_file_size = 10 vectors = gen_vectors(nb, dim) -vectors /= numpy.linalg.norm(vectors) +vectors = sklearn.preprocessing.normalize(vectors, axis=1, norm='l2') vectors = vectors.tolist() BUILD_TIMEOUT = 60 nprobe = 1 @@ -218,29 +219,26 @@ class TestIndexBase: assert status.OK() @pytest.mark.timeout(BUILD_TIMEOUT) - def test_create_index_no_vectors_then_add_vectors(self, connect, table): + def test_create_index_no_vectors_then_add_vectors(self, connect, table, get_simple_index_params): ''' target: test create index interface when there is no vectors in table, and does not affect the subsequent process method: create table and add no vectors in it, and then create index, add vectors in it expected: return code equals to 0 ''' - nlist = 16384 - index_param = {"index_type": IndexType.IVF_SQ8, "nlist": nlist} + index_param = get_simple_index_params status = connect.create_index(table, index_param) status, ids = connect.add_vectors(table, vectors) assert status.OK() @pytest.mark.timeout(BUILD_TIMEOUT) - def test_create_same_index_repeatedly(self, connect, table): + def test_create_same_index_repeatedly(self, connect, table, get_simple_index_params): ''' target: check if index can be created repeatedly, with the same create_index params method: create index after index have been built expected: return code success, and search ok ''' - nlist = 16384 status, ids = connect.add_vectors(table, vectors) - index_param = {"index_type": IndexType.IVF_SQ8, "nlist": nlist} - # index_params = get_index_params + index_param = get_simple_index_params status = connect.create_index(table, index_param) status = connect.create_index(table, index_param) assert status.OK() @@ -390,9 +388,9 @@ class TestIndexBase: method: create table and add vectors in it, create index, call drop index expected: return code 0, and default index param ''' - index_params = get_index_params + index_param = get_index_params status, ids = connect.add_vectors(table, vectors) - status = connect.create_index(table, index_params) + status = connect.create_index(table, index_param) assert status.OK() status, result = connect.describe_index(table) logging.getLogger().info(result) @@ -404,15 +402,15 @@ class TestIndexBase: assert result._table_name == table assert result._index_type == IndexType.FLAT - def test_drop_index_repeatly(self, connect, table, get_simple_index_params): + def test_drop_index_repeatly(self, connect, table, get_index_params): ''' target: test drop index repeatly method: create index, call drop index, and drop again expected: return code 0 ''' - index_params = get_simple_index_params + index_param = get_index_params status, ids = connect.add_vectors(table, vectors) - status = connect.create_index(table, index_params) + status = connect.create_index(table, index_param) assert status.OK() status, result = connect.describe_index(table) logging.getLogger().info(result) @@ -688,14 +686,13 @@ class TestIndexIP: assert status.OK() @pytest.mark.timeout(BUILD_TIMEOUT) - def test_create_index_no_vectors_then_add_vectors(self, connect, ip_table): + def 
test_create_index_no_vectors_then_add_vectors(self, connect, ip_table, get_simple_index_params): ''' target: test create index interface when there is no vectors in table, and does not affect the subsequent process method: create table and add no vectors in it, and then create index, add vectors in it expected: return code equals to 0 ''' - nlist = 16384 - index_param = {"index_type": IndexType.IVF_SQ8, "nlist": nlist} + index_param = get_simple_index_params status = connect.create_index(ip_table, index_param) status, ids = connect.add_vectors(ip_table, vectors) assert status.OK() diff --git a/tests/milvus_python_test/test_mix.py b/tests/milvus_python_test/test_mix.py index 4578e330b3..f099db5c31 100644 --- a/tests/milvus_python_test/test_mix.py +++ b/tests/milvus_python_test/test_mix.py @@ -6,7 +6,7 @@ import datetime import logging from time import sleep from multiprocessing import Process -import numpy +import sklearn.preprocessing from milvus import Milvus, IndexType, MetricType from utils import * @@ -15,7 +15,7 @@ index_file_size = 10 table_id = "test_mix" add_interval_time = 2 vectors = gen_vectors(100000, dim) -vectors /= numpy.linalg.norm(vectors) +vectors = sklearn.preprocessing.normalize(vectors, axis=1, norm='l2') vectors = vectors.tolist() top_k = 1 nprobe = 1 @@ -26,9 +26,9 @@ index_params = {'index_type': IndexType.IVFLAT, 'nlist': 16384} class TestMixBase: # TODO: enable - def _test_search_during_createIndex(self, args): + def test_search_during_createIndex(self, args): loops = 100000 - table = "test_search_during_createIndex" + table = gen_unique_str() query_vecs = [vectors[0], vectors[1]] uri = "tcp://%s:%s" % (args["ip"], args["port"]) id_0 = 0; id_1 = 0 @@ -54,6 +54,7 @@ class TestMixBase: status, ids = milvus_instance.add_vectors(table, vectors) logging.getLogger().info(status) def search(milvus_instance): + logging.getLogger().info("In search vectors") for i in range(loops): status, result = milvus_instance.search_vectors(table, top_k, nprobe, query_vecs) logging.getLogger().info(status) @@ -69,6 +70,7 @@ class TestMixBase: p_create.start() p_create.join() + @pytest.mark.level(2) def test_mix_multi_tables(self, connect): ''' target: test functions with multiple tables of different metric_types and index_types @@ -77,6 +79,7 @@ class TestMixBase: expected: status ok ''' nq = 10000 + nlist= 16384 vectors = gen_vectors(nq, dim) table_list = [] idx = [] @@ -112,17 +115,17 @@ class TestMixBase: #create index for i in range(10): - index_params = {'index_type': IndexType.FLAT, 'nlist': 16384} + index_params = {'index_type': IndexType.FLAT, 'nlist': nlist} status = connect.create_index(table_list[i], index_params) assert status.OK() status = connect.create_index(table_list[30 + i], index_params) assert status.OK() - index_params = {'index_type': IndexType.IVFLAT, 'nlist': 16384} + index_params = {'index_type': IndexType.IVFLAT, 'nlist': nlist} status = connect.create_index(table_list[10 + i], index_params) assert status.OK() status = connect.create_index(table_list[40 + i], index_params) assert status.OK() - index_params = {'index_type': IndexType.IVF_SQ8, 'nlist': 16384} + index_params = {'index_type': IndexType.IVF_SQ8, 'nlist': nlist} status = connect.create_index(table_list[20 + i], index_params) assert status.OK() status = connect.create_index(table_list[50 + i], index_params) diff --git a/tests/milvus_python_test/test_search_vectors.py b/tests/milvus_python_test/test_search_vectors.py index e52e0d2d08..da53466828 100644 --- 
a/tests/milvus_python_test/test_search_vectors.py +++ b/tests/milvus_python_test/test_search_vectors.py @@ -54,7 +54,7 @@ class TestSearchBase: """ @pytest.fixture( scope="function", - params=[1, 99, 101, 1024, 2048, 2049] + params=[1, 99, 1024, 2048, 2049] ) def get_top_k(self, request): yield request.param @@ -482,8 +482,9 @@ class TestSearchBase: """ class TestSearchParamsInvalid(object): - index_params = random.choice(gen_index_params()) - logging.getLogger().info(index_params) + nlist = 16384 + index_param = {"index_type": IndexType.IVF_SQ8, "nlist": nlist} + logging.getLogger().info(index_param) def init_data(self, connect, table, nb=100): ''' @@ -528,7 +529,7 @@ class TestSearchParamsInvalid(object): def get_top_k(self, request): yield request.param - @pytest.mark.level(2) + @pytest.mark.level(1) def test_search_with_invalid_top_k(self, connect, table, get_top_k): ''' target: test search fuction, with the wrong top_k @@ -539,9 +540,12 @@ class TestSearchParamsInvalid(object): logging.getLogger().info(top_k) nprobe = 1 query_vecs = gen_vectors(1, dim) - with pytest.raises(Exception) as e: + if isinstance(top_k, int): status, result = connect.search_vectors(table, top_k, nprobe, query_vecs) - res = connect.server_version() + assert not status.OK() + else: + with pytest.raises(Exception) as e: + status, result = connect.search_vectors(table, top_k, nprobe, query_vecs) @pytest.mark.level(2) def test_search_with_invalid_top_k_ip(self, connect, ip_table, get_top_k): @@ -554,10 +558,12 @@ class TestSearchParamsInvalid(object): logging.getLogger().info(top_k) nprobe = 1 query_vecs = gen_vectors(1, dim) - with pytest.raises(Exception) as e: + if isinstance(top_k, int): status, result = connect.search_vectors(ip_table, top_k, nprobe, query_vecs) - res = connect.server_version() - + assert not status.OK() + else: + with pytest.raises(Exception) as e: + status, result = connect.search_vectors(ip_table, top_k, nprobe, query_vecs) """ Test search table with invalid nprobe """ @@ -568,7 +574,7 @@ class TestSearchParamsInvalid(object): def get_nprobes(self, request): yield request.param - @pytest.mark.level(2) + @pytest.mark.level(1) def test_search_with_invalid_nrpobe(self, connect, table, get_nprobes): ''' target: test search fuction, with the wrong top_k @@ -579,7 +585,7 @@ class TestSearchParamsInvalid(object): nprobe = get_nprobes logging.getLogger().info(nprobe) query_vecs = gen_vectors(1, dim) - if isinstance(nprobe, int) and nprobe > 0: + if isinstance(nprobe, int): status, result = connect.search_vectors(table, top_k, nprobe, query_vecs) assert not status.OK() else: @@ -597,7 +603,7 @@ class TestSearchParamsInvalid(object): nprobe = get_nprobes logging.getLogger().info(nprobe) query_vecs = gen_vectors(1, dim) - if isinstance(nprobe, int) and nprobe > 0: + if isinstance(nprobe, int): status, result = connect.search_vectors(ip_table, top_k, nprobe, query_vecs) assert not status.OK() else: @@ -614,7 +620,7 @@ class TestSearchParamsInvalid(object): def get_query_ranges(self, request): yield request.param - @pytest.mark.level(2) + @pytest.mark.level(1) def test_search_flat_with_invalid_query_range(self, connect, table, get_query_ranges): ''' target: test search fuction, with the wrong query_range diff --git a/tests/milvus_python_test/test_table.py b/tests/milvus_python_test/test_table.py index 934f3c2f9f..88f7caca3c 100644 --- a/tests/milvus_python_test/test_table.py +++ b/tests/milvus_python_test/test_table.py @@ -178,6 +178,7 @@ class TestTable: assert res.table_name == table_name assert 
res.metric_type == MetricType.L2 + @pytest.mark.level(2) def test_table_describe_table_name_ip(self, connect): ''' target: test describe table created with correct params @@ -266,6 +267,7 @@ class TestTable: status = connect.delete_table(table) assert not assert_has_table(connect, table) + @pytest.mark.level(2) def test_delete_table_ip(self, connect, ip_table): ''' target: test delete table created with correct params @@ -335,7 +337,6 @@ class TestTable: time.sleep(2) assert status.OK() - @pytest.mark.level(2) def test_delete_create_table_repeatedly_ip(self, connect): ''' target: test delete and create the same table repeatedly @@ -587,25 +588,25 @@ class TestTable: """ @pytest.fixture( scope="function", - params=gen_index_params() + params=gen_simple_index_params() ) - def get_index_params(self, request, args): + def get_simple_index_params(self, request, args): if "internal" not in args: if request.param["index_type"] == IndexType.IVF_SQ8H: pytest.skip("sq8h not support in open source") return request.param @pytest.mark.level(1) - def test_preload_table(self, connect, table, get_index_params): - index_params = get_index_params + def test_preload_table(self, connect, table, get_simple_index_params): + index_params = get_simple_index_params status, ids = connect.add_vectors(table, vectors) status = connect.create_index(table, index_params) status = connect.preload_table(table) assert status.OK() @pytest.mark.level(1) - def test_preload_table_ip(self, connect, ip_table, get_index_params): - index_params = get_index_params + def test_preload_table_ip(self, connect, ip_table, get_simple_index_params): + index_params = get_simple_index_params status, ids = connect.add_vectors(ip_table, vectors) status = connect.create_index(ip_table, index_params) status = connect.preload_table(ip_table) @@ -613,19 +614,21 @@ class TestTable: @pytest.mark.level(1) def test_preload_table_not_existed(self, connect, table): - table_name = gen_unique_str("test_preload_table_not_existed") - index_params = random.choice(gen_index_params()) + table_name = gen_unique_str() + nlist = 16384 + index_param = {"index_type": IndexType.IVF_SQ8, "nlist": nlist} status, ids = connect.add_vectors(table, vectors) - status = connect.create_index(table, index_params) + status = connect.create_index(table, index_param) status = connect.preload_table(table_name) assert not status.OK() - @pytest.mark.level(1) + @pytest.mark.level(2) def test_preload_table_not_existed_ip(self, connect, ip_table): - table_name = gen_unique_str("test_preload_table_not_existed") - index_params = random.choice(gen_index_params()) + table_name = gen_unique_str() + nlist = 16384 + index_param = {"index_type": IndexType.IVF_SQ8, "nlist": nlist} status, ids = connect.add_vectors(ip_table, vectors) - status = connect.create_index(ip_table, index_params) + status = connect.create_index(ip_table, index_param) status = connect.preload_table(table_name) assert not status.OK() @@ -634,7 +637,7 @@ class TestTable: status = connect.preload_table(table) assert status.OK() - @pytest.mark.level(1) + @pytest.mark.level(2) def test_preload_table_no_vectors_ip(self, connect, ip_table): status = connect.preload_table(ip_table) assert status.OK() @@ -728,7 +731,7 @@ class TestCreateTableIndexSizeInvalid(object): 'dimension': dim, 'index_file_size': file_size, 'metric_type': MetricType.L2} - if isinstance(file_size, int) and file_size > 0: + if isinstance(file_size, int): status = connect.create_table(param) assert not status.OK() else: @@ -779,7 +782,7 @@ def 
preload_table(connect, **params): return status def has(connect, **params): - status = assert_has_table(connect, params["table_name"]) + status, result = connect.has_table(params["table_name"]) return status def show(connect, **params): @@ -803,7 +806,7 @@ def create_index(connect, **params): return status func_map = { - # 0:has, + 0:has, 1:show, 10:create_table, 11:describe, diff --git a/tests/milvus_python_test/test_table_count.py b/tests/milvus_python_test/test_table_count.py index 820fb9d546..4e8a780c62 100644 --- a/tests/milvus_python_test/test_table_count.py +++ b/tests/milvus_python_test/test_table_count.py @@ -23,7 +23,7 @@ class TestTableCount: @pytest.fixture( scope="function", params=[ - 100, + 1, 5000, 100000, ], @@ -36,9 +36,9 @@ class TestTableCount: """ @pytest.fixture( scope="function", - params=gen_index_params() + params=gen_simple_index_params() ) - def get_index_params(self, request, args): + def get_simple_index_params(self, request, args): if "internal" not in args: if request.param["index_type"] == IndexType.IVF_SQ8H: pytest.skip("sq8h not support in open source") @@ -58,14 +58,14 @@ class TestTableCount: status, res = connect.get_table_row_count(table) assert res == nb - def test_table_rows_count_after_index_created(self, connect, table, get_index_params): + def test_table_rows_count_after_index_created(self, connect, table, get_simple_index_params): ''' target: test get_table_row_count, after index have been created method: add vectors in db, and create index, then calling get_table_row_count with correct params expected: get_table_row_count raise exception ''' nb = 100 - index_params = get_index_params + index_params = get_simple_index_params vectors = gen_vectors(nb, dim) res = connect.add_vectors(table_name=table, records=vectors) time.sleep(add_time_interval) @@ -91,7 +91,7 @@ class TestTableCount: assert the value returned by get_table_row_count method is equal to 0 expected: the count is equal to 0 ''' - table_name = gen_unique_str("test_table") + table_name = gen_unique_str() param = {'table_name': table_name, 'dimension': dim, 'index_file_size': index_file_size} @@ -142,8 +142,8 @@ class TestTableCount: nq = 100 vectors = gen_vectors(nq, dim) table_list = [] - for i in range(50): - table_name = gen_unique_str('test_table_rows_count_multi_tables') + for i in range(20): + table_name = gen_unique_str() table_list.append(table_name) param = {'table_name': table_name, 'dimension': dim, @@ -152,7 +152,7 @@ class TestTableCount: connect.create_table(param) res = connect.add_vectors(table_name=table_name, records=vectors) time.sleep(2) - for i in range(50): + for i in range(20): status, res = connect.get_table_row_count(table_list[i]) assert status.OK() assert res == nq @@ -166,7 +166,7 @@ class TestTableCountIP: @pytest.fixture( scope="function", params=[ - 100, + 1, 5000, 100000, ], @@ -180,9 +180,9 @@ class TestTableCountIP: @pytest.fixture( scope="function", - params=gen_index_params() + params=gen_simple_index_params() ) - def get_index_params(self, request, args): + def get_simple_index_params(self, request, args): if "internal" not in args: if request.param["index_type"] == IndexType.IVF_SQ8H: pytest.skip("sq8h not support in open source") @@ -202,14 +202,14 @@ class TestTableCountIP: status, res = connect.get_table_row_count(ip_table) assert res == nb - def test_table_rows_count_after_index_created(self, connect, ip_table, get_index_params): + def test_table_rows_count_after_index_created(self, connect, ip_table, get_simple_index_params): ''' target: test 
get_table_row_count, after index have been created method: add vectors in db, and create index, then calling get_table_row_count with correct params expected: get_table_row_count raise exception ''' nb = 100 - index_params = get_index_params + index_params = get_simple_index_params vectors = gen_vectors(nb, dim) res = connect.add_vectors(table_name=ip_table, records=vectors) time.sleep(add_time_interval) @@ -243,10 +243,8 @@ class TestTableCountIP: status, res = connect.get_table_row_count(ip_table) assert res == 0 - # TODO: enable - @pytest.mark.level(2) - @pytest.mark.timeout(20) - def _test_table_rows_count_multiprocessing(self, connect, ip_table, args): + @pytest.mark.timeout(60) + def test_table_rows_count_multiprocessing(self, connect, ip_table, args): ''' target: test table rows_count is correct or not with multiprocess method: create table and add vectors in it, @@ -286,7 +284,7 @@ class TestTableCountIP: nq = 100 vectors = gen_vectors(nq, dim) table_list = [] - for i in range(50): + for i in range(20): table_name = gen_unique_str('test_table_rows_count_multi_tables') table_list.append(table_name) param = {'table_name': table_name, @@ -296,7 +294,7 @@ class TestTableCountIP: connect.create_table(param) res = connect.add_vectors(table_name=table_name, records=vectors) time.sleep(2) - for i in range(50): + for i in range(20): status, res = connect.get_table_row_count(table_list[i]) assert status.OK() assert res == nq \ No newline at end of file diff --git a/tests/milvus_python_test/utils.py b/tests/milvus_python_test/utils.py index 007bff9c75..159c8407c6 100644 --- a/tests/milvus_python_test/utils.py +++ b/tests/milvus_python_test/utils.py @@ -26,9 +26,9 @@ def gen_vector(nb, d, seed=np.random.RandomState(1234)): return xb.tolist() -def gen_unique_str(str=None): +def gen_unique_str(str_value=None): prefix = "".join(random.choice(string.ascii_letters + string.digits) for _ in range(8)) - return prefix if str is None else str + "_" + prefix + return "test_"+prefix if str_value is None else str_value+"_"+prefix def get_current_day(): @@ -449,10 +449,11 @@ def gen_index_params(): return gen_params(index_types, nlists) + def gen_simple_index_params(): index_params = [] index_types = [IndexType.FLAT, IndexType.IVFLAT, IndexType.IVF_SQ8, IndexType.IVF_SQ8H] - nlists = [16384] + nlists = [1024] def gen_params(index_types, nlists): return [ {"index_type": index_type, "nlist": nlist} \ From 2ca28c44ae970242ad339909022e9d6f07e486b0 Mon Sep 17 00:00:00 2001 From: jielinxu <52057195+jielinxu@users.noreply.github.com> Date: Fri, 25 Oct 2019 16:27:56 +0800 Subject: [PATCH 043/149] Update README.md Former-commit-id: a5f25b72a07bdabef85a2097764a55261159cbf3 --- README.md | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 4fd8bbae2e..e847f0e8ca 100644 --- a/README.md +++ b/README.md @@ -5,6 +5,8 @@ ![LICENSE](https://img.shields.io/badge/license-Apache--2.0-brightgreen) ![Language](https://img.shields.io/badge/language-C%2B%2B-blue) [![codebeat badge](https://codebeat.co/badges/e030a4f6-b126-4475-a938-4723d54ec3a7?style=plastic)](https://codebeat.co/projects/github-com-jinhai-cn-milvus-master) +![Release](https://img.shields.io/badge/release-v0.5.0-orange) +![Release_date](https://img.shields.io/badge/release_date-October-yellowgreen) - [Slack Community](https://join.slack.com/t/milvusio/shared_invite/enQtNzY1OTQ0NDI3NjMzLWNmYmM1NmNjOTQ5MGI5NDhhYmRhMGU5M2NhNzhhMDMzY2MzNDdlYjM5ODQ5MmE3ODFlYzU3YjJkNmVlNDQ2ZTk) - [Twitter](https://twitter.com/milvusio) @@ 
-22,7 +24,7 @@ Milvus is an open source similarity search engine for massive feature vectors. D Milvus provides stable Python, Java and C++ APIs. -Keep up-to-date with newest releases and latest updates by reading Milvus [release notes](https://milvus.io/docs/en/releases/v0.5.0/). +Keep up-to-date with newest releases and latest updates by reading Milvus [release notes](https://milvus.io/docs/en/release/v0.5.0/). - Heterogeneous computing @@ -90,7 +92,7 @@ Use Docker to install Milvus is a breeze. See the [Milvus install guide](https:/ ```shell $ cd [Milvus sourcecode path]/core -./ubuntu_build_deps.sh +$ ./ubuntu_build_deps.sh ``` ##### Step 2 Build From 5a66b9aa964c25942671b3a211aed179d1af440a Mon Sep 17 00:00:00 2001 From: zhenwu Date: Fri, 25 Oct 2019 16:42:37 +0800 Subject: [PATCH 044/149] remove one case in search Former-commit-id: 23aa50a7a89d72d1334c9a0a405020aa84004931 --- tests/milvus_python_test/test_search_vectors.py | 1 - 1 file changed, 1 deletion(-) diff --git a/tests/milvus_python_test/test_search_vectors.py b/tests/milvus_python_test/test_search_vectors.py index da53466828..10892d6de3 100644 --- a/tests/milvus_python_test/test_search_vectors.py +++ b/tests/milvus_python_test/test_search_vectors.py @@ -220,7 +220,6 @@ class TestSearchBase: scope="function", params=[ (get_last_day(2), get_last_day(1)), - (get_last_day(2), get_current_day()), (get_next_day(1), get_next_day(2)) ] ) From d65160e4a12b0748d327ceb75fc98936544b676e Mon Sep 17 00:00:00 2001 From: Zhiru Zhu Date: Fri, 25 Oct 2019 18:01:35 +0800 Subject: [PATCH 045/149] Remove .a file in milvus/lib for docker-version Former-commit-id: b27fd9ddcf9d0dcbcceca2422b8bf6230bf4e21a --- CHANGELOG.md | 2 +- core/src/index/knowhere/CMakeLists.txt | 36 +------------------------- 2 files changed, 2 insertions(+), 36 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 7865430dd9..770098f7a5 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -10,7 +10,7 @@ Please mark all change in change log and use the ticket from JIRA. 
- \#80 - Print version information into log during server start - \#82 - Move easyloggingpp into "external" directory - \#92 - Speed up CMake build process - +- \#96 - Remove .a file in milvus/lib for docker-version ## Feature ## Task diff --git a/core/src/index/knowhere/CMakeLists.txt b/core/src/index/knowhere/CMakeLists.txt index 2a499dc1a3..bece9058a9 100644 --- a/core/src/index/knowhere/CMakeLists.txt +++ b/core/src/index/knowhere/CMakeLists.txt @@ -81,27 +81,6 @@ target_link_libraries( ${depend_libs} ) -INSTALL(TARGETS - knowhere - SPTAGLibStatic - DESTINATION - lib) - -INSTALL(FILES - ${ARROW_STATIC_LIB} - ${ARROW_PREFIX}/lib/libjemalloc_pic.a - ${FAISS_STATIC_LIB} - ${LAPACK_STATIC_LIB} - ${BLAS_STATIC_LIB} - DESTINATION - lib - ) - -INSTALL(FILES ${OPENBLAS_REAL_STATIC_LIB} - RENAME "libopenblas.a" - DESTINATION lib - ) - set(INDEX_INCLUDE_DIRS ${INDEX_SOURCE_DIR}/knowhere ${INDEX_SOURCE_DIR}/thirdparty @@ -112,17 +91,4 @@ set(INDEX_INCLUDE_DIRS ${LAPACK_INCLUDE_DIR} ) -set(INDEX_INCLUDE_DIRS ${INDEX_INCLUDE_DIRS} PARENT_SCOPE) - -#INSTALL(DIRECTORY -# ${INDEX_SOURCE_DIR}/include/knowhere -# ${ARROW_INCLUDE_DIR}/arrow -# ${FAISS_PREFIX}/include/faiss -# ${OPENBLAS_INCLUDE_DIR}/ -# DESTINATION -# include) -# -#INSTALL(DIRECTORY -# ${SPTAG_SOURCE_DIR}/AnnService/inc/ -# DESTINATION -# include/SPTAG/AnnService/inc) +set(INDEX_INCLUDE_DIRS ${INDEX_INCLUDE_DIRS} PARENT_SCOPE) \ No newline at end of file From f32921f0fb2c39c5b2cca2688def619d22679681 Mon Sep 17 00:00:00 2001 From: fishpenguin Date: Fri, 25 Oct 2019 19:43:07 +0800 Subject: [PATCH 046/149] test_scheduler core dump Former-commit-id: 79208b55f7b016bde5100cc7474850160dc72503 --- CHANGELOG.md | 2 ++ core/src/scheduler/resource/Resource.h | 3 +++ 2 files changed, 5 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 7865430dd9..74a1951142 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,6 +5,8 @@ Please mark all change in change log and use the ticket from JIRA. # Milvus 0.5.1 (TODO) ## Bug +- \#104 - test_scheduler core dump + ## Improvement - \#64 - Improvement dump function in scheduler - \#80 - Print version information into log during server start diff --git a/core/src/scheduler/resource/Resource.h b/core/src/scheduler/resource/Resource.h index c797e13de8..2af44b3d90 100644 --- a/core/src/scheduler/resource/Resource.h +++ b/core/src/scheduler/resource/Resource.h @@ -119,6 +119,9 @@ class Resource : public Node, public std::enable_shared_from_this { // TODO(wxyu): need double ? inline uint64_t TaskAvgCost() const { + if (total_task_ == 0) { + return 0; + } return total_cost_ / total_task_; } From 02cfe7cf49def66bdb57272f8a2f830985b6b6a5 Mon Sep 17 00:00:00 2001 From: milvus-ci-robot Date: Sat, 26 Oct 2019 11:25:08 +0800 Subject: [PATCH 047/149] add build periodically in ci/jenkins/Jenkinsfile Former-commit-id: 2455e5910f0111e5dec7a7f966ccbbdaf47fba0c --- ci/jenkins/Jenkinsfile | 10 ++++++++- .../jenkinsfile/singleDevNightlyTest.groovy | 22 +++++++++++++++++++ 2 files changed, 31 insertions(+), 1 deletion(-) create mode 100644 ci/jenkins/jenkinsfile/singleDevNightlyTest.groovy diff --git a/ci/jenkins/Jenkinsfile b/ci/jenkins/Jenkinsfile index fbdf3a3096..179a7fed38 100644 --- a/ci/jenkins/Jenkinsfile +++ b/ci/jenkins/Jenkinsfile @@ -1,6 +1,9 @@ +String cron_string = BRANCH_NAME == "master" || BRANCH_NAME == "0.5.0" || BRANCH_NAME == "0.5.1" ? 
"H 0 * * *" : "" pipeline { agent none + triggers { cron(cron_string) } + options { timestamps() } @@ -20,6 +23,7 @@ pipeline { SEMVER = "${BRANCH_NAME}" JOBNAMES = env.JOB_NAME.split('/') PIPELINE_NAME = "${JOBNAMES[0]}" + NIGHTLIY_TEST = "${cron_string == "" ? false : true}" } stages { @@ -119,7 +123,11 @@ pipeline { steps { container('milvus-test-env') { script { - load "${env.WORKSPACE}/ci/jenkins/jenkinsfile/singleDevTest.groovy" + if (NIGHTLIY_TEST) { + load "${env.WORKSPACE}/ci/jenkins/jenkinsfile/singleDevNightlyTest.groovy" + else { + load "${env.WORKSPACE}/ci/jenkins/jenkinsfile/singleDevTest.groovy" + } } } } diff --git a/ci/jenkins/jenkinsfile/singleDevNightlyTest.groovy b/ci/jenkins/jenkinsfile/singleDevNightlyTest.groovy new file mode 100644 index 0000000000..91699f533b --- /dev/null +++ b/ci/jenkins/jenkinsfile/singleDevNightlyTest.groovy @@ -0,0 +1,22 @@ +timeout(time: 90, unit: 'MINUTES') { + dir ("tests/milvus_python_test") { + sh 'python3 -m pip install -r requirements.txt' + sh "pytest . --alluredir=\"test_out/dev/single/sqlite\" --level=1 --ip ${env.PIPELINE_NAME}-${env.BUILD_NUMBER}-single-gpu-milvus-gpu-engine.milvus.svc.cluster.local" + } + // mysql database backend test + load "${env.WORKSPACE}/ci/jenkins/jenkinsfile/cleanupSingleDev.groovy" + + if (!fileExists('milvus-helm')) { + dir ("milvus-helm") { + checkout([$class: 'GitSCM', branches: [[name: "0.5.0"]], doGenerateSubmoduleConfigurations: false, extensions: [], submoduleCfg: [], userRemoteConfigs: [[credentialsId: "${params.GIT_CREDENTIALS_ID}", url: "https://github.com/milvus-io/milvus-helm.git", name: 'origin', refspec: "+refs/heads/0.5.0:refs/remotes/origin/0.5.0"]]]) + } + } + dir ("milvus-helm") { + dir ("milvus-gpu") { + sh "helm install --wait --timeout 300 --set engine.image.tag=${DOCKER_VERSION} --set expose.type=clusterIP --name ${env.PIPELINE_NAME}-${env.BUILD_NUMBER}-single-gpu -f ci/db_backend/mysql_values.yaml -f ci/filebeat/values.yaml --namespace milvus ." + } + } + dir ("tests/milvus_python_test") { + sh "pytest . 
--alluredir=\"test_out/dev/single/mysql\" --ip ${env.PIPELINE_NAME}-${env.BUILD_NUMBER}-single-gpu-milvus-gpu-engine.milvus.svc.cluster.local" + } +} From 0425ebac504016c213ba12057186daa9749a4526 Mon Sep 17 00:00:00 2001 From: milvus-ci-robot Date: Sat, 26 Oct 2019 11:27:01 +0800 Subject: [PATCH 048/149] fix ci/jenkins/Jenkinsfile error Former-commit-id: b94643897b9a962fb0fbe7a6dd34bec5a2e37b73 --- ci/jenkins/Jenkinsfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ci/jenkins/Jenkinsfile b/ci/jenkins/Jenkinsfile index 179a7fed38..66c7eb864e 100644 --- a/ci/jenkins/Jenkinsfile +++ b/ci/jenkins/Jenkinsfile @@ -125,7 +125,7 @@ pipeline { script { if (NIGHTLIY_TEST) { load "${env.WORKSPACE}/ci/jenkins/jenkinsfile/singleDevNightlyTest.groovy" - else { + } else { load "${env.WORKSPACE}/ci/jenkins/jenkinsfile/singleDevTest.groovy" } } From de26c2bcfccc7c5eccf5a06caea5d259c237526d Mon Sep 17 00:00:00 2001 From: milvus-ci-robot Date: Sat, 26 Oct 2019 11:31:49 +0800 Subject: [PATCH 049/149] update ci/jenkins/jenkinsfile/singleDevNightlyTest.groovy Former-commit-id: 05fdd554d4a78d70b4497a93017d285d5d364546 --- ci/jenkins/jenkinsfile/singleDevNightlyTest.groovy | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ci/jenkins/jenkinsfile/singleDevNightlyTest.groovy b/ci/jenkins/jenkinsfile/singleDevNightlyTest.groovy index 91699f533b..5140ad858f 100644 --- a/ci/jenkins/jenkinsfile/singleDevNightlyTest.groovy +++ b/ci/jenkins/jenkinsfile/singleDevNightlyTest.groovy @@ -1,7 +1,7 @@ timeout(time: 90, unit: 'MINUTES') { dir ("tests/milvus_python_test") { sh 'python3 -m pip install -r requirements.txt' - sh "pytest . --alluredir=\"test_out/dev/single/sqlite\" --level=1 --ip ${env.PIPELINE_NAME}-${env.BUILD_NUMBER}-single-gpu-milvus-gpu-engine.milvus.svc.cluster.local" + sh "pytest . --alluredir=\"test_out/dev/single/sqlite\" --ip ${env.PIPELINE_NAME}-${env.BUILD_NUMBER}-single-gpu-milvus-gpu-engine.milvus.svc.cluster.local" } // mysql database backend test load "${env.WORKSPACE}/ci/jenkins/jenkinsfile/cleanupSingleDev.groovy" From e75898f8f288cd4ce878fb9eaad0d4b9cbf85ded Mon Sep 17 00:00:00 2001 From: milvus-ci-robot Date: Sat, 26 Oct 2019 12:23:42 +0800 Subject: [PATCH 050/149] update ci/jenkins/jenkinsfile/deploySingle2Dev.groovy Former-commit-id: 213fb87c0bfc8554f34bd2451e019dce12e9c950 --- .../jenkinsfile/deploySingle2Dev.groovy | 19 +++++++------------ 1 file changed, 7 insertions(+), 12 deletions(-) diff --git a/ci/jenkins/jenkinsfile/deploySingle2Dev.groovy b/ci/jenkins/jenkinsfile/deploySingle2Dev.groovy index 718b74778e..738c714a0c 100644 --- a/ci/jenkins/jenkinsfile/deploySingle2Dev.groovy +++ b/ci/jenkins/jenkinsfile/deploySingle2Dev.groovy @@ -1,14 +1,9 @@ -try { - sh 'helm init --client-only --skip-refresh --stable-repo-url https://kubernetes.oss-cn-hangzhou.aliyuncs.com/charts' - sh 'helm repo update' - dir ('milvus-helm') { - checkout([$class: 'GitSCM', branches: [[name: "0.5.0"]], doGenerateSubmoduleConfigurations: false, extensions: [], submoduleCfg: [], userRemoteConfigs: [[credentialsId: "${params.GIT_CREDENTIALS_ID}", url: "https://github.com/milvus-io/milvus-helm.git", name: 'origin', refspec: "+refs/heads/0.5.0:refs/remotes/origin/0.5.0"]]]) - dir ("milvus-gpu") { - sh "helm install --wait --timeout 300 --set engine.image.tag=${DOCKER_VERSION} --set expose.type=clusterIP --name ${env.PIPELINE_NAME}-${env.BUILD_NUMBER}-single-gpu -f ci/values.yaml -f ci/filebeat/values.yaml --namespace milvus ." 
- } +sh 'helm init --client-only --skip-refresh --stable-repo-url https://kubernetes.oss-cn-hangzhou.aliyuncs.com/charts' +sh 'helm repo update' +dir ('milvus-helm') { + checkout([$class: 'GitSCM', branches: [[name: "0.5.0"]], doGenerateSubmoduleConfigurations: false, extensions: [], submoduleCfg: [], userRemoteConfigs: [[credentialsId: "${params.GIT_CREDENTIALS_ID}", url: "https://github.com/milvus-io/milvus-helm.git", name: 'origin', refspec: "+refs/heads/0.5.0:refs/remotes/origin/0.5.0"]]]) + dir ("milvus-gpu") { + sh "helm install --wait --timeout 300 --set engine.image.tag=${DOCKER_VERSION} --set expose.type=clusterIP --name ${env.PIPELINE_NAME}-${env.BUILD_NUMBER}-single-gpu -f ci/values.yaml -f ci/filebeat/values.yaml --namespace milvus ." } -} catch (exc) { - echo 'Helm running failed!' - sh "helm del --purge ${env.PIPELINE_NAME}-${env.BUILD_NUMBER}-single-gpu" - throw exc } + From 0b563e51108cfea0aad77e4d8bbbdd3482dfb9e2 Mon Sep 17 00:00:00 2001 From: jielinxu <52057195+jielinxu@users.noreply.github.com> Date: Sat, 26 Oct 2019 12:29:55 +0800 Subject: [PATCH 051/149] Remove ===== Former-commit-id: 8e916d1012712a59859620b8fc4939c71e12f637 --- README.md | 5 ----- 1 file changed, 5 deletions(-) diff --git a/README.md b/README.md index e847f0e8ca..0b71b053ec 100644 --- a/README.md +++ b/README.md @@ -201,9 +201,4 @@ Please read our [roadmap](https://milvus.io/docs/en/roadmap/) to learn about upc [Apache 2.0 license](LICENSE) -======= -![LICENSE](https://img.shields.io/badge/license-Apache--2.0-brightgreen.svg) -![Language](https://img.shields.io/badge/language-C%2B%2B-blue.svg) -![Release](https://img.shields.io/badge/Release-v0.5.0-orange.svg) -![Release date](https://img.shields.io/badge/release__date-October-yellowgreen) From 9d751d295ed6ac4472773b528c1749fb7c7bb311 Mon Sep 17 00:00:00 2001 From: milvus-ci-robot Date: Sat, 26 Oct 2019 14:35:53 +0800 Subject: [PATCH 052/149] time share run nightly test Former-commit-id: c95e691615b2a356e372264dd6eeb845346c5891 --- ci/jenkins/Jenkinsfile | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/ci/jenkins/Jenkinsfile b/ci/jenkins/Jenkinsfile index 66c7eb864e..813101ba48 100644 --- a/ci/jenkins/Jenkinsfile +++ b/ci/jenkins/Jenkinsfile @@ -1,4 +1,6 @@ -String cron_string = BRANCH_NAME == "master" || BRANCH_NAME == "0.5.0" || BRANCH_NAME == "0.5.1" ? "H 0 * * *" : "" +String cron_string = BRANCH_NAME == "master" ? "H 0 * * *" : cron_string +cron_string = BRANCH_NAME == "0.5.1" ? "H 1 * * *" : cron_string + pipeline { agent none @@ -23,7 +25,7 @@ pipeline { SEMVER = "${BRANCH_NAME}" JOBNAMES = env.JOB_NAME.split('/') PIPELINE_NAME = "${JOBNAMES[0]}" - NIGHTLIY_TEST = "${cron_string == "" ? false : true}" + NIGHTLIY_TEST = "${cron_string ? true : false}" } stages { From 1a1c858f1e7992fd77bfb4f44a6c744e5376231c Mon Sep 17 00:00:00 2001 From: milvus-ci-robot Date: Sat, 26 Oct 2019 14:39:56 +0800 Subject: [PATCH 053/149] fix ci/jenkins/Jenkinsfile error Former-commit-id: a2a27530e47b57b2b4eaaa76544f62f9f5a2496c --- ci/jenkins/Jenkinsfile | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/ci/jenkins/Jenkinsfile b/ci/jenkins/Jenkinsfile index 813101ba48..44a2cbb156 100644 --- a/ci/jenkins/Jenkinsfile +++ b/ci/jenkins/Jenkinsfile @@ -1,4 +1,4 @@ -String cron_string = BRANCH_NAME == "master" ? "H 0 * * *" : cron_string +String cron_string = BRANCH_NAME == "master" ? "H 0 * * *" : "" cron_string = BRANCH_NAME == "0.5.1" ? 
"H 1 * * *" : cron_string pipeline { @@ -25,7 +25,7 @@ pipeline { SEMVER = "${BRANCH_NAME}" JOBNAMES = env.JOB_NAME.split('/') PIPELINE_NAME = "${JOBNAMES[0]}" - NIGHTLIY_TEST = "${cron_string ? true : false}" + NIGHTLIY_TEST = "${cron_string == "" ? false : true}" } stages { From 0a7434020f5d3de406749ccecf3237a8c124badc Mon Sep 17 00:00:00 2001 From: milvus-ci-robot Date: Sat, 26 Oct 2019 15:07:11 +0800 Subject: [PATCH 054/149] format ci/jenkins/jenkinsfile/cleanupSingleDev.groovy Former-commit-id: 6ce898ed133881f30c04427369b85055b1a4f97f --- ci/jenkins/jenkinsfile/cleanupSingleDev.groovy | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/ci/jenkins/jenkinsfile/cleanupSingleDev.groovy b/ci/jenkins/jenkinsfile/cleanupSingleDev.groovy index 6e85a678be..3b8c1833b5 100644 --- a/ci/jenkins/jenkinsfile/cleanupSingleDev.groovy +++ b/ci/jenkins/jenkinsfile/cleanupSingleDev.groovy @@ -1,5 +1,8 @@ try { - sh "helm del --purge ${env.PIPELINE_NAME}-${env.BUILD_NUMBER}-single-gpu" + def helmResult = sh script: "helm status ${env.PIPELINE_NAME}-${env.BUILD_NUMBER}-single-gpu", returnStatus: true + if (!helmResult) { + sh "helm del --purge ${env.PIPELINE_NAME}-${env.BUILD_NUMBER}-single-gpu" + } } catch (exc) { def helmResult = sh script: "helm status ${env.PIPELINE_NAME}-${env.BUILD_NUMBER}-single-gpu", returnStatus: true if (!helmResult) { From 66cfb2d527f43c50ce85d12ca762ccfd7b9209fc Mon Sep 17 00:00:00 2001 From: milvus-ci-robot Date: Sat, 26 Oct 2019 15:52:35 +0800 Subject: [PATCH 055/149] add isTimeTriggeredBuild function in ci/jenkins/Jenkinsfile Former-commit-id: d19c73db3fb8a3bc942402b547114b9b554ef585 --- ci/jenkins/Jenkinsfile | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/ci/jenkins/Jenkinsfile b/ci/jenkins/Jenkinsfile index 44a2cbb156..235a56b781 100644 --- a/ci/jenkins/Jenkinsfile +++ b/ci/jenkins/Jenkinsfile @@ -25,7 +25,6 @@ pipeline { SEMVER = "${BRANCH_NAME}" JOBNAMES = env.JOB_NAME.split('/') PIPELINE_NAME = "${JOBNAMES[0]}" - NIGHTLIY_TEST = "${cron_string == "" ? 
false : true}" } stages { @@ -125,7 +124,8 @@ pipeline { steps { container('milvus-test-env') { script { - if (NIGHTLIY_TEST) { + boolean isNightlyTest = isTimeTriggeredBuild() + if (isNightlyTest) { load "${env.WORKSPACE}/ci/jenkins/jenkinsfile/singleDevNightlyTest.groovy" } else { load "${env.WORKSPACE}/ci/jenkins/jenkinsfile/singleDevTest.groovy" @@ -160,3 +160,9 @@ pipeline { } } +boolean isTimeTriggeredBuild() { + for (Object currentBuildCause : script.currentBuild.rawBuild.getCauses()) { + return currentBuildCause.class.getName().contains('TimerTriggerCause') + } + return false +} From a0c938bc018869ed4fb4ce64d7a04a0860947168 Mon Sep 17 00:00:00 2001 From: milvus-ci-robot Date: Sat, 26 Oct 2019 17:31:54 +0800 Subject: [PATCH 056/149] update isTimeTriggeredBuild funtion in ci/jenkins/Jenkinsfile Former-commit-id: a4bf86ced62c3191dfdc73b87251477eddf347c4 --- ci/jenkins/Jenkinsfile | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/ci/jenkins/Jenkinsfile b/ci/jenkins/Jenkinsfile index 235a56b781..0a16c7f2c3 100644 --- a/ci/jenkins/Jenkinsfile +++ b/ci/jenkins/Jenkinsfile @@ -161,8 +161,8 @@ pipeline { } boolean isTimeTriggeredBuild() { - for (Object currentBuildCause : script.currentBuild.rawBuild.getCauses()) { - return currentBuildCause.class.getName().contains('TimerTriggerCause') + if (currentBuild.getBuildCauses('hudson.triggers.TimerTrigger$TimerTriggerCause').size() != 0) { + return true } return false } From 70c3ae83d97d073a96a8898c9c131313775d3004 Mon Sep 17 00:00:00 2001 From: jielinxu <52057195+jielinxu@users.noreply.github.com> Date: Mon, 28 Oct 2019 09:31:46 +0800 Subject: [PATCH 057/149] [skip ci] Minor updates Former-commit-id: ca8c3d047a49e1c580eb290fa088a9bd34b99868 --- README.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 0b71b053ec..f18328f748 100644 --- a/README.md +++ b/README.md @@ -170,7 +170,7 @@ Refer to [this link](https://github.com/milvus-io/milvus-sdk-java/tree/master/ex ## Contribution guidelines -Contributions are welcomed and greatly appreciated. If you want to contribute to Milvus, please read our [contribution guidelines](CONTRIBUTING.md). This project adheres to the [code of conduct](CODE_OF_CONDUCT.md) of Milvus. By participating, you are expected to uphold this code. +Contributions are welcomed and greatly appreciated. Please read our [contribution guidelines](CONTRIBUTING.md) for detailed contribution workflow. This project adheres to the [code of conduct](CODE_OF_CONDUCT.md) of Milvus. By participating, you are expected to uphold this code. We use [GitHub issues](https://github.com/milvus-io/milvus/issues/new/choose) to track issues and bugs. For general questions and public discussions, please join our community. @@ -199,6 +199,6 @@ Please read our [roadmap](https://milvus.io/docs/en/roadmap/) to learn about upc ## License -[Apache 2.0 license](LICENSE) +[Apache License 2.0](LICENSE) From b852170b9210a884c9ba683e25f9e81fc76f6bef Mon Sep 17 00:00:00 2001 From: milvus-ci-robot Date: Mon, 28 Oct 2019 10:19:07 +0800 Subject: [PATCH 058/149] set cron timezone to Shanghai Former-commit-id: b1f05d796dce95cae1b19c5729ccd4d8da8219c1 --- ci/jenkins/Jenkinsfile | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/ci/jenkins/Jenkinsfile b/ci/jenkins/Jenkinsfile index 0a16c7f2c3..15663183ab 100644 --- a/ci/jenkins/Jenkinsfile +++ b/ci/jenkins/Jenkinsfile @@ -1,10 +1,14 @@ +String cron_timezone = "TZ=Asia/Shanghai" String cron_string = BRANCH_NAME == "master" ? 
"H 0 * * *" : "" cron_string = BRANCH_NAME == "0.5.1" ? "H 1 * * *" : cron_string pipeline { agent none - triggers { cron(cron_string) } + triggers { + cron("${cron_timezone} + ${cron_string}") + } options { timestamps() From 3e50d77cdc6433c4c3402a4477bf76e30c851343 Mon Sep 17 00:00:00 2001 From: milvus-ci-robot Date: Mon, 28 Oct 2019 10:24:20 +0800 Subject: [PATCH 059/149] set cron timezone to Shanghai Former-commit-id: c4f1a0513722460109d1e681b57a196faedc119d --- ci/jenkins/Jenkinsfile | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/ci/jenkins/Jenkinsfile b/ci/jenkins/Jenkinsfile index 15663183ab..7c2d83ba59 100644 --- a/ci/jenkins/Jenkinsfile +++ b/ci/jenkins/Jenkinsfile @@ -6,8 +6,8 @@ pipeline { agent none triggers { - cron("${cron_timezone} - ${cron_string}") + cron('''"${cron_timezone}" + "${cron_string}"''') } options { From 60d68defdae9f8c7e57ce4f6a44106fc50ac6eec Mon Sep 17 00:00:00 2001 From: milvus-ci-robot Date: Mon, 28 Oct 2019 10:29:59 +0800 Subject: [PATCH 060/149] set cron timezone to Shanghai Former-commit-id: 28271c43dfae440fd2baa541e846d80d6bf354d8 --- ci/jenkins/Jenkinsfile | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/ci/jenkins/Jenkinsfile b/ci/jenkins/Jenkinsfile index 7c2d83ba59..67bff5ac1e 100644 --- a/ci/jenkins/Jenkinsfile +++ b/ci/jenkins/Jenkinsfile @@ -1,13 +1,13 @@ String cron_timezone = "TZ=Asia/Shanghai" -String cron_string = BRANCH_NAME == "master" ? "H 0 * * *" : "" -cron_string = BRANCH_NAME == "0.5.1" ? "H 1 * * *" : cron_string +String cron_string = BRANCH_NAME == "master" ? "H 0 * * * " : "" +cron_string = BRANCH_NAME == "0.5.1" ? "H 1 * * * " : cron_string pipeline { agent none triggers { - cron('''"${cron_timezone}" - "${cron_string}"''') + cron """${cron_timezone} + ${cron_string}""" } options { From c9b2d136192a68c15442babd46d75b624ee52321 Mon Sep 17 00:00:00 2001 From: zhenwu Date: Mon, 28 Oct 2019 11:07:34 +0800 Subject: [PATCH 061/149] Disble mysql-version test Former-commit-id: e1deb82d6ddf2130a945689269c1be9e91f33f5e --- ci/jenkins/jenkinsfile/singleDevTest.groovy | 29 +- .../markdown-link-extractor.py | 420 ++++++++++++++++++ tests/milvus_doc_test/requirements.txt | 3 + tests/milvus_python_test/test_connect.py | 1 - tests/milvus_python_test/test_index.py | 4 +- tests/milvus_python_test/test_ping.py | 2 +- tests/milvus_python_test/test_table.py | 2 +- 7 files changed, 443 insertions(+), 18 deletions(-) create mode 100644 tests/milvus_doc_test/markdown-link-extractor.py create mode 100644 tests/milvus_doc_test/requirements.txt diff --git a/ci/jenkins/jenkinsfile/singleDevTest.groovy b/ci/jenkins/jenkinsfile/singleDevTest.groovy index adfadc9271..d0158a9943 100644 --- a/ci/jenkins/jenkinsfile/singleDevTest.groovy +++ b/ci/jenkins/jenkinsfile/singleDevTest.groovy @@ -6,17 +6,20 @@ timeout(time: 60, unit: 'MINUTES') { // mysql database backend test load "${env.WORKSPACE}/ci/jenkins/jenkinsfile/cleanupSingleDev.groovy" - if (!fileExists('milvus-helm')) { - dir ("milvus-helm") { - checkout([$class: 'GitSCM', branches: [[name: "0.5.0"]], doGenerateSubmoduleConfigurations: false, extensions: [], submoduleCfg: [], userRemoteConfigs: [[credentialsId: "${params.GIT_CREDENTIALS_ID}", url: "https://github.com/milvus-io/milvus-helm.git", name: 'origin', refspec: "+refs/heads/0.5.0:refs/remotes/origin/0.5.0"]]]) - } - } - dir ("milvus-helm") { - dir ("milvus-gpu") { - sh "helm install --wait --timeout 300 --set engine.image.tag=${DOCKER_VERSION} --set expose.type=clusterIP --name 
${env.PIPELINE_NAME}-${env.BUILD_NUMBER}-single-gpu -f ci/db_backend/mysql_values.yaml -f ci/filebeat/values.yaml --namespace milvus ." - } - } - dir ("tests/milvus_python_test") { - sh "pytest . --alluredir=\"test_out/dev/single/mysql\" --level=1 --ip ${env.PIPELINE_NAME}-${env.BUILD_NUMBER}-single-gpu-milvus-gpu-engine.milvus.svc.cluster.local" - } + + // Remove mysql-version tests: 10-28 + + // if (!fileExists('milvus-helm')) { + // dir ("milvus-helm") { + // checkout([$class: 'GitSCM', branches: [[name: "0.5.0"]], doGenerateSubmoduleConfigurations: false, extensions: [], submoduleCfg: [], userRemoteConfigs: [[credentialsId: "${params.GIT_CREDENTIALS_ID}", url: "https://github.com/milvus-io/milvus-helm.git", name: 'origin', refspec: "+refs/heads/0.5.0:refs/remotes/origin/0.5.0"]]]) + // } + // } + // dir ("milvus-helm") { + // dir ("milvus-gpu") { + // sh "helm install --wait --timeout 300 --set engine.image.tag=${DOCKER_VERSION} --set expose.type=clusterIP --name ${env.PIPELINE_NAME}-${env.BUILD_NUMBER}-single-gpu -f ci/db_backend/mysql_values.yaml -f ci/filebeat/values.yaml --namespace milvus ." + // } + // } + // dir ("tests/milvus_python_test") { + // sh "pytest . --alluredir=\"test_out/dev/single/mysql\" --level=1 --ip ${env.PIPELINE_NAME}-${env.BUILD_NUMBER}-single-gpu-milvus-gpu-engine.milvus.svc.cluster.local" + // } } diff --git a/tests/milvus_doc_test/markdown-link-extractor.py b/tests/milvus_doc_test/markdown-link-extractor.py new file mode 100644 index 0000000000..3744089e69 --- /dev/null +++ b/tests/milvus_doc_test/markdown-link-extractor.py @@ -0,0 +1,420 @@ +# -*- coding: utf-8 -*- +# Using Python 3.x + +import urllib.request +import urllib.error +from pathlib import Path +import requests +import json +from urllib.parse import urlparse +import markdown +import os +from os.path import join, getsize +from bs4 import BeautifulSoup +import re +from sys import platform +import argparse + + +class LinksFromMarkdown(object): + + def __init__(self, repository): + self.dictionary = repository + + def extract_links_from_markdown(self, repository): + + + if platform == "linux" or platform == "linux2": + # linux + link_file = "../link_reports/" + "extracted_links.json" + dirName = "../link_reports" + elif platform == "darwin": + # OS X + link_file = "../link_reports/" + "extracted_links.json" + dirName = "../link_reports" + elif platform == "win32": + # Windows... 
+            link_file = "..\\link_reports\\" + "extracted_links.json"
+            dirName = "..\\link_reports"
+
+        # repository = "D:\\GithubRepo\\docs-master\\docs-master"
+
+        try:
+            # Create target Directory
+            os.mkdir(dirName)
+            print("Directory ", dirName, " Created ")
+        except FileExistsError:
+            print("Directory ", dirName, " already exists")
+
+        md_files = []
+
+        for root, dirs, files in os.walk(repository):
+            # print(root, "consumes", end=" ")
+            # print(sum(getsize(join(root, name)) for name in files), end=" ")
+            # print("bytes in", len(files), "non-directory files")
+            if len(files) != 0:
+                # print(files)
+                for file in files:
+                    if file.endswith(".md") or file.endswith(".MD") or file.endswith(".mD") or file.endswith(".Md"):
+                        md_files.append(join(root, file))
+                    # elif file.endswith(".png") or file.endswith(".PNG"):
+                    #     pics.append((join(root, file)))
+
+        # print(md_files)
+        # print(pics)
+
+        a_href_list = []
+
+        for md_file in md_files:
+            with open(md_file, "r", encoding="utf-8") as f:
+                html = markdown.markdown(f.read())
+                # print(html)
+                soup = BeautifulSoup(html, "lxml")
+                a_hrefs = [(x.get('href')) for x in soup.find_all("a")]
+
+                a_href_list.append(a_hrefs)
+                # print(a_hrefs)
+                # print(md_file)
+
+        # Generates a dictionary that maps each MD file to the links extracted from that MD file
+        dictionary = dict(zip(md_files, a_href_list))
+
+        with open(link_file, "w+", encoding="utf-8") as f:
+            json.dump(dictionary, f)
+
+        return link_file
+
+        # print(dictionary)
+
+
+class CheckExtractedLinksFromMarkdown(object):
+
+    def __init__(self, link_file):
+        self.link_file = link_file
+
+    def check_extracted_links(self, link_file):
+
+        if platform == "linux" or platform == "linux2":
+            # linux
+            report_name = "../link_reports/" + "link_validation_report.html"
+        elif platform == "darwin":
+            # OS X
+            report_name = "../link_reports/" + "link_validation_report.html"
+        elif platform == "win32":
+            # Windows...
+            report_name = "..\\link_reports\\" + "link_validation_report.html"
+
+        # NOTE: the literal HTML tags inside the template strings below did not
+        # survive in this patch text; the markup here is a minimal reconstruction
+        # around the surviving report wording, not necessarily the exact original.
+        html_code = """<html><head><title>Link Validation Detailed Report</title></head>
+<body>
+<h1>Link Validation Detailed Report</h1>
+"""
+
+        with open(link_file, "r", encoding="utf-8") as f:
+            json_text = f.read()
+
+        link_dict = json.loads(json_text)
+
+        # If the report file exists, remove the file.
+        text_file = Path(report_name)
+        if text_file.is_file():
+            os.remove(report_name)
+
+        with open(report_name, "w+", encoding="utf-8") as f:
+            f.write(html_code)
+
+        # Iterate over all MD files
+        # key ---> MD file location
+        # value ---> An array of links in the MD file, including internet links and file links
+
+        invalid_counter = 0
+
+        for key in link_dict.keys():
+            head_code = ""
+            table_code = ""
+
+            if link_dict.get(key) == []:
+                with open(report_name, "a", encoding="utf-8") as f:
+                    f.write("""<h2>Checking links in """ + key)
+                    f.write("""</h2>
+<p>This markdown file does not contain any links.</p>
+""")
+            else:
+                head_code = """</h2>
+<table><tr><th>Link</th><th>Status</th></tr>"""
+
+                with open(report_name, "a", encoding="utf-8") as f:
+                    f.write("""<h2>Checking links in """ + key)
+                    f.write(head_code)
+
+                # Iterate over all links in each MD file
+                for link in link_dict.get(key):
+                    # Check internet links: http,https
+                    try:
+                        assert type(link) is str
+                    except AssertionError as e:
+                        invalid_counter = invalid_counter + 1
+                        a_row_code = """<tr><td>(link could not be parsed)</td><td>Invalid</td></tr>"""
+                        with open(report_name, "a", encoding="utf-8") as f:
+                            f.write(a_row_code)
+                        continue
+
+                    # MD files that are not well-formed may raise exceptions. If parentheses
+                    # are not correctly escaped, a NoneType object may be returned.
+                    if link.startswith("http://") or link.startswith("https://"):
+                        try:
+                            link_response = requests.get(link, timeout=60)
+                            status_code = link_response.status_code
+
+                            # Informational responses (100–199),
+                            # Successful responses (200–299),
+                            # Redirects (300–399),
+                            # Client errors (400–499),
+                            # and Server errors (500–599).
+                            if status_code in range(200, 299):
+                                # For links that do not contain hashes
+                                if "#" not in link:
+                                    row_code = """<tr><td>""" + link + """</td><td>Valid</td></tr>"""
+                                # For links that contain hashes
+                                else:
+                                    try:
+                                        # Acquire the url after "#"
+                                        headers = {
+                                            'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64; rv:23.0) Gecko/20100101 Firefox/23.0'}
+
+                                        req = urllib.request.Request(url=str(
+                                            urlparse(link).scheme + "://" + urlparse(link).netloc + urlparse(link).path), headers=headers)
+                                        response = urllib.request.urlopen(req, data=None)
+                                        html_code = response.read()
+                                        soup = BeautifulSoup(html_code.decode("utf-8"), "lxml")
+                                        a_hash = soup.find("a", {"id": str(urlparse(link).fragment)})
+                                        h1_hash = soup.find("h1", {"id": str(urlparse(link).fragment)})
+                                        h2_hash = soup.find("h2", {"id": str(urlparse(link).fragment)})
+                                        h3_hash = soup.find("h3", {"id": str(urlparse(link).fragment)})
+                                        h4_hash = soup.find("h4", {"id": str(urlparse(link).fragment)})
+                                        h5_hash = soup.find("h5", {"id": str(urlparse(link).fragment)})
+                                        h6_hash = soup.find("h6", {"id": str(urlparse(link).fragment)})
+                                        div_hash = soup.find("div", {"id": str(urlparse(link).fragment)})
+
+                                        if (None, None, None, None, None, None, None, None) != (
+                                                a_hash, h1_hash, h2_hash, h3_hash, h4_hash, h5_hash, h6_hash, div_hash):
+                                            row_code = """<tr><td>""" + link + """</td><td>Valid</td></tr>"""
+                                        else:
+                                            row_code = """<tr><td>""" + link + """</td><td>Invalid: anchor not found</td></tr>"""
+                                    except urllib.error.HTTPError as http_error:
+                                        row_code = """<tr><td>""" + link + """</td><td>Invalid: HTTP error</td></tr>"""
+                                    except urllib.error.URLError as url_error:
+                                        row_code = """<tr><td>""" + link + """</td><td>Invalid: URL error</td></tr>"""
+                            elif status_code in range(400, 599):
+                                row_code = """<tr><td>""" + link + """</td><td>Invalid: """ + str(status_code) + """</td></tr>"""
+                        except requests.exceptions.Timeout as timeout_error:
+                            print(timeout_error)
+                            row_code = """<tr><td>""" + link + """</td><td>Invalid: timeout</td></tr>"""
+                        except requests.exceptions.ConnectionError as connection_error:
+                            print(connection_error)
+                            row_code = """<tr><td>""" + link + """</td><td>Invalid: connection error</td></tr>"""
+                        except requests.exceptions.HTTPError as http_error:
+                            print(http_error)
+                            row_code = """<tr><td>""" + link + """</td><td>Invalid: HTTP error</td></tr>"""
+
+                    # elif link.startswith("mailto:"):
+
+                    # Check MD file links
+
+                    # File path formats on Windows systems from https://docs.microsoft.com/en-us/dotnet/standard/io/file-path-formats
+                    # C:\Documents\Newsletters\Summer2018.pdf           An absolute file path from the root of drive C:
+                    # \Program Files\Custom Utilities\StringFinder.exe  An absolute path from the root of the current drive.
+                    # 2018\January.xlsx                                 A relative path to a file in a subdirectory of the current directory.
+                    # ..\Publications\TravelBrochure.pdf                A relative path to file in a directory that is a peer of the current directory.
+                    # C:\Projects\apilibrary\apilibrary.sln             An absolute path to a file from the root of drive C:
+                    # C:Projects\apilibrary\apilibrary.sln              A relative path from the current directory of the C: drive.
+
+                    # We do not use absolute path formats in MD files and path formats are not likely to be from the root of the current drive.
So here are possible formats: + # 2018\January.md + # ..\Publications\TravelBrochure.md + + # Check if file exists + + elif link.endswith(".md") or link.endswith(".MD") or link.endswith(".mD") or link.endswith(".Md"): + # A relative path to file in a directory that is a peer of the current directory. + if link.startswith("..\\"): + # Get the absolute location of the linked md + cur_direct = os.path.dirname(key) + final_direct = os.path.dirname(cur_direct) + linked_md = os.path.join(final_direct,link) + # Check if the linked md exists + if Path(linked_md).is_file(): + row_code = """""" + + else: + row_code = """""" + + # A relative path to a file in a subdirectory of the current directory. + else: + # Get the absolute location of the linked md + cur_direct = os.path.dirname(key) + linked_md = os.path.join(cur_direct, link) + # Check if the linked md exists + if Path(linked_md).is_file(): + row_code = """""" + + else: + row_code = """""" + + elif link.startswith("#"): + # Validate if anchors correctly show in the MD file + with open(key,"r",encoding="utf-8") as f: + md_text = f.read() + # print(str(md_text)) + reg = re.compile(str("#" + "\s*" + link[1:])) + + if """""" in str(md_text) or len(re.findall(reg,str(md_text))) == 2: + row_code = """""" + else: + row_code = """""" + # Writes row_code for the link to the table + with open(report_name, "a", encoding="utf-8") as f: + f.write(row_code) + # print(row_code) + # Writes the end of the table for the key + with open(report_name, "a", encoding="utf-8") as f: + f.write("
<tr><th>Link</th><th>Status</th><th>Markdown File</th></tr>
Invalid Link Number """ + str(invalid_counter) +"""""" + """This link is not a string, which indicates that your MD file may not be well-formed.""" + """""" + key + """
""" + """""" + link + """""" + """""" + str(status_code) + """""" + key + """
""" + """""" + link + """""" + """""" + str( + status_code) + """""" + key + """
""" + """""" + link + """""" + """""" + str( + status_code) + """ The URL looks good but the anchor link does not work or is not using an anchor tag.""" + """""" + key + """
""" + """""" + link + """""" + """""" + str( + status_code) + """ """ + str(http_error) + """ The URL looks good but the page then returns an HTTP error.""" + key + """
""" + """""" + link + """""" + """""" + str( + status_code) + """ """ + str(url_error) + """ The URL looks good but the page then returns a URL error.""" + key + """
""" + """""" + link + """""" + """""" + str( + status_code) + """""" + key + """
""" + """""" + link + """""" + """""" + str( + timeout_error) + """""" + key + """
""" + """""" + link + """""" + """""" + str( + connection_error) + """""" + key + """
""" + """""" + link + """""" + """""" + str( + http_error) + """""" + key + """
""" + link + """The file link looks good.""" + key + """
""" + link + """The file link is broken.""" + key + """
""" + link + """The file link looks good.""" + key + """
""" + link + """The file link is broken.""" + key + """
""" + link + """The anchor link looks good.""" + key + """
""" + link + """The anchor link is broken.""" + key + """
") + print("Completed link checking for " + key) + + with open(report_name, "a", encoding="utf-8") as f: + f.write("") + print("Completed link checking for all markdown files") + + return report_name + + +class GenerateReportSummary(object): + def __init__(self, report_name): + self.report_name = report_name + + def generate_report_summary(self, report_name): + + if platform == "linux" or platform == "linux2": + # linux + summary_name = "../link_reports/" + "link_validation_summary.html" + + elif platform == "darwin": + # OS X + summary_name = "../link_reports/" + "link_validation_summary.html" + + elif platform == "win32": + # Windows... + summary_name = "..\\link_reports\\" + "link_validation_summary.html" + + # Use BeautifulSoup to read this report and return statistics + with open(report_name, "r", encoding="utf-8") as f: + html_code = f.read() + soup = BeautifulSoup(html_code, "lxml") + failed_links_rows = soup.find_all("tr", {"class": "fail"}) + fail_count = len(failed_links_rows) + success_links_rows = soup.find_all("tr", {"class": "success"}) + pass_count = len(success_links_rows) + for failed_links_row in failed_links_rows: + del failed_links_row.attrs["bgcolor"] + # print(type(failed_links_rows)) + + # Write report summary to another HTML file + with open(summary_name, "w+", encoding="utf-8") as f: + f.write( + """Link Validation Report Summary

Link Validation Report Summary

""") + f.write("""

The number of failed links: """ + str(fail_count) + """. The number of passed links: """ + str(pass_count) + """. Pass rate: """ + str(float(pass_count/(pass_count+fail_count))*100) + '%')

Click the button to sort the table by parent page:

+

""") + f.write("""""") + f.write( + """""") + + for failed_link in set(failed_links_rows): + f.write(str(failed_link)) + f.write( + """
<tr><th>Failed Links</th><th>Status Code</th><th>Parent Page</th></tr>

""" + """Refer to this link for detailed report.""" + """

""") + +# Create the parser +my_parser = argparse.ArgumentParser(description='Check the links for all markdown files of a folder') + +# Add the arguments +my_parser.add_argument('Path', + metavar='path', + type=str, + help='The path to the repository that contains all markdown files.') + +# Execute the parse_args() method +args = my_parser.parse_args() + +repository = args.Path + +# Get link JSON file +LinksFromMarkdown_Milvus = LinksFromMarkdown(repository) +link_file = LinksFromMarkdown_Milvus.extract_links_from_markdown(repository) + +# Generate link validation report +CheckExtractedLinksFromMarkdown_Milvus = CheckExtractedLinksFromMarkdown(link_file) +report_name = CheckExtractedLinksFromMarkdown_Milvus.check_extracted_links(link_file) + +# Generate report summary +GenerateReportSummary_Milvus = GenerateReportSummary(report_name) +GenerateReportSummary_Milvus.generate_report_summary(report_name) \ No newline at end of file diff --git a/tests/milvus_doc_test/requirements.txt b/tests/milvus_doc_test/requirements.txt new file mode 100644 index 0000000000..e099aa161a --- /dev/null +++ b/tests/milvus_doc_test/requirements.txt @@ -0,0 +1,3 @@ +requests>=2.22.0 +markdown>=3.1.1 +beautifulsoup4>=4.8.1 diff --git a/tests/milvus_python_test/test_connect.py b/tests/milvus_python_test/test_connect.py index 5ec9539011..96ce1d3bdf 100644 --- a/tests/milvus_python_test/test_connect.py +++ b/tests/milvus_python_test/test_connect.py @@ -5,7 +5,6 @@ import threading from multiprocessing import Process from utils import * -__version__ = '0.5.0' CONNECT_TIMEOUT = 12 diff --git a/tests/milvus_python_test/test_index.py b/tests/milvus_python_test/test_index.py index 65716d45aa..269e6137da 100644 --- a/tests/milvus_python_test/test_index.py +++ b/tests/milvus_python_test/test_index.py @@ -301,7 +301,7 @@ class TestIndexBase: vectors = gen_vectors(nq, dim) table_list = [] for i in range(10): - table_name = gen_unique_str('test_create_index_multi_tables') + table_name = gen_unique_str() table_list.append(table_name) param = {'table_name': table_name, 'dimension': dim, @@ -769,7 +769,7 @@ class TestIndexIP: vectors = gen_vectors(nq, dim) table_list = [] for i in range(10): - table_name = gen_unique_str('test_create_index_multi_tables') + table_name = gen_unique_str() table_list.append(table_name) param = {'table_name': table_name, 'dimension': dim, diff --git a/tests/milvus_python_test/test_ping.py b/tests/milvus_python_test/test_ping.py index a55559bc63..d63ab93f11 100644 --- a/tests/milvus_python_test/test_ping.py +++ b/tests/milvus_python_test/test_ping.py @@ -1,7 +1,7 @@ import logging import pytest -__version__ = '0.5.0' +__version__ = '0.5.1' class TestPing: diff --git a/tests/milvus_python_test/test_table.py b/tests/milvus_python_test/test_table.py index 88f7caca3c..6af38bac15 100644 --- a/tests/milvus_python_test/test_table.py +++ b/tests/milvus_python_test/test_table.py @@ -806,7 +806,7 @@ def create_index(connect, **params): return status func_map = { - 0:has, + # 0:has, 1:show, 10:create_table, 11:describe, From 4e5b778a3d79e2fd95251ddaf03f82152342e4d4 Mon Sep 17 00:00:00 2001 From: jielinxu <52057195+jielinxu@users.noreply.github.com> Date: Mon, 28 Oct 2019 11:19:09 +0800 Subject: [PATCH 062/149] [skip ci] Add Contributors section Former-commit-id: 6c3a3fa7b3556bf904eee77225730f3d9a1befba --- README.md | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/README.md b/README.md index f18328f748..43f117ff92 100644 --- a/README.md +++ b/README.md @@ -178,6 +178,13 @@ We use [GitHub 
issues](https://github.com/milvus-io/milvus/issues/new/choose) to To connect with other users and contributors, welcome to join our [slack channel](https://join.slack.com/t/milvusio/shared_invite/enQtNzY1OTQ0NDI3NjMzLWNmYmM1NmNjOTQ5MGI5NDhhYmRhMGU5M2NhNzhhMDMzY2MzNDdlYjM5ODQ5MmE3ODFlYzU3YjJkNmVlNDQ2ZTk). +## Contributors + +Deep thanks and appreciation go to the following people. + +- [akihoni](https://github.com/akihoni) found a broken link and a small typo in the README file. + + ## Milvus Roadmap Please read our [roadmap](https://milvus.io/docs/en/roadmap/) to learn about upcoming features. From d76ffe9ab3fd17c7a05422d032fed42404c7695a Mon Sep 17 00:00:00 2001 From: milvus-ci-robot Date: Mon, 28 Oct 2019 11:20:09 +0800 Subject: [PATCH 063/149] update deploySingle2Dev.groovy Former-commit-id: db4fd829b462037db8ce4ebf7b144ddc6b6f5142 --- ci/jenkins/jenkinsfile/deploySingle2Dev.groovy | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ci/jenkins/jenkinsfile/deploySingle2Dev.groovy b/ci/jenkins/jenkinsfile/deploySingle2Dev.groovy index 738c714a0c..bc6c6f4438 100644 --- a/ci/jenkins/jenkinsfile/deploySingle2Dev.groovy +++ b/ci/jenkins/jenkinsfile/deploySingle2Dev.groovy @@ -3,7 +3,7 @@ sh 'helm repo update' dir ('milvus-helm') { checkout([$class: 'GitSCM', branches: [[name: "0.5.0"]], doGenerateSubmoduleConfigurations: false, extensions: [], submoduleCfg: [], userRemoteConfigs: [[credentialsId: "${params.GIT_CREDENTIALS_ID}", url: "https://github.com/milvus-io/milvus-helm.git", name: 'origin', refspec: "+refs/heads/0.5.0:refs/remotes/origin/0.5.0"]]]) dir ("milvus-gpu") { - sh "helm install --wait --timeout 300 --set engine.image.tag=${DOCKER_VERSION} --set expose.type=clusterIP --name ${env.PIPELINE_NAME}-${env.BUILD_NUMBER}-single-gpu -f ci/values.yaml -f ci/filebeat/values.yaml --namespace milvus ." + sh "helm install --wait --timeout 300 --set engine.image.tag=${DOCKER_VERSION} --set expose.type=clusterIP --name ${env.PIPELINE_NAME}-${env.BUILD_NUMBER}-single-gpu -f ci/db_backend/sqlite_values.yaml -f ci/filebeat/values.yaml --namespace milvus ." } } From 430584f386ca87736e9d431cf7685150ded460a5 Mon Sep 17 00:00:00 2001 From: zhenwu Date: Mon, 28 Oct 2019 11:50:54 +0800 Subject: [PATCH 064/149] remove mysql test Former-commit-id: f56db45d7fd8c9eb145c4b7bb2df61abd8c54ad9 --- ci/jenkins/jenkinsfile/singleDevTest.groovy | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/ci/jenkins/jenkinsfile/singleDevTest.groovy b/ci/jenkins/jenkinsfile/singleDevTest.groovy index d0158a9943..16fe65a9b3 100644 --- a/ci/jenkins/jenkinsfile/singleDevTest.groovy +++ b/ci/jenkins/jenkinsfile/singleDevTest.groovy @@ -4,8 +4,7 @@ timeout(time: 60, unit: 'MINUTES') { sh "pytest . 
--alluredir=\"test_out/dev/single/sqlite\" --level=1 --ip ${env.PIPELINE_NAME}-${env.BUILD_NUMBER}-single-gpu-milvus-gpu-engine.milvus.svc.cluster.local" } // mysql database backend test - load "${env.WORKSPACE}/ci/jenkins/jenkinsfile/cleanupSingleDev.groovy" - + // load "${env.WORKSPACE}/ci/jenkins/jenkinsfile/cleanupSingleDev.groovy" // Remove mysql-version tests: 10-28 From 80b9c79c5d052db85a1d98c21d0ce35ccf8a9952 Mon Sep 17 00:00:00 2001 From: wxyu Date: Mon, 28 Oct 2019 12:23:25 +0800 Subject: [PATCH 065/149] Using new structure for tasktable Former-commit-id: 6742f21a429da87456ded0a910d248948dc948b4 --- CHANGELOG.md | 2 + core/src/scheduler/BuildMgr.h | 25 +++-- core/src/scheduler/CircleQueue.h | 119 +++++++++++++++++++++ core/src/scheduler/TaskTable.cpp | 96 ++++++++++++----- core/src/scheduler/TaskTable.h | 40 ++----- core/src/scheduler/resource/Resource.cpp | 7 +- core/unittest/scheduler/test_tasktable.cpp | 94 +++++++--------- 7 files changed, 257 insertions(+), 126 deletions(-) create mode 100644 core/src/scheduler/CircleQueue.h diff --git a/CHANGELOG.md b/CHANGELOG.md index 74a1951142..bcb3f5b70f 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -14,6 +14,8 @@ Please mark all change in change log and use the ticket from JIRA. - \#92 - Speed up CMake build process ## Feature +- \#115 - Using new structure for tasktable + ## Task # Milvus 0.5.0 (2019-10-21) diff --git a/core/src/scheduler/BuildMgr.h b/core/src/scheduler/BuildMgr.h index ee7ab38e25..805c01aafd 100644 --- a/core/src/scheduler/BuildMgr.h +++ b/core/src/scheduler/BuildMgr.h @@ -34,27 +34,30 @@ namespace scheduler { class BuildMgr { public: - explicit BuildMgr(int64_t numoftasks) : numoftasks_(numoftasks) { + explicit BuildMgr(int64_t concurrent_limit) : available_(concurrent_limit) { } public: void Put() { - ++numoftasks_; + std::lock_guard lock(mutex_); + ++available_; } - void - take() { - --numoftasks_; - } - - int64_t - numoftasks() { - return (int64_t)numoftasks_; + bool + Take() { + std::lock_guard lock(mutex_); + if (available_ < 1) { + return false; + } else { + --available_; + return true; + } } private: - std::atomic_long numoftasks_; + std::int64_t available_; + std::mutex mutex_; }; using BuildMgrPtr = std::shared_ptr; diff --git a/core/src/scheduler/CircleQueue.h b/core/src/scheduler/CircleQueue.h new file mode 100644 index 0000000000..5da9338ba5 --- /dev/null +++ b/core/src/scheduler/CircleQueue.h @@ -0,0 +1,119 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
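+
+// Editorial note: CircleQueue is the fixed-capacity ring buffer that backs the
+// new TaskTable. front_ (atomic) records the last finished slot, so consumers
+// scan from front() + 1, while rear_ is the next write slot; put() throws
+// once the ring is full. A minimal usage sketch under those assumptions
+// (single producer, element type TaskTableItemPtr as used by TaskTable):
+//
+//     CircleQueue<TaskTableItemPtr> queue(1ULL << 4);   // capacity 16
+//     queue.put(std::make_shared<TaskTableItem>());     // write slot 0, rear_ becomes 1
+//     auto item = queue[queue.rear() - 1];              // read slot 0 back
+//     queue.set_front(queue.rear() - 1);                // mark slot 0 finished
+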
+#pragma once + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace milvus { +namespace scheduler { + +template +class CircleQueue { + using value_type = T; + using atomic_size_type = std::atomic_ullong; + using size_type = uint64_t; + using const_reference = const value_type&; +#define MEMORY_ORDER (std::memory_order::memory_order_seq_cst) + + public: + explicit CircleQueue(size_type cap) : data_(cap, nullptr), capacity_(cap), front_() { + front_.store(cap - 1, MEMORY_ORDER); + } + + CircleQueue() = delete; + CircleQueue(const CircleQueue& q) = delete; + CircleQueue(CircleQueue&& q) = delete; + + public: + const_reference operator[](size_type n) { + return data_[n % capacity_]; + } + + size_type + front() { + return front_.load(MEMORY_ORDER); + } + + size_type + rear() { + return rear_; + } + + size_type + size() { + return size_; + } + + size_type + capacity() { + return capacity_; + } + + void + set_front(uint64_t last_finish) { + if (last_finish == rear_) { + throw; + } + front_.store(last_finish % capacity_, MEMORY_ORDER); + } + + void + put(const value_type& x) { + if ((rear_) % capacity_ == front_.load(MEMORY_ORDER)) { + throw; + } + data_[rear_] = x; + rear_ = ++rear_ % capacity_; + if (size_ < capacity_) { + ++size_; + } + } + + void + put(value_type&& x) { + if ((rear_) % capacity_ == front_.load(MEMORY_ORDER)) { + throw; + } + data_[rear_] = std::move(x); + rear_ = ++rear_ % capacity_; + if (size_ < capacity_) { + ++size_; + } + } + + private: + std::vector data_; + size_type capacity_; + atomic_size_type front_; + size_type rear_ = 0; + size_type size_ = 0; +#undef MEMORY_ORDER +}; + +} // namespace scheduler +} // namespace milvus diff --git a/core/src/scheduler/TaskTable.cpp b/core/src/scheduler/TaskTable.cpp index d0e6c1c38b..e35c7cd255 100644 --- a/core/src/scheduler/TaskTable.cpp +++ b/core/src/scheduler/TaskTable.cpp @@ -20,6 +20,7 @@ #include "event/TaskTableUpdatedEvent.h" #include "scheduler/SchedInst.h" #include "utils/Log.h" +#include "utils/TimeRecorder.h" #include #include @@ -153,7 +154,42 @@ TaskTableItem::Dump() const { std::vector TaskTable::PickToLoad(uint64_t limit) { - std::lock_guard lock(mutex_); +#if 1 + TimeRecorder rc(""); + std::vector indexes; + bool cross = false; + + uint64_t available_begin = table_.front() + 1; + for (uint64_t i = 0, loaded_count = 0, pick_count = 0; i < table_.size() && pick_count < limit; ++i) { + auto index = available_begin + i; + if (not table_[index]) + break; + if (index % table_.capacity() == table_.rear()) + break; + if (not cross && table_[index]->IsFinish()) { + table_.set_front(index); + } else if (table_[index]->state == TaskTableItemState::LOADED) { + cross = true; + ++loaded_count; + if (loaded_count > 2) + return std::vector(); + } else if (table_[index]->state == TaskTableItemState::START) { + auto task = table_[index]->task; + + // if task is a build index task, limit it + if (task->Type() == TaskType::BuildIndexTask && task->path().Current() == "cpu") { + if (not BuildMgrInst::GetInstance()->Take()) { + continue; + } + } + cross = true; + indexes.push_back(index); + ++pick_count; + } + } + rc.ElapseFromBegin("PickToLoad "); + return indexes; +#else size_t count = 0; for (uint64_t j = last_finish_ + 1; j < table_.size(); ++j) { if (not table_[j]) { @@ -197,34 +233,44 @@ TaskTable::PickToLoad(uint64_t limit) { } } return indexes; +#endif } std::vector TaskTable::PickToExecute(uint64_t limit) { - std::lock_guard lock(mutex_); + 
TimeRecorder rc(""); std::vector indexes; bool cross = false; - for (uint64_t i = last_finish_ + 1, count = 0; i < table_.size() && count < limit; ++i) { - if (not cross && table_[i]->IsFinish()) { - last_finish_ = i; - } else if (table_[i]->state == TaskTableItemState::LOADED) { + uint64_t available_begin = table_.front() + 1; + for (uint64_t i = 0, pick_count = 0; i < table_.size() && pick_count < limit; ++i) { + uint64_t index = available_begin + i; + if (not table_[index]) { + break; + } + if (index % table_.capacity() == table_.rear()) { + break; + } + + if (not cross && table_[index]->IsFinish()) { + table_.set_front(index); + } else if (table_[index]->state == TaskTableItemState::LOADED) { cross = true; - indexes.push_back(i); - ++count; + indexes.push_back(index); + ++pick_count; } } + rc.ElapseFromBegin("PickToExecute "); return indexes; } void TaskTable::Put(TaskPtr task) { - std::lock_guard lock(mutex_); auto item = std::make_shared(); item->id = id_++; item->task = std::move(task); item->state = TaskTableItemState::START; item->timestamp.start = get_current_timestamp(); - table_.push_back(item); + table_.put(std::move(item)); if (subscriber_) { subscriber_(); } @@ -232,14 +278,13 @@ TaskTable::Put(TaskPtr task) { void TaskTable::Put(std::vector& tasks) { - std::lock_guard lock(mutex_); for (auto& task : tasks) { auto item = std::make_shared(); item->id = id_++; item->task = std::move(task); item->state = TaskTableItemState::START; item->timestamp.start = get_current_timestamp(); - table_.push_back(item); + table_.put(std::move(item)); } if (subscriber_) { subscriber_(); @@ -248,26 +293,25 @@ TaskTable::Put(std::vector& tasks) { TaskTableItemPtr TaskTable::Get(uint64_t index) { - std::lock_guard lock(mutex_); return table_[index]; } -// void -// TaskTable::Clear() { -//// find first task is NOT (done or moved), erase from begin to it; -//// auto iterator = table_.begin(); -//// while (iterator->state == TaskTableItemState::EXECUTED or -//// iterator->state == TaskTableItemState::MOVED) -//// iterator++; -//// table_.erase(table_.begin(), iterator); -//} +size_t +TaskTable::TaskToExecute() { + size_t count = 0; + auto begin = table_.front() + 1; + for (size_t i = 0; i < table_.size(); ++i) { + auto index = begin + i; + if (table_[index]->state == TaskTableItemState::LOADED) { + ++count; + } + } + return count; +} json TaskTable::Dump() const { - json ret; - for (auto& item : table_) { - ret.push_back(item->Dump()); - } + json ret{{"error.message", "not support yet."}}; return ret; } diff --git a/core/src/scheduler/TaskTable.h b/core/src/scheduler/TaskTable.h index a9d00043c2..052be66890 100644 --- a/core/src/scheduler/TaskTable.h +++ b/core/src/scheduler/TaskTable.h @@ -25,6 +25,7 @@ #include #include +#include "CircleQueue.h" #include "event/Event.h" #include "interface/interfaces.h" #include "task/SearchTask.h" @@ -99,7 +100,8 @@ using TaskTableItemPtr = std::shared_ptr; class TaskTable : public interface::dumpable { public: - TaskTable() = default; + TaskTable() : table_(1ULL << 16ULL) { + } TaskTable(const TaskTable&) = delete; TaskTable(TaskTable&&) = delete; @@ -128,20 +130,9 @@ class TaskTable : public interface::dumpable { TaskTableItemPtr Get(uint64_t index); - /* - * TODO(wxyu): BIG GC - * Remove sequence task which is DONE or MOVED from front; - * Called by ? 
- */ - // void - // Clear(); - - /* - * Return true if task table empty, otherwise false; - */ - inline bool - Empty() { - return table_.empty(); + inline size_t + Capacity() { + return table_.capacity(); } /* @@ -152,22 +143,14 @@ class TaskTable : public interface::dumpable { return table_.size(); } + size_t + TaskToExecute(); + public: - TaskTableItemPtr& operator[](uint64_t index) { - std::lock_guard lock(mutex_); + const TaskTableItemPtr& operator[](uint64_t index) { return table_[index]; } - std::deque::iterator - begin() { - return table_.begin(); - } - - std::deque::iterator - end() { - return table_.end(); - } - public: std::vector PickToLoad(uint64_t limit); @@ -249,8 +232,7 @@ class TaskTable : public interface::dumpable { private: std::uint64_t id_ = 0; - mutable std::mutex mutex_; - std::deque table_; + CircleQueue table_; std::function subscriber_ = nullptr; // cache last finish avoid Pick task from begin always diff --git a/core/src/scheduler/resource/Resource.cpp b/core/src/scheduler/resource/Resource.cpp index 1cd4cde609..2577617dab 100644 --- a/core/src/scheduler/resource/Resource.cpp +++ b/core/src/scheduler/resource/Resource.cpp @@ -123,12 +123,7 @@ Resource::Dump() const { uint64_t Resource::NumOfTaskToExec() { - uint64_t count = 0; - for (auto& task : task_table_) { - if (task->state == TaskTableItemState::LOADED) - ++count; - } - return count; + return task_table_.TaskToExecute(); } TaskTableItemPtr diff --git a/core/unittest/scheduler/test_tasktable.cpp b/core/unittest/scheduler/test_tasktable.cpp index e717e40285..97aa1dce66 100644 --- a/core/unittest/scheduler/test_tasktable.cpp +++ b/core/unittest/scheduler/test_tasktable.cpp @@ -15,11 +15,10 @@ // specific language governing permissions and limitations // under the License. 
- +#include #include "scheduler/TaskTable.h" #include "scheduler/task/TestTask.h" #include "scheduler/tasklabel/DefaultLabel.h" -#include /************ TaskTableBaseTest ************/ @@ -28,15 +27,11 @@ class TaskTableItemTest : public ::testing::Test { void SetUp() override { std::vector states{ - milvus::scheduler::TaskTableItemState::INVALID, - milvus::scheduler::TaskTableItemState::START, - milvus::scheduler::TaskTableItemState::LOADING, - milvus::scheduler::TaskTableItemState::LOADED, - milvus::scheduler::TaskTableItemState::EXECUTING, - milvus::scheduler::TaskTableItemState::EXECUTED, - milvus::scheduler::TaskTableItemState::MOVING, - milvus::scheduler::TaskTableItemState::MOVED}; - for (auto &state : states) { + milvus::scheduler::TaskTableItemState::INVALID, milvus::scheduler::TaskTableItemState::START, + milvus::scheduler::TaskTableItemState::LOADING, milvus::scheduler::TaskTableItemState::LOADED, + milvus::scheduler::TaskTableItemState::EXECUTING, milvus::scheduler::TaskTableItemState::EXECUTED, + milvus::scheduler::TaskTableItemState::MOVING, milvus::scheduler::TaskTableItemState::MOVED}; + for (auto& state : states) { auto item = std::make_shared(); item->state = state; items_.emplace_back(item); @@ -59,9 +54,9 @@ TEST_F(TaskTableItemTest, DESTRUCT) { } TEST_F(TaskTableItemTest, IS_FINISH) { - for (auto &item : items_) { - if (item->state == milvus::scheduler::TaskTableItemState::EXECUTED - || item->state == milvus::scheduler::TaskTableItemState::MOVED) { + for (auto& item : items_) { + if (item->state == milvus::scheduler::TaskTableItemState::EXECUTED || + item->state == milvus::scheduler::TaskTableItemState::MOVED) { ASSERT_TRUE(item->IsFinish()); } else { ASSERT_FALSE(item->IsFinish()); @@ -70,13 +65,13 @@ TEST_F(TaskTableItemTest, IS_FINISH) { } TEST_F(TaskTableItemTest, DUMP) { - for (auto &item : items_) { + for (auto& item : items_) { ASSERT_FALSE(item->Dump().empty()); } } TEST_F(TaskTableItemTest, LOAD) { - for (auto &item : items_) { + for (auto& item : items_) { auto before_state = item->state; auto ret = item->Load(); if (before_state == milvus::scheduler::TaskTableItemState::START) { @@ -90,7 +85,7 @@ TEST_F(TaskTableItemTest, LOAD) { } TEST_F(TaskTableItemTest, LOADED) { - for (auto &item : items_) { + for (auto& item : items_) { auto before_state = item->state; auto ret = item->Loaded(); if (before_state == milvus::scheduler::TaskTableItemState::LOADING) { @@ -104,7 +99,7 @@ TEST_F(TaskTableItemTest, LOADED) { } TEST_F(TaskTableItemTest, EXECUTE) { - for (auto &item : items_) { + for (auto& item : items_) { auto before_state = item->state; auto ret = item->Execute(); if (before_state == milvus::scheduler::TaskTableItemState::LOADED) { @@ -118,7 +113,7 @@ TEST_F(TaskTableItemTest, EXECUTE) { } TEST_F(TaskTableItemTest, EXECUTED) { - for (auto &item : items_) { + for (auto& item : items_) { auto before_state = item->state; auto ret = item->Executed(); if (before_state == milvus::scheduler::TaskTableItemState::EXECUTING) { @@ -132,7 +127,7 @@ TEST_F(TaskTableItemTest, EXECUTED) { } TEST_F(TaskTableItemTest, MOVE) { - for (auto &item : items_) { + for (auto& item : items_) { auto before_state = item->state; auto ret = item->Move(); if (before_state == milvus::scheduler::TaskTableItemState::LOADED) { @@ -146,7 +141,7 @@ TEST_F(TaskTableItemTest, MOVE) { } TEST_F(TaskTableItemTest, MOVED) { - for (auto &item : items_) { + for (auto& item : items_) { auto before_state = item->state; auto ret = item->Moved(); if (before_state == 
milvus::scheduler::TaskTableItemState::MOVING) { @@ -180,9 +175,7 @@ class TaskTableBaseTest : public ::testing::Test { TEST_F(TaskTableBaseTest, SUBSCRIBER) { bool flag = false; - auto callback = [&]() { - flag = true; - }; + auto callback = [&]() { flag = true; }; empty_table_.RegisterSubscriber(callback); empty_table_.Put(task1_); ASSERT_TRUE(flag); @@ -210,12 +203,6 @@ TEST_F(TaskTableBaseTest, PUT_EMPTY_BATCH) { empty_table_.Put(tasks); } -TEST_F(TaskTableBaseTest, EMPTY) { - ASSERT_TRUE(empty_table_.Empty()); - empty_table_.Put(task1_); - ASSERT_FALSE(empty_table_.Empty()); -} - TEST_F(TaskTableBaseTest, SIZE) { ASSERT_EQ(empty_table_.Size(), 0); empty_table_.Put(task1_); @@ -237,7 +224,7 @@ TEST_F(TaskTableBaseTest, PICK_TO_LOAD) { auto indexes = empty_table_.PickToLoad(1); ASSERT_EQ(indexes.size(), 1); - ASSERT_EQ(indexes[0], 2); + ASSERT_EQ(indexes[0] % empty_table_.Capacity(), 2); } TEST_F(TaskTableBaseTest, PICK_TO_LOAD_LIMIT) { @@ -250,9 +237,9 @@ TEST_F(TaskTableBaseTest, PICK_TO_LOAD_LIMIT) { auto indexes = empty_table_.PickToLoad(3); ASSERT_EQ(indexes.size(), 3); - ASSERT_EQ(indexes[0], 2); - ASSERT_EQ(indexes[1], 3); - ASSERT_EQ(indexes[2], 4); + ASSERT_EQ(indexes[0] % empty_table_.Capacity(), 2); + ASSERT_EQ(indexes[1]% empty_table_.Capacity(), 3); + ASSERT_EQ(indexes[2]% empty_table_.Capacity(), 4); } TEST_F(TaskTableBaseTest, PICK_TO_LOAD_CACHE) { @@ -266,14 +253,14 @@ TEST_F(TaskTableBaseTest, PICK_TO_LOAD_CACHE) { // first pick, non-cache auto indexes = empty_table_.PickToLoad(1); ASSERT_EQ(indexes.size(), 1); - ASSERT_EQ(indexes[0], 2); + ASSERT_EQ(indexes[0] % empty_table_.Capacity(), 2); // second pick, iterate from 2 // invalid state change empty_table_[1]->state = milvus::scheduler::TaskTableItemState::START; indexes = empty_table_.PickToLoad(1); ASSERT_EQ(indexes.size(), 1); - ASSERT_EQ(indexes[0], 2); + ASSERT_EQ(indexes[0] % empty_table_.Capacity(), 2); } TEST_F(TaskTableBaseTest, PICK_TO_EXECUTE) { @@ -287,7 +274,7 @@ TEST_F(TaskTableBaseTest, PICK_TO_EXECUTE) { auto indexes = empty_table_.PickToExecute(1); ASSERT_EQ(indexes.size(), 1); - ASSERT_EQ(indexes[0], 2); + ASSERT_EQ(indexes[0] % empty_table_.Capacity(), 2); } TEST_F(TaskTableBaseTest, PICK_TO_EXECUTE_LIMIT) { @@ -302,8 +289,8 @@ TEST_F(TaskTableBaseTest, PICK_TO_EXECUTE_LIMIT) { auto indexes = empty_table_.PickToExecute(3); ASSERT_EQ(indexes.size(), 2); - ASSERT_EQ(indexes[0], 2); - ASSERT_EQ(indexes[1], 3); + ASSERT_EQ(indexes[0] % empty_table_.Capacity(), 2); + ASSERT_EQ(indexes[1] % empty_table_.Capacity(), 3); } TEST_F(TaskTableBaseTest, PICK_TO_EXECUTE_CACHE) { @@ -318,14 +305,14 @@ TEST_F(TaskTableBaseTest, PICK_TO_EXECUTE_CACHE) { // first pick, non-cache auto indexes = empty_table_.PickToExecute(1); ASSERT_EQ(indexes.size(), 1); - ASSERT_EQ(indexes[0], 2); + ASSERT_EQ(indexes[0] % empty_table_.Capacity(), 2); // second pick, iterate from 2 // invalid state change empty_table_[1]->state = milvus::scheduler::TaskTableItemState::START; indexes = empty_table_.PickToExecute(1); ASSERT_EQ(indexes.size(), 1); - ASSERT_EQ(indexes[0], 2); + ASSERT_EQ(indexes[0] % empty_table_.Capacity(), 2); } /************ TaskTableAdvanceTest ************/ @@ -356,8 +343,8 @@ class TaskTableAdvanceTest : public ::testing::Test { TEST_F(TaskTableAdvanceTest, LOAD) { std::vector before_state; - for (auto &task : table1_) { - before_state.push_back(task->state); + for (size_t i = 0; i < table1_.Size(); ++i) { + before_state.push_back(table1_[i]->state); } for (size_t i = 0; i < table1_.Size(); ++i) { @@ -375,8 +362,8 
@@ TEST_F(TaskTableAdvanceTest, LOAD) { TEST_F(TaskTableAdvanceTest, LOADED) { std::vector before_state; - for (auto &task : table1_) { - before_state.push_back(task->state); + for (size_t i = 0; i < table1_.Size(); ++i) { + before_state.push_back(table1_[i]->state); } for (size_t i = 0; i < table1_.Size(); ++i) { @@ -394,8 +381,8 @@ TEST_F(TaskTableAdvanceTest, LOADED) { TEST_F(TaskTableAdvanceTest, EXECUTE) { std::vector before_state; - for (auto &task : table1_) { - before_state.push_back(task->state); + for (size_t i = 0; i < table1_.Size(); ++i) { + before_state.push_back(table1_[i]->state); } for (size_t i = 0; i < table1_.Size(); ++i) { @@ -413,8 +400,8 @@ TEST_F(TaskTableAdvanceTest, EXECUTE) { TEST_F(TaskTableAdvanceTest, EXECUTED) { std::vector before_state; - for (auto &task : table1_) { - before_state.push_back(task->state); + for (size_t i = 0; i < table1_.Size(); ++i) { + before_state.push_back(table1_[i]->state); } for (size_t i = 0; i < table1_.Size(); ++i) { @@ -432,8 +419,8 @@ TEST_F(TaskTableAdvanceTest, EXECUTED) { TEST_F(TaskTableAdvanceTest, MOVE) { std::vector before_state; - for (auto &task : table1_) { - before_state.push_back(task->state); + for (size_t i = 0; i < table1_.Size(); ++i) { + before_state.push_back(table1_[i]->state); } for (size_t i = 0; i < table1_.Size(); ++i) { @@ -451,8 +438,8 @@ TEST_F(TaskTableAdvanceTest, MOVE) { TEST_F(TaskTableAdvanceTest, MOVED) { std::vector before_state; - for (auto &task : table1_) { - before_state.push_back(task->state); + for (size_t i = 0; i < table1_.Size(); ++i) { + before_state.push_back(table1_[i]->state); } for (size_t i = 0; i < table1_.Size(); ++i) { @@ -467,4 +454,3 @@ TEST_F(TaskTableAdvanceTest, MOVED) { } } } - From 37e4b0a93474132538866391254a56837dbf186e Mon Sep 17 00:00:00 2001 From: wxyu Date: Mon, 28 Oct 2019 16:35:53 +0800 Subject: [PATCH 066/149] Using new structure for tasktable Former-commit-id: 80376dce0fd24c8c541c05363d702941f69dca0e --- core/unittest/scheduler/test_tasktable.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/core/unittest/scheduler/test_tasktable.cpp b/core/unittest/scheduler/test_tasktable.cpp index 97aa1dce66..54f872c2fc 100644 --- a/core/unittest/scheduler/test_tasktable.cpp +++ b/core/unittest/scheduler/test_tasktable.cpp @@ -238,8 +238,8 @@ TEST_F(TaskTableBaseTest, PICK_TO_LOAD_LIMIT) { auto indexes = empty_table_.PickToLoad(3); ASSERT_EQ(indexes.size(), 3); ASSERT_EQ(indexes[0] % empty_table_.Capacity(), 2); - ASSERT_EQ(indexes[1]% empty_table_.Capacity(), 3); - ASSERT_EQ(indexes[2]% empty_table_.Capacity(), 4); + ASSERT_EQ(indexes[1] % empty_table_.Capacity(), 3); + ASSERT_EQ(indexes[2] % empty_table_.Capacity(), 4); } TEST_F(TaskTableBaseTest, PICK_TO_LOAD_CACHE) { From a10f83c69c87f72a745aafc033f291f2740badf6 Mon Sep 17 00:00:00 2001 From: jielinxu <52057195+jielinxu@users.noreply.github.com> Date: Mon, 28 Oct 2019 17:12:51 +0800 Subject: [PATCH 067/149] [skip ci] Typo change Former-commit-id: 0438b3ee8770228d39d0146e35b154a88381bf1f --- README.md | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/README.md b/README.md index 43f117ff92..9ff0d4a3ae 100644 --- a/README.md +++ b/README.md @@ -20,7 +20,7 @@ ## What is Milvus -Milvus is an open source similarity search engine for massive feature vectors. Designed with heterogeneous computing architecture for the best cost efficiency. Searches over billion-scale vectors take only milliseconds with minimum computing resources. 
+Milvus is an open source similarity search engine for massive-scale feature vectors. Built with heterogeneous computing architecture for the best cost efficiency. Searches over billion-scale vectors take only milliseconds with minimum computing resources. Milvus provides stable Python, Java and C++ APIs. @@ -28,7 +28,7 @@ Keep up-to-date with newest releases and latest updates by reading Milvus [relea - Heterogeneous computing - Milvus is designed with heterogeneous computing architecture for the best performance and cost efficiency. + Milvus is built with heterogeneous computing architecture for the best performance and cost efficiency. - Multiple indexes @@ -64,14 +64,14 @@ Keep up-to-date with newest releases and latest updates by reading Milvus [relea ## Get started -### Hardware Requirements +### Hardware requirements | Component | Recommended configuration | | --------- | ----------------------------------- | | CPU | Intel CPU Haswell or higher | | GPU | NVIDIA Pascal series or higher | -| Memory | 8 GB or more (depends on data size) | -| Storage | SATA 3.0 SSD or higher | +| RAM | 8 GB or more (depends on data size) | +| Hard drive| SATA 3.0 SSD or higher | ### Install using docker @@ -185,7 +185,7 @@ Deep thanks and appreciation go to the following people. - [akihoni](https://github.com/akihoni) found a broken link and a small typo in the README file. -## Milvus Roadmap +## Milvus roadmap Please read our [roadmap](https://milvus.io/docs/en/roadmap/) to learn about upcoming features. From 5e504b343540ca549d21f9506bfcc0e0edaec817 Mon Sep 17 00:00:00 2001 From: wxyu Date: Mon, 28 Oct 2019 17:27:49 +0800 Subject: [PATCH 068/149] rename functions tasktable, make it accessing likes standard structure Former-commit-id: c0ba41635e710e0807af0fe07d0b6a266f60d044 --- core/src/scheduler/TaskTable.cpp | 5 - core/src/scheduler/TaskTable.h | 57 +++++------ core/src/scheduler/resource/Resource.cpp | 4 +- core/unittest/scheduler/test_scheduler.cpp | 2 +- core/unittest/scheduler/test_tasktable.cpp | 112 ++++++++++----------- 5 files changed, 84 insertions(+), 96 deletions(-) diff --git a/core/src/scheduler/TaskTable.cpp b/core/src/scheduler/TaskTable.cpp index e35c7cd255..bd3dd466a9 100644 --- a/core/src/scheduler/TaskTable.cpp +++ b/core/src/scheduler/TaskTable.cpp @@ -291,11 +291,6 @@ TaskTable::Put(std::vector& tasks) { } } -TaskTableItemPtr -TaskTable::Get(uint64_t index) { - return table_[index]; -} - size_t TaskTable::TaskToExecute() { size_t count = 0; diff --git a/core/src/scheduler/TaskTable.h b/core/src/scheduler/TaskTable.h index 052be66890..898141d028 100644 --- a/core/src/scheduler/TaskTable.h +++ b/core/src/scheduler/TaskTable.h @@ -106,6 +106,11 @@ class TaskTable : public interface::dumpable { TaskTable(const TaskTable&) = delete; TaskTable(TaskTable&&) = delete; + public: + json + Dump() const override; + + public: inline void RegisterSubscriber(std::function subscriber) { subscriber_ = std::move(subscriber); @@ -124,40 +129,35 @@ class TaskTable : public interface::dumpable { void Put(std::vector& tasks); - /* - * Return task table item reference; - */ - TaskTableItemPtr - Get(uint64_t index); - - inline size_t - Capacity() { - return table_.capacity(); - } - - /* - * Return size of task table; - */ - inline size_t - Size() { - return table_.size(); - } - size_t TaskToExecute(); - public: - const TaskTableItemPtr& operator[](uint64_t index) { - return table_[index]; - } - - public: std::vector PickToLoad(uint64_t limit); std::vector PickToExecute(uint64_t limit); + public: + 
inline const TaskTableItemPtr& operator[](uint64_t index) { + return table_[index]; + } + + inline const TaskTableItemPtr& + at(uint64_t index) { + return table_[index]; + } + + inline size_t + capacity() { + return table_.capacity(); + } + + inline size_t + size() { + return table_.size(); + } + public: /******** Action ********/ @@ -223,13 +223,6 @@ class TaskTable : public interface::dumpable { return table_[index]->Moved(); } - public: - /* - * Dump; - */ - json - Dump() const override; - private: std::uint64_t id_ = 0; CircleQueue table_; diff --git a/core/src/scheduler/resource/Resource.cpp b/core/src/scheduler/resource/Resource.cpp index 2577617dab..8e10592262 100644 --- a/core/src/scheduler/resource/Resource.cpp +++ b/core/src/scheduler/resource/Resource.cpp @@ -132,7 +132,7 @@ Resource::pick_task_load() { for (auto index : indexes) { // try to set one task loading, then return if (task_table_.Load(index)) - return task_table_.Get(index); + return task_table_.at(index); // else try next } return nullptr; @@ -150,7 +150,7 @@ Resource::pick_task_execute() { } if (task_table_.Execute(index)) { - return task_table_.Get(index); + return task_table_.at(index); } // if (task_table_[index]->task->label()->Type() == TaskLabelType::SPECIFIED_RESOURCE) { // if (task_table_.Get(index)->task->path().Current() == task_table_.Get(index)->task->path().Last() diff --git a/core/unittest/scheduler/test_scheduler.cpp b/core/unittest/scheduler/test_scheduler.cpp index aebdfa2af2..b418b7c80e 100644 --- a/core/unittest/scheduler/test_scheduler.cpp +++ b/core/unittest/scheduler/test_scheduler.cpp @@ -165,7 +165,7 @@ TEST_F(SchedulerTest, ON_LOAD_COMPLETED) { } sleep(3); - ASSERT_EQ(res_mgr_->GetResource(ResourceType::GPU, 1)->task_table().Size(), NUM); + ASSERT_EQ(res_mgr_->GetResource(ResourceType::GPU, 1)->task_table().size(), NUM); } TEST_F(SchedulerTest, PUSH_TASK_TO_NEIGHBOUR_RANDOMLY_TEST) { diff --git a/core/unittest/scheduler/test_tasktable.cpp b/core/unittest/scheduler/test_tasktable.cpp index 54f872c2fc..601bd2431d 100644 --- a/core/unittest/scheduler/test_tasktable.cpp +++ b/core/unittest/scheduler/test_tasktable.cpp @@ -183,19 +183,19 @@ TEST_F(TaskTableBaseTest, SUBSCRIBER) { TEST_F(TaskTableBaseTest, PUT_TASK) { empty_table_.Put(task1_); - ASSERT_EQ(empty_table_.Get(0)->task, task1_); + ASSERT_EQ(empty_table_.at(0)->task, task1_); } TEST_F(TaskTableBaseTest, PUT_INVALID_TEST) { empty_table_.Put(invalid_task_); - ASSERT_EQ(empty_table_.Get(0)->task, invalid_task_); + ASSERT_EQ(empty_table_.at(0)->task, invalid_task_); } TEST_F(TaskTableBaseTest, PUT_BATCH) { std::vector tasks{task1_, task2_}; empty_table_.Put(tasks); - ASSERT_EQ(empty_table_.Get(0)->task, task1_); - ASSERT_EQ(empty_table_.Get(1)->task, task2_); + ASSERT_EQ(empty_table_.at(0)->task, task1_); + ASSERT_EQ(empty_table_.at(1)->task, task2_); } TEST_F(TaskTableBaseTest, PUT_EMPTY_BATCH) { @@ -204,14 +204,14 @@ TEST_F(TaskTableBaseTest, PUT_EMPTY_BATCH) { } TEST_F(TaskTableBaseTest, SIZE) { - ASSERT_EQ(empty_table_.Size(), 0); + ASSERT_EQ(empty_table_.size(), 0); empty_table_.Put(task1_); - ASSERT_EQ(empty_table_.Size(), 1); + ASSERT_EQ(empty_table_.size(), 1); } TEST_F(TaskTableBaseTest, OPERATOR) { empty_table_.Put(task1_); - ASSERT_EQ(empty_table_.Get(0), empty_table_[0]); + ASSERT_EQ(empty_table_.at(0), empty_table_[0]); } TEST_F(TaskTableBaseTest, PICK_TO_LOAD) { @@ -224,7 +224,7 @@ TEST_F(TaskTableBaseTest, PICK_TO_LOAD) { auto indexes = empty_table_.PickToLoad(1); ASSERT_EQ(indexes.size(), 1); - ASSERT_EQ(indexes[0] % 
empty_table_.Capacity(), 2); + ASSERT_EQ(indexes[0] % empty_table_.capacity(), 2); } TEST_F(TaskTableBaseTest, PICK_TO_LOAD_LIMIT) { @@ -237,9 +237,9 @@ TEST_F(TaskTableBaseTest, PICK_TO_LOAD_LIMIT) { auto indexes = empty_table_.PickToLoad(3); ASSERT_EQ(indexes.size(), 3); - ASSERT_EQ(indexes[0] % empty_table_.Capacity(), 2); - ASSERT_EQ(indexes[1] % empty_table_.Capacity(), 3); - ASSERT_EQ(indexes[2] % empty_table_.Capacity(), 4); + ASSERT_EQ(indexes[0] % empty_table_.capacity(), 2); + ASSERT_EQ(indexes[1] % empty_table_.capacity(), 3); + ASSERT_EQ(indexes[2] % empty_table_.capacity(), 4); } TEST_F(TaskTableBaseTest, PICK_TO_LOAD_CACHE) { @@ -253,14 +253,14 @@ TEST_F(TaskTableBaseTest, PICK_TO_LOAD_CACHE) { // first pick, non-cache auto indexes = empty_table_.PickToLoad(1); ASSERT_EQ(indexes.size(), 1); - ASSERT_EQ(indexes[0] % empty_table_.Capacity(), 2); + ASSERT_EQ(indexes[0] % empty_table_.capacity(), 2); // second pick, iterate from 2 // invalid state change empty_table_[1]->state = milvus::scheduler::TaskTableItemState::START; indexes = empty_table_.PickToLoad(1); ASSERT_EQ(indexes.size(), 1); - ASSERT_EQ(indexes[0] % empty_table_.Capacity(), 2); + ASSERT_EQ(indexes[0] % empty_table_.capacity(), 2); } TEST_F(TaskTableBaseTest, PICK_TO_EXECUTE) { @@ -274,7 +274,7 @@ TEST_F(TaskTableBaseTest, PICK_TO_EXECUTE) { auto indexes = empty_table_.PickToExecute(1); ASSERT_EQ(indexes.size(), 1); - ASSERT_EQ(indexes[0] % empty_table_.Capacity(), 2); + ASSERT_EQ(indexes[0] % empty_table_.capacity(), 2); } TEST_F(TaskTableBaseTest, PICK_TO_EXECUTE_LIMIT) { @@ -289,8 +289,8 @@ TEST_F(TaskTableBaseTest, PICK_TO_EXECUTE_LIMIT) { auto indexes = empty_table_.PickToExecute(3); ASSERT_EQ(indexes.size(), 2); - ASSERT_EQ(indexes[0] % empty_table_.Capacity(), 2); - ASSERT_EQ(indexes[1] % empty_table_.Capacity(), 3); + ASSERT_EQ(indexes[0] % empty_table_.capacity(), 2); + ASSERT_EQ(indexes[1] % empty_table_.capacity(), 3); } TEST_F(TaskTableBaseTest, PICK_TO_EXECUTE_CACHE) { @@ -305,14 +305,14 @@ TEST_F(TaskTableBaseTest, PICK_TO_EXECUTE_CACHE) { // first pick, non-cache auto indexes = empty_table_.PickToExecute(1); ASSERT_EQ(indexes.size(), 1); - ASSERT_EQ(indexes[0] % empty_table_.Capacity(), 2); + ASSERT_EQ(indexes[0] % empty_table_.capacity(), 2); // second pick, iterate from 2 // invalid state change empty_table_[1]->state = milvus::scheduler::TaskTableItemState::START; indexes = empty_table_.PickToExecute(1); ASSERT_EQ(indexes.size(), 1); - ASSERT_EQ(indexes[0] % empty_table_.Capacity(), 2); + ASSERT_EQ(indexes[0] % empty_table_.capacity(), 2); } /************ TaskTableAdvanceTest ************/ @@ -328,14 +328,14 @@ class TaskTableAdvanceTest : public ::testing::Test { table1_.Put(task); } - table1_.Get(0)->state = milvus::scheduler::TaskTableItemState::INVALID; - table1_.Get(1)->state = milvus::scheduler::TaskTableItemState::START; - table1_.Get(2)->state = milvus::scheduler::TaskTableItemState::LOADING; - table1_.Get(3)->state = milvus::scheduler::TaskTableItemState::LOADED; - table1_.Get(4)->state = milvus::scheduler::TaskTableItemState::EXECUTING; - table1_.Get(5)->state = milvus::scheduler::TaskTableItemState::EXECUTED; - table1_.Get(6)->state = milvus::scheduler::TaskTableItemState::MOVING; - table1_.Get(7)->state = milvus::scheduler::TaskTableItemState::MOVED; + table1_.at(0)->state = milvus::scheduler::TaskTableItemState::INVALID; + table1_.at(1)->state = milvus::scheduler::TaskTableItemState::START; + table1_.at(2)->state = milvus::scheduler::TaskTableItemState::LOADING; + table1_.at(3)->state 
= milvus::scheduler::TaskTableItemState::LOADED; + table1_.at(4)->state = milvus::scheduler::TaskTableItemState::EXECUTING; + table1_.at(5)->state = milvus::scheduler::TaskTableItemState::EXECUTED; + table1_.at(6)->state = milvus::scheduler::TaskTableItemState::MOVING; + table1_.at(7)->state = milvus::scheduler::TaskTableItemState::MOVED; } milvus::scheduler::TaskTable table1_; @@ -343,114 +343,114 @@ class TaskTableAdvanceTest : public ::testing::Test { TEST_F(TaskTableAdvanceTest, LOAD) { std::vector before_state; - for (size_t i = 0; i < table1_.Size(); ++i) { + for (size_t i = 0; i < table1_.size(); ++i) { before_state.push_back(table1_[i]->state); } - for (size_t i = 0; i < table1_.Size(); ++i) { + for (size_t i = 0; i < table1_.size(); ++i) { table1_.Load(i); } - for (size_t i = 0; i < table1_.Size(); ++i) { + for (size_t i = 0; i < table1_.size(); ++i) { if (before_state[i] == milvus::scheduler::TaskTableItemState::START) { - ASSERT_EQ(table1_.Get(i)->state, milvus::scheduler::TaskTableItemState::LOADING); + ASSERT_EQ(table1_.at(i)->state, milvus::scheduler::TaskTableItemState::LOADING); } else { - ASSERT_EQ(table1_.Get(i)->state, before_state[i]); + ASSERT_EQ(table1_.at(i)->state, before_state[i]); } } } TEST_F(TaskTableAdvanceTest, LOADED) { std::vector before_state; - for (size_t i = 0; i < table1_.Size(); ++i) { + for (size_t i = 0; i < table1_.size(); ++i) { before_state.push_back(table1_[i]->state); } - for (size_t i = 0; i < table1_.Size(); ++i) { + for (size_t i = 0; i < table1_.size(); ++i) { table1_.Loaded(i); } - for (size_t i = 0; i < table1_.Size(); ++i) { + for (size_t i = 0; i < table1_.size(); ++i) { if (before_state[i] == milvus::scheduler::TaskTableItemState::LOADING) { - ASSERT_EQ(table1_.Get(i)->state, milvus::scheduler::TaskTableItemState::LOADED); + ASSERT_EQ(table1_.at(i)->state, milvus::scheduler::TaskTableItemState::LOADED); } else { - ASSERT_EQ(table1_.Get(i)->state, before_state[i]); + ASSERT_EQ(table1_.at(i)->state, before_state[i]); } } } TEST_F(TaskTableAdvanceTest, EXECUTE) { std::vector before_state; - for (size_t i = 0; i < table1_.Size(); ++i) { + for (size_t i = 0; i < table1_.size(); ++i) { before_state.push_back(table1_[i]->state); } - for (size_t i = 0; i < table1_.Size(); ++i) { + for (size_t i = 0; i < table1_.size(); ++i) { table1_.Execute(i); } - for (size_t i = 0; i < table1_.Size(); ++i) { + for (size_t i = 0; i < table1_.size(); ++i) { if (before_state[i] == milvus::scheduler::TaskTableItemState::LOADED) { - ASSERT_EQ(table1_.Get(i)->state, milvus::scheduler::TaskTableItemState::EXECUTING); + ASSERT_EQ(table1_.at(i)->state, milvus::scheduler::TaskTableItemState::EXECUTING); } else { - ASSERT_EQ(table1_.Get(i)->state, before_state[i]); + ASSERT_EQ(table1_.at(i)->state, before_state[i]); } } } TEST_F(TaskTableAdvanceTest, EXECUTED) { std::vector before_state; - for (size_t i = 0; i < table1_.Size(); ++i) { + for (size_t i = 0; i < table1_.size(); ++i) { before_state.push_back(table1_[i]->state); } - for (size_t i = 0; i < table1_.Size(); ++i) { + for (size_t i = 0; i < table1_.size(); ++i) { table1_.Executed(i); } - for (size_t i = 0; i < table1_.Size(); ++i) { + for (size_t i = 0; i < table1_.size(); ++i) { if (before_state[i] == milvus::scheduler::TaskTableItemState::EXECUTING) { - ASSERT_EQ(table1_.Get(i)->state, milvus::scheduler::TaskTableItemState::EXECUTED); + ASSERT_EQ(table1_.at(i)->state, milvus::scheduler::TaskTableItemState::EXECUTED); } else { - ASSERT_EQ(table1_.Get(i)->state, before_state[i]); + 
ASSERT_EQ(table1_.at(i)->state, before_state[i]); } } } TEST_F(TaskTableAdvanceTest, MOVE) { std::vector before_state; - for (size_t i = 0; i < table1_.Size(); ++i) { + for (size_t i = 0; i < table1_.size(); ++i) { before_state.push_back(table1_[i]->state); } - for (size_t i = 0; i < table1_.Size(); ++i) { + for (size_t i = 0; i < table1_.size(); ++i) { table1_.Move(i); } - for (size_t i = 0; i < table1_.Size(); ++i) { + for (size_t i = 0; i < table1_.size(); ++i) { if (before_state[i] == milvus::scheduler::TaskTableItemState::LOADED) { - ASSERT_EQ(table1_.Get(i)->state, milvus::scheduler::TaskTableItemState::MOVING); + ASSERT_EQ(table1_.at(i)->state, milvus::scheduler::TaskTableItemState::MOVING); } else { - ASSERT_EQ(table1_.Get(i)->state, before_state[i]); + ASSERT_EQ(table1_.at(i)->state, before_state[i]); } } } TEST_F(TaskTableAdvanceTest, MOVED) { std::vector before_state; - for (size_t i = 0; i < table1_.Size(); ++i) { + for (size_t i = 0; i < table1_.size(); ++i) { before_state.push_back(table1_[i]->state); } - for (size_t i = 0; i < table1_.Size(); ++i) { + for (size_t i = 0; i < table1_.size(); ++i) { table1_.Moved(i); } - for (size_t i = 0; i < table1_.Size(); ++i) { + for (size_t i = 0; i < table1_.size(); ++i) { if (before_state[i] == milvus::scheduler::TaskTableItemState::MOVING) { - ASSERT_EQ(table1_.Get(i)->state, milvus::scheduler::TaskTableItemState::MOVED); + ASSERT_EQ(table1_.at(i)->state, milvus::scheduler::TaskTableItemState::MOVED); } else { - ASSERT_EQ(table1_.Get(i)->state, before_state[i]); + ASSERT_EQ(table1_.at(i)->state, before_state[i]); } } } From baeff23c6bba1840ec9c62e1493a71e0bc53a5d0 Mon Sep 17 00:00:00 2001 From: jielinxu <52057195+jielinxu@users.noreply.github.com> Date: Mon, 28 Oct 2019 17:32:49 +0800 Subject: [PATCH 069/149] [skip ci] minor change Former-commit-id: 5b3a820dc9d017c5dfcb8aa60ed7d3d92d5ef114 --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 9ff0d4a3ae..3dfd95d2f4 100644 --- a/README.md +++ b/README.md @@ -180,7 +180,7 @@ To connect with other users and contributors, welcome to join our [slack channel ## Contributors -Deep thanks and appreciation go to the following people. +We greatly appreciate the help and contributions of the following people. - [akihoni](https://github.com/akihoni) found a broken link and a small typo in the README file. From a1d5ff06df232b9fdab143cedeac02505b177272 Mon Sep 17 00:00:00 2001 From: jielinxu <52057195+jielinxu@users.noreply.github.com> Date: Mon, 28 Oct 2019 17:48:00 +0800 Subject: [PATCH 070/149] [skip ci] minor change Former-commit-id: 10f0bf48d869fc193e33c1065f43d11c76560594 --- README.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 3dfd95d2f4..5c5065442f 100644 --- a/README.md +++ b/README.md @@ -178,9 +178,9 @@ We use [GitHub issues](https://github.com/milvus-io/milvus/issues/new/choose) to To connect with other users and contributors, welcome to join our [slack channel](https://join.slack.com/t/milvusio/shared_invite/enQtNzY1OTQ0NDI3NjMzLWNmYmM1NmNjOTQ5MGI5NDhhYmRhMGU5M2NhNzhhMDMzY2MzNDdlYjM5ODQ5MmE3ODFlYzU3YjJkNmVlNDQ2ZTk). -## Contributors +## Thanks -We greatly appreciate the help and contributions of the following people. +We greatly appreciate the help of the following people. - [akihoni](https://github.com/akihoni) found a broken link and a small typo in the README file. 
From 53b3b60db2ed87a8c558ae893608fab0ffc18578 Mon Sep 17 00:00:00 2001 From: wxyu Date: Mon, 28 Oct 2019 19:19:42 +0800 Subject: [PATCH 071/149] Using shared_ptr instead of weak_ptr to avoid performance loss Former-commit-id: 250cb7200b6eefdd9cbb9fd631379d59aca2f368 --- CHANGELOG.md | 1 + core/src/scheduler/Algorithm.cpp | 2 +- core/src/scheduler/Scheduler.cpp | 64 +++++++++---------- core/src/scheduler/Scheduler.h | 6 +- core/src/scheduler/action/Action.h | 5 +- .../scheduler/action/PushTaskToNeighbour.cpp | 22 +++---- core/src/scheduler/event/Event.h | 4 +- core/src/scheduler/event/FinishTaskEvent.h | 2 +- core/src/scheduler/event/LoadCompletedEvent.h | 2 +- core/src/scheduler/event/StartUpEvent.h | 2 +- .../scheduler/event/TaskTableUpdatedEvent.h | 2 +- core/src/scheduler/resource/Node.cpp | 4 +- core/src/scheduler/resource/Node.h | 8 ++- core/unittest/scheduler/test_event.cpp | 8 +-- core/unittest/scheduler/test_node.cpp | 16 ++--- 15 files changed, 75 insertions(+), 73 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index bcb3f5b70f..00402ea15f 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -12,6 +12,7 @@ Please mark all change in change log and use the ticket from JIRA. - \#80 - Print version information into log during server start - \#82 - Move easyloggingpp into "external" directory - \#92 - Speed up CMake build process +- \#118 - Using shared_ptr instead of weak_ptr to avoid performance loss ## Feature - \#115 - Using new structure for tasktable diff --git a/core/src/scheduler/Algorithm.cpp b/core/src/scheduler/Algorithm.cpp index b2156b3f97..fb1742e6e1 100644 --- a/core/src/scheduler/Algorithm.cpp +++ b/core/src/scheduler/Algorithm.cpp @@ -54,7 +54,7 @@ ShortestPath(const ResourcePtr& src, const ResourcePtr& dest, const ResourceMgrP auto cur_neighbours = cur_node->GetNeighbours(); for (auto& neighbour : cur_neighbours) { - auto neighbour_res = std::static_pointer_cast(neighbour.neighbour_node.lock()); + auto neighbour_res = std::static_pointer_cast(neighbour.neighbour_node); dis_matrix[name_id_map.at(res->name())][name_id_map.at(neighbour_res->name())] = neighbour.connection.transport_cost(); } diff --git a/core/src/scheduler/Scheduler.cpp b/core/src/scheduler/Scheduler.cpp index fef5cc1a95..cba847c25e 100644 --- a/core/src/scheduler/Scheduler.cpp +++ b/core/src/scheduler/Scheduler.cpp @@ -26,10 +26,8 @@ namespace milvus { namespace scheduler { -Scheduler::Scheduler(ResourceMgrWPtr res_mgr) : running_(false), res_mgr_(std::move(res_mgr)) { - if (auto mgr = res_mgr_.lock()) { - mgr->RegisterSubscriber(std::bind(&Scheduler::PostEvent, this, std::placeholders::_1)); - } +Scheduler::Scheduler(ResourceMgrPtr res_mgr) : running_(false), res_mgr_(std::move(res_mgr)) { + res_mgr_->RegisterSubscriber(std::bind(&Scheduler::PostEvent, this, std::placeholders::_1)); event_register_.insert(std::make_pair(static_cast(EventType::START_UP), std::bind(&Scheduler::OnStartUp, this, std::placeholders::_1))); event_register_.insert(std::make_pair(static_cast(EventType::LOAD_COMPLETED), @@ -40,6 +38,10 @@ Scheduler::Scheduler(ResourceMgrWPtr res_mgr) : running_(false), res_mgr_(std::m std::bind(&Scheduler::OnFinishTask, this, std::placeholders::_1))); } +Scheduler::~Scheduler() { + res_mgr_ = nullptr; +} + void Scheduler::Start() { running_ = true; @@ -100,51 +102,45 @@ Scheduler::Process(const EventPtr& event) { void Scheduler::OnLoadCompleted(const EventPtr& event) { auto load_completed_event = std::static_pointer_cast(event); - if (auto resource = event->resource_.lock()) { - 
resource->WakeupExecutor(); - auto task_table_type = load_completed_event->task_table_item_->task->label()->Type(); - switch (task_table_type) { - case TaskLabelType::DEFAULT: { - Action::DefaultLabelTaskScheduler(res_mgr_, resource, load_completed_event); - break; - } - case TaskLabelType::SPECIFIED_RESOURCE: { - Action::SpecifiedResourceLabelTaskScheduler(res_mgr_, resource, load_completed_event); - break; - } - case TaskLabelType::BROADCAST: { - if (resource->HasExecutor() == false) { - load_completed_event->task_table_item_->Move(); - } - Action::PushTaskToAllNeighbour(load_completed_event->task_table_item_->task, resource); - break; - } - default: { break; } + auto resource = event->resource_; + resource->WakeupExecutor(); + + auto task_table_type = load_completed_event->task_table_item_->task->label()->Type(); + switch (task_table_type) { + case TaskLabelType::DEFAULT: { + Action::DefaultLabelTaskScheduler(res_mgr_, resource, load_completed_event); + break; } - resource->WakeupLoader(); + case TaskLabelType::SPECIFIED_RESOURCE: { + Action::SpecifiedResourceLabelTaskScheduler(res_mgr_, resource, load_completed_event); + break; + } + case TaskLabelType::BROADCAST: { + if (resource->HasExecutor() == false) { + load_completed_event->task_table_item_->Move(); + } + Action::PushTaskToAllNeighbour(load_completed_event->task_table_item_->task, resource); + break; + } + default: { break; } } + resource->WakeupLoader(); } void Scheduler::OnStartUp(const EventPtr& event) { - if (auto resource = event->resource_.lock()) { - resource->WakeupLoader(); - } + event->resource_->WakeupLoader(); } void Scheduler::OnFinishTask(const EventPtr& event) { - if (auto resource = event->resource_.lock()) { - resource->WakeupLoader(); - } + event->resource_->WakeupLoader(); } void Scheduler::OnTaskTableUpdated(const EventPtr& event) { - if (auto resource = event->resource_.lock()) { - resource->WakeupLoader(); - } + event->resource_->WakeupLoader(); } } // namespace scheduler diff --git a/core/src/scheduler/Scheduler.h b/core/src/scheduler/Scheduler.h index 8d9ea83794..9e3a864774 100644 --- a/core/src/scheduler/Scheduler.h +++ b/core/src/scheduler/Scheduler.h @@ -34,7 +34,9 @@ namespace scheduler { class Scheduler : public interface::dumpable { public: - explicit Scheduler(ResourceMgrWPtr res_mgr); + explicit Scheduler(ResourceMgrPtr res_mgr); + + ~Scheduler(); Scheduler(const Scheduler&) = delete; Scheduler(Scheduler&&) = delete; @@ -118,7 +120,7 @@ class Scheduler : public interface::dumpable { std::unordered_map> event_register_; - ResourceMgrWPtr res_mgr_; + ResourceMgrPtr res_mgr_; std::queue event_queue_; std::thread worker_thread_; std::mutex event_mutex_; diff --git a/core/src/scheduler/action/Action.h b/core/src/scheduler/action/Action.h index 51c788f82f..ff72910055 100644 --- a/core/src/scheduler/action/Action.h +++ b/core/src/scheduler/action/Action.h @@ -37,10 +37,11 @@ class Action { PushTaskToResource(const TaskPtr& task, const ResourcePtr& dest); static void - DefaultLabelTaskScheduler(ResourceMgrWPtr res_mgr, ResourcePtr resource, std::shared_ptr event); + DefaultLabelTaskScheduler(const ResourceMgrPtr& res_mgr, ResourcePtr resource, + std::shared_ptr event); static void - SpecifiedResourceLabelTaskScheduler(ResourceMgrWPtr res_mgr, ResourcePtr resource, + SpecifiedResourceLabelTaskScheduler(const ResourceMgrPtr& res_mgr, ResourcePtr resource, std::shared_ptr event); }; diff --git a/core/src/scheduler/action/PushTaskToNeighbour.cpp b/core/src/scheduler/action/PushTaskToNeighbour.cpp index 
c64e81dcfa..6f74849eac 100644 --- a/core/src/scheduler/action/PushTaskToNeighbour.cpp +++ b/core/src/scheduler/action/PushTaskToNeighbour.cpp @@ -30,7 +30,7 @@ std::vector get_neighbours(const ResourcePtr& self) { std::vector neighbours; for (auto& neighbour_node : self->GetNeighbours()) { - auto node = neighbour_node.neighbour_node.lock(); + auto node = neighbour_node.neighbour_node; if (not node) continue; @@ -46,7 +46,7 @@ std::vector> get_neighbours_with_connetion(const ResourcePtr& self) { std::vector> neighbours; for (auto& neighbour_node : self->GetNeighbours()) { - auto node = neighbour_node.neighbour_node.lock(); + auto node = neighbour_node.neighbour_node; if (not node) continue; @@ -102,7 +102,7 @@ Action::PushTaskToResource(const TaskPtr& task, const ResourcePtr& dest) { } void -Action::DefaultLabelTaskScheduler(ResourceMgrWPtr res_mgr, ResourcePtr resource, +Action::DefaultLabelTaskScheduler(const ResourceMgrPtr& res_mgr, ResourcePtr resource, std::shared_ptr event) { if (not resource->HasExecutor() && event->task_table_item_->Move()) { auto task = event->task_table_item_->task; @@ -114,11 +114,11 @@ Action::DefaultLabelTaskScheduler(ResourceMgrWPtr res_mgr, ResourcePtr resource, if (auto index_engine = search_task->index_engine_) { auto location = index_engine->GetLocation(); - for (auto i = 0; i < res_mgr.lock()->GetNumGpuResource(); ++i) { + for (auto i = 0; i < res_mgr->GetNumGpuResource(); ++i) { auto index = milvus::cache::GpuCacheMgr::GetInstance(i)->GetIndex(location); if (index != nullptr) { moved = true; - auto dest_resource = res_mgr.lock()->GetResource(ResourceType::GPU, i); + auto dest_resource = res_mgr->GetResource(ResourceType::GPU, i); PushTaskToResource(event->task_table_item_->task, dest_resource); break; } @@ -133,17 +133,17 @@ Action::DefaultLabelTaskScheduler(ResourceMgrWPtr res_mgr, ResourcePtr resource, } void -Action::SpecifiedResourceLabelTaskScheduler(ResourceMgrWPtr res_mgr, ResourcePtr resource, +Action::SpecifiedResourceLabelTaskScheduler(const ResourceMgrPtr& res_mgr, ResourcePtr resource, std::shared_ptr event) { auto task = event->task_table_item_->task; if (resource->type() == ResourceType::DISK) { // step 1: calculate shortest path per resource, from disk to compute resource - auto compute_resources = res_mgr.lock()->GetComputeResources(); + auto compute_resources = res_mgr->GetComputeResources(); std::vector> paths; std::vector transport_costs; for (auto& res : compute_resources) { std::vector path; - uint64_t transport_cost = ShortestPath(resource, res, res_mgr.lock(), path); + uint64_t transport_cost = ShortestPath(resource, res, res_mgr, path); transport_costs.push_back(transport_cost); paths.emplace_back(path); } @@ -187,10 +187,10 @@ Action::SpecifiedResourceLabelTaskScheduler(ResourceMgrWPtr res_mgr, ResourcePtr Status stat = config.GetResourceConfigIndexBuildDevice(build_index_gpu); bool find_gpu_res = false; - if (res_mgr.lock()->GetResource(ResourceType::GPU, build_index_gpu) != nullptr) { + if (res_mgr->GetResource(ResourceType::GPU, build_index_gpu) != nullptr) { for (uint64_t i = 0; i < compute_resources.size(); ++i) { if (compute_resources[i]->name() == - res_mgr.lock()->GetResource(ResourceType::GPU, build_index_gpu)->name()) { + res_mgr->GetResource(ResourceType::GPU, build_index_gpu)->name()) { find_gpu_res = true; Path task_path(paths[i], paths[i].size() - 1); task->path() = task_path; @@ -208,7 +208,7 @@ Action::SpecifiedResourceLabelTaskScheduler(ResourceMgrWPtr res_mgr, ResourcePtr resource->WakeupExecutor(); } else { 
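// At this point the task has not yet reached the last hop of the path
// computed by ShortestPath() above, so rather than being executed on this
// resource, the intent is to hand it to the next resource along that path.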
auto next_res_name = task->path().Next(); - auto next_res = res_mgr.lock()->GetResource(next_res_name); + auto next_res = res_mgr->GetResource(next_res_name); // if (event->task_table_item_->Move()) { // next_res->task_table().Put(task); // } diff --git a/core/src/scheduler/event/Event.h b/core/src/scheduler/event/Event.h index 5b1f37fb99..3c29e02225 100644 --- a/core/src/scheduler/event/Event.h +++ b/core/src/scheduler/event/Event.h @@ -30,7 +30,7 @@ class Resource; class Event { public: - explicit Event(EventType type, std::weak_ptr resource) : type_(type), resource_(std::move(resource)) { + explicit Event(EventType type, std::shared_ptr resource) : type_(type), resource_(std::move(resource)) { } inline EventType @@ -46,7 +46,7 @@ class Event { public: EventType type_; - std::weak_ptr resource_; + std::shared_ptr resource_; }; using EventPtr = std::shared_ptr; diff --git a/core/src/scheduler/event/FinishTaskEvent.h b/core/src/scheduler/event/FinishTaskEvent.h index 1b2d8f9818..afaf02de92 100644 --- a/core/src/scheduler/event/FinishTaskEvent.h +++ b/core/src/scheduler/event/FinishTaskEvent.h @@ -29,7 +29,7 @@ namespace scheduler { class FinishTaskEvent : public Event { public: - FinishTaskEvent(std::weak_ptr resource, TaskTableItemPtr task_table_item) + FinishTaskEvent(std::shared_ptr resource, TaskTableItemPtr task_table_item) : Event(EventType::FINISH_TASK, std::move(resource)), task_table_item_(std::move(task_table_item)) { } diff --git a/core/src/scheduler/event/LoadCompletedEvent.h b/core/src/scheduler/event/LoadCompletedEvent.h index 5a701e0dfc..0aa3bf79d6 100644 --- a/core/src/scheduler/event/LoadCompletedEvent.h +++ b/core/src/scheduler/event/LoadCompletedEvent.h @@ -29,7 +29,7 @@ namespace scheduler { class LoadCompletedEvent : public Event { public: - LoadCompletedEvent(std::weak_ptr resource, TaskTableItemPtr task_table_item) + LoadCompletedEvent(std::shared_ptr resource, TaskTableItemPtr task_table_item) : Event(EventType::LOAD_COMPLETED, std::move(resource)), task_table_item_(std::move(task_table_item)) { } diff --git a/core/src/scheduler/event/StartUpEvent.h b/core/src/scheduler/event/StartUpEvent.h index c4abb4e27c..2d8292ea70 100644 --- a/core/src/scheduler/event/StartUpEvent.h +++ b/core/src/scheduler/event/StartUpEvent.h @@ -28,7 +28,7 @@ namespace scheduler { class StartUpEvent : public Event { public: - explicit StartUpEvent(std::weak_ptr resource) : Event(EventType::START_UP, std::move(resource)) { + explicit StartUpEvent(std::shared_ptr resource) : Event(EventType::START_UP, std::move(resource)) { } inline std::string diff --git a/core/src/scheduler/event/TaskTableUpdatedEvent.h b/core/src/scheduler/event/TaskTableUpdatedEvent.h index ed64a42d89..9be27e69b6 100644 --- a/core/src/scheduler/event/TaskTableUpdatedEvent.h +++ b/core/src/scheduler/event/TaskTableUpdatedEvent.h @@ -28,7 +28,7 @@ namespace scheduler { class TaskTableUpdatedEvent : public Event { public: - explicit TaskTableUpdatedEvent(std::weak_ptr resource) + explicit TaskTableUpdatedEvent(std::shared_ptr resource) : Event(EventType::TASK_TABLE_UPDATED, std::move(resource)) { } diff --git a/core/src/scheduler/resource/Node.cpp b/core/src/scheduler/resource/Node.cpp index dcf03a321c..bc0e559175 100644 --- a/core/src/scheduler/resource/Node.cpp +++ b/core/src/scheduler/resource/Node.cpp @@ -58,9 +58,7 @@ Node::Dump() const { void Node::AddNeighbour(const NeighbourNodePtr& neighbour_node, Connection& connection) { std::lock_guard lk(mutex_); - if (auto s = neighbour_node.lock()) { - 
neighbours_.emplace(std::make_pair(s->id_, Neighbour(neighbour_node, connection))); - } + neighbours_.emplace(std::make_pair(neighbour_node->id_, Neighbour(neighbour_node, connection))); // else do nothing, consider it.. } diff --git a/core/src/scheduler/resource/Node.h b/core/src/scheduler/resource/Node.h index 4539c8c86a..53323fe6e2 100644 --- a/core/src/scheduler/resource/Node.h +++ b/core/src/scheduler/resource/Node.h @@ -31,10 +31,14 @@ namespace scheduler { class Node; -using NeighbourNodePtr = std::weak_ptr; +using NeighbourNodePtr = std::shared_ptr; struct Neighbour { - Neighbour(NeighbourNodePtr nei, Connection conn) : neighbour_node(nei), connection(conn) { + Neighbour(NeighbourNodePtr nei, Connection conn) : neighbour_node(std::move(nei)), connection(std::move(conn)) { + } + + ~Neighbour() { + neighbour_node = nullptr; } NeighbourNodePtr neighbour_node; diff --git a/core/unittest/scheduler/test_event.cpp b/core/unittest/scheduler/test_event.cpp index 07d51e8557..cf627a5d79 100644 --- a/core/unittest/scheduler/test_event.cpp +++ b/core/unittest/scheduler/test_event.cpp @@ -28,7 +28,7 @@ namespace milvus { namespace scheduler { TEST(EventTest, START_UP_EVENT) { - ResourceWPtr res(ResourcePtr(nullptr)); + ResourcePtr res(nullptr); auto event = std::make_shared(res); ASSERT_FALSE(event->Dump().empty()); std::cout << *event; @@ -36,7 +36,7 @@ TEST(EventTest, START_UP_EVENT) { } TEST(EventTest, LOAD_COMPLETED_EVENT) { - ResourceWPtr res(ResourcePtr(nullptr)); + ResourcePtr res(nullptr); auto event = std::make_shared(res, nullptr); ASSERT_FALSE(event->Dump().empty()); std::cout << *event; @@ -44,7 +44,7 @@ TEST(EventTest, LOAD_COMPLETED_EVENT) { } TEST(EventTest, FINISH_TASK_EVENT) { - ResourceWPtr res(ResourcePtr(nullptr)); + ResourcePtr res(nullptr); auto event = std::make_shared(res, nullptr); ASSERT_FALSE(event->Dump().empty()); std::cout << *event; @@ -53,7 +53,7 @@ TEST(EventTest, FINISH_TASK_EVENT) { TEST(EventTest, TASKTABLE_UPDATED_EVENT) { - ResourceWPtr res(ResourcePtr(nullptr)); + ResourcePtr res(nullptr); auto event = std::make_shared(res); ASSERT_FALSE(event->Dump().empty()); std::cout << *event; diff --git a/core/unittest/scheduler/test_node.cpp b/core/unittest/scheduler/test_node.cpp index 9b34b73191..d2c93971ac 100644 --- a/core/unittest/scheduler/test_node.cpp +++ b/core/unittest/scheduler/test_node.cpp @@ -15,15 +15,14 @@ // specific language governing permissions and limitations // under the License. 
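// A note on the weak_ptr -> shared_ptr switch above: shared_ptr neighbours
// avoid the cost of lock(), but two nodes that hold shared_ptrs to each other
// form a reference cycle that reference counting alone will never reclaim.
// A minimal illustration with a hypothetical struct (not the scheduler's Node):
//
//     struct N { std::shared_ptr<N> peer; };
//     auto a = std::make_shared<N>();
//     auto b = std::make_shared<N>();
//     a->peer = b;
//     b->peer = a;   // both objects survive after a and b leave scope
//
// The explicit resets added in this patch (~Neighbour setting neighbour_node
// to nullptr, ~Scheduler setting res_mgr_ to nullptr) appear aimed at
// breaking such chains by hand during teardown.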
- -#include "scheduler/resource/Node.h" #include +#include "scheduler/resource/Node.h" namespace { namespace ms = milvus::scheduler; -} // namespace +} // namespace class NodeTest : public ::testing::Test { protected: @@ -73,9 +72,11 @@ TEST_F(NodeTest, GET_NEIGHBOURS) { bool n2 = false, n3 = false; auto node1_neighbours = node1_->GetNeighbours(); ASSERT_EQ(node1_neighbours.size(), 2); - for (auto &n : node1_neighbours) { - if (n.neighbour_node.lock() == node2_) n2 = true; - if (n.neighbour_node.lock() == node3_) n3 = true; + for (auto& n : node1_neighbours) { + if (n.neighbour_node == node2_) + n2 = true; + if (n.neighbour_node == node3_) + n3 = true; } ASSERT_TRUE(n2); ASSERT_TRUE(n3); @@ -84,7 +85,7 @@ TEST_F(NodeTest, GET_NEIGHBOURS) { { auto node2_neighbours = node2_->GetNeighbours(); ASSERT_EQ(node2_neighbours.size(), 1); - ASSERT_EQ(node2_neighbours[0].neighbour_node.lock(), node1_); + ASSERT_EQ(node2_neighbours[0].neighbour_node, node1_); } { @@ -100,4 +101,3 @@ TEST_F(NodeTest, DUMP) { std::cout << node2_->Dump(); ASSERT_FALSE(node2_->Dump().empty()); } - From ada0bf86ce2504fffe1edb0ce49282f2074d383c Mon Sep 17 00:00:00 2001 From: wxyu Date: Mon, 28 Oct 2019 19:23:05 +0800 Subject: [PATCH 072/149] solve conflicts Former-commit-id: 538671361c228898d0f2a81fdfdd7d3087bf0721 --- CHANGELOG.md | 1 - 1 file changed, 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 064ad9b439..cc2461a9c2 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -15,7 +15,6 @@ Please mark all change in change log and use the ticket from JIRA. - \#96 - Remove .a file in milvus/lib for docker-version - \#118 - Using shared_ptr instead of weak_ptr to avoid performance loss ->>>>>>> main/0.5.1 ## Feature - \#115 - Using new structure for tasktable From 2b1de98912a499d8a1d88097e5ce9ea9ca8834c4 Mon Sep 17 00:00:00 2001 From: wxyu Date: Mon, 28 Oct 2019 19:29:44 +0800 Subject: [PATCH 073/149] fix cpplint Former-commit-id: df5bb8526ac0fe0662b10fbfb7daa706900e6758 --- core/src/scheduler/resource/Node.h | 1 + 1 file changed, 1 insertion(+) diff --git a/core/src/scheduler/resource/Node.h b/core/src/scheduler/resource/Node.h index 53323fe6e2..177cdd735a 100644 --- a/core/src/scheduler/resource/Node.h +++ b/core/src/scheduler/resource/Node.h @@ -20,6 +20,7 @@ #include #include #include +#include #include #include "Connection.h" From 7f6092b6baf8d529e762f508fbac9feff80b77b4 Mon Sep 17 00:00:00 2001 From: "yudong.cai" Date: Mon, 28 Oct 2019 19:34:38 +0800 Subject: [PATCH 074/149] #89 add SQ8Hybrid MIX test and pure-GPU test Former-commit-id: 25b5f419d3deedd4ec39c014ea47822a83a0af38 --- .../faiss_benchmark/faiss_benchmark_test.cpp | 228 +++++++++++------- 1 file changed, 143 insertions(+), 85 deletions(-) diff --git a/core/src/index/unittest/faiss_benchmark/faiss_benchmark_test.cpp b/core/src/index/unittest/faiss_benchmark/faiss_benchmark_test.cpp index d1db0e9049..bb50198f92 100644 --- a/core/src/index/unittest/faiss_benchmark/faiss_benchmark_test.cpp +++ b/core/src/index/unittest/faiss_benchmark/faiss_benchmark_test.cpp @@ -28,6 +28,7 @@ #include #include #include +#include #include #include #include @@ -183,9 +184,31 @@ parse_ann_test_name(const std::string& ann_test_name, size_t& dim, faiss::Metric return true; } +int32_t +GetResultHitCount(const faiss::Index::idx_t* ground_index, const faiss::Index::idx_t* index, size_t ground_k, size_t k, + size_t nq, int32_t index_add_loops) { + assert(ground_k <= k); + int hit = 0; + for (int i = 0; i < nq; i++) { + // count the num of results exist in ground truth result set + 
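// (note the inner loop below only scans the first ground_k / index_add_loops
// ground-truth entries: the data set is inserted index_add_loops times, so
// the callers divide by the same factor when turning `hit` into a recall figure)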
// each result replicates INDEX_ADD_LOOPS times + for (int j_c = 0; j_c < ground_k; j_c++) { + int r_c = index[i * k + j_c]; + int j_g = 0; + for (; j_g < ground_k / index_add_loops; j_g++) { + if (ground_index[i * ground_k + j_g] == r_c) { + hit++; + continue; + } + } + } + } + return hit; +} + void test_ann_hdf5(const std::string& ann_test_name, const std::string& index_key, int32_t index_add_loops, - const std::vector& nprobes) { + const std::vector& nprobes, int32_t search_loops) { double t0 = elapsed(); const std::string ann_file_name = ann_test_name + ".hdf5"; @@ -265,8 +288,6 @@ test_ann_hdf5(const std::string& ann_test_name, const std::string& index_key, in for (auto nprobe : nprobes) { faiss::ParameterSpace params; - printf("[%.3f s] Setting parameter configuration 'nprobe=%lu' on index\n", elapsed() - t0, nprobe); - std::string nprobe_str = "nprobe=" + std::to_string(nprobe); params.set_index_parameters(index, nprobe_str.c_str()); @@ -277,39 +298,28 @@ test_ann_hdf5(const std::string& ann_test_name, const std::string& index_key, in float* D = new float[NQ * K]; printf("\n%s | %s | nprobe=%lu\n", ann_test_name.c_str(), index_key.c_str(), nprobe); - printf("============================================================================================\n"); + printf("======================================================================================\n"); for (size_t t_nq = 10; t_nq <= NQ; t_nq *= 10) { // nq = {10, 100, 1000} for (size_t t_k = 100; t_k <= K; t_k *= 10) { // k = {100, 1000} faiss::indexIVF_stats.quantization_time = 0.0; faiss::indexIVF_stats.search_time = 0.0; double t_start = elapsed(), t_end; - - index->search(t_nq, xq, t_k, D, I); - + for (int i = 0; i < search_loops; i++) { + index->search(t_nq, xq, t_k, D, I); + } t_end = elapsed(); // k = 100 for ground truth - int hit = 0; - for (int i = 0; i < t_nq; i++) { - // count the num of results exist in ground truth result set - // consider: each result replicates DATA_LOOPS times - for (int j_c = 0; j_c < k; j_c++) { - int r_c = I[i * t_k + j_c]; - for (int j_g = 0; j_g < k / index_add_loops; j_g++) { - if (gt[i * k + j_g] == r_c) { - hit++; - continue; - } - } - } - } - printf("nq = %4ld, k = %4ld, elapse = %fs (quant = %fs, search = %fs), R@ = %.4f\n", t_nq, t_k, - (t_end - t_start), faiss::indexIVF_stats.quantization_time / 1000, - faiss::indexIVF_stats.search_time / 1000, (hit / float(t_nq * k / index_add_loops))); + int32_t hit = GetResultHitCount(gt, I, k, t_k, t_nq, index_add_loops); + + printf("nq = %4ld, k = %4ld, elapse = %.4fs (quant = %.4fs, search = %.4fs), R@ = %.4f\n", t_nq, t_k, + (t_end - t_start) / search_loops, faiss::indexIVF_stats.quantization_time / 1000 / search_loops, + faiss::indexIVF_stats.search_time / 1000 / search_loops, + (hit / float(t_nq * k / index_add_loops))); } } - printf("============================================================================================\n"); + printf("======================================================================================\n"); #else printf("[%.3f s] Perform a search on %ld queries\n", elapsed() - t0, nq); @@ -353,7 +363,8 @@ test_ann_hdf5(const std::string& ann_test_name, const std::string& index_key, in #ifdef CUSTOMIZATION void -test_ivfsq8h_gpu(const std::string& ann_test_name, int32_t index_add_loops, const std::vector& nprobes) { +test_ivfsq8h(const std::string& ann_test_name, int32_t index_add_loops, const std::vector& nprobes, + bool pure_gpu_mode, int32_t search_loops) { double t0 = elapsed(); const std::string ann_file_name = 
ann_test_name + ".hdf5"; @@ -423,9 +434,18 @@ test_ivfsq8h_gpu(const std::string& ann_test_name, int32_t index_add_loops, cons index_composition.quantizer = nullptr; index_composition.mode = 1; + double copy_time = elapsed(); auto index = faiss::gpu::index_cpu_to_gpu(&res, 0, &index_composition, &option); delete index; + if (pure_gpu_mode) { + index_composition.mode = 2; // 0: all data, 1: copy quantizer, 2: copy data + index = faiss::gpu::index_cpu_to_gpu(&res, 0, &index_composition, &option); + } + + copy_time = elapsed() - copy_time; + printf("[%.3f s] Copy quantizer completed, cost %f s\n", elapsed() - t0, copy_time); + size_t nq; float* xq; { @@ -446,67 +466,98 @@ test_ivfsq8h_gpu(const std::string& ann_test_name, int32_t index_add_loops, cons assert(nq2 == nq || !"incorrect nb of ground truth entries"); gt = new faiss::Index::idx_t[k * nq]; - for (unsigned long i = 0; i < k * nq; ++i) { + for (uint64_t i = 0; i < k * nq; ++i) { gt[i] = gt_int[i]; } delete[] gt_int; } - for (auto nprobe : nprobes) { - printf("[%.3f s] Setting parameter configuration 'nprobe=%lu' on index\n", elapsed() - t0, nprobe); + const size_t NQ = 1000, K = 1000; + if (!pure_gpu_mode) { + for (auto nprobe : nprobes) { + auto ivf_index = dynamic_cast(cpu_index); + ivf_index->nprobe = nprobe; - auto ivf_index = dynamic_cast(cpu_index); - ivf_index->nprobe = nprobe; - - auto is_gpu_flat_index = dynamic_cast(ivf_index->quantizer); - if (is_gpu_flat_index == nullptr) { - delete ivf_index->quantizer; - ivf_index->quantizer = index_composition.quantizer; - } - - const size_t NQ = 1000, K = 1000; - long* I = new faiss::Index::idx_t[NQ * K]; - float* D = new float[NQ * K]; - - printf("\n%s | %s-gpu | nprobe=%lu\n", ann_test_name.c_str(), index_key.c_str(), nprobe); - printf("============================================================================================\n"); - for (size_t t_nq = 10; t_nq <= NQ; t_nq *= 10) { // nq = {10, 100, 1000} - for (size_t t_k = 100; t_k <= K; t_k *= 10) { // k = {100, 1000} - faiss::indexIVF_stats.quantization_time = 0.0; - faiss::indexIVF_stats.search_time = 0.0; - - double t_start = elapsed(), t_end; - - cpu_index->search(t_nq, xq, t_k, D, I); - - t_end = elapsed(); - - // k = 100 for ground truth - int hit = 0; - for (unsigned long i = 0; i < t_nq; i++) { - // count the num of results exist in ground truth result set - // consider: each result replicates DATA_LOOPS times - for (unsigned long j_c = 0; j_c < k; j_c++) { - int r_c = I[i * t_k + j_c]; - for (unsigned long j_g = 0; j_g < k / index_add_loops; j_g++) { - if (gt[i * k + j_g] == r_c) { - hit++; - continue; - } - } - } - } - printf("nq = %4ld, k = %4ld, elapse = %fs (quant = %fs, search = %fs), R@ = %.4f\n", t_nq, t_k, - (t_end - t_start), faiss::indexIVF_stats.quantization_time / 1000, - faiss::indexIVF_stats.search_time / 1000, (hit / float(t_nq * k / index_add_loops))); + auto is_gpu_flat_index = dynamic_cast(ivf_index->quantizer); + if (is_gpu_flat_index == nullptr) { + delete ivf_index->quantizer; + ivf_index->quantizer = index_composition.quantizer; } + + int64_t* I = new faiss::Index::idx_t[NQ * K]; + float* D = new float[NQ * K]; + + printf("\n%s | %s-MIX | nprobe=%lu\n", ann_test_name.c_str(), index_key.c_str(), nprobe); + printf("======================================================================================\n"); + for (size_t t_nq = 10; t_nq <= NQ; t_nq *= 10) { // nq = {10, 100, 1000} + for (size_t t_k = 100; t_k <= K; t_k *= 10) { // k = {100, 1000} + faiss::indexIVF_stats.quantization_time = 0.0; + 
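// (quantization_time and search_time are FAISS's accumulated IVF timers;
// both are zeroed here so the averages printed below cover only this timed run)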
faiss::indexIVF_stats.search_time = 0.0; + + double t_start = elapsed(), t_end; + for (int32_t i = 0; i < search_loops; i++) { + cpu_index->search(t_nq, xq, t_k, D, I); + } + t_end = elapsed(); + + // k = 100 for ground truth + int32_t hit = GetResultHitCount(gt, I, k, t_k, t_nq, index_add_loops); + + printf("nq = %4ld, k = %4ld, elapse = %.4fs (quant = %.4fs, search = %.4fs), R@ = %.4f\n", t_nq, + t_k, (t_end - t_start) / search_loops, + faiss::indexIVF_stats.quantization_time / 1000 / search_loops, + faiss::indexIVF_stats.search_time / 1000 / search_loops, + (hit / float(t_nq * k / index_add_loops))); + } + } + printf("======================================================================================\n"); + + printf("[%.3f s] Search test done\n\n", elapsed() - t0); + + delete[] I; + delete[] D; } - printf("============================================================================================\n"); + } else { + std::shared_ptr gpu_index_ivf_ptr = std::shared_ptr(index); - printf("[%.3f s] Search test done\n\n", elapsed() - t0); + for (auto nprobe : nprobes) { + faiss::gpu::GpuIndexIVFSQHybrid* gpu_index_ivf_hybrid = + dynamic_cast(gpu_index_ivf_ptr.get()); + gpu_index_ivf_hybrid->setNumProbes(nprobe); - delete[] I; - delete[] D; + int64_t* I = new faiss::Index::idx_t[NQ * K]; + float* D = new float[NQ * K]; + + printf("\n%s | %s-GPU | nprobe=%lu\n", ann_test_name.c_str(), index_key.c_str(), nprobe); + printf("======================================================================================\n"); + for (size_t t_nq = 10; t_nq <= NQ; t_nq *= 10) { // nq = {10, 100, 1000} + for (size_t t_k = 100; t_k <= K; t_k *= 10) { // k = {100, 1000} + faiss::indexIVF_stats.quantization_time = 0.0; + faiss::indexIVF_stats.search_time = 0.0; + + double t_start = elapsed(), t_end; + for (int32_t i = 0; i < search_loops; i++) { + gpu_index_ivf_ptr->search(nq, xq, k, D, I); + } + t_end = elapsed(); + + // k = 100 for ground truth + int32_t hit = GetResultHitCount(gt, I, k, t_k, t_nq, index_add_loops); + + printf("nq = %4ld, k = %4ld, elapse = %.4fs (quant = %.4fs, search = %.4fs), R@ = %.4f\n", t_nq, + t_k, (t_end - t_start) / search_loops, + faiss::indexIVF_stats.quantization_time / 1000 / search_loops, + faiss::indexIVF_stats.search_time / 1000 / search_loops, + (hit / float(t_nq * k / index_add_loops))); + } + } + printf("======================================================================================\n"); + + printf("[%.3f s] Search test done\n\n", elapsed() - t0); + + delete[] I; + delete[] D; + } } delete[] xq; @@ -530,17 +581,24 @@ test_ivfsq8h_gpu(const std::string& ann_test_name, int32_t index_add_loops, cons *************************************************************************************/ TEST(FAISSTEST, BENCHMARK) { - test_ann_hdf5("sift-128-euclidean", "IVF4096,Flat", 2, {8, 128}); - test_ann_hdf5("sift-128-euclidean", "IVF16384,SQ8", 2, {8, 128}); + std::vector param_nprobes = {8, 128}; + const int32_t SEARCH_LOOPS = 5; + const int32_t SIFT_INSERT_LOOPS = 2; // insert twice to get ~1G data set + const int32_t GLOVE_INSERT_LOOPS = 1; + + test_ann_hdf5("sift-128-euclidean", "IVF4096,Flat", SIFT_INSERT_LOOPS, param_nprobes, SEARCH_LOOPS); + test_ann_hdf5("sift-128-euclidean", "IVF16384,SQ8", SIFT_INSERT_LOOPS, param_nprobes, SEARCH_LOOPS); #ifdef CUSTOMIZATION - test_ann_hdf5("sift-128-euclidean", "IVF16384,SQ8Hybrid", 2, {8, 128}); - test_ivfsq8h_gpu("sift-128-euclidean", 2, {8, 128}); + test_ann_hdf5("sift-128-euclidean", "IVF16384,SQ8Hybrid", SIFT_INSERT_LOOPS, 
param_nprobes, SEARCH_LOOPS); + test_ivfsq8h("sift-128-euclidean", SIFT_INSERT_LOOPS, param_nprobes, false, SEARCH_LOOPS); + test_ivfsq8h("sift-128-euclidean", SIFT_INSERT_LOOPS, param_nprobes, true, SEARCH_LOOPS); #endif - test_ann_hdf5("glove-200-angular", "IVF4096,Flat", 1, {8, 128}); - test_ann_hdf5("glove-200-angular", "IVF16384,SQ8", 1, {8, 128}); + test_ann_hdf5("glove-200-angular", "IVF4096,Flat", GLOVE_INSERT_LOOPS, param_nprobes, SEARCH_LOOPS); + test_ann_hdf5("glove-200-angular", "IVF16384,SQ8", GLOVE_INSERT_LOOPS, param_nprobes, SEARCH_LOOPS); #ifdef CUSTOMIZATION - test_ann_hdf5("glove-200-angular", "IVF16384,SQ8Hybrid", 1, {8, 128}); - test_ivfsq8h_gpu("glove-200-angular", 1, {8, 128}); + test_ann_hdf5("glove-200-angular", "IVF16384,SQ8Hybrid", GLOVE_INSERT_LOOPS, param_nprobes, SEARCH_LOOPS); + test_ivfsq8h("glove-200-angular", GLOVE_INSERT_LOOPS, param_nprobes, false, SEARCH_LOOPS); + test_ivfsq8h("glove-200-angular", GLOVE_INSERT_LOOPS, param_nprobes, true, SEARCH_LOOPS); #endif } From 97ae8a780ca0009e7f1749a339f50cb954cc4fa2 Mon Sep 17 00:00:00 2001 From: "yudong.cai" Date: Mon, 28 Oct 2019 19:55:43 +0800 Subject: [PATCH 075/149] #89 add README.txt Former-commit-id: a84501ce6c2c94275819ace9e7d7a4afc14fbeca --- core/src/index/unittest/faiss_benchmark/README.txt | 13 +++++++++++++ 1 file changed, 13 insertions(+) create mode 100644 core/src/index/unittest/faiss_benchmark/README.txt diff --git a/core/src/index/unittest/faiss_benchmark/README.txt b/core/src/index/unittest/faiss_benchmark/README.txt new file mode 100644 index 0000000000..81114d8381 --- /dev/null +++ b/core/src/index/unittest/faiss_benchmark/README.txt @@ -0,0 +1,13 @@ +To run this FAISS benchmark, please follow these steps: + +1. Download the HDF5 from: + https://support.hdfgroup.org/ftp/HDF5/releases/ + and install to /usr/local/hdf5 + +2. Download HDF5 data files from: + https://github.com/erikbern/ann-benchmarks + +3. Put HDF5 data files into the same directory with test binary + +4. Run the test binary + From b439da8a36dcfa2f63867cb70786859fe992dcf3 Mon Sep 17 00:00:00 2001 From: "yudong.cai" Date: Mon, 28 Oct 2019 20:17:25 +0800 Subject: [PATCH 076/149] #89 update README.md Former-commit-id: 0b85b430c0d4a69e0470e916a76adda16f96c12b --- .../index/unittest/faiss_benchmark/README.md | 25 +++++++++++++++++++ .../index/unittest/faiss_benchmark/README.txt | 13 ---------- 2 files changed, 25 insertions(+), 13 deletions(-) create mode 100644 core/src/index/unittest/faiss_benchmark/README.md delete mode 100644 core/src/index/unittest/faiss_benchmark/README.txt diff --git a/core/src/index/unittest/faiss_benchmark/README.md b/core/src/index/unittest/faiss_benchmark/README.md new file mode 100644 index 0000000000..c451ac13b0 --- /dev/null +++ b/core/src/index/unittest/faiss_benchmark/README.md @@ -0,0 +1,25 @@ +### To run this FAISS benchmark, please follow these steps: + +#### Step 1: +Download the HDF5 source from: + https://support.hdfgroup.org/ftp/HDF5/releases/ +and build/install to "/usr/local/hdf5". + +#### Step 2: +Download HDF5 data files from: + https://github.com/erikbern/ann-benchmarks + +#### Step 3: +Update 'milvus/core/src/index/unittest/CMakeLists.txt', +uncomment "#add_subdirectory(faiss_benchmark)". + +#### Step 4: +Build Milvus with unittest enabled: "./build.sh -t Release -u", +binary 'test_faiss_benchmark' will be generated. + +#### Step 5: +Put HDF5 data files into the same directory with binary 'test_faiss_benchmark'. + +#### Step 6: +Run test binary 'test_faiss_benchmark'. 
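For orientation, ann-benchmarks HDF5 files store 2-D datasets named 'train', 'test', 'neighbors' and 'distances'. A minimal sketch of reading one such dataset with the plain HDF5 C API is shown below; names and error handling are illustrative only, this is not the benchmark's actual loader:

#include <hdf5.h>
#include <vector>

// Reads a 2-D float dataset (e.g. "train" or "test") into row-major memory.
std::vector<float> ReadFloat2D(const char* file, const char* dset,
                               hsize_t& rows, hsize_t& cols) {
    hid_t f = H5Fopen(file, H5F_ACC_RDONLY, H5P_DEFAULT);
    hid_t d = H5Dopen2(f, dset, H5P_DEFAULT);
    hid_t space = H5Dget_space(d);
    hsize_t dims[2] = {0, 0};
    H5Sget_simple_extent_dims(space, dims, nullptr);  // query (rows, cols)
    rows = dims[0];
    cols = dims[1];
    std::vector<float> data(rows * cols);
    H5Dread(d, H5T_NATIVE_FLOAT, H5S_ALL, H5S_ALL, H5P_DEFAULT, data.data());
    H5Sclose(space);
    H5Dclose(d);
    H5Fclose(f);
    return data;
}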
+ diff --git a/core/src/index/unittest/faiss_benchmark/README.txt b/core/src/index/unittest/faiss_benchmark/README.txt deleted file mode 100644 index 81114d8381..0000000000 --- a/core/src/index/unittest/faiss_benchmark/README.txt +++ /dev/null @@ -1,13 +0,0 @@ -To run this FAISS benchmark, please follow these steps: - -1. Download the HDF5 from: - https://support.hdfgroup.org/ftp/HDF5/releases/ - and install to /usr/local/hdf5 - -2. Download HDF5 data files from: - https://github.com/erikbern/ann-benchmarks - -3. Put HDF5 data files into the same directory with test binary - -4. Run the test binary - From 12b7d6f5d8011ec9b60e3f6184467f11873ec9e7 Mon Sep 17 00:00:00 2001 From: starlord Date: Mon, 28 Oct 2019 20:24:13 +0800 Subject: [PATCH 077/149] #90 The server start error messages could be improved to enhance user experience Former-commit-id: 746b126621cd845c2848bb850b5c2eb6d3b65a6d --- CHANGELOG.md | 1 + core/src/server/Config.cpp | 134 +++++++++++++++++++++++--------- core/src/server/DBWrapper.cpp | 47 +++++++---- core/unittest/db/utils.cpp | 2 +- core/unittest/wrapper/utils.cpp | 2 +- 5 files changed, 133 insertions(+), 53 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 0925fa1a68..c4643c0adc 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,6 +5,7 @@ Please mark all change in change log and use the ticket from JIRA. # Milvus 0.5.1 (TODO) ## Bug +- \#90 - The server start error messages could be improved to enhance user experience - \#104 - test_scheduler core dump ## Improvement diff --git a/core/src/server/Config.cpp b/core/src/server/Config.cpp index 7de84cbccc..b20d6c2436 100644 --- a/core/src/server/Config.cpp +++ b/core/src/server/Config.cpp @@ -363,7 +363,9 @@ Config::PrintAll() { Status Config::CheckServerConfigAddress(const std::string& value) { if (!ValidationUtil::ValidateIpAddress(value).ok()) { - return Status(SERVER_INVALID_ARGUMENT, "Invalid server config address: " + value); + std::string msg = "Invalid server IP address: " + value + + ". Possible reason: server_config.address is invalid in server_config.yaml."; + return Status(SERVER_INVALID_ARGUMENT, msg); } return Status::OK(); } @@ -371,11 +373,15 @@ Config::CheckServerConfigAddress(const std::string& value) { Status Config::CheckServerConfigPort(const std::string& value) { if (!ValidationUtil::ValidateStringIsNumber(value).ok()) { - return Status(SERVER_INVALID_ARGUMENT, "Invalid server config port: " + value); + std::string msg = "Port " + value + " is not a number. " + + "Possible reason: server_config.port in server_config.yaml is invalid."; + return Status(SERVER_INVALID_ARGUMENT, msg); } else { int32_t port = std::stoi(value); if (!(port > 1024 && port < 65535)) { - return Status(SERVER_INVALID_ARGUMENT, "Server config port out of range (1024, 65535): " + value); + std::string msg = "Port " + value + " is not in range [1025, 65534]. 
" + + "Possible reason: server_config.port in server_config.yaml is invalid."; + return Status(SERVER_INVALID_ARGUMENT, msg); } } return Status::OK(); @@ -385,7 +391,8 @@ Status Config::CheckServerConfigDeployMode(const std::string& value) { if (value != "single" && value != "cluster_readonly" && value != "cluster_writable") { return Status(SERVER_INVALID_ARGUMENT, - "Invalid server config mode [single, cluster_readonly, cluster_writable]: " + value); + "Error: server_config.deploy_mode in server_config.yaml is not one of " + "single, cluster_readonly, and cluster_writable."); } return Status::OK(); } @@ -411,7 +418,8 @@ Config::CheckServerConfigTimeZone(const std::string& value) { Status Config::CheckDBConfigPrimaryPath(const std::string& value) { if (value.empty()) { - return Status(SERVER_INVALID_ARGUMENT, "DB config primary_path empty"); + return Status(SERVER_INVALID_ARGUMENT, + "db_path is empty. Possible reason: db_config.db_path in server_config.yaml is empty."); } return Status::OK(); } @@ -424,7 +432,11 @@ Config::CheckDBConfigSecondaryPath(const std::string& value) { Status Config::CheckDBConfigBackendUrl(const std::string& value) { if (!ValidationUtil::ValidateDbURI(value).ok()) { - return Status(SERVER_INVALID_ARGUMENT, "Invalid DB config backend_url: " + value); + std::string msg = + "Invalid db_backend_url: " + value + + ". Possible reason: db_config.db_backend_url is invalid in server_config.yaml. " + + "The correct format should be like sqlite://:@:/ or mysql://root:123456@127.0.0.1:3306/milvus."; + return Status(SERVER_INVALID_ARGUMENT, "invalid db_backend_url: " + value); } return Status::OK(); } @@ -432,7 +444,9 @@ Config::CheckDBConfigBackendUrl(const std::string& value) { Status Config::CheckDBConfigArchiveDiskThreshold(const std::string& value) { if (!ValidationUtil::ValidateStringIsNumber(value).ok()) { - return Status(SERVER_INVALID_ARGUMENT, "Invalid DB config archive_disk_threshold: " + value); + std::string msg = "Invalid archive disk threshold: " + value + + "Possible reason: db_config.archive_disk_threshold in server_config.yaml is invalid."; + return Status(SERVER_INVALID_ARGUMENT, msg); } return Status::OK(); } @@ -440,7 +454,9 @@ Config::CheckDBConfigArchiveDiskThreshold(const std::string& value) { Status Config::CheckDBConfigArchiveDaysThreshold(const std::string& value) { if (!ValidationUtil::ValidateStringIsNumber(value).ok()) { - return Status(SERVER_INVALID_ARGUMENT, "Invalid DB config archive_days_threshold: " + value); + std::string msg = "Invalid archive days threshold: " + value + + "Possible reason: db_config.archive_disk_threshold in server_config.yaml is invalid."; + return Status(SERVER_INVALID_ARGUMENT, msg); } return Status::OK(); } @@ -448,13 +464,17 @@ Config::CheckDBConfigArchiveDaysThreshold(const std::string& value) { Status Config::CheckDBConfigInsertBufferSize(const std::string& value) { if (!ValidationUtil::ValidateStringIsNumber(value).ok()) { - return Status(SERVER_INVALID_ARGUMENT, "Invalid DB config insert_buffer_size: " + value); + std::string msg = "Invalid insert buffer size: " + value + + "Possible reason: db_config.insert_buffer_size in server_config.yaml is invalid."; + return Status(SERVER_INVALID_ARGUMENT, msg); } else { int64_t buffer_size = std::stoi(value) * GB; uint64_t total_mem = 0, free_mem = 0; CommonUtil::GetSystemMemInfo(total_mem, free_mem); if (buffer_size >= total_mem) { - return Status(SERVER_INVALID_ARGUMENT, "DB config insert_buffer_size exceed system memory: " + value); + std::string msg = "Invalid insert 
buffer size: " + value + + "Possible reason: insert buffer size exceed system memory."; + return Status(SERVER_INVALID_ARGUMENT, msg); } } return Status::OK(); @@ -463,7 +483,9 @@ Config::CheckDBConfigInsertBufferSize(const std::string& value) { Status Config::CheckMetricConfigEnableMonitor(const std::string& value) { if (!ValidationUtil::ValidateStringIsBool(value).ok()) { - return Status(SERVER_INVALID_ARGUMENT, "Invalid metric config auto_bootup: " + value); + std::string msg = "Invalid metric config: " + value + + "Possible reason: metric_config.enable_monitor is invalid."; + return Status(SERVER_INVALID_ARGUMENT, msg); } return Status::OK(); } @@ -471,7 +493,9 @@ Config::CheckMetricConfigEnableMonitor(const std::string& value) { Status Config::CheckMetricConfigCollector(const std::string& value) { if (value != "prometheus") { - return Status(SERVER_INVALID_ARGUMENT, "Invalid metric config collector: " + value); + std::string msg = "Invalid metric config: " + value + + "Possible reason: metric_config.collector is invalid."; + return Status(SERVER_INVALID_ARGUMENT, msg); } return Status::OK(); } @@ -479,6 +503,8 @@ Config::CheckMetricConfigCollector(const std::string& value) { Status Config::CheckMetricConfigPrometheusPort(const std::string& value) { if (!ValidationUtil::ValidateStringIsNumber(value).ok()) { + std::string msg = "Invalid metric config: " + value + + "Possible reason: metric_config.prometheus_config.port is invalid."; return Status(SERVER_INVALID_ARGUMENT, "Invalid metric config prometheus_port: " + value); } return Status::OK(); @@ -487,15 +513,19 @@ Config::CheckMetricConfigPrometheusPort(const std::string& value) { Status Config::CheckCacheConfigCpuCacheCapacity(const std::string& value) { if (!ValidationUtil::ValidateStringIsNumber(value).ok()) { - return Status(SERVER_INVALID_ARGUMENT, "Invalid cache config cpu_cache_capacity: " + value); + std::string msg = "Invalid cpu cache capacity: " + value + + "Possible reason: cache_config.cpu_cache_capacity is invalid."; + return Status(SERVER_INVALID_ARGUMENT, msg); } else { uint64_t cpu_cache_capacity = std::stoi(value) * GB; uint64_t total_mem = 0, free_mem = 0; CommonUtil::GetSystemMemInfo(total_mem, free_mem); if (cpu_cache_capacity >= total_mem) { - return Status(SERVER_INVALID_ARGUMENT, "Cache config cpu_cache_capacity exceed system memory: " + value); + std::string msg = "Invalid cpu cache capacity: " + value + + "Possible reason: Cache config cpu_cache_capacity exceed system memory."; + return Status(SERVER_INVALID_ARGUMENT, msg); } else if (cpu_cache_capacity > static_cast(total_mem * 0.9)) { - std::cerr << "Warning: cpu_cache_capacity value is too big" << std::endl; + std::cerr << "WARNING: cpu cache capacity value is too big" << std::endl; } int32_t buffer_value; @@ -506,7 +536,10 @@ Config::CheckCacheConfigCpuCacheCapacity(const std::string& value) { int64_t insert_buffer_size = buffer_value * GB; if (insert_buffer_size + cpu_cache_capacity >= total_mem) { - return Status(SERVER_INVALID_ARGUMENT, "Sum of cpu_cache_capacity and buffer_size exceed system memory"); + std::string msg = "Invalid cpu cache capacity: " + value + + "Possible reason: sum of cache_config.cpu_cache_capacity and " + "db_config.insert_buffer_size exceeds system memory."; + return Status(SERVER_INVALID_ARGUMENT, msg); } } return Status::OK(); @@ -515,11 +548,15 @@ Config::CheckCacheConfigCpuCacheCapacity(const std::string& value) { Status Config::CheckCacheConfigCpuCacheThreshold(const std::string& value) { if 
(!ValidationUtil::ValidateStringIsFloat(value).ok()) { - return Status(SERVER_INVALID_ARGUMENT, "Invalid cache config cpu_cache_threshold: " + value); + std::string msg = "Invalid cpu cache threshold: " + value + + "Possible reason: cache_config.cpu_cache_threshold is invalid."; + return Status(SERVER_INVALID_ARGUMENT, msg); } else { float cpu_cache_threshold = std::stof(value); if (cpu_cache_threshold <= 0.0 || cpu_cache_threshold >= 1.0) { - return Status(SERVER_INVALID_ARGUMENT, "Invalid cache config cpu_cache_threshold: " + value); + std::string msg = "Invalid cpu cache threshold: " + value + + "Possible reason: cache_config.cpu_cache_threshold is invalid."; + return Status(SERVER_INVALID_ARGUMENT, msg); } } return Status::OK(); @@ -528,7 +565,9 @@ Config::CheckCacheConfigCpuCacheThreshold(const std::string& value) { Status Config::CheckCacheConfigGpuCacheCapacity(const std::string& value) { if (!ValidationUtil::ValidateStringIsNumber(value).ok()) { - return Status(SERVER_INVALID_ARGUMENT, "Invalid cache config gpu_cache_capacity: " + value); + std::string msg = "Invalid gpu cache capacity: " + value + + "Possible reason: cache_config.gpu_cache_capacity is invalid."; + return Status(SERVER_INVALID_ARGUMENT, msg); } else { uint64_t gpu_cache_capacity = std::stoi(value) * GB; int gpu_index; @@ -539,13 +578,14 @@ Config::CheckCacheConfigGpuCacheCapacity(const std::string& value) { size_t gpu_memory; if (!ValidationUtil::GetGpuMemory(gpu_index, gpu_memory).ok()) { - return Status(SERVER_UNEXPECTED_ERROR, - "Fail to get GPU memory for GPU device: " + std::to_string(gpu_index)); + std::string msg = "Fail to get GPU memory for GPU device: " + std::to_string(gpu_index); + return Status(SERVER_UNEXPECTED_ERROR, msg); } else if (gpu_cache_capacity >= gpu_memory) { - return Status(SERVER_INVALID_ARGUMENT, - "Cache config gpu_cache_capacity exceed GPU memory: " + std::to_string(gpu_memory)); + std::string msg = "Invalid gpu cache capacity: " + value + + "Possible reason: cache_config.gpu_cache_capacity exceed GPU memory."; + return Status(SERVER_INVALID_ARGUMENT, msg); } else if (gpu_cache_capacity > (double)gpu_memory * 0.9) { - std::cerr << "Warning: gpu_cache_capacity value is too big" << std::endl; + std::cerr << "Warning: gpu cache capacity value is too big" << std::endl; } } return Status::OK(); @@ -554,11 +594,15 @@ Config::CheckCacheConfigGpuCacheCapacity(const std::string& value) { Status Config::CheckCacheConfigGpuCacheThreshold(const std::string& value) { if (!ValidationUtil::ValidateStringIsFloat(value).ok()) { - return Status(SERVER_INVALID_ARGUMENT, "Invalid cache config gpu_cache_threshold: " + value); + std::string msg = "Invalid gpu cache threshold: " + value + + "Possible reason: cache_config.gpu_cache_threshold is invalid."; + return Status(SERVER_INVALID_ARGUMENT, msg); } else { float gpu_cache_threshold = std::stof(value); if (gpu_cache_threshold <= 0.0 || gpu_cache_threshold >= 1.0) { - return Status(SERVER_INVALID_ARGUMENT, "Invalid cache config gpu_cache_threshold: " + value); + std::string msg = "Invalid gpu cache threshold: " + value + + "Possible reason: cache_config.gpu_cache_threshold is invalid."; + return Status(SERVER_INVALID_ARGUMENT, msg); } } return Status::OK(); @@ -567,7 +611,9 @@ Config::CheckCacheConfigGpuCacheThreshold(const std::string& value) { Status Config::CheckCacheConfigCacheInsertData(const std::string& value) { if (!ValidationUtil::ValidateStringIsBool(value).ok()) { - return Status(SERVER_INVALID_ARGUMENT, "Invalid cache config cache_insert_data: " 
+ value); + std::string msg = "Invalid cache insert option: " + value + + "Possible reason: cache_config.cache_insert_data is invalid."; + return Status(SERVER_INVALID_ARGUMENT, msg); } return Status::OK(); } @@ -575,7 +621,9 @@ Config::CheckCacheConfigCacheInsertData(const std::string& value) { Status Config::CheckEngineConfigUseBlasThreshold(const std::string& value) { if (!ValidationUtil::ValidateStringIsNumber(value).ok()) { - return Status(SERVER_INVALID_ARGUMENT, "Invalid engine config use_blas_threshold: " + value); + std::string msg = "Invalid blas threshold: " + value + + "Possible reason: engine_config.use_blas_threshold is invalid."; + return Status(SERVER_INVALID_ARGUMENT, msg); } return Status::OK(); } @@ -583,14 +631,18 @@ Config::CheckEngineConfigUseBlasThreshold(const std::string& value) { Status Config::CheckEngineConfigOmpThreadNum(const std::string& value) { if (!ValidationUtil::ValidateStringIsNumber(value).ok()) { - return Status(SERVER_INVALID_ARGUMENT, "Invalid engine config omp_thread_num: " + value); + std::string msg = "Invalid omp thread number: " + value + + "Possible reason: engine_config.omp_thread_num is invalid."; + return Status(SERVER_INVALID_ARGUMENT, msg); } int32_t omp_thread = std::stoi(value); uint32_t sys_thread_cnt = 8; CommonUtil::GetSystemAvailableThreads(sys_thread_cnt); if (omp_thread > static_cast(sys_thread_cnt)) { - return Status(SERVER_INVALID_ARGUMENT, "Invalid engine config omp_thread_num: " + value); + std::string msg = "Invalid omp thread number: " + value + + "Possible reason: engine_config.omp_thread_num is invalid."; + return Status(SERVER_INVALID_ARGUMENT, msg); } return Status::OK(); } @@ -598,7 +650,9 @@ Config::CheckEngineConfigOmpThreadNum(const std::string& value) { Status Config::CheckResourceConfigMode(const std::string& value) { if (value != "simple") { - return Status(SERVER_INVALID_ARGUMENT, "Invalid resource config mode: " + value); + std::string msg = "Invalid resource mode: " + value + + "Possible reason: resource_config.mode is invalid."; + return Status(SERVER_INVALID_ARGUMENT, msg); } return Status::OK(); } @@ -608,12 +662,16 @@ CheckGpuDevice(const std::string& value) { const std::regex pat("gpu(\\d+)"); std::cmatch m; if (!std::regex_match(value.c_str(), m, pat)) { - return Status(SERVER_INVALID_ARGUMENT, "Invalid gpu device: " + value); + std::string msg = "Invalid gpu device: " + value + + "Possible reason: resource_config.search_resources is invalid."; + return Status(SERVER_INVALID_ARGUMENT, msg); } int32_t gpu_index = std::stoi(value.substr(3)); if (!ValidationUtil::ValidateGpuIndex(gpu_index).ok()) { - return Status(SERVER_INVALID_ARGUMENT, "Invalid gpu device: " + value); + std::string msg = "Invalid gpu device: " + value + + "Possible reason: resource_config.search_resources is invalid."; + return Status(SERVER_INVALID_ARGUMENT, msg); } return Status::OK(); } @@ -621,11 +679,15 @@ CheckGpuDevice(const std::string& value) { Status Config::CheckResourceConfigSearchResources(const std::vector& value) { if (value.empty()) { - return Status(SERVER_INVALID_ARGUMENT, "Empty resource config search_resources"); + std::string msg = "Invalid search resource. 
" + "Possible reason: resource_config.search_resources is empty."; + return Status(SERVER_INVALID_ARGUMENT, msg); } for (auto& gpu_device : value) { if (!CheckGpuDevice(gpu_device).ok()) { + std::string msg = "Invalid search resource: " + gpu_device + + "Possible reason: resource_config.search_resources is invalid."; return Status(SERVER_INVALID_ARGUMENT, "Invalid resource config search_resources: " + gpu_device); } } @@ -635,7 +697,9 @@ Config::CheckResourceConfigSearchResources(const std::vector& value Status Config::CheckResourceConfigIndexBuildDevice(const std::string& value) { if (!CheckGpuDevice(value).ok()) { - return Status(SERVER_INVALID_ARGUMENT, "Invalid resource config index_build_device: " + value); + std::string msg = "Invalid index build device: " + value + + "Possible reason: resource_config.index_build_device is invalid."; + return Status(SERVER_INVALID_ARGUMENT, msg); } return Status::OK(); } diff --git a/core/src/server/DBWrapper.cpp b/core/src/server/DBWrapper.cpp index a5b892ad47..2217b29e1c 100644 --- a/core/src/server/DBWrapper.cpp +++ b/core/src/server/DBWrapper.cpp @@ -54,7 +54,8 @@ DBWrapper::StartService() { std::string db_slave_path; s = config.GetDBConfigSecondaryPath(db_slave_path); if (!s.ok()) { - return s; + std::cerr << s.ToString() << std::endl; + kill(0, SIGUSR1); } StringHelpFunctions::SplitStringByDelimeter(db_slave_path, ";", opt.meta_.slave_paths_); @@ -62,13 +63,15 @@ DBWrapper::StartService() { // cache config s = config.GetCacheConfigCacheInsertData(opt.insert_cache_immediately_); if (!s.ok()) { - return s; + std::cerr << s.ToString() << std::endl; + kill(0, SIGUSR1); } std::string mode; s = config.GetServerConfigDeployMode(mode); if (!s.ok()) { - return s; + std::cerr << s.ToString() << std::endl; + kill(0, SIGUSR1); } if (mode == "single") { @@ -78,7 +81,8 @@ DBWrapper::StartService() { } else if (mode == "cluster_writable") { opt.mode_ = engine::DBOptions::MODE::CLUSTER_WRITABLE; } else { - std::cerr << "ERROR: mode specified in server_config must be ['single', 'cluster_readonly', 'cluster_writable']" + std::cerr << "Error: server_config.deploy_mode in server_config.yaml is not one of " + << "single, cluster_readonly, and cluster_writable." << std::endl; kill(0, SIGUSR1); } @@ -87,7 +91,8 @@ DBWrapper::StartService() { int32_t omp_thread; s = config.GetEngineConfigOmpThreadNum(omp_thread); if (!s.ok()) { - return s; + std::cerr << s.ToString() << std::endl; + kill(0, SIGUSR1); } if (omp_thread > 0) { @@ -105,7 +110,8 @@ DBWrapper::StartService() { int32_t use_blas_threshold; s = config.GetEngineConfigUseBlasThreshold(use_blas_threshold); if (!s.ok()) { - return s; + std::cerr << s.ToString() << std::endl; + kill(0, SIGUSR1); } faiss::distance_compute_blas_threshold = use_blas_threshold; @@ -115,7 +121,8 @@ DBWrapper::StartService() { int32_t disk, days; s = config.GetDBConfigArchiveDiskThreshold(disk); if (!s.ok()) { - return s; + std::cerr << s.ToString() << std::endl; + kill(0, SIGUSR1); } if (disk > 0) { @@ -124,7 +131,8 @@ DBWrapper::StartService() { s = config.GetDBConfigArchiveDaysThreshold(days); if (!s.ok()) { - return s; + std::cerr << s.ToString() << std::endl; + kill(0, SIGUSR1); } if (days > 0) { @@ -133,16 +141,20 @@ DBWrapper::StartService() { opt.meta_.archive_conf_.SetCriterias(criterial); // create db root folder - Status status = CommonUtil::CreateDirectory(opt.meta_.path_); - if (!status.ok()) { - std::cerr << "ERROR! 
Failed to create database root path: " << opt.meta_.path_ << std::endl; + s = CommonUtil::CreateDirectory(opt.meta_.path_); + if (!s.ok()) { + std::cerr << "Error: Failed to create database primary path: " << path + << ". Possible reason: db_config.primary_path is wrong in server_config.yaml or not available." + << std::endl; kill(0, SIGUSR1); } for (auto& path : opt.meta_.slave_paths_) { - status = CommonUtil::CreateDirectory(path); - if (!status.ok()) { - std::cerr << "ERROR! Failed to create database slave path: " << path << std::endl; + s = CommonUtil::CreateDirectory(path); + if (!s.ok()) { + std::cerr << "Error: Failed to create database secondary path: " << path + << ". Possible reason: db_config.secondary_path is wrong in server_config.yaml or not available." + << std::endl; kill(0, SIGUSR1); } } @@ -151,7 +163,9 @@ DBWrapper::StartService() { try { db_ = engine::DBFactory::Build(opt); } catch (std::exception& ex) { - std::cerr << "ERROR! Failed to open database: " << ex.what() << std::endl; + std::cerr << "Error: failed to open database: " << ex.what() + << ". Possible reason: the meta system does not work." + << std::endl; kill(0, SIGUSR1); } @@ -161,7 +175,8 @@ DBWrapper::StartService() { std::string preload_tables; s = config.GetDBConfigPreloadTable(preload_tables); if (!s.ok()) { - return s; + std::cerr << s.ToString() << std::endl; + kill(0, SIGUSR1); } s = PreloadTables(preload_tables); diff --git a/core/unittest/db/utils.cpp b/core/unittest/db/utils.cpp index 8903ce14ea..7cc2f28745 100644 --- a/core/unittest/db/utils.cpp +++ b/core/unittest/db/utils.cpp @@ -68,7 +68,7 @@ static const char " blas_threshold: 20\n" "\n" "resource_config:\n" - " resource_pool:\n" + " search_resources:\n" " - gpu0\n" " index_build_device: gpu0 # GPU used for building index"; diff --git a/core/unittest/wrapper/utils.cpp b/core/unittest/wrapper/utils.cpp index 6204ac0c05..b397a35d7c 100644 --- a/core/unittest/wrapper/utils.cpp +++ b/core/unittest/wrapper/utils.cpp @@ -58,7 +58,7 @@ static const char " blas_threshold: 20\n" "\n" "resource_config:\n" - " resource_pool:\n" + " search_resources:\n" " - gpu0\n" " index_build_device: gpu0 # GPU used for building index"; From 7510f1f7a2e3853bd93498dfd3cb2399a7eadb68 Mon Sep 17 00:00:00 2001 From: wxyu Date: Mon, 28 Oct 2019 20:28:36 +0800 Subject: [PATCH 078/149] remove unused code Former-commit-id: 630cb776ec1a736f78241835fbbc8cc95b68deaa --- core/src/scheduler/SchedInst.cpp | 70 -------------------------------- core/src/scheduler/job/Job.cpp | 21 ++++++++++ 2 files changed, 21 insertions(+), 70 deletions(-) create mode 100644 core/src/scheduler/job/Job.cpp diff --git a/core/src/scheduler/SchedInst.cpp b/core/src/scheduler/SchedInst.cpp index f3f293a0f3..8474e93c1f 100644 --- a/core/src/scheduler/SchedInst.cpp +++ b/core/src/scheduler/SchedInst.cpp @@ -82,79 +82,9 @@ load_simple_config() { } } -void -load_advance_config() { - // try { - // server::ConfigNode &config = server::Config::GetInstance().GetConfig(server::CONFIG_RESOURCE); - // - // if (config.GetChildren().empty()) throw "resource_config null exception"; - // - // auto resources = config.GetChild(server::CONFIG_RESOURCES).GetChildren(); - // - // if (resources.empty()) throw "Children of resource_config null exception"; - // - // for (auto &resource : resources) { - // auto &resname = resource.first; - // auto &resconf = resource.second; - // auto type = resconf.GetValue(server::CONFIG_RESOURCE_TYPE); - //// auto memory = resconf.GetInt64Value(server::CONFIG_RESOURCE_MEMORY); - // auto 
device_id = resconf.GetInt64Value(server::CONFIG_RESOURCE_DEVICE_ID); - //// auto enable_loader = resconf.GetBoolValue(server::CONFIG_RESOURCE_ENABLE_LOADER); - // auto enable_loader = true; - // auto enable_executor = resconf.GetBoolValue(server::CONFIG_RESOURCE_ENABLE_EXECUTOR); - // auto pinned_memory = resconf.GetInt64Value(server::CONFIG_RESOURCE_PIN_MEMORY); - // auto temp_memory = resconf.GetInt64Value(server::CONFIG_RESOURCE_TEMP_MEMORY); - // auto resource_num = resconf.GetInt64Value(server::CONFIG_RESOURCE_NUM); - // - // auto res = ResMgrInst::GetInstance()->Add(ResourceFactory::Create(resname, - // type, - // device_id, - // enable_loader, - // enable_executor)); - // - // if (res.lock()->type() == ResourceType::GPU) { - // auto pinned_memory = resconf.GetInt64Value(server::CONFIG_RESOURCE_PIN_MEMORY, 300); - // auto temp_memory = resconf.GetInt64Value(server::CONFIG_RESOURCE_TEMP_MEMORY, 300); - // auto resource_num = resconf.GetInt64Value(server::CONFIG_RESOURCE_NUM, 2); - // pinned_memory = 1024 * 1024 * pinned_memory; - // temp_memory = 1024 * 1024 * temp_memory; - // knowhere::FaissGpuResourceMgr::GetInstance().InitDevice(device_id, - // pinned_memory, - // temp_memory, - // resource_num); - // } - // } - // - // knowhere::FaissGpuResourceMgr::GetInstance().InitResource(); - // - // auto connections = config.GetChild(server::CONFIG_RESOURCE_CONNECTIONS).GetChildren(); - // if (connections.empty()) throw "connections config null exception"; - // for (auto &conn : connections) { - // auto &connect_name = conn.first; - // auto &connect_conf = conn.second; - // auto connect_speed = connect_conf.GetInt64Value(server::CONFIG_SPEED_CONNECTIONS); - // auto connect_endpoint = connect_conf.GetValue(server::CONFIG_ENDPOINT_CONNECTIONS); - // - // std::string delimiter = "==="; - // std::string left = connect_endpoint.substr(0, connect_endpoint.find(delimiter)); - // std::string right = connect_endpoint.substr(connect_endpoint.find(delimiter) + 3, - // connect_endpoint.length()); - // - // auto connection = Connection(connect_name, connect_speed); - // ResMgrInst::GetInstance()->Connect(left, right, connection); - // } - // } catch (const char *msg) { - // SERVER_LOG_ERROR << msg; - // // TODO(wxyu): throw exception instead - // exit(-1); - //// throw std::exception(); - // } -} - void StartSchedulerService() { load_simple_config(); - // load_advance_config(); ResMgrInst::GetInstance()->Start(); SchedInst::GetInstance()->Start(); JobMgrInst::GetInstance()->Start(); diff --git a/core/src/scheduler/job/Job.cpp b/core/src/scheduler/job/Job.cpp new file mode 100644 index 0000000000..954ea11f1b --- /dev/null +++ b/core/src/scheduler/job/Job.cpp @@ -0,0 +1,21 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +// +// Created by wxyu on 2019/10/28. +// + From de2bb68daa557c53bb4ad01f7561bd72222de2bd Mon Sep 17 00:00:00 2001 From: wxyu Date: Mon, 28 Oct 2019 20:34:26 +0800 Subject: [PATCH 079/149] Add unique id for Job Former-commit-id: 1865dbd859f345a3febc3ad76682f928678e59f5 --- CHANGELOG.md | 1 + core/src/db/DBImpl.cpp | 6 ++--- core/src/scheduler/ResourceMgr.h | 1 - core/src/scheduler/job/BuildIndexJob.cpp | 6 +++-- core/src/scheduler/job/BuildIndexJob.h | 2 +- core/src/scheduler/job/DeleteJob.cpp | 6 +++-- core/src/scheduler/job/DeleteJob.h | 2 +- core/src/scheduler/job/Job.cpp | 28 +++++++++++++++++++++--- core/src/scheduler/job/Job.h | 8 ++++--- core/src/scheduler/job/SearchJob.cpp | 6 +++-- core/src/scheduler/job/SearchJob.h | 2 +- 11 files changed, 49 insertions(+), 19 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index cc2461a9c2..785b7c89ef 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -14,6 +14,7 @@ Please mark all change in change log and use the ticket from JIRA. - \#92 - Speed up CMake build process - \#96 - Remove .a file in milvus/lib for docker-version - \#118 - Using shared_ptr instead of weak_ptr to avoid performance loss +- \#122 - Add unique id for Job ## Feature - \#115 - Using new structure for tasktable diff --git a/core/src/db/DBImpl.cpp b/core/src/db/DBImpl.cpp index 324d304e2a..6995de3d14 100644 --- a/core/src/db/DBImpl.cpp +++ b/core/src/db/DBImpl.cpp @@ -136,7 +136,7 @@ DBImpl::DeleteTable(const std::string& table_id, const meta::DatesT& dates) { // scheduler will determine when to delete table files auto nres = scheduler::ResMgrInst::GetInstance()->GetNumOfComputeResource(); - scheduler::DeleteJobPtr job = std::make_shared(0, table_id, meta_ptr_, nres); + scheduler::DeleteJobPtr job = std::make_shared(table_id, meta_ptr_, nres); scheduler::JobMgrInst::GetInstance()->Put(job); job->WaitAndDelete(); } else { @@ -439,7 +439,7 @@ DBImpl::QueryAsync(const std::string& table_id, const meta::TableFilesSchema& fi // step 1: get files to search ENGINE_LOG_DEBUG << "Engine query begin, index file count: " << files.size(); - scheduler::SearchJobPtr job = std::make_shared(0, k, nq, nprobe, vectors); + scheduler::SearchJobPtr job = std::make_shared(k, nq, nprobe, vectors); for (auto& file : files) { scheduler::TableFileSchemaPtr file_ptr = std::make_shared(file); job->AddIndexFile(file_ptr); @@ -754,7 +754,7 @@ DBImpl::BackgroundBuildIndex() { Status status; if (!to_index_files.empty()) { - scheduler::BuildIndexJobPtr job = std::make_shared(0, meta_ptr_, options_); + scheduler::BuildIndexJobPtr job = std::make_shared(meta_ptr_, options_); // step 2: put build index task to scheduler for (auto& file : to_index_files) { diff --git a/core/src/scheduler/ResourceMgr.h b/core/src/scheduler/ResourceMgr.h index 4d2361fb3d..31a1063e5d 100644 --- a/core/src/scheduler/ResourceMgr.h +++ b/core/src/scheduler/ResourceMgr.h @@ -75,7 +75,6 @@ class ResourceMgr : public interface::dumpable { return gpu_resources_; } - // TODO(wxyu): why return shared pointer inline std::vector GetAllResources() { return resources_; diff --git a/core/src/scheduler/job/BuildIndexJob.cpp b/core/src/scheduler/job/BuildIndexJob.cpp index 39c08b6b51..4c4c3b5054 100644 --- a/core/src/scheduler/job/BuildIndexJob.cpp +++ b/core/src/scheduler/job/BuildIndexJob.cpp @@ -23,8 +23,8 @@ namespace milvus { namespace scheduler { -BuildIndexJob::BuildIndexJob(JobId id, engine::meta::MetaPtr meta_ptr, engine::DBOptions options) - : Job(id, JobType::BUILD), meta_ptr_(std::move(meta_ptr)), options_(std::move(options)) { 
+BuildIndexJob::BuildIndexJob(engine::meta::MetaPtr meta_ptr, engine::DBOptions options) + : Job(JobType::BUILD), meta_ptr_(std::move(meta_ptr)), options_(std::move(options)) { } bool @@ -59,6 +59,8 @@ BuildIndexJob::Dump() const { json ret{ {"number_of_to_index_file", to_index_files_.size()}, }; + auto base = Job::Dump(); + ret.insert(base.begin(), base.end()); return ret; } diff --git a/core/src/scheduler/job/BuildIndexJob.h b/core/src/scheduler/job/BuildIndexJob.h index e3450ee048..9dba5854b6 100644 --- a/core/src/scheduler/job/BuildIndexJob.h +++ b/core/src/scheduler/job/BuildIndexJob.h @@ -41,7 +41,7 @@ using Id2ToTableFileMap = std::unordered_map; class BuildIndexJob : public Job { public: - explicit BuildIndexJob(JobId id, engine::meta::MetaPtr meta_ptr, engine::DBOptions options); + explicit BuildIndexJob(engine::meta::MetaPtr meta_ptr, engine::DBOptions options); public: bool diff --git a/core/src/scheduler/job/DeleteJob.cpp b/core/src/scheduler/job/DeleteJob.cpp index 04a9557177..f2131ffb5b 100644 --- a/core/src/scheduler/job/DeleteJob.cpp +++ b/core/src/scheduler/job/DeleteJob.cpp @@ -22,8 +22,8 @@ namespace milvus { namespace scheduler { -DeleteJob::DeleteJob(JobId id, std::string table_id, engine::meta::MetaPtr meta_ptr, uint64_t num_resource) - : Job(id, JobType::DELETE), +DeleteJob::DeleteJob(std::string table_id, engine::meta::MetaPtr meta_ptr, uint64_t num_resource) + : Job(JobType::DELETE), table_id_(std::move(table_id)), meta_ptr_(std::move(meta_ptr)), num_resource_(num_resource) { @@ -52,6 +52,8 @@ DeleteJob::Dump() const { {"number_of_resource", num_resource_}, {"number_of_done", done_resource}, }; + auto base = Job::Dump(); + ret.insert(base.begin(), base.end()); return ret; } diff --git a/core/src/scheduler/job/DeleteJob.h b/core/src/scheduler/job/DeleteJob.h index 93e5aa40cc..a20d67d45a 100644 --- a/core/src/scheduler/job/DeleteJob.h +++ b/core/src/scheduler/job/DeleteJob.h @@ -35,7 +35,7 @@ namespace scheduler { class DeleteJob : public Job { public: - DeleteJob(JobId id, std::string table_id, engine::meta::MetaPtr meta_ptr, uint64_t num_resource); + DeleteJob(std::string table_id, engine::meta::MetaPtr meta_ptr, uint64_t num_resource); public: void diff --git a/core/src/scheduler/job/Job.cpp b/core/src/scheduler/job/Job.cpp index 954ea11f1b..1199fe17a6 100644 --- a/core/src/scheduler/job/Job.cpp +++ b/core/src/scheduler/job/Job.cpp @@ -15,7 +15,29 @@ // specific language governing permissions and limitations // under the License. -// -// Created by wxyu on 2019/10/28. 
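// The additions just below replace this stub with the patch's unique-id
// scheme: a namespace-local counter behind a mutex, bumped once in each
// Job constructor. As a hedged aside, the same guarantee could also be
// had lock-free with std::atomic; the sketch here is illustrative only,
// and the NextJobId name is hypothetical rather than anything in the
// source:
//
//     #include <atomic>
//     #include <cstdint>
//
//     inline uint64_t NextJobId() {
//         static std::atomic<uint64_t> counter{0};
//         return counter.fetch_add(1, std::memory_order_relaxed);
//     }
//
// Either variant yields distinct ids for jobs constructed concurrently;
// the mutex form used by the patch stays easier to extend if more state
// ever needs updating under the same lock.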
-// +#include "Job.h" +namespace milvus { +namespace scheduler { + +namespace { +std::mutex unique_job_mutex; +uint64_t unique_job_id = 0; +} // namespace + +Job::Job(JobType type) : type_(type) { + std::lock_guard lock(unique_job_mutex); + id_ = unique_job_id++; +} + +json +Job::Dump() const { + json ret{ + {"id", id_}, + {"type", type_}, + }; + return ret; +} + +} // namespace scheduler +} // namespace milvus diff --git a/core/src/scheduler/job/Job.h b/core/src/scheduler/job/Job.h index 709db8cffc..949164a8d0 100644 --- a/core/src/scheduler/job/Job.h +++ b/core/src/scheduler/job/Job.h @@ -53,12 +53,14 @@ class Job : public interface::dumpable { return type_; } + json + Dump() const override; + protected: - Job(JobId id, JobType type) : id_(id), type_(type) { - } + explicit Job(JobType type); private: - JobId id_; + JobId id_ = 0; JobType type_; }; diff --git a/core/src/scheduler/job/SearchJob.cpp b/core/src/scheduler/job/SearchJob.cpp index 1143e33add..47c825c122 100644 --- a/core/src/scheduler/job/SearchJob.cpp +++ b/core/src/scheduler/job/SearchJob.cpp @@ -21,8 +21,8 @@ namespace milvus { namespace scheduler { -SearchJob::SearchJob(milvus::scheduler::JobId id, uint64_t topk, uint64_t nq, uint64_t nprobe, const float* vectors) - : Job(id, JobType::SEARCH), topk_(topk), nq_(nq), nprobe_(nprobe), vectors_(vectors) { +SearchJob::SearchJob(uint64_t topk, uint64_t nq, uint64_t nprobe, const float* vectors) + : Job(JobType::SEARCH), topk_(topk), nq_(nq), nprobe_(nprobe), vectors_(vectors) { } bool @@ -70,6 +70,8 @@ SearchJob::Dump() const { {"nq", nq_}, {"nprobe", nprobe_}, }; + auto base = Job::Dump(); + ret.insert(base.begin(), base.end()); return ret; } diff --git a/core/src/scheduler/job/SearchJob.h b/core/src/scheduler/job/SearchJob.h index 6c2bd7eea9..1e586090b9 100644 --- a/core/src/scheduler/job/SearchJob.h +++ b/core/src/scheduler/job/SearchJob.h @@ -43,7 +43,7 @@ using ResultSet = std::vector; class SearchJob : public Job { public: - SearchJob(JobId id, uint64_t topk, uint64_t nq, uint64_t nprobe, const float* vectors); + SearchJob(uint64_t topk, uint64_t nq, uint64_t nprobe, const float* vectors); public: bool From d95b3906c8afdc47d34d1ba43d045967993320a2 Mon Sep 17 00:00:00 2001 From: Heisenberg Date: Mon, 28 Oct 2019 20:41:08 +0800 Subject: [PATCH 080/149] [skip ci] refine the copy time test Former-commit-id: d370cec5e350d3f3740d9b56182ad0a990ed2ec8 --- .../faiss_benchmark/faiss_benchmark_test.cpp | 50 +++++++++++-------- 1 file changed, 30 insertions(+), 20 deletions(-) diff --git a/core/src/index/unittest/faiss_benchmark/faiss_benchmark_test.cpp b/core/src/index/unittest/faiss_benchmark/faiss_benchmark_test.cpp index bb50198f92..e80b85e024 100644 --- a/core/src/index/unittest/faiss_benchmark/faiss_benchmark_test.cpp +++ b/core/src/index/unittest/faiss_benchmark/faiss_benchmark_test.cpp @@ -426,26 +426,6 @@ test_ivfsq8h(const std::string& ann_test_name, int32_t index_add_loops, const st cpu_ivf_index->to_readonly(); } - faiss::gpu::GpuClonerOptions option; - option.allInGpu = true; - - faiss::IndexComposition index_composition; - index_composition.index = cpu_index; - index_composition.quantizer = nullptr; - index_composition.mode = 1; - - double copy_time = elapsed(); - auto index = faiss::gpu::index_cpu_to_gpu(&res, 0, &index_composition, &option); - delete index; - - if (pure_gpu_mode) { - index_composition.mode = 2; // 0: all data, 1: copy quantizer, 2: copy data - index = faiss::gpu::index_cpu_to_gpu(&res, 0, &index_composition, &option); - } - - copy_time = elapsed() - 
copy_time; - printf("[%.3f s] Copy quantizer completed, cost %f s\n", elapsed() - t0, copy_time); - size_t nq; float* xq; { @@ -472,6 +452,36 @@ test_ivfsq8h(const std::string& ann_test_name, int32_t index_add_loops, const st delete[] gt_int; } + faiss::gpu::GpuClonerOptions option; + option.allInGpu = true; + + faiss::IndexComposition index_composition; + index_composition.index = cpu_index; + index_composition.quantizer = nullptr; + + faiss::Index* index; + double copy_time; + + if (!pure_gpu_mode) { + index_composition.mode = 1; // 0: all data, 1: copy quantizer, 2: copy data + index = faiss::gpu::index_cpu_to_gpu(&res, 0, &index_composition, &option); + delete index; + + copy_time = elapsed(); + index = faiss::gpu::index_cpu_to_gpu(&res, 0, &index_composition, &option); + delete index; + } else { + index_composition.mode = 2; + index = faiss::gpu::index_cpu_to_gpu(&res, 0, &index_composition, &option); + delete index; + + copy_time = elapsed(); + index = faiss::gpu::index_cpu_to_gpu(&res, 0, &index_composition, &option); + } + + copy_time = elapsed() - copy_time; + printf("[%.3f s] Copy quantizer completed, cost %f s\n", elapsed() - t0, copy_time); + const size_t NQ = 1000, K = 1000; if (!pure_gpu_mode) { for (auto nprobe : nprobes) { From d109a3778c4849548298921833390acc18a1b680 Mon Sep 17 00:00:00 2001 From: jielinxu <52057195+jielinxu@users.noreply.github.com> Date: Tue, 29 Oct 2019 09:32:36 +0800 Subject: [PATCH 081/149] [skip ci] Move Roadmap section ahead Former-commit-id: c2a065f0192c4bc6ab948d8c4c972847bfcb771b --- README.md | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/README.md b/README.md index 5c5065442f..884ddb01ca 100644 --- a/README.md +++ b/README.md @@ -168,6 +168,10 @@ Make sure Java 8 or higher is already installed. Refer to [this link](https://github.com/milvus-io/milvus-sdk-java/tree/master/examples) for the example code. +## Milvus roadmap + +Please read our [roadmap](https://milvus.io/docs/en/roadmap/) to learn about upcoming features. + ## Contribution guidelines Contributions are welcomed and greatly appreciated. Please read our [contribution guidelines](CONTRIBUTING.md) for detailed contribution workflow. This project adheres to the [code of conduct](CODE_OF_CONDUCT.md) of Milvus. By participating, you are expected to uphold this code. @@ -184,11 +188,6 @@ We greatly appreciate the help of the following people. - [akihoni](https://github.com/akihoni) found a broken link and a small typo in the README file. - -## Milvus roadmap - -Please read our [roadmap](https://milvus.io/docs/en/roadmap/) to learn about upcoming features. 
- ## Resources [Milvus official website](https://www.milvus.io) @@ -203,7 +202,6 @@ Please read our [roadmap](https://milvus.io/docs/en/roadmap/) to learn about upc [Milvus roadmap](https://milvus.io/docs/en/roadmap/) - ## License [Apache License 2.0](LICENSE) From a78e928dfaa389d2c6b02ae1dc7cc60bc0a712f0 Mon Sep 17 00:00:00 2001 From: starlord Date: Tue, 29 Oct 2019 15:16:04 +0800 Subject: [PATCH 082/149] #90 The server start error messages could be improved to enhance user experience Former-commit-id: e3ad89ab67f2c46bc4cb4e6d094a7763fb098664 --- core/conf/server_config.template | 20 ++--- core/src/server/Config.cpp | 145 +++++++++++++++++-------------- core/src/server/DBWrapper.cpp | 6 +- 3 files changed, 91 insertions(+), 80 deletions(-) diff --git a/core/conf/server_config.template b/core/conf/server_config.template index 7abfb8b055..3b366f1bd4 100644 --- a/core/conf/server_config.template +++ b/core/conf/server_config.template @@ -4,7 +4,7 @@ server_config: address: 0.0.0.0 # milvus server ip address (IPv4) port: 19530 # port range: 1025 ~ 65534 deploy_mode: single # deployment type: single, cluster_readonly, cluster_writable - time_zone: UTC+8 + time_zone: UTC+8 # time zone, must be in format: UTC+X db_config: primary_path: @MILVUS_DB_PATH@ # path used to store data and meta @@ -14,30 +14,30 @@ db_config: # Keep 'dialect://:@:/', and replace other texts with real values # Replace 'dialect' with 'mysql' or 'sqlite' - insert_buffer_size: 4 # GB, maximum insert buffer size allowed + insert_buffer_size: 4 # GB, maximum insert buffer size allowed, must be a positive integer # sum of insert_buffer_size and cpu_cache_capacity cannot exceed total memory preload_table: # preload data at startup, '*' means load all tables, empty value means no preload # you can specify preload tables like this: table1,table2,table3 metric_config: - enable_monitor: false # enable monitoring or not + enable_monitor: false # enable monitoring or not, must be a boolean collector: prometheus # prometheus prometheus_config: - port: 8080 # port prometheus uses to fetch metrics + port: 8080 # port prometheus uses to fetch metrics, range: 1025 ~ 65534 cache_config: - cpu_cache_capacity: 16 # GB, CPU memory used for cache - cpu_cache_threshold: 0.85 # percentage of data that will be kept when cache cleanup is triggered - gpu_cache_capacity: 4 # GB, GPU memory used for cache - gpu_cache_threshold: 0.85 # percentage of data that will be kept when cache cleanup is triggered - cache_insert_data: false # whether to load inserted data into cache + cpu_cache_capacity: 16 # GB, CPU memory used for cache, must be a positive integer + cpu_cache_threshold: 0.85 # percentage of data that will be kept when cache cleanup is triggered, must be in range (0.0, 1.0] + gpu_cache_capacity: 4 # GB, GPU memory used for cache, must be a positive integer + gpu_cache_threshold: 0.85 # percentage of data that will be kept when cache cleanup is triggered, must be in range (0.0, 1.0] + cache_insert_data: false # whether to load inserted data into cache, must be a boolean engine_config: use_blas_threshold: 20 # if nq < use_blas_threshold, use SSE, faster with fluctuated response times # if nq >= use_blas_threshold, use OpenBlas, slower with stable response times resource_config: - search_resources: # define the GPUs used for search computation, valid value: gpux + search_resources: # define the GPUs used for search computation, must be in format: gpux - gpu0 index_build_device: gpu0 # GPU used for building index \ No newline at end of file diff 
--git a/core/src/server/Config.cpp b/core/src/server/Config.cpp index b20d6c2436..51449cb1de 100644 --- a/core/src/server/Config.cpp +++ b/core/src/server/Config.cpp @@ -363,8 +363,8 @@ Config::PrintAll() { Status Config::CheckServerConfigAddress(const std::string& value) { if (!ValidationUtil::ValidateIpAddress(value).ok()) { - std::string msg = "Invalid server IP address: " + value - + ". Possible reason: server_config.address is invalid in server_config.yaml."; + std::string msg = "Invalid server IP address: " + value + + ". Possible reason: server_config.address is invalid in server_config.yaml."; return Status(SERVER_INVALID_ARGUMENT, msg); } return Status::OK(); @@ -373,14 +373,14 @@ Config::CheckServerConfigAddress(const std::string& value) { Status Config::CheckServerConfigPort(const std::string& value) { if (!ValidationUtil::ValidateStringIsNumber(value).ok()) { - std::string msg = "Port " + value + " is not a number. " - + "Possible reason: server_config.port in server_config.yaml is invalid."; + std::string msg = "Port " + value + " is not a number. " + + "Possible reason: server_config.port in server_config.yaml is invalid."; return Status(SERVER_INVALID_ARGUMENT, msg); } else { int32_t port = std::stoi(value); if (!(port > 1024 && port < 65535)) { - std::string msg = "Port " + value + " is not in range [1025, 65534]. " - + "Possible reason: server_config.port in server_config.yaml is invalid."; + std::string msg = "Port " + value + " is not in range [1025, 65534]. " + + "Possible reason: server_config.port in server_config.yaml is invalid."; return Status(SERVER_INVALID_ARGUMENT, msg); } } @@ -433,9 +433,9 @@ Status Config::CheckDBConfigBackendUrl(const std::string& value) { if (!ValidationUtil::ValidateDbURI(value).ok()) { std::string msg = - "Invalid db_backend_url: " + value - + ". Possible reason: db_config.db_backend_url is invalid in server_config.yaml. " - + "The correct format should be like sqlite://:@:/ or mysql://root:123456@127.0.0.1:3306/milvus."; + "Invalid db_backend_url: " + value + + ". Possible reason: db_config.db_backend_url is invalid in server_config.yaml. 
" + + "The correct format should be like sqlite://:@:/ or mysql://root:123456@127.0.0.1:3306/milvus."; return Status(SERVER_INVALID_ARGUMENT, "invalid db_backend_url: " + value); } return Status::OK(); @@ -444,8 +444,8 @@ Config::CheckDBConfigBackendUrl(const std::string& value) { Status Config::CheckDBConfigArchiveDiskThreshold(const std::string& value) { if (!ValidationUtil::ValidateStringIsNumber(value).ok()) { - std::string msg = "Invalid archive disk threshold: " + value - + "Possible reason: db_config.archive_disk_threshold in server_config.yaml is invalid."; + std::string msg = "Invalid archive disk threshold: " + value + + "Possible reason: db_config.archive_disk_threshold in server_config.yaml is invalid."; return Status(SERVER_INVALID_ARGUMENT, msg); } return Status::OK(); @@ -454,8 +454,8 @@ Config::CheckDBConfigArchiveDiskThreshold(const std::string& value) { Status Config::CheckDBConfigArchiveDaysThreshold(const std::string& value) { if (!ValidationUtil::ValidateStringIsNumber(value).ok()) { - std::string msg = "Invalid archive days threshold: " + value - + "Possible reason: db_config.archive_disk_threshold in server_config.yaml is invalid."; + std::string msg = "Invalid archive days threshold: " + value + + "Possible reason: db_config.archive_disk_threshold in server_config.yaml is invalid."; return Status(SERVER_INVALID_ARGUMENT, msg); } return Status::OK(); @@ -464,16 +464,24 @@ Config::CheckDBConfigArchiveDaysThreshold(const std::string& value) { Status Config::CheckDBConfigInsertBufferSize(const std::string& value) { if (!ValidationUtil::ValidateStringIsNumber(value).ok()) { - std::string msg = "Invalid insert buffer size: " + value - + "Possible reason: db_config.insert_buffer_size in server_config.yaml is invalid."; + std::string msg = "Invalid insert buffer size: " + value + + "Possible reason: db_config.insert_buffer_size in server_config.yaml " + "is not a positive integer."; return Status(SERVER_INVALID_ARGUMENT, msg); } else { int64_t buffer_size = std::stoi(value) * GB; + if (buffer_size <= 0) { + std::string msg = "Invalid insert buffer size: " + value + + "Possible reason: db_config.insert_buffer_size in server_config.yaml " + "is not a positive integer."; + return Status(SERVER_INVALID_ARGUMENT, msg); + } + uint64_t total_mem = 0, free_mem = 0; CommonUtil::GetSystemMemInfo(total_mem, free_mem); if (buffer_size >= total_mem) { - std::string msg = "Invalid insert buffer size: " + value - + "Possible reason: insert buffer size exceed system memory."; + std::string msg = + "Invalid insert buffer size: " + value + "Possible reason: insert buffer size exceeds system memory."; return Status(SERVER_INVALID_ARGUMENT, msg); } } @@ -483,8 +491,8 @@ Config::CheckDBConfigInsertBufferSize(const std::string& value) { Status Config::CheckMetricConfigEnableMonitor(const std::string& value) { if (!ValidationUtil::ValidateStringIsBool(value).ok()) { - std::string msg = "Invalid metric config: " + value - + "Possible reason: metric_config.enable_monitor is invalid."; + std::string msg = + "Invalid metric config: " + value + "Possible reason: metric_config.enable_monitor is not a boolean."; return Status(SERVER_INVALID_ARGUMENT, msg); } return Status::OK(); @@ -493,8 +501,7 @@ Config::CheckMetricConfigEnableMonitor(const std::string& value) { Status Config::CheckMetricConfigCollector(const std::string& value) { if (value != "prometheus") { - std::string msg = "Invalid metric config: " + value - + "Possible reason: metric_config.collector is invalid."; + std::string msg = "Invalid 
metric config: " + value + "Possible reason: metric_config.collector is invalid."; return Status(SERVER_INVALID_ARGUMENT, msg); } return Status::OK(); @@ -503,8 +510,8 @@ Config::CheckMetricConfigCollector(const std::string& value) { Status Config::CheckMetricConfigPrometheusPort(const std::string& value) { if (!ValidationUtil::ValidateStringIsNumber(value).ok()) { - std::string msg = "Invalid metric config: " + value - + "Possible reason: metric_config.prometheus_config.port is invalid."; + std::string msg = "Invalid metric config: " + value + + "Possible reason: metric_config.prometheus_config.port is not in range [1025, 65534]."; return Status(SERVER_INVALID_ARGUMENT, "Invalid metric config prometheus_port: " + value); } return Status::OK(); @@ -513,18 +520,24 @@ Config::CheckMetricConfigPrometheusPort(const std::string& value) { Status Config::CheckCacheConfigCpuCacheCapacity(const std::string& value) { if (!ValidationUtil::ValidateStringIsNumber(value).ok()) { - std::string msg = "Invalid cpu cache capacity: " + value - + "Possible reason: cache_config.cpu_cache_capacity is invalid."; + std::string msg = "Invalid cpu cache capacity: " + value + + "Possible reason: cache_config.cpu_cache_capacity is not a positive integer."; return Status(SERVER_INVALID_ARGUMENT, msg); } else { - uint64_t cpu_cache_capacity = std::stoi(value) * GB; + int64_t cpu_cache_capacity = std::stoi(value) * GB; + if (cpu_cache_capacity <= 0) { + std::string msg = "Invalid cpu cache capacity: " + value + + "Possible reason: cache_config.cpu_cache_capacity is not a positive integer."; + return Status(SERVER_INVALID_ARGUMENT, msg); + } + uint64_t total_mem = 0, free_mem = 0; CommonUtil::GetSystemMemInfo(total_mem, free_mem); - if (cpu_cache_capacity >= total_mem) { - std::string msg = "Invalid cpu cache capacity: " + value - + "Possible reason: Cache config cpu_cache_capacity exceed system memory."; + if (static_cast(cpu_cache_capacity) >= total_mem) { + std::string msg = "Invalid cpu cache capacity: " + value + + "Possible reason: Cache config cpu_cache_capacity exceeds system memory."; return Status(SERVER_INVALID_ARGUMENT, msg); - } else if (cpu_cache_capacity > static_cast(total_mem * 0.9)) { + } else if (static_cast(cpu_cache_capacity) > static_cast(total_mem * 0.9)) { std::cerr << "WARNING: cpu cache capacity value is too big" << std::endl; } @@ -536,9 +549,9 @@ Config::CheckCacheConfigCpuCacheCapacity(const std::string& value) { int64_t insert_buffer_size = buffer_value * GB; if (insert_buffer_size + cpu_cache_capacity >= total_mem) { - std::string msg = "Invalid cpu cache capacity: " + value - + "Possible reason: sum of cache_config.cpu_cache_capacity and " - "db_config.insert_buffer_size exceeds system memory."; + std::string msg = "Invalid cpu cache capacity: " + value + + "Possible reason: sum of cache_config.cpu_cache_capacity and " + "db_config.insert_buffer_size exceeds system memory."; return Status(SERVER_INVALID_ARGUMENT, msg); } } @@ -548,14 +561,14 @@ Config::CheckCacheConfigCpuCacheCapacity(const std::string& value) { Status Config::CheckCacheConfigCpuCacheThreshold(const std::string& value) { if (!ValidationUtil::ValidateStringIsFloat(value).ok()) { - std::string msg = "Invalid cpu cache threshold: " + value - + "Possible reason: cache_config.cpu_cache_threshold is invalid."; + std::string msg = "Invalid cpu cache threshold: " + value + + "Possible reason: cache_config.cpu_cache_threshold is not in range (0.0, 1.0]."; return Status(SERVER_INVALID_ARGUMENT, msg); } else { float 
cpu_cache_threshold = std::stof(value); if (cpu_cache_threshold <= 0.0 || cpu_cache_threshold >= 1.0) { - std::string msg = "Invalid cpu cache threshold: " + value - + "Possible reason: cache_config.cpu_cache_threshold is invalid."; + std::string msg = "Invalid cpu cache threshold: " + value + + "Possible reason: cache_config.cpu_cache_threshold is not in range (0.0, 1.0]."; return Status(SERVER_INVALID_ARGUMENT, msg); } } @@ -565,8 +578,8 @@ Config::CheckCacheConfigCpuCacheThreshold(const std::string& value) { Status Config::CheckCacheConfigGpuCacheCapacity(const std::string& value) { if (!ValidationUtil::ValidateStringIsNumber(value).ok()) { - std::string msg = "Invalid gpu cache capacity: " + value - + "Possible reason: cache_config.gpu_cache_capacity is invalid."; + std::string msg = "Invalid gpu cache capacity: " + value + + "Possible reason: cache_config.gpu_cache_capacity is not a positive integer."; return Status(SERVER_INVALID_ARGUMENT, msg); } else { uint64_t gpu_cache_capacity = std::stoi(value) * GB; @@ -581,8 +594,8 @@ Config::CheckCacheConfigGpuCacheCapacity(const std::string& value) { std::string msg = "Fail to get GPU memory for GPU device: " + std::to_string(gpu_index); return Status(SERVER_UNEXPECTED_ERROR, msg); } else if (gpu_cache_capacity >= gpu_memory) { - std::string msg = "Invalid gpu cache capacity: " + value - + "Possible reason: cache_config.gpu_cache_capacity exceed GPU memory."; + std::string msg = "Invalid gpu cache capacity: " + value + + "Possible reason: cache_config.gpu_cache_capacity exceeds GPU memory."; return Status(SERVER_INVALID_ARGUMENT, msg); } else if (gpu_cache_capacity > (double)gpu_memory * 0.9) { std::cerr << "Warning: gpu cache capacity value is too big" << std::endl; @@ -594,14 +607,14 @@ Config::CheckCacheConfigGpuCacheCapacity(const std::string& value) { Status Config::CheckCacheConfigGpuCacheThreshold(const std::string& value) { if (!ValidationUtil::ValidateStringIsFloat(value).ok()) { - std::string msg = "Invalid gpu cache threshold: " + value - + "Possible reason: cache_config.gpu_cache_threshold is invalid."; + std::string msg = "Invalid gpu cache threshold: " + value + + "Possible reason: cache_config.gpu_cache_threshold is not in range (0.0, 1.0]."; return Status(SERVER_INVALID_ARGUMENT, msg); } else { float gpu_cache_threshold = std::stof(value); if (gpu_cache_threshold <= 0.0 || gpu_cache_threshold >= 1.0) { - std::string msg = "Invalid gpu cache threshold: " + value - + "Possible reason: cache_config.gpu_cache_threshold is invalid."; + std::string msg = "Invalid gpu cache threshold: " + value + + "Possible reason: cache_config.gpu_cache_threshold is not in range (0.0, 1.0]."; return Status(SERVER_INVALID_ARGUMENT, msg); } } @@ -611,8 +624,8 @@ Config::CheckCacheConfigGpuCacheThreshold(const std::string& value) { Status Config::CheckCacheConfigCacheInsertData(const std::string& value) { if (!ValidationUtil::ValidateStringIsBool(value).ok()) { - std::string msg = "Invalid cache insert option: " + value - + "Possible reason: cache_config.cache_insert_data is invalid."; + std::string msg = "Invalid cache insert option: " + value + + "Possible reason: cache_config.cache_insert_data is not a boolean."; return Status(SERVER_INVALID_ARGUMENT, msg); } return Status::OK(); @@ -621,8 +634,8 @@ Config::CheckCacheConfigCacheInsertData(const std::string& value) { Status Config::CheckEngineConfigUseBlasThreshold(const std::string& value) { if (!ValidationUtil::ValidateStringIsNumber(value).ok()) { - std::string msg = "Invalid blas threshold: " 
+ value - + "Possible reason: engine_config.use_blas_threshold is invalid."; + std::string msg = "Invalid blas threshold: " + value + + "Possible reason: engine_config.use_blas_threshold is not a positive integer."; return Status(SERVER_INVALID_ARGUMENT, msg); } return Status::OK(); @@ -631,8 +644,8 @@ Config::CheckEngineConfigUseBlasThreshold(const std::string& value) { Status Config::CheckEngineConfigOmpThreadNum(const std::string& value) { if (!ValidationUtil::ValidateStringIsNumber(value).ok()) { - std::string msg = "Invalid omp thread number: " + value - + "Possible reason: engine_config.omp_thread_num is invalid."; + std::string msg = "Invalid omp thread number: " + value + + "Possible reason: engine_config.omp_thread_num is not a positive integer."; return Status(SERVER_INVALID_ARGUMENT, msg); } @@ -640,8 +653,8 @@ Config::CheckEngineConfigOmpThreadNum(const std::string& value) { uint32_t sys_thread_cnt = 8; CommonUtil::GetSystemAvailableThreads(sys_thread_cnt); if (omp_thread > static_cast(sys_thread_cnt)) { - std::string msg = "Invalid omp thread number: " + value - + "Possible reason: engine_config.omp_thread_num is invalid."; + std::string msg = "Invalid omp thread number: " + value + + "Possible reason: engine_config.omp_thread_num exceeds system cpu cores."; return Status(SERVER_INVALID_ARGUMENT, msg); } return Status::OK(); @@ -650,8 +663,7 @@ Config::CheckEngineConfigOmpThreadNum(const std::string& value) { Status Config::CheckResourceConfigMode(const std::string& value) { if (value != "simple") { - std::string msg = "Invalid resource mode: " + value - + "Possible reason: resource_config.mode is invalid."; + std::string msg = "Invalid resource mode: " + value + "Possible reason: resource_config.mode is invalid."; return Status(SERVER_INVALID_ARGUMENT, msg); } return Status::OK(); @@ -662,15 +674,15 @@ CheckGpuDevice(const std::string& value) { const std::regex pat("gpu(\\d+)"); std::cmatch m; if (!std::regex_match(value.c_str(), m, pat)) { - std::string msg = "Invalid gpu device: " + value - + "Possible reason: resource_config.search_resources is invalid."; + std::string msg = "Invalid gpu device: " + value + + "Possible reason: resource_config.search_resources does not match your hardware."; return Status(SERVER_INVALID_ARGUMENT, msg); } int32_t gpu_index = std::stoi(value.substr(3)); if (!ValidationUtil::ValidateGpuIndex(gpu_index).ok()) { - std::string msg = "Invalid gpu device: " + value - + "Possible reason: resource_config.search_resources is invalid."; + std::string msg = "Invalid gpu device: " + value + + "Possible reason: resource_config.search_resources does not match your hardware."; return Status(SERVER_INVALID_ARGUMENT, msg); } return Status::OK(); @@ -679,16 +691,17 @@ CheckGpuDevice(const std::string& value) { Status Config::CheckResourceConfigSearchResources(const std::vector& value) { if (value.empty()) { - std::string msg = "Invalid search resource. " - "Possible reason: resource_config.search_resources is empty."; + std::string msg = + "Invalid search resource. 
" + "Possible reason: resource_config.search_resources is empty."; return Status(SERVER_INVALID_ARGUMENT, msg); } for (auto& gpu_device : value) { if (!CheckGpuDevice(gpu_device).ok()) { - std::string msg = "Invalid search resource: " + gpu_device - + "Possible reason: resource_config.search_resources is invalid."; - return Status(SERVER_INVALID_ARGUMENT, "Invalid resource config search_resources: " + gpu_device); + std::string msg = "Invalid search resource: " + gpu_device + + "Possible reason: resource_config.search_resources does not match your hardware."; + return Status(SERVER_INVALID_ARGUMENT, msg); } } return Status::OK(); @@ -697,8 +710,8 @@ Config::CheckResourceConfigSearchResources(const std::vector& value Status Config::CheckResourceConfigIndexBuildDevice(const std::string& value) { if (!CheckGpuDevice(value).ok()) { - std::string msg = "Invalid index build device: " + value - + "Possible reason: resource_config.index_build_device is invalid."; + std::string msg = "Invalid index build device: " + value + + "Possible reason: resource_config.index_build_device does not match your hardware."; return Status(SERVER_INVALID_ARGUMENT, msg); } return Status::OK(); diff --git a/core/src/server/DBWrapper.cpp b/core/src/server/DBWrapper.cpp index 2217b29e1c..7efb71075a 100644 --- a/core/src/server/DBWrapper.cpp +++ b/core/src/server/DBWrapper.cpp @@ -82,8 +82,7 @@ DBWrapper::StartService() { opt.mode_ = engine::DBOptions::MODE::CLUSTER_WRITABLE; } else { std::cerr << "Error: server_config.deploy_mode in server_config.yaml is not one of " - << "single, cluster_readonly, and cluster_writable." - << std::endl; + << "single, cluster_readonly, and cluster_writable." << std::endl; kill(0, SIGUSR1); } @@ -164,8 +163,7 @@ DBWrapper::StartService() { db_ = engine::DBFactory::Build(opt); } catch (std::exception& ex) { std::cerr << "Error: failed to open database: " << ex.what() - << ". Possible reason: the meta system does not work." - << std::endl; + << ". Possible reason: the meta system does not work." << std::endl; kill(0, SIGUSR1); } From a05cebf5133538650fcbee0601bcc1bdebdfdfa9 Mon Sep 17 00:00:00 2001 From: starlord Date: Tue, 29 Oct 2019 15:18:54 +0800 Subject: [PATCH 083/149] fix job.h header cpplint error Former-commit-id: c5bfb2f7acdfad00adf818b9cc4b20ce42b7c9e1 --- core/src/scheduler/job/Job.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/core/src/scheduler/job/Job.cpp b/core/src/scheduler/job/Job.cpp index 1199fe17a6..06a163b959 100644 --- a/core/src/scheduler/job/Job.cpp +++ b/core/src/scheduler/job/Job.cpp @@ -15,7 +15,7 @@ // specific language governing permissions and limitations // under the License. 
-#include "Job.h" +#include "scheduler/job/Job.h" namespace milvus { namespace scheduler { From 96dffcf8ab7219b20621a64efe62ffbf2f7996e1 Mon Sep 17 00:00:00 2001 From: starlord Date: Tue, 29 Oct 2019 15:37:08 +0800 Subject: [PATCH 084/149] format code Former-commit-id: f809ffd4505ada713620e4996d6fe1004bcc69dc --- core/src/server/DBWrapper.cpp | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) diff --git a/core/src/server/DBWrapper.cpp b/core/src/server/DBWrapper.cpp index 7efb71075a..e3d319ac53 100644 --- a/core/src/server/DBWrapper.cpp +++ b/core/src/server/DBWrapper.cpp @@ -40,12 +40,14 @@ DBWrapper::StartService() { engine::DBOptions opt; s = config.GetDBConfigBackendUrl(opt.meta_.backend_uri_); if (!s.ok()) { + std::cerr << s.ToString() << std::endl; return s; } std::string path; s = config.GetDBConfigPrimaryPath(path); if (!s.ok()) { + std::cerr << s.ToString() << std::endl; return s; } @@ -55,7 +57,7 @@ DBWrapper::StartService() { s = config.GetDBConfigSecondaryPath(db_slave_path); if (!s.ok()) { std::cerr << s.ToString() << std::endl; - kill(0, SIGUSR1); + return s; } StringHelpFunctions::SplitStringByDelimeter(db_slave_path, ";", opt.meta_.slave_paths_); @@ -64,14 +66,14 @@ DBWrapper::StartService() { s = config.GetCacheConfigCacheInsertData(opt.insert_cache_immediately_); if (!s.ok()) { std::cerr << s.ToString() << std::endl; - kill(0, SIGUSR1); + return s; } std::string mode; s = config.GetServerConfigDeployMode(mode); if (!s.ok()) { std::cerr << s.ToString() << std::endl; - kill(0, SIGUSR1); + return s; } if (mode == "single") { @@ -91,7 +93,7 @@ DBWrapper::StartService() { s = config.GetEngineConfigOmpThreadNum(omp_thread); if (!s.ok()) { std::cerr << s.ToString() << std::endl; - kill(0, SIGUSR1); + return s; } if (omp_thread > 0) { @@ -110,7 +112,7 @@ DBWrapper::StartService() { s = config.GetEngineConfigUseBlasThreshold(use_blas_threshold); if (!s.ok()) { std::cerr << s.ToString() << std::endl; - kill(0, SIGUSR1); + return s; } faiss::distance_compute_blas_threshold = use_blas_threshold; @@ -121,7 +123,7 @@ DBWrapper::StartService() { s = config.GetDBConfigArchiveDiskThreshold(disk); if (!s.ok()) { std::cerr << s.ToString() << std::endl; - kill(0, SIGUSR1); + return s; } if (disk > 0) { @@ -131,7 +133,7 @@ DBWrapper::StartService() { s = config.GetDBConfigArchiveDaysThreshold(days); if (!s.ok()) { std::cerr << s.ToString() << std::endl; - kill(0, SIGUSR1); + return s; } if (days > 0) { @@ -174,7 +176,7 @@ DBWrapper::StartService() { s = config.GetDBConfigPreloadTable(preload_tables); if (!s.ok()) { std::cerr << s.ToString() << std::endl; - kill(0, SIGUSR1); + return s; } s = PreloadTables(preload_tables); From c63c622cf5c4bcec43a33dacfa2b68518a5a1ddc Mon Sep 17 00:00:00 2001 From: starlord Date: Tue, 29 Oct 2019 16:06:27 +0800 Subject: [PATCH 085/149] update message Former-commit-id: 568dbdc5d6dc233716682ebec33e6bd3dfe45500 --- core/conf/server_config.template | 6 +++--- core/src/server/Config.cpp | 29 ++++++++++++----------------- 2 files changed, 15 insertions(+), 20 deletions(-) diff --git a/core/conf/server_config.template b/core/conf/server_config.template index 3b366f1bd4..3cb899d1b9 100644 --- a/core/conf/server_config.template +++ b/core/conf/server_config.template @@ -2,7 +2,7 @@ server_config: address: 0.0.0.0 # milvus server ip address (IPv4) - port: 19530 # port range: 1025 ~ 65534 + port: 19530 # milvus server port, must in range [1025, 6553] deploy_mode: single # deployment type: single, cluster_readonly, cluster_writable time_zone: UTC+8 # 
time zone, must be in format: UTC+X @@ -24,7 +24,7 @@ metric_config: enable_monitor: false # enable monitoring or not, must be a boolean collector: prometheus # prometheus prometheus_config: - port: 8080 # port prometheus uses to fetch metrics, range: 1025 ~ 65534 + port: 8080 # port prometheus uses to fetch metrics, must in range [1025, 6553] cache_config: cpu_cache_capacity: 16 # GB, CPU memory used for cache, must be a positive integer @@ -40,4 +40,4 @@ engine_config: resource_config: search_resources: # define the GPUs used for search computation, must be in format: gpux - gpu0 - index_build_device: gpu0 # GPU used for building index \ No newline at end of file + index_build_device: gpu0 # GPU used for building index, must be in format: gpux \ No newline at end of file diff --git a/core/src/server/Config.cpp b/core/src/server/Config.cpp index 51449cb1de..684d92674c 100644 --- a/core/src/server/Config.cpp +++ b/core/src/server/Config.cpp @@ -363,8 +363,8 @@ Config::PrintAll() { Status Config::CheckServerConfigAddress(const std::string& value) { if (!ValidationUtil::ValidateIpAddress(value).ok()) { - std::string msg = "Invalid server IP address: " + value + - ". Possible reason: server_config.address is invalid in server_config.yaml."; + std::string msg = + "Invalid server IP address: " + value + ". Possible reason: server_config.address is invalid."; return Status(SERVER_INVALID_ARGUMENT, msg); } return Status::OK(); @@ -373,14 +373,13 @@ Config::CheckServerConfigAddress(const std::string& value) { Status Config::CheckServerConfigPort(const std::string& value) { if (!ValidationUtil::ValidateStringIsNumber(value).ok()) { - std::string msg = "Port " + value + " is not a number. " + - "Possible reason: server_config.port in server_config.yaml is invalid."; + std::string msg = "Port " + value + " is not a number. " + "Possible reason: server_config.port is invalid."; return Status(SERVER_INVALID_ARGUMENT, msg); } else { int32_t port = std::stoi(value); if (!(port > 1024 && port < 65535)) { std::string msg = "Port " + value + " is not in range [1025, 65534]. " + - "Possible reason: server_config.port in server_config.yaml is invalid."; + "Possible reason: server_config.port is invalid."; return Status(SERVER_INVALID_ARGUMENT, msg); } } @@ -391,7 +390,7 @@ Status Config::CheckServerConfigDeployMode(const std::string& value) { if (value != "single" && value != "cluster_readonly" && value != "cluster_writable") { return Status(SERVER_INVALID_ARGUMENT, - "Error: server_config.deploy_mode in server_config.yaml is not one of " + "Error: server_config.deploy_mode is not one of " "single, cluster_readonly, and cluster_writable."); } return Status::OK(); @@ -418,8 +417,7 @@ Config::CheckServerConfigTimeZone(const std::string& value) { Status Config::CheckDBConfigPrimaryPath(const std::string& value) { if (value.empty()) { - return Status(SERVER_INVALID_ARGUMENT, - "db_path is empty. Possible reason: db_config.db_path in server_config.yaml is empty."); + return Status(SERVER_INVALID_ARGUMENT, "db_path is empty. Possible reason: db_config.db_path is empty."); } return Status::OK(); } @@ -433,8 +431,7 @@ Status Config::CheckDBConfigBackendUrl(const std::string& value) { if (!ValidationUtil::ValidateDbURI(value).ok()) { std::string msg = - "Invalid db_backend_url: " + value + - ". Possible reason: db_config.db_backend_url is invalid in server_config.yaml. " + + "Invalid db_backend_url: " + value + ". Possible reason: db_config.db_backend_url is invalid. 
" + "The correct format should be like sqlite://:@:/ or mysql://root:123456@127.0.0.1:3306/milvus."; return Status(SERVER_INVALID_ARGUMENT, "invalid db_backend_url: " + value); } @@ -445,7 +442,7 @@ Status Config::CheckDBConfigArchiveDiskThreshold(const std::string& value) { if (!ValidationUtil::ValidateStringIsNumber(value).ok()) { std::string msg = "Invalid archive disk threshold: " + value + - "Possible reason: db_config.archive_disk_threshold in server_config.yaml is invalid."; + "Possible reason: db_config.archive_disk_threshold is invalid."; return Status(SERVER_INVALID_ARGUMENT, msg); } return Status::OK(); @@ -455,7 +452,7 @@ Status Config::CheckDBConfigArchiveDaysThreshold(const std::string& value) { if (!ValidationUtil::ValidateStringIsNumber(value).ok()) { std::string msg = "Invalid archive days threshold: " + value + - "Possible reason: db_config.archive_disk_threshold in server_config.yaml is invalid."; + "Possible reason: db_config.archive_disk_threshold is invalid."; return Status(SERVER_INVALID_ARGUMENT, msg); } return Status::OK(); @@ -465,15 +462,13 @@ Status Config::CheckDBConfigInsertBufferSize(const std::string& value) { if (!ValidationUtil::ValidateStringIsNumber(value).ok()) { std::string msg = "Invalid insert buffer size: " + value + - "Possible reason: db_config.insert_buffer_size in server_config.yaml " - "is not a positive integer."; + "Possible reason: db_config.insert_buffer_size is not a positive integer."; return Status(SERVER_INVALID_ARGUMENT, msg); } else { int64_t buffer_size = std::stoi(value) * GB; if (buffer_size <= 0) { std::string msg = "Invalid insert buffer size: " + value + - "Possible reason: db_config.insert_buffer_size in server_config.yaml " - "is not a positive integer."; + "Possible reason: db_config.insert_buffer_size is not a positive integer."; return Status(SERVER_INVALID_ARGUMENT, msg); } @@ -535,7 +530,7 @@ Config::CheckCacheConfigCpuCacheCapacity(const std::string& value) { CommonUtil::GetSystemMemInfo(total_mem, free_mem); if (static_cast(cpu_cache_capacity) >= total_mem) { std::string msg = "Invalid cpu cache capacity: " + value + - "Possible reason: Cache config cpu_cache_capacity exceeds system memory."; + "Possible reason: cache_config.cpu_cache_capacity exceeds system memory."; return Status(SERVER_INVALID_ARGUMENT, msg); } else if (static_cast(cpu_cache_capacity) > static_cast(total_mem * 0.9)) { std::cerr << "WARNING: cpu cache capacity value is too big" << std::endl; From bfdc80401d4c560b256cb9e887543cbd17b57e7b Mon Sep 17 00:00:00 2001 From: starlord Date: Tue, 29 Oct 2019 16:15:14 +0800 Subject: [PATCH 086/149] fix typo Former-commit-id: bc177ec75ef30265a1358961f89966c8d6ea535c --- core/conf/server_config.template | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/core/conf/server_config.template b/core/conf/server_config.template index 3cb899d1b9..3feb16fd63 100644 --- a/core/conf/server_config.template +++ b/core/conf/server_config.template @@ -2,7 +2,7 @@ server_config: address: 0.0.0.0 # milvus server ip address (IPv4) - port: 19530 # milvus server port, must in range [1025, 6553] + port: 19530 # milvus server port, must in range [1025, 65534] deploy_mode: single # deployment type: single, cluster_readonly, cluster_writable time_zone: UTC+8 # time zone, must be in format: UTC+X @@ -24,7 +24,7 @@ metric_config: enable_monitor: false # enable monitoring or not, must be a boolean collector: prometheus # prometheus prometheus_config: - port: 8080 # port prometheus uses to fetch metrics, must in range 
[1025, 6553] + port: 8080 # port prometheus uses to fetch metrics, must in range [1025, 65534] cache_config: cpu_cache_capacity: 16 # GB, CPU memory used for cache, must be a positive integer From dcc7fbd439ca674bc39aeb859e001c6e4fbe4311 Mon Sep 17 00:00:00 2001 From: starlord Date: Tue, 29 Oct 2019 16:50:55 +0800 Subject: [PATCH 087/149] update message Former-commit-id: 84f9b8ce2ac8050128f678754123ae2716e3b342 --- core/src/server/Config.cpp | 75 +++++++++++++++++++------------------- 1 file changed, 38 insertions(+), 37 deletions(-) diff --git a/core/src/server/Config.cpp b/core/src/server/Config.cpp index 684d92674c..937556824e 100644 --- a/core/src/server/Config.cpp +++ b/core/src/server/Config.cpp @@ -373,13 +373,13 @@ Config::CheckServerConfigAddress(const std::string& value) { Status Config::CheckServerConfigPort(const std::string& value) { if (!ValidationUtil::ValidateStringIsNumber(value).ok()) { - std::string msg = "Port " + value + " is not a number. " + "Possible reason: server_config.port is invalid."; + std::string msg = "Invalid server port: " + value + ". Possible reason: server_config.port is not a number."; return Status(SERVER_INVALID_ARGUMENT, msg); } else { int32_t port = std::stoi(value); if (!(port > 1024 && port < 65535)) { - std::string msg = "Port " + value + " is not in range [1025, 65534]. " + - "Possible reason: server_config.port is invalid."; + std::string msg = "Invalid server port: " + value + + ". Possible reason: server_config.port is not in range [1025, 65534]."; return Status(SERVER_INVALID_ARGUMENT, msg); } } @@ -390,7 +390,7 @@ Status Config::CheckServerConfigDeployMode(const std::string& value) { if (value != "single" && value != "cluster_readonly" && value != "cluster_writable") { return Status(SERVER_INVALID_ARGUMENT, - "Error: server_config.deploy_mode is not one of " + "server_config.deploy_mode is not one of " "single, cluster_readonly, and cluster_writable."); } return Status::OK(); @@ -399,15 +399,15 @@ Config::CheckServerConfigDeployMode(const std::string& value) { Status Config::CheckServerConfigTimeZone(const std::string& value) { if (value.length() <= 3) { - return Status(SERVER_INVALID_ARGUMENT, "Invalid server config time_zone: " + value); + return Status(SERVER_INVALID_ARGUMENT, "Invalid server_config.time_zone: " + value); } else { if (value.substr(0, 3) != "UTC") { - return Status(SERVER_INVALID_ARGUMENT, "Invalid server config time_zone: " + value); + return Status(SERVER_INVALID_ARGUMENT, "Invalid server_config.time_zone: " + value); } else { try { stoi(value.substr(3)); } catch (...) { - return Status(SERVER_INVALID_ARGUMENT, "Invalid server config time_zone: " + value); + return Status(SERVER_INVALID_ARGUMENT, "Invalid server_config.time_zone: " + value); } } } @@ -417,7 +417,7 @@ Config::CheckServerConfigTimeZone(const std::string& value) { Status Config::CheckDBConfigPrimaryPath(const std::string& value) { if (value.empty()) { - return Status(SERVER_INVALID_ARGUMENT, "db_path is empty. Possible reason: db_config.db_path is empty."); + return Status(SERVER_INVALID_ARGUMENT, "db_config.db_path is empty."); } return Status::OK(); } @@ -431,7 +431,7 @@ Status Config::CheckDBConfigBackendUrl(const std::string& value) { if (!ValidationUtil::ValidateDbURI(value).ok()) { std::string msg = - "Invalid db_backend_url: " + value + ". Possible reason: db_config.db_backend_url is invalid. " + + "Invalid backend url: " + value + ". Possible reason: db_config.db_backend_url is invalid. 
" + "The correct format should be like sqlite://:@:/ or mysql://root:123456@127.0.0.1:3306/milvus."; return Status(SERVER_INVALID_ARGUMENT, "invalid db_backend_url: " + value); } @@ -442,7 +442,7 @@ Status Config::CheckDBConfigArchiveDiskThreshold(const std::string& value) { if (!ValidationUtil::ValidateStringIsNumber(value).ok()) { std::string msg = "Invalid archive disk threshold: " + value + - "Possible reason: db_config.archive_disk_threshold is invalid."; + ". Possible reason: db_config.archive_disk_threshold is invalid."; return Status(SERVER_INVALID_ARGUMENT, msg); } return Status::OK(); @@ -452,7 +452,7 @@ Status Config::CheckDBConfigArchiveDaysThreshold(const std::string& value) { if (!ValidationUtil::ValidateStringIsNumber(value).ok()) { std::string msg = "Invalid archive days threshold: " + value + - "Possible reason: db_config.archive_disk_threshold is invalid."; + ". Possible reason: db_config.archive_disk_threshold is invalid."; return Status(SERVER_INVALID_ARGUMENT, msg); } return Status::OK(); @@ -462,13 +462,13 @@ Status Config::CheckDBConfigInsertBufferSize(const std::string& value) { if (!ValidationUtil::ValidateStringIsNumber(value).ok()) { std::string msg = "Invalid insert buffer size: " + value + - "Possible reason: db_config.insert_buffer_size is not a positive integer."; + ". Possible reason: db_config.insert_buffer_size is not a positive integer."; return Status(SERVER_INVALID_ARGUMENT, msg); } else { int64_t buffer_size = std::stoi(value) * GB; if (buffer_size <= 0) { std::string msg = "Invalid insert buffer size: " + value + - "Possible reason: db_config.insert_buffer_size is not a positive integer."; + ". Possible reason: db_config.insert_buffer_size is not a positive integer."; return Status(SERVER_INVALID_ARGUMENT, msg); } @@ -476,7 +476,7 @@ Config::CheckDBConfigInsertBufferSize(const std::string& value) { CommonUtil::GetSystemMemInfo(total_mem, free_mem); if (buffer_size >= total_mem) { std::string msg = - "Invalid insert buffer size: " + value + "Possible reason: insert buffer size exceeds system memory."; + "Invalid insert buffer size: " + value + ". Possible reason: insert buffer size exceeds system memory."; return Status(SERVER_INVALID_ARGUMENT, msg); } } @@ -487,7 +487,7 @@ Status Config::CheckMetricConfigEnableMonitor(const std::string& value) { if (!ValidationUtil::ValidateStringIsBool(value).ok()) { std::string msg = - "Invalid metric config: " + value + "Possible reason: metric_config.enable_monitor is not a boolean."; + "Invalid metric config: " + value + ". Possible reason: metric_config.enable_monitor is not a boolean."; return Status(SERVER_INVALID_ARGUMENT, msg); } return Status::OK(); @@ -496,7 +496,8 @@ Config::CheckMetricConfigEnableMonitor(const std::string& value) { Status Config::CheckMetricConfigCollector(const std::string& value) { if (value != "prometheus") { - std::string msg = "Invalid metric config: " + value + "Possible reason: metric_config.collector is invalid."; + std::string msg = + "Invalid metric collector: " + value + ". 
Possible reason: metric_config.collector is invalid."; return Status(SERVER_INVALID_ARGUMENT, msg); } return Status::OK(); @@ -505,8 +506,8 @@ Config::CheckMetricConfigCollector(const std::string& value) { Status Config::CheckMetricConfigPrometheusPort(const std::string& value) { if (!ValidationUtil::ValidateStringIsNumber(value).ok()) { - std::string msg = "Invalid metric config: " + value + - "Possible reason: metric_config.prometheus_config.port is not in range [1025, 65534]."; + std::string msg = "Invalid metric port: " + value + + ". Possible reason: metric_config.prometheus_config.port is not in range [1025, 65534]."; return Status(SERVER_INVALID_ARGUMENT, "Invalid metric config prometheus_port: " + value); } return Status::OK(); @@ -516,13 +517,13 @@ Status Config::CheckCacheConfigCpuCacheCapacity(const std::string& value) { if (!ValidationUtil::ValidateStringIsNumber(value).ok()) { std::string msg = "Invalid cpu cache capacity: " + value + - "Possible reason: cache_config.cpu_cache_capacity is not a positive integer."; + ". Possible reason: cache_config.cpu_cache_capacity is not a positive integer."; return Status(SERVER_INVALID_ARGUMENT, msg); } else { int64_t cpu_cache_capacity = std::stoi(value) * GB; if (cpu_cache_capacity <= 0) { std::string msg = "Invalid cpu cache capacity: " + value + - "Possible reason: cache_config.cpu_cache_capacity is not a positive integer."; + ". Possible reason: cache_config.cpu_cache_capacity is not a positive integer."; return Status(SERVER_INVALID_ARGUMENT, msg); } @@ -530,7 +531,7 @@ Config::CheckCacheConfigCpuCacheCapacity(const std::string& value) { CommonUtil::GetSystemMemInfo(total_mem, free_mem); if (static_cast(cpu_cache_capacity) >= total_mem) { std::string msg = "Invalid cpu cache capacity: " + value + - "Possible reason: cache_config.cpu_cache_capacity exceeds system memory."; + ". Possible reason: cache_config.cpu_cache_capacity exceeds system memory."; return Status(SERVER_INVALID_ARGUMENT, msg); } else if (static_cast(cpu_cache_capacity) > static_cast(total_mem * 0.9)) { std::cerr << "WARNING: cpu cache capacity value is too big" << std::endl; @@ -545,7 +546,7 @@ Config::CheckCacheConfigCpuCacheCapacity(const std::string& value) { int64_t insert_buffer_size = buffer_value * GB; if (insert_buffer_size + cpu_cache_capacity >= total_mem) { std::string msg = "Invalid cpu cache capacity: " + value + - "Possible reason: sum of cache_config.cpu_cache_capacity and " + ". Possible reason: sum of cache_config.cpu_cache_capacity and " "db_config.insert_buffer_size exceeds system memory."; return Status(SERVER_INVALID_ARGUMENT, msg); } @@ -557,13 +558,13 @@ Status Config::CheckCacheConfigCpuCacheThreshold(const std::string& value) { if (!ValidationUtil::ValidateStringIsFloat(value).ok()) { std::string msg = "Invalid cpu cache threshold: " + value + - "Possible reason: cache_config.cpu_cache_threshold is not in range (0.0, 1.0]."; + ". Possible reason: cache_config.cpu_cache_threshold is not in range (0.0, 1.0]."; return Status(SERVER_INVALID_ARGUMENT, msg); } else { float cpu_cache_threshold = std::stof(value); if (cpu_cache_threshold <= 0.0 || cpu_cache_threshold >= 1.0) { std::string msg = "Invalid cpu cache threshold: " + value + - "Possible reason: cache_config.cpu_cache_threshold is not in range (0.0, 1.0]."; + ". 
Possible reason: cache_config.cpu_cache_threshold is not in range (0.0, 1.0]."; return Status(SERVER_INVALID_ARGUMENT, msg); } } @@ -574,7 +575,7 @@ Status Config::CheckCacheConfigGpuCacheCapacity(const std::string& value) { if (!ValidationUtil::ValidateStringIsNumber(value).ok()) { std::string msg = "Invalid gpu cache capacity: " + value + - "Possible reason: cache_config.gpu_cache_capacity is not a positive integer."; + ". Possible reason: cache_config.gpu_cache_capacity is not a positive integer."; return Status(SERVER_INVALID_ARGUMENT, msg); } else { uint64_t gpu_cache_capacity = std::stoi(value) * GB; @@ -590,7 +591,7 @@ Config::CheckCacheConfigGpuCacheCapacity(const std::string& value) { return Status(SERVER_UNEXPECTED_ERROR, msg); } else if (gpu_cache_capacity >= gpu_memory) { std::string msg = "Invalid gpu cache capacity: " + value + - "Possible reason: cache_config.gpu_cache_capacity exceeds GPU memory."; + ". Possible reason: cache_config.gpu_cache_capacity exceeds GPU memory."; return Status(SERVER_INVALID_ARGUMENT, msg); } else if (gpu_cache_capacity > (double)gpu_memory * 0.9) { std::cerr << "Warning: gpu cache capacity value is too big" << std::endl; @@ -603,13 +604,13 @@ Status Config::CheckCacheConfigGpuCacheThreshold(const std::string& value) { if (!ValidationUtil::ValidateStringIsFloat(value).ok()) { std::string msg = "Invalid gpu cache threshold: " + value + - "Possible reason: cache_config.gpu_cache_threshold is not in range (0.0, 1.0]."; + ". Possible reason: cache_config.gpu_cache_threshold is not in range (0.0, 1.0]."; return Status(SERVER_INVALID_ARGUMENT, msg); } else { float gpu_cache_threshold = std::stof(value); if (gpu_cache_threshold <= 0.0 || gpu_cache_threshold >= 1.0) { std::string msg = "Invalid gpu cache threshold: " + value + - "Possible reason: cache_config.gpu_cache_threshold is not in range (0.0, 1.0]."; + ". Possible reason: cache_config.gpu_cache_threshold is not in range (0.0, 1.0]."; return Status(SERVER_INVALID_ARGUMENT, msg); } } @@ -620,7 +621,7 @@ Status Config::CheckCacheConfigCacheInsertData(const std::string& value) { if (!ValidationUtil::ValidateStringIsBool(value).ok()) { std::string msg = "Invalid cache insert option: " + value + - "Possible reason: cache_config.cache_insert_data is not a boolean."; + ". Possible reason: cache_config.cache_insert_data is not a boolean."; return Status(SERVER_INVALID_ARGUMENT, msg); } return Status::OK(); @@ -630,7 +631,7 @@ Status Config::CheckEngineConfigUseBlasThreshold(const std::string& value) { if (!ValidationUtil::ValidateStringIsNumber(value).ok()) { std::string msg = "Invalid blas threshold: " + value + - "Possible reason: engine_config.use_blas_threshold is not a positive integer."; + ". Possible reason: engine_config.use_blas_threshold is not a positive integer."; return Status(SERVER_INVALID_ARGUMENT, msg); } return Status::OK(); @@ -640,7 +641,7 @@ Status Config::CheckEngineConfigOmpThreadNum(const std::string& value) { if (!ValidationUtil::ValidateStringIsNumber(value).ok()) { std::string msg = "Invalid omp thread number: " + value + - "Possible reason: engine_config.omp_thread_num is not a positive integer."; + ". 
Possible reason: engine_config.omp_thread_num is not a positive integer."; return Status(SERVER_INVALID_ARGUMENT, msg); } @@ -649,7 +650,7 @@ Config::CheckEngineConfigOmpThreadNum(const std::string& value) { CommonUtil::GetSystemAvailableThreads(sys_thread_cnt); if (omp_thread > static_cast(sys_thread_cnt)) { std::string msg = "Invalid omp thread number: " + value + - "Possible reason: engine_config.omp_thread_num exceeds system cpu cores."; + ". Possible reason: engine_config.omp_thread_num exceeds system cpu cores."; return Status(SERVER_INVALID_ARGUMENT, msg); } return Status::OK(); @@ -658,7 +659,7 @@ Config::CheckEngineConfigOmpThreadNum(const std::string& value) { Status Config::CheckResourceConfigMode(const std::string& value) { if (value != "simple") { - std::string msg = "Invalid resource mode: " + value + "Possible reason: resource_config.mode is invalid."; + std::string msg = "Invalid resource mode: " + value + ". Possible reason: resource_config.mode is invalid."; return Status(SERVER_INVALID_ARGUMENT, msg); } return Status::OK(); @@ -670,14 +671,14 @@ CheckGpuDevice(const std::string& value) { std::cmatch m; if (!std::regex_match(value.c_str(), m, pat)) { std::string msg = "Invalid gpu device: " + value + - "Possible reason: resource_config.search_resources does not match your hardware."; + ". Possible reason: resource_config.search_resources does not match your hardware."; return Status(SERVER_INVALID_ARGUMENT, msg); } int32_t gpu_index = std::stoi(value.substr(3)); if (!ValidationUtil::ValidateGpuIndex(gpu_index).ok()) { std::string msg = "Invalid gpu device: " + value + - "Possible reason: resource_config.search_resources does not match your hardware."; + ". Possible reason: resource_config.search_resources does not match your hardware."; return Status(SERVER_INVALID_ARGUMENT, msg); } return Status::OK(); @@ -695,7 +696,7 @@ Config::CheckResourceConfigSearchResources(const std::vector& value for (auto& gpu_device : value) { if (!CheckGpuDevice(gpu_device).ok()) { std::string msg = "Invalid search resource: " + gpu_device + - "Possible reason: resource_config.search_resources does not match your hardware."; + ". Possible reason: resource_config.search_resources does not match your hardware."; return Status(SERVER_INVALID_ARGUMENT, msg); } } @@ -706,7 +707,7 @@ Status Config::CheckResourceConfigIndexBuildDevice(const std::string& value) { if (!CheckGpuDevice(value).ok()) { std::string msg = "Invalid index build device: " + value + - "Possible reason: resource_config.index_build_device does not match your hardware."; + ". Possible reason: resource_config.index_build_device does not match your hardware."; return Status(SERVER_INVALID_ARGUMENT, msg); } return Status::OK(); From bb43a771306dd8ef4237d72189a260fe5a28296c Mon Sep 17 00:00:00 2001 From: starlord Date: Tue, 29 Oct 2019 17:19:02 +0800 Subject: [PATCH 088/149] update message Former-commit-id: 0a5ad5dfd42d76032c5008d9d9e4e2213b161232 --- core/src/server/Config.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/core/src/server/Config.cpp b/core/src/server/Config.cpp index 937556824e..111cc26f9c 100644 --- a/core/src/server/Config.cpp +++ b/core/src/server/Config.cpp @@ -475,8 +475,8 @@ Config::CheckDBConfigInsertBufferSize(const std::string& value) { uint64_t total_mem = 0, free_mem = 0; CommonUtil::GetSystemMemInfo(total_mem, free_mem); if (buffer_size >= total_mem) { - std::string msg = - "Invalid insert buffer size: " + value + ". 
Possible reason: insert buffer size exceeds system memory."; + std::string msg = "Invalid insert buffer size: " + value + + ". Possible reason: db_config.insert_buffer_size exceeds system memory."; return Status(SERVER_INVALID_ARGUMENT, msg); } } From b43b9bac27b0bd0187dc0e04c0488a2613319e9e Mon Sep 17 00:00:00 2001 From: "yudong.cai" Date: Tue, 29 Oct 2019 19:15:54 +0800 Subject: [PATCH 089/149] #89 do normalize() for IP test Former-commit-id: 11be41ee43e3dd0e2ecbcd50ba70bab2df58688d --- .../faiss_benchmark/faiss_benchmark_test.cpp | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/core/src/index/unittest/faiss_benchmark/faiss_benchmark_test.cpp b/core/src/index/unittest/faiss_benchmark/faiss_benchmark_test.cpp index e80b85e024..dfce6970cc 100644 --- a/core/src/index/unittest/faiss_benchmark/faiss_benchmark_test.cpp +++ b/core/src/index/unittest/faiss_benchmark/faiss_benchmark_test.cpp @@ -54,6 +54,20 @@ elapsed() { return tv.tv_sec + tv.tv_usec * 1e-6; } +void normalize(float* arr, size_t nq, size_t dim) { + for (size_t i = 0; i < nq; i++) { + double vecLen = 0.0; + for (size_t j = 0; j < dim; j++) { + double val = arr[i * dim + j]; + vecLen += val * val; + } + vecLen = std::sqrt(vecLen); + for (size_t j = 0; j < dim; j++) { + arr[i * dim + j] = (float) (arr[i * dim + j] / vecLen); + } + } +} + void* hdf5_read(const char* file_name, const char* dataset_name, H5T_class_t dataset_class, size_t& d_out, size_t& n_out) { hid_t file, dataset, datatype, dataspace, memspace; @@ -237,6 +251,11 @@ test_ann_hdf5(const std::string& ann_test_name, const std::string& index_key, in float* xb = (float*)hdf5_read(ann_file_name.c_str(), "train", H5T_FLOAT, d, nb); assert(d == dim || !"dataset does not have correct dimension"); + if (metric_type == faiss::METRIC_INNER_PRODUCT) { + printf("[%.3f s] Normalizing data set \n", elapsed() - t0); + normalize(xb, nb, d); + } + printf("[%.3f s] Preparing index \"%s\" d=%ld\n", elapsed() - t0, index_key.c_str(), d); index = faiss::index_factory(d, index_key.c_str(), metric_type); From b6484475d891620d639c2d5e3240a2fdc5d87a5b Mon Sep 17 00:00:00 2001 From: "yudong.cai" Date: Tue, 29 Oct 2019 19:22:21 +0800 Subject: [PATCH 090/149] #89 fix clang-format Former-commit-id: 0c7feb7370d58e7511a1089d181d256e9c9e6905 --- .../index/unittest/faiss_benchmark/faiss_benchmark_test.cpp | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/core/src/index/unittest/faiss_benchmark/faiss_benchmark_test.cpp b/core/src/index/unittest/faiss_benchmark/faiss_benchmark_test.cpp index dfce6970cc..618ee00b28 100644 --- a/core/src/index/unittest/faiss_benchmark/faiss_benchmark_test.cpp +++ b/core/src/index/unittest/faiss_benchmark/faiss_benchmark_test.cpp @@ -54,7 +54,8 @@ elapsed() { return tv.tv_sec + tv.tv_usec * 1e-6; } -void normalize(float* arr, size_t nq, size_t dim) { +void +normalize(float* arr, size_t nq, size_t dim) { for (size_t i = 0; i < nq; i++) { double vecLen = 0.0; for (size_t j = 0; j < dim; j++) { @@ -63,7 +64,7 @@ void normalize(float* arr, size_t nq, size_t dim) { } vecLen = std::sqrt(vecLen); for (size_t j = 0; j < dim; j++) { - arr[i * dim + j] = (float) (arr[i * dim + j] / vecLen); + arr[i * dim + j] = (float)(arr[i * dim + j] / vecLen); } } } From 39e127d70752737351efaa1a6ed07d05a6437296 Mon Sep 17 00:00:00 2001 From: "xiaojun.lin" Date: Tue, 29 Oct 2019 20:05:01 +0800 Subject: [PATCH 091/149] Upgrade Faiss to v1.6 Former-commit-id: 637f6933c77c2644d4d90120d9d2aed77e4e70ff --- core/src/db/engine/ExecutionEngineImpl.cpp | 2 -- 
core/src/index/cmake/ThirdPartyPackagesCore.cmake | 6 +++--- .../knowhere/knowhere/index/vector_index/IndexGPUIVF.cpp | 3 ++- .../knowhere/index/vector_index/IndexIVFSQHybrid.cpp | 7 ++++++- core/src/scheduler/job/Job.cpp | 2 +- core/unittest/db/test_engine.cpp | 1 - core/unittest/db/utils.cpp | 4 ++-- 7 files changed, 14 insertions(+), 11 deletions(-) diff --git a/core/src/db/engine/ExecutionEngineImpl.cpp b/core/src/db/engine/ExecutionEngineImpl.cpp index 1ecba677fe..66e9795ff3 100644 --- a/core/src/db/engine/ExecutionEngineImpl.cpp +++ b/core/src/db/engine/ExecutionEngineImpl.cpp @@ -257,7 +257,6 @@ ExecutionEngineImpl::Load(bool to_cache) { Status ExecutionEngineImpl::CopyToGpu(uint64_t device_id, bool hybrid) { if (hybrid) { -#if 1 const std::string key = location_ + ".quantizer"; std::vector gpus = scheduler::get_gpu_pool(); @@ -306,7 +305,6 @@ ExecutionEngineImpl::CopyToGpu(uint64_t device_id, bool hybrid) { auto cached_quantizer = std::make_shared(pair.second); cache::GpuCacheMgr::GetInstance(device_id)->InsertItem(key, cached_quantizer); } -#endif return Status::OK(); } diff --git a/core/src/index/cmake/ThirdPartyPackagesCore.cmake b/core/src/index/cmake/ThirdPartyPackagesCore.cmake index 3635b4fa1b..e8a5c8a995 100644 --- a/core/src/index/cmake/ThirdPartyPackagesCore.cmake +++ b/core/src/index/cmake/ThirdPartyPackagesCore.cmake @@ -245,11 +245,11 @@ if(CUSTOMIZATION) # set(FAISS_MD5 "072db398351cca6e88f52d743bbb9fa0") # commit-id 3a2344d04744166af41ef1a74449d68a315bfe17 branch-0.2.1 # set(FAISS_MD5 "c89ea8e655f5cdf58f42486f13614714") # commit-id 9c28a1cbb88f41fa03b03d7204106201ad33276b branch-0.2.1 # set(FAISS_MD5 "87fdd86351ffcaf3f80dc26ade63c44b") # commit-id 841a156e67e8e22cd8088e1b58c00afbf2efc30b branch-0.2.1 - set(FAISS_MD5 "f3b2ce3364c3fa7febd3aa7fdd0fe380") # commit-id 694e03458e6b69ce8a62502f71f69a614af5af8f branch-0.3.0 + set(FAISS_MD5 "f3b2ce3364c3fa7febd3aa7fdd0fe380") # commit-id 694e03458e6b69ce8a62502f71f69a614af5af8f branch-0.3.0 endif() else() - set(FAISS_SOURCE_URL "https://github.com/facebookresearch/faiss/archive/v1.5.3.tar.gz") - set(FAISS_MD5 "0bc12737b23def156f6a1eb782050135") + set(FAISS_SOURCE_URL "https://github.com/milvus-io/faiss/archive/1.6.0.tar.gz") + set(FAISS_MD5 "eb96d84f98b078a9eec04a796f5c792e") endif() message(STATUS "FAISS URL = ${FAISS_SOURCE_URL}") diff --git a/core/src/index/knowhere/knowhere/index/vector_index/IndexGPUIVF.cpp b/core/src/index/knowhere/knowhere/index/vector_index/IndexGPUIVF.cpp index a26f947181..251dfc12ed 100644 --- a/core/src/index/knowhere/knowhere/index/vector_index/IndexGPUIVF.cpp +++ b/core/src/index/knowhere/knowhere/index/vector_index/IndexGPUIVF.cpp @@ -86,7 +86,8 @@ GPUIVF::SerializeImpl() { faiss::Index* index = index_.get(); faiss::Index* host_index = faiss::gpu::index_gpu_to_cpu(index); - SealImpl(); + // TODO(linxj): support seal + // SealImpl(); faiss::write_index(host_index, &writer); delete host_index; diff --git a/core/src/index/knowhere/knowhere/index/vector_index/IndexIVFSQHybrid.cpp b/core/src/index/knowhere/knowhere/index/vector_index/IndexIVFSQHybrid.cpp index 7b229db21e..84bf594421 100644 --- a/core/src/index/knowhere/knowhere/index/vector_index/IndexIVFSQHybrid.cpp +++ b/core/src/index/knowhere/knowhere/index/vector_index/IndexIVFSQHybrid.cpp @@ -160,7 +160,7 @@ IVFSQHybrid::LoadQuantizer(const Config& conf) { index_composition->quantizer = nullptr; index_composition->mode = quantizer_conf->mode; // only 1 - auto gpu_index = faiss::gpu::index_cpu_to_gpu(res->faiss_res.get(), gpu_id_, 
index_composition, &option);
+        auto gpu_index = faiss::gpu::index_cpu_to_gpu(res->faiss_res.get(), gpu_id, index_composition, &option);
         delete gpu_index;

         auto q = std::make_shared();
@@ -354,5 +354,10 @@ IVFSQHybrid::LoadImpl(const BinarySet& index_binary) {
     GPUIVF::LoadImpl(index_binary);
 }

+void
+IVFSQHybrid::set_index_model(IndexModelPtr model) {
+    GPUIVF::set_index_model(model);
+}
+
 #endif
 }  // namespace knowhere
diff --git a/core/src/scheduler/job/Job.cpp b/core/src/scheduler/job/Job.cpp
index 1199fe17a6..06a163b959 100644
--- a/core/src/scheduler/job/Job.cpp
+++ b/core/src/scheduler/job/Job.cpp
@@ -15,7 +15,7 @@
 // specific language governing permissions and limitations
 // under the License.

-#include "Job.h"
+#include "scheduler/job/Job.h"

 namespace milvus {
 namespace scheduler {
diff --git a/core/unittest/db/test_engine.cpp b/core/unittest/db/test_engine.cpp
index 147de5399c..eb2c60ec4b 100644
--- a/core/unittest/db/test_engine.cpp
+++ b/core/unittest/db/test_engine.cpp
@@ -108,7 +108,6 @@ TEST_F(EngineTest, ENGINE_IMPL_TEST) {
     ASSERT_EQ(engine_ptr->Dimension(), dimension);
     ASSERT_EQ(engine_ptr->Count(), ids.size());

-    status = engine_ptr->CopyToGpu(0, true);
     status = engine_ptr->CopyToGpu(0, false);
     //ASSERT_TRUE(status.ok());

diff --git a/core/unittest/db/utils.cpp b/core/unittest/db/utils.cpp
index 8903ce14ea..16e195079c 100644
--- a/core/unittest/db/utils.cpp
+++ b/core/unittest/db/utils.cpp
@@ -65,10 +65,10 @@ static const char
     " cache_insert_data: false # whether load inserted data into cache\n"
     "\n"
     "engine_config:\n"
-    " blas_threshold: 20\n"
+    " use_blas_threshold: 20\n"
     "\n"
     "resource_config:\n"
-    " resource_pool:\n"
+    " search_resources:\n"
     " - gpu0\n"
     " index_build_device: gpu0 # GPU used for building index";

From 5f2f41244af64dfce7aae10f79349ec8593e1b46 Mon Sep 17 00:00:00 2001
From: Sijie Zhang <36330442+akihoni@users.noreply.github.com>
Date: Wed, 30 Oct 2019 10:21:26 +0800
Subject: [PATCH 092/149] Create README_CN.md

Former-commit-id: cc3731ea250d77c159f01d996af5ddc2d2ce817a
---
 README_CN.md | 197 +++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 197 insertions(+)
 create mode 100644 README_CN.md

diff --git a/README_CN.md b/README_CN.md
new file mode 100644
index 0000000000..aac2a57cc6
--- /dev/null
+++ b/README_CN.md
@@ -0,0 +1,197 @@
+![Milvuslogo](https://raw.githubusercontent.com/milvus-io/docs/master/assets/milvus_logo.png)
+
+![LICENSE](https://img.shields.io/badge/license-Apache--2.0-brightgreen)
+![Language](https://img.shields.io/badge/language-C%2B%2B-blue)
+[![codebeat badge](https://codebeat.co/badges/e030a4f6-b126-4475-a938-4723d54ec3a7?style=plastic)](https://codebeat.co/projects/github-com-jinhai-cn-milvus-master)
+
+![Release](https://img.shields.io/badge/release-v0.5.0-orange)
+![Release_date](https://img.shields.io/badge/release_date-October-yellowgreen)
+
+- [Slack channel](https://join.slack.com/t/milvusio/shared_invite/enQtNzY1OTQ0NDI3NjMzLWNmYmM1NmNjOTQ5MGI5NDhhYmRhMGU5M2NhNzhhMDMzY2MzNDdlYjM5ODQ5MmE3ODFlYzU3YjJkNmVlNDQ2ZTk)
+- [Twitter](https://twitter.com/milvusio)
+- [Facebook](https://www.facebook.com/io.milvus.5)
+- [Blog](https://www.milvus.io/blog/)
+- [CSDN](https://zilliz.blog.csdn.net/)
+- [Chinese official website](https://www.milvus.io/zh-CN/)
+
+# Welcome to Milvus
+
+## What is Milvus
+
+Milvus is an open-source similarity search engine for massive-scale feature vectors. Designed on a heterogeneous many-core computing framework, it achieves lower cost and better performance, answering billion-vector searches in milliseconds on limited computing resources.
+
+Milvus provides stable Python, Java and C++ APIs.
+
+See the [release notes](https://milvus.io/docs/zh-CN/release/v0.5.0/) for the latest Milvus release.
+
+- Heterogeneous many-core
+
+  Milvus is designed on a heterogeneous many-core computing framework, for lower cost and better performance.
+
+- Diversified indexes
+
+  Milvus supports a variety of index types, using algorithms such as quantization-based, tree-based and graph-based indexing.
+
+- Intelligent resource management
+
+  Milvus automatically adapts and optimizes query computation and index building according to the actual data scale and the available resources.
+
+- Horizontal scalability
+
+  Milvus supports online / offline scaling: compute nodes and storage nodes can be elastically scaled with simple commands.
+
+- High availability
+
+  Milvus is integrated with the Kubernetes framework, effectively avoiding single points of failure.
+
+- Ease of use
+
+  Milvus is easy to install and use, letting you focus on feature vectors.
+
+- Visual monitoring
+
+  You can track system performance in real time with Prometheus-based graphical monitoring.
+
+## Overall architecture
+
+![](https://raw.githubusercontent.com/yamasite/docs/master/assets/milvus_arch.png)
+
+## Get started with Milvus
+
+### Hardware requirements
+
+| Component  | Recommended configuration                        |
+| ---------- | ------------------------------------------------ |
+| CPU        | Intel CPU Haswell or higher                      |
+| GPU        | NVIDIA Pascal series or higher                   |
+| RAM        | 8 GB or more (depends on the vector data scale)  |
+| Hard drive | SATA 3.0 SSD or higher                           |
+
+### Install using Docker
+
+You can easily install Milvus with Docker. See the [Milvus installation guide](https://milvus.io/docs/zh-CN/userguide/install_milvus/) for details.
+
+### Build from source
+
+#### Software requirements
+
+- Ubuntu 18.04 or higher
+- CMake 3.14 or higher
+- CUDA 10.0 or higher
+- NVIDIA driver 418 or higher
+
+#### Compilation
+
+##### Step 1 Install dependencies
+
+```shell
+$ cd [Milvus sourcecode path]/core
+$ ./ubuntu_build_deps.sh
+```
+
+##### Step 2 Build
+
+```shell
+$ cd [Milvus sourcecode path]/core
+$ ./build.sh -t Debug
+or
+$ ./build.sh -t Release
+```
+
+When the build succeeds, all required Milvus components are installed under `[Milvus root path]/core/milvus`.
+
+##### Launch the Milvus server
+
+```shell
+$ cd [Milvus root path]/core/milvus
+```
+
+Add the `lib/` directory to `LD_LIBRARY_PATH`:
+
+```shell
+$ export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/path/to/milvus/lib
+```
+
+Start the Milvus server:
+
+```shell
+$ cd scripts
+$ ./start_server.sh
+```
+
+To stop the Milvus server, run:
+
+```shell
+$ ./stop_server.sh
+```
+
+To edit the Milvus configuration files `conf/server_config.yaml` and `conf/log_config.conf`, see [Milvus configuration](https://milvus.io/docs/zh-CN/reference/milvus_config/).
+
+### Try your first Milvus program
+
+#### Run a Python example
+
+Make sure [Python 3.5](https://www.python.org/downloads/) or higher is installed.
+
+Install the Milvus Python SDK.
+
+```shell
+# Install Milvus Python SDK
+$ pip install pymilvus==0.2.3
+```
+
+Create an `example.py` file and add the [Python example code](https://github.com/milvus-io/pymilvus/blob/master/examples/AdvancedExample.py) to it.
+
+Run the example code:
+
+```shell
+# Run Milvus Python example
+$ python3 example.py
+```
+
+#### Run a C++ example
+
+```shell
+ # Run Milvus C++ example
+ $ cd [Milvus root path]/core/milvus/bin
+ $ ./sdk_simple
+```
+
+#### Run a Java example
+
+Make sure Java 8 or higher is installed.
+
+Get the Java example code from [here](https://github.com/milvus-io/milvus-sdk-java/tree/master/examples).
+
+## Contribution guidelines
+
+Contributions are welcome. See the [contribution guidelines](https://github.com/milvus-io/milvus/blob/master/CONTRIBUTING.md) for details on the contribution workflow. This project follows the Milvus [code of conduct](https://github.com/milvus-io/milvus/blob/master/CODE_OF_CONDUCT.md); by participating, you are expected to uphold it.
+
+We use [GitHub issues](https://github.com/milvus-io/milvus/issues/new/choose) to track issues and patches. To ask a question or start a discussion, please join our community.
+
+## Join the Milvus community
+
+Join our [Slack channel](https://join.slack.com/t/milvusio/shared_invite/enQtNzY1OTQ0NDI3NjMzLWNmYmM1NmNjOTQ5MGI5NDhhYmRhMGU5M2NhNzhhMDMzY2MzNDdlYjM5ODQ5MmE3ODFlYzU3YjJkNmVlNDQ2ZTk) to talk with other users and contributors.
+
+## Milvus roadmap
+
+Read our [roadmap](https://milvus.io/docs/zh-CN/roadmap/) to learn about upcoming features.
+
+## Related links
+
+[Milvus official website](https://www.milvus.io/)
+
+[Milvus docs](https://www.milvus.io/docs/en/userguide/install_milvus/)
+
+[Milvus bootcamp](https://github.com/milvus-io/bootcamp)
+
+[Milvus blog](https://www.milvus.io/blog/)
+
+[Milvus CSDN](https://zilliz.blog.csdn.net/)
+
+[Milvus roadmap](https://milvus.io/docs/en/roadmap/)
+
+## License
+
+[Apache License 2.0](https://github.com/milvus-io/milvus/blob/master/LICENSE)
+

From 2aa7176d462b5e6c634d7b8e954d03f1007cc4c1 Mon Sep 17 00:00:00 2001
From: starlord
Date: Wed, 30 Oct 2019 11:21:28 +0800
Subject: [PATCH 093/149] show branch name

Former-commit-id: 8c5eca7a3372f553b13a349ad0e5407ec3683bf2
---
 core/CMakeLists.txt | 1 +
 1 file changed, 1 insertion(+)

diff --git a/core/CMakeLists.txt b/core/CMakeLists.txt
index 51cb2270a1..ae4366af2f 100644
--- a/core/CMakeLists.txt
+++ b/core/CMakeLists.txt
@@ -36,6 +36,7 @@ MACRO (GET_GIT_BRANCH_NAME GIT_BRANCH_NAME)
 ENDMACRO (GET_GIT_BRANCH_NAME)

 GET_GIT_BRANCH_NAME(GIT_BRANCH_NAME)
+message(STATUS "GIT_BRANCH_NAME = ${GIT_BRANCH_NAME}")
 if(NOT GIT_BRANCH_NAME STREQUAL "")
     string(REGEX REPLACE "\n" "" GIT_BRANCH_NAME ${GIT_BRANCH_NAME})
 endif()

From 3fa2c1e69a690588d890c153c09dc502e2e8fc51 Mon Sep 17 00:00:00 2001
From: wxyu
Date: Wed, 30 Oct 2019 12:34:09 +0800
Subject: [PATCH 094/149] Set task state MOVED after the resource copy completes

Former-commit-id: a97e306b62f6a0a7a06c881e93a973ad75b8ac9d
---
 CHANGELOG.md                                 |  1 +
 core/src/scheduler/JobMgr.cpp                |  2 +-
 core/src/scheduler/Scheduler.cpp             |  2 +-
 core/src/scheduler/TaskTable.cpp             | 19 ++----------------
 core/src/scheduler/TaskTable.h               | 18 +++++++----------
 core/src/scheduler/action/Action.h           |  6 +++---
 .../scheduler/action/PushTaskToNeighbour.cpp | 20 ++++++++++---------
 core/src/scheduler/job/Job.cpp               |  2 +-
 core/src/scheduler/resource/Resource.cpp     |  4 ++++
 core/unittest/scheduler/test_tasktable.cpp   |  9 +++------
 10 files changed, 34 insertions(+), 49 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 785b7c89ef..b506f6d988 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -15,6 +15,7 @@ Please mark all change in change log and use the ticket from JIRA.
 - \#96 - Remove .a file in milvus/lib for docker-version
 - \#118 - Using shared_ptr instead of weak_ptr to avoid performance loss
 - \#122 - Add unique id for Job
+- \#130 - Set task state MOVED after the resource copy completes

 ## Feature
 - \#115 - Using new structure for tasktable

diff --git a/core/src/scheduler/JobMgr.cpp b/core/src/scheduler/JobMgr.cpp
index e7b15a8185..794f6a0f37 100644
--- a/core/src/scheduler/JobMgr.cpp
+++ b/core/src/scheduler/JobMgr.cpp
@@ -91,7 +91,7 @@ JobMgr::worker_function() {
         // disk resources NEVER be empty.
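// NOTE: illustrative sketch, not part of this patch. The hunks below thread
// the originating TaskTableItem through Put() so that a task's source entry
// is flipped to MOVED only after the destination resource has finished
// loading its copy. A minimal model of that handoff, using only the
// Loaded()/Moved() transitions and the `from` back-pointer this patch adds:
//
//     void OnTaskLoaded(const TaskTableItemPtr& item) {
//         item->Loaded();            // destination now holds a loaded copy
//         if (item->from) {          // entry the task was moved away from
//             item->from->Moved();   // only now retire the source entry
//             item->from = nullptr;  // break the back-reference
//         }
//     }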
if (auto disk = res_mgr_->GetDiskResources()[0].lock()) { for (auto& task : tasks) { - disk->task_table().Put(task); + disk->task_table().Put(task, nullptr); } } } diff --git a/core/src/scheduler/Scheduler.cpp b/core/src/scheduler/Scheduler.cpp index cba847c25e..8d2d4406f8 100644 --- a/core/src/scheduler/Scheduler.cpp +++ b/core/src/scheduler/Scheduler.cpp @@ -120,7 +120,7 @@ Scheduler::OnLoadCompleted(const EventPtr& event) { if (resource->HasExecutor() == false) { load_completed_event->task_table_item_->Move(); } - Action::PushTaskToAllNeighbour(load_completed_event->task_table_item_->task, resource); + Action::PushTaskToAllNeighbour(load_completed_event->task_table_item_, resource); break; } default: { break; } diff --git a/core/src/scheduler/TaskTable.cpp b/core/src/scheduler/TaskTable.cpp index bd3dd466a9..425eb0ab06 100644 --- a/core/src/scheduler/TaskTable.cpp +++ b/core/src/scheduler/TaskTable.cpp @@ -264,8 +264,8 @@ TaskTable::PickToExecute(uint64_t limit) { } void -TaskTable::Put(TaskPtr task) { - auto item = std::make_shared(); +TaskTable::Put(TaskPtr task, TaskTableItemPtr from) { + auto item = std::make_shared(std::move(from)); item->id = id_++; item->task = std::move(task); item->state = TaskTableItemState::START; @@ -276,21 +276,6 @@ TaskTable::Put(TaskPtr task) { } } -void -TaskTable::Put(std::vector& tasks) { - for (auto& task : tasks) { - auto item = std::make_shared(); - item->id = id_++; - item->task = std::move(task); - item->state = TaskTableItemState::START; - item->timestamp.start = get_current_timestamp(); - table_.put(std::move(item)); - } - if (subscriber_) { - subscriber_(); - } -} - size_t TaskTable::TaskToExecute() { size_t count = 0; diff --git a/core/src/scheduler/TaskTable.h b/core/src/scheduler/TaskTable.h index 898141d028..37e2747343 100644 --- a/core/src/scheduler/TaskTable.h +++ b/core/src/scheduler/TaskTable.h @@ -58,8 +58,12 @@ struct TaskTimestamp : public interface::dumpable { Dump() const override; }; +struct TaskTableItem; +using TaskTableItemPtr = std::shared_ptr; + struct TaskTableItem : public interface::dumpable { - TaskTableItem() : id(0), task(nullptr), state(TaskTableItemState::INVALID), mutex() { + explicit TaskTableItem(TaskTableItemPtr f = nullptr) + : id(0), task(nullptr), state(TaskTableItemState::INVALID), mutex(), from(std::move(f)) { } TaskTableItem(const TaskTableItem& src) = delete; @@ -70,6 +74,7 @@ struct TaskTableItem : public interface::dumpable { TaskTableItemState state; // the state; std::mutex mutex; TaskTimestamp timestamp; + TaskTableItemPtr from; bool IsFinish(); @@ -96,8 +101,6 @@ struct TaskTableItem : public interface::dumpable { Dump() const override; }; -using TaskTableItemPtr = std::shared_ptr; - class TaskTable : public interface::dumpable { public: TaskTable() : table_(1ULL << 16ULL) { @@ -120,14 +123,7 @@ class TaskTable : public interface::dumpable { * Put one task; */ void - Put(TaskPtr task); - - /* - * Put tasks back of task table; - * Called by DBImpl; - */ - void - Put(std::vector& tasks); + Put(TaskPtr task, TaskTableItemPtr from = nullptr); size_t TaskToExecute(); diff --git a/core/src/scheduler/action/Action.h b/core/src/scheduler/action/Action.h index ff72910055..f5f828cbf6 100644 --- a/core/src/scheduler/action/Action.h +++ b/core/src/scheduler/action/Action.h @@ -28,13 +28,13 @@ namespace scheduler { class Action { public: static void - PushTaskToNeighbourRandomly(const TaskPtr& task, const ResourcePtr& self); + PushTaskToNeighbourRandomly(TaskTableItemPtr task_item, const ResourcePtr& self); static 
void - PushTaskToAllNeighbour(const TaskPtr& task, const ResourcePtr& self); + PushTaskToAllNeighbour(TaskTableItemPtr task_item, const ResourcePtr& self); static void - PushTaskToResource(const TaskPtr& task, const ResourcePtr& dest); + PushTaskToResource(TaskTableItemPtr task_item, const ResourcePtr& dest); static void DefaultLabelTaskScheduler(const ResourceMgrPtr& res_mgr, ResourcePtr resource, diff --git a/core/src/scheduler/action/PushTaskToNeighbour.cpp b/core/src/scheduler/action/PushTaskToNeighbour.cpp index 6f74849eac..b8a4a1164b 100644 --- a/core/src/scheduler/action/PushTaskToNeighbour.cpp +++ b/core/src/scheduler/action/PushTaskToNeighbour.cpp @@ -59,7 +59,7 @@ get_neighbours_with_connetion(const ResourcePtr& self) { } void -Action::PushTaskToNeighbourRandomly(const TaskPtr& task, const ResourcePtr& self) { +Action::PushTaskToNeighbourRandomly(TaskTableItemPtr task_item, const ResourcePtr& self) { auto neighbours = get_neighbours_with_connetion(self); if (not neighbours.empty()) { std::vector speeds; @@ -78,7 +78,7 @@ Action::PushTaskToNeighbourRandomly(const TaskPtr& task, const ResourcePtr& self for (uint64_t i = 0; i < speeds.size(); ++i) { rd_speed -= speeds[i]; if (rd_speed <= 0) { - neighbours[i].first->task_table().Put(task); + neighbours[i].first->task_table().Put(task_item->task, task_item); return; } } @@ -89,22 +89,23 @@ Action::PushTaskToNeighbourRandomly(const TaskPtr& task, const ResourcePtr& self } void -Action::PushTaskToAllNeighbour(const TaskPtr& task, const ResourcePtr& self) { +Action::PushTaskToAllNeighbour(TaskTableItemPtr task_item, const ResourcePtr& self) { auto neighbours = get_neighbours(self); for (auto& neighbour : neighbours) { - neighbour->task_table().Put(task); + neighbour->task_table().Put(task_item->task, task_item); } } void -Action::PushTaskToResource(const TaskPtr& task, const ResourcePtr& dest) { - dest->task_table().Put(task); +Action::PushTaskToResource(TaskTableItemPtr task_item, const ResourcePtr& dest) { + dest->task_table().Put(task_item->task, task_item); } void Action::DefaultLabelTaskScheduler(const ResourceMgrPtr& res_mgr, ResourcePtr resource, std::shared_ptr event) { if (not resource->HasExecutor() && event->task_table_item_->Move()) { + auto task_item = event->task_table_item_; auto task = event->task_table_item_->task; auto search_task = std::static_pointer_cast(task); bool moved = false; @@ -119,7 +120,7 @@ Action::DefaultLabelTaskScheduler(const ResourceMgrPtr& res_mgr, ResourcePtr res if (index != nullptr) { moved = true; auto dest_resource = res_mgr->GetResource(ResourceType::GPU, i); - PushTaskToResource(event->task_table_item_->task, dest_resource); + PushTaskToResource(event->task_table_item_, dest_resource); break; } } @@ -127,7 +128,7 @@ Action::DefaultLabelTaskScheduler(const ResourceMgrPtr& res_mgr, ResourcePtr res } if (not moved) { - PushTaskToNeighbourRandomly(task, resource); + PushTaskToNeighbourRandomly(task_item, resource); } } } @@ -135,6 +136,7 @@ Action::DefaultLabelTaskScheduler(const ResourceMgrPtr& res_mgr, ResourcePtr res void Action::SpecifiedResourceLabelTaskScheduler(const ResourceMgrPtr& res_mgr, ResourcePtr resource, std::shared_ptr event) { + auto task_item = event->task_table_item_; auto task = event->task_table_item_->task; if (resource->type() == ResourceType::DISK) { // step 1: calculate shortest path per resource, from disk to compute resource @@ -213,7 +215,7 @@ Action::SpecifiedResourceLabelTaskScheduler(const ResourceMgrPtr& res_mgr, Resou // next_res->task_table().Put(task); // } 
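// NOTE: illustrative summary, not part of this patch. With the new
// Put(TaskPtr, TaskTableItemPtr) signature, each call site states where the
// task came from, e.g. (both lines appear verbatim in this patch):
//
//     disk->task_table().Put(task, nullptr);        // initial placement, no source
//     next_res->task_table().Put(task, task_item);  // moved: remember the source
//
// so the loader can call Moved() on the source entry once the copy lands.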
event->task_table_item_->Move(); - next_res->task_table().Put(task); + next_res->task_table().Put(task, task_item); } } diff --git a/core/src/scheduler/job/Job.cpp b/core/src/scheduler/job/Job.cpp index 1199fe17a6..06a163b959 100644 --- a/core/src/scheduler/job/Job.cpp +++ b/core/src/scheduler/job/Job.cpp @@ -15,7 +15,7 @@ // specific language governing permissions and limitations // under the License. -#include "Job.h" +#include "scheduler/job/Job.h" namespace milvus { namespace scheduler { diff --git a/core/src/scheduler/resource/Resource.cpp b/core/src/scheduler/resource/Resource.cpp index 8e10592262..8cf03275f7 100644 --- a/core/src/scheduler/resource/Resource.cpp +++ b/core/src/scheduler/resource/Resource.cpp @@ -180,6 +180,10 @@ Resource::loader_function() { } LoadFile(task_item->task); task_item->Loaded(); + if (task_item->from) { + task_item->from->Moved(); + task_item->from = nullptr; + } if (subscriber_) { auto event = std::make_shared(shared_from_this(), task_item); subscriber_(std::static_pointer_cast(event)); diff --git a/core/unittest/scheduler/test_tasktable.cpp b/core/unittest/scheduler/test_tasktable.cpp index 601bd2431d..28a2e29c98 100644 --- a/core/unittest/scheduler/test_tasktable.cpp +++ b/core/unittest/scheduler/test_tasktable.cpp @@ -193,16 +193,13 @@ TEST_F(TaskTableBaseTest, PUT_INVALID_TEST) { TEST_F(TaskTableBaseTest, PUT_BATCH) { std::vector tasks{task1_, task2_}; - empty_table_.Put(tasks); + for (auto& task : tasks) { + empty_table_.Put(task); + } ASSERT_EQ(empty_table_.at(0)->task, task1_); ASSERT_EQ(empty_table_.at(1)->task, task2_); } -TEST_F(TaskTableBaseTest, PUT_EMPTY_BATCH) { - std::vector tasks{}; - empty_table_.Put(tasks); -} - TEST_F(TaskTableBaseTest, SIZE) { ASSERT_EQ(empty_table_.size(), 0); empty_table_.Put(task1_); From c41ebb7e7db13ec6eecfa4bcead2b1650196c05d Mon Sep 17 00:00:00 2001 From: Zhiru Zhu Date: Wed, 30 Oct 2019 15:27:05 +0800 Subject: [PATCH 095/149] fix JFrog cache error Former-commit-id: 8b75351080e9201d6f9f47c643dfb7eec32d5a86 --- CHANGELOG.md | 1 + core/cmake/ThirdPartyPackages.cmake | 376 ++++++++++++++-------------- 2 files changed, 195 insertions(+), 182 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index b28c7bd3a7..4580c6d55e 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,6 +5,7 @@ Please mark all change in change log and use the ticket from JIRA. 
# Milvus 0.5.1 (TODO) ## Bug +- \#134 - JFrog cache error ## Feature - \#90 - The server start error messages could be improved to enhance user experience diff --git a/core/cmake/ThirdPartyPackages.cmake b/core/cmake/ThirdPartyPackages.cmake index d0057d3c22..788c09ef2e 100644 --- a/core/cmake/ThirdPartyPackages.cmake +++ b/core/cmake/ThirdPartyPackages.cmake @@ -30,11 +30,11 @@ set(MILVUS_THIRDPARTY_DEPENDENCIES message(STATUS "Using ${MILVUS_DEPENDENCY_SOURCE} approach to find dependencies") # For each dependency, set dependency source to global default, if unset -foreach(DEPENDENCY ${MILVUS_THIRDPARTY_DEPENDENCIES}) - if("${${DEPENDENCY}_SOURCE}" STREQUAL "") +foreach (DEPENDENCY ${MILVUS_THIRDPARTY_DEPENDENCIES}) + if ("${${DEPENDENCY}_SOURCE}" STREQUAL "") set(${DEPENDENCY}_SOURCE ${MILVUS_DEPENDENCY_SOURCE}) - endif() -endforeach() + endif () +endforeach () macro(build_dependency DEPENDENCY_NAME) if ("${DEPENDENCY_NAME}" STREQUAL "GTest") @@ -47,17 +47,17 @@ macro(build_dependency DEPENDENCY_NAME) build_sqlite() elseif ("${DEPENDENCY_NAME}" STREQUAL "SQLite_ORM") build_sqlite_orm() - elseif("${DEPENDENCY_NAME}" STREQUAL "yaml-cpp") + elseif ("${DEPENDENCY_NAME}" STREQUAL "yaml-cpp") build_yamlcpp() - elseif("${DEPENDENCY_NAME}" STREQUAL "libunwind") + elseif ("${DEPENDENCY_NAME}" STREQUAL "libunwind") build_libunwind() - elseif("${DEPENDENCY_NAME}" STREQUAL "gperftools") + elseif ("${DEPENDENCY_NAME}" STREQUAL "gperftools") build_gperftools() - elseif("${DEPENDENCY_NAME}" STREQUAL "GRPC") + elseif ("${DEPENDENCY_NAME}" STREQUAL "GRPC") build_grpc() - elseif("${DEPENDENCY_NAME}" STREQUAL "ZLIB") + elseif ("${DEPENDENCY_NAME}" STREQUAL "ZLIB") build_zlib() - else() + else () message(FATAL_ERROR "Unknown thirdparty dependency to build: ${DEPENDENCY_NAME}") endif () endmacro() @@ -66,28 +66,28 @@ endmacro() # Identify OS if (UNIX) if (APPLE) - set (CMAKE_OS_NAME "osx" CACHE STRING "Operating system name" FORCE) + set(CMAKE_OS_NAME "osx" CACHE STRING "Operating system name" FORCE) else (APPLE) ## Check for Debian GNU/Linux ________________ - find_file (DEBIAN_FOUND debian_version debconf.conf - PATHS /etc - ) + find_file(DEBIAN_FOUND debian_version debconf.conf + PATHS /etc + ) if (DEBIAN_FOUND) - set (CMAKE_OS_NAME "debian" CACHE STRING "Operating system name" FORCE) + set(CMAKE_OS_NAME "debian" CACHE STRING "Operating system name" FORCE) endif (DEBIAN_FOUND) ## Check for Fedora _________________________ - find_file (FEDORA_FOUND fedora-release - PATHS /etc - ) + find_file(FEDORA_FOUND fedora-release + PATHS /etc + ) if (FEDORA_FOUND) - set (CMAKE_OS_NAME "fedora" CACHE STRING "Operating system name" FORCE) + set(CMAKE_OS_NAME "fedora" CACHE STRING "Operating system name" FORCE) endif (FEDORA_FOUND) ## Check for RedHat _________________________ - find_file (REDHAT_FOUND redhat-release inittab.RH - PATHS /etc - ) + find_file(REDHAT_FOUND redhat-release inittab.RH + PATHS /etc + ) if (REDHAT_FOUND) - set (CMAKE_OS_NAME "redhat" CACHE STRING "Operating system name" FORCE) + set(CMAKE_OS_NAME "redhat" CACHE STRING "Operating system name" FORCE) endif (REDHAT_FOUND) ## Extra check for Ubuntu ____________________ if (DEBIAN_FOUND) @@ -96,18 +96,25 @@ if (UNIX) ## a first superficial inspection a system will ## be considered as Debian, which signifies an ## extra check is required. 
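## NOTE: illustrative example, not part of this patch. The hunks below record
## the Ubuntu release so the JFrog cache is keyed per distribution version.
## Assuming `lsb_release -rs` prints, say, "18.04", the REGEX REPLACE yields
## UBUNTU_VERSION "18_04", and the cache URL set further down takes the form
##   ${JFROG_ARTFACTORY_URL}/milvus/thirdparty/cache/ubuntu/18_04/${MILVUS_BUILD_ARCH}/${BUILD_TYPE}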
- find_file (UBUNTU_EXTRA legal issue - PATHS /etc - ) + find_file(UBUNTU_EXTRA legal issue + PATHS /etc + ) if (UBUNTU_EXTRA) ## Scan contents of file - file (STRINGS ${UBUNTU_EXTRA} UBUNTU_FOUND - REGEX Ubuntu - ) + file(STRINGS ${UBUNTU_EXTRA} UBUNTU_FOUND + REGEX Ubuntu + ) ## Check result of string search if (UBUNTU_FOUND) - set (CMAKE_OS_NAME "ubuntu" CACHE STRING "Operating system name" FORCE) - set (DEBIAN_FOUND FALSE) + set(CMAKE_OS_NAME "ubuntu" CACHE STRING "Operating system name" FORCE) + set(DEBIAN_FOUND FALSE) + + find_program(LSB_RELEASE_EXEC lsb_release) + execute_process(COMMAND ${LSB_RELEASE_EXEC} -rs + OUTPUT_VARIABLE LSB_RELEASE_ID_SHORT + OUTPUT_STRIP_TRAILING_WHITESPACE + ) + STRING(REGEX REPLACE "\\." "_" UBUNTU_VERSION ${LSB_RELEASE_ID_SHORT}) endif (UBUNTU_FOUND) endif (UBUNTU_EXTRA) endif (DEBIAN_FOUND) @@ -120,36 +127,41 @@ set(THIRDPARTY_DIR "${MILVUS_SOURCE_DIR}/thirdparty") # ---------------------------------------------------------------------- # JFrog -if(NOT DEFINED USE_JFROG_CACHE) +if (NOT DEFINED USE_JFROG_CACHE) set(USE_JFROG_CACHE "OFF") -endif() -if(USE_JFROG_CACHE STREQUAL "ON") - if(DEFINED ENV{JFROG_ARTFACTORY_URL}) +endif () +if (USE_JFROG_CACHE STREQUAL "ON") + if (DEFINED ENV{JFROG_ARTFACTORY_URL}) set(JFROG_ARTFACTORY_URL "$ENV{JFROG_ARTFACTORY_URL}") - endif() - if(NOT DEFINED JFROG_ARTFACTORY_URL) + endif () + if (NOT DEFINED JFROG_ARTFACTORY_URL) message(FATAL_ERROR "JFROG_ARTFACTORY_URL is not set") - endif() - set(JFROG_ARTFACTORY_CACHE_URL "${JFROG_ARTFACTORY_URL}/milvus/thirdparty/cache/${CMAKE_OS_NAME}/${MILVUS_BUILD_ARCH}/${BUILD_TYPE}") - if(DEFINED ENV{JFROG_USER_NAME}) + endif () + if (UBUNTU_FOUND) + set(JFROG_ARTFACTORY_CACHE_URL "${JFROG_ARTFACTORY_URL}/milvus/thirdparty/cache/${CMAKE_OS_NAME}/${UBUNTU_VERSION}/${MILVUS_BUILD_ARCH}/${BUILD_TYPE}") + message(${JFROG_ARTFACTORY_CACHE_URL}) + else () + set(JFROG_ARTFACTORY_CACHE_URL "${JFROG_ARTFACTORY_URL}/milvus/thirdparty/cache/${CMAKE_OS_NAME}/${MILVUS_BUILD_ARCH}/${BUILD_TYPE}") + endif () + if (DEFINED ENV{JFROG_USER_NAME}) set(JFROG_USER_NAME "$ENV{JFROG_USER_NAME}") - endif() - if(NOT DEFINED JFROG_USER_NAME) + endif () + if (NOT DEFINED JFROG_USER_NAME) message(FATAL_ERROR "JFROG_USER_NAME is not set") - endif() - if(DEFINED ENV{JFROG_PASSWORD}) + endif () + if (DEFINED ENV{JFROG_PASSWORD}) set(JFROG_PASSWORD "$ENV{JFROG_PASSWORD}") - endif() - if(NOT DEFINED JFROG_PASSWORD) + endif () + if (NOT DEFINED JFROG_PASSWORD) message(FATAL_ERROR "JFROG_PASSWORD is not set") - endif() + endif () set(THIRDPARTY_PACKAGE_CACHE "${THIRDPARTY_DIR}/cache") - if(NOT EXISTS ${THIRDPARTY_PACKAGE_CACHE}) + if (NOT EXISTS ${THIRDPARTY_PACKAGE_CACHE}) message(STATUS "Will create cached directory: ${THIRDPARTY_PACKAGE_CACHE}") file(MAKE_DIRECTORY ${THIRDPARTY_PACKAGE_CACHE}) - endif() -endif() + endif () +endif () macro(resolve_dependency DEPENDENCY_NAME) if (${DEPENDENCY_NAME}_SOURCE STREQUAL "AUTO") @@ -182,13 +194,13 @@ set(EP_C_FLAGS "${EP_C_FLAGS} -fPIC") set(EP_COMMON_TOOLCHAIN -DCMAKE_C_COMPILER=${CMAKE_C_COMPILER} -DCMAKE_CXX_COMPILER=${CMAKE_CXX_COMPILER}) -if(CMAKE_AR) +if (CMAKE_AR) set(EP_COMMON_TOOLCHAIN ${EP_COMMON_TOOLCHAIN} -DCMAKE_AR=${CMAKE_AR}) -endif() +endif () -if(CMAKE_RANLIB) +if (CMAKE_RANLIB) set(EP_COMMON_TOOLCHAIN ${EP_COMMON_TOOLCHAIN} -DCMAKE_RANLIB=${CMAKE_RANLIB}) -endif() +endif () # External projects are still able to override the following declarations. 
# cmake command line will favor the last defined variable when a duplicate is @@ -202,20 +214,20 @@ set(EP_COMMON_CMAKE_ARGS -DCMAKE_CXX_FLAGS=${EP_CXX_FLAGS} -DCMAKE_CXX_FLAGS_${UPPERCASE_BUILD_TYPE}=${EP_CXX_FLAGS}) -if(NOT MILVUS_VERBOSE_THIRDPARTY_BUILD) +if (NOT MILVUS_VERBOSE_THIRDPARTY_BUILD) set(EP_LOG_OPTIONS LOG_CONFIGURE 1 LOG_BUILD 1 LOG_INSTALL 1 LOG_DOWNLOAD 1) -else() +else () set(EP_LOG_OPTIONS) -endif() +endif () # Ensure that a default make is set -if("${MAKE}" STREQUAL "") +if ("${MAKE}" STREQUAL "") find_program(MAKE make) -endif() +endif () if (NOT DEFINED MAKE_BUILD_ARGS) set(MAKE_BUILD_ARGS "-j8") -endif() +endif () message(STATUS "Third Party MAKE_BUILD_ARGS = ${MAKE_BUILD_ARGS}") # ---------------------------------------------------------------------- @@ -230,39 +242,39 @@ find_package(Threads REQUIRED) # Read toolchain versions from cpp/thirdparty/versions.txt file(STRINGS "${THIRDPARTY_DIR}/versions.txt" TOOLCHAIN_VERSIONS_TXT) -foreach(_VERSION_ENTRY ${TOOLCHAIN_VERSIONS_TXT}) +foreach (_VERSION_ENTRY ${TOOLCHAIN_VERSIONS_TXT}) # Exclude comments - if(NOT _VERSION_ENTRY MATCHES "^[^#][A-Za-z0-9-_]+_VERSION=") + if (NOT _VERSION_ENTRY MATCHES "^[^#][A-Za-z0-9-_]+_VERSION=") continue() - endif() + endif () string(REGEX MATCH "^[^=]*" _LIB_NAME ${_VERSION_ENTRY}) string(REPLACE "${_LIB_NAME}=" "" _LIB_VERSION ${_VERSION_ENTRY}) # Skip blank or malformed lines - if(${_LIB_VERSION} STREQUAL "") + if (${_LIB_VERSION} STREQUAL "") continue() - endif() + endif () # For debugging #message(STATUS "${_LIB_NAME}: ${_LIB_VERSION}") set(${_LIB_NAME} "${_LIB_VERSION}") -endforeach() +endforeach () if (DEFINED ENV{MILVUS_GTEST_URL}) set(GTEST_SOURCE_URL "$ENV{MILVUS_GTEST_URL}") else () set(GTEST_SOURCE_URL "https://github.com/google/googletest/archive/release-${GTEST_VERSION}.tar.gz") -endif() +endif () set(GTEST_MD5 "2e6fbeb6a91310a16efe181886c59596") -if(DEFINED ENV{MILVUS_MYSQLPP_URL}) +if (DEFINED ENV{MILVUS_MYSQLPP_URL}) set(MYSQLPP_SOURCE_URL "$ENV{MILVUS_MYSQLPP_URL}") -else() +else () set(MYSQLPP_SOURCE_URL "https://tangentsoft.com/mysqlpp/releases/mysql++-${MYSQLPP_VERSION}.tar.gz") -endif() +endif () set(MYSQLPP_MD5 "cda38b5ecc0117de91f7c42292dd1e79") if (DEFINED ENV{MILVUS_PROMETHEUS_URL}) @@ -270,60 +282,60 @@ if (DEFINED ENV{MILVUS_PROMETHEUS_URL}) else () set(PROMETHEUS_SOURCE_URL https://github.com/jupp0r/prometheus-cpp.git) -endif() +endif () -if(DEFINED ENV{MILVUS_SQLITE_URL}) +if (DEFINED ENV{MILVUS_SQLITE_URL}) set(SQLITE_SOURCE_URL "$ENV{MILVUS_SQLITE_URL}") -else() +else () set(SQLITE_SOURCE_URL "https://www.sqlite.org/2019/sqlite-autoconf-${SQLITE_VERSION}.tar.gz") -endif() +endif () set(SQLITE_MD5 "3c68eb400f8354605736cd55400e1572") -if(DEFINED ENV{MILVUS_SQLITE_ORM_URL}) +if (DEFINED ENV{MILVUS_SQLITE_ORM_URL}) set(SQLITE_ORM_SOURCE_URL "$ENV{MILVUS_SQLITE_ORM_URL}") -else() +else () set(SQLITE_ORM_SOURCE_URL "https://github.com/fnc12/sqlite_orm/archive/${SQLITE_ORM_VERSION}.zip") -endif() +endif () set(SQLITE_ORM_MD5 "ba9a405a8a1421c093aa8ce988ff8598") -if(DEFINED ENV{MILVUS_YAMLCPP_URL}) +if (DEFINED ENV{MILVUS_YAMLCPP_URL}) set(YAMLCPP_SOURCE_URL "$ENV{MILVUS_YAMLCPP_URL}") -else() +else () set(YAMLCPP_SOURCE_URL "https://github.com/jbeder/yaml-cpp/archive/yaml-cpp-${YAMLCPP_VERSION}.tar.gz") -endif() +endif () set(YAMLCPP_MD5 "5b943e9af0060d0811148b037449ef82") -if(DEFINED ENV{MILVUS_LIBUNWIND_URL}) +if (DEFINED ENV{MILVUS_LIBUNWIND_URL}) set(LIBUNWIND_SOURCE_URL "$ENV{MILVUS_LIBUNWIND_URL}") -else() +else () set(LIBUNWIND_SOURCE_URL 
"https://github.com/libunwind/libunwind/releases/download/v${LIBUNWIND_VERSION}/libunwind-${LIBUNWIND_VERSION}.tar.gz") -endif() +endif () set(LIBUNWIND_MD5 "a04f69d66d8e16f8bf3ab72a69112cd6") -if(DEFINED ENV{MILVUS_GPERFTOOLS_URL}) +if (DEFINED ENV{MILVUS_GPERFTOOLS_URL}) set(GPERFTOOLS_SOURCE_URL "$ENV{MILVUS_GPERFTOOLS_URL}") -else() +else () set(GPERFTOOLS_SOURCE_URL "https://github.com/gperftools/gperftools/releases/download/gperftools-${GPERFTOOLS_VERSION}/gperftools-${GPERFTOOLS_VERSION}.tar.gz") -endif() +endif () set(GPERFTOOLS_MD5 "c6a852a817e9160c79bdb2d3101b4601") -if(DEFINED ENV{MILVUS_GRPC_URL}) +if (DEFINED ENV{MILVUS_GRPC_URL}) set(GRPC_SOURCE_URL "$ENV{MILVUS_GRPC_URL}") -else() +else () set(GRPC_SOURCE_URL "https://github.com/youny626/grpc-milvus/archive/${GRPC_VERSION}.zip") -endif() +endif () set(GRPC_MD5 "0362ba219f59432c530070b5f5c3df73") -if(DEFINED ENV{MILVUS_ZLIB_URL}) +if (DEFINED ENV{MILVUS_ZLIB_URL}) set(ZLIB_SOURCE_URL "$ENV{MILVUS_ZLIB_URL}") -else() +else () set(ZLIB_SOURCE_URL "https://github.com/madler/zlib/archive/${ZLIB_VERSION}.tar.gz") -endif() +endif () set(ZLIB_MD5 "0095d2d2d1f3442ce1318336637b695f") # ---------------------------------------------------------------------- @@ -334,13 +346,13 @@ macro(build_gtest) set(GTEST_VENDORED TRUE) set(GTEST_CMAKE_CXX_FLAGS "${EP_CXX_FLAGS}") - if(APPLE) + if (APPLE) set(GTEST_CMAKE_CXX_FLAGS ${GTEST_CMAKE_CXX_FLAGS} -DGTEST_USE_OWN_TR1_TUPLE=1 -Wno-unused-value -Wno-ignored-attributes) - endif() + endif () set(GTEST_PREFIX "${CMAKE_CURRENT_BINARY_DIR}/googletest_ep-prefix/src/googletest_ep") set(GTEST_INCLUDE_DIR "${GTEST_PREFIX}/include") @@ -359,9 +371,9 @@ macro(build_gtest) set(GMOCK_INCLUDE_DIR "${GTEST_PREFIX}/include") set(GMOCK_STATIC_LIB "${GTEST_PREFIX}/lib/${CMAKE_STATIC_LIBRARY_PREFIX}gmock${CMAKE_STATIC_LIBRARY_SUFFIX}" - ) + ) - if(USE_JFROG_CACHE STREQUAL "ON") + if (USE_JFROG_CACHE STREQUAL "ON") set(GTEST_CACHE_PACKAGE_NAME "googletest_${GTEST_MD5}.tar.gz") set(GTEST_CACHE_URL "${JFROG_ARTFACTORY_CACHE_URL}/${GTEST_CACHE_PACKAGE_NAME}") set(GTEST_CACHE_PACKAGE_PATH "${THIRDPARTY_PACKAGE_CACHE}/${GTEST_CACHE_PACKAGE_NAME}") @@ -385,10 +397,10 @@ macro(build_gtest) ${EP_LOG_OPTIONS}) ExternalProject_Create_Cache(googletest_ep ${GTEST_CACHE_PACKAGE_PATH} "${CMAKE_CURRENT_BINARY_DIR}/googletest_ep-prefix" ${JFROG_USER_NAME} ${JFROG_PASSWORD} ${GTEST_CACHE_URL}) - else() + else () ExternalProject_Use_Cache(googletest_ep ${GTEST_CACHE_PACKAGE_PATH} ${CMAKE_CURRENT_BINARY_DIR}) - endif() - else() + endif () + else () ExternalProject_Add(googletest_ep URL ${GTEST_SOURCE_URL} @@ -402,20 +414,20 @@ macro(build_gtest) CMAKE_ARGS ${GTEST_CMAKE_ARGS} ${EP_LOG_OPTIONS}) - endif() + endif () # The include directory must exist before it is referenced by a target. 
file(MAKE_DIRECTORY "${GTEST_INCLUDE_DIR}") add_library(gtest STATIC IMPORTED) set_target_properties(gtest - PROPERTIES IMPORTED_LOCATION "${GTEST_STATIC_LIB}" - INTERFACE_INCLUDE_DIRECTORIES "${GTEST_INCLUDE_DIR}") + PROPERTIES IMPORTED_LOCATION "${GTEST_STATIC_LIB}" + INTERFACE_INCLUDE_DIRECTORIES "${GTEST_INCLUDE_DIR}") add_library(gtest_main STATIC IMPORTED) set_target_properties(gtest_main - PROPERTIES IMPORTED_LOCATION "${GTEST_MAIN_STATIC_LIB}" - INTERFACE_INCLUDE_DIRECTORIES "${GTEST_INCLUDE_DIR}") + PROPERTIES IMPORTED_LOCATION "${GTEST_MAIN_STATIC_LIB}" + INTERFACE_INCLUDE_DIRECTORIES "${GTEST_INCLUDE_DIR}") add_library(gmock STATIC IMPORTED) set_target_properties(gmock @@ -431,13 +443,13 @@ endmacro() if (MILVUS_BUILD_TESTS) resolve_dependency(GTest) - if(NOT GTEST_VENDORED) - endif() + if (NOT GTEST_VENDORED) + endif () get_target_property(GTEST_INCLUDE_DIR gtest INTERFACE_INCLUDE_DIRECTORIES) link_directories(SYSTEM "${GTEST_PREFIX}/lib") include_directories(SYSTEM ${GTEST_INCLUDE_DIR}) -endif() +endif () # ---------------------------------------------------------------------- # MySQL++ @@ -456,7 +468,7 @@ macro(build_mysqlpp) "CXXFLAGS=${EP_CXX_FLAGS}" "LDFLAGS=-pthread") - if(USE_JFROG_CACHE STREQUAL "ON") + if (USE_JFROG_CACHE STREQUAL "ON") set(MYSQLPP_CACHE_PACKAGE_NAME "mysqlpp_${MYSQLPP_MD5}.tar.gz") set(MYSQLPP_CACHE_URL "${JFROG_ARTFACTORY_CACHE_URL}/${MYSQLPP_CACHE_PACKAGE_NAME}") set(MYSQLPP_CACHE_PACKAGE_PATH "${THIRDPARTY_PACKAGE_CACHE}/${MYSQLPP_CACHE_PACKAGE_NAME}") @@ -479,15 +491,15 @@ macro(build_mysqlpp) ${MYSQLPP_SHARED_LIB}) ExternalProject_Create_Cache(mysqlpp_ep ${MYSQLPP_CACHE_PACKAGE_PATH} "${CMAKE_CURRENT_BINARY_DIR}/mysqlpp_ep-prefix" ${JFROG_USER_NAME} ${JFROG_PASSWORD} ${MYSQLPP_CACHE_URL}) - else() + else () file(DOWNLOAD ${MYSQLPP_CACHE_URL} ${MYSQLPP_CACHE_PACKAGE_PATH} STATUS status) list(GET status 0 status_code) message(STATUS "DOWNLOADING FROM ${MYSQLPP_CACHE_URL} TO ${MYSQLPP_CACHE_PACKAGE_PATH}. 
STATUS = ${status_code}") if (status_code EQUAL 0) ExternalProject_Use_Cache(mysqlpp_ep ${MYSQLPP_CACHE_PACKAGE_PATH} ${CMAKE_CURRENT_BINARY_DIR}) - endif() - endif() - else() + endif () + endif () + else () externalproject_add(mysqlpp_ep URL ${MYSQLPP_SOURCE_URL} @@ -501,7 +513,7 @@ macro(build_mysqlpp) 1 BUILD_BYPRODUCTS ${MYSQLPP_SHARED_LIB}) - endif() + endif () file(MAKE_DIRECTORY "${MYSQLPP_INCLUDE_DIR}") add_library(mysqlpp SHARED IMPORTED) @@ -515,13 +527,13 @@ macro(build_mysqlpp) endmacro() -if(MILVUS_WITH_MYSQLPP) +if (MILVUS_WITH_MYSQLPP) resolve_dependency(MySQLPP) get_target_property(MYSQLPP_INCLUDE_DIR mysqlpp INTERFACE_INCLUDE_DIRECTORIES) include_directories(SYSTEM "${MYSQLPP_INCLUDE_DIR}") link_directories(SYSTEM ${MYSQLPP_PREFIX}/lib) -endif() +endif () # ---------------------------------------------------------------------- # Prometheus @@ -547,9 +559,9 @@ macro(build_prometheus) "-DCMAKE_INSTALL_PREFIX=${PROMETHEUS_PREFIX}" -DCMAKE_BUILD_TYPE=Release) - if(USE_JFROG_CACHE STREQUAL "ON") + if (USE_JFROG_CACHE STREQUAL "ON") execute_process(COMMAND sh -c "git ls-remote --heads --tags ${PROMETHEUS_SOURCE_URL} ${PROMETHEUS_VERSION} | cut -f 1" OUTPUT_VARIABLE PROMETHEUS_LAST_COMMIT_ID) - if(${PROMETHEUS_LAST_COMMIT_ID} MATCHES "^[^#][a-z0-9]+") + if (${PROMETHEUS_LAST_COMMIT_ID} MATCHES "^[^#][a-z0-9]+") string(MD5 PROMETHEUS_COMBINE_MD5 "${PROMETHEUS_LAST_COMMIT_ID}") set(PROMETHEUS_CACHE_PACKAGE_NAME "prometheus_${PROMETHEUS_COMBINE_MD5}.tar.gz") set(PROMETHEUS_CACHE_URL "${JFROG_ARTFACTORY_CACHE_URL}/${PROMETHEUS_CACHE_PACKAGE_NAME}") @@ -583,18 +595,18 @@ macro(build_prometheus) "${PROMETHEUS_PULL_STATIC_LIB}") ExternalProject_Create_Cache(prometheus_ep ${PROMETHEUS_CACHE_PACKAGE_PATH} "${CMAKE_CURRENT_BINARY_DIR}/prometheus_ep-prefix" ${JFROG_USER_NAME} ${JFROG_PASSWORD} ${PROMETHEUS_CACHE_URL}) - else() + else () file(DOWNLOAD ${PROMETHEUS_CACHE_URL} ${PROMETHEUS_CACHE_PACKAGE_PATH} STATUS status) list(GET status 0 status_code) message(STATUS "DOWNLOADING FROM ${PROMETHEUS_CACHE_URL} TO ${PROMETHEUS_CACHE_PACKAGE_PATH}. 
STATUS = ${status_code}") if (status_code EQUAL 0) ExternalProject_Use_Cache(prometheus_ep ${PROMETHEUS_CACHE_PACKAGE_PATH} ${CMAKE_CURRENT_BINARY_DIR}) - endif() - endif() - else() + endif () + endif () + else () message(FATAL_ERROR "The last commit ID of \"${PROMETHEUS_SOURCE_URL}\" repository don't match!") - endif() - else() + endif () + else () externalproject_add(prometheus_ep GIT_REPOSITORY ${PROMETHEUS_SOURCE_URL} @@ -618,7 +630,7 @@ macro(build_prometheus) "${PROMETHEUS_CORE_STATIC_LIB}" "${PROMETHEUS_PUSH_STATIC_LIB}" "${PROMETHEUS_PULL_STATIC_LIB}") - endif() + endif () file(MAKE_DIRECTORY "${PROMETHEUS_PREFIX}/push/include") add_library(prometheus-cpp-push STATIC IMPORTED) @@ -642,7 +654,7 @@ macro(build_prometheus) add_dependencies(prometheus-cpp-core prometheus_ep) endmacro() -if(MILVUS_WITH_PROMETHEUS) +if (MILVUS_WITH_PROMETHEUS) resolve_dependency(Prometheus) @@ -655,7 +667,7 @@ if(MILVUS_WITH_PROMETHEUS) link_directories(SYSTEM ${PROMETHEUS_PREFIX}/core/) include_directories(SYSTEM ${PROMETHEUS_PREFIX}/core/include) -endif() +endif () # ---------------------------------------------------------------------- # SQLite @@ -674,7 +686,7 @@ macro(build_sqlite) "CFLAGS=${EP_C_FLAGS}" "CXXFLAGS=${EP_CXX_FLAGS}") - if(USE_JFROG_CACHE STREQUAL "ON") + if (USE_JFROG_CACHE STREQUAL "ON") set(SQLITE_CACHE_PACKAGE_NAME "sqlite_${SQLITE_MD5}.tar.gz") set(SQLITE_CACHE_URL "${JFROG_ARTFACTORY_CACHE_URL}/${SQLITE_CACHE_PACKAGE_NAME}") set(SQLITE_CACHE_PACKAGE_PATH "${THIRDPARTY_PACKAGE_CACHE}/${SQLITE_CACHE_PACKAGE_NAME}") @@ -698,15 +710,15 @@ macro(build_sqlite) "${SQLITE_STATIC_LIB}") ExternalProject_Create_Cache(sqlite_ep ${SQLITE_CACHE_PACKAGE_PATH} "${CMAKE_CURRENT_BINARY_DIR}/sqlite_ep-prefix" ${JFROG_USER_NAME} ${JFROG_PASSWORD} ${SQLITE_CACHE_URL}) - else() + else () file(DOWNLOAD ${SQLITE_CACHE_URL} ${SQLITE_CACHE_PACKAGE_PATH} STATUS status) list(GET status 0 status_code) message(STATUS "DOWNLOADING FROM ${SQLITE_CACHE_URL} TO ${SQLITE_CACHE_PACKAGE_PATH}. 
STATUS = ${status_code}") if (status_code EQUAL 0) ExternalProject_Use_Cache(sqlite_ep ${SQLITE_CACHE_PACKAGE_PATH} ${CMAKE_CURRENT_BINARY_DIR}) - endif() - endif() - else() + endif () + endif () + else () externalproject_add(sqlite_ep URL ${SQLITE_SOURCE_URL} @@ -721,7 +733,7 @@ macro(build_sqlite) 1 BUILD_BYPRODUCTS "${SQLITE_STATIC_LIB}") - endif() + endif () file(MAKE_DIRECTORY "${SQLITE_INCLUDE_DIR}") add_library(sqlite STATIC IMPORTED) @@ -733,11 +745,11 @@ macro(build_sqlite) add_dependencies(sqlite sqlite_ep) endmacro() -if(MILVUS_WITH_SQLITE) +if (MILVUS_WITH_SQLITE) resolve_dependency(SQLite) include_directories(SYSTEM "${SQLITE_INCLUDE_DIR}") link_directories(SYSTEM ${SQLITE_PREFIX}/lib/) -endif() +endif () # ---------------------------------------------------------------------- # SQLite_ORM @@ -753,16 +765,16 @@ macro(build_sqlite_orm) file(DOWNLOAD ${SQLITE_ORM_SOURCE_URL} ${SQLITE_ORM_TAR_NAME}) execute_process(COMMAND ${CMAKE_COMMAND} -E tar -xf ${SQLITE_ORM_TAR_NAME} - WORKING_DIRECTORY ${SQLITE_ORM_PREFIX}) + WORKING_DIRECTORY ${SQLITE_ORM_PREFIX}) endif () endmacro() -if(MILVUS_WITH_SQLITE_ORM) +if (MILVUS_WITH_SQLITE_ORM) resolve_dependency(SQLite_ORM) include_directories(SYSTEM "${SQLITE_ORM_INCLUDE_DIR}") -endif() +endif () # ---------------------------------------------------------------------- # yaml-cpp @@ -779,7 +791,7 @@ macro(build_yamlcpp) -DYAML_CPP_BUILD_TESTS=OFF -DYAML_CPP_BUILD_TOOLS=OFF) - if(USE_JFROG_CACHE STREQUAL "ON") + if (USE_JFROG_CACHE STREQUAL "ON") set(YAMLCPP_CACHE_PACKAGE_NAME "yaml-cpp_${YAMLCPP_MD5}.tar.gz") set(YAMLCPP_CACHE_URL "${JFROG_ARTFACTORY_CACHE_URL}/${YAMLCPP_CACHE_PACKAGE_NAME}") set(YAMLCPP_CACHE_PACKAGE_PATH "${THIRDPARTY_PACKAGE_CACHE}/${YAMLCPP_CACHE_PACKAGE_NAME}") @@ -800,15 +812,15 @@ macro(build_yamlcpp) ${YAMLCPP_CMAKE_ARGS}) ExternalProject_Create_Cache(yaml-cpp_ep ${YAMLCPP_CACHE_PACKAGE_PATH} "${CMAKE_CURRENT_BINARY_DIR}/yaml-cpp_ep-prefix" ${JFROG_USER_NAME} ${JFROG_PASSWORD} ${YAMLCPP_CACHE_URL}) - else() + else () file(DOWNLOAD ${YAMLCPP_CACHE_URL} ${YAMLCPP_CACHE_PACKAGE_PATH} STATUS status) list(GET status 0 status_code) message(STATUS "DOWNLOADING FROM ${YAMLCPP_CACHE_URL} TO ${YAMLCPP_CACHE_PACKAGE_PATH}. 
STATUS = ${status_code}") if (status_code EQUAL 0) ExternalProject_Use_Cache(yaml-cpp_ep ${YAMLCPP_CACHE_PACKAGE_PATH} ${CMAKE_CURRENT_BINARY_DIR}) - endif() - endif() - else() + endif () + endif () + else () externalproject_add(yaml-cpp_ep URL ${YAMLCPP_SOURCE_URL} @@ -820,7 +832,7 @@ macro(build_yamlcpp) "${YAMLCPP_STATIC_LIB}" CMAKE_ARGS ${YAMLCPP_CMAKE_ARGS}) - endif() + endif () file(MAKE_DIRECTORY "${YAMLCPP_INCLUDE_DIR}") add_library(yaml-cpp STATIC IMPORTED) @@ -831,13 +843,13 @@ macro(build_yamlcpp) add_dependencies(yaml-cpp yaml-cpp_ep) endmacro() -if(MILVUS_WITH_YAMLCPP) +if (MILVUS_WITH_YAMLCPP) resolve_dependency(yaml-cpp) get_target_property(YAMLCPP_INCLUDE_DIR yaml-cpp INTERFACE_INCLUDE_DIRECTORIES) link_directories(SYSTEM ${YAMLCPP_PREFIX}/lib/) include_directories(SYSTEM ${YAMLCPP_INCLUDE_DIR}) -endif() +endif () # ---------------------------------------------------------------------- # libunwind @@ -849,7 +861,7 @@ macro(build_libunwind) set(LIBUNWIND_SHARED_LIB "${LIBUNWIND_PREFIX}/lib/libunwind${CMAKE_SHARED_LIBRARY_SUFFIX}") set(LIBUNWIND_CONFIGURE_ARGS "--prefix=${LIBUNWIND_PREFIX}") - if(USE_JFROG_CACHE STREQUAL "ON") + if (USE_JFROG_CACHE STREQUAL "ON") set(LIBUNWIND_CACHE_PACKAGE_NAME "libunwind_${LIBUNWIND_MD5}.tar.gz") set(LIBUNWIND_CACHE_URL "${JFROG_ARTFACTORY_CACHE_URL}/${LIBUNWIND_CACHE_PACKAGE_NAME}") set(LIBUNWIND_CACHE_PACKAGE_PATH "${THIRDPARTY_PACKAGE_CACHE}/${LIBUNWIND_CACHE_PACKAGE_NAME}") @@ -874,15 +886,15 @@ macro(build_libunwind) ${LIBUNWIND_SHARED_LIB}) ExternalProject_Create_Cache(libunwind_ep ${LIBUNWIND_CACHE_PACKAGE_PATH} "${CMAKE_CURRENT_BINARY_DIR}/libunwind_ep-prefix" ${JFROG_USER_NAME} ${JFROG_PASSWORD} ${LIBUNWIND_CACHE_URL}) - else() + else () file(DOWNLOAD ${LIBUNWIND_CACHE_URL} ${LIBUNWIND_CACHE_PACKAGE_PATH} STATUS status) list(GET status 0 status_code) message(STATUS "DOWNLOADING FROM ${LIBUNWIND_CACHE_URL} TO ${LIBUNWIND_CACHE_PACKAGE_PATH}. 
STATUS = ${status_code}") if (status_code EQUAL 0) ExternalProject_Use_Cache(libunwind_ep ${LIBUNWIND_CACHE_PACKAGE_PATH} ${CMAKE_CURRENT_BINARY_DIR}) - endif() - endif() - else() + endif () + endif () + else () externalproject_add(libunwind_ep URL ${LIBUNWIND_SOURCE_URL} @@ -898,7 +910,7 @@ macro(build_libunwind) ${MAKE} install BUILD_BYPRODUCTS ${LIBUNWIND_SHARED_LIB}) - endif() + endif () file(MAKE_DIRECTORY "${LIBUNWIND_INCLUDE_DIR}") @@ -910,12 +922,12 @@ macro(build_libunwind) add_dependencies(libunwind libunwind_ep) endmacro() -if(MILVUS_WITH_LIBUNWIND) +if (MILVUS_WITH_LIBUNWIND) resolve_dependency(libunwind) get_target_property(LIBUNWIND_INCLUDE_DIR libunwind INTERFACE_INCLUDE_DIRECTORIES) include_directories(SYSTEM ${LIBUNWIND_INCLUDE_DIR}) -endif() +endif () # ---------------------------------------------------------------------- # gperftools @@ -927,7 +939,7 @@ macro(build_gperftools) set(GPERFTOOLS_STATIC_LIB "${GPERFTOOLS_PREFIX}/lib/libprofiler${CMAKE_STATIC_LIBRARY_SUFFIX}") set(GPERFTOOLS_CONFIGURE_ARGS "--prefix=${GPERFTOOLS_PREFIX}") - if(USE_JFROG_CACHE STREQUAL "ON") + if (USE_JFROG_CACHE STREQUAL "ON") set(GPERFTOOLS_CACHE_PACKAGE_NAME "gperftools_${GPERFTOOLS_MD5}.tar.gz") set(GPERFTOOLS_CACHE_URL "${JFROG_ARTFACTORY_CACHE_URL}/${GPERFTOOLS_CACHE_PACKAGE_NAME}") set(GPERFTOOLS_CACHE_PACKAGE_PATH "${THIRDPARTY_PACKAGE_CACHE}/${GPERFTOOLS_CACHE_PACKAGE_NAME}") @@ -952,15 +964,15 @@ macro(build_gperftools) ${GPERFTOOLS_STATIC_LIB}) ExternalProject_Create_Cache(gperftools_ep ${GPERFTOOLS_CACHE_PACKAGE_PATH} "${CMAKE_CURRENT_BINARY_DIR}/gperftools_ep-prefix" ${JFROG_USER_NAME} ${JFROG_PASSWORD} ${GPERFTOOLS_CACHE_URL}) - else() + else () file(DOWNLOAD ${GPERFTOOLS_CACHE_URL} ${GPERFTOOLS_CACHE_PACKAGE_PATH} STATUS status) list(GET status 0 status_code) message(STATUS "DOWNLOADING FROM ${GPERFTOOLS_CACHE_URL} TO ${GPERFTOOLS_CACHE_PACKAGE_PATH}. 
STATUS = ${status_code}") if (status_code EQUAL 0) ExternalProject_Use_Cache(gperftools_ep ${GPERFTOOLS_CACHE_PACKAGE_PATH} ${CMAKE_CURRENT_BINARY_DIR}) - endif() - endif() - else() + endif () + endif () + else () externalproject_add(gperftools_ep URL ${GPERFTOOLS_SOURCE_URL} @@ -976,7 +988,7 @@ macro(build_gperftools) ${MAKE} install BUILD_BYPRODUCTS ${GPERFTOOLS_STATIC_LIB}) - endif() + endif () ExternalProject_Add_StepDependencies(gperftools_ep build libunwind_ep) @@ -992,13 +1004,13 @@ macro(build_gperftools) add_dependencies(gperftools libunwind_ep) endmacro() -if(MILVUS_WITH_GPERFTOOLS) +if (MILVUS_WITH_GPERFTOOLS) resolve_dependency(gperftools) get_target_property(GPERFTOOLS_INCLUDE_DIR gperftools INTERFACE_INCLUDE_DIRECTORIES) include_directories(SYSTEM ${GPERFTOOLS_INCLUDE_DIR}) link_directories(SYSTEM ${GPERFTOOLS_PREFIX}/lib) -endif() +endif () # ---------------------------------------------------------------------- # GRPC @@ -1014,7 +1026,7 @@ macro(build_grpc) set(GRPC_PROTOBUF_STATIC_LIB "${GRPC_PROTOBUF_LIB_DIR}/${CMAKE_STATIC_LIBRARY_PREFIX}protobuf${CMAKE_STATIC_LIBRARY_SUFFIX}") set(GRPC_PROTOC_STATIC_LIB "${GRPC_PROTOBUF_LIB_DIR}/${CMAKE_STATIC_LIBRARY_PREFIX}protoc${CMAKE_STATIC_LIBRARY_SUFFIX}") - if(USE_JFROG_CACHE STREQUAL "ON") + if (USE_JFROG_CACHE STREQUAL "ON") set(GRPC_CACHE_PACKAGE_NAME "grpc_${GRPC_MD5}.tar.gz") set(GRPC_CACHE_URL "${JFROG_ARTFACTORY_CACHE_URL}/${GRPC_CACHE_PACKAGE_NAME}") set(GRPC_CACHE_PACKAGE_PATH "${THIRDPARTY_PACKAGE_CACHE}/${GRPC_CACHE_PACKAGE_NAME}") @@ -1044,15 +1056,15 @@ macro(build_grpc) ExternalProject_Add_StepDependencies(grpc_ep build zlib_ep) ExternalProject_Create_Cache(grpc_ep ${GRPC_CACHE_PACKAGE_PATH} "${CMAKE_CURRENT_BINARY_DIR}/grpc_ep-prefix" ${JFROG_USER_NAME} ${JFROG_PASSWORD} ${GRPC_CACHE_URL}) - else() + else () file(DOWNLOAD ${GRPC_CACHE_URL} ${GRPC_CACHE_PACKAGE_PATH} STATUS status) list(GET status 0 status_code) message(STATUS "DOWNLOADING FROM ${GRPC_CACHE_URL} TO ${GRPC_CACHE_PACKAGE_PATH}. 
STATUS = ${status_code}") if (status_code EQUAL 0) ExternalProject_Use_Cache(grpc_ep ${GRPC_CACHE_PACKAGE_PATH} ${CMAKE_CURRENT_BINARY_DIR}) - endif() - endif() - else() + endif () + endif () + else () externalproject_add(grpc_ep URL ${GRPC_SOURCE_URL} @@ -1074,7 +1086,7 @@ macro(build_grpc) ExternalProject_Add_StepDependencies(grpc_ep build zlib_ep) - endif() + endif () file(MAKE_DIRECTORY "${GRPC_INCLUDE_DIR}") @@ -1082,29 +1094,29 @@ macro(build_grpc) set_target_properties(grpc PROPERTIES IMPORTED_LOCATION "${GRPC_STATIC_LIB}" INTERFACE_INCLUDE_DIRECTORIES "${GRPC_INCLUDE_DIR}" - INTERFACE_LINK_LIBRARIES "zlib" ) + INTERFACE_LINK_LIBRARIES "zlib") add_library(grpc++ STATIC IMPORTED) set_target_properties(grpc++ PROPERTIES IMPORTED_LOCATION "${GRPC++_STATIC_LIB}" INTERFACE_INCLUDE_DIRECTORIES "${GRPC_INCLUDE_DIR}" - INTERFACE_LINK_LIBRARIES "zlib" ) + INTERFACE_LINK_LIBRARIES "zlib") add_library(grpcpp_channelz STATIC IMPORTED) set_target_properties(grpcpp_channelz PROPERTIES IMPORTED_LOCATION "${GRPCPP_CHANNELZ_STATIC_LIB}" INTERFACE_INCLUDE_DIRECTORIES "${GRPC_INCLUDE_DIR}" - INTERFACE_LINK_LIBRARIES "zlib" ) + INTERFACE_LINK_LIBRARIES "zlib") add_library(grpc_protobuf STATIC IMPORTED) set_target_properties(grpc_protobuf PROPERTIES IMPORTED_LOCATION "${GRPC_PROTOBUF_STATIC_LIB}" - INTERFACE_LINK_LIBRARIES "zlib" ) + INTERFACE_LINK_LIBRARIES "zlib") add_library(grpc_protoc STATIC IMPORTED) set_target_properties(grpc_protoc PROPERTIES IMPORTED_LOCATION "${GRPC_PROTOC_STATIC_LIB}" - INTERFACE_LINK_LIBRARIES "zlib" ) + INTERFACE_LINK_LIBRARIES "zlib") add_dependencies(grpc grpc_ep) add_dependencies(grpc++ grpc_ep) @@ -1113,7 +1125,7 @@ macro(build_grpc) add_dependencies(grpc_protoc grpc_ep) endmacro() -if(MILVUS_WITH_GRPC) +if (MILVUS_WITH_GRPC) resolve_dependency(GRPC) get_target_property(GRPC_INCLUDE_DIR grpc INTERFACE_INCLUDE_DIRECTORIES) @@ -1123,7 +1135,7 @@ if(MILVUS_WITH_GRPC) set(GRPC_THIRD_PARTY_DIR ${CMAKE_CURRENT_BINARY_DIR}/grpc_ep-prefix/src/grpc_ep/third_party) include_directories(SYSTEM ${GRPC_THIRD_PARTY_DIR}/protobuf/src) link_directories(SYSTEM ${GRPC_PROTOBUF_LIB_DIR}) -endif() +endif () # ---------------------------------------------------------------------- # zlib @@ -1137,7 +1149,7 @@ macro(build_zlib) set(ZLIB_CMAKE_ARGS ${EP_COMMON_CMAKE_ARGS} "-DCMAKE_INSTALL_PREFIX=${ZLIB_PREFIX}" -DBUILD_SHARED_LIBS=OFF) - if(USE_JFROG_CACHE STREQUAL "ON") + if (USE_JFROG_CACHE STREQUAL "ON") set(ZLIB_CACHE_PACKAGE_NAME "zlib_${ZLIB_MD5}.tar.gz") set(ZLIB_CACHE_URL "${JFROG_ARTFACTORY_CACHE_URL}/${ZLIB_CACHE_PACKAGE_NAME}") set(ZLIB_CACHE_PACKAGE_PATH "${THIRDPARTY_PACKAGE_CACHE}/${ZLIB_CACHE_PACKAGE_NAME}") @@ -1158,15 +1170,15 @@ macro(build_zlib) ${ZLIB_CMAKE_ARGS}) ExternalProject_Create_Cache(zlib_ep ${ZLIB_CACHE_PACKAGE_PATH} "${CMAKE_CURRENT_BINARY_DIR}/zlib_ep-prefix" ${JFROG_USER_NAME} ${JFROG_PASSWORD} ${ZLIB_CACHE_URL}) - else() + else () file(DOWNLOAD ${ZLIB_CACHE_URL} ${ZLIB_CACHE_PACKAGE_PATH} STATUS status) list(GET status 0 status_code) message(STATUS "DOWNLOADING FROM ${ZLIB_CACHE_URL} TO ${ZLIB_CACHE_PACKAGE_PATH}. 
STATUS = ${status_code}") if (status_code EQUAL 0) ExternalProject_Use_Cache(zlib_ep ${ZLIB_CACHE_PACKAGE_PATH} ${CMAKE_CURRENT_BINARY_DIR}) - endif() - endif() - else() + endif () + endif () + else () externalproject_add(zlib_ep URL ${ZLIB_SOURCE_URL} @@ -1178,7 +1190,7 @@ macro(build_zlib) "${ZLIB_STATIC_LIB}" CMAKE_ARGS ${ZLIB_CMAKE_ARGS}) - endif() + endif () file(MAKE_DIRECTORY "${ZLIB_INCLUDE_DIR}") add_library(zlib STATIC IMPORTED) @@ -1189,9 +1201,9 @@ macro(build_zlib) add_dependencies(zlib zlib_ep) endmacro() -if(MILVUS_WITH_ZLIB) +if (MILVUS_WITH_ZLIB) resolve_dependency(ZLIB) get_target_property(ZLIB_INCLUDE_DIR zlib INTERFACE_INCLUDE_DIRECTORIES) include_directories(SYSTEM ${ZLIB_INCLUDE_DIR}) -endif() +endif () From c99f407099748295356aed5d01a412f1801dabc3 Mon Sep 17 00:00:00 2001 From: Zhiru Zhu Date: Wed, 30 Oct 2019 15:30:28 +0800 Subject: [PATCH 096/149] delete message Former-commit-id: 577b8693eca42bf3537c8691f754dd1876d579a3 --- core/cmake/ThirdPartyPackages.cmake | 1 - 1 file changed, 1 deletion(-) diff --git a/core/cmake/ThirdPartyPackages.cmake b/core/cmake/ThirdPartyPackages.cmake index 788c09ef2e..62d7713024 100644 --- a/core/cmake/ThirdPartyPackages.cmake +++ b/core/cmake/ThirdPartyPackages.cmake @@ -139,7 +139,6 @@ if (USE_JFROG_CACHE STREQUAL "ON") endif () if (UBUNTU_FOUND) set(JFROG_ARTFACTORY_CACHE_URL "${JFROG_ARTFACTORY_URL}/milvus/thirdparty/cache/${CMAKE_OS_NAME}/${UBUNTU_VERSION}/${MILVUS_BUILD_ARCH}/${BUILD_TYPE}") - message(${JFROG_ARTFACTORY_CACHE_URL}) else () set(JFROG_ARTFACTORY_CACHE_URL "${JFROG_ARTFACTORY_URL}/milvus/thirdparty/cache/${CMAKE_OS_NAME}/${MILVUS_BUILD_ARCH}/${BUILD_TYPE}") endif () From 3dd61fee0608b058015657ba7a5ae45eec418e21 Mon Sep 17 00:00:00 2001 From: "yudong.cai" Date: Wed, 30 Oct 2019 17:17:38 +0800 Subject: [PATCH 097/149] #89 re-org faiss_benchmark_test Former-commit-id: 0fb0c3f91e9c864b301f5a9b8d29049e456d99e4 --- .../faiss_benchmark/faiss_benchmark_test.cpp | 579 ++++++++---------- 1 file changed, 243 insertions(+), 336 deletions(-) diff --git a/core/src/index/unittest/faiss_benchmark/faiss_benchmark_test.cpp b/core/src/index/unittest/faiss_benchmark/faiss_benchmark_test.cpp index 618ee00b28..14e22aa55f 100644 --- a/core/src/index/unittest/faiss_benchmark/faiss_benchmark_test.cpp +++ b/core/src/index/unittest/faiss_benchmark/faiss_benchmark_test.cpp @@ -28,7 +28,9 @@ #include #include #include +#ifdef CUSTOMIZATION #include +#endif #include #include #include @@ -46,6 +48,19 @@ * https://support.hdfgroup.org/ftp/HDF5/releases/ * and install it to /usr/local/hdf5 . 
*****************************************************/ +#define DEBUG_VERBOSE 0 + +const std::string HDF5_POSTFIX = ".hdf5"; +const std::string HDF5_DATASET_TRAIN = "train"; +const std::string HDF5_DATASET_TEST = "test"; +const std::string HDF5_DATASET_NEIGHBORS = "neighbors"; +const std::string HDF5_DATASET_DISTANCES = "distances"; + +enum QueryMode { + MODE_CPU = 0, + MODE_MIX, + MODE_GPU +}; double elapsed() { @@ -57,67 +72,45 @@ elapsed() { void normalize(float* arr, size_t nq, size_t dim) { for (size_t i = 0; i < nq; i++) { - double vecLen = 0.0; + double vecLen = 0.0, inv_vecLen = 0.0; for (size_t j = 0; j < dim; j++) { double val = arr[i * dim + j]; vecLen += val * val; } - vecLen = std::sqrt(vecLen); + inv_vecLen = 1.0 / std::sqrt(vecLen); for (size_t j = 0; j < dim; j++) { - arr[i * dim + j] = (float)(arr[i * dim + j] / vecLen); + arr[i * dim + j] = (float)(arr[i * dim + j] * inv_vecLen); } } } void* -hdf5_read(const char* file_name, const char* dataset_name, H5T_class_t dataset_class, size_t& d_out, size_t& n_out) { +hdf5_read(const std::string& file_name, const std::string& dataset_name, H5T_class_t dataset_class, + size_t& d_out, size_t& n_out) { hid_t file, dataset, datatype, dataspace, memspace; H5T_class_t t_class; /* data type class */ - H5T_order_t order; /* data order */ - size_t size; /* size of the data element stored in file */ hsize_t dimsm[3]; /* memory space dimensions */ hsize_t dims_out[2]; /* dataset dimensions */ hsize_t count[2]; /* size of the hyperslab in the file */ hsize_t offset[2]; /* hyperslab offset in the file */ hsize_t count_out[3]; /* size of the hyperslab in memory */ hsize_t offset_out[3]; /* hyperslab offset in memory */ - int rank; void* data_out; /* output buffer */ /* Open the file and the dataset. */ - file = H5Fopen(file_name, H5F_ACC_RDONLY, H5P_DEFAULT); - dataset = H5Dopen2(file, dataset_name, H5P_DEFAULT); + file = H5Fopen(file_name.c_str(), H5F_ACC_RDONLY, H5P_DEFAULT); + dataset = H5Dopen2(file, dataset_name.c_str(), H5P_DEFAULT); - /* - * Get datatype and dataspace handles and then query - * dataset class, order, size, rank and dimensions. - */ + /* Get datatype and dataspace handles and then query + * dataset class, order, size, rank and dimensions. */ datatype = H5Dget_type(dataset); /* datatype handle */ t_class = H5Tget_class(datatype); assert(t_class == dataset_class || !"Illegal dataset class type"); - order = H5Tget_order(datatype); - switch (order) { - case H5T_ORDER_LE: - printf("Little endian order \n"); - break; - case H5T_ORDER_BE: - printf("Big endian order \n"); - break; - default: - printf("Illegal endian order \n"); - break; - } - - size = H5Tget_size(datatype); - printf("Data size is %d \n", (int)size); - dataspace = H5Dget_space(dataset); /* dataspace handle */ - rank = H5Sget_simple_extent_ndims(dataspace); H5Sget_simple_extent_dims(dataspace, dims_out, NULL); n_out = dims_out[0]; d_out = dims_out[1]; - printf("rank %d, dimensions %lu x %lu \n", rank, n_out, d_out); /* Define hyperslab in the dataset. 
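 * NOTE: a hyperslab is HDF5's rectangular sub-region selection. With offset
 * {0, 0} and count equal to the full dataset extent, as set just below, the
 * selection covers the whole n_out x d_out matrix, so the later H5Dread call
 * pulls the entire dataset into memory in a single read instead of streaming
 * it in chunks.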
*/ offset[0] = offset[1] = 0; @@ -209,8 +202,7 @@ GetResultHitCount(const faiss::Index::idx_t* ground_index, const faiss::Index::i // each result replicates INDEX_ADD_LOOPS times for (int j_c = 0; j_c < ground_k; j_c++) { int r_c = index[i * k + j_c]; - int j_g = 0; - for (; j_g < ground_k / index_add_loops; j_g++) { + for (int j_g = 0; j_g < ground_k / index_add_loops; j_g++) { if (ground_index[i * ground_k + j_g] == r_c) { hit++; continue; @@ -221,106 +213,211 @@ GetResultHitCount(const faiss::Index::idx_t* ground_index, const faiss::Index::i return hit; } +#if DEBUG_VERBOSE void -test_ann_hdf5(const std::string& ann_test_name, const std::string& index_key, int32_t index_add_loops, - const std::vector& nprobes, int32_t search_loops) { +print_array(const char* header, bool is_integer, const void* arr, size_t nq, size_t k) { + const int ROW = 10; + const int COL = 10; + assert(ROW <= nq); + assert(COL <= k); + printf("%s\n", header); + printf("==============================================\n"); + for (int i = 0; i < 10; i++) { + for (int j = 0; j < 10; j++) { + if (is_integer) { + printf("%7ld ", ((int64_t*)arr)[i * k + j]); + } else { + printf("%.6f ", ((float*)arr)[i * k + j]); + } + } + printf("\n"); + } + printf("\n"); +} +#endif + +void +load_base_data(faiss::Index* &index, const std::string& ann_test_name, const std::string& index_key, + faiss::gpu::StandardGpuResources& res, const faiss::MetricType metric_type, const size_t dim, + int32_t index_add_loops, QueryMode mode = MODE_CPU) { double t0 = elapsed(); - const std::string ann_file_name = ann_test_name + ".hdf5"; + const std::string ann_file_name = ann_test_name + HDF5_POSTFIX; + const int GPU_DEVICE_IDX = 0; - faiss::MetricType metric_type; - size_t dim; - - if (!parse_ann_test_name(ann_test_name, dim, metric_type)) { - printf("Invalid ann test name: %s\n", ann_test_name.c_str()); - return; - } - - faiss::Index* index; - size_t d; + faiss::Index *cpu_index = nullptr, *gpu_index = nullptr; + faiss::distance_compute_blas_threshold = 800; std::string index_file_name = get_index_file_name(ann_test_name, index_key, index_add_loops); + try { - index = faiss::read_index(index_file_name.c_str()); - d = dim; + printf("[%.3f s] Reading index file: %s\n", elapsed() - t0, index_file_name.c_str()); + cpu_index = faiss::read_index(index_file_name.c_str()); + + if (mode != MODE_CPU) { + faiss::gpu::GpuClonerOptions option; + option.allInGpu = true; + + faiss::IndexComposition index_composition; + index_composition.index = cpu_index; + index_composition.quantizer = nullptr; + + switch (mode) { + case MODE_CPU: + assert(false); + break; + case MODE_MIX: + index_composition.mode = 1; // 0: all data, 1: copy quantizer, 2: copy data + break; + case MODE_GPU: + index_composition.mode = 0; // 0: all data, 1: copy quantizer, 2: copy data + break; + } + + printf("[%.3f s] Cloning CPU index to GPU\n", elapsed() - t0); + gpu_index = faiss::gpu::index_cpu_to_gpu(&res, GPU_DEVICE_IDX, &index_composition, &option); + } } catch (...) 
{ - printf("Cannot read index file: %s\n", index_file_name.c_str()); - - printf("[%.3f s] Loading train set\n", elapsed() - t0); - - size_t nb; - float* xb = (float*)hdf5_read(ann_file_name.c_str(), "train", H5T_FLOAT, d, nb); + size_t nb, d; + printf("[%.3f s] Loading HDF5 file: %s\n", elapsed() - t0, ann_file_name.c_str()); + float* xb = (float*)hdf5_read(ann_file_name, HDF5_DATASET_TRAIN, H5T_FLOAT, d, nb); assert(d == dim || !"dataset does not have correct dimension"); if (metric_type == faiss::METRIC_INNER_PRODUCT) { - printf("[%.3f s] Normalizing data set \n", elapsed() - t0); + printf("[%.3f s] Normalizing base data set \n", elapsed() - t0); normalize(xb, nb, d); } - printf("[%.3f s] Preparing index \"%s\" d=%ld\n", elapsed() - t0, index_key.c_str(), d); + printf("[%.3f s] Creating CPU index \"%s\" d=%ld\n", elapsed() - t0, index_key.c_str(), d); + cpu_index = faiss::index_factory(d, index_key.c_str(), metric_type); - index = faiss::index_factory(d, index_key.c_str(), metric_type); + printf("[%.3f s] Cloning CPU index to GPU\n", elapsed() - t0); + gpu_index = faiss::gpu::index_cpu_to_gpu(&res, GPU_DEVICE_IDX, cpu_index); printf("[%.3f s] Training on %ld vectors\n", elapsed() - t0, nb); - - index->train(nb, xb); - - printf("[%.3f s] Loading database\n", elapsed() - t0); + gpu_index->train(nb, xb); // add index multiple times to get ~1G data set for (int i = 0; i < index_add_loops; i++) { - printf("[%.3f s] Indexing database, size %ld*%ld\n", elapsed() - t0, nb, d); - index->add(nb, xb); + printf("[%.3f s] No.%d Indexing database, size %ld*%ld\n", elapsed() - t0, i, nb, d); + gpu_index->add(nb, xb); } - faiss::write_index(index, index_file_name.c_str()); + printf("[%.3f s] Coping GPU index to CPU\n", elapsed() - t0); + delete cpu_index; + cpu_index = faiss::gpu::index_gpu_to_cpu(gpu_index); + + faiss::IndexIVF *cpu_ivf_index = dynamic_cast(cpu_index); + if (cpu_ivf_index != nullptr) { + cpu_ivf_index->to_readonly(); + } + + printf("[%.3f s] Writing index file: %s\n", elapsed() - t0, index_file_name.c_str()); + faiss::write_index(cpu_index, index_file_name.c_str()); delete[] xb; } - size_t nq; - float* xq; - { - printf("[%.3f s] Loading queries\n", elapsed() - t0); - - size_t d2; - xq = (float*)hdf5_read(ann_file_name.c_str(), "test", H5T_FLOAT, d2, nq); - assert(d == d2 || !"query does not have same dimension as train set"); + switch (mode) { + case MODE_CPU: + case MODE_MIX: + index = cpu_index; + if (gpu_index) { + delete gpu_index; + } + break; + case MODE_GPU: + index = gpu_index; + if (cpu_index) { + delete cpu_index; + } + break; } +} - size_t k; // nb of results per query in the GT - faiss::Index::idx_t* gt; // nq * k matrix of ground-truth nearest-neighbors - { - printf("[%.3f s] Loading ground truth for %ld queries\n", elapsed() - t0, nq); +void +load_query_data(faiss::Index::distance_t* &xq, size_t& nq, const std::string& ann_test_name, + const faiss::MetricType metric_type, const size_t dim) { + double t0 = elapsed(); + size_t d; - // load ground-truth and convert int to long - size_t nq2; - int* gt_int = (int*)hdf5_read(ann_file_name.c_str(), "neighbors", H5T_INTEGER, k, nq2); - assert(nq2 == nq || !"incorrect nb of ground truth entries"); + const std::string ann_file_name = ann_test_name + HDF5_POSTFIX; - gt = new faiss::Index::idx_t[k * nq]; - for (int i = 0; i < k * nq; i++) { - gt[i] = gt_int[i]; - } - delete[] gt_int; + xq = (float *) hdf5_read(ann_file_name, HDF5_DATASET_TEST, H5T_FLOAT, d, nq); + assert(d == dim || !"query does not have same dimension as train 
set"); + + if (metric_type == faiss::METRIC_INNER_PRODUCT) { + printf("[%.3f s] Normalizing query data \n", elapsed() - t0); + normalize(xq, nq, d); } +} + +void +load_ground_truth(faiss::Index::idx_t* >, size_t& k, const std::string& ann_test_name, const size_t nq) { + const std::string ann_file_name = ann_test_name + HDF5_POSTFIX; + + // load ground-truth and convert int to long + size_t nq2; + int *gt_int = (int *) hdf5_read(ann_file_name, HDF5_DATASET_NEIGHBORS, H5T_INTEGER, k, nq2); + assert(nq2 == nq || !"incorrect nb of ground truth index"); + + gt = new faiss::Index::idx_t[k * nq]; + for (int i = 0; i < k * nq; i++) { + gt[i] = gt_int[i]; + } + delete[] gt_int; + +#if DEBUG_VERBOSE + faiss::Index::distance_t* gt_dist; // nq * k matrix of ground-truth nearest-neighbors distances + gt_dist = (float*)hdf5_read(ann_file_name, HDF5_DATASET_DISTANCES, H5T_FLOAT, k, nq2); + assert(nq2 == nq || !"incorrect nb of ground truth distance"); + + std::string str; + str = ann_test_name + " ground truth index"; + print_array(str.c_str(), true, gt, nq, k); + str = ann_test_name + " ground truth distance"; + print_array(str.c_str(), false, gt_dist, nq, k); + + delete gt_dist; +#endif +} + +void +test_with_nprobes(const std::string& ann_test_name, const std::string& index_key, faiss::Index* index, + faiss::gpu::StandardGpuResources& res, const QueryMode query_mode, + const faiss::Index::distance_t *xq, const faiss::Index::idx_t *gt, const std::vector nprobes, + const int32_t index_add_loops, const int32_t search_loops) { + const size_t NQ = 1000, NQ_START = 10, NQ_STEP = 10; + const size_t K = 1000, K_START = 100, K_STEP = 10; + const size_t GK = 100; // topk of ground truth + + std::unordered_map mode_str_map = + {{MODE_CPU, "MODE_CPU"}, {MODE_MIX, "MODE_MIX"}, {MODE_GPU, "MODE_GPU"}}; for (auto nprobe : nprobes) { - faiss::ParameterSpace params; - - std::string nprobe_str = "nprobe=" + std::to_string(nprobe); - params.set_index_parameters(index, nprobe_str.c_str()); + switch (query_mode) { + case MODE_CPU: + case MODE_MIX: { + faiss::ParameterSpace params; + std::string nprobe_str = "nprobe=" + std::to_string(nprobe); + params.set_index_parameters(index, nprobe_str.c_str()); + break; + } + case MODE_GPU: { + faiss::gpu::GpuIndexIVF *gpu_index_ivf = dynamic_cast(index); + gpu_index_ivf->setNumProbes(nprobe); + } + } // output buffers -#if 1 - const size_t NQ = 1000, K = 1000; - faiss::Index::idx_t* I = new faiss::Index::idx_t[NQ * K]; - float* D = new float[NQ * K]; + faiss::Index::idx_t *I = new faiss::Index::idx_t[NQ * K]; + faiss::Index::distance_t *D = new faiss::Index::distance_t[NQ * K]; - printf("\n%s | %s | nprobe=%lu\n", ann_test_name.c_str(), index_key.c_str(), nprobe); + printf("\n%s | %s - %s | nprobe=%lu\n", ann_test_name.c_str(), index_key.c_str(), + mode_str_map[query_mode].c_str(), nprobe); printf("======================================================================================\n"); - for (size_t t_nq = 10; t_nq <= NQ; t_nq *= 10) { // nq = {10, 100, 1000} - for (size_t t_k = 100; t_k <= K; t_k *= 10) { // k = {100, 1000} + for (size_t t_nq = NQ_START; t_nq <= NQ; t_nq *= NQ_STEP) { // nq = {10, 100, 1000} + for (size_t t_k = K_START; t_k <= K; t_k *= K_STEP) { // k = {100, 1000} faiss::indexIVF_stats.quantization_time = 0.0; faiss::indexIVF_stats.search_time = 0.0; @@ -330,271 +427,71 @@ test_ann_hdf5(const std::string& ann_test_name, const std::string& index_key, in } t_end = elapsed(); +#if DEBUG_VERBOSE + std::string str; + str = "I (" + index_key + ", nq=" + 
std::to_string(t_nq) + ", k=" + std::to_string(t_k) + ")"; + print_array(str.c_str(), true, I, t_nq, t_k); + str = "D (" + index_key + ", nq=" + std::to_string(t_nq) + ", k=" + std::to_string(t_k) + ")"; + print_array(str.c_str(), false, D, t_nq, t_k); +#endif + // k = 100 for ground truth - int32_t hit = GetResultHitCount(gt, I, k, t_k, t_nq, index_add_loops); + int32_t hit = GetResultHitCount(gt, I, GK, t_k, t_nq, index_add_loops); printf("nq = %4ld, k = %4ld, elapse = %.4fs (quant = %.4fs, search = %.4fs), R@ = %.4f\n", t_nq, t_k, (t_end - t_start) / search_loops, faiss::indexIVF_stats.quantization_time / 1000 / search_loops, faiss::indexIVF_stats.search_time / 1000 / search_loops, - (hit / float(t_nq * k / index_add_loops))); + (hit / float(t_nq * GK / index_add_loops))); } } printf("======================================================================================\n"); -#else - printf("[%.3f s] Perform a search on %ld queries\n", elapsed() - t0, nq); - - faiss::Index::idx_t* I = new faiss::Index::idx_t[nq * k]; - float* D = new float[nq * k]; - - index->search(nq, xq, k, D, I); - - printf("[%.3f s] Compute recalls\n", elapsed() - t0); - - // evaluate result by hand. - int n_1 = 0, n_10 = 0, n_100 = 0; - for (int i = 0; i < nq; i++) { - int gt_nn = gt[i * k]; - for (int j = 0; j < k; j++) { - if (I[i * k + j] == gt_nn) { - if (j < 1) - n_1++; - if (j < 10) - n_10++; - if (j < 100) - n_100++; - } - } - } - printf("R@1 = %.4f\n", n_1 / float(nq)); - printf("R@10 = %.4f\n", n_10 / float(nq)); - printf("R@100 = %.4f\n", n_100 / float(nq)); -#endif - - printf("[%.3f s] Search test done\n\n", elapsed() - t0); delete[] I; delete[] D; } - - delete[] xq; - delete[] gt; - delete index; } -#ifdef CUSTOMIZATION void -test_ivfsq8h(const std::string& ann_test_name, int32_t index_add_loops, const std::vector& nprobes, - bool pure_gpu_mode, int32_t search_loops) { +test_ann_hdf5(const std::string& ann_test_name, const std::string& index_key, const QueryMode query_mode, + int32_t index_add_loops, const std::vector& nprobes, int32_t search_loops) { double t0 = elapsed(); - const std::string ann_file_name = ann_test_name + ".hdf5"; + faiss::gpu::StandardGpuResources res; faiss::MetricType metric_type; size_t dim; + if (query_mode == MODE_MIX && index_key.find("SQ8Hybrid") == std::string::npos) { + printf("Only SQ8Hybrid support MODE_MIX\n"); + return; + } + if (!parse_ann_test_name(ann_test_name, dim, metric_type)) { printf("Invalid ann test name: %s\n", ann_test_name.c_str()); return; } - faiss::distance_compute_blas_threshold = 800; - faiss::gpu::StandardGpuResources res; - - const std::string index_key = "IVF16384,SQ8Hybrid"; - - faiss::Index* cpu_index = nullptr; - size_t d; - - std::string index_file_name = get_index_file_name(ann_test_name, index_key, index_add_loops); - try { - cpu_index = faiss::read_index(index_file_name.c_str()); - d = dim; - } catch (...) 
{ - printf("Cannot read index file: %s\n", index_file_name.c_str()); - - printf("[%.3f s] Loading train set\n", elapsed() - t0); - - size_t nb; - float* xb = (float*)hdf5_read(ann_file_name.c_str(), "train", H5T_FLOAT, d, nb); - assert(d == dim || !"dataset does not have correct dimension"); - - printf("[%.3f s] Preparing index \"%s\" d=%ld\n", elapsed() - t0, index_key.c_str(), d); - - faiss::Index* ori_index = faiss::index_factory(d, index_key.c_str(), metric_type); - - auto device_index = faiss::gpu::index_cpu_to_gpu(&res, 0, ori_index); - - printf("[%.3f s] Training on %ld vectors\n", elapsed() - t0, nb); - - device_index->train(nb, xb); - - printf("[%.3f s] Loading database\n", elapsed() - t0); - - for (int i = 0; i < index_add_loops; i++) { - printf("[%.3f s] Indexing database, size %ld*%ld\n", elapsed() - t0, nb, d); - device_index->add(nb, xb); - } - - cpu_index = faiss::gpu::index_gpu_to_cpu(device_index); - faiss::write_index(cpu_index, index_file_name.c_str()); - - delete[] xb; - } - - faiss::IndexIVF* cpu_ivf_index = dynamic_cast(cpu_index); - if (cpu_ivf_index != nullptr) { - cpu_ivf_index->to_readonly(); - } - - size_t nq; - float* xq; - { - printf("[%.3f s] Loading queries\n", elapsed() - t0); - - size_t d2; - xq = (float*)hdf5_read(ann_file_name.c_str(), "test", H5T_FLOAT, d2, nq); - assert(d == d2 || !"query does not have same dimension as train set"); - } - - size_t k; - faiss::Index::idx_t* gt; - { - printf("[%.3f s] Loading ground truth for %ld queries\n", elapsed() - t0, nq); - - size_t nq2; - int* gt_int = (int*)hdf5_read(ann_file_name.c_str(), "neighbors", H5T_INTEGER, k, nq2); - assert(nq2 == nq || !"incorrect nb of ground truth entries"); - - gt = new faiss::Index::idx_t[k * nq]; - for (uint64_t i = 0; i < k * nq; ++i) { - gt[i] = gt_int[i]; - } - delete[] gt_int; - } - - faiss::gpu::GpuClonerOptions option; - option.allInGpu = true; - - faiss::IndexComposition index_composition; - index_composition.index = cpu_index; - index_composition.quantizer = nullptr; - + size_t nq, k; faiss::Index* index; - double copy_time; + faiss::Index::distance_t* xq; + faiss::Index::idx_t* gt; // ground-truth index - if (!pure_gpu_mode) { - index_composition.mode = 1; // 0: all data, 1: copy quantizer, 2: copy data - index = faiss::gpu::index_cpu_to_gpu(&res, 0, &index_composition, &option); - delete index; + printf("[%.3f s] Loading base data\n", elapsed() - t0); + load_base_data(index, ann_test_name, index_key, res, metric_type, dim, index_add_loops, query_mode); - copy_time = elapsed(); - index = faiss::gpu::index_cpu_to_gpu(&res, 0, &index_composition, &option); - delete index; - } else { - index_composition.mode = 2; - index = faiss::gpu::index_cpu_to_gpu(&res, 0, &index_composition, &option); - delete index; + printf("[%.3f s] Loading queries\n", elapsed() - t0); + load_query_data(xq, nq, ann_test_name, metric_type, dim); - copy_time = elapsed(); - index = faiss::gpu::index_cpu_to_gpu(&res, 0, &index_composition, &option); - } + printf("[%.3f s] Loading ground truth for %ld queries\n", elapsed() - t0, nq); + load_ground_truth(gt, k, ann_test_name, nq); - copy_time = elapsed() - copy_time; - printf("[%.3f s] Copy quantizer completed, cost %f s\n", elapsed() - t0, copy_time); - - const size_t NQ = 1000, K = 1000; - if (!pure_gpu_mode) { - for (auto nprobe : nprobes) { - auto ivf_index = dynamic_cast(cpu_index); - ivf_index->nprobe = nprobe; - - auto is_gpu_flat_index = dynamic_cast(ivf_index->quantizer); - if (is_gpu_flat_index == nullptr) { - delete ivf_index->quantizer; - 
ivf_index->quantizer = index_composition.quantizer; - } - - int64_t* I = new faiss::Index::idx_t[NQ * K]; - float* D = new float[NQ * K]; - - printf("\n%s | %s-MIX | nprobe=%lu\n", ann_test_name.c_str(), index_key.c_str(), nprobe); - printf("======================================================================================\n"); - for (size_t t_nq = 10; t_nq <= NQ; t_nq *= 10) { // nq = {10, 100, 1000} - for (size_t t_k = 100; t_k <= K; t_k *= 10) { // k = {100, 1000} - faiss::indexIVF_stats.quantization_time = 0.0; - faiss::indexIVF_stats.search_time = 0.0; - - double t_start = elapsed(), t_end; - for (int32_t i = 0; i < search_loops; i++) { - cpu_index->search(t_nq, xq, t_k, D, I); - } - t_end = elapsed(); - - // k = 100 for ground truth - int32_t hit = GetResultHitCount(gt, I, k, t_k, t_nq, index_add_loops); - - printf("nq = %4ld, k = %4ld, elapse = %.4fs (quant = %.4fs, search = %.4fs), R@ = %.4f\n", t_nq, - t_k, (t_end - t_start) / search_loops, - faiss::indexIVF_stats.quantization_time / 1000 / search_loops, - faiss::indexIVF_stats.search_time / 1000 / search_loops, - (hit / float(t_nq * k / index_add_loops))); - } - } - printf("======================================================================================\n"); - - printf("[%.3f s] Search test done\n\n", elapsed() - t0); - - delete[] I; - delete[] D; - } - } else { - std::shared_ptr gpu_index_ivf_ptr = std::shared_ptr(index); - - for (auto nprobe : nprobes) { - faiss::gpu::GpuIndexIVFSQHybrid* gpu_index_ivf_hybrid = - dynamic_cast(gpu_index_ivf_ptr.get()); - gpu_index_ivf_hybrid->setNumProbes(nprobe); - - int64_t* I = new faiss::Index::idx_t[NQ * K]; - float* D = new float[NQ * K]; - - printf("\n%s | %s-GPU | nprobe=%lu\n", ann_test_name.c_str(), index_key.c_str(), nprobe); - printf("======================================================================================\n"); - for (size_t t_nq = 10; t_nq <= NQ; t_nq *= 10) { // nq = {10, 100, 1000} - for (size_t t_k = 100; t_k <= K; t_k *= 10) { // k = {100, 1000} - faiss::indexIVF_stats.quantization_time = 0.0; - faiss::indexIVF_stats.search_time = 0.0; - - double t_start = elapsed(), t_end; - for (int32_t i = 0; i < search_loops; i++) { - gpu_index_ivf_ptr->search(nq, xq, k, D, I); - } - t_end = elapsed(); - - // k = 100 for ground truth - int32_t hit = GetResultHitCount(gt, I, k, t_k, t_nq, index_add_loops); - - printf("nq = %4ld, k = %4ld, elapse = %.4fs (quant = %.4fs, search = %.4fs), R@ = %.4f\n", t_nq, - t_k, (t_end - t_start) / search_loops, - faiss::indexIVF_stats.quantization_time / 1000 / search_loops, - faiss::indexIVF_stats.search_time / 1000 / search_loops, - (hit / float(t_nq * k / index_add_loops))); - } - } - printf("======================================================================================\n"); - - printf("[%.3f s] Search test done\n\n", elapsed() - t0); - - delete[] I; - delete[] D; - } - } + test_with_nprobes(ann_test_name, index_key, index, res, query_mode, xq, gt, nprobes, index_add_loops, search_loops); + printf("[%.3f s] Search test done\n\n", elapsed() - t0); delete[] xq; delete[] gt; - delete cpu_index; + delete index; } -#endif /************************************************************************************ * https://github.com/erikbern/ann-benchmarks @@ -616,19 +513,29 @@ TEST(FAISSTEST, BENCHMARK) { const int32_t SIFT_INSERT_LOOPS = 2; // insert twice to get ~1G data set const int32_t GLOVE_INSERT_LOOPS = 1; - test_ann_hdf5("sift-128-euclidean", "IVF4096,Flat", SIFT_INSERT_LOOPS, param_nprobes, SEARCH_LOOPS); - 
test_ann_hdf5("sift-128-euclidean", "IVF16384,SQ8", SIFT_INSERT_LOOPS, param_nprobes, SEARCH_LOOPS); +/////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + test_ann_hdf5("sift-128-euclidean", "IVF16384,Flat", MODE_CPU, SIFT_INSERT_LOOPS, param_nprobes, SEARCH_LOOPS); + test_ann_hdf5("sift-128-euclidean", "IVF16384,Flat", MODE_GPU, SIFT_INSERT_LOOPS, param_nprobes, SEARCH_LOOPS); + + test_ann_hdf5("sift-128-euclidean", "IVF16384,SQ8", MODE_CPU, SIFT_INSERT_LOOPS, param_nprobes, SEARCH_LOOPS); + test_ann_hdf5("sift-128-euclidean", "IVF16384,SQ8", MODE_GPU, SIFT_INSERT_LOOPS, param_nprobes, SEARCH_LOOPS); + #ifdef CUSTOMIZATION - test_ann_hdf5("sift-128-euclidean", "IVF16384,SQ8Hybrid", SIFT_INSERT_LOOPS, param_nprobes, SEARCH_LOOPS); - test_ivfsq8h("sift-128-euclidean", SIFT_INSERT_LOOPS, param_nprobes, false, SEARCH_LOOPS); - test_ivfsq8h("sift-128-euclidean", SIFT_INSERT_LOOPS, param_nprobes, true, SEARCH_LOOPS); + test_ann_hdf5("sift-128-euclidean", "IVF16384,SQ8Hybrid", MODE_CPU, SIFT_INSERT_LOOPS, param_nprobes, SEARCH_LOOPS); + test_ann_hdf5("sift-128-euclidean", "IVF16384,SQ8Hybrid", MODE_GPU, SIFT_INSERT_LOOPS, param_nprobes, SEARCH_LOOPS); +// test_ann_hdf5("sift-128-euclidean", "IVF16384,SQ8Hybrid", MODE_MIX, SIFT_INSERT_LOOPS, param_nprobes, SEARCH_LOOPS); #endif - test_ann_hdf5("glove-200-angular", "IVF4096,Flat", GLOVE_INSERT_LOOPS, param_nprobes, SEARCH_LOOPS); - test_ann_hdf5("glove-200-angular", "IVF16384,SQ8", GLOVE_INSERT_LOOPS, param_nprobes, SEARCH_LOOPS); +/////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + test_ann_hdf5("glove-200-angular", "IVF16384,Flat", MODE_CPU, GLOVE_INSERT_LOOPS, param_nprobes, SEARCH_LOOPS); + test_ann_hdf5("glove-200-angular", "IVF16384,Flat", MODE_GPU, GLOVE_INSERT_LOOPS, param_nprobes, SEARCH_LOOPS); + + test_ann_hdf5("glove-200-angular", "IVF16384,SQ8", MODE_CPU, GLOVE_INSERT_LOOPS, param_nprobes, SEARCH_LOOPS); + test_ann_hdf5("glove-200-angular", "IVF16384,SQ8", MODE_GPU, GLOVE_INSERT_LOOPS, param_nprobes, SEARCH_LOOPS); + #ifdef CUSTOMIZATION - test_ann_hdf5("glove-200-angular", "IVF16384,SQ8Hybrid", GLOVE_INSERT_LOOPS, param_nprobes, SEARCH_LOOPS); - test_ivfsq8h("glove-200-angular", GLOVE_INSERT_LOOPS, param_nprobes, false, SEARCH_LOOPS); - test_ivfsq8h("glove-200-angular", GLOVE_INSERT_LOOPS, param_nprobes, true, SEARCH_LOOPS); + test_ann_hdf5("glove-200-angular", "IVF16384,SQ8Hybrid", MODE_CPU, GLOVE_INSERT_LOOPS, param_nprobes, SEARCH_LOOPS); + test_ann_hdf5("glove-200-angular", "IVF16384,SQ8Hybrid", MODE_GPU, GLOVE_INSERT_LOOPS, param_nprobes, SEARCH_LOOPS); +// test_ann_hdf5("glove-200-angular", "IVF16384,SQ8Hybrid", MODE_MIX, GLOVE_INSERT_LOOPS, param_nprobes, SEARCH_LOOPS); #endif } From 12b8d14a10f31bb80100449779503a8de01a4b6f Mon Sep 17 00:00:00 2001 From: "yudong.cai" Date: Wed, 30 Oct 2019 17:22:30 +0800 Subject: [PATCH 098/149] #89 clang-format Former-commit-id: ddfd7fe63f48b0822d536b1079a129b84ea537cc --- .../faiss_benchmark/faiss_benchmark_test.cpp | 66 +++++++++---------- 1 file changed, 32 insertions(+), 34 deletions(-) diff --git a/core/src/index/unittest/faiss_benchmark/faiss_benchmark_test.cpp b/core/src/index/unittest/faiss_benchmark/faiss_benchmark_test.cpp index 14e22aa55f..3c5a396fef 100644 --- a/core/src/index/unittest/faiss_benchmark/faiss_benchmark_test.cpp +++ b/core/src/index/unittest/faiss_benchmark/faiss_benchmark_test.cpp @@ -50,17 +50,13 @@ 
*****************************************************/ #define DEBUG_VERBOSE 0 -const std::string HDF5_POSTFIX = ".hdf5"; -const std::string HDF5_DATASET_TRAIN = "train"; -const std::string HDF5_DATASET_TEST = "test"; -const std::string HDF5_DATASET_NEIGHBORS = "neighbors"; -const std::string HDF5_DATASET_DISTANCES = "distances"; +const char HDF5_POSTFIX[] = ".hdf5"; +const char HDF5_DATASET_TRAIN[] = "train"; +const char HDF5_DATASET_TEST[] = "test"; +const char HDF5_DATASET_NEIGHBORS[] = "neighbors"; +const char HDF5_DATASET_DISTANCES[] = "distances"; -enum QueryMode { - MODE_CPU = 0, - MODE_MIX, - MODE_GPU -}; +enum QueryMode { MODE_CPU = 0, MODE_MIX, MODE_GPU }; double elapsed() { @@ -85,8 +81,8 @@ normalize(float* arr, size_t nq, size_t dim) { } void* -hdf5_read(const std::string& file_name, const std::string& dataset_name, H5T_class_t dataset_class, - size_t& d_out, size_t& n_out) { +hdf5_read(const std::string& file_name, const std::string& dataset_name, H5T_class_t dataset_class, size_t& d_out, + size_t& n_out) { hid_t file, dataset, datatype, dataspace, memspace; H5T_class_t t_class; /* data type class */ hsize_t dimsm[3]; /* memory space dimensions */ @@ -95,7 +91,7 @@ hdf5_read(const std::string& file_name, const std::string& dataset_name, H5T_cla hsize_t offset[2]; /* hyperslab offset in the file */ hsize_t count_out[3]; /* size of the hyperslab in memory */ hsize_t offset_out[3]; /* hyperslab offset in memory */ - void* data_out; /* output buffer */ + void* data_out; /* output buffer */ /* Open the file and the dataset. */ file = H5Fopen(file_name.c_str(), H5F_ACC_RDONLY, H5P_DEFAULT); @@ -237,7 +233,7 @@ print_array(const char* header, bool is_integer, const void* arr, size_t nq, siz #endif void -load_base_data(faiss::Index* &index, const std::string& ann_test_name, const std::string& index_key, +load_base_data(faiss::Index*& index, const std::string& ann_test_name, const std::string& index_key, faiss::gpu::StandardGpuResources& res, const faiss::MetricType metric_type, const size_t dim, int32_t index_add_loops, QueryMode mode = MODE_CPU) { double t0 = elapsed(); @@ -307,7 +303,7 @@ load_base_data(faiss::Index* &index, const std::string& ann_test_name, const std delete cpu_index; cpu_index = faiss::gpu::index_gpu_to_cpu(gpu_index); - faiss::IndexIVF *cpu_ivf_index = dynamic_cast(cpu_index); + faiss::IndexIVF* cpu_ivf_index = dynamic_cast(cpu_index); if (cpu_ivf_index != nullptr) { cpu_ivf_index->to_readonly(); } @@ -336,14 +332,14 @@ load_base_data(faiss::Index* &index, const std::string& ann_test_name, const std } void -load_query_data(faiss::Index::distance_t* &xq, size_t& nq, const std::string& ann_test_name, +load_query_data(faiss::Index::distance_t*& xq, size_t& nq, const std::string& ann_test_name, const faiss::MetricType metric_type, const size_t dim) { double t0 = elapsed(); size_t d; const std::string ann_file_name = ann_test_name + HDF5_POSTFIX; - xq = (float *) hdf5_read(ann_file_name, HDF5_DATASET_TEST, H5T_FLOAT, d, nq); + xq = (float*)hdf5_read(ann_file_name, HDF5_DATASET_TEST, H5T_FLOAT, d, nq); assert(d == dim || !"query does not have same dimension as train set"); if (metric_type == faiss::METRIC_INNER_PRODUCT) { @@ -353,12 +349,12 @@ load_query_data(faiss::Index::distance_t* &xq, size_t& nq, const std::string& an } void -load_ground_truth(faiss::Index::idx_t* >, size_t& k, const std::string& ann_test_name, const size_t nq) { +load_ground_truth(faiss::Index::idx_t*& gt, size_t& k, const std::string& ann_test_name, const size_t nq) { const std::string 
ann_file_name = ann_test_name + HDF5_POSTFIX; // load ground-truth and convert int to long size_t nq2; - int *gt_int = (int *) hdf5_read(ann_file_name, HDF5_DATASET_NEIGHBORS, H5T_INTEGER, k, nq2); + int* gt_int = (int*)hdf5_read(ann_file_name, HDF5_DATASET_NEIGHBORS, H5T_INTEGER, k, nq2); assert(nq2 == nq || !"incorrect nb of ground truth index"); gt = new faiss::Index::idx_t[k * nq]; @@ -384,15 +380,15 @@ load_ground_truth(faiss::Index::idx_t* >, size_t& k, const std::string& ann_te void test_with_nprobes(const std::string& ann_test_name, const std::string& index_key, faiss::Index* index, - faiss::gpu::StandardGpuResources& res, const QueryMode query_mode, - const faiss::Index::distance_t *xq, const faiss::Index::idx_t *gt, const std::vector nprobes, - const int32_t index_add_loops, const int32_t search_loops) { + faiss::gpu::StandardGpuResources& res, const QueryMode query_mode, const faiss::Index::distance_t* xq, + const faiss::Index::idx_t* gt, const std::vector nprobes, const int32_t index_add_loops, + const int32_t search_loops) { const size_t NQ = 1000, NQ_START = 10, NQ_STEP = 10; const size_t K = 1000, K_START = 100, K_STEP = 10; const size_t GK = 100; // topk of ground truth - std::unordered_map mode_str_map = - {{MODE_CPU, "MODE_CPU"}, {MODE_MIX, "MODE_MIX"}, {MODE_GPU, "MODE_GPU"}}; + std::unordered_map mode_str_map = { + {MODE_CPU, "MODE_CPU"}, {MODE_MIX, "MODE_MIX"}, {MODE_GPU, "MODE_GPU"}}; for (auto nprobe : nprobes) { switch (query_mode) { @@ -404,20 +400,20 @@ test_with_nprobes(const std::string& ann_test_name, const std::string& index_key break; } case MODE_GPU: { - faiss::gpu::GpuIndexIVF *gpu_index_ivf = dynamic_cast(index); + faiss::gpu::GpuIndexIVF* gpu_index_ivf = dynamic_cast(index); gpu_index_ivf->setNumProbes(nprobe); } } // output buffers - faiss::Index::idx_t *I = new faiss::Index::idx_t[NQ * K]; - faiss::Index::distance_t *D = new faiss::Index::distance_t[NQ * K]; + faiss::Index::idx_t* I = new faiss::Index::idx_t[NQ * K]; + faiss::Index::distance_t* D = new faiss::Index::distance_t[NQ * K]; printf("\n%s | %s - %s | nprobe=%lu\n", ann_test_name.c_str(), index_key.c_str(), - mode_str_map[query_mode].c_str(), nprobe); + mode_str_map[query_mode].c_str(), nprobe); printf("======================================================================================\n"); - for (size_t t_nq = NQ_START; t_nq <= NQ; t_nq *= NQ_STEP) { // nq = {10, 100, 1000} - for (size_t t_k = K_START; t_k <= K; t_k *= K_STEP) { // k = {100, 1000} + for (size_t t_nq = NQ_START; t_nq <= NQ; t_nq *= NQ_STEP) { // nq = {10, 100, 1000} + for (size_t t_k = K_START; t_k <= K; t_k *= K_STEP) { // k = {100, 1000} faiss::indexIVF_stats.quantization_time = 0.0; faiss::indexIVF_stats.search_time = 0.0; @@ -513,7 +509,7 @@ TEST(FAISSTEST, BENCHMARK) { const int32_t SIFT_INSERT_LOOPS = 2; // insert twice to get ~1G data set const int32_t GLOVE_INSERT_LOOPS = 1; -/////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + /////////////////////////////////////////////////////////////////////////////////////////////////////////////////// test_ann_hdf5("sift-128-euclidean", "IVF16384,Flat", MODE_CPU, SIFT_INSERT_LOOPS, param_nprobes, SEARCH_LOOPS); test_ann_hdf5("sift-128-euclidean", "IVF16384,Flat", MODE_GPU, SIFT_INSERT_LOOPS, param_nprobes, SEARCH_LOOPS); @@ -523,10 +519,11 @@ TEST(FAISSTEST, BENCHMARK) { #ifdef CUSTOMIZATION test_ann_hdf5("sift-128-euclidean", "IVF16384,SQ8Hybrid", MODE_CPU, SIFT_INSERT_LOOPS, param_nprobes, SEARCH_LOOPS); 
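// NOTE: each test_ann_hdf5() call here is one cell of a benchmark matrix of
// dataset x index type x query mode. MODE_CPU and MODE_GPU apply to every
// index, while MODE_MIX is only meaningful for SQ8Hybrid, which keeps the IVF
// coarse quantizer on GPU and the SQ8-encoded vectors on CPU; hence the
// MODE_MIX invocations stay commented out and the group sits behind the
// CUSTOMIZATION guard.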
test_ann_hdf5("sift-128-euclidean", "IVF16384,SQ8Hybrid", MODE_GPU, SIFT_INSERT_LOOPS, param_nprobes, SEARCH_LOOPS); -// test_ann_hdf5("sift-128-euclidean", "IVF16384,SQ8Hybrid", MODE_MIX, SIFT_INSERT_LOOPS, param_nprobes, SEARCH_LOOPS); +// test_ann_hdf5("sift-128-euclidean", "IVF16384,SQ8Hybrid", MODE_MIX, SIFT_INSERT_LOOPS, param_nprobes, +// SEARCH_LOOPS); #endif -/////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + /////////////////////////////////////////////////////////////////////////////////////////////////////////////////// test_ann_hdf5("glove-200-angular", "IVF16384,Flat", MODE_CPU, GLOVE_INSERT_LOOPS, param_nprobes, SEARCH_LOOPS); test_ann_hdf5("glove-200-angular", "IVF16384,Flat", MODE_GPU, GLOVE_INSERT_LOOPS, param_nprobes, SEARCH_LOOPS); @@ -536,6 +533,7 @@ TEST(FAISSTEST, BENCHMARK) { #ifdef CUSTOMIZATION test_ann_hdf5("glove-200-angular", "IVF16384,SQ8Hybrid", MODE_CPU, GLOVE_INSERT_LOOPS, param_nprobes, SEARCH_LOOPS); test_ann_hdf5("glove-200-angular", "IVF16384,SQ8Hybrid", MODE_GPU, GLOVE_INSERT_LOOPS, param_nprobes, SEARCH_LOOPS); -// test_ann_hdf5("glove-200-angular", "IVF16384,SQ8Hybrid", MODE_MIX, GLOVE_INSERT_LOOPS, param_nprobes, SEARCH_LOOPS); +// test_ann_hdf5("glove-200-angular", "IVF16384,SQ8Hybrid", MODE_MIX, GLOVE_INSERT_LOOPS, param_nprobes, +// SEARCH_LOOPS); #endif } From a4f0f4bd13844d183f4b1e5c4c7c3f961ebe6ce1 Mon Sep 17 00:00:00 2001 From: starlord Date: Wed, 30 Oct 2019 17:37:44 +0800 Subject: [PATCH 099/149] get branch name Former-commit-id: eb75eb1a41845c294479a8e15176feac629be9a8 --- core/CMakeLists.txt | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/core/CMakeLists.txt b/core/CMakeLists.txt index ae4366af2f..df7cd9183b 100644 --- a/core/CMakeLists.txt +++ b/core/CMakeLists.txt @@ -32,7 +32,10 @@ string(REGEX REPLACE "\n" "" BUILD_TIME ${BUILD_TIME}) message(STATUS "Build time = ${BUILD_TIME}") MACRO (GET_GIT_BRANCH_NAME GIT_BRANCH_NAME) - execute_process(COMMAND "git" symbolic-ref --short HEAD OUTPUT_VARIABLE ${GIT_BRANCH_NAME}) + execute_process(COMMAND "git" rev-parse --abbrev-ref HEAD OUTPUT_VARIABLE ${GIT_BRANCH_NAME}) + if(GIT_BRANCH_NAME STREQUAL "") + execute_process(COMMAND "git" symbolic-ref --short -q HEAD OUTPUT_VARIABLE ${GIT_BRANCH_NAME}) + endif() ENDMACRO (GET_GIT_BRANCH_NAME) GET_GIT_BRANCH_NAME(GIT_BRANCH_NAME) From 74f7b359ae3728ac24185a6b8dfc716e74a4d370 Mon Sep 17 00:00:00 2001 From: "yudong.cai" Date: Wed, 30 Oct 2019 17:41:59 +0800 Subject: [PATCH 100/149] #89 update Former-commit-id: 924960514c582b1c53f2115861c88e90252b63ac --- .../faiss_benchmark/faiss_benchmark_test.cpp | 54 ++++++++++--------- 1 file changed, 28 insertions(+), 26 deletions(-) diff --git a/core/src/index/unittest/faiss_benchmark/faiss_benchmark_test.cpp b/core/src/index/unittest/faiss_benchmark/faiss_benchmark_test.cpp index 3c5a396fef..5d38a6c1bc 100644 --- a/core/src/index/unittest/faiss_benchmark/faiss_benchmark_test.cpp +++ b/core/src/index/unittest/faiss_benchmark/faiss_benchmark_test.cpp @@ -448,7 +448,7 @@ test_with_nprobes(const std::string& ann_test_name, const std::string& index_key } void -test_ann_hdf5(const std::string& ann_test_name, const std::string& index_key, const QueryMode query_mode, +test_ann_hdf5(const std::string& ann_test_name, const std::string& index_type, const QueryMode query_mode, int32_t index_add_loops, const std::vector& nprobes, int32_t search_loops) { double t0 = elapsed(); @@ -457,11 +457,13 @@ test_ann_hdf5(const std::string& 
ann_test_name, const std::string& index_key, co faiss::MetricType metric_type; size_t dim; - if (query_mode == MODE_MIX && index_key.find("SQ8Hybrid") == std::string::npos) { - printf("Only SQ8Hybrid support MODE_MIX\n"); + if (query_mode == MODE_MIX && index_type != "SQ8Hybrid") { + assert(index_type == "SQ8Hybrid" || !"Only SQ8Hybrid support MODE_MIX"); return; } + std::string index_key = "IVF16384," + index_type; + if (!parse_ann_test_name(ann_test_name, dim, metric_type)) { printf("Invalid ann test name: %s\n", ann_test_name.c_str()); return; @@ -506,34 +508,34 @@ test_ann_hdf5(const std::string& ann_test_name, const std::string& index_key, co TEST(FAISSTEST, BENCHMARK) { std::vector param_nprobes = {8, 128}; const int32_t SEARCH_LOOPS = 5; + + /////////////////////////////////////////////////////////////////////////////////////////////////////////////////// const int32_t SIFT_INSERT_LOOPS = 2; // insert twice to get ~1G data set + + test_ann_hdf5("sift-128-euclidean", "Flat", MODE_CPU, SIFT_INSERT_LOOPS, param_nprobes, SEARCH_LOOPS); + test_ann_hdf5("sift-128-euclidean", "Flat", MODE_GPU, SIFT_INSERT_LOOPS, param_nprobes, SEARCH_LOOPS); + + test_ann_hdf5("sift-128-euclidean", "SQ8", MODE_CPU, SIFT_INSERT_LOOPS, param_nprobes, SEARCH_LOOPS); + test_ann_hdf5("sift-128-euclidean", "SQ8", MODE_GPU, SIFT_INSERT_LOOPS, param_nprobes, SEARCH_LOOPS); + +#ifdef CUSTOMIZATION + test_ann_hdf5("sift-128-euclidean", "SQ8Hybrid", MODE_CPU, SIFT_INSERT_LOOPS, param_nprobes, SEARCH_LOOPS); + test_ann_hdf5("sift-128-euclidean", "SQ8Hybrid", MODE_GPU, SIFT_INSERT_LOOPS, param_nprobes, SEARCH_LOOPS); +// test_ann_hdf5("sift-128-euclidean", "SQ8Hybrid", MODE_MIX, SIFT_INSERT_LOOPS, param_nprobes, SEARCH_LOOPS); +#endif + + /////////////////////////////////////////////////////////////////////////////////////////////////////////////////// const int32_t GLOVE_INSERT_LOOPS = 1; - /////////////////////////////////////////////////////////////////////////////////////////////////////////////////// - test_ann_hdf5("sift-128-euclidean", "IVF16384,Flat", MODE_CPU, SIFT_INSERT_LOOPS, param_nprobes, SEARCH_LOOPS); - test_ann_hdf5("sift-128-euclidean", "IVF16384,Flat", MODE_GPU, SIFT_INSERT_LOOPS, param_nprobes, SEARCH_LOOPS); + test_ann_hdf5("glove-200-angular", "Flat", MODE_CPU, GLOVE_INSERT_LOOPS, param_nprobes, SEARCH_LOOPS); + test_ann_hdf5("glove-200-angular", "Flat", MODE_GPU, GLOVE_INSERT_LOOPS, param_nprobes, SEARCH_LOOPS); - test_ann_hdf5("sift-128-euclidean", "IVF16384,SQ8", MODE_CPU, SIFT_INSERT_LOOPS, param_nprobes, SEARCH_LOOPS); - test_ann_hdf5("sift-128-euclidean", "IVF16384,SQ8", MODE_GPU, SIFT_INSERT_LOOPS, param_nprobes, SEARCH_LOOPS); + test_ann_hdf5("glove-200-angular", "SQ8", MODE_CPU, GLOVE_INSERT_LOOPS, param_nprobes, SEARCH_LOOPS); + test_ann_hdf5("glove-200-angular", "SQ8", MODE_GPU, GLOVE_INSERT_LOOPS, param_nprobes, SEARCH_LOOPS); #ifdef CUSTOMIZATION - test_ann_hdf5("sift-128-euclidean", "IVF16384,SQ8Hybrid", MODE_CPU, SIFT_INSERT_LOOPS, param_nprobes, SEARCH_LOOPS); - test_ann_hdf5("sift-128-euclidean", "IVF16384,SQ8Hybrid", MODE_GPU, SIFT_INSERT_LOOPS, param_nprobes, SEARCH_LOOPS); -// test_ann_hdf5("sift-128-euclidean", "IVF16384,SQ8Hybrid", MODE_MIX, SIFT_INSERT_LOOPS, param_nprobes, -// SEARCH_LOOPS); -#endif - - /////////////////////////////////////////////////////////////////////////////////////////////////////////////////// - test_ann_hdf5("glove-200-angular", "IVF16384,Flat", MODE_CPU, GLOVE_INSERT_LOOPS, param_nprobes, SEARCH_LOOPS); - test_ann_hdf5("glove-200-angular", 
"IVF16384,Flat", MODE_GPU, GLOVE_INSERT_LOOPS, param_nprobes, SEARCH_LOOPS); - - test_ann_hdf5("glove-200-angular", "IVF16384,SQ8", MODE_CPU, GLOVE_INSERT_LOOPS, param_nprobes, SEARCH_LOOPS); - test_ann_hdf5("glove-200-angular", "IVF16384,SQ8", MODE_GPU, GLOVE_INSERT_LOOPS, param_nprobes, SEARCH_LOOPS); - -#ifdef CUSTOMIZATION - test_ann_hdf5("glove-200-angular", "IVF16384,SQ8Hybrid", MODE_CPU, GLOVE_INSERT_LOOPS, param_nprobes, SEARCH_LOOPS); - test_ann_hdf5("glove-200-angular", "IVF16384,SQ8Hybrid", MODE_GPU, GLOVE_INSERT_LOOPS, param_nprobes, SEARCH_LOOPS); -// test_ann_hdf5("glove-200-angular", "IVF16384,SQ8Hybrid", MODE_MIX, GLOVE_INSERT_LOOPS, param_nprobes, -// SEARCH_LOOPS); + test_ann_hdf5("glove-200-angular", "SQ8Hybrid", MODE_CPU, GLOVE_INSERT_LOOPS, param_nprobes, SEARCH_LOOPS); + test_ann_hdf5("glove-200-angular", "SQ8Hybrid", MODE_GPU, GLOVE_INSERT_LOOPS, param_nprobes, SEARCH_LOOPS); +// test_ann_hdf5("glove-200-angular", "SQ8Hybrid", MODE_MIX, GLOVE_INSERT_LOOPS, param_nprobes, SEARCH_LOOPS); #endif } From 06ec2a04a4821db039b981b64811bc612a7c7a67 Mon Sep 17 00:00:00 2001 From: Sijie Zhang <36330442+akihoni@users.noreply.github.com> Date: Wed, 30 Oct 2019 18:36:08 +0800 Subject: [PATCH 101/149] Update README_CN.md MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 更换无效链接。 Former-commit-id: 5ac8a003c25e68f1d81c57ce08f91ab20e3b8e24 --- README_CN.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README_CN.md b/README_CN.md index aac2a57cc6..4fdb44df40 100644 --- a/README_CN.md +++ b/README_CN.md @@ -140,7 +140,7 @@ $ ./stop_server.sh $ pip install pymilvus==0.2.3 ``` -创建 `example.py` 文件,并向文件中加入 [Python 示例代码](https://github.com/milvus-io/pymilvus/blob/master/examples/AdvancedExample.py)。 +创建 `example.py` 文件,并向文件中加入 [Python 示例代码](https://github.com/milvus-io/pymilvus/blob/master/examples/advanced_example.py)。 运行示例代码 From 7dc552fdea6f488e8e18d97a0ed8053a4948cf31 Mon Sep 17 00:00:00 2001 From: wxyu Date: Wed, 30 Oct 2019 19:57:55 +0800 Subject: [PATCH 102/149] New config opion use_gpu_threshold Former-commit-id: 3d33145d8fa473f2b81cc98022c1bb93853a2fbc --- CHANGELOG.md | 1 + core/conf/server_config.template | 1 + .../src/scheduler/optimizer/LargeSQ8HPass.cpp | 12 ++++- core/src/scheduler/optimizer/LargeSQ8HPass.h | 5 ++- core/src/server/Config.cpp | 45 +++++++++++++++++++ core/src/server/Config.h | 8 ++++ 6 files changed, 70 insertions(+), 2 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index b28c7bd3a7..5176ce2b79 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -10,6 +10,7 @@ Please mark all change in change log and use the ticket from JIRA. 
- \#90 - The server start error messages could be improved to enhance user experience - \#104 - test_scheduler core dump - \#115 - Using new structure for tasktable +- \#139 - New config option use_gpu_threshold ## Improvement - \#64 - Improvement dump function in scheduler diff --git a/core/conf/server_config.template b/core/conf/server_config.template index 3feb16fd63..a23707be5d 100644 --- a/core/conf/server_config.template +++ b/core/conf/server_config.template @@ -36,6 +36,7 @@ cache_config: engine_config: use_blas_threshold: 20 # if nq < use_blas_threshold, use SSE, faster with fluctuated response times # if nq >= use_blas_threshold, use OpenBlas, slower with stable response times + use_gpu_threshold: 1000 resource_config: search_resources: # define the GPUs used for search computation, must be in format: gpux diff --git a/core/src/scheduler/optimizer/LargeSQ8HPass.cpp b/core/src/scheduler/optimizer/LargeSQ8HPass.cpp index 62d0e57902..ddcee414a7 100644 --- a/core/src/scheduler/optimizer/LargeSQ8HPass.cpp +++ b/core/src/scheduler/optimizer/LargeSQ8HPass.cpp @@ -21,11 +21,20 @@ #include "scheduler/Utils.h" #include "scheduler/task/SearchTask.h" #include "scheduler/tasklabel/SpecResLabel.h" +#include "server/Config.h" #include "utils/Log.h" namespace milvus { namespace scheduler { +LargeSQ8HPass::LargeSQ8HPass() { + server::Config& config = server::Config::GetInstance(); + Status s = config.GetEngineConfigUseGpuThreshold(threshold_); + if (!s.ok()) { + threshold_ = std::numeric_limits::max(); + } +} + bool LargeSQ8HPass::Run(const TaskPtr& task) { if (task->Type() != TaskType::SearchTask) { @@ -40,7 +49,8 @@ LargeSQ8HPass::Run(const TaskPtr& task) { auto search_job = std::static_pointer_cast(search_task->job_.lock()); // TODO: future, Index::IVFSQ8H, if nq < threshold set cpu, else set gpu - if (search_job->nq() < 100) { + + if (search_job->nq() < threshold_) { return false; } diff --git a/core/src/scheduler/optimizer/LargeSQ8HPass.h b/core/src/scheduler/optimizer/LargeSQ8HPass.h index 49e658002f..f293e1db73 100644 --- a/core/src/scheduler/optimizer/LargeSQ8HPass.h +++ b/core/src/scheduler/optimizer/LargeSQ8HPass.h @@ -34,11 +34,14 @@ namespace scheduler { class LargeSQ8HPass : public Pass { public: - LargeSQ8HPass() = default; + LargeSQ8HPass(); public: bool Run(const TaskPtr& task) override; + + private: + int32_t threshold_ = std::numeric_limits::max(); }; using LargeSQ8HPassPtr = std::shared_ptr; diff --git a/core/src/server/Config.cpp b/core/src/server/Config.cpp index 111cc26f9c..86caf6dd37 100644 --- a/core/src/server/Config.cpp +++ b/core/src/server/Config.cpp @@ -193,6 +193,12 @@ Config::ValidateConfig() { return s; } + int32_t engine_use_gpu_threshold; + s = GetEngineConfigUseGpuThreshold(engine_use_gpu_threshold); + if (!s.ok()) { + return s; + } + /* resource config */ std::string resource_mode; s = GetResourceConfigMode(resource_mode); @@ -324,6 +330,11 @@ Config::ResetDefaultConfig() { return s; } + s = SetEngineConfigUseGpuThreshold(CONFIG_ENGINE_USE_GPU_THRESHOLD_DEFAULT); + if (!s.ok()) { + return s; + } + /* resource config */ s = SetResourceConfigMode(CONFIG_RESOURCE_MODE_DEFAULT); if (!s.ok()) { @@ -656,6 +667,16 @@ Config::CheckEngineConfigOmpThreadNum(const std::string& value) { return Status::OK(); } +Status +Config::CheckEngineConfigUseGpuThreshold(const std::string& value) { + if (!ValidationUtil::ValidateStringIsNumber(value).ok()) { + std::string msg = "Invalid gpu threshold: " + value + 
Possible reason: engine_config.use_gpu_threshold is not a positive integer."; + return Status(SERVER_INVALID_ARGUMENT, msg); + } + return Status::OK(); +} + Status Config::CheckResourceConfigMode(const std::string& value) { if (value != "simple") { @@ -951,6 +972,19 @@ Config::GetEngineConfigOmpThreadNum(int32_t& value) { return Status::OK(); } +Status +Config::GetEngineConfigUseGpuThreshold(int32_t& value) { + std::string str = + GetConfigStr(CONFIG_ENGINE, CONFIG_ENGINE_USE_GPU_THRESHOLD, CONFIG_ENGINE_USE_GPU_THRESHOLD_DEFAULT); + Status s = CheckEngineConfigUseGpuThreshold(str); + if (!s.ok()) { + return s; + } + + value = std::stoi(str); + return Status::OK(); +} + Status Config::GetResourceConfigMode(std::string& value) { value = GetConfigStr(CONFIG_RESOURCE, CONFIG_RESOURCE_MODE, CONFIG_RESOURCE_MODE_DEFAULT); @@ -1203,6 +1237,17 @@ Config::SetEngineConfigOmpThreadNum(const std::string& value) { return Status::OK(); } +Status +Config::SetEngineConfigUseGpuThreshold(const std::string& value) { + Status s = CheckEngineConfigUseGpuThreshold(value); + if (!s.ok()) { + return s; + } + + SetConfigValueInMem(CONFIG_DB, CONFIG_ENGINE_USE_GPU_THRESHOLD, value); + return Status::OK(); +} + /* resource config */ Status Config::SetResourceConfigMode(const std::string& value) { diff --git a/core/src/server/Config.h b/core/src/server/Config.h index 4cab25a1c6..3e7ae0c818 100644 --- a/core/src/server/Config.h +++ b/core/src/server/Config.h @@ -84,6 +84,8 @@ static const char* CONFIG_ENGINE_USE_BLAS_THRESHOLD = "use_blas_threshold"; static const char* CONFIG_ENGINE_USE_BLAS_THRESHOLD_DEFAULT = "20"; static const char* CONFIG_ENGINE_OMP_THREAD_NUM = "omp_thread_num"; static const char* CONFIG_ENGINE_OMP_THREAD_NUM_DEFAULT = "0"; +static const char* CONFIG_ENGINE_USE_GPU_THRESHOLD = "use_gpu_threshold"; +static const char* CONFIG_ENGINE_USE_GPU_THRESHOLD_DEFAULT = "1000"; /* resource config */ static const char* CONFIG_RESOURCE = "resource_config"; @@ -166,6 +168,8 @@ class Config { CheckEngineConfigUseBlasThreshold(const std::string& value); Status CheckEngineConfigOmpThreadNum(const std::string& value); + Status + CheckEngineConfigUseGpuThreshold(const std::string& value); /* resource config */ Status @@ -230,6 +234,8 @@ class Config { GetEngineConfigUseBlasThreshold(int32_t& value); Status GetEngineConfigOmpThreadNum(int32_t& value); + Status + GetEngineConfigUseGpuThreshold(int32_t& value); /* resource config */ Status @@ -289,6 +295,8 @@ class Config { SetEngineConfigUseBlasThreshold(const std::string& value); Status SetEngineConfigOmpThreadNum(const std::string& value); + Status + SetEngineConfigUseGpuThreshold(const std::string& value); /* resource config */ Status From 87963408bf24e73d1f655c7011f2b97ed8fe34b7 Mon Sep 17 00:00:00 2001 From: wxyu Date: Wed, 30 Oct 2019 20:24:10 +0800 Subject: [PATCH 103/149] fix lint Former-commit-id: 2f5166b4c2a461c379c55eb5a49d431e878aa19a --- core/src/scheduler/optimizer/LargeSQ8HPass.h | 1 + 1 file changed, 1 insertion(+) diff --git a/core/src/scheduler/optimizer/LargeSQ8HPass.h b/core/src/scheduler/optimizer/LargeSQ8HPass.h index f293e1db73..1e00aa4a1b 100644 --- a/core/src/scheduler/optimizer/LargeSQ8HPass.h +++ b/core/src/scheduler/optimizer/LargeSQ8HPass.h @@ -18,6 +18,7 @@ #include #include +#include #include #include #include From e3e22cedc5b0665600a399be0a211bc092eaf0c8 Mon Sep 17 00:00:00 2001 From: milvus-ci-robot Date: Thu, 31 Oct 2019 10:34:55 +0800 Subject: [PATCH 104/149] delete GIT_CREDENTIALS_ID parameter Former-commit-id: 
8308271362483b047c3b406f6cbe0de32734e73d --- ci/jenkins/Jenkinsfile | 19 +++++++++---------- ci/jenkins/{jenkinsfile => step}/build.groovy | 0 .../cleanupSingleDev.groovy | 0 .../{jenkinsfile => step}/coverage.groovy | 0 .../deploySingle2Dev.groovy | 2 +- .../{jenkinsfile => step}/package.groovy | 0 .../publishImages.groovy | 0 .../singleDevNightlyTest.groovy | 2 +- .../singleDevTest.groovy | 2 +- 9 files changed, 12 insertions(+), 13 deletions(-) rename ci/jenkins/{jenkinsfile => step}/build.groovy (100%) rename ci/jenkins/{jenkinsfile => step}/cleanupSingleDev.groovy (100%) rename ci/jenkins/{jenkinsfile => step}/coverage.groovy (100%) rename ci/jenkins/{jenkinsfile => step}/deploySingle2Dev.groovy (57%) rename ci/jenkins/{jenkinsfile => step}/package.groovy (100%) rename ci/jenkins/{jenkinsfile => step}/publishImages.groovy (100%) rename ci/jenkins/{jenkinsfile => step}/singleDevNightlyTest.groovy (79%) rename ci/jenkins/{jenkinsfile => step}/singleDevTest.groovy (81%) diff --git a/ci/jenkins/Jenkinsfile b/ci/jenkins/Jenkinsfile index 67bff5ac1e..24f972748a 100644 --- a/ci/jenkins/Jenkinsfile +++ b/ci/jenkins/Jenkinsfile @@ -16,7 +16,6 @@ pipeline { parameters{ choice choices: ['Release', 'Debug'], description: '', name: 'BUILD_TYPE' - string defaultValue: 'cf1434e7-5a4b-4d25-82e8-88d667aef9e5', description: 'GIT CREDENTIALS ID', name: 'GIT_CREDENTIALS_ID', trim: true string defaultValue: 'registry.zilliz.com', description: 'DOCKER REGISTRY URL', name: 'DOKCER_REGISTRY_URL', trim: true string defaultValue: 'ba070c98-c8cc-4f7c-b657-897715f359fc', description: 'DOCKER CREDENTIALS ID', name: 'DOCKER_CREDENTIALS_ID', trim: true string defaultValue: 'http://192.168.1.202/artifactory/milvus', description: 'JFROG ARTFACTORY URL', name: 'JFROG_ARTFACTORY_URL', trim: true @@ -56,7 +55,7 @@ pipeline { steps { container('milvus-build-env') { script { - load "${env.WORKSPACE}/ci/jenkins/jenkinsfile/build.groovy" + load "${env.WORKSPACE}/ci/jenkins/step/build.groovy" } } } @@ -65,7 +64,7 @@ pipeline { steps { container('milvus-build-env') { script { - load "${env.WORKSPACE}/ci/jenkins/jenkinsfile/coverage.groovy" + load "${env.WORKSPACE}/ci/jenkins/step/coverage.groovy" } } } @@ -74,7 +73,7 @@ pipeline { steps { container('milvus-build-env') { script { - load "${env.WORKSPACE}/ci/jenkins/jenkinsfile/package.groovy" + load "${env.WORKSPACE}/ci/jenkins/step/package.groovy" } } } @@ -96,7 +95,7 @@ pipeline { steps { container('publish-images'){ script { - load "${env.WORKSPACE}/ci/jenkins/jenkinsfile/publishImages.groovy" + load "${env.WORKSPACE}/ci/jenkins/step/publishImages.groovy" } } } @@ -118,7 +117,7 @@ pipeline { steps { container('milvus-test-env') { script { - load "${env.WORKSPACE}/ci/jenkins/jenkinsfile/deploySingle2Dev.groovy" + load "${env.WORKSPACE}/ci/jenkins/step/deploySingle2Dev.groovy" } } } @@ -130,9 +129,9 @@ pipeline { script { boolean isNightlyTest = isTimeTriggeredBuild() if (isNightlyTest) { - load "${env.WORKSPACE}/ci/jenkins/jenkinsfile/singleDevNightlyTest.groovy" + load "${env.WORKSPACE}/ci/jenkins/step/singleDevNightlyTest.groovy" } else { - load "${env.WORKSPACE}/ci/jenkins/jenkinsfile/singleDevTest.groovy" + load "${env.WORKSPACE}/ci/jenkins/step/singleDevTest.groovy" } } } @@ -143,7 +142,7 @@ pipeline { steps { container('milvus-test-env') { script { - load "${env.WORKSPACE}/ci/jenkins/jenkinsfile/cleanupSingleDev.groovy" + load "${env.WORKSPACE}/ci/jenkins/step/cleanupSingleDev.groovy" } } } @@ -153,7 +152,7 @@ pipeline { unsuccessful { container('milvus-test-env') { 
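// NOTE: this commit is a mechanical relocation: every `load` target moves
// from ci/jenkins/jenkinsfile/ to ci/jenkins/step/, and the GIT_CREDENTIALS_ID
// parameter is dropped because the milvus-helm checkouts switch to anonymous
// HTTPS clones; the logic inside each step script is otherwise unchanged.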
script { - load "${env.WORKSPACE}/ci/jenkins/jenkinsfile/cleanupSingleDev.groovy" + load "${env.WORKSPACE}/ci/jenkins/step/cleanupSingleDev.groovy" } } } diff --git a/ci/jenkins/jenkinsfile/build.groovy b/ci/jenkins/step/build.groovy similarity index 100% rename from ci/jenkins/jenkinsfile/build.groovy rename to ci/jenkins/step/build.groovy diff --git a/ci/jenkins/jenkinsfile/cleanupSingleDev.groovy b/ci/jenkins/step/cleanupSingleDev.groovy similarity index 100% rename from ci/jenkins/jenkinsfile/cleanupSingleDev.groovy rename to ci/jenkins/step/cleanupSingleDev.groovy diff --git a/ci/jenkins/jenkinsfile/coverage.groovy b/ci/jenkins/step/coverage.groovy similarity index 100% rename from ci/jenkins/jenkinsfile/coverage.groovy rename to ci/jenkins/step/coverage.groovy diff --git a/ci/jenkins/jenkinsfile/deploySingle2Dev.groovy b/ci/jenkins/step/deploySingle2Dev.groovy similarity index 57% rename from ci/jenkins/jenkinsfile/deploySingle2Dev.groovy rename to ci/jenkins/step/deploySingle2Dev.groovy index bc6c6f4438..f4964df5e2 100644 --- a/ci/jenkins/jenkinsfile/deploySingle2Dev.groovy +++ b/ci/jenkins/step/deploySingle2Dev.groovy @@ -1,7 +1,7 @@ sh 'helm init --client-only --skip-refresh --stable-repo-url https://kubernetes.oss-cn-hangzhou.aliyuncs.com/charts' sh 'helm repo update' dir ('milvus-helm') { - checkout([$class: 'GitSCM', branches: [[name: "0.5.0"]], doGenerateSubmoduleConfigurations: false, extensions: [], submoduleCfg: [], userRemoteConfigs: [[credentialsId: "${params.GIT_CREDENTIALS_ID}", url: "https://github.com/milvus-io/milvus-helm.git", name: 'origin', refspec: "+refs/heads/0.5.0:refs/remotes/origin/0.5.0"]]]) + checkout([$class: 'GitSCM', branches: [[name: "0.5.0"]], userRemoteConfigs: [[url: "https://github.com/milvus-io/milvus-helm.git", name: 'origin', refspec: "+refs/heads/0.5.0:refs/remotes/origin/0.5.0"]]]) dir ("milvus-gpu") { sh "helm install --wait --timeout 300 --set engine.image.tag=${DOCKER_VERSION} --set expose.type=clusterIP --name ${env.PIPELINE_NAME}-${env.BUILD_NUMBER}-single-gpu -f ci/db_backend/sqlite_values.yaml -f ci/filebeat/values.yaml --namespace milvus ." 
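// NOTE: the helm command above stands up a disposable single-node Milvus GPU
// release per CI build; `--wait --timeout 300` blocks until the pods report
// Ready or 300 seconds elapse, and the ci/db_backend/sqlite_values.yaml
// overlay presumably selects the SQLite metadata backend (an assumption based
// on the file name, not shown in this patch).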
} diff --git a/ci/jenkins/jenkinsfile/package.groovy b/ci/jenkins/step/package.groovy similarity index 100% rename from ci/jenkins/jenkinsfile/package.groovy rename to ci/jenkins/step/package.groovy diff --git a/ci/jenkins/jenkinsfile/publishImages.groovy b/ci/jenkins/step/publishImages.groovy similarity index 100% rename from ci/jenkins/jenkinsfile/publishImages.groovy rename to ci/jenkins/step/publishImages.groovy diff --git a/ci/jenkins/jenkinsfile/singleDevNightlyTest.groovy b/ci/jenkins/step/singleDevNightlyTest.groovy similarity index 79% rename from ci/jenkins/jenkinsfile/singleDevNightlyTest.groovy rename to ci/jenkins/step/singleDevNightlyTest.groovy index 5140ad858f..9aeab2eb4e 100644 --- a/ci/jenkins/jenkinsfile/singleDevNightlyTest.groovy +++ b/ci/jenkins/step/singleDevNightlyTest.groovy @@ -8,7 +8,7 @@ timeout(time: 90, unit: 'MINUTES') { if (!fileExists('milvus-helm')) { dir ("milvus-helm") { - checkout([$class: 'GitSCM', branches: [[name: "0.5.0"]], doGenerateSubmoduleConfigurations: false, extensions: [], submoduleCfg: [], userRemoteConfigs: [[credentialsId: "${params.GIT_CREDENTIALS_ID}", url: "https://github.com/milvus-io/milvus-helm.git", name: 'origin', refspec: "+refs/heads/0.5.0:refs/remotes/origin/0.5.0"]]]) + checkout([$class: 'GitSCM', branches: [[name: "0.5.0"]], userRemoteConfigs: [[url: "https://github.com/milvus-io/milvus-helm.git", name: 'origin', refspec: "+refs/heads/0.5.0:refs/remotes/origin/0.5.0"]]]) } } dir ("milvus-helm") { diff --git a/ci/jenkins/jenkinsfile/singleDevTest.groovy b/ci/jenkins/step/singleDevTest.groovy similarity index 81% rename from ci/jenkins/jenkinsfile/singleDevTest.groovy rename to ci/jenkins/step/singleDevTest.groovy index 16fe65a9b3..86e6f126d9 100644 --- a/ci/jenkins/jenkinsfile/singleDevTest.groovy +++ b/ci/jenkins/step/singleDevTest.groovy @@ -10,7 +10,7 @@ timeout(time: 60, unit: 'MINUTES') { // if (!fileExists('milvus-helm')) { // dir ("milvus-helm") { - // checkout([$class: 'GitSCM', branches: [[name: "0.5.0"]], doGenerateSubmoduleConfigurations: false, extensions: [], submoduleCfg: [], userRemoteConfigs: [[credentialsId: "${params.GIT_CREDENTIALS_ID}", url: "https://github.com/milvus-io/milvus-helm.git", name: 'origin', refspec: "+refs/heads/0.5.0:refs/remotes/origin/0.5.0"]]]) + // checkout([$class: 'GitSCM', branches: [[name: "0.5.0"]], userRemoteConfigs: [[url: "https://github.com/milvus-io/milvus-helm.git", name: 'origin', refspec: "+refs/heads/0.5.0:refs/remotes/origin/0.5.0"]]]) // } // } // dir ("milvus-helm") { From eeb503f7c548a5d5c373ea08debc0cd2f5bf4339 Mon Sep 17 00:00:00 2001 From: JinHai-CN Date: Thu, 31 Oct 2019 14:07:23 +0800 Subject: [PATCH 105/149] Update faiss link Former-commit-id: 22febba9b19f8de8422eae922ec75d7106e37e7a --- core/src/index/cmake/ThirdPartyPackagesCore.cmake | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/core/src/index/cmake/ThirdPartyPackagesCore.cmake b/core/src/index/cmake/ThirdPartyPackagesCore.cmake index e8a5c8a995..c066f7b392 100644 --- a/core/src/index/cmake/ThirdPartyPackagesCore.cmake +++ b/core/src/index/cmake/ThirdPartyPackagesCore.cmake @@ -245,7 +245,8 @@ if(CUSTOMIZATION) # set(FAISS_MD5 "072db398351cca6e88f52d743bbb9fa0") # commit-id 3a2344d04744166af41ef1a74449d68a315bfe17 branch-0.2.1 # set(FAISS_MD5 "c89ea8e655f5cdf58f42486f13614714") # commit-id 9c28a1cbb88f41fa03b03d7204106201ad33276b branch-0.2.1 # set(FAISS_MD5 "87fdd86351ffcaf3f80dc26ade63c44b") # commit-id 841a156e67e8e22cd8088e1b58c00afbf2efc30b branch-0.2.1 - set(FAISS_MD5 
"f3b2ce3364c3fa7febd3aa7fdd0fe380") # commit-id 694e03458e6b69ce8a62502f71f69a614af5af8f branch-0.3.0 + # set(FAISS_MD5 "f3b2ce3364c3fa7febd3aa7fdd0fe380") # commit-id 694e03458e6b69ce8a62502f71f69a614af5af8f branch-0.3.0 + set(FAISS_MD5 "bb30722c22390ce5f6759ccb216c1b2a") # commit-id d324db297475286afe107847c7fb7a0f9dc7e90e branch-0.3.0 endif() else() set(FAISS_SOURCE_URL "https://github.com/milvus-io/faiss/archive/1.6.0.tar.gz") From 7d07e51072a056ed68e91984df68c396f4fee87c Mon Sep 17 00:00:00 2001 From: jielinxu <52057195+jielinxu@users.noreply.github.com> Date: Thu, 31 Oct 2019 14:19:58 +0800 Subject: [PATCH 106/149] [skip ci] Correct a link issue Former-commit-id: da7d00fb1e64047c699bf35d6fa10e6e552a4265 --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 884ddb01ca..4faab3c111 100644 --- a/README.md +++ b/README.md @@ -146,7 +146,7 @@ Install Milvus Python SDK. $ pip install pymilvus==0.2.3 ``` -Create a new file `example.py`, and add [Python example code](https://github.com/milvus-io/pymilvus/blob/master/examples/AdvancedExample.py) to it. +Create a new file `example.py`, and add [Python example code](https://github.com/milvus-io/pymilvus/blob/master/examples/advancedexample.py) to it. Run the example code. From eec29282b434a88af3aabcb3348ccd69db51e0d7 Mon Sep 17 00:00:00 2001 From: jielinxu <52057195+jielinxu@users.noreply.github.com> Date: Thu, 31 Oct 2019 14:21:38 +0800 Subject: [PATCH 107/149] [skip ci] Update Python example code link Former-commit-id: 369a101beda4041a537b7cdb391f60f1ef237d7a --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 4faab3c111..31cc2f684f 100644 --- a/README.md +++ b/README.md @@ -146,7 +146,7 @@ Install Milvus Python SDK. $ pip install pymilvus==0.2.3 ``` -Create a new file `example.py`, and add [Python example code](https://github.com/milvus-io/pymilvus/blob/master/examples/advancedexample.py) to it. +Create a new file `example.py`, and add [Python example code](https://github.com/milvus-io/pymilvus/blob/master/examples/advanced_example.py) to it. Run the example code. From 892bd4daacd24d5a04e6f2e9e9f9ed061464b73d Mon Sep 17 00:00:00 2001 From: wxyu Date: Thu, 31 Oct 2019 14:30:17 +0800 Subject: [PATCH 108/149] Improve large query optimizer pass Former-commit-id: 1f0c283ec4dc5560c14d4e17d76719d38dfc2280 --- CHANGELOG.md | 1 + core/src/db/engine/ExecutionEngineImpl.cpp | 2 +- .../src/scheduler/optimizer/LargeSQ8HPass.cpp | 22 ++++++++++--------- core/src/scheduler/optimizer/LargeSQ8HPass.h | 1 + 4 files changed, 15 insertions(+), 11 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 5176ce2b79..a0381852bc 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -21,6 +21,7 @@ Please mark all change in change log and use the ticket from JIRA. 
- \#118 - Using shared_ptr instead of weak_ptr to avoid performance loss - \#122 - Add unique id for Job - \#130 - Set task state MOVED after resource copy it completed +- \#149 - Improve large query optimizer pass ## Task diff --git a/core/src/db/engine/ExecutionEngineImpl.cpp b/core/src/db/engine/ExecutionEngineImpl.cpp index 66e9795ff3..b6db2616cb 100644 --- a/core/src/db/engine/ExecutionEngineImpl.cpp +++ b/core/src/db/engine/ExecutionEngineImpl.cpp @@ -258,7 +258,7 @@ Status ExecutionEngineImpl::CopyToGpu(uint64_t device_id, bool hybrid) { if (hybrid) { const std::string key = location_ + ".quantizer"; - std::vector gpus = scheduler::get_gpu_pool(); + std::vector gpus{device_id}; const int64_t NOT_FOUND = -1; int64_t device_id = NOT_FOUND; diff --git a/core/src/scheduler/optimizer/LargeSQ8HPass.cpp b/core/src/scheduler/optimizer/LargeSQ8HPass.cpp index ddcee414a7..cacedd6208 100644 --- a/core/src/scheduler/optimizer/LargeSQ8HPass.cpp +++ b/core/src/scheduler/optimizer/LargeSQ8HPass.cpp @@ -55,16 +55,18 @@ LargeSQ8HPass::Run(const TaskPtr& task) { } std::vector gpus = scheduler::get_gpu_pool(); - std::vector all_free_mem; - for (auto& gpu : gpus) { - auto cache = cache::GpuCacheMgr::GetInstance(gpu); - auto free_mem = cache->CacheCapacity() - cache->CacheUsage(); - all_free_mem.push_back(free_mem); - } - - auto max_e = std::max_element(all_free_mem.begin(), all_free_mem.end()); - auto best_index = std::distance(all_free_mem.begin(), max_e); - auto best_device_id = gpus[best_index]; + // std::vector all_free_mem; + // for (auto& gpu : gpus) { + // auto cache = cache::GpuCacheMgr::GetInstance(gpu); + // auto free_mem = cache->CacheCapacity() - cache->CacheUsage(); + // all_free_mem.push_back(free_mem); + // } + // + // auto max_e = std::max_element(all_free_mem.begin(), all_free_mem.end()); + // auto best_index = std::distance(all_free_mem.begin(), max_e); + // auto best_device_id = gpus[best_index]; + auto best_device_id = count_ % gpus.size(); + count_++; ResourcePtr res_ptr = ResMgrInst::GetInstance()->GetResource(ResourceType::GPU, best_device_id); if (not res_ptr) { diff --git a/core/src/scheduler/optimizer/LargeSQ8HPass.h b/core/src/scheduler/optimizer/LargeSQ8HPass.h index 1e00aa4a1b..9d135d413a 100644 --- a/core/src/scheduler/optimizer/LargeSQ8HPass.h +++ b/core/src/scheduler/optimizer/LargeSQ8HPass.h @@ -43,6 +43,7 @@ class LargeSQ8HPass : public Pass { private: int32_t threshold_ = std::numeric_limits::max(); + int64_t count_ = 0; }; using LargeSQ8HPassPtr = std::shared_ptr; From 28ac93a22653b44bccfaad9b2ce50ff2d3d6f12c Mon Sep 17 00:00:00 2001 From: milvus-ci-robot Date: Thu, 31 Oct 2019 14:30:55 +0800 Subject: [PATCH 109/149] make sure to put #!/usr/bin/env groovy at the top of the Jenkinsfile Former-commit-id: 5af0774e12ea9d10f3fe10b77336df22d748c7de --- ci/jenkins/Jenkinsfile | 2 ++ 1 file changed, 2 insertions(+) diff --git a/ci/jenkins/Jenkinsfile b/ci/jenkins/Jenkinsfile index 24f972748a..40d9686415 100644 --- a/ci/jenkins/Jenkinsfile +++ b/ci/jenkins/Jenkinsfile @@ -1,3 +1,5 @@ +#!/usr/bin/env groovy + String cron_timezone = "TZ=Asia/Shanghai" String cron_string = BRANCH_NAME == "master" ? "H 0 * * * " : "" cron_string = BRANCH_NAME == "0.5.1" ? 
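// Nightly schedule: master builds run at a hashed minute ("H") within hour 0 and the
// 0.5.1 branch within hour 1, both in the Asia/Shanghai timezone set above; all other
// branches get an empty cron string and are never time-triggered.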
"H 1 * * * " : cron_string From 23996fc0ebcb289c131c9d80a6f9e3ab65182cb5 Mon Sep 17 00:00:00 2001 From: jielinxu <52057195+jielinxu@users.noreply.github.com> Date: Thu, 31 Oct 2019 14:37:41 +0800 Subject: [PATCH 110/149] [skip ci] Correct EASYLOGGINGPP link Former-commit-id: 100df31ff199591596e6909ab43cd3edfcf22bc2 --- NOTICE.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/NOTICE.md b/NOTICE.md index 40ea77d3b7..018e2b80e0 100644 --- a/NOTICE.md +++ b/NOTICE.md @@ -25,6 +25,6 @@ | libunwind | [MIT](https://github.com/libunwind/libunwind/blob/master/LICENSE) | | gperftools | [BSD 3-Clause](https://github.com/gperftools/gperftools/blob/master/COPYING) | | grpc | [Apache 2.0](https://github.com/grpc/grpc/blob/master/LICENSE) | -| EASYLOGGINGPP | [MIT](https://github.com/zuhd-org/easyloggingpp/blob/master/LICENSEhttps://github.com/zuhd-org/easyloggingpp/blob/master/LICENSE) | +| EASYLOGGINGPP | [MIT](https://github.com/zuhd-org/easyloggingpp/blob/master/LICENSE) | | Json | [MIT](https://github.com/nlohmann/json/blob/develop/LICENSE.MIT) | From 49105e44f329242f9bf175ec369c10d8db78cbcd Mon Sep 17 00:00:00 2001 From: fishpenguin Date: Thu, 31 Oct 2019 14:39:02 +0800 Subject: [PATCH 111/149] Add only GPU and only CPU version for IVF_SQ8 and IVF_FLAT Former-commit-id: 14f5b095df2988d5f65ecd1cb2628fd3a3791a14 --- CHANGELOG.md | 1 + core/src/scheduler/SchedInst.h | 4 ++ core/src/scheduler/optimizer/OnlyCPUPass.cpp | 48 ++++++++++++++ core/src/scheduler/optimizer/OnlyCPUPass.h | 47 ++++++++++++++ core/src/scheduler/optimizer/OnlyGPUPass.cpp | 66 ++++++++++++++++++++ core/src/scheduler/optimizer/OnlyGPUPass.h | 50 +++++++++++++++ 6 files changed, 216 insertions(+) create mode 100644 core/src/scheduler/optimizer/OnlyCPUPass.cpp create mode 100644 core/src/scheduler/optimizer/OnlyCPUPass.h create mode 100644 core/src/scheduler/optimizer/OnlyGPUPass.cpp create mode 100644 core/src/scheduler/optimizer/OnlyGPUPass.h diff --git a/CHANGELOG.md b/CHANGELOG.md index 5176ce2b79..f277ec2a59 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -11,6 +11,7 @@ Please mark all change in change log and use the ticket from JIRA. - \#104 - test_scheduler core dump - \#115 - Using new structure for tasktable - \#139 - New config opion use_gpu_threshold +- \#146 - Add only GPU and only CPU version for IVF_SQ8 and IVF_FLAT ## Improvement - \#64 - Improvement dump function in scheduler diff --git a/core/src/scheduler/SchedInst.h b/core/src/scheduler/SchedInst.h index b9153d3bc3..e758f37851 100644 --- a/core/src/scheduler/SchedInst.h +++ b/core/src/scheduler/SchedInst.h @@ -23,6 +23,8 @@ #include "Scheduler.h" #include "optimizer/HybridPass.h" #include "optimizer/LargeSQ8HPass.h" +#include "optimizer/OnlyCPUPass.h" +#include "optimizer/OnlyGPUPass.h" #include "optimizer/Optimizer.h" #include @@ -96,6 +98,8 @@ class OptimizerInst { std::vector pass_list; pass_list.push_back(std::make_shared()); pass_list.push_back(std::make_shared()); + pass_list.push_back(std::make_shared()); + pass_list.push_back(std::make_shared()); instance = std::make_shared(pass_list); } } diff --git a/core/src/scheduler/optimizer/OnlyCPUPass.cpp b/core/src/scheduler/optimizer/OnlyCPUPass.cpp new file mode 100644 index 0000000000..d974887bb7 --- /dev/null +++ b/core/src/scheduler/optimizer/OnlyCPUPass.cpp @@ -0,0 +1,48 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. 
The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#include "scheduler/optimizer/OnlyCPUPass.h" +#include "scheduler/SchedInst.h" +#include "scheduler/task/SearchTask.h" +#include "scheduler/tasklabel/SpecResLabel.h" +#include "scheduler/Utils.h" + +namespace milvus { +namespace scheduler { + +bool +OnlyCPUPass::Run(const TaskPtr& task) { + if (task->Type() != TaskType::SearchTask) + return false; + auto search_task = std::static_pointer_cast(task); + if (search_task->file_->engine_type_ != (int) engine::EngineType::FAISS_IVFSQ8 && + search_task->file_->engine_type_ != (int) engine::EngineType::FAISS_IVFFLAT) { + return false; + } + + auto gpu_id = get_gpu_pool(); + if (gpu_id.empty()) { + ResourcePtr res_ptr = ResMgrInst::GetInstance()->GetResource("cpu"); + auto label = std::make_shared(std::weak_ptr(res_ptr)); + task->label() = label; + return true; + } + return false; +} + +} // namespace scheduler +} // namespace milvus diff --git a/core/src/scheduler/optimizer/OnlyCPUPass.h b/core/src/scheduler/optimizer/OnlyCPUPass.h new file mode 100644 index 0000000000..edc3848ed4 --- /dev/null +++ b/core/src/scheduler/optimizer/OnlyCPUPass.h @@ -0,0 +1,47 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. +#pragma once + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "Pass.h" + +namespace milvus { +namespace scheduler { + +class OnlyCPUPass : public Pass { +public: + OnlyCPUPass() = default; + +public: + bool + Run(const TaskPtr &task) override; +}; + +using OnlyCPUPassPtr = std::shared_ptr; + +} // namespace scheduler +} // namespace milvus diff --git a/core/src/scheduler/optimizer/OnlyGPUPass.cpp b/core/src/scheduler/optimizer/OnlyGPUPass.cpp new file mode 100644 index 0000000000..20127aefb8 --- /dev/null +++ b/core/src/scheduler/optimizer/OnlyGPUPass.cpp @@ -0,0 +1,66 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. 
You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#include "scheduler/optimizer/OnlyGPUPass.h" +#include "scheduler/SchedInst.h" +#include "scheduler/task/SearchTask.h" +#include "scheduler/tasklabel/SpecResLabel.h" +#include "scheduler/Utils.h" +#include "server/Config.h" + +namespace milvus { +namespace scheduler { + +bool +OnlyGPUPass::Run(const TaskPtr& task) { + if (task->Type() != TaskType::SearchTask) + return false; + + auto search_task = std::static_pointer_cast(task); + if (search_task->file_->engine_type_ != (int) engine::EngineType::FAISS_IVFSQ8 && + search_task->file_->engine_type_ != (int) engine::EngineType::FAISS_IVFFLAT) { + return false; + } + + server::Config& config = server::Config::GetInstance(); + std::vector search_resources; + config.GetResourceConfigSearchResources(search_resources); + for (auto &resource : search_resources) { + if (resource == "cpu") { + return false; + } + } + + auto gpu_id = get_gpu_pool(); + if (!gpu_id.empty()) { + ResourcePtr res_ptr = ResMgrInst::GetInstance()->GetResource(ResourceType::GPU, gpu_id[specified_gpu_id_]); + auto label = std::make_shared(std::weak_ptr(res_ptr)); + task->label() = label; + } else { + return false; + } + + if (specified_gpu_id_ < gpu_id.size() - 1) { + ++specified_gpu_id_; + } else { + specified_gpu_id_ = 0; + } + return true; +} + +} // namespace scheduler +} // namespace milvus diff --git a/core/src/scheduler/optimizer/OnlyGPUPass.h b/core/src/scheduler/optimizer/OnlyGPUPass.h new file mode 100644 index 0000000000..9220bab11d --- /dev/null +++ b/core/src/scheduler/optimizer/OnlyGPUPass.h @@ -0,0 +1,50 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
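Taken together, the two new passes act as one placement policy: OnlyCPUPass pins IVF_FLAT / IVF_SQ8 search tasks to the CPU resource when the GPU pool is empty, and OnlyGPUPass round-robins them over the GPU pool when no "cpu" entry is configured for search. The following is a minimal standalone sketch of that decision only; the function and type names (`PlaceSearchTask`, `EngineType`, the string resource labels) are simplified stand-ins, not the real TaskPtr/ResourcePtr machinery:

```cpp
#include <cstdint>
#include <iostream>
#include <string>
#include <vector>

// Simplified stand-in for the engine types the passes check for.
enum class EngineType { FAISS_IVFFLAT, FAISS_IVFSQ8, OTHER };

// Returns the resource a search task should be pinned to, or "" when the
// decision is left to the remaining passes.
std::string
PlaceSearchTask(EngineType engine, const std::vector<uint64_t>& gpu_pool,
                bool cpu_in_search_resources, uint64_t& round_robin_cursor) {
    if (engine != EngineType::FAISS_IVFFLAT && engine != EngineType::FAISS_IVFSQ8) {
        return "";  // neither pass applies to other engine types
    }
    if (gpu_pool.empty()) {
        return "cpu";  // OnlyCPUPass case: no GPU available at all
    }
    if (cpu_in_search_resources) {
        return "";  // mixed CPU/GPU deployment: other passes decide
    }
    // OnlyGPUPass case: rotate through the configured GPUs.
    std::string res = "gpu" + std::to_string(gpu_pool[round_robin_cursor]);
    round_robin_cursor = (round_robin_cursor + 1) % gpu_pool.size();
    return res;
}

int main() {
    uint64_t cursor = 0;
    std::vector<uint64_t> gpus{0, 1};
    for (int i = 0; i < 4; ++i) {
        std::cout << PlaceSearchTask(EngineType::FAISS_IVFSQ8, gpus, false, cursor) << "\n";
    }  // prints gpu0, gpu1, gpu0, gpu1
}
```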
+#pragma once + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "Pass.h" + +namespace milvus { +namespace scheduler { + +class OnlyGPUPass : public Pass { +public: + OnlyGPUPass() = default; + +public: + bool + Run(const TaskPtr &task) override; + +private: + uint64_t specified_gpu_id_ = 0; +}; + +using OnlyGPUPassPtr = std::shared_ptr; + +} // namespace scheduler +} // namespace milvus From b9b1d0deb92fc79c0d9fc91b2d36e79d066e8b6d Mon Sep 17 00:00:00 2001 From: Sijie Zhang <36330442+akihoni@users.noreply.github.com> Date: Thu, 31 Oct 2019 14:40:56 +0800 Subject: [PATCH 112/149] Update README_CN.md MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Change the product architecture diagram URL Former-commit-id: 4e1fcae26649370dad7745ffb5ba0dda2d19de03 --- README_CN.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README_CN.md b/README_CN.md index 4fdb44df40..5dad64af9b 100644 --- a/README_CN.md +++ b/README_CN.md @@ -54,7 +54,7 @@ Milvus 提供稳定的 Python、Java 以及 C++ 的 API 接口。 ## 整体架构 -![](https://raw.githubusercontent.com/yamasite/docs/master/assets/milvus_arch.png) +![Milvus_arch](https://github.com/milvus-io/docs/blob/master/assets/milvus_arch.png) ## 开始使用 Milvus From a68255f9d005a53bf0769a3bb3610f6c85322173 Mon Sep 17 00:00:00 2001 From: fishpenguin Date: Thu, 31 Oct 2019 14:44:59 +0800 Subject: [PATCH 113/149] clang format Former-commit-id: 3f99d7df6b042683696dd70c9dc0f14fee57b388 --- core/src/scheduler/optimizer/OnlyCPUPass.cpp | 6 +++--- core/src/scheduler/optimizer/OnlyCPUPass.h | 8 ++++---- core/src/scheduler/optimizer/OnlyGPUPass.cpp | 8 ++++---- core/src/scheduler/optimizer/OnlyGPUPass.h | 10 +++++----- 4 files changed, 16 insertions(+), 16 deletions(-) diff --git a/core/src/scheduler/optimizer/OnlyCPUPass.cpp b/core/src/scheduler/optimizer/OnlyCPUPass.cpp index d974887bb7..2651a6e1a5 100644 --- a/core/src/scheduler/optimizer/OnlyCPUPass.cpp +++ b/core/src/scheduler/optimizer/OnlyCPUPass.cpp @@ -17,9 +17,9 @@ #include "scheduler/optimizer/OnlyCPUPass.h" #include "scheduler/SchedInst.h" +#include "scheduler/Utils.h" #include "scheduler/task/SearchTask.h" #include "scheduler/tasklabel/SpecResLabel.h" -#include "scheduler/Utils.h" namespace milvus { namespace scheduler { @@ -29,8 +29,8 @@ OnlyCPUPass::Run(const TaskPtr& task) { if (task->Type() != TaskType::SearchTask) return false; auto search_task = std::static_pointer_cast(task); - if (search_task->file_->engine_type_ != (int) engine::EngineType::FAISS_IVFSQ8 && - search_task->file_->engine_type_ != (int) engine::EngineType::FAISS_IVFFLAT) { + if (search_task->file_->engine_type_ != (int)engine::EngineType::FAISS_IVFSQ8 && + search_task->file_->engine_type_ != (int)engine::EngineType::FAISS_IVFFLAT) { return false; } diff --git a/core/src/scheduler/optimizer/OnlyCPUPass.h b/core/src/scheduler/optimizer/OnlyCPUPass.h index edc3848ed4..76b42e3766 100644 --- a/core/src/scheduler/optimizer/OnlyCPUPass.h +++ b/core/src/scheduler/optimizer/OnlyCPUPass.h @@ -32,13 +32,13 @@ namespace milvus { namespace scheduler { -class OnlyCPUPass : public Pass { -public: +class OnlyCPUPass : public Pass { + public: OnlyCPUPass() = default; -public: + public: bool - Run(const TaskPtr &task) override; + Run(const TaskPtr& task) override; }; using OnlyCPUPassPtr = std::shared_ptr; diff --git a/core/src/scheduler/optimizer/OnlyGPUPass.cpp b/core/src/scheduler/optimizer/OnlyGPUPass.cpp index 20127aefb8..f39ca1a042 100644 --- a/core/src/scheduler/optimizer/OnlyGPUPass.cpp
+++ b/core/src/scheduler/optimizer/OnlyGPUPass.cpp @@ -17,9 +17,9 @@ #include "scheduler/optimizer/OnlyGPUPass.h" #include "scheduler/SchedInst.h" +#include "scheduler/Utils.h" #include "scheduler/task/SearchTask.h" #include "scheduler/tasklabel/SpecResLabel.h" -#include "scheduler/Utils.h" #include "server/Config.h" namespace milvus { namespace scheduler { @@ -31,15 +31,15 @@ OnlyGPUPass::Run(const TaskPtr& task) { return false; auto search_task = std::static_pointer_cast(task); - if (search_task->file_->engine_type_ != (int) engine::EngineType::FAISS_IVFSQ8 && - search_task->file_->engine_type_ != (int) engine::EngineType::FAISS_IVFFLAT) { + if (search_task->file_->engine_type_ != (int)engine::EngineType::FAISS_IVFSQ8 && + search_task->file_->engine_type_ != (int)engine::EngineType::FAISS_IVFFLAT) { return false; } server::Config& config = server::Config::GetInstance(); std::vector search_resources; config.GetResourceConfigSearchResources(search_resources); - for (auto &resource : search_resources) { + for (auto& resource : search_resources) { if (resource == "cpu") { return false; } diff --git a/core/src/scheduler/optimizer/OnlyGPUPass.h b/core/src/scheduler/optimizer/OnlyGPUPass.h index 9220bab11d..75a5f9e4f1 100644 --- a/core/src/scheduler/optimizer/OnlyGPUPass.h +++ b/core/src/scheduler/optimizer/OnlyGPUPass.h @@ -32,15 +32,15 @@ namespace milvus { namespace scheduler { -class OnlyGPUPass : public Pass { -public: +class OnlyGPUPass : public Pass { + public: OnlyGPUPass() = default; -public: + public: bool - Run(const TaskPtr &task) override; + Run(const TaskPtr& task) override; -private: + private: uint64_t specified_gpu_id_ = 0; }; From c8da2c3d64c418f4c558ffd51f291a688a79dcb5 Mon Sep 17 00:00:00 2001 From: Heisenberg Date: Thu, 31 Oct 2019 14:46:13 +0800 Subject: [PATCH 114/149] add mix mode for faiss baseline make baseline compatible with different faiss versions Former-commit-id: 90f96b4d437136539e7aa805f58c49e2d8a23ed1 --- .../faiss_benchmark/faiss_benchmark_test.cpp | 146 +++++++++++------- 1 file changed, 88 insertions(+), 58 deletions(-) diff --git a/core/src/index/unittest/faiss_benchmark/faiss_benchmark_test.cpp b/core/src/index/unittest/faiss_benchmark/faiss_benchmark_test.cpp index 5d38a6c1bc..780e620e6a 100644 --- a/core/src/index/unittest/faiss_benchmark/faiss_benchmark_test.cpp +++ b/core/src/index/unittest/faiss_benchmark/faiss_benchmark_test.cpp @@ -17,31 +17,40 @@ #include +#include +#include +#include +#include #include #include #include +#include + +#define USE_FAISS_V1_5_3 0 + +#if USE_FAISS_V1_5_3 +#include +#include +#include #include #include +#else +#include +#include +#include +#endif + #include #include #include -#include #include +#include +#include + #ifdef CUSTOMIZATION #include #endif -#include -#include -#include - -#include - -#include -#include -#include -#include -#include /***************************************************** * To run this test, please download the HDF5 from @@ -56,6 +65,8 @@ const char HDF5_DATASET_TEST[] = "test"; const char HDF5_DATASET_NEIGHBORS[] = "neighbors"; const char HDF5_DATASET_DISTANCES[] = "distances"; +const int32_t GPU_DEVICE_IDX = 0; + enum QueryMode { MODE_CPU = 0, MODE_MIX, MODE_GPU }; double @@ -196,7 +207,7 @@ GetResultHitCount(const faiss::Index::idx_t* ground_index, const faiss::Index::i for (int i = 0; i < nq; i++) { // count the num of results exist in ground truth result set // each result replicates INDEX_ADD_LOOPS times - for (int j_c = 0; j_c < ground_k; j_c++) { + for (int j_c = 0; j_c < k; j_c++) { int r_c = index[i * k +
j_c]; for (int j_g = 0; j_g < ground_k / index_add_loops; j_g++) { if (ground_index[i * ground_k + j_g] == r_c) { @@ -239,7 +250,6 @@ load_base_data(faiss::Index*& index, const std::string& ann_test_name, const std double t0 = elapsed(); const std::string ann_file_name = ann_test_name + HDF5_POSTFIX; - const int GPU_DEVICE_IDX = 0; faiss::Index *cpu_index = nullptr, *gpu_index = nullptr; faiss::distance_compute_blas_threshold = 800; @@ -249,30 +259,6 @@ load_base_data(faiss::Index*& index, const std::string& ann_test_name, const std try { printf("[%.3f s] Reading index file: %s\n", elapsed() - t0, index_file_name.c_str()); cpu_index = faiss::read_index(index_file_name.c_str()); - - if (mode != MODE_CPU) { - faiss::gpu::GpuClonerOptions option; - option.allInGpu = true; - - faiss::IndexComposition index_composition; - index_composition.index = cpu_index; - index_composition.quantizer = nullptr; - - switch (mode) { - case MODE_CPU: - assert(false); - break; - case MODE_MIX: - index_composition.mode = 1; // 0: all data, 1: copy quantizer, 2: copy data - break; - case MODE_GPU: - index_composition.mode = 0; // 0: all data, 1: copy quantizer, 2: copy data - break; - } - - printf("[%.3f s] Cloning CPU index to GPU\n", elapsed() - t0); - gpu_index = faiss::gpu::index_cpu_to_gpu(&res, GPU_DEVICE_IDX, &index_composition, &option); - } } catch (...) { size_t nb, d; printf("[%.3f s] Loading HDF5 file: %s\n", elapsed() - t0, ann_file_name.c_str()); @@ -289,6 +275,7 @@ load_base_data(faiss::Index*& index, const std::string& ann_test_name, const std printf("[%.3f s] Cloning CPU index to GPU\n", elapsed() - t0); gpu_index = faiss::gpu::index_cpu_to_gpu(&res, GPU_DEVICE_IDX, cpu_index); + delete cpu_index; printf("[%.3f s] Training on %ld vectors\n", elapsed() - t0, nb); gpu_index->train(nb, xb); @@ -300,8 +287,9 @@ load_base_data(faiss::Index*& index, const std::string& ann_test_name, const std } printf("[%.3f s] Coping GPU index to CPU\n", elapsed() - t0); - delete cpu_index; + cpu_index = faiss::gpu::index_gpu_to_cpu(gpu_index); + delete gpu_index; faiss::IndexIVF* cpu_ivf_index = dynamic_cast(cpu_index); if (cpu_ivf_index != nullptr) { @@ -314,21 +302,7 @@ load_base_data(faiss::Index*& index, const std::string& ann_test_name, const std delete[] xb; } - switch (mode) { - case MODE_CPU: - case MODE_MIX: - index = cpu_index; - if (gpu_index) { - delete gpu_index; - } - break; - case MODE_GPU: - index = gpu_index; - if (cpu_index) { - delete cpu_index; - } - break; - } + index = cpu_index; } void @@ -379,10 +353,12 @@ load_ground_truth(faiss::Index::idx_t*& gt, size_t& k, const std::string& ann_te } void -test_with_nprobes(const std::string& ann_test_name, const std::string& index_key, faiss::Index* index, +test_with_nprobes(const std::string& ann_test_name, const std::string& index_key, faiss::Index* cpu_index, faiss::gpu::StandardGpuResources& res, const QueryMode query_mode, const faiss::Index::distance_t* xq, const faiss::Index::idx_t* gt, const std::vector nprobes, const int32_t index_add_loops, const int32_t search_loops) { + double t0 = elapsed(); + const size_t NQ = 1000, NQ_START = 10, NQ_STEP = 10; const size_t K = 1000, K_START = 100, K_STEP = 10; const size_t GK = 100; // topk of ground truth @@ -390,6 +366,59 @@ test_with_nprobes(const std::string& ann_test_name, const std::string& index_key std::unordered_map mode_str_map = { {MODE_CPU, "MODE_CPU"}, {MODE_MIX, "MODE_MIX"}, {MODE_GPU, "MODE_GPU"}}; + faiss::Index *gpu_index, *index; + if (query_mode != MODE_CPU) { + 
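// In the two GPU-involved branches below, the benchmark clones the CPU index
// twice: the first faiss::gpu::index_cpu_to_gpu() call only warms up the
// host-to-device transfer, and the second, timed call is reported as the copy
// cost. MODE_MIX (index_composition.mode = 1) copies only the IVF quantizer to
// the GPU and keeps searching through the CPU index, while MODE_GPU (mode = 0)
// copies the whole index to the GPU and releases the CPU copy.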
faiss::gpu::GpuClonerOptions option; + option.allInGpu = true; + + faiss::IndexComposition index_composition; + index_composition.index = cpu_index; + index_composition.quantizer = nullptr; + + double copy_time; + switch (query_mode) { + case MODE_MIX: { + index_composition.mode = 1; // 0: all data, 1: copy quantizer, 2: copy data + + // warm up the transmission + gpu_index = faiss::gpu::index_cpu_to_gpu(&res, GPU_DEVICE_IDX, &index_composition, &option); + delete gpu_index; + + copy_time = elapsed(); + gpu_index = faiss::gpu::index_cpu_to_gpu(&res, GPU_DEVICE_IDX, &index_composition, &option); + delete gpu_index; + copy_time = elapsed() - copy_time; + printf("[%.3f s] Copy quantizer completed, cost %f s\n", elapsed() - t0, copy_time); + + auto ivf_index = dynamic_cast(cpu_index); + auto is_gpu_flat_index = dynamic_cast(ivf_index->quantizer); + if (is_gpu_flat_index == nullptr) { + delete ivf_index->quantizer; + ivf_index->quantizer = index_composition.quantizer; + } + index = cpu_index; + break; + } + case MODE_GPU: + index_composition.mode = 0; // 0: all data, 1: copy quantizer, 2: copy data + + // warm up the transmission + gpu_index = faiss::gpu::index_cpu_to_gpu(&res, GPU_DEVICE_IDX, &index_composition, &option); + delete gpu_index; + + copy_time = elapsed(); + gpu_index = faiss::gpu::index_cpu_to_gpu(&res, GPU_DEVICE_IDX, &index_composition, &option); + copy_time = elapsed() - copy_time; + printf("[%.3f s] Copy data completed, cost %f s\n", elapsed() - t0, copy_time); + + delete cpu_index; + index = gpu_index; + break; + } + } else { + index = cpu_index; + } + for (auto nprobe : nprobes) { switch (query_mode) { case MODE_CPU: @@ -445,6 +474,8 @@ test_with_nprobes(const std::string& ann_test_name, const std::string& index_key delete[] I; delete[] D; } + + delete index; } void @@ -488,7 +519,6 @@ test_ann_hdf5(const std::string& ann_test_name, const std::string& index_type, c delete[] xq; delete[] gt; - delete index; } /************************************************************************************ @@ -520,8 +550,8 @@ TEST(FAISSTEST, BENCHMARK) { #ifdef CUSTOMIZATION test_ann_hdf5("sift-128-euclidean", "SQ8Hybrid", MODE_CPU, SIFT_INSERT_LOOPS, param_nprobes, SEARCH_LOOPS); + test_ann_hdf5("sift-128-euclidean", "SQ8Hybrid", MODE_MIX, SIFT_INSERT_LOOPS, param_nprobes, SEARCH_LOOPS); test_ann_hdf5("sift-128-euclidean", "SQ8Hybrid", MODE_GPU, SIFT_INSERT_LOOPS, param_nprobes, SEARCH_LOOPS); -// test_ann_hdf5("sift-128-euclidean", "SQ8Hybrid", MODE_MIX, SIFT_INSERT_LOOPS, param_nprobes, SEARCH_LOOPS); #endif /////////////////////////////////////////////////////////////////////////////////////////////////////////////////// @@ -535,7 +565,7 @@ TEST(FAISSTEST, BENCHMARK) { #ifdef CUSTOMIZATION test_ann_hdf5("glove-200-angular", "SQ8Hybrid", MODE_CPU, GLOVE_INSERT_LOOPS, param_nprobes, SEARCH_LOOPS); + test_ann_hdf5("glove-200-angular", "SQ8Hybrid", MODE_MIX, GLOVE_INSERT_LOOPS, param_nprobes, SEARCH_LOOPS); test_ann_hdf5("glove-200-angular", "SQ8Hybrid", MODE_GPU, GLOVE_INSERT_LOOPS, param_nprobes, SEARCH_LOOPS); -// test_ann_hdf5("glove-200-angular", "SQ8Hybrid", MODE_MIX, GLOVE_INSERT_LOOPS, param_nprobes, SEARCH_LOOPS); #endif } From 530b2a0f280306348b1437a15aa71028b4436088 Mon Sep 17 00:00:00 2001 From: Zhiru Zhu Date: Thu, 31 Oct 2019 14:57:09 +0800 Subject: [PATCH 115/149] fix cmake error: STRING sub-command REGEX, mode REPLACE needs at least 6 arguments total to command Former-commit-id: 773f938125ed03582fc8242af0a23076677bf848 --- core/cmake/ThirdPartyPackages.cmake | 2 +- 1 file 
changed, 1 insertion(+), 1 deletion(-) diff --git a/core/cmake/ThirdPartyPackages.cmake b/core/cmake/ThirdPartyPackages.cmake index 62d7713024..657efa2eef 100644 --- a/core/cmake/ThirdPartyPackages.cmake +++ b/core/cmake/ThirdPartyPackages.cmake @@ -114,7 +114,7 @@ if (UNIX) OUTPUT_VARIABLE LSB_RELEASE_ID_SHORT OUTPUT_STRIP_TRAILING_WHITESPACE ) - STRING(REGEX REPLACE "\\." "_" UBUNTU_VERSION ${LSB_RELEASE_ID_SHORT}) + STRING(REGEX REPLACE "\\." "_" UBUNTU_VERSION "${LSB_RELEASE_ID_SHORT}") endif (UBUNTU_FOUND) endif (UBUNTU_EXTRA) endif (DEBIAN_FOUND) From c219fe0c1faaf5c75c3d4c54a2ba9ddc5c3c8651 Mon Sep 17 00:00:00 2001 From: jielinxu <52057195+jielinxu@users.noreply.github.com> Date: Thu, 31 Oct 2019 15:02:19 +0800 Subject: [PATCH 116/149] [skip ci] Update Contributing code Former-commit-id: a96d53012ffe755b1b3ef5cc7ac55ceec96e2df5 --- CONTRIBUTING.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 475387c319..1ff37372db 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -28,7 +28,7 @@ Contributions to Milvus fall into the following categories. If you have improvements to Milvus, send us your pull requests! For those just getting started, see [GitHub workflow](#github-workflow). -The Milvus team members will review your pull requests, and once it is accepted, it will be given a `ready to merge` label. This means we are working on submitting your pull request to the internal repository. After the change has been submitted internally, your pull request will be merged automatically on GitHub. +The Milvus team members will review your pull requests, and once it is accepted, the status of the projects to which it is associated will be changed to **Reviewer approved**. This means we are working on submitting your pull request to the internal repository. After the change has been submitted internally, your pull request will be merged automatically on GitHub. 
### GitHub workflow From 8d5b940b1d1ce29405c5e52d33d74b71387c3d62 Mon Sep 17 00:00:00 2001 From: milvus-ci-robot Date: Thu, 31 Oct 2019 16:37:46 +0800 Subject: [PATCH 117/149] update milvus ubuntu18.04 build environment images version to 0.5.1 Former-commit-id: 30aff373647040a72d5923e3087b9b5c44e5bb9c --- ci/jenkins/pod/milvus-build-env-pod.yaml | 2 +- docker/build_env/ubuntu16.04/Dockerfile | 2 +- docker/build_env/ubuntu18.04/Dockerfile | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/ci/jenkins/pod/milvus-build-env-pod.yaml b/ci/jenkins/pod/milvus-build-env-pod.yaml index bb4499711f..da938d8ba2 100644 --- a/ci/jenkins/pod/milvus-build-env-pod.yaml +++ b/ci/jenkins/pod/milvus-build-env-pod.yaml @@ -8,7 +8,7 @@ metadata: spec: containers: - name: milvus-build-env - image: registry.zilliz.com/milvus/milvus-build-env:v0.5.0-ubuntu18.04 + image: registry.zilliz.com/milvus/milvus-build-env:v0.5.1-ubuntu18.04 env: - name: POD_IP valueFrom: diff --git a/docker/build_env/ubuntu16.04/Dockerfile b/docker/build_env/ubuntu16.04/Dockerfile index a0ccecce5f..7b1943f578 100644 --- a/docker/build_env/ubuntu16.04/Dockerfile +++ b/docker/build_env/ubuntu16.04/Dockerfile @@ -8,7 +8,7 @@ RUN apt-get update && apt-get install -y --no-install-recommends wget && \ apt-key add /tmp/GPG-PUB-KEY-INTEL-SW-PRODUCTS-2019.PUB && \ sh -c 'echo deb https://apt.repos.intel.com/mkl all main > /etc/apt/sources.list.d/intel-mkl.list' && \ apt-get update && apt-get install -y --no-install-recommends \ - git flex bison gfortran \ + git flex bison gfortran lsb-core \ curl libtool automake libboost1.58-all-dev libssl-dev pkg-config libcurl4-openssl-dev \ clang-format-6.0 clang-tidy-6.0 \ lcov mysql-client libmysqlclient-dev intel-mkl-gnu-2019.4-243 intel-mkl-core-2019.4-243 && \ diff --git a/docker/build_env/ubuntu18.04/Dockerfile b/docker/build_env/ubuntu18.04/Dockerfile index e7c528f48e..60d881428b 100644 --- a/docker/build_env/ubuntu18.04/Dockerfile +++ b/docker/build_env/ubuntu18.04/Dockerfile @@ -8,7 +8,7 @@ RUN apt-get update && apt-get install -y --no-install-recommends wget && \ apt-key add /tmp/GPG-PUB-KEY-INTEL-SW-PRODUCTS-2019.PUB && \ sh -c 'echo deb https://apt.repos.intel.com/mkl all main > /etc/apt/sources.list.d/intel-mkl.list' && \ apt-get update && apt-get install -y --no-install-recommends \ - git flex bison gfortran \ + git flex bison gfortran lsb-core \ curl libtool automake libboost-all-dev libssl-dev pkg-config libcurl4-openssl-dev \ clang-format-6.0 clang-tidy-6.0 \ lcov mysql-client libmysqlclient-dev intel-mkl-gnu-2019.4-243 intel-mkl-core-2019.4-243 && \ From 1dc7b321a45a178813350ea7c87d2d0ce57103c8 Mon Sep 17 00:00:00 2001 From: fishpenguin Date: Thu, 31 Oct 2019 16:41:48 +0800 Subject: [PATCH 118/149] Read config once in optimizer Former-commit-id: 8fee2a3cfa20cdc8b51e86867b7b3eb71dd2e759 --- core/src/scheduler/SchedInst.h | 14 ++++++++- core/src/scheduler/optimizer/OnlyCPUPass.cpp | 14 ++++----- core/src/scheduler/optimizer/OnlyGPUPass.cpp | 32 ++++++-------------- core/src/scheduler/optimizer/OnlyGPUPass.h | 3 +- 4 files changed, 32 insertions(+), 31 deletions(-) diff --git a/core/src/scheduler/SchedInst.h b/core/src/scheduler/SchedInst.h index e758f37851..a3048069f9 100644 --- a/core/src/scheduler/SchedInst.h +++ b/core/src/scheduler/SchedInst.h @@ -26,9 +26,11 @@ #include "optimizer/OnlyCPUPass.h" #include "optimizer/OnlyGPUPass.h" #include "optimizer/Optimizer.h" +#include "server/Config.h" #include #include +#include #include namespace milvus { @@ -95,11 +97,21 @@ class
OptimizerInst { if (instance == nullptr) { std::lock_guard lock(mutex_); if (instance == nullptr) { + server::Config& config = server::Config::GetInstance(); + std::vector search_resources; + bool has_cpu = false; + config.GetResourceConfigSearchResources(search_resources); + for (auto& resource : search_resources) { + if (resource == "cpu") { + has_cpu = true; + } + } + std::vector pass_list; pass_list.push_back(std::make_shared()); pass_list.push_back(std::make_shared()); pass_list.push_back(std::make_shared()); - pass_list.push_back(std::make_shared()); + pass_list.push_back(std::make_shared(has_cpu)); instance = std::make_shared(pass_list); } } diff --git a/core/src/scheduler/optimizer/OnlyCPUPass.cpp b/core/src/scheduler/optimizer/OnlyCPUPass.cpp index 2651a6e1a5..238a91a82c 100644 --- a/core/src/scheduler/optimizer/OnlyCPUPass.cpp +++ b/core/src/scheduler/optimizer/OnlyCPUPass.cpp @@ -35,13 +35,13 @@ OnlyCPUPass::Run(const TaskPtr& task) { } auto gpu_id = get_gpu_pool(); - if (gpu_id.empty()) { - ResourcePtr res_ptr = ResMgrInst::GetInstance()->GetResource("cpu"); - auto label = std::make_shared(std::weak_ptr(res_ptr)); - task->label() = label; - return true; - } - return false; + if (not gpu_id.empty()) + return false; + + ResourcePtr res_ptr = ResMgrInst::GetInstance()->GetResource("cpu"); + auto label = std::make_shared(std::weak_ptr(res_ptr)); + task->label() = label; + return true; } } // namespace scheduler diff --git a/core/src/scheduler/optimizer/OnlyGPUPass.cpp b/core/src/scheduler/optimizer/OnlyGPUPass.cpp index f39ca1a042..3fcda0e8a3 100644 --- a/core/src/scheduler/optimizer/OnlyGPUPass.cpp +++ b/core/src/scheduler/optimizer/OnlyGPUPass.cpp @@ -20,14 +20,16 @@ #include "scheduler/Utils.h" #include "scheduler/task/SearchTask.h" #include "scheduler/tasklabel/SpecResLabel.h" -#include "server/Config.h" namespace milvus { namespace scheduler { +OnlyGPUPass::OnlyGPUPass(bool has_cpu) : has_cpu_(has_cpu) { +} + bool OnlyGPUPass::Run(const TaskPtr& task) { - if (task->Type() != TaskType::SearchTask) + if (task->Type() != TaskType::SearchTask || has_cpu_) return false; auto search_task = std::static_pointer_cast(task); @@ -36,29 +38,15 @@ OnlyGPUPass::Run(const TaskPtr& task) { return false; } - server::Config& config = server::Config::GetInstance(); - std::vector search_resources; - config.GetResourceConfigSearchResources(search_resources); - for (auto& resource : search_resources) { - if (resource == "cpu") { - return false; - } - } - auto gpu_id = get_gpu_pool(); - if (!gpu_id.empty()) { - ResourcePtr res_ptr = ResMgrInst::GetInstance()->GetResource(ResourceType::GPU, gpu_id[specified_gpu_id_]); - auto label = std::make_shared(std::weak_ptr(res_ptr)); - task->label() = label; - } else { + if (gpu_id.empty()) return false; - } - if (specified_gpu_id_ < gpu_id.size() - 1) { - ++specified_gpu_id_; - } else { - specified_gpu_id_ = 0; - } + ResourcePtr res_ptr = ResMgrInst::GetInstance()->GetResource(ResourceType::GPU, gpu_id[specified_gpu_id_]); + auto label = std::make_shared(std::weak_ptr(res_ptr)); + task->label() = label; + + specified_gpu_id_ = specified_gpu_id_++ % gpu_id.size(); return true; } diff --git a/core/src/scheduler/optimizer/OnlyGPUPass.h b/core/src/scheduler/optimizer/OnlyGPUPass.h index 75a5f9e4f1..10d909d30e 100644 --- a/core/src/scheduler/optimizer/OnlyGPUPass.h +++ b/core/src/scheduler/optimizer/OnlyGPUPass.h @@ -34,7 +34,7 @@ namespace scheduler { class OnlyGPUPass : public Pass { public: - OnlyGPUPass() = default; + explicit OnlyGPUPass(bool has_cpu); 
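// has_cpu is evaluated once from resource_config.search_resources when
// OptimizerInst assembles the pass list (see the SchedInst.h hunk above), so
// Run() no longer queries server::Config for every task. One caveat in the
// .cpp hunk above: `specified_gpu_id_ = specified_gpu_id_++ % gpu_id.size();`
// assigns the pre-increment value back (and is undefined behavior before
// C++17), so the round-robin cursor never actually advances;
// `specified_gpu_id_ = (specified_gpu_id_ + 1) % gpu_id.size();` would
// rotate through the GPUs as intended.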
public: bool @@ -42,6 +42,7 @@ class OnlyGPUPass : public Pass { private: uint64_t specified_gpu_id_ = 0; + bool has_cpu_ = false; }; using OnlyGPUPassPtr = std::shared_ptr; From 4df6f4b5a93fa9c6cfc54479d52edf0371224ddd Mon Sep 17 00:00:00 2001 From: wxyu Date: Thu, 31 Oct 2019 16:58:47 +0800 Subject: [PATCH 119/149] Not return error when search_resources and index_build_device set cpu Former-commit-id: 0e0313e59cfbf8f8b17fc62db02f2e21a25d25a2 --- CHANGELOG.md | 1 + core/src/server/Config.cpp | 12 +++++++++--- 2 files changed, 10 insertions(+), 3 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 1f399b87f6..e2ed8f4b7f 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -23,6 +23,7 @@ Please mark all change in change log and use the ticket from JIRA. - \#122 - Add unique id for Job - \#130 - Set task state MOVED after resource copy it completed - \#149 - Improve large query optimizer pass +- \#156 - Not return error when search_resources and index_build_device set cpu ## Task diff --git a/core/src/server/Config.cpp b/core/src/server/Config.cpp index 86caf6dd37..2a04128411 100644 --- a/core/src/server/Config.cpp +++ b/core/src/server/Config.cpp @@ -714,9 +714,12 @@ Config::CheckResourceConfigSearchResources(const std::vector& value return Status(SERVER_INVALID_ARGUMENT, msg); } - for (auto& gpu_device : value) { - if (!CheckGpuDevice(gpu_device).ok()) { - std::string msg = "Invalid search resource: " + gpu_device + + for (auto& device : value) { + if (device == "cpu") { + continue; + } + if (!CheckGpuDevice(device).ok()) { + std::string msg = "Invalid search resource: " + device + ". Possible reason: resource_config.search_resources does not match your hardware."; return Status(SERVER_INVALID_ARGUMENT, msg); } @@ -726,6 +729,9 @@ Config::CheckResourceConfigSearchResources(const std::vector& value Status Config::CheckResourceConfigIndexBuildDevice(const std::string& value) { + if (value == "cpu") { + return Status::OK(); + } if (!CheckGpuDevice(value).ok()) { std::string msg = "Invalid index build device: " + value + ". Possible reason: resource_config.index_build_device does not match your hardware."; From 71fa8bdd05dca378bfbb6ec042c5102436080d92 Mon Sep 17 00:00:00 2001 From: jinhai Date: Thu, 31 Oct 2019 11:03:22 +0000 Subject: [PATCH 120/149] Fix parallel merge issue Former-commit-id: 7eae24c65e50c6c0abc423b841c14bf16a8c0a1d --- core/src/scheduler/job/SearchJob.h | 4 ++++ core/src/scheduler/task/SearchTask.cpp | 5 ++++- 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/core/src/scheduler/job/SearchJob.h b/core/src/scheduler/job/SearchJob.h index 1e586090b9..40cf59e24e 100644 --- a/core/src/scheduler/job/SearchJob.h +++ b/core/src/scheduler/job/SearchJob.h @@ -90,6 +90,10 @@ class SearchJob : public Job { return index_files_; } + std::mutex& mutex() { + return mutex_; + } + private: uint64_t topk_ = 0; uint64_t nq_ = 0; diff --git a/core/src/scheduler/task/SearchTask.cpp b/core/src/scheduler/task/SearchTask.cpp index 1bf1caff76..7daa6268e8 100644 --- a/core/src/scheduler/task/SearchTask.cpp +++ b/core/src/scheduler/task/SearchTask.cpp @@ -219,8 +219,11 @@ XSearchTask::Execute() { // step 3: pick up topk result auto spec_k = index_engine_->Count() < topk ? 
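// spec_k caps the per-segment top-k at the number of vectors the segment
// actually holds; the block below then serializes the merge with
// search_job->mutex(), since several search tasks may now reduce their
// partial top-k lists into the same shared result set concurrently.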
index_engine_->Count() : topk; - XSearchTask::MergeTopkToResultSet(output_ids, output_distance, spec_k, nq, topk, metric_l2, + { + std::unique_lock lock(search_job->mutex()); + XSearchTask::MergeTopkToResultSet(output_ids, output_distance, spec_k, nq, topk, metric_l2, search_job->GetResult()); + } span = rc.RecordSection(hdr + ", reduce topk"); // search_job->AccumReduceCost(span); From d01c555604554db44dbdb4c7bcca962cce730fe3 Mon Sep 17 00:00:00 2001 From: jinhai Date: Thu, 31 Oct 2019 11:05:04 +0000 Subject: [PATCH 121/149] Fix lint Former-commit-id: 52ff843377f1463bb4cad7fcf67366b3facd8d10 --- core/src/scheduler/job/SearchJob.h | 3 ++- core/src/scheduler/task/SearchTask.cpp | 2 +- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/core/src/scheduler/job/SearchJob.h b/core/src/scheduler/job/SearchJob.h index 40cf59e24e..90fcf36773 100644 --- a/core/src/scheduler/job/SearchJob.h +++ b/core/src/scheduler/job/SearchJob.h @@ -90,7 +90,8 @@ class SearchJob : public Job { return index_files_; } - std::mutex& mutex() { + std::mutex& + mutex() { return mutex_; } diff --git a/core/src/scheduler/task/SearchTask.cpp b/core/src/scheduler/task/SearchTask.cpp index 7daa6268e8..edeb41bdbe 100644 --- a/core/src/scheduler/task/SearchTask.cpp +++ b/core/src/scheduler/task/SearchTask.cpp @@ -222,7 +222,7 @@ XSearchTask::Execute() { { std::unique_lock lock(search_job->mutex()); XSearchTask::MergeTopkToResultSet(output_ids, output_distance, spec_k, nq, topk, metric_l2, - search_job->GetResult()); + search_job->GetResult()); } span = rc.RecordSection(hdr + ", reduce topk"); From 85b136b36437d6c3fdcaa54deb3388c76d3a9044 Mon Sep 17 00:00:00 2001 From: "xiaojun.lin" Date: Fri, 1 Nov 2019 14:40:52 +0800 Subject: [PATCH 122/149] Fix bug #161 Former-commit-id: 5daf77b3344351a8030e53ff704b7e8bf1cce400 --- CHANGELOG.md | 3 ++- core/src/db/engine/ExecutionEngineImpl.cpp | 2 ++ .../index/vector_index/IndexIVFSQHybrid.cpp | 20 ++++++------------- core/src/index/unittest/test_ivf.cpp | 4 ++++ 4 files changed, 14 insertions(+), 15 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 5176ce2b79..dbba3a3b92 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,12 +5,13 @@ Please mark all change in change log and use the ticket from JIRA. 
# Milvus 0.5.1 (TODO) ## Bug +- \#161 - Search IVFSQHybrid crash on gpu ## Feature - \#90 - The server start error messages could be improved to enhance user experience - \#104 - test_scheduler core dump - \#115 - Using new structure for tasktable -- \#139 - New config opion use_gpu_threshold +- \#139 - New config option use_gpu_threshold ## Improvement - \#64 - Improvement dump function in scheduler diff --git a/core/src/db/engine/ExecutionEngineImpl.cpp b/core/src/db/engine/ExecutionEngineImpl.cpp index 66e9795ff3..6c2ca59d8d 100644 --- a/core/src/db/engine/ExecutionEngineImpl.cpp +++ b/core/src/db/engine/ExecutionEngineImpl.cpp @@ -256,6 +256,7 @@ ExecutionEngineImpl::Load(bool to_cache) { Status ExecutionEngineImpl::CopyToGpu(uint64_t device_id, bool hybrid) { +#if 0 if (hybrid) { const std::string key = location_ + ".quantizer"; std::vector gpus = scheduler::get_gpu_pool(); @@ -307,6 +308,7 @@ ExecutionEngineImpl::CopyToGpu(uint64_t device_id, bool hybrid) { } return Status::OK(); } +#endif auto index = std::static_pointer_cast(cache::GpuCacheMgr::GetInstance(device_id)->GetIndex(location_)); bool already_in_cache = (index != nullptr); diff --git a/core/src/index/knowhere/knowhere/index/vector_index/IndexIVFSQHybrid.cpp b/core/src/index/knowhere/knowhere/index/vector_index/IndexIVFSQHybrid.cpp index 84bf594421..f2a963eef2 100644 --- a/core/src/index/knowhere/knowhere/index/vector_index/IndexIVFSQHybrid.cpp +++ b/core/src/index/knowhere/knowhere/index/vector_index/IndexIVFSQHybrid.cpp @@ -84,22 +84,14 @@ IVFSQHybrid::CopyGpuToCpu(const Config& config) { VectorIndexPtr IVFSQHybrid::CopyCpuToGpu(const int64_t& device_id, const Config& config) { - if (gpu_mode != 0) { - KNOWHERE_THROW_MSG("Not a GpuIndex Type"); - } - if (auto res = FaissGpuResourceMgr::GetInstance().GetRes(device_id)) { ResScope rs(res, device_id, false); faiss::gpu::GpuClonerOptions option; option.allInGpu = true; - faiss::IndexComposition index_composition; - index_composition.index = index_.get(); - index_composition.quantizer = nullptr; - index_composition.mode = 0; // copy all - - auto gpu_index = faiss::gpu::index_cpu_to_gpu(res->faiss_res.get(), device_id, &index_composition, &option); - + auto idx = dynamic_cast(index_.get()); + idx->restore_quantizer(); + auto gpu_index = faiss::gpu::index_cpu_to_gpu(res->faiss_res.get(), device_id, index_.get(), &option); std::shared_ptr device_index = std::shared_ptr(gpu_index); auto new_idx = std::make_shared(device_index, device_id, res); return new_idx; @@ -119,9 +111,9 @@ IVFSQHybrid::LoadImpl(const BinarySet& index_binary) { void IVFSQHybrid::search_impl(int64_t n, const float* data, int64_t k, float* distances, int64_t* labels, const Config& cfg) { - // std::lock_guard lk(g_mutex); - // static int64_t search_count; - // ++search_count; + // std::lock_guard lk(g_mutex); + // static int64_t search_count; + // ++search_count; if (gpu_mode == 2) { GPUIVF::search_impl(n, data, k, distances, labels, cfg); diff --git a/core/src/index/unittest/test_ivf.cpp b/core/src/index/unittest/test_ivf.cpp index 20addc82bb..8b17e08272 100644 --- a/core/src/index/unittest/test_ivf.cpp +++ b/core/src/index/unittest/test_ivf.cpp @@ -213,6 +213,10 @@ TEST_P(IVFTest, clone_test) { } } + if (index_type == "IVFSQHybrid") { + return; + } + { // copy to gpu std::vector support_idx_vec{"IVF", "GPUIVF", "IVFSQ", "GPUIVFSQ"}; From 6ad1ed74e9874ede8c8f98760916cdd0da0a953e Mon Sep 17 00:00:00 2001 From: "yudong.cai" Date: Fri, 1 Nov 2019 14:51:20 +0800 Subject: [PATCH 123/149] #159 Change the 
configuration name from 'use_gpu_threshold' to 'gpu_search_threshold' Former-commit-id: b4e576b389a720d66ec5455ca05b20184c8c911d --- CHANGELOG.md | 1 + core/conf/server_config.template | 2 +- .../src/scheduler/optimizer/LargeSQ8HPass.cpp | 2 +- core/src/server/Config.cpp | 34 +++++++++---------- core/src/server/Config.h | 10 +++--- 5 files changed, 25 insertions(+), 24 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 266690a82c..5b4069ff65 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -25,6 +25,7 @@ Please mark all change in change log and use the ticket from JIRA. - \#130 - Set task state MOVED after resource copy it completed - \#149 - Improve large query optimizer pass - \#156 - Not return error when search_resources and index_build_device set cpu +- \#159 - Change the configuration name from 'use_gpu_threshold' to 'gpu_search_threshold' ## Task diff --git a/core/conf/server_config.template b/core/conf/server_config.template index a23707be5d..8dfb30f534 100644 --- a/core/conf/server_config.template +++ b/core/conf/server_config.template @@ -36,7 +36,7 @@ cache_config: engine_config: use_blas_threshold: 20 # if nq < use_blas_threshold, use SSE, faster with fluctuated response times # if nq >= use_blas_threshold, use OpenBlas, slower with stable response times - use_gpu_threshold: 1000 + gpu_search_threshold: 1000 # threshold beyond which the search computation is executed on GPUs only resource_config: search_resources: # define the GPUs used for search computation, must be in format: gpux diff --git a/core/src/scheduler/optimizer/LargeSQ8HPass.cpp b/core/src/scheduler/optimizer/LargeSQ8HPass.cpp index cacedd6208..b9784e3c0a 100644 --- a/core/src/scheduler/optimizer/LargeSQ8HPass.cpp +++ b/core/src/scheduler/optimizer/LargeSQ8HPass.cpp @@ -29,7 +29,7 @@ namespace scheduler { LargeSQ8HPass::LargeSQ8HPass() { server::Config& config = server::Config::GetInstance(); - Status s = config.GetEngineConfigUseGpuThreshold(threshold_); + Status s = config.GetEngineConfigGpuSearchThreshold(threshold_); if (!s.ok()) { threshold_ = std::numeric_limits::max(); } diff --git a/core/src/server/Config.cpp b/core/src/server/Config.cpp index 2a04128411..d651f5b3b3 100644 --- a/core/src/server/Config.cpp +++ b/core/src/server/Config.cpp @@ -193,8 +193,8 @@ Config::ValidateConfig() { return s; } - int32_t engine_use_gpu_threshold; - s = GetEngineConfigUseGpuThreshold(engine_use_gpu_threshold); + int32_t engine_gpu_search_threshold; + s = GetEngineConfigGpuSearchThreshold(engine_gpu_search_threshold); if (!s.ok()) { return s; } @@ -330,7 +330,7 @@ Config::ResetDefaultConfig() { return s; } - s = SetEngineConfigUseGpuThreshold(CONFIG_ENGINE_USE_GPU_THRESHOLD_DEFAULT); + s = SetEngineConfigGpuSearchThreshold(CONFIG_ENGINE_GPU_SEARCH_THRESHOLD_DEFAULT); if (!s.ok()) { return s; } @@ -463,7 +463,7 @@ Status Config::CheckDBConfigArchiveDaysThreshold(const std::string& value) { if (!ValidationUtil::ValidateStringIsNumber(value).ok()) { std::string msg = "Invalid archive days threshold: " + value + - ". Possible reason: db_config.archive_disk_threshold is invalid."; + ". 
Possible reason: db_config.archive_days_threshold is invalid."; return Status(SERVER_INVALID_ARGUMENT, msg); } return Status::OK(); @@ -631,7 +631,7 @@ Config::CheckCacheConfigGpuCacheThreshold(const std::string& value) { Status Config::CheckCacheConfigCacheInsertData(const std::string& value) { if (!ValidationUtil::ValidateStringIsBool(value).ok()) { - std::string msg = "Invalid cache insert option: " + value + + std::string msg = "Invalid cache insert data option: " + value + ". Possible reason: cache_config.cache_insert_data is not a boolean."; return Status(SERVER_INVALID_ARGUMENT, msg); } @@ -641,7 +641,7 @@ Config::CheckCacheConfigCacheInsertData(const std::string& value) { Status Config::CheckEngineConfigUseBlasThreshold(const std::string& value) { if (!ValidationUtil::ValidateStringIsNumber(value).ok()) { - std::string msg = "Invalid blas threshold: " + value + + std::string msg = "Invalid use blas threshold: " + value + ". Possible reason: engine_config.use_blas_threshold is not a positive integer."; return Status(SERVER_INVALID_ARGUMENT, msg); } @@ -651,7 +651,7 @@ Config::CheckEngineConfigUseBlasThreshold(const std::string& value) { Status Config::CheckEngineConfigOmpThreadNum(const std::string& value) { if (!ValidationUtil::ValidateStringIsNumber(value).ok()) { - std::string msg = "Invalid omp thread number: " + value + + std::string msg = "Invalid omp thread num: " + value + ". Possible reason: engine_config.omp_thread_num is not a positive integer."; return Status(SERVER_INVALID_ARGUMENT, msg); } @@ -660,7 +660,7 @@ Config::CheckEngineConfigOmpThreadNum(const std::string& value) { uint32_t sys_thread_cnt = 8; CommonUtil::GetSystemAvailableThreads(sys_thread_cnt); if (omp_thread > static_cast(sys_thread_cnt)) { - std::string msg = "Invalid omp thread number: " + value + + std::string msg = "Invalid omp thread num: " + value + ". Possible reason: engine_config.omp_thread_num exceeds system cpu cores."; return Status(SERVER_INVALID_ARGUMENT, msg); } @@ -668,10 +668,10 @@ Config::CheckEngineConfigOmpThreadNum(const std::string& value) { } Status -Config::CheckEngineConfigUseGpuThreshold(const std::string& value) { +Config::CheckEngineConfigGpuSearchThreshold(const std::string& value) { if (!ValidationUtil::ValidateStringIsNumber(value).ok()) { - std::string msg = "Invalid gpu threshold: " + value + - ". Possible reason: engine_config.use_gpu_threshold is not a positive integer."; + std::string msg = "Invalid gpu search threshold: " + value + + ". 
Possible reason: engine_config.gpu_search_threshold is not a positive integer."; return Status(SERVER_INVALID_ARGUMENT, msg); } return Status::OK(); @@ -979,10 +979,10 @@ Config::GetEngineConfigOmpThreadNum(int32_t& value) { } Status -Config::GetEngineConfigUseGpuThreshold(int32_t& value) { +Config::GetEngineConfigGpuSearchThreshold(int32_t& value) { std::string str = - GetConfigStr(CONFIG_ENGINE, CONFIG_ENGINE_USE_GPU_THRESHOLD, CONFIG_ENGINE_USE_GPU_THRESHOLD_DEFAULT); - Status s = CheckEngineConfigUseGpuThreshold(str); + GetConfigStr(CONFIG_ENGINE, CONFIG_ENGINE_GPU_SEARCH_THRESHOLD, CONFIG_ENGINE_GPU_SEARCH_THRESHOLD_DEFAULT); + Status s = CheckEngineConfigGpuSearchThreshold(str); if (!s.ok()) { return s; } @@ -1244,13 +1244,13 @@ Config::SetEngineConfigOmpThreadNum(const std::string& value) { } Status -Config::SetEngineConfigUseGpuThreshold(const std::string& value) { - Status s = CheckEngineConfigUseGpuThreshold(value); +Config::SetEngineConfigGpuSearchThreshold(const std::string& value) { + Status s = CheckEngineConfigGpuSearchThreshold(value); if (!s.ok()) { return s; } - SetConfigValueInMem(CONFIG_DB, CONFIG_ENGINE_USE_GPU_THRESHOLD, value); + SetConfigValueInMem(CONFIG_DB, CONFIG_ENGINE_GPU_SEARCH_THRESHOLD, value); return Status::OK(); } diff --git a/core/src/server/Config.h b/core/src/server/Config.h index 3e7ae0c818..c93847b216 100644 --- a/core/src/server/Config.h +++ b/core/src/server/Config.h @@ -84,8 +84,8 @@ static const char* CONFIG_ENGINE_USE_BLAS_THRESHOLD = "use_blas_threshold"; static const char* CONFIG_ENGINE_USE_BLAS_THRESHOLD_DEFAULT = "20"; static const char* CONFIG_ENGINE_OMP_THREAD_NUM = "omp_thread_num"; static const char* CONFIG_ENGINE_OMP_THREAD_NUM_DEFAULT = "0"; -static const char* CONFIG_ENGINE_USE_GPU_THRESHOLD = "use_gpu_threshold"; -static const char* CONFIG_ENGINE_USE_GPU_THRESHOLD_DEFAULT = "1000"; +static const char* CONFIG_ENGINE_GPU_SEARCH_THRESHOLD = "gpu_search_threshold"; +static const char* CONFIG_ENGINE_GPU_SEARCH_THRESHOLD_DEFAULT = "1000"; /* resource config */ static const char* CONFIG_RESOURCE = "resource_config"; @@ -169,7 +169,7 @@ class Config { Status CheckEngineConfigOmpThreadNum(const std::string& value); Status - CheckEngineConfigUseGpuThreshold(const std::string& value); + CheckEngineConfigGpuSearchThreshold(const std::string& value); /* resource config */ Status @@ -235,7 +235,7 @@ class Config { Status GetEngineConfigOmpThreadNum(int32_t& value); Status - GetEngineConfigUseGpuThreshold(int32_t& value); + GetEngineConfigGpuSearchThreshold(int32_t& value); /* resource config */ Status @@ -296,7 +296,7 @@ class Config { Status SetEngineConfigOmpThreadNum(const std::string& value); Status - SetEngineConfigUseGpuThreshold(const std::string& value); + SetEngineConfigGpuSearchThreshold(const std::string& value); /* resource config */ Status From be63ba0c8e9a6b6003487e94f874044433998b05 Mon Sep 17 00:00:00 2001 From: jielinxu <52057195+jielinxu@users.noreply.github.com> Date: Fri, 1 Nov 2019 14:55:49 +0800 Subject: [PATCH 124/149] [skip ci] Simplified README with reference links Former-commit-id: be550f395896049d290fa47cec12e0a00031078c --- README.md | 59 ++++--------------------------------------------------- 1 file changed, 4 insertions(+), 55 deletions(-) diff --git a/README.md b/README.md index 31cc2f684f..74743c4e8b 100644 --- a/README.md +++ b/README.md @@ -22,45 +22,9 @@ Milvus is an open source similarity search engine for massive-scale feature vectors. 
Built with heterogeneous computing architecture for the best cost efficiency. Searches over billion-scale vectors take only milliseconds with minimum computing resources. -Milvus provides stable Python, Java and C++ APIs. +Milvus provides stable Python, Java and C++ APIs. For more detailed introduction of Milvus and its architecture, see [Milvus overview](https://www.milvus.io/docs/en/aboutmilvus/overview/). -Keep up-to-date with newest releases and latest updates by reading Milvus [release notes](https://milvus.io/docs/en/release/v0.5.0/). - -- Heterogeneous computing - - Milvus is built with heterogeneous computing architecture for the best performance and cost efficiency. - -- Multiple indexes - - Milvus supports a variety of indexing types that employs quantization, tree-based, and graph indexing techniques. - -- Intelligent resource management - - Milvus automatically adapts search computation and index building processes based on your datasets and available resources. - -- Horizontal scalability - - Milvus supports online / offline expansion to scale both storage and computation resources with simple commands. - -- High availability - - Milvus is integrated with Kubernetes framework so that all single point of failures could be avoided. - -- High compatibility - - Milvus is compatible with almost all deep learning models and major programming languages such as Python, Java and C++, etc. - -- Ease of use - - Milvus can be easily installed in a few steps and enables you to exclusively focus on feature vectors. - -- Visualized monitor - - You can track system performance on Prometheus-based GUI monitor dashboards. - -## Architecture - -![Milvus_arch](https://github.com/milvus-io/docs/blob/master/assets/milvus_arch.png) +Keep up-to-date with newest releases and latest updates by reading Milvus [release notes](https://www.milvus.io/docs/en/release/v0.5.0/). ## Get started @@ -137,23 +101,7 @@ To edit Milvus settings in `conf/server_config.yaml` and `conf/log_config.conf`, #### Run Python example code -Make sure [Python 3.5](https://www.python.org/downloads/) or higher is already installed and in use. - -Install Milvus Python SDK. - -```shell -# Install Milvus Python SDK -$ pip install pymilvus==0.2.3 -``` - -Create a new file `example.py`, and add [Python example code](https://github.com/milvus-io/pymilvus/blob/master/examples/advanced_example.py) to it. - -Run the example code. - -```shell -# Run Milvus Python example -$ python3 example.py -``` +Please read [this page](https://www.milvus.io/docs/en/userguide/example_code/) for how to run an example program using Python SDK. #### Run C++ example code @@ -164,6 +112,7 @@ $ python3 example.py ``` #### Run Java example code + Make sure Java 8 or higher is already installed. Refer to [this link](https://github.com/milvus-io/milvus-sdk-java/tree/master/examples) for the example code. From 5b5c42d1861192c99b8fbedabe93992461f4b2ab Mon Sep 17 00:00:00 2001 From: jielinxu <52057195+jielinxu@users.noreply.github.com> Date: Fri, 1 Nov 2019 15:11:46 +0800 Subject: [PATCH 125/149] [skip ci] Minor change Former-commit-id: 166ff3b7e998da253a2cfc1aaab8909bad262aac --- README.md | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/README.md b/README.md index 74743c4e8b..debd8e2a2a 100644 --- a/README.md +++ b/README.md @@ -16,13 +16,13 @@ - [中文官网](https://www.milvus.io/zh-CN/) -# Welcome to Milvus - ## What is Milvus Milvus is an open source similarity search engine for massive-scale feature vectors. 
Built with heterogeneous computing architecture for the best cost efficiency. Searches over billion-scale vectors take only milliseconds with minimum computing resources. -Milvus provides stable Python, Java and C++ APIs. For more detailed introduction of Milvus and its architecture, see [Milvus overview](https://www.milvus.io/docs/en/aboutmilvus/overview/). +For more detailed introduction of Milvus and its architecture, see [Milvus overview](https://www.milvus.io/docs/en/aboutmilvus/overview/). + +Milvus provides stable [Python](https://github.com/milvus-io/pymilvus), [Java](https://milvus-io.github.io/milvus-sdk-java/javadoc/io/milvus/client/package-summary.html) and C++ APIs. Keep up-to-date with newest releases and latest updates by reading Milvus [release notes](https://www.milvus.io/docs/en/release/v0.5.0/). @@ -39,7 +39,7 @@ Keep up-to-date with newest releases and latest updates by reading Milvus [relea ### Install using docker -Use Docker to install Milvus is a breeze. See the [Milvus install guide](https://milvus.io/docs/en/userguide/install_milvus/) for details. +Using Docker to install Milvus is a breeze. See the [Milvus install guide](https://milvus.io/docs/en/userguide/install_milvus/) for details. ### Build from source @@ -103,6 +103,12 @@ To edit Milvus settings in `conf/server_config.yaml` and `conf/log_config.conf`, Please read [this page](https://www.milvus.io/docs/en/userguide/example_code/) for how to run an example program using Python SDK. +#### Run Java example code + +Make sure Java 8 or higher is already installed. + +Refer to [this link](https://github.com/milvus-io/milvus-sdk-java/tree/master/examples) for the example code. + #### Run C++ example code ```shell @@ -111,13 +117,7 @@ Please read [this page](https://www.milvus.io/docs/en/userguide/example_code/) f $ ./sdk_simple ``` -#### Run Java example code - -Make sure Java 8 or higher is already installed. - -Refer to [this link](https://github.com/milvus-io/milvus-sdk-java/tree/master/examples) for the example code. - -## Milvus roadmap +## Roadmap Please read our [roadmap](https://milvus.io/docs/en/roadmap/) to learn about upcoming features. @@ -127,7 +127,7 @@ Contributions are welcomed and greatly appreciated. Please read our [contributio We use [GitHub issues](https://github.com/milvus-io/milvus/issues/new/choose) to track issues and bugs. For general questions and public discussions, please join our community. -## Join the Milvus community +## Join our community To connect with other users and contributors, welcome to join our [slack channel](https://join.slack.com/t/milvusio/shared_invite/enQtNzY1OTQ0NDI3NjMzLWNmYmM1NmNjOTQ5MGI5NDhhYmRhMGU5M2NhNzhhMDMzY2MzNDdlYjM5ODQ5MmE3ODFlYzU3YjJkNmVlNDQ2ZTk). 
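The README patches above fold the step-by-step Python walkthrough into a single docs link. For quick reference, the steps that were removed amount to the following sketch (the `pymilvus==0.2.3` pin and the `example.py` file name come from the deleted README text; the linked docs page may pin a different SDK version):

```shell
# Install the Milvus Python SDK, as pinned in the removed README section
$ pip install pymilvus==0.2.3

# Save the advanced example from the pymilvus repository as example.py, then run it
$ python3 example.py
```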
From 38c6a48718ce5759ed596532801c3feb43467b36 Mon Sep 17 00:00:00 2001 From: jielinxu <52057195+jielinxu@users.noreply.github.com> Date: Fri, 1 Nov 2019 15:42:04 +0800 Subject: [PATCH 126/149] [skip ci] Create build from source doc Former-commit-id: ab66f08498bc2ed9062e002fc9bbb14db2962108 --- install.md | 53 +++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 53 insertions(+) create mode 100644 install.md diff --git a/install.md b/install.md new file mode 100644 index 0000000000..f869520ba3 --- /dev/null +++ b/install.md @@ -0,0 +1,53 @@ +# Install Milvus from Source Code + +## Software requirements + +- Ubuntu 18.04 or higher +- CMake 3.14 or higher +- CUDA 10.0 or higher +- NVIDIA driver 418 or higher + +## Compilation + +### Step 1 Install dependencies + +```shell +$ cd [Milvus sourcecode path]/core +$ ./ubuntu_build_deps.sh +``` + +### Step 2 Build + +```shell +$ cd [Milvus sourcecode path]/core +$ ./build.sh -t Debug +or +$ ./build.sh -t Release +``` + +When the build is completed, all the stuff that you need in order to run Milvus will be installed under `[Milvus root path]/core/milvus`. + +## Launch Milvus server + +```shell +$ cd [Milvus root path]/core/milvus +``` + +Add `lib/` directory to `LD_LIBRARY_PATH` + +``` +$ export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/path/to/milvus/lib +``` + +Then start Milvus server: + +``` +$ cd scripts +$ ./start_server.sh +``` + +To stop Milvus server, run: + +```shell +$ ./stop_server.sh +``` From 102aa353501193445a8c5cce306b8002aa87bba7 Mon Sep 17 00:00:00 2001 From: "xiaojun.lin" Date: Fri, 1 Nov 2019 15:54:58 +0800 Subject: [PATCH 127/149] add info Former-commit-id: e2a9b4550ff966180f6fdf3d4046e2886849ab81 --- .../knowhere/knowhere/index/vector_index/IndexIVF.cpp | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/core/src/index/knowhere/knowhere/index/vector_index/IndexIVF.cpp b/core/src/index/knowhere/knowhere/index/vector_index/IndexIVF.cpp index 201071c0a4..dff6dd62cc 100644 --- a/core/src/index/knowhere/knowhere/index/vector_index/IndexIVF.cpp +++ b/core/src/index/knowhere/knowhere/index/vector_index/IndexIVF.cpp @@ -221,9 +221,13 @@ IVF::search_impl(int64_t n, const float* data, int64_t k, float* distances, int6 faiss::ivflib::search_with_parameters(index_.get(), n, (float*)data, k, distances, labels, params.get()); stdclock::time_point after = stdclock::now(); double search_cost = (std::chrono::duration(after - before)).count(); - KNOWHERE_LOG_DEBUG << "IVF search cost: " << search_cost - << ", quantization cost: " << faiss::indexIVF_stats.quantization_time - << ", data search cost: " << faiss::indexIVF_stats.search_time; + KNOWHERE_LOG_DEBUG << "K=" << k + << " NQ=" << n + << " NL=" << faiss::indexIVF_stats.nlist + << " ND=" << faiss::indexIVF_stats.ndis + << " NH=" << faiss::indexIVF_stats.nheap_updates + << " Q=" << faiss::indexIVF_stats.quantization_time + << " S=" << faiss::indexIVF_stats.search_time; faiss::indexIVF_stats.quantization_time = 0; faiss::indexIVF_stats.search_time = 0; } From 70f69125333b7b1ce2fd35cb90dc70c06c66e25e Mon Sep 17 00:00:00 2001 From: "xiaojun.lin" Date: Fri, 1 Nov 2019 16:04:45 +0800 Subject: [PATCH 128/149] info lint: Former-commit-id: f3b68279e7473d817745941e2cb056489f3e8e76 --- .../knowhere/knowhere/index/vector_index/IndexIVF.cpp | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/core/src/index/knowhere/knowhere/index/vector_index/IndexIVF.cpp b/core/src/index/knowhere/knowhere/index/vector_index/IndexIVF.cpp index dff6dd62cc..6da5db38ec 
100644 --- a/core/src/index/knowhere/knowhere/index/vector_index/IndexIVF.cpp +++ b/core/src/index/knowhere/knowhere/index/vector_index/IndexIVF.cpp @@ -221,11 +221,8 @@ IVF::search_impl(int64_t n, const float* data, int64_t k, float* distances, int6 faiss::ivflib::search_with_parameters(index_.get(), n, (float*)data, k, distances, labels, params.get()); stdclock::time_point after = stdclock::now(); double search_cost = (std::chrono::duration(after - before)).count(); - KNOWHERE_LOG_DEBUG << "K=" << k - << " NQ=" << n - << " NL=" << faiss::indexIVF_stats.nlist - << " ND=" << faiss::indexIVF_stats.ndis - << " NH=" << faiss::indexIVF_stats.nheap_updates + KNOWHERE_LOG_DEBUG << "K=" << k << " NQ=" << n << " NL=" << faiss::indexIVF_stats.nlist + << " ND=" << faiss::indexIVF_stats.ndis << " NH=" << faiss::indexIVF_stats.nheap_updates << " Q=" << faiss::indexIVF_stats.quantization_time << " S=" << faiss::indexIVF_stats.search_time; faiss::indexIVF_stats.quantization_time = 0; From bf63c3c33745a9e41d7e37b5b0f76cd1403fefdc Mon Sep 17 00:00:00 2001 From: jielinxu <52057195+jielinxu@users.noreply.github.com> Date: Fri, 1 Nov 2019 16:09:46 +0800 Subject: [PATCH 129/149] [skip ci] Reorganize the structure Former-commit-id: c5c7b37ded00b8bb6b57a51f39b4c02361555c8a --- README.md | 89 +++++-------------------------------------------------- 1 file changed, 8 insertions(+), 81 deletions(-) diff --git a/README.md b/README.md index debd8e2a2a..3419c2dee2 100644 --- a/README.md +++ b/README.md @@ -22,94 +22,21 @@ Milvus is an open source similarity search engine for massive-scale feature vect For more detailed introduction of Milvus and its architecture, see [Milvus overview](https://www.milvus.io/docs/en/aboutmilvus/overview/). -Milvus provides stable [Python](https://github.com/milvus-io/pymilvus), [Java](https://milvus-io.github.io/milvus-sdk-java/javadoc/io/milvus/client/package-summary.html) and C++ APIs. +Milvus provides stable [Python](https://pypi.org/project/pymilvus/), [Java](https://milvus-io.github.io/milvus-sdk-java/javadoc/io/milvus/client/package-summary.html) and C++ APIs. Keep up-to-date with newest releases and latest updates by reading Milvus [release notes](https://www.milvus.io/docs/en/release/v0.5.0/). ## Get started -### Hardware requirements +See the [Milvus install guide](https://www.milvus.io/docs/en/userguide/install_milvus/) for using Docker containers. To install Milvus from source code, see [build from source](install.md). -| Component | Recommended configuration | -| --------- | ----------------------------------- | -| CPU | Intel CPU Haswell or higher | -| GPU | NVIDIA Pascal series or higher | -| RAM | 8 GB or more (depends on data size) | -| Hard drive| SATA 3.0 SSD or higher | - -### Install using docker - -Using Docker to install Milvus is a breeze. See the [Milvus install guide](https://milvus.io/docs/en/userguide/install_milvus/) for details. - -### Build from source - -#### Software requirements - -- Ubuntu 18.04 or higher -- CMake 3.14 or higher -- CUDA 10.0 or higher -- NVIDIA driver 418 or higher - -#### Compilation - -##### Step 1 Install dependencies - -```shell -$ cd [Milvus sourcecode path]/core -$ ./ubuntu_build_deps.sh -``` - -##### Step 2 Build - -```shell -$ cd [Milvus sourcecode path]/core -$ ./build.sh -t Debug -or -$ ./build.sh -t Release -``` - -When the build is completed, all the stuff that you need in order to run Milvus will be installed under `[Milvus root path]/core/milvus`. 
- -#### Launch Milvus server - -```shell -$ cd [Milvus root path]/core/milvus -``` - -Add `lib/` directory to `LD_LIBRARY_PATH` - -``` -$ export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/path/to/milvus/lib -``` - -Then start Milvus server: - -``` -$ cd scripts -$ ./start_server.sh -``` - -To stop Milvus server, run: - -```shell -$ ./stop_server.sh -``` - -To edit Milvus settings in `conf/server_config.yaml` and `conf/log_config.conf`, please read [Milvus Configuration](https://github.com/milvus-io/docs/blob/master/reference/milvus_config.md). +To edit Milvus settings, read [Milvus configuration](https://www.milvus.io/docs/en/reference/milvus_config/). ### Try your first Milvus program -#### Run Python example code +Try running a program with Milvus using [Python](https://www.milvus.io/docs/en/userguide/example_code/) or [Java example code](https://github.com/milvus-io/milvus-sdk-java/tree/master/examples). -Please read [this page](https://www.milvus.io/docs/en/userguide/example_code/) for how to run an example program using Python SDK. - -#### Run Java example code - -Make sure Java 8 or higher is already installed. - -Refer to [this link](https://github.com/milvus-io/milvus-sdk-java/tree/master/examples) for the example code. - -#### Run C++ example code +To use C++ example code, use below command: ```shell # Run Milvus C++ example @@ -119,7 +46,7 @@ Refer to [this link](https://github.com/milvus-io/milvus-sdk-java/tree/master/ex ## Roadmap -Please read our [roadmap](https://milvus.io/docs/en/roadmap/) to learn about upcoming features. +Please read our [roadmap](https://milvus.io/docs/en/roadmap/) for upcoming features. ## Contribution guidelines @@ -147,9 +74,9 @@ We greatly appreciate the help of the following people. [Milvus blog](https://www.milvus.io/blog/) -[Milvus CSDN](https://zilliz.blog.csdn.net/) +[Milvus Medium](https://medium.com/@milvusio) -[Milvus roadmap](https://milvus.io/docs/en/roadmap/) +[Milvus CSDN](https://zilliz.blog.csdn.net/) ## License From 34a72696ec7518ac9b92de573a3e01fdb8b730fa Mon Sep 17 00:00:00 2001 From: jielinxu <52057195+jielinxu@users.noreply.github.com> Date: Fri, 1 Nov 2019 16:14:33 +0800 Subject: [PATCH 130/149] [skip ci] minor change Former-commit-id: 9aebb2b5489c1e9e3ec79c0536449a159d72a196 --- README.md | 3 --- 1 file changed, 3 deletions(-) diff --git a/README.md b/README.md index 3419c2dee2..5340777ac2 100644 --- a/README.md +++ b/README.md @@ -12,7 +12,6 @@ - [Twitter](https://twitter.com/milvusio) - [Facebook](https://www.facebook.com/io.milvus.5) - [Blog](https://www.milvus.io/blog/) -- [CSDN](https://zilliz.blog.csdn.net/) - [中文官网](https://www.milvus.io/zh-CN/) @@ -72,8 +71,6 @@ We greatly appreciate the help of the following people. 
[Milvus bootcamp](https://github.com/milvus-io/bootcamp) -[Milvus blog](https://www.milvus.io/blog/) - [Milvus Medium](https://medium.com/@milvusio) [Milvus CSDN](https://zilliz.blog.csdn.net/) From 752014541439c9b423311698daa9fafa80d4e557 Mon Sep 17 00:00:00 2001 From: jielinxu <52057195+jielinxu@users.noreply.github.com> Date: Fri, 1 Nov 2019 16:44:34 +0800 Subject: [PATCH 131/149] [skip ci] Update Resources Former-commit-id: 2b135aeb664641e607b84f2403d9dc964b3b85be --- README.md | 19 ++++++++----------- 1 file changed, 8 insertions(+), 11 deletions(-) diff --git a/README.md b/README.md index 5340777ac2..45dac99510 100644 --- a/README.md +++ b/README.md @@ -8,12 +8,7 @@ ![Release](https://img.shields.io/badge/release-v0.5.0-orange) ![Release_date](https://img.shields.io/badge/release_date-October-yellowgreen) -- [Slack Community](https://join.slack.com/t/milvusio/shared_invite/enQtNzY1OTQ0NDI3NjMzLWNmYmM1NmNjOTQ5MGI5NDhhYmRhMGU5M2NhNzhhMDMzY2MzNDdlYjM5ODQ5MmE3ODFlYzU3YjJkNmVlNDQ2ZTk) -- [Twitter](https://twitter.com/milvusio) -- [Facebook](https://www.facebook.com/io.milvus.5) -- [Blog](https://www.milvus.io/blog/) -- [中文官网](https://www.milvus.io/zh-CN/) - +[中文版](README_CN.md) ## What is Milvus @@ -65,15 +60,17 @@ We greatly appreciate the help of the following people. ## Resources -[Milvus official website](https://www.milvus.io) +- [Milvus.io](https://www.milvus.io) -[Milvus docs](https://www.milvus.io/docs/en/userguide/install_milvus/) +- [Milvus bootcamp](https://github.com/milvus-io/bootcamp) -[Milvus bootcamp](https://github.com/milvus-io/bootcamp) +- [Milvus Medium](https://medium.com/@milvusio) -[Milvus Medium](https://medium.com/@milvusio) +- [Milvus CSDN](https://zilliz.blog.csdn.net/) -[Milvus CSDN](https://zilliz.blog.csdn.net/) +- [Milvus Twitter](https://twitter.com/milvusio) + +- [Milvus Facebook](https://www.facebook.com/io.milvus.5) ## License From f2f9ebb98c4e94f640674686b373dc65e8620ca6 Mon Sep 17 00:00:00 2001 From: jielinxu <52057195+jielinxu@users.noreply.github.com> Date: Fri, 1 Nov 2019 16:46:03 +0800 Subject: [PATCH 132/149] [skip ci] Typo change Former-commit-id: 68966d259693535b25458771b3f3c100a1615b2f --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 45dac99510..d5c7651eb6 100644 --- a/README.md +++ b/README.md @@ -50,7 +50,7 @@ We use [GitHub issues](https://github.com/milvus-io/milvus/issues/new/choose) to ## Join our community -To connect with other users and contributors, welcome to join our [slack channel](https://join.slack.com/t/milvusio/shared_invite/enQtNzY1OTQ0NDI3NjMzLWNmYmM1NmNjOTQ5MGI5NDhhYmRhMGU5M2NhNzhhMDMzY2MzNDdlYjM5ODQ5MmE3ODFlYzU3YjJkNmVlNDQ2ZTk). +To connect with other users and contributors, welcome to join our [Slack channel](https://join.slack.com/t/milvusio/shared_invite/enQtNzY1OTQ0NDI3NjMzLWNmYmM1NmNjOTQ5MGI5NDhhYmRhMGU5M2NhNzhhMDMzY2MzNDdlYjM5ODQ5MmE3ODFlYzU3YjJkNmVlNDQ2ZTk). 
## Thanks From e1ad8d34fd14c105883f3309e26ec51f239afb88 Mon Sep 17 00:00:00 2001 From: fishpenguin Date: Fri, 1 Nov 2019 16:54:59 +0800 Subject: [PATCH 133/149] #164 - Add CPU version for building index Former-commit-id: d03346e95d746a4c95a0e42765df9c0dc1394e42 --- CHANGELOG.md | 1 + core/src/scheduler/JobMgr.cpp | 30 +++--- core/src/scheduler/SchedInst.cpp | 2 +- core/src/scheduler/SchedInst.h | 1 - core/src/scheduler/TaskCreator.cpp | 12 ++- .../scheduler/action/PushTaskToNeighbour.cpp | 102 ++++++------------ core/src/scheduler/optimizer/OnlyGPUPass.cpp | 3 +- core/src/server/Config.cpp | 10 +- core/src/server/Config.h | 2 + 9 files changed, 78 insertions(+), 85 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index aac9a2eaf6..93021475d9 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -14,6 +14,7 @@ Please mark all change in change log and use the ticket from JIRA. - \#115 - Using new structure for tasktable - \#139 - New config option use_gpu_threshold - \#146 - Add only GPU and only CPU version for IVF_SQ8 and IVF_FLAT +- \#164 - Add CPU version for building index ## Improvement - \#64 - Improvement dump function in scheduler diff --git a/core/src/scheduler/JobMgr.cpp b/core/src/scheduler/JobMgr.cpp index 794f6a0f37..8e038e51b2 100644 --- a/core/src/scheduler/JobMgr.cpp +++ b/core/src/scheduler/JobMgr.cpp @@ -104,20 +104,26 @@ JobMgr::build_task(const JobPtr& job) { void JobMgr::calculate_path(const TaskPtr& task) { - if (task->type_ != TaskType::SearchTask) { - return; - } + if (task->type_ == TaskType::SearchTask) { + if (task->label()->Type() != TaskLabelType::SPECIFIED_RESOURCE) { + return; + } - if (task->label()->Type() != TaskLabelType::SPECIFIED_RESOURCE) { - return; + std::vector path; + auto spec_label = std::static_pointer_cast(task->label()); + auto src = res_mgr_->GetDiskResources()[0]; + auto dest = spec_label->resource(); + ShortestPath(src.lock(), dest.lock(), res_mgr_, path); + task->path() = Path(path, path.size() - 1); + + } else if (task->type_ == TaskType::BuildIndexTask) { + auto spec_label = std::static_pointer_cast(task->label()); + auto src = res_mgr_->GetDiskResources()[0]; + auto dest = spec_label->resource(); + std::vector path; + ShortestPath(src.lock(), dest.lock(), res_mgr_, path); + task->path() = Path(path, path.size() - 1); } - - std::vector path; - auto spec_label = std::static_pointer_cast(task->label()); - auto src = res_mgr_->GetDiskResources()[0]; - auto dest = spec_label->resource(); - ShortestPath(src.lock(), dest.lock(), res_mgr_, path); - task->path() = Path(path, path.size() - 1); } } // namespace scheduler diff --git a/core/src/scheduler/SchedInst.cpp b/core/src/scheduler/SchedInst.cpp index 8474e93c1f..8d58b831bf 100644 --- a/core/src/scheduler/SchedInst.cpp +++ b/core/src/scheduler/SchedInst.cpp @@ -75,7 +75,7 @@ load_simple_config() { } } - if (not find_build_gpu_id) { + if (not find_build_gpu_id && build_gpu_id != server::CPU_DEVICE_ID) { ResMgrInst::GetInstance()->Add( ResourceFactory::Create(std::to_string(build_gpu_id), "GPU", build_gpu_id, true, true)); ResMgrInst::GetInstance()->Connect("cpu", std::to_string(build_gpu_id), pcie); diff --git a/core/src/scheduler/SchedInst.h b/core/src/scheduler/SchedInst.h index a3048069f9..d51611af26 100644 --- a/core/src/scheduler/SchedInst.h +++ b/core/src/scheduler/SchedInst.h @@ -106,7 +106,6 @@ class OptimizerInst { has_cpu = true; } } - std::vector pass_list; pass_list.push_back(std::make_shared()); pass_list.push_back(std::make_shared()); diff --git a/core/src/scheduler/TaskCreator.cpp 
b/core/src/scheduler/TaskCreator.cpp index 40cfa9aac6..3d2a2072cd 100644 --- a/core/src/scheduler/TaskCreator.cpp +++ b/core/src/scheduler/TaskCreator.cpp @@ -70,8 +70,16 @@ TaskCreator::Create(const DeleteJobPtr& job) { std::vector TaskCreator::Create(const BuildIndexJobPtr& job) { std::vector tasks; - // TODO(yukun): remove "disk" hardcode here - ResourcePtr res_ptr = ResMgrInst::GetInstance()->GetResource("disk"); + + server::Config& config = server::Config::GetInstance(); + int32_t build_index_id; + Status stat = config.GetResourceConfigIndexBuildDevice(build_index_id); + ResourcePtr res_ptr; + if (build_index_id == server::CPU_DEVICE_ID) { + res_ptr = ResMgrInst::GetInstance()->GetResource("cpu"); + } else { + res_ptr = ResMgrInst::GetInstance()->GetResource(ResourceType::GPU, build_index_id); + } for (auto& to_index_file : job->to_index_files()) { auto label = std::make_shared(std::weak_ptr(res_ptr)); diff --git a/core/src/scheduler/action/PushTaskToNeighbour.cpp b/core/src/scheduler/action/PushTaskToNeighbour.cpp index b8a4a1164b..6e52708abc 100644 --- a/core/src/scheduler/action/PushTaskToNeighbour.cpp +++ b/core/src/scheduler/action/PushTaskToNeighbour.cpp @@ -138,73 +138,41 @@ Action::SpecifiedResourceLabelTaskScheduler(const ResourceMgrPtr& res_mgr, Resou std::shared_ptr event) { auto task_item = event->task_table_item_; auto task = event->task_table_item_->task; - if (resource->type() == ResourceType::DISK) { - // step 1: calculate shortest path per resource, from disk to compute resource - auto compute_resources = res_mgr->GetComputeResources(); - std::vector> paths; - std::vector transport_costs; - for (auto& res : compute_resources) { - std::vector path; - uint64_t transport_cost = ShortestPath(resource, res, res_mgr, path); - transport_costs.push_back(transport_cost); - paths.emplace_back(path); - } - // if (task->job_.lock()->type() == JobType::SEARCH) { - // auto label = task->label(); - // auto spec_label = std::static_pointer_cast(label); - // if (spec_label->resource().lock()->type() == ResourceType::CPU) { - // std::vector spec_path; - // spec_path.push_back(spec_label->resource().lock()->name()); - // spec_path.push_back(resource->name()); - // task->path() = Path(spec_path, spec_path.size() - 1); - // } else { - // // step 2: select min cost, cost(resource) = avg_cost * task_to_do + transport_cost - // uint64_t min_cost = std::numeric_limits::max(); - // uint64_t min_cost_idx = 0; - // for (uint64_t i = 0; i < compute_resources.size(); ++i) { - // if (compute_resources[i]->TotalTasks() == 0) { - // min_cost_idx = i; - // break; - // } - // uint64_t cost = compute_resources[i]->TaskAvgCost() * - // compute_resources[i]->NumOfTaskToExec() + - // transport_costs[i]; - // if (min_cost > cost) { - // min_cost = cost; - // min_cost_idx = i; - // } - // } - // - // // step 3: set path in task - // Path task_path(paths[min_cost_idx], paths[min_cost_idx].size() - 1); - // task->path() = task_path; - // } - // - // } else - if (task->job_.lock()->type() == JobType::BUILD) { - // step2: Read device id in config - // get build index gpu resource - server::Config& config = server::Config::GetInstance(); - int32_t build_index_gpu; - Status stat = config.GetResourceConfigIndexBuildDevice(build_index_gpu); - - bool find_gpu_res = false; - if (res_mgr->GetResource(ResourceType::GPU, build_index_gpu) != nullptr) { - for (uint64_t i = 0; i < compute_resources.size(); ++i) { - if (compute_resources[i]->name() == - res_mgr->GetResource(ResourceType::GPU, build_index_gpu)->name()) { - 
find_gpu_res = true; - Path task_path(paths[i], paths[i].size() - 1); - task->path() = task_path; - break; - } - } - } - if (not find_gpu_res) { - task->path() = Path(paths[0], paths[0].size() - 1); - } - } - } +// if (resource->type() == ResourceType::DISK) { +// // step 1: calculate shortest path per resource, from disk to compute resource +// auto compute_resources = res_mgr->GetComputeResources(); +// std::vector> paths; +// std::vector transport_costs; +// for (auto& res : compute_resources) { +// std::vector path; +// uint64_t transport_cost = ShortestPath(resource, res, res_mgr, path); +// transport_costs.push_back(transport_cost); +// paths.emplace_back(path); +// } +// if (task->job_.lock()->type() == JobType::BUILD) { +// // step2: Read device id in config +// // get build index gpu resource +// server::Config& config = server::Config::GetInstance(); +// int32_t build_index_gpu; +// Status stat = config.GetResourceConfigIndexBuildDevice(build_index_gpu); +// +// bool find_gpu_res = false; +// if (res_mgr->GetResource(ResourceType::GPU, build_index_gpu) != nullptr) { +// for (uint64_t i = 0; i < compute_resources.size(); ++i) { +// if (compute_resources[i]->name() == +// res_mgr->GetResource(ResourceType::GPU, build_index_gpu)->name()) { +// find_gpu_res = true; +// Path task_path(paths[i], paths[i].size() - 1); +// task->path() = task_path; +// break; +// } +// } +// } +// if (not find_gpu_res) { +// task->path() = Path(paths[0], paths[0].size() - 1); +// } +// } +// } if (resource->name() == task->path().Last()) { resource->WakeupExecutor(); diff --git a/core/src/scheduler/optimizer/OnlyGPUPass.cpp b/core/src/scheduler/optimizer/OnlyGPUPass.cpp index 3fcda0e8a3..a49b75b17e 100644 --- a/core/src/scheduler/optimizer/OnlyGPUPass.cpp +++ b/core/src/scheduler/optimizer/OnlyGPUPass.cpp @@ -46,7 +46,8 @@ OnlyGPUPass::Run(const TaskPtr& task) { auto label = std::make_shared(std::weak_ptr(res_ptr)); task->label() = label; - specified_gpu_id_ = specified_gpu_id_++ % gpu_id.size(); + ++specified_gpu_id_; + specified_gpu_id_ = specified_gpu_id_ % gpu_id.size(); return true; } diff --git a/core/src/server/Config.cpp b/core/src/server/Config.cpp index d651f5b3b3..0c56d69c39 100644 --- a/core/src/server/Config.cpp +++ b/core/src/server/Config.cpp @@ -596,6 +596,9 @@ Config::CheckCacheConfigGpuCacheCapacity(const std::string& value) { return s; } + if (gpu_index == server::CPU_DEVICE_ID) + return Status::OK(); + size_t gpu_memory; if (!ValidationUtil::GetGpuMemory(gpu_index, gpu_memory).ok()) { std::string msg = "Fail to get GPU memory for GPU device: " + std::to_string(gpu_index); @@ -1013,7 +1016,12 @@ Config::GetResourceConfigIndexBuildDevice(int32_t& value) { return s; } - value = std::stoi(str.substr(3)); + if (str == "cpu") { + value = CPU_DEVICE_ID; + } else { + value = std::stoi(str.substr(3)); + } + return Status::OK(); } diff --git a/core/src/server/Config.h b/core/src/server/Config.h index c93847b216..45591fb5ec 100644 --- a/core/src/server/Config.h +++ b/core/src/server/Config.h @@ -95,6 +95,8 @@ static const char* CONFIG_RESOURCE_SEARCH_RESOURCES = "search_resources"; static const char* CONFIG_RESOURCE_INDEX_BUILD_DEVICE = "index_build_device"; static const char* CONFIG_RESOURCE_INDEX_BUILD_DEVICE_DEFAULT = "gpu0"; +const int32_t CPU_DEVICE_ID = -1; + class Config { public: static Config& From 44d648c3afc94c5d7a224ff447aeb85f12f09d88 Mon Sep 17 00:00:00 2001 From: fishpenguin Date: Fri, 1 Nov 2019 16:54:59 +0800 Subject: [PATCH 134/149] #164 - Add CPU version for building index 
Former-commit-id: 02f9f2a04dc765f41c474af04659f70d121cdc18 --- CHANGELOG.md | 1 + core/src/scheduler/JobMgr.cpp | 29 ++--- core/src/scheduler/SchedInst.cpp | 2 +- core/src/scheduler/SchedInst.h | 1 - core/src/scheduler/TaskCreator.cpp | 11 +- .../scheduler/action/PushTaskToNeighbour.cpp | 102 ++++++------------ core/src/scheduler/optimizer/OnlyGPUPass.cpp | 3 +- core/src/server/Config.cpp | 10 +- core/src/server/Config.h | 2 + 9 files changed, 76 insertions(+), 85 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index aac9a2eaf6..93021475d9 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -14,6 +14,7 @@ Please mark all change in change log and use the ticket from JIRA. - \#115 - Using new structure for tasktable - \#139 - New config option use_gpu_threshold - \#146 - Add only GPU and only CPU version for IVF_SQ8 and IVF_FLAT +- \#164 - Add CPU version for building index ## Improvement - \#64 - Improvement dump function in scheduler diff --git a/core/src/scheduler/JobMgr.cpp b/core/src/scheduler/JobMgr.cpp index 794f6a0f37..4404d95763 100644 --- a/core/src/scheduler/JobMgr.cpp +++ b/core/src/scheduler/JobMgr.cpp @@ -104,20 +104,25 @@ JobMgr::build_task(const JobPtr& job) { void JobMgr::calculate_path(const TaskPtr& task) { - if (task->type_ != TaskType::SearchTask) { - return; - } + if (task->type_ == TaskType::SearchTask) { + if (task->label()->Type() != TaskLabelType::SPECIFIED_RESOURCE) { + return; + } - if (task->label()->Type() != TaskLabelType::SPECIFIED_RESOURCE) { - return; + std::vector path; + auto spec_label = std::static_pointer_cast(task->label()); + auto src = res_mgr_->GetDiskResources()[0]; + auto dest = spec_label->resource(); + ShortestPath(src.lock(), dest.lock(), res_mgr_, path); + task->path() = Path(path, path.size() - 1); + } else if (task->type_ == TaskType::BuildIndexTask) { + auto spec_label = std::static_pointer_cast(task->label()); + auto src = res_mgr_->GetDiskResources()[0]; + auto dest = spec_label->resource(); + std::vector path; + ShortestPath(src.lock(), dest.lock(), res_mgr_, path); + task->path() = Path(path, path.size() - 1); } - - std::vector path; - auto spec_label = std::static_pointer_cast(task->label()); - auto src = res_mgr_->GetDiskResources()[0]; - auto dest = spec_label->resource(); - ShortestPath(src.lock(), dest.lock(), res_mgr_, path); - task->path() = Path(path, path.size() - 1); } } // namespace scheduler diff --git a/core/src/scheduler/SchedInst.cpp b/core/src/scheduler/SchedInst.cpp index 8474e93c1f..8d58b831bf 100644 --- a/core/src/scheduler/SchedInst.cpp +++ b/core/src/scheduler/SchedInst.cpp @@ -75,7 +75,7 @@ load_simple_config() { } } - if (not find_build_gpu_id) { + if (not find_build_gpu_id && build_gpu_id != server::CPU_DEVICE_ID) { ResMgrInst::GetInstance()->Add( ResourceFactory::Create(std::to_string(build_gpu_id), "GPU", build_gpu_id, true, true)); ResMgrInst::GetInstance()->Connect("cpu", std::to_string(build_gpu_id), pcie); diff --git a/core/src/scheduler/SchedInst.h b/core/src/scheduler/SchedInst.h index a3048069f9..d51611af26 100644 --- a/core/src/scheduler/SchedInst.h +++ b/core/src/scheduler/SchedInst.h @@ -106,7 +106,6 @@ class OptimizerInst { has_cpu = true; } } - std::vector pass_list; pass_list.push_back(std::make_shared()); pass_list.push_back(std::make_shared()); diff --git a/core/src/scheduler/TaskCreator.cpp b/core/src/scheduler/TaskCreator.cpp index 40cfa9aac6..9f3bc2ae9a 100644 --- a/core/src/scheduler/TaskCreator.cpp +++ b/core/src/scheduler/TaskCreator.cpp @@ -70,8 +70,15 @@ TaskCreator::Create(const 
DeleteJobPtr& job) { std::vector TaskCreator::Create(const BuildIndexJobPtr& job) { std::vector tasks; - // TODO(yukun): remove "disk" hardcode here - ResourcePtr res_ptr = ResMgrInst::GetInstance()->GetResource("disk"); + server::Config& config = server::Config::GetInstance(); + int32_t build_index_id; + Status stat = config.GetResourceConfigIndexBuildDevice(build_index_id); + ResourcePtr res_ptr; + if (build_index_id == server::CPU_DEVICE_ID) { + res_ptr = ResMgrInst::GetInstance()->GetResource("cpu"); + } else { + res_ptr = ResMgrInst::GetInstance()->GetResource(ResourceType::GPU, build_index_id); + } for (auto& to_index_file : job->to_index_files()) { auto label = std::make_shared(std::weak_ptr(res_ptr)); diff --git a/core/src/scheduler/action/PushTaskToNeighbour.cpp b/core/src/scheduler/action/PushTaskToNeighbour.cpp index b8a4a1164b..9aed678937 100644 --- a/core/src/scheduler/action/PushTaskToNeighbour.cpp +++ b/core/src/scheduler/action/PushTaskToNeighbour.cpp @@ -138,73 +138,41 @@ Action::SpecifiedResourceLabelTaskScheduler(const ResourceMgrPtr& res_mgr, Resou std::shared_ptr event) { auto task_item = event->task_table_item_; auto task = event->task_table_item_->task; - if (resource->type() == ResourceType::DISK) { - // step 1: calculate shortest path per resource, from disk to compute resource - auto compute_resources = res_mgr->GetComputeResources(); - std::vector> paths; - std::vector transport_costs; - for (auto& res : compute_resources) { - std::vector path; - uint64_t transport_cost = ShortestPath(resource, res, res_mgr, path); - transport_costs.push_back(transport_cost); - paths.emplace_back(path); - } - // if (task->job_.lock()->type() == JobType::SEARCH) { - // auto label = task->label(); - // auto spec_label = std::static_pointer_cast(label); - // if (spec_label->resource().lock()->type() == ResourceType::CPU) { - // std::vector spec_path; - // spec_path.push_back(spec_label->resource().lock()->name()); - // spec_path.push_back(resource->name()); - // task->path() = Path(spec_path, spec_path.size() - 1); - // } else { - // // step 2: select min cost, cost(resource) = avg_cost * task_to_do + transport_cost - // uint64_t min_cost = std::numeric_limits::max(); - // uint64_t min_cost_idx = 0; - // for (uint64_t i = 0; i < compute_resources.size(); ++i) { - // if (compute_resources[i]->TotalTasks() == 0) { - // min_cost_idx = i; - // break; - // } - // uint64_t cost = compute_resources[i]->TaskAvgCost() * - // compute_resources[i]->NumOfTaskToExec() + - // transport_costs[i]; - // if (min_cost > cost) { - // min_cost = cost; - // min_cost_idx = i; - // } - // } - // - // // step 3: set path in task - // Path task_path(paths[min_cost_idx], paths[min_cost_idx].size() - 1); - // task->path() = task_path; - // } - // - // } else - if (task->job_.lock()->type() == JobType::BUILD) { - // step2: Read device id in config - // get build index gpu resource - server::Config& config = server::Config::GetInstance(); - int32_t build_index_gpu; - Status stat = config.GetResourceConfigIndexBuildDevice(build_index_gpu); - - bool find_gpu_res = false; - if (res_mgr->GetResource(ResourceType::GPU, build_index_gpu) != nullptr) { - for (uint64_t i = 0; i < compute_resources.size(); ++i) { - if (compute_resources[i]->name() == - res_mgr->GetResource(ResourceType::GPU, build_index_gpu)->name()) { - find_gpu_res = true; - Path task_path(paths[i], paths[i].size() - 1); - task->path() = task_path; - break; - } - } - } - if (not find_gpu_res) { - task->path() = Path(paths[0], paths[0].size() - 1); - } - 
} - } + // if (resource->type() == ResourceType::DISK) { + // // step 1: calculate shortest path per resource, from disk to compute resource + // auto compute_resources = res_mgr->GetComputeResources(); + // std::vector> paths; + // std::vector transport_costs; + // for (auto& res : compute_resources) { + // std::vector path; + // uint64_t transport_cost = ShortestPath(resource, res, res_mgr, path); + // transport_costs.push_back(transport_cost); + // paths.emplace_back(path); + // } + // if (task->job_.lock()->type() == JobType::BUILD) { + // // step2: Read device id in config + // // get build index gpu resource + // server::Config& config = server::Config::GetInstance(); + // int32_t build_index_gpu; + // Status stat = config.GetResourceConfigIndexBuildDevice(build_index_gpu); + // + // bool find_gpu_res = false; + // if (res_mgr->GetResource(ResourceType::GPU, build_index_gpu) != nullptr) { + // for (uint64_t i = 0; i < compute_resources.size(); ++i) { + // if (compute_resources[i]->name() == + // res_mgr->GetResource(ResourceType::GPU, build_index_gpu)->name()) { + // find_gpu_res = true; + // Path task_path(paths[i], paths[i].size() - 1); + // task->path() = task_path; + // break; + // } + // } + // } + // if (not find_gpu_res) { + // task->path() = Path(paths[0], paths[0].size() - 1); + // } + // } + // } if (resource->name() == task->path().Last()) { resource->WakeupExecutor(); diff --git a/core/src/scheduler/optimizer/OnlyGPUPass.cpp b/core/src/scheduler/optimizer/OnlyGPUPass.cpp index 3fcda0e8a3..a49b75b17e 100644 --- a/core/src/scheduler/optimizer/OnlyGPUPass.cpp +++ b/core/src/scheduler/optimizer/OnlyGPUPass.cpp @@ -46,7 +46,8 @@ OnlyGPUPass::Run(const TaskPtr& task) { auto label = std::make_shared(std::weak_ptr(res_ptr)); task->label() = label; - specified_gpu_id_ = specified_gpu_id_++ % gpu_id.size(); + ++specified_gpu_id_; + specified_gpu_id_ = specified_gpu_id_ % gpu_id.size(); return true; } diff --git a/core/src/server/Config.cpp b/core/src/server/Config.cpp index d651f5b3b3..0c56d69c39 100644 --- a/core/src/server/Config.cpp +++ b/core/src/server/Config.cpp @@ -596,6 +596,9 @@ Config::CheckCacheConfigGpuCacheCapacity(const std::string& value) { return s; } + if (gpu_index == server::CPU_DEVICE_ID) + return Status::OK(); + size_t gpu_memory; if (!ValidationUtil::GetGpuMemory(gpu_index, gpu_memory).ok()) { std::string msg = "Fail to get GPU memory for GPU device: " + std::to_string(gpu_index); @@ -1013,7 +1016,12 @@ Config::GetResourceConfigIndexBuildDevice(int32_t& value) { return s; } - value = std::stoi(str.substr(3)); + if (str == "cpu") { + value = CPU_DEVICE_ID; + } else { + value = std::stoi(str.substr(3)); + } + return Status::OK(); } diff --git a/core/src/server/Config.h b/core/src/server/Config.h index c93847b216..45591fb5ec 100644 --- a/core/src/server/Config.h +++ b/core/src/server/Config.h @@ -95,6 +95,8 @@ static const char* CONFIG_RESOURCE_SEARCH_RESOURCES = "search_resources"; static const char* CONFIG_RESOURCE_INDEX_BUILD_DEVICE = "index_build_device"; static const char* CONFIG_RESOURCE_INDEX_BUILD_DEVICE_DEFAULT = "gpu0"; +const int32_t CPU_DEVICE_ID = -1; + class Config { public: static Config& From 63033ebc1f484afc8ba4df467ce685c4044e7f4d Mon Sep 17 00:00:00 2001 From: fishpenguin Date: Fri, 1 Nov 2019 20:14:18 +0800 Subject: [PATCH 135/149] Code format Former-commit-id: 8c9c010fd45c0a6737b8ad6c4493469440292707 --- core/src/scheduler/SchedInst.cpp | 12 ++++++------ core/src/scheduler/optimizer/OnlyGPUPass.cpp | 3 +-- 2 files changed, 7 insertions(+), 8 
deletions(-) diff --git a/core/src/scheduler/SchedInst.cpp b/core/src/scheduler/SchedInst.cpp index 8d58b831bf..f0c00c2d2a 100644 --- a/core/src/scheduler/SchedInst.cpp +++ b/core/src/scheduler/SchedInst.cpp @@ -55,8 +55,8 @@ load_simple_config() { // get resources auto gpu_ids = get_gpu_pool(); - int32_t build_gpu_id; - config.GetResourceConfigIndexBuildDevice(build_gpu_id); + int32_t index_build_device_id; + config.GetResourceConfigIndexBuildDevice(index_build_device_id); // create and connect ResMgrInst::GetInstance()->Add(ResourceFactory::Create("disk", "DISK", 0, true, false)); @@ -70,15 +70,15 @@ load_simple_config() { for (auto& gpu_id : gpu_ids) { ResMgrInst::GetInstance()->Add(ResourceFactory::Create(std::to_string(gpu_id), "GPU", gpu_id, true, true)); ResMgrInst::GetInstance()->Connect("cpu", std::to_string(gpu_id), pcie); - if (build_gpu_id == gpu_id) { + if (index_build_device_id == gpu_id) { find_build_gpu_id = true; } } - if (not find_build_gpu_id && build_gpu_id != server::CPU_DEVICE_ID) { + if (not find_build_gpu_id && index_build_device_id != server::CPU_DEVICE_ID) { ResMgrInst::GetInstance()->Add( - ResourceFactory::Create(std::to_string(build_gpu_id), "GPU", build_gpu_id, true, true)); - ResMgrInst::GetInstance()->Connect("cpu", std::to_string(build_gpu_id), pcie); + ResourceFactory::Create(std::to_string(index_build_device_id), "GPU", index_build_device_id, true, true)); + ResMgrInst::GetInstance()->Connect("cpu", std::to_string(index_build_device_id), pcie); } } diff --git a/core/src/scheduler/optimizer/OnlyGPUPass.cpp b/core/src/scheduler/optimizer/OnlyGPUPass.cpp index a49b75b17e..e5d3c71fd3 100644 --- a/core/src/scheduler/optimizer/OnlyGPUPass.cpp +++ b/core/src/scheduler/optimizer/OnlyGPUPass.cpp @@ -46,8 +46,7 @@ OnlyGPUPass::Run(const TaskPtr& task) { auto label = std::make_shared(std::weak_ptr(res_ptr)); task->label() = label; - ++specified_gpu_id_; - specified_gpu_id_ = specified_gpu_id_ % gpu_id.size(); + specified_gpu_id_ = (specified_gpu_id_ + 1) % gpu_id.size(); return true; } From 2f28f053d28363a5bc7aa17fb741586916a54b37 Mon Sep 17 00:00:00 2001 From: fishpenguin Date: Sat, 2 Nov 2019 11:26:08 +0800 Subject: [PATCH 136/149] change gpu_index to device_id Former-commit-id: 6993e98e19e31f942ffee5833b593a59cffa94a7 --- core/src/server/Config.cpp | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/core/src/server/Config.cpp b/core/src/server/Config.cpp index 0c56d69c39..dbe7d260c5 100644 --- a/core/src/server/Config.cpp +++ b/core/src/server/Config.cpp @@ -590,18 +590,18 @@ Config::CheckCacheConfigGpuCacheCapacity(const std::string& value) { return Status(SERVER_INVALID_ARGUMENT, msg); } else { uint64_t gpu_cache_capacity = std::stoi(value) * GB; - int gpu_index; - Status s = GetResourceConfigIndexBuildDevice(gpu_index); + int device_id; + Status s = GetResourceConfigIndexBuildDevice(device_id); if (!s.ok()) { return s; } - if (gpu_index == server::CPU_DEVICE_ID) + if (device_id == server::CPU_DEVICE_ID) return Status::OK(); size_t gpu_memory; - if (!ValidationUtil::GetGpuMemory(gpu_index, gpu_memory).ok()) { - std::string msg = "Fail to get GPU memory for GPU device: " + std::to_string(gpu_index); + if (!ValidationUtil::GetGpuMemory(device_id, gpu_memory).ok()) { + std::string msg = "Fail to get GPU memory for GPU device: " + std::to_string(device_id); return Status(SERVER_UNEXPECTED_ERROR, msg); } else if (gpu_cache_capacity >= gpu_memory) { std::string msg = "Invalid gpu cache capacity: " + value + From 
fed65830b14f2afc4d1327688319d01f9e07bc5e Mon Sep 17 00:00:00 2001 From: zhenwu Date: Sat, 2 Nov 2019 11:49:31 +0800 Subject: [PATCH 137/149] add test params Former-commit-id: c50a44661a54e0269a6dc2dbe368b9d5b7d5a712 --- ci/jenkinsfile/dev_test.groovy | 4 ++-- ci/jenkinsfile/dev_test_all.groovy | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/ci/jenkinsfile/dev_test.groovy b/ci/jenkinsfile/dev_test.groovy index f9df9b4065..89e64efea1 100644 --- a/ci/jenkinsfile/dev_test.groovy +++ b/ci/jenkinsfile/dev_test.groovy @@ -3,7 +3,7 @@ timeout(time: 30, unit: 'MINUTES') { dir ("${PROJECT_NAME}_test") { checkout([$class: 'GitSCM', branches: [[name: "${SEMVER}"]], doGenerateSubmoduleConfigurations: false, extensions: [], submoduleCfg: [], userRemoteConfigs: [[credentialsId: "${params.GIT_USER}", url: "git@192.168.1.105:Test/milvus_test.git", name: 'origin', refspec: "+refs/heads/${SEMVER}:refs/remotes/origin/${SEMVER}"]]]) sh 'python3 -m pip install -r requirements.txt -i http://pypi.douban.com/simple --trusted-host pypi.douban.com' - sh "pytest . --alluredir=\"test_out/dev/single/sqlite\" --level=1 --ip ${env.JOB_NAME}-${env.BUILD_NUMBER}-milvus-gpu-engine.milvus-1.svc.cluster.local" + sh "pytest . --alluredir=\"test_out/dev/single/sqlite\" --level=1 --ip ${env.JOB_NAME}-${env.BUILD_NUMBER}-milvus-gpu-engine.milvus-1.svc.cluster.local --internal" } // mysql database backend test load "${env.WORKSPACE}/ci/jenkinsfile/cleanup_dev.groovy" @@ -19,7 +19,7 @@ timeout(time: 30, unit: 'MINUTES') { } } dir ("${PROJECT_NAME}_test") { - sh "pytest . --alluredir=\"test_out/dev/single/mysql\" --level=1 --ip ${env.JOB_NAME}-${env.BUILD_NUMBER}-milvus-gpu-engine.milvus-2.svc.cluster.local" + sh "pytest . --alluredir=\"test_out/dev/single/mysql\" --level=1 --ip ${env.JOB_NAME}-${env.BUILD_NUMBER}-milvus-gpu-engine.milvus-2.svc.cluster.local --internal" } } catch (exc) { echo 'Milvus Test Failed !' diff --git a/ci/jenkinsfile/dev_test_all.groovy b/ci/jenkinsfile/dev_test_all.groovy index b11ea755b9..0253090942 100644 --- a/ci/jenkinsfile/dev_test_all.groovy +++ b/ci/jenkinsfile/dev_test_all.groovy @@ -3,7 +3,7 @@ timeout(time: 60, unit: 'MINUTES') { dir ("${PROJECT_NAME}_test") { checkout([$class: 'GitSCM', branches: [[name: "${SEMVER}"]], doGenerateSubmoduleConfigurations: false, extensions: [], submoduleCfg: [], userRemoteConfigs: [[credentialsId: "${params.GIT_USER}", url: "git@192.168.1.105:Test/milvus_test.git", name: 'origin', refspec: "+refs/heads/${SEMVER}:refs/remotes/origin/${SEMVER}"]]]) sh 'python3 -m pip install -r requirements.txt -i http://pypi.douban.com/simple --trusted-host pypi.douban.com' - sh "pytest . --alluredir=\"test_out/dev/single/sqlite\" --ip ${env.JOB_NAME}-${env.BUILD_NUMBER}-milvus-gpu-engine.milvus-1.svc.cluster.local" + sh "pytest . --alluredir=\"test_out/dev/single/sqlite\" --ip ${env.JOB_NAME}-${env.BUILD_NUMBER}-milvus-gpu-engine.milvus-1.svc.cluster.local --internal" } // mysql database backend test @@ -20,7 +20,7 @@ timeout(time: 60, unit: 'MINUTES') { } } dir ("${PROJECT_NAME}_test") { - sh "pytest . --alluredir=\"test_out/dev/single/mysql\" --ip ${env.JOB_NAME}-${env.BUILD_NUMBER}-milvus-gpu-engine.milvus-2.svc.cluster.local" + sh "pytest . --alluredir=\"test_out/dev/single/mysql\" --ip ${env.JOB_NAME}-${env.BUILD_NUMBER}-milvus-gpu-engine.milvus-2.svc.cluster.local --internal" } } catch (exc) { echo 'Milvus Test Failed !' 
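Note that `--internal` is a custom option registered by the external milvus_test suite, not a built-in pytest flag, so the exact form it accepts (a bare switch, or `--internal=true` as the next patch settles on) is defined in that suite's `conftest.py`. A quick way to check from a checkout of the test repo, assuming the standard pytest setup:

```shell
# Options registered via pytest_addoption appear in pytest's help text
# when run from the suite's root directory
$ pytest --help | grep -B1 -A2 internal
```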
From 97a3d974d3085c64983641cbed1d8eedc6629a2e Mon Sep 17 00:00:00 2001 From: zhenwu Date: Sat, 2 Nov 2019 13:19:00 +0800 Subject: [PATCH 138/149] fix test groovy Former-commit-id: cbc94db875e7078902a6238160ad581d7679150f --- ci/jenkinsfile/dev_test.groovy | 4 ++-- ci/jenkinsfile/dev_test_all.groovy | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/ci/jenkinsfile/dev_test.groovy b/ci/jenkinsfile/dev_test.groovy index 89e64efea1..9b265ac401 100644 --- a/ci/jenkinsfile/dev_test.groovy +++ b/ci/jenkinsfile/dev_test.groovy @@ -3,7 +3,7 @@ timeout(time: 30, unit: 'MINUTES') { dir ("${PROJECT_NAME}_test") { checkout([$class: 'GitSCM', branches: [[name: "${SEMVER}"]], doGenerateSubmoduleConfigurations: false, extensions: [], submoduleCfg: [], userRemoteConfigs: [[credentialsId: "${params.GIT_USER}", url: "git@192.168.1.105:Test/milvus_test.git", name: 'origin', refspec: "+refs/heads/${SEMVER}:refs/remotes/origin/${SEMVER}"]]]) sh 'python3 -m pip install -r requirements.txt -i http://pypi.douban.com/simple --trusted-host pypi.douban.com' - sh "pytest . --alluredir=\"test_out/dev/single/sqlite\" --level=1 --ip ${env.JOB_NAME}-${env.BUILD_NUMBER}-milvus-gpu-engine.milvus-1.svc.cluster.local --internal" + sh "pytest . --alluredir=\"test_out/dev/single/sqlite\" --level=1 --ip ${env.JOB_NAME}-${env.BUILD_NUMBER}-milvus-gpu-engine.milvus-1.svc.cluster.local --internal=true" } // mysql database backend test load "${env.WORKSPACE}/ci/jenkinsfile/cleanup_dev.groovy" @@ -19,7 +19,7 @@ timeout(time: 30, unit: 'MINUTES') { } } dir ("${PROJECT_NAME}_test") { - sh "pytest . --alluredir=\"test_out/dev/single/mysql\" --level=1 --ip ${env.JOB_NAME}-${env.BUILD_NUMBER}-milvus-gpu-engine.milvus-2.svc.cluster.local --internal" + sh "pytest . --alluredir=\"test_out/dev/single/mysql\" --level=1 --ip ${env.JOB_NAME}-${env.BUILD_NUMBER}-milvus-gpu-engine.milvus-2.svc.cluster.local --internal=true" } } catch (exc) { echo 'Milvus Test Failed !' diff --git a/ci/jenkinsfile/dev_test_all.groovy b/ci/jenkinsfile/dev_test_all.groovy index 0253090942..b82d995d8c 100644 --- a/ci/jenkinsfile/dev_test_all.groovy +++ b/ci/jenkinsfile/dev_test_all.groovy @@ -3,7 +3,7 @@ timeout(time: 60, unit: 'MINUTES') { dir ("${PROJECT_NAME}_test") { checkout([$class: 'GitSCM', branches: [[name: "${SEMVER}"]], doGenerateSubmoduleConfigurations: false, extensions: [], submoduleCfg: [], userRemoteConfigs: [[credentialsId: "${params.GIT_USER}", url: "git@192.168.1.105:Test/milvus_test.git", name: 'origin', refspec: "+refs/heads/${SEMVER}:refs/remotes/origin/${SEMVER}"]]]) sh 'python3 -m pip install -r requirements.txt -i http://pypi.douban.com/simple --trusted-host pypi.douban.com' - sh "pytest . --alluredir=\"test_out/dev/single/sqlite\" --ip ${env.JOB_NAME}-${env.BUILD_NUMBER}-milvus-gpu-engine.milvus-1.svc.cluster.local --internal" + sh "pytest . --alluredir=\"test_out/dev/single/sqlite\" --ip ${env.JOB_NAME}-${env.BUILD_NUMBER}-milvus-gpu-engine.milvus-1.svc.cluster.local --internal=true" } // mysql database backend test @@ -20,7 +20,7 @@ timeout(time: 60, unit: 'MINUTES') { } } dir ("${PROJECT_NAME}_test") { - sh "pytest . --alluredir=\"test_out/dev/single/mysql\" --ip ${env.JOB_NAME}-${env.BUILD_NUMBER}-milvus-gpu-engine.milvus-2.svc.cluster.local --internal" + sh "pytest . --alluredir=\"test_out/dev/single/mysql\" --ip ${env.JOB_NAME}-${env.BUILD_NUMBER}-milvus-gpu-engine.milvus-2.svc.cluster.local --internal=true" } } catch (exc) { echo 'Milvus Test Failed !' 
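With the value form fixed to `--internal=true`, the CI invocation can be reproduced outside Jenkins against a locally running Milvus server. A sketch, assuming a checkout of the milvus_test suite and a server reachable at `127.0.0.1` (the `*.svc.cluster.local` host names in the groovy scripts only resolve inside the CI cluster):

```shell
# Mirror the Jenkins stage from a milvus_test checkout
$ python3 -m pip install -r requirements.txt
$ pytest . --alluredir="test_out/dev/single/sqlite" --level=1 --ip 127.0.0.1 --internal=true
```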
From bf35fe2df3503af8a78ccef2a24cfec2d0009a0a Mon Sep 17 00:00:00 2001 From: fishpenguin Date: Sat, 2 Nov 2019 16:28:34 +0800 Subject: [PATCH 139/149] #169 - IVF_FLAT search out of memory Former-commit-id: 0a231da138e4a3bbefb1ca0744bb121544834b65 --- CHANGELOG.md | 1 + core/src/db/engine/ExecutionEngineImpl.cpp | 29 +++++----------------- 2 files changed, 7 insertions(+), 23 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 93021475d9..bd6edf52f6 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,6 +7,7 @@ Please mark all change in change log and use the ticket from JIRA. ## Bug - \#134 - JFrog cache error - \#161 - Search IVFSQHybrid crash on gpu +- \#169 - IVF_FLAT search out of memory ## Feature - \#90 - The server start error messages could be improved to enhance user experience diff --git a/core/src/db/engine/ExecutionEngineImpl.cpp b/core/src/db/engine/ExecutionEngineImpl.cpp index e0e5c39de6..ee04191fef 100644 --- a/core/src/db/engine/ExecutionEngineImpl.cpp +++ b/core/src/db/engine/ExecutionEngineImpl.cpp @@ -309,30 +309,13 @@ ExecutionEngineImpl::CopyToGpu(uint64_t device_id, bool hybrid) { return Status::OK(); } #endif - - auto index = std::static_pointer_cast(cache::GpuCacheMgr::GetInstance(device_id)->GetIndex(location_)); - bool already_in_cache = (index != nullptr); - if (already_in_cache) { - index_ = index; - } else { - if (index_ == nullptr) { - ENGINE_LOG_ERROR << "ExecutionEngineImpl: index is null, failed to copy to gpu"; - return Status(DB_ERROR, "index is null"); - } - - try { - index_ = index_->CopyToGpu(device_id); - ENGINE_LOG_DEBUG << "CPU to GPU" << device_id; - } catch (std::exception& e) { - ENGINE_LOG_ERROR << e.what(); - return Status(DB_ERROR, e.what()); - } + try { + index_ = index_->CopyToGpu(device_id); + ENGINE_LOG_DEBUG << "CPU to GPU" << device_id; + } catch (std::exception& e) { + ENGINE_LOG_ERROR << e.what(); + return Status(DB_ERROR, e.what()); } - - if (!already_in_cache) { - GpuCache(device_id); - } - return Status::OK(); } From f49d23badd412ca7356e287dc7ed2f989c87db4a Mon Sep 17 00:00:00 2001 From: "yudong.cai" Date: Sat, 2 Nov 2019 18:43:54 +0800 Subject: [PATCH 140/149] #168 improve result reduce Former-commit-id: 21887800b9afe47b759a09c1f9fbed97b467cab1 --- CHANGELOG.md | 1 + core/src/db/DB.h | 7 +- core/src/db/DBImpl.cpp | 18 +-- core/src/db/DBImpl.h | 9 +- core/src/db/Types.h | 8 +- core/src/scheduler/job/SearchJob.cpp | 11 +- core/src/scheduler/job/SearchJob.h | 17 ++- core/src/scheduler/task/SearchTask.cpp | 116 +++++++++--------- core/src/scheduler/task/SearchTask.h | 5 +- core/src/server/grpc_impl/GrpcRequestTask.cpp | 26 ++-- core/unittest/db/test_db.cpp | 47 ++++--- core/unittest/db/test_db_mysql.cpp | 22 ++-- core/unittest/db/test_mem.cpp | 24 ++-- core/unittest/db/test_search.cpp | 41 ++++--- core/unittest/metrics/test_metrics.cpp | 5 +- 15 files changed, 198 insertions(+), 159 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index bd6edf52f6..419a123390 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -29,6 +29,7 @@ Please mark all change in change log and use the ticket from JIRA. 
- \#149 - Improve large query optimizer pass - \#156 - Not return error when search_resources and index_build_device set cpu - \#159 - Change the configuration name from 'use_gpu_threshold' to 'gpu_search_threshold' +- \#168 - Improve result reduce ## Task diff --git a/core/src/db/DB.h b/core/src/db/DB.h index a790fadb50..07fe30babd 100644 --- a/core/src/db/DB.h +++ b/core/src/db/DB.h @@ -67,15 +67,16 @@ class DB { virtual Status Query(const std::string& table_id, uint64_t k, uint64_t nq, uint64_t nprobe, const float* vectors, - QueryResults& results) = 0; + ResultIds& result_ids, ResultDistances& result_distances) = 0; virtual Status Query(const std::string& table_id, uint64_t k, uint64_t nq, uint64_t nprobe, const float* vectors, - const meta::DatesT& dates, QueryResults& results) = 0; + const meta::DatesT& dates, ResultIds& result_ids, ResultDistances& result_distances) = 0; virtual Status Query(const std::string& table_id, const std::vector& file_ids, uint64_t k, uint64_t nq, - uint64_t nprobe, const float* vectors, const meta::DatesT& dates, QueryResults& results) = 0; + uint64_t nprobe, const float* vectors, const meta::DatesT& dates, ResultIds& result_ids, + ResultDistances& result_distances) = 0; virtual Status Size(uint64_t& result) = 0; diff --git a/core/src/db/DBImpl.cpp b/core/src/db/DBImpl.cpp index 6995de3d14..fc31846bd3 100644 --- a/core/src/db/DBImpl.cpp +++ b/core/src/db/DBImpl.cpp @@ -336,20 +336,20 @@ DBImpl::DropIndex(const std::string& table_id) { Status DBImpl::Query(const std::string& table_id, uint64_t k, uint64_t nq, uint64_t nprobe, const float* vectors, - QueryResults& results) { + ResultIds& result_ids, ResultDistances& result_distances) { if (shutting_down_.load(std::memory_order_acquire)) { return Status(DB_ERROR, "Milsvus server is shutdown!"); } meta::DatesT dates = {utils::GetDate()}; - Status result = Query(table_id, k, nq, nprobe, vectors, dates, results); + Status result = Query(table_id, k, nq, nprobe, vectors, dates, result_ids, result_distances); return result; } Status DBImpl::Query(const std::string& table_id, uint64_t k, uint64_t nq, uint64_t nprobe, const float* vectors, - const meta::DatesT& dates, QueryResults& results) { + const meta::DatesT& dates, ResultIds& result_ids, ResultDistances& result_distances) { if (shutting_down_.load(std::memory_order_acquire)) { return Status(DB_ERROR, "Milsvus server is shutdown!"); } @@ -372,14 +372,15 @@ DBImpl::Query(const std::string& table_id, uint64_t k, uint64_t nq, uint64_t npr } cache::CpuCacheMgr::GetInstance()->PrintInfo(); // print cache info before query - status = QueryAsync(table_id, file_id_array, k, nq, nprobe, vectors, results); + status = QueryAsync(table_id, file_id_array, k, nq, nprobe, vectors, result_ids, result_distances); cache::CpuCacheMgr::GetInstance()->PrintInfo(); // print cache info after query return status; } Status DBImpl::Query(const std::string& table_id, const std::vector& file_ids, uint64_t k, uint64_t nq, - uint64_t nprobe, const float* vectors, const meta::DatesT& dates, QueryResults& results) { + uint64_t nprobe, const float* vectors, const meta::DatesT& dates, ResultIds& result_ids, + ResultDistances& result_distances) { if (shutting_down_.load(std::memory_order_acquire)) { return Status(DB_ERROR, "Milsvus server is shutdown!"); } @@ -413,7 +414,7 @@ DBImpl::Query(const std::string& table_id, const std::vector& file_ } cache::CpuCacheMgr::GetInstance()->PrintInfo(); // print cache info before query - status = QueryAsync(table_id, file_id_array, k, nq, nprobe, vectors, 
results); + status = QueryAsync(table_id, file_id_array, k, nq, nprobe, vectors, result_ids, result_distances); cache::CpuCacheMgr::GetInstance()->PrintInfo(); // print cache info after query return status; } @@ -432,7 +433,7 @@ DBImpl::Size(uint64_t& result) { /////////////////////////////////////////////////////////////////////////////////////////////////////////////////// Status DBImpl::QueryAsync(const std::string& table_id, const meta::TableFilesSchema& files, uint64_t k, uint64_t nq, - uint64_t nprobe, const float* vectors, QueryResults& results) { + uint64_t nprobe, const float* vectors, ResultIds& result_ids, ResultDistances& result_distances) { server::CollectQueryMetrics metrics(nq); TimeRecorder rc(""); @@ -453,7 +454,8 @@ DBImpl::QueryAsync(const std::string& table_id, const meta::TableFilesSchema& fi } // step 3: construct results - results = job->GetResult(); + result_ids = job->GetResultIds(); + result_distances = job->GetResultDistances(); rc.ElapseFromBegin("Engine query totally cost"); return Status::OK(); diff --git a/core/src/db/DBImpl.h b/core/src/db/DBImpl.h index e1e030cc32..ad9c574bb1 100644 --- a/core/src/db/DBImpl.h +++ b/core/src/db/DBImpl.h @@ -91,15 +91,16 @@ class DBImpl : public DB { Status Query(const std::string& table_id, uint64_t k, uint64_t nq, uint64_t nprobe, const float* vectors, - QueryResults& results) override; + ResultIds& result_ids, ResultDistances& result_distances) override; Status Query(const std::string& table_id, uint64_t k, uint64_t nq, uint64_t nprobe, const float* vectors, - const meta::DatesT& dates, QueryResults& results) override; + const meta::DatesT& dates, ResultIds& result_ids, ResultDistances& result_distances) override; Status Query(const std::string& table_id, const std::vector& file_ids, uint64_t k, uint64_t nq, - uint64_t nprobe, const float* vectors, const meta::DatesT& dates, QueryResults& results) override; + uint64_t nprobe, const float* vectors, const meta::DatesT& dates, ResultIds& result_ids, + ResultDistances& result_distances) override; Status Size(uint64_t& result) override; @@ -107,7 +108,7 @@ class DBImpl : public DB { private: Status QueryAsync(const std::string& table_id, const meta::TableFilesSchema& files, uint64_t k, uint64_t nq, - uint64_t nprobe, const float* vectors, QueryResults& results); + uint64_t nprobe, const float* vectors, ResultIds& result_ids, ResultDistances& result_distances); void BackgroundTimerTask(); diff --git a/core/src/db/Types.h b/core/src/db/Types.h index 94528a9a8a..cc2eab0383 100644 --- a/core/src/db/Types.h +++ b/core/src/db/Types.h @@ -19,6 +19,7 @@ #include "db/engine/ExecutionEngine.h" +#include #include #include #include @@ -26,12 +27,13 @@ namespace milvus { namespace engine { -typedef int64_t IDNumber; +using IDNumber = faiss::Index::idx_t; + typedef IDNumber* IDNumberPtr; typedef std::vector IDNumbers; -typedef std::vector> QueryResult; -typedef std::vector QueryResults; +typedef std::vector ResultIds; +typedef std::vector ResultDistances; struct TableIndex { int32_t engine_type_ = (int)EngineType::FAISS_IDMAP; diff --git a/core/src/scheduler/job/SearchJob.cpp b/core/src/scheduler/job/SearchJob.cpp index 47c825c122..ee41c1ae06 100644 --- a/core/src/scheduler/job/SearchJob.cpp +++ b/core/src/scheduler/job/SearchJob.cpp @@ -53,9 +53,14 @@ SearchJob::SearchDone(size_t index_id) { SERVER_LOG_DEBUG << "SearchJob " << id() << " finish index file: " << index_id; } -ResultSet& -SearchJob::GetResult() { - return result_; +ResultIds& +SearchJob::GetResultIds() { + return result_ids_; 
+} + +ResultDistances& +SearchJob::GetResultDistances() { + return result_distances_; } Status& diff --git a/core/src/scheduler/job/SearchJob.h b/core/src/scheduler/job/SearchJob.h index 90fcf36773..ff5ab34131 100644 --- a/core/src/scheduler/job/SearchJob.h +++ b/core/src/scheduler/job/SearchJob.h @@ -29,6 +29,7 @@ #include #include "Job.h" +#include "db/Types.h" #include "db/meta/MetaTypes.h" namespace milvus { @@ -37,9 +38,9 @@ namespace scheduler { using engine::meta::TableFileSchemaPtr; using Id2IndexMap = std::unordered_map; -using IdDistPair = std::pair; -using Id2DistVec = std::vector; -using ResultSet = std::vector; + +using ResultIds = engine::ResultIds; +using ResultDistances = engine::ResultDistances; class SearchJob : public Job { public: @@ -55,8 +56,11 @@ class SearchJob : public Job { void SearchDone(size_t index_id); - ResultSet& - GetResult(); + ResultIds& + GetResultIds(); + + ResultDistances& + GetResultDistances(); Status& GetStatus(); @@ -104,7 +108,8 @@ class SearchJob : public Job { Id2IndexMap index_files_; // TODO: column-base better ? - ResultSet result_; + ResultIds result_ids_; + ResultDistances result_distances_; Status status_; std::mutex mutex_; diff --git a/core/src/scheduler/task/SearchTask.cpp b/core/src/scheduler/task/SearchTask.cpp index edeb41bdbe..d6ee3c5bd5 100644 --- a/core/src/scheduler/task/SearchTask.cpp +++ b/core/src/scheduler/task/SearchTask.cpp @@ -222,7 +222,7 @@ XSearchTask::Execute() { { std::unique_lock lock(search_job->mutex()); XSearchTask::MergeTopkToResultSet(output_ids, output_distance, spec_k, nq, topk, metric_l2, - search_job->GetResult()); + search_job->GetResultIds(), search_job->GetResultDistances()); } span = rc.RecordSection(hdr + ", reduce topk"); @@ -243,71 +243,75 @@ XSearchTask::Execute() { } void -XSearchTask::MergeTopkToResultSet(const std::vector& input_ids, const std::vector& input_distance, - uint64_t input_k, uint64_t nq, uint64_t topk, bool ascending, - scheduler::ResultSet& result) { - if (result.empty()) { - result.resize(nq); +XSearchTask::MergeTopkToResultSet(const std::vector& src_ids, const std::vector& src_distances, + size_t src_k, size_t nq, size_t topk, bool ascending, scheduler::ResultIds& tar_ids, + scheduler::ResultDistances& tar_distances) { + if (src_ids.empty()) { + return; } + if (tar_ids.empty()) { + tar_ids = src_ids; + tar_distances = src_distances; + return; + } + + size_t tar_k = tar_ids.size() / nq; + size_t buf_k = std::min(topk, src_k + tar_k); + + scheduler::ResultIds buf_ids(nq * buf_k, -1); + scheduler::ResultDistances buf_distances(nq * buf_k, 0.0); + for (uint64_t i = 0; i < nq; i++) { - scheduler::Id2DistVec result_buf; - auto& result_i = result[i]; + size_t buf_k_j = 0, src_k_j = 0, tar_k_j = 0; + size_t buf_idx, src_idx, tar_idx; - if (result[i].empty()) { - result_buf.resize(input_k, scheduler::IdDistPair(-1, 0.0)); - uint64_t input_k_multi_i = topk * i; - for (auto k = 0; k < input_k; ++k) { - uint64_t idx = input_k_multi_i + k; - auto& result_buf_item = result_buf[k]; - result_buf_item.first = input_ids[idx]; - result_buf_item.second = input_distance[idx]; + size_t buf_k_multi_i = buf_k * i; + size_t src_k_multi_i = topk * i; + size_t tar_k_multi_i = tar_k * i; + + while (buf_k_j < buf_k && src_k_j < src_k && tar_k_j < tar_k) { + src_idx = src_k_multi_i + src_k_j; + tar_idx = tar_k_multi_i + tar_k_j; + buf_idx = buf_k_multi_i + buf_k_j; + + if ((ascending && src_distances[src_idx] < tar_distances[tar_idx]) || + (!ascending && src_distances[src_idx] > tar_distances[tar_idx])) { + 
buf_ids[buf_idx] = src_ids[src_idx]; + buf_distances[buf_idx] = src_distances[src_idx]; + src_k_j++; + } else { + buf_ids[buf_idx] = tar_ids[tar_idx]; + buf_distances[buf_idx] = tar_distances[tar_idx]; + tar_k_j++; } - } else { - size_t tar_size = result_i.size(); - uint64_t output_k = std::min(topk, input_k + tar_size); - result_buf.resize(output_k, scheduler::IdDistPair(-1, 0.0)); - size_t buf_k = 0, src_k = 0, tar_k = 0; - uint64_t src_idx; - uint64_t input_k_multi_i = topk * i; - while (buf_k < output_k && src_k < input_k && tar_k < tar_size) { - src_idx = input_k_multi_i + src_k; - auto& result_buf_item = result_buf[buf_k]; - auto& result_item = result_i[tar_k]; - if ((ascending && input_distance[src_idx] < result_item.second) || - (!ascending && input_distance[src_idx] > result_item.second)) { - result_buf_item.first = input_ids[src_idx]; - result_buf_item.second = input_distance[src_idx]; - src_k++; - } else { - result_buf_item = result_item; - tar_k++; + buf_k_j++; + } + + if (buf_k_j < buf_k) { + if (src_k_j < src_k) { + while (buf_k_j < buf_k && src_k_j < src_k) { + buf_idx = buf_k_multi_i + buf_k_j; + src_idx = src_k_multi_i + src_k_j; + buf_ids[buf_idx] = src_ids[src_idx]; + buf_distances[buf_idx] = src_distances[src_idx]; + src_k_j++; + buf_k_j++; } - buf_k++; - } - - if (buf_k < output_k) { - if (src_k < input_k) { - while (buf_k < output_k && src_k < input_k) { - src_idx = input_k_multi_i + src_k; - auto& result_buf_item = result_buf[buf_k]; - result_buf_item.first = input_ids[src_idx]; - result_buf_item.second = input_distance[src_idx]; - src_k++; - buf_k++; - } - } else { - while (buf_k < output_k && tar_k < tar_size) { - result_buf[buf_k] = result_i[tar_k]; - tar_k++; - buf_k++; - } + } else { + while (buf_k_j < buf_k && tar_k_j < tar_k) { + buf_idx = buf_k_multi_i + buf_k_j; + tar_idx = tar_k_multi_i + tar_k_j; + buf_ids[buf_idx] = tar_ids[tar_idx]; + buf_distances[buf_idx] = tar_distances[tar_idx]; + tar_k_j++; + buf_k_j++; } } } - - result_i.swap(result_buf); } + tar_ids.swap(buf_ids); + tar_distances.swap(buf_distances); } // void diff --git a/core/src/scheduler/task/SearchTask.h b/core/src/scheduler/task/SearchTask.h index bbc8b5bd8f..14dbf15605 100644 --- a/core/src/scheduler/task/SearchTask.h +++ b/core/src/scheduler/task/SearchTask.h @@ -39,8 +39,9 @@ class XSearchTask : public Task { public: static void - MergeTopkToResultSet(const std::vector& input_ids, const std::vector& input_distance, - uint64_t input_k, uint64_t nq, uint64_t topk, bool ascending, scheduler::ResultSet& result); + MergeTopkToResultSet(const std::vector& src_ids, const std::vector& src_distances, uint64_t src_k, + uint64_t nq, uint64_t topk, bool ascending, scheduler::ResultIds& tar_ids, + scheduler::ResultDistances& tar_distances); // static void // MergeTopkArray(std::vector& tar_ids, std::vector& tar_distance, uint64_t& tar_input_k, diff --git a/core/src/server/grpc_impl/GrpcRequestTask.cpp b/core/src/server/grpc_impl/GrpcRequestTask.cpp index be1fca0186..77f262bda6 100644 --- a/core/src/server/grpc_impl/GrpcRequestTask.cpp +++ b/core/src/server/grpc_impl/GrpcRequestTask.cpp @@ -637,7 +637,8 @@ SearchTask::OnExecute() { rc.RecordSection("prepare vector data"); // step 6: search vectors - engine::QueryResults results; + engine::ResultIds result_ids; + engine::ResultDistances result_distances; auto record_count = (uint64_t)search_param_->query_record_array().size(); #ifdef MILVUS_ENABLE_PROFILING @@ -647,11 +648,11 @@ SearchTask::OnExecute() { #endif if (file_id_array_.empty()) { - status = - 
DBWrapper::DB()->Query(table_name_, (size_t)top_k, record_count, nprobe, vec_f.data(), dates, results); + status = DBWrapper::DB()->Query(table_name_, (size_t)top_k, record_count, nprobe, vec_f.data(), dates, + result_ids, result_distances); } else { status = DBWrapper::DB()->Query(table_name_, file_id_array_, (size_t)top_k, record_count, nprobe, - vec_f.data(), dates, results); + vec_f.data(), dates, result_ids, result_distances); } #ifdef MILVUS_ENABLE_PROFILING @@ -663,23 +664,20 @@ SearchTask::OnExecute() { return status; } - if (results.empty()) { + if (result_ids.empty()) { return Status::OK(); // empty table } - if (results.size() != record_count) { - std::string msg = "Search " + std::to_string(record_count) + " vectors but only return " + - std::to_string(results.size()) + " results"; - return Status(SERVER_ILLEGAL_SEARCH_RESULT, msg); - } + size_t result_k = result_ids.size() / record_count; // step 7: construct result array - for (auto& result : results) { + for (size_t i = 0; i < record_count; i++) { ::milvus::grpc::TopKQueryResult* topk_query_result = topk_result_list->add_topk_query_result(); - for (auto& pair : result) { + for (size_t j = 0; j < result_k; j++) { ::milvus::grpc::QueryResult* grpc_result = topk_query_result->add_query_result_arrays(); - grpc_result->set_id(pair.first); - grpc_result->set_distance(pair.second); + size_t idx = i * result_k + j; + grpc_result->set_id(result_ids[idx]); + grpc_result->set_distance(result_distances[idx]); } } diff --git a/core/unittest/db/test_db.cpp b/core/unittest/db/test_db.cpp index b869d17388..f9e8da9c0f 100644 --- a/core/unittest/db/test_db.cpp +++ b/core/unittest/db/test_db.cpp @@ -175,7 +175,8 @@ TEST_F(DBTest, DB_TEST) { BuildVectors(qb, qxb); std::thread search([&]() { - milvus::engine::QueryResults results; + milvus::engine::ResultIds result_ids; + milvus::engine::ResultDistances result_distances; int k = 10; std::this_thread::sleep_for(std::chrono::seconds(2)); @@ -190,17 +191,17 @@ TEST_F(DBTest, DB_TEST) { prev_count = count; START_TIMER; - stat = db_->Query(TABLE_NAME, k, qb, 10, qxb.data(), results); + stat = db_->Query(TABLE_NAME, k, qb, 10, qxb.data(), result_ids, result_distances); ss << "Search " << j << " With Size " << count / milvus::engine::M << " M"; STOP_TIMER(ss.str()); ASSERT_TRUE(stat.ok()); - for (auto k = 0; k < qb; ++k) { - ASSERT_EQ(results[k][0].first, target_ids[k]); + for (auto i = 0; i < qb; ++i) { + ASSERT_EQ(result_ids[i*k], target_ids[i]); ss.str(""); - ss << "Result [" << k << "]:"; - for (auto result : results[k]) { - ss << result.first << " "; + ss << "Result [" << i << "]:"; + for (auto t = 0; t < k; t++) { + ss << result_ids[i * k + t] << " "; } /* LOG(DEBUG) << ss.str(); */ } @@ -284,16 +285,18 @@ TEST_F(DBTest, SEARCH_TEST) { db_->CreateIndex(TABLE_NAME, index); // wait until build index finish { - milvus::engine::QueryResults results; - stat = db_->Query(TABLE_NAME, k, nq, 10, xq.data(), results); + milvus::engine::ResultIds result_ids; + milvus::engine::ResultDistances result_distances; + stat = db_->Query(TABLE_NAME, k, nq, 10, xq.data(), result_ids, result_distances); ASSERT_TRUE(stat.ok()); } {//search by specify index file milvus::engine::meta::DatesT dates; std::vector file_ids = {"1", "2", "3", "4", "5", "6"}; - milvus::engine::QueryResults results; - stat = db_->Query(TABLE_NAME, file_ids, k, nq, 10, xq.data(), dates, results); + milvus::engine::ResultIds result_ids; + milvus::engine::ResultDistances result_distances; + stat = db_->Query(TABLE_NAME, file_ids, k, nq, 10, 
xq.data(), dates, result_ids, result_distances); ASSERT_TRUE(stat.ok()); } @@ -303,22 +306,25 @@ TEST_F(DBTest, SEARCH_TEST) { db_->CreateIndex(TABLE_NAME, index); // wait until build index finish { - milvus::engine::QueryResults results; - stat = db_->Query(TABLE_NAME, k, nq, 10, xq.data(), results); + milvus::engine::ResultIds result_ids; + milvus::engine::ResultDistances result_distances; + stat = db_->Query(TABLE_NAME, k, nq, 10, xq.data(), result_ids, result_distances); ASSERT_TRUE(stat.ok()); } { - milvus::engine::QueryResults large_nq_results; - stat = db_->Query(TABLE_NAME, k, 200, 10, xq.data(), large_nq_results); + milvus::engine::ResultIds result_ids; + milvus::engine::ResultDistances result_distances; + stat = db_->Query(TABLE_NAME, k, 200, 10, xq.data(), result_ids, result_distances); ASSERT_TRUE(stat.ok()); } {//search by specify index file milvus::engine::meta::DatesT dates; std::vector file_ids = {"1", "2", "3", "4", "5", "6"}; - milvus::engine::QueryResults results; - stat = db_->Query(TABLE_NAME, file_ids, k, nq, 10, xq.data(), dates, results); + milvus::engine::ResultIds result_ids; + milvus::engine::ResultDistances result_distances; + stat = db_->Query(TABLE_NAME, file_ids, k, nq, 10, xq.data(), dates, result_ids, result_distances); ASSERT_TRUE(stat.ok()); } @@ -391,11 +397,12 @@ TEST_F(DBTest, SHUTDOWN_TEST) { ASSERT_FALSE(stat.ok()); milvus::engine::meta::DatesT dates; - milvus::engine::QueryResults results; - stat = db_->Query(table_info.table_id_, 1, 1, 1, nullptr, dates, results); + milvus::engine::ResultIds result_ids; + milvus::engine::ResultDistances result_distances; + stat = db_->Query(table_info.table_id_, 1, 1, 1, nullptr, dates, result_ids, result_distances); ASSERT_FALSE(stat.ok()); std::vector file_ids; - stat = db_->Query(table_info.table_id_, file_ids, 1, 1, 1, nullptr, dates, results); + stat = db_->Query(table_info.table_id_, file_ids, 1, 1, 1, nullptr, dates, result_ids, result_distances); ASSERT_FALSE(stat.ok()); stat = db_->DeleteTable(table_info.table_id_, dates); diff --git a/core/unittest/db/test_db_mysql.cpp b/core/unittest/db/test_db_mysql.cpp index ae1da8012a..30a616e662 100644 --- a/core/unittest/db/test_db_mysql.cpp +++ b/core/unittest/db/test_db_mysql.cpp @@ -81,7 +81,8 @@ TEST_F(MySqlDBTest, DB_TEST) { ASSERT_EQ(target_ids.size(), qb); std::thread search([&]() { - milvus::engine::QueryResults results; + milvus::engine::ResultIds result_ids; + milvus::engine::ResultDistances result_distances; int k = 10; std::this_thread::sleep_for(std::chrono::seconds(5)); @@ -96,25 +97,25 @@ TEST_F(MySqlDBTest, DB_TEST) { prev_count = count; START_TIMER; - stat = db_->Query(TABLE_NAME, k, qb, 10, qxb.data(), results); + stat = db_->Query(TABLE_NAME, k, qb, 10, qxb.data(), result_ids, result_distances); ss << "Search " << j << " With Size " << count / milvus::engine::M << " M"; STOP_TIMER(ss.str()); ASSERT_TRUE(stat.ok()); - for (auto k = 0; k < qb; ++k) { + for (auto i = 0; i < qb; ++i) { // std::cout << results[k][0].first << " " << target_ids[k] << std::endl; // ASSERT_EQ(results[k][0].first, target_ids[k]); bool exists = false; - for (auto &result : results[k]) { - if (result.first == target_ids[k]) { + for (auto t = 0; t < k; t++) { + if (result_ids[i * k + t] == target_ids[i]) { exists = true; } } ASSERT_TRUE(exists); ss.str(""); - ss << "Result [" << k << "]:"; - for (auto result : results[k]) { - ss << result.first << " "; + ss << "Result [" << i << "]:"; + for (auto t = 0; t < k; t++) { + ss << result_ids[i * k + t] << " "; } /* LOG(DEBUG) << 
ss.str(); */ } @@ -188,8 +189,9 @@ TEST_F(MySqlDBTest, SEARCH_TEST) { sleep(2); // wait until build index finish - milvus::engine::QueryResults results; - stat = db_->Query(TABLE_NAME, k, nq, 10, xq.data(), results); + milvus::engine::ResultIds result_ids; + milvus::engine::ResultDistances result_distances; + stat = db_->Query(TABLE_NAME, k, nq, 10, xq.data(), result_ids, result_distances); ASSERT_TRUE(stat.ok()); } diff --git a/core/unittest/db/test_mem.cpp b/core/unittest/db/test_mem.cpp index e05811ff9e..939e61246c 100644 --- a/core/unittest/db/test_mem.cpp +++ b/core/unittest/db/test_mem.cpp @@ -259,10 +259,11 @@ TEST_F(MemManagerTest2, SERIAL_INSERT_SEARCH_TEST) { int topk = 10, nprobe = 10; for (auto& pair : search_vectors) { auto& search = pair.second; - milvus::engine::QueryResults results; - stat = db_->Query(GetTableName(), topk, 1, nprobe, search.data(), results); - ASSERT_EQ(results[0][0].first, pair.first); - ASSERT_LT(results[0][0].second, 1e-4); + milvus::engine::ResultIds result_ids; + milvus::engine::ResultDistances result_distances; + stat = db_->Query(GetTableName(), topk, 1, nprobe, search.data(), result_ids, result_distances); + ASSERT_EQ(result_ids[0], pair.first); + ASSERT_LT(result_distances[0], 1e-4); } } @@ -314,7 +315,8 @@ TEST_F(MemManagerTest2, CONCURRENT_INSERT_SEARCH_TEST) { BuildVectors(qb, qxb); std::thread search([&]() { - milvus::engine::QueryResults results; + milvus::engine::ResultIds result_ids; + milvus::engine::ResultDistances result_distances; int k = 10; std::this_thread::sleep_for(std::chrono::seconds(2)); @@ -329,17 +331,17 @@ TEST_F(MemManagerTest2, CONCURRENT_INSERT_SEARCH_TEST) { prev_count = count; START_TIMER; - stat = db_->Query(GetTableName(), k, qb, 10, qxb.data(), results); + stat = db_->Query(GetTableName(), k, qb, 10, qxb.data(), result_ids, result_distances); ss << "Search " << j << " With Size " << count / milvus::engine::M << " M"; STOP_TIMER(ss.str()); ASSERT_TRUE(stat.ok()); - for (auto k = 0; k < qb; ++k) { - ASSERT_EQ(results[k][0].first, target_ids[k]); + for (auto i = 0; i < qb; ++i) { + ASSERT_EQ(result_ids[i * k], target_ids[i]); ss.str(""); - ss << "Result [" << k << "]:"; - for (auto result : results[k]) { - ss << result.first << " "; + ss << "Result [" << i << "]:"; + for (auto t = 0; t < k; t++) { + ss << result_ids[i * k + t] << " "; } /* LOG(DEBUG) << ss.str(); */ } diff --git a/core/unittest/db/test_search.cpp b/core/unittest/db/test_search.cpp index b8cf08b3e2..adf25e581c 100644 --- a/core/unittest/db/test_search.cpp +++ b/core/unittest/db/test_search.cpp @@ -85,8 +85,10 @@ CheckTopkResult(const std::vector& input_ids_1, uint64_t topk, uint64_t nq, bool ascending, - const milvus::scheduler::ResultSet& result) { - ASSERT_EQ(result.size(), nq); + const ms::ResultIds& result_ids, + const ms::ResultDistances& result_distances) { + ASSERT_EQ(result_ids.size(), nq * topk); + ASSERT_EQ(result_distances.size(), nq * topk); ASSERT_EQ(input_ids_1.size(), input_distance_1.size()); ASSERT_EQ(input_ids_2.size(), input_distance_2.size()); @@ -111,15 +113,16 @@ CheckTopkResult(const std::vector& input_ids_1, ++iter; } - uint64_t n = std::min(topk, result[i].size()); + uint64_t n = std::min(topk, result_ids.size() / nq); for (uint64_t j = 0; j < n; j++) { - if (result[i][j].first < 0) { + uint64_t idx = i * n + j; + if (result_ids[idx] < 0) { continue; } - if (src_vec[j] != result[i][j].second) { - std::cout << src_vec[j] << " " << result[i][j].second << std::endl; + if (src_vec[j] != result_distances[idx]) { + std::cout << 
src_vec[j] << " " << result_distances[idx] << std::endl; } - ASSERT_TRUE(src_vec[j] == result[i][j].second); + ASSERT_TRUE(src_vec[j] == result_distances[idx]); } } } @@ -130,12 +133,13 @@ void MergeTopkToResultSetTest(uint64_t topk_1, uint64_t topk_2, uint64_t nq, uint64_t topk, bool ascending) { std::vector ids1, ids2; std::vector dist1, dist2; - ms::ResultSet result; + ms::ResultIds result_ids; + ms::ResultDistances result_distances; BuildResult(ids1, dist1, topk_1, topk, nq, ascending); BuildResult(ids2, dist2, topk_2, topk, nq, ascending); - ms::XSearchTask::MergeTopkToResultSet(ids1, dist1, topk_1, nq, topk, ascending, result); - ms::XSearchTask::MergeTopkToResultSet(ids2, dist2, topk_2, nq, topk, ascending, result); - CheckTopkResult(ids1, dist1, ids2, dist2, topk, nq, ascending, result); + ms::XSearchTask::MergeTopkToResultSet(ids1, dist1, topk_1, nq, topk, ascending, result_ids, result_distances); + ms::XSearchTask::MergeTopkToResultSet(ids2, dist2, topk_2, nq, topk, ascending, result_ids, result_distances); + CheckTopkResult(ids1, dist1, ids2, dist2, topk, nq, ascending, result_ids, result_distances); } TEST(DBSearchTest, MERGE_RESULT_SET_TEST) { @@ -222,9 +226,9 @@ TEST(DBSearchTest, REDUCE_PERF_TEST) { int32_t index_file_num = 478; /* sift1B dataset, index files num */ bool ascending = true; - std::vector thread_vec = {4, 8}; - std::vector nq_vec = {1, 10, 100}; - std::vector topk_vec = {1, 4, 16, 64}; + std::vector thread_vec = {4}; + std::vector nq_vec = {1000}; + std::vector topk_vec = {64}; int32_t NQ = nq_vec[nq_vec.size() - 1]; int32_t TOPK = topk_vec[topk_vec.size() - 1]; @@ -247,7 +251,8 @@ TEST(DBSearchTest, REDUCE_PERF_TEST) { for (int32_t nq : nq_vec) { for (int32_t top_k : topk_vec) { - ms::ResultSet final_result, final_result_2, final_result_3; + ms::ResultIds final_result_ids, final_result_ids_2, final_result_ids_3; + ms::ResultDistances final_result_distances, final_result_distances_2, final_result_distances_3; std::vector> id_vec_1(index_file_num); std::vector> dist_vec_1(index_file_num); @@ -268,8 +273,10 @@ TEST(DBSearchTest, REDUCE_PERF_TEST) { nq, top_k, ascending, - final_result); - ASSERT_EQ(final_result.size(), nq); + final_result_ids, + final_result_distances); + ASSERT_EQ(final_result_ids.size(), nq * top_k); + ASSERT_EQ(final_result_distances.size(), nq * top_k); } rc1.RecordSection("reduce done"); diff --git a/core/unittest/metrics/test_metrics.cpp b/core/unittest/metrics/test_metrics.cpp index c0d1044bb4..1b26ad097b 100644 --- a/core/unittest/metrics/test_metrics.cpp +++ b/core/unittest/metrics/test_metrics.cpp @@ -75,7 +75,8 @@ TEST_F(MetricTest, METRIC_TEST) { } std::thread search([&]() { - milvus::engine::QueryResults results; +// milvus::engine::ResultIds result_ids; +// milvus::engine::ResultDistances result_distances; int k = 10; std::this_thread::sleep_for(std::chrono::seconds(2)); @@ -90,7 +91,7 @@ TEST_F(MetricTest, METRIC_TEST) { prev_count = count; START_TIMER; -// stat = db_->Query(group_name, k, qb, qxb, results); +// stat = db_->Query(group_name, k, qb, qxb, result_ids, result_distances); ss << "Search " << j << " With Size " << (float) (count * group_dim * sizeof(float)) / (1024 * 1024) << " M"; From 5150647eeb283d9238633a3bd1bcd4f82f504c9d Mon Sep 17 00:00:00 2001 From: "yudong.cai" Date: Sat, 2 Nov 2019 20:43:14 +0800 Subject: [PATCH 141/149] #168 fix reduce bug Former-commit-id: 5c6f2e25cbf9efe4a58bf89757fc4983ebda4f35 --- core/src/scheduler/task/SearchTask.cpp | 8 +- core/src/scheduler/task/SearchTask.h | 4 +- 
core/unittest/db/test_search.cpp | 143 +++++++++++++------------ 3 files changed, 77 insertions(+), 78 deletions(-) diff --git a/core/src/scheduler/task/SearchTask.cpp b/core/src/scheduler/task/SearchTask.cpp index d6ee3c5bd5..08bc6525aa 100644 --- a/core/src/scheduler/task/SearchTask.cpp +++ b/core/src/scheduler/task/SearchTask.cpp @@ -243,19 +243,13 @@ XSearchTask::Execute() { } void -XSearchTask::MergeTopkToResultSet(const std::vector& src_ids, const std::vector& src_distances, +XSearchTask::MergeTopkToResultSet(const scheduler::ResultIds& src_ids, const scheduler::ResultDistances& src_distances, size_t src_k, size_t nq, size_t topk, bool ascending, scheduler::ResultIds& tar_ids, scheduler::ResultDistances& tar_distances) { if (src_ids.empty()) { return; } - if (tar_ids.empty()) { - tar_ids = src_ids; - tar_distances = src_distances; - return; - } - size_t tar_k = tar_ids.size() / nq; size_t buf_k = std::min(topk, src_k + tar_k); diff --git a/core/src/scheduler/task/SearchTask.h b/core/src/scheduler/task/SearchTask.h index 14dbf15605..bd51137341 100644 --- a/core/src/scheduler/task/SearchTask.h +++ b/core/src/scheduler/task/SearchTask.h @@ -39,8 +39,8 @@ class XSearchTask : public Task { public: static void - MergeTopkToResultSet(const std::vector& src_ids, const std::vector& src_distances, uint64_t src_k, - uint64_t nq, uint64_t topk, bool ascending, scheduler::ResultIds& tar_ids, + MergeTopkToResultSet(const scheduler::ResultIds& src_ids, const scheduler::ResultDistances& src_distances, + size_t src_k, size_t nq, size_t topk, bool ascending, scheduler::ResultIds& tar_ids, scheduler::ResultDistances& tar_distances); // static void diff --git a/core/unittest/db/test_search.cpp b/core/unittest/db/test_search.cpp index adf25e581c..1d1d9a677a 100644 --- a/core/unittest/db/test_search.cpp +++ b/core/unittest/db/test_search.cpp @@ -19,6 +19,7 @@ #include #include +#include "scheduler/job/SearchJob.h" #include "scheduler/task/SearchTask.h" #include "utils/TimeRecorder.h" #include "utils/ThreadPool.h" @@ -28,76 +29,80 @@ namespace { namespace ms = milvus::scheduler; void -BuildResult(std::vector& output_ids, - std::vector& output_distance, - uint64_t input_k, - uint64_t topk, - uint64_t nq, +BuildResult(ms::ResultIds& output_ids, + ms::ResultDistances & output_distances, + size_t input_k, + size_t topk, + size_t nq, bool ascending) { output_ids.clear(); output_ids.resize(nq * topk); - output_distance.clear(); - output_distance.resize(nq * topk); + output_distances.clear(); + output_distances.resize(nq * topk); - for (uint64_t i = 0; i < nq; i++) { + for (size_t i = 0; i < nq; i++) { //insert valid items - for (uint64_t j = 0; j < input_k; j++) { + for (size_t j = 0; j < input_k; j++) { output_ids[i * topk + j] = (int64_t)(drand48() * 100000); - output_distance[i * topk + j] = ascending ? (j + drand48()) : ((input_k - j) + drand48()); + output_distances[i * topk + j] = ascending ? 
(j + drand48()) : ((input_k - j) + drand48()); } //insert invalid items - for (uint64_t j = input_k; j < topk; j++) { + for (size_t j = input_k; j < topk; j++) { output_ids[i * topk + j] = -1; - output_distance[i * topk + j] = -1.0; + output_distances[i * topk + j] = -1.0; } } } void -CopyResult(std::vector& output_ids, - std::vector& output_distance, - uint64_t output_topk, - std::vector& input_ids, - std::vector& input_distance, - uint64_t input_topk, - uint64_t nq) { +CopyResult(ms::ResultIds& output_ids, + ms::ResultDistances& output_distances, + size_t output_topk, + ms::ResultIds& input_ids, + ms::ResultDistances& input_distances, + size_t input_topk, + size_t nq) { ASSERT_TRUE(input_ids.size() >= nq * input_topk); - ASSERT_TRUE(input_distance.size() >= nq * input_topk); + ASSERT_TRUE(input_distances.size() >= nq * input_topk); ASSERT_TRUE(output_topk <= input_topk); output_ids.clear(); output_ids.resize(nq * output_topk); - output_distance.clear(); - output_distance.resize(nq * output_topk); + output_distances.clear(); + output_distances.resize(nq * output_topk); - for (uint64_t i = 0; i < nq; i++) { - for (uint64_t j = 0; j < output_topk; j++) { + for (size_t i = 0; i < nq; i++) { + for (size_t j = 0; j < output_topk; j++) { output_ids[i * output_topk + j] = input_ids[i * input_topk + j]; - output_distance[i * output_topk + j] = input_distance[i * input_topk + j]; + output_distances[i * output_topk + j] = input_distances[i * input_topk + j]; } } } void -CheckTopkResult(const std::vector& input_ids_1, - const std::vector& input_distance_1, - const std::vector& input_ids_2, - const std::vector& input_distance_2, - uint64_t topk, - uint64_t nq, +CheckTopkResult(const ms::ResultIds& input_ids_1, + const ms::ResultDistances& input_distances_1, + size_t input_k_1, + const ms::ResultIds& input_ids_2, + const ms::ResultDistances& input_distances_2, + size_t input_k_2, + size_t topk, + size_t nq, bool ascending, const ms::ResultIds& result_ids, const ms::ResultDistances& result_distances) { - ASSERT_EQ(result_ids.size(), nq * topk); - ASSERT_EQ(result_distances.size(), nq * topk); - ASSERT_EQ(input_ids_1.size(), input_distance_1.size()); - ASSERT_EQ(input_ids_2.size(), input_distance_2.size()); + ASSERT_EQ(result_ids.size(), result_distances.size()); + ASSERT_EQ(input_ids_1.size(), input_distances_1.size()); + ASSERT_EQ(input_ids_2.size(), input_distances_2.size()); - for (int64_t i = 0; i < nq; i++) { + size_t result_k = result_distances.size() / nq; + ASSERT_EQ(result_k, std::min(topk, input_k_1 + input_k_2)); + + for (size_t i = 0; i < nq; i++) { std::vector - src_vec(input_distance_1.begin() + i * topk, input_distance_1.begin() + (i + 1) * topk); + src_vec(input_distances_1.begin() + i * topk, input_distances_1.begin() + (i + 1) * topk); src_vec.insert(src_vec.end(), - input_distance_2.begin() + i * topk, - input_distance_2.begin() + (i + 1) * topk); + input_distances_2.begin() + i * topk, + input_distances_2.begin() + (i + 1) * topk); if (ascending) { std::sort(src_vec.begin(), src_vec.end()); } else { @@ -113,9 +118,9 @@ CheckTopkResult(const std::vector& input_ids_1, ++iter; } - uint64_t n = std::min(topk, result_ids.size() / nq); - for (uint64_t j = 0; j < n; j++) { - uint64_t idx = i * n + j; + size_t n = std::min(topk, result_ids.size() / nq); + for (size_t j = 0; j < n; j++) { + size_t idx = i * n + j; if (result_ids[idx] < 0) { continue; } @@ -130,21 +135,21 @@ CheckTopkResult(const std::vector& input_ids_1, } // namespace void -MergeTopkToResultSetTest(uint64_t topk_1, uint64_t 
topk_2, uint64_t nq, uint64_t topk, bool ascending) { - std::vector ids1, ids2; - std::vector dist1, dist2; +MergeTopkToResultSetTest(size_t topk_1, size_t topk_2, size_t nq, size_t topk, bool ascending) { + ms::ResultIds ids1, ids2; + ms::ResultDistances dist1, dist2; ms::ResultIds result_ids; ms::ResultDistances result_distances; BuildResult(ids1, dist1, topk_1, topk, nq, ascending); BuildResult(ids2, dist2, topk_2, topk, nq, ascending); ms::XSearchTask::MergeTopkToResultSet(ids1, dist1, topk_1, nq, topk, ascending, result_ids, result_distances); ms::XSearchTask::MergeTopkToResultSet(ids2, dist2, topk_2, nq, topk, ascending, result_ids, result_distances); - CheckTopkResult(ids1, dist1, ids2, dist2, topk, nq, ascending, result_ids, result_distances); + CheckTopkResult(ids1, dist1, topk_1, ids2, dist2, topk_2, topk, nq, ascending, result_ids, result_distances); } TEST(DBSearchTest, MERGE_RESULT_SET_TEST) { - uint64_t NQ = 15; - uint64_t TOP_K = 64; + size_t NQ = 15; + size_t TOP_K = 64; /* test1, id1/dist1 valid, id2/dist2 empty */ MergeTopkToResultSetTest(TOP_K, 0, NQ, TOP_K, true); @@ -163,21 +168,21 @@ TEST(DBSearchTest, MERGE_RESULT_SET_TEST) { MergeTopkToResultSetTest(TOP_K / 2, TOP_K / 3, NQ, TOP_K, false); } -//void MergeTopkArrayTest(uint64_t topk_1, uint64_t topk_2, uint64_t nq, uint64_t topk, bool ascending) { +//void MergeTopkArrayTest(size_t topk_1, size_t topk_2, size_t nq, size_t topk, bool ascending) { // std::vector ids1, ids2; // std::vector dist1, dist2; // ms::ResultSet result; // BuildResult(ids1, dist1, topk_1, topk, nq, ascending); // BuildResult(ids2, dist2, topk_2, topk, nq, ascending); -// uint64_t result_topk = std::min(topk, topk_1 + topk_2); +// size_t result_topk = std::min(topk, topk_1 + topk_2); // ms::XSearchTask::MergeTopkArray(ids1, dist1, topk_1, ids2, dist2, topk_2, nq, topk, ascending); // if (ids1.size() != result_topk * nq) { // std::cout << ids1.size() << " " << result_topk * nq << std::endl; // } // ASSERT_TRUE(ids1.size() == result_topk * nq); // ASSERT_TRUE(dist1.size() == result_topk * nq); -// for (uint64_t i = 0; i < nq; i++) { -// for (uint64_t k = 1; k < result_topk; k++) { +// for (size_t i = 0; i < nq; i++) { +// for (size_t k = 1; k < result_topk; k++) { // float f0 = dist1[i * topk + k - 1]; // float f1 = dist1[i * topk + k]; // if (ascending) { @@ -196,8 +201,8 @@ TEST(DBSearchTest, MERGE_RESULT_SET_TEST) { //} //TEST(DBSearchTest, MERGE_ARRAY_TEST) { -// uint64_t NQ = 15; -// uint64_t TOP_K = 64; +// size_t NQ = 15; +// size_t TOP_K = 64; // // /* test1, id1/dist1 valid, id2/dist2 empty */ // MergeTopkArrayTest(TOP_K, 0, NQ, TOP_K, true); @@ -226,23 +231,23 @@ TEST(DBSearchTest, REDUCE_PERF_TEST) { int32_t index_file_num = 478; /* sift1B dataset, index files num */ bool ascending = true; - std::vector thread_vec = {4}; - std::vector nq_vec = {1000}; - std::vector topk_vec = {64}; - int32_t NQ = nq_vec[nq_vec.size() - 1]; - int32_t TOPK = topk_vec[topk_vec.size() - 1]; + std::vector thread_vec = {4}; + std::vector nq_vec = {1000}; + std::vector topk_vec = {64}; + size_t NQ = nq_vec[nq_vec.size() - 1]; + size_t TOPK = topk_vec[topk_vec.size() - 1]; - std::vector> id_vec; - std::vector> dist_vec; - std::vector input_ids; - std::vector input_distance; + std::vector id_vec; + std::vector dist_vec; + ms::ResultIds input_ids; + ms::ResultDistances input_distances; int32_t i, k, step; /* generate testing data */ for (i = 0; i < index_file_num; i++) { - BuildResult(input_ids, input_distance, TOPK, TOPK, NQ, ascending); + BuildResult(input_ids, 
input_distances, TOPK, TOPK, NQ, ascending); id_vec.push_back(input_ids); - dist_vec.push_back(input_distance); + dist_vec.push_back(input_distances); } for (int32_t max_thread_num : thread_vec) { @@ -254,8 +259,8 @@ TEST(DBSearchTest, REDUCE_PERF_TEST) { ms::ResultIds final_result_ids, final_result_ids_2, final_result_ids_3; ms::ResultDistances final_result_distances, final_result_distances_2, final_result_distances_3; - std::vector> id_vec_1(index_file_num); - std::vector> dist_vec_1(index_file_num); + std::vector id_vec_1(index_file_num); + std::vector dist_vec_1(index_file_num); for (i = 0; i < index_file_num; i++) { CopyResult(id_vec_1[i], dist_vec_1[i], top_k, id_vec[i], dist_vec[i], TOPK, nq); } @@ -285,7 +290,7 @@ TEST(DBSearchTest, REDUCE_PERF_TEST) { // /* method-2 */ // std::vector> id_vec_2(index_file_num); // std::vector> dist_vec_2(index_file_num); -// std::vector k_vec_2(index_file_num); +// std::vector k_vec_2(index_file_num); // for (i = 0; i < index_file_num; i++) { // CopyResult(id_vec_2[i], dist_vec_2[i], top_k, id_vec[i], dist_vec[i], TOPK, nq); // k_vec_2[i] = top_k; @@ -328,7 +333,7 @@ TEST(DBSearchTest, REDUCE_PERF_TEST) { // /* method-3 parallel */ // std::vector> id_vec_3(index_file_num); // std::vector> dist_vec_3(index_file_num); -// std::vector k_vec_3(index_file_num); +// std::vector k_vec_3(index_file_num); // for (i = 0; i < index_file_num; i++) { // CopyResult(id_vec_3[i], dist_vec_3[i], top_k, id_vec[i], dist_vec[i], TOPK, nq); // k_vec_3[i] = top_k; From 0ec462c6034eb197ce2d3c27789c3790dec1d05f Mon Sep 17 00:00:00 2001 From: JinHai-CN Date: Sat, 2 Nov 2019 22:10:14 +0800 Subject: [PATCH 142/149] Update faiss Former-commit-id: ec0c88417aa5b3197b90feb53cad0b0ff791244a --- core/src/index/cmake/ThirdPartyPackagesCore.cmake | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/core/src/index/cmake/ThirdPartyPackagesCore.cmake b/core/src/index/cmake/ThirdPartyPackagesCore.cmake index c066f7b392..66ad5e9bbe 100644 --- a/core/src/index/cmake/ThirdPartyPackagesCore.cmake +++ b/core/src/index/cmake/ThirdPartyPackagesCore.cmake @@ -246,7 +246,8 @@ if(CUSTOMIZATION) # set(FAISS_MD5 "c89ea8e655f5cdf58f42486f13614714") # commit-id 9c28a1cbb88f41fa03b03d7204106201ad33276b branch-0.2.1 # set(FAISS_MD5 "87fdd86351ffcaf3f80dc26ade63c44b") # commit-id 841a156e67e8e22cd8088e1b58c00afbf2efc30b branch-0.2.1 # set(FAISS_MD5 "f3b2ce3364c3fa7febd3aa7fdd0fe380") # commit-id 694e03458e6b69ce8a62502f71f69a614af5af8f branch-0.3.0 - set(FAISS_MD5 "bb30722c22390ce5f6759ccb216c1b2a") # commit-id d324db297475286afe107847c7fb7a0f9dc7e90e branch-0.3.0 + # set(FAISS_MD5 "bb30722c22390ce5f6759ccb216c1b2a") # commit-id d324db297475286afe107847c7fb7a0f9dc7e90e branch-0.3.0 + set(FAISS_MD5 "2293cdb209c3718e3b19f3edae8b32b3") # commit-id a13c1205dc52977a9ad3b33a14efa958604a8bff branch-0.3.0 endif() else() set(FAISS_SOURCE_URL "https://github.com/milvus-io/faiss/archive/1.6.0.tar.gz") From 9ff83e140e88a7275ff0a19a5648a0e35b623661 Mon Sep 17 00:00:00 2001 From: "yudong.cai" Date: Sun, 3 Nov 2019 12:51:47 +0800 Subject: [PATCH 143/149] add invalid config unittest Former-commit-id: b2201e3394a953e7e2055bdf351a094a6d6e5315 --- core/src/server/Config.cpp | 3 +- core/unittest/server/test_config.cpp | 117 +++++++++++++++++++++++++++ 2 files changed, 118 insertions(+), 2 deletions(-) diff --git a/core/src/server/Config.cpp b/core/src/server/Config.cpp index dbe7d260c5..cc88dccffa 100644 --- a/core/src/server/Config.cpp +++ b/core/src/server/Config.cpp @@ -401,8 +401,7 @@ Status 
Config::CheckServerConfigDeployMode(const std::string& value) { if (value != "single" && value != "cluster_readonly" && value != "cluster_writable") { return Status(SERVER_INVALID_ARGUMENT, - "server_config.deploy_mode is not one of " - "single, cluster_readonly, and cluster_writable."); + "server_config.deploy_mode is not one of single, cluster_readonly, and cluster_writable."); } return Status::OK(); } diff --git a/core/unittest/server/test_config.cpp b/core/unittest/server/test_config.cpp index 76230cbcc3..96f9bd5e0e 100644 --- a/core/unittest/server/test_config.cpp +++ b/core/unittest/server/test_config.cpp @@ -112,3 +112,120 @@ TEST_F(ConfigTest, SERVER_CONFIG_TEST) { s = config.ResetDefaultConfig(); ASSERT_TRUE(s.ok()); } + +TEST_F(ConfigTest, SERVER_CONFIG_INVALID_TEST) { + std::string config_path(CONFIG_PATH); + milvus::server::Config& config = milvus::server::Config::GetInstance(); + milvus::Status s; + + s = config.LoadConfigFile(""); + ASSERT_FALSE(s.ok()); + + s = config.LoadConfigFile(config_path + INVALID_CONFIG_FILE); + ASSERT_FALSE(s.ok()); + + /* server config */ + s = config.SetServerConfigAddress("0.0.0"); + ASSERT_FALSE(s.ok()); + s = config.SetServerConfigAddress("0.0.0.256"); + ASSERT_FALSE(s.ok()); + + s = config.SetServerConfigPort("a"); + ASSERT_FALSE(s.ok()); + s = config.SetServerConfigPort("99999"); + ASSERT_FALSE(s.ok()); + + s = config.SetServerConfigDeployMode("cluster"); + ASSERT_FALSE(s.ok()); + + s = config.SetServerConfigTimeZone("GM"); + ASSERT_FALSE(s.ok()); + s = config.SetServerConfigTimeZone("GMT8"); + ASSERT_FALSE(s.ok()); + s = config.SetServerConfigTimeZone("UTCA"); + ASSERT_FALSE(s.ok()); + + /* db config */ + s = config.SetDBConfigPrimaryPath(""); + ASSERT_FALSE(s.ok()); + +// s = config.SetDBConfigSecondaryPath(""); +// ASSERT_FALSE(s.ok()); + + s = config.SetDBConfigBackendUrl("http://www.google.com"); + ASSERT_FALSE(s.ok()); + s = config.SetDBConfigBackendUrl("sqlite://:@:"); + ASSERT_FALSE(s.ok()); + s = config.SetDBConfigBackendUrl("mysql://root:123456@127.0.0.1/milvus"); + ASSERT_FALSE(s.ok()); + + s = config.SetDBConfigArchiveDiskThreshold("0x10"); + ASSERT_FALSE(s.ok()); + + s = config.SetDBConfigArchiveDaysThreshold("0x10"); + ASSERT_FALSE(s.ok()); + + s = config.SetDBConfigInsertBufferSize("a"); + ASSERT_FALSE(s.ok()); + s = config.SetDBConfigInsertBufferSize("-1"); + ASSERT_FALSE(s.ok()); + s = config.SetDBConfigInsertBufferSize("2048"); + ASSERT_FALSE(s.ok()); + + /* metric config */ + s = config.SetMetricConfigEnableMonitor("Y"); + ASSERT_FALSE(s.ok()); + + s = config.SetMetricConfigCollector("zilliz"); + ASSERT_FALSE(s.ok()); + + s = config.SetMetricConfigPrometheusPort("0xff"); + ASSERT_FALSE(s.ok()); + + /* cache config */ + s = config.SetCacheConfigCpuCacheCapacity("a"); + ASSERT_FALSE(s.ok()); + s = config.SetCacheConfigCpuCacheCapacity("-1"); + ASSERT_FALSE(s.ok()); + s = config.SetCacheConfigCpuCacheCapacity("2048"); + ASSERT_FALSE(s.ok()); + + s = config.SetCacheConfigCpuCacheThreshold("a"); + ASSERT_FALSE(s.ok()); + s = config.SetCacheConfigCpuCacheThreshold("1.0"); + ASSERT_FALSE(s.ok()); + + s = config.SetCacheConfigGpuCacheCapacity("a"); + ASSERT_FALSE(s.ok()); + s = config.SetCacheConfigGpuCacheCapacity("128"); + ASSERT_FALSE(s.ok()); + + s = config.SetCacheConfigGpuCacheThreshold("a"); + ASSERT_FALSE(s.ok()); + s = config.SetCacheConfigGpuCacheThreshold("1.0"); + ASSERT_FALSE(s.ok()); + + s = config.SetCacheConfigCacheInsertData("N"); + ASSERT_FALSE(s.ok()); + + /* engine config */ + s = 
config.SetEngineConfigUseBlasThreshold("0xff"); + ASSERT_FALSE(s.ok()); + + s = config.SetEngineConfigOmpThreadNum("a"); + ASSERT_FALSE(s.ok()); + s = config.SetEngineConfigOmpThreadNum("-1"); + ASSERT_FALSE(s.ok()); + + s = config.SetEngineConfigGpuSearchThreshold("-1"); + ASSERT_FALSE(s.ok()); + + /* resource config */ + s = config.SetResourceConfigMode("default"); + ASSERT_FALSE(s.ok()); + + s = config.SetResourceConfigIndexBuildDevice("gup2"); + ASSERT_FALSE(s.ok()); + s = config.SetResourceConfigIndexBuildDevice("gpu16"); + ASSERT_FALSE(s.ok()); +} \ No newline at end of file From 35bad8b63f5865f4134555cacb3b4f753fc6e90a Mon Sep 17 00:00:00 2001 From: "yudong.cai" Date: Sun, 3 Nov 2019 13:06:29 +0800 Subject: [PATCH 144/149] #175 add invalid config unittest Former-commit-id: 86f629e902b707a5b0aaed17e4ecb9d1e46e5070 --- CHANGELOG.md | 1 + core/src/server/Config.cpp | 3 +- core/unittest/server/test_config.cpp | 117 +++++++++++++++++++++++++++ 3 files changed, 119 insertions(+), 2 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 419a123390..bddf0e9467 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -30,6 +30,7 @@ Please mark all change in change log and use the ticket from JIRA. - \#156 - Not return error when search_resources and index_build_device set cpu - \#159 - Change the configuration name from 'use_gpu_threshold' to 'gpu_search_threshold' - \#168 - Improve result reduce +- \#175 - add invalid config unittest ## Task diff --git a/core/src/server/Config.cpp b/core/src/server/Config.cpp index dbe7d260c5..cc88dccffa 100644 --- a/core/src/server/Config.cpp +++ b/core/src/server/Config.cpp @@ -401,8 +401,7 @@ Status Config::CheckServerConfigDeployMode(const std::string& value) { if (value != "single" && value != "cluster_readonly" && value != "cluster_writable") { return Status(SERVER_INVALID_ARGUMENT, - "server_config.deploy_mode is not one of " - "single, cluster_readonly, and cluster_writable."); + "server_config.deploy_mode is not one of single, cluster_readonly, and cluster_writable."); } return Status::OK(); } diff --git a/core/unittest/server/test_config.cpp b/core/unittest/server/test_config.cpp index 76230cbcc3..96f9bd5e0e 100644 --- a/core/unittest/server/test_config.cpp +++ b/core/unittest/server/test_config.cpp @@ -112,3 +112,120 @@ TEST_F(ConfigTest, SERVER_CONFIG_TEST) { s = config.ResetDefaultConfig(); ASSERT_TRUE(s.ok()); } + +TEST_F(ConfigTest, SERVER_CONFIG_INVALID_TEST) { + std::string config_path(CONFIG_PATH); + milvus::server::Config& config = milvus::server::Config::GetInstance(); + milvus::Status s; + + s = config.LoadConfigFile(""); + ASSERT_FALSE(s.ok()); + + s = config.LoadConfigFile(config_path + INVALID_CONFIG_FILE); + ASSERT_FALSE(s.ok()); + + /* server config */ + s = config.SetServerConfigAddress("0.0.0"); + ASSERT_FALSE(s.ok()); + s = config.SetServerConfigAddress("0.0.0.256"); + ASSERT_FALSE(s.ok()); + + s = config.SetServerConfigPort("a"); + ASSERT_FALSE(s.ok()); + s = config.SetServerConfigPort("99999"); + ASSERT_FALSE(s.ok()); + + s = config.SetServerConfigDeployMode("cluster"); + ASSERT_FALSE(s.ok()); + + s = config.SetServerConfigTimeZone("GM"); + ASSERT_FALSE(s.ok()); + s = config.SetServerConfigTimeZone("GMT8"); + ASSERT_FALSE(s.ok()); + s = config.SetServerConfigTimeZone("UTCA"); + ASSERT_FALSE(s.ok()); + + /* db config */ + s = config.SetDBConfigPrimaryPath(""); + ASSERT_FALSE(s.ok()); + +// s = config.SetDBConfigSecondaryPath(""); +// ASSERT_FALSE(s.ok()); + + s = config.SetDBConfigBackendUrl("http://www.google.com"); + 
ASSERT_FALSE(s.ok()); + s = config.SetDBConfigBackendUrl("sqlite://:@:"); + ASSERT_FALSE(s.ok()); + s = config.SetDBConfigBackendUrl("mysql://root:123456@127.0.0.1/milvus"); + ASSERT_FALSE(s.ok()); + + s = config.SetDBConfigArchiveDiskThreshold("0x10"); + ASSERT_FALSE(s.ok()); + + s = config.SetDBConfigArchiveDaysThreshold("0x10"); + ASSERT_FALSE(s.ok()); + + s = config.SetDBConfigInsertBufferSize("a"); + ASSERT_FALSE(s.ok()); + s = config.SetDBConfigInsertBufferSize("-1"); + ASSERT_FALSE(s.ok()); + s = config.SetDBConfigInsertBufferSize("2048"); + ASSERT_FALSE(s.ok()); + + /* metric config */ + s = config.SetMetricConfigEnableMonitor("Y"); + ASSERT_FALSE(s.ok()); + + s = config.SetMetricConfigCollector("zilliz"); + ASSERT_FALSE(s.ok()); + + s = config.SetMetricConfigPrometheusPort("0xff"); + ASSERT_FALSE(s.ok()); + + /* cache config */ + s = config.SetCacheConfigCpuCacheCapacity("a"); + ASSERT_FALSE(s.ok()); + s = config.SetCacheConfigCpuCacheCapacity("-1"); + ASSERT_FALSE(s.ok()); + s = config.SetCacheConfigCpuCacheCapacity("2048"); + ASSERT_FALSE(s.ok()); + + s = config.SetCacheConfigCpuCacheThreshold("a"); + ASSERT_FALSE(s.ok()); + s = config.SetCacheConfigCpuCacheThreshold("1.0"); + ASSERT_FALSE(s.ok()); + + s = config.SetCacheConfigGpuCacheCapacity("a"); + ASSERT_FALSE(s.ok()); + s = config.SetCacheConfigGpuCacheCapacity("128"); + ASSERT_FALSE(s.ok()); + + s = config.SetCacheConfigGpuCacheThreshold("a"); + ASSERT_FALSE(s.ok()); + s = config.SetCacheConfigGpuCacheThreshold("1.0"); + ASSERT_FALSE(s.ok()); + + s = config.SetCacheConfigCacheInsertData("N"); + ASSERT_FALSE(s.ok()); + + /* engine config */ + s = config.SetEngineConfigUseBlasThreshold("0xff"); + ASSERT_FALSE(s.ok()); + + s = config.SetEngineConfigOmpThreadNum("a"); + ASSERT_FALSE(s.ok()); + s = config.SetEngineConfigOmpThreadNum("-1"); + ASSERT_FALSE(s.ok()); + + s = config.SetEngineConfigGpuSearchThreshold("-1"); + ASSERT_FALSE(s.ok()); + + /* resource config */ + s = config.SetResourceConfigMode("default"); + ASSERT_FALSE(s.ok()); + + s = config.SetResourceConfigIndexBuildDevice("gup2"); + ASSERT_FALSE(s.ok()); + s = config.SetResourceConfigIndexBuildDevice("gpu16"); + ASSERT_FALSE(s.ok()); +} \ No newline at end of file From bfafb8cdef7a606c5766f9a6ec2621b32caf4f76 Mon Sep 17 00:00:00 2001 From: "yudong.cai" Date: Sun, 3 Nov 2019 14:28:15 +0800 Subject: [PATCH 145/149] #175 fix clang-format Former-commit-id: e7818d67ca090ced2a934e885074700b46619e15 --- core/unittest/server/test_config.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/core/unittest/server/test_config.cpp b/core/unittest/server/test_config.cpp index 96f9bd5e0e..a4b76ca157 100644 --- a/core/unittest/server/test_config.cpp +++ b/core/unittest/server/test_config.cpp @@ -228,4 +228,5 @@ TEST_F(ConfigTest, SERVER_CONFIG_INVALID_TEST) { ASSERT_FALSE(s.ok()); s = config.SetResourceConfigIndexBuildDevice("gpu16"); ASSERT_FALSE(s.ok()); -} \ No newline at end of file +} + From 47d52d250d2aa368f41862337e74cd81503d9f1a Mon Sep 17 00:00:00 2001 From: JinHai-CN Date: Sun, 3 Nov 2019 15:14:42 +0800 Subject: [PATCH 146/149] Fix typo bug Former-commit-id: 8b544b2158f89fea48a174e2f64128fa61706259 --- core/src/scheduler/job/BuildIndexJob.cpp | 5 ++++- core/src/scheduler/job/SearchJob.cpp | 5 ++++- core/src/wrapper/ConfAdapter.cpp | 2 +- 3 files changed, 9 insertions(+), 3 deletions(-) diff --git a/core/src/scheduler/job/BuildIndexJob.cpp b/core/src/scheduler/job/BuildIndexJob.cpp index 4c4c3b5054..3247383db3 100644 --- 
a/core/src/scheduler/job/BuildIndexJob.cpp +++ b/core/src/scheduler/job/BuildIndexJob.cpp @@ -50,7 +50,10 @@ void BuildIndexJob::BuildIndexDone(size_t to_index_id) { std::unique_lock lock(mutex_); to_index_files_.erase(to_index_id); - cv_.notify_all(); + if (to_index_files_.empty()) { + cv_.notify_all(); + } + SERVER_LOG_DEBUG << "BuildIndexJob " << id() << " finish index file: " << to_index_id; } diff --git a/core/src/scheduler/job/SearchJob.cpp b/core/src/scheduler/job/SearchJob.cpp index ee41c1ae06..ec93c69f55 100644 --- a/core/src/scheduler/job/SearchJob.cpp +++ b/core/src/scheduler/job/SearchJob.cpp @@ -49,7 +49,10 @@ void SearchJob::SearchDone(size_t index_id) { std::unique_lock lock(mutex_); index_files_.erase(index_id); - cv_.notify_all(); + if (index_files_.empty()) { + cv_.notify_all(); + } + SERVER_LOG_DEBUG << "SearchJob " << id() << " finish index file: " << index_id; } diff --git a/core/src/wrapper/ConfAdapter.cpp b/core/src/wrapper/ConfAdapter.cpp index 2dcf6bab7e..a27bceecbb 100644 --- a/core/src/wrapper/ConfAdapter.cpp +++ b/core/src/wrapper/ConfAdapter.cpp @@ -109,7 +109,7 @@ IVFSQConfAdapter::Match(const TempMetaConf& metaconf) { conf->nlist = MatchNlist(metaconf.size, metaconf.nlist); conf->d = metaconf.dim; conf->metric_type = metaconf.metric_type; - conf->gpu_id = conf->gpu_id; + conf->gpu_id = metaconf->gpu_id; conf->nbits = 8; MatchBase(conf); return conf; From 46e265503339569b5cefd57fccf494308dd2a0d5 Mon Sep 17 00:00:00 2001 From: JinHai-CN Date: Sun, 3 Nov 2019 15:35:28 +0800 Subject: [PATCH 147/149] update Former-commit-id: 977f322ddccf4e1b1a6b87a1fc1710d47eb5d48d --- core/src/wrapper/ConfAdapter.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/core/src/wrapper/ConfAdapter.cpp b/core/src/wrapper/ConfAdapter.cpp index a27bceecbb..4ac8e22f52 100644 --- a/core/src/wrapper/ConfAdapter.cpp +++ b/core/src/wrapper/ConfAdapter.cpp @@ -109,7 +109,7 @@ IVFSQConfAdapter::Match(const TempMetaConf& metaconf) { conf->nlist = MatchNlist(metaconf.size, metaconf.nlist); conf->d = metaconf.dim; conf->metric_type = metaconf.metric_type; - conf->gpu_id = metaconf->gpu_id; + conf->gpu_id = metaconf.gpu_id; conf->nbits = 8; MatchBase(conf); return conf; From ad395a7f06b92a6ed67c790a9f04eb157d56b776 Mon Sep 17 00:00:00 2001 From: "yudong.cai" Date: Sun, 3 Nov 2019 16:05:38 +0800 Subject: [PATCH 148/149] #175 update invalid config unittest Former-commit-id: 06ef1ca314a4ad5d55a5a3352f75ec6e265cb4a0 --- core/unittest/server/test_config.cpp | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/core/unittest/server/test_config.cpp b/core/unittest/server/test_config.cpp index a4b76ca157..123ddf5265 100644 --- a/core/unittest/server/test_config.cpp +++ b/core/unittest/server/test_config.cpp @@ -123,6 +123,8 @@ TEST_F(ConfigTest, SERVER_CONFIG_INVALID_TEST) { s = config.LoadConfigFile(config_path + INVALID_CONFIG_FILE); ASSERT_FALSE(s.ok()); + s = config.LoadConfigFile(config_path + "dummy.yaml"); + ASSERT_FALSE(s.ok()); /* server config */ s = config.SetServerConfigAddress("0.0.0"); @@ -167,7 +169,7 @@ TEST_F(ConfigTest, SERVER_CONFIG_INVALID_TEST) { s = config.SetDBConfigInsertBufferSize("a"); ASSERT_FALSE(s.ok()); - s = config.SetDBConfigInsertBufferSize("-1"); + s = config.SetDBConfigInsertBufferSize("0"); ASSERT_FALSE(s.ok()); s = config.SetDBConfigInsertBufferSize("2048"); ASSERT_FALSE(s.ok()); @@ -185,7 +187,7 @@ TEST_F(ConfigTest, SERVER_CONFIG_INVALID_TEST) { /* cache config */ s = config.SetCacheConfigCpuCacheCapacity("a"); 
ASSERT_FALSE(s.ok()); - s = config.SetCacheConfigCpuCacheCapacity("-1"); + s = config.SetCacheConfigCpuCacheCapacity("0"); ASSERT_FALSE(s.ok()); s = config.SetCacheConfigCpuCacheCapacity("2048"); ASSERT_FALSE(s.ok()); @@ -214,7 +216,7 @@ TEST_F(ConfigTest, SERVER_CONFIG_INVALID_TEST) { s = config.SetEngineConfigOmpThreadNum("a"); ASSERT_FALSE(s.ok()); - s = config.SetEngineConfigOmpThreadNum("-1"); + s = config.SetEngineConfigOmpThreadNum("10000"); ASSERT_FALSE(s.ok()); s = config.SetEngineConfigGpuSearchThreshold("-1"); From bf2e3e790c9d231437f356937346c33570226911 Mon Sep 17 00:00:00 2001 From: JinHai-CN Date: Sun, 3 Nov 2019 17:19:28 +0800 Subject: [PATCH 149/149] Update milvus version Former-commit-id: 3d345b0dc6eaaba41b78f87a46ef70e5d4ca6c19 --- core/CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/core/CMakeLists.txt b/core/CMakeLists.txt index df7cd9183b..402e65fb10 100644 --- a/core/CMakeLists.txt +++ b/core/CMakeLists.txt @@ -44,7 +44,7 @@ if(NOT GIT_BRANCH_NAME STREQUAL "") string(REGEX REPLACE "\n" "" GIT_BRANCH_NAME ${GIT_BRANCH_NAME}) endif() -set(MILVUS_VERSION "${GIT_BRANCH_NAME}") +set(MILVUS_VERSION "0.5.1") string(REGEX MATCH "[0-9]+\\.[0-9]+\\.[0-9]" MILVUS_VERSION "${MILVUS_VERSION}") find_package(ClangTools)
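Note on patches 140/141: the reduce rework replaces the per-query vector of (id, distance) pairs with two flat, row-major arrays (ResultIds, ResultDistances) and folds each index file's partial top-k into the accumulated result with one two-pointer pass per query. Patch 141 then drops patch 140's empty-target shortcut, plausibly because copying the padded source verbatim would let the -1 filler slots (distance -1.0) enter later ascending merges as if they were real hits, whereas the general path only ever emits min(topk, src_k + tar_k) valid entries per query. The sketch below is an illustrative Python model of that merge, not the shipped C++ code, and the name merge_topk is ours; it assumes the same layout as the unit tests: source rows have stride topk with the first src_k slots valid, target rows are fully valid with stride tar_k = len(tar_ids) // nq, and distances are pre-sorted per query (ascending for L2, descending for IP).

```python
def merge_topk(src_ids, src_dists, src_k, nq, topk, ascending, tar_ids, tar_dists):
    """Model of XSearchTask::MergeTopkToResultSet after patch 141.

    Returns new flat (ids, dists) arrays with per-query stride
    buf_k = min(topk, src_k + tar_k).
    """
    if not src_ids:                      # nothing to merge in
        return tar_ids, tar_dists
    tar_k = len(tar_ids) // nq           # 0 on the very first merge
    buf_k = min(topk, src_k + tar_k)
    buf_ids = [-1] * (nq * buf_k)
    buf_dists = [0.0] * (nq * buf_k)
    better = (lambda a, b: a < b) if ascending else (lambda a, b: a > b)

    for i in range(nq):
        s = t = 0                        # cursors into the src row / tar row
        for b in range(buf_k):
            bi = buf_k * i + b
            # Take from src when tar is exhausted or src's next hit is
            # strictly better; ties go to tar, matching the C++ branch.
            take_src = s < src_k and (
                t >= tar_k or better(src_dists[topk * i + s], tar_dists[tar_k * i + t]))
            if take_src:
                buf_ids[bi] = src_ids[topk * i + s]
                buf_dists[bi] = src_dists[topk * i + s]
                s += 1
            else:
                buf_ids[bi] = tar_ids[tar_k * i + t]
                buf_dists[bi] = tar_dists[tar_k * i + t]
                t += 1
    return buf_ids, buf_dists


# Example: one query (nq=1), topk=3, ascending (L2), two index files
# each contributing src_k=2 valid hits padded out to stride topk.
ids, dists = [], []
ids, dists = merge_topk([11, 12, -1], [0.1, 0.4, -1.0], 2, 1, 3, True, ids, dists)
ids, dists = merge_topk([21, 22, -1], [0.2, 0.3, -1.0], 2, 1, 3, True, ids, dists)
assert ids == [11, 21, 22] and dists == [0.1, 0.2, 0.3]
```

Because the first merge into an empty target takes the same general path (tar_k computes to 0), no special case is needed, and the padding distances never reach the buffer.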