From fd0bf04cf87b4209613983e0fd28fc534c564b59 Mon Sep 17 00:00:00 2001 From: jielinxu <52057195+jielinxu@users.noreply.github.com> Date: Fri, 20 Sep 2019 18:32:17 +0800 Subject: [PATCH 01/89] Update README.md --- README.md | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index f496cdd884..69470bd0b2 100644 --- a/README.md +++ b/README.md @@ -1 +1,2 @@ -# Milvus is coming soon! \ No newline at end of file +![LICENSE](https://img.shields.io/badge/license-Apache--2.0-brightgreen) +# Milvus is coming soon! From c804b91e782f2802a73b3a5f57f25313144eb7f7 Mon Sep 17 00:00:00 2001 From: jielinxu <52057195+jielinxu@users.noreply.github.com> Date: Fri, 20 Sep 2019 18:37:57 +0800 Subject: [PATCH 02/89] Update README.md --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 69470bd0b2..5249aace41 100644 --- a/README.md +++ b/README.md @@ -1,2 +1,2 @@ -![LICENSE](https://img.shields.io/badge/license-Apache--2.0-brightgreen) +![LICENSE](https://img.shields.io/badge/license-Apache--2.0-brightgreen.svg) # Milvus is coming soon! From c85d7feb8db4c6bf60a8fb4f880995105149da7a Mon Sep 17 00:00:00 2001 From: jielinxu <52057195+jielinxu@users.noreply.github.com> Date: Sun, 29 Sep 2019 15:49:34 +0800 Subject: [PATCH 03/89] Update README.md --- README.md | 1 + 1 file changed, 1 insertion(+) diff --git a/README.md b/README.md index 5249aace41..17928c6538 100644 --- a/README.md +++ b/README.md @@ -1,2 +1,3 @@ ![LICENSE](https://img.shields.io/badge/license-Apache--2.0-brightgreen.svg) +![Language](https://img.shields.io/badge/language-C%2B%2B-blue.svg) # Milvus is coming soon! From 1f1511d1ccc9906951c7bdac97a474c3216d876d Mon Sep 17 00:00:00 2001 From: jielinxu <52057195+jielinxu@users.noreply.github.com> Date: Sun, 29 Sep 2019 15:51:13 +0800 Subject: [PATCH 04/89] Update README.md --- README.md | 1 + 1 file changed, 1 insertion(+) diff --git a/README.md b/README.md index 17928c6538..5163824f0a 100644 --- a/README.md +++ b/README.md @@ -1,3 +1,4 @@ ![LICENSE](https://img.shields.io/badge/license-Apache--2.0-brightgreen.svg) ![Language](https://img.shields.io/badge/language-C%2B%2B-blue.svg) +![Release](https://img.shields.io/badge/Release-v0.5.0-orange) # Milvus is coming soon! From ef4bce71b4bc32dc1944c77bda35d0c56cfcbc0a Mon Sep 17 00:00:00 2001 From: jielinxu <52057195+jielinxu@users.noreply.github.com> Date: Sun, 29 Sep 2019 16:05:25 +0800 Subject: [PATCH 05/89] Update README.md --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 5163824f0a..eec486fdb2 100644 --- a/README.md +++ b/README.md @@ -1,4 +1,4 @@ ![LICENSE](https://img.shields.io/badge/license-Apache--2.0-brightgreen.svg) ![Language](https://img.shields.io/badge/language-C%2B%2B-blue.svg) -![Release](https://img.shields.io/badge/Release-v0.5.0-orange) +![Release](https://img.shields.io/badge/Release-v0.5.0-orange.svg) # Milvus is coming soon! From 16f34710cea79850582bcb946e0ad1c5e9398c49 Mon Sep 17 00:00:00 2001 From: jielinxu <52057195+jielinxu@users.noreply.github.com> Date: Sun, 29 Sep 2019 16:06:13 +0800 Subject: [PATCH 06/89] Update README.md --- README.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/README.md b/README.md index eec486fdb2..ff9007e353 100644 --- a/README.md +++ b/README.md @@ -1,4 +1,6 @@ ![LICENSE](https://img.shields.io/badge/license-Apache--2.0-brightgreen.svg) ![Language](https://img.shields.io/badge/language-C%2B%2B-blue.svg) ![Release](https://img.shields.io/badge/Release-v0.5.0-orange.svg) +![Release date](https://img.shields.io/badge/release date-October-yellowgreen) + # Milvus is coming soon! From 45c970a78369042acdca003b9b0a2d1062ff9833 Mon Sep 17 00:00:00 2001 From: jielinxu <52057195+jielinxu@users.noreply.github.com> Date: Sun, 29 Sep 2019 16:06:44 +0800 Subject: [PATCH 07/89] Update README.md --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index ff9007e353..cdedb2a096 100644 --- a/README.md +++ b/README.md @@ -1,6 +1,6 @@ ![LICENSE](https://img.shields.io/badge/license-Apache--2.0-brightgreen.svg) ![Language](https://img.shields.io/badge/language-C%2B%2B-blue.svg) ![Release](https://img.shields.io/badge/Release-v0.5.0-orange.svg) -![Release date](https://img.shields.io/badge/release date-October-yellowgreen) +![Releasedate](https://img.shields.io/badge/release date-October-yellowgreen.svg) # Milvus is coming soon! From 6a879914e04a3961f4f3edbee92b8b123a4b6fc9 Mon Sep 17 00:00:00 2001 From: jielinxu <52057195+jielinxu@users.noreply.github.com> Date: Sun, 29 Sep 2019 16:07:34 +0800 Subject: [PATCH 08/89] Update README.md --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index cdedb2a096..1aba7639f2 100644 --- a/README.md +++ b/README.md @@ -1,6 +1,6 @@ ![LICENSE](https://img.shields.io/badge/license-Apache--2.0-brightgreen.svg) ![Language](https://img.shields.io/badge/language-C%2B%2B-blue.svg) ![Release](https://img.shields.io/badge/Release-v0.5.0-orange.svg) -![Releasedate](https://img.shields.io/badge/release date-October-yellowgreen.svg) +![Release date](https://img.shields.io/badge/release date-October-yellowgreen.svg) # Milvus is coming soon! From 428d52970ac7d7d926874036ffcaec429d640938 Mon Sep 17 00:00:00 2001 From: jielinxu <52057195+jielinxu@users.noreply.github.com> Date: Sun, 29 Sep 2019 16:08:58 +0800 Subject: [PATCH 09/89] Update README.md --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 1aba7639f2..13c7f1137c 100644 --- a/README.md +++ b/README.md @@ -1,6 +1,6 @@ ![LICENSE](https://img.shields.io/badge/license-Apache--2.0-brightgreen.svg) ![Language](https://img.shields.io/badge/language-C%2B%2B-blue.svg) ![Release](https://img.shields.io/badge/Release-v0.5.0-orange.svg) -![Release date](https://img.shields.io/badge/release date-October-yellowgreen.svg) +![Release date](https://img.shields.io/badge/release__date-October-yellowgreen) # Milvus is coming soon! From fbffc78b9e9a36432011bb7b69bf5be221840c24 Mon Sep 17 00:00:00 2001 From: JinHai-CN Date: Fri, 18 Oct 2019 17:06:48 +0800 Subject: [PATCH 10/89] Update FAISS package to 0.3.0 Former-commit-id: d8baa122f0a9e2e02709d50bc7d4a5105cab7f2f --- core/src/index/cmake/ThirdPartyPackagesCore.cmake | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/core/src/index/cmake/ThirdPartyPackagesCore.cmake b/core/src/index/cmake/ThirdPartyPackagesCore.cmake index ee1d88ee32..2dd619f7bc 100644 --- a/core/src/index/cmake/ThirdPartyPackagesCore.cmake +++ b/core/src/index/cmake/ThirdPartyPackagesCore.cmake @@ -243,7 +243,8 @@ if(CUSTOMIZATION) # set(FAISS_MD5 "57da9c4f599cc8fa4260488b1c96e1cc") # commit-id 6dbdf75987c34a2c853bd172ea0d384feea8358c branch-0.2.0 # set(FAISS_MD5 "21deb1c708490ca40ecb899122c01403") # commit-id 643e48f479637fd947e7b93fa4ca72b38ecc9a39 branch-0.2.0 # set(FAISS_MD5 "072db398351cca6e88f52d743bbb9fa0") # commit-id 3a2344d04744166af41ef1a74449d68a315bfe17 branch-0.2.1 - set(FAISS_MD5 "c89ea8e655f5cdf58f42486f13614714") # commit-id 9c28a1cbb88f41fa03b03d7204106201ad33276b branch-0.2.1 + # set(FAISS_MD5 "c89ea8e655f5cdf58f42486f13614714") # commit-id 9c28a1cbb88f41fa03b03d7204106201ad33276b branch-0.2.1 + set(FAISS_MD5 "f3b2ce3364c3fa7febd3aa7fdd0fe380") # commit-id 694e03458e6b69ce8a62502f71f69a614af5af8f branch-0.3.0 endif() else() set(FAISS_SOURCE_URL "https://github.com/facebookresearch/faiss/archive/v1.5.3.tar.gz") From 6572e6bf25cf3908b4c27c5812dbd15290380c4b Mon Sep 17 00:00:00 2001 From: "xiaojun.lin" Date: Sat, 19 Oct 2019 17:49:28 +0800 Subject: [PATCH 11/89] MS-671 Fix HybridIndex Crash MS-670 Adapt to faiss-1.6 Former-commit-id: 12300b390b6d300dc460f0729a368a240c972a80 --- core/src/db/engine/ExecutionEngineImpl.cpp | 53 +++++++- .../index/vector_index/FaissBaseIndex.h | 2 +- .../index/vector_index/IndexGPUIVF.cpp | 11 +- .../index/vector_index/IndexGPUIVFPQ.cpp | 4 +- .../index/vector_index/IndexGPUIVFSQ.cpp | 14 +- .../index/vector_index/IndexGPUIVFSQ.h | 4 - .../index/vector_index/IndexIDMAP.cpp | 5 +- .../knowhere/index/vector_index/IndexIVF.cpp | 6 +- .../knowhere/index/vector_index/IndexIVF.h | 2 +- .../index/vector_index/IndexIVFSQ.cpp | 10 +- .../index/vector_index/IndexIVFSQHybrid.cpp | 97 ++++++++++---- .../index/vector_index/IndexIVFSQHybrid.h | 7 + .../knowhere/index/vector_index/VectorIndex.h | 1 + .../index/vector_index/helpers/FaissIO.h | 2 +- core/src/index/unittest/Helper.h | 2 +- .../index/unittest/test_customized_index.cpp | 120 +++++++++++++++++- core/src/index/unittest/test_ivf.cpp | 41 ++---- core/src/scheduler/SchedInst.h | 1 + .../src/scheduler/optimizer/LargeSQ8HPass.cpp | 84 ++++++------ core/src/scheduler/optimizer/LargeSQ8HPass.h | 4 +- core/src/server/DBWrapper.cpp | 2 +- core/src/wrapper/VecIndex.h | 1 + core/unittest/wrapper/test_wrapper.cpp | 2 +- 23 files changed, 325 insertions(+), 150 deletions(-) diff --git a/core/src/db/engine/ExecutionEngineImpl.cpp b/core/src/db/engine/ExecutionEngineImpl.cpp index 862c1026d2..9c411deba1 100644 --- a/core/src/db/engine/ExecutionEngineImpl.cpp +++ b/core/src/db/engine/ExecutionEngineImpl.cpp @@ -22,10 +22,7 @@ #include "utils/CommonUtil.h" #include "utils/Exception.h" #include "utils/Log.h" - #include "knowhere/common/Config.h" -#include "knowhere/common/Exception.h" -#include "knowhere/index/vector_index/IndexIVFSQHybrid.h" #include "scheduler/Utils.h" #include "server/Config.h" #include "wrapper/ConfAdapter.h" @@ -249,6 +246,56 @@ ExecutionEngineImpl::Load(bool to_cache) { Status ExecutionEngineImpl::CopyToGpu(uint64_t device_id, bool hybrid) { if (hybrid) { +#if 1 + const std::string key = location_ + ".quantizer"; + std::vector gpus = scheduler::get_gpu_pool(); + + const int64_t NOT_FOUND = -1; + int64_t device_id = NOT_FOUND; + + // cache hit + { + knowhere::QuantizerPtr quantizer = nullptr; + + for (auto& gpu : gpus) { + auto cache = cache::GpuCacheMgr::GetInstance(gpu); + if (auto cached_quantizer = cache->GetIndex(key)) { + device_id = gpu; + quantizer = std::static_pointer_cast(cached_quantizer)->Data(); + } + } + + if (device_id != NOT_FOUND) { + // cache hit + auto config = std::make_shared(); + config->gpu_id = device_id; + config->mode = 2; + auto new_index = index_->LoadData(quantizer, config); + index_ = new_index; + } + } + + if (device_id == NOT_FOUND) { + // cache miss + std::vector all_free_mem; + for (auto& gpu : gpus) { + auto cache = cache::GpuCacheMgr::GetInstance(gpu); + auto free_mem = cache->CacheCapacity() - cache->CacheUsage(); + all_free_mem.push_back(free_mem); + } + + auto max_e = std::max_element(all_free_mem.begin(), all_free_mem.end()); + auto best_index = std::distance(all_free_mem.begin(), max_e); + device_id = gpus[best_index]; + + auto pair = index_->CopyToGpuWithQuantizer(device_id); + index_ = pair.first; + + // cache + auto cached_quantizer = std::make_shared(pair.second); + cache::GpuCacheMgr::GetInstance(device_id)->InsertItem(key, cached_quantizer); + } +#endif return Status::OK(); } diff --git a/core/src/index/knowhere/knowhere/index/vector_index/FaissBaseIndex.h b/core/src/index/knowhere/knowhere/index/vector_index/FaissBaseIndex.h index f3fceebb88..359af97d90 100644 --- a/core/src/index/knowhere/knowhere/index/vector_index/FaissBaseIndex.h +++ b/core/src/index/knowhere/knowhere/index/vector_index/FaissBaseIndex.h @@ -38,7 +38,7 @@ class FaissBaseIndex { virtual void SealImpl(); - protected: + public: std::shared_ptr index_ = nullptr; }; diff --git a/core/src/index/knowhere/knowhere/index/vector_index/IndexGPUIVF.cpp b/core/src/index/knowhere/knowhere/index/vector_index/IndexGPUIVF.cpp index a5e8f90f34..65938e1630 100644 --- a/core/src/index/knowhere/knowhere/index/vector_index/IndexGPUIVF.cpp +++ b/core/src/index/knowhere/knowhere/index/vector_index/IndexGPUIVF.cpp @@ -15,12 +15,12 @@ // specific language governing permissions and limitations // under the License. -#include -#include +#include + #include #include #include -#include +#include #include "knowhere/adapter/VectorAdapter.h" #include "knowhere/common/Exception.h" @@ -130,13 +130,12 @@ void GPUIVF::search_impl(int64_t n, const float* data, int64_t k, float* distances, int64_t* labels, const Config& cfg) { std::lock_guard lk(mutex_); - // TODO(linxj): gpu index support GenParams if (auto device_index = std::dynamic_pointer_cast(index_)) { auto search_cfg = std::dynamic_pointer_cast(cfg); - device_index->setNumProbes(search_cfg->nprobe); + device_index->nprobe = search_cfg->nprobe; +// assert(device_index->getNumProbes() == search_cfg->nprobe); { - // TODO(linxj): allocate gpu mem ResScope rs(res_, gpu_id_); device_index->search(n, (float*)data, k, distances, labels); } diff --git a/core/src/index/knowhere/knowhere/index/vector_index/IndexGPUIVFPQ.cpp b/core/src/index/knowhere/knowhere/index/vector_index/IndexGPUIVFPQ.cpp index 213141b3ac..9ba8dd0456 100644 --- a/core/src/index/knowhere/knowhere/index/vector_index/IndexGPUIVFPQ.cpp +++ b/core/src/index/knowhere/knowhere/index/vector_index/IndexGPUIVFPQ.cpp @@ -16,8 +16,10 @@ // under the License. #include -#include #include +#include +#include + #include #include "knowhere/adapter/VectorAdapter.h" diff --git a/core/src/index/knowhere/knowhere/index/vector_index/IndexGPUIVFSQ.cpp b/core/src/index/knowhere/knowhere/index/vector_index/IndexGPUIVFSQ.cpp index 5e1f5226f2..fff27cd7db 100644 --- a/core/src/index/knowhere/knowhere/index/vector_index/IndexGPUIVFSQ.cpp +++ b/core/src/index/knowhere/knowhere/index/vector_index/IndexGPUIVFSQ.cpp @@ -15,9 +15,10 @@ // specific language governing permissions and limitations // under the License. -#include +#include +#include + #include -#include #include "knowhere/adapter/VectorAdapter.h" #include "knowhere/common/Exception.h" @@ -71,13 +72,4 @@ GPUIVFSQ::CopyGpuToCpu(const Config& config) { return std::make_shared(new_index); } -void -GPUIVFSQ::search_impl(int64_t n, const float* data, int64_t k, float* distances, int64_t* labels, const Config& cfg) { -#ifdef CUSTOMIZATION - GPUIVF::search_impl(n, data, k, distances, labels, cfg); -#else - IVF::search_impl(n, data, k, distances, labels, cfg); -#endif -} - } // namespace knowhere diff --git a/core/src/index/knowhere/knowhere/index/vector_index/IndexGPUIVFSQ.h b/core/src/index/knowhere/knowhere/index/vector_index/IndexGPUIVFSQ.h index 7332bce691..ed8013d77f 100644 --- a/core/src/index/knowhere/knowhere/index/vector_index/IndexGPUIVFSQ.h +++ b/core/src/index/knowhere/knowhere/index/vector_index/IndexGPUIVFSQ.h @@ -38,10 +38,6 @@ class GPUIVFSQ : public GPUIVF { VectorIndexPtr CopyGpuToCpu(const Config& config) override; - - protected: - void - search_impl(int64_t n, const float* data, int64_t k, float* distances, int64_t* labels, const Config& cfg) override; }; } // namespace knowhere diff --git a/core/src/index/knowhere/knowhere/index/vector_index/IndexIDMAP.cpp b/core/src/index/knowhere/knowhere/index/vector_index/IndexIDMAP.cpp index 2371591b5c..643bb16076 100644 --- a/core/src/index/knowhere/knowhere/index/vector_index/IndexIDMAP.cpp +++ b/core/src/index/knowhere/knowhere/index/vector_index/IndexIDMAP.cpp @@ -15,11 +15,12 @@ // specific language governing permissions and limitations // under the License. -#include #include #include -#include #include +#include +#include + #include #include "knowhere/adapter/VectorAdapter.h" diff --git a/core/src/index/knowhere/knowhere/index/vector_index/IndexIVF.cpp b/core/src/index/knowhere/knowhere/index/vector_index/IndexIVF.cpp index 0c4856f2b6..02708ff5d7 100644 --- a/core/src/index/knowhere/knowhere/index/vector_index/IndexIVF.cpp +++ b/core/src/index/knowhere/knowhere/index/vector_index/IndexIVF.cpp @@ -15,15 +15,11 @@ // specific language governing permissions and limitations // under the License. -#include -#include +#include #include #include #include #include -#include -#include -#include #include #include #include diff --git a/core/src/index/knowhere/knowhere/index/vector_index/IndexIVF.h b/core/src/index/knowhere/knowhere/index/vector_index/IndexIVF.h index ef9982fa30..e064b6f08c 100644 --- a/core/src/index/knowhere/knowhere/index/vector_index/IndexIVF.h +++ b/core/src/index/knowhere/knowhere/index/vector_index/IndexIVF.h @@ -30,7 +30,7 @@ namespace knowhere { using Graph = std::vector>; -class IVF : public VectorIndex, protected FaissBaseIndex { +class IVF : public VectorIndex, public FaissBaseIndex { public: IVF() : FaissBaseIndex(nullptr) { } diff --git a/core/src/index/knowhere/knowhere/index/vector_index/IndexIVFSQ.cpp b/core/src/index/knowhere/knowhere/index/vector_index/IndexIVFSQ.cpp index 063dc63550..80b4c78883 100644 --- a/core/src/index/knowhere/knowhere/index/vector_index/IndexIVFSQ.cpp +++ b/core/src/index/knowhere/knowhere/index/vector_index/IndexIVFSQ.cpp @@ -15,7 +15,8 @@ // specific language governing permissions and limitations // under the License. -#include +#include +#include #include #include "knowhere/adapter/VectorAdapter.h" @@ -56,14 +57,7 @@ IVFSQ::CopyCpuToGpu(const int64_t& device_id, const Config& config) { if (auto res = FaissGpuResourceMgr::GetInstance().GetRes(device_id)) { ResScope rs(res, device_id, false); -#ifdef CUSTOMIZATION - faiss::gpu::GpuClonerOptions option; - option.allInGpu = true; - - auto gpu_index = faiss::gpu::index_cpu_to_gpu(res->faiss_res.get(), device_id, index_.get(), &option); -#else auto gpu_index = faiss::gpu::index_cpu_to_gpu(res->faiss_res.get(), device_id, index_.get()); -#endif std::shared_ptr device_index; device_index.reset(gpu_index); diff --git a/core/src/index/knowhere/knowhere/index/vector_index/IndexIVFSQHybrid.cpp b/core/src/index/knowhere/knowhere/index/vector_index/IndexIVFSQHybrid.cpp index fe5bf0990a..af67722266 100644 --- a/core/src/index/knowhere/knowhere/index/vector_index/IndexIVFSQHybrid.cpp +++ b/core/src/index/knowhere/knowhere/index/vector_index/IndexIVFSQHybrid.cpp @@ -17,19 +17,25 @@ // under the License. #include "knowhere/index/vector_index/IndexIVFSQHybrid.h" -#include -#include "faiss/AutoTune.h" -#include "faiss/gpu/GpuAutoTune.h" -#include "faiss/gpu/GpuIndexIVF.h" #include "knowhere/adapter/VectorAdapter.h" #include "knowhere/common/Exception.h" +#include + +#include +#include +#include + namespace knowhere { #ifdef CUSTOMIZATION +//std::mutex g_mutex; + IndexModelPtr IVFSQHybrid::Train(const DatasetPtr& dataset, const Config& config) { +// std::lock_guard lk(g_mutex); + auto build_cfg = std::dynamic_pointer_cast(config); if (build_cfg != nullptr) { build_cfg->CheckValid(); // throw exception @@ -63,23 +69,25 @@ IVFSQHybrid::Train(const DatasetPtr& dataset, const Config& config) { VectorIndexPtr IVFSQHybrid::CopyGpuToCpu(const Config& config) { + if (gpu_mode == 0) { + return std::make_shared(index_); + } std::lock_guard lk(mutex_); - if (auto device_idx = std::dynamic_pointer_cast(index_)) { faiss::Index* device_index = index_.get(); faiss::Index* host_index = faiss::gpu::index_gpu_to_cpu(device_index); std::shared_ptr new_index; new_index.reset(host_index); return std::make_shared(new_index); - } else { - // TODO(linxj): why? jinhai - return std::make_shared(index_); - } } VectorIndexPtr IVFSQHybrid::CopyCpuToGpu(const int64_t& device_id, const Config& config) { + if (gpu_mode != 0) { + KNOWHERE_THROW_MSG("Not a GpuIndex Type"); + } + if (auto res = FaissGpuResourceMgr::GetInstance().GetRes(device_id)) { ResScope rs(res, device_id, false); faiss::gpu::GpuClonerOptions option; @@ -105,16 +113,26 @@ IVFSQHybrid::LoadImpl(const BinarySet& index_binary) { FaissBaseIndex::LoadImpl(index_binary); // load on cpu auto* ivf_index = dynamic_cast(index_.get()); ivf_index->backup_quantizer(); + gpu_mode = 0; } void IVFSQHybrid::search_impl(int64_t n, const float* data, int64_t k, float* distances, int64_t* labels, const Config& cfg) { +// std::lock_guard lk(g_mutex); +// static int64_t search_count; +// ++search_count; + if (gpu_mode == 2) { GPUIVF::search_impl(n, data, k, distances, labels, cfg); - } else if (gpu_mode == 1) { - ResScope rs(res_, gpu_id_); - IVF::search_impl(n, data, k, distances, labels, cfg); +// index_->search(n, (float*)data, k, distances, labels); + } else if (gpu_mode == 1) { // hybrid + if (auto res = FaissGpuResourceMgr::GetInstance().GetRes(quantizer_gpu_id_)) { + ResScope rs(res, quantizer_gpu_id_, true); + IVF::search_impl(n, data, k, distances, labels, cfg); + } else { + KNOWHERE_THROW_MSG("Hybrid Search Error, can't get gpu: " + std::to_string(quantizer_gpu_id_) + "resource"); + } } else if (gpu_mode == 0) { IVF::search_impl(n, data, k, distances, labels, cfg); } @@ -122,16 +140,18 @@ IVFSQHybrid::search_impl(int64_t n, const float* data, int64_t k, float* distanc QuantizerPtr IVFSQHybrid::LoadQuantizer(const Config& conf) { +// std::lock_guard lk(g_mutex); + auto quantizer_conf = std::dynamic_pointer_cast(conf); if (quantizer_conf != nullptr) { if (quantizer_conf->mode != 1) { KNOWHERE_THROW_MSG("mode only support 1 in this func"); } } - gpu_id_ = quantizer_conf->gpu_id; + auto gpu_id = quantizer_conf->gpu_id; - if (auto res = FaissGpuResourceMgr::GetInstance().GetRes(gpu_id_)) { - ResScope rs(res, gpu_id_, false); + if (auto res = FaissGpuResourceMgr::GetInstance().GetRes(gpu_id)) { + ResScope rs(res, gpu_id, false); faiss::gpu::GpuClonerOptions option; option.allInGpu = true; @@ -148,16 +168,19 @@ IVFSQHybrid::LoadQuantizer(const Config& conf) { auto& q_ptr = index_composition->quantizer; q->size = q_ptr->d * q_ptr->getNumVecs() * sizeof(float); q->quantizer = q_ptr; + q->gpu_id = gpu_id; res_ = res; gpu_mode = 1; return q; } else { - KNOWHERE_THROW_MSG("CopyCpuToGpu Error, can't get gpu: " + std::to_string(gpu_id_) + "resource"); + KNOWHERE_THROW_MSG("CopyCpuToGpu Error, can't get gpu: " + std::to_string(gpu_id) + "resource"); } } void IVFSQHybrid::SetQuantizer(const QuantizerPtr& q) { +// std::lock_guard lk(g_mutex); + auto ivf_quantizer = std::dynamic_pointer_cast(q); if (ivf_quantizer == nullptr) { KNOWHERE_THROW_MSG("Quantizer type error"); @@ -170,20 +193,27 @@ IVFSQHybrid::SetQuantizer(const QuantizerPtr& q) { // delete ivf_index->quantizer; ivf_index->quantizer = ivf_quantizer->quantizer; } + quantizer_gpu_id_ = ivf_quantizer->gpu_id; + gpu_mode = 1; } void IVFSQHybrid::UnsetQuantizer() { +// std::lock_guard lk(g_mutex); + auto* ivf_index = dynamic_cast(index_.get()); if (ivf_index == nullptr) { KNOWHERE_THROW_MSG("Index type error"); } ivf_index->quantizer = nullptr; + quantizer_gpu_id_ = -1; } VectorIndexPtr IVFSQHybrid::LoadData(const knowhere::QuantizerPtr& q, const Config& conf) { +// std::lock_guard lk(g_mutex); + auto quantizer_conf = std::dynamic_pointer_cast(conf); if (quantizer_conf != nullptr) { if (quantizer_conf->mode != 2) { @@ -192,13 +222,11 @@ IVFSQHybrid::LoadData(const knowhere::QuantizerPtr& q, const Config& conf) { } else { KNOWHERE_THROW_MSG("conf error"); } - // if (quantizer_conf->gpu_id != gpu_id_) { - // KNOWHERE_THROW_MSG("quantizer and data must on the same gpu card"); - // } - gpu_id_ = quantizer_conf->gpu_id; - if (auto res = FaissGpuResourceMgr::GetInstance().GetRes(gpu_id_)) { - ResScope rs(res, gpu_id_, false); + auto gpu_id = quantizer_conf->gpu_id; + + if (auto res = FaissGpuResourceMgr::GetInstance().GetRes(gpu_id)) { + ResScope rs(res, gpu_id, false); faiss::gpu::GpuClonerOptions option; option.allInGpu = true; @@ -211,18 +239,20 @@ IVFSQHybrid::LoadData(const knowhere::QuantizerPtr& q, const Config& conf) { index_composition->quantizer = ivf_quantizer->quantizer; index_composition->mode = quantizer_conf->mode; // only 2 - auto gpu_index = faiss::gpu::index_cpu_to_gpu(res->faiss_res.get(), gpu_id_, index_composition, &option); + auto gpu_index = faiss::gpu::index_cpu_to_gpu(res->faiss_res.get(), gpu_id, index_composition, &option); std::shared_ptr new_idx; new_idx.reset(gpu_index); - auto sq_idx = std::make_shared(new_idx, gpu_id_, res); + auto sq_idx = std::make_shared(new_idx, gpu_id, res); return sq_idx; } else { - KNOWHERE_THROW_MSG("CopyCpuToGpu Error, can't get gpu: " + std::to_string(gpu_id_) + "resource"); + KNOWHERE_THROW_MSG("CopyCpuToGpu Error, can't get gpu: " + std::to_string(gpu_id) + "resource"); } } std::pair IVFSQHybrid::CopyCpuToGpuWithQuantizer(const int64_t& device_id, const Config& config) { +// std::lock_guard lk(g_mutex); + if (auto res = FaissGpuResourceMgr::GetInstance().GetRes(device_id)) { ResScope rs(res, device_id, false); faiss::gpu::GpuClonerOptions option; @@ -242,12 +272,29 @@ IVFSQHybrid::CopyCpuToGpuWithQuantizer(const int64_t& device_id, const Config& c auto q = std::make_shared(); q->quantizer = index_composition.quantizer; q->size = index_composition.quantizer->d * index_composition.quantizer->getNumVecs() * sizeof(float); + q->gpu_id = device_id; return std::make_pair(new_idx, q); } else { KNOWHERE_THROW_MSG("CopyCpuToGpu Error, can't get gpu: " + std::to_string(gpu_id_) + "resource"); } } +void +IVFSQHybrid::set_index_model(IndexModelPtr model) { + std::lock_guard lk(mutex_); + + auto host_index = std::static_pointer_cast(model); + if (auto gpures = FaissGpuResourceMgr::GetInstance().GetRes(gpu_id_)) { + ResScope rs(gpures, gpu_id_, false); + auto device_index = faiss::gpu::index_cpu_to_gpu(gpures->faiss_res.get(), gpu_id_, host_index->index_.get()); + index_.reset(device_index); + res_ = gpures; + gpu_mode = 2; + } else { + KNOWHERE_THROW_MSG("load index model error, can't get gpu_resource"); + } +} + FaissIVFQuantizer::~FaissIVFQuantizer() { if (quantizer != nullptr) { delete quantizer; diff --git a/core/src/index/knowhere/knowhere/index/vector_index/IndexIVFSQHybrid.h b/core/src/index/knowhere/knowhere/index/vector_index/IndexIVFSQHybrid.h index f54c61c20f..87cc22931f 100644 --- a/core/src/index/knowhere/knowhere/index/vector_index/IndexIVFSQHybrid.h +++ b/core/src/index/knowhere/knowhere/index/vector_index/IndexIVFSQHybrid.h @@ -18,6 +18,8 @@ #pragma once #include +#include + #include #include @@ -29,6 +31,7 @@ namespace knowhere { #ifdef CUSTOMIZATION struct FaissIVFQuantizer : public Quantizer { faiss::gpu::GpuIndexFlat* quantizer = nullptr; + int64_t gpu_id; ~FaissIVFQuantizer() override; }; @@ -52,6 +55,9 @@ class IVFSQHybrid : public GPUIVFSQ { } public: + void + set_index_model(IndexModelPtr model) override; + QuantizerPtr LoadQuantizer(const Config& conf); @@ -85,6 +91,7 @@ class IVFSQHybrid : public GPUIVFSQ { protected: int64_t gpu_mode = 0; // 0,1,2 + int64_t quantizer_gpu_id_ = -1; }; } // namespace knowhere diff --git a/core/src/index/knowhere/knowhere/index/vector_index/VectorIndex.h b/core/src/index/knowhere/knowhere/index/vector_index/VectorIndex.h index 810c4d2ea4..6509458b7b 100644 --- a/core/src/index/knowhere/knowhere/index/vector_index/VectorIndex.h +++ b/core/src/index/knowhere/knowhere/index/vector_index/VectorIndex.h @@ -48,6 +48,7 @@ class VectorIndex : public Index { virtual void Seal() = 0; + // TODO(linxj): Deprecated virtual VectorIndexPtr Clone() = 0; diff --git a/core/src/index/knowhere/knowhere/index/vector_index/helpers/FaissIO.h b/core/src/index/knowhere/knowhere/index/vector_index/helpers/FaissIO.h index 7cce5bbbac..a7f8f349e1 100644 --- a/core/src/index/knowhere/knowhere/index/vector_index/helpers/FaissIO.h +++ b/core/src/index/knowhere/knowhere/index/vector_index/helpers/FaissIO.h @@ -17,7 +17,7 @@ #pragma once -#include +#include namespace knowhere { diff --git a/core/src/index/unittest/Helper.h b/core/src/index/unittest/Helper.h index d11a484c03..8d4bb0f4ae 100644 --- a/core/src/index/unittest/Helper.h +++ b/core/src/index/unittest/Helper.h @@ -26,7 +26,7 @@ #include "knowhere/index/vector_index/IndexIVFSQ.h" #include "knowhere/index/vector_index/IndexIVFSQHybrid.h" -constexpr int DEVICEID = 0; +int DEVICEID = 0; constexpr int64_t DIM = 128; constexpr int64_t NB = 10000; constexpr int64_t NQ = 10; diff --git a/core/src/index/unittest/test_customized_index.cpp b/core/src/index/unittest/test_customized_index.cpp index 1e0b1d932d..f9b48b8b67 100644 --- a/core/src/index/unittest/test_customized_index.cpp +++ b/core/src/index/unittest/test_customized_index.cpp @@ -16,17 +16,23 @@ // under the License. #include +#include #include "unittest/Helper.h" #include "unittest/utils.h" +#include "knowhere/common/Timer.h" + class SingleIndexTest : public DataGen, public TestGpuIndexBase { protected: void SetUp() override { TestGpuIndexBase::SetUp(); - Generate(DIM, NB, NQ); - k = K; + nb = 1000000; + nq = 1000; + dim = DIM; + Generate(dim, nb, nq); + k = 1000; } void @@ -119,4 +125,114 @@ TEST_F(SingleIndexTest, IVFSQHybrid) { } } +//TEST_F(SingleIndexTest, thread_safe) { +// assert(!xb.empty()); +// +// index_type = "IVFSQHybrid"; +// index_ = IndexFactory(index_type); +// auto base = ParamGenerator::GetInstance().Gen(ParameterType::ivfsq); +// auto conf = std::dynamic_pointer_cast(base); +// conf->nlist = 16384; +// conf->k = k; +// conf->nprobe = 10; +// conf->d = dim; +// auto preprocessor = index_->BuildPreprocessor(base_dataset, conf); +// index_->set_preprocessor(preprocessor); +// +// auto model = index_->Train(base_dataset, conf); +// index_->set_index_model(model); +// index_->Add(base_dataset, conf); +// EXPECT_EQ(index_->Count(), nb); +// EXPECT_EQ(index_->Dimension(), dim); +// +// auto binaryset = index_->Serialize(); +// +// +// +// auto cpu_idx = std::make_shared(DEVICEID); +// cpu_idx->Load(binaryset); +// auto pair = cpu_idx->CopyCpuToGpuWithQuantizer(DEVICEID, conf); +// auto quantizer = pair.second; +// +// auto quantizer_conf = std::make_shared(); +// quantizer_conf->mode = 2; // only copy data +// quantizer_conf->gpu_id = DEVICEID; +// +// auto CopyAllToGpu = [&](int64_t search_count, bool do_search = false) { +// for (int i = 0; i < search_count; ++i) { +// auto gpu_idx = cpu_idx->CopyCpuToGpu(DEVICEID, conf); +// if (do_search) { +// auto result = gpu_idx->Search(query_dataset, conf); +// AssertAnns(result, nq, conf->k); +// } +// } +// }; +// +// auto hybrid_qt_idx = std::make_shared(DEVICEID); +// hybrid_qt_idx->Load(binaryset); +// auto SetQuantizerDoSearch = [&](int64_t search_count) { +// for (int i = 0; i < search_count; ++i) { +// hybrid_qt_idx->SetQuantizer(quantizer); +// auto result = hybrid_qt_idx->Search(query_dataset, conf); +// AssertAnns(result, nq, conf->k); +// // PrintResult(result, nq, k); +// hybrid_qt_idx->UnsetQuantizer(); +// } +// }; +// +// auto hybrid_data_idx = std::make_shared(DEVICEID); +// hybrid_data_idx->Load(binaryset); +// auto LoadDataDoSearch = [&](int64_t search_count, bool do_search = false) { +// for (int i = 0; i < search_count; ++i) { +// auto hybrid_idx = hybrid_data_idx->LoadData(quantizer, quantizer_conf); +// if (do_search) { +// auto result = hybrid_idx->Search(query_dataset, conf); +//// AssertAnns(result, nq, conf->k); +// } +// } +// }; +// +// knowhere::TimeRecorder tc(""); +// CopyAllToGpu(2000/2, false); +// tc.RecordSection("CopyAllToGpu witout search"); +// CopyAllToGpu(400/2, true); +// tc.RecordSection("CopyAllToGpu with search"); +// SetQuantizerDoSearch(6); +// tc.RecordSection("SetQuantizer with search"); +// LoadDataDoSearch(2000/2, false); +// tc.RecordSection("LoadData without search"); +// LoadDataDoSearch(400/2, true); +// tc.RecordSection("LoadData with search"); +// +// { +// std::thread t1(CopyAllToGpu, 2000, false); +// std::thread t2(CopyAllToGpu, 400, true); +// t1.join(); +// t2.join(); +// } +// +// { +// std::thread t1(SetQuantizerDoSearch, 12); +// std::thread t2(CopyAllToGpu, 400, true); +// t1.join(); +// t2.join(); +// } +// +// { +// std::thread t1(SetQuantizerDoSearch, 12); +// std::thread t2(LoadDataDoSearch, 400, true); +// t1.join(); +// t2.join(); +// } +// +// { +// std::thread t1(LoadDataDoSearch, 2000, false); +// std::thread t2(LoadDataDoSearch, 400, true); +// t1.join(); +// t2.join(); +// } +// +//} + + #endif diff --git a/core/src/index/unittest/test_ivf.cpp b/core/src/index/unittest/test_ivf.cpp index fae27b0dd3..3fd3e16d0e 100644 --- a/core/src/index/unittest/test_ivf.cpp +++ b/core/src/index/unittest/test_ivf.cpp @@ -20,19 +20,12 @@ #include #include -#include -#include #include #include "knowhere/common/Exception.h" #include "knowhere/common/Timer.h" #include "knowhere/index/vector_index/IndexGPUIVF.h" -#include "knowhere/index/vector_index/IndexGPUIVFPQ.h" -#include "knowhere/index/vector_index/IndexGPUIVFSQ.h" #include "knowhere/index/vector_index/IndexIVF.h" -#include "knowhere/index/vector_index/IndexIVFPQ.h" -#include "knowhere/index/vector_index/IndexIVFSQ.h" -#include "knowhere/index/vector_index/IndexIVFSQHybrid.h" #include "knowhere/index/vector_index/helpers/Cloner.h" #include "unittest/Helper.h" @@ -51,6 +44,9 @@ class IVFTest : public DataGen, public TestWithParam<::std::tuple gpu_idx{"GPUIVFSQ"}; - auto finder = std::find(gpu_idx.cbegin(), gpu_idx.cend(), index_type); - if (finder != gpu_idx.cend()) { - return knowhere::cloner::CopyCpuToGpu(index_, DEVICEID, knowhere::Config()); - } - return index_; - } - protected: std::string index_type; knowhere::Config conf; @@ -100,8 +86,7 @@ TEST_P(IVFTest, ivf_basic) { EXPECT_EQ(index_->Count(), nb); EXPECT_EQ(index_->Dimension(), dim); - auto new_idx = ChooseTodo(); - auto result = new_idx->Search(query_dataset, conf); + auto result = index_->Search(query_dataset, conf); AssertAnns(result, nq, conf->k); // PrintResult(result, nq, k); } @@ -134,8 +119,7 @@ TEST_P(IVFTest, ivf_serialize) { index_->set_index_model(model); index_->Add(base_dataset, conf); - auto new_idx = ChooseTodo(); - auto result = new_idx->Search(query_dataset, conf); + auto result = index_->Search(query_dataset, conf); AssertAnns(result, nq, conf->k); } @@ -159,8 +143,7 @@ TEST_P(IVFTest, ivf_serialize) { index_->Load(binaryset); EXPECT_EQ(index_->Count(), nb); EXPECT_EQ(index_->Dimension(), dim); - auto new_idx = ChooseTodo(); - auto result = new_idx->Search(query_dataset, conf); + auto result = index_->Search(query_dataset, conf); AssertAnns(result, nq, conf->k); } } @@ -176,8 +159,7 @@ TEST_P(IVFTest, clone_test) { index_->Add(base_dataset, conf); EXPECT_EQ(index_->Count(), nb); EXPECT_EQ(index_->Dimension(), dim); - auto new_idx = ChooseTodo(); - auto result = new_idx->Search(query_dataset, conf); + auto result = index_->Search(query_dataset, conf); AssertAnns(result, nq, conf->k); // PrintResult(result, nq, k); @@ -210,12 +192,6 @@ TEST_P(IVFTest, clone_test) { // } // } - { - if (index_type == "IVFSQHybrid") { - return; - } - } - { // copy from gpu to cpu std::vector support_idx_vec{"GPUIVF", "GPUIVFSQ", "IVFSQHybrid"}; @@ -277,8 +253,7 @@ TEST_P(IVFTest, gpu_seal_test) { index_->Add(base_dataset, conf); EXPECT_EQ(index_->Count(), nb); EXPECT_EQ(index_->Dimension(), dim); - auto new_idx = ChooseTodo(); - auto result = new_idx->Search(query_dataset, conf); + auto result = index_->Search(query_dataset, conf); AssertAnns(result, nq, conf->k); auto cpu_idx = knowhere::cloner::CopyGpuToCpu(index_, knowhere::Config()); diff --git a/core/src/scheduler/SchedInst.h b/core/src/scheduler/SchedInst.h index 60033731ae..b9153d3bc3 100644 --- a/core/src/scheduler/SchedInst.h +++ b/core/src/scheduler/SchedInst.h @@ -94,6 +94,7 @@ class OptimizerInst { std::lock_guard lock(mutex_); if (instance == nullptr) { std::vector pass_list; + pass_list.push_back(std::make_shared()); pass_list.push_back(std::make_shared()); instance = std::make_shared(pass_list); } diff --git a/core/src/scheduler/optimizer/LargeSQ8HPass.cpp b/core/src/scheduler/optimizer/LargeSQ8HPass.cpp index 8368a90000..0d5a81a7b6 100644 --- a/core/src/scheduler/optimizer/LargeSQ8HPass.cpp +++ b/core/src/scheduler/optimizer/LargeSQ8HPass.cpp @@ -26,48 +26,48 @@ namespace milvus { namespace scheduler { -// bool -// LargeSQ8HPass::Run(const TaskPtr& task) { -// if (task->Type() != TaskType::SearchTask) { -// return false; -// } -// -// auto search_task = std::static_pointer_cast(task); -// if (search_task->file_->engine_type_ != (int)engine::EngineType::FAISS_IVFSQ8H) { -// return false; -// } -// -// auto search_job = std::static_pointer_cast(search_task->job_.lock()); -// -// // TODO: future, Index::IVFSQ8H, if nq < threshold set cpu, else set gpu -// if (search_job->nq() < 100) { -// return false; -// } -// -// std::vector gpus = scheduler::get_gpu_pool(); -// std::vector all_free_mem; -// for (auto& gpu : gpus) { -// auto cache = cache::GpuCacheMgr::GetInstance(gpu); -// auto free_mem = cache->CacheCapacity() - cache->CacheUsage(); -// all_free_mem.push_back(free_mem); -// } -// -// auto max_e = std::max_element(all_free_mem.begin(), all_free_mem.end()); -// auto best_index = std::distance(all_free_mem.begin(), max_e); -// auto best_device_id = gpus[best_index]; -// -// ResourcePtr res_ptr = ResMgrInst::GetInstance()->GetResource(ResourceType::GPU, best_device_id); -// if (not res_ptr) { -// SERVER_LOG_ERROR << "GpuResource " << best_device_id << " invalid."; -// // TODO: throw critical error and exit -// return false; -// } -// -// auto label = std::make_shared(std::weak_ptr(res_ptr)); -// task->label() = label; -// -// return true; -// } + bool + LargeSQ8HPass::Run(const TaskPtr& task) { + if (task->Type() != TaskType::SearchTask) { + return false; + } + + auto search_task = std::static_pointer_cast(task); + if (search_task->file_->engine_type_ != (int)engine::EngineType::FAISS_IVFSQ8H) { + return false; + } + + auto search_job = std::static_pointer_cast(search_task->job_.lock()); + + // TODO: future, Index::IVFSQ8H, if nq < threshold set cpu, else set gpu + if (search_job->nq() < 100) { + return false; + } + + std::vector gpus = scheduler::get_gpu_pool(); + std::vector all_free_mem; + for (auto& gpu : gpus) { + auto cache = cache::GpuCacheMgr::GetInstance(gpu); + auto free_mem = cache->CacheCapacity() - cache->CacheUsage(); + all_free_mem.push_back(free_mem); + } + + auto max_e = std::max_element(all_free_mem.begin(), all_free_mem.end()); + auto best_index = std::distance(all_free_mem.begin(), max_e); + auto best_device_id = gpus[best_index]; + + ResourcePtr res_ptr = ResMgrInst::GetInstance()->GetResource(ResourceType::GPU, best_device_id); + if (not res_ptr) { + SERVER_LOG_ERROR << "GpuResource " << best_device_id << " invalid."; + // TODO: throw critical error and exit + return false; + } + + auto label = std::make_shared(std::weak_ptr(res_ptr)); + task->label() = label; + + return true; + } } // namespace scheduler } // namespace milvus diff --git a/core/src/scheduler/optimizer/LargeSQ8HPass.h b/core/src/scheduler/optimizer/LargeSQ8HPass.h index 3335a37cc7..49e658002f 100644 --- a/core/src/scheduler/optimizer/LargeSQ8HPass.h +++ b/core/src/scheduler/optimizer/LargeSQ8HPass.h @@ -37,8 +37,8 @@ class LargeSQ8HPass : public Pass { LargeSQ8HPass() = default; public: - // bool - // Run(const TaskPtr& task) override; + bool + Run(const TaskPtr& task) override; }; using LargeSQ8HPassPtr = std::shared_ptr; diff --git a/core/src/server/DBWrapper.cpp b/core/src/server/DBWrapper.cpp index a5b892ad47..34c8d38faf 100644 --- a/core/src/server/DBWrapper.cpp +++ b/core/src/server/DBWrapper.cpp @@ -15,7 +15,7 @@ // specific language governing permissions and limitations // under the License. -#include +#include #include #include #include diff --git a/core/src/wrapper/VecIndex.h b/core/src/wrapper/VecIndex.h index 05da9ccc03..1729d583ae 100644 --- a/core/src/wrapper/VecIndex.h +++ b/core/src/wrapper/VecIndex.h @@ -70,6 +70,7 @@ class VecIndex : public cache::DataObj { virtual VecIndexPtr CopyToCpu(const Config& cfg = Config()) = 0; + // TODO(linxj): Deprecated virtual VecIndexPtr Clone() = 0; diff --git a/core/unittest/wrapper/test_wrapper.cpp b/core/unittest/wrapper/test_wrapper.cpp index f112fc7e65..2f8fd6fafe 100644 --- a/core/unittest/wrapper/test_wrapper.cpp +++ b/core/unittest/wrapper/test_wrapper.cpp @@ -74,7 +74,7 @@ INSTANTIATE_TEST_CASE_P(WrapperParam, KnowhereWrapperTest, 10, 10), std::make_tuple(milvus::engine::IndexType::FAISS_IVFSQ8_CPU, "Default", DIM, NB, 10, 10), -// std::make_tuple(milvus::engine::IndexType::FAISS_IVFSQ8_GPU, "Default", DIM, NB, 10, 10), + std::make_tuple(milvus::engine::IndexType::FAISS_IVFSQ8_GPU, "Default", DIM, NB, 10, 10), std::make_tuple(milvus::engine::IndexType::FAISS_IVFSQ8_MIX, "Default", DIM, NB, 10, 10), // std::make_tuple(IndexType::NSG_MIX, "Default", 128, 250000, 10, 10), // std::make_tuple(IndexType::SPTAG_KDT_RNT_CPU, "Default", 128, 250000, 10, 10), From f23e8e2143caac8c5fbb4b1332f0124d159919ac Mon Sep 17 00:00:00 2001 From: "xiaojun.lin" Date: Sat, 19 Oct 2019 17:51:59 +0800 Subject: [PATCH 12/89] clang-format... Former-commit-id: 3bd5e246b34d8ca7800dbc6ce0e784dfa26ce18a --- core/src/db/engine/ExecutionEngineImpl.cpp | 6 ++-- .../index/vector_index/IndexGPUIVF.cpp | 4 +-- .../index/vector_index/IndexGPUIVFPQ.cpp | 2 +- .../index/vector_index/IndexGPUIVFSQ.cpp | 2 +- .../index/vector_index/IndexIDMAP.cpp | 4 +-- .../knowhere/index/vector_index/IndexIVF.cpp | 2 +- .../index/vector_index/IndexIVFSQ.cpp | 2 +- .../index/vector_index/IndexIVFSQHybrid.cpp | 36 +++++++++---------- .../index/vector_index/IndexIVFSQHybrid.h | 2 +- .../index/unittest/test_customized_index.cpp | 3 +- core/src/index/unittest/test_ivf.cpp | 6 ++-- .../src/scheduler/optimizer/LargeSQ8HPass.cpp | 6 ++-- 12 files changed, 37 insertions(+), 38 deletions(-) diff --git a/core/src/db/engine/ExecutionEngineImpl.cpp b/core/src/db/engine/ExecutionEngineImpl.cpp index 9c411deba1..aeb17203ef 100644 --- a/core/src/db/engine/ExecutionEngineImpl.cpp +++ b/core/src/db/engine/ExecutionEngineImpl.cpp @@ -18,13 +18,13 @@ #include "db/engine/ExecutionEngineImpl.h" #include "cache/CpuCacheMgr.h" #include "cache/GpuCacheMgr.h" +#include "knowhere/common/Config.h" #include "metrics/Metrics.h" +#include "scheduler/Utils.h" +#include "server/Config.h" #include "utils/CommonUtil.h" #include "utils/Exception.h" #include "utils/Log.h" -#include "knowhere/common/Config.h" -#include "scheduler/Utils.h" -#include "server/Config.h" #include "wrapper/ConfAdapter.h" #include "wrapper/ConfAdapterMgr.h" #include "wrapper/VecImpl.h" diff --git a/core/src/index/knowhere/knowhere/index/vector_index/IndexGPUIVF.cpp b/core/src/index/knowhere/knowhere/index/vector_index/IndexGPUIVF.cpp index 65938e1630..a26f947181 100644 --- a/core/src/index/knowhere/knowhere/index/vector_index/IndexGPUIVF.cpp +++ b/core/src/index/knowhere/knowhere/index/vector_index/IndexGPUIVF.cpp @@ -17,10 +17,10 @@ #include +#include #include #include #include -#include #include "knowhere/adapter/VectorAdapter.h" #include "knowhere/common/Exception.h" @@ -133,7 +133,7 @@ GPUIVF::search_impl(int64_t n, const float* data, int64_t k, float* distances, i if (auto device_index = std::dynamic_pointer_cast(index_)) { auto search_cfg = std::dynamic_pointer_cast(cfg); device_index->nprobe = search_cfg->nprobe; -// assert(device_index->getNumProbes() == search_cfg->nprobe); + // assert(device_index->getNumProbes() == search_cfg->nprobe); { ResScope rs(res_, gpu_id_); diff --git a/core/src/index/knowhere/knowhere/index/vector_index/IndexGPUIVFPQ.cpp b/core/src/index/knowhere/knowhere/index/vector_index/IndexGPUIVFPQ.cpp index 9ba8dd0456..b027539c37 100644 --- a/core/src/index/knowhere/knowhere/index/vector_index/IndexGPUIVFPQ.cpp +++ b/core/src/index/knowhere/knowhere/index/vector_index/IndexGPUIVFPQ.cpp @@ -16,9 +16,9 @@ // under the License. #include +#include #include #include -#include #include diff --git a/core/src/index/knowhere/knowhere/index/vector_index/IndexGPUIVFSQ.cpp b/core/src/index/knowhere/knowhere/index/vector_index/IndexGPUIVFSQ.cpp index fff27cd7db..941f9adc48 100644 --- a/core/src/index/knowhere/knowhere/index/vector_index/IndexGPUIVFSQ.cpp +++ b/core/src/index/knowhere/knowhere/index/vector_index/IndexGPUIVFSQ.cpp @@ -15,8 +15,8 @@ // specific language governing permissions and limitations // under the License. -#include #include +#include #include diff --git a/core/src/index/knowhere/knowhere/index/vector_index/IndexIDMAP.cpp b/core/src/index/knowhere/knowhere/index/vector_index/IndexIDMAP.cpp index 643bb16076..f926951736 100644 --- a/core/src/index/knowhere/knowhere/index/vector_index/IndexIDMAP.cpp +++ b/core/src/index/knowhere/knowhere/index/vector_index/IndexIDMAP.cpp @@ -17,9 +17,9 @@ #include #include -#include -#include #include +#include +#include #include diff --git a/core/src/index/knowhere/knowhere/index/vector_index/IndexIVF.cpp b/core/src/index/knowhere/knowhere/index/vector_index/IndexIVF.cpp index 02708ff5d7..73f5c4164b 100644 --- a/core/src/index/knowhere/knowhere/index/vector_index/IndexIVF.cpp +++ b/core/src/index/knowhere/knowhere/index/vector_index/IndexIVF.cpp @@ -15,11 +15,11 @@ // specific language governing permissions and limitations // under the License. -#include #include #include #include #include +#include #include #include #include diff --git a/core/src/index/knowhere/knowhere/index/vector_index/IndexIVFSQ.cpp b/core/src/index/knowhere/knowhere/index/vector_index/IndexIVFSQ.cpp index 80b4c78883..6e9a1d94da 100644 --- a/core/src/index/knowhere/knowhere/index/vector_index/IndexIVFSQ.cpp +++ b/core/src/index/knowhere/knowhere/index/vector_index/IndexIVFSQ.cpp @@ -15,8 +15,8 @@ // specific language governing permissions and limitations // under the License. -#include #include +#include #include #include "knowhere/adapter/VectorAdapter.h" diff --git a/core/src/index/knowhere/knowhere/index/vector_index/IndexIVFSQHybrid.cpp b/core/src/index/knowhere/knowhere/index/vector_index/IndexIVFSQHybrid.cpp index af67722266..7b229db21e 100644 --- a/core/src/index/knowhere/knowhere/index/vector_index/IndexIVFSQHybrid.cpp +++ b/core/src/index/knowhere/knowhere/index/vector_index/IndexIVFSQHybrid.cpp @@ -22,19 +22,19 @@ #include +#include #include #include -#include namespace knowhere { #ifdef CUSTOMIZATION -//std::mutex g_mutex; +// std::mutex g_mutex; IndexModelPtr IVFSQHybrid::Train(const DatasetPtr& dataset, const Config& config) { -// std::lock_guard lk(g_mutex); + // std::lock_guard lk(g_mutex); auto build_cfg = std::dynamic_pointer_cast(config); if (build_cfg != nullptr) { @@ -74,12 +74,12 @@ IVFSQHybrid::CopyGpuToCpu(const Config& config) { } std::lock_guard lk(mutex_); - faiss::Index* device_index = index_.get(); - faiss::Index* host_index = faiss::gpu::index_gpu_to_cpu(device_index); + faiss::Index* device_index = index_.get(); + faiss::Index* host_index = faiss::gpu::index_gpu_to_cpu(device_index); - std::shared_ptr new_index; - new_index.reset(host_index); - return std::make_shared(new_index); + std::shared_ptr new_index; + new_index.reset(host_index); + return std::make_shared(new_index); } VectorIndexPtr @@ -119,14 +119,14 @@ IVFSQHybrid::LoadImpl(const BinarySet& index_binary) { void IVFSQHybrid::search_impl(int64_t n, const float* data, int64_t k, float* distances, int64_t* labels, const Config& cfg) { -// std::lock_guard lk(g_mutex); -// static int64_t search_count; -// ++search_count; + // std::lock_guard lk(g_mutex); + // static int64_t search_count; + // ++search_count; if (gpu_mode == 2) { GPUIVF::search_impl(n, data, k, distances, labels, cfg); -// index_->search(n, (float*)data, k, distances, labels); - } else if (gpu_mode == 1) { // hybrid + // index_->search(n, (float*)data, k, distances, labels); + } else if (gpu_mode == 1) { // hybrid if (auto res = FaissGpuResourceMgr::GetInstance().GetRes(quantizer_gpu_id_)) { ResScope rs(res, quantizer_gpu_id_, true); IVF::search_impl(n, data, k, distances, labels, cfg); @@ -140,7 +140,7 @@ IVFSQHybrid::search_impl(int64_t n, const float* data, int64_t k, float* distanc QuantizerPtr IVFSQHybrid::LoadQuantizer(const Config& conf) { -// std::lock_guard lk(g_mutex); + // std::lock_guard lk(g_mutex); auto quantizer_conf = std::dynamic_pointer_cast(conf); if (quantizer_conf != nullptr) { @@ -179,7 +179,7 @@ IVFSQHybrid::LoadQuantizer(const Config& conf) { void IVFSQHybrid::SetQuantizer(const QuantizerPtr& q) { -// std::lock_guard lk(g_mutex); + // std::lock_guard lk(g_mutex); auto ivf_quantizer = std::dynamic_pointer_cast(q); if (ivf_quantizer == nullptr) { @@ -199,7 +199,7 @@ IVFSQHybrid::SetQuantizer(const QuantizerPtr& q) { void IVFSQHybrid::UnsetQuantizer() { -// std::lock_guard lk(g_mutex); + // std::lock_guard lk(g_mutex); auto* ivf_index = dynamic_cast(index_.get()); if (ivf_index == nullptr) { @@ -212,7 +212,7 @@ IVFSQHybrid::UnsetQuantizer() { VectorIndexPtr IVFSQHybrid::LoadData(const knowhere::QuantizerPtr& q, const Config& conf) { -// std::lock_guard lk(g_mutex); + // std::lock_guard lk(g_mutex); auto quantizer_conf = std::dynamic_pointer_cast(conf); if (quantizer_conf != nullptr) { @@ -251,7 +251,7 @@ IVFSQHybrid::LoadData(const knowhere::QuantizerPtr& q, const Config& conf) { std::pair IVFSQHybrid::CopyCpuToGpuWithQuantizer(const int64_t& device_id, const Config& config) { -// std::lock_guard lk(g_mutex); + // std::lock_guard lk(g_mutex); if (auto res = FaissGpuResourceMgr::GetInstance().GetRes(device_id)) { ResScope rs(res, device_id, false); diff --git a/core/src/index/knowhere/knowhere/index/vector_index/IndexIVFSQHybrid.h b/core/src/index/knowhere/knowhere/index/vector_index/IndexIVFSQHybrid.h index 87cc22931f..d2a3be6c39 100644 --- a/core/src/index/knowhere/knowhere/index/vector_index/IndexIVFSQHybrid.h +++ b/core/src/index/knowhere/knowhere/index/vector_index/IndexIVFSQHybrid.h @@ -17,8 +17,8 @@ #pragma once -#include #include +#include #include #include diff --git a/core/src/index/unittest/test_customized_index.cpp b/core/src/index/unittest/test_customized_index.cpp index f9b48b8b67..346e8e3d93 100644 --- a/core/src/index/unittest/test_customized_index.cpp +++ b/core/src/index/unittest/test_customized_index.cpp @@ -125,7 +125,7 @@ TEST_F(SingleIndexTest, IVFSQHybrid) { } } -//TEST_F(SingleIndexTest, thread_safe) { +// TEST_F(SingleIndexTest, thread_safe) { // assert(!xb.empty()); // // index_type = "IVFSQHybrid"; @@ -234,5 +234,4 @@ TEST_F(SingleIndexTest, IVFSQHybrid) { // //} - #endif diff --git a/core/src/index/unittest/test_ivf.cpp b/core/src/index/unittest/test_ivf.cpp index 3fd3e16d0e..20addc82bb 100644 --- a/core/src/index/unittest/test_ivf.cpp +++ b/core/src/index/unittest/test_ivf.cpp @@ -44,9 +44,9 @@ class IVFTest : public DataGen, public TestWithParam<::std::tupleType() != TaskType::SearchTask) { return false; } @@ -67,7 +67,7 @@ namespace scheduler { task->label() = label; return true; - } +} } // namespace scheduler } // namespace milvus From 6a005b9f714a71284a6fcfb5018948b4b34cab5a Mon Sep 17 00:00:00 2001 From: JinHai-CN Date: Sat, 19 Oct 2019 20:33:55 +0800 Subject: [PATCH 13/89] update ci files Former-commit-id: 1dc3c1153a1e3f2d21bf7981c4349abc29b305ee --- ci/jenkinsfile/milvus_build.groovy | 2 +- ci/jenkinsfile/milvus_build_no_ut.groovy | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/ci/jenkinsfile/milvus_build.groovy b/ci/jenkinsfile/milvus_build.groovy index 11f8c51d33..e7341988b2 100644 --- a/ci/jenkinsfile/milvus_build.groovy +++ b/ci/jenkinsfile/milvus_build.groovy @@ -14,7 +14,7 @@ container('milvus-build-env') { sh "export JFROG_ARTFACTORY_URL='${params.JFROG_ARTFACTORY_URL}' \ && export JFROG_USER_NAME='${USERNAME}' \ && export JFROG_PASSWORD='${PASSWORD}' \ - && export FAISS_URL='http://192.168.1.105:6060/jinhai/faiss/-/archive/branch-0.2.1/faiss-branch-0.2.1.tar.gz' \ + && export FAISS_URL='http://192.168.1.105:6060/jinhai/faiss/-/archive/branch-0.3.0/faiss-branch-0.3.0.tar.gz' \ && ./build.sh -t ${params.BUILD_TYPE} -d /opt/milvus -j -u -c" sh "./coverage.sh -u root -p 123456 -t 192.168.1.194" diff --git a/ci/jenkinsfile/milvus_build_no_ut.groovy b/ci/jenkinsfile/milvus_build_no_ut.groovy index 1dd3361106..3f221b8947 100644 --- a/ci/jenkinsfile/milvus_build_no_ut.groovy +++ b/ci/jenkinsfile/milvus_build_no_ut.groovy @@ -14,7 +14,7 @@ container('milvus-build-env') { sh "export JFROG_ARTFACTORY_URL='${params.JFROG_ARTFACTORY_URL}' \ && export JFROG_USER_NAME='${USERNAME}' \ && export JFROG_PASSWORD='${PASSWORD}' \ - && export FAISS_URL='http://192.168.1.105:6060/jinhai/faiss/-/archive/branch-0.2.1/faiss-branch-0.2.1.tar.gz' \ + && export FAISS_URL='http://192.168.1.105:6060/jinhai/faiss/-/archive/branch-0.3.0/faiss-branch-0.3.0.tar.gz' \ && ./build.sh -t ${params.BUILD_TYPE} -j -d /opt/milvus" } } From cdb28ceaaea5c5af642f2d1c9403566bfafd0acf Mon Sep 17 00:00:00 2001 From: JinHai-CN Date: Sat, 19 Oct 2019 21:07:17 +0800 Subject: [PATCH 14/89] Update ci files Former-commit-id: 7972762f8bf8607d8f77e07a3226f08a24981308 --- core/src/index/thirdparty/versions.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/core/src/index/thirdparty/versions.txt b/core/src/index/thirdparty/versions.txt index 9ee845f1e3..a2b16414c2 100644 --- a/core/src/index/thirdparty/versions.txt +++ b/core/src/index/thirdparty/versions.txt @@ -3,4 +3,4 @@ BOOST_VERSION=1.70.0 GTEST_VERSION=1.8.1 LAPACK_VERSION=v3.8.0 OPENBLAS_VERSION=v0.3.6 -FAISS_VERSION=branch-0.2.1 \ No newline at end of file +FAISS_VERSION=branch-0.3.0 \ No newline at end of file From d9138e19601b36aeed78c385179d4530a83b4f3d Mon Sep 17 00:00:00 2001 From: starlord Date: Tue, 22 Oct 2019 18:40:42 +0800 Subject: [PATCH 15/89] #80 Print version information into log during server start Former-commit-id: 874acccc6856b18b9f5992a2ee7bf14382f0f3c6 --- CHANGELOG.md | 1 + core/src/server/Server.cpp | 4 ++++ 2 files changed, 5 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 9b897c608d..2d94687930 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,6 +7,7 @@ Please mark all change in change log and use the ticket from JIRA. ## Bug ## Improvement - \#64 - Improvement dump function in scheduler +- \#80 - Print version information into log during server start ## Feature ## Task diff --git a/core/src/server/Server.cpp b/core/src/server/Server.cpp index 465ed62ddf..4933af72b7 100644 --- a/core/src/server/Server.cpp +++ b/core/src/server/Server.cpp @@ -19,6 +19,7 @@ #include #include +#include "../../version.h" #include "metrics/Metrics.h" #include "scheduler/SchedInst.h" #include "server/Config.h" @@ -180,6 +181,9 @@ Server::Start() { InitLog(log_config_file_); + // print version information + SERVER_LOG_INFO << "Milvus " << BUILD_TYPE << " version: v" << MILVUS_VERSION << ", built at " << BUILD_TIME; + server::Metrics::GetInstance().Init(); server::SystemInfo::GetInstance().Init(); From 4f3fa67be30ca1420e515f4fa5d3ac6476c85158 Mon Sep 17 00:00:00 2001 From: wxyu Date: Tue, 22 Oct 2019 19:11:17 +0800 Subject: [PATCH 16/89] Move easyloggingpp into external directory Former-commit-id: f2392522699d094720b92e5ee281973e3835bb18 --- CHANGELOG.md | 1 + ci/jenkins/scripts/coverage.sh | 4 ++-- core/coverage.sh | 4 ++-- core/src/CMakeLists.txt | 8 ++++++++ .../{utils => external/easyloggingpp}/easylogging++.cc | 0 .../{utils => external/easyloggingpp}/easylogging++.h | 0 core/src/index/knowhere/knowhere/common/Log.h | 2 +- core/src/index/unittest/CMakeLists.txt | 2 +- core/src/main.cpp | 2 +- core/src/utils/Log.h | 2 +- core/src/utils/LogUtil.h | 2 +- core/unittest/CMakeLists.txt | 10 +++++++++- core/unittest/main.cpp | 2 +- core/unittest/wrapper/CMakeLists.txt | 2 +- core/unittest/wrapper/test_wrapper.cpp | 2 +- 15 files changed, 30 insertions(+), 13 deletions(-) rename core/src/{utils => external/easyloggingpp}/easylogging++.cc (100%) rename core/src/{utils => external/easyloggingpp}/easylogging++.h (100%) diff --git a/CHANGELOG.md b/CHANGELOG.md index 9b897c608d..6ce6403028 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,6 +7,7 @@ Please mark all change in change log and use the ticket from JIRA. ## Bug ## Improvement - \#64 - Improvement dump function in scheduler +- \#82 - Move easyloggingpp into "external" directory ## Feature ## Task diff --git a/ci/jenkins/scripts/coverage.sh b/ci/jenkins/scripts/coverage.sh index ecbb2dfbe9..dca8c63982 100755 --- a/ci/jenkins/scripts/coverage.sh +++ b/ci/jenkins/scripts/coverage.sh @@ -131,8 +131,8 @@ ${LCOV_CMD} -r "${FILE_INFO_OUTPUT}" -o "${FILE_INFO_OUTPUT_NEW}" \ "*/src/server/Server.cpp" \ "*/src/server/DBWrapper.cpp" \ "*/src/server/grpc_impl/GrpcServer.cpp" \ - "*/src/utils/easylogging++.h" \ - "*/src/utils/easylogging++.cc" + "*/src/external/easyloggingpp/easylogging++.h" \ + "*/src/external/easyloggingpp/easylogging++.cc" # gen html report # ${LCOV_GEN_CMD} "${FILE_INFO_OUTPUT_NEW}" --output-directory ${DIR_LCOV_OUTPUT}/ diff --git a/core/coverage.sh b/core/coverage.sh index 74f9f4219d..6db1e18d39 100755 --- a/core/coverage.sh +++ b/core/coverage.sh @@ -121,8 +121,8 @@ ${LCOV_CMD} -r "${FILE_INFO_OUTPUT}" -o "${FILE_INFO_OUTPUT_NEW}" \ "*/src/server/Server.cpp" \ "*/src/server/DBWrapper.cpp" \ "*/src/server/grpc_impl/GrpcServer.cpp" \ - "*/src/utils/easylogging++.h" \ - "*/src/utils/easylogging++.cc" + "*/src/external/easyloggingpp/easylogging++.h" \ + "*/src/external/easyloggingpp/easylogging++.cc" # gen html report ${LCOV_GEN_CMD} "${FILE_INFO_OUTPUT_NEW}" --output-directory ${DIR_LCOV_OUTPUT}/ diff --git a/core/src/CMakeLists.txt b/core/src/CMakeLists.txt index b0228bd090..d086955078 100644 --- a/core/src/CMakeLists.txt +++ b/core/src/CMakeLists.txt @@ -64,6 +64,13 @@ set(scheduler_files ${scheduler_task_files} ) +aux_source_directory(${MILVUS_ENGINE_SRC}/external/easyloggingpp external_easyloggingpp_files) +aux_source_directory(${MILVUS_ENGINE_SRC}/external/nlohmann external_nlohmann_files) +set(external_files + ${external_easyloggingpp_files} + ${external_nlohmann_files} + ) + aux_source_directory(${MILVUS_ENGINE_SRC}/server server_files) aux_source_directory(${MILVUS_ENGINE_SRC}/server/grpc_impl grpc_server_files) aux_source_directory(${MILVUS_ENGINE_SRC}/utils utils_files) @@ -77,6 +84,7 @@ set(engine_files ${db_insert_files} ${db_meta_files} ${metrics_files} + ${external_files} ${utils_files} ${wrapper_files} ) diff --git a/core/src/utils/easylogging++.cc b/core/src/external/easyloggingpp/easylogging++.cc similarity index 100% rename from core/src/utils/easylogging++.cc rename to core/src/external/easyloggingpp/easylogging++.cc diff --git a/core/src/utils/easylogging++.h b/core/src/external/easyloggingpp/easylogging++.h similarity index 100% rename from core/src/utils/easylogging++.h rename to core/src/external/easyloggingpp/easylogging++.h diff --git a/core/src/index/knowhere/knowhere/common/Log.h b/core/src/index/knowhere/knowhere/common/Log.h index 222d03d73e..369e7143af 100644 --- a/core/src/index/knowhere/knowhere/common/Log.h +++ b/core/src/index/knowhere/knowhere/common/Log.h @@ -17,7 +17,7 @@ #pragma once -#include "utils/easylogging++.h" +#include "external/easyloggingpp/easylogging++.h" namespace knowhere { diff --git a/core/src/index/unittest/CMakeLists.txt b/core/src/index/unittest/CMakeLists.txt index 8a5e089486..f840b28e28 100644 --- a/core/src/index/unittest/CMakeLists.txt +++ b/core/src/index/unittest/CMakeLists.txt @@ -20,7 +20,7 @@ set(basic_libs ) set(util_srcs - ${MILVUS_ENGINE_SRC}/utils/easylogging++.cc + ${MILVUS_ENGINE_SRC}/external/easyloggingpp/easylogging++.cc ${INDEX_SOURCE_DIR}/knowhere/knowhere/index/vector_index/helpers/FaissGpuResourceMgr.cpp ${INDEX_SOURCE_DIR}/knowhere/knowhere/index/vector_index/helpers/FaissIO.cpp ${INDEX_SOURCE_DIR}/knowhere/knowhere/index/vector_index/helpers/IndexParameter.cpp diff --git a/core/src/main.cpp b/core/src/main.cpp index d1c9ba6dfd..d60f26d702 100644 --- a/core/src/main.cpp +++ b/core/src/main.cpp @@ -23,11 +23,11 @@ #include #include "../version.h" +#include "external/easyloggingpp/easylogging++.h" #include "metrics/Metrics.h" #include "server/Server.h" #include "utils/CommonUtil.h" #include "utils/SignalUtil.h" -#include "utils/easylogging++.h" INITIALIZE_EASYLOGGINGPP diff --git a/core/src/utils/Log.h b/core/src/utils/Log.h index 1dd116367a..4aa3146b01 100644 --- a/core/src/utils/Log.h +++ b/core/src/utils/Log.h @@ -17,7 +17,7 @@ #pragma once -#include "utils/easylogging++.h" +#include "external/easyloggingpp/easylogging++.h" namespace milvus { diff --git a/core/src/utils/LogUtil.h b/core/src/utils/LogUtil.h index 9926939442..7e5afd087a 100644 --- a/core/src/utils/LogUtil.h +++ b/core/src/utils/LogUtil.h @@ -17,8 +17,8 @@ #pragma once +#include "external/easyloggingpp/easylogging++.h" #include "utils/Status.h" -#include "utils/easylogging++.h" #include #include diff --git a/core/unittest/CMakeLists.txt b/core/unittest/CMakeLists.txt index 258fd76a8e..aae7fb8d7f 100644 --- a/core/unittest/CMakeLists.txt +++ b/core/unittest/CMakeLists.txt @@ -60,6 +60,13 @@ set(scheduler_files ${scheduler_optimizer_files} ) +aux_source_directory(${MILVUS_ENGINE_SRC}/external/easyloggingpp external_easyloggingpp_files) +aux_source_directory(${MILVUS_ENGINE_SRC}/external/nlohmann external_nlohmann_files) +set(external_files + ${external_easyloggingpp_files} + ${external_nlohmann_files} + ) + aux_source_directory(${MILVUS_ENGINE_SRC}/server server_files) aux_source_directory(${MILVUS_ENGINE_SRC}/server/grpc_impl grpc_server_files) aux_source_directory(${MILVUS_ENGINE_SRC}/utils utils_files) @@ -74,7 +81,7 @@ set(helper_files ${MILVUS_ENGINE_SRC}/utils/TimeRecorder.cpp ${MILVUS_ENGINE_SRC}/utils/Status.cpp ${MILVUS_ENGINE_SRC}/utils/ValidationUtil.cpp - ${MILVUS_ENGINE_SRC}/utils/easylogging++.cc + ${MILVUS_ENGINE_SRC}/external/easyloggingpp/easylogging++.cc ) set(common_files @@ -85,6 +92,7 @@ set(common_files ${db_insert_files} ${db_meta_files} ${metrics_files} + ${external_files} ${scheduler_files} ${wrapper_files} ${helper_files} diff --git a/core/unittest/main.cpp b/core/unittest/main.cpp index d17cf9da58..2cd0624f7b 100644 --- a/core/unittest/main.cpp +++ b/core/unittest/main.cpp @@ -18,7 +18,7 @@ #include #include -#include "utils/easylogging++.h" +#include "external/easyloggingpp/easylogging++.h" INITIALIZE_EASYLOGGINGPP diff --git a/core/unittest/wrapper/CMakeLists.txt b/core/unittest/wrapper/CMakeLists.txt index 156d89b241..a8015f8d34 100644 --- a/core/unittest/wrapper/CMakeLists.txt +++ b/core/unittest/wrapper/CMakeLists.txt @@ -26,7 +26,7 @@ set(wrapper_files set(util_files utils.cpp - ${MILVUS_ENGINE_SRC}/utils/easylogging++.cc + ${MILVUS_ENGINE_SRC}/external/easyloggingpp/easylogging++.cc ${MILVUS_ENGINE_SRC}/utils/Status.cpp ) diff --git a/core/unittest/wrapper/test_wrapper.cpp b/core/unittest/wrapper/test_wrapper.cpp index f112fc7e65..1ec98ccb5d 100644 --- a/core/unittest/wrapper/test_wrapper.cpp +++ b/core/unittest/wrapper/test_wrapper.cpp @@ -15,7 +15,7 @@ // specific language governing permissions and limitations // under the License. -#include "utils/easylogging++.h" +#include "external/easyloggingpp/easylogging++.h" #include "wrapper/VecIndex.h" #include "knowhere/index/vector_index/helpers/FaissGpuResourceMgr.h" #include "knowhere/index/vector_index/helpers/IndexParameter.h" From 9f8bcb2698014b55cb038c540560b587f43f1098 Mon Sep 17 00:00:00 2001 From: wxyu Date: Tue, 22 Oct 2019 19:56:39 +0800 Subject: [PATCH 17/89] update lint exclusions Former-commit-id: 84ebefc29597fd228c4efb9f27e485b56ae9e413 --- core/build-support/lint_exclusions.txt | 1 + core/coverage.sh | 3 ++- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/core/build-support/lint_exclusions.txt b/core/build-support/lint_exclusions.txt index 226db75a43..2be060f121 100644 --- a/core/build-support/lint_exclusions.txt +++ b/core/build-support/lint_exclusions.txt @@ -6,4 +6,5 @@ *easylogging++* *SqliteMetaImpl.cpp *src/grpc* +*src/external* *milvus/include* \ No newline at end of file diff --git a/core/coverage.sh b/core/coverage.sh index 6db1e18d39..5792af5ec2 100755 --- a/core/coverage.sh +++ b/core/coverage.sh @@ -122,7 +122,8 @@ ${LCOV_CMD} -r "${FILE_INFO_OUTPUT}" -o "${FILE_INFO_OUTPUT_NEW}" \ "*/src/server/DBWrapper.cpp" \ "*/src/server/grpc_impl/GrpcServer.cpp" \ "*/src/external/easyloggingpp/easylogging++.h" \ - "*/src/external/easyloggingpp/easylogging++.cc" + "*/src/external/easyloggingpp/easylogging++.cc" \ + "*/src/external/*" # gen html report ${LCOV_GEN_CMD} "${FILE_INFO_OUTPUT_NEW}" --output-directory ${DIR_LCOV_OUTPUT}/ From 13d7b1971a7e2b12f88491f7a1b96fd188d374f4 Mon Sep 17 00:00:00 2001 From: zhenwu Date: Wed, 23 Oct 2019 09:53:33 +0800 Subject: [PATCH 18/89] Update test nq size Former-commit-id: 933de17077aa4027a827f01f30278cab6e8b8434 --- tests/milvus_python_test/test_index.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/milvus_python_test/test_index.py b/tests/milvus_python_test/test_index.py index 435a547855..9e9f0830ac 100644 --- a/tests/milvus_python_test/test_index.py +++ b/tests/milvus_python_test/test_index.py @@ -11,7 +11,7 @@ import numpy from milvus import Milvus, IndexType, MetricType from utils import * -nb = 100000 +nb = 10000 dim = 128 index_file_size = 10 vectors = gen_vectors(nb, dim) From dead8ee68867935e2eb8f7357ad7a6e0911143e3 Mon Sep 17 00:00:00 2001 From: zhenwu Date: Wed, 23 Oct 2019 14:33:01 +0800 Subject: [PATCH 19/89] add exit code in coverage.sh Former-commit-id: 27a834232561cbda1360cd245f73d89db5d48750 --- core/coverage.sh | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/core/coverage.sh b/core/coverage.sh index 74f9f4219d..b3c2e96eed 100755 --- a/core/coverage.sh +++ b/core/coverage.sh @@ -99,6 +99,7 @@ for test in `ls ${DIR_UNITTEST}`; do if [ $? -ne 0 ]; then echo ${args} echo ${DIR_UNITTEST}/${test} "run failed" + exit -1 fi done @@ -123,6 +124,10 @@ ${LCOV_CMD} -r "${FILE_INFO_OUTPUT}" -o "${FILE_INFO_OUTPUT_NEW}" \ "*/src/server/grpc_impl/GrpcServer.cpp" \ "*/src/utils/easylogging++.h" \ "*/src/utils/easylogging++.cc" +if [ $? -ne 0 ]; then + echo "generate ${FILE_INFO_OUTPUT_NEW} failed" + exit -2 +fi # gen html report ${LCOV_GEN_CMD} "${FILE_INFO_OUTPUT_NEW}" --output-directory ${DIR_LCOV_OUTPUT}/ From 07cc917c5612e18173c6d6dce7de34054394e2d2 Mon Sep 17 00:00:00 2001 From: zhenwu Date: Wed, 23 Oct 2019 14:52:12 +0800 Subject: [PATCH 20/89] update coverage timeout && add coverage exit code Former-commit-id: 0c623d2348e73dc0c1df24f96098c4c5568e73ba --- ci/jenkins/jenkinsfile/coverage.groovy | 2 +- ci/jenkins/scripts/coverage.sh | 6 ++++++ 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/ci/jenkins/jenkinsfile/coverage.groovy b/ci/jenkins/jenkinsfile/coverage.groovy index 7c3b16c029..ff2e3e6fa2 100644 --- a/ci/jenkins/jenkinsfile/coverage.groovy +++ b/ci/jenkins/jenkinsfile/coverage.groovy @@ -1,4 +1,4 @@ -timeout(time: 60, unit: 'MINUTES') { +timeout(time: 30, unit: 'MINUTES') { dir ("ci/jenkins/scripts") { sh "./coverage.sh -o /opt/milvus -u root -p 123456 -t \$POD_IP" // Set some env variables so codecov detection script works correctly diff --git a/ci/jenkins/scripts/coverage.sh b/ci/jenkins/scripts/coverage.sh index ecbb2dfbe9..dd52df442c 100755 --- a/ci/jenkins/scripts/coverage.sh +++ b/ci/jenkins/scripts/coverage.sh @@ -109,6 +109,7 @@ for test in `ls ${DIR_UNITTEST}`; do if [ $? -ne 0 ]; then echo ${args} echo ${DIR_UNITTEST}/${test} "run failed" + exit -1 fi done @@ -134,5 +135,10 @@ ${LCOV_CMD} -r "${FILE_INFO_OUTPUT}" -o "${FILE_INFO_OUTPUT_NEW}" \ "*/src/utils/easylogging++.h" \ "*/src/utils/easylogging++.cc" +if [ $? -ne 0 ]; then + echo "gen ${FILE_INFO_OUTPUT_NEW} failed" + exit -2 +fi + # gen html report # ${LCOV_GEN_CMD} "${FILE_INFO_OUTPUT_NEW}" --output-directory ${DIR_LCOV_OUTPUT}/ From e7876f6343ce77a6b6a4a02a471be59bd6163f70 Mon Sep 17 00:00:00 2001 From: starlord Date: Wed, 23 Oct 2019 15:05:22 +0800 Subject: [PATCH 21/89] avoid relative header path Former-commit-id: d4a01b720ac7011e17076b7c36a20af0ebe2b58a --- core/CMakeLists.txt | 2 +- core/src/main.cpp | 2 +- core/src/sdk/grpc/ClientProxy.cpp | 2 +- core/src/server/Server.cpp | 2 +- core/src/server/grpc_impl/GrpcRequestTask.cpp | 2 +- core/{ => src}/version.h.macro | 0 core/unittest/db/CMakeLists.txt | 7 --- core/unittest/db/appendix/log_config.conf | 27 ---------- core/unittest/db/appendix/server_config.yaml | 37 ------------- core/unittest/db/test_db.cpp | 6 +-- core/unittest/db/utils.cpp | 53 ++++++++++++++++++- core/unittest/db/utils.h | 3 ++ core/unittest/server/test_config.cpp | 16 +++--- core/unittest/server/test_rpc.cpp | 38 ++++++------- 14 files changed, 90 insertions(+), 107 deletions(-) rename core/{ => src}/version.h.macro (100%) delete mode 100644 core/unittest/db/appendix/log_config.conf delete mode 100644 core/unittest/db/appendix/server_config.yaml diff --git a/core/CMakeLists.txt b/core/CMakeLists.txt index 5915006ca1..51cb2270a1 100644 --- a/core/CMakeLists.txt +++ b/core/CMakeLists.txt @@ -71,7 +71,7 @@ if(MILVUS_VERSION_MAJOR STREQUAL "" endif() message(STATUS "Build version = ${MILVUS_VERSION}") -configure_file(${CMAKE_CURRENT_SOURCE_DIR}/version.h.macro ${CMAKE_CURRENT_SOURCE_DIR}/version.h) +configure_file(${CMAKE_CURRENT_SOURCE_DIR}/src/version.h.macro ${CMAKE_CURRENT_SOURCE_DIR}/src/version.h) message(STATUS "Milvus version: " "${MILVUS_VERSION_MAJOR}.${MILVUS_VERSION_MINOR}.${MILVUS_VERSION_PATCH} " diff --git a/core/src/main.cpp b/core/src/main.cpp index d60f26d702..85f91469bb 100644 --- a/core/src/main.cpp +++ b/core/src/main.cpp @@ -22,10 +22,10 @@ #include #include -#include "../version.h" #include "external/easyloggingpp/easylogging++.h" #include "metrics/Metrics.h" #include "server/Server.h" +#include "src/version.h" #include "utils/CommonUtil.h" #include "utils/SignalUtil.h" diff --git a/core/src/sdk/grpc/ClientProxy.cpp b/core/src/sdk/grpc/ClientProxy.cpp index 7e1955b04b..c726cfc532 100644 --- a/core/src/sdk/grpc/ClientProxy.cpp +++ b/core/src/sdk/grpc/ClientProxy.cpp @@ -16,8 +16,8 @@ // under the License. #include "sdk/grpc/ClientProxy.h" -#include "../../../version.h" #include "grpc/gen-milvus/milvus.grpc.pb.h" +#include "src/version.h" #include #include diff --git a/core/src/server/Server.cpp b/core/src/server/Server.cpp index 4933af72b7..5676504722 100644 --- a/core/src/server/Server.cpp +++ b/core/src/server/Server.cpp @@ -19,13 +19,13 @@ #include #include -#include "../../version.h" #include "metrics/Metrics.h" #include "scheduler/SchedInst.h" #include "server/Config.h" #include "server/DBWrapper.h" #include "server/Server.h" #include "server/grpc_impl/GrpcServer.h" +#include "src/version.h" #include "utils/Log.h" #include "utils/LogUtil.h" #include "utils/SignalUtil.h" diff --git a/core/src/server/grpc_impl/GrpcRequestTask.cpp b/core/src/server/grpc_impl/GrpcRequestTask.cpp index 86ff23b3d0..be1fca0186 100644 --- a/core/src/server/grpc_impl/GrpcRequestTask.cpp +++ b/core/src/server/grpc_impl/GrpcRequestTask.cpp @@ -23,12 +23,12 @@ #include //#include -#include "../../../version.h" #include "GrpcServer.h" #include "db/Utils.h" #include "scheduler/SchedInst.h" #include "server/DBWrapper.h" #include "server/Server.h" +#include "src/version.h" #include "utils/CommonUtil.h" #include "utils/Log.h" #include "utils/TimeRecorder.h" diff --git a/core/version.h.macro b/core/src/version.h.macro similarity index 100% rename from core/version.h.macro rename to core/src/version.h.macro diff --git a/core/unittest/db/CMakeLists.txt b/core/unittest/db/CMakeLists.txt index 4bce9f35b3..3954dd8656 100644 --- a/core/unittest/db/CMakeLists.txt +++ b/core/unittest/db/CMakeLists.txt @@ -31,12 +31,5 @@ target_link_libraries(test_db install(TARGETS test_db DESTINATION unittest) -configure_file(appendix/server_config.yaml - "${CMAKE_CURRENT_BINARY_DIR}/milvus/conf/server_config.yaml" - COPYONLY) - -configure_file(appendix/log_config.conf - "${CMAKE_CURRENT_BINARY_DIR}/milvus/conf/log_config.conf" - COPYONLY) diff --git a/core/unittest/db/appendix/log_config.conf b/core/unittest/db/appendix/log_config.conf deleted file mode 100644 index 0a3e0d21af..0000000000 --- a/core/unittest/db/appendix/log_config.conf +++ /dev/null @@ -1,27 +0,0 @@ -* GLOBAL: - FORMAT = "%datetime | %level | %logger | %msg" - FILENAME = "/tmp/milvus/logs/milvus-%datetime{%y-%M-%d-%H:%m}-global.log" - ENABLED = true - TO_FILE = true - TO_STANDARD_OUTPUT = false - SUBSECOND_PRECISION = 3 - PERFORMANCE_TRACKING = false - MAX_LOG_FILE_SIZE = 209715200 ## Throw log files away after 200MB -* DEBUG: - FILENAME = "/tmp/milvus/logs/milvus-%datetime{%y-%M-%d-%H:%m}-debug.log" - ENABLED = true -* WARNING: - FILENAME = "/tmp/milvus/logs/milvus-%datetime{%y-%M-%d-%H:%m}-warning.log" -* TRACE: - FILENAME = "/tmp/milvus/logs/milvus-%datetime{%y-%M-%d-%H:%m}-trace.log" -* VERBOSE: - FORMAT = "%datetime{%d/%M/%y} | %level-%vlevel | %msg" - TO_FILE = false - TO_STANDARD_OUTPUT = false -## Error logs -* ERROR: - ENABLED = true - FILENAME = "/tmp/milvus/logs/milvus-%datetime{%y-%M-%d-%H:%m}-error.log" -* FATAL: - ENABLED = true - FILENAME = "/tmp/milvus/logs/milvus-%datetime{%y-%M-%d-%H:%m}-fatal.log" diff --git a/core/unittest/db/appendix/server_config.yaml b/core/unittest/db/appendix/server_config.yaml deleted file mode 100644 index f92b2f1a18..0000000000 --- a/core/unittest/db/appendix/server_config.yaml +++ /dev/null @@ -1,37 +0,0 @@ -# All the following configurations are default values. - -server_config: - address: 0.0.0.0 # milvus server ip address (IPv4) - port: 19530 # port range: 1025 ~ 65534 - deploy_mode: single # deployment type: single, cluster_readonly, cluster_writable - time_zone: UTC+8 - -db_config: - primary_path: /tmp/milvus # path used to store data and meta - secondary_path: # path used to store data only, split by semicolon - - backend_url: sqlite://:@:/ # URI format: dialect://username:password@host:port/database - # Keep 'dialect://:@:/', and replace other texts with real values. - # Replace 'dialect' with 'mysql' or 'sqlite' - - insert_buffer_size: 4 # GB, maximum insert buffer size allowed - build_index_gpu: 0 # gpu id used for building index - -metric_config: - enable_monitor: false # enable monitoring or not - collector: prometheus # prometheus - prometheus_config: - port: 8080 # port prometheus used to fetch metrics - -cache_config: - cpu_mem_capacity: 16 # GB, CPU memory used for cache - cpu_mem_threshold: 0.85 # percentage of data kept when cache cleanup triggered - cache_insert_data: false # whether load inserted data into cache - -engine_config: - blas_threshold: 20 - -resource_config: - resource_pool: - - cpu - - gpu0 diff --git a/core/unittest/db/test_db.cpp b/core/unittest/db/test_db.cpp index 5e6ecc2ac4..b869d17388 100644 --- a/core/unittest/db/test_db.cpp +++ b/core/unittest/db/test_db.cpp @@ -33,8 +33,6 @@ namespace { -static const char *CONFIG_FILE_PATH = "./milvus/conf/server_config.yaml"; - static const char *TABLE_NAME = "test_group"; static constexpr int64_t TABLE_DIM = 256; static constexpr int64_t VECTOR_COUNT = 25000; @@ -232,8 +230,10 @@ TEST_F(DBTest, DB_TEST) { } TEST_F(DBTest, SEARCH_TEST) { + std::string config_path(CONFIG_PATH); + config_path += CONFIG_FILE; milvus::server::Config &config = milvus::server::Config::GetInstance(); - milvus::Status s = config.LoadConfigFile(CONFIG_FILE_PATH); + milvus::Status s = config.LoadConfigFile(config_path); milvus::engine::meta::TableSchema table_info = BuildTableSchema(); auto stat = db_->CreateTable(table_info); diff --git a/core/unittest/db/utils.cpp b/core/unittest/db/utils.cpp index 67beeba36f..8903ce14ea 100644 --- a/core/unittest/db/utils.cpp +++ b/core/unittest/db/utils.cpp @@ -28,11 +28,59 @@ #include "db/DBFactory.h" #include "db/Options.h" #include "knowhere/index/vector_index/helpers/FaissGpuResourceMgr.h" +#include "utils/CommonUtil.h" INITIALIZE_EASYLOGGINGPP namespace { +static const char + * CONFIG_STR = "# All the following configurations are default values.\n" + "\n" + "server_config:\n" + " address: 0.0.0.0 # milvus server ip address (IPv4)\n" + " port: 19530 # port range: 1025 ~ 65534\n" + " deploy_mode: single \n" + " time_zone: UTC+8\n" + "\n" + "db_config:\n" + " primary_path: /tmp/milvus # path used to store data and meta\n" + " secondary_path: # path used to store data only, split by semicolon\n" + "\n" + " backend_url: sqlite://:@:/ \n" + " \n" + " # Replace 'dialect' with 'mysql' or 'sqlite'\n" + "\n" + " insert_buffer_size: 4 # GB, maximum insert buffer size allowed\n" + "\n" + "metric_config:\n" + " enable_monitor: false # enable monitoring or not\n" + " collector: prometheus # prometheus\n" + " prometheus_config:\n" + " port: 8080 # port prometheus used to fetch metrics\n" + "\n" + "cache_config:\n" + " cpu_mem_capacity: 16 # GB, CPU memory used for cache\n" + " cpu_mem_threshold: 0.85 # percentage of data kept when cache cleanup triggered\n" + " cache_insert_data: false # whether load inserted data into cache\n" + "\n" + "engine_config:\n" + " blas_threshold: 20\n" + "\n" + "resource_config:\n" + " resource_pool:\n" + " - gpu0\n" + " index_build_device: gpu0 # GPU used for building index"; + +void +WriteToFile(const std::string& file_path, const char* content) { + std::fstream fs(file_path.c_str(), std::ios_base::out); + + //write data to file + fs << content; + fs.close(); +} + class DBTestEnvironment : public ::testing::Environment { public: explicit DBTestEnvironment(const std::string& uri) @@ -84,7 +132,7 @@ BaseTest::TearDown() { milvus::engine::DBOptions BaseTest::GetOptions() { auto options = milvus::engine::DBFactory::BuildOption(); - options.meta_.path_ = "/tmp/milvus_test"; + options.meta_.path_ = CONFIG_PATH; options.meta_.backend_uri_ = "sqlite://:@:/"; return options; } @@ -111,6 +159,9 @@ DBTest::SetUp() { auto options = GetOptions(); db_ = milvus::engine::DBFactory::Build(options); + + std::string config_path(options.meta_.path_ + CONFIG_FILE); + WriteToFile(config_path, CONFIG_STR); } void diff --git a/core/unittest/db/utils.h b/core/unittest/db/utils.h index 8da160dc92..94735e4886 100644 --- a/core/unittest/db/utils.h +++ b/core/unittest/db/utils.h @@ -42,6 +42,9 @@ #define STOP_TIMER(name) #endif +static const char *CONFIG_PATH = "/tmp/milvus_test"; +static const char *CONFIG_FILE = "/server_config.yaml"; + class BaseTest : public ::testing::Test { protected: void InitLog(); diff --git a/core/unittest/server/test_config.cpp b/core/unittest/server/test_config.cpp index f3adf8a2c3..76230cbcc3 100644 --- a/core/unittest/server/test_config.cpp +++ b/core/unittest/server/test_config.cpp @@ -33,13 +33,13 @@ static constexpr uint64_t GB = MB * 1024; } // namespace TEST_F(ConfigTest, CONFIG_TEST) { - milvus::server::ConfigMgr *config_mgr = milvus::server::YamlConfigMgr::GetInstance(); + milvus::server::ConfigMgr* config_mgr = milvus::server::YamlConfigMgr::GetInstance(); milvus::Status s = config_mgr->LoadConfigFile(""); ASSERT_FALSE(s.ok()); std::string config_path(CONFIG_PATH); - s = config_mgr->LoadConfigFile(config_path+ INVALID_CONFIG_FILE); + s = config_mgr->LoadConfigFile(config_path + INVALID_CONFIG_FILE); ASSERT_FALSE(s.ok()); s = config_mgr->LoadConfigFile(config_path + VALID_CONFIG_FILE); @@ -47,11 +47,11 @@ TEST_F(ConfigTest, CONFIG_TEST) { config_mgr->Print(); - milvus::server::ConfigNode &root_config = config_mgr->GetRootNode(); - milvus::server::ConfigNode &server_config = root_config.GetChild("server_config"); - milvus::server::ConfigNode &db_config = root_config.GetChild("db_config"); - milvus::server::ConfigNode &metric_config = root_config.GetChild("metric_config"); - milvus::server::ConfigNode &cache_config = root_config.GetChild("cache_config"); + milvus::server::ConfigNode& root_config = config_mgr->GetRootNode(); + milvus::server::ConfigNode& server_config = root_config.GetChild("server_config"); + milvus::server::ConfigNode& db_config = root_config.GetChild("db_config"); + milvus::server::ConfigNode& metric_config = root_config.GetChild("metric_config"); + milvus::server::ConfigNode& cache_config = root_config.GetChild("cache_config"); milvus::server::ConfigNode invalid_config = root_config.GetChild("invalid_config"); auto valus = invalid_config.GetSequence("not_exist"); float ff = invalid_config.GetFloatValue("not_exist", 3.0); @@ -100,7 +100,7 @@ TEST_F(ConfigTest, CONFIG_TEST) { TEST_F(ConfigTest, SERVER_CONFIG_TEST) { std::string config_path(CONFIG_PATH); - milvus::server::Config &config = milvus::server::Config::GetInstance(); + milvus::server::Config& config = milvus::server::Config::GetInstance(); milvus::Status s = config.LoadConfigFile(config_path + VALID_CONFIG_FILE); ASSERT_TRUE(s.ok()); diff --git a/core/unittest/server/test_rpc.cpp b/core/unittest/server/test_rpc.cpp index 09a56699ea..ebbcd810c1 100644 --- a/core/unittest/server/test_rpc.cpp +++ b/core/unittest/server/test_rpc.cpp @@ -23,7 +23,7 @@ #include "server/grpc_impl/GrpcRequestHandler.h" #include "server/grpc_impl/GrpcRequestScheduler.h" #include "server/grpc_impl/GrpcRequestTask.h" -#include "../version.h" +#include "src/version.h" #include "grpc/gen-milvus/milvus.grpc.pb.h" #include "grpc/gen-status/status.pb.h" @@ -36,7 +36,7 @@ namespace { -static const char *TABLE_NAME = "test_grpc"; +static const char* TABLE_NAME = "test_grpc"; static constexpr int64_t TABLE_DIM = 256; static constexpr int64_t INDEX_FILE_SIZE = 1024; static constexpr int64_t VECTOR_COUNT = 1000; @@ -109,7 +109,7 @@ class RpcHandlerTest : public testing::Test { void BuildVectors(int64_t from, int64_t to, - std::vector> &vector_record_array) { + std::vector>& vector_record_array) { if (to <= from) { return; } @@ -119,7 +119,7 @@ BuildVectors(int64_t from, int64_t to, std::vector record; record.resize(TABLE_DIM); for (int64_t i = 0; i < TABLE_DIM; i++) { - record[i] = (float) (k % (i + 1)); + record[i] = (float)(k % (i + 1)); } vector_record_array.emplace_back(record); @@ -136,7 +136,7 @@ CurrentTmDate(int64_t offset_day = 0) { gmtime_r(&tt, &t); std::string str = std::to_string(t.tm_year + 1900) + "-" + std::to_string(t.tm_mon + 1) - + "-" + std::to_string(t.tm_mday); + + "-" + std::to_string(t.tm_mday); return str; } @@ -200,8 +200,8 @@ TEST_F(RpcHandlerTest, INSERT_TEST) { std::vector> record_array; BuildVectors(0, VECTOR_COUNT, record_array); ::milvus::grpc::VectorIds vector_ids; - for (auto &record : record_array) { - ::milvus::grpc::RowRecord *grpc_record = request.add_row_record_array(); + for (auto& record : record_array) { + ::milvus::grpc::RowRecord* grpc_record = request.add_row_record_array(); for (size_t i = 0; i < record.size(); i++) { grpc_record->add_vector_data(record[i]); } @@ -239,8 +239,8 @@ TEST_F(RpcHandlerTest, SEARCH_TEST) { std::vector> record_array; BuildVectors(0, VECTOR_COUNT, record_array); ::milvus::grpc::InsertParam insert_param; - for (auto &record : record_array) { - ::milvus::grpc::RowRecord *grpc_record = insert_param.add_row_record_array(); + for (auto& record : record_array) { + ::milvus::grpc::RowRecord* grpc_record = insert_param.add_row_record_array(); for (size_t i = 0; i < record.size(); i++) { grpc_record->add_vector_data(record[i]); } @@ -252,16 +252,16 @@ TEST_F(RpcHandlerTest, SEARCH_TEST) { sleep(7); BuildVectors(0, 10, record_array); - for (auto &record : record_array) { - ::milvus::grpc::RowRecord *row_record = request.add_query_record_array(); - for (auto &rec : record) { + for (auto& record : record_array) { + ::milvus::grpc::RowRecord* row_record = request.add_query_record_array(); + for (auto& rec : record) { row_record->add_vector_data(rec); } } handler->Search(&context, &request, &response); //test search with range - ::milvus::grpc::Range *range = request.mutable_query_range_array()->Add(); + ::milvus::grpc::Range* range = request.mutable_query_range_array()->Add(); range->set_start_value(CurrentTmDate(-2)); range->set_end_value(CurrentTmDate(-3)); handler->Search(&context, &request, &response); @@ -273,7 +273,7 @@ TEST_F(RpcHandlerTest, SEARCH_TEST) { handler->Search(&context, &request, &response); ::milvus::grpc::SearchInFilesParam search_in_files_param; - std::string *file_id = search_in_files_param.add_file_id_array(); + std::string* file_id = search_in_files_param.add_file_id_array(); *file_id = "test_tbl"; handler->SearchInFiles(&context, &search_in_files_param, &response); } @@ -323,8 +323,8 @@ TEST_F(RpcHandlerTest, TABLES_TEST) { //test empty row record handler->Insert(&context, &request, &vector_ids); - for (auto &record : record_array) { - ::milvus::grpc::RowRecord *grpc_record = request.add_row_record_array(); + for (auto& record : record_array) { + ::milvus::grpc::RowRecord* grpc_record = request.add_row_record_array(); for (size_t i = 0; i < record.size(); i++) { grpc_record->add_vector_data(record[i]); } @@ -341,7 +341,7 @@ TEST_F(RpcHandlerTest, TABLES_TEST) { request.clear_row_record_array(); vector_ids.clear_vector_id_array(); for (uint64_t i = 0; i < 10; ++i) { - ::milvus::grpc::RowRecord *grpc_record = request.add_row_record_array(); + ::milvus::grpc::RowRecord* grpc_record = request.add_row_record_array(); for (size_t j = 0; j < 10; j++) { grpc_record->add_vector_data(record_array[i][j]); } @@ -431,12 +431,12 @@ class DummyTask : public milvus::server::grpc::GrpcBaseTask { } static milvus::server::grpc::BaseTaskPtr - Create(std::string &dummy) { + Create(std::string& dummy) { return std::shared_ptr(new DummyTask(dummy)); } public: - explicit DummyTask(std::string &dummy) : GrpcBaseTask(dummy) { + explicit DummyTask(std::string& dummy) : GrpcBaseTask(dummy) { } }; From f410219fc29c54cf1dcb19f193ab91cea299f6f2 Mon Sep 17 00:00:00 2001 From: zhenwu Date: Wed, 23 Oct 2019 16:43:08 +0800 Subject: [PATCH 22/89] update utils.py Former-commit-id: 837591d04e95503c6e66f3960866949f628224e4 --- tests/milvus_python_test/utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/milvus_python_test/utils.py b/tests/milvus_python_test/utils.py index 806af62f57..007bff9c75 100644 --- a/tests/milvus_python_test/utils.py +++ b/tests/milvus_python_test/utils.py @@ -475,7 +475,7 @@ if __name__ == "__main__": table = "test" - file_name = '/poc/yuncong/ann_1000m/query.npy' + file_name = 'query.npy' data = np.load(file_name) vectors = data[0:nq].tolist() # print(vectors) From edcb0bc5e1e375d9345f822cccea53af110a9ac4 Mon Sep 17 00:00:00 2001 From: wxyu Date: Wed, 23 Oct 2019 17:12:36 +0800 Subject: [PATCH 23/89] Improvement dump function in scheduler Former-commit-id: b7c550a7fb4e05c66afa217f435b401d9cae5613 --- core/src/scheduler/JobMgr.cpp | 9 ++++++ core/src/scheduler/JobMgr.h | 6 +++- core/src/scheduler/ResourceMgr.cpp | 28 +++++++++-------- core/src/scheduler/ResourceMgr.h | 7 +++-- core/src/scheduler/Scheduler.cpp | 10 ++++-- core/src/scheduler/Scheduler.h | 11 +++---- core/src/scheduler/TaskTable.cpp | 6 ++-- core/src/scheduler/TaskTable.h | 6 ++-- core/src/scheduler/interface/interfaces.h | 2 +- core/src/scheduler/job/BuildIndexJob.cpp | 8 +++++ core/src/scheduler/job/BuildIndexJob.h | 3 ++ core/src/scheduler/job/DeleteJob.cpp | 10 ++++++ core/src/scheduler/job/DeleteJob.h | 3 ++ core/src/scheduler/job/Job.h | 4 ++- core/src/scheduler/job/SearchJob.cpp | 10 ++++++ core/src/scheduler/job/SearchJob.h | 3 ++ core/src/scheduler/resource/CpuResource.cpp | 2 +- core/src/scheduler/resource/CpuResource.h | 5 --- core/src/scheduler/resource/DiskResource.h | 5 --- core/src/scheduler/resource/GpuResource.cpp | 2 +- core/src/scheduler/resource/GpuResource.h | 5 --- core/src/scheduler/resource/Node.cpp | 22 ++++++++----- core/src/scheduler/resource/Node.h | 7 +++-- core/src/scheduler/resource/Resource.cpp | 34 +++++++++++++++++++++ core/src/scheduler/resource/Resource.h | 6 ++-- core/src/scheduler/resource/TestResource.h | 5 --- 26 files changed, 148 insertions(+), 71 deletions(-) diff --git a/core/src/scheduler/JobMgr.cpp b/core/src/scheduler/JobMgr.cpp index 70f1352a5c..e7b15a8185 100644 --- a/core/src/scheduler/JobMgr.cpp +++ b/core/src/scheduler/JobMgr.cpp @@ -49,6 +49,15 @@ JobMgr::Stop() { } } +json +JobMgr::Dump() const { + json ret{ + {"running", running_}, + {"event_queue_length", queue_.size()}, + }; + return ret; +} + void JobMgr::Put(const JobPtr& job) { { diff --git a/core/src/scheduler/JobMgr.h b/core/src/scheduler/JobMgr.h index b4c706d359..fbd6c0ee45 100644 --- a/core/src/scheduler/JobMgr.h +++ b/core/src/scheduler/JobMgr.h @@ -28,13 +28,14 @@ #include #include "ResourceMgr.h" +#include "interface/interfaces.h" #include "job/Job.h" #include "task/Task.h" namespace milvus { namespace scheduler { -class JobMgr { +class JobMgr : public interface::dumpable { public: explicit JobMgr(ResourceMgrPtr res_mgr); @@ -44,6 +45,9 @@ class JobMgr { void Stop(); + json + Dump() const override; + public: void Put(const JobPtr& job); diff --git a/core/src/scheduler/ResourceMgr.cpp b/core/src/scheduler/ResourceMgr.cpp index 6e839062ef..383ad89c4e 100644 --- a/core/src/scheduler/ResourceMgr.cpp +++ b/core/src/scheduler/ResourceMgr.cpp @@ -170,16 +170,20 @@ ResourceMgr::GetNumGpuResource() const { return num; } -std::string -ResourceMgr::Dump() { - std::stringstream ss; - ss << "ResourceMgr contains " << resources_.size() << " resources." << std::endl; - - for (auto& res : resources_) { - ss << res->Dump(); +json +ResourceMgr::Dump() const { + json resources{}; + for (auto &res : resources_) { + resources.push_back(res->Dump()); } - - return ss.str(); + json ret{ + {"number_of_resource", resources_.size()}, + {"number_of_disk_resource", disk_resources_.size()}, + {"number_of_cpu_resource", cpu_resources_.size()}, + {"number_of_gpu_resource", gpu_resources_.size()}, + {"resources", resources}, + }; + return ret; } std::string @@ -187,9 +191,9 @@ ResourceMgr::DumpTaskTables() { std::stringstream ss; ss << ">>>>>>>>>>>>>>>ResourceMgr::DumpTaskTable<<<<<<<<<<<<<<<" << std::endl; for (auto& resource : resources_) { - ss << resource->Dump() << std::endl; - ss << resource->task_table().Dump(); - ss << resource->Dump() << std::endl << std::endl; + ss << resource->name() << std::endl; + ss << resource->task_table().Dump().dump(); + ss << resource->name() << std::endl << std::endl; } return ss.str(); } diff --git a/core/src/scheduler/ResourceMgr.h b/core/src/scheduler/ResourceMgr.h index 7a8e1ca4ca..4d2361fb3d 100644 --- a/core/src/scheduler/ResourceMgr.h +++ b/core/src/scheduler/ResourceMgr.h @@ -25,13 +25,14 @@ #include #include +#include "interface/interfaces.h" #include "resource/Resource.h" #include "utils/Log.h" namespace milvus { namespace scheduler { -class ResourceMgr { +class ResourceMgr : public interface::dumpable { public: ResourceMgr() = default; @@ -103,8 +104,8 @@ class ResourceMgr { public: /******** Utility Functions ********/ - std::string - Dump(); + json + Dump() const override; std::string DumpTaskTables(); diff --git a/core/src/scheduler/Scheduler.cpp b/core/src/scheduler/Scheduler.cpp index 19197b4168..fef5cc1a95 100644 --- a/core/src/scheduler/Scheduler.cpp +++ b/core/src/scheduler/Scheduler.cpp @@ -66,9 +66,13 @@ Scheduler::PostEvent(const EventPtr& event) { event_cv_.notify_one(); } -std::string -Scheduler::Dump() { - return std::string(); +json +Scheduler::Dump() const { + json ret{ + {"running", running_}, + {"event_queue_length", event_queue_.size()}, + }; + return ret; } void diff --git a/core/src/scheduler/Scheduler.h b/core/src/scheduler/Scheduler.h index 5b222cc41a..8d9ea83794 100644 --- a/core/src/scheduler/Scheduler.h +++ b/core/src/scheduler/Scheduler.h @@ -25,14 +25,14 @@ #include #include "ResourceMgr.h" +#include "interface/interfaces.h" #include "resource/Resource.h" #include "utils/Log.h" namespace milvus { namespace scheduler { -// TODO(wxyu): refactor, not friendly to unittest, logical in framework code -class Scheduler { +class Scheduler : public interface::dumpable { public: explicit Scheduler(ResourceMgrWPtr res_mgr); @@ -57,11 +57,8 @@ class Scheduler { void PostEvent(const EventPtr& event); - /* - * Dump as string; - */ - std::string - Dump(); + json + Dump() const override; private: /******** Events ********/ diff --git a/core/src/scheduler/TaskTable.cpp b/core/src/scheduler/TaskTable.cpp index cad7ce3a74..d0e6c1c38b 100644 --- a/core/src/scheduler/TaskTable.cpp +++ b/core/src/scheduler/TaskTable.cpp @@ -53,7 +53,7 @@ ToString(TaskTableItemState state) { } json -TaskTimestamp::Dump() { +TaskTimestamp::Dump() const { json ret{ {"start", start}, {"load", load}, {"loaded", loaded}, {"execute", execute}, {"executed", executed}, {"move", move}, {"moved", moved}, {"finish", finish}, @@ -141,7 +141,7 @@ TaskTableItem::Moved() { } json -TaskTableItem::Dump() { +TaskTableItem::Dump() const { json ret{ {"id", id}, {"task", (int64_t)task.get()}, @@ -263,7 +263,7 @@ TaskTable::Get(uint64_t index) { //} json -TaskTable::Dump() { +TaskTable::Dump() const { json ret; for (auto& item : table_) { ret.push_back(item->Dump()); diff --git a/core/src/scheduler/TaskTable.h b/core/src/scheduler/TaskTable.h index 307528fffb..a9d00043c2 100644 --- a/core/src/scheduler/TaskTable.h +++ b/core/src/scheduler/TaskTable.h @@ -54,7 +54,7 @@ struct TaskTimestamp : public interface::dumpable { uint64_t finish = 0; json - Dump() override; + Dump() const override; }; struct TaskTableItem : public interface::dumpable { @@ -92,7 +92,7 @@ struct TaskTableItem : public interface::dumpable { Moved(); json - Dump() override; + Dump() const override; }; using TaskTableItemPtr = std::shared_ptr; @@ -245,7 +245,7 @@ class TaskTable : public interface::dumpable { * Dump; */ json - Dump() override; + Dump() const override; private: std::uint64_t id_ = 0; diff --git a/core/src/scheduler/interface/interfaces.h b/core/src/scheduler/interface/interfaces.h index 68e5af8cdb..9920e4f80a 100644 --- a/core/src/scheduler/interface/interfaces.h +++ b/core/src/scheduler/interface/interfaces.h @@ -37,7 +37,7 @@ struct dumpable { } virtual json - Dump() = 0; + Dump() const = 0; }; } // namespace interface diff --git a/core/src/scheduler/job/BuildIndexJob.cpp b/core/src/scheduler/job/BuildIndexJob.cpp index 423121c5fb..39c08b6b51 100644 --- a/core/src/scheduler/job/BuildIndexJob.cpp +++ b/core/src/scheduler/job/BuildIndexJob.cpp @@ -54,5 +54,13 @@ BuildIndexJob::BuildIndexDone(size_t to_index_id) { SERVER_LOG_DEBUG << "BuildIndexJob " << id() << " finish index file: " << to_index_id; } +json +BuildIndexJob::Dump() const { + json ret{ + {"number_of_to_index_file", to_index_files_.size()}, + }; + return ret; +} + } // namespace scheduler } // namespace milvus diff --git a/core/src/scheduler/job/BuildIndexJob.h b/core/src/scheduler/job/BuildIndexJob.h index b6ca462537..e3450ee048 100644 --- a/core/src/scheduler/job/BuildIndexJob.h +++ b/core/src/scheduler/job/BuildIndexJob.h @@ -53,6 +53,9 @@ class BuildIndexJob : public Job { void BuildIndexDone(size_t to_index_id); + json + Dump() const override; + public: Status& GetStatus() { diff --git a/core/src/scheduler/job/DeleteJob.cpp b/core/src/scheduler/job/DeleteJob.cpp index 96a6bb1817..04a9557177 100644 --- a/core/src/scheduler/job/DeleteJob.cpp +++ b/core/src/scheduler/job/DeleteJob.cpp @@ -45,5 +45,15 @@ DeleteJob::ResourceDone() { cv_.notify_one(); } +json +DeleteJob::Dump() const { + json ret{ + {"table_id", table_id_}, + {"number_of_resource", num_resource_}, + {"number_of_done", done_resource}, + }; + return ret; +} + } // namespace scheduler } // namespace milvus diff --git a/core/src/scheduler/job/DeleteJob.h b/core/src/scheduler/job/DeleteJob.h index 4ac48f6913..93e5aa40cc 100644 --- a/core/src/scheduler/job/DeleteJob.h +++ b/core/src/scheduler/job/DeleteJob.h @@ -44,6 +44,9 @@ class DeleteJob : public Job { void ResourceDone(); + json + Dump() const override; + public: std::string table_id() const { diff --git a/core/src/scheduler/job/Job.h b/core/src/scheduler/job/Job.h index 5fe645363f..709db8cffc 100644 --- a/core/src/scheduler/job/Job.h +++ b/core/src/scheduler/job/Job.h @@ -27,6 +27,8 @@ #include #include +#include "scheduler/interface/interfaces.h" + namespace milvus { namespace scheduler { @@ -39,7 +41,7 @@ enum class JobType { using JobId = std::uint64_t; -class Job { +class Job : public interface::dumpable { public: inline JobId id() const { diff --git a/core/src/scheduler/job/SearchJob.cpp b/core/src/scheduler/job/SearchJob.cpp index 518e3111c0..1143e33add 100644 --- a/core/src/scheduler/job/SearchJob.cpp +++ b/core/src/scheduler/job/SearchJob.cpp @@ -63,5 +63,15 @@ SearchJob::GetStatus() { return status_; } +json +SearchJob::Dump() const { + json ret{ + {"topk", topk_}, + {"nq", nq_}, + {"nprobe", nprobe_}, + }; + return ret; +} + } // namespace scheduler } // namespace milvus diff --git a/core/src/scheduler/job/SearchJob.h b/core/src/scheduler/job/SearchJob.h index fb2d87d876..6c2bd7eea9 100644 --- a/core/src/scheduler/job/SearchJob.h +++ b/core/src/scheduler/job/SearchJob.h @@ -61,6 +61,9 @@ class SearchJob : public Job { Status& GetStatus(); + json + Dump() const override; + public: uint64_t topk() const { diff --git a/core/src/scheduler/resource/CpuResource.cpp b/core/src/scheduler/resource/CpuResource.cpp index 500737a829..eb43a863cc 100644 --- a/core/src/scheduler/resource/CpuResource.cpp +++ b/core/src/scheduler/resource/CpuResource.cpp @@ -24,7 +24,7 @@ namespace scheduler { std::ostream& operator<<(std::ostream& out, const CpuResource& resource) { - out << resource.Dump(); + out << resource.Dump().dump(); return out; } diff --git a/core/src/scheduler/resource/CpuResource.h b/core/src/scheduler/resource/CpuResource.h index e3e4fc383f..10cd88ea2d 100644 --- a/core/src/scheduler/resource/CpuResource.h +++ b/core/src/scheduler/resource/CpuResource.h @@ -28,11 +28,6 @@ class CpuResource : public Resource { public: explicit CpuResource(std::string name, uint64_t device_id, bool enable_loader, bool enable_executor); - inline std::string - Dump() const override { - return ""; - } - friend std::ostream& operator<<(std::ostream& out, const CpuResource& resource); diff --git a/core/src/scheduler/resource/DiskResource.h b/core/src/scheduler/resource/DiskResource.h index 2346cd115a..384e44b4f2 100644 --- a/core/src/scheduler/resource/DiskResource.h +++ b/core/src/scheduler/resource/DiskResource.h @@ -28,11 +28,6 @@ class DiskResource : public Resource { public: explicit DiskResource(std::string name, uint64_t device_id, bool enable_loader, bool enable_executor); - inline std::string - Dump() const override { - return ""; - } - friend std::ostream& operator<<(std::ostream& out, const DiskResource& resource); diff --git a/core/src/scheduler/resource/GpuResource.cpp b/core/src/scheduler/resource/GpuResource.cpp index 20ed73e38c..f6363ff01d 100644 --- a/core/src/scheduler/resource/GpuResource.cpp +++ b/core/src/scheduler/resource/GpuResource.cpp @@ -22,7 +22,7 @@ namespace scheduler { std::ostream& operator<<(std::ostream& out, const GpuResource& resource) { - out << resource.Dump(); + out << resource.Dump().dump(); return out; } diff --git a/core/src/scheduler/resource/GpuResource.h b/core/src/scheduler/resource/GpuResource.h index e0df03d5a7..86b3b6658c 100644 --- a/core/src/scheduler/resource/GpuResource.h +++ b/core/src/scheduler/resource/GpuResource.h @@ -29,11 +29,6 @@ class GpuResource : public Resource { public: explicit GpuResource(std::string name, uint64_t device_id, bool enable_loader, bool enable_executor); - inline std::string - Dump() const override { - return ""; - } - friend std::ostream& operator<<(std::ostream& out, const GpuResource& resource); diff --git a/core/src/scheduler/resource/Node.cpp b/core/src/scheduler/resource/Node.cpp index 5401c36441..9621c4324e 100644 --- a/core/src/scheduler/resource/Node.cpp +++ b/core/src/scheduler/resource/Node.cpp @@ -38,15 +38,21 @@ Node::GetNeighbours() { return ret; } -std::string -Node::Dump() { - std::stringstream ss; - ss << "::neighbours:" << std::endl; - for (auto& neighbour : neighbours_) { - ss << "\t" << std::endl; +json +Node::Dump() const { + json neighbours; + for (auto & neighbour : neighbours_) { + json n; + n["id"] = neighbour.first; + n["connection"] = neighbour.second.connection.Dump(); + neighbours.push_back(n); } - return ss.str(); + + json ret{ + {"id", id_}, + {"neighbours", neighbours}, + }; + return ret; } void diff --git a/core/src/scheduler/resource/Node.h b/core/src/scheduler/resource/Node.h index 071ee9bab8..4539c8c86a 100644 --- a/core/src/scheduler/resource/Node.h +++ b/core/src/scheduler/resource/Node.h @@ -24,6 +24,7 @@ #include "Connection.h" #include "scheduler/TaskTable.h" +#include "scheduler/interface/interfaces.h" namespace milvus { namespace scheduler { @@ -41,7 +42,7 @@ struct Neighbour { }; // TODO(lxj): return type void -> Status -class Node { +class Node : public interface::dumpable { public: Node(); @@ -52,8 +53,8 @@ class Node { GetNeighbours(); public: - std::string - Dump(); + json + Dump() const override; private: std::mutex mutex_; diff --git a/core/src/scheduler/resource/Resource.cpp b/core/src/scheduler/resource/Resource.cpp index 59fd22b5d6..aac1953845 100644 --- a/core/src/scheduler/resource/Resource.cpp +++ b/core/src/scheduler/resource/Resource.cpp @@ -32,6 +32,24 @@ operator<<(std::ostream& out, const Resource& resource) { return out; } +std::string +ToString(ResourceType type) { + switch (type) { + case ResourceType::DISK: { + return "DISK"; + } + case ResourceType::CPU: { + return "CPU"; + } + case ResourceType::GPU: { + return "GPU"; + } + default: { + return "UNKNOWN"; + } + } +} + Resource::Resource(std::string name, ResourceType type, uint64_t device_id, bool enable_loader, bool enable_executor) : name_(std::move(name)), type_(type), @@ -89,6 +107,22 @@ Resource::WakeupExecutor() { exec_cv_.notify_one(); } +json +Resource::Dump() const { + json ret{ + {"device_id", device_id_}, + {"name", name_}, + {"type", ToString(type_)}, + {"task_average_cost", TaskAvgCost()}, + {"task_total_cost", total_cost_}, + {"total_tasks", total_task_}, + {"running", running_}, + {"enable_loader", enable_loader_}, + {"enable_executor", enable_executor_}, + }; + return ret; +} + uint64_t Resource::NumOfTaskToExec() { uint64_t count = 0; diff --git a/core/src/scheduler/resource/Resource.h b/core/src/scheduler/resource/Resource.h index c9026f13b6..c797e13de8 100644 --- a/core/src/scheduler/resource/Resource.h +++ b/core/src/scheduler/resource/Resource.h @@ -77,10 +77,8 @@ class Resource : public Node, public std::enable_shared_from_this { subscriber_ = std::move(subscriber); } - inline virtual std::string - Dump() const { - return ""; - } + json + Dump() const override; public: inline std::string diff --git a/core/src/scheduler/resource/TestResource.h b/core/src/scheduler/resource/TestResource.h index 9bbc5a54d0..4e4e148d6f 100644 --- a/core/src/scheduler/resource/TestResource.h +++ b/core/src/scheduler/resource/TestResource.h @@ -29,11 +29,6 @@ class TestResource : public Resource { public: explicit TestResource(std::string name, uint64_t device_id, bool enable_loader, bool enable_executor); - inline std::string - Dump() const override { - return ""; - } - friend std::ostream& operator<<(std::ostream& out, const TestResource& resource); From 72ae6f59cc484c4d67fd424f155ab335c141093e Mon Sep 17 00:00:00 2001 From: wxyu Date: Wed, 23 Oct 2019 17:14:50 +0800 Subject: [PATCH 24/89] clang-format Former-commit-id: 0ef52c892fdff56800f509cecf010f95c6e7e421 --- core/src/scheduler/ResourceMgr.cpp | 2 +- core/src/scheduler/resource/Node.cpp | 2 +- core/src/scheduler/resource/Resource.cpp | 4 +--- 3 files changed, 3 insertions(+), 5 deletions(-) diff --git a/core/src/scheduler/ResourceMgr.cpp b/core/src/scheduler/ResourceMgr.cpp index 383ad89c4e..187cd97bc7 100644 --- a/core/src/scheduler/ResourceMgr.cpp +++ b/core/src/scheduler/ResourceMgr.cpp @@ -173,7 +173,7 @@ ResourceMgr::GetNumGpuResource() const { json ResourceMgr::Dump() const { json resources{}; - for (auto &res : resources_) { + for (auto& res : resources_) { resources.push_back(res->Dump()); } json ret{ diff --git a/core/src/scheduler/resource/Node.cpp b/core/src/scheduler/resource/Node.cpp index 9621c4324e..dcf03a321c 100644 --- a/core/src/scheduler/resource/Node.cpp +++ b/core/src/scheduler/resource/Node.cpp @@ -41,7 +41,7 @@ Node::GetNeighbours() { json Node::Dump() const { json neighbours; - for (auto & neighbour : neighbours_) { + for (auto& neighbour : neighbours_) { json n; n["id"] = neighbour.first; n["connection"] = neighbour.second.connection.Dump(); diff --git a/core/src/scheduler/resource/Resource.cpp b/core/src/scheduler/resource/Resource.cpp index aac1953845..1cd4cde609 100644 --- a/core/src/scheduler/resource/Resource.cpp +++ b/core/src/scheduler/resource/Resource.cpp @@ -44,9 +44,7 @@ ToString(ResourceType type) { case ResourceType::GPU: { return "GPU"; } - default: { - return "UNKNOWN"; - } + default: { return "UNKNOWN"; } } } From 635434126c8b7a6372ee0a46a653cdc517ecdf5d Mon Sep 17 00:00:00 2001 From: "yudong.cai" Date: Wed, 23 Oct 2019 17:44:57 +0800 Subject: [PATCH 25/89] #89 add faiss benchmark Former-commit-id: 12f9741900e36bb22cd8b7839f16174fd5d0c6f9 --- core/src/index/unittest/CMakeLists.txt | 1 + .../unittest/faiss_benchmark/CMakeLists.txt | 24 + .../faiss_benchmark/faiss_benchmark_test.cpp | 546 ++++++++++++++++++ 3 files changed, 571 insertions(+) create mode 100644 core/src/index/unittest/faiss_benchmark/CMakeLists.txt create mode 100644 core/src/index/unittest/faiss_benchmark/faiss_benchmark_test.cpp diff --git a/core/src/index/unittest/CMakeLists.txt b/core/src/index/unittest/CMakeLists.txt index f840b28e28..2e84908cd7 100644 --- a/core/src/index/unittest/CMakeLists.txt +++ b/core/src/index/unittest/CMakeLists.txt @@ -86,5 +86,6 @@ install(TARGETS test_gpuresource DESTINATION unittest) install(TARGETS test_customized_index DESTINATION unittest) #add_subdirectory(faiss_ori) +#add_subdirectory(faiss_benchmark) add_subdirectory(test_nsg) diff --git a/core/src/index/unittest/faiss_benchmark/CMakeLists.txt b/core/src/index/unittest/faiss_benchmark/CMakeLists.txt new file mode 100644 index 0000000000..556364b68a --- /dev/null +++ b/core/src/index/unittest/faiss_benchmark/CMakeLists.txt @@ -0,0 +1,24 @@ +include_directories(${INDEX_SOURCE_DIR}/thirdparty) +include_directories(${INDEX_SOURCE_DIR}/include) +include_directories(/usr/local/cuda/include) +include_directories(/usr/local/hdf5/include) + +link_directories(/usr/local/cuda/lib64) +link_directories(/usr/local/hdf5/lib) + +set(unittest_libs + gtest gmock gtest_main gmock_main) + +set(depend_libs + faiss openblas lapack hdf5 + arrow ${ARROW_PREFIX}/lib/libjemalloc_pic.a + ) + +set(basic_libs + cudart cublas + gomp gfortran pthread + ) + +add_executable(test_faiss_benchmark faiss_benchmark_test.cpp) +target_link_libraries(test_faiss_benchmark ${depend_libs} ${unittest_libs} ${basic_libs}) +install(TARGETS test_faiss_benchmark DESTINATION unittest) diff --git a/core/src/index/unittest/faiss_benchmark/faiss_benchmark_test.cpp b/core/src/index/unittest/faiss_benchmark/faiss_benchmark_test.cpp new file mode 100644 index 0000000000..5ece23c7aa --- /dev/null +++ b/core/src/index/unittest/faiss_benchmark/faiss_benchmark_test.cpp @@ -0,0 +1,546 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#include + +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +#include +#include +#include +#include +#include + +/***************************************************** + * To run this test, please download the HDF5 from + * https://support.hdfgroup.org/ftp/HDF5/releases/ + * and install it to /usr/local/hdf5 . + *****************************************************/ + +double elapsed() { + struct timeval tv; + gettimeofday(&tv, nullptr); + return tv.tv_sec + tv.tv_usec * 1e-6; +} + +void* hdf5_read(const char *file_name, + const char *dataset_name, + H5T_class_t dataset_class, + size_t &d_out, + size_t &n_out) { + hid_t file, dataset, datatype, dataspace, memspace; + H5T_class_t t_class; /* data type class */ + H5T_order_t order; /* data order */ + size_t size; /* size of the data element stored in file */ + hsize_t dimsm[3]; /* memory space dimensions */ + hsize_t dims_out[2]; /* dataset dimensions */ + hsize_t count[2]; /* size of the hyperslab in the file */ + hsize_t offset[2]; /* hyperslab offset in the file */ + hsize_t count_out[3]; /* size of the hyperslab in memory */ + hsize_t offset_out[3]; /* hyperslab offset in memory */ + int rank; + void* data_out; /* output buffer */ + + /* Open the file and the dataset. */ + file = H5Fopen(file_name, H5F_ACC_RDONLY, H5P_DEFAULT); + dataset = H5Dopen2(file, dataset_name, H5P_DEFAULT); + + /* + * Get datatype and dataspace handles and then query + * dataset class, order, size, rank and dimensions. + */ + datatype = H5Dget_type(dataset); /* datatype handle */ + t_class = H5Tget_class(datatype); + assert(t_class == dataset_class || !"Illegal dataset class type"); + + order = H5Tget_order(datatype); + switch (order) { + case H5T_ORDER_LE: + printf("Little endian order \n"); + break; + case H5T_ORDER_BE: + printf("Big endian order \n"); + break; + default: + printf("Illegal endian order \n"); + break; + } + + size = H5Tget_size(datatype); + printf("Data size is %d \n", (int)size); + + dataspace = H5Dget_space(dataset); /* dataspace handle */ + rank = H5Sget_simple_extent_ndims(dataspace); + H5Sget_simple_extent_dims(dataspace, dims_out, NULL); + n_out = dims_out[0]; + d_out = dims_out[1]; + printf("rank %d, dimensions %lu x %lu \n", rank, n_out, d_out); + + /* Define hyperslab in the dataset. */ + offset[0] = offset[1] = 0; + count[0] = dims_out[0]; + count[1] = dims_out[1]; + H5Sselect_hyperslab(dataspace, H5S_SELECT_SET, offset, NULL, count, NULL); + + /* Define the memory dataspace. */ + dimsm[0] = dims_out[0]; + dimsm[1] = dims_out[1]; + dimsm[2] = 1; + memspace = H5Screate_simple(3, dimsm, NULL); + + /* Define memory hyperslab. */ + offset_out[0] = offset_out[1] = offset_out[2] = 0; + count_out[0] = dims_out[0]; + count_out[1] = dims_out[1]; + count_out[2] = 1; + H5Sselect_hyperslab(memspace, H5S_SELECT_SET, offset_out, NULL, count_out, NULL); + + /* Read data from hyperslab in the file into the hyperslab in memory and display. */ + switch (t_class) { + case H5T_INTEGER: + data_out = new int[dims_out[0] * dims_out[1]]; + H5Dread(dataset, H5T_NATIVE_INT, memspace, dataspace, H5P_DEFAULT, data_out); + break; + case H5T_FLOAT: + data_out = new float[dims_out[0] * dims_out[1]]; + H5Dread(dataset, H5T_NATIVE_FLOAT, memspace, dataspace, H5P_DEFAULT, data_out); + break; + default: + printf("Illegal dataset class type\n"); + break; + } + + /* Close/release resources. */ + H5Tclose(datatype); + H5Dclose(dataset); + H5Sclose(dataspace); + H5Sclose(memspace); + H5Fclose(file); + + return data_out; +} + +std::string get_index_file_name(const std::string& ann_test_name, + const std::string& index_key, + int32_t data_loops) { + size_t pos = index_key.find_first_of(',', 0); + std::string file_name = ann_test_name; + file_name = file_name + "_" + index_key.substr(0, pos) + "_" + index_key.substr(pos+1); + file_name = file_name + "_" + std::to_string(data_loops) + ".index"; + return file_name; +} + +bool parse_ann_test_name(const std::string& ann_test_name, + size_t &dim, + faiss::MetricType &metric_type) { + size_t pos1, pos2; + + if (ann_test_name.empty()) return false; + + pos1 = ann_test_name.find_first_of('-', 0); + if (pos1 == std::string::npos) return false; + pos2 = ann_test_name.find_first_of('-', pos1 + 1); + if (pos2 == std::string::npos) return false; + + dim = std::stoi(ann_test_name.substr(pos1+1, pos2-pos1-1)); + std::string metric_str = ann_test_name.substr(pos2+1); + if (metric_str == "angular") { + metric_type = faiss::METRIC_INNER_PRODUCT; + } else if (metric_str == "euclidean") { + metric_type = faiss::METRIC_L2; + } else { + return false; + } + + return true; +} + +void test_ann_hdf5(const std::string& ann_test_name, + const std::string& index_key, + int32_t index_add_loops, + const std::vector& nprobes) { + double t0 = elapsed(); + + const std::string ann_file_name = ann_test_name + ".hdf5"; + + faiss::MetricType metric_type; + size_t dim; + + if (!parse_ann_test_name(ann_test_name, dim, metric_type)) { + printf("Invalid ann test name: %s\n", ann_test_name.c_str()); + return; + } + + faiss::Index * index; + size_t d; + + std::string index_file_name = get_index_file_name(ann_test_name, index_key, index_add_loops); + try { + index = faiss::read_index(index_file_name.c_str()); + d = dim; + } + catch (...) { + printf("Cannot read index file: %s\n", index_file_name.c_str()); + + printf ("[%.3f s] Loading train set\n", elapsed() - t0); + + size_t nb; + float *xb = (float*)hdf5_read(ann_file_name.c_str(), "train", H5T_FLOAT, d, nb); + assert(d == dim || !"dataset does not have correct dimension"); + + printf ("[%.3f s] Preparing index \"%s\" d=%ld\n", + elapsed() - t0, index_key.c_str(), d); + + index = faiss::index_factory(d, index_key.c_str(), metric_type); + + printf ("[%.3f s] Training on %ld vectors\n", elapsed() - t0, nb); + + index->train(nb, xb); + + printf ("[%.3f s] Loading database\n", elapsed() - t0); + + // add index multiple times to get ~1G data set + for (int i = 0; i < index_add_loops; i++) { + printf ("[%.3f s] Indexing database, size %ld*%ld\n", elapsed() - t0, nb, d); + index->add(nb, xb); + } + + faiss::write_index(index, index_file_name.c_str()); + + delete [] xb; + } + + size_t nq; + float *xq; + { + printf ("[%.3f s] Loading queries\n", elapsed() - t0); + + size_t d2; + xq = (float*)hdf5_read(ann_file_name.c_str(), "test", H5T_FLOAT, d2, nq); + assert(d == d2 || !"query does not have same dimension as train set"); + } + + size_t k; // nb of results per query in the GT + faiss::Index::idx_t *gt; // nq * k matrix of ground-truth nearest-neighbors + { + printf ("[%.3f s] Loading ground truth for %ld queries\n", elapsed() - t0, nq); + + // load ground-truth and convert int to long + size_t nq2; + int *gt_int = (int*)hdf5_read(ann_file_name.c_str(), "neighbors", H5T_INTEGER, k, nq2); + assert(nq2 == nq || !"incorrect nb of ground truth entries"); + + gt = new faiss::Index::idx_t[k * nq]; + for(int i = 0; i < k * nq; i++) { + gt[i] = gt_int[i]; + } + delete [] gt_int; + } + + for (auto nprobe : nprobes) { + + faiss::ParameterSpace params; + + printf ("[%.3f s] Setting parameter configuration 'nprobe=%lu' on index\n", elapsed() - t0, nprobe); + + std::string nprobe_str = "nprobe=" + std::to_string(nprobe); + params.set_index_parameters(index, nprobe_str.c_str()); + + // output buffers +#if 1 + const size_t NQ = 1000, K = 1000; + faiss::Index::idx_t *I = new faiss::Index::idx_t[NQ * K]; + float *D = new float[NQ * K]; + + printf ("\n%s | %s | nprobe=%lu\n", ann_test_name.c_str(), index_key.c_str(), nprobe); + printf ("====================================================\n"); + for (size_t t_nq = 10; t_nq <= NQ; t_nq *= 10) { // nq = {10, 100, 1000} + for (size_t t_k = 100; t_k <= K; t_k *= 10) { // k = {100, 1000} + double t_start = elapsed(), t_end; + + index->search(t_nq, xq, t_k, D, I); + + t_end = elapsed(); + + // k = 100 for ground truth + int hit = 0; + for (int i = 0; i < t_nq; i++) { + // count the num of results exist in ground truth result set + // consider: each result replicates DATA_LOOPS times + for (int j_c = 0; j_c < k; j_c++) { + int r_c = I[i * t_k + j_c]; + for (int j_g = 0; j_g < k/index_add_loops; j_g++) { + if (gt[i * k + j_g] == r_c) { + hit++; + continue; + } + } + } + } + printf("nq = %4ld, k = %4ld, elapse = %fs, R@ = %.4f\n", + t_nq, t_k, (t_end - t_start), (hit / float(t_nq * k / index_add_loops))); + } + } + printf ("====================================================\n"); +#else + printf ("[%.3f s] Perform a search on %ld queries\n", elapsed() - t0, nq); + + faiss::Index::idx_t *I = new faiss::Index::idx_t[nq * k]; + float *D = new float[nq * k]; + + index->search(nq, xq, k, D, I); + + printf ("[%.3f s] Compute recalls\n", elapsed() - t0); + + // evaluate result by hand. + int n_1 = 0, n_10 = 0, n_100 = 0; + for(int i = 0; i < nq; i++) { + int gt_nn = gt[i * k]; + for(int j = 0; j < k; j++) { + if (I[i * k + j] == gt_nn) { + if(j < 1) n_1++; + if(j < 10) n_10++; + if(j < 100) n_100++; + } + } + } + printf("R@1 = %.4f\n", n_1 / float(nq)); + printf("R@10 = %.4f\n", n_10 / float(nq)); + printf("R@100 = %.4f\n", n_100 / float(nq)); +#endif + + printf ("[%.3f s] Search test done\n\n", elapsed() - t0); + + delete [] I; + delete [] D; + } + + delete [] xq; + delete [] gt; + delete index; +} + +#ifdef CUSTOMIZATION +void test_ivfsq8h_gpu(const std::string& ann_test_name, + int32_t index_add_loops, + const std::vector& nprobes){ + double t0 = elapsed(); + + const std::string ann_file_name = ann_test_name + ".hdf5"; + + faiss::MetricType metric_type; + size_t dim; + + if (!parse_ann_test_name(ann_test_name, dim, metric_type)) { + printf("Invalid ann test name: %s\n", ann_test_name.c_str()); + return; + } + + faiss::distance_compute_blas_threshold = 800; + faiss::gpu::StandardGpuResources res; + + const std::string index_key = "IVF16384,SQ8Hybrid"; + + faiss::Index* cpu_index = nullptr; + size_t d; + + std::string index_file_name = get_index_file_name(ann_test_name, index_key, index_add_loops); + try{ + cpu_index = faiss::read_index(index_file_name.c_str()); + d = dim; + } + catch (...){ + printf("Cannot read index file: %s\n", index_file_name.c_str()); + + printf ("[%.3f s] Loading train set\n", elapsed() - t0); + + size_t nb; + float *xb = (float*)hdf5_read(ann_file_name.c_str(), "train", H5T_FLOAT, d, nb); + assert(d == dim || !"dataset does not have correct dimension"); + + printf ("[%.3f s] Preparing index \"%s\" d=%ld\n", elapsed() - t0, index_key.c_str(), d); + + faiss::Index *ori_index = faiss::index_factory(d, index_key.c_str(), metric_type); + + auto device_index = faiss::gpu::index_cpu_to_gpu(&res, 0, ori_index); + + printf ("[%.3f s] Training on %ld vectors\n", elapsed() - t0, nb); + + device_index->train(nb, xb); + + printf ("[%.3f s] Loading database\n", elapsed() - t0); + + for (int i = 0; i < index_add_loops; i++) { + printf ("[%.3f s] Indexing database, size %ld*%ld\n", elapsed() - t0, nb, d); + device_index->add(nb, xb); + } + + cpu_index = faiss::gpu::index_gpu_to_cpu(device_index); + faiss::write_index(cpu_index, index_file_name.c_str()); + + delete []xb; + } + + faiss::IndexIVF *cpu_ivf_index = dynamic_cast(cpu_index); + if(cpu_ivf_index != nullptr) { + cpu_ivf_index->to_readonly(); + } + + faiss::gpu::GpuClonerOptions option; + option.allInGpu = true; + + faiss::IndexComposition index_composition; + index_composition.index = cpu_index; + index_composition.quantizer = nullptr; + index_composition.mode = 1; + + auto index = faiss::gpu::index_cpu_to_gpu(&res, 0, &index_composition, &option); + delete index; + + size_t nq; + float *xq; + { + printf ("[%.3f s] Loading queries\n", elapsed() - t0); + + size_t d2; + xq = (float*)hdf5_read(ann_file_name.c_str(), "test", H5T_FLOAT, d2, nq); + assert(d == d2 || !"query does not have same dimension as train set"); + } + + size_t k; + faiss::Index::idx_t *gt; + { + printf ("[%.3f s] Loading ground truth for %ld queries\n", elapsed() - t0, nq); + + size_t nq2; + int *gt_int = (int*)hdf5_read(ann_file_name.c_str(), "neighbors", H5T_INTEGER, k, nq2); + assert(nq2 == nq || !"incorrect nb of ground truth entries"); + + gt = new faiss::Index::idx_t[k * nq]; + for (unsigned long i = 0; i < k * nq; ++i) { + gt[i] = gt_int[i]; + } + delete []gt_int; + } + + for (auto nprobe : nprobes){ + printf ("[%.3f s] Setting parameter configuration 'nprobe=%lu' on index\n", + elapsed() - t0, nprobe); + + auto ivf_index = dynamic_cast(cpu_index); + ivf_index->nprobe = nprobe; + + auto is_gpu_flat_index = dynamic_cast(ivf_index->quantizer); + if(is_gpu_flat_index == nullptr) { + delete ivf_index->quantizer; + ivf_index->quantizer = index_composition.quantizer; + } + + const size_t NQ = 1000, K = 1000; + long *I = new faiss::Index::idx_t[NQ * K]; + float *D = new float[NQ * K]; + + printf ("\n%s %ld\n", index_key.c_str(), nprobe); + printf ("\n%s | %s | nprobe=%lu\n", ann_test_name.c_str(), index_key.c_str(), nprobe); + printf ("====================================================\n"); + + for (size_t t_nq = 10; t_nq <= NQ; t_nq *= 10) { // nq = {10, 100, 1000} + for (size_t t_k = 100; t_k <= K; t_k *= 10) { // k = {100, 1000} + double t_start = elapsed(), t_end; + + cpu_index->search(t_nq, xq, t_k, D, I); + + t_end = elapsed(); + + // k = 100 for ground truth + int hit = 0; + for (unsigned long i = 0; i < t_nq; i++) { + // count the num of results exist in ground truth result set + // consider: each result replicates DATA_LOOPS times + for (unsigned long j_c = 0; j_c < k; j_c++) { + int r_c = I[i * t_k + j_c]; + for (unsigned long j_g = 0; j_g < k/index_add_loops; j_g++) { + if (gt[i * k + j_g] == r_c) { + hit++; + continue; + } + } + } + } + printf("nq = %4ld, k = %4ld, elapse = %fs, R@ = %.4f\n", + t_nq, t_k, (t_end - t_start), (hit / float(t_nq * k / index_add_loops))); + } + } + printf ("====================================================\n"); + + printf ("[%.3f s] Search test done\n\n", elapsed() - t0); + + delete [] I; + delete [] D; + } + + delete [] xq; + delete [] gt; + delete cpu_index; +} +#endif + +/************************************************************************************ + * https://github.com/erikbern/ann-benchmarks + * + * Dataset Dimensions Train_size Test_size Neighbors Distance Download + * Fashion- + * MNIST 784 60,000 10,000 100 Euclidean HDF5 (217MB) + * GIST 960 1,000,000 1,000 100 Euclidean HDF5 (3.6GB) + * GloVe 100 1,183,514 10,000 100 Angular HDF5 (463MB) + * GloVe 200 1,183,514 10,000 100 Angular HDF5 (918MB) + * MNIST 784 60,000 10,000 100 Euclidean HDF5 (217MB) + * NYTimes 256 290,000 10,000 100 Angular HDF5 (301MB) + * SIFT 128 1,000,000 10,000 100 Euclidean HDF5 (501MB) +*************************************************************************************/ + +TEST(FAISSTEST, sift1m_L2) { + test_ann_hdf5("sift-128-euclidean", "IVF4096,Flat", 2, {8, 128}); + test_ann_hdf5("sift-128-euclidean", "IVF16384,SQ8", 2, {8, 128}); + test_ann_hdf5("sift-128-euclidean", "IVF16384,SQ8Hybrid", 2, {8, 128}); +#ifdef CUSTOMIZATION + test_ivfsq8h_gpu("sift-128-euclidean", 2, {8, 128}); +#endif + + test_ann_hdf5("glove-200-angular", "IVF4096,Flat", 1, {8, 128}); + test_ann_hdf5("glove-200-angular", "IVF16384,SQ8", 1, {8, 128}); + test_ann_hdf5("glove-200-angular", "IVF16384,SQ8Hybrid", 1, {8, 128}); +#ifdef CUSTOMIZATION + test_ivfsq8h_gpu("glove-200-angular", 2, {128, 1024}); +#endif +} + From 52ca4c4ae2578e270ca4b41a6f8f1f4e1b2a68d9 Mon Sep 17 00:00:00 2001 From: "yudong.cai" Date: Wed, 23 Oct 2019 17:46:27 +0800 Subject: [PATCH 26/89] update test parameter Former-commit-id: 0871f4f0acd87280b5327caf4f14dae5b0d5e165 --- .../src/index/unittest/faiss_benchmark/faiss_benchmark_test.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/core/src/index/unittest/faiss_benchmark/faiss_benchmark_test.cpp b/core/src/index/unittest/faiss_benchmark/faiss_benchmark_test.cpp index 5ece23c7aa..5f787a415c 100644 --- a/core/src/index/unittest/faiss_benchmark/faiss_benchmark_test.cpp +++ b/core/src/index/unittest/faiss_benchmark/faiss_benchmark_test.cpp @@ -540,7 +540,7 @@ TEST(FAISSTEST, sift1m_L2) { test_ann_hdf5("glove-200-angular", "IVF16384,SQ8", 1, {8, 128}); test_ann_hdf5("glove-200-angular", "IVF16384,SQ8Hybrid", 1, {8, 128}); #ifdef CUSTOMIZATION - test_ivfsq8h_gpu("glove-200-angular", 2, {128, 1024}); + test_ivfsq8h_gpu("glove-200-angular", 2, {8, 128}); #endif } From 4c7d590eb22323d1ef0f6a474a1f332d5c4ebd66 Mon Sep 17 00:00:00 2001 From: "yudong.cai" Date: Wed, 23 Oct 2019 18:24:35 +0800 Subject: [PATCH 27/89] 9 update unittest name Former-commit-id: 4bd87de1ceee872b9de13c5e5c65bab7291eb9c7 --- .../index/unittest/faiss_benchmark/faiss_benchmark_test.cpp | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/core/src/index/unittest/faiss_benchmark/faiss_benchmark_test.cpp b/core/src/index/unittest/faiss_benchmark/faiss_benchmark_test.cpp index 5f787a415c..f1dc060825 100644 --- a/core/src/index/unittest/faiss_benchmark/faiss_benchmark_test.cpp +++ b/core/src/index/unittest/faiss_benchmark/faiss_benchmark_test.cpp @@ -469,10 +469,8 @@ void test_ivfsq8h_gpu(const std::string& ann_test_name, long *I = new faiss::Index::idx_t[NQ * K]; float *D = new float[NQ * K]; - printf ("\n%s %ld\n", index_key.c_str(), nprobe); printf ("\n%s | %s | nprobe=%lu\n", ann_test_name.c_str(), index_key.c_str(), nprobe); printf ("====================================================\n"); - for (size_t t_nq = 10; t_nq <= NQ; t_nq *= 10) { // nq = {10, 100, 1000} for (size_t t_k = 100; t_k <= K; t_k *= 10) { // k = {100, 1000} double t_start = elapsed(), t_end; @@ -528,7 +526,7 @@ void test_ivfsq8h_gpu(const std::string& ann_test_name, * SIFT 128 1,000,000 10,000 100 Euclidean HDF5 (501MB) *************************************************************************************/ -TEST(FAISSTEST, sift1m_L2) { +TEST(FAISSTEST, BENCHMARK) { test_ann_hdf5("sift-128-euclidean", "IVF4096,Flat", 2, {8, 128}); test_ann_hdf5("sift-128-euclidean", "IVF16384,SQ8", 2, {8, 128}); test_ann_hdf5("sift-128-euclidean", "IVF16384,SQ8Hybrid", 2, {8, 128}); From 82ddcf8340b84a1eb2f4ea0209554a006f4dac6a Mon Sep 17 00:00:00 2001 From: "yudong.cai" Date: Wed, 23 Oct 2019 18:39:43 +0800 Subject: [PATCH 28/89] #89 display quant/search time Former-commit-id: 563141ab22274ca0e3e84253df8a79af58c16eca --- .../faiss_benchmark/faiss_benchmark_test.cpp | 28 +++++++++++++------ 1 file changed, 20 insertions(+), 8 deletions(-) diff --git a/core/src/index/unittest/faiss_benchmark/faiss_benchmark_test.cpp b/core/src/index/unittest/faiss_benchmark/faiss_benchmark_test.cpp index f1dc060825..5d63d63003 100644 --- a/core/src/index/unittest/faiss_benchmark/faiss_benchmark_test.cpp +++ b/core/src/index/unittest/faiss_benchmark/faiss_benchmark_test.cpp @@ -282,9 +282,12 @@ void test_ann_hdf5(const std::string& ann_test_name, float *D = new float[NQ * K]; printf ("\n%s | %s | nprobe=%lu\n", ann_test_name.c_str(), index_key.c_str(), nprobe); - printf ("====================================================\n"); + printf ("============================================================================================\n"); for (size_t t_nq = 10; t_nq <= NQ; t_nq *= 10) { // nq = {10, 100, 1000} for (size_t t_k = 100; t_k <= K; t_k *= 10) { // k = {100, 1000} + faiss::indexIVF_stats.quantization_time = 0.0; + faiss::indexIVF_stats.search_time = 0.0; + double t_start = elapsed(), t_end; index->search(t_nq, xq, t_k, D, I); @@ -306,11 +309,14 @@ void test_ann_hdf5(const std::string& ann_test_name, } } } - printf("nq = %4ld, k = %4ld, elapse = %fs, R@ = %.4f\n", - t_nq, t_k, (t_end - t_start), (hit / float(t_nq * k / index_add_loops))); + printf("nq = %4ld, k = %4ld, elapse = %fs (quant = %fs, search = %fs), R@ = %.4f\n", + t_nq, t_k, (t_end - t_start), + faiss::indexIVF_stats.quantization_time / 1000, + faiss::indexIVF_stats.search_time / 1000, + (hit / float(t_nq * k / index_add_loops))); } } - printf ("====================================================\n"); + printf ("============================================================================================\n"); #else printf ("[%.3f s] Perform a search on %ld queries\n", elapsed() - t0, nq); @@ -470,9 +476,12 @@ void test_ivfsq8h_gpu(const std::string& ann_test_name, float *D = new float[NQ * K]; printf ("\n%s | %s | nprobe=%lu\n", ann_test_name.c_str(), index_key.c_str(), nprobe); - printf ("====================================================\n"); + printf ("============================================================================================\n"); for (size_t t_nq = 10; t_nq <= NQ; t_nq *= 10) { // nq = {10, 100, 1000} for (size_t t_k = 100; t_k <= K; t_k *= 10) { // k = {100, 1000} + faiss::indexIVF_stats.quantization_time = 0.0; + faiss::indexIVF_stats.search_time = 0.0; + double t_start = elapsed(), t_end; cpu_index->search(t_nq, xq, t_k, D, I); @@ -494,11 +503,14 @@ void test_ivfsq8h_gpu(const std::string& ann_test_name, } } } - printf("nq = %4ld, k = %4ld, elapse = %fs, R@ = %.4f\n", - t_nq, t_k, (t_end - t_start), (hit / float(t_nq * k / index_add_loops))); + printf("nq = %4ld, k = %4ld, elapse = %fs (quant = %fs, search = %fs), R@ = %.4f\n", + t_nq, t_k, (t_end - t_start), + faiss::indexIVF_stats.quantization_time / 1000, + faiss::indexIVF_stats.search_time / 1000, + (hit / float(t_nq * k / index_add_loops))); } } - printf ("====================================================\n"); + printf ("============================================================================================\n"); printf ("[%.3f s] Search test done\n\n", elapsed() - t0); From e25caa572cc295ba55ca82c3eebc53c2c47e32d5 Mon Sep 17 00:00:00 2001 From: starlord Date: Wed, 23 Oct 2019 20:19:45 +0800 Subject: [PATCH 29/89] remove unused files of wrapper test Former-commit-id: b2b30f0d09085207c70d386e19c3c1fb6bbc0e2f --- core/unittest/wrapper/CMakeLists.txt | 8 -- .../unittest/wrapper/appendix/log_config.conf | 27 ---- .../wrapper/appendix/server_config.yaml | 37 ------ core/unittest/wrapper/test_knowhere.cpp | 16 +-- core/unittest/wrapper/utils.cpp | 125 +++++++++++++----- core/unittest/wrapper/utils.h | 10 ++ 6 files changed, 111 insertions(+), 112 deletions(-) delete mode 100644 core/unittest/wrapper/appendix/log_config.conf delete mode 100644 core/unittest/wrapper/appendix/server_config.yaml diff --git a/core/unittest/wrapper/CMakeLists.txt b/core/unittest/wrapper/CMakeLists.txt index a8015f8d34..ef145a9f50 100644 --- a/core/unittest/wrapper/CMakeLists.txt +++ b/core/unittest/wrapper/CMakeLists.txt @@ -41,11 +41,3 @@ target_link_libraries(test_wrapper ${unittest_libs}) install(TARGETS test_wrapper DESTINATION unittest) - -configure_file(appendix/server_config.yaml - "${CMAKE_CURRENT_BINARY_DIR}/milvus/conf/server_config.yaml" - COPYONLY) - -configure_file(appendix/log_config.conf - "${CMAKE_CURRENT_BINARY_DIR}/milvus/conf/log_config.conf" - COPYONLY) \ No newline at end of file diff --git a/core/unittest/wrapper/appendix/log_config.conf b/core/unittest/wrapper/appendix/log_config.conf deleted file mode 100644 index 0a3e0d21af..0000000000 --- a/core/unittest/wrapper/appendix/log_config.conf +++ /dev/null @@ -1,27 +0,0 @@ -* GLOBAL: - FORMAT = "%datetime | %level | %logger | %msg" - FILENAME = "/tmp/milvus/logs/milvus-%datetime{%y-%M-%d-%H:%m}-global.log" - ENABLED = true - TO_FILE = true - TO_STANDARD_OUTPUT = false - SUBSECOND_PRECISION = 3 - PERFORMANCE_TRACKING = false - MAX_LOG_FILE_SIZE = 209715200 ## Throw log files away after 200MB -* DEBUG: - FILENAME = "/tmp/milvus/logs/milvus-%datetime{%y-%M-%d-%H:%m}-debug.log" - ENABLED = true -* WARNING: - FILENAME = "/tmp/milvus/logs/milvus-%datetime{%y-%M-%d-%H:%m}-warning.log" -* TRACE: - FILENAME = "/tmp/milvus/logs/milvus-%datetime{%y-%M-%d-%H:%m}-trace.log" -* VERBOSE: - FORMAT = "%datetime{%d/%M/%y} | %level-%vlevel | %msg" - TO_FILE = false - TO_STANDARD_OUTPUT = false -## Error logs -* ERROR: - ENABLED = true - FILENAME = "/tmp/milvus/logs/milvus-%datetime{%y-%M-%d-%H:%m}-error.log" -* FATAL: - ENABLED = true - FILENAME = "/tmp/milvus/logs/milvus-%datetime{%y-%M-%d-%H:%m}-fatal.log" diff --git a/core/unittest/wrapper/appendix/server_config.yaml b/core/unittest/wrapper/appendix/server_config.yaml deleted file mode 100644 index f92b2f1a18..0000000000 --- a/core/unittest/wrapper/appendix/server_config.yaml +++ /dev/null @@ -1,37 +0,0 @@ -# All the following configurations are default values. - -server_config: - address: 0.0.0.0 # milvus server ip address (IPv4) - port: 19530 # port range: 1025 ~ 65534 - deploy_mode: single # deployment type: single, cluster_readonly, cluster_writable - time_zone: UTC+8 - -db_config: - primary_path: /tmp/milvus # path used to store data and meta - secondary_path: # path used to store data only, split by semicolon - - backend_url: sqlite://:@:/ # URI format: dialect://username:password@host:port/database - # Keep 'dialect://:@:/', and replace other texts with real values. - # Replace 'dialect' with 'mysql' or 'sqlite' - - insert_buffer_size: 4 # GB, maximum insert buffer size allowed - build_index_gpu: 0 # gpu id used for building index - -metric_config: - enable_monitor: false # enable monitoring or not - collector: prometheus # prometheus - prometheus_config: - port: 8080 # port prometheus used to fetch metrics - -cache_config: - cpu_mem_capacity: 16 # GB, CPU memory used for cache - cpu_mem_threshold: 0.85 # percentage of data kept when cache cleanup triggered - cache_insert_data: false # whether load inserted data into cache - -engine_config: - blas_threshold: 20 - -resource_config: - resource_pool: - - cpu - - gpu0 diff --git a/core/unittest/wrapper/test_knowhere.cpp b/core/unittest/wrapper/test_knowhere.cpp index e9b93fb63e..455fdbdebe 100644 --- a/core/unittest/wrapper/test_knowhere.cpp +++ b/core/unittest/wrapper/test_knowhere.cpp @@ -16,20 +16,16 @@ // under the License. #include "wrapper/KnowhereResource.h" +#include "wrapper/utils.h" #include "server/Config.h" #include -namespace { - -static const char* CONFIG_FILE_PATH = "./milvus/conf/server_config.yaml"; -static const char* LOG_FILE_PATH = "./milvus/conf/log_config.conf"; - -} // namespace - -TEST(KnowhereTest, KNOWHERE_RESOURCE_TEST) { - milvus::server::Config &config = milvus::server::Config::GetInstance(); - milvus::Status s = config.LoadConfigFile(CONFIG_FILE_PATH); +TEST_F(KnowhereTest, KNOWHERE_RESOURCE_TEST) { + std::string config_path(CONFIG_PATH); + config_path += CONFIG_FILE; + milvus::server::Config& config = milvus::server::Config::GetInstance(); + milvus::Status s = config.LoadConfigFile(config_path); ASSERT_TRUE(s.ok()); milvus::engine::KnowhereResource::Initialize(); diff --git a/core/unittest/wrapper/utils.cpp b/core/unittest/wrapper/utils.cpp index 445b7a2de6..6204ac0c05 100644 --- a/core/unittest/wrapper/utils.cpp +++ b/core/unittest/wrapper/utils.cpp @@ -18,13 +18,78 @@ #include #include +#include #include "wrapper/utils.h" +#include "utils/CommonUtil.h" + +namespace { +static const char + * CONFIG_STR = "# All the following configurations are default values.\n" + "\n" + "server_config:\n" + " address: 0.0.0.0 # milvus server ip address (IPv4)\n" + " port: 19530 # port range: 1025 ~ 65534\n" + " deploy_mode: single \n" + " time_zone: UTC+8\n" + "\n" + "db_config:\n" + " primary_path: /tmp/milvus # path used to store data and meta\n" + " secondary_path: # path used to store data only, split by semicolon\n" + "\n" + " backend_url: sqlite://:@:/ # URI format: dialect://username:password@host:port/database\n" + " \n" + " # Replace 'dialect' with 'mysql' or 'sqlite'\n" + "\n" + " insert_buffer_size: 4 # GB, maximum insert buffer size allowed\n" + "\n" + "metric_config:\n" + " enable_monitor: false # enable monitoring or not\n" + " collector: prometheus # prometheus\n" + " prometheus_config:\n" + " port: 8080 # port prometheus used to fetch metrics\n" + "\n" + "cache_config:\n" + " cpu_mem_capacity: 16 # GB, CPU memory used for cache\n" + " cpu_mem_threshold: 0.85 # percentage of data kept when cache cleanup triggered\n" + " cache_insert_data: false # whether load inserted data into cache\n" + "\n" + "engine_config:\n" + " blas_threshold: 20\n" + "\n" + "resource_config:\n" + " resource_pool:\n" + " - gpu0\n" + " index_build_device: gpu0 # GPU used for building index"; void -DataGenBase::GenData(const int &dim, const int &nb, const int &nq, - float *xb, float *xq, int64_t *ids, - const int &k, int64_t *gt_ids, float *gt_dis) { +WriteToFile(const std::string& file_path, const char* content) { + std::fstream fs(file_path.c_str(), std::ios_base::out); + + //write data to file + fs << content; + fs.close(); +} + +} // namespace + +void +KnowhereTest::SetUp() { + std::string config_path(CONFIG_PATH); + milvus::server::CommonUtil::CreateDirectory(config_path); + WriteToFile(config_path + CONFIG_FILE, CONFIG_STR); +} + +void +KnowhereTest::TearDown() { + std::string config_path(CONFIG_PATH); + milvus::server::CommonUtil::DeleteDirectory(config_path); +} + +void +DataGenBase::GenData(const int& dim, const int& nb, const int& nq, + float* xb, float* xq, int64_t* ids, + const int& k, int64_t* gt_ids, float* gt_dis) { for (auto i = 0; i < nb; ++i) { for (auto j = 0; j < dim; ++j) { //p_data[i * d + j] = float(base + i); @@ -44,15 +109,15 @@ DataGenBase::GenData(const int &dim, const int &nb, const int &nq, } void -DataGenBase::GenData(const int &dim, - const int &nb, - const int &nq, - std::vector &xb, - std::vector &xq, - std::vector &ids, - const int &k, - std::vector >_ids, - std::vector >_dis) { +DataGenBase::GenData(const int& dim, + const int& nb, + const int& nq, + std::vector& xb, + std::vector& xq, + std::vector& ids, + const int& k, + std::vector& gt_ids, + std::vector& gt_dis) { xb.resize(nb * dim); xq.resize(nq * dim); ids.resize(nb); @@ -63,27 +128,27 @@ DataGenBase::GenData(const int &dim, void DataGenBase::AssertResult(const std::vector& ids, const std::vector& dis) { - EXPECT_EQ(ids.size(), nq * k); - EXPECT_EQ(dis.size(), nq * k); + EXPECT_EQ(ids.size(), nq * k); + EXPECT_EQ(dis.size(), nq * k); - for (auto i = 0; i < nq; i++) { - EXPECT_EQ(ids[i * k], gt_ids[i * k]); - //EXPECT_EQ(dis[i * k], gt_dis[i * k]); - } + for (auto i = 0; i < nq; i++) { + EXPECT_EQ(ids[i * k], gt_ids[i * k]); + //EXPECT_EQ(dis[i * k], gt_dis[i * k]); + } - int match = 0; - for (int i = 0; i < nq; ++i) { - for (int j = 0; j < k; ++j) { - for (int l = 0; l < k; ++l) { - if (ids[i * nq + j] == gt_ids[i * nq + l]) match++; - } + int match = 0; + for (int i = 0; i < nq; ++i) { + for (int j = 0; j < k; ++j) { + for (int l = 0; l < k; ++l) { + if (ids[i * nq + j] == gt_ids[i * nq + l]) match++; } } + } - auto precision = float(match) / (nq * k); - EXPECT_GT(precision, 0.5); - std::cout << std::endl << "Precision: " << precision - << ", match: " << match - << ", total: " << nq * k - << std::endl; + auto precision = float(match) / (nq * k); + EXPECT_GT(precision, 0.5); + std::cout << std::endl << "Precision: " << precision + << ", match: " << match + << ", total: " << nq * k + << std::endl; } diff --git a/core/unittest/wrapper/utils.h b/core/unittest/wrapper/utils.h index 5a614543c9..0b9e422152 100644 --- a/core/unittest/wrapper/utils.h +++ b/core/unittest/wrapper/utils.h @@ -18,6 +18,7 @@ #pragma once +#include #include #include #include @@ -40,6 +41,15 @@ constexpr int64_t PINMEM = 1024 * 1024 * 200; constexpr int64_t TEMPMEM = 1024 * 1024 * 300; constexpr int64_t RESNUM = 2; +static const char *CONFIG_PATH = "/tmp/milvus_test"; +static const char *CONFIG_FILE = "/server_config.yaml"; + +class KnowhereTest : public ::testing::Test { + protected: + void SetUp() override; + void TearDown() override; +}; + class DataGenBase { public: virtual void GenData(const int& dim, const int& nb, const int& nq, float* xb, float* xq, int64_t* ids, From a6279d2acb1992b2fb53930e084ca3b5c7e77ee5 Mon Sep 17 00:00:00 2001 From: Zhiru Zhu Date: Wed, 23 Oct 2019 21:45:48 +0800 Subject: [PATCH 30/89] Speed up CMake build process Former-commit-id: 2edb2ae26d93ed9b7a8d25b89d30152e60257250 --- CHANGELOG.md | 3 +- core/cmake/DefineOptions.cmake | 23 +- core/cmake/ThirdPartyPackages.cmake | 707 +++--------------- core/src/CMakeLists.txt | 8 +- .../index/cmake/ThirdPartyPackagesCore.cmake | 23 +- core/src/sdk/CMakeLists.txt | 3 - core/thirdparty/versions.txt | 7 +- core/ubuntu_build_deps.sh | 3 +- core/unittest/CMakeLists.txt | 1 - core/unittest/server/CMakeLists.txt | 3 - 10 files changed, 125 insertions(+), 656 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 0efa5cebcc..ebad4e2f32 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -6,8 +6,9 @@ Please mark all change in change log and use the ticket from JIRA. ## Bug ## Improvement -- \#64 - Improvement dump function in scheduler +- \#64 - Improvement dump function in - \#82 - Move easyloggingpp into "external" directory +- \#92 - Speed up CMake build process ## Feature ## Task diff --git a/core/cmake/DefineOptions.cmake b/core/cmake/DefineOptions.cmake index 7aae177f0b..167b6e9d66 100644 --- a/core/cmake/DefineOptions.cmake +++ b/core/cmake/DefineOptions.cmake @@ -55,21 +55,10 @@ define_option_string(MILVUS_DEPENDENCY_SOURCE define_option(MILVUS_VERBOSE_THIRDPARTY_BUILD "Show output from ExternalProjects rather than just logging to files" ON) -define_option(MILVUS_BOOST_VENDORED "Use vendored Boost instead of existing Boost. \ -Note that this requires linking Boost statically" OFF) - -define_option(MILVUS_BOOST_HEADER_ONLY "Use only BOOST headers" OFF) - -define_option(MILVUS_WITH_BZ2 "Build with BZ2 compression" ON) - define_option(MILVUS_WITH_EASYLOGGINGPP "Build with Easylogging++ library" ON) -define_option(MILVUS_WITH_LZ4 "Build with lz4 compression" ON) - define_option(MILVUS_WITH_PROMETHEUS "Build with PROMETHEUS library" ON) -define_option(MILVUS_WITH_SNAPPY "Build with Snappy compression" ON) - define_option(MILVUS_WITH_SQLITE "Build with SQLite library" ON) define_option(MILVUS_WITH_SQLITE_ORM "Build with SQLite ORM library" ON) @@ -78,16 +67,6 @@ define_option(MILVUS_WITH_MYSQLPP "Build with MySQL++" ON) define_option(MILVUS_WITH_YAMLCPP "Build with yaml-cpp library" ON) -define_option(MILVUS_WITH_ZLIB "Build with zlib compression" ON) - -if(CMAKE_VERSION VERSION_LESS 3.7) - set(MILVUS_WITH_ZSTD_DEFAULT OFF) -else() - # ExternalProject_Add(SOURCE_SUBDIR) is available since CMake 3.7. - set(MILVUS_WITH_ZSTD_DEFAULT ON) -endif() -define_option(MILVUS_WITH_ZSTD "Build with zstd compression" ${MILVUS_WITH_ZSTD_DEFAULT}) - if (MILVUS_ENABLE_PROFILING STREQUAL "ON") define_option(MILVUS_WITH_LIBUNWIND "Build with libunwind" ON) define_option(MILVUS_WITH_GPERFTOOLS "Build with gperftools" ON) @@ -95,6 +74,8 @@ endif() define_option(MILVUS_WITH_GRPC "Build with GRPC" ON) +define_option(MILVUS_WITH_ZLIB "Build with zlib compression" ON) + #---------------------------------------------------------------------- if(MSVC) set_option_category("MSVC") diff --git a/core/cmake/ThirdPartyPackages.cmake b/core/cmake/ThirdPartyPackages.cmake index ade57c06ad..d0057d3c22 100644 --- a/core/cmake/ThirdPartyPackages.cmake +++ b/core/cmake/ThirdPartyPackages.cmake @@ -16,21 +16,16 @@ set(MILVUS_THIRDPARTY_DEPENDENCIES - BOOST - BZip2 GTest - Lz4 MySQLPP Prometheus - Snappy SQLite SQLite_ORM yaml-cpp - ZLIB - ZSTD libunwind gperftools - GRPC) + GRPC + ZLIB) message(STATUS "Using ${MILVUS_DEPENDENCY_SOURCE} approach to find dependencies") @@ -42,34 +37,26 @@ foreach(DEPENDENCY ${MILVUS_THIRDPARTY_DEPENDENCIES}) endforeach() macro(build_dependency DEPENDENCY_NAME) - if("${DEPENDENCY_NAME}" STREQUAL "BZip2") - build_bzip2() - elseif ("${DEPENDENCY_NAME}" STREQUAL "GTest") + if ("${DEPENDENCY_NAME}" STREQUAL "GTest") build_gtest() - elseif("${DEPENDENCY_NAME}" STREQUAL "Lz4") - build_lz4() elseif ("${DEPENDENCY_NAME}" STREQUAL "MySQLPP") build_mysqlpp() elseif ("${DEPENDENCY_NAME}" STREQUAL "Prometheus") build_prometheus() - elseif ("${DEPENDENCY_NAME}" STREQUAL "Snappy") - build_snappy() elseif ("${DEPENDENCY_NAME}" STREQUAL "SQLite") build_sqlite() elseif ("${DEPENDENCY_NAME}" STREQUAL "SQLite_ORM") build_sqlite_orm() elseif("${DEPENDENCY_NAME}" STREQUAL "yaml-cpp") build_yamlcpp() - elseif("${DEPENDENCY_NAME}" STREQUAL "ZLIB") - build_zlib() - elseif("${DEPENDENCY_NAME}" STREQUAL "ZSTD") - build_zstd() elseif("${DEPENDENCY_NAME}" STREQUAL "libunwind") build_libunwind() elseif("${DEPENDENCY_NAME}" STREQUAL "gperftools") build_gperftools() elseif("${DEPENDENCY_NAME}" STREQUAL "GRPC") build_grpc() + elseif("${DEPENDENCY_NAME}" STREQUAL "ZLIB") + build_zlib() else() message(FATAL_ERROR "Unknown thirdparty dependency to build: ${DEPENDENCY_NAME}") endif () @@ -263,23 +250,6 @@ foreach(_VERSION_ENTRY ${TOOLCHAIN_VERSIONS_TXT}) set(${_LIB_NAME} "${_LIB_VERSION}") endforeach() -if(DEFINED ENV{MILVUS_BOOST_URL}) - set(BOOST_SOURCE_URL "$ENV{MILVUS_BOOST_URL}") -else() - string(REPLACE "." "_" BOOST_VERSION_UNDERSCORES ${BOOST_VERSION}) - set(BOOST_SOURCE_URL - "https://nchc.dl.sourceforge.net/project/boost/boost/${BOOST_VERSION}/boost_${BOOST_VERSION_UNDERSCORES}.tar.gz") - #"https://dl.bintray.com/boostorg/release/${BOOST_VERSION}/source/boost_${BOOST_VERSION_UNDERSCORES}.tar.gz") -endif() -set(BOOST_MD5 "fea771fe8176828fabf9c09242ee8c26") - -if(DEFINED ENV{MILVUS_BZIP2_URL}) - set(BZIP2_SOURCE_URL "$ENV{MILVUS_BZIP2_URL}") -else() - set(BZIP2_SOURCE_URL "https://sourceware.org/pub/bzip2/bzip2-${BZIP2_VERSION}.tar.gz") -endif() -set(BZIP2_MD5 "00b516f4704d4a7cb50a1d97e6e8e15b") - if (DEFINED ENV{MILVUS_GTEST_URL}) set(GTEST_SOURCE_URL "$ENV{MILVUS_GTEST_URL}") else () @@ -288,13 +258,6 @@ else () endif() set(GTEST_MD5 "2e6fbeb6a91310a16efe181886c59596") -if(DEFINED ENV{MILVUS_LZ4_URL}) - set(LZ4_SOURCE_URL "$ENV{MILVUS_LZ4_URL}") -else() - set(LZ4_SOURCE_URL "https://github.com/lz4/lz4/archive/${LZ4_VERSION}.tar.gz") -endif() -set(LZ4_MD5 "a80f28f2a2e5fe59ebfe8407f793da22") - if(DEFINED ENV{MILVUS_MYSQLPP_URL}) set(MYSQLPP_SOURCE_URL "$ENV{MILVUS_MYSQLPP_URL}") else() @@ -309,14 +272,6 @@ else () https://github.com/jupp0r/prometheus-cpp.git) endif() -if(DEFINED ENV{MILVUS_SNAPPY_URL}) - set(SNAPPY_SOURCE_URL "$ENV{MILVUS_SNAPPY_URL}") -else() - set(SNAPPY_SOURCE_URL - "https://github.com/google/snappy/archive/${SNAPPY_VERSION}.tar.gz") -endif() -set(SNAPPY_MD5 "ee9086291c9ae8deb4dac5e0b85bf54a") - if(DEFINED ENV{MILVUS_SQLITE_URL}) set(SQLITE_SOURCE_URL "$ENV{MILVUS_SQLITE_URL}") else() @@ -329,7 +284,6 @@ if(DEFINED ENV{MILVUS_SQLITE_ORM_URL}) set(SQLITE_ORM_SOURCE_URL "$ENV{MILVUS_SQLITE_ORM_URL}") else() set(SQLITE_ORM_SOURCE_URL -# "http://192.168.1.105:6060/Test/sqlite_orm/-/archive/master/sqlite_orm-master.zip") "https://github.com/fnc12/sqlite_orm/archive/${SQLITE_ORM_VERSION}.zip") endif() set(SQLITE_ORM_MD5 "ba9a405a8a1421c093aa8ce988ff8598") @@ -341,20 +295,6 @@ else() endif() set(YAMLCPP_MD5 "5b943e9af0060d0811148b037449ef82") -if(DEFINED ENV{MILVUS_ZLIB_URL}) - set(ZLIB_SOURCE_URL "$ENV{MILVUS_ZLIB_URL}") -else() - set(ZLIB_SOURCE_URL "https://github.com/madler/zlib/archive/${ZLIB_VERSION}.tar.gz") -endif() -set(ZLIB_MD5 "0095d2d2d1f3442ce1318336637b695f") - -if(DEFINED ENV{MILVUS_ZSTD_URL}) - set(ZSTD_SOURCE_URL "$ENV{MILVUS_ZSTD_URL}") -else() - set(ZSTD_SOURCE_URL "https://github.com/facebook/zstd/archive/${ZSTD_VERSION}.tar.gz") -endif() -set(ZSTD_MD5 "340c837db48354f8d5eafe74c6077120") - if(DEFINED ENV{MILVUS_LIBUNWIND_URL}) set(LIBUNWIND_SOURCE_URL "$ENV{MILVUS_LIBUNWIND_URL}") else() @@ -379,202 +319,12 @@ else() endif() set(GRPC_MD5 "0362ba219f59432c530070b5f5c3df73") - -# ---------------------------------------------------------------------- -# Add Boost dependencies (code adapted from Apache Kudu (incubating)) - -set(Boost_USE_MULTITHREADED ON) -set(Boost_ADDITIONAL_VERSIONS - "1.70.0" - "1.70" - "1.69.0" - "1.69" - "1.68.0" - "1.68" - "1.67.0" - "1.67" - "1.66.0" - "1.66" - "1.65.0" - "1.65" - "1.64.0" - "1.64" - "1.63.0" - "1.63" - "1.62.0" - "1.61" - "1.61.0" - "1.62" - "1.60.0" - "1.60") - -if(MILVUS_BOOST_VENDORED) - set(BOOST_PREFIX "${CMAKE_CURRENT_BINARY_DIR}/boost_ep-prefix/src/boost_ep") - set(BOOST_LIB_DIR "${BOOST_PREFIX}/stage/lib") - set(BOOST_BUILD_LINK "static") - set(BOOST_STATIC_SYSTEM_LIBRARY - "${BOOST_LIB_DIR}/${CMAKE_STATIC_LIBRARY_PREFIX}boost_system${CMAKE_STATIC_LIBRARY_SUFFIX}" - ) - set(BOOST_STATIC_FILESYSTEM_LIBRARY - "${BOOST_LIB_DIR}/${CMAKE_STATIC_LIBRARY_PREFIX}boost_filesystem${CMAKE_STATIC_LIBRARY_SUFFIX}" - ) - set(BOOST_STATIC_SERIALIZATION_LIBRARY - "${BOOST_LIB_DIR}/${CMAKE_STATIC_LIBRARY_PREFIX}boost_serialization${CMAKE_STATIC_LIBRARY_SUFFIX}" - ) - set(BOOST_SYSTEM_LIBRARY boost_system_static) - set(BOOST_FILESYSTEM_LIBRARY boost_filesystem_static) - set(BOOST_SERIALIZATION_LIBRARY boost_serialization_static) - - if(MILVUS_BOOST_HEADER_ONLY) - set(BOOST_BUILD_PRODUCTS) - set(BOOST_CONFIGURE_COMMAND "") - set(BOOST_BUILD_COMMAND "") - else() - set(BOOST_BUILD_PRODUCTS ${BOOST_STATIC_SYSTEM_LIBRARY} - ${BOOST_STATIC_FILESYSTEM_LIBRARY} ${BOOST_STATIC_SERIALIZATION_LIBRARY}) - set(BOOST_CONFIGURE_COMMAND "./bootstrap.sh" "--prefix=${BOOST_PREFIX}" - "--with-libraries=filesystem,serialization,system") - if("${CMAKE_BUILD_TYPE}" STREQUAL "DEBUG") - set(BOOST_BUILD_VARIANT "debug") - else() - set(BOOST_BUILD_VARIANT "release") - endif() - set(BOOST_BUILD_COMMAND - "./b2" - "link=${BOOST_BUILD_LINK}" - "variant=${BOOST_BUILD_VARIANT}" - "cxxflags=-fPIC") - - add_thirdparty_lib(boost_system STATIC_LIB "${BOOST_STATIC_SYSTEM_LIBRARY}") - - add_thirdparty_lib(boost_filesystem STATIC_LIB "${BOOST_STATIC_FILESYSTEM_LIBRARY}") - - add_thirdparty_lib(boost_serialization STATIC_LIB "${BOOST_STATIC_SERIALIZATION_LIBRARY}") - - set(MILVUS_BOOST_LIBS ${BOOST_SYSTEM_LIBRARY} ${BOOST_FILESYSTEM_LIBRARY} ${BOOST_STATIC_SERIALIZATION_LIBRARY}) - endif() - externalproject_add(boost_ep - URL - ${BOOST_SOURCE_URL} - BUILD_BYPRODUCTS - ${BOOST_BUILD_PRODUCTS} - BUILD_IN_SOURCE - 1 - CONFIGURE_COMMAND - ${BOOST_CONFIGURE_COMMAND} - BUILD_COMMAND - ${BOOST_BUILD_COMMAND} - INSTALL_COMMAND - "" - ${EP_LOG_OPTIONS}) - - - set(Boost_INCLUDE_DIR "${BOOST_PREFIX}") - set(Boost_INCLUDE_DIRS "${Boost_INCLUDE_DIR}") - add_dependencies(boost_system_static boost_ep) - add_dependencies(boost_filesystem_static boost_ep) - add_dependencies(boost_serialization_static boost_ep) - -endif() - -include_directories(SYSTEM ${Boost_INCLUDE_DIR}) -link_directories(SYSTEM ${BOOST_LIB_DIR}) - -# ---------------------------------------------------------------------- -# bzip2 - -macro(build_bzip2) - message(STATUS "Building BZip2-${BZIP2_VERSION} from source") - set(BZIP2_PREFIX "${CMAKE_CURRENT_BINARY_DIR}/bzip2_ep-prefix/src/bzip2_ep") - set(BZIP2_INCLUDE_DIR "${BZIP2_PREFIX}/include") - set(BZIP2_STATIC_LIB - "${BZIP2_PREFIX}/lib/${CMAKE_STATIC_LIBRARY_PREFIX}bz2${CMAKE_STATIC_LIBRARY_SUFFIX}") - - if(USE_JFROG_CACHE STREQUAL "ON") - set(BZIP2_CACHE_PACKAGE_NAME "bzip2_${BZIP2_MD5}.tar.gz") - set(BZIP2_CACHE_URL "${JFROG_ARTFACTORY_CACHE_URL}/${BZIP2_CACHE_PACKAGE_NAME}") - set(BZIP2_CACHE_PACKAGE_PATH "${THIRDPARTY_PACKAGE_CACHE}/${BZIP2_CACHE_PACKAGE_NAME}") - - execute_process(COMMAND wget -q --method HEAD ${BZIP2_CACHE_URL} RESULT_VARIABLE return_code) - message(STATUS "Check the remote cache file ${BZIP2_CACHE_URL}. return code = ${return_code}") - if (NOT return_code EQUAL 0) - externalproject_add(bzip2_ep - ${EP_LOG_OPTIONS} - CONFIGURE_COMMAND - "" - BUILD_IN_SOURCE - 1 - BUILD_COMMAND - ${MAKE} - ${MAKE_BUILD_ARGS} - CFLAGS=${EP_C_FLAGS} - INSTALL_COMMAND - ${MAKE} - install - PREFIX=${BZIP2_PREFIX} - CFLAGS=${EP_C_FLAGS} - INSTALL_DIR - ${BZIP2_PREFIX} - URL - ${BZIP2_SOURCE_URL} - BUILD_BYPRODUCTS - "${BZIP2_STATIC_LIB}") - - ExternalProject_Create_Cache(bzip2_ep ${BZIP2_CACHE_PACKAGE_PATH} "${CMAKE_CURRENT_BINARY_DIR}/bzip2_ep-prefix" ${JFROG_USER_NAME} ${JFROG_PASSWORD} ${BZIP2_CACHE_URL}) - else() - file(DOWNLOAD ${BZIP2_CACHE_URL} ${BZIP2_CACHE_PACKAGE_PATH} STATUS status) - list(GET status 0 status_code) - message(STATUS "DOWNLOADING FROM ${BZIP2_CACHE_URL} TO ${BZIP2_CACHE_PACKAGE_PATH}. STATUS = ${status_code}") - if (status_code EQUAL 0) - ExternalProject_Use_Cache(bzip2_ep ${BZIP2_CACHE_PACKAGE_PATH} ${CMAKE_CURRENT_BINARY_DIR}) - endif() - endif() - else() - externalproject_add(bzip2_ep - ${EP_LOG_OPTIONS} - CONFIGURE_COMMAND - "" - BUILD_IN_SOURCE - 1 - BUILD_COMMAND - ${MAKE} - ${MAKE_BUILD_ARGS} - CFLAGS=${EP_C_FLAGS} - INSTALL_COMMAND - ${MAKE} - install - PREFIX=${BZIP2_PREFIX} - CFLAGS=${EP_C_FLAGS} - INSTALL_DIR - ${BZIP2_PREFIX} - URL - ${BZIP2_SOURCE_URL} - BUILD_BYPRODUCTS - "${BZIP2_STATIC_LIB}") - endif() - - file(MAKE_DIRECTORY "${BZIP2_INCLUDE_DIR}") - add_library(bzip2 STATIC IMPORTED) - set_target_properties( - bzip2 - PROPERTIES IMPORTED_LOCATION "${BZIP2_STATIC_LIB}" - INTERFACE_INCLUDE_DIRECTORIES "${BZIP2_INCLUDE_DIR}") - - add_dependencies(bzip2 bzip2_ep) -endmacro() - -if(MILVUS_WITH_BZ2) - resolve_dependency(BZip2) - - if(NOT TARGET bzip2) - add_library(bzip2 UNKNOWN IMPORTED) - set_target_properties(bzip2 - PROPERTIES IMPORTED_LOCATION "${BZIP2_LIBRARIES}" - INTERFACE_INCLUDE_DIRECTORIES "${BZIP2_INCLUDE_DIR}") - endif() - link_directories(SYSTEM ${BZIP2_PREFIX}/lib/) - include_directories(SYSTEM "${BZIP2_INCLUDE_DIR}") +if(DEFINED ENV{MILVUS_ZLIB_URL}) + set(ZLIB_SOURCE_URL "$ENV{MILVUS_ZLIB_URL}") +else() + set(ZLIB_SOURCE_URL "https://github.com/madler/zlib/archive/${ZLIB_VERSION}.tar.gz") endif() +set(ZLIB_MD5 "0095d2d2d1f3442ce1318336637b695f") # ---------------------------------------------------------------------- # Google gtest @@ -689,95 +439,6 @@ if (MILVUS_BUILD_TESTS) include_directories(SYSTEM ${GTEST_INCLUDE_DIR}) endif() -# ---------------------------------------------------------------------- -# lz4 - -macro(build_lz4) - message(STATUS "Building lz4-${LZ4_VERSION} from source") - set(LZ4_BUILD_DIR "${CMAKE_CURRENT_BINARY_DIR}/lz4_ep-prefix/src/lz4_ep") - set(LZ4_PREFIX "${CMAKE_CURRENT_BINARY_DIR}/lz4_ep-prefix/") - - set(LZ4_STATIC_LIB "${LZ4_BUILD_DIR}/lib/liblz4.a") - set(LZ4_BUILD_COMMAND BUILD_COMMAND ${MAKE} ${MAKE_BUILD_ARGS} CFLAGS=${EP_C_FLAGS}) - - # We need to copy the header in lib to directory outside of the build - if(USE_JFROG_CACHE STREQUAL "ON") - set(LZ4_CACHE_PACKAGE_NAME "lz4_${LZ4_MD5}.tar.gz") - set(LZ4_CACHE_URL "${JFROG_ARTFACTORY_CACHE_URL}/${LZ4_CACHE_PACKAGE_NAME}") - set(LZ4_CACHE_PACKAGE_PATH "${THIRDPARTY_PACKAGE_CACHE}/${LZ4_CACHE_PACKAGE_NAME}") - - execute_process(COMMAND wget -q --method HEAD ${LZ4_CACHE_URL} RESULT_VARIABLE return_code) - message(STATUS "Check the remote file ${LZ4_CACHE_URL}. return code = ${return_code}") - if (NOT return_code EQUAL 0) - externalproject_add(lz4_ep - URL - ${LZ4_SOURCE_URL} - ${EP_LOG_OPTIONS} - UPDATE_COMMAND - ${CMAKE_COMMAND} - -E - copy_directory - "${LZ4_BUILD_DIR}/lib" - "${LZ4_PREFIX}/include" - ${LZ4_PATCH_COMMAND} - CONFIGURE_COMMAND - "" - INSTALL_COMMAND - "" - BINARY_DIR - ${LZ4_BUILD_DIR} - BUILD_BYPRODUCTS - ${LZ4_STATIC_LIB} - ${LZ4_BUILD_COMMAND}) - - ExternalProject_Create_Cache(lz4_ep ${LZ4_CACHE_PACKAGE_PATH} "${CMAKE_CURRENT_BINARY_DIR}/lz4_ep-prefix" ${JFROG_USER_NAME} ${JFROG_PASSWORD} ${LZ4_CACHE_URL}) - else() - file(DOWNLOAD ${LZ4_CACHE_URL} ${LZ4_CACHE_PACKAGE_PATH} STATUS status) - list(GET status 0 status_code) - message(STATUS "DOWNLOADING FROM ${LZ4_CACHE_URL} TO ${LZ4_CACHE_PACKAGE_PATH}. STATUS = ${status_code}") - if (status_code EQUAL 0) - ExternalProject_Use_Cache(lz4_ep ${LZ4_CACHE_PACKAGE_PATH} ${CMAKE_CURRENT_BINARY_DIR}) - endif() - endif() - else() - externalproject_add(lz4_ep - URL - ${LZ4_SOURCE_URL} - ${EP_LOG_OPTIONS} - UPDATE_COMMAND - ${CMAKE_COMMAND} - -E - copy_directory - "${LZ4_BUILD_DIR}/lib" - "${LZ4_PREFIX}/include" - ${LZ4_PATCH_COMMAND} - CONFIGURE_COMMAND - "" - INSTALL_COMMAND - "" - BINARY_DIR - ${LZ4_BUILD_DIR} - BUILD_BYPRODUCTS - ${LZ4_STATIC_LIB} - ${LZ4_BUILD_COMMAND}) - endif() - - file(MAKE_DIRECTORY "${LZ4_PREFIX}/include") - add_library(lz4 STATIC IMPORTED) - set_target_properties(lz4 - PROPERTIES IMPORTED_LOCATION "${LZ4_STATIC_LIB}" - INTERFACE_INCLUDE_DIRECTORIES "${LZ4_PREFIX}/include") - add_dependencies(lz4 lz4_ep) -endmacro() - -if(MILVUS_WITH_LZ4) - resolve_dependency(Lz4) - - get_target_property(LZ4_INCLUDE_DIR lz4 INTERFACE_INCLUDE_DIRECTORIES) - link_directories(SYSTEM ${LZ4_BUILD_DIR}/lib/) - include_directories(SYSTEM ${LZ4_INCLUDE_DIR}) -endif() - # ---------------------------------------------------------------------- # MySQL++ @@ -996,93 +657,6 @@ if(MILVUS_WITH_PROMETHEUS) endif() -# ---------------------------------------------------------------------- -# Snappy - -macro(build_snappy) - message(STATUS "Building snappy-${SNAPPY_VERSION} from source") - set(SNAPPY_PREFIX "${CMAKE_CURRENT_BINARY_DIR}/snappy_ep-prefix/src/snappy_ep") - set(SNAPPY_INCLUDE_DIRS "${SNAPPY_PREFIX}/include") - set(SNAPPY_STATIC_LIB_NAME snappy) - set(SNAPPY_STATIC_LIB - "${SNAPPY_PREFIX}/lib/${CMAKE_STATIC_LIBRARY_PREFIX}${SNAPPY_STATIC_LIB_NAME}${CMAKE_STATIC_LIBRARY_SUFFIX}" - ) - - set(SNAPPY_CMAKE_ARGS - ${EP_COMMON_CMAKE_ARGS} - -DCMAKE_INSTALL_LIBDIR=lib - -DSNAPPY_BUILD_TESTS=OFF - "-DCMAKE_INSTALL_PREFIX=${SNAPPY_PREFIX}") - - if(USE_JFROG_CACHE STREQUAL "ON") - set(SNAPPY_CACHE_PACKAGE_NAME "snappy_${SNAPPY_MD5}.tar.gz") - set(SNAPPY_CACHE_URL "${JFROG_ARTFACTORY_CACHE_URL}/${SNAPPY_CACHE_PACKAGE_NAME}") - set(SNAPPY_CACHE_PACKAGE_PATH "${THIRDPARTY_PACKAGE_CACHE}/${SNAPPY_CACHE_PACKAGE_NAME}") - - execute_process(COMMAND wget -q --method HEAD ${SNAPPY_CACHE_URL} RESULT_VARIABLE return_code) - message(STATUS "Check the remote file ${SNAPPY_CACHE_URL}. return code = ${return_code}") - if (NOT return_code EQUAL 0) - externalproject_add(snappy_ep - ${EP_LOG_OPTIONS} - BUILD_COMMAND - ${MAKE} - ${MAKE_BUILD_ARGS} - BUILD_IN_SOURCE - 1 - INSTALL_DIR - ${SNAPPY_PREFIX} - URL - ${SNAPPY_SOURCE_URL} - CMAKE_ARGS - ${SNAPPY_CMAKE_ARGS} - BUILD_BYPRODUCTS - "${SNAPPY_STATIC_LIB}") - - ExternalProject_Create_Cache(snappy_ep ${SNAPPY_CACHE_PACKAGE_PATH} "${CMAKE_CURRENT_BINARY_DIR}/snappy_ep-prefix" ${JFROG_USER_NAME} ${JFROG_PASSWORD} ${SNAPPY_CACHE_URL}) - else() - file(DOWNLOAD ${SNAPPY_CACHE_URL} ${SNAPPY_CACHE_PACKAGE_PATH} STATUS status) - list(GET status 0 status_code) - message(STATUS "DOWNLOADING FROM ${SNAPPY_CACHE_URL} TO ${SNAPPY_CACHE_PACKAGE_PATH}. STATUS = ${status_code}") - if (status_code EQUAL 0) - ExternalProject_Use_Cache(snappy_ep ${SNAPPY_CACHE_PACKAGE_PATH} ${CMAKE_CURRENT_BINARY_DIR}) - endif() - endif() - else() - externalproject_add(snappy_ep - ${EP_LOG_OPTIONS} - BUILD_COMMAND - ${MAKE} - ${MAKE_BUILD_ARGS} - BUILD_IN_SOURCE - 1 - INSTALL_DIR - ${SNAPPY_PREFIX} - URL - ${SNAPPY_SOURCE_URL} - CMAKE_ARGS - ${SNAPPY_CMAKE_ARGS} - BUILD_BYPRODUCTS - "${SNAPPY_STATIC_LIB}") - endif() - - file(MAKE_DIRECTORY "${SNAPPY_INCLUDE_DIR}") - add_library(snappy STATIC IMPORTED) - set_target_properties(snappy - PROPERTIES IMPORTED_LOCATION "${SNAPPY_STATIC_LIB}" - INTERFACE_INCLUDE_DIRECTORIES - "${SNAPPY_INCLUDE_DIR}") - add_dependencies(snappy snappy_ep) -endmacro() - -if(MILVUS_WITH_SNAPPY) - - resolve_dependency(Snappy) - - get_target_property(SNAPPY_INCLUDE_DIRS snappy INTERFACE_INCLUDE_DIRECTORIES) - link_directories(SYSTEM ${SNAPPY_PREFIX}/lib/) - include_directories(SYSTEM ${SNAPPY_INCLUDE_DIRS}) -endif() - # ---------------------------------------------------------------------- # SQLite @@ -1265,176 +839,6 @@ if(MILVUS_WITH_YAMLCPP) include_directories(SYSTEM ${YAMLCPP_INCLUDE_DIR}) endif() -# ---------------------------------------------------------------------- -# zlib - -macro(build_zlib) - message(STATUS "Building ZLIB-${ZLIB_VERSION} from source") - set(ZLIB_PREFIX "${CMAKE_CURRENT_BINARY_DIR}/zlib_ep-prefix/src/zlib_ep") - set(ZLIB_STATIC_LIB_NAME libz.a) - set(ZLIB_STATIC_LIB "${ZLIB_PREFIX}/lib/${ZLIB_STATIC_LIB_NAME}") - set(ZLIB_INCLUDE_DIR "${ZLIB_PREFIX}/include") - set(ZLIB_CMAKE_ARGS ${EP_COMMON_CMAKE_ARGS} "-DCMAKE_INSTALL_PREFIX=${ZLIB_PREFIX}" - -DBUILD_SHARED_LIBS=OFF) - - if(USE_JFROG_CACHE STREQUAL "ON") - set(ZLIB_CACHE_PACKAGE_NAME "zlib_${ZLIB_MD5}.tar.gz") - set(ZLIB_CACHE_URL "${JFROG_ARTFACTORY_CACHE_URL}/${ZLIB_CACHE_PACKAGE_NAME}") - set(ZLIB_CACHE_PACKAGE_PATH "${THIRDPARTY_PACKAGE_CACHE}/${ZLIB_CACHE_PACKAGE_NAME}") - - execute_process(COMMAND wget -q --method HEAD ${ZLIB_CACHE_URL} RESULT_VARIABLE return_code) - message(STATUS "Check the remote file ${ZLIB_CACHE_URL}. return code = ${return_code}") - if (NOT return_code EQUAL 0) - externalproject_add(zlib_ep - URL - ${ZLIB_SOURCE_URL} - ${EP_LOG_OPTIONS} - BUILD_COMMAND - ${MAKE} - ${MAKE_BUILD_ARGS} - BUILD_BYPRODUCTS - "${ZLIB_STATIC_LIB}" - CMAKE_ARGS - ${ZLIB_CMAKE_ARGS}) - - ExternalProject_Create_Cache(zlib_ep ${ZLIB_CACHE_PACKAGE_PATH} "${CMAKE_CURRENT_BINARY_DIR}/zlib_ep-prefix" ${JFROG_USER_NAME} ${JFROG_PASSWORD} ${ZLIB_CACHE_URL}) - else() - file(DOWNLOAD ${ZLIB_CACHE_URL} ${ZLIB_CACHE_PACKAGE_PATH} STATUS status) - list(GET status 0 status_code) - message(STATUS "DOWNLOADING FROM ${ZLIB_CACHE_URL} TO ${ZLIB_CACHE_PACKAGE_PATH}. STATUS = ${status_code}") - if (status_code EQUAL 0) - ExternalProject_Use_Cache(zlib_ep ${ZLIB_CACHE_PACKAGE_PATH} ${CMAKE_CURRENT_BINARY_DIR}) - endif() - endif() - else() - externalproject_add(zlib_ep - URL - ${ZLIB_SOURCE_URL} - ${EP_LOG_OPTIONS} - BUILD_COMMAND - ${MAKE} - ${MAKE_BUILD_ARGS} - BUILD_BYPRODUCTS - "${ZLIB_STATIC_LIB}" - CMAKE_ARGS - ${ZLIB_CMAKE_ARGS}) - endif() - - file(MAKE_DIRECTORY "${ZLIB_INCLUDE_DIR}") - add_library(zlib STATIC IMPORTED) - set_target_properties(zlib - PROPERTIES IMPORTED_LOCATION "${ZLIB_STATIC_LIB}" - INTERFACE_INCLUDE_DIRECTORIES "${ZLIB_INCLUDE_DIR}") - - add_dependencies(zlib zlib_ep) -endmacro() - -if(MILVUS_WITH_ZLIB) - resolve_dependency(ZLIB) - - get_target_property(ZLIB_INCLUDE_DIR zlib INTERFACE_INCLUDE_DIRECTORIES) - include_directories(SYSTEM ${ZLIB_INCLUDE_DIR}) -endif() - -# ---------------------------------------------------------------------- -# zstd - -macro(build_zstd) - message(STATUS "Building zstd-${ZSTD_VERSION} from source") - set(ZSTD_PREFIX "${CMAKE_CURRENT_BINARY_DIR}/zstd_ep-prefix/src/zstd_ep") - - set(ZSTD_CMAKE_ARGS - ${EP_COMMON_TOOLCHAIN} - "-DCMAKE_INSTALL_PREFIX=${ZSTD_PREFIX}" - -DCMAKE_BUILD_TYPE=${CMAKE_BUILD_TYPE} - -DCMAKE_INSTALL_LIBDIR=lib #${CMAKE_INSTALL_LIBDIR} - -DZSTD_BUILD_PROGRAMS=off - -DZSTD_BUILD_SHARED=off - -DZSTD_BUILD_STATIC=on - -DZSTD_MULTITHREAD_SUPPORT=off) - - - set(ZSTD_STATIC_LIB "${ZSTD_PREFIX}/lib/libzstd.a") - set(ZSTD_INCLUDE_DIR "${ZSTD_PREFIX}/include") - set(ZSTD_CMAKE_ARGS - ${ZSTD_CMAKE_ARGS} - -DCMAKE_C_COMPILER=${CMAKE_C_COMPILER} - -DCMAKE_CXX_COMPILER=${CMAKE_CXX_COMPILER} - -DCMAKE_C_FLAGS=${EP_C_FLAGS} - -DCMAKE_CXX_FLAGS=${EP_CXX_FLAGS}) - - if(CMAKE_VERSION VERSION_LESS 3.7) - message(FATAL_ERROR "Building zstd using ExternalProject requires at least CMake 3.7") - endif() - - if(USE_JFROG_CACHE STREQUAL "ON") - set(ZSTD_CACHE_PACKAGE_NAME "zstd_${ZSTD_MD5}.tar.gz") - set(ZSTD_CACHE_URL "${JFROG_ARTFACTORY_CACHE_URL}/${ZSTD_CACHE_PACKAGE_NAME}") - set(ZSTD_CACHE_PACKAGE_PATH "${THIRDPARTY_PACKAGE_CACHE}/${ZSTD_CACHE_PACKAGE_NAME}") - - execute_process(COMMAND wget -q --method HEAD ${ZSTD_CACHE_URL} RESULT_VARIABLE return_code) - message(STATUS "Check the remote file ${ZSTD_CACHE_URL}. return code = ${return_code}") - if (NOT return_code EQUAL 0) - externalproject_add(zstd_ep - ${EP_LOG_OPTIONS} - CMAKE_ARGS - ${ZSTD_CMAKE_ARGS} - SOURCE_SUBDIR - "build/cmake" - BUILD_COMMAND - ${MAKE} - ${MAKE_BUILD_ARGS} - INSTALL_DIR - ${ZSTD_PREFIX} - URL - ${ZSTD_SOURCE_URL} - BUILD_BYPRODUCTS - "${ZSTD_STATIC_LIB}") - - ExternalProject_Create_Cache(zstd_ep ${ZSTD_CACHE_PACKAGE_PATH} "${CMAKE_CURRENT_BINARY_DIR}/zstd_ep-prefix" ${JFROG_USER_NAME} ${JFROG_PASSWORD} ${ZSTD_CACHE_URL}) - else() - file(DOWNLOAD ${ZSTD_CACHE_URL} ${ZSTD_CACHE_PACKAGE_PATH} STATUS status) - list(GET status 0 status_code) - message(STATUS "DOWNLOADING FROM ${ZSTD_CACHE_URL} TO ${ZSTD_CACHE_PACKAGE_PATH}. STATUS = ${status_code}") - if (status_code EQUAL 0) - ExternalProject_Use_Cache(zstd_ep ${ZSTD_CACHE_PACKAGE_PATH} ${CMAKE_CURRENT_BINARY_DIR}) - endif() - endif() - else() - externalproject_add(zstd_ep - ${EP_LOG_OPTIONS} - CMAKE_ARGS - ${ZSTD_CMAKE_ARGS} - SOURCE_SUBDIR - "build/cmake" - BUILD_COMMAND - ${MAKE} - ${MAKE_BUILD_ARGS} - INSTALL_DIR - ${ZSTD_PREFIX} - URL - ${ZSTD_SOURCE_URL} - BUILD_BYPRODUCTS - "${ZSTD_STATIC_LIB}") - endif() - - file(MAKE_DIRECTORY "${ZSTD_INCLUDE_DIR}") - add_library(zstd STATIC IMPORTED) - set_target_properties(zstd - PROPERTIES IMPORTED_LOCATION "${ZSTD_STATIC_LIB}" - INTERFACE_INCLUDE_DIRECTORIES "${ZSTD_INCLUDE_DIR}") - - add_dependencies(zstd zstd_ep) -endmacro() - -if(MILVUS_WITH_ZSTD) - resolve_dependency(ZSTD) - - get_target_property(ZSTD_INCLUDE_DIR zstd INTERFACE_INCLUDE_DIRECTORIES) - link_directories(SYSTEM ${ZSTD_PREFIX}/lib) - include_directories(SYSTEM ${ZSTD_INCLUDE_DIR}) -endif() - # ---------------------------------------------------------------------- # libunwind @@ -1637,6 +1041,8 @@ macro(build_grpc) ${GRPC_PROTOBUF_STATIC_LIB} ${GRPC_PROTOC_STATIC_LIB}) + ExternalProject_Add_StepDependencies(grpc_ep build zlib_ep) + ExternalProject_Create_Cache(grpc_ep ${GRPC_CACHE_PACKAGE_PATH} "${CMAKE_CURRENT_BINARY_DIR}/grpc_ep-prefix" ${JFROG_USER_NAME} ${JFROG_PASSWORD} ${GRPC_CACHE_URL}) else() file(DOWNLOAD ${GRPC_CACHE_URL} ${GRPC_CACHE_PACKAGE_PATH} STATUS status) @@ -1665,6 +1071,9 @@ macro(build_grpc) ${GRPCPP_CHANNELZ_STATIC_LIB} ${GRPC_PROTOBUF_STATIC_LIB} ${GRPC_PROTOC_STATIC_LIB}) + + ExternalProject_Add_StepDependencies(grpc_ep build zlib_ep) + endif() file(MAKE_DIRECTORY "${GRPC_INCLUDE_DIR}") @@ -1672,25 +1081,30 @@ macro(build_grpc) add_library(grpc STATIC IMPORTED) set_target_properties(grpc PROPERTIES IMPORTED_LOCATION "${GRPC_STATIC_LIB}" - INTERFACE_INCLUDE_DIRECTORIES "${GRPC_INCLUDE_DIR}") + INTERFACE_INCLUDE_DIRECTORIES "${GRPC_INCLUDE_DIR}" + INTERFACE_LINK_LIBRARIES "zlib" ) add_library(grpc++ STATIC IMPORTED) set_target_properties(grpc++ PROPERTIES IMPORTED_LOCATION "${GRPC++_STATIC_LIB}" - INTERFACE_INCLUDE_DIRECTORIES "${GRPC_INCLUDE_DIR}") + INTERFACE_INCLUDE_DIRECTORIES "${GRPC_INCLUDE_DIR}" + INTERFACE_LINK_LIBRARIES "zlib" ) add_library(grpcpp_channelz STATIC IMPORTED) set_target_properties(grpcpp_channelz PROPERTIES IMPORTED_LOCATION "${GRPCPP_CHANNELZ_STATIC_LIB}" - INTERFACE_INCLUDE_DIRECTORIES "${GRPC_INCLUDE_DIR}") + INTERFACE_INCLUDE_DIRECTORIES "${GRPC_INCLUDE_DIR}" + INTERFACE_LINK_LIBRARIES "zlib" ) add_library(grpc_protobuf STATIC IMPORTED) set_target_properties(grpc_protobuf - PROPERTIES IMPORTED_LOCATION "${GRPC_PROTOBUF_STATIC_LIB}") + PROPERTIES IMPORTED_LOCATION "${GRPC_PROTOBUF_STATIC_LIB}" + INTERFACE_LINK_LIBRARIES "zlib" ) add_library(grpc_protoc STATIC IMPORTED) set_target_properties(grpc_protoc - PROPERTIES IMPORTED_LOCATION "${GRPC_PROTOC_STATIC_LIB}") + PROPERTIES IMPORTED_LOCATION "${GRPC_PROTOC_STATIC_LIB}" + INTERFACE_LINK_LIBRARIES "zlib" ) add_dependencies(grpc grpc_ep) add_dependencies(grpc++ grpc_ep) @@ -1710,3 +1124,74 @@ if(MILVUS_WITH_GRPC) include_directories(SYSTEM ${GRPC_THIRD_PARTY_DIR}/protobuf/src) link_directories(SYSTEM ${GRPC_PROTOBUF_LIB_DIR}) endif() + +# ---------------------------------------------------------------------- +# zlib + +macro(build_zlib) + message(STATUS "Building ZLIB-${ZLIB_VERSION} from source") + set(ZLIB_PREFIX "${CMAKE_CURRENT_BINARY_DIR}/zlib_ep-prefix/src/zlib_ep") + set(ZLIB_STATIC_LIB_NAME libz.a) + set(ZLIB_STATIC_LIB "${ZLIB_PREFIX}/lib/${ZLIB_STATIC_LIB_NAME}") + set(ZLIB_INCLUDE_DIR "${ZLIB_PREFIX}/include") + set(ZLIB_CMAKE_ARGS ${EP_COMMON_CMAKE_ARGS} "-DCMAKE_INSTALL_PREFIX=${ZLIB_PREFIX}" + -DBUILD_SHARED_LIBS=OFF) + + if(USE_JFROG_CACHE STREQUAL "ON") + set(ZLIB_CACHE_PACKAGE_NAME "zlib_${ZLIB_MD5}.tar.gz") + set(ZLIB_CACHE_URL "${JFROG_ARTFACTORY_CACHE_URL}/${ZLIB_CACHE_PACKAGE_NAME}") + set(ZLIB_CACHE_PACKAGE_PATH "${THIRDPARTY_PACKAGE_CACHE}/${ZLIB_CACHE_PACKAGE_NAME}") + + execute_process(COMMAND wget -q --method HEAD ${ZLIB_CACHE_URL} RESULT_VARIABLE return_code) + message(STATUS "Check the remote file ${ZLIB_CACHE_URL}. return code = ${return_code}") + if (NOT return_code EQUAL 0) + externalproject_add(zlib_ep + URL + ${ZLIB_SOURCE_URL} + ${EP_LOG_OPTIONS} + BUILD_COMMAND + ${MAKE} + ${MAKE_BUILD_ARGS} + BUILD_BYPRODUCTS + "${ZLIB_STATIC_LIB}" + CMAKE_ARGS + ${ZLIB_CMAKE_ARGS}) + + ExternalProject_Create_Cache(zlib_ep ${ZLIB_CACHE_PACKAGE_PATH} "${CMAKE_CURRENT_BINARY_DIR}/zlib_ep-prefix" ${JFROG_USER_NAME} ${JFROG_PASSWORD} ${ZLIB_CACHE_URL}) + else() + file(DOWNLOAD ${ZLIB_CACHE_URL} ${ZLIB_CACHE_PACKAGE_PATH} STATUS status) + list(GET status 0 status_code) + message(STATUS "DOWNLOADING FROM ${ZLIB_CACHE_URL} TO ${ZLIB_CACHE_PACKAGE_PATH}. STATUS = ${status_code}") + if (status_code EQUAL 0) + ExternalProject_Use_Cache(zlib_ep ${ZLIB_CACHE_PACKAGE_PATH} ${CMAKE_CURRENT_BINARY_DIR}) + endif() + endif() + else() + externalproject_add(zlib_ep + URL + ${ZLIB_SOURCE_URL} + ${EP_LOG_OPTIONS} + BUILD_COMMAND + ${MAKE} + ${MAKE_BUILD_ARGS} + BUILD_BYPRODUCTS + "${ZLIB_STATIC_LIB}" + CMAKE_ARGS + ${ZLIB_CMAKE_ARGS}) + endif() + + file(MAKE_DIRECTORY "${ZLIB_INCLUDE_DIR}") + add_library(zlib STATIC IMPORTED) + set_target_properties(zlib + PROPERTIES IMPORTED_LOCATION "${ZLIB_STATIC_LIB}" + INTERFACE_INCLUDE_DIRECTORIES "${ZLIB_INCLUDE_DIR}") + + add_dependencies(zlib zlib_ep) +endmacro() + +if(MILVUS_WITH_ZLIB) + resolve_dependency(ZLIB) + + get_target_property(ZLIB_INCLUDE_DIR zlib INTERFACE_INCLUDE_DIRECTORIES) + include_directories(SYSTEM ${ZLIB_INCLUDE_DIR}) +endif() diff --git a/core/src/CMakeLists.txt b/core/src/CMakeLists.txt index d086955078..ae3a458987 100644 --- a/core/src/CMakeLists.txt +++ b/core/src/CMakeLists.txt @@ -120,14 +120,10 @@ set(third_party_libs ${client_grpc_lib} yaml-cpp ${prometheus_lib} - ${boost_lib} - bzip2 - lz4 - snappy - zlib - zstd ${cuda_lib} mysqlpp + zlib + ${boost_lib} ) if (MILVUS_ENABLE_PROFILING STREQUAL "ON") diff --git a/core/src/index/cmake/ThirdPartyPackagesCore.cmake b/core/src/index/cmake/ThirdPartyPackagesCore.cmake index 7e9bb0b671..0712966d9c 100644 --- a/core/src/index/cmake/ThirdPartyPackagesCore.cmake +++ b/core/src/index/cmake/ThirdPartyPackagesCore.cmake @@ -299,12 +299,29 @@ macro(build_arrow) ${EP_COMMON_CMAKE_ARGS} -DARROW_BUILD_STATIC=ON -DARROW_BUILD_SHARED=OFF - -DARROW_PARQUET=OFF -DARROW_USE_GLOG=OFF -DCMAKE_INSTALL_PREFIX=${ARROW_PREFIX} - "-DCMAKE_LIBRARY_PATH=${CUDA_TOOLKIT_ROOT_DIR}/lib64/stubs" + -DARROW_CUDA=OFF + -DARROW_FLIGHT=OFF + -DARROW_GANDIVA=OFF + -DARROW_GANDIVA_JAVA=OFF + -DARROW_HDFS=OFF + -DARROW_HIVESERVER2=OFF + -DARROW_ORC=OFF + -DARROW_PARQUET=OFF + -DARROW_PLASMA=OFF + -DARROW_PLASMA_JAVA_CLIENT=OFF + -DARROW_PYTHON=OFF + -DARROW_WITH_BZ2=OFF + -DARROW_WITH_ZLIB=OFF + -DARROW_WITH_LZ4=OFF + -DARROW_WITH_SNAPPY=OFF + -DARROW_WITH_ZSTD=OFF + -DARROW_WITH_BROTLI=OFF -DCMAKE_BUILD_TYPE=Release - -DARROW_DEPENDENCY_SOURCE=BUNDLED) #Build all arrow dependencies from source instead of calling find_package first + -DARROW_DEPENDENCY_SOURCE=BUNDLED #Build all arrow dependencies from source instead of calling find_package first + -DBOOST_SOURCE=AUTO #try to find BOOST in the system default locations and build from source if not found + ) if(USE_JFROG_CACHE STREQUAL "ON") diff --git a/core/src/sdk/CMakeLists.txt b/core/src/sdk/CMakeLists.txt index a2991a49b4..c68712d34c 100644 --- a/core/src/sdk/CMakeLists.txt +++ b/core/src/sdk/CMakeLists.txt @@ -30,9 +30,6 @@ add_library(milvus_sdk STATIC target_link_libraries(milvus_sdk ${client_grpc_lib} - bzip2 - lz4 - snappy zlib ) diff --git a/core/thirdparty/versions.txt b/core/thirdparty/versions.txt index ec270c0670..4faaf119e4 100644 --- a/core/thirdparty/versions.txt +++ b/core/thirdparty/versions.txt @@ -1,18 +1,13 @@ -BOOST_VERSION=1.70.0 -BZIP2_VERSION=1.0.6 EASYLOGGINGPP_VERSION=v9.96.7 GTEST_VERSION=1.8.1 -LZ4_VERSION=v1.9.1 MYSQLPP_VERSION=3.2.4 PROMETHEUS_VERSION=v0.7.0 -SNAPPY_VERSION=1.1.7 SQLITE_VERSION=3280000 SQLITE_ORM_VERSION=master YAMLCPP_VERSION=0.6.2 -ZLIB_VERSION=v1.2.11 -ZSTD_VERSION=v1.4.0 LIBUNWIND_VERSION=1.3.1 GPERFTOOLS_VERSION=2.7 GRPC_VERSION=master +ZLIB_VERSION=v1.2.11 # vim: set filetype=sh: diff --git a/core/ubuntu_build_deps.sh b/core/ubuntu_build_deps.sh index ed9eb9dee5..e454a147ac 100755 --- a/core/ubuntu_build_deps.sh +++ b/core/ubuntu_build_deps.sh @@ -1,5 +1,6 @@ #!/bin/bash -sudo apt-get install -y gfortran libmysqlclient-dev mysql-client libcurl4-openssl-dev libboost-system-dev libboost-filesystem-dev libboost-serialization-dev +sudo apt-get install -y gfortran libmysqlclient-dev mysql-client libcurl4-openssl-dev libboost-system-dev \ +libboost-filesystem-dev libboost-serialization-dev libboost-regex-dev sudo ln -s /usr/lib/x86_64-linux-gnu/libmysqlclient.so /usr/lib/x86_64-linux-gnu/libmysqlclient_r.so diff --git a/core/unittest/CMakeLists.txt b/core/unittest/CMakeLists.txt index aae7fb8d7f..62b5bdf256 100644 --- a/core/unittest/CMakeLists.txt +++ b/core/unittest/CMakeLists.txt @@ -102,7 +102,6 @@ set(unittest_libs sqlite libboost_system.a libboost_filesystem.a - lz4 mysqlpp yaml-cpp gtest diff --git a/core/unittest/server/CMakeLists.txt b/core/unittest/server/CMakeLists.txt index 180dcfa6d5..1f89de8d3f 100644 --- a/core/unittest/server/CMakeLists.txt +++ b/core/unittest/server/CMakeLists.txt @@ -59,9 +59,6 @@ set(client_grpc_lib target_link_libraries(test_server knowhere stdc++ - snappy - bz2 - zstd ${client_grpc_lib} ${unittest_libs} ) From 581c662b61c99a94f4969c9b56c7fc1c0e3654f2 Mon Sep 17 00:00:00 2001 From: zhenwu Date: Thu, 24 Oct 2019 00:52:33 +0800 Subject: [PATCH 31/89] remove sq8h Former-commit-id: 31deed25ae121396fe8352efad36609452ac6c01 --- tests/milvus_python_test/test_add_vectors.py | 8 ++++---- tests/milvus_python_test/test_index.py | 10 ++++++++-- 2 files changed, 12 insertions(+), 6 deletions(-) diff --git a/tests/milvus_python_test/test_add_vectors.py b/tests/milvus_python_test/test_add_vectors.py index 51c12dcd87..e223eaa5f6 100644 --- a/tests/milvus_python_test/test_add_vectors.py +++ b/tests/milvus_python_test/test_add_vectors.py @@ -573,7 +573,7 @@ class TestAddBase: nq = 100 vectors = gen_vectors(nq, dim) table_list = [] - for i in range(50): + for i in range(20): table_name = gen_unique_str('test_add_vector_multi_tables') table_list.append(table_name) param = {'table_name': table_name, @@ -581,9 +581,9 @@ class TestAddBase: 'index_file_size': index_file_size, 'metric_type': MetricType.L2} connect.create_table(param) - time.sleep(2) - for j in range(10): - for i in range(50): + time.sleep(5) + for j in range(5): + for i in range(20): status, ids = connect.add_vectors(table_name=table_list[i], records=vectors) assert status.OK() diff --git a/tests/milvus_python_test/test_index.py b/tests/milvus_python_test/test_index.py index 9e9f0830ac..76774c0cf9 100644 --- a/tests/milvus_python_test/test_index.py +++ b/tests/milvus_python_test/test_index.py @@ -37,7 +37,10 @@ class TestIndexBase: params=gen_simple_index_params() ) def get_simple_index_params(self, request): - yield request.param + if "internal" not in args: + if request.param["index_type"] == IndexType.IVF_SQ8H: + pytest.skip("sq8h not support in open source") + return request.param """ ****************************************************************** @@ -515,7 +518,10 @@ class TestIndexIP: params=gen_simple_index_params() ) def get_simple_index_params(self, request): - yield request.param + if "internal" not in args: + if request.param["index_type"] == IndexType.IVF_SQ8H: + pytest.skip("sq8h not support in open source") + return request.param """ ****************************************************************** From c01107e2c7cb40d65ea49e2527ecafc5f99f0695 Mon Sep 17 00:00:00 2001 From: zhenwu Date: Thu, 24 Oct 2019 01:00:20 +0800 Subject: [PATCH 32/89] re-define case level Former-commit-id: 6c2ae8329c1ea9e22f0cba75d6603987a874b8c8 --- tests/milvus_python_test/test_add_vectors.py | 4 +++- tests/milvus_python_test/test_index.py | 9 +++++---- 2 files changed, 8 insertions(+), 5 deletions(-) diff --git a/tests/milvus_python_test/test_add_vectors.py b/tests/milvus_python_test/test_add_vectors.py index e223eaa5f6..e33328625a 100644 --- a/tests/milvus_python_test/test_add_vectors.py +++ b/tests/milvus_python_test/test_add_vectors.py @@ -407,6 +407,7 @@ class TestAddBase: def get_vector_id(self, request): yield request.param + @pytest.mark.level(2) def test_add_vectors_ids_invalid(self, connect, table, get_vector_id): ''' target: test add vectors in table, use customize ids, which are not int64 @@ -974,6 +975,7 @@ class TestAddIP: def get_vector_id(self, request): yield request.param + @pytest.mark.level(2) def test_add_vectors_ids_invalid(self, connect, ip_table, get_vector_id): ''' target: test add vectors in table, use customize ids, which are not int64 @@ -1223,7 +1225,7 @@ class TestAddTableVectorsInvalid(object): with pytest.raises(Exception) as e: status, result = connect.add_vectors(table, tmp_single_vector) - @pytest.mark.level(1) + @pytest.mark.level(2) def test_add_vectors_with_invalid_vectors(self, connect, table, gen_vector): tmp_vectors = copy.deepcopy(self.vectors) tmp_vectors[1][1] = gen_vector diff --git a/tests/milvus_python_test/test_index.py b/tests/milvus_python_test/test_index.py index 76774c0cf9..e4c8848d63 100644 --- a/tests/milvus_python_test/test_index.py +++ b/tests/milvus_python_test/test_index.py @@ -528,7 +528,7 @@ class TestIndexIP: The following cases are used to test `create_index` function ****************************************************************** """ - + @pytest.mark.level(2) @pytest.mark.timeout(BUILD_TIMEOUT) def test_create_index(self, connect, ip_table, get_index_params): ''' @@ -563,6 +563,7 @@ class TestIndexIP: logging.getLogger().info(index_params) status, ids = connect.add_vectors(ip_table, vectors) status = connect.create_index(ip_table, index_params) + assert status.OK() logging.getLogger().info(connect.describe_index(ip_table)) query_vecs = [vectors[0], vectors[1], vectors[2]] top_k = 5 @@ -933,19 +934,19 @@ class TestIndexTableInvalid(object): def get_table_name(self, request): yield request.param - # @pytest.mark.level(1) + @pytest.mark.level(2) def test_create_index_with_invalid_tablename(self, connect, get_table_name): table_name = get_table_name status = connect.create_index(table_name, random.choice(gen_index_params())) assert not status.OK() - # @pytest.mark.level(1) + @pytest.mark.level(2) def test_describe_index_with_invalid_tablename(self, connect, get_table_name): table_name = get_table_name status, result = connect.describe_index(table_name) assert not status.OK() - # @pytest.mark.level(1) + @pytest.mark.level(2) def test_drop_index_with_invalid_tablename(self, connect, get_table_name): table_name = get_table_name status = connect.drop_index(table_name) From ec022c330d264ec6fcbd28ef28e613b4d3804b45 Mon Sep 17 00:00:00 2001 From: "yudong.cai" Date: Thu, 24 Oct 2019 09:33:09 +0800 Subject: [PATCH 33/89] #89 update unittest Former-commit-id: f9b518f2961f3c7da30a76a53a49da8403208a0b --- .../index/unittest/faiss_benchmark/faiss_benchmark_test.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/core/src/index/unittest/faiss_benchmark/faiss_benchmark_test.cpp b/core/src/index/unittest/faiss_benchmark/faiss_benchmark_test.cpp index 5d63d63003..3d60574231 100644 --- a/core/src/index/unittest/faiss_benchmark/faiss_benchmark_test.cpp +++ b/core/src/index/unittest/faiss_benchmark/faiss_benchmark_test.cpp @@ -541,15 +541,15 @@ void test_ivfsq8h_gpu(const std::string& ann_test_name, TEST(FAISSTEST, BENCHMARK) { test_ann_hdf5("sift-128-euclidean", "IVF4096,Flat", 2, {8, 128}); test_ann_hdf5("sift-128-euclidean", "IVF16384,SQ8", 2, {8, 128}); - test_ann_hdf5("sift-128-euclidean", "IVF16384,SQ8Hybrid", 2, {8, 128}); #ifdef CUSTOMIZATION + test_ann_hdf5("sift-128-euclidean", "IVF16384,SQ8Hybrid", 2, {8, 128}); test_ivfsq8h_gpu("sift-128-euclidean", 2, {8, 128}); #endif test_ann_hdf5("glove-200-angular", "IVF4096,Flat", 1, {8, 128}); test_ann_hdf5("glove-200-angular", "IVF16384,SQ8", 1, {8, 128}); - test_ann_hdf5("glove-200-angular", "IVF16384,SQ8Hybrid", 1, {8, 128}); #ifdef CUSTOMIZATION + test_ann_hdf5("glove-200-angular", "IVF16384,SQ8Hybrid", 1, {8, 128}); test_ivfsq8h_gpu("glove-200-angular", 2, {8, 128}); #endif } From 4f5906b9bc6316523f2de708b2d3e6cb448aec2d Mon Sep 17 00:00:00 2001 From: "yudong.cai" Date: Thu, 24 Oct 2019 09:44:01 +0800 Subject: [PATCH 34/89] #89 update SQ8Hybrid-gpu log Former-commit-id: c2e70121ee65ed044c059ac3948b3412353e829e --- .../src/index/unittest/faiss_benchmark/faiss_benchmark_test.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/core/src/index/unittest/faiss_benchmark/faiss_benchmark_test.cpp b/core/src/index/unittest/faiss_benchmark/faiss_benchmark_test.cpp index 3d60574231..0c7cb97807 100644 --- a/core/src/index/unittest/faiss_benchmark/faiss_benchmark_test.cpp +++ b/core/src/index/unittest/faiss_benchmark/faiss_benchmark_test.cpp @@ -475,7 +475,7 @@ void test_ivfsq8h_gpu(const std::string& ann_test_name, long *I = new faiss::Index::idx_t[NQ * K]; float *D = new float[NQ * K]; - printf ("\n%s | %s | nprobe=%lu\n", ann_test_name.c_str(), index_key.c_str(), nprobe); + printf ("\n%s | %s-gpu | nprobe=%lu\n", ann_test_name.c_str(), index_key.c_str(), nprobe); printf ("============================================================================================\n"); for (size_t t_nq = 10; t_nq <= NQ; t_nq *= 10) { // nq = {10, 100, 1000} for (size_t t_k = 100; t_k <= K; t_k *= 10) { // k = {100, 1000} From d3d6077eb2daedb92b1a33a2e281e6445afd548f Mon Sep 17 00:00:00 2001 From: "yudong.cai" Date: Thu, 24 Oct 2019 10:24:02 +0800 Subject: [PATCH 35/89] #89 update unittest parameter Former-commit-id: 4692890b67109edefbd0cc0a0a5a628f6433306d --- .../src/index/unittest/faiss_benchmark/faiss_benchmark_test.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/core/src/index/unittest/faiss_benchmark/faiss_benchmark_test.cpp b/core/src/index/unittest/faiss_benchmark/faiss_benchmark_test.cpp index 0c7cb97807..ed00e74a98 100644 --- a/core/src/index/unittest/faiss_benchmark/faiss_benchmark_test.cpp +++ b/core/src/index/unittest/faiss_benchmark/faiss_benchmark_test.cpp @@ -550,7 +550,7 @@ TEST(FAISSTEST, BENCHMARK) { test_ann_hdf5("glove-200-angular", "IVF16384,SQ8", 1, {8, 128}); #ifdef CUSTOMIZATION test_ann_hdf5("glove-200-angular", "IVF16384,SQ8Hybrid", 1, {8, 128}); - test_ivfsq8h_gpu("glove-200-angular", 2, {8, 128}); + test_ivfsq8h_gpu("glove-200-angular", 1, {8, 128}); #endif } From f7a7f9b7da16cade2fa96b4701d4f913ad325a78 Mon Sep 17 00:00:00 2001 From: starlord Date: Thu, 24 Oct 2019 10:43:14 +0800 Subject: [PATCH 36/89] ignore easylogging files Former-commit-id: 0b609d729f3914e8d57b2f5a7d0baa38ad92cc2f --- core/coverage.sh | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/core/coverage.sh b/core/coverage.sh index 5792af5ec2..e47e720ce5 100755 --- a/core/coverage.sh +++ b/core/coverage.sh @@ -99,6 +99,7 @@ for test in `ls ${DIR_UNITTEST}`; do if [ $? -ne 0 ]; then echo ${args} echo ${DIR_UNITTEST}/${test} "run failed" + exit -1 fi done @@ -121,8 +122,8 @@ ${LCOV_CMD} -r "${FILE_INFO_OUTPUT}" -o "${FILE_INFO_OUTPUT_NEW}" \ "*/src/server/Server.cpp" \ "*/src/server/DBWrapper.cpp" \ "*/src/server/grpc_impl/GrpcServer.cpp" \ - "*/src/external/easyloggingpp/easylogging++.h" \ - "*/src/external/easyloggingpp/easylogging++.cc" \ + "*/easylogging++.h" \ + "*/easylogging++.cc" \ "*/src/external/*" # gen html report From 460bbf2782394107c4f539edae7c1b46eb1c53df Mon Sep 17 00:00:00 2001 From: Zhiru Zhu Date: Thu, 24 Oct 2019 10:49:02 +0800 Subject: [PATCH 37/89] Update CHANGELOG.md Former-commit-id: 47da01a8c9193284097d22eb35c79b377f20253d --- CHANGELOG.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index ebad4e2f32..bf91805d39 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -6,7 +6,7 @@ Please mark all change in change log and use the ticket from JIRA. ## Bug ## Improvement -- \#64 - Improvement dump function in +- \#64 - Improvement dump function in scheduler - \#82 - Move easyloggingpp into "external" directory - \#92 - Speed up CMake build process From 65b46de1ac7629948cd27a2ba00c5872b48b2f8b Mon Sep 17 00:00:00 2001 From: "yudong.cai" Date: Thu, 24 Oct 2019 10:56:12 +0800 Subject: [PATCH 38/89] #89 code format Former-commit-id: ced158f26d9c18e38c7afb84ad17fdb6f9057259 --- .../faiss_benchmark/faiss_benchmark_test.cpp | 282 +++++++++--------- 1 file changed, 136 insertions(+), 146 deletions(-) diff --git a/core/src/index/unittest/faiss_benchmark/faiss_benchmark_test.cpp b/core/src/index/unittest/faiss_benchmark/faiss_benchmark_test.cpp index ed00e74a98..d1db0e9049 100644 --- a/core/src/index/unittest/faiss_benchmark/faiss_benchmark_test.cpp +++ b/core/src/index/unittest/faiss_benchmark/faiss_benchmark_test.cpp @@ -17,28 +17,28 @@ #include +#include #include #include #include -#include #include #include #include #include -#include #include #include +#include #include #include #include -#include #include #include #include #include +#include /***************************************************** * To run this test, please download the HDF5 from @@ -46,29 +46,27 @@ * and install it to /usr/local/hdf5 . *****************************************************/ -double elapsed() { +double +elapsed() { struct timeval tv; gettimeofday(&tv, nullptr); return tv.tv_sec + tv.tv_usec * 1e-6; } -void* hdf5_read(const char *file_name, - const char *dataset_name, - H5T_class_t dataset_class, - size_t &d_out, - size_t &n_out) { - hid_t file, dataset, datatype, dataspace, memspace; - H5T_class_t t_class; /* data type class */ - H5T_order_t order; /* data order */ - size_t size; /* size of the data element stored in file */ - hsize_t dimsm[3]; /* memory space dimensions */ - hsize_t dims_out[2]; /* dataset dimensions */ - hsize_t count[2]; /* size of the hyperslab in the file */ - hsize_t offset[2]; /* hyperslab offset in the file */ - hsize_t count_out[3]; /* size of the hyperslab in memory */ - hsize_t offset_out[3]; /* hyperslab offset in memory */ - int rank; - void* data_out; /* output buffer */ +void* +hdf5_read(const char* file_name, const char* dataset_name, H5T_class_t dataset_class, size_t& d_out, size_t& n_out) { + hid_t file, dataset, datatype, dataspace, memspace; + H5T_class_t t_class; /* data type class */ + H5T_order_t order; /* data order */ + size_t size; /* size of the data element stored in file */ + hsize_t dimsm[3]; /* memory space dimensions */ + hsize_t dims_out[2]; /* dataset dimensions */ + hsize_t count[2]; /* size of the hyperslab in the file */ + hsize_t offset[2]; /* hyperslab offset in the file */ + hsize_t count_out[3]; /* size of the hyperslab in memory */ + hsize_t offset_out[3]; /* hyperslab offset in memory */ + int rank; + void* data_out; /* output buffer */ /* Open the file and the dataset. */ file = H5Fopen(file_name, H5F_ACC_RDONLY, H5P_DEFAULT); @@ -78,7 +76,7 @@ void* hdf5_read(const char *file_name, * Get datatype and dataspace handles and then query * dataset class, order, size, rank and dimensions. */ - datatype = H5Dget_type(dataset); /* datatype handle */ + datatype = H5Dget_type(dataset); /* datatype handle */ t_class = H5Tget_class(datatype); assert(t_class == dataset_class || !"Illegal dataset class type"); @@ -95,11 +93,11 @@ void* hdf5_read(const char *file_name, break; } - size = H5Tget_size(datatype); + size = H5Tget_size(datatype); printf("Data size is %d \n", (int)size); - dataspace = H5Dget_space(dataset); /* dataspace handle */ - rank = H5Sget_simple_extent_ndims(dataspace); + dataspace = H5Dget_space(dataset); /* dataspace handle */ + rank = H5Sget_simple_extent_ndims(dataspace); H5Sget_simple_extent_dims(dataspace, dims_out, NULL); n_out = dims_out[0]; d_out = dims_out[1]; @@ -107,8 +105,8 @@ void* hdf5_read(const char *file_name, /* Define hyperslab in the dataset. */ offset[0] = offset[1] = 0; - count[0] = dims_out[0]; - count[1] = dims_out[1]; + count[0] = dims_out[0]; + count[1] = dims_out[1]; H5Sselect_hyperslab(dataspace, H5S_SELECT_SET, offset, NULL, count, NULL); /* Define the memory dataspace. */ @@ -119,9 +117,9 @@ void* hdf5_read(const char *file_name, /* Define memory hyperslab. */ offset_out[0] = offset_out[1] = offset_out[2] = 0; - count_out[0] = dims_out[0]; - count_out[1] = dims_out[1]; - count_out[2] = 1; + count_out[0] = dims_out[0]; + count_out[1] = dims_out[1]; + count_out[2] = 1; H5Sselect_hyperslab(memspace, H5S_SELECT_SET, offset_out, NULL, count_out, NULL); /* Read data from hyperslab in the file into the hyperslab in memory and display. */ @@ -149,30 +147,31 @@ void* hdf5_read(const char *file_name, return data_out; } -std::string get_index_file_name(const std::string& ann_test_name, - const std::string& index_key, - int32_t data_loops) { +std::string +get_index_file_name(const std::string& ann_test_name, const std::string& index_key, int32_t data_loops) { size_t pos = index_key.find_first_of(',', 0); std::string file_name = ann_test_name; - file_name = file_name + "_" + index_key.substr(0, pos) + "_" + index_key.substr(pos+1); + file_name = file_name + "_" + index_key.substr(0, pos) + "_" + index_key.substr(pos + 1); file_name = file_name + "_" + std::to_string(data_loops) + ".index"; return file_name; } -bool parse_ann_test_name(const std::string& ann_test_name, - size_t &dim, - faiss::MetricType &metric_type) { +bool +parse_ann_test_name(const std::string& ann_test_name, size_t& dim, faiss::MetricType& metric_type) { size_t pos1, pos2; - if (ann_test_name.empty()) return false; + if (ann_test_name.empty()) + return false; pos1 = ann_test_name.find_first_of('-', 0); - if (pos1 == std::string::npos) return false; + if (pos1 == std::string::npos) + return false; pos2 = ann_test_name.find_first_of('-', pos1 + 1); - if (pos2 == std::string::npos) return false; + if (pos2 == std::string::npos) + return false; - dim = std::stoi(ann_test_name.substr(pos1+1, pos2-pos1-1)); - std::string metric_str = ann_test_name.substr(pos2+1); + dim = std::stoi(ann_test_name.substr(pos1 + 1, pos2 - pos1 - 1)); + std::string metric_str = ann_test_name.substr(pos2 + 1); if (metric_str == "angular") { metric_type = faiss::METRIC_INNER_PRODUCT; } else if (metric_str == "euclidean") { @@ -184,10 +183,9 @@ bool parse_ann_test_name(const std::string& ann_test_name, return true; } -void test_ann_hdf5(const std::string& ann_test_name, - const std::string& index_key, - int32_t index_add_loops, - const std::vector& nprobes) { +void +test_ann_hdf5(const std::string& ann_test_name, const std::string& index_key, int32_t index_add_loops, + const std::vector& nprobes) { double t0 = elapsed(); const std::string ann_file_name = ann_test_name + ".hdf5"; @@ -200,77 +198,74 @@ void test_ann_hdf5(const std::string& ann_test_name, return; } - faiss::Index * index; + faiss::Index* index; size_t d; std::string index_file_name = get_index_file_name(ann_test_name, index_key, index_add_loops); try { index = faiss::read_index(index_file_name.c_str()); d = dim; - } - catch (...) { + } catch (...) { printf("Cannot read index file: %s\n", index_file_name.c_str()); - printf ("[%.3f s] Loading train set\n", elapsed() - t0); + printf("[%.3f s] Loading train set\n", elapsed() - t0); size_t nb; - float *xb = (float*)hdf5_read(ann_file_name.c_str(), "train", H5T_FLOAT, d, nb); + float* xb = (float*)hdf5_read(ann_file_name.c_str(), "train", H5T_FLOAT, d, nb); assert(d == dim || !"dataset does not have correct dimension"); - printf ("[%.3f s] Preparing index \"%s\" d=%ld\n", - elapsed() - t0, index_key.c_str(), d); + printf("[%.3f s] Preparing index \"%s\" d=%ld\n", elapsed() - t0, index_key.c_str(), d); index = faiss::index_factory(d, index_key.c_str(), metric_type); - printf ("[%.3f s] Training on %ld vectors\n", elapsed() - t0, nb); + printf("[%.3f s] Training on %ld vectors\n", elapsed() - t0, nb); index->train(nb, xb); - printf ("[%.3f s] Loading database\n", elapsed() - t0); + printf("[%.3f s] Loading database\n", elapsed() - t0); // add index multiple times to get ~1G data set for (int i = 0; i < index_add_loops; i++) { - printf ("[%.3f s] Indexing database, size %ld*%ld\n", elapsed() - t0, nb, d); + printf("[%.3f s] Indexing database, size %ld*%ld\n", elapsed() - t0, nb, d); index->add(nb, xb); } faiss::write_index(index, index_file_name.c_str()); - delete [] xb; + delete[] xb; } size_t nq; - float *xq; + float* xq; { - printf ("[%.3f s] Loading queries\n", elapsed() - t0); + printf("[%.3f s] Loading queries\n", elapsed() - t0); size_t d2; xq = (float*)hdf5_read(ann_file_name.c_str(), "test", H5T_FLOAT, d2, nq); assert(d == d2 || !"query does not have same dimension as train set"); } - size_t k; // nb of results per query in the GT - faiss::Index::idx_t *gt; // nq * k matrix of ground-truth nearest-neighbors + size_t k; // nb of results per query in the GT + faiss::Index::idx_t* gt; // nq * k matrix of ground-truth nearest-neighbors { - printf ("[%.3f s] Loading ground truth for %ld queries\n", elapsed() - t0, nq); + printf("[%.3f s] Loading ground truth for %ld queries\n", elapsed() - t0, nq); // load ground-truth and convert int to long size_t nq2; - int *gt_int = (int*)hdf5_read(ann_file_name.c_str(), "neighbors", H5T_INTEGER, k, nq2); + int* gt_int = (int*)hdf5_read(ann_file_name.c_str(), "neighbors", H5T_INTEGER, k, nq2); assert(nq2 == nq || !"incorrect nb of ground truth entries"); gt = new faiss::Index::idx_t[k * nq]; - for(int i = 0; i < k * nq; i++) { + for (int i = 0; i < k * nq; i++) { gt[i] = gt_int[i]; } - delete [] gt_int; + delete[] gt_int; } for (auto nprobe : nprobes) { - faiss::ParameterSpace params; - printf ("[%.3f s] Setting parameter configuration 'nprobe=%lu' on index\n", elapsed() - t0, nprobe); + printf("[%.3f s] Setting parameter configuration 'nprobe=%lu' on index\n", elapsed() - t0, nprobe); std::string nprobe_str = "nprobe=" + std::to_string(nprobe); params.set_index_parameters(index, nprobe_str.c_str()); @@ -278,13 +273,13 @@ void test_ann_hdf5(const std::string& ann_test_name, // output buffers #if 1 const size_t NQ = 1000, K = 1000; - faiss::Index::idx_t *I = new faiss::Index::idx_t[NQ * K]; - float *D = new float[NQ * K]; + faiss::Index::idx_t* I = new faiss::Index::idx_t[NQ * K]; + float* D = new float[NQ * K]; - printf ("\n%s | %s | nprobe=%lu\n", ann_test_name.c_str(), index_key.c_str(), nprobe); - printf ("============================================================================================\n"); - for (size_t t_nq = 10; t_nq <= NQ; t_nq *= 10) { // nq = {10, 100, 1000} - for (size_t t_k = 100; t_k <= K; t_k *= 10) { // k = {100, 1000} + printf("\n%s | %s | nprobe=%lu\n", ann_test_name.c_str(), index_key.c_str(), nprobe); + printf("============================================================================================\n"); + for (size_t t_nq = 10; t_nq <= NQ; t_nq *= 10) { // nq = {10, 100, 1000} + for (size_t t_k = 100; t_k <= K; t_k *= 10) { // k = {100, 1000} faiss::indexIVF_stats.quantization_time = 0.0; faiss::indexIVF_stats.search_time = 0.0; @@ -301,7 +296,7 @@ void test_ann_hdf5(const std::string& ann_test_name, // consider: each result replicates DATA_LOOPS times for (int j_c = 0; j_c < k; j_c++) { int r_c = I[i * t_k + j_c]; - for (int j_g = 0; j_g < k/index_add_loops; j_g++) { + for (int j_g = 0; j_g < k / index_add_loops; j_g++) { if (gt[i * k + j_g] == r_c) { hit++; continue; @@ -309,33 +304,34 @@ void test_ann_hdf5(const std::string& ann_test_name, } } } - printf("nq = %4ld, k = %4ld, elapse = %fs (quant = %fs, search = %fs), R@ = %.4f\n", - t_nq, t_k, (t_end - t_start), - faiss::indexIVF_stats.quantization_time / 1000, - faiss::indexIVF_stats.search_time / 1000, - (hit / float(t_nq * k / index_add_loops))); + printf("nq = %4ld, k = %4ld, elapse = %fs (quant = %fs, search = %fs), R@ = %.4f\n", t_nq, t_k, + (t_end - t_start), faiss::indexIVF_stats.quantization_time / 1000, + faiss::indexIVF_stats.search_time / 1000, (hit / float(t_nq * k / index_add_loops))); } } - printf ("============================================================================================\n"); + printf("============================================================================================\n"); #else - printf ("[%.3f s] Perform a search on %ld queries\n", elapsed() - t0, nq); + printf("[%.3f s] Perform a search on %ld queries\n", elapsed() - t0, nq); - faiss::Index::idx_t *I = new faiss::Index::idx_t[nq * k]; - float *D = new float[nq * k]; + faiss::Index::idx_t* I = new faiss::Index::idx_t[nq * k]; + float* D = new float[nq * k]; index->search(nq, xq, k, D, I); - printf ("[%.3f s] Compute recalls\n", elapsed() - t0); + printf("[%.3f s] Compute recalls\n", elapsed() - t0); // evaluate result by hand. int n_1 = 0, n_10 = 0, n_100 = 0; - for(int i = 0; i < nq; i++) { + for (int i = 0; i < nq; i++) { int gt_nn = gt[i * k]; - for(int j = 0; j < k; j++) { + for (int j = 0; j < k; j++) { if (I[i * k + j] == gt_nn) { - if(j < 1) n_1++; - if(j < 10) n_10++; - if(j < 100) n_100++; + if (j < 1) + n_1++; + if (j < 10) + n_10++; + if (j < 100) + n_100++; } } } @@ -344,21 +340,20 @@ void test_ann_hdf5(const std::string& ann_test_name, printf("R@100 = %.4f\n", n_100 / float(nq)); #endif - printf ("[%.3f s] Search test done\n\n", elapsed() - t0); + printf("[%.3f s] Search test done\n\n", elapsed() - t0); - delete [] I; - delete [] D; + delete[] I; + delete[] D; } - delete [] xq; - delete [] gt; + delete[] xq; + delete[] gt; delete index; } #ifdef CUSTOMIZATION -void test_ivfsq8h_gpu(const std::string& ann_test_name, - int32_t index_add_loops, - const std::vector& nprobes){ +void +test_ivfsq8h_gpu(const std::string& ann_test_name, int32_t index_add_loops, const std::vector& nprobes) { double t0 = elapsed(); const std::string ann_file_name = ann_test_name + ".hdf5"; @@ -380,44 +375,43 @@ void test_ivfsq8h_gpu(const std::string& ann_test_name, size_t d; std::string index_file_name = get_index_file_name(ann_test_name, index_key, index_add_loops); - try{ + try { cpu_index = faiss::read_index(index_file_name.c_str()); d = dim; - } - catch (...){ + } catch (...) { printf("Cannot read index file: %s\n", index_file_name.c_str()); - printf ("[%.3f s] Loading train set\n", elapsed() - t0); + printf("[%.3f s] Loading train set\n", elapsed() - t0); - size_t nb; - float *xb = (float*)hdf5_read(ann_file_name.c_str(), "train", H5T_FLOAT, d, nb); + size_t nb; + float* xb = (float*)hdf5_read(ann_file_name.c_str(), "train", H5T_FLOAT, d, nb); assert(d == dim || !"dataset does not have correct dimension"); - printf ("[%.3f s] Preparing index \"%s\" d=%ld\n", elapsed() - t0, index_key.c_str(), d); + printf("[%.3f s] Preparing index \"%s\" d=%ld\n", elapsed() - t0, index_key.c_str(), d); - faiss::Index *ori_index = faiss::index_factory(d, index_key.c_str(), metric_type); + faiss::Index* ori_index = faiss::index_factory(d, index_key.c_str(), metric_type); auto device_index = faiss::gpu::index_cpu_to_gpu(&res, 0, ori_index); - printf ("[%.3f s] Training on %ld vectors\n", elapsed() - t0, nb); + printf("[%.3f s] Training on %ld vectors\n", elapsed() - t0, nb); device_index->train(nb, xb); - printf ("[%.3f s] Loading database\n", elapsed() - t0); + printf("[%.3f s] Loading database\n", elapsed() - t0); for (int i = 0; i < index_add_loops; i++) { - printf ("[%.3f s] Indexing database, size %ld*%ld\n", elapsed() - t0, nb, d); + printf("[%.3f s] Indexing database, size %ld*%ld\n", elapsed() - t0, nb, d); device_index->add(nb, xb); } cpu_index = faiss::gpu::index_gpu_to_cpu(device_index); faiss::write_index(cpu_index, index_file_name.c_str()); - delete []xb; + delete[] xb; } - faiss::IndexIVF *cpu_ivf_index = dynamic_cast(cpu_index); - if(cpu_ivf_index != nullptr) { + faiss::IndexIVF* cpu_ivf_index = dynamic_cast(cpu_index); + if (cpu_ivf_index != nullptr) { cpu_ivf_index->to_readonly(); } @@ -433,9 +427,9 @@ void test_ivfsq8h_gpu(const std::string& ann_test_name, delete index; size_t nq; - float *xq; + float* xq; { - printf ("[%.3f s] Loading queries\n", elapsed() - t0); + printf("[%.3f s] Loading queries\n", elapsed() - t0); size_t d2; xq = (float*)hdf5_read(ann_file_name.c_str(), "test", H5T_FLOAT, d2, nq); @@ -443,42 +437,41 @@ void test_ivfsq8h_gpu(const std::string& ann_test_name, } size_t k; - faiss::Index::idx_t *gt; + faiss::Index::idx_t* gt; { - printf ("[%.3f s] Loading ground truth for %ld queries\n", elapsed() - t0, nq); + printf("[%.3f s] Loading ground truth for %ld queries\n", elapsed() - t0, nq); size_t nq2; - int *gt_int = (int*)hdf5_read(ann_file_name.c_str(), "neighbors", H5T_INTEGER, k, nq2); + int* gt_int = (int*)hdf5_read(ann_file_name.c_str(), "neighbors", H5T_INTEGER, k, nq2); assert(nq2 == nq || !"incorrect nb of ground truth entries"); gt = new faiss::Index::idx_t[k * nq]; for (unsigned long i = 0; i < k * nq; ++i) { gt[i] = gt_int[i]; } - delete []gt_int; + delete[] gt_int; } - for (auto nprobe : nprobes){ - printf ("[%.3f s] Setting parameter configuration 'nprobe=%lu' on index\n", - elapsed() - t0, nprobe); + for (auto nprobe : nprobes) { + printf("[%.3f s] Setting parameter configuration 'nprobe=%lu' on index\n", elapsed() - t0, nprobe); - auto ivf_index = dynamic_cast(cpu_index); + auto ivf_index = dynamic_cast(cpu_index); ivf_index->nprobe = nprobe; auto is_gpu_flat_index = dynamic_cast(ivf_index->quantizer); - if(is_gpu_flat_index == nullptr) { + if (is_gpu_flat_index == nullptr) { delete ivf_index->quantizer; ivf_index->quantizer = index_composition.quantizer; } const size_t NQ = 1000, K = 1000; - long *I = new faiss::Index::idx_t[NQ * K]; - float *D = new float[NQ * K]; + long* I = new faiss::Index::idx_t[NQ * K]; + float* D = new float[NQ * K]; - printf ("\n%s | %s-gpu | nprobe=%lu\n", ann_test_name.c_str(), index_key.c_str(), nprobe); - printf ("============================================================================================\n"); - for (size_t t_nq = 10; t_nq <= NQ; t_nq *= 10) { // nq = {10, 100, 1000} - for (size_t t_k = 100; t_k <= K; t_k *= 10) { // k = {100, 1000} + printf("\n%s | %s-gpu | nprobe=%lu\n", ann_test_name.c_str(), index_key.c_str(), nprobe); + printf("============================================================================================\n"); + for (size_t t_nq = 10; t_nq <= NQ; t_nq *= 10) { // nq = {10, 100, 1000} + for (size_t t_k = 100; t_k <= K; t_k *= 10) { // k = {100, 1000} faiss::indexIVF_stats.quantization_time = 0.0; faiss::indexIVF_stats.search_time = 0.0; @@ -495,7 +488,7 @@ void test_ivfsq8h_gpu(const std::string& ann_test_name, // consider: each result replicates DATA_LOOPS times for (unsigned long j_c = 0; j_c < k; j_c++) { int r_c = I[i * t_k + j_c]; - for (unsigned long j_g = 0; j_g < k/index_add_loops; j_g++) { + for (unsigned long j_g = 0; j_g < k / index_add_loops; j_g++) { if (gt[i * k + j_g] == r_c) { hit++; continue; @@ -503,23 +496,21 @@ void test_ivfsq8h_gpu(const std::string& ann_test_name, } } } - printf("nq = %4ld, k = %4ld, elapse = %fs (quant = %fs, search = %fs), R@ = %.4f\n", - t_nq, t_k, (t_end - t_start), - faiss::indexIVF_stats.quantization_time / 1000, - faiss::indexIVF_stats.search_time / 1000, - (hit / float(t_nq * k / index_add_loops))); + printf("nq = %4ld, k = %4ld, elapse = %fs (quant = %fs, search = %fs), R@ = %.4f\n", t_nq, t_k, + (t_end - t_start), faiss::indexIVF_stats.quantization_time / 1000, + faiss::indexIVF_stats.search_time / 1000, (hit / float(t_nq * k / index_add_loops))); } } - printf ("============================================================================================\n"); + printf("============================================================================================\n"); - printf ("[%.3f s] Search test done\n\n", elapsed() - t0); + printf("[%.3f s] Search test done\n\n", elapsed() - t0); - delete [] I; - delete [] D; + delete[] I; + delete[] D; } - delete [] xq; - delete [] gt; + delete[] xq; + delete[] gt; delete cpu_index; } #endif @@ -536,21 +527,20 @@ void test_ivfsq8h_gpu(const std::string& ann_test_name, * MNIST 784 60,000 10,000 100 Euclidean HDF5 (217MB) * NYTimes 256 290,000 10,000 100 Angular HDF5 (301MB) * SIFT 128 1,000,000 10,000 100 Euclidean HDF5 (501MB) -*************************************************************************************/ + *************************************************************************************/ TEST(FAISSTEST, BENCHMARK) { - test_ann_hdf5("sift-128-euclidean", "IVF4096,Flat", 2, {8, 128}); - test_ann_hdf5("sift-128-euclidean", "IVF16384,SQ8", 2, {8, 128}); + test_ann_hdf5("sift-128-euclidean", "IVF4096,Flat", 2, {8, 128}); + test_ann_hdf5("sift-128-euclidean", "IVF16384,SQ8", 2, {8, 128}); #ifdef CUSTOMIZATION test_ann_hdf5("sift-128-euclidean", "IVF16384,SQ8Hybrid", 2, {8, 128}); test_ivfsq8h_gpu("sift-128-euclidean", 2, {8, 128}); #endif - test_ann_hdf5("glove-200-angular", "IVF4096,Flat", 1, {8, 128}); - test_ann_hdf5("glove-200-angular", "IVF16384,SQ8", 1, {8, 128}); + test_ann_hdf5("glove-200-angular", "IVF4096,Flat", 1, {8, 128}); + test_ann_hdf5("glove-200-angular", "IVF16384,SQ8", 1, {8, 128}); #ifdef CUSTOMIZATION test_ann_hdf5("glove-200-angular", "IVF16384,SQ8Hybrid", 1, {8, 128}); test_ivfsq8h_gpu("glove-200-angular", 1, {8, 128}); #endif } - From 82a271943c92995213e1a3c71f32cacdd028c7ec Mon Sep 17 00:00:00 2001 From: zhenwu Date: Thu, 24 Oct 2019 16:05:14 +0800 Subject: [PATCH 39/89] Update tests timeout Former-commit-id: 15c28be882db3cc2fda3bc1520b277c144c44558 --- ci/jenkins/jenkinsfile/singleDevTest.groovy | 2 +- tests/milvus_python_test/test_table.py | 2 ++ 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/ci/jenkins/jenkinsfile/singleDevTest.groovy b/ci/jenkins/jenkinsfile/singleDevTest.groovy index ae57ffd42b..44f6361835 100644 --- a/ci/jenkins/jenkinsfile/singleDevTest.groovy +++ b/ci/jenkins/jenkinsfile/singleDevTest.groovy @@ -1,4 +1,4 @@ -timeout(time: 30, unit: 'MINUTES') { +timeout(time: 60, unit: 'MINUTES') { dir ("tests/milvus_python_test") { sh 'python3 -m pip install -r requirements.txt' sh "pytest . --alluredir=\"test_out/dev/single/sqlite\" --level=1 --ip ${env.PIPELINE_NAME}-${env.BUILD_NUMBER}-single-gpu-milvus-gpu-engine.milvus.svc.cluster.local" diff --git a/tests/milvus_python_test/test_table.py b/tests/milvus_python_test/test_table.py index eb538281ed..934f3c2f9f 100644 --- a/tests/milvus_python_test/test_table.py +++ b/tests/milvus_python_test/test_table.py @@ -656,6 +656,7 @@ class TestTableInvalid(object): def get_table_name(self, request): yield request.param + @pytest.mark.level(2) def test_create_table_with_invalid_tablename(self, connect, get_table_name): table_name = get_table_name param = {'table_name': table_name, @@ -691,6 +692,7 @@ class TestCreateTableDimInvalid(object): def get_dim(self, request): yield request.param + @pytest.mark.level(2) @pytest.mark.timeout(5) def test_create_table_with_invalid_dimension(self, connect, get_dim): dimension = get_dim From 80682fc766d9ea5cb25c054a3f745c1b2500e535 Mon Sep 17 00:00:00 2001 From: zhenwu Date: Thu, 24 Oct 2019 18:04:34 +0800 Subject: [PATCH 40/89] fix test case Former-commit-id: 99aef46da1dda4e750f445f1de03d3f3701ebeec --- tests/milvus_python_test/test_index.py | 114 ++++++++++++++++--------- 1 file changed, 72 insertions(+), 42 deletions(-) diff --git a/tests/milvus_python_test/test_index.py b/tests/milvus_python_test/test_index.py index e4c8848d63..47b0db64e3 100644 --- a/tests/milvus_python_test/test_index.py +++ b/tests/milvus_python_test/test_index.py @@ -36,7 +36,7 @@ class TestIndexBase: scope="function", params=gen_simple_index_params() ) - def get_simple_index_params(self, request): + def get_simple_index_params(self, request, args): if "internal" not in args: if request.param["index_type"] == IndexType.IVF_SQ8H: pytest.skip("sq8h not support in open source") @@ -68,8 +68,10 @@ class TestIndexBase: method: create table and add vectors in it, check if added successfully expected: raise exception ''' + nlist = 16384 + index_param = {"index_type": IndexType.IVF_SQ8, "nlist": nlist} with pytest.raises(Exception) as e: - status = dis_connect.create_index(table, random.choice(gen_index_params())) + status = dis_connect.create_index(table, index_param) @pytest.mark.timeout(BUILD_TIMEOUT) def test_create_index_search_with_query_vectors(self, connect, table, get_index_params): @@ -182,12 +184,14 @@ class TestIndexBase: def test_create_index_table_not_existed(self, connect): ''' target: test create index interface when table name not existed - method: create table and add vectors in it, create index with an random table_name + method: create table and add vectors in it, create index , make sure the table name not in index expected: return code not equals to 0, create index failed ''' table_name = gen_unique_str(self.__class__.__name__) - status = connect.create_index(table_name, random.choice(gen_index_params())) + nlist = 16384 + index_param = {"index_type": IndexType.IVF_SQ8, "nlist": nlist} + status = connect.create_index(table_name, index_param) assert not status.OK() def test_create_index_table_None(self, connect): @@ -197,8 +201,10 @@ class TestIndexBase: expected: return code not equals to 0, create index failed ''' table_name = None + nlist = 16384 + index_param = {"index_type": IndexType.IVF_SQ8, "nlist": nlist} with pytest.raises(Exception) as e: - status = connect.create_index(table_name, random.choice(gen_index_params())) + status = connect.create_index(table_name, index_param) def test_create_index_no_vectors(self, connect, table): ''' @@ -206,7 +212,9 @@ class TestIndexBase: method: create table and add no vectors in it, and then create index expected: return code equals to 0 ''' - status = connect.create_index(table, random.choice(gen_index_params())) + nlist = 16384 + index_param = {"index_type": IndexType.IVF_SQ8, "nlist": nlist} + status = connect.create_index(table, index_param) assert status.OK() @pytest.mark.timeout(BUILD_TIMEOUT) @@ -216,7 +224,9 @@ class TestIndexBase: method: create table and add no vectors in it, and then create index, add vectors in it expected: return code equals to 0 ''' - status = connect.create_index(table, random.choice(gen_index_params())) + nlist = 16384 + index_param = {"index_type": IndexType.IVF_SQ8, "nlist": nlist} + status = connect.create_index(table, index_param) status, ids = connect.add_vectors(table, vectors) assert status.OK() @@ -227,11 +237,12 @@ class TestIndexBase: method: create index after index have been built expected: return code success, and search ok ''' + nlist = 16384 status, ids = connect.add_vectors(table, vectors) - index_params = random.choice(gen_index_params()) + index_param = {"index_type": IndexType.IVF_SQ8, "nlist": nlist} # index_params = get_index_params - status = connect.create_index(table, index_params) - status = connect.create_index(table, index_params) + status = connect.create_index(table, index_param) + status = connect.create_index(table, index_param) assert status.OK() query_vec = [vectors[0]] top_k = 1 @@ -246,16 +257,19 @@ class TestIndexBase: method: create another index with different index_params after index have been built expected: return code 0, and describe index result equals with the second index params ''' + nlist = 16384 status, ids = connect.add_vectors(table, vectors) - index_params = random.sample(gen_index_params(), 2) + index_type_1 = IndexType.IVF_SQ8 + index_type_2 = IndexType.IVFLAT + index_params = [{"index_type": index_type_1, "nlist": nlist}, {"index_type": index_type_2, "nlist": nlist}] logging.getLogger().info(index_params) - status = connect.create_index(table, index_params[0]) - status = connect.create_index(table, index_params[1]) - assert status.OK() + for index_param in index_params: + status = connect.create_index(table, index_param) + assert status.OK() status, result = connect.describe_index(table) - assert result._nlist == index_params[1]["nlist"] + assert result._nlist == nlist assert result._table_name == table - assert result._index_type == index_params[1]["index_type"] + assert result._index_type == index_type_2 """ ****************************************************************** @@ -331,7 +345,7 @@ class TestIndexBase: def test_describe_index_table_not_existed(self, connect): ''' target: test describe index interface when table name not existed - method: create table and add vectors in it, create index with an random table_name + method: create table and add vectors in it, create index , make sure the table name not in index expected: return code not equals to 0, describe index failed ''' @@ -352,7 +366,7 @@ class TestIndexBase: def test_describe_index_not_create(self, connect, table): ''' target: test describe index interface when index not created - method: create table and add vectors in it, create index with an random table_name + method: create table and add vectors in it, create index , make sure the table name not in index expected: return code not equals to 0, describe index failed ''' @@ -425,7 +439,7 @@ class TestIndexBase: def test_drop_index_table_not_existed(self, connect): ''' target: test drop index interface when table name not existed - method: create table and add vectors in it, create index with an random table_name + method: create table and add vectors in it, create index , make sure the table name not in index, and then drop it expected: return code not equals to 0, drop index failed ''' @@ -449,8 +463,8 @@ class TestIndexBase: method: create table and add vectors in it, create index expected: return code not equals to 0, drop index failed ''' - index_params = random.choice(gen_index_params()) - logging.getLogger().info(index_params) + nlist = 16384 + index_param = {"index_type": IndexType.IVF_SQ8, "nlist": nlist} status, ids = connect.add_vectors(table, vectors) status, result = connect.describe_index(table) logging.getLogger().info(result) @@ -486,7 +500,8 @@ class TestIndexBase: method: create index, drop index, four times, each tme use different index_params to create index expected: return code 0 ''' - index_params = random.sample(gen_index_params(), 2) + nlist = 16384 + index_params = [{"index_type": IndexType.IVFLAT, "nlist": nlist}, {"index_type": IndexType.IVF_SQ8, "nlist": nlist}] status, ids = connect.add_vectors(table, vectors) for i in range(2): status = connect.create_index(table, index_params[i]) @@ -517,7 +532,7 @@ class TestIndexIP: scope="function", params=gen_simple_index_params() ) - def get_simple_index_params(self, request): + def get_simple_index_params(self, request, args): if "internal" not in args: if request.param["index_type"] == IndexType.IVF_SQ8H: pytest.skip("sq8h not support in open source") @@ -549,8 +564,10 @@ class TestIndexIP: method: create table and add vectors in it, check if added successfully expected: raise exception ''' + nlist = 16384 + index_param = {"index_type": IndexType.IVF_SQ8, "nlist": nlist} with pytest.raises(Exception) as e: - status = dis_connect.create_index(ip_table, random.choice(gen_index_params())) + status = dis_connect.create_index(ip_table, index_param) @pytest.mark.timeout(BUILD_TIMEOUT) def test_create_index_search_with_query_vectors(self, connect, ip_table, get_index_params): @@ -665,7 +682,9 @@ class TestIndexIP: method: create table and add no vectors in it, and then create index expected: return code equals to 0 ''' - status = connect.create_index(ip_table, random.choice(gen_index_params())) + nlist = 16384 + index_param = {"index_type": IndexType.IVF_SQ8, "nlist": nlist} + status = connect.create_index(ip_table, index_param) assert status.OK() @pytest.mark.timeout(BUILD_TIMEOUT) @@ -675,7 +694,9 @@ class TestIndexIP: method: create table and add no vectors in it, and then create index, add vectors in it expected: return code equals to 0 ''' - status = connect.create_index(ip_table, random.choice(gen_index_params())) + nlist = 16384 + index_param = {"index_type": IndexType.IVF_SQ8, "nlist": nlist} + status = connect.create_index(ip_table, index_param) status, ids = connect.add_vectors(ip_table, vectors) assert status.OK() @@ -686,11 +707,11 @@ class TestIndexIP: method: create index after index have been built expected: return code success, and search ok ''' + nlist = 16384 status, ids = connect.add_vectors(ip_table, vectors) - index_params = random.choice(gen_index_params()) - # index_params = get_index_params - status = connect.create_index(ip_table, index_params) - status = connect.create_index(ip_table, index_params) + index_param = {"index_type": IndexType.IVF_SQ8, "nlist": nlist} + status = connect.create_index(ip_table, index_param) + status = connect.create_index(ip_table, index_param) assert status.OK() query_vec = [vectors[0]] top_k = 1 @@ -705,16 +726,19 @@ class TestIndexIP: method: create another index with different index_params after index have been built expected: return code 0, and describe index result equals with the second index params ''' + nlist = 16384 status, ids = connect.add_vectors(ip_table, vectors) - index_params = random.sample(gen_index_params(), 2) + index_type_1 = IndexType.IVF_SQ8 + index_type_2 = IndexType.IVFLAT + index_params = [{"index_type": index_type_1, "nlist": nlist}, {"index_type": index_type_2, "nlist": nlist}] logging.getLogger().info(index_params) - status = connect.create_index(ip_table, index_params[0]) - status = connect.create_index(ip_table, index_params[1]) - assert status.OK() + for index_param in index_params: + status = connect.create_index(ip_table, index_param) + assert status.OK() status, result = connect.describe_index(ip_table) - assert result._nlist == index_params[1]["nlist"] + assert result._nlist == nlist assert result._table_name == ip_table - assert result._index_type == index_params[1]["index_type"] + assert result._index_type == index_type_2 """ ****************************************************************** @@ -790,7 +814,7 @@ class TestIndexIP: def test_describe_index_not_create(self, connect, ip_table): ''' target: test describe index interface when index not created - method: create table and add vectors in it, create index with an random table_name + method: create table and add vectors in it, create index , make sure the table name not in index expected: return code not equals to 0, describe index failed ''' @@ -857,8 +881,10 @@ class TestIndexIP: method: drop index, and check if drop successfully expected: raise exception ''' + nlist = 16384 + index_param = {"index_type": IndexType.IVFLAT, "nlist": nlist} with pytest.raises(Exception) as e: - status = dis_connect.drop_index(ip_table, random.choice(gen_index_params())) + status = dis_connect.drop_index(ip_table, index_param) def test_drop_index_table_not_create(self, connect, ip_table): ''' @@ -866,8 +892,9 @@ class TestIndexIP: method: create table and add vectors in it, create index expected: return code not equals to 0, drop index failed ''' - index_params = random.choice(gen_index_params()) - logging.getLogger().info(index_params) + nlist = 16384 + index_param = {"index_type": IndexType.IVF_SQ8, "nlist": nlist} + logging.getLogger().info(index_param) status, ids = connect.add_vectors(ip_table, vectors) status, result = connect.describe_index(ip_table) logging.getLogger().info(result) @@ -903,7 +930,8 @@ class TestIndexIP: method: create index, drop index, four times, each tme use different index_params to create index expected: return code 0 ''' - index_params = random.sample(gen_index_params(), 2) + nlist = 16384 + index_params = [{"index_type": IndexType.IVFLAT, "nlist": nlist}, {"index_type": IndexType.IVF_SQ8, "nlist": nlist}] status, ids = connect.add_vectors(ip_table, vectors) for i in range(2): status = connect.create_index(ip_table, index_params[i]) @@ -937,7 +965,9 @@ class TestIndexTableInvalid(object): @pytest.mark.level(2) def test_create_index_with_invalid_tablename(self, connect, get_table_name): table_name = get_table_name - status = connect.create_index(table_name, random.choice(gen_index_params())) + nlist = 16384 + index_param = {"index_type": IndexType.IVF_SQ8, "nlist": nlist} + status = connect.create_index(table_name, index_param) assert not status.OK() @pytest.mark.level(2) From 8f9b4715b05bcfe42ee0bae0c7fcd6595b66e52e Mon Sep 17 00:00:00 2001 From: milvus-ci-robot Date: Thu, 24 Oct 2019 22:32:48 +0800 Subject: [PATCH 41/89] Add ELK for Jenkins CI Former-commit-id: 2ba8fbbd356fbb05eaf6305d10cdade6e78843ce --- ci/jenkins/jenkinsfile/deploySingle2Dev.groovy | 2 +- ci/jenkins/jenkinsfile/singleDevTest.groovy | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/ci/jenkins/jenkinsfile/deploySingle2Dev.groovy b/ci/jenkins/jenkinsfile/deploySingle2Dev.groovy index 2ab13486a6..718b74778e 100644 --- a/ci/jenkins/jenkinsfile/deploySingle2Dev.groovy +++ b/ci/jenkins/jenkinsfile/deploySingle2Dev.groovy @@ -4,7 +4,7 @@ try { dir ('milvus-helm') { checkout([$class: 'GitSCM', branches: [[name: "0.5.0"]], doGenerateSubmoduleConfigurations: false, extensions: [], submoduleCfg: [], userRemoteConfigs: [[credentialsId: "${params.GIT_CREDENTIALS_ID}", url: "https://github.com/milvus-io/milvus-helm.git", name: 'origin', refspec: "+refs/heads/0.5.0:refs/remotes/origin/0.5.0"]]]) dir ("milvus-gpu") { - sh "helm install --wait --timeout 300 --set engine.image.tag=${DOCKER_VERSION} --set expose.type=clusterIP --name ${env.PIPELINE_NAME}-${env.BUILD_NUMBER}-single-gpu -f ci/values.yaml --namespace milvus ." + sh "helm install --wait --timeout 300 --set engine.image.tag=${DOCKER_VERSION} --set expose.type=clusterIP --name ${env.PIPELINE_NAME}-${env.BUILD_NUMBER}-single-gpu -f ci/values.yaml -f ci/filebeat/values.yaml --namespace milvus ." } } } catch (exc) { diff --git a/ci/jenkins/jenkinsfile/singleDevTest.groovy b/ci/jenkins/jenkinsfile/singleDevTest.groovy index 44f6361835..adfadc9271 100644 --- a/ci/jenkins/jenkinsfile/singleDevTest.groovy +++ b/ci/jenkins/jenkinsfile/singleDevTest.groovy @@ -13,7 +13,7 @@ timeout(time: 60, unit: 'MINUTES') { } dir ("milvus-helm") { dir ("milvus-gpu") { - sh "helm install --wait --timeout 300 --set engine.image.tag=${DOCKER_VERSION} --set expose.type=clusterIP --name ${env.PIPELINE_NAME}-${env.BUILD_NUMBER}-single-gpu -f ci/db_backend/mysql_values.yaml --namespace milvus ." + sh "helm install --wait --timeout 300 --set engine.image.tag=${DOCKER_VERSION} --set expose.type=clusterIP --name ${env.PIPELINE_NAME}-${env.BUILD_NUMBER}-single-gpu -f ci/db_backend/mysql_values.yaml -f ci/filebeat/values.yaml --namespace milvus ." } } dir ("tests/milvus_python_test") { From 408b81f613c361bd922718f0a9d32960f32d228e Mon Sep 17 00:00:00 2001 From: zhenwu Date: Fri, 25 Oct 2019 16:14:51 +0800 Subject: [PATCH 42/89] fix test case for open-version Former-commit-id: 16427e4b2bfbe63c2d1bea4aaaa68a41fd8e1622 --- tests/milvus_python_test/requirements.txt | 4 +- tests/milvus_python_test/test_add_vectors.py | 133 ++++++++++-------- tests/milvus_python_test/test_index.py | 29 ++-- tests/milvus_python_test/test_mix.py | 17 ++- .../milvus_python_test/test_search_vectors.py | 32 +++-- tests/milvus_python_test/test_table.py | 39 ++--- tests/milvus_python_test/test_table_count.py | 38 +++-- tests/milvus_python_test/utils.py | 7 +- 8 files changed, 160 insertions(+), 139 deletions(-) diff --git a/tests/milvus_python_test/requirements.txt b/tests/milvus_python_test/requirements.txt index 4bdecd6033..c8fc02c096 100644 --- a/tests/milvus_python_test/requirements.txt +++ b/tests/milvus_python_test/requirements.txt @@ -17,9 +17,9 @@ allure-pytest==2.7.0 pytest-print==0.1.2 pytest-level==0.1.1 six==1.12.0 -thrift==0.11.0 typed-ast==1.3.5 wcwidth==0.1.7 wrapt==1.11.1 zipp==0.5.1 -pymilvus-test>=0.2.0 +scikit-learn>=0.19.1 +pymilvus-test>=0.2.0 \ No newline at end of file diff --git a/tests/milvus_python_test/test_add_vectors.py b/tests/milvus_python_test/test_add_vectors.py index e33328625a..f9f7f7d4ca 100644 --- a/tests/milvus_python_test/test_add_vectors.py +++ b/tests/milvus_python_test/test_add_vectors.py @@ -16,9 +16,6 @@ ADD_TIMEOUT = 60 nprobe = 1 epsilon = 0.0001 -index_params = random.choice(gen_index_params()) -logging.getLogger().info(index_params) - class TestAddBase: """ @@ -26,6 +23,15 @@ class TestAddBase: The following cases are used to test `add_vectors / index / search / delete` mixed function ****************************************************************** """ + @pytest.fixture( + scope="function", + params=gen_simple_index_params() + ) + def get_simple_index_params(self, request, args): + if "internal" not in args: + if request.param["index_type"] == IndexType.IVF_SQ8H: + pytest.skip("sq8h not support in open source") + return request.param def test_add_vector_create_table(self, connect, table): ''' @@ -71,7 +77,7 @@ class TestAddBase: method: delete table_2 and add vector to table_1 expected: status ok ''' - param = {'table_name': 'test_delete_table_add_vector_another', + param = {'table_name': gen_unique_str(), 'dimension': dim, 'index_file_size': index_file_size, 'metric_type': MetricType.L2} @@ -79,7 +85,6 @@ class TestAddBase: status = connect.delete_table(table) vector = gen_single_vector(dim) status, ids = connect.add_vectors(param['table_name'], vector) - connect.delete_table(param['table_name']) assert status.OK() @pytest.mark.timeout(ADD_TIMEOUT) @@ -101,14 +106,13 @@ class TestAddBase: method: add vector and delete table expected: status ok ''' - param = {'table_name': 'test_add_vector_delete_another_table', + param = {'table_name': gen_unique_str(), 'dimension': dim, 'index_file_size': index_file_size, 'metric_type': MetricType.L2} status = connect.create_table(param) vector = gen_single_vector(dim) status, ids = connect.add_vectors(table, vector) - status = connect.delete_table(param['table_name']) assert status.OK() @pytest.mark.timeout(ADD_TIMEOUT) @@ -131,7 +135,7 @@ class TestAddBase: method: add vector , sleep, and delete table expected: status ok ''' - param = {'table_name': 'test_add_vector_sleep_delete_another_table', + param = {'table_name': gen_unique_str(), 'dimension': dim, 'index_file_size': index_file_size, 'metric_type': MetricType.L2} @@ -143,86 +147,91 @@ class TestAddBase: assert status.OK() @pytest.mark.timeout(ADD_TIMEOUT) - def test_create_index_add_vector(self, connect, table): + def test_create_index_add_vector(self, connect, table, get_simple_index_params): ''' target: test add vector after build index method: build index and add vector expected: status ok ''' - status = connect.create_index(table, index_params) + index_param = get_simple_index_params + status = connect.create_index(table, index_param) vector = gen_single_vector(dim) status, ids = connect.add_vectors(table, vector) assert status.OK() @pytest.mark.timeout(ADD_TIMEOUT) - def test_create_index_add_vector_another(self, connect, table): + def test_create_index_add_vector_another(self, connect, table, get_simple_index_params): ''' target: test add vector to table_2 after build index for table_1 method: build index and add vector expected: status ok ''' - param = {'table_name': 'test_create_index_add_vector_another', + index_param = get_simple_index_params + param = {'table_name': gen_unique_str(), 'dimension': dim, 'index_file_size': index_file_size, 'metric_type': MetricType.L2} status = connect.create_table(param) - status = connect.create_index(table, index_params) + status = connect.create_index(table, index_param) vector = gen_single_vector(dim) status, ids = connect.add_vectors(table, vector) connect.delete_table(param['table_name']) assert status.OK() @pytest.mark.timeout(ADD_TIMEOUT) - def test_add_vector_create_index(self, connect, table): + def test_add_vector_create_index(self, connect, table, get_simple_index_params): ''' target: test build index add after vector method: add vector and build index expected: status ok ''' + index_param = get_simple_index_params vector = gen_single_vector(dim) status, ids = connect.add_vectors(table, vector) - status = connect.create_index(table, index_params) + status = connect.create_index(table, index_param) assert status.OK() @pytest.mark.timeout(ADD_TIMEOUT) - def test_add_vector_create_index_another(self, connect, table): + def test_add_vector_create_index_another(self, connect, table, get_simple_index_params): ''' target: test add vector to table_2 after build index for table_1 method: build index and add vector expected: status ok ''' - param = {'table_name': 'test_add_vector_create_index_another', + index_param = get_simple_index_params + param = {'table_name': gen_unique_str(), 'dimension': dim, 'index_file_size': index_file_size, 'metric_type': MetricType.L2} status = connect.create_table(param) vector = gen_single_vector(dim) status, ids = connect.add_vectors(table, vector) - status = connect.create_index(param['table_name'], index_params) - connect.delete_table(param['table_name']) + status = connect.create_index(param['table_name'], index_param) assert status.OK() @pytest.mark.timeout(ADD_TIMEOUT) - def test_add_vector_sleep_create_index(self, connect, table): + def test_add_vector_sleep_create_index(self, connect, table, get_simple_index_params): ''' target: test build index add after vector for a while method: add vector and build index expected: status ok ''' + index_param = get_simple_index_params vector = gen_single_vector(dim) status, ids = connect.add_vectors(table, vector) time.sleep(1) - status = connect.create_index(table, index_params) + status = connect.create_index(table, index_param) assert status.OK() @pytest.mark.timeout(ADD_TIMEOUT) - def test_add_vector_sleep_create_index_another(self, connect, table): + def test_add_vector_sleep_create_index_another(self, connect, table, get_simple_index_params): ''' target: test add vector to table_2 after build index for table_1 for a while method: build index and add vector expected: status ok ''' - param = {'table_name': 'test_add_vector_sleep_create_index_another', + index_param = get_simple_index_params + param = {'table_name': gen_unique_str(), 'dimension': dim, 'index_file_size': index_file_size, 'metric_type': MetricType.L2} @@ -230,8 +239,7 @@ class TestAddBase: vector = gen_single_vector(dim) status, ids = connect.add_vectors(table, vector) time.sleep(1) - status = connect.create_index(param['table_name'], index_params) - connect.delete_table(param['table_name']) + status = connect.create_index(param['table_name'], index_param) assert status.OK() @pytest.mark.timeout(ADD_TIMEOUT) @@ -253,7 +261,7 @@ class TestAddBase: method: search table and add vector expected: status ok ''' - param = {'table_name': 'test_search_vector_add_vector_another', + param = {'table_name': gen_unique_str(), 'dimension': dim, 'index_file_size': index_file_size, 'metric_type': MetricType.L2} @@ -261,7 +269,6 @@ class TestAddBase: vector = gen_single_vector(dim) status, result = connect.search_vectors(table, 1, nprobe, vector) status, ids = connect.add_vectors(param['table_name'], vector) - connect.delete_table(param['table_name']) assert status.OK() @pytest.mark.timeout(ADD_TIMEOUT) @@ -283,7 +290,7 @@ class TestAddBase: method: search table and add vector expected: status ok ''' - param = {'table_name': 'test_add_vector_search_vector_another', + param = {'table_name': gen_unique_str(), 'dimension': dim, 'index_file_size': index_file_size, 'metric_type': MetricType.L2} @@ -291,7 +298,6 @@ class TestAddBase: vector = gen_single_vector(dim) status, ids = connect.add_vectors(table, vector) status, result = connect.search_vectors(param['table_name'], 1, nprobe, vector) - connect.delete_table(param['table_name']) assert status.OK() @pytest.mark.timeout(ADD_TIMEOUT) @@ -314,7 +320,7 @@ class TestAddBase: method: search table , sleep, and add vector expected: status ok ''' - param = {'table_name': 'test_add_vector_sleep_search_vector_another', + param = {'table_name': gen_unique_str(), 'dimension': dim, 'index_file_size': index_file_size, 'metric_type': MetricType.L2} @@ -323,7 +329,6 @@ class TestAddBase: status, ids = connect.add_vectors(table, vector) time.sleep(1) status, result = connect.search_vectors(param['table_name'], 1, nprobe, vector) - connect.delete_table(param['table_name']) assert status.OK() """ @@ -594,6 +599,15 @@ class TestAddIP: The following cases are used to test `add_vectors / index / search / delete` mixed function ****************************************************************** """ + @pytest.fixture( + scope="function", + params=gen_simple_index_params() + ) + def get_simple_index_params(self, request, args): + if "internal" not in args: + if request.param["index_type"] == IndexType.IVF_SQ8H: + pytest.skip("sq8h not support in open source") + return request.param def test_add_vector_create_table(self, connect, ip_table): ''' @@ -639,7 +653,7 @@ class TestAddIP: method: delete table_2 and add vector to table_1 expected: status ok ''' - param = {'table_name': 'test_delete_table_add_vector_another', + param = {'table_name': gen_unique_str(), 'dimension': dim, 'index_file_size': index_file_size, 'metric_type': MetricType.L2} @@ -647,7 +661,6 @@ class TestAddIP: status = connect.delete_table(ip_table) vector = gen_single_vector(dim) status, ids = connect.add_vectors(param['table_name'], vector) - connect.delete_table(param['table_name']) assert status.OK() @pytest.mark.timeout(ADD_TIMEOUT) @@ -699,7 +712,7 @@ class TestAddIP: method: add vector , sleep, and delete table expected: status ok ''' - param = {'table_name': 'test_add_vector_sleep_delete_another_table', + param = {'table_name': gen_unique_str(), 'dimension': dim, 'index_file_size': index_file_size, 'metric_type': MetricType.L2} @@ -711,86 +724,90 @@ class TestAddIP: assert status.OK() @pytest.mark.timeout(ADD_TIMEOUT) - def test_create_index_add_vector(self, connect, ip_table): + def test_create_index_add_vector(self, connect, ip_table, get_simple_index_params): ''' target: test add vector after build index method: build index and add vector expected: status ok ''' - status = connect.create_index(ip_table, index_params) + index_param = get_simple_index_params + status = connect.create_index(ip_table, index_param) vector = gen_single_vector(dim) status, ids = connect.add_vectors(ip_table, vector) assert status.OK() @pytest.mark.timeout(ADD_TIMEOUT) - def test_create_index_add_vector_another(self, connect, ip_table): + def test_create_index_add_vector_another(self, connect, ip_table, get_simple_index_params): ''' target: test add vector to table_2 after build index for table_1 method: build index and add vector expected: status ok ''' - param = {'table_name': 'test_create_index_add_vector_another', + index_param = get_simple_index_params + param = {'table_name': gen_unique_str(), 'dimension': dim, 'index_file_size': index_file_size, 'metric_type': MetricType.L2} status = connect.create_table(param) - status = connect.create_index(ip_table, index_params) + status = connect.create_index(ip_table, index_param) vector = gen_single_vector(dim) status, ids = connect.add_vectors(ip_table, vector) - connect.delete_table(param['table_name']) assert status.OK() @pytest.mark.timeout(ADD_TIMEOUT) - def test_add_vector_create_index(self, connect, ip_table): + def test_add_vector_create_index(self, connect, ip_table, get_simple_index_params): ''' target: test build index add after vector method: add vector and build index expected: status ok ''' + index_param = get_simple_index_params vector = gen_single_vector(dim) status, ids = connect.add_vectors(ip_table, vector) - status = connect.create_index(ip_table, index_params) + status = connect.create_index(ip_table, index_param) assert status.OK() @pytest.mark.timeout(ADD_TIMEOUT) - def test_add_vector_create_index_another(self, connect, ip_table): + def test_add_vector_create_index_another(self, connect, ip_table, get_simple_index_params): ''' target: test add vector to table_2 after build index for table_1 method: build index and add vector expected: status ok ''' - param = {'table_name': 'test_add_vector_create_index_another', + index_param = get_simple_index_params + param = {'table_name': gen_unique_str(), 'dimension': dim, 'index_file_size': index_file_size, 'metric_type': MetricType.L2} status = connect.create_table(param) vector = gen_single_vector(dim) status, ids = connect.add_vectors(ip_table, vector) - status = connect.create_index(param['table_name'], index_params) - connect.delete_table(param['table_name']) + status = connect.create_index(param['table_name'], index_param) assert status.OK() @pytest.mark.timeout(ADD_TIMEOUT) - def test_add_vector_sleep_create_index(self, connect, ip_table): + def test_add_vector_sleep_create_index(self, connect, ip_table, get_simple_index_params): ''' target: test build index add after vector for a while method: add vector and build index expected: status ok ''' + index_param = get_simple_index_params vector = gen_single_vector(dim) status, ids = connect.add_vectors(ip_table, vector) time.sleep(1) - status = connect.create_index(ip_table, index_params) + status = connect.create_index(ip_table, index_param) assert status.OK() @pytest.mark.timeout(ADD_TIMEOUT) - def test_add_vector_sleep_create_index_another(self, connect, ip_table): + def test_add_vector_sleep_create_index_another(self, connect, ip_table, get_simple_index_params): ''' target: test add vector to table_2 after build index for table_1 for a while method: build index and add vector expected: status ok ''' - param = {'table_name': 'test_add_vector_sleep_create_index_another', + index_param = get_simple_index_params + param = {'table_name': gen_unique_str(), 'dimension': dim, 'index_file_size': index_file_size, 'metric_type': MetricType.L2} @@ -798,8 +815,7 @@ class TestAddIP: vector = gen_single_vector(dim) status, ids = connect.add_vectors(ip_table, vector) time.sleep(1) - status = connect.create_index(param['table_name'], index_params) - connect.delete_table(param['table_name']) + status = connect.create_index(param['table_name'], index_param) assert status.OK() @pytest.mark.timeout(ADD_TIMEOUT) @@ -821,7 +837,7 @@ class TestAddIP: method: search table and add vector expected: status ok ''' - param = {'table_name': 'test_search_vector_add_vector_another', + param = {'table_name': gen_unique_str(), 'dimension': dim, 'index_file_size': index_file_size, 'metric_type': MetricType.L2} @@ -829,7 +845,6 @@ class TestAddIP: vector = gen_single_vector(dim) status, result = connect.search_vectors(ip_table, 1, nprobe, vector) status, ids = connect.add_vectors(param['table_name'], vector) - connect.delete_table(param['table_name']) assert status.OK() @pytest.mark.timeout(ADD_TIMEOUT) @@ -851,7 +866,7 @@ class TestAddIP: method: search table and add vector expected: status ok ''' - param = {'table_name': 'test_add_vector_search_vector_another', + param = {'table_name': gen_unique_str(), 'dimension': dim, 'index_file_size': index_file_size, 'metric_type': MetricType.L2} @@ -859,7 +874,6 @@ class TestAddIP: vector = gen_single_vector(dim) status, ids = connect.add_vectors(ip_table, vector) status, result = connect.search_vectors(param['table_name'], 1, nprobe, vector) - connect.delete_table(param['table_name']) assert status.OK() @pytest.mark.timeout(ADD_TIMEOUT) @@ -882,7 +896,7 @@ class TestAddIP: method: search table , sleep, and add vector expected: status ok ''' - param = {'table_name': 'test_add_vector_sleep_search_vector_another', + param = {'table_name': gen_unique_str(), 'dimension': dim, 'index_file_size': index_file_size, 'metric_type': MetricType.L2} @@ -891,7 +905,6 @@ class TestAddIP: status, ids = connect.add_vectors(ip_table, vector) time.sleep(1) status, result = connect.search_vectors(param['table_name'], 1, nprobe, vector) - connect.delete_table(param['table_name']) assert status.OK() """ @@ -1130,7 +1143,7 @@ class TestAddIP: nq = 100 vectors = gen_vectors(nq, dim) table_list = [] - for i in range(50): + for i in range(20): table_name = gen_unique_str('test_add_vector_multi_tables') table_list.append(table_name) param = {'table_name': table_name, @@ -1140,7 +1153,7 @@ class TestAddIP: connect.create_table(param) time.sleep(2) for j in range(10): - for i in range(50): + for i in range(20): status, ids = connect.add_vectors(table_name=table_list[i], records=vectors) assert status.OK() diff --git a/tests/milvus_python_test/test_index.py b/tests/milvus_python_test/test_index.py index 47b0db64e3..65716d45aa 100644 --- a/tests/milvus_python_test/test_index.py +++ b/tests/milvus_python_test/test_index.py @@ -8,6 +8,7 @@ import pdb import threading from multiprocessing import Pool, Process import numpy +import sklearn.preprocessing from milvus import Milvus, IndexType, MetricType from utils import * @@ -15,7 +16,7 @@ nb = 10000 dim = 128 index_file_size = 10 vectors = gen_vectors(nb, dim) -vectors /= numpy.linalg.norm(vectors) +vectors = sklearn.preprocessing.normalize(vectors, axis=1, norm='l2') vectors = vectors.tolist() BUILD_TIMEOUT = 60 nprobe = 1 @@ -218,29 +219,26 @@ class TestIndexBase: assert status.OK() @pytest.mark.timeout(BUILD_TIMEOUT) - def test_create_index_no_vectors_then_add_vectors(self, connect, table): + def test_create_index_no_vectors_then_add_vectors(self, connect, table, get_simple_index_params): ''' target: test create index interface when there is no vectors in table, and does not affect the subsequent process method: create table and add no vectors in it, and then create index, add vectors in it expected: return code equals to 0 ''' - nlist = 16384 - index_param = {"index_type": IndexType.IVF_SQ8, "nlist": nlist} + index_param = get_simple_index_params status = connect.create_index(table, index_param) status, ids = connect.add_vectors(table, vectors) assert status.OK() @pytest.mark.timeout(BUILD_TIMEOUT) - def test_create_same_index_repeatedly(self, connect, table): + def test_create_same_index_repeatedly(self, connect, table, get_simple_index_params): ''' target: check if index can be created repeatedly, with the same create_index params method: create index after index have been built expected: return code success, and search ok ''' - nlist = 16384 status, ids = connect.add_vectors(table, vectors) - index_param = {"index_type": IndexType.IVF_SQ8, "nlist": nlist} - # index_params = get_index_params + index_param = get_simple_index_params status = connect.create_index(table, index_param) status = connect.create_index(table, index_param) assert status.OK() @@ -390,9 +388,9 @@ class TestIndexBase: method: create table and add vectors in it, create index, call drop index expected: return code 0, and default index param ''' - index_params = get_index_params + index_param = get_index_params status, ids = connect.add_vectors(table, vectors) - status = connect.create_index(table, index_params) + status = connect.create_index(table, index_param) assert status.OK() status, result = connect.describe_index(table) logging.getLogger().info(result) @@ -404,15 +402,15 @@ class TestIndexBase: assert result._table_name == table assert result._index_type == IndexType.FLAT - def test_drop_index_repeatly(self, connect, table, get_simple_index_params): + def test_drop_index_repeatly(self, connect, table, get_index_params): ''' target: test drop index repeatly method: create index, call drop index, and drop again expected: return code 0 ''' - index_params = get_simple_index_params + index_param = get_index_params status, ids = connect.add_vectors(table, vectors) - status = connect.create_index(table, index_params) + status = connect.create_index(table, index_param) assert status.OK() status, result = connect.describe_index(table) logging.getLogger().info(result) @@ -688,14 +686,13 @@ class TestIndexIP: assert status.OK() @pytest.mark.timeout(BUILD_TIMEOUT) - def test_create_index_no_vectors_then_add_vectors(self, connect, ip_table): + def test_create_index_no_vectors_then_add_vectors(self, connect, ip_table, get_simple_index_params): ''' target: test create index interface when there is no vectors in table, and does not affect the subsequent process method: create table and add no vectors in it, and then create index, add vectors in it expected: return code equals to 0 ''' - nlist = 16384 - index_param = {"index_type": IndexType.IVF_SQ8, "nlist": nlist} + index_param = get_simple_index_params status = connect.create_index(ip_table, index_param) status, ids = connect.add_vectors(ip_table, vectors) assert status.OK() diff --git a/tests/milvus_python_test/test_mix.py b/tests/milvus_python_test/test_mix.py index 4578e330b3..f099db5c31 100644 --- a/tests/milvus_python_test/test_mix.py +++ b/tests/milvus_python_test/test_mix.py @@ -6,7 +6,7 @@ import datetime import logging from time import sleep from multiprocessing import Process -import numpy +import sklearn.preprocessing from milvus import Milvus, IndexType, MetricType from utils import * @@ -15,7 +15,7 @@ index_file_size = 10 table_id = "test_mix" add_interval_time = 2 vectors = gen_vectors(100000, dim) -vectors /= numpy.linalg.norm(vectors) +vectors = sklearn.preprocessing.normalize(vectors, axis=1, norm='l2') vectors = vectors.tolist() top_k = 1 nprobe = 1 @@ -26,9 +26,9 @@ index_params = {'index_type': IndexType.IVFLAT, 'nlist': 16384} class TestMixBase: # TODO: enable - def _test_search_during_createIndex(self, args): + def test_search_during_createIndex(self, args): loops = 100000 - table = "test_search_during_createIndex" + table = gen_unique_str() query_vecs = [vectors[0], vectors[1]] uri = "tcp://%s:%s" % (args["ip"], args["port"]) id_0 = 0; id_1 = 0 @@ -54,6 +54,7 @@ class TestMixBase: status, ids = milvus_instance.add_vectors(table, vectors) logging.getLogger().info(status) def search(milvus_instance): + logging.getLogger().info("In search vectors") for i in range(loops): status, result = milvus_instance.search_vectors(table, top_k, nprobe, query_vecs) logging.getLogger().info(status) @@ -69,6 +70,7 @@ class TestMixBase: p_create.start() p_create.join() + @pytest.mark.level(2) def test_mix_multi_tables(self, connect): ''' target: test functions with multiple tables of different metric_types and index_types @@ -77,6 +79,7 @@ class TestMixBase: expected: status ok ''' nq = 10000 + nlist= 16384 vectors = gen_vectors(nq, dim) table_list = [] idx = [] @@ -112,17 +115,17 @@ class TestMixBase: #create index for i in range(10): - index_params = {'index_type': IndexType.FLAT, 'nlist': 16384} + index_params = {'index_type': IndexType.FLAT, 'nlist': nlist} status = connect.create_index(table_list[i], index_params) assert status.OK() status = connect.create_index(table_list[30 + i], index_params) assert status.OK() - index_params = {'index_type': IndexType.IVFLAT, 'nlist': 16384} + index_params = {'index_type': IndexType.IVFLAT, 'nlist': nlist} status = connect.create_index(table_list[10 + i], index_params) assert status.OK() status = connect.create_index(table_list[40 + i], index_params) assert status.OK() - index_params = {'index_type': IndexType.IVF_SQ8, 'nlist': 16384} + index_params = {'index_type': IndexType.IVF_SQ8, 'nlist': nlist} status = connect.create_index(table_list[20 + i], index_params) assert status.OK() status = connect.create_index(table_list[50 + i], index_params) diff --git a/tests/milvus_python_test/test_search_vectors.py b/tests/milvus_python_test/test_search_vectors.py index e52e0d2d08..da53466828 100644 --- a/tests/milvus_python_test/test_search_vectors.py +++ b/tests/milvus_python_test/test_search_vectors.py @@ -54,7 +54,7 @@ class TestSearchBase: """ @pytest.fixture( scope="function", - params=[1, 99, 101, 1024, 2048, 2049] + params=[1, 99, 1024, 2048, 2049] ) def get_top_k(self, request): yield request.param @@ -482,8 +482,9 @@ class TestSearchBase: """ class TestSearchParamsInvalid(object): - index_params = random.choice(gen_index_params()) - logging.getLogger().info(index_params) + nlist = 16384 + index_param = {"index_type": IndexType.IVF_SQ8, "nlist": nlist} + logging.getLogger().info(index_param) def init_data(self, connect, table, nb=100): ''' @@ -528,7 +529,7 @@ class TestSearchParamsInvalid(object): def get_top_k(self, request): yield request.param - @pytest.mark.level(2) + @pytest.mark.level(1) def test_search_with_invalid_top_k(self, connect, table, get_top_k): ''' target: test search fuction, with the wrong top_k @@ -539,9 +540,12 @@ class TestSearchParamsInvalid(object): logging.getLogger().info(top_k) nprobe = 1 query_vecs = gen_vectors(1, dim) - with pytest.raises(Exception) as e: + if isinstance(top_k, int): status, result = connect.search_vectors(table, top_k, nprobe, query_vecs) - res = connect.server_version() + assert not status.OK() + else: + with pytest.raises(Exception) as e: + status, result = connect.search_vectors(table, top_k, nprobe, query_vecs) @pytest.mark.level(2) def test_search_with_invalid_top_k_ip(self, connect, ip_table, get_top_k): @@ -554,10 +558,12 @@ class TestSearchParamsInvalid(object): logging.getLogger().info(top_k) nprobe = 1 query_vecs = gen_vectors(1, dim) - with pytest.raises(Exception) as e: + if isinstance(top_k, int): status, result = connect.search_vectors(ip_table, top_k, nprobe, query_vecs) - res = connect.server_version() - + assert not status.OK() + else: + with pytest.raises(Exception) as e: + status, result = connect.search_vectors(ip_table, top_k, nprobe, query_vecs) """ Test search table with invalid nprobe """ @@ -568,7 +574,7 @@ class TestSearchParamsInvalid(object): def get_nprobes(self, request): yield request.param - @pytest.mark.level(2) + @pytest.mark.level(1) def test_search_with_invalid_nrpobe(self, connect, table, get_nprobes): ''' target: test search fuction, with the wrong top_k @@ -579,7 +585,7 @@ class TestSearchParamsInvalid(object): nprobe = get_nprobes logging.getLogger().info(nprobe) query_vecs = gen_vectors(1, dim) - if isinstance(nprobe, int) and nprobe > 0: + if isinstance(nprobe, int): status, result = connect.search_vectors(table, top_k, nprobe, query_vecs) assert not status.OK() else: @@ -597,7 +603,7 @@ class TestSearchParamsInvalid(object): nprobe = get_nprobes logging.getLogger().info(nprobe) query_vecs = gen_vectors(1, dim) - if isinstance(nprobe, int) and nprobe > 0: + if isinstance(nprobe, int): status, result = connect.search_vectors(ip_table, top_k, nprobe, query_vecs) assert not status.OK() else: @@ -614,7 +620,7 @@ class TestSearchParamsInvalid(object): def get_query_ranges(self, request): yield request.param - @pytest.mark.level(2) + @pytest.mark.level(1) def test_search_flat_with_invalid_query_range(self, connect, table, get_query_ranges): ''' target: test search fuction, with the wrong query_range diff --git a/tests/milvus_python_test/test_table.py b/tests/milvus_python_test/test_table.py index 934f3c2f9f..88f7caca3c 100644 --- a/tests/milvus_python_test/test_table.py +++ b/tests/milvus_python_test/test_table.py @@ -178,6 +178,7 @@ class TestTable: assert res.table_name == table_name assert res.metric_type == MetricType.L2 + @pytest.mark.level(2) def test_table_describe_table_name_ip(self, connect): ''' target: test describe table created with correct params @@ -266,6 +267,7 @@ class TestTable: status = connect.delete_table(table) assert not assert_has_table(connect, table) + @pytest.mark.level(2) def test_delete_table_ip(self, connect, ip_table): ''' target: test delete table created with correct params @@ -335,7 +337,6 @@ class TestTable: time.sleep(2) assert status.OK() - @pytest.mark.level(2) def test_delete_create_table_repeatedly_ip(self, connect): ''' target: test delete and create the same table repeatedly @@ -587,25 +588,25 @@ class TestTable: """ @pytest.fixture( scope="function", - params=gen_index_params() + params=gen_simple_index_params() ) - def get_index_params(self, request, args): + def get_simple_index_params(self, request, args): if "internal" not in args: if request.param["index_type"] == IndexType.IVF_SQ8H: pytest.skip("sq8h not support in open source") return request.param @pytest.mark.level(1) - def test_preload_table(self, connect, table, get_index_params): - index_params = get_index_params + def test_preload_table(self, connect, table, get_simple_index_params): + index_params = get_simple_index_params status, ids = connect.add_vectors(table, vectors) status = connect.create_index(table, index_params) status = connect.preload_table(table) assert status.OK() @pytest.mark.level(1) - def test_preload_table_ip(self, connect, ip_table, get_index_params): - index_params = get_index_params + def test_preload_table_ip(self, connect, ip_table, get_simple_index_params): + index_params = get_simple_index_params status, ids = connect.add_vectors(ip_table, vectors) status = connect.create_index(ip_table, index_params) status = connect.preload_table(ip_table) @@ -613,19 +614,21 @@ class TestTable: @pytest.mark.level(1) def test_preload_table_not_existed(self, connect, table): - table_name = gen_unique_str("test_preload_table_not_existed") - index_params = random.choice(gen_index_params()) + table_name = gen_unique_str() + nlist = 16384 + index_param = {"index_type": IndexType.IVF_SQ8, "nlist": nlist} status, ids = connect.add_vectors(table, vectors) - status = connect.create_index(table, index_params) + status = connect.create_index(table, index_param) status = connect.preload_table(table_name) assert not status.OK() - @pytest.mark.level(1) + @pytest.mark.level(2) def test_preload_table_not_existed_ip(self, connect, ip_table): - table_name = gen_unique_str("test_preload_table_not_existed") - index_params = random.choice(gen_index_params()) + table_name = gen_unique_str() + nlist = 16384 + index_param = {"index_type": IndexType.IVF_SQ8, "nlist": nlist} status, ids = connect.add_vectors(ip_table, vectors) - status = connect.create_index(ip_table, index_params) + status = connect.create_index(ip_table, index_param) status = connect.preload_table(table_name) assert not status.OK() @@ -634,7 +637,7 @@ class TestTable: status = connect.preload_table(table) assert status.OK() - @pytest.mark.level(1) + @pytest.mark.level(2) def test_preload_table_no_vectors_ip(self, connect, ip_table): status = connect.preload_table(ip_table) assert status.OK() @@ -728,7 +731,7 @@ class TestCreateTableIndexSizeInvalid(object): 'dimension': dim, 'index_file_size': file_size, 'metric_type': MetricType.L2} - if isinstance(file_size, int) and file_size > 0: + if isinstance(file_size, int): status = connect.create_table(param) assert not status.OK() else: @@ -779,7 +782,7 @@ def preload_table(connect, **params): return status def has(connect, **params): - status = assert_has_table(connect, params["table_name"]) + status, result = connect.has_table(params["table_name"]) return status def show(connect, **params): @@ -803,7 +806,7 @@ def create_index(connect, **params): return status func_map = { - # 0:has, + 0:has, 1:show, 10:create_table, 11:describe, diff --git a/tests/milvus_python_test/test_table_count.py b/tests/milvus_python_test/test_table_count.py index 820fb9d546..4e8a780c62 100644 --- a/tests/milvus_python_test/test_table_count.py +++ b/tests/milvus_python_test/test_table_count.py @@ -23,7 +23,7 @@ class TestTableCount: @pytest.fixture( scope="function", params=[ - 100, + 1, 5000, 100000, ], @@ -36,9 +36,9 @@ class TestTableCount: """ @pytest.fixture( scope="function", - params=gen_index_params() + params=gen_simple_index_params() ) - def get_index_params(self, request, args): + def get_simple_index_params(self, request, args): if "internal" not in args: if request.param["index_type"] == IndexType.IVF_SQ8H: pytest.skip("sq8h not support in open source") @@ -58,14 +58,14 @@ class TestTableCount: status, res = connect.get_table_row_count(table) assert res == nb - def test_table_rows_count_after_index_created(self, connect, table, get_index_params): + def test_table_rows_count_after_index_created(self, connect, table, get_simple_index_params): ''' target: test get_table_row_count, after index have been created method: add vectors in db, and create index, then calling get_table_row_count with correct params expected: get_table_row_count raise exception ''' nb = 100 - index_params = get_index_params + index_params = get_simple_index_params vectors = gen_vectors(nb, dim) res = connect.add_vectors(table_name=table, records=vectors) time.sleep(add_time_interval) @@ -91,7 +91,7 @@ class TestTableCount: assert the value returned by get_table_row_count method is equal to 0 expected: the count is equal to 0 ''' - table_name = gen_unique_str("test_table") + table_name = gen_unique_str() param = {'table_name': table_name, 'dimension': dim, 'index_file_size': index_file_size} @@ -142,8 +142,8 @@ class TestTableCount: nq = 100 vectors = gen_vectors(nq, dim) table_list = [] - for i in range(50): - table_name = gen_unique_str('test_table_rows_count_multi_tables') + for i in range(20): + table_name = gen_unique_str() table_list.append(table_name) param = {'table_name': table_name, 'dimension': dim, @@ -152,7 +152,7 @@ class TestTableCount: connect.create_table(param) res = connect.add_vectors(table_name=table_name, records=vectors) time.sleep(2) - for i in range(50): + for i in range(20): status, res = connect.get_table_row_count(table_list[i]) assert status.OK() assert res == nq @@ -166,7 +166,7 @@ class TestTableCountIP: @pytest.fixture( scope="function", params=[ - 100, + 1, 5000, 100000, ], @@ -180,9 +180,9 @@ class TestTableCountIP: @pytest.fixture( scope="function", - params=gen_index_params() + params=gen_simple_index_params() ) - def get_index_params(self, request, args): + def get_simple_index_params(self, request, args): if "internal" not in args: if request.param["index_type"] == IndexType.IVF_SQ8H: pytest.skip("sq8h not support in open source") @@ -202,14 +202,14 @@ class TestTableCountIP: status, res = connect.get_table_row_count(ip_table) assert res == nb - def test_table_rows_count_after_index_created(self, connect, ip_table, get_index_params): + def test_table_rows_count_after_index_created(self, connect, ip_table, get_simple_index_params): ''' target: test get_table_row_count, after index have been created method: add vectors in db, and create index, then calling get_table_row_count with correct params expected: get_table_row_count raise exception ''' nb = 100 - index_params = get_index_params + index_params = get_simple_index_params vectors = gen_vectors(nb, dim) res = connect.add_vectors(table_name=ip_table, records=vectors) time.sleep(add_time_interval) @@ -243,10 +243,8 @@ class TestTableCountIP: status, res = connect.get_table_row_count(ip_table) assert res == 0 - # TODO: enable - @pytest.mark.level(2) - @pytest.mark.timeout(20) - def _test_table_rows_count_multiprocessing(self, connect, ip_table, args): + @pytest.mark.timeout(60) + def test_table_rows_count_multiprocessing(self, connect, ip_table, args): ''' target: test table rows_count is correct or not with multiprocess method: create table and add vectors in it, @@ -286,7 +284,7 @@ class TestTableCountIP: nq = 100 vectors = gen_vectors(nq, dim) table_list = [] - for i in range(50): + for i in range(20): table_name = gen_unique_str('test_table_rows_count_multi_tables') table_list.append(table_name) param = {'table_name': table_name, @@ -296,7 +294,7 @@ class TestTableCountIP: connect.create_table(param) res = connect.add_vectors(table_name=table_name, records=vectors) time.sleep(2) - for i in range(50): + for i in range(20): status, res = connect.get_table_row_count(table_list[i]) assert status.OK() assert res == nq \ No newline at end of file diff --git a/tests/milvus_python_test/utils.py b/tests/milvus_python_test/utils.py index 007bff9c75..159c8407c6 100644 --- a/tests/milvus_python_test/utils.py +++ b/tests/milvus_python_test/utils.py @@ -26,9 +26,9 @@ def gen_vector(nb, d, seed=np.random.RandomState(1234)): return xb.tolist() -def gen_unique_str(str=None): +def gen_unique_str(str_value=None): prefix = "".join(random.choice(string.ascii_letters + string.digits) for _ in range(8)) - return prefix if str is None else str + "_" + prefix + return "test_"+prefix if str_value is None else str_value+"_"+prefix def get_current_day(): @@ -449,10 +449,11 @@ def gen_index_params(): return gen_params(index_types, nlists) + def gen_simple_index_params(): index_params = [] index_types = [IndexType.FLAT, IndexType.IVFLAT, IndexType.IVF_SQ8, IndexType.IVF_SQ8H] - nlists = [16384] + nlists = [1024] def gen_params(index_types, nlists): return [ {"index_type": index_type, "nlist": nlist} \ From 2ca28c44ae970242ad339909022e9d6f07e486b0 Mon Sep 17 00:00:00 2001 From: jielinxu <52057195+jielinxu@users.noreply.github.com> Date: Fri, 25 Oct 2019 16:27:56 +0800 Subject: [PATCH 43/89] Update README.md Former-commit-id: a5f25b72a07bdabef85a2097764a55261159cbf3 --- README.md | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 4fd8bbae2e..e847f0e8ca 100644 --- a/README.md +++ b/README.md @@ -5,6 +5,8 @@ ![LICENSE](https://img.shields.io/badge/license-Apache--2.0-brightgreen) ![Language](https://img.shields.io/badge/language-C%2B%2B-blue) [![codebeat badge](https://codebeat.co/badges/e030a4f6-b126-4475-a938-4723d54ec3a7?style=plastic)](https://codebeat.co/projects/github-com-jinhai-cn-milvus-master) +![Release](https://img.shields.io/badge/release-v0.5.0-orange) +![Release_date](https://img.shields.io/badge/release_date-October-yellowgreen) - [Slack Community](https://join.slack.com/t/milvusio/shared_invite/enQtNzY1OTQ0NDI3NjMzLWNmYmM1NmNjOTQ5MGI5NDhhYmRhMGU5M2NhNzhhMDMzY2MzNDdlYjM5ODQ5MmE3ODFlYzU3YjJkNmVlNDQ2ZTk) - [Twitter](https://twitter.com/milvusio) @@ -22,7 +24,7 @@ Milvus is an open source similarity search engine for massive feature vectors. D Milvus provides stable Python, Java and C++ APIs. -Keep up-to-date with newest releases and latest updates by reading Milvus [release notes](https://milvus.io/docs/en/releases/v0.5.0/). +Keep up-to-date with newest releases and latest updates by reading Milvus [release notes](https://milvus.io/docs/en/release/v0.5.0/). - Heterogeneous computing @@ -90,7 +92,7 @@ Use Docker to install Milvus is a breeze. See the [Milvus install guide](https:/ ```shell $ cd [Milvus sourcecode path]/core -./ubuntu_build_deps.sh +$ ./ubuntu_build_deps.sh ``` ##### Step 2 Build From 5a66b9aa964c25942671b3a211aed179d1af440a Mon Sep 17 00:00:00 2001 From: zhenwu Date: Fri, 25 Oct 2019 16:42:37 +0800 Subject: [PATCH 44/89] remove one case in search Former-commit-id: 23aa50a7a89d72d1334c9a0a405020aa84004931 --- tests/milvus_python_test/test_search_vectors.py | 1 - 1 file changed, 1 deletion(-) diff --git a/tests/milvus_python_test/test_search_vectors.py b/tests/milvus_python_test/test_search_vectors.py index da53466828..10892d6de3 100644 --- a/tests/milvus_python_test/test_search_vectors.py +++ b/tests/milvus_python_test/test_search_vectors.py @@ -220,7 +220,6 @@ class TestSearchBase: scope="function", params=[ (get_last_day(2), get_last_day(1)), - (get_last_day(2), get_current_day()), (get_next_day(1), get_next_day(2)) ] ) From d65160e4a12b0748d327ceb75fc98936544b676e Mon Sep 17 00:00:00 2001 From: Zhiru Zhu Date: Fri, 25 Oct 2019 18:01:35 +0800 Subject: [PATCH 45/89] Remove .a file in milvus/lib for docker-version Former-commit-id: b27fd9ddcf9d0dcbcceca2422b8bf6230bf4e21a --- CHANGELOG.md | 2 +- core/src/index/knowhere/CMakeLists.txt | 36 +------------------------- 2 files changed, 2 insertions(+), 36 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 7865430dd9..770098f7a5 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -10,7 +10,7 @@ Please mark all change in change log and use the ticket from JIRA. - \#80 - Print version information into log during server start - \#82 - Move easyloggingpp into "external" directory - \#92 - Speed up CMake build process - +- \#96 - Remove .a file in milvus/lib for docker-version ## Feature ## Task diff --git a/core/src/index/knowhere/CMakeLists.txt b/core/src/index/knowhere/CMakeLists.txt index 2a499dc1a3..bece9058a9 100644 --- a/core/src/index/knowhere/CMakeLists.txt +++ b/core/src/index/knowhere/CMakeLists.txt @@ -81,27 +81,6 @@ target_link_libraries( ${depend_libs} ) -INSTALL(TARGETS - knowhere - SPTAGLibStatic - DESTINATION - lib) - -INSTALL(FILES - ${ARROW_STATIC_LIB} - ${ARROW_PREFIX}/lib/libjemalloc_pic.a - ${FAISS_STATIC_LIB} - ${LAPACK_STATIC_LIB} - ${BLAS_STATIC_LIB} - DESTINATION - lib - ) - -INSTALL(FILES ${OPENBLAS_REAL_STATIC_LIB} - RENAME "libopenblas.a" - DESTINATION lib - ) - set(INDEX_INCLUDE_DIRS ${INDEX_SOURCE_DIR}/knowhere ${INDEX_SOURCE_DIR}/thirdparty @@ -112,17 +91,4 @@ set(INDEX_INCLUDE_DIRS ${LAPACK_INCLUDE_DIR} ) -set(INDEX_INCLUDE_DIRS ${INDEX_INCLUDE_DIRS} PARENT_SCOPE) - -#INSTALL(DIRECTORY -# ${INDEX_SOURCE_DIR}/include/knowhere -# ${ARROW_INCLUDE_DIR}/arrow -# ${FAISS_PREFIX}/include/faiss -# ${OPENBLAS_INCLUDE_DIR}/ -# DESTINATION -# include) -# -#INSTALL(DIRECTORY -# ${SPTAG_SOURCE_DIR}/AnnService/inc/ -# DESTINATION -# include/SPTAG/AnnService/inc) +set(INDEX_INCLUDE_DIRS ${INDEX_INCLUDE_DIRS} PARENT_SCOPE) \ No newline at end of file From f32921f0fb2c39c5b2cca2688def619d22679681 Mon Sep 17 00:00:00 2001 From: fishpenguin Date: Fri, 25 Oct 2019 19:43:07 +0800 Subject: [PATCH 46/89] test_scheduler core dump Former-commit-id: 79208b55f7b016bde5100cc7474850160dc72503 --- CHANGELOG.md | 2 ++ core/src/scheduler/resource/Resource.h | 3 +++ 2 files changed, 5 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 7865430dd9..74a1951142 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,6 +5,8 @@ Please mark all change in change log and use the ticket from JIRA. # Milvus 0.5.1 (TODO) ## Bug +- \#104 - test_scheduler core dump + ## Improvement - \#64 - Improvement dump function in scheduler - \#80 - Print version information into log during server start diff --git a/core/src/scheduler/resource/Resource.h b/core/src/scheduler/resource/Resource.h index c797e13de8..2af44b3d90 100644 --- a/core/src/scheduler/resource/Resource.h +++ b/core/src/scheduler/resource/Resource.h @@ -119,6 +119,9 @@ class Resource : public Node, public std::enable_shared_from_this { // TODO(wxyu): need double ? inline uint64_t TaskAvgCost() const { + if (total_task_ == 0) { + return 0; + } return total_cost_ / total_task_; } From 02cfe7cf49def66bdb57272f8a2f830985b6b6a5 Mon Sep 17 00:00:00 2001 From: milvus-ci-robot Date: Sat, 26 Oct 2019 11:25:08 +0800 Subject: [PATCH 47/89] add build periodically in ci/jenkins/Jenkinsfile Former-commit-id: 2455e5910f0111e5dec7a7f966ccbbdaf47fba0c --- ci/jenkins/Jenkinsfile | 10 ++++++++- .../jenkinsfile/singleDevNightlyTest.groovy | 22 +++++++++++++++++++ 2 files changed, 31 insertions(+), 1 deletion(-) create mode 100644 ci/jenkins/jenkinsfile/singleDevNightlyTest.groovy diff --git a/ci/jenkins/Jenkinsfile b/ci/jenkins/Jenkinsfile index fbdf3a3096..179a7fed38 100644 --- a/ci/jenkins/Jenkinsfile +++ b/ci/jenkins/Jenkinsfile @@ -1,6 +1,9 @@ +String cron_string = BRANCH_NAME == "master" || BRANCH_NAME == "0.5.0" || BRANCH_NAME == "0.5.1" ? "H 0 * * *" : "" pipeline { agent none + triggers { cron(cron_string) } + options { timestamps() } @@ -20,6 +23,7 @@ pipeline { SEMVER = "${BRANCH_NAME}" JOBNAMES = env.JOB_NAME.split('/') PIPELINE_NAME = "${JOBNAMES[0]}" + NIGHTLIY_TEST = "${cron_string == "" ? false : true}" } stages { @@ -119,7 +123,11 @@ pipeline { steps { container('milvus-test-env') { script { - load "${env.WORKSPACE}/ci/jenkins/jenkinsfile/singleDevTest.groovy" + if (NIGHTLIY_TEST) { + load "${env.WORKSPACE}/ci/jenkins/jenkinsfile/singleDevNightlyTest.groovy" + else { + load "${env.WORKSPACE}/ci/jenkins/jenkinsfile/singleDevTest.groovy" + } } } } diff --git a/ci/jenkins/jenkinsfile/singleDevNightlyTest.groovy b/ci/jenkins/jenkinsfile/singleDevNightlyTest.groovy new file mode 100644 index 0000000000..91699f533b --- /dev/null +++ b/ci/jenkins/jenkinsfile/singleDevNightlyTest.groovy @@ -0,0 +1,22 @@ +timeout(time: 90, unit: 'MINUTES') { + dir ("tests/milvus_python_test") { + sh 'python3 -m pip install -r requirements.txt' + sh "pytest . --alluredir=\"test_out/dev/single/sqlite\" --level=1 --ip ${env.PIPELINE_NAME}-${env.BUILD_NUMBER}-single-gpu-milvus-gpu-engine.milvus.svc.cluster.local" + } + // mysql database backend test + load "${env.WORKSPACE}/ci/jenkins/jenkinsfile/cleanupSingleDev.groovy" + + if (!fileExists('milvus-helm')) { + dir ("milvus-helm") { + checkout([$class: 'GitSCM', branches: [[name: "0.5.0"]], doGenerateSubmoduleConfigurations: false, extensions: [], submoduleCfg: [], userRemoteConfigs: [[credentialsId: "${params.GIT_CREDENTIALS_ID}", url: "https://github.com/milvus-io/milvus-helm.git", name: 'origin', refspec: "+refs/heads/0.5.0:refs/remotes/origin/0.5.0"]]]) + } + } + dir ("milvus-helm") { + dir ("milvus-gpu") { + sh "helm install --wait --timeout 300 --set engine.image.tag=${DOCKER_VERSION} --set expose.type=clusterIP --name ${env.PIPELINE_NAME}-${env.BUILD_NUMBER}-single-gpu -f ci/db_backend/mysql_values.yaml -f ci/filebeat/values.yaml --namespace milvus ." + } + } + dir ("tests/milvus_python_test") { + sh "pytest . --alluredir=\"test_out/dev/single/mysql\" --ip ${env.PIPELINE_NAME}-${env.BUILD_NUMBER}-single-gpu-milvus-gpu-engine.milvus.svc.cluster.local" + } +} From 0425ebac504016c213ba12057186daa9749a4526 Mon Sep 17 00:00:00 2001 From: milvus-ci-robot Date: Sat, 26 Oct 2019 11:27:01 +0800 Subject: [PATCH 48/89] fix ci/jenkins/Jenkinsfile error Former-commit-id: b94643897b9a962fb0fbe7a6dd34bec5a2e37b73 --- ci/jenkins/Jenkinsfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ci/jenkins/Jenkinsfile b/ci/jenkins/Jenkinsfile index 179a7fed38..66c7eb864e 100644 --- a/ci/jenkins/Jenkinsfile +++ b/ci/jenkins/Jenkinsfile @@ -125,7 +125,7 @@ pipeline { script { if (NIGHTLIY_TEST) { load "${env.WORKSPACE}/ci/jenkins/jenkinsfile/singleDevNightlyTest.groovy" - else { + } else { load "${env.WORKSPACE}/ci/jenkins/jenkinsfile/singleDevTest.groovy" } } From de26c2bcfccc7c5eccf5a06caea5d259c237526d Mon Sep 17 00:00:00 2001 From: milvus-ci-robot Date: Sat, 26 Oct 2019 11:31:49 +0800 Subject: [PATCH 49/89] update ci/jenkins/jenkinsfile/singleDevNightlyTest.groovy Former-commit-id: 05fdd554d4a78d70b4497a93017d285d5d364546 --- ci/jenkins/jenkinsfile/singleDevNightlyTest.groovy | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ci/jenkins/jenkinsfile/singleDevNightlyTest.groovy b/ci/jenkins/jenkinsfile/singleDevNightlyTest.groovy index 91699f533b..5140ad858f 100644 --- a/ci/jenkins/jenkinsfile/singleDevNightlyTest.groovy +++ b/ci/jenkins/jenkinsfile/singleDevNightlyTest.groovy @@ -1,7 +1,7 @@ timeout(time: 90, unit: 'MINUTES') { dir ("tests/milvus_python_test") { sh 'python3 -m pip install -r requirements.txt' - sh "pytest . --alluredir=\"test_out/dev/single/sqlite\" --level=1 --ip ${env.PIPELINE_NAME}-${env.BUILD_NUMBER}-single-gpu-milvus-gpu-engine.milvus.svc.cluster.local" + sh "pytest . --alluredir=\"test_out/dev/single/sqlite\" --ip ${env.PIPELINE_NAME}-${env.BUILD_NUMBER}-single-gpu-milvus-gpu-engine.milvus.svc.cluster.local" } // mysql database backend test load "${env.WORKSPACE}/ci/jenkins/jenkinsfile/cleanupSingleDev.groovy" From e75898f8f288cd4ce878fb9eaad0d4b9cbf85ded Mon Sep 17 00:00:00 2001 From: milvus-ci-robot Date: Sat, 26 Oct 2019 12:23:42 +0800 Subject: [PATCH 50/89] update ci/jenkins/jenkinsfile/deploySingle2Dev.groovy Former-commit-id: 213fb87c0bfc8554f34bd2451e019dce12e9c950 --- .../jenkinsfile/deploySingle2Dev.groovy | 19 +++++++------------ 1 file changed, 7 insertions(+), 12 deletions(-) diff --git a/ci/jenkins/jenkinsfile/deploySingle2Dev.groovy b/ci/jenkins/jenkinsfile/deploySingle2Dev.groovy index 718b74778e..738c714a0c 100644 --- a/ci/jenkins/jenkinsfile/deploySingle2Dev.groovy +++ b/ci/jenkins/jenkinsfile/deploySingle2Dev.groovy @@ -1,14 +1,9 @@ -try { - sh 'helm init --client-only --skip-refresh --stable-repo-url https://kubernetes.oss-cn-hangzhou.aliyuncs.com/charts' - sh 'helm repo update' - dir ('milvus-helm') { - checkout([$class: 'GitSCM', branches: [[name: "0.5.0"]], doGenerateSubmoduleConfigurations: false, extensions: [], submoduleCfg: [], userRemoteConfigs: [[credentialsId: "${params.GIT_CREDENTIALS_ID}", url: "https://github.com/milvus-io/milvus-helm.git", name: 'origin', refspec: "+refs/heads/0.5.0:refs/remotes/origin/0.5.0"]]]) - dir ("milvus-gpu") { - sh "helm install --wait --timeout 300 --set engine.image.tag=${DOCKER_VERSION} --set expose.type=clusterIP --name ${env.PIPELINE_NAME}-${env.BUILD_NUMBER}-single-gpu -f ci/values.yaml -f ci/filebeat/values.yaml --namespace milvus ." - } +sh 'helm init --client-only --skip-refresh --stable-repo-url https://kubernetes.oss-cn-hangzhou.aliyuncs.com/charts' +sh 'helm repo update' +dir ('milvus-helm') { + checkout([$class: 'GitSCM', branches: [[name: "0.5.0"]], doGenerateSubmoduleConfigurations: false, extensions: [], submoduleCfg: [], userRemoteConfigs: [[credentialsId: "${params.GIT_CREDENTIALS_ID}", url: "https://github.com/milvus-io/milvus-helm.git", name: 'origin', refspec: "+refs/heads/0.5.0:refs/remotes/origin/0.5.0"]]]) + dir ("milvus-gpu") { + sh "helm install --wait --timeout 300 --set engine.image.tag=${DOCKER_VERSION} --set expose.type=clusterIP --name ${env.PIPELINE_NAME}-${env.BUILD_NUMBER}-single-gpu -f ci/values.yaml -f ci/filebeat/values.yaml --namespace milvus ." } -} catch (exc) { - echo 'Helm running failed!' - sh "helm del --purge ${env.PIPELINE_NAME}-${env.BUILD_NUMBER}-single-gpu" - throw exc } + From 0b563e51108cfea0aad77e4d8bbbdd3482dfb9e2 Mon Sep 17 00:00:00 2001 From: jielinxu <52057195+jielinxu@users.noreply.github.com> Date: Sat, 26 Oct 2019 12:29:55 +0800 Subject: [PATCH 51/89] Remove ===== Former-commit-id: 8e916d1012712a59859620b8fc4939c71e12f637 --- README.md | 5 ----- 1 file changed, 5 deletions(-) diff --git a/README.md b/README.md index e847f0e8ca..0b71b053ec 100644 --- a/README.md +++ b/README.md @@ -201,9 +201,4 @@ Please read our [roadmap](https://milvus.io/docs/en/roadmap/) to learn about upc [Apache 2.0 license](LICENSE) -======= -![LICENSE](https://img.shields.io/badge/license-Apache--2.0-brightgreen.svg) -![Language](https://img.shields.io/badge/language-C%2B%2B-blue.svg) -![Release](https://img.shields.io/badge/Release-v0.5.0-orange.svg) -![Release date](https://img.shields.io/badge/release__date-October-yellowgreen) From 9d751d295ed6ac4472773b528c1749fb7c7bb311 Mon Sep 17 00:00:00 2001 From: milvus-ci-robot Date: Sat, 26 Oct 2019 14:35:53 +0800 Subject: [PATCH 52/89] time share run nightly test Former-commit-id: c95e691615b2a356e372264dd6eeb845346c5891 --- ci/jenkins/Jenkinsfile | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/ci/jenkins/Jenkinsfile b/ci/jenkins/Jenkinsfile index 66c7eb864e..813101ba48 100644 --- a/ci/jenkins/Jenkinsfile +++ b/ci/jenkins/Jenkinsfile @@ -1,4 +1,6 @@ -String cron_string = BRANCH_NAME == "master" || BRANCH_NAME == "0.5.0" || BRANCH_NAME == "0.5.1" ? "H 0 * * *" : "" +String cron_string = BRANCH_NAME == "master" ? "H 0 * * *" : cron_string +cron_string = BRANCH_NAME == "0.5.1" ? "H 1 * * *" : cron_string + pipeline { agent none @@ -23,7 +25,7 @@ pipeline { SEMVER = "${BRANCH_NAME}" JOBNAMES = env.JOB_NAME.split('/') PIPELINE_NAME = "${JOBNAMES[0]}" - NIGHTLIY_TEST = "${cron_string == "" ? false : true}" + NIGHTLIY_TEST = "${cron_string ? true : false}" } stages { From 1a1c858f1e7992fd77bfb4f44a6c744e5376231c Mon Sep 17 00:00:00 2001 From: milvus-ci-robot Date: Sat, 26 Oct 2019 14:39:56 +0800 Subject: [PATCH 53/89] fix ci/jenkins/Jenkinsfile error Former-commit-id: a2a27530e47b57b2b4eaaa76544f62f9f5a2496c --- ci/jenkins/Jenkinsfile | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/ci/jenkins/Jenkinsfile b/ci/jenkins/Jenkinsfile index 813101ba48..44a2cbb156 100644 --- a/ci/jenkins/Jenkinsfile +++ b/ci/jenkins/Jenkinsfile @@ -1,4 +1,4 @@ -String cron_string = BRANCH_NAME == "master" ? "H 0 * * *" : cron_string +String cron_string = BRANCH_NAME == "master" ? "H 0 * * *" : "" cron_string = BRANCH_NAME == "0.5.1" ? "H 1 * * *" : cron_string pipeline { @@ -25,7 +25,7 @@ pipeline { SEMVER = "${BRANCH_NAME}" JOBNAMES = env.JOB_NAME.split('/') PIPELINE_NAME = "${JOBNAMES[0]}" - NIGHTLIY_TEST = "${cron_string ? true : false}" + NIGHTLIY_TEST = "${cron_string == "" ? false : true}" } stages { From 0a7434020f5d3de406749ccecf3237a8c124badc Mon Sep 17 00:00:00 2001 From: milvus-ci-robot Date: Sat, 26 Oct 2019 15:07:11 +0800 Subject: [PATCH 54/89] format ci/jenkins/jenkinsfile/cleanupSingleDev.groovy Former-commit-id: 6ce898ed133881f30c04427369b85055b1a4f97f --- ci/jenkins/jenkinsfile/cleanupSingleDev.groovy | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/ci/jenkins/jenkinsfile/cleanupSingleDev.groovy b/ci/jenkins/jenkinsfile/cleanupSingleDev.groovy index 6e85a678be..3b8c1833b5 100644 --- a/ci/jenkins/jenkinsfile/cleanupSingleDev.groovy +++ b/ci/jenkins/jenkinsfile/cleanupSingleDev.groovy @@ -1,5 +1,8 @@ try { - sh "helm del --purge ${env.PIPELINE_NAME}-${env.BUILD_NUMBER}-single-gpu" + def helmResult = sh script: "helm status ${env.PIPELINE_NAME}-${env.BUILD_NUMBER}-single-gpu", returnStatus: true + if (!helmResult) { + sh "helm del --purge ${env.PIPELINE_NAME}-${env.BUILD_NUMBER}-single-gpu" + } } catch (exc) { def helmResult = sh script: "helm status ${env.PIPELINE_NAME}-${env.BUILD_NUMBER}-single-gpu", returnStatus: true if (!helmResult) { From 66cfb2d527f43c50ce85d12ca762ccfd7b9209fc Mon Sep 17 00:00:00 2001 From: milvus-ci-robot Date: Sat, 26 Oct 2019 15:52:35 +0800 Subject: [PATCH 55/89] add isTimeTriggeredBuild function in ci/jenkins/Jenkinsfile Former-commit-id: d19c73db3fb8a3bc942402b547114b9b554ef585 --- ci/jenkins/Jenkinsfile | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/ci/jenkins/Jenkinsfile b/ci/jenkins/Jenkinsfile index 44a2cbb156..235a56b781 100644 --- a/ci/jenkins/Jenkinsfile +++ b/ci/jenkins/Jenkinsfile @@ -25,7 +25,6 @@ pipeline { SEMVER = "${BRANCH_NAME}" JOBNAMES = env.JOB_NAME.split('/') PIPELINE_NAME = "${JOBNAMES[0]}" - NIGHTLIY_TEST = "${cron_string == "" ? false : true}" } stages { @@ -125,7 +124,8 @@ pipeline { steps { container('milvus-test-env') { script { - if (NIGHTLIY_TEST) { + boolean isNightlyTest = isTimeTriggeredBuild() + if (isNightlyTest) { load "${env.WORKSPACE}/ci/jenkins/jenkinsfile/singleDevNightlyTest.groovy" } else { load "${env.WORKSPACE}/ci/jenkins/jenkinsfile/singleDevTest.groovy" @@ -160,3 +160,9 @@ pipeline { } } +boolean isTimeTriggeredBuild() { + for (Object currentBuildCause : script.currentBuild.rawBuild.getCauses()) { + return currentBuildCause.class.getName().contains('TimerTriggerCause') + } + return false +} From a0c938bc018869ed4fb4ce64d7a04a0860947168 Mon Sep 17 00:00:00 2001 From: milvus-ci-robot Date: Sat, 26 Oct 2019 17:31:54 +0800 Subject: [PATCH 56/89] update isTimeTriggeredBuild funtion in ci/jenkins/Jenkinsfile Former-commit-id: a4bf86ced62c3191dfdc73b87251477eddf347c4 --- ci/jenkins/Jenkinsfile | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/ci/jenkins/Jenkinsfile b/ci/jenkins/Jenkinsfile index 235a56b781..0a16c7f2c3 100644 --- a/ci/jenkins/Jenkinsfile +++ b/ci/jenkins/Jenkinsfile @@ -161,8 +161,8 @@ pipeline { } boolean isTimeTriggeredBuild() { - for (Object currentBuildCause : script.currentBuild.rawBuild.getCauses()) { - return currentBuildCause.class.getName().contains('TimerTriggerCause') + if (currentBuild.getBuildCauses('hudson.triggers.TimerTrigger$TimerTriggerCause').size() != 0) { + return true } return false } From 70c3ae83d97d073a96a8898c9c131313775d3004 Mon Sep 17 00:00:00 2001 From: jielinxu <52057195+jielinxu@users.noreply.github.com> Date: Mon, 28 Oct 2019 09:31:46 +0800 Subject: [PATCH 57/89] [skip ci] Minor updates Former-commit-id: ca8c3d047a49e1c580eb290fa088a9bd34b99868 --- README.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 0b71b053ec..f18328f748 100644 --- a/README.md +++ b/README.md @@ -170,7 +170,7 @@ Refer to [this link](https://github.com/milvus-io/milvus-sdk-java/tree/master/ex ## Contribution guidelines -Contributions are welcomed and greatly appreciated. If you want to contribute to Milvus, please read our [contribution guidelines](CONTRIBUTING.md). This project adheres to the [code of conduct](CODE_OF_CONDUCT.md) of Milvus. By participating, you are expected to uphold this code. +Contributions are welcomed and greatly appreciated. Please read our [contribution guidelines](CONTRIBUTING.md) for detailed contribution workflow. This project adheres to the [code of conduct](CODE_OF_CONDUCT.md) of Milvus. By participating, you are expected to uphold this code. We use [GitHub issues](https://github.com/milvus-io/milvus/issues/new/choose) to track issues and bugs. For general questions and public discussions, please join our community. @@ -199,6 +199,6 @@ Please read our [roadmap](https://milvus.io/docs/en/roadmap/) to learn about upc ## License -[Apache 2.0 license](LICENSE) +[Apache License 2.0](LICENSE) From b852170b9210a884c9ba683e25f9e81fc76f6bef Mon Sep 17 00:00:00 2001 From: milvus-ci-robot Date: Mon, 28 Oct 2019 10:19:07 +0800 Subject: [PATCH 58/89] set cron timezone to Shanghai Former-commit-id: b1f05d796dce95cae1b19c5729ccd4d8da8219c1 --- ci/jenkins/Jenkinsfile | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/ci/jenkins/Jenkinsfile b/ci/jenkins/Jenkinsfile index 0a16c7f2c3..15663183ab 100644 --- a/ci/jenkins/Jenkinsfile +++ b/ci/jenkins/Jenkinsfile @@ -1,10 +1,14 @@ +String cron_timezone = "TZ=Asia/Shanghai" String cron_string = BRANCH_NAME == "master" ? "H 0 * * *" : "" cron_string = BRANCH_NAME == "0.5.1" ? "H 1 * * *" : cron_string pipeline { agent none - triggers { cron(cron_string) } + triggers { + cron("${cron_timezone} + ${cron_string}") + } options { timestamps() From 3e50d77cdc6433c4c3402a4477bf76e30c851343 Mon Sep 17 00:00:00 2001 From: milvus-ci-robot Date: Mon, 28 Oct 2019 10:24:20 +0800 Subject: [PATCH 59/89] set cron timezone to Shanghai Former-commit-id: c4f1a0513722460109d1e681b57a196faedc119d --- ci/jenkins/Jenkinsfile | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/ci/jenkins/Jenkinsfile b/ci/jenkins/Jenkinsfile index 15663183ab..7c2d83ba59 100644 --- a/ci/jenkins/Jenkinsfile +++ b/ci/jenkins/Jenkinsfile @@ -6,8 +6,8 @@ pipeline { agent none triggers { - cron("${cron_timezone} - ${cron_string}") + cron('''"${cron_timezone}" + "${cron_string}"''') } options { From 60d68defdae9f8c7e57ce4f6a44106fc50ac6eec Mon Sep 17 00:00:00 2001 From: milvus-ci-robot Date: Mon, 28 Oct 2019 10:29:59 +0800 Subject: [PATCH 60/89] set cron timezone to Shanghai Former-commit-id: 28271c43dfae440fd2baa541e846d80d6bf354d8 --- ci/jenkins/Jenkinsfile | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/ci/jenkins/Jenkinsfile b/ci/jenkins/Jenkinsfile index 7c2d83ba59..67bff5ac1e 100644 --- a/ci/jenkins/Jenkinsfile +++ b/ci/jenkins/Jenkinsfile @@ -1,13 +1,13 @@ String cron_timezone = "TZ=Asia/Shanghai" -String cron_string = BRANCH_NAME == "master" ? "H 0 * * *" : "" -cron_string = BRANCH_NAME == "0.5.1" ? "H 1 * * *" : cron_string +String cron_string = BRANCH_NAME == "master" ? "H 0 * * * " : "" +cron_string = BRANCH_NAME == "0.5.1" ? "H 1 * * * " : cron_string pipeline { agent none triggers { - cron('''"${cron_timezone}" - "${cron_string}"''') + cron """${cron_timezone} + ${cron_string}""" } options { From c9b2d136192a68c15442babd46d75b624ee52321 Mon Sep 17 00:00:00 2001 From: zhenwu Date: Mon, 28 Oct 2019 11:07:34 +0800 Subject: [PATCH 61/89] Disble mysql-version test Former-commit-id: e1deb82d6ddf2130a945689269c1be9e91f33f5e --- ci/jenkins/jenkinsfile/singleDevTest.groovy | 29 +- .../markdown-link-extractor.py | 420 ++++++++++++++++++ tests/milvus_doc_test/requirements.txt | 3 + tests/milvus_python_test/test_connect.py | 1 - tests/milvus_python_test/test_index.py | 4 +- tests/milvus_python_test/test_ping.py | 2 +- tests/milvus_python_test/test_table.py | 2 +- 7 files changed, 443 insertions(+), 18 deletions(-) create mode 100644 tests/milvus_doc_test/markdown-link-extractor.py create mode 100644 tests/milvus_doc_test/requirements.txt diff --git a/ci/jenkins/jenkinsfile/singleDevTest.groovy b/ci/jenkins/jenkinsfile/singleDevTest.groovy index adfadc9271..d0158a9943 100644 --- a/ci/jenkins/jenkinsfile/singleDevTest.groovy +++ b/ci/jenkins/jenkinsfile/singleDevTest.groovy @@ -6,17 +6,20 @@ timeout(time: 60, unit: 'MINUTES') { // mysql database backend test load "${env.WORKSPACE}/ci/jenkins/jenkinsfile/cleanupSingleDev.groovy" - if (!fileExists('milvus-helm')) { - dir ("milvus-helm") { - checkout([$class: 'GitSCM', branches: [[name: "0.5.0"]], doGenerateSubmoduleConfigurations: false, extensions: [], submoduleCfg: [], userRemoteConfigs: [[credentialsId: "${params.GIT_CREDENTIALS_ID}", url: "https://github.com/milvus-io/milvus-helm.git", name: 'origin', refspec: "+refs/heads/0.5.0:refs/remotes/origin/0.5.0"]]]) - } - } - dir ("milvus-helm") { - dir ("milvus-gpu") { - sh "helm install --wait --timeout 300 --set engine.image.tag=${DOCKER_VERSION} --set expose.type=clusterIP --name ${env.PIPELINE_NAME}-${env.BUILD_NUMBER}-single-gpu -f ci/db_backend/mysql_values.yaml -f ci/filebeat/values.yaml --namespace milvus ." - } - } - dir ("tests/milvus_python_test") { - sh "pytest . --alluredir=\"test_out/dev/single/mysql\" --level=1 --ip ${env.PIPELINE_NAME}-${env.BUILD_NUMBER}-single-gpu-milvus-gpu-engine.milvus.svc.cluster.local" - } + + // Remove mysql-version tests: 10-28 + + // if (!fileExists('milvus-helm')) { + // dir ("milvus-helm") { + // checkout([$class: 'GitSCM', branches: [[name: "0.5.0"]], doGenerateSubmoduleConfigurations: false, extensions: [], submoduleCfg: [], userRemoteConfigs: [[credentialsId: "${params.GIT_CREDENTIALS_ID}", url: "https://github.com/milvus-io/milvus-helm.git", name: 'origin', refspec: "+refs/heads/0.5.0:refs/remotes/origin/0.5.0"]]]) + // } + // } + // dir ("milvus-helm") { + // dir ("milvus-gpu") { + // sh "helm install --wait --timeout 300 --set engine.image.tag=${DOCKER_VERSION} --set expose.type=clusterIP --name ${env.PIPELINE_NAME}-${env.BUILD_NUMBER}-single-gpu -f ci/db_backend/mysql_values.yaml -f ci/filebeat/values.yaml --namespace milvus ." + // } + // } + // dir ("tests/milvus_python_test") { + // sh "pytest . --alluredir=\"test_out/dev/single/mysql\" --level=1 --ip ${env.PIPELINE_NAME}-${env.BUILD_NUMBER}-single-gpu-milvus-gpu-engine.milvus.svc.cluster.local" + // } } diff --git a/tests/milvus_doc_test/markdown-link-extractor.py b/tests/milvus_doc_test/markdown-link-extractor.py new file mode 100644 index 0000000000..3744089e69 --- /dev/null +++ b/tests/milvus_doc_test/markdown-link-extractor.py @@ -0,0 +1,420 @@ +# -*- coding: utf-8 -*- +# Using Python 3.x + +import urllib.request +import urllib.error +from pathlib import Path +import requests +import json +from urllib.parse import urlparse +import markdown +import os +from os.path import join, getsize +from bs4 import BeautifulSoup +import re +from sys import platform +import argparse + + +class LinksFromMarkdown(object): + + def __init__(self, repository): + self.dictionary = repository + + def extract_links_from_markdown(self, repository): + + + if platform == "linux" or platform == "linux2": + # linux + link_file = "../link_reports/" + "extracted_links.json" + dirName = "../link_reports" + elif platform == "darwin": + # OS X + link_file = "../link_reports/" + "extracted_links.json" + dirName = "../link_reports" + elif platform == "win32": + # Windows... + link_file = "..\\link_reports\\" + "extracted_links.json" + dirName = "..\\link_reports" + + # repository = "D:\\GithubRepo\\docs-master\\docs-master" + + + try: + # Create target Directory + os.mkdir(dirName) + print("Directory ", dirName, " Created ") + except FileExistsError: + print("Directory ", dirName, " already exists") + + md_files = [] + + for root, dirs, files in os.walk(repository): + # print(root, "consumes", end=" ") + # print(sum(getsize(join(root, name)) for name in files), end=" ") + # print("bytes in", len(files), "non-directory files") + if len(files) != 0: + # print(files) + for file in files: + if file.endswith(".md") or file.endswith(".MD") or file.endswith(".mD") or file.endswith(".Md"): + md_files.append(join(root, file)) + # elif file.endswith(".png") or file.endswith(".PNG"): + # pics.append((join(root, file))) + + # print(md_files) + # print(pics) + + a_href_list = [] + + for md_file in md_files: + with open(md_file, "r", encoding="utf-8") as f: + html = markdown.markdown(f.read()) + # print(html) + soup = BeautifulSoup(html, "lxml") + a_hrefs = [(x.get('href')) for x in soup.find_all("a")] + + a_href_list.append(a_hrefs) + # print(a_hrefs) + # print(md_file) + + # Generates a dictionary that indicates each MD file and links extracted from the MD file + dictionary = dict(zip(md_files, a_href_list)) + + with open(link_file, "w+", encoding="utf-8") as f: + json.dump(dictionary, f) + + return link_file + + + # print(dictionary) + +class CheckExtractedLinksFromMarkdown(object): + + def __init__(self, link_file): + self.link_file = link_file + + def check_extracted_links(self, link_file): + + if platform == "linux" or platform == "linux2": + # linux + report_name = "../link_reports/" + "link_validation_report.html" + + elif platform == "darwin": + # OS X + report_name = "../link_reports/" + "link_validation_report.html" + + elif platform == "win32": + # Windows... + report_name = "..\\link_reports\\" + "link_validation_report.html" + + html_code = """Link Validation Detailed Report

Link Validation Detailed Report

""" + + with open(link_file, "r", encoding="utf-8") as f: + json_text = f.read() + + link_dict = json.loads(json_text) + + + # If the report file exists, remove the file. + text_file = Path(report_name) + if text_file.is_file(): + os.remove(report_name) + + with open(report_name, "w+", encoding="utf-8") as f: + f.write(html_code) + + # Iterate over all MD files + # key ---> MD file location + # value ---> An array of links in the MD file, including internet links and file links + + invalid_counter = 0 + + for key in link_dict.keys(): + head_code = "" + table_code = "" + + if link_dict.get(key) == []: + + with open(report_name, "a", encoding="utf-8") as f: + f.write("""

Checking links in """ + key) + f.write("""

This markdown file does not contain any links.

""") + else: + + head_code = """""" + + with open(report_name, "a", encoding="utf-8") as f: + f.write("""

Checking links in """ + key) + f.write(head_code) + + # Iterate over all links in each MD file + for link in link_dict.get(key): + # Check internet links: http,https + + try: + assert type(link) is str + + except AssertionError as e: + invalid_counter = invalid_counter + 1 + a_row_code = """

""" + with open(report_name, "a", encoding="utf-8") as f: + f.write(a_row_code) + continue + + # MD files that are not well-formed may raise exceptions. If parentheses are not correctly escaped, a NoneType object may be returned + + if link.startswith("http://") or link.startswith("https://"): + try: + link_response = requests.get(link, timeout=60) + status_code = link_response.status_code + + # Informational responses (100–199), + # Successful responses (200–299), + # Redirects (300–399), + # Client errors (400–499), + # and Server errors (500–599). + + if status_code in range(200,299): + # For links that do not contain hashes + if "#" not in link: + row_code = """""" + # For links that contain hashes + else: + + try: + # Acquire the url after "#" + headers = { + 'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64; rv:23.0) Gecko/20100101 Firefox/23.0'} + + req = urllib.request.Request(url=str( + urlparse(link).scheme + "://" + urlparse(link).netloc + urlparse(link).path), headers=headers) + response = urllib.request.urlopen(req,data=None) + html_code = response.read() + soup = BeautifulSoup(html_code.decode("utf-8"), "lxml") + a_hash = soup.find("a", {"id": str(urlparse(link).fragment)}) + h1_hash = soup.find("h1", {"id": str(urlparse(link).fragment)}) + h2_hash = soup.find("h2", {"id": str(urlparse(link).fragment)}) + h3_hash = soup.find("h3", {"id": str(urlparse(link).fragment)}) + h4_hash = soup.find("h4", {"id": str(urlparse(link).fragment)}) + h5_hash = soup.find("h5", {"id": str(urlparse(link).fragment)}) + h6_hash = soup.find("h6", {"id": str(urlparse(link).fragment)}) + div_hash = soup.find("div",{"id": str(urlparse(link).fragment)}) + + if (None, None, None, None, None, None, None, None) != ( + a_hash, h1_hash, h2_hash, h3_hash, h4_hash, h5_hash, h6_hash, div_hash): + row_code = """""" + + else: + row_code = """""" """""" + + + except urllib.error.HTTPError as http_error: + row_code = """""" + except urllib.error.URLError as url_error: + row_code = """""" + + elif status_code in range(400,599): + row_code = """""" + + + except requests.exceptions.Timeout as timeout_error: + print(timeout_error) + row_code = """""" + + + + except requests.exceptions.ConnectionError as connection_error: + print(connection_error) + row_code = """""" + + + + except requests.exceptions.HTTPError as http_error: + print(http_error) + row_code = """""" + + + # elif link.startswith("mailto:"): + + # Check MD file links + + # File path formats on Windows systems from https://docs.microsoft.com/en-us/dotnet/standard/io/file-path-formats + # C:\Documents\Newsletters\Summer2018.pdf An absolute file path from the root of drive C: + # \Program Files\Custom Utilities\StringFinder.exe An absolute path from the root of the current drive. + # 2018\January.xlsx A relative path to a file in a subdirectory of the current directory. + # ..\Publications\TravelBrochure.pdf A relative path to file in a directory that is a peer of the current directory. + # C:\Projects\apilibrary\apilibrary.sln An absolute path to a file from the root of drive C: + # C:Projects\apilibrary\apilibrary.sln A relative path from the current directory of the C: drive. + + # We do not use absolute path formats in MD files and path formats are not likely to be from the root of the current drive. So here are possible formats: + # 2018\January.md + # ..\Publications\TravelBrochure.md + + # Check if file exists + + elif link.endswith(".md") or link.endswith(".MD") or link.endswith(".mD") or link.endswith(".Md"): + # A relative path to file in a directory that is a peer of the current directory. + if link.startswith("..\\"): + # Get the absolute location of the linked md + cur_direct = os.path.dirname(key) + final_direct = os.path.dirname(cur_direct) + linked_md = os.path.join(final_direct,link) + # Check if the linked md exists + if Path(linked_md).is_file(): + row_code = """""" + + else: + row_code = """""" + + # A relative path to a file in a subdirectory of the current directory. + else: + # Get the absolute location of the linked md + cur_direct = os.path.dirname(key) + linked_md = os.path.join(cur_direct, link) + # Check if the linked md exists + if Path(linked_md).is_file(): + row_code = """""" + + else: + row_code = """""" + + elif link.startswith("#"): + # Validate if anchors correctly show in the MD file + with open(key,"r",encoding="utf-8") as f: + md_text = f.read() + # print(str(md_text)) + reg = re.compile(str("#" + "\s*" + link[1:])) + + if """""" in str(md_text) or len(re.findall(reg,str(md_text))) == 2: + row_code = """""" + else: + row_code = """""" + # Writes row_code for the link to the table + with open(report_name, "a", encoding="utf-8") as f: + f.write(row_code) + # print(row_code) + # Writes the end of the table for the key + with open(report_name, "a", encoding="utf-8") as f: + f.write("
LinkStatusMarkdown File
Invalid Link Number """ + str(invalid_counter) +"""""" + """This link is not string, which indicates that your MD file may not be well-formed.""" + """""" + key + """
""" + """""" + link + """""" + """""" + str(status_code) + """""" + key + """
""" + """""" + link + """""" + """""" + str( + status_code) + """""" + key + """
""" + """""" + link + """""" + """""" + str( + status_code) + """ The URL looks good but the anchor link does not work or is not using an anchor tag.""" + """""" + key + """
""" + """""" + link + """""" + """""" + str( + status_code) + """ """ + str(http_error) + """ The URL looks good but the page then returns an HTTP error.""" + key + """
""" + """""" + link + """""" + """""" + str( + status_code) + """ """ + str(url_error) + """ The URL looks good but the page then returns a URL error.""" + key + """
""" + """""" + link + """""" + """""" + str( + status_code) + """""" + key + """
""" + """""" + link + """""" + """""" + str( + timeout_error) + """""" + key + """
""" + """""" + link + """""" + """""" + str( + connection_error) + """""" + key + """
""" + """""" + link + """""" + """""" + str( + http_error) + """""" + key + """
""" + link + """The file link looks good.""" + key + """
""" + link + """The file link is broken.""" + key + """
""" + link + """The file link looks good.""" + key + """
""" + link + """The file link is broken.""" + key + """
""" + link + """The anchor link looks good.""" + key + """
""" + link + """The anchor link is broken.""" + key + """
") + print("Completed link checking for " + key) + + with open(report_name, "a", encoding="utf-8") as f: + f.write("") + print("Completed link checking for all markdown files") + + return report_name + + +class GenerateReportSummary(object): + def __init__(self, report_name): + self.report_name = report_name + + def generate_report_summary(self, report_name): + + if platform == "linux" or platform == "linux2": + # linux + summary_name = "../link_reports/" + "link_validation_summary.html" + + elif platform == "darwin": + # OS X + summary_name = "../link_reports/" + "link_validation_summary.html" + + elif platform == "win32": + # Windows... + summary_name = "..\\link_reports\\" + "link_validation_summary.html" + + # Use BeautifulSoup to read this report and return statistics + with open(report_name, "r", encoding="utf-8") as f: + html_code = f.read() + soup = BeautifulSoup(html_code, "lxml") + failed_links_rows = soup.find_all("tr", {"class": "fail"}) + fail_count = len(failed_links_rows) + success_links_rows = soup.find_all("tr", {"class": "success"}) + pass_count = len(success_links_rows) + for failed_links_row in failed_links_rows: + del failed_links_row.attrs["bgcolor"] + # print(type(failed_links_rows)) + + # Write report summary to another HTML file + with open(summary_name, "w+", encoding="utf-8") as f: + f.write( + """Link Validation Report Summary

Link Validation Report Summary

""") + f.write("""

The number of failed links: """ + str(fail_count) + """. The number of passed links: """ + str(pass_count) + """ Pass rate: """ + str(float(pass_count/(pass_count+fail_count))*100) + '%') + f.write("""

Click the button to sort the table by parent page:

+

""") + f.write("""""") + f.write( + """""") + + for failed_link in set(failed_links_rows): + f.write(str(failed_link)) + f.write( + """
Failed LinksStatus CodeParent Page

""" + """Refer to this link for detailed report.""" + """

""") + +# Create the parser +my_parser = argparse.ArgumentParser(description='Check the links for all markdown files of a folder') + +# Add the arguments +my_parser.add_argument('Path', + metavar='path', + type=str, + help='The path to the repository that contains all markdown files.') + +# Execute the parse_args() method +args = my_parser.parse_args() + +repository = args.Path + +# Get link JSON file +LinksFromMarkdown_Milvus = LinksFromMarkdown(repository) +link_file = LinksFromMarkdown_Milvus.extract_links_from_markdown(repository) + +# Generate link validation report +CheckExtractedLinksFromMarkdown_Milvus = CheckExtractedLinksFromMarkdown(link_file) +report_name = CheckExtractedLinksFromMarkdown_Milvus.check_extracted_links(link_file) + +# Generate report summary +GenerateReportSummary_Milvus = GenerateReportSummary(report_name) +GenerateReportSummary_Milvus.generate_report_summary(report_name) \ No newline at end of file diff --git a/tests/milvus_doc_test/requirements.txt b/tests/milvus_doc_test/requirements.txt new file mode 100644 index 0000000000..e099aa161a --- /dev/null +++ b/tests/milvus_doc_test/requirements.txt @@ -0,0 +1,3 @@ +requests>=2.22.0 +markdown>=3.1.1 +beautifulsoup4>=4.8.1 diff --git a/tests/milvus_python_test/test_connect.py b/tests/milvus_python_test/test_connect.py index 5ec9539011..96ce1d3bdf 100644 --- a/tests/milvus_python_test/test_connect.py +++ b/tests/milvus_python_test/test_connect.py @@ -5,7 +5,6 @@ import threading from multiprocessing import Process from utils import * -__version__ = '0.5.0' CONNECT_TIMEOUT = 12 diff --git a/tests/milvus_python_test/test_index.py b/tests/milvus_python_test/test_index.py index 65716d45aa..269e6137da 100644 --- a/tests/milvus_python_test/test_index.py +++ b/tests/milvus_python_test/test_index.py @@ -301,7 +301,7 @@ class TestIndexBase: vectors = gen_vectors(nq, dim) table_list = [] for i in range(10): - table_name = gen_unique_str('test_create_index_multi_tables') + table_name = gen_unique_str() table_list.append(table_name) param = {'table_name': table_name, 'dimension': dim, @@ -769,7 +769,7 @@ class TestIndexIP: vectors = gen_vectors(nq, dim) table_list = [] for i in range(10): - table_name = gen_unique_str('test_create_index_multi_tables') + table_name = gen_unique_str() table_list.append(table_name) param = {'table_name': table_name, 'dimension': dim, diff --git a/tests/milvus_python_test/test_ping.py b/tests/milvus_python_test/test_ping.py index a55559bc63..d63ab93f11 100644 --- a/tests/milvus_python_test/test_ping.py +++ b/tests/milvus_python_test/test_ping.py @@ -1,7 +1,7 @@ import logging import pytest -__version__ = '0.5.0' +__version__ = '0.5.1' class TestPing: diff --git a/tests/milvus_python_test/test_table.py b/tests/milvus_python_test/test_table.py index 88f7caca3c..6af38bac15 100644 --- a/tests/milvus_python_test/test_table.py +++ b/tests/milvus_python_test/test_table.py @@ -806,7 +806,7 @@ def create_index(connect, **params): return status func_map = { - 0:has, + # 0:has, 1:show, 10:create_table, 11:describe, From 4e5b778a3d79e2fd95251ddaf03f82152342e4d4 Mon Sep 17 00:00:00 2001 From: jielinxu <52057195+jielinxu@users.noreply.github.com> Date: Mon, 28 Oct 2019 11:19:09 +0800 Subject: [PATCH 62/89] [skip ci] Add Contributors section Former-commit-id: 6c3a3fa7b3556bf904eee77225730f3d9a1befba --- README.md | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/README.md b/README.md index f18328f748..43f117ff92 100644 --- a/README.md +++ b/README.md @@ -178,6 +178,13 @@ We use [GitHub issues](https://github.com/milvus-io/milvus/issues/new/choose) to To connect with other users and contributors, welcome to join our [slack channel](https://join.slack.com/t/milvusio/shared_invite/enQtNzY1OTQ0NDI3NjMzLWNmYmM1NmNjOTQ5MGI5NDhhYmRhMGU5M2NhNzhhMDMzY2MzNDdlYjM5ODQ5MmE3ODFlYzU3YjJkNmVlNDQ2ZTk). +## Contributors + +Deep thanks and appreciation go to the following people. + +- [akihoni](https://github.com/akihoni) found a broken link and a small typo in the README file. + + ## Milvus Roadmap Please read our [roadmap](https://milvus.io/docs/en/roadmap/) to learn about upcoming features. From d76ffe9ab3fd17c7a05422d032fed42404c7695a Mon Sep 17 00:00:00 2001 From: milvus-ci-robot Date: Mon, 28 Oct 2019 11:20:09 +0800 Subject: [PATCH 63/89] update deploySingle2Dev.groovy Former-commit-id: db4fd829b462037db8ce4ebf7b144ddc6b6f5142 --- ci/jenkins/jenkinsfile/deploySingle2Dev.groovy | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ci/jenkins/jenkinsfile/deploySingle2Dev.groovy b/ci/jenkins/jenkinsfile/deploySingle2Dev.groovy index 738c714a0c..bc6c6f4438 100644 --- a/ci/jenkins/jenkinsfile/deploySingle2Dev.groovy +++ b/ci/jenkins/jenkinsfile/deploySingle2Dev.groovy @@ -3,7 +3,7 @@ sh 'helm repo update' dir ('milvus-helm') { checkout([$class: 'GitSCM', branches: [[name: "0.5.0"]], doGenerateSubmoduleConfigurations: false, extensions: [], submoduleCfg: [], userRemoteConfigs: [[credentialsId: "${params.GIT_CREDENTIALS_ID}", url: "https://github.com/milvus-io/milvus-helm.git", name: 'origin', refspec: "+refs/heads/0.5.0:refs/remotes/origin/0.5.0"]]]) dir ("milvus-gpu") { - sh "helm install --wait --timeout 300 --set engine.image.tag=${DOCKER_VERSION} --set expose.type=clusterIP --name ${env.PIPELINE_NAME}-${env.BUILD_NUMBER}-single-gpu -f ci/values.yaml -f ci/filebeat/values.yaml --namespace milvus ." + sh "helm install --wait --timeout 300 --set engine.image.tag=${DOCKER_VERSION} --set expose.type=clusterIP --name ${env.PIPELINE_NAME}-${env.BUILD_NUMBER}-single-gpu -f ci/db_backend/sqlite_values.yaml -f ci/filebeat/values.yaml --namespace milvus ." } } From 430584f386ca87736e9d431cf7685150ded460a5 Mon Sep 17 00:00:00 2001 From: zhenwu Date: Mon, 28 Oct 2019 11:50:54 +0800 Subject: [PATCH 64/89] remove mysql test Former-commit-id: f56db45d7fd8c9eb145c4b7bb2df61abd8c54ad9 --- ci/jenkins/jenkinsfile/singleDevTest.groovy | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/ci/jenkins/jenkinsfile/singleDevTest.groovy b/ci/jenkins/jenkinsfile/singleDevTest.groovy index d0158a9943..16fe65a9b3 100644 --- a/ci/jenkins/jenkinsfile/singleDevTest.groovy +++ b/ci/jenkins/jenkinsfile/singleDevTest.groovy @@ -4,8 +4,7 @@ timeout(time: 60, unit: 'MINUTES') { sh "pytest . --alluredir=\"test_out/dev/single/sqlite\" --level=1 --ip ${env.PIPELINE_NAME}-${env.BUILD_NUMBER}-single-gpu-milvus-gpu-engine.milvus.svc.cluster.local" } // mysql database backend test - load "${env.WORKSPACE}/ci/jenkins/jenkinsfile/cleanupSingleDev.groovy" - + // load "${env.WORKSPACE}/ci/jenkins/jenkinsfile/cleanupSingleDev.groovy" // Remove mysql-version tests: 10-28 From 80b9c79c5d052db85a1d98c21d0ce35ccf8a9952 Mon Sep 17 00:00:00 2001 From: wxyu Date: Mon, 28 Oct 2019 12:23:25 +0800 Subject: [PATCH 65/89] Using new structure for tasktable Former-commit-id: 6742f21a429da87456ded0a910d248948dc948b4 --- CHANGELOG.md | 2 + core/src/scheduler/BuildMgr.h | 25 +++-- core/src/scheduler/CircleQueue.h | 119 +++++++++++++++++++++ core/src/scheduler/TaskTable.cpp | 96 ++++++++++++----- core/src/scheduler/TaskTable.h | 40 ++----- core/src/scheduler/resource/Resource.cpp | 7 +- core/unittest/scheduler/test_tasktable.cpp | 94 +++++++--------- 7 files changed, 257 insertions(+), 126 deletions(-) create mode 100644 core/src/scheduler/CircleQueue.h diff --git a/CHANGELOG.md b/CHANGELOG.md index 74a1951142..bcb3f5b70f 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -14,6 +14,8 @@ Please mark all change in change log and use the ticket from JIRA. - \#92 - Speed up CMake build process ## Feature +- \#115 - Using new structure for tasktable + ## Task # Milvus 0.5.0 (2019-10-21) diff --git a/core/src/scheduler/BuildMgr.h b/core/src/scheduler/BuildMgr.h index ee7ab38e25..805c01aafd 100644 --- a/core/src/scheduler/BuildMgr.h +++ b/core/src/scheduler/BuildMgr.h @@ -34,27 +34,30 @@ namespace scheduler { class BuildMgr { public: - explicit BuildMgr(int64_t numoftasks) : numoftasks_(numoftasks) { + explicit BuildMgr(int64_t concurrent_limit) : available_(concurrent_limit) { } public: void Put() { - ++numoftasks_; + std::lock_guard lock(mutex_); + ++available_; } - void - take() { - --numoftasks_; - } - - int64_t - numoftasks() { - return (int64_t)numoftasks_; + bool + Take() { + std::lock_guard lock(mutex_); + if (available_ < 1) { + return false; + } else { + --available_; + return true; + } } private: - std::atomic_long numoftasks_; + std::int64_t available_; + std::mutex mutex_; }; using BuildMgrPtr = std::shared_ptr; diff --git a/core/src/scheduler/CircleQueue.h b/core/src/scheduler/CircleQueue.h new file mode 100644 index 0000000000..5da9338ba5 --- /dev/null +++ b/core/src/scheduler/CircleQueue.h @@ -0,0 +1,119 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. +#pragma once + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace milvus { +namespace scheduler { + +template +class CircleQueue { + using value_type = T; + using atomic_size_type = std::atomic_ullong; + using size_type = uint64_t; + using const_reference = const value_type&; +#define MEMORY_ORDER (std::memory_order::memory_order_seq_cst) + + public: + explicit CircleQueue(size_type cap) : data_(cap, nullptr), capacity_(cap), front_() { + front_.store(cap - 1, MEMORY_ORDER); + } + + CircleQueue() = delete; + CircleQueue(const CircleQueue& q) = delete; + CircleQueue(CircleQueue&& q) = delete; + + public: + const_reference operator[](size_type n) { + return data_[n % capacity_]; + } + + size_type + front() { + return front_.load(MEMORY_ORDER); + } + + size_type + rear() { + return rear_; + } + + size_type + size() { + return size_; + } + + size_type + capacity() { + return capacity_; + } + + void + set_front(uint64_t last_finish) { + if (last_finish == rear_) { + throw; + } + front_.store(last_finish % capacity_, MEMORY_ORDER); + } + + void + put(const value_type& x) { + if ((rear_) % capacity_ == front_.load(MEMORY_ORDER)) { + throw; + } + data_[rear_] = x; + rear_ = ++rear_ % capacity_; + if (size_ < capacity_) { + ++size_; + } + } + + void + put(value_type&& x) { + if ((rear_) % capacity_ == front_.load(MEMORY_ORDER)) { + throw; + } + data_[rear_] = std::move(x); + rear_ = ++rear_ % capacity_; + if (size_ < capacity_) { + ++size_; + } + } + + private: + std::vector data_; + size_type capacity_; + atomic_size_type front_; + size_type rear_ = 0; + size_type size_ = 0; +#undef MEMORY_ORDER +}; + +} // namespace scheduler +} // namespace milvus diff --git a/core/src/scheduler/TaskTable.cpp b/core/src/scheduler/TaskTable.cpp index d0e6c1c38b..e35c7cd255 100644 --- a/core/src/scheduler/TaskTable.cpp +++ b/core/src/scheduler/TaskTable.cpp @@ -20,6 +20,7 @@ #include "event/TaskTableUpdatedEvent.h" #include "scheduler/SchedInst.h" #include "utils/Log.h" +#include "utils/TimeRecorder.h" #include #include @@ -153,7 +154,42 @@ TaskTableItem::Dump() const { std::vector TaskTable::PickToLoad(uint64_t limit) { - std::lock_guard lock(mutex_); +#if 1 + TimeRecorder rc(""); + std::vector indexes; + bool cross = false; + + uint64_t available_begin = table_.front() + 1; + for (uint64_t i = 0, loaded_count = 0, pick_count = 0; i < table_.size() && pick_count < limit; ++i) { + auto index = available_begin + i; + if (not table_[index]) + break; + if (index % table_.capacity() == table_.rear()) + break; + if (not cross && table_[index]->IsFinish()) { + table_.set_front(index); + } else if (table_[index]->state == TaskTableItemState::LOADED) { + cross = true; + ++loaded_count; + if (loaded_count > 2) + return std::vector(); + } else if (table_[index]->state == TaskTableItemState::START) { + auto task = table_[index]->task; + + // if task is a build index task, limit it + if (task->Type() == TaskType::BuildIndexTask && task->path().Current() == "cpu") { + if (not BuildMgrInst::GetInstance()->Take()) { + continue; + } + } + cross = true; + indexes.push_back(index); + ++pick_count; + } + } + rc.ElapseFromBegin("PickToLoad "); + return indexes; +#else size_t count = 0; for (uint64_t j = last_finish_ + 1; j < table_.size(); ++j) { if (not table_[j]) { @@ -197,34 +233,44 @@ TaskTable::PickToLoad(uint64_t limit) { } } return indexes; +#endif } std::vector TaskTable::PickToExecute(uint64_t limit) { - std::lock_guard lock(mutex_); + TimeRecorder rc(""); std::vector indexes; bool cross = false; - for (uint64_t i = last_finish_ + 1, count = 0; i < table_.size() && count < limit; ++i) { - if (not cross && table_[i]->IsFinish()) { - last_finish_ = i; - } else if (table_[i]->state == TaskTableItemState::LOADED) { + uint64_t available_begin = table_.front() + 1; + for (uint64_t i = 0, pick_count = 0; i < table_.size() && pick_count < limit; ++i) { + uint64_t index = available_begin + i; + if (not table_[index]) { + break; + } + if (index % table_.capacity() == table_.rear()) { + break; + } + + if (not cross && table_[index]->IsFinish()) { + table_.set_front(index); + } else if (table_[index]->state == TaskTableItemState::LOADED) { cross = true; - indexes.push_back(i); - ++count; + indexes.push_back(index); + ++pick_count; } } + rc.ElapseFromBegin("PickToExecute "); return indexes; } void TaskTable::Put(TaskPtr task) { - std::lock_guard lock(mutex_); auto item = std::make_shared(); item->id = id_++; item->task = std::move(task); item->state = TaskTableItemState::START; item->timestamp.start = get_current_timestamp(); - table_.push_back(item); + table_.put(std::move(item)); if (subscriber_) { subscriber_(); } @@ -232,14 +278,13 @@ TaskTable::Put(TaskPtr task) { void TaskTable::Put(std::vector& tasks) { - std::lock_guard lock(mutex_); for (auto& task : tasks) { auto item = std::make_shared(); item->id = id_++; item->task = std::move(task); item->state = TaskTableItemState::START; item->timestamp.start = get_current_timestamp(); - table_.push_back(item); + table_.put(std::move(item)); } if (subscriber_) { subscriber_(); @@ -248,26 +293,25 @@ TaskTable::Put(std::vector& tasks) { TaskTableItemPtr TaskTable::Get(uint64_t index) { - std::lock_guard lock(mutex_); return table_[index]; } -// void -// TaskTable::Clear() { -//// find first task is NOT (done or moved), erase from begin to it; -//// auto iterator = table_.begin(); -//// while (iterator->state == TaskTableItemState::EXECUTED or -//// iterator->state == TaskTableItemState::MOVED) -//// iterator++; -//// table_.erase(table_.begin(), iterator); -//} +size_t +TaskTable::TaskToExecute() { + size_t count = 0; + auto begin = table_.front() + 1; + for (size_t i = 0; i < table_.size(); ++i) { + auto index = begin + i; + if (table_[index]->state == TaskTableItemState::LOADED) { + ++count; + } + } + return count; +} json TaskTable::Dump() const { - json ret; - for (auto& item : table_) { - ret.push_back(item->Dump()); - } + json ret{{"error.message", "not support yet."}}; return ret; } diff --git a/core/src/scheduler/TaskTable.h b/core/src/scheduler/TaskTable.h index a9d00043c2..052be66890 100644 --- a/core/src/scheduler/TaskTable.h +++ b/core/src/scheduler/TaskTable.h @@ -25,6 +25,7 @@ #include #include +#include "CircleQueue.h" #include "event/Event.h" #include "interface/interfaces.h" #include "task/SearchTask.h" @@ -99,7 +100,8 @@ using TaskTableItemPtr = std::shared_ptr; class TaskTable : public interface::dumpable { public: - TaskTable() = default; + TaskTable() : table_(1ULL << 16ULL) { + } TaskTable(const TaskTable&) = delete; TaskTable(TaskTable&&) = delete; @@ -128,20 +130,9 @@ class TaskTable : public interface::dumpable { TaskTableItemPtr Get(uint64_t index); - /* - * TODO(wxyu): BIG GC - * Remove sequence task which is DONE or MOVED from front; - * Called by ? - */ - // void - // Clear(); - - /* - * Return true if task table empty, otherwise false; - */ - inline bool - Empty() { - return table_.empty(); + inline size_t + Capacity() { + return table_.capacity(); } /* @@ -152,22 +143,14 @@ class TaskTable : public interface::dumpable { return table_.size(); } + size_t + TaskToExecute(); + public: - TaskTableItemPtr& operator[](uint64_t index) { - std::lock_guard lock(mutex_); + const TaskTableItemPtr& operator[](uint64_t index) { return table_[index]; } - std::deque::iterator - begin() { - return table_.begin(); - } - - std::deque::iterator - end() { - return table_.end(); - } - public: std::vector PickToLoad(uint64_t limit); @@ -249,8 +232,7 @@ class TaskTable : public interface::dumpable { private: std::uint64_t id_ = 0; - mutable std::mutex mutex_; - std::deque table_; + CircleQueue table_; std::function subscriber_ = nullptr; // cache last finish avoid Pick task from begin always diff --git a/core/src/scheduler/resource/Resource.cpp b/core/src/scheduler/resource/Resource.cpp index 1cd4cde609..2577617dab 100644 --- a/core/src/scheduler/resource/Resource.cpp +++ b/core/src/scheduler/resource/Resource.cpp @@ -123,12 +123,7 @@ Resource::Dump() const { uint64_t Resource::NumOfTaskToExec() { - uint64_t count = 0; - for (auto& task : task_table_) { - if (task->state == TaskTableItemState::LOADED) - ++count; - } - return count; + return task_table_.TaskToExecute(); } TaskTableItemPtr diff --git a/core/unittest/scheduler/test_tasktable.cpp b/core/unittest/scheduler/test_tasktable.cpp index e717e40285..97aa1dce66 100644 --- a/core/unittest/scheduler/test_tasktable.cpp +++ b/core/unittest/scheduler/test_tasktable.cpp @@ -15,11 +15,10 @@ // specific language governing permissions and limitations // under the License. - +#include #include "scheduler/TaskTable.h" #include "scheduler/task/TestTask.h" #include "scheduler/tasklabel/DefaultLabel.h" -#include /************ TaskTableBaseTest ************/ @@ -28,15 +27,11 @@ class TaskTableItemTest : public ::testing::Test { void SetUp() override { std::vector states{ - milvus::scheduler::TaskTableItemState::INVALID, - milvus::scheduler::TaskTableItemState::START, - milvus::scheduler::TaskTableItemState::LOADING, - milvus::scheduler::TaskTableItemState::LOADED, - milvus::scheduler::TaskTableItemState::EXECUTING, - milvus::scheduler::TaskTableItemState::EXECUTED, - milvus::scheduler::TaskTableItemState::MOVING, - milvus::scheduler::TaskTableItemState::MOVED}; - for (auto &state : states) { + milvus::scheduler::TaskTableItemState::INVALID, milvus::scheduler::TaskTableItemState::START, + milvus::scheduler::TaskTableItemState::LOADING, milvus::scheduler::TaskTableItemState::LOADED, + milvus::scheduler::TaskTableItemState::EXECUTING, milvus::scheduler::TaskTableItemState::EXECUTED, + milvus::scheduler::TaskTableItemState::MOVING, milvus::scheduler::TaskTableItemState::MOVED}; + for (auto& state : states) { auto item = std::make_shared(); item->state = state; items_.emplace_back(item); @@ -59,9 +54,9 @@ TEST_F(TaskTableItemTest, DESTRUCT) { } TEST_F(TaskTableItemTest, IS_FINISH) { - for (auto &item : items_) { - if (item->state == milvus::scheduler::TaskTableItemState::EXECUTED - || item->state == milvus::scheduler::TaskTableItemState::MOVED) { + for (auto& item : items_) { + if (item->state == milvus::scheduler::TaskTableItemState::EXECUTED || + item->state == milvus::scheduler::TaskTableItemState::MOVED) { ASSERT_TRUE(item->IsFinish()); } else { ASSERT_FALSE(item->IsFinish()); @@ -70,13 +65,13 @@ TEST_F(TaskTableItemTest, IS_FINISH) { } TEST_F(TaskTableItemTest, DUMP) { - for (auto &item : items_) { + for (auto& item : items_) { ASSERT_FALSE(item->Dump().empty()); } } TEST_F(TaskTableItemTest, LOAD) { - for (auto &item : items_) { + for (auto& item : items_) { auto before_state = item->state; auto ret = item->Load(); if (before_state == milvus::scheduler::TaskTableItemState::START) { @@ -90,7 +85,7 @@ TEST_F(TaskTableItemTest, LOAD) { } TEST_F(TaskTableItemTest, LOADED) { - for (auto &item : items_) { + for (auto& item : items_) { auto before_state = item->state; auto ret = item->Loaded(); if (before_state == milvus::scheduler::TaskTableItemState::LOADING) { @@ -104,7 +99,7 @@ TEST_F(TaskTableItemTest, LOADED) { } TEST_F(TaskTableItemTest, EXECUTE) { - for (auto &item : items_) { + for (auto& item : items_) { auto before_state = item->state; auto ret = item->Execute(); if (before_state == milvus::scheduler::TaskTableItemState::LOADED) { @@ -118,7 +113,7 @@ TEST_F(TaskTableItemTest, EXECUTE) { } TEST_F(TaskTableItemTest, EXECUTED) { - for (auto &item : items_) { + for (auto& item : items_) { auto before_state = item->state; auto ret = item->Executed(); if (before_state == milvus::scheduler::TaskTableItemState::EXECUTING) { @@ -132,7 +127,7 @@ TEST_F(TaskTableItemTest, EXECUTED) { } TEST_F(TaskTableItemTest, MOVE) { - for (auto &item : items_) { + for (auto& item : items_) { auto before_state = item->state; auto ret = item->Move(); if (before_state == milvus::scheduler::TaskTableItemState::LOADED) { @@ -146,7 +141,7 @@ TEST_F(TaskTableItemTest, MOVE) { } TEST_F(TaskTableItemTest, MOVED) { - for (auto &item : items_) { + for (auto& item : items_) { auto before_state = item->state; auto ret = item->Moved(); if (before_state == milvus::scheduler::TaskTableItemState::MOVING) { @@ -180,9 +175,7 @@ class TaskTableBaseTest : public ::testing::Test { TEST_F(TaskTableBaseTest, SUBSCRIBER) { bool flag = false; - auto callback = [&]() { - flag = true; - }; + auto callback = [&]() { flag = true; }; empty_table_.RegisterSubscriber(callback); empty_table_.Put(task1_); ASSERT_TRUE(flag); @@ -210,12 +203,6 @@ TEST_F(TaskTableBaseTest, PUT_EMPTY_BATCH) { empty_table_.Put(tasks); } -TEST_F(TaskTableBaseTest, EMPTY) { - ASSERT_TRUE(empty_table_.Empty()); - empty_table_.Put(task1_); - ASSERT_FALSE(empty_table_.Empty()); -} - TEST_F(TaskTableBaseTest, SIZE) { ASSERT_EQ(empty_table_.Size(), 0); empty_table_.Put(task1_); @@ -237,7 +224,7 @@ TEST_F(TaskTableBaseTest, PICK_TO_LOAD) { auto indexes = empty_table_.PickToLoad(1); ASSERT_EQ(indexes.size(), 1); - ASSERT_EQ(indexes[0], 2); + ASSERT_EQ(indexes[0] % empty_table_.Capacity(), 2); } TEST_F(TaskTableBaseTest, PICK_TO_LOAD_LIMIT) { @@ -250,9 +237,9 @@ TEST_F(TaskTableBaseTest, PICK_TO_LOAD_LIMIT) { auto indexes = empty_table_.PickToLoad(3); ASSERT_EQ(indexes.size(), 3); - ASSERT_EQ(indexes[0], 2); - ASSERT_EQ(indexes[1], 3); - ASSERT_EQ(indexes[2], 4); + ASSERT_EQ(indexes[0] % empty_table_.Capacity(), 2); + ASSERT_EQ(indexes[1]% empty_table_.Capacity(), 3); + ASSERT_EQ(indexes[2]% empty_table_.Capacity(), 4); } TEST_F(TaskTableBaseTest, PICK_TO_LOAD_CACHE) { @@ -266,14 +253,14 @@ TEST_F(TaskTableBaseTest, PICK_TO_LOAD_CACHE) { // first pick, non-cache auto indexes = empty_table_.PickToLoad(1); ASSERT_EQ(indexes.size(), 1); - ASSERT_EQ(indexes[0], 2); + ASSERT_EQ(indexes[0] % empty_table_.Capacity(), 2); // second pick, iterate from 2 // invalid state change empty_table_[1]->state = milvus::scheduler::TaskTableItemState::START; indexes = empty_table_.PickToLoad(1); ASSERT_EQ(indexes.size(), 1); - ASSERT_EQ(indexes[0], 2); + ASSERT_EQ(indexes[0] % empty_table_.Capacity(), 2); } TEST_F(TaskTableBaseTest, PICK_TO_EXECUTE) { @@ -287,7 +274,7 @@ TEST_F(TaskTableBaseTest, PICK_TO_EXECUTE) { auto indexes = empty_table_.PickToExecute(1); ASSERT_EQ(indexes.size(), 1); - ASSERT_EQ(indexes[0], 2); + ASSERT_EQ(indexes[0] % empty_table_.Capacity(), 2); } TEST_F(TaskTableBaseTest, PICK_TO_EXECUTE_LIMIT) { @@ -302,8 +289,8 @@ TEST_F(TaskTableBaseTest, PICK_TO_EXECUTE_LIMIT) { auto indexes = empty_table_.PickToExecute(3); ASSERT_EQ(indexes.size(), 2); - ASSERT_EQ(indexes[0], 2); - ASSERT_EQ(indexes[1], 3); + ASSERT_EQ(indexes[0] % empty_table_.Capacity(), 2); + ASSERT_EQ(indexes[1] % empty_table_.Capacity(), 3); } TEST_F(TaskTableBaseTest, PICK_TO_EXECUTE_CACHE) { @@ -318,14 +305,14 @@ TEST_F(TaskTableBaseTest, PICK_TO_EXECUTE_CACHE) { // first pick, non-cache auto indexes = empty_table_.PickToExecute(1); ASSERT_EQ(indexes.size(), 1); - ASSERT_EQ(indexes[0], 2); + ASSERT_EQ(indexes[0] % empty_table_.Capacity(), 2); // second pick, iterate from 2 // invalid state change empty_table_[1]->state = milvus::scheduler::TaskTableItemState::START; indexes = empty_table_.PickToExecute(1); ASSERT_EQ(indexes.size(), 1); - ASSERT_EQ(indexes[0], 2); + ASSERT_EQ(indexes[0] % empty_table_.Capacity(), 2); } /************ TaskTableAdvanceTest ************/ @@ -356,8 +343,8 @@ class TaskTableAdvanceTest : public ::testing::Test { TEST_F(TaskTableAdvanceTest, LOAD) { std::vector before_state; - for (auto &task : table1_) { - before_state.push_back(task->state); + for (size_t i = 0; i < table1_.Size(); ++i) { + before_state.push_back(table1_[i]->state); } for (size_t i = 0; i < table1_.Size(); ++i) { @@ -375,8 +362,8 @@ TEST_F(TaskTableAdvanceTest, LOAD) { TEST_F(TaskTableAdvanceTest, LOADED) { std::vector before_state; - for (auto &task : table1_) { - before_state.push_back(task->state); + for (size_t i = 0; i < table1_.Size(); ++i) { + before_state.push_back(table1_[i]->state); } for (size_t i = 0; i < table1_.Size(); ++i) { @@ -394,8 +381,8 @@ TEST_F(TaskTableAdvanceTest, LOADED) { TEST_F(TaskTableAdvanceTest, EXECUTE) { std::vector before_state; - for (auto &task : table1_) { - before_state.push_back(task->state); + for (size_t i = 0; i < table1_.Size(); ++i) { + before_state.push_back(table1_[i]->state); } for (size_t i = 0; i < table1_.Size(); ++i) { @@ -413,8 +400,8 @@ TEST_F(TaskTableAdvanceTest, EXECUTE) { TEST_F(TaskTableAdvanceTest, EXECUTED) { std::vector before_state; - for (auto &task : table1_) { - before_state.push_back(task->state); + for (size_t i = 0; i < table1_.Size(); ++i) { + before_state.push_back(table1_[i]->state); } for (size_t i = 0; i < table1_.Size(); ++i) { @@ -432,8 +419,8 @@ TEST_F(TaskTableAdvanceTest, EXECUTED) { TEST_F(TaskTableAdvanceTest, MOVE) { std::vector before_state; - for (auto &task : table1_) { - before_state.push_back(task->state); + for (size_t i = 0; i < table1_.Size(); ++i) { + before_state.push_back(table1_[i]->state); } for (size_t i = 0; i < table1_.Size(); ++i) { @@ -451,8 +438,8 @@ TEST_F(TaskTableAdvanceTest, MOVE) { TEST_F(TaskTableAdvanceTest, MOVED) { std::vector before_state; - for (auto &task : table1_) { - before_state.push_back(task->state); + for (size_t i = 0; i < table1_.Size(); ++i) { + before_state.push_back(table1_[i]->state); } for (size_t i = 0; i < table1_.Size(); ++i) { @@ -467,4 +454,3 @@ TEST_F(TaskTableAdvanceTest, MOVED) { } } } - From 37e4b0a93474132538866391254a56837dbf186e Mon Sep 17 00:00:00 2001 From: wxyu Date: Mon, 28 Oct 2019 16:35:53 +0800 Subject: [PATCH 66/89] Using new structure for tasktable Former-commit-id: 80376dce0fd24c8c541c05363d702941f69dca0e --- core/unittest/scheduler/test_tasktable.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/core/unittest/scheduler/test_tasktable.cpp b/core/unittest/scheduler/test_tasktable.cpp index 97aa1dce66..54f872c2fc 100644 --- a/core/unittest/scheduler/test_tasktable.cpp +++ b/core/unittest/scheduler/test_tasktable.cpp @@ -238,8 +238,8 @@ TEST_F(TaskTableBaseTest, PICK_TO_LOAD_LIMIT) { auto indexes = empty_table_.PickToLoad(3); ASSERT_EQ(indexes.size(), 3); ASSERT_EQ(indexes[0] % empty_table_.Capacity(), 2); - ASSERT_EQ(indexes[1]% empty_table_.Capacity(), 3); - ASSERT_EQ(indexes[2]% empty_table_.Capacity(), 4); + ASSERT_EQ(indexes[1] % empty_table_.Capacity(), 3); + ASSERT_EQ(indexes[2] % empty_table_.Capacity(), 4); } TEST_F(TaskTableBaseTest, PICK_TO_LOAD_CACHE) { From a10f83c69c87f72a745aafc033f291f2740badf6 Mon Sep 17 00:00:00 2001 From: jielinxu <52057195+jielinxu@users.noreply.github.com> Date: Mon, 28 Oct 2019 17:12:51 +0800 Subject: [PATCH 67/89] [skip ci] Typo change Former-commit-id: 0438b3ee8770228d39d0146e35b154a88381bf1f --- README.md | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/README.md b/README.md index 43f117ff92..9ff0d4a3ae 100644 --- a/README.md +++ b/README.md @@ -20,7 +20,7 @@ ## What is Milvus -Milvus is an open source similarity search engine for massive feature vectors. Designed with heterogeneous computing architecture for the best cost efficiency. Searches over billion-scale vectors take only milliseconds with minimum computing resources. +Milvus is an open source similarity search engine for massive-scale feature vectors. Built with heterogeneous computing architecture for the best cost efficiency. Searches over billion-scale vectors take only milliseconds with minimum computing resources. Milvus provides stable Python, Java and C++ APIs. @@ -28,7 +28,7 @@ Keep up-to-date with newest releases and latest updates by reading Milvus [relea - Heterogeneous computing - Milvus is designed with heterogeneous computing architecture for the best performance and cost efficiency. + Milvus is built with heterogeneous computing architecture for the best performance and cost efficiency. - Multiple indexes @@ -64,14 +64,14 @@ Keep up-to-date with newest releases and latest updates by reading Milvus [relea ## Get started -### Hardware Requirements +### Hardware requirements | Component | Recommended configuration | | --------- | ----------------------------------- | | CPU | Intel CPU Haswell or higher | | GPU | NVIDIA Pascal series or higher | -| Memory | 8 GB or more (depends on data size) | -| Storage | SATA 3.0 SSD or higher | +| RAM | 8 GB or more (depends on data size) | +| Hard drive| SATA 3.0 SSD or higher | ### Install using docker @@ -185,7 +185,7 @@ Deep thanks and appreciation go to the following people. - [akihoni](https://github.com/akihoni) found a broken link and a small typo in the README file. -## Milvus Roadmap +## Milvus roadmap Please read our [roadmap](https://milvus.io/docs/en/roadmap/) to learn about upcoming features. From 5e504b343540ca549d21f9506bfcc0e0edaec817 Mon Sep 17 00:00:00 2001 From: wxyu Date: Mon, 28 Oct 2019 17:27:49 +0800 Subject: [PATCH 68/89] rename functions tasktable, make it accessing likes standard structure Former-commit-id: c0ba41635e710e0807af0fe07d0b6a266f60d044 --- core/src/scheduler/TaskTable.cpp | 5 - core/src/scheduler/TaskTable.h | 57 +++++------ core/src/scheduler/resource/Resource.cpp | 4 +- core/unittest/scheduler/test_scheduler.cpp | 2 +- core/unittest/scheduler/test_tasktable.cpp | 112 ++++++++++----------- 5 files changed, 84 insertions(+), 96 deletions(-) diff --git a/core/src/scheduler/TaskTable.cpp b/core/src/scheduler/TaskTable.cpp index e35c7cd255..bd3dd466a9 100644 --- a/core/src/scheduler/TaskTable.cpp +++ b/core/src/scheduler/TaskTable.cpp @@ -291,11 +291,6 @@ TaskTable::Put(std::vector& tasks) { } } -TaskTableItemPtr -TaskTable::Get(uint64_t index) { - return table_[index]; -} - size_t TaskTable::TaskToExecute() { size_t count = 0; diff --git a/core/src/scheduler/TaskTable.h b/core/src/scheduler/TaskTable.h index 052be66890..898141d028 100644 --- a/core/src/scheduler/TaskTable.h +++ b/core/src/scheduler/TaskTable.h @@ -106,6 +106,11 @@ class TaskTable : public interface::dumpable { TaskTable(const TaskTable&) = delete; TaskTable(TaskTable&&) = delete; + public: + json + Dump() const override; + + public: inline void RegisterSubscriber(std::function subscriber) { subscriber_ = std::move(subscriber); @@ -124,40 +129,35 @@ class TaskTable : public interface::dumpable { void Put(std::vector& tasks); - /* - * Return task table item reference; - */ - TaskTableItemPtr - Get(uint64_t index); - - inline size_t - Capacity() { - return table_.capacity(); - } - - /* - * Return size of task table; - */ - inline size_t - Size() { - return table_.size(); - } - size_t TaskToExecute(); - public: - const TaskTableItemPtr& operator[](uint64_t index) { - return table_[index]; - } - - public: std::vector PickToLoad(uint64_t limit); std::vector PickToExecute(uint64_t limit); + public: + inline const TaskTableItemPtr& operator[](uint64_t index) { + return table_[index]; + } + + inline const TaskTableItemPtr& + at(uint64_t index) { + return table_[index]; + } + + inline size_t + capacity() { + return table_.capacity(); + } + + inline size_t + size() { + return table_.size(); + } + public: /******** Action ********/ @@ -223,13 +223,6 @@ class TaskTable : public interface::dumpable { return table_[index]->Moved(); } - public: - /* - * Dump; - */ - json - Dump() const override; - private: std::uint64_t id_ = 0; CircleQueue table_; diff --git a/core/src/scheduler/resource/Resource.cpp b/core/src/scheduler/resource/Resource.cpp index 2577617dab..8e10592262 100644 --- a/core/src/scheduler/resource/Resource.cpp +++ b/core/src/scheduler/resource/Resource.cpp @@ -132,7 +132,7 @@ Resource::pick_task_load() { for (auto index : indexes) { // try to set one task loading, then return if (task_table_.Load(index)) - return task_table_.Get(index); + return task_table_.at(index); // else try next } return nullptr; @@ -150,7 +150,7 @@ Resource::pick_task_execute() { } if (task_table_.Execute(index)) { - return task_table_.Get(index); + return task_table_.at(index); } // if (task_table_[index]->task->label()->Type() == TaskLabelType::SPECIFIED_RESOURCE) { // if (task_table_.Get(index)->task->path().Current() == task_table_.Get(index)->task->path().Last() diff --git a/core/unittest/scheduler/test_scheduler.cpp b/core/unittest/scheduler/test_scheduler.cpp index aebdfa2af2..b418b7c80e 100644 --- a/core/unittest/scheduler/test_scheduler.cpp +++ b/core/unittest/scheduler/test_scheduler.cpp @@ -165,7 +165,7 @@ TEST_F(SchedulerTest, ON_LOAD_COMPLETED) { } sleep(3); - ASSERT_EQ(res_mgr_->GetResource(ResourceType::GPU, 1)->task_table().Size(), NUM); + ASSERT_EQ(res_mgr_->GetResource(ResourceType::GPU, 1)->task_table().size(), NUM); } TEST_F(SchedulerTest, PUSH_TASK_TO_NEIGHBOUR_RANDOMLY_TEST) { diff --git a/core/unittest/scheduler/test_tasktable.cpp b/core/unittest/scheduler/test_tasktable.cpp index 54f872c2fc..601bd2431d 100644 --- a/core/unittest/scheduler/test_tasktable.cpp +++ b/core/unittest/scheduler/test_tasktable.cpp @@ -183,19 +183,19 @@ TEST_F(TaskTableBaseTest, SUBSCRIBER) { TEST_F(TaskTableBaseTest, PUT_TASK) { empty_table_.Put(task1_); - ASSERT_EQ(empty_table_.Get(0)->task, task1_); + ASSERT_EQ(empty_table_.at(0)->task, task1_); } TEST_F(TaskTableBaseTest, PUT_INVALID_TEST) { empty_table_.Put(invalid_task_); - ASSERT_EQ(empty_table_.Get(0)->task, invalid_task_); + ASSERT_EQ(empty_table_.at(0)->task, invalid_task_); } TEST_F(TaskTableBaseTest, PUT_BATCH) { std::vector tasks{task1_, task2_}; empty_table_.Put(tasks); - ASSERT_EQ(empty_table_.Get(0)->task, task1_); - ASSERT_EQ(empty_table_.Get(1)->task, task2_); + ASSERT_EQ(empty_table_.at(0)->task, task1_); + ASSERT_EQ(empty_table_.at(1)->task, task2_); } TEST_F(TaskTableBaseTest, PUT_EMPTY_BATCH) { @@ -204,14 +204,14 @@ TEST_F(TaskTableBaseTest, PUT_EMPTY_BATCH) { } TEST_F(TaskTableBaseTest, SIZE) { - ASSERT_EQ(empty_table_.Size(), 0); + ASSERT_EQ(empty_table_.size(), 0); empty_table_.Put(task1_); - ASSERT_EQ(empty_table_.Size(), 1); + ASSERT_EQ(empty_table_.size(), 1); } TEST_F(TaskTableBaseTest, OPERATOR) { empty_table_.Put(task1_); - ASSERT_EQ(empty_table_.Get(0), empty_table_[0]); + ASSERT_EQ(empty_table_.at(0), empty_table_[0]); } TEST_F(TaskTableBaseTest, PICK_TO_LOAD) { @@ -224,7 +224,7 @@ TEST_F(TaskTableBaseTest, PICK_TO_LOAD) { auto indexes = empty_table_.PickToLoad(1); ASSERT_EQ(indexes.size(), 1); - ASSERT_EQ(indexes[0] % empty_table_.Capacity(), 2); + ASSERT_EQ(indexes[0] % empty_table_.capacity(), 2); } TEST_F(TaskTableBaseTest, PICK_TO_LOAD_LIMIT) { @@ -237,9 +237,9 @@ TEST_F(TaskTableBaseTest, PICK_TO_LOAD_LIMIT) { auto indexes = empty_table_.PickToLoad(3); ASSERT_EQ(indexes.size(), 3); - ASSERT_EQ(indexes[0] % empty_table_.Capacity(), 2); - ASSERT_EQ(indexes[1] % empty_table_.Capacity(), 3); - ASSERT_EQ(indexes[2] % empty_table_.Capacity(), 4); + ASSERT_EQ(indexes[0] % empty_table_.capacity(), 2); + ASSERT_EQ(indexes[1] % empty_table_.capacity(), 3); + ASSERT_EQ(indexes[2] % empty_table_.capacity(), 4); } TEST_F(TaskTableBaseTest, PICK_TO_LOAD_CACHE) { @@ -253,14 +253,14 @@ TEST_F(TaskTableBaseTest, PICK_TO_LOAD_CACHE) { // first pick, non-cache auto indexes = empty_table_.PickToLoad(1); ASSERT_EQ(indexes.size(), 1); - ASSERT_EQ(indexes[0] % empty_table_.Capacity(), 2); + ASSERT_EQ(indexes[0] % empty_table_.capacity(), 2); // second pick, iterate from 2 // invalid state change empty_table_[1]->state = milvus::scheduler::TaskTableItemState::START; indexes = empty_table_.PickToLoad(1); ASSERT_EQ(indexes.size(), 1); - ASSERT_EQ(indexes[0] % empty_table_.Capacity(), 2); + ASSERT_EQ(indexes[0] % empty_table_.capacity(), 2); } TEST_F(TaskTableBaseTest, PICK_TO_EXECUTE) { @@ -274,7 +274,7 @@ TEST_F(TaskTableBaseTest, PICK_TO_EXECUTE) { auto indexes = empty_table_.PickToExecute(1); ASSERT_EQ(indexes.size(), 1); - ASSERT_EQ(indexes[0] % empty_table_.Capacity(), 2); + ASSERT_EQ(indexes[0] % empty_table_.capacity(), 2); } TEST_F(TaskTableBaseTest, PICK_TO_EXECUTE_LIMIT) { @@ -289,8 +289,8 @@ TEST_F(TaskTableBaseTest, PICK_TO_EXECUTE_LIMIT) { auto indexes = empty_table_.PickToExecute(3); ASSERT_EQ(indexes.size(), 2); - ASSERT_EQ(indexes[0] % empty_table_.Capacity(), 2); - ASSERT_EQ(indexes[1] % empty_table_.Capacity(), 3); + ASSERT_EQ(indexes[0] % empty_table_.capacity(), 2); + ASSERT_EQ(indexes[1] % empty_table_.capacity(), 3); } TEST_F(TaskTableBaseTest, PICK_TO_EXECUTE_CACHE) { @@ -305,14 +305,14 @@ TEST_F(TaskTableBaseTest, PICK_TO_EXECUTE_CACHE) { // first pick, non-cache auto indexes = empty_table_.PickToExecute(1); ASSERT_EQ(indexes.size(), 1); - ASSERT_EQ(indexes[0] % empty_table_.Capacity(), 2); + ASSERT_EQ(indexes[0] % empty_table_.capacity(), 2); // second pick, iterate from 2 // invalid state change empty_table_[1]->state = milvus::scheduler::TaskTableItemState::START; indexes = empty_table_.PickToExecute(1); ASSERT_EQ(indexes.size(), 1); - ASSERT_EQ(indexes[0] % empty_table_.Capacity(), 2); + ASSERT_EQ(indexes[0] % empty_table_.capacity(), 2); } /************ TaskTableAdvanceTest ************/ @@ -328,14 +328,14 @@ class TaskTableAdvanceTest : public ::testing::Test { table1_.Put(task); } - table1_.Get(0)->state = milvus::scheduler::TaskTableItemState::INVALID; - table1_.Get(1)->state = milvus::scheduler::TaskTableItemState::START; - table1_.Get(2)->state = milvus::scheduler::TaskTableItemState::LOADING; - table1_.Get(3)->state = milvus::scheduler::TaskTableItemState::LOADED; - table1_.Get(4)->state = milvus::scheduler::TaskTableItemState::EXECUTING; - table1_.Get(5)->state = milvus::scheduler::TaskTableItemState::EXECUTED; - table1_.Get(6)->state = milvus::scheduler::TaskTableItemState::MOVING; - table1_.Get(7)->state = milvus::scheduler::TaskTableItemState::MOVED; + table1_.at(0)->state = milvus::scheduler::TaskTableItemState::INVALID; + table1_.at(1)->state = milvus::scheduler::TaskTableItemState::START; + table1_.at(2)->state = milvus::scheduler::TaskTableItemState::LOADING; + table1_.at(3)->state = milvus::scheduler::TaskTableItemState::LOADED; + table1_.at(4)->state = milvus::scheduler::TaskTableItemState::EXECUTING; + table1_.at(5)->state = milvus::scheduler::TaskTableItemState::EXECUTED; + table1_.at(6)->state = milvus::scheduler::TaskTableItemState::MOVING; + table1_.at(7)->state = milvus::scheduler::TaskTableItemState::MOVED; } milvus::scheduler::TaskTable table1_; @@ -343,114 +343,114 @@ class TaskTableAdvanceTest : public ::testing::Test { TEST_F(TaskTableAdvanceTest, LOAD) { std::vector before_state; - for (size_t i = 0; i < table1_.Size(); ++i) { + for (size_t i = 0; i < table1_.size(); ++i) { before_state.push_back(table1_[i]->state); } - for (size_t i = 0; i < table1_.Size(); ++i) { + for (size_t i = 0; i < table1_.size(); ++i) { table1_.Load(i); } - for (size_t i = 0; i < table1_.Size(); ++i) { + for (size_t i = 0; i < table1_.size(); ++i) { if (before_state[i] == milvus::scheduler::TaskTableItemState::START) { - ASSERT_EQ(table1_.Get(i)->state, milvus::scheduler::TaskTableItemState::LOADING); + ASSERT_EQ(table1_.at(i)->state, milvus::scheduler::TaskTableItemState::LOADING); } else { - ASSERT_EQ(table1_.Get(i)->state, before_state[i]); + ASSERT_EQ(table1_.at(i)->state, before_state[i]); } } } TEST_F(TaskTableAdvanceTest, LOADED) { std::vector before_state; - for (size_t i = 0; i < table1_.Size(); ++i) { + for (size_t i = 0; i < table1_.size(); ++i) { before_state.push_back(table1_[i]->state); } - for (size_t i = 0; i < table1_.Size(); ++i) { + for (size_t i = 0; i < table1_.size(); ++i) { table1_.Loaded(i); } - for (size_t i = 0; i < table1_.Size(); ++i) { + for (size_t i = 0; i < table1_.size(); ++i) { if (before_state[i] == milvus::scheduler::TaskTableItemState::LOADING) { - ASSERT_EQ(table1_.Get(i)->state, milvus::scheduler::TaskTableItemState::LOADED); + ASSERT_EQ(table1_.at(i)->state, milvus::scheduler::TaskTableItemState::LOADED); } else { - ASSERT_EQ(table1_.Get(i)->state, before_state[i]); + ASSERT_EQ(table1_.at(i)->state, before_state[i]); } } } TEST_F(TaskTableAdvanceTest, EXECUTE) { std::vector before_state; - for (size_t i = 0; i < table1_.Size(); ++i) { + for (size_t i = 0; i < table1_.size(); ++i) { before_state.push_back(table1_[i]->state); } - for (size_t i = 0; i < table1_.Size(); ++i) { + for (size_t i = 0; i < table1_.size(); ++i) { table1_.Execute(i); } - for (size_t i = 0; i < table1_.Size(); ++i) { + for (size_t i = 0; i < table1_.size(); ++i) { if (before_state[i] == milvus::scheduler::TaskTableItemState::LOADED) { - ASSERT_EQ(table1_.Get(i)->state, milvus::scheduler::TaskTableItemState::EXECUTING); + ASSERT_EQ(table1_.at(i)->state, milvus::scheduler::TaskTableItemState::EXECUTING); } else { - ASSERT_EQ(table1_.Get(i)->state, before_state[i]); + ASSERT_EQ(table1_.at(i)->state, before_state[i]); } } } TEST_F(TaskTableAdvanceTest, EXECUTED) { std::vector before_state; - for (size_t i = 0; i < table1_.Size(); ++i) { + for (size_t i = 0; i < table1_.size(); ++i) { before_state.push_back(table1_[i]->state); } - for (size_t i = 0; i < table1_.Size(); ++i) { + for (size_t i = 0; i < table1_.size(); ++i) { table1_.Executed(i); } - for (size_t i = 0; i < table1_.Size(); ++i) { + for (size_t i = 0; i < table1_.size(); ++i) { if (before_state[i] == milvus::scheduler::TaskTableItemState::EXECUTING) { - ASSERT_EQ(table1_.Get(i)->state, milvus::scheduler::TaskTableItemState::EXECUTED); + ASSERT_EQ(table1_.at(i)->state, milvus::scheduler::TaskTableItemState::EXECUTED); } else { - ASSERT_EQ(table1_.Get(i)->state, before_state[i]); + ASSERT_EQ(table1_.at(i)->state, before_state[i]); } } } TEST_F(TaskTableAdvanceTest, MOVE) { std::vector before_state; - for (size_t i = 0; i < table1_.Size(); ++i) { + for (size_t i = 0; i < table1_.size(); ++i) { before_state.push_back(table1_[i]->state); } - for (size_t i = 0; i < table1_.Size(); ++i) { + for (size_t i = 0; i < table1_.size(); ++i) { table1_.Move(i); } - for (size_t i = 0; i < table1_.Size(); ++i) { + for (size_t i = 0; i < table1_.size(); ++i) { if (before_state[i] == milvus::scheduler::TaskTableItemState::LOADED) { - ASSERT_EQ(table1_.Get(i)->state, milvus::scheduler::TaskTableItemState::MOVING); + ASSERT_EQ(table1_.at(i)->state, milvus::scheduler::TaskTableItemState::MOVING); } else { - ASSERT_EQ(table1_.Get(i)->state, before_state[i]); + ASSERT_EQ(table1_.at(i)->state, before_state[i]); } } } TEST_F(TaskTableAdvanceTest, MOVED) { std::vector before_state; - for (size_t i = 0; i < table1_.Size(); ++i) { + for (size_t i = 0; i < table1_.size(); ++i) { before_state.push_back(table1_[i]->state); } - for (size_t i = 0; i < table1_.Size(); ++i) { + for (size_t i = 0; i < table1_.size(); ++i) { table1_.Moved(i); } - for (size_t i = 0; i < table1_.Size(); ++i) { + for (size_t i = 0; i < table1_.size(); ++i) { if (before_state[i] == milvus::scheduler::TaskTableItemState::MOVING) { - ASSERT_EQ(table1_.Get(i)->state, milvus::scheduler::TaskTableItemState::MOVED); + ASSERT_EQ(table1_.at(i)->state, milvus::scheduler::TaskTableItemState::MOVED); } else { - ASSERT_EQ(table1_.Get(i)->state, before_state[i]); + ASSERT_EQ(table1_.at(i)->state, before_state[i]); } } } From baeff23c6bba1840ec9c62e1493a71e0bc53a5d0 Mon Sep 17 00:00:00 2001 From: jielinxu <52057195+jielinxu@users.noreply.github.com> Date: Mon, 28 Oct 2019 17:32:49 +0800 Subject: [PATCH 69/89] [skip ci] minor change Former-commit-id: 5b3a820dc9d017c5dfcb8aa60ed7d3d92d5ef114 --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 9ff0d4a3ae..3dfd95d2f4 100644 --- a/README.md +++ b/README.md @@ -180,7 +180,7 @@ To connect with other users and contributors, welcome to join our [slack channel ## Contributors -Deep thanks and appreciation go to the following people. +We greatly appreciate the help and contributions of the following people. - [akihoni](https://github.com/akihoni) found a broken link and a small typo in the README file. From a1d5ff06df232b9fdab143cedeac02505b177272 Mon Sep 17 00:00:00 2001 From: jielinxu <52057195+jielinxu@users.noreply.github.com> Date: Mon, 28 Oct 2019 17:48:00 +0800 Subject: [PATCH 70/89] [skip ci] minor change Former-commit-id: 10f0bf48d869fc193e33c1065f43d11c76560594 --- README.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 3dfd95d2f4..5c5065442f 100644 --- a/README.md +++ b/README.md @@ -178,9 +178,9 @@ We use [GitHub issues](https://github.com/milvus-io/milvus/issues/new/choose) to To connect with other users and contributors, welcome to join our [slack channel](https://join.slack.com/t/milvusio/shared_invite/enQtNzY1OTQ0NDI3NjMzLWNmYmM1NmNjOTQ5MGI5NDhhYmRhMGU5M2NhNzhhMDMzY2MzNDdlYjM5ODQ5MmE3ODFlYzU3YjJkNmVlNDQ2ZTk). -## Contributors +## Thanks -We greatly appreciate the help and contributions of the following people. +We greatly appreciate the help of the following people. - [akihoni](https://github.com/akihoni) found a broken link and a small typo in the README file. From 53b3b60db2ed87a8c558ae893608fab0ffc18578 Mon Sep 17 00:00:00 2001 From: wxyu Date: Mon, 28 Oct 2019 19:19:42 +0800 Subject: [PATCH 71/89] Using shared_ptr instead of weak_ptr to avoid performance loss Former-commit-id: 250cb7200b6eefdd9cbb9fd631379d59aca2f368 --- CHANGELOG.md | 1 + core/src/scheduler/Algorithm.cpp | 2 +- core/src/scheduler/Scheduler.cpp | 64 +++++++++---------- core/src/scheduler/Scheduler.h | 6 +- core/src/scheduler/action/Action.h | 5 +- .../scheduler/action/PushTaskToNeighbour.cpp | 22 +++---- core/src/scheduler/event/Event.h | 4 +- core/src/scheduler/event/FinishTaskEvent.h | 2 +- core/src/scheduler/event/LoadCompletedEvent.h | 2 +- core/src/scheduler/event/StartUpEvent.h | 2 +- .../scheduler/event/TaskTableUpdatedEvent.h | 2 +- core/src/scheduler/resource/Node.cpp | 4 +- core/src/scheduler/resource/Node.h | 8 ++- core/unittest/scheduler/test_event.cpp | 8 +-- core/unittest/scheduler/test_node.cpp | 16 ++--- 15 files changed, 75 insertions(+), 73 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index bcb3f5b70f..00402ea15f 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -12,6 +12,7 @@ Please mark all change in change log and use the ticket from JIRA. - \#80 - Print version information into log during server start - \#82 - Move easyloggingpp into "external" directory - \#92 - Speed up CMake build process +- \#118 - Using shared_ptr instead of weak_ptr to avoid performance loss ## Feature - \#115 - Using new structure for tasktable diff --git a/core/src/scheduler/Algorithm.cpp b/core/src/scheduler/Algorithm.cpp index b2156b3f97..fb1742e6e1 100644 --- a/core/src/scheduler/Algorithm.cpp +++ b/core/src/scheduler/Algorithm.cpp @@ -54,7 +54,7 @@ ShortestPath(const ResourcePtr& src, const ResourcePtr& dest, const ResourceMgrP auto cur_neighbours = cur_node->GetNeighbours(); for (auto& neighbour : cur_neighbours) { - auto neighbour_res = std::static_pointer_cast(neighbour.neighbour_node.lock()); + auto neighbour_res = std::static_pointer_cast(neighbour.neighbour_node); dis_matrix[name_id_map.at(res->name())][name_id_map.at(neighbour_res->name())] = neighbour.connection.transport_cost(); } diff --git a/core/src/scheduler/Scheduler.cpp b/core/src/scheduler/Scheduler.cpp index fef5cc1a95..cba847c25e 100644 --- a/core/src/scheduler/Scheduler.cpp +++ b/core/src/scheduler/Scheduler.cpp @@ -26,10 +26,8 @@ namespace milvus { namespace scheduler { -Scheduler::Scheduler(ResourceMgrWPtr res_mgr) : running_(false), res_mgr_(std::move(res_mgr)) { - if (auto mgr = res_mgr_.lock()) { - mgr->RegisterSubscriber(std::bind(&Scheduler::PostEvent, this, std::placeholders::_1)); - } +Scheduler::Scheduler(ResourceMgrPtr res_mgr) : running_(false), res_mgr_(std::move(res_mgr)) { + res_mgr_->RegisterSubscriber(std::bind(&Scheduler::PostEvent, this, std::placeholders::_1)); event_register_.insert(std::make_pair(static_cast(EventType::START_UP), std::bind(&Scheduler::OnStartUp, this, std::placeholders::_1))); event_register_.insert(std::make_pair(static_cast(EventType::LOAD_COMPLETED), @@ -40,6 +38,10 @@ Scheduler::Scheduler(ResourceMgrWPtr res_mgr) : running_(false), res_mgr_(std::m std::bind(&Scheduler::OnFinishTask, this, std::placeholders::_1))); } +Scheduler::~Scheduler() { + res_mgr_ = nullptr; +} + void Scheduler::Start() { running_ = true; @@ -100,51 +102,45 @@ Scheduler::Process(const EventPtr& event) { void Scheduler::OnLoadCompleted(const EventPtr& event) { auto load_completed_event = std::static_pointer_cast(event); - if (auto resource = event->resource_.lock()) { - resource->WakeupExecutor(); - auto task_table_type = load_completed_event->task_table_item_->task->label()->Type(); - switch (task_table_type) { - case TaskLabelType::DEFAULT: { - Action::DefaultLabelTaskScheduler(res_mgr_, resource, load_completed_event); - break; - } - case TaskLabelType::SPECIFIED_RESOURCE: { - Action::SpecifiedResourceLabelTaskScheduler(res_mgr_, resource, load_completed_event); - break; - } - case TaskLabelType::BROADCAST: { - if (resource->HasExecutor() == false) { - load_completed_event->task_table_item_->Move(); - } - Action::PushTaskToAllNeighbour(load_completed_event->task_table_item_->task, resource); - break; - } - default: { break; } + auto resource = event->resource_; + resource->WakeupExecutor(); + + auto task_table_type = load_completed_event->task_table_item_->task->label()->Type(); + switch (task_table_type) { + case TaskLabelType::DEFAULT: { + Action::DefaultLabelTaskScheduler(res_mgr_, resource, load_completed_event); + break; } - resource->WakeupLoader(); + case TaskLabelType::SPECIFIED_RESOURCE: { + Action::SpecifiedResourceLabelTaskScheduler(res_mgr_, resource, load_completed_event); + break; + } + case TaskLabelType::BROADCAST: { + if (resource->HasExecutor() == false) { + load_completed_event->task_table_item_->Move(); + } + Action::PushTaskToAllNeighbour(load_completed_event->task_table_item_->task, resource); + break; + } + default: { break; } } + resource->WakeupLoader(); } void Scheduler::OnStartUp(const EventPtr& event) { - if (auto resource = event->resource_.lock()) { - resource->WakeupLoader(); - } + event->resource_->WakeupLoader(); } void Scheduler::OnFinishTask(const EventPtr& event) { - if (auto resource = event->resource_.lock()) { - resource->WakeupLoader(); - } + event->resource_->WakeupLoader(); } void Scheduler::OnTaskTableUpdated(const EventPtr& event) { - if (auto resource = event->resource_.lock()) { - resource->WakeupLoader(); - } + event->resource_->WakeupLoader(); } } // namespace scheduler diff --git a/core/src/scheduler/Scheduler.h b/core/src/scheduler/Scheduler.h index 8d9ea83794..9e3a864774 100644 --- a/core/src/scheduler/Scheduler.h +++ b/core/src/scheduler/Scheduler.h @@ -34,7 +34,9 @@ namespace scheduler { class Scheduler : public interface::dumpable { public: - explicit Scheduler(ResourceMgrWPtr res_mgr); + explicit Scheduler(ResourceMgrPtr res_mgr); + + ~Scheduler(); Scheduler(const Scheduler&) = delete; Scheduler(Scheduler&&) = delete; @@ -118,7 +120,7 @@ class Scheduler : public interface::dumpable { std::unordered_map> event_register_; - ResourceMgrWPtr res_mgr_; + ResourceMgrPtr res_mgr_; std::queue event_queue_; std::thread worker_thread_; std::mutex event_mutex_; diff --git a/core/src/scheduler/action/Action.h b/core/src/scheduler/action/Action.h index 51c788f82f..ff72910055 100644 --- a/core/src/scheduler/action/Action.h +++ b/core/src/scheduler/action/Action.h @@ -37,10 +37,11 @@ class Action { PushTaskToResource(const TaskPtr& task, const ResourcePtr& dest); static void - DefaultLabelTaskScheduler(ResourceMgrWPtr res_mgr, ResourcePtr resource, std::shared_ptr event); + DefaultLabelTaskScheduler(const ResourceMgrPtr& res_mgr, ResourcePtr resource, + std::shared_ptr event); static void - SpecifiedResourceLabelTaskScheduler(ResourceMgrWPtr res_mgr, ResourcePtr resource, + SpecifiedResourceLabelTaskScheduler(const ResourceMgrPtr& res_mgr, ResourcePtr resource, std::shared_ptr event); }; diff --git a/core/src/scheduler/action/PushTaskToNeighbour.cpp b/core/src/scheduler/action/PushTaskToNeighbour.cpp index c64e81dcfa..6f74849eac 100644 --- a/core/src/scheduler/action/PushTaskToNeighbour.cpp +++ b/core/src/scheduler/action/PushTaskToNeighbour.cpp @@ -30,7 +30,7 @@ std::vector get_neighbours(const ResourcePtr& self) { std::vector neighbours; for (auto& neighbour_node : self->GetNeighbours()) { - auto node = neighbour_node.neighbour_node.lock(); + auto node = neighbour_node.neighbour_node; if (not node) continue; @@ -46,7 +46,7 @@ std::vector> get_neighbours_with_connetion(const ResourcePtr& self) { std::vector> neighbours; for (auto& neighbour_node : self->GetNeighbours()) { - auto node = neighbour_node.neighbour_node.lock(); + auto node = neighbour_node.neighbour_node; if (not node) continue; @@ -102,7 +102,7 @@ Action::PushTaskToResource(const TaskPtr& task, const ResourcePtr& dest) { } void -Action::DefaultLabelTaskScheduler(ResourceMgrWPtr res_mgr, ResourcePtr resource, +Action::DefaultLabelTaskScheduler(const ResourceMgrPtr& res_mgr, ResourcePtr resource, std::shared_ptr event) { if (not resource->HasExecutor() && event->task_table_item_->Move()) { auto task = event->task_table_item_->task; @@ -114,11 +114,11 @@ Action::DefaultLabelTaskScheduler(ResourceMgrWPtr res_mgr, ResourcePtr resource, if (auto index_engine = search_task->index_engine_) { auto location = index_engine->GetLocation(); - for (auto i = 0; i < res_mgr.lock()->GetNumGpuResource(); ++i) { + for (auto i = 0; i < res_mgr->GetNumGpuResource(); ++i) { auto index = milvus::cache::GpuCacheMgr::GetInstance(i)->GetIndex(location); if (index != nullptr) { moved = true; - auto dest_resource = res_mgr.lock()->GetResource(ResourceType::GPU, i); + auto dest_resource = res_mgr->GetResource(ResourceType::GPU, i); PushTaskToResource(event->task_table_item_->task, dest_resource); break; } @@ -133,17 +133,17 @@ Action::DefaultLabelTaskScheduler(ResourceMgrWPtr res_mgr, ResourcePtr resource, } void -Action::SpecifiedResourceLabelTaskScheduler(ResourceMgrWPtr res_mgr, ResourcePtr resource, +Action::SpecifiedResourceLabelTaskScheduler(const ResourceMgrPtr& res_mgr, ResourcePtr resource, std::shared_ptr event) { auto task = event->task_table_item_->task; if (resource->type() == ResourceType::DISK) { // step 1: calculate shortest path per resource, from disk to compute resource - auto compute_resources = res_mgr.lock()->GetComputeResources(); + auto compute_resources = res_mgr->GetComputeResources(); std::vector> paths; std::vector transport_costs; for (auto& res : compute_resources) { std::vector path; - uint64_t transport_cost = ShortestPath(resource, res, res_mgr.lock(), path); + uint64_t transport_cost = ShortestPath(resource, res, res_mgr, path); transport_costs.push_back(transport_cost); paths.emplace_back(path); } @@ -187,10 +187,10 @@ Action::SpecifiedResourceLabelTaskScheduler(ResourceMgrWPtr res_mgr, ResourcePtr Status stat = config.GetResourceConfigIndexBuildDevice(build_index_gpu); bool find_gpu_res = false; - if (res_mgr.lock()->GetResource(ResourceType::GPU, build_index_gpu) != nullptr) { + if (res_mgr->GetResource(ResourceType::GPU, build_index_gpu) != nullptr) { for (uint64_t i = 0; i < compute_resources.size(); ++i) { if (compute_resources[i]->name() == - res_mgr.lock()->GetResource(ResourceType::GPU, build_index_gpu)->name()) { + res_mgr->GetResource(ResourceType::GPU, build_index_gpu)->name()) { find_gpu_res = true; Path task_path(paths[i], paths[i].size() - 1); task->path() = task_path; @@ -208,7 +208,7 @@ Action::SpecifiedResourceLabelTaskScheduler(ResourceMgrWPtr res_mgr, ResourcePtr resource->WakeupExecutor(); } else { auto next_res_name = task->path().Next(); - auto next_res = res_mgr.lock()->GetResource(next_res_name); + auto next_res = res_mgr->GetResource(next_res_name); // if (event->task_table_item_->Move()) { // next_res->task_table().Put(task); // } diff --git a/core/src/scheduler/event/Event.h b/core/src/scheduler/event/Event.h index 5b1f37fb99..3c29e02225 100644 --- a/core/src/scheduler/event/Event.h +++ b/core/src/scheduler/event/Event.h @@ -30,7 +30,7 @@ class Resource; class Event { public: - explicit Event(EventType type, std::weak_ptr resource) : type_(type), resource_(std::move(resource)) { + explicit Event(EventType type, std::shared_ptr resource) : type_(type), resource_(std::move(resource)) { } inline EventType @@ -46,7 +46,7 @@ class Event { public: EventType type_; - std::weak_ptr resource_; + std::shared_ptr resource_; }; using EventPtr = std::shared_ptr; diff --git a/core/src/scheduler/event/FinishTaskEvent.h b/core/src/scheduler/event/FinishTaskEvent.h index 1b2d8f9818..afaf02de92 100644 --- a/core/src/scheduler/event/FinishTaskEvent.h +++ b/core/src/scheduler/event/FinishTaskEvent.h @@ -29,7 +29,7 @@ namespace scheduler { class FinishTaskEvent : public Event { public: - FinishTaskEvent(std::weak_ptr resource, TaskTableItemPtr task_table_item) + FinishTaskEvent(std::shared_ptr resource, TaskTableItemPtr task_table_item) : Event(EventType::FINISH_TASK, std::move(resource)), task_table_item_(std::move(task_table_item)) { } diff --git a/core/src/scheduler/event/LoadCompletedEvent.h b/core/src/scheduler/event/LoadCompletedEvent.h index 5a701e0dfc..0aa3bf79d6 100644 --- a/core/src/scheduler/event/LoadCompletedEvent.h +++ b/core/src/scheduler/event/LoadCompletedEvent.h @@ -29,7 +29,7 @@ namespace scheduler { class LoadCompletedEvent : public Event { public: - LoadCompletedEvent(std::weak_ptr resource, TaskTableItemPtr task_table_item) + LoadCompletedEvent(std::shared_ptr resource, TaskTableItemPtr task_table_item) : Event(EventType::LOAD_COMPLETED, std::move(resource)), task_table_item_(std::move(task_table_item)) { } diff --git a/core/src/scheduler/event/StartUpEvent.h b/core/src/scheduler/event/StartUpEvent.h index c4abb4e27c..2d8292ea70 100644 --- a/core/src/scheduler/event/StartUpEvent.h +++ b/core/src/scheduler/event/StartUpEvent.h @@ -28,7 +28,7 @@ namespace scheduler { class StartUpEvent : public Event { public: - explicit StartUpEvent(std::weak_ptr resource) : Event(EventType::START_UP, std::move(resource)) { + explicit StartUpEvent(std::shared_ptr resource) : Event(EventType::START_UP, std::move(resource)) { } inline std::string diff --git a/core/src/scheduler/event/TaskTableUpdatedEvent.h b/core/src/scheduler/event/TaskTableUpdatedEvent.h index ed64a42d89..9be27e69b6 100644 --- a/core/src/scheduler/event/TaskTableUpdatedEvent.h +++ b/core/src/scheduler/event/TaskTableUpdatedEvent.h @@ -28,7 +28,7 @@ namespace scheduler { class TaskTableUpdatedEvent : public Event { public: - explicit TaskTableUpdatedEvent(std::weak_ptr resource) + explicit TaskTableUpdatedEvent(std::shared_ptr resource) : Event(EventType::TASK_TABLE_UPDATED, std::move(resource)) { } diff --git a/core/src/scheduler/resource/Node.cpp b/core/src/scheduler/resource/Node.cpp index dcf03a321c..bc0e559175 100644 --- a/core/src/scheduler/resource/Node.cpp +++ b/core/src/scheduler/resource/Node.cpp @@ -58,9 +58,7 @@ Node::Dump() const { void Node::AddNeighbour(const NeighbourNodePtr& neighbour_node, Connection& connection) { std::lock_guard lk(mutex_); - if (auto s = neighbour_node.lock()) { - neighbours_.emplace(std::make_pair(s->id_, Neighbour(neighbour_node, connection))); - } + neighbours_.emplace(std::make_pair(neighbour_node->id_, Neighbour(neighbour_node, connection))); // else do nothing, consider it.. } diff --git a/core/src/scheduler/resource/Node.h b/core/src/scheduler/resource/Node.h index 4539c8c86a..53323fe6e2 100644 --- a/core/src/scheduler/resource/Node.h +++ b/core/src/scheduler/resource/Node.h @@ -31,10 +31,14 @@ namespace scheduler { class Node; -using NeighbourNodePtr = std::weak_ptr; +using NeighbourNodePtr = std::shared_ptr; struct Neighbour { - Neighbour(NeighbourNodePtr nei, Connection conn) : neighbour_node(nei), connection(conn) { + Neighbour(NeighbourNodePtr nei, Connection conn) : neighbour_node(std::move(nei)), connection(std::move(conn)) { + } + + ~Neighbour() { + neighbour_node = nullptr; } NeighbourNodePtr neighbour_node; diff --git a/core/unittest/scheduler/test_event.cpp b/core/unittest/scheduler/test_event.cpp index 07d51e8557..cf627a5d79 100644 --- a/core/unittest/scheduler/test_event.cpp +++ b/core/unittest/scheduler/test_event.cpp @@ -28,7 +28,7 @@ namespace milvus { namespace scheduler { TEST(EventTest, START_UP_EVENT) { - ResourceWPtr res(ResourcePtr(nullptr)); + ResourcePtr res(nullptr); auto event = std::make_shared(res); ASSERT_FALSE(event->Dump().empty()); std::cout << *event; @@ -36,7 +36,7 @@ TEST(EventTest, START_UP_EVENT) { } TEST(EventTest, LOAD_COMPLETED_EVENT) { - ResourceWPtr res(ResourcePtr(nullptr)); + ResourcePtr res(nullptr); auto event = std::make_shared(res, nullptr); ASSERT_FALSE(event->Dump().empty()); std::cout << *event; @@ -44,7 +44,7 @@ TEST(EventTest, LOAD_COMPLETED_EVENT) { } TEST(EventTest, FINISH_TASK_EVENT) { - ResourceWPtr res(ResourcePtr(nullptr)); + ResourcePtr res(nullptr); auto event = std::make_shared(res, nullptr); ASSERT_FALSE(event->Dump().empty()); std::cout << *event; @@ -53,7 +53,7 @@ TEST(EventTest, FINISH_TASK_EVENT) { TEST(EventTest, TASKTABLE_UPDATED_EVENT) { - ResourceWPtr res(ResourcePtr(nullptr)); + ResourcePtr res(nullptr); auto event = std::make_shared(res); ASSERT_FALSE(event->Dump().empty()); std::cout << *event; diff --git a/core/unittest/scheduler/test_node.cpp b/core/unittest/scheduler/test_node.cpp index 9b34b73191..d2c93971ac 100644 --- a/core/unittest/scheduler/test_node.cpp +++ b/core/unittest/scheduler/test_node.cpp @@ -15,15 +15,14 @@ // specific language governing permissions and limitations // under the License. - -#include "scheduler/resource/Node.h" #include +#include "scheduler/resource/Node.h" namespace { namespace ms = milvus::scheduler; -} // namespace +} // namespace class NodeTest : public ::testing::Test { protected: @@ -73,9 +72,11 @@ TEST_F(NodeTest, GET_NEIGHBOURS) { bool n2 = false, n3 = false; auto node1_neighbours = node1_->GetNeighbours(); ASSERT_EQ(node1_neighbours.size(), 2); - for (auto &n : node1_neighbours) { - if (n.neighbour_node.lock() == node2_) n2 = true; - if (n.neighbour_node.lock() == node3_) n3 = true; + for (auto& n : node1_neighbours) { + if (n.neighbour_node == node2_) + n2 = true; + if (n.neighbour_node == node3_) + n3 = true; } ASSERT_TRUE(n2); ASSERT_TRUE(n3); @@ -84,7 +85,7 @@ TEST_F(NodeTest, GET_NEIGHBOURS) { { auto node2_neighbours = node2_->GetNeighbours(); ASSERT_EQ(node2_neighbours.size(), 1); - ASSERT_EQ(node2_neighbours[0].neighbour_node.lock(), node1_); + ASSERT_EQ(node2_neighbours[0].neighbour_node, node1_); } { @@ -100,4 +101,3 @@ TEST_F(NodeTest, DUMP) { std::cout << node2_->Dump(); ASSERT_FALSE(node2_->Dump().empty()); } - From ada0bf86ce2504fffe1edb0ce49282f2074d383c Mon Sep 17 00:00:00 2001 From: wxyu Date: Mon, 28 Oct 2019 19:23:05 +0800 Subject: [PATCH 72/89] solve conflicts Former-commit-id: 538671361c228898d0f2a81fdfdd7d3087bf0721 --- CHANGELOG.md | 1 - 1 file changed, 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 064ad9b439..cc2461a9c2 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -15,7 +15,6 @@ Please mark all change in change log and use the ticket from JIRA. - \#96 - Remove .a file in milvus/lib for docker-version - \#118 - Using shared_ptr instead of weak_ptr to avoid performance loss ->>>>>>> main/0.5.1 ## Feature - \#115 - Using new structure for tasktable From 2b1de98912a499d8a1d88097e5ce9ea9ca8834c4 Mon Sep 17 00:00:00 2001 From: wxyu Date: Mon, 28 Oct 2019 19:29:44 +0800 Subject: [PATCH 73/89] fix cpplint Former-commit-id: df5bb8526ac0fe0662b10fbfb7daa706900e6758 --- core/src/scheduler/resource/Node.h | 1 + 1 file changed, 1 insertion(+) diff --git a/core/src/scheduler/resource/Node.h b/core/src/scheduler/resource/Node.h index 53323fe6e2..177cdd735a 100644 --- a/core/src/scheduler/resource/Node.h +++ b/core/src/scheduler/resource/Node.h @@ -20,6 +20,7 @@ #include #include #include +#include #include #include "Connection.h" From 7f6092b6baf8d529e762f508fbac9feff80b77b4 Mon Sep 17 00:00:00 2001 From: "yudong.cai" Date: Mon, 28 Oct 2019 19:34:38 +0800 Subject: [PATCH 74/89] #89 add SQ8Hybrid MIX test and pure-GPU test Former-commit-id: 25b5f419d3deedd4ec39c014ea47822a83a0af38 --- .../faiss_benchmark/faiss_benchmark_test.cpp | 228 +++++++++++------- 1 file changed, 143 insertions(+), 85 deletions(-) diff --git a/core/src/index/unittest/faiss_benchmark/faiss_benchmark_test.cpp b/core/src/index/unittest/faiss_benchmark/faiss_benchmark_test.cpp index d1db0e9049..bb50198f92 100644 --- a/core/src/index/unittest/faiss_benchmark/faiss_benchmark_test.cpp +++ b/core/src/index/unittest/faiss_benchmark/faiss_benchmark_test.cpp @@ -28,6 +28,7 @@ #include #include #include +#include #include #include #include @@ -183,9 +184,31 @@ parse_ann_test_name(const std::string& ann_test_name, size_t& dim, faiss::Metric return true; } +int32_t +GetResultHitCount(const faiss::Index::idx_t* ground_index, const faiss::Index::idx_t* index, size_t ground_k, size_t k, + size_t nq, int32_t index_add_loops) { + assert(ground_k <= k); + int hit = 0; + for (int i = 0; i < nq; i++) { + // count the num of results exist in ground truth result set + // each result replicates INDEX_ADD_LOOPS times + for (int j_c = 0; j_c < ground_k; j_c++) { + int r_c = index[i * k + j_c]; + int j_g = 0; + for (; j_g < ground_k / index_add_loops; j_g++) { + if (ground_index[i * ground_k + j_g] == r_c) { + hit++; + continue; + } + } + } + } + return hit; +} + void test_ann_hdf5(const std::string& ann_test_name, const std::string& index_key, int32_t index_add_loops, - const std::vector& nprobes) { + const std::vector& nprobes, int32_t search_loops) { double t0 = elapsed(); const std::string ann_file_name = ann_test_name + ".hdf5"; @@ -265,8 +288,6 @@ test_ann_hdf5(const std::string& ann_test_name, const std::string& index_key, in for (auto nprobe : nprobes) { faiss::ParameterSpace params; - printf("[%.3f s] Setting parameter configuration 'nprobe=%lu' on index\n", elapsed() - t0, nprobe); - std::string nprobe_str = "nprobe=" + std::to_string(nprobe); params.set_index_parameters(index, nprobe_str.c_str()); @@ -277,39 +298,28 @@ test_ann_hdf5(const std::string& ann_test_name, const std::string& index_key, in float* D = new float[NQ * K]; printf("\n%s | %s | nprobe=%lu\n", ann_test_name.c_str(), index_key.c_str(), nprobe); - printf("============================================================================================\n"); + printf("======================================================================================\n"); for (size_t t_nq = 10; t_nq <= NQ; t_nq *= 10) { // nq = {10, 100, 1000} for (size_t t_k = 100; t_k <= K; t_k *= 10) { // k = {100, 1000} faiss::indexIVF_stats.quantization_time = 0.0; faiss::indexIVF_stats.search_time = 0.0; double t_start = elapsed(), t_end; - - index->search(t_nq, xq, t_k, D, I); - + for (int i = 0; i < search_loops; i++) { + index->search(t_nq, xq, t_k, D, I); + } t_end = elapsed(); // k = 100 for ground truth - int hit = 0; - for (int i = 0; i < t_nq; i++) { - // count the num of results exist in ground truth result set - // consider: each result replicates DATA_LOOPS times - for (int j_c = 0; j_c < k; j_c++) { - int r_c = I[i * t_k + j_c]; - for (int j_g = 0; j_g < k / index_add_loops; j_g++) { - if (gt[i * k + j_g] == r_c) { - hit++; - continue; - } - } - } - } - printf("nq = %4ld, k = %4ld, elapse = %fs (quant = %fs, search = %fs), R@ = %.4f\n", t_nq, t_k, - (t_end - t_start), faiss::indexIVF_stats.quantization_time / 1000, - faiss::indexIVF_stats.search_time / 1000, (hit / float(t_nq * k / index_add_loops))); + int32_t hit = GetResultHitCount(gt, I, k, t_k, t_nq, index_add_loops); + + printf("nq = %4ld, k = %4ld, elapse = %.4fs (quant = %.4fs, search = %.4fs), R@ = %.4f\n", t_nq, t_k, + (t_end - t_start) / search_loops, faiss::indexIVF_stats.quantization_time / 1000 / search_loops, + faiss::indexIVF_stats.search_time / 1000 / search_loops, + (hit / float(t_nq * k / index_add_loops))); } } - printf("============================================================================================\n"); + printf("======================================================================================\n"); #else printf("[%.3f s] Perform a search on %ld queries\n", elapsed() - t0, nq); @@ -353,7 +363,8 @@ test_ann_hdf5(const std::string& ann_test_name, const std::string& index_key, in #ifdef CUSTOMIZATION void -test_ivfsq8h_gpu(const std::string& ann_test_name, int32_t index_add_loops, const std::vector& nprobes) { +test_ivfsq8h(const std::string& ann_test_name, int32_t index_add_loops, const std::vector& nprobes, + bool pure_gpu_mode, int32_t search_loops) { double t0 = elapsed(); const std::string ann_file_name = ann_test_name + ".hdf5"; @@ -423,9 +434,18 @@ test_ivfsq8h_gpu(const std::string& ann_test_name, int32_t index_add_loops, cons index_composition.quantizer = nullptr; index_composition.mode = 1; + double copy_time = elapsed(); auto index = faiss::gpu::index_cpu_to_gpu(&res, 0, &index_composition, &option); delete index; + if (pure_gpu_mode) { + index_composition.mode = 2; // 0: all data, 1: copy quantizer, 2: copy data + index = faiss::gpu::index_cpu_to_gpu(&res, 0, &index_composition, &option); + } + + copy_time = elapsed() - copy_time; + printf("[%.3f s] Copy quantizer completed, cost %f s\n", elapsed() - t0, copy_time); + size_t nq; float* xq; { @@ -446,67 +466,98 @@ test_ivfsq8h_gpu(const std::string& ann_test_name, int32_t index_add_loops, cons assert(nq2 == nq || !"incorrect nb of ground truth entries"); gt = new faiss::Index::idx_t[k * nq]; - for (unsigned long i = 0; i < k * nq; ++i) { + for (uint64_t i = 0; i < k * nq; ++i) { gt[i] = gt_int[i]; } delete[] gt_int; } - for (auto nprobe : nprobes) { - printf("[%.3f s] Setting parameter configuration 'nprobe=%lu' on index\n", elapsed() - t0, nprobe); + const size_t NQ = 1000, K = 1000; + if (!pure_gpu_mode) { + for (auto nprobe : nprobes) { + auto ivf_index = dynamic_cast(cpu_index); + ivf_index->nprobe = nprobe; - auto ivf_index = dynamic_cast(cpu_index); - ivf_index->nprobe = nprobe; - - auto is_gpu_flat_index = dynamic_cast(ivf_index->quantizer); - if (is_gpu_flat_index == nullptr) { - delete ivf_index->quantizer; - ivf_index->quantizer = index_composition.quantizer; - } - - const size_t NQ = 1000, K = 1000; - long* I = new faiss::Index::idx_t[NQ * K]; - float* D = new float[NQ * K]; - - printf("\n%s | %s-gpu | nprobe=%lu\n", ann_test_name.c_str(), index_key.c_str(), nprobe); - printf("============================================================================================\n"); - for (size_t t_nq = 10; t_nq <= NQ; t_nq *= 10) { // nq = {10, 100, 1000} - for (size_t t_k = 100; t_k <= K; t_k *= 10) { // k = {100, 1000} - faiss::indexIVF_stats.quantization_time = 0.0; - faiss::indexIVF_stats.search_time = 0.0; - - double t_start = elapsed(), t_end; - - cpu_index->search(t_nq, xq, t_k, D, I); - - t_end = elapsed(); - - // k = 100 for ground truth - int hit = 0; - for (unsigned long i = 0; i < t_nq; i++) { - // count the num of results exist in ground truth result set - // consider: each result replicates DATA_LOOPS times - for (unsigned long j_c = 0; j_c < k; j_c++) { - int r_c = I[i * t_k + j_c]; - for (unsigned long j_g = 0; j_g < k / index_add_loops; j_g++) { - if (gt[i * k + j_g] == r_c) { - hit++; - continue; - } - } - } - } - printf("nq = %4ld, k = %4ld, elapse = %fs (quant = %fs, search = %fs), R@ = %.4f\n", t_nq, t_k, - (t_end - t_start), faiss::indexIVF_stats.quantization_time / 1000, - faiss::indexIVF_stats.search_time / 1000, (hit / float(t_nq * k / index_add_loops))); + auto is_gpu_flat_index = dynamic_cast(ivf_index->quantizer); + if (is_gpu_flat_index == nullptr) { + delete ivf_index->quantizer; + ivf_index->quantizer = index_composition.quantizer; } + + int64_t* I = new faiss::Index::idx_t[NQ * K]; + float* D = new float[NQ * K]; + + printf("\n%s | %s-MIX | nprobe=%lu\n", ann_test_name.c_str(), index_key.c_str(), nprobe); + printf("======================================================================================\n"); + for (size_t t_nq = 10; t_nq <= NQ; t_nq *= 10) { // nq = {10, 100, 1000} + for (size_t t_k = 100; t_k <= K; t_k *= 10) { // k = {100, 1000} + faiss::indexIVF_stats.quantization_time = 0.0; + faiss::indexIVF_stats.search_time = 0.0; + + double t_start = elapsed(), t_end; + for (int32_t i = 0; i < search_loops; i++) { + cpu_index->search(t_nq, xq, t_k, D, I); + } + t_end = elapsed(); + + // k = 100 for ground truth + int32_t hit = GetResultHitCount(gt, I, k, t_k, t_nq, index_add_loops); + + printf("nq = %4ld, k = %4ld, elapse = %.4fs (quant = %.4fs, search = %.4fs), R@ = %.4f\n", t_nq, + t_k, (t_end - t_start) / search_loops, + faiss::indexIVF_stats.quantization_time / 1000 / search_loops, + faiss::indexIVF_stats.search_time / 1000 / search_loops, + (hit / float(t_nq * k / index_add_loops))); + } + } + printf("======================================================================================\n"); + + printf("[%.3f s] Search test done\n\n", elapsed() - t0); + + delete[] I; + delete[] D; } - printf("============================================================================================\n"); + } else { + std::shared_ptr gpu_index_ivf_ptr = std::shared_ptr(index); - printf("[%.3f s] Search test done\n\n", elapsed() - t0); + for (auto nprobe : nprobes) { + faiss::gpu::GpuIndexIVFSQHybrid* gpu_index_ivf_hybrid = + dynamic_cast(gpu_index_ivf_ptr.get()); + gpu_index_ivf_hybrid->setNumProbes(nprobe); - delete[] I; - delete[] D; + int64_t* I = new faiss::Index::idx_t[NQ * K]; + float* D = new float[NQ * K]; + + printf("\n%s | %s-GPU | nprobe=%lu\n", ann_test_name.c_str(), index_key.c_str(), nprobe); + printf("======================================================================================\n"); + for (size_t t_nq = 10; t_nq <= NQ; t_nq *= 10) { // nq = {10, 100, 1000} + for (size_t t_k = 100; t_k <= K; t_k *= 10) { // k = {100, 1000} + faiss::indexIVF_stats.quantization_time = 0.0; + faiss::indexIVF_stats.search_time = 0.0; + + double t_start = elapsed(), t_end; + for (int32_t i = 0; i < search_loops; i++) { + gpu_index_ivf_ptr->search(nq, xq, k, D, I); + } + t_end = elapsed(); + + // k = 100 for ground truth + int32_t hit = GetResultHitCount(gt, I, k, t_k, t_nq, index_add_loops); + + printf("nq = %4ld, k = %4ld, elapse = %.4fs (quant = %.4fs, search = %.4fs), R@ = %.4f\n", t_nq, + t_k, (t_end - t_start) / search_loops, + faiss::indexIVF_stats.quantization_time / 1000 / search_loops, + faiss::indexIVF_stats.search_time / 1000 / search_loops, + (hit / float(t_nq * k / index_add_loops))); + } + } + printf("======================================================================================\n"); + + printf("[%.3f s] Search test done\n\n", elapsed() - t0); + + delete[] I; + delete[] D; + } } delete[] xq; @@ -530,17 +581,24 @@ test_ivfsq8h_gpu(const std::string& ann_test_name, int32_t index_add_loops, cons *************************************************************************************/ TEST(FAISSTEST, BENCHMARK) { - test_ann_hdf5("sift-128-euclidean", "IVF4096,Flat", 2, {8, 128}); - test_ann_hdf5("sift-128-euclidean", "IVF16384,SQ8", 2, {8, 128}); + std::vector param_nprobes = {8, 128}; + const int32_t SEARCH_LOOPS = 5; + const int32_t SIFT_INSERT_LOOPS = 2; // insert twice to get ~1G data set + const int32_t GLOVE_INSERT_LOOPS = 1; + + test_ann_hdf5("sift-128-euclidean", "IVF4096,Flat", SIFT_INSERT_LOOPS, param_nprobes, SEARCH_LOOPS); + test_ann_hdf5("sift-128-euclidean", "IVF16384,SQ8", SIFT_INSERT_LOOPS, param_nprobes, SEARCH_LOOPS); #ifdef CUSTOMIZATION - test_ann_hdf5("sift-128-euclidean", "IVF16384,SQ8Hybrid", 2, {8, 128}); - test_ivfsq8h_gpu("sift-128-euclidean", 2, {8, 128}); + test_ann_hdf5("sift-128-euclidean", "IVF16384,SQ8Hybrid", SIFT_INSERT_LOOPS, param_nprobes, SEARCH_LOOPS); + test_ivfsq8h("sift-128-euclidean", SIFT_INSERT_LOOPS, param_nprobes, false, SEARCH_LOOPS); + test_ivfsq8h("sift-128-euclidean", SIFT_INSERT_LOOPS, param_nprobes, true, SEARCH_LOOPS); #endif - test_ann_hdf5("glove-200-angular", "IVF4096,Flat", 1, {8, 128}); - test_ann_hdf5("glove-200-angular", "IVF16384,SQ8", 1, {8, 128}); + test_ann_hdf5("glove-200-angular", "IVF4096,Flat", GLOVE_INSERT_LOOPS, param_nprobes, SEARCH_LOOPS); + test_ann_hdf5("glove-200-angular", "IVF16384,SQ8", GLOVE_INSERT_LOOPS, param_nprobes, SEARCH_LOOPS); #ifdef CUSTOMIZATION - test_ann_hdf5("glove-200-angular", "IVF16384,SQ8Hybrid", 1, {8, 128}); - test_ivfsq8h_gpu("glove-200-angular", 1, {8, 128}); + test_ann_hdf5("glove-200-angular", "IVF16384,SQ8Hybrid", GLOVE_INSERT_LOOPS, param_nprobes, SEARCH_LOOPS); + test_ivfsq8h("glove-200-angular", GLOVE_INSERT_LOOPS, param_nprobes, false, SEARCH_LOOPS); + test_ivfsq8h("glove-200-angular", GLOVE_INSERT_LOOPS, param_nprobes, true, SEARCH_LOOPS); #endif } From 97ae8a780ca0009e7f1749a339f50cb954cc4fa2 Mon Sep 17 00:00:00 2001 From: "yudong.cai" Date: Mon, 28 Oct 2019 19:55:43 +0800 Subject: [PATCH 75/89] #89 add README.txt Former-commit-id: a84501ce6c2c94275819ace9e7d7a4afc14fbeca --- core/src/index/unittest/faiss_benchmark/README.txt | 13 +++++++++++++ 1 file changed, 13 insertions(+) create mode 100644 core/src/index/unittest/faiss_benchmark/README.txt diff --git a/core/src/index/unittest/faiss_benchmark/README.txt b/core/src/index/unittest/faiss_benchmark/README.txt new file mode 100644 index 0000000000..81114d8381 --- /dev/null +++ b/core/src/index/unittest/faiss_benchmark/README.txt @@ -0,0 +1,13 @@ +To run this FAISS benchmark, please follow these steps: + +1. Download the HDF5 from: + https://support.hdfgroup.org/ftp/HDF5/releases/ + and install to /usr/local/hdf5 + +2. Download HDF5 data files from: + https://github.com/erikbern/ann-benchmarks + +3. Put HDF5 data files into the same directory with test binary + +4. Run the test binary + From b439da8a36dcfa2f63867cb70786859fe992dcf3 Mon Sep 17 00:00:00 2001 From: "yudong.cai" Date: Mon, 28 Oct 2019 20:17:25 +0800 Subject: [PATCH 76/89] #89 update README.md Former-commit-id: 0b85b430c0d4a69e0470e916a76adda16f96c12b --- .../index/unittest/faiss_benchmark/README.md | 25 +++++++++++++++++++ .../index/unittest/faiss_benchmark/README.txt | 13 ---------- 2 files changed, 25 insertions(+), 13 deletions(-) create mode 100644 core/src/index/unittest/faiss_benchmark/README.md delete mode 100644 core/src/index/unittest/faiss_benchmark/README.txt diff --git a/core/src/index/unittest/faiss_benchmark/README.md b/core/src/index/unittest/faiss_benchmark/README.md new file mode 100644 index 0000000000..c451ac13b0 --- /dev/null +++ b/core/src/index/unittest/faiss_benchmark/README.md @@ -0,0 +1,25 @@ +### To run this FAISS benchmark, please follow these steps: + +#### Step 1: +Download the HDF5 source from: + https://support.hdfgroup.org/ftp/HDF5/releases/ +and build/install to "/usr/local/hdf5". + +#### Step 2: +Download HDF5 data files from: + https://github.com/erikbern/ann-benchmarks + +#### Step 3: +Update 'milvus/core/src/index/unittest/CMakeLists.txt', +uncomment "#add_subdirectory(faiss_benchmark)". + +#### Step 4: +Build Milvus with unittest enabled: "./build.sh -t Release -u", +binary 'test_faiss_benchmark' will be generated. + +#### Step 5: +Put HDF5 data files into the same directory with binary 'test_faiss_benchmark'. + +#### Step 6: +Run test binary 'test_faiss_benchmark'. + diff --git a/core/src/index/unittest/faiss_benchmark/README.txt b/core/src/index/unittest/faiss_benchmark/README.txt deleted file mode 100644 index 81114d8381..0000000000 --- a/core/src/index/unittest/faiss_benchmark/README.txt +++ /dev/null @@ -1,13 +0,0 @@ -To run this FAISS benchmark, please follow these steps: - -1. Download the HDF5 from: - https://support.hdfgroup.org/ftp/HDF5/releases/ - and install to /usr/local/hdf5 - -2. Download HDF5 data files from: - https://github.com/erikbern/ann-benchmarks - -3. Put HDF5 data files into the same directory with test binary - -4. Run the test binary - From 12b7d6f5d8011ec9b60e3f6184467f11873ec9e7 Mon Sep 17 00:00:00 2001 From: starlord Date: Mon, 28 Oct 2019 20:24:13 +0800 Subject: [PATCH 77/89] #90 The server start error messages could be improved to enhance user experience Former-commit-id: 746b126621cd845c2848bb850b5c2eb6d3b65a6d --- CHANGELOG.md | 1 + core/src/server/Config.cpp | 134 +++++++++++++++++++++++--------- core/src/server/DBWrapper.cpp | 47 +++++++---- core/unittest/db/utils.cpp | 2 +- core/unittest/wrapper/utils.cpp | 2 +- 5 files changed, 133 insertions(+), 53 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 0925fa1a68..c4643c0adc 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,6 +5,7 @@ Please mark all change in change log and use the ticket from JIRA. # Milvus 0.5.1 (TODO) ## Bug +- \#90 - The server start error messages could be improved to enhance user experience - \#104 - test_scheduler core dump ## Improvement diff --git a/core/src/server/Config.cpp b/core/src/server/Config.cpp index 7de84cbccc..b20d6c2436 100644 --- a/core/src/server/Config.cpp +++ b/core/src/server/Config.cpp @@ -363,7 +363,9 @@ Config::PrintAll() { Status Config::CheckServerConfigAddress(const std::string& value) { if (!ValidationUtil::ValidateIpAddress(value).ok()) { - return Status(SERVER_INVALID_ARGUMENT, "Invalid server config address: " + value); + std::string msg = "Invalid server IP address: " + value + + ". Possible reason: server_config.address is invalid in server_config.yaml."; + return Status(SERVER_INVALID_ARGUMENT, msg); } return Status::OK(); } @@ -371,11 +373,15 @@ Config::CheckServerConfigAddress(const std::string& value) { Status Config::CheckServerConfigPort(const std::string& value) { if (!ValidationUtil::ValidateStringIsNumber(value).ok()) { - return Status(SERVER_INVALID_ARGUMENT, "Invalid server config port: " + value); + std::string msg = "Port " + value + " is not a number. " + + "Possible reason: server_config.port in server_config.yaml is invalid."; + return Status(SERVER_INVALID_ARGUMENT, msg); } else { int32_t port = std::stoi(value); if (!(port > 1024 && port < 65535)) { - return Status(SERVER_INVALID_ARGUMENT, "Server config port out of range (1024, 65535): " + value); + std::string msg = "Port " + value + " is not in range [1025, 65534]. " + + "Possible reason: server_config.port in server_config.yaml is invalid."; + return Status(SERVER_INVALID_ARGUMENT, msg); } } return Status::OK(); @@ -385,7 +391,8 @@ Status Config::CheckServerConfigDeployMode(const std::string& value) { if (value != "single" && value != "cluster_readonly" && value != "cluster_writable") { return Status(SERVER_INVALID_ARGUMENT, - "Invalid server config mode [single, cluster_readonly, cluster_writable]: " + value); + "Error: server_config.deploy_mode in server_config.yaml is not one of " + "single, cluster_readonly, and cluster_writable."); } return Status::OK(); } @@ -411,7 +418,8 @@ Config::CheckServerConfigTimeZone(const std::string& value) { Status Config::CheckDBConfigPrimaryPath(const std::string& value) { if (value.empty()) { - return Status(SERVER_INVALID_ARGUMENT, "DB config primary_path empty"); + return Status(SERVER_INVALID_ARGUMENT, + "db_path is empty. Possible reason: db_config.db_path in server_config.yaml is empty."); } return Status::OK(); } @@ -424,7 +432,11 @@ Config::CheckDBConfigSecondaryPath(const std::string& value) { Status Config::CheckDBConfigBackendUrl(const std::string& value) { if (!ValidationUtil::ValidateDbURI(value).ok()) { - return Status(SERVER_INVALID_ARGUMENT, "Invalid DB config backend_url: " + value); + std::string msg = + "Invalid db_backend_url: " + value + + ". Possible reason: db_config.db_backend_url is invalid in server_config.yaml. " + + "The correct format should be like sqlite://:@:/ or mysql://root:123456@127.0.0.1:3306/milvus."; + return Status(SERVER_INVALID_ARGUMENT, "invalid db_backend_url: " + value); } return Status::OK(); } @@ -432,7 +444,9 @@ Config::CheckDBConfigBackendUrl(const std::string& value) { Status Config::CheckDBConfigArchiveDiskThreshold(const std::string& value) { if (!ValidationUtil::ValidateStringIsNumber(value).ok()) { - return Status(SERVER_INVALID_ARGUMENT, "Invalid DB config archive_disk_threshold: " + value); + std::string msg = "Invalid archive disk threshold: " + value + + "Possible reason: db_config.archive_disk_threshold in server_config.yaml is invalid."; + return Status(SERVER_INVALID_ARGUMENT, msg); } return Status::OK(); } @@ -440,7 +454,9 @@ Config::CheckDBConfigArchiveDiskThreshold(const std::string& value) { Status Config::CheckDBConfigArchiveDaysThreshold(const std::string& value) { if (!ValidationUtil::ValidateStringIsNumber(value).ok()) { - return Status(SERVER_INVALID_ARGUMENT, "Invalid DB config archive_days_threshold: " + value); + std::string msg = "Invalid archive days threshold: " + value + + "Possible reason: db_config.archive_disk_threshold in server_config.yaml is invalid."; + return Status(SERVER_INVALID_ARGUMENT, msg); } return Status::OK(); } @@ -448,13 +464,17 @@ Config::CheckDBConfigArchiveDaysThreshold(const std::string& value) { Status Config::CheckDBConfigInsertBufferSize(const std::string& value) { if (!ValidationUtil::ValidateStringIsNumber(value).ok()) { - return Status(SERVER_INVALID_ARGUMENT, "Invalid DB config insert_buffer_size: " + value); + std::string msg = "Invalid insert buffer size: " + value + + "Possible reason: db_config.insert_buffer_size in server_config.yaml is invalid."; + return Status(SERVER_INVALID_ARGUMENT, msg); } else { int64_t buffer_size = std::stoi(value) * GB; uint64_t total_mem = 0, free_mem = 0; CommonUtil::GetSystemMemInfo(total_mem, free_mem); if (buffer_size >= total_mem) { - return Status(SERVER_INVALID_ARGUMENT, "DB config insert_buffer_size exceed system memory: " + value); + std::string msg = "Invalid insert buffer size: " + value + + "Possible reason: insert buffer size exceed system memory."; + return Status(SERVER_INVALID_ARGUMENT, msg); } } return Status::OK(); @@ -463,7 +483,9 @@ Config::CheckDBConfigInsertBufferSize(const std::string& value) { Status Config::CheckMetricConfigEnableMonitor(const std::string& value) { if (!ValidationUtil::ValidateStringIsBool(value).ok()) { - return Status(SERVER_INVALID_ARGUMENT, "Invalid metric config auto_bootup: " + value); + std::string msg = "Invalid metric config: " + value + + "Possible reason: metric_config.enable_monitor is invalid."; + return Status(SERVER_INVALID_ARGUMENT, msg); } return Status::OK(); } @@ -471,7 +493,9 @@ Config::CheckMetricConfigEnableMonitor(const std::string& value) { Status Config::CheckMetricConfigCollector(const std::string& value) { if (value != "prometheus") { - return Status(SERVER_INVALID_ARGUMENT, "Invalid metric config collector: " + value); + std::string msg = "Invalid metric config: " + value + + "Possible reason: metric_config.collector is invalid."; + return Status(SERVER_INVALID_ARGUMENT, msg); } return Status::OK(); } @@ -479,6 +503,8 @@ Config::CheckMetricConfigCollector(const std::string& value) { Status Config::CheckMetricConfigPrometheusPort(const std::string& value) { if (!ValidationUtil::ValidateStringIsNumber(value).ok()) { + std::string msg = "Invalid metric config: " + value + + "Possible reason: metric_config.prometheus_config.port is invalid."; return Status(SERVER_INVALID_ARGUMENT, "Invalid metric config prometheus_port: " + value); } return Status::OK(); @@ -487,15 +513,19 @@ Config::CheckMetricConfigPrometheusPort(const std::string& value) { Status Config::CheckCacheConfigCpuCacheCapacity(const std::string& value) { if (!ValidationUtil::ValidateStringIsNumber(value).ok()) { - return Status(SERVER_INVALID_ARGUMENT, "Invalid cache config cpu_cache_capacity: " + value); + std::string msg = "Invalid cpu cache capacity: " + value + + "Possible reason: cache_config.cpu_cache_capacity is invalid."; + return Status(SERVER_INVALID_ARGUMENT, msg); } else { uint64_t cpu_cache_capacity = std::stoi(value) * GB; uint64_t total_mem = 0, free_mem = 0; CommonUtil::GetSystemMemInfo(total_mem, free_mem); if (cpu_cache_capacity >= total_mem) { - return Status(SERVER_INVALID_ARGUMENT, "Cache config cpu_cache_capacity exceed system memory: " + value); + std::string msg = "Invalid cpu cache capacity: " + value + + "Possible reason: Cache config cpu_cache_capacity exceed system memory."; + return Status(SERVER_INVALID_ARGUMENT, msg); } else if (cpu_cache_capacity > static_cast(total_mem * 0.9)) { - std::cerr << "Warning: cpu_cache_capacity value is too big" << std::endl; + std::cerr << "WARNING: cpu cache capacity value is too big" << std::endl; } int32_t buffer_value; @@ -506,7 +536,10 @@ Config::CheckCacheConfigCpuCacheCapacity(const std::string& value) { int64_t insert_buffer_size = buffer_value * GB; if (insert_buffer_size + cpu_cache_capacity >= total_mem) { - return Status(SERVER_INVALID_ARGUMENT, "Sum of cpu_cache_capacity and buffer_size exceed system memory"); + std::string msg = "Invalid cpu cache capacity: " + value + + "Possible reason: sum of cache_config.cpu_cache_capacity and " + "db_config.insert_buffer_size exceeds system memory."; + return Status(SERVER_INVALID_ARGUMENT, msg); } } return Status::OK(); @@ -515,11 +548,15 @@ Config::CheckCacheConfigCpuCacheCapacity(const std::string& value) { Status Config::CheckCacheConfigCpuCacheThreshold(const std::string& value) { if (!ValidationUtil::ValidateStringIsFloat(value).ok()) { - return Status(SERVER_INVALID_ARGUMENT, "Invalid cache config cpu_cache_threshold: " + value); + std::string msg = "Invalid cpu cache threshold: " + value + + "Possible reason: cache_config.cpu_cache_threshold is invalid."; + return Status(SERVER_INVALID_ARGUMENT, msg); } else { float cpu_cache_threshold = std::stof(value); if (cpu_cache_threshold <= 0.0 || cpu_cache_threshold >= 1.0) { - return Status(SERVER_INVALID_ARGUMENT, "Invalid cache config cpu_cache_threshold: " + value); + std::string msg = "Invalid cpu cache threshold: " + value + + "Possible reason: cache_config.cpu_cache_threshold is invalid."; + return Status(SERVER_INVALID_ARGUMENT, msg); } } return Status::OK(); @@ -528,7 +565,9 @@ Config::CheckCacheConfigCpuCacheThreshold(const std::string& value) { Status Config::CheckCacheConfigGpuCacheCapacity(const std::string& value) { if (!ValidationUtil::ValidateStringIsNumber(value).ok()) { - return Status(SERVER_INVALID_ARGUMENT, "Invalid cache config gpu_cache_capacity: " + value); + std::string msg = "Invalid gpu cache capacity: " + value + + "Possible reason: cache_config.gpu_cache_capacity is invalid."; + return Status(SERVER_INVALID_ARGUMENT, msg); } else { uint64_t gpu_cache_capacity = std::stoi(value) * GB; int gpu_index; @@ -539,13 +578,14 @@ Config::CheckCacheConfigGpuCacheCapacity(const std::string& value) { size_t gpu_memory; if (!ValidationUtil::GetGpuMemory(gpu_index, gpu_memory).ok()) { - return Status(SERVER_UNEXPECTED_ERROR, - "Fail to get GPU memory for GPU device: " + std::to_string(gpu_index)); + std::string msg = "Fail to get GPU memory for GPU device: " + std::to_string(gpu_index); + return Status(SERVER_UNEXPECTED_ERROR, msg); } else if (gpu_cache_capacity >= gpu_memory) { - return Status(SERVER_INVALID_ARGUMENT, - "Cache config gpu_cache_capacity exceed GPU memory: " + std::to_string(gpu_memory)); + std::string msg = "Invalid gpu cache capacity: " + value + + "Possible reason: cache_config.gpu_cache_capacity exceed GPU memory."; + return Status(SERVER_INVALID_ARGUMENT, msg); } else if (gpu_cache_capacity > (double)gpu_memory * 0.9) { - std::cerr << "Warning: gpu_cache_capacity value is too big" << std::endl; + std::cerr << "Warning: gpu cache capacity value is too big" << std::endl; } } return Status::OK(); @@ -554,11 +594,15 @@ Config::CheckCacheConfigGpuCacheCapacity(const std::string& value) { Status Config::CheckCacheConfigGpuCacheThreshold(const std::string& value) { if (!ValidationUtil::ValidateStringIsFloat(value).ok()) { - return Status(SERVER_INVALID_ARGUMENT, "Invalid cache config gpu_cache_threshold: " + value); + std::string msg = "Invalid gpu cache threshold: " + value + + "Possible reason: cache_config.gpu_cache_threshold is invalid."; + return Status(SERVER_INVALID_ARGUMENT, msg); } else { float gpu_cache_threshold = std::stof(value); if (gpu_cache_threshold <= 0.0 || gpu_cache_threshold >= 1.0) { - return Status(SERVER_INVALID_ARGUMENT, "Invalid cache config gpu_cache_threshold: " + value); + std::string msg = "Invalid gpu cache threshold: " + value + + "Possible reason: cache_config.gpu_cache_threshold is invalid."; + return Status(SERVER_INVALID_ARGUMENT, msg); } } return Status::OK(); @@ -567,7 +611,9 @@ Config::CheckCacheConfigGpuCacheThreshold(const std::string& value) { Status Config::CheckCacheConfigCacheInsertData(const std::string& value) { if (!ValidationUtil::ValidateStringIsBool(value).ok()) { - return Status(SERVER_INVALID_ARGUMENT, "Invalid cache config cache_insert_data: " + value); + std::string msg = "Invalid cache insert option: " + value + + "Possible reason: cache_config.cache_insert_data is invalid."; + return Status(SERVER_INVALID_ARGUMENT, msg); } return Status::OK(); } @@ -575,7 +621,9 @@ Config::CheckCacheConfigCacheInsertData(const std::string& value) { Status Config::CheckEngineConfigUseBlasThreshold(const std::string& value) { if (!ValidationUtil::ValidateStringIsNumber(value).ok()) { - return Status(SERVER_INVALID_ARGUMENT, "Invalid engine config use_blas_threshold: " + value); + std::string msg = "Invalid blas threshold: " + value + + "Possible reason: engine_config.use_blas_threshold is invalid."; + return Status(SERVER_INVALID_ARGUMENT, msg); } return Status::OK(); } @@ -583,14 +631,18 @@ Config::CheckEngineConfigUseBlasThreshold(const std::string& value) { Status Config::CheckEngineConfigOmpThreadNum(const std::string& value) { if (!ValidationUtil::ValidateStringIsNumber(value).ok()) { - return Status(SERVER_INVALID_ARGUMENT, "Invalid engine config omp_thread_num: " + value); + std::string msg = "Invalid omp thread number: " + value + + "Possible reason: engine_config.omp_thread_num is invalid."; + return Status(SERVER_INVALID_ARGUMENT, msg); } int32_t omp_thread = std::stoi(value); uint32_t sys_thread_cnt = 8; CommonUtil::GetSystemAvailableThreads(sys_thread_cnt); if (omp_thread > static_cast(sys_thread_cnt)) { - return Status(SERVER_INVALID_ARGUMENT, "Invalid engine config omp_thread_num: " + value); + std::string msg = "Invalid omp thread number: " + value + + "Possible reason: engine_config.omp_thread_num is invalid."; + return Status(SERVER_INVALID_ARGUMENT, msg); } return Status::OK(); } @@ -598,7 +650,9 @@ Config::CheckEngineConfigOmpThreadNum(const std::string& value) { Status Config::CheckResourceConfigMode(const std::string& value) { if (value != "simple") { - return Status(SERVER_INVALID_ARGUMENT, "Invalid resource config mode: " + value); + std::string msg = "Invalid resource mode: " + value + + "Possible reason: resource_config.mode is invalid."; + return Status(SERVER_INVALID_ARGUMENT, msg); } return Status::OK(); } @@ -608,12 +662,16 @@ CheckGpuDevice(const std::string& value) { const std::regex pat("gpu(\\d+)"); std::cmatch m; if (!std::regex_match(value.c_str(), m, pat)) { - return Status(SERVER_INVALID_ARGUMENT, "Invalid gpu device: " + value); + std::string msg = "Invalid gpu device: " + value + + "Possible reason: resource_config.search_resources is invalid."; + return Status(SERVER_INVALID_ARGUMENT, msg); } int32_t gpu_index = std::stoi(value.substr(3)); if (!ValidationUtil::ValidateGpuIndex(gpu_index).ok()) { - return Status(SERVER_INVALID_ARGUMENT, "Invalid gpu device: " + value); + std::string msg = "Invalid gpu device: " + value + + "Possible reason: resource_config.search_resources is invalid."; + return Status(SERVER_INVALID_ARGUMENT, msg); } return Status::OK(); } @@ -621,11 +679,15 @@ CheckGpuDevice(const std::string& value) { Status Config::CheckResourceConfigSearchResources(const std::vector& value) { if (value.empty()) { - return Status(SERVER_INVALID_ARGUMENT, "Empty resource config search_resources"); + std::string msg = "Invalid search resource. " + "Possible reason: resource_config.search_resources is empty."; + return Status(SERVER_INVALID_ARGUMENT, msg); } for (auto& gpu_device : value) { if (!CheckGpuDevice(gpu_device).ok()) { + std::string msg = "Invalid search resource: " + gpu_device + + "Possible reason: resource_config.search_resources is invalid."; return Status(SERVER_INVALID_ARGUMENT, "Invalid resource config search_resources: " + gpu_device); } } @@ -635,7 +697,9 @@ Config::CheckResourceConfigSearchResources(const std::vector& value Status Config::CheckResourceConfigIndexBuildDevice(const std::string& value) { if (!CheckGpuDevice(value).ok()) { - return Status(SERVER_INVALID_ARGUMENT, "Invalid resource config index_build_device: " + value); + std::string msg = "Invalid index build device: " + value + + "Possible reason: resource_config.index_build_device is invalid."; + return Status(SERVER_INVALID_ARGUMENT, msg); } return Status::OK(); } diff --git a/core/src/server/DBWrapper.cpp b/core/src/server/DBWrapper.cpp index a5b892ad47..2217b29e1c 100644 --- a/core/src/server/DBWrapper.cpp +++ b/core/src/server/DBWrapper.cpp @@ -54,7 +54,8 @@ DBWrapper::StartService() { std::string db_slave_path; s = config.GetDBConfigSecondaryPath(db_slave_path); if (!s.ok()) { - return s; + std::cerr << s.ToString() << std::endl; + kill(0, SIGUSR1); } StringHelpFunctions::SplitStringByDelimeter(db_slave_path, ";", opt.meta_.slave_paths_); @@ -62,13 +63,15 @@ DBWrapper::StartService() { // cache config s = config.GetCacheConfigCacheInsertData(opt.insert_cache_immediately_); if (!s.ok()) { - return s; + std::cerr << s.ToString() << std::endl; + kill(0, SIGUSR1); } std::string mode; s = config.GetServerConfigDeployMode(mode); if (!s.ok()) { - return s; + std::cerr << s.ToString() << std::endl; + kill(0, SIGUSR1); } if (mode == "single") { @@ -78,7 +81,8 @@ DBWrapper::StartService() { } else if (mode == "cluster_writable") { opt.mode_ = engine::DBOptions::MODE::CLUSTER_WRITABLE; } else { - std::cerr << "ERROR: mode specified in server_config must be ['single', 'cluster_readonly', 'cluster_writable']" + std::cerr << "Error: server_config.deploy_mode in server_config.yaml is not one of " + << "single, cluster_readonly, and cluster_writable." << std::endl; kill(0, SIGUSR1); } @@ -87,7 +91,8 @@ DBWrapper::StartService() { int32_t omp_thread; s = config.GetEngineConfigOmpThreadNum(omp_thread); if (!s.ok()) { - return s; + std::cerr << s.ToString() << std::endl; + kill(0, SIGUSR1); } if (omp_thread > 0) { @@ -105,7 +110,8 @@ DBWrapper::StartService() { int32_t use_blas_threshold; s = config.GetEngineConfigUseBlasThreshold(use_blas_threshold); if (!s.ok()) { - return s; + std::cerr << s.ToString() << std::endl; + kill(0, SIGUSR1); } faiss::distance_compute_blas_threshold = use_blas_threshold; @@ -115,7 +121,8 @@ DBWrapper::StartService() { int32_t disk, days; s = config.GetDBConfigArchiveDiskThreshold(disk); if (!s.ok()) { - return s; + std::cerr << s.ToString() << std::endl; + kill(0, SIGUSR1); } if (disk > 0) { @@ -124,7 +131,8 @@ DBWrapper::StartService() { s = config.GetDBConfigArchiveDaysThreshold(days); if (!s.ok()) { - return s; + std::cerr << s.ToString() << std::endl; + kill(0, SIGUSR1); } if (days > 0) { @@ -133,16 +141,20 @@ DBWrapper::StartService() { opt.meta_.archive_conf_.SetCriterias(criterial); // create db root folder - Status status = CommonUtil::CreateDirectory(opt.meta_.path_); - if (!status.ok()) { - std::cerr << "ERROR! Failed to create database root path: " << opt.meta_.path_ << std::endl; + s = CommonUtil::CreateDirectory(opt.meta_.path_); + if (!s.ok()) { + std::cerr << "Error: Failed to create database primary path: " << path + << ". Possible reason: db_config.primary_path is wrong in server_config.yaml or not available." + << std::endl; kill(0, SIGUSR1); } for (auto& path : opt.meta_.slave_paths_) { - status = CommonUtil::CreateDirectory(path); - if (!status.ok()) { - std::cerr << "ERROR! Failed to create database slave path: " << path << std::endl; + s = CommonUtil::CreateDirectory(path); + if (!s.ok()) { + std::cerr << "Error: Failed to create database secondary path: " << path + << ". Possible reason: db_config.secondary_path is wrong in server_config.yaml or not available." + << std::endl; kill(0, SIGUSR1); } } @@ -151,7 +163,9 @@ DBWrapper::StartService() { try { db_ = engine::DBFactory::Build(opt); } catch (std::exception& ex) { - std::cerr << "ERROR! Failed to open database: " << ex.what() << std::endl; + std::cerr << "Error: failed to open database: " << ex.what() + << ". Possible reason: the meta system does not work." + << std::endl; kill(0, SIGUSR1); } @@ -161,7 +175,8 @@ DBWrapper::StartService() { std::string preload_tables; s = config.GetDBConfigPreloadTable(preload_tables); if (!s.ok()) { - return s; + std::cerr << s.ToString() << std::endl; + kill(0, SIGUSR1); } s = PreloadTables(preload_tables); diff --git a/core/unittest/db/utils.cpp b/core/unittest/db/utils.cpp index 8903ce14ea..7cc2f28745 100644 --- a/core/unittest/db/utils.cpp +++ b/core/unittest/db/utils.cpp @@ -68,7 +68,7 @@ static const char " blas_threshold: 20\n" "\n" "resource_config:\n" - " resource_pool:\n" + " search_resources:\n" " - gpu0\n" " index_build_device: gpu0 # GPU used for building index"; diff --git a/core/unittest/wrapper/utils.cpp b/core/unittest/wrapper/utils.cpp index 6204ac0c05..b397a35d7c 100644 --- a/core/unittest/wrapper/utils.cpp +++ b/core/unittest/wrapper/utils.cpp @@ -58,7 +58,7 @@ static const char " blas_threshold: 20\n" "\n" "resource_config:\n" - " resource_pool:\n" + " search_resources:\n" " - gpu0\n" " index_build_device: gpu0 # GPU used for building index"; From 7510f1f7a2e3853bd93498dfd3cb2399a7eadb68 Mon Sep 17 00:00:00 2001 From: wxyu Date: Mon, 28 Oct 2019 20:28:36 +0800 Subject: [PATCH 78/89] remove unused code Former-commit-id: 630cb776ec1a736f78241835fbbc8cc95b68deaa --- core/src/scheduler/SchedInst.cpp | 70 -------------------------------- core/src/scheduler/job/Job.cpp | 21 ++++++++++ 2 files changed, 21 insertions(+), 70 deletions(-) create mode 100644 core/src/scheduler/job/Job.cpp diff --git a/core/src/scheduler/SchedInst.cpp b/core/src/scheduler/SchedInst.cpp index f3f293a0f3..8474e93c1f 100644 --- a/core/src/scheduler/SchedInst.cpp +++ b/core/src/scheduler/SchedInst.cpp @@ -82,79 +82,9 @@ load_simple_config() { } } -void -load_advance_config() { - // try { - // server::ConfigNode &config = server::Config::GetInstance().GetConfig(server::CONFIG_RESOURCE); - // - // if (config.GetChildren().empty()) throw "resource_config null exception"; - // - // auto resources = config.GetChild(server::CONFIG_RESOURCES).GetChildren(); - // - // if (resources.empty()) throw "Children of resource_config null exception"; - // - // for (auto &resource : resources) { - // auto &resname = resource.first; - // auto &resconf = resource.second; - // auto type = resconf.GetValue(server::CONFIG_RESOURCE_TYPE); - //// auto memory = resconf.GetInt64Value(server::CONFIG_RESOURCE_MEMORY); - // auto device_id = resconf.GetInt64Value(server::CONFIG_RESOURCE_DEVICE_ID); - //// auto enable_loader = resconf.GetBoolValue(server::CONFIG_RESOURCE_ENABLE_LOADER); - // auto enable_loader = true; - // auto enable_executor = resconf.GetBoolValue(server::CONFIG_RESOURCE_ENABLE_EXECUTOR); - // auto pinned_memory = resconf.GetInt64Value(server::CONFIG_RESOURCE_PIN_MEMORY); - // auto temp_memory = resconf.GetInt64Value(server::CONFIG_RESOURCE_TEMP_MEMORY); - // auto resource_num = resconf.GetInt64Value(server::CONFIG_RESOURCE_NUM); - // - // auto res = ResMgrInst::GetInstance()->Add(ResourceFactory::Create(resname, - // type, - // device_id, - // enable_loader, - // enable_executor)); - // - // if (res.lock()->type() == ResourceType::GPU) { - // auto pinned_memory = resconf.GetInt64Value(server::CONFIG_RESOURCE_PIN_MEMORY, 300); - // auto temp_memory = resconf.GetInt64Value(server::CONFIG_RESOURCE_TEMP_MEMORY, 300); - // auto resource_num = resconf.GetInt64Value(server::CONFIG_RESOURCE_NUM, 2); - // pinned_memory = 1024 * 1024 * pinned_memory; - // temp_memory = 1024 * 1024 * temp_memory; - // knowhere::FaissGpuResourceMgr::GetInstance().InitDevice(device_id, - // pinned_memory, - // temp_memory, - // resource_num); - // } - // } - // - // knowhere::FaissGpuResourceMgr::GetInstance().InitResource(); - // - // auto connections = config.GetChild(server::CONFIG_RESOURCE_CONNECTIONS).GetChildren(); - // if (connections.empty()) throw "connections config null exception"; - // for (auto &conn : connections) { - // auto &connect_name = conn.first; - // auto &connect_conf = conn.second; - // auto connect_speed = connect_conf.GetInt64Value(server::CONFIG_SPEED_CONNECTIONS); - // auto connect_endpoint = connect_conf.GetValue(server::CONFIG_ENDPOINT_CONNECTIONS); - // - // std::string delimiter = "==="; - // std::string left = connect_endpoint.substr(0, connect_endpoint.find(delimiter)); - // std::string right = connect_endpoint.substr(connect_endpoint.find(delimiter) + 3, - // connect_endpoint.length()); - // - // auto connection = Connection(connect_name, connect_speed); - // ResMgrInst::GetInstance()->Connect(left, right, connection); - // } - // } catch (const char *msg) { - // SERVER_LOG_ERROR << msg; - // // TODO(wxyu): throw exception instead - // exit(-1); - //// throw std::exception(); - // } -} - void StartSchedulerService() { load_simple_config(); - // load_advance_config(); ResMgrInst::GetInstance()->Start(); SchedInst::GetInstance()->Start(); JobMgrInst::GetInstance()->Start(); diff --git a/core/src/scheduler/job/Job.cpp b/core/src/scheduler/job/Job.cpp new file mode 100644 index 0000000000..954ea11f1b --- /dev/null +++ b/core/src/scheduler/job/Job.cpp @@ -0,0 +1,21 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +// +// Created by wxyu on 2019/10/28. +// + From de2bb68daa557c53bb4ad01f7561bd72222de2bd Mon Sep 17 00:00:00 2001 From: wxyu Date: Mon, 28 Oct 2019 20:34:26 +0800 Subject: [PATCH 79/89] Add unique id for Job Former-commit-id: 1865dbd859f345a3febc3ad76682f928678e59f5 --- CHANGELOG.md | 1 + core/src/db/DBImpl.cpp | 6 ++--- core/src/scheduler/ResourceMgr.h | 1 - core/src/scheduler/job/BuildIndexJob.cpp | 6 +++-- core/src/scheduler/job/BuildIndexJob.h | 2 +- core/src/scheduler/job/DeleteJob.cpp | 6 +++-- core/src/scheduler/job/DeleteJob.h | 2 +- core/src/scheduler/job/Job.cpp | 28 +++++++++++++++++++++--- core/src/scheduler/job/Job.h | 8 ++++--- core/src/scheduler/job/SearchJob.cpp | 6 +++-- core/src/scheduler/job/SearchJob.h | 2 +- 11 files changed, 49 insertions(+), 19 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index cc2461a9c2..785b7c89ef 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -14,6 +14,7 @@ Please mark all change in change log and use the ticket from JIRA. - \#92 - Speed up CMake build process - \#96 - Remove .a file in milvus/lib for docker-version - \#118 - Using shared_ptr instead of weak_ptr to avoid performance loss +- \#122 - Add unique id for Job ## Feature - \#115 - Using new structure for tasktable diff --git a/core/src/db/DBImpl.cpp b/core/src/db/DBImpl.cpp index 324d304e2a..6995de3d14 100644 --- a/core/src/db/DBImpl.cpp +++ b/core/src/db/DBImpl.cpp @@ -136,7 +136,7 @@ DBImpl::DeleteTable(const std::string& table_id, const meta::DatesT& dates) { // scheduler will determine when to delete table files auto nres = scheduler::ResMgrInst::GetInstance()->GetNumOfComputeResource(); - scheduler::DeleteJobPtr job = std::make_shared(0, table_id, meta_ptr_, nres); + scheduler::DeleteJobPtr job = std::make_shared(table_id, meta_ptr_, nres); scheduler::JobMgrInst::GetInstance()->Put(job); job->WaitAndDelete(); } else { @@ -439,7 +439,7 @@ DBImpl::QueryAsync(const std::string& table_id, const meta::TableFilesSchema& fi // step 1: get files to search ENGINE_LOG_DEBUG << "Engine query begin, index file count: " << files.size(); - scheduler::SearchJobPtr job = std::make_shared(0, k, nq, nprobe, vectors); + scheduler::SearchJobPtr job = std::make_shared(k, nq, nprobe, vectors); for (auto& file : files) { scheduler::TableFileSchemaPtr file_ptr = std::make_shared(file); job->AddIndexFile(file_ptr); @@ -754,7 +754,7 @@ DBImpl::BackgroundBuildIndex() { Status status; if (!to_index_files.empty()) { - scheduler::BuildIndexJobPtr job = std::make_shared(0, meta_ptr_, options_); + scheduler::BuildIndexJobPtr job = std::make_shared(meta_ptr_, options_); // step 2: put build index task to scheduler for (auto& file : to_index_files) { diff --git a/core/src/scheduler/ResourceMgr.h b/core/src/scheduler/ResourceMgr.h index 4d2361fb3d..31a1063e5d 100644 --- a/core/src/scheduler/ResourceMgr.h +++ b/core/src/scheduler/ResourceMgr.h @@ -75,7 +75,6 @@ class ResourceMgr : public interface::dumpable { return gpu_resources_; } - // TODO(wxyu): why return shared pointer inline std::vector GetAllResources() { return resources_; diff --git a/core/src/scheduler/job/BuildIndexJob.cpp b/core/src/scheduler/job/BuildIndexJob.cpp index 39c08b6b51..4c4c3b5054 100644 --- a/core/src/scheduler/job/BuildIndexJob.cpp +++ b/core/src/scheduler/job/BuildIndexJob.cpp @@ -23,8 +23,8 @@ namespace milvus { namespace scheduler { -BuildIndexJob::BuildIndexJob(JobId id, engine::meta::MetaPtr meta_ptr, engine::DBOptions options) - : Job(id, JobType::BUILD), meta_ptr_(std::move(meta_ptr)), options_(std::move(options)) { +BuildIndexJob::BuildIndexJob(engine::meta::MetaPtr meta_ptr, engine::DBOptions options) + : Job(JobType::BUILD), meta_ptr_(std::move(meta_ptr)), options_(std::move(options)) { } bool @@ -59,6 +59,8 @@ BuildIndexJob::Dump() const { json ret{ {"number_of_to_index_file", to_index_files_.size()}, }; + auto base = Job::Dump(); + ret.insert(base.begin(), base.end()); return ret; } diff --git a/core/src/scheduler/job/BuildIndexJob.h b/core/src/scheduler/job/BuildIndexJob.h index e3450ee048..9dba5854b6 100644 --- a/core/src/scheduler/job/BuildIndexJob.h +++ b/core/src/scheduler/job/BuildIndexJob.h @@ -41,7 +41,7 @@ using Id2ToTableFileMap = std::unordered_map; class BuildIndexJob : public Job { public: - explicit BuildIndexJob(JobId id, engine::meta::MetaPtr meta_ptr, engine::DBOptions options); + explicit BuildIndexJob(engine::meta::MetaPtr meta_ptr, engine::DBOptions options); public: bool diff --git a/core/src/scheduler/job/DeleteJob.cpp b/core/src/scheduler/job/DeleteJob.cpp index 04a9557177..f2131ffb5b 100644 --- a/core/src/scheduler/job/DeleteJob.cpp +++ b/core/src/scheduler/job/DeleteJob.cpp @@ -22,8 +22,8 @@ namespace milvus { namespace scheduler { -DeleteJob::DeleteJob(JobId id, std::string table_id, engine::meta::MetaPtr meta_ptr, uint64_t num_resource) - : Job(id, JobType::DELETE), +DeleteJob::DeleteJob(std::string table_id, engine::meta::MetaPtr meta_ptr, uint64_t num_resource) + : Job(JobType::DELETE), table_id_(std::move(table_id)), meta_ptr_(std::move(meta_ptr)), num_resource_(num_resource) { @@ -52,6 +52,8 @@ DeleteJob::Dump() const { {"number_of_resource", num_resource_}, {"number_of_done", done_resource}, }; + auto base = Job::Dump(); + ret.insert(base.begin(), base.end()); return ret; } diff --git a/core/src/scheduler/job/DeleteJob.h b/core/src/scheduler/job/DeleteJob.h index 93e5aa40cc..a20d67d45a 100644 --- a/core/src/scheduler/job/DeleteJob.h +++ b/core/src/scheduler/job/DeleteJob.h @@ -35,7 +35,7 @@ namespace scheduler { class DeleteJob : public Job { public: - DeleteJob(JobId id, std::string table_id, engine::meta::MetaPtr meta_ptr, uint64_t num_resource); + DeleteJob(std::string table_id, engine::meta::MetaPtr meta_ptr, uint64_t num_resource); public: void diff --git a/core/src/scheduler/job/Job.cpp b/core/src/scheduler/job/Job.cpp index 954ea11f1b..1199fe17a6 100644 --- a/core/src/scheduler/job/Job.cpp +++ b/core/src/scheduler/job/Job.cpp @@ -15,7 +15,29 @@ // specific language governing permissions and limitations // under the License. -// -// Created by wxyu on 2019/10/28. -// +#include "Job.h" +namespace milvus { +namespace scheduler { + +namespace { +std::mutex unique_job_mutex; +uint64_t unique_job_id = 0; +} // namespace + +Job::Job(JobType type) : type_(type) { + std::lock_guard lock(unique_job_mutex); + id_ = unique_job_id++; +} + +json +Job::Dump() const { + json ret{ + {"id", id_}, + {"type", type_}, + }; + return ret; +} + +} // namespace scheduler +} // namespace milvus diff --git a/core/src/scheduler/job/Job.h b/core/src/scheduler/job/Job.h index 709db8cffc..949164a8d0 100644 --- a/core/src/scheduler/job/Job.h +++ b/core/src/scheduler/job/Job.h @@ -53,12 +53,14 @@ class Job : public interface::dumpable { return type_; } + json + Dump() const override; + protected: - Job(JobId id, JobType type) : id_(id), type_(type) { - } + explicit Job(JobType type); private: - JobId id_; + JobId id_ = 0; JobType type_; }; diff --git a/core/src/scheduler/job/SearchJob.cpp b/core/src/scheduler/job/SearchJob.cpp index 1143e33add..47c825c122 100644 --- a/core/src/scheduler/job/SearchJob.cpp +++ b/core/src/scheduler/job/SearchJob.cpp @@ -21,8 +21,8 @@ namespace milvus { namespace scheduler { -SearchJob::SearchJob(milvus::scheduler::JobId id, uint64_t topk, uint64_t nq, uint64_t nprobe, const float* vectors) - : Job(id, JobType::SEARCH), topk_(topk), nq_(nq), nprobe_(nprobe), vectors_(vectors) { +SearchJob::SearchJob(uint64_t topk, uint64_t nq, uint64_t nprobe, const float* vectors) + : Job(JobType::SEARCH), topk_(topk), nq_(nq), nprobe_(nprobe), vectors_(vectors) { } bool @@ -70,6 +70,8 @@ SearchJob::Dump() const { {"nq", nq_}, {"nprobe", nprobe_}, }; + auto base = Job::Dump(); + ret.insert(base.begin(), base.end()); return ret; } diff --git a/core/src/scheduler/job/SearchJob.h b/core/src/scheduler/job/SearchJob.h index 6c2bd7eea9..1e586090b9 100644 --- a/core/src/scheduler/job/SearchJob.h +++ b/core/src/scheduler/job/SearchJob.h @@ -43,7 +43,7 @@ using ResultSet = std::vector; class SearchJob : public Job { public: - SearchJob(JobId id, uint64_t topk, uint64_t nq, uint64_t nprobe, const float* vectors); + SearchJob(uint64_t topk, uint64_t nq, uint64_t nprobe, const float* vectors); public: bool From d95b3906c8afdc47d34d1ba43d045967993320a2 Mon Sep 17 00:00:00 2001 From: Heisenberg Date: Mon, 28 Oct 2019 20:41:08 +0800 Subject: [PATCH 80/89] [skip ci] refine the copy time test Former-commit-id: d370cec5e350d3f3740d9b56182ad0a990ed2ec8 --- .../faiss_benchmark/faiss_benchmark_test.cpp | 50 +++++++++++-------- 1 file changed, 30 insertions(+), 20 deletions(-) diff --git a/core/src/index/unittest/faiss_benchmark/faiss_benchmark_test.cpp b/core/src/index/unittest/faiss_benchmark/faiss_benchmark_test.cpp index bb50198f92..e80b85e024 100644 --- a/core/src/index/unittest/faiss_benchmark/faiss_benchmark_test.cpp +++ b/core/src/index/unittest/faiss_benchmark/faiss_benchmark_test.cpp @@ -426,26 +426,6 @@ test_ivfsq8h(const std::string& ann_test_name, int32_t index_add_loops, const st cpu_ivf_index->to_readonly(); } - faiss::gpu::GpuClonerOptions option; - option.allInGpu = true; - - faiss::IndexComposition index_composition; - index_composition.index = cpu_index; - index_composition.quantizer = nullptr; - index_composition.mode = 1; - - double copy_time = elapsed(); - auto index = faiss::gpu::index_cpu_to_gpu(&res, 0, &index_composition, &option); - delete index; - - if (pure_gpu_mode) { - index_composition.mode = 2; // 0: all data, 1: copy quantizer, 2: copy data - index = faiss::gpu::index_cpu_to_gpu(&res, 0, &index_composition, &option); - } - - copy_time = elapsed() - copy_time; - printf("[%.3f s] Copy quantizer completed, cost %f s\n", elapsed() - t0, copy_time); - size_t nq; float* xq; { @@ -472,6 +452,36 @@ test_ivfsq8h(const std::string& ann_test_name, int32_t index_add_loops, const st delete[] gt_int; } + faiss::gpu::GpuClonerOptions option; + option.allInGpu = true; + + faiss::IndexComposition index_composition; + index_composition.index = cpu_index; + index_composition.quantizer = nullptr; + + faiss::Index* index; + double copy_time; + + if (!pure_gpu_mode) { + index_composition.mode = 1; // 0: all data, 1: copy quantizer, 2: copy data + index = faiss::gpu::index_cpu_to_gpu(&res, 0, &index_composition, &option); + delete index; + + copy_time = elapsed(); + index = faiss::gpu::index_cpu_to_gpu(&res, 0, &index_composition, &option); + delete index; + } else { + index_composition.mode = 2; + index = faiss::gpu::index_cpu_to_gpu(&res, 0, &index_composition, &option); + delete index; + + copy_time = elapsed(); + index = faiss::gpu::index_cpu_to_gpu(&res, 0, &index_composition, &option); + } + + copy_time = elapsed() - copy_time; + printf("[%.3f s] Copy quantizer completed, cost %f s\n", elapsed() - t0, copy_time); + const size_t NQ = 1000, K = 1000; if (!pure_gpu_mode) { for (auto nprobe : nprobes) { From d109a3778c4849548298921833390acc18a1b680 Mon Sep 17 00:00:00 2001 From: jielinxu <52057195+jielinxu@users.noreply.github.com> Date: Tue, 29 Oct 2019 09:32:36 +0800 Subject: [PATCH 81/89] [skip ci] Move Roadmap section ahead Former-commit-id: c2a065f0192c4bc6ab948d8c4c972847bfcb771b --- README.md | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/README.md b/README.md index 5c5065442f..884ddb01ca 100644 --- a/README.md +++ b/README.md @@ -168,6 +168,10 @@ Make sure Java 8 or higher is already installed. Refer to [this link](https://github.com/milvus-io/milvus-sdk-java/tree/master/examples) for the example code. +## Milvus roadmap + +Please read our [roadmap](https://milvus.io/docs/en/roadmap/) to learn about upcoming features. + ## Contribution guidelines Contributions are welcomed and greatly appreciated. Please read our [contribution guidelines](CONTRIBUTING.md) for detailed contribution workflow. This project adheres to the [code of conduct](CODE_OF_CONDUCT.md) of Milvus. By participating, you are expected to uphold this code. @@ -184,11 +188,6 @@ We greatly appreciate the help of the following people. - [akihoni](https://github.com/akihoni) found a broken link and a small typo in the README file. - -## Milvus roadmap - -Please read our [roadmap](https://milvus.io/docs/en/roadmap/) to learn about upcoming features. - ## Resources [Milvus official website](https://www.milvus.io) @@ -203,7 +202,6 @@ Please read our [roadmap](https://milvus.io/docs/en/roadmap/) to learn about upc [Milvus roadmap](https://milvus.io/docs/en/roadmap/) - ## License [Apache License 2.0](LICENSE) From a78e928dfaa389d2c6b02ae1dc7cc60bc0a712f0 Mon Sep 17 00:00:00 2001 From: starlord Date: Tue, 29 Oct 2019 15:16:04 +0800 Subject: [PATCH 82/89] #90 The server start error messages could be improved to enhance user experience Former-commit-id: e3ad89ab67f2c46bc4cb4e6d094a7763fb098664 --- core/conf/server_config.template | 20 ++--- core/src/server/Config.cpp | 145 +++++++++++++++++-------------- core/src/server/DBWrapper.cpp | 6 +- 3 files changed, 91 insertions(+), 80 deletions(-) diff --git a/core/conf/server_config.template b/core/conf/server_config.template index 7abfb8b055..3b366f1bd4 100644 --- a/core/conf/server_config.template +++ b/core/conf/server_config.template @@ -4,7 +4,7 @@ server_config: address: 0.0.0.0 # milvus server ip address (IPv4) port: 19530 # port range: 1025 ~ 65534 deploy_mode: single # deployment type: single, cluster_readonly, cluster_writable - time_zone: UTC+8 + time_zone: UTC+8 # time zone, must be in format: UTC+X db_config: primary_path: @MILVUS_DB_PATH@ # path used to store data and meta @@ -14,30 +14,30 @@ db_config: # Keep 'dialect://:@:/', and replace other texts with real values # Replace 'dialect' with 'mysql' or 'sqlite' - insert_buffer_size: 4 # GB, maximum insert buffer size allowed + insert_buffer_size: 4 # GB, maximum insert buffer size allowed, must be a positive integer # sum of insert_buffer_size and cpu_cache_capacity cannot exceed total memory preload_table: # preload data at startup, '*' means load all tables, empty value means no preload # you can specify preload tables like this: table1,table2,table3 metric_config: - enable_monitor: false # enable monitoring or not + enable_monitor: false # enable monitoring or not, must be a boolean collector: prometheus # prometheus prometheus_config: - port: 8080 # port prometheus uses to fetch metrics + port: 8080 # port prometheus uses to fetch metrics, range: 1025 ~ 65534 cache_config: - cpu_cache_capacity: 16 # GB, CPU memory used for cache - cpu_cache_threshold: 0.85 # percentage of data that will be kept when cache cleanup is triggered - gpu_cache_capacity: 4 # GB, GPU memory used for cache - gpu_cache_threshold: 0.85 # percentage of data that will be kept when cache cleanup is triggered - cache_insert_data: false # whether to load inserted data into cache + cpu_cache_capacity: 16 # GB, CPU memory used for cache, must be a positive integer + cpu_cache_threshold: 0.85 # percentage of data that will be kept when cache cleanup is triggered, must be in range (0.0, 1.0] + gpu_cache_capacity: 4 # GB, GPU memory used for cache, must be a positive integer + gpu_cache_threshold: 0.85 # percentage of data that will be kept when cache cleanup is triggered, must be in range (0.0, 1.0] + cache_insert_data: false # whether to load inserted data into cache, must be a boolean engine_config: use_blas_threshold: 20 # if nq < use_blas_threshold, use SSE, faster with fluctuated response times # if nq >= use_blas_threshold, use OpenBlas, slower with stable response times resource_config: - search_resources: # define the GPUs used for search computation, valid value: gpux + search_resources: # define the GPUs used for search computation, must be in format: gpux - gpu0 index_build_device: gpu0 # GPU used for building index \ No newline at end of file diff --git a/core/src/server/Config.cpp b/core/src/server/Config.cpp index b20d6c2436..51449cb1de 100644 --- a/core/src/server/Config.cpp +++ b/core/src/server/Config.cpp @@ -363,8 +363,8 @@ Config::PrintAll() { Status Config::CheckServerConfigAddress(const std::string& value) { if (!ValidationUtil::ValidateIpAddress(value).ok()) { - std::string msg = "Invalid server IP address: " + value - + ". Possible reason: server_config.address is invalid in server_config.yaml."; + std::string msg = "Invalid server IP address: " + value + + ". Possible reason: server_config.address is invalid in server_config.yaml."; return Status(SERVER_INVALID_ARGUMENT, msg); } return Status::OK(); @@ -373,14 +373,14 @@ Config::CheckServerConfigAddress(const std::string& value) { Status Config::CheckServerConfigPort(const std::string& value) { if (!ValidationUtil::ValidateStringIsNumber(value).ok()) { - std::string msg = "Port " + value + " is not a number. " - + "Possible reason: server_config.port in server_config.yaml is invalid."; + std::string msg = "Port " + value + " is not a number. " + + "Possible reason: server_config.port in server_config.yaml is invalid."; return Status(SERVER_INVALID_ARGUMENT, msg); } else { int32_t port = std::stoi(value); if (!(port > 1024 && port < 65535)) { - std::string msg = "Port " + value + " is not in range [1025, 65534]. " - + "Possible reason: server_config.port in server_config.yaml is invalid."; + std::string msg = "Port " + value + " is not in range [1025, 65534]. " + + "Possible reason: server_config.port in server_config.yaml is invalid."; return Status(SERVER_INVALID_ARGUMENT, msg); } } @@ -433,9 +433,9 @@ Status Config::CheckDBConfigBackendUrl(const std::string& value) { if (!ValidationUtil::ValidateDbURI(value).ok()) { std::string msg = - "Invalid db_backend_url: " + value - + ". Possible reason: db_config.db_backend_url is invalid in server_config.yaml. " - + "The correct format should be like sqlite://:@:/ or mysql://root:123456@127.0.0.1:3306/milvus."; + "Invalid db_backend_url: " + value + + ". Possible reason: db_config.db_backend_url is invalid in server_config.yaml. " + + "The correct format should be like sqlite://:@:/ or mysql://root:123456@127.0.0.1:3306/milvus."; return Status(SERVER_INVALID_ARGUMENT, "invalid db_backend_url: " + value); } return Status::OK(); @@ -444,8 +444,8 @@ Config::CheckDBConfigBackendUrl(const std::string& value) { Status Config::CheckDBConfigArchiveDiskThreshold(const std::string& value) { if (!ValidationUtil::ValidateStringIsNumber(value).ok()) { - std::string msg = "Invalid archive disk threshold: " + value - + "Possible reason: db_config.archive_disk_threshold in server_config.yaml is invalid."; + std::string msg = "Invalid archive disk threshold: " + value + + "Possible reason: db_config.archive_disk_threshold in server_config.yaml is invalid."; return Status(SERVER_INVALID_ARGUMENT, msg); } return Status::OK(); @@ -454,8 +454,8 @@ Config::CheckDBConfigArchiveDiskThreshold(const std::string& value) { Status Config::CheckDBConfigArchiveDaysThreshold(const std::string& value) { if (!ValidationUtil::ValidateStringIsNumber(value).ok()) { - std::string msg = "Invalid archive days threshold: " + value - + "Possible reason: db_config.archive_disk_threshold in server_config.yaml is invalid."; + std::string msg = "Invalid archive days threshold: " + value + + "Possible reason: db_config.archive_disk_threshold in server_config.yaml is invalid."; return Status(SERVER_INVALID_ARGUMENT, msg); } return Status::OK(); @@ -464,16 +464,24 @@ Config::CheckDBConfigArchiveDaysThreshold(const std::string& value) { Status Config::CheckDBConfigInsertBufferSize(const std::string& value) { if (!ValidationUtil::ValidateStringIsNumber(value).ok()) { - std::string msg = "Invalid insert buffer size: " + value - + "Possible reason: db_config.insert_buffer_size in server_config.yaml is invalid."; + std::string msg = "Invalid insert buffer size: " + value + + "Possible reason: db_config.insert_buffer_size in server_config.yaml " + "is not a positive integer."; return Status(SERVER_INVALID_ARGUMENT, msg); } else { int64_t buffer_size = std::stoi(value) * GB; + if (buffer_size <= 0) { + std::string msg = "Invalid insert buffer size: " + value + + "Possible reason: db_config.insert_buffer_size in server_config.yaml " + "is not a positive integer."; + return Status(SERVER_INVALID_ARGUMENT, msg); + } + uint64_t total_mem = 0, free_mem = 0; CommonUtil::GetSystemMemInfo(total_mem, free_mem); if (buffer_size >= total_mem) { - std::string msg = "Invalid insert buffer size: " + value - + "Possible reason: insert buffer size exceed system memory."; + std::string msg = + "Invalid insert buffer size: " + value + "Possible reason: insert buffer size exceeds system memory."; return Status(SERVER_INVALID_ARGUMENT, msg); } } @@ -483,8 +491,8 @@ Config::CheckDBConfigInsertBufferSize(const std::string& value) { Status Config::CheckMetricConfigEnableMonitor(const std::string& value) { if (!ValidationUtil::ValidateStringIsBool(value).ok()) { - std::string msg = "Invalid metric config: " + value - + "Possible reason: metric_config.enable_monitor is invalid."; + std::string msg = + "Invalid metric config: " + value + "Possible reason: metric_config.enable_monitor is not a boolean."; return Status(SERVER_INVALID_ARGUMENT, msg); } return Status::OK(); @@ -493,8 +501,7 @@ Config::CheckMetricConfigEnableMonitor(const std::string& value) { Status Config::CheckMetricConfigCollector(const std::string& value) { if (value != "prometheus") { - std::string msg = "Invalid metric config: " + value - + "Possible reason: metric_config.collector is invalid."; + std::string msg = "Invalid metric config: " + value + "Possible reason: metric_config.collector is invalid."; return Status(SERVER_INVALID_ARGUMENT, msg); } return Status::OK(); @@ -503,8 +510,8 @@ Config::CheckMetricConfigCollector(const std::string& value) { Status Config::CheckMetricConfigPrometheusPort(const std::string& value) { if (!ValidationUtil::ValidateStringIsNumber(value).ok()) { - std::string msg = "Invalid metric config: " + value - + "Possible reason: metric_config.prometheus_config.port is invalid."; + std::string msg = "Invalid metric config: " + value + + "Possible reason: metric_config.prometheus_config.port is not in range [1025, 65534]."; return Status(SERVER_INVALID_ARGUMENT, "Invalid metric config prometheus_port: " + value); } return Status::OK(); @@ -513,18 +520,24 @@ Config::CheckMetricConfigPrometheusPort(const std::string& value) { Status Config::CheckCacheConfigCpuCacheCapacity(const std::string& value) { if (!ValidationUtil::ValidateStringIsNumber(value).ok()) { - std::string msg = "Invalid cpu cache capacity: " + value - + "Possible reason: cache_config.cpu_cache_capacity is invalid."; + std::string msg = "Invalid cpu cache capacity: " + value + + "Possible reason: cache_config.cpu_cache_capacity is not a positive integer."; return Status(SERVER_INVALID_ARGUMENT, msg); } else { - uint64_t cpu_cache_capacity = std::stoi(value) * GB; + int64_t cpu_cache_capacity = std::stoi(value) * GB; + if (cpu_cache_capacity <= 0) { + std::string msg = "Invalid cpu cache capacity: " + value + + "Possible reason: cache_config.cpu_cache_capacity is not a positive integer."; + return Status(SERVER_INVALID_ARGUMENT, msg); + } + uint64_t total_mem = 0, free_mem = 0; CommonUtil::GetSystemMemInfo(total_mem, free_mem); - if (cpu_cache_capacity >= total_mem) { - std::string msg = "Invalid cpu cache capacity: " + value - + "Possible reason: Cache config cpu_cache_capacity exceed system memory."; + if (static_cast(cpu_cache_capacity) >= total_mem) { + std::string msg = "Invalid cpu cache capacity: " + value + + "Possible reason: Cache config cpu_cache_capacity exceeds system memory."; return Status(SERVER_INVALID_ARGUMENT, msg); - } else if (cpu_cache_capacity > static_cast(total_mem * 0.9)) { + } else if (static_cast(cpu_cache_capacity) > static_cast(total_mem * 0.9)) { std::cerr << "WARNING: cpu cache capacity value is too big" << std::endl; } @@ -536,9 +549,9 @@ Config::CheckCacheConfigCpuCacheCapacity(const std::string& value) { int64_t insert_buffer_size = buffer_value * GB; if (insert_buffer_size + cpu_cache_capacity >= total_mem) { - std::string msg = "Invalid cpu cache capacity: " + value - + "Possible reason: sum of cache_config.cpu_cache_capacity and " - "db_config.insert_buffer_size exceeds system memory."; + std::string msg = "Invalid cpu cache capacity: " + value + + "Possible reason: sum of cache_config.cpu_cache_capacity and " + "db_config.insert_buffer_size exceeds system memory."; return Status(SERVER_INVALID_ARGUMENT, msg); } } @@ -548,14 +561,14 @@ Config::CheckCacheConfigCpuCacheCapacity(const std::string& value) { Status Config::CheckCacheConfigCpuCacheThreshold(const std::string& value) { if (!ValidationUtil::ValidateStringIsFloat(value).ok()) { - std::string msg = "Invalid cpu cache threshold: " + value - + "Possible reason: cache_config.cpu_cache_threshold is invalid."; + std::string msg = "Invalid cpu cache threshold: " + value + + "Possible reason: cache_config.cpu_cache_threshold is not in range (0.0, 1.0]."; return Status(SERVER_INVALID_ARGUMENT, msg); } else { float cpu_cache_threshold = std::stof(value); if (cpu_cache_threshold <= 0.0 || cpu_cache_threshold >= 1.0) { - std::string msg = "Invalid cpu cache threshold: " + value - + "Possible reason: cache_config.cpu_cache_threshold is invalid."; + std::string msg = "Invalid cpu cache threshold: " + value + + "Possible reason: cache_config.cpu_cache_threshold is not in range (0.0, 1.0]."; return Status(SERVER_INVALID_ARGUMENT, msg); } } @@ -565,8 +578,8 @@ Config::CheckCacheConfigCpuCacheThreshold(const std::string& value) { Status Config::CheckCacheConfigGpuCacheCapacity(const std::string& value) { if (!ValidationUtil::ValidateStringIsNumber(value).ok()) { - std::string msg = "Invalid gpu cache capacity: " + value - + "Possible reason: cache_config.gpu_cache_capacity is invalid."; + std::string msg = "Invalid gpu cache capacity: " + value + + "Possible reason: cache_config.gpu_cache_capacity is not a positive integer."; return Status(SERVER_INVALID_ARGUMENT, msg); } else { uint64_t gpu_cache_capacity = std::stoi(value) * GB; @@ -581,8 +594,8 @@ Config::CheckCacheConfigGpuCacheCapacity(const std::string& value) { std::string msg = "Fail to get GPU memory for GPU device: " + std::to_string(gpu_index); return Status(SERVER_UNEXPECTED_ERROR, msg); } else if (gpu_cache_capacity >= gpu_memory) { - std::string msg = "Invalid gpu cache capacity: " + value - + "Possible reason: cache_config.gpu_cache_capacity exceed GPU memory."; + std::string msg = "Invalid gpu cache capacity: " + value + + "Possible reason: cache_config.gpu_cache_capacity exceeds GPU memory."; return Status(SERVER_INVALID_ARGUMENT, msg); } else if (gpu_cache_capacity > (double)gpu_memory * 0.9) { std::cerr << "Warning: gpu cache capacity value is too big" << std::endl; @@ -594,14 +607,14 @@ Config::CheckCacheConfigGpuCacheCapacity(const std::string& value) { Status Config::CheckCacheConfigGpuCacheThreshold(const std::string& value) { if (!ValidationUtil::ValidateStringIsFloat(value).ok()) { - std::string msg = "Invalid gpu cache threshold: " + value - + "Possible reason: cache_config.gpu_cache_threshold is invalid."; + std::string msg = "Invalid gpu cache threshold: " + value + + "Possible reason: cache_config.gpu_cache_threshold is not in range (0.0, 1.0]."; return Status(SERVER_INVALID_ARGUMENT, msg); } else { float gpu_cache_threshold = std::stof(value); if (gpu_cache_threshold <= 0.0 || gpu_cache_threshold >= 1.0) { - std::string msg = "Invalid gpu cache threshold: " + value - + "Possible reason: cache_config.gpu_cache_threshold is invalid."; + std::string msg = "Invalid gpu cache threshold: " + value + + "Possible reason: cache_config.gpu_cache_threshold is not in range (0.0, 1.0]."; return Status(SERVER_INVALID_ARGUMENT, msg); } } @@ -611,8 +624,8 @@ Config::CheckCacheConfigGpuCacheThreshold(const std::string& value) { Status Config::CheckCacheConfigCacheInsertData(const std::string& value) { if (!ValidationUtil::ValidateStringIsBool(value).ok()) { - std::string msg = "Invalid cache insert option: " + value - + "Possible reason: cache_config.cache_insert_data is invalid."; + std::string msg = "Invalid cache insert option: " + value + + "Possible reason: cache_config.cache_insert_data is not a boolean."; return Status(SERVER_INVALID_ARGUMENT, msg); } return Status::OK(); @@ -621,8 +634,8 @@ Config::CheckCacheConfigCacheInsertData(const std::string& value) { Status Config::CheckEngineConfigUseBlasThreshold(const std::string& value) { if (!ValidationUtil::ValidateStringIsNumber(value).ok()) { - std::string msg = "Invalid blas threshold: " + value - + "Possible reason: engine_config.use_blas_threshold is invalid."; + std::string msg = "Invalid blas threshold: " + value + + "Possible reason: engine_config.use_blas_threshold is not a positive integer."; return Status(SERVER_INVALID_ARGUMENT, msg); } return Status::OK(); @@ -631,8 +644,8 @@ Config::CheckEngineConfigUseBlasThreshold(const std::string& value) { Status Config::CheckEngineConfigOmpThreadNum(const std::string& value) { if (!ValidationUtil::ValidateStringIsNumber(value).ok()) { - std::string msg = "Invalid omp thread number: " + value - + "Possible reason: engine_config.omp_thread_num is invalid."; + std::string msg = "Invalid omp thread number: " + value + + "Possible reason: engine_config.omp_thread_num is not a positive integer."; return Status(SERVER_INVALID_ARGUMENT, msg); } @@ -640,8 +653,8 @@ Config::CheckEngineConfigOmpThreadNum(const std::string& value) { uint32_t sys_thread_cnt = 8; CommonUtil::GetSystemAvailableThreads(sys_thread_cnt); if (omp_thread > static_cast(sys_thread_cnt)) { - std::string msg = "Invalid omp thread number: " + value - + "Possible reason: engine_config.omp_thread_num is invalid."; + std::string msg = "Invalid omp thread number: " + value + + "Possible reason: engine_config.omp_thread_num exceeds system cpu cores."; return Status(SERVER_INVALID_ARGUMENT, msg); } return Status::OK(); @@ -650,8 +663,7 @@ Config::CheckEngineConfigOmpThreadNum(const std::string& value) { Status Config::CheckResourceConfigMode(const std::string& value) { if (value != "simple") { - std::string msg = "Invalid resource mode: " + value - + "Possible reason: resource_config.mode is invalid."; + std::string msg = "Invalid resource mode: " + value + "Possible reason: resource_config.mode is invalid."; return Status(SERVER_INVALID_ARGUMENT, msg); } return Status::OK(); @@ -662,15 +674,15 @@ CheckGpuDevice(const std::string& value) { const std::regex pat("gpu(\\d+)"); std::cmatch m; if (!std::regex_match(value.c_str(), m, pat)) { - std::string msg = "Invalid gpu device: " + value - + "Possible reason: resource_config.search_resources is invalid."; + std::string msg = "Invalid gpu device: " + value + + "Possible reason: resource_config.search_resources does not match your hardware."; return Status(SERVER_INVALID_ARGUMENT, msg); } int32_t gpu_index = std::stoi(value.substr(3)); if (!ValidationUtil::ValidateGpuIndex(gpu_index).ok()) { - std::string msg = "Invalid gpu device: " + value - + "Possible reason: resource_config.search_resources is invalid."; + std::string msg = "Invalid gpu device: " + value + + "Possible reason: resource_config.search_resources does not match your hardware."; return Status(SERVER_INVALID_ARGUMENT, msg); } return Status::OK(); @@ -679,16 +691,17 @@ CheckGpuDevice(const std::string& value) { Status Config::CheckResourceConfigSearchResources(const std::vector& value) { if (value.empty()) { - std::string msg = "Invalid search resource. " - "Possible reason: resource_config.search_resources is empty."; + std::string msg = + "Invalid search resource. " + "Possible reason: resource_config.search_resources is empty."; return Status(SERVER_INVALID_ARGUMENT, msg); } for (auto& gpu_device : value) { if (!CheckGpuDevice(gpu_device).ok()) { - std::string msg = "Invalid search resource: " + gpu_device - + "Possible reason: resource_config.search_resources is invalid."; - return Status(SERVER_INVALID_ARGUMENT, "Invalid resource config search_resources: " + gpu_device); + std::string msg = "Invalid search resource: " + gpu_device + + "Possible reason: resource_config.search_resources does not match your hardware."; + return Status(SERVER_INVALID_ARGUMENT, msg); } } return Status::OK(); @@ -697,8 +710,8 @@ Config::CheckResourceConfigSearchResources(const std::vector& value Status Config::CheckResourceConfigIndexBuildDevice(const std::string& value) { if (!CheckGpuDevice(value).ok()) { - std::string msg = "Invalid index build device: " + value - + "Possible reason: resource_config.index_build_device is invalid."; + std::string msg = "Invalid index build device: " + value + + "Possible reason: resource_config.index_build_device does not match your hardware."; return Status(SERVER_INVALID_ARGUMENT, msg); } return Status::OK(); diff --git a/core/src/server/DBWrapper.cpp b/core/src/server/DBWrapper.cpp index 2217b29e1c..7efb71075a 100644 --- a/core/src/server/DBWrapper.cpp +++ b/core/src/server/DBWrapper.cpp @@ -82,8 +82,7 @@ DBWrapper::StartService() { opt.mode_ = engine::DBOptions::MODE::CLUSTER_WRITABLE; } else { std::cerr << "Error: server_config.deploy_mode in server_config.yaml is not one of " - << "single, cluster_readonly, and cluster_writable." - << std::endl; + << "single, cluster_readonly, and cluster_writable." << std::endl; kill(0, SIGUSR1); } @@ -164,8 +163,7 @@ DBWrapper::StartService() { db_ = engine::DBFactory::Build(opt); } catch (std::exception& ex) { std::cerr << "Error: failed to open database: " << ex.what() - << ". Possible reason: the meta system does not work." - << std::endl; + << ". Possible reason: the meta system does not work." << std::endl; kill(0, SIGUSR1); } From a05cebf5133538650fcbee0601bcc1bdebdfdfa9 Mon Sep 17 00:00:00 2001 From: starlord Date: Tue, 29 Oct 2019 15:18:54 +0800 Subject: [PATCH 83/89] fix job.h header cpplint error Former-commit-id: c5bfb2f7acdfad00adf818b9cc4b20ce42b7c9e1 --- core/src/scheduler/job/Job.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/core/src/scheduler/job/Job.cpp b/core/src/scheduler/job/Job.cpp index 1199fe17a6..06a163b959 100644 --- a/core/src/scheduler/job/Job.cpp +++ b/core/src/scheduler/job/Job.cpp @@ -15,7 +15,7 @@ // specific language governing permissions and limitations // under the License. -#include "Job.h" +#include "scheduler/job/Job.h" namespace milvus { namespace scheduler { From 96dffcf8ab7219b20621a64efe62ffbf2f7996e1 Mon Sep 17 00:00:00 2001 From: starlord Date: Tue, 29 Oct 2019 15:37:08 +0800 Subject: [PATCH 84/89] format code Former-commit-id: f809ffd4505ada713620e4996d6fe1004bcc69dc --- core/src/server/DBWrapper.cpp | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) diff --git a/core/src/server/DBWrapper.cpp b/core/src/server/DBWrapper.cpp index 7efb71075a..e3d319ac53 100644 --- a/core/src/server/DBWrapper.cpp +++ b/core/src/server/DBWrapper.cpp @@ -40,12 +40,14 @@ DBWrapper::StartService() { engine::DBOptions opt; s = config.GetDBConfigBackendUrl(opt.meta_.backend_uri_); if (!s.ok()) { + std::cerr << s.ToString() << std::endl; return s; } std::string path; s = config.GetDBConfigPrimaryPath(path); if (!s.ok()) { + std::cerr << s.ToString() << std::endl; return s; } @@ -55,7 +57,7 @@ DBWrapper::StartService() { s = config.GetDBConfigSecondaryPath(db_slave_path); if (!s.ok()) { std::cerr << s.ToString() << std::endl; - kill(0, SIGUSR1); + return s; } StringHelpFunctions::SplitStringByDelimeter(db_slave_path, ";", opt.meta_.slave_paths_); @@ -64,14 +66,14 @@ DBWrapper::StartService() { s = config.GetCacheConfigCacheInsertData(opt.insert_cache_immediately_); if (!s.ok()) { std::cerr << s.ToString() << std::endl; - kill(0, SIGUSR1); + return s; } std::string mode; s = config.GetServerConfigDeployMode(mode); if (!s.ok()) { std::cerr << s.ToString() << std::endl; - kill(0, SIGUSR1); + return s; } if (mode == "single") { @@ -91,7 +93,7 @@ DBWrapper::StartService() { s = config.GetEngineConfigOmpThreadNum(omp_thread); if (!s.ok()) { std::cerr << s.ToString() << std::endl; - kill(0, SIGUSR1); + return s; } if (omp_thread > 0) { @@ -110,7 +112,7 @@ DBWrapper::StartService() { s = config.GetEngineConfigUseBlasThreshold(use_blas_threshold); if (!s.ok()) { std::cerr << s.ToString() << std::endl; - kill(0, SIGUSR1); + return s; } faiss::distance_compute_blas_threshold = use_blas_threshold; @@ -121,7 +123,7 @@ DBWrapper::StartService() { s = config.GetDBConfigArchiveDiskThreshold(disk); if (!s.ok()) { std::cerr << s.ToString() << std::endl; - kill(0, SIGUSR1); + return s; } if (disk > 0) { @@ -131,7 +133,7 @@ DBWrapper::StartService() { s = config.GetDBConfigArchiveDaysThreshold(days); if (!s.ok()) { std::cerr << s.ToString() << std::endl; - kill(0, SIGUSR1); + return s; } if (days > 0) { @@ -174,7 +176,7 @@ DBWrapper::StartService() { s = config.GetDBConfigPreloadTable(preload_tables); if (!s.ok()) { std::cerr << s.ToString() << std::endl; - kill(0, SIGUSR1); + return s; } s = PreloadTables(preload_tables); From c63c622cf5c4bcec43a33dacfa2b68518a5a1ddc Mon Sep 17 00:00:00 2001 From: starlord Date: Tue, 29 Oct 2019 16:06:27 +0800 Subject: [PATCH 85/89] update message Former-commit-id: 568dbdc5d6dc233716682ebec33e6bd3dfe45500 --- core/conf/server_config.template | 6 +++--- core/src/server/Config.cpp | 29 ++++++++++++----------------- 2 files changed, 15 insertions(+), 20 deletions(-) diff --git a/core/conf/server_config.template b/core/conf/server_config.template index 3b366f1bd4..3cb899d1b9 100644 --- a/core/conf/server_config.template +++ b/core/conf/server_config.template @@ -2,7 +2,7 @@ server_config: address: 0.0.0.0 # milvus server ip address (IPv4) - port: 19530 # port range: 1025 ~ 65534 + port: 19530 # milvus server port, must in range [1025, 6553] deploy_mode: single # deployment type: single, cluster_readonly, cluster_writable time_zone: UTC+8 # time zone, must be in format: UTC+X @@ -24,7 +24,7 @@ metric_config: enable_monitor: false # enable monitoring or not, must be a boolean collector: prometheus # prometheus prometheus_config: - port: 8080 # port prometheus uses to fetch metrics, range: 1025 ~ 65534 + port: 8080 # port prometheus uses to fetch metrics, must in range [1025, 6553] cache_config: cpu_cache_capacity: 16 # GB, CPU memory used for cache, must be a positive integer @@ -40,4 +40,4 @@ engine_config: resource_config: search_resources: # define the GPUs used for search computation, must be in format: gpux - gpu0 - index_build_device: gpu0 # GPU used for building index \ No newline at end of file + index_build_device: gpu0 # GPU used for building index, must be in format: gpux \ No newline at end of file diff --git a/core/src/server/Config.cpp b/core/src/server/Config.cpp index 51449cb1de..684d92674c 100644 --- a/core/src/server/Config.cpp +++ b/core/src/server/Config.cpp @@ -363,8 +363,8 @@ Config::PrintAll() { Status Config::CheckServerConfigAddress(const std::string& value) { if (!ValidationUtil::ValidateIpAddress(value).ok()) { - std::string msg = "Invalid server IP address: " + value + - ". Possible reason: server_config.address is invalid in server_config.yaml."; + std::string msg = + "Invalid server IP address: " + value + ". Possible reason: server_config.address is invalid."; return Status(SERVER_INVALID_ARGUMENT, msg); } return Status::OK(); @@ -373,14 +373,13 @@ Config::CheckServerConfigAddress(const std::string& value) { Status Config::CheckServerConfigPort(const std::string& value) { if (!ValidationUtil::ValidateStringIsNumber(value).ok()) { - std::string msg = "Port " + value + " is not a number. " + - "Possible reason: server_config.port in server_config.yaml is invalid."; + std::string msg = "Port " + value + " is not a number. " + "Possible reason: server_config.port is invalid."; return Status(SERVER_INVALID_ARGUMENT, msg); } else { int32_t port = std::stoi(value); if (!(port > 1024 && port < 65535)) { std::string msg = "Port " + value + " is not in range [1025, 65534]. " + - "Possible reason: server_config.port in server_config.yaml is invalid."; + "Possible reason: server_config.port is invalid."; return Status(SERVER_INVALID_ARGUMENT, msg); } } @@ -391,7 +390,7 @@ Status Config::CheckServerConfigDeployMode(const std::string& value) { if (value != "single" && value != "cluster_readonly" && value != "cluster_writable") { return Status(SERVER_INVALID_ARGUMENT, - "Error: server_config.deploy_mode in server_config.yaml is not one of " + "Error: server_config.deploy_mode is not one of " "single, cluster_readonly, and cluster_writable."); } return Status::OK(); @@ -418,8 +417,7 @@ Config::CheckServerConfigTimeZone(const std::string& value) { Status Config::CheckDBConfigPrimaryPath(const std::string& value) { if (value.empty()) { - return Status(SERVER_INVALID_ARGUMENT, - "db_path is empty. Possible reason: db_config.db_path in server_config.yaml is empty."); + return Status(SERVER_INVALID_ARGUMENT, "db_path is empty. Possible reason: db_config.db_path is empty."); } return Status::OK(); } @@ -433,8 +431,7 @@ Status Config::CheckDBConfigBackendUrl(const std::string& value) { if (!ValidationUtil::ValidateDbURI(value).ok()) { std::string msg = - "Invalid db_backend_url: " + value + - ". Possible reason: db_config.db_backend_url is invalid in server_config.yaml. " + + "Invalid db_backend_url: " + value + ". Possible reason: db_config.db_backend_url is invalid. " + "The correct format should be like sqlite://:@:/ or mysql://root:123456@127.0.0.1:3306/milvus."; return Status(SERVER_INVALID_ARGUMENT, "invalid db_backend_url: " + value); } @@ -445,7 +442,7 @@ Status Config::CheckDBConfigArchiveDiskThreshold(const std::string& value) { if (!ValidationUtil::ValidateStringIsNumber(value).ok()) { std::string msg = "Invalid archive disk threshold: " + value + - "Possible reason: db_config.archive_disk_threshold in server_config.yaml is invalid."; + "Possible reason: db_config.archive_disk_threshold is invalid."; return Status(SERVER_INVALID_ARGUMENT, msg); } return Status::OK(); @@ -455,7 +452,7 @@ Status Config::CheckDBConfigArchiveDaysThreshold(const std::string& value) { if (!ValidationUtil::ValidateStringIsNumber(value).ok()) { std::string msg = "Invalid archive days threshold: " + value + - "Possible reason: db_config.archive_disk_threshold in server_config.yaml is invalid."; + "Possible reason: db_config.archive_disk_threshold is invalid."; return Status(SERVER_INVALID_ARGUMENT, msg); } return Status::OK(); @@ -465,15 +462,13 @@ Status Config::CheckDBConfigInsertBufferSize(const std::string& value) { if (!ValidationUtil::ValidateStringIsNumber(value).ok()) { std::string msg = "Invalid insert buffer size: " + value + - "Possible reason: db_config.insert_buffer_size in server_config.yaml " - "is not a positive integer."; + "Possible reason: db_config.insert_buffer_size is not a positive integer."; return Status(SERVER_INVALID_ARGUMENT, msg); } else { int64_t buffer_size = std::stoi(value) * GB; if (buffer_size <= 0) { std::string msg = "Invalid insert buffer size: " + value + - "Possible reason: db_config.insert_buffer_size in server_config.yaml " - "is not a positive integer."; + "Possible reason: db_config.insert_buffer_size is not a positive integer."; return Status(SERVER_INVALID_ARGUMENT, msg); } @@ -535,7 +530,7 @@ Config::CheckCacheConfigCpuCacheCapacity(const std::string& value) { CommonUtil::GetSystemMemInfo(total_mem, free_mem); if (static_cast(cpu_cache_capacity) >= total_mem) { std::string msg = "Invalid cpu cache capacity: " + value + - "Possible reason: Cache config cpu_cache_capacity exceeds system memory."; + "Possible reason: cache_config.cpu_cache_capacity exceeds system memory."; return Status(SERVER_INVALID_ARGUMENT, msg); } else if (static_cast(cpu_cache_capacity) > static_cast(total_mem * 0.9)) { std::cerr << "WARNING: cpu cache capacity value is too big" << std::endl; From bfdc80401d4c560b256cb9e887543cbd17b57e7b Mon Sep 17 00:00:00 2001 From: starlord Date: Tue, 29 Oct 2019 16:15:14 +0800 Subject: [PATCH 86/89] fix typo Former-commit-id: bc177ec75ef30265a1358961f89966c8d6ea535c --- core/conf/server_config.template | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/core/conf/server_config.template b/core/conf/server_config.template index 3cb899d1b9..3feb16fd63 100644 --- a/core/conf/server_config.template +++ b/core/conf/server_config.template @@ -2,7 +2,7 @@ server_config: address: 0.0.0.0 # milvus server ip address (IPv4) - port: 19530 # milvus server port, must in range [1025, 6553] + port: 19530 # milvus server port, must in range [1025, 65534] deploy_mode: single # deployment type: single, cluster_readonly, cluster_writable time_zone: UTC+8 # time zone, must be in format: UTC+X @@ -24,7 +24,7 @@ metric_config: enable_monitor: false # enable monitoring or not, must be a boolean collector: prometheus # prometheus prometheus_config: - port: 8080 # port prometheus uses to fetch metrics, must in range [1025, 6553] + port: 8080 # port prometheus uses to fetch metrics, must in range [1025, 65534] cache_config: cpu_cache_capacity: 16 # GB, CPU memory used for cache, must be a positive integer From dcc7fbd439ca674bc39aeb859e001c6e4fbe4311 Mon Sep 17 00:00:00 2001 From: starlord Date: Tue, 29 Oct 2019 16:50:55 +0800 Subject: [PATCH 87/89] update message Former-commit-id: 84f9b8ce2ac8050128f678754123ae2716e3b342 --- core/src/server/Config.cpp | 75 +++++++++++++++++++------------------- 1 file changed, 38 insertions(+), 37 deletions(-) diff --git a/core/src/server/Config.cpp b/core/src/server/Config.cpp index 684d92674c..937556824e 100644 --- a/core/src/server/Config.cpp +++ b/core/src/server/Config.cpp @@ -373,13 +373,13 @@ Config::CheckServerConfigAddress(const std::string& value) { Status Config::CheckServerConfigPort(const std::string& value) { if (!ValidationUtil::ValidateStringIsNumber(value).ok()) { - std::string msg = "Port " + value + " is not a number. " + "Possible reason: server_config.port is invalid."; + std::string msg = "Invalid server port: " + value + ". Possible reason: server_config.port is not a number."; return Status(SERVER_INVALID_ARGUMENT, msg); } else { int32_t port = std::stoi(value); if (!(port > 1024 && port < 65535)) { - std::string msg = "Port " + value + " is not in range [1025, 65534]. " + - "Possible reason: server_config.port is invalid."; + std::string msg = "Invalid server port: " + value + + ". Possible reason: server_config.port is not in range [1025, 65534]."; return Status(SERVER_INVALID_ARGUMENT, msg); } } @@ -390,7 +390,7 @@ Status Config::CheckServerConfigDeployMode(const std::string& value) { if (value != "single" && value != "cluster_readonly" && value != "cluster_writable") { return Status(SERVER_INVALID_ARGUMENT, - "Error: server_config.deploy_mode is not one of " + "server_config.deploy_mode is not one of " "single, cluster_readonly, and cluster_writable."); } return Status::OK(); @@ -399,15 +399,15 @@ Config::CheckServerConfigDeployMode(const std::string& value) { Status Config::CheckServerConfigTimeZone(const std::string& value) { if (value.length() <= 3) { - return Status(SERVER_INVALID_ARGUMENT, "Invalid server config time_zone: " + value); + return Status(SERVER_INVALID_ARGUMENT, "Invalid server_config.time_zone: " + value); } else { if (value.substr(0, 3) != "UTC") { - return Status(SERVER_INVALID_ARGUMENT, "Invalid server config time_zone: " + value); + return Status(SERVER_INVALID_ARGUMENT, "Invalid server_config.time_zone: " + value); } else { try { stoi(value.substr(3)); } catch (...) { - return Status(SERVER_INVALID_ARGUMENT, "Invalid server config time_zone: " + value); + return Status(SERVER_INVALID_ARGUMENT, "Invalid server_config.time_zone: " + value); } } } @@ -417,7 +417,7 @@ Config::CheckServerConfigTimeZone(const std::string& value) { Status Config::CheckDBConfigPrimaryPath(const std::string& value) { if (value.empty()) { - return Status(SERVER_INVALID_ARGUMENT, "db_path is empty. Possible reason: db_config.db_path is empty."); + return Status(SERVER_INVALID_ARGUMENT, "db_config.db_path is empty."); } return Status::OK(); } @@ -431,7 +431,7 @@ Status Config::CheckDBConfigBackendUrl(const std::string& value) { if (!ValidationUtil::ValidateDbURI(value).ok()) { std::string msg = - "Invalid db_backend_url: " + value + ". Possible reason: db_config.db_backend_url is invalid. " + + "Invalid backend url: " + value + ". Possible reason: db_config.db_backend_url is invalid. " + "The correct format should be like sqlite://:@:/ or mysql://root:123456@127.0.0.1:3306/milvus."; return Status(SERVER_INVALID_ARGUMENT, "invalid db_backend_url: " + value); } @@ -442,7 +442,7 @@ Status Config::CheckDBConfigArchiveDiskThreshold(const std::string& value) { if (!ValidationUtil::ValidateStringIsNumber(value).ok()) { std::string msg = "Invalid archive disk threshold: " + value + - "Possible reason: db_config.archive_disk_threshold is invalid."; + ". Possible reason: db_config.archive_disk_threshold is invalid."; return Status(SERVER_INVALID_ARGUMENT, msg); } return Status::OK(); @@ -452,7 +452,7 @@ Status Config::CheckDBConfigArchiveDaysThreshold(const std::string& value) { if (!ValidationUtil::ValidateStringIsNumber(value).ok()) { std::string msg = "Invalid archive days threshold: " + value + - "Possible reason: db_config.archive_disk_threshold is invalid."; + ". Possible reason: db_config.archive_disk_threshold is invalid."; return Status(SERVER_INVALID_ARGUMENT, msg); } return Status::OK(); @@ -462,13 +462,13 @@ Status Config::CheckDBConfigInsertBufferSize(const std::string& value) { if (!ValidationUtil::ValidateStringIsNumber(value).ok()) { std::string msg = "Invalid insert buffer size: " + value + - "Possible reason: db_config.insert_buffer_size is not a positive integer."; + ". Possible reason: db_config.insert_buffer_size is not a positive integer."; return Status(SERVER_INVALID_ARGUMENT, msg); } else { int64_t buffer_size = std::stoi(value) * GB; if (buffer_size <= 0) { std::string msg = "Invalid insert buffer size: " + value + - "Possible reason: db_config.insert_buffer_size is not a positive integer."; + ". Possible reason: db_config.insert_buffer_size is not a positive integer."; return Status(SERVER_INVALID_ARGUMENT, msg); } @@ -476,7 +476,7 @@ Config::CheckDBConfigInsertBufferSize(const std::string& value) { CommonUtil::GetSystemMemInfo(total_mem, free_mem); if (buffer_size >= total_mem) { std::string msg = - "Invalid insert buffer size: " + value + "Possible reason: insert buffer size exceeds system memory."; + "Invalid insert buffer size: " + value + ". Possible reason: insert buffer size exceeds system memory."; return Status(SERVER_INVALID_ARGUMENT, msg); } } @@ -487,7 +487,7 @@ Status Config::CheckMetricConfigEnableMonitor(const std::string& value) { if (!ValidationUtil::ValidateStringIsBool(value).ok()) { std::string msg = - "Invalid metric config: " + value + "Possible reason: metric_config.enable_monitor is not a boolean."; + "Invalid metric config: " + value + ". Possible reason: metric_config.enable_monitor is not a boolean."; return Status(SERVER_INVALID_ARGUMENT, msg); } return Status::OK(); @@ -496,7 +496,8 @@ Config::CheckMetricConfigEnableMonitor(const std::string& value) { Status Config::CheckMetricConfigCollector(const std::string& value) { if (value != "prometheus") { - std::string msg = "Invalid metric config: " + value + "Possible reason: metric_config.collector is invalid."; + std::string msg = + "Invalid metric collector: " + value + ". Possible reason: metric_config.collector is invalid."; return Status(SERVER_INVALID_ARGUMENT, msg); } return Status::OK(); @@ -505,8 +506,8 @@ Config::CheckMetricConfigCollector(const std::string& value) { Status Config::CheckMetricConfigPrometheusPort(const std::string& value) { if (!ValidationUtil::ValidateStringIsNumber(value).ok()) { - std::string msg = "Invalid metric config: " + value + - "Possible reason: metric_config.prometheus_config.port is not in range [1025, 65534]."; + std::string msg = "Invalid metric port: " + value + + ". Possible reason: metric_config.prometheus_config.port is not in range [1025, 65534]."; return Status(SERVER_INVALID_ARGUMENT, "Invalid metric config prometheus_port: " + value); } return Status::OK(); @@ -516,13 +517,13 @@ Status Config::CheckCacheConfigCpuCacheCapacity(const std::string& value) { if (!ValidationUtil::ValidateStringIsNumber(value).ok()) { std::string msg = "Invalid cpu cache capacity: " + value + - "Possible reason: cache_config.cpu_cache_capacity is not a positive integer."; + ". Possible reason: cache_config.cpu_cache_capacity is not a positive integer."; return Status(SERVER_INVALID_ARGUMENT, msg); } else { int64_t cpu_cache_capacity = std::stoi(value) * GB; if (cpu_cache_capacity <= 0) { std::string msg = "Invalid cpu cache capacity: " + value + - "Possible reason: cache_config.cpu_cache_capacity is not a positive integer."; + ". Possible reason: cache_config.cpu_cache_capacity is not a positive integer."; return Status(SERVER_INVALID_ARGUMENT, msg); } @@ -530,7 +531,7 @@ Config::CheckCacheConfigCpuCacheCapacity(const std::string& value) { CommonUtil::GetSystemMemInfo(total_mem, free_mem); if (static_cast(cpu_cache_capacity) >= total_mem) { std::string msg = "Invalid cpu cache capacity: " + value + - "Possible reason: cache_config.cpu_cache_capacity exceeds system memory."; + ". Possible reason: cache_config.cpu_cache_capacity exceeds system memory."; return Status(SERVER_INVALID_ARGUMENT, msg); } else if (static_cast(cpu_cache_capacity) > static_cast(total_mem * 0.9)) { std::cerr << "WARNING: cpu cache capacity value is too big" << std::endl; @@ -545,7 +546,7 @@ Config::CheckCacheConfigCpuCacheCapacity(const std::string& value) { int64_t insert_buffer_size = buffer_value * GB; if (insert_buffer_size + cpu_cache_capacity >= total_mem) { std::string msg = "Invalid cpu cache capacity: " + value + - "Possible reason: sum of cache_config.cpu_cache_capacity and " + ". Possible reason: sum of cache_config.cpu_cache_capacity and " "db_config.insert_buffer_size exceeds system memory."; return Status(SERVER_INVALID_ARGUMENT, msg); } @@ -557,13 +558,13 @@ Status Config::CheckCacheConfigCpuCacheThreshold(const std::string& value) { if (!ValidationUtil::ValidateStringIsFloat(value).ok()) { std::string msg = "Invalid cpu cache threshold: " + value + - "Possible reason: cache_config.cpu_cache_threshold is not in range (0.0, 1.0]."; + ". Possible reason: cache_config.cpu_cache_threshold is not in range (0.0, 1.0]."; return Status(SERVER_INVALID_ARGUMENT, msg); } else { float cpu_cache_threshold = std::stof(value); if (cpu_cache_threshold <= 0.0 || cpu_cache_threshold >= 1.0) { std::string msg = "Invalid cpu cache threshold: " + value + - "Possible reason: cache_config.cpu_cache_threshold is not in range (0.0, 1.0]."; + ". Possible reason: cache_config.cpu_cache_threshold is not in range (0.0, 1.0]."; return Status(SERVER_INVALID_ARGUMENT, msg); } } @@ -574,7 +575,7 @@ Status Config::CheckCacheConfigGpuCacheCapacity(const std::string& value) { if (!ValidationUtil::ValidateStringIsNumber(value).ok()) { std::string msg = "Invalid gpu cache capacity: " + value + - "Possible reason: cache_config.gpu_cache_capacity is not a positive integer."; + ". Possible reason: cache_config.gpu_cache_capacity is not a positive integer."; return Status(SERVER_INVALID_ARGUMENT, msg); } else { uint64_t gpu_cache_capacity = std::stoi(value) * GB; @@ -590,7 +591,7 @@ Config::CheckCacheConfigGpuCacheCapacity(const std::string& value) { return Status(SERVER_UNEXPECTED_ERROR, msg); } else if (gpu_cache_capacity >= gpu_memory) { std::string msg = "Invalid gpu cache capacity: " + value + - "Possible reason: cache_config.gpu_cache_capacity exceeds GPU memory."; + ". Possible reason: cache_config.gpu_cache_capacity exceeds GPU memory."; return Status(SERVER_INVALID_ARGUMENT, msg); } else if (gpu_cache_capacity > (double)gpu_memory * 0.9) { std::cerr << "Warning: gpu cache capacity value is too big" << std::endl; @@ -603,13 +604,13 @@ Status Config::CheckCacheConfigGpuCacheThreshold(const std::string& value) { if (!ValidationUtil::ValidateStringIsFloat(value).ok()) { std::string msg = "Invalid gpu cache threshold: " + value + - "Possible reason: cache_config.gpu_cache_threshold is not in range (0.0, 1.0]."; + ". Possible reason: cache_config.gpu_cache_threshold is not in range (0.0, 1.0]."; return Status(SERVER_INVALID_ARGUMENT, msg); } else { float gpu_cache_threshold = std::stof(value); if (gpu_cache_threshold <= 0.0 || gpu_cache_threshold >= 1.0) { std::string msg = "Invalid gpu cache threshold: " + value + - "Possible reason: cache_config.gpu_cache_threshold is not in range (0.0, 1.0]."; + ". Possible reason: cache_config.gpu_cache_threshold is not in range (0.0, 1.0]."; return Status(SERVER_INVALID_ARGUMENT, msg); } } @@ -620,7 +621,7 @@ Status Config::CheckCacheConfigCacheInsertData(const std::string& value) { if (!ValidationUtil::ValidateStringIsBool(value).ok()) { std::string msg = "Invalid cache insert option: " + value + - "Possible reason: cache_config.cache_insert_data is not a boolean."; + ". Possible reason: cache_config.cache_insert_data is not a boolean."; return Status(SERVER_INVALID_ARGUMENT, msg); } return Status::OK(); @@ -630,7 +631,7 @@ Status Config::CheckEngineConfigUseBlasThreshold(const std::string& value) { if (!ValidationUtil::ValidateStringIsNumber(value).ok()) { std::string msg = "Invalid blas threshold: " + value + - "Possible reason: engine_config.use_blas_threshold is not a positive integer."; + ". Possible reason: engine_config.use_blas_threshold is not a positive integer."; return Status(SERVER_INVALID_ARGUMENT, msg); } return Status::OK(); @@ -640,7 +641,7 @@ Status Config::CheckEngineConfigOmpThreadNum(const std::string& value) { if (!ValidationUtil::ValidateStringIsNumber(value).ok()) { std::string msg = "Invalid omp thread number: " + value + - "Possible reason: engine_config.omp_thread_num is not a positive integer."; + ". Possible reason: engine_config.omp_thread_num is not a positive integer."; return Status(SERVER_INVALID_ARGUMENT, msg); } @@ -649,7 +650,7 @@ Config::CheckEngineConfigOmpThreadNum(const std::string& value) { CommonUtil::GetSystemAvailableThreads(sys_thread_cnt); if (omp_thread > static_cast(sys_thread_cnt)) { std::string msg = "Invalid omp thread number: " + value + - "Possible reason: engine_config.omp_thread_num exceeds system cpu cores."; + ". Possible reason: engine_config.omp_thread_num exceeds system cpu cores."; return Status(SERVER_INVALID_ARGUMENT, msg); } return Status::OK(); @@ -658,7 +659,7 @@ Config::CheckEngineConfigOmpThreadNum(const std::string& value) { Status Config::CheckResourceConfigMode(const std::string& value) { if (value != "simple") { - std::string msg = "Invalid resource mode: " + value + "Possible reason: resource_config.mode is invalid."; + std::string msg = "Invalid resource mode: " + value + ". Possible reason: resource_config.mode is invalid."; return Status(SERVER_INVALID_ARGUMENT, msg); } return Status::OK(); @@ -670,14 +671,14 @@ CheckGpuDevice(const std::string& value) { std::cmatch m; if (!std::regex_match(value.c_str(), m, pat)) { std::string msg = "Invalid gpu device: " + value + - "Possible reason: resource_config.search_resources does not match your hardware."; + ". Possible reason: resource_config.search_resources does not match your hardware."; return Status(SERVER_INVALID_ARGUMENT, msg); } int32_t gpu_index = std::stoi(value.substr(3)); if (!ValidationUtil::ValidateGpuIndex(gpu_index).ok()) { std::string msg = "Invalid gpu device: " + value + - "Possible reason: resource_config.search_resources does not match your hardware."; + ". Possible reason: resource_config.search_resources does not match your hardware."; return Status(SERVER_INVALID_ARGUMENT, msg); } return Status::OK(); @@ -695,7 +696,7 @@ Config::CheckResourceConfigSearchResources(const std::vector& value for (auto& gpu_device : value) { if (!CheckGpuDevice(gpu_device).ok()) { std::string msg = "Invalid search resource: " + gpu_device + - "Possible reason: resource_config.search_resources does not match your hardware."; + ". Possible reason: resource_config.search_resources does not match your hardware."; return Status(SERVER_INVALID_ARGUMENT, msg); } } @@ -706,7 +707,7 @@ Status Config::CheckResourceConfigIndexBuildDevice(const std::string& value) { if (!CheckGpuDevice(value).ok()) { std::string msg = "Invalid index build device: " + value + - "Possible reason: resource_config.index_build_device does not match your hardware."; + ". Possible reason: resource_config.index_build_device does not match your hardware."; return Status(SERVER_INVALID_ARGUMENT, msg); } return Status::OK(); From bb43a771306dd8ef4237d72189a260fe5a28296c Mon Sep 17 00:00:00 2001 From: starlord Date: Tue, 29 Oct 2019 17:19:02 +0800 Subject: [PATCH 88/89] update message Former-commit-id: 0a5ad5dfd42d76032c5008d9d9e4e2213b161232 --- core/src/server/Config.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/core/src/server/Config.cpp b/core/src/server/Config.cpp index 937556824e..111cc26f9c 100644 --- a/core/src/server/Config.cpp +++ b/core/src/server/Config.cpp @@ -475,8 +475,8 @@ Config::CheckDBConfigInsertBufferSize(const std::string& value) { uint64_t total_mem = 0, free_mem = 0; CommonUtil::GetSystemMemInfo(total_mem, free_mem); if (buffer_size >= total_mem) { - std::string msg = - "Invalid insert buffer size: " + value + ". Possible reason: insert buffer size exceeds system memory."; + std::string msg = "Invalid insert buffer size: " + value + + ". Possible reason: db_config.insert_buffer_size exceeds system memory."; return Status(SERVER_INVALID_ARGUMENT, msg); } } From 39e127d70752737351efaa1a6ed07d05a6437296 Mon Sep 17 00:00:00 2001 From: "xiaojun.lin" Date: Tue, 29 Oct 2019 20:05:01 +0800 Subject: [PATCH 89/89] Upgrade Faiss to v1.6 Former-commit-id: 637f6933c77c2644d4d90120d9d2aed77e4e70ff --- core/src/db/engine/ExecutionEngineImpl.cpp | 2 -- core/src/index/cmake/ThirdPartyPackagesCore.cmake | 6 +++--- .../knowhere/knowhere/index/vector_index/IndexGPUIVF.cpp | 3 ++- .../knowhere/index/vector_index/IndexIVFSQHybrid.cpp | 7 ++++++- core/src/scheduler/job/Job.cpp | 2 +- core/unittest/db/test_engine.cpp | 1 - core/unittest/db/utils.cpp | 4 ++-- 7 files changed, 14 insertions(+), 11 deletions(-) diff --git a/core/src/db/engine/ExecutionEngineImpl.cpp b/core/src/db/engine/ExecutionEngineImpl.cpp index 1ecba677fe..66e9795ff3 100644 --- a/core/src/db/engine/ExecutionEngineImpl.cpp +++ b/core/src/db/engine/ExecutionEngineImpl.cpp @@ -257,7 +257,6 @@ ExecutionEngineImpl::Load(bool to_cache) { Status ExecutionEngineImpl::CopyToGpu(uint64_t device_id, bool hybrid) { if (hybrid) { -#if 1 const std::string key = location_ + ".quantizer"; std::vector gpus = scheduler::get_gpu_pool(); @@ -306,7 +305,6 @@ ExecutionEngineImpl::CopyToGpu(uint64_t device_id, bool hybrid) { auto cached_quantizer = std::make_shared(pair.second); cache::GpuCacheMgr::GetInstance(device_id)->InsertItem(key, cached_quantizer); } -#endif return Status::OK(); } diff --git a/core/src/index/cmake/ThirdPartyPackagesCore.cmake b/core/src/index/cmake/ThirdPartyPackagesCore.cmake index 3635b4fa1b..e8a5c8a995 100644 --- a/core/src/index/cmake/ThirdPartyPackagesCore.cmake +++ b/core/src/index/cmake/ThirdPartyPackagesCore.cmake @@ -245,11 +245,11 @@ if(CUSTOMIZATION) # set(FAISS_MD5 "072db398351cca6e88f52d743bbb9fa0") # commit-id 3a2344d04744166af41ef1a74449d68a315bfe17 branch-0.2.1 # set(FAISS_MD5 "c89ea8e655f5cdf58f42486f13614714") # commit-id 9c28a1cbb88f41fa03b03d7204106201ad33276b branch-0.2.1 # set(FAISS_MD5 "87fdd86351ffcaf3f80dc26ade63c44b") # commit-id 841a156e67e8e22cd8088e1b58c00afbf2efc30b branch-0.2.1 - set(FAISS_MD5 "f3b2ce3364c3fa7febd3aa7fdd0fe380") # commit-id 694e03458e6b69ce8a62502f71f69a614af5af8f branch-0.3.0 + set(FAISS_MD5 "f3b2ce3364c3fa7febd3aa7fdd0fe380") # commit-id 694e03458e6b69ce8a62502f71f69a614af5af8f branch-0.3.0 endif() else() - set(FAISS_SOURCE_URL "https://github.com/facebookresearch/faiss/archive/v1.5.3.tar.gz") - set(FAISS_MD5 "0bc12737b23def156f6a1eb782050135") + set(FAISS_SOURCE_URL "https://github.com/milvus-io/faiss/archive/1.6.0.tar.gz") + set(FAISS_MD5 "eb96d84f98b078a9eec04a796f5c792e") endif() message(STATUS "FAISS URL = ${FAISS_SOURCE_URL}") diff --git a/core/src/index/knowhere/knowhere/index/vector_index/IndexGPUIVF.cpp b/core/src/index/knowhere/knowhere/index/vector_index/IndexGPUIVF.cpp index a26f947181..251dfc12ed 100644 --- a/core/src/index/knowhere/knowhere/index/vector_index/IndexGPUIVF.cpp +++ b/core/src/index/knowhere/knowhere/index/vector_index/IndexGPUIVF.cpp @@ -86,7 +86,8 @@ GPUIVF::SerializeImpl() { faiss::Index* index = index_.get(); faiss::Index* host_index = faiss::gpu::index_gpu_to_cpu(index); - SealImpl(); + // TODO(linxj): support seal + // SealImpl(); faiss::write_index(host_index, &writer); delete host_index; diff --git a/core/src/index/knowhere/knowhere/index/vector_index/IndexIVFSQHybrid.cpp b/core/src/index/knowhere/knowhere/index/vector_index/IndexIVFSQHybrid.cpp index 7b229db21e..84bf594421 100644 --- a/core/src/index/knowhere/knowhere/index/vector_index/IndexIVFSQHybrid.cpp +++ b/core/src/index/knowhere/knowhere/index/vector_index/IndexIVFSQHybrid.cpp @@ -160,7 +160,7 @@ IVFSQHybrid::LoadQuantizer(const Config& conf) { index_composition->quantizer = nullptr; index_composition->mode = quantizer_conf->mode; // only 1 - auto gpu_index = faiss::gpu::index_cpu_to_gpu(res->faiss_res.get(), gpu_id_, index_composition, &option); + auto gpu_index = faiss::gpu::index_cpu_to_gpu(res->faiss_res.get(), gpu_id, index_composition, &option); delete gpu_index; auto q = std::make_shared(); @@ -354,5 +354,10 @@ IVFSQHybrid::LoadImpl(const BinarySet& index_binary) { GPUIVF::LoadImpl(index_binary); } +void +IVFSQHybrid::set_index_model(IndexModelPtr model) { + GPUIVF::set_index_model(model); +} + #endif } // namespace knowhere diff --git a/core/src/scheduler/job/Job.cpp b/core/src/scheduler/job/Job.cpp index 1199fe17a6..06a163b959 100644 --- a/core/src/scheduler/job/Job.cpp +++ b/core/src/scheduler/job/Job.cpp @@ -15,7 +15,7 @@ // specific language governing permissions and limitations // under the License. -#include "Job.h" +#include "scheduler/job/Job.h" namespace milvus { namespace scheduler { diff --git a/core/unittest/db/test_engine.cpp b/core/unittest/db/test_engine.cpp index 147de5399c..eb2c60ec4b 100644 --- a/core/unittest/db/test_engine.cpp +++ b/core/unittest/db/test_engine.cpp @@ -108,7 +108,6 @@ TEST_F(EngineTest, ENGINE_IMPL_TEST) { ASSERT_EQ(engine_ptr->Dimension(), dimension); ASSERT_EQ(engine_ptr->Count(), ids.size()); - status = engine_ptr->CopyToGpu(0, true); status = engine_ptr->CopyToGpu(0, false); //ASSERT_TRUE(status.ok()); diff --git a/core/unittest/db/utils.cpp b/core/unittest/db/utils.cpp index 8903ce14ea..16e195079c 100644 --- a/core/unittest/db/utils.cpp +++ b/core/unittest/db/utils.cpp @@ -65,10 +65,10 @@ static const char " cache_insert_data: false # whether load inserted data into cache\n" "\n" "engine_config:\n" - " blas_threshold: 20\n" + " use_blas_threshold: 20\n" "\n" "resource_config:\n" - " resource_pool:\n" + " search_resources:\n" " - gpu0\n" " index_build_device: gpu0 # GPU used for building index";