From 513ad3b8424d27f886fd6162e8fea6f611be5293 Mon Sep 17 00:00:00 2001 From: fishpenguin Date: Mon, 18 Nov 2019 11:38:48 +0800 Subject: [PATCH 01/14] Support build index with multiple gpu --- CHANGELOG.md | 1 + core/conf/server_cpu_config.template | 3 +- core/conf/server_gpu_config.template | 3 +- core/src/db/engine/ExecutionEngineImpl.cpp | 11 +- core/src/scheduler/JobMgr.cpp | 2 +- core/src/scheduler/SchedInst.cpp | 33 ++++-- core/src/scheduler/SchedInst.h | 5 + .../scheduler/action/PushTaskToNeighbour.cpp | 102 ++++++------------ .../scheduler/optimizer/BuildIndexPass.cpp | 46 ++++++++ core/src/scheduler/optimizer/BuildIndexPass.h | 51 +++++++++ core/src/scheduler/optimizer/OnlyGPUPass.cpp | 3 +- core/src/server/Config.cpp | 72 ++++++++----- core/src/server/Config.h | 7 +- core/src/wrapper/KnowhereResource.cpp | 8 +- core/unittest/server/test_config.cpp | 3 +- 15 files changed, 233 insertions(+), 117 deletions(-) create mode 100644 core/src/scheduler/optimizer/BuildIndexPass.cpp create mode 100644 core/src/scheduler/optimizer/BuildIndexPass.h diff --git a/CHANGELOG.md b/CHANGELOG.md index 745310faa7..42d9291573 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -13,6 +13,7 @@ Please mark all change in change log and use the ticket from JIRA. - \#12 - Pure CPU version for Milvus - \#77 - Support table partition - \#226 - Experimental shards middleware for Milvus +- \#346 - Support build index with multiple gpu ## Improvement - \#275 - Rename C++ SDK IndexType diff --git a/core/conf/server_cpu_config.template b/core/conf/server_cpu_config.template index 6c95126390..bcfb5fa7ce 100644 --- a/core/conf/server_cpu_config.template +++ b/core/conf/server_cpu_config.template @@ -40,4 +40,5 @@ engine_config: resource_config: search_resources: # define the device used for search computation - cpu - index_build_device: cpu # CPU used for building index + index_build_device: # CPU used for building index + - cpu diff --git a/core/conf/server_gpu_config.template b/core/conf/server_gpu_config.template index 154db5d134..a347a9e5bd 100644 --- a/core/conf/server_gpu_config.template +++ b/core/conf/server_gpu_config.template @@ -42,4 +42,5 @@ resource_config: search_resources: # define the devices used for search computation, must be in format: cpu or gpux - cpu - gpu0 - index_build_device: gpu0 # CPU / GPU used for building index, must be in format: cpu or gpux + index_build_device: # CPU / GPU used for building index, must be in format: cpu or gpux + - gpu0 diff --git a/core/src/db/engine/ExecutionEngineImpl.cpp b/core/src/db/engine/ExecutionEngineImpl.cpp index 19c699bda7..dd80ec796b 100644 --- a/core/src/db/engine/ExecutionEngineImpl.cpp +++ b/core/src/db/engine/ExecutionEngineImpl.cpp @@ -570,12 +570,19 @@ ExecutionEngineImpl::GpuCache(uint64_t gpu_id) { Status ExecutionEngineImpl::Init() { server::Config& config = server::Config::GetInstance(); - Status s = config.GetResourceConfigIndexBuildDevice(gpu_num_); + std::vector gpu_ids; + Status s = config.GetResourceConfigIndexBuildDevice(gpu_ids); if (!s.ok()) { return s; } + for (auto id : gpu_ids) { + if (gpu_num_ == id) { + return Status::OK(); + } + } - return Status::OK(); + std::string msg = "Invalid gpu_num"; + return Status(SERVER_INVALID_ARGUMENT, msg); } } // namespace engine diff --git a/core/src/scheduler/JobMgr.cpp b/core/src/scheduler/JobMgr.cpp index 794f6a0f37..8dd095a3fa 100644 --- a/core/src/scheduler/JobMgr.cpp +++ b/core/src/scheduler/JobMgr.cpp @@ -104,7 +104,7 @@ JobMgr::build_task(const JobPtr& job) { void JobMgr::calculate_path(const TaskPtr& task) { - if (task->type_ != TaskType::SearchTask) { + if (task->type_ != TaskType::SearchTask && task->type_ != TaskType::BuildIndexTask) { return; } diff --git a/core/src/scheduler/SchedInst.cpp b/core/src/scheduler/SchedInst.cpp index 61e0c09759..407c3a44d1 100644 --- a/core/src/scheduler/SchedInst.cpp +++ b/core/src/scheduler/SchedInst.cpp @@ -54,8 +54,8 @@ load_simple_config() { // get resources auto gpu_ids = get_gpu_pool(); - int32_t build_gpu_id; - config.GetResourceConfigIndexBuildDevice(build_gpu_id); + std::vector build_gpu_ids; + config.GetResourceConfigIndexBuildDevice(build_gpu_ids); // create and connect ResMgrInst::GetInstance()->Add(ResourceFactory::Create("disk", "DISK", 0, true, false)); @@ -65,19 +65,30 @@ load_simple_config() { ResMgrInst::GetInstance()->Connect("disk", "cpu", io); auto pcie = Connection("pcie", 12000); - bool find_build_gpu_id = false; - for (auto& gpu_id : gpu_ids) { - ResMgrInst::GetInstance()->Add(ResourceFactory::Create(std::to_string(gpu_id), "GPU", gpu_id, true, true)); - ResMgrInst::GetInstance()->Connect("cpu", std::to_string(gpu_id), pcie); - if (build_gpu_id == gpu_id) { - find_build_gpu_id = true; + + std::vector not_find_build_ids; + for (auto& build_id : build_gpu_ids) { + bool find_gpu_id = false; + for (auto& gpu_id : gpu_ids) { + if (gpu_id == build_id) { + find_gpu_id = true; + break; + } + } + if (not find_gpu_id) { + not_find_build_ids.emplace_back(build_id); } } - if (not find_build_gpu_id) { + for (auto& gpu_id : gpu_ids) { + ResMgrInst::GetInstance()->Add(ResourceFactory::Create(std::to_string(gpu_id), "GPU", gpu_id, true, true)); + ResMgrInst::GetInstance()->Connect("cpu", std::to_string(gpu_id), pcie); + } + + for (auto& not_find_id : not_find_build_ids) { ResMgrInst::GetInstance()->Add( - ResourceFactory::Create(std::to_string(build_gpu_id), "GPU", build_gpu_id, true, true)); - ResMgrInst::GetInstance()->Connect("cpu", std::to_string(build_gpu_id), pcie); + ResourceFactory::Create(std::to_string(not_find_id), "GPU", not_find_id, true, true)); + ResMgrInst::GetInstance()->Connect("cpu", std::to_string(not_find_id), pcie); } } diff --git a/core/src/scheduler/SchedInst.h b/core/src/scheduler/SchedInst.h index a3048069f9..4fd22f1fab 100644 --- a/core/src/scheduler/SchedInst.h +++ b/core/src/scheduler/SchedInst.h @@ -21,6 +21,7 @@ #include "JobMgr.h" #include "ResourceMgr.h" #include "Scheduler.h" +#include "optimizer/BuildIndexPass.h" #include "optimizer/HybridPass.h" #include "optimizer/LargeSQ8HPass.h" #include "optimizer/OnlyCPUPass.h" @@ -107,11 +108,15 @@ class OptimizerInst { } } + std::vector build_resources; + config.GetResourceConfigIndexBuildDevice(build_resources); + std::vector pass_list; pass_list.push_back(std::make_shared()); pass_list.push_back(std::make_shared()); pass_list.push_back(std::make_shared()); pass_list.push_back(std::make_shared(has_cpu)); + pass_list.push_back(std::make_shared(build_resources)); instance = std::make_shared(pass_list); } } diff --git a/core/src/scheduler/action/PushTaskToNeighbour.cpp b/core/src/scheduler/action/PushTaskToNeighbour.cpp index b8a4a1164b..9aed678937 100644 --- a/core/src/scheduler/action/PushTaskToNeighbour.cpp +++ b/core/src/scheduler/action/PushTaskToNeighbour.cpp @@ -138,73 +138,41 @@ Action::SpecifiedResourceLabelTaskScheduler(const ResourceMgrPtr& res_mgr, Resou std::shared_ptr event) { auto task_item = event->task_table_item_; auto task = event->task_table_item_->task; - if (resource->type() == ResourceType::DISK) { - // step 1: calculate shortest path per resource, from disk to compute resource - auto compute_resources = res_mgr->GetComputeResources(); - std::vector> paths; - std::vector transport_costs; - for (auto& res : compute_resources) { - std::vector path; - uint64_t transport_cost = ShortestPath(resource, res, res_mgr, path); - transport_costs.push_back(transport_cost); - paths.emplace_back(path); - } - // if (task->job_.lock()->type() == JobType::SEARCH) { - // auto label = task->label(); - // auto spec_label = std::static_pointer_cast(label); - // if (spec_label->resource().lock()->type() == ResourceType::CPU) { - // std::vector spec_path; - // spec_path.push_back(spec_label->resource().lock()->name()); - // spec_path.push_back(resource->name()); - // task->path() = Path(spec_path, spec_path.size() - 1); - // } else { - // // step 2: select min cost, cost(resource) = avg_cost * task_to_do + transport_cost - // uint64_t min_cost = std::numeric_limits::max(); - // uint64_t min_cost_idx = 0; - // for (uint64_t i = 0; i < compute_resources.size(); ++i) { - // if (compute_resources[i]->TotalTasks() == 0) { - // min_cost_idx = i; - // break; - // } - // uint64_t cost = compute_resources[i]->TaskAvgCost() * - // compute_resources[i]->NumOfTaskToExec() + - // transport_costs[i]; - // if (min_cost > cost) { - // min_cost = cost; - // min_cost_idx = i; - // } - // } - // - // // step 3: set path in task - // Path task_path(paths[min_cost_idx], paths[min_cost_idx].size() - 1); - // task->path() = task_path; - // } - // - // } else - if (task->job_.lock()->type() == JobType::BUILD) { - // step2: Read device id in config - // get build index gpu resource - server::Config& config = server::Config::GetInstance(); - int32_t build_index_gpu; - Status stat = config.GetResourceConfigIndexBuildDevice(build_index_gpu); - - bool find_gpu_res = false; - if (res_mgr->GetResource(ResourceType::GPU, build_index_gpu) != nullptr) { - for (uint64_t i = 0; i < compute_resources.size(); ++i) { - if (compute_resources[i]->name() == - res_mgr->GetResource(ResourceType::GPU, build_index_gpu)->name()) { - find_gpu_res = true; - Path task_path(paths[i], paths[i].size() - 1); - task->path() = task_path; - break; - } - } - } - if (not find_gpu_res) { - task->path() = Path(paths[0], paths[0].size() - 1); - } - } - } + // if (resource->type() == ResourceType::DISK) { + // // step 1: calculate shortest path per resource, from disk to compute resource + // auto compute_resources = res_mgr->GetComputeResources(); + // std::vector> paths; + // std::vector transport_costs; + // for (auto& res : compute_resources) { + // std::vector path; + // uint64_t transport_cost = ShortestPath(resource, res, res_mgr, path); + // transport_costs.push_back(transport_cost); + // paths.emplace_back(path); + // } + // if (task->job_.lock()->type() == JobType::BUILD) { + // // step2: Read device id in config + // // get build index gpu resource + // server::Config& config = server::Config::GetInstance(); + // int32_t build_index_gpu; + // Status stat = config.GetResourceConfigIndexBuildDevice(build_index_gpu); + // + // bool find_gpu_res = false; + // if (res_mgr->GetResource(ResourceType::GPU, build_index_gpu) != nullptr) { + // for (uint64_t i = 0; i < compute_resources.size(); ++i) { + // if (compute_resources[i]->name() == + // res_mgr->GetResource(ResourceType::GPU, build_index_gpu)->name()) { + // find_gpu_res = true; + // Path task_path(paths[i], paths[i].size() - 1); + // task->path() = task_path; + // break; + // } + // } + // } + // if (not find_gpu_res) { + // task->path() = Path(paths[0], paths[0].size() - 1); + // } + // } + // } if (resource->name() == task->path().Last()) { resource->WakeupExecutor(); diff --git a/core/src/scheduler/optimizer/BuildIndexPass.cpp b/core/src/scheduler/optimizer/BuildIndexPass.cpp new file mode 100644 index 0000000000..725b2509c3 --- /dev/null +++ b/core/src/scheduler/optimizer/BuildIndexPass.cpp @@ -0,0 +1,46 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#include "scheduler/optimizer/BuildIndexPass.h" +#include "scheduler/SchedInst.h" +#include "scheduler/Utils.h" +#include "scheduler/tasklabel/SpecResLabel.h" + +namespace milvus { +namespace scheduler { + +BuildIndexPass::BuildIndexPass(std::vector& build_gpu_ids) : build_gpu_ids_(build_gpu_ids) { +} + +bool +BuildIndexPass::Run(const TaskPtr& task) { + if (task->Type() != TaskType::BuildIndexTask) + return false; + + if (build_gpu_ids_.empty()) + return false; + + ResourcePtr res_ptr = ResMgrInst::GetInstance()->GetResource(ResourceType::GPU, build_gpu_ids_[specified_gpu_id_]); + auto label = std::make_shared(std::weak_ptr(res_ptr)); + task->label() = label; + + specified_gpu_id_ = (specified_gpu_id_ + 1) % build_gpu_ids_.size(); + return true; +} + +} // namespace scheduler +} // namespace milvus diff --git a/core/src/scheduler/optimizer/BuildIndexPass.h b/core/src/scheduler/optimizer/BuildIndexPass.h new file mode 100644 index 0000000000..a70844bfa9 --- /dev/null +++ b/core/src/scheduler/optimizer/BuildIndexPass.h @@ -0,0 +1,51 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. +#pragma once + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "Pass.h" + +namespace milvus { +namespace scheduler { + +class BuildIndexPass : public Pass { + public: + explicit BuildIndexPass(std::vector& build_gpu_id); + + public: + bool + Run(const TaskPtr& task) override; + + private: + uint64_t specified_gpu_id_ = 0; + std::vector build_gpu_ids_; +}; + +using BuildIndexPassPtr = std::shared_ptr; + +} // namespace scheduler +} // namespace milvus diff --git a/core/src/scheduler/optimizer/OnlyGPUPass.cpp b/core/src/scheduler/optimizer/OnlyGPUPass.cpp index 3fcda0e8a3..e867e45159 100644 --- a/core/src/scheduler/optimizer/OnlyGPUPass.cpp +++ b/core/src/scheduler/optimizer/OnlyGPUPass.cpp @@ -41,12 +41,11 @@ OnlyGPUPass::Run(const TaskPtr& task) { auto gpu_id = get_gpu_pool(); if (gpu_id.empty()) return false; - ResourcePtr res_ptr = ResMgrInst::GetInstance()->GetResource(ResourceType::GPU, gpu_id[specified_gpu_id_]); auto label = std::make_shared(std::weak_ptr(res_ptr)); task->label() = label; - specified_gpu_id_ = specified_gpu_id_++ % gpu_id.size(); + specified_gpu_id_ = (specified_gpu_id_ + 1) % gpu_id.size(); return true; } diff --git a/core/src/server/Config.cpp b/core/src/server/Config.cpp index f130e73a85..a2a1354dd4 100644 --- a/core/src/server/Config.cpp +++ b/core/src/server/Config.cpp @@ -215,8 +215,8 @@ Config::ValidateConfig() { return s; } - int32_t resource_index_build_device; - s = GetResourceConfigIndexBuildDevice(resource_index_build_device); + std::vector index_build_devices; + s = GetResourceConfigIndexBuildDevice(index_build_devices); if (!s.ok()) { return s; } @@ -599,22 +599,24 @@ Config::CheckCacheConfigGpuCacheCapacity(const std::string& value) { return Status(SERVER_INVALID_ARGUMENT, msg); } else { uint64_t gpu_cache_capacity = std::stoi(value) * GB; - int device_id; - Status s = GetResourceConfigIndexBuildDevice(device_id); + std::vector device_ids; + Status s = GetResourceConfigIndexBuildDevice(device_ids); if (!s.ok()) { return s; } size_t gpu_memory; - if (!ValidationUtil::GetGpuMemory(device_id, gpu_memory).ok()) { - std::string msg = "Fail to get GPU memory for GPU device: " + std::to_string(device_id); - return Status(SERVER_UNEXPECTED_ERROR, msg); - } else if (gpu_cache_capacity >= gpu_memory) { - std::string msg = "Invalid gpu cache capacity: " + value + - ". Possible reason: cache_config.gpu_cache_capacity exceeds GPU memory."; - return Status(SERVER_INVALID_ARGUMENT, msg); - } else if (gpu_cache_capacity > (double)gpu_memory * 0.9) { - std::cerr << "Warning: gpu cache capacity value is too big" << std::endl; + for (auto& device_id : device_ids) { + if (!ValidationUtil::GetGpuMemory(device_id, gpu_memory).ok()) { + std::string msg = "Fail to get GPU memory for GPU device: " + std::to_string(device_id); + return Status(SERVER_UNEXPECTED_ERROR, msg); + } else if (gpu_cache_capacity >= gpu_memory) { + std::string msg = "Invalid gpu cache capacity: " + value + + ". Possible reason: cache_config.gpu_cache_capacity exceeds GPU memory."; + return Status(SERVER_INVALID_ARGUMENT, msg); + } else if (gpu_cache_capacity > (double)gpu_memory * 0.9) { + std::cerr << "Warning: gpu cache capacity value is too big" << std::endl; + } } } return Status::OK(); @@ -745,11 +747,21 @@ Config::CheckResourceConfigSearchResources(const std::vector& value } Status -Config::CheckResourceConfigIndexBuildDevice(const std::string& value) { - auto status = CheckResource(value); - if (!status.ok()) { - return Status(SERVER_INVALID_ARGUMENT, status.message()); +Config::CheckResourceConfigIndexBuildDevice(const std::vector& value) { + if (value.empty()) { + std::string msg = + "Invalid index build resource. " + "Possible reason: resource_config.index_build_device is empty."; + return Status(SERVER_INVALID_ARGUMENT, msg); } + + for (auto& resource : value) { + auto status = CheckResource(resource); + if (!status.ok()) { + return Status(SERVER_INVALID_ARGUMENT, status.message()); + } + } + return Status::OK(); } @@ -1036,18 +1048,25 @@ Config::GetResourceConfigSearchResources(std::vector& value) { } Status -Config::GetResourceConfigIndexBuildDevice(int32_t& value) { +Config::GetResourceConfigIndexBuildDevice(std::vector& value) { std::string str = - GetConfigStr(CONFIG_RESOURCE, CONFIG_RESOURCE_INDEX_BUILD_DEVICE, CONFIG_RESOURCE_INDEX_BUILD_DEVICE_DEFAULT); - Status s = CheckResourceConfigIndexBuildDevice(str); + GetConfigSequenceStr(CONFIG_RESOURCE, CONFIG_RESOURCE_INDEX_BUILD_DEVICE, CONFIG_RESOURCE_INDEX_BUILD_DELIMITER, + CONFIG_RESOURCE_INDEX_BUILD_DEVICE_DEFAULT); + std::vector resources; + server::StringHelpFunctions::SplitStringByDelimeter(str, CONFIG_RESOURCE_INDEX_BUILD_DELIMITER, resources); + + Status s = CheckResourceConfigIndexBuildDevice(resources); if (!s.ok()) { return s; } - if (str == "cpu") { - value = CPU_DEVICE_ID; - } else { - value = std::stoi(str.substr(3)); + for (auto res : resources) { + if (res == "cpu") { + value.emplace_back(CPU_DEVICE_ID); + break; + } + int64_t device_id = std::stoi(str.substr(3)); + value.emplace_back(device_id); } return Status::OK(); @@ -1318,7 +1337,10 @@ Config::SetResourceConfigSearchResources(const std::string& value) { Status Config::SetResourceConfigIndexBuildDevice(const std::string& value) { - Status s = CheckResourceConfigIndexBuildDevice(value); + std::vector res_vec; + server::StringHelpFunctions::SplitStringByDelimeter(value, CONFIG_RESOURCE_INDEX_BUILD_DELIMITER, res_vec); + + Status s = CheckResourceConfigIndexBuildDevice(res_vec); if (!s.ok()) { return s; } diff --git a/core/src/server/Config.h b/core/src/server/Config.h index 3ab0cd8053..08b3accdd6 100644 --- a/core/src/server/Config.h +++ b/core/src/server/Config.h @@ -101,10 +101,11 @@ static const char* CONFIG_RESOURCE_SEARCH_RESOURCES_DEFAULT = "cpu,gpu0"; #endif static const char* CONFIG_RESOURCE_INDEX_BUILD_DEVICE = "index_build_device"; +static const char* CONFIG_RESOURCE_INDEX_BUILD_DELIMITER = ","; #ifdef MILVUS_CPU_VERSION static const char* CONFIG_RESOURCE_INDEX_BUILD_DEVICE_DEFAULT = "cpu"; #else -static const char* CONFIG_RESOURCE_INDEX_BUILD_DEVICE_DEFAULT = "gpu0"; +static const char* CONFIG_RESOURCE_INDEX_BUILD_DEVICE_DEFAULT = "cpu,gpu0"; #endif const int32_t CPU_DEVICE_ID = -1; @@ -190,7 +191,7 @@ class Config { Status CheckResourceConfigSearchResources(const std::vector& value); Status - CheckResourceConfigIndexBuildDevice(const std::string& value); + CheckResourceConfigIndexBuildDevice(const std::vector& value); std::string GetConfigStr(const std::string& parent_key, const std::string& child_key, const std::string& default_value = ""); @@ -259,7 +260,7 @@ class Config { Status GetResourceConfigSearchResources(std::vector& value); Status - GetResourceConfigIndexBuildDevice(int32_t& value); + GetResourceConfigIndexBuildDevice(std::vector& value); public: /* server config */ diff --git a/core/src/wrapper/KnowhereResource.cpp b/core/src/wrapper/KnowhereResource.cpp index 8ed19232e9..ccfbcbb6cf 100644 --- a/core/src/wrapper/KnowhereResource.cpp +++ b/core/src/wrapper/KnowhereResource.cpp @@ -48,12 +48,14 @@ KnowhereResource::Initialize() { // get build index gpu resource server::Config& config = server::Config::GetInstance(); - int32_t build_index_gpu; - s = config.GetResourceConfigIndexBuildDevice(build_index_gpu); + std::vector build_index_gpus; + s = config.GetResourceConfigIndexBuildDevice(build_index_gpus); if (!s.ok()) return s; - gpu_resources.insert(std::make_pair(build_index_gpu, GpuResourceSetting())); + for (auto gpu_id : build_index_gpus) { + gpu_resources.insert(std::make_pair(gpu_id, GpuResourceSetting())); + } // get search gpu resource std::vector pool; diff --git a/core/unittest/server/test_config.cpp b/core/unittest/server/test_config.cpp index 637273732d..7f4376bb67 100644 --- a/core/unittest/server/test_config.cpp +++ b/core/unittest/server/test_config.cpp @@ -292,7 +292,8 @@ TEST_F(ConfigTest, SERVER_CONFIG_VALID_TEST) { s = config.SetResourceConfigIndexBuildDevice("gpu" + std::to_string(resource_index_build_device)); #endif ASSERT_TRUE(s.ok()); - s = config.GetResourceConfigIndexBuildDevice(int32_val); + std::vector device_ids; + s = config.GetResourceConfigIndexBuildDevice(device_ids); ASSERT_TRUE(s.ok()); ASSERT_TRUE(int32_val == resource_index_build_device); } From fddfd1eb03c067234e6127730da14677c03c7554 Mon Sep 17 00:00:00 2001 From: "yudong.cai" Date: Mon, 18 Nov 2019 18:47:00 +0800 Subject: [PATCH 02/14] #346 update config APIs to support build index with multiple GPUs --- core/conf/server_cpu_config.template | 6 +- core/conf/server_gpu_config.template | 3 +- core/src/server/Config.cpp | 84 +++++++++++++++------------- core/src/server/Config.h | 16 +++--- core/src/utils/ValidationUtil.cpp | 4 +- core/src/utils/ValidationUtil.h | 4 +- core/unittest/server/test_config.cpp | 33 ++++++----- 7 files changed, 81 insertions(+), 69 deletions(-) diff --git a/core/conf/server_cpu_config.template b/core/conf/server_cpu_config.template index 6c95126390..bc8fc3bb3d 100644 --- a/core/conf/server_cpu_config.template +++ b/core/conf/server_cpu_config.template @@ -27,7 +27,6 @@ metric_config: port: 8080 # port prometheus uses to fetch metrics, must in range [1025, 65534] cache_config: - cpu_cache_capacity: 16 # GB, CPU memory used for cache, must be a positive integer cpu_cache_threshold: 0.85 # percentage of data that will be kept when cache cleanup is triggered, must be in range (0.0, 1.0] cache_insert_data: false # whether to load inserted data into cache, must be a boolean @@ -38,6 +37,7 @@ engine_config: gpu_search_threshold: 1000 # threshold beyond which the search computation is executed on GPUs only resource_config: - search_resources: # define the device used for search computation + search_resources: # define the devices used for search computation, must be in format: cpu or gpux + - cpu + index_build_resources: # define the devices used for index building, must be in format: cpu or gpux - cpu - index_build_device: cpu # CPU used for building index diff --git a/core/conf/server_gpu_config.template b/core/conf/server_gpu_config.template index 154db5d134..c54ed408df 100644 --- a/core/conf/server_gpu_config.template +++ b/core/conf/server_gpu_config.template @@ -42,4 +42,5 @@ resource_config: search_resources: # define the devices used for search computation, must be in format: cpu or gpux - cpu - gpu0 - index_build_device: gpu0 # CPU / GPU used for building index, must be in format: cpu or gpux + index_build_resources: # define the devices used for index building, must be in format: cpu or gpux + - gpu0 \ No newline at end of file diff --git a/core/src/server/Config.cpp b/core/src/server/Config.cpp index f130e73a85..5672ab52aa 100644 --- a/core/src/server/Config.cpp +++ b/core/src/server/Config.cpp @@ -215,8 +215,8 @@ Config::ValidateConfig() { return s; } - int32_t resource_index_build_device; - s = GetResourceConfigIndexBuildDevice(resource_index_build_device); + std::vector index_build_resources; + s = GetResourceConfigIndexBuildResources(index_build_resources); if (!s.ok()) { return s; } @@ -351,7 +351,7 @@ Config::ResetDefaultConfig() { return s; } - s = SetResourceConfigIndexBuildDevice(CONFIG_RESOURCE_INDEX_BUILD_DEVICE_DEFAULT); + s = SetResourceConfigIndexBuildResources(CONFIG_RESOURCE_INDEX_BUILD_RESOURCES_DEFAULT); if (!s.ok()) { return s; } @@ -599,22 +599,28 @@ Config::CheckCacheConfigGpuCacheCapacity(const std::string& value) { return Status(SERVER_INVALID_ARGUMENT, msg); } else { uint64_t gpu_cache_capacity = std::stoi(value) * GB; - int device_id; - Status s = GetResourceConfigIndexBuildDevice(device_id); + std::vector resources; + Status s = GetResourceConfigIndexBuildResources(resources); if (!s.ok()) { return s; } size_t gpu_memory; - if (!ValidationUtil::GetGpuMemory(device_id, gpu_memory).ok()) { - std::string msg = "Fail to get GPU memory for GPU device: " + std::to_string(device_id); - return Status(SERVER_UNEXPECTED_ERROR, msg); - } else if (gpu_cache_capacity >= gpu_memory) { - std::string msg = "Invalid gpu cache capacity: " + value + - ". Possible reason: cache_config.gpu_cache_capacity exceeds GPU memory."; - return Status(SERVER_INVALID_ARGUMENT, msg); - } else if (gpu_cache_capacity > (double)gpu_memory * 0.9) { - std::cerr << "Warning: gpu cache capacity value is too big" << std::endl; + for (auto& resource : resources) { + if (resource == "cpu") { + continue; + } + int32_t device_id = std::stoi(resource.substr(3)); + if (!ValidationUtil::GetGpuMemory(device_id, gpu_memory).ok()) { + std::string msg = "Fail to get GPU memory for GPU device: " + std::to_string(device_id); + return Status(SERVER_UNEXPECTED_ERROR, msg); + } else if (gpu_cache_capacity >= gpu_memory) { + std::string msg = "Invalid gpu cache capacity: " + value + + ". Possible reason: cache_config.gpu_cache_capacity exceeds GPU memory."; + return Status(SERVER_INVALID_ARGUMENT, msg); + } else if (gpu_cache_capacity > (double) gpu_memory * 0.9) { + std::cerr << "Warning: gpu cache capacity value is too big" << std::endl; + } } } return Status::OK(); @@ -745,10 +751,18 @@ Config::CheckResourceConfigSearchResources(const std::vector& value } Status -Config::CheckResourceConfigIndexBuildDevice(const std::string& value) { - auto status = CheckResource(value); - if (!status.ok()) { - return Status(SERVER_INVALID_ARGUMENT, status.message()); +Config::CheckResourceConfigIndexBuildResources(const std::vector& value) { + if (value.empty()) { + std::string msg = + "Invalid build index resource. " + "Possible reason: resource_config.build_index_resources is empty."; + return Status(SERVER_INVALID_ARGUMENT, msg); + } + for (auto& resource : value) { + auto status = CheckResource(resource); + if (!status.ok()) { + return Status(SERVER_INVALID_ARGUMENT, status.message()); + } } return Status::OK(); } @@ -1030,27 +1044,18 @@ Status Config::GetResourceConfigSearchResources(std::vector& value) { std::string str = GetConfigSequenceStr(CONFIG_RESOURCE, CONFIG_RESOURCE_SEARCH_RESOURCES, - CONFIG_RESOURCE_SEARCH_RESOURCES_DELIMITER, CONFIG_RESOURCE_SEARCH_RESOURCES_DEFAULT); - server::StringHelpFunctions::SplitStringByDelimeter(str, CONFIG_RESOURCE_SEARCH_RESOURCES_DELIMITER, value); + CONFIG_RESOURCE_RESOURCES_DELIMITER, CONFIG_RESOURCE_SEARCH_RESOURCES_DEFAULT); + server::StringHelpFunctions::SplitStringByDelimeter(str, CONFIG_RESOURCE_RESOURCES_DELIMITER, value); return CheckResourceConfigSearchResources(value); } Status -Config::GetResourceConfigIndexBuildDevice(int32_t& value) { +Config::GetResourceConfigIndexBuildResources(std::vector& value) { std::string str = - GetConfigStr(CONFIG_RESOURCE, CONFIG_RESOURCE_INDEX_BUILD_DEVICE, CONFIG_RESOURCE_INDEX_BUILD_DEVICE_DEFAULT); - Status s = CheckResourceConfigIndexBuildDevice(str); - if (!s.ok()) { - return s; - } - - if (str == "cpu") { - value = CPU_DEVICE_ID; - } else { - value = std::stoi(str.substr(3)); - } - - return Status::OK(); + GetConfigSequenceStr(CONFIG_RESOURCE, CONFIG_RESOURCE_INDEX_BUILD_RESOURCES, + CONFIG_RESOURCE_RESOURCES_DELIMITER, CONFIG_RESOURCE_INDEX_BUILD_RESOURCES_DEFAULT); + server::StringHelpFunctions::SplitStringByDelimeter(str, CONFIG_RESOURCE_RESOURCES_DELIMITER, value); + return CheckResourceConfigIndexBuildResources(value); } /////////////////////////////////////////////////////////////////////////////// @@ -1305,7 +1310,7 @@ Config::SetResourceConfigMode(const std::string& value) { Status Config::SetResourceConfigSearchResources(const std::string& value) { std::vector res_vec; - server::StringHelpFunctions::SplitStringByDelimeter(value, CONFIG_RESOURCE_SEARCH_RESOURCES_DELIMITER, res_vec); + server::StringHelpFunctions::SplitStringByDelimeter(value, CONFIG_RESOURCE_RESOURCES_DELIMITER, res_vec); Status s = CheckResourceConfigSearchResources(res_vec); if (!s.ok()) { @@ -1317,13 +1322,16 @@ Config::SetResourceConfigSearchResources(const std::string& value) { } Status -Config::SetResourceConfigIndexBuildDevice(const std::string& value) { - Status s = CheckResourceConfigIndexBuildDevice(value); +Config::SetResourceConfigIndexBuildResources(const std::string &value) { + std::vector res_vec; + server::StringHelpFunctions::SplitStringByDelimeter(value, CONFIG_RESOURCE_RESOURCES_DELIMITER, res_vec); + + Status s = CheckResourceConfigIndexBuildResources(res_vec); if (!s.ok()) { return s; } - SetConfigValueInMem(CONFIG_RESOURCE, CONFIG_RESOURCE_INDEX_BUILD_DEVICE, value); + SetConfigValueInMem(CONFIG_RESOURCE, CONFIG_RESOURCE_INDEX_BUILD_RESOURCES, value); return Status::OK(); } diff --git a/core/src/server/Config.h b/core/src/server/Config.h index 3ab0cd8053..0378a079fb 100644 --- a/core/src/server/Config.h +++ b/core/src/server/Config.h @@ -91,20 +91,18 @@ static const char* CONFIG_ENGINE_GPU_SEARCH_THRESHOLD_DEFAULT = "1000"; static const char* CONFIG_RESOURCE = "resource_config"; static const char* CONFIG_RESOURCE_MODE = "mode"; static const char* CONFIG_RESOURCE_MODE_DEFAULT = "simple"; +static const char* CONFIG_RESOURCE_RESOURCES_DELIMITER = ","; static const char* CONFIG_RESOURCE_SEARCH_RESOURCES = "search_resources"; -static const char* CONFIG_RESOURCE_SEARCH_RESOURCES_DELIMITER = ","; - #ifdef MILVUS_CPU_VERSION static const char* CONFIG_RESOURCE_SEARCH_RESOURCES_DEFAULT = "cpu"; #else static const char* CONFIG_RESOURCE_SEARCH_RESOURCES_DEFAULT = "cpu,gpu0"; #endif - -static const char* CONFIG_RESOURCE_INDEX_BUILD_DEVICE = "index_build_device"; +static const char* CONFIG_RESOURCE_INDEX_BUILD_RESOURCES = "index_build_resources"; #ifdef MILVUS_CPU_VERSION -static const char* CONFIG_RESOURCE_INDEX_BUILD_DEVICE_DEFAULT = "cpu"; +static const char* CONFIG_RESOURCE_INDEX_BUILD_RESOURCES_DEFAULT = "cpu"; #else -static const char* CONFIG_RESOURCE_INDEX_BUILD_DEVICE_DEFAULT = "gpu0"; +static const char* CONFIG_RESOURCE_INDEX_BUILD_RESOURCES_DEFAULT = "gpu0"; #endif const int32_t CPU_DEVICE_ID = -1; @@ -190,7 +188,7 @@ class Config { Status CheckResourceConfigSearchResources(const std::vector& value); Status - CheckResourceConfigIndexBuildDevice(const std::string& value); + CheckResourceConfigIndexBuildResources(const std::vector& value); std::string GetConfigStr(const std::string& parent_key, const std::string& child_key, const std::string& default_value = ""); @@ -259,7 +257,7 @@ class Config { Status GetResourceConfigSearchResources(std::vector& value); Status - GetResourceConfigIndexBuildDevice(int32_t& value); + GetResourceConfigIndexBuildResources(std::vector& value); public: /* server config */ @@ -320,7 +318,7 @@ class Config { Status SetResourceConfigSearchResources(const std::string& value); Status - SetResourceConfigIndexBuildDevice(const std::string& value); + SetResourceConfigIndexBuildResources(const std::string& value); private: std::unordered_map> config_map_; diff --git a/core/src/utils/ValidationUtil.cpp b/core/src/utils/ValidationUtil.cpp index ec696ff3e0..080de77e17 100644 --- a/core/src/utils/ValidationUtil.cpp +++ b/core/src/utils/ValidationUtil.cpp @@ -182,7 +182,7 @@ ValidationUtil::ValidatePartitionTags(const std::vector& partition_ } Status -ValidationUtil::ValidateGpuIndex(uint32_t gpu_index) { +ValidationUtil::ValidateGpuIndex(int32_t gpu_index) { #ifdef MILVUS_GPU_VERSION int num_devices = 0; auto cuda_err = cudaGetDeviceCount(&num_devices); @@ -203,7 +203,7 @@ ValidationUtil::ValidateGpuIndex(uint32_t gpu_index) { } Status -ValidationUtil::GetGpuMemory(uint32_t gpu_index, size_t& memory) { +ValidationUtil::GetGpuMemory(int32_t gpu_index, size_t& memory) { #ifdef MILVUS_GPU_VERSION cudaDeviceProp deviceProp; diff --git a/core/src/utils/ValidationUtil.h b/core/src/utils/ValidationUtil.h index 01801e295a..201ccef3bd 100644 --- a/core/src/utils/ValidationUtil.h +++ b/core/src/utils/ValidationUtil.h @@ -59,10 +59,10 @@ class ValidationUtil { ValidatePartitionTags(const std::vector& partition_tags); static Status - ValidateGpuIndex(uint32_t gpu_index); + ValidateGpuIndex(int32_t gpu_index); static Status - GetGpuMemory(uint32_t gpu_index, size_t& memory); + GetGpuMemory(int32_t gpu_index, size_t& memory); static Status ValidateIpAddress(const std::string& ip_address); diff --git a/core/unittest/server/test_config.cpp b/core/unittest/server/test_config.cpp index 637273732d..37be36b7eb 100644 --- a/core/unittest/server/test_config.cpp +++ b/core/unittest/server/test_config.cpp @@ -272,29 +272,34 @@ TEST_F(ConfigTest, SERVER_CONFIG_VALID_TEST) { #else std::vector search_resources = {"cpu", "gpu0"}; #endif - std::vector res_vec; - std::string res_str; + std::vector search_res_vec; + std::string search_res_str; milvus::server::StringHelpFunctions::MergeStringWithDelimeter( - search_resources, milvus::server::CONFIG_RESOURCE_SEARCH_RESOURCES_DELIMITER, res_str); - s = config.SetResourceConfigSearchResources(res_str); + search_resources, milvus::server::CONFIG_RESOURCE_RESOURCES_DELIMITER, search_res_str); + s = config.SetResourceConfigSearchResources(search_res_str); ASSERT_TRUE(s.ok()); - s = config.GetResourceConfigSearchResources(res_vec); + s = config.GetResourceConfigSearchResources(search_res_vec); ASSERT_TRUE(s.ok()); for (size_t i = 0; i < search_resources.size(); i++) { - ASSERT_TRUE(search_resources[i] == res_vec[i]); + ASSERT_TRUE(search_resources[i] == search_res_vec[i]); } #ifdef MILVUS_CPU_VERSION - int32_t resource_index_build_device = milvus::server::CPU_DEVICE_ID; - s = config.SetResourceConfigIndexBuildDevice("cpu"); + std::vector index_build_resources = {"cpu"}; #else - int32_t resource_index_build_device = 0; - s = config.SetResourceConfigIndexBuildDevice("gpu" + std::to_string(resource_index_build_device)); + std::vector index_build_resources = {"gpu0", "gpu1"}; #endif + std::vector index_build_res_vec; + std::string index_build_res_str; + milvus::server::StringHelpFunctions::MergeStringWithDelimeter( + index_build_resources, milvus::server::CONFIG_RESOURCE_RESOURCES_DELIMITER, index_build_res_str); + s = config.SetResourceConfigIndexBuildResources(index_build_res_str); ASSERT_TRUE(s.ok()); - s = config.GetResourceConfigIndexBuildDevice(int32_val); + s = config.GetResourceConfigIndexBuildResources(index_build_res_vec); ASSERT_TRUE(s.ok()); - ASSERT_TRUE(int32_val == resource_index_build_device); + for (size_t i = 0; i < index_build_resources.size(); i++) { + ASSERT_TRUE(index_build_resources[i] == index_build_res_vec[i]); + } } TEST_F(ConfigTest, SERVER_CONFIG_INVALID_TEST) { @@ -418,9 +423,9 @@ TEST_F(ConfigTest, SERVER_CONFIG_INVALID_TEST) { s = config.SetResourceConfigSearchResources("cpu"); ASSERT_TRUE(s.ok()); - s = config.SetResourceConfigIndexBuildDevice("gup2"); + s = config.SetResourceConfigIndexBuildResources("gup2"); ASSERT_FALSE(s.ok()); - s = config.SetResourceConfigIndexBuildDevice("gpu16"); + s = config.SetResourceConfigIndexBuildResources("gpu16"); ASSERT_FALSE(s.ok()); } From 8f64c44271785753f2fb34b34edb08598a950ba8 Mon Sep 17 00:00:00 2001 From: jielinxu <52057195+jielinxu@users.noreply.github.com> Date: Tue, 19 Nov 2019 09:16:58 +0800 Subject: [PATCH 03/14] [skip ci] Add note about using unbuntu docker --- install.md | 3 +++ 1 file changed, 3 insertions(+) diff --git a/install.md b/install.md index 6711b41f76..98b9af4649 100644 --- a/install.md +++ b/install.md @@ -3,6 +3,9 @@ ## Software requirements - Ubuntu 18.04 or higher + + If your Ubuntu version is lower than 18.04, we recommend you to pull a [docker image of Ubuntu 18.04](https://docs.docker.com/install/linux/docker-ce/ubuntu/) as your compilation environment. + - CMake 3.12 or higher ##### For GPU version, you will also need: From 9fb536b502e6d17f1606c117b117bf55e1442a8b Mon Sep 17 00:00:00 2001 From: jielinxu <52057195+jielinxu@users.noreply.github.com> Date: Tue, 19 Nov 2019 11:49:05 +0800 Subject: [PATCH 04/14] [skip ci] minor update --- install.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/install.md b/install.md index 98b9af4649..f53586af36 100644 --- a/install.md +++ b/install.md @@ -4,7 +4,7 @@ - Ubuntu 18.04 or higher - If your Ubuntu version is lower than 18.04, we recommend you to pull a [docker image of Ubuntu 18.04](https://docs.docker.com/install/linux/docker-ce/ubuntu/) as your compilation environment. + If your operating system is not Ubuntu 18.04 or higher, we recommend you to pull a [docker image of Ubuntu 18.04](https://docs.docker.com/install/linux/docker-ce/ubuntu/) as your compilation environment. - CMake 3.12 or higher From 7ad5c514970497a4583098f07756d7c25176eea8 Mon Sep 17 00:00:00 2001 From: wxyu Date: Tue, 19 Nov 2019 12:29:05 +0800 Subject: [PATCH 05/14] Add virtual method Init() in Pass abstract class close #404 --- CHANGELOG.md | 1 + core/src/scheduler/SchedInst.cpp | 1 + core/src/scheduler/optimizer/HybridPass.cpp | 4 ++++ core/src/scheduler/optimizer/HybridPass.h | 3 +++ core/src/scheduler/optimizer/LargeSQ8HPass.cpp | 3 ++- core/src/scheduler/optimizer/LargeSQ8HPass.h | 5 ++++- core/src/scheduler/optimizer/OnlyCPUPass.cpp | 4 ++++ core/src/scheduler/optimizer/OnlyCPUPass.h | 3 +++ core/src/scheduler/optimizer/OnlyGPUPass.cpp | 4 ++++ core/src/scheduler/optimizer/OnlyGPUPass.h | 3 +++ core/src/scheduler/optimizer/Optimizer.cpp | 12 ++++++------ core/src/scheduler/optimizer/Optimizer.h | 4 ++-- core/src/scheduler/optimizer/Pass.h | 5 ++--- 13 files changed, 39 insertions(+), 13 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 4bcbb0b10a..a87e4bec63 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -29,6 +29,7 @@ Please mark all change in change log and use the ticket from JIRA. - \#322 - Add option to enable / disable prometheus - \#358 - Add more information in build.sh and install.md - \#255 - Add ivfsq8 test report detailed version +- \#404 - Add virtual method Init() in Pass abstract class ## Task diff --git a/core/src/scheduler/SchedInst.cpp b/core/src/scheduler/SchedInst.cpp index 61e0c09759..9bd4df944f 100644 --- a/core/src/scheduler/SchedInst.cpp +++ b/core/src/scheduler/SchedInst.cpp @@ -84,6 +84,7 @@ load_simple_config() { void StartSchedulerService() { load_simple_config(); + OptimizerInst::GetInstance()->Init(); ResMgrInst::GetInstance()->Start(); SchedInst::GetInstance()->Start(); JobMgrInst::GetInstance()->Start(); diff --git a/core/src/scheduler/optimizer/HybridPass.cpp b/core/src/scheduler/optimizer/HybridPass.cpp index d63fc2e819..8bf0d8990d 100644 --- a/core/src/scheduler/optimizer/HybridPass.cpp +++ b/core/src/scheduler/optimizer/HybridPass.cpp @@ -23,6 +23,10 @@ namespace milvus { namespace scheduler { +void +HybridPass::Init() { +} + bool HybridPass::Run(const TaskPtr& task) { // TODO: future, Index::IVFSQ8H, if nq < threshold set cpu, else set gpu diff --git a/core/src/scheduler/optimizer/HybridPass.h b/core/src/scheduler/optimizer/HybridPass.h index 0d02a8bda9..f84a0884f2 100644 --- a/core/src/scheduler/optimizer/HybridPass.h +++ b/core/src/scheduler/optimizer/HybridPass.h @@ -37,6 +37,9 @@ class HybridPass : public Pass { HybridPass() = default; public: + void + Init() override; + bool Run(const TaskPtr& task) override; }; diff --git a/core/src/scheduler/optimizer/LargeSQ8HPass.cpp b/core/src/scheduler/optimizer/LargeSQ8HPass.cpp index b9784e3c0a..89bd85ec1f 100644 --- a/core/src/scheduler/optimizer/LargeSQ8HPass.cpp +++ b/core/src/scheduler/optimizer/LargeSQ8HPass.cpp @@ -27,7 +27,8 @@ namespace milvus { namespace scheduler { -LargeSQ8HPass::LargeSQ8HPass() { +void +LargeSQ8HPass::Init() { server::Config& config = server::Config::GetInstance(); Status s = config.GetEngineConfigGpuSearchThreshold(threshold_); if (!s.ok()) { diff --git a/core/src/scheduler/optimizer/LargeSQ8HPass.h b/core/src/scheduler/optimizer/LargeSQ8HPass.h index 9d135d413a..1afd240c0f 100644 --- a/core/src/scheduler/optimizer/LargeSQ8HPass.h +++ b/core/src/scheduler/optimizer/LargeSQ8HPass.h @@ -35,9 +35,12 @@ namespace scheduler { class LargeSQ8HPass : public Pass { public: - LargeSQ8HPass(); + LargeSQ8HPass() = default; public: + void + Init() override; + bool Run(const TaskPtr& task) override; diff --git a/core/src/scheduler/optimizer/OnlyCPUPass.cpp b/core/src/scheduler/optimizer/OnlyCPUPass.cpp index 238a91a82c..392ae7940b 100644 --- a/core/src/scheduler/optimizer/OnlyCPUPass.cpp +++ b/core/src/scheduler/optimizer/OnlyCPUPass.cpp @@ -24,6 +24,10 @@ namespace milvus { namespace scheduler { +void +OnlyCPUPass::Init() { +} + bool OnlyCPUPass::Run(const TaskPtr& task) { if (task->Type() != TaskType::SearchTask) diff --git a/core/src/scheduler/optimizer/OnlyCPUPass.h b/core/src/scheduler/optimizer/OnlyCPUPass.h index 76b42e3766..253775f77e 100644 --- a/core/src/scheduler/optimizer/OnlyCPUPass.h +++ b/core/src/scheduler/optimizer/OnlyCPUPass.h @@ -37,6 +37,9 @@ class OnlyCPUPass : public Pass { OnlyCPUPass() = default; public: + void + Init() override; + bool Run(const TaskPtr& task) override; }; diff --git a/core/src/scheduler/optimizer/OnlyGPUPass.cpp b/core/src/scheduler/optimizer/OnlyGPUPass.cpp index 2a72f9757e..69a8366bb2 100644 --- a/core/src/scheduler/optimizer/OnlyGPUPass.cpp +++ b/core/src/scheduler/optimizer/OnlyGPUPass.cpp @@ -27,6 +27,10 @@ namespace scheduler { OnlyGPUPass::OnlyGPUPass(bool has_cpu) : has_cpu_(has_cpu) { } +void +OnlyGPUPass::Init() { +} + bool OnlyGPUPass::Run(const TaskPtr& task) { if (task->Type() != TaskType::SearchTask || has_cpu_) diff --git a/core/src/scheduler/optimizer/OnlyGPUPass.h b/core/src/scheduler/optimizer/OnlyGPUPass.h index 10d909d30e..c7a83fea30 100644 --- a/core/src/scheduler/optimizer/OnlyGPUPass.h +++ b/core/src/scheduler/optimizer/OnlyGPUPass.h @@ -37,6 +37,9 @@ class OnlyGPUPass : public Pass { explicit OnlyGPUPass(bool has_cpu); public: + void + Init() override; + bool Run(const TaskPtr& task) override; diff --git a/core/src/scheduler/optimizer/Optimizer.cpp b/core/src/scheduler/optimizer/Optimizer.cpp index 46f24ea712..c5fa311a27 100644 --- a/core/src/scheduler/optimizer/Optimizer.cpp +++ b/core/src/scheduler/optimizer/Optimizer.cpp @@ -20,12 +20,12 @@ namespace milvus { namespace scheduler { -// void -// Optimizer::Init() { -// for (auto& pass : pass_list_) { -// pass->Init(); -// } -// } +void +Optimizer::Init() { + for (auto& pass : pass_list_) { + pass->Init(); + } +} bool Optimizer::Run(const TaskPtr& task) { diff --git a/core/src/scheduler/optimizer/Optimizer.h b/core/src/scheduler/optimizer/Optimizer.h index bfabbf7de3..68b519e115 100644 --- a/core/src/scheduler/optimizer/Optimizer.h +++ b/core/src/scheduler/optimizer/Optimizer.h @@ -38,8 +38,8 @@ class Optimizer { explicit Optimizer(std::vector pass_list) : pass_list_(std::move(pass_list)) { } - // void - // Init(); + void + Init(); bool Run(const TaskPtr& task); diff --git a/core/src/scheduler/optimizer/Pass.h b/core/src/scheduler/optimizer/Pass.h index 016b05e457..36a36a1df5 100644 --- a/core/src/scheduler/optimizer/Pass.h +++ b/core/src/scheduler/optimizer/Pass.h @@ -34,9 +34,8 @@ namespace scheduler { class Pass { public: - // virtual void - // Init() { - // } + virtual void + Init() = 0; virtual bool Run(const TaskPtr& task) = 0; From 31aa89e1318c3e8b9b2e1cee220b20b7fc42370b Mon Sep 17 00:00:00 2001 From: Yukikaze-CZR Date: Tue, 19 Nov 2019 12:59:54 +0800 Subject: [PATCH 06/14] Change ivfsq8 test report detailed version --- .../milvus_ivfsq8_test_report_detailed_version.md | 4 ---- .../milvus_ivfsq8_test_report_detailed_version_cn.md | 6 +----- 2 files changed, 1 insertion(+), 9 deletions(-) diff --git a/docs/test_report/milvus_ivfsq8_test_report_detailed_version.md b/docs/test_report/milvus_ivfsq8_test_report_detailed_version.md index 4d6840430e..dc1bbb3fd0 100644 --- a/docs/test_report/milvus_ivfsq8_test_report_detailed_version.md +++ b/docs/test_report/milvus_ivfsq8_test_report_detailed_version.md @@ -177,8 +177,6 @@ search_resources: gpu0, gpu1 | nq/topk | topk=1 | topk=10 | topk=100 | | :-----: | :----: | :-----: | :------: | -| nq=1 | 1.000 | 0.800 | 0.790 | -| nq=5 | 0.800 | 0.820 | 0.908 | | nq=10 | 0.900 | 0.910 | 0.939 | | nq=200 | 0.955 | 0.941 | 0.929 | | nq=400 | 0.958 | 0.944 | 0.932 | @@ -194,8 +192,6 @@ search_resources: cpu, gpu0 | nq/topk | topk=1 | topk=10 | topk=100 | | :-----: | :----: | :-----: | :------: | -| nq=1 | 1.000 | 0.800 | 0.790 | -| nq=5 | 0.800 | 0.820 | 0.908 | | nq=10 | 0.900 | 0.910 | 0.939 | | nq=200 | 0.955 | 0.941 | 0.929 | | nq=400 | 0.958 | 0.944 | 0.932 | diff --git a/docs/test_report/milvus_ivfsq8_test_report_detailed_version_cn.md b/docs/test_report/milvus_ivfsq8_test_report_detailed_version_cn.md index 4b5e18f291..bf43d2e0e8 100644 --- a/docs/test_report/milvus_ivfsq8_test_report_detailed_version_cn.md +++ b/docs/test_report/milvus_ivfsq8_test_report_detailed_version_cn.md @@ -177,8 +177,6 @@ search_resources: gpu0, gpu1 | nq/topk | topk=1 | topk=10 | topk=100 | | :-----: | :----: | :-----: | :------: | -| nq=1 | 1.000 | 0.800 | 0.790 | -| nq=5 | 0.800 | 0.820 | 0.908 | | nq=10 | 0.900 | 0.910 | 0.939 | | nq=200 | 0.955 | 0.941 | 0.929 | | nq=400 | 0.958 | 0.944 | 0.932 | @@ -194,8 +192,6 @@ search_resources: cpu, gpu0 | nq/topk | topk=1 | topk=10 | topk=100 | | :-----: | :----: | :-----: | :------: | -| nq=1 | 1.000 | 0.800 | 0.790 | -| nq=5 | 0.800 | 0.820 | 0.908 | | nq=10 | 0.900 | 0.910 | 0.939 | | nq=200 | 0.955 | 0.941 | 0.929 | | nq=400 | 0.958 | 0.944 | 0.932 | @@ -207,5 +203,5 @@ search_resources: cpu, gpu0 **总结** -​ 随着nq的增大,召回率逐渐稳定至93%以上。 +随着nq的增大,召回率逐渐稳定至93%以上。 From 2707e07ccd36e47d1ff60d3be9e95832a6942b28 Mon Sep 17 00:00:00 2001 From: quicksilver Date: Tue, 19 Nov 2019 14:31:32 +0800 Subject: [PATCH 07/14] Fix Deployed GPU-version docker failed in CI --- ci/jenkins/step/deploySingle2Dev.groovy | 6 +----- ci/jenkins/step/singleDevNightlyTest.groovy | 6 +----- ci/jenkins/step/singleDevTest.groovy | 6 +----- 3 files changed, 3 insertions(+), 15 deletions(-) diff --git a/ci/jenkins/step/deploySingle2Dev.groovy b/ci/jenkins/step/deploySingle2Dev.groovy index f1daaf22ec..7b479ff44a 100644 --- a/ci/jenkins/step/deploySingle2Dev.groovy +++ b/ci/jenkins/step/deploySingle2Dev.groovy @@ -3,11 +3,7 @@ sh 'helm repo update' dir ('milvus-helm') { checkout([$class: 'GitSCM', branches: [[name: "0.6.0"]], userRemoteConfigs: [[url: "https://github.com/milvus-io/milvus-helm.git", name: 'origin', refspec: "+refs/heads/0.6.0:refs/remotes/origin/0.6.0"]]]) dir ("milvus") { - if ("${env.BINRARY_VERSION}" == "gpu") { - sh "helm install --wait --timeout 300 --set engine.image.tag=${DOCKER_VERSION} --set expose.type=clusterIP --name ${env.PIPELINE_NAME}-${env.BUILD_NUMBER}-single-${env.BINRARY_VERSION} -f gpu_values.yaml -f ci/filebeat/values.yaml --namespace milvus ." - } else { - sh "helm install --wait --timeout 300 --set engine.image.tag=${DOCKER_VERSION} --set expose.type=clusterIP --name ${env.PIPELINE_NAME}-${env.BUILD_NUMBER}-single-${env.BINRARY_VERSION} -f ci/filebeat/values.yaml --namespace milvus ." - } + sh "helm install --wait --timeout 300 --set engine.image.tag=${DOCKER_VERSION} --set expose.type=clusterIP --name ${env.PIPELINE_NAME}-${env.BUILD_NUMBER}-single-${env.BINRARY_VERSION} -f ci/db_backend/sqlite_${env.BINRARY_VERSION}_values.yaml -f ci/filebeat/values.yaml --namespace milvus ." } } diff --git a/ci/jenkins/step/singleDevNightlyTest.groovy b/ci/jenkins/step/singleDevNightlyTest.groovy index d357badfd3..e27bc6c0a5 100644 --- a/ci/jenkins/step/singleDevNightlyTest.groovy +++ b/ci/jenkins/step/singleDevNightlyTest.groovy @@ -13,11 +13,7 @@ timeout(time: 90, unit: 'MINUTES') { } dir ("milvus-helm") { dir ("milvus") { - if ("${env.BINRARY_VERSION}" == "gpu") { - sh "helm install --wait --timeout 300 --set engine.image.tag=${DOCKER_VERSION} --set expose.type=clusterIP --name ${env.PIPELINE_NAME}-${env.BUILD_NUMBER}-single-${env.BINRARY_VERSION} -f gpu_values.yaml -f ci/db_backend/mysql_values.yaml -f ci/filebeat/values.yaml --namespace milvus ." - } else { - sh "helm install --wait --timeout 300 --set engine.image.tag=${DOCKER_VERSION} --set expose.type=clusterIP --name ${env.PIPELINE_NAME}-${env.BUILD_NUMBER}-single-${env.BINRARY_VERSION} -f ci/db_backend/mysql_values.yaml -f ci/filebeat/values.yaml --namespace milvus ." - } + sh "helm install --wait --timeout 300 --set engine.image.tag=${DOCKER_VERSION} --set expose.type=clusterIP --name ${env.PIPELINE_NAME}-${env.BUILD_NUMBER}-single-${env.BINRARY_VERSION} -f ci/db_backend/mysql_${env.BINRARY_VERSION}_values.yaml -f ci/filebeat/values.yaml --namespace milvus ." } } dir ("tests/milvus_python_test") { diff --git a/ci/jenkins/step/singleDevTest.groovy b/ci/jenkins/step/singleDevTest.groovy index c1de5907b0..08812ae01f 100644 --- a/ci/jenkins/step/singleDevTest.groovy +++ b/ci/jenkins/step/singleDevTest.groovy @@ -14,11 +14,7 @@ timeout(time: 60, unit: 'MINUTES') { // } // dir ("milvus-helm") { // dir ("milvus") { - // if ("${env.BINRARY_VERSION}" == "gpu") { - // sh "helm install --wait --timeout 300 --set engine.image.tag=${DOCKER_VERSION} --set expose.type=clusterIP --name ${env.PIPELINE_NAME}-${env.BUILD_NUMBER}-single-${env.BINRARY_VERSION} -f gpu_values.yaml -f ci/db_backend/mysql_values.yaml -f ci/filebeat/values.yaml --namespace milvus ." - // } else { - // sh "helm install --wait --timeout 300 --set engine.image.tag=${DOCKER_VERSION} --set expose.type=clusterIP --name ${env.PIPELINE_NAME}-${env.BUILD_NUMBER}-single-${env.BINRARY_VERSION} -f ci/db_backend/mysql_values.yaml -f ci/filebeat/values.yaml --namespace milvus ." - // } + // sh "helm install --wait --timeout 300 --set engine.image.tag=${DOCKER_VERSION} --set expose.type=clusterIP --name ${env.PIPELINE_NAME}-${env.BUILD_NUMBER}-single-${env.BINRARY_VERSION} -f ci/db_backend/mysql_${env.BINRARY_VERSION}_values.yaml -f ci/filebeat/values.yaml --namespace milvus ." // } // } // dir ("tests/milvus_python_test") { From 0d1923c61f53ea6ec77c98356e6c8301317d9b74 Mon Sep 17 00:00:00 2001 From: "yudong.cai" Date: Tue, 19 Nov 2019 15:49:40 +0800 Subject: [PATCH 08/14] #346 update gpu resource config APIs --- core/conf/server_cpu_config.template | 15 +- core/conf/server_gpu_config.template | 15 +- core/src/cache/GpuCacheMgr.cpp | 4 +- core/src/server/Config.cpp | 462 ++++++++++++++------------- core/src/server/Config.h | 87 +++-- core/src/utils/ValidationUtil.cpp | 4 +- core/src/utils/ValidationUtil.h | 4 +- core/unittest/server/test_config.cpp | 116 ++++--- 8 files changed, 357 insertions(+), 350 deletions(-) diff --git a/core/conf/server_cpu_config.template b/core/conf/server_cpu_config.template index 6c95126390..ae942f351e 100644 --- a/core/conf/server_cpu_config.template +++ b/core/conf/server_cpu_config.template @@ -27,9 +27,7 @@ metric_config: port: 8080 # port prometheus uses to fetch metrics, must in range [1025, 65534] cache_config: - - cpu_cache_capacity: 16 # GB, CPU memory used for cache, must be a positive integer - cpu_cache_threshold: 0.85 # percentage of data that will be kept when cache cleanup is triggered, must be in range (0.0, 1.0] + cpu_cache_capacity: 16 # GB, size of CPU memory used for cache, must be a positive integer cache_insert_data: false # whether to load inserted data into cache, must be a boolean engine_config: @@ -37,7 +35,10 @@ engine_config: # if nq >= use_blas_threshold, use OpenBlas, slower with stable response times gpu_search_threshold: 1000 # threshold beyond which the search computation is executed on GPUs only -resource_config: - search_resources: # define the device used for search computation - - cpu - index_build_device: cpu # CPU used for building index +gpu_resource_config: + enable_gpu: true # whether to enable GPU resources + cache_capacity: 4 # GB, size of GPU memory per card used for cache, must be a positive integer + search_resources: # define the GPU devices used for search computation, must be in format gpux + - gpu0 + build_index_resources: # define the GPU devices used for index building, must be in format gpux + - gpu0 \ No newline at end of file diff --git a/core/conf/server_gpu_config.template b/core/conf/server_gpu_config.template index 154db5d134..92a294d483 100644 --- a/core/conf/server_gpu_config.template +++ b/core/conf/server_gpu_config.template @@ -27,10 +27,7 @@ metric_config: port: 8080 # port prometheus uses to fetch metrics, must in range [1025, 65534] cache_config: - cpu_cache_capacity: 16 # GB, CPU memory used for cache, must be a positive integer - cpu_cache_threshold: 0.85 # percentage of data that will be kept when cache cleanup is triggered, must be in range (0.0, 1.0] - gpu_cache_capacity: 4 # GB, GPU memory used for cache, must be a positive integer - gpu_cache_threshold: 0.85 # percentage of data that will be kept when cache cleanup is triggered, must be in range (0.0, 1.0] + cpu_cache_capacity: 16 # GB, size of CPU memory used for cache, must be a positive integer cache_insert_data: false # whether to load inserted data into cache, must be a boolean engine_config: @@ -38,8 +35,10 @@ engine_config: # if nq >= use_blas_threshold, use OpenBlas, slower with stable response times gpu_search_threshold: 1000 # threshold beyond which the search computation is executed on GPUs only -resource_config: - search_resources: # define the devices used for search computation, must be in format: cpu or gpux - - cpu +gpu_resource_config: + enable_gpu: false # whether to enable GPU resources + cache_capacity: 4 # GB, size of GPU memory per card used for cache, must be a positive integer + search_resources: # define the GPU devices used for search computation, must be in format gpux - gpu0 - index_build_device: gpu0 # CPU / GPU used for building index, must be in format: cpu or gpux + build_index_resources: # define the GPU devices used for index building, must be in format gpux + - gpu0 \ No newline at end of file diff --git a/core/src/cache/GpuCacheMgr.cpp b/core/src/cache/GpuCacheMgr.cpp index d862bc0393..72229527fa 100644 --- a/core/src/cache/GpuCacheMgr.cpp +++ b/core/src/cache/GpuCacheMgr.cpp @@ -37,7 +37,7 @@ GpuCacheMgr::GpuCacheMgr() { Status s; int64_t gpu_cache_cap; - s = config.GetCacheConfigGpuCacheCapacity(gpu_cache_cap); + s = config.GetGpuResourceConfigCacheCapacity(gpu_cache_cap); if (!s.ok()) { SERVER_LOG_ERROR << s.message(); } @@ -45,7 +45,7 @@ GpuCacheMgr::GpuCacheMgr() { cache_ = std::make_shared>(cap, 1UL << 32); float gpu_mem_threshold; - s = config.GetCacheConfigGpuCacheThreshold(gpu_mem_threshold); + s = config.GetGpuResourceConfigCacheThreshold(gpu_mem_threshold); if (!s.ok()) { SERVER_LOG_ERROR << s.message(); } diff --git a/core/src/server/Config.cpp b/core/src/server/Config.cpp index f130e73a85..017f1641bd 100644 --- a/core/src/server/Config.cpp +++ b/core/src/server/Config.cpp @@ -163,20 +163,6 @@ Config::ValidateConfig() { return s; } -#ifdef MILVUS_GPU_VERSION - int64_t cache_gpu_cache_capacity; - s = GetCacheConfigGpuCacheCapacity(cache_gpu_cache_capacity); - if (!s.ok()) { - return s; - } - - float cache_gpu_cache_threshold; - s = GetCacheConfigGpuCacheThreshold(cache_gpu_cache_threshold); - if (!s.ok()) { - return s; - } -#endif - bool cache_insert_data; s = GetCacheConfigCacheInsertData(cache_insert_data); if (!s.ok()) { @@ -202,25 +188,39 @@ Config::ValidateConfig() { return s; } - /* resource config */ - std::string resource_mode; - s = GetResourceConfigMode(resource_mode); + /* gpu resource config */ +#ifdef MILVUS_GPU_VERSION + bool resource_enable_gpu; + s = GetGpuResourceConfigEnableGpu(resource_enable_gpu); if (!s.ok()) { return s; } - std::vector search_resources; - s = GetResourceConfigSearchResources(search_resources); + int64_t resource_cache_capacity; + s = GetGpuResourceConfigCacheCapacity(resource_cache_capacity); if (!s.ok()) { return s; } - int32_t resource_index_build_device; - s = GetResourceConfigIndexBuildDevice(resource_index_build_device); + float resource_cache_threshold; + s = GetGpuResourceConfigCacheThreshold(resource_cache_threshold); if (!s.ok()) { return s; } + std::vector search_resources; + s = GetGpuResourceConfigSearchResources(search_resources); + if (!s.ok()) { + return s; + } + + std::vector index_build_resources; + s = GetGpuResourceConfigBuildIndexResources(index_build_resources); + if (!s.ok()) { + return s; + } +#endif + return Status::OK(); } @@ -307,18 +307,6 @@ Config::ResetDefaultConfig() { return s; } -#ifdef MILVUS_GPU_VERSION - s = SetCacheConfigGpuCacheCapacity(CONFIG_CACHE_GPU_CACHE_CAPACITY_DEFAULT); - if (!s.ok()) { - return s; - } - - s = SetCacheConfigGpuCacheThreshold(CONFIG_CACHE_GPU_CACHE_THRESHOLD_DEFAULT); - if (!s.ok()) { - return s; - } -#endif - s = SetCacheConfigCacheInsertData(CONFIG_CACHE_CACHE_INSERT_DATA_DEFAULT); if (!s.ok()) { return s; @@ -340,22 +328,34 @@ Config::ResetDefaultConfig() { return s; } - /* resource config */ - s = SetResourceConfigMode(CONFIG_RESOURCE_MODE_DEFAULT); + /* gpu resource config */ +#ifdef MILVUS_GPU_VERSION + s = SetGpuResourceConfigEnableGpu(CONFIG_GPU_RESOURCE_ENABLE_GPU_DEFAULT); if (!s.ok()) { return s; } - s = SetResourceConfigSearchResources(CONFIG_RESOURCE_SEARCH_RESOURCES_DEFAULT); + s = SetGpuResourceConfigCacheCapacity(CONFIG_GPU_RESOURCE_CACHE_CAPACITY_DEFAULT); if (!s.ok()) { return s; } - s = SetResourceConfigIndexBuildDevice(CONFIG_RESOURCE_INDEX_BUILD_DEVICE_DEFAULT); + s = SetGpuResourceConfigCacheThreshold(CONFIG_GPU_RESOURCE_CACHE_THRESHOLD_DEFAULT); if (!s.ok()) { return s; } + s = SetGpuResourceConfigSearchResources(CONFIG_GPU_RESOURCE_SEARCH_RESOURCES_DEFAULT); + if (!s.ok()) { + return s; + } + + s = SetGpuResourceConfigBuildIndexResources(CONFIG_GPU_RESOURCE_BUILD_INDEX_RESOURCES_DEFAULT); + if (!s.ok()) { + return s; + } +#endif + return Status::OK(); } @@ -377,7 +377,7 @@ Config::PrintAll() { PrintConfigSection(CONFIG_CACHE); PrintConfigSection(CONFIG_METRIC); PrintConfigSection(CONFIG_ENGINE); - PrintConfigSection(CONFIG_RESOURCE); + PrintConfigSection(CONFIG_GPU_RESOURCE); } //////////////////////////////////////////////////////////////////////////////// @@ -591,52 +591,6 @@ Config::CheckCacheConfigCpuCacheThreshold(const std::string& value) { return Status::OK(); } -Status -Config::CheckCacheConfigGpuCacheCapacity(const std::string& value) { - if (!ValidationUtil::ValidateStringIsNumber(value).ok()) { - std::string msg = "Invalid gpu cache capacity: " + value + - ". Possible reason: cache_config.gpu_cache_capacity is not a positive integer."; - return Status(SERVER_INVALID_ARGUMENT, msg); - } else { - uint64_t gpu_cache_capacity = std::stoi(value) * GB; - int device_id; - Status s = GetResourceConfigIndexBuildDevice(device_id); - if (!s.ok()) { - return s; - } - - size_t gpu_memory; - if (!ValidationUtil::GetGpuMemory(device_id, gpu_memory).ok()) { - std::string msg = "Fail to get GPU memory for GPU device: " + std::to_string(device_id); - return Status(SERVER_UNEXPECTED_ERROR, msg); - } else if (gpu_cache_capacity >= gpu_memory) { - std::string msg = "Invalid gpu cache capacity: " + value + - ". Possible reason: cache_config.gpu_cache_capacity exceeds GPU memory."; - return Status(SERVER_INVALID_ARGUMENT, msg); - } else if (gpu_cache_capacity > (double)gpu_memory * 0.9) { - std::cerr << "Warning: gpu cache capacity value is too big" << std::endl; - } - } - return Status::OK(); -} - -Status -Config::CheckCacheConfigGpuCacheThreshold(const std::string& value) { - if (!ValidationUtil::ValidateStringIsFloat(value).ok()) { - std::string msg = "Invalid gpu cache threshold: " + value + - ". Possible reason: cache_config.gpu_cache_threshold is not in range (0.0, 1.0]."; - return Status(SERVER_INVALID_ARGUMENT, msg); - } else { - float gpu_cache_threshold = std::stof(value); - if (gpu_cache_threshold <= 0.0 || gpu_cache_threshold >= 1.0) { - std::string msg = "Invalid gpu cache threshold: " + value + - ". Possible reason: cache_config.gpu_cache_threshold is not in range (0.0, 1.0]."; - return Status(SERVER_INVALID_ARGUMENT, msg); - } - } - return Status::OK(); -} - Status Config::CheckCacheConfigCacheInsertData(const std::string& value) { if (!ValidationUtil::ValidateStringIsBool(value).ok()) { @@ -687,56 +641,99 @@ Config::CheckEngineConfigGpuSearchThreshold(const std::string& value) { } Status -Config::CheckResourceConfigMode(const std::string& value) { - if (value != "simple") { - std::string msg = "Invalid resource mode: " + value + ". Possible reason: resource_config.mode is invalid."; +Config::CheckGpuResourceConfigEnableGpu(const std::string& value) { + if (!ValidationUtil::ValidateStringIsBool(value).ok()) { + std::string msg = "Invalid gpu resource config: " + value + + ". Possible reason: gpu_resource_config.enable_gpu is not a boolean."; return Status(SERVER_INVALID_ARGUMENT, msg); } return Status::OK(); } Status -CheckResource(const std::string& value) { +Config::CheckGpuResourceConfigCacheCapacity(const std::string& value) { + if (!ValidationUtil::ValidateStringIsNumber(value).ok()) { + std::string msg = "Invalid gpu cache capacity: " + value + + ". Possible reason: gpu_resource_config.cache_capacity is not a positive integer."; + return Status(SERVER_INVALID_ARGUMENT, msg); + } else { + uint64_t gpu_cache_capacity = std::stoi(value) * GB; + std::vector gpu_ids; + Status s = GetGpuResourceConfigBuildIndexResources(gpu_ids); + if (!s.ok()) { + return s; + } + + for (int32_t gpu_id : gpu_ids) { + size_t gpu_memory; + if (!ValidationUtil::GetGpuMemory(gpu_id, gpu_memory).ok()) { + std::string msg = "Fail to get GPU memory for GPU device: " + std::to_string(gpu_id); + return Status(SERVER_UNEXPECTED_ERROR, msg); + } else if (gpu_cache_capacity >= gpu_memory) { + std::string msg = "Invalid gpu cache capacity: " + value + + ". Possible reason: gpu_resource_config.cache_capacity exceeds GPU memory."; + return Status(SERVER_INVALID_ARGUMENT, msg); + } else if (gpu_cache_capacity > (double)gpu_memory * 0.9) { + std::cerr << "Warning: gpu cache capacity value is too big" << std::endl; + } + } + } + return Status::OK(); +} + +Status +Config::CheckGpuResourceConfigCacheThreshold(const std::string& value) { + if (!ValidationUtil::ValidateStringIsFloat(value).ok()) { + std::string msg = "Invalid gpu cache threshold: " + value + + ". Possible reason: gpu_resource_config.cache_threshold is not in range (0.0, 1.0]."; + return Status(SERVER_INVALID_ARGUMENT, msg); + } else { + float gpu_cache_threshold = std::stof(value); + if (gpu_cache_threshold <= 0.0 || gpu_cache_threshold >= 1.0) { + std::string msg = "Invalid gpu cache threshold: " + value + + ". Possible reason: gpu_resource_config.cache_threshold is not in range (0.0, 1.0]."; + return Status(SERVER_INVALID_ARGUMENT, msg); + } + } + return Status::OK(); +} + +Status +CheckGpuResource(const std::string& value) { std::string s = value; std::transform(s.begin(), s.end(), s.begin(), ::tolower); -#ifdef MILVUS_CPU_VERSION - if (s != "cpu") { - return Status(SERVER_INVALID_ARGUMENT, "Invalid CPU resource: " + s); - } -#else - const std::regex pat("cpu|gpu(\\d+)"); + const std::regex pat("gpu(\\d+)"); std::smatch m; if (!std::regex_match(s, m, pat)) { - std::string msg = "Invalid search resource: " + value + - ". Possible reason: resource_config.search_resources is not in the format of cpux or gpux"; + std::string msg = "Invalid gpu resource: " + value + + ". Possible reason: gpu_resource_config is not in the format of cpux or gpux"; return Status(SERVER_INVALID_ARGUMENT, msg); } if (s.compare(0, 3, "gpu") == 0) { int32_t gpu_index = std::stoi(s.substr(3)); if (!ValidationUtil::ValidateGpuIndex(gpu_index).ok()) { - std::string msg = "Invalid search resource: " + value + - ". Possible reason: resource_config.search_resources does not match your hardware."; + std::string msg = "Invalid gpu resource: " + value + + ". Possible reason: gpu_resource_config does not match with the hardware."; return Status(SERVER_INVALID_ARGUMENT, msg); } } -#endif return Status::OK(); } Status -Config::CheckResourceConfigSearchResources(const std::vector& value) { +Config::CheckGpuResourceConfigSearchResources(const std::vector& value) { if (value.empty()) { std::string msg = - "Invalid search resource. " - "Possible reason: resource_config.search_resources is empty."; + "Invalid gpu search resource. " + "Possible reason: gpu_resource_config.search_resources is empty."; return Status(SERVER_INVALID_ARGUMENT, msg); } for (auto& resource : value) { - auto status = CheckResource(resource); + auto status = CheckGpuResource(resource); if (!status.ok()) { return Status(SERVER_INVALID_ARGUMENT, status.message()); } @@ -745,10 +742,19 @@ Config::CheckResourceConfigSearchResources(const std::vector& value } Status -Config::CheckResourceConfigIndexBuildDevice(const std::string& value) { - auto status = CheckResource(value); - if (!status.ok()) { - return Status(SERVER_INVALID_ARGUMENT, status.message()); +Config::CheckGpuResourceConfigBuildIndexResources(const std::vector& value) { + if (value.empty()) { + std::string msg = + "Invalid gpu build index resource. " + "Possible reason: gpu_resource_config.build_index_resources is empty."; + return Status(SERVER_INVALID_ARGUMENT, msg); + } + + for (auto& resource : value) { + auto status = CheckGpuResource(resource); + if (!status.ok()) { + return Status(SERVER_INVALID_ARGUMENT, status.message()); + } } return Status::OK(); } @@ -855,7 +861,6 @@ Config::GetDBConfigArchiveDiskThreshold(int32_t& value) { if (!s.ok()) { return s; } - value = std::stoi(str); return Status::OK(); } @@ -868,7 +873,6 @@ Config::GetDBConfigArchiveDaysThreshold(int32_t& value) { if (!s.ok()) { return s; } - value = std::stoi(str); return Status::OK(); } @@ -880,7 +884,6 @@ Config::GetDBConfigInsertBufferSize(int32_t& value) { if (!s.ok()) { return s; } - value = std::stoi(str); return Status::OK(); } @@ -898,7 +901,6 @@ Config::GetMetricConfigEnableMonitor(bool& value) { if (!s.ok()) { return s; } - std::transform(str.begin(), str.end(), str.begin(), ::tolower); value = (str == "true" || str == "on" || str == "yes" || str == "1"); return Status::OK(); @@ -924,7 +926,6 @@ Config::GetCacheConfigCpuCacheCapacity(int64_t& value) { if (!s.ok()) { return s; } - value = std::stoi(str); return Status::OK(); } @@ -937,33 +938,6 @@ Config::GetCacheConfigCpuCacheThreshold(float& value) { if (!s.ok()) { return s; } - - value = std::stof(str); - return Status::OK(); -} - -Status -Config::GetCacheConfigGpuCacheCapacity(int64_t& value) { - std::string str = - GetConfigStr(CONFIG_CACHE, CONFIG_CACHE_GPU_CACHE_CAPACITY, CONFIG_CACHE_GPU_CACHE_CAPACITY_DEFAULT); - Status s = CheckCacheConfigGpuCacheCapacity(str); - if (!s.ok()) { - return s; - } - - value = std::stoi(str); - return Status::OK(); -} - -Status -Config::GetCacheConfigGpuCacheThreshold(float& value) { - std::string str = - GetConfigStr(CONFIG_CACHE, CONFIG_CACHE_GPU_CACHE_THRESHOLD, CONFIG_CACHE_GPU_CACHE_THRESHOLD_DEFAULT); - Status s = CheckCacheConfigGpuCacheThreshold(str); - if (!s.ok()) { - return s; - } - value = std::stof(str); return Status::OK(); } @@ -976,7 +950,6 @@ Config::GetCacheConfigCacheInsertData(bool& value) { if (!s.ok()) { return s; } - std::transform(str.begin(), str.end(), str.begin(), ::tolower); value = (str == "true" || str == "on" || str == "yes" || str == "1"); return Status::OK(); @@ -990,7 +963,6 @@ Config::GetEngineConfigUseBlasThreshold(int32_t& value) { if (!s.ok()) { return s; } - value = std::stoi(str); return Status::OK(); } @@ -1002,7 +974,6 @@ Config::GetEngineConfigOmpThreadNum(int32_t& value) { if (!s.ok()) { return s; } - value = std::stoi(str); return Status::OK(); } @@ -1015,41 +986,113 @@ Config::GetEngineConfigGpuSearchThreshold(int32_t& value) { if (!s.ok()) { return s; } - value = std::stoi(str); return Status::OK(); } Status -Config::GetResourceConfigMode(std::string& value) { - value = GetConfigStr(CONFIG_RESOURCE, CONFIG_RESOURCE_MODE, CONFIG_RESOURCE_MODE_DEFAULT); - return CheckResourceConfigMode(value); -} - -Status -Config::GetResourceConfigSearchResources(std::vector& value) { +Config::GetGpuResourceConfigEnableGpu(bool& value) { std::string str = - GetConfigSequenceStr(CONFIG_RESOURCE, CONFIG_RESOURCE_SEARCH_RESOURCES, - CONFIG_RESOURCE_SEARCH_RESOURCES_DELIMITER, CONFIG_RESOURCE_SEARCH_RESOURCES_DEFAULT); - server::StringHelpFunctions::SplitStringByDelimeter(str, CONFIG_RESOURCE_SEARCH_RESOURCES_DELIMITER, value); - return CheckResourceConfigSearchResources(value); -} - -Status -Config::GetResourceConfigIndexBuildDevice(int32_t& value) { - std::string str = - GetConfigStr(CONFIG_RESOURCE, CONFIG_RESOURCE_INDEX_BUILD_DEVICE, CONFIG_RESOURCE_INDEX_BUILD_DEVICE_DEFAULT); - Status s = CheckResourceConfigIndexBuildDevice(str); + GetConfigStr(CONFIG_GPU_RESOURCE, CONFIG_GPU_RESOURCE_ENABLE_GPU, CONFIG_GPU_RESOURCE_ENABLE_GPU_DEFAULT); + Status s = CheckGpuResourceConfigEnableGpu(str); if (!s.ok()) { return s; } + std::transform(str.begin(), str.end(), str.begin(), ::tolower); + value = (str == "true" || str == "on" || str == "yes" || str == "1"); + return Status::OK(); +} - if (str == "cpu") { - value = CPU_DEVICE_ID; - } else { - value = std::stoi(str.substr(3)); +Status +Config::GetGpuResourceConfigCacheCapacity(int64_t& value) { + bool enable_gpu = false; + Status s = GetGpuResourceConfigEnableGpu(enable_gpu); + if (!s.ok()) { + return s; } + if (!enable_gpu) { + std::string msg = "GPU not supported. Possible reason: gpu_resource_config.enable_gpu is set to false."; + return Status(SERVER_UNSUPPORTED_ERROR, msg); + } + std::string str = GetConfigStr(CONFIG_GPU_RESOURCE, CONFIG_GPU_RESOURCE_CACHE_CAPACITY, + CONFIG_GPU_RESOURCE_CACHE_CAPACITY_DEFAULT); + s = CheckGpuResourceConfigCacheCapacity(str); + if (!s.ok()) { + return s; + } + value = std::stoi(str); + return Status::OK(); +} +Status +Config::GetGpuResourceConfigCacheThreshold(float& value) { + bool enable_gpu = false; + Status s = GetGpuResourceConfigEnableGpu(enable_gpu); + if (!s.ok()) { + return s; + } + if (!enable_gpu) { + std::string msg = "GPU not supported. Possible reason: gpu_resource_config.enable_gpu is set to false."; + return Status(SERVER_UNSUPPORTED_ERROR, msg); + } + std::string str = GetConfigStr(CONFIG_GPU_RESOURCE, CONFIG_GPU_RESOURCE_CACHE_THRESHOLD, + CONFIG_GPU_RESOURCE_CACHE_THRESHOLD_DEFAULT); + s = CheckGpuResourceConfigCacheThreshold(str); + if (!s.ok()) { + return s; + } + value = std::stof(str); + return Status::OK(); +} + +Status +Config::GetGpuResourceConfigSearchResources(std::vector& value) { + bool enable_gpu = false; + Status s = GetGpuResourceConfigEnableGpu(enable_gpu); + if (!s.ok()) { + return s; + } + if (!enable_gpu) { + std::string msg = "GPU not supported. Possible reason: gpu_resource_config.enable_gpu is set to false."; + return Status(SERVER_UNSUPPORTED_ERROR, msg); + } + std::string str = GetConfigSequenceStr(CONFIG_GPU_RESOURCE, CONFIG_GPU_RESOURCE_SEARCH_RESOURCES, + CONFIG_GPU_RESOURCE_DELIMITER, CONFIG_GPU_RESOURCE_SEARCH_RESOURCES_DEFAULT); + std::vector res_vec; + server::StringHelpFunctions::SplitStringByDelimeter(str, CONFIG_GPU_RESOURCE_DELIMITER, res_vec); + s = CheckGpuResourceConfigSearchResources(res_vec); + if (!s.ok()) { + return s; + } + for (std::string& res : res_vec) { + value.push_back(std::stoi(res.substr(3))); + } + return Status::OK(); +} + +Status +Config::GetGpuResourceConfigBuildIndexResources(std::vector& value) { + bool enable_gpu = false; + Status s = GetGpuResourceConfigEnableGpu(enable_gpu); + if (!s.ok()) { + return s; + } + if (!enable_gpu) { + std::string msg = "GPU not supported. Possible reason: gpu_resource_config.enable_gpu is set to false."; + return Status(SERVER_UNSUPPORTED_ERROR, msg); + } + std::string str = + GetConfigSequenceStr(CONFIG_GPU_RESOURCE, CONFIG_GPU_RESOURCE_BUILD_INDEX_RESOURCES, + CONFIG_GPU_RESOURCE_DELIMITER, CONFIG_GPU_RESOURCE_BUILD_INDEX_RESOURCES_DEFAULT); + std::vector res_vec; + server::StringHelpFunctions::SplitStringByDelimeter(str, CONFIG_GPU_RESOURCE_DELIMITER, res_vec); + s = CheckGpuResourceConfigBuildIndexResources(res_vec); + if (!s.ok()) { + return s; + } + for (std::string& res : res_vec) { + value.push_back(std::stoi(res.substr(3))); + } return Status::OK(); } @@ -1061,7 +1104,6 @@ Config::SetServerConfigAddress(const std::string& value) { if (!s.ok()) { return s; } - SetConfigValueInMem(CONFIG_SERVER, CONFIG_SERVER_ADDRESS, value); return Status::OK(); } @@ -1072,7 +1114,6 @@ Config::SetServerConfigPort(const std::string& value) { if (!s.ok()) { return s; } - SetConfigValueInMem(CONFIG_SERVER, CONFIG_SERVER_PORT, value); return Status::OK(); } @@ -1083,7 +1124,6 @@ Config::SetServerConfigDeployMode(const std::string& value) { if (!s.ok()) { return s; } - SetConfigValueInMem(CONFIG_SERVER, CONFIG_SERVER_DEPLOY_MODE, value); return Status::OK(); } @@ -1094,7 +1134,6 @@ Config::SetServerConfigTimeZone(const std::string& value) { if (!s.ok()) { return s; } - SetConfigValueInMem(CONFIG_SERVER, CONFIG_SERVER_TIME_ZONE, value); return Status::OK(); } @@ -1106,7 +1145,6 @@ Config::SetDBConfigPrimaryPath(const std::string& value) { if (!s.ok()) { return s; } - SetConfigValueInMem(CONFIG_DB, CONFIG_DB_PRIMARY_PATH, value); return Status::OK(); } @@ -1117,7 +1155,6 @@ Config::SetDBConfigSecondaryPath(const std::string& value) { if (!s.ok()) { return s; } - SetConfigValueInMem(CONFIG_DB, CONFIG_DB_SECONDARY_PATH, value); return Status::OK(); } @@ -1128,7 +1165,6 @@ Config::SetDBConfigBackendUrl(const std::string& value) { if (!s.ok()) { return s; } - SetConfigValueInMem(CONFIG_DB, CONFIG_DB_BACKEND_URL, value); return Status::OK(); } @@ -1139,7 +1175,6 @@ Config::SetDBConfigArchiveDiskThreshold(const std::string& value) { if (!s.ok()) { return s; } - SetConfigValueInMem(CONFIG_DB, CONFIG_DB_ARCHIVE_DISK_THRESHOLD, value); return Status::OK(); } @@ -1150,7 +1185,6 @@ Config::SetDBConfigArchiveDaysThreshold(const std::string& value) { if (!s.ok()) { return s; } - SetConfigValueInMem(CONFIG_DB, CONFIG_DB_ARCHIVE_DAYS_THRESHOLD, value); return Status::OK(); } @@ -1161,7 +1195,6 @@ Config::SetDBConfigInsertBufferSize(const std::string& value) { if (!s.ok()) { return s; } - SetConfigValueInMem(CONFIG_DB, CONFIG_DB_INSERT_BUFFER_SIZE, value); return Status::OK(); } @@ -1173,7 +1206,6 @@ Config::SetMetricConfigEnableMonitor(const std::string& value) { if (!s.ok()) { return s; } - SetConfigValueInMem(CONFIG_METRIC, CONFIG_METRIC_ENABLE_MONITOR, value); return Status::OK(); } @@ -1184,7 +1216,6 @@ Config::SetMetricConfigCollector(const std::string& value) { if (!s.ok()) { return s; } - SetConfigValueInMem(CONFIG_METRIC, CONFIG_METRIC_COLLECTOR, value); return Status::OK(); } @@ -1195,7 +1226,6 @@ Config::SetMetricConfigPrometheusPort(const std::string& value) { if (!s.ok()) { return s; } - SetConfigValueInMem(CONFIG_METRIC, CONFIG_METRIC_PROMETHEUS_PORT, value); return Status::OK(); } @@ -1207,7 +1237,6 @@ Config::SetCacheConfigCpuCacheCapacity(const std::string& value) { if (!s.ok()) { return s; } - SetConfigValueInMem(CONFIG_CACHE, CONFIG_CACHE_CPU_CACHE_CAPACITY, value); return Status::OK(); } @@ -1218,40 +1247,16 @@ Config::SetCacheConfigCpuCacheThreshold(const std::string& value) { if (!s.ok()) { return s; } - SetConfigValueInMem(CONFIG_CACHE, CONFIG_CACHE_CPU_CACHE_THRESHOLD, value); return Status::OK(); } -Status -Config::SetCacheConfigGpuCacheCapacity(const std::string& value) { - Status s = CheckCacheConfigGpuCacheCapacity(value); - if (!s.ok()) { - return s; - } - - SetConfigValueInMem(CONFIG_CACHE, CONFIG_CACHE_GPU_CACHE_CAPACITY, value); - return Status::OK(); -} - -Status -Config::SetCacheConfigGpuCacheThreshold(const std::string& value) { - Status s = CheckCacheConfigGpuCacheThreshold(value); - if (!s.ok()) { - return s; - } - - SetConfigValueInMem(CONFIG_CACHE, CONFIG_CACHE_GPU_CACHE_THRESHOLD, value); - return Status::OK(); -} - Status Config::SetCacheConfigCacheInsertData(const std::string& value) { Status s = CheckCacheConfigCacheInsertData(value); if (!s.ok()) { return s; } - SetConfigValueInMem(CONFIG_CACHE, CONFIG_CACHE_CACHE_INSERT_DATA, value); return Status::OK(); } @@ -1263,7 +1268,6 @@ Config::SetEngineConfigUseBlasThreshold(const std::string& value) { if (!s.ok()) { return s; } - SetConfigValueInMem(CONFIG_ENGINE, CONFIG_ENGINE_USE_BLAS_THRESHOLD, value); return Status::OK(); } @@ -1274,7 +1278,6 @@ Config::SetEngineConfigOmpThreadNum(const std::string& value) { if (!s.ok()) { return s; } - SetConfigValueInMem(CONFIG_ENGINE, CONFIG_ENGINE_OMP_THREAD_NUM, value); return Status::OK(); } @@ -1285,45 +1288,62 @@ Config::SetEngineConfigGpuSearchThreshold(const std::string& value) { if (!s.ok()) { return s; } - SetConfigValueInMem(CONFIG_ENGINE, CONFIG_ENGINE_GPU_SEARCH_THRESHOLD, value); return Status::OK(); } -/* resource config */ +/* gpu resource config */ Status -Config::SetResourceConfigMode(const std::string& value) { - Status s = CheckResourceConfigMode(value); +Config::SetGpuResourceConfigEnableGpu(const std::string& value) { + Status s = CheckGpuResourceConfigEnableGpu(value); if (!s.ok()) { return s; } - - SetConfigValueInMem(CONFIG_RESOURCE, CONFIG_RESOURCE_MODE, value); + SetConfigValueInMem(CONFIG_GPU_RESOURCE, CONFIG_GPU_RESOURCE_ENABLE_GPU, value); return Status::OK(); } Status -Config::SetResourceConfigSearchResources(const std::string& value) { +Config::SetGpuResourceConfigCacheCapacity(const std::string& value) { + Status s = CheckGpuResourceConfigCacheCapacity(value); + if (!s.ok()) { + return s; + } + SetConfigValueInMem(CONFIG_GPU_RESOURCE, CONFIG_GPU_RESOURCE_CACHE_CAPACITY, value); + return Status::OK(); +} + +Status +Config::SetGpuResourceConfigCacheThreshold(const std::string& value) { + Status s = CheckGpuResourceConfigCacheThreshold(value); + if (!s.ok()) { + return s; + } + SetConfigValueInMem(CONFIG_GPU_RESOURCE, CONFIG_GPU_RESOURCE_CACHE_THRESHOLD, value); + return Status::OK(); +} + +Status +Config::SetGpuResourceConfigSearchResources(const std::string& value) { std::vector res_vec; - server::StringHelpFunctions::SplitStringByDelimeter(value, CONFIG_RESOURCE_SEARCH_RESOURCES_DELIMITER, res_vec); - - Status s = CheckResourceConfigSearchResources(res_vec); + server::StringHelpFunctions::SplitStringByDelimeter(value, CONFIG_GPU_RESOURCE_DELIMITER, res_vec); + Status s = CheckGpuResourceConfigSearchResources(res_vec); if (!s.ok()) { return s; } - - SetConfigValueInMem(CONFIG_RESOURCE, CONFIG_RESOURCE_SEARCH_RESOURCES, value); + SetConfigValueInMem(CONFIG_GPU_RESOURCE, CONFIG_GPU_RESOURCE_SEARCH_RESOURCES, value); return Status::OK(); } Status -Config::SetResourceConfigIndexBuildDevice(const std::string& value) { - Status s = CheckResourceConfigIndexBuildDevice(value); +Config::SetGpuResourceConfigBuildIndexResources(const std::string& value) { + std::vector res_vec; + server::StringHelpFunctions::SplitStringByDelimeter(value, CONFIG_GPU_RESOURCE_DELIMITER, res_vec); + Status s = CheckGpuResourceConfigBuildIndexResources(res_vec); if (!s.ok()) { return s; } - - SetConfigValueInMem(CONFIG_RESOURCE, CONFIG_RESOURCE_INDEX_BUILD_DEVICE, value); + SetConfigValueInMem(CONFIG_GPU_RESOURCE, CONFIG_GPU_RESOURCE_BUILD_INDEX_RESOURCES, value); return Status::OK(); } diff --git a/core/src/server/Config.h b/core/src/server/Config.h index 3ab0cd8053..0442ae0626 100644 --- a/core/src/server/Config.h +++ b/core/src/server/Config.h @@ -59,12 +59,8 @@ static const char* CONFIG_DB_PRELOAD_TABLE = "preload_table"; static const char* CONFIG_CACHE = "cache_config"; static const char* CONFIG_CACHE_CPU_CACHE_CAPACITY = "cpu_cache_capacity"; static const char* CONFIG_CACHE_CPU_CACHE_CAPACITY_DEFAULT = "16"; -static const char* CONFIG_CACHE_GPU_CACHE_CAPACITY = "gpu_cache_capacity"; -static const char* CONFIG_CACHE_GPU_CACHE_CAPACITY_DEFAULT = "4"; -static const char* CONFIG_CACHE_CPU_CACHE_THRESHOLD = "cpu_mem_threshold"; +static const char* CONFIG_CACHE_CPU_CACHE_THRESHOLD = "cpu_cache_threshold"; static const char* CONFIG_CACHE_CPU_CACHE_THRESHOLD_DEFAULT = "0.85"; -static const char* CONFIG_CACHE_GPU_CACHE_THRESHOLD = "gpu_mem_threshold"; -static const char* CONFIG_CACHE_GPU_CACHE_THRESHOLD_DEFAULT = "0.85"; static const char* CONFIG_CACHE_CACHE_INSERT_DATA = "cache_insert_data"; static const char* CONFIG_CACHE_CACHE_INSERT_DATA_DEFAULT = "false"; @@ -87,26 +83,23 @@ static const char* CONFIG_ENGINE_OMP_THREAD_NUM_DEFAULT = "0"; static const char* CONFIG_ENGINE_GPU_SEARCH_THRESHOLD = "gpu_search_threshold"; static const char* CONFIG_ENGINE_GPU_SEARCH_THRESHOLD_DEFAULT = "1000"; -/* resource config */ -static const char* CONFIG_RESOURCE = "resource_config"; -static const char* CONFIG_RESOURCE_MODE = "mode"; -static const char* CONFIG_RESOURCE_MODE_DEFAULT = "simple"; -static const char* CONFIG_RESOURCE_SEARCH_RESOURCES = "search_resources"; -static const char* CONFIG_RESOURCE_SEARCH_RESOURCES_DELIMITER = ","; - -#ifdef MILVUS_CPU_VERSION -static const char* CONFIG_RESOURCE_SEARCH_RESOURCES_DEFAULT = "cpu"; +/* gpu resource config */ +static const char* CONFIG_GPU_RESOURCE = "gpu_resource_config"; +static const char* CONFIG_GPU_RESOURCE_ENABLE_GPU = "enable_gpu"; +#ifdef MILVUS_GPU_VERSION +static const char* CONFIG_GPU_RESOURCE_ENABLE_GPU_DEFAULT = "true"; #else -static const char* CONFIG_RESOURCE_SEARCH_RESOURCES_DEFAULT = "cpu,gpu0"; +static const char* CONFIG_GPU_RESOURCE_ENABLE_GPU_DEFAULT = "false"; #endif - -static const char* CONFIG_RESOURCE_INDEX_BUILD_DEVICE = "index_build_device"; -#ifdef MILVUS_CPU_VERSION -static const char* CONFIG_RESOURCE_INDEX_BUILD_DEVICE_DEFAULT = "cpu"; -#else -static const char* CONFIG_RESOURCE_INDEX_BUILD_DEVICE_DEFAULT = "gpu0"; -#endif -const int32_t CPU_DEVICE_ID = -1; +static const char* CONFIG_GPU_RESOURCE_CACHE_CAPACITY = "cache_capacity"; +static const char* CONFIG_GPU_RESOURCE_CACHE_CAPACITY_DEFAULT = "4"; +static const char* CONFIG_GPU_RESOURCE_CACHE_THRESHOLD = "cache_threshold"; +static const char* CONFIG_GPU_RESOURCE_CACHE_THRESHOLD_DEFAULT = "0.85"; +static const char* CONFIG_GPU_RESOURCE_DELIMITER = ","; +static const char* CONFIG_GPU_RESOURCE_SEARCH_RESOURCES = "search_resources"; +static const char* CONFIG_GPU_RESOURCE_SEARCH_RESOURCES_DEFAULT = "gpu0"; +static const char* CONFIG_GPU_RESOURCE_BUILD_INDEX_RESOURCES = "build_index_resources"; +static const char* CONFIG_GPU_RESOURCE_BUILD_INDEX_RESOURCES_DEFAULT = "gpu0"; class Config { public: @@ -170,10 +163,6 @@ class Config { Status CheckCacheConfigCpuCacheThreshold(const std::string& value); Status - CheckCacheConfigGpuCacheCapacity(const std::string& value); - Status - CheckCacheConfigGpuCacheThreshold(const std::string& value); - Status CheckCacheConfigCacheInsertData(const std::string& value); /* engine config */ @@ -184,13 +173,17 @@ class Config { Status CheckEngineConfigGpuSearchThreshold(const std::string& value); - /* resource config */ + /* gpu resource config */ Status - CheckResourceConfigMode(const std::string& value); + CheckGpuResourceConfigEnableGpu(const std::string& value); Status - CheckResourceConfigSearchResources(const std::vector& value); + CheckGpuResourceConfigCacheCapacity(const std::string& value); Status - CheckResourceConfigIndexBuildDevice(const std::string& value); + CheckGpuResourceConfigCacheThreshold(const std::string& value); + Status + CheckGpuResourceConfigSearchResources(const std::vector& value); + Status + CheckGpuResourceConfigBuildIndexResources(const std::vector& value); std::string GetConfigStr(const std::string& parent_key, const std::string& child_key, const std::string& default_value = ""); @@ -239,10 +232,6 @@ class Config { Status GetCacheConfigCpuCacheThreshold(float& value); Status - GetCacheConfigGpuCacheCapacity(int64_t& value); - Status - GetCacheConfigGpuCacheThreshold(float& value); - Status GetCacheConfigCacheInsertData(bool& value); /* engine config */ @@ -253,13 +242,17 @@ class Config { Status GetEngineConfigGpuSearchThreshold(int32_t& value); - /* resource config */ + /* gpu resource config */ Status - GetResourceConfigMode(std::string& value); + GetGpuResourceConfigEnableGpu(bool& value); Status - GetResourceConfigSearchResources(std::vector& value); + GetGpuResourceConfigCacheCapacity(int64_t& value); Status - GetResourceConfigIndexBuildDevice(int32_t& value); + GetGpuResourceConfigCacheThreshold(float& value); + Status + GetGpuResourceConfigSearchResources(std::vector& value); + Status + GetGpuResourceConfigBuildIndexResources(std::vector& value); public: /* server config */ @@ -300,10 +293,6 @@ class Config { Status SetCacheConfigCpuCacheThreshold(const std::string& value); Status - SetCacheConfigGpuCacheCapacity(const std::string& value); - Status - SetCacheConfigGpuCacheThreshold(const std::string& value); - Status SetCacheConfigCacheInsertData(const std::string& value); /* engine config */ @@ -314,13 +303,17 @@ class Config { Status SetEngineConfigGpuSearchThreshold(const std::string& value); - /* resource config */ + /* gpu resource config */ Status - SetResourceConfigMode(const std::string& value); + SetGpuResourceConfigEnableGpu(const std::string& value); Status - SetResourceConfigSearchResources(const std::string& value); + SetGpuResourceConfigCacheCapacity(const std::string& value); Status - SetResourceConfigIndexBuildDevice(const std::string& value); + SetGpuResourceConfigCacheThreshold(const std::string& value); + Status + SetGpuResourceConfigSearchResources(const std::string& value); + Status + SetGpuResourceConfigBuildIndexResources(const std::string& value); private: std::unordered_map> config_map_; diff --git a/core/src/utils/ValidationUtil.cpp b/core/src/utils/ValidationUtil.cpp index ec696ff3e0..080de77e17 100644 --- a/core/src/utils/ValidationUtil.cpp +++ b/core/src/utils/ValidationUtil.cpp @@ -182,7 +182,7 @@ ValidationUtil::ValidatePartitionTags(const std::vector& partition_ } Status -ValidationUtil::ValidateGpuIndex(uint32_t gpu_index) { +ValidationUtil::ValidateGpuIndex(int32_t gpu_index) { #ifdef MILVUS_GPU_VERSION int num_devices = 0; auto cuda_err = cudaGetDeviceCount(&num_devices); @@ -203,7 +203,7 @@ ValidationUtil::ValidateGpuIndex(uint32_t gpu_index) { } Status -ValidationUtil::GetGpuMemory(uint32_t gpu_index, size_t& memory) { +ValidationUtil::GetGpuMemory(int32_t gpu_index, size_t& memory) { #ifdef MILVUS_GPU_VERSION cudaDeviceProp deviceProp; diff --git a/core/src/utils/ValidationUtil.h b/core/src/utils/ValidationUtil.h index 01801e295a..201ccef3bd 100644 --- a/core/src/utils/ValidationUtil.h +++ b/core/src/utils/ValidationUtil.h @@ -59,10 +59,10 @@ class ValidationUtil { ValidatePartitionTags(const std::vector& partition_tags); static Status - ValidateGpuIndex(uint32_t gpu_index); + ValidateGpuIndex(int32_t gpu_index); static Status - GetGpuMemory(uint32_t gpu_index, size_t& memory); + GetGpuMemory(int32_t gpu_index, size_t& memory); static Status ValidateIpAddress(const std::string& ip_address); diff --git a/core/unittest/server/test_config.cpp b/core/unittest/server/test_config.cpp index 637273732d..387d321b85 100644 --- a/core/unittest/server/test_config.cpp +++ b/core/unittest/server/test_config.cpp @@ -216,21 +216,6 @@ TEST_F(ConfigTest, SERVER_CONFIG_VALID_TEST) { s = config.GetCacheConfigCpuCacheThreshold(float_val); ASSERT_TRUE(float_val == cache_cpu_cache_threshold); -#ifdef MILVUS_GPU_VERSION - int64_t cache_gpu_cache_capacity = 1; - s = config.SetCacheConfigGpuCacheCapacity(std::to_string(cache_gpu_cache_capacity)); - ASSERT_TRUE(s.ok()); - s = config.GetCacheConfigGpuCacheCapacity(int64_val); - ASSERT_TRUE(s.ok()); - ASSERT_TRUE(int64_val == cache_gpu_cache_capacity); - - float cache_gpu_cache_threshold = 0.2; - s = config.SetCacheConfigGpuCacheThreshold(std::to_string(cache_gpu_cache_threshold)); - ASSERT_TRUE(s.ok()); - s = config.GetCacheConfigGpuCacheThreshold(float_val); - ASSERT_TRUE(float_val == cache_gpu_cache_threshold); -#endif - bool cache_insert_data = true; s = config.SetCacheConfigCacheInsertData(std::to_string(cache_insert_data)); ASSERT_TRUE(s.ok()); @@ -259,42 +244,54 @@ TEST_F(ConfigTest, SERVER_CONFIG_VALID_TEST) { ASSERT_TRUE(s.ok()); ASSERT_TRUE(int32_val == engine_gpu_search_threshold); - /* resource config */ - std::string resource_mode = "simple"; - s = config.SetResourceConfigMode(resource_mode); + /* gpu resource config */ + bool resource_enable_gpu = true; + s = config.SetGpuResourceConfigEnableGpu(std::to_string(resource_enable_gpu)); ASSERT_TRUE(s.ok()); - s = config.GetResourceConfigMode(str_val); + s = config.GetGpuResourceConfigEnableGpu(bool_val); ASSERT_TRUE(s.ok()); - ASSERT_TRUE(str_val == resource_mode); + ASSERT_TRUE(bool_val == resource_enable_gpu); -#ifdef MILVUS_CPU_VERSION - std::vector search_resources = {"cpu"}; -#else - std::vector search_resources = {"cpu", "gpu0"}; -#endif - std::vector res_vec; - std::string res_str; - milvus::server::StringHelpFunctions::MergeStringWithDelimeter( - search_resources, milvus::server::CONFIG_RESOURCE_SEARCH_RESOURCES_DELIMITER, res_str); - s = config.SetResourceConfigSearchResources(res_str); +#ifdef MILVUS_GPU_VERSION + int64_t gpu_cache_capacity = 1; + s = config.SetGpuResourceConfigCacheCapacity(std::to_string(gpu_cache_capacity)); ASSERT_TRUE(s.ok()); - s = config.GetResourceConfigSearchResources(res_vec); + s = config.GetGpuResourceConfigCacheCapacity(int64_val); + ASSERT_TRUE(s.ok()); + ASSERT_TRUE(int64_val == gpu_cache_capacity); + + float gpu_cache_threshold = 0.2; + s = config.SetGpuResourceConfigCacheThreshold(std::to_string(gpu_cache_threshold)); + ASSERT_TRUE(s.ok()); + s = config.GetGpuResourceConfigCacheThreshold(float_val); + ASSERT_TRUE(float_val == gpu_cache_threshold); + + std::vector search_resources = {"gpu0"}; + std::vector search_res_vec; + std::string search_res_str; + milvus::server::StringHelpFunctions::MergeStringWithDelimeter( + search_resources, milvus::server::CONFIG_GPU_RESOURCE_DELIMITER, search_res_str); + s = config.SetGpuResourceConfigSearchResources(search_res_str); + ASSERT_TRUE(s.ok()); + s = config.GetGpuResourceConfigSearchResources(search_res_vec); ASSERT_TRUE(s.ok()); for (size_t i = 0; i < search_resources.size(); i++) { - ASSERT_TRUE(search_resources[i] == res_vec[i]); + ASSERT_TRUE(std::stoi(search_resources[i].substr(3)) == search_res_vec[i]); } -#ifdef MILVUS_CPU_VERSION - int32_t resource_index_build_device = milvus::server::CPU_DEVICE_ID; - s = config.SetResourceConfigIndexBuildDevice("cpu"); -#else - int32_t resource_index_build_device = 0; - s = config.SetResourceConfigIndexBuildDevice("gpu" + std::to_string(resource_index_build_device)); + std::vector build_index_resources = {"gpu0"}; + std::vector build_index_res_vec; + std::string build_index_res_str; + milvus::server::StringHelpFunctions::MergeStringWithDelimeter( + build_index_resources, milvus::server::CONFIG_GPU_RESOURCE_DELIMITER, build_index_res_str); + s = config.SetGpuResourceConfigBuildIndexResources(build_index_res_str); + ASSERT_TRUE(s.ok()); + s = config.GetGpuResourceConfigBuildIndexResources(build_index_res_vec); + ASSERT_TRUE(s.ok()); + for (size_t i = 0; i < build_index_resources.size(); i++) { + ASSERT_TRUE(std::stoi(build_index_resources[i].substr(3)) == build_index_res_vec[i]); + } #endif - ASSERT_TRUE(s.ok()); - s = config.GetResourceConfigIndexBuildDevice(int32_val); - ASSERT_TRUE(s.ok()); - ASSERT_TRUE(int32_val == resource_index_build_device); } TEST_F(ConfigTest, SERVER_CONFIG_INVALID_TEST) { @@ -381,18 +378,6 @@ TEST_F(ConfigTest, SERVER_CONFIG_INVALID_TEST) { s = config.SetCacheConfigCpuCacheThreshold("1.0"); ASSERT_FALSE(s.ok()); -#ifdef MILVUS_GPU_VERSION - s = config.SetCacheConfigGpuCacheCapacity("a"); - ASSERT_FALSE(s.ok()); - s = config.SetCacheConfigGpuCacheCapacity("128"); - ASSERT_FALSE(s.ok()); - - s = config.SetCacheConfigGpuCacheThreshold("a"); - ASSERT_FALSE(s.ok()); - s = config.SetCacheConfigGpuCacheThreshold("1.0"); - ASSERT_FALSE(s.ok()); -#endif - s = config.SetCacheConfigCacheInsertData("N"); ASSERT_FALSE(s.ok()); @@ -408,20 +393,29 @@ TEST_F(ConfigTest, SERVER_CONFIG_INVALID_TEST) { s = config.SetEngineConfigGpuSearchThreshold("-1"); ASSERT_FALSE(s.ok()); - /* resource config */ - s = config.SetResourceConfigMode("default"); + /* gpu resource config */ + s = config.SetGpuResourceConfigEnableGpu("ok"); ASSERT_FALSE(s.ok()); - s = config.SetResourceConfigSearchResources("gpu10"); +#ifdef MILVUS_GPU_VERSION + s = config.SetGpuResourceConfigCacheCapacity("a"); + ASSERT_FALSE(s.ok()); + s = config.SetGpuResourceConfigCacheCapacity("128"); ASSERT_FALSE(s.ok()); - s = config.SetResourceConfigSearchResources("cpu"); - ASSERT_TRUE(s.ok()); + s = config.SetGpuResourceConfigCacheThreshold("a"); + ASSERT_FALSE(s.ok()); + s = config.SetGpuResourceConfigCacheThreshold("1.0"); + ASSERT_FALSE(s.ok()); - s = config.SetResourceConfigIndexBuildDevice("gup2"); + s = config.SetGpuResourceConfigSearchResources("gpu10"); ASSERT_FALSE(s.ok()); - s = config.SetResourceConfigIndexBuildDevice("gpu16"); + + s = config.SetGpuResourceConfigBuildIndexResources("gup2"); ASSERT_FALSE(s.ok()); + s = config.SetGpuResourceConfigBuildIndexResources("gpu16"); + ASSERT_FALSE(s.ok()); +#endif } TEST_F(ConfigTest, SERVER_CONFIG_TEST) { From 9f063aa2893e6009d7b89d1fad75ca61c24dbba5 Mon Sep 17 00:00:00 2001 From: wxyu Date: Tue, 19 Nov 2019 16:53:04 +0800 Subject: [PATCH 09/14] Add a Fallback pass in optimizer close #409 add Fallback pass condition --- CHANGELOG.md | 1 + core/src/scheduler/SchedInst.h | 2 + core/src/scheduler/optimizer/FallbackPass.cpp | 43 +++++++++++++++++++ core/src/scheduler/optimizer/FallbackPass.h | 40 +++++++++++++++++ 4 files changed, 86 insertions(+) create mode 100644 core/src/scheduler/optimizer/FallbackPass.cpp create mode 100644 core/src/scheduler/optimizer/FallbackPass.h diff --git a/CHANGELOG.md b/CHANGELOG.md index 98bab925da..ced0633cdc 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -31,6 +31,7 @@ Please mark all change in change log and use the ticket from JIRA. - \#358 - Add more information in build.sh and install.md - \#255 - Add ivfsq8 test report detailed version - \#404 - Add virtual method Init() in Pass abstract class +- \#409 - Add a Fallback pass in optimizer ## Task diff --git a/core/src/scheduler/SchedInst.h b/core/src/scheduler/SchedInst.h index a3048069f9..ad02d32081 100644 --- a/core/src/scheduler/SchedInst.h +++ b/core/src/scheduler/SchedInst.h @@ -21,6 +21,7 @@ #include "JobMgr.h" #include "ResourceMgr.h" #include "Scheduler.h" +#include "optimizer/FallbackPass.h" #include "optimizer/HybridPass.h" #include "optimizer/LargeSQ8HPass.h" #include "optimizer/OnlyCPUPass.h" @@ -112,6 +113,7 @@ class OptimizerInst { pass_list.push_back(std::make_shared()); pass_list.push_back(std::make_shared()); pass_list.push_back(std::make_shared(has_cpu)); + pass_list.push_back(std::make_shared()); instance = std::make_shared(pass_list); } } diff --git a/core/src/scheduler/optimizer/FallbackPass.cpp b/core/src/scheduler/optimizer/FallbackPass.cpp new file mode 100644 index 0000000000..2e275ede4b --- /dev/null +++ b/core/src/scheduler/optimizer/FallbackPass.cpp @@ -0,0 +1,43 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#include "scheduler/optimizer/FallbackPass.h" +#include "scheduler/SchedInst.h" +#include "scheduler/tasklabel/SpecResLabel.h" + +namespace milvus { +namespace scheduler { + +void +FallbackPass::Init() { +} + +bool +FallbackPass::Run(const TaskPtr& task) { + auto task_type = task->Type(); + if (task_type != TaskType::SearchTask && task_type != TaskType::BuildIndexTask) { + return false; + } + // NEVER be empty + auto cpu = ResMgrInst::GetInstance()->GetCpuResources()[0]; + auto label = std::make_shared(cpu); + task->label() = label; + return true; +} + +} // namespace scheduler +} // namespace milvus diff --git a/core/src/scheduler/optimizer/FallbackPass.h b/core/src/scheduler/optimizer/FallbackPass.h new file mode 100644 index 0000000000..728740d53a --- /dev/null +++ b/core/src/scheduler/optimizer/FallbackPass.h @@ -0,0 +1,40 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. +#pragma once + +#include +#include + +#include "Pass.h" + +namespace milvus { +namespace scheduler { + +class FallbackPass : public Pass { + public: + FallbackPass() = default; + + public: + void + Init() override; + + bool + Run(const TaskPtr& task) override; +}; + +} // namespace scheduler +} // namespace milvus From 0011765b2db28ca0550a5f1ae31c1299a3cc7799 Mon Sep 17 00:00:00 2001 From: Yukikaze-CZR Date: Tue, 19 Nov 2019 17:30:29 +0800 Subject: [PATCH 10/14] Change ivfsq8 test report detailed version #255 --- .../milvus_ivfsq8_test_report_detailed_version.md | 8 ++++---- .../milvus_ivfsq8_test_report_detailed_version_cn.md | 10 +++++----- 2 files changed, 9 insertions(+), 9 deletions(-) diff --git a/docs/test_report/milvus_ivfsq8_test_report_detailed_version.md b/docs/test_report/milvus_ivfsq8_test_report_detailed_version.md index dc1bbb3fd0..7bea764df0 100644 --- a/docs/test_report/milvus_ivfsq8_test_report_detailed_version.md +++ b/docs/test_report/milvus_ivfsq8_test_report_detailed_version.md @@ -28,13 +28,13 @@ Memory: 503GB Docker version: 18.09 -Nvidia Driver version: 430.34 +NVIDIA Driver version: 430.34 Milvus version: 0.5.3 SDK interface: Python 3.6.8 -Pymilvus version: 0.2.5 +pymilvus version: 0.2.5 @@ -66,7 +66,7 @@ For details on this dataset, you can check : http://corpus-texmex.irisa.fr/ . > Note: In the query test of recall, we will test the following parameters with different values: > - > nq - grouped by: [1, 5, 10, 200, 400, 600, 800, 1000], + > nq - grouped by: [10, 200, 400, 600, 800, 1000], > > topk - grouped by: [1, 10, 100] @@ -93,7 +93,7 @@ Milvus configuration - gpu_cache_capacity: 6 - use_blas_threshold: 1100 -You can check the definition of Milvus configuration on https://milvus.io/docs/en/reference/milvus_config/. +The definitions of Milvus configuration are on https://milvus.io/docs/en/reference/milvus_config/. Test method diff --git a/docs/test_report/milvus_ivfsq8_test_report_detailed_version_cn.md b/docs/test_report/milvus_ivfsq8_test_report_detailed_version_cn.md index bf43d2e0e8..a6e5e75ea4 100644 --- a/docs/test_report/milvus_ivfsq8_test_report_detailed_version_cn.md +++ b/docs/test_report/milvus_ivfsq8_test_report_detailed_version_cn.md @@ -2,7 +2,7 @@ ## 概述 -本文描述了ivfsq8索引在milvus单机部署方式下的测试报告。 +本文描述了ivfsq8索引在milvus单机部署方式下的测试结果。 @@ -28,13 +28,13 @@ GPU1: GeForce GTX 1080 Docker版本: 18.09 -Nvidia Driver版本: 430.34 +NVIDIA Driver版本: 430.34 Milvus版本: 0.5.3 SDK接口: Python 3.6.8 -Pymilvus版本: 0.2.5 +pymilvus版本: 0.2.5 @@ -66,7 +66,7 @@ Pymilvus版本: 0.2.5 > 备注:在向量准确性测试中,我们会测试下面参数不同的取值来观察结果: > - > 被查询向量的数量nq将按照 [1, 5, 10, 200, 400, 600, 800, 1000]的数量分组, + > 被查询向量的数量nq将按照 [10, 200, 400, 600, 800, 1000]的数量分组, > > 单条查询中最相似的K个结果topk将按照[1, 10, 100]的数量分组。 @@ -93,7 +93,7 @@ Milvus设置: - gpu_cache_capacity: 6 - use_blas_threshold: 1100 -你可以在 https://milvus.io/docs/en/reference/milvus_config/上查询Milvus设置的详细定义。 +Milvus设置的详细定义可以参考 https://milvus.io/docs/en/reference/milvus_config/ 。 测试方法 From c451f8cbf2b66c45759f7ca7bba15212343e2a91 Mon Sep 17 00:00:00 2001 From: fishpenguin Date: Tue, 19 Nov 2019 19:40:17 +0800 Subject: [PATCH 11/14] clange format and add FallbackPass --- core/src/index/knowhere/knowhere/index/vector_index/IndexIVF.h | 1 - core/src/scheduler/SchedInst.h | 3 +-- 2 files changed, 1 insertion(+), 3 deletions(-) diff --git a/core/src/index/knowhere/knowhere/index/vector_index/IndexIVF.h b/core/src/index/knowhere/knowhere/index/vector_index/IndexIVF.h index 4bb4092b75..e064b6f08c 100644 --- a/core/src/index/knowhere/knowhere/index/vector_index/IndexIVF.h +++ b/core/src/index/knowhere/knowhere/index/vector_index/IndexIVF.h @@ -47,7 +47,6 @@ class IVF : public VectorIndex, public FaissBaseIndex { void set_index_model(IndexModelPtr model) override; - void Add(const DatasetPtr& dataset, const Config& config) override; diff --git a/core/src/scheduler/SchedInst.h b/core/src/scheduler/SchedInst.h index 48eaffc0a5..44fea1b6d3 100644 --- a/core/src/scheduler/SchedInst.h +++ b/core/src/scheduler/SchedInst.h @@ -104,14 +104,13 @@ class OptimizerInst { pass_list.push_back(std::make_shared()); #ifdef MILVUS_CPU_VERSION pass_list.push_back(std::make_shared()); -<<<<<<< HEAD #else server::Config& config = server::Config::GetInstance(); std::vector build_resources; config.GetGpuResourceConfigBuildIndexResources(build_resources); pass_list.push_back(std::make_shared(build_resources)); #endif -// pass_list.push_back(std::make_shared()); + pass_list.push_back(std::make_shared()); instance = std::make_shared(pass_list); } } From fdb7decdd3debbeae3d74cd7fd5ec608b319ca19 Mon Sep 17 00:00:00 2001 From: fishpenguin Date: Tue, 19 Nov 2019 20:30:43 +0800 Subject: [PATCH 12/14] Remove DefaultLabel --- core/src/scheduler/JobMgr.cpp | 8 +-- core/src/scheduler/JobMgr.h | 5 +- core/src/scheduler/Scheduler.cpp | 4 -- core/src/scheduler/TaskCreator.cpp | 9 +-- core/src/scheduler/Utils.cpp | 1 - core/src/scheduler/action/Action.h | 4 -- .../scheduler/action/PushTaskToNeighbour.cpp | 32 --------- core/src/scheduler/tasklabel/DefaultLabel.h | 36 ---------- core/src/scheduler/tasklabel/TaskLabel.h | 1 - core/unittest/scheduler/CMakeLists.txt | 1 - core/unittest/scheduler/test_normal.cpp | 72 ------------------- core/unittest/scheduler/test_resource.cpp | 18 +++-- core/unittest/scheduler/test_resource_mgr.cpp | 4 +- core/unittest/scheduler/test_scheduler.cpp | 41 ----------- core/unittest/scheduler/test_tasktable.cpp | 9 +-- 15 files changed, 26 insertions(+), 219 deletions(-) delete mode 100644 core/src/scheduler/tasklabel/DefaultLabel.h delete mode 100644 core/unittest/scheduler/test_normal.cpp diff --git a/core/src/scheduler/JobMgr.cpp b/core/src/scheduler/JobMgr.cpp index 8dd095a3fa..76c07fe459 100644 --- a/core/src/scheduler/JobMgr.cpp +++ b/core/src/scheduler/JobMgr.cpp @@ -85,7 +85,7 @@ JobMgr::worker_function() { } for (auto& task : tasks) { - calculate_path(task); + calculate_path(res_mgr_, task); } // disk resources NEVER be empty. @@ -103,7 +103,7 @@ JobMgr::build_task(const JobPtr& job) { } void -JobMgr::calculate_path(const TaskPtr& task) { +JobMgr::calculate_path(const ResourceMgrPtr& res_mgr, const TaskPtr& task) { if (task->type_ != TaskType::SearchTask && task->type_ != TaskType::BuildIndexTask) { return; } @@ -114,9 +114,9 @@ JobMgr::calculate_path(const TaskPtr& task) { std::vector path; auto spec_label = std::static_pointer_cast(task->label()); - auto src = res_mgr_->GetDiskResources()[0]; + auto src = res_mgr->GetDiskResources()[0]; auto dest = spec_label->resource(); - ShortestPath(src.lock(), dest.lock(), res_mgr_, path); + ShortestPath(src.lock(), dest.lock(), res_mgr, path); task->path() = Path(path, path.size() - 1); } diff --git a/core/src/scheduler/JobMgr.h b/core/src/scheduler/JobMgr.h index fbd6c0ee45..af072614b5 100644 --- a/core/src/scheduler/JobMgr.h +++ b/core/src/scheduler/JobMgr.h @@ -59,8 +59,9 @@ class JobMgr : public interface::dumpable { static std::vector build_task(const JobPtr& job); - void - calculate_path(const TaskPtr& task); + public: + static void + calculate_path(const ResourceMgrPtr& res_mgr, const TaskPtr& task); private: bool running_ = false; diff --git a/core/src/scheduler/Scheduler.cpp b/core/src/scheduler/Scheduler.cpp index 8d2d4406f8..68d7457aa9 100644 --- a/core/src/scheduler/Scheduler.cpp +++ b/core/src/scheduler/Scheduler.cpp @@ -108,10 +108,6 @@ Scheduler::OnLoadCompleted(const EventPtr& event) { auto task_table_type = load_completed_event->task_table_item_->task->label()->Type(); switch (task_table_type) { - case TaskLabelType::DEFAULT: { - Action::DefaultLabelTaskScheduler(res_mgr_, resource, load_completed_event); - break; - } case TaskLabelType::SPECIFIED_RESOURCE: { Action::SpecifiedResourceLabelTaskScheduler(res_mgr_, resource, load_completed_event); break; diff --git a/core/src/scheduler/TaskCreator.cpp b/core/src/scheduler/TaskCreator.cpp index 3b7f6fd82e..30b76cc5bb 100644 --- a/core/src/scheduler/TaskCreator.cpp +++ b/core/src/scheduler/TaskCreator.cpp @@ -18,7 +18,6 @@ #include "scheduler/TaskCreator.h" #include "SchedInst.h" #include "tasklabel/BroadcastLabel.h" -#include "tasklabel/DefaultLabel.h" #include "tasklabel/SpecResLabel.h" namespace milvus { @@ -47,8 +46,7 @@ std::vector TaskCreator::Create(const SearchJobPtr& job) { std::vector tasks; for (auto& index_file : job->index_files()) { - auto label = std::make_shared(); - auto task = std::make_shared(index_file.second, label); + auto task = std::make_shared(index_file.second, nullptr); task->job_ = job; tasks.emplace_back(task); } @@ -70,11 +68,8 @@ TaskCreator::Create(const DeleteJobPtr& job) { std::vector TaskCreator::Create(const BuildIndexJobPtr& job) { std::vector tasks; - ResourcePtr res_ptr = ResMgrInst::GetInstance()->GetResource("cpu"); - for (auto& to_index_file : job->to_index_files()) { - auto label = std::make_shared(std::weak_ptr(res_ptr)); - auto task = std::make_shared(to_index_file.second, label); + auto task = std::make_shared(to_index_file.second, nullptr); task->job_ = job; tasks.emplace_back(task); } diff --git a/core/src/scheduler/Utils.cpp b/core/src/scheduler/Utils.cpp index 1be04ba80b..527b9adf03 100644 --- a/core/src/scheduler/Utils.cpp +++ b/core/src/scheduler/Utils.cpp @@ -16,7 +16,6 @@ // under the License. #include "scheduler/Utils.h" -#include "server/Config.h" #ifdef MILVUS_GPU_VERSION #include diff --git a/core/src/scheduler/action/Action.h b/core/src/scheduler/action/Action.h index f5f828cbf6..7391d287a8 100644 --- a/core/src/scheduler/action/Action.h +++ b/core/src/scheduler/action/Action.h @@ -36,10 +36,6 @@ class Action { static void PushTaskToResource(TaskTableItemPtr task_item, const ResourcePtr& dest); - static void - DefaultLabelTaskScheduler(const ResourceMgrPtr& res_mgr, ResourcePtr resource, - std::shared_ptr event); - static void SpecifiedResourceLabelTaskScheduler(const ResourceMgrPtr& res_mgr, ResourcePtr resource, std::shared_ptr event); diff --git a/core/src/scheduler/action/PushTaskToNeighbour.cpp b/core/src/scheduler/action/PushTaskToNeighbour.cpp index 9aed678937..f49f1d871f 100644 --- a/core/src/scheduler/action/PushTaskToNeighbour.cpp +++ b/core/src/scheduler/action/PushTaskToNeighbour.cpp @@ -101,38 +101,6 @@ Action::PushTaskToResource(TaskTableItemPtr task_item, const ResourcePtr& dest) dest->task_table().Put(task_item->task, task_item); } -void -Action::DefaultLabelTaskScheduler(const ResourceMgrPtr& res_mgr, ResourcePtr resource, - std::shared_ptr event) { - if (not resource->HasExecutor() && event->task_table_item_->Move()) { - auto task_item = event->task_table_item_; - auto task = event->task_table_item_->task; - auto search_task = std::static_pointer_cast(task); - bool moved = false; - - // to support test task, REFACTOR - if (resource->type() == ResourceType::CPU) { - if (auto index_engine = search_task->index_engine_) { - auto location = index_engine->GetLocation(); - - for (auto i = 0; i < res_mgr->GetNumGpuResource(); ++i) { - auto index = milvus::cache::GpuCacheMgr::GetInstance(i)->GetIndex(location); - if (index != nullptr) { - moved = true; - auto dest_resource = res_mgr->GetResource(ResourceType::GPU, i); - PushTaskToResource(event->task_table_item_, dest_resource); - break; - } - } - } - } - - if (not moved) { - PushTaskToNeighbourRandomly(task_item, resource); - } - } -} - void Action::SpecifiedResourceLabelTaskScheduler(const ResourceMgrPtr& res_mgr, ResourcePtr resource, std::shared_ptr event) { diff --git a/core/src/scheduler/tasklabel/DefaultLabel.h b/core/src/scheduler/tasklabel/DefaultLabel.h deleted file mode 100644 index c215743575..0000000000 --- a/core/src/scheduler/tasklabel/DefaultLabel.h +++ /dev/null @@ -1,36 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -#pragma once - -#include "TaskLabel.h" - -#include - -namespace milvus { -namespace scheduler { - -class DefaultLabel : public TaskLabel { - public: - DefaultLabel() : TaskLabel(TaskLabelType::DEFAULT) { - } -}; - -using DefaultLabelPtr = std::shared_ptr; - -} // namespace scheduler -} // namespace milvus diff --git a/core/src/scheduler/tasklabel/TaskLabel.h b/core/src/scheduler/tasklabel/TaskLabel.h index d35ce409ff..33e6eb6e57 100644 --- a/core/src/scheduler/tasklabel/TaskLabel.h +++ b/core/src/scheduler/tasklabel/TaskLabel.h @@ -23,7 +23,6 @@ namespace milvus { namespace scheduler { enum class TaskLabelType { - DEFAULT, // means can be executed in any resource SPECIFIED_RESOURCE, // means must executing in special resource BROADCAST, // means all enable-executor resource must execute task }; diff --git a/core/unittest/scheduler/CMakeLists.txt b/core/unittest/scheduler/CMakeLists.txt index 0148441c2d..878f0f23a9 100644 --- a/core/unittest/scheduler/CMakeLists.txt +++ b/core/unittest/scheduler/CMakeLists.txt @@ -21,7 +21,6 @@ set(test_files ${CMAKE_CURRENT_SOURCE_DIR}/test_algorithm.cpp ${CMAKE_CURRENT_SOURCE_DIR}/test_event.cpp ${CMAKE_CURRENT_SOURCE_DIR}/test_node.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/test_normal.cpp ${CMAKE_CURRENT_SOURCE_DIR}/test_resource.cpp ${CMAKE_CURRENT_SOURCE_DIR}/test_resource_factory.cpp ${CMAKE_CURRENT_SOURCE_DIR}/test_resource_mgr.cpp diff --git a/core/unittest/scheduler/test_normal.cpp b/core/unittest/scheduler/test_normal.cpp deleted file mode 100644 index 20cd39d0bf..0000000000 --- a/core/unittest/scheduler/test_normal.cpp +++ /dev/null @@ -1,72 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -#include -#include "scheduler/ResourceFactory.h" -#include "scheduler/ResourceMgr.h" -#include "scheduler/SchedInst.h" -#include "scheduler/Scheduler.h" -#include "scheduler/task/TestTask.h" -#include "scheduler/tasklabel/DefaultLabel.h" -#include "utils/Log.h" - -namespace { - -namespace ms = milvus::scheduler; - -} // namespace - -TEST(NormalTest, INST_TEST) { - // ResourceMgr only compose resources, provide unified event - auto res_mgr = ms::ResMgrInst::GetInstance(); - - res_mgr->Add(ms::ResourceFactory::Create("disk", "DISK", 0, true, false)); - res_mgr->Add(ms::ResourceFactory::Create("cpu", "CPU", 0, true, true)); - - auto IO = ms::Connection("IO", 500.0); - res_mgr->Connect("disk", "cpu", IO); - - auto scheduler = ms::SchedInst::GetInstance(); - - res_mgr->Start(); - scheduler->Start(); - - const uint64_t NUM_TASK = 2; - std::vector> tasks; - ms::TableFileSchemaPtr dummy = nullptr; - - auto disks = res_mgr->GetDiskResources(); - ASSERT_FALSE(disks.empty()); - if (auto observe = disks[0].lock()) { - for (uint64_t i = 0; i < NUM_TASK; ++i) { - auto label = std::make_shared(); - auto task = std::make_shared(dummy, label); - task->label() = std::make_shared(); - tasks.push_back(task); - observe->task_table().Put(task); - } - } - - for (auto& task : tasks) { - task->Wait(); - ASSERT_EQ(task->load_count_, 1); - ASSERT_EQ(task->exec_count_, 1); - } - - scheduler->Stop(); - res_mgr->Stop(); -} diff --git a/core/unittest/scheduler/test_resource.cpp b/core/unittest/scheduler/test_resource.cpp index 6fe85ec558..101dcd3976 100644 --- a/core/unittest/scheduler/test_resource.cpp +++ b/core/unittest/scheduler/test_resource.cpp @@ -24,7 +24,7 @@ #include "scheduler/resource/TestResource.h" #include "scheduler/task/Task.h" #include "scheduler/task/TestTask.h" -#include "scheduler/tasklabel/DefaultLabel.h" +#include "scheduler/tasklabel/SpecResLabel.h" namespace milvus { namespace scheduler { @@ -182,8 +182,10 @@ TEST_F(ResourceAdvanceTest, DISK_RESOURCE_TEST) { std::vector> tasks; TableFileSchemaPtr dummy = nullptr; for (uint64_t i = 0; i < NUM; ++i) { - auto label = std::make_shared(); + auto label = std::make_shared(disk_resource_); auto task = std::make_shared(dummy, label); + std::vector path{disk_resource_->name()}; + task->path() = Path(path, 0); tasks.push_back(task); disk_resource_->task_table().Put(task); } @@ -208,8 +210,10 @@ TEST_F(ResourceAdvanceTest, CPU_RESOURCE_TEST) { std::vector> tasks; TableFileSchemaPtr dummy = nullptr; for (uint64_t i = 0; i < NUM; ++i) { - auto label = std::make_shared(); + auto label = std::make_shared(cpu_resource_); auto task = std::make_shared(dummy, label); + std::vector path{cpu_resource_->name()}; + task->path() = Path(path, 0); tasks.push_back(task); cpu_resource_->task_table().Put(task); } @@ -234,8 +238,10 @@ TEST_F(ResourceAdvanceTest, GPU_RESOURCE_TEST) { std::vector> tasks; TableFileSchemaPtr dummy = nullptr; for (uint64_t i = 0; i < NUM; ++i) { - auto label = std::make_shared(); + auto label = std::make_shared(gpu_resource_); auto task = std::make_shared(dummy, label); + std::vector path{gpu_resource_->name()}; + task->path() = Path(path, 0); tasks.push_back(task); gpu_resource_->task_table().Put(task); } @@ -260,8 +266,10 @@ TEST_F(ResourceAdvanceTest, TEST_RESOURCE_TEST) { std::vector> tasks; TableFileSchemaPtr dummy = nullptr; for (uint64_t i = 0; i < NUM; ++i) { - auto label = std::make_shared(); + auto label = std::make_shared(test_resource_); auto task = std::make_shared(dummy, label); + std::vector path{test_resource_->name()}; + task->path() = Path(path, 0); tasks.push_back(task); test_resource_->task_table().Put(task); } diff --git a/core/unittest/scheduler/test_resource_mgr.cpp b/core/unittest/scheduler/test_resource_mgr.cpp index b9127060bd..d6495971c4 100644 --- a/core/unittest/scheduler/test_resource_mgr.cpp +++ b/core/unittest/scheduler/test_resource_mgr.cpp @@ -22,7 +22,6 @@ #include "scheduler/resource/GpuResource.h" #include "scheduler/resource/TestResource.h" #include "scheduler/task/TestTask.h" -#include "scheduler/tasklabel/DefaultLabel.h" namespace milvus { namespace scheduler { @@ -187,8 +186,7 @@ TEST_F(ResourceMgrAdvanceTest, REGISTER_SUBSCRIBER) { auto callback = [&](EventPtr event) { flag = true; }; mgr1_->RegisterSubscriber(callback); TableFileSchemaPtr dummy = nullptr; - auto label = std::make_shared(); - disk_res->task_table().Put(std::make_shared(dummy, label)); + disk_res->task_table().Put(std::make_shared(dummy, nullptr)); sleep(1); ASSERT_TRUE(flag); } diff --git a/core/unittest/scheduler/test_scheduler.cpp b/core/unittest/scheduler/test_scheduler.cpp index 22fb9be723..72538113c3 100644 --- a/core/unittest/scheduler/test_scheduler.cpp +++ b/core/unittest/scheduler/test_scheduler.cpp @@ -23,7 +23,6 @@ #include "scheduler/Scheduler.h" #include "scheduler/resource/Resource.h" #include "scheduler/task/TestTask.h" -#include "scheduler/tasklabel/DefaultLabel.h" #include "scheduler/tasklabel/SpecResLabel.h" #include "utils/Error.h" #include "wrapper/VecIndex.h" @@ -150,46 +149,6 @@ insert_dummy_index_into_gpu_cache(uint64_t device_id) { cache::GpuCacheMgr::GetInstance(device_id)->InsertItem("location", obj); } -TEST_F(SchedulerTest, ON_LOAD_COMPLETED) { - const uint64_t NUM = 10; - std::vector> tasks; - TableFileSchemaPtr dummy = std::make_shared(); - dummy->location_ = "location"; - - insert_dummy_index_into_gpu_cache(1); - - for (uint64_t i = 0; i < NUM; ++i) { - auto label = std::make_shared(); - auto task = std::make_shared(dummy, label); - task->label() = std::make_shared(); - tasks.push_back(task); - cpu_resource_.lock()->task_table().Put(task); - } - - sleep(3); - ASSERT_EQ(res_mgr_->GetResource(ResourceType::GPU, 1)->task_table().size(), NUM); -} - -TEST_F(SchedulerTest, PUSH_TASK_TO_NEIGHBOUR_RANDOMLY_TEST) { - const uint64_t NUM = 10; - std::vector> tasks; - TableFileSchemaPtr dummy1 = std::make_shared(); - dummy1->location_ = "location"; - - tasks.clear(); - - for (uint64_t i = 0; i < NUM; ++i) { - auto label = std::make_shared(); - auto task = std::make_shared(dummy1, label); - task->label() = std::make_shared(); - tasks.push_back(task); - cpu_resource_.lock()->task_table().Put(task); - } - - sleep(3); - // ASSERT_EQ(res_mgr_->GetResource(ResourceType::GPU, 1)->task_table().Size(), NUM); -} - class SchedulerTest2 : public testing::Test { protected: void diff --git a/core/unittest/scheduler/test_tasktable.cpp b/core/unittest/scheduler/test_tasktable.cpp index 28a2e29c98..3fa1beabeb 100644 --- a/core/unittest/scheduler/test_tasktable.cpp +++ b/core/unittest/scheduler/test_tasktable.cpp @@ -18,7 +18,6 @@ #include #include "scheduler/TaskTable.h" #include "scheduler/task/TestTask.h" -#include "scheduler/tasklabel/DefaultLabel.h" /************ TaskTableBaseTest ************/ @@ -162,9 +161,8 @@ class TaskTableBaseTest : public ::testing::Test { SetUp() override { milvus::scheduler::TableFileSchemaPtr dummy = nullptr; invalid_task_ = nullptr; - auto label = std::make_shared(); - task1_ = std::make_shared(dummy, label); - task2_ = std::make_shared(dummy, label); + task1_ = std::make_shared(dummy, nullptr); + task2_ = std::make_shared(dummy, nullptr); } milvus::scheduler::TaskPtr invalid_task_; @@ -320,8 +318,7 @@ class TaskTableAdvanceTest : public ::testing::Test { SetUp() override { milvus::scheduler::TableFileSchemaPtr dummy = nullptr; for (uint64_t i = 0; i < 8; ++i) { - auto label = std::make_shared(); - auto task = std::make_shared(dummy, label); + auto task = std::make_shared(dummy, nullptr); table1_.Put(task); } From a13677b65f252b9c4025c4c2e3a25dfb8e6e112c Mon Sep 17 00:00:00 2001 From: fishpenguin Date: Tue, 19 Nov 2019 21:10:43 +0800 Subject: [PATCH 13/14] Refactor optimizer --- core/src/scheduler/SchedInst.h | 22 +++--- .../scheduler/optimizer/BuildIndexPass.cpp | 9 ++- core/src/scheduler/optimizer/BuildIndexPass.h | 2 +- .../src/scheduler/optimizer/FaissFlatPass.cpp | 69 +++++++++++++++++++ .../{LargeSQ8HPass.h => FaissFlatPass.h} | 6 +- .../{OnlyCPUPass.cpp => FaissIVFFlatPass.cpp} | 41 ++++++++--- .../{HybridPass.h => FaissIVFFlatPass.h} | 12 +++- ...LargeSQ8HPass.cpp => FaissIVFSQ8HPass.cpp} | 40 +++-------- .../scheduler/optimizer/FaissIVFSQ8HPass.h | 56 +++++++++++++++ .../scheduler/optimizer/FaissIVFSQ8Pass.cpp | 69 +++++++++++++++++++ .../{OnlyCPUPass.h => FaissIVFSQ8Pass.h} | 12 +++- core/src/scheduler/optimizer/HybridPass.cpp | 47 ------------- 12 files changed, 273 insertions(+), 112 deletions(-) create mode 100644 core/src/scheduler/optimizer/FaissFlatPass.cpp rename core/src/scheduler/optimizer/{LargeSQ8HPass.h => FaissFlatPass.h} (91%) rename core/src/scheduler/optimizer/{OnlyCPUPass.cpp => FaissIVFFlatPass.cpp} (56%) rename core/src/scheduler/optimizer/{HybridPass.h => FaissIVFFlatPass.h} (81%) rename core/src/scheduler/optimizer/{LargeSQ8HPass.cpp => FaissIVFSQ8HPass.cpp} (61%) create mode 100644 core/src/scheduler/optimizer/FaissIVFSQ8HPass.h create mode 100644 core/src/scheduler/optimizer/FaissIVFSQ8Pass.cpp rename core/src/scheduler/optimizer/{OnlyCPUPass.h => FaissIVFSQ8Pass.h} (81%) delete mode 100644 core/src/scheduler/optimizer/HybridPass.cpp diff --git a/core/src/scheduler/SchedInst.h b/core/src/scheduler/SchedInst.h index 44fea1b6d3..912f5edf8d 100644 --- a/core/src/scheduler/SchedInst.h +++ b/core/src/scheduler/SchedInst.h @@ -24,9 +24,10 @@ #include "Utils.h" #include "optimizer/BuildIndexPass.h" #include "optimizer/FallbackPass.h" -#include "optimizer/HybridPass.h" -#include "optimizer/LargeSQ8HPass.h" -#include "optimizer/OnlyCPUPass.h" +#include "optimizer/FaissFlatPass.h" +#include "optimizer/FaissIVFFlatPass.h" +#include "optimizer/FaissIVFSQ8Pass.h" +#include "optimizer/FaissIVFSQ8HPass.h" #include "optimizer/Optimizer.h" #include "server/Config.h" @@ -100,15 +101,12 @@ class OptimizerInst { std::lock_guard lock(mutex_); if (instance == nullptr) { std::vector pass_list; - pass_list.push_back(std::make_shared()); - pass_list.push_back(std::make_shared()); -#ifdef MILVUS_CPU_VERSION - pass_list.push_back(std::make_shared()); -#else - server::Config& config = server::Config::GetInstance(); - std::vector build_resources; - config.GetGpuResourceConfigBuildIndexResources(build_resources); - pass_list.push_back(std::make_shared(build_resources)); +#ifdef MILVUS_GPU_VERSION + pass_list.push_back(std::make_shared()); + pass_list.push_back(std::make_shared()); + pass_list.push_back(std::make_shared()); + pass_list.push_back(std::make_shared()); + pass_list.push_back(std::make_shared()); #endif pass_list.push_back(std::make_shared()); instance = std::make_shared(pass_list); diff --git a/core/src/scheduler/optimizer/BuildIndexPass.cpp b/core/src/scheduler/optimizer/BuildIndexPass.cpp index 4e7d5222e2..cd1b7550d8 100644 --- a/core/src/scheduler/optimizer/BuildIndexPass.cpp +++ b/core/src/scheduler/optimizer/BuildIndexPass.cpp @@ -23,11 +23,14 @@ namespace milvus { namespace scheduler { -BuildIndexPass::BuildIndexPass(std::vector& build_gpu_ids) : build_gpu_ids_(build_gpu_ids) { -} - void BuildIndexPass::Init() { + server::Config& config = server::Config::GetInstance(); + std::vector build_resources; + Status s = config.GetGpuResourceConfigBuildIndexResources(build_resources); + if (!s.ok()) { + throw; + } } bool diff --git a/core/src/scheduler/optimizer/BuildIndexPass.h b/core/src/scheduler/optimizer/BuildIndexPass.h index a3eb426527..4f7117fc4e 100644 --- a/core/src/scheduler/optimizer/BuildIndexPass.h +++ b/core/src/scheduler/optimizer/BuildIndexPass.h @@ -34,7 +34,7 @@ namespace scheduler { class BuildIndexPass : public Pass { public: - explicit BuildIndexPass(std::vector& build_gpu_id); + BuildIndexPass() = default; public: void diff --git a/core/src/scheduler/optimizer/FaissFlatPass.cpp b/core/src/scheduler/optimizer/FaissFlatPass.cpp new file mode 100644 index 0000000000..61ca1b9ec9 --- /dev/null +++ b/core/src/scheduler/optimizer/FaissFlatPass.cpp @@ -0,0 +1,69 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#include "scheduler/optimizer/FaissFlatPass.h" +#include "cache/GpuCacheMgr.h" +#include "scheduler/SchedInst.h" +#include "scheduler/Utils.h" +#include "scheduler/task/SearchTask.h" +#include "scheduler/tasklabel/SpecResLabel.h" +#include "server/Config.h" +#include "utils/Log.h" + +namespace milvus { +namespace scheduler { + +void +FaissFlatPass::Init() { + server::Config& config = server::Config::GetInstance(); + Status s = config.GetEngineConfigGpuSearchThreshold(threshold_); + if (!s.ok()) { + threshold_ = std::numeric_limits::max(); + } + s = config.GetGpuResourceConfigSearchResources(gpus); + if (!s.ok()) { + throw; + } +} + +bool +FaissFlatPass::Run(const TaskPtr& task) { + if (task->Type() != TaskType::SearchTask) { + return false; + } + + auto search_task = std::static_pointer_cast(task); + if (search_task->file_->engine_type_ != (int)engine::EngineType::FAISS_IDMAP) { + return false; + } + + auto search_job = std::static_pointer_cast(search_task->job_.lock()); + ResourcePtr res_ptr; + if (search_job->nq() < threshold_) { + res_ptr = ResMgrInst::GetInstance()->GetResource("cpu"); + } else { + auto best_device_id = count_ % gpus.size(); + count_++; + res_ptr = ResMgrInst::GetInstance()->GetResource(ResourceType::GPU, best_device_id); + } + auto label = std::make_shared(res_ptr); + task->label() = label; + return true; +} + +} // namespace scheduler +} // namespace milvus diff --git a/core/src/scheduler/optimizer/LargeSQ8HPass.h b/core/src/scheduler/optimizer/FaissFlatPass.h similarity index 91% rename from core/src/scheduler/optimizer/LargeSQ8HPass.h rename to core/src/scheduler/optimizer/FaissFlatPass.h index c69fb12c64..60adf62a07 100644 --- a/core/src/scheduler/optimizer/LargeSQ8HPass.h +++ b/core/src/scheduler/optimizer/FaissFlatPass.h @@ -33,9 +33,9 @@ namespace milvus { namespace scheduler { -class LargeSQ8HPass : public Pass { +class FaissFlatPass : public Pass { public: - LargeSQ8HPass() = default; + FaissFlatPass() = default; public: void @@ -50,7 +50,7 @@ class LargeSQ8HPass : public Pass { std::vector gpus; }; -using LargeSQ8HPassPtr = std::shared_ptr; +using FaissFlatPassPtr = std::shared_ptr; } // namespace scheduler } // namespace milvus diff --git a/core/src/scheduler/optimizer/OnlyCPUPass.cpp b/core/src/scheduler/optimizer/FaissIVFFlatPass.cpp similarity index 56% rename from core/src/scheduler/optimizer/OnlyCPUPass.cpp rename to core/src/scheduler/optimizer/FaissIVFFlatPass.cpp index 8cd40092fb..1f1efb374b 100644 --- a/core/src/scheduler/optimizer/OnlyCPUPass.cpp +++ b/core/src/scheduler/optimizer/FaissIVFFlatPass.cpp @@ -15,31 +15,52 @@ // specific language governing permissions and limitations // under the License. -#include "scheduler/optimizer/OnlyCPUPass.h" +#include "scheduler/optimizer/FaissIVFFlatPass.h" +#include "cache/GpuCacheMgr.h" #include "scheduler/SchedInst.h" #include "scheduler/Utils.h" #include "scheduler/task/SearchTask.h" #include "scheduler/tasklabel/SpecResLabel.h" +#include "server/Config.h" +#include "utils/Log.h" namespace milvus { namespace scheduler { void -OnlyCPUPass::Init() { +FaissIVFFlatPass::Init() { + server::Config& config = server::Config::GetInstance(); + Status s = config.GetEngineConfigGpuSearchThreshold(threshold_); + if (!s.ok()) { + threshold_ = std::numeric_limits::max(); + } + s = config.GetGpuResourceConfigSearchResources(gpus); + if (!s.ok()) { + throw; + } } bool -OnlyCPUPass::Run(const TaskPtr& task) { - if (task->Type() != TaskType::SearchTask) - return false; - auto search_task = std::static_pointer_cast(task); - if (search_task->file_->engine_type_ != (int)engine::EngineType::FAISS_IVFSQ8 && - search_task->file_->engine_type_ != (int)engine::EngineType::FAISS_IVFFLAT) { +FaissIVFFlatPass::Run(const TaskPtr& task) { + if (task->Type() != TaskType::SearchTask) { return false; } - ResourcePtr res_ptr = ResMgrInst::GetInstance()->GetResource("cpu"); - auto label = std::make_shared(std::weak_ptr(res_ptr)); + auto search_task = std::static_pointer_cast(task); + if (search_task->file_->engine_type_ != (int)engine::EngineType::FAISS_IVFFLAT) { + return false; + } + + auto search_job = std::static_pointer_cast(search_task->job_.lock()); + ResourcePtr res_ptr; + if (search_job->nq() < threshold_) { + res_ptr = ResMgrInst::GetInstance()->GetResource("cpu"); + } else { + auto best_device_id = count_ % gpus.size(); + count_++; + res_ptr = ResMgrInst::GetInstance()->GetResource(ResourceType::GPU, best_device_id); + } + auto label = std::make_shared(res_ptr); task->label() = label; return true; } diff --git a/core/src/scheduler/optimizer/HybridPass.h b/core/src/scheduler/optimizer/FaissIVFFlatPass.h similarity index 81% rename from core/src/scheduler/optimizer/HybridPass.h rename to core/src/scheduler/optimizer/FaissIVFFlatPass.h index f84a0884f2..9264db32a8 100644 --- a/core/src/scheduler/optimizer/HybridPass.h +++ b/core/src/scheduler/optimizer/FaissIVFFlatPass.h @@ -18,6 +18,7 @@ #include #include +#include #include #include #include @@ -32,9 +33,9 @@ namespace milvus { namespace scheduler { -class HybridPass : public Pass { +class FaissIVFFlatPass : public Pass { public: - HybridPass() = default; + FaissIVFFlatPass() = default; public: void @@ -42,9 +43,14 @@ class HybridPass : public Pass { bool Run(const TaskPtr& task) override; + + private: + int32_t threshold_ = std::numeric_limits::max(); + int64_t count_ = 0; + std::vector gpus; }; -using HybridPassPtr = std::shared_ptr; +using FaissIVFFlatPassPtr = std::shared_ptr; } // namespace scheduler } // namespace milvus diff --git a/core/src/scheduler/optimizer/LargeSQ8HPass.cpp b/core/src/scheduler/optimizer/FaissIVFSQ8HPass.cpp similarity index 61% rename from core/src/scheduler/optimizer/LargeSQ8HPass.cpp rename to core/src/scheduler/optimizer/FaissIVFSQ8HPass.cpp index dd58feb512..b63a0f7490 100644 --- a/core/src/scheduler/optimizer/LargeSQ8HPass.cpp +++ b/core/src/scheduler/optimizer/FaissIVFSQ8HPass.cpp @@ -15,7 +15,7 @@ // specific language governing permissions and limitations // under the License. -#include "scheduler/optimizer/LargeSQ8HPass.h" +#include "scheduler/optimizer/FaissIVFSQ8HPass.h" #include "cache/GpuCacheMgr.h" #include "scheduler/SchedInst.h" #include "scheduler/Utils.h" @@ -28,7 +28,7 @@ namespace milvus { namespace scheduler { void -LargeSQ8HPass::Init() { +FaissIVFSQ8HPass::Init() { server::Config& config = server::Config::GetInstance(); Status s = config.GetEngineConfigGpuSearchThreshold(threshold_); if (!s.ok()) { @@ -38,7 +38,7 @@ LargeSQ8HPass::Init() { } bool -LargeSQ8HPass::Run(const TaskPtr& task) { +FaissIVFSQ8HPass::Run(const TaskPtr& task) { if (task->Type() != TaskType::SearchTask) { return false; } @@ -49,36 +49,16 @@ LargeSQ8HPass::Run(const TaskPtr& task) { } auto search_job = std::static_pointer_cast(search_task->job_.lock()); - - // TODO: future, Index::IVFSQ8H, if nq < threshold set cpu, else set gpu - + ResourcePtr res_ptr; if (search_job->nq() < threshold_) { - return false; + res_ptr = ResMgrInst::GetInstance()->GetResource("cpu"); + } else { + auto best_device_id = count_ % gpus.size(); + count_++; + res_ptr = ResMgrInst::GetInstance()->GetResource(ResourceType::GPU, best_device_id); } - - // std::vector all_free_mem; - // for (auto& gpu : gpus) { - // auto cache = cache::GpuCacheMgr::GetInstance(gpu); - // auto free_mem = cache->CacheCapacity() - cache->CacheUsage(); - // all_free_mem.push_back(free_mem); - // } - // - // auto max_e = std::max_element(all_free_mem.begin(), all_free_mem.end()); - // auto best_index = std::distance(all_free_mem.begin(), max_e); - // auto best_device_id = gpus[best_index]; - auto best_device_id = count_ % gpus.size(); - count_++; - - ResourcePtr res_ptr = ResMgrInst::GetInstance()->GetResource(ResourceType::GPU, best_device_id); - if (not res_ptr) { - SERVER_LOG_ERROR << "GpuResource " << best_device_id << " invalid."; - // TODO: throw critical error and exit - return false; - } - - auto label = std::make_shared(std::weak_ptr(res_ptr)); + auto label = std::make_shared(res_ptr); task->label() = label; - return true; } diff --git a/core/src/scheduler/optimizer/FaissIVFSQ8HPass.h b/core/src/scheduler/optimizer/FaissIVFSQ8HPass.h new file mode 100644 index 0000000000..fa830ff08a --- /dev/null +++ b/core/src/scheduler/optimizer/FaissIVFSQ8HPass.h @@ -0,0 +1,56 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. +#pragma once + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "Pass.h" + +namespace milvus { +namespace scheduler { + +class FaissIVFSQ8HPass : public Pass { + public: + FaissIVFSQ8HPass() = default; + + public: + void + Init() override; + + bool + Run(const TaskPtr& task) override; + + private: + int32_t threshold_ = std::numeric_limits::max(); + int64_t count_ = 0; + std::vector gpus; +}; + +using FaissIVFSQ8HPassPtr = std::shared_ptr; + +} // namespace scheduler +} // namespace milvus diff --git a/core/src/scheduler/optimizer/FaissIVFSQ8Pass.cpp b/core/src/scheduler/optimizer/FaissIVFSQ8Pass.cpp new file mode 100644 index 0000000000..30dd306b3b --- /dev/null +++ b/core/src/scheduler/optimizer/FaissIVFSQ8Pass.cpp @@ -0,0 +1,69 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#include "scheduler/optimizer/FaissIVFSQ8Pass.h" +#include "cache/GpuCacheMgr.h" +#include "scheduler/SchedInst.h" +#include "scheduler/Utils.h" +#include "scheduler/task/SearchTask.h" +#include "scheduler/tasklabel/SpecResLabel.h" +#include "server/Config.h" +#include "utils/Log.h" + +namespace milvus { +namespace scheduler { + +void +FaissIVFSQ8Pass::Init() { + server::Config& config = server::Config::GetInstance(); + Status s = config.GetEngineConfigGpuSearchThreshold(threshold_); + if (!s.ok()) { + threshold_ = std::numeric_limits::max(); + } + s = config.GetGpuResourceConfigSearchResources(gpus); + if (!s.ok()) { + throw; + } +} + +bool +FaissIVFSQ8Pass::Run(const TaskPtr& task) { + if (task->Type() != TaskType::SearchTask) { + return false; + } + + auto search_task = std::static_pointer_cast(task); + if (search_task->file_->engine_type_ != (int)engine::EngineType::FAISS_IVFSQ8) { + return false; + } + + auto search_job = std::static_pointer_cast(search_task->job_.lock()); + ResourcePtr res_ptr; + if (search_job->nq() < threshold_) { + res_ptr = ResMgrInst::GetInstance()->GetResource("cpu"); + } else { + auto best_device_id = count_ % gpus.size(); + count_++; + res_ptr = ResMgrInst::GetInstance()->GetResource(ResourceType::GPU, best_device_id); + } + auto label = std::make_shared(res_ptr); + task->label() = label; + return true; +} + +} // namespace scheduler +} // namespace milvus diff --git a/core/src/scheduler/optimizer/OnlyCPUPass.h b/core/src/scheduler/optimizer/FaissIVFSQ8Pass.h similarity index 81% rename from core/src/scheduler/optimizer/OnlyCPUPass.h rename to core/src/scheduler/optimizer/FaissIVFSQ8Pass.h index 253775f77e..c5853283f7 100644 --- a/core/src/scheduler/optimizer/OnlyCPUPass.h +++ b/core/src/scheduler/optimizer/FaissIVFSQ8Pass.h @@ -18,6 +18,7 @@ #include #include +#include #include #include #include @@ -32,9 +33,9 @@ namespace milvus { namespace scheduler { -class OnlyCPUPass : public Pass { +class FaissIVFSQ8Pass : public Pass { public: - OnlyCPUPass() = default; + FaissIVFSQ8Pass() = default; public: void @@ -42,9 +43,14 @@ class OnlyCPUPass : public Pass { bool Run(const TaskPtr& task) override; + + private: + int32_t threshold_ = std::numeric_limits::max(); + int64_t count_ = 0; + std::vector gpus; }; -using OnlyCPUPassPtr = std::shared_ptr; +using FaissIVFSQ8PassPtr = std::shared_ptr; } // namespace scheduler } // namespace milvus diff --git a/core/src/scheduler/optimizer/HybridPass.cpp b/core/src/scheduler/optimizer/HybridPass.cpp deleted file mode 100644 index 8bf0d8990d..0000000000 --- a/core/src/scheduler/optimizer/HybridPass.cpp +++ /dev/null @@ -1,47 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -#include "scheduler/optimizer/HybridPass.h" -#include "scheduler/SchedInst.h" -#include "scheduler/task/SearchTask.h" -#include "scheduler/tasklabel/SpecResLabel.h" - -namespace milvus { -namespace scheduler { - -void -HybridPass::Init() { -} - -bool -HybridPass::Run(const TaskPtr& task) { - // TODO: future, Index::IVFSQ8H, if nq < threshold set cpu, else set gpu - if (task->Type() != TaskType::SearchTask) - return false; - auto search_task = std::static_pointer_cast(task); - if (search_task->file_->engine_type_ == (int)engine::EngineType::FAISS_IVFSQ8H) { - // TODO: remove "cpu" hardcode - ResourcePtr res_ptr = ResMgrInst::GetInstance()->GetResource("cpu"); - auto label = std::make_shared(std::weak_ptr(res_ptr)); - task->label() = label; - return true; - } - return false; -} - -} // namespace scheduler -} // namespace milvus From a176eb94a21b2cf015745167df0903f490709219 Mon Sep 17 00:00:00 2001 From: fishpenguin Date: Tue, 19 Nov 2019 21:12:12 +0800 Subject: [PATCH 14/14] clange format --- core/src/scheduler/SchedInst.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/core/src/scheduler/SchedInst.h b/core/src/scheduler/SchedInst.h index 912f5edf8d..dc2d5ade35 100644 --- a/core/src/scheduler/SchedInst.h +++ b/core/src/scheduler/SchedInst.h @@ -23,11 +23,11 @@ #include "Scheduler.h" #include "Utils.h" #include "optimizer/BuildIndexPass.h" -#include "optimizer/FallbackPass.h" #include "optimizer/FaissFlatPass.h" #include "optimizer/FaissIVFFlatPass.h" -#include "optimizer/FaissIVFSQ8Pass.h" #include "optimizer/FaissIVFSQ8HPass.h" +#include "optimizer/FaissIVFSQ8Pass.h" +#include "optimizer/FallbackPass.h" #include "optimizer/Optimizer.h" #include "server/Config.h"