From 513ad3b8424d27f886fd6162e8fea6f611be5293 Mon Sep 17 00:00:00 2001 From: fishpenguin Date: Mon, 18 Nov 2019 11:38:48 +0800 Subject: [PATCH 1/7] Support build index with multiple gpu --- CHANGELOG.md | 1 + core/conf/server_cpu_config.template | 3 +- core/conf/server_gpu_config.template | 3 +- core/src/db/engine/ExecutionEngineImpl.cpp | 11 +- core/src/scheduler/JobMgr.cpp | 2 +- core/src/scheduler/SchedInst.cpp | 33 ++++-- core/src/scheduler/SchedInst.h | 5 + .../scheduler/action/PushTaskToNeighbour.cpp | 102 ++++++------------ .../scheduler/optimizer/BuildIndexPass.cpp | 46 ++++++++ core/src/scheduler/optimizer/BuildIndexPass.h | 51 +++++++++ core/src/scheduler/optimizer/OnlyGPUPass.cpp | 3 +- core/src/server/Config.cpp | 72 ++++++++----- core/src/server/Config.h | 7 +- core/src/wrapper/KnowhereResource.cpp | 8 +- core/unittest/server/test_config.cpp | 3 +- 15 files changed, 233 insertions(+), 117 deletions(-) create mode 100644 core/src/scheduler/optimizer/BuildIndexPass.cpp create mode 100644 core/src/scheduler/optimizer/BuildIndexPass.h diff --git a/CHANGELOG.md b/CHANGELOG.md index 745310faa7..42d9291573 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -13,6 +13,7 @@ Please mark all change in change log and use the ticket from JIRA. - \#12 - Pure CPU version for Milvus - \#77 - Support table partition - \#226 - Experimental shards middleware for Milvus +- \#346 - Support build index with multiple gpu ## Improvement - \#275 - Rename C++ SDK IndexType diff --git a/core/conf/server_cpu_config.template b/core/conf/server_cpu_config.template index 6c95126390..bcfb5fa7ce 100644 --- a/core/conf/server_cpu_config.template +++ b/core/conf/server_cpu_config.template @@ -40,4 +40,5 @@ engine_config: resource_config: search_resources: # define the device used for search computation - cpu - index_build_device: cpu # CPU used for building index + index_build_device: # CPU used for building index + - cpu diff --git a/core/conf/server_gpu_config.template b/core/conf/server_gpu_config.template index 154db5d134..a347a9e5bd 100644 --- a/core/conf/server_gpu_config.template +++ b/core/conf/server_gpu_config.template @@ -42,4 +42,5 @@ resource_config: search_resources: # define the devices used for search computation, must be in format: cpu or gpux - cpu - gpu0 - index_build_device: gpu0 # CPU / GPU used for building index, must be in format: cpu or gpux + index_build_device: # CPU / GPU used for building index, must be in format: cpu or gpux + - gpu0 diff --git a/core/src/db/engine/ExecutionEngineImpl.cpp b/core/src/db/engine/ExecutionEngineImpl.cpp index 19c699bda7..dd80ec796b 100644 --- a/core/src/db/engine/ExecutionEngineImpl.cpp +++ b/core/src/db/engine/ExecutionEngineImpl.cpp @@ -570,12 +570,19 @@ ExecutionEngineImpl::GpuCache(uint64_t gpu_id) { Status ExecutionEngineImpl::Init() { server::Config& config = server::Config::GetInstance(); - Status s = config.GetResourceConfigIndexBuildDevice(gpu_num_); + std::vector gpu_ids; + Status s = config.GetResourceConfigIndexBuildDevice(gpu_ids); if (!s.ok()) { return s; } + for (auto id : gpu_ids) { + if (gpu_num_ == id) { + return Status::OK(); + } + } - return Status::OK(); + std::string msg = "Invalid gpu_num"; + return Status(SERVER_INVALID_ARGUMENT, msg); } } // namespace engine diff --git a/core/src/scheduler/JobMgr.cpp b/core/src/scheduler/JobMgr.cpp index 794f6a0f37..8dd095a3fa 100644 --- a/core/src/scheduler/JobMgr.cpp +++ b/core/src/scheduler/JobMgr.cpp @@ -104,7 +104,7 @@ JobMgr::build_task(const JobPtr& job) { void JobMgr::calculate_path(const TaskPtr& task) { - if (task->type_ != TaskType::SearchTask) { + if (task->type_ != TaskType::SearchTask && task->type_ != TaskType::BuildIndexTask) { return; } diff --git a/core/src/scheduler/SchedInst.cpp b/core/src/scheduler/SchedInst.cpp index 61e0c09759..407c3a44d1 100644 --- a/core/src/scheduler/SchedInst.cpp +++ b/core/src/scheduler/SchedInst.cpp @@ -54,8 +54,8 @@ load_simple_config() { // get resources auto gpu_ids = get_gpu_pool(); - int32_t build_gpu_id; - config.GetResourceConfigIndexBuildDevice(build_gpu_id); + std::vector build_gpu_ids; + config.GetResourceConfigIndexBuildDevice(build_gpu_ids); // create and connect ResMgrInst::GetInstance()->Add(ResourceFactory::Create("disk", "DISK", 0, true, false)); @@ -65,19 +65,30 @@ load_simple_config() { ResMgrInst::GetInstance()->Connect("disk", "cpu", io); auto pcie = Connection("pcie", 12000); - bool find_build_gpu_id = false; - for (auto& gpu_id : gpu_ids) { - ResMgrInst::GetInstance()->Add(ResourceFactory::Create(std::to_string(gpu_id), "GPU", gpu_id, true, true)); - ResMgrInst::GetInstance()->Connect("cpu", std::to_string(gpu_id), pcie); - if (build_gpu_id == gpu_id) { - find_build_gpu_id = true; + + std::vector not_find_build_ids; + for (auto& build_id : build_gpu_ids) { + bool find_gpu_id = false; + for (auto& gpu_id : gpu_ids) { + if (gpu_id == build_id) { + find_gpu_id = true; + break; + } + } + if (not find_gpu_id) { + not_find_build_ids.emplace_back(build_id); } } - if (not find_build_gpu_id) { + for (auto& gpu_id : gpu_ids) { + ResMgrInst::GetInstance()->Add(ResourceFactory::Create(std::to_string(gpu_id), "GPU", gpu_id, true, true)); + ResMgrInst::GetInstance()->Connect("cpu", std::to_string(gpu_id), pcie); + } + + for (auto& not_find_id : not_find_build_ids) { ResMgrInst::GetInstance()->Add( - ResourceFactory::Create(std::to_string(build_gpu_id), "GPU", build_gpu_id, true, true)); - ResMgrInst::GetInstance()->Connect("cpu", std::to_string(build_gpu_id), pcie); + ResourceFactory::Create(std::to_string(not_find_id), "GPU", not_find_id, true, true)); + ResMgrInst::GetInstance()->Connect("cpu", std::to_string(not_find_id), pcie); } } diff --git a/core/src/scheduler/SchedInst.h b/core/src/scheduler/SchedInst.h index a3048069f9..4fd22f1fab 100644 --- a/core/src/scheduler/SchedInst.h +++ b/core/src/scheduler/SchedInst.h @@ -21,6 +21,7 @@ #include "JobMgr.h" #include "ResourceMgr.h" #include "Scheduler.h" +#include "optimizer/BuildIndexPass.h" #include "optimizer/HybridPass.h" #include "optimizer/LargeSQ8HPass.h" #include "optimizer/OnlyCPUPass.h" @@ -107,11 +108,15 @@ class OptimizerInst { } } + std::vector build_resources; + config.GetResourceConfigIndexBuildDevice(build_resources); + std::vector pass_list; pass_list.push_back(std::make_shared()); pass_list.push_back(std::make_shared()); pass_list.push_back(std::make_shared()); pass_list.push_back(std::make_shared(has_cpu)); + pass_list.push_back(std::make_shared(build_resources)); instance = std::make_shared(pass_list); } } diff --git a/core/src/scheduler/action/PushTaskToNeighbour.cpp b/core/src/scheduler/action/PushTaskToNeighbour.cpp index b8a4a1164b..9aed678937 100644 --- a/core/src/scheduler/action/PushTaskToNeighbour.cpp +++ b/core/src/scheduler/action/PushTaskToNeighbour.cpp @@ -138,73 +138,41 @@ Action::SpecifiedResourceLabelTaskScheduler(const ResourceMgrPtr& res_mgr, Resou std::shared_ptr event) { auto task_item = event->task_table_item_; auto task = event->task_table_item_->task; - if (resource->type() == ResourceType::DISK) { - // step 1: calculate shortest path per resource, from disk to compute resource - auto compute_resources = res_mgr->GetComputeResources(); - std::vector> paths; - std::vector transport_costs; - for (auto& res : compute_resources) { - std::vector path; - uint64_t transport_cost = ShortestPath(resource, res, res_mgr, path); - transport_costs.push_back(transport_cost); - paths.emplace_back(path); - } - // if (task->job_.lock()->type() == JobType::SEARCH) { - // auto label = task->label(); - // auto spec_label = std::static_pointer_cast(label); - // if (spec_label->resource().lock()->type() == ResourceType::CPU) { - // std::vector spec_path; - // spec_path.push_back(spec_label->resource().lock()->name()); - // spec_path.push_back(resource->name()); - // task->path() = Path(spec_path, spec_path.size() - 1); - // } else { - // // step 2: select min cost, cost(resource) = avg_cost * task_to_do + transport_cost - // uint64_t min_cost = std::numeric_limits::max(); - // uint64_t min_cost_idx = 0; - // for (uint64_t i = 0; i < compute_resources.size(); ++i) { - // if (compute_resources[i]->TotalTasks() == 0) { - // min_cost_idx = i; - // break; - // } - // uint64_t cost = compute_resources[i]->TaskAvgCost() * - // compute_resources[i]->NumOfTaskToExec() + - // transport_costs[i]; - // if (min_cost > cost) { - // min_cost = cost; - // min_cost_idx = i; - // } - // } - // - // // step 3: set path in task - // Path task_path(paths[min_cost_idx], paths[min_cost_idx].size() - 1); - // task->path() = task_path; - // } - // - // } else - if (task->job_.lock()->type() == JobType::BUILD) { - // step2: Read device id in config - // get build index gpu resource - server::Config& config = server::Config::GetInstance(); - int32_t build_index_gpu; - Status stat = config.GetResourceConfigIndexBuildDevice(build_index_gpu); - - bool find_gpu_res = false; - if (res_mgr->GetResource(ResourceType::GPU, build_index_gpu) != nullptr) { - for (uint64_t i = 0; i < compute_resources.size(); ++i) { - if (compute_resources[i]->name() == - res_mgr->GetResource(ResourceType::GPU, build_index_gpu)->name()) { - find_gpu_res = true; - Path task_path(paths[i], paths[i].size() - 1); - task->path() = task_path; - break; - } - } - } - if (not find_gpu_res) { - task->path() = Path(paths[0], paths[0].size() - 1); - } - } - } + // if (resource->type() == ResourceType::DISK) { + // // step 1: calculate shortest path per resource, from disk to compute resource + // auto compute_resources = res_mgr->GetComputeResources(); + // std::vector> paths; + // std::vector transport_costs; + // for (auto& res : compute_resources) { + // std::vector path; + // uint64_t transport_cost = ShortestPath(resource, res, res_mgr, path); + // transport_costs.push_back(transport_cost); + // paths.emplace_back(path); + // } + // if (task->job_.lock()->type() == JobType::BUILD) { + // // step2: Read device id in config + // // get build index gpu resource + // server::Config& config = server::Config::GetInstance(); + // int32_t build_index_gpu; + // Status stat = config.GetResourceConfigIndexBuildDevice(build_index_gpu); + // + // bool find_gpu_res = false; + // if (res_mgr->GetResource(ResourceType::GPU, build_index_gpu) != nullptr) { + // for (uint64_t i = 0; i < compute_resources.size(); ++i) { + // if (compute_resources[i]->name() == + // res_mgr->GetResource(ResourceType::GPU, build_index_gpu)->name()) { + // find_gpu_res = true; + // Path task_path(paths[i], paths[i].size() - 1); + // task->path() = task_path; + // break; + // } + // } + // } + // if (not find_gpu_res) { + // task->path() = Path(paths[0], paths[0].size() - 1); + // } + // } + // } if (resource->name() == task->path().Last()) { resource->WakeupExecutor(); diff --git a/core/src/scheduler/optimizer/BuildIndexPass.cpp b/core/src/scheduler/optimizer/BuildIndexPass.cpp new file mode 100644 index 0000000000..725b2509c3 --- /dev/null +++ b/core/src/scheduler/optimizer/BuildIndexPass.cpp @@ -0,0 +1,46 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#include "scheduler/optimizer/BuildIndexPass.h" +#include "scheduler/SchedInst.h" +#include "scheduler/Utils.h" +#include "scheduler/tasklabel/SpecResLabel.h" + +namespace milvus { +namespace scheduler { + +BuildIndexPass::BuildIndexPass(std::vector& build_gpu_ids) : build_gpu_ids_(build_gpu_ids) { +} + +bool +BuildIndexPass::Run(const TaskPtr& task) { + if (task->Type() != TaskType::BuildIndexTask) + return false; + + if (build_gpu_ids_.empty()) + return false; + + ResourcePtr res_ptr = ResMgrInst::GetInstance()->GetResource(ResourceType::GPU, build_gpu_ids_[specified_gpu_id_]); + auto label = std::make_shared(std::weak_ptr(res_ptr)); + task->label() = label; + + specified_gpu_id_ = (specified_gpu_id_ + 1) % build_gpu_ids_.size(); + return true; +} + +} // namespace scheduler +} // namespace milvus diff --git a/core/src/scheduler/optimizer/BuildIndexPass.h b/core/src/scheduler/optimizer/BuildIndexPass.h new file mode 100644 index 0000000000..a70844bfa9 --- /dev/null +++ b/core/src/scheduler/optimizer/BuildIndexPass.h @@ -0,0 +1,51 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. +#pragma once + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "Pass.h" + +namespace milvus { +namespace scheduler { + +class BuildIndexPass : public Pass { + public: + explicit BuildIndexPass(std::vector& build_gpu_id); + + public: + bool + Run(const TaskPtr& task) override; + + private: + uint64_t specified_gpu_id_ = 0; + std::vector build_gpu_ids_; +}; + +using BuildIndexPassPtr = std::shared_ptr; + +} // namespace scheduler +} // namespace milvus diff --git a/core/src/scheduler/optimizer/OnlyGPUPass.cpp b/core/src/scheduler/optimizer/OnlyGPUPass.cpp index 3fcda0e8a3..e867e45159 100644 --- a/core/src/scheduler/optimizer/OnlyGPUPass.cpp +++ b/core/src/scheduler/optimizer/OnlyGPUPass.cpp @@ -41,12 +41,11 @@ OnlyGPUPass::Run(const TaskPtr& task) { auto gpu_id = get_gpu_pool(); if (gpu_id.empty()) return false; - ResourcePtr res_ptr = ResMgrInst::GetInstance()->GetResource(ResourceType::GPU, gpu_id[specified_gpu_id_]); auto label = std::make_shared(std::weak_ptr(res_ptr)); task->label() = label; - specified_gpu_id_ = specified_gpu_id_++ % gpu_id.size(); + specified_gpu_id_ = (specified_gpu_id_ + 1) % gpu_id.size(); return true; } diff --git a/core/src/server/Config.cpp b/core/src/server/Config.cpp index f130e73a85..a2a1354dd4 100644 --- a/core/src/server/Config.cpp +++ b/core/src/server/Config.cpp @@ -215,8 +215,8 @@ Config::ValidateConfig() { return s; } - int32_t resource_index_build_device; - s = GetResourceConfigIndexBuildDevice(resource_index_build_device); + std::vector index_build_devices; + s = GetResourceConfigIndexBuildDevice(index_build_devices); if (!s.ok()) { return s; } @@ -599,22 +599,24 @@ Config::CheckCacheConfigGpuCacheCapacity(const std::string& value) { return Status(SERVER_INVALID_ARGUMENT, msg); } else { uint64_t gpu_cache_capacity = std::stoi(value) * GB; - int device_id; - Status s = GetResourceConfigIndexBuildDevice(device_id); + std::vector device_ids; + Status s = GetResourceConfigIndexBuildDevice(device_ids); if (!s.ok()) { return s; } size_t gpu_memory; - if (!ValidationUtil::GetGpuMemory(device_id, gpu_memory).ok()) { - std::string msg = "Fail to get GPU memory for GPU device: " + std::to_string(device_id); - return Status(SERVER_UNEXPECTED_ERROR, msg); - } else if (gpu_cache_capacity >= gpu_memory) { - std::string msg = "Invalid gpu cache capacity: " + value + - ". Possible reason: cache_config.gpu_cache_capacity exceeds GPU memory."; - return Status(SERVER_INVALID_ARGUMENT, msg); - } else if (gpu_cache_capacity > (double)gpu_memory * 0.9) { - std::cerr << "Warning: gpu cache capacity value is too big" << std::endl; + for (auto& device_id : device_ids) { + if (!ValidationUtil::GetGpuMemory(device_id, gpu_memory).ok()) { + std::string msg = "Fail to get GPU memory for GPU device: " + std::to_string(device_id); + return Status(SERVER_UNEXPECTED_ERROR, msg); + } else if (gpu_cache_capacity >= gpu_memory) { + std::string msg = "Invalid gpu cache capacity: " + value + + ". Possible reason: cache_config.gpu_cache_capacity exceeds GPU memory."; + return Status(SERVER_INVALID_ARGUMENT, msg); + } else if (gpu_cache_capacity > (double)gpu_memory * 0.9) { + std::cerr << "Warning: gpu cache capacity value is too big" << std::endl; + } } } return Status::OK(); @@ -745,11 +747,21 @@ Config::CheckResourceConfigSearchResources(const std::vector& value } Status -Config::CheckResourceConfigIndexBuildDevice(const std::string& value) { - auto status = CheckResource(value); - if (!status.ok()) { - return Status(SERVER_INVALID_ARGUMENT, status.message()); +Config::CheckResourceConfigIndexBuildDevice(const std::vector& value) { + if (value.empty()) { + std::string msg = + "Invalid index build resource. " + "Possible reason: resource_config.index_build_device is empty."; + return Status(SERVER_INVALID_ARGUMENT, msg); } + + for (auto& resource : value) { + auto status = CheckResource(resource); + if (!status.ok()) { + return Status(SERVER_INVALID_ARGUMENT, status.message()); + } + } + return Status::OK(); } @@ -1036,18 +1048,25 @@ Config::GetResourceConfigSearchResources(std::vector& value) { } Status -Config::GetResourceConfigIndexBuildDevice(int32_t& value) { +Config::GetResourceConfigIndexBuildDevice(std::vector& value) { std::string str = - GetConfigStr(CONFIG_RESOURCE, CONFIG_RESOURCE_INDEX_BUILD_DEVICE, CONFIG_RESOURCE_INDEX_BUILD_DEVICE_DEFAULT); - Status s = CheckResourceConfigIndexBuildDevice(str); + GetConfigSequenceStr(CONFIG_RESOURCE, CONFIG_RESOURCE_INDEX_BUILD_DEVICE, CONFIG_RESOURCE_INDEX_BUILD_DELIMITER, + CONFIG_RESOURCE_INDEX_BUILD_DEVICE_DEFAULT); + std::vector resources; + server::StringHelpFunctions::SplitStringByDelimeter(str, CONFIG_RESOURCE_INDEX_BUILD_DELIMITER, resources); + + Status s = CheckResourceConfigIndexBuildDevice(resources); if (!s.ok()) { return s; } - if (str == "cpu") { - value = CPU_DEVICE_ID; - } else { - value = std::stoi(str.substr(3)); + for (auto res : resources) { + if (res == "cpu") { + value.emplace_back(CPU_DEVICE_ID); + break; + } + int64_t device_id = std::stoi(str.substr(3)); + value.emplace_back(device_id); } return Status::OK(); @@ -1318,7 +1337,10 @@ Config::SetResourceConfigSearchResources(const std::string& value) { Status Config::SetResourceConfigIndexBuildDevice(const std::string& value) { - Status s = CheckResourceConfigIndexBuildDevice(value); + std::vector res_vec; + server::StringHelpFunctions::SplitStringByDelimeter(value, CONFIG_RESOURCE_INDEX_BUILD_DELIMITER, res_vec); + + Status s = CheckResourceConfigIndexBuildDevice(res_vec); if (!s.ok()) { return s; } diff --git a/core/src/server/Config.h b/core/src/server/Config.h index 3ab0cd8053..08b3accdd6 100644 --- a/core/src/server/Config.h +++ b/core/src/server/Config.h @@ -101,10 +101,11 @@ static const char* CONFIG_RESOURCE_SEARCH_RESOURCES_DEFAULT = "cpu,gpu0"; #endif static const char* CONFIG_RESOURCE_INDEX_BUILD_DEVICE = "index_build_device"; +static const char* CONFIG_RESOURCE_INDEX_BUILD_DELIMITER = ","; #ifdef MILVUS_CPU_VERSION static const char* CONFIG_RESOURCE_INDEX_BUILD_DEVICE_DEFAULT = "cpu"; #else -static const char* CONFIG_RESOURCE_INDEX_BUILD_DEVICE_DEFAULT = "gpu0"; +static const char* CONFIG_RESOURCE_INDEX_BUILD_DEVICE_DEFAULT = "cpu,gpu0"; #endif const int32_t CPU_DEVICE_ID = -1; @@ -190,7 +191,7 @@ class Config { Status CheckResourceConfigSearchResources(const std::vector& value); Status - CheckResourceConfigIndexBuildDevice(const std::string& value); + CheckResourceConfigIndexBuildDevice(const std::vector& value); std::string GetConfigStr(const std::string& parent_key, const std::string& child_key, const std::string& default_value = ""); @@ -259,7 +260,7 @@ class Config { Status GetResourceConfigSearchResources(std::vector& value); Status - GetResourceConfigIndexBuildDevice(int32_t& value); + GetResourceConfigIndexBuildDevice(std::vector& value); public: /* server config */ diff --git a/core/src/wrapper/KnowhereResource.cpp b/core/src/wrapper/KnowhereResource.cpp index 8ed19232e9..ccfbcbb6cf 100644 --- a/core/src/wrapper/KnowhereResource.cpp +++ b/core/src/wrapper/KnowhereResource.cpp @@ -48,12 +48,14 @@ KnowhereResource::Initialize() { // get build index gpu resource server::Config& config = server::Config::GetInstance(); - int32_t build_index_gpu; - s = config.GetResourceConfigIndexBuildDevice(build_index_gpu); + std::vector build_index_gpus; + s = config.GetResourceConfigIndexBuildDevice(build_index_gpus); if (!s.ok()) return s; - gpu_resources.insert(std::make_pair(build_index_gpu, GpuResourceSetting())); + for (auto gpu_id : build_index_gpus) { + gpu_resources.insert(std::make_pair(gpu_id, GpuResourceSetting())); + } // get search gpu resource std::vector pool; diff --git a/core/unittest/server/test_config.cpp b/core/unittest/server/test_config.cpp index 637273732d..7f4376bb67 100644 --- a/core/unittest/server/test_config.cpp +++ b/core/unittest/server/test_config.cpp @@ -292,7 +292,8 @@ TEST_F(ConfigTest, SERVER_CONFIG_VALID_TEST) { s = config.SetResourceConfigIndexBuildDevice("gpu" + std::to_string(resource_index_build_device)); #endif ASSERT_TRUE(s.ok()); - s = config.GetResourceConfigIndexBuildDevice(int32_val); + std::vector device_ids; + s = config.GetResourceConfigIndexBuildDevice(device_ids); ASSERT_TRUE(s.ok()); ASSERT_TRUE(int32_val == resource_index_build_device); } From fddfd1eb03c067234e6127730da14677c03c7554 Mon Sep 17 00:00:00 2001 From: "yudong.cai" Date: Mon, 18 Nov 2019 18:47:00 +0800 Subject: [PATCH 2/7] #346 update config APIs to support build index with multiple GPUs --- core/conf/server_cpu_config.template | 6 +- core/conf/server_gpu_config.template | 3 +- core/src/server/Config.cpp | 84 +++++++++++++++------------- core/src/server/Config.h | 16 +++--- core/src/utils/ValidationUtil.cpp | 4 +- core/src/utils/ValidationUtil.h | 4 +- core/unittest/server/test_config.cpp | 33 ++++++----- 7 files changed, 81 insertions(+), 69 deletions(-) diff --git a/core/conf/server_cpu_config.template b/core/conf/server_cpu_config.template index 6c95126390..bc8fc3bb3d 100644 --- a/core/conf/server_cpu_config.template +++ b/core/conf/server_cpu_config.template @@ -27,7 +27,6 @@ metric_config: port: 8080 # port prometheus uses to fetch metrics, must in range [1025, 65534] cache_config: - cpu_cache_capacity: 16 # GB, CPU memory used for cache, must be a positive integer cpu_cache_threshold: 0.85 # percentage of data that will be kept when cache cleanup is triggered, must be in range (0.0, 1.0] cache_insert_data: false # whether to load inserted data into cache, must be a boolean @@ -38,6 +37,7 @@ engine_config: gpu_search_threshold: 1000 # threshold beyond which the search computation is executed on GPUs only resource_config: - search_resources: # define the device used for search computation + search_resources: # define the devices used for search computation, must be in format: cpu or gpux + - cpu + index_build_resources: # define the devices used for index building, must be in format: cpu or gpux - cpu - index_build_device: cpu # CPU used for building index diff --git a/core/conf/server_gpu_config.template b/core/conf/server_gpu_config.template index 154db5d134..c54ed408df 100644 --- a/core/conf/server_gpu_config.template +++ b/core/conf/server_gpu_config.template @@ -42,4 +42,5 @@ resource_config: search_resources: # define the devices used for search computation, must be in format: cpu or gpux - cpu - gpu0 - index_build_device: gpu0 # CPU / GPU used for building index, must be in format: cpu or gpux + index_build_resources: # define the devices used for index building, must be in format: cpu or gpux + - gpu0 \ No newline at end of file diff --git a/core/src/server/Config.cpp b/core/src/server/Config.cpp index f130e73a85..5672ab52aa 100644 --- a/core/src/server/Config.cpp +++ b/core/src/server/Config.cpp @@ -215,8 +215,8 @@ Config::ValidateConfig() { return s; } - int32_t resource_index_build_device; - s = GetResourceConfigIndexBuildDevice(resource_index_build_device); + std::vector index_build_resources; + s = GetResourceConfigIndexBuildResources(index_build_resources); if (!s.ok()) { return s; } @@ -351,7 +351,7 @@ Config::ResetDefaultConfig() { return s; } - s = SetResourceConfigIndexBuildDevice(CONFIG_RESOURCE_INDEX_BUILD_DEVICE_DEFAULT); + s = SetResourceConfigIndexBuildResources(CONFIG_RESOURCE_INDEX_BUILD_RESOURCES_DEFAULT); if (!s.ok()) { return s; } @@ -599,22 +599,28 @@ Config::CheckCacheConfigGpuCacheCapacity(const std::string& value) { return Status(SERVER_INVALID_ARGUMENT, msg); } else { uint64_t gpu_cache_capacity = std::stoi(value) * GB; - int device_id; - Status s = GetResourceConfigIndexBuildDevice(device_id); + std::vector resources; + Status s = GetResourceConfigIndexBuildResources(resources); if (!s.ok()) { return s; } size_t gpu_memory; - if (!ValidationUtil::GetGpuMemory(device_id, gpu_memory).ok()) { - std::string msg = "Fail to get GPU memory for GPU device: " + std::to_string(device_id); - return Status(SERVER_UNEXPECTED_ERROR, msg); - } else if (gpu_cache_capacity >= gpu_memory) { - std::string msg = "Invalid gpu cache capacity: " + value + - ". Possible reason: cache_config.gpu_cache_capacity exceeds GPU memory."; - return Status(SERVER_INVALID_ARGUMENT, msg); - } else if (gpu_cache_capacity > (double)gpu_memory * 0.9) { - std::cerr << "Warning: gpu cache capacity value is too big" << std::endl; + for (auto& resource : resources) { + if (resource == "cpu") { + continue; + } + int32_t device_id = std::stoi(resource.substr(3)); + if (!ValidationUtil::GetGpuMemory(device_id, gpu_memory).ok()) { + std::string msg = "Fail to get GPU memory for GPU device: " + std::to_string(device_id); + return Status(SERVER_UNEXPECTED_ERROR, msg); + } else if (gpu_cache_capacity >= gpu_memory) { + std::string msg = "Invalid gpu cache capacity: " + value + + ". Possible reason: cache_config.gpu_cache_capacity exceeds GPU memory."; + return Status(SERVER_INVALID_ARGUMENT, msg); + } else if (gpu_cache_capacity > (double) gpu_memory * 0.9) { + std::cerr << "Warning: gpu cache capacity value is too big" << std::endl; + } } } return Status::OK(); @@ -745,10 +751,18 @@ Config::CheckResourceConfigSearchResources(const std::vector& value } Status -Config::CheckResourceConfigIndexBuildDevice(const std::string& value) { - auto status = CheckResource(value); - if (!status.ok()) { - return Status(SERVER_INVALID_ARGUMENT, status.message()); +Config::CheckResourceConfigIndexBuildResources(const std::vector& value) { + if (value.empty()) { + std::string msg = + "Invalid build index resource. " + "Possible reason: resource_config.build_index_resources is empty."; + return Status(SERVER_INVALID_ARGUMENT, msg); + } + for (auto& resource : value) { + auto status = CheckResource(resource); + if (!status.ok()) { + return Status(SERVER_INVALID_ARGUMENT, status.message()); + } } return Status::OK(); } @@ -1030,27 +1044,18 @@ Status Config::GetResourceConfigSearchResources(std::vector& value) { std::string str = GetConfigSequenceStr(CONFIG_RESOURCE, CONFIG_RESOURCE_SEARCH_RESOURCES, - CONFIG_RESOURCE_SEARCH_RESOURCES_DELIMITER, CONFIG_RESOURCE_SEARCH_RESOURCES_DEFAULT); - server::StringHelpFunctions::SplitStringByDelimeter(str, CONFIG_RESOURCE_SEARCH_RESOURCES_DELIMITER, value); + CONFIG_RESOURCE_RESOURCES_DELIMITER, CONFIG_RESOURCE_SEARCH_RESOURCES_DEFAULT); + server::StringHelpFunctions::SplitStringByDelimeter(str, CONFIG_RESOURCE_RESOURCES_DELIMITER, value); return CheckResourceConfigSearchResources(value); } Status -Config::GetResourceConfigIndexBuildDevice(int32_t& value) { +Config::GetResourceConfigIndexBuildResources(std::vector& value) { std::string str = - GetConfigStr(CONFIG_RESOURCE, CONFIG_RESOURCE_INDEX_BUILD_DEVICE, CONFIG_RESOURCE_INDEX_BUILD_DEVICE_DEFAULT); - Status s = CheckResourceConfigIndexBuildDevice(str); - if (!s.ok()) { - return s; - } - - if (str == "cpu") { - value = CPU_DEVICE_ID; - } else { - value = std::stoi(str.substr(3)); - } - - return Status::OK(); + GetConfigSequenceStr(CONFIG_RESOURCE, CONFIG_RESOURCE_INDEX_BUILD_RESOURCES, + CONFIG_RESOURCE_RESOURCES_DELIMITER, CONFIG_RESOURCE_INDEX_BUILD_RESOURCES_DEFAULT); + server::StringHelpFunctions::SplitStringByDelimeter(str, CONFIG_RESOURCE_RESOURCES_DELIMITER, value); + return CheckResourceConfigIndexBuildResources(value); } /////////////////////////////////////////////////////////////////////////////// @@ -1305,7 +1310,7 @@ Config::SetResourceConfigMode(const std::string& value) { Status Config::SetResourceConfigSearchResources(const std::string& value) { std::vector res_vec; - server::StringHelpFunctions::SplitStringByDelimeter(value, CONFIG_RESOURCE_SEARCH_RESOURCES_DELIMITER, res_vec); + server::StringHelpFunctions::SplitStringByDelimeter(value, CONFIG_RESOURCE_RESOURCES_DELIMITER, res_vec); Status s = CheckResourceConfigSearchResources(res_vec); if (!s.ok()) { @@ -1317,13 +1322,16 @@ Config::SetResourceConfigSearchResources(const std::string& value) { } Status -Config::SetResourceConfigIndexBuildDevice(const std::string& value) { - Status s = CheckResourceConfigIndexBuildDevice(value); +Config::SetResourceConfigIndexBuildResources(const std::string &value) { + std::vector res_vec; + server::StringHelpFunctions::SplitStringByDelimeter(value, CONFIG_RESOURCE_RESOURCES_DELIMITER, res_vec); + + Status s = CheckResourceConfigIndexBuildResources(res_vec); if (!s.ok()) { return s; } - SetConfigValueInMem(CONFIG_RESOURCE, CONFIG_RESOURCE_INDEX_BUILD_DEVICE, value); + SetConfigValueInMem(CONFIG_RESOURCE, CONFIG_RESOURCE_INDEX_BUILD_RESOURCES, value); return Status::OK(); } diff --git a/core/src/server/Config.h b/core/src/server/Config.h index 3ab0cd8053..0378a079fb 100644 --- a/core/src/server/Config.h +++ b/core/src/server/Config.h @@ -91,20 +91,18 @@ static const char* CONFIG_ENGINE_GPU_SEARCH_THRESHOLD_DEFAULT = "1000"; static const char* CONFIG_RESOURCE = "resource_config"; static const char* CONFIG_RESOURCE_MODE = "mode"; static const char* CONFIG_RESOURCE_MODE_DEFAULT = "simple"; +static const char* CONFIG_RESOURCE_RESOURCES_DELIMITER = ","; static const char* CONFIG_RESOURCE_SEARCH_RESOURCES = "search_resources"; -static const char* CONFIG_RESOURCE_SEARCH_RESOURCES_DELIMITER = ","; - #ifdef MILVUS_CPU_VERSION static const char* CONFIG_RESOURCE_SEARCH_RESOURCES_DEFAULT = "cpu"; #else static const char* CONFIG_RESOURCE_SEARCH_RESOURCES_DEFAULT = "cpu,gpu0"; #endif - -static const char* CONFIG_RESOURCE_INDEX_BUILD_DEVICE = "index_build_device"; +static const char* CONFIG_RESOURCE_INDEX_BUILD_RESOURCES = "index_build_resources"; #ifdef MILVUS_CPU_VERSION -static const char* CONFIG_RESOURCE_INDEX_BUILD_DEVICE_DEFAULT = "cpu"; +static const char* CONFIG_RESOURCE_INDEX_BUILD_RESOURCES_DEFAULT = "cpu"; #else -static const char* CONFIG_RESOURCE_INDEX_BUILD_DEVICE_DEFAULT = "gpu0"; +static const char* CONFIG_RESOURCE_INDEX_BUILD_RESOURCES_DEFAULT = "gpu0"; #endif const int32_t CPU_DEVICE_ID = -1; @@ -190,7 +188,7 @@ class Config { Status CheckResourceConfigSearchResources(const std::vector& value); Status - CheckResourceConfigIndexBuildDevice(const std::string& value); + CheckResourceConfigIndexBuildResources(const std::vector& value); std::string GetConfigStr(const std::string& parent_key, const std::string& child_key, const std::string& default_value = ""); @@ -259,7 +257,7 @@ class Config { Status GetResourceConfigSearchResources(std::vector& value); Status - GetResourceConfigIndexBuildDevice(int32_t& value); + GetResourceConfigIndexBuildResources(std::vector& value); public: /* server config */ @@ -320,7 +318,7 @@ class Config { Status SetResourceConfigSearchResources(const std::string& value); Status - SetResourceConfigIndexBuildDevice(const std::string& value); + SetResourceConfigIndexBuildResources(const std::string& value); private: std::unordered_map> config_map_; diff --git a/core/src/utils/ValidationUtil.cpp b/core/src/utils/ValidationUtil.cpp index ec696ff3e0..080de77e17 100644 --- a/core/src/utils/ValidationUtil.cpp +++ b/core/src/utils/ValidationUtil.cpp @@ -182,7 +182,7 @@ ValidationUtil::ValidatePartitionTags(const std::vector& partition_ } Status -ValidationUtil::ValidateGpuIndex(uint32_t gpu_index) { +ValidationUtil::ValidateGpuIndex(int32_t gpu_index) { #ifdef MILVUS_GPU_VERSION int num_devices = 0; auto cuda_err = cudaGetDeviceCount(&num_devices); @@ -203,7 +203,7 @@ ValidationUtil::ValidateGpuIndex(uint32_t gpu_index) { } Status -ValidationUtil::GetGpuMemory(uint32_t gpu_index, size_t& memory) { +ValidationUtil::GetGpuMemory(int32_t gpu_index, size_t& memory) { #ifdef MILVUS_GPU_VERSION cudaDeviceProp deviceProp; diff --git a/core/src/utils/ValidationUtil.h b/core/src/utils/ValidationUtil.h index 01801e295a..201ccef3bd 100644 --- a/core/src/utils/ValidationUtil.h +++ b/core/src/utils/ValidationUtil.h @@ -59,10 +59,10 @@ class ValidationUtil { ValidatePartitionTags(const std::vector& partition_tags); static Status - ValidateGpuIndex(uint32_t gpu_index); + ValidateGpuIndex(int32_t gpu_index); static Status - GetGpuMemory(uint32_t gpu_index, size_t& memory); + GetGpuMemory(int32_t gpu_index, size_t& memory); static Status ValidateIpAddress(const std::string& ip_address); diff --git a/core/unittest/server/test_config.cpp b/core/unittest/server/test_config.cpp index 637273732d..37be36b7eb 100644 --- a/core/unittest/server/test_config.cpp +++ b/core/unittest/server/test_config.cpp @@ -272,29 +272,34 @@ TEST_F(ConfigTest, SERVER_CONFIG_VALID_TEST) { #else std::vector search_resources = {"cpu", "gpu0"}; #endif - std::vector res_vec; - std::string res_str; + std::vector search_res_vec; + std::string search_res_str; milvus::server::StringHelpFunctions::MergeStringWithDelimeter( - search_resources, milvus::server::CONFIG_RESOURCE_SEARCH_RESOURCES_DELIMITER, res_str); - s = config.SetResourceConfigSearchResources(res_str); + search_resources, milvus::server::CONFIG_RESOURCE_RESOURCES_DELIMITER, search_res_str); + s = config.SetResourceConfigSearchResources(search_res_str); ASSERT_TRUE(s.ok()); - s = config.GetResourceConfigSearchResources(res_vec); + s = config.GetResourceConfigSearchResources(search_res_vec); ASSERT_TRUE(s.ok()); for (size_t i = 0; i < search_resources.size(); i++) { - ASSERT_TRUE(search_resources[i] == res_vec[i]); + ASSERT_TRUE(search_resources[i] == search_res_vec[i]); } #ifdef MILVUS_CPU_VERSION - int32_t resource_index_build_device = milvus::server::CPU_DEVICE_ID; - s = config.SetResourceConfigIndexBuildDevice("cpu"); + std::vector index_build_resources = {"cpu"}; #else - int32_t resource_index_build_device = 0; - s = config.SetResourceConfigIndexBuildDevice("gpu" + std::to_string(resource_index_build_device)); + std::vector index_build_resources = {"gpu0", "gpu1"}; #endif + std::vector index_build_res_vec; + std::string index_build_res_str; + milvus::server::StringHelpFunctions::MergeStringWithDelimeter( + index_build_resources, milvus::server::CONFIG_RESOURCE_RESOURCES_DELIMITER, index_build_res_str); + s = config.SetResourceConfigIndexBuildResources(index_build_res_str); ASSERT_TRUE(s.ok()); - s = config.GetResourceConfigIndexBuildDevice(int32_val); + s = config.GetResourceConfigIndexBuildResources(index_build_res_vec); ASSERT_TRUE(s.ok()); - ASSERT_TRUE(int32_val == resource_index_build_device); + for (size_t i = 0; i < index_build_resources.size(); i++) { + ASSERT_TRUE(index_build_resources[i] == index_build_res_vec[i]); + } } TEST_F(ConfigTest, SERVER_CONFIG_INVALID_TEST) { @@ -418,9 +423,9 @@ TEST_F(ConfigTest, SERVER_CONFIG_INVALID_TEST) { s = config.SetResourceConfigSearchResources("cpu"); ASSERT_TRUE(s.ok()); - s = config.SetResourceConfigIndexBuildDevice("gup2"); + s = config.SetResourceConfigIndexBuildResources("gup2"); ASSERT_FALSE(s.ok()); - s = config.SetResourceConfigIndexBuildDevice("gpu16"); + s = config.SetResourceConfigIndexBuildResources("gpu16"); ASSERT_FALSE(s.ok()); } From 0d1923c61f53ea6ec77c98356e6c8301317d9b74 Mon Sep 17 00:00:00 2001 From: "yudong.cai" Date: Tue, 19 Nov 2019 15:49:40 +0800 Subject: [PATCH 3/7] #346 update gpu resource config APIs --- core/conf/server_cpu_config.template | 15 +- core/conf/server_gpu_config.template | 15 +- core/src/cache/GpuCacheMgr.cpp | 4 +- core/src/server/Config.cpp | 462 ++++++++++++++------------- core/src/server/Config.h | 87 +++-- core/src/utils/ValidationUtil.cpp | 4 +- core/src/utils/ValidationUtil.h | 4 +- core/unittest/server/test_config.cpp | 116 ++++--- 8 files changed, 357 insertions(+), 350 deletions(-) diff --git a/core/conf/server_cpu_config.template b/core/conf/server_cpu_config.template index 6c95126390..ae942f351e 100644 --- a/core/conf/server_cpu_config.template +++ b/core/conf/server_cpu_config.template @@ -27,9 +27,7 @@ metric_config: port: 8080 # port prometheus uses to fetch metrics, must in range [1025, 65534] cache_config: - - cpu_cache_capacity: 16 # GB, CPU memory used for cache, must be a positive integer - cpu_cache_threshold: 0.85 # percentage of data that will be kept when cache cleanup is triggered, must be in range (0.0, 1.0] + cpu_cache_capacity: 16 # GB, size of CPU memory used for cache, must be a positive integer cache_insert_data: false # whether to load inserted data into cache, must be a boolean engine_config: @@ -37,7 +35,10 @@ engine_config: # if nq >= use_blas_threshold, use OpenBlas, slower with stable response times gpu_search_threshold: 1000 # threshold beyond which the search computation is executed on GPUs only -resource_config: - search_resources: # define the device used for search computation - - cpu - index_build_device: cpu # CPU used for building index +gpu_resource_config: + enable_gpu: true # whether to enable GPU resources + cache_capacity: 4 # GB, size of GPU memory per card used for cache, must be a positive integer + search_resources: # define the GPU devices used for search computation, must be in format gpux + - gpu0 + build_index_resources: # define the GPU devices used for index building, must be in format gpux + - gpu0 \ No newline at end of file diff --git a/core/conf/server_gpu_config.template b/core/conf/server_gpu_config.template index 154db5d134..92a294d483 100644 --- a/core/conf/server_gpu_config.template +++ b/core/conf/server_gpu_config.template @@ -27,10 +27,7 @@ metric_config: port: 8080 # port prometheus uses to fetch metrics, must in range [1025, 65534] cache_config: - cpu_cache_capacity: 16 # GB, CPU memory used for cache, must be a positive integer - cpu_cache_threshold: 0.85 # percentage of data that will be kept when cache cleanup is triggered, must be in range (0.0, 1.0] - gpu_cache_capacity: 4 # GB, GPU memory used for cache, must be a positive integer - gpu_cache_threshold: 0.85 # percentage of data that will be kept when cache cleanup is triggered, must be in range (0.0, 1.0] + cpu_cache_capacity: 16 # GB, size of CPU memory used for cache, must be a positive integer cache_insert_data: false # whether to load inserted data into cache, must be a boolean engine_config: @@ -38,8 +35,10 @@ engine_config: # if nq >= use_blas_threshold, use OpenBlas, slower with stable response times gpu_search_threshold: 1000 # threshold beyond which the search computation is executed on GPUs only -resource_config: - search_resources: # define the devices used for search computation, must be in format: cpu or gpux - - cpu +gpu_resource_config: + enable_gpu: false # whether to enable GPU resources + cache_capacity: 4 # GB, size of GPU memory per card used for cache, must be a positive integer + search_resources: # define the GPU devices used for search computation, must be in format gpux - gpu0 - index_build_device: gpu0 # CPU / GPU used for building index, must be in format: cpu or gpux + build_index_resources: # define the GPU devices used for index building, must be in format gpux + - gpu0 \ No newline at end of file diff --git a/core/src/cache/GpuCacheMgr.cpp b/core/src/cache/GpuCacheMgr.cpp index d862bc0393..72229527fa 100644 --- a/core/src/cache/GpuCacheMgr.cpp +++ b/core/src/cache/GpuCacheMgr.cpp @@ -37,7 +37,7 @@ GpuCacheMgr::GpuCacheMgr() { Status s; int64_t gpu_cache_cap; - s = config.GetCacheConfigGpuCacheCapacity(gpu_cache_cap); + s = config.GetGpuResourceConfigCacheCapacity(gpu_cache_cap); if (!s.ok()) { SERVER_LOG_ERROR << s.message(); } @@ -45,7 +45,7 @@ GpuCacheMgr::GpuCacheMgr() { cache_ = std::make_shared>(cap, 1UL << 32); float gpu_mem_threshold; - s = config.GetCacheConfigGpuCacheThreshold(gpu_mem_threshold); + s = config.GetGpuResourceConfigCacheThreshold(gpu_mem_threshold); if (!s.ok()) { SERVER_LOG_ERROR << s.message(); } diff --git a/core/src/server/Config.cpp b/core/src/server/Config.cpp index f130e73a85..017f1641bd 100644 --- a/core/src/server/Config.cpp +++ b/core/src/server/Config.cpp @@ -163,20 +163,6 @@ Config::ValidateConfig() { return s; } -#ifdef MILVUS_GPU_VERSION - int64_t cache_gpu_cache_capacity; - s = GetCacheConfigGpuCacheCapacity(cache_gpu_cache_capacity); - if (!s.ok()) { - return s; - } - - float cache_gpu_cache_threshold; - s = GetCacheConfigGpuCacheThreshold(cache_gpu_cache_threshold); - if (!s.ok()) { - return s; - } -#endif - bool cache_insert_data; s = GetCacheConfigCacheInsertData(cache_insert_data); if (!s.ok()) { @@ -202,25 +188,39 @@ Config::ValidateConfig() { return s; } - /* resource config */ - std::string resource_mode; - s = GetResourceConfigMode(resource_mode); + /* gpu resource config */ +#ifdef MILVUS_GPU_VERSION + bool resource_enable_gpu; + s = GetGpuResourceConfigEnableGpu(resource_enable_gpu); if (!s.ok()) { return s; } - std::vector search_resources; - s = GetResourceConfigSearchResources(search_resources); + int64_t resource_cache_capacity; + s = GetGpuResourceConfigCacheCapacity(resource_cache_capacity); if (!s.ok()) { return s; } - int32_t resource_index_build_device; - s = GetResourceConfigIndexBuildDevice(resource_index_build_device); + float resource_cache_threshold; + s = GetGpuResourceConfigCacheThreshold(resource_cache_threshold); if (!s.ok()) { return s; } + std::vector search_resources; + s = GetGpuResourceConfigSearchResources(search_resources); + if (!s.ok()) { + return s; + } + + std::vector index_build_resources; + s = GetGpuResourceConfigBuildIndexResources(index_build_resources); + if (!s.ok()) { + return s; + } +#endif + return Status::OK(); } @@ -307,18 +307,6 @@ Config::ResetDefaultConfig() { return s; } -#ifdef MILVUS_GPU_VERSION - s = SetCacheConfigGpuCacheCapacity(CONFIG_CACHE_GPU_CACHE_CAPACITY_DEFAULT); - if (!s.ok()) { - return s; - } - - s = SetCacheConfigGpuCacheThreshold(CONFIG_CACHE_GPU_CACHE_THRESHOLD_DEFAULT); - if (!s.ok()) { - return s; - } -#endif - s = SetCacheConfigCacheInsertData(CONFIG_CACHE_CACHE_INSERT_DATA_DEFAULT); if (!s.ok()) { return s; @@ -340,22 +328,34 @@ Config::ResetDefaultConfig() { return s; } - /* resource config */ - s = SetResourceConfigMode(CONFIG_RESOURCE_MODE_DEFAULT); + /* gpu resource config */ +#ifdef MILVUS_GPU_VERSION + s = SetGpuResourceConfigEnableGpu(CONFIG_GPU_RESOURCE_ENABLE_GPU_DEFAULT); if (!s.ok()) { return s; } - s = SetResourceConfigSearchResources(CONFIG_RESOURCE_SEARCH_RESOURCES_DEFAULT); + s = SetGpuResourceConfigCacheCapacity(CONFIG_GPU_RESOURCE_CACHE_CAPACITY_DEFAULT); if (!s.ok()) { return s; } - s = SetResourceConfigIndexBuildDevice(CONFIG_RESOURCE_INDEX_BUILD_DEVICE_DEFAULT); + s = SetGpuResourceConfigCacheThreshold(CONFIG_GPU_RESOURCE_CACHE_THRESHOLD_DEFAULT); if (!s.ok()) { return s; } + s = SetGpuResourceConfigSearchResources(CONFIG_GPU_RESOURCE_SEARCH_RESOURCES_DEFAULT); + if (!s.ok()) { + return s; + } + + s = SetGpuResourceConfigBuildIndexResources(CONFIG_GPU_RESOURCE_BUILD_INDEX_RESOURCES_DEFAULT); + if (!s.ok()) { + return s; + } +#endif + return Status::OK(); } @@ -377,7 +377,7 @@ Config::PrintAll() { PrintConfigSection(CONFIG_CACHE); PrintConfigSection(CONFIG_METRIC); PrintConfigSection(CONFIG_ENGINE); - PrintConfigSection(CONFIG_RESOURCE); + PrintConfigSection(CONFIG_GPU_RESOURCE); } //////////////////////////////////////////////////////////////////////////////// @@ -591,52 +591,6 @@ Config::CheckCacheConfigCpuCacheThreshold(const std::string& value) { return Status::OK(); } -Status -Config::CheckCacheConfigGpuCacheCapacity(const std::string& value) { - if (!ValidationUtil::ValidateStringIsNumber(value).ok()) { - std::string msg = "Invalid gpu cache capacity: " + value + - ". Possible reason: cache_config.gpu_cache_capacity is not a positive integer."; - return Status(SERVER_INVALID_ARGUMENT, msg); - } else { - uint64_t gpu_cache_capacity = std::stoi(value) * GB; - int device_id; - Status s = GetResourceConfigIndexBuildDevice(device_id); - if (!s.ok()) { - return s; - } - - size_t gpu_memory; - if (!ValidationUtil::GetGpuMemory(device_id, gpu_memory).ok()) { - std::string msg = "Fail to get GPU memory for GPU device: " + std::to_string(device_id); - return Status(SERVER_UNEXPECTED_ERROR, msg); - } else if (gpu_cache_capacity >= gpu_memory) { - std::string msg = "Invalid gpu cache capacity: " + value + - ". Possible reason: cache_config.gpu_cache_capacity exceeds GPU memory."; - return Status(SERVER_INVALID_ARGUMENT, msg); - } else if (gpu_cache_capacity > (double)gpu_memory * 0.9) { - std::cerr << "Warning: gpu cache capacity value is too big" << std::endl; - } - } - return Status::OK(); -} - -Status -Config::CheckCacheConfigGpuCacheThreshold(const std::string& value) { - if (!ValidationUtil::ValidateStringIsFloat(value).ok()) { - std::string msg = "Invalid gpu cache threshold: " + value + - ". Possible reason: cache_config.gpu_cache_threshold is not in range (0.0, 1.0]."; - return Status(SERVER_INVALID_ARGUMENT, msg); - } else { - float gpu_cache_threshold = std::stof(value); - if (gpu_cache_threshold <= 0.0 || gpu_cache_threshold >= 1.0) { - std::string msg = "Invalid gpu cache threshold: " + value + - ". Possible reason: cache_config.gpu_cache_threshold is not in range (0.0, 1.0]."; - return Status(SERVER_INVALID_ARGUMENT, msg); - } - } - return Status::OK(); -} - Status Config::CheckCacheConfigCacheInsertData(const std::string& value) { if (!ValidationUtil::ValidateStringIsBool(value).ok()) { @@ -687,56 +641,99 @@ Config::CheckEngineConfigGpuSearchThreshold(const std::string& value) { } Status -Config::CheckResourceConfigMode(const std::string& value) { - if (value != "simple") { - std::string msg = "Invalid resource mode: " + value + ". Possible reason: resource_config.mode is invalid."; +Config::CheckGpuResourceConfigEnableGpu(const std::string& value) { + if (!ValidationUtil::ValidateStringIsBool(value).ok()) { + std::string msg = "Invalid gpu resource config: " + value + + ". Possible reason: gpu_resource_config.enable_gpu is not a boolean."; return Status(SERVER_INVALID_ARGUMENT, msg); } return Status::OK(); } Status -CheckResource(const std::string& value) { +Config::CheckGpuResourceConfigCacheCapacity(const std::string& value) { + if (!ValidationUtil::ValidateStringIsNumber(value).ok()) { + std::string msg = "Invalid gpu cache capacity: " + value + + ". Possible reason: gpu_resource_config.cache_capacity is not a positive integer."; + return Status(SERVER_INVALID_ARGUMENT, msg); + } else { + uint64_t gpu_cache_capacity = std::stoi(value) * GB; + std::vector gpu_ids; + Status s = GetGpuResourceConfigBuildIndexResources(gpu_ids); + if (!s.ok()) { + return s; + } + + for (int32_t gpu_id : gpu_ids) { + size_t gpu_memory; + if (!ValidationUtil::GetGpuMemory(gpu_id, gpu_memory).ok()) { + std::string msg = "Fail to get GPU memory for GPU device: " + std::to_string(gpu_id); + return Status(SERVER_UNEXPECTED_ERROR, msg); + } else if (gpu_cache_capacity >= gpu_memory) { + std::string msg = "Invalid gpu cache capacity: " + value + + ". Possible reason: gpu_resource_config.cache_capacity exceeds GPU memory."; + return Status(SERVER_INVALID_ARGUMENT, msg); + } else if (gpu_cache_capacity > (double)gpu_memory * 0.9) { + std::cerr << "Warning: gpu cache capacity value is too big" << std::endl; + } + } + } + return Status::OK(); +} + +Status +Config::CheckGpuResourceConfigCacheThreshold(const std::string& value) { + if (!ValidationUtil::ValidateStringIsFloat(value).ok()) { + std::string msg = "Invalid gpu cache threshold: " + value + + ". Possible reason: gpu_resource_config.cache_threshold is not in range (0.0, 1.0]."; + return Status(SERVER_INVALID_ARGUMENT, msg); + } else { + float gpu_cache_threshold = std::stof(value); + if (gpu_cache_threshold <= 0.0 || gpu_cache_threshold >= 1.0) { + std::string msg = "Invalid gpu cache threshold: " + value + + ". Possible reason: gpu_resource_config.cache_threshold is not in range (0.0, 1.0]."; + return Status(SERVER_INVALID_ARGUMENT, msg); + } + } + return Status::OK(); +} + +Status +CheckGpuResource(const std::string& value) { std::string s = value; std::transform(s.begin(), s.end(), s.begin(), ::tolower); -#ifdef MILVUS_CPU_VERSION - if (s != "cpu") { - return Status(SERVER_INVALID_ARGUMENT, "Invalid CPU resource: " + s); - } -#else - const std::regex pat("cpu|gpu(\\d+)"); + const std::regex pat("gpu(\\d+)"); std::smatch m; if (!std::regex_match(s, m, pat)) { - std::string msg = "Invalid search resource: " + value + - ". Possible reason: resource_config.search_resources is not in the format of cpux or gpux"; + std::string msg = "Invalid gpu resource: " + value + + ". Possible reason: gpu_resource_config is not in the format of cpux or gpux"; return Status(SERVER_INVALID_ARGUMENT, msg); } if (s.compare(0, 3, "gpu") == 0) { int32_t gpu_index = std::stoi(s.substr(3)); if (!ValidationUtil::ValidateGpuIndex(gpu_index).ok()) { - std::string msg = "Invalid search resource: " + value + - ". Possible reason: resource_config.search_resources does not match your hardware."; + std::string msg = "Invalid gpu resource: " + value + + ". Possible reason: gpu_resource_config does not match with the hardware."; return Status(SERVER_INVALID_ARGUMENT, msg); } } -#endif return Status::OK(); } Status -Config::CheckResourceConfigSearchResources(const std::vector& value) { +Config::CheckGpuResourceConfigSearchResources(const std::vector& value) { if (value.empty()) { std::string msg = - "Invalid search resource. " - "Possible reason: resource_config.search_resources is empty."; + "Invalid gpu search resource. " + "Possible reason: gpu_resource_config.search_resources is empty."; return Status(SERVER_INVALID_ARGUMENT, msg); } for (auto& resource : value) { - auto status = CheckResource(resource); + auto status = CheckGpuResource(resource); if (!status.ok()) { return Status(SERVER_INVALID_ARGUMENT, status.message()); } @@ -745,10 +742,19 @@ Config::CheckResourceConfigSearchResources(const std::vector& value } Status -Config::CheckResourceConfigIndexBuildDevice(const std::string& value) { - auto status = CheckResource(value); - if (!status.ok()) { - return Status(SERVER_INVALID_ARGUMENT, status.message()); +Config::CheckGpuResourceConfigBuildIndexResources(const std::vector& value) { + if (value.empty()) { + std::string msg = + "Invalid gpu build index resource. " + "Possible reason: gpu_resource_config.build_index_resources is empty."; + return Status(SERVER_INVALID_ARGUMENT, msg); + } + + for (auto& resource : value) { + auto status = CheckGpuResource(resource); + if (!status.ok()) { + return Status(SERVER_INVALID_ARGUMENT, status.message()); + } } return Status::OK(); } @@ -855,7 +861,6 @@ Config::GetDBConfigArchiveDiskThreshold(int32_t& value) { if (!s.ok()) { return s; } - value = std::stoi(str); return Status::OK(); } @@ -868,7 +873,6 @@ Config::GetDBConfigArchiveDaysThreshold(int32_t& value) { if (!s.ok()) { return s; } - value = std::stoi(str); return Status::OK(); } @@ -880,7 +884,6 @@ Config::GetDBConfigInsertBufferSize(int32_t& value) { if (!s.ok()) { return s; } - value = std::stoi(str); return Status::OK(); } @@ -898,7 +901,6 @@ Config::GetMetricConfigEnableMonitor(bool& value) { if (!s.ok()) { return s; } - std::transform(str.begin(), str.end(), str.begin(), ::tolower); value = (str == "true" || str == "on" || str == "yes" || str == "1"); return Status::OK(); @@ -924,7 +926,6 @@ Config::GetCacheConfigCpuCacheCapacity(int64_t& value) { if (!s.ok()) { return s; } - value = std::stoi(str); return Status::OK(); } @@ -937,33 +938,6 @@ Config::GetCacheConfigCpuCacheThreshold(float& value) { if (!s.ok()) { return s; } - - value = std::stof(str); - return Status::OK(); -} - -Status -Config::GetCacheConfigGpuCacheCapacity(int64_t& value) { - std::string str = - GetConfigStr(CONFIG_CACHE, CONFIG_CACHE_GPU_CACHE_CAPACITY, CONFIG_CACHE_GPU_CACHE_CAPACITY_DEFAULT); - Status s = CheckCacheConfigGpuCacheCapacity(str); - if (!s.ok()) { - return s; - } - - value = std::stoi(str); - return Status::OK(); -} - -Status -Config::GetCacheConfigGpuCacheThreshold(float& value) { - std::string str = - GetConfigStr(CONFIG_CACHE, CONFIG_CACHE_GPU_CACHE_THRESHOLD, CONFIG_CACHE_GPU_CACHE_THRESHOLD_DEFAULT); - Status s = CheckCacheConfigGpuCacheThreshold(str); - if (!s.ok()) { - return s; - } - value = std::stof(str); return Status::OK(); } @@ -976,7 +950,6 @@ Config::GetCacheConfigCacheInsertData(bool& value) { if (!s.ok()) { return s; } - std::transform(str.begin(), str.end(), str.begin(), ::tolower); value = (str == "true" || str == "on" || str == "yes" || str == "1"); return Status::OK(); @@ -990,7 +963,6 @@ Config::GetEngineConfigUseBlasThreshold(int32_t& value) { if (!s.ok()) { return s; } - value = std::stoi(str); return Status::OK(); } @@ -1002,7 +974,6 @@ Config::GetEngineConfigOmpThreadNum(int32_t& value) { if (!s.ok()) { return s; } - value = std::stoi(str); return Status::OK(); } @@ -1015,41 +986,113 @@ Config::GetEngineConfigGpuSearchThreshold(int32_t& value) { if (!s.ok()) { return s; } - value = std::stoi(str); return Status::OK(); } Status -Config::GetResourceConfigMode(std::string& value) { - value = GetConfigStr(CONFIG_RESOURCE, CONFIG_RESOURCE_MODE, CONFIG_RESOURCE_MODE_DEFAULT); - return CheckResourceConfigMode(value); -} - -Status -Config::GetResourceConfigSearchResources(std::vector& value) { +Config::GetGpuResourceConfigEnableGpu(bool& value) { std::string str = - GetConfigSequenceStr(CONFIG_RESOURCE, CONFIG_RESOURCE_SEARCH_RESOURCES, - CONFIG_RESOURCE_SEARCH_RESOURCES_DELIMITER, CONFIG_RESOURCE_SEARCH_RESOURCES_DEFAULT); - server::StringHelpFunctions::SplitStringByDelimeter(str, CONFIG_RESOURCE_SEARCH_RESOURCES_DELIMITER, value); - return CheckResourceConfigSearchResources(value); -} - -Status -Config::GetResourceConfigIndexBuildDevice(int32_t& value) { - std::string str = - GetConfigStr(CONFIG_RESOURCE, CONFIG_RESOURCE_INDEX_BUILD_DEVICE, CONFIG_RESOURCE_INDEX_BUILD_DEVICE_DEFAULT); - Status s = CheckResourceConfigIndexBuildDevice(str); + GetConfigStr(CONFIG_GPU_RESOURCE, CONFIG_GPU_RESOURCE_ENABLE_GPU, CONFIG_GPU_RESOURCE_ENABLE_GPU_DEFAULT); + Status s = CheckGpuResourceConfigEnableGpu(str); if (!s.ok()) { return s; } + std::transform(str.begin(), str.end(), str.begin(), ::tolower); + value = (str == "true" || str == "on" || str == "yes" || str == "1"); + return Status::OK(); +} - if (str == "cpu") { - value = CPU_DEVICE_ID; - } else { - value = std::stoi(str.substr(3)); +Status +Config::GetGpuResourceConfigCacheCapacity(int64_t& value) { + bool enable_gpu = false; + Status s = GetGpuResourceConfigEnableGpu(enable_gpu); + if (!s.ok()) { + return s; } + if (!enable_gpu) { + std::string msg = "GPU not supported. Possible reason: gpu_resource_config.enable_gpu is set to false."; + return Status(SERVER_UNSUPPORTED_ERROR, msg); + } + std::string str = GetConfigStr(CONFIG_GPU_RESOURCE, CONFIG_GPU_RESOURCE_CACHE_CAPACITY, + CONFIG_GPU_RESOURCE_CACHE_CAPACITY_DEFAULT); + s = CheckGpuResourceConfigCacheCapacity(str); + if (!s.ok()) { + return s; + } + value = std::stoi(str); + return Status::OK(); +} +Status +Config::GetGpuResourceConfigCacheThreshold(float& value) { + bool enable_gpu = false; + Status s = GetGpuResourceConfigEnableGpu(enable_gpu); + if (!s.ok()) { + return s; + } + if (!enable_gpu) { + std::string msg = "GPU not supported. Possible reason: gpu_resource_config.enable_gpu is set to false."; + return Status(SERVER_UNSUPPORTED_ERROR, msg); + } + std::string str = GetConfigStr(CONFIG_GPU_RESOURCE, CONFIG_GPU_RESOURCE_CACHE_THRESHOLD, + CONFIG_GPU_RESOURCE_CACHE_THRESHOLD_DEFAULT); + s = CheckGpuResourceConfigCacheThreshold(str); + if (!s.ok()) { + return s; + } + value = std::stof(str); + return Status::OK(); +} + +Status +Config::GetGpuResourceConfigSearchResources(std::vector& value) { + bool enable_gpu = false; + Status s = GetGpuResourceConfigEnableGpu(enable_gpu); + if (!s.ok()) { + return s; + } + if (!enable_gpu) { + std::string msg = "GPU not supported. Possible reason: gpu_resource_config.enable_gpu is set to false."; + return Status(SERVER_UNSUPPORTED_ERROR, msg); + } + std::string str = GetConfigSequenceStr(CONFIG_GPU_RESOURCE, CONFIG_GPU_RESOURCE_SEARCH_RESOURCES, + CONFIG_GPU_RESOURCE_DELIMITER, CONFIG_GPU_RESOURCE_SEARCH_RESOURCES_DEFAULT); + std::vector res_vec; + server::StringHelpFunctions::SplitStringByDelimeter(str, CONFIG_GPU_RESOURCE_DELIMITER, res_vec); + s = CheckGpuResourceConfigSearchResources(res_vec); + if (!s.ok()) { + return s; + } + for (std::string& res : res_vec) { + value.push_back(std::stoi(res.substr(3))); + } + return Status::OK(); +} + +Status +Config::GetGpuResourceConfigBuildIndexResources(std::vector& value) { + bool enable_gpu = false; + Status s = GetGpuResourceConfigEnableGpu(enable_gpu); + if (!s.ok()) { + return s; + } + if (!enable_gpu) { + std::string msg = "GPU not supported. Possible reason: gpu_resource_config.enable_gpu is set to false."; + return Status(SERVER_UNSUPPORTED_ERROR, msg); + } + std::string str = + GetConfigSequenceStr(CONFIG_GPU_RESOURCE, CONFIG_GPU_RESOURCE_BUILD_INDEX_RESOURCES, + CONFIG_GPU_RESOURCE_DELIMITER, CONFIG_GPU_RESOURCE_BUILD_INDEX_RESOURCES_DEFAULT); + std::vector res_vec; + server::StringHelpFunctions::SplitStringByDelimeter(str, CONFIG_GPU_RESOURCE_DELIMITER, res_vec); + s = CheckGpuResourceConfigBuildIndexResources(res_vec); + if (!s.ok()) { + return s; + } + for (std::string& res : res_vec) { + value.push_back(std::stoi(res.substr(3))); + } return Status::OK(); } @@ -1061,7 +1104,6 @@ Config::SetServerConfigAddress(const std::string& value) { if (!s.ok()) { return s; } - SetConfigValueInMem(CONFIG_SERVER, CONFIG_SERVER_ADDRESS, value); return Status::OK(); } @@ -1072,7 +1114,6 @@ Config::SetServerConfigPort(const std::string& value) { if (!s.ok()) { return s; } - SetConfigValueInMem(CONFIG_SERVER, CONFIG_SERVER_PORT, value); return Status::OK(); } @@ -1083,7 +1124,6 @@ Config::SetServerConfigDeployMode(const std::string& value) { if (!s.ok()) { return s; } - SetConfigValueInMem(CONFIG_SERVER, CONFIG_SERVER_DEPLOY_MODE, value); return Status::OK(); } @@ -1094,7 +1134,6 @@ Config::SetServerConfigTimeZone(const std::string& value) { if (!s.ok()) { return s; } - SetConfigValueInMem(CONFIG_SERVER, CONFIG_SERVER_TIME_ZONE, value); return Status::OK(); } @@ -1106,7 +1145,6 @@ Config::SetDBConfigPrimaryPath(const std::string& value) { if (!s.ok()) { return s; } - SetConfigValueInMem(CONFIG_DB, CONFIG_DB_PRIMARY_PATH, value); return Status::OK(); } @@ -1117,7 +1155,6 @@ Config::SetDBConfigSecondaryPath(const std::string& value) { if (!s.ok()) { return s; } - SetConfigValueInMem(CONFIG_DB, CONFIG_DB_SECONDARY_PATH, value); return Status::OK(); } @@ -1128,7 +1165,6 @@ Config::SetDBConfigBackendUrl(const std::string& value) { if (!s.ok()) { return s; } - SetConfigValueInMem(CONFIG_DB, CONFIG_DB_BACKEND_URL, value); return Status::OK(); } @@ -1139,7 +1175,6 @@ Config::SetDBConfigArchiveDiskThreshold(const std::string& value) { if (!s.ok()) { return s; } - SetConfigValueInMem(CONFIG_DB, CONFIG_DB_ARCHIVE_DISK_THRESHOLD, value); return Status::OK(); } @@ -1150,7 +1185,6 @@ Config::SetDBConfigArchiveDaysThreshold(const std::string& value) { if (!s.ok()) { return s; } - SetConfigValueInMem(CONFIG_DB, CONFIG_DB_ARCHIVE_DAYS_THRESHOLD, value); return Status::OK(); } @@ -1161,7 +1195,6 @@ Config::SetDBConfigInsertBufferSize(const std::string& value) { if (!s.ok()) { return s; } - SetConfigValueInMem(CONFIG_DB, CONFIG_DB_INSERT_BUFFER_SIZE, value); return Status::OK(); } @@ -1173,7 +1206,6 @@ Config::SetMetricConfigEnableMonitor(const std::string& value) { if (!s.ok()) { return s; } - SetConfigValueInMem(CONFIG_METRIC, CONFIG_METRIC_ENABLE_MONITOR, value); return Status::OK(); } @@ -1184,7 +1216,6 @@ Config::SetMetricConfigCollector(const std::string& value) { if (!s.ok()) { return s; } - SetConfigValueInMem(CONFIG_METRIC, CONFIG_METRIC_COLLECTOR, value); return Status::OK(); } @@ -1195,7 +1226,6 @@ Config::SetMetricConfigPrometheusPort(const std::string& value) { if (!s.ok()) { return s; } - SetConfigValueInMem(CONFIG_METRIC, CONFIG_METRIC_PROMETHEUS_PORT, value); return Status::OK(); } @@ -1207,7 +1237,6 @@ Config::SetCacheConfigCpuCacheCapacity(const std::string& value) { if (!s.ok()) { return s; } - SetConfigValueInMem(CONFIG_CACHE, CONFIG_CACHE_CPU_CACHE_CAPACITY, value); return Status::OK(); } @@ -1218,40 +1247,16 @@ Config::SetCacheConfigCpuCacheThreshold(const std::string& value) { if (!s.ok()) { return s; } - SetConfigValueInMem(CONFIG_CACHE, CONFIG_CACHE_CPU_CACHE_THRESHOLD, value); return Status::OK(); } -Status -Config::SetCacheConfigGpuCacheCapacity(const std::string& value) { - Status s = CheckCacheConfigGpuCacheCapacity(value); - if (!s.ok()) { - return s; - } - - SetConfigValueInMem(CONFIG_CACHE, CONFIG_CACHE_GPU_CACHE_CAPACITY, value); - return Status::OK(); -} - -Status -Config::SetCacheConfigGpuCacheThreshold(const std::string& value) { - Status s = CheckCacheConfigGpuCacheThreshold(value); - if (!s.ok()) { - return s; - } - - SetConfigValueInMem(CONFIG_CACHE, CONFIG_CACHE_GPU_CACHE_THRESHOLD, value); - return Status::OK(); -} - Status Config::SetCacheConfigCacheInsertData(const std::string& value) { Status s = CheckCacheConfigCacheInsertData(value); if (!s.ok()) { return s; } - SetConfigValueInMem(CONFIG_CACHE, CONFIG_CACHE_CACHE_INSERT_DATA, value); return Status::OK(); } @@ -1263,7 +1268,6 @@ Config::SetEngineConfigUseBlasThreshold(const std::string& value) { if (!s.ok()) { return s; } - SetConfigValueInMem(CONFIG_ENGINE, CONFIG_ENGINE_USE_BLAS_THRESHOLD, value); return Status::OK(); } @@ -1274,7 +1278,6 @@ Config::SetEngineConfigOmpThreadNum(const std::string& value) { if (!s.ok()) { return s; } - SetConfigValueInMem(CONFIG_ENGINE, CONFIG_ENGINE_OMP_THREAD_NUM, value); return Status::OK(); } @@ -1285,45 +1288,62 @@ Config::SetEngineConfigGpuSearchThreshold(const std::string& value) { if (!s.ok()) { return s; } - SetConfigValueInMem(CONFIG_ENGINE, CONFIG_ENGINE_GPU_SEARCH_THRESHOLD, value); return Status::OK(); } -/* resource config */ +/* gpu resource config */ Status -Config::SetResourceConfigMode(const std::string& value) { - Status s = CheckResourceConfigMode(value); +Config::SetGpuResourceConfigEnableGpu(const std::string& value) { + Status s = CheckGpuResourceConfigEnableGpu(value); if (!s.ok()) { return s; } - - SetConfigValueInMem(CONFIG_RESOURCE, CONFIG_RESOURCE_MODE, value); + SetConfigValueInMem(CONFIG_GPU_RESOURCE, CONFIG_GPU_RESOURCE_ENABLE_GPU, value); return Status::OK(); } Status -Config::SetResourceConfigSearchResources(const std::string& value) { +Config::SetGpuResourceConfigCacheCapacity(const std::string& value) { + Status s = CheckGpuResourceConfigCacheCapacity(value); + if (!s.ok()) { + return s; + } + SetConfigValueInMem(CONFIG_GPU_RESOURCE, CONFIG_GPU_RESOURCE_CACHE_CAPACITY, value); + return Status::OK(); +} + +Status +Config::SetGpuResourceConfigCacheThreshold(const std::string& value) { + Status s = CheckGpuResourceConfigCacheThreshold(value); + if (!s.ok()) { + return s; + } + SetConfigValueInMem(CONFIG_GPU_RESOURCE, CONFIG_GPU_RESOURCE_CACHE_THRESHOLD, value); + return Status::OK(); +} + +Status +Config::SetGpuResourceConfigSearchResources(const std::string& value) { std::vector res_vec; - server::StringHelpFunctions::SplitStringByDelimeter(value, CONFIG_RESOURCE_SEARCH_RESOURCES_DELIMITER, res_vec); - - Status s = CheckResourceConfigSearchResources(res_vec); + server::StringHelpFunctions::SplitStringByDelimeter(value, CONFIG_GPU_RESOURCE_DELIMITER, res_vec); + Status s = CheckGpuResourceConfigSearchResources(res_vec); if (!s.ok()) { return s; } - - SetConfigValueInMem(CONFIG_RESOURCE, CONFIG_RESOURCE_SEARCH_RESOURCES, value); + SetConfigValueInMem(CONFIG_GPU_RESOURCE, CONFIG_GPU_RESOURCE_SEARCH_RESOURCES, value); return Status::OK(); } Status -Config::SetResourceConfigIndexBuildDevice(const std::string& value) { - Status s = CheckResourceConfigIndexBuildDevice(value); +Config::SetGpuResourceConfigBuildIndexResources(const std::string& value) { + std::vector res_vec; + server::StringHelpFunctions::SplitStringByDelimeter(value, CONFIG_GPU_RESOURCE_DELIMITER, res_vec); + Status s = CheckGpuResourceConfigBuildIndexResources(res_vec); if (!s.ok()) { return s; } - - SetConfigValueInMem(CONFIG_RESOURCE, CONFIG_RESOURCE_INDEX_BUILD_DEVICE, value); + SetConfigValueInMem(CONFIG_GPU_RESOURCE, CONFIG_GPU_RESOURCE_BUILD_INDEX_RESOURCES, value); return Status::OK(); } diff --git a/core/src/server/Config.h b/core/src/server/Config.h index 3ab0cd8053..0442ae0626 100644 --- a/core/src/server/Config.h +++ b/core/src/server/Config.h @@ -59,12 +59,8 @@ static const char* CONFIG_DB_PRELOAD_TABLE = "preload_table"; static const char* CONFIG_CACHE = "cache_config"; static const char* CONFIG_CACHE_CPU_CACHE_CAPACITY = "cpu_cache_capacity"; static const char* CONFIG_CACHE_CPU_CACHE_CAPACITY_DEFAULT = "16"; -static const char* CONFIG_CACHE_GPU_CACHE_CAPACITY = "gpu_cache_capacity"; -static const char* CONFIG_CACHE_GPU_CACHE_CAPACITY_DEFAULT = "4"; -static const char* CONFIG_CACHE_CPU_CACHE_THRESHOLD = "cpu_mem_threshold"; +static const char* CONFIG_CACHE_CPU_CACHE_THRESHOLD = "cpu_cache_threshold"; static const char* CONFIG_CACHE_CPU_CACHE_THRESHOLD_DEFAULT = "0.85"; -static const char* CONFIG_CACHE_GPU_CACHE_THRESHOLD = "gpu_mem_threshold"; -static const char* CONFIG_CACHE_GPU_CACHE_THRESHOLD_DEFAULT = "0.85"; static const char* CONFIG_CACHE_CACHE_INSERT_DATA = "cache_insert_data"; static const char* CONFIG_CACHE_CACHE_INSERT_DATA_DEFAULT = "false"; @@ -87,26 +83,23 @@ static const char* CONFIG_ENGINE_OMP_THREAD_NUM_DEFAULT = "0"; static const char* CONFIG_ENGINE_GPU_SEARCH_THRESHOLD = "gpu_search_threshold"; static const char* CONFIG_ENGINE_GPU_SEARCH_THRESHOLD_DEFAULT = "1000"; -/* resource config */ -static const char* CONFIG_RESOURCE = "resource_config"; -static const char* CONFIG_RESOURCE_MODE = "mode"; -static const char* CONFIG_RESOURCE_MODE_DEFAULT = "simple"; -static const char* CONFIG_RESOURCE_SEARCH_RESOURCES = "search_resources"; -static const char* CONFIG_RESOURCE_SEARCH_RESOURCES_DELIMITER = ","; - -#ifdef MILVUS_CPU_VERSION -static const char* CONFIG_RESOURCE_SEARCH_RESOURCES_DEFAULT = "cpu"; +/* gpu resource config */ +static const char* CONFIG_GPU_RESOURCE = "gpu_resource_config"; +static const char* CONFIG_GPU_RESOURCE_ENABLE_GPU = "enable_gpu"; +#ifdef MILVUS_GPU_VERSION +static const char* CONFIG_GPU_RESOURCE_ENABLE_GPU_DEFAULT = "true"; #else -static const char* CONFIG_RESOURCE_SEARCH_RESOURCES_DEFAULT = "cpu,gpu0"; +static const char* CONFIG_GPU_RESOURCE_ENABLE_GPU_DEFAULT = "false"; #endif - -static const char* CONFIG_RESOURCE_INDEX_BUILD_DEVICE = "index_build_device"; -#ifdef MILVUS_CPU_VERSION -static const char* CONFIG_RESOURCE_INDEX_BUILD_DEVICE_DEFAULT = "cpu"; -#else -static const char* CONFIG_RESOURCE_INDEX_BUILD_DEVICE_DEFAULT = "gpu0"; -#endif -const int32_t CPU_DEVICE_ID = -1; +static const char* CONFIG_GPU_RESOURCE_CACHE_CAPACITY = "cache_capacity"; +static const char* CONFIG_GPU_RESOURCE_CACHE_CAPACITY_DEFAULT = "4"; +static const char* CONFIG_GPU_RESOURCE_CACHE_THRESHOLD = "cache_threshold"; +static const char* CONFIG_GPU_RESOURCE_CACHE_THRESHOLD_DEFAULT = "0.85"; +static const char* CONFIG_GPU_RESOURCE_DELIMITER = ","; +static const char* CONFIG_GPU_RESOURCE_SEARCH_RESOURCES = "search_resources"; +static const char* CONFIG_GPU_RESOURCE_SEARCH_RESOURCES_DEFAULT = "gpu0"; +static const char* CONFIG_GPU_RESOURCE_BUILD_INDEX_RESOURCES = "build_index_resources"; +static const char* CONFIG_GPU_RESOURCE_BUILD_INDEX_RESOURCES_DEFAULT = "gpu0"; class Config { public: @@ -170,10 +163,6 @@ class Config { Status CheckCacheConfigCpuCacheThreshold(const std::string& value); Status - CheckCacheConfigGpuCacheCapacity(const std::string& value); - Status - CheckCacheConfigGpuCacheThreshold(const std::string& value); - Status CheckCacheConfigCacheInsertData(const std::string& value); /* engine config */ @@ -184,13 +173,17 @@ class Config { Status CheckEngineConfigGpuSearchThreshold(const std::string& value); - /* resource config */ + /* gpu resource config */ Status - CheckResourceConfigMode(const std::string& value); + CheckGpuResourceConfigEnableGpu(const std::string& value); Status - CheckResourceConfigSearchResources(const std::vector& value); + CheckGpuResourceConfigCacheCapacity(const std::string& value); Status - CheckResourceConfigIndexBuildDevice(const std::string& value); + CheckGpuResourceConfigCacheThreshold(const std::string& value); + Status + CheckGpuResourceConfigSearchResources(const std::vector& value); + Status + CheckGpuResourceConfigBuildIndexResources(const std::vector& value); std::string GetConfigStr(const std::string& parent_key, const std::string& child_key, const std::string& default_value = ""); @@ -239,10 +232,6 @@ class Config { Status GetCacheConfigCpuCacheThreshold(float& value); Status - GetCacheConfigGpuCacheCapacity(int64_t& value); - Status - GetCacheConfigGpuCacheThreshold(float& value); - Status GetCacheConfigCacheInsertData(bool& value); /* engine config */ @@ -253,13 +242,17 @@ class Config { Status GetEngineConfigGpuSearchThreshold(int32_t& value); - /* resource config */ + /* gpu resource config */ Status - GetResourceConfigMode(std::string& value); + GetGpuResourceConfigEnableGpu(bool& value); Status - GetResourceConfigSearchResources(std::vector& value); + GetGpuResourceConfigCacheCapacity(int64_t& value); Status - GetResourceConfigIndexBuildDevice(int32_t& value); + GetGpuResourceConfigCacheThreshold(float& value); + Status + GetGpuResourceConfigSearchResources(std::vector& value); + Status + GetGpuResourceConfigBuildIndexResources(std::vector& value); public: /* server config */ @@ -300,10 +293,6 @@ class Config { Status SetCacheConfigCpuCacheThreshold(const std::string& value); Status - SetCacheConfigGpuCacheCapacity(const std::string& value); - Status - SetCacheConfigGpuCacheThreshold(const std::string& value); - Status SetCacheConfigCacheInsertData(const std::string& value); /* engine config */ @@ -314,13 +303,17 @@ class Config { Status SetEngineConfigGpuSearchThreshold(const std::string& value); - /* resource config */ + /* gpu resource config */ Status - SetResourceConfigMode(const std::string& value); + SetGpuResourceConfigEnableGpu(const std::string& value); Status - SetResourceConfigSearchResources(const std::string& value); + SetGpuResourceConfigCacheCapacity(const std::string& value); Status - SetResourceConfigIndexBuildDevice(const std::string& value); + SetGpuResourceConfigCacheThreshold(const std::string& value); + Status + SetGpuResourceConfigSearchResources(const std::string& value); + Status + SetGpuResourceConfigBuildIndexResources(const std::string& value); private: std::unordered_map> config_map_; diff --git a/core/src/utils/ValidationUtil.cpp b/core/src/utils/ValidationUtil.cpp index ec696ff3e0..080de77e17 100644 --- a/core/src/utils/ValidationUtil.cpp +++ b/core/src/utils/ValidationUtil.cpp @@ -182,7 +182,7 @@ ValidationUtil::ValidatePartitionTags(const std::vector& partition_ } Status -ValidationUtil::ValidateGpuIndex(uint32_t gpu_index) { +ValidationUtil::ValidateGpuIndex(int32_t gpu_index) { #ifdef MILVUS_GPU_VERSION int num_devices = 0; auto cuda_err = cudaGetDeviceCount(&num_devices); @@ -203,7 +203,7 @@ ValidationUtil::ValidateGpuIndex(uint32_t gpu_index) { } Status -ValidationUtil::GetGpuMemory(uint32_t gpu_index, size_t& memory) { +ValidationUtil::GetGpuMemory(int32_t gpu_index, size_t& memory) { #ifdef MILVUS_GPU_VERSION cudaDeviceProp deviceProp; diff --git a/core/src/utils/ValidationUtil.h b/core/src/utils/ValidationUtil.h index 01801e295a..201ccef3bd 100644 --- a/core/src/utils/ValidationUtil.h +++ b/core/src/utils/ValidationUtil.h @@ -59,10 +59,10 @@ class ValidationUtil { ValidatePartitionTags(const std::vector& partition_tags); static Status - ValidateGpuIndex(uint32_t gpu_index); + ValidateGpuIndex(int32_t gpu_index); static Status - GetGpuMemory(uint32_t gpu_index, size_t& memory); + GetGpuMemory(int32_t gpu_index, size_t& memory); static Status ValidateIpAddress(const std::string& ip_address); diff --git a/core/unittest/server/test_config.cpp b/core/unittest/server/test_config.cpp index 637273732d..387d321b85 100644 --- a/core/unittest/server/test_config.cpp +++ b/core/unittest/server/test_config.cpp @@ -216,21 +216,6 @@ TEST_F(ConfigTest, SERVER_CONFIG_VALID_TEST) { s = config.GetCacheConfigCpuCacheThreshold(float_val); ASSERT_TRUE(float_val == cache_cpu_cache_threshold); -#ifdef MILVUS_GPU_VERSION - int64_t cache_gpu_cache_capacity = 1; - s = config.SetCacheConfigGpuCacheCapacity(std::to_string(cache_gpu_cache_capacity)); - ASSERT_TRUE(s.ok()); - s = config.GetCacheConfigGpuCacheCapacity(int64_val); - ASSERT_TRUE(s.ok()); - ASSERT_TRUE(int64_val == cache_gpu_cache_capacity); - - float cache_gpu_cache_threshold = 0.2; - s = config.SetCacheConfigGpuCacheThreshold(std::to_string(cache_gpu_cache_threshold)); - ASSERT_TRUE(s.ok()); - s = config.GetCacheConfigGpuCacheThreshold(float_val); - ASSERT_TRUE(float_val == cache_gpu_cache_threshold); -#endif - bool cache_insert_data = true; s = config.SetCacheConfigCacheInsertData(std::to_string(cache_insert_data)); ASSERT_TRUE(s.ok()); @@ -259,42 +244,54 @@ TEST_F(ConfigTest, SERVER_CONFIG_VALID_TEST) { ASSERT_TRUE(s.ok()); ASSERT_TRUE(int32_val == engine_gpu_search_threshold); - /* resource config */ - std::string resource_mode = "simple"; - s = config.SetResourceConfigMode(resource_mode); + /* gpu resource config */ + bool resource_enable_gpu = true; + s = config.SetGpuResourceConfigEnableGpu(std::to_string(resource_enable_gpu)); ASSERT_TRUE(s.ok()); - s = config.GetResourceConfigMode(str_val); + s = config.GetGpuResourceConfigEnableGpu(bool_val); ASSERT_TRUE(s.ok()); - ASSERT_TRUE(str_val == resource_mode); + ASSERT_TRUE(bool_val == resource_enable_gpu); -#ifdef MILVUS_CPU_VERSION - std::vector search_resources = {"cpu"}; -#else - std::vector search_resources = {"cpu", "gpu0"}; -#endif - std::vector res_vec; - std::string res_str; - milvus::server::StringHelpFunctions::MergeStringWithDelimeter( - search_resources, milvus::server::CONFIG_RESOURCE_SEARCH_RESOURCES_DELIMITER, res_str); - s = config.SetResourceConfigSearchResources(res_str); +#ifdef MILVUS_GPU_VERSION + int64_t gpu_cache_capacity = 1; + s = config.SetGpuResourceConfigCacheCapacity(std::to_string(gpu_cache_capacity)); ASSERT_TRUE(s.ok()); - s = config.GetResourceConfigSearchResources(res_vec); + s = config.GetGpuResourceConfigCacheCapacity(int64_val); + ASSERT_TRUE(s.ok()); + ASSERT_TRUE(int64_val == gpu_cache_capacity); + + float gpu_cache_threshold = 0.2; + s = config.SetGpuResourceConfigCacheThreshold(std::to_string(gpu_cache_threshold)); + ASSERT_TRUE(s.ok()); + s = config.GetGpuResourceConfigCacheThreshold(float_val); + ASSERT_TRUE(float_val == gpu_cache_threshold); + + std::vector search_resources = {"gpu0"}; + std::vector search_res_vec; + std::string search_res_str; + milvus::server::StringHelpFunctions::MergeStringWithDelimeter( + search_resources, milvus::server::CONFIG_GPU_RESOURCE_DELIMITER, search_res_str); + s = config.SetGpuResourceConfigSearchResources(search_res_str); + ASSERT_TRUE(s.ok()); + s = config.GetGpuResourceConfigSearchResources(search_res_vec); ASSERT_TRUE(s.ok()); for (size_t i = 0; i < search_resources.size(); i++) { - ASSERT_TRUE(search_resources[i] == res_vec[i]); + ASSERT_TRUE(std::stoi(search_resources[i].substr(3)) == search_res_vec[i]); } -#ifdef MILVUS_CPU_VERSION - int32_t resource_index_build_device = milvus::server::CPU_DEVICE_ID; - s = config.SetResourceConfigIndexBuildDevice("cpu"); -#else - int32_t resource_index_build_device = 0; - s = config.SetResourceConfigIndexBuildDevice("gpu" + std::to_string(resource_index_build_device)); + std::vector build_index_resources = {"gpu0"}; + std::vector build_index_res_vec; + std::string build_index_res_str; + milvus::server::StringHelpFunctions::MergeStringWithDelimeter( + build_index_resources, milvus::server::CONFIG_GPU_RESOURCE_DELIMITER, build_index_res_str); + s = config.SetGpuResourceConfigBuildIndexResources(build_index_res_str); + ASSERT_TRUE(s.ok()); + s = config.GetGpuResourceConfigBuildIndexResources(build_index_res_vec); + ASSERT_TRUE(s.ok()); + for (size_t i = 0; i < build_index_resources.size(); i++) { + ASSERT_TRUE(std::stoi(build_index_resources[i].substr(3)) == build_index_res_vec[i]); + } #endif - ASSERT_TRUE(s.ok()); - s = config.GetResourceConfigIndexBuildDevice(int32_val); - ASSERT_TRUE(s.ok()); - ASSERT_TRUE(int32_val == resource_index_build_device); } TEST_F(ConfigTest, SERVER_CONFIG_INVALID_TEST) { @@ -381,18 +378,6 @@ TEST_F(ConfigTest, SERVER_CONFIG_INVALID_TEST) { s = config.SetCacheConfigCpuCacheThreshold("1.0"); ASSERT_FALSE(s.ok()); -#ifdef MILVUS_GPU_VERSION - s = config.SetCacheConfigGpuCacheCapacity("a"); - ASSERT_FALSE(s.ok()); - s = config.SetCacheConfigGpuCacheCapacity("128"); - ASSERT_FALSE(s.ok()); - - s = config.SetCacheConfigGpuCacheThreshold("a"); - ASSERT_FALSE(s.ok()); - s = config.SetCacheConfigGpuCacheThreshold("1.0"); - ASSERT_FALSE(s.ok()); -#endif - s = config.SetCacheConfigCacheInsertData("N"); ASSERT_FALSE(s.ok()); @@ -408,20 +393,29 @@ TEST_F(ConfigTest, SERVER_CONFIG_INVALID_TEST) { s = config.SetEngineConfigGpuSearchThreshold("-1"); ASSERT_FALSE(s.ok()); - /* resource config */ - s = config.SetResourceConfigMode("default"); + /* gpu resource config */ + s = config.SetGpuResourceConfigEnableGpu("ok"); ASSERT_FALSE(s.ok()); - s = config.SetResourceConfigSearchResources("gpu10"); +#ifdef MILVUS_GPU_VERSION + s = config.SetGpuResourceConfigCacheCapacity("a"); + ASSERT_FALSE(s.ok()); + s = config.SetGpuResourceConfigCacheCapacity("128"); ASSERT_FALSE(s.ok()); - s = config.SetResourceConfigSearchResources("cpu"); - ASSERT_TRUE(s.ok()); + s = config.SetGpuResourceConfigCacheThreshold("a"); + ASSERT_FALSE(s.ok()); + s = config.SetGpuResourceConfigCacheThreshold("1.0"); + ASSERT_FALSE(s.ok()); - s = config.SetResourceConfigIndexBuildDevice("gup2"); + s = config.SetGpuResourceConfigSearchResources("gpu10"); ASSERT_FALSE(s.ok()); - s = config.SetResourceConfigIndexBuildDevice("gpu16"); + + s = config.SetGpuResourceConfigBuildIndexResources("gup2"); ASSERT_FALSE(s.ok()); + s = config.SetGpuResourceConfigBuildIndexResources("gpu16"); + ASSERT_FALSE(s.ok()); +#endif } TEST_F(ConfigTest, SERVER_CONFIG_TEST) { From c451f8cbf2b66c45759f7ca7bba15212343e2a91 Mon Sep 17 00:00:00 2001 From: fishpenguin Date: Tue, 19 Nov 2019 19:40:17 +0800 Subject: [PATCH 4/7] clange format and add FallbackPass --- core/src/index/knowhere/knowhere/index/vector_index/IndexIVF.h | 1 - core/src/scheduler/SchedInst.h | 3 +-- 2 files changed, 1 insertion(+), 3 deletions(-) diff --git a/core/src/index/knowhere/knowhere/index/vector_index/IndexIVF.h b/core/src/index/knowhere/knowhere/index/vector_index/IndexIVF.h index 4bb4092b75..e064b6f08c 100644 --- a/core/src/index/knowhere/knowhere/index/vector_index/IndexIVF.h +++ b/core/src/index/knowhere/knowhere/index/vector_index/IndexIVF.h @@ -47,7 +47,6 @@ class IVF : public VectorIndex, public FaissBaseIndex { void set_index_model(IndexModelPtr model) override; - void Add(const DatasetPtr& dataset, const Config& config) override; diff --git a/core/src/scheduler/SchedInst.h b/core/src/scheduler/SchedInst.h index 48eaffc0a5..44fea1b6d3 100644 --- a/core/src/scheduler/SchedInst.h +++ b/core/src/scheduler/SchedInst.h @@ -104,14 +104,13 @@ class OptimizerInst { pass_list.push_back(std::make_shared()); #ifdef MILVUS_CPU_VERSION pass_list.push_back(std::make_shared()); -<<<<<<< HEAD #else server::Config& config = server::Config::GetInstance(); std::vector build_resources; config.GetGpuResourceConfigBuildIndexResources(build_resources); pass_list.push_back(std::make_shared(build_resources)); #endif -// pass_list.push_back(std::make_shared()); + pass_list.push_back(std::make_shared()); instance = std::make_shared(pass_list); } } From fdb7decdd3debbeae3d74cd7fd5ec608b319ca19 Mon Sep 17 00:00:00 2001 From: fishpenguin Date: Tue, 19 Nov 2019 20:30:43 +0800 Subject: [PATCH 5/7] Remove DefaultLabel --- core/src/scheduler/JobMgr.cpp | 8 +-- core/src/scheduler/JobMgr.h | 5 +- core/src/scheduler/Scheduler.cpp | 4 -- core/src/scheduler/TaskCreator.cpp | 9 +-- core/src/scheduler/Utils.cpp | 1 - core/src/scheduler/action/Action.h | 4 -- .../scheduler/action/PushTaskToNeighbour.cpp | 32 --------- core/src/scheduler/tasklabel/DefaultLabel.h | 36 ---------- core/src/scheduler/tasklabel/TaskLabel.h | 1 - core/unittest/scheduler/CMakeLists.txt | 1 - core/unittest/scheduler/test_normal.cpp | 72 ------------------- core/unittest/scheduler/test_resource.cpp | 18 +++-- core/unittest/scheduler/test_resource_mgr.cpp | 4 +- core/unittest/scheduler/test_scheduler.cpp | 41 ----------- core/unittest/scheduler/test_tasktable.cpp | 9 +-- 15 files changed, 26 insertions(+), 219 deletions(-) delete mode 100644 core/src/scheduler/tasklabel/DefaultLabel.h delete mode 100644 core/unittest/scheduler/test_normal.cpp diff --git a/core/src/scheduler/JobMgr.cpp b/core/src/scheduler/JobMgr.cpp index 8dd095a3fa..76c07fe459 100644 --- a/core/src/scheduler/JobMgr.cpp +++ b/core/src/scheduler/JobMgr.cpp @@ -85,7 +85,7 @@ JobMgr::worker_function() { } for (auto& task : tasks) { - calculate_path(task); + calculate_path(res_mgr_, task); } // disk resources NEVER be empty. @@ -103,7 +103,7 @@ JobMgr::build_task(const JobPtr& job) { } void -JobMgr::calculate_path(const TaskPtr& task) { +JobMgr::calculate_path(const ResourceMgrPtr& res_mgr, const TaskPtr& task) { if (task->type_ != TaskType::SearchTask && task->type_ != TaskType::BuildIndexTask) { return; } @@ -114,9 +114,9 @@ JobMgr::calculate_path(const TaskPtr& task) { std::vector path; auto spec_label = std::static_pointer_cast(task->label()); - auto src = res_mgr_->GetDiskResources()[0]; + auto src = res_mgr->GetDiskResources()[0]; auto dest = spec_label->resource(); - ShortestPath(src.lock(), dest.lock(), res_mgr_, path); + ShortestPath(src.lock(), dest.lock(), res_mgr, path); task->path() = Path(path, path.size() - 1); } diff --git a/core/src/scheduler/JobMgr.h b/core/src/scheduler/JobMgr.h index fbd6c0ee45..af072614b5 100644 --- a/core/src/scheduler/JobMgr.h +++ b/core/src/scheduler/JobMgr.h @@ -59,8 +59,9 @@ class JobMgr : public interface::dumpable { static std::vector build_task(const JobPtr& job); - void - calculate_path(const TaskPtr& task); + public: + static void + calculate_path(const ResourceMgrPtr& res_mgr, const TaskPtr& task); private: bool running_ = false; diff --git a/core/src/scheduler/Scheduler.cpp b/core/src/scheduler/Scheduler.cpp index 8d2d4406f8..68d7457aa9 100644 --- a/core/src/scheduler/Scheduler.cpp +++ b/core/src/scheduler/Scheduler.cpp @@ -108,10 +108,6 @@ Scheduler::OnLoadCompleted(const EventPtr& event) { auto task_table_type = load_completed_event->task_table_item_->task->label()->Type(); switch (task_table_type) { - case TaskLabelType::DEFAULT: { - Action::DefaultLabelTaskScheduler(res_mgr_, resource, load_completed_event); - break; - } case TaskLabelType::SPECIFIED_RESOURCE: { Action::SpecifiedResourceLabelTaskScheduler(res_mgr_, resource, load_completed_event); break; diff --git a/core/src/scheduler/TaskCreator.cpp b/core/src/scheduler/TaskCreator.cpp index 3b7f6fd82e..30b76cc5bb 100644 --- a/core/src/scheduler/TaskCreator.cpp +++ b/core/src/scheduler/TaskCreator.cpp @@ -18,7 +18,6 @@ #include "scheduler/TaskCreator.h" #include "SchedInst.h" #include "tasklabel/BroadcastLabel.h" -#include "tasklabel/DefaultLabel.h" #include "tasklabel/SpecResLabel.h" namespace milvus { @@ -47,8 +46,7 @@ std::vector TaskCreator::Create(const SearchJobPtr& job) { std::vector tasks; for (auto& index_file : job->index_files()) { - auto label = std::make_shared(); - auto task = std::make_shared(index_file.second, label); + auto task = std::make_shared(index_file.second, nullptr); task->job_ = job; tasks.emplace_back(task); } @@ -70,11 +68,8 @@ TaskCreator::Create(const DeleteJobPtr& job) { std::vector TaskCreator::Create(const BuildIndexJobPtr& job) { std::vector tasks; - ResourcePtr res_ptr = ResMgrInst::GetInstance()->GetResource("cpu"); - for (auto& to_index_file : job->to_index_files()) { - auto label = std::make_shared(std::weak_ptr(res_ptr)); - auto task = std::make_shared(to_index_file.second, label); + auto task = std::make_shared(to_index_file.second, nullptr); task->job_ = job; tasks.emplace_back(task); } diff --git a/core/src/scheduler/Utils.cpp b/core/src/scheduler/Utils.cpp index 1be04ba80b..527b9adf03 100644 --- a/core/src/scheduler/Utils.cpp +++ b/core/src/scheduler/Utils.cpp @@ -16,7 +16,6 @@ // under the License. #include "scheduler/Utils.h" -#include "server/Config.h" #ifdef MILVUS_GPU_VERSION #include diff --git a/core/src/scheduler/action/Action.h b/core/src/scheduler/action/Action.h index f5f828cbf6..7391d287a8 100644 --- a/core/src/scheduler/action/Action.h +++ b/core/src/scheduler/action/Action.h @@ -36,10 +36,6 @@ class Action { static void PushTaskToResource(TaskTableItemPtr task_item, const ResourcePtr& dest); - static void - DefaultLabelTaskScheduler(const ResourceMgrPtr& res_mgr, ResourcePtr resource, - std::shared_ptr event); - static void SpecifiedResourceLabelTaskScheduler(const ResourceMgrPtr& res_mgr, ResourcePtr resource, std::shared_ptr event); diff --git a/core/src/scheduler/action/PushTaskToNeighbour.cpp b/core/src/scheduler/action/PushTaskToNeighbour.cpp index 9aed678937..f49f1d871f 100644 --- a/core/src/scheduler/action/PushTaskToNeighbour.cpp +++ b/core/src/scheduler/action/PushTaskToNeighbour.cpp @@ -101,38 +101,6 @@ Action::PushTaskToResource(TaskTableItemPtr task_item, const ResourcePtr& dest) dest->task_table().Put(task_item->task, task_item); } -void -Action::DefaultLabelTaskScheduler(const ResourceMgrPtr& res_mgr, ResourcePtr resource, - std::shared_ptr event) { - if (not resource->HasExecutor() && event->task_table_item_->Move()) { - auto task_item = event->task_table_item_; - auto task = event->task_table_item_->task; - auto search_task = std::static_pointer_cast(task); - bool moved = false; - - // to support test task, REFACTOR - if (resource->type() == ResourceType::CPU) { - if (auto index_engine = search_task->index_engine_) { - auto location = index_engine->GetLocation(); - - for (auto i = 0; i < res_mgr->GetNumGpuResource(); ++i) { - auto index = milvus::cache::GpuCacheMgr::GetInstance(i)->GetIndex(location); - if (index != nullptr) { - moved = true; - auto dest_resource = res_mgr->GetResource(ResourceType::GPU, i); - PushTaskToResource(event->task_table_item_, dest_resource); - break; - } - } - } - } - - if (not moved) { - PushTaskToNeighbourRandomly(task_item, resource); - } - } -} - void Action::SpecifiedResourceLabelTaskScheduler(const ResourceMgrPtr& res_mgr, ResourcePtr resource, std::shared_ptr event) { diff --git a/core/src/scheduler/tasklabel/DefaultLabel.h b/core/src/scheduler/tasklabel/DefaultLabel.h deleted file mode 100644 index c215743575..0000000000 --- a/core/src/scheduler/tasklabel/DefaultLabel.h +++ /dev/null @@ -1,36 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -#pragma once - -#include "TaskLabel.h" - -#include - -namespace milvus { -namespace scheduler { - -class DefaultLabel : public TaskLabel { - public: - DefaultLabel() : TaskLabel(TaskLabelType::DEFAULT) { - } -}; - -using DefaultLabelPtr = std::shared_ptr; - -} // namespace scheduler -} // namespace milvus diff --git a/core/src/scheduler/tasklabel/TaskLabel.h b/core/src/scheduler/tasklabel/TaskLabel.h index d35ce409ff..33e6eb6e57 100644 --- a/core/src/scheduler/tasklabel/TaskLabel.h +++ b/core/src/scheduler/tasklabel/TaskLabel.h @@ -23,7 +23,6 @@ namespace milvus { namespace scheduler { enum class TaskLabelType { - DEFAULT, // means can be executed in any resource SPECIFIED_RESOURCE, // means must executing in special resource BROADCAST, // means all enable-executor resource must execute task }; diff --git a/core/unittest/scheduler/CMakeLists.txt b/core/unittest/scheduler/CMakeLists.txt index 0148441c2d..878f0f23a9 100644 --- a/core/unittest/scheduler/CMakeLists.txt +++ b/core/unittest/scheduler/CMakeLists.txt @@ -21,7 +21,6 @@ set(test_files ${CMAKE_CURRENT_SOURCE_DIR}/test_algorithm.cpp ${CMAKE_CURRENT_SOURCE_DIR}/test_event.cpp ${CMAKE_CURRENT_SOURCE_DIR}/test_node.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/test_normal.cpp ${CMAKE_CURRENT_SOURCE_DIR}/test_resource.cpp ${CMAKE_CURRENT_SOURCE_DIR}/test_resource_factory.cpp ${CMAKE_CURRENT_SOURCE_DIR}/test_resource_mgr.cpp diff --git a/core/unittest/scheduler/test_normal.cpp b/core/unittest/scheduler/test_normal.cpp deleted file mode 100644 index 20cd39d0bf..0000000000 --- a/core/unittest/scheduler/test_normal.cpp +++ /dev/null @@ -1,72 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -#include -#include "scheduler/ResourceFactory.h" -#include "scheduler/ResourceMgr.h" -#include "scheduler/SchedInst.h" -#include "scheduler/Scheduler.h" -#include "scheduler/task/TestTask.h" -#include "scheduler/tasklabel/DefaultLabel.h" -#include "utils/Log.h" - -namespace { - -namespace ms = milvus::scheduler; - -} // namespace - -TEST(NormalTest, INST_TEST) { - // ResourceMgr only compose resources, provide unified event - auto res_mgr = ms::ResMgrInst::GetInstance(); - - res_mgr->Add(ms::ResourceFactory::Create("disk", "DISK", 0, true, false)); - res_mgr->Add(ms::ResourceFactory::Create("cpu", "CPU", 0, true, true)); - - auto IO = ms::Connection("IO", 500.0); - res_mgr->Connect("disk", "cpu", IO); - - auto scheduler = ms::SchedInst::GetInstance(); - - res_mgr->Start(); - scheduler->Start(); - - const uint64_t NUM_TASK = 2; - std::vector> tasks; - ms::TableFileSchemaPtr dummy = nullptr; - - auto disks = res_mgr->GetDiskResources(); - ASSERT_FALSE(disks.empty()); - if (auto observe = disks[0].lock()) { - for (uint64_t i = 0; i < NUM_TASK; ++i) { - auto label = std::make_shared(); - auto task = std::make_shared(dummy, label); - task->label() = std::make_shared(); - tasks.push_back(task); - observe->task_table().Put(task); - } - } - - for (auto& task : tasks) { - task->Wait(); - ASSERT_EQ(task->load_count_, 1); - ASSERT_EQ(task->exec_count_, 1); - } - - scheduler->Stop(); - res_mgr->Stop(); -} diff --git a/core/unittest/scheduler/test_resource.cpp b/core/unittest/scheduler/test_resource.cpp index 6fe85ec558..101dcd3976 100644 --- a/core/unittest/scheduler/test_resource.cpp +++ b/core/unittest/scheduler/test_resource.cpp @@ -24,7 +24,7 @@ #include "scheduler/resource/TestResource.h" #include "scheduler/task/Task.h" #include "scheduler/task/TestTask.h" -#include "scheduler/tasklabel/DefaultLabel.h" +#include "scheduler/tasklabel/SpecResLabel.h" namespace milvus { namespace scheduler { @@ -182,8 +182,10 @@ TEST_F(ResourceAdvanceTest, DISK_RESOURCE_TEST) { std::vector> tasks; TableFileSchemaPtr dummy = nullptr; for (uint64_t i = 0; i < NUM; ++i) { - auto label = std::make_shared(); + auto label = std::make_shared(disk_resource_); auto task = std::make_shared(dummy, label); + std::vector path{disk_resource_->name()}; + task->path() = Path(path, 0); tasks.push_back(task); disk_resource_->task_table().Put(task); } @@ -208,8 +210,10 @@ TEST_F(ResourceAdvanceTest, CPU_RESOURCE_TEST) { std::vector> tasks; TableFileSchemaPtr dummy = nullptr; for (uint64_t i = 0; i < NUM; ++i) { - auto label = std::make_shared(); + auto label = std::make_shared(cpu_resource_); auto task = std::make_shared(dummy, label); + std::vector path{cpu_resource_->name()}; + task->path() = Path(path, 0); tasks.push_back(task); cpu_resource_->task_table().Put(task); } @@ -234,8 +238,10 @@ TEST_F(ResourceAdvanceTest, GPU_RESOURCE_TEST) { std::vector> tasks; TableFileSchemaPtr dummy = nullptr; for (uint64_t i = 0; i < NUM; ++i) { - auto label = std::make_shared(); + auto label = std::make_shared(gpu_resource_); auto task = std::make_shared(dummy, label); + std::vector path{gpu_resource_->name()}; + task->path() = Path(path, 0); tasks.push_back(task); gpu_resource_->task_table().Put(task); } @@ -260,8 +266,10 @@ TEST_F(ResourceAdvanceTest, TEST_RESOURCE_TEST) { std::vector> tasks; TableFileSchemaPtr dummy = nullptr; for (uint64_t i = 0; i < NUM; ++i) { - auto label = std::make_shared(); + auto label = std::make_shared(test_resource_); auto task = std::make_shared(dummy, label); + std::vector path{test_resource_->name()}; + task->path() = Path(path, 0); tasks.push_back(task); test_resource_->task_table().Put(task); } diff --git a/core/unittest/scheduler/test_resource_mgr.cpp b/core/unittest/scheduler/test_resource_mgr.cpp index b9127060bd..d6495971c4 100644 --- a/core/unittest/scheduler/test_resource_mgr.cpp +++ b/core/unittest/scheduler/test_resource_mgr.cpp @@ -22,7 +22,6 @@ #include "scheduler/resource/GpuResource.h" #include "scheduler/resource/TestResource.h" #include "scheduler/task/TestTask.h" -#include "scheduler/tasklabel/DefaultLabel.h" namespace milvus { namespace scheduler { @@ -187,8 +186,7 @@ TEST_F(ResourceMgrAdvanceTest, REGISTER_SUBSCRIBER) { auto callback = [&](EventPtr event) { flag = true; }; mgr1_->RegisterSubscriber(callback); TableFileSchemaPtr dummy = nullptr; - auto label = std::make_shared(); - disk_res->task_table().Put(std::make_shared(dummy, label)); + disk_res->task_table().Put(std::make_shared(dummy, nullptr)); sleep(1); ASSERT_TRUE(flag); } diff --git a/core/unittest/scheduler/test_scheduler.cpp b/core/unittest/scheduler/test_scheduler.cpp index 22fb9be723..72538113c3 100644 --- a/core/unittest/scheduler/test_scheduler.cpp +++ b/core/unittest/scheduler/test_scheduler.cpp @@ -23,7 +23,6 @@ #include "scheduler/Scheduler.h" #include "scheduler/resource/Resource.h" #include "scheduler/task/TestTask.h" -#include "scheduler/tasklabel/DefaultLabel.h" #include "scheduler/tasklabel/SpecResLabel.h" #include "utils/Error.h" #include "wrapper/VecIndex.h" @@ -150,46 +149,6 @@ insert_dummy_index_into_gpu_cache(uint64_t device_id) { cache::GpuCacheMgr::GetInstance(device_id)->InsertItem("location", obj); } -TEST_F(SchedulerTest, ON_LOAD_COMPLETED) { - const uint64_t NUM = 10; - std::vector> tasks; - TableFileSchemaPtr dummy = std::make_shared(); - dummy->location_ = "location"; - - insert_dummy_index_into_gpu_cache(1); - - for (uint64_t i = 0; i < NUM; ++i) { - auto label = std::make_shared(); - auto task = std::make_shared(dummy, label); - task->label() = std::make_shared(); - tasks.push_back(task); - cpu_resource_.lock()->task_table().Put(task); - } - - sleep(3); - ASSERT_EQ(res_mgr_->GetResource(ResourceType::GPU, 1)->task_table().size(), NUM); -} - -TEST_F(SchedulerTest, PUSH_TASK_TO_NEIGHBOUR_RANDOMLY_TEST) { - const uint64_t NUM = 10; - std::vector> tasks; - TableFileSchemaPtr dummy1 = std::make_shared(); - dummy1->location_ = "location"; - - tasks.clear(); - - for (uint64_t i = 0; i < NUM; ++i) { - auto label = std::make_shared(); - auto task = std::make_shared(dummy1, label); - task->label() = std::make_shared(); - tasks.push_back(task); - cpu_resource_.lock()->task_table().Put(task); - } - - sleep(3); - // ASSERT_EQ(res_mgr_->GetResource(ResourceType::GPU, 1)->task_table().Size(), NUM); -} - class SchedulerTest2 : public testing::Test { protected: void diff --git a/core/unittest/scheduler/test_tasktable.cpp b/core/unittest/scheduler/test_tasktable.cpp index 28a2e29c98..3fa1beabeb 100644 --- a/core/unittest/scheduler/test_tasktable.cpp +++ b/core/unittest/scheduler/test_tasktable.cpp @@ -18,7 +18,6 @@ #include #include "scheduler/TaskTable.h" #include "scheduler/task/TestTask.h" -#include "scheduler/tasklabel/DefaultLabel.h" /************ TaskTableBaseTest ************/ @@ -162,9 +161,8 @@ class TaskTableBaseTest : public ::testing::Test { SetUp() override { milvus::scheduler::TableFileSchemaPtr dummy = nullptr; invalid_task_ = nullptr; - auto label = std::make_shared(); - task1_ = std::make_shared(dummy, label); - task2_ = std::make_shared(dummy, label); + task1_ = std::make_shared(dummy, nullptr); + task2_ = std::make_shared(dummy, nullptr); } milvus::scheduler::TaskPtr invalid_task_; @@ -320,8 +318,7 @@ class TaskTableAdvanceTest : public ::testing::Test { SetUp() override { milvus::scheduler::TableFileSchemaPtr dummy = nullptr; for (uint64_t i = 0; i < 8; ++i) { - auto label = std::make_shared(); - auto task = std::make_shared(dummy, label); + auto task = std::make_shared(dummy, nullptr); table1_.Put(task); } From a13677b65f252b9c4025c4c2e3a25dfb8e6e112c Mon Sep 17 00:00:00 2001 From: fishpenguin Date: Tue, 19 Nov 2019 21:10:43 +0800 Subject: [PATCH 6/7] Refactor optimizer --- core/src/scheduler/SchedInst.h | 22 +++--- .../scheduler/optimizer/BuildIndexPass.cpp | 9 ++- core/src/scheduler/optimizer/BuildIndexPass.h | 2 +- .../src/scheduler/optimizer/FaissFlatPass.cpp | 69 +++++++++++++++++++ .../{LargeSQ8HPass.h => FaissFlatPass.h} | 6 +- .../{OnlyCPUPass.cpp => FaissIVFFlatPass.cpp} | 41 ++++++++--- .../{HybridPass.h => FaissIVFFlatPass.h} | 12 +++- ...LargeSQ8HPass.cpp => FaissIVFSQ8HPass.cpp} | 40 +++-------- .../scheduler/optimizer/FaissIVFSQ8HPass.h | 56 +++++++++++++++ .../scheduler/optimizer/FaissIVFSQ8Pass.cpp | 69 +++++++++++++++++++ .../{OnlyCPUPass.h => FaissIVFSQ8Pass.h} | 12 +++- core/src/scheduler/optimizer/HybridPass.cpp | 47 ------------- 12 files changed, 273 insertions(+), 112 deletions(-) create mode 100644 core/src/scheduler/optimizer/FaissFlatPass.cpp rename core/src/scheduler/optimizer/{LargeSQ8HPass.h => FaissFlatPass.h} (91%) rename core/src/scheduler/optimizer/{OnlyCPUPass.cpp => FaissIVFFlatPass.cpp} (56%) rename core/src/scheduler/optimizer/{HybridPass.h => FaissIVFFlatPass.h} (81%) rename core/src/scheduler/optimizer/{LargeSQ8HPass.cpp => FaissIVFSQ8HPass.cpp} (61%) create mode 100644 core/src/scheduler/optimizer/FaissIVFSQ8HPass.h create mode 100644 core/src/scheduler/optimizer/FaissIVFSQ8Pass.cpp rename core/src/scheduler/optimizer/{OnlyCPUPass.h => FaissIVFSQ8Pass.h} (81%) delete mode 100644 core/src/scheduler/optimizer/HybridPass.cpp diff --git a/core/src/scheduler/SchedInst.h b/core/src/scheduler/SchedInst.h index 44fea1b6d3..912f5edf8d 100644 --- a/core/src/scheduler/SchedInst.h +++ b/core/src/scheduler/SchedInst.h @@ -24,9 +24,10 @@ #include "Utils.h" #include "optimizer/BuildIndexPass.h" #include "optimizer/FallbackPass.h" -#include "optimizer/HybridPass.h" -#include "optimizer/LargeSQ8HPass.h" -#include "optimizer/OnlyCPUPass.h" +#include "optimizer/FaissFlatPass.h" +#include "optimizer/FaissIVFFlatPass.h" +#include "optimizer/FaissIVFSQ8Pass.h" +#include "optimizer/FaissIVFSQ8HPass.h" #include "optimizer/Optimizer.h" #include "server/Config.h" @@ -100,15 +101,12 @@ class OptimizerInst { std::lock_guard lock(mutex_); if (instance == nullptr) { std::vector pass_list; - pass_list.push_back(std::make_shared()); - pass_list.push_back(std::make_shared()); -#ifdef MILVUS_CPU_VERSION - pass_list.push_back(std::make_shared()); -#else - server::Config& config = server::Config::GetInstance(); - std::vector build_resources; - config.GetGpuResourceConfigBuildIndexResources(build_resources); - pass_list.push_back(std::make_shared(build_resources)); +#ifdef MILVUS_GPU_VERSION + pass_list.push_back(std::make_shared()); + pass_list.push_back(std::make_shared()); + pass_list.push_back(std::make_shared()); + pass_list.push_back(std::make_shared()); + pass_list.push_back(std::make_shared()); #endif pass_list.push_back(std::make_shared()); instance = std::make_shared(pass_list); diff --git a/core/src/scheduler/optimizer/BuildIndexPass.cpp b/core/src/scheduler/optimizer/BuildIndexPass.cpp index 4e7d5222e2..cd1b7550d8 100644 --- a/core/src/scheduler/optimizer/BuildIndexPass.cpp +++ b/core/src/scheduler/optimizer/BuildIndexPass.cpp @@ -23,11 +23,14 @@ namespace milvus { namespace scheduler { -BuildIndexPass::BuildIndexPass(std::vector& build_gpu_ids) : build_gpu_ids_(build_gpu_ids) { -} - void BuildIndexPass::Init() { + server::Config& config = server::Config::GetInstance(); + std::vector build_resources; + Status s = config.GetGpuResourceConfigBuildIndexResources(build_resources); + if (!s.ok()) { + throw; + } } bool diff --git a/core/src/scheduler/optimizer/BuildIndexPass.h b/core/src/scheduler/optimizer/BuildIndexPass.h index a3eb426527..4f7117fc4e 100644 --- a/core/src/scheduler/optimizer/BuildIndexPass.h +++ b/core/src/scheduler/optimizer/BuildIndexPass.h @@ -34,7 +34,7 @@ namespace scheduler { class BuildIndexPass : public Pass { public: - explicit BuildIndexPass(std::vector& build_gpu_id); + BuildIndexPass() = default; public: void diff --git a/core/src/scheduler/optimizer/FaissFlatPass.cpp b/core/src/scheduler/optimizer/FaissFlatPass.cpp new file mode 100644 index 0000000000..61ca1b9ec9 --- /dev/null +++ b/core/src/scheduler/optimizer/FaissFlatPass.cpp @@ -0,0 +1,69 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#include "scheduler/optimizer/FaissFlatPass.h" +#include "cache/GpuCacheMgr.h" +#include "scheduler/SchedInst.h" +#include "scheduler/Utils.h" +#include "scheduler/task/SearchTask.h" +#include "scheduler/tasklabel/SpecResLabel.h" +#include "server/Config.h" +#include "utils/Log.h" + +namespace milvus { +namespace scheduler { + +void +FaissFlatPass::Init() { + server::Config& config = server::Config::GetInstance(); + Status s = config.GetEngineConfigGpuSearchThreshold(threshold_); + if (!s.ok()) { + threshold_ = std::numeric_limits::max(); + } + s = config.GetGpuResourceConfigSearchResources(gpus); + if (!s.ok()) { + throw; + } +} + +bool +FaissFlatPass::Run(const TaskPtr& task) { + if (task->Type() != TaskType::SearchTask) { + return false; + } + + auto search_task = std::static_pointer_cast(task); + if (search_task->file_->engine_type_ != (int)engine::EngineType::FAISS_IDMAP) { + return false; + } + + auto search_job = std::static_pointer_cast(search_task->job_.lock()); + ResourcePtr res_ptr; + if (search_job->nq() < threshold_) { + res_ptr = ResMgrInst::GetInstance()->GetResource("cpu"); + } else { + auto best_device_id = count_ % gpus.size(); + count_++; + res_ptr = ResMgrInst::GetInstance()->GetResource(ResourceType::GPU, best_device_id); + } + auto label = std::make_shared(res_ptr); + task->label() = label; + return true; +} + +} // namespace scheduler +} // namespace milvus diff --git a/core/src/scheduler/optimizer/LargeSQ8HPass.h b/core/src/scheduler/optimizer/FaissFlatPass.h similarity index 91% rename from core/src/scheduler/optimizer/LargeSQ8HPass.h rename to core/src/scheduler/optimizer/FaissFlatPass.h index c69fb12c64..60adf62a07 100644 --- a/core/src/scheduler/optimizer/LargeSQ8HPass.h +++ b/core/src/scheduler/optimizer/FaissFlatPass.h @@ -33,9 +33,9 @@ namespace milvus { namespace scheduler { -class LargeSQ8HPass : public Pass { +class FaissFlatPass : public Pass { public: - LargeSQ8HPass() = default; + FaissFlatPass() = default; public: void @@ -50,7 +50,7 @@ class LargeSQ8HPass : public Pass { std::vector gpus; }; -using LargeSQ8HPassPtr = std::shared_ptr; +using FaissFlatPassPtr = std::shared_ptr; } // namespace scheduler } // namespace milvus diff --git a/core/src/scheduler/optimizer/OnlyCPUPass.cpp b/core/src/scheduler/optimizer/FaissIVFFlatPass.cpp similarity index 56% rename from core/src/scheduler/optimizer/OnlyCPUPass.cpp rename to core/src/scheduler/optimizer/FaissIVFFlatPass.cpp index 8cd40092fb..1f1efb374b 100644 --- a/core/src/scheduler/optimizer/OnlyCPUPass.cpp +++ b/core/src/scheduler/optimizer/FaissIVFFlatPass.cpp @@ -15,31 +15,52 @@ // specific language governing permissions and limitations // under the License. -#include "scheduler/optimizer/OnlyCPUPass.h" +#include "scheduler/optimizer/FaissIVFFlatPass.h" +#include "cache/GpuCacheMgr.h" #include "scheduler/SchedInst.h" #include "scheduler/Utils.h" #include "scheduler/task/SearchTask.h" #include "scheduler/tasklabel/SpecResLabel.h" +#include "server/Config.h" +#include "utils/Log.h" namespace milvus { namespace scheduler { void -OnlyCPUPass::Init() { +FaissIVFFlatPass::Init() { + server::Config& config = server::Config::GetInstance(); + Status s = config.GetEngineConfigGpuSearchThreshold(threshold_); + if (!s.ok()) { + threshold_ = std::numeric_limits::max(); + } + s = config.GetGpuResourceConfigSearchResources(gpus); + if (!s.ok()) { + throw; + } } bool -OnlyCPUPass::Run(const TaskPtr& task) { - if (task->Type() != TaskType::SearchTask) - return false; - auto search_task = std::static_pointer_cast(task); - if (search_task->file_->engine_type_ != (int)engine::EngineType::FAISS_IVFSQ8 && - search_task->file_->engine_type_ != (int)engine::EngineType::FAISS_IVFFLAT) { +FaissIVFFlatPass::Run(const TaskPtr& task) { + if (task->Type() != TaskType::SearchTask) { return false; } - ResourcePtr res_ptr = ResMgrInst::GetInstance()->GetResource("cpu"); - auto label = std::make_shared(std::weak_ptr(res_ptr)); + auto search_task = std::static_pointer_cast(task); + if (search_task->file_->engine_type_ != (int)engine::EngineType::FAISS_IVFFLAT) { + return false; + } + + auto search_job = std::static_pointer_cast(search_task->job_.lock()); + ResourcePtr res_ptr; + if (search_job->nq() < threshold_) { + res_ptr = ResMgrInst::GetInstance()->GetResource("cpu"); + } else { + auto best_device_id = count_ % gpus.size(); + count_++; + res_ptr = ResMgrInst::GetInstance()->GetResource(ResourceType::GPU, best_device_id); + } + auto label = std::make_shared(res_ptr); task->label() = label; return true; } diff --git a/core/src/scheduler/optimizer/HybridPass.h b/core/src/scheduler/optimizer/FaissIVFFlatPass.h similarity index 81% rename from core/src/scheduler/optimizer/HybridPass.h rename to core/src/scheduler/optimizer/FaissIVFFlatPass.h index f84a0884f2..9264db32a8 100644 --- a/core/src/scheduler/optimizer/HybridPass.h +++ b/core/src/scheduler/optimizer/FaissIVFFlatPass.h @@ -18,6 +18,7 @@ #include #include +#include #include #include #include @@ -32,9 +33,9 @@ namespace milvus { namespace scheduler { -class HybridPass : public Pass { +class FaissIVFFlatPass : public Pass { public: - HybridPass() = default; + FaissIVFFlatPass() = default; public: void @@ -42,9 +43,14 @@ class HybridPass : public Pass { bool Run(const TaskPtr& task) override; + + private: + int32_t threshold_ = std::numeric_limits::max(); + int64_t count_ = 0; + std::vector gpus; }; -using HybridPassPtr = std::shared_ptr; +using FaissIVFFlatPassPtr = std::shared_ptr; } // namespace scheduler } // namespace milvus diff --git a/core/src/scheduler/optimizer/LargeSQ8HPass.cpp b/core/src/scheduler/optimizer/FaissIVFSQ8HPass.cpp similarity index 61% rename from core/src/scheduler/optimizer/LargeSQ8HPass.cpp rename to core/src/scheduler/optimizer/FaissIVFSQ8HPass.cpp index dd58feb512..b63a0f7490 100644 --- a/core/src/scheduler/optimizer/LargeSQ8HPass.cpp +++ b/core/src/scheduler/optimizer/FaissIVFSQ8HPass.cpp @@ -15,7 +15,7 @@ // specific language governing permissions and limitations // under the License. -#include "scheduler/optimizer/LargeSQ8HPass.h" +#include "scheduler/optimizer/FaissIVFSQ8HPass.h" #include "cache/GpuCacheMgr.h" #include "scheduler/SchedInst.h" #include "scheduler/Utils.h" @@ -28,7 +28,7 @@ namespace milvus { namespace scheduler { void -LargeSQ8HPass::Init() { +FaissIVFSQ8HPass::Init() { server::Config& config = server::Config::GetInstance(); Status s = config.GetEngineConfigGpuSearchThreshold(threshold_); if (!s.ok()) { @@ -38,7 +38,7 @@ LargeSQ8HPass::Init() { } bool -LargeSQ8HPass::Run(const TaskPtr& task) { +FaissIVFSQ8HPass::Run(const TaskPtr& task) { if (task->Type() != TaskType::SearchTask) { return false; } @@ -49,36 +49,16 @@ LargeSQ8HPass::Run(const TaskPtr& task) { } auto search_job = std::static_pointer_cast(search_task->job_.lock()); - - // TODO: future, Index::IVFSQ8H, if nq < threshold set cpu, else set gpu - + ResourcePtr res_ptr; if (search_job->nq() < threshold_) { - return false; + res_ptr = ResMgrInst::GetInstance()->GetResource("cpu"); + } else { + auto best_device_id = count_ % gpus.size(); + count_++; + res_ptr = ResMgrInst::GetInstance()->GetResource(ResourceType::GPU, best_device_id); } - - // std::vector all_free_mem; - // for (auto& gpu : gpus) { - // auto cache = cache::GpuCacheMgr::GetInstance(gpu); - // auto free_mem = cache->CacheCapacity() - cache->CacheUsage(); - // all_free_mem.push_back(free_mem); - // } - // - // auto max_e = std::max_element(all_free_mem.begin(), all_free_mem.end()); - // auto best_index = std::distance(all_free_mem.begin(), max_e); - // auto best_device_id = gpus[best_index]; - auto best_device_id = count_ % gpus.size(); - count_++; - - ResourcePtr res_ptr = ResMgrInst::GetInstance()->GetResource(ResourceType::GPU, best_device_id); - if (not res_ptr) { - SERVER_LOG_ERROR << "GpuResource " << best_device_id << " invalid."; - // TODO: throw critical error and exit - return false; - } - - auto label = std::make_shared(std::weak_ptr(res_ptr)); + auto label = std::make_shared(res_ptr); task->label() = label; - return true; } diff --git a/core/src/scheduler/optimizer/FaissIVFSQ8HPass.h b/core/src/scheduler/optimizer/FaissIVFSQ8HPass.h new file mode 100644 index 0000000000..fa830ff08a --- /dev/null +++ b/core/src/scheduler/optimizer/FaissIVFSQ8HPass.h @@ -0,0 +1,56 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. +#pragma once + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "Pass.h" + +namespace milvus { +namespace scheduler { + +class FaissIVFSQ8HPass : public Pass { + public: + FaissIVFSQ8HPass() = default; + + public: + void + Init() override; + + bool + Run(const TaskPtr& task) override; + + private: + int32_t threshold_ = std::numeric_limits::max(); + int64_t count_ = 0; + std::vector gpus; +}; + +using FaissIVFSQ8HPassPtr = std::shared_ptr; + +} // namespace scheduler +} // namespace milvus diff --git a/core/src/scheduler/optimizer/FaissIVFSQ8Pass.cpp b/core/src/scheduler/optimizer/FaissIVFSQ8Pass.cpp new file mode 100644 index 0000000000..30dd306b3b --- /dev/null +++ b/core/src/scheduler/optimizer/FaissIVFSQ8Pass.cpp @@ -0,0 +1,69 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#include "scheduler/optimizer/FaissIVFSQ8Pass.h" +#include "cache/GpuCacheMgr.h" +#include "scheduler/SchedInst.h" +#include "scheduler/Utils.h" +#include "scheduler/task/SearchTask.h" +#include "scheduler/tasklabel/SpecResLabel.h" +#include "server/Config.h" +#include "utils/Log.h" + +namespace milvus { +namespace scheduler { + +void +FaissIVFSQ8Pass::Init() { + server::Config& config = server::Config::GetInstance(); + Status s = config.GetEngineConfigGpuSearchThreshold(threshold_); + if (!s.ok()) { + threshold_ = std::numeric_limits::max(); + } + s = config.GetGpuResourceConfigSearchResources(gpus); + if (!s.ok()) { + throw; + } +} + +bool +FaissIVFSQ8Pass::Run(const TaskPtr& task) { + if (task->Type() != TaskType::SearchTask) { + return false; + } + + auto search_task = std::static_pointer_cast(task); + if (search_task->file_->engine_type_ != (int)engine::EngineType::FAISS_IVFSQ8) { + return false; + } + + auto search_job = std::static_pointer_cast(search_task->job_.lock()); + ResourcePtr res_ptr; + if (search_job->nq() < threshold_) { + res_ptr = ResMgrInst::GetInstance()->GetResource("cpu"); + } else { + auto best_device_id = count_ % gpus.size(); + count_++; + res_ptr = ResMgrInst::GetInstance()->GetResource(ResourceType::GPU, best_device_id); + } + auto label = std::make_shared(res_ptr); + task->label() = label; + return true; +} + +} // namespace scheduler +} // namespace milvus diff --git a/core/src/scheduler/optimizer/OnlyCPUPass.h b/core/src/scheduler/optimizer/FaissIVFSQ8Pass.h similarity index 81% rename from core/src/scheduler/optimizer/OnlyCPUPass.h rename to core/src/scheduler/optimizer/FaissIVFSQ8Pass.h index 253775f77e..c5853283f7 100644 --- a/core/src/scheduler/optimizer/OnlyCPUPass.h +++ b/core/src/scheduler/optimizer/FaissIVFSQ8Pass.h @@ -18,6 +18,7 @@ #include #include +#include #include #include #include @@ -32,9 +33,9 @@ namespace milvus { namespace scheduler { -class OnlyCPUPass : public Pass { +class FaissIVFSQ8Pass : public Pass { public: - OnlyCPUPass() = default; + FaissIVFSQ8Pass() = default; public: void @@ -42,9 +43,14 @@ class OnlyCPUPass : public Pass { bool Run(const TaskPtr& task) override; + + private: + int32_t threshold_ = std::numeric_limits::max(); + int64_t count_ = 0; + std::vector gpus; }; -using OnlyCPUPassPtr = std::shared_ptr; +using FaissIVFSQ8PassPtr = std::shared_ptr; } // namespace scheduler } // namespace milvus diff --git a/core/src/scheduler/optimizer/HybridPass.cpp b/core/src/scheduler/optimizer/HybridPass.cpp deleted file mode 100644 index 8bf0d8990d..0000000000 --- a/core/src/scheduler/optimizer/HybridPass.cpp +++ /dev/null @@ -1,47 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -#include "scheduler/optimizer/HybridPass.h" -#include "scheduler/SchedInst.h" -#include "scheduler/task/SearchTask.h" -#include "scheduler/tasklabel/SpecResLabel.h" - -namespace milvus { -namespace scheduler { - -void -HybridPass::Init() { -} - -bool -HybridPass::Run(const TaskPtr& task) { - // TODO: future, Index::IVFSQ8H, if nq < threshold set cpu, else set gpu - if (task->Type() != TaskType::SearchTask) - return false; - auto search_task = std::static_pointer_cast(task); - if (search_task->file_->engine_type_ == (int)engine::EngineType::FAISS_IVFSQ8H) { - // TODO: remove "cpu" hardcode - ResourcePtr res_ptr = ResMgrInst::GetInstance()->GetResource("cpu"); - auto label = std::make_shared(std::weak_ptr(res_ptr)); - task->label() = label; - return true; - } - return false; -} - -} // namespace scheduler -} // namespace milvus From a176eb94a21b2cf015745167df0903f490709219 Mon Sep 17 00:00:00 2001 From: fishpenguin Date: Tue, 19 Nov 2019 21:12:12 +0800 Subject: [PATCH 7/7] clange format --- core/src/scheduler/SchedInst.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/core/src/scheduler/SchedInst.h b/core/src/scheduler/SchedInst.h index 912f5edf8d..dc2d5ade35 100644 --- a/core/src/scheduler/SchedInst.h +++ b/core/src/scheduler/SchedInst.h @@ -23,11 +23,11 @@ #include "Scheduler.h" #include "Utils.h" #include "optimizer/BuildIndexPass.h" -#include "optimizer/FallbackPass.h" #include "optimizer/FaissFlatPass.h" #include "optimizer/FaissIVFFlatPass.h" -#include "optimizer/FaissIVFSQ8Pass.h" #include "optimizer/FaissIVFSQ8HPass.h" +#include "optimizer/FaissIVFSQ8Pass.h" +#include "optimizer/FallbackPass.h" #include "optimizer/Optimizer.h" #include "server/Config.h"