From 5f2f41244af64dfce7aae10f79349ec8593e1b46 Mon Sep 17 00:00:00 2001 From: Sijie Zhang <36330442+akihoni@users.noreply.github.com> Date: Wed, 30 Oct 2019 10:21:26 +0800 Subject: [PATCH 1/8] Create README_CN.md Former-commit-id: cc3731ea250d77c159f01d996af5ddc2d2ce817a --- README_CN.md | 197 +++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 197 insertions(+) create mode 100644 README_CN.md diff --git a/README_CN.md b/README_CN.md new file mode 100644 index 0000000000..aac2a57cc6 --- /dev/null +++ b/README_CN.md @@ -0,0 +1,197 @@ +![Milvuslogo](https://raw.githubusercontent.com/milvus-io/docs/master/assets/milvus_logo.png) + +![LICENSE](https://img.shields.io/badge/license-Apache--2.0-brightgreen) +![Language](https://img.shields.io/badge/language-C%2B%2B-blue) +[![codebeat badge](https://codebeat.co/badges/e030a4f6-b126-4475-a938-4723d54ec3a7?style=plastic)](https://codebeat.co/projects/github-com-jinhai-cn-milvus-master) + +![Release](https://img.shields.io/badge/release-v0.5.0-orange) +![Release_date](https://img.shields.io/badge/release_date-October-yellowgreen) + +- [Slack 频道](https://join.slack.com/t/milvusio/shared_invite/enQtNzY1OTQ0NDI3NjMzLWNmYmM1NmNjOTQ5MGI5NDhhYmRhMGU5M2NhNzhhMDMzY2MzNDdlYjM5ODQ5MmE3ODFlYzU3YjJkNmVlNDQ2ZTk) +- [Twitter](https://twitter.com/milvusio) +- [Facebook](https://www.facebook.com/io.milvus.5) +- [博客](https://www.milvus.io/blog/) +- [CSDN](https://zilliz.blog.csdn.net/) +- [中文官网](https://www.milvus.io/zh-CN/) + +# 欢迎来到 Milvus + +## Milvus 是什么 + +Milvus 是一款开源的、针对海量特征向量的相似性搜索引擎。基于异构众核计算框架设计,成本更低,性能更好。在有限的计算资源下,十亿向量搜索仅毫秒响应。 + +Milvus 提供稳定的 Python、Java 以及 C++ 的 API 接口。 + +通过 [版本发布说明](https://milvus.io/docs/zh-CN/release/v0.5.0/) 获取最新发行版本的 Milvus。 + +- 异构众核 + + Milvus 基于异构众核计算框架设计,成本更低,性能更好。 + +- 多元化索引 + + Milvus 支持多种索引方式,使用量化索引、基于树的索引和图索引等算法。 + +- 资源智能管理 + + Milvus 根据实际数据规模和可利用资源,智能调节优化查询计算和索引构建过程。 + +- 水平扩容 + + Milvus 支持在线 / 离线扩容,仅需执行简单命令,便可弹性伸缩计算节点和存储节点。 + +- 高可用性 + + Milvus 集成了 Kubernetes 框架,能有效避免单点故障情况的发生。 + +- 
简单易用 + + Milvus 安装简单,使用方便,并可使您专注于特征向量。 + +- 可视化监控 + + 您可以使用基于 Prometheus 的图形化监控,以便实时跟踪系统性能。 + +## 整体架构 + +![](https://raw.githubusercontent.com/yamasite/docs/master/assets/milvus_arch.png) + +## 开始使用 Milvus + +### 硬件要求 + +| 硬件设备 | 推荐配置 | +| -------- | ------------------------------------- | +| CPU | Intel CPU Haswell 及以上 | +| GPU | NVIDIA Pascal 系列及以上 | +| 内存 | 8 GB 或以上(取决于具体向量数据规模) | +| 硬盘 | SATA 3.0 SSD 及以上 | + +### 使用 Docker + +您可以方便地使用 Docker 安装 Milvus。具体请查看 [Milvus 安装指南](https://milvus.io/docs/zh-CN/userguide/install_milvus/)。 + +### 从源代码编译 + +#### 软件要求 + +- Ubuntu 18.04 及以上 +- CMake 3.14 及以上 +- CUDA 10.0 及以上 +- NVIDIA driver 418 及以上 + +#### 编译 + +##### 第一步 安装依赖项 + +```shell +$ cd [Milvus sourcecode path]/core +$ ./ubuntu_build_deps.sh +``` + +##### 第二步 编译 + +```shell +$ cd [Milvus sourcecode path]/core +$ ./build.sh -t Debug +or +$ ./build.sh -t Release +``` + +当您成功编译后,所有 Milvus 必需组件将安装在`[Milvus root path]/core/milvus`路径下。 + +##### 启动 Milvus 服务 + +```shell +$ cd [Milvus root path]/core/milvus +``` + +在 `LD_LIBRARY_PATH` 中添加 `lib/` 目录: + +```shell +$ export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/path/to/milvus/lib +``` + +启动 Milvus 服务: + +```shell +$ cd scripts +$ ./start_server.sh +``` + +若要停止 Milvus 服务,请使用如下命令: + +```shell +$ ./stop_server.sh +``` + +若需要修改 Milvus 配置文件 `conf/server_config.yaml` 和`conf/log_config.conf`,请查看 [Milvus 配置](https://milvus.io/docs/zh-CN/reference/milvus_config/)。 + +### 开始您的第一个 Milvus 程序 + +#### 运行 Python 示例代码 + +请确保系统的 Python 版本为 [Python 3.5](https://www.python.org/downloads/) 或以上。 + +安装 Milvus Python SDK。 + +```shell +# Install Milvus Python SDK +$ pip install pymilvus==0.2.3 +``` + +创建 `example.py` 文件,并向文件中加入 [Python 示例代码](https://github.com/milvus-io/pymilvus/blob/master/examples/AdvancedExample.py)。 + +运行示例代码 + +```shell +# Run Milvus Python example +$ python3 example.py +``` + +#### 运行 C++ 示例代码 + +```shell + # Run Milvus C++ example + $ cd [Milvus root path]/core/milvus/bin + $ ./sdk_simple +``` + +#### 运行 Java 示例代码 + +请确保系统的 Java 
版本为 Java 8 或以上。 + +请从[此处](https://github.com/milvus-io/milvus-sdk-java/tree/master/examples)获取 Java 示例代码。 + +## 贡献者指南 + +我们由衷欢迎您推送贡献。关于贡献流程的详细信息,请参阅 [贡献者指南](https://github.com/milvus-io/milvus/blob/master/CONTRIBUTING.md)。本项目遵循 Milvus [行为准则](https://github.com/milvus-io/milvus/blob/master/CODE_OF_CONDUCT.md)。如果您希望参与本项目,请遵守该准则的内容。 + +我们使用 [GitHub issues](https://github.com/milvus-io/milvus/issues/new/choose) 追踪问题和补丁。若您希望提出问题或进行讨论,请加入我们的社区。 + +## 加入 Milvus 社区 + +欢迎加入我们的 [Slack 频道](https://join.slack.com/t/milvusio/shared_invite/enQtNzY1OTQ0NDI3NjMzLWNmYmM1NmNjOTQ5MGI5NDhhYmRhMGU5M2NhNzhhMDMzY2MzNDdlYjM5ODQ5MmE3ODFlYzU3YjJkNmVlNDQ2ZTk) 以便与其他用户和贡献者进行交流。 + +## Milvus 路线图 + +请阅读我们的[路线图](https://milvus.io/docs/zh-CN/roadmap/)以获得更多即将开发的新功能。 + +## 相关链接 + +[Milvus 官方网站](https://www.milvus.io/) + +[Milvus 文档](https://www.milvus.io/docs/zh-CN/userguide/install_milvus/) + +[Milvus 在线训练营](https://github.com/milvus-io/bootcamp) + +[Milvus 博客](https://www.milvus.io/blog/) + +[Milvus CSDN](https://zilliz.blog.csdn.net/) + +[Milvus 路线图](https://milvus.io/docs/zh-CN/roadmap/) + +## 许可协议 + +[Apache 许可协议2.0版](https://github.com/milvus-io/milvus/blob/master/LICENSE) + From 06ec2a04a4821db039b981b64811bc612a7c7a67 Mon Sep 17 00:00:00 2001 From: Sijie Zhang <36330442+akihoni@users.noreply.github.com> Date: Wed, 30 Oct 2019 18:36:08 +0800 Subject: [PATCH 2/8] Update README_CN.md MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 更换无效链接。 Former-commit-id: 5ac8a003c25e68f1d81c57ce08f91ab20e3b8e24 --- README_CN.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README_CN.md b/README_CN.md index aac2a57cc6..4fdb44df40 100644 --- a/README_CN.md +++ b/README_CN.md @@ -140,7 +140,7 @@ $ ./stop_server.sh $ pip install pymilvus==0.2.3 ``` -创建 `example.py` 文件,并向文件中加入 [Python 
示例代码](https://github.com/milvus-io/pymilvus/blob/master/examples/advanced_example.py)。 运行示例代码 From b9b1d0deb92fc79c0d9fc91b2d36e79d066e8b6d Mon Sep 17 00:00:00 2001 From: Sijie Zhang <36330442+akihoni@users.noreply.github.com> Date: Thu, 31 Oct 2019 14:40:56 +0800 Subject: [PATCH 3/8] Update README_CN.md MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 更换产品架构图地址 Former-commit-id: 4e1fcae26649370dad7745ffb5ba0dda2d19de03 --- README_CN.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README_CN.md b/README_CN.md index 4fdb44df40..5dad64af9b 100644 --- a/README_CN.md +++ b/README_CN.md @@ -54,7 +54,7 @@ Milvus 提供稳定的 Python、Java 以及 C++ 的 API 接口。 ## 整体架构 -![](https://raw.githubusercontent.com/yamasite/docs/master/assets/milvus_arch.png) +![Milvus_arch](https://raw.githubusercontent.com/milvus-io/docs/master/assets/milvus_arch.png) ## 开始使用 Milvus From 6ad1ed74e9874ede8c8f98760916cdd0da0a953e Mon Sep 17 00:00:00 2001 From: "yudong.cai" Date: Fri, 1 Nov 2019 14:51:20 +0800 Subject: [PATCH 4/8] #159 Change the configuration name from 'use_gpu_threshold' to 'gpu_search_threshold' Former-commit-id: b4e576b389a720d66ec5455ca05b20184c8c911d --- CHANGELOG.md | 1 + core/conf/server_config.template | 2 +- .../src/scheduler/optimizer/LargeSQ8HPass.cpp | 2 +- core/src/server/Config.cpp | 34 +++++++++---------- core/src/server/Config.h | 10 +++--- 5 files changed, 25 insertions(+), 24 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 266690a82c..5b4069ff65 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -25,6 +25,7 @@ Please mark all change in change log and use the ticket from JIRA. 
- \#130 - Set task state MOVED after resource copy it completed - \#149 - Improve large query optimizer pass - \#156 - Not return error when search_resources and index_build_device set cpu +- \#159 - Change the configuration name from 'use_gpu_threshold' to 'gpu_search_threshold' ## Task diff --git a/core/conf/server_config.template b/core/conf/server_config.template index a23707be5d..8dfb30f534 100644 --- a/core/conf/server_config.template +++ b/core/conf/server_config.template @@ -36,7 +36,7 @@ cache_config: engine_config: use_blas_threshold: 20 # if nq < use_blas_threshold, use SSE, faster with fluctuated response times # if nq >= use_blas_threshold, use OpenBlas, slower with stable response times - use_gpu_threshold: 1000 + gpu_search_threshold: 1000 # threshold beyond which the search computation is executed on GPUs only resource_config: search_resources: # define the GPUs used for search computation, must be in format: gpux diff --git a/core/src/scheduler/optimizer/LargeSQ8HPass.cpp b/core/src/scheduler/optimizer/LargeSQ8HPass.cpp index cacedd6208..b9784e3c0a 100644 --- a/core/src/scheduler/optimizer/LargeSQ8HPass.cpp +++ b/core/src/scheduler/optimizer/LargeSQ8HPass.cpp @@ -29,7 +29,7 @@ namespace scheduler { LargeSQ8HPass::LargeSQ8HPass() { server::Config& config = server::Config::GetInstance(); - Status s = config.GetEngineConfigUseGpuThreshold(threshold_); + Status s = config.GetEngineConfigGpuSearchThreshold(threshold_); if (!s.ok()) { threshold_ = std::numeric_limits::max(); } diff --git a/core/src/server/Config.cpp b/core/src/server/Config.cpp index 2a04128411..d651f5b3b3 100644 --- a/core/src/server/Config.cpp +++ b/core/src/server/Config.cpp @@ -193,8 +193,8 @@ Config::ValidateConfig() { return s; } - int32_t engine_use_gpu_threshold; - s = GetEngineConfigUseGpuThreshold(engine_use_gpu_threshold); + int32_t engine_gpu_search_threshold; + s = GetEngineConfigGpuSearchThreshold(engine_gpu_search_threshold); if (!s.ok()) { return s; } @@ -330,7 +330,7 @@ 
Config::ResetDefaultConfig() { return s; } - s = SetEngineConfigUseGpuThreshold(CONFIG_ENGINE_USE_GPU_THRESHOLD_DEFAULT); + s = SetEngineConfigGpuSearchThreshold(CONFIG_ENGINE_GPU_SEARCH_THRESHOLD_DEFAULT); if (!s.ok()) { return s; } @@ -463,7 +463,7 @@ Status Config::CheckDBConfigArchiveDaysThreshold(const std::string& value) { if (!ValidationUtil::ValidateStringIsNumber(value).ok()) { std::string msg = "Invalid archive days threshold: " + value + - ". Possible reason: db_config.archive_disk_threshold is invalid."; + ". Possible reason: db_config.archive_days_threshold is invalid."; return Status(SERVER_INVALID_ARGUMENT, msg); } return Status::OK(); @@ -631,7 +631,7 @@ Config::CheckCacheConfigGpuCacheThreshold(const std::string& value) { Status Config::CheckCacheConfigCacheInsertData(const std::string& value) { if (!ValidationUtil::ValidateStringIsBool(value).ok()) { - std::string msg = "Invalid cache insert option: " + value + + std::string msg = "Invalid cache insert data option: " + value + ". Possible reason: cache_config.cache_insert_data is not a boolean."; return Status(SERVER_INVALID_ARGUMENT, msg); } @@ -641,7 +641,7 @@ Config::CheckCacheConfigCacheInsertData(const std::string& value) { Status Config::CheckEngineConfigUseBlasThreshold(const std::string& value) { if (!ValidationUtil::ValidateStringIsNumber(value).ok()) { - std::string msg = "Invalid blas threshold: " + value + + std::string msg = "Invalid use blas threshold: " + value + ". Possible reason: engine_config.use_blas_threshold is not a positive integer."; return Status(SERVER_INVALID_ARGUMENT, msg); } @@ -651,7 +651,7 @@ Config::CheckEngineConfigUseBlasThreshold(const std::string& value) { Status Config::CheckEngineConfigOmpThreadNum(const std::string& value) { if (!ValidationUtil::ValidateStringIsNumber(value).ok()) { - std::string msg = "Invalid omp thread number: " + value + + std::string msg = "Invalid omp thread num: " + value + ". 
Possible reason: engine_config.omp_thread_num is not a positive integer."; return Status(SERVER_INVALID_ARGUMENT, msg); } @@ -660,7 +660,7 @@ Config::CheckEngineConfigOmpThreadNum(const std::string& value) { uint32_t sys_thread_cnt = 8; CommonUtil::GetSystemAvailableThreads(sys_thread_cnt); if (omp_thread > static_cast(sys_thread_cnt)) { - std::string msg = "Invalid omp thread number: " + value + + std::string msg = "Invalid omp thread num: " + value + ". Possible reason: engine_config.omp_thread_num exceeds system cpu cores."; return Status(SERVER_INVALID_ARGUMENT, msg); } @@ -668,10 +668,10 @@ Config::CheckEngineConfigOmpThreadNum(const std::string& value) { } Status -Config::CheckEngineConfigUseGpuThreshold(const std::string& value) { +Config::CheckEngineConfigGpuSearchThreshold(const std::string& value) { if (!ValidationUtil::ValidateStringIsNumber(value).ok()) { - std::string msg = "Invalid gpu threshold: " + value + - ". Possible reason: engine_config.use_gpu_threshold is not a positive integer."; + std::string msg = "Invalid gpu search threshold: " + value + + ". 
Possible reason: engine_config.gpu_search_threshold is not a positive integer."; return Status(SERVER_INVALID_ARGUMENT, msg); } return Status::OK(); @@ -979,10 +979,10 @@ Config::GetEngineConfigOmpThreadNum(int32_t& value) { } Status -Config::GetEngineConfigUseGpuThreshold(int32_t& value) { +Config::GetEngineConfigGpuSearchThreshold(int32_t& value) { std::string str = - GetConfigStr(CONFIG_ENGINE, CONFIG_ENGINE_USE_GPU_THRESHOLD, CONFIG_ENGINE_USE_GPU_THRESHOLD_DEFAULT); - Status s = CheckEngineConfigUseGpuThreshold(str); + GetConfigStr(CONFIG_ENGINE, CONFIG_ENGINE_GPU_SEARCH_THRESHOLD, CONFIG_ENGINE_GPU_SEARCH_THRESHOLD_DEFAULT); + Status s = CheckEngineConfigGpuSearchThreshold(str); if (!s.ok()) { return s; } @@ -1244,13 +1244,13 @@ Config::SetEngineConfigOmpThreadNum(const std::string& value) { } Status -Config::SetEngineConfigUseGpuThreshold(const std::string& value) { - Status s = CheckEngineConfigUseGpuThreshold(value); +Config::SetEngineConfigGpuSearchThreshold(const std::string& value) { + Status s = CheckEngineConfigGpuSearchThreshold(value); if (!s.ok()) { return s; } - SetConfigValueInMem(CONFIG_DB, CONFIG_ENGINE_USE_GPU_THRESHOLD, value); + SetConfigValueInMem(CONFIG_DB, CONFIG_ENGINE_GPU_SEARCH_THRESHOLD, value); return Status::OK(); } diff --git a/core/src/server/Config.h b/core/src/server/Config.h index 3e7ae0c818..c93847b216 100644 --- a/core/src/server/Config.h +++ b/core/src/server/Config.h @@ -84,8 +84,8 @@ static const char* CONFIG_ENGINE_USE_BLAS_THRESHOLD = "use_blas_threshold"; static const char* CONFIG_ENGINE_USE_BLAS_THRESHOLD_DEFAULT = "20"; static const char* CONFIG_ENGINE_OMP_THREAD_NUM = "omp_thread_num"; static const char* CONFIG_ENGINE_OMP_THREAD_NUM_DEFAULT = "0"; -static const char* CONFIG_ENGINE_USE_GPU_THRESHOLD = "use_gpu_threshold"; -static const char* CONFIG_ENGINE_USE_GPU_THRESHOLD_DEFAULT = "1000"; +static const char* CONFIG_ENGINE_GPU_SEARCH_THRESHOLD = "gpu_search_threshold"; +static const char* 
CONFIG_ENGINE_GPU_SEARCH_THRESHOLD_DEFAULT = "1000"; /* resource config */ static const char* CONFIG_RESOURCE = "resource_config"; @@ -169,7 +169,7 @@ class Config { Status CheckEngineConfigOmpThreadNum(const std::string& value); Status - CheckEngineConfigUseGpuThreshold(const std::string& value); + CheckEngineConfigGpuSearchThreshold(const std::string& value); /* resource config */ Status @@ -235,7 +235,7 @@ class Config { Status GetEngineConfigOmpThreadNum(int32_t& value); Status - GetEngineConfigUseGpuThreshold(int32_t& value); + GetEngineConfigGpuSearchThreshold(int32_t& value); /* resource config */ Status @@ -296,7 +296,7 @@ class Config { Status SetEngineConfigOmpThreadNum(const std::string& value); Status - SetEngineConfigUseGpuThreshold(const std::string& value); + SetEngineConfigGpuSearchThreshold(const std::string& value); /* resource config */ Status From e1ad8d34fd14c105883f3309e26ec51f239afb88 Mon Sep 17 00:00:00 2001 From: fishpenguin Date: Fri, 1 Nov 2019 16:54:59 +0800 Subject: [PATCH 5/8] #164 - Add CPU version for building index Former-commit-id: d03346e95d746a4c95a0e42765df9c0dc1394e42 --- CHANGELOG.md | 1 + core/src/scheduler/JobMgr.cpp | 30 +++--- core/src/scheduler/SchedInst.cpp | 2 +- core/src/scheduler/SchedInst.h | 1 - core/src/scheduler/TaskCreator.cpp | 12 ++- .../scheduler/action/PushTaskToNeighbour.cpp | 102 ++++++------------ core/src/scheduler/optimizer/OnlyGPUPass.cpp | 3 +- core/src/server/Config.cpp | 10 +- core/src/server/Config.h | 2 + 9 files changed, 78 insertions(+), 85 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index aac9a2eaf6..93021475d9 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -14,6 +14,7 @@ Please mark all change in change log and use the ticket from JIRA. 
- \#115 - Using new structure for tasktable - \#139 - New config option use_gpu_threshold - \#146 - Add only GPU and only CPU version for IVF_SQ8 and IVF_FLAT +- \#164 - Add CPU version for building index ## Improvement - \#64 - Improvement dump function in scheduler diff --git a/core/src/scheduler/JobMgr.cpp b/core/src/scheduler/JobMgr.cpp index 794f6a0f37..8e038e51b2 100644 --- a/core/src/scheduler/JobMgr.cpp +++ b/core/src/scheduler/JobMgr.cpp @@ -104,20 +104,26 @@ JobMgr::build_task(const JobPtr& job) { void JobMgr::calculate_path(const TaskPtr& task) { - if (task->type_ != TaskType::SearchTask) { - return; - } + if (task->type_ == TaskType::SearchTask) { + if (task->label()->Type() != TaskLabelType::SPECIFIED_RESOURCE) { + return; + } - if (task->label()->Type() != TaskLabelType::SPECIFIED_RESOURCE) { - return; + std::vector path; + auto spec_label = std::static_pointer_cast(task->label()); + auto src = res_mgr_->GetDiskResources()[0]; + auto dest = spec_label->resource(); + ShortestPath(src.lock(), dest.lock(), res_mgr_, path); + task->path() = Path(path, path.size() - 1); + + } else if (task->type_ == TaskType::BuildIndexTask) { + auto spec_label = std::static_pointer_cast(task->label()); + auto src = res_mgr_->GetDiskResources()[0]; + auto dest = spec_label->resource(); + std::vector path; + ShortestPath(src.lock(), dest.lock(), res_mgr_, path); + task->path() = Path(path, path.size() - 1); } - - std::vector path; - auto spec_label = std::static_pointer_cast(task->label()); - auto src = res_mgr_->GetDiskResources()[0]; - auto dest = spec_label->resource(); - ShortestPath(src.lock(), dest.lock(), res_mgr_, path); - task->path() = Path(path, path.size() - 1); } } // namespace scheduler diff --git a/core/src/scheduler/SchedInst.cpp b/core/src/scheduler/SchedInst.cpp index 8474e93c1f..8d58b831bf 100644 --- a/core/src/scheduler/SchedInst.cpp +++ b/core/src/scheduler/SchedInst.cpp @@ -75,7 +75,7 @@ load_simple_config() { } } - if (not find_build_gpu_id) { + if 
(not find_build_gpu_id && build_gpu_id != server::CPU_DEVICE_ID) { ResMgrInst::GetInstance()->Add( ResourceFactory::Create(std::to_string(build_gpu_id), "GPU", build_gpu_id, true, true)); ResMgrInst::GetInstance()->Connect("cpu", std::to_string(build_gpu_id), pcie); diff --git a/core/src/scheduler/SchedInst.h b/core/src/scheduler/SchedInst.h index a3048069f9..d51611af26 100644 --- a/core/src/scheduler/SchedInst.h +++ b/core/src/scheduler/SchedInst.h @@ -106,7 +106,6 @@ class OptimizerInst { has_cpu = true; } } - std::vector pass_list; pass_list.push_back(std::make_shared()); pass_list.push_back(std::make_shared()); diff --git a/core/src/scheduler/TaskCreator.cpp b/core/src/scheduler/TaskCreator.cpp index 40cfa9aac6..3d2a2072cd 100644 --- a/core/src/scheduler/TaskCreator.cpp +++ b/core/src/scheduler/TaskCreator.cpp @@ -70,8 +70,16 @@ TaskCreator::Create(const DeleteJobPtr& job) { std::vector TaskCreator::Create(const BuildIndexJobPtr& job) { std::vector tasks; - // TODO(yukun): remove "disk" hardcode here - ResourcePtr res_ptr = ResMgrInst::GetInstance()->GetResource("disk"); + + server::Config& config = server::Config::GetInstance(); + int32_t build_index_id; + Status stat = config.GetResourceConfigIndexBuildDevice(build_index_id); + ResourcePtr res_ptr; + if (build_index_id == server::CPU_DEVICE_ID) { + res_ptr = ResMgrInst::GetInstance()->GetResource("cpu"); + } else { + res_ptr = ResMgrInst::GetInstance()->GetResource(ResourceType::GPU, build_index_id); + } for (auto& to_index_file : job->to_index_files()) { auto label = std::make_shared(std::weak_ptr(res_ptr)); diff --git a/core/src/scheduler/action/PushTaskToNeighbour.cpp b/core/src/scheduler/action/PushTaskToNeighbour.cpp index b8a4a1164b..6e52708abc 100644 --- a/core/src/scheduler/action/PushTaskToNeighbour.cpp +++ b/core/src/scheduler/action/PushTaskToNeighbour.cpp @@ -138,73 +138,41 @@ Action::SpecifiedResourceLabelTaskScheduler(const ResourceMgrPtr& res_mgr, Resou std::shared_ptr event) { auto task_item = 
event->task_table_item_; auto task = event->task_table_item_->task; - if (resource->type() == ResourceType::DISK) { - // step 1: calculate shortest path per resource, from disk to compute resource - auto compute_resources = res_mgr->GetComputeResources(); - std::vector> paths; - std::vector transport_costs; - for (auto& res : compute_resources) { - std::vector path; - uint64_t transport_cost = ShortestPath(resource, res, res_mgr, path); - transport_costs.push_back(transport_cost); - paths.emplace_back(path); - } - // if (task->job_.lock()->type() == JobType::SEARCH) { - // auto label = task->label(); - // auto spec_label = std::static_pointer_cast(label); - // if (spec_label->resource().lock()->type() == ResourceType::CPU) { - // std::vector spec_path; - // spec_path.push_back(spec_label->resource().lock()->name()); - // spec_path.push_back(resource->name()); - // task->path() = Path(spec_path, spec_path.size() - 1); - // } else { - // // step 2: select min cost, cost(resource) = avg_cost * task_to_do + transport_cost - // uint64_t min_cost = std::numeric_limits::max(); - // uint64_t min_cost_idx = 0; - // for (uint64_t i = 0; i < compute_resources.size(); ++i) { - // if (compute_resources[i]->TotalTasks() == 0) { - // min_cost_idx = i; - // break; - // } - // uint64_t cost = compute_resources[i]->TaskAvgCost() * - // compute_resources[i]->NumOfTaskToExec() + - // transport_costs[i]; - // if (min_cost > cost) { - // min_cost = cost; - // min_cost_idx = i; - // } - // } - // - // // step 3: set path in task - // Path task_path(paths[min_cost_idx], paths[min_cost_idx].size() - 1); - // task->path() = task_path; - // } - // - // } else - if (task->job_.lock()->type() == JobType::BUILD) { - // step2: Read device id in config - // get build index gpu resource - server::Config& config = server::Config::GetInstance(); - int32_t build_index_gpu; - Status stat = config.GetResourceConfigIndexBuildDevice(build_index_gpu); - - bool find_gpu_res = false; - if 
(res_mgr->GetResource(ResourceType::GPU, build_index_gpu) != nullptr) { - for (uint64_t i = 0; i < compute_resources.size(); ++i) { - if (compute_resources[i]->name() == - res_mgr->GetResource(ResourceType::GPU, build_index_gpu)->name()) { - find_gpu_res = true; - Path task_path(paths[i], paths[i].size() - 1); - task->path() = task_path; - break; - } - } - } - if (not find_gpu_res) { - task->path() = Path(paths[0], paths[0].size() - 1); - } - } - } +// if (resource->type() == ResourceType::DISK) { +// // step 1: calculate shortest path per resource, from disk to compute resource +// auto compute_resources = res_mgr->GetComputeResources(); +// std::vector> paths; +// std::vector transport_costs; +// for (auto& res : compute_resources) { +// std::vector path; +// uint64_t transport_cost = ShortestPath(resource, res, res_mgr, path); +// transport_costs.push_back(transport_cost); +// paths.emplace_back(path); +// } +// if (task->job_.lock()->type() == JobType::BUILD) { +// // step2: Read device id in config +// // get build index gpu resource +// server::Config& config = server::Config::GetInstance(); +// int32_t build_index_gpu; +// Status stat = config.GetResourceConfigIndexBuildDevice(build_index_gpu); +// +// bool find_gpu_res = false; +// if (res_mgr->GetResource(ResourceType::GPU, build_index_gpu) != nullptr) { +// for (uint64_t i = 0; i < compute_resources.size(); ++i) { +// if (compute_resources[i]->name() == +// res_mgr->GetResource(ResourceType::GPU, build_index_gpu)->name()) { +// find_gpu_res = true; +// Path task_path(paths[i], paths[i].size() - 1); +// task->path() = task_path; +// break; +// } +// } +// } +// if (not find_gpu_res) { +// task->path() = Path(paths[0], paths[0].size() - 1); +// } +// } +// } if (resource->name() == task->path().Last()) { resource->WakeupExecutor(); diff --git a/core/src/scheduler/optimizer/OnlyGPUPass.cpp b/core/src/scheduler/optimizer/OnlyGPUPass.cpp index 3fcda0e8a3..a49b75b17e 100644 --- 
a/core/src/scheduler/optimizer/OnlyGPUPass.cpp +++ b/core/src/scheduler/optimizer/OnlyGPUPass.cpp @@ -46,7 +46,8 @@ OnlyGPUPass::Run(const TaskPtr& task) { auto label = std::make_shared(std::weak_ptr(res_ptr)); task->label() = label; - specified_gpu_id_ = specified_gpu_id_++ % gpu_id.size(); + ++specified_gpu_id_; + specified_gpu_id_ = specified_gpu_id_ % gpu_id.size(); return true; } diff --git a/core/src/server/Config.cpp b/core/src/server/Config.cpp index d651f5b3b3..0c56d69c39 100644 --- a/core/src/server/Config.cpp +++ b/core/src/server/Config.cpp @@ -596,6 +596,9 @@ Config::CheckCacheConfigGpuCacheCapacity(const std::string& value) { return s; } + if (gpu_index == server::CPU_DEVICE_ID) + return Status::OK(); + size_t gpu_memory; if (!ValidationUtil::GetGpuMemory(gpu_index, gpu_memory).ok()) { std::string msg = "Fail to get GPU memory for GPU device: " + std::to_string(gpu_index); @@ -1013,7 +1016,12 @@ Config::GetResourceConfigIndexBuildDevice(int32_t& value) { return s; } - value = std::stoi(str.substr(3)); + if (str == "cpu") { + value = CPU_DEVICE_ID; + } else { + value = std::stoi(str.substr(3)); + } + return Status::OK(); } diff --git a/core/src/server/Config.h b/core/src/server/Config.h index c93847b216..45591fb5ec 100644 --- a/core/src/server/Config.h +++ b/core/src/server/Config.h @@ -95,6 +95,8 @@ static const char* CONFIG_RESOURCE_SEARCH_RESOURCES = "search_resources"; static const char* CONFIG_RESOURCE_INDEX_BUILD_DEVICE = "index_build_device"; static const char* CONFIG_RESOURCE_INDEX_BUILD_DEVICE_DEFAULT = "gpu0"; +const int32_t CPU_DEVICE_ID = -1; + class Config { public: static Config& From 44d648c3afc94c5d7a224ff447aeb85f12f09d88 Mon Sep 17 00:00:00 2001 From: fishpenguin Date: Fri, 1 Nov 2019 16:54:59 +0800 Subject: [PATCH 6/8] #164 - Add CPU version for building index Former-commit-id: 02f9f2a04dc765f41c474af04659f70d121cdc18 --- CHANGELOG.md | 1 + core/src/scheduler/JobMgr.cpp | 29 ++--- core/src/scheduler/SchedInst.cpp | 2 +- 
core/src/scheduler/SchedInst.h | 1 - core/src/scheduler/TaskCreator.cpp | 11 +- .../scheduler/action/PushTaskToNeighbour.cpp | 102 ++++++------------ core/src/scheduler/optimizer/OnlyGPUPass.cpp | 3 +- core/src/server/Config.cpp | 10 +- core/src/server/Config.h | 2 + 9 files changed, 76 insertions(+), 85 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index aac9a2eaf6..93021475d9 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -14,6 +14,7 @@ Please mark all change in change log and use the ticket from JIRA. - \#115 - Using new structure for tasktable - \#139 - New config option use_gpu_threshold - \#146 - Add only GPU and only CPU version for IVF_SQ8 and IVF_FLAT +- \#164 - Add CPU version for building index ## Improvement - \#64 - Improvement dump function in scheduler diff --git a/core/src/scheduler/JobMgr.cpp b/core/src/scheduler/JobMgr.cpp index 794f6a0f37..4404d95763 100644 --- a/core/src/scheduler/JobMgr.cpp +++ b/core/src/scheduler/JobMgr.cpp @@ -104,20 +104,25 @@ JobMgr::build_task(const JobPtr& job) { void JobMgr::calculate_path(const TaskPtr& task) { - if (task->type_ != TaskType::SearchTask) { - return; - } + if (task->type_ == TaskType::SearchTask) { + if (task->label()->Type() != TaskLabelType::SPECIFIED_RESOURCE) { + return; + } - if (task->label()->Type() != TaskLabelType::SPECIFIED_RESOURCE) { - return; + std::vector path; + auto spec_label = std::static_pointer_cast(task->label()); + auto src = res_mgr_->GetDiskResources()[0]; + auto dest = spec_label->resource(); + ShortestPath(src.lock(), dest.lock(), res_mgr_, path); + task->path() = Path(path, path.size() - 1); + } else if (task->type_ == TaskType::BuildIndexTask) { + auto spec_label = std::static_pointer_cast(task->label()); + auto src = res_mgr_->GetDiskResources()[0]; + auto dest = spec_label->resource(); + std::vector path; + ShortestPath(src.lock(), dest.lock(), res_mgr_, path); + task->path() = Path(path, path.size() - 1); } - - std::vector path; - auto spec_label = 
std::static_pointer_cast(task->label()); - auto src = res_mgr_->GetDiskResources()[0]; - auto dest = spec_label->resource(); - ShortestPath(src.lock(), dest.lock(), res_mgr_, path); - task->path() = Path(path, path.size() - 1); } } // namespace scheduler diff --git a/core/src/scheduler/SchedInst.cpp b/core/src/scheduler/SchedInst.cpp index 8474e93c1f..8d58b831bf 100644 --- a/core/src/scheduler/SchedInst.cpp +++ b/core/src/scheduler/SchedInst.cpp @@ -75,7 +75,7 @@ load_simple_config() { } } - if (not find_build_gpu_id) { + if (not find_build_gpu_id && build_gpu_id != server::CPU_DEVICE_ID) { ResMgrInst::GetInstance()->Add( ResourceFactory::Create(std::to_string(build_gpu_id), "GPU", build_gpu_id, true, true)); ResMgrInst::GetInstance()->Connect("cpu", std::to_string(build_gpu_id), pcie); diff --git a/core/src/scheduler/SchedInst.h b/core/src/scheduler/SchedInst.h index a3048069f9..d51611af26 100644 --- a/core/src/scheduler/SchedInst.h +++ b/core/src/scheduler/SchedInst.h @@ -106,7 +106,6 @@ class OptimizerInst { has_cpu = true; } } - std::vector pass_list; pass_list.push_back(std::make_shared()); pass_list.push_back(std::make_shared()); diff --git a/core/src/scheduler/TaskCreator.cpp b/core/src/scheduler/TaskCreator.cpp index 40cfa9aac6..9f3bc2ae9a 100644 --- a/core/src/scheduler/TaskCreator.cpp +++ b/core/src/scheduler/TaskCreator.cpp @@ -70,8 +70,15 @@ TaskCreator::Create(const DeleteJobPtr& job) { std::vector TaskCreator::Create(const BuildIndexJobPtr& job) { std::vector tasks; - // TODO(yukun): remove "disk" hardcode here - ResourcePtr res_ptr = ResMgrInst::GetInstance()->GetResource("disk"); + server::Config& config = server::Config::GetInstance(); + int32_t build_index_id; + Status stat = config.GetResourceConfigIndexBuildDevice(build_index_id); + ResourcePtr res_ptr; + if (build_index_id == server::CPU_DEVICE_ID) { + res_ptr = ResMgrInst::GetInstance()->GetResource("cpu"); + } else { + res_ptr = ResMgrInst::GetInstance()->GetResource(ResourceType::GPU, 
build_index_id); + } for (auto& to_index_file : job->to_index_files()) { auto label = std::make_shared(std::weak_ptr(res_ptr)); diff --git a/core/src/scheduler/action/PushTaskToNeighbour.cpp b/core/src/scheduler/action/PushTaskToNeighbour.cpp index b8a4a1164b..9aed678937 100644 --- a/core/src/scheduler/action/PushTaskToNeighbour.cpp +++ b/core/src/scheduler/action/PushTaskToNeighbour.cpp @@ -138,73 +138,41 @@ Action::SpecifiedResourceLabelTaskScheduler(const ResourceMgrPtr& res_mgr, Resou std::shared_ptr event) { auto task_item = event->task_table_item_; auto task = event->task_table_item_->task; - if (resource->type() == ResourceType::DISK) { - // step 1: calculate shortest path per resource, from disk to compute resource - auto compute_resources = res_mgr->GetComputeResources(); - std::vector> paths; - std::vector transport_costs; - for (auto& res : compute_resources) { - std::vector path; - uint64_t transport_cost = ShortestPath(resource, res, res_mgr, path); - transport_costs.push_back(transport_cost); - paths.emplace_back(path); - } - // if (task->job_.lock()->type() == JobType::SEARCH) { - // auto label = task->label(); - // auto spec_label = std::static_pointer_cast(label); - // if (spec_label->resource().lock()->type() == ResourceType::CPU) { - // std::vector spec_path; - // spec_path.push_back(spec_label->resource().lock()->name()); - // spec_path.push_back(resource->name()); - // task->path() = Path(spec_path, spec_path.size() - 1); - // } else { - // // step 2: select min cost, cost(resource) = avg_cost * task_to_do + transport_cost - // uint64_t min_cost = std::numeric_limits::max(); - // uint64_t min_cost_idx = 0; - // for (uint64_t i = 0; i < compute_resources.size(); ++i) { - // if (compute_resources[i]->TotalTasks() == 0) { - // min_cost_idx = i; - // break; - // } - // uint64_t cost = compute_resources[i]->TaskAvgCost() * - // compute_resources[i]->NumOfTaskToExec() + - // transport_costs[i]; - // if (min_cost > cost) { - // min_cost = cost; - // 
min_cost_idx = i; - // } - // } - // - // // step 3: set path in task - // Path task_path(paths[min_cost_idx], paths[min_cost_idx].size() - 1); - // task->path() = task_path; - // } - // - // } else - if (task->job_.lock()->type() == JobType::BUILD) { - // step2: Read device id in config - // get build index gpu resource - server::Config& config = server::Config::GetInstance(); - int32_t build_index_gpu; - Status stat = config.GetResourceConfigIndexBuildDevice(build_index_gpu); - - bool find_gpu_res = false; - if (res_mgr->GetResource(ResourceType::GPU, build_index_gpu) != nullptr) { - for (uint64_t i = 0; i < compute_resources.size(); ++i) { - if (compute_resources[i]->name() == - res_mgr->GetResource(ResourceType::GPU, build_index_gpu)->name()) { - find_gpu_res = true; - Path task_path(paths[i], paths[i].size() - 1); - task->path() = task_path; - break; - } - } - } - if (not find_gpu_res) { - task->path() = Path(paths[0], paths[0].size() - 1); - } - } - } + // if (resource->type() == ResourceType::DISK) { + // // step 1: calculate shortest path per resource, from disk to compute resource + // auto compute_resources = res_mgr->GetComputeResources(); + // std::vector> paths; + // std::vector transport_costs; + // for (auto& res : compute_resources) { + // std::vector path; + // uint64_t transport_cost = ShortestPath(resource, res, res_mgr, path); + // transport_costs.push_back(transport_cost); + // paths.emplace_back(path); + // } + // if (task->job_.lock()->type() == JobType::BUILD) { + // // step2: Read device id in config + // // get build index gpu resource + // server::Config& config = server::Config::GetInstance(); + // int32_t build_index_gpu; + // Status stat = config.GetResourceConfigIndexBuildDevice(build_index_gpu); + // + // bool find_gpu_res = false; + // if (res_mgr->GetResource(ResourceType::GPU, build_index_gpu) != nullptr) { + // for (uint64_t i = 0; i < compute_resources.size(); ++i) { + // if (compute_resources[i]->name() == + // 
res_mgr->GetResource(ResourceType::GPU, build_index_gpu)->name()) { + // find_gpu_res = true; + // Path task_path(paths[i], paths[i].size() - 1); + // task->path() = task_path; + // break; + // } + // } + // } + // if (not find_gpu_res) { + // task->path() = Path(paths[0], paths[0].size() - 1); + // } + // } + // } if (resource->name() == task->path().Last()) { resource->WakeupExecutor(); diff --git a/core/src/scheduler/optimizer/OnlyGPUPass.cpp b/core/src/scheduler/optimizer/OnlyGPUPass.cpp index 3fcda0e8a3..a49b75b17e 100644 --- a/core/src/scheduler/optimizer/OnlyGPUPass.cpp +++ b/core/src/scheduler/optimizer/OnlyGPUPass.cpp @@ -46,7 +46,8 @@ OnlyGPUPass::Run(const TaskPtr& task) { auto label = std::make_shared(std::weak_ptr(res_ptr)); task->label() = label; - specified_gpu_id_ = specified_gpu_id_++ % gpu_id.size(); + ++specified_gpu_id_; + specified_gpu_id_ = specified_gpu_id_ % gpu_id.size(); return true; } diff --git a/core/src/server/Config.cpp b/core/src/server/Config.cpp index d651f5b3b3..0c56d69c39 100644 --- a/core/src/server/Config.cpp +++ b/core/src/server/Config.cpp @@ -596,6 +596,9 @@ Config::CheckCacheConfigGpuCacheCapacity(const std::string& value) { return s; } + if (gpu_index == server::CPU_DEVICE_ID) + return Status::OK(); + size_t gpu_memory; if (!ValidationUtil::GetGpuMemory(gpu_index, gpu_memory).ok()) { std::string msg = "Fail to get GPU memory for GPU device: " + std::to_string(gpu_index); @@ -1013,7 +1016,12 @@ Config::GetResourceConfigIndexBuildDevice(int32_t& value) { return s; } - value = std::stoi(str.substr(3)); + if (str == "cpu") { + value = CPU_DEVICE_ID; + } else { + value = std::stoi(str.substr(3)); + } + return Status::OK(); } diff --git a/core/src/server/Config.h b/core/src/server/Config.h index c93847b216..45591fb5ec 100644 --- a/core/src/server/Config.h +++ b/core/src/server/Config.h @@ -95,6 +95,8 @@ static const char* CONFIG_RESOURCE_SEARCH_RESOURCES = "search_resources"; static const char* CONFIG_RESOURCE_INDEX_BUILD_DEVICE 
= "index_build_device"; static const char* CONFIG_RESOURCE_INDEX_BUILD_DEVICE_DEFAULT = "gpu0"; +const int32_t CPU_DEVICE_ID = -1; + class Config { public: static Config& From 63033ebc1f484afc8ba4df467ce685c4044e7f4d Mon Sep 17 00:00:00 2001 From: fishpenguin Date: Fri, 1 Nov 2019 20:14:18 +0800 Subject: [PATCH 7/8] Code format Former-commit-id: 8c9c010fd45c0a6737b8ad6c4493469440292707 --- core/src/scheduler/SchedInst.cpp | 12 ++++++------ core/src/scheduler/optimizer/OnlyGPUPass.cpp | 3 +-- 2 files changed, 7 insertions(+), 8 deletions(-) diff --git a/core/src/scheduler/SchedInst.cpp b/core/src/scheduler/SchedInst.cpp index 8d58b831bf..f0c00c2d2a 100644 --- a/core/src/scheduler/SchedInst.cpp +++ b/core/src/scheduler/SchedInst.cpp @@ -55,8 +55,8 @@ load_simple_config() { // get resources auto gpu_ids = get_gpu_pool(); - int32_t build_gpu_id; - config.GetResourceConfigIndexBuildDevice(build_gpu_id); + int32_t index_build_device_id; + config.GetResourceConfigIndexBuildDevice(index_build_device_id); // create and connect ResMgrInst::GetInstance()->Add(ResourceFactory::Create("disk", "DISK", 0, true, false)); @@ -70,15 +70,15 @@ load_simple_config() { for (auto& gpu_id : gpu_ids) { ResMgrInst::GetInstance()->Add(ResourceFactory::Create(std::to_string(gpu_id), "GPU", gpu_id, true, true)); ResMgrInst::GetInstance()->Connect("cpu", std::to_string(gpu_id), pcie); - if (build_gpu_id == gpu_id) { + if (index_build_device_id == gpu_id) { find_build_gpu_id = true; } } - if (not find_build_gpu_id && build_gpu_id != server::CPU_DEVICE_ID) { + if (not find_build_gpu_id && index_build_device_id != server::CPU_DEVICE_ID) { ResMgrInst::GetInstance()->Add( - ResourceFactory::Create(std::to_string(build_gpu_id), "GPU", build_gpu_id, true, true)); - ResMgrInst::GetInstance()->Connect("cpu", std::to_string(build_gpu_id), pcie); + ResourceFactory::Create(std::to_string(index_build_device_id), "GPU", index_build_device_id, true, true)); + ResMgrInst::GetInstance()->Connect("cpu", 
std::to_string(index_build_device_id), pcie); } } diff --git a/core/src/scheduler/optimizer/OnlyGPUPass.cpp b/core/src/scheduler/optimizer/OnlyGPUPass.cpp index a49b75b17e..e5d3c71fd3 100644 --- a/core/src/scheduler/optimizer/OnlyGPUPass.cpp +++ b/core/src/scheduler/optimizer/OnlyGPUPass.cpp @@ -46,8 +46,7 @@ OnlyGPUPass::Run(const TaskPtr& task) { auto label = std::make_shared(std::weak_ptr(res_ptr)); task->label() = label; - ++specified_gpu_id_; - specified_gpu_id_ = specified_gpu_id_ % gpu_id.size(); + specified_gpu_id_ = (specified_gpu_id_ + 1) % gpu_id.size(); return true; } From 2f28f053d28363a5bc7aa17fb741586916a54b37 Mon Sep 17 00:00:00 2001 From: fishpenguin Date: Sat, 2 Nov 2019 11:26:08 +0800 Subject: [PATCH 8/8] change gpu_index to device_id Former-commit-id: 6993e98e19e31f942ffee5833b593a59cffa94a7 --- core/src/server/Config.cpp | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/core/src/server/Config.cpp b/core/src/server/Config.cpp index 0c56d69c39..dbe7d260c5 100644 --- a/core/src/server/Config.cpp +++ b/core/src/server/Config.cpp @@ -590,18 +590,18 @@ Config::CheckCacheConfigGpuCacheCapacity(const std::string& value) { return Status(SERVER_INVALID_ARGUMENT, msg); } else { uint64_t gpu_cache_capacity = std::stoi(value) * GB; - int gpu_index; - Status s = GetResourceConfigIndexBuildDevice(gpu_index); + int device_id; + Status s = GetResourceConfigIndexBuildDevice(device_id); if (!s.ok()) { return s; } - if (gpu_index == server::CPU_DEVICE_ID) + if (device_id == server::CPU_DEVICE_ID) return Status::OK(); size_t gpu_memory; - if (!ValidationUtil::GetGpuMemory(gpu_index, gpu_memory).ok()) { - std::string msg = "Fail to get GPU memory for GPU device: " + std::to_string(gpu_index); + if (!ValidationUtil::GetGpuMemory(device_id, gpu_memory).ok()) { + std::string msg = "Fail to get GPU memory for GPU device: " + std::to_string(device_id); return Status(SERVER_UNEXPECTED_ERROR, msg); } else if (gpu_cache_capacity >= gpu_memory) { 
std::string msg = "Invalid gpu cache capacity: " + value +