Gpu check (#2129)

* add GpuCheck class Signed-off-by: yhz <413554850@qq.com> * Check gpu environment Signed-off-by: Yhz <yinghao.zou@zilliz.com> * add server checker unittest Signed-off-by: Yhz <yinghao.zou@zilliz.com>
2025-12-29 23:15:28 +08:00 · 2020-04-27 16:56:12 +08:00 · 2020-04-27 16:56:12 +08:00 · bf42dbd5b6
commit bf42dbd5b6
parent 1f0466708f
7 changed files with 497 additions and 3 deletions
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@ -19,6 +19,7 @@ Please mark all change in change log and use the issue from GitHub
 -   \#2054 Check if CPU instruction sets are illegal
 -   \#2059 Add lock file avoid multiple instances modifying data at the same time
 -   \#2064 Warn when use SQLite as metadata management
+-   \#2111 Check GPU environment before starting server

 ## Improvement
 -   \#221 Refactor LOG macro
--- a/core/src/server/Server.cpp
+++ b/core/src/server/Server.cpp
@ -24,6 +24,7 @@
 #include "server/DBWrapper.h"
 #include "server/grpc_impl/GrpcServer.h"
 #include "server/init/CpuChecker.h"
+#include "server/init/GpuChecker.h"
 #include "server/web_impl/WebServer.h"
 #include "src/version.h"
 //#include "storage/s3/S3ClientWrapper.h"
@ -237,6 +238,13 @@ Server::Start() {
        if (!s.ok()) {
            return s;
        }
+
+#ifdef MILVUS_GPU_VERSION
+        s = GpuChecker::CheckGpuEnvironment();
+        if (!s.ok()) {
+            return s;
+        }
+#endif
        /* record config and hardware information into log */
        LogConfigInFile(config_filename_);
        LogCpuInfo();
--- a/core/src/server/init/CpuChecker.cpp
+++ b/core/src/server/init/CpuChecker.cpp
@ -15,6 +15,8 @@
 #include <string>
 #include <vector>

+#include <fiu-local.h>
+
 #include "faiss/FaissHook.h"
 #include "faiss/utils/instruction_set.h"
 #include "utils/Log.h"
@ -28,16 +30,26 @@ CpuChecker::CheckCpuInstructionSet() {
    std::vector<std::string> instruction_sets;

    auto& instruction_set_inst = faiss::InstructionSet::GetInstance();
-    if (faiss::support_avx512()) {
+
+    bool support_avx512 = faiss::support_avx512();
+    fiu_do_on("CpuChecker.CheckCpuInstructionSet.not_support_avx512", support_avx512 = false);
+    if (support_avx512) {
        instruction_sets.emplace_back("avx512");
    }
-    if (instruction_set_inst.AVX2()) {
+
+    bool support_axv2 = instruction_set_inst.AVX2();
+    fiu_do_on("CpuChecker.CheckCpuInstructionSet.not_support_avx2", support_axv2 = false);
+    if (support_axv2) {
        instruction_sets.emplace_back("avx2");
    }
-    if (instruction_set_inst.SSE42()) {
+
+    bool support_sse4_2 = instruction_set_inst.SSE42();
+    fiu_do_on("CpuChecker.CheckCpuInstructionSet.not_support_sse4_2", support_sse4_2 = false);
+    if (support_sse4_2) {
        instruction_sets.emplace_back("sse4_2");
    }

+    fiu_do_on("CpuChecker.CheckCpuInstructionSet.instruction_sets_empty", instruction_sets.clear());
    if (instruction_sets.empty()) {
        std::string msg =
            "CPU instruction sets are not supported. Ensure the CPU supports at least one of the following instruction "
--- a/core/src/server/init/GpuChecker.cpp
+++ b/core/src/server/init/GpuChecker.cpp
@ -0,0 +1,274 @@
+// Copyright (C) 2019-2020 Zilliz. All rights reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software distributed under the License
+// is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
+// or implied. See the License for the specific language governing permissions and limitations under the License.
+
+#ifdef MILVUS_GPU_VERSION
+#include "server/init/GpuChecker.h"
+
+#include <iostream>
+#include <set>
+#include <vector>
+
+#include <fiu-local.h>
+
+#include "config/Config.h"
+#include "utils/Log.h"
+
+namespace milvus {
+namespace server {
+
+namespace {
+/**
+ * Render an integer CUDA version as a "major.minor" string.
+ *
+ * CUDA reports versions encoded as 1000 * major + 10 * minor (10020 means 10.2), so the minor
+ * part is the remainder below 1000 divided by 10.
+ *
+ * @param version encoded version, as filled in by cudaDriverGetVersion / cudaRuntimeGetVersion
+ * @return text such as "10.2"
+ */
+std::string
+ConvertCudaVersion(int version) {
+    const int major = version / 1000;
+    // Reduce modulo 1000 (not 100) so that a two-digit minor version is not truncated.
+    const int minor = (version % 1000) / 10;
+    return std::to_string(major) + "." + std::to_string(minor);
+}
+}  // namespace
+
+// Minimum requirements enforced by GpuChecker::CheckGpuEnvironment().
+// Lowest accepted CUDA driver/runtime version, in the integer form returned by the CUDA version queries.
+const int CUDA_MIN_VERSION = 10000;  // 10.0
+// Lowest accepted GPU compute capability.
+const float GPU_MIN_COMPUTE_CAPACITY = 6.0;
+// Lowest accepted NVIDIA driver version, kept as text.
+const char* NVIDIA_MIN_DRIVER_VERSION = "418.00";
+
+/**
+ * Describe an NVML return code as "code: <number>, message: <NVML error text>".
+ *
+ * @param error_no NVML return code to describe
+ * @return the formatted description
+ */
+std::string
+GpuChecker::NvmlErrorString(nvmlReturn_t error_no) {
+    std::string description = "code: ";
+    description += std::to_string(error_no);
+    description += ", message: ";
+    description += nvmlErrorString(error_no);
+    return description;
+}
+
+/**
+ * Describe a CUDA error code as "code: <number>, message: <CUDA error text>".
+ *
+ * @param error_no CUDA error code to describe
+ * @return the formatted description
+ */
+std::string
+GpuChecker::CudaErrorString(cudaError_t error_no) {
+    std::string description = "code: ";
+    description += std::to_string(error_no);
+    description += ", message: ";
+    description += cudaGetErrorString(error_no);
+    return description;
+}
+
+/**
+ * Query the CUDA compute capability of one device through NVML.
+ *
+ * @param device NVML handle of the device to query
+ * @param major  receives the major part of the compute capability
+ * @param minor  receives the minor part of the compute capability
+ * @return Status::OK() on success, otherwise SERVER_UNEXPECTED_ERROR carrying the NVML error description
+ */
+Status
+GpuChecker::GetGpuComputeCapacity(nvmlDevice_t device, int& major, int& minor) {
+    const nvmlReturn_t nvml_code = nvmlDeviceGetCudaComputeCapability(device, &major, &minor);
+    if (NVML_SUCCESS == nvml_code) {
+        return Status::OK();
+    }
+
+    return Status(SERVER_UNEXPECTED_ERROR, NvmlErrorString(nvml_code));
+}
+
+/**
+ * Read the NVIDIA driver version string through NVML.
+ *
+ * @param version receives the driver version text; left untouched when the query fails
+ * @return Status::OK() on success, otherwise SERVER_UNEXPECTED_ERROR carrying the NVML error description
+ */
+Status
+GpuChecker::GetGpuNvidiaDriverVersion(std::string& version) {
+    // Zero-initialised so the buffer is always a terminated C string.
+    char version_buffer[NVML_SYSTEM_DRIVER_VERSION_BUFFER_SIZE] = {0};
+
+    const auto query_code = nvmlSystemGetDriverVersion(version_buffer, NVML_SYSTEM_DRIVER_VERSION_BUFFER_SIZE);
+    if (NVML_SUCCESS == query_code) {
+        version = std::string(version_buffer);
+        return Status::OK();
+    }
+
+    return Status(SERVER_UNEXPECTED_ERROR, NvmlErrorString(query_code));
+}
+
+/**
+ * Read the CUDA driver version.
+ *
+ * @param version receives the version in CUDA's integer form
+ * @return Status::OK() on success, otherwise SERVER_UNEXPECTED_ERROR carrying the CUDA error description
+ */
+Status
+GpuChecker::GetGpuCudaDriverVersion(int& version) {
+    const cudaError_t query_code = cudaDriverGetVersion(&version);
+    if (cudaSuccess == query_code) {
+        return Status::OK();
+    }
+
+    return Status(SERVER_UNEXPECTED_ERROR, "Check cuda driver version failed. " + CudaErrorString(query_code));
+}
+
+/**
+ * Read the CUDA runtime version.
+ *
+ * @param version receives the version in CUDA's integer form
+ * @return Status::OK() on success, otherwise SERVER_UNEXPECTED_ERROR carrying the CUDA error description
+ */
+Status
+GpuChecker::GetGpuCudaRuntimeVersion(int& version) {
+    const cudaError_t query_code = cudaRuntimeGetVersion(&version);
+    if (cudaSuccess == query_code) {
+        return Status::OK();
+    }
+
+    return Status(SERVER_UNEXPECTED_ERROR, "Check cuda runtime version failed. " + CudaErrorString(query_code));
+}
+
+namespace {
+// Compare two dotted numeric version strings (e.g. "418.87.01" and "418.00") component by component.
+// Returns a negative value, zero or a positive value when lhs is lower than, equal to or higher than rhs.
+// Missing components count as 0; non-digit characters inside a component are ignored.
+int
+CompareDottedVersion(const std::string& lhs, const std::string& rhs) {
+    size_t lpos = 0;
+    size_t rpos = 0;
+    while (lpos < lhs.size() || rpos < rhs.size()) {
+        uint64_t lval = 0;
+        for (; lpos < lhs.size() && lhs[lpos] != '.'; ++lpos) {
+            if (lhs[lpos] >= '0' && lhs[lpos] <= '9') {
+                lval = lval * 10 + static_cast<uint64_t>(lhs[lpos] - '0');
+            }
+        }
+
+        uint64_t rval = 0;
+        for (; rpos < rhs.size() && rhs[rpos] != '.'; ++rpos) {
+            if (rhs[rpos] >= '0' && rhs[rpos] <= '9') {
+                rval = rval * 10 + static_cast<uint64_t>(rhs[rpos] - '0');
+            }
+        }
+
+        if (lval != rval) {
+            return lval < rval ? -1 : 1;
+        }
+
+        // Step over the '.' separators, if there are any left.
+        if (lpos < lhs.size()) {
+            ++lpos;
+        }
+        if (rpos < rhs.size()) {
+            ++rpos;
+        }
+    }
+    return 0;
+}
+}  // namespace
+
+/**
+ * Validate the GPU environment before the server starts.
+ *
+ * Returns OK immediately when GPU resources are disabled in the configuration. Otherwise it checks, in order:
+ * the NVIDIA driver version, the CUDA driver and runtime versions, that at least one GPU is present, and the
+ * compute capability of every GPU configured for index building or search. Each step has a fiu fail point
+ * so the unit tests can force it to fail.
+ *
+ * @return Status::OK() when every check passes, otherwise SERVER_UNEXPECTED_ERROR with a description of the
+ *         first failed check (also written to the server log, and for some checks to stderr)
+ */
+Status
+GpuChecker::CheckGpuEnvironment() {
+    std::string err_msg;
+
+    auto& config = Config::GetInstance();
+    bool gpu_enable = true;
+    auto status = config.GetGpuResourceConfigEnable(gpu_enable);
+    if (!status.ok()) {
+        err_msg = "Cannot check if GPUs are enable from configuration. " + status.message();
+        LOG_SERVER_FATAL_ << err_msg;
+        return Status(SERVER_UNEXPECTED_ERROR, err_msg);
+    }
+    if (!gpu_enable) {
+        return Status::OK();
+    }
+
+    std::vector<int64_t> build_gpus;
+    status = config.GetGpuResourceConfigBuildIndexResources(build_gpus);
+    if (!status.ok()) {
+        err_msg = "Get GPU resources of building index failed. " + status.message();
+        LOG_SERVER_FATAL_ << err_msg;
+        return Status(SERVER_UNEXPECTED_ERROR, err_msg);
+    }
+
+    std::vector<int64_t> search_gpus;
+    status = config.GetGpuResourceConfigSearchResources(search_gpus);
+    if (!status.ok()) {
+        err_msg = "Get GPU resources of search failed. " + status.message();
+        LOG_SERVER_FATAL_ << err_msg;
+        return Status(SERVER_UNEXPECTED_ERROR, err_msg);
+    }
+
+    // Only GPUs that are configured for building or searching are inspected below.
+    std::set<int64_t> gpu_sets(build_gpus.begin(), build_gpus.end());
+    gpu_sets.insert(search_gpus.begin(), search_gpus.end());
+
+    const nvmlReturn_t init_code = nvmlInit();
+
+    // Once nvmlInit() has succeeded, every return path has to release NVML again. The guard covers the early
+    // error returns; the success path disarms it and calls nvmlShutdown() itself so the result can be checked.
+    struct NvmlSession {
+        bool owned;
+        ~NvmlSession() {
+            if (owned) {
+                nvmlShutdown();
+            }
+        }
+    };
+    NvmlSession nvml_session{NVML_SUCCESS == init_code};
+
+    nvmlReturn_t nvmlresult = init_code;
+    fiu_do_on("GpuChecker.CheckGpuEnvironment.nvml_init_fail", nvmlresult = NVML_ERROR_UNKNOWN);
+    if (NVML_SUCCESS != nvmlresult) {
+        err_msg = "nvml initialize failed. " + NvmlErrorString(nvmlresult);
+        LOG_SERVER_FATAL_ << err_msg;
+        std::cerr << err_msg << std::endl;
+        return Status(SERVER_UNEXPECTED_ERROR, err_msg);
+    }
+
+    /* Check nvidia driver version */
+    std::string nvidia_version;
+    status = GetGpuNvidiaDriverVersion(nvidia_version);
+    fiu_do_on("GpuChecker.CheckGpuEnvironment.get_nvidia_driver_fail", status = Status(SERVER_UNEXPECTED_ERROR, ""));
+    if (!status.ok()) {
+        err_msg = " Check nvidia driver failed. " + status.message();
+        LOG_SERVER_FATAL_ << err_msg;
+        return Status(SERVER_UNEXPECTED_ERROR, err_msg);
+    }
+
+    fiu_do_on("GpuChecker.CheckGpuEnvironment.nvidia_driver_too_slow",
+              nvidia_version = std::to_string(std::stof(NVIDIA_MIN_DRIVER_VERSION) - 1));
+    // Compare numerically per component: a plain string comparison would rank "1000.x" below "418.00".
+    if (CompareDottedVersion(nvidia_version, NVIDIA_MIN_DRIVER_VERSION) < 0) {
+        err_msg = "Nvidia driver version " + std::string(nvidia_version) + " is lower than " +
+                  std::string(NVIDIA_MIN_DRIVER_VERSION);
+        LOG_SERVER_FATAL_ << err_msg;
+        std::cerr << err_msg << std::endl;
+        return Status(SERVER_UNEXPECTED_ERROR, err_msg);
+    }
+
+    /* Check Cuda version */
+    int cuda_driver_version = 0;
+    status = GetGpuCudaDriverVersion(cuda_driver_version);
+    fiu_do_on("GpuChecker.CheckGpuEnvironment.cuda_driver_fail", status = Status(SERVER_UNEXPECTED_ERROR, ""));
+    if (!status.ok()) {
+        err_msg = " Check Cuda driver failed. " + status.message();
+        LOG_SERVER_FATAL_ << err_msg;
+        return Status(SERVER_UNEXPECTED_ERROR, err_msg);
+    }
+    fiu_do_on("GpuChecker.CheckGpuEnvironment.cuda_driver_too_slow", cuda_driver_version = CUDA_MIN_VERSION - 1);
+    if (cuda_driver_version < CUDA_MIN_VERSION) {
+        err_msg = "Cuda driver version is " + ConvertCudaVersion(cuda_driver_version) +
+                  ", lower than minimum required version " + ConvertCudaVersion(CUDA_MIN_VERSION);
+        LOG_SERVER_FATAL_ << err_msg;
+        std::cerr << err_msg << std::endl;
+        return Status(SERVER_UNEXPECTED_ERROR, err_msg);
+    }
+
+    int cuda_runtime_version = 0;
+    status = GetGpuCudaRuntimeVersion(cuda_runtime_version);
+    fiu_do_on("GpuChecker.CheckGpuEnvironment.cuda_runtime_driver_fail", status = Status(SERVER_UNEXPECTED_ERROR, ""));
+    if (!status.ok()) {
+        err_msg = " Check Cuda runtime driver failed. " + status.message();
+        LOG_SERVER_FATAL_ << err_msg;
+        return Status(SERVER_UNEXPECTED_ERROR, err_msg);
+    }
+    fiu_do_on("GpuChecker.CheckGpuEnvironment.cuda_runtime_driver_too_slow",
+              cuda_runtime_version = CUDA_MIN_VERSION - 1);
+    if (cuda_runtime_version < CUDA_MIN_VERSION) {
+        err_msg = "Cuda runtime version is " + ConvertCudaVersion(cuda_runtime_version) +
+                  ", lower than minimum required version " + ConvertCudaVersion(CUDA_MIN_VERSION);
+        LOG_SERVER_FATAL_ << err_msg;
+        std::cerr << err_msg << std::endl;
+        return Status(SERVER_UNEXPECTED_ERROR, err_msg);
+    }
+
+    /* Compute capacity */
+    uint32_t device_count = 0;
+    nvmlresult = nvmlDeviceGetCount(&device_count);
+    fiu_do_on("GpuChecker.CheckGpuEnvironment.nvml_get_device_count_fail", nvmlresult = NVML_ERROR_UNKNOWN);
+    if (NVML_SUCCESS != nvmlresult) {
+        err_msg = "Obtain GPU count failed. " + NvmlErrorString(nvmlresult);
+        LOG_SERVER_FATAL_ << err_msg;
+        return Status(SERVER_UNEXPECTED_ERROR, err_msg);
+    }
+
+    fiu_do_on("GpuChecker.CheckGpuEnvironment.nvml_device_count_zero", device_count = 0);
+    if (device_count == 0) {
+        err_msg = "GPU count is zero. Make sure there are available GPUs in host machine";
+        LOG_SERVER_FATAL_ << err_msg;
+        return Status(SERVER_UNEXPECTED_ERROR, err_msg);
+    }
+
+    for (uint32_t i = 0; i < device_count; i++) {
+        // Devices that are not referenced by the configuration are not checked.
+        if (gpu_sets.find(i) == gpu_sets.end()) {
+            continue;
+        }
+
+        nvmlDevice_t device;
+        nvmlresult = nvmlDeviceGetHandleByIndex(i, &device);
+        fiu_do_on("GpuChecker.CheckGpuEnvironment.nvml_get_device_handle_fail", nvmlresult = NVML_ERROR_UNKNOWN);
+        if (NVML_SUCCESS != nvmlresult) {
+            err_msg = "Obtain GPU " + std::to_string(i) + " handle failed. " + NvmlErrorString(nvmlresult);
+            LOG_SERVER_FATAL_ << err_msg;
+            return Status(SERVER_UNEXPECTED_ERROR, err_msg);
+        }
+
+        char device_name[NVML_DEVICE_NAME_BUFFER_SIZE] = {0};
+        nvmlresult = nvmlDeviceGetName(device, device_name, NVML_DEVICE_NAME_BUFFER_SIZE);
+        fiu_do_on("GpuChecker.CheckGpuEnvironment.nvml_get_device_name_fail", nvmlresult = NVML_ERROR_UNKNOWN);
+        if (NVML_SUCCESS != nvmlresult) {
+            err_msg = "Obtain GPU " + std::to_string(i) + " name failed. " + NvmlErrorString(nvmlresult);
+            LOG_SERVER_FATAL_ << err_msg;
+            return Status(SERVER_UNEXPECTED_ERROR, err_msg);
+        }
+
+        int major = 0;
+        int minor = 0;
+        status = GetGpuComputeCapacity(device, major, minor);
+        fiu_do_on("GpuChecker.CheckGpuEnvironment.device_compute_capacity_fail",
+                  status = Status(SERVER_UNEXPECTED_ERROR, ""));
+        if (!status.ok()) {
+            err_msg = "Obtain GPU " + std::to_string(i) + " compute capacity failed. " + status.message();
+            LOG_SERVER_FATAL_ << err_msg;
+            std::cerr << err_msg << std::endl;
+            return Status(SERVER_UNEXPECTED_ERROR, err_msg);
+        }
+
+        // The minor part counts tenths (major 5, minor 2 is capability 5.2). Adding it undivided would
+        // turn 5.2 into 7.0 and let a too-weak device pass the minimum check.
+        float cc = major + minor / 10.0f;
+        fiu_do_on("GpuChecker.CheckGpuEnvironment.device_compute_capacity_too_weak", cc = GPU_MIN_COMPUTE_CAPACITY - 1);
+        if (cc < GPU_MIN_COMPUTE_CAPACITY) {
+            err_msg = "GPU " + std::to_string(i) + " compute capability " + std::to_string(cc) +
+                      " is too weak. Required least GPU compute capability is " +
+                      std::to_string(GPU_MIN_COMPUTE_CAPACITY);
+            LOG_SERVER_FATAL_ << err_msg;
+            std::cerr << err_msg << std::endl;
+            return Status(SERVER_UNEXPECTED_ERROR, err_msg);
+        }
+
+        LOG_SERVER_INFO_ << "GPU" << i << ": name=" << device_name << ", compute capacity=" << cc;
+    }
+
+    // Shut down explicitly on the success path so a failure can be reported; the guard must not repeat it.
+    nvml_session.owned = false;
+    nvmlresult = nvmlShutdown();
+    fiu_do_on("GpuChecker.CheckGpuEnvironment.nvml_shutdown_fail", nvmlresult = NVML_ERROR_UNKNOWN);
+    if (NVML_SUCCESS != nvmlresult) {
+        err_msg = "nvml shutdown handle failed. " + NvmlErrorString(nvmlresult);
+        LOG_SERVER_FATAL_ << err_msg;
+        return Status(SERVER_UNEXPECTED_ERROR, err_msg);
+    }
+
+    std::cout << "Nvidia driver version: " << nvidia_version << "\n"
+              << "CUDA Driver Version / Runtime Version : " << ConvertCudaVersion(cuda_driver_version) << " / "
+              << ConvertCudaVersion(cuda_runtime_version) << std::endl;
+
+    return Status::OK();
+}
+
+}  // namespace server
+}  // namespace milvus
+#endif
--- a/core/src/server/init/GpuChecker.h
+++ b/core/src/server/init/GpuChecker.h
@ -0,0 +1,58 @@
+// Copyright (C) 2019-2020 Zilliz. All rights reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software distributed under the License
+// is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
+// or implied. See the License for the specific language governing permissions and limitations under the License.
+
+#ifdef MILVUS_GPU_VERSION
+
+#pragma once
+
+#include <string>
+
+#include <cuda_runtime.h>
+#include <nvml.h>
+
+#include "utils/Status.h"
+
+namespace milvus {
+namespace server {
+
+// Minimum requirements used by GpuChecker::CheckGpuEnvironment(); the values are defined in GpuChecker.cpp.
+// Lowest accepted CUDA driver/runtime version, in CUDA's integer version form.
+extern const int CUDA_MIN_VERSION;
+// Lowest accepted GPU compute capability.
+extern const float GPU_MIN_COMPUTE_CAPACITY;
+// Lowest accepted NVIDIA driver version, as text.
+extern const char* NVIDIA_MIN_DRIVER_VERSION;
+
+/**
+ * Start-up check of the GPU environment (NVIDIA driver, CUDA versions, device compute capability).
+ * All members are static; only CheckGpuEnvironment() is public.
+ */
+class GpuChecker {
+ public:
+    // Run every GPU environment check; returns OK when GPU resources are disabled or all checks pass.
+    static Status
+    CheckGpuEnvironment();
+
+ private:
+    // Format an NVML return code together with its NVML error text.
+    static std::string
+    NvmlErrorString(nvmlReturn_t error_no);
+
+    // Format a CUDA error code together with its CUDA error text.
+    static std::string
+    CudaErrorString(cudaError_t error_no);
+
+    // Query the compute capability (major/minor) of one device through NVML.
+    static Status
+    GetGpuComputeCapacity(nvmlDevice_t device, int& major, int& minor);
+
+    // Query the NVIDIA driver version string through NVML.
+    static Status
+    GetGpuNvidiaDriverVersion(std::string& version);
+
+    // Query the CUDA driver version in CUDA's integer form.
+    static Status
+    GetGpuCudaDriverVersion(int& version);
+
+    // Query the CUDA runtime version in CUDA's integer form.
+    static Status
+    GetGpuCudaRuntimeVersion(int& version);
+};
+
+}  // namespace server
+}  // namespace milvus
+#endif
--- a/core/unittest/server/CMakeLists.txt
+++ b/core/unittest/server/CMakeLists.txt
@ -13,6 +13,7 @@

 set(test_files
        ${CMAKE_CURRENT_SOURCE_DIR}/test_cache.cpp
+        ${CMAKE_CURRENT_SOURCE_DIR}/test_check.cpp
        ${CMAKE_CURRENT_SOURCE_DIR}/test_config.cpp
        ${CMAKE_CURRENT_SOURCE_DIR}/test_rpc.cpp
        ${CMAKE_CURRENT_SOURCE_DIR}/test_web.cpp
--- a/core/unittest/server/test_check.cpp
+++ b/core/unittest/server/test_check.cpp
@ -0,0 +1,140 @@
+// Copyright (C) 2019-2020 Zilliz. All rights reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software distributed under the License
+// is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
+// or implied. See the License for the specific language governing permissions and limitations under the License.
+
+#include <fiu-control.h>
+#include <fiu-local.h>
+#include <gtest/gtest.h>
+
+#include "config/Config.h"
+#include "server/init/CpuChecker.h"
+#ifdef MILVUS_GPU_VERSION
+#include "server/init/GpuChecker.h"
+#endif
+
+namespace ms = milvus::server;
+
+// Fixture shared by the server start-up checker tests. It holds no state, so the default
+// set-up and tear-down of testing::Test are sufficient.
+class ServerCheckerTest : public testing::Test {};
+
+// Without any fail point active, the instruction-set check is expected to pass on the test host.
+TEST_F(ServerCheckerTest, CPU_TEST) {
+    ASSERT_TRUE(ms::CpuChecker::CheckCpuInstructionSet().ok());
+}
+
+// Drive CpuChecker through its fail points: first force an empty instruction-set list, then switch the
+// individual instruction sets off one after another (the fail points accumulate until the end of the test).
+TEST_F(ServerCheckerTest, CPU_FAIL_TEST) {
+    const char* const empty_sets = "CpuChecker.CheckCpuInstructionSet.instruction_sets_empty";
+    const char* const no_avx512 = "CpuChecker.CheckCpuInstructionSet.not_support_avx512";
+    const char* const no_avx2 = "CpuChecker.CheckCpuInstructionSet.not_support_avx2";
+    const char* const no_sse4_2 = "CpuChecker.CheckCpuInstructionSet.not_support_sse4_2";
+
+    auto cpu_check_passes = [] { return ms::CpuChecker::CheckCpuInstructionSet().ok(); };
+
+    // An emptied instruction-set list must be rejected.
+    fiu_enable(empty_sets, 1, nullptr, 0);
+    ASSERT_FALSE(cpu_check_passes());
+    fiu_disable(empty_sets);
+
+    // avx512 reported as unsupported; presumably the host still offers avx2 and/or sse4_2.
+    fiu_enable(no_avx512, 1, nullptr, 0);
+    ASSERT_TRUE(cpu_check_passes());
+
+    // avx2 switched off as well, leaving sse4_2 only.
+    fiu_enable(no_avx2, 1, nullptr, 0);
+    ASSERT_TRUE(cpu_check_passes());
+
+    // None of sse4_2, avx2 and avx512 left: the check has to fail.
+    fiu_enable(no_sse4_2, 1, nullptr, 0);
+    ASSERT_FALSE(cpu_check_passes());
+
+    fiu_disable(no_sse4_2);
+    fiu_disable(no_avx2);
+    fiu_disable(no_avx512);
+}
+
+#ifdef MILVUS_GPU_VERSION
+// The GPU environment check must pass both with GPU resources enabled and with them disabled.
+TEST_F(ServerCheckerTest, GPU_TEST) {
+    auto& config = ms::Config::GetInstance();
+
+    // GPU resources enabled: the full environment check runs.
+    auto enable_status = config.SetGpuResourceConfigEnable("true");
+    ASSERT_TRUE(enable_status.ok()) << enable_status.message();
+
+    auto enabled_check = ms::GpuChecker::CheckGpuEnvironment();
+    ASSERT_TRUE(enabled_check.ok()) << enabled_check.message();
+
+    // GPU resources disabled: the check returns OK without inspecting the hardware.
+    auto disable_status = config.SetGpuResourceConfigEnable("false");
+    ASSERT_TRUE(disable_status.ok()) << disable_status.message();
+
+    auto disabled_check = ms::GpuChecker::CheckGpuEnvironment();
+    ASSERT_TRUE(disabled_check.ok()) << disabled_check.message();
+}
+
+// Enable each GpuChecker fail point in turn and expect the environment check to fail.
+// The fail point is disabled again before the expectation is evaluated, so a failing expectation can
+// never leave a fail point active for later tests, and the failure message names the fail point.
+TEST_F(ServerCheckerTest, GPU_FAIL_TEST) {
+    auto& config = ms::Config::GetInstance();
+    auto status = config.SetGpuResourceConfigEnable("true");
+    ASSERT_TRUE(status.ok()) << status.message();
+
+    // Listed in the order in which CheckGpuEnvironment() reaches them.
+    const char* const fail_points[] = {
+        "GpuChecker.CheckGpuEnvironment.nvml_init_fail",
+        "GpuChecker.CheckGpuEnvironment.get_nvidia_driver_fail",
+        "GpuChecker.CheckGpuEnvironment.nvidia_driver_too_slow",
+        "GpuChecker.CheckGpuEnvironment.cuda_driver_fail",
+        "GpuChecker.CheckGpuEnvironment.cuda_driver_too_slow",
+        "GpuChecker.CheckGpuEnvironment.cuda_runtime_driver_fail",
+        "GpuChecker.CheckGpuEnvironment.cuda_runtime_driver_too_slow",
+        "GpuChecker.CheckGpuEnvironment.nvml_get_device_count_fail",
+        "GpuChecker.CheckGpuEnvironment.nvml_device_count_zero",
+        "GpuChecker.CheckGpuEnvironment.nvml_get_device_handle_fail",
+        "GpuChecker.CheckGpuEnvironment.nvml_get_device_name_fail",
+        "GpuChecker.CheckGpuEnvironment.device_compute_capacity_fail",
+        "GpuChecker.CheckGpuEnvironment.device_compute_capacity_too_weak",
+        "GpuChecker.CheckGpuEnvironment.nvml_shutdown_fail",
+    };
+
+    for (const char* fail_point : fail_points) {
+        fiu_enable(fail_point, 1, nullptr, 0);
+        const bool check_passed = ms::GpuChecker::CheckGpuEnvironment().ok();
+        fiu_disable(fail_point);
+
+        EXPECT_FALSE(check_passed) << "fail point: " << fail_point;
+    }
+}
+
+#endif