diff --git a/cpp/src/metrics/PrometheusMetrics.cpp b/cpp/src/metrics/PrometheusMetrics.cpp index a730091a46..3d83bff864 100644 --- a/cpp/src/metrics/PrometheusMetrics.cpp +++ b/cpp/src/metrics/PrometheusMetrics.cpp @@ -61,18 +61,14 @@ void PrometheusMetrics::GPUPercentGaugeSet() { if(!startup_) return; int numDevice = server::SystemInfo::GetInstance().num_device(); -// std::vector values = server::SystemInfo::GetInstance().GPUPercent(); + std::vector used_total = server::SystemInfo::GetInstance().GPUMemoryTotal(); std::vector used_memory = server::SystemInfo::GetInstance().GPUMemoryUsed(); - constexpr unsigned long long MtoB = 1024*1024; for (int i = 0; i < numDevice; i++) { prometheus::Gauge &GPU_percent = GPU_percent_.Add({{"DeviceNum", std::to_string(i)}}); -// std::cout << "nvmlDeviceGetUtilizationRates: " << values[i] << std::endl; -// GPU_percent.Set(static_cast(values[i])); - double percent = (double)used_memory[i] / (double)MtoB; - double res = (percent / 6078) * 100; - GPU_percent.Set(res); + double percent = (double)used_memory[i] / (double)used_total[i]; + GPU_percent.Set(percent * 100); } } diff --git a/cpp/src/metrics/SystemInfo.cpp b/cpp/src/metrics/SystemInfo.cpp index 9348b0d9c6..7628db78bb 100644 --- a/cpp/src/metrics/SystemInfo.cpp +++ b/cpp/src/metrics/SystemInfo.cpp @@ -192,17 +192,17 @@ SystemInfo::CPUPercent() { } -std::vector -SystemInfo::GPUPercent() { +std::vector +SystemInfo::GPUMemoryTotal() { // get GPU usage percent if(!initialized_) Init(); - std::vector result; - nvmlUtilization_t utilization; + std::vector result; + nvmlMemory_t nvmlMemory; for (int i = 0; i < num_device_; ++i) { nvmlDevice_t device; nvmlDeviceGetHandleByIndex(i, &device); - nvmlDeviceGetUtilizationRates(device, &utilization); - result.push_back(utilization.gpu); + nvmlDeviceGetMemoryInfo(device, &nvmlMemory); + result.push_back(nvmlMemory.total); } return result; } diff --git a/cpp/src/metrics/SystemInfo.h b/cpp/src/metrics/SystemInfo.h index 5ffb2c773f..629aaf7220 100644 --- a/cpp/src/metrics/SystemInfo.h +++ b/cpp/src/metrics/SystemInfo.h @@ -60,7 +60,7 @@ class SystemInfo { double MemoryPercent(); double CPUPercent(); std::pair Octets(); - std::vector GPUPercent(); + std::vector GPUMemoryTotal(); std::vector GPUMemoryUsed(); std::vector CPUCorePercent();