From e102bddaa5b75cb83f72b51ad7df359455fef61e Mon Sep 17 00:00:00 2001 From: Heisenberg Date: Wed, 9 Oct 2019 11:10:34 +0800 Subject: [PATCH 1/2] MS-601 Docker logs error caused by get CPUTemperature error Former-commit-id: 4611939ce3c9b66313aef12991b3477c43813ce7 --- cpp/src/metrics/PrometheusMetrics.cpp | 2 +- cpp/src/metrics/SystemInfo.cpp | 46 ++++++++++++++++++++------- 2 files changed, 36 insertions(+), 12 deletions(-) diff --git a/cpp/src/metrics/PrometheusMetrics.cpp b/cpp/src/metrics/PrometheusMetrics.cpp index bc1860389f..182f14d46c 100644 --- a/cpp/src/metrics/PrometheusMetrics.cpp +++ b/cpp/src/metrics/PrometheusMetrics.cpp @@ -46,7 +46,7 @@ PrometheusMetrics::Init() { return s.code(); } - const std::string uri = std::string("/tmp/metrics"); + const std::string uri = std::string("/metrics"); const std::size_t num_threads = 2; // Init Exposer diff --git a/cpp/src/metrics/SystemInfo.cpp b/cpp/src/metrics/SystemInfo.cpp index 1414d94eae..70e917ad04 100644 --- a/cpp/src/metrics/SystemInfo.cpp +++ b/cpp/src/metrics/SystemInfo.cpp @@ -24,6 +24,9 @@ #include #include #include +#include +#include +#include namespace milvus { namespace server { @@ -237,18 +240,39 @@ SystemInfo::GPUTemperature() { std::vector SystemInfo::CPUTemperature() { std::vector result; - for (int i = 0; i <= num_physical_processors_; ++i) { - std::string path = "/sys/class/thermal/thermal_zone" + std::to_string(i) + "/temp"; - FILE* file = fopen(path.data(), "r"); - if (file == nullptr) { - perror("Could not open thermal file"); - return result; - } - float temp; - fscanf(file, "%f", &temp); - result.push_back(temp / 1000); - fclose(file); + std::string path = "/sys/class/hwmon/"; + + DIR *dir = NULL; + dir = opendir(path.c_str()); + if (!dir) { + perror("opendir"); + return result; } + + struct dirent *ptr = NULL; + while ((ptr = readdir(dir)) != NULL) { + std::string filename(path); + filename.append(ptr->d_name); + + char buf[100]; + if (readlink(filename.c_str(), buf, 100) != -1) { + std::string m(buf); + if (m.find("coretemp") != std::string::npos) { + std::string object = filename; + object += "/temp1_input"; + FILE *file = fopen(object.c_str(), "r"); + if (file == nullptr) { + perror("Could not open temperature file"); + exit(1); + } + float temp; + fscanf(file, "%f", &temp); + result.push_back(temp / 1000); + } + } + } + closedir(dir); + return result; } std::vector From c7f093ce6b8bc24408951e8868778710d6944c6c Mon Sep 17 00:00:00 2001 From: Heisenberg Date: Wed, 9 Oct 2019 17:09:12 +0800 Subject: [PATCH 2/2] change perror into log Former-commit-id: b482d6df5aac705fd8b4f6b57ef936678ee40856 --- cpp/CHANGELOG.md | 1 + cpp/src/metrics/SystemInfo.cpp | 13 +++++++------ 2 files changed, 8 insertions(+), 6 deletions(-) diff --git a/cpp/CHANGELOG.md b/cpp/CHANGELOG.md index 8f3079b8dd..b7cf014f63 100644 --- a/cpp/CHANGELOG.md +++ b/cpp/CHANGELOG.md @@ -10,6 +10,7 @@ Please mark all change in change log and use the ticket from JIRA. - MS-577 - Unittest Query randomly hung - MS-587 - Count get wrong result after adding vectors and index built immediately - MS-599 - search wrong result when table created with metric_type: IP +- MS-601 - Docker logs error caused by get CPUTemperature error ## Improvement - MS-552 - Add and change the easylogging library diff --git a/cpp/src/metrics/SystemInfo.cpp b/cpp/src/metrics/SystemInfo.cpp index 70e917ad04..a49c3070a6 100644 --- a/cpp/src/metrics/SystemInfo.cpp +++ b/cpp/src/metrics/SystemInfo.cpp @@ -16,6 +16,7 @@ // under the License. #include "metrics/SystemInfo.h" +#include "utils/Log.h" #include #include @@ -63,12 +64,12 @@ SystemInfo::Init() { nvmlReturn_t nvmlresult; nvmlresult = nvmlInit(); if (NVML_SUCCESS != nvmlresult) { - printf("System information initilization failed"); + SERVER_LOG_ERROR << "System information initilization failed"; return; } nvmlresult = nvmlDeviceGetCount(&num_device_); if (NVML_SUCCESS != nvmlresult) { - printf("Unable to get devidce number"); + SERVER_LOG_ERROR << "Unable to get devidce number"; return; } @@ -154,7 +155,7 @@ SystemInfo::getTotalCpuTime(std::vector& work_time_array) { std::vector total_time_array; FILE* file = fopen("/proc/stat", "r"); if (file == NULL) { - perror("Could not open stat file"); + SERVER_LOG_ERROR << "Could not open stat file"; return total_time_array; } @@ -165,7 +166,7 @@ SystemInfo::getTotalCpuTime(std::vector& work_time_array) { char buffer[1024]; char* ret = fgets(buffer, sizeof(buffer) - 1, file); if (ret == NULL) { - perror("Could not read stat file"); + SERVER_LOG_ERROR << "Could not read stat file"; fclose(file); return total_time_array; } @@ -245,7 +246,7 @@ SystemInfo::CPUTemperature() { DIR *dir = NULL; dir = opendir(path.c_str()); if (!dir) { - perror("opendir"); + SERVER_LOG_ERROR << "Could not open hwmon directory"; return result; } @@ -262,7 +263,7 @@ SystemInfo::CPUTemperature() { object += "/temp1_input"; FILE *file = fopen(object.c_str(), "r"); if (file == nullptr) { - perror("Could not open temperature file"); + SERVER_LOG_ERROR << "Could not open temperature file" exit(1); } float temp;