diff --git a/core/src/metrics/SystemInfo.cpp b/core/src/metrics/SystemInfo.cpp index a96bdec6c3..982137d522 100644 --- a/core/src/metrics/SystemInfo.cpp +++ b/core/src/metrics/SystemInfo.cpp @@ -39,6 +39,7 @@ SystemInfo::Init() { initialized_ = true; // initialize CPU information + FILE* file = nullptr; try { struct tms time_sample; char line[128]; @@ -46,7 +47,7 @@ SystemInfo::Init() { last_sys_cpu_ = time_sample.tms_stime; last_user_cpu_ = time_sample.tms_utime; num_processors_ = 0; - FILE* file = fopen("/proc/cpuinfo", "r"); + file = fopen("/proc/cpuinfo", "r"); if (file) { while (fgets(line, 128, file) != nullptr) { if (strncmp(line, "processor", 9) == 0) { @@ -62,6 +63,9 @@ SystemInfo::Init() { } total_ram_ = GetPhysicalMemory(); } catch (std::exception& ex) { + if (file != nullptr) { + fclose(file); + } std::string msg = "Failed to read /proc/cpuinfo, reason: " + std::string(ex.what()); LOG_SERVER_ERROR_ << msg; } @@ -121,9 +125,11 @@ SystemInfo::GetPhysicalMemory() { int64_t SystemInfo::GetProcessUsedMemory() { + FILE* file = nullptr; + try { // Note: this value is in KB! - FILE* file = fopen("/proc/self/status", "r"); + file = fopen("/proc/self/status", "r"); int64_t result = 0; constexpr int64_t KB = 1024; if (file) { @@ -144,6 +150,9 @@ SystemInfo::GetProcessUsedMemory() { // return value in Byte return (result * KB); } catch (std::exception& ex) { + if (file != nullptr) { + fclose(file); + } std::string msg = "Failed to read /proc/self/status, reason: " + std::string(ex.what()); LOG_SERVER_ERROR_ << msg; return 0; @@ -181,8 +190,9 @@ SystemInfo::CPUCorePercent() { std::vector SystemInfo::getTotalCpuTime(std::vector& work_time_array) { std::vector total_time_array; + FILE* file = nullptr; try { - FILE* file = fopen("/proc/stat", "r"); + file = fopen("/proc/stat", "r"); fiu_do_on("SystemInfo.getTotalCpuTime.open_proc", file = NULL); if (file == NULL) { LOG_SERVER_ERROR_ << "Failed to read /proc/stat"; @@ -198,8 +208,7 @@ SystemInfo::getTotalCpuTime(std::vector& work_time_array) { fiu_do_on("SystemInfo.getTotalCpuTime.read_proc", ret = NULL); if (ret == NULL) { LOG_SERVER_ERROR_ << "Could not read stat file"; - fclose(file); - return total_time_array; + break; } sscanf(buffer, "cpu %16lu %16lu %16lu %16lu %16lu %16lu %16lu %16lu %16lu %16lu", &user, &nice, &system, @@ -211,6 +220,9 @@ SystemInfo::getTotalCpuTime(std::vector& work_time_array) { fclose(file); } catch (std::exception& ex) { + if (file != nullptr) { + fclose(file); + } std::string msg = "Failed to read /proc/stat, reason: " + std::string(ex.what()); LOG_SERVER_ERROR_ << msg; } @@ -289,9 +301,10 @@ std::vector SystemInfo::CPUTemperature() { std::vector result; std::string path = "/sys/class/hwmon/"; + DIR* dir = nullptr; try { - DIR* dir = opendir(path.c_str()); - fiu_do_on("SystemInfo.CPUTemperature.opendir", dir = NULL); + dir = opendir(path.c_str()); + fiu_do_on("SystemInfo.CPUTemperature.opendir", closedir(dir) || (dir = NULL)); if (!dir) { LOG_SERVER_ERROR_ << "Could not open hwmon directory"; return result; @@ -302,28 +315,30 @@ SystemInfo::CPUTemperature() { std::string filename(path); filename.append(ptr->d_name); - char buf[100]; - if (readlink(filename.c_str(), buf, 100) != -1) { - std::string m(buf); + std::string m(100, '\0'); + if (readlink(filename.c_str(), m.data(), 100) != -1) { if (m.find("coretemp") != std::string::npos) { std::string object = filename; object += "/temp1_input"; FILE* file = fopen(object.c_str(), "r"); - fiu_do_on("SystemInfo.CPUTemperature.openfile", file = NULL); + fiu_do_on("SystemInfo.CPUTemperature.openfile", fclose(file) || (file = NULL)); if (file == nullptr) { LOG_SERVER_ERROR_ << "Could not open temperature file"; - return result; + } else { + float temp; + if (fscanf(file, "%f", &temp) != -1) { + result.push_back(temp / 1000); + } + fclose(file); } - float temp; - if (fscanf(file, "%f", &temp) != -1) { - result.push_back(temp / 1000); - } - fclose(file); } } } closedir(dir); } catch (std::exception& ex) { + if (dir != nullptr) { + closedir(dir); + } std::string msg = "Failed to get cpu temperature, reason: " + std::string(ex.what()); LOG_SERVER_ERROR_ << msg; } diff --git a/core/src/scheduler/resource/Node.cpp b/core/src/scheduler/resource/Node.cpp index a5a70af6e5..274205928d 100644 --- a/core/src/scheduler/resource/Node.cpp +++ b/core/src/scheduler/resource/Node.cpp @@ -56,5 +56,11 @@ Node::AddNeighbour(const NeighbourNodePtr& neighbour_node, Connection& connectio // else do nothing, consider it.. } +void +Node::RemoveNeighbour(const NeighbourNodePtr& neighbour_node) { + std::lock_guard lk(mutex_); + neighbours_.erase(neighbour_node->id_); +} + } // namespace scheduler } // namespace milvus diff --git a/core/src/scheduler/resource/Node.h b/core/src/scheduler/resource/Node.h index d5b90c7012..96408f25eb 100644 --- a/core/src/scheduler/resource/Node.h +++ b/core/src/scheduler/resource/Node.h @@ -51,6 +51,9 @@ class Node : public interface::dumpable { std::vector GetNeighbours(); + void + RemoveNeighbour(const NeighbourNodePtr& neighbour_node); + public: json Dump() const override; diff --git a/core/unittest/scheduler/test_node.cpp b/core/unittest/scheduler/test_node.cpp index c957483420..a86b524aaa 100644 --- a/core/unittest/scheduler/test_node.cpp +++ b/core/unittest/scheduler/test_node.cpp @@ -35,6 +35,13 @@ class NodeTest : public ::testing::Test { node2_->AddNeighbour(node1_, pcie); } + void + TearDown() { + node1_->RemoveNeighbour(node2_); + node1_->RemoveNeighbour(node3_); + node2_->RemoveNeighbour(node1_); + } + ms::NodePtr node1_; ms::NodePtr node2_; ms::NodePtr node3_; diff --git a/docker/deploy/cpu/centos7/Dockerfile b/docker/deploy/cpu/centos7/Dockerfile index 9bbea88396..6337795272 100644 --- a/docker/deploy/cpu/centos7/Dockerfile +++ b/docker/deploy/cpu/centos7/Dockerfile @@ -9,6 +9,8 @@ # is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express # or implied. See the License for the specific language governing permissions and limitations under the License. +FROM hectormolinero/tini:v18 AS tini + FROM centos:centos7 RUN yum install -y epel-release && \ @@ -18,8 +20,7 @@ RUN yum install -y epel-release && \ COPY ./milvus /var/lib/milvus ENV LD_LIBRARY_PATH="$LD_LIBRARY_PATH:/var/lib/milvus/lib" -RUN wget -O /tini https://github.com/krallin/tini/releases/download/v0.19.0/tini && \ - chmod +x /tini +COPY --from=tini /usr/bin/tini /tini ENTRYPOINT ["/tini", "--"] diff --git a/docker/deploy/gpu/centos7/Dockerfile b/docker/deploy/gpu/centos7/Dockerfile index 6e4897ae27..7995b8375e 100644 --- a/docker/deploy/gpu/centos7/Dockerfile +++ b/docker/deploy/gpu/centos7/Dockerfile @@ -9,6 +9,8 @@ # is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express # or implied. See the License for the specific language governing permissions and limitations under the License. +FROM hectormolinero/tini:v18 AS tini + FROM nvidia/cuda:10.1-devel-centos7 ENV NVIDIA_DRIVER_CAPABILITIES compute,utility @@ -20,8 +22,7 @@ RUN yum --disablerepo=cuda install -y epel-release && \ COPY ./milvus /var/lib/milvus ENV LD_LIBRARY_PATH="$LD_LIBRARY_PATH:/var/lib/milvus/lib" -RUN wget -O /tini https://github.com/krallin/tini/releases/download/v0.19.0/tini && \ - chmod +x /tini +COPY --from=tini /usr/bin/tini /tini ENTRYPOINT ["/tini", "--"]