From e27e6741d39425858579ea0936381b137e945cd1 Mon Sep 17 00:00:00 2001 From: wxyu Date: Tue, 8 Oct 2019 11:52:30 +0800 Subject: [PATCH] MS-611 Add resources validity check in ResourceMgr Former-commit-id: 55900459b4b25b27328b1ef85540f423742e70a3 --- cpp/CHANGELOG.md | 1 + cpp/src/scheduler/ResourceMgr.cpp | 66 +++++++++++++++++++++++++++---- cpp/src/scheduler/ResourceMgr.h | 15 +++++++ 3 files changed, 74 insertions(+), 8 deletions(-) diff --git a/cpp/CHANGELOG.md b/cpp/CHANGELOG.md index a026353d0a..5ff949e7d6 100644 --- a/cpp/CHANGELOG.md +++ b/cpp/CHANGELOG.md @@ -25,6 +25,7 @@ Please mark all change in change log and use the ticket from JIRA. - MS-585 - Update namespace in scheduler - MS-608 - Update TODO names - MS-609 - Update task construct function +- MS-611 - Add resources validity check in ResourceMgr ## New Feature diff --git a/cpp/src/scheduler/ResourceMgr.cpp b/cpp/src/scheduler/ResourceMgr.cpp index a4f73f9a34..41c4ddbc5e 100644 --- a/cpp/src/scheduler/ResourceMgr.cpp +++ b/cpp/src/scheduler/ResourceMgr.cpp @@ -24,6 +24,12 @@ namespace scheduler { void ResourceMgr::Start() { + if (not check_resource_valid()) { + ENGINE_LOG_ERROR << "Resources invalid, cannot start ResourceMgr."; + ENGINE_LOG_ERROR << Dump(); + return; + } + std::lock_guard lck(resources_mutex_); for (auto& resource : resources_) { resource->Start(); @@ -60,8 +66,22 @@ ResourceMgr::Add(ResourcePtr&& resource) { resource->RegisterSubscriber(std::bind(&ResourceMgr::post_event, this, std::placeholders::_1)); - if (resource->type() == ResourceType::DISK) { - disk_resources_.emplace_back(ResourceWPtr(resource)); + switch (resource->type()) { + case ResourceType::DISK: { + disk_resources_.emplace_back(ResourceWPtr(resource)); + break; + } + case ResourceType::CPU: { + cpu_resources_.emplace_back(ResourceWPtr(resource)); + break; + } + case ResourceType::GPU: { + gpu_resources_.emplace_back(ResourceWPtr(resource)); + break; + } + default: { + break; + } } resources_.emplace_back(resource); @@ -148,14 +168,14 @@ ResourceMgr::GetNumGpuResource() const { std::string ResourceMgr::Dump() { - std::string str = "ResourceMgr contains " + std::to_string(resources_.size()) + " resources.\n"; + std::stringstream ss; + ss << "ResourceMgr contains " << resources_.size() << " resources." << std::endl; - for (uint64_t i = 0; i < resources_.size(); ++i) { - str += "Resource No." + std::to_string(i) + ":\n"; - // str += resources_[i]->Dump(); + for (auto& res : resources_) { + ss << res->Dump(); } - return str; + return ss.str(); } std::string @@ -170,6 +190,34 @@ ResourceMgr::DumpTaskTables() { return ss.str(); } +bool +ResourceMgr::check_resource_valid() { + { + // TODO: check one disk-resource, one cpu-resource, zero or more gpu-resource; + if (GetDiskResources().size() != 1) return false; + if (GetCpuResources().size() != 1) return false; + } + + { + // TODO: one compute-resource at least; + if (GetNumOfComputeResource() < 1) return false; + } + + { + // TODO: check disk only connect with cpu + } + + { + // TODO: check gpu only connect with cpu + } + + { + // TODO: check if exists isolated node + } + + return true; +} + void ResourceMgr::post_event(const EventPtr& event) { { @@ -183,7 +231,9 @@ void ResourceMgr::event_process() { while (running_) { std::unique_lock lock(event_mutex_); - event_cv_.wait(lock, [this] { return !queue_.empty(); }); + event_cv_.wait(lock, [this] { + return !queue_.empty(); + }); auto event = queue_.front(); queue_.pop(); diff --git a/cpp/src/scheduler/ResourceMgr.h b/cpp/src/scheduler/ResourceMgr.h index 82ba0e6449..7a8e1ca4ca 100644 --- a/cpp/src/scheduler/ResourceMgr.h +++ b/cpp/src/scheduler/ResourceMgr.h @@ -64,6 +64,16 @@ class ResourceMgr { return disk_resources_; } + inline std::vector& + GetCpuResources() { + return cpu_resources_; + } + + inline std::vector& + GetGpuResources() { + return gpu_resources_; + } + // TODO(wxyu): why return shared pointer inline std::vector GetAllResources() { @@ -100,6 +110,9 @@ class ResourceMgr { DumpTaskTables(); private: + bool + check_resource_valid(); + void post_event(const EventPtr& event); @@ -110,6 +123,8 @@ class ResourceMgr { bool running_ = false; std::vector disk_resources_; + std::vector cpu_resources_; + std::vector gpu_resources_; std::vector resources_; mutable std::mutex resources_mutex_;