mirror of
https://gitee.com/milvus-io/milvus.git
synced 2026-01-07 19:31:51 +08:00
MS-546 Add simple mode resource_config
Former-commit-id: d1909071f0d5e630ae8919c473083cad04778d76
This commit is contained in:
parent
d4ea0151eb
commit
54ec2a126d
@ -115,6 +115,7 @@ Please mark all change in change log and use the ticket from JIRA.
|
||||
- MS-533 - Update resource_test to cover dump function
|
||||
- MS-523 - Config file validation
|
||||
- MS-539 - Remove old task code
|
||||
- MS-546 - Add simple mode resource_config
|
||||
|
||||
## New Feature
|
||||
- MS-343 - Implement ResourceMgr
|
||||
|
||||
@ -38,44 +38,8 @@ engine_config:
|
||||
use_blas_threshold: 20
|
||||
|
||||
resource_config:
|
||||
# resource list, length: 0~N
|
||||
# please set at least one DISK resource and one CPU resource, or the system will not return query results.
|
||||
#
|
||||
# example:
|
||||
# resource_name: # resource name, only used in the connections below
|
||||
# type: DISK # resource type, options: DISK/CPU/GPU
|
||||
# device_id: 0
|
||||
# enable_executor: false # whether the executor is enabled, options: true, false
|
||||
|
||||
mode: simple
|
||||
resources:
|
||||
ssda:
|
||||
type: DISK
|
||||
device_id: 0
|
||||
enable_executor: false
|
||||
|
||||
cpu:
|
||||
type: CPU
|
||||
device_id: 0
|
||||
enable_executor: false
|
||||
|
||||
gpu0:
|
||||
type: GPU
|
||||
device_id: 0
|
||||
enable_executor: true
|
||||
gpu_resource_num: 2
|
||||
pinned_memory: 300
|
||||
temp_memory: 300
|
||||
|
||||
# connection list, length: 0~N
|
||||
# example:
|
||||
# connection_name:
|
||||
# speed: 100 # unit: MS/s
|
||||
# endpoint: ${resource_name}===${resource_name}
|
||||
connections:
|
||||
io:
|
||||
speed: 500
|
||||
endpoint: ssda===cpu
|
||||
pcie0:
|
||||
speed: 11000
|
||||
endpoint: cpu===gpu0
|
||||
|
||||
- cpu
|
||||
# - gpu0
|
||||
# - gpu1
|
||||
|
||||
@ -8,6 +8,8 @@
|
||||
#include "server/ServerConfig.h"
|
||||
#include "ResourceFactory.h"
|
||||
#include "knowhere/index/vector_index/gpu_ivf.h"
|
||||
#include "Utils.h"
|
||||
|
||||
|
||||
namespace zilliz {
|
||||
namespace milvus {
|
||||
@ -20,72 +22,132 @@ SchedulerPtr SchedInst::instance = nullptr;
|
||||
std::mutex SchedInst::mutex_;
|
||||
|
||||
void
|
||||
StartSchedulerService() {
|
||||
try {
|
||||
server::ConfigNode &config = server::ServerConfig::GetInstance().GetConfig(server::CONFIG_RESOURCE);
|
||||
load_simple_config() {
|
||||
server::ConfigNode &config = server::ServerConfig::GetInstance().GetConfig(server::CONFIG_RESOURCE);
|
||||
auto mode = config.GetValue("mode", "simple");
|
||||
|
||||
if (config.GetChildren().empty()) throw "resource_config null exception";
|
||||
|
||||
auto resources = config.GetChild(server::CONFIG_RESOURCES).GetChildren();
|
||||
|
||||
if (resources.empty()) throw "Children of resource_config null exception";
|
||||
|
||||
for (auto &resource : resources) {
|
||||
auto &resname = resource.first;
|
||||
auto &resconf = resource.second;
|
||||
auto type = resconf.GetValue(server::CONFIG_RESOURCE_TYPE);
|
||||
// auto memory = resconf.GetInt64Value(server::CONFIG_RESOURCE_MEMORY);
|
||||
auto device_id = resconf.GetInt64Value(server::CONFIG_RESOURCE_DEVICE_ID);
|
||||
// auto enable_loader = resconf.GetBoolValue(server::CONFIG_RESOURCE_ENABLE_LOADER);
|
||||
auto enable_loader = true;
|
||||
auto enable_executor = resconf.GetBoolValue(server::CONFIG_RESOURCE_ENABLE_EXECUTOR);
|
||||
auto pinned_memory = resconf.GetInt64Value(server::CONFIG_RESOURCE_PIN_MEMORY);
|
||||
auto temp_memory = resconf.GetInt64Value(server::CONFIG_RESOURCE_TEMP_MEMORY);
|
||||
auto resource_num = resconf.GetInt64Value(server::CONFIG_RESOURCE_NUM);
|
||||
|
||||
auto res = ResMgrInst::GetInstance()->Add(ResourceFactory::Create(resname,
|
||||
type,
|
||||
device_id,
|
||||
enable_loader,
|
||||
enable_executor));
|
||||
|
||||
if (res.lock()->type() == ResourceType::GPU) {
|
||||
auto pinned_memory = resconf.GetInt64Value(server::CONFIG_RESOURCE_PIN_MEMORY, 300);
|
||||
auto temp_memory = resconf.GetInt64Value(server::CONFIG_RESOURCE_TEMP_MEMORY, 300);
|
||||
auto resource_num = resconf.GetInt64Value(server::CONFIG_RESOURCE_NUM, 2);
|
||||
pinned_memory = 1024 * 1024 * pinned_memory;
|
||||
temp_memory = 1024 * 1024 * temp_memory;
|
||||
knowhere::FaissGpuResourceMgr::GetInstance().InitDevice(device_id,
|
||||
pinned_memory,
|
||||
temp_memory,
|
||||
resource_num);
|
||||
auto resources = config.GetSequence("resources");
|
||||
bool cpu = false;
|
||||
std::set<uint64_t> gpu_ids;
|
||||
for (auto &resource : resources) {
|
||||
if (resource == "cpu") {
|
||||
cpu = true;
|
||||
break;
|
||||
} else {
|
||||
if (resource.length() < 4 || resource.substr(0, 3) != "gpu") {
|
||||
// error
|
||||
exit(-1);
|
||||
}
|
||||
auto gpu_id = std::stoi(resource.substr(3));
|
||||
if (gpu_id >= get_num_gpu()) {
|
||||
// error
|
||||
exit(-1);
|
||||
}
|
||||
gpu_ids.insert(gpu_id);
|
||||
}
|
||||
}
|
||||
|
||||
ResMgrInst::GetInstance()->Add(ResourceFactory::Create("disk", "DISK", 0, true, false));
|
||||
auto io = Connection("io", 500);
|
||||
if (cpu) {
|
||||
ResMgrInst::GetInstance()->Add(ResourceFactory::Create("cpu", "CPU", 0, true, true));
|
||||
ResMgrInst::GetInstance()->Connect("disk", "cpu", io);
|
||||
} else {
|
||||
ResMgrInst::GetInstance()->Add(ResourceFactory::Create("cpu", "CPU", 0, true, false));
|
||||
ResMgrInst::GetInstance()->Connect("disk", "cpu", io);
|
||||
|
||||
auto pcie = Connection("pcie", 12000);
|
||||
for (auto &gpu_id : gpu_ids) {
|
||||
ResMgrInst::GetInstance()->Add(ResourceFactory::Create(std::to_string(gpu_id), "GPU", gpu_id, true, true));
|
||||
ResMgrInst::GetInstance()->Connect("cpu", std::to_string(gpu_id), io);
|
||||
auto pinned_memory = 300;
|
||||
auto temp_memory = 300;
|
||||
auto resource_num = 2;
|
||||
pinned_memory = 1024 * 1024 * pinned_memory;
|
||||
temp_memory = 1024 * 1024 * temp_memory;
|
||||
knowhere::FaissGpuResourceMgr::GetInstance().InitDevice(gpu_id,
|
||||
pinned_memory,
|
||||
temp_memory,
|
||||
resource_num);
|
||||
}
|
||||
|
||||
knowhere::FaissGpuResourceMgr::GetInstance().InitResource();
|
||||
|
||||
auto connections = config.GetChild(server::CONFIG_RESOURCE_CONNECTIONS).GetChildren();
|
||||
if(connections.empty()) throw "connections config null exception";
|
||||
for (auto &conn : connections) {
|
||||
auto &connect_name = conn.first;
|
||||
auto &connect_conf = conn.second;
|
||||
auto connect_speed = connect_conf.GetInt64Value(server::CONFIG_SPEED_CONNECTIONS);
|
||||
auto connect_endpoint = connect_conf.GetValue(server::CONFIG_ENDPOINT_CONNECTIONS);
|
||||
|
||||
std::string delimiter = "===";
|
||||
std::string left = connect_endpoint.substr(0, connect_endpoint.find(delimiter));
|
||||
std::string right = connect_endpoint.substr(connect_endpoint.find(delimiter) + 3,
|
||||
connect_endpoint.length());
|
||||
|
||||
auto connection = Connection(connect_name, connect_speed);
|
||||
ResMgrInst::GetInstance()->Connect(left, right, connection);
|
||||
}
|
||||
} catch (const char* msg) {
|
||||
SERVER_LOG_ERROR << msg;
|
||||
// TODO: throw exception instead
|
||||
exit(-1);
|
||||
// throw std::exception();
|
||||
}
|
||||
}
|
||||
|
||||
/// Loader for the non-simple ("advance") resource configuration.
///
/// The function currently does nothing: its whole body is commented out.
/// The disabled code reads the `resources` and `connections` children of the
/// resource config, registers each resource with ResMgrInst, initialises GPU
/// devices through FaissGpuResourceMgr and wires up the connections.
/// It is kept here as comments so it can be restored later.
void
load_advance_config() {
//    try {
//        server::ConfigNode &config = server::ServerConfig::GetInstance().GetConfig(server::CONFIG_RESOURCE);
//
//        if (config.GetChildren().empty()) throw "resource_config null exception";
//
//        auto resources = config.GetChild(server::CONFIG_RESOURCES).GetChildren();
//
//        if (resources.empty()) throw "Children of resource_config null exception";
//
//        for (auto &resource : resources) {
//            auto &resname = resource.first;
//            auto &resconf = resource.second;
//            auto type = resconf.GetValue(server::CONFIG_RESOURCE_TYPE);
////            auto memory = resconf.GetInt64Value(server::CONFIG_RESOURCE_MEMORY);
//            auto device_id = resconf.GetInt64Value(server::CONFIG_RESOURCE_DEVICE_ID);
////            auto enable_loader = resconf.GetBoolValue(server::CONFIG_RESOURCE_ENABLE_LOADER);
//            auto enable_loader = true;
//            auto enable_executor = resconf.GetBoolValue(server::CONFIG_RESOURCE_ENABLE_EXECUTOR);
//            auto pinned_memory = resconf.GetInt64Value(server::CONFIG_RESOURCE_PIN_MEMORY);
//            auto temp_memory = resconf.GetInt64Value(server::CONFIG_RESOURCE_TEMP_MEMORY);
//            auto resource_num = resconf.GetInt64Value(server::CONFIG_RESOURCE_NUM);
//
//            auto res = ResMgrInst::GetInstance()->Add(ResourceFactory::Create(resname,
//                                                                              type,
//                                                                              device_id,
//                                                                              enable_loader,
//                                                                              enable_executor));
//
//            if (res.lock()->type() == ResourceType::GPU) {
//                auto pinned_memory = resconf.GetInt64Value(server::CONFIG_RESOURCE_PIN_MEMORY, 300);
//                auto temp_memory = resconf.GetInt64Value(server::CONFIG_RESOURCE_TEMP_MEMORY, 300);
//                auto resource_num = resconf.GetInt64Value(server::CONFIG_RESOURCE_NUM, 2);
//                pinned_memory = 1024 * 1024 * pinned_memory;
//                temp_memory = 1024 * 1024 * temp_memory;
//                knowhere::FaissGpuResourceMgr::GetInstance().InitDevice(device_id,
//                                                                        pinned_memory,
//                                                                        temp_memory,
//                                                                        resource_num);
//            }
//        }
//
//        knowhere::FaissGpuResourceMgr::GetInstance().InitResource();
//
//        auto connections = config.GetChild(server::CONFIG_RESOURCE_CONNECTIONS).GetChildren();
//        if (connections.empty()) throw "connections config null exception";
//        for (auto &conn : connections) {
//            auto &connect_name = conn.first;
//            auto &connect_conf = conn.second;
//            auto connect_speed = connect_conf.GetInt64Value(server::CONFIG_SPEED_CONNECTIONS);
//            auto connect_endpoint = connect_conf.GetValue(server::CONFIG_ENDPOINT_CONNECTIONS);
//
//            std::string delimiter = "===";
//            std::string left = connect_endpoint.substr(0, connect_endpoint.find(delimiter));
//            std::string right = connect_endpoint.substr(connect_endpoint.find(delimiter) + 3,
//                                                        connect_endpoint.length());
//
//            auto connection = Connection(connect_name, connect_speed);
//            ResMgrInst::GetInstance()->Connect(left, right, connection);
//        }
//    } catch (const char *msg) {
//        SERVER_LOG_ERROR << msg;
//        // TODO: throw exception instead
//        exit(-1);
////        throw std::exception();
//    }
}
|
||||
|
||||
void
|
||||
StartSchedulerService() {
|
||||
load_simple_config();
|
||||
// load_advance_config();
|
||||
|
||||
ResMgrInst::GetInstance()->Start();
|
||||
SchedInst::GetInstance()->Start();
|
||||
|
||||
@ -7,6 +7,7 @@
|
||||
#include "Utils.h"
|
||||
|
||||
#include <chrono>
|
||||
#include <cuda_runtime.h>
|
||||
|
||||
|
||||
namespace zilliz {
|
||||
@ -21,6 +22,13 @@ get_current_timestamp() {
|
||||
return millis;
|
||||
}
|
||||
|
||||
uint64_t
|
||||
get_num_gpu() {
|
||||
int n_devices = 0;
|
||||
cudaGetDeviceCount(&n_devices);
|
||||
return n_devices;
|
||||
}
|
||||
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -14,6 +14,9 @@ namespace engine {
|
||||
/// Returns the current timestamp.
/// NOTE(review): the definition returns a local named `millis`, so the unit is
/// presumably milliseconds — confirm against the implementation.
uint64_t
get_current_timestamp();

/// Returns the number of GPU devices, as obtained from cudaGetDeviceCount().
uint64_t
get_num_gpu();
|
||||
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -378,6 +378,32 @@ ServerConfig::CheckEngineConfig() {
|
||||
|
||||
/// Validates the simple-mode `resource_config` section of the server config.
///
/// Expected layout:
///
///     resource_config:
///       mode: simple
///       resources:
///         - cpu
///         - gpu0
///         - gpu100
///
/// Checks performed (every failure is printed to std::cerr, and all checks are
/// run so the user sees every problem at once):
///   - `mode` (default "simple") must be "simple";
///   - `resources` must not be empty;
///   - every entry of `resources` must be either "cpu" or "gpu" followed by
///     one or more decimal digits.
///
/// The per-entry check exists so malformed names are reported here with a
/// message, rather than reaching the scheduler's config loader, which parses
/// the numeric suffix with std::stoi and exits without a diagnostic.
/// Whether a GPU id actually exists on the machine is not checked here.
///
/// @return SERVER_SUCCESS when every check passes, SERVER_INVALID_ARGUMENT otherwise.
ErrorCode
ServerConfig::CheckResourceConfig() {
    bool okay = true;
    server::ConfigNode &config = server::ServerConfig::GetInstance().GetConfig(server::CONFIG_RESOURCE);

    auto mode = config.GetValue("mode", "simple");
    if (mode != "simple") {
        std::cerr << "ERROR: invalid resource config: mode is " << mode << std::endl;
        okay = false;
    }

    auto resources = config.GetSequence("resources");
    if (resources.empty()) {
        std::cerr << "ERROR: invalid resource config: resources empty" << std::endl;
        okay = false;
    }

    for (const auto &resource : resources) {
        if (resource == "cpu") {
            continue;
        }

        // Anything else must look like "gpu<digits>", e.g. "gpu0".
        bool valid = resource.length() >= 4 && resource.compare(0, 3, "gpu") == 0;
        for (size_t i = 3; valid && i < resource.length(); ++i) {
            valid = resource[i] >= '0' && resource[i] <= '9';
        }

        if (!valid) {
            std::cerr << "ERROR: invalid resource config: unknown resource " << resource << std::endl;
            okay = false;
        }
    }

    return (okay ? SERVER_SUCCESS : SERVER_INVALID_ARGUMENT);
}
|
||||
|
||||
//ErrorCode
|
||||
//ServerConfig::CheckResourceConfig() {
|
||||
/*
|
||||
|
||||
resource_config:
|
||||
@ -422,142 +448,143 @@ ServerConfig::CheckResourceConfig() {
|
||||
speed: 11000
|
||||
endpoint: cpu===gpu0
|
||||
*/
|
||||
bool okay = true;
|
||||
server::ConfigNode resource_config = GetConfig(CONFIG_RESOURCE);
|
||||
if (resource_config.GetChildren().empty()) {
|
||||
std::cerr << "ERROR: no context under resource" << std::endl;
|
||||
okay = false;
|
||||
}
|
||||
|
||||
auto resources = resource_config.GetChild(CONFIG_RESOURCES).GetChildren();
|
||||
|
||||
if (resources.empty()) {
|
||||
std::cerr << "no resources specified" << std::endl;
|
||||
okay = false;
|
||||
}
|
||||
|
||||
bool resource_valid_flag = false;
|
||||
bool hasDisk = false;
|
||||
bool hasCPU = false;
|
||||
bool hasExecutor = false;
|
||||
std::set<std::string> resource_list;
|
||||
for (auto &resource : resources) {
|
||||
resource_list.emplace(resource.first);
|
||||
auto &resource_conf = resource.second;
|
||||
auto type = resource_conf.GetValue(CONFIG_RESOURCE_TYPE);
|
||||
|
||||
std::string device_id_str = resource_conf.GetValue(CONFIG_RESOURCE_DEVICE_ID, "0");
|
||||
int32_t device_id = -1;
|
||||
if (ValidationUtil::ValidateStringIsNumber(device_id_str) != SERVER_SUCCESS) {
|
||||
std::cerr << "ERROR: device_id " << device_id_str << " is not a number" << std::endl;
|
||||
okay = false;
|
||||
} else {
|
||||
device_id = std::stol(device_id_str);
|
||||
}
|
||||
|
||||
std::string enable_executor_str = resource_conf.GetValue(CONFIG_RESOURCE_ENABLE_EXECUTOR, "off");
|
||||
if (ValidationUtil::ValidateStringIsBool(enable_executor_str) != SERVER_SUCCESS) {
|
||||
std::cerr << "ERROR: invalid enable_executor config: " << enable_executor_str << std::endl;
|
||||
okay = false;
|
||||
}
|
||||
|
||||
if (type == "DISK") {
|
||||
hasDisk = true;
|
||||
} else if (type == "CPU") {
|
||||
hasCPU = true;
|
||||
if (resource_conf.GetBoolValue(CONFIG_RESOURCE_ENABLE_EXECUTOR, false)) {
|
||||
hasExecutor = true;
|
||||
}
|
||||
}
|
||||
else if (type == "GPU") {
|
||||
int build_index_gpu_index = GetConfig(CONFIG_DB).GetInt32Value(CONFIG_DB_BUILD_INDEX_GPU, 0);
|
||||
if (device_id == build_index_gpu_index) {
|
||||
resource_valid_flag = true;
|
||||
}
|
||||
if (resource_conf.GetBoolValue(CONFIG_RESOURCE_ENABLE_EXECUTOR, false)) {
|
||||
hasExecutor = true;
|
||||
}
|
||||
std::string gpu_resource_num_str = resource_conf.GetValue(CONFIG_RESOURCE_NUM, "2");
|
||||
if (ValidationUtil::ValidateStringIsNumber(gpu_resource_num_str) != SERVER_SUCCESS) {
|
||||
std::cerr << "ERROR: gpu_resource_num " << gpu_resource_num_str << " is not a number" << std::endl;
|
||||
okay = false;
|
||||
}
|
||||
bool mem_valid = true;
|
||||
std::string pinned_memory_str = resource_conf.GetValue(CONFIG_RESOURCE_PIN_MEMORY, "300");
|
||||
if (ValidationUtil::ValidateStringIsNumber(pinned_memory_str) != SERVER_SUCCESS) {
|
||||
std::cerr << "ERROR: pinned_memory " << pinned_memory_str << " is not a number" << std::endl;
|
||||
okay = false;
|
||||
mem_valid = false;
|
||||
}
|
||||
std::string temp_memory_str = resource_conf.GetValue(CONFIG_RESOURCE_TEMP_MEMORY, "300");
|
||||
if (ValidationUtil::ValidateStringIsNumber(temp_memory_str) != SERVER_SUCCESS) {
|
||||
std::cerr << "ERROR: temp_memory " << temp_memory_str << " is not a number" << std::endl;
|
||||
okay = false;
|
||||
mem_valid = false;
|
||||
}
|
||||
if (mem_valid) {
|
||||
size_t gpu_memory;
|
||||
if (ValidationUtil::GetGpuMemory(device_id, gpu_memory) != SERVER_SUCCESS) {
|
||||
std::cerr << "ERROR: could not get gpu memory for device " << device_id << std::endl;
|
||||
okay = false;
|
||||
}
|
||||
else {
|
||||
size_t prealoc_mem = std::stol(pinned_memory_str) + std::stol(temp_memory_str);
|
||||
if (prealoc_mem >= gpu_memory) {
|
||||
std::cerr << "ERROR: sum of pinned_memory and temp_memory " << prealoc_mem
|
||||
<< " exceeds total gpu memory " << gpu_memory << " for device " << device_id << std::endl;
|
||||
okay = false;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (!resource_valid_flag) {
|
||||
std::cerr << "Building index GPU can't be found in resource config." << std::endl;
|
||||
okay = false;
|
||||
}
|
||||
if (!hasDisk || !hasCPU) {
|
||||
std::cerr << "No DISK or CPU resource" << std::endl;
|
||||
okay = false;
|
||||
}
|
||||
if (!hasExecutor) {
|
||||
std::cerr << "No CPU or GPU resource has executor enabled" << std::endl;
|
||||
okay = false;
|
||||
}
|
||||
|
||||
auto connections = resource_config.GetChild(CONFIG_RESOURCE_CONNECTIONS).GetChildren();
|
||||
for (auto &connection : connections) {
|
||||
auto &connection_conf = connection.second;
|
||||
|
||||
std::string speed_str = connection_conf.GetValue(CONFIG_SPEED_CONNECTIONS);
|
||||
if (ValidationUtil::ValidateStringIsNumber(speed_str) != SERVER_SUCCESS) {
|
||||
std::cerr << "ERROR: speed " << speed_str << " is not a number" << std::endl;
|
||||
okay = false;
|
||||
}
|
||||
|
||||
std::string endpoint_str = connection_conf.GetValue(CONFIG_ENDPOINT_CONNECTIONS);
|
||||
std::string delimiter = "===";
|
||||
auto delimiter_pos = endpoint_str.find(delimiter);
|
||||
if (delimiter_pos == std::string::npos) {
|
||||
std::cerr << "ERROR: invalid endpoint format: " << endpoint_str << std::endl;
|
||||
okay = false;
|
||||
} else {
|
||||
std::string left_resource = endpoint_str.substr(0, delimiter_pos);
|
||||
if (resource_list.find(left_resource) == resource_list.end()) {
|
||||
std::cerr << "ERROR: left resource " << left_resource << " does not exist" << std::endl;
|
||||
okay = false;
|
||||
}
|
||||
std::string right_resource = endpoint_str.substr(delimiter_pos + delimiter.length(), endpoint_str.length());
|
||||
if (resource_list.find(right_resource) == resource_list.end()) {
|
||||
std::cerr << "ERROR: right resource " << right_resource << " does not exist" << std::endl;
|
||||
okay = false;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return (okay ? SERVER_SUCCESS : SERVER_INVALID_ARGUMENT);
|
||||
}
|
||||
// bool okay = true;
|
||||
// server::ConfigNode resource_config = GetConfig(CONFIG_RESOURCE);
|
||||
// if (resource_config.GetChildren().empty()) {
|
||||
// std::cerr << "ERROR: no context under resource" << std::endl;
|
||||
// okay = false;
|
||||
// }
|
||||
//
|
||||
// auto resources = resource_config.GetChild(CONFIG_RESOURCES).GetChildren();
|
||||
//
|
||||
// if (resources.empty()) {
|
||||
// std::cerr << "no resources specified" << std::endl;
|
||||
// okay = false;
|
||||
// }
|
||||
//
|
||||
// bool resource_valid_flag = false;
|
||||
// bool hasDisk = false;
|
||||
// bool hasCPU = false;
|
||||
// bool hasExecutor = false;
|
||||
// std::set<std::string> resource_list;
|
||||
// for (auto &resource : resources) {
|
||||
// resource_list.emplace(resource.first);
|
||||
// auto &resource_conf = resource.second;
|
||||
// auto type = resource_conf.GetValue(CONFIG_RESOURCE_TYPE);
|
||||
//
|
||||
// std::string device_id_str = resource_conf.GetValue(CONFIG_RESOURCE_DEVICE_ID, "0");
|
||||
// int32_t device_id = -1;
|
||||
// if (ValidationUtil::ValidateStringIsNumber(device_id_str) != SERVER_SUCCESS) {
|
||||
// std::cerr << "ERROR: device_id " << device_id_str << " is not a number" << std::endl;
|
||||
// okay = false;
|
||||
// } else {
|
||||
// device_id = std::stol(device_id_str);
|
||||
// }
|
||||
//
|
||||
// std::string enable_executor_str = resource_conf.GetValue(CONFIG_RESOURCE_ENABLE_EXECUTOR, "off");
|
||||
// if (ValidationUtil::ValidateStringIsBool(enable_executor_str) != SERVER_SUCCESS) {
|
||||
// std::cerr << "ERROR: invalid enable_executor config: " << enable_executor_str << std::endl;
|
||||
// okay = false;
|
||||
// }
|
||||
//
|
||||
// if (type == "DISK") {
|
||||
// hasDisk = true;
|
||||
// } else if (type == "CPU") {
|
||||
// hasCPU = true;
|
||||
// if (resource_conf.GetBoolValue(CONFIG_RESOURCE_ENABLE_EXECUTOR, false)) {
|
||||
// hasExecutor = true;
|
||||
// }
|
||||
// }
|
||||
// else if (type == "GPU") {
|
||||
// int build_index_gpu_index = GetConfig(CONFIG_DB).GetInt32Value(CONFIG_DB_BUILD_INDEX_GPU, 0);
|
||||
// if (device_id == build_index_gpu_index) {
|
||||
// resource_valid_flag = true;
|
||||
// }
|
||||
// if (resource_conf.GetBoolValue(CONFIG_RESOURCE_ENABLE_EXECUTOR, false)) {
|
||||
// hasExecutor = true;
|
||||
// }
|
||||
// std::string gpu_resource_num_str = resource_conf.GetValue(CONFIG_RESOURCE_NUM, "2");
|
||||
// if (ValidationUtil::ValidateStringIsNumber(gpu_resource_num_str) != SERVER_SUCCESS) {
|
||||
// std::cerr << "ERROR: gpu_resource_num " << gpu_resource_num_str << " is not a number" << std::endl;
|
||||
// okay = false;
|
||||
// }
|
||||
// bool mem_valid = true;
|
||||
// std::string pinned_memory_str = resource_conf.GetValue(CONFIG_RESOURCE_PIN_MEMORY, "300");
|
||||
// if (ValidationUtil::ValidateStringIsNumber(pinned_memory_str) != SERVER_SUCCESS) {
|
||||
// std::cerr << "ERROR: pinned_memory " << pinned_memory_str << " is not a number" << std::endl;
|
||||
// okay = false;
|
||||
// mem_valid = false;
|
||||
// }
|
||||
// std::string temp_memory_str = resource_conf.GetValue(CONFIG_RESOURCE_TEMP_MEMORY, "300");
|
||||
// if (ValidationUtil::ValidateStringIsNumber(temp_memory_str) != SERVER_SUCCESS) {
|
||||
// std::cerr << "ERROR: temp_memory " << temp_memory_str << " is not a number" << std::endl;
|
||||
// okay = false;
|
||||
// mem_valid = false;
|
||||
// }
|
||||
// if (mem_valid) {
|
||||
// size_t gpu_memory;
|
||||
// if (ValidationUtil::GetGpuMemory(device_id, gpu_memory) != SERVER_SUCCESS) {
|
||||
// std::cerr << "ERROR: could not get gpu memory for device " << device_id << std::endl;
|
||||
// okay = false;
|
||||
// }
|
||||
// else {
|
||||
// size_t prealoc_mem = std::stol(pinned_memory_str) + std::stol(temp_memory_str);
|
||||
// if (prealoc_mem >= gpu_memory) {
|
||||
// std::cerr << "ERROR: sum of pinned_memory and temp_memory " << prealoc_mem
|
||||
// << " exceeds total gpu memory " << gpu_memory << " for device " << device_id << std::endl;
|
||||
// okay = false;
|
||||
// }
|
||||
// }
|
||||
// }
|
||||
// }
|
||||
// }
|
||||
//
|
||||
// if (!resource_valid_flag) {
|
||||
// std::cerr << "Building index GPU can't be found in resource config." << std::endl;
|
||||
// okay = false;
|
||||
// }
|
||||
// if (!hasDisk || !hasCPU) {
|
||||
// std::cerr << "No DISK or CPU resource" << std::endl;
|
||||
// okay = false;
|
||||
// }
|
||||
// if (!hasExecutor) {
|
||||
// std::cerr << "No CPU or GPU resource has executor enabled" << std::endl;
|
||||
// okay = false;
|
||||
// }
|
||||
//
|
||||
// auto connections = resource_config.GetChild(CONFIG_RESOURCE_CONNECTIONS).GetChildren();
|
||||
// for (auto &connection : connections) {
|
||||
// auto &connection_conf = connection.second;
|
||||
//
|
||||
// std::string speed_str = connection_conf.GetValue(CONFIG_SPEED_CONNECTIONS);
|
||||
// if (ValidationUtil::ValidateStringIsNumber(speed_str) != SERVER_SUCCESS) {
|
||||
// std::cerr << "ERROR: speed " << speed_str << " is not a number" << std::endl;
|
||||
// okay = false;
|
||||
// }
|
||||
//
|
||||
// std::string endpoint_str = connection_conf.GetValue(CONFIG_ENDPOINT_CONNECTIONS);
|
||||
// std::string delimiter = "===";
|
||||
// auto delimiter_pos = endpoint_str.find(delimiter);
|
||||
// if (delimiter_pos == std::string::npos) {
|
||||
// std::cerr << "ERROR: invalid endpoint format: " << endpoint_str << std::endl;
|
||||
// okay = false;
|
||||
// } else {
|
||||
// std::string left_resource = endpoint_str.substr(0, delimiter_pos);
|
||||
// if (resource_list.find(left_resource) == resource_list.end()) {
|
||||
// std::cerr << "ERROR: left resource " << left_resource << " does not exist" << std::endl;
|
||||
// okay = false;
|
||||
// }
|
||||
// std::string right_resource = endpoint_str.substr(delimiter_pos + delimiter.length(), endpoint_str.length());
|
||||
// if (resource_list.find(right_resource) == resource_list.end()) {
|
||||
// std::cerr << "ERROR: right resource " << right_resource << " does not exist" << std::endl;
|
||||
// okay = false;
|
||||
// }
|
||||
// }
|
||||
// }
|
||||
//
|
||||
// return (okay ? SERVER_SUCCESS : SERVER_INVALID_ARGUMENT);
|
||||
// return SERVER_SUCCESS;
|
||||
//}
|
||||
|
||||
void
|
||||
ServerConfig::PrintAll() const {
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user