mirror of
https://gitee.com/milvus-io/milvus.git
synced 2026-01-07 19:31:51 +08:00
Add push mode for prometheus monitor (#905)
* Add push mode for prometheus monitor * format code * fix for comments * fix test_MetricBase bug * Change ip to address in config
This commit is contained in:
parent
28e61ee4ec
commit
a92a43e282
@ -18,6 +18,7 @@ Please mark all change in change log and use the issue from GitHub
|
||||
- \#766 - If partition tag is similar, wrong partition is searched
|
||||
- \#771 - Add server build commit info interface
|
||||
- \#759 - Put C++ sdk out of milvus/core
|
||||
- \#813 - Add push mode for prometheus monitor
|
||||
- \#815 - Support MinIO storage
|
||||
- \#910 - Change Milvus c++ standard to c++17
|
||||
|
||||
|
||||
@ -20,7 +20,7 @@ version: 0.1
|
||||
#----------------------+------------------------------------------------------------+------------+-----------------+
|
||||
# Server Config | Description | Type | Default |
|
||||
#----------------------+------------------------------------------------------------+------------+-----------------+
|
||||
# address | IP address that Milvus server monitors. | String | 0.0.0.0 |
|
||||
# address | IP address that Milvus server monitors. | IP | 0.0.0.0 |
|
||||
#----------------------+------------------------------------------------------------+------------+-----------------+
|
||||
# port | Port that Milvus server monitors. Port range (1024, 65535) | Integer | 19530 |
|
||||
#----------------------+------------------------------------------------------------+------------+-----------------+
|
||||
@ -68,7 +68,7 @@ db_config:
|
||||
#----------------------+------------------------------------------------------------+------------+-----------------+
|
||||
# minio_enable | Enable MinIO storage or not. | Boolean | false |
|
||||
#----------------------+------------------------------------------------------------+------------+-----------------+
|
||||
# minio_address | MinIO storage service IP address. | String | 127.0.0.1 |
|
||||
# minio_address | MinIO storage service IP address. | IP | 127.0.0.1 |
|
||||
#----------------------+------------------------------------------------------------+------------+-----------------+
|
||||
# minio_port | MinIO storage service port. Port range (1024, 65535) | Integer | 9000 |
|
||||
#----------------------+------------------------------------------------------------+------------+-----------------+
|
||||
@ -95,13 +95,16 @@ storage_config:
|
||||
#----------------------+------------------------------------------------------------+------------+-----------------+
|
||||
# collector | Connected monitoring system to collect metrics. | String | Prometheus |
|
||||
#----------------------+------------------------------------------------------------+------------+-----------------+
|
||||
# port | Port to visit Prometheus, port range (1024, 65535) | Integer | 8080 |
|
||||
# address | Pushgateway address | IP | 127.0.0.1 |
|
||||
#----------------------+------------------------------------------------------------+------------+-----------------+
|
||||
# port | Pushgateway port, port range (1024, 65535) | Integer | 9091 |
|
||||
#----------------------+------------------------------------------------------------+------------+-----------------+
|
||||
metric_config:
|
||||
enable_monitor: false
|
||||
collector: prometheus
|
||||
prometheus_config:
|
||||
port: 8080
|
||||
address: 127.0.0.1
|
||||
port: 9091
|
||||
|
||||
#----------------------+------------------------------------------------------------+------------+-----------------+
|
||||
# Cache Config | Description | Type | Default |
|
||||
|
||||
@ -20,7 +20,7 @@ version: 0.1
|
||||
#----------------------+------------------------------------------------------------+------------+-----------------+
|
||||
# Server Config | Description | Type | Default |
|
||||
#----------------------+------------------------------------------------------------+------------+-----------------+
|
||||
# address | IP address that Milvus server monitors. | String | 0.0.0.0 |
|
||||
# address | IP address that Milvus server monitors. | IP | 0.0.0.0 |
|
||||
#----------------------+------------------------------------------------------------+------------+-----------------+
|
||||
# port | Port that Milvus server monitors. Port range (1024, 65535) | Integer | 19530 |
|
||||
#----------------------+------------------------------------------------------------+------------+-----------------+
|
||||
@ -68,7 +68,7 @@ db_config:
|
||||
#----------------------+------------------------------------------------------------+------------+-----------------+
|
||||
# minio_enable | Enable MinIO storage or not. | Boolean | false |
|
||||
#----------------------+------------------------------------------------------------+------------+-----------------+
|
||||
# minio_address | MinIO storage service IP address. | String | 127.0.0.1 |
|
||||
# minio_address | MinIO storage service IP address. | IP | 127.0.0.1 |
|
||||
#----------------------+------------------------------------------------------------+------------+-----------------+
|
||||
# minio_port | MinIO storage service port. Port range (1024, 65535) | Integer | 9000 |
|
||||
#----------------------+------------------------------------------------------------+------------+-----------------+
|
||||
@ -95,13 +95,16 @@ storage_config:
|
||||
#----------------------+------------------------------------------------------------+------------+-----------------+
|
||||
# collector | Connected monitoring system to collect metrics. | String | Prometheus |
|
||||
#----------------------+------------------------------------------------------------+------------+-----------------+
|
||||
# port | Port to visit Prometheus, port range (1024, 65535) | Integer | 8080 |
|
||||
# address | Pushgateway address | IP | 127.0.0.1 |
|
||||
#----------------------+------------------------------------------------------------+------------+-----------------+
|
||||
# port | Pushgateway port, port range (1024, 65535) | Integer | 9091 |
|
||||
#----------------------+------------------------------------------------------------+------------+-----------------+
|
||||
metric_config:
|
||||
enable_monitor: false
|
||||
collector: prometheus
|
||||
prometheus_config:
|
||||
port: 8080
|
||||
address: 127.0.0.1
|
||||
port: 9091
|
||||
|
||||
#----------------------+------------------------------------------------------------+------------+-----------------+
|
||||
# Cache Config | Description | Type | Default |
|
||||
|
||||
@ -20,7 +20,7 @@ version: 0.1
|
||||
#----------------------+------------------------------------------------------------+------------+-----------------+
|
||||
# Server Config | Description | Type | Default |
|
||||
#----------------------+------------------------------------------------------------+------------+-----------------+
|
||||
# address | IP address that Milvus server monitors. | String | 0.0.0.0 |
|
||||
# address | IP address that Milvus server monitors. | IP | 0.0.0.0 |
|
||||
#----------------------+------------------------------------------------------------+------------+-----------------+
|
||||
# port | Port that Milvus server monitors. Port range (1024, 65535) | Integer | 19530 |
|
||||
#----------------------+------------------------------------------------------------+------------+-----------------+
|
||||
@ -68,7 +68,7 @@ db_config:
|
||||
#----------------------+------------------------------------------------------------+------------+-----------------+
|
||||
# minio_enable | Enable MinIO storage or not. | Boolean | false |
|
||||
#----------------------+------------------------------------------------------------+------------+-----------------+
|
||||
# minio_address | MinIO storage service IP address. | String | 127.0.0.1 |
|
||||
# minio_address | MinIO storage service IP address. | IP | 127.0.0.1 |
|
||||
#----------------------+------------------------------------------------------------+------------+-----------------+
|
||||
# minio_port | MinIO storage service port. Port range (1024, 65535) | Integer | 9000 |
|
||||
#----------------------+------------------------------------------------------------+------------+-----------------+
|
||||
@ -95,13 +95,16 @@ storage_config:
|
||||
#----------------------+------------------------------------------------------------+------------+-----------------+
|
||||
# collector | Connected monitoring system to collect metrics. | String | Prometheus |
|
||||
#----------------------+------------------------------------------------------------+------------+-----------------+
|
||||
# port | Port to visit Prometheus, port range (1024, 65535) | Integer | 8080 |
|
||||
# address | Pushgateway address | IP | 127.0.0.1 |
|
||||
#----------------------+------------------------------------------------------------+------------+-----------------+
|
||||
# port | Pushgateway port, port range (1024, 65535) | Integer | 9091 |
|
||||
#----------------------+------------------------------------------------------------+------------+-----------------+
|
||||
metric_config:
|
||||
enable_monitor: false
|
||||
collector: prometheus
|
||||
prometheus_config:
|
||||
port: 8080
|
||||
address: 127.0.0.1
|
||||
port: 9091
|
||||
|
||||
#----------------------+------------------------------------------------------------+------------+-----------------+
|
||||
# Cache Config | Description | Type | Default |
|
||||
|
||||
@ -137,6 +137,7 @@ set(prometheus_lib
|
||||
prometheus-cpp-push
|
||||
prometheus-cpp-pull
|
||||
prometheus-cpp-core
|
||||
curl
|
||||
)
|
||||
|
||||
set(boost_lib
|
||||
|
||||
@ -631,6 +631,7 @@ DBImpl::StartMetricTask() {
|
||||
server::Metrics::GetInstance().CPUCoreUsagePercentSet();
|
||||
server::Metrics::GetInstance().GPUTemperature();
|
||||
server::Metrics::GetInstance().CPUTemperature();
|
||||
server::Metrics::GetInstance().PushToGateway();
|
||||
|
||||
// ENGINE_LOG_TRACE << "Metric task finished";
|
||||
}
|
||||
|
||||
@ -18,7 +18,7 @@
|
||||
#pragma once
|
||||
|
||||
#include "SystemInfo.h"
|
||||
#include "utils/Error.h"
|
||||
#include "utils/Status.h"
|
||||
|
||||
#include <string>
|
||||
|
||||
@ -32,8 +32,9 @@ class MetricsBase {
|
||||
return instance;
|
||||
}
|
||||
|
||||
virtual ErrorCode
|
||||
virtual Status
|
||||
Init() {
|
||||
return Status::OK();
|
||||
}
|
||||
|
||||
virtual void
|
||||
@ -203,6 +204,10 @@ class MetricsBase {
|
||||
virtual void
|
||||
CPUTemperature() {
|
||||
}
|
||||
|
||||
virtual void
|
||||
PushToGateway() {
|
||||
}
|
||||
};
|
||||
|
||||
} // namespace server
|
||||
|
||||
@ -27,39 +27,48 @@
|
||||
namespace milvus {
|
||||
namespace server {
|
||||
|
||||
ErrorCode
|
||||
Status
|
||||
PrometheusMetrics::Init() {
|
||||
try {
|
||||
Config& config = Config::GetInstance();
|
||||
Status s = config.GetMetricConfigEnableMonitor(startup_);
|
||||
if (!s.ok()) {
|
||||
return s.code();
|
||||
return s;
|
||||
}
|
||||
if (!startup_) {
|
||||
return SERVER_SUCCESS;
|
||||
return Status::OK();
|
||||
}
|
||||
|
||||
// Following should be read from config file.
|
||||
std::string bind_address;
|
||||
s = config.GetMetricConfigPrometheusPort(bind_address);
|
||||
std::string push_port, push_address;
|
||||
s = config.GetMetricConfigPrometheusPort(push_port);
|
||||
if (!s.ok()) {
|
||||
return s.code();
|
||||
return s;
|
||||
}
|
||||
s = config.GetMetricConfigPrometheusAddress(push_address);
|
||||
if (!s.ok()) {
|
||||
return s;
|
||||
}
|
||||
|
||||
const std::string uri = std::string("/metrics");
|
||||
const std::size_t num_threads = 2;
|
||||
|
||||
// Init Exposer
|
||||
exposer_ptr_ = std::make_shared<prometheus::Exposer>(bind_address, uri, num_threads);
|
||||
auto labels = prometheus::Gateway::GetInstanceLabel("pushgateway");
|
||||
|
||||
// Exposer Registry
|
||||
exposer_ptr_->RegisterCollectable(registry_);
|
||||
// Init pushgateway
|
||||
gateway_ = std::make_shared<prometheus::Gateway>(push_address, push_port, "milvus_metrics", labels);
|
||||
|
||||
// Init Exposer
|
||||
// exposer_ptr_ = std::make_shared<prometheus::Exposer>(bind_address, uri, num_threads);
|
||||
|
||||
// Pushgateway Registry
|
||||
gateway_->RegisterCollectable(registry_);
|
||||
} catch (std::exception& ex) {
|
||||
SERVER_LOG_ERROR << "Failed to connect prometheus server: " << std::string(ex.what());
|
||||
return SERVER_UNEXPECTED_ERROR;
|
||||
return Status(SERVER_UNEXPECTED_ERROR, ex.what());
|
||||
}
|
||||
|
||||
return SERVER_SUCCESS;
|
||||
return Status::OK();
|
||||
}
|
||||
|
||||
void
|
||||
|
||||
@ -18,6 +18,7 @@
|
||||
#pragma once
|
||||
|
||||
#include <prometheus/exposer.h>
|
||||
#include <prometheus/gateway.h>
|
||||
#include <prometheus/registry.h>
|
||||
#include <iostream>
|
||||
#include <memory>
|
||||
@ -25,7 +26,8 @@
|
||||
#include <vector>
|
||||
|
||||
#include "metrics/MetricBase.h"
|
||||
#include "utils/Error.h"
|
||||
#include "utils/Log.h"
|
||||
#include "utils/Status.h"
|
||||
|
||||
#define METRICS_NOW_TIME std::chrono::system_clock::now()
|
||||
//#define server::Metrics::GetInstance() server::GetInstance()
|
||||
@ -42,11 +44,11 @@ class PrometheusMetrics : public MetricsBase {
|
||||
return instance;
|
||||
}
|
||||
|
||||
ErrorCode
|
||||
Init();
|
||||
Status
|
||||
Init() override;
|
||||
|
||||
private:
|
||||
std::shared_ptr<prometheus::Exposer> exposer_ptr_;
|
||||
std::shared_ptr<prometheus::Gateway> gateway_;
|
||||
std::shared_ptr<prometheus::Registry> registry_ = std::make_shared<prometheus::Registry>();
|
||||
bool startup_ = false;
|
||||
|
||||
@ -293,9 +295,18 @@ class PrometheusMetrics : public MetricsBase {
|
||||
void
|
||||
CPUTemperature() override;
|
||||
|
||||
std::shared_ptr<prometheus::Exposer>&
|
||||
exposer_ptr() {
|
||||
return exposer_ptr_;
|
||||
void
|
||||
PushToGateway() override {
|
||||
if (startup_) {
|
||||
if (gateway_->Push() != 200) {
|
||||
ENGINE_LOG_WARNING << "Metrics pushgateway failed";
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
std::shared_ptr<prometheus::Gateway>&
|
||||
gateway() {
|
||||
return gateway_;
|
||||
}
|
||||
|
||||
// prometheus::Exposer& exposer() { return exposer_;}
|
||||
|
||||
@ -134,6 +134,9 @@ Config::ValidateConfig() {
|
||||
std::string metric_collector;
|
||||
CONFIG_CHECK(GetMetricConfigCollector(metric_collector));
|
||||
|
||||
std::string metric_prometheus_address;
|
||||
CONFIG_CHECK(GetMetricConfigPrometheusAddress(metric_prometheus_address));
|
||||
|
||||
std::string metric_prometheus_port;
|
||||
CONFIG_CHECK(GetMetricConfigPrometheusPort(metric_prometheus_port));
|
||||
|
||||
@ -214,6 +217,7 @@ Config::ResetDefaultConfig() {
|
||||
/* metric config */
|
||||
CONFIG_CHECK(SetMetricConfigEnableMonitor(CONFIG_METRIC_ENABLE_MONITOR_DEFAULT));
|
||||
CONFIG_CHECK(SetMetricConfigCollector(CONFIG_METRIC_COLLECTOR_DEFAULT));
|
||||
CONFIG_CHECK(SetMetricConfigPrometheusAddress(CONFIG_METRIC_PROMETHEUS_ADDRESS_DEFAULT));
|
||||
CONFIG_CHECK(SetMetricConfigPrometheusPort(CONFIG_METRIC_PROMETHEUS_PORT_DEFAULT));
|
||||
|
||||
/* cache config */
|
||||
@ -556,6 +560,16 @@ Config::CheckMetricConfigCollector(const std::string& value) {
|
||||
return Status::OK();
|
||||
}
|
||||
|
||||
// Validate the Pushgateway address of the prometheus monitor config.
// Returns SERVER_INVALID_ARGUMENT with an explanatory message when
// ValidationUtil::ValidateIpAddress rejects the value, Status::OK() otherwise.
Status
Config::CheckMetricConfigPrometheusAddress(const std::string& value) {
    if (!ValidationUtil::ValidateIpAddress(value).ok()) {
        // The config key is "address", so the hint names that key; the message
        // built here is the one actually returned to the caller.
        std::string msg =
            "Invalid metric ip: " + value + ". Possible reason: metric_config.prometheus_config.address is invalid.";
        return Status(SERVER_INVALID_ARGUMENT, msg);
    }
    return Status::OK();
}
|
||||
|
||||
Status
|
||||
Config::CheckMetricConfigPrometheusPort(const std::string& value) {
|
||||
if (!ValidationUtil::ValidateStringIsNumber(value).ok()) {
|
||||
@ -999,6 +1013,12 @@ Config::GetMetricConfigCollector(std::string& value) {
|
||||
return Status::OK();
|
||||
}
|
||||
|
||||
Status
|
||||
Config::GetMetricConfigPrometheusAddress(std::string& value) {
|
||||
value = GetConfigStr(CONFIG_METRIC, CONFIG_METRIC_PROMETHEUS_ADDRESS, CONFIG_METRIC_PROMETHEUS_ADDRESS_DEFAULT);
|
||||
return Status::OK();
|
||||
}
|
||||
|
||||
Status
|
||||
Config::GetMetricConfigPrometheusPort(std::string& value) {
|
||||
value = GetConfigStr(CONFIG_METRIC, CONFIG_METRIC_PROMETHEUS_PORT, CONFIG_METRIC_PROMETHEUS_PORT_DEFAULT);
|
||||
@ -1272,6 +1292,12 @@ Config::SetMetricConfigCollector(const std::string& value) {
|
||||
return SetConfigValueInMem(CONFIG_METRIC, CONFIG_METRIC_COLLECTOR, value);
|
||||
}
|
||||
|
||||
// Validate the Pushgateway address and store it in the in-memory config.
// CONFIG_CHECK is expected to return early with the failing Status when the
// address is rejected (TODO confirm against the macro definition); otherwise
// the Status of the in-memory store is returned.
Status
Config::SetMetricConfigPrometheusAddress(const std::string& value) {
    CONFIG_CHECK(CheckMetricConfigPrometheusAddress(value));
    // A Status-returning function must not fall off its end: propagate the
    // result of the store, matching SetMetricConfigCollector.
    return SetConfigValueInMem(CONFIG_METRIC, CONFIG_METRIC_PROMETHEUS_ADDRESS, value);
}
|
||||
|
||||
Status
|
||||
Config::SetMetricConfigPrometheusPort(const std::string& value) {
|
||||
CONFIG_CHECK(CheckMetricConfigPrometheusPort(value));
|
||||
|
||||
@ -98,8 +98,10 @@ static const char* CONFIG_METRIC_ENABLE_MONITOR_DEFAULT = "false";
|
||||
static const char* CONFIG_METRIC_COLLECTOR = "collector";
|
||||
static const char* CONFIG_METRIC_COLLECTOR_DEFAULT = "prometheus";
|
||||
static const char* CONFIG_METRIC_PROMETHEUS = "prometheus_config";
|
||||
static const char* CONFIG_METRIC_PROMETHEUS_ADDRESS = "address";
|
||||
static const char* CONFIG_METRIC_PROMETHEUS_ADDRESS_DEFAULT = "127.0.0.1";
|
||||
static const char* CONFIG_METRIC_PROMETHEUS_PORT = "port";
|
||||
static const char* CONFIG_METRIC_PROMETHEUS_PORT_DEFAULT = "8080";
|
||||
static const char* CONFIG_METRIC_PROMETHEUS_PORT_DEFAULT = "9091";
|
||||
|
||||
/* engine config */
|
||||
static const char* CONFIG_ENGINE = "engine_config";
|
||||
@ -212,6 +214,8 @@ class Config {
|
||||
Status
|
||||
CheckMetricConfigCollector(const std::string& value);
|
||||
Status
|
||||
CheckMetricConfigPrometheusAddress(const std::string& value);
|
||||
Status
|
||||
CheckMetricConfigPrometheusPort(const std::string& value);
|
||||
|
||||
/* cache config */
|
||||
@ -300,6 +304,8 @@ class Config {
|
||||
Status
|
||||
GetMetricConfigCollector(std::string& value);
|
||||
Status
|
||||
GetMetricConfigPrometheusAddress(std::string& value);
|
||||
Status
|
||||
GetMetricConfigPrometheusPort(std::string& value);
|
||||
|
||||
/* cache config */
|
||||
@ -382,6 +388,8 @@ class Config {
|
||||
Status
|
||||
SetMetricConfigCollector(const std::string& value);
|
||||
Status
|
||||
SetMetricConfigPrometheusAddress(const std::string& value);
|
||||
Status
|
||||
SetMetricConfigPrometheusPort(const std::string& value);
|
||||
|
||||
/* cache config */
|
||||
|
||||
@ -55,6 +55,7 @@ class NoReusePortOption : public ::grpc::ServerBuilderOption {
|
||||
void
|
||||
UpdateArguments(::grpc::ChannelArguments* args) override {
|
||||
args->SetInt(GRPC_ARG_ALLOW_REUSEPORT, 0);
|
||||
args->SetInt(GRPC_ARG_MAX_CONCURRENT_STREAMS, 20);
|
||||
}
|
||||
|
||||
void
|
||||
|
||||
@ -62,5 +62,6 @@ TEST(MetricbaseTest, METRICBASE_TEST) {
|
||||
instance.ConnectionGaugeIncrement();
|
||||
instance.ConnectionGaugeDecrement();
|
||||
instance.KeepingAliveCounterIncrement();
|
||||
instance.PushToGateway();
|
||||
instance.OctetsSet();
|
||||
}
|
||||
|
||||
@ -67,6 +67,7 @@ TEST(PrometheusTest, PROMETHEUS_TEST) {
|
||||
instance.ConnectionGaugeIncrement();
|
||||
instance.ConnectionGaugeDecrement();
|
||||
instance.KeepingAliveCounterIncrement();
|
||||
instance.PushToGateway();
|
||||
instance.OctetsSet();
|
||||
|
||||
instance.CPUCoreUsagePercentSet();
|
||||
|
||||
@ -218,6 +218,10 @@ TEST_F(ConfigTest, SERVER_CONFIG_VALID_TEST) {
|
||||
ASSERT_TRUE(config.GetMetricConfigCollector(str_val).ok());
|
||||
ASSERT_TRUE(str_val == metric_collector);
|
||||
|
||||
std::string metric_prometheus_address = "127.0.0.1";
|
||||
ASSERT_TRUE(config.GetMetricConfigPrometheusAddress(str_val).ok());
|
||||
ASSERT_TRUE(str_val == metric_prometheus_address);
|
||||
|
||||
std::string metric_prometheus_port = "2222";
|
||||
ASSERT_TRUE(config.SetMetricConfigPrometheusPort(metric_prometheus_port).ok());
|
||||
ASSERT_TRUE(config.GetMetricConfigPrometheusPort(str_val).ok());
|
||||
@ -298,12 +302,14 @@ TEST_F(ConfigTest, SERVER_CONFIG_VALID_TEST) {
|
||||
#endif
|
||||
}
|
||||
|
||||
std::string gen_get_command(const std::string& parent_node, const std::string& child_node) {
|
||||
std::string
|
||||
gen_get_command(const std::string& parent_node, const std::string& child_node) {
|
||||
std::string cmd = "get_config " + parent_node + ms::CONFIG_NODE_DELIMITER + child_node;
|
||||
return cmd;
|
||||
}
|
||||
|
||||
std::string gen_set_command(const std::string& parent_node, const std::string& child_node, const std::string& value) {
|
||||
std::string
|
||||
gen_set_command(const std::string& parent_node, const std::string& child_node, const std::string& value) {
|
||||
std::string cmd = "set_config " + parent_node + ms::CONFIG_NODE_DELIMITER + child_node + " " + value;
|
||||
return cmd;
|
||||
}
|
||||
@ -519,6 +525,8 @@ TEST_F(ConfigTest, SERVER_CONFIG_INVALID_TEST) {
|
||||
|
||||
ASSERT_FALSE(config.SetMetricConfigCollector("zilliz").ok());
|
||||
|
||||
ASSERT_FALSE(config.SetMetricConfigPrometheusAddress("127.0.0").ok());
|
||||
|
||||
ASSERT_FALSE(config.SetMetricConfigPrometheusPort("0xff").ok());
|
||||
|
||||
/* cache config */
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user