mirror of
https://gitee.com/milvus-io/milvus.git
synced 2026-01-07 19:31:51 +08:00
format metric code
Former-commit-id: ea430e044ae3c93b051c4ac6261cfe3c241ae80b
This commit is contained in:
parent
a0e4d19e4f
commit
ca1ab56cb4
@ -4,4 +4,5 @@
|
||||
*src/thirdparty*
|
||||
*src/core/thirdparty*
|
||||
*src/grpc*
|
||||
*easylogging++*
|
||||
*easylogging++*
|
||||
*SqliteMetaImpl.cpp
|
||||
@ -21,67 +21,149 @@
|
||||
#include "utils/Error.h"
|
||||
#include "SystemInfo.h"
|
||||
|
||||
#include <string>
|
||||
|
||||
namespace zilliz {
|
||||
namespace milvus {
|
||||
namespace server {
|
||||
class MetricsBase{
|
||||
class MetricsBase {
|
||||
public:
|
||||
static MetricsBase&
|
||||
GetInstance(){
|
||||
static MetricsBase &
|
||||
GetInstance() {
|
||||
static MetricsBase instance;
|
||||
return instance;
|
||||
}
|
||||
|
||||
virtual ErrorCode Init() {};
|
||||
virtual ErrorCode Init() {
|
||||
}
|
||||
|
||||
virtual void AddVectorsSuccessTotalIncrement(double value = 1) {};
|
||||
virtual void AddVectorsFailTotalIncrement(double value = 1) {};
|
||||
virtual void AddVectorsDurationHistogramOberve(double value) {};
|
||||
virtual void AddVectorsSuccessTotalIncrement(double value = 1) {
|
||||
}
|
||||
|
||||
virtual void RawFileSizeHistogramObserve(double value) {};
|
||||
virtual void IndexFileSizeHistogramObserve(double value) {};
|
||||
virtual void BuildIndexDurationSecondsHistogramObserve(double value) {};
|
||||
virtual void AddVectorsFailTotalIncrement(double value = 1) {
|
||||
}
|
||||
|
||||
virtual void CpuCacheUsageGaugeSet(double value) {};
|
||||
virtual void GpuCacheUsageGaugeSet() {};
|
||||
virtual void AddVectorsDurationHistogramOberve(double value) {
|
||||
}
|
||||
|
||||
virtual void MetaAccessTotalIncrement(double value = 1) {};
|
||||
virtual void MetaAccessDurationSecondsHistogramObserve(double value) {};
|
||||
virtual void FaissDiskLoadDurationSecondsHistogramObserve(double value) {};
|
||||
virtual void FaissDiskLoadSizeBytesHistogramObserve(double value) {};
|
||||
virtual void CacheAccessTotalIncrement(double value = 1) {};
|
||||
virtual void MemTableMergeDurationSecondsHistogramObserve(double value) {};
|
||||
virtual void SearchIndexDataDurationSecondsHistogramObserve(double value) {};
|
||||
virtual void SearchRawDataDurationSecondsHistogramObserve(double value) {};
|
||||
virtual void IndexFileSizeTotalIncrement(double value = 1) {};
|
||||
virtual void RawFileSizeTotalIncrement(double value = 1) {};
|
||||
virtual void IndexFileSizeGaugeSet(double value) {};
|
||||
virtual void RawFileSizeGaugeSet(double value) {};
|
||||
virtual void FaissDiskLoadIOSpeedGaugeSet(double value) {};
|
||||
virtual void QueryResponseSummaryObserve(double value) {};
|
||||
virtual void DiskStoreIOSpeedGaugeSet(double value) {};
|
||||
virtual void DataFileSizeGaugeSet(double value) {};
|
||||
virtual void AddVectorsSuccessGaugeSet(double value) {};
|
||||
virtual void AddVectorsFailGaugeSet(double value) {};
|
||||
virtual void QueryVectorResponseSummaryObserve(double value, int count = 1) {};
|
||||
virtual void QueryVectorResponsePerSecondGaugeSet(double value) {};
|
||||
virtual void CPUUsagePercentSet() {};
|
||||
virtual void RAMUsagePercentSet() {};
|
||||
virtual void QueryResponsePerSecondGaugeSet(double value) {};
|
||||
virtual void GPUPercentGaugeSet() {};
|
||||
virtual void GPUMemoryUsageGaugeSet() {};
|
||||
virtual void AddVectorsPerSecondGaugeSet(int num_vector, int dim, double time) {};
|
||||
virtual void QueryIndexTypePerSecondSet(std::string type, double value) {};
|
||||
virtual void ConnectionGaugeIncrement() {};
|
||||
virtual void ConnectionGaugeDecrement() {};
|
||||
virtual void KeepingAliveCounterIncrement(double value = 1) {};
|
||||
virtual void OctetsSet() {};
|
||||
virtual void RawFileSizeHistogramObserve(double value) {
|
||||
}
|
||||
|
||||
virtual void CPUCoreUsagePercentSet() {};
|
||||
virtual void GPUTemperature() {};
|
||||
virtual void CPUTemperature() {};
|
||||
virtual void IndexFileSizeHistogramObserve(double value) {
|
||||
}
|
||||
|
||||
virtual void BuildIndexDurationSecondsHistogramObserve(double value) {
|
||||
}
|
||||
|
||||
virtual void CpuCacheUsageGaugeSet(double value) {
|
||||
}
|
||||
|
||||
virtual void GpuCacheUsageGaugeSet() {
|
||||
}
|
||||
|
||||
virtual void MetaAccessTotalIncrement(double value = 1) {
|
||||
}
|
||||
|
||||
virtual void MetaAccessDurationSecondsHistogramObserve(double value) {
|
||||
}
|
||||
|
||||
virtual void FaissDiskLoadDurationSecondsHistogramObserve(double value) {
|
||||
}
|
||||
|
||||
virtual void FaissDiskLoadSizeBytesHistogramObserve(double value) {
|
||||
}
|
||||
|
||||
virtual void CacheAccessTotalIncrement(double value = 1) {
|
||||
}
|
||||
|
||||
virtual void MemTableMergeDurationSecondsHistogramObserve(double value) {
|
||||
}
|
||||
|
||||
virtual void SearchIndexDataDurationSecondsHistogramObserve(double value) {
|
||||
}
|
||||
|
||||
virtual void SearchRawDataDurationSecondsHistogramObserve(double value) {
|
||||
}
|
||||
|
||||
virtual void IndexFileSizeTotalIncrement(double value = 1) {
|
||||
}
|
||||
|
||||
virtual void RawFileSizeTotalIncrement(double value = 1) {
|
||||
}
|
||||
|
||||
virtual void IndexFileSizeGaugeSet(double value) {
|
||||
}
|
||||
|
||||
virtual void RawFileSizeGaugeSet(double value) {
|
||||
}
|
||||
|
||||
virtual void FaissDiskLoadIOSpeedGaugeSet(double value) {
|
||||
}
|
||||
|
||||
virtual void QueryResponseSummaryObserve(double value) {
|
||||
}
|
||||
|
||||
virtual void DiskStoreIOSpeedGaugeSet(double value) {
|
||||
}
|
||||
|
||||
virtual void DataFileSizeGaugeSet(double value) {
|
||||
}
|
||||
|
||||
virtual void AddVectorsSuccessGaugeSet(double value) {
|
||||
}
|
||||
|
||||
virtual void AddVectorsFailGaugeSet(double value) {
|
||||
}
|
||||
|
||||
virtual void QueryVectorResponseSummaryObserve(double value, int count = 1) {
|
||||
}
|
||||
|
||||
virtual void QueryVectorResponsePerSecondGaugeSet(double value) {
|
||||
}
|
||||
|
||||
virtual void CPUUsagePercentSet() {
|
||||
}
|
||||
|
||||
virtual void RAMUsagePercentSet() {
|
||||
}
|
||||
|
||||
virtual void QueryResponsePerSecondGaugeSet(double value) {
|
||||
}
|
||||
|
||||
virtual void GPUPercentGaugeSet() {
|
||||
}
|
||||
|
||||
virtual void GPUMemoryUsageGaugeSet() {
|
||||
}
|
||||
|
||||
virtual void AddVectorsPerSecondGaugeSet(int num_vector, int dim, double time) {
|
||||
}
|
||||
|
||||
virtual void QueryIndexTypePerSecondSet(std::string type, double value) {
|
||||
}
|
||||
|
||||
virtual void ConnectionGaugeIncrement() {
|
||||
}
|
||||
|
||||
virtual void ConnectionGaugeDecrement() {
|
||||
}
|
||||
|
||||
virtual void KeepingAliveCounterIncrement(double value = 1) {
|
||||
}
|
||||
|
||||
virtual void OctetsSet() {
|
||||
}
|
||||
|
||||
virtual void CPUCoreUsagePercentSet() {
|
||||
}
|
||||
|
||||
virtual void GPUTemperature() {
|
||||
}
|
||||
|
||||
virtual void CPUTemperature() {
|
||||
}
|
||||
};
|
||||
|
||||
}
|
||||
}
|
||||
}
|
||||
} // namespace server
|
||||
} // namespace milvus
|
||||
} // namespace zilliz
|
||||
|
||||
@ -15,10 +15,11 @@
|
||||
// specific language governing permissions and limitations
|
||||
// under the License.
|
||||
|
||||
#include "Metrics.h"
|
||||
#include "metrics/Metrics.h"
|
||||
#include "server/Config.h"
|
||||
#include "PrometheusMetrics.h"
|
||||
|
||||
#include <string>
|
||||
|
||||
namespace zilliz {
|
||||
namespace milvus {
|
||||
@ -44,6 +45,6 @@ Metrics::CreateMetricsCollector() {
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
}
|
||||
}
|
||||
} // namespace server
|
||||
} // namespace milvus
|
||||
} // namespace zilliz
|
||||
|
||||
@ -21,7 +21,6 @@
|
||||
#include "MetricBase.h"
|
||||
#include "db/meta/MetaTypes.h"
|
||||
|
||||
|
||||
namespace zilliz {
|
||||
namespace milvus {
|
||||
namespace server {
|
||||
@ -44,7 +43,7 @@ class Metrics {
|
||||
};
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
class CollectMetricsBase {
|
||||
protected:
|
||||
protected:
|
||||
CollectMetricsBase() {
|
||||
start_time_ = METRICS_NOW_TIME;
|
||||
}
|
||||
@ -56,19 +55,19 @@ protected:
|
||||
return METRICS_MICROSECONDS(start_time_, end_time);
|
||||
}
|
||||
|
||||
protected:
|
||||
protected:
|
||||
using TIME_POINT = std::chrono::system_clock::time_point;
|
||||
TIME_POINT start_time_;
|
||||
};
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
class CollectInsertMetrics : CollectMetricsBase {
|
||||
public:
|
||||
CollectInsertMetrics(size_t n, Status& status) : n_(n), status_(status) {
|
||||
public:
|
||||
CollectInsertMetrics(size_t n, Status &status) : n_(n), status_(status) {
|
||||
}
|
||||
|
||||
~CollectInsertMetrics() {
|
||||
if(n_ > 0) {
|
||||
if (n_ > 0) {
|
||||
auto total_time = TimeFromBegine();
|
||||
double avg_time = total_time / n_;
|
||||
for (int i = 0; i < n_; ++i) {
|
||||
@ -86,19 +85,19 @@ public:
|
||||
}
|
||||
}
|
||||
|
||||
private:
|
||||
private:
|
||||
size_t n_;
|
||||
Status& status_;
|
||||
Status &status_;
|
||||
};
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
class CollectQueryMetrics : CollectMetricsBase {
|
||||
public:
|
||||
CollectQueryMetrics(size_t nq) : nq_(nq) {
|
||||
public:
|
||||
explicit CollectQueryMetrics(size_t nq) : nq_(nq) {
|
||||
}
|
||||
|
||||
~CollectQueryMetrics() {
|
||||
if(nq_ > 0) {
|
||||
if (nq_ > 0) {
|
||||
auto total_time = TimeFromBegine();
|
||||
for (int i = 0; i < nq_; ++i) {
|
||||
server::Metrics::GetInstance().QueryResponseSummaryObserve(total_time);
|
||||
@ -109,13 +108,13 @@ public:
|
||||
}
|
||||
}
|
||||
|
||||
private:
|
||||
private:
|
||||
size_t nq_;
|
||||
};
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
class CollectMergeFilesMetrics : CollectMetricsBase {
|
||||
public:
|
||||
public:
|
||||
CollectMergeFilesMetrics() {
|
||||
}
|
||||
|
||||
@ -127,7 +126,7 @@ public:
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
class CollectBuildIndexMetrics : CollectMetricsBase {
|
||||
public:
|
||||
public:
|
||||
CollectBuildIndexMetrics() {
|
||||
}
|
||||
|
||||
@ -139,8 +138,8 @@ public:
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
class CollectExecutionEngineMetrics : CollectMetricsBase {
|
||||
public:
|
||||
CollectExecutionEngineMetrics(double physical_size) : physical_size_(physical_size) {
|
||||
public:
|
||||
explicit CollectExecutionEngineMetrics(double physical_size) : physical_size_(physical_size) {
|
||||
}
|
||||
|
||||
~CollectExecutionEngineMetrics() {
|
||||
@ -151,27 +150,28 @@ public:
|
||||
server::Metrics::GetInstance().FaissDiskLoadIOSpeedGaugeSet(physical_size_ / double(total_time));
|
||||
}
|
||||
|
||||
private:
|
||||
private:
|
||||
double physical_size_;
|
||||
};
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
class CollectSerializeMetrics : CollectMetricsBase {
|
||||
public:
|
||||
CollectSerializeMetrics(size_t size) : size_(size) {
|
||||
public:
|
||||
explicit CollectSerializeMetrics(size_t size) : size_(size) {
|
||||
}
|
||||
|
||||
~CollectSerializeMetrics() {
|
||||
auto total_time = TimeFromBegine();
|
||||
server::Metrics::GetInstance().DiskStoreIOSpeedGaugeSet((double) size_ / total_time);
|
||||
}
|
||||
private:
|
||||
|
||||
private:
|
||||
size_t size_;
|
||||
};
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
class CollectAddMetrics : CollectMetricsBase {
|
||||
public:
|
||||
public:
|
||||
CollectAddMetrics(size_t n, uint16_t dimension) : n_(n), dimension_(dimension) {
|
||||
}
|
||||
|
||||
@ -181,15 +181,16 @@ public:
|
||||
static_cast<int>(dimension_),
|
||||
total_time);
|
||||
}
|
||||
private:
|
||||
|
||||
private:
|
||||
size_t n_;
|
||||
uint16_t dimension_;
|
||||
};
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
class CollectDurationMetrics : CollectMetricsBase {
|
||||
public:
|
||||
CollectDurationMetrics(int index_type) : index_type_(index_type) {
|
||||
public:
|
||||
explicit CollectDurationMetrics(int index_type) : index_type_(index_type) {
|
||||
}
|
||||
|
||||
~CollectDurationMetrics() {
|
||||
@ -209,19 +210,20 @@ public:
|
||||
}
|
||||
}
|
||||
}
|
||||
private:
|
||||
|
||||
private:
|
||||
int index_type_;
|
||||
};
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
class CollectSearchTaskMetrics : CollectMetricsBase {
|
||||
public:
|
||||
CollectSearchTaskMetrics(int index_type) : index_type_(index_type) {
|
||||
public:
|
||||
explicit CollectSearchTaskMetrics(int index_type) : index_type_(index_type) {
|
||||
}
|
||||
|
||||
~CollectSearchTaskMetrics() {
|
||||
auto total_time = TimeFromBegine();
|
||||
switch(index_type_) {
|
||||
switch (index_type_) {
|
||||
case engine::meta::TableFileSchema::RAW: {
|
||||
server::Metrics::GetInstance().SearchRawDataDurationSecondsHistogramObserve(total_time);
|
||||
break;
|
||||
@ -237,13 +239,13 @@ public:
|
||||
}
|
||||
}
|
||||
|
||||
private:
|
||||
private:
|
||||
int index_type_;
|
||||
};
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
class MetricCollector : CollectMetricsBase {
|
||||
public:
|
||||
public:
|
||||
MetricCollector() {
|
||||
server::Metrics::GetInstance().MetaAccessTotalIncrement();
|
||||
}
|
||||
@ -254,11 +256,6 @@ public:
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
} // namespace server
|
||||
} // namespace milvus
|
||||
} // namespace zilliz
|
||||
|
||||
@ -16,12 +16,14 @@
|
||||
// under the License.
|
||||
|
||||
|
||||
#include "metrics/PrometheusMetrics.h"
|
||||
#include "cache/GpuCacheMgr.h"
|
||||
#include "PrometheusMetrics.h"
|
||||
#include "server/Config.h"
|
||||
#include "utils/Log.h"
|
||||
#include "SystemInfo.h"
|
||||
|
||||
#include <string>
|
||||
#include <utility>
|
||||
|
||||
namespace zilliz {
|
||||
namespace milvus {
|
||||
@ -47,93 +49,96 @@ PrometheusMetrics::Init() {
|
||||
|
||||
// Exposer Registry
|
||||
exposer_ptr_->RegisterCollectable(registry_);
|
||||
} catch (std::exception& ex) {
|
||||
} catch (std::exception &ex) {
|
||||
SERVER_LOG_ERROR << "Failed to connect prometheus server: " << std::string(ex.what());
|
||||
return SERVER_UNEXPECTED_ERROR;
|
||||
}
|
||||
|
||||
return SERVER_SUCCESS;
|
||||
|
||||
}
|
||||
|
||||
|
||||
void
|
||||
PrometheusMetrics::CPUUsagePercentSet() {
|
||||
if(!startup_) return ;
|
||||
PrometheusMetrics::CPUUsagePercentSet() {
|
||||
if (!startup_) return;
|
||||
double usage_percent = server::SystemInfo::GetInstance().CPUPercent();
|
||||
CPU_usage_percent_.Set(usage_percent);
|
||||
}
|
||||
|
||||
void
|
||||
PrometheusMetrics::RAMUsagePercentSet() {
|
||||
if(!startup_) return ;
|
||||
if (!startup_) return;
|
||||
double usage_percent = server::SystemInfo::GetInstance().MemoryPercent();
|
||||
RAM_usage_percent_.Set(usage_percent);
|
||||
}
|
||||
|
||||
void
|
||||
PrometheusMetrics::GPUPercentGaugeSet() {
|
||||
if(!startup_) return;
|
||||
if (!startup_) return;
|
||||
int numDevice = server::SystemInfo::GetInstance().num_device();
|
||||
std::vector<unsigned long long > used_total = server::SystemInfo::GetInstance().GPUMemoryTotal();
|
||||
std::vector<unsigned long long > used_memory = server::SystemInfo::GetInstance().GPUMemoryUsed();
|
||||
std::vector<uint64_t> used_total = server::SystemInfo::GetInstance().GPUMemoryTotal();
|
||||
std::vector<uint64_t> used_memory = server::SystemInfo::GetInstance().GPUMemoryUsed();
|
||||
|
||||
for (int i = 0; i < numDevice; ++i) {
|
||||
prometheus::Gauge &GPU_percent = GPU_percent_.Add({{"DeviceNum", std::to_string(i)}});
|
||||
double percent = (double)used_memory[i] / (double)used_total[i];
|
||||
double percent = (double) used_memory[i] / (double) used_total[i];
|
||||
GPU_percent.Set(percent * 100);
|
||||
}
|
||||
}
|
||||
|
||||
void PrometheusMetrics::GPUMemoryUsageGaugeSet() {
|
||||
if(!startup_) return;
|
||||
std::vector<unsigned long long> values = server::SystemInfo::GetInstance().GPUMemoryUsed();
|
||||
constexpr unsigned long long MtoB = 1024*1024;
|
||||
void
|
||||
PrometheusMetrics::GPUMemoryUsageGaugeSet() {
|
||||
if (!startup_) return;
|
||||
std::vector<uint64_t> values = server::SystemInfo::GetInstance().GPUMemoryUsed();
|
||||
constexpr uint64_t MtoB = 1024 * 1024;
|
||||
int numDevice = server::SystemInfo::GetInstance().num_device();
|
||||
|
||||
for (int i = 0; i < numDevice; ++i) {
|
||||
prometheus::Gauge &GPU_memory = GPU_memory_usage_.Add({{"DeviceNum", std::to_string(i)}});
|
||||
GPU_memory.Set(values[i] / MtoB);
|
||||
}
|
||||
|
||||
}
|
||||
void PrometheusMetrics::AddVectorsPerSecondGaugeSet(int num_vector, int dim, double time) {
|
||||
|
||||
void
|
||||
PrometheusMetrics::AddVectorsPerSecondGaugeSet(int num_vector, int dim, double time) {
|
||||
// MB/s
|
||||
if(!startup_) return;
|
||||
|
||||
long long MtoB = 1024*1024;
|
||||
long long size = num_vector * dim * 4;
|
||||
add_vectors_per_second_gauge_.Set(size/time/MtoB);
|
||||
if (!startup_) return;
|
||||
|
||||
int64_t MtoB = 1024 * 1024;
|
||||
int64_t size = num_vector * dim * 4;
|
||||
add_vectors_per_second_gauge_.Set(size / time / MtoB);
|
||||
}
|
||||
void PrometheusMetrics::QueryIndexTypePerSecondSet(std::string type, double value) {
|
||||
if(!startup_) return;
|
||||
if(type == "IVF"){
|
||||
|
||||
void
|
||||
PrometheusMetrics::QueryIndexTypePerSecondSet(std::string type, double value) {
|
||||
if (!startup_) return;
|
||||
if (type == "IVF") {
|
||||
query_index_IVF_type_per_second_gauge_.Set(value);
|
||||
} else if(type == "IDMap"){
|
||||
} else if (type == "IDMap") {
|
||||
query_index_IDMAP_type_per_second_gauge_.Set(value);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
void PrometheusMetrics::ConnectionGaugeIncrement() {
|
||||
if(!startup_) return;
|
||||
void
|
||||
PrometheusMetrics::ConnectionGaugeIncrement() {
|
||||
if (!startup_) return;
|
||||
connection_gauge_.Increment();
|
||||
}
|
||||
|
||||
void PrometheusMetrics::ConnectionGaugeDecrement() {
|
||||
if(!startup_) return;
|
||||
void
|
||||
PrometheusMetrics::ConnectionGaugeDecrement() {
|
||||
if (!startup_) return;
|
||||
connection_gauge_.Decrement();
|
||||
}
|
||||
|
||||
void PrometheusMetrics::OctetsSet() {
|
||||
if(!startup_) return;
|
||||
void
|
||||
PrometheusMetrics::OctetsSet() {
|
||||
if (!startup_) return;
|
||||
|
||||
// get old stats and reset them
|
||||
unsigned long long old_inoctets = SystemInfo::GetInstance().get_inoctets();
|
||||
unsigned long long old_outoctets = SystemInfo::GetInstance().get_octets();
|
||||
uint64_t old_inoctets = SystemInfo::GetInstance().get_inoctets();
|
||||
uint64_t old_outoctets = SystemInfo::GetInstance().get_octets();
|
||||
auto old_time = SystemInfo::GetInstance().get_nettime();
|
||||
std::pair<unsigned long long, unsigned long long> in_and_out_octets = SystemInfo::GetInstance().Octets();
|
||||
std::pair<uint64_t, uint64_t> in_and_out_octets = SystemInfo::GetInstance().Octets();
|
||||
SystemInfo::GetInstance().set_inoctets(in_and_out_octets.first);
|
||||
SystemInfo::GetInstance().set_outoctets(in_and_out_octets.second);
|
||||
SystemInfo::GetInstance().set_nettime();
|
||||
@ -142,13 +147,14 @@ void PrometheusMetrics::OctetsSet() {
|
||||
constexpr double micro_to_second = 1e-6;
|
||||
auto now_time = std::chrono::system_clock::now();
|
||||
auto total_microsecond = METRICS_MICROSECONDS(old_time, now_time);
|
||||
auto total_second = total_microsecond*micro_to_second;
|
||||
if(total_second == 0) return;
|
||||
inoctets_gauge_.Set((in_and_out_octets.first-old_inoctets)/total_second);
|
||||
outoctets_gauge_.Set((in_and_out_octets.second-old_outoctets)/total_second);
|
||||
auto total_second = total_microsecond * micro_to_second;
|
||||
if (total_second == 0) return;
|
||||
inoctets_gauge_.Set((in_and_out_octets.first - old_inoctets) / total_second);
|
||||
outoctets_gauge_.Set((in_and_out_octets.second - old_outoctets) / total_second);
|
||||
}
|
||||
|
||||
void PrometheusMetrics::CPUCoreUsagePercentSet() {
|
||||
void
|
||||
PrometheusMetrics::CPUCoreUsagePercentSet() {
|
||||
if (!startup_)
|
||||
return;
|
||||
|
||||
@ -160,11 +166,12 @@ void PrometheusMetrics::CPUCoreUsagePercentSet() {
|
||||
}
|
||||
}
|
||||
|
||||
void PrometheusMetrics::GPUTemperature() {
|
||||
void
|
||||
PrometheusMetrics::GPUTemperature() {
|
||||
if (!startup_)
|
||||
return;
|
||||
|
||||
std::vector<unsigned int> GPU_temperatures = server::SystemInfo::GetInstance().GPUTemperature();
|
||||
std::vector<uint64_t> GPU_temperatures = server::SystemInfo::GetInstance().GPUTemperature();
|
||||
|
||||
for (int i = 0; i < GPU_temperatures.size(); ++i) {
|
||||
prometheus::Gauge &gpu_temp = GPU_temperature_.Add({{"GPU", std::to_string(i)}});
|
||||
@ -172,7 +179,8 @@ void PrometheusMetrics::GPUTemperature() {
|
||||
}
|
||||
}
|
||||
|
||||
void PrometheusMetrics::CPUTemperature() {
|
||||
void
|
||||
PrometheusMetrics::CPUTemperature() {
|
||||
if (!startup_)
|
||||
return;
|
||||
|
||||
@ -184,7 +192,8 @@ void PrometheusMetrics::CPUTemperature() {
|
||||
}
|
||||
}
|
||||
|
||||
void PrometheusMetrics::GpuCacheUsageGaugeSet() {
|
||||
void
|
||||
PrometheusMetrics::GpuCacheUsageGaugeSet() {
|
||||
// std::vector<uint64_t > gpu_ids = {0};
|
||||
// for(auto i = 0; i < gpu_ids.size(); ++i) {
|
||||
// uint64_t cache_usage = cache::GpuCacheMgr::GetInstance(gpu_ids[i])->CacheUsage();
|
||||
@ -194,6 +203,6 @@ void PrometheusMetrics::GpuCacheUsageGaugeSet() {
|
||||
// }
|
||||
}
|
||||
|
||||
}
|
||||
}
|
||||
}
|
||||
} // namespace server
|
||||
} // namespace milvus
|
||||
} // namespace zilliz
|
||||
|
||||
@ -22,22 +22,20 @@
|
||||
#include <prometheus/registry.h>
|
||||
#include <prometheus/exposer.h>
|
||||
#include <iostream>
|
||||
#include <string>
|
||||
|
||||
#include "utils/Error.h"
|
||||
#include "MetricBase.h"
|
||||
|
||||
|
||||
#define METRICS_NOW_TIME std::chrono::system_clock::now()
|
||||
//#define server::Metrics::GetInstance() server::GetInstance()
|
||||
#define METRICS_MICROSECONDS(a,b) (std::chrono::duration_cast<std::chrono::microseconds> (b-a)).count();
|
||||
|
||||
#define METRICS_MICROSECONDS(a, b) (std::chrono::duration_cast<std::chrono::microseconds> (b-a)).count();
|
||||
|
||||
namespace zilliz {
|
||||
namespace milvus {
|
||||
namespace server {
|
||||
|
||||
class PrometheusMetrics: public MetricsBase {
|
||||
|
||||
class PrometheusMetrics : public MetricsBase {
|
||||
public:
|
||||
static PrometheusMetrics &
|
||||
GetInstance() {
|
||||
@ -51,59 +49,215 @@ class PrometheusMetrics: public MetricsBase {
|
||||
std::shared_ptr<prometheus::Exposer> exposer_ptr_;
|
||||
std::shared_ptr<prometheus::Registry> registry_ = std::make_shared<prometheus::Registry>();
|
||||
bool startup_ = false;
|
||||
|
||||
public:
|
||||
void SetStartup(bool startup) {startup_ = startup;};
|
||||
void AddVectorsSuccessTotalIncrement(double value = 1.0) override { if(startup_) add_vectors_success_total_.Increment(value);};
|
||||
void AddVectorsFailTotalIncrement(double value = 1.0) override { if(startup_) add_vectors_fail_total_.Increment(value);};
|
||||
void AddVectorsDurationHistogramOberve(double value) override { if(startup_) add_vectors_duration_histogram_.Observe(value);};
|
||||
void RawFileSizeHistogramObserve(double value) override { if(startup_) raw_files_size_histogram_.Observe(value);};
|
||||
void IndexFileSizeHistogramObserve(double value) override { if(startup_) index_files_size_histogram_.Observe(value);};
|
||||
void BuildIndexDurationSecondsHistogramObserve(double value) override { if(startup_) build_index_duration_seconds_histogram_.Observe(value);};
|
||||
void CpuCacheUsageGaugeSet(double value) override { if(startup_) cpu_cache_usage_gauge_.Set(value);};
|
||||
void SetStartup(bool startup) {
|
||||
startup_ = startup;
|
||||
}
|
||||
|
||||
void AddVectorsSuccessTotalIncrement(double value = 1.0) override {
|
||||
if (startup_) {
|
||||
add_vectors_success_total_.Increment(value);
|
||||
}
|
||||
}
|
||||
|
||||
void AddVectorsFailTotalIncrement(double value = 1.0) override {
|
||||
if (startup_) {
|
||||
add_vectors_fail_total_.Increment(value);
|
||||
}
|
||||
}
|
||||
|
||||
void AddVectorsDurationHistogramOberve(double value) override {
|
||||
if (startup_) {
|
||||
add_vectors_duration_histogram_.Observe(value);
|
||||
}
|
||||
}
|
||||
|
||||
void RawFileSizeHistogramObserve(double value) override {
|
||||
if (startup_) {
|
||||
raw_files_size_histogram_.Observe(value);
|
||||
}
|
||||
}
|
||||
|
||||
void IndexFileSizeHistogramObserve(double value) override {
|
||||
if (startup_) {
|
||||
index_files_size_histogram_.Observe(value);
|
||||
}
|
||||
}
|
||||
|
||||
void BuildIndexDurationSecondsHistogramObserve(double value) override {
|
||||
if (startup_) {
|
||||
build_index_duration_seconds_histogram_.Observe(value);
|
||||
}
|
||||
}
|
||||
|
||||
void CpuCacheUsageGaugeSet(double value) override {
|
||||
if (startup_) {
|
||||
cpu_cache_usage_gauge_.Set(value);
|
||||
}
|
||||
}
|
||||
|
||||
void GpuCacheUsageGaugeSet() override;
|
||||
|
||||
void MetaAccessTotalIncrement(double value = 1) override { if(startup_) meta_access_total_.Increment(value);};
|
||||
void MetaAccessDurationSecondsHistogramObserve(double value) override { if(startup_) meta_access_duration_seconds_histogram_.Observe(value);};
|
||||
void MetaAccessTotalIncrement(double value = 1) override {
|
||||
if (startup_) {
|
||||
meta_access_total_.Increment(value);
|
||||
}
|
||||
}
|
||||
|
||||
void FaissDiskLoadDurationSecondsHistogramObserve(double value) override { if(startup_) faiss_disk_load_duration_seconds_histogram_.Observe(value);};
|
||||
void FaissDiskLoadSizeBytesHistogramObserve(double value) override { if(startup_) faiss_disk_load_size_bytes_histogram_.Observe(value);};
|
||||
void FaissDiskLoadIOSpeedGaugeSet(double value) override { if(startup_) faiss_disk_load_IO_speed_gauge_.Set(value);};
|
||||
void MetaAccessDurationSecondsHistogramObserve(double value) override {
|
||||
if (startup_) {
|
||||
meta_access_duration_seconds_histogram_.Observe(value);
|
||||
}
|
||||
}
|
||||
|
||||
void CacheAccessTotalIncrement(double value = 1) override { if(startup_) cache_access_total_.Increment(value);};
|
||||
void MemTableMergeDurationSecondsHistogramObserve(double value) override { if(startup_) mem_table_merge_duration_seconds_histogram_.Observe(value);};
|
||||
void SearchIndexDataDurationSecondsHistogramObserve(double value) override { if(startup_) search_index_data_duration_seconds_histogram_.Observe(value);};
|
||||
void SearchRawDataDurationSecondsHistogramObserve(double value) override { if(startup_) search_raw_data_duration_seconds_histogram_.Observe(value);};
|
||||
void IndexFileSizeTotalIncrement(double value = 1) override { if(startup_) index_file_size_total_.Increment(value);};
|
||||
void RawFileSizeTotalIncrement(double value = 1) override { if(startup_) raw_file_size_total_.Increment(value);};
|
||||
void IndexFileSizeGaugeSet(double value) override { if(startup_) index_file_size_gauge_.Set(value);};
|
||||
void RawFileSizeGaugeSet(double value) override { if(startup_) raw_file_size_gauge_.Set(value);};
|
||||
void QueryResponseSummaryObserve(double value) override {if(startup_) query_response_summary_.Observe(value);};
|
||||
void DiskStoreIOSpeedGaugeSet(double value) override { if(startup_) disk_store_IO_speed_gauge_.Set(value);};
|
||||
void DataFileSizeGaugeSet(double value) override { if(startup_) data_file_size_gauge_.Set(value);};
|
||||
void AddVectorsSuccessGaugeSet(double value) override { if(startup_) add_vectors_success_gauge_.Set(value);};
|
||||
void AddVectorsFailGaugeSet(double value) override { if(startup_) add_vectors_fail_gauge_.Set(value);};
|
||||
void QueryVectorResponseSummaryObserve(double value, int count = 1) override { if (startup_) for(int i = 0 ; i < count ; ++i) query_vector_response_summary_.Observe(value);};
|
||||
void QueryVectorResponsePerSecondGaugeSet(double value) override {if (startup_) query_vector_response_per_second_gauge_.Set(value);};
|
||||
void CPUUsagePercentSet() override ;
|
||||
void FaissDiskLoadDurationSecondsHistogramObserve(double value) override {
|
||||
if (startup_) {
|
||||
faiss_disk_load_duration_seconds_histogram_.Observe(value);
|
||||
}
|
||||
}
|
||||
|
||||
void FaissDiskLoadSizeBytesHistogramObserve(double value) override {
|
||||
if (startup_) {
|
||||
faiss_disk_load_size_bytes_histogram_.Observe(value);
|
||||
}
|
||||
}
|
||||
|
||||
void FaissDiskLoadIOSpeedGaugeSet(double value) override {
|
||||
if (startup_) {
|
||||
faiss_disk_load_IO_speed_gauge_.Set(value);
|
||||
}
|
||||
}
|
||||
|
||||
void CacheAccessTotalIncrement(double value = 1) override {
|
||||
if (startup_) {
|
||||
cache_access_total_.Increment(value);
|
||||
}
|
||||
}
|
||||
|
||||
void MemTableMergeDurationSecondsHistogramObserve(double value) override {
|
||||
if (startup_) {
|
||||
mem_table_merge_duration_seconds_histogram_.Observe(value);
|
||||
}
|
||||
}
|
||||
|
||||
void SearchIndexDataDurationSecondsHistogramObserve(double value) override {
|
||||
if (startup_) {
|
||||
search_index_data_duration_seconds_histogram_.Observe(value);
|
||||
}
|
||||
}
|
||||
|
||||
void SearchRawDataDurationSecondsHistogramObserve(double value) override {
|
||||
if (startup_) {
|
||||
search_raw_data_duration_seconds_histogram_.Observe(value);
|
||||
}
|
||||
}
|
||||
|
||||
void IndexFileSizeTotalIncrement(double value = 1) override {
|
||||
if (startup_) {
|
||||
index_file_size_total_.Increment(value);
|
||||
}
|
||||
}
|
||||
|
||||
void RawFileSizeTotalIncrement(double value = 1) override {
|
||||
if (startup_) {
|
||||
raw_file_size_total_.Increment(value);
|
||||
}
|
||||
}
|
||||
|
||||
void IndexFileSizeGaugeSet(double value) override {
|
||||
if (startup_) {
|
||||
index_file_size_gauge_.Set(value);
|
||||
}
|
||||
}
|
||||
|
||||
void RawFileSizeGaugeSet(double value) override {
|
||||
if (startup_) {
|
||||
raw_file_size_gauge_.Set(value);
|
||||
}
|
||||
}
|
||||
|
||||
void QueryResponseSummaryObserve(double value) override {
|
||||
if (startup_) {
|
||||
query_response_summary_.Observe(value);
|
||||
}
|
||||
}
|
||||
|
||||
void DiskStoreIOSpeedGaugeSet(double value) override {
|
||||
if (startup_) {
|
||||
disk_store_IO_speed_gauge_.Set(value);
|
||||
}
|
||||
}
|
||||
|
||||
void DataFileSizeGaugeSet(double value) override {
|
||||
if (startup_) {
|
||||
data_file_size_gauge_.Set(value);
|
||||
}
|
||||
}
|
||||
|
||||
void AddVectorsSuccessGaugeSet(double value) override {
|
||||
if (startup_) {
|
||||
add_vectors_success_gauge_.Set(value);
|
||||
}
|
||||
}
|
||||
|
||||
void AddVectorsFailGaugeSet(double value) override {
|
||||
if (startup_) {
|
||||
add_vectors_fail_gauge_.Set(value);
|
||||
}
|
||||
}
|
||||
|
||||
void QueryVectorResponseSummaryObserve(double value, int count = 1) override {
|
||||
if (startup_) {
|
||||
for (int i = 0; i < count; ++i) {
|
||||
query_vector_response_summary_.Observe(value);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void QueryVectorResponsePerSecondGaugeSet(double value) override {
|
||||
if (startup_) {
|
||||
query_vector_response_per_second_gauge_.Set(value);
|
||||
}
|
||||
}
|
||||
|
||||
void CPUUsagePercentSet() override;
|
||||
void CPUCoreUsagePercentSet() override;
|
||||
|
||||
void RAMUsagePercentSet() override ;
|
||||
// Sets the query-response-per-second gauge to `value`.
// No-op unless startup_ is true.
void QueryResponsePerSecondGaugeSet(double value) override {
    if (!startup_) {
        return;
    }
    query_response_per_second_gauge.Set(value);
}
|
||||
void GPUPercentGaugeSet() override ;
|
||||
void GPUMemoryUsageGaugeSet() override ;
|
||||
void AddVectorsPerSecondGaugeSet(int num_vector, int dim, double time) override ;
|
||||
void QueryIndexTypePerSecondSet(std::string type, double value) override ;
|
||||
void ConnectionGaugeIncrement() override ;
|
||||
void ConnectionGaugeDecrement() override ;
|
||||
// Increments the keeping-alive counter by `value` (default 1).
// No-op unless startup_ is true.
void KeepingAliveCounterIncrement(double value = 1) override {
    if (!startup_) {
        return;
    }
    keeping_alive_counter_.Increment(value);
}
|
||||
void OctetsSet() override ;
|
||||
void RAMUsagePercentSet() override;
|
||||
|
||||
void QueryResponsePerSecondGaugeSet(double value) override {
|
||||
if (startup_) {
|
||||
query_response_per_second_gauge.Set(value);
|
||||
}
|
||||
}
|
||||
|
||||
void GPUPercentGaugeSet() override;
|
||||
void GPUMemoryUsageGaugeSet() override;
|
||||
void AddVectorsPerSecondGaugeSet(int num_vector, int dim, double time) override;
|
||||
void QueryIndexTypePerSecondSet(std::string type, double value) override;
|
||||
void ConnectionGaugeIncrement() override;
|
||||
void ConnectionGaugeDecrement() override;
|
||||
|
||||
void KeepingAliveCounterIncrement(double value = 1) override {
|
||||
if (startup_) {
|
||||
keeping_alive_counter_.Increment(value);
|
||||
}
|
||||
}
|
||||
|
||||
void OctetsSet() override;
|
||||
|
||||
void GPUTemperature() override;
|
||||
void CPUTemperature() override;
|
||||
|
||||
// Returns a mutable reference to the stored Exposer shared_ptr, so callers can
// read or reassign it.
std::shared_ptr<prometheus::Exposer>& exposer_ptr() {
    return exposer_ptr_;
}
|
||||
std::shared_ptr<prometheus::Exposer> &exposer_ptr() {
|
||||
return exposer_ptr_;
|
||||
}
|
||||
|
||||
// prometheus::Exposer& exposer() { return exposer_;}
|
||||
std::shared_ptr<prometheus::Registry> ®istry_ptr() {return registry_; }
|
||||
std::shared_ptr<prometheus::Registry> ®istry_ptr() {
|
||||
return registry_;
|
||||
}
|
||||
|
||||
// .....
|
||||
private:
|
||||
@ -125,7 +279,6 @@ class PrometheusMetrics: public MetricsBase {
|
||||
prometheus::Counter &add_group_success_total_ = add_group_request_.Add({{"outcome", "success"}});
|
||||
prometheus::Counter &add_group_fail_total_ = add_group_request_.Add({{"outcome", "fail"}});
|
||||
|
||||
|
||||
//record get_group request
|
||||
prometheus::Family<prometheus::Counter> &get_group_request_ = prometheus::BuildCounter()
|
||||
.Name("get_group_request_total")
|
||||
@ -135,7 +288,6 @@ class PrometheusMetrics: public MetricsBase {
|
||||
prometheus::Counter &get_group_success_total_ = get_group_request_.Add({{"outcome", "success"}});
|
||||
prometheus::Counter &get_group_fail_total_ = get_group_request_.Add({{"outcome", "fail"}});
|
||||
|
||||
|
||||
//record has_group request
|
||||
prometheus::Family<prometheus::Counter> &has_group_request_ = prometheus::BuildCounter()
|
||||
.Name("has_group_request_total")
|
||||
@ -145,7 +297,6 @@ class PrometheusMetrics: public MetricsBase {
|
||||
prometheus::Counter &has_group_success_total_ = has_group_request_.Add({{"outcome", "success"}});
|
||||
prometheus::Counter &has_group_fail_total_ = has_group_request_.Add({{"outcome", "fail"}});
|
||||
|
||||
|
||||
//record get_group_files
|
||||
prometheus::Family<prometheus::Counter> &get_group_files_request_ = prometheus::BuildCounter()
|
||||
.Name("get_group_files_request_total")
|
||||
@ -155,7 +306,6 @@ class PrometheusMetrics: public MetricsBase {
|
||||
prometheus::Counter &get_group_files_success_total_ = get_group_files_request_.Add({{"outcome", "success"}});
|
||||
prometheus::Counter &get_group_files_fail_total_ = get_group_files_request_.Add({{"outcome", "fail"}});
|
||||
|
||||
|
||||
//record add_vectors count and average time
|
||||
//need to be considered
|
||||
prometheus::Family<prometheus::Counter> &add_vectors_request_ = prometheus::BuildCounter()
|
||||
@ -169,36 +319,39 @@ class PrometheusMetrics: public MetricsBase {
|
||||
.Name("add_vector_duration_microseconds")
|
||||
.Help("average time of adding every vector")
|
||||
.Register(*registry_);
|
||||
prometheus::Histogram &add_vectors_duration_histogram_ = add_vectors_duration_seconds_.Add({}, BucketBoundaries{0, 0.01, 0.02, 0.03, 0.04, 0.05, 0.08, 0.1, 0.5, 1});
|
||||
|
||||
prometheus::Histogram &add_vectors_duration_histogram_ =
|
||||
add_vectors_duration_seconds_.Add({}, BucketBoundaries{0, 0.01, 0.02, 0.03, 0.04, 0.05, 0.08, 0.1, 0.5, 1});
|
||||
|
||||
//record search count and average time
|
||||
prometheus::Family<prometheus::Counter> &search_request_ = prometheus::BuildCounter()
|
||||
.Name("search_request_total")
|
||||
.Help("the number of search request")
|
||||
.Register(*registry_);
|
||||
prometheus::Counter &search_success_total_ = search_request_.Add({{"outcome","success"}});
|
||||
prometheus::Counter &search_fail_total_ = search_request_.Add({{"outcome","fail"}});
|
||||
prometheus::Counter &search_success_total_ = search_request_.Add({{"outcome", "success"}});
|
||||
prometheus::Counter &search_fail_total_ = search_request_.Add({{"outcome", "fail"}});
|
||||
|
||||
prometheus::Family<prometheus::Histogram> &search_request_duration_seconds_ = prometheus::BuildHistogram()
|
||||
.Name("search_request_duration_microsecond")
|
||||
.Help("histogram of processing time for each search")
|
||||
.Register(*registry_);
|
||||
prometheus::Histogram &search_duration_histogram_ = search_request_duration_seconds_.Add({}, BucketBoundaries{0.1, 1.0, 10.0});
|
||||
prometheus::Histogram
|
||||
&search_duration_histogram_ = search_request_duration_seconds_.Add({}, BucketBoundaries{0.1, 1.0, 10.0});
|
||||
|
||||
//record raw_files size histogram
|
||||
prometheus::Family<prometheus::Histogram> &raw_files_size_ = prometheus::BuildHistogram()
|
||||
.Name("search_raw_files_bytes")
|
||||
.Help("histogram of raw files size by bytes")
|
||||
.Register(*registry_);
|
||||
prometheus::Histogram &raw_files_size_histogram_ = raw_files_size_.Add({}, BucketBoundaries{1e9, 2e9, 4e9, 6e9, 8e9, 1e10});
|
||||
prometheus::Histogram
|
||||
&raw_files_size_histogram_ = raw_files_size_.Add({}, BucketBoundaries{1e9, 2e9, 4e9, 6e9, 8e9, 1e10});
|
||||
|
||||
//record index_files size histogram
|
||||
prometheus::Family<prometheus::Histogram> &index_files_size_ = prometheus::BuildHistogram()
|
||||
.Name("search_index_files_bytes")
|
||||
.Help("histogram of index files size by bytes")
|
||||
.Register(*registry_);
|
||||
prometheus::Histogram &index_files_size_histogram_ = index_files_size_.Add({}, BucketBoundaries{1e9, 2e9, 4e9, 6e9, 8e9, 1e10});
|
||||
prometheus::Histogram
|
||||
&index_files_size_histogram_ = index_files_size_.Add({}, BucketBoundaries{1e9, 2e9, 4e9, 6e9, 8e9, 1e10});
|
||||
|
||||
//record index and raw files size counter
|
||||
prometheus::Family<prometheus::Counter> &file_size_total_ = prometheus::BuildCounter()
|
||||
@ -221,30 +374,34 @@ class PrometheusMetrics: public MetricsBase {
|
||||
.Name("build_index_duration_microseconds")
|
||||
.Help("histogram of processing time for building index")
|
||||
.Register(*registry_);
|
||||
prometheus::Histogram &build_index_duration_seconds_histogram_ = build_index_duration_seconds_.Add({}, BucketBoundaries{5e5, 2e6, 4e6, 6e6, 8e6, 1e7});
|
||||
|
||||
prometheus::Histogram &build_index_duration_seconds_histogram_ =
|
||||
build_index_duration_seconds_.Add({}, BucketBoundaries{5e5, 2e6, 4e6, 6e6, 8e6, 1e7});
|
||||
|
||||
//record processing time for all building index
|
||||
prometheus::Family<prometheus::Histogram> &all_build_index_duration_seconds_ = prometheus::BuildHistogram()
|
||||
.Name("all_build_index_duration_microseconds")
|
||||
.Help("histogram of processing time for building index")
|
||||
.Register(*registry_);
|
||||
prometheus::Histogram &all_build_index_duration_seconds_histogram_ = all_build_index_duration_seconds_.Add({}, BucketBoundaries{2e6, 4e6, 6e6, 8e6, 1e7});
|
||||
prometheus::Histogram &all_build_index_duration_seconds_histogram_ =
|
||||
all_build_index_duration_seconds_.Add({}, BucketBoundaries{2e6, 4e6, 6e6, 8e6, 1e7});
|
||||
|
||||
//record duration of merging mem table
|
||||
prometheus::Family<prometheus::Histogram> &mem_table_merge_duration_seconds_ = prometheus::BuildHistogram()
|
||||
.Name("mem_table_merge_duration_microseconds")
|
||||
.Help("histogram of processing time for merging mem tables")
|
||||
.Register(*registry_);
|
||||
prometheus::Histogram &mem_table_merge_duration_seconds_histogram_ = mem_table_merge_duration_seconds_.Add({}, BucketBoundaries{5e4, 1e5, 2e5, 4e5, 6e5, 8e5, 1e6});
|
||||
prometheus::Histogram &mem_table_merge_duration_seconds_histogram_ =
|
||||
mem_table_merge_duration_seconds_.Add({}, BucketBoundaries{5e4, 1e5, 2e5, 4e5, 6e5, 8e5, 1e6});
|
||||
|
||||
//record search index and raw data duration
|
||||
prometheus::Family<prometheus::Histogram> &search_data_duration_seconds_ = prometheus::BuildHistogram()
|
||||
.Name("search_data_duration_microseconds")
|
||||
.Help("histograms of processing time for search index and raw data")
|
||||
.Register(*registry_);
|
||||
prometheus::Histogram &search_index_data_duration_seconds_histogram_ = search_data_duration_seconds_.Add({{"type", "index"}}, BucketBoundaries{1e5, 2e5, 4e5, 6e5, 8e5});
|
||||
prometheus::Histogram &search_raw_data_duration_seconds_histogram_ = search_data_duration_seconds_.Add({{"type", "raw"}}, BucketBoundaries{1e5, 2e5, 4e5, 6e5, 8e5});
|
||||
prometheus::Histogram &search_index_data_duration_seconds_histogram_ =
|
||||
search_data_duration_seconds_.Add({{"type", "index"}}, BucketBoundaries{1e5, 2e5, 4e5, 6e5, 8e5});
|
||||
prometheus::Histogram &search_raw_data_duration_seconds_histogram_ =
|
||||
search_data_duration_seconds_.Add({{"type", "raw"}}, BucketBoundaries{1e5, 2e5, 4e5, 6e5, 8e5});
|
||||
|
||||
|
||||
////all from Cache.cpp
|
||||
@ -263,7 +420,8 @@ class PrometheusMetrics: public MetricsBase {
|
||||
// .Name("meta_visit_duration_seconds")
|
||||
// .Help("histogram of processing time to get data from mata")
|
||||
// .Register(*registry_);
|
||||
// prometheus::Histogram &meta_visit_duration_seconds_histogram_ = meta_visit_duration_seconds_.Add({{}}, BucketBoundaries{0.1, 1.0, 10.0});
|
||||
// prometheus::Histogram &meta_visit_duration_seconds_histogram_ =
|
||||
// meta_visit_duration_seconds_.Add({{}}, BucketBoundaries{0.1, 1.0, 10.0});
|
||||
|
||||
|
||||
////all from MemManager.cpp
|
||||
@ -281,8 +439,6 @@ class PrometheusMetrics: public MetricsBase {
|
||||
.Register(*registry_);
|
||||
prometheus::Gauge &mem_usage_total_gauge_ = mem_usage_total_.Add({});
|
||||
|
||||
|
||||
|
||||
////all from DBMetaImpl.cpp
|
||||
//record meta access count
|
||||
prometheus::Family<prometheus::Counter> &meta_access_ = prometheus::BuildCounter()
|
||||
@ -296,9 +452,8 @@ class PrometheusMetrics: public MetricsBase {
|
||||
.Name("meta_access_duration_microseconds")
|
||||
.Help("histogram of processing time for accessing mata")
|
||||
.Register(*registry_);
|
||||
prometheus::Histogram &meta_access_duration_seconds_histogram_ = meta_access_duration_seconds_.Add({}, BucketBoundaries{100, 300, 500, 700, 900, 2000, 4000, 6000, 8000, 20000});
|
||||
|
||||
|
||||
prometheus::Histogram &meta_access_duration_seconds_histogram_ =
|
||||
meta_access_duration_seconds_.Add({}, BucketBoundaries{100, 300, 500, 700, 900, 2000, 4000, 6000, 8000, 20000});
|
||||
|
||||
////all from FaissExecutionEngine.cpp
|
||||
//record data loading from disk count, size, duration, IO speed
|
||||
@ -306,26 +461,28 @@ class PrometheusMetrics: public MetricsBase {
|
||||
.Name("disk_load_duration_microseconds")
|
||||
.Help("Histogram of processing time for loading data from disk")
|
||||
.Register(*registry_);
|
||||
prometheus::Histogram &faiss_disk_load_duration_seconds_histogram_ = disk_load_duration_second_.Add({{"DB","Faiss"}},BucketBoundaries{2e5, 4e5, 6e5 , 8e5});
|
||||
prometheus::Histogram &faiss_disk_load_duration_seconds_histogram_ =
|
||||
disk_load_duration_second_.Add({{"DB", "Faiss"}}, BucketBoundaries{2e5, 4e5, 6e5, 8e5});
|
||||
|
||||
prometheus::Family<prometheus::Histogram> &disk_load_size_bytes_ = prometheus::BuildHistogram()
|
||||
.Name("disk_load_size_bytes")
|
||||
.Help("Histogram of data size by bytes for loading data from disk")
|
||||
.Register(*registry_);
|
||||
prometheus::Histogram &faiss_disk_load_size_bytes_histogram_ = disk_load_size_bytes_.Add({{"DB","Faiss"}},BucketBoundaries{1e9, 2e9, 4e9, 6e9, 8e9});
|
||||
prometheus::Histogram &faiss_disk_load_size_bytes_histogram_ =
|
||||
disk_load_size_bytes_.Add({{"DB", "Faiss"}}, BucketBoundaries{1e9, 2e9, 4e9, 6e9, 8e9});
|
||||
|
||||
// prometheus::Family<prometheus::Histogram> &disk_load_IO_speed_ = prometheus::BuildHistogram()
|
||||
// .Name("disk_load_IO_speed_byte_per_sec")
|
||||
// .Help("Histogram of IO speed for loading data from disk")
|
||||
// .Register(*registry_);
|
||||
// prometheus::Histogram &faiss_disk_load_IO_speed_histogram_ = disk_load_IO_speed_.Add({{"DB","Faiss"}},BucketBoundaries{1000, 2000, 3000, 4000, 6000, 8000});
|
||||
// prometheus::Histogram &faiss_disk_load_IO_speed_histogram_ =
|
||||
// disk_load_IO_speed_.Add({{"DB","Faiss"}},BucketBoundaries{1000, 2000, 3000, 4000, 6000, 8000});
|
||||
|
||||
prometheus::Family<prometheus::Gauge> &faiss_disk_load_IO_speed_ = prometheus::BuildGauge()
|
||||
.Name("disk_load_IO_speed_byte_per_microsec")
|
||||
.Help("disk IO speed ")
|
||||
.Register(*registry_);
|
||||
prometheus::Gauge &faiss_disk_load_IO_speed_gauge_ = faiss_disk_load_IO_speed_.Add({{"DB","Faiss"}});
|
||||
|
||||
prometheus::Gauge &faiss_disk_load_IO_speed_gauge_ = faiss_disk_load_IO_speed_.Add({{"DB", "Faiss"}});
|
||||
|
||||
////all from CacheMgr.cpp
|
||||
//record cache access count
|
||||
@ -344,9 +501,9 @@ class PrometheusMetrics: public MetricsBase {
|
||||
|
||||
//record GPU cache usage and %
|
||||
prometheus::Family<prometheus::Gauge> &gpu_cache_usage_ = prometheus::BuildGauge()
|
||||
.Name("gpu_cache_usage_bytes")
|
||||
.Help("current gpu cache usage by bytes")
|
||||
.Register(*registry_);
|
||||
.Name("gpu_cache_usage_bytes")
|
||||
.Help("current gpu cache usage by bytes")
|
||||
.Register(*registry_);
|
||||
|
||||
// record query response
|
||||
using Quantiles = std::vector<prometheus::detail::CKMSQuantiles::Quantile>;
|
||||
@ -354,18 +511,21 @@ class PrometheusMetrics: public MetricsBase {
|
||||
.Name("query_response_summary")
|
||||
.Help("query response summary")
|
||||
.Register(*registry_);
|
||||
prometheus::Summary &query_response_summary_ = query_response_.Add({}, Quantiles{{0.95,0.00},{0.9,0.05},{0.8,0.1}});
|
||||
prometheus::Summary
|
||||
&query_response_summary_ = query_response_.Add({}, Quantiles{{0.95, 0.00}, {0.9, 0.05}, {0.8, 0.1}});
|
||||
|
||||
prometheus::Family<prometheus::Summary> &query_vector_response_ = prometheus::BuildSummary()
|
||||
.Name("query_vector_response_summary")
|
||||
.Help("query each vector response summary")
|
||||
.Register(*registry_);
|
||||
prometheus::Summary &query_vector_response_summary_ = query_vector_response_.Add({}, Quantiles{{0.95,0.00},{0.9,0.05},{0.8,0.1}});
|
||||
prometheus::Summary &query_vector_response_summary_ =
|
||||
query_vector_response_.Add({}, Quantiles{{0.95, 0.00}, {0.9, 0.05}, {0.8, 0.1}});
|
||||
|
||||
prometheus::Family<prometheus::Gauge> &query_vector_response_per_second_ = prometheus::BuildGauge()
|
||||
.Name("query_vector_response_per_microsecond")
|
||||
.Help("the number of vectors can be queried every second ")
|
||||
.Register(*registry_); prometheus::Gauge &query_vector_response_per_second_gauge_ = query_vector_response_per_second_.Add({});
|
||||
.Register(*registry_);
|
||||
prometheus::Gauge &query_vector_response_per_second_gauge_ = query_vector_response_per_second_.Add({});
|
||||
|
||||
prometheus::Family<prometheus::Gauge> &query_response_per_second_ = prometheus::BuildGauge()
|
||||
.Name("query_response_per_microsecond")
|
||||
@ -404,7 +564,6 @@ class PrometheusMetrics: public MetricsBase {
|
||||
.Register(*registry_);
|
||||
prometheus::Gauge &CPU_usage_percent_ = CPU_.Add({{"CPU", "avg"}});
|
||||
|
||||
|
||||
prometheus::Family<prometheus::Gauge> &RAM_ = prometheus::BuildGauge()
|
||||
.Name("RAM_usage_percent")
|
||||
.Help("RAM usage percent by this process")
|
||||
@ -427,8 +586,10 @@ class PrometheusMetrics: public MetricsBase {
|
||||
.Name("query_index_throughtout_per_microsecond")
|
||||
.Help("query index throughtout per microsecond")
|
||||
.Register(*registry_);
|
||||
prometheus::Gauge &query_index_IVF_type_per_second_gauge_ = query_index_type_per_second_.Add({{"IndexType","IVF"}});
|
||||
prometheus::Gauge &query_index_IDMAP_type_per_second_gauge_ = query_index_type_per_second_.Add({{"IndexType","IDMAP"}});
|
||||
prometheus::Gauge
|
||||
&query_index_IVF_type_per_second_gauge_ = query_index_type_per_second_.Add({{"IndexType", "IVF"}});
|
||||
prometheus::Gauge
|
||||
&query_index_IDMAP_type_per_second_gauge_ = query_index_type_per_second_.Add({{"IndexType", "IDMAP"}});
|
||||
|
||||
prometheus::Family<prometheus::Gauge> &connection_ = prometheus::BuildGauge()
|
||||
.Name("connection_number")
|
||||
@ -449,7 +610,6 @@ class PrometheusMetrics: public MetricsBase {
|
||||
prometheus::Gauge &inoctets_gauge_ = octets_.Add({{"type", "inoctets"}});
|
||||
prometheus::Gauge &outoctets_gauge_ = octets_.Add({{"type", "outoctets"}});
|
||||
|
||||
|
||||
prometheus::Family<prometheus::Gauge> &GPU_temperature_ = prometheus::BuildGauge()
|
||||
.Name("GPU_temperature")
|
||||
.Help("GPU temperature")
|
||||
@ -461,9 +621,6 @@ class PrometheusMetrics: public MetricsBase {
|
||||
.Register(*registry_);
|
||||
};
|
||||
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
} // namespace server
|
||||
} // namespace milvus
|
||||
} // namespace zilliz
|
||||
|
||||
@ -16,29 +16,28 @@
|
||||
// under the License.
|
||||
|
||||
|
||||
#include "SystemInfo.h"
|
||||
#include "metrics/SystemInfo.h"
|
||||
|
||||
#include <sys/types.h>
|
||||
#include <unistd.h>
|
||||
#include <iostream>
|
||||
#include <fstream>
|
||||
#include "nvml.h"
|
||||
//#include <mutex>
|
||||
//
|
||||
//std::mutex mutex;
|
||||
|
||||
#include <nvml.h>
|
||||
#include <string>
|
||||
#include <utility>
|
||||
|
||||
namespace zilliz {
|
||||
namespace milvus {
|
||||
namespace server {
|
||||
|
||||
void SystemInfo::Init() {
|
||||
if(initialized_) return;
|
||||
void
|
||||
SystemInfo::Init() {
|
||||
if (initialized_) return;
|
||||
|
||||
initialized_ = true;
|
||||
|
||||
// initialize CPU information
|
||||
FILE* file;
|
||||
FILE *file;
|
||||
struct tms time_sample;
|
||||
char line[128];
|
||||
last_cpu_ = times(&time_sample);
|
||||
@ -46,7 +45,7 @@ void SystemInfo::Init() {
|
||||
last_user_cpu_ = time_sample.tms_utime;
|
||||
file = fopen("/proc/cpuinfo", "r");
|
||||
num_processors_ = 0;
|
||||
while(fgets(line, 128, file) != NULL){
|
||||
while (fgets(line, 128, file) != NULL) {
|
||||
if (strncmp(line, "processor", 9) == 0) num_processors_++;
|
||||
if (strncmp(line, "physical", 8) == 0) {
|
||||
num_physical_processors_ = ParseLine(line);
|
||||
@ -58,24 +57,24 @@ void SystemInfo::Init() {
|
||||
//initialize GPU information
|
||||
nvmlReturn_t nvmlresult;
|
||||
nvmlresult = nvmlInit();
|
||||
if(NVML_SUCCESS != nvmlresult) {
|
||||
if (NVML_SUCCESS != nvmlresult) {
|
||||
printf("System information initilization failed");
|
||||
return ;
|
||||
return;
|
||||
}
|
||||
nvmlresult = nvmlDeviceGetCount(&num_device_);
|
||||
if(NVML_SUCCESS != nvmlresult) {
|
||||
if (NVML_SUCCESS != nvmlresult) {
|
||||
printf("Unable to get devidce number");
|
||||
return ;
|
||||
return;
|
||||
}
|
||||
|
||||
//initialize network traffic information
|
||||
std::pair<unsigned long long, unsigned long long> in_and_out_octets = Octets();
|
||||
std::pair<uint64_t, uint64_t> in_and_out_octets = Octets();
|
||||
in_octets_ = in_and_out_octets.first;
|
||||
out_octets_ = in_and_out_octets.second;
|
||||
net_time_ = std::chrono::system_clock::now();
|
||||
}
|
||||
|
||||
long long
|
||||
uint64_t
|
||||
SystemInfo::ParseLine(char *line) {
|
||||
// This assumes that a digit will be found and the line ends in " Kb".
|
||||
int i = strlen(line);
|
||||
@ -83,53 +82,52 @@ SystemInfo::ParseLine(char *line) {
|
||||
while (*p < '0' || *p > '9') p++;
|
||||
line[i - 3] = '\0';
|
||||
i = atoi(p);
|
||||
return static_cast<long long>(i);
|
||||
return static_cast<uint64_t>(i);
|
||||
}
|
||||
|
||||
unsigned long
|
||||
uint64_t
|
||||
SystemInfo::GetPhysicalMemory() {
|
||||
struct sysinfo memInfo;
|
||||
sysinfo (&memInfo);
|
||||
unsigned long totalPhysMem = memInfo.totalram;
|
||||
sysinfo(&memInfo);
|
||||
uint64_t totalPhysMem = memInfo.totalram;
|
||||
//Multiply in next statement to avoid int overflow on right hand side...
|
||||
totalPhysMem *= memInfo.mem_unit;
|
||||
return totalPhysMem;
|
||||
}
|
||||
|
||||
unsigned long
|
||||
uint64_t
|
||||
SystemInfo::GetProcessUsedMemory() {
|
||||
//Note: this value is in KB!
|
||||
FILE* file = fopen("/proc/self/status", "r");
|
||||
constexpr int64_t line_length = 128;
|
||||
long long result = -1;
|
||||
constexpr int64_t KB_SIZE = 1024;
|
||||
FILE *file = fopen("/proc/self/status", "r");
|
||||
constexpr uint64_t line_length = 128;
|
||||
uint64_t result = -1;
|
||||
constexpr uint64_t KB_SIZE = 1024;
|
||||
char line[line_length];
|
||||
|
||||
while (fgets(line, line_length, file) != NULL){
|
||||
if (strncmp(line, "VmRSS:", 6) == 0){
|
||||
while (fgets(line, line_length, file) != NULL) {
|
||||
if (strncmp(line, "VmRSS:", 6) == 0) {
|
||||
result = ParseLine(line);
|
||||
break;
|
||||
}
|
||||
}
|
||||
fclose(file);
|
||||
// return value in Byte
|
||||
return (result*KB_SIZE);
|
||||
|
||||
return (result * KB_SIZE);
|
||||
}
|
||||
|
||||
double
|
||||
SystemInfo::MemoryPercent() {
|
||||
if (!initialized_) Init();
|
||||
return (double)(GetProcessUsedMemory()*100)/(double)total_ram_;
|
||||
return (double) (GetProcessUsedMemory() * 100) / (double) total_ram_;
|
||||
}
|
||||
|
||||
std::vector<double>
|
||||
SystemInfo::CPUCorePercent() {
|
||||
std::vector<unsigned long long> prev_work_time_array;
|
||||
std::vector<unsigned long long> prev_total_time_array = getTotalCpuTime(prev_work_time_array);
|
||||
std::vector<uint64_t> prev_work_time_array;
|
||||
std::vector<uint64_t> prev_total_time_array = getTotalCpuTime(prev_work_time_array);
|
||||
usleep(100000);
|
||||
std::vector<unsigned long long> cur_work_time_array;
|
||||
std::vector<unsigned long long> cur_total_time_array = getTotalCpuTime(cur_work_time_array);
|
||||
std::vector<uint64_t> cur_work_time_array;
|
||||
std::vector<uint64_t> cur_total_time_array = getTotalCpuTime(cur_work_time_array);
|
||||
|
||||
std::vector<double> cpu_core_percent;
|
||||
for (int i = 1; i < num_processors_; i++) {
|
||||
@ -140,22 +138,21 @@ SystemInfo::CPUCorePercent() {
|
||||
return cpu_core_percent;
|
||||
}
|
||||
|
||||
std::vector<unsigned long long>
|
||||
SystemInfo::getTotalCpuTime(std::vector<unsigned long long> &work_time_array)
|
||||
{
|
||||
std::vector<unsigned long long> total_time_array;
|
||||
FILE* file = fopen("/proc/stat", "r");
|
||||
std::vector<uint64_t>
|
||||
SystemInfo::getTotalCpuTime(std::vector<uint64_t> &work_time_array) {
|
||||
std::vector<uint64_t> total_time_array;
|
||||
FILE *file = fopen("/proc/stat", "r");
|
||||
if (file == NULL) {
|
||||
perror("Could not open stat file");
|
||||
return total_time_array;
|
||||
}
|
||||
|
||||
unsigned long long user = 0, nice = 0, system = 0, idle = 0;
|
||||
unsigned long long iowait = 0, irq = 0, softirq = 0, steal = 0, guest = 0, guestnice = 0;
|
||||
uint64_t user = 0, nice = 0, system = 0, idle = 0;
|
||||
uint64_t iowait = 0, irq = 0, softirq = 0, steal = 0, guest = 0, guestnice = 0;
|
||||
|
||||
for (int i = 0; i < num_processors_; i++) {
|
||||
char buffer[1024];
|
||||
char* ret = fgets(buffer, sizeof(buffer) - 1, file);
|
||||
char *ret = fgets(buffer, sizeof(buffer) - 1, file);
|
||||
if (ret == NULL) {
|
||||
perror("Could not read stat file");
|
||||
fclose(file);
|
||||
@ -163,7 +160,7 @@ SystemInfo::getTotalCpuTime(std::vector<unsigned long long> &work_time_array)
|
||||
}
|
||||
|
||||
sscanf(buffer,
|
||||
"cpu %16llu %16llu %16llu %16llu %16llu %16llu %16llu %16llu %16llu %16llu",
|
||||
"cpu %16lu %16lu %16lu %16lu %16lu %16lu %16lu %16lu %16lu %16lu",
|
||||
&user, &nice, &system, &idle, &iowait, &irq, &softirq, &steal, &guest, &guestnice);
|
||||
|
||||
work_time_array.push_back(user + nice + system);
|
||||
@ -174,9 +171,6 @@ SystemInfo::getTotalCpuTime(std::vector<unsigned long long> &work_time_array)
|
||||
return total_time_array;
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
double
|
||||
SystemInfo::CPUPercent() {
|
||||
if (!initialized_) Init();
|
||||
@ -186,11 +180,10 @@ SystemInfo::CPUPercent() {
|
||||
|
||||
now = times(&time_sample);
|
||||
if (now <= last_cpu_ || time_sample.tms_stime < last_sys_cpu_ ||
|
||||
time_sample.tms_utime < last_user_cpu_){
|
||||
time_sample.tms_utime < last_user_cpu_) {
|
||||
//Overflow detection. Just skip this value.
|
||||
percent = -1.0;
|
||||
}
|
||||
else{
|
||||
} else {
|
||||
percent = (time_sample.tms_stime - last_sys_cpu_) +
|
||||
(time_sample.tms_utime - last_user_cpu_);
|
||||
percent /= (now - last_cpu_);
|
||||
@ -203,12 +196,11 @@ SystemInfo::CPUPercent() {
|
||||
return percent;
|
||||
}
|
||||
|
||||
|
||||
std::vector<unsigned long long>
|
||||
std::vector<uint64_t>
|
||||
SystemInfo::GPUMemoryTotal() {
|
||||
// get GPU usage percent
|
||||
if(!initialized_) Init();
|
||||
std::vector<unsigned long long > result;
|
||||
if (!initialized_) Init();
|
||||
std::vector<uint64_t> result;
|
||||
nvmlMemory_t nvmlMemory;
|
||||
for (int i = 0; i < num_device_; ++i) {
|
||||
nvmlDevice_t device;
|
||||
@ -219,21 +211,22 @@ SystemInfo::GPUMemoryTotal() {
|
||||
return result;
|
||||
}
|
||||
|
||||
std::vector<unsigned int>
|
||||
SystemInfo::GPUTemperature(){
|
||||
if(!initialized_) Init();
|
||||
std::vector<unsigned int > result;
|
||||
std::vector<uint64_t>
|
||||
SystemInfo::GPUTemperature() {
|
||||
if (!initialized_) Init();
|
||||
std::vector<uint64_t> result;
|
||||
for (int i = 0; i < num_device_; i++) {
|
||||
nvmlDevice_t device;
|
||||
nvmlDeviceGetHandleByIndex(i, &device);
|
||||
unsigned int temp;
|
||||
nvmlDeviceGetTemperature(device, NVML_TEMPERATURE_GPU,&temp);
|
||||
nvmlDeviceGetTemperature(device, NVML_TEMPERATURE_GPU, &temp);
|
||||
result.push_back(temp);
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
std::vector<float>
|
||||
SystemInfo::CPUTemperature(){
|
||||
SystemInfo::CPUTemperature() {
|
||||
std::vector<float> result;
|
||||
for (int i = 0; i <= num_physical_processors_; ++i) {
|
||||
std::string path = "/sys/class/thermal/thermal_zone" + std::to_string(i) + "/temp";
|
||||
@ -247,15 +240,14 @@ SystemInfo::CPUTemperature(){
|
||||
result.push_back(temp / 1000);
|
||||
fclose(file);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
std::vector<unsigned long long>
|
||||
std::vector<uint64_t>
|
||||
SystemInfo::GPUMemoryUsed() {
|
||||
// get GPU memory used
|
||||
if(!initialized_) Init();
|
||||
if (!initialized_) Init();
|
||||
|
||||
std::vector<unsigned long long int> result;
|
||||
std::vector<uint64_t> result;
|
||||
nvmlMemory_t nvmlMemory;
|
||||
for (int i = 0; i < num_device_; ++i) {
|
||||
nvmlDevice_t device;
|
||||
@ -266,42 +258,41 @@ SystemInfo::GPUMemoryUsed() {
|
||||
return result;
|
||||
}
|
||||
|
||||
std::pair<unsigned long long , unsigned long long >
|
||||
SystemInfo::Octets(){
|
||||
std::pair<uint64_t, uint64_t>
|
||||
SystemInfo::Octets() {
|
||||
pid_t pid = getpid();
|
||||
// const std::string filename = "/proc/"+std::to_string(pid)+"/net/netstat";
|
||||
const std::string filename = "/proc/net/netstat";
|
||||
std::ifstream file(filename);
|
||||
std::string lastline = "";
|
||||
std::string line = "";
|
||||
while(file){
|
||||
while (file) {
|
||||
getline(file, line);
|
||||
if(file.fail()){
|
||||
if (file.fail()) {
|
||||
break;
|
||||
}
|
||||
lastline = line;
|
||||
}
|
||||
std::vector<size_t> space_position;
|
||||
size_t space_pos = lastline.find(" ");
|
||||
while(space_pos != std::string::npos){
|
||||
while (space_pos != std::string::npos) {
|
||||
space_position.push_back(space_pos);
|
||||
space_pos = lastline.find(" ",space_pos+1);
|
||||
space_pos = lastline.find(" ", space_pos + 1);
|
||||
}
|
||||
// InOctets is between 6th and 7th " " and OutOctets is between 7th and 8th " "
|
||||
size_t inoctets_begin = space_position[6]+1;
|
||||
size_t inoctets_length = space_position[7]-inoctets_begin;
|
||||
size_t outoctets_begin = space_position[7]+1;
|
||||
size_t outoctets_length = space_position[8]-outoctets_begin;
|
||||
std::string inoctets = lastline.substr(inoctets_begin,inoctets_length);
|
||||
std::string outoctets = lastline.substr(outoctets_begin,outoctets_length);
|
||||
size_t inoctets_begin = space_position[6] + 1;
|
||||
size_t inoctets_length = space_position[7] - inoctets_begin;
|
||||
size_t outoctets_begin = space_position[7] + 1;
|
||||
size_t outoctets_length = space_position[8] - outoctets_begin;
|
||||
std::string inoctets = lastline.substr(inoctets_begin, inoctets_length);
|
||||
std::string outoctets = lastline.substr(outoctets_begin, outoctets_length);
|
||||
|
||||
|
||||
unsigned long long inoctets_bytes = std::stoull(inoctets);
|
||||
unsigned long long outoctets_bytes = std::stoull(outoctets);
|
||||
std::pair<unsigned long long , unsigned long long > res(inoctets_bytes, outoctets_bytes);
|
||||
uint64_t inoctets_bytes = std::stoull(inoctets);
|
||||
uint64_t outoctets_bytes = std::stoull(outoctets);
|
||||
std::pair<uint64_t, uint64_t> res(inoctets_bytes, outoctets_bytes);
|
||||
return res;
|
||||
}
|
||||
|
||||
}
|
||||
}
|
||||
}
|
||||
} // namespace server
|
||||
} // namespace milvus
|
||||
} // namespace zilliz
|
||||
|
||||
@ -18,19 +18,17 @@
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "sys/types.h"
|
||||
#include "sys/sysinfo.h"
|
||||
#include "stdlib.h"
|
||||
#include "stdio.h"
|
||||
#include "string.h"
|
||||
#include "sys/times.h"
|
||||
#include "sys/vtimes.h"
|
||||
#include <sys/types.h>
|
||||
#include <sys/sysinfo.h>
|
||||
#include <sys/times.h>
|
||||
#include <sys/vtimes.h>
|
||||
#include <stdlib.h>
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
#include <chrono>
|
||||
|
||||
#include <unordered_map>
|
||||
#include <vector>
|
||||
|
||||
|
||||
#include <utility>
|
||||
|
||||
namespace zilliz {
|
||||
namespace milvus {
|
||||
@ -38,7 +36,7 @@ namespace server {
|
||||
|
||||
class SystemInfo {
|
||||
private:
|
||||
unsigned long total_ram_ = 0;
|
||||
uint64_t total_ram_ = 0;
|
||||
clock_t last_cpu_ = clock_t();
|
||||
clock_t last_sys_cpu_ = clock_t();
|
||||
clock_t last_user_cpu_ = clock_t();
|
||||
@ -46,44 +44,71 @@ class SystemInfo {
|
||||
int num_processors_ = 0;
|
||||
int num_physical_processors_ = 0;
|
||||
//number of GPU
|
||||
unsigned int num_device_ = 0;
|
||||
unsigned long long in_octets_ = 0;
|
||||
unsigned long long out_octets_ = 0;
|
||||
uint32_t num_device_ = 0;
|
||||
uint64_t in_octets_ = 0;
|
||||
uint64_t out_octets_ = 0;
|
||||
bool initialized_ = false;
|
||||
|
||||
public:
|
||||
static SystemInfo &
|
||||
GetInstance(){
|
||||
GetInstance() {
|
||||
static SystemInfo instance;
|
||||
return instance;
|
||||
}
|
||||
|
||||
void Init();
|
||||
int num_processor() const { return num_processors_;};
|
||||
int num_physical_processors() const { return num_physical_processors_; };
|
||||
int num_device() const {return num_device_;};
|
||||
unsigned long long get_inoctets() { return in_octets_;};
|
||||
unsigned long long get_octets() { return out_octets_;};
|
||||
std::chrono::system_clock::time_point get_nettime() { return net_time_;};
|
||||
void set_inoctets(unsigned long long value) { in_octets_ = value;};
|
||||
void set_outoctets(unsigned long long value) { out_octets_ = value;};
|
||||
void set_nettime() {net_time_ = std::chrono::system_clock::now();};
|
||||
long long ParseLine(char* line);
|
||||
unsigned long GetPhysicalMemory();
|
||||
unsigned long GetProcessUsedMemory();
|
||||
|
||||
int num_processor() const {
|
||||
return num_processors_;
|
||||
}
|
||||
|
||||
int num_physical_processors() const {
|
||||
return num_physical_processors_;
|
||||
}
|
||||
|
||||
uint32_t num_device() const {
|
||||
return num_device_;
|
||||
}
|
||||
|
||||
uint64_t get_inoctets() {
|
||||
return in_octets_;
|
||||
}
|
||||
|
||||
uint64_t get_octets() {
|
||||
return out_octets_;
|
||||
}
|
||||
|
||||
std::chrono::system_clock::time_point get_nettime() {
|
||||
return net_time_;
|
||||
}
|
||||
|
||||
void set_inoctets(uint64_t value) {
|
||||
in_octets_ = value;
|
||||
}
|
||||
|
||||
void set_outoctets(uint64_t value) {
|
||||
out_octets_ = value;
|
||||
}
|
||||
|
||||
void set_nettime() {
|
||||
net_time_ = std::chrono::system_clock::now();
|
||||
}
|
||||
|
||||
uint64_t ParseLine(char *line);
|
||||
uint64_t GetPhysicalMemory();
|
||||
uint64_t GetProcessUsedMemory();
|
||||
double MemoryPercent();
|
||||
double CPUPercent();
|
||||
std::pair<unsigned long long , unsigned long long > Octets();
|
||||
std::vector<unsigned long long> GPUMemoryTotal();
|
||||
std::vector<unsigned long long> GPUMemoryUsed();
|
||||
std::pair<uint64_t, uint64_t> Octets();
|
||||
std::vector<uint64_t> GPUMemoryTotal();
|
||||
std::vector<uint64_t> GPUMemoryUsed();
|
||||
|
||||
std::vector<double> CPUCorePercent();
|
||||
std::vector<unsigned long long> getTotalCpuTime(std::vector<unsigned long long> &workTime);
|
||||
std::vector<unsigned int> GPUTemperature();
|
||||
std::vector<uint64_t> getTotalCpuTime(std::vector<uint64_t> &workTime);
|
||||
std::vector<uint64_t> GPUTemperature();
|
||||
std::vector<float> CPUTemperature();
|
||||
|
||||
};
|
||||
|
||||
}
|
||||
}
|
||||
}
|
||||
} // namespace server
|
||||
} // namespace milvus
|
||||
} // namespace zilliz
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user