diff --git a/cpp/build-support/lint_exclusions.txt b/cpp/build-support/lint_exclusions.txt index f08664a1a9..27bd780f42 100644 --- a/cpp/build-support/lint_exclusions.txt +++ b/cpp/build-support/lint_exclusions.txt @@ -4,4 +4,5 @@ *src/thirdparty* *src/core/thirdparty* *src/grpc* -*easylogging++* \ No newline at end of file +*easylogging++* +*SqliteMetaImpl.cpp \ No newline at end of file diff --git a/cpp/src/metrics/MetricBase.h b/cpp/src/metrics/MetricBase.h index 8a3f11d7df..32a0e715e6 100644 --- a/cpp/src/metrics/MetricBase.h +++ b/cpp/src/metrics/MetricBase.h @@ -21,67 +21,149 @@ #include "utils/Error.h" #include "SystemInfo.h" +#include + namespace zilliz { namespace milvus { namespace server { -class MetricsBase{ +class MetricsBase { public: - static MetricsBase& - GetInstance(){ + static MetricsBase & + GetInstance() { static MetricsBase instance; return instance; } - virtual ErrorCode Init() {}; + virtual ErrorCode Init() { + } - virtual void AddVectorsSuccessTotalIncrement(double value = 1) {}; - virtual void AddVectorsFailTotalIncrement(double value = 1) {}; - virtual void AddVectorsDurationHistogramOberve(double value) {}; + virtual void AddVectorsSuccessTotalIncrement(double value = 1) { + } - virtual void RawFileSizeHistogramObserve(double value) {}; - virtual void IndexFileSizeHistogramObserve(double value) {}; - virtual void BuildIndexDurationSecondsHistogramObserve(double value) {}; + virtual void AddVectorsFailTotalIncrement(double value = 1) { + } - virtual void CpuCacheUsageGaugeSet(double value) {}; - virtual void GpuCacheUsageGaugeSet() {}; + virtual void AddVectorsDurationHistogramOberve(double value) { + } - virtual void MetaAccessTotalIncrement(double value = 1) {}; - virtual void MetaAccessDurationSecondsHistogramObserve(double value) {}; - virtual void FaissDiskLoadDurationSecondsHistogramObserve(double value) {}; - virtual void FaissDiskLoadSizeBytesHistogramObserve(double value) {}; - virtual void CacheAccessTotalIncrement(double value = 1) {}; - virtual void MemTableMergeDurationSecondsHistogramObserve(double value) {}; - virtual void SearchIndexDataDurationSecondsHistogramObserve(double value) {}; - virtual void SearchRawDataDurationSecondsHistogramObserve(double value) {}; - virtual void IndexFileSizeTotalIncrement(double value = 1) {}; - virtual void RawFileSizeTotalIncrement(double value = 1) {}; - virtual void IndexFileSizeGaugeSet(double value) {}; - virtual void RawFileSizeGaugeSet(double value) {}; - virtual void FaissDiskLoadIOSpeedGaugeSet(double value) {}; - virtual void QueryResponseSummaryObserve(double value) {}; - virtual void DiskStoreIOSpeedGaugeSet(double value) {}; - virtual void DataFileSizeGaugeSet(double value) {}; - virtual void AddVectorsSuccessGaugeSet(double value) {}; - virtual void AddVectorsFailGaugeSet(double value) {}; - virtual void QueryVectorResponseSummaryObserve(double value, int count = 1) {}; - virtual void QueryVectorResponsePerSecondGaugeSet(double value) {}; - virtual void CPUUsagePercentSet() {}; - virtual void RAMUsagePercentSet() {}; - virtual void QueryResponsePerSecondGaugeSet(double value) {}; - virtual void GPUPercentGaugeSet() {}; - virtual void GPUMemoryUsageGaugeSet() {}; - virtual void AddVectorsPerSecondGaugeSet(int num_vector, int dim, double time) {}; - virtual void QueryIndexTypePerSecondSet(std::string type, double value) {}; - virtual void ConnectionGaugeIncrement() {}; - virtual void ConnectionGaugeDecrement() {}; - virtual void KeepingAliveCounterIncrement(double value = 1) {}; - virtual void OctetsSet() {}; + virtual void RawFileSizeHistogramObserve(double value) { + } - virtual void CPUCoreUsagePercentSet() {}; - virtual void GPUTemperature() {}; - virtual void CPUTemperature() {}; + virtual void IndexFileSizeHistogramObserve(double value) { + } + + virtual void BuildIndexDurationSecondsHistogramObserve(double value) { + } + + virtual void CpuCacheUsageGaugeSet(double value) { + } + + virtual void GpuCacheUsageGaugeSet() { + } + + virtual void MetaAccessTotalIncrement(double value = 1) { + } + + virtual void MetaAccessDurationSecondsHistogramObserve(double value) { + } + + virtual void FaissDiskLoadDurationSecondsHistogramObserve(double value) { + } + + virtual void FaissDiskLoadSizeBytesHistogramObserve(double value) { + } + + virtual void CacheAccessTotalIncrement(double value = 1) { + } + + virtual void MemTableMergeDurationSecondsHistogramObserve(double value) { + } + + virtual void SearchIndexDataDurationSecondsHistogramObserve(double value) { + } + + virtual void SearchRawDataDurationSecondsHistogramObserve(double value) { + } + + virtual void IndexFileSizeTotalIncrement(double value = 1) { + } + + virtual void RawFileSizeTotalIncrement(double value = 1) { + } + + virtual void IndexFileSizeGaugeSet(double value) { + } + + virtual void RawFileSizeGaugeSet(double value) { + } + + virtual void FaissDiskLoadIOSpeedGaugeSet(double value) { + } + + virtual void QueryResponseSummaryObserve(double value) { + } + + virtual void DiskStoreIOSpeedGaugeSet(double value) { + } + + virtual void DataFileSizeGaugeSet(double value) { + } + + virtual void AddVectorsSuccessGaugeSet(double value) { + } + + virtual void AddVectorsFailGaugeSet(double value) { + } + + virtual void QueryVectorResponseSummaryObserve(double value, int count = 1) { + } + + virtual void QueryVectorResponsePerSecondGaugeSet(double value) { + } + + virtual void CPUUsagePercentSet() { + } + + virtual void RAMUsagePercentSet() { + } + + virtual void QueryResponsePerSecondGaugeSet(double value) { + } + + virtual void GPUPercentGaugeSet() { + } + + virtual void GPUMemoryUsageGaugeSet() { + } + + virtual void AddVectorsPerSecondGaugeSet(int num_vector, int dim, double time) { + } + + virtual void QueryIndexTypePerSecondSet(std::string type, double value) { + } + + virtual void ConnectionGaugeIncrement() { + } + + virtual void ConnectionGaugeDecrement() { + } + + virtual void KeepingAliveCounterIncrement(double value = 1) { + } + + virtual void OctetsSet() { + } + + virtual void CPUCoreUsagePercentSet() { + } + + virtual void GPUTemperature() { + } + + virtual void CPUTemperature() { + } }; -} -} -} \ No newline at end of file +} // namespace server +} // namespace milvus +} // namespace zilliz diff --git a/cpp/src/metrics/Metrics.cpp b/cpp/src/metrics/Metrics.cpp index 612f5e7fce..0a1b333b42 100644 --- a/cpp/src/metrics/Metrics.cpp +++ b/cpp/src/metrics/Metrics.cpp @@ -15,10 +15,11 @@ // specific language governing permissions and limitations // under the License. -#include "Metrics.h" +#include "metrics/Metrics.h" #include "server/Config.h" #include "PrometheusMetrics.h" +#include namespace zilliz { namespace milvus { @@ -44,6 +45,6 @@ Metrics::CreateMetricsCollector() { } } -} -} -} +} // namespace server +} // namespace milvus +} // namespace zilliz diff --git a/cpp/src/metrics/Metrics.h b/cpp/src/metrics/Metrics.h index 30d6e68f52..66e06b8ba1 100644 --- a/cpp/src/metrics/Metrics.h +++ b/cpp/src/metrics/Metrics.h @@ -21,7 +21,6 @@ #include "MetricBase.h" #include "db/meta/MetaTypes.h" - namespace zilliz { namespace milvus { namespace server { @@ -44,7 +43,7 @@ class Metrics { }; //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// class CollectMetricsBase { -protected: + protected: CollectMetricsBase() { start_time_ = METRICS_NOW_TIME; } @@ -56,19 +55,19 @@ protected: return METRICS_MICROSECONDS(start_time_, end_time); } -protected: + protected: using TIME_POINT = std::chrono::system_clock::time_point; TIME_POINT start_time_; }; //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// class CollectInsertMetrics : CollectMetricsBase { -public: - CollectInsertMetrics(size_t n, Status& status) : n_(n), status_(status) { + public: + CollectInsertMetrics(size_t n, Status &status) : n_(n), status_(status) { } ~CollectInsertMetrics() { - if(n_ > 0) { + if (n_ > 0) { auto total_time = TimeFromBegine(); double avg_time = total_time / n_; for (int i = 0; i < n_; ++i) { @@ -86,19 +85,19 @@ public: } } -private: + private: size_t n_; - Status& status_; + Status &status_; }; //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// class CollectQueryMetrics : CollectMetricsBase { -public: - CollectQueryMetrics(size_t nq) : nq_(nq) { + public: + explicit CollectQueryMetrics(size_t nq) : nq_(nq) { } ~CollectQueryMetrics() { - if(nq_ > 0) { + if (nq_ > 0) { auto total_time = TimeFromBegine(); for (int i = 0; i < nq_; ++i) { server::Metrics::GetInstance().QueryResponseSummaryObserve(total_time); @@ -109,13 +108,13 @@ public: } } -private: + private: size_t nq_; }; //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// class CollectMergeFilesMetrics : CollectMetricsBase { -public: + public: CollectMergeFilesMetrics() { } @@ -127,7 +126,7 @@ public: //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// class CollectBuildIndexMetrics : CollectMetricsBase { -public: + public: CollectBuildIndexMetrics() { } @@ -139,8 +138,8 @@ public: //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// class CollectExecutionEngineMetrics : CollectMetricsBase { -public: - CollectExecutionEngineMetrics(double physical_size) : physical_size_(physical_size) { + public: + explicit CollectExecutionEngineMetrics(double physical_size) : physical_size_(physical_size) { } ~CollectExecutionEngineMetrics() { @@ -151,27 +150,28 @@ public: server::Metrics::GetInstance().FaissDiskLoadIOSpeedGaugeSet(physical_size_ / double(total_time)); } -private: + private: double physical_size_; }; //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// class CollectSerializeMetrics : CollectMetricsBase { -public: - CollectSerializeMetrics(size_t size) : size_(size) { + public: + explicit CollectSerializeMetrics(size_t size) : size_(size) { } ~CollectSerializeMetrics() { auto total_time = TimeFromBegine(); server::Metrics::GetInstance().DiskStoreIOSpeedGaugeSet((double) size_ / total_time); } -private: + + private: size_t size_; }; //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// class CollectAddMetrics : CollectMetricsBase { -public: + public: CollectAddMetrics(size_t n, uint16_t dimension) : n_(n), dimension_(dimension) { } @@ -181,15 +181,16 @@ public: static_cast(dimension_), total_time); } -private: + + private: size_t n_; uint16_t dimension_; }; //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// class CollectDurationMetrics : CollectMetricsBase { -public: - CollectDurationMetrics(int index_type) : index_type_(index_type) { + public: + explicit CollectDurationMetrics(int index_type) : index_type_(index_type) { } ~CollectDurationMetrics() { @@ -209,19 +210,20 @@ public: } } } -private: + + private: int index_type_; }; //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// class CollectSearchTaskMetrics : CollectMetricsBase { -public: - CollectSearchTaskMetrics(int index_type) : index_type_(index_type) { + public: + explicit CollectSearchTaskMetrics(int index_type) : index_type_(index_type) { } ~CollectSearchTaskMetrics() { auto total_time = TimeFromBegine(); - switch(index_type_) { + switch (index_type_) { case engine::meta::TableFileSchema::RAW: { server::Metrics::GetInstance().SearchRawDataDurationSecondsHistogramObserve(total_time); break; @@ -237,13 +239,13 @@ public: } } -private: + private: int index_type_; }; //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// class MetricCollector : CollectMetricsBase { -public: + public: MetricCollector() { server::Metrics::GetInstance().MetaAccessTotalIncrement(); } @@ -254,11 +256,6 @@ public: } }; - - -} -} -} - - - +} // namespace server +} // namespace milvus +} // namespace zilliz diff --git a/cpp/src/metrics/PrometheusMetrics.cpp b/cpp/src/metrics/PrometheusMetrics.cpp index 6f3513072a..8dcc97aa8b 100644 --- a/cpp/src/metrics/PrometheusMetrics.cpp +++ b/cpp/src/metrics/PrometheusMetrics.cpp @@ -16,12 +16,14 @@ // under the License. +#include "metrics/PrometheusMetrics.h" #include "cache/GpuCacheMgr.h" -#include "PrometheusMetrics.h" #include "server/Config.h" #include "utils/Log.h" #include "SystemInfo.h" +#include +#include namespace zilliz { namespace milvus { @@ -47,93 +49,96 @@ PrometheusMetrics::Init() { // Exposer Registry exposer_ptr_->RegisterCollectable(registry_); - } catch (std::exception& ex) { + } catch (std::exception &ex) { SERVER_LOG_ERROR << "Failed to connect prometheus server: " << std::string(ex.what()); return SERVER_UNEXPECTED_ERROR; } return SERVER_SUCCESS; - } - void -PrometheusMetrics::CPUUsagePercentSet() { - if(!startup_) return ; +PrometheusMetrics::CPUUsagePercentSet() { + if (!startup_) return; double usage_percent = server::SystemInfo::GetInstance().CPUPercent(); CPU_usage_percent_.Set(usage_percent); } void PrometheusMetrics::RAMUsagePercentSet() { - if(!startup_) return ; + if (!startup_) return; double usage_percent = server::SystemInfo::GetInstance().MemoryPercent(); RAM_usage_percent_.Set(usage_percent); } void PrometheusMetrics::GPUPercentGaugeSet() { - if(!startup_) return; + if (!startup_) return; int numDevice = server::SystemInfo::GetInstance().num_device(); - std::vector used_total = server::SystemInfo::GetInstance().GPUMemoryTotal(); - std::vector used_memory = server::SystemInfo::GetInstance().GPUMemoryUsed(); + std::vector used_total = server::SystemInfo::GetInstance().GPUMemoryTotal(); + std::vector used_memory = server::SystemInfo::GetInstance().GPUMemoryUsed(); for (int i = 0; i < numDevice; ++i) { prometheus::Gauge &GPU_percent = GPU_percent_.Add({{"DeviceNum", std::to_string(i)}}); - double percent = (double)used_memory[i] / (double)used_total[i]; + double percent = (double) used_memory[i] / (double) used_total[i]; GPU_percent.Set(percent * 100); } } -void PrometheusMetrics::GPUMemoryUsageGaugeSet() { - if(!startup_) return; - std::vector values = server::SystemInfo::GetInstance().GPUMemoryUsed(); - constexpr unsigned long long MtoB = 1024*1024; +void +PrometheusMetrics::GPUMemoryUsageGaugeSet() { + if (!startup_) return; + std::vector values = server::SystemInfo::GetInstance().GPUMemoryUsed(); + constexpr uint64_t MtoB = 1024 * 1024; int numDevice = server::SystemInfo::GetInstance().num_device(); for (int i = 0; i < numDevice; ++i) { prometheus::Gauge &GPU_memory = GPU_memory_usage_.Add({{"DeviceNum", std::to_string(i)}}); GPU_memory.Set(values[i] / MtoB); } - } -void PrometheusMetrics::AddVectorsPerSecondGaugeSet(int num_vector, int dim, double time) { + +void +PrometheusMetrics::AddVectorsPerSecondGaugeSet(int num_vector, int dim, double time) { // MB/s - if(!startup_) return; - - long long MtoB = 1024*1024; - long long size = num_vector * dim * 4; - add_vectors_per_second_gauge_.Set(size/time/MtoB); + if (!startup_) return; + int64_t MtoB = 1024 * 1024; + int64_t size = num_vector * dim * 4; + add_vectors_per_second_gauge_.Set(size / time / MtoB); } -void PrometheusMetrics::QueryIndexTypePerSecondSet(std::string type, double value) { - if(!startup_) return; - if(type == "IVF"){ + +void +PrometheusMetrics::QueryIndexTypePerSecondSet(std::string type, double value) { + if (!startup_) return; + if (type == "IVF") { query_index_IVF_type_per_second_gauge_.Set(value); - } else if(type == "IDMap"){ + } else if (type == "IDMap") { query_index_IDMAP_type_per_second_gauge_.Set(value); } - } -void PrometheusMetrics::ConnectionGaugeIncrement() { - if(!startup_) return; +void +PrometheusMetrics::ConnectionGaugeIncrement() { + if (!startup_) return; connection_gauge_.Increment(); } -void PrometheusMetrics::ConnectionGaugeDecrement() { - if(!startup_) return; +void +PrometheusMetrics::ConnectionGaugeDecrement() { + if (!startup_) return; connection_gauge_.Decrement(); } -void PrometheusMetrics::OctetsSet() { - if(!startup_) return; +void +PrometheusMetrics::OctetsSet() { + if (!startup_) return; // get old stats and reset them - unsigned long long old_inoctets = SystemInfo::GetInstance().get_inoctets(); - unsigned long long old_outoctets = SystemInfo::GetInstance().get_octets(); + uint64_t old_inoctets = SystemInfo::GetInstance().get_inoctets(); + uint64_t old_outoctets = SystemInfo::GetInstance().get_octets(); auto old_time = SystemInfo::GetInstance().get_nettime(); - std::pair in_and_out_octets = SystemInfo::GetInstance().Octets(); + std::pair in_and_out_octets = SystemInfo::GetInstance().Octets(); SystemInfo::GetInstance().set_inoctets(in_and_out_octets.first); SystemInfo::GetInstance().set_outoctets(in_and_out_octets.second); SystemInfo::GetInstance().set_nettime(); @@ -142,13 +147,14 @@ void PrometheusMetrics::OctetsSet() { constexpr double micro_to_second = 1e-6; auto now_time = std::chrono::system_clock::now(); auto total_microsecond = METRICS_MICROSECONDS(old_time, now_time); - auto total_second = total_microsecond*micro_to_second; - if(total_second == 0) return; - inoctets_gauge_.Set((in_and_out_octets.first-old_inoctets)/total_second); - outoctets_gauge_.Set((in_and_out_octets.second-old_outoctets)/total_second); + auto total_second = total_microsecond * micro_to_second; + if (total_second == 0) return; + inoctets_gauge_.Set((in_and_out_octets.first - old_inoctets) / total_second); + outoctets_gauge_.Set((in_and_out_octets.second - old_outoctets) / total_second); } -void PrometheusMetrics::CPUCoreUsagePercentSet() { +void +PrometheusMetrics::CPUCoreUsagePercentSet() { if (!startup_) return; @@ -160,11 +166,12 @@ void PrometheusMetrics::CPUCoreUsagePercentSet() { } } -void PrometheusMetrics::GPUTemperature() { +void +PrometheusMetrics::GPUTemperature() { if (!startup_) return; - std::vector GPU_temperatures = server::SystemInfo::GetInstance().GPUTemperature(); + std::vector GPU_temperatures = server::SystemInfo::GetInstance().GPUTemperature(); for (int i = 0; i < GPU_temperatures.size(); ++i) { prometheus::Gauge &gpu_temp = GPU_temperature_.Add({{"GPU", std::to_string(i)}}); @@ -172,7 +179,8 @@ void PrometheusMetrics::GPUTemperature() { } } -void PrometheusMetrics::CPUTemperature() { +void +PrometheusMetrics::CPUTemperature() { if (!startup_) return; @@ -184,7 +192,8 @@ void PrometheusMetrics::CPUTemperature() { } } -void PrometheusMetrics::GpuCacheUsageGaugeSet() { +void +PrometheusMetrics::GpuCacheUsageGaugeSet() { // std::vector gpu_ids = {0}; // for(auto i = 0; i < gpu_ids.size(); ++i) { // uint64_t cache_usage = cache::GpuCacheMgr::GetInstance(gpu_ids[i])->CacheUsage(); @@ -194,6 +203,6 @@ void PrometheusMetrics::GpuCacheUsageGaugeSet() { // } } -} -} -} +} // namespace server +} // namespace milvus +} // namespace zilliz diff --git a/cpp/src/metrics/PrometheusMetrics.h b/cpp/src/metrics/PrometheusMetrics.h index 4ee6699b77..ce45c5e711 100644 --- a/cpp/src/metrics/PrometheusMetrics.h +++ b/cpp/src/metrics/PrometheusMetrics.h @@ -22,22 +22,20 @@ #include #include #include +#include #include "utils/Error.h" #include "MetricBase.h" - #define METRICS_NOW_TIME std::chrono::system_clock::now() //#define server::Metrics::GetInstance() server::GetInstance() -#define METRICS_MICROSECONDS(a,b) (std::chrono::duration_cast (b-a)).count(); - +#define METRICS_MICROSECONDS(a, b) (std::chrono::duration_cast (b-a)).count(); namespace zilliz { namespace milvus { namespace server { -class PrometheusMetrics: public MetricsBase { - +class PrometheusMetrics : public MetricsBase { public: static PrometheusMetrics & GetInstance() { @@ -51,59 +49,215 @@ class PrometheusMetrics: public MetricsBase { std::shared_ptr exposer_ptr_; std::shared_ptr registry_ = std::make_shared(); bool startup_ = false; + public: - void SetStartup(bool startup) {startup_ = startup;}; - void AddVectorsSuccessTotalIncrement(double value = 1.0) override { if(startup_) add_vectors_success_total_.Increment(value);}; - void AddVectorsFailTotalIncrement(double value = 1.0) override { if(startup_) add_vectors_fail_total_.Increment(value);}; - void AddVectorsDurationHistogramOberve(double value) override { if(startup_) add_vectors_duration_histogram_.Observe(value);}; - void RawFileSizeHistogramObserve(double value) override { if(startup_) raw_files_size_histogram_.Observe(value);}; - void IndexFileSizeHistogramObserve(double value) override { if(startup_) index_files_size_histogram_.Observe(value);}; - void BuildIndexDurationSecondsHistogramObserve(double value) override { if(startup_) build_index_duration_seconds_histogram_.Observe(value);}; - void CpuCacheUsageGaugeSet(double value) override { if(startup_) cpu_cache_usage_gauge_.Set(value);}; + void SetStartup(bool startup) { + startup_ = startup; + } + + void AddVectorsSuccessTotalIncrement(double value = 1.0) override { + if (startup_) { + add_vectors_success_total_.Increment(value); + } + } + + void AddVectorsFailTotalIncrement(double value = 1.0) override { + if (startup_) { + add_vectors_fail_total_.Increment(value); + } + } + + void AddVectorsDurationHistogramOberve(double value) override { + if (startup_) { + add_vectors_duration_histogram_.Observe(value); + } + } + + void RawFileSizeHistogramObserve(double value) override { + if (startup_) { + raw_files_size_histogram_.Observe(value); + } + } + + void IndexFileSizeHistogramObserve(double value) override { + if (startup_) { + index_files_size_histogram_.Observe(value); + } + } + + void BuildIndexDurationSecondsHistogramObserve(double value) override { + if (startup_) { + build_index_duration_seconds_histogram_.Observe(value); + } + } + + void CpuCacheUsageGaugeSet(double value) override { + if (startup_) { + cpu_cache_usage_gauge_.Set(value); + } + } + void GpuCacheUsageGaugeSet() override; - void MetaAccessTotalIncrement(double value = 1) override { if(startup_) meta_access_total_.Increment(value);}; - void MetaAccessDurationSecondsHistogramObserve(double value) override { if(startup_) meta_access_duration_seconds_histogram_.Observe(value);}; + void MetaAccessTotalIncrement(double value = 1) override { + if (startup_) { + meta_access_total_.Increment(value); + } + } - void FaissDiskLoadDurationSecondsHistogramObserve(double value) override { if(startup_) faiss_disk_load_duration_seconds_histogram_.Observe(value);}; - void FaissDiskLoadSizeBytesHistogramObserve(double value) override { if(startup_) faiss_disk_load_size_bytes_histogram_.Observe(value);}; - void FaissDiskLoadIOSpeedGaugeSet(double value) override { if(startup_) faiss_disk_load_IO_speed_gauge_.Set(value);}; + void MetaAccessDurationSecondsHistogramObserve(double value) override { + if (startup_) { + meta_access_duration_seconds_histogram_.Observe(value); + } + } - void CacheAccessTotalIncrement(double value = 1) override { if(startup_) cache_access_total_.Increment(value);}; - void MemTableMergeDurationSecondsHistogramObserve(double value) override { if(startup_) mem_table_merge_duration_seconds_histogram_.Observe(value);}; - void SearchIndexDataDurationSecondsHistogramObserve(double value) override { if(startup_) search_index_data_duration_seconds_histogram_.Observe(value);}; - void SearchRawDataDurationSecondsHistogramObserve(double value) override { if(startup_) search_raw_data_duration_seconds_histogram_.Observe(value);}; - void IndexFileSizeTotalIncrement(double value = 1) override { if(startup_) index_file_size_total_.Increment(value);}; - void RawFileSizeTotalIncrement(double value = 1) override { if(startup_) raw_file_size_total_.Increment(value);}; - void IndexFileSizeGaugeSet(double value) override { if(startup_) index_file_size_gauge_.Set(value);}; - void RawFileSizeGaugeSet(double value) override { if(startup_) raw_file_size_gauge_.Set(value);}; - void QueryResponseSummaryObserve(double value) override {if(startup_) query_response_summary_.Observe(value);}; - void DiskStoreIOSpeedGaugeSet(double value) override { if(startup_) disk_store_IO_speed_gauge_.Set(value);}; - void DataFileSizeGaugeSet(double value) override { if(startup_) data_file_size_gauge_.Set(value);}; - void AddVectorsSuccessGaugeSet(double value) override { if(startup_) add_vectors_success_gauge_.Set(value);}; - void AddVectorsFailGaugeSet(double value) override { if(startup_) add_vectors_fail_gauge_.Set(value);}; - void QueryVectorResponseSummaryObserve(double value, int count = 1) override { if (startup_) for(int i = 0 ; i < count ; ++i) query_vector_response_summary_.Observe(value);}; - void QueryVectorResponsePerSecondGaugeSet(double value) override {if (startup_) query_vector_response_per_second_gauge_.Set(value);}; - void CPUUsagePercentSet() override ; + void FaissDiskLoadDurationSecondsHistogramObserve(double value) override { + if (startup_) { + faiss_disk_load_duration_seconds_histogram_.Observe(value); + } + } + + void FaissDiskLoadSizeBytesHistogramObserve(double value) override { + if (startup_) { + faiss_disk_load_size_bytes_histogram_.Observe(value); + } + } + + void FaissDiskLoadIOSpeedGaugeSet(double value) override { + if (startup_) { + faiss_disk_load_IO_speed_gauge_.Set(value); + } + } + + void CacheAccessTotalIncrement(double value = 1) override { + if (startup_) { + cache_access_total_.Increment(value); + } + } + + void MemTableMergeDurationSecondsHistogramObserve(double value) override { + if (startup_) { + mem_table_merge_duration_seconds_histogram_.Observe(value); + } + } + + void SearchIndexDataDurationSecondsHistogramObserve(double value) override { + if (startup_) { + search_index_data_duration_seconds_histogram_.Observe(value); + } + } + + void SearchRawDataDurationSecondsHistogramObserve(double value) override { + if (startup_) { + search_raw_data_duration_seconds_histogram_.Observe(value); + } + } + + void IndexFileSizeTotalIncrement(double value = 1) override { + if (startup_) { + index_file_size_total_.Increment(value); + } + } + + void RawFileSizeTotalIncrement(double value = 1) override { + if (startup_) { + raw_file_size_total_.Increment(value); + } + } + + void IndexFileSizeGaugeSet(double value) override { + if (startup_) { + index_file_size_gauge_.Set(value); + } + } + + void RawFileSizeGaugeSet(double value) override { + if (startup_) { + raw_file_size_gauge_.Set(value); + } + } + + void QueryResponseSummaryObserve(double value) override { + if (startup_) { + query_response_summary_.Observe(value); + } + } + + void DiskStoreIOSpeedGaugeSet(double value) override { + if (startup_) { + disk_store_IO_speed_gauge_.Set(value); + } + } + + void DataFileSizeGaugeSet(double value) override { + if (startup_) { + data_file_size_gauge_.Set(value); + } + } + + void AddVectorsSuccessGaugeSet(double value) override { + if (startup_) { + add_vectors_success_gauge_.Set(value); + } + } + + void AddVectorsFailGaugeSet(double value) override { + if (startup_) { + add_vectors_fail_gauge_.Set(value); + } + } + + void QueryVectorResponseSummaryObserve(double value, int count = 1) override { + if (startup_) { + for (int i = 0; i < count; ++i) { + query_vector_response_summary_.Observe(value); + } + } + } + + void QueryVectorResponsePerSecondGaugeSet(double value) override { + if (startup_) { + query_vector_response_per_second_gauge_.Set(value); + } + } + + void CPUUsagePercentSet() override; void CPUCoreUsagePercentSet() override; - void RAMUsagePercentSet() override ; - void QueryResponsePerSecondGaugeSet(double value) override {if(startup_) query_response_per_second_gauge.Set(value);}; - void GPUPercentGaugeSet() override ; - void GPUMemoryUsageGaugeSet() override ; - void AddVectorsPerSecondGaugeSet(int num_vector, int dim, double time) override ; - void QueryIndexTypePerSecondSet(std::string type, double value) override ; - void ConnectionGaugeIncrement() override ; - void ConnectionGaugeDecrement() override ; - void KeepingAliveCounterIncrement(double value = 1) override {if(startup_) keeping_alive_counter_.Increment(value);}; - void OctetsSet() override ; + void RAMUsagePercentSet() override; + + void QueryResponsePerSecondGaugeSet(double value) override { + if (startup_) { + query_response_per_second_gauge.Set(value); + } + } + + void GPUPercentGaugeSet() override; + void GPUMemoryUsageGaugeSet() override; + void AddVectorsPerSecondGaugeSet(int num_vector, int dim, double time) override; + void QueryIndexTypePerSecondSet(std::string type, double value) override; + void ConnectionGaugeIncrement() override; + void ConnectionGaugeDecrement() override; + + void KeepingAliveCounterIncrement(double value = 1) override { + if (startup_) { + keeping_alive_counter_.Increment(value); + } + } + + void OctetsSet() override; void GPUTemperature() override; void CPUTemperature() override; - std::shared_ptr &exposer_ptr() {return exposer_ptr_; } + std::shared_ptr &exposer_ptr() { + return exposer_ptr_; + } + // prometheus::Exposer& exposer() { return exposer_;} - std::shared_ptr ®istry_ptr() {return registry_; } + std::shared_ptr ®istry_ptr() { + return registry_; + } // ..... private: @@ -125,7 +279,6 @@ class PrometheusMetrics: public MetricsBase { prometheus::Counter &add_group_success_total_ = add_group_request_.Add({{"outcome", "success"}}); prometheus::Counter &add_group_fail_total_ = add_group_request_.Add({{"outcome", "fail"}}); - //record get_group request prometheus::Family &get_group_request_ = prometheus::BuildCounter() .Name("get_group_request_total") @@ -135,7 +288,6 @@ class PrometheusMetrics: public MetricsBase { prometheus::Counter &get_group_success_total_ = get_group_request_.Add({{"outcome", "success"}}); prometheus::Counter &get_group_fail_total_ = get_group_request_.Add({{"outcome", "fail"}}); - //record has_group request prometheus::Family &has_group_request_ = prometheus::BuildCounter() .Name("has_group_request_total") @@ -145,7 +297,6 @@ class PrometheusMetrics: public MetricsBase { prometheus::Counter &has_group_success_total_ = has_group_request_.Add({{"outcome", "success"}}); prometheus::Counter &has_group_fail_total_ = has_group_request_.Add({{"outcome", "fail"}}); - //record get_group_files prometheus::Family &get_group_files_request_ = prometheus::BuildCounter() .Name("get_group_files_request_total") @@ -155,7 +306,6 @@ class PrometheusMetrics: public MetricsBase { prometheus::Counter &get_group_files_success_total_ = get_group_files_request_.Add({{"outcome", "success"}}); prometheus::Counter &get_group_files_fail_total_ = get_group_files_request_.Add({{"outcome", "fail"}}); - //record add_vectors count and average time //need to be considered prometheus::Family &add_vectors_request_ = prometheus::BuildCounter() @@ -169,36 +319,39 @@ class PrometheusMetrics: public MetricsBase { .Name("add_vector_duration_microseconds") .Help("average time of adding every vector") .Register(*registry_); - prometheus::Histogram &add_vectors_duration_histogram_ = add_vectors_duration_seconds_.Add({}, BucketBoundaries{0, 0.01, 0.02, 0.03, 0.04, 0.05, 0.08, 0.1, 0.5, 1}); - + prometheus::Histogram &add_vectors_duration_histogram_ = + add_vectors_duration_seconds_.Add({}, BucketBoundaries{0, 0.01, 0.02, 0.03, 0.04, 0.05, 0.08, 0.1, 0.5, 1}); //record search count and average time prometheus::Family &search_request_ = prometheus::BuildCounter() .Name("search_request_total") .Help("the number of search request") .Register(*registry_); - prometheus::Counter &search_success_total_ = search_request_.Add({{"outcome","success"}}); - prometheus::Counter &search_fail_total_ = search_request_.Add({{"outcome","fail"}}); + prometheus::Counter &search_success_total_ = search_request_.Add({{"outcome", "success"}}); + prometheus::Counter &search_fail_total_ = search_request_.Add({{"outcome", "fail"}}); prometheus::Family &search_request_duration_seconds_ = prometheus::BuildHistogram() .Name("search_request_duration_microsecond") .Help("histogram of processing time for each search") .Register(*registry_); - prometheus::Histogram &search_duration_histogram_ = search_request_duration_seconds_.Add({}, BucketBoundaries{0.1, 1.0, 10.0}); + prometheus::Histogram + &search_duration_histogram_ = search_request_duration_seconds_.Add({}, BucketBoundaries{0.1, 1.0, 10.0}); //record raw_files size histogram prometheus::Family &raw_files_size_ = prometheus::BuildHistogram() .Name("search_raw_files_bytes") .Help("histogram of raw files size by bytes") .Register(*registry_); - prometheus::Histogram &raw_files_size_histogram_ = raw_files_size_.Add({}, BucketBoundaries{1e9, 2e9, 4e9, 6e9, 8e9, 1e10}); + prometheus::Histogram + &raw_files_size_histogram_ = raw_files_size_.Add({}, BucketBoundaries{1e9, 2e9, 4e9, 6e9, 8e9, 1e10}); //record index_files size histogram prometheus::Family &index_files_size_ = prometheus::BuildHistogram() .Name("search_index_files_bytes") .Help("histogram of index files size by bytes") .Register(*registry_); - prometheus::Histogram &index_files_size_histogram_ = index_files_size_.Add({}, BucketBoundaries{1e9, 2e9, 4e9, 6e9, 8e9, 1e10}); + prometheus::Histogram + &index_files_size_histogram_ = index_files_size_.Add({}, BucketBoundaries{1e9, 2e9, 4e9, 6e9, 8e9, 1e10}); //record index and raw files size counter prometheus::Family &file_size_total_ = prometheus::BuildCounter() @@ -221,30 +374,34 @@ class PrometheusMetrics: public MetricsBase { .Name("build_index_duration_microseconds") .Help("histogram of processing time for building index") .Register(*registry_); - prometheus::Histogram &build_index_duration_seconds_histogram_ = build_index_duration_seconds_.Add({}, BucketBoundaries{5e5, 2e6, 4e6, 6e6, 8e6, 1e7}); - + prometheus::Histogram &build_index_duration_seconds_histogram_ = + build_index_duration_seconds_.Add({}, BucketBoundaries{5e5, 2e6, 4e6, 6e6, 8e6, 1e7}); //record processing time for all building index prometheus::Family &all_build_index_duration_seconds_ = prometheus::BuildHistogram() .Name("all_build_index_duration_microseconds") .Help("histogram of processing time for building index") .Register(*registry_); - prometheus::Histogram &all_build_index_duration_seconds_histogram_ = all_build_index_duration_seconds_.Add({}, BucketBoundaries{2e6, 4e6, 6e6, 8e6, 1e7}); + prometheus::Histogram &all_build_index_duration_seconds_histogram_ = + all_build_index_duration_seconds_.Add({}, BucketBoundaries{2e6, 4e6, 6e6, 8e6, 1e7}); //record duration of merging mem table prometheus::Family &mem_table_merge_duration_seconds_ = prometheus::BuildHistogram() .Name("mem_table_merge_duration_microseconds") .Help("histogram of processing time for merging mem tables") .Register(*registry_); - prometheus::Histogram &mem_table_merge_duration_seconds_histogram_ = mem_table_merge_duration_seconds_.Add({}, BucketBoundaries{5e4, 1e5, 2e5, 4e5, 6e5, 8e5, 1e6}); + prometheus::Histogram &mem_table_merge_duration_seconds_histogram_ = + mem_table_merge_duration_seconds_.Add({}, BucketBoundaries{5e4, 1e5, 2e5, 4e5, 6e5, 8e5, 1e6}); //record search index and raw data duration prometheus::Family &search_data_duration_seconds_ = prometheus::BuildHistogram() .Name("search_data_duration_microseconds") .Help("histograms of processing time for search index and raw data") .Register(*registry_); - prometheus::Histogram &search_index_data_duration_seconds_histogram_ = search_data_duration_seconds_.Add({{"type", "index"}}, BucketBoundaries{1e5, 2e5, 4e5, 6e5, 8e5}); - prometheus::Histogram &search_raw_data_duration_seconds_histogram_ = search_data_duration_seconds_.Add({{"type", "raw"}}, BucketBoundaries{1e5, 2e5, 4e5, 6e5, 8e5}); + prometheus::Histogram &search_index_data_duration_seconds_histogram_ = + search_data_duration_seconds_.Add({{"type", "index"}}, BucketBoundaries{1e5, 2e5, 4e5, 6e5, 8e5}); + prometheus::Histogram &search_raw_data_duration_seconds_histogram_ = + search_data_duration_seconds_.Add({{"type", "raw"}}, BucketBoundaries{1e5, 2e5, 4e5, 6e5, 8e5}); ////all form Cache.cpp @@ -263,7 +420,8 @@ class PrometheusMetrics: public MetricsBase { // .Name("meta_visit_duration_seconds") // .Help("histogram of processing time to get data from mata") // .Register(*registry_); -// prometheus::Histogram &meta_visit_duration_seconds_histogram_ = meta_visit_duration_seconds_.Add({{}}, BucketBoundaries{0.1, 1.0, 10.0}); +// prometheus::Histogram &meta_visit_duration_seconds_histogram_ = +// meta_visit_duration_seconds_.Add({{}}, BucketBoundaries{0.1, 1.0, 10.0}); ////all from MemManager.cpp @@ -281,8 +439,6 @@ class PrometheusMetrics: public MetricsBase { .Register(*registry_); prometheus::Gauge &mem_usage_total_gauge_ = mem_usage_total_.Add({}); - - ////all from DBMetaImpl.cpp //record meta access count prometheus::Family &meta_access_ = prometheus::BuildCounter() @@ -296,9 +452,8 @@ class PrometheusMetrics: public MetricsBase { .Name("meta_access_duration_microseconds") .Help("histogram of processing time for accessing mata") .Register(*registry_); - prometheus::Histogram &meta_access_duration_seconds_histogram_ = meta_access_duration_seconds_.Add({}, BucketBoundaries{100, 300, 500, 700, 900, 2000, 4000, 6000, 8000, 20000}); - - + prometheus::Histogram &meta_access_duration_seconds_histogram_ = + meta_access_duration_seconds_.Add({}, BucketBoundaries{100, 300, 500, 700, 900, 2000, 4000, 6000, 8000, 20000}); ////all from FaissExecutionEngine.cpp //record data loading from disk count, size, duration, IO speed @@ -306,26 +461,28 @@ class PrometheusMetrics: public MetricsBase { .Name("disk_load_duration_microseconds") .Help("Histogram of processing time for loading data from disk") .Register(*registry_); - prometheus::Histogram &faiss_disk_load_duration_seconds_histogram_ = disk_load_duration_second_.Add({{"DB","Faiss"}},BucketBoundaries{2e5, 4e5, 6e5 , 8e5}); + prometheus::Histogram &faiss_disk_load_duration_seconds_histogram_ = + disk_load_duration_second_.Add({{"DB", "Faiss"}}, BucketBoundaries{2e5, 4e5, 6e5, 8e5}); prometheus::Family &disk_load_size_bytes_ = prometheus::BuildHistogram() .Name("disk_load_size_bytes") .Help("Histogram of data size by bytes for loading data from disk") .Register(*registry_); - prometheus::Histogram &faiss_disk_load_size_bytes_histogram_ = disk_load_size_bytes_.Add({{"DB","Faiss"}},BucketBoundaries{1e9, 2e9, 4e9, 6e9, 8e9}); + prometheus::Histogram &faiss_disk_load_size_bytes_histogram_ = + disk_load_size_bytes_.Add({{"DB", "Faiss"}}, BucketBoundaries{1e9, 2e9, 4e9, 6e9, 8e9}); // prometheus::Family &disk_load_IO_speed_ = prometheus::BuildHistogram() // .Name("disk_load_IO_speed_byte_per_sec") // .Help("Histogram of IO speed for loading data from disk") // .Register(*registry_); -// prometheus::Histogram &faiss_disk_load_IO_speed_histogram_ = disk_load_IO_speed_.Add({{"DB","Faiss"}},BucketBoundaries{1000, 2000, 3000, 4000, 6000, 8000}); +// prometheus::Histogram &faiss_disk_load_IO_speed_histogram_ = +// disk_load_IO_speed_.Add({{"DB","Faiss"}},BucketBoundaries{1000, 2000, 3000, 4000, 6000, 8000}); prometheus::Family &faiss_disk_load_IO_speed_ = prometheus::BuildGauge() .Name("disk_load_IO_speed_byte_per_microsec") .Help("disk IO speed ") .Register(*registry_); - prometheus::Gauge &faiss_disk_load_IO_speed_gauge_ = faiss_disk_load_IO_speed_.Add({{"DB","Faiss"}}); - + prometheus::Gauge &faiss_disk_load_IO_speed_gauge_ = faiss_disk_load_IO_speed_.Add({{"DB", "Faiss"}}); ////all from CacheMgr.cpp //record cache access count @@ -344,9 +501,9 @@ class PrometheusMetrics: public MetricsBase { //record GPU cache usage and % prometheus::Family &gpu_cache_usage_ = prometheus::BuildGauge() - .Name("gpu_cache_usage_bytes") - .Help("current gpu cache usage by bytes") - .Register(*registry_); + .Name("gpu_cache_usage_bytes") + .Help("current gpu cache usage by bytes") + .Register(*registry_); // record query response using Quantiles = std::vector; @@ -354,18 +511,21 @@ class PrometheusMetrics: public MetricsBase { .Name("query_response_summary") .Help("query response summary") .Register(*registry_); - prometheus::Summary &query_response_summary_ = query_response_.Add({}, Quantiles{{0.95,0.00},{0.9,0.05},{0.8,0.1}}); + prometheus::Summary + &query_response_summary_ = query_response_.Add({}, Quantiles{{0.95, 0.00}, {0.9, 0.05}, {0.8, 0.1}}); prometheus::Family &query_vector_response_ = prometheus::BuildSummary() .Name("query_vector_response_summary") .Help("query each vector response summary") .Register(*registry_); - prometheus::Summary &query_vector_response_summary_ = query_vector_response_.Add({}, Quantiles{{0.95,0.00},{0.9,0.05},{0.8,0.1}}); + prometheus::Summary &query_vector_response_summary_ = + query_vector_response_.Add({}, Quantiles{{0.95, 0.00}, {0.9, 0.05}, {0.8, 0.1}}); prometheus::Family &query_vector_response_per_second_ = prometheus::BuildGauge() .Name("query_vector_response_per_microsecond") .Help("the number of vectors can be queried every second ") - .Register(*registry_); prometheus::Gauge &query_vector_response_per_second_gauge_ = query_vector_response_per_second_.Add({}); + .Register(*registry_); + prometheus::Gauge &query_vector_response_per_second_gauge_ = query_vector_response_per_second_.Add({}); prometheus::Family &query_response_per_second_ = prometheus::BuildGauge() .Name("query_response_per_microsecond") @@ -404,7 +564,6 @@ class PrometheusMetrics: public MetricsBase { .Register(*registry_); prometheus::Gauge &CPU_usage_percent_ = CPU_.Add({{"CPU", "avg"}}); - prometheus::Family &RAM_ = prometheus::BuildGauge() .Name("RAM_usage_percent") .Help("RAM usage percent by this process") @@ -427,8 +586,10 @@ class PrometheusMetrics: public MetricsBase { .Name("query_index_throughtout_per_microsecond") .Help("query index throughtout per microsecond") .Register(*registry_); - prometheus::Gauge &query_index_IVF_type_per_second_gauge_ = query_index_type_per_second_.Add({{"IndexType","IVF"}}); - prometheus::Gauge &query_index_IDMAP_type_per_second_gauge_ = query_index_type_per_second_.Add({{"IndexType","IDMAP"}}); + prometheus::Gauge + &query_index_IVF_type_per_second_gauge_ = query_index_type_per_second_.Add({{"IndexType", "IVF"}}); + prometheus::Gauge + &query_index_IDMAP_type_per_second_gauge_ = query_index_type_per_second_.Add({{"IndexType", "IDMAP"}}); prometheus::Family &connection_ = prometheus::BuildGauge() .Name("connection_number") @@ -449,7 +610,6 @@ class PrometheusMetrics: public MetricsBase { prometheus::Gauge &inoctets_gauge_ = octets_.Add({{"type", "inoctets"}}); prometheus::Gauge &outoctets_gauge_ = octets_.Add({{"type", "outoctets"}}); - prometheus::Family &GPU_temperature_ = prometheus::BuildGauge() .Name("GPU_temperature") .Help("GPU temperature") @@ -461,9 +621,6 @@ class PrometheusMetrics: public MetricsBase { .Register(*registry_); }; -} -} -} - - - +} // namespace server +} // namespace milvus +} // namespace zilliz diff --git a/cpp/src/metrics/SystemInfo.cpp b/cpp/src/metrics/SystemInfo.cpp index 5db2448ee9..54d0c51943 100644 --- a/cpp/src/metrics/SystemInfo.cpp +++ b/cpp/src/metrics/SystemInfo.cpp @@ -16,29 +16,28 @@ // under the License. -#include "SystemInfo.h" +#include "metrics/SystemInfo.h" #include #include #include #include -#include "nvml.h" -//#include -// -//std::mutex mutex; - +#include +#include +#include namespace zilliz { namespace milvus { namespace server { -void SystemInfo::Init() { - if(initialized_) return; +void +SystemInfo::Init() { + if (initialized_) return; initialized_ = true; // initialize CPU information - FILE* file; + FILE *file; struct tms time_sample; char line[128]; last_cpu_ = times(&time_sample); @@ -46,7 +45,7 @@ void SystemInfo::Init() { last_user_cpu_ = time_sample.tms_utime; file = fopen("/proc/cpuinfo", "r"); num_processors_ = 0; - while(fgets(line, 128, file) != NULL){ + while (fgets(line, 128, file) != NULL) { if (strncmp(line, "processor", 9) == 0) num_processors_++; if (strncmp(line, "physical", 8) == 0) { num_physical_processors_ = ParseLine(line); @@ -58,24 +57,24 @@ void SystemInfo::Init() { //initialize GPU information nvmlReturn_t nvmlresult; nvmlresult = nvmlInit(); - if(NVML_SUCCESS != nvmlresult) { + if (NVML_SUCCESS != nvmlresult) { printf("System information initilization failed"); - return ; + return; } nvmlresult = nvmlDeviceGetCount(&num_device_); - if(NVML_SUCCESS != nvmlresult) { + if (NVML_SUCCESS != nvmlresult) { printf("Unable to get devidce number"); - return ; + return; } //initialize network traffic information - std::pair in_and_out_octets = Octets(); + std::pair in_and_out_octets = Octets(); in_octets_ = in_and_out_octets.first; out_octets_ = in_and_out_octets.second; net_time_ = std::chrono::system_clock::now(); } -long long +uint64_t SystemInfo::ParseLine(char *line) { // This assumes that a digit will be found and the line ends in " Kb". int i = strlen(line); @@ -83,53 +82,52 @@ SystemInfo::ParseLine(char *line) { while (*p < '0' || *p > '9') p++; line[i - 3] = '\0'; i = atoi(p); - return static_cast(i); + return static_cast(i); } -unsigned long +uint64_t SystemInfo::GetPhysicalMemory() { struct sysinfo memInfo; - sysinfo (&memInfo); - unsigned long totalPhysMem = memInfo.totalram; + sysinfo(&memInfo); + uint64_t totalPhysMem = memInfo.totalram; //Multiply in next statement to avoid int overflow on right hand side... totalPhysMem *= memInfo.mem_unit; return totalPhysMem; } -unsigned long +uint64_t SystemInfo::GetProcessUsedMemory() { //Note: this value is in KB! - FILE* file = fopen("/proc/self/status", "r"); - constexpr int64_t line_length = 128; - long long result = -1; - constexpr int64_t KB_SIZE = 1024; + FILE *file = fopen("/proc/self/status", "r"); + constexpr uint64_t line_length = 128; + uint64_t result = -1; + constexpr uint64_t KB_SIZE = 1024; char line[line_length]; - while (fgets(line, line_length, file) != NULL){ - if (strncmp(line, "VmRSS:", 6) == 0){ + while (fgets(line, line_length, file) != NULL) { + if (strncmp(line, "VmRSS:", 6) == 0) { result = ParseLine(line); break; } } fclose(file); // return value in Byte - return (result*KB_SIZE); - + return (result * KB_SIZE); } double SystemInfo::MemoryPercent() { if (!initialized_) Init(); - return (double)(GetProcessUsedMemory()*100)/(double)total_ram_; + return (double) (GetProcessUsedMemory() * 100) / (double) total_ram_; } std::vector SystemInfo::CPUCorePercent() { - std::vector prev_work_time_array; - std::vector prev_total_time_array = getTotalCpuTime(prev_work_time_array); + std::vector prev_work_time_array; + std::vector prev_total_time_array = getTotalCpuTime(prev_work_time_array); usleep(100000); - std::vector cur_work_time_array; - std::vector cur_total_time_array = getTotalCpuTime(cur_work_time_array); + std::vector cur_work_time_array; + std::vector cur_total_time_array = getTotalCpuTime(cur_work_time_array); std::vector cpu_core_percent; for (int i = 1; i < num_processors_; i++) { @@ -140,22 +138,21 @@ SystemInfo::CPUCorePercent() { return cpu_core_percent; } -std::vector -SystemInfo::getTotalCpuTime(std::vector &work_time_array) -{ - std::vector total_time_array; - FILE* file = fopen("/proc/stat", "r"); +std::vector +SystemInfo::getTotalCpuTime(std::vector &work_time_array) { + std::vector total_time_array; + FILE *file = fopen("/proc/stat", "r"); if (file == NULL) { perror("Could not open stat file"); return total_time_array; } - unsigned long long user = 0, nice = 0, system = 0, idle = 0; - unsigned long long iowait = 0, irq = 0, softirq = 0, steal = 0, guest = 0, guestnice = 0; + uint64_t user = 0, nice = 0, system = 0, idle = 0; + uint64_t iowait = 0, irq = 0, softirq = 0, steal = 0, guest = 0, guestnice = 0; for (int i = 0; i < num_processors_; i++) { char buffer[1024]; - char* ret = fgets(buffer, sizeof(buffer) - 1, file); + char *ret = fgets(buffer, sizeof(buffer) - 1, file); if (ret == NULL) { perror("Could not read stat file"); fclose(file); @@ -163,7 +160,7 @@ SystemInfo::getTotalCpuTime(std::vector &work_time_array) } sscanf(buffer, - "cpu %16llu %16llu %16llu %16llu %16llu %16llu %16llu %16llu %16llu %16llu", + "cpu %16lu %16lu %16lu %16lu %16lu %16lu %16lu %16lu %16lu %16lu", &user, &nice, &system, &idle, &iowait, &irq, &softirq, &steal, &guest, &guestnice); work_time_array.push_back(user + nice + system); @@ -174,9 +171,6 @@ SystemInfo::getTotalCpuTime(std::vector &work_time_array) return total_time_array; } - - - double SystemInfo::CPUPercent() { if (!initialized_) Init(); @@ -186,11 +180,10 @@ SystemInfo::CPUPercent() { now = times(&time_sample); if (now <= last_cpu_ || time_sample.tms_stime < last_sys_cpu_ || - time_sample.tms_utime < last_user_cpu_){ + time_sample.tms_utime < last_user_cpu_) { //Overflow detection. Just skip this value. percent = -1.0; - } - else{ + } else { percent = (time_sample.tms_stime - last_sys_cpu_) + (time_sample.tms_utime - last_user_cpu_); percent /= (now - last_cpu_); @@ -203,12 +196,11 @@ SystemInfo::CPUPercent() { return percent; } - -std::vector +std::vector SystemInfo::GPUMemoryTotal() { // get GPU usage percent - if(!initialized_) Init(); - std::vector result; + if (!initialized_) Init(); + std::vector result; nvmlMemory_t nvmlMemory; for (int i = 0; i < num_device_; ++i) { nvmlDevice_t device; @@ -219,21 +211,22 @@ SystemInfo::GPUMemoryTotal() { return result; } -std::vector -SystemInfo::GPUTemperature(){ - if(!initialized_) Init(); - std::vector result; +std::vector +SystemInfo::GPUTemperature() { + if (!initialized_) Init(); + std::vector result; for (int i = 0; i < num_device_; i++) { nvmlDevice_t device; nvmlDeviceGetHandleByIndex(i, &device); unsigned int temp; - nvmlDeviceGetTemperature(device, NVML_TEMPERATURE_GPU,&temp); + nvmlDeviceGetTemperature(device, NVML_TEMPERATURE_GPU, &temp); result.push_back(temp); } return result; } + std::vector -SystemInfo::CPUTemperature(){ +SystemInfo::CPUTemperature() { std::vector result; for (int i = 0; i <= num_physical_processors_; ++i) { std::string path = "/sys/class/thermal/thermal_zone" + std::to_string(i) + "/temp"; @@ -247,15 +240,14 @@ SystemInfo::CPUTemperature(){ result.push_back(temp / 1000); fclose(file); } - } -std::vector +std::vector SystemInfo::GPUMemoryUsed() { // get GPU memory used - if(!initialized_) Init(); + if (!initialized_) Init(); - std::vector result; + std::vector result; nvmlMemory_t nvmlMemory; for (int i = 0; i < num_device_; ++i) { nvmlDevice_t device; @@ -266,42 +258,41 @@ SystemInfo::GPUMemoryUsed() { return result; } -std::pair -SystemInfo::Octets(){ +std::pair +SystemInfo::Octets() { pid_t pid = getpid(); // const std::string filename = "/proc/"+std::to_string(pid)+"/net/netstat"; const std::string filename = "/proc/net/netstat"; std::ifstream file(filename); std::string lastline = ""; std::string line = ""; - while(file){ + while (file) { getline(file, line); - if(file.fail()){ + if (file.fail()) { break; } lastline = line; } std::vector space_position; size_t space_pos = lastline.find(" "); - while(space_pos != std::string::npos){ + while (space_pos != std::string::npos) { space_position.push_back(space_pos); - space_pos = lastline.find(" ",space_pos+1); + space_pos = lastline.find(" ", space_pos + 1); } // InOctets is between 6th and 7th " " and OutOctets is between 7th and 8th " " - size_t inoctets_begin = space_position[6]+1; - size_t inoctets_length = space_position[7]-inoctets_begin; - size_t outoctets_begin = space_position[7]+1; - size_t outoctets_length = space_position[8]-outoctets_begin; - std::string inoctets = lastline.substr(inoctets_begin,inoctets_length); - std::string outoctets = lastline.substr(outoctets_begin,outoctets_length); + size_t inoctets_begin = space_position[6] + 1; + size_t inoctets_length = space_position[7] - inoctets_begin; + size_t outoctets_begin = space_position[7] + 1; + size_t outoctets_length = space_position[8] - outoctets_begin; + std::string inoctets = lastline.substr(inoctets_begin, inoctets_length); + std::string outoctets = lastline.substr(outoctets_begin, outoctets_length); - - unsigned long long inoctets_bytes = std::stoull(inoctets); - unsigned long long outoctets_bytes = std::stoull(outoctets); - std::pair res(inoctets_bytes, outoctets_bytes); + uint64_t inoctets_bytes = std::stoull(inoctets); + uint64_t outoctets_bytes = std::stoull(outoctets); + std::pair res(inoctets_bytes, outoctets_bytes); return res; } -} -} -} \ No newline at end of file +} // namespace server +} // namespace milvus +} // namespace zilliz diff --git a/cpp/src/metrics/SystemInfo.h b/cpp/src/metrics/SystemInfo.h index 9089c7cddb..802cbb0cce 100644 --- a/cpp/src/metrics/SystemInfo.h +++ b/cpp/src/metrics/SystemInfo.h @@ -18,19 +18,17 @@ #pragma once -#include "sys/types.h" -#include "sys/sysinfo.h" -#include "stdlib.h" -#include "stdio.h" -#include "string.h" -#include "sys/times.h" -#include "sys/vtimes.h" +#include +#include +#include +#include +#include +#include +#include #include - #include #include - - +#include namespace zilliz { namespace milvus { @@ -38,7 +36,7 @@ namespace server { class SystemInfo { private: - unsigned long total_ram_ = 0; + uint64_t total_ram_ = 0; clock_t last_cpu_ = clock_t(); clock_t last_sys_cpu_ = clock_t(); clock_t last_user_cpu_ = clock_t(); @@ -46,44 +44,71 @@ class SystemInfo { int num_processors_ = 0; int num_physical_processors_ = 0; //number of GPU - unsigned int num_device_ = 0; - unsigned long long in_octets_ = 0; - unsigned long long out_octets_ = 0; + uint32_t num_device_ = 0; + uint64_t in_octets_ = 0; + uint64_t out_octets_ = 0; bool initialized_ = false; public: static SystemInfo & - GetInstance(){ + GetInstance() { static SystemInfo instance; return instance; } void Init(); - int num_processor() const { return num_processors_;}; - int num_physical_processors() const { return num_physical_processors_; }; - int num_device() const {return num_device_;}; - unsigned long long get_inoctets() { return in_octets_;}; - unsigned long long get_octets() { return out_octets_;}; - std::chrono::system_clock::time_point get_nettime() { return net_time_;}; - void set_inoctets(unsigned long long value) { in_octets_ = value;}; - void set_outoctets(unsigned long long value) { out_octets_ = value;}; - void set_nettime() {net_time_ = std::chrono::system_clock::now();}; - long long ParseLine(char* line); - unsigned long GetPhysicalMemory(); - unsigned long GetProcessUsedMemory(); + + int num_processor() const { + return num_processors_; + } + + int num_physical_processors() const { + return num_physical_processors_; + } + + uint32_t num_device() const { + return num_device_; + } + + uint64_t get_inoctets() { + return in_octets_; + } + + uint64_t get_octets() { + return out_octets_; + } + + std::chrono::system_clock::time_point get_nettime() { + return net_time_; + } + + void set_inoctets(uint64_t value) { + in_octets_ = value; + } + + void set_outoctets(uint64_t value) { + out_octets_ = value; + } + + void set_nettime() { + net_time_ = std::chrono::system_clock::now(); + } + + uint64_t ParseLine(char *line); + uint64_t GetPhysicalMemory(); + uint64_t GetProcessUsedMemory(); double MemoryPercent(); double CPUPercent(); - std::pair Octets(); - std::vector GPUMemoryTotal(); - std::vector GPUMemoryUsed(); + std::pair Octets(); + std::vector GPUMemoryTotal(); + std::vector GPUMemoryUsed(); std::vector CPUCorePercent(); - std::vector getTotalCpuTime(std::vector &workTime); - std::vector GPUTemperature(); + std::vector getTotalCpuTime(std::vector &workTime); + std::vector GPUTemperature(); std::vector CPUTemperature(); - }; -} -} -} +} // namespace server +} // namespace milvus +} // namespace zilliz