From e9dac7521f8bdb814595f63fdd97f2910736f980 Mon Sep 17 00:00:00 2001 From: yu yunfeng Date: Mon, 3 Jun 2019 15:50:04 +0800 Subject: [PATCH 1/8] fix CHANGELOG CONFLICTcqqq Former-commit-id: da059eb1ab2c0d4dac31e698e47a345b9baf916c --- cpp/src/metrics/SystemInfo.cpp | 7 +++++++ cpp/src/metrics/SystemInfo.h | 35 ++++++++++++++++++++++++++++++++++ 2 files changed, 42 insertions(+) create mode 100644 cpp/src/metrics/SystemInfo.cpp create mode 100644 cpp/src/metrics/SystemInfo.h diff --git a/cpp/src/metrics/SystemInfo.cpp b/cpp/src/metrics/SystemInfo.cpp new file mode 100644 index 0000000000..ae3884fb1e --- /dev/null +++ b/cpp/src/metrics/SystemInfo.cpp @@ -0,0 +1,7 @@ +/******************************************************************************* + * Copyright 上海赜睿信息科技有限公司(Zilliz) - All Rights Reserved + * Unauthorized copying of this file, via any medium is strictly prohibited. + * Proprietary and confidential. + ******************************************************************************/ + +#include "SystemInfo.h" diff --git a/cpp/src/metrics/SystemInfo.h b/cpp/src/metrics/SystemInfo.h new file mode 100644 index 0000000000..78ae1a5672 --- /dev/null +++ b/cpp/src/metrics/SystemInfo.h @@ -0,0 +1,35 @@ +/******************************************************************************* + * Copyright 上海赜睿信息科技有限公司(Zilliz) - All Rights Reserved + * Unauthorized copying of this file, via any medium is strictly prohibited. + * Proprietary and confidential. + ******************************************************************************/ + +#pragma once + +#include "sys/types.h" +#include "sys/sysinfo.h" + + +namespace zilliz { +namespace vecwise { +namespace server { + +class SystemInfo { + private: + + public: + static SystemInfo & + GetInstance(){ + static SystemInfo instance; + return instance; + } + + long long GetPhysicalMemory(); + + + +}; + +} +} +} From 24863f63e54d8f575e5020b6adc6465990267c9c Mon Sep 17 00:00:00 2001 From: yu yunfeng Date: Wed, 5 Jun 2019 15:45:48 +0800 Subject: [PATCH 2/8] all metrics are done Former-commit-id: 68275f6545ac62a28899f4b6bbb687365c52d52e --- cpp/src/CMakeLists.txt | 7 +- cpp/src/db/DBImpl.inl | 17 +- cpp/src/db/FaissExecutionEngine.inl | 5 +- cpp/src/db/MemManager.inl | 5 + cpp/src/metrics/MetricBase.h | 11 + cpp/src/metrics/PrometheusMetrics.cpp | 77 ++++++- cpp/src/metrics/PrometheusMetrics.h | 90 +++++++- cpp/src/metrics/SystemInfo.cpp | 204 ++++++++++++++++++ cpp/src/metrics/SystemInfo.h | 29 ++- cpp/src/server/MegasearchServer.cpp | 3 +- cpp/src/server/MegasearchThreadPoolServer.cpp | 19 ++ cpp/src/server/MegasearchThreadPoolServer.h | 32 +++ cpp/src/server/Server.cpp | 1 + cpp/src/thrift/megasearch.thrift | 2 +- cpp/unittest/CMakeLists.txt | 1 + 15 files changed, 485 insertions(+), 18 deletions(-) create mode 100644 cpp/src/server/MegasearchThreadPoolServer.cpp create mode 100644 cpp/src/server/MegasearchThreadPoolServer.h diff --git a/cpp/src/CMakeLists.txt b/cpp/src/CMakeLists.txt index 86afe997dc..1ff02d8e43 100644 --- a/cpp/src/CMakeLists.txt +++ b/cpp/src/CMakeLists.txt @@ -33,7 +33,7 @@ set(service_files thrift/gen-cpp/MegasearchService.cpp thrift/gen-cpp/megasearch_constants.cpp thrift/gen-cpp/megasearch_types.cpp - ) + metrics/SystemInfo.cpp metrics/SystemInfo.h server/MegasearchThreadPoolServer.cpp server/MegasearchThreadPoolServer.h) set(vecwise_engine_files ${CMAKE_CURRENT_SOURCE_DIR}/main.cpp @@ -51,6 +51,7 @@ include_directories(/usr/include) include_directories(/usr/local/cuda/include) include_directories(thrift/gen-cpp) + #target_link_libraries(megasearch boost_system_static) #target_link_libraries(megasearch boost_filesystem_static) #target_link_libraries(megasearch boost_serialization_static) @@ -92,6 +93,7 @@ set(third_party_libs snappy zlib zstd + ${CUDA_TOOLKIT_ROOT_DIR}/lib64/stubs/libnvidia-ml.so ) if (GPU_VERSION STREQUAL "ON") @@ -109,6 +111,7 @@ if (GPU_VERSION STREQUAL "ON") libprometheus-cpp-push.a libprometheus-cpp-pull.a libprometheus-cpp-core.a + ${CUDA_TOOLKIT_ROOT_DIR}/lib64/stubs/libnvidia-ml.so ) else() set(engine_libs @@ -122,6 +125,7 @@ else() libprometheus-cpp-push.a libprometheus-cpp-pull.a libprometheus-cpp-core.a + ${CUDA_TOOLKIT_ROOT_DIR}/lib64/stubs/libnvidia-ml.so ) endif () @@ -175,7 +179,6 @@ set(server_libs liblz4.a dl metrics - ) add_executable(vecwise_server diff --git a/cpp/src/db/DBImpl.inl b/cpp/src/db/DBImpl.inl index ba0074ae2c..9103be9984 100644 --- a/cpp/src/db/DBImpl.inl +++ b/cpp/src/db/DBImpl.inl @@ -63,7 +63,7 @@ Status DBImpl::InsertVectors(const std::string& table_id_, // double average_time = double(time_span.count()) / n; double total_time = METRICS_MICROSECONDS(start_time,end_time); - double avg_time = total_time / n; + double avg_time = total_time / double(n); for (int i = 0; i < n; ++i) { server::Metrics::GetInstance().AddVectorsDurationHistogramOberve(avg_time); } @@ -85,13 +85,14 @@ Status DBImpl::Query(const std::string &table_id, size_t k, size_t nq, meta::DatesT dates = {meta::Meta::GetDate()}; Status result = Query(table_id, k, nq, vectors, dates, results); auto end_time = METRICS_NOW_TIME; - auto total_time = METRICS_MICROSECONDS(start_time,end_time); + auto total_time = METRICS_MICROSECONDS(start_time, end_time); auto average_time = total_time / nq; for (int i = 0; i < nq; ++i) { server::Metrics::GetInstance().QueryResponseSummaryObserve(total_time); } server::Metrics::GetInstance().QueryVectorResponseSummaryObserve(average_time, nq); server::Metrics::GetInstance().QueryVectorResponsePerSecondGaugeSet(double (nq) / total_time); + server::Metrics::GetInstance().QueryResponsePerSecondGaugeSet(1.0 / total_time); return result; } @@ -256,17 +257,23 @@ void DBImpl::StartTimerTasks(int interval) { template void DBImpl::BackgroundTimerTask(int interval) { Status status; + server::SystemInfo::GetInstance().Init(); while (true) { if (!bg_error_.ok()) break; if (shutting_down_.load(std::memory_order_acquire)) break; std::this_thread::sleep_for(std::chrono::seconds(interval)); - int64_t cache_total = cache::CpuCacheMgr::GetInstance()->CacheUsage(); - LOG(DEBUG) << "Cache usage " << cache_total; - server::Metrics::GetInstance().CacheUsageGaugeSet(static_cast(cache_total)); + server::Metrics::GetInstance().KeepingAliveCounterIncrement(interval); + int64_t cache_usage = cache::CpuCacheMgr::GetInstance()->CacheUsage(); + int64_t cache_total = cache::CpuCacheMgr::GetInstance()->CacheCapacity(); + server::Metrics::GetInstance().CacheUsageGaugeSet(cache_usage*100/cache_total); long size; Size(size); server::Metrics::GetInstance().DataFileSizeGaugeSet(size); + server::Metrics::GetInstance().CPUUsagePercentSet(); + server::Metrics::GetInstance().RAMUsagePercentSet(); + server::Metrics::GetInstance().GPUPercentGaugeSet(); + server::Metrics::GetInstance().GPUMemoryUsageGaugeSet(); TrySchedule(); } } diff --git a/cpp/src/db/FaissExecutionEngine.inl b/cpp/src/db/FaissExecutionEngine.inl index 8dd701d54d..0f22f41a7c 100644 --- a/cpp/src/db/FaissExecutionEngine.inl +++ b/cpp/src/db/FaissExecutionEngine.inl @@ -130,8 +130,11 @@ Status FaissExecutionEngine::Search(long n, long k, float *distances, long *labels) const { - + auto start_time = METRICS_NOW_TIME; pIndex_->search(n, data, k, distances, labels); + auto end_time = METRICS_NOW_TIME; + auto total_time = METRICS_MICROSECONDS(start_time,end_time); + server::Metrics::GetInstance().QueryIndexTypePerSecondSet(IndexTrait::BuildIndexType, double(n)/double(total_time)); return Status::OK(); } diff --git a/cpp/src/db/MemManager.inl b/cpp/src/db/MemManager.inl index 528622795d..b4ffc1b8db 100644 --- a/cpp/src/db/MemManager.inl +++ b/cpp/src/db/MemManager.inl @@ -31,8 +31,12 @@ MemVectors::MemVectors(const std::shared_ptr& meta_ptr, template void MemVectors::Add(size_t n_, const float* vectors_, IDNumbers& vector_ids_) { + auto start_time = METRICS_NOW_TIME; pIdGenerator_->GetNextIDNumbers(n_, vector_ids_); pEE_->AddWithIds(n_, vectors_, vector_ids_.data()); + auto end_time = METRICS_NOW_TIME; + auto total_time = METRICS_MICROSECONDS(start_time, end_time); + server::Metrics::GetInstance().AddVectorsPerSecondGaugeSet(static_cast(n_), static_cast(schema_.dimension), total_time); } template @@ -107,6 +111,7 @@ Status MemManager::InsertVectors(const std::string& table_id_, const float* vectors_, IDNumbers& vector_ids_) { std::unique_lock lock(mutex_); + return InsertVectorsNoLock(table_id_, n_, vectors_, vector_ids_); } diff --git a/cpp/src/metrics/MetricBase.h b/cpp/src/metrics/MetricBase.h index fae4b084e3..96dcf22ed6 100644 --- a/cpp/src/metrics/MetricBase.h +++ b/cpp/src/metrics/MetricBase.h @@ -8,6 +8,7 @@ #include "utils/Error.h" #include "server/ServerConfig.h" +#include "SystemInfo.h" namespace zilliz { namespace vecwise { @@ -71,6 +72,16 @@ class MetricsBase{ virtual void AddVectorsFailGaugeSet(double value) {}; virtual void QueryVectorResponseSummaryObserve(double value, int count = 1) {}; virtual void QueryVectorResponsePerSecondGaugeSet(double value) {}; + virtual void CPUUsagePercentSet() {}; + virtual void RAMUsagePercentSet() {}; + virtual void QueryResponsePerSecondGaugeSet(double value) {}; + virtual void GPUPercentGaugeSet() {}; + virtual void GPUMemoryUsageGaugeSet() {}; + virtual void AddVectorsPerSecondGaugeSet(int num_vector, int dim, double time) {}; + virtual void QueryIndexTypePerSecondSet(std::string type, double value) {}; + virtual void ConnectionGaugeIncrement() {}; + virtual void ConnectionGaugeDecrement() {}; + virtual void KeepingAliveCounterIncrement(double value = 1) {}; }; diff --git a/cpp/src/metrics/PrometheusMetrics.cpp b/cpp/src/metrics/PrometheusMetrics.cpp index 7b93a33e9b..39462e80ea 100644 --- a/cpp/src/metrics/PrometheusMetrics.cpp +++ b/cpp/src/metrics/PrometheusMetrics.cpp @@ -5,6 +5,7 @@ ******************************************************************************/ #include "PrometheusMetrics.h" +#include "SystemInfo.h" namespace zilliz { @@ -25,10 +26,84 @@ PrometheusMetrics::Init() { // Exposer Registry exposer_ptr_->RegisterCollectable(registry_); - return SERVER_SUCCESS; + } +void +PrometheusMetrics::CPUUsagePercentSet() { + if(!startup_) return ; + double usage_percent = server::SystemInfo::GetInstance().CPUPercent(); + CPU_usage_percent_.Set(usage_percent); +} + +void +PrometheusMetrics::RAMUsagePercentSet() { + if(!startup_) return ; + double usage_percent = server::SystemInfo::GetInstance().MemoryPercent(); + RAM_usage_percent_.Set(usage_percent); +} + +void +PrometheusMetrics::GPUPercentGaugeSet() { + if(!startup_) return; + int numDevide = server::SystemInfo::GetInstance().NumDevice(); + std::vector values = server::SystemInfo::GetInstance().GPUPercent(); + if(numDevide >= 1) GPU0_percent_gauge_.Set(static_cast(values[0])); + if(numDevide >= 2) GPU1_percent_gauge_.Set(static_cast(values[1])); + if(numDevide >= 3) GPU2_percent_gauge_.Set(static_cast(values[2])); + if(numDevide >= 4) GPU3_percent_gauge_.Set(static_cast(values[3])); + if(numDevide >= 5) GPU4_percent_gauge_.Set(static_cast(values[4])); + if(numDevide >= 6) GPU5_percent_gauge_.Set(static_cast(values[5])); + if(numDevide >= 7) GPU6_percent_gauge_.Set(static_cast(values[6])); + if(numDevide >= 8) GPU7_percent_gauge_.Set(static_cast(values[7])); + + // to do +} + +void PrometheusMetrics::GPUMemoryUsageGaugeSet() { + if(!startup_) return; + std::vector values = server::SystemInfo::GetInstance().GPUMemoryUsed(); + unsigned long long MtoB = 1024*1024; + int numDevice = values.size(); + if(numDevice >=1) GPU0_memory_usage_gauge_.Set(values[0]/MtoB); + if(numDevice >=2) GPU1_memory_usage_gauge_.Set(values[1]/MtoB); + if(numDevice >=3) GPU2_memory_usage_gauge_.Set(values[2]/MtoB); + if(numDevice >=4) GPU3_memory_usage_gauge_.Set(values[3]/MtoB); + if(numDevice >=5) GPU4_memory_usage_gauge_.Set(values[4]/MtoB); + if(numDevice >=6) GPU5_memory_usage_gauge_.Set(values[5]/MtoB); + if(numDevice >=7) GPU6_memory_usage_gauge_.Set(values[6]/MtoB); + if(numDevice >=8) GPU7_memory_usage_gauge_.Set(values[7]/MtoB); + + // to do +} +void PrometheusMetrics::AddVectorsPerSecondGaugeSet(int num_vector, int dim, double time) { + // MB/s + if(!startup_) return; + + long long MtoB = 1024*1024; + long long size = num_vector * dim * 4; + add_vectors_per_second_gauge_.Set(size/time/MtoB); + +} +void PrometheusMetrics::QueryIndexTypePerSecondSet(std::string type, double value) { + if(type == "IVF"){ + query_index_IVF_type_per_second_gauge_.Set(value); + } else if(type == "IDMap"){ + query_index_IDMAP_type_per_second_gauge_.Set(value); + } + +} +void PrometheusMetrics::ConnectionGaugeIncrement() { + if(!startup_) return; + connection_gauge_.Increment(); +} +void PrometheusMetrics::ConnectionGaugeDecrement() { + if(!startup_) return; + connection_gauge_.Decrement(); +} + + } } } diff --git a/cpp/src/metrics/PrometheusMetrics.h b/cpp/src/metrics/PrometheusMetrics.h index cfb127968f..c23c137fe8 100644 --- a/cpp/src/metrics/PrometheusMetrics.h +++ b/cpp/src/metrics/PrometheusMetrics.h @@ -104,9 +104,16 @@ class PrometheusMetrics: public MetricsBase { void AddVectorsFailGaugeSet(double value) override { if(startup_) add_vectors_fail_gauge_.Set(value);}; void QueryVectorResponseSummaryObserve(double value, int count = 1) override { if (startup_) for(int i = 0 ; i < count ; ++i) query_vector_response_summary_.Observe(value);}; void QueryVectorResponsePerSecondGaugeSet(double value) override {if (startup_) query_vector_response_per_second_gauge_.Set(value);}; - - - + void CPUUsagePercentSet() override ; + void RAMUsagePercentSet() override ; + void QueryResponsePerSecondGaugeSet(double value) override {if(startup_) query_response_per_second_gauge.Set(value);}; + void GPUPercentGaugeSet() override ; + void GPUMemoryUsageGaugeSet() override ; + void AddVectorsPerSecondGaugeSet(int num_vector, int dim, double time) override ; + void QueryIndexTypePerSecondSet(std::string type, double value) override ; + void ConnectionGaugeIncrement() override ; + void ConnectionGaugeDecrement() override ; + void KeepingAliveCounterIncrement(double value = 1) override {if(startup_) keeping_alive_counter_.Increment(value);}; // prometheus::Counter &connection_total() {return connection_total_; } // @@ -273,7 +280,7 @@ class PrometheusMetrics: public MetricsBase { .Name("build_index_duration_microseconds") .Help("histogram of processing time for building index") .Register(*registry_); - prometheus::Histogram &build_index_duration_seconds_histogram_ = build_index_duration_seconds_.Add({}, BucketBoundaries{2e6, 4e6, 6e6, 8e6, 1e7}); + prometheus::Histogram &build_index_duration_seconds_histogram_ = build_index_duration_seconds_.Add({}, BucketBoundaries{5e5, 2e6, 4e6, 6e6, 8e6, 1e7}); //record processing time for all building index @@ -414,6 +421,12 @@ class PrometheusMetrics: public MetricsBase { .Register(*registry_); prometheus::Gauge &query_vector_response_per_second_gauge_ = query_vector_response_per_second_.Add({}); + prometheus::Family &query_response_per_second_ = prometheus::BuildGauge() + .Name("query_response_per_microsecond") + .Help("the number of queries can be processed every microsecond") + .Register(*registry_); + prometheus::Gauge &query_response_per_second_gauge = query_response_per_second_.Add({}); + prometheus::Family &disk_store_IO_speed_ = prometheus::BuildGauge() .Name("disk_store_IO_speed_bytes_per_microseconds") .Help("disk_store_IO_speed") @@ -433,6 +446,75 @@ class PrometheusMetrics: public MetricsBase { prometheus::Gauge &add_vectors_success_gauge_ = add_vectors_.Add({{"outcome", "success"}}); prometheus::Gauge &add_vectors_fail_gauge_ = add_vectors_.Add({{"outcome", "fail"}}); + prometheus::Family &add_vectors_per_second_ = prometheus::BuildGauge() + .Name("add_vectors_throughput_per_microsecond") + .Help("add vectors throughput per microsecond") + .Register(*registry_); + prometheus::Gauge &add_vectors_per_second_gauge_ = add_vectors_per_second_.Add({}); + + prometheus::Family &CPU_ = prometheus::BuildGauge() + .Name("CPU_usage_percent") + .Help("CPU usage percent by this this process") + .Register(*registry_); + prometheus::Gauge &CPU_usage_percent_ = CPU_.Add({}); + + prometheus::Family &RAM_ = prometheus::BuildGauge() + .Name("RAM_usage_percent") + .Help("RAM usage percent by this process") + .Register(*registry_); + prometheus::Gauge &RAM_usage_percent_ = RAM_.Add({}); + + //GPU Usage Percent + prometheus::Family &GPU_percent_ = prometheus::BuildGauge() + .Name("Gpu_usage_percent") + .Help("GPU_usage_percent ") + .Register(*registry_); + prometheus::Gauge &GPU0_percent_gauge_ = GPU_percent_.Add({{"DeviceNum", "0"}}); + prometheus::Gauge &GPU1_percent_gauge_ = GPU_percent_.Add({{"DeviceNum", "1"}}); + prometheus::Gauge &GPU2_percent_gauge_ = GPU_percent_.Add({{"DeviceNum", "2"}}); + prometheus::Gauge &GPU3_percent_gauge_ = GPU_percent_.Add({{"DeviceNum", "3"}}); + prometheus::Gauge &GPU4_percent_gauge_ = GPU_percent_.Add({{"DeviceNum", "4"}}); + prometheus::Gauge &GPU5_percent_gauge_ = GPU_percent_.Add({{"DeviceNum", "5"}}); + prometheus::Gauge &GPU6_percent_gauge_ = GPU_percent_.Add({{"DeviceNum", "6"}}); + prometheus::Gauge &GPU7_percent_gauge_ = GPU_percent_.Add({{"DeviceNum", "7"}}); + + + + + //GPU Mempry used + prometheus::Family &GPU_memory_usage_ = prometheus::BuildGauge() + .Name("GPU_memory_usage_total") + .Help("GPU memory usage total ") + .Register(*registry_); + prometheus::Gauge &GPU0_memory_usage_gauge_ = GPU_memory_usage_.Add({{"DeviceNum", "0"}}); + prometheus::Gauge &GPU1_memory_usage_gauge_ = GPU_memory_usage_.Add({{"DeviceNum", "1"}}); + prometheus::Gauge &GPU2_memory_usage_gauge_ = GPU_memory_usage_.Add({{"DeviceNum", "2"}}); + prometheus::Gauge &GPU3_memory_usage_gauge_ = GPU_memory_usage_.Add({{"DeviceNum", "3"}}); + prometheus::Gauge &GPU4_memory_usage_gauge_ = GPU_memory_usage_.Add({{"DeviceNum", "4"}}); + prometheus::Gauge &GPU5_memory_usage_gauge_ = GPU_memory_usage_.Add({{"DeviceNum", "5"}}); + prometheus::Gauge &GPU6_memory_usage_gauge_ = GPU_memory_usage_.Add({{"DeviceNum", "6"}}); + prometheus::Gauge &GPU7_memory_usage_gauge_ = GPU_memory_usage_.Add({{"DeviceNum", "7"}}); + + prometheus::Family &query_index_type_per_second_ = prometheus::BuildGauge() + .Name("query_index_throughtout_per_microsecond") + .Help("query index throughtout per microsecond") + .Register(*registry_); + prometheus::Gauge &query_index_IVF_type_per_second_gauge_ = query_index_type_per_second_.Add({{"IndexType","IVF"}}); + prometheus::Gauge &query_index_IDMAP_type_per_second_gauge_ = query_index_type_per_second_.Add({{"IndexType","IDMAP"}}); + + prometheus::Family &connection_ = prometheus::BuildGauge() + .Name("connection_number") + .Help("the number of connections") + .Register(*registry_); + prometheus::Gauge &connection_gauge_ = connection_.Add({}); + + prometheus::Family &keeping_alive_ = prometheus::BuildCounter() + .Name("keeping_alive_seconds_total") + .Help("total seconds of the serve alive") + .Register(*registry_); + prometheus::Counter &keeping_alive_counter_ = keeping_alive_.Add({}); + + }; diff --git a/cpp/src/metrics/SystemInfo.cpp b/cpp/src/metrics/SystemInfo.cpp index ae3884fb1e..24c6688786 100644 --- a/cpp/src/metrics/SystemInfo.cpp +++ b/cpp/src/metrics/SystemInfo.cpp @@ -5,3 +5,207 @@ ******************************************************************************/ #include "SystemInfo.h" + +#include +#include +#include +#include +#include "nvml.h" +//#include +// +//std::mutex mutex; + + +namespace zilliz { +namespace vecwise { +namespace server { + +void SystemInfo::Init() { + if(initialized) return; +// mutex.lock(); + initialized = true; +// mutex.unlock(); + // initialize CPU information + FILE* file; + struct tms timeSample; + char line[128]; + lastCPU_ = times(&timeSample); + lastSysCPU_ = timeSample.tms_stime; + lastUserCPU_ = timeSample.tms_utime; + file = fopen("/proc/cpuinfo", "r"); + numProcessors = 0; + while(fgets(line, 128, file) != NULL){ + if (strncmp(line, "processor", 9) == 0) numProcessors++; + } + total_RAM_ = GetPhysicalMemory(); + fclose(file); + + //initialize GPU information + nvmlReturn_t nvmlresult; + nvmlresult = nvmlInit(); + if(NVML_SUCCESS != nvmlresult) { + printf("System information initilization failed"); + return ; + } + nvmlresult = nvmlDeviceGetCount(&numDevice); + if(NVML_SUCCESS != nvmlresult) { + printf("Unable to get devidce number"); + return ; + } + +} + +long long +SystemInfo::parseLine(char *line) { + // This assumes that a digit will be found and the line ends in " Kb". + int i = strlen(line); + const char *p = line; + while (*p < '0' || *p > '9') p++; + line[i - 3] = '\0'; + i = atoi(p); + return static_cast(i); +} + +unsigned long +SystemInfo::GetPhysicalMemory() { + struct sysinfo memInfo; + sysinfo (&memInfo); + unsigned long totalPhysMem = memInfo.totalram; + //Multiply in next statement to avoid int overflow on right hand side... + totalPhysMem *= memInfo.mem_unit; + return totalPhysMem; +} + +unsigned long +SystemInfo::GetProcessUsedMemory() { + //Note: this value is in KB! + FILE* file = fopen("/proc/self/status", "r"); + long long result = -1; + char line[128]; + + while (fgets(line, 128, file) != NULL){ + if (strncmp(line, "VmRSS:", 6) == 0){ + result = parseLine(line); + break; + } + } + fclose(file); +// printf("RAM is %d",result); + // return value in Byte + return (result*1024); + +} + +double +SystemInfo::MemoryPercent() { + if (!initialized) Init(); + return GetProcessUsedMemory()*100/total_RAM_; +} + +double +SystemInfo::CPUPercent() { + if (!initialized) Init(); + struct tms timeSample; + clock_t now; + double percent; + + now = times(&timeSample); + if (now <= lastCPU_ || timeSample.tms_stime < lastSysCPU_ || + timeSample.tms_utime < lastUserCPU_){ + //Overflow detection. Just skip this value. + percent = -1.0; + } + else{ + percent = (timeSample.tms_stime - lastSysCPU_) + + (timeSample.tms_utime - lastUserCPU_); + percent /= (now - lastCPU_); + percent /= numProcessors; + percent *= 100; + } + lastCPU_ = now; + lastSysCPU_ = timeSample.tms_stime; + lastUserCPU_ = timeSample.tms_utime; + + return percent; +} + +std::unordered_map> +SystemInfo::GetGPUMemPercent(){ + // return GPUID: MEM% + + //write GPU info to a file + system("nvidia-smi pmon -c 1 > GPUInfo.txt"); + int pid = (int)getpid(); + + //parse line + std::ifstream read_file; + read_file.open("GPUInfo.txt"); + std::string line; + while(getline(read_file, line)){ + std::vector words = split(line); + // 0 1 2 3 4 5 6 7 + //words stand for gpuindex, pid, type, sm, mem, enc, dec, command respectively + if(std::stoi(words[1]) != pid) continue; + int GPUindex = std::stoi(words[0]); + double sm_percent = std::stod(words[3]); + double mem_percent = std::stod(words[4]); + + } + +} + +std::vector +SystemInfo::split(std::string input) { + std::vector words; + input += " "; + int word_start = 0; + for (int i = 0; i < input.size(); ++i) { + if(input[i] != ' ') continue; + if(input[i] == ' ') { + word_start = i + 1; + continue; + } + words.push_back(input.substr(word_start,i-word_start)); + } + return words; +} + +std::vector +SystemInfo::GPUPercent() { + // get GPU usage percent + if(!initialized) Init(); + std::vector result; + nvmlUtilization_t utilization; + for (int i = 0; i < numDevice; ++i) { + nvmlDevice_t device; + nvmlDeviceGetHandleByIndex(i, &device); + nvmlDeviceGetUtilizationRates(device, &utilization); + result.push_back(utilization.gpu); + } + return result; +// nvmlDevice_t device; +// nvmlUtilization_t utilization; +// nvmlDeviceGetHandleByIndex(device_index, &device); +// nvmlDeviceGetUtilizationRates(device, &utilization); +// return utilization.gpu; +} + +std::vector +SystemInfo::GPUMemoryUsed() { + // get GPU memory used + if(!initialized) Init(); + + std::vector result; + nvmlMemory_t nvmlMemory; + for (int i = 0; i < numDevice; ++i) { + nvmlDevice_t device; + nvmlDeviceGetHandleByIndex(i, &device); + nvmlDeviceGetMemoryInfo(device, &nvmlMemory); + result.push_back(nvmlMemory.used); + } + return result; +} + +} +} +} \ No newline at end of file diff --git a/cpp/src/metrics/SystemInfo.h b/cpp/src/metrics/SystemInfo.h index 78ae1a5672..8d2eeb131e 100644 --- a/cpp/src/metrics/SystemInfo.h +++ b/cpp/src/metrics/SystemInfo.h @@ -8,6 +8,15 @@ #include "sys/types.h" #include "sys/sysinfo.h" +#include "stdlib.h" +#include "stdio.h" +#include "string.h" +#include "sys/times.h" +#include "sys/vtimes.h" + +#include +#include + namespace zilliz { @@ -16,6 +25,12 @@ namespace server { class SystemInfo { private: + unsigned long total_RAM_ ; + clock_t lastCPU_, lastSysCPU_, lastUserCPU_; + int numProcessors; + //number of GPU + unsigned int numDevice; + bool initialized = false; public: static SystemInfo & @@ -24,9 +39,17 @@ class SystemInfo { return instance; } - long long GetPhysicalMemory(); - - + void Init(); + int NumDevice() {return numDevice;}; + long long parseLine(char* line); + unsigned long GetPhysicalMemory(); + unsigned long GetProcessUsedMemory(); + double MemoryPercent(); + double CPUPercent(); + std::unordered_map> GetGPUMemPercent(); + std::vector split(std::string input); + std::vector GPUPercent(); + std::vector GPUMemoryUsed(); }; diff --git a/cpp/src/server/MegasearchServer.cpp b/cpp/src/server/MegasearchServer.cpp index f771fc4dd8..459402c879 100644 --- a/cpp/src/server/MegasearchServer.cpp +++ b/cpp/src/server/MegasearchServer.cpp @@ -8,6 +8,7 @@ #include "megasearch_types.h" #include "megasearch_constants.h" #include "ServerConfig.h" +#include "MegasearchThreadPoolServer.h" #include #include @@ -76,7 +77,7 @@ MegasearchServer::StartService() { threadManager->threadFactory(threadFactory); threadManager->start(); - s_server.reset(new TThreadPoolServer(processor, + s_server.reset(new MegasearchThreadPoolServer(processor, server_transport, transport_factory, protocol_factory, diff --git a/cpp/src/server/MegasearchThreadPoolServer.cpp b/cpp/src/server/MegasearchThreadPoolServer.cpp new file mode 100644 index 0000000000..4f3a92e021 --- /dev/null +++ b/cpp/src/server/MegasearchThreadPoolServer.cpp @@ -0,0 +1,19 @@ +/******************************************************************************* + * Copyright 上海赜睿信息科技有限公司(Zilliz) - All Rights Reserved + * Unauthorized copying of this file, via any medium is strictly prohibited. + * Proprietary and confidential. + ******************************************************************************/ +#include "metrics/Metrics.h" + + +#include "MegasearchThreadPoolServer.h" + + +void zilliz::vecwise::server::MegasearchThreadPoolServer::onClientConnected(const std::shared_ptr &pClient) { + server::Metrics::GetInstance().ConnectionGaugeIncrement(); + TThreadPoolServer::onClientConnected(pClient); +} +void zilliz::vecwise::server::MegasearchThreadPoolServer::onClientDisconnected(apache::thrift::server::TConnectedClient *pClient) { + server::Metrics::GetInstance().ConnectionGaugeDecrement(); + TThreadPoolServer::onClientDisconnected(pClient); +} diff --git a/cpp/src/server/MegasearchThreadPoolServer.h b/cpp/src/server/MegasearchThreadPoolServer.h new file mode 100644 index 0000000000..03eb3b608a --- /dev/null +++ b/cpp/src/server/MegasearchThreadPoolServer.h @@ -0,0 +1,32 @@ +/******************************************************************************* + * Copyright 上海赜睿信息科技有限公司(Zilliz) - All Rights Reserved + * Unauthorized copying of this file, via any medium is strictly prohibited. + * Proprietary and confidential. + ******************************************************************************/ + +#pragma once + +#include + + +namespace zilliz { +namespace vecwise { +namespace server { + +class MegasearchThreadPoolServer : public apache::thrift::server::TThreadPoolServer { + public: + MegasearchThreadPoolServer( + const std::shared_ptr& processor, + const std::shared_ptr& serverTransport, + const std::shared_ptr& transportFactory, + const std::shared_ptr& protocolFactory, + const std::shared_ptr& threadManager + = apache::thrift::concurrency::ThreadManager::newSimpleThreadManager()); + protected: + void onClientConnected(const std::shared_ptr& pClient) override ; + void onClientDisconnected(apache::thrift::server::TConnectedClient* pClient) override ; +}; + +} +} +} \ No newline at end of file diff --git a/cpp/src/server/Server.cpp b/cpp/src/server/Server.cpp index 5829f3e6a1..67fbfeb687 100644 --- a/cpp/src/server/Server.cpp +++ b/cpp/src/server/Server.cpp @@ -175,6 +175,7 @@ Server::Start() { signal(SIGHUP, SignalUtil::HandleSignal); signal(SIGTERM, SignalUtil::HandleSignal); server::Metrics::GetInstance().Init(); + server::SystemInfo::GetInstance().Init(); SERVER_LOG_INFO << "Vecwise server is running..."; StartService(); diff --git a/cpp/src/thrift/megasearch.thrift b/cpp/src/thrift/megasearch.thrift index 800faf5db8..07fcfedfe8 100644 --- a/cpp/src/thrift/megasearch.thrift +++ b/cpp/src/thrift/megasearch.thrift @@ -34,7 +34,7 @@ exception Exception { * @brief Table column description */ struct Column { - 1: required i32 type; ///< Column Type: 0:invealid/1:int8/2:int16/3:int32/4:int64/5:float32/6:float64/7:date/8:vector + 1: required i32 type; ///< Column Type: 0:invalid/1:int8/2:int16/3:int32/4:int64/5:float32/6:float64/7:date/8:vector 2: required string name; ///< Column name } diff --git a/cpp/unittest/CMakeLists.txt b/cpp/unittest/CMakeLists.txt index addce57da1..4db6674272 100644 --- a/cpp/unittest/CMakeLists.txt +++ b/cpp/unittest/CMakeLists.txt @@ -32,6 +32,7 @@ set(unittest_libs civetweb dl z + ${CUDA_TOOLKIT_ROOT_DIR}/lib64/stubs/libnvidia-ml.so ) add_subdirectory(server) From 07b85d3b38a820cc77799a4477ab2bc3b65f0180 Mon Sep 17 00:00:00 2001 From: yu yunfeng Date: Wed, 5 Jun 2019 15:50:39 +0800 Subject: [PATCH 3/8] add ms-59 in changelog Former-commit-id: 86e7f0cbbae12051c4c68b2f4ff7741007aed765 --- cpp/CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/cpp/CHANGELOG.md b/cpp/CHANGELOG.md index 47a5ebf6e4..cf501e00d9 100644 --- a/cpp/CHANGELOG.md +++ b/cpp/CHANGELOG.md @@ -39,6 +39,7 @@ Please mark all change in change log and use the ticket from JIRA. - MS-37 - Add query, cache usage, disk write speed and file data size metrics - MS-30 - Use faiss v1.5.2 - MS-54 - cmake: Change Thrift third party URL to github.com +- MS-59 - prometheus: add all proposed metrics ## Task From c55a70dc9423fa1a32deec51def6ad020d468741 Mon Sep 17 00:00:00 2001 From: yu yunfeng Date: Mon, 10 Jun 2019 10:33:24 +0800 Subject: [PATCH 4/8] fix codestyle Former-commit-id: 10caa1a57e07115fd3eab3f3a23eaebda6b8ecf2 --- cpp/src/metrics/PrometheusMetrics.cpp | 1 + cpp/src/metrics/SystemInfo.cpp | 66 ++++++++++++--------------- cpp/src/metrics/SystemInfo.h | 14 +++--- 3 files changed, 39 insertions(+), 42 deletions(-) diff --git a/cpp/src/metrics/PrometheusMetrics.cpp b/cpp/src/metrics/PrometheusMetrics.cpp index 39462e80ea..ae45a22fea 100644 --- a/cpp/src/metrics/PrometheusMetrics.cpp +++ b/cpp/src/metrics/PrometheusMetrics.cpp @@ -87,6 +87,7 @@ void PrometheusMetrics::AddVectorsPerSecondGaugeSet(int num_vector, int dim, dou } void PrometheusMetrics::QueryIndexTypePerSecondSet(std::string type, double value) { + if(!startup_) return; if(type == "IVF"){ query_index_IVF_type_per_second_gauge_.Set(value); } else if(type == "IDMap"){ diff --git a/cpp/src/metrics/SystemInfo.cpp b/cpp/src/metrics/SystemInfo.cpp index 24c6688786..a90e206a93 100644 --- a/cpp/src/metrics/SystemInfo.cpp +++ b/cpp/src/metrics/SystemInfo.cpp @@ -21,23 +21,23 @@ namespace vecwise { namespace server { void SystemInfo::Init() { - if(initialized) return; -// mutex.lock(); - initialized = true; -// mutex.unlock(); + if(initialized_) return; + + initialized_ = true; + // initialize CPU information FILE* file; - struct tms timeSample; + struct tms time_sample; char line[128]; - lastCPU_ = times(&timeSample); - lastSysCPU_ = timeSample.tms_stime; - lastUserCPU_ = timeSample.tms_utime; + last_cpu_ = times(&time_sample); + last_sys_cpu_ = time_sample.tms_stime; + last_user_cpu_ = time_sample.tms_utime; file = fopen("/proc/cpuinfo", "r"); - numProcessors = 0; + num_processors_ = 0; while(fgets(line, 128, file) != NULL){ - if (strncmp(line, "processor", 9) == 0) numProcessors++; + if (strncmp(line, "processor", 9) == 0) num_processors_++; } - total_RAM_ = GetPhysicalMemory(); + total_ram_ = GetPhysicalMemory(); fclose(file); //initialize GPU information @@ -47,7 +47,7 @@ void SystemInfo::Init() { printf("System information initilization failed"); return ; } - nvmlresult = nvmlDeviceGetCount(&numDevice); + nvmlresult = nvmlDeviceGetCount(&num_device_); if(NVML_SUCCESS != nvmlresult) { printf("Unable to get devidce number"); return ; @@ -90,7 +90,6 @@ SystemInfo::GetProcessUsedMemory() { } } fclose(file); -// printf("RAM is %d",result); // return value in Byte return (result*1024); @@ -98,33 +97,33 @@ SystemInfo::GetProcessUsedMemory() { double SystemInfo::MemoryPercent() { - if (!initialized) Init(); - return GetProcessUsedMemory()*100/total_RAM_; + if (!initialized_) Init(); + return GetProcessUsedMemory()*100/total_ram_; } double SystemInfo::CPUPercent() { - if (!initialized) Init(); - struct tms timeSample; + if (!initialized_) Init(); + struct tms time_sample; clock_t now; double percent; - now = times(&timeSample); - if (now <= lastCPU_ || timeSample.tms_stime < lastSysCPU_ || - timeSample.tms_utime < lastUserCPU_){ + now = times(&time_sample); + if (now <= last_cpu_ || time_sample.tms_stime < last_sys_cpu_ || + time_sample.tms_utime < last_user_cpu_){ //Overflow detection. Just skip this value. percent = -1.0; } else{ - percent = (timeSample.tms_stime - lastSysCPU_) + - (timeSample.tms_utime - lastUserCPU_); - percent /= (now - lastCPU_); - percent /= numProcessors; + percent = (time_sample.tms_stime - last_sys_cpu_) + + (time_sample.tms_utime - last_user_cpu_); + percent /= (now - last_cpu_); + percent /= num_processors_; percent *= 100; } - lastCPU_ = now; - lastSysCPU_ = timeSample.tms_stime; - lastUserCPU_ = timeSample.tms_utime; + last_cpu_ = now; + last_sys_cpu_ = time_sample.tms_stime; + last_user_cpu_ = time_sample.tms_utime; return percent; } @@ -173,31 +172,26 @@ SystemInfo::split(std::string input) { std::vector SystemInfo::GPUPercent() { // get GPU usage percent - if(!initialized) Init(); + if(!initialized_) Init(); std::vector result; nvmlUtilization_t utilization; - for (int i = 0; i < numDevice; ++i) { + for (int i = 0; i < num_device_; ++i) { nvmlDevice_t device; nvmlDeviceGetHandleByIndex(i, &device); nvmlDeviceGetUtilizationRates(device, &utilization); result.push_back(utilization.gpu); } return result; -// nvmlDevice_t device; -// nvmlUtilization_t utilization; -// nvmlDeviceGetHandleByIndex(device_index, &device); -// nvmlDeviceGetUtilizationRates(device, &utilization); -// return utilization.gpu; } std::vector SystemInfo::GPUMemoryUsed() { // get GPU memory used - if(!initialized) Init(); + if(!initialized_) Init(); std::vector result; nvmlMemory_t nvmlMemory; - for (int i = 0; i < numDevice; ++i) { + for (int i = 0; i < num_device_; ++i) { nvmlDevice_t device; nvmlDeviceGetHandleByIndex(i, &device); nvmlDeviceGetMemoryInfo(device, &nvmlMemory); diff --git a/cpp/src/metrics/SystemInfo.h b/cpp/src/metrics/SystemInfo.h index 8d2eeb131e..d173f4f3af 100644 --- a/cpp/src/metrics/SystemInfo.h +++ b/cpp/src/metrics/SystemInfo.h @@ -25,12 +25,14 @@ namespace server { class SystemInfo { private: - unsigned long total_RAM_ ; - clock_t lastCPU_, lastSysCPU_, lastUserCPU_; - int numProcessors; + unsigned long total_ram_ = 0; + clock_t last_cpu_ = clock_t(); + clock_t last_sys_cpu_ = clock_t(); + clock_t last_user_cpu_ = clock_t(); + int num_processors_ = 0; //number of GPU - unsigned int numDevice; - bool initialized = false; + unsigned int num_device_ = 0; + bool initialized_ = false; public: static SystemInfo & @@ -40,7 +42,7 @@ class SystemInfo { } void Init(); - int NumDevice() {return numDevice;}; + int NumDevice() {return num_device_;}; long long parseLine(char* line); unsigned long GetPhysicalMemory(); unsigned long GetProcessUsedMemory(); From 73a1d26e408dc7fc13fc43f628d6f6b8320e262e Mon Sep 17 00:00:00 2001 From: yu yunfeng Date: Mon, 10 Jun 2019 15:56:20 +0800 Subject: [PATCH 5/8] fix conflict Former-commit-id: fc062bd7f5818169bd1e3fac750ecf4b016d5d80 --- cpp/src/CMakeLists.txt | 6 +++++- cpp/src/db/FaissExecutionEngine.cpp | 2 +- cpp/src/db/MemManager.cpp | 2 +- cpp/src/server/MegasearchThreadPoolServer.cpp | 8 ++++++++ cpp/src/server/MegasearchThreadPoolServer.h | 1 + 5 files changed, 16 insertions(+), 3 deletions(-) diff --git a/cpp/src/CMakeLists.txt b/cpp/src/CMakeLists.txt index 2c1219b506..5669bd3f2d 100644 --- a/cpp/src/CMakeLists.txt +++ b/cpp/src/CMakeLists.txt @@ -34,7 +34,11 @@ set(service_files thrift/gen-cpp/MegasearchService.cpp thrift/gen-cpp/megasearch_constants.cpp thrift/gen-cpp/megasearch_types.cpp - metrics/SystemInfo.cpp metrics/SystemInfo.h server/MegasearchThreadPoolServer.cpp server/MegasearchThreadPoolServer.h) + metrics/SystemInfo.cpp + metrics/SystemInfo.h + server/MegasearchThreadPoolServer.cpp + server/MegasearchThreadPoolServer.h + ) set(vecwise_engine_files ${CMAKE_CURRENT_SOURCE_DIR}/main.cpp diff --git a/cpp/src/db/FaissExecutionEngine.cpp b/cpp/src/db/FaissExecutionEngine.cpp index c676ac74b2..65bdeead1f 100644 --- a/cpp/src/db/FaissExecutionEngine.cpp +++ b/cpp/src/db/FaissExecutionEngine.cpp @@ -135,7 +135,7 @@ Status FaissExecutionEngine::Search(long n, pIndex_->search(n, data, k, distances, labels); auto end_time = METRICS_NOW_TIME; auto total_time = METRICS_MICROSECONDS(start_time,end_time); - server::Metrics::GetInstance().QueryIndexTypePerSecondSet(IndexTrait::BuildIndexType, double(n)/double(total_time)); + server::Metrics::GetInstance().QueryIndexTypePerSecondSet(build_index_type_, double(n)/double(total_time)); return Status::OK(); } diff --git a/cpp/src/db/MemManager.cpp b/cpp/src/db/MemManager.cpp index a7d639d655..9bae4c9f21 100644 --- a/cpp/src/db/MemManager.cpp +++ b/cpp/src/db/MemManager.cpp @@ -34,7 +34,7 @@ void MemVectors::Add(size_t n_, const float* vectors_, IDNumbers& vector_ids_) { pEE_->AddWithIds(n_, vectors_, vector_ids_.data()); auto end_time = METRICS_NOW_TIME; auto total_time = METRICS_MICROSECONDS(start_time, end_time); - server::Metrics::GetInstance().AddVectorsPerSecondGaugeSet(static_cast(n_), static_cast(schema_.dimension), total_time); + server::Metrics::GetInstance().AddVectorsPerSecondGaugeSet(static_cast(n_), static_cast(schema_.dimension_), total_time); } size_t MemVectors::Total() const { diff --git a/cpp/src/server/MegasearchThreadPoolServer.cpp b/cpp/src/server/MegasearchThreadPoolServer.cpp index 4f3a92e021..f7ce0d4f72 100644 --- a/cpp/src/server/MegasearchThreadPoolServer.cpp +++ b/cpp/src/server/MegasearchThreadPoolServer.cpp @@ -17,3 +17,11 @@ void zilliz::vecwise::server::MegasearchThreadPoolServer::onClientDisconnected(a server::Metrics::GetInstance().ConnectionGaugeDecrement(); TThreadPoolServer::onClientDisconnected(pClient); } +zilliz::vecwise::server::MegasearchThreadPoolServer::MegasearchThreadPoolServer(const std::shared_ptr &processor, + const std::shared_ptr &serverTransport, + const std::shared_ptr &transportFactory, + const std::shared_ptr &protocolFactory, + const std::shared_ptr &threadManager) + : TThreadPoolServer(processor, serverTransport, transportFactory, protocolFactory, threadManager) { + +} diff --git a/cpp/src/server/MegasearchThreadPoolServer.h b/cpp/src/server/MegasearchThreadPoolServer.h index 03eb3b608a..309c17ef3f 100644 --- a/cpp/src/server/MegasearchThreadPoolServer.h +++ b/cpp/src/server/MegasearchThreadPoolServer.h @@ -22,6 +22,7 @@ class MegasearchThreadPoolServer : public apache::thrift::server::TThreadPoolSer const std::shared_ptr& protocolFactory, const std::shared_ptr& threadManager = apache::thrift::concurrency::ThreadManager::newSimpleThreadManager()); + protected: void onClientConnected(const std::shared_ptr& pClient) override ; void onClientDisconnected(apache::thrift::server::TConnectedClient* pClient) override ; From c74d368640c0d571c2bbfc253666d7fd62a465b7 Mon Sep 17 00:00:00 2001 From: yu yunfeng Date: Mon, 10 Jun 2019 19:16:06 +0800 Subject: [PATCH 6/8] CHANGELOG Former-commit-id: 22edc2371e9fc3ca684b271f5d3766cd91c8196b --- cpp/CHANGELOG.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cpp/CHANGELOG.md b/cpp/CHANGELOG.md index 1be1421972..8461041307 100644 --- a/cpp/CHANGELOG.md +++ b/cpp/CHANGELOG.md @@ -43,7 +43,7 @@ Please mark all change in change log and use the ticket from JIRA. - MS-37 - Add query, cache usage, disk write speed and file data size metrics - MS-30 - Use faiss v1.5.2 - MS-54 - cmake: Change Thrift third party URL to github.com -- MS-59 - prometheus: add all proposed metrics +- MS-69 - prometheus: add all proposed metrics ## Task From 4875bdc4b67cc21986baf9fcb62ae0649ad5549f Mon Sep 17 00:00:00 2001 From: zhiru Date: Tue, 11 Jun 2019 11:47:17 +0800 Subject: [PATCH 7/8] Remove rocksdb from third-party Former-commit-id: 5f7439e7a2152d5b6b9a8dd82ff54a091ea729ec --- cpp/CHANGELOG.md | 1 + cpp/cmake/DefineOptions.cmake | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/cpp/CHANGELOG.md b/cpp/CHANGELOG.md index 34d80abbbf..9a9310e7e2 100644 --- a/cpp/CHANGELOG.md +++ b/cpp/CHANGELOG.md @@ -15,6 +15,7 @@ Please mark all change in change log and use the ticket from JIRA. - MS-64 - Different table can have different index type - MS-52 - Return search score - MS-66 - Support time range query +- MS-68 - Remove rocksdb from third-party ## Task diff --git a/cpp/cmake/DefineOptions.cmake b/cpp/cmake/DefineOptions.cmake index ce2e4ae6be..cc358e7f1e 100644 --- a/cpp/cmake/DefineOptions.cmake +++ b/cpp/cmake/DefineOptions.cmake @@ -81,7 +81,7 @@ define_option(MEGASEARCH_WITH_OPENBLAS "Build with OpenBLAS library" ON) define_option(MEGASEARCH_WITH_PROMETHEUS "Build with PROMETHEUS library" ON) -define_option(MEGASEARCH_WITH_ROCKSDB "Build with RocksDB library" ON) +define_option(MEGASEARCH_WITH_ROCKSDB "Build with RocksDB library" OFF) define_option(MEGASEARCH_WITH_SNAPPY "Build with Snappy compression" ON) From 9cb66ac8d987c61313e5744e35bac5a08d32e390 Mon Sep 17 00:00:00 2001 From: yu yunfeng Date: Tue, 11 Jun 2019 14:10:22 +0800 Subject: [PATCH 8/8] fix Former-commit-id: 00af0b4a367be2dfb26fff09105482364fd56a8e --- cpp/src/metrics/PrometheusMetrics.cpp | 28 +++++- cpp/src/metrics/PrometheusMetrics.h | 44 ++------- cpp/src/metrics/SystemInfo.cpp | 90 ++++++++++--------- cpp/src/metrics/SystemInfo.h | 8 +- cpp/src/server/MegasearchThreadPoolServer.cpp | 15 +++- cpp/src/thrift/megasearch.thrift | 2 +- 6 files changed, 93 insertions(+), 94 deletions(-) diff --git a/cpp/src/metrics/PrometheusMetrics.cpp b/cpp/src/metrics/PrometheusMetrics.cpp index e5db5a9264..8672817428 100644 --- a/cpp/src/metrics/PrometheusMetrics.cpp +++ b/cpp/src/metrics/PrometheusMetrics.cpp @@ -37,6 +37,7 @@ PrometheusMetrics::Init() { } + void PrometheusMetrics::CPUUsagePercentSet() { if(!startup_) return ; @@ -54,8 +55,11 @@ PrometheusMetrics::RAMUsagePercentSet() { void PrometheusMetrics::GPUPercentGaugeSet() { if(!startup_) return; - int numDevide = server::SystemInfo::GetInstance().NumDevice(); + int numDevide = server::SystemInfo::GetInstance().num_device(); std::vector values = server::SystemInfo::GetInstance().GPUPercent(); +// for (int i = 0; i < numDevide; ++i) { +// GPU_percent_gauges_[i].Set(static_cast(values[i])); +// } if(numDevide >= 1) GPU0_percent_gauge_.Set(static_cast(values[0])); if(numDevide >= 2) GPU1_percent_gauge_.Set(static_cast(values[1])); if(numDevide >= 3) GPU2_percent_gauge_.Set(static_cast(values[2])); @@ -70,9 +74,13 @@ PrometheusMetrics::GPUPercentGaugeSet() { void PrometheusMetrics::GPUMemoryUsageGaugeSet() { if(!startup_) return; + int numDevide = server::SystemInfo::GetInstance().num_device(); std::vector values = server::SystemInfo::GetInstance().GPUMemoryUsed(); - unsigned long long MtoB = 1024*1024; + constexpr unsigned long long MtoB = 1024*1024; int numDevice = values.size(); +// for (int i = 0; i < numDevice; ++i) { +// GPU_memory_usage_gauges_[i].Set(values[i]/MtoB); +// } if(numDevice >=1) GPU0_memory_usage_gauge_.Set(values[0]/MtoB); if(numDevice >=2) GPU1_memory_usage_gauge_.Set(values[1]/MtoB); if(numDevice >=3) GPU2_memory_usage_gauge_.Set(values[2]/MtoB); @@ -111,6 +119,22 @@ void PrometheusMetrics::ConnectionGaugeDecrement() { connection_gauge_.Decrement(); } +//void PrometheusMetrics::GpuPercentInit() { +// int num_device = SystemInfo::GetInstance().num_device(); +// constexpr char device_number[] = "DeviceNum"; +// for(int i = 0; i < num_device; ++ i) { +// GPU_percent_gauges_.emplace_back(GPU_percent_.Add({{device_number,std::to_string(i)}})); +// } +// +//} +//void PrometheusMetrics::GpuMemoryInit() { +// int num_device = SystemInfo::GetInstance().num_device(); +// constexpr char device_number[] = "DeviceNum"; +// for(int i = 0; i < num_device; ++ i) { +// GPU_memory_usage_gauges_.emplace_back(GPU_memory_usage_.Add({{device_number,std::to_string(i)}})); +// } +//} + } } diff --git a/cpp/src/metrics/PrometheusMetrics.h b/cpp/src/metrics/PrometheusMetrics.h index c23c137fe8..fc2bef6f60 100644 --- a/cpp/src/metrics/PrometheusMetrics.h +++ b/cpp/src/metrics/PrometheusMetrics.h @@ -49,6 +49,8 @@ class PrometheusMetrics: public MetricsBase { std::shared_ptr exposer_ptr_; std::shared_ptr registry_ = std::make_shared(); bool startup_ = false; +// void GpuPercentInit(); +// void GpuMemoryInit(); public: void AddGroupSuccessTotalIncrement(double value = 1.0) override { if(startup_) add_group_success_total_.Increment(value);}; @@ -115,45 +117,7 @@ class PrometheusMetrics: public MetricsBase { void ConnectionGaugeDecrement() override ; void KeepingAliveCounterIncrement(double value = 1) override {if(startup_) keeping_alive_counter_.Increment(value);}; -// prometheus::Counter &connection_total() {return connection_total_; } -// -// prometheus::Counter &add_group_success_total() { return add_group_success_total_; } -// prometheus::Counter &add_group_fail_total() { return add_group_fail_total_; } -// -// prometheus::Counter &get_group_success_total() { return get_group_success_total_;} -// prometheus::Counter &get_group_fail_total() { return get_group_fail_total_;} -// -// prometheus::Counter &has_group_success_total() { return has_group_success_total_;} -// prometheus::Counter &has_group_fail_total() { return has_group_fail_total_;} -// -// prometheus::Counter &get_group_files_success_total() { return get_group_files_success_total_;}; -// prometheus::Counter &get_group_files_fail_total() { return get_group_files_fail_total_;} -// -// prometheus::Counter &add_vectors_success_total() { return add_vectors_success_total_; } -// prometheus::Counter &add_vectors_fail_total() { return add_vectors_fail_total_; } -// -// prometheus::Histogram &add_vectors_duration_histogram() { return add_vectors_duration_histogram_;} -// -// prometheus::Counter &search_success_total() { return search_success_total_; } -// prometheus::Counter &search_fail_total() { return search_fail_total_; } -// -// prometheus::Histogram &search_duration_histogram() { return search_duration_histogram_; } -// prometheus::Histogram &raw_files_size_histogram() { return raw_files_size_histogram_; } -// prometheus::Histogram &index_files_size_histogram() { return index_files_size_histogram_; } -// -// prometheus::Histogram &build_index_duration_seconds_histogram() { return build_index_duration_seconds_histogram_; } -// -// prometheus::Histogram &all_build_index_duration_seconds_histogram() { return all_build_index_duration_seconds_histogram_; } -// -// prometheus::Gauge &cache_usage_gauge() { return cache_usage_gauge_; } -// -// prometheus::Counter &meta_visit_total() { return meta_visit_total_; } -// -// prometheus::Histogram &meta_visit_duration_seconds_histogram() { return meta_visit_duration_seconds_histogram_; } -// -// prometheus::Gauge &mem_usage_percent_gauge() { return mem_usage_percent_gauge_; } -// -// prometheus::Gauge &mem_usage_total_gauge() { return mem_usage_total_gauge_; } + @@ -477,6 +441,7 @@ class PrometheusMetrics: public MetricsBase { prometheus::Gauge &GPU5_percent_gauge_ = GPU_percent_.Add({{"DeviceNum", "5"}}); prometheus::Gauge &GPU6_percent_gauge_ = GPU_percent_.Add({{"DeviceNum", "6"}}); prometheus::Gauge &GPU7_percent_gauge_ = GPU_percent_.Add({{"DeviceNum", "7"}}); +// std::vector GPU_percent_gauges_; @@ -494,6 +459,7 @@ class PrometheusMetrics: public MetricsBase { prometheus::Gauge &GPU5_memory_usage_gauge_ = GPU_memory_usage_.Add({{"DeviceNum", "5"}}); prometheus::Gauge &GPU6_memory_usage_gauge_ = GPU_memory_usage_.Add({{"DeviceNum", "6"}}); prometheus::Gauge &GPU7_memory_usage_gauge_ = GPU_memory_usage_.Add({{"DeviceNum", "7"}}); +// std::vector GPU_memory_usage_gauges_; prometheus::Family &query_index_type_per_second_ = prometheus::BuildGauge() .Name("query_index_throughtout_per_microsecond") diff --git a/cpp/src/metrics/SystemInfo.cpp b/cpp/src/metrics/SystemInfo.cpp index a90e206a93..210817f856 100644 --- a/cpp/src/metrics/SystemInfo.cpp +++ b/cpp/src/metrics/SystemInfo.cpp @@ -56,7 +56,7 @@ void SystemInfo::Init() { } long long -SystemInfo::parseLine(char *line) { +SystemInfo::ParseLine(char *line) { // This assumes that a digit will be found and the line ends in " Kb". int i = strlen(line); const char *p = line; @@ -80,18 +80,20 @@ unsigned long SystemInfo::GetProcessUsedMemory() { //Note: this value is in KB! FILE* file = fopen("/proc/self/status", "r"); + constexpr int64_t line_length = 128; long long result = -1; - char line[128]; + constexpr int64_t KB_SIZE = 1024; + char line[line_length]; - while (fgets(line, 128, file) != NULL){ + while (fgets(line, line_length, file) != NULL){ if (strncmp(line, "VmRSS:", 6) == 0){ - result = parseLine(line); + result = ParseLine(line); break; } } fclose(file); // return value in Byte - return (result*1024); + return (result*KB_SIZE); } @@ -128,46 +130,46 @@ SystemInfo::CPUPercent() { return percent; } -std::unordered_map> -SystemInfo::GetGPUMemPercent(){ - // return GPUID: MEM% +//std::unordered_map> +//SystemInfo::GetGPUMemPercent(){ +// // return GPUID: MEM% +// +// //write GPU info to a file +// system("nvidia-smi pmon -c 1 > GPUInfo.txt"); +// int pid = (int)getpid(); +// +// //parse line +// std::ifstream read_file; +// read_file.open("GPUInfo.txt"); +// std::string line; +// while(getline(read_file, line)){ +// std::vector words = split(line); +// // 0 1 2 3 4 5 6 7 +// //words stand for gpuindex, pid, type, sm, mem, enc, dec, command respectively +// if(std::stoi(words[1]) != pid) continue; +// int GPUindex = std::stoi(words[0]); +// double sm_percent = std::stod(words[3]); +// double mem_percent = std::stod(words[4]); +// +// } +// +//} - //write GPU info to a file - system("nvidia-smi pmon -c 1 > GPUInfo.txt"); - int pid = (int)getpid(); - - //parse line - std::ifstream read_file; - read_file.open("GPUInfo.txt"); - std::string line; - while(getline(read_file, line)){ - std::vector words = split(line); - // 0 1 2 3 4 5 6 7 - //words stand for gpuindex, pid, type, sm, mem, enc, dec, command respectively - if(std::stoi(words[1]) != pid) continue; - int GPUindex = std::stoi(words[0]); - double sm_percent = std::stod(words[3]); - double mem_percent = std::stod(words[4]); - - } - -} - -std::vector -SystemInfo::split(std::string input) { - std::vector words; - input += " "; - int word_start = 0; - for (int i = 0; i < input.size(); ++i) { - if(input[i] != ' ') continue; - if(input[i] == ' ') { - word_start = i + 1; - continue; - } - words.push_back(input.substr(word_start,i-word_start)); - } - return words; -} +//std::vector +//SystemInfo::split(std::string input) { +// std::vector words; +// input += " "; +// int word_start = 0; +// for (int i = 0; i < input.size(); ++i) { +// if(input[i] != ' ') continue; +// if(input[i] == ' ') { +// word_start = i + 1; +// continue; +// } +// words.push_back(input.substr(word_start,i-word_start)); +// } +// return words; +//} std::vector SystemInfo::GPUPercent() { diff --git a/cpp/src/metrics/SystemInfo.h b/cpp/src/metrics/SystemInfo.h index d173f4f3af..042358c3df 100644 --- a/cpp/src/metrics/SystemInfo.h +++ b/cpp/src/metrics/SystemInfo.h @@ -42,14 +42,14 @@ class SystemInfo { } void Init(); - int NumDevice() {return num_device_;}; - long long parseLine(char* line); + int num_device() const {return num_device_;}; + long long ParseLine(char* line); unsigned long GetPhysicalMemory(); unsigned long GetProcessUsedMemory(); double MemoryPercent(); double CPUPercent(); - std::unordered_map> GetGPUMemPercent(); - std::vector split(std::string input); +// std::unordered_map> GetGPUMemPercent() {}; +// std::vector split(std::string input) {}; std::vector GPUPercent(); std::vector GPUMemoryUsed(); diff --git a/cpp/src/server/MegasearchThreadPoolServer.cpp b/cpp/src/server/MegasearchThreadPoolServer.cpp index f7ce0d4f72..d227442a45 100644 --- a/cpp/src/server/MegasearchThreadPoolServer.cpp +++ b/cpp/src/server/MegasearchThreadPoolServer.cpp @@ -4,16 +4,20 @@ * Proprietary and confidential. ******************************************************************************/ #include "metrics/Metrics.h" - - #include "MegasearchThreadPoolServer.h" +namespace zilliz { +namespace vecwise { +namespace server { -void zilliz::vecwise::server::MegasearchThreadPoolServer::onClientConnected(const std::shared_ptr &pClient) { +void +MegasearchThreadPoolServer::onClientConnected(const std::shared_ptr &pClient) { server::Metrics::GetInstance().ConnectionGaugeIncrement(); TThreadPoolServer::onClientConnected(pClient); } -void zilliz::vecwise::server::MegasearchThreadPoolServer::onClientDisconnected(apache::thrift::server::TConnectedClient *pClient) { + +void +MegasearchThreadPoolServer::onClientDisconnected(apache::thrift::server::TConnectedClient *pClient) { server::Metrics::GetInstance().ConnectionGaugeDecrement(); TThreadPoolServer::onClientDisconnected(pClient); } @@ -25,3 +29,6 @@ zilliz::vecwise::server::MegasearchThreadPoolServer::MegasearchThreadPoolServer( : TThreadPoolServer(processor, serverTransport, transportFactory, protocolFactory, threadManager) { } +} +} +} \ No newline at end of file diff --git a/cpp/src/thrift/megasearch.thrift b/cpp/src/thrift/megasearch.thrift index 0dc7230934..0f15695e65 100644 --- a/cpp/src/thrift/megasearch.thrift +++ b/cpp/src/thrift/megasearch.thrift @@ -29,7 +29,7 @@ exception Exception { } - +/** * @brief Table Schema */ struct TableSchema {