diff --git a/cpp/CHANGELOG.md b/cpp/CHANGELOG.md index 54c482b708..80d1a70c8b 100644 --- a/cpp/CHANGELOG.md +++ b/cpp/CHANGELOG.md @@ -25,3 +25,4 @@ Please mark all change in change log and use the ticket from JIRA. - MS-20 - Clean Code Part 1 - MS-30 - Use faiss v1.5.2 - MS-32 - Fix thrift error +- MS-34 - Fix prometheus-cpp thirdparty diff --git a/cpp/cmake/ThirdPartyPackages.cmake b/cpp/cmake/ThirdPartyPackages.cmake index c0330b20ee..8167879376 100644 --- a/cpp/cmake/ThirdPartyPackages.cmake +++ b/cpp/cmake/ThirdPartyPackages.cmake @@ -990,6 +990,8 @@ if(MEGASEARCH_WITH_PROMETHEUS) link_directories(SYSTEM ${PROMETHEUS_PREFIX}/core/) include_directories(SYSTEM ${PROMETHEUS_PREFIX}/core/include) + + link_directories(${PROMETHEUS_PREFIX}/civetweb_ep-prefix/src/civetweb_ep) endif() # ---------------------------------------------------------------------- diff --git a/cpp/conf/server_config.yaml b/cpp/conf/server_config.yaml index 523b1f9968..781a77dd35 100644 --- a/cpp/conf/server_config.yaml +++ b/cpp/conf/server_config.yaml @@ -16,6 +16,7 @@ metric_config: collector: prometheus # prometheus, now we only have prometheus prometheus_config: collect_type: pull # pull means prometheus pull the message from megasearch, push means megasearch push metric to push gateway + port: 8080 push_gateway_ip_address: 127.0.0.1 push_gateway_port: 9091 diff --git a/cpp/src/CMakeLists.txt b/cpp/src/CMakeLists.txt index 75a593af26..86afe997dc 100644 --- a/cpp/src/CMakeLists.txt +++ b/cpp/src/CMakeLists.txt @@ -11,12 +11,12 @@ aux_source_directory(server server_files) aux_source_directory(utils utils_files) aux_source_directory(db db_files) aux_source_directory(wrapper wrapper_files) -#aux_source_directory(metrics metrics_files) +aux_source_directory(metrics metrics_files) -set(metrics_files - metrics/Metrics.cpp - metrics/MetricBase.h -) +#set(metrics_files +# metrics/Metrics.cpp +# metrics/MetricBase.h +#) set(license_check_files @@ -82,6 +82,7 @@ set(third_party_libs 
prometheus-cpp-push prometheus-cpp-pull prometheus-cpp-core + civetweb rocksdb boost_system_static boost_filesystem_static @@ -105,9 +106,9 @@ if (GPU_VERSION STREQUAL "ON") cudart cublas libsqlite3.a -# libprometheus-cpp-push.a -# libprometheus-cpp-pull.a -# libprometheus-cpp-core.a + libprometheus-cpp-push.a + libprometheus-cpp-pull.a + libprometheus-cpp-core.a ) else() set(engine_libs @@ -118,9 +119,9 @@ else() libgfortran.a libquadmath.a libsqlite3.a -# libprometheus-cpp-push.a -# libprometheus-cpp-pull.a -# libprometheus-cpp-core.a + libprometheus-cpp-push.a + libprometheus-cpp-pull.a + libprometheus-cpp-core.a ) endif () @@ -173,6 +174,8 @@ set(server_libs libzstd.a liblz4.a dl + metrics + ) add_executable(vecwise_server diff --git a/cpp/src/db/FaissExecutionEngine.inl b/cpp/src/db/FaissExecutionEngine.inl index d69519d0ca..8dd701d54d 100644 --- a/cpp/src/db/FaissExecutionEngine.inl +++ b/cpp/src/db/FaissExecutionEngine.inl @@ -84,8 +84,8 @@ Status FaissExecutionEngine::Load() { server::Metrics::GetInstance().FaissDiskLoadSizeBytesHistogramObserve(total_size); - server::Metrics::GetInstance().FaissDiskLoadIOSpeedHistogramObserve(total_size/double(total_time)); - +// server::Metrics::GetInstance().FaissDiskLoadIOSpeedHistogramObserve(total_size/double(total_time)); + server::Metrics::GetInstance().FaissDiskLoadIOSpeedGaugeSet(total_size/double(total_time)); } return Status::OK(); } diff --git a/cpp/src/metrics/MetricBase.h b/cpp/src/metrics/MetricBase.h index a857d6827e..d720b8de44 100644 --- a/cpp/src/metrics/MetricBase.h +++ b/cpp/src/metrics/MetricBase.h @@ -63,6 +63,7 @@ class MetricsBase{ virtual void RawFileSizeTotalIncrement(double value = 1) {}; virtual void IndexFileSizeGaugeSet(double value) {}; virtual void RawFileSizeGaugeSet(double value) {}; + virtual void FaissDiskLoadIOSpeedGaugeSet(double value) {}; }; diff --git a/cpp/src/metrics/Metrics.cpp b/cpp/src/metrics/Metrics.cpp index ee823b3cd9..feb986b162 100644 --- a/cpp/src/metrics/Metrics.cpp 
+++ b/cpp/src/metrics/Metrics.cpp @@ -4,7 +4,9 @@ * Proprietary and confidential. ******************************************************************************/ + #include "Metrics.h" +#include "PrometheusMetrics.h" namespace zilliz { namespace vecwise { @@ -14,8 +16,7 @@ MetricsBase & Metrics::CreateMetricsCollector(MetricCollectorType collector_type) { switch (collector_type) { case MetricCollectorType::PROMETHEUS: -// static PrometheusMetrics instance = PrometheusMetrics::GetInstance(); - return MetricsBase::GetInstance(); + return PrometheusMetrics::GetInstance(); default:return MetricsBase::GetInstance(); } } @@ -24,6 +25,7 @@ MetricsBase & Metrics::GetInstance() { ConfigNode &config = ServerConfig::GetInstance().GetConfig(CONFIG_METRIC); std::string collector_typr_str = config.GetValue(CONFIG_METRIC_COLLECTOR); + if (collector_typr_str == "prometheus") { return CreateMetricsCollector(MetricCollectorType::PROMETHEUS); } else if (collector_typr_str == "zabbix") { diff --git a/cpp/src/metrics/PrometheusMetrics.cpp b/cpp/src/metrics/PrometheusMetrics.cpp index 72c9f0d8cc..7b93a33e9b 100644 --- a/cpp/src/metrics/PrometheusMetrics.cpp +++ b/cpp/src/metrics/PrometheusMetrics.cpp @@ -6,6 +6,7 @@ #include "PrometheusMetrics.h" + namespace zilliz { namespace vecwise { namespace server { @@ -15,7 +16,7 @@ PrometheusMetrics::Init() { ConfigNode& configNode = ServerConfig::GetInstance().GetConfig(CONFIG_METRIC); startup_ = configNode.GetValue(CONFIG_METRIC_IS_STARTUP) == "true" ? true:false; // Following should be read from config file.
- const std::string bind_address = "8080"; + const std::string bind_address = configNode.GetChild(CONFIG_PROMETHEUS).GetValue(CONFIG_METRIC_PROMETHEUS_PORT); const std::string uri = std::string("/metrics"); const std::size_t num_threads = 2; diff --git a/cpp/src/metrics/PrometheusMetrics.h b/cpp/src/metrics/PrometheusMetrics.h index 06fa64bfa4..000aa31608 100644 --- a/cpp/src/metrics/PrometheusMetrics.h +++ b/cpp/src/metrics/PrometheusMetrics.h @@ -17,7 +17,7 @@ #define METRICS_NOW_TIME std::chrono::system_clock::now() -#define server::Metrics::GetInstance() server::GetInstance() +//#define server::Metrics::GetInstance() server::GetInstance() #define METRICS_MICROSECONDS(a,b) (std::chrono::duration_cast (b-a)).count(); @@ -86,7 +86,8 @@ class PrometheusMetrics: public MetricsBase { void FaissDiskLoadDurationSecondsHistogramObserve(double value) { if(startup_) faiss_disk_load_duration_seconds_histogram_.Observe(value);}; void FaissDiskLoadSizeBytesHistogramObserve(double value) { if(startup_) faiss_disk_load_size_bytes_histogram_.Observe(value);}; - void FaissDiskLoadIOSpeedHistogramObserve(double value) { if(startup_) faiss_disk_load_IO_speed_histogram_.Observe(value);}; +// void FaissDiskLoadIOSpeedHistogramObserve(double value) { if(startup_) faiss_disk_load_IO_speed_histogram_.Observe(value);}; + void FaissDiskLoadIOSpeedGaugeSet(double value) { if(startup_) faiss_disk_load_IO_speed_gauge_.Set(value);}; void CacheAccessTotalIncrement(double value = 1) { if(startup_) cache_access_total_.Increment(value);}; void MemTableMergeDurationSecondsHistogramObserve(double value) { if(startup_) mem_table_merge_duration_seconds_histogram_.Observe(value);}; @@ -211,7 +212,7 @@ class PrometheusMetrics: public MetricsBase { prometheus::Counter &add_vectors_fail_total_ = add_vectors_request_.Add({{"outcome", "fail"}}); prometheus::Family &add_vectors_duration_seconds_ = prometheus::BuildHistogram() - .Name("add_vector_duration_seconds") + 
.Name("add_vector_duration_microseconds") .Help("average time of adding every vector") .Register(*registry_); prometheus::Histogram &add_vectors_duration_histogram_ = add_vectors_duration_seconds_.Add({}, BucketBoundaries{0, 0.01, 0.02, 0.03, 0.04, 0.05, 0.08, 0.1, 0.5, 1}); @@ -226,7 +227,7 @@ class PrometheusMetrics: public MetricsBase { prometheus::Counter &search_fail_total_ = search_request_.Add({{"outcome","fail"}}); prometheus::Family &search_request_duration_seconds_ = prometheus::BuildHistogram() - .Name("search_request_duration_second") + .Name("search_request_duration_microsecond") .Help("histogram of processing time for each search") .Register(*registry_); prometheus::Histogram &search_duration_histogram_ = search_request_duration_seconds_.Add({}, BucketBoundaries{0.1, 1.0, 10.0}); @@ -236,14 +237,14 @@ class PrometheusMetrics: public MetricsBase { .Name("search_raw_files_bytes") .Help("histogram of raw files size by bytes") .Register(*registry_); - prometheus::Histogram &raw_files_size_histogram_ = raw_files_size_.Add({}, BucketBoundaries{0.1, 1.0, 10.0}); + prometheus::Histogram &raw_files_size_histogram_ = raw_files_size_.Add({}, BucketBoundaries{1e9, 2e9, 4e9, 6e9, 8e9, 1e10}); //record index_files size histogram prometheus::Family &index_files_size_ = prometheus::BuildHistogram() .Name("search_index_files_bytes") .Help("histogram of index files size by bytes") .Register(*registry_); - prometheus::Histogram &index_files_size_histogram_ = index_files_size_.Add({}, BucketBoundaries{0.1, 1.0, 10.0}); + prometheus::Histogram &index_files_size_histogram_ = index_files_size_.Add({}, BucketBoundaries{1e9, 2e9, 4e9, 6e9, 8e9, 1e10}); //record index and raw files size counter prometheus::Family &file_size_total_ = prometheus::BuildCounter() @@ -263,33 +264,33 @@ class PrometheusMetrics: public MetricsBase { //record processing time for building index prometheus::Family &build_index_duration_seconds_ = prometheus::BuildHistogram() - 
.Name("build_index_duration_seconds") + .Name("build_index_duration_microseconds") .Help("histogram of processing time for building index") .Register(*registry_); - prometheus::Histogram &build_index_duration_seconds_histogram_ = build_index_duration_seconds_.Add({}, BucketBoundaries{0.1, 1.0, 10.0}); + prometheus::Histogram &build_index_duration_seconds_histogram_ = build_index_duration_seconds_.Add({}, BucketBoundaries{2e6, 4e6, 6e6, 8e6, 1e7}); //record processing time for all building index prometheus::Family &all_build_index_duration_seconds_ = prometheus::BuildHistogram() - .Name("all_build_index_duration_seconds") + .Name("all_build_index_duration_microseconds") .Help("histogram of processing time for building index") .Register(*registry_); - prometheus::Histogram &all_build_index_duration_seconds_histogram_ = all_build_index_duration_seconds_.Add({}, BucketBoundaries{0.1, 1.0, 10.0}); + prometheus::Histogram &all_build_index_duration_seconds_histogram_ = all_build_index_duration_seconds_.Add({}, BucketBoundaries{2e6, 4e6, 6e6, 8e6, 1e7}); //record duration of merging mem table prometheus::Family &mem_table_merge_duration_seconds_ = prometheus::BuildHistogram() - .Name("mem_table_merge_duration_seconds") + .Name("mem_table_merge_duration_microseconds") .Help("histogram of processing time for merging mem tables") .Register(*registry_); - prometheus::Histogram &mem_table_merge_duration_seconds_histogram_ = mem_table_merge_duration_seconds_.Add({}, BucketBoundaries{0.1, 1.0, 10.0}); + prometheus::Histogram &mem_table_merge_duration_seconds_histogram_ = mem_table_merge_duration_seconds_.Add({}, BucketBoundaries{5e4, 1e5, 2e5, 4e5, 6e5, 8e5, 1e6}); //record search index and raw data duration prometheus::Family &search_data_duration_seconds_ = prometheus::BuildHistogram() - .Name("search_data_duration_seconds") + .Name("search_data_duration_microseconds") .Help("histograms of processing time for search index and raw data") .Register(*registry_); - 
prometheus::Histogram &search_index_data_duration_seconds_histogram_ = search_data_duration_seconds_.Add({{"type", "index"}}, BucketBoundaries{0.1, 1.0, 10.0}); - prometheus::Histogram &search_raw_data_duration_seconds_histogram_ = search_data_duration_seconds_.Add({{"type", "raw"}}, BucketBoundaries{0.1, 1.0, 10.0}); + prometheus::Histogram &search_index_data_duration_seconds_histogram_ = search_data_duration_seconds_.Add({{"type", "index"}}, BucketBoundaries{1e5, 2e5, 4e5, 6e5, 8e5}); + prometheus::Histogram &search_raw_data_duration_seconds_histogram_ = search_data_duration_seconds_.Add({{"type", "raw"}}, BucketBoundaries{1e5, 2e5, 4e5, 6e5, 8e5}); ////all form Cache.cpp @@ -343,34 +344,41 @@ class PrometheusMetrics: public MetricsBase { //record meta access duration prometheus::Family &meta_access_duration_seconds_ = prometheus::BuildHistogram() - .Name("meta_access_duration_seconds") + .Name("meta_access_duration_microseconds") .Help("histogram of processing time for accessing mata") .Register(*registry_); - prometheus::Histogram &meta_access_duration_seconds_histogram_ = meta_access_duration_seconds_.Add({}, BucketBoundaries{0.1, 1.0, 10.0}); + prometheus::Histogram &meta_access_duration_seconds_histogram_ = meta_access_duration_seconds_.Add({}, BucketBoundaries{100, 300, 500, 700, 900, 2000, 4000, 6000, 8000, 20000}); ////all from FaissExecutionEngine.cpp //record data loading from disk count, size, duration, IO speed prometheus::Family &disk_load_duration_second_ = prometheus::BuildHistogram() - .Name("disk_load_duration_seconds") + .Name("disk_load_duration_microseconds") .Help("Histogram of processing time for loading data from disk") .Register(*registry_); - prometheus::Histogram &faiss_disk_load_duration_seconds_histogram_ = disk_load_duration_second_.Add({{"DB","Faiss"}},BucketBoundaries{0.1, 1.0, 10.0}); + prometheus::Histogram &faiss_disk_load_duration_seconds_histogram_ = disk_load_duration_second_.Add({{"DB","Faiss"}},BucketBoundaries{2e5, 4e5, 6e5 
, 8e5}); prometheus::Family &disk_load_size_bytes_ = prometheus::BuildHistogram() .Name("disk_load_size_bytes") .Help("Histogram of data size by bytes for loading data from disk") .Register(*registry_); - prometheus::Histogram &faiss_disk_load_size_bytes_histogram_ = disk_load_size_bytes_.Add({{"DB","Faiss"}},BucketBoundaries{0.1, 1.0, 10.0}); + prometheus::Histogram &faiss_disk_load_size_bytes_histogram_ = disk_load_size_bytes_.Add({{"DB","Faiss"}},BucketBoundaries{1e9, 2e9, 4e9, 6e9, 8e9}); - prometheus::Family &disk_load_IO_speed_ = prometheus::BuildHistogram() - .Name("disk_load_IO_speed_byte_per_sec") - .Help("Histogram of IO speed for loading data from disk") +// prometheus::Family &disk_load_IO_speed_ = prometheus::BuildHistogram() +// .Name("disk_load_IO_speed_byte_per_sec") +// .Help("Histogram of IO speed for loading data from disk") +// .Register(*registry_); +// prometheus::Histogram &faiss_disk_load_IO_speed_histogram_ = disk_load_IO_speed_.Add({{"DB","Faiss"}},BucketBoundaries{1000, 2000, 3000, 4000, 6000, 8000}); + + prometheus::Family &faiss_disk_load_IO_speed_ = prometheus::BuildGauge() + .Name("disk_load_IO_speed_byte_per_microsec") + .Help("disk IO speed ") .Register(*registry_); - prometheus::Histogram &faiss_disk_load_IO_speed_histogram_ = disk_load_IO_speed_.Add({{"DB","Faiss"}},BucketBoundaries{0.1, 1.0, 10.0}); + prometheus::Gauge &faiss_disk_load_IO_speed_gauge_ = faiss_disk_load_IO_speed_.Add({{"DB","Faiss"}}); - ////all from CacheMgr.cpp + + ////all from CacheMgr.cpp //record cache access count prometheus::Family &cache_access_ = prometheus::BuildCounter() .Name("cache_access_total") diff --git a/cpp/src/server/ServerConfig.h b/cpp/src/server/ServerConfig.h index 8bb387bdd9..407dfa7e1f 100644 --- a/cpp/src/server/ServerConfig.h +++ b/cpp/src/server/ServerConfig.h @@ -38,6 +38,8 @@ static const std::string CONFIG_LICENSE_PATH = "license_path"; static const std::string CONFIG_METRIC = "metric_config"; static const std::string 
CONFIG_METRIC_IS_STARTUP = "is_startup"; static const std::string CONFIG_METRIC_COLLECTOR = "collector"; +static const std::string CONFIG_PROMETHEUS = "prometheus_config"; +static const std::string CONFIG_METRIC_PROMETHEUS_PORT = "port"; class ServerConfig { public: diff --git a/cpp/unittest/CMakeLists.txt b/cpp/unittest/CMakeLists.txt index 7d37042896..addce57da1 100644 --- a/cpp/unittest/CMakeLists.txt +++ b/cpp/unittest/CMakeLists.txt @@ -7,6 +7,7 @@ link_directories( "${CMAKE_BINARY_DIR}/lib" #"${VECWISE_THIRD_PARTY_BUILD}/lib" "${GTEST_PREFIX}/lib/" + ) message(STATUS "GTEST LIB: ${GTEST_PREFIX}/lib") @@ -24,7 +25,14 @@ set(unittest_libs pthread metrics openblas - gfortran) + gfortran + prometheus-cpp-pull + prometheus-cpp-push + prometheus-cpp-core + civetweb + dl + z + ) add_subdirectory(server) add_subdirectory(db)