From 2cd296875bae6dd06d253c5728250766b323ed68 Mon Sep 17 00:00:00 2001 From: Yu Kun Date: Fri, 23 Aug 2019 17:25:49 +0800 Subject: [PATCH 1/4] reconstruct MetricsCollector Former-commit-id: 4f7a219b9eb10d98068a5488967319b039fd695a --- cpp/CHANGELOG.md | 1 + cpp/conf/server_config.template | 1 - cpp/src/db/DBImpl.cpp | 58 +---- cpp/src/db/engine/ExecutionEngineImpl.cpp | 34 +-- cpp/src/db/insert/MemTableFile.cpp | 11 +- cpp/src/db/insert/VectorSource.cpp | 8 +- cpp/src/db/meta/MySQLMetaImpl.cpp | 60 ++--- cpp/src/db/meta/SqliteMetaImpl.cpp | 64 ++--- cpp/src/db/scheduler/task/SearchTask.cpp | 23 +- cpp/src/metrics/Metrics.h | 228 ++++++++++++++++++ cpp/src/scheduler/task/SearchTask.cpp | 24 +- cpp/unittest/metrics/metricbase_test.cpp | 2 +- cpp/unittest/metrics/prometheus_test.cpp | 2 +- .../server/appendix/server_config.yaml | 1 - 14 files changed, 299 insertions(+), 218 deletions(-) diff --git a/cpp/CHANGELOG.md b/cpp/CHANGELOG.md index e7f1cc74e1..1dded29e17 100644 --- a/cpp/CHANGELOG.md +++ b/cpp/CHANGELOG.md @@ -41,6 +41,7 @@ Please mark all change in change log and use the ticket from JIRA. - MS-400 - Add timestamp record in task state change function - MS-402 - Add dump implementation for TaskTableItem - MS-403 - Add GpuCacheMgr +- MS-407 - Reconstruct MetricsCollector ## New Feature - MS-343 - Implement ResourceMgr diff --git a/cpp/conf/server_config.template b/cpp/conf/server_config.template index 107b4a1060..3441f1eaab 100644 --- a/cpp/conf/server_config.template +++ b/cpp/conf/server_config.template @@ -25,7 +25,6 @@ metric_config: is_startup: off # if monitoring start: on, off collector: prometheus # metrics collector: prometheus prometheus_config: # following are prometheus configure - collect_type: pull # prometheus collect data method port: 8080 # the port prometheus use to fetch metrics push_gateway_ip_address: 127.0.0.1 # push method configure: push gateway ip address push_gateway_port: 9091 # push method configure: push gateway port diff --git a/cpp/src/db/DBImpl.cpp b/cpp/src/db/DBImpl.cpp index 8a002f29b0..500cad075d 100644 --- a/cpp/src/db/DBImpl.cpp +++ b/cpp/src/db/DBImpl.cpp @@ -23,6 +23,7 @@ #include #include #include +#include namespace zilliz { namespace milvus { @@ -34,32 +35,6 @@ constexpr uint64_t METRIC_ACTION_INTERVAL = 1; constexpr uint64_t COMPACT_ACTION_INTERVAL = 1; constexpr uint64_t INDEX_ACTION_INTERVAL = 1; -void CollectInsertMetrics(double total_time, size_t n, bool succeed) { - double avg_time = total_time / n; - for (int i = 0; i < n; ++i) { - server::Metrics::GetInstance().AddVectorsDurationHistogramOberve(avg_time); - } - -// server::Metrics::GetInstance().add_vector_duration_seconds_quantiles().Observe((average_time)); - if (succeed) { - server::Metrics::GetInstance().AddVectorsSuccessTotalIncrement(n); - server::Metrics::GetInstance().AddVectorsSuccessGaugeSet(n); - } - else { - server::Metrics::GetInstance().AddVectorsFailTotalIncrement(n); - server::Metrics::GetInstance().AddVectorsFailGaugeSet(n); - } -} - -void CollectQueryMetrics(double total_time, size_t nq) { - for (int i = 0; i < nq; ++i) { - server::Metrics::GetInstance().QueryResponseSummaryObserve(total_time); - } - auto average_time = total_time / nq; - server::Metrics::GetInstance().QueryVectorResponseSummaryObserve(average_time, nq); - server::Metrics::GetInstance().QueryVectorResponsePerSecondGaugeSet(double (nq) / total_time); -} - } @@ -164,27 +139,21 @@ Status DBImpl::InsertVectors(const std::string& table_id_, auto start_time = METRICS_NOW_TIME; Status status = mem_mgr_->InsertVectors(table_id_, n, vectors, vector_ids_); - auto end_time = METRICS_NOW_TIME; - double total_time = METRICS_MICROSECONDS(start_time,end_time); + zilliz::milvus::server::CollectInsertMetrics metrics(start_time, n, status.ok()); // std::chrono::microseconds time_span = std::chrono::duration_cast(end_time - start_time); // double average_time = double(time_span.count()) / n; ENGINE_LOG_DEBUG << "Insert vectors to cache finished"; - CollectInsertMetrics(total_time, n, status.ok()); return status; - } Status DBImpl::Query(const std::string &table_id, uint64_t k, uint64_t nq, uint64_t nprobe, const float *vectors, QueryResults &results) { - auto start_time = METRICS_NOW_TIME; + server::CollectQueryMetrics metrics(nq); + meta::DatesT dates = {meta::Meta::GetDate()}; Status result = Query(table_id, k, nq, nprobe, vectors, dates, results); - auto end_time = METRICS_NOW_TIME; - auto total_time = METRICS_MICROSECONDS(start_time,end_time); - - CollectQueryMetrics(total_time, nq); return result; } @@ -251,7 +220,8 @@ Status DBImpl::Query(const std::string& table_id, const std::vector Status DBImpl::QueryAsync(const std::string& table_id, const meta::TableFilesSchema& files, uint64_t k, uint64_t nq, uint64_t nprobe, const float* vectors, const meta::DatesT& dates, QueryResults& results) { - auto start_time = METRICS_NOW_TIME; + server::CollectQueryMetrics metrics(nq); + server::TimeRecorder rc(""); //step 1: get files to search @@ -294,11 +264,6 @@ Status DBImpl::QueryAsync(const std::string& table_id, const meta::TableFilesSch results = context->GetResult(); rc.ElapseFromBegin("Engine query totally cost"); - auto end_time = METRICS_NOW_TIME; - auto total_time = METRICS_MICROSECONDS(start_time,end_time); - - CollectQueryMetrics(total_time, nq); - return Status::OK(); } @@ -418,14 +383,10 @@ Status DBImpl::MergeFiles(const std::string& table_id, const meta::DateT& date, long index_size = 0; for (auto& file : files) { + server::CollectMergeFilesMetrics metrics; - auto start_time = METRICS_NOW_TIME; index->Merge(file.location_); auto file_schema = file; - auto end_time = METRICS_NOW_TIME; - auto total_time = METRICS_MICROSECONDS(start_time,end_time); - server::Metrics::GetInstance().MemTableMergeDurationSecondsHistogramObserve(total_time); - file_schema.file_type_ = meta::TableFileSchema::TO_DELETE; updated.push_back(file_schema); ENGINE_LOG_DEBUG << "Merging file " << file_schema.file_id_; @@ -641,11 +602,8 @@ Status DBImpl::BuildIndex(const meta::TableFileSchema& file) { std::shared_ptr index; try { - auto start_time = METRICS_NOW_TIME; + server::CollectBuildIndexMetrics metrics; index = to_index->BuildIndex(table_file.location_); - auto end_time = METRICS_NOW_TIME; - auto total_time = METRICS_MICROSECONDS(start_time, end_time); - server::Metrics::GetInstance().BuildIndexDurationSecondsHistogramObserve(total_time); } catch (std::exception& ex) { //typical error: out of gpu memory std::string msg = "BuildIndex encounter exception" + std::string(ex.what()); diff --git a/cpp/src/db/engine/ExecutionEngineImpl.cpp b/cpp/src/db/engine/ExecutionEngineImpl.cpp index deaf864e94..446c6e083a 100644 --- a/cpp/src/db/engine/ExecutionEngineImpl.cpp +++ b/cpp/src/db/engine/ExecutionEngineImpl.cpp @@ -116,9 +116,11 @@ Status ExecutionEngineImpl::Serialize() { } Status ExecutionEngineImpl::Load(bool to_cache) { + double physical_size; + server::CollectExecutionEngineMetrics metrics(physical_size); + index_ = zilliz::milvus::cache::CpuCacheMgr::GetInstance()->GetIndex(location_); bool already_in_cache = (index_ != nullptr); - auto start_time = METRICS_NOW_TIME; if (!index_) { try { index_ = read_index(location_); @@ -133,22 +135,16 @@ Status ExecutionEngineImpl::Load(bool to_cache) { if (!already_in_cache && to_cache) { Cache(); - auto end_time = METRICS_NOW_TIME; - auto total_time = METRICS_MICROSECONDS(start_time, end_time); - - server::Metrics::GetInstance().FaissDiskLoadDurationSecondsHistogramObserve(total_time); - double physical_size = PhysicalSize(); - - server::Metrics::GetInstance().FaissDiskLoadSizeBytesHistogramObserve(physical_size); - server::Metrics::GetInstance().FaissDiskLoadIOSpeedGaugeSet(physical_size / double(total_time)); + physical_size = PhysicalSize(); } return Status::OK(); } Status ExecutionEngineImpl::CopyToGpu(uint64_t device_id) { + double physical_size; + server::CollectExecutionEngineMetrics metrics(physical_size); index_ = zilliz::milvus::cache::GpuCacheMgr::GetInstance(device_id)->GetIndex(location_); bool already_in_cache = (index_ != nullptr); - auto start_time = METRICS_NOW_TIME; if (!index_) { try { index_ = index_->CopyToGpu(device_id); @@ -163,21 +159,17 @@ Status ExecutionEngineImpl::CopyToGpu(uint64_t device_id) { if (!already_in_cache) { GpuCache(device_id); - auto end_time = METRICS_NOW_TIME; - auto total_time = METRICS_MICROSECONDS(start_time, end_time); - double physical_size = PhysicalSize(); - - server::Metrics::GetInstance().FaissDiskLoadDurationSecondsHistogramObserve(total_time); - server::Metrics::GetInstance().FaissDiskLoadIOSpeedGaugeSet(physical_size); + physical_size = PhysicalSize(); } return Status::OK(); } Status ExecutionEngineImpl::CopyToCpu() { + double physical_size; + server::CollectExecutionEngineMetrics metrics(physical_size); index_ = zilliz::milvus::cache::CpuCacheMgr::GetInstance()->GetIndex(location_); bool already_in_cache = (index_ != nullptr); - auto start_time = METRICS_NOW_TIME; if (!index_) { try { index_ = index_->CopyToCpu(); @@ -192,14 +184,8 @@ Status ExecutionEngineImpl::CopyToCpu() { if(!already_in_cache) { Cache(); - auto end_time = METRICS_NOW_TIME; - auto total_time = METRICS_MICROSECONDS(start_time, end_time); - double physical_size = PhysicalSize(); - - server::Metrics::GetInstance().FaissDiskLoadDurationSecondsHistogramObserve(total_time); - server::Metrics::GetInstance().FaissDiskLoadIOSpeedGaugeSet(physical_size); + physical_size = PhysicalSize(); } - return Status::OK(); } diff --git a/cpp/src/db/insert/MemTableFile.cpp b/cpp/src/db/insert/MemTableFile.cpp index f8f79c8618..51d5dcd365 100644 --- a/cpp/src/db/insert/MemTableFile.cpp +++ b/cpp/src/db/insert/MemTableFile.cpp @@ -80,20 +80,15 @@ bool MemTableFile::IsFull() { } Status MemTableFile::Serialize() { - - auto start_time = METRICS_NOW_TIME; - - auto size = GetCurrentMem(); + size_t size; + server::CollectSerializeMetrics metrics(size); + size = GetCurrentMem(); execution_engine_->Serialize(); - auto end_time = METRICS_NOW_TIME; - auto total_time = METRICS_MICROSECONDS(start_time, end_time); table_file_schema_.file_size_ = execution_engine_->PhysicalSize(); table_file_schema_.row_count_ = execution_engine_->Count(); - server::Metrics::GetInstance().DiskStoreIOSpeedGaugeSet((double) size / total_time); - table_file_schema_.file_type_ = (size >= options_.index_trigger_size) ? meta::TableFileSchema::TO_INDEX : meta::TableFileSchema::RAW; diff --git a/cpp/src/db/insert/VectorSource.cpp b/cpp/src/db/insert/VectorSource.cpp index 27385b4b23..b977192575 100644 --- a/cpp/src/db/insert/VectorSource.cpp +++ b/cpp/src/db/insert/VectorSource.cpp @@ -24,7 +24,7 @@ Status VectorSource::Add(const ExecutionEnginePtr &execution_engine, size_t &num_vectors_added, IDNumbers &vector_ids) { - auto start_time = METRICS_NOW_TIME; + server::CollectorAddMetrics metrics(n_, table_file_schema.dimension_); num_vectors_added = current_num_vectors_added + num_vectors_to_add <= n_ ? num_vectors_to_add : n_ - current_num_vectors_added; @@ -49,12 +49,6 @@ Status VectorSource::Add(const ExecutionEnginePtr &execution_engine, ENGINE_LOG_ERROR << "VectorSource::Add failed: " + status.ToString(); } - auto end_time = METRICS_NOW_TIME; - auto total_time = METRICS_MICROSECONDS(start_time, end_time); - server::Metrics::GetInstance().AddVectorsPerSecondGaugeSet(static_cast(n_), - static_cast(table_file_schema.dimension_), - total_time); - return status; } diff --git a/cpp/src/db/meta/MySQLMetaImpl.cpp b/cpp/src/db/meta/MySQLMetaImpl.cpp index 954c498f7f..e5a5af6855 100644 --- a/cpp/src/db/meta/MySQLMetaImpl.cpp +++ b/cpp/src/db/meta/MySQLMetaImpl.cpp @@ -39,24 +39,6 @@ Status HandleException(const std::string &desc, std::exception &e) { return Status::DBTransactionError(desc, e.what()); } -class MetricCollector { - public: - MetricCollector() { - server::Metrics::GetInstance().MetaAccessTotalIncrement(); - start_time_ = METRICS_NOW_TIME; - } - - ~MetricCollector() { - auto end_time = METRICS_NOW_TIME; - auto total_time = METRICS_MICROSECONDS(start_time_, end_time); - server::Metrics::GetInstance().MetaAccessDurationSecondsHistogramObserve(total_time); - } - - private: - using TIME_POINT = std::chrono::system_clock::time_point; - TIME_POINT start_time_; -}; - } Status MySQLMetaImpl::NextTableId(std::string &table_id) { @@ -272,7 +254,7 @@ Status MySQLMetaImpl::DropPartitionsByDates(const std::string &table_id, Status MySQLMetaImpl::CreateTable(TableSchema &table_schema) { try { - MetricCollector metric; + server::MetricCollector metric; { ScopedConnection connectionPtr(*mysql_connection_pool_, safe_grab); @@ -390,7 +372,7 @@ Status MySQLMetaImpl::HasNonIndexFiles(const std::string &table_id, bool &has) { Status MySQLMetaImpl::UpdateTableIndexParam(const std::string &table_id, const TableIndex& index) { try { - MetricCollector metric; + server::MetricCollector metric; { ScopedConnection connectionPtr(*mysql_connection_pool_, safe_grab); @@ -457,7 +439,7 @@ Status MySQLMetaImpl::UpdateTableIndexParam(const std::string &table_id, const T Status MySQLMetaImpl::DescribeTableIndex(const std::string &table_id, TableIndex& index) { try { - MetricCollector metric; + server::MetricCollector metric; { ScopedConnection connectionPtr(*mysql_connection_pool_, safe_grab); @@ -504,7 +486,7 @@ Status MySQLMetaImpl::DescribeTableIndex(const std::string &table_id, TableIndex Status MySQLMetaImpl::DropTableIndex(const std::string &table_id) { try { - MetricCollector metric; + server::MetricCollector metric; { ScopedConnection connectionPtr(*mysql_connection_pool_, safe_grab); @@ -560,7 +542,7 @@ Status MySQLMetaImpl::DropTableIndex(const std::string &table_id) { Status MySQLMetaImpl::DeleteTable(const std::string &table_id) { try { - MetricCollector metric; + server::MetricCollector metric; { ScopedConnection connectionPtr(*mysql_connection_pool_, safe_grab); @@ -603,7 +585,7 @@ Status MySQLMetaImpl::DeleteTable(const std::string &table_id) { Status MySQLMetaImpl::DeleteTableFiles(const std::string &table_id) { try { - MetricCollector metric; + server::MetricCollector metric; { ScopedConnection connectionPtr(*mysql_connection_pool_, safe_grab); @@ -642,7 +624,7 @@ Status MySQLMetaImpl::DeleteTableFiles(const std::string &table_id) { Status MySQLMetaImpl::DescribeTable(TableSchema &table_schema) { try { - MetricCollector metric; + server::MetricCollector metric; StoreQueryResult res; { ScopedConnection connectionPtr(*mysql_connection_pool_, safe_grab); @@ -697,7 +679,7 @@ Status MySQLMetaImpl::DescribeTable(TableSchema &table_schema) { Status MySQLMetaImpl::HasTable(const std::string &table_id, bool &has_or_not) { try { - MetricCollector metric; + server::MetricCollector metric; StoreQueryResult res; { ScopedConnection connectionPtr(*mysql_connection_pool_, safe_grab); @@ -737,7 +719,7 @@ Status MySQLMetaImpl::HasTable(const std::string &table_id, bool &has_or_not) { Status MySQLMetaImpl::AllTables(std::vector &table_schema_array) { try { - MetricCollector metric; + server::MetricCollector metric; StoreQueryResult res; { ScopedConnection connectionPtr(*mysql_connection_pool_, safe_grab); @@ -802,7 +784,7 @@ Status MySQLMetaImpl::CreateTableFile(TableFileSchema &file_schema) { } try { - MetricCollector metric; + server::MetricCollector metric; NextFileId(file_schema.file_id_); file_schema.dimension_ = table_schema.dimension_; @@ -870,7 +852,7 @@ Status MySQLMetaImpl::FilesToIndex(TableFilesSchema &files) { files.clear(); try { - MetricCollector metric; + server::MetricCollector metric; StoreQueryResult res; { ScopedConnection connectionPtr(*mysql_connection_pool_, safe_grab); @@ -953,7 +935,7 @@ Status MySQLMetaImpl::FilesToSearch(const std::string &table_id, files.clear(); try { - MetricCollector metric; + server::MetricCollector metric; StoreQueryResult res; { ScopedConnection connectionPtr(*mysql_connection_pool_, safe_grab); @@ -1067,7 +1049,7 @@ Status MySQLMetaImpl::FilesToSearch(const std::string &table_id, files.clear(); try { - MetricCollector metric; + server::MetricCollector metric; StoreQueryResult res; { ScopedConnection connectionPtr(*mysql_connection_pool_, safe_grab); @@ -1177,7 +1159,7 @@ Status MySQLMetaImpl::FilesToMerge(const std::string &table_id, files.clear(); try { - MetricCollector metric; + server::MetricCollector metric; //check table existence TableSchema table_schema; @@ -1457,7 +1439,7 @@ Status MySQLMetaImpl::DiscardFiles(long long to_discard_size) { ENGINE_LOG_DEBUG << "About to discard size=" << to_discard_size; try { - MetricCollector metric; + server::MetricCollector metric; bool status; { ScopedConnection connectionPtr(*mysql_connection_pool_, safe_grab); @@ -1529,7 +1511,7 @@ Status MySQLMetaImpl::UpdateTableFile(TableFileSchema &file_schema) { file_schema.updated_time_ = utils::GetMicroSecTimeStamp(); try { - MetricCollector metric; + server::MetricCollector metric; { ScopedConnection connectionPtr(*mysql_connection_pool_, safe_grab); @@ -1643,7 +1625,7 @@ Status MySQLMetaImpl::UpdateTableFilesToIndex(const std::string &table_id) { Status MySQLMetaImpl::UpdateTableFiles(TableFilesSchema &files) { try { - MetricCollector metric; + server::MetricCollector metric; { ScopedConnection connectionPtr(*mysql_connection_pool_, safe_grab); @@ -1733,7 +1715,7 @@ Status MySQLMetaImpl::CleanUpFilesWithTTL(uint16_t seconds) { //remove to_delete files try { - MetricCollector metric; + server::MetricCollector metric; { ScopedConnection connectionPtr(*mysql_connection_pool_, safe_grab); @@ -1812,7 +1794,7 @@ Status MySQLMetaImpl::CleanUpFilesWithTTL(uint16_t seconds) { //remove to_delete tables try { - MetricCollector metric; + server::MetricCollector metric; { ScopedConnection connectionPtr(*mysql_connection_pool_, safe_grab); @@ -1870,7 +1852,7 @@ Status MySQLMetaImpl::CleanUpFilesWithTTL(uint16_t seconds) { //remove deleted table folder //don't remove table folder until all its files has been deleted try { - MetricCollector metric; + server::MetricCollector metric; { ScopedConnection connectionPtr(*mysql_connection_pool_, safe_grab); @@ -1955,7 +1937,7 @@ Status MySQLMetaImpl::CleanUp() { Status MySQLMetaImpl::Count(const std::string &table_id, uint64_t &result) { try { - MetricCollector metric; + server::MetricCollector metric; TableSchema table_schema; table_schema.table_id_ = table_id; diff --git a/cpp/src/db/meta/SqliteMetaImpl.cpp b/cpp/src/db/meta/SqliteMetaImpl.cpp index b4859473ef..76ad6060cc 100644 --- a/cpp/src/db/meta/SqliteMetaImpl.cpp +++ b/cpp/src/db/meta/SqliteMetaImpl.cpp @@ -34,24 +34,6 @@ Status HandleException(const std::string& desc, std::exception &e) { return Status::DBTransactionError(desc, e.what()); } -class MetricCollector { -public: - MetricCollector() { - server::Metrics::GetInstance().MetaAccessTotalIncrement(); - start_time_ = METRICS_NOW_TIME; - } - - ~MetricCollector() { - auto end_time = METRICS_NOW_TIME; - auto total_time = METRICS_MICROSECONDS(start_time_, end_time); - server::Metrics::GetInstance().MetaAccessDurationSecondsHistogramObserve(total_time); - } - -private: - using TIME_POINT = std::chrono::system_clock::time_point; - TIME_POINT start_time_; -}; - } inline auto StoragePrototype(const std::string &path) { @@ -170,7 +152,7 @@ Status SqliteMetaImpl::DropPartitionsByDates(const std::string &table_id, Status SqliteMetaImpl::CreateTable(TableSchema &table_schema) { try { - MetricCollector metric; + server::MetricCollector metric; //multi-threads call sqlite update may get exception('bad logic', etc), so we add a lock here std::lock_guard meta_lock(meta_mutex_); @@ -212,7 +194,7 @@ Status SqliteMetaImpl::CreateTable(TableSchema &table_schema) { Status SqliteMetaImpl::DeleteTable(const std::string& table_id) { try { - MetricCollector metric; + server::MetricCollector metric; //multi-threads call sqlite update may get exception('bad logic', etc), so we add a lock here std::lock_guard meta_lock(meta_mutex_); @@ -236,7 +218,7 @@ Status SqliteMetaImpl::DeleteTable(const std::string& table_id) { Status SqliteMetaImpl::DeleteTableFiles(const std::string& table_id) { try { - MetricCollector metric; + server::MetricCollector metric; //multi-threads call sqlite update may get exception('bad logic', etc), so we add a lock here std::lock_guard meta_lock(meta_mutex_); @@ -261,7 +243,7 @@ Status SqliteMetaImpl::DeleteTableFiles(const std::string& table_id) { Status SqliteMetaImpl::DescribeTable(TableSchema &table_schema) { try { - MetricCollector metric; + server::MetricCollector metric; auto groups = ConnectorPtr->select(columns(&TableSchema::id_, &TableSchema::state_, @@ -350,7 +332,7 @@ Status SqliteMetaImpl::HasNonIndexFiles(const std::string& table_id, bool& has) Status SqliteMetaImpl::UpdateTableIndexParam(const std::string &table_id, const TableIndex& index) { try { - MetricCollector metric; + server::MetricCollector metric; //multi-threads call sqlite update may get exception('bad logic', etc), so we add a lock here std::lock_guard meta_lock(meta_mutex_); @@ -399,7 +381,7 @@ Status SqliteMetaImpl::UpdateTableIndexParam(const std::string &table_id, const Status SqliteMetaImpl::DescribeTableIndex(const std::string &table_id, TableIndex& index) { try { - MetricCollector metric; + server::MetricCollector metric; auto groups = ConnectorPtr->select(columns(&TableSchema::engine_type_, &TableSchema::nlist_, @@ -426,7 +408,7 @@ Status SqliteMetaImpl::DescribeTableIndex(const std::string &table_id, TableInde Status SqliteMetaImpl::DropTableIndex(const std::string &table_id) { try { - MetricCollector metric; + server::MetricCollector metric; //multi-threads call sqlite update may get exception('bad logic', etc), so we add a lock here std::lock_guard meta_lock(meta_mutex_); @@ -464,7 +446,7 @@ Status SqliteMetaImpl::HasTable(const std::string &table_id, bool &has_or_not) { has_or_not = false; try { - MetricCollector metric; + server::MetricCollector metric; auto tables = ConnectorPtr->select(columns(&TableSchema::id_), where(c(&TableSchema::table_id_) == table_id and c(&TableSchema::state_) != (int)TableSchema::TO_DELETE)); @@ -483,7 +465,7 @@ Status SqliteMetaImpl::HasTable(const std::string &table_id, bool &has_or_not) { Status SqliteMetaImpl::AllTables(std::vector& table_schema_array) { try { - MetricCollector metric; + server::MetricCollector metric; auto selected = ConnectorPtr->select(columns(&TableSchema::id_, &TableSchema::table_id_, @@ -527,7 +509,7 @@ Status SqliteMetaImpl::CreateTableFile(TableFileSchema &file_schema) { } try { - MetricCollector metric; + server::MetricCollector metric; NextFileId(file_schema.file_id_); file_schema.dimension_ = table_schema.dimension_; @@ -558,7 +540,7 @@ Status SqliteMetaImpl::FilesToIndex(TableFilesSchema &files) { files.clear(); try { - MetricCollector metric; + server::MetricCollector metric; auto selected = ConnectorPtr->select(columns(&TableFileSchema::id_, &TableFileSchema::table_id_, @@ -616,7 +598,7 @@ Status SqliteMetaImpl::FilesToSearch(const std::string &table_id, files.clear(); try { - MetricCollector metric; + server::MetricCollector metric; if (partition.empty()) { std::vector file_type = {(int) TableFileSchema::RAW, (int) TableFileSchema::TO_INDEX, (int) TableFileSchema::INDEX}; @@ -716,7 +698,7 @@ Status SqliteMetaImpl::FilesToSearch(const std::string &table_id, const DatesT &partition, DatePartionedTableFilesSchema &files) { files.clear(); - MetricCollector metric; + server::MetricCollector metric; try { auto select_columns = columns(&TableFileSchema::id_, @@ -793,7 +775,7 @@ Status SqliteMetaImpl::FilesToMerge(const std::string &table_id, files.clear(); try { - MetricCollector metric; + server::MetricCollector metric; //check table existence TableSchema table_schema; @@ -967,7 +949,7 @@ Status SqliteMetaImpl::DiscardFiles(long to_discard_size) { ENGINE_LOG_DEBUG << "About to discard size=" << to_discard_size; try { - MetricCollector metric; + server::MetricCollector metric; //multi-threads call sqlite update may get exception('bad logic', etc), so we add a lock here std::lock_guard meta_lock(meta_mutex_); @@ -1024,7 +1006,7 @@ Status SqliteMetaImpl::DiscardFiles(long to_discard_size) { Status SqliteMetaImpl::UpdateTableFile(TableFileSchema &file_schema) { file_schema.updated_time_ = utils::GetMicroSecTimeStamp(); try { - MetricCollector metric; + server::MetricCollector metric; //multi-threads call sqlite update may get exception('bad logic', etc), so we add a lock here std::lock_guard meta_lock(meta_mutex_); @@ -1050,7 +1032,7 @@ Status SqliteMetaImpl::UpdateTableFile(TableFileSchema &file_schema) { Status SqliteMetaImpl::UpdateTableFilesToIndex(const std::string& table_id) { try { - MetricCollector metric; + server::MetricCollector metric; //multi-threads call sqlite update may get exception('bad logic', etc), so we add a lock here std::lock_guard meta_lock(meta_mutex_); @@ -1072,7 +1054,7 @@ Status SqliteMetaImpl::UpdateTableFilesToIndex(const std::string& table_id) { Status SqliteMetaImpl::UpdateTableFiles(TableFilesSchema &files) { try { - MetricCollector metric; + server::MetricCollector metric; //multi-threads call sqlite update may get exception('bad logic', etc), so we add a lock here std::lock_guard meta_lock(meta_mutex_); @@ -1121,7 +1103,7 @@ Status SqliteMetaImpl::CleanUpFilesWithTTL(uint16_t seconds) { //remove to_delete files try { - MetricCollector metric; + server::MetricCollector metric; //multi-threads call sqlite update may get exception('bad logic', etc), so we add a lock here std::lock_guard meta_lock(meta_mutex_); @@ -1165,7 +1147,7 @@ Status SqliteMetaImpl::CleanUpFilesWithTTL(uint16_t seconds) { //remove to_delete tables try { - MetricCollector metric; + server::MetricCollector metric; //multi-threads call sqlite update may get exception('bad logic', etc), so we add a lock here std::lock_guard meta_lock(meta_mutex_); @@ -1195,7 +1177,7 @@ Status SqliteMetaImpl::CleanUpFilesWithTTL(uint16_t seconds) { //remove deleted table folder //don't remove table folder until all its files has been deleted try { - MetricCollector metric; + server::MetricCollector metric; for(auto& table_id : table_ids) { auto selected = ConnectorPtr->select(columns(&TableFileSchema::file_id_), @@ -1214,7 +1196,7 @@ Status SqliteMetaImpl::CleanUpFilesWithTTL(uint16_t seconds) { Status SqliteMetaImpl::CleanUp() { try { - MetricCollector metric; + server::MetricCollector metric; //multi-threads call sqlite update may get exception('bad logic', etc), so we add a lock here std::lock_guard meta_lock(meta_mutex_); @@ -1245,7 +1227,7 @@ Status SqliteMetaImpl::CleanUp() { Status SqliteMetaImpl::Count(const std::string &table_id, uint64_t &result) { try { - MetricCollector metric; + server::MetricCollector metric; std::vector file_type = {(int) TableFileSchema::RAW, (int) TableFileSchema::TO_INDEX, (int) TableFileSchema::INDEX}; auto selected = ConnectorPtr->select(columns(&TableFileSchema::row_count_), diff --git a/cpp/src/db/scheduler/task/SearchTask.cpp b/cpp/src/db/scheduler/task/SearchTask.cpp index 4e7c0f4611..86401323ea 100644 --- a/cpp/src/db/scheduler/task/SearchTask.cpp +++ b/cpp/src/db/scheduler/task/SearchTask.cpp @@ -59,23 +59,6 @@ void ParallelReduce(std::function& reduce_function, size_t } } -void CollectDurationMetrics(int index_type, double total_time) { - switch(index_type) { - case meta::TableFileSchema::RAW: { - server::Metrics::GetInstance().SearchRawDataDurationSecondsHistogramObserve(total_time); - break; - } - case meta::TableFileSchema::TO_INDEX: { - server::Metrics::GetInstance().SearchRawDataDurationSecondsHistogramObserve(total_time); - break; - } - default: { - server::Metrics::GetInstance().SearchIndexDataDurationSecondsHistogramObserve(total_time); - break; - } - } -} - } SearchTask::SearchTask() @@ -92,7 +75,7 @@ std::shared_ptr SearchTask::Execute() { server::TimeRecorder rc("DoSearch file id:" + std::to_string(index_id_)); - auto start_time = METRICS_NOW_TIME; + server::CollectSearchTaskMetrics metrics(file_type_); bool metric_l2 = (index_engine_->IndexMetricType() == MetricType::L2); @@ -137,10 +120,6 @@ std::shared_ptr SearchTask::Execute() { context->IndexSearchDone(index_id_); } - auto end_time = METRICS_NOW_TIME; - auto total_time = METRICS_MICROSECONDS(start_time, end_time); - CollectDurationMetrics(file_type_, total_time); - rc.ElapseFromBegin("totally cost"); return nullptr; diff --git a/cpp/src/metrics/Metrics.h b/cpp/src/metrics/Metrics.h index 65df7140cc..bc47971b26 100644 --- a/cpp/src/metrics/Metrics.h +++ b/cpp/src/metrics/Metrics.h @@ -6,6 +6,7 @@ #pragma once #include "MetricBase.h" +#include "db/meta/MetaTypes.h" namespace zilliz { @@ -29,6 +30,233 @@ class Metrics { static MetricsBase &CreateMetricsCollector(); }; +class CollectInsertMetrics { +public: + CollectInsertMetrics(std::chrono::system_clock::time_point start_time, + size_t n, bool succeed) : start_time_(start_time), n_(n), succeed_(succeed) { + } + + ~CollectInsertMetrics() { + auto end_time = METRICS_NOW_TIME; + auto total_time = METRICS_MICROSECONDS(start_time_, end_time); + double avg_time = total_time / n_; + for (int i = 0; i < n_; ++i) { + Metrics::GetInstance().AddVectorsDurationHistogramOberve(avg_time); + } + + // server::Metrics::GetInstance().add_vector_duration_seconds_quantiles().Observe((average_time)); + if (succeed_) { + server::Metrics::GetInstance().AddVectorsSuccessTotalIncrement(n_); + server::Metrics::GetInstance().AddVectorsSuccessGaugeSet(n_); + } + else { + server::Metrics::GetInstance().AddVectorsFailTotalIncrement(n_); + server::Metrics::GetInstance().AddVectorsFailGaugeSet(n_); + } + } + +private: + using TIME_POINT = std::chrono::system_clock::time_point; + TIME_POINT start_time_; + size_t n_; + bool succeed_; +}; + +class CollectQueryMetrics { +public: + CollectQueryMetrics(size_t nq) : nq_(nq) { + start_time_ = METRICS_NOW_TIME; + } + + ~CollectQueryMetrics() { + auto end_time = METRICS_NOW_TIME; + auto total_time = METRICS_MICROSECONDS(start_time_, end_time); + for (int i = 0; i < nq_; ++i) { + server::Metrics::GetInstance().QueryResponseSummaryObserve(total_time); + } + auto average_time = total_time / nq_; + server::Metrics::GetInstance().QueryVectorResponseSummaryObserve(average_time, nq_); + server::Metrics::GetInstance().QueryVectorResponsePerSecondGaugeSet(double (nq_) / total_time); + } + +private: + using TIME_POINT = std::chrono::system_clock::time_point; + TIME_POINT start_time_; + size_t nq_; +}; + +class CollectMergeFilesMetrics { +public: + CollectMergeFilesMetrics() { + start_time_ = METRICS_NOW_TIME; + } + + ~CollectMergeFilesMetrics() { + auto end_time = METRICS_NOW_TIME; + auto total_time = METRICS_MICROSECONDS(start_time_, end_time); + server::Metrics::GetInstance().MemTableMergeDurationSecondsHistogramObserve(total_time); + } + +private: + using TIME_POINT = std::chrono::system_clock::time_point; + TIME_POINT start_time_; +}; + +class CollectBuildIndexMetrics { +public: + CollectBuildIndexMetrics() { + start_time_ = METRICS_NOW_TIME; + } + + ~CollectBuildIndexMetrics() { + auto end_time = METRICS_NOW_TIME; + auto total_time = METRICS_MICROSECONDS(start_time_, end_time); + server::Metrics::GetInstance().BuildIndexDurationSecondsHistogramObserve(total_time); + } +private: + using TIME_POINT = std::chrono::system_clock::time_point; + TIME_POINT start_time_; +}; + +class CollectExecutionEngineMetrics { +public: + CollectExecutionEngineMetrics(double& physical_size) : physical_size_(physical_size) { + start_time_ = METRICS_NOW_TIME; + } + + ~CollectExecutionEngineMetrics() { + auto end_time = METRICS_NOW_TIME; + auto total_time = METRICS_MICROSECONDS(start_time_, end_time); + + server::Metrics::GetInstance().FaissDiskLoadDurationSecondsHistogramObserve(total_time); + + server::Metrics::GetInstance().FaissDiskLoadSizeBytesHistogramObserve(physical_size_); + server::Metrics::GetInstance().FaissDiskLoadIOSpeedGaugeSet(physical_size_ / double(total_time)); + } + +private: + using TIME_POINT = std::chrono::system_clock::time_point; + TIME_POINT start_time_; + double& physical_size_; +}; + +class CollectSerializeMetrics { +public: + CollectSerializeMetrics(size_t& size) : size_(size) { + start_time_ = METRICS_NOW_TIME; + } + + ~CollectSerializeMetrics() { + auto end_time = METRICS_NOW_TIME; + auto total_time = METRICS_MICROSECONDS(start_time_, end_time); + server::Metrics::GetInstance().DiskStoreIOSpeedGaugeSet((double) size_ / total_time); + } +private: + using TIME_POINT = std::chrono::system_clock::time_point; + TIME_POINT start_time_; + size_t& size_; +}; + +class CollectorAddMetrics { +public: + CollectorAddMetrics(size_t n, uint16_t dimension) : n_(n), dimension_(dimension) { + start_time_ = METRICS_NOW_TIME; + } + + ~CollectorAddMetrics() { + auto end_time = METRICS_NOW_TIME; + auto total_time = METRICS_MICROSECONDS(start_time_, end_time); + server::Metrics::GetInstance().AddVectorsPerSecondGaugeSet(static_cast(n_), + static_cast(dimension_), + total_time); + } +private: + using TIME_POINT = std::chrono::system_clock::time_point; + TIME_POINT start_time_; + size_t n_; + uint16_t dimension_; +}; + +class CollectorDurationMetrics { +public: + CollectorDurationMetrics() { + start_time_ = METRICS_NOW_TIME; + } + + ~CollectorDurationMetrics() { + auto end_time = METRICS_NOW_TIME; + auto total_time = METRICS_MICROSECONDS(start_time_, end_time); + switch (index_type_) { + case engine::meta::TableFileSchema::RAW: { + server::Metrics::GetInstance().SearchRawDataDurationSecondsHistogramObserve(total_time); + break; + } + case engine::meta::TableFileSchema::TO_INDEX: { + server::Metrics::GetInstance().SearchRawDataDurationSecondsHistogramObserve(total_time); + break; + } + default: { + server::Metrics::GetInstance().SearchIndexDataDurationSecondsHistogramObserve(total_time); + break; + } + } + } +private: + using TIME_POINT = std::chrono::system_clock::time_point; + TIME_POINT start_time_; + int index_type_; +}; + +class CollectSearchTaskMetrics { +public: + CollectSearchTaskMetrics(int index_type) : index_type_(index_type) { + start_time_ = METRICS_NOW_TIME; + } + + ~CollectSearchTaskMetrics() { + auto end_time = METRICS_NOW_TIME; + auto total_time = METRICS_MICROSECONDS(start_time_, end_time); + switch(index_type_) { + case engine::meta::TableFileSchema::RAW: { + server::Metrics::GetInstance().SearchRawDataDurationSecondsHistogramObserve(total_time); + break; + } + case engine::meta::TableFileSchema::TO_INDEX: { + server::Metrics::GetInstance().SearchRawDataDurationSecondsHistogramObserve(total_time); + break; + } + default: { + server::Metrics::GetInstance().SearchIndexDataDurationSecondsHistogramObserve(total_time); + break; + } + } + } + +private: + using TIME_POINT = std::chrono::system_clock::time_point; + TIME_POINT start_time_; + int index_type_; +}; + +class MetricCollector { +public: + MetricCollector() { + server::Metrics::GetInstance().MetaAccessTotalIncrement(); + start_time_ = METRICS_NOW_TIME; + } + + ~MetricCollector() { + auto end_time = METRICS_NOW_TIME; + auto total_time = METRICS_MICROSECONDS(start_time_, end_time); + server::Metrics::GetInstance().MetaAccessDurationSecondsHistogramObserve(total_time); + } + +private: + using TIME_POINT = std::chrono::system_clock::time_point; + TIME_POINT start_time_; +}; + + } } diff --git a/cpp/src/scheduler/task/SearchTask.cpp b/cpp/src/scheduler/task/SearchTask.cpp index ea482bcb72..86d58d7185 100644 --- a/cpp/src/scheduler/task/SearchTask.cpp +++ b/cpp/src/scheduler/task/SearchTask.cpp @@ -81,24 +81,6 @@ CollectFileMetrics(int file_type, size_t file_size) { } } -void -CollectDurationMetrics(int index_type, double total_time) { - switch (index_type) { - case meta::TableFileSchema::RAW: { - server::Metrics::GetInstance().SearchRawDataDurationSecondsHistogramObserve(total_time); - break; - } - case meta::TableFileSchema::TO_INDEX: { - server::Metrics::GetInstance().SearchRawDataDurationSecondsHistogramObserve(total_time); - break; - } - default: { - server::Metrics::GetInstance().SearchIndexDataDurationSecondsHistogramObserve(total_time); - break; - } - } -} - XSearchTask::XSearchTask(TableFileSchemaPtr file) : file_(file) { index_engine_ = EngineFactory::Build(file_->dimension_, file_->location_, @@ -159,7 +141,7 @@ XSearchTask::Execute() { server::TimeRecorder rc("DoSearch file id:" + std::to_string(index_id_)); - auto start_time = METRICS_NOW_TIME; + server::CollectorDurationMetrics metrics(index_type_); std::vector output_ids; std::vector output_distence; @@ -202,10 +184,6 @@ XSearchTask::Execute() { context->IndexSearchDone(index_id_); } - auto end_time = METRICS_NOW_TIME; - auto total_time = METRICS_MICROSECONDS(start_time, end_time); - CollectDurationMetrics(index_type_, total_time); - rc.ElapseFromBegin("totally cost"); } diff --git a/cpp/unittest/metrics/metricbase_test.cpp b/cpp/unittest/metrics/metricbase_test.cpp index 1997748fdd..0f13cae972 100644 --- a/cpp/unittest/metrics/metricbase_test.cpp +++ b/cpp/unittest/metrics/metricbase_test.cpp @@ -21,7 +21,7 @@ TEST(MetricbaseTest, METRICBASE_TEST){ instance.RawFileSizeHistogramObserve(1.0); instance.IndexFileSizeHistogramObserve(1.0); instance.BuildIndexDurationSecondsHistogramObserve(1.0); - instance.CacheUsageGaugeSet(1.0); + instance.CpuCacheUsageGaugeSet(1.0); instance.MetaAccessTotalIncrement(); instance.MetaAccessDurationSecondsHistogramObserve(1.0); instance.FaissDiskLoadDurationSecondsHistogramObserve(1.0); diff --git a/cpp/unittest/metrics/prometheus_test.cpp b/cpp/unittest/metrics/prometheus_test.cpp index 004e58a5fc..53703c9abd 100644 --- a/cpp/unittest/metrics/prometheus_test.cpp +++ b/cpp/unittest/metrics/prometheus_test.cpp @@ -22,7 +22,7 @@ TEST(PrometheusTest, PROMETHEUS_TEST){ instance.RawFileSizeHistogramObserve(1.0); instance.IndexFileSizeHistogramObserve(1.0); instance.BuildIndexDurationSecondsHistogramObserve(1.0); - instance.CacheUsageGaugeSet(1.0); + instance.CpuCacheUsageGaugeSet(1.0); instance.MetaAccessTotalIncrement(); instance.MetaAccessDurationSecondsHistogramObserve(1.0); instance.FaissDiskLoadDurationSecondsHistogramObserve(1.0); diff --git a/cpp/unittest/server/appendix/server_config.yaml b/cpp/unittest/server/appendix/server_config.yaml index 0f09d570f1..7faabee224 100644 --- a/cpp/unittest/server/appendix/server_config.yaml +++ b/cpp/unittest/server/appendix/server_config.yaml @@ -18,7 +18,6 @@ metric_config: is_startup: off # if monitoring start: on, off collector: prometheus # metrics collector: prometheus prometheus_config: # following are prometheus configure - collect_type: pull # prometheus collect data method port: 8080 # the port prometheus use to fetch metrics push_gateway_ip_address: 127.0.0.1 # push method configure: push gateway ip address push_gateway_port: 9091 # push method configure: push gateway port From 1503673ab18100178afe44b9779440e1ec626aef Mon Sep 17 00:00:00 2001 From: Yu Kun Date: Fri, 23 Aug 2019 17:38:31 +0800 Subject: [PATCH 2/4] modify CollectorMetrics in InsertVectors Former-commit-id: 32ed2cebf20dcb828a5d97a40f0c833a566d4d48 --- cpp/src/db/DBImpl.cpp | 6 +++--- cpp/src/metrics/Metrics.h | 8 ++++---- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/cpp/src/db/DBImpl.cpp b/cpp/src/db/DBImpl.cpp index a09457d354..4299801cb9 100644 --- a/cpp/src/db/DBImpl.cpp +++ b/cpp/src/db/DBImpl.cpp @@ -141,9 +141,9 @@ Status DBImpl::InsertVectors(const std::string& table_id_, uint64_t n, const float* vectors, IDNumbers& vector_ids_) { ENGINE_LOG_DEBUG << "Insert " << n << " vectors to cache"; - auto start_time = METRICS_NOW_TIME; - Status status = mem_mgr_->InsertVectors(table_id_, n, vectors, vector_ids_); - zilliz::milvus::server::CollectInsertMetrics metrics(start_time, n, status.ok()); + Status status; + zilliz::milvus::server::CollectInsertMetrics metrics(n, status); + status = mem_mgr_->InsertVectors(table_id_, n, vectors, vector_ids_); // std::chrono::microseconds time_span = std::chrono::duration_cast(end_time - start_time); // double average_time = double(time_span.count()) / n; diff --git a/cpp/src/metrics/Metrics.h b/cpp/src/metrics/Metrics.h index bc47971b26..98e7c3064e 100644 --- a/cpp/src/metrics/Metrics.h +++ b/cpp/src/metrics/Metrics.h @@ -32,8 +32,8 @@ class Metrics { class CollectInsertMetrics { public: - CollectInsertMetrics(std::chrono::system_clock::time_point start_time, - size_t n, bool succeed) : start_time_(start_time), n_(n), succeed_(succeed) { + CollectInsertMetrics(size_t n, engine::Status& status) : n_(n), status_(status) { + start_time_ = METRICS_NOW_TIME; } ~CollectInsertMetrics() { @@ -45,7 +45,7 @@ public: } // server::Metrics::GetInstance().add_vector_duration_seconds_quantiles().Observe((average_time)); - if (succeed_) { + if (status_.ok()) { server::Metrics::GetInstance().AddVectorsSuccessTotalIncrement(n_); server::Metrics::GetInstance().AddVectorsSuccessGaugeSet(n_); } @@ -59,7 +59,7 @@ private: using TIME_POINT = std::chrono::system_clock::time_point; TIME_POINT start_time_; size_t n_; - bool succeed_; + engine::Status& status_; }; class CollectQueryMetrics { From f562894905ccb473136885189895c506cbf0d047 Mon Sep 17 00:00:00 2001 From: Yu Kun Date: Fri, 23 Aug 2019 17:55:24 +0800 Subject: [PATCH 3/4] remove some ref Former-commit-id: 052bf2dd4cd3e5832fb8c284511bdde1919bf9bb --- cpp/src/db/engine/ExecutionEngineImpl.cpp | 14 ++++---------- cpp/src/db/insert/MemTableFile.cpp | 4 +--- cpp/src/metrics/Metrics.h | 8 ++++---- 3 files changed, 9 insertions(+), 17 deletions(-) diff --git a/cpp/src/db/engine/ExecutionEngineImpl.cpp b/cpp/src/db/engine/ExecutionEngineImpl.cpp index 446c6e083a..2952d70ef4 100644 --- a/cpp/src/db/engine/ExecutionEngineImpl.cpp +++ b/cpp/src/db/engine/ExecutionEngineImpl.cpp @@ -116,13 +116,12 @@ Status ExecutionEngineImpl::Serialize() { } Status ExecutionEngineImpl::Load(bool to_cache) { - double physical_size; - server::CollectExecutionEngineMetrics metrics(physical_size); - index_ = zilliz::milvus::cache::CpuCacheMgr::GetInstance()->GetIndex(location_); bool already_in_cache = (index_ != nullptr); if (!index_) { try { + double physical_size = PhysicalSize(); + server::CollectExecutionEngineMetrics metrics(physical_size); index_ = read_index(location_); ENGINE_LOG_DEBUG << "Disk io from: " << location_; } catch (knowhere::KnowhereException &e) { @@ -135,14 +134,11 @@ Status ExecutionEngineImpl::Load(bool to_cache) { if (!already_in_cache && to_cache) { Cache(); - physical_size = PhysicalSize(); } return Status::OK(); } Status ExecutionEngineImpl::CopyToGpu(uint64_t device_id) { - double physical_size; - server::CollectExecutionEngineMetrics metrics(physical_size); index_ = zilliz::milvus::cache::GpuCacheMgr::GetInstance(device_id)->GetIndex(location_); bool already_in_cache = (index_ != nullptr); if (!index_) { @@ -159,15 +155,12 @@ Status ExecutionEngineImpl::CopyToGpu(uint64_t device_id) { if (!already_in_cache) { GpuCache(device_id); - physical_size = PhysicalSize(); } return Status::OK(); } Status ExecutionEngineImpl::CopyToCpu() { - double physical_size; - server::CollectExecutionEngineMetrics metrics(physical_size); index_ = zilliz::milvus::cache::CpuCacheMgr::GetInstance()->GetIndex(location_); bool already_in_cache = (index_ != nullptr); if (!index_) { @@ -184,7 +177,6 @@ Status ExecutionEngineImpl::CopyToCpu() { if(!already_in_cache) { Cache(); - physical_size = PhysicalSize(); } return Status::OK(); } @@ -198,6 +190,8 @@ Status ExecutionEngineImpl::Merge(const std::string &location) { auto to_merge = zilliz::milvus::cache::CpuCacheMgr::GetInstance()->GetIndex(location); if (!to_merge) { try { + double physical_size = server::CommonUtil::GetFileSize(location); + server::CollectExecutionEngineMetrics metrics(physical_size); to_merge = read_index(location); } catch (knowhere::KnowhereException &e) { ENGINE_LOG_ERROR << e.what(); diff --git a/cpp/src/db/insert/MemTableFile.cpp b/cpp/src/db/insert/MemTableFile.cpp index 51d5dcd365..99f2e19b35 100644 --- a/cpp/src/db/insert/MemTableFile.cpp +++ b/cpp/src/db/insert/MemTableFile.cpp @@ -80,12 +80,10 @@ bool MemTableFile::IsFull() { } Status MemTableFile::Serialize() { - size_t size; + size_t size = GetCurrentMem(); server::CollectSerializeMetrics metrics(size); - size = GetCurrentMem(); execution_engine_->Serialize(); - table_file_schema_.file_size_ = execution_engine_->PhysicalSize(); table_file_schema_.row_count_ = execution_engine_->Count(); diff --git a/cpp/src/metrics/Metrics.h b/cpp/src/metrics/Metrics.h index 98e7c3064e..99c97c424f 100644 --- a/cpp/src/metrics/Metrics.h +++ b/cpp/src/metrics/Metrics.h @@ -120,7 +120,7 @@ private: class CollectExecutionEngineMetrics { public: - CollectExecutionEngineMetrics(double& physical_size) : physical_size_(physical_size) { + CollectExecutionEngineMetrics(double physical_size) : physical_size_(physical_size) { start_time_ = METRICS_NOW_TIME; } @@ -137,12 +137,12 @@ public: private: using TIME_POINT = std::chrono::system_clock::time_point; TIME_POINT start_time_; - double& physical_size_; + double physical_size_; }; class CollectSerializeMetrics { public: - CollectSerializeMetrics(size_t& size) : size_(size) { + CollectSerializeMetrics(size_t size) : size_(size) { start_time_ = METRICS_NOW_TIME; } @@ -154,7 +154,7 @@ public: private: using TIME_POINT = std::chrono::system_clock::time_point; TIME_POINT start_time_; - size_t& size_; + size_t size_; }; class CollectorAddMetrics { From bc1339c5f9c5525e8cefe3bd2beb780e306b6480 Mon Sep 17 00:00:00 2001 From: Yu Kun Date: Fri, 23 Aug 2019 19:08:27 +0800 Subject: [PATCH 4/4] add MetricsCollector Former-commit-id: 7953c0a4374624725507b6e7578628669a245ded --- cpp/src/db/insert/VectorSource.cpp | 2 +- cpp/src/metrics/Metrics.h | 12 +++++------ cpp/src/scheduler/task/SearchTask.cpp | 2 +- cpp/unittest/metrics/metrics_test.cpp | 29 +++++++++++++++++++++++++++ 4 files changed, 37 insertions(+), 8 deletions(-) diff --git a/cpp/src/db/insert/VectorSource.cpp b/cpp/src/db/insert/VectorSource.cpp index b977192575..f7d6a48297 100644 --- a/cpp/src/db/insert/VectorSource.cpp +++ b/cpp/src/db/insert/VectorSource.cpp @@ -24,7 +24,7 @@ Status VectorSource::Add(const ExecutionEnginePtr &execution_engine, size_t &num_vectors_added, IDNumbers &vector_ids) { - server::CollectorAddMetrics metrics(n_, table_file_schema.dimension_); + server::CollectAddMetrics metrics(n_, table_file_schema.dimension_); num_vectors_added = current_num_vectors_added + num_vectors_to_add <= n_ ? num_vectors_to_add : n_ - current_num_vectors_added; diff --git a/cpp/src/metrics/Metrics.h b/cpp/src/metrics/Metrics.h index 99c97c424f..48f9f2b111 100644 --- a/cpp/src/metrics/Metrics.h +++ b/cpp/src/metrics/Metrics.h @@ -157,13 +157,13 @@ private: size_t size_; }; -class CollectorAddMetrics { +class CollectAddMetrics { public: - CollectorAddMetrics(size_t n, uint16_t dimension) : n_(n), dimension_(dimension) { + CollectAddMetrics(size_t n, uint16_t dimension) : n_(n), dimension_(dimension) { start_time_ = METRICS_NOW_TIME; } - ~CollectorAddMetrics() { + ~CollectAddMetrics() { auto end_time = METRICS_NOW_TIME; auto total_time = METRICS_MICROSECONDS(start_time_, end_time); server::Metrics::GetInstance().AddVectorsPerSecondGaugeSet(static_cast(n_), @@ -177,13 +177,13 @@ private: uint16_t dimension_; }; -class CollectorDurationMetrics { +class CollectDurationMetrics { public: - CollectorDurationMetrics() { + CollectDurationMetrics(int index_type) : index_type_(index_type) { start_time_ = METRICS_NOW_TIME; } - ~CollectorDurationMetrics() { + ~CollectDurationMetrics() { auto end_time = METRICS_NOW_TIME; auto total_time = METRICS_MICROSECONDS(start_time_, end_time); switch (index_type_) { diff --git a/cpp/src/scheduler/task/SearchTask.cpp b/cpp/src/scheduler/task/SearchTask.cpp index 6c64436bea..b387e2de3b 100644 --- a/cpp/src/scheduler/task/SearchTask.cpp +++ b/cpp/src/scheduler/task/SearchTask.cpp @@ -141,7 +141,7 @@ XSearchTask::Execute() { server::TimeRecorder rc("DoSearch file id:" + std::to_string(index_id_)); - server::CollectorDurationMetrics metrics(index_type_); + server::CollectDurationMetrics metrics(index_type_); std::vector output_ids; std::vector output_distence; diff --git a/cpp/unittest/metrics/metrics_test.cpp b/cpp/unittest/metrics/metrics_test.cpp index a6301e05e1..c1c674f12b 100644 --- a/cpp/unittest/metrics/metrics_test.cpp +++ b/cpp/unittest/metrics/metrics_test.cpp @@ -119,4 +119,33 @@ TEST_F(MetricTest, Metric_Tes) { delete [] qxb; }; +TEST_F(MetricTest, Collector_Metrics_Test){ + engine::Status status = engine::Status::OK(); + server::CollectInsertMetrics insert_metrics0(0, status); + status = engine::Status::Error("error"); + server::CollectInsertMetrics insert_metrics1(0, status); + + server::CollectQueryMetrics query_metrics(10); + + server::CollectMergeFilesMetrics merge_metrics(); + + server::CollectBuildIndexMetrics build_index_metrics(); + + server::CollectExecutionEngineMetrics execution_metrics(10); + + server::CollectSerializeMetrics serialize_metrics(10); + + server::CollectAddMetrics add_metrics(10, 128); + + server::CollectDurationMetrics duration_metrics_raw(engine::meta::TableFileSchema::RAW); + server::CollectDurationMetrics duration_metrics_index(engine::meta::TableFileSchema::TO_INDEX); + server::CollectDurationMetrics duration_metrics_delete(engine::meta::TableFileSchema::TO_DELETE); + + server::CollectSearchTaskMetrics search_metrics_raw(engine::meta::TableFileSchema::RAW); + server::CollectSearchTaskMetrics search_metrics_index(engine::meta::TableFileSchema::TO_INDEX); + server::CollectSearchTaskMetrics search_metrics_delete(engine::meta::TableFileSchema::TO_DELETE); + + server::MetricCollector metric_collector(); +} +