Merge branch 'branch-1.2' into 'branch-1.2'

MS-37 Add metrics

See merge request megasearch/vecwise_engine!49

Former-commit-id: a24b1914ac475406f85ba83ff2c5ead2796122d8

Commit 84396d0521
@@ -26,3 +26,4 @@ Please mark all change in change log and use the ticket from JIRA.
- MS-30 - Use faiss v1.5.2
- MS-32 - Fix thrift error
- MS-34 - Fix prometheus-cpp thirdparty
- MS-37 - Add query, cache usage, disk write speed and file data size metrics

@@ -71,17 +71,26 @@ Status DBImpl<EngineT>::InsertVectors(const std::string& table_id_,
    // server::Metrics::GetInstance().add_vector_duration_seconds_quantiles().Observe((average_time));
    if (!status.ok()) {
        server::Metrics::GetInstance().AddVectorsFailTotalIncrement(n);
        server::Metrics::GetInstance().AddVectorsFailGaugeSet(n);
        return status;
    }
    server::Metrics::GetInstance().AddVectorsSuccessTotalIncrement(n);
    server::Metrics::GetInstance().AddVectorsSuccessGaugeSet(n);
}

template<typename EngineT>
Status DBImpl<EngineT>::Query(const std::string &table_id, size_t k, size_t nq,
                              const float *vectors, QueryResults &results) {

    auto start_time = METRICS_NOW_TIME;
    meta::DatesT dates = {meta::Meta::GetDate()};
    return Query(table_id, k, nq, vectors, dates, results);
    Status result = Query(table_id, k, nq, vectors, dates, results);
    auto end_time = METRICS_NOW_TIME;
    auto total_time = METRICS_MICROSECONDS(start_time,end_time);
    auto average_time = total_time / nq;
    for (int i = 0; i < nq; ++i) {
        server::Metrics::GetInstance().QueryResponseSummaryObserve(average_time);
    }
    return result;
}

template<typename EngineT>
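Note on the timing calls above: METRICS_NOW_TIME and METRICS_MICROSECONDS are project macros whose definitions are not part of this diff. A minimal sketch of how such macros could be written over std::chrono, purely for illustration, is:

    // Illustrative only: plausible std::chrono-based definitions for the timing
    // macros used in DBImpl; the real definitions in this repository may differ.
    #include <chrono>

    #define METRICS_NOW_TIME std::chrono::high_resolution_clock::now()
    #define METRICS_MICROSECONDS(a, b) \
        (std::chrono::duration_cast<std::chrono::microseconds>((b) - (a)).count())

With definitions like these, total_time is an integer count of microseconds, so average_time above is the mean per-query latency in microseconds, observed once per query into the response summary.
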
@@ -250,7 +259,12 @@ void DBImpl<EngineT>::BackgroundTimerTask(int interval) {
        if (shutting_down_.load(std::memory_order_acquire)) break;

        std::this_thread::sleep_for(std::chrono::seconds(interval));

        int64_t cache_total = cache::CpuCacheMgr::GetInstance()->CacheUsage();
        LOG(DEBUG) << "Cache usage " << cache_total;
        server::Metrics::GetInstance().CacheUsageGaugeSet(static_cast<double>(cache_total));
        long size;
        Size(size);
        server::Metrics::GetInstance().DataFileSizeGaugeSet(size);
        TrySchedule();
    }
}

@@ -8,6 +8,7 @@
#include "MemManager.h"
#include "Meta.h"
#include "MetaConsts.h"
#include "metrics/Metrics.h"

#include <iostream>
#include <sstream>

@@ -48,8 +49,14 @@ template<typename EngineT>
Status MemVectors<EngineT>::Serialize(std::string& table_id) {
    table_id = schema_.table_id;
    auto size = ApproximateSize();
    auto start_time = METRICS_NOW_TIME;
    pEE_->Serialize();
    auto end_time = METRICS_NOW_TIME;
    auto total_time = METRICS_MICROSECONDS(start_time, end_time);
    schema_.size = size;

    server::Metrics::GetInstance().DiskStoreIOSpeedGaugeSet(size/total_time);

    schema_.file_type = (size >= options_.index_trigger_size) ?
            meta::TableFileSchema::TO_INDEX : meta::TableFileSchema::RAW;

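The gauge written in Serialize() above is the flush throughput: ApproximateSize() bytes divided by elapsed microseconds, matching the disk_store_IO_speed_bytes_per_microseconds metric declared further down. A hedged sketch of the same calculation follows; DiskWriteSpeed is a hypothetical helper and the zero-interval guard is an illustrative addition, not part of this commit.

    // Sketch: bytes-per-microsecond value of the kind fed to DiskStoreIOSpeedGaugeSet.
    // The zero-time guard keeps the gauge from receiving inf/NaN on a fast flush.
    #include <cstddef>
    #include <cstdint>

    inline double DiskWriteSpeed(std::size_t bytes_written, int64_t elapsed_us) {
        return elapsed_us > 0 ? static_cast<double>(bytes_written) / elapsed_us : 0.0;
    }
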
@@ -11,6 +11,7 @@
#include <string>
#include <signal.h>
#include <easylogging++.h>
#include "metrics/Metrics.h"

#include "utils/SignalUtil.h"
#include "utils/CommonUtil.h"

@@ -25,7 +26,6 @@ using namespace zilliz::vecwise;
int
main(int argc, char *argv[]) {
    printf("Vecwise engine server start...\n");

    // zilliz::lib::gpu::InitMemoryAllocator();

    signal(SIGINT, server::SignalUtil::HandleSignal);

@@ -64,7 +64,11 @@ class MetricsBase{
    virtual void IndexFileSizeGaugeSet(double value) {};
    virtual void RawFileSizeGaugeSet(double value) {};
    virtual void FaissDiskLoadIOSpeedGaugeSet(double value) {};

    virtual void QueryResponseSummaryObserve(double value) {};
    virtual void DiskStoreIOSpeedGaugeSet(double value) {};
    virtual void DataFileSizeGaugeSet(double value) {};
    virtual void AddVectorsSuccessGaugeSet(double value) {};
    virtual void AddVectorsFailGaugeSet(double value) {};
};

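The empty virtual bodies above follow a null-object style: instrumentation points such as DBImpl call server::Metrics::GetInstance() unconditionally, and the call does nothing unless a concrete backend (PrometheusMetrics, below) overrides the hook. A simplified, self-contained sketch of that pattern, with illustrative class and member names that are not the repository's, is:

    // Sketch of the no-op-base / concrete-override pattern used by MetricsBase
    // and PrometheusMetrics; names here are illustrative.
    #include <atomic>

    class MetricsHooksSketch {
     public:
        virtual ~MetricsHooksSketch() = default;
        // Hooks default to doing nothing, so callers never need to check
        // whether metrics are enabled.
        virtual void CacheUsageGaugeSet(double /*value*/) {}
        virtual void QueryResponseSummaryObserve(double /*value*/) {}
    };

    class InMemoryMetricsSketch : public MetricsHooksSketch {
     public:
        void CacheUsageGaugeSet(double value) override { cache_usage_.store(value); }
     private:
        std::atomic<double> cache_usage_{0.0};
    };
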
@@ -4,7 +4,6 @@
 * Proprietary and confidential.
 ******************************************************************************/

#pragma once

#include "Metrics.h"
#include "PrometheusMetrics.h"

@@ -97,7 +97,11 @@ class PrometheusMetrics: public MetricsBase {
    void RawFileSizeTotalIncrement(double value = 1) { if(startup_) raw_file_size_total_.Increment(value);};
    void IndexFileSizeGaugeSet(double value) { if(startup_) index_file_size_gauge_.Set(value);};
    void RawFileSizeGaugeSet(double value) { if(startup_) raw_file_size_gauge_.Set(value);};

    void QueryResponseSummaryObserve(double value) { if(startup_) query_response_summary_.Observe(value);};
    void DiskStoreIOSpeedGaugeSet(double value) { if(startup_) disk_store_IO_speed_gauge_.Set(value);};
    void DataFileSizeGaugeSet(double value) { if(startup_) data_file_size_gauge_.Set(value);};
    void AddVectorsSuccessGaugeSet(double value) { if(startup_) add_vectors_success_gauge_.Set(value);};
    void AddVectorsFailGaugeSet(double value) { if(startup_) add_vectors_fail_gauge_.Set(value);};

@@ -295,11 +299,6 @@ class PrometheusMetrics: public MetricsBase {

    ////all form Cache.cpp
    //record cache usage, when insert/erase/clear/free
    prometheus::Family<prometheus::Gauge> &cache_usage_ = prometheus::BuildGauge()
            .Name("cache_usage")
            .Help("total bytes that cache used")
            .Register(*registry_);
    prometheus::Gauge &cache_usage_gauge_ = cache_usage_.Add({});

    ////all from Meta.cpp

@@ -386,6 +385,39 @@ class PrometheusMetrics: public MetricsBase {
            .Register(*registry_);
    prometheus::Counter &cache_access_total_ = cache_access_.Add({});

    // record cache usage and %
    prometheus::Family<prometheus::Gauge> &cache_usage_ = prometheus::BuildGauge()
            .Name("cache_usage_bytes")
            .Help("current cache usage by bytes")
            .Register(*registry_);
    prometheus::Gauge &cache_usage_gauge_ = cache_usage_.Add({});

    // record query response
    using Quantiles = std::vector<prometheus::detail::CKMSQuantiles::Quantile>;
    prometheus::Family<prometheus::Summary> &query_response_ = prometheus::BuildSummary()
            .Name("query_response_summary")
            .Help("query response summary")
            .Register(*registry_);
    prometheus::Summary &query_response_summary_ = query_response_.Add({}, Quantiles{{0.95,0.00},{0.9,0.05},{0.8,0.1}});

    prometheus::Family<prometheus::Gauge> &disk_store_IO_speed_ = prometheus::BuildGauge()
            .Name("disk_store_IO_speed_bytes_per_microseconds")
            .Help("disk_store_IO_speed")
            .Register(*registry_);
    prometheus::Gauge &disk_store_IO_speed_gauge_ = disk_store_IO_speed_.Add({});

    prometheus::Family<prometheus::Gauge> &data_file_size_ = prometheus::BuildGauge()
            .Name("data_file_size_bytes")
            .Help("data file size by bytes")
            .Register(*registry_);
    prometheus::Gauge &data_file_size_gauge_ = data_file_size_.Add({});

    prometheus::Family<prometheus::Gauge> &add_vectors_ = prometheus::BuildGauge()
            .Name("add_vectors")
            .Help("current added vectors")
            .Register(*registry_);
    prometheus::Gauge &add_vectors_success_gauge_ = add_vectors_.Add({{"outcome", "success"}});
    prometheus::Gauge &add_vectors_fail_gauge_ = add_vectors_.Add({{"outcome", "fail"}});
};

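For readers unfamiliar with prometheus-cpp, the members added above all follow the same registration pattern: build a metric family on a shared registry, Add() labelled instances of it, and expose the registry over HTTP. A minimal, self-contained sketch of that pattern follows; the listen address, metric values, and main() wrapper are illustrative, not part of this commit.

    // Minimal prometheus-cpp usage sketch mirroring the add_vectors_ family above.
    #include <prometheus/exposer.h>
    #include <prometheus/gauge.h>
    #include <prometheus/registry.h>
    #include <memory>

    int main() {
        prometheus::Exposer exposer{"127.0.0.1:8080"};            // serves /metrics
        auto registry = std::make_shared<prometheus::Registry>();

        // One family, two labelled gauges -- the same shape as
        // add_vectors_success_gauge_ / add_vectors_fail_gauge_ above.
        auto &add_vectors = prometheus::BuildGauge()
                .Name("add_vectors")
                .Help("current added vectors")
                .Register(*registry);
        auto &success_gauge = add_vectors.Add({{"outcome", "success"}});
        auto &fail_gauge = add_vectors.Add({{"outcome", "fail"}});

        success_gauge.Set(128.0);
        fail_gauge.Set(0.0);

        exposer.RegisterCollectable(registry);                    // make it scrapeable
        return 0;
    }

The startup_ flag checked in the wrapper methods earlier simply gates whether these Set()/Observe() calls run when metrics are disabled.
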
@@ -138,7 +138,7 @@ int
Server::Start() {
    // server::Metrics::GetInstance().Init();
    // server::Metrics::GetInstance().exposer_ptr()->RegisterCollectable(server::Metrics::GetInstance().registry_ptr());
    server::Metrics::GetInstance().Init();
    // server::Metrics::GetInstance().Init();

    if (daemonized_) {
        Daemonize();

@@ -177,7 +177,7 @@ Server::Start() {
    signal(SIGINT, SignalUtil::HandleSignal);
    signal(SIGHUP, SignalUtil::HandleSignal);
    signal(SIGTERM, SignalUtil::HandleSignal);

    server::Metrics::GetInstance().Init();
    SERVER_LOG_INFO << "Vecwise server is running...";
    StartService();

@@ -31,7 +31,8 @@ set(require_files
        ../../src/metrics/Metrics.cpp

        # ../../src/cache/CacheMgr.cpp
        # ../../src/metrics/PrometheusMetrics.cpp
        ../../src/metrics/PrometheusMetrics.cpp
        ../../src/metrics/MetricBase.h
        ../../src/server/ServerConfig.cpp
        ../../src/utils/CommonUtil.cpp
        ../../src/utils/TimeRecorder.cpp

@@ -32,7 +32,7 @@ TEST_F(DBTest, Metric_Tes) {
    // server::Metrics::GetInstance().exposer_ptr()->RegisterCollectable(server::Metrics::GetInstance().registry_ptr());
    server::Metrics::GetInstance().Init();
    // server::PrometheusMetrics::GetInstance().exposer_ptr()->RegisterCollectable(server::PrometheusMetrics::GetInstance().registry_ptr());
    zilliz::vecwise::cache::CpuCacheMgr::GetInstance()->SetCapacity(1*1024*1024*1024);
    zilliz::vecwise::cache::CpuCacheMgr::GetInstance()->SetCapacity(4*1024*1024*1024);
    std::cout<<zilliz::vecwise::cache::CpuCacheMgr::GetInstance()->CacheCapacity()<<std::endl;
    static const std::string group_name = "test_group";
    static const int group_dim = 256;