diff --git a/cpp/CHANGELOG.md b/cpp/CHANGELOG.md index 85c488ebde..c2418aca65 100644 --- a/cpp/CHANGELOG.md +++ b/cpp/CHANGELOG.md @@ -10,6 +10,7 @@ Please mark all change in change log and use the ticket from JIRA. ## Improvement - MS-82 - Update server startup welcome message - MS-83 - Update vecwise to Milvus +- MS-77 - Performance issue of post-search action ## New Feature @@ -27,6 +28,8 @@ Please mark all change in change log and use the ticket from JIRA. - MS-45 - Implement DeleteTable interface - MS-75 - cmake: change faiss version to 1.5.2; add CUDA gencode - MS-81 - fix faiss ptx issue; change cuda gencode +- MS-84 - cmake: add arrow, jemalloc and jsoncons third party; default build option OFF +- MS-85 - add NetIO metric ## Task - MS-74 - Change README.md in cpp diff --git a/cpp/cmake/DefineOptions.cmake b/cpp/cmake/DefineOptions.cmake index 538e54fe3b..d72ea9ca5a 100644 --- a/cpp/cmake/DefineOptions.cmake +++ b/cpp/cmake/DefineOptions.cmake @@ -55,6 +55,8 @@ define_option_string(MEGASEARCH_DEPENDENCY_SOURCE define_option(MEGASEARCH_VERBOSE_THIRDPARTY_BUILD "Show output from ExternalProjects rather than just logging to files" ON) +define_option(MEGASEARCH_WITH_ARROW "Build with ARROW" OFF) + define_option(MEGASEARCH_BOOST_USE_SHARED "Rely on boost shared libraries where relevant" OFF) define_option(MEGASEARCH_BOOST_VENDORED "Use vendored Boost instead of existing Boost. \ @@ -77,6 +79,8 @@ define_option(MEGASEARCH_WITH_LAPACK "Build with LAPACK library" ON) define_option(MEGASEARCH_WITH_LZ4 "Build with lz4 compression" ON) +define_option(MEGASEARCH_WITH_JSONCONS "Build with JSONCONS" OFF) + define_option(MEGASEARCH_WITH_OPENBLAS "Build with OpenBLAS library" ON) define_option(MEGASEARCH_WITH_PROMETHEUS "Build with PROMETHEUS library" ON) diff --git a/cpp/cmake/ThirdPartyPackages.cmake b/cpp/cmake/ThirdPartyPackages.cmake index ca84bca7b0..25a9a5077f 100644 --- a/cpp/cmake/ThirdPartyPackages.cmake +++ b/cpp/cmake/ThirdPartyPackages.cmake @@ -17,11 +17,13 @@ set(MEGASEARCH_THIRDPARTY_DEPENDENCIES + ARROW BOOST BZip2 Easylogging++ FAISS GTest + JSONCONS LAPACK Lz4 OpenBLAS @@ -45,7 +47,9 @@ foreach(DEPENDENCY ${MEGASEARCH_THIRDPARTY_DEPENDENCIES}) endforeach() macro(build_dependency DEPENDENCY_NAME) - if("${DEPENDENCY_NAME}" STREQUAL "BZip2") + if("${DEPENDENCY_NAME}" STREQUAL "ARROW") + build_arrow() + elseif("${DEPENDENCY_NAME}" STREQUAL "BZip2") build_bzip2() elseif("${DEPENDENCY_NAME}" STREQUAL "Easylogging++") build_easyloggingpp() @@ -57,6 +61,8 @@ macro(build_dependency DEPENDENCY_NAME) build_lz4() elseif ("${DEPENDENCY_NAME}" STREQUAL "GTest") build_gtest() + elseif ("${DEPENDENCY_NAME}" STREQUAL "JSONCONS") + build_jsoncons() elseif ("${DEPENDENCY_NAME}" STREQUAL "OpenBLAS") build_openblas() elseif ("${DEPENDENCY_NAME}" STREQUAL "Prometheus") @@ -196,6 +202,14 @@ foreach(_VERSION_ENTRY ${TOOLCHAIN_VERSIONS_TXT}) set(${_LIB_NAME} "${_LIB_VERSION}") endforeach() +if(DEFINED ENV{MEGASEARCH_ARROW_URL}) + set(ARROW_SOURCE_URL "$ENV{MEGASEARCH_ARROW_URL}") +else() + set(ARROW_SOURCE_URL + "https://github.com/youny626/arrow.git" + ) +endif() + if(DEFINED ENV{MEGASEARCH_BOOST_URL}) set(BOOST_SOURCE_URL "$ENV{MEGASEARCH_BOOST_URL}") else() @@ -230,6 +244,13 @@ else () "https://github.com/google/googletest/archive/release-${GTEST_VERSION}.tar.gz") endif() +if (DEFINED ENV{MEGASEARCH_JSONCONS_URL}) + set(JSONCONS_SOURCE_URL "$ENV{MEGASEARCH_JSONCONS_URL}") +else () + set(JSONCONS_SOURCE_URL + "https://github.com/danielaparker/jsoncons/archive/v${JSONCONS_VERSION}.tar.gz") +endif() + 
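Note: every dependency introduced above follows the same source-override convention as the existing ones: an environment variable named MEGASEARCH_<NAME>_URL, when set, takes priority over the default download location. A minimal sketch of the pattern, using a hypothetical dependency FOO (the name FOO and the example.com URL are placeholders, not part of this patch):

    if(DEFINED ENV{MEGASEARCH_FOO_URL})
        # developer-supplied mirror or local archive
        set(FOO_SOURCE_URL "$ENV{MEGASEARCH_FOO_URL}")
    else()
        # default upstream location, pinned by FOO_VERSION from versions.txt
        set(FOO_SOURCE_URL "https://example.com/foo/archive/v${FOO_VERSION}.tar.gz")
    endif()

An offline build can, for instance, export MEGASEARCH_JSONCONS_URL to point the new jsoncons dependency at a local mirror before invoking cmake.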
if(DEFINED ENV{MEGASEARCH_LAPACK_URL}) set(LAPACK_SOURCE_URL "$ENV{MEGASEARCH_LAPACK_URL}") else() @@ -310,6 +331,93 @@ else() set(ZSTD_SOURCE_URL "https://github.com/facebook/zstd/archive/${ZSTD_VERSION}.tar.gz") endif() +# ---------------------------------------------------------------------- +# ARROW + +macro(build_arrow) + message(STATUS "Building Apache ARROW-${ARROW_VERSION} from source") + set(ARROW_PREFIX "${CMAKE_CURRENT_BINARY_DIR}/arrow_ep-prefix/src/arrow_ep/cpp") + set(ARROW_STATIC_LIB_NAME arrow) +# set(ARROW_CUDA_STATIC_LIB_NAME arrow_cuda) + set(ARROW_STATIC_LIB + "${ARROW_PREFIX}/lib/${CMAKE_STATIC_LIBRARY_PREFIX}${ARROW_STATIC_LIB_NAME}${CMAKE_STATIC_LIBRARY_SUFFIX}" + ) +# set(ARROW_CUDA_STATIC_LIB +# "${ARROW_PREFIX}/lib/${CMAKE_STATIC_LIBRARY_PREFIX}${ARROW_CUDA_STATIC_LIB_NAME}${CMAKE_STATIC_LIBRARY_SUFFIX}" +# ) + set(ARROW_INCLUDE_DIR "${ARROW_PREFIX}/include") + + set(ARROW_CMAKE_ARGS + ${EP_COMMON_CMAKE_ARGS} +# "-DARROW_THRIFT_URL=${THRIFT_SOURCE_URL}" + #"env ARROW_THRIFT_URL=${THRIFT_SOURCE_URL}" + -DARROW_BUILD_STATIC=ON + -DARROW_BUILD_SHARED=OFF + -DARROW_PARQUET=ON + -DARROW_USE_GLOG=OFF + -DCMAKE_INSTALL_PREFIX=${ARROW_PREFIX} + "-DCMAKE_LIBRARY_PATH=${CUDA_TOOLKIT_ROOT_DIR}/lib64/stubs" + -DCMAKE_BUILD_TYPE=Release) + +# set($ENV{ARROW_THRIFT_URL} ${THRIFT_SOURCE_URL}) + + externalproject_add(arrow_ep + GIT_REPOSITORY + ${ARROW_SOURCE_URL} + GIT_TAG + ${ARROW_VERSION} + GIT_SHALLOW + TRUE +# SOURCE_DIR +# ${ARROW_PREFIX} +# BINARY_DIR +# ${ARROW_PREFIX} + SOURCE_SUBDIR + cpp +# COMMAND +# "export \"ARROW_THRIFT_URL=${THRIFT_SOURCE_URL}\"" + ${EP_LOG_OPTIONS} + CMAKE_ARGS + ${ARROW_CMAKE_ARGS} + BUILD_COMMAND + ${MAKE} + ${MAKE_BUILD_ARGS} + INSTALL_COMMAND + ${MAKE} install +# BUILD_IN_SOURCE +# 1 + BUILD_BYPRODUCTS + "${ARROW_STATIC_LIB}" +# "${ARROW_CUDA_STATIC_LIB}" + ) + +# ExternalProject_Add_StepDependencies(arrow_ep build thrift_ep) + + file(MAKE_DIRECTORY "${ARROW_PREFIX}/include") + add_library(arrow STATIC IMPORTED) + set_target_properties(arrow + PROPERTIES IMPORTED_LOCATION "${ARROW_STATIC_LIB}" + INTERFACE_INCLUDE_DIRECTORIES "${ARROW_INCLUDE_DIR}") +# INTERFACE_LINK_LIBRARIES thrift) + add_dependencies(arrow arrow_ep) + + set(JEMALLOC_PREFIX "${CMAKE_CURRENT_BINARY_DIR}/arrow_ep-prefix/src/arrow_ep-build/jemalloc_ep-prefix/src/jemalloc_ep") + + add_custom_command(TARGET arrow_ep POST_BUILD + COMMAND ${CMAKE_COMMAND} -E make_directory ${ARROW_PREFIX}/lib/ + COMMAND ${CMAKE_COMMAND} -E copy ${JEMALLOC_PREFIX}/lib/libjemalloc_pic.a ${ARROW_PREFIX}/lib/ + DEPENDS ${JEMALLOC_PREFIX}/lib/libjemalloc_pic.a) + +endmacro() + +if(MEGASEARCH_WITH_ARROW) + + resolve_dependency(ARROW) + + link_directories(SYSTEM ${ARROW_PREFIX}/lib/) + include_directories(SYSTEM ${ARROW_INCLUDE_DIR}) +endif() + # ---------------------------------------------------------------------- # Add Boost dependencies (code adapted from Apache Kudu (incubating)) @@ -849,6 +957,30 @@ if (MEGASEARCH_BUILD_TESTS) include_directories(SYSTEM ${GTEST_INCLUDE_DIR}) endif() +# ---------------------------------------------------------------------- +# JSONCONS + +macro(build_jsoncons) + message(STATUS "Building JSONCONS-${JSONCONS_VERSION} from source") + + set(JSONCONS_PREFIX "${CMAKE_CURRENT_BINARY_DIR}/jsoncons_ep-prefix") + set(JSONCONS_TAR_NAME "${JSONCONS_PREFIX}/jsoncons-${JSONCONS_VERSION}.tar.gz") + set(JSONCONS_INCLUDE_DIR "${JSONCONS_PREFIX}/jsoncons-${JSONCONS_VERSION}/include") + if (NOT EXISTS ${JSONCONS_INCLUDE_DIR}) + file(MAKE_DIRECTORY ${JSONCONS_PREFIX}) + file(DOWNLOAD 
${JSONCONS_SOURCE_URL} + ${JSONCONS_TAR_NAME}) + execute_process(COMMAND ${CMAKE_COMMAND} -E tar -xf ${JSONCONS_TAR_NAME} + WORKING_DIRECTORY ${JSONCONS_PREFIX}) + + endif () +endmacro() + +if(MEGASEARCH_WITH_JSONCONS) + resolve_dependency(JSONCONS) + include_directories(SYSTEM "${JSONCONS_INCLUDE_DIR}") +endif() + # ---------------------------------------------------------------------- # lz4 @@ -1201,16 +1333,16 @@ macro(build_sqlite_orm) message(STATUS "Building SQLITE_ORM-${SQLITE_ORM_VERSION} from source") set(SQLITE_ORM_PREFIX "${CMAKE_CURRENT_BINARY_DIR}/sqlite_orm_ep-prefix") - set(SQLITE_ORM_TAR_NAME "${SQLITE_ORM_PREFIX}/sqlite_orm-${SQLITE_ORM_VERSION}.tar.gz") #sqlite_orm-${SQLITE_ORM_VERSION}.tar.gz - if (NOT EXISTS ${SQLITE_ORM_TAR_NAME}) + set(SQLITE_ORM_TAR_NAME "${SQLITE_ORM_PREFIX}/sqlite_orm-${SQLITE_ORM_VERSION}.tar.gz") + set(SQLITE_ORM_INCLUDE_DIR "${SQLITE_ORM_PREFIX}/sqlite_orm-${SQLITE_ORM_VERSION}/include/sqlite_orm") + if (NOT EXISTS ${SQLITE_ORM_INCLUDE_DIR}) file(MAKE_DIRECTORY ${SQLITE_ORM_PREFIX}) - file(DOWNLOAD https://github.com/fnc12/sqlite_orm/archive/${SQLITE_ORM_VERSION}.tar.gz + file(DOWNLOAD ${SQLITE_ORM_SOURCE_URL} ${SQLITE_ORM_TAR_NAME}) execute_process(COMMAND ${CMAKE_COMMAND} -E tar -xf ${SQLITE_ORM_TAR_NAME} WORKING_DIRECTORY ${SQLITE_ORM_PREFIX}) endif () - set(SQLITE_ORM_INCLUDE_DIR "${SQLITE_ORM_PREFIX}/sqlite_orm-${SQLITE_ORM_VERSION}/include/sqlite_orm") #set(SQLITE_ORM_PREFIX "${CMAKE_CURRENT_BINARY_DIR}/sqlite_orm_ep-prefix/src/sqlite_orm_ep") #set(SQLITE_ORM_INCLUDE_DIR "${SQLITE_ORM_PREFIX}/include/sqlite_orm") diff --git a/cpp/src/CMakeLists.txt b/cpp/src/CMakeLists.txt index 56b3e5e139..ac5644dda8 100644 --- a/cpp/src/CMakeLists.txt +++ b/cpp/src/CMakeLists.txt @@ -58,6 +58,7 @@ include_directories("${CUDA_TOOLKIT_ROOT_DIR}/include") include_directories(thrift/gen-cpp) set(third_party_libs + arrow easyloggingpp sqlite thrift diff --git a/cpp/src/db/DBImpl.cpp b/cpp/src/db/DBImpl.cpp index b127778be8..69a76981b2 100644 --- a/cpp/src/db/DBImpl.cpp +++ b/cpp/src/db/DBImpl.cpp @@ -10,6 +10,7 @@ #include "EngineFactory.h" #include "metrics/Metrics.h" #include "scheduler/SearchScheduler.h" +#include "utils/TimeRecorder.h" #include #include @@ -71,6 +72,55 @@ void CollectFileMetrics(int file_type, size_t file_size, double total_time) { } } +void CalcScore(uint64_t vector_count, + const float *vectors_data, + uint64_t dimension, + const SearchContext::ResultSet &result_src, + SearchContext::ResultSet &result_target) { + result_target.clear(); + if(result_src.empty()){ + return; + } + + server::TimeRecorder rc("Calculate Score"); + int vec_index = 0; + for(auto& result : result_src) { + const float * vec_data = vectors_data + vec_index*dimension; + double vec_len = 0; + for(uint64_t i = 0; i < dimension; i++) { + vec_len += vec_data[i]*vec_data[i]; + } + vec_index++; + + double max_score = 0.0; + for(auto& pair : result) { + if(max_score < pair.second) { + max_score = pair.second; + } + } + + //make sure score is not negative (clamp vec_len to the max distance) + if(max_score > vec_len) { + vec_len = max_score; + } + + //avoid division by zero + static constexpr double TOLERANCE = std::numeric_limits<double>::epsilon(); + if(vec_len < TOLERANCE) { + vec_len = TOLERANCE; + } + + SearchContext::Id2ScoreMap score_array; + double vec_len_inverse = 1.0/vec_len; + for(auto& pair : result) { + score_array.push_back(std::make_pair(pair.first, (1 - pair.second*vec_len_inverse)*100.0)); + } + result_target.emplace_back(score_array); + } + + rc.Elapse("total cost"); +} + } @@ -301,6 +351,11 @@
Status DBImpl::QuerySync(const std::string& table_id, uint64_t k, uint64_t nq, if (results.empty()) { return Status::NotFound("Group " + table_id + ", search result not found!"); } + + QueryResults temp_results; + CalcScore(nq, vectors, dim, results, temp_results); + results.swap(temp_results); + return Status::OK(); } @@ -329,9 +384,13 @@ Status DBImpl::QueryAsync(const std::string& table_id, uint64_t k, uint64_t nq, context->WaitResult(); - //step 3: construct results + //step 3: construct results, calculate score between 0 ~ 100 auto& context_result = context->GetResult(); - results.swap(context_result); + meta::TableSchema table_schema; + table_schema.table_id_ = table_id; + pMeta_->DescribeTable(table_schema); + + CalcScore(context->nq(), context->vectors(), table_schema.dimension_, context_result, results); return Status::OK(); } @@ -361,6 +420,7 @@ void DBImpl::BackgroundTimerTask(int interval) { server::Metrics::GetInstance().RAMUsagePercentSet(); server::Metrics::GetInstance().GPUPercentGaugeSet(); server::Metrics::GetInstance().GPUMemoryUsageGaugeSet(); + server::Metrics::GetInstance().OctetsSet(); TrySchedule(); } } diff --git a/cpp/src/db/scheduler/SearchTaskQueue.cpp b/cpp/src/db/scheduler/SearchTaskQueue.cpp index a0ed0834d0..819a881f39 100644 --- a/cpp/src/db/scheduler/SearchTaskQueue.cpp +++ b/cpp/src/db/scheduler/SearchTaskQueue.cpp @@ -18,10 +18,15 @@ void ClusterResult(const std::vector<long> &output_ids, uint64_t topk, SearchContext::ResultSet &result_set) { result_set.clear(); + result_set.reserve(nq); for (auto i = 0; i < nq; i++) { SearchContext::Id2ScoreMap id_score; + id_score.reserve(topk); for (auto k = 0; k < topk; k++) { uint64_t index = i * topk + k; + if(output_ids[index] < 0) { + continue; + } id_score.push_back(std::make_pair(output_ids[index], output_distence[index])); } result_set.emplace_back(id_score); @@ -29,20 +34,60 @@ void ClusterResult(const std::vector<long> &output_ids, } void MergeResult(SearchContext::Id2ScoreMap &score_src, - SearchContext::Id2ScoreMap &score_target, - uint64_t topk) { - for (auto& pair_src : score_src) { - for (auto iter = score_target.begin(); iter != score_target.end(); ++iter) { - if(pair_src.second > iter->second) { - score_target.insert(iter, pair_src); + SearchContext::Id2ScoreMap &score_target, + uint64_t topk) { + //Note: score_src and score_target are already sorted by score in ascending order + if(score_src.empty()) { + return; + } + + if(score_target.empty()) { + score_target.swap(score_src); + return; + } + + size_t src_count = score_src.size(); + size_t target_count = score_target.size(); + SearchContext::Id2ScoreMap score_merged; + score_merged.reserve(topk); + size_t src_index = 0, target_index = 0; + while(true) { + //all score_src items are merged; if score_merged.size() is still less than topk, + //move items from score_target into score_merged until its size reaches topk + if(src_index >= src_count) { + for(size_t i = target_index; i < target_count && score_merged.size() < topk; ++i) { + score_merged.push_back(score_target[i]); } + break; + } + + //all score_target items are merged; if score_merged.size() is still less than topk, + //move items from score_src into score_merged until its size reaches topk + if(target_index >= target_count) { + for(size_t i = src_index; i < src_count && score_merged.size() < topk; ++i) { + score_merged.push_back(score_src[i]); + } + break; + } + + //compare scores and push the smaller one into score_merged + auto& src_pair = score_src[src_index]; + auto&
target_pair = score_target[target_index]; + if(src_pair.second > target_pair.second) { + score_merged.push_back(target_pair); + target_index++; + } else { + score_merged.push_back(src_pair); + src_index++; + } + + //score_merged.size() already equals topk + if(score_merged.size() >= topk) { + break; } } - //remove unused items - while (score_target.size() > topk) { - score_target.pop_back(); - } + score_target.swap(score_merged); } void TopkResult(SearchContext::ResultSet &result_src, @@ -65,33 +110,6 @@ void TopkResult(SearchContext::ResultSet &result_src, } } -void CalcScore(uint64_t vector_count, - const float *vectors_data, - uint64_t dimension, - const SearchContext::ResultSet &result_src, - SearchContext::ResultSet &result_target) { - result_target.clear(); - if(result_src.empty()){ - return; - } - - int vec_index = 0; - for(auto& result : result_src) { - const float * vec_data = vectors_data + vec_index*dimension; - double vec_len = 0; - for(uint64_t i = 0; i < dimension; i++) { - vec_len += vec_data[i]*vec_data[i]; - } - vec_index++; - - SearchContext::Id2ScoreMap score_array; - for(auto& pair : result) { - score_array.push_back(std::make_pair(pair.first, (1 - pair.second/vec_len)*100.0)); - } - result_target.emplace_back(score_array); - } -} - } bool SearchTask::DoSearch() { @@ -109,8 +127,8 @@ output_ids.resize(inner_k*context->nq()); output_distence.resize(inner_k*context->nq()); - //step 2: search try { + //step 2: search index_engine_->Search(context->nq(), context->vectors(), inner_k, output_distence.data(), output_ids.data()); @@ -125,18 +143,13 @@ bool SearchTask::DoSearch() { TopkResult(result_set, inner_k, context->GetResult()); rc.Record("reduce topk"); - //step 5: calculate score between 0 ~ 100 - CalcScore(context->nq(), context->vectors(), index_engine_->Dimension(), context->GetResult(), result_set); - context->GetResult().swap(result_set); - rc.Record("calculate score"); - } catch (std::exception& ex) { SERVER_LOG_ERROR << "SearchTask encounter exception: " << ex.what(); context->IndexSearchDone(index_id_);//mark as done avoid dead lock, even search failed continue; } - //step 6: notify to send result to client + //step 5: notify to send result to client context->IndexSearchDone(index_id_); } diff --git a/cpp/src/metrics/MetricBase.h b/cpp/src/metrics/MetricBase.h index 8755549d1e..c18e857e4e 100644 --- a/cpp/src/metrics/MetricBase.h +++ b/cpp/src/metrics/MetricBase.h @@ -82,6 +82,7 @@ class MetricsBase{ virtual void ConnectionGaugeIncrement() {}; virtual void ConnectionGaugeDecrement() {}; virtual void KeepingAliveCounterIncrement(double value = 1) {}; + virtual void OctetsSet() {}; }; diff --git a/cpp/src/metrics/PrometheusMetrics.cpp b/cpp/src/metrics/PrometheusMetrics.cpp index 4a993d4cb8..07e1d2ee71 100644 --- a/cpp/src/metrics/PrometheusMetrics.cpp +++ b/cpp/src/metrics/PrometheusMetrics.cpp @@ -33,6 +33,8 @@ PrometheusMetrics::Init() { return SERVER_UNEXPECTED_ERROR; } + // + return SERVER_SUCCESS; } @@ -110,15 +112,39 @@ void PrometheusMetrics::QueryIndexTypePerSecondSet(std::string type, double valu } } + void PrometheusMetrics::ConnectionGaugeIncrement() { if(!startup_) return; connection_gauge_.Increment(); } + void PrometheusMetrics::ConnectionGaugeDecrement() { if(!startup_) return; connection_gauge_.Decrement(); } +void PrometheusMetrics::OctetsSet() { + if(!startup_) return; + + // get old stats and reset them + unsigned long long old_inoctets = SystemInfo::GetInstance().get_inoctets(); + unsigned long long old_outoctets =
SystemInfo::GetInstance().get_octets(); + auto old_time = SystemInfo::GetInstance().get_nettime(); + std::pair<unsigned long long, unsigned long long> in_and_out_octets = SystemInfo::GetInstance().Octets(); + SystemInfo::GetInstance().set_inoctets(in_and_out_octets.first); + SystemInfo::GetInstance().set_outoctets(in_and_out_octets.second); + SystemInfo::GetInstance().set_nettime(); + + // + constexpr double micro_to_second = 1e-6; + auto now_time = std::chrono::system_clock::now(); + auto total_microsecond = METRICS_MICROSECONDS(old_time, now_time); + auto total_second = total_microsecond*micro_to_second; + if(total_second == 0) return; + inoctets_gauge_.Set((in_and_out_octets.first-old_inoctets)/total_second); + outoctets_gauge_.Set((in_and_out_octets.second-old_outoctets)/total_second); +} + //void PrometheusMetrics::GpuPercentInit() { // int num_device = SystemInfo::GetInstance().num_device(); // constexpr char device_number[] = "DeviceNum"; diff --git a/cpp/src/metrics/PrometheusMetrics.h b/cpp/src/metrics/PrometheusMetrics.h index 942598a60f..902e79b5a5 100644 --- a/cpp/src/metrics/PrometheusMetrics.h +++ b/cpp/src/metrics/PrometheusMetrics.h @@ -116,6 +116,7 @@ class PrometheusMetrics: public MetricsBase { void ConnectionGaugeIncrement() override ; void ConnectionGaugeDecrement() override ; void KeepingAliveCounterIncrement(double value = 1) override {if(startup_) keeping_alive_counter_.Increment(value);}; + void OctetsSet() override ; @@ -480,6 +481,13 @@ class PrometheusMetrics: public MetricsBase { .Register(*registry_); prometheus::Counter &keeping_alive_counter_ = keeping_alive_.Add({}); + prometheus::Family<prometheus::Gauge> &octets_ = prometheus::BuildGauge() + .Name("octets_bytes_per_second") + .Help("octets bytes per second") + .Register(*registry_); + prometheus::Gauge &inoctets_gauge_ = octets_.Add({{"type", "inoctets"}}); + prometheus::Gauge &outoctets_gauge_ = octets_.Add({{"type", "outoctets"}}); + }; diff --git a/cpp/src/metrics/SystemInfo.cpp b/cpp/src/metrics/SystemInfo.cpp index 9aa85ccd94..d4ea2de575 100644 --- a/cpp/src/metrics/SystemInfo.cpp +++ b/cpp/src/metrics/SystemInfo.cpp @@ -53,6 +53,11 @@ void SystemInfo::Init() { return ; } + //initialize network traffic information + std::pair<unsigned long long, unsigned long long> in_and_out_octets = Octets(); + in_octets_ = in_and_out_octets.first; + out_octets_ = in_and_out_octets.second; + net_time_ = std::chrono::system_clock::now(); } long long @@ -202,6 +207,42 @@ SystemInfo::GPUMemoryUsed() { return result; } +std::pair<unsigned long long, unsigned long long> +SystemInfo::Octets(){ + pid_t pid = getpid(); +// const std::string filename = "/proc/"+std::to_string(pid)+"/net/netstat"; + const std::string filename = "/proc/net/netstat"; + std::ifstream file(filename); + std::string lastline = ""; + std::string line = ""; + while(file){ + getline(file, line); + if(file.fail()){ + break; + } + lastline = line; + } + std::vector<size_t> space_position; + size_t space_pos = lastline.find(" "); + while(space_pos != std::string::npos){ + space_position.push_back(space_pos); + space_pos = lastline.find(" ",space_pos+1); + } + // InOctets lies between the 7th and 8th " ", OutOctets between the 8th and 9th (space_position is 0-indexed) + size_t inoctets_begin = space_position[6]+1; + size_t inoctets_length = space_position[7]-inoctets_begin; + size_t outoctets_begin = space_position[7]+1; + size_t outoctets_length = space_position[8]-outoctets_begin; + std::string inoctets = lastline.substr(inoctets_begin,inoctets_length); + std::string outoctets = lastline.substr(outoctets_begin,outoctets_length); + + + unsigned long long inoctets_bytes = std::stoull(inoctets); + unsigned long long
outoctets_bytes = std::stoull(outoctets); + std::pair<unsigned long long, unsigned long long> res(inoctets_bytes, outoctets_bytes); + return res; +} + } } } \ No newline at end of file diff --git a/cpp/src/metrics/SystemInfo.h b/cpp/src/metrics/SystemInfo.h index 0ac2499bdc..eeff4b61f5 100644 --- a/cpp/src/metrics/SystemInfo.h +++ b/cpp/src/metrics/SystemInfo.h @@ -13,6 +13,7 @@ #include "string.h" #include "sys/times.h" #include "sys/vtimes.h" +#include <chrono> #include #include @@ -29,9 +30,12 @@ class SystemInfo { clock_t last_cpu_ = clock_t(); clock_t last_sys_cpu_ = clock_t(); clock_t last_user_cpu_ = clock_t(); + std::chrono::system_clock::time_point net_time_ = std::chrono::system_clock::now(); int num_processors_ = 0; //number of GPU unsigned int num_device_ = 0; + unsigned long long in_octets_ = 0; + unsigned long long out_octets_ = 0; bool initialized_ = false; public: @@ -43,11 +47,18 @@ class SystemInfo { void Init(); int num_device() const {return num_device_;}; + unsigned long long get_inoctets() { return in_octets_;}; + unsigned long long get_octets() { return out_octets_;}; + std::chrono::system_clock::time_point get_nettime() { return net_time_;}; + void set_inoctets(unsigned long long value) { in_octets_ = value;}; + void set_outoctets(unsigned long long value) { out_octets_ = value;}; + void set_nettime() {net_time_ = std::chrono::system_clock::now();}; long long ParseLine(char* line); unsigned long GetPhysicalMemory(); unsigned long GetProcessUsedMemory(); double MemoryPercent(); double CPUPercent(); + std::pair<unsigned long long, unsigned long long> Octets(); // std::unordered_map> GetGPUMemPercent() {}; // std::vector split(std::string input) {}; std::vector GPUPercent(); diff --git a/cpp/src/server/Server.cpp b/cpp/src/server/Server.cpp index 7acc4b78d0..08bf84fe94 100644 --- a/cpp/src/server/Server.cpp +++ b/cpp/src/server/Server.cpp @@ -16,7 +16,7 @@ #include #include #include -#include +//#include #include #include diff --git a/cpp/thirdparty/versions.txt b/cpp/thirdparty/versions.txt index 6c290f9d1e..dde2d8bb03 100644 --- a/cpp/thirdparty/versions.txt +++ b/cpp/thirdparty/versions.txt @@ -1,33 +1,10 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -# Toolchain library versions -# -# This file is used by `download_dependencies.sh` and cmake to figure out which -# version of a dependency to fetch. In order to add a new dependency, add a -# version variable, e.g. MY_DEP_VERSION and append an entry in the -# `DEPENDENCIES` array (see the comment on top of the declaration for the -# format).
- +ARROW_VERSION=zilliz BOOST_VERSION=1.70.0 BZIP2_VERSION=1.0.6 EASYLOGGINGPP_VERSION=v9.96.7 FAISS_VERSION=v1.5.2 GTEST_VERSION=1.8.1 +JSONCONS_VERSION=0.126.0 LAPACK_VERSION=v3.8.0 LZ4_VERSION=v1.9.1 OPENBLAS_VERSION=v0.3.6 @@ -41,28 +18,4 @@ YAMLCPP_VERSION=0.6.2 ZLIB_VERSION=v1.2.11 ZSTD_VERSION=v1.4.0 -# The first field is the name of the environment variable expected by cmake. -# This _must_ match what is defined. The second field is the name of the -# generated archive file. The third field is the url of the project for the -# given version. -DEPENDENCIES=( - "MEGASEARCH_BOOST_URL boost-${BOOST_VERSION}.tar.gz https://dl.bintray.com/boostorg/release/${BOOST_VERSION}/source/boost_${BOOST_VERSION//./_}.tar.gz" - "MEGASEARCH_BZIP2_URL bzip2-${BZIP2_VERSION}.tar.gz https://fossies.org/linux/misc/bzip2-${BZIP2_VERSION}.tar.gz" - "MEGASEARCH_EASYLOGGINGPP_URL easyloggingpp-${EASYLOGGINGPP_VERSION}.tar.gz https://github.com/zuhd-org/easyloggingpp/archive/${EASYLOGGINGPP_VERSION}.tar.gz" - "MEGASEARCH_FAISS_URL faiss-${FAISS_VERSION}.tar.gz https://github.com/facebookresearch/faiss/archive/${FAISS_VERSION}.tar.gz" - "MEGASEARCH_GTEST_URL gtest-${GTEST_VERSION}.tar.gz https://github.com/google/googletest/archive/release-${GTEST_VERSION}.tar.gz" - "MEGASEARCH_LAPACK_URL lapack-${LAPACK_VERSION}.tar.gz https://github.com/Reference-LAPACK/lapack/archive/${LAPACK_VERSION}.tar.gz - "MEGASEARCH_LZ4_URL lz4-${LZ4_VERSION}.tar.gz https://github.com/lz4/lz4/archive/${LZ4_VERSION}.tar.gz" - "MEGASEARCH_OPENBLAS_URL openblas-${OPENBLAS_VERSION}.tar.gz https://github.com/xianyi/OpenBLAS/archive/${OPENBLAS_VERSION}.tar.gz" - "MEGASEARCH_PROMETHEUS_URL https://github.com/jupp0r/prometheus-cpp.git" - "MEGASEARCH_ROCKSDB_URL rocksdb-${ROCKSDB_VERSION}.tar.gz https://github.com/facebook/rocksdb/archive/${ROCKSDB_VERSION}.tar.gz" - "MEGASEARCH_SNAPPY_URL snappy-${SNAPPY_VERSION}.tar.gz https://github.com/google/snappy/archive/${SNAPPY_VERSION}.tar.gz" - "MEGASEARCH_SQLITE_URL sqlite-autoconf-${SQLITE_VERSION}.tar.gz https://www.sqlite.org/2019/sqlite-autoconf-${SQLITE_VERSION}.tar.gz" - "MEGASEARCH_SQLITE_ORM_URL sqlite_orm-${SQLITE_ORM_VERSION}.tar.gz https://github.com/fnc12/sqlite_orm/archive/${SQLITE_ORM_VERSION}.tar.gz" - "MEGASEARCH_THRIFT_URL thrift-${THRIFT_VERSION}.tar.gz https://github.com/apache/thrift/archive/${THRIFT_VERSION}.tar.gz" - "MEGASEARCH_YAMLCPP_URL yaml-cpp-${YAMLCPP_VERSION}.tar.gz https://github.com/jbeder/yaml-cpp/archive/yaml-cpp-${YAMLCPP_VERSION}.tar.gz" - "MEGASEARCH_ZLIB_URL zlib-${ZLIB_VERSION}.tar.gz https://github.com/madler/zlib/archive/${ZLIB_VERSION}.tar.gz" - "MEGASEARCH_ZSTD_URL zstd-${ZSTD_VERSION}.tar.gz https://github.com/facebook/zstd/archive/${ZSTD_VERSION}.tar.gz" - ) - # vim: set filetype=sh: \ No newline at end of file diff --git a/cpp/unittest/metrics/metrics_test.cpp b/cpp/unittest/metrics/metrics_test.cpp index 9fb5f9c209..9a315f5557 100644 --- a/cpp/unittest/metrics/metrics_test.cpp +++ b/cpp/unittest/metrics/metrics_test.cpp @@ -27,13 +27,15 @@ using namespace zilliz::milvus; TEST_F(DBTest, Metric_Tes) { - + server::SystemInfo::GetInstance().Init(); // server::Metrics::GetInstance().Init(); // server::Metrics::GetInstance().exposer_ptr()->RegisterCollectable(server::Metrics::GetInstance().registry_ptr()); server::Metrics::GetInstance().Init(); + // server::PrometheusMetrics::GetInstance().exposer_ptr()->RegisterCollectable(server::PrometheusMetrics::GetInstance().registry_ptr()); 
zilliz::milvus::cache::CpuCacheMgr::GetInstance()->SetCapacity(2UL*1024*1024*1024); std::cout<CacheCapacity()<Query(group_name, k, qb, qxb, results); +// stat = db_->Query(group_name, k, qb, qxb, results); ss << "Search " << j << " With Size " << (float)(count*group_dim*sizeof(float))/(1024*1024) << " M"; -// STOP_TIMER(ss.str()); - ASSERT_STATS(stat); + +// ASSERT_STATS(stat); for (auto k=0; k= prev_count); std::this_thread::sleep_for(std::chrono::seconds(1));
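Note on the post-search scoring (MS-77): CalcScore in DBImpl.cpp maps each raw faiss distance onto a 0~100 score by dividing by the query vector's squared L2 norm, clamping the denominator so no score goes negative and guarding against an all-zero query. A minimal standalone sketch of that normalization, using a hypothetical helper name NormalizeScore (not part of the patch):

    #include <algorithm>
    #include <limits>

    // distance:      raw distance returned by the search engine
    // query_norm_sq: sum of squares of the query vector's components
    // max_distance:  largest distance in this query's result list
    double NormalizeScore(double distance, double query_norm_sq, double max_distance) {
        // clamp the denominator so distance/denom <= 1, keeping the score non-negative
        double denom = std::max(query_norm_sq, max_distance);
        // guard against division by zero for an all-zero query vector
        denom = std::max(denom, std::numeric_limits<double>::epsilon());
        return (1.0 - distance / denom) * 100.0;
    }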
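Note on MergeResult: the old implementation inserted every source item into score_target with a nested scan and then trimmed the tail, which is quadratic in the result size; the rewrite above is a single linear merge of two ascending-sorted lists capped at topk. A self-contained sketch of the same strategy, using a plain std::vector in place of SearchContext::Id2ScoreMap (the alias and the MergeTopk name are illustrative only):

    #include <cstdint>
    #include <utility>
    #include <vector>

    using Id2ScoreMap = std::vector<std::pair<int64_t, float>>; // stand-in for SearchContext::Id2ScoreMap

    // Both inputs must already be sorted by score in ascending order;
    // on return, target holds at most topk of the smallest scores.
    void MergeTopk(Id2ScoreMap &src, Id2ScoreMap &target, uint64_t topk) {
        if (src.empty()) { return; }
        if (target.empty()) { target.swap(src); return; }

        Id2ScoreMap merged;
        merged.reserve(topk);
        std::size_t i = 0, j = 0;
        while (merged.size() < topk && (i < src.size() || j < target.size())) {
            // take from whichever side currently has the smaller score;
            // once one side is exhausted, drain the other
            if (j >= target.size() || (i < src.size() && src[i].second <= target[j].second)) {
                merged.push_back(src[i++]);
            } else {
                merged.push_back(target[j++]);
            }
        }
        target.swap(merged);
    }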
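Note on the NetIO metric (MS-85): SystemInfo::Octets() reads the cumulative InOctets/OutOctets counters from the last line of /proc/net/netstat, and PrometheusMetrics::OctetsSet() publishes the counter delta divided by the elapsed time as bytes per second. A sketch of that rate computation, assuming a hypothetical helper ComputeRate (not part of the patch):

    #include <chrono>

    // Returns bytes/second given two cumulative octet counter samples and their timestamps.
    double ComputeRate(unsigned long long old_octets, unsigned long long new_octets,
                       std::chrono::system_clock::time_point old_time,
                       std::chrono::system_clock::time_point new_time) {
        double seconds = std::chrono::duration_cast<std::chrono::microseconds>(new_time - old_time).count() * 1e-6;
        if (seconds <= 0.0) { return 0.0; } // guard against a zero or negative interval
        return static_cast<double>(new_octets - old_octets) / seconds;
    }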