Merge remote-tracking branch 'upstream/branch-0.3.0' into mysql-0.3.0

Former-commit-id: e5468a09dbd74cea3e7486223b2c3b0a50862dd1
This commit is contained in: f6564f6c3c
@@ -10,6 +10,7 @@ Please mark all change in change log and use the ticket from JIRA.
## Improvement
- MS-82 - Update server startup welcome message
- MS-83 - Update vecwise to Milvus
- MS-77 - Performance issue of post-search action

## New Feature

@@ -27,6 +28,8 @@ Please mark all change in change log and use the ticket from JIRA.
- MS-45 - Implement DeleteTable interface
- MS-75 - cmake: change faiss version to 1.5.2; add CUDA gencode
- MS-81 - fix faiss ptx issue; change cuda gencode
- MS-84 - cmake: add arrow, jemalloc and jsoncons third party; default build option OFF
- MS-85 - add NetIO metric

## Task
- MS-74 - Change README.md in cpp

@@ -55,6 +55,8 @@ define_option_string(MEGASEARCH_DEPENDENCY_SOURCE
define_option(MEGASEARCH_VERBOSE_THIRDPARTY_BUILD
    "Show output from ExternalProjects rather than just logging to files" ON)

define_option(MEGASEARCH_WITH_ARROW "Build with ARROW" OFF)

define_option(MEGASEARCH_BOOST_USE_SHARED "Rely on boost shared libraries where relevant" OFF)

define_option(MEGASEARCH_BOOST_VENDORED "Use vendored Boost instead of existing Boost. \
@@ -77,6 +79,8 @@ define_option(MEGASEARCH_WITH_LAPACK "Build with LAPACK library" ON)

define_option(MEGASEARCH_WITH_LZ4 "Build with lz4 compression" ON)

define_option(MEGASEARCH_WITH_JSONCONS "Build with JSONCONS" OFF)

define_option(MEGASEARCH_WITH_OPENBLAS "Build with OpenBLAS library" ON)

define_option(MEGASEARCH_WITH_PROMETHEUS "Build with PROMETHEUS library" ON)

@@ -17,11 +17,13 @@

set(MEGASEARCH_THIRDPARTY_DEPENDENCIES

        ARROW
        BOOST
        BZip2
        Easylogging++
        FAISS
        GTest
        JSONCONS
        LAPACK
        Lz4
        OpenBLAS
@@ -45,7 +47,9 @@ foreach(DEPENDENCY ${MEGASEARCH_THIRDPARTY_DEPENDENCIES})
endforeach()

macro(build_dependency DEPENDENCY_NAME)
    if("${DEPENDENCY_NAME}" STREQUAL "BZip2")
    if("${DEPENDENCY_NAME}" STREQUAL "ARROW")
        build_arrow()
    elseif("${DEPENDENCY_NAME}" STREQUAL "BZip2")
        build_bzip2()
    elseif("${DEPENDENCY_NAME}" STREQUAL "Easylogging++")
        build_easyloggingpp()
@@ -57,6 +61,8 @@ macro(build_dependency DEPENDENCY_NAME)
        build_lz4()
    elseif ("${DEPENDENCY_NAME}" STREQUAL "GTest")
        build_gtest()
    elseif ("${DEPENDENCY_NAME}" STREQUAL "JSONCONS")
        build_jsoncons()
    elseif ("${DEPENDENCY_NAME}" STREQUAL "OpenBLAS")
        build_openblas()
    elseif ("${DEPENDENCY_NAME}" STREQUAL "Prometheus")
@@ -196,6 +202,14 @@ foreach(_VERSION_ENTRY ${TOOLCHAIN_VERSIONS_TXT})
    set(${_LIB_NAME} "${_LIB_VERSION}")
endforeach()

if(DEFINED ENV{MEGASEARCH_ARROW_URL})
    set(ARROW_SOURCE_URL "$ENV{MEGASEARCH_ARROW_URL}")
else()
    set(ARROW_SOURCE_URL
        "https://github.com/youny626/arrow.git"
        )
endif()

if(DEFINED ENV{MEGASEARCH_BOOST_URL})
    set(BOOST_SOURCE_URL "$ENV{MEGASEARCH_BOOST_URL}")
else()
@@ -230,6 +244,13 @@ else ()
        "https://github.com/google/googletest/archive/release-${GTEST_VERSION}.tar.gz")
endif()

if (DEFINED ENV{MEGASEARCH_JSONCONS_URL})
    set(JSONCONS_SOURCE_URL "$ENV{MEGASEARCH_JSONCONS_URL}")
else ()
    set(JSONCONS_SOURCE_URL
        "https://github.com/danielaparker/jsoncons/archive/v${JSONCONS_VERSION}.tar.gz")
endif()

if(DEFINED ENV{MEGASEARCH_LAPACK_URL})
    set(LAPACK_SOURCE_URL "$ENV{MEGASEARCH_LAPACK_URL}")
else()
@@ -310,6 +331,93 @@ else()
    set(ZSTD_SOURCE_URL "https://github.com/facebook/zstd/archive/${ZSTD_VERSION}.tar.gz")
endif()

# ----------------------------------------------------------------------
# ARROW

macro(build_arrow)
    message(STATUS "Building Apache ARROW-${ARROW_VERSION} from source")
    set(ARROW_PREFIX "${CMAKE_CURRENT_BINARY_DIR}/arrow_ep-prefix/src/arrow_ep/cpp")
    set(ARROW_STATIC_LIB_NAME arrow)
    # set(ARROW_CUDA_STATIC_LIB_NAME arrow_cuda)
    set(ARROW_STATIC_LIB
        "${ARROW_PREFIX}/lib/${CMAKE_STATIC_LIBRARY_PREFIX}${ARROW_STATIC_LIB_NAME}${CMAKE_STATIC_LIBRARY_SUFFIX}"
        )
    # set(ARROW_CUDA_STATIC_LIB
    #     "${ARROW_PREFIX}/lib/${CMAKE_STATIC_LIBRARY_PREFIX}${ARROW_CUDA_STATIC_LIB_NAME}${CMAKE_STATIC_LIBRARY_SUFFIX}"
    #     )
    set(ARROW_INCLUDE_DIR "${ARROW_PREFIX}/include")

    set(ARROW_CMAKE_ARGS
        ${EP_COMMON_CMAKE_ARGS}
        # "-DARROW_THRIFT_URL=${THRIFT_SOURCE_URL}"
        # "env ARROW_THRIFT_URL=${THRIFT_SOURCE_URL}"
        -DARROW_BUILD_STATIC=ON
        -DARROW_BUILD_SHARED=OFF
        -DARROW_PARQUET=ON
        -DARROW_USE_GLOG=OFF
        -DCMAKE_INSTALL_PREFIX=${ARROW_PREFIX}
        "-DCMAKE_LIBRARY_PATH=${CUDA_TOOLKIT_ROOT_DIR}/lib64/stubs"
        -DCMAKE_BUILD_TYPE=Release)

    # set($ENV{ARROW_THRIFT_URL} ${THRIFT_SOURCE_URL})

    externalproject_add(arrow_ep
        GIT_REPOSITORY
        ${ARROW_SOURCE_URL}
        GIT_TAG
        ${ARROW_VERSION}
        GIT_SHALLOW
        TRUE
        # SOURCE_DIR
        # ${ARROW_PREFIX}
        # BINARY_DIR
        # ${ARROW_PREFIX}
        SOURCE_SUBDIR
        cpp
        # COMMAND
        # "export \"ARROW_THRIFT_URL=${THRIFT_SOURCE_URL}\""
        ${EP_LOG_OPTIONS}
        CMAKE_ARGS
        ${ARROW_CMAKE_ARGS}
        BUILD_COMMAND
        ${MAKE}
        ${MAKE_BUILD_ARGS}
        INSTALL_COMMAND
        ${MAKE} install
        # BUILD_IN_SOURCE
        # 1
        BUILD_BYPRODUCTS
        "${ARROW_STATIC_LIB}"
        # "${ARROW_CUDA_STATIC_LIB}"
        )

    # ExternalProject_Add_StepDependencies(arrow_ep build thrift_ep)

    file(MAKE_DIRECTORY "${ARROW_PREFIX}/include")
    add_library(arrow STATIC IMPORTED)
    set_target_properties(arrow
        PROPERTIES IMPORTED_LOCATION "${ARROW_STATIC_LIB}"
        INTERFACE_INCLUDE_DIRECTORIES "${ARROW_INCLUDE_DIR}")
    # INTERFACE_LINK_LIBRARIES thrift)
    add_dependencies(arrow arrow_ep)

    set(JEMALLOC_PREFIX "${CMAKE_CURRENT_BINARY_DIR}/arrow_ep-prefix/src/arrow_ep-build/jemalloc_ep-prefix/src/jemalloc_ep")

    add_custom_command(TARGET arrow_ep POST_BUILD
        COMMAND ${CMAKE_COMMAND} -E make_directory ${ARROW_PREFIX}/lib/
        COMMAND ${CMAKE_COMMAND} -E copy ${JEMALLOC_PREFIX}/lib/libjemalloc_pic.a ${ARROW_PREFIX}/lib/
        DEPENDS ${JEMALLOC_PREFIX}/lib/libjemalloc_pic.a)

endmacro()

if(MEGASEARCH_WITH_ARROW)

    resolve_dependency(ARROW)

    link_directories(SYSTEM ${ARROW_PREFIX}/lib/)
    include_directories(SYSTEM ${ARROW_INCLUDE_DIR})
endif()

# ----------------------------------------------------------------------
# Add Boost dependencies (code adapted from Apache Kudu (incubating))

@@ -849,6 +957,30 @@ if (MEGASEARCH_BUILD_TESTS)
    include_directories(SYSTEM ${GTEST_INCLUDE_DIR})
endif()

# ----------------------------------------------------------------------
# JSONCONS

macro(build_jsoncons)
    message(STATUS "Building JSONCONS-${JSONCONS_VERSION} from source")

    set(JSONCONS_PREFIX "${CMAKE_CURRENT_BINARY_DIR}/jsoncons_ep-prefix")
    set(JSONCONS_TAR_NAME "${JSONCONS_PREFIX}/jsoncons-${JSONCONS_VERSION}.tar.gz")
    set(JSONCONS_INCLUDE_DIR "${JSONCONS_PREFIX}/jsoncons-${JSONCONS_VERSION}/include")
    if (NOT EXISTS ${JSONCONS_INCLUDE_DIR})
        file(MAKE_DIRECTORY ${JSONCONS_PREFIX})
        file(DOWNLOAD ${JSONCONS_SOURCE_URL}
            ${JSONCONS_TAR_NAME})
        execute_process(COMMAND ${CMAKE_COMMAND} -E tar -xf ${JSONCONS_TAR_NAME}
            WORKING_DIRECTORY ${JSONCONS_PREFIX})

    endif ()
endmacro()

if(MEGASEARCH_WITH_JSONCONS)
    resolve_dependency(JSONCONS)
    include_directories(SYSTEM "${JSONCONS_INCLUDE_DIR}")
endif()

# ----------------------------------------------------------------------
# lz4

@@ -1201,16 +1333,16 @@ macro(build_sqlite_orm)
    message(STATUS "Building SQLITE_ORM-${SQLITE_ORM_VERSION} from source")

    set(SQLITE_ORM_PREFIX "${CMAKE_CURRENT_BINARY_DIR}/sqlite_orm_ep-prefix")
    set(SQLITE_ORM_TAR_NAME "${SQLITE_ORM_PREFIX}/sqlite_orm-${SQLITE_ORM_VERSION}.tar.gz") #sqlite_orm-${SQLITE_ORM_VERSION}.tar.gz
    if (NOT EXISTS ${SQLITE_ORM_TAR_NAME})
    set(SQLITE_ORM_TAR_NAME "${SQLITE_ORM_PREFIX}/sqlite_orm-${SQLITE_ORM_VERSION}.tar.gz")
    set(SQLITE_ORM_INCLUDE_DIR "${SQLITE_ORM_PREFIX}/sqlite_orm-${SQLITE_ORM_VERSION}/include/sqlite_orm")
    if (NOT EXISTS ${SQLITE_ORM_INCLUDE_DIR})
        file(MAKE_DIRECTORY ${SQLITE_ORM_PREFIX})
        file(DOWNLOAD https://github.com/fnc12/sqlite_orm/archive/${SQLITE_ORM_VERSION}.tar.gz
        file(DOWNLOAD ${SQLITE_ORM_SOURCE_URL}
            ${SQLITE_ORM_TAR_NAME})
        execute_process(COMMAND ${CMAKE_COMMAND} -E tar -xf ${SQLITE_ORM_TAR_NAME}
            WORKING_DIRECTORY ${SQLITE_ORM_PREFIX})

    endif ()
    set(SQLITE_ORM_INCLUDE_DIR "${SQLITE_ORM_PREFIX}/sqlite_orm-${SQLITE_ORM_VERSION}/include/sqlite_orm")

    #set(SQLITE_ORM_PREFIX "${CMAKE_CURRENT_BINARY_DIR}/sqlite_orm_ep-prefix/src/sqlite_orm_ep")
    #set(SQLITE_ORM_INCLUDE_DIR "${SQLITE_ORM_PREFIX}/include/sqlite_orm")

@@ -58,6 +58,7 @@ include_directories("${CUDA_TOOLKIT_ROOT_DIR}/include")
include_directories(thrift/gen-cpp)

set(third_party_libs
        arrow
        easyloggingpp
        sqlite
        thrift

@@ -10,6 +10,7 @@
#include "EngineFactory.h"
#include "metrics/Metrics.h"
#include "scheduler/SearchScheduler.h"
#include "utils/TimeRecorder.h"

#include <assert.h>
#include <chrono>
@@ -71,6 +72,55 @@ void CollectFileMetrics(int file_type, size_t file_size, double total_time) {
    }
}

void CalcScore(uint64_t vector_count,
               const float *vectors_data,
               uint64_t dimension,
               const SearchContext::ResultSet &result_src,
               SearchContext::ResultSet &result_target) {
    result_target.clear();
    if(result_src.empty()){
        return;
    }

    server::TimeRecorder rc("Calculate Score");
    int vec_index = 0;
    for(auto& result : result_src) {
        const float * vec_data = vectors_data + vec_index*dimension;
        double vec_len = 0;
        for(uint64_t i = 0; i < dimension; i++) {
            vec_len += vec_data[i]*vec_data[i];
        }
        vec_index++;

        double max_score = 0.0;
        for(auto& pair : result) {
            if(max_score < pair.second) {
                max_score = pair.second;
            }
        }

        //make sure the score is less than 100
        if(max_score > vec_len) {
            vec_len = max_score;
        }

        //avoid division by zero
        static constexpr double TOLERANCE = std::numeric_limits<float>::epsilon();
        if(vec_len < TOLERANCE) {
            vec_len = TOLERANCE;
        }

        SearchContext::Id2ScoreMap score_array;
        double vec_len_inverse = 1.0/vec_len;
        for(auto& pair : result) {
            score_array.push_back(std::make_pair(pair.first, (1 - pair.second*vec_len_inverse)*100.0));
        }
        result_target.emplace_back(score_array);
    }

    rc.Elapse("totally cost");
}

}

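The CalcScore added above turns raw L2 distances into 0~100 scores: each distance is divided by the squared norm of the query vector (raised to the largest distance so no score goes negative, and floored at float epsilon so it never divides by zero), then mapped onto a percentage. A minimal standalone sketch of the same arithmetic; the names here are illustrative, not part of the Milvus sources:

#include <cstdio>
#include <limits>
#include <utility>
#include <vector>

// Sketch of the normalization in CalcScore: score = (1 - distance/divisor) * 100,
// where divisor is the squared L2 norm of the query vector, raised to the largest
// distance (keeps scores >= 0) and floored at epsilon (avoids division by zero).
std::vector<std::pair<long, double>>
NormalizeScores(const std::vector<float> &query,
                const std::vector<std::pair<long, double>> &id_distances) {
    double divisor = 0.0;
    for (float v : query) divisor += static_cast<double>(v) * v;

    double max_distance = 0.0;
    for (const auto &p : id_distances)
        if (p.second > max_distance) max_distance = p.second;

    if (max_distance > divisor) divisor = max_distance;
    if (divisor < std::numeric_limits<float>::epsilon())
        divisor = std::numeric_limits<float>::epsilon();

    std::vector<std::pair<long, double>> scores;
    scores.reserve(id_distances.size());
    for (const auto &p : id_distances)
        scores.emplace_back(p.first, (1.0 - p.second / divisor) * 100.0);
    return scores;
}

int main() {
    std::vector<float> query = {3.0f, 4.0f};  // squared norm = 25
    std::vector<std::pair<long, double>> hits = {{7, 0.0}, {9, 12.5}, {2, 25.0}};
    for (const auto &s : NormalizeScores(query, hits))
        std::printf("id=%ld score=%.1f\n", s.first, s.second);  // 100.0, 50.0, 0.0
    return 0;
}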
@@ -301,6 +351,11 @@ Status DBImpl::QuerySync(const std::string& table_id, uint64_t k, uint64_t nq,
    if (results.empty()) {
        return Status::NotFound("Group " + table_id + ", search result not found!");
    }

    QueryResults temp_results;
    CalcScore(nq, vectors, dim, results, temp_results);
    results.swap(temp_results);

    return Status::OK();
}

@@ -329,9 +384,13 @@ Status DBImpl::QueryAsync(const std::string& table_id, uint64_t k, uint64_t nq,

    context->WaitResult();

    //step 3: construct results
    //step 3: construct results, calculate score between 0 ~ 100
    auto& context_result = context->GetResult();
    results.swap(context_result);
    meta::TableSchema table_schema;
    table_schema.table_id_ = table_id;
    pMeta_->DescribeTable(table_schema);

    CalcScore(context->nq(), context->vectors(), table_schema.dimension_, context_result, results);

    return Status::OK();
}
@@ -361,6 +420,7 @@ void DBImpl::BackgroundTimerTask(int interval) {
        server::Metrics::GetInstance().RAMUsagePercentSet();
        server::Metrics::GetInstance().GPUPercentGaugeSet();
        server::Metrics::GetInstance().GPUMemoryUsageGaugeSet();
        server::Metrics::GetInstance().OctetsSet();
        TrySchedule();
    }
}

@@ -18,10 +18,15 @@ void ClusterResult(const std::vector<long> &output_ids,
                   uint64_t topk,
                   SearchContext::ResultSet &result_set) {
    result_set.clear();
    result_set.reserve(nq);
    for (auto i = 0; i < nq; i++) {
        SearchContext::Id2ScoreMap id_score;
        id_score.reserve(topk);
        for (auto k = 0; k < topk; k++) {
            uint64_t index = i * topk + k;
            if(output_ids[index] < 0) {
                continue;
            }
            id_score.push_back(std::make_pair(output_ids[index], output_distence[index]));
        }
        result_set.emplace_back(id_score);
@@ -29,20 +34,60 @@
}

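ClusterResult above reshapes the flat id/distance arrays coming back from the index (nq * topk entries, row-major, with negative ids marking empty slots when a query has fewer than topk hits) into one list per query. A self-contained sketch of that reshaping under the same conventions; the names here are illustrative, not the in-tree API:

#include <cstdint>
#include <utility>
#include <vector>

using Id2Score = std::vector<std::pair<long, double>>;

// Split flat row-major top-k results into per-query lists,
// skipping negative ids (padding for missing hits).
std::vector<Id2Score> ReshapeTopk(const std::vector<long> &ids,
                                  const std::vector<float> &distances,
                                  uint64_t nq, uint64_t topk) {
    std::vector<Id2Score> result;
    result.reserve(nq);
    for (uint64_t i = 0; i < nq; i++) {
        Id2Score row;
        row.reserve(topk);
        for (uint64_t k = 0; k < topk; k++) {
            uint64_t index = i * topk + k;
            if (ids[index] < 0) continue;  // empty slot: query had < topk hits
            row.emplace_back(ids[index], distances[index]);
        }
        result.push_back(std::move(row));
    }
    return result;
}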
void MergeResult(SearchContext::Id2ScoreMap &score_src,
                 SearchContext::Id2ScoreMap &score_target,
                 uint64_t topk) {
    for (auto& pair_src : score_src) {
        for (auto iter = score_target.begin(); iter != score_target.end(); ++iter) {
            if(pair_src.second > iter->second) {
                score_target.insert(iter, pair_src);
                 SearchContext::Id2ScoreMap &score_target,
                 uint64_t topk) {
    //Note: score_src and score_target are already sorted by score in ascending order
    if(score_src.empty()) {
        return;
    }

    if(score_target.empty()) {
        score_target.swap(score_src);
        return;
    }

    size_t src_count = score_src.size();
    size_t target_count = score_target.size();
    SearchContext::Id2ScoreMap score_merged;
    score_merged.reserve(topk);
    size_t src_index = 0, target_index = 0;
    while(true) {
        //all score_src items are merged; if score_merged.size() is still less than topk,
        //move items from score_target into score_merged until score_merged.size() equals topk
        if(src_index >= src_count - 1) {
            for(size_t i = target_index; i < target_count && score_merged.size() < topk; ++i) {
                score_merged.push_back(score_target[i]);
            }
            break;
        }

        //all score_target items are merged; if score_merged.size() is still less than topk,
        //move items from score_src into score_merged until score_merged.size() equals topk
        if(target_index >= target_count - 1) {
            for(size_t i = src_index; i < src_count && score_merged.size() < topk; ++i) {
                score_merged.push_back(score_src[i]);
            }
            break;
        }

        //compare scores, moving the smaller one into score_merged one item at a time
        auto& src_pair = score_src[src_index];
        auto& target_pair = score_target[target_index];
        if(src_pair.second > target_pair.second) {
            score_merged.push_back(target_pair);
            target_index++;
        } else {
            score_merged.push_back(src_pair);
            src_index++;
        }

        //score_merged.size() already equals topk
        if(score_merged.size() >= topk) {
            break;
        }
    }

    //remove unused items
    while (score_target.size() > topk) {
        score_target.pop_back();
    }
    score_target.swap(score_merged);
}
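The rewritten MergeResult is a classic two-way merge of two score-ascending lists, truncated at topk, replacing the old insert-scan loop. For illustration, the same idea over plain std::pair vectors; the names are hypothetical, and unlike the in-tree version this returns the merged list instead of swapping it back into score_target:

#include <cstdio>
#include <utility>
#include <vector>

using Id2Score = std::vector<std::pair<long, double>>;

// Merge two lists already sorted by ascending score, keeping the topk smallest.
Id2Score MergeTopk(const Id2Score &a, const Id2Score &b, size_t topk) {
    Id2Score merged;
    merged.reserve(topk);
    size_t i = 0, j = 0;
    while (merged.size() < topk && (i < a.size() || j < b.size())) {
        // take from whichever list currently offers the smaller score
        if (j >= b.size() || (i < a.size() && a[i].second <= b[j].second)) {
            merged.push_back(a[i++]);
        } else {
            merged.push_back(b[j++]);
        }
    }
    return merged;
}

int main() {
    Id2Score a = {{1, 0.1}, {3, 0.5}, {5, 0.9}};
    Id2Score b = {{2, 0.2}, {4, 0.6}};
    for (const auto &p : MergeTopk(a, b, 4))
        std::printf("id=%ld score=%.1f\n", p.first, p.second);  // ids 1, 2, 3, 4
    return 0;
}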

void TopkResult(SearchContext::ResultSet &result_src,
@@ -65,33 +110,6 @@ void TopkResult(SearchContext::ResultSet &result_src,
    }
}

void CalcScore(uint64_t vector_count,
               const float *vectors_data,
               uint64_t dimension,
               const SearchContext::ResultSet &result_src,
               SearchContext::ResultSet &result_target) {
    result_target.clear();
    if(result_src.empty()){
        return;
    }

    int vec_index = 0;
    for(auto& result : result_src) {
        const float * vec_data = vectors_data + vec_index*dimension;
        double vec_len = 0;
        for(uint64_t i = 0; i < dimension; i++) {
            vec_len += vec_data[i]*vec_data[i];
        }
        vec_index++;

        SearchContext::Id2ScoreMap score_array;
        for(auto& pair : result) {
            score_array.push_back(std::make_pair(pair.first, (1 - pair.second/vec_len)*100.0));
        }
        result_target.emplace_back(score_array);
    }
}

}

bool SearchTask::DoSearch() {
@@ -109,8 +127,8 @@ bool SearchTask::DoSearch() {
    output_ids.resize(inner_k*context->nq());
    output_distence.resize(inner_k*context->nq());

    //step 2: search
    try {
        //step 2: search
        index_engine_->Search(context->nq(), context->vectors(), inner_k, output_distence.data(),
                              output_ids.data());

@@ -125,18 +143,13 @@ bool SearchTask::DoSearch() {
        TopkResult(result_set, inner_k, context->GetResult());
        rc.Record("reduce topk");

        //step 5: calculate score between 0 ~ 100
        CalcScore(context->nq(), context->vectors(), index_engine_->Dimension(), context->GetResult(), result_set);
        context->GetResult().swap(result_set);
        rc.Record("calculate score");

    } catch (std::exception& ex) {
        SERVER_LOG_ERROR << "SearchTask encounter exception: " << ex.what();
        context->IndexSearchDone(index_id_); //mark as done to avoid deadlock, even if the search failed
        continue;
    }

    //step 6: notify to send result to client
    //step 5: notify to send result to client
    context->IndexSearchDone(index_id_);
}

@@ -82,6 +82,7 @@ class MetricsBase{
    virtual void ConnectionGaugeIncrement() {};
    virtual void ConnectionGaugeDecrement() {};
    virtual void KeepingAliveCounterIncrement(double value = 1) {};
    virtual void OctetsSet() {};
};

@@ -33,6 +33,8 @@ PrometheusMetrics::Init() {
        return SERVER_UNEXPECTED_ERROR;
    }

    //

    return SERVER_SUCCESS;

}
@@ -110,15 +112,39 @@ void PrometheusMetrics::QueryIndexTypePerSecondSet(std::string type, double value) {
    }

}

void PrometheusMetrics::ConnectionGaugeIncrement() {
    if(!startup_) return;
    connection_gauge_.Increment();
}

void PrometheusMetrics::ConnectionGaugeDecrement() {
    if(!startup_) return;
    connection_gauge_.Decrement();
}

void PrometheusMetrics::OctetsSet() {
    if(!startup_) return;

    // get old stats and reset them
    unsigned long long old_inoctets = SystemInfo::GetInstance().get_inoctets();
    unsigned long long old_outoctets = SystemInfo::GetInstance().get_octets();
    auto old_time = SystemInfo::GetInstance().get_nettime();
    std::pair<unsigned long long, unsigned long long> in_and_out_octets = SystemInfo::GetInstance().Octets();
    SystemInfo::GetInstance().set_inoctets(in_and_out_octets.first);
    SystemInfo::GetInstance().set_outoctets(in_and_out_octets.second);
    SystemInfo::GetInstance().set_nettime();

    //
    constexpr double micro_to_second = 1e-6;
    auto now_time = std::chrono::system_clock::now();
    auto total_microsecond = METRICS_MICROSECONDS(old_time, now_time);
    auto total_second = total_microsecond*micro_to_second;
    if(total_second == 0) return;
    inoctets_gauge_.Set((in_and_out_octets.first-old_inoctets)/total_second);
    outoctets_gauge_.Set((in_and_out_octets.second-old_outoctets)/total_second);
}

//void PrometheusMetrics::GpuPercentInit() {
//    int num_device = SystemInfo::GetInstance().num_device();
//    constexpr char device_number[] = "DeviceNum";

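OctetsSet converts two cumulative byte counters into per-second gauges: it keeps the previous sample and divides the byte delta by the elapsed wall-clock seconds. A self-contained sketch of that sampling pattern (illustrative names, assuming the counters increase monotonically, as /proc counters normally do between reboots):

#include <chrono>
#include <utility>

// Turn cumulative in/out byte counters into bytes-per-second rates
// by differencing against the previous sample.
struct NetRateSampler {
    unsigned long long last_in = 0, last_out = 0;
    std::chrono::system_clock::time_point last_time = std::chrono::system_clock::now();

    // Returns {in_bytes_per_sec, out_bytes_per_sec}; {0, 0} if no time elapsed.
    std::pair<double, double> Sample(unsigned long long in_octets,
                                     unsigned long long out_octets) {
        auto now = std::chrono::system_clock::now();
        double seconds = std::chrono::duration<double>(now - last_time).count();
        std::pair<double, double> rate{0.0, 0.0};
        if (seconds > 0) {
            rate.first  = (in_octets  - last_in)  / seconds;
            rate.second = (out_octets - last_out) / seconds;
        }
        last_in = in_octets;
        last_out = out_octets;
        last_time = now;
        return rate;
    }
};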
@@ -116,6 +116,7 @@ class PrometheusMetrics: public MetricsBase {
    void ConnectionGaugeIncrement() override;
    void ConnectionGaugeDecrement() override;
    void KeepingAliveCounterIncrement(double value = 1) override {if(startup_) keeping_alive_counter_.Increment(value);};
    void OctetsSet() override;

@@ -480,6 +481,13 @@ class PrometheusMetrics: public MetricsBase {
        .Register(*registry_);
    prometheus::Counter &keeping_alive_counter_ = keeping_alive_.Add({});

    prometheus::Family<prometheus::Gauge> &octets_ = prometheus::BuildGauge()
        .Name("octets_bytes_per_second")
        .Help("octets bytes per second")
        .Register(*registry_);
    prometheus::Gauge &inoctets_gauge_ = octets_.Add({{"type", "inoctets"}});
    prometheus::Gauge &outoctets_gauge_ = octets_.Add({{"type", "outoctets"}});

};

@@ -53,6 +53,11 @@ void SystemInfo::Init() {
        return ;
    }

    //initialize network traffic information
    std::pair<unsigned long long, unsigned long long> in_and_out_octets = Octets();
    in_octets_ = in_and_out_octets.first;
    out_octets_ = in_and_out_octets.second;
    net_time_ = std::chrono::system_clock::now();
}

long long
@@ -202,6 +207,42 @@ SystemInfo::GPUMemoryUsed() {
    return result;
}

std::pair<unsigned long long , unsigned long long >
SystemInfo::Octets(){
    pid_t pid = getpid();
//    const std::string filename = "/proc/"+std::to_string(pid)+"/net/netstat";
    const std::string filename = "/proc/net/netstat";
    std::ifstream file(filename);
    std::string lastline = "";
    std::string line = "";
    while(file){
        getline(file, line);
        if(file.fail()){
            break;
        }
        lastline = line;
    }
    std::vector<size_t> space_position;
    size_t space_pos = lastline.find(" ");
    while(space_pos != std::string::npos){
        space_position.push_back(space_pos);
        space_pos = lastline.find(" ",space_pos+1);
    }
    // InOctets is between 6th and 7th " " and OutOctets is between 7th and 8th " "
    size_t inoctets_begin = space_position[6]+1;
    size_t inoctets_length = space_position[7]-inoctets_begin;
    size_t outoctets_begin = space_position[7]+1;
    size_t outoctets_length = space_position[8]-outoctets_begin;
    std::string inoctets = lastline.substr(inoctets_begin,inoctets_length);
    std::string outoctets = lastline.substr(outoctets_begin,outoctets_length);

    unsigned long long inoctets_bytes = std::stoull(inoctets);
    unsigned long long outoctets_bytes = std::stoull(outoctets);
    std::pair<unsigned long long , unsigned long long > res(inoctets_bytes, outoctets_bytes);
    return res;
}

}
}
}

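Octets() reads the last line of /proc/net/netstat (the IpExt value row) and slices the InOctets and OutOctets columns out of it by hard-coded space positions. That positional parsing breaks if the kernel adds or reorders IpExt columns; a slightly more defensive sketch matches field names against the header row instead. The helper below is hypothetical, not part of SystemInfo:

#include <fstream>
#include <sstream>
#include <string>

// Read /proc/net/netstat and return the value under header `field` on the
// line pair whose rows start with `prefix` (e.g. "IpExt:"). The file pairs
// each header row with a value row, so scan for the prefix, then walk the
// header and value tokens in lockstep until the field name matches.
unsigned long long ReadNetstatField(const std::string &prefix,
                                    const std::string &field) {
    std::ifstream file("/proc/net/netstat");
    std::string header_line, value_line;
    while (std::getline(file, header_line)) {
        if (header_line.compare(0, prefix.size(), prefix) != 0) continue;
        if (!std::getline(file, value_line)) break;  // value row follows header row

        std::istringstream headers(header_line), values(value_line);
        std::string name, value;
        while (headers >> name && values >> value) {
            if (name == field) return std::stoull(value);
        }
    }
    return 0;  // prefix or field not found
}

// Usage, matching what Octets() extracts by position:
//   auto in  = ReadNetstatField("IpExt:", "InOctets");
//   auto out = ReadNetstatField("IpExt:", "OutOctets");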
@@ -13,6 +13,7 @@
#include "string.h"
#include "sys/times.h"
#include "sys/vtimes.h"
#include <chrono>

#include <unordered_map>
#include <vector>
@@ -29,9 +30,12 @@ class SystemInfo {
    clock_t last_cpu_ = clock_t();
    clock_t last_sys_cpu_ = clock_t();
    clock_t last_user_cpu_ = clock_t();
    std::chrono::system_clock::time_point net_time_ = std::chrono::system_clock::now();
    int num_processors_ = 0;
    //number of GPUs
    unsigned int num_device_ = 0;
    unsigned long long in_octets_ = 0;
    unsigned long long out_octets_ = 0;
    bool initialized_ = false;

public:
@@ -43,11 +47,18 @@ class SystemInfo {

    void Init();
    int num_device() const {return num_device_;};
    unsigned long long get_inoctets() { return in_octets_;};
    unsigned long long get_octets() { return out_octets_;};
    std::chrono::system_clock::time_point get_nettime() { return net_time_;};
    void set_inoctets(unsigned long long value) { in_octets_ = value;};
    void set_outoctets(unsigned long long value) { out_octets_ = value;};
    void set_nettime() {net_time_ = std::chrono::system_clock::now();};
    long long ParseLine(char* line);
    unsigned long GetPhysicalMemory();
    unsigned long GetProcessUsedMemory();
    double MemoryPercent();
    double CPUPercent();
    std::pair<unsigned long long , unsigned long long > Octets();
    // std::unordered_map<int,std::vector<double>> GetGPUMemPercent() {};
    // std::vector<std::string> split(std::string input) {};
    std::vector<unsigned int> GPUPercent();

@@ -16,7 +16,7 @@
#include <sys/stat.h>
#include <sys/types.h>
#include <csignal>
#include <numaif.h>
//#include <numaif.h>
#include <unistd.h>
#include <string.h>

cpp/thirdparty/versions.txt (vendored, 51 lines changed)
@@ -1,33 +1,10 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.

# Toolchain library versions
#
# This file is used by `download_dependencies.sh` and cmake to figure out which
# version of a dependency to fetch. In order to add a new dependency, add a
# version variable, e.g. MY_DEP_VERSION and append an entry in the
# `DEPENDENCIES` array (see the comment on top of the declaration for the
# format).

ARROW_VERSION=zilliz
BOOST_VERSION=1.70.0
BZIP2_VERSION=1.0.6
EASYLOGGINGPP_VERSION=v9.96.7
FAISS_VERSION=v1.5.2
GTEST_VERSION=1.8.1
JSONCONS_VERSION=0.126.0
LAPACK_VERSION=v3.8.0
LZ4_VERSION=v1.9.1
OPENBLAS_VERSION=v0.3.6
@@ -41,28 +18,4 @@ YAMLCPP_VERSION=0.6.2
ZLIB_VERSION=v1.2.11
ZSTD_VERSION=v1.4.0

# The first field is the name of the environment variable expected by cmake.
# This _must_ match what is defined. The second field is the name of the
# generated archive file. The third field is the url of the project for the
# given version.
DEPENDENCIES=(
  "MEGASEARCH_BOOST_URL boost-${BOOST_VERSION}.tar.gz https://dl.bintray.com/boostorg/release/${BOOST_VERSION}/source/boost_${BOOST_VERSION//./_}.tar.gz"
  "MEGASEARCH_BZIP2_URL bzip2-${BZIP2_VERSION}.tar.gz https://fossies.org/linux/misc/bzip2-${BZIP2_VERSION}.tar.gz"
  "MEGASEARCH_EASYLOGGINGPP_URL easyloggingpp-${EASYLOGGINGPP_VERSION}.tar.gz https://github.com/zuhd-org/easyloggingpp/archive/${EASYLOGGINGPP_VERSION}.tar.gz"
  "MEGASEARCH_FAISS_URL faiss-${FAISS_VERSION}.tar.gz https://github.com/facebookresearch/faiss/archive/${FAISS_VERSION}.tar.gz"
  "MEGASEARCH_GTEST_URL gtest-${GTEST_VERSION}.tar.gz https://github.com/google/googletest/archive/release-${GTEST_VERSION}.tar.gz"
  "MEGASEARCH_LAPACK_URL lapack-${LAPACK_VERSION}.tar.gz https://github.com/Reference-LAPACK/lapack/archive/${LAPACK_VERSION}.tar.gz"
  "MEGASEARCH_LZ4_URL lz4-${LZ4_VERSION}.tar.gz https://github.com/lz4/lz4/archive/${LZ4_VERSION}.tar.gz"
  "MEGASEARCH_OPENBLAS_URL openblas-${OPENBLAS_VERSION}.tar.gz https://github.com/xianyi/OpenBLAS/archive/${OPENBLAS_VERSION}.tar.gz"
  "MEGASEARCH_PROMETHEUS_URL https://github.com/jupp0r/prometheus-cpp.git"
  "MEGASEARCH_ROCKSDB_URL rocksdb-${ROCKSDB_VERSION}.tar.gz https://github.com/facebook/rocksdb/archive/${ROCKSDB_VERSION}.tar.gz"
  "MEGASEARCH_SNAPPY_URL snappy-${SNAPPY_VERSION}.tar.gz https://github.com/google/snappy/archive/${SNAPPY_VERSION}.tar.gz"
  "MEGASEARCH_SQLITE_URL sqlite-autoconf-${SQLITE_VERSION}.tar.gz https://www.sqlite.org/2019/sqlite-autoconf-${SQLITE_VERSION}.tar.gz"
  "MEGASEARCH_SQLITE_ORM_URL sqlite_orm-${SQLITE_ORM_VERSION}.tar.gz https://github.com/fnc12/sqlite_orm/archive/${SQLITE_ORM_VERSION}.tar.gz"
  "MEGASEARCH_THRIFT_URL thrift-${THRIFT_VERSION}.tar.gz https://github.com/apache/thrift/archive/${THRIFT_VERSION}.tar.gz"
  "MEGASEARCH_YAMLCPP_URL yaml-cpp-${YAMLCPP_VERSION}.tar.gz https://github.com/jbeder/yaml-cpp/archive/yaml-cpp-${YAMLCPP_VERSION}.tar.gz"
  "MEGASEARCH_ZLIB_URL zlib-${ZLIB_VERSION}.tar.gz https://github.com/madler/zlib/archive/${ZLIB_VERSION}.tar.gz"
  "MEGASEARCH_ZSTD_URL zstd-${ZSTD_VERSION}.tar.gz https://github.com/facebook/zstd/archive/${ZSTD_VERSION}.tar.gz"
)

# vim: set filetype=sh:

@@ -27,13 +27,15 @@ using namespace zilliz::milvus;

TEST_F(DBTest, Metric_Tes) {

    server::SystemInfo::GetInstance().Init();
//    server::Metrics::GetInstance().Init();
//    server::Metrics::GetInstance().exposer_ptr()->RegisterCollectable(server::Metrics::GetInstance().registry_ptr());
    server::Metrics::GetInstance().Init();

//    server::PrometheusMetrics::GetInstance().exposer_ptr()->RegisterCollectable(server::PrometheusMetrics::GetInstance().registry_ptr());
    zilliz::milvus::cache::CpuCacheMgr::GetInstance()->SetCapacity(2UL*1024*1024*1024);
    std::cout<<zilliz::milvus::cache::CpuCacheMgr::GetInstance()->CacheCapacity()<<std::endl;

    static const std::string group_name = "test_group";
    static const int group_dim = 256;

@@ -81,19 +83,19 @@ TEST_F(DBTest, Metric_Tes) {
        prev_count = count;

        START_TIMER;
        stat = db_->Query(group_name, k, qb, qxb, results);
//        stat = db_->Query(group_name, k, qb, qxb, results);
        ss << "Search " << j << " With Size " << (float)(count*group_dim*sizeof(float))/(1024*1024) << " M";
//        STOP_TIMER(ss.str());

        ASSERT_STATS(stat);

//        ASSERT_STATS(stat);
        for (auto k=0; k<qb; ++k) {
            ASSERT_EQ(results[k][0].first, target_ids[k]);
//            ASSERT_EQ(results[k][0].first, target_ids[k]);
            ss.str("");
            ss << "Result [" << k << "]:";
            for (auto result : results[k]) {
                ss << result.first << " ";
            }
            /* LOG(DEBUG) << ss.str(); */
//            for (auto result : results[k]) {
//                ss << result.first << " ";
//            }

        }
        ASSERT_TRUE(count >= prev_count);
        std::this_thread::sleep_for(std::chrono::seconds(1));