diff --git a/CHANGELOG.md b/CHANGELOG.md index 9b897c608d..0efa5cebcc 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,11 +7,12 @@ Please mark all change in change log and use the ticket from JIRA. ## Bug ## Improvement - \#64 - Improvement dump function in scheduler +- \#82 - Move easyloggingpp into "external" directory ## Feature ## Task -# Milvus 0.5.0 (TODO) +# Milvus 0.5.0 (2019-10-21) ## Bug - MS-568 - Fix gpuresource free error diff --git a/README.md b/README.md index 9c11af94da..3d1979be4a 100644 --- a/README.md +++ b/README.md @@ -1,7 +1,9 @@ -![Milvuslogo](https://github.com/milvus-io/docs/blob/branch-0.5.0/assets/milvus_logo.png) +![Milvuslogo](https://github.com/milvus-io/docs/blob/master/assets/milvus_logo.png) + ![LICENSE](https://img.shields.io/badge/license-Apache--2.0-brightgreen) ![Language](https://img.shields.io/badge/language-C%2B%2B-blue) +[![codebeat badge](https://codebeat.co/badges/e030a4f6-b126-4475-a938-4723d54ec3a7?style=plastic)](https://codebeat.co/projects/github-com-jinhai-cn-milvus-master) - [Slack Community](https://join.slack.com/t/milvusio/shared_invite/enQtNzY1OTQ0NDI3NjMzLWNmYmM1NmNjOTQ5MGI5NDhhYmRhMGU5M2NhNzhhMDMzY2MzNDdlYjM5ODQ5MmE3ODFlYzU3YjJkNmVlNDQ2ZTk) - [Twitter](https://twitter.com/milvusio) @@ -13,34 +15,49 @@ # Welcome to Milvus -Firstly, welcome, and thanks for your interest in [Milvus](https://milvus.io)! ​​No matter who you are, what you do, we greatly appreciate your contribution to help us reinvent data science with Milvus.​ :beers: - ## What is Milvus -Milvus is an open source vector search engine which provides state-of-the-art similarity search and analysis for billion-scale feature vectors. +Milvus is an open source similarity search engine for massive feature vectors. Designed with heterogeneous computing architecture for the best cost efficiency. Searches over billion-scale vectors take only milliseconds with minimum computing resources. Milvus provides stable Python, Java and C++ APIs. 
-Keep up-to-date with newest releases and latest updates by reading Milvus [release notes](https://milvus.io/docs/en/Releases/v0.4.0/). +Keep up-to-date with newest releases and latest updates by reading Milvus [release notes](https://milvus.io/docs/en/Releases/v0.5.0/). -- GPU-accelerated search engine +- Heterogeneous computing - Milvus uses CPU/GPU heterogeneous computing architecture to process feature vectors, and are orders of magnitudes faster than traditional databases. + Milvus is designed with a heterogeneous computing architecture for the best performance and cost efficiency. -- Various indexes +- Multiple indexes - Milvus supports quantization indexing, tree-based indexing, and graph indexing algorithms. + Milvus supports a variety of indexing types that employ quantization, tree-based, and graph indexing techniques. -- Intelligent scheduling +- Intelligent resource management - Milvus optimizes the search computation and index building according to your data size and available resources. + Milvus automatically adapts search computation and index building processes based on your datasets and available resources. - Horizontal scalability - Milvus expands computation and storage by adding nodes during runtime, which allows you to scale the data size without redesigning the system. + Milvus supports online / offline expansion to scale both storage and computation resources with simple commands. + +- High availability + + Milvus is integrated with the Kubernetes framework so that single points of failure can be avoided. + +- High compatibility + + Milvus is compatible with almost all deep learning models and major programming languages such as Python, Java and C++. + +- Ease of use + + Milvus can be easily installed in a few steps and enables you to exclusively focus on feature vectors. + +- Visualized monitor + + You can track system performance on Prometheus-based GUI monitor dashboards. 
## Architecture -![Milvus_arch](https://github.com/milvus-io/docs/blob/branch-0.5.0/assets/milvus_arch.jpg) + +![Milvus_arch](https://github.com/milvus-io/docs/blob/master/assets/milvus_arch.png) ## Get started @@ -117,20 +134,20 @@ To edit Milvus settings in `conf/server_config.yaml` and `conf/log_config.conf`, #### Run Python example code -Make sure [Python 3.4](https://www.python.org/downloads/) or higher is already installed and in use. +Make sure [Python 3.5](https://www.python.org/downloads/) or higher is already installed and in use. Install Milvus Python SDK. ```shell # Install Milvus Python SDK -$ pip install pymilvus==0.2.0 +$ pip install pymilvus==0.2.3 ``` Create a new file `example.py`, and add [Python example code](https://github.com/milvus-io/pymilvus/blob/master/examples/AdvancedExample.py) to it. Run the example code. -```python +```shell # Run Milvus Python example $ python3 example.py ``` diff --git a/ci/jenkins/scripts/coverage.sh b/ci/jenkins/scripts/coverage.sh index dd52df442c..07ab210d2f 100755 --- a/ci/jenkins/scripts/coverage.sh +++ b/ci/jenkins/scripts/coverage.sh @@ -132,8 +132,8 @@ ${LCOV_CMD} -r "${FILE_INFO_OUTPUT}" -o "${FILE_INFO_OUTPUT_NEW}" \ "*/src/server/Server.cpp" \ "*/src/server/DBWrapper.cpp" \ "*/src/server/grpc_impl/GrpcServer.cpp" \ - "*/src/utils/easylogging++.h" \ - "*/src/utils/easylogging++.cc" + "*/src/external/easyloggingpp/easylogging++.h" \ + "*/src/external/easyloggingpp/easylogging++.cc" if [ $? 
-ne 0 ]; then echo "gen ${FILE_INFO_OUTPUT_NEW} failed" diff --git a/codecov.yaml b/codecov.yaml new file mode 100644 index 0000000000..debe315ac0 --- /dev/null +++ b/codecov.yaml @@ -0,0 +1,14 @@ +#Configuration File for CodeCov +coverage: + precision: 2 + round: down + range: "70...100" + + status: + project: on + patch: yes + changes: no + +comment: + layout: "header, diff, changes, tree" + behavior: default diff --git a/core/build-support/lint_exclusions.txt b/core/build-support/lint_exclusions.txt index 226db75a43..2be060f121 100644 --- a/core/build-support/lint_exclusions.txt +++ b/core/build-support/lint_exclusions.txt @@ -6,4 +6,5 @@ *easylogging++* *SqliteMetaImpl.cpp *src/grpc* +*src/external* *milvus/include* \ No newline at end of file diff --git a/core/coverage.sh b/core/coverage.sh index b3c2e96eed..05e9d3e850 100755 --- a/core/coverage.sh +++ b/core/coverage.sh @@ -122,8 +122,10 @@ ${LCOV_CMD} -r "${FILE_INFO_OUTPUT}" -o "${FILE_INFO_OUTPUT_NEW}" \ "*/src/server/Server.cpp" \ "*/src/server/DBWrapper.cpp" \ "*/src/server/grpc_impl/GrpcServer.cpp" \ - "*/src/utils/easylogging++.h" \ - "*/src/utils/easylogging++.cc" + "*/src/external/easyloggingpp/easylogging++.h" \ + "*/src/external/easyloggingpp/easylogging++.cc" \ + "*/src/external/*" + if [ $? 
-ne 0 ]; then echo "generate ${FILE_INFO_OUTPUT_NEW} failed" exit -2 diff --git a/core/src/CMakeLists.txt b/core/src/CMakeLists.txt index b0228bd090..d086955078 100644 --- a/core/src/CMakeLists.txt +++ b/core/src/CMakeLists.txt @@ -64,6 +64,13 @@ set(scheduler_files ${scheduler_task_files} ) +aux_source_directory(${MILVUS_ENGINE_SRC}/external/easyloggingpp external_easyloggingpp_files) +aux_source_directory(${MILVUS_ENGINE_SRC}/external/nlohmann external_nlohmann_files) +set(external_files + ${external_easyloggingpp_files} + ${external_nlohmann_files} + ) + aux_source_directory(${MILVUS_ENGINE_SRC}/server server_files) aux_source_directory(${MILVUS_ENGINE_SRC}/server/grpc_impl grpc_server_files) aux_source_directory(${MILVUS_ENGINE_SRC}/utils utils_files) @@ -77,6 +84,7 @@ set(engine_files ${db_insert_files} ${db_meta_files} ${metrics_files} + ${external_files} ${utils_files} ${wrapper_files} ) diff --git a/core/src/utils/easylogging++.cc b/core/src/external/easyloggingpp/easylogging++.cc similarity index 100% rename from core/src/utils/easylogging++.cc rename to core/src/external/easyloggingpp/easylogging++.cc diff --git a/core/src/utils/easylogging++.h b/core/src/external/easyloggingpp/easylogging++.h similarity index 100% rename from core/src/utils/easylogging++.h rename to core/src/external/easyloggingpp/easylogging++.h diff --git a/core/src/index/knowhere/knowhere/common/Log.h b/core/src/index/knowhere/knowhere/common/Log.h index 222d03d73e..369e7143af 100644 --- a/core/src/index/knowhere/knowhere/common/Log.h +++ b/core/src/index/knowhere/knowhere/common/Log.h @@ -17,7 +17,7 @@ #pragma once -#include "utils/easylogging++.h" +#include "external/easyloggingpp/easylogging++.h" namespace knowhere { diff --git a/core/src/index/unittest/CMakeLists.txt b/core/src/index/unittest/CMakeLists.txt index 8a5e089486..f840b28e28 100644 --- a/core/src/index/unittest/CMakeLists.txt +++ b/core/src/index/unittest/CMakeLists.txt @@ -20,7 +20,7 @@ set(basic_libs ) set(util_srcs - 
${MILVUS_ENGINE_SRC}/utils/easylogging++.cc + ${MILVUS_ENGINE_SRC}/external/easyloggingpp/easylogging++.cc ${INDEX_SOURCE_DIR}/knowhere/knowhere/index/vector_index/helpers/FaissGpuResourceMgr.cpp ${INDEX_SOURCE_DIR}/knowhere/knowhere/index/vector_index/helpers/FaissIO.cpp ${INDEX_SOURCE_DIR}/knowhere/knowhere/index/vector_index/helpers/IndexParameter.cpp diff --git a/core/src/main.cpp b/core/src/main.cpp index d1c9ba6dfd..d60f26d702 100644 --- a/core/src/main.cpp +++ b/core/src/main.cpp @@ -23,11 +23,11 @@ #include #include "../version.h" +#include "external/easyloggingpp/easylogging++.h" #include "metrics/Metrics.h" #include "server/Server.h" #include "utils/CommonUtil.h" #include "utils/SignalUtil.h" -#include "utils/easylogging++.h" INITIALIZE_EASYLOGGINGPP diff --git a/core/src/scheduler/task/SearchTask.cpp b/core/src/scheduler/task/SearchTask.cpp index 2836d41dd4..1bf1caff76 100644 --- a/core/src/scheduler/task/SearchTask.cpp +++ b/core/src/scheduler/task/SearchTask.cpp @@ -307,71 +307,71 @@ XSearchTask::MergeTopkToResultSet(const std::vector& input_ids, const s } } -void -XSearchTask::MergeTopkArray(std::vector& tar_ids, std::vector& tar_distance, uint64_t& tar_input_k, - const std::vector& src_ids, const std::vector& src_distance, - uint64_t src_input_k, uint64_t nq, uint64_t topk, bool ascending) { - if (src_ids.empty() || src_distance.empty()) { - return; - } - - uint64_t output_k = std::min(topk, tar_input_k + src_input_k); - std::vector id_buf(nq * output_k, -1); - std::vector dist_buf(nq * output_k, 0.0); - - uint64_t buf_k, src_k, tar_k; - uint64_t src_idx, tar_idx, buf_idx; - uint64_t src_input_k_multi_i, tar_input_k_multi_i, buf_k_multi_i; - - for (uint64_t i = 0; i < nq; i++) { - src_input_k_multi_i = src_input_k * i; - tar_input_k_multi_i = tar_input_k * i; - buf_k_multi_i = output_k * i; - buf_k = src_k = tar_k = 0; - while (buf_k < output_k && src_k < src_input_k && tar_k < tar_input_k) { - src_idx = src_input_k_multi_i + src_k; - tar_idx = 
tar_input_k_multi_i + tar_k; - buf_idx = buf_k_multi_i + buf_k; - if ((ascending && src_distance[src_idx] < tar_distance[tar_idx]) || - (!ascending && src_distance[src_idx] > tar_distance[tar_idx])) { - id_buf[buf_idx] = src_ids[src_idx]; - dist_buf[buf_idx] = src_distance[src_idx]; - src_k++; - } else { - id_buf[buf_idx] = tar_ids[tar_idx]; - dist_buf[buf_idx] = tar_distance[tar_idx]; - tar_k++; - } - buf_k++; - } - - if (buf_k < output_k) { - if (src_k < src_input_k) { - while (buf_k < output_k && src_k < src_input_k) { - src_idx = src_input_k_multi_i + src_k; - buf_idx = buf_k_multi_i + buf_k; - id_buf[buf_idx] = src_ids[src_idx]; - dist_buf[buf_idx] = src_distance[src_idx]; - src_k++; - buf_k++; - } - } else { - while (buf_k < output_k && tar_k < tar_input_k) { - tar_idx = tar_input_k_multi_i + tar_k; - buf_idx = buf_k_multi_i + buf_k; - id_buf[buf_idx] = tar_ids[tar_idx]; - dist_buf[buf_idx] = tar_distance[tar_idx]; - tar_k++; - buf_k++; - } - } - } - } - - tar_ids.swap(id_buf); - tar_distance.swap(dist_buf); - tar_input_k = output_k; -} +// void +// XSearchTask::MergeTopkArray(std::vector& tar_ids, std::vector& tar_distance, uint64_t& tar_input_k, +// const std::vector& src_ids, const std::vector& src_distance, +// uint64_t src_input_k, uint64_t nq, uint64_t topk, bool ascending) { +// if (src_ids.empty() || src_distance.empty()) { +// return; +// } +// +// uint64_t output_k = std::min(topk, tar_input_k + src_input_k); +// std::vector id_buf(nq * output_k, -1); +// std::vector dist_buf(nq * output_k, 0.0); +// +// uint64_t buf_k, src_k, tar_k; +// uint64_t src_idx, tar_idx, buf_idx; +// uint64_t src_input_k_multi_i, tar_input_k_multi_i, buf_k_multi_i; +// +// for (uint64_t i = 0; i < nq; i++) { +// src_input_k_multi_i = src_input_k * i; +// tar_input_k_multi_i = tar_input_k * i; +// buf_k_multi_i = output_k * i; +// buf_k = src_k = tar_k = 0; +// while (buf_k < output_k && src_k < src_input_k && tar_k < tar_input_k) { +// src_idx = src_input_k_multi_i + 
src_k; +// tar_idx = tar_input_k_multi_i + tar_k; +// buf_idx = buf_k_multi_i + buf_k; +// if ((ascending && src_distance[src_idx] < tar_distance[tar_idx]) || +// (!ascending && src_distance[src_idx] > tar_distance[tar_idx])) { +// id_buf[buf_idx] = src_ids[src_idx]; +// dist_buf[buf_idx] = src_distance[src_idx]; +// src_k++; +// } else { +// id_buf[buf_idx] = tar_ids[tar_idx]; +// dist_buf[buf_idx] = tar_distance[tar_idx]; +// tar_k++; +// } +// buf_k++; +// } +// +// if (buf_k < output_k) { +// if (src_k < src_input_k) { +// while (buf_k < output_k && src_k < src_input_k) { +// src_idx = src_input_k_multi_i + src_k; +// buf_idx = buf_k_multi_i + buf_k; +// id_buf[buf_idx] = src_ids[src_idx]; +// dist_buf[buf_idx] = src_distance[src_idx]; +// src_k++; +// buf_k++; +// } +// } else { +// while (buf_k < output_k && tar_k < tar_input_k) { +// tar_idx = tar_input_k_multi_i + tar_k; +// buf_idx = buf_k_multi_i + buf_k; +// id_buf[buf_idx] = tar_ids[tar_idx]; +// dist_buf[buf_idx] = tar_distance[tar_idx]; +// tar_k++; +// buf_k++; +// } +// } +// } +// } +// +// tar_ids.swap(id_buf); +// tar_distance.swap(dist_buf); +// tar_input_k = output_k; +//} } // namespace scheduler } // namespace milvus diff --git a/core/src/scheduler/task/SearchTask.h b/core/src/scheduler/task/SearchTask.h index 6a7381e0e6..bbc8b5bd8f 100644 --- a/core/src/scheduler/task/SearchTask.h +++ b/core/src/scheduler/task/SearchTask.h @@ -42,10 +42,10 @@ class XSearchTask : public Task { MergeTopkToResultSet(const std::vector& input_ids, const std::vector& input_distance, uint64_t input_k, uint64_t nq, uint64_t topk, bool ascending, scheduler::ResultSet& result); - static void - MergeTopkArray(std::vector& tar_ids, std::vector& tar_distance, uint64_t& tar_input_k, - const std::vector& src_ids, const std::vector& src_distance, uint64_t src_input_k, - uint64_t nq, uint64_t topk, bool ascending); + // static void + // MergeTopkArray(std::vector& tar_ids, std::vector& tar_distance, uint64_t& tar_input_k, + 
// const std::vector& src_ids, const std::vector& src_distance, uint64_t + // src_input_k, uint64_t nq, uint64_t topk, bool ascending); public: TableFileSchemaPtr file_; diff --git a/core/src/utils/Log.h b/core/src/utils/Log.h index 1dd116367a..4aa3146b01 100644 --- a/core/src/utils/Log.h +++ b/core/src/utils/Log.h @@ -17,7 +17,7 @@ #pragma once -#include "utils/easylogging++.h" +#include "external/easyloggingpp/easylogging++.h" namespace milvus { diff --git a/core/src/utils/LogUtil.h b/core/src/utils/LogUtil.h index 9926939442..7e5afd087a 100644 --- a/core/src/utils/LogUtil.h +++ b/core/src/utils/LogUtil.h @@ -17,8 +17,8 @@ #pragma once +#include "external/easyloggingpp/easylogging++.h" #include "utils/Status.h" -#include "utils/easylogging++.h" #include #include diff --git a/core/unittest/CMakeLists.txt b/core/unittest/CMakeLists.txt index 258fd76a8e..aae7fb8d7f 100644 --- a/core/unittest/CMakeLists.txt +++ b/core/unittest/CMakeLists.txt @@ -60,6 +60,13 @@ set(scheduler_files ${scheduler_optimizer_files} ) +aux_source_directory(${MILVUS_ENGINE_SRC}/external/easyloggingpp external_easyloggingpp_files) +aux_source_directory(${MILVUS_ENGINE_SRC}/external/nlohmann external_nlohmann_files) +set(external_files + ${external_easyloggingpp_files} + ${external_nlohmann_files} + ) + aux_source_directory(${MILVUS_ENGINE_SRC}/server server_files) aux_source_directory(${MILVUS_ENGINE_SRC}/server/grpc_impl grpc_server_files) aux_source_directory(${MILVUS_ENGINE_SRC}/utils utils_files) @@ -74,7 +81,7 @@ set(helper_files ${MILVUS_ENGINE_SRC}/utils/TimeRecorder.cpp ${MILVUS_ENGINE_SRC}/utils/Status.cpp ${MILVUS_ENGINE_SRC}/utils/ValidationUtil.cpp - ${MILVUS_ENGINE_SRC}/utils/easylogging++.cc + ${MILVUS_ENGINE_SRC}/external/easyloggingpp/easylogging++.cc ) set(common_files @@ -85,6 +92,7 @@ set(common_files ${db_insert_files} ${db_meta_files} ${metrics_files} + ${external_files} ${scheduler_files} ${wrapper_files} ${helper_files} diff --git a/core/unittest/db/test_search.cpp 
b/core/unittest/db/test_search.cpp index dc393b7a26..b8cf08b3e2 100644 --- a/core/unittest/db/test_search.cpp +++ b/core/unittest/db/test_search.cpp @@ -30,6 +30,7 @@ namespace ms = milvus::scheduler; void BuildResult(std::vector& output_ids, std::vector& output_distance, + uint64_t input_k, uint64_t topk, uint64_t nq, bool ascending) { @@ -39,9 +40,15 @@ BuildResult(std::vector& output_ids, output_distance.resize(nq * topk); for (uint64_t i = 0; i < nq; i++) { - for (uint64_t j = 0; j < topk; j++) { + //insert valid items + for (uint64_t j = 0; j < input_k; j++) { output_ids[i * topk + j] = (int64_t)(drand48() * 100000); - output_distance[i * topk + j] = ascending ? (j + drand48()) : ((topk - j) + drand48()); + output_distance[i * topk + j] = ascending ? (j + drand48()) : ((input_k - j) + drand48()); + } + //insert invalid items + for (uint64_t j = input_k; j < topk; j++) { + output_ids[i * topk + j] = -1; + output_distance[i * topk + j] = -1.0; } } } @@ -83,23 +90,32 @@ CheckTopkResult(const std::vector& input_ids_1, ASSERT_EQ(input_ids_1.size(), input_distance_1.size()); ASSERT_EQ(input_ids_2.size(), input_distance_2.size()); - uint64_t input_k1 = input_ids_1.size() / nq; - uint64_t input_k2 = input_ids_2.size() / nq; - for (int64_t i = 0; i < nq; i++) { std::vector - src_vec(input_distance_1.begin() + i * input_k1, input_distance_1.begin() + (i + 1) * input_k1); + src_vec(input_distance_1.begin() + i * topk, input_distance_1.begin() + (i + 1) * topk); src_vec.insert(src_vec.end(), - input_distance_2.begin() + i * input_k2, - input_distance_2.begin() + (i + 1) * input_k2); + input_distance_2.begin() + i * topk, + input_distance_2.begin() + (i + 1) * topk); if (ascending) { std::sort(src_vec.begin(), src_vec.end()); } else { std::sort(src_vec.begin(), src_vec.end(), std::greater()); } - uint64_t n = std::min(topk, input_k1 + input_k2); + //erase invalid items + std::vector::iterator iter; + for (iter = src_vec.begin(); iter != src_vec.end();) { + if (*iter < 0.0) 
+ iter = src_vec.erase(iter); + else + ++iter; + } + + uint64_t n = std::min(topk, result[i].size()); for (uint64_t j = 0; j < n; j++) { + if (result[i][j].first < 0) { + continue; + } if (src_vec[j] != result[i][j].second) { std::cout << src_vec[j] << " " << result[i][j].second << std::endl; } @@ -110,12 +126,13 @@ CheckTopkResult(const std::vector& input_ids_1, } // namespace -void MergeTopkToResultSetTest(uint64_t topk_1, uint64_t topk_2, uint64_t nq, uint64_t topk, bool ascending) { +void +MergeTopkToResultSetTest(uint64_t topk_1, uint64_t topk_2, uint64_t nq, uint64_t topk, bool ascending) { std::vector ids1, ids2; std::vector dist1, dist2; ms::ResultSet result; - BuildResult(ids1, dist1, topk_1, nq, ascending); - BuildResult(ids2, dist2, topk_2, nq, ascending); + BuildResult(ids1, dist1, topk_1, topk, nq, ascending); + BuildResult(ids2, dist2, topk_2, topk, nq, ascending); ms::XSearchTask::MergeTopkToResultSet(ids1, dist1, topk_1, nq, topk, ascending, result); ms::XSearchTask::MergeTopkToResultSet(ids2, dist2, topk_2, nq, topk, ascending, result); CheckTopkResult(ids1, dist1, ids2, dist2, topk, nq, ascending, result); @@ -134,70 +151,72 @@ TEST(DBSearchTest, MERGE_RESULT_SET_TEST) { MergeTopkToResultSetTest(TOP_K, TOP_K, NQ, TOP_K, false); /* test3, id1/dist1 small topk */ - MergeTopkToResultSetTest(TOP_K/2, TOP_K, NQ, TOP_K, true); - MergeTopkToResultSetTest(TOP_K/2, TOP_K, NQ, TOP_K, false); + MergeTopkToResultSetTest(TOP_K / 2, TOP_K, NQ, TOP_K, true); + MergeTopkToResultSetTest(TOP_K / 2, TOP_K, NQ, TOP_K, false); /* test4, id1/dist1 small topk, id2/dist2 small topk */ - MergeTopkToResultSetTest(TOP_K/2, TOP_K/3, NQ, TOP_K, true); - MergeTopkToResultSetTest(TOP_K/2, TOP_K/3, NQ, TOP_K, false); + MergeTopkToResultSetTest(TOP_K / 2, TOP_K / 3, NQ, TOP_K, true); + MergeTopkToResultSetTest(TOP_K / 2, TOP_K / 3, NQ, TOP_K, false); } -void MergeTopkArrayTest(uint64_t topk_1, uint64_t topk_2, uint64_t nq, uint64_t topk, bool ascending) { - std::vector ids1, 
ids2; - std::vector dist1, dist2; - ms::ResultSet result; - BuildResult(ids1, dist1, topk_1, nq, ascending); - BuildResult(ids2, dist2, topk_2, nq, ascending); - uint64_t result_topk = std::min(topk, topk_1 + topk_2); - ms::XSearchTask::MergeTopkArray(ids1, dist1, topk_1, ids2, dist2, topk_2, nq, topk, ascending); - if (ids1.size() != result_topk * nq) { - std::cout << ids1.size() << " " << result_topk * nq << std::endl; - } - ASSERT_TRUE(ids1.size() == result_topk * nq); - ASSERT_TRUE(dist1.size() == result_topk * nq); - for (uint64_t i = 0; i < nq; i++) { - for (uint64_t k = 1; k < result_topk; k++) { - if (ascending) { - if (dist1[i * result_topk + k] < dist1[i * result_topk + k - 1]) { - std::cout << dist1[i * result_topk + k - 1] << " " << dist1[i * result_topk + k] << std::endl; - } - ASSERT_TRUE(dist1[i * result_topk + k] >= dist1[i * result_topk + k - 1]); - } else { - if (dist1[i * result_topk + k] > dist1[i * result_topk + k - 1]) { - std::cout << dist1[i * result_topk + k - 1] << " " << dist1[i * result_topk + k] << std::endl; - } - ASSERT_TRUE(dist1[i * result_topk + k] <= dist1[i * result_topk + k - 1]); - } - } - } -} +//void MergeTopkArrayTest(uint64_t topk_1, uint64_t topk_2, uint64_t nq, uint64_t topk, bool ascending) { +// std::vector ids1, ids2; +// std::vector dist1, dist2; +// ms::ResultSet result; +// BuildResult(ids1, dist1, topk_1, topk, nq, ascending); +// BuildResult(ids2, dist2, topk_2, topk, nq, ascending); +// uint64_t result_topk = std::min(topk, topk_1 + topk_2); +// ms::XSearchTask::MergeTopkArray(ids1, dist1, topk_1, ids2, dist2, topk_2, nq, topk, ascending); +// if (ids1.size() != result_topk * nq) { +// std::cout << ids1.size() << " " << result_topk * nq << std::endl; +// } +// ASSERT_TRUE(ids1.size() == result_topk * nq); +// ASSERT_TRUE(dist1.size() == result_topk * nq); +// for (uint64_t i = 0; i < nq; i++) { +// for (uint64_t k = 1; k < result_topk; k++) { +// float f0 = dist1[i * topk + k - 1]; +// float f1 = dist1[i * topk + 
k]; +// if (ascending) { +// if (f1 < f0) { +// std::cout << f0 << " " << f1 << std::endl; +// } +// ASSERT_TRUE(f1 >= f0); +// } else { +// if (f1 > f0) { +// std::cout << f0 << " " << f1 << std::endl; +// } +// ASSERT_TRUE(f1 <= f0); +// } +// } +// } +//} -TEST(DBSearchTest, MERGE_ARRAY_TEST) { - uint64_t NQ = 15; - uint64_t TOP_K = 64; +//TEST(DBSearchTest, MERGE_ARRAY_TEST) { +// uint64_t NQ = 15; +// uint64_t TOP_K = 64; +// +// /* test1, id1/dist1 valid, id2/dist2 empty */ +// MergeTopkArrayTest(TOP_K, 0, NQ, TOP_K, true); +// MergeTopkArrayTest(TOP_K, 0, NQ, TOP_K, false); +// MergeTopkArrayTest(0, TOP_K, NQ, TOP_K, true); +// MergeTopkArrayTest(0, TOP_K, NQ, TOP_K, false); - /* test1, id1/dist1 valid, id2/dist2 empty */ - MergeTopkArrayTest(TOP_K, 0, NQ, TOP_K, true); - MergeTopkArrayTest(TOP_K, 0, NQ, TOP_K, false); - MergeTopkArrayTest(0, TOP_K, NQ, TOP_K, true); - MergeTopkArrayTest(0, TOP_K, NQ, TOP_K, false); - - /* test2, id1/dist1 valid, id2/dist2 valid */ - MergeTopkArrayTest(TOP_K, TOP_K, NQ, TOP_K, true); - MergeTopkArrayTest(TOP_K, TOP_K, NQ, TOP_K, false); - - /* test3, id1/dist1 small topk */ - MergeTopkArrayTest(TOP_K/2, TOP_K, NQ, TOP_K, true); - MergeTopkArrayTest(TOP_K/2, TOP_K, NQ, TOP_K, false); - MergeTopkArrayTest(TOP_K, TOP_K/2, NQ, TOP_K, true); - MergeTopkArrayTest(TOP_K, TOP_K/2, NQ, TOP_K, false); - - /* test4, id1/dist1 small topk, id2/dist2 small topk */ - MergeTopkArrayTest(TOP_K/2, TOP_K/3, NQ, TOP_K, true); - MergeTopkArrayTest(TOP_K/2, TOP_K/3, NQ, TOP_K, false); - MergeTopkArrayTest(TOP_K/3, TOP_K/2, NQ, TOP_K, true); - MergeTopkArrayTest(TOP_K/3, TOP_K/2, NQ, TOP_K, false); -} +// /* test2, id1/dist1 valid, id2/dist2 valid */ +// MergeTopkArrayTest(TOP_K, TOP_K, NQ, TOP_K, true); +// MergeTopkArrayTest(TOP_K, TOP_K, NQ, TOP_K, false); +// +// /* test3, id1/dist1 small topk */ +// MergeTopkArrayTest(TOP_K/2, TOP_K, NQ, TOP_K, true); +// MergeTopkArrayTest(TOP_K/2, TOP_K, NQ, TOP_K, false); +// MergeTopkArrayTest(TOP_K, 
TOP_K/2, NQ, TOP_K, true); +// MergeTopkArrayTest(TOP_K, TOP_K/2, NQ, TOP_K, false); +// +// /* test4, id1/dist1 small topk, id2/dist2 small topk */ +// MergeTopkArrayTest(TOP_K/2, TOP_K/3, NQ, TOP_K, true); +// MergeTopkArrayTest(TOP_K/2, TOP_K/3, NQ, TOP_K, false); +// MergeTopkArrayTest(TOP_K/3, TOP_K/2, NQ, TOP_K, true); +// MergeTopkArrayTest(TOP_K/3, TOP_K/2, NQ, TOP_K, false); +//} TEST(DBSearchTest, REDUCE_PERF_TEST) { int32_t index_file_num = 478; /* sift1B dataset, index files num */ @@ -206,8 +225,8 @@ TEST(DBSearchTest, REDUCE_PERF_TEST) { std::vector thread_vec = {4, 8}; std::vector nq_vec = {1, 10, 100}; std::vector topk_vec = {1, 4, 16, 64}; - int32_t NQ = nq_vec[nq_vec.size()-1]; - int32_t TOPK = topk_vec[topk_vec.size()-1]; + int32_t NQ = nq_vec[nq_vec.size() - 1]; + int32_t TOPK = topk_vec[topk_vec.size() - 1]; std::vector> id_vec; std::vector> dist_vec; @@ -217,7 +236,7 @@ TEST(DBSearchTest, REDUCE_PERF_TEST) { /* generate testing data */ for (i = 0; i < index_file_num; i++) { - BuildResult(input_ids, input_distance, TOPK, NQ, ascending); + BuildResult(input_ids, input_distance, TOPK, TOPK, NQ, ascending); id_vec.push_back(input_ids); dist_vec.push_back(input_distance); } @@ -237,7 +256,7 @@ TEST(DBSearchTest, REDUCE_PERF_TEST) { } std::string str1 = "Method-1 " + std::to_string(max_thread_num) + " " + - std::to_string(nq) + " " + std::to_string(top_k); + std::to_string(nq) + " " + std::to_string(top_k); milvus::TimeRecorder rc1(str1); /////////////////////////////////////////////////////////////////////////////////////// @@ -255,114 +274,114 @@ TEST(DBSearchTest, REDUCE_PERF_TEST) { rc1.RecordSection("reduce done"); - /////////////////////////////////////////////////////////////////////////////////////// - /* method-2 */ - std::vector> id_vec_2(index_file_num); - std::vector> dist_vec_2(index_file_num); - std::vector k_vec_2(index_file_num); - for (i = 0; i < index_file_num; i++) { - CopyResult(id_vec_2[i], dist_vec_2[i], top_k, id_vec[i], 
dist_vec[i], TOPK, nq); - k_vec_2[i] = top_k; - } - - std::string str2 = "Method-2 " + std::to_string(max_thread_num) + " " + - std::to_string(nq) + " " + std::to_string(top_k); - milvus::TimeRecorder rc2(str2); - - for (step = 1; step < index_file_num; step *= 2) { - for (i = 0; i + step < index_file_num; i += step * 2) { - ms::XSearchTask::MergeTopkArray(id_vec_2[i], dist_vec_2[i], k_vec_2[i], - id_vec_2[i + step], dist_vec_2[i + step], k_vec_2[i + step], - nq, top_k, ascending); - } - } - ms::XSearchTask::MergeTopkToResultSet(id_vec_2[0], - dist_vec_2[0], - k_vec_2[0], - nq, - top_k, - ascending, - final_result_2); - ASSERT_EQ(final_result_2.size(), nq); - - rc2.RecordSection("reduce done"); - - for (i = 0; i < nq; i++) { - ASSERT_EQ(final_result[i].size(), final_result_2[i].size()); - for (k = 0; k < final_result[i].size(); k++) { - if (final_result[i][k].first != final_result_2[i][k].first) { - std::cout << i << " " << k << std::endl; - } - ASSERT_EQ(final_result[i][k].first, final_result_2[i][k].first); - ASSERT_EQ(final_result[i][k].second, final_result_2[i][k].second); - } - } - - /////////////////////////////////////////////////////////////////////////////////////// - /* method-3 parallel */ - std::vector> id_vec_3(index_file_num); - std::vector> dist_vec_3(index_file_num); - std::vector k_vec_3(index_file_num); - for (i = 0; i < index_file_num; i++) { - CopyResult(id_vec_3[i], dist_vec_3[i], top_k, id_vec[i], dist_vec[i], TOPK, nq); - k_vec_3[i] = top_k; - } - - std::string str3 = "Method-3 " + std::to_string(max_thread_num) + " " + - std::to_string(nq) + " " + std::to_string(top_k); - milvus::TimeRecorder rc3(str3); - - for (step = 1; step < index_file_num; step *= 2) { - for (i = 0; i + step < index_file_num; i += step * 2) { - threads_list.push_back( - threadPool.enqueue(ms::XSearchTask::MergeTopkArray, - std::ref(id_vec_3[i]), - std::ref(dist_vec_3[i]), - std::ref(k_vec_3[i]), - std::ref(id_vec_3[i + step]), - std::ref(dist_vec_3[i + step]), - 
std::ref(k_vec_3[i + step]), - nq, - top_k, - ascending)); - } - - while (threads_list.size() > 0) { - int nready = 0; - for (auto it = threads_list.begin(); it != threads_list.end(); it = it) { - auto &p = *it; - std::chrono::milliseconds span(0); - if (p.wait_for(span) == std::future_status::ready) { - threads_list.erase(it++); - ++nready; - } else { - ++it; - } - } - - if (nready == 0) { - std::this_thread::yield(); - } - } - } - ms::XSearchTask::MergeTopkToResultSet(id_vec_3[0], - dist_vec_3[0], - k_vec_3[0], - nq, - top_k, - ascending, - final_result_3); - ASSERT_EQ(final_result_3.size(), nq); - - rc3.RecordSection("reduce done"); - - for (i = 0; i < nq; i++) { - ASSERT_EQ(final_result[i].size(), final_result_3[i].size()); - for (k = 0; k < final_result[i].size(); k++) { - ASSERT_EQ(final_result[i][k].first, final_result_3[i][k].first); - ASSERT_EQ(final_result[i][k].second, final_result_3[i][k].second); - } - } +// /////////////////////////////////////////////////////////////////////////////////////// +// /* method-2 */ +// std::vector> id_vec_2(index_file_num); +// std::vector> dist_vec_2(index_file_num); +// std::vector k_vec_2(index_file_num); +// for (i = 0; i < index_file_num; i++) { +// CopyResult(id_vec_2[i], dist_vec_2[i], top_k, id_vec[i], dist_vec[i], TOPK, nq); +// k_vec_2[i] = top_k; +// } +// +// std::string str2 = "Method-2 " + std::to_string(max_thread_num) + " " + +// std::to_string(nq) + " " + std::to_string(top_k); +// milvus::TimeRecorder rc2(str2); +// +// for (step = 1; step < index_file_num; step *= 2) { +// for (i = 0; i + step < index_file_num; i += step * 2) { +// ms::XSearchTask::MergeTopkArray(id_vec_2[i], dist_vec_2[i], k_vec_2[i], +// id_vec_2[i + step], dist_vec_2[i + step], k_vec_2[i + step], +// nq, top_k, ascending); +// } +// } +// ms::XSearchTask::MergeTopkToResultSet(id_vec_2[0], +// dist_vec_2[0], +// k_vec_2[0], +// nq, +// top_k, +// ascending, +// final_result_2); +// ASSERT_EQ(final_result_2.size(), nq); +// +// 
rc2.RecordSection("reduce done"); +// +// for (i = 0; i < nq; i++) { +// ASSERT_EQ(final_result[i].size(), final_result_2[i].size()); +// for (k = 0; k < final_result[i].size(); k++) { +// if (final_result[i][k].first != final_result_2[i][k].first) { +// std::cout << i << " " << k << std::endl; +// } +// ASSERT_EQ(final_result[i][k].first, final_result_2[i][k].first); +// ASSERT_EQ(final_result[i][k].second, final_result_2[i][k].second); +// } +// } +// +// /////////////////////////////////////////////////////////////////////////////////////// +// /* method-3 parallel */ +// std::vector> id_vec_3(index_file_num); +// std::vector> dist_vec_3(index_file_num); +// std::vector k_vec_3(index_file_num); +// for (i = 0; i < index_file_num; i++) { +// CopyResult(id_vec_3[i], dist_vec_3[i], top_k, id_vec[i], dist_vec[i], TOPK, nq); +// k_vec_3[i] = top_k; +// } +// +// std::string str3 = "Method-3 " + std::to_string(max_thread_num) + " " + +// std::to_string(nq) + " " + std::to_string(top_k); +// milvus::TimeRecorder rc3(str3); +// +// for (step = 1; step < index_file_num; step *= 2) { +// for (i = 0; i + step < index_file_num; i += step * 2) { +// threads_list.push_back( +// threadPool.enqueue(ms::XSearchTask::MergeTopkArray, +// std::ref(id_vec_3[i]), +// std::ref(dist_vec_3[i]), +// std::ref(k_vec_3[i]), +// std::ref(id_vec_3[i + step]), +// std::ref(dist_vec_3[i + step]), +// std::ref(k_vec_3[i + step]), +// nq, +// top_k, +// ascending)); +// } +// +// while (threads_list.size() > 0) { +// int nready = 0; +// for (auto it = threads_list.begin(); it != threads_list.end(); it = it) { +// auto &p = *it; +// std::chrono::milliseconds span(0); +// if (p.wait_for(span) == std::future_status::ready) { +// threads_list.erase(it++); +// ++nready; +// } else { +// ++it; +// } +// } +// +// if (nready == 0) { +// std::this_thread::yield(); +// } +// } +// } +// ms::XSearchTask::MergeTopkToResultSet(id_vec_3[0], +// dist_vec_3[0], +// k_vec_3[0], +// nq, +// top_k, +// ascending, 
+// final_result_3); +// ASSERT_EQ(final_result_3.size(), nq); +// +// rc3.RecordSection("reduce done"); +// +// for (i = 0; i < nq; i++) { +// ASSERT_EQ(final_result[i].size(), final_result_3[i].size()); +// for (k = 0; k < final_result[i].size(); k++) { +// ASSERT_EQ(final_result[i][k].first, final_result_3[i][k].first); +// ASSERT_EQ(final_result[i][k].second, final_result_3[i][k].second); +// } +// } } } } diff --git a/core/unittest/main.cpp b/core/unittest/main.cpp index d17cf9da58..2cd0624f7b 100644 --- a/core/unittest/main.cpp +++ b/core/unittest/main.cpp @@ -18,7 +18,7 @@ #include #include -#include "utils/easylogging++.h" +#include "external/easyloggingpp/easylogging++.h" INITIALIZE_EASYLOGGINGPP diff --git a/core/unittest/wrapper/CMakeLists.txt b/core/unittest/wrapper/CMakeLists.txt index 156d89b241..a8015f8d34 100644 --- a/core/unittest/wrapper/CMakeLists.txt +++ b/core/unittest/wrapper/CMakeLists.txt @@ -26,7 +26,7 @@ set(wrapper_files set(util_files utils.cpp - ${MILVUS_ENGINE_SRC}/utils/easylogging++.cc + ${MILVUS_ENGINE_SRC}/external/easyloggingpp/easylogging++.cc ${MILVUS_ENGINE_SRC}/utils/Status.cpp ) diff --git a/core/unittest/wrapper/test_wrapper.cpp b/core/unittest/wrapper/test_wrapper.cpp index f112fc7e65..1ec98ccb5d 100644 --- a/core/unittest/wrapper/test_wrapper.cpp +++ b/core/unittest/wrapper/test_wrapper.cpp @@ -15,7 +15,7 @@ // specific language governing permissions and limitations // under the License. 
-#include "utils/easylogging++.h" +#include "external/easyloggingpp/easylogging++.h" #include "wrapper/VecIndex.h" #include "knowhere/index/vector_index/helpers/FaissGpuResourceMgr.h" #include "knowhere/index/vector_index/helpers/IndexParameter.h" diff --git a/tests/milvus_benchmark/requirements.txt b/tests/milvus_benchmark/requirements.txt index 1285b4d2ba..328cff1eb4 100644 --- a/tests/milvus_benchmark/requirements.txt +++ b/tests/milvus_benchmark/requirements.txt @@ -1,6 +1,6 @@ numpy==1.16.3 pymilvus>=0.1.18 -pyyaml==3.12 +pyyaml==5.1 docker==4.0.2 tableprint==0.8.0 ansicolors==1.1.8 \ No newline at end of file