diff --git a/internal/core/CMakeLists.txt b/internal/core/CMakeLists.txt
index 8afcc8ef68..e8ae1fc32b 100644
--- a/internal/core/CMakeLists.txt
+++ b/internal/core/CMakeLists.txt
@@ -185,6 +185,7 @@ if ( BUILD_UNIT_TEST STREQUAL "ON" )
     append_flags( CMAKE_CXX_FLAGS FLAGS "-DELPP_DISABLE_LOGS")
 
     add_subdirectory(unittest)
+    add_subdirectory(bench)  # benchmarks are configured under the same BUILD_UNIT_TEST switch as the unit tests
 endif ()
 
 add_custom_target( Clean-All COMMAND ${CMAKE_BUILD_TOOL} clean )
diff --git a/internal/core/bench/CMakeLists.txt b/internal/core/bench/CMakeLists.txt
new file mode 100644
index 0000000000..8b1b0a13e6
--- /dev/null
+++ b/internal/core/bench/CMakeLists.txt
@@ -0,0 +1,18 @@
+include_directories(${CMAKE_HOME_DIRECTORY}/src)  # bench_search.cpp includes "segcore/..." headers
+include_directories(${CMAKE_HOME_DIRECTORY}/unittest)  # presumably where "test_utils/DataGen.h" lives - confirm
+include_directories(${CMAKE_HOME_DIRECTORY}/src/index/knowhere)
+
+set(bench_srcs  # every benchmark translation unit compiled into all_bench
+        bench_naive.cpp
+        bench_search.cpp
+)
+
+add_executable(all_bench ${bench_srcs})  # one binary holding every registered benchmark
+target_link_libraries(all_bench
+        milvus_segcore
+        milvus_indexbuilder
+        log
+        pthread
+        )
+
+target_link_libraries(all_bench benchmark::benchmark_main)  # benchmark_main supplies main(); the sources define none
diff --git a/internal/core/bench/bench_naive.cpp b/internal/core/bench/bench_naive.cpp
new file mode 100644
index 0000000000..4d1c015273
--- /dev/null
+++ b/internal/core/bench/bench_naive.cpp
@@ -0,0 +1,28 @@
+// Copyright (C) 2019-2020 Zilliz. All rights reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software distributed under the License
+// is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
+// or implied.
See the License for the specific language governing permissions and limitations under the License
+
+#include <benchmark/benchmark.h>
+#include <string>
+
+static void
+BN_Naive_StringCreation(benchmark::State& state) {  // times default-constructing an empty std::string
+    for (auto _ : state) std::string empty_string;
+}
+// Register the function as a benchmark
+BENCHMARK(BN_Naive_StringCreation);
+
+// Define another benchmark
+static void
+BN_Naive_StringCopy(benchmark::State& state) {  // times copy-constructing a std::string from "hello"
+    std::string x = "hello";  // built once, outside the timed loop
+    for (auto _ : state) std::string copy(x);
+}
+BENCHMARK(BN_Naive_StringCopy);
diff --git a/internal/core/bench/bench_search.cpp b/internal/core/bench/bench_search.cpp
new file mode 100644
index 0000000000..cd581e07fc
--- /dev/null
+++ b/internal/core/bench/bench_search.cpp
@@ -0,0 +1,118 @@
+// Copyright (C) 2019-2020 Zilliz. All rights reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software distributed under the License
+// is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
+// or implied.
See the License for the specific language governing permissions and limitations under the License
+
+#include <cstdint>
+#include <benchmark/benchmark.h>
+#include <string>
+#include "segcore/SegmentGrowing.h"
+#include "segcore/SegmentSealed.h"
+#include "test_utils/DataGen.h"
+
+using namespace milvus;
+using namespace milvus::query;
+using namespace milvus::segcore;
+
+static int dim = 128;                // vector dimension shared by the schema and the generated queries
+static int64_t N = 1024 * 1024 * 1;  // number of rows generated once and loaded into every segment
+
+const auto schema = []() {  // single float-vector field "fakevec" with `dim` dimensions and the L2 metric
+    auto schema = std::make_shared<Schema>();  // NOTE(review): <Schema> restored after extraction loss; confirm
+    schema->AddDebugField("fakevec", DataType::VECTOR_FLOAT, dim, MetricType::METRIC_L2);
+    return schema;
+}();
+
+const auto dataset_ = [] {  // N rows produced by DataGen for `schema`, built once during static initialization
+    auto dataset_ = DataGen(schema, N);
+    return dataset_;
+}();
+
+const auto plan = [] {  // plan created from the DSL below: "fakevec" query with topk 5 and nprobe 4
+    std::string dsl = R"({
+        "bool": {
+            "must": [
+            {
+                "vector": {
+                    "fakevec": {
+                        "metric_type": "L2",
+                        "params": {
+                            "nprobe": 4
+                        },
+                        "query": "$0",
+                        "topk": 5
+                    }
+                }
+            }
+            ]
+        }
+    })";
+    auto plan = CreatePlan(*schema, dsl);
+    return plan;
+}();
+auto ph_group = [] {  // placeholder group for `plan`, created with num_queries = 5 and `dim`
+    auto num_queries = 5;
+    auto ph_group_raw = CreatePlaceholderGroup(num_queries, dim, 1024);
+    auto ph_group = ParsePlaceholderGroup(plan.get(), ph_group_raw.SerializeAsString());
+    return ph_group;
+}();
+
+static void
+Search_SmallIndex(benchmark::State& state) {  // times Search() on a growing segment holding all N rows
+    // schema->AddDebugField("age", DataType::FLOAT);
+
+    auto is_small_index = state.range(0);     // 0 => debug_disable_small_index() is called before inserting
+    auto chunk_size = state.range(1) * 1024;  // handed to CreateGrowingSegment as its second argument
+    auto segment = CreateGrowingSegment(schema, chunk_size);
+    if (!is_small_index) {
+        segment->debug_disable_small_index();
+    }
+    segment->PreInsert(N);
+    ColumnBasedRawData raw_data;
+    raw_data.columns_ = dataset_.cols_;
+    raw_data.count = N;
+    segment->Insert(0, N, dataset_.row_ids_.data(), dataset_.timestamps_.data(), raw_data);
+
+    Timestamp time = 10000000;
+    std::vector<const PlaceholderGroup*> ph_group_arr = {ph_group.get()};  // NOTE(review): <...> restored; confirm
+
+    for (auto _ : state) {  // only this loop body is timed
+        auto qr = segment->Search(plan.get(), ph_group_arr.data(), &time, 1);
+    }
+}
+
+BENCHMARK(Search_SmallIndex)->MinTime(5)->ArgsProduct({{true, false}, {8, 16, 32, 64, 128}});
+
+static void
+Search_Sealed(benchmark::State& state) {  // times Search() on a sealed segment, with or without a loaded index
+    auto segment = CreateSealedSegment(schema);
+    SealedLoader(dataset_, *segment);
+    auto choice = state.range(0);  // 0: no index is loaded; 1: an index from GenIndexing is loaded first
+    if (choice == 0) {
+        // Brute Force
+    } else if (choice == 1) {
+        // ivf
+        auto vec = reinterpret_cast<const float*>(dataset_.cols_[0].data());  // column 0 is the only field, "fakevec"
+        auto indexing = GenIndexing(N, dim, vec);
+        LoadIndexInfo info;
+        info.index = indexing;
+        info.index_params["index_type"] = "IVF";
+        info.index_params["index_mode"] = "CPU";
+        info.index_params["metric_type"] = MetricTypeToName(MetricType::METRIC_L2);
+        segment->LoadIndex(info);
+    }
+    Timestamp time = 10000000;
+    std::vector<const PlaceholderGroup*> ph_group_arr = {ph_group.get()};  // NOTE(review): <...> restored; confirm
+
+    for (auto _ : state) {  // only this loop body is timed
+        auto qr = segment->Search(plan.get(), ph_group_arr.data(), &time, 1);
+    }
+}
+
+BENCHMARK(Search_Sealed)->MinTime(5)->Arg(1)->Arg(0);
diff --git a/internal/core/build-support/lint_exclusions.txt b/internal/core/build-support/lint_exclusions.txt
index 5514c3eeb7..1b0e402f97 100644
--- a/internal/core/build-support/lint_exclusions.txt
+++ b/internal/core/build-support/lint_exclusions.txt
@@ -5,4 +5,5 @@
 *src/grpc*
 *output*
 *unittest*
+*bench*
 *src/pb*
diff --git a/internal/core/run_clang_format.sh b/internal/core/run_clang_format.sh
index b5a682639c..8f475584db 100755
--- a/internal/core/run_clang_format.sh
+++ b/internal/core/run_clang_format.sh
@@ -12,5 +12,6 @@ formatThis() {
 
 formatThis "${CorePath}/src"
 formatThis "${CorePath}/unittest"
+formatThis "${CorePath}/bench"
 
 ${CorePath}/build-support/add_license.sh ${CorePath}/build-support/cpp_license.txt ${CorePath}
diff --git a/internal/core/src/segcore/SegmentGrowing.h b/internal/core/src/segcore/SegmentGrowing.h
index 02701ed938..6cc5f11b11 100644
--- a/internal/core/src/segcore/SegmentGrowing.h
+++ b/internal/core/src/segcore/SegmentGrowing.h
@@ -50,6 +50,9 @@ class SegmentGrowing : public SegmentInternalInterface {
     };
 
  public:
+    virtual void
+    debug_disable_small_index() = 0;  // debug hook used by the Search_SmallIndex benchmark
+
     virtual int64_t
     PreInsert(int64_t size) = 0;
 
diff --git
a/internal/core/src/segcore/SegmentGrowingImpl.cpp b/internal/core/src/segcore/SegmentGrowingImpl.cpp
index e701e461d4..b6dd817957 100644
--- a/internal/core/src/segcore/SegmentGrowingImpl.cpp
+++ b/internal/core/src/segcore/SegmentGrowingImpl.cpp
@@ -188,9 +188,10 @@ SegmentGrowingImpl::do_insert(int64_t reserved_begin,
         // NOTE: this must be the last step, cannot be put above
         uid2offset_.insert(std::make_pair(row_id, reserved_begin + i));
     }
-
     record_.ack_responder_.AddSegment(reserved_begin, reserved_begin + size);
-    indexing_record_.UpdateResourceAck(record_.ack_responder_.GetAck() / size_per_chunk_, record_);
+    if (!debug_disable_small_index_) {  // the ack above is always recorded; only the indexing update is optional
+        indexing_record_.UpdateResourceAck(record_.ack_responder_.GetAck() / size_per_chunk_, record_);
+    }
 }
 
 Status
diff --git a/internal/core/src/segcore/SegmentGrowingImpl.h b/internal/core/src/segcore/SegmentGrowingImpl.h
index bf75d6bb08..7a62a8edee 100644
--- a/internal/core/src/segcore/SegmentGrowingImpl.h
+++ b/internal/core/src/segcore/SegmentGrowingImpl.h
@@ -112,6 +112,11 @@ class SegmentGrowingImpl : public SegmentGrowing {
     }
 
  public:
+    void
+    debug_disable_small_index() override {  // one-way switch: this patch adds no way to turn it back on
+        debug_disable_small_index_ = true;
+    }
+
     ssize_t
     get_row_count() const override {
         return record_.ack_responder_.GetAck();
@@ -204,6 +209,9 @@ class SegmentGrowingImpl : public SegmentGrowing {
     SealedIndexingRecord sealed_indexing_record_;
 
     tbb::concurrent_unordered_multimap uid2offset_;
+
+ private:
+    bool debug_disable_small_index_ = false;  // when true, do_insert() skips indexing_record_.UpdateResourceAck()
 };
 
 }  // namespace milvus::segcore
diff --git a/internal/core/thirdparty/CMakeLists.txt b/internal/core/thirdparty/CMakeLists.txt
index 4dfe837381..787d7e6535 100644
--- a/internal/core/thirdparty/CMakeLists.txt
+++ b/internal/core/thirdparty/CMakeLists.txt
@@ -41,6 +41,7 @@ find_package( Threads REQUIRED )
 # ****************************** Thirdparty googletest ***************************************
 if ( MILVUS_BUILD_TESTS )
     add_subdirectory( gtest )
+    add_subdirectory( google_benchmark)  # brings in Google Benchmark, which bench/all_bench links against
 endif()
 
 
diff
--git a/internal/core/thirdparty/google_benchmark/CMakeLists.txt b/internal/core/thirdparty/google_benchmark/CMakeLists.txt
new file mode 100644
index 0000000000..e5406dce8b
--- /dev/null
+++ b/internal/core/thirdparty/google_benchmark/CMakeLists.txt
@@ -0,0 +1,26 @@
+include(FetchContent)
+FetchContent_Declare(google_benchmark
+        URL             https://github.com/google/benchmark/archive/v1.5.2.tar.gz
+        URL_MD5         084b34aceaeac11a6607d35220ca2efa
+        DOWNLOAD_DIR    ${THIRDPARTY_DOWNLOAD_PATH}
+        SOURCE_DIR      ${CMAKE_CURRENT_BINARY_DIR}/google_benchmark
+        BINARY_DIR      ${CMAKE_CURRENT_BINARY_DIR}/google_benchmark
+        )
+
+FetchContent_GetProperties( google_benchmark )
+if ( NOT google_benchmark_POPULATED )
+
+    FetchContent_Populate( google_benchmark )
+
+    # Only the library targets are needed. Switch off Google Benchmark's own test suite so that
+    # configuring it does not depend on googletest. Cache entries are forced so its option() calls keep them.
+    set( BENCHMARK_ENABLE_TESTING OFF CACHE BOOL "" FORCE )
+    set( BENCHMARK_ENABLE_GTEST_TESTS OFF CACHE BOOL "" FORCE )
+
+    # Adding the following targets:
+    # benchmark, benchmark_main (linked by bench/ through the benchmark::benchmark_main alias)
+    message("gb=${google_benchmark_SOURCE_DIR}")
+    add_subdirectory( ${google_benchmark_SOURCE_DIR}
+                      ${google_benchmark_BINARY_DIR}
+                      EXCLUDE_FROM_ALL )
+endif()