mirror of
https://gitee.com/milvus-io/milvus.git
synced 2025-12-07 01:28:27 +08:00
Issue: #31285 Basically, I've replaced `FixedVector<bool>` and `boost::dynamic_bitset` with custom bitset and bitsetview in order to reduce the memory bandwidth & increase performance for the filtering. This PR is for internal use only. Current progress (numbers are for GCC 9.5.0 on Ubuntu 22.04 LTS; clang-17 produces better performance numbers): Baseline: ``` [ RUN ] CApiTest.AssembeChunkPerfTest start test cost: 17903us [ OK ] CApiTest.AssembeChunkPerfTest (183 ms) [ RUN ] Expr.TestMultiLogicalExprsOptimization cost: 1391us cost: 5us cost: 4us cost: 4us cost: 6us cost: 4us cost: 4us cost: 4us cost: 4us cost: 4us 143 cost: 10us cost: 8us cost: 10us cost: 8us cost: 8us cost: 8us cost: 8us cost: 8us cost: 8us cost: 9us 8 /home/ubuntu/zilliz/milvus4/milvus/internal/core/unittest/test_expr.cpp:1561: Failure Expected: (cost_op) < (cost_no_op), actual: 143 vs 8 [ FAILED ] Expr.TestMultiLogicalExprsOptimization (7 ms) [ RUN ] Expr.TestExprs start test 3cost: 889us start test 10cost: 2us start test 20cost: 2us start test 30cost: 2us start test 50cost: 3us start test 100cost: 7us start test 200cost: 16us [ OK ] Expr.TestExprs (9 ms) [ RUN ] Expr.TestUnaryBenchTest start test type:2 cost: 124.8us start test type:3 cost: 163.1us start test type:4 cost: 275.9us start test type:5 cost: 590.9us start test type:10 cost: 62.7us start test type:11 cost: 65.9us [ OK ] Expr.TestUnaryBenchTest (1153 ms) [ RUN ] Expr.TestBinaryRangeBenchTest start test type:2 cost: 151.4us start test type:3 cost: 198.4us start test type:4 cost: 361.9us start test type:5 cost: 753.9us start test type:10 cost: 64.6us start test type:11 cost: 62.2us [ OK ] Expr.TestBinaryRangeBenchTest (1151 ms) [ RUN ] Expr.TestLogicalUnaryBenchTest start test type:2 cost: 121.14us start test type:3 cost: 156.84us start test type:4 cost: 249.76us start test type:5 cost: 534.44us start test type:10 cost: 82.2us start test type:11 cost: 83.52us [ OK ] Expr.TestLogicalUnaryBenchTest (1202 ms) [ RUN ] 
Expr.TestBinaryLogicalBenchTest start test type:2 cost: 80.64us start test type:3 cost: 78.22us start test type:4 cost: 255.76us start test type:5 cost: 532.04us start test type:10 cost: 89.26us start test type:11 cost: 90us [ OK ] Expr.TestBinaryLogicalBenchTest (1198 ms) [ RUN ] Expr.TestBinaryArithOpEvalRangeBenchExpr start test type:2 cost: 401.7us start test type:3 cost: 420.96us start test type:4 cost: 418.04us start test type:5 cost: 470.54us start test type:10 cost: 250.32us start test type:11 cost: 850.08us [ OK ] Expr.TestBinaryArithOpEvalRangeBenchExpr (1273 ms) [ RUN ] Expr.TestCompareExprBenchTest start test type:2 cost: 162us start test type:3 cost: 142us start test type:4 cost: 374us start test type:5 cost: 674us start test type:10 cost: 366us start test type:11 cost: 645us [ OK ] Expr.TestCompareExprBenchTest (1214 ms) [ RUN ] Expr.TestRefactorExprs start test 3cost: 1253us start test 10cost: 1060us start test 20cost: 681us start test 30cost: 522us start test 50cost: 511us start test 100cost: 506us start test 200cost: 497us [ OK ] Expr.TestRefactorExprs (1142 ms) ``` Candidate: ``` [ RUN ] CApiTest.AssembeChunkPerfTest start test cost: 6099us [ OK ] CApiTest.AssembeChunkPerfTest (153 ms) [ RUN ] Expr.TestMultiLogicalExprsOptimization cost: 42us cost: 15us cost: 15us cost: 14us cost: 15us cost: 15us cost: 15us cost: 15us cost: 15us cost: 15us 17 cost: 41us cost: 39us cost: 33us cost: 33us cost: 33us cost: 33us cost: 34us cost: 41us cost: 34us cost: 34us 35 [ OK ] Expr.TestMultiLogicalExprsOptimization (6 ms) [ RUN ] Expr.TestExprs start test 3cost: 20us start test 10cost: 2us start test 20cost: 2us start test 30cost: 2us start test 50cost: 4us start test 100cost: 8us start test 200cost: 15us [ OK ] Expr.TestExprs (8 ms) [ RUN ] Expr.TestUnaryBenchTest start test type:2 cost: 55.7us start test type:3 cost: 79.8us start test type:4 cost: 177.6us start test type:5 cost: 337.2us start test type:10 cost: 16.9us start test type:11 cost: 15.7us [ OK ] 
Expr.TestUnaryBenchTest (1140 ms) [ RUN ] Expr.TestBinaryRangeBenchTest start test type:2 cost: 57.1us start test type:3 cost: 87us start test type:4 cost: 177.5us start test type:5 cost: 342.7us start test type:10 cost: 17.9us start test type:11 cost: 16.7us [ OK ] Expr.TestBinaryRangeBenchTest (1152 ms) [ RUN ] Expr.TestLogicalUnaryBenchTest start test type:2 cost: 34.58us start test type:3 cost: 68.86us start test type:4 cost: 151.38us start test type:5 cost: 286.8us start test type:10 cost: 16.54us start test type:11 cost: 16.7us [ OK ] Expr.TestLogicalUnaryBenchTest (1165 ms) [ RUN ] Expr.TestBinaryLogicalBenchTest start test type:2 cost: 20us start test type:3 cost: 17.1us start test type:4 cost: 154.12us start test type:5 cost: 286.1us start test type:10 cost: 19.6us start test type:11 cost: 19.24us [ OK ] Expr.TestBinaryLogicalBenchTest (1188 ms) [ RUN ] Expr.TestBinaryArithOpEvalRangeBenchExpr start test type:2 cost: 125.7us start test type:3 cost: 111.34us start test type:4 cost: 148.02us start test type:5 cost: 306.7us start test type:10 cost: 149.3us start test type:11 cost: 282.94us [ OK ] Expr.TestBinaryArithOpEvalRangeBenchExpr (1221 ms) [ RUN ] Expr.TestCompareExprBenchTest start test type:2 cost: 89us start test type:3 cost: 79us start test type:4 cost: 323us start test type:5 cost: 629us start test type:10 cost: 313us start test type:11 cost: 591us [ OK ] Expr.TestCompareExprBenchTest (1228 ms) [ RUN ] Expr.TestRefactorExprs start test 3cost: 874us start test 10cost: 611us start test 20cost: 290us start test 30cost: 294us start test 50cost: 272us start test 100cost: 278us start test 200cost: 279us [ OK ] Expr.TestRefactorExprs (1149 ms) ``` Signed-off-by: Alexandr Guzhva <alexanderguzhva@gmail.com>
209 lines
6.0 KiB
C++
209 lines
6.0 KiB
C++
// Copyright (C) 2019-2020 Zilliz. All rights reserved.
|
|
//
|
|
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance
|
|
// with the License. You may obtain a copy of the License at
|
|
//
|
|
// http://www.apache.org/licenses/LICENSE-2.0
|
|
//
|
|
// Unless required by applicable law or agreed to in writing, software distributed under the License
|
|
// is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
|
|
// or implied. See the License for the specific language governing permissions and limitations under the License
|
|
|
|
#pragma once
|
|
|
|
#include <gtest/gtest.h>
|
|
#include <vector>
|
|
#include <memory>
|
|
|
|
#include "common/Types.h"
|
|
|
|
using milvus::index::ScalarIndex;
|
|
|
|
namespace {
|
|
|
|
// Approximate equality for floats: true when the absolute difference between
// x and y is strictly smaller than epsilon.
bool
compare_float(float x, float y, float epsilon = 0.000001f) {
    const auto gap = fabs(x - y);
    return gap < epsilon;
}
|
|
|
|
// Approximate equality for doubles: true when the absolute difference between
// x and y is strictly smaller than epsilon.
//
// The default tolerance is spelled as a double literal so that it is the
// double nearest to 1e-6; a float literal would first be rounded to float
// precision and yield a slightly smaller tolerance.
bool
compare_double(double x, double y, double epsilon = 0.000001) {
    return fabs(x - y) < epsilon;
}
|
|
|
|
bool
|
|
Any(const milvus::TargetBitmap& bitmap) {
|
|
return bitmap.any();
|
|
}
|
|
|
|
bool
|
|
BitSetNone(const milvus::TargetBitmap& bitmap) {
|
|
return bitmap.none();
|
|
}
|
|
|
|
uint64_t
|
|
Count(const milvus::TargetBitmap& bitmap) {
|
|
return bitmap.count();
|
|
}
|
|
|
|
inline void
|
|
assert_order(const milvus::SearchResult& result,
|
|
const knowhere::MetricType& metric_type) {
|
|
bool dsc = milvus::PositivelyRelated(metric_type);
|
|
auto& ids = result.seg_offsets_;
|
|
auto& dist = result.distances_;
|
|
auto nq = result.total_nq_;
|
|
auto topk = result.unity_topK_;
|
|
if (dsc) {
|
|
for (int i = 0; i < nq; i++) {
|
|
for (int j = 1; j < topk; j++) {
|
|
auto idx = i * topk + j;
|
|
if (ids[idx] != -1) {
|
|
ASSERT_GE(dist[idx - 1], dist[idx]);
|
|
}
|
|
}
|
|
}
|
|
} else {
|
|
for (int i = 0; i < nq; i++) {
|
|
for (int j = 1; j < topk; j++) {
|
|
auto idx = i * topk + j;
|
|
if (ids[idx] != -1) {
|
|
ASSERT_LE(dist[idx - 1], dist[idx]);
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
template <typename T>
|
|
inline void
|
|
assert_in(ScalarIndex<T>* index, const std::vector<T>& arr) {
|
|
// hard to compare floating point value.
|
|
if (std::is_floating_point_v<T>) {
|
|
return;
|
|
}
|
|
|
|
auto bitset1 = index->In(arr.size(), arr.data());
|
|
ASSERT_EQ(arr.size(), bitset1.size());
|
|
ASSERT_TRUE(Any(bitset1));
|
|
auto test = std::make_unique<T>(arr[arr.size() - 1] + 1);
|
|
auto bitset2 = index->In(1, test.get());
|
|
ASSERT_EQ(arr.size(), bitset2.size());
|
|
ASSERT_TRUE(BitSetNone(bitset2));
|
|
}
|
|
|
|
template <typename T>
|
|
inline void
|
|
assert_not_in(ScalarIndex<T>* index, const std::vector<T>& arr) {
|
|
auto bitset1 = index->NotIn(arr.size(), arr.data());
|
|
ASSERT_EQ(arr.size(), bitset1.size());
|
|
ASSERT_TRUE(BitSetNone(bitset1));
|
|
auto test = std::make_unique<T>(arr[arr.size() - 1] + 1);
|
|
auto bitset2 = index->NotIn(1, test.get());
|
|
ASSERT_EQ(arr.size(), bitset2.size());
|
|
ASSERT_TRUE(Any(bitset2));
|
|
}
|
|
|
|
template <typename T>
|
|
inline void
|
|
assert_range(ScalarIndex<T>* index, const std::vector<T>& arr) {
|
|
auto test_min = arr[0];
|
|
auto test_max = arr[arr.size() - 1];
|
|
|
|
auto bitset1 = index->Range(test_min - 1, milvus::OpType::GreaterThan);
|
|
ASSERT_EQ(arr.size(), bitset1.size());
|
|
ASSERT_TRUE(Any(bitset1));
|
|
|
|
auto bitset2 = index->Range(test_min, milvus::OpType::GreaterEqual);
|
|
ASSERT_EQ(arr.size(), bitset2.size());
|
|
ASSERT_TRUE(Any(bitset2));
|
|
|
|
auto bitset3 = index->Range(test_max + 1, milvus::OpType::LessThan);
|
|
ASSERT_EQ(arr.size(), bitset3.size());
|
|
ASSERT_TRUE(Any(bitset3));
|
|
|
|
auto bitset4 = index->Range(test_max, milvus::OpType::LessEqual);
|
|
ASSERT_EQ(arr.size(), bitset4.size());
|
|
ASSERT_TRUE(Any(bitset4));
|
|
|
|
auto bitset5 = index->Range(test_min, true, test_max, true);
|
|
ASSERT_EQ(arr.size(), bitset5.size());
|
|
ASSERT_TRUE(Any(bitset5));
|
|
}
|
|
|
|
template <typename T>
|
|
inline void
|
|
assert_reverse(ScalarIndex<T>* index, const std::vector<T>& arr) {
|
|
for (size_t offset = 0; offset < arr.size(); ++offset) {
|
|
ASSERT_EQ(index->Reverse_Lookup(offset), arr[offset]);
|
|
}
|
|
}
|
|
|
|
template <>
|
|
inline void
|
|
assert_reverse(ScalarIndex<float>* index, const std::vector<float>& arr) {
|
|
for (size_t offset = 0; offset < arr.size(); ++offset) {
|
|
ASSERT_TRUE(compare_float(index->Reverse_Lookup(offset), arr[offset]));
|
|
}
|
|
}
|
|
|
|
template <>
|
|
inline void
|
|
assert_reverse(ScalarIndex<double>* index, const std::vector<double>& arr) {
|
|
for (size_t offset = 0; offset < arr.size(); ++offset) {
|
|
ASSERT_TRUE(compare_double(index->Reverse_Lookup(offset), arr[offset]));
|
|
}
|
|
}
|
|
|
|
template <>
|
|
inline void
|
|
assert_reverse(ScalarIndex<std::string>* index,
|
|
const std::vector<std::string>& arr) {
|
|
for (size_t offset = 0; offset < arr.size(); ++offset) {
|
|
ASSERT_TRUE(arr[offset].compare(index->Reverse_Lookup(offset)) == 0);
|
|
}
|
|
}
|
|
|
|
template <>
|
|
inline void
|
|
assert_in(ScalarIndex<std::string>* index,
|
|
const std::vector<std::string>& arr) {
|
|
auto bitset1 = index->In(arr.size(), arr.data());
|
|
ASSERT_EQ(arr.size(), bitset1.size());
|
|
ASSERT_TRUE(Any(bitset1));
|
|
}
|
|
|
|
template <>
|
|
inline void
|
|
assert_not_in(ScalarIndex<std::string>* index,
|
|
const std::vector<std::string>& arr) {
|
|
auto bitset1 = index->NotIn(arr.size(), arr.data());
|
|
ASSERT_EQ(arr.size(), bitset1.size());
|
|
ASSERT_TRUE(BitSetNone(bitset1));
|
|
}
|
|
|
|
template <>
|
|
inline void
|
|
assert_range(ScalarIndex<std::string>* index,
|
|
const std::vector<std::string>& arr) {
|
|
auto test_min = arr[0];
|
|
auto test_max = arr[arr.size() - 1];
|
|
|
|
auto bitset2 = index->Range(test_min, milvus::OpType::GreaterEqual);
|
|
ASSERT_EQ(arr.size(), bitset2.size());
|
|
ASSERT_TRUE(Any(bitset2));
|
|
|
|
auto bitset4 = index->Range(test_max, milvus::OpType::LessEqual);
|
|
ASSERT_EQ(arr.size(), bitset4.size());
|
|
ASSERT_TRUE(Any(bitset4));
|
|
|
|
auto bitset5 = index->Range(test_min, true, test_max, true);
|
|
ASSERT_EQ(arr.size(), bitset5.size());
|
|
ASSERT_TRUE(Any(bitset5));
|
|
}
|
|
} // namespace
|