Alexander Guzhva c4b37fb285
enhance: Custom bitset and bitsetview prototypes (#30454)
Issue: #31285 

I've replaced `FixedVector<bool>` and `boost::dynamic_bitset` with a
custom bitset and bitsetview in order to reduce memory bandwidth usage
and improve filtering performance.

This PR is for internal use only. 

Current progress (numbers are for GCC 9.5.0 on Ubuntu 22.04 LTS;
clang-17 produces better performance numbers):
Baseline:
```
[ RUN      ] CApiTest.AssembeChunkPerfTest
start test
cost: 17903us
[       OK ] CApiTest.AssembeChunkPerfTest (183 ms)

[ RUN      ] Expr.TestMultiLogicalExprsOptimization
cost: 1391us
cost: 5us
cost: 4us
cost: 4us
cost: 6us
cost: 4us
cost: 4us
cost: 4us
cost: 4us
cost: 4us
143
cost: 10us
cost: 8us
cost: 10us
cost: 8us
cost: 8us
cost: 8us
cost: 8us
cost: 8us
cost: 8us
cost: 9us
8
/home/ubuntu/zilliz/milvus4/milvus/internal/core/unittest/test_expr.cpp:1561: Failure
Expected: (cost_op) < (cost_no_op), actual: 143 vs 8
[  FAILED  ] Expr.TestMultiLogicalExprsOptimization (7 ms)
[ RUN      ] Expr.TestExprs
start test
3cost: 889us
start test
10cost: 2us
start test
20cost: 2us
start test
30cost: 2us
start test
50cost: 3us
start test
100cost: 7us
start test
200cost: 16us
[       OK ] Expr.TestExprs (9 ms)

[ RUN      ] Expr.TestUnaryBenchTest
start test type:2
 cost: 124.8us
start test type:3
 cost: 163.1us
start test type:4
 cost: 275.9us
start test type:5
 cost: 590.9us
start test type:10
 cost: 62.7us
start test type:11
 cost: 65.9us
[       OK ] Expr.TestUnaryBenchTest (1153 ms)
[ RUN      ] Expr.TestBinaryRangeBenchTest
start test type:2
 cost: 151.4us
start test type:3
 cost: 198.4us
start test type:4
 cost: 361.9us
start test type:5
 cost: 753.9us
start test type:10
 cost: 64.6us
start test type:11
 cost: 62.2us
[       OK ] Expr.TestBinaryRangeBenchTest (1151 ms)
[ RUN      ] Expr.TestLogicalUnaryBenchTest
start test type:2
 cost: 121.14us
start test type:3
 cost: 156.84us
start test type:4
 cost: 249.76us
start test type:5
 cost: 534.44us
start test type:10
 cost: 82.2us
start test type:11
 cost: 83.52us
[       OK ] Expr.TestLogicalUnaryBenchTest (1202 ms)
[ RUN      ] Expr.TestBinaryLogicalBenchTest
start test type:2
 cost: 80.64us
start test type:3
 cost: 78.22us
start test type:4
 cost: 255.76us
start test type:5
 cost: 532.04us
start test type:10
 cost: 89.26us
start test type:11
 cost: 90us
[       OK ] Expr.TestBinaryLogicalBenchTest (1198 ms)
[ RUN      ] Expr.TestBinaryArithOpEvalRangeBenchExpr
start test type:2
 cost: 401.7us
start test type:3
 cost: 420.96us
start test type:4
 cost: 418.04us
start test type:5
 cost: 470.54us
start test type:10
 cost: 250.32us
start test type:11
 cost: 850.08us
[       OK ] Expr.TestBinaryArithOpEvalRangeBenchExpr (1273 ms)
[ RUN      ] Expr.TestCompareExprBenchTest
start test type:2
 cost: 162us
start test type:3
 cost: 142us
start test type:4
 cost: 374us
start test type:5
 cost: 674us
start test type:10
 cost: 366us
start test type:11
 cost: 645us
[       OK ] Expr.TestCompareExprBenchTest (1214 ms)
[ RUN      ] Expr.TestRefactorExprs
start test
3cost: 1253us
start test
10cost: 1060us
start test
20cost: 681us
start test
30cost: 522us
start test
50cost: 511us
start test
100cost: 506us
start test
200cost: 497us
[       OK ] Expr.TestRefactorExprs (1142 ms)

```

Candidate:
```
[ RUN      ] CApiTest.AssembeChunkPerfTest
start test
cost: 6099us
[       OK ] CApiTest.AssembeChunkPerfTest (153 ms)

[ RUN      ] Expr.TestMultiLogicalExprsOptimization
cost: 42us
cost: 15us
cost: 15us
cost: 14us
cost: 15us
cost: 15us
cost: 15us
cost: 15us
cost: 15us
cost: 15us
17
cost: 41us
cost: 39us
cost: 33us
cost: 33us
cost: 33us
cost: 33us
cost: 34us
cost: 41us
cost: 34us
cost: 34us
35
[       OK ] Expr.TestMultiLogicalExprsOptimization (6 ms)
[ RUN      ] Expr.TestExprs
start test
3cost: 20us
start test
10cost: 2us
start test
20cost: 2us
start test
30cost: 2us
start test
50cost: 4us
start test
100cost: 8us
start test
200cost: 15us
[       OK ] Expr.TestExprs (8 ms)

[ RUN      ] Expr.TestUnaryBenchTest
start test type:2
 cost: 55.7us
start test type:3
 cost: 79.8us
start test type:4
 cost: 177.6us
start test type:5
 cost: 337.2us
start test type:10
 cost: 16.9us
start test type:11
 cost: 15.7us
[       OK ] Expr.TestUnaryBenchTest (1140 ms)
[ RUN      ] Expr.TestBinaryRangeBenchTest
start test type:2
 cost: 57.1us
start test type:3
 cost: 87us
start test type:4
 cost: 177.5us
start test type:5
 cost: 342.7us
start test type:10
 cost: 17.9us
start test type:11
 cost: 16.7us
[       OK ] Expr.TestBinaryRangeBenchTest (1152 ms)
[ RUN      ] Expr.TestLogicalUnaryBenchTest
start test type:2
 cost: 34.58us
start test type:3
 cost: 68.86us
start test type:4
 cost: 151.38us
start test type:5
 cost: 286.8us
start test type:10
 cost: 16.54us
start test type:11
 cost: 16.7us
[       OK ] Expr.TestLogicalUnaryBenchTest (1165 ms)
[ RUN      ] Expr.TestBinaryLogicalBenchTest
start test type:2
 cost: 20us
start test type:3
 cost: 17.1us
start test type:4
 cost: 154.12us
start test type:5
 cost: 286.1us
start test type:10
 cost: 19.6us
start test type:11
 cost: 19.24us
[       OK ] Expr.TestBinaryLogicalBenchTest (1188 ms)
[ RUN      ] Expr.TestBinaryArithOpEvalRangeBenchExpr
start test type:2
 cost: 125.7us
start test type:3
 cost: 111.34us
start test type:4
 cost: 148.02us
start test type:5
 cost: 306.7us
start test type:10
 cost: 149.3us
start test type:11
 cost: 282.94us
[       OK ] Expr.TestBinaryArithOpEvalRangeBenchExpr (1221 ms)
[ RUN      ] Expr.TestCompareExprBenchTest
start test type:2
 cost: 89us
start test type:3
 cost: 79us
start test type:4
 cost: 323us
start test type:5
 cost: 629us
start test type:10
 cost: 313us
start test type:11
 cost: 591us
[       OK ] Expr.TestCompareExprBenchTest (1228 ms)
[ RUN      ] Expr.TestRefactorExprs
start test
3cost: 874us
start test
10cost: 611us
start test
20cost: 290us
start test
30cost: 294us
start test
50cost: 272us
start test
100cost: 278us
start test
200cost: 279us
[       OK ] Expr.TestRefactorExprs (1149 ms)

```

Signed-off-by: Alexandr Guzhva <alexanderguzhva@gmail.com>
2024-03-24 21:49:07 +08:00

209 lines
6.0 KiB
C++

// Copyright (C) 2019-2020 Zilliz. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software distributed under the License
// is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
// or implied. See the License for the specific language governing permissions and limitations under the License
#pragma once
#include <gtest/gtest.h>
#include <vector>
#include <memory>
#include "common/Types.h"
using milvus::index::ScalarIndex;
namespace {
// Approximate equality for floats: true when |x - y| is strictly smaller
// than epsilon, false otherwise (including when the difference is NaN).
bool
compare_float(float x, float y, float epsilon = 0.000001f) {
    return fabs(x - y) < epsilon;
}
// Approximate equality for doubles: true when |x - y| is strictly smaller
// than epsilon, false otherwise.
//
// The default tolerance is written as a double literal. A float-suffixed
// literal (0.000001f) would first be rounded to float and then widened,
// yielding a tolerance slightly below 1e-6.
bool
compare_double(double x, double y, double epsilon = 0.000001) {
    return fabs(x - y) < epsilon;
}
// Thin wrapper that forwards to bitmap.any() and returns its result.
bool
Any(const milvus::TargetBitmap& bitmap) {
    const bool any_set = bitmap.any();
    return any_set;
}
// Thin wrapper that forwards to bitmap.none() and returns its result.
bool
BitSetNone(const milvus::TargetBitmap& bitmap) {
    const bool none_set = bitmap.none();
    return none_set;
}
// Thin wrapper that forwards to bitmap.count() and returns it as uint64_t.
uint64_t
Count(const milvus::TargetBitmap& bitmap) {
    const uint64_t set_count = bitmap.count();
    return set_count;
}
// Asserts that the distances of every query in `result` are ordered
// consistently with `metric_type`.
//
// The result is read as total_nq_ consecutive groups of unity_topK_ entries.
// Within a group, each entry is compared with its predecessor:
//   - milvus::PositivelyRelated(metric_type) == true  -> previous >= current
//   - otherwise                                       -> previous <= current
// Entries whose seg_offsets_ value is -1 are skipped (presumably unfilled
// result slots -- confirm against SearchResult).
inline void
assert_order(const milvus::SearchResult& result,
             const knowhere::MetricType& metric_type) {
    const bool descending = milvus::PositivelyRelated(metric_type);
    auto& offsets = result.seg_offsets_;
    auto& dist = result.distances_;
    auto query_count = result.total_nq_;
    auto per_query = result.unity_topK_;

    for (int query = 0; query < query_count; query++) {
        // Start at 1: every entry is compared against the one before it.
        for (int rank = 1; rank < per_query; rank++) {
            auto idx = query * per_query + rank;
            if (offsets[idx] == -1) {
                continue;
            }
            if (descending) {
                ASSERT_GE(dist[idx - 1], dist[idx]);
            } else {
                ASSERT_LE(dist[idx - 1], dist[idx]);
            }
        }
    }
}
// Checks ScalarIndex<T>::In against the values in `arr`.
//
// 1. Querying with every value of `arr` must yield a bitmap of arr.size()
//    bits with at least one hit.
// 2. Querying with the single value `arr.back() + 1` must yield a bitmap of
//    arr.size() bits with no hit. This presumes `arr.back() + 1` is not
//    contained in the index (e.g. `arr` sorted ascending) -- confirm at the
//    call sites.
//
// Floating-point element types are skipped entirely: exact membership is
// hard to compare for floating point values.
template <typename T>
inline void
assert_in(ScalarIndex<T>* index, const std::vector<T>& arr) {
    // Compile-time dispatch: nothing is instantiated for floating point T.
    if constexpr (!std::is_floating_point_v<T>) {
        // Fail with a clear message instead of calling back() on an empty
        // vector further down.
        ASSERT_FALSE(arr.empty());

        auto bitset1 = index->In(arr.size(), arr.data());
        ASSERT_EQ(arr.size(), bitset1.size());
        ASSERT_TRUE(Any(bitset1));

        // A single probe value lives on the stack; no heap allocation needed.
        const T test = static_cast<T>(arr.back() + 1);
        auto bitset2 = index->In(1, &test);
        ASSERT_EQ(arr.size(), bitset2.size());
        ASSERT_TRUE(BitSetNone(bitset2));
    }
}
// Checks ScalarIndex<T>::NotIn against the values in `arr`.
//
// 1. Excluding every value of `arr` must yield a bitmap of arr.size() bits
//    with no bit set.
// 2. Excluding only `arr.back() + 1` must yield a bitmap of arr.size() bits
//    with at least one bit set.
//
// `arr` must be non-empty; this is asserted up front because step 2 reads
// the last element.
template <typename T>
inline void
assert_not_in(ScalarIndex<T>* index, const std::vector<T>& arr) {
    // Without this guard an empty `arr` would pass step 1 and then read
    // past the vector when building the probe value.
    ASSERT_FALSE(arr.empty());

    auto bitset1 = index->NotIn(arr.size(), arr.data());
    ASSERT_EQ(arr.size(), bitset1.size());
    ASSERT_TRUE(BitSetNone(bitset1));

    // A single probe value lives on the stack; no heap allocation needed.
    const T test = static_cast<T>(arr.back() + 1);
    auto bitset2 = index->NotIn(1, &test);
    ASSERT_EQ(arr.size(), bitset2.size());
    ASSERT_TRUE(Any(bitset2));
}
// Checks the ScalarIndex<T>::Range overloads using the first and last
// element of `arr` as bounds (presumably the minimum and maximum -- i.e.
// `arr` is expected to be sorted ascending; confirm at the call sites).
//
// Each of the following queries must return a bitmap of arr.size() bits
// with at least one bit set:
//   >  first - 1
//   >= first
//   <  last + 1
//   <= last
//   [first, last] (both bounds inclusive)
//
// `arr` must be non-empty; this is asserted up front.
template <typename T>
inline void
assert_range(ScalarIndex<T>* index, const std::vector<T>& arr) {
    // Reading the first/last element of an empty vector is undefined
    // behaviour, so fail the test explicitly instead.
    ASSERT_FALSE(arr.empty());

    auto test_min = arr.front();
    auto test_max = arr.back();

    auto bitset1 = index->Range(test_min - 1, milvus::OpType::GreaterThan);
    ASSERT_EQ(arr.size(), bitset1.size());
    ASSERT_TRUE(Any(bitset1));

    auto bitset2 = index->Range(test_min, milvus::OpType::GreaterEqual);
    ASSERT_EQ(arr.size(), bitset2.size());
    ASSERT_TRUE(Any(bitset2));

    auto bitset3 = index->Range(test_max + 1, milvus::OpType::LessThan);
    ASSERT_EQ(arr.size(), bitset3.size());
    ASSERT_TRUE(Any(bitset3));

    auto bitset4 = index->Range(test_max, milvus::OpType::LessEqual);
    ASSERT_EQ(arr.size(), bitset4.size());
    ASSERT_TRUE(Any(bitset4));

    auto bitset5 = index->Range(test_min, true, test_max, true);
    ASSERT_EQ(arr.size(), bitset5.size());
    ASSERT_TRUE(Any(bitset5));
}
// Asserts that index->Reverse_Lookup(offset) equals arr[offset] for every
// offset in [0, arr.size()).
template <typename T>
inline void
assert_reverse(ScalarIndex<T>* index, const std::vector<T>& arr) {
    const size_t total = arr.size();
    for (size_t offset = 0; offset != total; ++offset) {
        ASSERT_EQ(index->Reverse_Lookup(offset), arr[offset]);
    }
}
// float specialization: Reverse_Lookup(offset) is matched against
// arr[offset] with compare_float (default tolerance) rather than exact
// equality.
template <>
inline void
assert_reverse(ScalarIndex<float>* index, const std::vector<float>& arr) {
    const size_t total = arr.size();
    for (size_t offset = 0; offset != total; ++offset) {
        ASSERT_TRUE(compare_float(index->Reverse_Lookup(offset), arr[offset]));
    }
}
// double specialization: Reverse_Lookup(offset) is matched against
// arr[offset] with compare_double (default tolerance) rather than exact
// equality.
template <>
inline void
assert_reverse(ScalarIndex<double>* index, const std::vector<double>& arr) {
    const size_t total = arr.size();
    for (size_t offset = 0; offset != total; ++offset) {
        ASSERT_TRUE(compare_double(index->Reverse_Lookup(offset), arr[offset]));
    }
}
// std::string specialization: every arr[offset] must compare equal
// (std::string::compare == 0) to index->Reverse_Lookup(offset).
template <>
inline void
assert_reverse(ScalarIndex<std::string>* index,
               const std::vector<std::string>& arr) {
    const size_t total = arr.size();
    for (size_t offset = 0; offset != total; ++offset) {
        ASSERT_TRUE(arr[offset].compare(index->Reverse_Lookup(offset)) == 0);
    }
}
// std::string specialization of assert_in: querying In with every value of
// `arr` must return a bitmap of arr.size() bits with at least one bit set.
// Unlike the generic version, no "value not present" probe is performed.
template <>
inline void
assert_in(ScalarIndex<std::string>* index,
          const std::vector<std::string>& arr) {
    const auto* values = arr.data();
    auto bitset1 = index->In(arr.size(), values);

    ASSERT_EQ(arr.size(), bitset1.size());
    ASSERT_TRUE(Any(bitset1));
}
// std::string specialization of assert_not_in: excluding every value of
// `arr` via NotIn must return a bitmap of arr.size() bits with no bit set.
// Unlike the generic version, no "value not present" probe is performed.
template <>
inline void
assert_not_in(ScalarIndex<std::string>* index,
              const std::vector<std::string>& arr) {
    const auto* values = arr.data();
    auto bitset1 = index->NotIn(arr.size(), values);

    ASSERT_EQ(arr.size(), bitset1.size());
    ASSERT_TRUE(BitSetNone(bitset1));
}
// std::string specialization of assert_range. Uses the first and last
// element of `arr` as bounds (presumably the smallest and largest string --
// confirm at the call sites) and only exercises the inclusive queries,
// since "first - 1" / "last + 1" have no meaning for strings:
//   >= first
//   <= last
//   [first, last] (both bounds inclusive)
// Each query must return a bitmap of arr.size() bits with at least one bit
// set.
//
// `arr` must be non-empty; this is asserted up front.
template <>
inline void
assert_range(ScalarIndex<std::string>* index,
             const std::vector<std::string>& arr) {
    // Reading the first/last element of an empty vector is undefined
    // behaviour, so fail the test explicitly instead.
    ASSERT_FALSE(arr.empty());

    auto test_min = arr.front();
    auto test_max = arr.back();

    auto bitset2 = index->Range(test_min, milvus::OpType::GreaterEqual);
    ASSERT_EQ(arr.size(), bitset2.size());
    ASSERT_TRUE(Any(bitset2));

    auto bitset4 = index->Range(test_max, milvus::OpType::LessEqual);
    ASSERT_EQ(arr.size(), bitset4.size());
    ASSERT_TRUE(Any(bitset4));

    auto bitset5 = index->Range(test_min, true, test_max, true);
    ASSERT_EQ(arr.size(), bitset5.size());
    ASSERT_TRUE(Any(bitset5));
}
} // namespace