mirror of
https://gitee.com/milvus-io/milvus.git
synced 2026-01-07 19:31:51 +08:00
Issue: #31285 Basically, I've replaced `FixedVector<bool>` and `boost::dynamic_bitset` with custom bitset and bitsetview in order to reduce the memory bandwidth & increase performance for the filtering. This PR is for internal use only. Current progress (numbers are for GCC 9.5.0 on Ubuntu 22.04 LTS; clang-17 produces better performance numbers): Baseline: ``` [ RUN ] CApiTest.AssembeChunkPerfTest start test cost: 17903us [ OK ] CApiTest.AssembeChunkPerfTest (183 ms) [ RUN ] Expr.TestMultiLogicalExprsOptimization cost: 1391us cost: 5us cost: 4us cost: 4us cost: 6us cost: 4us cost: 4us cost: 4us cost: 4us cost: 4us 143 cost: 10us cost: 8us cost: 10us cost: 8us cost: 8us cost: 8us cost: 8us cost: 8us cost: 8us cost: 9us 8 /home/ubuntu/zilliz/milvus4/milvus/internal/core/unittest/test_expr.cpp:1561: Failure Expected: (cost_op) < (cost_no_op), actual: 143 vs 8 [ FAILED ] Expr.TestMultiLogicalExprsOptimization (7 ms) [ RUN ] Expr.TestExprs start test 3cost: 889us start test 10cost: 2us start test 20cost: 2us start test 30cost: 2us start test 50cost: 3us start test 100cost: 7us start test 200cost: 16us [ OK ] Expr.TestExprs (9 ms) [ RUN ] Expr.TestUnaryBenchTest start test type:2 cost: 124.8us start test type:3 cost: 163.1us start test type:4 cost: 275.9us start test type:5 cost: 590.9us start test type:10 cost: 62.7us start test type:11 cost: 65.9us [ OK ] Expr.TestUnaryBenchTest (1153 ms) [ RUN ] Expr.TestBinaryRangeBenchTest start test type:2 cost: 151.4us start test type:3 cost: 198.4us start test type:4 cost: 361.9us start test type:5 cost: 753.9us start test type:10 cost: 64.6us start test type:11 cost: 62.2us [ OK ] Expr.TestBinaryRangeBenchTest (1151 ms) [ RUN ] Expr.TestLogicalUnaryBenchTest start test type:2 cost: 121.14us start test type:3 cost: 156.84us start test type:4 cost: 249.76us start test type:5 cost: 534.44us start test type:10 cost: 82.2us start test type:11 cost: 83.52us [ OK ] Expr.TestLogicalUnaryBenchTest (1202 ms) [ RUN ] 
Expr.TestBinaryLogicalBenchTest start test type:2 cost: 80.64us start test type:3 cost: 78.22us start test type:4 cost: 255.76us start test type:5 cost: 532.04us start test type:10 cost: 89.26us start test type:11 cost: 90us [ OK ] Expr.TestBinaryLogicalBenchTest (1198 ms) [ RUN ] Expr.TestBinaryArithOpEvalRangeBenchExpr start test type:2 cost: 401.7us start test type:3 cost: 420.96us start test type:4 cost: 418.04us start test type:5 cost: 470.54us start test type:10 cost: 250.32us start test type:11 cost: 850.08us [ OK ] Expr.TestBinaryArithOpEvalRangeBenchExpr (1273 ms) [ RUN ] Expr.TestCompareExprBenchTest start test type:2 cost: 162us start test type:3 cost: 142us start test type:4 cost: 374us start test type:5 cost: 674us start test type:10 cost: 366us start test type:11 cost: 645us [ OK ] Expr.TestCompareExprBenchTest (1214 ms) [ RUN ] Expr.TestRefactorExprs start test 3cost: 1253us start test 10cost: 1060us start test 20cost: 681us start test 30cost: 522us start test 50cost: 511us start test 100cost: 506us start test 200cost: 497us [ OK ] Expr.TestRefactorExprs (1142 ms) ``` Candidate: ``` [ RUN ] CApiTest.AssembeChunkPerfTest start test cost: 6099us [ OK ] CApiTest.AssembeChunkPerfTest (153 ms) [ RUN ] Expr.TestMultiLogicalExprsOptimization cost: 42us cost: 15us cost: 15us cost: 14us cost: 15us cost: 15us cost: 15us cost: 15us cost: 15us cost: 15us 17 cost: 41us cost: 39us cost: 33us cost: 33us cost: 33us cost: 33us cost: 34us cost: 41us cost: 34us cost: 34us 35 [ OK ] Expr.TestMultiLogicalExprsOptimization (6 ms) [ RUN ] Expr.TestExprs start test 3cost: 20us start test 10cost: 2us start test 20cost: 2us start test 30cost: 2us start test 50cost: 4us start test 100cost: 8us start test 200cost: 15us [ OK ] Expr.TestExprs (8 ms) [ RUN ] Expr.TestUnaryBenchTest start test type:2 cost: 55.7us start test type:3 cost: 79.8us start test type:4 cost: 177.6us start test type:5 cost: 337.2us start test type:10 cost: 16.9us start test type:11 cost: 15.7us [ OK ] 
Expr.TestUnaryBenchTest (1140 ms) [ RUN ] Expr.TestBinaryRangeBenchTest start test type:2 cost: 57.1us start test type:3 cost: 87us start test type:4 cost: 177.5us start test type:5 cost: 342.7us start test type:10 cost: 17.9us start test type:11 cost: 16.7us [ OK ] Expr.TestBinaryRangeBenchTest (1152 ms) [ RUN ] Expr.TestLogicalUnaryBenchTest start test type:2 cost: 34.58us start test type:3 cost: 68.86us start test type:4 cost: 151.38us start test type:5 cost: 286.8us start test type:10 cost: 16.54us start test type:11 cost: 16.7us [ OK ] Expr.TestLogicalUnaryBenchTest (1165 ms) [ RUN ] Expr.TestBinaryLogicalBenchTest start test type:2 cost: 20us start test type:3 cost: 17.1us start test type:4 cost: 154.12us start test type:5 cost: 286.1us start test type:10 cost: 19.6us start test type:11 cost: 19.24us [ OK ] Expr.TestBinaryLogicalBenchTest (1188 ms) [ RUN ] Expr.TestBinaryArithOpEvalRangeBenchExpr start test type:2 cost: 125.7us start test type:3 cost: 111.34us start test type:4 cost: 148.02us start test type:5 cost: 306.7us start test type:10 cost: 149.3us start test type:11 cost: 282.94us [ OK ] Expr.TestBinaryArithOpEvalRangeBenchExpr (1221 ms) [ RUN ] Expr.TestCompareExprBenchTest start test type:2 cost: 89us start test type:3 cost: 79us start test type:4 cost: 323us start test type:5 cost: 629us start test type:10 cost: 313us start test type:11 cost: 591us [ OK ] Expr.TestCompareExprBenchTest (1228 ms) [ RUN ] Expr.TestRefactorExprs start test 3cost: 874us start test 10cost: 611us start test 20cost: 290us start test 30cost: 294us start test 50cost: 272us start test 100cost: 278us start test 200cost: 279us [ OK ] Expr.TestRefactorExprs (1149 ms) ``` Signed-off-by: Alexandr Guzhva <alexanderguzhva@gmail.com>
149 lines
3.9 KiB
C++
149 lines
3.9 KiB
C++
// Licensed to the LF AI & Data foundation under one
|
|
// or more contributor license agreements. See the NOTICE file
|
|
// distributed with this work for additional information
|
|
// regarding copyright ownership. The ASF licenses this file
|
|
// to you under the Apache License, Version 2.0 (the
|
|
// "License"); you may not use this file except in compliance
|
|
// with the License. You may obtain a copy of the License at
|
|
//
|
|
// http://www.apache.org/licenses/LICENSE-2.0
|
|
//
|
|
// Unless required by applicable law or agreed to in writing, software
|
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
// See the License for the specific language governing permissions and
|
|
// limitations under the License.
|
|
|
|
#pragma once
|
|
|
|
#include <memory>
|
|
#include <string>
|
|
|
|
#include "common/FieldData.h"
|
|
|
|
namespace milvus {
|
|
|
|
/**
|
|
* @brief base class for different type vector
|
|
* @todo implement full null value support
|
|
*/
|
|
|
|
class BaseVector {
|
|
public:
|
|
BaseVector(DataType data_type,
|
|
size_t length,
|
|
std::optional<size_t> null_count = std::nullopt)
|
|
: type_kind_(data_type), length_(length), null_count_(null_count) {
|
|
}
|
|
virtual ~BaseVector() = default;
|
|
|
|
int64_t
|
|
size() {
|
|
return length_;
|
|
}
|
|
|
|
DataType
|
|
type() {
|
|
return type_kind_;
|
|
}
|
|
|
|
protected:
|
|
DataType type_kind_;
|
|
size_t length_;
|
|
std::optional<size_t> null_count_;
|
|
};
|
|
|
|
/** @brief Shared-ownership pointer to a BaseVector. */
using VectorPtr = std::shared_ptr<BaseVector>;
|
|
|
|
/**
|
|
* @brief Single vector for scalar types
|
|
* @todo using memory pool && buffer replace FieldData
|
|
*/
|
|
class ColumnVector final : public BaseVector {
|
|
public:
|
|
ColumnVector(DataType data_type,
|
|
size_t length,
|
|
std::optional<size_t> null_count = std::nullopt)
|
|
: BaseVector(data_type, length, null_count) {
|
|
values_ = InitScalarFieldData(data_type, length);
|
|
}
|
|
|
|
// ColumnVector(FixedVector<bool>&& data)
|
|
// : BaseVector(DataType::BOOL, data.size()) {
|
|
// values_ =
|
|
// std::make_shared<FieldData<bool>>(DataType::BOOL, std::move(data));
|
|
// }
|
|
|
|
// the size is the number of bits
|
|
ColumnVector(TargetBitmap&& bitmap)
|
|
: BaseVector(DataType::INT8, bitmap.size()) {
|
|
values_ = std::make_shared<FieldDataImpl<uint8_t, false>>(
|
|
bitmap.size(), DataType::INT8, std::move(bitmap).into());
|
|
}
|
|
|
|
virtual ~ColumnVector() override {
|
|
values_.reset();
|
|
}
|
|
|
|
void*
|
|
GetRawData() {
|
|
return values_->Data();
|
|
}
|
|
|
|
template <typename As>
|
|
const As*
|
|
RawAsValues() const {
|
|
return reinterpret_cast<const As*>(values_->Data());
|
|
}
|
|
|
|
private:
|
|
FieldDataPtr values_;
|
|
};
|
|
|
|
/** @brief Shared-ownership pointer to a ColumnVector. */
using ColumnVectorPtr = std::shared_ptr<ColumnVector>;
|
|
|
|
/**
|
|
* @brief Multi vectors for scalar types
|
|
* mainly using it to pass internal result in segcore scalar engine system
|
|
*/
|
|
class RowVector : public BaseVector {
|
|
public:
|
|
RowVector(std::vector<DataType>& data_types,
|
|
size_t length,
|
|
std::optional<size_t> null_count = std::nullopt)
|
|
: BaseVector(DataType::ROW, length, null_count) {
|
|
for (auto& type : data_types) {
|
|
children_values_.emplace_back(
|
|
std::make_shared<ColumnVector>(type, length));
|
|
}
|
|
}
|
|
|
|
RowVector(const std::vector<VectorPtr>& children)
|
|
: BaseVector(DataType::ROW, 0) {
|
|
for (auto& child : children) {
|
|
children_values_.push_back(child);
|
|
if (child->size() > length_) {
|
|
length_ = child->size();
|
|
}
|
|
}
|
|
}
|
|
|
|
const std::vector<VectorPtr>&
|
|
childrens() {
|
|
return children_values_;
|
|
}
|
|
|
|
VectorPtr
|
|
child(int index) {
|
|
assert(index < children_values_.size());
|
|
return children_values_[index];
|
|
}
|
|
|
|
private:
|
|
std::vector<VectorPtr> children_values_;
|
|
};
|
|
|
|
/** @brief Shared-ownership pointer to a RowVector. */
using RowVectorPtr = std::shared_ptr<RowVector>;
|
|
|
|
} // namespace milvus
|