mirror of
https://gitee.com/milvus-io/milvus.git
synced 2025-12-08 01:58:34 +08:00
enhance: all op(Null) is false in expr (#35527)
#31728 --------- Signed-off-by: lixinguo <xinguo.li@zilliz.com> Co-authored-by: lixinguo <xinguo.li@zilliz.com>
This commit is contained in:
parent
04c306e63f
commit
eb3e4583ec
@ -69,7 +69,7 @@ FieldDataImpl<Type, is_type_entire_row>::FillFieldData(
|
|||||||
ssize_t byte_count = (element_count + 7) / 8;
|
ssize_t byte_count = (element_count + 7) / 8;
|
||||||
// Note: if `nullable == true` and valid_data is nullptr
|
// Note: if `nullable == true` and valid_data is nullptr
|
||||||
// means null_count == 0, will fill it with 0xFF
|
// means null_count == 0, will fill it with 0xFF
|
||||||
if (!valid_data) {
|
if (valid_data == nullptr) {
|
||||||
valid_data_.assign(byte_count, 0xFF);
|
valid_data_.assign(byte_count, 0xFF);
|
||||||
} else {
|
} else {
|
||||||
std::copy_n(valid_data, byte_count, valid_data_.data());
|
std::copy_n(valid_data, byte_count, valid_data_.data());
|
||||||
|
|||||||
@ -19,6 +19,8 @@
|
|||||||
#include <memory>
|
#include <memory>
|
||||||
#include <string>
|
#include <string>
|
||||||
|
|
||||||
|
#include "EasyAssert.h"
|
||||||
|
#include "Types.h"
|
||||||
#include "common/FieldData.h"
|
#include "common/FieldData.h"
|
||||||
|
|
||||||
namespace milvus {
|
namespace milvus {
|
||||||
@ -50,6 +52,7 @@ class BaseVector {
|
|||||||
protected:
|
protected:
|
||||||
DataType type_kind_;
|
DataType type_kind_;
|
||||||
size_t length_;
|
size_t length_;
|
||||||
|
// TODO: use null_count to skip some bitset operations
|
||||||
std::optional<size_t> null_count_;
|
std::optional<size_t> null_count_;
|
||||||
};
|
};
|
||||||
|
|
||||||
@ -65,8 +68,8 @@ class ColumnVector final : public BaseVector {
|
|||||||
size_t length,
|
size_t length,
|
||||||
std::optional<size_t> null_count = std::nullopt)
|
std::optional<size_t> null_count = std::nullopt)
|
||||||
: BaseVector(data_type, length, null_count) {
|
: BaseVector(data_type, length, null_count) {
|
||||||
//todo: support null expr
|
|
||||||
values_ = InitScalarFieldData(data_type, false, length);
|
values_ = InitScalarFieldData(data_type, false, length);
|
||||||
|
valid_values_ = InitScalarFieldData(data_type, false, length);
|
||||||
}
|
}
|
||||||
|
|
||||||
// ColumnVector(FixedVector<bool>&& data)
|
// ColumnVector(FixedVector<bool>&& data)
|
||||||
@ -75,15 +78,25 @@ class ColumnVector final : public BaseVector {
|
|||||||
// std::make_shared<FieldData<bool>>(DataType::BOOL, std::move(data));
|
// std::make_shared<FieldData<bool>>(DataType::BOOL, std::move(data));
|
||||||
// }
|
// }
|
||||||
|
|
||||||
|
// // the size is the number of bits
|
||||||
|
// ColumnVector(TargetBitmap&& bitmap)
|
||||||
|
// : BaseVector(DataType::INT8, bitmap.size()) {
|
||||||
|
// values_ = std::make_shared<FieldDataImpl<uint8_t, false>>(
|
||||||
|
// bitmap.size(), DataType::INT8, false, std::move(bitmap).into());
|
||||||
|
// }
|
||||||
|
|
||||||
// the size is the number of bits
|
// the size is the number of bits
|
||||||
ColumnVector(TargetBitmap&& bitmap)
|
ColumnVector(TargetBitmap&& bitmap, TargetBitmap&& valid_bitmap)
|
||||||
: BaseVector(DataType::INT8, bitmap.size()) {
|
: BaseVector(DataType::INT8, bitmap.size()) {
|
||||||
values_ = std::make_shared<FieldBitsetImpl<uint8_t>>(DataType::INT8,
|
values_ = std::make_shared<FieldBitsetImpl<uint8_t>>(DataType::INT8,
|
||||||
std::move(bitmap));
|
std::move(bitmap));
|
||||||
|
valid_values_ = std::make_shared<FieldBitsetImpl<uint8_t>>(
|
||||||
|
DataType::INT8, std::move(valid_bitmap));
|
||||||
}
|
}
|
||||||
|
|
||||||
virtual ~ColumnVector() override {
|
virtual ~ColumnVector() override {
|
||||||
values_.reset();
|
values_.reset();
|
||||||
|
valid_values_.reset();
|
||||||
}
|
}
|
||||||
|
|
||||||
void*
|
void*
|
||||||
@ -91,6 +104,11 @@ class ColumnVector final : public BaseVector {
|
|||||||
return values_->Data();
|
return values_->Data();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void*
|
||||||
|
GetValidRawData() {
|
||||||
|
return valid_values_->Data();
|
||||||
|
}
|
||||||
|
|
||||||
template <typename As>
|
template <typename As>
|
||||||
const As*
|
const As*
|
||||||
RawAsValues() const {
|
RawAsValues() const {
|
||||||
@ -99,6 +117,7 @@ class ColumnVector final : public BaseVector {
|
|||||||
|
|
||||||
private:
|
private:
|
||||||
FieldDataPtr values_;
|
FieldDataPtr values_;
|
||||||
|
FieldDataPtr valid_values_;
|
||||||
};
|
};
|
||||||
|
|
||||||
using ColumnVectorPtr = std::shared_ptr<ColumnVector>;
|
using ColumnVectorPtr = std::shared_ptr<ColumnVector>;
|
||||||
|
|||||||
@ -25,16 +25,19 @@ PhyAlwaysTrueExpr::Eval(EvalCtx& context, VectorPtr& result) {
|
|||||||
? active_count_ - current_pos_
|
? active_count_ - current_pos_
|
||||||
: batch_size_;
|
: batch_size_;
|
||||||
|
|
||||||
|
// always true, so there is no need to skip nulls
|
||||||
if (real_batch_size == 0) {
|
if (real_batch_size == 0) {
|
||||||
result = nullptr;
|
result = nullptr;
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
auto res_vec =
|
auto res_vec = std::make_shared<ColumnVector>(
|
||||||
std::make_shared<ColumnVector>(TargetBitmap(real_batch_size));
|
TargetBitmap(real_batch_size), TargetBitmap(real_batch_size));
|
||||||
TargetBitmapView res(res_vec->GetRawData(), real_batch_size);
|
TargetBitmapView res(res_vec->GetRawData(), real_batch_size);
|
||||||
|
TargetBitmapView valid_res(res_vec->GetValidRawData(), real_batch_size);
|
||||||
|
|
||||||
res.set();
|
res.set();
|
||||||
|
valid_res.set();
|
||||||
|
|
||||||
result = res_vec;
|
result = res_vec;
|
||||||
current_pos_ += real_batch_size;
|
current_pos_ += real_batch_size;
|
||||||
|
|||||||
@ -113,9 +113,11 @@ PhyBinaryArithOpEvalRangeExpr::ExecRangeVisitorImplForJson() {
|
|||||||
if (real_batch_size == 0) {
|
if (real_batch_size == 0) {
|
||||||
return nullptr;
|
return nullptr;
|
||||||
}
|
}
|
||||||
auto res_vec =
|
auto res_vec = std::make_shared<ColumnVector>(
|
||||||
std::make_shared<ColumnVector>(TargetBitmap(real_batch_size));
|
TargetBitmap(real_batch_size), TargetBitmap(real_batch_size));
|
||||||
TargetBitmapView res(res_vec->GetRawData(), real_batch_size);
|
TargetBitmapView res(res_vec->GetRawData(), real_batch_size);
|
||||||
|
TargetBitmapView valid_res(res_vec->GetValidRawData(), real_batch_size);
|
||||||
|
valid_res.set();
|
||||||
|
|
||||||
auto pointer = milvus::Json::pointer(expr_->column_.nested_path_);
|
auto pointer = milvus::Json::pointer(expr_->column_.nested_path_);
|
||||||
auto op_type = expr_->op_type_;
|
auto op_type = expr_->op_type_;
|
||||||
@ -129,6 +131,11 @@ PhyBinaryArithOpEvalRangeExpr::ExecRangeVisitorImplForJson() {
|
|||||||
#define BinaryArithRangeJSONCompare(cmp) \
|
#define BinaryArithRangeJSONCompare(cmp) \
|
||||||
do { \
|
do { \
|
||||||
for (size_t i = 0; i < size; ++i) { \
|
for (size_t i = 0; i < size; ++i) { \
|
||||||
|
if (valid_data != nullptr && !valid_data[i]) { \
|
||||||
|
res[i] = false; \
|
||||||
|
valid_res[i] = false; \
|
||||||
|
continue; \
|
||||||
|
} \
|
||||||
auto x = data[i].template at<GetType>(pointer); \
|
auto x = data[i].template at<GetType>(pointer); \
|
||||||
if (x.error()) { \
|
if (x.error()) { \
|
||||||
if constexpr (std::is_same_v<GetType, int64_t>) { \
|
if constexpr (std::is_same_v<GetType, int64_t>) { \
|
||||||
@ -146,6 +153,11 @@ PhyBinaryArithOpEvalRangeExpr::ExecRangeVisitorImplForJson() {
|
|||||||
#define BinaryArithRangeJSONCompareNotEqual(cmp) \
|
#define BinaryArithRangeJSONCompareNotEqual(cmp) \
|
||||||
do { \
|
do { \
|
||||||
for (size_t i = 0; i < size; ++i) { \
|
for (size_t i = 0; i < size; ++i) { \
|
||||||
|
if (valid_data != nullptr && !valid_data[i]) { \
|
||||||
|
res[i] = false; \
|
||||||
|
valid_res[i] = false; \
|
||||||
|
continue; \
|
||||||
|
} \
|
||||||
auto x = data[i].template at<GetType>(pointer); \
|
auto x = data[i].template at<GetType>(pointer); \
|
||||||
if (x.error()) { \
|
if (x.error()) { \
|
||||||
if constexpr (std::is_same_v<GetType, int64_t>) { \
|
if constexpr (std::is_same_v<GetType, int64_t>) { \
|
||||||
@ -161,8 +173,10 @@ PhyBinaryArithOpEvalRangeExpr::ExecRangeVisitorImplForJson() {
|
|||||||
} while (false)
|
} while (false)
|
||||||
|
|
||||||
auto execute_sub_batch = [op_type, arith_type](const milvus::Json* data,
|
auto execute_sub_batch = [op_type, arith_type](const milvus::Json* data,
|
||||||
|
const bool* valid_data,
|
||||||
const int size,
|
const int size,
|
||||||
TargetBitmapView res,
|
TargetBitmapView res,
|
||||||
|
TargetBitmapView valid_res,
|
||||||
ValueType val,
|
ValueType val,
|
||||||
ValueType right_operand,
|
ValueType right_operand,
|
||||||
const std::string& pointer) {
|
const std::string& pointer) {
|
||||||
@ -197,6 +211,11 @@ PhyBinaryArithOpEvalRangeExpr::ExecRangeVisitorImplForJson() {
|
|||||||
}
|
}
|
||||||
case proto::plan::ArithOpType::ArrayLength: {
|
case proto::plan::ArithOpType::ArrayLength: {
|
||||||
for (size_t i = 0; i < size; ++i) {
|
for (size_t i = 0; i < size; ++i) {
|
||||||
|
if (valid_data != nullptr && !valid_data[i]) {
|
||||||
|
res[i] = false;
|
||||||
|
valid_res[i] = false;
|
||||||
|
continue;
|
||||||
|
}
|
||||||
int array_length = 0;
|
int array_length = 0;
|
||||||
auto doc = data[i].doc();
|
auto doc = data[i].doc();
|
||||||
auto array = doc.at_pointer(pointer).get_array();
|
auto array = doc.at_pointer(pointer).get_array();
|
||||||
@ -246,6 +265,11 @@ PhyBinaryArithOpEvalRangeExpr::ExecRangeVisitorImplForJson() {
|
|||||||
}
|
}
|
||||||
case proto::plan::ArithOpType::ArrayLength: {
|
case proto::plan::ArithOpType::ArrayLength: {
|
||||||
for (size_t i = 0; i < size; ++i) {
|
for (size_t i = 0; i < size; ++i) {
|
||||||
|
if (valid_data != nullptr && !valid_data[i]) {
|
||||||
|
res[i] = false;
|
||||||
|
valid_res[i] = false;
|
||||||
|
continue;
|
||||||
|
}
|
||||||
int array_length = 0;
|
int array_length = 0;
|
||||||
auto doc = data[i].doc();
|
auto doc = data[i].doc();
|
||||||
auto array = doc.at_pointer(pointer).get_array();
|
auto array = doc.at_pointer(pointer).get_array();
|
||||||
@ -295,6 +319,11 @@ PhyBinaryArithOpEvalRangeExpr::ExecRangeVisitorImplForJson() {
|
|||||||
}
|
}
|
||||||
case proto::plan::ArithOpType::ArrayLength: {
|
case proto::plan::ArithOpType::ArrayLength: {
|
||||||
for (size_t i = 0; i < size; ++i) {
|
for (size_t i = 0; i < size; ++i) {
|
||||||
|
if (valid_data != nullptr && !valid_data[i]) {
|
||||||
|
res[i] = false;
|
||||||
|
valid_res[i] = false;
|
||||||
|
continue;
|
||||||
|
}
|
||||||
int array_length = 0;
|
int array_length = 0;
|
||||||
auto doc = data[i].doc();
|
auto doc = data[i].doc();
|
||||||
auto array = doc.at_pointer(pointer).get_array();
|
auto array = doc.at_pointer(pointer).get_array();
|
||||||
@ -344,6 +373,11 @@ PhyBinaryArithOpEvalRangeExpr::ExecRangeVisitorImplForJson() {
|
|||||||
}
|
}
|
||||||
case proto::plan::ArithOpType::ArrayLength: {
|
case proto::plan::ArithOpType::ArrayLength: {
|
||||||
for (size_t i = 0; i < size; ++i) {
|
for (size_t i = 0; i < size; ++i) {
|
||||||
|
if (valid_data != nullptr && !valid_data[i]) {
|
||||||
|
res[i] = false;
|
||||||
|
valid_res[i] = false;
|
||||||
|
continue;
|
||||||
|
}
|
||||||
int array_length = 0;
|
int array_length = 0;
|
||||||
auto doc = data[i].doc();
|
auto doc = data[i].doc();
|
||||||
auto array = doc.at_pointer(pointer).get_array();
|
auto array = doc.at_pointer(pointer).get_array();
|
||||||
@ -393,6 +427,11 @@ PhyBinaryArithOpEvalRangeExpr::ExecRangeVisitorImplForJson() {
|
|||||||
}
|
}
|
||||||
case proto::plan::ArithOpType::ArrayLength: {
|
case proto::plan::ArithOpType::ArrayLength: {
|
||||||
for (size_t i = 0; i < size; ++i) {
|
for (size_t i = 0; i < size; ++i) {
|
||||||
|
if (valid_data != nullptr && !valid_data[i]) {
|
||||||
|
res[i] = false;
|
||||||
|
valid_res[i] = false;
|
||||||
|
continue;
|
||||||
|
}
|
||||||
int array_length = 0;
|
int array_length = 0;
|
||||||
auto doc = data[i].doc();
|
auto doc = data[i].doc();
|
||||||
auto array = doc.at_pointer(pointer).get_array();
|
auto array = doc.at_pointer(pointer).get_array();
|
||||||
@ -442,6 +481,11 @@ PhyBinaryArithOpEvalRangeExpr::ExecRangeVisitorImplForJson() {
|
|||||||
}
|
}
|
||||||
case proto::plan::ArithOpType::ArrayLength: {
|
case proto::plan::ArithOpType::ArrayLength: {
|
||||||
for (size_t i = 0; i < size; ++i) {
|
for (size_t i = 0; i < size; ++i) {
|
||||||
|
if (valid_data != nullptr && !valid_data[i]) {
|
||||||
|
res[i] = false;
|
||||||
|
valid_res[i] = false;
|
||||||
|
continue;
|
||||||
|
}
|
||||||
int array_length = 0;
|
int array_length = 0;
|
||||||
auto doc = data[i].doc();
|
auto doc = data[i].doc();
|
||||||
auto array = doc.at_pointer(pointer).get_array();
|
auto array = doc.at_pointer(pointer).get_array();
|
||||||
@ -471,6 +515,7 @@ PhyBinaryArithOpEvalRangeExpr::ExecRangeVisitorImplForJson() {
|
|||||||
int64_t processed_size = ProcessDataChunks<milvus::Json>(execute_sub_batch,
|
int64_t processed_size = ProcessDataChunks<milvus::Json>(execute_sub_batch,
|
||||||
std::nullptr_t{},
|
std::nullptr_t{},
|
||||||
res,
|
res,
|
||||||
|
valid_res,
|
||||||
value,
|
value,
|
||||||
right_operand,
|
right_operand,
|
||||||
pointer);
|
pointer);
|
||||||
@ -492,9 +537,11 @@ PhyBinaryArithOpEvalRangeExpr::ExecRangeVisitorImplForArray() {
|
|||||||
if (real_batch_size == 0) {
|
if (real_batch_size == 0) {
|
||||||
return nullptr;
|
return nullptr;
|
||||||
}
|
}
|
||||||
auto res_vec =
|
auto res_vec = std::make_shared<ColumnVector>(
|
||||||
std::make_shared<ColumnVector>(TargetBitmap(real_batch_size));
|
TargetBitmap(real_batch_size), TargetBitmap(real_batch_size));
|
||||||
TargetBitmapView res(res_vec->GetRawData(), real_batch_size);
|
TargetBitmapView res(res_vec->GetRawData(), real_batch_size);
|
||||||
|
TargetBitmapView valid_res(res_vec->GetValidRawData(), real_batch_size);
|
||||||
|
valid_res.set();
|
||||||
|
|
||||||
int index = -1;
|
int index = -1;
|
||||||
if (expr_->column_.nested_path_.size() > 0) {
|
if (expr_->column_.nested_path_.size() > 0) {
|
||||||
@ -511,6 +558,11 @@ PhyBinaryArithOpEvalRangeExpr::ExecRangeVisitorImplForArray() {
|
|||||||
#define BinaryArithRangeArrayCompare(cmp) \
|
#define BinaryArithRangeArrayCompare(cmp) \
|
||||||
do { \
|
do { \
|
||||||
for (size_t i = 0; i < size; ++i) { \
|
for (size_t i = 0; i < size; ++i) { \
|
||||||
|
if (valid_data != nullptr && !valid_data[i]) { \
|
||||||
|
res[i] = false; \
|
||||||
|
valid_res[i] = false; \
|
||||||
|
continue; \
|
||||||
|
} \
|
||||||
if (index >= data[i].length()) { \
|
if (index >= data[i].length()) { \
|
||||||
res[i] = false; \
|
res[i] = false; \
|
||||||
continue; \
|
continue; \
|
||||||
@ -521,8 +573,10 @@ PhyBinaryArithOpEvalRangeExpr::ExecRangeVisitorImplForArray() {
|
|||||||
} while (false)
|
} while (false)
|
||||||
|
|
||||||
auto execute_sub_batch = [op_type, arith_type](const ArrayView* data,
|
auto execute_sub_batch = [op_type, arith_type](const ArrayView* data,
|
||||||
|
const bool* valid_data,
|
||||||
const int size,
|
const int size,
|
||||||
TargetBitmapView res,
|
TargetBitmapView res,
|
||||||
|
TargetBitmapView valid_res,
|
||||||
ValueType val,
|
ValueType val,
|
||||||
ValueType right_operand,
|
ValueType right_operand,
|
||||||
int index) {
|
int index) {
|
||||||
@ -558,6 +612,10 @@ PhyBinaryArithOpEvalRangeExpr::ExecRangeVisitorImplForArray() {
|
|||||||
}
|
}
|
||||||
case proto::plan::ArithOpType::ArrayLength: {
|
case proto::plan::ArithOpType::ArrayLength: {
|
||||||
for (size_t i = 0; i < size; ++i) {
|
for (size_t i = 0; i < size; ++i) {
|
||||||
|
if (valid_data != nullptr && !valid_data[i]) {
|
||||||
|
res[i] = valid_res[i] = false;
|
||||||
|
continue;
|
||||||
|
}
|
||||||
res[i] = data[i].length() == val;
|
res[i] = data[i].length() == val;
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
@ -601,6 +659,10 @@ PhyBinaryArithOpEvalRangeExpr::ExecRangeVisitorImplForArray() {
|
|||||||
}
|
}
|
||||||
case proto::plan::ArithOpType::ArrayLength: {
|
case proto::plan::ArithOpType::ArrayLength: {
|
||||||
for (size_t i = 0; i < size; ++i) {
|
for (size_t i = 0; i < size; ++i) {
|
||||||
|
if (valid_data != nullptr && !valid_data[i]) {
|
||||||
|
res[i] = valid_res[i] = false;
|
||||||
|
continue;
|
||||||
|
}
|
||||||
res[i] = data[i].length() != val;
|
res[i] = data[i].length() != val;
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
@ -644,6 +706,10 @@ PhyBinaryArithOpEvalRangeExpr::ExecRangeVisitorImplForArray() {
|
|||||||
}
|
}
|
||||||
case proto::plan::ArithOpType::ArrayLength: {
|
case proto::plan::ArithOpType::ArrayLength: {
|
||||||
for (size_t i = 0; i < size; ++i) {
|
for (size_t i = 0; i < size; ++i) {
|
||||||
|
if (valid_data != nullptr && !valid_data[i]) {
|
||||||
|
res[i] = valid_res[i] = false;
|
||||||
|
continue;
|
||||||
|
}
|
||||||
res[i] = data[i].length() > val;
|
res[i] = data[i].length() > val;
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
@ -687,6 +753,10 @@ PhyBinaryArithOpEvalRangeExpr::ExecRangeVisitorImplForArray() {
|
|||||||
}
|
}
|
||||||
case proto::plan::ArithOpType::ArrayLength: {
|
case proto::plan::ArithOpType::ArrayLength: {
|
||||||
for (size_t i = 0; i < size; ++i) {
|
for (size_t i = 0; i < size; ++i) {
|
||||||
|
if (valid_data != nullptr && !valid_data[i]) {
|
||||||
|
res[i] = valid_res[i] = false;
|
||||||
|
continue;
|
||||||
|
}
|
||||||
res[i] = data[i].length() >= val;
|
res[i] = data[i].length() >= val;
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
@ -730,6 +800,10 @@ PhyBinaryArithOpEvalRangeExpr::ExecRangeVisitorImplForArray() {
|
|||||||
}
|
}
|
||||||
case proto::plan::ArithOpType::ArrayLength: {
|
case proto::plan::ArithOpType::ArrayLength: {
|
||||||
for (size_t i = 0; i < size; ++i) {
|
for (size_t i = 0; i < size; ++i) {
|
||||||
|
if (valid_data != nullptr && !valid_data[i]) {
|
||||||
|
res[i] = valid_res[i] = false;
|
||||||
|
continue;
|
||||||
|
}
|
||||||
res[i] = data[i].length() < val;
|
res[i] = data[i].length() < val;
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
@ -773,6 +847,10 @@ PhyBinaryArithOpEvalRangeExpr::ExecRangeVisitorImplForArray() {
|
|||||||
}
|
}
|
||||||
case proto::plan::ArithOpType::ArrayLength: {
|
case proto::plan::ArithOpType::ArrayLength: {
|
||||||
for (size_t i = 0; i < size; ++i) {
|
for (size_t i = 0; i < size; ++i) {
|
||||||
|
if (valid_data != nullptr && !valid_data[i]) {
|
||||||
|
res[i] = valid_res[i] = false;
|
||||||
|
continue;
|
||||||
|
}
|
||||||
res[i] = data[i].length() <= val;
|
res[i] = data[i].length() <= val;
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
@ -794,8 +872,14 @@ PhyBinaryArithOpEvalRangeExpr::ExecRangeVisitorImplForArray() {
|
|||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
int64_t processed_size = ProcessDataChunks<milvus::ArrayView>(
|
int64_t processed_size =
|
||||||
execute_sub_batch, std::nullptr_t{}, res, value, right_operand, index);
|
ProcessDataChunks<milvus::ArrayView>(execute_sub_batch,
|
||||||
|
std::nullptr_t{},
|
||||||
|
res,
|
||||||
|
valid_res,
|
||||||
|
value,
|
||||||
|
right_operand,
|
||||||
|
index);
|
||||||
AssertInfo(processed_size == real_batch_size,
|
AssertInfo(processed_size == real_batch_size,
|
||||||
"internal error: expr processed rows {} not equal "
|
"internal error: expr processed rows {} not equal "
|
||||||
"expect batch size {}",
|
"expect batch size {}",
|
||||||
@ -1185,12 +1269,13 @@ PhyBinaryArithOpEvalRangeExpr::ExecRangeVisitorImplForIndex() {
|
|||||||
return res;
|
return res;
|
||||||
};
|
};
|
||||||
auto res = ProcessIndexChunks<T>(execute_sub_batch, value, right_operand);
|
auto res = ProcessIndexChunks<T>(execute_sub_batch, value, right_operand);
|
||||||
AssertInfo(res.size() == real_batch_size,
|
AssertInfo(res->size() == real_batch_size,
|
||||||
"internal error: expr processed rows {} not equal "
|
"internal error: expr processed rows {} not equal "
|
||||||
"expect batch size {}",
|
"expect batch size {}",
|
||||||
res.size(),
|
res->size(),
|
||||||
real_batch_size);
|
real_batch_size);
|
||||||
return std::make_shared<ColumnVector>(std::move(res));
|
// return std::make_shared<ColumnVector>(std::move(res));
|
||||||
|
return res;
|
||||||
}
|
}
|
||||||
|
|
||||||
template <typename T>
|
template <typename T>
|
||||||
@ -1209,16 +1294,20 @@ PhyBinaryArithOpEvalRangeExpr::ExecRangeVisitorImplForData() {
|
|||||||
auto value = GetValueFromProto<HighPrecisionType>(expr_->value_);
|
auto value = GetValueFromProto<HighPrecisionType>(expr_->value_);
|
||||||
auto right_operand =
|
auto right_operand =
|
||||||
GetValueFromProto<HighPrecisionType>(expr_->right_operand_);
|
GetValueFromProto<HighPrecisionType>(expr_->right_operand_);
|
||||||
auto res_vec =
|
auto res_vec = std::make_shared<ColumnVector>(
|
||||||
std::make_shared<ColumnVector>(TargetBitmap(real_batch_size));
|
TargetBitmap(real_batch_size), TargetBitmap(real_batch_size));
|
||||||
TargetBitmapView res(res_vec->GetRawData(), real_batch_size);
|
TargetBitmapView res(res_vec->GetRawData(), real_batch_size);
|
||||||
|
TargetBitmapView valid_res(res_vec->GetValidRawData(), real_batch_size);
|
||||||
|
valid_res.set();
|
||||||
|
|
||||||
auto op_type = expr_->op_type_;
|
auto op_type = expr_->op_type_;
|
||||||
auto arith_type = expr_->arith_op_type_;
|
auto arith_type = expr_->arith_op_type_;
|
||||||
auto execute_sub_batch = [op_type, arith_type](
|
auto execute_sub_batch = [op_type, arith_type](
|
||||||
const T* data,
|
const T* data,
|
||||||
|
const bool* valid_data,
|
||||||
const int size,
|
const int size,
|
||||||
TargetBitmapView res,
|
TargetBitmapView res,
|
||||||
|
TargetBitmapView valid_res,
|
||||||
HighPrecisionType value,
|
HighPrecisionType value,
|
||||||
HighPrecisionType right_operand) {
|
HighPrecisionType right_operand) {
|
||||||
switch (op_type) {
|
switch (op_type) {
|
||||||
@ -1534,9 +1623,23 @@ PhyBinaryArithOpEvalRangeExpr::ExecRangeVisitorImplForData() {
|
|||||||
"arithmetic eval expr: {}",
|
"arithmetic eval expr: {}",
|
||||||
op_type);
|
op_type);
|
||||||
}
|
}
|
||||||
|
// ArithOpElementFunc performs a batch operation,
|
||||||
|
// so do not split the data around nulls again, because that may reduce performance if the null distribution is scattered;
|
||||||
|
// instead, mask res with valid_data after the batch operation.
|
||||||
|
if (valid_data != nullptr) {
|
||||||
|
for (int i = 0; i < size; i++) {
|
||||||
|
if (!valid_data[i]) {
|
||||||
|
res[i] = valid_res[i] = false;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
};
|
};
|
||||||
int64_t processed_size = ProcessDataChunks<T>(
|
int64_t processed_size = ProcessDataChunks<T>(execute_sub_batch,
|
||||||
execute_sub_batch, std::nullptr_t{}, res, value, right_operand);
|
std::nullptr_t{},
|
||||||
|
res,
|
||||||
|
valid_res,
|
||||||
|
value,
|
||||||
|
right_operand);
|
||||||
AssertInfo(processed_size == real_batch_size,
|
AssertInfo(processed_size == real_batch_size,
|
||||||
"internal error: expr processed rows {} not equal "
|
"internal error: expr processed rows {} not equal "
|
||||||
"expect batch size {}",
|
"expect batch size {}",
|
||||||
|
|||||||
@ -239,7 +239,6 @@ struct ArithOpElementFunc {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
*/
|
*/
|
||||||
|
|
||||||
if constexpr (!std::is_same_v<decltype(CmpOpHelper<cmp_op>::op),
|
if constexpr (!std::is_same_v<decltype(CmpOpHelper<cmp_op>::op),
|
||||||
void>) {
|
void>) {
|
||||||
constexpr auto cmp_op_cvt = CmpOpHelper<cmp_op>::op;
|
constexpr auto cmp_op_cvt = CmpOpHelper<cmp_op>::op;
|
||||||
@ -282,22 +281,26 @@ struct ArithOpIndexFunc {
|
|||||||
HighPrecisonType right_operand) {
|
HighPrecisonType right_operand) {
|
||||||
TargetBitmap res(size);
|
TargetBitmap res(size);
|
||||||
for (size_t i = 0; i < size; ++i) {
|
for (size_t i = 0; i < size; ++i) {
|
||||||
|
auto raw = index->Reverse_Lookup(i);
|
||||||
|
if (!raw.has_value()) {
|
||||||
|
res[i] = false;
|
||||||
|
continue;
|
||||||
|
}
|
||||||
if constexpr (cmp_op == proto::plan::OpType::Equal) {
|
if constexpr (cmp_op == proto::plan::OpType::Equal) {
|
||||||
if constexpr (arith_op == proto::plan::ArithOpType::Add) {
|
if constexpr (arith_op == proto::plan::ArithOpType::Add) {
|
||||||
res[i] = (index->Reverse_Lookup(i) + right_operand) == val;
|
res[i] = (raw.value() + right_operand) == val;
|
||||||
} else if constexpr (arith_op ==
|
} else if constexpr (arith_op ==
|
||||||
proto::plan::ArithOpType::Sub) {
|
proto::plan::ArithOpType::Sub) {
|
||||||
res[i] = (index->Reverse_Lookup(i) - right_operand) == val;
|
res[i] = (raw.value() - right_operand) == val;
|
||||||
} else if constexpr (arith_op ==
|
} else if constexpr (arith_op ==
|
||||||
proto::plan::ArithOpType::Mul) {
|
proto::plan::ArithOpType::Mul) {
|
||||||
res[i] = (index->Reverse_Lookup(i) * right_operand) == val;
|
res[i] = (raw.value() * right_operand) == val;
|
||||||
} else if constexpr (arith_op ==
|
} else if constexpr (arith_op ==
|
||||||
proto::plan::ArithOpType::Div) {
|
proto::plan::ArithOpType::Div) {
|
||||||
res[i] = (index->Reverse_Lookup(i) / right_operand) == val;
|
res[i] = (raw.value() / right_operand) == val;
|
||||||
} else if constexpr (arith_op ==
|
} else if constexpr (arith_op ==
|
||||||
proto::plan::ArithOpType::Mod) {
|
proto::plan::ArithOpType::Mod) {
|
||||||
res[i] =
|
res[i] = (fmod(raw.value(), right_operand)) == val;
|
||||||
(fmod(index->Reverse_Lookup(i), right_operand)) == val;
|
|
||||||
} else {
|
} else {
|
||||||
PanicInfo(
|
PanicInfo(
|
||||||
OpTypeInvalid,
|
OpTypeInvalid,
|
||||||
@ -307,20 +310,19 @@ struct ArithOpIndexFunc {
|
|||||||
}
|
}
|
||||||
} else if constexpr (cmp_op == proto::plan::OpType::NotEqual) {
|
} else if constexpr (cmp_op == proto::plan::OpType::NotEqual) {
|
||||||
if constexpr (arith_op == proto::plan::ArithOpType::Add) {
|
if constexpr (arith_op == proto::plan::ArithOpType::Add) {
|
||||||
res[i] = (index->Reverse_Lookup(i) + right_operand) != val;
|
res[i] = (raw.value() + right_operand) != val;
|
||||||
} else if constexpr (arith_op ==
|
} else if constexpr (arith_op ==
|
||||||
proto::plan::ArithOpType::Sub) {
|
proto::plan::ArithOpType::Sub) {
|
||||||
res[i] = (index->Reverse_Lookup(i) - right_operand) != val;
|
res[i] = (raw.value() - right_operand) != val;
|
||||||
} else if constexpr (arith_op ==
|
} else if constexpr (arith_op ==
|
||||||
proto::plan::ArithOpType::Mul) {
|
proto::plan::ArithOpType::Mul) {
|
||||||
res[i] = (index->Reverse_Lookup(i) * right_operand) != val;
|
res[i] = (raw.value() * right_operand) != val;
|
||||||
} else if constexpr (arith_op ==
|
} else if constexpr (arith_op ==
|
||||||
proto::plan::ArithOpType::Div) {
|
proto::plan::ArithOpType::Div) {
|
||||||
res[i] = (index->Reverse_Lookup(i) / right_operand) != val;
|
res[i] = (raw.value() / right_operand) != val;
|
||||||
} else if constexpr (arith_op ==
|
} else if constexpr (arith_op ==
|
||||||
proto::plan::ArithOpType::Mod) {
|
proto::plan::ArithOpType::Mod) {
|
||||||
res[i] =
|
res[i] = (fmod(raw.value(), right_operand)) != val;
|
||||||
(fmod(index->Reverse_Lookup(i), right_operand)) != val;
|
|
||||||
} else {
|
} else {
|
||||||
PanicInfo(
|
PanicInfo(
|
||||||
OpTypeInvalid,
|
OpTypeInvalid,
|
||||||
@ -330,20 +332,19 @@ struct ArithOpIndexFunc {
|
|||||||
}
|
}
|
||||||
} else if constexpr (cmp_op == proto::plan::OpType::GreaterThan) {
|
} else if constexpr (cmp_op == proto::plan::OpType::GreaterThan) {
|
||||||
if constexpr (arith_op == proto::plan::ArithOpType::Add) {
|
if constexpr (arith_op == proto::plan::ArithOpType::Add) {
|
||||||
res[i] = (index->Reverse_Lookup(i) + right_operand) > val;
|
res[i] = (raw.value() + right_operand) > val;
|
||||||
} else if constexpr (arith_op ==
|
} else if constexpr (arith_op ==
|
||||||
proto::plan::ArithOpType::Sub) {
|
proto::plan::ArithOpType::Sub) {
|
||||||
res[i] = (index->Reverse_Lookup(i) - right_operand) > val;
|
res[i] = (raw.value() - right_operand) > val;
|
||||||
} else if constexpr (arith_op ==
|
} else if constexpr (arith_op ==
|
||||||
proto::plan::ArithOpType::Mul) {
|
proto::plan::ArithOpType::Mul) {
|
||||||
res[i] = (index->Reverse_Lookup(i) * right_operand) > val;
|
res[i] = (raw.value() * right_operand) > val;
|
||||||
} else if constexpr (arith_op ==
|
} else if constexpr (arith_op ==
|
||||||
proto::plan::ArithOpType::Div) {
|
proto::plan::ArithOpType::Div) {
|
||||||
res[i] = (index->Reverse_Lookup(i) / right_operand) > val;
|
res[i] = (raw.value() / right_operand) > val;
|
||||||
} else if constexpr (arith_op ==
|
} else if constexpr (arith_op ==
|
||||||
proto::plan::ArithOpType::Mod) {
|
proto::plan::ArithOpType::Mod) {
|
||||||
res[i] =
|
res[i] = (fmod(raw.value(), right_operand)) > val;
|
||||||
(fmod(index->Reverse_Lookup(i), right_operand)) > val;
|
|
||||||
} else {
|
} else {
|
||||||
PanicInfo(
|
PanicInfo(
|
||||||
OpTypeInvalid,
|
OpTypeInvalid,
|
||||||
@ -353,20 +354,19 @@ struct ArithOpIndexFunc {
|
|||||||
}
|
}
|
||||||
} else if constexpr (cmp_op == proto::plan::OpType::GreaterEqual) {
|
} else if constexpr (cmp_op == proto::plan::OpType::GreaterEqual) {
|
||||||
if constexpr (arith_op == proto::plan::ArithOpType::Add) {
|
if constexpr (arith_op == proto::plan::ArithOpType::Add) {
|
||||||
res[i] = (index->Reverse_Lookup(i) + right_operand) >= val;
|
res[i] = (raw.value() + right_operand) >= val;
|
||||||
} else if constexpr (arith_op ==
|
} else if constexpr (arith_op ==
|
||||||
proto::plan::ArithOpType::Sub) {
|
proto::plan::ArithOpType::Sub) {
|
||||||
res[i] = (index->Reverse_Lookup(i) - right_operand) >= val;
|
res[i] = (raw.value() - right_operand) >= val;
|
||||||
} else if constexpr (arith_op ==
|
} else if constexpr (arith_op ==
|
||||||
proto::plan::ArithOpType::Mul) {
|
proto::plan::ArithOpType::Mul) {
|
||||||
res[i] = (index->Reverse_Lookup(i) * right_operand) >= val;
|
res[i] = (raw.value() * right_operand) >= val;
|
||||||
} else if constexpr (arith_op ==
|
} else if constexpr (arith_op ==
|
||||||
proto::plan::ArithOpType::Div) {
|
proto::plan::ArithOpType::Div) {
|
||||||
res[i] = (index->Reverse_Lookup(i) / right_operand) >= val;
|
res[i] = (raw.value() / right_operand) >= val;
|
||||||
} else if constexpr (arith_op ==
|
} else if constexpr (arith_op ==
|
||||||
proto::plan::ArithOpType::Mod) {
|
proto::plan::ArithOpType::Mod) {
|
||||||
res[i] =
|
res[i] = (fmod(raw.value(), right_operand)) >= val;
|
||||||
(fmod(index->Reverse_Lookup(i), right_operand)) >= val;
|
|
||||||
} else {
|
} else {
|
||||||
PanicInfo(
|
PanicInfo(
|
||||||
OpTypeInvalid,
|
OpTypeInvalid,
|
||||||
@ -376,20 +376,19 @@ struct ArithOpIndexFunc {
|
|||||||
}
|
}
|
||||||
} else if constexpr (cmp_op == proto::plan::OpType::LessThan) {
|
} else if constexpr (cmp_op == proto::plan::OpType::LessThan) {
|
||||||
if constexpr (arith_op == proto::plan::ArithOpType::Add) {
|
if constexpr (arith_op == proto::plan::ArithOpType::Add) {
|
||||||
res[i] = (index->Reverse_Lookup(i) + right_operand) < val;
|
res[i] = (raw.value() + right_operand) < val;
|
||||||
} else if constexpr (arith_op ==
|
} else if constexpr (arith_op ==
|
||||||
proto::plan::ArithOpType::Sub) {
|
proto::plan::ArithOpType::Sub) {
|
||||||
res[i] = (index->Reverse_Lookup(i) - right_operand) < val;
|
res[i] = (raw.value() - right_operand) < val;
|
||||||
} else if constexpr (arith_op ==
|
} else if constexpr (arith_op ==
|
||||||
proto::plan::ArithOpType::Mul) {
|
proto::plan::ArithOpType::Mul) {
|
||||||
res[i] = (index->Reverse_Lookup(i) * right_operand) < val;
|
res[i] = (raw.value() * right_operand) < val;
|
||||||
} else if constexpr (arith_op ==
|
} else if constexpr (arith_op ==
|
||||||
proto::plan::ArithOpType::Div) {
|
proto::plan::ArithOpType::Div) {
|
||||||
res[i] = (index->Reverse_Lookup(i) / right_operand) < val;
|
res[i] = (raw.value() / right_operand) < val;
|
||||||
} else if constexpr (arith_op ==
|
} else if constexpr (arith_op ==
|
||||||
proto::plan::ArithOpType::Mod) {
|
proto::plan::ArithOpType::Mod) {
|
||||||
res[i] =
|
res[i] = (fmod(raw.value(), right_operand)) < val;
|
||||||
(fmod(index->Reverse_Lookup(i), right_operand)) < val;
|
|
||||||
} else {
|
} else {
|
||||||
PanicInfo(
|
PanicInfo(
|
||||||
OpTypeInvalid,
|
OpTypeInvalid,
|
||||||
@ -399,20 +398,19 @@ struct ArithOpIndexFunc {
|
|||||||
}
|
}
|
||||||
} else if constexpr (cmp_op == proto::plan::OpType::LessEqual) {
|
} else if constexpr (cmp_op == proto::plan::OpType::LessEqual) {
|
||||||
if constexpr (arith_op == proto::plan::ArithOpType::Add) {
|
if constexpr (arith_op == proto::plan::ArithOpType::Add) {
|
||||||
res[i] = (index->Reverse_Lookup(i) + right_operand) <= val;
|
res[i] = (raw.value() + right_operand) <= val;
|
||||||
} else if constexpr (arith_op ==
|
} else if constexpr (arith_op ==
|
||||||
proto::plan::ArithOpType::Sub) {
|
proto::plan::ArithOpType::Sub) {
|
||||||
res[i] = (index->Reverse_Lookup(i) - right_operand) <= val;
|
res[i] = (raw.value() - right_operand) <= val;
|
||||||
} else if constexpr (arith_op ==
|
} else if constexpr (arith_op ==
|
||||||
proto::plan::ArithOpType::Mul) {
|
proto::plan::ArithOpType::Mul) {
|
||||||
res[i] = (index->Reverse_Lookup(i) * right_operand) <= val;
|
res[i] = (raw.value() * right_operand) <= val;
|
||||||
} else if constexpr (arith_op ==
|
} else if constexpr (arith_op ==
|
||||||
proto::plan::ArithOpType::Div) {
|
proto::plan::ArithOpType::Div) {
|
||||||
res[i] = (index->Reverse_Lookup(i) / right_operand) <= val;
|
res[i] = (raw.value() / right_operand) <= val;
|
||||||
} else if constexpr (arith_op ==
|
} else if constexpr (arith_op ==
|
||||||
proto::plan::ArithOpType::Mod) {
|
proto::plan::ArithOpType::Mod) {
|
||||||
res[i] =
|
res[i] = (fmod(raw.value(), right_operand)) <= val;
|
||||||
(fmod(index->Reverse_Lookup(i), right_operand)) <= val;
|
|
||||||
} else {
|
} else {
|
||||||
PanicInfo(
|
PanicInfo(
|
||||||
OpTypeInvalid,
|
OpTypeInvalid,
|
||||||
|
|||||||
@ -15,6 +15,7 @@
|
|||||||
// limitations under the License.
|
// limitations under the License.
|
||||||
|
|
||||||
#include "BinaryRangeExpr.h"
|
#include "BinaryRangeExpr.h"
|
||||||
|
#include <utility>
|
||||||
|
|
||||||
#include "query/Utils.h"
|
#include "query/Utils.h"
|
||||||
|
|
||||||
@ -150,8 +151,12 @@ PhyBinaryRangeFilterExpr::PreCheckOverflow(HighPrecisionType& val1,
|
|||||||
cached_overflow_res_->size() == batch_size) {
|
cached_overflow_res_->size() == batch_size) {
|
||||||
return cached_overflow_res_;
|
return cached_overflow_res_;
|
||||||
}
|
}
|
||||||
auto res = std::make_shared<ColumnVector>(TargetBitmap(batch_size));
|
auto valid_res = ProcessChunksForValid<T>(is_index_mode_);
|
||||||
return res;
|
auto res_vec = std::make_shared<ColumnVector>(TargetBitmap(batch_size),
|
||||||
|
std::move(valid_res));
|
||||||
|
cached_overflow_res_ = res_vec;
|
||||||
|
|
||||||
|
return res_vec;
|
||||||
};
|
};
|
||||||
|
|
||||||
if constexpr (std::is_integral_v<T> && !std::is_same_v<bool, T>) {
|
if constexpr (std::is_integral_v<T> && !std::is_same_v<bool, T>) {
|
||||||
@ -207,12 +212,12 @@ PhyBinaryRangeFilterExpr::ExecRangeVisitorImplForIndex() {
|
|||||||
func(index_ptr, val1, val2, lower_inclusive, upper_inclusive));
|
func(index_ptr, val1, val2, lower_inclusive, upper_inclusive));
|
||||||
};
|
};
|
||||||
auto res = ProcessIndexChunks<T>(execute_sub_batch, val1, val2);
|
auto res = ProcessIndexChunks<T>(execute_sub_batch, val1, val2);
|
||||||
AssertInfo(res.size() == real_batch_size,
|
AssertInfo(res->size() == real_batch_size,
|
||||||
"internal error: expr processed rows {} not equal "
|
"internal error: expr processed rows {} not equal "
|
||||||
"expect batch size {}",
|
"expect batch size {}",
|
||||||
res.size(),
|
res->size(),
|
||||||
real_batch_size);
|
real_batch_size);
|
||||||
return std::make_shared<ColumnVector>(std::move(res));
|
return res;
|
||||||
}
|
}
|
||||||
|
|
||||||
template <typename T>
|
template <typename T>
|
||||||
@ -240,14 +245,18 @@ PhyBinaryRangeFilterExpr::ExecRangeVisitorImplForData() {
|
|||||||
PreCheckOverflow<T>(val1, val2, lower_inclusive, upper_inclusive)) {
|
PreCheckOverflow<T>(val1, val2, lower_inclusive, upper_inclusive)) {
|
||||||
return res;
|
return res;
|
||||||
}
|
}
|
||||||
auto res_vec =
|
auto res_vec = std::make_shared<ColumnVector>(
|
||||||
std::make_shared<ColumnVector>(TargetBitmap(real_batch_size));
|
TargetBitmap(real_batch_size), TargetBitmap(real_batch_size));
|
||||||
TargetBitmapView res(res_vec->GetRawData(), real_batch_size);
|
TargetBitmapView res(res_vec->GetRawData(), real_batch_size);
|
||||||
|
TargetBitmapView valid_res(res_vec->GetValidRawData(), real_batch_size);
|
||||||
|
valid_res.set();
|
||||||
|
|
||||||
auto execute_sub_batch = [lower_inclusive, upper_inclusive](
|
auto execute_sub_batch = [lower_inclusive, upper_inclusive](
|
||||||
const T* data,
|
const T* data,
|
||||||
|
const bool* valid_data,
|
||||||
const int size,
|
const int size,
|
||||||
TargetBitmapView res,
|
TargetBitmapView res,
|
||||||
|
TargetBitmapView valid_res,
|
||||||
HighPrecisionType val1,
|
HighPrecisionType val1,
|
||||||
HighPrecisionType val2) {
|
HighPrecisionType val2) {
|
||||||
if (lower_inclusive && upper_inclusive) {
|
if (lower_inclusive && upper_inclusive) {
|
||||||
@ -263,6 +272,16 @@ PhyBinaryRangeFilterExpr::ExecRangeVisitorImplForData() {
|
|||||||
BinaryRangeElementFunc<T, false, false> func;
|
BinaryRangeElementFunc<T, false, false> func;
|
||||||
func(val1, val2, data, size, res);
|
func(val1, val2, data, size, res);
|
||||||
}
|
}
|
||||||
|
// there is a batch operation in BinaryRangeElementFunc,
|
||||||
|
// so not divide data again for the reason that it may reduce performance if the null distribution is scattered
|
||||||
|
// but to mask res with valid_data after the batch operation.
|
||||||
|
if (valid_data != nullptr) {
|
||||||
|
for (int i = 0; i < size; i++) {
|
||||||
|
if (!valid_data[i]) {
|
||||||
|
res[i] = valid_res[i] = false;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
};
|
};
|
||||||
auto skip_index_func =
|
auto skip_index_func =
|
||||||
[val1, val2, lower_inclusive, upper_inclusive](
|
[val1, val2, lower_inclusive, upper_inclusive](
|
||||||
@ -282,7 +301,7 @@ PhyBinaryRangeFilterExpr::ExecRangeVisitorImplForData() {
|
|||||||
}
|
}
|
||||||
};
|
};
|
||||||
int64_t processed_size = ProcessDataChunks<T>(
|
int64_t processed_size = ProcessDataChunks<T>(
|
||||||
execute_sub_batch, skip_index_func, res, val1, val2);
|
execute_sub_batch, skip_index_func, res, valid_res, val1, val2);
|
||||||
AssertInfo(processed_size == real_batch_size,
|
AssertInfo(processed_size == real_batch_size,
|
||||||
"internal error: expr processed rows {} not equal "
|
"internal error: expr processed rows {} not equal "
|
||||||
"expect batch size {}",
|
"expect batch size {}",
|
||||||
@ -301,9 +320,11 @@ PhyBinaryRangeFilterExpr::ExecRangeVisitorImplForJson() {
|
|||||||
if (real_batch_size == 0) {
|
if (real_batch_size == 0) {
|
||||||
return nullptr;
|
return nullptr;
|
||||||
}
|
}
|
||||||
auto res_vec =
|
auto res_vec = std::make_shared<ColumnVector>(
|
||||||
std::make_shared<ColumnVector>(TargetBitmap(real_batch_size));
|
TargetBitmap(real_batch_size), TargetBitmap(real_batch_size));
|
||||||
TargetBitmapView res(res_vec->GetRawData(), real_batch_size);
|
TargetBitmapView res(res_vec->GetRawData(), real_batch_size);
|
||||||
|
TargetBitmapView valid_res(res_vec->GetValidRawData(), real_batch_size);
|
||||||
|
valid_res.set();
|
||||||
|
|
||||||
bool lower_inclusive = expr_->lower_inclusive_;
|
bool lower_inclusive = expr_->lower_inclusive_;
|
||||||
bool upper_inclusive = expr_->upper_inclusive_;
|
bool upper_inclusive = expr_->upper_inclusive_;
|
||||||
@ -313,26 +334,28 @@ PhyBinaryRangeFilterExpr::ExecRangeVisitorImplForJson() {
|
|||||||
|
|
||||||
auto execute_sub_batch = [lower_inclusive, upper_inclusive, pointer](
|
auto execute_sub_batch = [lower_inclusive, upper_inclusive, pointer](
|
||||||
const milvus::Json* data,
|
const milvus::Json* data,
|
||||||
|
const bool* valid_data,
|
||||||
const int size,
|
const int size,
|
||||||
TargetBitmapView res,
|
TargetBitmapView res,
|
||||||
|
TargetBitmapView valid_res,
|
||||||
ValueType val1,
|
ValueType val1,
|
||||||
ValueType val2) {
|
ValueType val2) {
|
||||||
if (lower_inclusive && upper_inclusive) {
|
if (lower_inclusive && upper_inclusive) {
|
||||||
BinaryRangeElementFuncForJson<ValueType, true, true> func;
|
BinaryRangeElementFuncForJson<ValueType, true, true> func;
|
||||||
func(val1, val2, pointer, data, size, res);
|
func(val1, val2, pointer, data, valid_data, size, res, valid_res);
|
||||||
} else if (lower_inclusive && !upper_inclusive) {
|
} else if (lower_inclusive && !upper_inclusive) {
|
||||||
BinaryRangeElementFuncForJson<ValueType, true, false> func;
|
BinaryRangeElementFuncForJson<ValueType, true, false> func;
|
||||||
func(val1, val2, pointer, data, size, res);
|
func(val1, val2, pointer, data, valid_data, size, res, valid_res);
|
||||||
} else if (!lower_inclusive && upper_inclusive) {
|
} else if (!lower_inclusive && upper_inclusive) {
|
||||||
BinaryRangeElementFuncForJson<ValueType, false, true> func;
|
BinaryRangeElementFuncForJson<ValueType, false, true> func;
|
||||||
func(val1, val2, pointer, data, size, res);
|
func(val1, val2, pointer, data, valid_data, size, res, valid_res);
|
||||||
} else {
|
} else {
|
||||||
BinaryRangeElementFuncForJson<ValueType, false, false> func;
|
BinaryRangeElementFuncForJson<ValueType, false, false> func;
|
||||||
func(val1, val2, pointer, data, size, res);
|
func(val1, val2, pointer, data, valid_data, size, res, valid_res);
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
int64_t processed_size = ProcessDataChunks<milvus::Json>(
|
int64_t processed_size = ProcessDataChunks<milvus::Json>(
|
||||||
execute_sub_batch, std::nullptr_t{}, res, val1, val2);
|
execute_sub_batch, std::nullptr_t{}, res, valid_res, val1, val2);
|
||||||
AssertInfo(processed_size == real_batch_size,
|
AssertInfo(processed_size == real_batch_size,
|
||||||
"internal error: expr processed rows {} not equal "
|
"internal error: expr processed rows {} not equal "
|
||||||
"expect batch size {}",
|
"expect batch size {}",
|
||||||
@ -351,9 +374,11 @@ PhyBinaryRangeFilterExpr::ExecRangeVisitorImplForArray() {
|
|||||||
if (real_batch_size == 0) {
|
if (real_batch_size == 0) {
|
||||||
return nullptr;
|
return nullptr;
|
||||||
}
|
}
|
||||||
auto res_vec =
|
auto res_vec = std::make_shared<ColumnVector>(
|
||||||
std::make_shared<ColumnVector>(TargetBitmap(real_batch_size));
|
TargetBitmap(real_batch_size), TargetBitmap(real_batch_size));
|
||||||
TargetBitmapView res(res_vec->GetRawData(), real_batch_size);
|
TargetBitmapView res(res_vec->GetRawData(), real_batch_size);
|
||||||
|
TargetBitmapView valid_res(res_vec->GetValidRawData(), real_batch_size);
|
||||||
|
valid_res.set();
|
||||||
|
|
||||||
bool lower_inclusive = expr_->lower_inclusive_;
|
bool lower_inclusive = expr_->lower_inclusive_;
|
||||||
bool upper_inclusive = expr_->upper_inclusive_;
|
bool upper_inclusive = expr_->upper_inclusive_;
|
||||||
@ -366,27 +391,29 @@ PhyBinaryRangeFilterExpr::ExecRangeVisitorImplForArray() {
|
|||||||
|
|
||||||
auto execute_sub_batch = [lower_inclusive, upper_inclusive](
|
auto execute_sub_batch = [lower_inclusive, upper_inclusive](
|
||||||
const milvus::ArrayView* data,
|
const milvus::ArrayView* data,
|
||||||
|
const bool* valid_data,
|
||||||
const int size,
|
const int size,
|
||||||
TargetBitmapView res,
|
TargetBitmapView res,
|
||||||
|
TargetBitmapView valid_res,
|
||||||
ValueType val1,
|
ValueType val1,
|
||||||
ValueType val2,
|
ValueType val2,
|
||||||
int index) {
|
int index) {
|
||||||
if (lower_inclusive && upper_inclusive) {
|
if (lower_inclusive && upper_inclusive) {
|
||||||
BinaryRangeElementFuncForArray<ValueType, true, true> func;
|
BinaryRangeElementFuncForArray<ValueType, true, true> func;
|
||||||
func(val1, val2, index, data, size, res);
|
func(val1, val2, index, data, valid_data, size, res, valid_res);
|
||||||
} else if (lower_inclusive && !upper_inclusive) {
|
} else if (lower_inclusive && !upper_inclusive) {
|
||||||
BinaryRangeElementFuncForArray<ValueType, true, false> func;
|
BinaryRangeElementFuncForArray<ValueType, true, false> func;
|
||||||
func(val1, val2, index, data, size, res);
|
func(val1, val2, index, data, valid_data, size, res, valid_res);
|
||||||
} else if (!lower_inclusive && upper_inclusive) {
|
} else if (!lower_inclusive && upper_inclusive) {
|
||||||
BinaryRangeElementFuncForArray<ValueType, false, true> func;
|
BinaryRangeElementFuncForArray<ValueType, false, true> func;
|
||||||
func(val1, val2, index, data, size, res);
|
func(val1, val2, index, data, valid_data, size, res, valid_res);
|
||||||
} else {
|
} else {
|
||||||
BinaryRangeElementFuncForArray<ValueType, false, false> func;
|
BinaryRangeElementFuncForArray<ValueType, false, false> func;
|
||||||
func(val1, val2, index, data, size, res);
|
func(val1, val2, index, data, valid_data, size, res, valid_res);
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
int64_t processed_size = ProcessDataChunks<milvus::ArrayView>(
|
int64_t processed_size = ProcessDataChunks<milvus::ArrayView>(
|
||||||
execute_sub_batch, std::nullptr_t{}, res, val1, val2, index);
|
execute_sub_batch, std::nullptr_t{}, res, valid_res, val1, val2, index);
|
||||||
AssertInfo(processed_size == real_batch_size,
|
AssertInfo(processed_size == real_batch_size,
|
||||||
"internal error: expr processed rows {} not equal "
|
"internal error: expr processed rows {} not equal "
|
||||||
"expect batch size {}",
|
"expect batch size {}",
|
||||||
|
|||||||
@ -54,6 +54,10 @@ struct BinaryRangeElementFunc {
|
|||||||
|
|
||||||
#define BinaryRangeJSONCompare(cmp) \
|
#define BinaryRangeJSONCompare(cmp) \
|
||||||
do { \
|
do { \
|
||||||
|
if (valid_data != nullptr && !valid_data[i]) { \
|
||||||
|
res[i] = valid_res[i] = false; \
|
||||||
|
break; \
|
||||||
|
} \
|
||||||
auto x = src[i].template at<GetType>(pointer); \
|
auto x = src[i].template at<GetType>(pointer); \
|
||||||
if (x.error()) { \
|
if (x.error()) { \
|
||||||
if constexpr (std::is_same_v<GetType, int64_t>) { \
|
if constexpr (std::is_same_v<GetType, int64_t>) { \
|
||||||
@ -81,8 +85,10 @@ struct BinaryRangeElementFuncForJson {
|
|||||||
ValueType val2,
|
ValueType val2,
|
||||||
const std::string& pointer,
|
const std::string& pointer,
|
||||||
const milvus::Json* src,
|
const milvus::Json* src,
|
||||||
|
const bool* valid_data,
|
||||||
size_t n,
|
size_t n,
|
||||||
TargetBitmapView res) {
|
TargetBitmapView res,
|
||||||
|
TargetBitmapView valid_res) {
|
||||||
for (size_t i = 0; i < n; ++i) {
|
for (size_t i = 0; i < n; ++i) {
|
||||||
if constexpr (lower_inclusive && upper_inclusive) {
|
if constexpr (lower_inclusive && upper_inclusive) {
|
||||||
BinaryRangeJSONCompare(val1 <= value && value <= val2);
|
BinaryRangeJSONCompare(val1 <= value && value <= val2);
|
||||||
@ -107,9 +113,15 @@ struct BinaryRangeElementFuncForArray {
|
|||||||
ValueType val2,
|
ValueType val2,
|
||||||
int index,
|
int index,
|
||||||
const milvus::ArrayView* src,
|
const milvus::ArrayView* src,
|
||||||
|
const bool* valid_data,
|
||||||
size_t n,
|
size_t n,
|
||||||
TargetBitmapView res) {
|
TargetBitmapView res,
|
||||||
|
TargetBitmapView valid_res) {
|
||||||
for (size_t i = 0; i < n; ++i) {
|
for (size_t i = 0; i < n; ++i) {
|
||||||
|
if (valid_data != nullptr && !valid_data[i]) {
|
||||||
|
res[i] = valid_res[i] = false;
|
||||||
|
continue;
|
||||||
|
}
|
||||||
if constexpr (lower_inclusive && upper_inclusive) {
|
if constexpr (lower_inclusive && upper_inclusive) {
|
||||||
if (index >= src[i].length()) {
|
if (index >= src[i].length()) {
|
||||||
res[i] = false;
|
res[i] = false;
|
||||||
|
|||||||
@ -16,6 +16,7 @@
|
|||||||
|
|
||||||
#include "CompareExpr.h"
|
#include "CompareExpr.h"
|
||||||
#include "common/type_c.h"
|
#include "common/type_c.h"
|
||||||
|
#include <optional>
|
||||||
#include "query/Relational.h"
|
#include "query/Relational.h"
|
||||||
|
|
||||||
namespace milvus {
|
namespace milvus {
|
||||||
@ -58,12 +59,19 @@ PhyCompareFilterExpr::GetChunkData(FieldId field_id,
|
|||||||
segment_->chunk_scalar_index<T>(field_id,
|
segment_->chunk_scalar_index<T>(field_id,
|
||||||
current_chunk_id));
|
current_chunk_id));
|
||||||
}
|
}
|
||||||
return indexing.Reverse_Lookup(current_chunk_pos++);
|
auto raw = indexing.Reverse_Lookup(current_chunk_pos);
|
||||||
|
current_chunk_pos++;
|
||||||
|
if (!raw.has_value()) {
|
||||||
|
return std::nullopt;
|
||||||
|
}
|
||||||
|
return raw.value();
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
auto chunk_data =
|
auto chunk_data =
|
||||||
segment_->chunk_data<T>(field_id, current_chunk_id).data();
|
segment_->chunk_data<T>(field_id, current_chunk_id).data();
|
||||||
|
auto chunk_valid_data =
|
||||||
|
segment_->chunk_data<T>(field_id, current_chunk_id).valid_data();
|
||||||
auto current_chunk_size = segment_->chunk_size(field_id, current_chunk_id);
|
auto current_chunk_size = segment_->chunk_size(field_id, current_chunk_id);
|
||||||
return
|
return
|
||||||
[=, ¤t_chunk_id, ¤t_chunk_pos]() mutable -> const number {
|
[=, ¤t_chunk_id, ¤t_chunk_pos]() mutable -> const number {
|
||||||
@ -72,10 +80,16 @@ PhyCompareFilterExpr::GetChunkData(FieldId field_id,
|
|||||||
current_chunk_pos = 0;
|
current_chunk_pos = 0;
|
||||||
chunk_data =
|
chunk_data =
|
||||||
segment_->chunk_data<T>(field_id, current_chunk_id).data();
|
segment_->chunk_data<T>(field_id, current_chunk_id).data();
|
||||||
|
chunk_valid_data =
|
||||||
|
segment_->chunk_data<T>(field_id, current_chunk_id)
|
||||||
|
.valid_data();
|
||||||
current_chunk_size =
|
current_chunk_size =
|
||||||
segment_->chunk_size(field_id, current_chunk_id);
|
segment_->chunk_size(field_id, current_chunk_id);
|
||||||
}
|
}
|
||||||
|
if (chunk_valid_data && !chunk_valid_data[current_chunk_pos]) {
|
||||||
|
current_chunk_pos++;
|
||||||
|
return std::nullopt;
|
||||||
|
}
|
||||||
return chunk_data[current_chunk_pos++];
|
return chunk_data[current_chunk_pos++];
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
@ -103,7 +117,12 @@ PhyCompareFilterExpr::GetChunkData<std::string>(FieldId field_id,
|
|||||||
segment_->chunk_scalar_index<std::string>(
|
segment_->chunk_scalar_index<std::string>(
|
||||||
field_id, current_chunk_id));
|
field_id, current_chunk_id));
|
||||||
}
|
}
|
||||||
return indexing.Reverse_Lookup(current_chunk_pos++);
|
auto raw = indexing.Reverse_Lookup(current_chunk_pos);
|
||||||
|
current_chunk_pos++;
|
||||||
|
if (!raw.has_value()) {
|
||||||
|
return std::nullopt;
|
||||||
|
}
|
||||||
|
return raw.value();
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -114,6 +133,9 @@ PhyCompareFilterExpr::GetChunkData<std::string>(FieldId field_id,
|
|||||||
auto chunk_data =
|
auto chunk_data =
|
||||||
segment_->chunk_data<std::string>(field_id, current_chunk_id)
|
segment_->chunk_data<std::string>(field_id, current_chunk_id)
|
||||||
.data();
|
.data();
|
||||||
|
auto chunk_valid_data =
|
||||||
|
segment_->chunk_data<std::string>(field_id, current_chunk_id)
|
||||||
|
.valid_data();
|
||||||
auto current_chunk_size =
|
auto current_chunk_size =
|
||||||
segment_->chunk_size(field_id, current_chunk_id);
|
segment_->chunk_size(field_id, current_chunk_id);
|
||||||
return [=,
|
return [=,
|
||||||
@ -126,16 +148,26 @@ PhyCompareFilterExpr::GetChunkData<std::string>(FieldId field_id,
|
|||||||
segment_
|
segment_
|
||||||
->chunk_data<std::string>(field_id, current_chunk_id)
|
->chunk_data<std::string>(field_id, current_chunk_id)
|
||||||
.data();
|
.data();
|
||||||
|
chunk_valid_data =
|
||||||
|
segment_
|
||||||
|
->chunk_data<std::string>(field_id, current_chunk_id)
|
||||||
|
.valid_data();
|
||||||
current_chunk_size =
|
current_chunk_size =
|
||||||
segment_->chunk_size(field_id, current_chunk_id);
|
segment_->chunk_size(field_id, current_chunk_id);
|
||||||
}
|
}
|
||||||
|
if (chunk_valid_data && !chunk_valid_data[current_chunk_pos]) {
|
||||||
|
current_chunk_pos++;
|
||||||
|
return std::nullopt;
|
||||||
|
}
|
||||||
return chunk_data[current_chunk_pos++];
|
return chunk_data[current_chunk_pos++];
|
||||||
};
|
};
|
||||||
} else {
|
} else {
|
||||||
auto chunk_data =
|
auto chunk_data =
|
||||||
segment_->chunk_view<std::string_view>(field_id, current_chunk_id)
|
segment_->chunk_view<std::string_view>(field_id, current_chunk_id)
|
||||||
.first.data();
|
.first.data();
|
||||||
|
auto chunk_valid_data =
|
||||||
|
segment_->chunk_data<std::string_view>(field_id, current_chunk_id)
|
||||||
|
.valid_data();
|
||||||
auto current_chunk_size =
|
auto current_chunk_size =
|
||||||
segment_->chunk_size(field_id, current_chunk_id);
|
segment_->chunk_size(field_id, current_chunk_id);
|
||||||
return [=,
|
return [=,
|
||||||
@ -148,9 +180,17 @@ PhyCompareFilterExpr::GetChunkData<std::string>(FieldId field_id,
|
|||||||
->chunk_view<std::string_view>(
|
->chunk_view<std::string_view>(
|
||||||
field_id, current_chunk_id)
|
field_id, current_chunk_id)
|
||||||
.first.data();
|
.first.data();
|
||||||
|
chunk_valid_data = segment_
|
||||||
|
->chunk_data<std::string_view>(
|
||||||
|
field_id, current_chunk_id)
|
||||||
|
.valid_data();
|
||||||
current_chunk_size =
|
current_chunk_size =
|
||||||
segment_->chunk_size(field_id, current_chunk_id);
|
segment_->chunk_size(field_id, current_chunk_id);
|
||||||
}
|
}
|
||||||
|
if (chunk_valid_data && !chunk_valid_data[current_chunk_pos]) {
|
||||||
|
current_chunk_pos++;
|
||||||
|
return std::nullopt;
|
||||||
|
}
|
||||||
|
|
||||||
return std::string(chunk_data[current_chunk_pos++]);
|
return std::string(chunk_data[current_chunk_pos++]);
|
||||||
};
|
};
|
||||||
@ -203,9 +243,11 @@ PhyCompareFilterExpr::ExecCompareExprDispatcher(OpType op) {
|
|||||||
return nullptr;
|
return nullptr;
|
||||||
}
|
}
|
||||||
|
|
||||||
auto res_vec =
|
auto res_vec = std::make_shared<ColumnVector>(
|
||||||
std::make_shared<ColumnVector>(TargetBitmap(real_batch_size));
|
TargetBitmap(real_batch_size), TargetBitmap(real_batch_size));
|
||||||
TargetBitmapView res(res_vec->GetRawData(), real_batch_size);
|
TargetBitmapView res(res_vec->GetRawData(), real_batch_size);
|
||||||
|
TargetBitmapView valid_res(res_vec->GetValidRawData(), real_batch_size);
|
||||||
|
valid_res.set();
|
||||||
|
|
||||||
auto left = GetChunkData(expr_->left_data_type_,
|
auto left = GetChunkData(expr_->left_data_type_,
|
||||||
expr_->left_field_id_,
|
expr_->left_field_id_,
|
||||||
@ -218,8 +260,15 @@ PhyCompareFilterExpr::ExecCompareExprDispatcher(OpType op) {
|
|||||||
right_current_chunk_id_,
|
right_current_chunk_id_,
|
||||||
right_current_chunk_pos_);
|
right_current_chunk_pos_);
|
||||||
for (int i = 0; i < real_batch_size; ++i) {
|
for (int i = 0; i < real_batch_size; ++i) {
|
||||||
res[i] = boost::apply_visitor(
|
if (!left().has_value() || !right().has_value()) {
|
||||||
milvus::query::Relational<decltype(op)>{}, left(), right());
|
res[i] = false;
|
||||||
|
valid_res[i] = false;
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
res[i] =
|
||||||
|
boost::apply_visitor(milvus::query::Relational<decltype(op)>{},
|
||||||
|
left().value(),
|
||||||
|
right().value());
|
||||||
}
|
}
|
||||||
return res_vec;
|
return res_vec;
|
||||||
} else {
|
} else {
|
||||||
@ -228,9 +277,11 @@ PhyCompareFilterExpr::ExecCompareExprDispatcher(OpType op) {
|
|||||||
return nullptr;
|
return nullptr;
|
||||||
}
|
}
|
||||||
|
|
||||||
auto res_vec =
|
auto res_vec = std::make_shared<ColumnVector>(
|
||||||
std::make_shared<ColumnVector>(TargetBitmap(real_batch_size));
|
TargetBitmap(real_batch_size), TargetBitmap(real_batch_size));
|
||||||
TargetBitmapView res(res_vec->GetRawData(), real_batch_size);
|
TargetBitmapView res(res_vec->GetRawData(), real_batch_size);
|
||||||
|
TargetBitmapView valid_res(res_vec->GetValidRawData(), real_batch_size);
|
||||||
|
valid_res.set();
|
||||||
|
|
||||||
auto left_data_barrier =
|
auto left_data_barrier =
|
||||||
segment_->num_chunk_data(expr_->left_field_id_);
|
segment_->num_chunk_data(expr_->left_field_id_);
|
||||||
@ -255,10 +306,16 @@ PhyCompareFilterExpr::ExecCompareExprDispatcher(OpType op) {
|
|||||||
for (int i = chunk_id == current_chunk_id_ ? current_chunk_pos_ : 0;
|
for (int i = chunk_id == current_chunk_id_ ? current_chunk_pos_ : 0;
|
||||||
i < chunk_size;
|
i < chunk_size;
|
||||||
++i) {
|
++i) {
|
||||||
res[processed_rows++] = boost::apply_visitor(
|
if (!left(i).has_value() || !right(i).has_value()) {
|
||||||
|
res[processed_rows] = false;
|
||||||
|
valid_res[processed_rows] = false;
|
||||||
|
} else {
|
||||||
|
res[processed_rows] = boost::apply_visitor(
|
||||||
milvus::query::Relational<decltype(op)>{},
|
milvus::query::Relational<decltype(op)>{},
|
||||||
left(i),
|
left(i).value(),
|
||||||
right(i));
|
right(i).value());
|
||||||
|
}
|
||||||
|
processed_rows++;
|
||||||
|
|
||||||
if (processed_rows >= batch_size_) {
|
if (processed_rows >= batch_size_) {
|
||||||
current_chunk_id_ = chunk_id;
|
current_chunk_id_ = chunk_id;
|
||||||
@ -280,12 +337,23 @@ PhyCompareFilterExpr::GetChunkData(FieldId field_id,
|
|||||||
auto& indexing = segment_->chunk_scalar_index<T>(field_id, chunk_id);
|
auto& indexing = segment_->chunk_scalar_index<T>(field_id, chunk_id);
|
||||||
if (indexing.HasRawData()) {
|
if (indexing.HasRawData()) {
|
||||||
return [&indexing](int i) -> const number {
|
return [&indexing](int i) -> const number {
|
||||||
return indexing.Reverse_Lookup(i);
|
auto raw = indexing.Reverse_Lookup(i);
|
||||||
|
if (!raw.has_value()) {
|
||||||
|
return std::nullopt;
|
||||||
|
}
|
||||||
|
return raw.value();
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
auto chunk_data = segment_->chunk_data<T>(field_id, chunk_id).data();
|
auto chunk_data = segment_->chunk_data<T>(field_id, chunk_id).data();
|
||||||
return [chunk_data](int i) -> const number { return chunk_data[i]; };
|
auto chunk_valid_data =
|
||||||
|
segment_->chunk_data<T>(field_id, chunk_id).valid_data();
|
||||||
|
return [chunk_data, chunk_valid_data](int i) -> const number {
|
||||||
|
if (chunk_valid_data && !chunk_valid_data[i]) {
|
||||||
|
return std::nullopt;
|
||||||
|
}
|
||||||
|
return chunk_data[i];
|
||||||
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
template <>
|
template <>
|
||||||
@ -297,8 +365,12 @@ PhyCompareFilterExpr::GetChunkData<std::string>(FieldId field_id,
|
|||||||
auto& indexing =
|
auto& indexing =
|
||||||
segment_->chunk_scalar_index<std::string>(field_id, chunk_id);
|
segment_->chunk_scalar_index<std::string>(field_id, chunk_id);
|
||||||
if (indexing.HasRawData()) {
|
if (indexing.HasRawData()) {
|
||||||
return [&indexing](int i) -> const std::string {
|
return [&indexing](int i) -> const number {
|
||||||
return indexing.Reverse_Lookup(i);
|
auto raw = indexing.Reverse_Lookup(i);
|
||||||
|
if (!raw.has_value()) {
|
||||||
|
return std::nullopt;
|
||||||
|
}
|
||||||
|
return raw.value();
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -308,12 +380,23 @@ PhyCompareFilterExpr::GetChunkData<std::string>(FieldId field_id,
|
|||||||
.growing_enable_mmap) {
|
.growing_enable_mmap) {
|
||||||
auto chunk_data =
|
auto chunk_data =
|
||||||
segment_->chunk_data<std::string>(field_id, chunk_id).data();
|
segment_->chunk_data<std::string>(field_id, chunk_id).data();
|
||||||
return [chunk_data](int i) -> const number { return chunk_data[i]; };
|
auto chunk_valid_data =
|
||||||
|
segment_->chunk_data<std::string>(field_id, chunk_id).valid_data();
|
||||||
|
return [chunk_data, chunk_valid_data](int i) -> const number {
|
||||||
|
if (chunk_valid_data && !chunk_valid_data[i]) {
|
||||||
|
return std::nullopt;
|
||||||
|
}
|
||||||
|
return chunk_data[i];
|
||||||
|
};
|
||||||
} else {
|
} else {
|
||||||
auto chunk_data =
|
auto chunk_info =
|
||||||
segment_->chunk_view<std::string_view>(field_id, chunk_id)
|
segment_->chunk_view<std::string_view>(field_id, chunk_id);
|
||||||
.first.data();
|
auto chunk_data = chunk_info.first.data();
|
||||||
return [chunk_data](int i) -> const number {
|
auto chunk_valid_data = chunk_info.second.data();
|
||||||
|
return [chunk_data, chunk_valid_data](int i) -> const number {
|
||||||
|
if (chunk_valid_data && !chunk_valid_data[i]) {
|
||||||
|
return std::nullopt;
|
||||||
|
}
|
||||||
return std::string(chunk_data[i]);
|
return std::string(chunk_data[i]);
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
@ -450,9 +533,11 @@ PhyCompareFilterExpr::ExecCompareRightType() {
|
|||||||
return nullptr;
|
return nullptr;
|
||||||
}
|
}
|
||||||
|
|
||||||
auto res_vec =
|
auto res_vec = std::make_shared<ColumnVector>(
|
||||||
std::make_shared<ColumnVector>(TargetBitmap(real_batch_size));
|
TargetBitmap(real_batch_size), TargetBitmap(real_batch_size));
|
||||||
TargetBitmapView res(res_vec->GetRawData(), real_batch_size);
|
TargetBitmapView res(res_vec->GetRawData(), real_batch_size);
|
||||||
|
TargetBitmapView valid_res(res_vec->GetValidRawData(), real_batch_size);
|
||||||
|
valid_res.set();
|
||||||
|
|
||||||
auto expr_type = expr_->op_type_;
|
auto expr_type = expr_->op_type_;
|
||||||
auto execute_sub_batch = [expr_type](const T* left,
|
auto execute_sub_batch = [expr_type](const T* left,
|
||||||
@ -491,15 +576,14 @@ PhyCompareFilterExpr::ExecCompareRightType() {
|
|||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
default:
|
default:
|
||||||
PanicInfo(
|
PanicInfo(OpTypeInvalid,
|
||||||
OpTypeInvalid,
|
fmt::format("unsupported operator type for "
|
||||||
fmt::format(
|
"compare column expr: {}",
|
||||||
"unsupported operator type for compare column expr: {}",
|
|
||||||
expr_type));
|
expr_type));
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
int64_t processed_size =
|
int64_t processed_size =
|
||||||
ProcessBothDataChunks<T, U>(execute_sub_batch, res);
|
ProcessBothDataChunks<T, U>(execute_sub_batch, res, valid_res);
|
||||||
AssertInfo(processed_size == real_batch_size,
|
AssertInfo(processed_size == real_batch_size,
|
||||||
"internal error: expr processed rows {} not equal "
|
"internal error: expr processed rows {} not equal "
|
||||||
"expect batch size {}",
|
"expect batch size {}",
|
||||||
|
|||||||
@ -18,6 +18,7 @@
|
|||||||
|
|
||||||
#include <fmt/core.h>
|
#include <fmt/core.h>
|
||||||
#include <boost/variant.hpp>
|
#include <boost/variant.hpp>
|
||||||
|
#include <optional>
|
||||||
|
|
||||||
#include "common/EasyAssert.h"
|
#include "common/EasyAssert.h"
|
||||||
#include "common/Types.h"
|
#include "common/Types.h"
|
||||||
@ -29,7 +30,7 @@
|
|||||||
namespace milvus {
|
namespace milvus {
|
||||||
namespace exec {
|
namespace exec {
|
||||||
|
|
||||||
using number = boost::variant<bool,
|
using number_type = boost::variant<bool,
|
||||||
int8_t,
|
int8_t,
|
||||||
int16_t,
|
int16_t,
|
||||||
int32_t,
|
int32_t,
|
||||||
@ -37,6 +38,9 @@ using number = boost::variant<bool,
|
|||||||
float,
|
float,
|
||||||
double,
|
double,
|
||||||
std::string>;
|
std::string>;
|
||||||
|
|
||||||
|
using number = std::optional<number_type>;
|
||||||
|
|
||||||
using ChunkDataAccessor = std::function<const number(int)>;
|
using ChunkDataAccessor = std::function<const number(int)>;
|
||||||
using MultipleChunkDataAccessor = std::function<const number()>;
|
using MultipleChunkDataAccessor = std::function<const number()>;
|
||||||
|
|
||||||
@ -264,16 +268,19 @@ class PhyCompareFilterExpr : public Expr {
|
|||||||
|
|
||||||
template <typename T, typename U, typename FUNC, typename... ValTypes>
|
template <typename T, typename U, typename FUNC, typename... ValTypes>
|
||||||
int64_t
|
int64_t
|
||||||
ProcessBothDataChunks(FUNC func, TargetBitmapView res, ValTypes... values) {
|
ProcessBothDataChunks(FUNC func,
|
||||||
|
TargetBitmapView res,
|
||||||
|
TargetBitmapView valid_res,
|
||||||
|
ValTypes... values) {
|
||||||
if (segment_->is_chunked()) {
|
if (segment_->is_chunked()) {
|
||||||
return ProcessBothDataChunksForMultipleChunk<T,
|
return ProcessBothDataChunksForMultipleChunk<T,
|
||||||
U,
|
U,
|
||||||
FUNC,
|
FUNC,
|
||||||
ValTypes...>(
|
ValTypes...>(
|
||||||
func, res, values...);
|
func, res, valid_res, values...);
|
||||||
} else {
|
} else {
|
||||||
return ProcessBothDataChunksForSingleChunk<T, U, FUNC, ValTypes...>(
|
return ProcessBothDataChunksForSingleChunk<T, U, FUNC, ValTypes...>(
|
||||||
func, res, values...);
|
func, res, valid_res, values...);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -281,6 +288,7 @@ class PhyCompareFilterExpr : public Expr {
|
|||||||
int64_t
|
int64_t
|
||||||
ProcessBothDataChunksForSingleChunk(FUNC func,
|
ProcessBothDataChunksForSingleChunk(FUNC func,
|
||||||
TargetBitmapView res,
|
TargetBitmapView res,
|
||||||
|
TargetBitmapView valid_res,
|
||||||
ValTypes... values) {
|
ValTypes... values) {
|
||||||
int64_t processed_size = 0;
|
int64_t processed_size = 0;
|
||||||
|
|
||||||
@ -304,6 +312,20 @@ class PhyCompareFilterExpr : public Expr {
|
|||||||
const T* left_data = left_chunk.data() + data_pos;
|
const T* left_data = left_chunk.data() + data_pos;
|
||||||
const U* right_data = right_chunk.data() + data_pos;
|
const U* right_data = right_chunk.data() + data_pos;
|
||||||
func(left_data, right_data, size, res + processed_size, values...);
|
func(left_data, right_data, size, res + processed_size, values...);
|
||||||
|
const bool* left_valid_data = left_chunk.valid_data();
|
||||||
|
const bool* right_valid_data = right_chunk.valid_data();
|
||||||
|
// mask with valid_data
|
||||||
|
for (int i = 0; i < size; ++i) {
|
||||||
|
if (left_valid_data && !left_valid_data[i + data_pos]) {
|
||||||
|
res[processed_size + i] = false;
|
||||||
|
valid_res[processed_size + i] = false;
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
if (right_valid_data && !right_valid_data[i + data_pos]) {
|
||||||
|
res[processed_size + i] = false;
|
||||||
|
valid_res[processed_size + i] = false;
|
||||||
|
}
|
||||||
|
}
|
||||||
processed_size += size;
|
processed_size += size;
|
||||||
|
|
||||||
if (processed_size >= batch_size_) {
|
if (processed_size >= batch_size_) {
|
||||||
@ -320,6 +342,7 @@ class PhyCompareFilterExpr : public Expr {
|
|||||||
int64_t
|
int64_t
|
||||||
ProcessBothDataChunksForMultipleChunk(FUNC func,
|
ProcessBothDataChunksForMultipleChunk(FUNC func,
|
||||||
TargetBitmapView res,
|
TargetBitmapView res,
|
||||||
|
TargetBitmapView valid_res,
|
||||||
ValTypes... values) {
|
ValTypes... values) {
|
||||||
int64_t processed_size = 0;
|
int64_t processed_size = 0;
|
||||||
|
|
||||||
@ -347,6 +370,20 @@ class PhyCompareFilterExpr : public Expr {
|
|||||||
const T* left_data = left_chunk.data() + data_pos;
|
const T* left_data = left_chunk.data() + data_pos;
|
||||||
const U* right_data = right_chunk.data() + data_pos;
|
const U* right_data = right_chunk.data() + data_pos;
|
||||||
func(left_data, right_data, size, res + processed_size, values...);
|
func(left_data, right_data, size, res + processed_size, values...);
|
||||||
|
const bool* left_valid_data = left_chunk.valid_data();
|
||||||
|
const bool* right_valid_data = right_chunk.valid_data();
|
||||||
|
// mask with valid_data
|
||||||
|
for (int i = 0; i < size; ++i) {
|
||||||
|
if (left_valid_data && !left_valid_data[i + data_pos]) {
|
||||||
|
res[processed_size + i] = false;
|
||||||
|
valid_res[processed_size + i] = false;
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
if (right_valid_data && !right_valid_data[i + data_pos]) {
|
||||||
|
res[processed_size + i] = false;
|
||||||
|
valid_res[processed_size + i] = false;
|
||||||
|
}
|
||||||
|
}
|
||||||
processed_size += size;
|
processed_size += size;
|
||||||
|
|
||||||
if (processed_size >= batch_size_) {
|
if (processed_size >= batch_size_) {
|
||||||
|
|||||||
@ -44,22 +44,30 @@ PhyExistsFilterExpr::EvalJsonExistsForDataSegment() {
|
|||||||
if (real_batch_size == 0) {
|
if (real_batch_size == 0) {
|
||||||
return nullptr;
|
return nullptr;
|
||||||
}
|
}
|
||||||
auto res_vec =
|
auto res_vec = std::make_shared<ColumnVector>(
|
||||||
std::make_shared<ColumnVector>(TargetBitmap(real_batch_size));
|
TargetBitmap(real_batch_size), TargetBitmap(real_batch_size));
|
||||||
TargetBitmapView res(res_vec->GetRawData(), real_batch_size);
|
TargetBitmapView res(res_vec->GetRawData(), real_batch_size);
|
||||||
|
TargetBitmapView valid_res(res_vec->GetValidRawData(), real_batch_size);
|
||||||
|
valid_res.set();
|
||||||
|
|
||||||
auto pointer = milvus::Json::pointer(expr_->column_.nested_path_);
|
auto pointer = milvus::Json::pointer(expr_->column_.nested_path_);
|
||||||
auto execute_sub_batch = [](const milvus::Json* data,
|
auto execute_sub_batch = [](const milvus::Json* data,
|
||||||
|
const bool* valid_data,
|
||||||
const int size,
|
const int size,
|
||||||
TargetBitmapView res,
|
TargetBitmapView res,
|
||||||
|
TargetBitmapView valid_res,
|
||||||
const std::string& pointer) {
|
const std::string& pointer) {
|
||||||
for (int i = 0; i < size; ++i) {
|
for (int i = 0; i < size; ++i) {
|
||||||
|
if (valid_data != nullptr && !valid_data[i]) {
|
||||||
|
res[i] = valid_res[i] = false;
|
||||||
|
continue;
|
||||||
|
}
|
||||||
res[i] = data[i].exist(pointer);
|
res[i] = data[i].exist(pointer);
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
int64_t processed_size = ProcessDataChunks<Json>(
|
int64_t processed_size = ProcessDataChunks<Json>(
|
||||||
execute_sub_batch, std::nullptr_t{}, res, pointer);
|
execute_sub_batch, std::nullptr_t{}, res, valid_res, pointer);
|
||||||
AssertInfo(processed_size == real_batch_size,
|
AssertInfo(processed_size == real_batch_size,
|
||||||
"internal error: expr processed rows {} not equal "
|
"internal error: expr processed rows {} not equal "
|
||||||
"expect batch size {}",
|
"expect batch size {}",
|
||||||
|
|||||||
@ -16,6 +16,7 @@
|
|||||||
|
|
||||||
#pragma once
|
#pragma once
|
||||||
|
|
||||||
|
#include <algorithm>
|
||||||
#include <memory>
|
#include <memory>
|
||||||
#include <string>
|
#include <string>
|
||||||
|
|
||||||
@ -248,6 +249,7 @@ class SegmentExpr : public Expr {
|
|||||||
FUNC func,
|
FUNC func,
|
||||||
std::function<bool(const milvus::SkipIndex&, FieldId, int)> skip_func,
|
std::function<bool(const milvus::SkipIndex&, FieldId, int)> skip_func,
|
||||||
TargetBitmapView res,
|
TargetBitmapView res,
|
||||||
|
TargetBitmapView valid_res,
|
||||||
ValTypes... values) {
|
ValTypes... values) {
|
||||||
// For sealed segment, only single chunk
|
// For sealed segment, only single chunk
|
||||||
Assert(num_data_chunk_ == 1);
|
Assert(num_data_chunk_ == 1);
|
||||||
@ -256,13 +258,16 @@ class SegmentExpr : public Expr {
|
|||||||
|
|
||||||
auto& skip_index = segment_->GetSkipIndex();
|
auto& skip_index = segment_->GetSkipIndex();
|
||||||
if (!skip_func || !skip_func(skip_index, field_id_, 0)) {
|
if (!skip_func || !skip_func(skip_index, field_id_, 0)) {
|
||||||
auto data_vec =
|
auto views_info = segment_->get_batch_views<T>(
|
||||||
segment_
|
field_id_, 0, current_data_chunk_pos_, need_size);
|
||||||
->get_batch_views<T>(
|
// first is the raw data, second is valid_data
|
||||||
field_id_, 0, current_data_chunk_pos_, need_size)
|
// use valid_data to see if raw data is null
|
||||||
.first;
|
func(views_info.first.data(),
|
||||||
|
views_info.second.data(),
|
||||||
func(data_vec.data(), need_size, res, values...);
|
need_size,
|
||||||
|
res,
|
||||||
|
valid_res,
|
||||||
|
values...);
|
||||||
}
|
}
|
||||||
current_data_chunk_pos_ += need_size;
|
current_data_chunk_pos_ += need_size;
|
||||||
return need_size;
|
return need_size;
|
||||||
@ -274,6 +279,7 @@ class SegmentExpr : public Expr {
|
|||||||
FUNC func,
|
FUNC func,
|
||||||
std::function<bool(const milvus::SkipIndex&, FieldId, int)> skip_func,
|
std::function<bool(const milvus::SkipIndex&, FieldId, int)> skip_func,
|
||||||
TargetBitmapView res,
|
TargetBitmapView res,
|
||||||
|
TargetBitmapView valid_res,
|
||||||
ValTypes... values) {
|
ValTypes... values) {
|
||||||
int64_t processed_size = 0;
|
int64_t processed_size = 0;
|
||||||
|
|
||||||
@ -281,7 +287,7 @@ class SegmentExpr : public Expr {
|
|||||||
std::is_same_v<T, Json>) {
|
std::is_same_v<T, Json>) {
|
||||||
if (segment_->type() == SegmentType::Sealed) {
|
if (segment_->type() == SegmentType::Sealed) {
|
||||||
return ProcessChunkForSealedSeg<T>(
|
return ProcessChunkForSealedSeg<T>(
|
||||||
func, skip_func, res, values...);
|
func, skip_func, res, valid_res, values...);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -303,7 +309,16 @@ class SegmentExpr : public Expr {
|
|||||||
if (!skip_func || !skip_func(skip_index, field_id_, i)) {
|
if (!skip_func || !skip_func(skip_index, field_id_, i)) {
|
||||||
auto chunk = segment_->chunk_data<T>(field_id_, i);
|
auto chunk = segment_->chunk_data<T>(field_id_, i);
|
||||||
const T* data = chunk.data() + data_pos;
|
const T* data = chunk.data() + data_pos;
|
||||||
func(data, size, res + processed_size, values...);
|
const bool* valid_data = chunk.valid_data();
|
||||||
|
if (valid_data != nullptr) {
|
||||||
|
valid_data += data_pos;
|
||||||
|
}
|
||||||
|
func(data,
|
||||||
|
valid_data,
|
||||||
|
size,
|
||||||
|
res + processed_size,
|
||||||
|
valid_res + processed_size,
|
||||||
|
values...);
|
||||||
}
|
}
|
||||||
|
|
||||||
processed_size += size;
|
processed_size += size;
|
||||||
@ -322,6 +337,7 @@ class SegmentExpr : public Expr {
|
|||||||
FUNC func,
|
FUNC func,
|
||||||
std::function<bool(const milvus::SkipIndex&, FieldId, int)> skip_func,
|
std::function<bool(const milvus::SkipIndex&, FieldId, int)> skip_func,
|
||||||
TargetBitmapView res,
|
TargetBitmapView res,
|
||||||
|
TargetBitmapView valid_res,
|
||||||
ValTypes... values) {
|
ValTypes... values) {
|
||||||
int64_t processed_size = 0;
|
int64_t processed_size = 0;
|
||||||
|
|
||||||
@ -356,13 +372,21 @@ class SegmentExpr : public Expr {
|
|||||||
if constexpr (std::is_same_v<T, std::string_view> ||
|
if constexpr (std::is_same_v<T, std::string_view> ||
|
||||||
std::is_same_v<T, Json>) {
|
std::is_same_v<T, Json>) {
|
||||||
if (segment_->type() == SegmentType::Sealed) {
|
if (segment_->type() == SegmentType::Sealed) {
|
||||||
|
// first is the raw data, second is valid_data
|
||||||
|
// use valid_data to see if raw data is null
|
||||||
auto data_vec = segment_
|
auto data_vec = segment_
|
||||||
->get_batch_views<T>(
|
->get_batch_views<T>(
|
||||||
field_id_, i, data_pos, size)
|
field_id_, i, data_pos, size)
|
||||||
.first;
|
.first;
|
||||||
|
auto valid_data = segment_
|
||||||
|
->get_batch_views<T>(
|
||||||
|
field_id_, i, data_pos, size)
|
||||||
|
.second;
|
||||||
func(data_vec.data(),
|
func(data_vec.data(),
|
||||||
|
valid_data.data(),
|
||||||
size,
|
size,
|
||||||
res + processed_size,
|
res + processed_size,
|
||||||
|
valid_res + processed_size,
|
||||||
values...);
|
values...);
|
||||||
is_seal = true;
|
is_seal = true;
|
||||||
}
|
}
|
||||||
@ -370,7 +394,16 @@ class SegmentExpr : public Expr {
|
|||||||
if (!is_seal) {
|
if (!is_seal) {
|
||||||
auto chunk = segment_->chunk_data<T>(field_id_, i);
|
auto chunk = segment_->chunk_data<T>(field_id_, i);
|
||||||
const T* data = chunk.data() + data_pos;
|
const T* data = chunk.data() + data_pos;
|
||||||
func(data, size, res + processed_size, values...);
|
const bool* valid_data = chunk.valid_data();
|
||||||
|
if (valid_data != nullptr) {
|
||||||
|
valid_data += data_pos;
|
||||||
|
}
|
||||||
|
func(data,
|
||||||
|
valid_data,
|
||||||
|
size,
|
||||||
|
res + processed_size,
|
||||||
|
valid_res + processed_size,
|
||||||
|
values...);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -403,8 +436,10 @@ class SegmentExpr : public Expr {
|
|||||||
|
|
||||||
int
|
int
|
||||||
ProcessIndexOneChunk(TargetBitmap& result,
|
ProcessIndexOneChunk(TargetBitmap& result,
|
||||||
|
TargetBitmap& valid_result,
|
||||||
size_t chunk_id,
|
size_t chunk_id,
|
||||||
const TargetBitmap& chunk_res,
|
const TargetBitmap& chunk_res,
|
||||||
|
const TargetBitmap& chunk_valid_res,
|
||||||
int processed_rows) {
|
int processed_rows) {
|
||||||
auto data_pos =
|
auto data_pos =
|
||||||
chunk_id == current_index_chunk_ ? current_index_chunk_pos_ : 0;
|
chunk_id == current_index_chunk_ ? current_index_chunk_pos_ : 0;
|
||||||
@ -416,33 +451,41 @@ class SegmentExpr : public Expr {
|
|||||||
// chunk_res.begin() + data_pos,
|
// chunk_res.begin() + data_pos,
|
||||||
// chunk_res.begin() + data_pos + size);
|
// chunk_res.begin() + data_pos + size);
|
||||||
result.append(chunk_res, data_pos, size);
|
result.append(chunk_res, data_pos, size);
|
||||||
|
valid_result.append(chunk_valid_res, data_pos, size);
|
||||||
return size;
|
return size;
|
||||||
}
|
}
|
||||||
|
|
||||||
template <typename T, typename FUNC, typename... ValTypes>
|
template <typename T, typename FUNC, typename... ValTypes>
|
||||||
TargetBitmap
|
VectorPtr
|
||||||
ProcessIndexChunks(FUNC func, ValTypes... values) {
|
ProcessIndexChunks(FUNC func, ValTypes... values) {
|
||||||
typedef std::
|
typedef std::
|
||||||
conditional_t<std::is_same_v<T, std::string_view>, std::string, T>
|
conditional_t<std::is_same_v<T, std::string_view>, std::string, T>
|
||||||
IndexInnerType;
|
IndexInnerType;
|
||||||
using Index = index::ScalarIndex<IndexInnerType>;
|
using Index = index::ScalarIndex<IndexInnerType>;
|
||||||
TargetBitmap result;
|
TargetBitmap result;
|
||||||
|
TargetBitmap valid_result;
|
||||||
int processed_rows = 0;
|
int processed_rows = 0;
|
||||||
|
|
||||||
for (size_t i = current_index_chunk_; i < num_index_chunk_; i++) {
|
for (size_t i = current_index_chunk_; i < num_index_chunk_; i++) {
|
||||||
// This cache result help getting result for every batch loop.
|
// This cache result help getting result for every batch loop.
|
||||||
// It avoids indexing execute for evevy batch because indexing
|
// It avoids indexing execute for every batch because indexing
|
||||||
// executing costs quite much time.
|
// executing costs quite much time.
|
||||||
if (cached_index_chunk_id_ != i) {
|
if (cached_index_chunk_id_ != i) {
|
||||||
const Index& index =
|
const Index& index =
|
||||||
segment_->chunk_scalar_index<IndexInnerType>(field_id_, i);
|
segment_->chunk_scalar_index<IndexInnerType>(field_id_, i);
|
||||||
auto* index_ptr = const_cast<Index*>(&index);
|
auto* index_ptr = const_cast<Index*>(&index);
|
||||||
cached_index_chunk_res_ = std::move(func(index_ptr, values...));
|
cached_index_chunk_res_ = std::move(func(index_ptr, values...));
|
||||||
|
auto valid_result = index_ptr->IsNotNull();
|
||||||
|
cached_index_chunk_valid_res_ = std::move(valid_result);
|
||||||
cached_index_chunk_id_ = i;
|
cached_index_chunk_id_ = i;
|
||||||
}
|
}
|
||||||
|
|
||||||
auto size = ProcessIndexOneChunk(
|
auto size = ProcessIndexOneChunk(result,
|
||||||
result, i, cached_index_chunk_res_, processed_rows);
|
valid_result,
|
||||||
|
i,
|
||||||
|
cached_index_chunk_res_,
|
||||||
|
cached_index_chunk_valid_res_,
|
||||||
|
processed_rows);
|
||||||
|
|
||||||
if (processed_rows + size >= batch_size_) {
|
if (processed_rows + size >= batch_size_) {
|
||||||
current_index_chunk_ = i;
|
current_index_chunk_ = i;
|
||||||
@ -454,23 +497,136 @@ class SegmentExpr : public Expr {
|
|||||||
processed_rows += size;
|
processed_rows += size;
|
||||||
}
|
}
|
||||||
|
|
||||||
return result;
|
return std::make_shared<ColumnVector>(std::move(result),
|
||||||
|
std::move(valid_result));
|
||||||
|
}
|
||||||
|
|
||||||
|
template <typename T>
|
||||||
|
TargetBitmap
|
||||||
|
ProcessChunksForValid(bool use_index) {
|
||||||
|
if (use_index) {
|
||||||
|
return ProcessIndexChunksForValid<T>();
|
||||||
|
} else {
|
||||||
|
return ProcessDataChunksForValid<T>();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
template <typename T>
|
||||||
|
TargetBitmap
|
||||||
|
ProcessDataChunksForValid() {
|
||||||
|
TargetBitmap valid_result(batch_size_);
|
||||||
|
valid_result.set();
|
||||||
|
int64_t processed_size = 0;
|
||||||
|
for (size_t i = current_data_chunk_; i < num_data_chunk_; i++) {
|
||||||
|
auto data_pos =
|
||||||
|
(i == current_data_chunk_) ? current_data_chunk_pos_ : 0;
|
||||||
|
auto size =
|
||||||
|
(i == (num_data_chunk_ - 1))
|
||||||
|
? (segment_->type() == SegmentType::Growing
|
||||||
|
? (active_count_ % size_per_chunk_ == 0
|
||||||
|
? size_per_chunk_ - data_pos
|
||||||
|
: active_count_ % size_per_chunk_ - data_pos)
|
||||||
|
: active_count_ - data_pos)
|
||||||
|
: size_per_chunk_ - data_pos;
|
||||||
|
|
||||||
|
size = std::min(size, batch_size_ - processed_size);
|
||||||
|
|
||||||
|
auto chunk = segment_->chunk_data<T>(field_id_, i);
|
||||||
|
const bool* valid_data = chunk.valid_data();
|
||||||
|
if (valid_data == nullptr) {
|
||||||
|
return valid_result;
|
||||||
|
}
|
||||||
|
valid_data += data_pos;
|
||||||
|
for (int i = 0; i < size; i++) {
|
||||||
|
if (!valid_data[i]) {
|
||||||
|
valid_result[i + data_pos] = false;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
processed_size += size;
|
||||||
|
if (processed_size >= batch_size_) {
|
||||||
|
current_data_chunk_ = i;
|
||||||
|
current_data_chunk_pos_ = data_pos + size;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return valid_result;
|
||||||
|
}
|
||||||
|
|
||||||
|
int
|
||||||
|
ProcessIndexOneChunkForValid(TargetBitmap& valid_result,
|
||||||
|
size_t chunk_id,
|
||||||
|
const TargetBitmap& chunk_valid_res,
|
||||||
|
int processed_rows) {
|
||||||
|
auto data_pos =
|
||||||
|
chunk_id == current_index_chunk_ ? current_index_chunk_pos_ : 0;
|
||||||
|
auto size = std::min(
|
||||||
|
std::min(size_per_chunk_ - data_pos, batch_size_ - processed_rows),
|
||||||
|
int64_t(chunk_valid_res.size()));
|
||||||
|
|
||||||
|
valid_result.append(chunk_valid_res, data_pos, size);
|
||||||
|
return size;
|
||||||
|
}
|
||||||
|
|
||||||
|
template <typename T>
|
||||||
|
TargetBitmap
|
||||||
|
ProcessIndexChunksForValid() {
|
||||||
|
typedef std::
|
||||||
|
conditional_t<std::is_same_v<T, std::string_view>, std::string, T>
|
||||||
|
IndexInnerType;
|
||||||
|
using Index = index::ScalarIndex<IndexInnerType>;
|
||||||
|
int processed_rows = 0;
|
||||||
|
TargetBitmap valid_result;
|
||||||
|
valid_result.set();
|
||||||
|
|
||||||
|
for (size_t i = current_index_chunk_; i < num_index_chunk_; i++) {
|
||||||
|
// This cache result help getting result for every batch loop.
|
||||||
|
// It avoids indexing execute for every batch because indexing
|
||||||
|
// executing costs quite much time.
|
||||||
|
if (cached_index_chunk_id_ != i) {
|
||||||
|
const Index& index =
|
||||||
|
segment_->chunk_scalar_index<IndexInnerType>(field_id_, i);
|
||||||
|
auto* index_ptr = const_cast<Index*>(&index);
|
||||||
|
auto execute_sub_batch = [](Index* index_ptr) {
|
||||||
|
TargetBitmap res = index_ptr->IsNotNull();
|
||||||
|
return res;
|
||||||
|
};
|
||||||
|
cached_index_chunk_valid_res_ = execute_sub_batch(index_ptr);
|
||||||
|
cached_index_chunk_id_ = i;
|
||||||
|
}
|
||||||
|
|
||||||
|
auto size = ProcessIndexOneChunkForValid(
|
||||||
|
valid_result, i, cached_index_chunk_valid_res_, processed_rows);
|
||||||
|
|
||||||
|
if (processed_rows + size >= batch_size_) {
|
||||||
|
current_index_chunk_ = i;
|
||||||
|
current_index_chunk_pos_ = i == current_index_chunk_
|
||||||
|
? current_index_chunk_pos_ + size
|
||||||
|
: size;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
processed_rows += size;
|
||||||
|
}
|
||||||
|
return valid_result;
|
||||||
}
|
}
|
||||||
|
|
||||||
template <typename FUNC, typename... ValTypes>
|
template <typename FUNC, typename... ValTypes>
|
||||||
TargetBitmap
|
VectorPtr
|
||||||
ProcessTextMatchIndex(FUNC func, ValTypes... values) {
|
ProcessTextMatchIndex(FUNC func, ValTypes... values) {
|
||||||
TargetBitmap result;
|
TargetBitmap result;
|
||||||
|
TargetBitmap valid_result;
|
||||||
|
|
||||||
if (cached_match_res_ == nullptr) {
|
if (cached_match_res_ == nullptr) {
|
||||||
auto index = segment_->GetTextIndex(field_id_);
|
auto index = segment_->GetTextIndex(field_id_);
|
||||||
auto res = std::move(func(index, values...));
|
auto res = std::move(func(index, values...));
|
||||||
|
auto valid_res = index->IsNotNull();
|
||||||
cached_match_res_ = std::make_shared<TargetBitmap>(std::move(res));
|
cached_match_res_ = std::make_shared<TargetBitmap>(std::move(res));
|
||||||
|
cached_index_chunk_valid_res_ = std::move(valid_res);
|
||||||
if (cached_match_res_->size() < active_count_) {
|
if (cached_match_res_->size() < active_count_) {
|
||||||
// some entities are not visible in inverted index.
|
// some entities are not visible in inverted index.
|
||||||
// only happend on growing segment.
|
// only happend on growing segment.
|
||||||
TargetBitmap tail(active_count_ - cached_match_res_->size());
|
TargetBitmap tail(active_count_ - cached_match_res_->size());
|
||||||
cached_match_res_->append(tail);
|
cached_match_res_->append(tail);
|
||||||
|
cached_index_chunk_valid_res_.append(tail);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -481,9 +637,13 @@ class SegmentExpr : public Expr {
|
|||||||
: batch_size_;
|
: batch_size_;
|
||||||
result.append(
|
result.append(
|
||||||
*cached_match_res_, current_data_chunk_pos_, real_batch_size);
|
*cached_match_res_, current_data_chunk_pos_, real_batch_size);
|
||||||
|
valid_result.append(cached_index_chunk_valid_res_,
|
||||||
|
current_data_chunk_pos_,
|
||||||
|
real_batch_size);
|
||||||
current_data_chunk_pos_ += real_batch_size;
|
current_data_chunk_pos_ += real_batch_size;
|
||||||
|
|
||||||
return result;
|
return std::make_shared<ColumnVector>(std::move(result),
|
||||||
|
std::move(valid_result));
|
||||||
}
|
}
|
||||||
|
|
||||||
template <typename T, typename FUNC, typename... ValTypes>
|
template <typename T, typename FUNC, typename... ValTypes>
|
||||||
@ -581,6 +741,8 @@ class SegmentExpr : public Expr {
|
|||||||
// Cache for index scan to avoid search index every batch
|
// Cache for index scan to avoid search index every batch
|
||||||
int64_t cached_index_chunk_id_{-1};
|
int64_t cached_index_chunk_id_{-1};
|
||||||
TargetBitmap cached_index_chunk_res_{};
|
TargetBitmap cached_index_chunk_res_{};
|
||||||
|
// Cache for chunk valid res.
|
||||||
|
TargetBitmap cached_index_chunk_valid_res_{};
|
||||||
|
|
||||||
// Cache for text match.
|
// Cache for text match.
|
||||||
std::shared_ptr<TargetBitmap> cached_match_res_{nullptr};
|
std::shared_ptr<TargetBitmap> cached_match_res_{nullptr};
|
||||||
|
|||||||
@ -15,6 +15,7 @@
|
|||||||
// limitations under the License.
|
// limitations under the License.
|
||||||
|
|
||||||
#include "JsonContainsExpr.h"
|
#include "JsonContainsExpr.h"
|
||||||
|
#include <utility>
|
||||||
#include "common/Types.h"
|
#include "common/Types.h"
|
||||||
|
|
||||||
namespace milvus {
|
namespace milvus {
|
||||||
@ -173,17 +174,21 @@ PhyJsonContainsFilterExpr::ExecArrayContains() {
|
|||||||
AssertInfo(expr_->column_.nested_path_.size() == 0,
|
AssertInfo(expr_->column_.nested_path_.size() == 0,
|
||||||
"[ExecArrayContains]nested path must be null");
|
"[ExecArrayContains]nested path must be null");
|
||||||
|
|
||||||
auto res_vec =
|
auto res_vec = std::make_shared<ColumnVector>(
|
||||||
std::make_shared<ColumnVector>(TargetBitmap(real_batch_size));
|
TargetBitmap(real_batch_size), TargetBitmap(real_batch_size));
|
||||||
TargetBitmapView res(res_vec->GetRawData(), real_batch_size);
|
TargetBitmapView res(res_vec->GetRawData(), real_batch_size);
|
||||||
|
TargetBitmapView valid_res(res_vec->GetValidRawData(), real_batch_size);
|
||||||
|
valid_res.set();
|
||||||
|
|
||||||
std::unordered_set<GetType> elements;
|
std::unordered_set<GetType> elements;
|
||||||
for (auto const& element : expr_->vals_) {
|
for (auto const& element : expr_->vals_) {
|
||||||
elements.insert(GetValueFromProto<GetType>(element));
|
elements.insert(GetValueFromProto<GetType>(element));
|
||||||
}
|
}
|
||||||
auto execute_sub_batch = [](const milvus::ArrayView* data,
|
auto execute_sub_batch = [](const milvus::ArrayView* data,
|
||||||
|
const bool* valid_data,
|
||||||
const int size,
|
const int size,
|
||||||
TargetBitmapView res,
|
TargetBitmapView res,
|
||||||
|
TargetBitmapView valid_res,
|
||||||
const std::unordered_set<GetType>& elements) {
|
const std::unordered_set<GetType>& elements) {
|
||||||
auto executor = [&](size_t i) {
|
auto executor = [&](size_t i) {
|
||||||
const auto& array = data[i];
|
const auto& array = data[i];
|
||||||
@ -195,12 +200,16 @@ PhyJsonContainsFilterExpr::ExecArrayContains() {
|
|||||||
return false;
|
return false;
|
||||||
};
|
};
|
||||||
for (int i = 0; i < size; ++i) {
|
for (int i = 0; i < size; ++i) {
|
||||||
|
if (valid_data != nullptr && !valid_data[i]) {
|
||||||
|
res[i] = valid_res[i] = false;
|
||||||
|
continue;
|
||||||
|
}
|
||||||
res[i] = executor(i);
|
res[i] = executor(i);
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
int64_t processed_size = ProcessDataChunks<milvus::ArrayView>(
|
int64_t processed_size = ProcessDataChunks<milvus::ArrayView>(
|
||||||
execute_sub_batch, std::nullptr_t{}, res, elements);
|
execute_sub_batch, std::nullptr_t{}, res, valid_res, elements);
|
||||||
AssertInfo(processed_size == real_batch_size,
|
AssertInfo(processed_size == real_batch_size,
|
||||||
"internal error: expr processed rows {} not equal "
|
"internal error: expr processed rows {} not equal "
|
||||||
"expect batch size {}",
|
"expect batch size {}",
|
||||||
@ -221,9 +230,11 @@ PhyJsonContainsFilterExpr::ExecJsonContains() {
|
|||||||
return nullptr;
|
return nullptr;
|
||||||
}
|
}
|
||||||
|
|
||||||
auto res_vec =
|
auto res_vec = std::make_shared<ColumnVector>(
|
||||||
std::make_shared<ColumnVector>(TargetBitmap(real_batch_size));
|
TargetBitmap(real_batch_size), TargetBitmap(real_batch_size));
|
||||||
TargetBitmapView res(res_vec->GetRawData(), real_batch_size);
|
TargetBitmapView res(res_vec->GetRawData(), real_batch_size);
|
||||||
|
TargetBitmapView valid_res(res_vec->GetValidRawData(), real_batch_size);
|
||||||
|
valid_res.set();
|
||||||
|
|
||||||
std::unordered_set<GetType> elements;
|
std::unordered_set<GetType> elements;
|
||||||
auto pointer = milvus::Json::pointer(expr_->column_.nested_path_);
|
auto pointer = milvus::Json::pointer(expr_->column_.nested_path_);
|
||||||
@ -231,8 +242,10 @@ PhyJsonContainsFilterExpr::ExecJsonContains() {
|
|||||||
elements.insert(GetValueFromProto<GetType>(element));
|
elements.insert(GetValueFromProto<GetType>(element));
|
||||||
}
|
}
|
||||||
auto execute_sub_batch = [](const milvus::Json* data,
|
auto execute_sub_batch = [](const milvus::Json* data,
|
||||||
|
const bool* valid_data,
|
||||||
const int size,
|
const int size,
|
||||||
TargetBitmapView res,
|
TargetBitmapView res,
|
||||||
|
TargetBitmapView valid_res,
|
||||||
const std::string& pointer,
|
const std::string& pointer,
|
||||||
const std::unordered_set<GetType>& elements) {
|
const std::unordered_set<GetType>& elements) {
|
||||||
auto executor = [&](size_t i) {
|
auto executor = [&](size_t i) {
|
||||||
@ -253,12 +266,16 @@ PhyJsonContainsFilterExpr::ExecJsonContains() {
|
|||||||
return false;
|
return false;
|
||||||
};
|
};
|
||||||
for (size_t i = 0; i < size; ++i) {
|
for (size_t i = 0; i < size; ++i) {
|
||||||
|
if (valid_data != nullptr && !valid_data[i]) {
|
||||||
|
res[i] = valid_res[i] = false;
|
||||||
|
continue;
|
||||||
|
}
|
||||||
res[i] = executor(i);
|
res[i] = executor(i);
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
int64_t processed_size = ProcessDataChunks<Json>(
|
int64_t processed_size = ProcessDataChunks<Json>(
|
||||||
execute_sub_batch, std::nullptr_t{}, res, pointer, elements);
|
execute_sub_batch, std::nullptr_t{}, res, valid_res, pointer, elements);
|
||||||
AssertInfo(processed_size == real_batch_size,
|
AssertInfo(processed_size == real_batch_size,
|
||||||
"internal error: expr processed rows {} not equal "
|
"internal error: expr processed rows {} not equal "
|
||||||
"expect batch size {}",
|
"expect batch size {}",
|
||||||
@ -274,9 +291,11 @@ PhyJsonContainsFilterExpr::ExecJsonContainsArray() {
|
|||||||
return nullptr;
|
return nullptr;
|
||||||
}
|
}
|
||||||
|
|
||||||
auto res_vec =
|
auto res_vec = std::make_shared<ColumnVector>(
|
||||||
std::make_shared<ColumnVector>(TargetBitmap(real_batch_size));
|
TargetBitmap(real_batch_size), TargetBitmap(real_batch_size));
|
||||||
TargetBitmapView res(res_vec->GetRawData(), real_batch_size);
|
TargetBitmapView res(res_vec->GetRawData(), real_batch_size);
|
||||||
|
TargetBitmapView valid_res(res_vec->GetValidRawData(), real_batch_size);
|
||||||
|
valid_res.set();
|
||||||
|
|
||||||
auto pointer = milvus::Json::pointer(expr_->column_.nested_path_);
|
auto pointer = milvus::Json::pointer(expr_->column_.nested_path_);
|
||||||
std::vector<proto::plan::Array> elements;
|
std::vector<proto::plan::Array> elements;
|
||||||
@ -285,8 +304,10 @@ PhyJsonContainsFilterExpr::ExecJsonContainsArray() {
|
|||||||
}
|
}
|
||||||
auto execute_sub_batch =
|
auto execute_sub_batch =
|
||||||
[](const milvus::Json* data,
|
[](const milvus::Json* data,
|
||||||
|
const bool* valid_data,
|
||||||
const int size,
|
const int size,
|
||||||
TargetBitmapView res,
|
TargetBitmapView res,
|
||||||
|
TargetBitmapView valid_res,
|
||||||
const std::string& pointer,
|
const std::string& pointer,
|
||||||
const std::vector<proto::plan::Array>& elements) {
|
const std::vector<proto::plan::Array>& elements) {
|
||||||
auto executor = [&](size_t i) -> bool {
|
auto executor = [&](size_t i) -> bool {
|
||||||
@ -316,12 +337,16 @@ PhyJsonContainsFilterExpr::ExecJsonContainsArray() {
|
|||||||
return false;
|
return false;
|
||||||
};
|
};
|
||||||
for (size_t i = 0; i < size; ++i) {
|
for (size_t i = 0; i < size; ++i) {
|
||||||
|
if (valid_data != nullptr && !valid_data[i]) {
|
||||||
|
res[i] = valid_res[i] = false;
|
||||||
|
continue;
|
||||||
|
}
|
||||||
res[i] = executor(i);
|
res[i] = executor(i);
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
int64_t processed_size = ProcessDataChunks<milvus::Json>(
|
int64_t processed_size = ProcessDataChunks<milvus::Json>(
|
||||||
execute_sub_batch, std::nullptr_t{}, res, pointer, elements);
|
execute_sub_batch, std::nullptr_t{}, res, valid_res, pointer, elements);
|
||||||
AssertInfo(processed_size == real_batch_size,
|
AssertInfo(processed_size == real_batch_size,
|
||||||
"internal error: expr processed rows {} not equal "
|
"internal error: expr processed rows {} not equal "
|
||||||
"expect batch size {}",
|
"expect batch size {}",
|
||||||
@ -344,9 +369,11 @@ PhyJsonContainsFilterExpr::ExecArrayContainsAll() {
|
|||||||
return nullptr;
|
return nullptr;
|
||||||
}
|
}
|
||||||
|
|
||||||
auto res_vec =
|
auto res_vec = std::make_shared<ColumnVector>(
|
||||||
std::make_shared<ColumnVector>(TargetBitmap(real_batch_size));
|
TargetBitmap(real_batch_size), TargetBitmap(real_batch_size));
|
||||||
TargetBitmapView res(res_vec->GetRawData(), real_batch_size);
|
TargetBitmapView res(res_vec->GetRawData(), real_batch_size);
|
||||||
|
TargetBitmapView valid_res(res_vec->GetValidRawData(), real_batch_size);
|
||||||
|
valid_res.set();
|
||||||
|
|
||||||
std::unordered_set<GetType> elements;
|
std::unordered_set<GetType> elements;
|
||||||
for (auto const& element : expr_->vals_) {
|
for (auto const& element : expr_->vals_) {
|
||||||
@ -354,8 +381,10 @@ PhyJsonContainsFilterExpr::ExecArrayContainsAll() {
|
|||||||
}
|
}
|
||||||
|
|
||||||
auto execute_sub_batch = [](const milvus::ArrayView* data,
|
auto execute_sub_batch = [](const milvus::ArrayView* data,
|
||||||
|
const bool* valid_data,
|
||||||
const int size,
|
const int size,
|
||||||
TargetBitmapView res,
|
TargetBitmapView res,
|
||||||
|
TargetBitmapView valid_res,
|
||||||
const std::unordered_set<GetType>& elements) {
|
const std::unordered_set<GetType>& elements) {
|
||||||
auto executor = [&](size_t i) {
|
auto executor = [&](size_t i) {
|
||||||
std::unordered_set<GetType> tmp_elements(elements);
|
std::unordered_set<GetType> tmp_elements(elements);
|
||||||
@ -369,12 +398,16 @@ PhyJsonContainsFilterExpr::ExecArrayContainsAll() {
|
|||||||
return tmp_elements.size() == 0;
|
return tmp_elements.size() == 0;
|
||||||
};
|
};
|
||||||
for (int i = 0; i < size; ++i) {
|
for (int i = 0; i < size; ++i) {
|
||||||
|
if (valid_data != nullptr && !valid_data[i]) {
|
||||||
|
res[i] = valid_res[i] = false;
|
||||||
|
continue;
|
||||||
|
}
|
||||||
res[i] = executor(i);
|
res[i] = executor(i);
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
int64_t processed_size = ProcessDataChunks<milvus::ArrayView>(
|
int64_t processed_size = ProcessDataChunks<milvus::ArrayView>(
|
||||||
execute_sub_batch, std::nullptr_t{}, res, elements);
|
execute_sub_batch, std::nullptr_t{}, res, valid_res, elements);
|
||||||
AssertInfo(processed_size == real_batch_size,
|
AssertInfo(processed_size == real_batch_size,
|
||||||
"internal error: expr processed rows {} not equal "
|
"internal error: expr processed rows {} not equal "
|
||||||
"expect batch size {}",
|
"expect batch size {}",
|
||||||
@ -395,9 +428,11 @@ PhyJsonContainsFilterExpr::ExecJsonContainsAll() {
|
|||||||
return nullptr;
|
return nullptr;
|
||||||
}
|
}
|
||||||
|
|
||||||
auto res_vec =
|
auto res_vec = std::make_shared<ColumnVector>(
|
||||||
std::make_shared<ColumnVector>(TargetBitmap(real_batch_size));
|
TargetBitmap(real_batch_size), TargetBitmap(real_batch_size));
|
||||||
TargetBitmapView res(res_vec->GetRawData(), real_batch_size);
|
TargetBitmapView res(res_vec->GetRawData(), real_batch_size);
|
||||||
|
TargetBitmapView valid_res(res_vec->GetValidRawData(), real_batch_size);
|
||||||
|
valid_res.set();
|
||||||
|
|
||||||
auto pointer = milvus::Json::pointer(expr_->column_.nested_path_);
|
auto pointer = milvus::Json::pointer(expr_->column_.nested_path_);
|
||||||
std::unordered_set<GetType> elements;
|
std::unordered_set<GetType> elements;
|
||||||
@ -406,8 +441,10 @@ PhyJsonContainsFilterExpr::ExecJsonContainsAll() {
|
|||||||
}
|
}
|
||||||
|
|
||||||
auto execute_sub_batch = [](const milvus::Json* data,
|
auto execute_sub_batch = [](const milvus::Json* data,
|
||||||
|
const bool* valid_data,
|
||||||
const int size,
|
const int size,
|
||||||
TargetBitmapView res,
|
TargetBitmapView res,
|
||||||
|
TargetBitmapView valid_res,
|
||||||
const std::string& pointer,
|
const std::string& pointer,
|
||||||
const std::unordered_set<GetType>& elements) {
|
const std::unordered_set<GetType>& elements) {
|
||||||
auto executor = [&](const size_t i) -> bool {
|
auto executor = [&](const size_t i) -> bool {
|
||||||
@ -431,12 +468,16 @@ PhyJsonContainsFilterExpr::ExecJsonContainsAll() {
|
|||||||
return tmp_elements.size() == 0;
|
return tmp_elements.size() == 0;
|
||||||
};
|
};
|
||||||
for (size_t i = 0; i < size; ++i) {
|
for (size_t i = 0; i < size; ++i) {
|
||||||
|
if (valid_data != nullptr && !valid_data[i]) {
|
||||||
|
res[i] = valid_res[i] = false;
|
||||||
|
continue;
|
||||||
|
}
|
||||||
res[i] = executor(i);
|
res[i] = executor(i);
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
int64_t processed_size = ProcessDataChunks<Json>(
|
int64_t processed_size = ProcessDataChunks<Json>(
|
||||||
execute_sub_batch, std::nullptr_t{}, res, pointer, elements);
|
execute_sub_batch, std::nullptr_t{}, res, valid_res, pointer, elements);
|
||||||
AssertInfo(processed_size == real_batch_size,
|
AssertInfo(processed_size == real_batch_size,
|
||||||
"internal error: expr processed rows {} not equal "
|
"internal error: expr processed rows {} not equal "
|
||||||
"expect batch size {}",
|
"expect batch size {}",
|
||||||
@ -451,9 +492,11 @@ PhyJsonContainsFilterExpr::ExecJsonContainsAllWithDiffType() {
|
|||||||
if (real_batch_size == 0) {
|
if (real_batch_size == 0) {
|
||||||
return nullptr;
|
return nullptr;
|
||||||
}
|
}
|
||||||
auto res_vec =
|
auto res_vec = std::make_shared<ColumnVector>(
|
||||||
std::make_shared<ColumnVector>(TargetBitmap(real_batch_size));
|
TargetBitmap(real_batch_size), TargetBitmap(real_batch_size));
|
||||||
TargetBitmapView res(res_vec->GetRawData(), real_batch_size);
|
TargetBitmapView res(res_vec->GetRawData(), real_batch_size);
|
||||||
|
TargetBitmapView valid_res(res_vec->GetValidRawData(), real_batch_size);
|
||||||
|
valid_res.set();
|
||||||
|
|
||||||
auto pointer = milvus::Json::pointer(expr_->column_.nested_path_);
|
auto pointer = milvus::Json::pointer(expr_->column_.nested_path_);
|
||||||
|
|
||||||
@ -467,8 +510,10 @@ PhyJsonContainsFilterExpr::ExecJsonContainsAllWithDiffType() {
|
|||||||
|
|
||||||
auto execute_sub_batch =
|
auto execute_sub_batch =
|
||||||
[](const milvus::Json* data,
|
[](const milvus::Json* data,
|
||||||
|
const bool* valid_data,
|
||||||
const int size,
|
const int size,
|
||||||
TargetBitmapView res,
|
TargetBitmapView res,
|
||||||
|
TargetBitmapView valid_res,
|
||||||
const std::string& pointer,
|
const std::string& pointer,
|
||||||
const std::vector<proto::plan::GenericValue>& elements,
|
const std::vector<proto::plan::GenericValue>& elements,
|
||||||
const std::unordered_set<int> elements_index) {
|
const std::unordered_set<int> elements_index) {
|
||||||
@ -553,6 +598,10 @@ PhyJsonContainsFilterExpr::ExecJsonContainsAllWithDiffType() {
|
|||||||
return tmp_elements_index.size() == 0;
|
return tmp_elements_index.size() == 0;
|
||||||
};
|
};
|
||||||
for (size_t i = 0; i < size; ++i) {
|
for (size_t i = 0; i < size; ++i) {
|
||||||
|
if (valid_data != nullptr && !valid_data[i]) {
|
||||||
|
res[i] = valid_res[i] = false;
|
||||||
|
continue;
|
||||||
|
}
|
||||||
res[i] = executor(i);
|
res[i] = executor(i);
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
@ -560,6 +609,7 @@ PhyJsonContainsFilterExpr::ExecJsonContainsAllWithDiffType() {
|
|||||||
int64_t processed_size = ProcessDataChunks<Json>(execute_sub_batch,
|
int64_t processed_size = ProcessDataChunks<Json>(execute_sub_batch,
|
||||||
std::nullptr_t{},
|
std::nullptr_t{},
|
||||||
res,
|
res,
|
||||||
|
valid_res,
|
||||||
pointer,
|
pointer,
|
||||||
elements,
|
elements,
|
||||||
elements_index);
|
elements_index);
|
||||||
@ -578,9 +628,11 @@ PhyJsonContainsFilterExpr::ExecJsonContainsAllArray() {
|
|||||||
return nullptr;
|
return nullptr;
|
||||||
}
|
}
|
||||||
|
|
||||||
auto res_vec =
|
auto res_vec = std::make_shared<ColumnVector>(
|
||||||
std::make_shared<ColumnVector>(TargetBitmap(real_batch_size));
|
TargetBitmap(real_batch_size), TargetBitmap(real_batch_size));
|
||||||
TargetBitmapView res(res_vec->GetRawData(), real_batch_size);
|
TargetBitmapView res(res_vec->GetRawData(), real_batch_size);
|
||||||
|
TargetBitmapView valid_res(res_vec->GetValidRawData(), real_batch_size);
|
||||||
|
valid_res.set();
|
||||||
|
|
||||||
auto pointer = milvus::Json::pointer(expr_->column_.nested_path_);
|
auto pointer = milvus::Json::pointer(expr_->column_.nested_path_);
|
||||||
|
|
||||||
@ -590,8 +642,10 @@ PhyJsonContainsFilterExpr::ExecJsonContainsAllArray() {
|
|||||||
}
|
}
|
||||||
auto execute_sub_batch =
|
auto execute_sub_batch =
|
||||||
[](const milvus::Json* data,
|
[](const milvus::Json* data,
|
||||||
|
const bool* valid_data,
|
||||||
const int size,
|
const int size,
|
||||||
TargetBitmapView res,
|
TargetBitmapView res,
|
||||||
|
TargetBitmapView valid_res,
|
||||||
const std::string& pointer,
|
const std::string& pointer,
|
||||||
const std::vector<proto::plan::Array>& elements) {
|
const std::vector<proto::plan::Array>& elements) {
|
||||||
auto executor = [&](const size_t i) {
|
auto executor = [&](const size_t i) {
|
||||||
@ -625,12 +679,16 @@ PhyJsonContainsFilterExpr::ExecJsonContainsAllArray() {
|
|||||||
return exist_elements_index.size() == elements.size();
|
return exist_elements_index.size() == elements.size();
|
||||||
};
|
};
|
||||||
for (size_t i = 0; i < size; ++i) {
|
for (size_t i = 0; i < size; ++i) {
|
||||||
|
if (valid_data != nullptr && !valid_data[i]) {
|
||||||
|
res[i] = valid_res[i] = false;
|
||||||
|
continue;
|
||||||
|
}
|
||||||
res[i] = executor(i);
|
res[i] = executor(i);
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
int64_t processed_size = ProcessDataChunks<Json>(
|
int64_t processed_size = ProcessDataChunks<Json>(
|
||||||
execute_sub_batch, std::nullptr_t{}, res, pointer, elements);
|
execute_sub_batch, std::nullptr_t{}, res, valid_res, pointer, elements);
|
||||||
AssertInfo(processed_size == real_batch_size,
|
AssertInfo(processed_size == real_batch_size,
|
||||||
"internal error: expr processed rows {} not equal "
|
"internal error: expr processed rows {} not equal "
|
||||||
"expect batch size {}",
|
"expect batch size {}",
|
||||||
@ -646,9 +704,11 @@ PhyJsonContainsFilterExpr::ExecJsonContainsWithDiffType() {
|
|||||||
return nullptr;
|
return nullptr;
|
||||||
}
|
}
|
||||||
|
|
||||||
auto res_vec =
|
auto res_vec = std::make_shared<ColumnVector>(
|
||||||
std::make_shared<ColumnVector>(TargetBitmap(real_batch_size));
|
TargetBitmap(real_batch_size), TargetBitmap(real_batch_size));
|
||||||
TargetBitmapView res(res_vec->GetRawData(), real_batch_size);
|
TargetBitmapView res(res_vec->GetRawData(), real_batch_size);
|
||||||
|
TargetBitmapView valid_res(res_vec->GetValidRawData(), real_batch_size);
|
||||||
|
valid_res.set();
|
||||||
|
|
||||||
auto pointer = milvus::Json::pointer(expr_->column_.nested_path_);
|
auto pointer = milvus::Json::pointer(expr_->column_.nested_path_);
|
||||||
|
|
||||||
@ -662,8 +722,10 @@ PhyJsonContainsFilterExpr::ExecJsonContainsWithDiffType() {
|
|||||||
|
|
||||||
auto execute_sub_batch =
|
auto execute_sub_batch =
|
||||||
[](const milvus::Json* data,
|
[](const milvus::Json* data,
|
||||||
|
const bool* valid_data,
|
||||||
const int size,
|
const int size,
|
||||||
TargetBitmapView res,
|
TargetBitmapView res,
|
||||||
|
TargetBitmapView valid_res,
|
||||||
const std::string& pointer,
|
const std::string& pointer,
|
||||||
const std::vector<proto::plan::GenericValue>& elements) {
|
const std::vector<proto::plan::GenericValue>& elements) {
|
||||||
auto executor = [&](const size_t i) {
|
auto executor = [&](const size_t i) {
|
||||||
@ -739,12 +801,16 @@ PhyJsonContainsFilterExpr::ExecJsonContainsWithDiffType() {
|
|||||||
return false;
|
return false;
|
||||||
};
|
};
|
||||||
for (size_t i = 0; i < size; ++i) {
|
for (size_t i = 0; i < size; ++i) {
|
||||||
|
if (valid_data != nullptr && !valid_data[i]) {
|
||||||
|
res[i] = valid_res[i] = false;
|
||||||
|
continue;
|
||||||
|
}
|
||||||
res[i] = executor(i);
|
res[i] = executor(i);
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
int64_t processed_size = ProcessDataChunks<Json>(
|
int64_t processed_size = ProcessDataChunks<Json>(
|
||||||
execute_sub_batch, std::nullptr_t{}, res, pointer, elements);
|
execute_sub_batch, std::nullptr_t{}, res, valid_res, pointer, elements);
|
||||||
AssertInfo(processed_size == real_batch_size,
|
AssertInfo(processed_size == real_batch_size,
|
||||||
"internal error: expr processed rows {} not equal "
|
"internal error: expr processed rows {} not equal "
|
||||||
"expect batch size {}",
|
"expect batch size {}",
|
||||||
@ -832,12 +898,12 @@ PhyJsonContainsFilterExpr::ExecArrayContainsForIndexSegmentImpl() {
|
|||||||
}
|
}
|
||||||
};
|
};
|
||||||
auto res = ProcessIndexChunks<GetType>(execute_sub_batch, elems);
|
auto res = ProcessIndexChunks<GetType>(execute_sub_batch, elems);
|
||||||
AssertInfo(res.size() == real_batch_size,
|
AssertInfo(res->size() == real_batch_size,
|
||||||
"internal error: expr processed rows {} not equal "
|
"internal error: expr processed rows {} not equal "
|
||||||
"expect batch size {}",
|
"expect batch size {}",
|
||||||
res.size(),
|
res->size(),
|
||||||
real_batch_size);
|
real_batch_size);
|
||||||
return std::make_shared<ColumnVector>(std::move(res));
|
return res;
|
||||||
}
|
}
|
||||||
|
|
||||||
} //namespace exec
|
} //namespace exec
|
||||||
|
|||||||
@ -45,6 +45,10 @@ PhyLogicalBinaryExpr::Eval(EvalCtx& context, VectorPtr& result) {
|
|||||||
"unsupported logical operator: {}",
|
"unsupported logical operator: {}",
|
||||||
expr_->GetOpTypeString());
|
expr_->GetOpTypeString());
|
||||||
}
|
}
|
||||||
|
TargetBitmapView lvalid_view(lflat->GetValidRawData(), size);
|
||||||
|
TargetBitmapView rvalid_view(rflat->GetValidRawData(), size);
|
||||||
|
LogicalElementFunc<LogicalOpType::Or> func;
|
||||||
|
func(lvalid_view, rvalid_view, size);
|
||||||
result = std::move(left);
|
result = std::move(left);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@ -30,6 +30,9 @@ PhyLogicalUnaryExpr::Eval(EvalCtx& context, VectorPtr& result) {
|
|||||||
auto flat_vec = GetColumnVector(result);
|
auto flat_vec = GetColumnVector(result);
|
||||||
TargetBitmapView data(flat_vec->GetRawData(), flat_vec->size());
|
TargetBitmapView data(flat_vec->GetRawData(), flat_vec->size());
|
||||||
data.flip();
|
data.flip();
|
||||||
|
TargetBitmapView valid_data(flat_vec->GetValidRawData(),
|
||||||
|
flat_vec->size());
|
||||||
|
data &= valid_data;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@ -15,6 +15,8 @@
|
|||||||
// limitations under the License.
|
// limitations under the License.
|
||||||
|
|
||||||
#include "TermExpr.h"
|
#include "TermExpr.h"
|
||||||
|
#include <memory>
|
||||||
|
#include <utility>
|
||||||
#include "query/Utils.h"
|
#include "query/Utils.h"
|
||||||
namespace milvus {
|
namespace milvus {
|
||||||
namespace exec {
|
namespace exec {
|
||||||
@ -199,9 +201,12 @@ PhyTermFilterExpr::ExecPkTermImpl() {
|
|||||||
return nullptr;
|
return nullptr;
|
||||||
}
|
}
|
||||||
|
|
||||||
auto res_vec =
|
auto res_vec = std::make_shared<ColumnVector>(
|
||||||
std::make_shared<ColumnVector>(TargetBitmap(real_batch_size));
|
TargetBitmap(real_batch_size), TargetBitmap(real_batch_size));
|
||||||
TargetBitmapView res(res_vec->GetRawData(), real_batch_size);
|
TargetBitmapView res(res_vec->GetRawData(), real_batch_size);
|
||||||
|
// pk valid_bitmap is always all true
|
||||||
|
TargetBitmapView valid_res(res_vec->GetValidRawData(), real_batch_size);
|
||||||
|
valid_res.set();
|
||||||
|
|
||||||
for (size_t i = 0; i < real_batch_size; ++i) {
|
for (size_t i = 0; i < real_batch_size; ++i) {
|
||||||
res[i] = cached_bits_[current_data_chunk_pos_++];
|
res[i] = cached_bits_[current_data_chunk_pos_++];
|
||||||
@ -241,17 +246,21 @@ PhyTermFilterExpr::ExecTermArrayVariableInField() {
|
|||||||
return nullptr;
|
return nullptr;
|
||||||
}
|
}
|
||||||
|
|
||||||
auto res_vec =
|
auto res_vec = std::make_shared<ColumnVector>(
|
||||||
std::make_shared<ColumnVector>(TargetBitmap(real_batch_size));
|
TargetBitmap(real_batch_size), TargetBitmap(real_batch_size));
|
||||||
TargetBitmapView res(res_vec->GetRawData(), real_batch_size);
|
TargetBitmapView res(res_vec->GetRawData(), real_batch_size);
|
||||||
|
TargetBitmapView valid_res(res_vec->GetValidRawData(), real_batch_size);
|
||||||
|
valid_res.set();
|
||||||
|
|
||||||
AssertInfo(expr_->vals_.size() == 1,
|
AssertInfo(expr_->vals_.size() == 1,
|
||||||
"element length in json array must be one");
|
"element length in json array must be one");
|
||||||
ValueType target_val = GetValueFromProto<ValueType>(expr_->vals_[0]);
|
ValueType target_val = GetValueFromProto<ValueType>(expr_->vals_[0]);
|
||||||
|
|
||||||
auto execute_sub_batch = [](const ArrayView* data,
|
auto execute_sub_batch = [](const ArrayView* data,
|
||||||
|
const bool* valid_data,
|
||||||
const int size,
|
const int size,
|
||||||
TargetBitmapView res,
|
TargetBitmapView res,
|
||||||
|
TargetBitmapView valid_res,
|
||||||
const ValueType& target_val) {
|
const ValueType& target_val) {
|
||||||
auto executor = [&](size_t i) {
|
auto executor = [&](size_t i) {
|
||||||
for (int i = 0; i < data[i].length(); i++) {
|
for (int i = 0; i < data[i].length(); i++) {
|
||||||
@ -263,12 +272,16 @@ PhyTermFilterExpr::ExecTermArrayVariableInField() {
|
|||||||
return false;
|
return false;
|
||||||
};
|
};
|
||||||
for (int i = 0; i < size; ++i) {
|
for (int i = 0; i < size; ++i) {
|
||||||
|
if (valid_data != nullptr && !valid_data[i]) {
|
||||||
|
res[i] = valid_res[i] = false;
|
||||||
|
continue;
|
||||||
|
}
|
||||||
executor(i);
|
executor(i);
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
int64_t processed_size = ProcessDataChunks<milvus::ArrayView>(
|
int64_t processed_size = ProcessDataChunks<milvus::ArrayView>(
|
||||||
execute_sub_batch, std::nullptr_t{}, res, target_val);
|
execute_sub_batch, std::nullptr_t{}, res, valid_res, target_val);
|
||||||
AssertInfo(processed_size == real_batch_size,
|
AssertInfo(processed_size == real_batch_size,
|
||||||
"internal error: expr processed rows {} not equal "
|
"internal error: expr processed rows {} not equal "
|
||||||
"expect batch size {}",
|
"expect batch size {}",
|
||||||
@ -289,9 +302,11 @@ PhyTermFilterExpr::ExecTermArrayFieldInVariable() {
|
|||||||
return nullptr;
|
return nullptr;
|
||||||
}
|
}
|
||||||
|
|
||||||
auto res_vec =
|
auto res_vec = std::make_shared<ColumnVector>(
|
||||||
std::make_shared<ColumnVector>(TargetBitmap(real_batch_size));
|
TargetBitmap(real_batch_size), TargetBitmap(real_batch_size));
|
||||||
TargetBitmapView res(res_vec->GetRawData(), real_batch_size);
|
TargetBitmapView res(res_vec->GetRawData(), real_batch_size);
|
||||||
|
TargetBitmapView valid_res(res_vec->GetValidRawData(), real_batch_size);
|
||||||
|
valid_res.set();
|
||||||
|
|
||||||
int index = -1;
|
int index = -1;
|
||||||
if (expr_->column_.nested_path_.size() > 0) {
|
if (expr_->column_.nested_path_.size() > 0) {
|
||||||
@ -309,12 +324,18 @@ PhyTermFilterExpr::ExecTermArrayFieldInVariable() {
|
|||||||
}
|
}
|
||||||
|
|
||||||
auto execute_sub_batch = [](const ArrayView* data,
|
auto execute_sub_batch = [](const ArrayView* data,
|
||||||
|
const bool* valid_data,
|
||||||
const int size,
|
const int size,
|
||||||
TargetBitmapView res,
|
TargetBitmapView res,
|
||||||
|
TargetBitmapView valid_res,
|
||||||
int index,
|
int index,
|
||||||
const std::unordered_set<ValueType>& term_set) {
|
const std::unordered_set<ValueType>& term_set) {
|
||||||
for (int i = 0; i < size; ++i) {
|
for (int i = 0; i < size; ++i) {
|
||||||
if (index >= data[i].length()) {
|
if (valid_data != nullptr && !valid_data[i]) {
|
||||||
|
res[i] = valid_res[i] = false;
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
if (term_set.empty() || index >= data[i].length()) {
|
||||||
res[i] = false;
|
res[i] = false;
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
@ -324,7 +345,7 @@ PhyTermFilterExpr::ExecTermArrayFieldInVariable() {
|
|||||||
};
|
};
|
||||||
|
|
||||||
int64_t processed_size = ProcessDataChunks<milvus::ArrayView>(
|
int64_t processed_size = ProcessDataChunks<milvus::ArrayView>(
|
||||||
execute_sub_batch, std::nullptr_t{}, res, index, term_set);
|
execute_sub_batch, std::nullptr_t{}, res, valid_res, index, term_set);
|
||||||
AssertInfo(processed_size == real_batch_size,
|
AssertInfo(processed_size == real_batch_size,
|
||||||
"internal error: expr processed rows {} not equal "
|
"internal error: expr processed rows {} not equal "
|
||||||
"expect batch size {}",
|
"expect batch size {}",
|
||||||
@ -344,9 +365,11 @@ PhyTermFilterExpr::ExecTermJsonVariableInField() {
|
|||||||
return nullptr;
|
return nullptr;
|
||||||
}
|
}
|
||||||
|
|
||||||
auto res_vec =
|
auto res_vec = std::make_shared<ColumnVector>(
|
||||||
std::make_shared<ColumnVector>(TargetBitmap(real_batch_size));
|
TargetBitmap(real_batch_size), TargetBitmap(real_batch_size));
|
||||||
TargetBitmapView res(res_vec->GetRawData(), real_batch_size);
|
TargetBitmapView res(res_vec->GetRawData(), real_batch_size);
|
||||||
|
TargetBitmapView valid_res(res_vec->GetValidRawData(), real_batch_size);
|
||||||
|
valid_res.set();
|
||||||
|
|
||||||
AssertInfo(expr_->vals_.size() == 1,
|
AssertInfo(expr_->vals_.size() == 1,
|
||||||
"element length in json array must be one");
|
"element length in json array must be one");
|
||||||
@ -354,8 +377,10 @@ PhyTermFilterExpr::ExecTermJsonVariableInField() {
|
|||||||
auto pointer = milvus::Json::pointer(expr_->column_.nested_path_);
|
auto pointer = milvus::Json::pointer(expr_->column_.nested_path_);
|
||||||
|
|
||||||
auto execute_sub_batch = [](const Json* data,
|
auto execute_sub_batch = [](const Json* data,
|
||||||
|
const bool* valid_data,
|
||||||
const int size,
|
const int size,
|
||||||
TargetBitmapView res,
|
TargetBitmapView res,
|
||||||
|
TargetBitmapView valid_res,
|
||||||
const std::string pointer,
|
const std::string pointer,
|
||||||
const ValueType& target_val) {
|
const ValueType& target_val) {
|
||||||
auto executor = [&](size_t i) {
|
auto executor = [&](size_t i) {
|
||||||
@ -375,11 +400,15 @@ PhyTermFilterExpr::ExecTermJsonVariableInField() {
|
|||||||
return false;
|
return false;
|
||||||
};
|
};
|
||||||
for (size_t i = 0; i < size; ++i) {
|
for (size_t i = 0; i < size; ++i) {
|
||||||
|
if (valid_data != nullptr && !valid_data[i]) {
|
||||||
|
res[i] = valid_res[i] = false;
|
||||||
|
continue;
|
||||||
|
}
|
||||||
res[i] = executor(i);
|
res[i] = executor(i);
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
int64_t processed_size = ProcessDataChunks<milvus::Json>(
|
int64_t processed_size = ProcessDataChunks<milvus::Json>(
|
||||||
execute_sub_batch, std::nullptr_t{}, res, pointer, val);
|
execute_sub_batch, std::nullptr_t{}, res, valid_res, pointer, val);
|
||||||
AssertInfo(processed_size == real_batch_size,
|
AssertInfo(processed_size == real_batch_size,
|
||||||
"internal error: expr processed rows {} not equal "
|
"internal error: expr processed rows {} not equal "
|
||||||
"expect batch size {}",
|
"expect batch size {}",
|
||||||
@ -399,9 +428,11 @@ PhyTermFilterExpr::ExecTermJsonFieldInVariable() {
|
|||||||
return nullptr;
|
return nullptr;
|
||||||
}
|
}
|
||||||
|
|
||||||
auto res_vec =
|
auto res_vec = std::make_shared<ColumnVector>(
|
||||||
std::make_shared<ColumnVector>(TargetBitmap(real_batch_size));
|
TargetBitmap(real_batch_size), TargetBitmap(real_batch_size));
|
||||||
TargetBitmapView res(res_vec->GetRawData(), real_batch_size);
|
TargetBitmapView res(res_vec->GetRawData(), real_batch_size);
|
||||||
|
TargetBitmapView valid_res(res_vec->GetValidRawData(), real_batch_size);
|
||||||
|
valid_res.set();
|
||||||
|
|
||||||
auto pointer = milvus::Json::pointer(expr_->column_.nested_path_);
|
auto pointer = milvus::Json::pointer(expr_->column_.nested_path_);
|
||||||
std::unordered_set<ValueType> term_set;
|
std::unordered_set<ValueType> term_set;
|
||||||
@ -416,8 +447,10 @@ PhyTermFilterExpr::ExecTermJsonFieldInVariable() {
|
|||||||
}
|
}
|
||||||
|
|
||||||
auto execute_sub_batch = [](const Json* data,
|
auto execute_sub_batch = [](const Json* data,
|
||||||
|
const bool* valid_data,
|
||||||
const int size,
|
const int size,
|
||||||
TargetBitmapView res,
|
TargetBitmapView res,
|
||||||
|
TargetBitmapView valid_res,
|
||||||
const std::string pointer,
|
const std::string pointer,
|
||||||
const std::unordered_set<ValueType>& terms) {
|
const std::unordered_set<ValueType>& terms) {
|
||||||
auto executor = [&](size_t i) {
|
auto executor = [&](size_t i) {
|
||||||
@ -439,11 +472,19 @@ PhyTermFilterExpr::ExecTermJsonFieldInVariable() {
|
|||||||
return terms.find(ValueType(x.value())) != terms.end();
|
return terms.find(ValueType(x.value())) != terms.end();
|
||||||
};
|
};
|
||||||
for (size_t i = 0; i < size; ++i) {
|
for (size_t i = 0; i < size; ++i) {
|
||||||
|
if (valid_data != nullptr && !valid_data[i]) {
|
||||||
|
res[i] = valid_res[i] = false;
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
if (terms.empty()) {
|
||||||
|
res[i] = false;
|
||||||
|
continue;
|
||||||
|
}
|
||||||
res[i] = executor(i);
|
res[i] = executor(i);
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
int64_t processed_size = ProcessDataChunks<milvus::Json>(
|
int64_t processed_size = ProcessDataChunks<milvus::Json>(
|
||||||
execute_sub_batch, std::nullptr_t{}, res, pointer, term_set);
|
execute_sub_batch, std::nullptr_t{}, res, valid_res, pointer, term_set);
|
||||||
AssertInfo(processed_size == real_batch_size,
|
AssertInfo(processed_size == real_batch_size,
|
||||||
"internal error: expr processed rows {} not equal "
|
"internal error: expr processed rows {} not equal "
|
||||||
"expect batch size {}",
|
"expect batch size {}",
|
||||||
@ -489,12 +530,12 @@ PhyTermFilterExpr::ExecVisitorImplForIndex() {
|
|||||||
return func(index_ptr, vals.size(), vals.data());
|
return func(index_ptr, vals.size(), vals.data());
|
||||||
};
|
};
|
||||||
auto res = ProcessIndexChunks<T>(execute_sub_batch, vals);
|
auto res = ProcessIndexChunks<T>(execute_sub_batch, vals);
|
||||||
AssertInfo(res.size() == real_batch_size,
|
AssertInfo(res->size() == real_batch_size,
|
||||||
"internal error: expr processed rows {} not equal "
|
"internal error: expr processed rows {} not equal "
|
||||||
"expect batch size {}",
|
"expect batch size {}",
|
||||||
res.size(),
|
res->size(),
|
||||||
real_batch_size);
|
real_batch_size);
|
||||||
return std::make_shared<ColumnVector>(std::move(res));
|
return res;
|
||||||
}
|
}
|
||||||
|
|
||||||
template <>
|
template <>
|
||||||
@ -516,7 +557,7 @@ PhyTermFilterExpr::ExecVisitorImplForIndex<bool>() {
|
|||||||
return std::move(func(index_ptr, vals.size(), (bool*)vals.data()));
|
return std::move(func(index_ptr, vals.size(), (bool*)vals.data()));
|
||||||
};
|
};
|
||||||
auto res = ProcessIndexChunks<bool>(execute_sub_batch, vals);
|
auto res = ProcessIndexChunks<bool>(execute_sub_batch, vals);
|
||||||
return std::make_shared<ColumnVector>(std::move(res));
|
return res;
|
||||||
}
|
}
|
||||||
|
|
||||||
template <typename T>
|
template <typename T>
|
||||||
@ -527,9 +568,11 @@ PhyTermFilterExpr::ExecVisitorImplForData() {
|
|||||||
return nullptr;
|
return nullptr;
|
||||||
}
|
}
|
||||||
|
|
||||||
auto res_vec =
|
auto res_vec = std::make_shared<ColumnVector>(
|
||||||
std::make_shared<ColumnVector>(TargetBitmap(real_batch_size));
|
TargetBitmap(real_batch_size), TargetBitmap(real_batch_size));
|
||||||
TargetBitmapView res(res_vec->GetRawData(), real_batch_size);
|
TargetBitmapView res(res_vec->GetRawData(), real_batch_size);
|
||||||
|
TargetBitmapView valid_res(res_vec->GetValidRawData(), real_batch_size);
|
||||||
|
valid_res.set();
|
||||||
|
|
||||||
std::vector<T> vals;
|
std::vector<T> vals;
|
||||||
for (auto& val : expr_->vals_) {
|
for (auto& val : expr_->vals_) {
|
||||||
@ -542,16 +585,22 @@ PhyTermFilterExpr::ExecVisitorImplForData() {
|
|||||||
}
|
}
|
||||||
std::unordered_set<T> vals_set(vals.begin(), vals.end());
|
std::unordered_set<T> vals_set(vals.begin(), vals.end());
|
||||||
auto execute_sub_batch = [](const T* data,
|
auto execute_sub_batch = [](const T* data,
|
||||||
|
const bool* valid_data,
|
||||||
const int size,
|
const int size,
|
||||||
TargetBitmapView res,
|
TargetBitmapView res,
|
||||||
|
TargetBitmapView valid_res,
|
||||||
const std::unordered_set<T>& vals) {
|
const std::unordered_set<T>& vals) {
|
||||||
TermElementFuncSet<T> func;
|
TermElementFuncSet<T> func;
|
||||||
for (size_t i = 0; i < size; ++i) {
|
for (size_t i = 0; i < size; ++i) {
|
||||||
|
if (valid_data != nullptr && !valid_data[i]) {
|
||||||
|
res[i] = valid_res[i] = false;
|
||||||
|
continue;
|
||||||
|
}
|
||||||
res[i] = func(vals, data[i]);
|
res[i] = func(vals, data[i]);
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
int64_t processed_size = ProcessDataChunks<T>(
|
int64_t processed_size = ProcessDataChunks<T>(
|
||||||
execute_sub_batch, std::nullptr_t{}, res, vals_set);
|
execute_sub_batch, std::nullptr_t{}, res, valid_res, vals_set);
|
||||||
AssertInfo(processed_size == real_batch_size,
|
AssertInfo(processed_size == real_batch_size,
|
||||||
"internal error: expr processed rows {} not equal "
|
"internal error: expr processed rows {} not equal "
|
||||||
"expect batch size {}",
|
"expect batch size {}",
|
||||||
|
|||||||
@ -15,6 +15,7 @@
|
|||||||
// limitations under the License.
|
// limitations under the License.
|
||||||
|
|
||||||
#include "UnaryExpr.h"
|
#include "UnaryExpr.h"
|
||||||
|
#include <optional>
|
||||||
#include "common/Json.h"
|
#include "common/Json.h"
|
||||||
|
|
||||||
namespace milvus {
|
namespace milvus {
|
||||||
@ -260,9 +261,11 @@ PhyUnaryRangeFilterExpr::ExecRangeVisitorImplArray() {
|
|||||||
if (real_batch_size == 0) {
|
if (real_batch_size == 0) {
|
||||||
return nullptr;
|
return nullptr;
|
||||||
}
|
}
|
||||||
auto res_vec =
|
auto res_vec = std::make_shared<ColumnVector>(
|
||||||
std::make_shared<ColumnVector>(TargetBitmap(real_batch_size));
|
TargetBitmap(real_batch_size), TargetBitmap(real_batch_size));
|
||||||
TargetBitmapView res(res_vec->GetRawData(), real_batch_size);
|
TargetBitmapView res(res_vec->GetRawData(), real_batch_size);
|
||||||
|
TargetBitmapView valid_res(res_vec->GetValidRawData(), real_batch_size);
|
||||||
|
valid_res.set();
|
||||||
|
|
||||||
ValueType val = GetValueFromProto<ValueType>(expr_->val_);
|
ValueType val = GetValueFromProto<ValueType>(expr_->val_);
|
||||||
auto op_type = expr_->op_type_;
|
auto op_type = expr_->op_type_;
|
||||||
@ -271,48 +274,50 @@ PhyUnaryRangeFilterExpr::ExecRangeVisitorImplArray() {
|
|||||||
index = std::stoi(expr_->column_.nested_path_[0]);
|
index = std::stoi(expr_->column_.nested_path_[0]);
|
||||||
}
|
}
|
||||||
auto execute_sub_batch = [op_type](const milvus::ArrayView* data,
|
auto execute_sub_batch = [op_type](const milvus::ArrayView* data,
|
||||||
|
const bool* valid_data,
|
||||||
const int size,
|
const int size,
|
||||||
TargetBitmapView res,
|
TargetBitmapView res,
|
||||||
|
TargetBitmapView valid_res,
|
||||||
ValueType val,
|
ValueType val,
|
||||||
int index) {
|
int index) {
|
||||||
switch (op_type) {
|
switch (op_type) {
|
||||||
case proto::plan::GreaterThan: {
|
case proto::plan::GreaterThan: {
|
||||||
UnaryElementFuncForArray<ValueType, proto::plan::GreaterThan>
|
UnaryElementFuncForArray<ValueType, proto::plan::GreaterThan>
|
||||||
func;
|
func;
|
||||||
func(data, size, val, index, res);
|
func(data, valid_data, size, val, index, res, valid_res);
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
case proto::plan::GreaterEqual: {
|
case proto::plan::GreaterEqual: {
|
||||||
UnaryElementFuncForArray<ValueType, proto::plan::GreaterEqual>
|
UnaryElementFuncForArray<ValueType, proto::plan::GreaterEqual>
|
||||||
func;
|
func;
|
||||||
func(data, size, val, index, res);
|
func(data, valid_data, size, val, index, res, valid_res);
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
case proto::plan::LessThan: {
|
case proto::plan::LessThan: {
|
||||||
UnaryElementFuncForArray<ValueType, proto::plan::LessThan> func;
|
UnaryElementFuncForArray<ValueType, proto::plan::LessThan> func;
|
||||||
func(data, size, val, index, res);
|
func(data, valid_data, size, val, index, res, valid_res);
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
case proto::plan::LessEqual: {
|
case proto::plan::LessEqual: {
|
||||||
UnaryElementFuncForArray<ValueType, proto::plan::LessEqual>
|
UnaryElementFuncForArray<ValueType, proto::plan::LessEqual>
|
||||||
func;
|
func;
|
||||||
func(data, size, val, index, res);
|
func(data, valid_data, size, val, index, res, valid_res);
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
case proto::plan::Equal: {
|
case proto::plan::Equal: {
|
||||||
UnaryElementFuncForArray<ValueType, proto::plan::Equal> func;
|
UnaryElementFuncForArray<ValueType, proto::plan::Equal> func;
|
||||||
func(data, size, val, index, res);
|
func(data, valid_data, size, val, index, res, valid_res);
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
case proto::plan::NotEqual: {
|
case proto::plan::NotEqual: {
|
||||||
UnaryElementFuncForArray<ValueType, proto::plan::NotEqual> func;
|
UnaryElementFuncForArray<ValueType, proto::plan::NotEqual> func;
|
||||||
func(data, size, val, index, res);
|
func(data, valid_data, size, val, index, res, valid_res);
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
case proto::plan::PrefixMatch: {
|
case proto::plan::PrefixMatch: {
|
||||||
UnaryElementFuncForArray<ValueType, proto::plan::PrefixMatch>
|
UnaryElementFuncForArray<ValueType, proto::plan::PrefixMatch>
|
||||||
func;
|
func;
|
||||||
func(data, size, val, index, res);
|
func(data, valid_data, size, val, index, res, valid_res);
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
default:
|
default:
|
||||||
@ -323,7 +328,7 @@ PhyUnaryRangeFilterExpr::ExecRangeVisitorImplArray() {
|
|||||||
}
|
}
|
||||||
};
|
};
|
||||||
int64_t processed_size = ProcessDataChunks<milvus::ArrayView>(
|
int64_t processed_size = ProcessDataChunks<milvus::ArrayView>(
|
||||||
execute_sub_batch, std::nullptr_t{}, res, val, index);
|
execute_sub_batch, std::nullptr_t{}, res, valid_res, val, index);
|
||||||
AssertInfo(processed_size == real_batch_size,
|
AssertInfo(processed_size == real_batch_size,
|
||||||
"internal error: expr processed rows {} not equal "
|
"internal error: expr processed rows {} not equal "
|
||||||
"expect batch size {}",
|
"expect batch size {}",
|
||||||
@ -432,14 +437,14 @@ PhyUnaryRangeFilterExpr::ExecArrayEqualForIndex(bool reverse) {
|
|||||||
}
|
}
|
||||||
return res;
|
return res;
|
||||||
});
|
});
|
||||||
AssertInfo(batch_res.size() == real_batch_size,
|
AssertInfo(batch_res->size() == real_batch_size,
|
||||||
"internal error: expr processed rows {} not equal "
|
"internal error: expr processed rows {} not equal "
|
||||||
"expect batch size {}",
|
"expect batch size {}",
|
||||||
batch_res.size(),
|
batch_res->size(),
|
||||||
real_batch_size);
|
real_batch_size);
|
||||||
|
|
||||||
// return the result.
|
// return the result.
|
||||||
return std::make_shared<ColumnVector>(std::move(batch_res));
|
return batch_res;
|
||||||
}
|
}
|
||||||
|
|
||||||
template <typename ExprValueType>
|
template <typename ExprValueType>
|
||||||
@ -455,9 +460,11 @@ PhyUnaryRangeFilterExpr::ExecRangeVisitorImplJson() {
|
|||||||
}
|
}
|
||||||
|
|
||||||
ExprValueType val = GetValueFromProto<ExprValueType>(expr_->val_);
|
ExprValueType val = GetValueFromProto<ExprValueType>(expr_->val_);
|
||||||
auto res_vec =
|
auto res_vec = std::make_shared<ColumnVector>(
|
||||||
std::make_shared<ColumnVector>(TargetBitmap(real_batch_size));
|
TargetBitmap(real_batch_size), TargetBitmap(real_batch_size));
|
||||||
TargetBitmapView res(res_vec->GetRawData(), real_batch_size);
|
TargetBitmapView res(res_vec->GetRawData(), real_batch_size);
|
||||||
|
TargetBitmapView valid_res(res_vec->GetValidRawData(), real_batch_size);
|
||||||
|
valid_res.set();
|
||||||
auto op_type = expr_->op_type_;
|
auto op_type = expr_->op_type_;
|
||||||
auto pointer = milvus::Json::pointer(expr_->column_.nested_path_);
|
auto pointer = milvus::Json::pointer(expr_->column_.nested_path_);
|
||||||
|
|
||||||
@ -492,12 +499,18 @@ PhyUnaryRangeFilterExpr::ExecRangeVisitorImplJson() {
|
|||||||
} while (false)
|
} while (false)
|
||||||
|
|
||||||
auto execute_sub_batch = [op_type, pointer](const milvus::Json* data,
|
auto execute_sub_batch = [op_type, pointer](const milvus::Json* data,
|
||||||
|
const bool* valid_data,
|
||||||
const int size,
|
const int size,
|
||||||
TargetBitmapView res,
|
TargetBitmapView res,
|
||||||
|
TargetBitmapView valid_res,
|
||||||
ExprValueType val) {
|
ExprValueType val) {
|
||||||
switch (op_type) {
|
switch (op_type) {
|
||||||
case proto::plan::GreaterThan: {
|
case proto::plan::GreaterThan: {
|
||||||
for (size_t i = 0; i < size; ++i) {
|
for (size_t i = 0; i < size; ++i) {
|
||||||
|
if (valid_data != nullptr && !valid_data[i]) {
|
||||||
|
res[i] = valid_res[i] = false;
|
||||||
|
continue;
|
||||||
|
}
|
||||||
if constexpr (std::is_same_v<GetType, proto::plan::Array>) {
|
if constexpr (std::is_same_v<GetType, proto::plan::Array>) {
|
||||||
res[i] = false;
|
res[i] = false;
|
||||||
} else {
|
} else {
|
||||||
@ -508,6 +521,10 @@ PhyUnaryRangeFilterExpr::ExecRangeVisitorImplJson() {
|
|||||||
}
|
}
|
||||||
case proto::plan::GreaterEqual: {
|
case proto::plan::GreaterEqual: {
|
||||||
for (size_t i = 0; i < size; ++i) {
|
for (size_t i = 0; i < size; ++i) {
|
||||||
|
if (valid_data != nullptr && !valid_data[i]) {
|
||||||
|
res[i] = valid_res[i] = false;
|
||||||
|
continue;
|
||||||
|
}
|
||||||
if constexpr (std::is_same_v<GetType, proto::plan::Array>) {
|
if constexpr (std::is_same_v<GetType, proto::plan::Array>) {
|
||||||
res[i] = false;
|
res[i] = false;
|
||||||
} else {
|
} else {
|
||||||
@ -518,6 +535,10 @@ PhyUnaryRangeFilterExpr::ExecRangeVisitorImplJson() {
|
|||||||
}
|
}
|
||||||
case proto::plan::LessThan: {
|
case proto::plan::LessThan: {
|
||||||
for (size_t i = 0; i < size; ++i) {
|
for (size_t i = 0; i < size; ++i) {
|
||||||
|
if (valid_data != nullptr && !valid_data[i]) {
|
||||||
|
res[i] = valid_res[i] = false;
|
||||||
|
continue;
|
||||||
|
}
|
||||||
if constexpr (std::is_same_v<GetType, proto::plan::Array>) {
|
if constexpr (std::is_same_v<GetType, proto::plan::Array>) {
|
||||||
res[i] = false;
|
res[i] = false;
|
||||||
} else {
|
} else {
|
||||||
@ -528,6 +549,10 @@ PhyUnaryRangeFilterExpr::ExecRangeVisitorImplJson() {
|
|||||||
}
|
}
|
||||||
case proto::plan::LessEqual: {
|
case proto::plan::LessEqual: {
|
||||||
for (size_t i = 0; i < size; ++i) {
|
for (size_t i = 0; i < size; ++i) {
|
||||||
|
if (valid_data != nullptr && !valid_data[i]) {
|
||||||
|
res[i] = valid_res[i] = false;
|
||||||
|
continue;
|
||||||
|
}
|
||||||
if constexpr (std::is_same_v<GetType, proto::plan::Array>) {
|
if constexpr (std::is_same_v<GetType, proto::plan::Array>) {
|
||||||
res[i] = false;
|
res[i] = false;
|
||||||
} else {
|
} else {
|
||||||
@ -538,6 +563,10 @@ PhyUnaryRangeFilterExpr::ExecRangeVisitorImplJson() {
|
|||||||
}
|
}
|
||||||
case proto::plan::Equal: {
|
case proto::plan::Equal: {
|
||||||
for (size_t i = 0; i < size; ++i) {
|
for (size_t i = 0; i < size; ++i) {
|
||||||
|
if (valid_data != nullptr && !valid_data[i]) {
|
||||||
|
res[i] = valid_res[i] = false;
|
||||||
|
continue;
|
||||||
|
}
|
||||||
if constexpr (std::is_same_v<GetType, proto::plan::Array>) {
|
if constexpr (std::is_same_v<GetType, proto::plan::Array>) {
|
||||||
auto doc = data[i].doc();
|
auto doc = data[i].doc();
|
||||||
auto array = doc.at_pointer(pointer).get_array();
|
auto array = doc.at_pointer(pointer).get_array();
|
||||||
@ -554,6 +583,10 @@ PhyUnaryRangeFilterExpr::ExecRangeVisitorImplJson() {
|
|||||||
}
|
}
|
||||||
case proto::plan::NotEqual: {
|
case proto::plan::NotEqual: {
|
||||||
for (size_t i = 0; i < size; ++i) {
|
for (size_t i = 0; i < size; ++i) {
|
||||||
|
if (valid_data != nullptr && !valid_data[i]) {
|
||||||
|
res[i] = valid_res[i] = false;
|
||||||
|
continue;
|
||||||
|
}
|
||||||
if constexpr (std::is_same_v<GetType, proto::plan::Array>) {
|
if constexpr (std::is_same_v<GetType, proto::plan::Array>) {
|
||||||
auto doc = data[i].doc();
|
auto doc = data[i].doc();
|
||||||
auto array = doc.at_pointer(pointer).get_array();
|
auto array = doc.at_pointer(pointer).get_array();
|
||||||
@ -570,6 +603,10 @@ PhyUnaryRangeFilterExpr::ExecRangeVisitorImplJson() {
|
|||||||
}
|
}
|
||||||
case proto::plan::PrefixMatch: {
|
case proto::plan::PrefixMatch: {
|
||||||
for (size_t i = 0; i < size; ++i) {
|
for (size_t i = 0; i < size; ++i) {
|
||||||
|
if (valid_data != nullptr && !valid_data[i]) {
|
||||||
|
res[i] = valid_res[i] = false;
|
||||||
|
continue;
|
||||||
|
}
|
||||||
if constexpr (std::is_same_v<GetType, proto::plan::Array>) {
|
if constexpr (std::is_same_v<GetType, proto::plan::Array>) {
|
||||||
res[i] = false;
|
res[i] = false;
|
||||||
} else {
|
} else {
|
||||||
@ -584,6 +621,10 @@ PhyUnaryRangeFilterExpr::ExecRangeVisitorImplJson() {
|
|||||||
auto regex_pattern = translator(val);
|
auto regex_pattern = translator(val);
|
||||||
RegexMatcher matcher(regex_pattern);
|
RegexMatcher matcher(regex_pattern);
|
||||||
for (size_t i = 0; i < size; ++i) {
|
for (size_t i = 0; i < size; ++i) {
|
||||||
|
if (valid_data != nullptr && !valid_data[i]) {
|
||||||
|
res[i] = valid_res[i] = false;
|
||||||
|
continue;
|
||||||
|
}
|
||||||
if constexpr (std::is_same_v<GetType, proto::plan::Array>) {
|
if constexpr (std::is_same_v<GetType, proto::plan::Array>) {
|
||||||
res[i] = false;
|
res[i] = false;
|
||||||
} else {
|
} else {
|
||||||
@ -601,7 +642,7 @@ PhyUnaryRangeFilterExpr::ExecRangeVisitorImplJson() {
|
|||||||
}
|
}
|
||||||
};
|
};
|
||||||
int64_t processed_size = ProcessDataChunks<milvus::Json>(
|
int64_t processed_size = ProcessDataChunks<milvus::Json>(
|
||||||
execute_sub_batch, std::nullptr_t{}, res, val);
|
execute_sub_batch, std::nullptr_t{}, res, valid_res, val);
|
||||||
AssertInfo(processed_size == real_batch_size,
|
AssertInfo(processed_size == real_batch_size,
|
||||||
"internal error: expr processed rows {} not equal "
|
"internal error: expr processed rows {} not equal "
|
||||||
"expect batch size {}",
|
"expect batch size {}",
|
||||||
@ -693,12 +734,12 @@ PhyUnaryRangeFilterExpr::ExecRangeVisitorImplForIndex() {
|
|||||||
};
|
};
|
||||||
auto val = GetValueFromProto<IndexInnerType>(expr_->val_);
|
auto val = GetValueFromProto<IndexInnerType>(expr_->val_);
|
||||||
auto res = ProcessIndexChunks<T>(execute_sub_batch, val);
|
auto res = ProcessIndexChunks<T>(execute_sub_batch, val);
|
||||||
AssertInfo(res.size() == real_batch_size,
|
AssertInfo(res->size() == real_batch_size,
|
||||||
"internal error: expr processed rows {} not equal "
|
"internal error: expr processed rows {} not equal "
|
||||||
"expect batch size {}",
|
"expect batch size {}",
|
||||||
res.size(),
|
res->size(),
|
||||||
real_batch_size);
|
real_batch_size);
|
||||||
return std::make_shared<ColumnVector>(std::move(res));
|
return res;
|
||||||
}
|
}
|
||||||
|
|
||||||
template <typename T>
|
template <typename T>
|
||||||
@ -720,10 +761,11 @@ PhyUnaryRangeFilterExpr::PreCheckOverflow() {
|
|||||||
switch (expr_->op_type_) {
|
switch (expr_->op_type_) {
|
||||||
case proto::plan::GreaterThan:
|
case proto::plan::GreaterThan:
|
||||||
case proto::plan::GreaterEqual: {
|
case proto::plan::GreaterEqual: {
|
||||||
|
auto valid_res = ProcessChunksForValid<T>(CanUseIndex<T>());
|
||||||
auto res_vec = std::make_shared<ColumnVector>(
|
auto res_vec = std::make_shared<ColumnVector>(
|
||||||
TargetBitmap(batch_size));
|
TargetBitmap(batch_size), std::move(valid_res));
|
||||||
cached_overflow_res_ = res_vec;
|
|
||||||
TargetBitmapView res(res_vec->GetRawData(), batch_size);
|
TargetBitmapView res(res_vec->GetRawData(), batch_size);
|
||||||
|
cached_overflow_res_ = res_vec;
|
||||||
|
|
||||||
if (milvus::query::lt_lb<T>(val)) {
|
if (milvus::query::lt_lb<T>(val)) {
|
||||||
res.set();
|
res.set();
|
||||||
@ -733,10 +775,11 @@ PhyUnaryRangeFilterExpr::PreCheckOverflow() {
|
|||||||
}
|
}
|
||||||
case proto::plan::LessThan:
|
case proto::plan::LessThan:
|
||||||
case proto::plan::LessEqual: {
|
case proto::plan::LessEqual: {
|
||||||
|
auto valid_res = ProcessChunksForValid<T>(CanUseIndex<T>());
|
||||||
auto res_vec = std::make_shared<ColumnVector>(
|
auto res_vec = std::make_shared<ColumnVector>(
|
||||||
TargetBitmap(batch_size));
|
TargetBitmap(batch_size), std::move(valid_res));
|
||||||
cached_overflow_res_ = res_vec;
|
|
||||||
TargetBitmapView res(res_vec->GetRawData(), batch_size);
|
TargetBitmapView res(res_vec->GetRawData(), batch_size);
|
||||||
|
cached_overflow_res_ = res_vec;
|
||||||
|
|
||||||
if (milvus::query::gt_ub<T>(val)) {
|
if (milvus::query::gt_ub<T>(val)) {
|
||||||
res.set();
|
res.set();
|
||||||
@ -745,19 +788,21 @@ PhyUnaryRangeFilterExpr::PreCheckOverflow() {
|
|||||||
return res_vec;
|
return res_vec;
|
||||||
}
|
}
|
||||||
case proto::plan::Equal: {
|
case proto::plan::Equal: {
|
||||||
|
auto valid_res = ProcessChunksForValid<T>(CanUseIndex<T>());
|
||||||
auto res_vec = std::make_shared<ColumnVector>(
|
auto res_vec = std::make_shared<ColumnVector>(
|
||||||
TargetBitmap(batch_size));
|
TargetBitmap(batch_size), std::move(valid_res));
|
||||||
cached_overflow_res_ = res_vec;
|
|
||||||
TargetBitmapView res(res_vec->GetRawData(), batch_size);
|
TargetBitmapView res(res_vec->GetRawData(), batch_size);
|
||||||
|
cached_overflow_res_ = res_vec;
|
||||||
|
|
||||||
res.reset();
|
res.reset();
|
||||||
return res_vec;
|
return res_vec;
|
||||||
}
|
}
|
||||||
case proto::plan::NotEqual: {
|
case proto::plan::NotEqual: {
|
||||||
|
auto valid_res = ProcessChunksForValid<T>(CanUseIndex<T>());
|
||||||
auto res_vec = std::make_shared<ColumnVector>(
|
auto res_vec = std::make_shared<ColumnVector>(
|
||||||
TargetBitmap(batch_size));
|
TargetBitmap(batch_size), std::move(valid_res));
|
||||||
cached_overflow_res_ = res_vec;
|
|
||||||
TargetBitmapView res(res_vec->GetRawData(), batch_size);
|
TargetBitmapView res(res_vec->GetRawData(), batch_size);
|
||||||
|
cached_overflow_res_ = res_vec;
|
||||||
|
|
||||||
res.set();
|
res.set();
|
||||||
return res_vec;
|
return res_vec;
|
||||||
@ -788,13 +833,17 @@ PhyUnaryRangeFilterExpr::ExecRangeVisitorImplForData() {
|
|||||||
return nullptr;
|
return nullptr;
|
||||||
}
|
}
|
||||||
IndexInnerType val = GetValueFromProto<IndexInnerType>(expr_->val_);
|
IndexInnerType val = GetValueFromProto<IndexInnerType>(expr_->val_);
|
||||||
auto res_vec =
|
auto res_vec = std::make_shared<ColumnVector>(
|
||||||
std::make_shared<ColumnVector>(TargetBitmap(real_batch_size));
|
TargetBitmap(real_batch_size), TargetBitmap(real_batch_size));
|
||||||
TargetBitmapView res(res_vec->GetRawData(), real_batch_size);
|
TargetBitmapView res(res_vec->GetRawData(), real_batch_size);
|
||||||
|
TargetBitmapView valid_res(res_vec->GetValidRawData(), real_batch_size);
|
||||||
|
valid_res.set();
|
||||||
auto expr_type = expr_->op_type_;
|
auto expr_type = expr_->op_type_;
|
||||||
auto execute_sub_batch = [expr_type](const T* data,
|
auto execute_sub_batch = [expr_type](const T* data,
|
||||||
|
const bool* valid_data,
|
||||||
const int size,
|
const int size,
|
||||||
TargetBitmapView res,
|
TargetBitmapView res,
|
||||||
|
TargetBitmapView valid_res,
|
||||||
IndexInnerType val) {
|
IndexInnerType val) {
|
||||||
switch (expr_type) {
|
switch (expr_type) {
|
||||||
case proto::plan::GreaterThan: {
|
case proto::plan::GreaterThan: {
|
||||||
@ -843,6 +892,16 @@ PhyUnaryRangeFilterExpr::ExecRangeVisitorImplForData() {
|
|||||||
fmt::format("unsupported operator type for unary expr: {}",
|
fmt::format("unsupported operator type for unary expr: {}",
|
||||||
expr_type));
|
expr_type));
|
||||||
}
|
}
|
||||||
|
// there is a batch operation in BinaryRangeElementFunc,
|
||||||
|
// so not divide data again for the reason that it may reduce performance if the null distribution is scattered
|
||||||
|
// but to mask res with valid_data after the batch operation.
|
||||||
|
if (valid_data != nullptr) {
|
||||||
|
for (int i = 0; i < size; i++) {
|
||||||
|
if (!valid_data[i]) {
|
||||||
|
res[i] = valid_res[i] = false;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
};
|
};
|
||||||
auto skip_index_func = [expr_type, val](const SkipIndex& skip_index,
|
auto skip_index_func = [expr_type, val](const SkipIndex& skip_index,
|
||||||
FieldId field_id,
|
FieldId field_id,
|
||||||
@ -850,8 +909,8 @@ PhyUnaryRangeFilterExpr::ExecRangeVisitorImplForData() {
|
|||||||
return skip_index.CanSkipUnaryRange<T>(
|
return skip_index.CanSkipUnaryRange<T>(
|
||||||
field_id, chunk_id, expr_type, val);
|
field_id, chunk_id, expr_type, val);
|
||||||
};
|
};
|
||||||
int64_t processed_size =
|
int64_t processed_size = ProcessDataChunks<T>(
|
||||||
ProcessDataChunks<T>(execute_sub_batch, skip_index_func, res, val);
|
execute_sub_batch, skip_index_func, res, valid_res, val);
|
||||||
AssertInfo(processed_size == real_batch_size,
|
AssertInfo(processed_size == real_batch_size,
|
||||||
"internal error: expr processed rows {} not equal "
|
"internal error: expr processed rows {} not equal "
|
||||||
"expect batch size {}, related params[active_count:{}, "
|
"expect batch size {}, related params[active_count:{}, "
|
||||||
@ -881,7 +940,7 @@ PhyUnaryRangeFilterExpr::ExecTextMatch() {
|
|||||||
return index->MatchQuery(query);
|
return index->MatchQuery(query);
|
||||||
};
|
};
|
||||||
auto res = ProcessTextMatchIndex(func, query);
|
auto res = ProcessTextMatchIndex(func, query);
|
||||||
return std::make_shared<ColumnVector>(std::move(res));
|
return res;
|
||||||
};
|
};
|
||||||
|
|
||||||
} // namespace exec
|
} // namespace exec
|
||||||
|
|||||||
@ -148,11 +148,17 @@ struct UnaryElementFuncForArray {
|
|||||||
ValueType>;
|
ValueType>;
|
||||||
void
|
void
|
||||||
operator()(const ArrayView* src,
|
operator()(const ArrayView* src,
|
||||||
|
const bool* valid_data,
|
||||||
size_t size,
|
size_t size,
|
||||||
ValueType val,
|
ValueType val,
|
||||||
int index,
|
int index,
|
||||||
TargetBitmapView res) {
|
TargetBitmapView res,
|
||||||
|
TargetBitmapView valid_res) {
|
||||||
for (int i = 0; i < size; ++i) {
|
for (int i = 0; i < size; ++i) {
|
||||||
|
if (valid_data != nullptr && !valid_data[i]) {
|
||||||
|
res[i] = valid_res[i] = false;
|
||||||
|
continue;
|
||||||
|
}
|
||||||
if constexpr (op == proto::plan::OpType::Equal) {
|
if constexpr (op == proto::plan::OpType::Equal) {
|
||||||
if constexpr (std::is_same_v<GetType, proto::plan::Array>) {
|
if constexpr (std::is_same_v<GetType, proto::plan::Array>) {
|
||||||
res[i] = src[i].is_same_array(val);
|
res[i] = src[i].is_same_array(val);
|
||||||
@ -224,7 +230,11 @@ struct UnaryIndexFuncForMatch {
|
|||||||
RegexMatcher matcher(regex_pattern);
|
RegexMatcher matcher(regex_pattern);
|
||||||
for (int64_t i = 0; i < cnt; i++) {
|
for (int64_t i = 0; i < cnt; i++) {
|
||||||
auto raw = index->Reverse_Lookup(i);
|
auto raw = index->Reverse_Lookup(i);
|
||||||
res[i] = matcher(raw);
|
if (!raw.has_value()) {
|
||||||
|
res[i] = false;
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
res[i] = matcher(raw.value());
|
||||||
}
|
}
|
||||||
return res;
|
return res;
|
||||||
}
|
}
|
||||||
|
|||||||
@ -68,6 +68,7 @@ PhyFilterBitsNode::GetOutput() {
|
|||||||
operator_context_->get_exec_context(), exprs_.get(), input_.get());
|
operator_context_->get_exec_context(), exprs_.get(), input_.get());
|
||||||
|
|
||||||
TargetBitmap bitset;
|
TargetBitmap bitset;
|
||||||
|
TargetBitmap valid_bitset;
|
||||||
while (num_processed_rows_ < need_process_rows_) {
|
while (num_processed_rows_ < need_process_rows_) {
|
||||||
exprs_->Eval(0, 1, true, eval_ctx, results_);
|
exprs_->Eval(0, 1, true, eval_ctx, results_);
|
||||||
|
|
||||||
@ -79,13 +80,17 @@ PhyFilterBitsNode::GetOutput() {
|
|||||||
auto col_vec_size = col_vec->size();
|
auto col_vec_size = col_vec->size();
|
||||||
TargetBitmapView view(col_vec->GetRawData(), col_vec_size);
|
TargetBitmapView view(col_vec->GetRawData(), col_vec_size);
|
||||||
bitset.append(view);
|
bitset.append(view);
|
||||||
|
TargetBitmapView valid_view(col_vec->GetValidRawData(), col_vec_size);
|
||||||
|
valid_bitset.append(valid_view);
|
||||||
num_processed_rows_ += col_vec_size;
|
num_processed_rows_ += col_vec_size;
|
||||||
}
|
}
|
||||||
bitset.flip();
|
bitset.flip();
|
||||||
Assert(bitset.size() == need_process_rows_);
|
Assert(bitset.size() == need_process_rows_);
|
||||||
|
Assert(valid_bitset.size() == need_process_rows_);
|
||||||
// num_processed_rows_ = need_process_rows_;
|
// num_processed_rows_ = need_process_rows_;
|
||||||
std::vector<VectorPtr> col_res;
|
std::vector<VectorPtr> col_res;
|
||||||
col_res.push_back(std::make_shared<ColumnVector>(std::move(bitset)));
|
col_res.push_back(std::make_shared<ColumnVector>(std::move(bitset),
|
||||||
|
std::move(valid_bitset)));
|
||||||
std::chrono::high_resolution_clock::time_point scalar_end =
|
std::chrono::high_resolution_clock::time_point scalar_end =
|
||||||
std::chrono::high_resolution_clock::now();
|
std::chrono::high_resolution_clock::now();
|
||||||
double scalar_cost =
|
double scalar_cost =
|
||||||
|
|||||||
@ -51,13 +51,15 @@ PhyMvccNode::GetOutput() {
|
|||||||
is_finished_ = true;
|
is_finished_ = true;
|
||||||
return nullptr;
|
return nullptr;
|
||||||
}
|
}
|
||||||
|
// the first vector is filtering result and second bitset is a valid bitset
|
||||||
auto col_input =
|
// if valid_bitset[i]==false, means result[i] is null
|
||||||
is_source_node_
|
auto col_input = is_source_node_ ? std::make_shared<ColumnVector>(
|
||||||
? std::make_shared<ColumnVector>(TargetBitmap(active_count_))
|
TargetBitmap(active_count_),
|
||||||
|
TargetBitmap(active_count_))
|
||||||
: GetColumnVector(input_);
|
: GetColumnVector(input_);
|
||||||
|
|
||||||
TargetBitmapView data(col_input->GetRawData(), col_input->size());
|
TargetBitmapView data(col_input->GetRawData(), col_input->size());
|
||||||
|
// need to expose null?
|
||||||
segment_->mask_with_timestamps(data, query_timestamp_);
|
segment_->mask_with_timestamps(data, query_timestamp_);
|
||||||
segment_->mask_with_delete(data, active_count_, query_timestamp_);
|
segment_->mask_with_delete(data, active_count_, query_timestamp_);
|
||||||
is_finished_ = true;
|
is_finished_ = true;
|
||||||
|
|||||||
@ -100,7 +100,9 @@ class SealedDataGetter : public DataGetter<T> {
|
|||||||
}
|
}
|
||||||
return field_data_->operator[](idx);
|
return field_data_->operator[](idx);
|
||||||
} else {
|
} else {
|
||||||
return (*field_index_).Reverse_Lookup(idx);
|
auto raw = (*field_index_).Reverse_Lookup(idx);
|
||||||
|
AssertInfo(raw.has_value(), "field data not found");
|
||||||
|
return raw.value();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|||||||
@ -80,7 +80,7 @@ BitmapIndex<T>::Build(const Config& config) {
|
|||||||
|
|
||||||
template <typename T>
|
template <typename T>
|
||||||
void
|
void
|
||||||
BitmapIndex<T>::Build(size_t n, const T* data) {
|
BitmapIndex<T>::Build(size_t n, const T* data, const bool* valid_data) {
|
||||||
if (is_built_) {
|
if (is_built_) {
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
@ -89,12 +89,14 @@ BitmapIndex<T>::Build(size_t n, const T* data) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
total_num_rows_ = n;
|
total_num_rows_ = n;
|
||||||
valid_bitset = TargetBitmap(total_num_rows_, false);
|
valid_bitset_ = TargetBitmap(total_num_rows_, false);
|
||||||
|
|
||||||
T* p = const_cast<T*>(data);
|
T* p = const_cast<T*>(data);
|
||||||
for (int i = 0; i < n; ++i, ++p) {
|
for (int i = 0; i < n; ++i, ++p) {
|
||||||
|
if (valid_data == nullptr || valid_data[i]) {
|
||||||
data_[*p].add(i);
|
data_[*p].add(i);
|
||||||
valid_bitset.set(i);
|
valid_bitset_.set(i);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if (data_.size() < DEFAULT_BITMAP_INDEX_BUILD_MODE_BOUND) {
|
if (data_.size() < DEFAULT_BITMAP_INDEX_BUILD_MODE_BOUND) {
|
||||||
@ -120,7 +122,7 @@ BitmapIndex<T>::BuildPrimitiveField(
|
|||||||
if (data->is_valid(i)) {
|
if (data->is_valid(i)) {
|
||||||
auto val = reinterpret_cast<const T*>(data->RawValue(i));
|
auto val = reinterpret_cast<const T*>(data->RawValue(i));
|
||||||
data_[*val].add(offset);
|
data_[*val].add(offset);
|
||||||
valid_bitset.set(offset);
|
valid_bitset_.set(offset);
|
||||||
}
|
}
|
||||||
offset++;
|
offset++;
|
||||||
}
|
}
|
||||||
@ -139,7 +141,7 @@ BitmapIndex<T>::BuildWithFieldData(
|
|||||||
PanicInfo(DataIsEmpty, "scalar bitmap index can not build null values");
|
PanicInfo(DataIsEmpty, "scalar bitmap index can not build null values");
|
||||||
}
|
}
|
||||||
total_num_rows_ = total_num_rows;
|
total_num_rows_ = total_num_rows;
|
||||||
valid_bitset = TargetBitmap(total_num_rows_, false);
|
valid_bitset_ = TargetBitmap(total_num_rows_, false);
|
||||||
|
|
||||||
switch (schema_.data_type()) {
|
switch (schema_.data_type()) {
|
||||||
case proto::schema::DataType::Bool:
|
case proto::schema::DataType::Bool:
|
||||||
@ -184,7 +186,7 @@ BitmapIndex<T>::BuildArrayField(const std::vector<FieldDataPtr>& field_datas) {
|
|||||||
auto val = array->template get_data<T>(j);
|
auto val = array->template get_data<T>(j);
|
||||||
data_[val].add(offset);
|
data_[val].add(offset);
|
||||||
}
|
}
|
||||||
valid_bitset.set(offset);
|
valid_bitset_.set(offset);
|
||||||
}
|
}
|
||||||
offset++;
|
offset++;
|
||||||
}
|
}
|
||||||
@ -359,7 +361,7 @@ BitmapIndex<T>::DeserializeIndexData(const uint8_t* data_ptr,
|
|||||||
data_[key] = value;
|
data_[key] = value;
|
||||||
}
|
}
|
||||||
for (const auto& v : value) {
|
for (const auto& v : value) {
|
||||||
valid_bitset.set(v);
|
valid_bitset_.set(v);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -422,7 +424,7 @@ BitmapIndex<std::string>::DeserializeIndexData(const uint8_t* data_ptr,
|
|||||||
data_[key] = value;
|
data_[key] = value;
|
||||||
}
|
}
|
||||||
for (const auto& v : value) {
|
for (const auto& v : value) {
|
||||||
valid_bitset.set(v);
|
valid_bitset_.set(v);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -516,7 +518,7 @@ BitmapIndex<T>::LoadWithoutAssemble(const BinarySet& binary_set,
|
|||||||
index_meta_buffer->size);
|
index_meta_buffer->size);
|
||||||
auto index_length = index_meta.first;
|
auto index_length = index_meta.first;
|
||||||
total_num_rows_ = index_meta.second;
|
total_num_rows_ = index_meta.second;
|
||||||
valid_bitset = TargetBitmap(total_num_rows_, false);
|
valid_bitset_ = TargetBitmap(total_num_rows_, false);
|
||||||
|
|
||||||
auto index_data_buffer = binary_set.GetByName(BITMAP_INDEX_DATA);
|
auto index_data_buffer = binary_set.GetByName(BITMAP_INDEX_DATA);
|
||||||
|
|
||||||
@ -645,7 +647,7 @@ BitmapIndex<T>::NotIn(const size_t n, const T* values) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
// NotIn(null) and In(null) is both false, need to mask with IsNotNull operate
|
// NotIn(null) and In(null) is both false, need to mask with IsNotNull operate
|
||||||
res &= valid_bitset;
|
res &= valid_bitset_;
|
||||||
return res;
|
return res;
|
||||||
} else {
|
} else {
|
||||||
TargetBitmap res(total_num_rows_, false);
|
TargetBitmap res(total_num_rows_, false);
|
||||||
@ -657,7 +659,7 @@ BitmapIndex<T>::NotIn(const size_t n, const T* values) {
|
|||||||
}
|
}
|
||||||
res.flip();
|
res.flip();
|
||||||
// NotIn(null) and In(null) is both false, need to mask with IsNotNull operate
|
// NotIn(null) and In(null) is both false, need to mask with IsNotNull operate
|
||||||
res &= valid_bitset;
|
res &= valid_bitset_;
|
||||||
return res;
|
return res;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -667,7 +669,7 @@ const TargetBitmap
|
|||||||
BitmapIndex<T>::IsNull() {
|
BitmapIndex<T>::IsNull() {
|
||||||
AssertInfo(is_built_, "index has not been built");
|
AssertInfo(is_built_, "index has not been built");
|
||||||
TargetBitmap res(total_num_rows_, true);
|
TargetBitmap res(total_num_rows_, true);
|
||||||
res &= valid_bitset;
|
res &= valid_bitset_;
|
||||||
res.flip();
|
res.flip();
|
||||||
return res;
|
return res;
|
||||||
}
|
}
|
||||||
@ -677,7 +679,7 @@ const TargetBitmap
|
|||||||
BitmapIndex<T>::IsNotNull() {
|
BitmapIndex<T>::IsNotNull() {
|
||||||
AssertInfo(is_built_, "index has not been built");
|
AssertInfo(is_built_, "index has not been built");
|
||||||
TargetBitmap res(total_num_rows_, true);
|
TargetBitmap res(total_num_rows_, true);
|
||||||
res &= valid_bitset;
|
res &= valid_bitset_;
|
||||||
return res;
|
return res;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -1086,11 +1088,15 @@ BitmapIndex<T>::Reverse_Lookup_InCache(size_t idx) const {
|
|||||||
}
|
}
|
||||||
|
|
||||||
template <typename T>
|
template <typename T>
|
||||||
T
|
std::optional<T>
|
||||||
BitmapIndex<T>::Reverse_Lookup(size_t idx) const {
|
BitmapIndex<T>::Reverse_Lookup(size_t idx) const {
|
||||||
AssertInfo(is_built_, "index has not been built");
|
AssertInfo(is_built_, "index has not been built");
|
||||||
AssertInfo(idx < total_num_rows_, "out of range of total coun");
|
AssertInfo(idx < total_num_rows_, "out of range of total coun");
|
||||||
|
|
||||||
|
if (!valid_bitset_[idx]) {
|
||||||
|
return std::nullopt;
|
||||||
|
}
|
||||||
|
|
||||||
if (use_offset_cache_) {
|
if (use_offset_cache_) {
|
||||||
return Reverse_Lookup_InCache(idx);
|
return Reverse_Lookup_InCache(idx);
|
||||||
}
|
}
|
||||||
@ -1125,6 +1131,7 @@ BitmapIndex<T>::Reverse_Lookup(size_t idx) const {
|
|||||||
fmt::format(
|
fmt::format(
|
||||||
"scalar bitmap index can not lookup target value of index {}",
|
"scalar bitmap index can not lookup target value of index {}",
|
||||||
idx));
|
idx));
|
||||||
|
return std::nullopt;
|
||||||
}
|
}
|
||||||
|
|
||||||
template <typename T>
|
template <typename T>
|
||||||
|
|||||||
@ -77,7 +77,7 @@ class BitmapIndex : public ScalarIndex<T> {
|
|||||||
}
|
}
|
||||||
|
|
||||||
void
|
void
|
||||||
Build(size_t n, const T* values) override;
|
Build(size_t n, const T* values, const bool* valid_data = nullptr) override;
|
||||||
|
|
||||||
void
|
void
|
||||||
Build(const Config& config = {}) override;
|
Build(const Config& config = {}) override;
|
||||||
@ -106,7 +106,7 @@ class BitmapIndex : public ScalarIndex<T> {
|
|||||||
T upper_bound_value,
|
T upper_bound_value,
|
||||||
bool ub_inclusive) override;
|
bool ub_inclusive) override;
|
||||||
|
|
||||||
T
|
std::optional<T>
|
||||||
Reverse_Lookup(size_t offset) const override;
|
Reverse_Lookup(size_t offset) const override;
|
||||||
|
|
||||||
int64_t
|
int64_t
|
||||||
@ -267,7 +267,7 @@ class BitmapIndex : public ScalarIndex<T> {
|
|||||||
std::shared_ptr<storage::MemFileManagerImpl> file_manager_;
|
std::shared_ptr<storage::MemFileManagerImpl> file_manager_;
|
||||||
|
|
||||||
// generate valid_bitset to speed up NotIn and IsNull and IsNotNull operate
|
// generate valid_bitset to speed up NotIn and IsNull and IsNotNull operate
|
||||||
TargetBitmap valid_bitset;
|
TargetBitmap valid_bitset_;
|
||||||
};
|
};
|
||||||
|
|
||||||
} // namespace index
|
} // namespace index
|
||||||
|
|||||||
@ -67,10 +67,12 @@ class HybridScalarIndex : public ScalarIndex<T> {
|
|||||||
}
|
}
|
||||||
|
|
||||||
void
|
void
|
||||||
Build(size_t n, const T* values) override {
|
Build(size_t n,
|
||||||
|
const T* values,
|
||||||
|
const bool* valid_data = nullptr) override {
|
||||||
SelectIndexBuildType(n, values);
|
SelectIndexBuildType(n, values);
|
||||||
auto index = GetInternalIndex();
|
auto index = GetInternalIndex();
|
||||||
index->Build(n, values);
|
index->Build(n, values, valid_data);
|
||||||
is_built_ = true;
|
is_built_ = true;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -133,7 +135,7 @@ class HybridScalarIndex : public ScalarIndex<T> {
|
|||||||
lower_bound_value, lb_inclusive, upper_bound_value, ub_inclusive);
|
lower_bound_value, lb_inclusive, upper_bound_value, ub_inclusive);
|
||||||
}
|
}
|
||||||
|
|
||||||
T
|
std::optional<T>
|
||||||
Reverse_Lookup(size_t offset) const override {
|
Reverse_Lookup(size_t offset) const override {
|
||||||
return internal_index_->Reverse_Lookup(offset);
|
return internal_index_->Reverse_Lookup(offset);
|
||||||
}
|
}
|
||||||
|
|||||||
@ -94,7 +94,7 @@ class InvertedIndexTantivy : public ScalarIndex<T> {
|
|||||||
* deprecated, only used in small chunk index.
|
* deprecated, only used in small chunk index.
|
||||||
*/
|
*/
|
||||||
void
|
void
|
||||||
Build(size_t n, const T* values) override {
|
Build(size_t n, const T* values, const bool* valid_data) override {
|
||||||
PanicInfo(ErrorCode::NotImplemented, "Build should not be called");
|
PanicInfo(ErrorCode::NotImplemented, "Build should not be called");
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -136,7 +136,7 @@ class InvertedIndexTantivy : public ScalarIndex<T> {
|
|||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
T
|
std::optional<T>
|
||||||
Reverse_Lookup(size_t offset) const override {
|
Reverse_Lookup(size_t offset) const override {
|
||||||
PanicInfo(ErrorCode::NotImplemented,
|
PanicInfo(ErrorCode::NotImplemented,
|
||||||
"Reverse_Lookup should not be handled by inverted index");
|
"Reverse_Lookup should not be handled by inverted index");
|
||||||
|
|||||||
@ -80,7 +80,7 @@ class ScalarIndex : public IndexBase {
|
|||||||
GetIndexType() const = 0;
|
GetIndexType() const = 0;
|
||||||
|
|
||||||
virtual void
|
virtual void
|
||||||
Build(size_t n, const T* values) = 0;
|
Build(size_t n, const T* values, const bool* valid_data = nullptr) = 0;
|
||||||
|
|
||||||
virtual const TargetBitmap
|
virtual const TargetBitmap
|
||||||
In(size_t n, const T* values) = 0;
|
In(size_t n, const T* values) = 0;
|
||||||
@ -117,7 +117,7 @@ class ScalarIndex : public IndexBase {
|
|||||||
T upper_bound_value,
|
T upper_bound_value,
|
||||||
bool ub_inclusive) = 0;
|
bool ub_inclusive) = 0;
|
||||||
|
|
||||||
virtual T
|
virtual std::optional<T>
|
||||||
Reverse_Lookup(size_t offset) const = 0;
|
Reverse_Lookup(size_t offset) const = 0;
|
||||||
|
|
||||||
virtual const TargetBitmap
|
virtual const TargetBitmap
|
||||||
|
|||||||
@ -16,6 +16,7 @@
|
|||||||
|
|
||||||
#include <algorithm>
|
#include <algorithm>
|
||||||
#include <memory>
|
#include <memory>
|
||||||
|
#include <optional>
|
||||||
#include <utility>
|
#include <utility>
|
||||||
#include <pb/schema.pb.h>
|
#include <pb/schema.pb.h>
|
||||||
#include <vector>
|
#include <vector>
|
||||||
@ -61,7 +62,7 @@ ScalarIndexSort<T>::Build(const Config& config) {
|
|||||||
|
|
||||||
template <typename T>
|
template <typename T>
|
||||||
void
|
void
|
||||||
ScalarIndexSort<T>::Build(size_t n, const T* values) {
|
ScalarIndexSort<T>::Build(size_t n, const T* values, const bool* valid_data) {
|
||||||
if (is_built_)
|
if (is_built_)
|
||||||
return;
|
return;
|
||||||
if (n == 0) {
|
if (n == 0) {
|
||||||
@ -69,13 +70,17 @@ ScalarIndexSort<T>::Build(size_t n, const T* values) {
|
|||||||
}
|
}
|
||||||
data_.reserve(n);
|
data_.reserve(n);
|
||||||
total_num_rows_ = n;
|
total_num_rows_ = n;
|
||||||
valid_bitset = TargetBitmap(total_num_rows_, false);
|
valid_bitset_ = TargetBitmap(total_num_rows_, false);
|
||||||
idx_to_offsets_.resize(n);
|
idx_to_offsets_.resize(n);
|
||||||
|
|
||||||
T* p = const_cast<T*>(values);
|
T* p = const_cast<T*>(values);
|
||||||
for (size_t i = 0; i < n; ++i) {
|
for (size_t i = 0; i < n; ++i, ++p) {
|
||||||
data_.emplace_back(IndexStructure(*p++, i));
|
if (!valid_data || valid_data[i]) {
|
||||||
valid_bitset.set(i);
|
data_.emplace_back(IndexStructure(*p, i));
|
||||||
|
valid_bitset_.set(i);
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
std::sort(data_.begin(), data_.end());
|
std::sort(data_.begin(), data_.end());
|
||||||
for (size_t i = 0; i < data_.size(); ++i) {
|
for (size_t i = 0; i < data_.size(); ++i) {
|
||||||
idx_to_offsets_[data_[i].idx_] = i;
|
idx_to_offsets_[data_[i].idx_] = i;
|
||||||
@ -97,7 +102,7 @@ ScalarIndexSort<T>::BuildWithFieldData(
|
|||||||
}
|
}
|
||||||
|
|
||||||
data_.reserve(length);
|
data_.reserve(length);
|
||||||
valid_bitset = TargetBitmap(total_num_rows_, false);
|
valid_bitset_ = TargetBitmap(total_num_rows_, false);
|
||||||
int64_t offset = 0;
|
int64_t offset = 0;
|
||||||
for (const auto& data : field_datas) {
|
for (const auto& data : field_datas) {
|
||||||
auto slice_num = data->get_num_rows();
|
auto slice_num = data->get_num_rows();
|
||||||
@ -105,7 +110,7 @@ ScalarIndexSort<T>::BuildWithFieldData(
|
|||||||
if (data->is_valid(i)) {
|
if (data->is_valid(i)) {
|
||||||
auto value = reinterpret_cast<const T*>(data->RawValue(i));
|
auto value = reinterpret_cast<const T*>(data->RawValue(i));
|
||||||
data_.emplace_back(IndexStructure(*value, offset));
|
data_.emplace_back(IndexStructure(*value, offset));
|
||||||
valid_bitset.set(offset);
|
valid_bitset_.set(offset);
|
||||||
}
|
}
|
||||||
offset++;
|
offset++;
|
||||||
}
|
}
|
||||||
@ -175,11 +180,11 @@ ScalarIndexSort<T>::LoadWithoutAssemble(const BinarySet& index_binary,
|
|||||||
index_num_rows->data.get(),
|
index_num_rows->data.get(),
|
||||||
(size_t)index_num_rows->size);
|
(size_t)index_num_rows->size);
|
||||||
idx_to_offsets_.resize(total_num_rows_);
|
idx_to_offsets_.resize(total_num_rows_);
|
||||||
valid_bitset = TargetBitmap(total_num_rows_, false);
|
valid_bitset_ = TargetBitmap(total_num_rows_, false);
|
||||||
memcpy(data_.data(), index_data->data.get(), (size_t)index_data->size);
|
memcpy(data_.data(), index_data->data.get(), (size_t)index_data->size);
|
||||||
for (size_t i = 0; i < data_.size(); ++i) {
|
for (size_t i = 0; i < data_.size(); ++i) {
|
||||||
idx_to_offsets_[data_[i].idx_] = i;
|
idx_to_offsets_[data_[i].idx_] = i;
|
||||||
valid_bitset.set(data_[i].idx_);
|
valid_bitset_.set(data_[i].idx_);
|
||||||
}
|
}
|
||||||
|
|
||||||
is_built_ = true;
|
is_built_ = true;
|
||||||
@ -256,7 +261,7 @@ ScalarIndexSort<T>::NotIn(const size_t n, const T* values) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
// NotIn(null) and In(null) are both false, so the result must be masked with the valid (not-null) bitset
|
// NotIn(null) and In(null) are both false, so the result must be masked with the valid (not-null) bitset
|
||||||
bitset &= valid_bitset;
|
bitset &= valid_bitset_;
|
||||||
return bitset;
|
return bitset;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -265,7 +270,7 @@ const TargetBitmap
|
|||||||
ScalarIndexSort<T>::IsNull() {
|
ScalarIndexSort<T>::IsNull() {
|
||||||
AssertInfo(is_built_, "index has not been built");
|
AssertInfo(is_built_, "index has not been built");
|
||||||
TargetBitmap bitset(total_num_rows_, true);
|
TargetBitmap bitset(total_num_rows_, true);
|
||||||
bitset &= valid_bitset;
|
bitset &= valid_bitset_;
|
||||||
bitset.flip();
|
bitset.flip();
|
||||||
return bitset;
|
return bitset;
|
||||||
}
|
}
|
||||||
@ -275,7 +280,7 @@ const TargetBitmap
|
|||||||
ScalarIndexSort<T>::IsNotNull() {
|
ScalarIndexSort<T>::IsNotNull() {
|
||||||
AssertInfo(is_built_, "index has not been built");
|
AssertInfo(is_built_, "index has not been built");
|
||||||
TargetBitmap bitset(total_num_rows_, true);
|
TargetBitmap bitset(total_num_rows_, true);
|
||||||
bitset &= valid_bitset;
|
bitset &= valid_bitset_;
|
||||||
return bitset;
|
return bitset;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -355,11 +360,14 @@ ScalarIndexSort<T>::Range(T lower_bound_value,
|
|||||||
}
|
}
|
||||||
|
|
||||||
template <typename T>
|
template <typename T>
|
||||||
T
|
std::optional<T>
|
||||||
ScalarIndexSort<T>::Reverse_Lookup(size_t idx) const {
|
ScalarIndexSort<T>::Reverse_Lookup(size_t idx) const {
|
||||||
AssertInfo(idx < idx_to_offsets_.size(), "out of range of total count");
|
AssertInfo(idx < idx_to_offsets_.size(), "out of range of total count");
|
||||||
AssertInfo(is_built_, "index has not been built");
|
AssertInfo(is_built_, "index has not been built");
|
||||||
|
|
||||||
|
if (!valid_bitset_[idx]) {
|
||||||
|
return std::nullopt;
|
||||||
|
}
|
||||||
auto offset = idx_to_offsets_[idx];
|
auto offset = idx_to_offsets_[idx];
|
||||||
return data_[offset].a_;
|
return data_[offset].a_;
|
||||||
}
|
}
|
||||||
|
|||||||
@ -56,7 +56,7 @@ class ScalarIndexSort : public ScalarIndex<T> {
|
|||||||
}
|
}
|
||||||
|
|
||||||
void
|
void
|
||||||
Build(size_t n, const T* values) override;
|
Build(size_t n, const T* values, const bool* valid_data = nullptr) override;
|
||||||
|
|
||||||
void
|
void
|
||||||
Build(const Config& config = {}) override;
|
Build(const Config& config = {}) override;
|
||||||
@ -82,7 +82,7 @@ class ScalarIndexSort : public ScalarIndex<T> {
|
|||||||
T upper_bound_value,
|
T upper_bound_value,
|
||||||
bool ub_inclusive) override;
|
bool ub_inclusive) override;
|
||||||
|
|
||||||
T
|
std::optional<T>
|
||||||
Reverse_Lookup(size_t offset) const override;
|
Reverse_Lookup(size_t offset) const override;
|
||||||
|
|
||||||
int64_t
|
int64_t
|
||||||
@ -127,8 +127,8 @@ class ScalarIndexSort : public ScalarIndex<T> {
|
|||||||
std::vector<IndexStructure<T>> data_;
|
std::vector<IndexStructure<T>> data_;
|
||||||
std::shared_ptr<storage::MemFileManagerImpl> file_manager_;
|
std::shared_ptr<storage::MemFileManagerImpl> file_manager_;
|
||||||
size_t total_num_rows_{0};
|
size_t total_num_rows_{0};
|
||||||
// valid_bitset is generated to speed up the NotIn, IsNull and IsNotNull operations
|
// valid_bitset_ is generated to speed up the NotIn, IsNull and IsNotNull operations
|
||||||
TargetBitmap valid_bitset;
|
TargetBitmap valid_bitset_;
|
||||||
};
|
};
|
||||||
|
|
||||||
template <typename T>
|
template <typename T>
|
||||||
|
|||||||
@ -19,6 +19,7 @@
|
|||||||
#include <boost/uuid/uuid_generators.hpp>
|
#include <boost/uuid/uuid_generators.hpp>
|
||||||
#include <cstring>
|
#include <cstring>
|
||||||
#include <memory>
|
#include <memory>
|
||||||
|
#include <optional>
|
||||||
#include <stdlib.h>
|
#include <stdlib.h>
|
||||||
#include <stdio.h>
|
#include <stdio.h>
|
||||||
#include <fcntl.h>
|
#include <fcntl.h>
|
||||||
@ -118,7 +119,9 @@ StringIndexMarisa::BuildWithFieldData(
|
|||||||
}
|
}
|
||||||
|
|
||||||
void
|
void
|
||||||
StringIndexMarisa::Build(size_t n, const std::string* values) {
|
StringIndexMarisa::Build(size_t n,
|
||||||
|
const std::string* values,
|
||||||
|
const bool* valid_data) {
|
||||||
if (built_) {
|
if (built_) {
|
||||||
PanicInfo(IndexAlreadyBuild, "index has been built");
|
PanicInfo(IndexAlreadyBuild, "index has been built");
|
||||||
}
|
}
|
||||||
@ -127,12 +130,14 @@ StringIndexMarisa::Build(size_t n, const std::string* values) {
|
|||||||
{
|
{
|
||||||
// fill key set.
|
// fill key set.
|
||||||
for (size_t i = 0; i < n; i++) {
|
for (size_t i = 0; i < n; i++) {
|
||||||
|
if (valid_data == nullptr || valid_data[i]) {
|
||||||
keyset.push_back(values[i].c_str());
|
keyset.push_back(values[i].c_str());
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
trie_.build(keyset, MARISA_LABEL_ORDER);
|
trie_.build(keyset, MARISA_LABEL_ORDER);
|
||||||
fill_str_ids(n, values);
|
fill_str_ids(n, values, valid_data);
|
||||||
fill_offsets();
|
fill_offsets();
|
||||||
|
|
||||||
built_ = true;
|
built_ = true;
|
||||||
@ -213,7 +218,7 @@ StringIndexMarisa::LoadWithoutAssemble(const BinarySet& set,
|
|||||||
|
|
||||||
auto str_ids = set.GetByName(MARISA_STR_IDS);
|
auto str_ids = set.GetByName(MARISA_STR_IDS);
|
||||||
auto str_ids_len = str_ids->size;
|
auto str_ids_len = str_ids->size;
|
||||||
str_ids_.resize(str_ids_len / sizeof(size_t));
|
str_ids_.resize(str_ids_len / sizeof(size_t), MARISA_NULL_KEY_ID);
|
||||||
memcpy(str_ids_.data(), str_ids->data.get(), str_ids_len);
|
memcpy(str_ids_.data(), str_ids->data.get(), str_ids_len);
|
||||||
|
|
||||||
fill_offsets();
|
fill_offsets();
|
||||||
@ -491,9 +496,14 @@ StringIndexMarisa::PrefixMatch(std::string_view prefix) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
void
|
void
|
||||||
StringIndexMarisa::fill_str_ids(size_t n, const std::string* values) {
|
StringIndexMarisa::fill_str_ids(size_t n,
|
||||||
str_ids_.resize(n);
|
const std::string* values,
|
||||||
|
const bool* valid_data) {
|
||||||
|
str_ids_.resize(n, MARISA_NULL_KEY_ID);
|
||||||
for (size_t i = 0; i < n; i++) {
|
for (size_t i = 0; i < n; i++) {
|
||||||
|
if (valid_data != nullptr && !valid_data[i]) {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
auto str = values[i];
|
auto str = values[i];
|
||||||
auto str_id = lookup(str);
|
auto str_id = lookup(str);
|
||||||
AssertInfo(valid_str_id(str_id), "invalid marisa key");
|
AssertInfo(valid_str_id(str_id), "invalid marisa key");
|
||||||
@ -534,11 +544,13 @@ StringIndexMarisa::prefix_match(const std::string_view prefix) {
|
|||||||
}
|
}
|
||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
|
std::optional<std::string>
|
||||||
std::string
|
|
||||||
StringIndexMarisa::Reverse_Lookup(size_t offset) const {
|
StringIndexMarisa::Reverse_Lookup(size_t offset) const {
|
||||||
AssertInfo(offset < str_ids_.size(), "out of range of total count");
|
AssertInfo(offset < str_ids_.size(), "out of range of total count");
|
||||||
marisa::Agent agent;
|
marisa::Agent agent;
|
||||||
|
if (str_ids_[offset] < 0) {
|
||||||
|
return std::nullopt;
|
||||||
|
}
|
||||||
agent.set_query(str_ids_[offset]);
|
agent.set_query(str_ids_[offset]);
|
||||||
trie_.reverse_lookup(agent);
|
trie_.reverse_lookup(agent);
|
||||||
return std::string(agent.key().ptr(), agent.key().length());
|
return std::string(agent.key().ptr(), agent.key().length());
|
||||||
|
|||||||
@ -55,7 +55,9 @@ class StringIndexMarisa : public StringIndex {
|
|||||||
}
|
}
|
||||||
|
|
||||||
void
|
void
|
||||||
Build(size_t n, const std::string* values) override;
|
Build(size_t n,
|
||||||
|
const std::string* values,
|
||||||
|
const bool* valid_data = nullptr) override;
|
||||||
|
|
||||||
void
|
void
|
||||||
Build(const Config& config = {}) override;
|
Build(const Config& config = {}) override;
|
||||||
@ -87,7 +89,7 @@ class StringIndexMarisa : public StringIndex {
|
|||||||
const TargetBitmap
|
const TargetBitmap
|
||||||
PrefixMatch(const std::string_view prefix) override;
|
PrefixMatch(const std::string_view prefix) override;
|
||||||
|
|
||||||
std::string
|
std::optional<std::string>
|
||||||
Reverse_Lookup(size_t offset) const override;
|
Reverse_Lookup(size_t offset) const override;
|
||||||
|
|
||||||
BinarySet
|
BinarySet
|
||||||
@ -100,7 +102,7 @@ class StringIndexMarisa : public StringIndex {
|
|||||||
|
|
||||||
private:
|
private:
|
||||||
void
|
void
|
||||||
fill_str_ids(size_t n, const std::string* values);
|
fill_str_ids(size_t n, const std::string* values, const bool* valid_data);
|
||||||
|
|
||||||
void
|
void
|
||||||
fill_offsets();
|
fill_offsets();
|
||||||
@ -122,7 +124,7 @@ class StringIndexMarisa : public StringIndex {
|
|||||||
private:
|
private:
|
||||||
Config config_;
|
Config config_;
|
||||||
marisa::Trie trie_;
|
marisa::Trie trie_;
|
||||||
std::vector<size_t> str_ids_; // used to retrieve.
|
std::vector<int64_t> str_ids_; // used to retrieve.
|
||||||
std::map<size_t, std::vector<size_t>> str_ids_to_offsets_;
|
std::map<size_t, std::vector<size_t>> str_ids_to_offsets_;
|
||||||
bool built_ = false;
|
bool built_ = false;
|
||||||
std::shared_ptr<storage::MemFileManagerImpl> file_manager_;
|
std::shared_ptr<storage::MemFileManagerImpl> file_manager_;
|
||||||
|
|||||||
@ -26,7 +26,7 @@ template <typename T>
|
|||||||
inline index::ScalarIndexPtr<T>
|
inline index::ScalarIndexPtr<T>
|
||||||
generate_scalar_index(Span<T> data) {
|
generate_scalar_index(Span<T> data) {
|
||||||
auto indexing = std::make_unique<index::ScalarIndexSort<T>>();
|
auto indexing = std::make_unique<index::ScalarIndexSort<T>>();
|
||||||
indexing->Build(data.row_count(), data.data());
|
indexing->Build(data.row_count(), data.data(), data.valid_data());
|
||||||
return indexing;
|
return indexing;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -34,7 +34,7 @@ template <>
|
|||||||
inline index::ScalarIndexPtr<std::string>
|
inline index::ScalarIndexPtr<std::string>
|
||||||
generate_scalar_index(Span<std::string> data) {
|
generate_scalar_index(Span<std::string> data) {
|
||||||
auto indexing = index::CreateStringIndexSort();
|
auto indexing = index::CreateStringIndexSort();
|
||||||
indexing->Build(data.row_count(), data.data());
|
indexing->Build(data.row_count(), data.data(), data.valid_data());
|
||||||
return indexing;
|
return indexing;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@ -196,8 +196,9 @@ ChunkedSegmentSealedImpl::LoadScalarIndex(const LoadIndexInfo& info) {
|
|||||||
if (!is_sorted_by_pk_ && insert_record_.empty_pks() &&
|
if (!is_sorted_by_pk_ && insert_record_.empty_pks() &&
|
||||||
int64_index->HasRawData()) {
|
int64_index->HasRawData()) {
|
||||||
for (int i = 0; i < row_count; ++i) {
|
for (int i = 0; i < row_count; ++i) {
|
||||||
insert_record_.insert_pk(int64_index->Reverse_Lookup(i),
|
auto raw = int64_index->Reverse_Lookup(i);
|
||||||
i);
|
AssertInfo(raw.has_value(), "pk not found");
|
||||||
|
insert_record_.insert_pk(raw.value(), i);
|
||||||
}
|
}
|
||||||
insert_record_.seal_pks();
|
insert_record_.seal_pks();
|
||||||
}
|
}
|
||||||
@ -210,8 +211,9 @@ ChunkedSegmentSealedImpl::LoadScalarIndex(const LoadIndexInfo& info) {
|
|||||||
if (!is_sorted_by_pk_ && insert_record_.empty_pks() &&
|
if (!is_sorted_by_pk_ && insert_record_.empty_pks() &&
|
||||||
string_index->HasRawData()) {
|
string_index->HasRawData()) {
|
||||||
for (int i = 0; i < row_count; ++i) {
|
for (int i = 0; i < row_count; ++i) {
|
||||||
insert_record_.insert_pk(
|
auto raw = string_index->Reverse_Lookup(i);
|
||||||
string_index->Reverse_Lookup(i), i);
|
AssertInfo(raw.has_value(), "pk not found");
|
||||||
|
insert_record_.insert_pk(raw.value(), i);
|
||||||
}
|
}
|
||||||
insert_record_.seal_pks();
|
insert_record_.seal_pks();
|
||||||
}
|
}
|
||||||
@ -1630,7 +1632,11 @@ ChunkedSegmentSealedImpl::CreateTextIndex(FieldId field_id) {
|
|||||||
"converted to string index");
|
"converted to string index");
|
||||||
auto n = impl->Size();
|
auto n = impl->Size();
|
||||||
for (size_t i = 0; i < n; i++) {
|
for (size_t i = 0; i < n; i++) {
|
||||||
index->AddText(impl->Reverse_Lookup(i), i);
|
auto raw = impl->Reverse_Lookup(i);
|
||||||
|
if (!raw.has_value()) {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
index->AddText(raw.value(), i);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@ -299,6 +299,7 @@ ScalarFieldIndexing<T>::BuildIndexRange(int64_t ack_beg,
|
|||||||
for (int chunk_id = ack_beg; chunk_id < ack_end; chunk_id++) {
|
for (int chunk_id = ack_beg; chunk_id < ack_end; chunk_id++) {
|
||||||
auto chunk_data = source->get_chunk_data(chunk_id);
|
auto chunk_data = source->get_chunk_data(chunk_id);
|
||||||
// build index for chunk
|
// build index for chunk
|
||||||
|
// seem no lint, not pass valid_data here
|
||||||
// TODO
|
// TODO
|
||||||
if constexpr (std::is_same_v<T, std::string>) {
|
if constexpr (std::is_same_v<T, std::string>) {
|
||||||
auto indexing = index::CreateStringIndexSort();
|
auto indexing = index::CreateStringIndexSort();
|
||||||
|
|||||||
@ -198,8 +198,9 @@ SegmentSealedImpl::LoadScalarIndex(const LoadIndexInfo& info) {
|
|||||||
if (!is_sorted_by_pk_ && insert_record_.empty_pks() &&
|
if (!is_sorted_by_pk_ && insert_record_.empty_pks() &&
|
||||||
int64_index->HasRawData()) {
|
int64_index->HasRawData()) {
|
||||||
for (int i = 0; i < row_count; ++i) {
|
for (int i = 0; i < row_count; ++i) {
|
||||||
insert_record_.insert_pk(int64_index->Reverse_Lookup(i),
|
auto raw = int64_index->Reverse_Lookup(i);
|
||||||
i);
|
AssertInfo(raw.has_value(), "Primary key not found");
|
||||||
|
insert_record_.insert_pk(raw.value(), i);
|
||||||
}
|
}
|
||||||
insert_record_.seal_pks();
|
insert_record_.seal_pks();
|
||||||
}
|
}
|
||||||
@ -212,8 +213,9 @@ SegmentSealedImpl::LoadScalarIndex(const LoadIndexInfo& info) {
|
|||||||
if (!is_sorted_by_pk_ && insert_record_.empty_pks() &&
|
if (!is_sorted_by_pk_ && insert_record_.empty_pks() &&
|
||||||
string_index->HasRawData()) {
|
string_index->HasRawData()) {
|
||||||
for (int i = 0; i < row_count; ++i) {
|
for (int i = 0; i < row_count; ++i) {
|
||||||
insert_record_.insert_pk(
|
auto raw = string_index->Reverse_Lookup(i);
|
||||||
string_index->Reverse_Lookup(i), i);
|
AssertInfo(raw.has_value(), "Primary key not found");
|
||||||
|
insert_record_.insert_pk(raw.value(), i);
|
||||||
}
|
}
|
||||||
insert_record_.seal_pks();
|
insert_record_.seal_pks();
|
||||||
}
|
}
|
||||||
@ -2108,7 +2110,11 @@ SegmentSealedImpl::CreateTextIndex(FieldId field_id) {
|
|||||||
"converted to string index");
|
"converted to string index");
|
||||||
auto n = impl->Size();
|
auto n = impl->Size();
|
||||||
for (size_t i = 0; i < n; i++) {
|
for (size_t i = 0; i < n; i++) {
|
||||||
index->AddText(impl->Reverse_Lookup(i), i);
|
auto raw = impl->Reverse_Lookup(i);
|
||||||
|
if (!raw.has_value()) {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
index->AddText(raw.value(), i);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@ -683,6 +683,11 @@ ReverseDataFromIndex(const index::IndexBase* index,
|
|||||||
data_array->set_field_id(field_meta.get_id().get());
|
data_array->set_field_id(field_meta.get_id().get());
|
||||||
data_array->set_type(static_cast<milvus::proto::schema::DataType>(
|
data_array->set_type(static_cast<milvus::proto::schema::DataType>(
|
||||||
field_meta.get_data_type()));
|
field_meta.get_data_type()));
|
||||||
|
auto nullable = field_meta.is_nullable();
|
||||||
|
std::vector<bool> valid_data;
|
||||||
|
if (nullable) {
|
||||||
|
valid_data.resize(count);
|
||||||
|
}
|
||||||
|
|
||||||
auto scalar_array = data_array->mutable_scalars();
|
auto scalar_array = data_array->mutable_scalars();
|
||||||
switch (data_type) {
|
switch (data_type) {
|
||||||
@ -691,7 +696,16 @@ ReverseDataFromIndex(const index::IndexBase* index,
|
|||||||
auto ptr = dynamic_cast<const IndexType*>(index);
|
auto ptr = dynamic_cast<const IndexType*>(index);
|
||||||
std::vector<bool> raw_data(count);
|
std::vector<bool> raw_data(count);
|
||||||
for (int64_t i = 0; i < count; ++i) {
|
for (int64_t i = 0; i < count; ++i) {
|
||||||
raw_data[i] = ptr->Reverse_Lookup(seg_offsets[i]);
|
auto raw = ptr->Reverse_Lookup(seg_offsets[i]);
|
||||||
|
// a missing value implies the field is nullable, so there is no need to check nullable again here
|
||||||
|
if (!raw.has_value()) {
|
||||||
|
valid_data[i] = false;
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
if (nullable) {
|
||||||
|
valid_data[i] = true;
|
||||||
|
}
|
||||||
|
raw_data[i] = raw.value();
|
||||||
}
|
}
|
||||||
auto obj = scalar_array->mutable_bool_data();
|
auto obj = scalar_array->mutable_bool_data();
|
||||||
*(obj->mutable_data()) = {raw_data.begin(), raw_data.end()};
|
*(obj->mutable_data()) = {raw_data.begin(), raw_data.end()};
|
||||||
@ -702,7 +716,16 @@ ReverseDataFromIndex(const index::IndexBase* index,
|
|||||||
auto ptr = dynamic_cast<const IndexType*>(index);
|
auto ptr = dynamic_cast<const IndexType*>(index);
|
||||||
std::vector<int8_t> raw_data(count);
|
std::vector<int8_t> raw_data(count);
|
||||||
for (int64_t i = 0; i < count; ++i) {
|
for (int64_t i = 0; i < count; ++i) {
|
||||||
raw_data[i] = ptr->Reverse_Lookup(seg_offsets[i]);
|
auto raw = ptr->Reverse_Lookup(seg_offsets[i]);
|
||||||
|
// a missing value implies the field is nullable, so there is no need to check nullable again here
|
||||||
|
if (!raw.has_value()) {
|
||||||
|
valid_data[i] = false;
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
if (nullable) {
|
||||||
|
valid_data[i] = true;
|
||||||
|
}
|
||||||
|
raw_data[i] = raw.value();
|
||||||
}
|
}
|
||||||
auto obj = scalar_array->mutable_int_data();
|
auto obj = scalar_array->mutable_int_data();
|
||||||
*(obj->mutable_data()) = {raw_data.begin(), raw_data.end()};
|
*(obj->mutable_data()) = {raw_data.begin(), raw_data.end()};
|
||||||
@ -713,7 +736,16 @@ ReverseDataFromIndex(const index::IndexBase* index,
|
|||||||
auto ptr = dynamic_cast<const IndexType*>(index);
|
auto ptr = dynamic_cast<const IndexType*>(index);
|
||||||
std::vector<int16_t> raw_data(count);
|
std::vector<int16_t> raw_data(count);
|
||||||
for (int64_t i = 0; i < count; ++i) {
|
for (int64_t i = 0; i < count; ++i) {
|
||||||
raw_data[i] = ptr->Reverse_Lookup(seg_offsets[i]);
|
auto raw = ptr->Reverse_Lookup(seg_offsets[i]);
|
||||||
|
// a missing value implies the field is nullable, so there is no need to check nullable again here
|
||||||
|
if (!raw.has_value()) {
|
||||||
|
valid_data[i] = false;
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
if (nullable) {
|
||||||
|
valid_data[i] = true;
|
||||||
|
}
|
||||||
|
raw_data[i] = raw.value();
|
||||||
}
|
}
|
||||||
auto obj = scalar_array->mutable_int_data();
|
auto obj = scalar_array->mutable_int_data();
|
||||||
*(obj->mutable_data()) = {raw_data.begin(), raw_data.end()};
|
*(obj->mutable_data()) = {raw_data.begin(), raw_data.end()};
|
||||||
@ -724,7 +756,16 @@ ReverseDataFromIndex(const index::IndexBase* index,
|
|||||||
auto ptr = dynamic_cast<const IndexType*>(index);
|
auto ptr = dynamic_cast<const IndexType*>(index);
|
||||||
std::vector<int32_t> raw_data(count);
|
std::vector<int32_t> raw_data(count);
|
||||||
for (int64_t i = 0; i < count; ++i) {
|
for (int64_t i = 0; i < count; ++i) {
|
||||||
raw_data[i] = ptr->Reverse_Lookup(seg_offsets[i]);
|
auto raw = ptr->Reverse_Lookup(seg_offsets[i]);
|
||||||
|
// a missing value implies the field is nullable, so there is no need to check nullable again here
|
||||||
|
if (!raw.has_value()) {
|
||||||
|
valid_data[i] = false;
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
if (nullable) {
|
||||||
|
valid_data[i] = true;
|
||||||
|
}
|
||||||
|
raw_data[i] = raw.value();
|
||||||
}
|
}
|
||||||
auto obj = scalar_array->mutable_int_data();
|
auto obj = scalar_array->mutable_int_data();
|
||||||
*(obj->mutable_data()) = {raw_data.begin(), raw_data.end()};
|
*(obj->mutable_data()) = {raw_data.begin(), raw_data.end()};
|
||||||
@ -735,7 +776,16 @@ ReverseDataFromIndex(const index::IndexBase* index,
|
|||||||
auto ptr = dynamic_cast<const IndexType*>(index);
|
auto ptr = dynamic_cast<const IndexType*>(index);
|
||||||
std::vector<int64_t> raw_data(count);
|
std::vector<int64_t> raw_data(count);
|
||||||
for (int64_t i = 0; i < count; ++i) {
|
for (int64_t i = 0; i < count; ++i) {
|
||||||
raw_data[i] = ptr->Reverse_Lookup(seg_offsets[i]);
|
auto raw = ptr->Reverse_Lookup(seg_offsets[i]);
|
||||||
|
// a missing value implies the field is nullable, so there is no need to check nullable again here
|
||||||
|
if (!raw.has_value()) {
|
||||||
|
valid_data[i] = false;
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
if (nullable) {
|
||||||
|
valid_data[i] = true;
|
||||||
|
}
|
||||||
|
raw_data[i] = raw.value();
|
||||||
}
|
}
|
||||||
auto obj = scalar_array->mutable_long_data();
|
auto obj = scalar_array->mutable_long_data();
|
||||||
*(obj->mutable_data()) = {raw_data.begin(), raw_data.end()};
|
*(obj->mutable_data()) = {raw_data.begin(), raw_data.end()};
|
||||||
@ -746,7 +796,16 @@ ReverseDataFromIndex(const index::IndexBase* index,
|
|||||||
auto ptr = dynamic_cast<const IndexType*>(index);
|
auto ptr = dynamic_cast<const IndexType*>(index);
|
||||||
std::vector<float> raw_data(count);
|
std::vector<float> raw_data(count);
|
||||||
for (int64_t i = 0; i < count; ++i) {
|
for (int64_t i = 0; i < count; ++i) {
|
||||||
raw_data[i] = ptr->Reverse_Lookup(seg_offsets[i]);
|
auto raw = ptr->Reverse_Lookup(seg_offsets[i]);
|
||||||
|
// a missing value implies the field is nullable, so there is no need to check nullable again here
|
||||||
|
if (!raw.has_value()) {
|
||||||
|
valid_data[i] = false;
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
if (nullable) {
|
||||||
|
valid_data[i] = true;
|
||||||
|
}
|
||||||
|
raw_data[i] = raw.value();
|
||||||
}
|
}
|
||||||
auto obj = scalar_array->mutable_float_data();
|
auto obj = scalar_array->mutable_float_data();
|
||||||
*(obj->mutable_data()) = {raw_data.begin(), raw_data.end()};
|
*(obj->mutable_data()) = {raw_data.begin(), raw_data.end()};
|
||||||
@ -757,7 +816,16 @@ ReverseDataFromIndex(const index::IndexBase* index,
|
|||||||
auto ptr = dynamic_cast<const IndexType*>(index);
|
auto ptr = dynamic_cast<const IndexType*>(index);
|
||||||
std::vector<double> raw_data(count);
|
std::vector<double> raw_data(count);
|
||||||
for (int64_t i = 0; i < count; ++i) {
|
for (int64_t i = 0; i < count; ++i) {
|
||||||
raw_data[i] = ptr->Reverse_Lookup(seg_offsets[i]);
|
auto raw = ptr->Reverse_Lookup(seg_offsets[i]);
|
||||||
|
// a missing value implies the field is nullable, so there is no need to check nullable again here
|
||||||
|
if (!raw.has_value()) {
|
||||||
|
valid_data[i] = false;
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
if (nullable) {
|
||||||
|
valid_data[i] = true;
|
||||||
|
}
|
||||||
|
raw_data[i] = raw.value();
|
||||||
}
|
}
|
||||||
auto obj = scalar_array->mutable_double_data();
|
auto obj = scalar_array->mutable_double_data();
|
||||||
*(obj->mutable_data()) = {raw_data.begin(), raw_data.end()};
|
*(obj->mutable_data()) = {raw_data.begin(), raw_data.end()};
|
||||||
@ -768,7 +836,16 @@ ReverseDataFromIndex(const index::IndexBase* index,
|
|||||||
auto ptr = dynamic_cast<const IndexType*>(index);
|
auto ptr = dynamic_cast<const IndexType*>(index);
|
||||||
std::vector<std::string> raw_data(count);
|
std::vector<std::string> raw_data(count);
|
||||||
for (int64_t i = 0; i < count; ++i) {
|
for (int64_t i = 0; i < count; ++i) {
|
||||||
raw_data[i] = ptr->Reverse_Lookup(seg_offsets[i]);
|
auto raw = ptr->Reverse_Lookup(seg_offsets[i]);
|
||||||
|
// a missing value implies the field is nullable, so there is no need to check nullable again here
|
||||||
|
if (!raw.has_value()) {
|
||||||
|
valid_data[i] = false;
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
if (nullable) {
|
||||||
|
valid_data[i] = true;
|
||||||
|
}
|
||||||
|
raw_data[i] = raw.value();
|
||||||
}
|
}
|
||||||
auto obj = scalar_array->mutable_string_data();
|
auto obj = scalar_array->mutable_string_data();
|
||||||
*(obj->mutable_data()) = {raw_data.begin(), raw_data.end()};
|
*(obj->mutable_data()) = {raw_data.begin(), raw_data.end()};
|
||||||
@ -780,6 +857,11 @@ ReverseDataFromIndex(const index::IndexBase* index,
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (nullable) {
|
||||||
|
*(data_array->mutable_valid_data()) = {valid_data.begin(),
|
||||||
|
valid_data.end()};
|
||||||
|
}
|
||||||
|
|
||||||
return data_array;
|
return data_array;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
File diff suppressed because it is too large
Load Diff
@ -166,7 +166,8 @@ GenAlwaysFalseExpr(const FieldMeta& fvec_meta, const FieldMeta& str_meta) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
auto
|
auto
|
||||||
GenAlwaysTrueExpr(const FieldMeta& fvec_meta, const FieldMeta& str_meta) {
|
GenAlwaysTrueExprIfValid(const FieldMeta& fvec_meta,
|
||||||
|
const FieldMeta& str_meta) {
|
||||||
auto always_false_expr = GenAlwaysFalseExpr(fvec_meta, str_meta);
|
auto always_false_expr = GenAlwaysFalseExpr(fvec_meta, str_meta);
|
||||||
auto not_expr = GenNotExpr();
|
auto not_expr = GenNotExpr();
|
||||||
not_expr->set_allocated_child(always_false_expr);
|
not_expr->set_allocated_child(always_false_expr);
|
||||||
@ -196,7 +197,7 @@ GenAlwaysFalsePlan(const FieldMeta& fvec_meta, const FieldMeta& str_meta) {
|
|||||||
|
|
||||||
auto
|
auto
|
||||||
GenAlwaysTruePlan(const FieldMeta& fvec_meta, const FieldMeta& str_meta) {
|
GenAlwaysTruePlan(const FieldMeta& fvec_meta, const FieldMeta& str_meta) {
|
||||||
auto always_true_expr = GenAlwaysTrueExpr(fvec_meta, str_meta);
|
auto always_true_expr = GenAlwaysTrueExprIfValid(fvec_meta, str_meta);
|
||||||
proto::plan::VectorType vector_type;
|
proto::plan::VectorType vector_type;
|
||||||
if (fvec_meta.get_data_type() == DataType::VECTOR_FLOAT) {
|
if (fvec_meta.get_data_type() == DataType::VECTOR_FLOAT) {
|
||||||
vector_type = proto::plan::VectorType::FloatVector;
|
vector_type = proto::plan::VectorType::FloatVector;
|
||||||
@ -299,6 +300,82 @@ TEST(StringExpr, Term) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Verifies that a TermExpr ("str in [...]") over a nullable VARCHAR column
// evaluates to false for every row whose value is null, and matches a plain
// membership check for every valid row.
TEST(StringExpr, TermNullable) {
    auto schema = std::make_shared<Schema>();
    schema->AddDebugField("str", DataType::VARCHAR, true);
    schema->AddDebugField("another_str", DataType::VARCHAR);
    schema->AddDebugField(
        "fvec", DataType::VECTOR_FLOAT, 16, knowhere::metric::L2);
    auto pk = schema->AddDebugField("int64", DataType::INT64);
    schema->set_primary_field_id(pk);
    const auto& fvec_meta = schema->operator[](FieldName("fvec"));
    const auto& str_meta = schema->operator[](FieldName("str"));

    // The strings "2000" .. "2999".
    auto vec_2k_3k = []() -> std::vector<std::string> {
        std::vector<std::string> ret;
        ret.reserve(1000);
        for (int i = 2000; i < 3000; i++) {
            ret.push_back(std::to_string(i));
        }
        return ret;
    }();

    // Term lists under test, keyed by an arbitrary case id.
    std::map<int, std::vector<std::string>> terms = {
        {0, {"2000", "3000"}},
        {1, {"2000"}},
        {2, {"3000"}},
        {3, {}},
        {4, {vec_2k_3k}},
    };

    auto seg = CreateGrowingSegment(schema, empty_index_meta);
    int N = 1000;
    int num_iters = 100;
    std::vector<std::string> str_col;
    str_col.reserve(static_cast<size_t>(N) * num_iters);
    FixedVector<bool> valid_data;
    for (int iter = 0; iter < num_iters; ++iter) {
        auto raw_data = DataGen(schema, N, iter);

        // Keep a flat copy of the inserted strings and their validity flags
        // so the expression result can be checked row by row below.
        auto new_str_col = raw_data.get_col(str_meta.get_id());
        auto begin = FIELD_DATA(new_str_col, string).begin();
        auto end = FIELD_DATA(new_str_col, string).end();
        str_col.insert(str_col.end(), begin, end);
        auto new_str_valid_col = raw_data.get_col_valid(str_meta.get_id());
        valid_data.insert(valid_data.end(),
                          new_str_valid_col.begin(),
                          new_str_valid_col.end());

        seg->PreInsert(N);
        seg->Insert(iter * N,
                    N,
                    raw_data.row_ids_.data(),
                    raw_data.timestamps_.data(),
                    raw_data.raw_);
    }

    auto seg_promote = dynamic_cast<SegmentGrowingImpl*>(seg.get());
    // Dereferenced below; fail cleanly instead of crashing on a bad cast.
    ASSERT_NE(seg_promote, nullptr);
    for (const auto& [_, term] : terms) {
        auto plan_proto = GenTermPlan(fvec_meta, str_meta, term);
        auto plan = ProtoParser(*schema).CreatePlan(*plan_proto);
        // NOTE(review): `visitor` is not referenced after construction in
        // this test; confirm whether it is still required.
        query::ExecPlanNodeVisitor visitor(*seg_promote, MAX_TIMESTAMP);
        BitsetType final;
        final = ExecuteQueryExpr(
            plan->plan_node_->plannodes_->sources()[0]->sources()[0],
            seg_promote,
            N * num_iters,
            MAX_TIMESTAMP);
        // Fatal on mismatch: the loop below indexes `final` up to this size.
        ASSERT_EQ(final.size(), N * num_iters);

        for (int i = 0; i < N * num_iters; ++i) {
            auto ans = final[i];
            if (!valid_data[i]) {
                // Any operator applied to a null value must yield false.
                ASSERT_EQ(ans, false);
                continue;
            }
            const auto& val = str_col[i];
            auto ref = std::find(term.begin(), term.end(), val) != term.end();
            ASSERT_EQ(ans, ref) << "@" << i << "!!" << val;
        }
    }
}
|
||||||
|
|
||||||
TEST(StringExpr, Compare) {
|
TEST(StringExpr, Compare) {
|
||||||
auto schema = GenTestSchema();
|
auto schema = GenTestSchema();
|
||||||
const auto& fvec_meta = schema->operator[](FieldName("fvec"));
|
const auto& fvec_meta = schema->operator[](FieldName("fvec"));
|
||||||
@ -395,6 +472,7 @@ TEST(StringExpr, Compare) {
|
|||||||
for (const auto& [op, ref_func] : testcases) {
|
for (const auto& [op, ref_func] : testcases) {
|
||||||
auto plan_proto = gen_compare_plan(op);
|
auto plan_proto = gen_compare_plan(op);
|
||||||
auto plan = ProtoParser(*schema).CreatePlan(*plan_proto);
|
auto plan = ProtoParser(*schema).CreatePlan(*plan_proto);
|
||||||
|
query::ExecPlanNodeVisitor visitor(*seg_promote, MAX_TIMESTAMP);
|
||||||
BitsetType final;
|
BitsetType final;
|
||||||
final = ExecuteQueryExpr(
|
final = ExecuteQueryExpr(
|
||||||
plan->plan_node_->plannodes_->sources()[0]->sources()[0],
|
plan->plan_node_->plannodes_->sources()[0]->sources()[0],
|
||||||
@ -414,6 +492,269 @@ TEST(StringExpr, Compare) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Verifies column-vs-column comparison ("str <op> another_str") when the
// left-hand column `str` is nullable: rows where `str` is null must evaluate
// to false for every operator; valid rows must match the reference lambda.
TEST(StringExpr, CompareNullable) {
    auto schema = std::make_shared<Schema>();
    schema->AddDebugField("str", DataType::VARCHAR, true);
    schema->AddDebugField("another_str", DataType::VARCHAR);
    schema->AddDebugField(
        "fvec", DataType::VECTOR_FLOAT, 16, knowhere::metric::L2);
    auto pk = schema->AddDebugField("int64", DataType::INT64);
    schema->set_primary_field_id(pk);
    const auto& fvec_meta = schema->operator[](FieldName("fvec"));
    const auto& str_meta = schema->operator[](FieldName("str"));
    const auto& another_str_meta = schema->operator[](FieldName("another_str"));

    // Builds a vector-ANNS plan whose predicate is `str <op> another_str`.
    auto gen_compare_plan =
        [&](proto::plan::OpType op) -> std::unique_ptr<proto::plan::PlanNode> {
        auto str_col_info =
            test::GenColumnInfo(str_meta.get_id().get(),
                                proto::schema::DataType::VarChar,
                                false,
                                false);
        auto another_str_col_info =
            test::GenColumnInfo(another_str_meta.get_id().get(),
                                proto::schema::DataType::VarChar,
                                false,
                                false);

        auto compare_expr = GenCompareExpr(op);
        compare_expr->set_allocated_left_column_info(str_col_info);
        compare_expr->set_allocated_right_column_info(another_str_col_info);

        auto expr = test::GenExpr().release();
        expr->set_allocated_compare_expr(compare_expr);

        // Initialised up front so it is never read indeterminate if the
        // field type matches none of the branches below.
        proto::plan::VectorType vector_type =
            proto::plan::VectorType::FloatVector;
        if (fvec_meta.get_data_type() == DataType::VECTOR_BINARY) {
            vector_type = proto::plan::VectorType::BinaryVector;
        } else if (fvec_meta.get_data_type() == DataType::VECTOR_FLOAT16) {
            vector_type = proto::plan::VectorType::Float16Vector;
        }
        auto anns = GenAnns(expr, vector_type, fvec_meta.get_id().get(), "$0");

        auto plan_node = std::make_unique<proto::plan::PlanNode>();
        plan_node->set_allocated_vector_anns(anns);
        return plan_node;
    };

    // (operator, reference implementation on two valid strings)
    std::vector<std::tuple<proto::plan::OpType,
                           std::function<bool(std::string&, std::string&)>>>
        testcases{
            {proto::plan::OpType::GreaterThan,
             [](std::string& v1, std::string& v2) { return v1 > v2; }},
            {proto::plan::OpType::GreaterEqual,
             [](std::string& v1, std::string& v2) { return v1 >= v2; }},
            {proto::plan::OpType::LessThan,
             [](std::string& v1, std::string& v2) { return v1 < v2; }},
            {proto::plan::OpType::LessEqual,
             [](std::string& v1, std::string& v2) { return v1 <= v2; }},
            {proto::plan::OpType::Equal,
             [](std::string& v1, std::string& v2) { return v1 == v2; }},
            {proto::plan::OpType::NotEqual,
             [](std::string& v1, std::string& v2) { return v1 != v2; }},
            {proto::plan::OpType::PrefixMatch,
             [](std::string& v1, std::string& v2) {
                 return PrefixMatch(v1, v2);
             }},
        };

    auto seg = CreateGrowingSegment(schema, empty_index_meta);
    int N = 1000;
    int num_iters = 100;
    std::vector<std::string> str_col;
    std::vector<std::string> another_str_col;
    str_col.reserve(static_cast<size_t>(N) * num_iters);
    another_str_col.reserve(static_cast<size_t>(N) * num_iters);
    FixedVector<bool> valid_data;
    for (int iter = 0; iter < num_iters; ++iter) {
        auto raw_data = DataGen(schema, N, iter);

        // Appends this batch's values for `field_meta` to `out`.
        // `raw_data` is captured by reference: the lambda never outlives
        // this iteration, so copying the whole dataset is unnecessary.
        auto append_col = [&](const FieldMeta& field_meta,
                              std::vector<std::string>& out) {
            auto new_str_col = raw_data.get_col(field_meta.get_id());
            auto begin = FIELD_DATA(new_str_col, string).begin();
            auto end = FIELD_DATA(new_str_col, string).end();
            out.insert(out.end(), begin, end);
        };

        // Only `str` is nullable in this schema, so its validity flags
        // alone decide whether a row is expected to be false.
        auto new_str_valid_col = raw_data.get_col_valid(str_meta.get_id());
        valid_data.insert(valid_data.end(),
                          new_str_valid_col.begin(),
                          new_str_valid_col.end());

        append_col(str_meta, str_col);
        append_col(another_str_meta, another_str_col);

        seg->PreInsert(N);
        seg->Insert(iter * N,
                    N,
                    raw_data.row_ids_.data(),
                    raw_data.timestamps_.data(),
                    raw_data.raw_);
    }

    auto seg_promote = dynamic_cast<SegmentGrowingImpl*>(seg.get());
    // Dereferenced below; fail cleanly instead of crashing on a bad cast.
    ASSERT_NE(seg_promote, nullptr);
    for (const auto& [op, ref_func] : testcases) {
        auto plan_proto = gen_compare_plan(op);
        auto plan = ProtoParser(*schema).CreatePlan(*plan_proto);
        // NOTE(review): `visitor` is not referenced after construction in
        // this test; confirm whether it is still required.
        query::ExecPlanNodeVisitor visitor(*seg_promote, MAX_TIMESTAMP);
        BitsetType final;
        final = ExecuteQueryExpr(
            plan->plan_node_->plannodes_->sources()[0]->sources()[0],
            seg_promote,
            N * num_iters,
            MAX_TIMESTAMP);
        // Fatal on mismatch: the loop below indexes `final` up to this size.
        ASSERT_EQ(final.size(), N * num_iters);

        for (int i = 0; i < N * num_iters; ++i) {
            auto ans = final[i];
            if (!valid_data[i]) {
                // Any operator applied to a null value must yield false.
                ASSERT_EQ(ans, false);
                continue;
            }
            auto& val = str_col[i];
            auto& another_val = another_str_col[i];
            auto ref = ref_func(val, another_val);
            ASSERT_EQ(ans, ref) << "@" << op << "@" << i << "!!" << val;
        }
    }
}
|
||||||
|
|
||||||
|
// Mirror of CompareNullable with the nullable side swapped: here the
// right-hand column `another_str` is nullable. Rows where `another_str` is
// null must evaluate to false for every operator; valid rows must match the
// reference lambda.
TEST(StringExpr, CompareNullable2) {
    auto schema = std::make_shared<Schema>();
    schema->AddDebugField("str", DataType::VARCHAR);
    schema->AddDebugField("another_str", DataType::VARCHAR, true);
    schema->AddDebugField(
        "fvec", DataType::VECTOR_FLOAT, 16, knowhere::metric::L2);
    auto pk = schema->AddDebugField("int64", DataType::INT64);
    schema->set_primary_field_id(pk);
    const auto& fvec_meta = schema->operator[](FieldName("fvec"));
    const auto& str_meta = schema->operator[](FieldName("str"));
    const auto& another_str_meta = schema->operator[](FieldName("another_str"));

    // Builds a vector-ANNS plan whose predicate is `str <op> another_str`.
    auto gen_compare_plan =
        [&](proto::plan::OpType op) -> std::unique_ptr<proto::plan::PlanNode> {
        auto str_col_info =
            test::GenColumnInfo(str_meta.get_id().get(),
                                proto::schema::DataType::VarChar,
                                false,
                                false);
        auto another_str_col_info =
            test::GenColumnInfo(another_str_meta.get_id().get(),
                                proto::schema::DataType::VarChar,
                                false,
                                false);

        auto compare_expr = GenCompareExpr(op);
        compare_expr->set_allocated_left_column_info(str_col_info);
        compare_expr->set_allocated_right_column_info(another_str_col_info);

        auto expr = test::GenExpr().release();
        expr->set_allocated_compare_expr(compare_expr);

        // Initialised up front so it is never read indeterminate if the
        // field type matches none of the branches below.
        proto::plan::VectorType vector_type =
            proto::plan::VectorType::FloatVector;
        if (fvec_meta.get_data_type() == DataType::VECTOR_BINARY) {
            vector_type = proto::plan::VectorType::BinaryVector;
        } else if (fvec_meta.get_data_type() == DataType::VECTOR_FLOAT16) {
            vector_type = proto::plan::VectorType::Float16Vector;
        }
        auto anns = GenAnns(expr, vector_type, fvec_meta.get_id().get(), "$0");

        auto plan_node = std::make_unique<proto::plan::PlanNode>();
        plan_node->set_allocated_vector_anns(anns);
        return plan_node;
    };

    // (operator, reference implementation on two valid strings)
    std::vector<std::tuple<proto::plan::OpType,
                           std::function<bool(std::string&, std::string&)>>>
        testcases{
            {proto::plan::OpType::GreaterThan,
             [](std::string& v1, std::string& v2) { return v1 > v2; }},
            {proto::plan::OpType::GreaterEqual,
             [](std::string& v1, std::string& v2) { return v1 >= v2; }},
            {proto::plan::OpType::LessThan,
             [](std::string& v1, std::string& v2) { return v1 < v2; }},
            {proto::plan::OpType::LessEqual,
             [](std::string& v1, std::string& v2) { return v1 <= v2; }},
            {proto::plan::OpType::Equal,
             [](std::string& v1, std::string& v2) { return v1 == v2; }},
            {proto::plan::OpType::NotEqual,
             [](std::string& v1, std::string& v2) { return v1 != v2; }},
            {proto::plan::OpType::PrefixMatch,
             [](std::string& v1, std::string& v2) {
                 return PrefixMatch(v1, v2);
             }},
        };

    auto seg = CreateGrowingSegment(schema, empty_index_meta);
    int N = 1000;
    int num_iters = 100;
    std::vector<std::string> str_col;
    std::vector<std::string> another_str_col;
    str_col.reserve(static_cast<size_t>(N) * num_iters);
    another_str_col.reserve(static_cast<size_t>(N) * num_iters);
    FixedVector<bool> valid_data;
    for (int iter = 0; iter < num_iters; ++iter) {
        auto raw_data = DataGen(schema, N, iter);

        // Appends this batch's values for `field_meta` to `out`.
        // `raw_data` is captured by reference: the lambda never outlives
        // this iteration, so copying the whole dataset is unnecessary.
        auto append_col = [&](const FieldMeta& field_meta,
                              std::vector<std::string>& out) {
            auto new_str_col = raw_data.get_col(field_meta.get_id());
            auto begin = FIELD_DATA(new_str_col, string).begin();
            auto end = FIELD_DATA(new_str_col, string).end();
            out.insert(out.end(), begin, end);
        };

        // Only `another_str` is nullable in this schema, so its validity
        // flags alone decide whether a row is expected to be false.
        auto new_str_valid_col =
            raw_data.get_col_valid(another_str_meta.get_id());
        valid_data.insert(valid_data.end(),
                          new_str_valid_col.begin(),
                          new_str_valid_col.end());

        append_col(str_meta, str_col);
        append_col(another_str_meta, another_str_col);

        seg->PreInsert(N);
        seg->Insert(iter * N,
                    N,
                    raw_data.row_ids_.data(),
                    raw_data.timestamps_.data(),
                    raw_data.raw_);
    }

    auto seg_promote = dynamic_cast<SegmentGrowingImpl*>(seg.get());
    // Dereferenced below; fail cleanly instead of crashing on a bad cast.
    ASSERT_NE(seg_promote, nullptr);
    for (const auto& [op, ref_func] : testcases) {
        auto plan_proto = gen_compare_plan(op);
        auto plan = ProtoParser(*schema).CreatePlan(*plan_proto);
        // NOTE(review): `visitor` is not referenced after construction in
        // this test; confirm whether it is still required.
        query::ExecPlanNodeVisitor visitor(*seg_promote, MAX_TIMESTAMP);
        BitsetType final;
        final = ExecuteQueryExpr(
            plan->plan_node_->plannodes_->sources()[0]->sources()[0],
            seg_promote,
            N * num_iters,
            MAX_TIMESTAMP);
        // Fatal on mismatch: the loop below indexes `final` up to this size.
        ASSERT_EQ(final.size(), N * num_iters);

        for (int i = 0; i < N * num_iters; ++i) {
            auto ans = final[i];
            if (!valid_data[i]) {
                // Any operator applied to a null value must yield false.
                ASSERT_EQ(ans, false);
                continue;
            }
            auto& val = str_col[i];
            auto& another_val = another_str_col[i];
            auto ref = ref_func(val, another_val);
            ASSERT_EQ(ans, ref) << "@" << op << "@" << i << "!!" << val;
        }
    }
}
|
||||||
|
|
||||||
TEST(StringExpr, UnaryRange) {
|
TEST(StringExpr, UnaryRange) {
|
||||||
auto schema = GenTestSchema();
|
auto schema = GenTestSchema();
|
||||||
const auto& fvec_meta = schema->operator[](FieldName("fvec"));
|
const auto& fvec_meta = schema->operator[](FieldName("fvec"));
|
||||||
@ -510,6 +851,116 @@ TEST(StringExpr, UnaryRange) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Verifies unary range predicates ("str <op> literal") over a nullable
// VARCHAR column: null rows must evaluate to false; valid rows must match
// the reference lambda for the same operator and literal.
TEST(StringExpr, UnaryRangeNullable) {
    auto schema = std::make_shared<Schema>();
    schema->AddDebugField("str", DataType::VARCHAR, true);
    schema->AddDebugField("another_str", DataType::VARCHAR);
    schema->AddDebugField(
        "fvec", DataType::VECTOR_FLOAT, 16, knowhere::metric::L2);
    auto pk = schema->AddDebugField("int64", DataType::INT64);
    schema->set_primary_field_id(pk);
    const auto& fvec_meta = schema->operator[](FieldName("fvec"));
    const auto& str_meta = schema->operator[](FieldName("str"));

    // Builds a vector-ANNS plan whose predicate is `str <op> value`.
    auto gen_unary_range_plan =
        [&](proto::plan::OpType op,
            std::string value) -> std::unique_ptr<proto::plan::PlanNode> {
        auto column_info = test::GenColumnInfo(str_meta.get_id().get(),
                                               proto::schema::DataType::VarChar,
                                               false,
                                               false);
        auto unary_range_expr = test::GenUnaryRangeExpr(op, value);
        unary_range_expr->set_allocated_column_info(column_info);

        auto expr = test::GenExpr().release();
        expr->set_allocated_unary_range_expr(unary_range_expr);

        // Initialised up front so it is never read indeterminate if the
        // field type matches none of the branches below.
        proto::plan::VectorType vector_type =
            proto::plan::VectorType::FloatVector;
        if (fvec_meta.get_data_type() == DataType::VECTOR_BINARY) {
            vector_type = proto::plan::VectorType::BinaryVector;
        } else if (fvec_meta.get_data_type() == DataType::VECTOR_FLOAT16) {
            vector_type = proto::plan::VectorType::Float16Vector;
        }
        auto anns = GenAnns(expr, vector_type, fvec_meta.get_id().get(), "$0");

        auto plan_node = std::make_unique<proto::plan::PlanNode>();
        plan_node->set_allocated_vector_anns(anns);
        return plan_node;
    };

    // (operator, literal, reference implementation on a valid string)
    std::vector<std::tuple<proto::plan::OpType,
                           std::string,
                           std::function<bool(std::string&)>>>
        testcases{
            {proto::plan::OpType::GreaterThan,
             "2000",
             [](std::string& val) { return val > "2000"; }},
            {proto::plan::OpType::GreaterEqual,
             "2000",
             [](std::string& val) { return val >= "2000"; }},
            {proto::plan::OpType::LessThan,
             "3000",
             [](std::string& val) { return val < "3000"; }},
            {proto::plan::OpType::LessEqual,
             "3000",
             [](std::string& val) { return val <= "3000"; }},
            {proto::plan::OpType::PrefixMatch,
             "a",
             [](std::string& val) { return PrefixMatch(val, "a"); }},
        };

    auto seg = CreateGrowingSegment(schema, empty_index_meta);
    int N = 1000;
    int num_iters = 100;
    std::vector<std::string> str_col;
    str_col.reserve(static_cast<size_t>(N) * num_iters);
    FixedVector<bool> valid_data;
    for (int iter = 0; iter < num_iters; ++iter) {
        auto raw_data = DataGen(schema, N, iter);

        // Keep a flat copy of the inserted strings and their validity flags
        // so the expression result can be checked row by row below.
        auto new_str_col = raw_data.get_col(str_meta.get_id());
        auto begin = FIELD_DATA(new_str_col, string).begin();
        auto end = FIELD_DATA(new_str_col, string).end();
        str_col.insert(str_col.end(), begin, end);
        auto new_str_valid_col = raw_data.get_col_valid(str_meta.get_id());
        valid_data.insert(valid_data.end(),
                          new_str_valid_col.begin(),
                          new_str_valid_col.end());

        seg->PreInsert(N);
        seg->Insert(iter * N,
                    N,
                    raw_data.row_ids_.data(),
                    raw_data.timestamps_.data(),
                    raw_data.raw_);
    }

    auto seg_promote = dynamic_cast<SegmentGrowingImpl*>(seg.get());
    // Passed to ExecuteQueryExpr below; fail cleanly on a bad cast.
    ASSERT_NE(seg_promote, nullptr);
    for (const auto& [op, value, ref_func] : testcases) {
        auto plan_proto = gen_unary_range_plan(op, value);
        auto plan = ProtoParser(*schema).CreatePlan(*plan_proto);
        BitsetType final;
        final = ExecuteQueryExpr(
            plan->plan_node_->plannodes_->sources()[0]->sources()[0],
            seg_promote,
            N * num_iters,
            MAX_TIMESTAMP);
        // Fatal on mismatch: the loop below indexes `final` up to this size.
        ASSERT_EQ(final.size(), N * num_iters);

        for (int i = 0; i < N * num_iters; ++i) {
            auto ans = final[i];
            if (!valid_data[i]) {
                // Any operator applied to a null value must yield false.
                ASSERT_EQ(ans, false);
                continue;
            }
            auto& val = str_col[i];
            auto ref = ref_func(val);
            ASSERT_EQ(ans, ref)
                << "@" << op << "@" << value << "@" << i << "!!" << val;
        }
    }
}
|
||||||
|
|
||||||
TEST(StringExpr, BinaryRange) {
|
TEST(StringExpr, BinaryRange) {
|
||||||
auto schema = GenTestSchema();
|
auto schema = GenTestSchema();
|
||||||
const auto& fvec_meta = schema->operator[](FieldName("fvec"));
|
const auto& fvec_meta = schema->operator[](FieldName("fvec"));
|
||||||
@ -625,6 +1076,136 @@ TEST(StringExpr, BinaryRange) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Verifies binary range predicates ("lb <(=) str <(=) ub") over a nullable
// VARCHAR column: null rows must evaluate to false; valid rows must match
// the reference lambda for the same bounds and inclusiveness.
TEST(StringExpr, BinaryRangeNullable) {
    auto schema = std::make_shared<Schema>();
    schema->AddDebugField("str", DataType::VARCHAR, true);
    schema->AddDebugField("another_str", DataType::VARCHAR);
    schema->AddDebugField(
        "fvec", DataType::VECTOR_FLOAT, 16, knowhere::metric::L2);
    auto pk = schema->AddDebugField("int64", DataType::INT64);
    schema->set_primary_field_id(pk);
    const auto& fvec_meta = schema->operator[](FieldName("fvec"));
    const auto& str_meta = schema->operator[](FieldName("str"));

    // Builds a vector-ANNS plan whose predicate is a range check on `str`.
    auto gen_binary_range_plan =
        [&](bool lb_inclusive,
            bool ub_inclusive,
            std::string lb,
            std::string ub) -> std::unique_ptr<proto::plan::PlanNode> {
        auto column_info = test::GenColumnInfo(str_meta.get_id().get(),
                                               proto::schema::DataType::VarChar,
                                               false,
                                               false);
        auto binary_range_expr =
            GenBinaryRangeExpr(lb_inclusive, ub_inclusive, lb, ub);
        binary_range_expr->set_allocated_column_info(column_info);

        auto expr = test::GenExpr().release();
        expr->set_allocated_binary_range_expr(binary_range_expr);

        // Initialised up front so it is never read indeterminate if the
        // field type matches none of the branches below.
        proto::plan::VectorType vector_type =
            proto::plan::VectorType::FloatVector;
        if (fvec_meta.get_data_type() == DataType::VECTOR_BINARY) {
            vector_type = proto::plan::VectorType::BinaryVector;
        } else if (fvec_meta.get_data_type() == DataType::VECTOR_FLOAT16) {
            vector_type = proto::plan::VectorType::Float16Vector;
        }
        auto anns = GenAnns(expr, vector_type, fvec_meta.get_id().get(), "$0");

        auto plan_node = std::make_unique<proto::plan::PlanNode>();
        plan_node->set_allocated_vector_anns(anns);
        return plan_node;
    };

    // bool lb_inclusive, bool ub_inclusive, std::string lb, std::string ub,
    // reference implementation on a valid string
    std::vector<std::tuple<bool,
                           bool,
                           std::string,
                           std::string,
                           std::function<bool(std::string&)>>>
        testcases{
            {false,
             false,
             "2000",
             "3000",
             [](std::string& val) { return val > "2000" && val < "3000"; }},
            {false,
             true,
             "2000",
             "3000",
             [](std::string& val) { return val > "2000" && val <= "3000"; }},
            {true,
             false,
             "2000",
             "3000",
             [](std::string& val) { return val >= "2000" && val < "3000"; }},
            {true,
             true,
             "2000",
             "3000",
             [](std::string& val) { return val >= "2000" && val <= "3000"; }},
            // Lower bound above upper bound: nothing can match.
            {true,
             true,
             "2000",
             "1000",
             [](std::string& val) { return false; }},
        };

    auto seg = CreateGrowingSegment(schema, empty_index_meta);
    int N = 1000;
    int num_iters = 100;
    std::vector<std::string> str_col;
    str_col.reserve(static_cast<size_t>(N) * num_iters);
    FixedVector<bool> valid_data;
    for (int iter = 0; iter < num_iters; ++iter) {
        auto raw_data = DataGen(schema, N, iter);

        // Keep a flat copy of the inserted strings and their validity flags
        // so the expression result can be checked row by row below.
        auto new_str_col = raw_data.get_col(str_meta.get_id());
        auto begin = FIELD_DATA(new_str_col, string).begin();
        auto end = FIELD_DATA(new_str_col, string).end();
        str_col.insert(str_col.end(), begin, end);
        auto new_str_valid_col = raw_data.get_col_valid(str_meta.get_id());
        valid_data.insert(valid_data.end(),
                          new_str_valid_col.begin(),
                          new_str_valid_col.end());

        seg->PreInsert(N);
        seg->Insert(iter * N,
                    N,
                    raw_data.row_ids_.data(),
                    raw_data.timestamps_.data(),
                    raw_data.raw_);
    }

    auto seg_promote = dynamic_cast<SegmentGrowingImpl*>(seg.get());
    // Dereferenced below; fail cleanly instead of crashing on a bad cast.
    ASSERT_NE(seg_promote, nullptr);
    for (const auto& [lb_inclusive, ub_inclusive, lb, ub, ref_func] :
         testcases) {
        auto plan_proto =
            gen_binary_range_plan(lb_inclusive, ub_inclusive, lb, ub);
        auto plan = ProtoParser(*schema).CreatePlan(*plan_proto);
        // NOTE(review): `visitor` is not referenced after construction in
        // this test; confirm whether it is still required.
        query::ExecPlanNodeVisitor visitor(*seg_promote, MAX_TIMESTAMP);
        BitsetType final;
        final = ExecuteQueryExpr(
            plan->plan_node_->plannodes_->sources()[0]->sources()[0],
            seg_promote,
            N * num_iters,
            MAX_TIMESTAMP);
        // Fatal on mismatch: the loop below indexes `final` up to this size.
        ASSERT_EQ(final.size(), N * num_iters);

        for (int i = 0; i < N * num_iters; ++i) {
            auto ans = final[i];
            if (!valid_data[i]) {
                // Any operator applied to a null value must yield false.
                ASSERT_EQ(ans, false);
                continue;
            }
            auto& val = str_col[i];
            auto ref = ref_func(val);
            ASSERT_EQ(ans, ref)
                << "@" << lb_inclusive << "@" << ub_inclusive << "@" << lb
                << "@" << ub << "@" << i << "!!" << val;
        }
    }
}
|
||||||
|
|
||||||
TEST(AlwaysTrueStringPlan, SearchWithOutputFields) {
|
TEST(AlwaysTrueStringPlan, SearchWithOutputFields) {
|
||||||
auto schema = GenStrPKSchema();
|
auto schema = GenStrPKSchema();
|
||||||
const auto& fvec_meta = schema->operator[](FieldName("fvec"));
|
const auto& fvec_meta = schema->operator[](FieldName("fvec"));
|
||||||
@ -718,7 +1299,7 @@ TEST(AlwaysTrueStringPlan, QueryWithOutputFields) {
|
|||||||
dataset.timestamps_.data(),
|
dataset.timestamps_.data(),
|
||||||
dataset.raw_);
|
dataset.raw_);
|
||||||
|
|
||||||
auto expr_proto = GenAlwaysTrueExpr(fvec_meta, str_meta);
|
auto expr_proto = GenAlwaysTrueExprIfValid(fvec_meta, str_meta);
|
||||||
auto plan_proto = GenPlanNode();
|
auto plan_proto = GenPlanNode();
|
||||||
plan_proto->mutable_query()->set_allocated_predicates(expr_proto);
|
plan_proto->mutable_query()->set_allocated_predicates(expr_proto);
|
||||||
SetTargetEntry(plan_proto, {str_meta.get_id().get()});
|
SetTargetEntry(plan_proto, {str_meta.get_id().get()});
|
||||||
@ -733,4 +1314,47 @@ TEST(AlwaysTrueStringPlan, QueryWithOutputFields) {
|
|||||||
ASSERT_EQ(retrieved->fields_data().size(), 1);
|
ASSERT_EQ(retrieved->fields_data().size(), 1);
|
||||||
ASSERT_EQ(retrieved->fields_data(0).scalars().string_data().data().size(),
|
ASSERT_EQ(retrieved->fields_data(0).scalars().string_data().data().size(),
|
||||||
N);
|
N);
|
||||||
|
ASSERT_EQ(retrieved->fields_data(0).valid_data_size(), 0);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Retrieves the nullable `str` column through a predicate built by
// GenAlwaysTrueExprIfValid and checks that the number of returned rows,
// string values and validity flags all equal the number of valid (non-null)
// rows that were inserted.
TEST(AlwaysTrueStringPlan, QueryWithOutputFieldsNullable) {
    auto schema = std::make_shared<Schema>();
    schema->AddDebugField("str", DataType::VARCHAR, true);
    schema->AddDebugField("another_str", DataType::VARCHAR);
    schema->AddDebugField(
        "fvec", DataType::VECTOR_FLOAT, 16, knowhere::metric::L2);
    auto pk = schema->AddDebugField("int64", DataType::INT64);
    schema->set_primary_field_id(pk);
    const auto& fvec_meta = schema->operator[](FieldName("fvec"));
    const auto& str_meta = schema->operator[](FieldName("str"));

    auto N = 10000;
    auto dataset = DataGen(schema, N);

    // Derive the expected row count from the generated validity mask rather
    // than hard-coding it, so the test does not silently depend on the
    // generator's null pattern.
    auto valid_data = dataset.get_col_valid(str_meta.get_id());
    const auto expected_rows = static_cast<int>(
        std::count(valid_data.begin(), valid_data.end(), true));

    auto segment = CreateGrowingSegment(schema, empty_index_meta);
    segment->PreInsert(N);
    segment->Insert(0,
                    N,
                    dataset.row_ids_.data(),
                    dataset.timestamps_.data(),
                    dataset.raw_);

    auto expr_proto = GenAlwaysTrueExprIfValid(fvec_meta, str_meta);
    auto plan_proto = GenPlanNode();
    plan_proto->mutable_query()->set_allocated_predicates(expr_proto);
    SetTargetEntry(plan_proto, {str_meta.get_id().get()});
    auto plan = ProtoParser(*schema).CreateRetrievePlan(*plan_proto);

    Timestamp time = MAX_TIMESTAMP;

    auto retrieved = segment->Retrieve(
        nullptr, plan.get(), time, DEFAULT_MAX_OUTPUT_SIZE, false);
    ASSERT_EQ(retrieved->offset().size(), expected_rows);
    ASSERT_EQ(retrieved->fields_data().size(), 1);
    ASSERT_EQ(retrieved->fields_data(0).scalars().string_data().data().size(),
              expected_rows);
    ASSERT_EQ(retrieved->fields_data(0).valid_data().size(), expected_rows);
}
|
||||||
|
|||||||
@ -139,7 +139,9 @@ template <typename T>
|
|||||||
inline void
|
inline void
|
||||||
assert_reverse(ScalarIndex<T>* index, const std::vector<T>& arr) {
|
assert_reverse(ScalarIndex<T>* index, const std::vector<T>& arr) {
|
||||||
for (size_t offset = 0; offset < arr.size(); ++offset) {
|
for (size_t offset = 0; offset < arr.size(); ++offset) {
|
||||||
ASSERT_EQ(index->Reverse_Lookup(offset), arr[offset]);
|
auto raw = index->Reverse_Lookup(offset);
|
||||||
|
ASSERT_TRUE(raw.has_value());
|
||||||
|
ASSERT_EQ(raw.value(), arr[offset]);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -147,7 +149,9 @@ template <>
|
|||||||
inline void
|
inline void
|
||||||
assert_reverse(ScalarIndex<float>* index, const std::vector<float>& arr) {
|
assert_reverse(ScalarIndex<float>* index, const std::vector<float>& arr) {
|
||||||
for (size_t offset = 0; offset < arr.size(); ++offset) {
|
for (size_t offset = 0; offset < arr.size(); ++offset) {
|
||||||
ASSERT_TRUE(compare_float(index->Reverse_Lookup(offset), arr[offset]));
|
auto raw = index->Reverse_Lookup(offset);
|
||||||
|
ASSERT_TRUE(raw.has_value());
|
||||||
|
ASSERT_TRUE(compare_float(raw.value(), arr[offset]));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -155,7 +159,9 @@ template <>
|
|||||||
inline void
|
inline void
|
||||||
assert_reverse(ScalarIndex<double>* index, const std::vector<double>& arr) {
|
assert_reverse(ScalarIndex<double>* index, const std::vector<double>& arr) {
|
||||||
for (size_t offset = 0; offset < arr.size(); ++offset) {
|
for (size_t offset = 0; offset < arr.size(); ++offset) {
|
||||||
ASSERT_TRUE(compare_double(index->Reverse_Lookup(offset), arr[offset]));
|
auto raw = index->Reverse_Lookup(offset);
|
||||||
|
ASSERT_TRUE(raw.has_value());
|
||||||
|
ASSERT_TRUE(compare_double(raw.value(), arr[offset]));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -164,7 +170,9 @@ inline void
|
|||||||
assert_reverse(ScalarIndex<std::string>* index,
|
assert_reverse(ScalarIndex<std::string>* index,
|
||||||
const std::vector<std::string>& arr) {
|
const std::vector<std::string>& arr) {
|
||||||
for (size_t offset = 0; offset < arr.size(); ++offset) {
|
for (size_t offset = 0; offset < arr.size(); ++offset) {
|
||||||
ASSERT_TRUE(arr[offset].compare(index->Reverse_Lookup(offset)) == 0);
|
auto raw = index->Reverse_Lookup(offset);
|
||||||
|
ASSERT_TRUE(raw.has_value());
|
||||||
|
ASSERT_TRUE(arr[offset].compare(raw.value()) == 0);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@ -667,8 +667,14 @@ DataGenForJsonArray(SchemaPtr schema,
|
|||||||
auto insert_data = std::make_unique<InsertRecordProto>();
|
auto insert_data = std::make_unique<InsertRecordProto>();
|
||||||
auto insert_cols = [&insert_data](
|
auto insert_cols = [&insert_data](
|
||||||
auto& data, int64_t count, auto& field_meta) {
|
auto& data, int64_t count, auto& field_meta) {
|
||||||
|
FixedVector<bool> valid_data(count);
|
||||||
|
if (field_meta.is_nullable()) {
|
||||||
|
for (int i = 0; i < count; ++i) {
|
||||||
|
valid_data[i] = i % 2 == 0 ? true : false;
|
||||||
|
}
|
||||||
|
}
|
||||||
auto array = milvus::segcore::CreateDataArrayFrom(
|
auto array = milvus::segcore::CreateDataArrayFrom(
|
||||||
data.data(), nullptr, count, field_meta);
|
data.data(), valid_data.data(), count, field_meta);
|
||||||
insert_data->mutable_fields_data()->AddAllocated(array.release());
|
insert_data->mutable_fields_data()->AddAllocated(array.release());
|
||||||
};
|
};
|
||||||
for (auto field_id : schema->get_field_ids()) {
|
for (auto field_id : schema->get_field_ids()) {
|
||||||
|
|||||||
@ -13019,7 +13019,6 @@ class TestCollectionSearchNoneAndDefaultData(TestcaseBase):
|
|||||||
|
|
||||||
@pytest.mark.tags(CaseLabel.L1)
|
@pytest.mark.tags(CaseLabel.L1)
|
||||||
@pytest.mark.tags(CaseLabel.GPU)
|
@pytest.mark.tags(CaseLabel.GPU)
|
||||||
@pytest.mark.skip(reason="issue #36184")
|
|
||||||
def test_search_after_different_index_with_params_none_default_data(self, varchar_scalar_index, numeric_scalar_index,
|
def test_search_after_different_index_with_params_none_default_data(self, varchar_scalar_index, numeric_scalar_index,
|
||||||
null_data_percent, _async):
|
null_data_percent, _async):
|
||||||
"""
|
"""
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user