enhance: support iterative filter execution (#37363)

issue: #37360

---------

Signed-off-by: chasingegg <chao.gao@zilliz.com>
This commit is contained in:
Gao 2024-12-11 11:32:44 +08:00 committed by GitHub
parent a118ca14a7
commit 994fc544e7
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
67 changed files with 6411 additions and 1573 deletions

View File

@ -21,12 +21,28 @@ namespace milvus {
std::pair<std::vector<std::string_view>, FixedVector<bool>>
StringChunk::StringViews() {
    // Build a zero-copy view of every row in this chunk, paired with the
    // chunk-level validity bitmap.
    std::vector<std::string_view> views;
    views.reserve(row_nums_);
    for (int row = 0; row < row_nums_; ++row) {
        const auto begin = offsets_[row];
        const auto length = offsets_[row + 1] - begin;
        views.emplace_back(data_ + begin, length);
    }
    return {views, valid_};
}
std::pair<std::vector<std::string_view>, FixedVector<bool>>
StringChunk::ViewsByOffsets(const FixedVector<int32_t>& offsets) {
    // Gather string views (and per-row validity flags) for an arbitrary set
    // of row offsets; used when rows are visited out of sequential order.
    std::vector<std::string_view> ret;
    FixedVector<bool> valid_res;
    const size_t size = offsets.size();
    ret.reserve(size);
    valid_res.reserve(size);
    // size_t index avoids the signed/unsigned comparison the previous
    // `auto i = 0` (deduced int) produced against offsets.size().
    for (size_t i = 0; i < size; ++i) {
        // hoist the row lookup instead of indexing `offsets[i]` three times
        const auto row = offsets[i];
        ret.emplace_back(data_ + offsets_[row], offsets_[row + 1] - offsets_[row]);
        valid_res.emplace_back(isValid(row));
    }
    return {ret, valid_res};
}
void
ArrayChunk::ConstructViews() {
views_.reserve(row_nums_);

View File

@ -73,7 +73,10 @@ class Chunk {
virtual bool
isValid(int offset) {
return valid_[offset];
if (nullable_) {
return valid_[offset];
}
return true;
};
protected:
@ -170,6 +173,9 @@ class StringChunk : public Chunk {
return result;
}
std::pair<std::vector<std::string_view>, FixedVector<bool>>
ViewsByOffsets(const FixedVector<int32_t>& offsets);
const char*
ValueAt(int64_t idx) const override {
return (*this)[idx].data();

View File

@ -47,6 +47,8 @@ const char KMEANS_CLUSTER[] = "KMEANS";
const char VEC_OPT_FIELDS[] = "opt_fields";
const char PAGE_RETAIN_ORDER[] = "page_retain_order";
const char TEXT_LOG_ROOT_PATH[] = "text_log";
const char ITERATIVE_FILTER[] = "iterative_filter";
const char HINTS[] = "hints";
const char DEFAULT_PLANNODE_ID[] = "0";
const char DEAFULT_QUERY_ID[] = "0";

View File

@ -35,6 +35,7 @@ struct SearchInfo {
std::optional<FieldId> group_by_field_id_;
tracer::TraceContext trace_ctx_;
bool materialized_view_involved = false;
bool iterative_filter_execution = false;
};
using SearchInfoPtr = std::shared_ptr<SearchInfo>;

View File

@ -23,6 +23,7 @@
#include "exec/operator/CallbackSink.h"
#include "exec/operator/CountNode.h"
#include "exec/operator/FilterBitsNode.h"
#include "exec/operator/IterativeFilterNode.h"
#include "exec/operator/MvccNode.h"
#include "exec/operator/Operator.h"
#include "exec/operator/VectorSearchNode.h"
@ -52,11 +53,16 @@ DriverFactory::CreateDriver(std::unique_ptr<DriverContext> ctx,
for (size_t i = 0; i < plannodes_.size(); ++i) {
auto id = operators.size();
auto plannode = plannodes_[i];
if (auto filternode =
if (auto filterbitsnode =
std::dynamic_pointer_cast<const plan::FilterBitsNode>(
plannode)) {
operators.push_back(
std::make_unique<PhyFilterBitsNode>(id, ctx.get(), filternode));
operators.push_back(std::make_unique<PhyFilterBitsNode>(
id, ctx.get(), filterbitsnode));
} else if (auto filternode =
std::dynamic_pointer_cast<const plan::FilterNode>(
plannode)) {
operators.push_back(std::make_unique<PhyIterativeFilterNode>(
id, ctx.get(), filternode));
} else if (auto mvccnode =
std::dynamic_pointer_cast<const plan::MvccNode>(
plannode)) {

View File

@ -230,6 +230,11 @@ class QueryContext : public Context {
return search_info_;
}
knowhere::MetricType
get_metric_type() {
    // Convenience accessor: returns the metric type recorded in this
    // query's search_info_ without exposing the whole SearchInfo struct.
    return search_info_.metric_type_;
}
const query::PlaceholderGroup*
get_placeholder_group() {
return placeholder_group_;

View File

@ -21,9 +21,13 @@ namespace exec {
void
PhyAlwaysTrueExpr::Eval(EvalCtx& context, VectorPtr& result) {
int64_t real_batch_size = current_pos_ + batch_size_ >= active_count_
? active_count_ - current_pos_
: batch_size_;
auto input = context.get_offset_input();
has_offset_input_ = (input != nullptr);
int64_t real_batch_size = (has_offset_input_)
? input->size()
: (current_pos_ + batch_size_ >= active_count_
? active_count_ - current_pos_
: batch_size_);
// always true, no need to skip nulls
if (real_batch_size == 0) {

View File

@ -47,11 +47,14 @@ class PhyAlwaysTrueExpr : public Expr {
void
MoveCursor() override {
int64_t real_batch_size = current_pos_ + batch_size_ >= active_count_
? active_count_ - current_pos_
: batch_size_;
if (!has_offset_input_) {
int64_t real_batch_size =
current_pos_ + batch_size_ >= active_count_
? active_count_ - current_pos_
: batch_size_;
current_pos_ += real_batch_size;
current_pos_ += real_batch_size;
}
}
private:

View File

@ -88,7 +88,8 @@ struct ArithOpHelper<proto::plan::ArithOpType::Mod> {
template <typename T,
proto::plan::OpType cmp_op,
proto::plan::ArithOpType arith_op>
proto::plan::ArithOpType arith_op,
FilterType filter_type = FilterType::sequential>
struct ArithOpElementFunc {
typedef std::conditional_t<std::is_integral_v<T> &&
!std::is_same_v<bool, T>,
@ -100,145 +101,147 @@ struct ArithOpElementFunc {
size_t size,
HighPrecisonType val,
HighPrecisonType right_operand,
TargetBitmapView res) {
/*
TargetBitmapView res,
const int32_t* offsets = nullptr) {
// This is the original code, kept here for documentation purposes
for (int i = 0; i < size; ++i) {
if constexpr (cmp_op == proto::plan::OpType::Equal) {
if constexpr (arith_op == proto::plan::ArithOpType::Add) {
res[i] = (src[i] + right_operand) == val;
} else if constexpr (arith_op ==
proto::plan::ArithOpType::Sub) {
res[i] = (src[i] - right_operand) == val;
} else if constexpr (arith_op ==
proto::plan::ArithOpType::Mul) {
res[i] = (src[i] * right_operand) == val;
} else if constexpr (arith_op ==
proto::plan::ArithOpType::Div) {
res[i] = (src[i] / right_operand) == val;
} else if constexpr (arith_op ==
proto::plan::ArithOpType::Mod) {
res[i] = (fmod(src[i], right_operand)) == val;
} else {
PanicInfo(
OpTypeInvalid,
fmt::format(
"unsupported arith type:{} for ArithOpElementFunc",
arith_op));
}
} else if constexpr (cmp_op == proto::plan::OpType::NotEqual) {
if constexpr (arith_op == proto::plan::ArithOpType::Add) {
res[i] = (src[i] + right_operand) != val;
} else if constexpr (arith_op ==
proto::plan::ArithOpType::Sub) {
res[i] = (src[i] - right_operand) != val;
} else if constexpr (arith_op ==
proto::plan::ArithOpType::Mul) {
res[i] = (src[i] * right_operand) != val;
} else if constexpr (arith_op ==
proto::plan::ArithOpType::Div) {
res[i] = (src[i] / right_operand) != val;
} else if constexpr (arith_op ==
proto::plan::ArithOpType::Mod) {
res[i] = (fmod(src[i], right_operand)) != val;
} else {
PanicInfo(
OpTypeInvalid,
fmt::format(
"unsupported arith type:{} for ArithOpElementFunc",
arith_op));
}
} else if constexpr (cmp_op == proto::plan::OpType::GreaterThan) {
if constexpr (arith_op == proto::plan::ArithOpType::Add) {
res[i] = (src[i] + right_operand) > val;
} else if constexpr (arith_op ==
proto::plan::ArithOpType::Sub) {
res[i] = (src[i] - right_operand) > val;
} else if constexpr (arith_op ==
proto::plan::ArithOpType::Mul) {
res[i] = (src[i] * right_operand) > val;
} else if constexpr (arith_op ==
proto::plan::ArithOpType::Div) {
res[i] = (src[i] / right_operand) > val;
} else if constexpr (arith_op ==
proto::plan::ArithOpType::Mod) {
res[i] = (fmod(src[i], right_operand)) > val;
} else {
PanicInfo(
OpTypeInvalid,
fmt::format(
"unsupported arith type:{} for ArithOpElementFunc",
arith_op));
}
} else if constexpr (cmp_op == proto::plan::OpType::GreaterEqual) {
if constexpr (arith_op == proto::plan::ArithOpType::Add) {
res[i] = (src[i] + right_operand) >= val;
} else if constexpr (arith_op ==
proto::plan::ArithOpType::Sub) {
res[i] = (src[i] - right_operand) >= val;
} else if constexpr (arith_op ==
proto::plan::ArithOpType::Mul) {
res[i] = (src[i] * right_operand) >= val;
} else if constexpr (arith_op ==
proto::plan::ArithOpType::Div) {
res[i] = (src[i] / right_operand) >= val;
} else if constexpr (arith_op ==
proto::plan::ArithOpType::Mod) {
res[i] = (fmod(src[i], right_operand)) >= val;
} else {
PanicInfo(
OpTypeInvalid,
fmt::format(
"unsupported arith type:{} for ArithOpElementFunc",
arith_op));
}
} else if constexpr (cmp_op == proto::plan::OpType::LessThan) {
if constexpr (arith_op == proto::plan::ArithOpType::Add) {
res[i] = (src[i] + right_operand) < val;
} else if constexpr (arith_op ==
proto::plan::ArithOpType::Sub) {
res[i] = (src[i] - right_operand) < val;
} else if constexpr (arith_op ==
proto::plan::ArithOpType::Mul) {
res[i] = (src[i] * right_operand) < val;
} else if constexpr (arith_op ==
proto::plan::ArithOpType::Div) {
res[i] = (src[i] / right_operand) < val;
} else if constexpr (arith_op ==
proto::plan::ArithOpType::Mod) {
res[i] = (fmod(src[i], right_operand)) < val;
} else {
PanicInfo(
OpTypeInvalid,
fmt::format(
"unsupported arith type:{} for ArithOpElementFunc",
arith_op));
}
} else if constexpr (cmp_op == proto::plan::OpType::LessEqual) {
if constexpr (arith_op == proto::plan::ArithOpType::Add) {
res[i] = (src[i] + right_operand) <= val;
} else if constexpr (arith_op ==
proto::plan::ArithOpType::Sub) {
res[i] = (src[i] - right_operand) <= val;
} else if constexpr (arith_op ==
proto::plan::ArithOpType::Mul) {
res[i] = (src[i] * right_operand) <= val;
} else if constexpr (arith_op ==
proto::plan::ArithOpType::Div) {
res[i] = (src[i] / right_operand) <= val;
} else if constexpr (arith_op ==
proto::plan::ArithOpType::Mod) {
res[i] = (fmod(src[i], right_operand)) <= val;
} else {
PanicInfo(
OpTypeInvalid,
fmt::format(
"unsupported arith type:{} for ArithOpElementFunc",
arith_op));
// and is also used by iterative filter, since iterative filter does not execute in a batch manner
if constexpr (filter_type == FilterType::random) {
for (int i = 0; i < size; ++i) {
auto offset = (offsets) ? offsets[i] : i;
if constexpr (cmp_op == proto::plan::OpType::Equal) {
if constexpr (arith_op == proto::plan::ArithOpType::Add) {
res[i] = (src[offset] + right_operand) == val;
} else if constexpr (arith_op ==
proto::plan::ArithOpType::Sub) {
res[i] = (src[offset] - right_operand) == val;
} else if constexpr (arith_op ==
proto::plan::ArithOpType::Mul) {
res[i] = (src[offset] * right_operand) == val;
} else if constexpr (arith_op ==
proto::plan::ArithOpType::Div) {
res[i] = (src[offset] / right_operand) == val;
} else if constexpr (arith_op ==
proto::plan::ArithOpType::Mod) {
res[i] = (fmod(src[offset], right_operand)) == val;
} else {
PanicInfo(OpTypeInvalid,
fmt::format("unsupported arith type:{} for "
"ArithOpElementFunc",
arith_op));
}
} else if constexpr (cmp_op == proto::plan::OpType::NotEqual) {
if constexpr (arith_op == proto::plan::ArithOpType::Add) {
res[i] = (src[offset] + right_operand) != val;
} else if constexpr (arith_op ==
proto::plan::ArithOpType::Sub) {
res[i] = (src[offset] - right_operand) != val;
} else if constexpr (arith_op ==
proto::plan::ArithOpType::Mul) {
res[i] = (src[offset] * right_operand) != val;
} else if constexpr (arith_op ==
proto::plan::ArithOpType::Div) {
res[i] = (src[offset] / right_operand) != val;
} else if constexpr (arith_op ==
proto::plan::ArithOpType::Mod) {
res[i] = (fmod(src[offset], right_operand)) != val;
} else {
PanicInfo(OpTypeInvalid,
fmt::format("unsupported arith type:{} for "
"ArithOpElementFunc",
arith_op));
}
} else if constexpr (cmp_op ==
proto::plan::OpType::GreaterThan) {
if constexpr (arith_op == proto::plan::ArithOpType::Add) {
res[i] = (src[offset] + right_operand) > val;
} else if constexpr (arith_op ==
proto::plan::ArithOpType::Sub) {
res[i] = (src[offset] - right_operand) > val;
} else if constexpr (arith_op ==
proto::plan::ArithOpType::Mul) {
res[i] = (src[offset] * right_operand) > val;
} else if constexpr (arith_op ==
proto::plan::ArithOpType::Div) {
res[i] = (src[offset] / right_operand) > val;
} else if constexpr (arith_op ==
proto::plan::ArithOpType::Mod) {
res[i] = (fmod(src[offset], right_operand)) > val;
} else {
PanicInfo(OpTypeInvalid,
fmt::format("unsupported arith type:{} for "
"ArithOpElementFunc",
arith_op));
}
} else if constexpr (cmp_op ==
proto::plan::OpType::GreaterEqual) {
if constexpr (arith_op == proto::plan::ArithOpType::Add) {
res[i] = (src[offset] + right_operand) >= val;
} else if constexpr (arith_op ==
proto::plan::ArithOpType::Sub) {
res[i] = (src[offset] - right_operand) >= val;
} else if constexpr (arith_op ==
proto::plan::ArithOpType::Mul) {
res[i] = (src[offset] * right_operand) >= val;
} else if constexpr (arith_op ==
proto::plan::ArithOpType::Div) {
res[i] = (src[offset] / right_operand) >= val;
} else if constexpr (arith_op ==
proto::plan::ArithOpType::Mod) {
res[i] = (fmod(src[offset], right_operand)) >= val;
} else {
PanicInfo(OpTypeInvalid,
fmt::format("unsupported arith type:{} for "
"ArithOpElementFunc",
arith_op));
}
} else if constexpr (cmp_op == proto::plan::OpType::LessThan) {
if constexpr (arith_op == proto::plan::ArithOpType::Add) {
res[i] = (src[offset] + right_operand) < val;
} else if constexpr (arith_op ==
proto::plan::ArithOpType::Sub) {
res[i] = (src[offset] - right_operand) < val;
} else if constexpr (arith_op ==
proto::plan::ArithOpType::Mul) {
res[i] = (src[offset] * right_operand) < val;
} else if constexpr (arith_op ==
proto::plan::ArithOpType::Div) {
res[i] = (src[offset] / right_operand) < val;
} else if constexpr (arith_op ==
proto::plan::ArithOpType::Mod) {
res[i] = (fmod(src[offset], right_operand)) < val;
} else {
PanicInfo(OpTypeInvalid,
fmt::format("unsupported arith type:{} for "
"ArithOpElementFunc",
arith_op));
}
} else if constexpr (cmp_op == proto::plan::OpType::LessEqual) {
if constexpr (arith_op == proto::plan::ArithOpType::Add) {
res[i] = (src[offset] + right_operand) <= val;
} else if constexpr (arith_op ==
proto::plan::ArithOpType::Sub) {
res[i] = (src[offset] - right_operand) <= val;
} else if constexpr (arith_op ==
proto::plan::ArithOpType::Mul) {
res[i] = (src[offset] * right_operand) <= val;
} else if constexpr (arith_op ==
proto::plan::ArithOpType::Div) {
res[i] = (src[offset] / right_operand) <= val;
} else if constexpr (arith_op ==
proto::plan::ArithOpType::Mod) {
res[i] = (fmod(src[offset], right_operand)) <= val;
} else {
PanicInfo(OpTypeInvalid,
fmt::format("unsupported arith type:{} for "
"ArithOpElementFunc",
arith_op));
}
}
}
return;
}
*/
// more efficient SIMD version
if constexpr (!std::is_same_v<decltype(CmpOpHelper<cmp_op>::op),
void>) {
constexpr auto cmp_op_cvt = CmpOpHelper<cmp_op>::op;
@ -266,7 +269,8 @@ struct ArithOpElementFunc {
template <typename T,
proto::plan::OpType cmp_op,
proto::plan::ArithOpType arith_op>
proto::plan::ArithOpType arith_op,
FilterType filter_type>
struct ArithOpIndexFunc {
typedef std::conditional_t<std::is_integral_v<T> &&
!std::is_same_v<bool, T>,
@ -278,10 +282,15 @@ struct ArithOpIndexFunc {
operator()(Index* index,
size_t size,
HighPrecisonType val,
HighPrecisonType right_operand) {
HighPrecisonType right_operand,
const int32_t* offsets = nullptr) {
TargetBitmap res(size);
for (size_t i = 0; i < size; ++i) {
auto raw = index->Reverse_Lookup(i);
auto offset = i;
if constexpr (filter_type == FilterType::random) {
offset = (offsets) ? offsets[i] : i;
}
auto raw = index->Reverse_Lookup(offset);
if (!raw.has_value()) {
res[i] = false;
continue;
@ -449,23 +458,23 @@ class PhyBinaryArithOpEvalRangeExpr : public SegmentExpr {
private:
template <typename T>
VectorPtr
ExecRangeVisitorImpl();
ExecRangeVisitorImpl(OffsetVector* input = nullptr);
template <typename T>
VectorPtr
ExecRangeVisitorImplForIndex();
ExecRangeVisitorImplForIndex(OffsetVector* input = nullptr);
template <typename T>
VectorPtr
ExecRangeVisitorImplForData();
ExecRangeVisitorImplForData(OffsetVector* input = nullptr);
template <typename ValueType>
VectorPtr
ExecRangeVisitorImplForJson();
ExecRangeVisitorImplForJson(OffsetVector* input = nullptr);
template <typename ValueType>
VectorPtr
ExecRangeVisitorImplForArray();
ExecRangeVisitorImplForArray(OffsetVector* input = nullptr);
private:
std::shared_ptr<const milvus::expr::BinaryArithOpEvalRangeExpr> expr_;

View File

@ -24,33 +24,35 @@ namespace exec {
void
PhyBinaryRangeFilterExpr::Eval(EvalCtx& context, VectorPtr& result) {
auto input = context.get_offset_input();
SetHasOffsetInput((input != nullptr));
switch (expr_->column_.data_type_) {
case DataType::BOOL: {
result = ExecRangeVisitorImpl<bool>();
result = ExecRangeVisitorImpl<bool>(input);
break;
}
case DataType::INT8: {
result = ExecRangeVisitorImpl<int8_t>();
result = ExecRangeVisitorImpl<int8_t>(input);
break;
}
case DataType::INT16: {
result = ExecRangeVisitorImpl<int16_t>();
result = ExecRangeVisitorImpl<int16_t>(input);
break;
}
case DataType::INT32: {
result = ExecRangeVisitorImpl<int32_t>();
result = ExecRangeVisitorImpl<int32_t>(input);
break;
}
case DataType::INT64: {
result = ExecRangeVisitorImpl<int64_t>();
result = ExecRangeVisitorImpl<int64_t>(input);
break;
}
case DataType::FLOAT: {
result = ExecRangeVisitorImpl<float>();
result = ExecRangeVisitorImpl<float>(input);
break;
}
case DataType::DOUBLE: {
result = ExecRangeVisitorImpl<double>();
result = ExecRangeVisitorImpl<double>(input);
break;
}
case DataType::VARCHAR: {
@ -58,9 +60,9 @@ PhyBinaryRangeFilterExpr::Eval(EvalCtx& context, VectorPtr& result) {
!storage::MmapManager::GetInstance()
.GetMmapConfig()
.growing_enable_mmap) {
result = ExecRangeVisitorImpl<std::string>();
result = ExecRangeVisitorImpl<std::string>(input);
} else {
result = ExecRangeVisitorImpl<std::string_view>();
result = ExecRangeVisitorImpl<std::string_view>(input);
}
break;
}
@ -68,15 +70,15 @@ PhyBinaryRangeFilterExpr::Eval(EvalCtx& context, VectorPtr& result) {
auto value_type = expr_->lower_val_.val_case();
switch (value_type) {
case proto::plan::GenericValue::ValCase::kInt64Val: {
result = ExecRangeVisitorImplForJson<int64_t>();
result = ExecRangeVisitorImplForJson<int64_t>(input);
break;
}
case proto::plan::GenericValue::ValCase::kFloatVal: {
result = ExecRangeVisitorImplForJson<double>();
result = ExecRangeVisitorImplForJson<double>(input);
break;
}
case proto::plan::GenericValue::ValCase::kStringVal: {
result = ExecRangeVisitorImplForJson<std::string>();
result = ExecRangeVisitorImplForJson<std::string>(input);
break;
}
default: {
@ -93,17 +95,17 @@ PhyBinaryRangeFilterExpr::Eval(EvalCtx& context, VectorPtr& result) {
switch (value_type) {
case proto::plan::GenericValue::ValCase::kInt64Val: {
SetNotUseIndex();
result = ExecRangeVisitorImplForArray<int64_t>();
result = ExecRangeVisitorImplForArray<int64_t>(input);
break;
}
case proto::plan::GenericValue::ValCase::kFloatVal: {
SetNotUseIndex();
result = ExecRangeVisitorImplForArray<double>();
result = ExecRangeVisitorImplForArray<double>(input);
break;
}
case proto::plan::GenericValue::ValCase::kStringVal: {
SetNotUseIndex();
result = ExecRangeVisitorImplForArray<std::string>();
result = ExecRangeVisitorImplForArray<std::string>(input);
break;
}
default: {
@ -124,11 +126,11 @@ PhyBinaryRangeFilterExpr::Eval(EvalCtx& context, VectorPtr& result) {
template <typename T>
VectorPtr
PhyBinaryRangeFilterExpr::ExecRangeVisitorImpl() {
if (is_index_mode_) {
PhyBinaryRangeFilterExpr::ExecRangeVisitorImpl(OffsetVector* input) {
if (is_index_mode_ && !has_offset_input_) {
return ExecRangeVisitorImplForIndex<T>();
} else {
return ExecRangeVisitorImplForData<T>();
return ExecRangeVisitorImplForData<T>(input);
}
}
@ -137,17 +139,28 @@ ColumnVectorPtr
PhyBinaryRangeFilterExpr::PreCheckOverflow(HighPrecisionType& val1,
HighPrecisionType& val2,
bool& lower_inclusive,
bool& upper_inclusive) {
bool& upper_inclusive,
OffsetVector* input) {
lower_inclusive = expr_->lower_inclusive_;
upper_inclusive = expr_->upper_inclusive_;
val1 = GetValueFromProto<HighPrecisionType>(expr_->lower_val_);
val2 = GetValueFromProto<HighPrecisionType>(expr_->upper_val_);
auto get_next_overflow_batch = [this]() -> ColumnVectorPtr {
int64_t batch_size = overflow_check_pos_ + batch_size_ >= active_count_
? active_count_ - overflow_check_pos_
: batch_size_;
overflow_check_pos_ += batch_size;
auto valid_res = ProcessChunksForValid<T>(is_index_mode_);
auto get_next_overflow_batch =
[this](OffsetVector* input) -> ColumnVectorPtr {
int64_t batch_size;
if (input != nullptr) {
batch_size = input->size();
} else {
batch_size = overflow_check_pos_ + batch_size_ >= active_count_
? active_count_ - overflow_check_pos_
: batch_size_;
overflow_check_pos_ += batch_size;
}
auto valid_res =
(input != nullptr)
? ProcessChunksForValidByOffsets<T>(is_index_mode_, *input)
: ProcessChunksForValid<T>(is_index_mode_);
auto res_vec = std::make_shared<ColumnVector>(TargetBitmap(batch_size),
std::move(valid_res));
return res_vec;
@ -155,7 +168,7 @@ PhyBinaryRangeFilterExpr::PreCheckOverflow(HighPrecisionType& val1,
if constexpr (std::is_integral_v<T> && !std::is_same_v<bool, T>) {
if (milvus::query::gt_ub<T>(val1)) {
return get_next_overflow_batch();
return get_next_overflow_batch(input);
} else if (milvus::query::lt_lb<T>(val1)) {
val1 = std::numeric_limits<T>::min();
lower_inclusive = true;
@ -165,7 +178,7 @@ PhyBinaryRangeFilterExpr::PreCheckOverflow(HighPrecisionType& val1,
val2 = std::numeric_limits<T>::max();
upper_inclusive = true;
} else if (milvus::query::lt_lb<T>(val2)) {
return get_next_overflow_batch();
return get_next_overflow_batch(input);
}
}
return nullptr;
@ -216,7 +229,7 @@ PhyBinaryRangeFilterExpr::ExecRangeVisitorImplForIndex() {
template <typename T>
VectorPtr
PhyBinaryRangeFilterExpr::ExecRangeVisitorImplForData() {
PhyBinaryRangeFilterExpr::ExecRangeVisitorImplForData(OffsetVector* input) {
typedef std::
conditional_t<std::is_same_v<T, std::string_view>, std::string, T>
IndexInnerType;
@ -226,57 +239,67 @@ PhyBinaryRangeFilterExpr::ExecRangeVisitorImplForData() {
int64_t,
IndexInnerType>
HighPrecisionType;
auto real_batch_size = GetNextBatchSize();
if (real_batch_size == 0) {
return nullptr;
}
HighPrecisionType val1;
HighPrecisionType val2;
bool lower_inclusive = false;
bool upper_inclusive = false;
if (auto res =
PreCheckOverflow<T>(val1, val2, lower_inclusive, upper_inclusive)) {
if (auto res = PreCheckOverflow<T>(
val1, val2, lower_inclusive, upper_inclusive, input)) {
return res;
}
auto real_batch_size =
has_offset_input_ ? input->size() : GetNextBatchSize();
if (real_batch_size == 0) {
return nullptr;
}
auto res_vec = std::make_shared<ColumnVector>(
TargetBitmap(real_batch_size), TargetBitmap(real_batch_size));
TargetBitmapView res(res_vec->GetRawData(), real_batch_size);
TargetBitmapView valid_res(res_vec->GetValidRawData(), real_batch_size);
valid_res.set();
auto execute_sub_batch = [lower_inclusive, upper_inclusive](
const T* data,
const bool* valid_data,
const int size,
TargetBitmapView res,
TargetBitmapView valid_res,
HighPrecisionType val1,
HighPrecisionType val2) {
auto execute_sub_batch =
[ lower_inclusive,
upper_inclusive ]<FilterType filter_type = FilterType::sequential>(
const T* data,
const bool* valid_data,
const int32_t* offsets,
const int size,
TargetBitmapView res,
TargetBitmapView valid_res,
HighPrecisionType val1,
HighPrecisionType val2) {
if (lower_inclusive && upper_inclusive) {
BinaryRangeElementFunc<T, true, true> func;
func(val1, val2, data, size, res);
BinaryRangeElementFunc<T, true, true, filter_type> func;
func(val1, val2, data, size, res, offsets);
} else if (lower_inclusive && !upper_inclusive) {
BinaryRangeElementFunc<T, true, false> func;
func(val1, val2, data, size, res);
BinaryRangeElementFunc<T, true, false, filter_type> func;
func(val1, val2, data, size, res, offsets);
} else if (!lower_inclusive && upper_inclusive) {
BinaryRangeElementFunc<T, false, true> func;
func(val1, val2, data, size, res);
BinaryRangeElementFunc<T, false, true, filter_type> func;
func(val1, val2, data, size, res, offsets);
} else {
BinaryRangeElementFunc<T, false, false> func;
func(val1, val2, data, size, res);
BinaryRangeElementFunc<T, false, false, filter_type> func;
func(val1, val2, data, size, res, offsets);
}
// BinaryRangeElementFunc already processes the data as one batch operation,
// so we do not split the data again (doing so may hurt performance when the null distribution is scattered);
// instead, we mask res with valid_data after the batch operation.
if (valid_data != nullptr) {
for (int i = 0; i < size; i++) {
if (!valid_data[i]) {
auto offset = i;
if constexpr (filter_type == FilterType::random) {
offset = (offsets) ? offsets[i] : i;
}
if (!valid_data[offset]) {
res[i] = valid_res[i] = false;
}
}
}
};
auto skip_index_func =
[val1, val2, lower_inclusive, upper_inclusive](
const SkipIndex& skip_index, FieldId field_id, int64_t chunk_id) {
@ -294,8 +317,19 @@ PhyBinaryRangeFilterExpr::ExecRangeVisitorImplForData() {
field_id, chunk_id, val1, val2, false, false);
}
};
int64_t processed_size = ProcessDataChunks<T>(
execute_sub_batch, skip_index_func, res, valid_res, val1, val2);
int64_t processed_size;
if (has_offset_input_) {
processed_size = ProcessDataByOffsets<T>(execute_sub_batch,
skip_index_func,
input,
res,
valid_res,
val1,
val2);
} else {
processed_size = ProcessDataChunks<T>(
execute_sub_batch, skip_index_func, res, valid_res, val1, val2);
}
AssertInfo(processed_size == real_batch_size,
"internal error: expr processed rows {} not equal "
"expect batch size {}",
@ -306,11 +340,12 @@ PhyBinaryRangeFilterExpr::ExecRangeVisitorImplForData() {
template <typename ValueType>
VectorPtr
PhyBinaryRangeFilterExpr::ExecRangeVisitorImplForJson() {
PhyBinaryRangeFilterExpr::ExecRangeVisitorImplForJson(OffsetVector* input) {
using GetType = std::conditional_t<std::is_same_v<ValueType, std::string>,
std::string_view,
ValueType>;
auto real_batch_size = GetNextBatchSize();
auto real_batch_size =
has_offset_input_ ? input->size() : GetNextBatchSize();
if (real_batch_size == 0) {
return nullptr;
}
@ -326,30 +361,81 @@ PhyBinaryRangeFilterExpr::ExecRangeVisitorImplForJson() {
ValueType val2 = GetValueFromProto<ValueType>(expr_->upper_val_);
auto pointer = milvus::Json::pointer(expr_->column_.nested_path_);
auto execute_sub_batch = [lower_inclusive, upper_inclusive, pointer](
const milvus::Json* data,
const bool* valid_data,
const int size,
TargetBitmapView res,
TargetBitmapView valid_res,
ValueType val1,
ValueType val2) {
auto execute_sub_batch =
[ lower_inclusive, upper_inclusive,
pointer ]<FilterType filter_type = FilterType::sequential>(
const milvus::Json* data,
const bool* valid_data,
const int32_t* offsets,
const int size,
TargetBitmapView res,
TargetBitmapView valid_res,
ValueType val1,
ValueType val2) {
if (lower_inclusive && upper_inclusive) {
BinaryRangeElementFuncForJson<ValueType, true, true> func;
func(val1, val2, pointer, data, valid_data, size, res, valid_res);
BinaryRangeElementFuncForJson<ValueType, true, true, filter_type>
func;
func(val1,
val2,
pointer,
data,
valid_data,
size,
res,
valid_res,
offsets);
} else if (lower_inclusive && !upper_inclusive) {
BinaryRangeElementFuncForJson<ValueType, true, false> func;
func(val1, val2, pointer, data, valid_data, size, res, valid_res);
BinaryRangeElementFuncForJson<ValueType, true, false, filter_type>
func;
func(val1,
val2,
pointer,
data,
valid_data,
size,
res,
valid_res,
offsets);
} else if (!lower_inclusive && upper_inclusive) {
BinaryRangeElementFuncForJson<ValueType, false, true> func;
func(val1, val2, pointer, data, valid_data, size, res, valid_res);
BinaryRangeElementFuncForJson<ValueType, false, true, filter_type>
func;
func(val1,
val2,
pointer,
data,
valid_data,
size,
res,
valid_res,
offsets);
} else {
BinaryRangeElementFuncForJson<ValueType, false, false> func;
func(val1, val2, pointer, data, valid_data, size, res, valid_res);
BinaryRangeElementFuncForJson<ValueType, false, false, filter_type>
func;
func(val1,
val2,
pointer,
data,
valid_data,
size,
res,
valid_res,
offsets);
}
};
int64_t processed_size = ProcessDataChunks<milvus::Json>(
execute_sub_batch, std::nullptr_t{}, res, valid_res, val1, val2);
int64_t processed_size;
if (has_offset_input_) {
processed_size = ProcessDataByOffsets<milvus::Json>(execute_sub_batch,
std::nullptr_t{},
input,
res,
valid_res,
val1,
val2);
} else {
processed_size = ProcessDataChunks<milvus::Json>(
execute_sub_batch, std::nullptr_t{}, res, valid_res, val1, val2);
}
AssertInfo(processed_size == real_batch_size,
"internal error: expr processed rows {} not equal "
"expect batch size {}",
@ -360,11 +446,12 @@ PhyBinaryRangeFilterExpr::ExecRangeVisitorImplForJson() {
template <typename ValueType>
VectorPtr
PhyBinaryRangeFilterExpr::ExecRangeVisitorImplForArray() {
PhyBinaryRangeFilterExpr::ExecRangeVisitorImplForArray(OffsetVector* input) {
using GetType = std::conditional_t<std::is_same_v<ValueType, std::string>,
std::string_view,
ValueType>;
auto real_batch_size = GetNextBatchSize();
auto real_batch_size =
has_offset_input_ ? input->size() : GetNextBatchSize();
if (real_batch_size == 0) {
return nullptr;
}
@ -383,31 +470,90 @@ PhyBinaryRangeFilterExpr::ExecRangeVisitorImplForArray() {
index = std::stoi(expr_->column_.nested_path_[0]);
}
auto execute_sub_batch = [lower_inclusive, upper_inclusive](
const milvus::ArrayView* data,
const bool* valid_data,
const int size,
TargetBitmapView res,
TargetBitmapView valid_res,
ValueType val1,
ValueType val2,
int index) {
auto execute_sub_batch =
[ lower_inclusive,
upper_inclusive ]<FilterType filter_type = FilterType::sequential>(
const milvus::ArrayView* data,
const bool* valid_data,
const int32_t* offsets,
const int size,
TargetBitmapView res,
TargetBitmapView valid_res,
ValueType val1,
ValueType val2,
int index) {
if (lower_inclusive && upper_inclusive) {
BinaryRangeElementFuncForArray<ValueType, true, true> func;
func(val1, val2, index, data, valid_data, size, res, valid_res);
BinaryRangeElementFuncForArray<ValueType, true, true, filter_type>
func;
func(val1,
val2,
index,
data,
valid_data,
size,
res,
valid_res,
offsets);
} else if (lower_inclusive && !upper_inclusive) {
BinaryRangeElementFuncForArray<ValueType, true, false> func;
func(val1, val2, index, data, valid_data, size, res, valid_res);
BinaryRangeElementFuncForArray<ValueType, true, false, filter_type>
func;
func(val1,
val2,
index,
data,
valid_data,
size,
res,
valid_res,
offsets);
} else if (!lower_inclusive && upper_inclusive) {
BinaryRangeElementFuncForArray<ValueType, false, true> func;
func(val1, val2, index, data, valid_data, size, res, valid_res);
BinaryRangeElementFuncForArray<ValueType, false, true, filter_type>
func;
func(val1,
val2,
index,
data,
valid_data,
size,
res,
valid_res,
offsets);
} else {
BinaryRangeElementFuncForArray<ValueType, false, false> func;
func(val1, val2, index, data, valid_data, size, res, valid_res);
BinaryRangeElementFuncForArray<ValueType, false, false, filter_type>
func;
func(val1,
val2,
index,
data,
valid_data,
size,
res,
valid_res,
offsets);
}
};
int64_t processed_size = ProcessDataChunks<milvus::ArrayView>(
execute_sub_batch, std::nullptr_t{}, res, valid_res, val1, val2, index);
int64_t processed_size;
if (has_offset_input_) {
processed_size =
ProcessDataByOffsets<milvus::ArrayView>(execute_sub_batch,
std::nullptr_t{},
input,
res,
valid_res,
val1,
val2,
index);
} else {
processed_size = ProcessDataChunks<milvus::ArrayView>(execute_sub_batch,
std::nullptr_t{},
res,
valid_res,
val1,
val2,
index);
}
AssertInfo(processed_size == real_batch_size,
"internal error: expr processed rows {} not equal "
"expect batch size {}",

View File

@ -27,7 +27,10 @@
namespace milvus {
namespace exec {
template <typename T, bool lower_inclusive, bool upper_inclusive>
template <typename T,
bool lower_inclusive,
bool upper_inclusive,
FilterType filter_type = FilterType::sequential>
struct BinaryRangeElementFunc {
typedef std::conditional_t<std::is_integral_v<T> &&
!std::is_same_v<bool, T>,
@ -35,7 +38,28 @@ struct BinaryRangeElementFunc {
T>
HighPrecisionType;
void
operator()(T val1, T val2, const T* src, size_t n, TargetBitmapView res) {
operator()(T val1,
T val2,
const T* src,
size_t n,
TargetBitmapView res,
const int32_t* offsets = nullptr) {
if constexpr (filter_type == FilterType::random) {
for (size_t i = 0; i < n; ++i) {
auto offset = (offsets) ? offsets[i] : i;
if constexpr (lower_inclusive && upper_inclusive) {
res[i] = val1 <= src[offset] && src[offset] <= val2;
} else if constexpr (lower_inclusive && !upper_inclusive) {
res[i] = val1 <= src[offset] && src[offset] < val2;
} else if constexpr (!lower_inclusive && upper_inclusive) {
res[i] = val1 < src[offset] && src[offset] <= val2;
} else {
res[i] = val1 < src[offset] && src[offset] < val2;
}
}
return;
}
if constexpr (lower_inclusive && upper_inclusive) {
res.inplace_within_range_val<T, milvus::bitset::RangeType::IncInc>(
val1, val2, src, n);
@ -52,30 +76,33 @@ struct BinaryRangeElementFunc {
}
};
#define BinaryRangeJSONCompare(cmp) \
do { \
if (valid_data != nullptr && !valid_data[i]) { \
res[i] = valid_res[i] = false; \
break; \
} \
auto x = src[i].template at<GetType>(pointer); \
if (x.error()) { \
if constexpr (std::is_same_v<GetType, int64_t>) { \
auto x = src[i].template at<double>(pointer); \
if (!x.error()) { \
auto value = x.value(); \
res[i] = (cmp); \
break; \
} \
} \
res[i] = false; \
break; \
} \
auto value = x.value(); \
res[i] = (cmp); \
#define BinaryRangeJSONCompare(cmp) \
do { \
if (valid_data != nullptr && !valid_data[offset]) { \
res[i] = valid_res[i] = false; \
break; \
} \
auto x = src[offset].template at<GetType>(pointer); \
if (x.error()) { \
if constexpr (std::is_same_v<GetType, int64_t>) { \
auto x = src[offset].template at<double>(pointer); \
if (!x.error()) { \
auto value = x.value(); \
res[i] = (cmp); \
break; \
} \
} \
res[i] = false; \
break; \
} \
auto value = x.value(); \
res[i] = (cmp); \
} while (false)
template <typename ValueType, bool lower_inclusive, bool upper_inclusive>
template <typename ValueType,
bool lower_inclusive,
bool upper_inclusive,
FilterType filter_type = FilterType::sequential>
struct BinaryRangeElementFuncForJson {
using GetType = std::conditional_t<std::is_same_v<ValueType, std::string>,
std::string_view,
@ -88,8 +115,13 @@ struct BinaryRangeElementFuncForJson {
const bool* valid_data,
size_t n,
TargetBitmapView res,
TargetBitmapView valid_res) {
TargetBitmapView valid_res,
const int32_t* offsets = nullptr) {
for (size_t i = 0; i < n; ++i) {
auto offset = i;
if constexpr (filter_type == FilterType::random) {
offset = (offsets) ? offsets[i] : i;
}
if constexpr (lower_inclusive && upper_inclusive) {
BinaryRangeJSONCompare(val1 <= value && value <= val2);
} else if constexpr (lower_inclusive && !upper_inclusive) {
@ -103,7 +135,10 @@ struct BinaryRangeElementFuncForJson {
}
};
template <typename ValueType, bool lower_inclusive, bool upper_inclusive>
template <typename ValueType,
bool lower_inclusive,
bool upper_inclusive,
FilterType filter_type = FilterType::sequential>
struct BinaryRangeElementFuncForArray {
using GetType = std::conditional_t<std::is_same_v<ValueType, std::string>,
std::string_view,
@ -116,39 +151,44 @@ struct BinaryRangeElementFuncForArray {
const bool* valid_data,
size_t n,
TargetBitmapView res,
TargetBitmapView valid_res) {
TargetBitmapView valid_res,
const int32_t* offsets = nullptr) {
for (size_t i = 0; i < n; ++i) {
if (valid_data != nullptr && !valid_data[i]) {
size_t offset = i;
if constexpr (filter_type == FilterType::random) {
offset = (offsets) ? offsets[i] : i;
}
if (valid_data != nullptr && !valid_data[offset]) {
res[i] = valid_res[i] = false;
continue;
}
if constexpr (lower_inclusive && upper_inclusive) {
if (index >= src[i].length()) {
if (index >= src[offset].length()) {
res[i] = false;
continue;
}
auto value = src[i].get_data<GetType>(index);
auto value = src[offset].get_data<GetType>(index);
res[i] = val1 <= value && value <= val2;
} else if constexpr (lower_inclusive && !upper_inclusive) {
if (index >= src[i].length()) {
if (index >= src[offset].length()) {
res[i] = false;
continue;
}
auto value = src[i].get_data<GetType>(index);
auto value = src[offset].get_data<GetType>(index);
res[i] = val1 <= value && value < val2;
} else if constexpr (!lower_inclusive && upper_inclusive) {
if (index >= src[i].length()) {
if (index >= src[offset].length()) {
res[i] = false;
continue;
}
auto value = src[i].get_data<GetType>(index);
auto value = src[offset].get_data<GetType>(index);
res[i] = val1 < value && value <= val2;
} else {
if (index >= src[i].length()) {
if (index >= src[offset].length()) {
res[i] = false;
continue;
}
auto value = src[i].get_data<GetType>(index);
auto value = src[offset].get_data<GetType>(index);
res[i] = val1 < value && value < val2;
}
}
@ -211,11 +251,12 @@ class PhyBinaryRangeFilterExpr : public SegmentExpr {
PreCheckOverflow(HighPrecisionType& val1,
HighPrecisionType& val2,
bool& lower_inclusive,
bool& upper_inclusive);
bool& upper_inclusive,
OffsetVector* input = nullptr);
template <typename T>
VectorPtr
ExecRangeVisitorImpl();
ExecRangeVisitorImpl(OffsetVector* input = nullptr);
template <typename T>
VectorPtr
@ -223,15 +264,15 @@ class PhyBinaryRangeFilterExpr : public SegmentExpr {
template <typename T>
VectorPtr
ExecRangeVisitorImplForData();
ExecRangeVisitorImplForData(OffsetVector* input = nullptr);
template <typename ValueType>
VectorPtr
ExecRangeVisitorImplForJson();
ExecRangeVisitorImplForJson(OffsetVector* input = nullptr);
template <typename ValueType>
VectorPtr
ExecRangeVisitorImplForArray();
ExecRangeVisitorImplForArray(OffsetVector* input = nullptr);
private:
std::shared_ptr<const milvus::expr::BinaryRangeFilterExpr> expr_;

View File

@ -28,6 +28,8 @@ namespace exec {
void
PhyCallExpr::Eval(EvalCtx& context, VectorPtr& result) {
auto offset_input = context.get_offset_input();
SetHasOffsetInput(offset_input != nullptr);
AssertInfo(inputs_.size() == expr_->inputs().size(),
"logical call expr needs {} inputs, but {} inputs are provided",
expr_->inputs().size(),

View File

@ -61,8 +61,10 @@ class PhyCallExpr : public Expr {
void
MoveCursor() override {
for (auto input : inputs_) {
input->MoveCursor();
if (!has_offset_input_) {
for (auto input : inputs_) {
input->MoveCursor();
}
}
}

View File

@ -30,30 +30,32 @@ PhyColumnExpr::GetNextBatchSize() {
void
PhyColumnExpr::Eval(EvalCtx& context, VectorPtr& result) {
auto input = context.get_offset_input();
SetHasOffsetInput(input != nullptr);
switch (this->expr_->type()) {
case DataType::BOOL:
result = DoEval<bool>();
result = DoEval<bool>(input);
break;
case DataType::INT8:
result = DoEval<int8_t>();
result = DoEval<int8_t>(input);
break;
case DataType::INT16:
result = DoEval<int16_t>();
result = DoEval<int16_t>(input);
break;
case DataType::INT32:
result = DoEval<int32_t>();
result = DoEval<int32_t>(input);
break;
case DataType::INT64:
result = DoEval<int64_t>();
result = DoEval<int64_t>(input);
break;
case DataType::FLOAT:
result = DoEval<float>();
result = DoEval<float>(input);
break;
case DataType::DOUBLE:
result = DoEval<double>();
result = DoEval<double>(input);
break;
case DataType::VARCHAR: {
result = DoEval<std::string>();
result = DoEval<std::string>(input);
break;
}
default:
@ -65,8 +67,59 @@ PhyColumnExpr::Eval(EvalCtx& context, VectorPtr& result) {
template <typename T>
VectorPtr
PhyColumnExpr::DoEval() {
PhyColumnExpr::DoEval(OffsetVector* input) {
// similar to PhyCompareFilterExpr::ExecCompareExprDispatcher(OpType op)
// take offsets as input
if (has_offset_input_) {
auto real_batch_size = input->size();
if (real_batch_size == 0) {
return nullptr;
}
auto res_vec = std::make_shared<ColumnVector>(
expr_->GetColumn().data_type_, real_batch_size);
T* res_value = res_vec->RawAsValues<T>();
TargetBitmapView valid_res(res_vec->GetValidRawData(), real_batch_size);
valid_res.set();
auto data_barrier = segment_chunk_reader_.segment_->num_chunk_data(
expr_->GetColumn().field_id_);
int64_t processed_rows = 0;
const auto size_per_chunk = segment_chunk_reader_.SizePerChunk();
for (auto i = 0; i < real_batch_size; ++i) {
auto offset = (*input)[i];
auto [chunk_id,
chunk_offset] = [&]() -> std::pair<int64_t, int64_t> {
if (segment_chunk_reader_.segment_->type() ==
SegmentType::Growing) {
return {offset / size_per_chunk, offset % size_per_chunk};
} else if (segment_chunk_reader_.segment_->is_chunked() &&
data_barrier > 0) {
return segment_chunk_reader_.segment_->get_chunk_by_offset(
expr_->GetColumn().field_id_, offset);
} else {
return {0, offset};
}
}();
auto chunk_data = segment_chunk_reader_.GetChunkDataAccessor(
expr_->GetColumn().data_type_,
expr_->GetColumn().field_id_,
chunk_id,
data_barrier);
auto chunk_data_by_offset = chunk_data(chunk_offset);
if (!chunk_data_by_offset.has_value()) {
valid_res[processed_rows] = false;
} else {
res_value[processed_rows] =
boost::get<T>(chunk_data_by_offset.value());
}
processed_rows++;
}
return res_vec;
}
// normal path
if (segment_chunk_reader_.segment_->is_chunked()) {
auto real_batch_size = GetNextBatchSize();
if (real_batch_size == 0) {

View File

@ -67,16 +67,21 @@ class PhyColumnExpr : public Expr {
void
MoveCursor() override {
if (segment_chunk_reader_.segment_->is_chunked()) {
segment_chunk_reader_.MoveCursorForMultipleChunk(
current_chunk_id_,
current_chunk_pos_,
expr_->GetColumn().field_id_,
num_chunk_,
batch_size_);
} else {
segment_chunk_reader_.MoveCursorForSingleChunk(
current_chunk_id_, current_chunk_pos_, num_chunk_, batch_size_);
if (!has_offset_input_) {
if (segment_chunk_reader_.segment_->is_chunked()) {
segment_chunk_reader_.MoveCursorForMultipleChunk(
current_chunk_id_,
current_chunk_pos_,
expr_->GetColumn().field_id_,
num_chunk_,
batch_size_);
} else {
segment_chunk_reader_.MoveCursorForSingleChunk(
current_chunk_id_,
current_chunk_pos_,
num_chunk_,
batch_size_);
}
}
}
@ -107,7 +112,7 @@ class PhyColumnExpr : public Expr {
template <typename T>
VectorPtr
DoEval();
DoEval(OffsetVector* input = nullptr);
private:
bool is_indexed_;

View File

@ -38,7 +38,77 @@ PhyCompareFilterExpr::GetNextBatchSize() {
template <typename OpType>
VectorPtr
PhyCompareFilterExpr::ExecCompareExprDispatcher(OpType op) {
PhyCompareFilterExpr::ExecCompareExprDispatcher(OpType op,
OffsetVector* input) {
// take offsets as input
if (has_offset_input_) {
auto real_batch_size = input->size();
if (real_batch_size == 0) {
return nullptr;
}
auto res_vec = std::make_shared<ColumnVector>(
TargetBitmap(real_batch_size), TargetBitmap(real_batch_size));
TargetBitmapView res(res_vec->GetRawData(), real_batch_size);
TargetBitmapView valid_res(res_vec->GetValidRawData(), real_batch_size);
valid_res.set();
auto left_data_barrier = segment_chunk_reader_.segment_->num_chunk_data(
expr_->left_field_id_);
auto right_data_barrier =
segment_chunk_reader_.segment_->num_chunk_data(
expr_->right_field_id_);
int64_t processed_rows = 0;
const auto size_per_chunk = segment_chunk_reader_.SizePerChunk();
for (auto i = 0; i < real_batch_size; ++i) {
auto offset = (*input)[i];
auto get_chunk_id_and_offset =
[&](const FieldId field,
const int64_t data_barrier) -> std::pair<int64_t, int64_t> {
if (segment_chunk_reader_.segment_->type() ==
SegmentType::Growing) {
return {offset / size_per_chunk, offset % size_per_chunk};
} else if (segment_chunk_reader_.segment_->is_chunked() &&
data_barrier > 0) {
return segment_chunk_reader_.segment_->get_chunk_by_offset(
field, offset);
} else {
return {0, offset};
}
};
auto [left_chunk_id, left_chunk_offset] =
get_chunk_id_and_offset(left_field_, left_data_barrier);
auto [right_chunk_id, right_chunk_offset] =
get_chunk_id_and_offset(right_field_, right_data_barrier);
auto left = segment_chunk_reader_.GetChunkDataAccessor(
expr_->left_data_type_,
expr_->left_field_id_,
left_chunk_id,
left_data_barrier);
auto right = segment_chunk_reader_.GetChunkDataAccessor(
expr_->right_data_type_,
expr_->right_field_id_,
right_chunk_id,
right_data_barrier);
auto left_opt = left(left_chunk_offset);
auto right_opt = right(right_chunk_offset);
if (!left_opt.has_value() || !right_opt.has_value()) {
res[processed_rows] = false;
valid_res[processed_rows] = false;
} else {
res[processed_rows] = boost::apply_visitor(
milvus::query::Relational<decltype(op)>{},
left_opt.value(),
right_opt.value());
}
processed_rows++;
}
return res_vec;
}
// normal path
if (segment_chunk_reader_.segment_->is_chunked()) {
auto real_batch_size = GetNextBatchSize();
if (real_batch_size == 0) {
@ -140,39 +210,42 @@ PhyCompareFilterExpr::ExecCompareExprDispatcher(OpType op) {
void
PhyCompareFilterExpr::Eval(EvalCtx& context, VectorPtr& result) {
auto input = context.get_offset_input();
SetHasOffsetInput((input != nullptr));
// For segment both fields has no index, can use SIMD to speed up.
// Avoiding too much call stack that blocks SIMD.
if (!is_left_indexed_ && !is_right_indexed_ && !IsStringExpr()) {
result = ExecCompareExprDispatcherForBothDataSegment();
result = ExecCompareExprDispatcherForBothDataSegment(input);
return;
}
result = ExecCompareExprDispatcherForHybridSegment();
result = ExecCompareExprDispatcherForHybridSegment(input);
}
VectorPtr
PhyCompareFilterExpr::ExecCompareExprDispatcherForHybridSegment() {
PhyCompareFilterExpr::ExecCompareExprDispatcherForHybridSegment(
OffsetVector* input) {
switch (expr_->op_type_) {
case OpType::Equal: {
return ExecCompareExprDispatcher(std::equal_to<>{});
return ExecCompareExprDispatcher(std::equal_to<>{}, input);
}
case OpType::NotEqual: {
return ExecCompareExprDispatcher(std::not_equal_to<>{});
return ExecCompareExprDispatcher(std::not_equal_to<>{}, input);
}
case OpType::GreaterEqual: {
return ExecCompareExprDispatcher(std::greater_equal<>{});
return ExecCompareExprDispatcher(std::greater_equal<>{}, input);
}
case OpType::GreaterThan: {
return ExecCompareExprDispatcher(std::greater<>{});
return ExecCompareExprDispatcher(std::greater<>{}, input);
}
case OpType::LessEqual: {
return ExecCompareExprDispatcher(std::less_equal<>{});
return ExecCompareExprDispatcher(std::less_equal<>{}, input);
}
case OpType::LessThan: {
return ExecCompareExprDispatcher(std::less<>{});
return ExecCompareExprDispatcher(std::less<>{}, input);
}
case OpType::PrefixMatch: {
return ExecCompareExprDispatcher(
milvus::query::MatchOp<OpType::PrefixMatch>{});
milvus::query::MatchOp<OpType::PrefixMatch>{}, input);
}
// case OpType::PostfixMatch: {
// }
@ -183,22 +256,23 @@ PhyCompareFilterExpr::ExecCompareExprDispatcherForHybridSegment() {
}
VectorPtr
PhyCompareFilterExpr::ExecCompareExprDispatcherForBothDataSegment() {
PhyCompareFilterExpr::ExecCompareExprDispatcherForBothDataSegment(
OffsetVector* input) {
switch (expr_->left_data_type_) {
case DataType::BOOL:
return ExecCompareLeftType<bool>();
return ExecCompareLeftType<bool>(input);
case DataType::INT8:
return ExecCompareLeftType<int8_t>();
return ExecCompareLeftType<int8_t>(input);
case DataType::INT16:
return ExecCompareLeftType<int16_t>();
return ExecCompareLeftType<int16_t>(input);
case DataType::INT32:
return ExecCompareLeftType<int32_t>();
return ExecCompareLeftType<int32_t>(input);
case DataType::INT64:
return ExecCompareLeftType<int64_t>();
return ExecCompareLeftType<int64_t>(input);
case DataType::FLOAT:
return ExecCompareLeftType<float>();
return ExecCompareLeftType<float>(input);
case DataType::DOUBLE:
return ExecCompareLeftType<double>();
return ExecCompareLeftType<double>(input);
default:
PanicInfo(
DataTypeInvalid,
@ -209,22 +283,22 @@ PhyCompareFilterExpr::ExecCompareExprDispatcherForBothDataSegment() {
template <typename T>
VectorPtr
PhyCompareFilterExpr::ExecCompareLeftType() {
PhyCompareFilterExpr::ExecCompareLeftType(OffsetVector* input) {
switch (expr_->right_data_type_) {
case DataType::BOOL:
return ExecCompareRightType<T, bool>();
return ExecCompareRightType<T, bool>(input);
case DataType::INT8:
return ExecCompareRightType<T, int8_t>();
return ExecCompareRightType<T, int8_t>(input);
case DataType::INT16:
return ExecCompareRightType<T, int16_t>();
return ExecCompareRightType<T, int16_t>(input);
case DataType::INT32:
return ExecCompareRightType<T, int32_t>();
return ExecCompareRightType<T, int32_t>(input);
case DataType::INT64:
return ExecCompareRightType<T, int64_t>();
return ExecCompareRightType<T, int64_t>(input);
case DataType::FLOAT:
return ExecCompareRightType<T, float>();
return ExecCompareRightType<T, float>(input);
case DataType::DOUBLE:
return ExecCompareRightType<T, double>();
return ExecCompareRightType<T, double>(input);
default:
PanicInfo(
DataTypeInvalid,
@ -235,8 +309,9 @@ PhyCompareFilterExpr::ExecCompareLeftType() {
template <typename T, typename U>
VectorPtr
PhyCompareFilterExpr::ExecCompareRightType() {
auto real_batch_size = GetNextBatchSize();
PhyCompareFilterExpr::ExecCompareRightType(OffsetVector* input) {
auto real_batch_size =
has_offset_input_ ? input->size() : GetNextBatchSize();
if (real_batch_size == 0) {
return nullptr;
}
@ -248,39 +323,47 @@ PhyCompareFilterExpr::ExecCompareRightType() {
valid_res.set();
auto expr_type = expr_->op_type_;
auto execute_sub_batch = [expr_type](const T* left,
const U* right,
const int size,
TargetBitmapView res) {
auto execute_sub_batch = [expr_type]<FilterType filter_type =
FilterType::sequential>(
const T* left,
const U* right,
const int32_t* offsets,
const int size,
TargetBitmapView res) {
switch (expr_type) {
case proto::plan::GreaterThan: {
CompareElementFunc<T, U, proto::plan::GreaterThan> func;
func(left, right, size, res);
CompareElementFunc<T, U, proto::plan::GreaterThan, filter_type>
func;
func(left, right, size, res, offsets);
break;
}
case proto::plan::GreaterEqual: {
CompareElementFunc<T, U, proto::plan::GreaterEqual> func;
func(left, right, size, res);
CompareElementFunc<T, U, proto::plan::GreaterEqual, filter_type>
func;
func(left, right, size, res, offsets);
break;
}
case proto::plan::LessThan: {
CompareElementFunc<T, U, proto::plan::LessThan> func;
func(left, right, size, res);
CompareElementFunc<T, U, proto::plan::LessThan, filter_type>
func;
func(left, right, size, res, offsets);
break;
}
case proto::plan::LessEqual: {
CompareElementFunc<T, U, proto::plan::LessEqual> func;
func(left, right, size, res);
CompareElementFunc<T, U, proto::plan::LessEqual, filter_type>
func;
func(left, right, size, res, offsets);
break;
}
case proto::plan::Equal: {
CompareElementFunc<T, U, proto::plan::Equal> func;
func(left, right, size, res);
CompareElementFunc<T, U, proto::plan::Equal, filter_type> func;
func(left, right, size, res, offsets);
break;
}
case proto::plan::NotEqual: {
CompareElementFunc<T, U, proto::plan::NotEqual> func;
func(left, right, size, res);
CompareElementFunc<T, U, proto::plan::NotEqual, filter_type>
func;
func(left, right, size, res, offsets);
break;
}
default:
@ -290,8 +373,14 @@ PhyCompareFilterExpr::ExecCompareRightType() {
expr_type));
}
};
int64_t processed_size =
ProcessBothDataChunks<T, U>(execute_sub_batch, res, valid_res);
int64_t processed_size;
if (has_offset_input_) {
processed_size = ProcessBothDataByOffsets<T, U>(
execute_sub_batch, input, res, valid_res);
} else {
processed_size = ProcessBothDataChunks<T, U>(
execute_sub_batch, input, res, valid_res);
}
AssertInfo(processed_size == real_batch_size,
"internal error: expr processed rows {} not equal "
"expect batch size {}",

View File

@ -30,36 +30,44 @@
namespace milvus {
namespace exec {
template <typename T, typename U, proto::plan::OpType op>
template <typename T,
typename U,
proto::plan::OpType op,
FilterType filter_type>
struct CompareElementFunc {
void
operator()(const T* left,
const U* right,
size_t size,
TargetBitmapView res) {
/*
TargetBitmapView res,
const int32_t* offsets = nullptr) {
// This is the original code, kept here for the documentation purposes
for (int i = 0; i < size; ++i) {
if constexpr (op == proto::plan::OpType::Equal) {
res[i] = left[i] == right[i];
} else if constexpr (op == proto::plan::OpType::NotEqual) {
res[i] = left[i] != right[i];
} else if constexpr (op == proto::plan::OpType::GreaterThan) {
res[i] = left[i] > right[i];
} else if constexpr (op == proto::plan::OpType::LessThan) {
res[i] = left[i] < right[i];
} else if constexpr (op == proto::plan::OpType::GreaterEqual) {
res[i] = left[i] >= right[i];
} else if constexpr (op == proto::plan::OpType::LessEqual) {
res[i] = left[i] <= right[i];
} else {
PanicInfo(
OpTypeInvalid,
fmt::format("unsupported op_type:{} for CompareElementFunc",
op));
// also, used for iterative filter
if constexpr (filter_type == FilterType::random) {
for (int i = 0; i < size; ++i) {
auto offset = (offsets != nullptr) ? offsets[i] : i;
if constexpr (op == proto::plan::OpType::Equal) {
res[i] = left[offset] == right[offset];
} else if constexpr (op == proto::plan::OpType::NotEqual) {
res[i] = left[offset] != right[offset];
} else if constexpr (op == proto::plan::OpType::GreaterThan) {
res[i] = left[offset] > right[offset];
} else if constexpr (op == proto::plan::OpType::LessThan) {
res[i] = left[offset] < right[offset];
} else if constexpr (op == proto::plan::OpType::GreaterEqual) {
res[i] = left[offset] >= right[offset];
} else if constexpr (op == proto::plan::OpType::LessEqual) {
res[i] = left[offset] <= right[offset];
} else {
PanicInfo(
OpTypeInvalid,
fmt::format(
"unsupported op_type:{} for CompareElementFunc",
op));
}
}
return;
}
*/
if constexpr (op == proto::plan::OpType::Equal) {
res.inplace_compare_column<T, U, milvus::bitset::CompareOpType::EQ>(
@ -138,22 +146,27 @@ class PhyCompareFilterExpr : public Expr {
void
MoveCursor() override {
if (segment_chunk_reader_.segment_->is_chunked()) {
segment_chunk_reader_.MoveCursorForMultipleChunk(
left_current_chunk_id_,
left_current_chunk_pos_,
left_field_,
left_num_chunk_,
batch_size_);
segment_chunk_reader_.MoveCursorForMultipleChunk(
right_current_chunk_id_,
right_current_chunk_pos_,
right_field_,
right_num_chunk_,
batch_size_);
} else {
segment_chunk_reader_.MoveCursorForSingleChunk(
current_chunk_id_, current_chunk_pos_, num_chunk_, batch_size_);
if (!has_offset_input_) {
if (segment_chunk_reader_.segment_->is_chunked()) {
segment_chunk_reader_.MoveCursorForMultipleChunk(
left_current_chunk_id_,
left_current_chunk_pos_,
left_field_,
left_num_chunk_,
batch_size_);
segment_chunk_reader_.MoveCursorForMultipleChunk(
right_current_chunk_id_,
right_current_chunk_pos_,
right_field_,
right_num_chunk_,
batch_size_);
} else {
segment_chunk_reader_.MoveCursorForSingleChunk(
current_chunk_id_,
current_chunk_pos_,
num_chunk_,
batch_size_);
}
}
}
@ -188,6 +201,7 @@ class PhyCompareFilterExpr : public Expr {
template <typename T, typename U, typename FUNC, typename... ValTypes>
int64_t
ProcessBothDataChunks(FUNC func,
OffsetVector* input,
TargetBitmapView res,
TargetBitmapView valid_res,
ValTypes... values) {
@ -203,6 +217,97 @@ class PhyCompareFilterExpr : public Expr {
}
}
template <typename T, typename U, typename FUNC, typename... ValTypes>
int64_t
ProcessBothDataByOffsets(FUNC func,
OffsetVector* input,
TargetBitmapView res,
TargetBitmapView valid_res,
ValTypes... values) {
int64_t size = input->size();
int64_t processed_size = 0;
const auto size_per_chunk = segment_chunk_reader_.SizePerChunk();
if (segment_chunk_reader_.segment_->is_chunked() ||
segment_chunk_reader_.segment_->type() == SegmentType::Growing) {
for (auto i = 0; i < size; ++i) {
auto offset = (*input)[i];
auto get_chunk_id_and_offset =
[&](const FieldId field) -> std::pair<int64_t, int64_t> {
if (segment_chunk_reader_.segment_->type() ==
SegmentType::Growing) {
auto size_per_chunk =
segment_chunk_reader_.SizePerChunk();
return {offset / size_per_chunk,
offset % size_per_chunk};
} else {
return segment_chunk_reader_.segment_
->get_chunk_by_offset(field, offset);
}
};
auto [left_chunk_id, left_chunk_offset] =
get_chunk_id_and_offset(left_field_);
auto [right_chunk_id, right_chunk_offset] =
get_chunk_id_and_offset(right_field_);
auto left_chunk = segment_chunk_reader_.segment_->chunk_data<T>(
left_field_, left_chunk_id);
auto right_chunk =
segment_chunk_reader_.segment_->chunk_data<U>(
right_field_, right_chunk_id);
const T* left_data = left_chunk.data() + left_chunk_offset;
const U* right_data = right_chunk.data() + right_chunk_offset;
func.template operator()<FilterType::random>(
left_data,
right_data,
nullptr,
1,
res + processed_size,
values...);
const bool* left_valid_data = left_chunk.valid_data();
const bool* right_valid_data = right_chunk.valid_data();
// mask with valid_data
if (left_valid_data && !left_valid_data[left_chunk_offset]) {
res[processed_size] = false;
valid_res[processed_size] = false;
continue;
}
if (right_valid_data && !right_valid_data[right_chunk_offset]) {
res[processed_size] = false;
valid_res[processed_size] = false;
}
processed_size++;
}
return processed_size;
} else {
auto left_chunk =
segment_chunk_reader_.segment_->chunk_data<T>(left_field_, 0);
auto right_chunk =
segment_chunk_reader_.segment_->chunk_data<U>(right_field_, 0);
const T* left_data = left_chunk.data();
const U* right_data = right_chunk.data();
func.template operator()<FilterType::random>(
left_data, right_data, input->data(), size, res, values...);
const bool* left_valid_data = left_chunk.valid_data();
const bool* right_valid_data = right_chunk.valid_data();
// mask with valid_data
for (int i = 0; i < size; ++i) {
if (left_valid_data && !left_valid_data[(*input)[i]]) {
res[i] = false;
valid_res[i] = false;
continue;
}
if (right_valid_data && !right_valid_data[(*input)[i]]) {
res[i] = false;
valid_res[i] = false;
}
}
processed_size += size;
return processed_size;
}
}
template <typename T, typename U, typename FUNC, typename... ValTypes>
int64_t
ProcessBothDataChunksForSingleChunk(FUNC func,
@ -239,7 +344,12 @@ class PhyCompareFilterExpr : public Expr {
const T* left_data = left_chunk.data() + data_pos;
const U* right_data = right_chunk.data() + data_pos;
func(left_data, right_data, size, res + processed_size, values...);
func(left_data,
right_data,
nullptr,
size,
res + processed_size,
values...);
const bool* left_valid_data = left_chunk.valid_data();
const bool* right_valid_data = right_chunk.valid_data();
// mask with valid_data
@ -307,7 +417,12 @@ class PhyCompareFilterExpr : public Expr {
const T* left_data = left_chunk.data() + data_pos;
const U* right_data = right_chunk.data() + data_pos;
func(left_data, right_data, size, res + processed_size, values...);
func(left_data,
right_data,
nullptr,
size,
res + processed_size,
values...);
const bool* left_valid_data = left_chunk.valid_data();
const bool* right_valid_data = right_chunk.valid_data();
// mask with valid_data
@ -336,21 +451,21 @@ class PhyCompareFilterExpr : public Expr {
template <typename OpType>
VectorPtr
ExecCompareExprDispatcher(OpType op);
ExecCompareExprDispatcher(OpType op, OffsetVector* input = nullptr);
VectorPtr
ExecCompareExprDispatcherForHybridSegment();
ExecCompareExprDispatcherForHybridSegment(OffsetVector* input = nullptr);
VectorPtr
ExecCompareExprDispatcherForBothDataSegment();
ExecCompareExprDispatcherForBothDataSegment(OffsetVector* input = nullptr);
template <typename T>
VectorPtr
ExecCompareLeftType();
ExecCompareLeftType(OffsetVector* input = nullptr);
template <typename T, typename U>
VectorPtr
ExecCompareRightType();
ExecCompareRightType(OffsetVector* input = nullptr);
private:
const FieldId left_field_;

View File

@ -84,11 +84,23 @@ class PhyConjunctFilterExpr : public Expr {
void
MoveCursor() override {
for (auto& input : inputs_) {
input->MoveCursor();
if (!has_offset_input_) {
for (auto& input : inputs_) {
input->MoveCursor();
}
}
}
bool
SupportOffsetInput() override {
for (auto& input : inputs_) {
if (!(input->SupportOffsetInput())) {
return false;
}
}
return true;
}
private:
int64_t
UpdateResult(ColumnVectorPtr& input_result,

View File

@ -28,17 +28,26 @@ namespace milvus {
namespace exec {
class ExprSet;
using OffsetVector = FixedVector<int32_t>;
class EvalCtx {
public:
EvalCtx(ExecContext* exec_ctx, ExprSet* expr_set, RowVector* row)
: exec_ctx_(exec_ctx), expr_set_(expr_set), row_(row) {
EvalCtx(ExecContext* exec_ctx,
ExprSet* expr_set,
OffsetVector* offset_input)
: exec_ctx_(exec_ctx),
expr_set_(expr_set),
offset_input_(offset_input) {
assert(exec_ctx_ != nullptr);
assert(expr_set_ != nullptr);
// assert(row_ != nullptr);
}
explicit EvalCtx(ExecContext* exec_ctx, ExprSet* expr_set)
: exec_ctx_(exec_ctx), expr_set_(expr_set), offset_input_(nullptr) {
}
explicit EvalCtx(ExecContext* exec_ctx)
: exec_ctx_(exec_ctx), expr_set_(nullptr), row_(nullptr) {
: exec_ctx_(exec_ctx), expr_set_(nullptr), offset_input_(nullptr) {
}
ExecContext*
@ -51,11 +60,22 @@ class EvalCtx {
return exec_ctx_->get_query_config();
}
inline OffsetVector*
get_offset_input() {
return offset_input_;
}
inline void
set_offset_input(OffsetVector* offset_input) {
offset_input_ = offset_input;
}
private:
ExecContext* exec_ctx_;
ExprSet* expr_set_;
RowVector* row_;
bool input_no_nulls_;
ExecContext* exec_ctx_ = nullptr;
ExprSet* expr_set_ = nullptr;
// we may accept offsets array as input and do expr filtering on these data
OffsetVector* offset_input_ = nullptr;
bool input_no_nulls_ = false;
};
} // namespace exec

View File

@ -22,13 +22,15 @@ namespace exec {
void
PhyExistsFilterExpr::Eval(EvalCtx& context, VectorPtr& result) {
auto input = context.get_offset_input();
SetHasOffsetInput((input != nullptr));
switch (expr_->column_.data_type_) {
case DataType::JSON: {
if (is_index_mode_) {
PanicInfo(ExprInvalid,
"exists expr for json index mode not supported");
}
result = EvalJsonExistsForDataSegment();
result = EvalJsonExistsForDataSegment(input);
break;
}
default:
@ -39,8 +41,9 @@ PhyExistsFilterExpr::Eval(EvalCtx& context, VectorPtr& result) {
}
VectorPtr
PhyExistsFilterExpr::EvalJsonExistsForDataSegment() {
auto real_batch_size = GetNextBatchSize();
PhyExistsFilterExpr::EvalJsonExistsForDataSegment(OffsetVector* input) {
auto real_batch_size =
has_offset_input_ ? input->size() : GetNextBatchSize();
if (real_batch_size == 0) {
return nullptr;
}
@ -51,23 +54,40 @@ PhyExistsFilterExpr::EvalJsonExistsForDataSegment() {
valid_res.set();
auto pointer = milvus::Json::pointer(expr_->column_.nested_path_);
auto execute_sub_batch = [](const milvus::Json* data,
const bool* valid_data,
const int size,
TargetBitmapView res,
TargetBitmapView valid_res,
const std::string& pointer) {
auto execute_sub_batch =
[]<FilterType filter_type = FilterType::sequential>(
const milvus::Json* data,
const bool* valid_data,
const int32_t* offsets,
const int size,
TargetBitmapView res,
TargetBitmapView valid_res,
const std::string& pointer) {
for (int i = 0; i < size; ++i) {
if (valid_data != nullptr && !valid_data[i]) {
auto offset = i;
if constexpr (filter_type == FilterType::random) {
offset = (offsets) ? offsets[i] : i;
}
if (valid_data != nullptr && !valid_data[offset]) {
res[i] = valid_res[i] = false;
continue;
}
res[i] = data[i].exist(pointer);
res[i] = data[offset].exist(pointer);
}
};
int64_t processed_size = ProcessDataChunks<Json>(
execute_sub_batch, std::nullptr_t{}, res, valid_res, pointer);
int64_t processed_size;
if (has_offset_input_) {
processed_size = ProcessDataByOffsets<Json>(execute_sub_batch,
std::nullptr_t{},
input,
res,
valid_res,
pointer);
} else {
processed_size = ProcessDataChunks<Json>(
execute_sub_batch, std::nullptr_t{}, res, valid_res, pointer);
}
AssertInfo(processed_size == real_batch_size,
"internal error: expr processed rows {} not equal "
"expect batch size {}",

View File

@ -57,7 +57,7 @@ class PhyExistsFilterExpr : public SegmentExpr {
private:
VectorPtr
EvalJsonExistsForDataSegment();
EvalJsonExistsForDataSegment(OffsetVector* input = nullptr);
private:
std::shared_ptr<const milvus::expr::ExistsExpr> expr_;

View File

@ -31,6 +31,8 @@
namespace milvus {
namespace exec {
enum class FilterType { sequential = 0, random = 1 };
class Expr {
public:
Expr(DataType type,
@ -73,12 +75,26 @@ class Expr {
MoveCursor() {
}
void
SetHasOffsetInput(bool has_offset_input) {
has_offset_input_ = has_offset_input;
}
virtual bool
SupportOffsetInput() {
return true;
}
protected:
DataType type_;
const std::vector<std::shared_ptr<Expr>> inputs_;
std::string name_;
// NOTE: unused
std::shared_ptr<VectorFunction> vector_func_;
// whether we have offset input and do expr filtering on these data
// default is false which means we will do expr filtering on the total segment data
bool has_offset_input_ = false;
};
using ExprPtr = std::shared_ptr<milvus::exec::Expr>;
@ -204,13 +220,16 @@ class SegmentExpr : public Expr {
void
MoveCursor() override {
if (is_index_mode_) {
MoveCursorForIndex();
if (segment_->HasFieldData(field_id_)) {
// when we specify input, do not maintain states
if (!has_offset_input_) {
if (is_index_mode_) {
MoveCursorForIndex();
if (segment_->HasFieldData(field_id_)) {
MoveCursorForData();
}
} else {
MoveCursorForData();
}
} else {
MoveCursorForData();
}
}
@ -275,6 +294,7 @@ class SegmentExpr : public Expr {
// use valid_data to see if raw data is null
func(views_info.first.data(),
views_info.second.data(),
nullptr,
need_size,
res,
valid_res,
@ -286,6 +306,253 @@ class SegmentExpr : public Expr {
return need_size;
}
// accept offsets array and process on the scalar data by offsets
// stateless! Just check and set bitset as result, does not need to move cursor
// used for processing raw data expr for sealed segments.
// now only used for std::string_view && json
// TODO: support more types
template <typename T, typename FUNC, typename... ValTypes>
int64_t
ProcessDataByOffsetsForSealedSeg(
FUNC func,
std::function<bool(const milvus::SkipIndex&, FieldId, int)> skip_func,
OffsetVector* input,
TargetBitmapView res,
TargetBitmapView valid_res,
ValTypes... values) {
// For non_chunked sealed segment, only single chunk
Assert(num_data_chunk_ == 1);
auto& skip_index = segment_->GetSkipIndex();
auto [data_vec, valid_data] =
segment_->get_views_by_offsets<T>(field_id_, 0, *input);
if (!skip_func || !skip_func(skip_index, field_id_, 0)) {
func(data_vec.data(),
valid_data.data(),
nullptr,
input->size(),
res,
valid_res,
values...);
} else {
ApplyValidData(valid_data.data(), res, valid_res, input->size());
}
return input->size();
}
template <typename T, typename FUNC, typename... ValTypes>
VectorPtr
ProcessIndexChunksByOffsets(FUNC func,
OffsetVector* input,
ValTypes... values) {
AssertInfo(num_index_chunk_ == 1, "scalar index chunk num must be 1");
typedef std::
conditional_t<std::is_same_v<T, std::string_view>, std::string, T>
IndexInnerType;
using Index = index::ScalarIndex<IndexInnerType>;
TargetBitmap valid_res(input->size());
const Index& index =
segment_->chunk_scalar_index<IndexInnerType>(field_id_, 0);
auto* index_ptr = const_cast<Index*>(&index);
auto valid_result = index_ptr->IsNotNull();
for (auto i = 0; i < input->size(); ++i) {
valid_res[i] = valid_result[(*input)[i]];
}
auto result = std::move(func.template operator()<FilterType::random>(
index_ptr, values..., input->data()));
return std::make_shared<ColumnVector>(std::move(result),
std::move(valid_res));
}
// when we have scalar index and index contains raw data, could go with index chunk by offsets
template <typename T, typename FUNC, typename... ValTypes>
int64_t
ProcessIndexLookupByOffsets(
FUNC func,
std::function<bool(const milvus::SkipIndex&, FieldId, int)> skip_func,
OffsetVector* input,
TargetBitmapView res,
TargetBitmapView valid_res,
ValTypes... values) {
AssertInfo(num_index_chunk_ == 1, "scalar index chunk num must be 1");
auto& skip_index = segment_->GetSkipIndex();
typedef std::
conditional_t<std::is_same_v<T, std::string_view>, std::string, T>
IndexInnerType;
using Index = index::ScalarIndex<IndexInnerType>;
int64_t processed_size = 0;
const Index& index =
segment_->chunk_scalar_index<IndexInnerType>(field_id_, 0);
auto* index_ptr = const_cast<Index*>(&index);
auto valid_result = index_ptr->IsNotNull();
auto batch_size = input->size();
if (!skip_func || !skip_func(skip_index, field_id_, 0)) {
for (auto i = 0; i < batch_size; ++i) {
auto offset = (*input)[i];
auto raw = index_ptr->Reverse_Lookup(offset);
if (!raw.has_value()) {
res[i] = false;
continue;
}
T raw_data = raw.value();
bool valid_data = valid_result[offset];
func.template operator()<FilterType::random>(&raw_data,
&valid_data,
nullptr,
1,
res + i,
valid_res + i,
values...);
}
} else {
for (auto i = 0; i < batch_size; ++i) {
auto offset = (*input)[i];
res[i] = valid_res[i] = valid_result[offset];
}
}
return batch_size;
}
// accept offsets array and process on the scalar data by offsets
// stateless! Just check and set bitset as result, does not need to move cursor
template <typename T, typename FUNC, typename... ValTypes>
int64_t
ProcessDataByOffsets(
FUNC func,
std::function<bool(const milvus::SkipIndex&, FieldId, int)> skip_func,
OffsetVector* input,
TargetBitmapView res,
TargetBitmapView valid_res,
ValTypes... values) {
int64_t processed_size = 0;
// index reverse lookup
if (is_index_mode_ && num_data_chunk_ == 0) {
return ProcessIndexLookupByOffsets<T>(
func, skip_func, input, res, valid_res, values...);
}
auto& skip_index = segment_->GetSkipIndex();
// raw data scan
// sealed segment
if (segment_->type() == SegmentType::Sealed) {
if (segment_->is_chunked()) {
if constexpr (std::is_same_v<T, std::string_view> ||
std::is_same_v<T, Json>) {
for (size_t i = 0; i < input->size(); ++i) {
int64_t offset = (*input)[i];
auto [chunk_id, chunk_offset] =
segment_->get_chunk_by_offset(field_id_, offset);
auto [data_vec, valid_data] =
segment_->get_views_by_offsets<T>(
field_id_, chunk_id, {int32_t(chunk_offset)});
if (!skip_func ||
!skip_func(skip_index, field_id_, chunk_id)) {
func.template operator()<FilterType::random>(
data_vec.data(),
valid_data.data(),
nullptr,
1,
res + processed_size,
valid_res + processed_size,
values...);
} else {
res[processed_size] = valid_res[processed_size] =
(valid_data[0]);
}
processed_size++;
}
return input->size();
}
for (size_t i = 0; i < input->size(); ++i) {
int64_t offset = (*input)[i];
auto [chunk_id, chunk_offset] =
segment_->get_chunk_by_offset(field_id_, offset);
auto chunk = segment_->chunk_data<T>(field_id_, chunk_id);
const T* data = chunk.data() + chunk_offset;
const bool* valid_data = chunk.valid_data();
if (valid_data != nullptr) {
valid_data += chunk_offset;
}
if (!skip_func ||
!skip_func(skip_index, field_id_, chunk_id)) {
func.template operator()<FilterType::random>(
data,
valid_data,
nullptr,
1,
res + processed_size,
valid_res + processed_size,
values...);
} else {
ApplyValidData(valid_data,
res + processed_size,
valid_res + processed_size,
1);
}
processed_size++;
}
return input->size();
} else {
if constexpr (std::is_same_v<T, std::string_view> ||
std::is_same_v<T, Json>) {
return ProcessDataByOffsetsForSealedSeg<T>(
func, skip_func, input, res, valid_res, values...);
}
auto chunk = segment_->chunk_data<T>(field_id_, 0);
const T* data = chunk.data();
const bool* valid_data = chunk.valid_data();
if (!skip_func || !skip_func(skip_index, field_id_, 0)) {
func.template operator()<FilterType::random>(data,
valid_data,
input->data(),
input->size(),
res,
valid_res,
values...);
} else {
ApplyValidData(valid_data, res, valid_res, input->size());
}
return input->size();
}
} else {
// growing segment
for (size_t i = 0; i < input->size(); ++i) {
int64_t offset = (*input)[i];
auto chunk_id = offset / size_per_chunk_;
auto chunk_offset = offset % size_per_chunk_;
auto chunk = segment_->chunk_data<T>(field_id_, chunk_id);
const T* data = chunk.data() + chunk_offset;
const bool* valid_data = chunk.valid_data();
if (valid_data != nullptr) {
valid_data += chunk_offset;
}
if (!skip_func || !skip_func(skip_index, field_id_, chunk_id)) {
func.template operator()<FilterType::random>(
data,
valid_data,
nullptr,
1,
res + processed_size,
valid_res + processed_size,
values...);
} else {
ApplyValidData(valid_data,
res + processed_size,
valid_res + processed_size,
1);
}
processed_size++;
}
}
return input->size();
}
template <typename T, typename FUNC, typename... ValTypes>
int64_t
ProcessDataChunksForSingleChunk(
@ -328,6 +595,7 @@ class SegmentExpr : public Expr {
const T* data = chunk.data() + data_pos;
func(data,
valid_data,
nullptr,
size,
res + processed_size,
valid_res + processed_size,
@ -384,12 +652,12 @@ class SegmentExpr : public Expr {
if (segment_->type() == SegmentType::Sealed) {
// first is the raw data, second is valid_data
// use valid_data to see if raw data is null
auto fetched_data = segment_->get_batch_views<T>(
field_id_, i, data_pos, size);
auto data_vec = fetched_data.first;
auto valid_data = fetched_data.second;
auto [data_vec, valid_data] =
segment_->get_batch_views<T>(
field_id_, i, data_pos, size);
func(data_vec.data(),
valid_data.data(),
nullptr,
size,
res + processed_size,
valid_res + processed_size,
@ -406,6 +674,7 @@ class SegmentExpr : public Expr {
}
func(data,
valid_data,
nullptr,
size,
res + processed_size,
valid_res + processed_size,
@ -451,13 +720,14 @@ class SegmentExpr : public Expr {
FUNC func,
std::function<bool(const milvus::SkipIndex&, FieldId, int)> skip_func,
TargetBitmapView res,
TargetBitmapView valid_res,
ValTypes... values) {
if (segment_->is_chunked()) {
return ProcessDataChunksForMultipleChunk<T>(
func, skip_func, res, values...);
func, skip_func, res, valid_res, values...);
} else {
return ProcessDataChunksForSingleChunk<T>(
func, skip_func, res, values...);
func, skip_func, res, valid_res, values...);
}
}
@ -538,6 +808,51 @@ class SegmentExpr : public Expr {
}
}
template <typename T>
TargetBitmap
ProcessChunksForValidByOffsets(bool use_index, const OffsetVector& input) {
typedef std::
conditional_t<std::is_same_v<T, std::string_view>, std::string, T>
IndexInnerType;
using Index = index::ScalarIndex<IndexInnerType>;
auto batch_size = input.size();
TargetBitmap valid_result(batch_size);
valid_result.set();
if (use_index) {
const Index& index =
segment_->chunk_scalar_index<IndexInnerType>(field_id_, 0);
auto* index_ptr = const_cast<Index*>(&index);
const auto& res = index_ptr->IsNotNull();
for (auto i = 0; i < batch_size; ++i) {
valid_result[i] = res[input[i]];
}
} else {
for (auto i = 0; i < batch_size; ++i) {
auto offset = input[i];
auto [chunk_id,
chunk_offset] = [&]() -> std::pair<int64_t, int64_t> {
if (segment_->type() == SegmentType::Growing) {
return {offset / size_per_chunk_,
offset % size_per_chunk_};
} else if (segment_->is_chunked()) {
return segment_->get_chunk_by_offset(field_id_, offset);
} else {
return {0, offset};
}
}();
auto chunk = segment_->chunk_data<T>(field_id_, chunk_id);
const bool* valid_data = chunk.valid_data();
if (valid_data != nullptr) {
valid_result[i] = valid_data[chunk_offset];
} else {
break;
}
}
}
return valid_result;
}
template <typename T>
TargetBitmap
ProcessDataChunksForValid() {
@ -569,9 +884,9 @@ class SegmentExpr : public Expr {
return valid_result;
}
valid_data += data_pos;
for (int i = 0; i < size; i++) {
if (!valid_data[i]) {
valid_result[i + data_pos] = false;
for (int j = 0; j < size; j++) {
if (!valid_data[j]) {
valid_result[j + processed_size] = false;
}
}
processed_size += size;

File diff suppressed because it is too large Load Diff

View File

@ -50,35 +50,35 @@ class PhyJsonContainsFilterExpr : public SegmentExpr {
private:
VectorPtr
EvalJsonContainsForDataSegment();
EvalJsonContainsForDataSegment(OffsetVector* input = nullptr);
template <typename ExprValueType>
VectorPtr
ExecJsonContains();
ExecJsonContains(OffsetVector* input = nullptr);
template <typename ExprValueType>
VectorPtr
ExecArrayContains();
ExecArrayContains(OffsetVector* input = nullptr);
template <typename ExprValueType>
VectorPtr
ExecJsonContainsAll();
ExecJsonContainsAll(OffsetVector* input = nullptr);
template <typename ExprValueType>
VectorPtr
ExecArrayContainsAll();
ExecArrayContainsAll(OffsetVector* input = nullptr);
VectorPtr
ExecJsonContainsArray();
ExecJsonContainsArray(OffsetVector* input = nullptr);
VectorPtr
ExecJsonContainsAllArray();
ExecJsonContainsAllArray(OffsetVector* input = nullptr);
VectorPtr
ExecJsonContainsAllWithDiffType();
ExecJsonContainsAllWithDiffType(OffsetVector* input = nullptr);
VectorPtr
ExecJsonContainsWithDiffType();
ExecJsonContainsWithDiffType(OffsetVector* input = nullptr);
VectorPtr
EvalArrayContainsForIndexSegment();

View File

@ -75,8 +75,16 @@ class PhyLogicalBinaryExpr : public Expr {
void
MoveCursor() override {
inputs_[0]->MoveCursor();
inputs_[1]->MoveCursor();
if (!has_offset_input_) {
inputs_[0]->MoveCursor();
inputs_[1]->MoveCursor();
}
}
bool
SupportOffsetInput() override {
return inputs_[0]->SupportOffsetInput() &&
inputs_[1]->SupportOffsetInput();
}
private:

View File

@ -41,7 +41,14 @@ class PhyLogicalUnaryExpr : public Expr {
void
MoveCursor() override {
inputs_[0]->MoveCursor();
if (!has_offset_input_) {
inputs_[0]->MoveCursor();
}
}
bool
SupportOffsetInput() override {
return inputs_[0]->SupportOffsetInput();
}
private:

View File

@ -24,37 +24,39 @@ namespace exec {
void
PhyTermFilterExpr::Eval(EvalCtx& context, VectorPtr& result) {
if (is_pk_field_) {
auto input = context.get_offset_input();
SetHasOffsetInput((input != nullptr));
if (is_pk_field_ && !has_offset_input_) {
result = ExecPkTermImpl();
return;
}
switch (expr_->column_.data_type_) {
case DataType::BOOL: {
result = ExecVisitorImpl<bool>();
result = ExecVisitorImpl<bool>(input);
break;
}
case DataType::INT8: {
result = ExecVisitorImpl<int8_t>();
result = ExecVisitorImpl<int8_t>(input);
break;
}
case DataType::INT16: {
result = ExecVisitorImpl<int16_t>();
result = ExecVisitorImpl<int16_t>(input);
break;
}
case DataType::INT32: {
result = ExecVisitorImpl<int32_t>();
result = ExecVisitorImpl<int32_t>(input);
break;
}
case DataType::INT64: {
result = ExecVisitorImpl<int64_t>();
result = ExecVisitorImpl<int64_t>(input);
break;
}
case DataType::FLOAT: {
result = ExecVisitorImpl<float>();
result = ExecVisitorImpl<float>(input);
break;
}
case DataType::DOUBLE: {
result = ExecVisitorImpl<double>();
result = ExecVisitorImpl<double>(input);
break;
}
case DataType::VARCHAR: {
@ -62,30 +64,30 @@ PhyTermFilterExpr::Eval(EvalCtx& context, VectorPtr& result) {
!storage::MmapManager::GetInstance()
.GetMmapConfig()
.growing_enable_mmap) {
result = ExecVisitorImpl<std::string>();
result = ExecVisitorImpl<std::string>(input);
} else {
result = ExecVisitorImpl<std::string_view>();
result = ExecVisitorImpl<std::string_view>(input);
}
break;
}
case DataType::JSON: {
if (expr_->vals_.size() == 0) {
result = ExecVisitorImplTemplateJson<bool>();
result = ExecVisitorImplTemplateJson<bool>(input);
break;
}
auto type = expr_->vals_[0].val_case();
switch (type) {
case proto::plan::GenericValue::ValCase::kBoolVal:
result = ExecVisitorImplTemplateJson<bool>();
result = ExecVisitorImplTemplateJson<bool>(input);
break;
case proto::plan::GenericValue::ValCase::kInt64Val:
result = ExecVisitorImplTemplateJson<int64_t>();
result = ExecVisitorImplTemplateJson<int64_t>(input);
break;
case proto::plan::GenericValue::ValCase::kFloatVal:
result = ExecVisitorImplTemplateJson<double>();
result = ExecVisitorImplTemplateJson<double>(input);
break;
case proto::plan::GenericValue::ValCase::kStringVal:
result = ExecVisitorImplTemplateJson<std::string>();
result = ExecVisitorImplTemplateJson<std::string>(input);
break;
default:
PanicInfo(DataTypeInvalid, "unknown data type: {}", type);
@ -95,26 +97,26 @@ PhyTermFilterExpr::Eval(EvalCtx& context, VectorPtr& result) {
case DataType::ARRAY: {
if (expr_->vals_.size() == 0) {
SetNotUseIndex();
result = ExecVisitorImplTemplateArray<bool>();
result = ExecVisitorImplTemplateArray<bool>(input);
break;
}
auto type = expr_->vals_[0].val_case();
switch (type) {
case proto::plan::GenericValue::ValCase::kBoolVal:
SetNotUseIndex();
result = ExecVisitorImplTemplateArray<bool>();
result = ExecVisitorImplTemplateArray<bool>(input);
break;
case proto::plan::GenericValue::ValCase::kInt64Val:
SetNotUseIndex();
result = ExecVisitorImplTemplateArray<int64_t>();
result = ExecVisitorImplTemplateArray<int64_t>(input);
break;
case proto::plan::GenericValue::ValCase::kFloatVal:
SetNotUseIndex();
result = ExecVisitorImplTemplateArray<double>();
result = ExecVisitorImplTemplateArray<double>(input);
break;
case proto::plan::GenericValue::ValCase::kStringVal:
SetNotUseIndex();
result = ExecVisitorImplTemplateArray<std::string>();
result = ExecVisitorImplTemplateArray<std::string>(input);
break;
default:
PanicInfo(DataTypeInvalid, "unknown data type: {}", type);
@ -230,31 +232,32 @@ PhyTermFilterExpr::ExecPkTermImpl() {
template <typename ValueType>
VectorPtr
PhyTermFilterExpr::ExecVisitorImplTemplateJson() {
PhyTermFilterExpr::ExecVisitorImplTemplateJson(OffsetVector* input) {
if (expr_->is_in_field_) {
return ExecTermJsonVariableInField<ValueType>();
return ExecTermJsonVariableInField<ValueType>(input);
} else {
return ExecTermJsonFieldInVariable<ValueType>();
return ExecTermJsonFieldInVariable<ValueType>(input);
}
}
template <typename ValueType>
VectorPtr
PhyTermFilterExpr::ExecVisitorImplTemplateArray() {
PhyTermFilterExpr::ExecVisitorImplTemplateArray(OffsetVector* input) {
if (expr_->is_in_field_) {
return ExecTermArrayVariableInField<ValueType>();
return ExecTermArrayVariableInField<ValueType>(input);
} else {
return ExecTermArrayFieldInVariable<ValueType>();
return ExecTermArrayFieldInVariable<ValueType>(input);
}
}
template <typename ValueType>
VectorPtr
PhyTermFilterExpr::ExecTermArrayVariableInField() {
PhyTermFilterExpr::ExecTermArrayVariableInField(OffsetVector* input) {
using GetType = std::conditional_t<std::is_same_v<ValueType, std::string>,
std::string_view,
ValueType>;
auto real_batch_size = GetNextBatchSize();
auto real_batch_size =
has_offset_input_ ? input->size() : GetNextBatchSize();
if (real_batch_size == 0) {
return nullptr;
}
@ -269,15 +272,18 @@ PhyTermFilterExpr::ExecTermArrayVariableInField() {
"element length in json array must be one");
ValueType target_val = GetValueFromProto<ValueType>(expr_->vals_[0]);
auto execute_sub_batch = [](const ArrayView* data,
const bool* valid_data,
const int size,
TargetBitmapView res,
TargetBitmapView valid_res,
const ValueType& target_val) {
auto executor = [&](size_t idx) {
for (int i = 0; i < data[idx].length(); i++) {
auto val = data[idx].template get_data<GetType>(i);
auto execute_sub_batch =
[]<FilterType filter_type = FilterType::sequential>(
const ArrayView* data,
const bool* valid_data,
const int32_t* offsets,
const int size,
TargetBitmapView res,
TargetBitmapView valid_res,
const ValueType& target_val) {
auto executor = [&](size_t offset) {
for (int i = 0; i < data[offset].length(); i++) {
auto val = data[offset].template get_data<GetType>(i);
if (val == target_val) {
return true;
}
@ -285,16 +291,31 @@ PhyTermFilterExpr::ExecTermArrayVariableInField() {
return false;
};
for (int i = 0; i < size; ++i) {
if (valid_data != nullptr && !valid_data[i]) {
auto offset = i;
if constexpr (filter_type == FilterType::random) {
offset = (offsets) ? offsets[i] : i;
}
if (valid_data != nullptr && !valid_data[offset]) {
res[i] = valid_res[i] = false;
continue;
}
res[i] = executor(i);
res[i] = executor(offset);
}
};
int64_t processed_size = ProcessDataChunks<milvus::ArrayView>(
execute_sub_batch, std::nullptr_t{}, res, valid_res, target_val);
int64_t processed_size;
if (has_offset_input_) {
processed_size =
ProcessDataByOffsets<milvus::ArrayView>(execute_sub_batch,
std::nullptr_t{},
input,
res,
valid_res,
target_val);
} else {
processed_size = ProcessDataChunks<milvus::ArrayView>(
execute_sub_batch, std::nullptr_t{}, res, valid_res, target_val);
}
AssertInfo(processed_size == real_batch_size,
"internal error: expr processed rows {} not equal "
"expect batch size {}",
@ -305,12 +326,13 @@ PhyTermFilterExpr::ExecTermArrayVariableInField() {
template <typename ValueType>
VectorPtr
PhyTermFilterExpr::ExecTermArrayFieldInVariable() {
PhyTermFilterExpr::ExecTermArrayFieldInVariable(OffsetVector* input) {
using GetType = std::conditional_t<std::is_same_v<ValueType, std::string>,
std::string_view,
ValueType>;
auto real_batch_size = GetNextBatchSize();
auto real_batch_size =
has_offset_input_ ? input->size() : GetNextBatchSize();
if (real_batch_size == 0) {
return nullptr;
}
@ -336,29 +358,52 @@ PhyTermFilterExpr::ExecTermArrayFieldInVariable() {
return res_vec;
}
auto execute_sub_batch = [](const ArrayView* data,
const bool* valid_data,
const int size,
TargetBitmapView res,
TargetBitmapView valid_res,
int index,
const std::unordered_set<ValueType>& term_set) {
auto execute_sub_batch =
[]<FilterType filter_type = FilterType::sequential>(
const ArrayView* data,
const bool* valid_data,
const int32_t* offsets,
const int size,
TargetBitmapView res,
TargetBitmapView valid_res,
int index,
const std::unordered_set<ValueType>& term_set) {
for (int i = 0; i < size; ++i) {
if (valid_data != nullptr && !valid_data[i]) {
auto offset = i;
if constexpr (filter_type == FilterType::random) {
offset = (offsets) ? offsets[i] : i;
}
if (valid_data != nullptr && !valid_data[offset]) {
res[i] = valid_res[i] = false;
continue;
}
if (term_set.empty() || index >= data[i].length()) {
if (term_set.empty() || index >= data[offset].length()) {
res[i] = false;
continue;
}
auto value = data[i].get_data<GetType>(index);
auto value = data[offset].get_data<GetType>(index);
res[i] = term_set.find(ValueType(value)) != term_set.end();
}
};
int64_t processed_size = ProcessDataChunks<milvus::ArrayView>(
execute_sub_batch, std::nullptr_t{}, res, valid_res, index, term_set);
int64_t processed_size;
if (has_offset_input_) {
processed_size =
ProcessDataByOffsets<milvus::ArrayView>(execute_sub_batch,
std::nullptr_t{},
input,
res,
valid_res,
index,
term_set);
} else {
processed_size = ProcessDataChunks<milvus::ArrayView>(execute_sub_batch,
std::nullptr_t{},
res,
valid_res,
index,
term_set);
}
AssertInfo(processed_size == real_batch_size,
"internal error: expr processed rows {} not equal "
"expect batch size {}",
@ -369,11 +414,12 @@ PhyTermFilterExpr::ExecTermArrayFieldInVariable() {
template <typename ValueType>
VectorPtr
PhyTermFilterExpr::ExecTermJsonVariableInField() {
PhyTermFilterExpr::ExecTermJsonVariableInField(OffsetVector* input) {
using GetType = std::conditional_t<std::is_same_v<ValueType, std::string>,
std::string_view,
ValueType>;
auto real_batch_size = GetNextBatchSize();
auto real_batch_size =
has_offset_input_ ? input->size() : GetNextBatchSize();
if (real_batch_size == 0) {
return nullptr;
}
@ -389,13 +435,16 @@ PhyTermFilterExpr::ExecTermJsonVariableInField() {
ValueType val = GetValueFromProto<ValueType>(expr_->vals_[0]);
auto pointer = milvus::Json::pointer(expr_->column_.nested_path_);
auto execute_sub_batch = [](const Json* data,
const bool* valid_data,
const int size,
TargetBitmapView res,
TargetBitmapView valid_res,
const std::string pointer,
const ValueType& target_val) {
auto execute_sub_batch =
[]<FilterType filter_type = FilterType::sequential>(
const Json* data,
const bool* valid_data,
const int32_t* offsets,
const int size,
TargetBitmapView res,
TargetBitmapView valid_res,
const std::string pointer,
const ValueType& target_val) {
auto executor = [&](size_t i) {
auto doc = data[i].doc();
auto array = doc.at_pointer(pointer).get_array();
@ -413,15 +462,30 @@ PhyTermFilterExpr::ExecTermJsonVariableInField() {
return false;
};
for (size_t i = 0; i < size; ++i) {
if (valid_data != nullptr && !valid_data[i]) {
auto offset = i;
if constexpr (filter_type == FilterType::random) {
offset = (offsets) ? offsets[i] : i;
}
if (valid_data != nullptr && !valid_data[offset]) {
res[i] = valid_res[i] = false;
continue;
}
res[i] = executor(i);
res[i] = executor(offset);
}
};
int64_t processed_size = ProcessDataChunks<milvus::Json>(
execute_sub_batch, std::nullptr_t{}, res, valid_res, pointer, val);
int64_t processed_size;
if (has_offset_input_) {
processed_size = ProcessDataByOffsets<milvus::Json>(execute_sub_batch,
std::nullptr_t{},
input,
res,
valid_res,
pointer,
val);
} else {
processed_size = ProcessDataChunks<milvus::Json>(
execute_sub_batch, std::nullptr_t{}, res, valid_res, pointer, val);
}
AssertInfo(processed_size == real_batch_size,
"internal error: expr processed rows {} not equal "
"expect batch size {}",
@ -432,11 +496,12 @@ PhyTermFilterExpr::ExecTermJsonVariableInField() {
template <typename ValueType>
VectorPtr
PhyTermFilterExpr::ExecTermJsonFieldInVariable() {
PhyTermFilterExpr::ExecTermJsonFieldInVariable(OffsetVector* input) {
using GetType = std::conditional_t<std::is_same_v<ValueType, std::string>,
std::string_view,
ValueType>;
auto real_batch_size = GetNextBatchSize();
auto real_batch_size =
has_offset_input_ ? input->size() : GetNextBatchSize();
if (real_batch_size == 0) {
return nullptr;
}
@ -459,13 +524,16 @@ PhyTermFilterExpr::ExecTermJsonFieldInVariable() {
return res_vec;
}
auto execute_sub_batch = [](const Json* data,
const bool* valid_data,
const int size,
TargetBitmapView res,
TargetBitmapView valid_res,
const std::string pointer,
const std::unordered_set<ValueType>& terms) {
auto execute_sub_batch =
[]<FilterType filter_type = FilterType::sequential>(
const Json* data,
const bool* valid_data,
const int32_t* offsets,
const int size,
TargetBitmapView res,
TargetBitmapView valid_res,
const std::string pointer,
const std::unordered_set<ValueType>& terms) {
auto executor = [&](size_t i) {
auto x = data[i].template at<GetType>(pointer);
if (x.error()) {
@ -485,7 +553,11 @@ PhyTermFilterExpr::ExecTermJsonFieldInVariable() {
return terms.find(ValueType(x.value())) != terms.end();
};
for (size_t i = 0; i < size; ++i) {
if (valid_data != nullptr && !valid_data[i]) {
auto offset = i;
if constexpr (filter_type == FilterType::random) {
offset = (offsets) ? offsets[i] : i;
}
if (valid_data != nullptr && !valid_data[offset]) {
res[i] = valid_res[i] = false;
continue;
}
@ -493,11 +565,26 @@ PhyTermFilterExpr::ExecTermJsonFieldInVariable() {
res[i] = false;
continue;
}
res[i] = executor(i);
res[i] = executor(offset);
}
};
int64_t processed_size = ProcessDataChunks<milvus::Json>(
execute_sub_batch, std::nullptr_t{}, res, valid_res, pointer, term_set);
int64_t processed_size;
if (has_offset_input_) {
processed_size = ProcessDataByOffsets<milvus::Json>(execute_sub_batch,
std::nullptr_t{},
input,
res,
valid_res,
pointer,
term_set);
} else {
processed_size = ProcessDataChunks<milvus::Json>(execute_sub_batch,
std::nullptr_t{},
res,
valid_res,
pointer,
term_set);
}
AssertInfo(processed_size == real_batch_size,
"internal error: expr processed rows {} not equal "
"expect batch size {}",
@ -508,17 +595,17 @@ PhyTermFilterExpr::ExecTermJsonFieldInVariable() {
template <typename T>
VectorPtr
PhyTermFilterExpr::ExecVisitorImpl() {
if (is_index_mode_) {
return ExecVisitorImplForIndex<T>();
PhyTermFilterExpr::ExecVisitorImpl(OffsetVector* input) {
if (is_index_mode_ && !has_offset_input_) {
return ExecVisitorImplForIndex<T>(input);
} else {
return ExecVisitorImplForData<T>();
return ExecVisitorImplForData<T>(input);
}
}
template <typename T>
VectorPtr
PhyTermFilterExpr::ExecVisitorImplForIndex() {
PhyTermFilterExpr::ExecVisitorImplForIndex(OffsetVector* input) {
typedef std::
conditional_t<std::is_same_v<T, std::string_view>, std::string, T>
IndexInnerType;
@ -553,7 +640,7 @@ PhyTermFilterExpr::ExecVisitorImplForIndex() {
template <>
VectorPtr
PhyTermFilterExpr::ExecVisitorImplForIndex<bool>() {
PhyTermFilterExpr::ExecVisitorImplForIndex<bool>(OffsetVector* input) {
using Index = index::ScalarIndex<bool>;
auto real_batch_size = GetNextBatchSize();
if (real_batch_size == 0) {
@ -575,8 +662,9 @@ PhyTermFilterExpr::ExecVisitorImplForIndex<bool>() {
template <typename T>
VectorPtr
PhyTermFilterExpr::ExecVisitorImplForData() {
auto real_batch_size = GetNextBatchSize();
PhyTermFilterExpr::ExecVisitorImplForData(OffsetVector* input) {
auto real_batch_size =
has_offset_input_ ? input->size() : GetNextBatchSize();
if (real_batch_size == 0) {
return nullptr;
}
@ -597,23 +685,40 @@ PhyTermFilterExpr::ExecVisitorImplForData() {
}
}
std::unordered_set<T> vals_set(vals.begin(), vals.end());
auto execute_sub_batch = [](const T* data,
const bool* valid_data,
const int size,
TargetBitmapView res,
TargetBitmapView valid_res,
const std::unordered_set<T>& vals) {
auto execute_sub_batch =
[]<FilterType filter_type = FilterType::sequential>(
const T* data,
const bool* valid_data,
const int32_t* offsets,
const int size,
TargetBitmapView res,
TargetBitmapView valid_res,
const std::unordered_set<T>& vals) {
TermElementFuncSet<T> func;
for (size_t i = 0; i < size; ++i) {
if (valid_data != nullptr && !valid_data[i]) {
auto offset = i;
if constexpr (filter_type == FilterType::random) {
offset = (offsets) ? offsets[i] : i;
}
if (valid_data != nullptr && !valid_data[offset]) {
res[i] = valid_res[i] = false;
continue;
}
res[i] = func(vals, data[i]);
res[i] = func(vals, data[offset]);
}
};
int64_t processed_size = ProcessDataChunks<T>(
execute_sub_batch, std::nullptr_t{}, res, valid_res, vals_set);
int64_t processed_size;
if (has_offset_input_) {
processed_size = ProcessDataByOffsets<T>(execute_sub_batch,
std::nullptr_t{},
input,
res,
valid_res,
vals_set);
} else {
processed_size = ProcessDataChunks<T>(
execute_sub_batch, std::nullptr_t{}, res, valid_res, vals_set);
}
AssertInfo(processed_size == real_batch_size,
"internal error: expr processed rows {} not equal "
"expect batch size {}",

View File

@ -83,39 +83,39 @@ class PhyTermFilterExpr : public SegmentExpr {
template <typename T>
VectorPtr
ExecVisitorImpl();
ExecVisitorImpl(OffsetVector* input = nullptr);
template <typename T>
VectorPtr
ExecVisitorImplForIndex();
ExecVisitorImplForIndex(OffsetVector* input = nullptr);
template <typename T>
VectorPtr
ExecVisitorImplForData();
ExecVisitorImplForData(OffsetVector* input = nullptr);
template <typename ValueType>
VectorPtr
ExecVisitorImplTemplateJson();
ExecVisitorImplTemplateJson(OffsetVector* input = nullptr);
template <typename ValueType>
VectorPtr
ExecTermJsonVariableInField();
ExecTermJsonVariableInField(OffsetVector* input = nullptr);
template <typename ValueType>
VectorPtr
ExecTermJsonFieldInVariable();
ExecTermJsonFieldInVariable(OffsetVector* input = nullptr);
template <typename ValueType>
VectorPtr
ExecVisitorImplTemplateArray();
ExecVisitorImplTemplateArray(OffsetVector* input = nullptr);
template <typename ValueType>
VectorPtr
ExecTermArrayVariableInField();
ExecTermArrayVariableInField(OffsetVector* input = nullptr);
template <typename ValueType>
VectorPtr
ExecTermArrayFieldInVariable();
ExecTermArrayFieldInVariable(OffsetVector* input = nullptr);
private:
std::shared_ptr<const milvus::expr::TermFilterExpr> expr_;

View File

@ -121,7 +121,8 @@ PhyUnaryRangeFilterExpr::ExecRangeVisitorImplArrayForIndex<
case DataType::FLOAT:
case DataType::DOUBLE: {
// not accurate on floating point number, rollback to bruteforce.
return ExecRangeVisitorImplArray<proto::plan::Array>();
return ExecRangeVisitorImplArray<proto::plan::Array>(
nullptr);
}
case DataType::VARCHAR: {
if (segment_->type() == SegmentType::Growing) {
@ -146,33 +147,35 @@ PhyUnaryRangeFilterExpr::ExecRangeVisitorImplArrayForIndex<
void
PhyUnaryRangeFilterExpr::Eval(EvalCtx& context, VectorPtr& result) {
auto input = context.get_offset_input();
SetHasOffsetInput((input != nullptr));
switch (expr_->column_.data_type_) {
case DataType::BOOL: {
result = ExecRangeVisitorImpl<bool>();
result = ExecRangeVisitorImpl<bool>(input);
break;
}
case DataType::INT8: {
result = ExecRangeVisitorImpl<int8_t>();
result = ExecRangeVisitorImpl<int8_t>(input);
break;
}
case DataType::INT16: {
result = ExecRangeVisitorImpl<int16_t>();
result = ExecRangeVisitorImpl<int16_t>(input);
break;
}
case DataType::INT32: {
result = ExecRangeVisitorImpl<int32_t>();
result = ExecRangeVisitorImpl<int32_t>(input);
break;
}
case DataType::INT64: {
result = ExecRangeVisitorImpl<int64_t>();
result = ExecRangeVisitorImpl<int64_t>(input);
break;
}
case DataType::FLOAT: {
result = ExecRangeVisitorImpl<float>();
result = ExecRangeVisitorImpl<float>(input);
break;
}
case DataType::DOUBLE: {
result = ExecRangeVisitorImpl<double>();
result = ExecRangeVisitorImpl<double>(input);
break;
}
case DataType::VARCHAR: {
@ -180,9 +183,9 @@ PhyUnaryRangeFilterExpr::Eval(EvalCtx& context, VectorPtr& result) {
!storage::MmapManager::GetInstance()
.GetMmapConfig()
.growing_enable_mmap) {
result = ExecRangeVisitorImpl<std::string>();
result = ExecRangeVisitorImpl<std::string>(input);
} else {
result = ExecRangeVisitorImpl<std::string_view>();
result = ExecRangeVisitorImpl<std::string_view>(input);
}
break;
}
@ -190,19 +193,20 @@ PhyUnaryRangeFilterExpr::Eval(EvalCtx& context, VectorPtr& result) {
auto val_type = expr_->val_.val_case();
switch (val_type) {
case proto::plan::GenericValue::ValCase::kBoolVal:
result = ExecRangeVisitorImplJson<bool>();
result = ExecRangeVisitorImplJson<bool>(input);
break;
case proto::plan::GenericValue::ValCase::kInt64Val:
result = ExecRangeVisitorImplJson<int64_t>();
result = ExecRangeVisitorImplJson<int64_t>(input);
break;
case proto::plan::GenericValue::ValCase::kFloatVal:
result = ExecRangeVisitorImplJson<double>();
result = ExecRangeVisitorImplJson<double>(input);
break;
case proto::plan::GenericValue::ValCase::kStringVal:
result = ExecRangeVisitorImplJson<std::string>();
result = ExecRangeVisitorImplJson<std::string>(input);
break;
case proto::plan::GenericValue::ValCase::kArrayVal:
result = ExecRangeVisitorImplJson<proto::plan::Array>();
result =
ExecRangeVisitorImplJson<proto::plan::Array>(input);
break;
default:
PanicInfo(
@ -215,27 +219,28 @@ PhyUnaryRangeFilterExpr::Eval(EvalCtx& context, VectorPtr& result) {
switch (val_type) {
case proto::plan::GenericValue::ValCase::kBoolVal:
SetNotUseIndex();
result = ExecRangeVisitorImplArray<bool>();
result = ExecRangeVisitorImplArray<bool>(input);
break;
case proto::plan::GenericValue::ValCase::kInt64Val:
SetNotUseIndex();
result = ExecRangeVisitorImplArray<int64_t>();
result = ExecRangeVisitorImplArray<int64_t>(input);
break;
case proto::plan::GenericValue::ValCase::kFloatVal:
SetNotUseIndex();
result = ExecRangeVisitorImplArray<double>();
result = ExecRangeVisitorImplArray<double>(input);
break;
case proto::plan::GenericValue::ValCase::kStringVal:
SetNotUseIndex();
result = ExecRangeVisitorImplArray<std::string>();
result = ExecRangeVisitorImplArray<std::string>(input);
break;
case proto::plan::GenericValue::ValCase::kArrayVal:
if (CanUseIndexForArray<milvus::Array>()) {
if (!has_offset_input_ &&
CanUseIndexForArray<milvus::Array>()) {
result = ExecRangeVisitorImplArrayForIndex<
proto::plan::Array>();
} else {
result =
ExecRangeVisitorImplArray<proto::plan::Array>();
result = ExecRangeVisitorImplArray<proto::plan::Array>(
input);
}
break;
default:
@ -253,11 +258,12 @@ PhyUnaryRangeFilterExpr::Eval(EvalCtx& context, VectorPtr& result) {
template <typename ValueType>
VectorPtr
PhyUnaryRangeFilterExpr::ExecRangeVisitorImplArray() {
PhyUnaryRangeFilterExpr::ExecRangeVisitorImplArray(OffsetVector* input) {
using GetType = std::conditional_t<std::is_same_v<ValueType, std::string>,
std::string_view,
ValueType>;
auto real_batch_size = GetNextBatchSize();
auto real_batch_size =
has_offset_input_ ? input->size() : GetNextBatchSize();
if (real_batch_size == 0) {
return nullptr;
}
@ -273,56 +279,135 @@ PhyUnaryRangeFilterExpr::ExecRangeVisitorImplArray() {
if (expr_->column_.nested_path_.size() > 0) {
index = std::stoi(expr_->column_.nested_path_[0]);
}
auto execute_sub_batch = [op_type](const milvus::ArrayView* data,
const bool* valid_data,
const int size,
TargetBitmapView res,
TargetBitmapView valid_res,
ValueType val,
int index) {
auto execute_sub_batch = [op_type]<FilterType filter_type =
FilterType::sequential>(
const milvus::ArrayView* data,
const bool* valid_data,
const int32_t* offsets,
const int size,
TargetBitmapView res,
TargetBitmapView valid_res,
ValueType val,
int index) {
switch (op_type) {
case proto::plan::GreaterThan: {
UnaryElementFuncForArray<ValueType, proto::plan::GreaterThan>
UnaryElementFuncForArray<ValueType,
proto::plan::GreaterThan,
filter_type>
func;
func(data, valid_data, size, val, index, res, valid_res);
func(data,
valid_data,
size,
val,
index,
res,
valid_res,
offsets);
break;
}
case proto::plan::GreaterEqual: {
UnaryElementFuncForArray<ValueType, proto::plan::GreaterEqual>
UnaryElementFuncForArray<ValueType,
proto::plan::GreaterEqual,
filter_type>
func;
func(data, valid_data, size, val, index, res, valid_res);
func(data,
valid_data,
size,
val,
index,
res,
valid_res,
offsets);
break;
}
case proto::plan::LessThan: {
UnaryElementFuncForArray<ValueType, proto::plan::LessThan> func;
func(data, valid_data, size, val, index, res, valid_res);
UnaryElementFuncForArray<ValueType,
proto::plan::LessThan,
filter_type>
func;
func(data,
valid_data,
size,
val,
index,
res,
valid_res,
offsets);
break;
}
case proto::plan::LessEqual: {
UnaryElementFuncForArray<ValueType, proto::plan::LessEqual>
UnaryElementFuncForArray<ValueType,
proto::plan::LessEqual,
filter_type>
func;
func(data, valid_data, size, val, index, res, valid_res);
func(data,
valid_data,
size,
val,
index,
res,
valid_res,
offsets);
break;
}
case proto::plan::Equal: {
UnaryElementFuncForArray<ValueType, proto::plan::Equal> func;
func(data, valid_data, size, val, index, res, valid_res);
UnaryElementFuncForArray<ValueType,
proto::plan::Equal,
filter_type>
func;
func(data,
valid_data,
size,
val,
index,
res,
valid_res,
offsets);
break;
}
case proto::plan::NotEqual: {
UnaryElementFuncForArray<ValueType, proto::plan::NotEqual> func;
func(data, valid_data, size, val, index, res, valid_res);
UnaryElementFuncForArray<ValueType,
proto::plan::NotEqual,
filter_type>
func;
func(data,
valid_data,
size,
val,
index,
res,
valid_res,
offsets);
break;
}
case proto::plan::PrefixMatch: {
UnaryElementFuncForArray<ValueType, proto::plan::PrefixMatch>
UnaryElementFuncForArray<ValueType,
proto::plan::PrefixMatch,
filter_type>
func;
func(data, valid_data, size, val, index, res, valid_res);
func(data,
valid_data,
size,
val,
index,
res,
valid_res,
offsets);
break;
}
case proto::plan::Match: {
UnaryElementFuncForArray<ValueType, proto::plan::Match> func;
func(data, valid_data, size, val, index, res, valid_res);
UnaryElementFuncForArray<ValueType,
proto::plan::Match,
filter_type>
func;
func(data,
valid_data,
size,
val,
index,
res,
valid_res,
offsets);
break;
}
default:
@ -332,8 +417,20 @@ PhyUnaryRangeFilterExpr::ExecRangeVisitorImplArray() {
op_type));
}
};
int64_t processed_size = ProcessDataChunks<milvus::ArrayView>(
execute_sub_batch, std::nullptr_t{}, res, valid_res, val, index);
int64_t processed_size;
if (has_offset_input_) {
processed_size =
ProcessDataByOffsets<milvus::ArrayView>(execute_sub_batch,
std::nullptr_t{},
input,
res,
valid_res,
val,
index);
} else {
processed_size = ProcessDataChunks<milvus::ArrayView>(
execute_sub_batch, std::nullptr_t{}, res, valid_res, val, index);
}
AssertInfo(processed_size == real_batch_size,
"internal error: expr processed rows {} not equal "
"expect batch size {}",
@ -454,12 +551,13 @@ PhyUnaryRangeFilterExpr::ExecArrayEqualForIndex(bool reverse) {
template <typename ExprValueType>
VectorPtr
PhyUnaryRangeFilterExpr::ExecRangeVisitorImplJson() {
PhyUnaryRangeFilterExpr::ExecRangeVisitorImplJson(OffsetVector* input) {
using GetType =
std::conditional_t<std::is_same_v<ExprValueType, std::string>,
std::string_view,
ExprValueType>;
auto real_batch_size = GetNextBatchSize();
auto real_batch_size =
has_offset_input_ ? input->size() : GetNextBatchSize();
if (real_batch_size == 0) {
return nullptr;
}
@ -473,46 +571,53 @@ PhyUnaryRangeFilterExpr::ExecRangeVisitorImplJson() {
auto op_type = expr_->op_type_;
auto pointer = milvus::Json::pointer(expr_->column_.nested_path_);
#define UnaryRangeJSONCompare(cmp) \
do { \
auto x = data[i].template at<GetType>(pointer); \
if (x.error()) { \
if constexpr (std::is_same_v<GetType, int64_t>) { \
auto x = data[i].template at<double>(pointer); \
res[i] = !x.error() && (cmp); \
break; \
} \
res[i] = false; \
break; \
} \
res[i] = (cmp); \
#define UnaryRangeJSONCompare(cmp) \
do { \
auto x = data[offset].template at<GetType>(pointer); \
if (x.error()) { \
if constexpr (std::is_same_v<GetType, int64_t>) { \
auto x = data[offset].template at<double>(pointer); \
res[i] = !x.error() && (cmp); \
break; \
} \
res[i] = false; \
break; \
} \
res[i] = (cmp); \
} while (false)
#define UnaryRangeJSONCompareNotEqual(cmp) \
do { \
auto x = data[i].template at<GetType>(pointer); \
if (x.error()) { \
if constexpr (std::is_same_v<GetType, int64_t>) { \
auto x = data[i].template at<double>(pointer); \
res[i] = x.error() || (cmp); \
break; \
} \
res[i] = true; \
break; \
} \
res[i] = (cmp); \
#define UnaryRangeJSONCompareNotEqual(cmp) \
do { \
auto x = data[offset].template at<GetType>(pointer); \
if (x.error()) { \
if constexpr (std::is_same_v<GetType, int64_t>) { \
auto x = data[offset].template at<double>(pointer); \
res[i] = x.error() || (cmp); \
break; \
} \
res[i] = true; \
break; \
} \
res[i] = (cmp); \
} while (false)
auto execute_sub_batch = [op_type, pointer](const milvus::Json* data,
const bool* valid_data,
const int size,
TargetBitmapView res,
TargetBitmapView valid_res,
ExprValueType val) {
auto execute_sub_batch =
[ op_type, pointer ]<FilterType filter_type = FilterType::sequential>(
const milvus::Json* data,
const bool* valid_data,
const int32_t* offsets,
const int size,
TargetBitmapView res,
TargetBitmapView valid_res,
ExprValueType val) {
switch (op_type) {
case proto::plan::GreaterThan: {
for (size_t i = 0; i < size; ++i) {
if (valid_data != nullptr && !valid_data[i]) {
auto offset = i;
if constexpr (filter_type == FilterType::random) {
offset = (offsets) ? offsets[i] : i;
}
if (valid_data != nullptr && !valid_data[offset]) {
res[i] = valid_res[i] = false;
continue;
}
@ -526,7 +631,11 @@ PhyUnaryRangeFilterExpr::ExecRangeVisitorImplJson() {
}
case proto::plan::GreaterEqual: {
for (size_t i = 0; i < size; ++i) {
if (valid_data != nullptr && !valid_data[i]) {
auto offset = i;
if constexpr (filter_type == FilterType::random) {
offset = (offsets) ? offsets[i] : i;
}
if (valid_data != nullptr && !valid_data[offset]) {
res[i] = valid_res[i] = false;
continue;
}
@ -540,7 +649,11 @@ PhyUnaryRangeFilterExpr::ExecRangeVisitorImplJson() {
}
case proto::plan::LessThan: {
for (size_t i = 0; i < size; ++i) {
if (valid_data != nullptr && !valid_data[i]) {
auto offset = i;
if constexpr (filter_type == FilterType::random) {
offset = (offsets) ? offsets[i] : i;
}
if (valid_data != nullptr && !valid_data[offset]) {
res[i] = valid_res[i] = false;
continue;
}
@ -554,7 +667,11 @@ PhyUnaryRangeFilterExpr::ExecRangeVisitorImplJson() {
}
case proto::plan::LessEqual: {
for (size_t i = 0; i < size; ++i) {
if (valid_data != nullptr && !valid_data[i]) {
auto offset = i;
if constexpr (filter_type == FilterType::random) {
offset = (offsets) ? offsets[i] : i;
}
if (valid_data != nullptr && !valid_data[offset]) {
res[i] = valid_res[i] = false;
continue;
}
@ -568,7 +685,11 @@ PhyUnaryRangeFilterExpr::ExecRangeVisitorImplJson() {
}
case proto::plan::Equal: {
for (size_t i = 0; i < size; ++i) {
if (valid_data != nullptr && !valid_data[i]) {
auto offset = i;
if constexpr (filter_type == FilterType::random) {
offset = (offsets) ? offsets[i] : i;
}
if (valid_data != nullptr && !valid_data[offset]) {
res[i] = valid_res[i] = false;
continue;
}
@ -588,7 +709,11 @@ PhyUnaryRangeFilterExpr::ExecRangeVisitorImplJson() {
}
case proto::plan::NotEqual: {
for (size_t i = 0; i < size; ++i) {
if (valid_data != nullptr && !valid_data[i]) {
auto offset = i;
if constexpr (filter_type == FilterType::random) {
offset = (offsets) ? offsets[i] : i;
}
if (valid_data != nullptr && !valid_data[offset]) {
res[i] = valid_res[i] = false;
continue;
}
@ -608,7 +733,11 @@ PhyUnaryRangeFilterExpr::ExecRangeVisitorImplJson() {
}
case proto::plan::PrefixMatch: {
for (size_t i = 0; i < size; ++i) {
if (valid_data != nullptr && !valid_data[i]) {
auto offset = i;
if constexpr (filter_type == FilterType::random) {
offset = (offsets) ? offsets[i] : i;
}
if (valid_data != nullptr && !valid_data[offset]) {
res[i] = valid_res[i] = false;
continue;
}
@ -626,7 +755,11 @@ PhyUnaryRangeFilterExpr::ExecRangeVisitorImplJson() {
auto regex_pattern = translator(val);
RegexMatcher matcher(regex_pattern);
for (size_t i = 0; i < size; ++i) {
if (valid_data != nullptr && !valid_data[i]) {
auto offset = i;
if constexpr (filter_type == FilterType::random) {
offset = (offsets) ? offsets[i] : i;
}
if (valid_data != nullptr && !valid_data[offset]) {
res[i] = valid_res[i] = false;
continue;
}
@ -646,8 +779,15 @@ PhyUnaryRangeFilterExpr::ExecRangeVisitorImplJson() {
op_type));
}
};
int64_t processed_size = ProcessDataChunks<milvus::Json>(
execute_sub_batch, std::nullptr_t{}, res, valid_res, val);
int64_t processed_size;
if (has_offset_input_) {
processed_size = ProcessDataByOffsets<milvus::Json>(
execute_sub_batch, std::nullptr_t{}, input, res, valid_res, val);
} else {
processed_size = ProcessDataChunks<milvus::Json>(
execute_sub_batch, std::nullptr_t{}, res, valid_res, val);
}
AssertInfo(processed_size == real_batch_size,
"internal error: expr processed rows {} not equal "
"expect batch size {}",
@ -658,15 +798,20 @@ PhyUnaryRangeFilterExpr::ExecRangeVisitorImplJson() {
template <typename T>
VectorPtr
PhyUnaryRangeFilterExpr::ExecRangeVisitorImpl() {
PhyUnaryRangeFilterExpr::ExecRangeVisitorImpl(OffsetVector* input) {
if (expr_->op_type_ == proto::plan::OpType::TextMatch) {
if (has_offset_input_) {
PanicInfo(
OpTypeInvalid,
fmt::format("text match does not support iterative filter"));
}
return ExecTextMatch();
}
if (CanUseIndex<T>()) {
if (CanUseIndex<T>() && !has_offset_input_) {
return ExecRangeVisitorImplForIndex<T>();
} else {
return ExecRangeVisitorImplForData<T>();
return ExecRangeVisitorImplForData<T>(input);
}
}
@ -749,17 +894,24 @@ PhyUnaryRangeFilterExpr::ExecRangeVisitorImplForIndex() {
template <typename T>
ColumnVectorPtr
PhyUnaryRangeFilterExpr::PreCheckOverflow() {
PhyUnaryRangeFilterExpr::PreCheckOverflow(OffsetVector* input) {
if constexpr (std::is_integral_v<T> && !std::is_same_v<T, bool>) {
int64_t val = GetValueFromProto<int64_t>(expr_->val_);
if (milvus::query::out_of_range<T>(val)) {
int64_t batch_size =
overflow_check_pos_ + batch_size_ >= active_count_
? active_count_ - overflow_check_pos_
: batch_size_;
overflow_check_pos_ += batch_size;
auto valid = ProcessChunksForValid<T>(CanUseIndex<T>());
int64_t batch_size;
if (input != nullptr) {
batch_size = input->size();
} else {
batch_size = overflow_check_pos_ + batch_size_ >= active_count_
? active_count_ - overflow_check_pos_
: batch_size_;
overflow_check_pos_ += batch_size;
}
auto valid = (input != nullptr)
? ProcessChunksForValidByOffsets<T>(
CanUseIndex<T>(), *input)
: ProcessChunksForValid<T>(CanUseIndex<T>());
auto res_vec = std::make_shared<ColumnVector>(
TargetBitmap(batch_size), std::move(valid));
TargetBitmapView res(res_vec->GetRawData(), batch_size);
@ -805,18 +957,20 @@ PhyUnaryRangeFilterExpr::PreCheckOverflow() {
template <typename T>
VectorPtr
PhyUnaryRangeFilterExpr::ExecRangeVisitorImplForData() {
PhyUnaryRangeFilterExpr::ExecRangeVisitorImplForData(OffsetVector* input) {
typedef std::
conditional_t<std::is_same_v<T, std::string_view>, std::string, T>
IndexInnerType;
if (auto res = PreCheckOverflow<T>()) {
if (auto res = PreCheckOverflow<T>(input)) {
return res;
}
auto real_batch_size = GetNextBatchSize();
auto real_batch_size =
has_offset_input_ ? input->size() : GetNextBatchSize();
if (real_batch_size == 0) {
return nullptr;
}
IndexInnerType val = GetValueFromProto<IndexInnerType>(expr_->val_);
auto res_vec = std::make_shared<ColumnVector>(
TargetBitmap(real_batch_size), TargetBitmap(real_batch_size));
@ -824,51 +978,56 @@ PhyUnaryRangeFilterExpr::ExecRangeVisitorImplForData() {
TargetBitmapView valid_res(res_vec->GetValidRawData(), real_batch_size);
valid_res.set();
auto expr_type = expr_->op_type_;
auto execute_sub_batch = [expr_type](const T* data,
const bool* valid_data,
const int size,
TargetBitmapView res,
TargetBitmapView valid_res,
IndexInnerType val) {
auto execute_sub_batch = [expr_type]<FilterType filter_type =
FilterType::sequential>(
const T* data,
const bool* valid_data,
const int32_t* offsets,
const int size,
TargetBitmapView res,
TargetBitmapView valid_res,
IndexInnerType val) {
switch (expr_type) {
case proto::plan::GreaterThan: {
UnaryElementFunc<T, proto::plan::GreaterThan> func;
func(data, size, val, res);
UnaryElementFunc<T, proto::plan::GreaterThan, filter_type> func;
func(data, size, val, res, offsets);
break;
}
case proto::plan::GreaterEqual: {
UnaryElementFunc<T, proto::plan::GreaterEqual> func;
func(data, size, val, res);
UnaryElementFunc<T, proto::plan::GreaterEqual, filter_type>
func;
func(data, size, val, res, offsets);
break;
}
case proto::plan::LessThan: {
UnaryElementFunc<T, proto::plan::LessThan> func;
func(data, size, val, res);
UnaryElementFunc<T, proto::plan::LessThan, filter_type> func;
func(data, size, val, res, offsets);
break;
}
case proto::plan::LessEqual: {
UnaryElementFunc<T, proto::plan::LessEqual> func;
func(data, size, val, res);
UnaryElementFunc<T, proto::plan::LessEqual, filter_type> func;
func(data, size, val, res, offsets);
break;
}
case proto::plan::Equal: {
UnaryElementFunc<T, proto::plan::Equal> func;
func(data, size, val, res);
UnaryElementFunc<T, proto::plan::Equal, filter_type> func;
func(data, size, val, res, offsets);
break;
}
case proto::plan::NotEqual: {
UnaryElementFunc<T, proto::plan::NotEqual> func;
func(data, size, val, res);
UnaryElementFunc<T, proto::plan::NotEqual, filter_type> func;
func(data, size, val, res, offsets);
break;
}
case proto::plan::PrefixMatch: {
UnaryElementFunc<T, proto::plan::PrefixMatch> func;
func(data, size, val, res);
UnaryElementFunc<T, proto::plan::PrefixMatch, filter_type> func;
func(data, size, val, res, offsets);
break;
}
case proto::plan::Match: {
UnaryElementFunc<T, proto::plan::Match> func;
func(data, size, val, res);
UnaryElementFunc<T, proto::plan::Match, filter_type> func;
func(data, size, val, res, offsets);
break;
}
default:
@ -882,20 +1041,32 @@ PhyUnaryRangeFilterExpr::ExecRangeVisitorImplForData() {
// but to mask res with valid_data after the batch operation.
if (valid_data != nullptr) {
for (int i = 0; i < size; i++) {
if (!valid_data[i]) {
auto offset = i;
if constexpr (filter_type == FilterType::random) {
offset = (offsets) ? offsets[i] : i;
}
if (!valid_data[offset]) {
res[i] = valid_res[i] = false;
}
}
}
};
auto skip_index_func = [expr_type, val](const SkipIndex& skip_index,
FieldId field_id,
int64_t chunk_id) {
return skip_index.CanSkipUnaryRange<T>(
field_id, chunk_id, expr_type, val);
};
int64_t processed_size = ProcessDataChunks<T>(
execute_sub_batch, skip_index_func, res, valid_res, val);
int64_t processed_size;
if (has_offset_input_) {
processed_size = ProcessDataByOffsets<T>(
execute_sub_batch, skip_index_func, input, res, valid_res, val);
} else {
processed_size = ProcessDataChunks<T>(
execute_sub_batch, skip_index_func, res, valid_res, val);
}
AssertInfo(processed_size == real_batch_size,
"internal error: expr processed rows {} not equal "
"expect batch size {}, related params[active_count:{}, "

View File

@ -33,7 +33,7 @@
namespace milvus {
namespace exec {
template <typename T>
template <typename T, FilterType filter_type>
struct UnaryElementFuncForMatch {
typedef std::
conditional_t<std::is_same_v<T, std::string_view>, std::string, T>
@ -43,58 +43,68 @@ struct UnaryElementFuncForMatch {
operator()(const T* src,
size_t size,
IndexInnerType val,
TargetBitmapView res) {
TargetBitmapView res,
int64_t* offsets = nullptr) {
PatternMatchTranslator translator;
auto regex_pattern = translator(val);
RegexMatcher matcher(regex_pattern);
for (int i = 0; i < size; ++i) {
res[i] = matcher(src[i]);
if constexpr (filter_type == FilterType::random) {
res[i] = matcher(src[offsets ? offsets[i] : i]);
} else {
res[i] = matcher(src[i]);
}
}
}
};
template <typename T, proto::plan::OpType op>
template <typename T, proto::plan::OpType op, FilterType filter_type>
struct UnaryElementFunc {
typedef std::
conditional_t<std::is_same_v<T, std::string_view>, std::string, T>
IndexInnerType;
void
operator()(const T* src,
size_t size,
IndexInnerType val,
TargetBitmapView res) {
TargetBitmapView res,
const int32_t* offsets = nullptr) {
if constexpr (op == proto::plan::OpType::Match) {
UnaryElementFuncForMatch<T> func;
UnaryElementFuncForMatch<T, filter_type> func;
func(src, size, val, res);
return;
}
/*
// This is the original code, which is kept for the documentation purposes
for (int i = 0; i < size; ++i) {
if constexpr (op == proto::plan::OpType::Equal) {
res[i] = src[i] == val;
} else if constexpr (op == proto::plan::OpType::NotEqual) {
res[i] = src[i] != val;
} else if constexpr (op == proto::plan::OpType::GreaterThan) {
res[i] = src[i] > val;
} else if constexpr (op == proto::plan::OpType::LessThan) {
res[i] = src[i] < val;
} else if constexpr (op == proto::plan::OpType::GreaterEqual) {
res[i] = src[i] >= val;
} else if constexpr (op == proto::plan::OpType::LessEqual) {
res[i] = src[i] <= val;
} else if constexpr (op == proto::plan::OpType::PrefixMatch) {
res[i] = milvus::query::Match(
src[i], val, proto::plan::OpType::PrefixMatch);
} else {
PanicInfo(
OpTypeInvalid,
fmt::format("unsupported op_type:{} for UnaryElementFunc",
op));
// also, for iterative filter
if constexpr (filter_type == FilterType::random) {
for (int i = 0; i < size; ++i) {
auto offset = (offsets != nullptr) ? offsets[i] : i;
if constexpr (op == proto::plan::OpType::Equal) {
res[i] = src[offset] == val;
} else if constexpr (op == proto::plan::OpType::NotEqual) {
res[i] = src[offset] != val;
} else if constexpr (op == proto::plan::OpType::GreaterThan) {
res[i] = src[offset] > val;
} else if constexpr (op == proto::plan::OpType::LessThan) {
res[i] = src[offset] < val;
} else if constexpr (op == proto::plan::OpType::GreaterEqual) {
res[i] = src[offset] >= val;
} else if constexpr (op == proto::plan::OpType::LessEqual) {
res[i] = src[offset] <= val;
} else if constexpr (op == proto::plan::OpType::PrefixMatch) {
res[i] = milvus::query::Match(
src[offset], val, proto::plan::OpType::PrefixMatch);
} else {
PanicInfo(
OpTypeInvalid,
fmt::format(
"unsupported op_type:{} for UnaryElementFunc", op));
}
}
return;
}
*/
if constexpr (op == proto::plan::OpType::PrefixMatch) {
for (int i = 0; i < size; ++i) {
@ -141,7 +151,7 @@ struct UnaryElementFunc {
} \
} while (false)
template <typename ValueType, proto::plan::OpType op>
template <typename ValueType, proto::plan::OpType op, FilterType filter_type>
struct UnaryElementFuncForArray {
using GetType = std::conditional_t<std::is_same_v<ValueType, std::string>,
std::string_view,
@ -153,32 +163,39 @@ struct UnaryElementFuncForArray {
ValueType val,
int index,
TargetBitmapView res,
TargetBitmapView valid_res) {
TargetBitmapView valid_res,
const int32_t* offsets = nullptr) {
for (int i = 0; i < size; ++i) {
if (valid_data != nullptr && !valid_data[i]) {
auto offset = i;
if constexpr (filter_type == FilterType::random) {
offset = (offsets) ? offsets[i] : i;
}
if (valid_data != nullptr && !valid_data[offset]) {
res[i] = valid_res[i] = false;
continue;
}
if constexpr (op == proto::plan::OpType::Equal) {
if constexpr (std::is_same_v<GetType, proto::plan::Array>) {
res[i] = src[i].is_same_array(val);
res[i] = src[offset].is_same_array(val);
} else {
if (index >= src[i].length()) {
if (index >= src[offset].length()) {
res[i] = false;
continue;
}
auto array_data = src[i].template get_data<GetType>(index);
auto array_data =
src[offset].template get_data<GetType>(index);
res[i] = array_data == val;
}
} else if constexpr (op == proto::plan::OpType::NotEqual) {
if constexpr (std::is_same_v<GetType, proto::plan::Array>) {
res[i] = !src[i].is_same_array(val);
res[i] = !src[offset].is_same_array(val);
} else {
if (index >= src[i].length()) {
if (index >= src[offset].length()) {
res[i] = false;
continue;
}
auto array_data = src[i].template get_data<GetType>(index);
auto array_data =
src[offset].template get_data<GetType>(index);
res[i] = array_data != val;
}
} else if constexpr (op == proto::plan::OpType::GreaterThan) {
@ -195,14 +212,15 @@ struct UnaryElementFuncForArray {
if constexpr (std::is_same_v<GetType, proto::plan::Array>) {
res[i] = false;
} else {
if (index >= src[i].length()) {
if (index >= src[offset].length()) {
res[i] = false;
continue;
}
PatternMatchTranslator translator;
auto regex_pattern = translator(val);
RegexMatcher matcher(regex_pattern);
auto array_data = src[i].template get_data<GetType>(index);
auto array_data =
src[offset].template get_data<GetType>(index);
res[i] = matcher(array_data);
}
} else {
@ -313,10 +331,18 @@ class PhyUnaryRangeFilterExpr : public SegmentExpr {
void
Eval(EvalCtx& context, VectorPtr& result) override;
bool
SupportOffsetInput() override {
if (expr_->op_type_ == proto::plan::OpType::TextMatch) {
return false;
}
return true;
}
private:
template <typename T>
VectorPtr
ExecRangeVisitorImpl();
ExecRangeVisitorImpl(OffsetVector* input = nullptr);
template <typename T>
VectorPtr
@ -324,15 +350,15 @@ class PhyUnaryRangeFilterExpr : public SegmentExpr {
template <typename T>
VectorPtr
ExecRangeVisitorImplForData();
ExecRangeVisitorImplForData(OffsetVector* input = nullptr);
template <typename ExprValueType>
VectorPtr
ExecRangeVisitorImplJson();
ExecRangeVisitorImplJson(OffsetVector* input = nullptr);
template <typename ExprValueType>
VectorPtr
ExecRangeVisitorImplArray();
ExecRangeVisitorImplArray(OffsetVector* input = nullptr);
template <typename T>
VectorPtr
@ -345,7 +371,7 @@ class PhyUnaryRangeFilterExpr : public SegmentExpr {
// Check overflow and cache result for performace
template <typename T>
ColumnVectorPtr
PreCheckOverflow();
PreCheckOverflow(OffsetVector* input = nullptr);
template <typename T>
bool

View File

@ -22,9 +22,13 @@ namespace exec {
void
PhyValueExpr::Eval(EvalCtx& context, VectorPtr& result) {
int64_t real_batch_size = current_pos_ + batch_size_ >= active_count_
? active_count_ - current_pos_
: batch_size_;
auto input = context.get_offset_input();
SetHasOffsetInput((input != nullptr));
int64_t real_batch_size = has_offset_input_
? input->size()
: (current_pos_ + batch_size_ >= active_count_
? active_count_ - current_pos_
: batch_size_);
if (real_batch_size == 0) {
result = nullptr;

View File

@ -49,11 +49,14 @@ class PhyValueExpr : public Expr {
void
MoveCursor() override {
int64_t real_batch_size = current_pos_ + batch_size_ >= active_count_
? active_count_ - current_pos_
: batch_size_;
if (!has_offset_input_) {
int64_t real_batch_size =
current_pos_ + batch_size_ >= active_count_
? active_count_ - current_pos_
: batch_size_;
current_pos_ += real_batch_size;
current_pos_ += real_batch_size;
}
}
private:

View File

@ -64,8 +64,7 @@ PhyFilterBitsNode::GetOutput() {
std::chrono::high_resolution_clock::time_point scalar_start =
std::chrono::high_resolution_clock::now();
EvalCtx eval_ctx(
operator_context_->get_exec_context(), exprs_.get(), input_.get());
EvalCtx eval_ctx(operator_context_->get_exec_context(), exprs_.get());
TargetBitmap bitset;
TargetBitmap valid_bitset;

View File

@ -0,0 +1,273 @@
// Licensed to the LF AI & Data foundation under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "IterativeFilterNode.h"
namespace milvus {
namespace exec {
// Builds the iterative-filter operator: compiles the plan's filter
// expression into an ExprSet and records whether every compiled expression
// can consume offset input (required for the native iterative path).
PhyIterativeFilterNode::PhyIterativeFilterNode(
    int32_t operator_id,
    DriverContext* driverctx,
    const std::shared_ptr<const plan::FilterNode>& filter)
    : Operator(driverctx,
               filter->output_type(),
               operator_id,
               filter->id(),
               "PhyIterativeFilterNode") {
    auto* exec_ctx = operator_context_->get_exec_context();
    query_context_ = exec_ctx->get_query_context();

    // Compile the single filter expression into an ExprSet.
    std::vector<expr::TypedExprPtr> filter_exprs{filter->filter()};
    exprs_ = std::make_unique<ExprSet>(filter_exprs, exec_ctx);

    // The native (offset-based) path is usable only when every compiled
    // expression supports offset input; otherwise we fall back to a
    // whole-segment bitmap in GetOutput().
    for (const auto& compiled : exprs_->exprs()) {
        if (!compiled->SupportOffsetInput()) {
            is_native_supported_ = false;
        }
    }

    need_process_rows_ = query_context_->get_active_count();
    num_processed_rows_ = 0;
}
// Buffers the upstream operator's batch; ownership of the RowVector is
// transferred into this node and handed back by GetOutput() once the
// filter has been applied to the search result.
void
PhyIterativeFilterNode::AddInput(RowVectorPtr& input) {
    input_ = std::move(input);
}
// Reports whether this operator has already produced its single output
// batch (set by the DeferLambda in GetOutput()).
bool
PhyIterativeFilterNode::IsFinished() {
    return is_finished_;
}
// Binary-searches [lo, hi) of a sorted distance run for the slot where
// `dist` should be inserted so the ordering is preserved: descending when
// large_is_better (bigger distance = better), ascending otherwise.
// Returns the insertion index (== hi when dist belongs at the end).
template <bool large_is_better>
inline size_t
find_binsert_position(const std::vector<float>& distances,
                      size_t lo,
                      size_t hi,
                      float dist) {
    size_t left = lo;
    size_t right = hi;
    while (left != right) {
        const size_t probe = left + (right - left) / 2;
        // `goes_before` means the probed element ranks worse than `dist`,
        // so the insertion point lies at or before `probe`.
        const bool goes_before = large_is_better ? (distances[probe] < dist)
                                                 : (distances[probe] > dist);
        if (goes_before) {
            right = probe;
        } else {
            left = probe + 1;
        }
    }
    return left;
}
// Inserts candidate i (distances[i] / offsets[i]) into the sorted result
// window of query `nq_index` inside the flat nq * unity_topk result arrays,
// keeping the window ordered (descending when large_is_better, ascending
// otherwise). `topk` counts the slots already filled in this window and is
// incremented after insertion; the caller guarantees topk < unity_topk so
// the shift below never writes past the window.
inline void
insert_helper(milvus::SearchResult& search_result,
              int& topk,
              const bool large_is_better,
              const FixedVector<float>& distances,
              const FixedVector<int32_t>& offsets,
              const int64_t nq_index,
              const int64_t unity_topk,
              const int i) {
    const size_t window_start = nq_index * unity_topk;
    // End (exclusive) of the filled portion of this query's window.
    const size_t filled_end = window_start + topk;
    auto pos = large_is_better
                   ? find_binsert_position<true>(search_result.distances_,
                                                 window_start,
                                                 filled_end,
                                                 distances[i])
                   : find_binsert_position<false>(search_result.distances_,
                                                  window_start,
                                                  filled_end,
                                                  distances[i]);
    // BUGFIX: the shift count must be window-relative. The previous check
    // `topk > pos` compared the relative fill count against the absolute
    // position, so for any query after the first (nq_index > 0) the tail
    // was never shifted and existing hits were overwritten in place.
    if (pos < filled_end) {
        std::memmove(&search_result.distances_[pos + 1],
                     &search_result.distances_[pos],
                     (filled_end - pos) * sizeof(float));
        std::memmove(&search_result.seg_offsets_[pos + 1],
                     &search_result.seg_offsets_[pos],
                     (filled_end - pos) * sizeof(int64_t));
    }
    search_result.seg_offsets_[pos] = offsets[i];
    search_result.distances_[pos] = distances[i];
    ++topk;
}
// Produces the operator's single output batch: applies the filter
// expression to the candidate offsets streamed from the vector-search
// iterators and fills search_result.seg_offsets_/distances_ with up to
// unity_topK_ passing hits per query. When any expression cannot consume
// offset input (is_native_supported_ == false), the filter is evaluated
// once over the whole segment and probed as a bitmap instead.
// Returns the buffered input batch, or nullptr when there is nothing to do.
RowVectorPtr
PhyIterativeFilterNode::GetOutput() {
    if (is_finished_ || !no_more_input_) {
        return nullptr;
    }

    // Whatever path we exit from, this operator produces output only once.
    DeferLambda([&]() { is_finished_ = true; });

    if (input_ == nullptr) {
        return nullptr;
    }

    std::chrono::high_resolution_clock::time_point scalar_start =
        std::chrono::high_resolution_clock::now();
    milvus::SearchResult search_result = query_context_->get_search_result();
    int64_t nq = search_result.total_nq_;
    int64_t unity_topk = search_result.unity_topK_;
    knowhere::MetricType metric_type = query_context_->get_metric_type();
    bool large_is_better = PositivelyRelated(metric_type);
    TargetBitmap bitset;
    // get bitset of whole segment first
    if (!is_native_supported_) {
        EvalCtx eval_ctx(operator_context_->get_exec_context(), exprs_.get());
        TargetBitmap valid_bitset;
        while (num_processed_rows_ < need_process_rows_) {
            exprs_->Eval(0, 1, true, eval_ctx, results_);

            AssertInfo(
                results_.size() == 1 && results_[0] != nullptr,
                "PhyIterativeFilterNode result size should be size one and not "
                "be nullptr");

            if (auto col_vec =
                    std::dynamic_pointer_cast<ColumnVector>(results_[0])) {
                if (col_vec->IsBitmap()) {
                    auto col_vec_size = col_vec->size();
                    TargetBitmapView view(col_vec->GetRawData(), col_vec_size);
                    bitset.append(view);
                    TargetBitmapView valid_view(col_vec->GetValidRawData(),
                                                col_vec_size);
                    valid_bitset.append(valid_view);
                    num_processed_rows_ += col_vec_size;
                } else {
                    PanicInfo(ExprInvalid,
                              "PhyIterativeFilterNode result should be bitmap");
                }
            } else {
                PanicInfo(
                    ExprInvalid,
                    "PhyIterativeFilterNode result should be ColumnVector");
            }
        }
        Assert(bitset.size() == need_process_rows_);
        Assert(valid_bitset.size() == need_process_rows_);
    }

    if (search_result.vector_iterators_.has_value()) {
        AssertInfo(search_result.vector_iterators_.value().size() ==
                       search_result.total_nq_,
                   "Vector Iterators' count must be equal to total_nq_, Check "
                   "your code");
        int nq_index = 0;

        // BUGFIX: this assert previously used `nq = ...` (assignment), which
        // always evaluated truthy and silently overwrote nq; an equality
        // check is what was intended.
        AssertInfo(nq == static_cast<int64_t>(
                             search_result.vector_iterators_.value().size()),
                   "nq and iterator not equal size");
        search_result.seg_offsets_.resize(nq * unity_topk, INVALID_SEG_OFFSET);
        search_result.distances_.resize(nq * unity_topk);
        for (auto& iterator : search_result.vector_iterators_.value()) {
            EvalCtx eval_ctx(operator_context_->get_exec_context(),
                             exprs_.get());
            int topk = 0;
            // Pull candidates in batches until this query's window is full
            // or the iterator is exhausted.
            while (iterator->HasNext() && topk < unity_topk) {
                FixedVector<int32_t> offsets;
                FixedVector<float> distances;
                // remain unfilled size as iterator batch size
                int64_t batch_size = unity_topk - topk;
                offsets.reserve(batch_size);
                distances.reserve(batch_size);
                while (iterator->HasNext()) {
                    auto offset_dis_pair = iterator->Next();
                    AssertInfo(
                        offset_dis_pair.has_value(),
                        "Wrong state! iterator cannot return valid result "
                        "whereas it still"
                        "tells hasNext, terminate operation");
                    auto offset = offset_dis_pair.value().first;
                    auto dis = offset_dis_pair.value().second;
                    offsets.emplace_back(offset);
                    distances.emplace_back(dis);
                    if (offsets.size() == batch_size) {
                        break;
                    }
                }
                if (is_native_supported_) {
                    // Native path: evaluate the filter only on this
                    // candidate batch via offset input.
                    eval_ctx.set_offset_input(&offsets);
                    std::vector<VectorPtr> results;
                    exprs_->Eval(0, 1, true, eval_ctx, results);
                    AssertInfo(
                        results.size() == 1 && results[0] != nullptr,
                        "PhyIterativeFilterNode result size should be size "
                        "one and not "
                        "be nullptr");
                    auto col_vec =
                        std::dynamic_pointer_cast<ColumnVector>(results[0]);
                    auto col_vec_size = col_vec->size();
                    TargetBitmapView bitsetview(col_vec->GetRawData(),
                                                col_vec_size);
                    Assert(bitsetview.size() <= batch_size);
                    Assert(bitsetview.size() == offsets.size());
                    for (auto i = 0; i < offsets.size(); ++i) {
                        if (bitsetview[i] > 0) {
                            insert_helper(search_result,
                                          topk,
                                          large_is_better,
                                          distances,
                                          offsets,
                                          nq_index,
                                          unity_topk,
                                          i);
                            if (topk == unity_topk) {
                                break;
                            }
                        }
                    }
                } else {
                    // Fallback path: probe the precomputed whole-segment
                    // bitmap at each candidate's segment offset.
                    for (auto i = 0; i < offsets.size(); ++i) {
                        if (bitset[offsets[i]] > 0) {
                            insert_helper(search_result,
                                          topk,
                                          large_is_better,
                                          distances,
                                          offsets,
                                          nq_index,
                                          unity_topk,
                                          i);
                            if (topk == unity_topk) {
                                break;
                            }
                        }
                    }
                }
                if (topk == unity_topk) {
                    break;
                }
            }
            nq_index++;
        }
    }
    query_context_->set_search_result(std::move(search_result));
    std::chrono::high_resolution_clock::time_point scalar_end =
        std::chrono::high_resolution_clock::now();
    double scalar_cost =
        std::chrono::duration<double, std::micro>(scalar_end - scalar_start)
            .count();
    monitor::internal_core_search_latency_iterative_filter.Observe(scalar_cost /
                                                                   1000);
    return input_;
}
} // namespace exec
} // namespace milvus

View File

@ -0,0 +1,83 @@
// Licensed to the LF AI & Data foundation under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <memory>
#include <string>
#include "exec/Driver.h"
#include "exec/expression/Expr.h"
#include "exec/operator/Operator.h"
#include "exec/QueryContext.h"
// The difference between FilterBitsNode and IterativeFilterNode:
// FilterBitsNode scans the whole segment and returns a bitset indicating which offsets are filtered out,
// whereas IterativeFilterNode accepts an array of offsets, evaluates the filter only over those, and produces the resulting valid offsets.
namespace milvus {
namespace exec {
// Filter operator that evaluates the filter expression over the offsets
// produced by the upstream vector search, instead of pre-computing a bitset
// over the whole segment (which is what PhyFilterBitsNode does).
class PhyIterativeFilterNode : public Operator {
 public:
    PhyIterativeFilterNode(
        int32_t operator_id,
        DriverContext* ctx,
        const std::shared_ptr<const plan::FilterNode>& filter);

    // This operator narrows existing results rather than producing new rows.
    bool
    IsFilter() override {
        return true;
    }

    // Keep accepting input until the node has finished its work.
    bool
    NeedInput() const override {
        return !is_finished_;
    }

    void
    AddInput(RowVectorPtr& input) override;

    RowVectorPtr
    GetOutput() override;

    bool
    IsFinished() override;

    // Release compiled expression state in addition to base-class cleanup.
    void
    Close() override {
        Operator::Close();
        exprs_->Clear();
    }

    // This operator never waits on external resources.
    BlockingReason
    IsBlocked(ContinueFuture* /* unused */) override {
        return BlockingReason::kNotBlocked;
    }

    virtual std::string
    ToString() const override {
        return "PhyIterativeFilterNode";
    }

 private:
    // Compiled filter expressions evaluated per batch of offsets.
    std::unique_ptr<ExprSet> exprs_;
    QueryContext* query_context_;
    // Progress counters for the fallback path that evaluates the whole
    // segment (used when the expression cannot take an offset list as input).
    int64_t num_processed_rows_;
    int64_t need_process_rows_;
    bool is_finished_{false};
    // Whether the expression natively supports evaluation over an input
    // offset list; when false, a full-segment bitset is computed first.
    bool is_native_supported_{true};
};
} // namespace exec
} // namespace milvus

View File

@ -0,0 +1,101 @@
// Licensed to the LF AI & Data foundation under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include "common/QueryInfo.h"
#include "knowhere/index/index_node.h"
#include "segcore/SegmentInterface.h"
#include "segcore/SegmentGrowingImpl.h"
#include "segcore/SegmentSealedImpl.h"
#include "segcore/ConcurrentVector.h"
#include "common/Span.h"
#include "query/Utils.h"
#include "common/EasyAssert.h"
namespace milvus {
namespace exec {
// Decides whether the search must be driven by knowhere vector iterators
// instead of a plain top-k search: either a group-by field is requested or
// the plan asks for iterative filter execution.
static bool
UseVectorIterator(const SearchInfo& search_info) {
    const bool grouping = search_info.group_by_field_id_.has_value();
    const bool iterative_filtering = search_info.iterative_filter_execution;
    return grouping || iterative_filtering;
}
// Assembles per-query knowhere vector iterators on `search_result` when the
// search must be executed iteratively (group-by or iterative filtering).
//
// Returns true when iterators were assembled, false when the plain top-k
// path should be used. Panics (ErrorCode::Unsupported) when the index cannot
// provide ready iterators or does not support iterator-based search.
static bool
PrepareVectorIteratorsFromIndex(const SearchInfo& search_info,
                                int nq,
                                const DatasetPtr dataset,
                                SearchResult& search_result,
                                const BitsetView& bitset,
                                const index::VectorIndex& index) {
    // when we use group by, we will use vector iterator to continuously get
    // results and group on them;
    // when we use iterative filtered search, we will use vector iterator to
    // continuously get results and check scalar attr on them until we get
    // valid topk results
    if (!UseVectorIterator(search_info)) {
        return false;
    }
    // Name the operation once for all log/error messages below
    // (previously duplicated in both failure branches).
    const std::string operator_type =
        search_info.group_by_field_id_.has_value() ? "group_by"
                                                   : "iterative filter";
    try {
        auto search_conf = index.PrepareSearchParams(search_info);
        knowhere::expected<std::vector<knowhere::IndexNode::IteratorPtr>>
            iterators_val =
                index.VectorIterators(dataset, search_conf, bitset);
        if (iterators_val.has_value()) {
            search_result.AssembleChunkVectorIterators(
                nq, 1, {0}, iterators_val.value());
        } else {
            LOG_ERROR(
                "Returned knowhere iterator has non-ready iterators "
                "inside, terminate {} operation:{}",
                operator_type,
                knowhere::Status2String(iterators_val.error()));
            PanicInfo(
                ErrorCode::Unsupported,
                fmt::format(
                    "Returned knowhere iterator has non-ready iterators "
                    "inside, terminate {} operation",
                    operator_type));
        }
        search_result.total_nq_ = dataset->GetRows();
        search_result.unity_topK_ = search_info.topk_;
    } catch (const std::runtime_error& e) {
        LOG_ERROR(
            "Caught error:{} when trying to initialize ann iterators for "
            "{}: "
            "operation will be terminated",
            e.what(),
            operator_type);
        // Use fmt placeholders consistently instead of mixing string
        // concatenation into the format string.
        PanicInfo(ErrorCode::Unsupported,
                  fmt::format("Failed to {}, current index:{} doesn't support",
                              operator_type,
                              index.GetIndexType()));
    }
    return true;
}
} // namespace exec
} // namespace milvus

View File

@ -86,6 +86,7 @@ PhyVectorSearchNode::GetOutput() {
query_timestamp_,
final_view,
search_result);
search_result.total_data_cnt_ = final_view.size();
query_context_->set_search_result(std::move(search_result));
std::chrono::high_resolution_clock::time_point vector_end =

View File

@ -125,49 +125,6 @@ GetDataGetter(const segcore::SegmentInternalInterface& segment,
}
}
static bool
PrepareVectorIteratorsFromIndex(const SearchInfo& search_info,
int nq,
const DatasetPtr dataset,
SearchResult& search_result,
const BitsetView& bitset,
const index::VectorIndex& index) {
if (search_info.group_by_field_id_.has_value()) {
try {
auto search_conf = index.PrepareSearchParams(search_info);
knowhere::expected<std::vector<knowhere::IndexNode::IteratorPtr>>
iterators_val =
index.VectorIterators(dataset, search_conf, bitset);
if (iterators_val.has_value()) {
search_result.AssembleChunkVectorIterators(
nq, 1, {0}, iterators_val.value());
} else {
LOG_ERROR(
"Returned knowhere iterator has non-ready iterators "
"inside, terminate group_by operation:{}",
knowhere::Status2String(iterators_val.error()));
PanicInfo(ErrorCode::Unsupported,
"Returned knowhere iterator has non-ready iterators "
"inside, terminate group_by operation");
}
search_result.total_nq_ = dataset->GetRows();
search_result.unity_topK_ = search_info.topk_;
} catch (const std::runtime_error& e) {
LOG_ERROR(
"Caught error:{} when trying to initialize ann iterators for "
"group_by: "
"group_by operation will be terminated",
e.what());
PanicInfo(
ErrorCode::Unsupported,
"Failed to groupBy, current index:" + index.GetIndexType() +
" doesn't support search_group_by");
}
return true;
}
return false;
}
void
SearchGroupBy(const std::vector<std::shared_ptr<VectorIterator>>& iterators,
const SearchInfo& searchInfo,

View File

@ -155,6 +155,13 @@ class ChunkedColumnBase : public ColumnBase {
"StringViews only supported for VariableColumn");
}
// Offset-based variant of StringViews(): builds views only for the requested
// row offsets of one chunk. Only meaningful for variable-length columns; the
// base implementation rejects the call.
virtual std::pair<std::vector<std::string_view>, FixedVector<bool>>
ViewsByOffsets(int64_t chunk_id,
               const FixedVector<int32_t>& offsets) const {
    PanicInfo(ErrorCode::Unsupported,
              "viewsbyoffsets only supported for VariableColumn");
}
std::pair<size_t, size_t>
GetChunkIDByOffset(int64_t offset) const {
AssertInfo(offset < num_rows_,
@ -333,6 +340,13 @@ class ChunkedVariableColumn : public ChunkedColumnBase {
return chunks_[chunk_id];
}
// Delegates to the target chunk's StringChunk::ViewsByOffsets to build
// string views (with matching validity flags) for the requested row offsets.
std::pair<std::vector<std::string_view>, FixedVector<bool>>
ViewsByOffsets(int64_t chunk_id,
               const FixedVector<int32_t>& offsets) const override {
    return std::dynamic_pointer_cast<StringChunk>(chunks_[chunk_id])
        ->ViewsByOffsets(offsets);
}
BufferView
GetBatchBuffer(int64_t chunk_id,
int64_t start_offset,

View File

@ -323,6 +323,12 @@ class SingleChunkColumnBase : public ColumnBase {
"StringViews only supported for VariableColumn");
}
// Offset-based view accessor; supported only by variable-length columns, the
// base implementation rejects the call.
virtual std::pair<std::vector<std::string_view>, FixedVector<bool>>
ViewsByOffsets(const FixedVector<int32_t>& offsets) const {
    PanicInfo(ErrorCode::Unsupported,
              "viewsbyoffsets only supported for VariableColumn");
}
virtual void
AppendBatch(const FieldDataPtr data) {
size_t required_size = data_size_ + data->DataSize();
@ -698,6 +704,19 @@ class SingleChunkVariableColumn : public SingleChunkColumnBase {
return std::make_pair(res, valid_data_);
}
std::pair<std::vector<std::string_view>, FixedVector<bool>>
ViewsByOffsets(const FixedVector<int32_t>& offsets) const {
std::vector<std::string_view> res;
FixedVector<bool> valid;
res.reserve(offsets.size());
valid.reserve(offsets.size());
for (size_t i = 0; i < offsets.size(); ++i) {
res.emplace_back(RawAt(offsets[i]));
valid.emplace_back(IsValid(offsets[i]));
}
return {res, valid};
}
[[nodiscard]] std::vector<ViewType>
Views() const {
std::vector<ViewType> res;

View File

@ -177,6 +177,8 @@ std::map<std::string, std::string> vectorLatencyLabels{
{"type", "vector_latency"}};
std::map<std::string, std::string> groupbyLatencyLabels{
{"type", "groupby_latency"}};
std::map<std::string, std::string> iterativeFilterLatencyLabels{
{"type", "iterative_filter_latency"}};
std::map<std::string, std::string> scalarProportionLabels{
{"type", "scalar_proportion"}};
DEFINE_PROMETHEUS_HISTOGRAM_FAMILY(internal_core_search_latency,
@ -190,6 +192,9 @@ DEFINE_PROMETHEUS_HISTOGRAM(internal_core_search_latency_vector,
DEFINE_PROMETHEUS_HISTOGRAM(internal_core_search_latency_groupby,
internal_core_search_latency,
groupbyLatencyLabels)
DEFINE_PROMETHEUS_HISTOGRAM(internal_core_search_latency_iterative_filter,
internal_core_search_latency,
iterativeFilterLatencyLabels)
DEFINE_PROMETHEUS_HISTOGRAM_WITH_BUCKETS(
internal_core_search_latency_scalar_proportion,
internal_core_search_latency,

View File

@ -136,6 +136,7 @@ DECLARE_PROMETHEUS_HISTOGRAM_FAMILY(internal_core_search_latency);
DECLARE_PROMETHEUS_HISTOGRAM(internal_core_search_latency_scalar);
DECLARE_PROMETHEUS_HISTOGRAM(internal_core_search_latency_vector);
DECLARE_PROMETHEUS_HISTOGRAM(internal_core_search_latency_groupby);
DECLARE_PROMETHEUS_HISTOGRAM(internal_core_search_latency_iterative_filter);
DECLARE_PROMETHEUS_HISTOGRAM(internal_core_search_latency_scalar_proportion);
} // namespace milvus::monitor

View File

@ -53,6 +53,16 @@ ProtoParser::PlanNodeFromProto(const planpb::PlanNode& plan_node_proto) {
nlohmann::json::parse(query_info_proto.search_params());
search_info.materialized_view_involved =
query_info_proto.materialized_view_involved();
// currently, iterative filter does not support range search
if (!search_info.search_params_.contains(RADIUS)) {
search_info.iterative_filter_execution =
(query_info_proto.hints() == ITERATIVE_FILTER);
if (!search_info.iterative_filter_execution &&
search_info.search_params_.contains(HINTS)) {
search_info.iterative_filter_execution =
(search_info.search_params_[HINTS] == ITERATIVE_FILTER);
}
}
if (query_info_proto.bm25_avgdl() > 0) {
search_info.search_params_[knowhere::meta::BM25_AVGDL] =
@ -94,7 +104,24 @@ ProtoParser::PlanNodeFromProto(const planpb::PlanNode& plan_node_proto) {
milvus::plan::PlanNodePtr plannode;
std::vector<milvus::plan::PlanNodePtr> sources;
if (anns_proto.has_predicates()) {
// mvcc node -> vector search node -> iterative filter node
auto iterative_filter_plan = [&]() {
plannode = std::make_shared<milvus::plan::MvccNode>(
milvus::plan::GetNextPlanNodeId());
sources = std::vector<milvus::plan::PlanNodePtr>{plannode};
plannode = std::make_shared<milvus::plan::VectorSearchNode>(
milvus::plan::GetNextPlanNodeId(), sources);
sources = std::vector<milvus::plan::PlanNodePtr>{plannode};
auto expr = ParseExprs(anns_proto.predicates());
plannode = std::make_shared<plan::FilterNode>(
milvus::plan::GetNextPlanNodeId(), expr, sources);
sources = std::vector<milvus::plan::PlanNodePtr>{plannode};
};
// pre filter node -> mvcc node -> vector search node
auto pre_filter_plan = [&]() {
plannode = std::move(expr_parser());
if (plan_node->search_info_.materialized_view_involved) {
const auto expr_info = plannode->GatherInfo();
@ -113,16 +140,33 @@ ProtoParser::PlanNodeFromProto(const planpb::PlanNode& plan_node_proto) {
materialized_view_search_info;
}
sources = std::vector<milvus::plan::PlanNodePtr>{plannode};
plannode = std::make_shared<milvus::plan::MvccNode>(
milvus::plan::GetNextPlanNodeId(), sources);
sources = std::vector<milvus::plan::PlanNodePtr>{plannode};
plannode = std::make_shared<milvus::plan::VectorSearchNode>(
milvus::plan::GetNextPlanNodeId(), sources);
sources = std::vector<milvus::plan::PlanNodePtr>{plannode};
};
if (anns_proto.has_predicates()) {
// currently limit iterative filter scope to search only
if (plan_node->search_info_.iterative_filter_execution &&
plan_node->search_info_.group_by_field_id_ == std::nullopt) {
iterative_filter_plan();
} else {
pre_filter_plan();
}
} else {
plannode = std::make_shared<milvus::plan::MvccNode>(
milvus::plan::GetNextPlanNodeId(), sources);
sources = std::vector<milvus::plan::PlanNodePtr>{plannode};
plannode = std::make_shared<milvus::plan::VectorSearchNode>(
milvus::plan::GetNextPlanNodeId(), sources);
sources = std::vector<milvus::plan::PlanNodePtr>{plannode};
}
plannode = std::make_shared<milvus::plan::MvccNode>(
milvus::plan::GetNextPlanNodeId(), sources);
sources = std::vector<milvus::plan::PlanNodePtr>{plannode};
plannode = std::make_shared<milvus::plan::VectorSearchNode>(
milvus::plan::GetNextPlanNodeId(), sources);
sources = std::vector<milvus::plan::PlanNodePtr>{plannode};
if (plan_node->search_info_.group_by_field_id_ != std::nullopt) {
plannode = std::make_shared<milvus::plan::GroupByNode>(
milvus::plan::GetNextPlanNodeId(), sources);

View File

@ -272,12 +272,11 @@ BruteForceSearchIterators(const dataset::SearchDataset& query_ds,
"equal to nq:{} for single chunk",
iterators_val.value().size(),
nq);
SubSearchResult subSearchResult(query_ds.num_queries,
query_ds.topk,
query_ds.metric_type,
query_ds.round_decimal,
iterators_val.value());
return std::move(subSearchResult);
return SubSearchResult(query_ds.num_queries,
query_ds.topk,
query_ds.metric_type,
query_ds.round_decimal,
iterators_val.value());
} else {
LOG_ERROR(
"Failed to get valid knowhere brute-force-iterators from chunk, "

View File

@ -20,6 +20,7 @@
#include "log/Log.h"
#include "query/SearchBruteForce.h"
#include "query/SearchOnIndex.h"
#include "exec/operator/Utils.h"
namespace milvus::query {
@ -138,7 +139,7 @@ SearchOnGrowing(const segcore::SegmentGrowingImpl& segment,
auto sub_data = query::dataset::RawDataset{
element_begin, dim, size_per_chunk, chunk_data};
if (info.group_by_field_id_.has_value()) {
if (milvus::exec::UseVectorIterator(info)) {
auto sub_qr = BruteForceSearchIterators(search_dataset,
sub_data,
info,
@ -156,7 +157,7 @@ SearchOnGrowing(const segcore::SegmentGrowingImpl& segment,
final_qr.merge(sub_qr);
}
}
if (info.group_by_field_id_.has_value()) {
if (milvus::exec::UseVectorIterator(info)) {
std::vector<int64_t> chunk_rows(max_chunk, 0);
for (int i = 1; i < max_chunk; ++i) {
chunk_rows[i] = i * vec_size_per_chunk;

View File

@ -10,7 +10,7 @@
// or implied. See the License for the specific language governing permissions and limitations under the License
#include "SearchOnIndex.h"
#include "exec/operator/groupby/SearchGroupByOperator.h"
#include "exec/operator/Utils.h"
namespace milvus::query {
void

View File

@ -21,7 +21,7 @@
#include "query/SearchBruteForce.h"
#include "query/SearchOnSealed.h"
#include "query/helper.h"
#include "exec/operator/groupby/SearchGroupByOperator.h"
#include "exec/operator/Utils.h"
namespace milvus::query {
@ -119,7 +119,7 @@ SearchOnSealed(const Schema& schema,
auto data_id = offset;
auto raw_dataset =
query::dataset::RawDataset{offset, dim, chunk_size, vec_data};
if (search_info.group_by_field_id_.has_value()) {
if (milvus::exec::UseVectorIterator(search_info)) {
auto sub_qr = BruteForceSearchIterators(query_dataset,
raw_dataset,
search_info,
@ -139,7 +139,7 @@ SearchOnSealed(const Schema& schema,
offset += chunk_size;
}
if (search_info.group_by_field_id_.has_value()) {
if (milvus::exec::UseVectorIterator(search_info)) {
result.AssembleChunkVectorIterators(num_queries,
num_chunk,
column->GetNumRowsUntilChunk(),
@ -180,7 +180,7 @@ SearchOnSealed(const Schema& schema,
auto data_type = field.get_data_type();
CheckBruteForceSearchParam(field, search_info);
auto raw_dataset = query::dataset::RawDataset{0, dim, row_count, vec_data};
if (search_info.group_by_field_id_.has_value()) {
if (milvus::exec::UseVectorIterator(search_info)) {
auto sub_qr = BruteForceSearchIterators(query_dataset,
raw_dataset,
search_info,

View File

@ -723,7 +723,11 @@ ChunkedSegmentSealedImpl::num_chunk_index(FieldId field_id) const {
int64_t
ChunkedSegmentSealedImpl::num_chunk_data(FieldId field_id) const {
return fields_.at(field_id)->num_chunks();
return get_bit(field_data_ready_bitset_, field_id)
? fields_.find(field_id) != fields_.end()
? fields_.at(field_id)->num_chunks()
: 1
: 0;
}
int64_t
@ -732,7 +736,7 @@ ChunkedSegmentSealedImpl::num_chunk(FieldId field_id) const {
? fields_.find(field_id) != fields_.end()
? fields_.at(field_id)->num_chunks()
: 1
: 0;
: 1;
}
int64_t
@ -800,7 +804,6 @@ ChunkedSegmentSealedImpl::chunk_data_impl(FieldId field_id,
std::shared_lock lck(mutex_);
AssertInfo(get_bit(field_data_ready_bitset_, field_id),
"Can't get bitset element at " + std::to_string(field_id.get()));
auto& field_meta = schema_->operator[](field_id);
if (auto it = fields_.find(field_id); it != fields_.end()) {
auto& field_data = it->second;
return field_data->Span(chunk_id);
@ -818,7 +821,6 @@ ChunkedSegmentSealedImpl::chunk_view_impl(FieldId field_id,
std::shared_lock lck(mutex_);
AssertInfo(get_bit(field_data_ready_bitset_, field_id),
"Can't get bitset element at " + std::to_string(field_id.get()));
auto& field_meta = schema_->operator[](field_id);
if (auto it = fields_.find(field_id); it != fields_.end()) {
auto& field_data = it->second;
return field_data->StringViews(chunk_id);
@ -827,6 +829,22 @@ ChunkedSegmentSealedImpl::chunk_view_impl(FieldId field_id,
"chunk_view_impl only used for variable column field ");
}
// Returns string views (plus validity flags) for the given row offsets of
// one chunk of a variable-length column.
std::pair<std::vector<std::string_view>, FixedVector<bool>>
ChunkedSegmentSealedImpl::chunk_view_by_offsets(
    FieldId field_id,
    int64_t chunk_id,
    const FixedVector<int32_t>& offsets) const {
    // Hold the shared lock for the whole lookup so the column map cannot
    // change underneath us.
    std::shared_lock guard(mutex_);
    AssertInfo(get_bit(field_data_ready_bitset_, field_id),
               "Can't get bitset element at " + std::to_string(field_id.get()));
    auto iter = fields_.find(field_id);
    if (iter == fields_.end()) {
        PanicInfo(ErrorCode::UnexpectedError,
                  "chunk_view_by_offsets only used for variable column field ");
    }
    return iter->second->ViewsByOffsets(chunk_id, offsets);
}
const index::IndexBase*
ChunkedSegmentSealedImpl::chunk_index_impl(FieldId field_id,
int64_t chunk_id) const {

View File

@ -206,6 +206,11 @@ class ChunkedSegmentSealedImpl : public SegmentSealed {
std::pair<std::vector<std::string_view>, FixedVector<bool>>
chunk_view_impl(FieldId field_id, int64_t chunk_id) const override;
std::pair<std::vector<std::string_view>, FixedVector<bool>>
chunk_view_by_offsets(FieldId field_id,
int64_t chunk_id,
const FixedVector<int32_t>& offsets) const override;
std::pair<BufferView, FixedVector<bool>>
get_chunk_buffer(FieldId field_id,
int64_t chunk_id,

View File

@ -409,6 +409,15 @@ SegmentGrowingImpl::chunk_view_impl(FieldId field_id, int64_t chunk_id) const {
"chunk view impl not implement for growing segment");
}
// Offset-based chunk views are not available on growing segments; callers
// are expected to guard on the segment type before calling.
std::pair<std::vector<std::string_view>, FixedVector<bool>>
SegmentGrowingImpl::chunk_view_by_offsets(
    FieldId field_id,
    int64_t chunk_id,
    const FixedVector<int32_t>& offsets) const {
    PanicInfo(ErrorCode::NotImplemented,
              "chunk view by offsets not implemented for growing segment");
}
int64_t
SegmentGrowingImpl::num_chunk(FieldId field_id) const {
auto size = get_insert_record().ack_responder_.GetAck();

View File

@ -344,6 +344,11 @@ class SegmentGrowingImpl : public SegmentGrowing {
std::pair<std::vector<std::string_view>, FixedVector<bool>>
chunk_view_impl(FieldId field_id, int64_t chunk_id) const override;
std::pair<std::vector<std::string_view>, FixedVector<bool>>
chunk_view_by_offsets(FieldId field_id,
int64_t chunk_id,
const FixedVector<int32_t>& offsets) const override;
std::pair<BufferView, FixedVector<bool>>
get_chunk_buffer(FieldId field_id,
int64_t chunk_id,

View File

@ -200,6 +200,28 @@ class SegmentInternalInterface : public SegmentInterface {
return std::make_pair(res, chunk_info.second);
}
// Returns views of type ViewType (plus validity flags) for the rows addressed
// by `offsets` within one chunk of a variable-length column. For
// std::string_view the chunk views are returned directly; other view types
// are constructed element-wise from the string views.
template <typename ViewType>
std::pair<std::vector<ViewType>, FixedVector<bool>>
get_views_by_offsets(FieldId field_id,
                     int64_t chunk_id,
                     const FixedVector<int32_t>& offsets) const {
    // Growing segments do not support offset-based chunk views.
    if (this->type() == SegmentType::Growing) {
        PanicInfo(ErrorCode::Unsupported,
                  "get chunk views not supported for growing segment");
    }
    auto chunk_view = chunk_view_by_offsets(field_id, chunk_id, offsets);
    if constexpr (std::is_same_v<ViewType, std::string_view>) {
        // No conversion needed: the underlying views are already
        // string_views.
        return chunk_view;
    } else {
        // Convert each string view into the requested view type.
        std::vector<ViewType> res;
        res.reserve(chunk_view.first.size());
        for (const auto& view : chunk_view.first) {
            res.emplace_back(view);
        }
        return {res, chunk_view.second};
    }
}
template <typename T>
const index::ScalarIndex<T>&
chunk_scalar_index(FieldId field_id, int64_t chunk_id) const {
@ -414,6 +436,11 @@ class SegmentInternalInterface : public SegmentInterface {
int64_t start_offset,
int64_t length) const = 0;
virtual std::pair<std::vector<std::string_view>, FixedVector<bool>>
chunk_view_by_offsets(FieldId field_id,
int64_t chunk_id,
const FixedVector<int32_t>& offsets) const = 0;
// internal API: return chunk_index in span, support scalar index only
virtual const index::IndexBase*
chunk_index_impl(FieldId field_id, int64_t chunk_id) const = 0;

View File

@ -770,7 +770,6 @@ SegmentSealedImpl::chunk_data_impl(FieldId field_id, int64_t chunk_id) const {
std::shared_lock lck(mutex_);
AssertInfo(get_bit(field_data_ready_bitset_, field_id),
"Can't get bitset element at " + std::to_string(field_id.get()));
auto& field_meta = schema_->operator[](field_id);
if (auto it = fields_.find(field_id); it != fields_.end()) {
auto& field_data = it->second;
return field_data->Span();
@ -787,7 +786,6 @@ SegmentSealedImpl::chunk_view_impl(FieldId field_id, int64_t chunk_id) const {
std::shared_lock lck(mutex_);
AssertInfo(get_bit(field_data_ready_bitset_, field_id),
"Can't get bitset element at " + std::to_string(field_id.get()));
auto& field_meta = schema_->operator[](field_id);
if (auto it = fields_.find(field_id); it != fields_.end()) {
auto& field_data = it->second;
return field_data->StringViews();
@ -796,6 +794,22 @@ SegmentSealedImpl::chunk_view_impl(FieldId field_id, int64_t chunk_id) const {
"chunk_view_impl only used for variable column field ");
}
// Returns string views (plus validity flags) for the given row offsets.
// Sealed segments are single-chunk here, so chunk_id is not used to pick a
// chunk when delegating to the column.
std::pair<std::vector<std::string_view>, FixedVector<bool>>
SegmentSealedImpl::chunk_view_by_offsets(
    FieldId field_id,
    int64_t chunk_id,
    const FixedVector<int32_t>& offsets) const {
    std::shared_lock guard(mutex_);
    AssertInfo(get_bit(field_data_ready_bitset_, field_id),
               "Can't get bitset element at " + std::to_string(field_id.get()));
    const auto iter = fields_.find(field_id);
    if (iter != fields_.end()) {
        return iter->second->ViewsByOffsets(offsets);
    }
    PanicInfo(ErrorCode::UnexpectedError,
              "chunk_view_by_offsets only used for variable column field ");
}
const index::IndexBase*
SegmentSealedImpl::chunk_index_impl(FieldId field_id, int64_t chunk_id) const {
AssertInfo(scalar_indexings_.find(field_id) != scalar_indexings_.end(),

View File

@ -212,6 +212,11 @@ class SegmentSealedImpl : public SegmentSealed {
std::pair<std::vector<std::string_view>, FixedVector<bool>>
chunk_view_impl(FieldId field_id, int64_t chunk_id) const override;
std::pair<std::vector<std::string_view>, FixedVector<bool>>
chunk_view_by_offsets(FieldId field_id,
int64_t chunk_id,
const FixedVector<int32_t>& offsets) const override;
std::pair<BufferView, FixedVector<bool>>
get_chunk_buffer(FieldId field_id,
int64_t chunk_id,

View File

@ -51,6 +51,7 @@ set(MILVUS_TEST_FILES
test_function.cpp
test_futures.cpp
test_group_by.cpp
test_iterative_filter.cpp
test_growing.cpp
test_growing_index.cpp
test_hybrid_index.cpp

View File

@ -67,10 +67,29 @@ TEST_P(ExprAlwaysTrueTest, AlwaysTrue) {
final = ExecuteQueryExpr(plan, seg_promote, N * num_iters, MAX_TIMESTAMP);
EXPECT_EQ(final.size(), N * num_iters);
// specify some offsets and do scalar filtering on these offsets
milvus::exec::OffsetVector offsets;
offsets.reserve(N * num_iters / 2);
for (auto i = 0; i < N * num_iters; ++i) {
if (i % 2 == 0) {
offsets.emplace_back(i);
}
}
auto col_vec = milvus::test::gen_filter_res(plan->sources()[0].get(),
seg_promote,
N * num_iters,
MAX_TIMESTAMP,
&offsets);
BitsetTypeView view(col_vec->GetRawData(), col_vec->size());
EXPECT_EQ(view.size(), N * num_iters / 2);
for (int i = 0; i < N * num_iters; ++i) {
auto ans = final[i];
auto val = age_col[i];
ASSERT_EQ(ans, true) << "@" << i << "!!" << val;
if (i % 2 == 0) {
ASSERT_EQ(view[int(i / 2)], true) << "@" << i << "!!" << val;
}
}
}

View File

@ -27,6 +27,7 @@
#include "segcore/SegmentGrowingImpl.h"
#include "simdjson/padded_string.h"
#include "test_utils/DataGen.h"
#include "test_utils/GenExprProto.h"
using namespace milvus;
using namespace milvus::query;
@ -611,11 +612,31 @@ TEST(Expr, TestArrayRange) {
MAX_TIMESTAMP);
EXPECT_EQ(final.size(), N * num_iters);
// specify some offsets and do scalar filtering on these offsets
milvus::exec::OffsetVector offsets;
offsets.reserve(N * num_iters / 2);
for (auto i = 0; i < N * num_iters; ++i) {
if (i % 2 == 0) {
offsets.emplace_back(i);
}
}
auto col_vec = milvus::test::gen_filter_res(
plan->plan_node_->plannodes_->sources()[0]->sources()[0].get(),
seg_promote,
N * num_iters,
MAX_TIMESTAMP,
&offsets);
BitsetTypeView view(col_vec->GetRawData(), col_vec->size());
EXPECT_EQ(view.size(), N * num_iters / 2);
for (int i = 0; i < N * num_iters; ++i) {
auto ans = final[i];
auto array = milvus::Array(array_cols[array_type][i]);
auto ref = ref_func(array);
ASSERT_EQ(ans, ref);
if (i % 2 == 0) {
ASSERT_EQ(view[int(i / 2)], ref);
}
}
}
}
@ -728,6 +749,23 @@ TEST(Expr, TestArrayEqual) {
MAX_TIMESTAMP);
EXPECT_EQ(final.size(), N * num_iters);
// specify some offsets and do scalar filtering on these offsets
milvus::exec::OffsetVector offsets;
offsets.reserve(N * num_iters / 2);
for (auto i = 0; i < N * num_iters; ++i) {
if (i % 2 == 0) {
offsets.emplace_back(i);
}
}
auto col_vec = milvus::test::gen_filter_res(
plan->plan_node_->plannodes_->sources()[0]->sources()[0].get(),
seg_promote,
N * num_iters,
MAX_TIMESTAMP,
&offsets);
BitsetTypeView view(col_vec->GetRawData(), col_vec->size());
EXPECT_EQ(view.size(), N * num_iters / 2);
for (int i = 0; i < N * num_iters; ++i) {
auto ans = final[i];
auto array = milvus::Array(long_array_col[i]);
@ -737,6 +775,9 @@ TEST(Expr, TestArrayEqual) {
}
auto ref = ref_func(array_values);
ASSERT_EQ(ans, ref);
if (i % 2 == 0) {
ASSERT_EQ(view[int(i / 2)], ref);
}
}
}
}
@ -927,6 +968,19 @@ TEST(Expr, TestArrayContains) {
<< std::endl;
EXPECT_EQ(final.size(), N * num_iters);
// specify some offsets and do scalar filtering on these offsets
milvus::exec::OffsetVector offsets;
offsets.reserve(N * num_iters / 2);
for (auto i = 0; i < N * num_iters; ++i) {
if (i % 2 == 0) {
offsets.emplace_back(i);
}
}
auto col_vec = milvus::test::gen_filter_res(
plan.get(), seg_promote, N * num_iters, MAX_TIMESTAMP, &offsets);
BitsetTypeView view(col_vec->GetRawData(), col_vec->size());
EXPECT_EQ(view.size(), N * num_iters / 2);
for (int i = 0; i < N * num_iters; ++i) {
auto ans = final[i];
auto array = milvus::Array(array_cols["bool"][i]);
@ -935,6 +989,9 @@ TEST(Expr, TestArrayContains) {
res.push_back(array.get_data<bool>(j));
}
ASSERT_EQ(ans, check(res)) << "@" << i;
if (i % 2 == 0) {
ASSERT_EQ(view[int(i / 2)], check(res)) << "@" << i;
}
}
}
@ -982,6 +1039,19 @@ TEST(Expr, TestArrayContains) {
<< std::endl;
EXPECT_EQ(final.size(), N * num_iters);
// specify some offsets and do scalar filtering on these offsets
milvus::exec::OffsetVector offsets;
offsets.reserve(N * num_iters / 2);
for (auto i = 0; i < N * num_iters; ++i) {
if (i % 2 == 0) {
offsets.emplace_back(i);
}
}
auto col_vec = milvus::test::gen_filter_res(
plan.get(), seg_promote, N * num_iters, MAX_TIMESTAMP, &offsets);
BitsetTypeView view(col_vec->GetRawData(), col_vec->size());
EXPECT_EQ(view.size(), N * num_iters / 2);
for (int i = 0; i < N * num_iters; ++i) {
auto ans = final[i];
auto array = milvus::Array(array_cols["double"][i]);
@ -990,6 +1060,9 @@ TEST(Expr, TestArrayContains) {
res.push_back(array.get_data<double>(j));
}
ASSERT_EQ(ans, check(res));
if (i % 2 == 0) {
ASSERT_EQ(view[int(i / 2)], check(res));
}
}
}
@ -1027,6 +1100,19 @@ TEST(Expr, TestArrayContains) {
<< std::endl;
EXPECT_EQ(final.size(), N * num_iters);
// specify some offsets and do scalar filtering on these offsets
milvus::exec::OffsetVector offsets;
offsets.reserve(N * num_iters / 2);
for (auto i = 0; i < N * num_iters; ++i) {
if (i % 2 == 0) {
offsets.emplace_back(i);
}
}
auto col_vec = milvus::test::gen_filter_res(
plan.get(), seg_promote, N * num_iters, MAX_TIMESTAMP, &offsets);
BitsetTypeView view(col_vec->GetRawData(), col_vec->size());
EXPECT_EQ(view.size(), N * num_iters / 2);
for (int i = 0; i < N * num_iters; ++i) {
auto ans = final[i];
auto array = milvus::Array(array_cols["float"][i]);
@ -1035,6 +1121,9 @@ TEST(Expr, TestArrayContains) {
res.push_back(array.get_data<float>(j));
}
ASSERT_EQ(ans, check(res));
if (i % 2 == 0) {
ASSERT_EQ(view[int(i / 2)], check(res));
}
}
}
@ -1082,6 +1171,19 @@ TEST(Expr, TestArrayContains) {
<< std::endl;
EXPECT_EQ(final.size(), N * num_iters);
// specify some offsets and do scalar filtering on these offsets
milvus::exec::OffsetVector offsets;
offsets.reserve(N * num_iters / 2);
for (auto i = 0; i < N * num_iters; ++i) {
if (i % 2 == 0) {
offsets.emplace_back(i);
}
}
auto col_vec = milvus::test::gen_filter_res(
plan.get(), seg_promote, N * num_iters, MAX_TIMESTAMP, &offsets);
BitsetTypeView view(col_vec->GetRawData(), col_vec->size());
EXPECT_EQ(view.size(), N * num_iters / 2);
for (int i = 0; i < N * num_iters; ++i) {
auto ans = final[i];
auto array = milvus::Array(array_cols["int"][i]);
@ -1090,6 +1192,9 @@ TEST(Expr, TestArrayContains) {
res.push_back(array.get_data<int64_t>(j));
}
ASSERT_EQ(ans, check(res));
if (i % 2 == 0) {
ASSERT_EQ(view[int(i / 2)], check(res));
}
}
}
@ -1128,6 +1233,19 @@ TEST(Expr, TestArrayContains) {
<< std::endl;
EXPECT_EQ(final.size(), N * num_iters);
// specify some offsets and do scalar filtering on these offsets
milvus::exec::OffsetVector offsets;
offsets.reserve(N * num_iters / 2);
for (auto i = 0; i < N * num_iters; ++i) {
if (i % 2 == 0) {
offsets.emplace_back(i);
}
}
auto col_vec = milvus::test::gen_filter_res(
plan.get(), seg_promote, N * num_iters, MAX_TIMESTAMP, &offsets);
BitsetTypeView view(col_vec->GetRawData(), col_vec->size());
EXPECT_EQ(view.size(), N * num_iters / 2);
for (int i = 0; i < N * num_iters; ++i) {
auto ans = final[i];
auto array = milvus::Array(array_cols["long"][i]);
@ -1136,6 +1254,9 @@ TEST(Expr, TestArrayContains) {
res.push_back(array.get_data<int64_t>(j));
}
ASSERT_EQ(ans, check(res));
if (i % 2 == 0) {
ASSERT_EQ(view[int(i / 2)], check(res));
}
}
}
@ -1181,6 +1302,19 @@ TEST(Expr, TestArrayContains) {
<< std::endl;
EXPECT_EQ(final.size(), N * num_iters);
// specify some offsets and do scalar filtering on these offsets
milvus::exec::OffsetVector offsets;
offsets.reserve(N * num_iters / 2);
for (auto i = 0; i < N * num_iters; ++i) {
if (i % 2 == 0) {
offsets.emplace_back(i);
}
}
auto col_vec = milvus::test::gen_filter_res(
plan.get(), seg_promote, N * num_iters, MAX_TIMESTAMP, &offsets);
BitsetTypeView view(col_vec->GetRawData(), col_vec->size());
EXPECT_EQ(view.size(), N * num_iters / 2);
for (int i = 0; i < N * num_iters; ++i) {
auto ans = final[i];
auto array = milvus::Array(array_cols["string"][i]);
@ -1189,6 +1323,9 @@ TEST(Expr, TestArrayContains) {
res.push_back(array.get_data<std::string_view>(j));
}
ASSERT_EQ(ans, check(res));
if (i % 2 == 0) {
ASSERT_EQ(view[int(i / 2)], check(res));
}
}
}
}
@ -2127,11 +2264,31 @@ TEST(Expr, TestArrayBinaryArith) {
MAX_TIMESTAMP);
EXPECT_EQ(final.size(), N * num_iters);
// specify some offsets and do scalar filtering on these offsets
milvus::exec::OffsetVector offsets;
offsets.reserve(N * num_iters / 2);
for (auto i = 0; i < N * num_iters; ++i) {
if (i % 2 == 0) {
offsets.emplace_back(i);
}
}
auto col_vec = milvus::test::gen_filter_res(
plan->plan_node_->plannodes_->sources()[0]->sources()[0].get(),
seg_promote,
N * num_iters,
MAX_TIMESTAMP,
&offsets);
BitsetTypeView view(col_vec->GetRawData(), col_vec->size());
EXPECT_EQ(view.size(), N * num_iters / 2);
for (int i = 0; i < N * num_iters; ++i) {
auto ans = final[i];
auto array = milvus::Array(array_cols[array_type][i]);
auto ref = ref_func(array);
ASSERT_EQ(ans, ref);
if (i % 2 == 0) {
ASSERT_EQ(view[int(i / 2)], ref);
}
}
}
}
@ -2217,10 +2374,26 @@ TEST(Expr, TestArrayStringMatch) {
<< std::endl;
EXPECT_EQ(final.size(), N * num_iters);
// specify some offsets and do scalar filtering on these offsets
milvus::exec::OffsetVector offsets;
offsets.reserve(N * num_iters / 2);
for (auto i = 0; i < N * num_iters; ++i) {
if (i % 2 == 0) {
offsets.emplace_back(i);
}
}
auto col_vec = milvus::test::gen_filter_res(
plan.get(), seg_promote, N * num_iters, MAX_TIMESTAMP, &offsets);
BitsetTypeView view(col_vec->GetRawData(), col_vec->size());
EXPECT_EQ(view.size(), N * num_iters / 2);
for (int i = 0; i < N * num_iters; ++i) {
auto ans = final[i];
auto array = milvus::Array(array_cols["string"][i]);
ASSERT_EQ(ans, testcase.check_func(array));
if (i % 2 == 0) {
ASSERT_EQ(view[int(i / 2)], testcase.check_func(array));
}
}
}
}
@ -2420,10 +2593,30 @@ TEST(Expr, TestArrayInTerm) {
MAX_TIMESTAMP);
EXPECT_EQ(final.size(), N * num_iters);
// specify some offsets and do scalar filtering on these offsets
milvus::exec::OffsetVector offsets;
offsets.reserve(N * num_iters / 2);
for (auto i = 0; i < N * num_iters; ++i) {
if (i % 2 == 0) {
offsets.emplace_back(i);
}
}
auto col_vec = milvus::test::gen_filter_res(
plan->plan_node_->plannodes_->sources()[0]->sources()[0].get(),
seg_promote,
N * num_iters,
MAX_TIMESTAMP,
&offsets);
BitsetTypeView view(col_vec->GetRawData(), col_vec->size());
EXPECT_EQ(view.size(), N * num_iters / 2);
for (int i = 0; i < N * num_iters; ++i) {
auto ans = final[i];
auto array = milvus::Array(array_cols[array_type][i]);
ASSERT_EQ(ans, ref_func(array));
if (i % 2 == 0) {
ASSERT_EQ(view[int(i / 2)], ref_func(array));
}
}
}
}
@ -2510,10 +2703,26 @@ TEST(Expr, TestTermInArray) {
<< std::endl;
EXPECT_EQ(final.size(), N * num_iters);
// specify some offsets and do scalar filtering on these offsets
milvus::exec::OffsetVector offsets;
offsets.reserve(N * num_iters / 2);
for (auto i = 0; i < N * num_iters; ++i) {
if (i % 2 == 0) {
offsets.emplace_back(i);
}
}
auto col_vec = milvus::test::gen_filter_res(
plan.get(), seg_promote, N * num_iters, MAX_TIMESTAMP, &offsets);
BitsetTypeView view(col_vec->GetRawData(), col_vec->size());
EXPECT_EQ(view.size(), N * num_iters / 2);
for (int i = 0; i < N * num_iters; ++i) {
auto ans = final[i];
auto array = milvus::Array(array_cols["long"][i]);
ASSERT_EQ(ans, testcase.check_func(array));
if (i % 2 == 0) {
ASSERT_EQ(view[int(i / 2)], testcase.check_func(array));
}
}
}
}

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,594 @@
// Copyright (C) 2019-2020 Zilliz. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software distributed under the License
// is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
// or implied. See the License for the specific language governing permissions and limitations under the License
#include <gtest/gtest.h>
#include "common/Schema.h"
#include "query/Plan.h"
#include "segcore/SegmentSealedImpl.h"
#include "segcore/reduce_c.h"
#include "segcore/plan_c.h"
#include "segcore/segment_c.h"
#include "test_utils/DataGen.h"
#include "test_utils/c_api_test_utils.h"
using namespace milvus;
using namespace milvus::query;
using namespace milvus::segcore;
using namespace milvus::storage;
using namespace milvus::tracer;
/**
* this UT is to cover Iterative filtering execution logic (knowhere iterator next() -> scalar filtering)
* so we will not cover all expr type here, just some examples
*/
void
prepareSegmentFieldData(const std::unique_ptr<SegmentSealed>& segment,
size_t row_count,
GeneratedData& data_set) {
auto field_data =
std::make_shared<milvus::FieldData<int64_t>>(DataType::INT64, false);
field_data->FillFieldData(data_set.row_ids_.data(), row_count);
auto field_data_info =
FieldDataInfo{RowFieldID.get(),
row_count,
std::vector<milvus::FieldDataPtr>{field_data}};
segment->LoadFieldData(RowFieldID, field_data_info);
field_data =
std::make_shared<milvus::FieldData<int64_t>>(DataType::INT64, false);
field_data->FillFieldData(data_set.timestamps_.data(), row_count);
field_data_info =
FieldDataInfo{TimestampFieldID.get(),
row_count,
std::vector<milvus::FieldDataPtr>{field_data}};
segment->LoadFieldData(TimestampFieldID, field_data_info);
}
// Asserts that the iterative-filter search path returned exactly the same
// topK * nq hits (identified by segment offset) as the pre-filter path.
//  - topK/nq: expected result shape of both search results
// Distances are deliberately not compared element-wise: equal segment
// offsets already identify identical hits, and this avoids float noise.
// NOTE: the previous version printed every hit to stdout; that per-result
// console I/O only cluttered test logs and was removed.
void
CheckFilterSearchResult(const SearchResult& search_result_by_iterative_filter,
                        const SearchResult& search_result_by_pre_filter,
                        int topK,
                        int nq) {
    ASSERT_EQ(search_result_by_pre_filter.seg_offsets_.size(), topK * nq);
    ASSERT_EQ(search_result_by_pre_filter.distances_.size(), topK * nq);
    ASSERT_EQ(search_result_by_iterative_filter.seg_offsets_.size(), topK * nq);
    ASSERT_EQ(search_result_by_iterative_filter.distances_.size(), topK * nq);
    for (int i = 0; i < topK * nq; ++i) {
        ASSERT_EQ(search_result_by_pre_filter.seg_offsets_[i],
                  search_result_by_iterative_filter.seg_offsets_[i]);
    }
}
// Iterative filtering (knowhere iterator next() -> scalar filtering) must
// produce the same hits as the default pre-filter path on a SEALED segment
// that has a vector index (HNSW). Each sub-case runs the identical
// predicate twice — with and without the "iterative_filter" hint — and
// compares the results.
TEST(IterativeFilter, SealedIndex) {
    using namespace milvus;
    using namespace milvus::query;
    using namespace milvus::segcore;

    // 0. prepare schema (vector field gets id 100, scalars 101..106)
    int dim = 64;
    auto schema = std::make_shared<Schema>();
    auto vec_fid = schema->AddDebugField(
        "fakevec", DataType::VECTOR_FLOAT, dim, knowhere::metric::L2);
    auto int8_fid = schema->AddDebugField("int8", DataType::INT8);
    auto int16_fid = schema->AddDebugField("int16", DataType::INT16);
    auto int32_fid = schema->AddDebugField("int32", DataType::INT32);
    auto int64_fid = schema->AddDebugField("int64", DataType::INT64);
    auto str_fid = schema->AddDebugField("string1", DataType::VARCHAR);
    auto bool_fid = schema->AddDebugField("bool", DataType::BOOL);
    schema->set_primary_field_id(str_fid);
    auto segment = CreateSealedSegment(schema);
    size_t N = 50;

    // 1. load raw data
    auto raw_data = DataGen(schema, N, 42, 0, 8, 10, false, false);
    auto fields = schema->get_fields();
    for (auto field_data : raw_data.raw_->fields_data()) {
        int64_t field_id = field_data.field_id();
        auto info = FieldDataInfo(field_data.field_id(), N);
        auto field_meta = fields.at(FieldId(field_id));
        info.channel->push(
            CreateFieldDataFromDataArray(N, &field_data, field_meta));
        info.channel->close();
        segment->LoadFieldData(FieldId(field_id), info);
    }
    prepareSegmentFieldData(segment, N, raw_data);

    // 2. load vector index so the iterator-based search path is exercised
    auto vector_data = raw_data.get_col<float>(vec_fid);
    auto indexing = GenVecIndexing(
        N, dim, vector_data.data(), knowhere::IndexEnum::INDEX_HNSW);
    LoadIndexInfo load_index_info;
    load_index_info.field_id = vec_fid.get();
    load_index_info.index = std::move(indexing);
    load_index_info.index_params["metric_type"] = knowhere::metric::L2;
    segment->LoadIndex(load_index_info);
    int topK = 10;

    // int8 binaryRange
    {
        const char* raw_plan = R"(vector_anns: <
                                    field_id: 100
                                    predicates: <
                                      binary_range_expr: <
                                        column_info: <
                                          field_id: 101
                                          data_type: Int8
                                        >
                                        lower_inclusive: true,
                                        upper_inclusive: false,
                                        lower_value: <
                                          int64_val: -1
                                        >
                                        upper_value: <
                                          int64_val: 100
                                        >
                                      >
                                    >
                                    query_info: <
                                      topk: 10
                                      metric_type: "L2"
                                      hints: "iterative_filter"
                                      search_params: "{\"ef\": 50}"
                                    >
                                    placeholder_tag: "$0">)";
        proto::plan::PlanNode plan_node;
        auto ok =
            google::protobuf::TextFormat::ParseFromString(raw_plan, &plan_node);
        // A parse failure would otherwise silently search an empty plan.
        ASSERT_TRUE(ok);
        auto plan = CreateSearchPlanFromPlanNode(*schema, plan_node);
        auto num_queries = 1;
        auto seed = 1024;
        auto ph_group_raw = CreatePlaceholderGroup(num_queries, dim, seed);
        auto ph_group =
            ParsePlaceholderGroup(plan.get(), ph_group_raw.SerializeAsString());
        auto search_result =
            segment->Search(plan.get(), ph_group.get(), 1L << 63);

        // Same predicate, no hint: pre-filter execution as the baseline.
        const char* raw_plan2 = R"(vector_anns: <
                                    field_id: 100
                                    predicates: <
                                      binary_range_expr: <
                                        column_info: <
                                          field_id: 101
                                          data_type: Int8
                                        >
                                        lower_inclusive: true,
                                        upper_inclusive: false,
                                        lower_value: <
                                          int64_val: -1
                                        >
                                        upper_value: <
                                          int64_val: 100
                                        >
                                      >
                                    >
                                    query_info: <
                                      topk: 10
                                      metric_type: "L2"
                                      search_params: "{\"ef\": 50}"
                                    >
                                    placeholder_tag: "$0">)";
        proto::plan::PlanNode plan_node2;
        auto ok2 = google::protobuf::TextFormat::ParseFromString(raw_plan2,
                                                                 &plan_node2);
        ASSERT_TRUE(ok2);
        auto plan2 = CreateSearchPlanFromPlanNode(*schema, plan_node2);
        auto search_result2 =
            segment->Search(plan2.get(), ph_group.get(), 1L << 63);
        CheckFilterSearchResult(
            *search_result, *search_result2, topK, num_queries);
    }

    // int16 Termexpr
    {
        const char* raw_plan = R"(vector_anns: <
                                    field_id: 100
                                    predicates: <
                                      term_expr: <
                                        column_info: <
                                          field_id: 102
                                          data_type: Int16
                                        >
                                        values:<int64_val:1> values:<int64_val:2 >
                                      >
                                    >
                                    query_info: <
                                      topk: 10
                                      metric_type: "L2"
                                      hints: "iterative_filter"
                                      search_params: "{\"ef\": 50}"
                                    >
                                    placeholder_tag: "$0">)";
        proto::plan::PlanNode plan_node;
        auto ok =
            google::protobuf::TextFormat::ParseFromString(raw_plan, &plan_node);
        ASSERT_TRUE(ok);
        auto plan = CreateSearchPlanFromPlanNode(*schema, plan_node);
        auto num_queries = 1;
        auto seed = 1024;
        auto ph_group_raw = CreatePlaceholderGroup(num_queries, dim, seed);
        auto ph_group =
            ParsePlaceholderGroup(plan.get(), ph_group_raw.SerializeAsString());
        auto search_result =
            segment->Search(plan.get(), ph_group.get(), 1L << 63);

        const char* raw_plan2 = R"(vector_anns: <
                                    field_id: 100
                                    predicates: <
                                      term_expr: <
                                        column_info: <
                                          field_id: 102
                                          data_type: Int16
                                        >
                                        values:<int64_val:1> values:<int64_val:2 >
                                      >
                                    >
                                    query_info: <
                                      topk: 10
                                      metric_type: "L2"
                                      search_params: "{\"ef\": 50}"
                                    >
                                    placeholder_tag: "$0">)";
        proto::plan::PlanNode plan_node2;
        auto ok2 = google::protobuf::TextFormat::ParseFromString(raw_plan2,
                                                                 &plan_node2);
        ASSERT_TRUE(ok2);
        auto plan2 = CreateSearchPlanFromPlanNode(*schema, plan_node2);
        auto search_result2 =
            segment->Search(plan2.get(), ph_group.get(), 1L << 63);
        CheckFilterSearchResult(
            *search_result, *search_result2, topK, num_queries);
    }
}
// Iterative filtering must match pre-filter execution on a SEALED segment
// WITHOUT a vector index (brute-force search path). Same predicate is run
// with and without the "iterative_filter" hint and the hits are compared.
TEST(IterativeFilter, SealedData) {
    using namespace milvus;
    using namespace milvus::query;
    using namespace milvus::segcore;

    // 0. prepare schema (vector field gets id 100, scalars 101..106)
    int dim = 64;
    auto schema = std::make_shared<Schema>();
    auto vec_fid = schema->AddDebugField(
        "fakevec", DataType::VECTOR_FLOAT, dim, knowhere::metric::L2);
    auto int8_fid = schema->AddDebugField("int8", DataType::INT8);
    auto int16_fid = schema->AddDebugField("int16", DataType::INT16);
    auto int32_fid = schema->AddDebugField("int32", DataType::INT32);
    auto int64_fid = schema->AddDebugField("int64", DataType::INT64);
    auto str_fid = schema->AddDebugField("string1", DataType::VARCHAR);
    auto bool_fid = schema->AddDebugField("bool", DataType::BOOL);
    schema->set_primary_field_id(str_fid);
    auto segment = CreateSealedSegment(schema);
    size_t N = 100;

    // 1. load raw data (no vector index is loaded on purpose)
    auto raw_data = DataGen(schema, N, 42, 0, 8, 10, false, false);
    auto fields = schema->get_fields();
    for (auto field_data : raw_data.raw_->fields_data()) {
        int64_t field_id = field_data.field_id();
        auto info = FieldDataInfo(field_data.field_id(), N);
        auto field_meta = fields.at(FieldId(field_id));
        info.channel->push(
            CreateFieldDataFromDataArray(N, &field_data, field_meta));
        info.channel->close();
        segment->LoadFieldData(FieldId(field_id), info);
    }
    prepareSegmentFieldData(segment, N, raw_data);
    int topK = 10;

    // int8 binaryRange
    {
        const char* raw_plan = R"(vector_anns: <
                                    field_id: 100
                                    predicates: <
                                      binary_range_expr: <
                                        column_info: <
                                          field_id: 101
                                          data_type: Int8
                                        >
                                        lower_inclusive: true,
                                        upper_inclusive: false,
                                        lower_value: <
                                          int64_val: -1
                                        >
                                        upper_value: <
                                          int64_val: 100
                                        >
                                      >
                                    >
                                    query_info: <
                                      topk: 10
                                      metric_type: "L2"
                                      hints: "iterative_filter"
                                      search_params: "{\"ef\": 50}"
                                    >
                                    placeholder_tag: "$0">)";
        proto::plan::PlanNode plan_node;
        auto ok =
            google::protobuf::TextFormat::ParseFromString(raw_plan, &plan_node);
        // A parse failure would otherwise silently search an empty plan.
        ASSERT_TRUE(ok);
        auto plan = CreateSearchPlanFromPlanNode(*schema, plan_node);
        auto num_queries = 1;
        auto seed = 1024;
        auto ph_group_raw = CreatePlaceholderGroup(num_queries, dim, seed);
        auto ph_group =
            ParsePlaceholderGroup(plan.get(), ph_group_raw.SerializeAsString());
        auto search_result =
            segment->Search(plan.get(), ph_group.get(), 1L << 63);

        // Same predicate, no hint: pre-filter execution as the baseline.
        const char* raw_plan2 = R"(vector_anns: <
                                    field_id: 100
                                    predicates: <
                                      binary_range_expr: <
                                        column_info: <
                                          field_id: 101
                                          data_type: Int8
                                        >
                                        lower_inclusive: true,
                                        upper_inclusive: false,
                                        lower_value: <
                                          int64_val: -1
                                        >
                                        upper_value: <
                                          int64_val: 100
                                        >
                                      >
                                    >
                                    query_info: <
                                      topk: 10
                                      metric_type: "L2"
                                      search_params: "{\"ef\": 50}"
                                    >
                                    placeholder_tag: "$0">)";
        proto::plan::PlanNode plan_node2;
        auto ok2 = google::protobuf::TextFormat::ParseFromString(raw_plan2,
                                                                 &plan_node2);
        ASSERT_TRUE(ok2);
        auto plan2 = CreateSearchPlanFromPlanNode(*schema, plan_node2);
        auto search_result2 =
            segment->Search(plan2.get(), ph_group.get(), 1L << 63);
        CheckFilterSearchResult(
            *search_result, *search_result2, topK, num_queries);
    }
}
// Iterative filtering must match pre-filter execution on a GROWING segment
// with no interim index (brute-force over raw data). Same predicate is run
// with and without the "iterative_filter" hint and the hits are compared.
TEST(IterativeFilter, GrowingRawData) {
    int dim = 128;
    auto schema = std::make_shared<Schema>();
    auto metric_type = knowhere::metric::L2;
    auto int64_field_id = schema->AddDebugField("int64", DataType::INT64);
    auto int32_field_id = schema->AddDebugField("int32", DataType::INT32);
    auto vec_field_id = schema->AddDebugField(
        "embeddings", DataType::VECTOR_FLOAT, 128, metric_type);
    schema->set_primary_field_id(int64_field_id);

    auto config = SegcoreConfig::default_config();
    config.set_chunk_rows(8);
    config.set_enable_interim_segment_index(
        false);  //no growing index, test brute force
    auto segment_growing = CreateGrowingSegment(schema, nullptr, 1, config);
    auto segment_growing_impl =
        dynamic_cast<SegmentGrowingImpl*>(segment_growing.get());

    // insert raw data into the growing segment
    int64_t rows_per_batch = 30;
    int n_batch = 1;
    for (int i = 0; i < n_batch; i++) {
        auto data_set =
            DataGen(schema, rows_per_batch, 42, 0, 8, 10, false, false);
        auto offset = segment_growing_impl->PreInsert(rows_per_batch);
        segment_growing_impl->Insert(offset,
                                     rows_per_batch,
                                     data_set.row_ids_.data(),
                                     data_set.timestamps_.data(),
                                     data_set.raw_);
    }
    auto topK = 10;

    // int64 binaryRange
    {
        const char* raw_plan = R"(vector_anns: <
                                    field_id: 102
                                    predicates: <
                                      binary_range_expr: <
                                        column_info: <
                                          field_id: 100
                                          data_type: Int64
                                        >
                                        lower_inclusive: true,
                                        upper_inclusive: false,
                                        lower_value: <
                                          int64_val: -1
                                        >
                                        upper_value: <
                                          int64_val: 1
                                        >
                                      >
                                    >
                                    query_info: <
                                      topk: 10
                                      metric_type: "L2"
                                      hints: "iterative_filter"
                                      search_params: "{\"ef\": 50}"
                                    >
                                    placeholder_tag: "$0">)";
        proto::plan::PlanNode plan_node;
        auto ok =
            google::protobuf::TextFormat::ParseFromString(raw_plan, &plan_node);
        // A parse failure would otherwise silently search an empty plan.
        ASSERT_TRUE(ok);
        auto plan = CreateSearchPlanFromPlanNode(*schema, plan_node);
        auto num_queries = 1;
        auto seed = 1024;
        auto ph_group_raw = CreatePlaceholderGroup(num_queries, dim, seed);
        auto ph_group =
            ParsePlaceholderGroup(plan.get(), ph_group_raw.SerializeAsString());
        auto search_result =
            segment_growing_impl->Search(plan.get(), ph_group.get(), 1L << 63);

        // Same predicate, no hint: pre-filter execution as the baseline.
        const char* raw_plan2 = R"(vector_anns: <
                                    field_id: 102
                                    predicates: <
                                      binary_range_expr: <
                                        column_info: <
                                          field_id: 100
                                          data_type: Int64
                                        >
                                        lower_inclusive: true,
                                        upper_inclusive: false,
                                        lower_value: <
                                          int64_val: -1
                                        >
                                        upper_value: <
                                          int64_val: 1
                                        >
                                      >
                                    >
                                    query_info: <
                                      topk: 10
                                      metric_type: "L2"
                                      search_params: "{\"ef\": 50}"
                                    >
                                    placeholder_tag: "$0">)";
        proto::plan::PlanNode plan_node2;
        auto ok2 = google::protobuf::TextFormat::ParseFromString(raw_plan2,
                                                                 &plan_node2);
        ASSERT_TRUE(ok2);
        auto plan2 = CreateSearchPlanFromPlanNode(*schema, plan_node2);
        auto search_result2 =
            segment_growing_impl->Search(plan2.get(), ph_group.get(), 1L << 63);
        CheckFilterSearchResult(
            *search_result, *search_result2, topK, num_queries);
    }
}
// Iterative filtering must match pre-filter execution on a GROWING segment
// that builds an interim IVF_FLAT index. Same predicate is run with and
// without the "iterative_filter" hint and the hits are compared.
TEST(IterativeFilter, GrowingIndex) {
    int dim = 128;
    auto schema = std::make_shared<Schema>();
    auto metric_type = knowhere::metric::L2;
    auto int64_field_id = schema->AddDebugField("int64", DataType::INT64);
    auto int32_field_id = schema->AddDebugField("int32", DataType::INT32);
    auto vec_field_id = schema->AddDebugField(
        "embeddings", DataType::VECTOR_FLOAT, 128, metric_type);
    schema->set_primary_field_id(int64_field_id);

    // index meta so the growing segment builds an interim IVF_FLAT index
    std::map<std::string, std::string> index_params = {
        {"index_type", "IVF_FLAT"},
        {"metric_type", metric_type},
        {"nlist", "4"}};
    std::map<std::string, std::string> type_params = {{"dim", "128"}};
    FieldIndexMeta fieldIndexMeta(
        vec_field_id, std::move(index_params), std::move(type_params));
    std::map<FieldId, FieldIndexMeta> fieldMap = {
        {vec_field_id, fieldIndexMeta}};
    IndexMetaPtr metaPtr =
        std::make_shared<CollectionIndexMeta>(10000, std::move(fieldMap));

    auto config = SegcoreConfig::default_config();
    config.set_chunk_rows(16);
    config.set_enable_interim_segment_index(true);  // test growing inter index
    config.set_nlist(4);
    auto segment_growing = CreateGrowingSegment(schema, metaPtr, 1, config);
    auto segment_growing_impl =
        dynamic_cast<SegmentGrowingImpl*>(segment_growing.get());

    //1. prepare raw data in growing segment
    int64_t rows_per_batch = 100;
    int n_batch = 1;
    for (int i = 0; i < n_batch; i++) {
        auto data_set =
            DataGen(schema, rows_per_batch, 42, 0, 8, 10, false, false);
        auto offset = segment_growing_impl->PreInsert(rows_per_batch);
        segment_growing_impl->Insert(offset,
                                     rows_per_batch,
                                     data_set.row_ids_.data(),
                                     data_set.timestamps_.data(),
                                     data_set.raw_);
    }
    auto topK = 10;

    // int64 binaryRange
    {
        const char* raw_plan = R"(vector_anns: <
                                    field_id: 102
                                    predicates: <
                                      binary_range_expr: <
                                        column_info: <
                                          field_id: 100
                                          data_type: Int64
                                        >
                                        lower_inclusive: true,
                                        upper_inclusive: false,
                                        lower_value: <
                                          int64_val: -1
                                        >
                                        upper_value: <
                                          int64_val: 1
                                        >
                                      >
                                    >
                                    query_info: <
                                      topk: 10
                                      metric_type: "L2"
                                      hints: "iterative_filter"
                                      search_params: "{\"nprobe\": 4}"
                                    >
                                    placeholder_tag: "$0">)";
        proto::plan::PlanNode plan_node;
        auto ok =
            google::protobuf::TextFormat::ParseFromString(raw_plan, &plan_node);
        // A parse failure would otherwise silently search an empty plan.
        ASSERT_TRUE(ok);
        auto plan = CreateSearchPlanFromPlanNode(*schema, plan_node);
        auto num_queries = 1;
        auto seed = 1024;
        auto ph_group_raw = CreatePlaceholderGroup(num_queries, dim, seed);
        auto ph_group =
            ParsePlaceholderGroup(plan.get(), ph_group_raw.SerializeAsString());
        auto search_result =
            segment_growing_impl->Search(plan.get(), ph_group.get(), 1L << 63);

        // Same predicate, no hint: pre-filter execution as the baseline.
        const char* raw_plan2 = R"(vector_anns: <
                                    field_id: 102
                                    predicates: <
                                      binary_range_expr: <
                                        column_info: <
                                          field_id: 100
                                          data_type: Int64
                                        >
                                        lower_inclusive: true,
                                        upper_inclusive: false,
                                        lower_value: <
                                          int64_val: -1
                                        >
                                        upper_value: <
                                          int64_val: 1
                                        >
                                      >
                                    >
                                    query_info: <
                                      topk: 10
                                      metric_type: "L2"
                                      search_params: "{\"nprobe\": 4}"
                                    >
                                    placeholder_tag: "$0">)";
        proto::plan::PlanNode plan_node2;
        auto ok2 = google::protobuf::TextFormat::ParseFromString(raw_plan2,
                                                                 &plan_node2);
        ASSERT_TRUE(ok2);
        auto plan2 = CreateSearchPlanFromPlanNode(*schema, plan_node2);
        auto search_result2 =
            segment_growing_impl->Search(plan2.get(), ph_group.get(), 1L << 63);
        CheckFilterSearchResult(
            *search_result, *search_result2, topK, num_queries);
    }
}

View File

@ -290,12 +290,32 @@ TEST(StringExpr, Term) {
MAX_TIMESTAMP);
EXPECT_EQ(final.size(), N * num_iters);
// specify some offsets and do scalar filtering on these offsets
milvus::exec::OffsetVector offsets;
offsets.reserve(N * num_iters / 2);
for (auto i = 0; i < N * num_iters; ++i) {
if (i % 2 == 0) {
offsets.emplace_back(i);
}
}
auto col_vec = milvus::test::gen_filter_res(
plan->plan_node_->plannodes_->sources()[0]->sources()[0].get(),
seg_promote,
N * num_iters,
MAX_TIMESTAMP,
&offsets);
BitsetTypeView view(col_vec->GetRawData(), col_vec->size());
EXPECT_EQ(view.size(), N * num_iters / 2);
for (int i = 0; i < N * num_iters; ++i) {
auto ans = final[i];
auto val = str_col[i];
auto ref = std::find(term.begin(), term.end(), val) != term.end();
ASSERT_EQ(ans, ref) << "@" << i << "!!" << val;
if (i % 2 == 0) {
ASSERT_EQ(view[int(i / 2)], ref) << "@" << i << "!!" << val;
}
}
}
}
@ -363,6 +383,23 @@ TEST(StringExpr, TermNullable) {
MAX_TIMESTAMP);
EXPECT_EQ(final.size(), N * num_iters);
// specify some offsets and do scalar filtering on these offsets
milvus::exec::OffsetVector offsets;
offsets.reserve(N * num_iters / 2);
for (auto i = 0; i < N * num_iters; ++i) {
if (i % 2 == 0) {
offsets.emplace_back(i);
}
}
auto col_vec = milvus::test::gen_filter_res(
plan->plan_node_->plannodes_->sources()[0]->sources()[0].get(),
seg_promote,
N * num_iters,
MAX_TIMESTAMP,
&offsets);
BitsetTypeView view(col_vec->GetRawData(), col_vec->size());
EXPECT_EQ(view.size(), N * num_iters / 2);
for (int i = 0; i < N * num_iters; ++i) {
auto ans = final[i];
if (!valid_data[i]) {
@ -372,6 +409,9 @@ TEST(StringExpr, TermNullable) {
auto val = str_col[i];
auto ref = std::find(term.begin(), term.end(), val) != term.end();
ASSERT_EQ(ans, ref) << "@" << i << "!!" << val;
if (i % 2 == 0) {
ASSERT_EQ(view[int(i / 2)], ref) << "@" << i << "!!" << val;
}
}
}
}
@ -481,6 +521,23 @@ TEST(StringExpr, Compare) {
MAX_TIMESTAMP);
EXPECT_EQ(final.size(), N * num_iters);
// specify some offsets and do scalar filtering on these offsets
milvus::exec::OffsetVector offsets;
offsets.reserve(N * num_iters / 2);
for (auto i = 0; i < N * num_iters; ++i) {
if (i % 2 == 0) {
offsets.emplace_back(i);
}
}
auto col_vec = milvus::test::gen_filter_res(
plan->plan_node_->plannodes_->sources()[0]->sources()[0].get(),
seg_promote,
N * num_iters,
MAX_TIMESTAMP,
&offsets);
BitsetTypeView view(col_vec->GetRawData(), col_vec->size());
EXPECT_EQ(view.size(), N * num_iters / 2);
for (int i = 0; i < N * num_iters; ++i) {
auto ans = final[i];
@ -488,6 +545,10 @@ TEST(StringExpr, Compare) {
auto another_val = another_str_col[i];
auto ref = ref_func(val, another_val);
ASSERT_EQ(ans, ref) << "@" << op << "@" << i << "!!" << val;
if (i % 2 == 0) {
ASSERT_EQ(view[int(i / 2)], ref)
<< "@" << op << "@" << i << "!!" << val;
}
}
}
}
@ -609,6 +670,23 @@ TEST(StringExpr, CompareNullable) {
MAX_TIMESTAMP);
EXPECT_EQ(final.size(), N * num_iters);
// specify some offsets and do scalar filtering on these offsets
milvus::exec::OffsetVector offsets;
offsets.reserve(N * num_iters / 2);
for (auto i = 0; i < N * num_iters; ++i) {
if (i % 2 == 0) {
offsets.emplace_back(i);
}
}
auto col_vec = milvus::test::gen_filter_res(
plan->plan_node_->plannodes_->sources()[0]->sources()[0].get(),
seg_promote,
N * num_iters,
MAX_TIMESTAMP,
&offsets);
BitsetTypeView view(col_vec->GetRawData(), col_vec->size());
EXPECT_EQ(view.size(), N * num_iters / 2);
for (int i = 0; i < N * num_iters; ++i) {
auto ans = final[i];
if (!valid_data[i]) {
@ -619,6 +697,10 @@ TEST(StringExpr, CompareNullable) {
auto another_val = another_str_col[i];
auto ref = ref_func(val, another_val);
ASSERT_EQ(ans, ref) << "@" << op << "@" << i << "!!" << val;
if (i % 2 == 0) {
ASSERT_EQ(view[int(i / 2)], ref)
<< "@" << op << "@" << i << "!!" << val;
}
}
}
}
@ -741,16 +823,40 @@ TEST(StringExpr, CompareNullable2) {
MAX_TIMESTAMP);
EXPECT_EQ(final.size(), N * num_iters);
// specify some offsets and do scalar filtering on these offsets
milvus::exec::OffsetVector offsets;
offsets.reserve(N * num_iters / 2);
for (auto i = 0; i < N * num_iters; ++i) {
if (i % 2 == 0) {
offsets.emplace_back(i);
}
}
auto col_vec = milvus::test::gen_filter_res(
plan->plan_node_->plannodes_->sources()[0]->sources()[0].get(),
seg_promote,
N * num_iters,
MAX_TIMESTAMP,
&offsets);
BitsetTypeView view(col_vec->GetRawData(), col_vec->size());
EXPECT_EQ(view.size(), N * num_iters / 2);
for (int i = 0; i < N * num_iters; ++i) {
auto ans = final[i];
if (!valid_data[i]) {
ASSERT_EQ(ans, false);
if (i % 2 == 0) {
ASSERT_EQ(view[int(i / 2)], false);
}
continue;
}
auto val = str_col[i];
auto another_val = another_str_col[i];
auto ref = ref_func(val, another_val);
ASSERT_EQ(ans, ref) << "@" << op << "@" << i << "!!" << val;
if (i % 2 == 0) {
ASSERT_EQ(view[int(i / 2)], ref)
<< "@" << op << "@" << i << "!!" << val;
}
}
}
}
@ -840,6 +946,23 @@ TEST(StringExpr, UnaryRange) {
MAX_TIMESTAMP);
EXPECT_EQ(final.size(), N * num_iters);
// specify some offsets and do scalar filtering on these offsets
milvus::exec::OffsetVector offsets;
offsets.reserve(N * num_iters / 2);
for (auto i = 0; i < N * num_iters; ++i) {
if (i % 2 == 0) {
offsets.emplace_back(i);
}
}
auto col_vec = milvus::test::gen_filter_res(
plan->plan_node_->plannodes_->sources()[0]->sources()[0].get(),
seg_promote,
N * num_iters,
MAX_TIMESTAMP,
&offsets);
BitsetTypeView view(col_vec->GetRawData(), col_vec->size());
EXPECT_EQ(view.size(), N * num_iters / 2);
for (int i = 0; i < N * num_iters; ++i) {
auto ans = final[i];
@ -847,6 +970,10 @@ TEST(StringExpr, UnaryRange) {
auto ref = ref_func(val);
ASSERT_EQ(ans, ref)
<< "@" << op << "@" << value << "@" << i << "!!" << val;
if (i % 2 == 0) {
ASSERT_EQ(view[int(i / 2)], ref)
<< "@" << op << "@" << value << "@" << i << "!!" << val;
}
}
}
}
@ -947,6 +1074,23 @@ TEST(StringExpr, UnaryRangeNullable) {
MAX_TIMESTAMP);
EXPECT_EQ(final.size(), N * num_iters);
// specify some offsets and do scalar filtering on these offsets
milvus::exec::OffsetVector offsets;
offsets.reserve(N * num_iters / 2);
for (auto i = 0; i < N * num_iters; ++i) {
if (i % 2 == 0) {
offsets.emplace_back(i);
}
}
auto col_vec = milvus::test::gen_filter_res(
plan->plan_node_->plannodes_->sources()[0]->sources()[0].get(),
seg_promote,
N * num_iters,
MAX_TIMESTAMP,
&offsets);
BitsetTypeView view(col_vec->GetRawData(), col_vec->size());
EXPECT_EQ(view.size(), N * num_iters / 2);
for (int i = 0; i < N * num_iters; ++i) {
auto ans = final[i];
if (!valid_data[i]) {
@ -957,6 +1101,10 @@ TEST(StringExpr, UnaryRangeNullable) {
auto ref = ref_func(val);
ASSERT_EQ(ans, ref)
<< "@" << op << "@" << value << "@" << i << "!!" << val;
if (i % 2 == 0) {
ASSERT_EQ(view[int(i / 2)], ref)
<< "@" << op << "@" << value << "@" << i << "!!" << val;
}
}
}
}
@ -1064,6 +1212,23 @@ TEST(StringExpr, BinaryRange) {
MAX_TIMESTAMP);
EXPECT_EQ(final.size(), N * num_iters);
// specify some offsets and do scalar filtering on these offsets
milvus::exec::OffsetVector offsets;
offsets.reserve(N * num_iters / 2);
for (auto i = 0; i < N * num_iters; ++i) {
if (i % 2 == 0) {
offsets.emplace_back(i);
}
}
auto col_vec = milvus::test::gen_filter_res(
plan->plan_node_->plannodes_->sources()[0]->sources()[0].get(),
seg_promote,
N * num_iters,
MAX_TIMESTAMP,
&offsets);
BitsetTypeView view(col_vec->GetRawData(), col_vec->size());
EXPECT_EQ(view.size(), N * num_iters / 2);
for (int i = 0; i < N * num_iters; ++i) {
auto ans = final[i];
@ -1072,6 +1237,11 @@ TEST(StringExpr, BinaryRange) {
ASSERT_EQ(ans, ref)
<< "@" << lb_inclusive << "@" << ub_inclusive << "@" << lb
<< "@" << ub << "@" << i << "!!" << val;
if (i % 2 == 0) {
ASSERT_EQ(view[int(i / 2)], ref)
<< "@" << lb_inclusive << "@" << ub_inclusive << "@" << lb
<< "@" << ub << "@" << i << "!!" << val;
}
}
}
}
@ -1191,10 +1361,30 @@ TEST(StringExpr, BinaryRangeNullable) {
MAX_TIMESTAMP);
EXPECT_EQ(final.size(), N * num_iters);
// specify some offsets and do scalar filtering on these offsets
milvus::exec::OffsetVector offsets;
offsets.reserve(N * num_iters / 2);
for (auto i = 0; i < N * num_iters; ++i) {
if (i % 2 == 0) {
offsets.emplace_back(i);
}
}
auto col_vec = milvus::test::gen_filter_res(
plan->plan_node_->plannodes_->sources()[0]->sources()[0].get(),
seg_promote,
N * num_iters,
MAX_TIMESTAMP,
&offsets);
BitsetTypeView view(col_vec->GetRawData(), col_vec->size());
EXPECT_EQ(view.size(), N * num_iters / 2);
for (int i = 0; i < N * num_iters; ++i) {
auto ans = final[i];
if (!valid_data[i]) {
ASSERT_EQ(ans, false);
if (i % 2 == 0) {
ASSERT_EQ(view[int(i / 2)], false);
}
continue;
}
auto val = str_col[i];
@ -1202,6 +1392,11 @@ TEST(StringExpr, BinaryRangeNullable) {
ASSERT_EQ(ans, ref)
<< "@" << lb_inclusive << "@" << ub_inclusive << "@" << lb
<< "@" << ub << "@" << i << "!!" << val;
if (i % 2 == 0) {
ASSERT_EQ(view[int(i / 2)], ref)
<< "@" << lb_inclusive << "@" << ub_inclusive << "@" << lb
<< "@" << ub << "@" << i << "!!" << val;
}
}
}
}

View File

@ -16,6 +16,7 @@
#include "common/Consts.h"
#include "expr/ITypeExpr.h"
#include "exec/expression/Expr.h"
#include "pb/plan.pb.h"
#include "plan/PlanNode.h"
@ -104,4 +105,30 @@ CreateSearchPlanByExpr(std::shared_ptr<milvus::expr::ITypeExpr> expr) {
return plannode;
}
// Evaluates a scalar filter expression (the FilterBitsNode of `plan_node`)
// against `segment` and returns the resulting bitset as a ColumnVector.
//  - active_count: number of rows visible to the filter
//  - timestamp:    MVCC timestamp for the query context
//  - offsets:      optional row offsets; when non-null the filter is
//                  evaluated only at those rows (iterative filtering).
inline ColumnVectorPtr
gen_filter_res(milvus::plan::PlanNode* plan_node,
               const milvus::segcore::SegmentInternalInterface* segment,
               uint64_t active_count,
               uint64_t timestamp,
               FixedVector<int32_t>* offsets = nullptr) {
    auto* filter_node = dynamic_cast<milvus::plan::FilterBitsNode*>(plan_node);
    assert(filter_node != nullptr);

    std::vector<milvus::expr::TypedExprPtr> filters{filter_node->filter()};
    auto query_context = std::make_shared<milvus::exec::QueryContext>(
        DEAFULT_QUERY_ID, segment, active_count, timestamp);
    auto exec_context =
        std::make_unique<milvus::exec::ExecContext>(query_context.get());
    auto expr_set =
        std::make_unique<milvus::exec::ExprSet>(filters, exec_context.get());

    milvus::exec::EvalCtx eval_ctx(exec_context.get(), expr_set.get());
    eval_ctx.set_offset_input(offsets);

    std::vector<VectorPtr> results;
    expr_set->Eval(0, 1, true, eval_ctx, results);
    return std::dynamic_pointer_cast<milvus::ColumnVector>(results[0]);
}
} // namespace milvus::test

View File

@ -975,6 +975,7 @@ func generateSearchParams(reqSearchParams searchParams) []*commonpb.KeyValuePair
bs, _ := json.Marshal(reqSearchParams.Params)
searchParams = append(searchParams, &commonpb.KeyValuePair{Key: Params, Value: string(bs)})
searchParams = append(searchParams, &commonpb.KeyValuePair{Key: common.IgnoreGrowing, Value: strconv.FormatBool(reqSearchParams.IgnoreGrowing)})
searchParams = append(searchParams, &commonpb.KeyValuePair{Key: common.HintsKey, Value: reqSearchParams.Hints})
// need to exposure ParamRoundDecimal in req?
searchParams = append(searchParams, &commonpb.KeyValuePair{Key: ParamRoundDecimal, Value: "-1"})
return searchParams

View File

@ -181,6 +181,7 @@ type searchParams struct {
MetricType string `json:"metricType"`
Params map[string]interface{} `json:"params"`
IgnoreGrowing bool `json:"ignoreGrowing"`
Hints string `json:"hints"`
}
type SearchReqV2 struct {

View File

@ -66,6 +66,7 @@ message QueryInfo {
bool strict_group_size = 9;
double bm25_avgdl = 10;
int64 query_field_id =11;
string hints = 12;
}
message ColumnInfo {

View File

@ -153,6 +153,11 @@ func parseSearchInfo(searchParamsPair []*commonpb.KeyValuePair, schema *schemapb
roundDecimalStr = "-1"
}
hints, err := funcutil.GetAttrByKeyFromRepeatedKV(common.HintsKey, searchParamsPair)
if err != nil {
hints = ""
}
roundDecimal, err := strconv.ParseInt(roundDecimalStr, 0, 64)
if err != nil {
return &SearchInfo{planInfo: nil, offset: 0, isIterator: false, parseError: fmt.Errorf("%s [%s] is invalid, should be -1 or an integer in range [0, 6]", RoundDecimalKey, roundDecimalStr)}
@ -200,6 +205,7 @@ func parseSearchInfo(searchParamsPair []*commonpb.KeyValuePair, schema *schemapb
GroupByFieldId: groupByFieldId,
GroupSize: groupSize,
StrictGroupSize: strictGroupSize,
Hints: hints,
},
offset: offset,
isIterator: isIterator,

View File

@ -138,6 +138,7 @@ const (
BitmapCardinalityLimitKey = "bitmap_cardinality_limit"
IgnoreGrowing = "ignore_growing"
ConsistencyLevel = "consistency_level"
HintsKey = "hints"
)
// Doc-in-doc-out