mirror of
https://gitee.com/milvus-io/milvus.git
synced 2025-12-06 17:18:35 +08:00
enhance: support iterative filter execution (#37363)
issue: #37360 --------- Signed-off-by: chasingegg <chao.gao@zilliz.com>
This commit is contained in:
parent
a118ca14a7
commit
994fc544e7
@ -21,12 +21,28 @@ namespace milvus {
|
|||||||
std::pair<std::vector<std::string_view>, FixedVector<bool>>
|
std::pair<std::vector<std::string_view>, FixedVector<bool>>
|
||||||
StringChunk::StringViews() {
|
StringChunk::StringViews() {
|
||||||
std::vector<std::string_view> ret;
|
std::vector<std::string_view> ret;
|
||||||
|
ret.reserve(row_nums_);
|
||||||
for (int i = 0; i < row_nums_; i++) {
|
for (int i = 0; i < row_nums_; i++) {
|
||||||
ret.emplace_back(data_ + offsets_[i], offsets_[i + 1] - offsets_[i]);
|
ret.emplace_back(data_ + offsets_[i], offsets_[i + 1] - offsets_[i]);
|
||||||
}
|
}
|
||||||
return {ret, valid_};
|
return {ret, valid_};
|
||||||
}
|
}
|
||||||
|
|
||||||
|
std::pair<std::vector<std::string_view>, FixedVector<bool>>
|
||||||
|
StringChunk::ViewsByOffsets(const FixedVector<int32_t>& offsets) {
|
||||||
|
std::vector<std::string_view> ret;
|
||||||
|
FixedVector<bool> valid_res;
|
||||||
|
size_t size = offsets.size();
|
||||||
|
ret.reserve(size);
|
||||||
|
valid_res.reserve(size);
|
||||||
|
for (auto i = 0; i < size; ++i) {
|
||||||
|
ret.emplace_back(data_ + offsets_[offsets[i]],
|
||||||
|
offsets_[offsets[i] + 1] - offsets_[offsets[i]]);
|
||||||
|
valid_res.emplace_back(isValid(offsets[i]));
|
||||||
|
}
|
||||||
|
return {ret, valid_res};
|
||||||
|
}
|
||||||
|
|
||||||
void
|
void
|
||||||
ArrayChunk::ConstructViews() {
|
ArrayChunk::ConstructViews() {
|
||||||
views_.reserve(row_nums_);
|
views_.reserve(row_nums_);
|
||||||
|
|||||||
@ -73,7 +73,10 @@ class Chunk {
|
|||||||
|
|
||||||
virtual bool
|
virtual bool
|
||||||
isValid(int offset) {
|
isValid(int offset) {
|
||||||
return valid_[offset];
|
if (nullable_) {
|
||||||
|
return valid_[offset];
|
||||||
|
}
|
||||||
|
return true;
|
||||||
};
|
};
|
||||||
|
|
||||||
protected:
|
protected:
|
||||||
@ -170,6 +173,9 @@ class StringChunk : public Chunk {
|
|||||||
return result;
|
return result;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
std::pair<std::vector<std::string_view>, FixedVector<bool>>
|
||||||
|
ViewsByOffsets(const FixedVector<int32_t>& offsets);
|
||||||
|
|
||||||
const char*
|
const char*
|
||||||
ValueAt(int64_t idx) const override {
|
ValueAt(int64_t idx) const override {
|
||||||
return (*this)[idx].data();
|
return (*this)[idx].data();
|
||||||
|
|||||||
@ -47,6 +47,8 @@ const char KMEANS_CLUSTER[] = "KMEANS";
|
|||||||
const char VEC_OPT_FIELDS[] = "opt_fields";
|
const char VEC_OPT_FIELDS[] = "opt_fields";
|
||||||
const char PAGE_RETAIN_ORDER[] = "page_retain_order";
|
const char PAGE_RETAIN_ORDER[] = "page_retain_order";
|
||||||
const char TEXT_LOG_ROOT_PATH[] = "text_log";
|
const char TEXT_LOG_ROOT_PATH[] = "text_log";
|
||||||
|
const char ITERATIVE_FILTER[] = "iterative_filter";
|
||||||
|
const char HINTS[] = "hints";
|
||||||
|
|
||||||
const char DEFAULT_PLANNODE_ID[] = "0";
|
const char DEFAULT_PLANNODE_ID[] = "0";
|
||||||
const char DEAFULT_QUERY_ID[] = "0";
|
const char DEAFULT_QUERY_ID[] = "0";
|
||||||
|
|||||||
@ -35,6 +35,7 @@ struct SearchInfo {
|
|||||||
std::optional<FieldId> group_by_field_id_;
|
std::optional<FieldId> group_by_field_id_;
|
||||||
tracer::TraceContext trace_ctx_;
|
tracer::TraceContext trace_ctx_;
|
||||||
bool materialized_view_involved = false;
|
bool materialized_view_involved = false;
|
||||||
|
bool iterative_filter_execution = false;
|
||||||
};
|
};
|
||||||
|
|
||||||
using SearchInfoPtr = std::shared_ptr<SearchInfo>;
|
using SearchInfoPtr = std::shared_ptr<SearchInfo>;
|
||||||
|
|||||||
@ -23,6 +23,7 @@
|
|||||||
#include "exec/operator/CallbackSink.h"
|
#include "exec/operator/CallbackSink.h"
|
||||||
#include "exec/operator/CountNode.h"
|
#include "exec/operator/CountNode.h"
|
||||||
#include "exec/operator/FilterBitsNode.h"
|
#include "exec/operator/FilterBitsNode.h"
|
||||||
|
#include "exec/operator/IterativeFilterNode.h"
|
||||||
#include "exec/operator/MvccNode.h"
|
#include "exec/operator/MvccNode.h"
|
||||||
#include "exec/operator/Operator.h"
|
#include "exec/operator/Operator.h"
|
||||||
#include "exec/operator/VectorSearchNode.h"
|
#include "exec/operator/VectorSearchNode.h"
|
||||||
@ -52,11 +53,16 @@ DriverFactory::CreateDriver(std::unique_ptr<DriverContext> ctx,
|
|||||||
for (size_t i = 0; i < plannodes_.size(); ++i) {
|
for (size_t i = 0; i < plannodes_.size(); ++i) {
|
||||||
auto id = operators.size();
|
auto id = operators.size();
|
||||||
auto plannode = plannodes_[i];
|
auto plannode = plannodes_[i];
|
||||||
if (auto filternode =
|
if (auto filterbitsnode =
|
||||||
std::dynamic_pointer_cast<const plan::FilterBitsNode>(
|
std::dynamic_pointer_cast<const plan::FilterBitsNode>(
|
||||||
plannode)) {
|
plannode)) {
|
||||||
operators.push_back(
|
operators.push_back(std::make_unique<PhyFilterBitsNode>(
|
||||||
std::make_unique<PhyFilterBitsNode>(id, ctx.get(), filternode));
|
id, ctx.get(), filterbitsnode));
|
||||||
|
} else if (auto filternode =
|
||||||
|
std::dynamic_pointer_cast<const plan::FilterNode>(
|
||||||
|
plannode)) {
|
||||||
|
operators.push_back(std::make_unique<PhyIterativeFilterNode>(
|
||||||
|
id, ctx.get(), filternode));
|
||||||
} else if (auto mvccnode =
|
} else if (auto mvccnode =
|
||||||
std::dynamic_pointer_cast<const plan::MvccNode>(
|
std::dynamic_pointer_cast<const plan::MvccNode>(
|
||||||
plannode)) {
|
plannode)) {
|
||||||
|
|||||||
@ -230,6 +230,11 @@ class QueryContext : public Context {
|
|||||||
return search_info_;
|
return search_info_;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
knowhere::MetricType
|
||||||
|
get_metric_type() {
|
||||||
|
return search_info_.metric_type_;
|
||||||
|
}
|
||||||
|
|
||||||
const query::PlaceholderGroup*
|
const query::PlaceholderGroup*
|
||||||
get_placeholder_group() {
|
get_placeholder_group() {
|
||||||
return placeholder_group_;
|
return placeholder_group_;
|
||||||
|
|||||||
@ -21,9 +21,13 @@ namespace exec {
|
|||||||
|
|
||||||
void
|
void
|
||||||
PhyAlwaysTrueExpr::Eval(EvalCtx& context, VectorPtr& result) {
|
PhyAlwaysTrueExpr::Eval(EvalCtx& context, VectorPtr& result) {
|
||||||
int64_t real_batch_size = current_pos_ + batch_size_ >= active_count_
|
auto input = context.get_offset_input();
|
||||||
? active_count_ - current_pos_
|
has_offset_input_ = (input != nullptr);
|
||||||
: batch_size_;
|
int64_t real_batch_size = (has_offset_input_)
|
||||||
|
? input->size()
|
||||||
|
: (current_pos_ + batch_size_ >= active_count_
|
||||||
|
? active_count_ - current_pos_
|
||||||
|
: batch_size_);
|
||||||
|
|
||||||
// always true no need to skip null
|
// always true no need to skip null
|
||||||
if (real_batch_size == 0) {
|
if (real_batch_size == 0) {
|
||||||
|
|||||||
@ -47,11 +47,14 @@ class PhyAlwaysTrueExpr : public Expr {
|
|||||||
|
|
||||||
void
|
void
|
||||||
MoveCursor() override {
|
MoveCursor() override {
|
||||||
int64_t real_batch_size = current_pos_ + batch_size_ >= active_count_
|
if (!has_offset_input_) {
|
||||||
? active_count_ - current_pos_
|
int64_t real_batch_size =
|
||||||
: batch_size_;
|
current_pos_ + batch_size_ >= active_count_
|
||||||
|
? active_count_ - current_pos_
|
||||||
|
: batch_size_;
|
||||||
|
|
||||||
current_pos_ += real_batch_size;
|
current_pos_ += real_batch_size;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
private:
|
private:
|
||||||
|
|||||||
File diff suppressed because it is too large
Load Diff
@ -88,7 +88,8 @@ struct ArithOpHelper<proto::plan::ArithOpType::Mod> {
|
|||||||
|
|
||||||
template <typename T,
|
template <typename T,
|
||||||
proto::plan::OpType cmp_op,
|
proto::plan::OpType cmp_op,
|
||||||
proto::plan::ArithOpType arith_op>
|
proto::plan::ArithOpType arith_op,
|
||||||
|
FilterType filter_type = FilterType::sequential>
|
||||||
struct ArithOpElementFunc {
|
struct ArithOpElementFunc {
|
||||||
typedef std::conditional_t<std::is_integral_v<T> &&
|
typedef std::conditional_t<std::is_integral_v<T> &&
|
||||||
!std::is_same_v<bool, T>,
|
!std::is_same_v<bool, T>,
|
||||||
@ -100,145 +101,147 @@ struct ArithOpElementFunc {
|
|||||||
size_t size,
|
size_t size,
|
||||||
HighPrecisonType val,
|
HighPrecisonType val,
|
||||||
HighPrecisonType right_operand,
|
HighPrecisonType right_operand,
|
||||||
TargetBitmapView res) {
|
TargetBitmapView res,
|
||||||
/*
|
const int32_t* offsets = nullptr) {
|
||||||
// This is the original code, kept here for the documentation purposes
|
// This is the original code, kept here for the documentation purposes
|
||||||
for (int i = 0; i < size; ++i) {
|
// and also this code will be used for iterative filter since iterative filter does not execute as a batch manner
|
||||||
if constexpr (cmp_op == proto::plan::OpType::Equal) {
|
if constexpr (filter_type == FilterType::random) {
|
||||||
if constexpr (arith_op == proto::plan::ArithOpType::Add) {
|
for (int i = 0; i < size; ++i) {
|
||||||
res[i] = (src[i] + right_operand) == val;
|
auto offset = (offsets) ? offsets[i] : i;
|
||||||
} else if constexpr (arith_op ==
|
if constexpr (cmp_op == proto::plan::OpType::Equal) {
|
||||||
proto::plan::ArithOpType::Sub) {
|
if constexpr (arith_op == proto::plan::ArithOpType::Add) {
|
||||||
res[i] = (src[i] - right_operand) == val;
|
res[i] = (src[offset] + right_operand) == val;
|
||||||
} else if constexpr (arith_op ==
|
} else if constexpr (arith_op ==
|
||||||
proto::plan::ArithOpType::Mul) {
|
proto::plan::ArithOpType::Sub) {
|
||||||
res[i] = (src[i] * right_operand) == val;
|
res[i] = (src[offset] - right_operand) == val;
|
||||||
} else if constexpr (arith_op ==
|
} else if constexpr (arith_op ==
|
||||||
proto::plan::ArithOpType::Div) {
|
proto::plan::ArithOpType::Mul) {
|
||||||
res[i] = (src[i] / right_operand) == val;
|
res[i] = (src[offset] * right_operand) == val;
|
||||||
} else if constexpr (arith_op ==
|
} else if constexpr (arith_op ==
|
||||||
proto::plan::ArithOpType::Mod) {
|
proto::plan::ArithOpType::Div) {
|
||||||
res[i] = (fmod(src[i], right_operand)) == val;
|
res[i] = (src[offset] / right_operand) == val;
|
||||||
} else {
|
} else if constexpr (arith_op ==
|
||||||
PanicInfo(
|
proto::plan::ArithOpType::Mod) {
|
||||||
OpTypeInvalid,
|
res[i] = (fmod(src[offset], right_operand)) == val;
|
||||||
fmt::format(
|
} else {
|
||||||
"unsupported arith type:{} for ArithOpElementFunc",
|
PanicInfo(OpTypeInvalid,
|
||||||
arith_op));
|
fmt::format("unsupported arith type:{} for "
|
||||||
}
|
"ArithOpElementFunc",
|
||||||
} else if constexpr (cmp_op == proto::plan::OpType::NotEqual) {
|
arith_op));
|
||||||
if constexpr (arith_op == proto::plan::ArithOpType::Add) {
|
}
|
||||||
res[i] = (src[i] + right_operand) != val;
|
} else if constexpr (cmp_op == proto::plan::OpType::NotEqual) {
|
||||||
} else if constexpr (arith_op ==
|
if constexpr (arith_op == proto::plan::ArithOpType::Add) {
|
||||||
proto::plan::ArithOpType::Sub) {
|
res[i] = (src[offset] + right_operand) != val;
|
||||||
res[i] = (src[i] - right_operand) != val;
|
} else if constexpr (arith_op ==
|
||||||
} else if constexpr (arith_op ==
|
proto::plan::ArithOpType::Sub) {
|
||||||
proto::plan::ArithOpType::Mul) {
|
res[i] = (src[offset] - right_operand) != val;
|
||||||
res[i] = (src[i] * right_operand) != val;
|
} else if constexpr (arith_op ==
|
||||||
} else if constexpr (arith_op ==
|
proto::plan::ArithOpType::Mul) {
|
||||||
proto::plan::ArithOpType::Div) {
|
res[i] = (src[offset] * right_operand) != val;
|
||||||
res[i] = (src[i] / right_operand) != val;
|
} else if constexpr (arith_op ==
|
||||||
} else if constexpr (arith_op ==
|
proto::plan::ArithOpType::Div) {
|
||||||
proto::plan::ArithOpType::Mod) {
|
res[i] = (src[offset] / right_operand) != val;
|
||||||
res[i] = (fmod(src[i], right_operand)) != val;
|
} else if constexpr (arith_op ==
|
||||||
} else {
|
proto::plan::ArithOpType::Mod) {
|
||||||
PanicInfo(
|
res[i] = (fmod(src[offset], right_operand)) != val;
|
||||||
OpTypeInvalid,
|
} else {
|
||||||
fmt::format(
|
PanicInfo(OpTypeInvalid,
|
||||||
"unsupported arith type:{} for ArithOpElementFunc",
|
fmt::format("unsupported arith type:{} for "
|
||||||
arith_op));
|
"ArithOpElementFunc",
|
||||||
}
|
arith_op));
|
||||||
} else if constexpr (cmp_op == proto::plan::OpType::GreaterThan) {
|
}
|
||||||
if constexpr (arith_op == proto::plan::ArithOpType::Add) {
|
} else if constexpr (cmp_op ==
|
||||||
res[i] = (src[i] + right_operand) > val;
|
proto::plan::OpType::GreaterThan) {
|
||||||
} else if constexpr (arith_op ==
|
if constexpr (arith_op == proto::plan::ArithOpType::Add) {
|
||||||
proto::plan::ArithOpType::Sub) {
|
res[i] = (src[offset] + right_operand) > val;
|
||||||
res[i] = (src[i] - right_operand) > val;
|
} else if constexpr (arith_op ==
|
||||||
} else if constexpr (arith_op ==
|
proto::plan::ArithOpType::Sub) {
|
||||||
proto::plan::ArithOpType::Mul) {
|
res[i] = (src[offset] - right_operand) > val;
|
||||||
res[i] = (src[i] * right_operand) > val;
|
} else if constexpr (arith_op ==
|
||||||
} else if constexpr (arith_op ==
|
proto::plan::ArithOpType::Mul) {
|
||||||
proto::plan::ArithOpType::Div) {
|
res[i] = (src[offset] * right_operand) > val;
|
||||||
res[i] = (src[i] / right_operand) > val;
|
} else if constexpr (arith_op ==
|
||||||
} else if constexpr (arith_op ==
|
proto::plan::ArithOpType::Div) {
|
||||||
proto::plan::ArithOpType::Mod) {
|
res[i] = (src[offset] / right_operand) > val;
|
||||||
res[i] = (fmod(src[i], right_operand)) > val;
|
} else if constexpr (arith_op ==
|
||||||
} else {
|
proto::plan::ArithOpType::Mod) {
|
||||||
PanicInfo(
|
res[i] = (fmod(src[offset], right_operand)) > val;
|
||||||
OpTypeInvalid,
|
} else {
|
||||||
fmt::format(
|
PanicInfo(OpTypeInvalid,
|
||||||
"unsupported arith type:{} for ArithOpElementFunc",
|
fmt::format("unsupported arith type:{} for "
|
||||||
arith_op));
|
"ArithOpElementFunc",
|
||||||
}
|
arith_op));
|
||||||
} else if constexpr (cmp_op == proto::plan::OpType::GreaterEqual) {
|
}
|
||||||
if constexpr (arith_op == proto::plan::ArithOpType::Add) {
|
} else if constexpr (cmp_op ==
|
||||||
res[i] = (src[i] + right_operand) >= val;
|
proto::plan::OpType::GreaterEqual) {
|
||||||
} else if constexpr (arith_op ==
|
if constexpr (arith_op == proto::plan::ArithOpType::Add) {
|
||||||
proto::plan::ArithOpType::Sub) {
|
res[i] = (src[offset] + right_operand) >= val;
|
||||||
res[i] = (src[i] - right_operand) >= val;
|
} else if constexpr (arith_op ==
|
||||||
} else if constexpr (arith_op ==
|
proto::plan::ArithOpType::Sub) {
|
||||||
proto::plan::ArithOpType::Mul) {
|
res[i] = (src[offset] - right_operand) >= val;
|
||||||
res[i] = (src[i] * right_operand) >= val;
|
} else if constexpr (arith_op ==
|
||||||
} else if constexpr (arith_op ==
|
proto::plan::ArithOpType::Mul) {
|
||||||
proto::plan::ArithOpType::Div) {
|
res[i] = (src[offset] * right_operand) >= val;
|
||||||
res[i] = (src[i] / right_operand) >= val;
|
} else if constexpr (arith_op ==
|
||||||
} else if constexpr (arith_op ==
|
proto::plan::ArithOpType::Div) {
|
||||||
proto::plan::ArithOpType::Mod) {
|
res[i] = (src[offset] / right_operand) >= val;
|
||||||
res[i] = (fmod(src[i], right_operand)) >= val;
|
} else if constexpr (arith_op ==
|
||||||
} else {
|
proto::plan::ArithOpType::Mod) {
|
||||||
PanicInfo(
|
res[i] = (fmod(src[offset], right_operand)) >= val;
|
||||||
OpTypeInvalid,
|
} else {
|
||||||
fmt::format(
|
PanicInfo(OpTypeInvalid,
|
||||||
"unsupported arith type:{} for ArithOpElementFunc",
|
fmt::format("unsupported arith type:{} for "
|
||||||
arith_op));
|
"ArithOpElementFunc",
|
||||||
}
|
arith_op));
|
||||||
} else if constexpr (cmp_op == proto::plan::OpType::LessThan) {
|
}
|
||||||
if constexpr (arith_op == proto::plan::ArithOpType::Add) {
|
} else if constexpr (cmp_op == proto::plan::OpType::LessThan) {
|
||||||
res[i] = (src[i] + right_operand) < val;
|
if constexpr (arith_op == proto::plan::ArithOpType::Add) {
|
||||||
} else if constexpr (arith_op ==
|
res[i] = (src[offset] + right_operand) < val;
|
||||||
proto::plan::ArithOpType::Sub) {
|
} else if constexpr (arith_op ==
|
||||||
res[i] = (src[i] - right_operand) < val;
|
proto::plan::ArithOpType::Sub) {
|
||||||
} else if constexpr (arith_op ==
|
res[i] = (src[offset] - right_operand) < val;
|
||||||
proto::plan::ArithOpType::Mul) {
|
} else if constexpr (arith_op ==
|
||||||
res[i] = (src[i] * right_operand) < val;
|
proto::plan::ArithOpType::Mul) {
|
||||||
} else if constexpr (arith_op ==
|
res[i] = (src[offset] * right_operand) < val;
|
||||||
proto::plan::ArithOpType::Div) {
|
} else if constexpr (arith_op ==
|
||||||
res[i] = (src[i] / right_operand) < val;
|
proto::plan::ArithOpType::Div) {
|
||||||
} else if constexpr (arith_op ==
|
res[i] = (src[offset] / right_operand) < val;
|
||||||
proto::plan::ArithOpType::Mod) {
|
} else if constexpr (arith_op ==
|
||||||
res[i] = (fmod(src[i], right_operand)) < val;
|
proto::plan::ArithOpType::Mod) {
|
||||||
} else {
|
res[i] = (fmod(src[offset], right_operand)) < val;
|
||||||
PanicInfo(
|
} else {
|
||||||
OpTypeInvalid,
|
PanicInfo(OpTypeInvalid,
|
||||||
fmt::format(
|
fmt::format("unsupported arith type:{} for "
|
||||||
"unsupported arith type:{} for ArithOpElementFunc",
|
"ArithOpElementFunc",
|
||||||
arith_op));
|
arith_op));
|
||||||
}
|
}
|
||||||
} else if constexpr (cmp_op == proto::plan::OpType::LessEqual) {
|
} else if constexpr (cmp_op == proto::plan::OpType::LessEqual) {
|
||||||
if constexpr (arith_op == proto::plan::ArithOpType::Add) {
|
if constexpr (arith_op == proto::plan::ArithOpType::Add) {
|
||||||
res[i] = (src[i] + right_operand) <= val;
|
res[i] = (src[offset] + right_operand) <= val;
|
||||||
} else if constexpr (arith_op ==
|
} else if constexpr (arith_op ==
|
||||||
proto::plan::ArithOpType::Sub) {
|
proto::plan::ArithOpType::Sub) {
|
||||||
res[i] = (src[i] - right_operand) <= val;
|
res[i] = (src[offset] - right_operand) <= val;
|
||||||
} else if constexpr (arith_op ==
|
} else if constexpr (arith_op ==
|
||||||
proto::plan::ArithOpType::Mul) {
|
proto::plan::ArithOpType::Mul) {
|
||||||
res[i] = (src[i] * right_operand) <= val;
|
res[i] = (src[offset] * right_operand) <= val;
|
||||||
} else if constexpr (arith_op ==
|
} else if constexpr (arith_op ==
|
||||||
proto::plan::ArithOpType::Div) {
|
proto::plan::ArithOpType::Div) {
|
||||||
res[i] = (src[i] / right_operand) <= val;
|
res[i] = (src[offset] / right_operand) <= val;
|
||||||
} else if constexpr (arith_op ==
|
} else if constexpr (arith_op ==
|
||||||
proto::plan::ArithOpType::Mod) {
|
proto::plan::ArithOpType::Mod) {
|
||||||
res[i] = (fmod(src[i], right_operand)) <= val;
|
res[i] = (fmod(src[offset], right_operand)) <= val;
|
||||||
} else {
|
} else {
|
||||||
PanicInfo(
|
PanicInfo(OpTypeInvalid,
|
||||||
OpTypeInvalid,
|
fmt::format("unsupported arith type:{} for "
|
||||||
fmt::format(
|
"ArithOpElementFunc",
|
||||||
"unsupported arith type:{} for ArithOpElementFunc",
|
arith_op));
|
||||||
arith_op));
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
return;
|
||||||
}
|
}
|
||||||
*/
|
|
||||||
|
// more efficient SIMD version
|
||||||
if constexpr (!std::is_same_v<decltype(CmpOpHelper<cmp_op>::op),
|
if constexpr (!std::is_same_v<decltype(CmpOpHelper<cmp_op>::op),
|
||||||
void>) {
|
void>) {
|
||||||
constexpr auto cmp_op_cvt = CmpOpHelper<cmp_op>::op;
|
constexpr auto cmp_op_cvt = CmpOpHelper<cmp_op>::op;
|
||||||
@ -266,7 +269,8 @@ struct ArithOpElementFunc {
|
|||||||
|
|
||||||
template <typename T,
|
template <typename T,
|
||||||
proto::plan::OpType cmp_op,
|
proto::plan::OpType cmp_op,
|
||||||
proto::plan::ArithOpType arith_op>
|
proto::plan::ArithOpType arith_op,
|
||||||
|
FilterType filter_type>
|
||||||
struct ArithOpIndexFunc {
|
struct ArithOpIndexFunc {
|
||||||
typedef std::conditional_t<std::is_integral_v<T> &&
|
typedef std::conditional_t<std::is_integral_v<T> &&
|
||||||
!std::is_same_v<bool, T>,
|
!std::is_same_v<bool, T>,
|
||||||
@ -278,10 +282,15 @@ struct ArithOpIndexFunc {
|
|||||||
operator()(Index* index,
|
operator()(Index* index,
|
||||||
size_t size,
|
size_t size,
|
||||||
HighPrecisonType val,
|
HighPrecisonType val,
|
||||||
HighPrecisonType right_operand) {
|
HighPrecisonType right_operand,
|
||||||
|
const int32_t* offsets = nullptr) {
|
||||||
TargetBitmap res(size);
|
TargetBitmap res(size);
|
||||||
for (size_t i = 0; i < size; ++i) {
|
for (size_t i = 0; i < size; ++i) {
|
||||||
auto raw = index->Reverse_Lookup(i);
|
auto offset = i;
|
||||||
|
if constexpr (filter_type == FilterType::random) {
|
||||||
|
offset = (offsets) ? offsets[i] : i;
|
||||||
|
}
|
||||||
|
auto raw = index->Reverse_Lookup(offset);
|
||||||
if (!raw.has_value()) {
|
if (!raw.has_value()) {
|
||||||
res[i] = false;
|
res[i] = false;
|
||||||
continue;
|
continue;
|
||||||
@ -449,23 +458,23 @@ class PhyBinaryArithOpEvalRangeExpr : public SegmentExpr {
|
|||||||
private:
|
private:
|
||||||
template <typename T>
|
template <typename T>
|
||||||
VectorPtr
|
VectorPtr
|
||||||
ExecRangeVisitorImpl();
|
ExecRangeVisitorImpl(OffsetVector* input = nullptr);
|
||||||
|
|
||||||
template <typename T>
|
template <typename T>
|
||||||
VectorPtr
|
VectorPtr
|
||||||
ExecRangeVisitorImplForIndex();
|
ExecRangeVisitorImplForIndex(OffsetVector* input = nullptr);
|
||||||
|
|
||||||
template <typename T>
|
template <typename T>
|
||||||
VectorPtr
|
VectorPtr
|
||||||
ExecRangeVisitorImplForData();
|
ExecRangeVisitorImplForData(OffsetVector* input = nullptr);
|
||||||
|
|
||||||
template <typename ValueType>
|
template <typename ValueType>
|
||||||
VectorPtr
|
VectorPtr
|
||||||
ExecRangeVisitorImplForJson();
|
ExecRangeVisitorImplForJson(OffsetVector* input = nullptr);
|
||||||
|
|
||||||
template <typename ValueType>
|
template <typename ValueType>
|
||||||
VectorPtr
|
VectorPtr
|
||||||
ExecRangeVisitorImplForArray();
|
ExecRangeVisitorImplForArray(OffsetVector* input = nullptr);
|
||||||
|
|
||||||
private:
|
private:
|
||||||
std::shared_ptr<const milvus::expr::BinaryArithOpEvalRangeExpr> expr_;
|
std::shared_ptr<const milvus::expr::BinaryArithOpEvalRangeExpr> expr_;
|
||||||
|
|||||||
@ -24,33 +24,35 @@ namespace exec {
|
|||||||
|
|
||||||
void
|
void
|
||||||
PhyBinaryRangeFilterExpr::Eval(EvalCtx& context, VectorPtr& result) {
|
PhyBinaryRangeFilterExpr::Eval(EvalCtx& context, VectorPtr& result) {
|
||||||
|
auto input = context.get_offset_input();
|
||||||
|
SetHasOffsetInput((input != nullptr));
|
||||||
switch (expr_->column_.data_type_) {
|
switch (expr_->column_.data_type_) {
|
||||||
case DataType::BOOL: {
|
case DataType::BOOL: {
|
||||||
result = ExecRangeVisitorImpl<bool>();
|
result = ExecRangeVisitorImpl<bool>(input);
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
case DataType::INT8: {
|
case DataType::INT8: {
|
||||||
result = ExecRangeVisitorImpl<int8_t>();
|
result = ExecRangeVisitorImpl<int8_t>(input);
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
case DataType::INT16: {
|
case DataType::INT16: {
|
||||||
result = ExecRangeVisitorImpl<int16_t>();
|
result = ExecRangeVisitorImpl<int16_t>(input);
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
case DataType::INT32: {
|
case DataType::INT32: {
|
||||||
result = ExecRangeVisitorImpl<int32_t>();
|
result = ExecRangeVisitorImpl<int32_t>(input);
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
case DataType::INT64: {
|
case DataType::INT64: {
|
||||||
result = ExecRangeVisitorImpl<int64_t>();
|
result = ExecRangeVisitorImpl<int64_t>(input);
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
case DataType::FLOAT: {
|
case DataType::FLOAT: {
|
||||||
result = ExecRangeVisitorImpl<float>();
|
result = ExecRangeVisitorImpl<float>(input);
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
case DataType::DOUBLE: {
|
case DataType::DOUBLE: {
|
||||||
result = ExecRangeVisitorImpl<double>();
|
result = ExecRangeVisitorImpl<double>(input);
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
case DataType::VARCHAR: {
|
case DataType::VARCHAR: {
|
||||||
@ -58,9 +60,9 @@ PhyBinaryRangeFilterExpr::Eval(EvalCtx& context, VectorPtr& result) {
|
|||||||
!storage::MmapManager::GetInstance()
|
!storage::MmapManager::GetInstance()
|
||||||
.GetMmapConfig()
|
.GetMmapConfig()
|
||||||
.growing_enable_mmap) {
|
.growing_enable_mmap) {
|
||||||
result = ExecRangeVisitorImpl<std::string>();
|
result = ExecRangeVisitorImpl<std::string>(input);
|
||||||
} else {
|
} else {
|
||||||
result = ExecRangeVisitorImpl<std::string_view>();
|
result = ExecRangeVisitorImpl<std::string_view>(input);
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
@ -68,15 +70,15 @@ PhyBinaryRangeFilterExpr::Eval(EvalCtx& context, VectorPtr& result) {
|
|||||||
auto value_type = expr_->lower_val_.val_case();
|
auto value_type = expr_->lower_val_.val_case();
|
||||||
switch (value_type) {
|
switch (value_type) {
|
||||||
case proto::plan::GenericValue::ValCase::kInt64Val: {
|
case proto::plan::GenericValue::ValCase::kInt64Val: {
|
||||||
result = ExecRangeVisitorImplForJson<int64_t>();
|
result = ExecRangeVisitorImplForJson<int64_t>(input);
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
case proto::plan::GenericValue::ValCase::kFloatVal: {
|
case proto::plan::GenericValue::ValCase::kFloatVal: {
|
||||||
result = ExecRangeVisitorImplForJson<double>();
|
result = ExecRangeVisitorImplForJson<double>(input);
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
case proto::plan::GenericValue::ValCase::kStringVal: {
|
case proto::plan::GenericValue::ValCase::kStringVal: {
|
||||||
result = ExecRangeVisitorImplForJson<std::string>();
|
result = ExecRangeVisitorImplForJson<std::string>(input);
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
default: {
|
default: {
|
||||||
@ -93,17 +95,17 @@ PhyBinaryRangeFilterExpr::Eval(EvalCtx& context, VectorPtr& result) {
|
|||||||
switch (value_type) {
|
switch (value_type) {
|
||||||
case proto::plan::GenericValue::ValCase::kInt64Val: {
|
case proto::plan::GenericValue::ValCase::kInt64Val: {
|
||||||
SetNotUseIndex();
|
SetNotUseIndex();
|
||||||
result = ExecRangeVisitorImplForArray<int64_t>();
|
result = ExecRangeVisitorImplForArray<int64_t>(input);
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
case proto::plan::GenericValue::ValCase::kFloatVal: {
|
case proto::plan::GenericValue::ValCase::kFloatVal: {
|
||||||
SetNotUseIndex();
|
SetNotUseIndex();
|
||||||
result = ExecRangeVisitorImplForArray<double>();
|
result = ExecRangeVisitorImplForArray<double>(input);
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
case proto::plan::GenericValue::ValCase::kStringVal: {
|
case proto::plan::GenericValue::ValCase::kStringVal: {
|
||||||
SetNotUseIndex();
|
SetNotUseIndex();
|
||||||
result = ExecRangeVisitorImplForArray<std::string>();
|
result = ExecRangeVisitorImplForArray<std::string>(input);
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
default: {
|
default: {
|
||||||
@ -124,11 +126,11 @@ PhyBinaryRangeFilterExpr::Eval(EvalCtx& context, VectorPtr& result) {
|
|||||||
|
|
||||||
template <typename T>
|
template <typename T>
|
||||||
VectorPtr
|
VectorPtr
|
||||||
PhyBinaryRangeFilterExpr::ExecRangeVisitorImpl() {
|
PhyBinaryRangeFilterExpr::ExecRangeVisitorImpl(OffsetVector* input) {
|
||||||
if (is_index_mode_) {
|
if (is_index_mode_ && !has_offset_input_) {
|
||||||
return ExecRangeVisitorImplForIndex<T>();
|
return ExecRangeVisitorImplForIndex<T>();
|
||||||
} else {
|
} else {
|
||||||
return ExecRangeVisitorImplForData<T>();
|
return ExecRangeVisitorImplForData<T>(input);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -137,17 +139,28 @@ ColumnVectorPtr
|
|||||||
PhyBinaryRangeFilterExpr::PreCheckOverflow(HighPrecisionType& val1,
|
PhyBinaryRangeFilterExpr::PreCheckOverflow(HighPrecisionType& val1,
|
||||||
HighPrecisionType& val2,
|
HighPrecisionType& val2,
|
||||||
bool& lower_inclusive,
|
bool& lower_inclusive,
|
||||||
bool& upper_inclusive) {
|
bool& upper_inclusive,
|
||||||
|
OffsetVector* input) {
|
||||||
lower_inclusive = expr_->lower_inclusive_;
|
lower_inclusive = expr_->lower_inclusive_;
|
||||||
upper_inclusive = expr_->upper_inclusive_;
|
upper_inclusive = expr_->upper_inclusive_;
|
||||||
val1 = GetValueFromProto<HighPrecisionType>(expr_->lower_val_);
|
val1 = GetValueFromProto<HighPrecisionType>(expr_->lower_val_);
|
||||||
val2 = GetValueFromProto<HighPrecisionType>(expr_->upper_val_);
|
val2 = GetValueFromProto<HighPrecisionType>(expr_->upper_val_);
|
||||||
auto get_next_overflow_batch = [this]() -> ColumnVectorPtr {
|
|
||||||
int64_t batch_size = overflow_check_pos_ + batch_size_ >= active_count_
|
auto get_next_overflow_batch =
|
||||||
? active_count_ - overflow_check_pos_
|
[this](OffsetVector* input) -> ColumnVectorPtr {
|
||||||
: batch_size_;
|
int64_t batch_size;
|
||||||
overflow_check_pos_ += batch_size;
|
if (input != nullptr) {
|
||||||
auto valid_res = ProcessChunksForValid<T>(is_index_mode_);
|
batch_size = input->size();
|
||||||
|
} else {
|
||||||
|
batch_size = overflow_check_pos_ + batch_size_ >= active_count_
|
||||||
|
? active_count_ - overflow_check_pos_
|
||||||
|
: batch_size_;
|
||||||
|
overflow_check_pos_ += batch_size;
|
||||||
|
}
|
||||||
|
auto valid_res =
|
||||||
|
(input != nullptr)
|
||||||
|
? ProcessChunksForValidByOffsets<T>(is_index_mode_, *input)
|
||||||
|
: ProcessChunksForValid<T>(is_index_mode_);
|
||||||
auto res_vec = std::make_shared<ColumnVector>(TargetBitmap(batch_size),
|
auto res_vec = std::make_shared<ColumnVector>(TargetBitmap(batch_size),
|
||||||
std::move(valid_res));
|
std::move(valid_res));
|
||||||
return res_vec;
|
return res_vec;
|
||||||
@ -155,7 +168,7 @@ PhyBinaryRangeFilterExpr::PreCheckOverflow(HighPrecisionType& val1,
|
|||||||
|
|
||||||
if constexpr (std::is_integral_v<T> && !std::is_same_v<bool, T>) {
|
if constexpr (std::is_integral_v<T> && !std::is_same_v<bool, T>) {
|
||||||
if (milvus::query::gt_ub<T>(val1)) {
|
if (milvus::query::gt_ub<T>(val1)) {
|
||||||
return get_next_overflow_batch();
|
return get_next_overflow_batch(input);
|
||||||
} else if (milvus::query::lt_lb<T>(val1)) {
|
} else if (milvus::query::lt_lb<T>(val1)) {
|
||||||
val1 = std::numeric_limits<T>::min();
|
val1 = std::numeric_limits<T>::min();
|
||||||
lower_inclusive = true;
|
lower_inclusive = true;
|
||||||
@ -165,7 +178,7 @@ PhyBinaryRangeFilterExpr::PreCheckOverflow(HighPrecisionType& val1,
|
|||||||
val2 = std::numeric_limits<T>::max();
|
val2 = std::numeric_limits<T>::max();
|
||||||
upper_inclusive = true;
|
upper_inclusive = true;
|
||||||
} else if (milvus::query::lt_lb<T>(val2)) {
|
} else if (milvus::query::lt_lb<T>(val2)) {
|
||||||
return get_next_overflow_batch();
|
return get_next_overflow_batch(input);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
return nullptr;
|
return nullptr;
|
||||||
@ -216,7 +229,7 @@ PhyBinaryRangeFilterExpr::ExecRangeVisitorImplForIndex() {
|
|||||||
|
|
||||||
template <typename T>
|
template <typename T>
|
||||||
VectorPtr
|
VectorPtr
|
||||||
PhyBinaryRangeFilterExpr::ExecRangeVisitorImplForData() {
|
PhyBinaryRangeFilterExpr::ExecRangeVisitorImplForData(OffsetVector* input) {
|
||||||
typedef std::
|
typedef std::
|
||||||
conditional_t<std::is_same_v<T, std::string_view>, std::string, T>
|
conditional_t<std::is_same_v<T, std::string_view>, std::string, T>
|
||||||
IndexInnerType;
|
IndexInnerType;
|
||||||
@ -226,57 +239,67 @@ PhyBinaryRangeFilterExpr::ExecRangeVisitorImplForData() {
|
|||||||
int64_t,
|
int64_t,
|
||||||
IndexInnerType>
|
IndexInnerType>
|
||||||
HighPrecisionType;
|
HighPrecisionType;
|
||||||
auto real_batch_size = GetNextBatchSize();
|
|
||||||
if (real_batch_size == 0) {
|
|
||||||
return nullptr;
|
|
||||||
}
|
|
||||||
|
|
||||||
HighPrecisionType val1;
|
HighPrecisionType val1;
|
||||||
HighPrecisionType val2;
|
HighPrecisionType val2;
|
||||||
bool lower_inclusive = false;
|
bool lower_inclusive = false;
|
||||||
bool upper_inclusive = false;
|
bool upper_inclusive = false;
|
||||||
if (auto res =
|
if (auto res = PreCheckOverflow<T>(
|
||||||
PreCheckOverflow<T>(val1, val2, lower_inclusive, upper_inclusive)) {
|
val1, val2, lower_inclusive, upper_inclusive, input)) {
|
||||||
return res;
|
return res;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
auto real_batch_size =
|
||||||
|
has_offset_input_ ? input->size() : GetNextBatchSize();
|
||||||
|
if (real_batch_size == 0) {
|
||||||
|
return nullptr;
|
||||||
|
}
|
||||||
auto res_vec = std::make_shared<ColumnVector>(
|
auto res_vec = std::make_shared<ColumnVector>(
|
||||||
TargetBitmap(real_batch_size), TargetBitmap(real_batch_size));
|
TargetBitmap(real_batch_size), TargetBitmap(real_batch_size));
|
||||||
TargetBitmapView res(res_vec->GetRawData(), real_batch_size);
|
TargetBitmapView res(res_vec->GetRawData(), real_batch_size);
|
||||||
TargetBitmapView valid_res(res_vec->GetValidRawData(), real_batch_size);
|
TargetBitmapView valid_res(res_vec->GetValidRawData(), real_batch_size);
|
||||||
valid_res.set();
|
valid_res.set();
|
||||||
|
|
||||||
auto execute_sub_batch = [lower_inclusive, upper_inclusive](
|
auto execute_sub_batch =
|
||||||
const T* data,
|
[ lower_inclusive,
|
||||||
const bool* valid_data,
|
upper_inclusive ]<FilterType filter_type = FilterType::sequential>(
|
||||||
const int size,
|
const T* data,
|
||||||
TargetBitmapView res,
|
const bool* valid_data,
|
||||||
TargetBitmapView valid_res,
|
const int32_t* offsets,
|
||||||
HighPrecisionType val1,
|
const int size,
|
||||||
HighPrecisionType val2) {
|
TargetBitmapView res,
|
||||||
|
TargetBitmapView valid_res,
|
||||||
|
HighPrecisionType val1,
|
||||||
|
HighPrecisionType val2) {
|
||||||
if (lower_inclusive && upper_inclusive) {
|
if (lower_inclusive && upper_inclusive) {
|
||||||
BinaryRangeElementFunc<T, true, true> func;
|
BinaryRangeElementFunc<T, true, true, filter_type> func;
|
||||||
func(val1, val2, data, size, res);
|
func(val1, val2, data, size, res, offsets);
|
||||||
} else if (lower_inclusive && !upper_inclusive) {
|
} else if (lower_inclusive && !upper_inclusive) {
|
||||||
BinaryRangeElementFunc<T, true, false> func;
|
BinaryRangeElementFunc<T, true, false, filter_type> func;
|
||||||
func(val1, val2, data, size, res);
|
func(val1, val2, data, size, res, offsets);
|
||||||
} else if (!lower_inclusive && upper_inclusive) {
|
} else if (!lower_inclusive && upper_inclusive) {
|
||||||
BinaryRangeElementFunc<T, false, true> func;
|
BinaryRangeElementFunc<T, false, true, filter_type> func;
|
||||||
func(val1, val2, data, size, res);
|
func(val1, val2, data, size, res, offsets);
|
||||||
} else {
|
} else {
|
||||||
BinaryRangeElementFunc<T, false, false> func;
|
BinaryRangeElementFunc<T, false, false, filter_type> func;
|
||||||
func(val1, val2, data, size, res);
|
func(val1, val2, data, size, res, offsets);
|
||||||
}
|
}
|
||||||
// there is a batch operation in BinaryRangeElementFunc,
|
// there is a batch operation in BinaryRangeElementFunc,
|
||||||
// so not divide data again for the reason that it may reduce performance if the null distribution is scattered
|
// so not divide data again for the reason that it may reduce performance if the null distribution is scattered
|
||||||
// but to mask res with valid_data after the batch operation.
|
// but to mask res with valid_data after the batch operation.
|
||||||
if (valid_data != nullptr) {
|
if (valid_data != nullptr) {
|
||||||
for (int i = 0; i < size; i++) {
|
for (int i = 0; i < size; i++) {
|
||||||
if (!valid_data[i]) {
|
auto offset = i;
|
||||||
|
if constexpr (filter_type == FilterType::random) {
|
||||||
|
offset = (offsets) ? offsets[i] : i;
|
||||||
|
}
|
||||||
|
if (!valid_data[offset]) {
|
||||||
res[i] = valid_res[i] = false;
|
res[i] = valid_res[i] = false;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
auto skip_index_func =
|
auto skip_index_func =
|
||||||
[val1, val2, lower_inclusive, upper_inclusive](
|
[val1, val2, lower_inclusive, upper_inclusive](
|
||||||
const SkipIndex& skip_index, FieldId field_id, int64_t chunk_id) {
|
const SkipIndex& skip_index, FieldId field_id, int64_t chunk_id) {
|
||||||
@ -294,8 +317,19 @@ PhyBinaryRangeFilterExpr::ExecRangeVisitorImplForData() {
|
|||||||
field_id, chunk_id, val1, val2, false, false);
|
field_id, chunk_id, val1, val2, false, false);
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
int64_t processed_size = ProcessDataChunks<T>(
|
int64_t processed_size;
|
||||||
execute_sub_batch, skip_index_func, res, valid_res, val1, val2);
|
if (has_offset_input_) {
|
||||||
|
processed_size = ProcessDataByOffsets<T>(execute_sub_batch,
|
||||||
|
skip_index_func,
|
||||||
|
input,
|
||||||
|
res,
|
||||||
|
valid_res,
|
||||||
|
val1,
|
||||||
|
val2);
|
||||||
|
} else {
|
||||||
|
processed_size = ProcessDataChunks<T>(
|
||||||
|
execute_sub_batch, skip_index_func, res, valid_res, val1, val2);
|
||||||
|
}
|
||||||
AssertInfo(processed_size == real_batch_size,
|
AssertInfo(processed_size == real_batch_size,
|
||||||
"internal error: expr processed rows {} not equal "
|
"internal error: expr processed rows {} not equal "
|
||||||
"expect batch size {}",
|
"expect batch size {}",
|
||||||
@ -306,11 +340,12 @@ PhyBinaryRangeFilterExpr::ExecRangeVisitorImplForData() {
|
|||||||
|
|
||||||
template <typename ValueType>
|
template <typename ValueType>
|
||||||
VectorPtr
|
VectorPtr
|
||||||
PhyBinaryRangeFilterExpr::ExecRangeVisitorImplForJson() {
|
PhyBinaryRangeFilterExpr::ExecRangeVisitorImplForJson(OffsetVector* input) {
|
||||||
using GetType = std::conditional_t<std::is_same_v<ValueType, std::string>,
|
using GetType = std::conditional_t<std::is_same_v<ValueType, std::string>,
|
||||||
std::string_view,
|
std::string_view,
|
||||||
ValueType>;
|
ValueType>;
|
||||||
auto real_batch_size = GetNextBatchSize();
|
auto real_batch_size =
|
||||||
|
has_offset_input_ ? input->size() : GetNextBatchSize();
|
||||||
if (real_batch_size == 0) {
|
if (real_batch_size == 0) {
|
||||||
return nullptr;
|
return nullptr;
|
||||||
}
|
}
|
||||||
@ -326,30 +361,81 @@ PhyBinaryRangeFilterExpr::ExecRangeVisitorImplForJson() {
|
|||||||
ValueType val2 = GetValueFromProto<ValueType>(expr_->upper_val_);
|
ValueType val2 = GetValueFromProto<ValueType>(expr_->upper_val_);
|
||||||
auto pointer = milvus::Json::pointer(expr_->column_.nested_path_);
|
auto pointer = milvus::Json::pointer(expr_->column_.nested_path_);
|
||||||
|
|
||||||
auto execute_sub_batch = [lower_inclusive, upper_inclusive, pointer](
|
auto execute_sub_batch =
|
||||||
const milvus::Json* data,
|
[ lower_inclusive, upper_inclusive,
|
||||||
const bool* valid_data,
|
pointer ]<FilterType filter_type = FilterType::sequential>(
|
||||||
const int size,
|
const milvus::Json* data,
|
||||||
TargetBitmapView res,
|
const bool* valid_data,
|
||||||
TargetBitmapView valid_res,
|
const int32_t* offsets,
|
||||||
ValueType val1,
|
const int size,
|
||||||
ValueType val2) {
|
TargetBitmapView res,
|
||||||
|
TargetBitmapView valid_res,
|
||||||
|
ValueType val1,
|
||||||
|
ValueType val2) {
|
||||||
if (lower_inclusive && upper_inclusive) {
|
if (lower_inclusive && upper_inclusive) {
|
||||||
BinaryRangeElementFuncForJson<ValueType, true, true> func;
|
BinaryRangeElementFuncForJson<ValueType, true, true, filter_type>
|
||||||
func(val1, val2, pointer, data, valid_data, size, res, valid_res);
|
func;
|
||||||
|
func(val1,
|
||||||
|
val2,
|
||||||
|
pointer,
|
||||||
|
data,
|
||||||
|
valid_data,
|
||||||
|
size,
|
||||||
|
res,
|
||||||
|
valid_res,
|
||||||
|
offsets);
|
||||||
} else if (lower_inclusive && !upper_inclusive) {
|
} else if (lower_inclusive && !upper_inclusive) {
|
||||||
BinaryRangeElementFuncForJson<ValueType, true, false> func;
|
BinaryRangeElementFuncForJson<ValueType, true, false, filter_type>
|
||||||
func(val1, val2, pointer, data, valid_data, size, res, valid_res);
|
func;
|
||||||
|
func(val1,
|
||||||
|
val2,
|
||||||
|
pointer,
|
||||||
|
data,
|
||||||
|
valid_data,
|
||||||
|
size,
|
||||||
|
res,
|
||||||
|
valid_res,
|
||||||
|
offsets);
|
||||||
|
|
||||||
} else if (!lower_inclusive && upper_inclusive) {
|
} else if (!lower_inclusive && upper_inclusive) {
|
||||||
BinaryRangeElementFuncForJson<ValueType, false, true> func;
|
BinaryRangeElementFuncForJson<ValueType, false, true, filter_type>
|
||||||
func(val1, val2, pointer, data, valid_data, size, res, valid_res);
|
func;
|
||||||
|
func(val1,
|
||||||
|
val2,
|
||||||
|
pointer,
|
||||||
|
data,
|
||||||
|
valid_data,
|
||||||
|
size,
|
||||||
|
res,
|
||||||
|
valid_res,
|
||||||
|
offsets);
|
||||||
} else {
|
} else {
|
||||||
BinaryRangeElementFuncForJson<ValueType, false, false> func;
|
BinaryRangeElementFuncForJson<ValueType, false, false, filter_type>
|
||||||
func(val1, val2, pointer, data, valid_data, size, res, valid_res);
|
func;
|
||||||
|
func(val1,
|
||||||
|
val2,
|
||||||
|
pointer,
|
||||||
|
data,
|
||||||
|
valid_data,
|
||||||
|
size,
|
||||||
|
res,
|
||||||
|
valid_res,
|
||||||
|
offsets);
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
int64_t processed_size = ProcessDataChunks<milvus::Json>(
|
int64_t processed_size;
|
||||||
execute_sub_batch, std::nullptr_t{}, res, valid_res, val1, val2);
|
if (has_offset_input_) {
|
||||||
|
processed_size = ProcessDataByOffsets<milvus::Json>(execute_sub_batch,
|
||||||
|
std::nullptr_t{},
|
||||||
|
input,
|
||||||
|
res,
|
||||||
|
valid_res,
|
||||||
|
val1,
|
||||||
|
val2);
|
||||||
|
} else {
|
||||||
|
processed_size = ProcessDataChunks<milvus::Json>(
|
||||||
|
execute_sub_batch, std::nullptr_t{}, res, valid_res, val1, val2);
|
||||||
|
}
|
||||||
AssertInfo(processed_size == real_batch_size,
|
AssertInfo(processed_size == real_batch_size,
|
||||||
"internal error: expr processed rows {} not equal "
|
"internal error: expr processed rows {} not equal "
|
||||||
"expect batch size {}",
|
"expect batch size {}",
|
||||||
@ -360,11 +446,12 @@ PhyBinaryRangeFilterExpr::ExecRangeVisitorImplForJson() {
|
|||||||
|
|
||||||
template <typename ValueType>
|
template <typename ValueType>
|
||||||
VectorPtr
|
VectorPtr
|
||||||
PhyBinaryRangeFilterExpr::ExecRangeVisitorImplForArray() {
|
PhyBinaryRangeFilterExpr::ExecRangeVisitorImplForArray(OffsetVector* input) {
|
||||||
using GetType = std::conditional_t<std::is_same_v<ValueType, std::string>,
|
using GetType = std::conditional_t<std::is_same_v<ValueType, std::string>,
|
||||||
std::string_view,
|
std::string_view,
|
||||||
ValueType>;
|
ValueType>;
|
||||||
auto real_batch_size = GetNextBatchSize();
|
auto real_batch_size =
|
||||||
|
has_offset_input_ ? input->size() : GetNextBatchSize();
|
||||||
if (real_batch_size == 0) {
|
if (real_batch_size == 0) {
|
||||||
return nullptr;
|
return nullptr;
|
||||||
}
|
}
|
||||||
@ -383,31 +470,90 @@ PhyBinaryRangeFilterExpr::ExecRangeVisitorImplForArray() {
|
|||||||
index = std::stoi(expr_->column_.nested_path_[0]);
|
index = std::stoi(expr_->column_.nested_path_[0]);
|
||||||
}
|
}
|
||||||
|
|
||||||
auto execute_sub_batch = [lower_inclusive, upper_inclusive](
|
auto execute_sub_batch =
|
||||||
const milvus::ArrayView* data,
|
[ lower_inclusive,
|
||||||
const bool* valid_data,
|
upper_inclusive ]<FilterType filter_type = FilterType::sequential>(
|
||||||
const int size,
|
const milvus::ArrayView* data,
|
||||||
TargetBitmapView res,
|
const bool* valid_data,
|
||||||
TargetBitmapView valid_res,
|
const int32_t* offsets,
|
||||||
ValueType val1,
|
const int size,
|
||||||
ValueType val2,
|
TargetBitmapView res,
|
||||||
int index) {
|
TargetBitmapView valid_res,
|
||||||
|
ValueType val1,
|
||||||
|
ValueType val2,
|
||||||
|
int index) {
|
||||||
if (lower_inclusive && upper_inclusive) {
|
if (lower_inclusive && upper_inclusive) {
|
||||||
BinaryRangeElementFuncForArray<ValueType, true, true> func;
|
BinaryRangeElementFuncForArray<ValueType, true, true, filter_type>
|
||||||
func(val1, val2, index, data, valid_data, size, res, valid_res);
|
func;
|
||||||
|
func(val1,
|
||||||
|
val2,
|
||||||
|
index,
|
||||||
|
data,
|
||||||
|
valid_data,
|
||||||
|
size,
|
||||||
|
res,
|
||||||
|
valid_res,
|
||||||
|
offsets);
|
||||||
} else if (lower_inclusive && !upper_inclusive) {
|
} else if (lower_inclusive && !upper_inclusive) {
|
||||||
BinaryRangeElementFuncForArray<ValueType, true, false> func;
|
BinaryRangeElementFuncForArray<ValueType, true, false, filter_type>
|
||||||
func(val1, val2, index, data, valid_data, size, res, valid_res);
|
func;
|
||||||
|
func(val1,
|
||||||
|
val2,
|
||||||
|
index,
|
||||||
|
data,
|
||||||
|
valid_data,
|
||||||
|
size,
|
||||||
|
res,
|
||||||
|
valid_res,
|
||||||
|
offsets);
|
||||||
|
|
||||||
} else if (!lower_inclusive && upper_inclusive) {
|
} else if (!lower_inclusive && upper_inclusive) {
|
||||||
BinaryRangeElementFuncForArray<ValueType, false, true> func;
|
BinaryRangeElementFuncForArray<ValueType, false, true, filter_type>
|
||||||
func(val1, val2, index, data, valid_data, size, res, valid_res);
|
func;
|
||||||
|
func(val1,
|
||||||
|
val2,
|
||||||
|
index,
|
||||||
|
data,
|
||||||
|
valid_data,
|
||||||
|
size,
|
||||||
|
res,
|
||||||
|
valid_res,
|
||||||
|
offsets);
|
||||||
|
|
||||||
} else {
|
} else {
|
||||||
BinaryRangeElementFuncForArray<ValueType, false, false> func;
|
BinaryRangeElementFuncForArray<ValueType, false, false, filter_type>
|
||||||
func(val1, val2, index, data, valid_data, size, res, valid_res);
|
func;
|
||||||
|
func(val1,
|
||||||
|
val2,
|
||||||
|
index,
|
||||||
|
data,
|
||||||
|
valid_data,
|
||||||
|
size,
|
||||||
|
res,
|
||||||
|
valid_res,
|
||||||
|
offsets);
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
int64_t processed_size = ProcessDataChunks<milvus::ArrayView>(
|
int64_t processed_size;
|
||||||
execute_sub_batch, std::nullptr_t{}, res, valid_res, val1, val2, index);
|
if (has_offset_input_) {
|
||||||
|
processed_size =
|
||||||
|
ProcessDataByOffsets<milvus::ArrayView>(execute_sub_batch,
|
||||||
|
std::nullptr_t{},
|
||||||
|
input,
|
||||||
|
res,
|
||||||
|
valid_res,
|
||||||
|
val1,
|
||||||
|
val2,
|
||||||
|
index);
|
||||||
|
} else {
|
||||||
|
processed_size = ProcessDataChunks<milvus::ArrayView>(execute_sub_batch,
|
||||||
|
std::nullptr_t{},
|
||||||
|
res,
|
||||||
|
valid_res,
|
||||||
|
val1,
|
||||||
|
val2,
|
||||||
|
index);
|
||||||
|
}
|
||||||
AssertInfo(processed_size == real_batch_size,
|
AssertInfo(processed_size == real_batch_size,
|
||||||
"internal error: expr processed rows {} not equal "
|
"internal error: expr processed rows {} not equal "
|
||||||
"expect batch size {}",
|
"expect batch size {}",
|
||||||
|
|||||||
@ -27,7 +27,10 @@
|
|||||||
namespace milvus {
|
namespace milvus {
|
||||||
namespace exec {
|
namespace exec {
|
||||||
|
|
||||||
template <typename T, bool lower_inclusive, bool upper_inclusive>
|
template <typename T,
|
||||||
|
bool lower_inclusive,
|
||||||
|
bool upper_inclusive,
|
||||||
|
FilterType filter_type = FilterType::sequential>
|
||||||
struct BinaryRangeElementFunc {
|
struct BinaryRangeElementFunc {
|
||||||
typedef std::conditional_t<std::is_integral_v<T> &&
|
typedef std::conditional_t<std::is_integral_v<T> &&
|
||||||
!std::is_same_v<bool, T>,
|
!std::is_same_v<bool, T>,
|
||||||
@ -35,7 +38,28 @@ struct BinaryRangeElementFunc {
|
|||||||
T>
|
T>
|
||||||
HighPrecisionType;
|
HighPrecisionType;
|
||||||
void
|
void
|
||||||
operator()(T val1, T val2, const T* src, size_t n, TargetBitmapView res) {
|
operator()(T val1,
|
||||||
|
T val2,
|
||||||
|
const T* src,
|
||||||
|
size_t n,
|
||||||
|
TargetBitmapView res,
|
||||||
|
const int32_t* offsets = nullptr) {
|
||||||
|
if constexpr (filter_type == FilterType::random) {
|
||||||
|
for (size_t i = 0; i < n; ++i) {
|
||||||
|
auto offset = (offsets) ? offsets[i] : i;
|
||||||
|
if constexpr (lower_inclusive && upper_inclusive) {
|
||||||
|
res[i] = val1 <= src[offset] && src[offset] <= val2;
|
||||||
|
} else if constexpr (lower_inclusive && !upper_inclusive) {
|
||||||
|
res[i] = val1 <= src[offset] && src[offset] < val2;
|
||||||
|
} else if constexpr (!lower_inclusive && upper_inclusive) {
|
||||||
|
res[i] = val1 < src[offset] && src[offset] <= val2;
|
||||||
|
} else {
|
||||||
|
res[i] = val1 < src[offset] && src[offset] < val2;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
if constexpr (lower_inclusive && upper_inclusive) {
|
if constexpr (lower_inclusive && upper_inclusive) {
|
||||||
res.inplace_within_range_val<T, milvus::bitset::RangeType::IncInc>(
|
res.inplace_within_range_val<T, milvus::bitset::RangeType::IncInc>(
|
||||||
val1, val2, src, n);
|
val1, val2, src, n);
|
||||||
@ -52,30 +76,33 @@ struct BinaryRangeElementFunc {
|
|||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
#define BinaryRangeJSONCompare(cmp) \
|
#define BinaryRangeJSONCompare(cmp) \
|
||||||
do { \
|
do { \
|
||||||
if (valid_data != nullptr && !valid_data[i]) { \
|
if (valid_data != nullptr && !valid_data[offset]) { \
|
||||||
res[i] = valid_res[i] = false; \
|
res[i] = valid_res[i] = false; \
|
||||||
break; \
|
break; \
|
||||||
} \
|
} \
|
||||||
auto x = src[i].template at<GetType>(pointer); \
|
auto x = src[offset].template at<GetType>(pointer); \
|
||||||
if (x.error()) { \
|
if (x.error()) { \
|
||||||
if constexpr (std::is_same_v<GetType, int64_t>) { \
|
if constexpr (std::is_same_v<GetType, int64_t>) { \
|
||||||
auto x = src[i].template at<double>(pointer); \
|
auto x = src[offset].template at<double>(pointer); \
|
||||||
if (!x.error()) { \
|
if (!x.error()) { \
|
||||||
auto value = x.value(); \
|
auto value = x.value(); \
|
||||||
res[i] = (cmp); \
|
res[i] = (cmp); \
|
||||||
break; \
|
break; \
|
||||||
} \
|
} \
|
||||||
} \
|
} \
|
||||||
res[i] = false; \
|
res[i] = false; \
|
||||||
break; \
|
break; \
|
||||||
} \
|
} \
|
||||||
auto value = x.value(); \
|
auto value = x.value(); \
|
||||||
res[i] = (cmp); \
|
res[i] = (cmp); \
|
||||||
} while (false)
|
} while (false)
|
||||||
|
|
||||||
template <typename ValueType, bool lower_inclusive, bool upper_inclusive>
|
template <typename ValueType,
|
||||||
|
bool lower_inclusive,
|
||||||
|
bool upper_inclusive,
|
||||||
|
FilterType filter_type = FilterType::sequential>
|
||||||
struct BinaryRangeElementFuncForJson {
|
struct BinaryRangeElementFuncForJson {
|
||||||
using GetType = std::conditional_t<std::is_same_v<ValueType, std::string>,
|
using GetType = std::conditional_t<std::is_same_v<ValueType, std::string>,
|
||||||
std::string_view,
|
std::string_view,
|
||||||
@ -88,8 +115,13 @@ struct BinaryRangeElementFuncForJson {
|
|||||||
const bool* valid_data,
|
const bool* valid_data,
|
||||||
size_t n,
|
size_t n,
|
||||||
TargetBitmapView res,
|
TargetBitmapView res,
|
||||||
TargetBitmapView valid_res) {
|
TargetBitmapView valid_res,
|
||||||
|
const int32_t* offsets = nullptr) {
|
||||||
for (size_t i = 0; i < n; ++i) {
|
for (size_t i = 0; i < n; ++i) {
|
||||||
|
auto offset = i;
|
||||||
|
if constexpr (filter_type == FilterType::random) {
|
||||||
|
offset = (offsets) ? offsets[i] : i;
|
||||||
|
}
|
||||||
if constexpr (lower_inclusive && upper_inclusive) {
|
if constexpr (lower_inclusive && upper_inclusive) {
|
||||||
BinaryRangeJSONCompare(val1 <= value && value <= val2);
|
BinaryRangeJSONCompare(val1 <= value && value <= val2);
|
||||||
} else if constexpr (lower_inclusive && !upper_inclusive) {
|
} else if constexpr (lower_inclusive && !upper_inclusive) {
|
||||||
@ -103,7 +135,10 @@ struct BinaryRangeElementFuncForJson {
|
|||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
template <typename ValueType, bool lower_inclusive, bool upper_inclusive>
|
template <typename ValueType,
|
||||||
|
bool lower_inclusive,
|
||||||
|
bool upper_inclusive,
|
||||||
|
FilterType filter_type = FilterType::sequential>
|
||||||
struct BinaryRangeElementFuncForArray {
|
struct BinaryRangeElementFuncForArray {
|
||||||
using GetType = std::conditional_t<std::is_same_v<ValueType, std::string>,
|
using GetType = std::conditional_t<std::is_same_v<ValueType, std::string>,
|
||||||
std::string_view,
|
std::string_view,
|
||||||
@ -116,39 +151,44 @@ struct BinaryRangeElementFuncForArray {
|
|||||||
const bool* valid_data,
|
const bool* valid_data,
|
||||||
size_t n,
|
size_t n,
|
||||||
TargetBitmapView res,
|
TargetBitmapView res,
|
||||||
TargetBitmapView valid_res) {
|
TargetBitmapView valid_res,
|
||||||
|
const int32_t* offsets = nullptr) {
|
||||||
for (size_t i = 0; i < n; ++i) {
|
for (size_t i = 0; i < n; ++i) {
|
||||||
if (valid_data != nullptr && !valid_data[i]) {
|
size_t offset = i;
|
||||||
|
if constexpr (filter_type == FilterType::random) {
|
||||||
|
offset = (offsets) ? offsets[i] : i;
|
||||||
|
}
|
||||||
|
if (valid_data != nullptr && !valid_data[offset]) {
|
||||||
res[i] = valid_res[i] = false;
|
res[i] = valid_res[i] = false;
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
if constexpr (lower_inclusive && upper_inclusive) {
|
if constexpr (lower_inclusive && upper_inclusive) {
|
||||||
if (index >= src[i].length()) {
|
if (index >= src[offset].length()) {
|
||||||
res[i] = false;
|
res[i] = false;
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
auto value = src[i].get_data<GetType>(index);
|
auto value = src[offset].get_data<GetType>(index);
|
||||||
res[i] = val1 <= value && value <= val2;
|
res[i] = val1 <= value && value <= val2;
|
||||||
} else if constexpr (lower_inclusive && !upper_inclusive) {
|
} else if constexpr (lower_inclusive && !upper_inclusive) {
|
||||||
if (index >= src[i].length()) {
|
if (index >= src[offset].length()) {
|
||||||
res[i] = false;
|
res[i] = false;
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
auto value = src[i].get_data<GetType>(index);
|
auto value = src[offset].get_data<GetType>(index);
|
||||||
res[i] = val1 <= value && value < val2;
|
res[i] = val1 <= value && value < val2;
|
||||||
} else if constexpr (!lower_inclusive && upper_inclusive) {
|
} else if constexpr (!lower_inclusive && upper_inclusive) {
|
||||||
if (index >= src[i].length()) {
|
if (index >= src[offset].length()) {
|
||||||
res[i] = false;
|
res[i] = false;
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
auto value = src[i].get_data<GetType>(index);
|
auto value = src[offset].get_data<GetType>(index);
|
||||||
res[i] = val1 < value && value <= val2;
|
res[i] = val1 < value && value <= val2;
|
||||||
} else {
|
} else {
|
||||||
if (index >= src[i].length()) {
|
if (index >= src[offset].length()) {
|
||||||
res[i] = false;
|
res[i] = false;
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
auto value = src[i].get_data<GetType>(index);
|
auto value = src[offset].get_data<GetType>(index);
|
||||||
res[i] = val1 < value && value < val2;
|
res[i] = val1 < value && value < val2;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -211,11 +251,12 @@ class PhyBinaryRangeFilterExpr : public SegmentExpr {
|
|||||||
PreCheckOverflow(HighPrecisionType& val1,
|
PreCheckOverflow(HighPrecisionType& val1,
|
||||||
HighPrecisionType& val2,
|
HighPrecisionType& val2,
|
||||||
bool& lower_inclusive,
|
bool& lower_inclusive,
|
||||||
bool& upper_inclusive);
|
bool& upper_inclusive,
|
||||||
|
OffsetVector* input = nullptr);
|
||||||
|
|
||||||
template <typename T>
|
template <typename T>
|
||||||
VectorPtr
|
VectorPtr
|
||||||
ExecRangeVisitorImpl();
|
ExecRangeVisitorImpl(OffsetVector* input = nullptr);
|
||||||
|
|
||||||
template <typename T>
|
template <typename T>
|
||||||
VectorPtr
|
VectorPtr
|
||||||
@ -223,15 +264,15 @@ class PhyBinaryRangeFilterExpr : public SegmentExpr {
|
|||||||
|
|
||||||
template <typename T>
|
template <typename T>
|
||||||
VectorPtr
|
VectorPtr
|
||||||
ExecRangeVisitorImplForData();
|
ExecRangeVisitorImplForData(OffsetVector* input = nullptr);
|
||||||
|
|
||||||
template <typename ValueType>
|
template <typename ValueType>
|
||||||
VectorPtr
|
VectorPtr
|
||||||
ExecRangeVisitorImplForJson();
|
ExecRangeVisitorImplForJson(OffsetVector* input = nullptr);
|
||||||
|
|
||||||
template <typename ValueType>
|
template <typename ValueType>
|
||||||
VectorPtr
|
VectorPtr
|
||||||
ExecRangeVisitorImplForArray();
|
ExecRangeVisitorImplForArray(OffsetVector* input = nullptr);
|
||||||
|
|
||||||
private:
|
private:
|
||||||
std::shared_ptr<const milvus::expr::BinaryRangeFilterExpr> expr_;
|
std::shared_ptr<const milvus::expr::BinaryRangeFilterExpr> expr_;
|
||||||
|
|||||||
@ -28,6 +28,8 @@ namespace exec {
|
|||||||
|
|
||||||
void
|
void
|
||||||
PhyCallExpr::Eval(EvalCtx& context, VectorPtr& result) {
|
PhyCallExpr::Eval(EvalCtx& context, VectorPtr& result) {
|
||||||
|
auto offset_input = context.get_offset_input();
|
||||||
|
SetHasOffsetInput(offset_input != nullptr);
|
||||||
AssertInfo(inputs_.size() == expr_->inputs().size(),
|
AssertInfo(inputs_.size() == expr_->inputs().size(),
|
||||||
"logical call expr needs {} inputs, but {} inputs are provided",
|
"logical call expr needs {} inputs, but {} inputs are provided",
|
||||||
expr_->inputs().size(),
|
expr_->inputs().size(),
|
||||||
|
|||||||
@ -61,8 +61,10 @@ class PhyCallExpr : public Expr {
|
|||||||
|
|
||||||
void
|
void
|
||||||
MoveCursor() override {
|
MoveCursor() override {
|
||||||
for (auto input : inputs_) {
|
if (!has_offset_input_) {
|
||||||
input->MoveCursor();
|
for (auto input : inputs_) {
|
||||||
|
input->MoveCursor();
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@ -30,30 +30,32 @@ PhyColumnExpr::GetNextBatchSize() {
|
|||||||
|
|
||||||
void
|
void
|
||||||
PhyColumnExpr::Eval(EvalCtx& context, VectorPtr& result) {
|
PhyColumnExpr::Eval(EvalCtx& context, VectorPtr& result) {
|
||||||
|
auto input = context.get_offset_input();
|
||||||
|
SetHasOffsetInput(input != nullptr);
|
||||||
switch (this->expr_->type()) {
|
switch (this->expr_->type()) {
|
||||||
case DataType::BOOL:
|
case DataType::BOOL:
|
||||||
result = DoEval<bool>();
|
result = DoEval<bool>(input);
|
||||||
break;
|
break;
|
||||||
case DataType::INT8:
|
case DataType::INT8:
|
||||||
result = DoEval<int8_t>();
|
result = DoEval<int8_t>(input);
|
||||||
break;
|
break;
|
||||||
case DataType::INT16:
|
case DataType::INT16:
|
||||||
result = DoEval<int16_t>();
|
result = DoEval<int16_t>(input);
|
||||||
break;
|
break;
|
||||||
case DataType::INT32:
|
case DataType::INT32:
|
||||||
result = DoEval<int32_t>();
|
result = DoEval<int32_t>(input);
|
||||||
break;
|
break;
|
||||||
case DataType::INT64:
|
case DataType::INT64:
|
||||||
result = DoEval<int64_t>();
|
result = DoEval<int64_t>(input);
|
||||||
break;
|
break;
|
||||||
case DataType::FLOAT:
|
case DataType::FLOAT:
|
||||||
result = DoEval<float>();
|
result = DoEval<float>(input);
|
||||||
break;
|
break;
|
||||||
case DataType::DOUBLE:
|
case DataType::DOUBLE:
|
||||||
result = DoEval<double>();
|
result = DoEval<double>(input);
|
||||||
break;
|
break;
|
||||||
case DataType::VARCHAR: {
|
case DataType::VARCHAR: {
|
||||||
result = DoEval<std::string>();
|
result = DoEval<std::string>(input);
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
default:
|
default:
|
||||||
@ -65,8 +67,59 @@ PhyColumnExpr::Eval(EvalCtx& context, VectorPtr& result) {
|
|||||||
|
|
||||||
template <typename T>
|
template <typename T>
|
||||||
VectorPtr
|
VectorPtr
|
||||||
PhyColumnExpr::DoEval() {
|
PhyColumnExpr::DoEval(OffsetVector* input) {
|
||||||
// similar to PhyCompareFilterExpr::ExecCompareExprDispatcher(OpType op)
|
// similar to PhyCompareFilterExpr::ExecCompareExprDispatcher(OpType op)
|
||||||
|
// take offsets as input
|
||||||
|
if (has_offset_input_) {
|
||||||
|
auto real_batch_size = input->size();
|
||||||
|
if (real_batch_size == 0) {
|
||||||
|
return nullptr;
|
||||||
|
}
|
||||||
|
|
||||||
|
auto res_vec = std::make_shared<ColumnVector>(
|
||||||
|
expr_->GetColumn().data_type_, real_batch_size);
|
||||||
|
T* res_value = res_vec->RawAsValues<T>();
|
||||||
|
TargetBitmapView valid_res(res_vec->GetValidRawData(), real_batch_size);
|
||||||
|
valid_res.set();
|
||||||
|
|
||||||
|
auto data_barrier = segment_chunk_reader_.segment_->num_chunk_data(
|
||||||
|
expr_->GetColumn().field_id_);
|
||||||
|
|
||||||
|
int64_t processed_rows = 0;
|
||||||
|
const auto size_per_chunk = segment_chunk_reader_.SizePerChunk();
|
||||||
|
for (auto i = 0; i < real_batch_size; ++i) {
|
||||||
|
auto offset = (*input)[i];
|
||||||
|
auto [chunk_id,
|
||||||
|
chunk_offset] = [&]() -> std::pair<int64_t, int64_t> {
|
||||||
|
if (segment_chunk_reader_.segment_->type() ==
|
||||||
|
SegmentType::Growing) {
|
||||||
|
return {offset / size_per_chunk, offset % size_per_chunk};
|
||||||
|
} else if (segment_chunk_reader_.segment_->is_chunked() &&
|
||||||
|
data_barrier > 0) {
|
||||||
|
return segment_chunk_reader_.segment_->get_chunk_by_offset(
|
||||||
|
expr_->GetColumn().field_id_, offset);
|
||||||
|
} else {
|
||||||
|
return {0, offset};
|
||||||
|
}
|
||||||
|
}();
|
||||||
|
auto chunk_data = segment_chunk_reader_.GetChunkDataAccessor(
|
||||||
|
expr_->GetColumn().data_type_,
|
||||||
|
expr_->GetColumn().field_id_,
|
||||||
|
chunk_id,
|
||||||
|
data_barrier);
|
||||||
|
auto chunk_data_by_offset = chunk_data(chunk_offset);
|
||||||
|
if (!chunk_data_by_offset.has_value()) {
|
||||||
|
valid_res[processed_rows] = false;
|
||||||
|
} else {
|
||||||
|
res_value[processed_rows] =
|
||||||
|
boost::get<T>(chunk_data_by_offset.value());
|
||||||
|
}
|
||||||
|
processed_rows++;
|
||||||
|
}
|
||||||
|
return res_vec;
|
||||||
|
}
|
||||||
|
|
||||||
|
// normal path
|
||||||
if (segment_chunk_reader_.segment_->is_chunked()) {
|
if (segment_chunk_reader_.segment_->is_chunked()) {
|
||||||
auto real_batch_size = GetNextBatchSize();
|
auto real_batch_size = GetNextBatchSize();
|
||||||
if (real_batch_size == 0) {
|
if (real_batch_size == 0) {
|
||||||
|
|||||||
@ -67,16 +67,21 @@ class PhyColumnExpr : public Expr {
|
|||||||
|
|
||||||
void
|
void
|
||||||
MoveCursor() override {
|
MoveCursor() override {
|
||||||
if (segment_chunk_reader_.segment_->is_chunked()) {
|
if (!has_offset_input_) {
|
||||||
segment_chunk_reader_.MoveCursorForMultipleChunk(
|
if (segment_chunk_reader_.segment_->is_chunked()) {
|
||||||
current_chunk_id_,
|
segment_chunk_reader_.MoveCursorForMultipleChunk(
|
||||||
current_chunk_pos_,
|
current_chunk_id_,
|
||||||
expr_->GetColumn().field_id_,
|
current_chunk_pos_,
|
||||||
num_chunk_,
|
expr_->GetColumn().field_id_,
|
||||||
batch_size_);
|
num_chunk_,
|
||||||
} else {
|
batch_size_);
|
||||||
segment_chunk_reader_.MoveCursorForSingleChunk(
|
} else {
|
||||||
current_chunk_id_, current_chunk_pos_, num_chunk_, batch_size_);
|
segment_chunk_reader_.MoveCursorForSingleChunk(
|
||||||
|
current_chunk_id_,
|
||||||
|
current_chunk_pos_,
|
||||||
|
num_chunk_,
|
||||||
|
batch_size_);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -107,7 +112,7 @@ class PhyColumnExpr : public Expr {
|
|||||||
|
|
||||||
template <typename T>
|
template <typename T>
|
||||||
VectorPtr
|
VectorPtr
|
||||||
DoEval();
|
DoEval(OffsetVector* input = nullptr);
|
||||||
|
|
||||||
private:
|
private:
|
||||||
bool is_indexed_;
|
bool is_indexed_;
|
||||||
|
|||||||
@ -38,7 +38,77 @@ PhyCompareFilterExpr::GetNextBatchSize() {
|
|||||||
|
|
||||||
template <typename OpType>
|
template <typename OpType>
|
||||||
VectorPtr
|
VectorPtr
|
||||||
PhyCompareFilterExpr::ExecCompareExprDispatcher(OpType op) {
|
PhyCompareFilterExpr::ExecCompareExprDispatcher(OpType op,
|
||||||
|
OffsetVector* input) {
|
||||||
|
// take offsets as input
|
||||||
|
if (has_offset_input_) {
|
||||||
|
auto real_batch_size = input->size();
|
||||||
|
if (real_batch_size == 0) {
|
||||||
|
return nullptr;
|
||||||
|
}
|
||||||
|
|
||||||
|
auto res_vec = std::make_shared<ColumnVector>(
|
||||||
|
TargetBitmap(real_batch_size), TargetBitmap(real_batch_size));
|
||||||
|
TargetBitmapView res(res_vec->GetRawData(), real_batch_size);
|
||||||
|
TargetBitmapView valid_res(res_vec->GetValidRawData(), real_batch_size);
|
||||||
|
valid_res.set();
|
||||||
|
|
||||||
|
auto left_data_barrier = segment_chunk_reader_.segment_->num_chunk_data(
|
||||||
|
expr_->left_field_id_);
|
||||||
|
auto right_data_barrier =
|
||||||
|
segment_chunk_reader_.segment_->num_chunk_data(
|
||||||
|
expr_->right_field_id_);
|
||||||
|
|
||||||
|
int64_t processed_rows = 0;
|
||||||
|
const auto size_per_chunk = segment_chunk_reader_.SizePerChunk();
|
||||||
|
for (auto i = 0; i < real_batch_size; ++i) {
|
||||||
|
auto offset = (*input)[i];
|
||||||
|
auto get_chunk_id_and_offset =
|
||||||
|
[&](const FieldId field,
|
||||||
|
const int64_t data_barrier) -> std::pair<int64_t, int64_t> {
|
||||||
|
if (segment_chunk_reader_.segment_->type() ==
|
||||||
|
SegmentType::Growing) {
|
||||||
|
return {offset / size_per_chunk, offset % size_per_chunk};
|
||||||
|
} else if (segment_chunk_reader_.segment_->is_chunked() &&
|
||||||
|
data_barrier > 0) {
|
||||||
|
return segment_chunk_reader_.segment_->get_chunk_by_offset(
|
||||||
|
field, offset);
|
||||||
|
} else {
|
||||||
|
return {0, offset};
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
auto [left_chunk_id, left_chunk_offset] =
|
||||||
|
get_chunk_id_and_offset(left_field_, left_data_barrier);
|
||||||
|
auto [right_chunk_id, right_chunk_offset] =
|
||||||
|
get_chunk_id_and_offset(right_field_, right_data_barrier);
|
||||||
|
auto left = segment_chunk_reader_.GetChunkDataAccessor(
|
||||||
|
expr_->left_data_type_,
|
||||||
|
expr_->left_field_id_,
|
||||||
|
left_chunk_id,
|
||||||
|
left_data_barrier);
|
||||||
|
auto right = segment_chunk_reader_.GetChunkDataAccessor(
|
||||||
|
expr_->right_data_type_,
|
||||||
|
expr_->right_field_id_,
|
||||||
|
right_chunk_id,
|
||||||
|
right_data_barrier);
|
||||||
|
auto left_opt = left(left_chunk_offset);
|
||||||
|
auto right_opt = right(right_chunk_offset);
|
||||||
|
if (!left_opt.has_value() || !right_opt.has_value()) {
|
||||||
|
res[processed_rows] = false;
|
||||||
|
valid_res[processed_rows] = false;
|
||||||
|
} else {
|
||||||
|
res[processed_rows] = boost::apply_visitor(
|
||||||
|
milvus::query::Relational<decltype(op)>{},
|
||||||
|
left_opt.value(),
|
||||||
|
right_opt.value());
|
||||||
|
}
|
||||||
|
processed_rows++;
|
||||||
|
}
|
||||||
|
return res_vec;
|
||||||
|
}
|
||||||
|
|
||||||
|
// normal path
|
||||||
if (segment_chunk_reader_.segment_->is_chunked()) {
|
if (segment_chunk_reader_.segment_->is_chunked()) {
|
||||||
auto real_batch_size = GetNextBatchSize();
|
auto real_batch_size = GetNextBatchSize();
|
||||||
if (real_batch_size == 0) {
|
if (real_batch_size == 0) {
|
||||||
@ -140,39 +210,42 @@ PhyCompareFilterExpr::ExecCompareExprDispatcher(OpType op) {
|
|||||||
|
|
||||||
void
|
void
|
||||||
PhyCompareFilterExpr::Eval(EvalCtx& context, VectorPtr& result) {
|
PhyCompareFilterExpr::Eval(EvalCtx& context, VectorPtr& result) {
|
||||||
|
auto input = context.get_offset_input();
|
||||||
|
SetHasOffsetInput((input != nullptr));
|
||||||
// For segment both fields has no index, can use SIMD to speed up.
|
// For segment both fields has no index, can use SIMD to speed up.
|
||||||
// Avoiding too much call stack that blocks SIMD.
|
// Avoiding too much call stack that blocks SIMD.
|
||||||
if (!is_left_indexed_ && !is_right_indexed_ && !IsStringExpr()) {
|
if (!is_left_indexed_ && !is_right_indexed_ && !IsStringExpr()) {
|
||||||
result = ExecCompareExprDispatcherForBothDataSegment();
|
result = ExecCompareExprDispatcherForBothDataSegment(input);
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
result = ExecCompareExprDispatcherForHybridSegment();
|
result = ExecCompareExprDispatcherForHybridSegment(input);
|
||||||
}
|
}
|
||||||
|
|
||||||
VectorPtr
|
VectorPtr
|
||||||
PhyCompareFilterExpr::ExecCompareExprDispatcherForHybridSegment() {
|
PhyCompareFilterExpr::ExecCompareExprDispatcherForHybridSegment(
|
||||||
|
OffsetVector* input) {
|
||||||
switch (expr_->op_type_) {
|
switch (expr_->op_type_) {
|
||||||
case OpType::Equal: {
|
case OpType::Equal: {
|
||||||
return ExecCompareExprDispatcher(std::equal_to<>{});
|
return ExecCompareExprDispatcher(std::equal_to<>{}, input);
|
||||||
}
|
}
|
||||||
case OpType::NotEqual: {
|
case OpType::NotEqual: {
|
||||||
return ExecCompareExprDispatcher(std::not_equal_to<>{});
|
return ExecCompareExprDispatcher(std::not_equal_to<>{}, input);
|
||||||
}
|
}
|
||||||
case OpType::GreaterEqual: {
|
case OpType::GreaterEqual: {
|
||||||
return ExecCompareExprDispatcher(std::greater_equal<>{});
|
return ExecCompareExprDispatcher(std::greater_equal<>{}, input);
|
||||||
}
|
}
|
||||||
case OpType::GreaterThan: {
|
case OpType::GreaterThan: {
|
||||||
return ExecCompareExprDispatcher(std::greater<>{});
|
return ExecCompareExprDispatcher(std::greater<>{}, input);
|
||||||
}
|
}
|
||||||
case OpType::LessEqual: {
|
case OpType::LessEqual: {
|
||||||
return ExecCompareExprDispatcher(std::less_equal<>{});
|
return ExecCompareExprDispatcher(std::less_equal<>{}, input);
|
||||||
}
|
}
|
||||||
case OpType::LessThan: {
|
case OpType::LessThan: {
|
||||||
return ExecCompareExprDispatcher(std::less<>{});
|
return ExecCompareExprDispatcher(std::less<>{}, input);
|
||||||
}
|
}
|
||||||
case OpType::PrefixMatch: {
|
case OpType::PrefixMatch: {
|
||||||
return ExecCompareExprDispatcher(
|
return ExecCompareExprDispatcher(
|
||||||
milvus::query::MatchOp<OpType::PrefixMatch>{});
|
milvus::query::MatchOp<OpType::PrefixMatch>{}, input);
|
||||||
}
|
}
|
||||||
// case OpType::PostfixMatch: {
|
// case OpType::PostfixMatch: {
|
||||||
// }
|
// }
|
||||||
@ -183,22 +256,23 @@ PhyCompareFilterExpr::ExecCompareExprDispatcherForHybridSegment() {
|
|||||||
}
|
}
|
||||||
|
|
||||||
VectorPtr
|
VectorPtr
|
||||||
PhyCompareFilterExpr::ExecCompareExprDispatcherForBothDataSegment() {
|
PhyCompareFilterExpr::ExecCompareExprDispatcherForBothDataSegment(
|
||||||
|
OffsetVector* input) {
|
||||||
switch (expr_->left_data_type_) {
|
switch (expr_->left_data_type_) {
|
||||||
case DataType::BOOL:
|
case DataType::BOOL:
|
||||||
return ExecCompareLeftType<bool>();
|
return ExecCompareLeftType<bool>(input);
|
||||||
case DataType::INT8:
|
case DataType::INT8:
|
||||||
return ExecCompareLeftType<int8_t>();
|
return ExecCompareLeftType<int8_t>(input);
|
||||||
case DataType::INT16:
|
case DataType::INT16:
|
||||||
return ExecCompareLeftType<int16_t>();
|
return ExecCompareLeftType<int16_t>(input);
|
||||||
case DataType::INT32:
|
case DataType::INT32:
|
||||||
return ExecCompareLeftType<int32_t>();
|
return ExecCompareLeftType<int32_t>(input);
|
||||||
case DataType::INT64:
|
case DataType::INT64:
|
||||||
return ExecCompareLeftType<int64_t>();
|
return ExecCompareLeftType<int64_t>(input);
|
||||||
case DataType::FLOAT:
|
case DataType::FLOAT:
|
||||||
return ExecCompareLeftType<float>();
|
return ExecCompareLeftType<float>(input);
|
||||||
case DataType::DOUBLE:
|
case DataType::DOUBLE:
|
||||||
return ExecCompareLeftType<double>();
|
return ExecCompareLeftType<double>(input);
|
||||||
default:
|
default:
|
||||||
PanicInfo(
|
PanicInfo(
|
||||||
DataTypeInvalid,
|
DataTypeInvalid,
|
||||||
@ -209,22 +283,22 @@ PhyCompareFilterExpr::ExecCompareExprDispatcherForBothDataSegment() {
|
|||||||
|
|
||||||
template <typename T>
|
template <typename T>
|
||||||
VectorPtr
|
VectorPtr
|
||||||
PhyCompareFilterExpr::ExecCompareLeftType() {
|
PhyCompareFilterExpr::ExecCompareLeftType(OffsetVector* input) {
|
||||||
switch (expr_->right_data_type_) {
|
switch (expr_->right_data_type_) {
|
||||||
case DataType::BOOL:
|
case DataType::BOOL:
|
||||||
return ExecCompareRightType<T, bool>();
|
return ExecCompareRightType<T, bool>(input);
|
||||||
case DataType::INT8:
|
case DataType::INT8:
|
||||||
return ExecCompareRightType<T, int8_t>();
|
return ExecCompareRightType<T, int8_t>(input);
|
||||||
case DataType::INT16:
|
case DataType::INT16:
|
||||||
return ExecCompareRightType<T, int16_t>();
|
return ExecCompareRightType<T, int16_t>(input);
|
||||||
case DataType::INT32:
|
case DataType::INT32:
|
||||||
return ExecCompareRightType<T, int32_t>();
|
return ExecCompareRightType<T, int32_t>(input);
|
||||||
case DataType::INT64:
|
case DataType::INT64:
|
||||||
return ExecCompareRightType<T, int64_t>();
|
return ExecCompareRightType<T, int64_t>(input);
|
||||||
case DataType::FLOAT:
|
case DataType::FLOAT:
|
||||||
return ExecCompareRightType<T, float>();
|
return ExecCompareRightType<T, float>(input);
|
||||||
case DataType::DOUBLE:
|
case DataType::DOUBLE:
|
||||||
return ExecCompareRightType<T, double>();
|
return ExecCompareRightType<T, double>(input);
|
||||||
default:
|
default:
|
||||||
PanicInfo(
|
PanicInfo(
|
||||||
DataTypeInvalid,
|
DataTypeInvalid,
|
||||||
@ -235,8 +309,9 @@ PhyCompareFilterExpr::ExecCompareLeftType() {
|
|||||||
|
|
||||||
template <typename T, typename U>
|
template <typename T, typename U>
|
||||||
VectorPtr
|
VectorPtr
|
||||||
PhyCompareFilterExpr::ExecCompareRightType() {
|
PhyCompareFilterExpr::ExecCompareRightType(OffsetVector* input) {
|
||||||
auto real_batch_size = GetNextBatchSize();
|
auto real_batch_size =
|
||||||
|
has_offset_input_ ? input->size() : GetNextBatchSize();
|
||||||
if (real_batch_size == 0) {
|
if (real_batch_size == 0) {
|
||||||
return nullptr;
|
return nullptr;
|
||||||
}
|
}
|
||||||
@ -248,39 +323,47 @@ PhyCompareFilterExpr::ExecCompareRightType() {
|
|||||||
valid_res.set();
|
valid_res.set();
|
||||||
|
|
||||||
auto expr_type = expr_->op_type_;
|
auto expr_type = expr_->op_type_;
|
||||||
auto execute_sub_batch = [expr_type](const T* left,
|
auto execute_sub_batch = [expr_type]<FilterType filter_type =
|
||||||
const U* right,
|
FilterType::sequential>(
|
||||||
const int size,
|
const T* left,
|
||||||
TargetBitmapView res) {
|
const U* right,
|
||||||
|
const int32_t* offsets,
|
||||||
|
const int size,
|
||||||
|
TargetBitmapView res) {
|
||||||
switch (expr_type) {
|
switch (expr_type) {
|
||||||
case proto::plan::GreaterThan: {
|
case proto::plan::GreaterThan: {
|
||||||
CompareElementFunc<T, U, proto::plan::GreaterThan> func;
|
CompareElementFunc<T, U, proto::plan::GreaterThan, filter_type>
|
||||||
func(left, right, size, res);
|
func;
|
||||||
|
func(left, right, size, res, offsets);
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
case proto::plan::GreaterEqual: {
|
case proto::plan::GreaterEqual: {
|
||||||
CompareElementFunc<T, U, proto::plan::GreaterEqual> func;
|
CompareElementFunc<T, U, proto::plan::GreaterEqual, filter_type>
|
||||||
func(left, right, size, res);
|
func;
|
||||||
|
func(left, right, size, res, offsets);
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
case proto::plan::LessThan: {
|
case proto::plan::LessThan: {
|
||||||
CompareElementFunc<T, U, proto::plan::LessThan> func;
|
CompareElementFunc<T, U, proto::plan::LessThan, filter_type>
|
||||||
func(left, right, size, res);
|
func;
|
||||||
|
func(left, right, size, res, offsets);
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
case proto::plan::LessEqual: {
|
case proto::plan::LessEqual: {
|
||||||
CompareElementFunc<T, U, proto::plan::LessEqual> func;
|
CompareElementFunc<T, U, proto::plan::LessEqual, filter_type>
|
||||||
func(left, right, size, res);
|
func;
|
||||||
|
func(left, right, size, res, offsets);
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
case proto::plan::Equal: {
|
case proto::plan::Equal: {
|
||||||
CompareElementFunc<T, U, proto::plan::Equal> func;
|
CompareElementFunc<T, U, proto::plan::Equal, filter_type> func;
|
||||||
func(left, right, size, res);
|
func(left, right, size, res, offsets);
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
case proto::plan::NotEqual: {
|
case proto::plan::NotEqual: {
|
||||||
CompareElementFunc<T, U, proto::plan::NotEqual> func;
|
CompareElementFunc<T, U, proto::plan::NotEqual, filter_type>
|
||||||
func(left, right, size, res);
|
func;
|
||||||
|
func(left, right, size, res, offsets);
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
default:
|
default:
|
||||||
@ -290,8 +373,14 @@ PhyCompareFilterExpr::ExecCompareRightType() {
|
|||||||
expr_type));
|
expr_type));
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
int64_t processed_size =
|
int64_t processed_size;
|
||||||
ProcessBothDataChunks<T, U>(execute_sub_batch, res, valid_res);
|
if (has_offset_input_) {
|
||||||
|
processed_size = ProcessBothDataByOffsets<T, U>(
|
||||||
|
execute_sub_batch, input, res, valid_res);
|
||||||
|
} else {
|
||||||
|
processed_size = ProcessBothDataChunks<T, U>(
|
||||||
|
execute_sub_batch, input, res, valid_res);
|
||||||
|
}
|
||||||
AssertInfo(processed_size == real_batch_size,
|
AssertInfo(processed_size == real_batch_size,
|
||||||
"internal error: expr processed rows {} not equal "
|
"internal error: expr processed rows {} not equal "
|
||||||
"expect batch size {}",
|
"expect batch size {}",
|
||||||
|
|||||||
@ -30,36 +30,44 @@
|
|||||||
namespace milvus {
|
namespace milvus {
|
||||||
namespace exec {
|
namespace exec {
|
||||||
|
|
||||||
template <typename T, typename U, proto::plan::OpType op>
|
template <typename T,
|
||||||
|
typename U,
|
||||||
|
proto::plan::OpType op,
|
||||||
|
FilterType filter_type>
|
||||||
struct CompareElementFunc {
|
struct CompareElementFunc {
|
||||||
void
|
void
|
||||||
operator()(const T* left,
|
operator()(const T* left,
|
||||||
const U* right,
|
const U* right,
|
||||||
size_t size,
|
size_t size,
|
||||||
TargetBitmapView res) {
|
TargetBitmapView res,
|
||||||
/*
|
const int32_t* offsets = nullptr) {
|
||||||
// This is the original code, kept here for the documentation purposes
|
// This is the original code, kept here for the documentation purposes
|
||||||
for (int i = 0; i < size; ++i) {
|
// also, used for iterative filter
|
||||||
if constexpr (op == proto::plan::OpType::Equal) {
|
if constexpr (filter_type == FilterType::random) {
|
||||||
res[i] = left[i] == right[i];
|
for (int i = 0; i < size; ++i) {
|
||||||
} else if constexpr (op == proto::plan::OpType::NotEqual) {
|
auto offset = (offsets != nullptr) ? offsets[i] : i;
|
||||||
res[i] = left[i] != right[i];
|
if constexpr (op == proto::plan::OpType::Equal) {
|
||||||
} else if constexpr (op == proto::plan::OpType::GreaterThan) {
|
res[i] = left[offset] == right[offset];
|
||||||
res[i] = left[i] > right[i];
|
} else if constexpr (op == proto::plan::OpType::NotEqual) {
|
||||||
} else if constexpr (op == proto::plan::OpType::LessThan) {
|
res[i] = left[offset] != right[offset];
|
||||||
res[i] = left[i] < right[i];
|
} else if constexpr (op == proto::plan::OpType::GreaterThan) {
|
||||||
} else if constexpr (op == proto::plan::OpType::GreaterEqual) {
|
res[i] = left[offset] > right[offset];
|
||||||
res[i] = left[i] >= right[i];
|
} else if constexpr (op == proto::plan::OpType::LessThan) {
|
||||||
} else if constexpr (op == proto::plan::OpType::LessEqual) {
|
res[i] = left[offset] < right[offset];
|
||||||
res[i] = left[i] <= right[i];
|
} else if constexpr (op == proto::plan::OpType::GreaterEqual) {
|
||||||
} else {
|
res[i] = left[offset] >= right[offset];
|
||||||
PanicInfo(
|
} else if constexpr (op == proto::plan::OpType::LessEqual) {
|
||||||
OpTypeInvalid,
|
res[i] = left[offset] <= right[offset];
|
||||||
fmt::format("unsupported op_type:{} for CompareElementFunc",
|
} else {
|
||||||
op));
|
PanicInfo(
|
||||||
|
OpTypeInvalid,
|
||||||
|
fmt::format(
|
||||||
|
"unsupported op_type:{} for CompareElementFunc",
|
||||||
|
op));
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
return;
|
||||||
}
|
}
|
||||||
*/
|
|
||||||
|
|
||||||
if constexpr (op == proto::plan::OpType::Equal) {
|
if constexpr (op == proto::plan::OpType::Equal) {
|
||||||
res.inplace_compare_column<T, U, milvus::bitset::CompareOpType::EQ>(
|
res.inplace_compare_column<T, U, milvus::bitset::CompareOpType::EQ>(
|
||||||
@ -138,22 +146,27 @@ class PhyCompareFilterExpr : public Expr {
|
|||||||
|
|
||||||
void
|
void
|
||||||
MoveCursor() override {
|
MoveCursor() override {
|
||||||
if (segment_chunk_reader_.segment_->is_chunked()) {
|
if (!has_offset_input_) {
|
||||||
segment_chunk_reader_.MoveCursorForMultipleChunk(
|
if (segment_chunk_reader_.segment_->is_chunked()) {
|
||||||
left_current_chunk_id_,
|
segment_chunk_reader_.MoveCursorForMultipleChunk(
|
||||||
left_current_chunk_pos_,
|
left_current_chunk_id_,
|
||||||
left_field_,
|
left_current_chunk_pos_,
|
||||||
left_num_chunk_,
|
left_field_,
|
||||||
batch_size_);
|
left_num_chunk_,
|
||||||
segment_chunk_reader_.MoveCursorForMultipleChunk(
|
batch_size_);
|
||||||
right_current_chunk_id_,
|
segment_chunk_reader_.MoveCursorForMultipleChunk(
|
||||||
right_current_chunk_pos_,
|
right_current_chunk_id_,
|
||||||
right_field_,
|
right_current_chunk_pos_,
|
||||||
right_num_chunk_,
|
right_field_,
|
||||||
batch_size_);
|
right_num_chunk_,
|
||||||
} else {
|
batch_size_);
|
||||||
segment_chunk_reader_.MoveCursorForSingleChunk(
|
} else {
|
||||||
current_chunk_id_, current_chunk_pos_, num_chunk_, batch_size_);
|
segment_chunk_reader_.MoveCursorForSingleChunk(
|
||||||
|
current_chunk_id_,
|
||||||
|
current_chunk_pos_,
|
||||||
|
num_chunk_,
|
||||||
|
batch_size_);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -188,6 +201,7 @@ class PhyCompareFilterExpr : public Expr {
|
|||||||
template <typename T, typename U, typename FUNC, typename... ValTypes>
|
template <typename T, typename U, typename FUNC, typename... ValTypes>
|
||||||
int64_t
|
int64_t
|
||||||
ProcessBothDataChunks(FUNC func,
|
ProcessBothDataChunks(FUNC func,
|
||||||
|
OffsetVector* input,
|
||||||
TargetBitmapView res,
|
TargetBitmapView res,
|
||||||
TargetBitmapView valid_res,
|
TargetBitmapView valid_res,
|
||||||
ValTypes... values) {
|
ValTypes... values) {
|
||||||
@ -203,6 +217,97 @@ class PhyCompareFilterExpr : public Expr {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
template <typename T, typename U, typename FUNC, typename... ValTypes>
|
||||||
|
int64_t
|
||||||
|
ProcessBothDataByOffsets(FUNC func,
|
||||||
|
OffsetVector* input,
|
||||||
|
TargetBitmapView res,
|
||||||
|
TargetBitmapView valid_res,
|
||||||
|
ValTypes... values) {
|
||||||
|
int64_t size = input->size();
|
||||||
|
int64_t processed_size = 0;
|
||||||
|
const auto size_per_chunk = segment_chunk_reader_.SizePerChunk();
|
||||||
|
if (segment_chunk_reader_.segment_->is_chunked() ||
|
||||||
|
segment_chunk_reader_.segment_->type() == SegmentType::Growing) {
|
||||||
|
for (auto i = 0; i < size; ++i) {
|
||||||
|
auto offset = (*input)[i];
|
||||||
|
auto get_chunk_id_and_offset =
|
||||||
|
[&](const FieldId field) -> std::pair<int64_t, int64_t> {
|
||||||
|
if (segment_chunk_reader_.segment_->type() ==
|
||||||
|
SegmentType::Growing) {
|
||||||
|
auto size_per_chunk =
|
||||||
|
segment_chunk_reader_.SizePerChunk();
|
||||||
|
return {offset / size_per_chunk,
|
||||||
|
offset % size_per_chunk};
|
||||||
|
} else {
|
||||||
|
return segment_chunk_reader_.segment_
|
||||||
|
->get_chunk_by_offset(field, offset);
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
auto [left_chunk_id, left_chunk_offset] =
|
||||||
|
get_chunk_id_and_offset(left_field_);
|
||||||
|
auto [right_chunk_id, right_chunk_offset] =
|
||||||
|
get_chunk_id_and_offset(right_field_);
|
||||||
|
|
||||||
|
auto left_chunk = segment_chunk_reader_.segment_->chunk_data<T>(
|
||||||
|
left_field_, left_chunk_id);
|
||||||
|
|
||||||
|
auto right_chunk =
|
||||||
|
segment_chunk_reader_.segment_->chunk_data<U>(
|
||||||
|
right_field_, right_chunk_id);
|
||||||
|
const T* left_data = left_chunk.data() + left_chunk_offset;
|
||||||
|
const U* right_data = right_chunk.data() + right_chunk_offset;
|
||||||
|
func.template operator()<FilterType::random>(
|
||||||
|
left_data,
|
||||||
|
right_data,
|
||||||
|
nullptr,
|
||||||
|
1,
|
||||||
|
res + processed_size,
|
||||||
|
values...);
|
||||||
|
const bool* left_valid_data = left_chunk.valid_data();
|
||||||
|
const bool* right_valid_data = right_chunk.valid_data();
|
||||||
|
// mask with valid_data
|
||||||
|
if (left_valid_data && !left_valid_data[left_chunk_offset]) {
|
||||||
|
res[processed_size] = false;
|
||||||
|
valid_res[processed_size] = false;
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
if (right_valid_data && !right_valid_data[right_chunk_offset]) {
|
||||||
|
res[processed_size] = false;
|
||||||
|
valid_res[processed_size] = false;
|
||||||
|
}
|
||||||
|
processed_size++;
|
||||||
|
}
|
||||||
|
return processed_size;
|
||||||
|
} else {
|
||||||
|
auto left_chunk =
|
||||||
|
segment_chunk_reader_.segment_->chunk_data<T>(left_field_, 0);
|
||||||
|
auto right_chunk =
|
||||||
|
segment_chunk_reader_.segment_->chunk_data<U>(right_field_, 0);
|
||||||
|
const T* left_data = left_chunk.data();
|
||||||
|
const U* right_data = right_chunk.data();
|
||||||
|
func.template operator()<FilterType::random>(
|
||||||
|
left_data, right_data, input->data(), size, res, values...);
|
||||||
|
const bool* left_valid_data = left_chunk.valid_data();
|
||||||
|
const bool* right_valid_data = right_chunk.valid_data();
|
||||||
|
// mask with valid_data
|
||||||
|
for (int i = 0; i < size; ++i) {
|
||||||
|
if (left_valid_data && !left_valid_data[(*input)[i]]) {
|
||||||
|
res[i] = false;
|
||||||
|
valid_res[i] = false;
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
if (right_valid_data && !right_valid_data[(*input)[i]]) {
|
||||||
|
res[i] = false;
|
||||||
|
valid_res[i] = false;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
processed_size += size;
|
||||||
|
return processed_size;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
template <typename T, typename U, typename FUNC, typename... ValTypes>
|
template <typename T, typename U, typename FUNC, typename... ValTypes>
|
||||||
int64_t
|
int64_t
|
||||||
ProcessBothDataChunksForSingleChunk(FUNC func,
|
ProcessBothDataChunksForSingleChunk(FUNC func,
|
||||||
@ -239,7 +344,12 @@ class PhyCompareFilterExpr : public Expr {
|
|||||||
|
|
||||||
const T* left_data = left_chunk.data() + data_pos;
|
const T* left_data = left_chunk.data() + data_pos;
|
||||||
const U* right_data = right_chunk.data() + data_pos;
|
const U* right_data = right_chunk.data() + data_pos;
|
||||||
func(left_data, right_data, size, res + processed_size, values...);
|
func(left_data,
|
||||||
|
right_data,
|
||||||
|
nullptr,
|
||||||
|
size,
|
||||||
|
res + processed_size,
|
||||||
|
values...);
|
||||||
const bool* left_valid_data = left_chunk.valid_data();
|
const bool* left_valid_data = left_chunk.valid_data();
|
||||||
const bool* right_valid_data = right_chunk.valid_data();
|
const bool* right_valid_data = right_chunk.valid_data();
|
||||||
// mask with valid_data
|
// mask with valid_data
|
||||||
@ -307,7 +417,12 @@ class PhyCompareFilterExpr : public Expr {
|
|||||||
|
|
||||||
const T* left_data = left_chunk.data() + data_pos;
|
const T* left_data = left_chunk.data() + data_pos;
|
||||||
const U* right_data = right_chunk.data() + data_pos;
|
const U* right_data = right_chunk.data() + data_pos;
|
||||||
func(left_data, right_data, size, res + processed_size, values...);
|
func(left_data,
|
||||||
|
right_data,
|
||||||
|
nullptr,
|
||||||
|
size,
|
||||||
|
res + processed_size,
|
||||||
|
values...);
|
||||||
const bool* left_valid_data = left_chunk.valid_data();
|
const bool* left_valid_data = left_chunk.valid_data();
|
||||||
const bool* right_valid_data = right_chunk.valid_data();
|
const bool* right_valid_data = right_chunk.valid_data();
|
||||||
// mask with valid_data
|
// mask with valid_data
|
||||||
@ -336,21 +451,21 @@ class PhyCompareFilterExpr : public Expr {
|
|||||||
|
|
||||||
template <typename OpType>
|
template <typename OpType>
|
||||||
VectorPtr
|
VectorPtr
|
||||||
ExecCompareExprDispatcher(OpType op);
|
ExecCompareExprDispatcher(OpType op, OffsetVector* input = nullptr);
|
||||||
|
|
||||||
VectorPtr
|
VectorPtr
|
||||||
ExecCompareExprDispatcherForHybridSegment();
|
ExecCompareExprDispatcherForHybridSegment(OffsetVector* input = nullptr);
|
||||||
|
|
||||||
VectorPtr
|
VectorPtr
|
||||||
ExecCompareExprDispatcherForBothDataSegment();
|
ExecCompareExprDispatcherForBothDataSegment(OffsetVector* input = nullptr);
|
||||||
|
|
||||||
template <typename T>
|
template <typename T>
|
||||||
VectorPtr
|
VectorPtr
|
||||||
ExecCompareLeftType();
|
ExecCompareLeftType(OffsetVector* input = nullptr);
|
||||||
|
|
||||||
template <typename T, typename U>
|
template <typename T, typename U>
|
||||||
VectorPtr
|
VectorPtr
|
||||||
ExecCompareRightType();
|
ExecCompareRightType(OffsetVector* input = nullptr);
|
||||||
|
|
||||||
private:
|
private:
|
||||||
const FieldId left_field_;
|
const FieldId left_field_;
|
||||||
|
|||||||
@ -84,11 +84,23 @@ class PhyConjunctFilterExpr : public Expr {
|
|||||||
|
|
||||||
void
|
void
|
||||||
MoveCursor() override {
|
MoveCursor() override {
|
||||||
for (auto& input : inputs_) {
|
if (!has_offset_input_) {
|
||||||
input->MoveCursor();
|
for (auto& input : inputs_) {
|
||||||
|
input->MoveCursor();
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
bool
|
||||||
|
SupportOffsetInput() override {
|
||||||
|
for (auto& input : inputs_) {
|
||||||
|
if (!(input->SupportOffsetInput())) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
private:
|
private:
|
||||||
int64_t
|
int64_t
|
||||||
UpdateResult(ColumnVectorPtr& input_result,
|
UpdateResult(ColumnVectorPtr& input_result,
|
||||||
|
|||||||
@ -28,17 +28,26 @@ namespace milvus {
|
|||||||
namespace exec {
|
namespace exec {
|
||||||
|
|
||||||
class ExprSet;
|
class ExprSet;
|
||||||
|
|
||||||
|
using OffsetVector = FixedVector<int32_t>;
|
||||||
class EvalCtx {
|
class EvalCtx {
|
||||||
public:
|
public:
|
||||||
EvalCtx(ExecContext* exec_ctx, ExprSet* expr_set, RowVector* row)
|
EvalCtx(ExecContext* exec_ctx,
|
||||||
: exec_ctx_(exec_ctx), expr_set_(expr_set), row_(row) {
|
ExprSet* expr_set,
|
||||||
|
OffsetVector* offset_input)
|
||||||
|
: exec_ctx_(exec_ctx),
|
||||||
|
expr_set_(expr_set),
|
||||||
|
offset_input_(offset_input) {
|
||||||
assert(exec_ctx_ != nullptr);
|
assert(exec_ctx_ != nullptr);
|
||||||
assert(expr_set_ != nullptr);
|
assert(expr_set_ != nullptr);
|
||||||
// assert(row_ != nullptr);
|
}
|
||||||
|
|
||||||
|
explicit EvalCtx(ExecContext* exec_ctx, ExprSet* expr_set)
|
||||||
|
: exec_ctx_(exec_ctx), expr_set_(expr_set), offset_input_(nullptr) {
|
||||||
}
|
}
|
||||||
|
|
||||||
explicit EvalCtx(ExecContext* exec_ctx)
|
explicit EvalCtx(ExecContext* exec_ctx)
|
||||||
: exec_ctx_(exec_ctx), expr_set_(nullptr), row_(nullptr) {
|
: exec_ctx_(exec_ctx), expr_set_(nullptr), offset_input_(nullptr) {
|
||||||
}
|
}
|
||||||
|
|
||||||
ExecContext*
|
ExecContext*
|
||||||
@ -51,11 +60,22 @@ class EvalCtx {
|
|||||||
return exec_ctx_->get_query_config();
|
return exec_ctx_->get_query_config();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
inline OffsetVector*
|
||||||
|
get_offset_input() {
|
||||||
|
return offset_input_;
|
||||||
|
}
|
||||||
|
|
||||||
|
inline void
|
||||||
|
set_offset_input(OffsetVector* offset_input) {
|
||||||
|
offset_input_ = offset_input;
|
||||||
|
}
|
||||||
|
|
||||||
private:
|
private:
|
||||||
ExecContext* exec_ctx_;
|
ExecContext* exec_ctx_ = nullptr;
|
||||||
ExprSet* expr_set_;
|
ExprSet* expr_set_ = nullptr;
|
||||||
RowVector* row_;
|
// we may accept offsets array as input and do expr filtering on these data
|
||||||
bool input_no_nulls_;
|
OffsetVector* offset_input_ = nullptr;
|
||||||
|
bool input_no_nulls_ = false;
|
||||||
};
|
};
|
||||||
|
|
||||||
} // namespace exec
|
} // namespace exec
|
||||||
|
|||||||
@ -22,13 +22,15 @@ namespace exec {
|
|||||||
|
|
||||||
void
|
void
|
||||||
PhyExistsFilterExpr::Eval(EvalCtx& context, VectorPtr& result) {
|
PhyExistsFilterExpr::Eval(EvalCtx& context, VectorPtr& result) {
|
||||||
|
auto input = context.get_offset_input();
|
||||||
|
SetHasOffsetInput((input != nullptr));
|
||||||
switch (expr_->column_.data_type_) {
|
switch (expr_->column_.data_type_) {
|
||||||
case DataType::JSON: {
|
case DataType::JSON: {
|
||||||
if (is_index_mode_) {
|
if (is_index_mode_) {
|
||||||
PanicInfo(ExprInvalid,
|
PanicInfo(ExprInvalid,
|
||||||
"exists expr for json index mode not supported");
|
"exists expr for json index mode not supported");
|
||||||
}
|
}
|
||||||
result = EvalJsonExistsForDataSegment();
|
result = EvalJsonExistsForDataSegment(input);
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
default:
|
default:
|
||||||
@ -39,8 +41,9 @@ PhyExistsFilterExpr::Eval(EvalCtx& context, VectorPtr& result) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
VectorPtr
|
VectorPtr
|
||||||
PhyExistsFilterExpr::EvalJsonExistsForDataSegment() {
|
PhyExistsFilterExpr::EvalJsonExistsForDataSegment(OffsetVector* input) {
|
||||||
auto real_batch_size = GetNextBatchSize();
|
auto real_batch_size =
|
||||||
|
has_offset_input_ ? input->size() : GetNextBatchSize();
|
||||||
if (real_batch_size == 0) {
|
if (real_batch_size == 0) {
|
||||||
return nullptr;
|
return nullptr;
|
||||||
}
|
}
|
||||||
@ -51,23 +54,40 @@ PhyExistsFilterExpr::EvalJsonExistsForDataSegment() {
|
|||||||
valid_res.set();
|
valid_res.set();
|
||||||
|
|
||||||
auto pointer = milvus::Json::pointer(expr_->column_.nested_path_);
|
auto pointer = milvus::Json::pointer(expr_->column_.nested_path_);
|
||||||
auto execute_sub_batch = [](const milvus::Json* data,
|
auto execute_sub_batch =
|
||||||
const bool* valid_data,
|
[]<FilterType filter_type = FilterType::sequential>(
|
||||||
const int size,
|
const milvus::Json* data,
|
||||||
TargetBitmapView res,
|
const bool* valid_data,
|
||||||
TargetBitmapView valid_res,
|
const int32_t* offsets,
|
||||||
const std::string& pointer) {
|
const int size,
|
||||||
|
TargetBitmapView res,
|
||||||
|
TargetBitmapView valid_res,
|
||||||
|
const std::string& pointer) {
|
||||||
for (int i = 0; i < size; ++i) {
|
for (int i = 0; i < size; ++i) {
|
||||||
if (valid_data != nullptr && !valid_data[i]) {
|
auto offset = i;
|
||||||
|
if constexpr (filter_type == FilterType::random) {
|
||||||
|
offset = (offsets) ? offsets[i] : i;
|
||||||
|
}
|
||||||
|
if (valid_data != nullptr && !valid_data[offset]) {
|
||||||
res[i] = valid_res[i] = false;
|
res[i] = valid_res[i] = false;
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
res[i] = data[i].exist(pointer);
|
res[i] = data[offset].exist(pointer);
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
int64_t processed_size = ProcessDataChunks<Json>(
|
int64_t processed_size;
|
||||||
execute_sub_batch, std::nullptr_t{}, res, valid_res, pointer);
|
if (has_offset_input_) {
|
||||||
|
processed_size = ProcessDataByOffsets<Json>(execute_sub_batch,
|
||||||
|
std::nullptr_t{},
|
||||||
|
input,
|
||||||
|
res,
|
||||||
|
valid_res,
|
||||||
|
pointer);
|
||||||
|
} else {
|
||||||
|
processed_size = ProcessDataChunks<Json>(
|
||||||
|
execute_sub_batch, std::nullptr_t{}, res, valid_res, pointer);
|
||||||
|
}
|
||||||
AssertInfo(processed_size == real_batch_size,
|
AssertInfo(processed_size == real_batch_size,
|
||||||
"internal error: expr processed rows {} not equal "
|
"internal error: expr processed rows {} not equal "
|
||||||
"expect batch size {}",
|
"expect batch size {}",
|
||||||
|
|||||||
@ -57,7 +57,7 @@ class PhyExistsFilterExpr : public SegmentExpr {
|
|||||||
|
|
||||||
private:
|
private:
|
||||||
VectorPtr
|
VectorPtr
|
||||||
EvalJsonExistsForDataSegment();
|
EvalJsonExistsForDataSegment(OffsetVector* input = nullptr);
|
||||||
|
|
||||||
private:
|
private:
|
||||||
std::shared_ptr<const milvus::expr::ExistsExpr> expr_;
|
std::shared_ptr<const milvus::expr::ExistsExpr> expr_;
|
||||||
|
|||||||
@ -31,6 +31,8 @@
|
|||||||
namespace milvus {
|
namespace milvus {
|
||||||
namespace exec {
|
namespace exec {
|
||||||
|
|
||||||
|
enum class FilterType { sequential = 0, random = 1 };
|
||||||
|
|
||||||
class Expr {
|
class Expr {
|
||||||
public:
|
public:
|
||||||
Expr(DataType type,
|
Expr(DataType type,
|
||||||
@ -73,12 +75,26 @@ class Expr {
|
|||||||
MoveCursor() {
|
MoveCursor() {
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void
|
||||||
|
SetHasOffsetInput(bool has_offset_input) {
|
||||||
|
has_offset_input_ = has_offset_input;
|
||||||
|
}
|
||||||
|
|
||||||
|
virtual bool
|
||||||
|
SupportOffsetInput() {
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
protected:
|
protected:
|
||||||
DataType type_;
|
DataType type_;
|
||||||
const std::vector<std::shared_ptr<Expr>> inputs_;
|
const std::vector<std::shared_ptr<Expr>> inputs_;
|
||||||
std::string name_;
|
std::string name_;
|
||||||
// NOTE: unused
|
// NOTE: unused
|
||||||
std::shared_ptr<VectorFunction> vector_func_;
|
std::shared_ptr<VectorFunction> vector_func_;
|
||||||
|
|
||||||
|
// whether we have offset input and do expr filtering on these data
|
||||||
|
// default is false which means we will do expr filtering on the total segment data
|
||||||
|
bool has_offset_input_ = false;
|
||||||
};
|
};
|
||||||
|
|
||||||
using ExprPtr = std::shared_ptr<milvus::exec::Expr>;
|
using ExprPtr = std::shared_ptr<milvus::exec::Expr>;
|
||||||
@ -204,13 +220,16 @@ class SegmentExpr : public Expr {
|
|||||||
|
|
||||||
void
|
void
|
||||||
MoveCursor() override {
|
MoveCursor() override {
|
||||||
if (is_index_mode_) {
|
// when we specify input, do not maintain states
|
||||||
MoveCursorForIndex();
|
if (!has_offset_input_) {
|
||||||
if (segment_->HasFieldData(field_id_)) {
|
if (is_index_mode_) {
|
||||||
|
MoveCursorForIndex();
|
||||||
|
if (segment_->HasFieldData(field_id_)) {
|
||||||
|
MoveCursorForData();
|
||||||
|
}
|
||||||
|
} else {
|
||||||
MoveCursorForData();
|
MoveCursorForData();
|
||||||
}
|
}
|
||||||
} else {
|
|
||||||
MoveCursorForData();
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -275,6 +294,7 @@ class SegmentExpr : public Expr {
|
|||||||
// use valid_data to see if raw data is null
|
// use valid_data to see if raw data is null
|
||||||
func(views_info.first.data(),
|
func(views_info.first.data(),
|
||||||
views_info.second.data(),
|
views_info.second.data(),
|
||||||
|
nullptr,
|
||||||
need_size,
|
need_size,
|
||||||
res,
|
res,
|
||||||
valid_res,
|
valid_res,
|
||||||
@ -286,6 +306,253 @@ class SegmentExpr : public Expr {
|
|||||||
return need_size;
|
return need_size;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// accept offsets array and process on the scalar data by offsets
|
||||||
|
// stateless! Just check and set bitset as result, does not need to move cursor
|
||||||
|
// used for processing raw data expr for sealed segments.
|
||||||
|
// now only used for std::string_view && json
|
||||||
|
// TODO: support more types
|
||||||
|
template <typename T, typename FUNC, typename... ValTypes>
|
||||||
|
int64_t
|
||||||
|
ProcessDataByOffsetsForSealedSeg(
|
||||||
|
FUNC func,
|
||||||
|
std::function<bool(const milvus::SkipIndex&, FieldId, int)> skip_func,
|
||||||
|
OffsetVector* input,
|
||||||
|
TargetBitmapView res,
|
||||||
|
TargetBitmapView valid_res,
|
||||||
|
ValTypes... values) {
|
||||||
|
// For non_chunked sealed segment, only single chunk
|
||||||
|
Assert(num_data_chunk_ == 1);
|
||||||
|
|
||||||
|
auto& skip_index = segment_->GetSkipIndex();
|
||||||
|
auto [data_vec, valid_data] =
|
||||||
|
segment_->get_views_by_offsets<T>(field_id_, 0, *input);
|
||||||
|
if (!skip_func || !skip_func(skip_index, field_id_, 0)) {
|
||||||
|
func(data_vec.data(),
|
||||||
|
valid_data.data(),
|
||||||
|
nullptr,
|
||||||
|
input->size(),
|
||||||
|
res,
|
||||||
|
valid_res,
|
||||||
|
values...);
|
||||||
|
} else {
|
||||||
|
ApplyValidData(valid_data.data(), res, valid_res, input->size());
|
||||||
|
}
|
||||||
|
return input->size();
|
||||||
|
}
|
||||||
|
|
||||||
|
template <typename T, typename FUNC, typename... ValTypes>
|
||||||
|
VectorPtr
|
||||||
|
ProcessIndexChunksByOffsets(FUNC func,
|
||||||
|
OffsetVector* input,
|
||||||
|
ValTypes... values) {
|
||||||
|
AssertInfo(num_index_chunk_ == 1, "scalar index chunk num must be 1");
|
||||||
|
typedef std::
|
||||||
|
conditional_t<std::is_same_v<T, std::string_view>, std::string, T>
|
||||||
|
IndexInnerType;
|
||||||
|
using Index = index::ScalarIndex<IndexInnerType>;
|
||||||
|
TargetBitmap valid_res(input->size());
|
||||||
|
|
||||||
|
const Index& index =
|
||||||
|
segment_->chunk_scalar_index<IndexInnerType>(field_id_, 0);
|
||||||
|
auto* index_ptr = const_cast<Index*>(&index);
|
||||||
|
auto valid_result = index_ptr->IsNotNull();
|
||||||
|
for (auto i = 0; i < input->size(); ++i) {
|
||||||
|
valid_res[i] = valid_result[(*input)[i]];
|
||||||
|
}
|
||||||
|
auto result = std::move(func.template operator()<FilterType::random>(
|
||||||
|
index_ptr, values..., input->data()));
|
||||||
|
return std::make_shared<ColumnVector>(std::move(result),
|
||||||
|
std::move(valid_res));
|
||||||
|
}
|
||||||
|
|
||||||
|
// when we have scalar index and index contains raw data, could go with index chunk by offsets
|
||||||
|
template <typename T, typename FUNC, typename... ValTypes>
|
||||||
|
int64_t
|
||||||
|
ProcessIndexLookupByOffsets(
|
||||||
|
FUNC func,
|
||||||
|
std::function<bool(const milvus::SkipIndex&, FieldId, int)> skip_func,
|
||||||
|
OffsetVector* input,
|
||||||
|
TargetBitmapView res,
|
||||||
|
TargetBitmapView valid_res,
|
||||||
|
ValTypes... values) {
|
||||||
|
AssertInfo(num_index_chunk_ == 1, "scalar index chunk num must be 1");
|
||||||
|
auto& skip_index = segment_->GetSkipIndex();
|
||||||
|
|
||||||
|
typedef std::
|
||||||
|
conditional_t<std::is_same_v<T, std::string_view>, std::string, T>
|
||||||
|
IndexInnerType;
|
||||||
|
using Index = index::ScalarIndex<IndexInnerType>;
|
||||||
|
int64_t processed_size = 0;
|
||||||
|
const Index& index =
|
||||||
|
segment_->chunk_scalar_index<IndexInnerType>(field_id_, 0);
|
||||||
|
auto* index_ptr = const_cast<Index*>(&index);
|
||||||
|
auto valid_result = index_ptr->IsNotNull();
|
||||||
|
auto batch_size = input->size();
|
||||||
|
|
||||||
|
if (!skip_func || !skip_func(skip_index, field_id_, 0)) {
|
||||||
|
for (auto i = 0; i < batch_size; ++i) {
|
||||||
|
auto offset = (*input)[i];
|
||||||
|
auto raw = index_ptr->Reverse_Lookup(offset);
|
||||||
|
if (!raw.has_value()) {
|
||||||
|
res[i] = false;
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
T raw_data = raw.value();
|
||||||
|
bool valid_data = valid_result[offset];
|
||||||
|
func.template operator()<FilterType::random>(&raw_data,
|
||||||
|
&valid_data,
|
||||||
|
nullptr,
|
||||||
|
1,
|
||||||
|
res + i,
|
||||||
|
valid_res + i,
|
||||||
|
values...);
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
for (auto i = 0; i < batch_size; ++i) {
|
||||||
|
auto offset = (*input)[i];
|
||||||
|
res[i] = valid_res[i] = valid_result[offset];
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return batch_size;
|
||||||
|
}
|
||||||
|
|
||||||
|
// accept offsets array and process on the scalar data by offsets
|
||||||
|
// stateless! Just check and set bitset as result, does not need to move cursor
|
||||||
|
template <typename T, typename FUNC, typename... ValTypes>
|
||||||
|
int64_t
|
||||||
|
ProcessDataByOffsets(
|
||||||
|
FUNC func,
|
||||||
|
std::function<bool(const milvus::SkipIndex&, FieldId, int)> skip_func,
|
||||||
|
OffsetVector* input,
|
||||||
|
TargetBitmapView res,
|
||||||
|
TargetBitmapView valid_res,
|
||||||
|
ValTypes... values) {
|
||||||
|
int64_t processed_size = 0;
|
||||||
|
|
||||||
|
// index reverse lookup
|
||||||
|
if (is_index_mode_ && num_data_chunk_ == 0) {
|
||||||
|
return ProcessIndexLookupByOffsets<T>(
|
||||||
|
func, skip_func, input, res, valid_res, values...);
|
||||||
|
}
|
||||||
|
|
||||||
|
auto& skip_index = segment_->GetSkipIndex();
|
||||||
|
|
||||||
|
// raw data scan
|
||||||
|
// sealed segment
|
||||||
|
if (segment_->type() == SegmentType::Sealed) {
|
||||||
|
if (segment_->is_chunked()) {
|
||||||
|
if constexpr (std::is_same_v<T, std::string_view> ||
|
||||||
|
std::is_same_v<T, Json>) {
|
||||||
|
for (size_t i = 0; i < input->size(); ++i) {
|
||||||
|
int64_t offset = (*input)[i];
|
||||||
|
auto [chunk_id, chunk_offset] =
|
||||||
|
segment_->get_chunk_by_offset(field_id_, offset);
|
||||||
|
auto [data_vec, valid_data] =
|
||||||
|
segment_->get_views_by_offsets<T>(
|
||||||
|
field_id_, chunk_id, {int32_t(chunk_offset)});
|
||||||
|
if (!skip_func ||
|
||||||
|
!skip_func(skip_index, field_id_, chunk_id)) {
|
||||||
|
func.template operator()<FilterType::random>(
|
||||||
|
data_vec.data(),
|
||||||
|
valid_data.data(),
|
||||||
|
nullptr,
|
||||||
|
1,
|
||||||
|
res + processed_size,
|
||||||
|
valid_res + processed_size,
|
||||||
|
values...);
|
||||||
|
} else {
|
||||||
|
res[processed_size] = valid_res[processed_size] =
|
||||||
|
(valid_data[0]);
|
||||||
|
}
|
||||||
|
processed_size++;
|
||||||
|
}
|
||||||
|
return input->size();
|
||||||
|
}
|
||||||
|
for (size_t i = 0; i < input->size(); ++i) {
|
||||||
|
int64_t offset = (*input)[i];
|
||||||
|
auto [chunk_id, chunk_offset] =
|
||||||
|
segment_->get_chunk_by_offset(field_id_, offset);
|
||||||
|
auto chunk = segment_->chunk_data<T>(field_id_, chunk_id);
|
||||||
|
const T* data = chunk.data() + chunk_offset;
|
||||||
|
const bool* valid_data = chunk.valid_data();
|
||||||
|
if (valid_data != nullptr) {
|
||||||
|
valid_data += chunk_offset;
|
||||||
|
}
|
||||||
|
if (!skip_func ||
|
||||||
|
!skip_func(skip_index, field_id_, chunk_id)) {
|
||||||
|
func.template operator()<FilterType::random>(
|
||||||
|
data,
|
||||||
|
valid_data,
|
||||||
|
nullptr,
|
||||||
|
1,
|
||||||
|
res + processed_size,
|
||||||
|
valid_res + processed_size,
|
||||||
|
values...);
|
||||||
|
} else {
|
||||||
|
ApplyValidData(valid_data,
|
||||||
|
res + processed_size,
|
||||||
|
valid_res + processed_size,
|
||||||
|
1);
|
||||||
|
}
|
||||||
|
processed_size++;
|
||||||
|
}
|
||||||
|
return input->size();
|
||||||
|
} else {
|
||||||
|
if constexpr (std::is_same_v<T, std::string_view> ||
|
||||||
|
std::is_same_v<T, Json>) {
|
||||||
|
return ProcessDataByOffsetsForSealedSeg<T>(
|
||||||
|
func, skip_func, input, res, valid_res, values...);
|
||||||
|
}
|
||||||
|
auto chunk = segment_->chunk_data<T>(field_id_, 0);
|
||||||
|
const T* data = chunk.data();
|
||||||
|
const bool* valid_data = chunk.valid_data();
|
||||||
|
if (!skip_func || !skip_func(skip_index, field_id_, 0)) {
|
||||||
|
func.template operator()<FilterType::random>(data,
|
||||||
|
valid_data,
|
||||||
|
input->data(),
|
||||||
|
input->size(),
|
||||||
|
res,
|
||||||
|
valid_res,
|
||||||
|
values...);
|
||||||
|
} else {
|
||||||
|
ApplyValidData(valid_data, res, valid_res, input->size());
|
||||||
|
}
|
||||||
|
return input->size();
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
// growing segment
|
||||||
|
for (size_t i = 0; i < input->size(); ++i) {
|
||||||
|
int64_t offset = (*input)[i];
|
||||||
|
auto chunk_id = offset / size_per_chunk_;
|
||||||
|
auto chunk_offset = offset % size_per_chunk_;
|
||||||
|
auto chunk = segment_->chunk_data<T>(field_id_, chunk_id);
|
||||||
|
const T* data = chunk.data() + chunk_offset;
|
||||||
|
const bool* valid_data = chunk.valid_data();
|
||||||
|
if (valid_data != nullptr) {
|
||||||
|
valid_data += chunk_offset;
|
||||||
|
}
|
||||||
|
if (!skip_func || !skip_func(skip_index, field_id_, chunk_id)) {
|
||||||
|
func.template operator()<FilterType::random>(
|
||||||
|
data,
|
||||||
|
valid_data,
|
||||||
|
nullptr,
|
||||||
|
1,
|
||||||
|
res + processed_size,
|
||||||
|
valid_res + processed_size,
|
||||||
|
values...);
|
||||||
|
} else {
|
||||||
|
ApplyValidData(valid_data,
|
||||||
|
res + processed_size,
|
||||||
|
valid_res + processed_size,
|
||||||
|
1);
|
||||||
|
}
|
||||||
|
processed_size++;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return input->size();
|
||||||
|
}
|
||||||
|
|
||||||
template <typename T, typename FUNC, typename... ValTypes>
|
template <typename T, typename FUNC, typename... ValTypes>
|
||||||
int64_t
|
int64_t
|
||||||
ProcessDataChunksForSingleChunk(
|
ProcessDataChunksForSingleChunk(
|
||||||
@ -328,6 +595,7 @@ class SegmentExpr : public Expr {
|
|||||||
const T* data = chunk.data() + data_pos;
|
const T* data = chunk.data() + data_pos;
|
||||||
func(data,
|
func(data,
|
||||||
valid_data,
|
valid_data,
|
||||||
|
nullptr,
|
||||||
size,
|
size,
|
||||||
res + processed_size,
|
res + processed_size,
|
||||||
valid_res + processed_size,
|
valid_res + processed_size,
|
||||||
@ -384,12 +652,12 @@ class SegmentExpr : public Expr {
|
|||||||
if (segment_->type() == SegmentType::Sealed) {
|
if (segment_->type() == SegmentType::Sealed) {
|
||||||
// first is the raw data, second is valid_data
|
// first is the raw data, second is valid_data
|
||||||
// use valid_data to see if raw data is null
|
// use valid_data to see if raw data is null
|
||||||
auto fetched_data = segment_->get_batch_views<T>(
|
auto [data_vec, valid_data] =
|
||||||
field_id_, i, data_pos, size);
|
segment_->get_batch_views<T>(
|
||||||
auto data_vec = fetched_data.first;
|
field_id_, i, data_pos, size);
|
||||||
auto valid_data = fetched_data.second;
|
|
||||||
func(data_vec.data(),
|
func(data_vec.data(),
|
||||||
valid_data.data(),
|
valid_data.data(),
|
||||||
|
nullptr,
|
||||||
size,
|
size,
|
||||||
res + processed_size,
|
res + processed_size,
|
||||||
valid_res + processed_size,
|
valid_res + processed_size,
|
||||||
@ -406,6 +674,7 @@ class SegmentExpr : public Expr {
|
|||||||
}
|
}
|
||||||
func(data,
|
func(data,
|
||||||
valid_data,
|
valid_data,
|
||||||
|
nullptr,
|
||||||
size,
|
size,
|
||||||
res + processed_size,
|
res + processed_size,
|
||||||
valid_res + processed_size,
|
valid_res + processed_size,
|
||||||
@ -451,13 +720,14 @@ class SegmentExpr : public Expr {
|
|||||||
FUNC func,
|
FUNC func,
|
||||||
std::function<bool(const milvus::SkipIndex&, FieldId, int)> skip_func,
|
std::function<bool(const milvus::SkipIndex&, FieldId, int)> skip_func,
|
||||||
TargetBitmapView res,
|
TargetBitmapView res,
|
||||||
|
TargetBitmapView valid_res,
|
||||||
ValTypes... values) {
|
ValTypes... values) {
|
||||||
if (segment_->is_chunked()) {
|
if (segment_->is_chunked()) {
|
||||||
return ProcessDataChunksForMultipleChunk<T>(
|
return ProcessDataChunksForMultipleChunk<T>(
|
||||||
func, skip_func, res, values...);
|
func, skip_func, res, valid_res, values...);
|
||||||
} else {
|
} else {
|
||||||
return ProcessDataChunksForSingleChunk<T>(
|
return ProcessDataChunksForSingleChunk<T>(
|
||||||
func, skip_func, res, values...);
|
func, skip_func, res, valid_res, values...);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -538,6 +808,51 @@ class SegmentExpr : public Expr {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
template <typename T>
|
||||||
|
TargetBitmap
|
||||||
|
ProcessChunksForValidByOffsets(bool use_index, const OffsetVector& input) {
|
||||||
|
typedef std::
|
||||||
|
conditional_t<std::is_same_v<T, std::string_view>, std::string, T>
|
||||||
|
IndexInnerType;
|
||||||
|
using Index = index::ScalarIndex<IndexInnerType>;
|
||||||
|
auto batch_size = input.size();
|
||||||
|
TargetBitmap valid_result(batch_size);
|
||||||
|
valid_result.set();
|
||||||
|
|
||||||
|
if (use_index) {
|
||||||
|
const Index& index =
|
||||||
|
segment_->chunk_scalar_index<IndexInnerType>(field_id_, 0);
|
||||||
|
auto* index_ptr = const_cast<Index*>(&index);
|
||||||
|
const auto& res = index_ptr->IsNotNull();
|
||||||
|
for (auto i = 0; i < batch_size; ++i) {
|
||||||
|
valid_result[i] = res[input[i]];
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
for (auto i = 0; i < batch_size; ++i) {
|
||||||
|
auto offset = input[i];
|
||||||
|
auto [chunk_id,
|
||||||
|
chunk_offset] = [&]() -> std::pair<int64_t, int64_t> {
|
||||||
|
if (segment_->type() == SegmentType::Growing) {
|
||||||
|
return {offset / size_per_chunk_,
|
||||||
|
offset % size_per_chunk_};
|
||||||
|
} else if (segment_->is_chunked()) {
|
||||||
|
return segment_->get_chunk_by_offset(field_id_, offset);
|
||||||
|
} else {
|
||||||
|
return {0, offset};
|
||||||
|
}
|
||||||
|
}();
|
||||||
|
auto chunk = segment_->chunk_data<T>(field_id_, chunk_id);
|
||||||
|
const bool* valid_data = chunk.valid_data();
|
||||||
|
if (valid_data != nullptr) {
|
||||||
|
valid_result[i] = valid_data[chunk_offset];
|
||||||
|
} else {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return valid_result;
|
||||||
|
}
|
||||||
|
|
||||||
template <typename T>
|
template <typename T>
|
||||||
TargetBitmap
|
TargetBitmap
|
||||||
ProcessDataChunksForValid() {
|
ProcessDataChunksForValid() {
|
||||||
@ -569,9 +884,9 @@ class SegmentExpr : public Expr {
|
|||||||
return valid_result;
|
return valid_result;
|
||||||
}
|
}
|
||||||
valid_data += data_pos;
|
valid_data += data_pos;
|
||||||
for (int i = 0; i < size; i++) {
|
for (int j = 0; j < size; j++) {
|
||||||
if (!valid_data[i]) {
|
if (!valid_data[j]) {
|
||||||
valid_result[i + data_pos] = false;
|
valid_result[j + processed_size] = false;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
processed_size += size;
|
processed_size += size;
|
||||||
|
|||||||
File diff suppressed because it is too large
Load Diff
@ -50,35 +50,35 @@ class PhyJsonContainsFilterExpr : public SegmentExpr {
|
|||||||
|
|
||||||
private:
|
private:
|
||||||
VectorPtr
|
VectorPtr
|
||||||
EvalJsonContainsForDataSegment();
|
EvalJsonContainsForDataSegment(OffsetVector* input = nullptr);
|
||||||
|
|
||||||
template <typename ExprValueType>
|
template <typename ExprValueType>
|
||||||
VectorPtr
|
VectorPtr
|
||||||
ExecJsonContains();
|
ExecJsonContains(OffsetVector* input = nullptr);
|
||||||
|
|
||||||
template <typename ExprValueType>
|
template <typename ExprValueType>
|
||||||
VectorPtr
|
VectorPtr
|
||||||
ExecArrayContains();
|
ExecArrayContains(OffsetVector* input = nullptr);
|
||||||
|
|
||||||
template <typename ExprValueType>
|
template <typename ExprValueType>
|
||||||
VectorPtr
|
VectorPtr
|
||||||
ExecJsonContainsAll();
|
ExecJsonContainsAll(OffsetVector* input = nullptr);
|
||||||
|
|
||||||
template <typename ExprValueType>
|
template <typename ExprValueType>
|
||||||
VectorPtr
|
VectorPtr
|
||||||
ExecArrayContainsAll();
|
ExecArrayContainsAll(OffsetVector* input = nullptr);
|
||||||
|
|
||||||
VectorPtr
|
VectorPtr
|
||||||
ExecJsonContainsArray();
|
ExecJsonContainsArray(OffsetVector* input = nullptr);
|
||||||
|
|
||||||
VectorPtr
|
VectorPtr
|
||||||
ExecJsonContainsAllArray();
|
ExecJsonContainsAllArray(OffsetVector* input = nullptr);
|
||||||
|
|
||||||
VectorPtr
|
VectorPtr
|
||||||
ExecJsonContainsAllWithDiffType();
|
ExecJsonContainsAllWithDiffType(OffsetVector* input = nullptr);
|
||||||
|
|
||||||
VectorPtr
|
VectorPtr
|
||||||
ExecJsonContainsWithDiffType();
|
ExecJsonContainsWithDiffType(OffsetVector* input = nullptr);
|
||||||
|
|
||||||
VectorPtr
|
VectorPtr
|
||||||
EvalArrayContainsForIndexSegment();
|
EvalArrayContainsForIndexSegment();
|
||||||
|
|||||||
@ -75,8 +75,16 @@ class PhyLogicalBinaryExpr : public Expr {
|
|||||||
|
|
||||||
void
|
void
|
||||||
MoveCursor() override {
|
MoveCursor() override {
|
||||||
inputs_[0]->MoveCursor();
|
if (!has_offset_input_) {
|
||||||
inputs_[1]->MoveCursor();
|
inputs_[0]->MoveCursor();
|
||||||
|
inputs_[1]->MoveCursor();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
bool
|
||||||
|
SupportOffsetInput() override {
|
||||||
|
return inputs_[0]->SupportOffsetInput() &&
|
||||||
|
inputs_[1]->SupportOffsetInput();
|
||||||
}
|
}
|
||||||
|
|
||||||
private:
|
private:
|
||||||
|
|||||||
@ -41,7 +41,14 @@ class PhyLogicalUnaryExpr : public Expr {
|
|||||||
|
|
||||||
void
|
void
|
||||||
MoveCursor() override {
|
MoveCursor() override {
|
||||||
inputs_[0]->MoveCursor();
|
if (!has_offset_input_) {
|
||||||
|
inputs_[0]->MoveCursor();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
bool
|
||||||
|
SupportOffsetInput() override {
|
||||||
|
return inputs_[0]->SupportOffsetInput();
|
||||||
}
|
}
|
||||||
|
|
||||||
private:
|
private:
|
||||||
|
|||||||
@ -24,37 +24,39 @@ namespace exec {
|
|||||||
|
|
||||||
void
|
void
|
||||||
PhyTermFilterExpr::Eval(EvalCtx& context, VectorPtr& result) {
|
PhyTermFilterExpr::Eval(EvalCtx& context, VectorPtr& result) {
|
||||||
if (is_pk_field_) {
|
auto input = context.get_offset_input();
|
||||||
|
SetHasOffsetInput((input != nullptr));
|
||||||
|
if (is_pk_field_ && !has_offset_input_) {
|
||||||
result = ExecPkTermImpl();
|
result = ExecPkTermImpl();
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
switch (expr_->column_.data_type_) {
|
switch (expr_->column_.data_type_) {
|
||||||
case DataType::BOOL: {
|
case DataType::BOOL: {
|
||||||
result = ExecVisitorImpl<bool>();
|
result = ExecVisitorImpl<bool>(input);
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
case DataType::INT8: {
|
case DataType::INT8: {
|
||||||
result = ExecVisitorImpl<int8_t>();
|
result = ExecVisitorImpl<int8_t>(input);
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
case DataType::INT16: {
|
case DataType::INT16: {
|
||||||
result = ExecVisitorImpl<int16_t>();
|
result = ExecVisitorImpl<int16_t>(input);
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
case DataType::INT32: {
|
case DataType::INT32: {
|
||||||
result = ExecVisitorImpl<int32_t>();
|
result = ExecVisitorImpl<int32_t>(input);
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
case DataType::INT64: {
|
case DataType::INT64: {
|
||||||
result = ExecVisitorImpl<int64_t>();
|
result = ExecVisitorImpl<int64_t>(input);
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
case DataType::FLOAT: {
|
case DataType::FLOAT: {
|
||||||
result = ExecVisitorImpl<float>();
|
result = ExecVisitorImpl<float>(input);
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
case DataType::DOUBLE: {
|
case DataType::DOUBLE: {
|
||||||
result = ExecVisitorImpl<double>();
|
result = ExecVisitorImpl<double>(input);
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
case DataType::VARCHAR: {
|
case DataType::VARCHAR: {
|
||||||
@ -62,30 +64,30 @@ PhyTermFilterExpr::Eval(EvalCtx& context, VectorPtr& result) {
|
|||||||
!storage::MmapManager::GetInstance()
|
!storage::MmapManager::GetInstance()
|
||||||
.GetMmapConfig()
|
.GetMmapConfig()
|
||||||
.growing_enable_mmap) {
|
.growing_enable_mmap) {
|
||||||
result = ExecVisitorImpl<std::string>();
|
result = ExecVisitorImpl<std::string>(input);
|
||||||
} else {
|
} else {
|
||||||
result = ExecVisitorImpl<std::string_view>();
|
result = ExecVisitorImpl<std::string_view>(input);
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
case DataType::JSON: {
|
case DataType::JSON: {
|
||||||
if (expr_->vals_.size() == 0) {
|
if (expr_->vals_.size() == 0) {
|
||||||
result = ExecVisitorImplTemplateJson<bool>();
|
result = ExecVisitorImplTemplateJson<bool>(input);
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
auto type = expr_->vals_[0].val_case();
|
auto type = expr_->vals_[0].val_case();
|
||||||
switch (type) {
|
switch (type) {
|
||||||
case proto::plan::GenericValue::ValCase::kBoolVal:
|
case proto::plan::GenericValue::ValCase::kBoolVal:
|
||||||
result = ExecVisitorImplTemplateJson<bool>();
|
result = ExecVisitorImplTemplateJson<bool>(input);
|
||||||
break;
|
break;
|
||||||
case proto::plan::GenericValue::ValCase::kInt64Val:
|
case proto::plan::GenericValue::ValCase::kInt64Val:
|
||||||
result = ExecVisitorImplTemplateJson<int64_t>();
|
result = ExecVisitorImplTemplateJson<int64_t>(input);
|
||||||
break;
|
break;
|
||||||
case proto::plan::GenericValue::ValCase::kFloatVal:
|
case proto::plan::GenericValue::ValCase::kFloatVal:
|
||||||
result = ExecVisitorImplTemplateJson<double>();
|
result = ExecVisitorImplTemplateJson<double>(input);
|
||||||
break;
|
break;
|
||||||
case proto::plan::GenericValue::ValCase::kStringVal:
|
case proto::plan::GenericValue::ValCase::kStringVal:
|
||||||
result = ExecVisitorImplTemplateJson<std::string>();
|
result = ExecVisitorImplTemplateJson<std::string>(input);
|
||||||
break;
|
break;
|
||||||
default:
|
default:
|
||||||
PanicInfo(DataTypeInvalid, "unknown data type: {}", type);
|
PanicInfo(DataTypeInvalid, "unknown data type: {}", type);
|
||||||
@ -95,26 +97,26 @@ PhyTermFilterExpr::Eval(EvalCtx& context, VectorPtr& result) {
|
|||||||
case DataType::ARRAY: {
|
case DataType::ARRAY: {
|
||||||
if (expr_->vals_.size() == 0) {
|
if (expr_->vals_.size() == 0) {
|
||||||
SetNotUseIndex();
|
SetNotUseIndex();
|
||||||
result = ExecVisitorImplTemplateArray<bool>();
|
result = ExecVisitorImplTemplateArray<bool>(input);
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
auto type = expr_->vals_[0].val_case();
|
auto type = expr_->vals_[0].val_case();
|
||||||
switch (type) {
|
switch (type) {
|
||||||
case proto::plan::GenericValue::ValCase::kBoolVal:
|
case proto::plan::GenericValue::ValCase::kBoolVal:
|
||||||
SetNotUseIndex();
|
SetNotUseIndex();
|
||||||
result = ExecVisitorImplTemplateArray<bool>();
|
result = ExecVisitorImplTemplateArray<bool>(input);
|
||||||
break;
|
break;
|
||||||
case proto::plan::GenericValue::ValCase::kInt64Val:
|
case proto::plan::GenericValue::ValCase::kInt64Val:
|
||||||
SetNotUseIndex();
|
SetNotUseIndex();
|
||||||
result = ExecVisitorImplTemplateArray<int64_t>();
|
result = ExecVisitorImplTemplateArray<int64_t>(input);
|
||||||
break;
|
break;
|
||||||
case proto::plan::GenericValue::ValCase::kFloatVal:
|
case proto::plan::GenericValue::ValCase::kFloatVal:
|
||||||
SetNotUseIndex();
|
SetNotUseIndex();
|
||||||
result = ExecVisitorImplTemplateArray<double>();
|
result = ExecVisitorImplTemplateArray<double>(input);
|
||||||
break;
|
break;
|
||||||
case proto::plan::GenericValue::ValCase::kStringVal:
|
case proto::plan::GenericValue::ValCase::kStringVal:
|
||||||
SetNotUseIndex();
|
SetNotUseIndex();
|
||||||
result = ExecVisitorImplTemplateArray<std::string>();
|
result = ExecVisitorImplTemplateArray<std::string>(input);
|
||||||
break;
|
break;
|
||||||
default:
|
default:
|
||||||
PanicInfo(DataTypeInvalid, "unknown data type: {}", type);
|
PanicInfo(DataTypeInvalid, "unknown data type: {}", type);
|
||||||
@ -230,31 +232,32 @@ PhyTermFilterExpr::ExecPkTermImpl() {
|
|||||||
|
|
||||||
template <typename ValueType>
|
template <typename ValueType>
|
||||||
VectorPtr
|
VectorPtr
|
||||||
PhyTermFilterExpr::ExecVisitorImplTemplateJson() {
|
PhyTermFilterExpr::ExecVisitorImplTemplateJson(OffsetVector* input) {
|
||||||
if (expr_->is_in_field_) {
|
if (expr_->is_in_field_) {
|
||||||
return ExecTermJsonVariableInField<ValueType>();
|
return ExecTermJsonVariableInField<ValueType>(input);
|
||||||
} else {
|
} else {
|
||||||
return ExecTermJsonFieldInVariable<ValueType>();
|
return ExecTermJsonFieldInVariable<ValueType>(input);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
template <typename ValueType>
|
template <typename ValueType>
|
||||||
VectorPtr
|
VectorPtr
|
||||||
PhyTermFilterExpr::ExecVisitorImplTemplateArray() {
|
PhyTermFilterExpr::ExecVisitorImplTemplateArray(OffsetVector* input) {
|
||||||
if (expr_->is_in_field_) {
|
if (expr_->is_in_field_) {
|
||||||
return ExecTermArrayVariableInField<ValueType>();
|
return ExecTermArrayVariableInField<ValueType>(input);
|
||||||
} else {
|
} else {
|
||||||
return ExecTermArrayFieldInVariable<ValueType>();
|
return ExecTermArrayFieldInVariable<ValueType>(input);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
template <typename ValueType>
|
template <typename ValueType>
|
||||||
VectorPtr
|
VectorPtr
|
||||||
PhyTermFilterExpr::ExecTermArrayVariableInField() {
|
PhyTermFilterExpr::ExecTermArrayVariableInField(OffsetVector* input) {
|
||||||
using GetType = std::conditional_t<std::is_same_v<ValueType, std::string>,
|
using GetType = std::conditional_t<std::is_same_v<ValueType, std::string>,
|
||||||
std::string_view,
|
std::string_view,
|
||||||
ValueType>;
|
ValueType>;
|
||||||
auto real_batch_size = GetNextBatchSize();
|
auto real_batch_size =
|
||||||
|
has_offset_input_ ? input->size() : GetNextBatchSize();
|
||||||
if (real_batch_size == 0) {
|
if (real_batch_size == 0) {
|
||||||
return nullptr;
|
return nullptr;
|
||||||
}
|
}
|
||||||
@ -269,15 +272,18 @@ PhyTermFilterExpr::ExecTermArrayVariableInField() {
|
|||||||
"element length in json array must be one");
|
"element length in json array must be one");
|
||||||
ValueType target_val = GetValueFromProto<ValueType>(expr_->vals_[0]);
|
ValueType target_val = GetValueFromProto<ValueType>(expr_->vals_[0]);
|
||||||
|
|
||||||
auto execute_sub_batch = [](const ArrayView* data,
|
auto execute_sub_batch =
|
||||||
const bool* valid_data,
|
[]<FilterType filter_type = FilterType::sequential>(
|
||||||
const int size,
|
const ArrayView* data,
|
||||||
TargetBitmapView res,
|
const bool* valid_data,
|
||||||
TargetBitmapView valid_res,
|
const int32_t* offsets,
|
||||||
const ValueType& target_val) {
|
const int size,
|
||||||
auto executor = [&](size_t idx) {
|
TargetBitmapView res,
|
||||||
for (int i = 0; i < data[idx].length(); i++) {
|
TargetBitmapView valid_res,
|
||||||
auto val = data[idx].template get_data<GetType>(i);
|
const ValueType& target_val) {
|
||||||
|
auto executor = [&](size_t offset) {
|
||||||
|
for (int i = 0; i < data[offset].length(); i++) {
|
||||||
|
auto val = data[offset].template get_data<GetType>(i);
|
||||||
if (val == target_val) {
|
if (val == target_val) {
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
@ -285,16 +291,31 @@ PhyTermFilterExpr::ExecTermArrayVariableInField() {
|
|||||||
return false;
|
return false;
|
||||||
};
|
};
|
||||||
for (int i = 0; i < size; ++i) {
|
for (int i = 0; i < size; ++i) {
|
||||||
if (valid_data != nullptr && !valid_data[i]) {
|
auto offset = i;
|
||||||
|
if constexpr (filter_type == FilterType::random) {
|
||||||
|
offset = (offsets) ? offsets[i] : i;
|
||||||
|
}
|
||||||
|
if (valid_data != nullptr && !valid_data[offset]) {
|
||||||
res[i] = valid_res[i] = false;
|
res[i] = valid_res[i] = false;
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
res[i] = executor(i);
|
res[i] = executor(offset);
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
int64_t processed_size = ProcessDataChunks<milvus::ArrayView>(
|
int64_t processed_size;
|
||||||
execute_sub_batch, std::nullptr_t{}, res, valid_res, target_val);
|
if (has_offset_input_) {
|
||||||
|
processed_size =
|
||||||
|
ProcessDataByOffsets<milvus::ArrayView>(execute_sub_batch,
|
||||||
|
std::nullptr_t{},
|
||||||
|
input,
|
||||||
|
res,
|
||||||
|
valid_res,
|
||||||
|
target_val);
|
||||||
|
} else {
|
||||||
|
processed_size = ProcessDataChunks<milvus::ArrayView>(
|
||||||
|
execute_sub_batch, std::nullptr_t{}, res, valid_res, target_val);
|
||||||
|
}
|
||||||
AssertInfo(processed_size == real_batch_size,
|
AssertInfo(processed_size == real_batch_size,
|
||||||
"internal error: expr processed rows {} not equal "
|
"internal error: expr processed rows {} not equal "
|
||||||
"expect batch size {}",
|
"expect batch size {}",
|
||||||
@ -305,12 +326,13 @@ PhyTermFilterExpr::ExecTermArrayVariableInField() {
|
|||||||
|
|
||||||
template <typename ValueType>
|
template <typename ValueType>
|
||||||
VectorPtr
|
VectorPtr
|
||||||
PhyTermFilterExpr::ExecTermArrayFieldInVariable() {
|
PhyTermFilterExpr::ExecTermArrayFieldInVariable(OffsetVector* input) {
|
||||||
using GetType = std::conditional_t<std::is_same_v<ValueType, std::string>,
|
using GetType = std::conditional_t<std::is_same_v<ValueType, std::string>,
|
||||||
std::string_view,
|
std::string_view,
|
||||||
ValueType>;
|
ValueType>;
|
||||||
|
|
||||||
auto real_batch_size = GetNextBatchSize();
|
auto real_batch_size =
|
||||||
|
has_offset_input_ ? input->size() : GetNextBatchSize();
|
||||||
if (real_batch_size == 0) {
|
if (real_batch_size == 0) {
|
||||||
return nullptr;
|
return nullptr;
|
||||||
}
|
}
|
||||||
@ -336,29 +358,52 @@ PhyTermFilterExpr::ExecTermArrayFieldInVariable() {
|
|||||||
return res_vec;
|
return res_vec;
|
||||||
}
|
}
|
||||||
|
|
||||||
auto execute_sub_batch = [](const ArrayView* data,
|
auto execute_sub_batch =
|
||||||
const bool* valid_data,
|
[]<FilterType filter_type = FilterType::sequential>(
|
||||||
const int size,
|
const ArrayView* data,
|
||||||
TargetBitmapView res,
|
const bool* valid_data,
|
||||||
TargetBitmapView valid_res,
|
const int32_t* offsets,
|
||||||
int index,
|
const int size,
|
||||||
const std::unordered_set<ValueType>& term_set) {
|
TargetBitmapView res,
|
||||||
|
TargetBitmapView valid_res,
|
||||||
|
int index,
|
||||||
|
const std::unordered_set<ValueType>& term_set) {
|
||||||
for (int i = 0; i < size; ++i) {
|
for (int i = 0; i < size; ++i) {
|
||||||
if (valid_data != nullptr && !valid_data[i]) {
|
auto offset = i;
|
||||||
|
if constexpr (filter_type == FilterType::random) {
|
||||||
|
offset = (offsets) ? offsets[i] : i;
|
||||||
|
}
|
||||||
|
if (valid_data != nullptr && !valid_data[offset]) {
|
||||||
res[i] = valid_res[i] = false;
|
res[i] = valid_res[i] = false;
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
if (term_set.empty() || index >= data[i].length()) {
|
if (term_set.empty() || index >= data[offset].length()) {
|
||||||
res[i] = false;
|
res[i] = false;
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
auto value = data[i].get_data<GetType>(index);
|
auto value = data[offset].get_data<GetType>(index);
|
||||||
res[i] = term_set.find(ValueType(value)) != term_set.end();
|
res[i] = term_set.find(ValueType(value)) != term_set.end();
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
int64_t processed_size = ProcessDataChunks<milvus::ArrayView>(
|
int64_t processed_size;
|
||||||
execute_sub_batch, std::nullptr_t{}, res, valid_res, index, term_set);
|
if (has_offset_input_) {
|
||||||
|
processed_size =
|
||||||
|
ProcessDataByOffsets<milvus::ArrayView>(execute_sub_batch,
|
||||||
|
std::nullptr_t{},
|
||||||
|
input,
|
||||||
|
res,
|
||||||
|
valid_res,
|
||||||
|
index,
|
||||||
|
term_set);
|
||||||
|
} else {
|
||||||
|
processed_size = ProcessDataChunks<milvus::ArrayView>(execute_sub_batch,
|
||||||
|
std::nullptr_t{},
|
||||||
|
res,
|
||||||
|
valid_res,
|
||||||
|
index,
|
||||||
|
term_set);
|
||||||
|
}
|
||||||
AssertInfo(processed_size == real_batch_size,
|
AssertInfo(processed_size == real_batch_size,
|
||||||
"internal error: expr processed rows {} not equal "
|
"internal error: expr processed rows {} not equal "
|
||||||
"expect batch size {}",
|
"expect batch size {}",
|
||||||
@ -369,11 +414,12 @@ PhyTermFilterExpr::ExecTermArrayFieldInVariable() {
|
|||||||
|
|
||||||
template <typename ValueType>
|
template <typename ValueType>
|
||||||
VectorPtr
|
VectorPtr
|
||||||
PhyTermFilterExpr::ExecTermJsonVariableInField() {
|
PhyTermFilterExpr::ExecTermJsonVariableInField(OffsetVector* input) {
|
||||||
using GetType = std::conditional_t<std::is_same_v<ValueType, std::string>,
|
using GetType = std::conditional_t<std::is_same_v<ValueType, std::string>,
|
||||||
std::string_view,
|
std::string_view,
|
||||||
ValueType>;
|
ValueType>;
|
||||||
auto real_batch_size = GetNextBatchSize();
|
auto real_batch_size =
|
||||||
|
has_offset_input_ ? input->size() : GetNextBatchSize();
|
||||||
if (real_batch_size == 0) {
|
if (real_batch_size == 0) {
|
||||||
return nullptr;
|
return nullptr;
|
||||||
}
|
}
|
||||||
@ -389,13 +435,16 @@ PhyTermFilterExpr::ExecTermJsonVariableInField() {
|
|||||||
ValueType val = GetValueFromProto<ValueType>(expr_->vals_[0]);
|
ValueType val = GetValueFromProto<ValueType>(expr_->vals_[0]);
|
||||||
auto pointer = milvus::Json::pointer(expr_->column_.nested_path_);
|
auto pointer = milvus::Json::pointer(expr_->column_.nested_path_);
|
||||||
|
|
||||||
auto execute_sub_batch = [](const Json* data,
|
auto execute_sub_batch =
|
||||||
const bool* valid_data,
|
[]<FilterType filter_type = FilterType::sequential>(
|
||||||
const int size,
|
const Json* data,
|
||||||
TargetBitmapView res,
|
const bool* valid_data,
|
||||||
TargetBitmapView valid_res,
|
const int32_t* offsets,
|
||||||
const std::string pointer,
|
const int size,
|
||||||
const ValueType& target_val) {
|
TargetBitmapView res,
|
||||||
|
TargetBitmapView valid_res,
|
||||||
|
const std::string pointer,
|
||||||
|
const ValueType& target_val) {
|
||||||
auto executor = [&](size_t i) {
|
auto executor = [&](size_t i) {
|
||||||
auto doc = data[i].doc();
|
auto doc = data[i].doc();
|
||||||
auto array = doc.at_pointer(pointer).get_array();
|
auto array = doc.at_pointer(pointer).get_array();
|
||||||
@ -413,15 +462,30 @@ PhyTermFilterExpr::ExecTermJsonVariableInField() {
|
|||||||
return false;
|
return false;
|
||||||
};
|
};
|
||||||
for (size_t i = 0; i < size; ++i) {
|
for (size_t i = 0; i < size; ++i) {
|
||||||
if (valid_data != nullptr && !valid_data[i]) {
|
auto offset = i;
|
||||||
|
if constexpr (filter_type == FilterType::random) {
|
||||||
|
offset = (offsets) ? offsets[i] : i;
|
||||||
|
}
|
||||||
|
if (valid_data != nullptr && !valid_data[offset]) {
|
||||||
res[i] = valid_res[i] = false;
|
res[i] = valid_res[i] = false;
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
res[i] = executor(i);
|
res[i] = executor(offset);
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
int64_t processed_size = ProcessDataChunks<milvus::Json>(
|
int64_t processed_size;
|
||||||
execute_sub_batch, std::nullptr_t{}, res, valid_res, pointer, val);
|
if (has_offset_input_) {
|
||||||
|
processed_size = ProcessDataByOffsets<milvus::Json>(execute_sub_batch,
|
||||||
|
std::nullptr_t{},
|
||||||
|
input,
|
||||||
|
res,
|
||||||
|
valid_res,
|
||||||
|
pointer,
|
||||||
|
val);
|
||||||
|
} else {
|
||||||
|
processed_size = ProcessDataChunks<milvus::Json>(
|
||||||
|
execute_sub_batch, std::nullptr_t{}, res, valid_res, pointer, val);
|
||||||
|
}
|
||||||
AssertInfo(processed_size == real_batch_size,
|
AssertInfo(processed_size == real_batch_size,
|
||||||
"internal error: expr processed rows {} not equal "
|
"internal error: expr processed rows {} not equal "
|
||||||
"expect batch size {}",
|
"expect batch size {}",
|
||||||
@ -432,11 +496,12 @@ PhyTermFilterExpr::ExecTermJsonVariableInField() {
|
|||||||
|
|
||||||
template <typename ValueType>
|
template <typename ValueType>
|
||||||
VectorPtr
|
VectorPtr
|
||||||
PhyTermFilterExpr::ExecTermJsonFieldInVariable() {
|
PhyTermFilterExpr::ExecTermJsonFieldInVariable(OffsetVector* input) {
|
||||||
using GetType = std::conditional_t<std::is_same_v<ValueType, std::string>,
|
using GetType = std::conditional_t<std::is_same_v<ValueType, std::string>,
|
||||||
std::string_view,
|
std::string_view,
|
||||||
ValueType>;
|
ValueType>;
|
||||||
auto real_batch_size = GetNextBatchSize();
|
auto real_batch_size =
|
||||||
|
has_offset_input_ ? input->size() : GetNextBatchSize();
|
||||||
if (real_batch_size == 0) {
|
if (real_batch_size == 0) {
|
||||||
return nullptr;
|
return nullptr;
|
||||||
}
|
}
|
||||||
@ -459,13 +524,16 @@ PhyTermFilterExpr::ExecTermJsonFieldInVariable() {
|
|||||||
return res_vec;
|
return res_vec;
|
||||||
}
|
}
|
||||||
|
|
||||||
auto execute_sub_batch = [](const Json* data,
|
auto execute_sub_batch =
|
||||||
const bool* valid_data,
|
[]<FilterType filter_type = FilterType::sequential>(
|
||||||
const int size,
|
const Json* data,
|
||||||
TargetBitmapView res,
|
const bool* valid_data,
|
||||||
TargetBitmapView valid_res,
|
const int32_t* offsets,
|
||||||
const std::string pointer,
|
const int size,
|
||||||
const std::unordered_set<ValueType>& terms) {
|
TargetBitmapView res,
|
||||||
|
TargetBitmapView valid_res,
|
||||||
|
const std::string pointer,
|
||||||
|
const std::unordered_set<ValueType>& terms) {
|
||||||
auto executor = [&](size_t i) {
|
auto executor = [&](size_t i) {
|
||||||
auto x = data[i].template at<GetType>(pointer);
|
auto x = data[i].template at<GetType>(pointer);
|
||||||
if (x.error()) {
|
if (x.error()) {
|
||||||
@ -485,7 +553,11 @@ PhyTermFilterExpr::ExecTermJsonFieldInVariable() {
|
|||||||
return terms.find(ValueType(x.value())) != terms.end();
|
return terms.find(ValueType(x.value())) != terms.end();
|
||||||
};
|
};
|
||||||
for (size_t i = 0; i < size; ++i) {
|
for (size_t i = 0; i < size; ++i) {
|
||||||
if (valid_data != nullptr && !valid_data[i]) {
|
auto offset = i;
|
||||||
|
if constexpr (filter_type == FilterType::random) {
|
||||||
|
offset = (offsets) ? offsets[i] : i;
|
||||||
|
}
|
||||||
|
if (valid_data != nullptr && !valid_data[offset]) {
|
||||||
res[i] = valid_res[i] = false;
|
res[i] = valid_res[i] = false;
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
@ -493,11 +565,26 @@ PhyTermFilterExpr::ExecTermJsonFieldInVariable() {
|
|||||||
res[i] = false;
|
res[i] = false;
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
res[i] = executor(i);
|
res[i] = executor(offset);
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
int64_t processed_size = ProcessDataChunks<milvus::Json>(
|
int64_t processed_size;
|
||||||
execute_sub_batch, std::nullptr_t{}, res, valid_res, pointer, term_set);
|
if (has_offset_input_) {
|
||||||
|
processed_size = ProcessDataByOffsets<milvus::Json>(execute_sub_batch,
|
||||||
|
std::nullptr_t{},
|
||||||
|
input,
|
||||||
|
res,
|
||||||
|
valid_res,
|
||||||
|
pointer,
|
||||||
|
term_set);
|
||||||
|
} else {
|
||||||
|
processed_size = ProcessDataChunks<milvus::Json>(execute_sub_batch,
|
||||||
|
std::nullptr_t{},
|
||||||
|
res,
|
||||||
|
valid_res,
|
||||||
|
pointer,
|
||||||
|
term_set);
|
||||||
|
}
|
||||||
AssertInfo(processed_size == real_batch_size,
|
AssertInfo(processed_size == real_batch_size,
|
||||||
"internal error: expr processed rows {} not equal "
|
"internal error: expr processed rows {} not equal "
|
||||||
"expect batch size {}",
|
"expect batch size {}",
|
||||||
@ -508,17 +595,17 @@ PhyTermFilterExpr::ExecTermJsonFieldInVariable() {
|
|||||||
|
|
||||||
template <typename T>
|
template <typename T>
|
||||||
VectorPtr
|
VectorPtr
|
||||||
PhyTermFilterExpr::ExecVisitorImpl() {
|
PhyTermFilterExpr::ExecVisitorImpl(OffsetVector* input) {
|
||||||
if (is_index_mode_) {
|
if (is_index_mode_ && !has_offset_input_) {
|
||||||
return ExecVisitorImplForIndex<T>();
|
return ExecVisitorImplForIndex<T>(input);
|
||||||
} else {
|
} else {
|
||||||
return ExecVisitorImplForData<T>();
|
return ExecVisitorImplForData<T>(input);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
template <typename T>
|
template <typename T>
|
||||||
VectorPtr
|
VectorPtr
|
||||||
PhyTermFilterExpr::ExecVisitorImplForIndex() {
|
PhyTermFilterExpr::ExecVisitorImplForIndex(OffsetVector* input) {
|
||||||
typedef std::
|
typedef std::
|
||||||
conditional_t<std::is_same_v<T, std::string_view>, std::string, T>
|
conditional_t<std::is_same_v<T, std::string_view>, std::string, T>
|
||||||
IndexInnerType;
|
IndexInnerType;
|
||||||
@ -553,7 +640,7 @@ PhyTermFilterExpr::ExecVisitorImplForIndex() {
|
|||||||
|
|
||||||
template <>
|
template <>
|
||||||
VectorPtr
|
VectorPtr
|
||||||
PhyTermFilterExpr::ExecVisitorImplForIndex<bool>() {
|
PhyTermFilterExpr::ExecVisitorImplForIndex<bool>(OffsetVector* input) {
|
||||||
using Index = index::ScalarIndex<bool>;
|
using Index = index::ScalarIndex<bool>;
|
||||||
auto real_batch_size = GetNextBatchSize();
|
auto real_batch_size = GetNextBatchSize();
|
||||||
if (real_batch_size == 0) {
|
if (real_batch_size == 0) {
|
||||||
@ -575,8 +662,9 @@ PhyTermFilterExpr::ExecVisitorImplForIndex<bool>() {
|
|||||||
|
|
||||||
template <typename T>
|
template <typename T>
|
||||||
VectorPtr
|
VectorPtr
|
||||||
PhyTermFilterExpr::ExecVisitorImplForData() {
|
PhyTermFilterExpr::ExecVisitorImplForData(OffsetVector* input) {
|
||||||
auto real_batch_size = GetNextBatchSize();
|
auto real_batch_size =
|
||||||
|
has_offset_input_ ? input->size() : GetNextBatchSize();
|
||||||
if (real_batch_size == 0) {
|
if (real_batch_size == 0) {
|
||||||
return nullptr;
|
return nullptr;
|
||||||
}
|
}
|
||||||
@ -597,23 +685,40 @@ PhyTermFilterExpr::ExecVisitorImplForData() {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
std::unordered_set<T> vals_set(vals.begin(), vals.end());
|
std::unordered_set<T> vals_set(vals.begin(), vals.end());
|
||||||
auto execute_sub_batch = [](const T* data,
|
auto execute_sub_batch =
|
||||||
const bool* valid_data,
|
[]<FilterType filter_type = FilterType::sequential>(
|
||||||
const int size,
|
const T* data,
|
||||||
TargetBitmapView res,
|
const bool* valid_data,
|
||||||
TargetBitmapView valid_res,
|
const int32_t* offsets,
|
||||||
const std::unordered_set<T>& vals) {
|
const int size,
|
||||||
|
TargetBitmapView res,
|
||||||
|
TargetBitmapView valid_res,
|
||||||
|
const std::unordered_set<T>& vals) {
|
||||||
TermElementFuncSet<T> func;
|
TermElementFuncSet<T> func;
|
||||||
for (size_t i = 0; i < size; ++i) {
|
for (size_t i = 0; i < size; ++i) {
|
||||||
if (valid_data != nullptr && !valid_data[i]) {
|
auto offset = i;
|
||||||
|
if constexpr (filter_type == FilterType::random) {
|
||||||
|
offset = (offsets) ? offsets[i] : i;
|
||||||
|
}
|
||||||
|
if (valid_data != nullptr && !valid_data[offset]) {
|
||||||
res[i] = valid_res[i] = false;
|
res[i] = valid_res[i] = false;
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
res[i] = func(vals, data[i]);
|
res[i] = func(vals, data[offset]);
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
int64_t processed_size = ProcessDataChunks<T>(
|
int64_t processed_size;
|
||||||
execute_sub_batch, std::nullptr_t{}, res, valid_res, vals_set);
|
if (has_offset_input_) {
|
||||||
|
processed_size = ProcessDataByOffsets<T>(execute_sub_batch,
|
||||||
|
std::nullptr_t{},
|
||||||
|
input,
|
||||||
|
res,
|
||||||
|
valid_res,
|
||||||
|
vals_set);
|
||||||
|
} else {
|
||||||
|
processed_size = ProcessDataChunks<T>(
|
||||||
|
execute_sub_batch, std::nullptr_t{}, res, valid_res, vals_set);
|
||||||
|
}
|
||||||
AssertInfo(processed_size == real_batch_size,
|
AssertInfo(processed_size == real_batch_size,
|
||||||
"internal error: expr processed rows {} not equal "
|
"internal error: expr processed rows {} not equal "
|
||||||
"expect batch size {}",
|
"expect batch size {}",
|
||||||
|
|||||||
@ -83,39 +83,39 @@ class PhyTermFilterExpr : public SegmentExpr {
|
|||||||
|
|
||||||
template <typename T>
|
template <typename T>
|
||||||
VectorPtr
|
VectorPtr
|
||||||
ExecVisitorImpl();
|
ExecVisitorImpl(OffsetVector* input = nullptr);
|
||||||
|
|
||||||
template <typename T>
|
template <typename T>
|
||||||
VectorPtr
|
VectorPtr
|
||||||
ExecVisitorImplForIndex();
|
ExecVisitorImplForIndex(OffsetVector* input = nullptr);
|
||||||
|
|
||||||
template <typename T>
|
template <typename T>
|
||||||
VectorPtr
|
VectorPtr
|
||||||
ExecVisitorImplForData();
|
ExecVisitorImplForData(OffsetVector* input = nullptr);
|
||||||
|
|
||||||
template <typename ValueType>
|
template <typename ValueType>
|
||||||
VectorPtr
|
VectorPtr
|
||||||
ExecVisitorImplTemplateJson();
|
ExecVisitorImplTemplateJson(OffsetVector* input = nullptr);
|
||||||
|
|
||||||
template <typename ValueType>
|
template <typename ValueType>
|
||||||
VectorPtr
|
VectorPtr
|
||||||
ExecTermJsonVariableInField();
|
ExecTermJsonVariableInField(OffsetVector* input = nullptr);
|
||||||
|
|
||||||
template <typename ValueType>
|
template <typename ValueType>
|
||||||
VectorPtr
|
VectorPtr
|
||||||
ExecTermJsonFieldInVariable();
|
ExecTermJsonFieldInVariable(OffsetVector* input = nullptr);
|
||||||
|
|
||||||
template <typename ValueType>
|
template <typename ValueType>
|
||||||
VectorPtr
|
VectorPtr
|
||||||
ExecVisitorImplTemplateArray();
|
ExecVisitorImplTemplateArray(OffsetVector* input = nullptr);
|
||||||
|
|
||||||
template <typename ValueType>
|
template <typename ValueType>
|
||||||
VectorPtr
|
VectorPtr
|
||||||
ExecTermArrayVariableInField();
|
ExecTermArrayVariableInField(OffsetVector* input = nullptr);
|
||||||
|
|
||||||
template <typename ValueType>
|
template <typename ValueType>
|
||||||
VectorPtr
|
VectorPtr
|
||||||
ExecTermArrayFieldInVariable();
|
ExecTermArrayFieldInVariable(OffsetVector* input = nullptr);
|
||||||
|
|
||||||
private:
|
private:
|
||||||
std::shared_ptr<const milvus::expr::TermFilterExpr> expr_;
|
std::shared_ptr<const milvus::expr::TermFilterExpr> expr_;
|
||||||
|
|||||||
@ -121,7 +121,8 @@ PhyUnaryRangeFilterExpr::ExecRangeVisitorImplArrayForIndex<
|
|||||||
case DataType::FLOAT:
|
case DataType::FLOAT:
|
||||||
case DataType::DOUBLE: {
|
case DataType::DOUBLE: {
|
||||||
// not accurate on floating point number, rollback to bruteforce.
|
// not accurate on floating point number, rollback to bruteforce.
|
||||||
return ExecRangeVisitorImplArray<proto::plan::Array>();
|
return ExecRangeVisitorImplArray<proto::plan::Array>(
|
||||||
|
nullptr);
|
||||||
}
|
}
|
||||||
case DataType::VARCHAR: {
|
case DataType::VARCHAR: {
|
||||||
if (segment_->type() == SegmentType::Growing) {
|
if (segment_->type() == SegmentType::Growing) {
|
||||||
@ -146,33 +147,35 @@ PhyUnaryRangeFilterExpr::ExecRangeVisitorImplArrayForIndex<
|
|||||||
|
|
||||||
void
|
void
|
||||||
PhyUnaryRangeFilterExpr::Eval(EvalCtx& context, VectorPtr& result) {
|
PhyUnaryRangeFilterExpr::Eval(EvalCtx& context, VectorPtr& result) {
|
||||||
|
auto input = context.get_offset_input();
|
||||||
|
SetHasOffsetInput((input != nullptr));
|
||||||
switch (expr_->column_.data_type_) {
|
switch (expr_->column_.data_type_) {
|
||||||
case DataType::BOOL: {
|
case DataType::BOOL: {
|
||||||
result = ExecRangeVisitorImpl<bool>();
|
result = ExecRangeVisitorImpl<bool>(input);
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
case DataType::INT8: {
|
case DataType::INT8: {
|
||||||
result = ExecRangeVisitorImpl<int8_t>();
|
result = ExecRangeVisitorImpl<int8_t>(input);
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
case DataType::INT16: {
|
case DataType::INT16: {
|
||||||
result = ExecRangeVisitorImpl<int16_t>();
|
result = ExecRangeVisitorImpl<int16_t>(input);
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
case DataType::INT32: {
|
case DataType::INT32: {
|
||||||
result = ExecRangeVisitorImpl<int32_t>();
|
result = ExecRangeVisitorImpl<int32_t>(input);
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
case DataType::INT64: {
|
case DataType::INT64: {
|
||||||
result = ExecRangeVisitorImpl<int64_t>();
|
result = ExecRangeVisitorImpl<int64_t>(input);
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
case DataType::FLOAT: {
|
case DataType::FLOAT: {
|
||||||
result = ExecRangeVisitorImpl<float>();
|
result = ExecRangeVisitorImpl<float>(input);
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
case DataType::DOUBLE: {
|
case DataType::DOUBLE: {
|
||||||
result = ExecRangeVisitorImpl<double>();
|
result = ExecRangeVisitorImpl<double>(input);
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
case DataType::VARCHAR: {
|
case DataType::VARCHAR: {
|
||||||
@ -180,9 +183,9 @@ PhyUnaryRangeFilterExpr::Eval(EvalCtx& context, VectorPtr& result) {
|
|||||||
!storage::MmapManager::GetInstance()
|
!storage::MmapManager::GetInstance()
|
||||||
.GetMmapConfig()
|
.GetMmapConfig()
|
||||||
.growing_enable_mmap) {
|
.growing_enable_mmap) {
|
||||||
result = ExecRangeVisitorImpl<std::string>();
|
result = ExecRangeVisitorImpl<std::string>(input);
|
||||||
} else {
|
} else {
|
||||||
result = ExecRangeVisitorImpl<std::string_view>();
|
result = ExecRangeVisitorImpl<std::string_view>(input);
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
@ -190,19 +193,20 @@ PhyUnaryRangeFilterExpr::Eval(EvalCtx& context, VectorPtr& result) {
|
|||||||
auto val_type = expr_->val_.val_case();
|
auto val_type = expr_->val_.val_case();
|
||||||
switch (val_type) {
|
switch (val_type) {
|
||||||
case proto::plan::GenericValue::ValCase::kBoolVal:
|
case proto::plan::GenericValue::ValCase::kBoolVal:
|
||||||
result = ExecRangeVisitorImplJson<bool>();
|
result = ExecRangeVisitorImplJson<bool>(input);
|
||||||
break;
|
break;
|
||||||
case proto::plan::GenericValue::ValCase::kInt64Val:
|
case proto::plan::GenericValue::ValCase::kInt64Val:
|
||||||
result = ExecRangeVisitorImplJson<int64_t>();
|
result = ExecRangeVisitorImplJson<int64_t>(input);
|
||||||
break;
|
break;
|
||||||
case proto::plan::GenericValue::ValCase::kFloatVal:
|
case proto::plan::GenericValue::ValCase::kFloatVal:
|
||||||
result = ExecRangeVisitorImplJson<double>();
|
result = ExecRangeVisitorImplJson<double>(input);
|
||||||
break;
|
break;
|
||||||
case proto::plan::GenericValue::ValCase::kStringVal:
|
case proto::plan::GenericValue::ValCase::kStringVal:
|
||||||
result = ExecRangeVisitorImplJson<std::string>();
|
result = ExecRangeVisitorImplJson<std::string>(input);
|
||||||
break;
|
break;
|
||||||
case proto::plan::GenericValue::ValCase::kArrayVal:
|
case proto::plan::GenericValue::ValCase::kArrayVal:
|
||||||
result = ExecRangeVisitorImplJson<proto::plan::Array>();
|
result =
|
||||||
|
ExecRangeVisitorImplJson<proto::plan::Array>(input);
|
||||||
break;
|
break;
|
||||||
default:
|
default:
|
||||||
PanicInfo(
|
PanicInfo(
|
||||||
@ -215,27 +219,28 @@ PhyUnaryRangeFilterExpr::Eval(EvalCtx& context, VectorPtr& result) {
|
|||||||
switch (val_type) {
|
switch (val_type) {
|
||||||
case proto::plan::GenericValue::ValCase::kBoolVal:
|
case proto::plan::GenericValue::ValCase::kBoolVal:
|
||||||
SetNotUseIndex();
|
SetNotUseIndex();
|
||||||
result = ExecRangeVisitorImplArray<bool>();
|
result = ExecRangeVisitorImplArray<bool>(input);
|
||||||
break;
|
break;
|
||||||
case proto::plan::GenericValue::ValCase::kInt64Val:
|
case proto::plan::GenericValue::ValCase::kInt64Val:
|
||||||
SetNotUseIndex();
|
SetNotUseIndex();
|
||||||
result = ExecRangeVisitorImplArray<int64_t>();
|
result = ExecRangeVisitorImplArray<int64_t>(input);
|
||||||
break;
|
break;
|
||||||
case proto::plan::GenericValue::ValCase::kFloatVal:
|
case proto::plan::GenericValue::ValCase::kFloatVal:
|
||||||
SetNotUseIndex();
|
SetNotUseIndex();
|
||||||
result = ExecRangeVisitorImplArray<double>();
|
result = ExecRangeVisitorImplArray<double>(input);
|
||||||
break;
|
break;
|
||||||
case proto::plan::GenericValue::ValCase::kStringVal:
|
case proto::plan::GenericValue::ValCase::kStringVal:
|
||||||
SetNotUseIndex();
|
SetNotUseIndex();
|
||||||
result = ExecRangeVisitorImplArray<std::string>();
|
result = ExecRangeVisitorImplArray<std::string>(input);
|
||||||
break;
|
break;
|
||||||
case proto::plan::GenericValue::ValCase::kArrayVal:
|
case proto::plan::GenericValue::ValCase::kArrayVal:
|
||||||
if (CanUseIndexForArray<milvus::Array>()) {
|
if (!has_offset_input_ &&
|
||||||
|
CanUseIndexForArray<milvus::Array>()) {
|
||||||
result = ExecRangeVisitorImplArrayForIndex<
|
result = ExecRangeVisitorImplArrayForIndex<
|
||||||
proto::plan::Array>();
|
proto::plan::Array>();
|
||||||
} else {
|
} else {
|
||||||
result =
|
result = ExecRangeVisitorImplArray<proto::plan::Array>(
|
||||||
ExecRangeVisitorImplArray<proto::plan::Array>();
|
input);
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
default:
|
default:
|
||||||
@ -253,11 +258,12 @@ PhyUnaryRangeFilterExpr::Eval(EvalCtx& context, VectorPtr& result) {
|
|||||||
|
|
||||||
template <typename ValueType>
|
template <typename ValueType>
|
||||||
VectorPtr
|
VectorPtr
|
||||||
PhyUnaryRangeFilterExpr::ExecRangeVisitorImplArray() {
|
PhyUnaryRangeFilterExpr::ExecRangeVisitorImplArray(OffsetVector* input) {
|
||||||
using GetType = std::conditional_t<std::is_same_v<ValueType, std::string>,
|
using GetType = std::conditional_t<std::is_same_v<ValueType, std::string>,
|
||||||
std::string_view,
|
std::string_view,
|
||||||
ValueType>;
|
ValueType>;
|
||||||
auto real_batch_size = GetNextBatchSize();
|
auto real_batch_size =
|
||||||
|
has_offset_input_ ? input->size() : GetNextBatchSize();
|
||||||
if (real_batch_size == 0) {
|
if (real_batch_size == 0) {
|
||||||
return nullptr;
|
return nullptr;
|
||||||
}
|
}
|
||||||
@ -273,56 +279,135 @@ PhyUnaryRangeFilterExpr::ExecRangeVisitorImplArray() {
|
|||||||
if (expr_->column_.nested_path_.size() > 0) {
|
if (expr_->column_.nested_path_.size() > 0) {
|
||||||
index = std::stoi(expr_->column_.nested_path_[0]);
|
index = std::stoi(expr_->column_.nested_path_[0]);
|
||||||
}
|
}
|
||||||
auto execute_sub_batch = [op_type](const milvus::ArrayView* data,
|
auto execute_sub_batch = [op_type]<FilterType filter_type =
|
||||||
const bool* valid_data,
|
FilterType::sequential>(
|
||||||
const int size,
|
const milvus::ArrayView* data,
|
||||||
TargetBitmapView res,
|
const bool* valid_data,
|
||||||
TargetBitmapView valid_res,
|
const int32_t* offsets,
|
||||||
ValueType val,
|
const int size,
|
||||||
int index) {
|
TargetBitmapView res,
|
||||||
|
TargetBitmapView valid_res,
|
||||||
|
ValueType val,
|
||||||
|
int index) {
|
||||||
switch (op_type) {
|
switch (op_type) {
|
||||||
case proto::plan::GreaterThan: {
|
case proto::plan::GreaterThan: {
|
||||||
UnaryElementFuncForArray<ValueType, proto::plan::GreaterThan>
|
UnaryElementFuncForArray<ValueType,
|
||||||
|
proto::plan::GreaterThan,
|
||||||
|
filter_type>
|
||||||
func;
|
func;
|
||||||
func(data, valid_data, size, val, index, res, valid_res);
|
func(data,
|
||||||
|
valid_data,
|
||||||
|
size,
|
||||||
|
val,
|
||||||
|
index,
|
||||||
|
res,
|
||||||
|
valid_res,
|
||||||
|
offsets);
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
case proto::plan::GreaterEqual: {
|
case proto::plan::GreaterEqual: {
|
||||||
UnaryElementFuncForArray<ValueType, proto::plan::GreaterEqual>
|
UnaryElementFuncForArray<ValueType,
|
||||||
|
proto::plan::GreaterEqual,
|
||||||
|
filter_type>
|
||||||
func;
|
func;
|
||||||
func(data, valid_data, size, val, index, res, valid_res);
|
func(data,
|
||||||
|
valid_data,
|
||||||
|
size,
|
||||||
|
val,
|
||||||
|
index,
|
||||||
|
res,
|
||||||
|
valid_res,
|
||||||
|
offsets);
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
case proto::plan::LessThan: {
|
case proto::plan::LessThan: {
|
||||||
UnaryElementFuncForArray<ValueType, proto::plan::LessThan> func;
|
UnaryElementFuncForArray<ValueType,
|
||||||
func(data, valid_data, size, val, index, res, valid_res);
|
proto::plan::LessThan,
|
||||||
|
filter_type>
|
||||||
|
func;
|
||||||
|
func(data,
|
||||||
|
valid_data,
|
||||||
|
size,
|
||||||
|
val,
|
||||||
|
index,
|
||||||
|
res,
|
||||||
|
valid_res,
|
||||||
|
offsets);
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
case proto::plan::LessEqual: {
|
case proto::plan::LessEqual: {
|
||||||
UnaryElementFuncForArray<ValueType, proto::plan::LessEqual>
|
UnaryElementFuncForArray<ValueType,
|
||||||
|
proto::plan::LessEqual,
|
||||||
|
filter_type>
|
||||||
func;
|
func;
|
||||||
func(data, valid_data, size, val, index, res, valid_res);
|
func(data,
|
||||||
|
valid_data,
|
||||||
|
size,
|
||||||
|
val,
|
||||||
|
index,
|
||||||
|
res,
|
||||||
|
valid_res,
|
||||||
|
offsets);
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
case proto::plan::Equal: {
|
case proto::plan::Equal: {
|
||||||
UnaryElementFuncForArray<ValueType, proto::plan::Equal> func;
|
UnaryElementFuncForArray<ValueType,
|
||||||
func(data, valid_data, size, val, index, res, valid_res);
|
proto::plan::Equal,
|
||||||
|
filter_type>
|
||||||
|
func;
|
||||||
|
func(data,
|
||||||
|
valid_data,
|
||||||
|
size,
|
||||||
|
val,
|
||||||
|
index,
|
||||||
|
res,
|
||||||
|
valid_res,
|
||||||
|
offsets);
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
case proto::plan::NotEqual: {
|
case proto::plan::NotEqual: {
|
||||||
UnaryElementFuncForArray<ValueType, proto::plan::NotEqual> func;
|
UnaryElementFuncForArray<ValueType,
|
||||||
func(data, valid_data, size, val, index, res, valid_res);
|
proto::plan::NotEqual,
|
||||||
|
filter_type>
|
||||||
|
func;
|
||||||
|
func(data,
|
||||||
|
valid_data,
|
||||||
|
size,
|
||||||
|
val,
|
||||||
|
index,
|
||||||
|
res,
|
||||||
|
valid_res,
|
||||||
|
offsets);
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
case proto::plan::PrefixMatch: {
|
case proto::plan::PrefixMatch: {
|
||||||
UnaryElementFuncForArray<ValueType, proto::plan::PrefixMatch>
|
UnaryElementFuncForArray<ValueType,
|
||||||
|
proto::plan::PrefixMatch,
|
||||||
|
filter_type>
|
||||||
func;
|
func;
|
||||||
func(data, valid_data, size, val, index, res, valid_res);
|
func(data,
|
||||||
|
valid_data,
|
||||||
|
size,
|
||||||
|
val,
|
||||||
|
index,
|
||||||
|
res,
|
||||||
|
valid_res,
|
||||||
|
offsets);
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
case proto::plan::Match: {
|
case proto::plan::Match: {
|
||||||
UnaryElementFuncForArray<ValueType, proto::plan::Match> func;
|
UnaryElementFuncForArray<ValueType,
|
||||||
func(data, valid_data, size, val, index, res, valid_res);
|
proto::plan::Match,
|
||||||
|
filter_type>
|
||||||
|
func;
|
||||||
|
func(data,
|
||||||
|
valid_data,
|
||||||
|
size,
|
||||||
|
val,
|
||||||
|
index,
|
||||||
|
res,
|
||||||
|
valid_res,
|
||||||
|
offsets);
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
default:
|
default:
|
||||||
@ -332,8 +417,20 @@ PhyUnaryRangeFilterExpr::ExecRangeVisitorImplArray() {
|
|||||||
op_type));
|
op_type));
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
int64_t processed_size = ProcessDataChunks<milvus::ArrayView>(
|
int64_t processed_size;
|
||||||
execute_sub_batch, std::nullptr_t{}, res, valid_res, val, index);
|
if (has_offset_input_) {
|
||||||
|
processed_size =
|
||||||
|
ProcessDataByOffsets<milvus::ArrayView>(execute_sub_batch,
|
||||||
|
std::nullptr_t{},
|
||||||
|
input,
|
||||||
|
res,
|
||||||
|
valid_res,
|
||||||
|
val,
|
||||||
|
index);
|
||||||
|
} else {
|
||||||
|
processed_size = ProcessDataChunks<milvus::ArrayView>(
|
||||||
|
execute_sub_batch, std::nullptr_t{}, res, valid_res, val, index);
|
||||||
|
}
|
||||||
AssertInfo(processed_size == real_batch_size,
|
AssertInfo(processed_size == real_batch_size,
|
||||||
"internal error: expr processed rows {} not equal "
|
"internal error: expr processed rows {} not equal "
|
||||||
"expect batch size {}",
|
"expect batch size {}",
|
||||||
@ -454,12 +551,13 @@ PhyUnaryRangeFilterExpr::ExecArrayEqualForIndex(bool reverse) {
|
|||||||
|
|
||||||
template <typename ExprValueType>
|
template <typename ExprValueType>
|
||||||
VectorPtr
|
VectorPtr
|
||||||
PhyUnaryRangeFilterExpr::ExecRangeVisitorImplJson() {
|
PhyUnaryRangeFilterExpr::ExecRangeVisitorImplJson(OffsetVector* input) {
|
||||||
using GetType =
|
using GetType =
|
||||||
std::conditional_t<std::is_same_v<ExprValueType, std::string>,
|
std::conditional_t<std::is_same_v<ExprValueType, std::string>,
|
||||||
std::string_view,
|
std::string_view,
|
||||||
ExprValueType>;
|
ExprValueType>;
|
||||||
auto real_batch_size = GetNextBatchSize();
|
auto real_batch_size =
|
||||||
|
has_offset_input_ ? input->size() : GetNextBatchSize();
|
||||||
if (real_batch_size == 0) {
|
if (real_batch_size == 0) {
|
||||||
return nullptr;
|
return nullptr;
|
||||||
}
|
}
|
||||||
@ -473,46 +571,53 @@ PhyUnaryRangeFilterExpr::ExecRangeVisitorImplJson() {
|
|||||||
auto op_type = expr_->op_type_;
|
auto op_type = expr_->op_type_;
|
||||||
auto pointer = milvus::Json::pointer(expr_->column_.nested_path_);
|
auto pointer = milvus::Json::pointer(expr_->column_.nested_path_);
|
||||||
|
|
||||||
#define UnaryRangeJSONCompare(cmp) \
|
#define UnaryRangeJSONCompare(cmp) \
|
||||||
do { \
|
do { \
|
||||||
auto x = data[i].template at<GetType>(pointer); \
|
auto x = data[offset].template at<GetType>(pointer); \
|
||||||
if (x.error()) { \
|
if (x.error()) { \
|
||||||
if constexpr (std::is_same_v<GetType, int64_t>) { \
|
if constexpr (std::is_same_v<GetType, int64_t>) { \
|
||||||
auto x = data[i].template at<double>(pointer); \
|
auto x = data[offset].template at<double>(pointer); \
|
||||||
res[i] = !x.error() && (cmp); \
|
res[i] = !x.error() && (cmp); \
|
||||||
break; \
|
break; \
|
||||||
} \
|
} \
|
||||||
res[i] = false; \
|
res[i] = false; \
|
||||||
break; \
|
break; \
|
||||||
} \
|
} \
|
||||||
res[i] = (cmp); \
|
res[i] = (cmp); \
|
||||||
} while (false)
|
} while (false)
|
||||||
|
|
||||||
#define UnaryRangeJSONCompareNotEqual(cmp) \
|
#define UnaryRangeJSONCompareNotEqual(cmp) \
|
||||||
do { \
|
do { \
|
||||||
auto x = data[i].template at<GetType>(pointer); \
|
auto x = data[offset].template at<GetType>(pointer); \
|
||||||
if (x.error()) { \
|
if (x.error()) { \
|
||||||
if constexpr (std::is_same_v<GetType, int64_t>) { \
|
if constexpr (std::is_same_v<GetType, int64_t>) { \
|
||||||
auto x = data[i].template at<double>(pointer); \
|
auto x = data[offset].template at<double>(pointer); \
|
||||||
res[i] = x.error() || (cmp); \
|
res[i] = x.error() || (cmp); \
|
||||||
break; \
|
break; \
|
||||||
} \
|
} \
|
||||||
res[i] = true; \
|
res[i] = true; \
|
||||||
break; \
|
break; \
|
||||||
} \
|
} \
|
||||||
res[i] = (cmp); \
|
res[i] = (cmp); \
|
||||||
} while (false)
|
} while (false)
|
||||||
|
|
||||||
auto execute_sub_batch = [op_type, pointer](const milvus::Json* data,
|
auto execute_sub_batch =
|
||||||
const bool* valid_data,
|
[ op_type, pointer ]<FilterType filter_type = FilterType::sequential>(
|
||||||
const int size,
|
const milvus::Json* data,
|
||||||
TargetBitmapView res,
|
const bool* valid_data,
|
||||||
TargetBitmapView valid_res,
|
const int32_t* offsets,
|
||||||
ExprValueType val) {
|
const int size,
|
||||||
|
TargetBitmapView res,
|
||||||
|
TargetBitmapView valid_res,
|
||||||
|
ExprValueType val) {
|
||||||
switch (op_type) {
|
switch (op_type) {
|
||||||
case proto::plan::GreaterThan: {
|
case proto::plan::GreaterThan: {
|
||||||
for (size_t i = 0; i < size; ++i) {
|
for (size_t i = 0; i < size; ++i) {
|
||||||
if (valid_data != nullptr && !valid_data[i]) {
|
auto offset = i;
|
||||||
|
if constexpr (filter_type == FilterType::random) {
|
||||||
|
offset = (offsets) ? offsets[i] : i;
|
||||||
|
}
|
||||||
|
if (valid_data != nullptr && !valid_data[offset]) {
|
||||||
res[i] = valid_res[i] = false;
|
res[i] = valid_res[i] = false;
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
@ -526,7 +631,11 @@ PhyUnaryRangeFilterExpr::ExecRangeVisitorImplJson() {
|
|||||||
}
|
}
|
||||||
case proto::plan::GreaterEqual: {
|
case proto::plan::GreaterEqual: {
|
||||||
for (size_t i = 0; i < size; ++i) {
|
for (size_t i = 0; i < size; ++i) {
|
||||||
if (valid_data != nullptr && !valid_data[i]) {
|
auto offset = i;
|
||||||
|
if constexpr (filter_type == FilterType::random) {
|
||||||
|
offset = (offsets) ? offsets[i] : i;
|
||||||
|
}
|
||||||
|
if (valid_data != nullptr && !valid_data[offset]) {
|
||||||
res[i] = valid_res[i] = false;
|
res[i] = valid_res[i] = false;
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
@ -540,7 +649,11 @@ PhyUnaryRangeFilterExpr::ExecRangeVisitorImplJson() {
|
|||||||
}
|
}
|
||||||
case proto::plan::LessThan: {
|
case proto::plan::LessThan: {
|
||||||
for (size_t i = 0; i < size; ++i) {
|
for (size_t i = 0; i < size; ++i) {
|
||||||
if (valid_data != nullptr && !valid_data[i]) {
|
auto offset = i;
|
||||||
|
if constexpr (filter_type == FilterType::random) {
|
||||||
|
offset = (offsets) ? offsets[i] : i;
|
||||||
|
}
|
||||||
|
if (valid_data != nullptr && !valid_data[offset]) {
|
||||||
res[i] = valid_res[i] = false;
|
res[i] = valid_res[i] = false;
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
@ -554,7 +667,11 @@ PhyUnaryRangeFilterExpr::ExecRangeVisitorImplJson() {
|
|||||||
}
|
}
|
||||||
case proto::plan::LessEqual: {
|
case proto::plan::LessEqual: {
|
||||||
for (size_t i = 0; i < size; ++i) {
|
for (size_t i = 0; i < size; ++i) {
|
||||||
if (valid_data != nullptr && !valid_data[i]) {
|
auto offset = i;
|
||||||
|
if constexpr (filter_type == FilterType::random) {
|
||||||
|
offset = (offsets) ? offsets[i] : i;
|
||||||
|
}
|
||||||
|
if (valid_data != nullptr && !valid_data[offset]) {
|
||||||
res[i] = valid_res[i] = false;
|
res[i] = valid_res[i] = false;
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
@ -568,7 +685,11 @@ PhyUnaryRangeFilterExpr::ExecRangeVisitorImplJson() {
|
|||||||
}
|
}
|
||||||
case proto::plan::Equal: {
|
case proto::plan::Equal: {
|
||||||
for (size_t i = 0; i < size; ++i) {
|
for (size_t i = 0; i < size; ++i) {
|
||||||
if (valid_data != nullptr && !valid_data[i]) {
|
auto offset = i;
|
||||||
|
if constexpr (filter_type == FilterType::random) {
|
||||||
|
offset = (offsets) ? offsets[i] : i;
|
||||||
|
}
|
||||||
|
if (valid_data != nullptr && !valid_data[offset]) {
|
||||||
res[i] = valid_res[i] = false;
|
res[i] = valid_res[i] = false;
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
@ -588,7 +709,11 @@ PhyUnaryRangeFilterExpr::ExecRangeVisitorImplJson() {
|
|||||||
}
|
}
|
||||||
case proto::plan::NotEqual: {
|
case proto::plan::NotEqual: {
|
||||||
for (size_t i = 0; i < size; ++i) {
|
for (size_t i = 0; i < size; ++i) {
|
||||||
if (valid_data != nullptr && !valid_data[i]) {
|
auto offset = i;
|
||||||
|
if constexpr (filter_type == FilterType::random) {
|
||||||
|
offset = (offsets) ? offsets[i] : i;
|
||||||
|
}
|
||||||
|
if (valid_data != nullptr && !valid_data[offset]) {
|
||||||
res[i] = valid_res[i] = false;
|
res[i] = valid_res[i] = false;
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
@ -608,7 +733,11 @@ PhyUnaryRangeFilterExpr::ExecRangeVisitorImplJson() {
|
|||||||
}
|
}
|
||||||
case proto::plan::PrefixMatch: {
|
case proto::plan::PrefixMatch: {
|
||||||
for (size_t i = 0; i < size; ++i) {
|
for (size_t i = 0; i < size; ++i) {
|
||||||
if (valid_data != nullptr && !valid_data[i]) {
|
auto offset = i;
|
||||||
|
if constexpr (filter_type == FilterType::random) {
|
||||||
|
offset = (offsets) ? offsets[i] : i;
|
||||||
|
}
|
||||||
|
if (valid_data != nullptr && !valid_data[offset]) {
|
||||||
res[i] = valid_res[i] = false;
|
res[i] = valid_res[i] = false;
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
@ -626,7 +755,11 @@ PhyUnaryRangeFilterExpr::ExecRangeVisitorImplJson() {
|
|||||||
auto regex_pattern = translator(val);
|
auto regex_pattern = translator(val);
|
||||||
RegexMatcher matcher(regex_pattern);
|
RegexMatcher matcher(regex_pattern);
|
||||||
for (size_t i = 0; i < size; ++i) {
|
for (size_t i = 0; i < size; ++i) {
|
||||||
if (valid_data != nullptr && !valid_data[i]) {
|
auto offset = i;
|
||||||
|
if constexpr (filter_type == FilterType::random) {
|
||||||
|
offset = (offsets) ? offsets[i] : i;
|
||||||
|
}
|
||||||
|
if (valid_data != nullptr && !valid_data[offset]) {
|
||||||
res[i] = valid_res[i] = false;
|
res[i] = valid_res[i] = false;
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
@ -646,8 +779,15 @@ PhyUnaryRangeFilterExpr::ExecRangeVisitorImplJson() {
|
|||||||
op_type));
|
op_type));
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
int64_t processed_size = ProcessDataChunks<milvus::Json>(
|
int64_t processed_size;
|
||||||
execute_sub_batch, std::nullptr_t{}, res, valid_res, val);
|
if (has_offset_input_) {
|
||||||
|
processed_size = ProcessDataByOffsets<milvus::Json>(
|
||||||
|
execute_sub_batch, std::nullptr_t{}, input, res, valid_res, val);
|
||||||
|
|
||||||
|
} else {
|
||||||
|
processed_size = ProcessDataChunks<milvus::Json>(
|
||||||
|
execute_sub_batch, std::nullptr_t{}, res, valid_res, val);
|
||||||
|
}
|
||||||
AssertInfo(processed_size == real_batch_size,
|
AssertInfo(processed_size == real_batch_size,
|
||||||
"internal error: expr processed rows {} not equal "
|
"internal error: expr processed rows {} not equal "
|
||||||
"expect batch size {}",
|
"expect batch size {}",
|
||||||
@ -658,15 +798,20 @@ PhyUnaryRangeFilterExpr::ExecRangeVisitorImplJson() {
|
|||||||
|
|
||||||
template <typename T>
|
template <typename T>
|
||||||
VectorPtr
|
VectorPtr
|
||||||
PhyUnaryRangeFilterExpr::ExecRangeVisitorImpl() {
|
PhyUnaryRangeFilterExpr::ExecRangeVisitorImpl(OffsetVector* input) {
|
||||||
if (expr_->op_type_ == proto::plan::OpType::TextMatch) {
|
if (expr_->op_type_ == proto::plan::OpType::TextMatch) {
|
||||||
|
if (has_offset_input_) {
|
||||||
|
PanicInfo(
|
||||||
|
OpTypeInvalid,
|
||||||
|
fmt::format("text match does not support iterative filter"));
|
||||||
|
}
|
||||||
return ExecTextMatch();
|
return ExecTextMatch();
|
||||||
}
|
}
|
||||||
|
|
||||||
if (CanUseIndex<T>()) {
|
if (CanUseIndex<T>() && !has_offset_input_) {
|
||||||
return ExecRangeVisitorImplForIndex<T>();
|
return ExecRangeVisitorImplForIndex<T>();
|
||||||
} else {
|
} else {
|
||||||
return ExecRangeVisitorImplForData<T>();
|
return ExecRangeVisitorImplForData<T>(input);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -749,17 +894,24 @@ PhyUnaryRangeFilterExpr::ExecRangeVisitorImplForIndex() {
|
|||||||
|
|
||||||
template <typename T>
|
template <typename T>
|
||||||
ColumnVectorPtr
|
ColumnVectorPtr
|
||||||
PhyUnaryRangeFilterExpr::PreCheckOverflow() {
|
PhyUnaryRangeFilterExpr::PreCheckOverflow(OffsetVector* input) {
|
||||||
if constexpr (std::is_integral_v<T> && !std::is_same_v<T, bool>) {
|
if constexpr (std::is_integral_v<T> && !std::is_same_v<T, bool>) {
|
||||||
int64_t val = GetValueFromProto<int64_t>(expr_->val_);
|
int64_t val = GetValueFromProto<int64_t>(expr_->val_);
|
||||||
|
|
||||||
if (milvus::query::out_of_range<T>(val)) {
|
if (milvus::query::out_of_range<T>(val)) {
|
||||||
int64_t batch_size =
|
int64_t batch_size;
|
||||||
overflow_check_pos_ + batch_size_ >= active_count_
|
if (input != nullptr) {
|
||||||
? active_count_ - overflow_check_pos_
|
batch_size = input->size();
|
||||||
: batch_size_;
|
} else {
|
||||||
overflow_check_pos_ += batch_size;
|
batch_size = overflow_check_pos_ + batch_size_ >= active_count_
|
||||||
auto valid = ProcessChunksForValid<T>(CanUseIndex<T>());
|
? active_count_ - overflow_check_pos_
|
||||||
|
: batch_size_;
|
||||||
|
overflow_check_pos_ += batch_size;
|
||||||
|
}
|
||||||
|
auto valid = (input != nullptr)
|
||||||
|
? ProcessChunksForValidByOffsets<T>(
|
||||||
|
CanUseIndex<T>(), *input)
|
||||||
|
: ProcessChunksForValid<T>(CanUseIndex<T>());
|
||||||
auto res_vec = std::make_shared<ColumnVector>(
|
auto res_vec = std::make_shared<ColumnVector>(
|
||||||
TargetBitmap(batch_size), std::move(valid));
|
TargetBitmap(batch_size), std::move(valid));
|
||||||
TargetBitmapView res(res_vec->GetRawData(), batch_size);
|
TargetBitmapView res(res_vec->GetRawData(), batch_size);
|
||||||
@ -805,18 +957,20 @@ PhyUnaryRangeFilterExpr::PreCheckOverflow() {
|
|||||||
|
|
||||||
template <typename T>
|
template <typename T>
|
||||||
VectorPtr
|
VectorPtr
|
||||||
PhyUnaryRangeFilterExpr::ExecRangeVisitorImplForData() {
|
PhyUnaryRangeFilterExpr::ExecRangeVisitorImplForData(OffsetVector* input) {
|
||||||
typedef std::
|
typedef std::
|
||||||
conditional_t<std::is_same_v<T, std::string_view>, std::string, T>
|
conditional_t<std::is_same_v<T, std::string_view>, std::string, T>
|
||||||
IndexInnerType;
|
IndexInnerType;
|
||||||
if (auto res = PreCheckOverflow<T>()) {
|
if (auto res = PreCheckOverflow<T>(input)) {
|
||||||
return res;
|
return res;
|
||||||
}
|
}
|
||||||
|
|
||||||
auto real_batch_size = GetNextBatchSize();
|
auto real_batch_size =
|
||||||
|
has_offset_input_ ? input->size() : GetNextBatchSize();
|
||||||
if (real_batch_size == 0) {
|
if (real_batch_size == 0) {
|
||||||
return nullptr;
|
return nullptr;
|
||||||
}
|
}
|
||||||
|
|
||||||
IndexInnerType val = GetValueFromProto<IndexInnerType>(expr_->val_);
|
IndexInnerType val = GetValueFromProto<IndexInnerType>(expr_->val_);
|
||||||
auto res_vec = std::make_shared<ColumnVector>(
|
auto res_vec = std::make_shared<ColumnVector>(
|
||||||
TargetBitmap(real_batch_size), TargetBitmap(real_batch_size));
|
TargetBitmap(real_batch_size), TargetBitmap(real_batch_size));
|
||||||
@ -824,51 +978,56 @@ PhyUnaryRangeFilterExpr::ExecRangeVisitorImplForData() {
|
|||||||
TargetBitmapView valid_res(res_vec->GetValidRawData(), real_batch_size);
|
TargetBitmapView valid_res(res_vec->GetValidRawData(), real_batch_size);
|
||||||
valid_res.set();
|
valid_res.set();
|
||||||
auto expr_type = expr_->op_type_;
|
auto expr_type = expr_->op_type_;
|
||||||
auto execute_sub_batch = [expr_type](const T* data,
|
|
||||||
const bool* valid_data,
|
auto execute_sub_batch = [expr_type]<FilterType filter_type =
|
||||||
const int size,
|
FilterType::sequential>(
|
||||||
TargetBitmapView res,
|
const T* data,
|
||||||
TargetBitmapView valid_res,
|
const bool* valid_data,
|
||||||
IndexInnerType val) {
|
const int32_t* offsets,
|
||||||
|
const int size,
|
||||||
|
TargetBitmapView res,
|
||||||
|
TargetBitmapView valid_res,
|
||||||
|
IndexInnerType val) {
|
||||||
switch (expr_type) {
|
switch (expr_type) {
|
||||||
case proto::plan::GreaterThan: {
|
case proto::plan::GreaterThan: {
|
||||||
UnaryElementFunc<T, proto::plan::GreaterThan> func;
|
UnaryElementFunc<T, proto::plan::GreaterThan, filter_type> func;
|
||||||
func(data, size, val, res);
|
func(data, size, val, res, offsets);
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
case proto::plan::GreaterEqual: {
|
case proto::plan::GreaterEqual: {
|
||||||
UnaryElementFunc<T, proto::plan::GreaterEqual> func;
|
UnaryElementFunc<T, proto::plan::GreaterEqual, filter_type>
|
||||||
func(data, size, val, res);
|
func;
|
||||||
|
func(data, size, val, res, offsets);
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
case proto::plan::LessThan: {
|
case proto::plan::LessThan: {
|
||||||
UnaryElementFunc<T, proto::plan::LessThan> func;
|
UnaryElementFunc<T, proto::plan::LessThan, filter_type> func;
|
||||||
func(data, size, val, res);
|
func(data, size, val, res, offsets);
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
case proto::plan::LessEqual: {
|
case proto::plan::LessEqual: {
|
||||||
UnaryElementFunc<T, proto::plan::LessEqual> func;
|
UnaryElementFunc<T, proto::plan::LessEqual, filter_type> func;
|
||||||
func(data, size, val, res);
|
func(data, size, val, res, offsets);
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
case proto::plan::Equal: {
|
case proto::plan::Equal: {
|
||||||
UnaryElementFunc<T, proto::plan::Equal> func;
|
UnaryElementFunc<T, proto::plan::Equal, filter_type> func;
|
||||||
func(data, size, val, res);
|
func(data, size, val, res, offsets);
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
case proto::plan::NotEqual: {
|
case proto::plan::NotEqual: {
|
||||||
UnaryElementFunc<T, proto::plan::NotEqual> func;
|
UnaryElementFunc<T, proto::plan::NotEqual, filter_type> func;
|
||||||
func(data, size, val, res);
|
func(data, size, val, res, offsets);
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
case proto::plan::PrefixMatch: {
|
case proto::plan::PrefixMatch: {
|
||||||
UnaryElementFunc<T, proto::plan::PrefixMatch> func;
|
UnaryElementFunc<T, proto::plan::PrefixMatch, filter_type> func;
|
||||||
func(data, size, val, res);
|
func(data, size, val, res, offsets);
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
case proto::plan::Match: {
|
case proto::plan::Match: {
|
||||||
UnaryElementFunc<T, proto::plan::Match> func;
|
UnaryElementFunc<T, proto::plan::Match, filter_type> func;
|
||||||
func(data, size, val, res);
|
func(data, size, val, res, offsets);
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
default:
|
default:
|
||||||
@ -882,20 +1041,32 @@ PhyUnaryRangeFilterExpr::ExecRangeVisitorImplForData() {
|
|||||||
// but to mask res with valid_data after the batch operation.
|
// but to mask res with valid_data after the batch operation.
|
||||||
if (valid_data != nullptr) {
|
if (valid_data != nullptr) {
|
||||||
for (int i = 0; i < size; i++) {
|
for (int i = 0; i < size; i++) {
|
||||||
if (!valid_data[i]) {
|
auto offset = i;
|
||||||
|
if constexpr (filter_type == FilterType::random) {
|
||||||
|
offset = (offsets) ? offsets[i] : i;
|
||||||
|
}
|
||||||
|
if (!valid_data[offset]) {
|
||||||
res[i] = valid_res[i] = false;
|
res[i] = valid_res[i] = false;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
auto skip_index_func = [expr_type, val](const SkipIndex& skip_index,
|
auto skip_index_func = [expr_type, val](const SkipIndex& skip_index,
|
||||||
FieldId field_id,
|
FieldId field_id,
|
||||||
int64_t chunk_id) {
|
int64_t chunk_id) {
|
||||||
return skip_index.CanSkipUnaryRange<T>(
|
return skip_index.CanSkipUnaryRange<T>(
|
||||||
field_id, chunk_id, expr_type, val);
|
field_id, chunk_id, expr_type, val);
|
||||||
};
|
};
|
||||||
int64_t processed_size = ProcessDataChunks<T>(
|
|
||||||
execute_sub_batch, skip_index_func, res, valid_res, val);
|
int64_t processed_size;
|
||||||
|
if (has_offset_input_) {
|
||||||
|
processed_size = ProcessDataByOffsets<T>(
|
||||||
|
execute_sub_batch, skip_index_func, input, res, valid_res, val);
|
||||||
|
} else {
|
||||||
|
processed_size = ProcessDataChunks<T>(
|
||||||
|
execute_sub_batch, skip_index_func, res, valid_res, val);
|
||||||
|
}
|
||||||
AssertInfo(processed_size == real_batch_size,
|
AssertInfo(processed_size == real_batch_size,
|
||||||
"internal error: expr processed rows {} not equal "
|
"internal error: expr processed rows {} not equal "
|
||||||
"expect batch size {}, related params[active_count:{}, "
|
"expect batch size {}, related params[active_count:{}, "
|
||||||
|
|||||||
@ -33,7 +33,7 @@
|
|||||||
namespace milvus {
|
namespace milvus {
|
||||||
namespace exec {
|
namespace exec {
|
||||||
|
|
||||||
template <typename T>
|
template <typename T, FilterType filter_type>
|
||||||
struct UnaryElementFuncForMatch {
|
struct UnaryElementFuncForMatch {
|
||||||
typedef std::
|
typedef std::
|
||||||
conditional_t<std::is_same_v<T, std::string_view>, std::string, T>
|
conditional_t<std::is_same_v<T, std::string_view>, std::string, T>
|
||||||
@ -43,58 +43,68 @@ struct UnaryElementFuncForMatch {
|
|||||||
operator()(const T* src,
|
operator()(const T* src,
|
||||||
size_t size,
|
size_t size,
|
||||||
IndexInnerType val,
|
IndexInnerType val,
|
||||||
TargetBitmapView res) {
|
TargetBitmapView res,
|
||||||
|
int64_t* offsets = nullptr) {
|
||||||
PatternMatchTranslator translator;
|
PatternMatchTranslator translator;
|
||||||
auto regex_pattern = translator(val);
|
auto regex_pattern = translator(val);
|
||||||
RegexMatcher matcher(regex_pattern);
|
RegexMatcher matcher(regex_pattern);
|
||||||
for (int i = 0; i < size; ++i) {
|
for (int i = 0; i < size; ++i) {
|
||||||
res[i] = matcher(src[i]);
|
if constexpr (filter_type == FilterType::random) {
|
||||||
|
res[i] = matcher(src[offsets ? offsets[i] : i]);
|
||||||
|
} else {
|
||||||
|
res[i] = matcher(src[i]);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
template <typename T, proto::plan::OpType op>
|
template <typename T, proto::plan::OpType op, FilterType filter_type>
|
||||||
struct UnaryElementFunc {
|
struct UnaryElementFunc {
|
||||||
typedef std::
|
typedef std::
|
||||||
conditional_t<std::is_same_v<T, std::string_view>, std::string, T>
|
conditional_t<std::is_same_v<T, std::string_view>, std::string, T>
|
||||||
IndexInnerType;
|
IndexInnerType;
|
||||||
|
|
||||||
void
|
void
|
||||||
operator()(const T* src,
|
operator()(const T* src,
|
||||||
size_t size,
|
size_t size,
|
||||||
IndexInnerType val,
|
IndexInnerType val,
|
||||||
TargetBitmapView res) {
|
TargetBitmapView res,
|
||||||
|
const int32_t* offsets = nullptr) {
|
||||||
if constexpr (op == proto::plan::OpType::Match) {
|
if constexpr (op == proto::plan::OpType::Match) {
|
||||||
UnaryElementFuncForMatch<T> func;
|
UnaryElementFuncForMatch<T, filter_type> func;
|
||||||
func(src, size, val, res);
|
func(src, size, val, res);
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
|
||||||
// This is the original code, which is kept for the documentation purposes
|
// This is the original code, which is kept for the documentation purposes
|
||||||
for (int i = 0; i < size; ++i) {
|
// also, for iterative filter
|
||||||
if constexpr (op == proto::plan::OpType::Equal) {
|
if constexpr (filter_type == FilterType::random) {
|
||||||
res[i] = src[i] == val;
|
for (int i = 0; i < size; ++i) {
|
||||||
} else if constexpr (op == proto::plan::OpType::NotEqual) {
|
auto offset = (offsets != nullptr) ? offsets[i] : i;
|
||||||
res[i] = src[i] != val;
|
if constexpr (op == proto::plan::OpType::Equal) {
|
||||||
} else if constexpr (op == proto::plan::OpType::GreaterThan) {
|
res[i] = src[offset] == val;
|
||||||
res[i] = src[i] > val;
|
} else if constexpr (op == proto::plan::OpType::NotEqual) {
|
||||||
} else if constexpr (op == proto::plan::OpType::LessThan) {
|
res[i] = src[offset] != val;
|
||||||
res[i] = src[i] < val;
|
} else if constexpr (op == proto::plan::OpType::GreaterThan) {
|
||||||
} else if constexpr (op == proto::plan::OpType::GreaterEqual) {
|
res[i] = src[offset] > val;
|
||||||
res[i] = src[i] >= val;
|
} else if constexpr (op == proto::plan::OpType::LessThan) {
|
||||||
} else if constexpr (op == proto::plan::OpType::LessEqual) {
|
res[i] = src[offset] < val;
|
||||||
res[i] = src[i] <= val;
|
} else if constexpr (op == proto::plan::OpType::GreaterEqual) {
|
||||||
} else if constexpr (op == proto::plan::OpType::PrefixMatch) {
|
res[i] = src[offset] >= val;
|
||||||
res[i] = milvus::query::Match(
|
} else if constexpr (op == proto::plan::OpType::LessEqual) {
|
||||||
src[i], val, proto::plan::OpType::PrefixMatch);
|
res[i] = src[offset] <= val;
|
||||||
} else {
|
} else if constexpr (op == proto::plan::OpType::PrefixMatch) {
|
||||||
PanicInfo(
|
res[i] = milvus::query::Match(
|
||||||
OpTypeInvalid,
|
src[offset], val, proto::plan::OpType::PrefixMatch);
|
||||||
fmt::format("unsupported op_type:{} for UnaryElementFunc",
|
} else {
|
||||||
op));
|
PanicInfo(
|
||||||
|
OpTypeInvalid,
|
||||||
|
fmt::format(
|
||||||
|
"unsupported op_type:{} for UnaryElementFunc", op));
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
return;
|
||||||
}
|
}
|
||||||
*/
|
|
||||||
|
|
||||||
if constexpr (op == proto::plan::OpType::PrefixMatch) {
|
if constexpr (op == proto::plan::OpType::PrefixMatch) {
|
||||||
for (int i = 0; i < size; ++i) {
|
for (int i = 0; i < size; ++i) {
|
||||||
@ -141,7 +151,7 @@ struct UnaryElementFunc {
|
|||||||
} \
|
} \
|
||||||
} while (false)
|
} while (false)
|
||||||
|
|
||||||
template <typename ValueType, proto::plan::OpType op>
|
template <typename ValueType, proto::plan::OpType op, FilterType filter_type>
|
||||||
struct UnaryElementFuncForArray {
|
struct UnaryElementFuncForArray {
|
||||||
using GetType = std::conditional_t<std::is_same_v<ValueType, std::string>,
|
using GetType = std::conditional_t<std::is_same_v<ValueType, std::string>,
|
||||||
std::string_view,
|
std::string_view,
|
||||||
@ -153,32 +163,39 @@ struct UnaryElementFuncForArray {
|
|||||||
ValueType val,
|
ValueType val,
|
||||||
int index,
|
int index,
|
||||||
TargetBitmapView res,
|
TargetBitmapView res,
|
||||||
TargetBitmapView valid_res) {
|
TargetBitmapView valid_res,
|
||||||
|
const int32_t* offsets = nullptr) {
|
||||||
for (int i = 0; i < size; ++i) {
|
for (int i = 0; i < size; ++i) {
|
||||||
if (valid_data != nullptr && !valid_data[i]) {
|
auto offset = i;
|
||||||
|
if constexpr (filter_type == FilterType::random) {
|
||||||
|
offset = (offsets) ? offsets[i] : i;
|
||||||
|
}
|
||||||
|
if (valid_data != nullptr && !valid_data[offset]) {
|
||||||
res[i] = valid_res[i] = false;
|
res[i] = valid_res[i] = false;
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
if constexpr (op == proto::plan::OpType::Equal) {
|
if constexpr (op == proto::plan::OpType::Equal) {
|
||||||
if constexpr (std::is_same_v<GetType, proto::plan::Array>) {
|
if constexpr (std::is_same_v<GetType, proto::plan::Array>) {
|
||||||
res[i] = src[i].is_same_array(val);
|
res[i] = src[offset].is_same_array(val);
|
||||||
} else {
|
} else {
|
||||||
if (index >= src[i].length()) {
|
if (index >= src[offset].length()) {
|
||||||
res[i] = false;
|
res[i] = false;
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
auto array_data = src[i].template get_data<GetType>(index);
|
auto array_data =
|
||||||
|
src[offset].template get_data<GetType>(index);
|
||||||
res[i] = array_data == val;
|
res[i] = array_data == val;
|
||||||
}
|
}
|
||||||
} else if constexpr (op == proto::plan::OpType::NotEqual) {
|
} else if constexpr (op == proto::plan::OpType::NotEqual) {
|
||||||
if constexpr (std::is_same_v<GetType, proto::plan::Array>) {
|
if constexpr (std::is_same_v<GetType, proto::plan::Array>) {
|
||||||
res[i] = !src[i].is_same_array(val);
|
res[i] = !src[offset].is_same_array(val);
|
||||||
} else {
|
} else {
|
||||||
if (index >= src[i].length()) {
|
if (index >= src[offset].length()) {
|
||||||
res[i] = false;
|
res[i] = false;
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
auto array_data = src[i].template get_data<GetType>(index);
|
auto array_data =
|
||||||
|
src[offset].template get_data<GetType>(index);
|
||||||
res[i] = array_data != val;
|
res[i] = array_data != val;
|
||||||
}
|
}
|
||||||
} else if constexpr (op == proto::plan::OpType::GreaterThan) {
|
} else if constexpr (op == proto::plan::OpType::GreaterThan) {
|
||||||
@ -195,14 +212,15 @@ struct UnaryElementFuncForArray {
|
|||||||
if constexpr (std::is_same_v<GetType, proto::plan::Array>) {
|
if constexpr (std::is_same_v<GetType, proto::plan::Array>) {
|
||||||
res[i] = false;
|
res[i] = false;
|
||||||
} else {
|
} else {
|
||||||
if (index >= src[i].length()) {
|
if (index >= src[offset].length()) {
|
||||||
res[i] = false;
|
res[i] = false;
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
PatternMatchTranslator translator;
|
PatternMatchTranslator translator;
|
||||||
auto regex_pattern = translator(val);
|
auto regex_pattern = translator(val);
|
||||||
RegexMatcher matcher(regex_pattern);
|
RegexMatcher matcher(regex_pattern);
|
||||||
auto array_data = src[i].template get_data<GetType>(index);
|
auto array_data =
|
||||||
|
src[offset].template get_data<GetType>(index);
|
||||||
res[i] = matcher(array_data);
|
res[i] = matcher(array_data);
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
@ -313,10 +331,18 @@ class PhyUnaryRangeFilterExpr : public SegmentExpr {
|
|||||||
void
|
void
|
||||||
Eval(EvalCtx& context, VectorPtr& result) override;
|
Eval(EvalCtx& context, VectorPtr& result) override;
|
||||||
|
|
||||||
|
bool
|
||||||
|
SupportOffsetInput() override {
|
||||||
|
if (expr_->op_type_ == proto::plan::OpType::TextMatch) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
private:
|
private:
|
||||||
template <typename T>
|
template <typename T>
|
||||||
VectorPtr
|
VectorPtr
|
||||||
ExecRangeVisitorImpl();
|
ExecRangeVisitorImpl(OffsetVector* input = nullptr);
|
||||||
|
|
||||||
template <typename T>
|
template <typename T>
|
||||||
VectorPtr
|
VectorPtr
|
||||||
@ -324,15 +350,15 @@ class PhyUnaryRangeFilterExpr : public SegmentExpr {
|
|||||||
|
|
||||||
template <typename T>
|
template <typename T>
|
||||||
VectorPtr
|
VectorPtr
|
||||||
ExecRangeVisitorImplForData();
|
ExecRangeVisitorImplForData(OffsetVector* input = nullptr);
|
||||||
|
|
||||||
template <typename ExprValueType>
|
template <typename ExprValueType>
|
||||||
VectorPtr
|
VectorPtr
|
||||||
ExecRangeVisitorImplJson();
|
ExecRangeVisitorImplJson(OffsetVector* input = nullptr);
|
||||||
|
|
||||||
template <typename ExprValueType>
|
template <typename ExprValueType>
|
||||||
VectorPtr
|
VectorPtr
|
||||||
ExecRangeVisitorImplArray();
|
ExecRangeVisitorImplArray(OffsetVector* input = nullptr);
|
||||||
|
|
||||||
template <typename T>
|
template <typename T>
|
||||||
VectorPtr
|
VectorPtr
|
||||||
@ -345,7 +371,7 @@ class PhyUnaryRangeFilterExpr : public SegmentExpr {
|
|||||||
// Check overflow and cache result for performace
|
// Check overflow and cache result for performace
|
||||||
template <typename T>
|
template <typename T>
|
||||||
ColumnVectorPtr
|
ColumnVectorPtr
|
||||||
PreCheckOverflow();
|
PreCheckOverflow(OffsetVector* input = nullptr);
|
||||||
|
|
||||||
template <typename T>
|
template <typename T>
|
||||||
bool
|
bool
|
||||||
|
|||||||
@ -22,9 +22,13 @@ namespace exec {
|
|||||||
|
|
||||||
void
|
void
|
||||||
PhyValueExpr::Eval(EvalCtx& context, VectorPtr& result) {
|
PhyValueExpr::Eval(EvalCtx& context, VectorPtr& result) {
|
||||||
int64_t real_batch_size = current_pos_ + batch_size_ >= active_count_
|
auto input = context.get_offset_input();
|
||||||
? active_count_ - current_pos_
|
SetHasOffsetInput((input != nullptr));
|
||||||
: batch_size_;
|
int64_t real_batch_size = has_offset_input_
|
||||||
|
? input->size()
|
||||||
|
: (current_pos_ + batch_size_ >= active_count_
|
||||||
|
? active_count_ - current_pos_
|
||||||
|
: batch_size_);
|
||||||
|
|
||||||
if (real_batch_size == 0) {
|
if (real_batch_size == 0) {
|
||||||
result = nullptr;
|
result = nullptr;
|
||||||
|
|||||||
@ -49,11 +49,14 @@ class PhyValueExpr : public Expr {
|
|||||||
|
|
||||||
void
|
void
|
||||||
MoveCursor() override {
|
MoveCursor() override {
|
||||||
int64_t real_batch_size = current_pos_ + batch_size_ >= active_count_
|
if (!has_offset_input_) {
|
||||||
? active_count_ - current_pos_
|
int64_t real_batch_size =
|
||||||
: batch_size_;
|
current_pos_ + batch_size_ >= active_count_
|
||||||
|
? active_count_ - current_pos_
|
||||||
|
: batch_size_;
|
||||||
|
|
||||||
current_pos_ += real_batch_size;
|
current_pos_ += real_batch_size;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
private:
|
private:
|
||||||
|
|||||||
@ -64,8 +64,7 @@ PhyFilterBitsNode::GetOutput() {
|
|||||||
std::chrono::high_resolution_clock::time_point scalar_start =
|
std::chrono::high_resolution_clock::time_point scalar_start =
|
||||||
std::chrono::high_resolution_clock::now();
|
std::chrono::high_resolution_clock::now();
|
||||||
|
|
||||||
EvalCtx eval_ctx(
|
EvalCtx eval_ctx(operator_context_->get_exec_context(), exprs_.get());
|
||||||
operator_context_->get_exec_context(), exprs_.get(), input_.get());
|
|
||||||
|
|
||||||
TargetBitmap bitset;
|
TargetBitmap bitset;
|
||||||
TargetBitmap valid_bitset;
|
TargetBitmap valid_bitset;
|
||||||
|
|||||||
273
internal/core/src/exec/operator/IterativeFilterNode.cpp
Normal file
273
internal/core/src/exec/operator/IterativeFilterNode.cpp
Normal file
@ -0,0 +1,273 @@
|
|||||||
|
// Licensed to the LF AI & Data foundation under one
|
||||||
|
// or more contributor license agreements. See the NOTICE file
|
||||||
|
// distributed with this work for additional information
|
||||||
|
// regarding copyright ownership. The ASF licenses this file
|
||||||
|
// to you under the Apache License, Version 2.0 (the
|
||||||
|
// "License"); you may not use this file except in compliance
|
||||||
|
// with the License. You may obtain a copy of the License at
|
||||||
|
//
|
||||||
|
// http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
//
|
||||||
|
// Unless required by applicable law or agreed to in writing, software
|
||||||
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
// See the License for the specific language governing permissions and
|
||||||
|
// limitations under the License.
|
||||||
|
|
||||||
|
#include "IterativeFilterNode.h"
|
||||||
|
|
||||||
|
namespace milvus {
|
||||||
|
namespace exec {
|
||||||
|
PhyIterativeFilterNode::PhyIterativeFilterNode(
|
||||||
|
int32_t operator_id,
|
||||||
|
DriverContext* driverctx,
|
||||||
|
const std::shared_ptr<const plan::FilterNode>& filter)
|
||||||
|
: Operator(driverctx,
|
||||||
|
filter->output_type(),
|
||||||
|
operator_id,
|
||||||
|
filter->id(),
|
||||||
|
"PhyIterativeFilterNode") {
|
||||||
|
ExecContext* exec_context = operator_context_->get_exec_context();
|
||||||
|
query_context_ = exec_context->get_query_context();
|
||||||
|
std::vector<expr::TypedExprPtr> filters;
|
||||||
|
filters.emplace_back(filter->filter());
|
||||||
|
exprs_ = std::make_unique<ExprSet>(filters, exec_context);
|
||||||
|
const auto& exprs = exprs_->exprs();
|
||||||
|
for (const auto& expr : exprs) {
|
||||||
|
is_native_supported_ =
|
||||||
|
(is_native_supported_ && (expr->SupportOffsetInput()));
|
||||||
|
}
|
||||||
|
need_process_rows_ = query_context_->get_active_count();
|
||||||
|
num_processed_rows_ = 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
void
|
||||||
|
PhyIterativeFilterNode::AddInput(RowVectorPtr& input) {
|
||||||
|
input_ = std::move(input);
|
||||||
|
}
|
||||||
|
|
||||||
|
bool
|
||||||
|
PhyIterativeFilterNode::IsFinished() {
|
||||||
|
return is_finished_;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Binary-searches the insertion position for `dist` inside the sorted slice
// [lo, hi) of `distances`. The slice is ordered best-first: descending when
// large_is_better (similarity metrics), ascending otherwise (distance
// metrics). Returns the first index whose stored value is strictly worse
// than `dist`; equal values insert after existing ones, keeping ties stable.
template <bool large_is_better>
inline size_t
find_binsert_position(const std::vector<float>& distances,
                      size_t lo,
                      size_t hi,
                      float dist) {
    while (lo < hi) {
        const size_t mid = lo + (hi - lo) / 2;
        bool mid_is_worse;
        if constexpr (large_is_better) {
            mid_is_worse = distances[mid] < dist;
        } else {
            mid_is_worse = distances[mid] > dist;
        }
        if (mid_is_worse) {
            hi = mid;
        } else {
            lo = mid + 1;
        }
    }
    return lo;
}
|
||||||
|
|
||||||
|
inline void
|
||||||
|
insert_helper(milvus::SearchResult& search_result,
|
||||||
|
int& topk,
|
||||||
|
const bool large_is_better,
|
||||||
|
const FixedVector<float>& distances,
|
||||||
|
const FixedVector<int32_t>& offsets,
|
||||||
|
const int64_t nq_index,
|
||||||
|
const int64_t unity_topk,
|
||||||
|
const int i) {
|
||||||
|
auto pos = large_is_better
|
||||||
|
? find_binsert_position<true>(search_result.distances_,
|
||||||
|
nq_index * unity_topk,
|
||||||
|
nq_index * unity_topk + topk,
|
||||||
|
distances[i])
|
||||||
|
: find_binsert_position<false>(search_result.distances_,
|
||||||
|
nq_index * unity_topk,
|
||||||
|
nq_index * unity_topk + topk,
|
||||||
|
distances[i]);
|
||||||
|
if (topk > pos) {
|
||||||
|
std::memmove(&search_result.distances_[pos + 1],
|
||||||
|
&search_result.distances_[pos],
|
||||||
|
(topk - pos) * sizeof(float));
|
||||||
|
std::memmove(&search_result.seg_offsets_[pos + 1],
|
||||||
|
&search_result.seg_offsets_[pos],
|
||||||
|
(topk - pos) * sizeof(int64_t));
|
||||||
|
}
|
||||||
|
search_result.seg_offsets_[pos] = offsets[i];
|
||||||
|
search_result.distances_[pos] = distances[i];
|
||||||
|
++topk;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Applies the scalar filter to the candidates produced by the per-query
// vector-search iterators and assembles the final top-k result.
//
// Two execution modes:
//  * native: every expression supports offset input, so the filter is
//    evaluated only on the candidate offsets pulled from the iterators;
//  * fallback: the filter is first evaluated over the whole segment into
//    `bitset`, and candidates are checked against it.
//
// FIX: the nq/iterator-count sanity check used `=` (assignment) instead of
// `==`, so it always passed and silently clobbered `nq`.
RowVectorPtr
PhyIterativeFilterNode::GetOutput() {
    if (is_finished_ || !no_more_input_) {
        return nullptr;
    }

    // Whichever path we take below, this operator is done after one call.
    DeferLambda([&]() { is_finished_ = true; });

    if (input_ == nullptr) {
        return nullptr;
    }

    std::chrono::high_resolution_clock::time_point scalar_start =
        std::chrono::high_resolution_clock::now();

    milvus::SearchResult search_result = query_context_->get_search_result();
    int64_t nq = search_result.total_nq_;
    int64_t unity_topk = search_result.unity_topK_;
    knowhere::MetricType metric_type = query_context_->get_metric_type();
    bool large_is_better = PositivelyRelated(metric_type);
    TargetBitmap bitset;
    // Fallback path: get the filter bitset of the whole segment first.
    if (!is_native_supported_) {
        EvalCtx eval_ctx(operator_context_->get_exec_context(), exprs_.get());

        TargetBitmap valid_bitset;
        while (num_processed_rows_ < need_process_rows_) {
            exprs_->Eval(0, 1, true, eval_ctx, results_);

            AssertInfo(
                results_.size() == 1 && results_[0] != nullptr,
                "PhyIterativeFilterNode result size should be size one and not "
                "be nullptr");

            if (auto col_vec =
                    std::dynamic_pointer_cast<ColumnVector>(results_[0])) {
                if (col_vec->IsBitmap()) {
                    auto col_vec_size = col_vec->size();
                    TargetBitmapView view(col_vec->GetRawData(), col_vec_size);
                    bitset.append(view);
                    TargetBitmapView valid_view(col_vec->GetValidRawData(),
                                                col_vec_size);
                    valid_bitset.append(valid_view);
                    num_processed_rows_ += col_vec_size;
                } else {
                    PanicInfo(ExprInvalid,
                              "PhyIterativeFilterNode result should be bitmap");
                }
            } else {
                PanicInfo(
                    ExprInvalid,
                    "PhyIterativeFilterNode result should be ColumnVector");
            }
        }
        Assert(bitset.size() == need_process_rows_);
        Assert(valid_bitset.size() == need_process_rows_);
    }
    if (search_result.vector_iterators_.has_value()) {
        AssertInfo(search_result.vector_iterators_.value().size() ==
                       search_result.total_nq_,
                   "Vector Iterators' count must be equal to total_nq_, Check "
                   "your code");
        int nq_index = 0;

        // NOTE: was `AssertInfo(nq = ...)` — an assignment, always truthy.
        AssertInfo(nq == search_result.vector_iterators_.value().size(),
                   "nq and iterator not equal size");
        search_result.seg_offsets_.resize(nq * unity_topk, INVALID_SEG_OFFSET);
        search_result.distances_.resize(nq * unity_topk);
        for (auto& iterator : search_result.vector_iterators_.value()) {
            EvalCtx eval_ctx(operator_context_->get_exec_context(),
                             exprs_.get());
            int topk = 0;
            // Keep pulling candidate batches until this query's top-k is
            // filled or the iterator is exhausted.
            while (iterator->HasNext() && topk < unity_topk) {
                FixedVector<int32_t> offsets;
                FixedVector<float> distances;
                // remain unfilled size as iterator batch size
                int64_t batch_size = unity_topk - topk;
                offsets.reserve(batch_size);
                distances.reserve(batch_size);
                while (iterator->HasNext()) {
                    auto offset_dis_pair = iterator->Next();
                    AssertInfo(
                        offset_dis_pair.has_value(),
                        "Wrong state! iterator cannot return valid result "
                        "whereas it still"
                        "tells hasNext, terminate operation");
                    auto offset = offset_dis_pair.value().first;
                    auto dis = offset_dis_pair.value().second;
                    offsets.emplace_back(offset);
                    distances.emplace_back(dis);
                    if (offsets.size() == batch_size) {
                        break;
                    }
                }
                if (is_native_supported_) {
                    // Native path: evaluate the filter only on this batch of
                    // candidate offsets.
                    eval_ctx.set_offset_input(&offsets);
                    std::vector<VectorPtr> results;
                    exprs_->Eval(0, 1, true, eval_ctx, results);
                    AssertInfo(
                        results.size() == 1 && results[0] != nullptr,
                        "PhyIterativeFilterNode result size should be size "
                        "one and not "
                        "be nullptr");

                    auto col_vec =
                        std::dynamic_pointer_cast<ColumnVector>(results[0]);
                    auto col_vec_size = col_vec->size();
                    TargetBitmapView bitsetview(col_vec->GetRawData(),
                                                col_vec_size);
                    Assert(bitsetview.size() <= batch_size);
                    Assert(bitsetview.size() == offsets.size());
                    for (auto i = 0; i < offsets.size(); ++i) {
                        if (bitsetview[i] > 0) {
                            insert_helper(search_result,
                                          topk,
                                          large_is_better,
                                          distances,
                                          offsets,
                                          nq_index,
                                          unity_topk,
                                          i);
                            if (topk == unity_topk) {
                                break;
                            }
                        }
                    }
                } else {
                    // Fallback path: consult the precomputed segment bitset.
                    for (auto i = 0; i < offsets.size(); ++i) {
                        if (bitset[offsets[i]] > 0) {
                            insert_helper(search_result,
                                          topk,
                                          large_is_better,
                                          distances,
                                          offsets,
                                          nq_index,
                                          unity_topk,
                                          i);
                            if (topk == unity_topk) {
                                break;
                            }
                        }
                    }
                }
                if (topk == unity_topk) {
                    break;
                }
            }
            nq_index++;
        }
    }
    query_context_->set_search_result(std::move(search_result));
    std::chrono::high_resolution_clock::time_point scalar_end =
        std::chrono::high_resolution_clock::now();
    double scalar_cost =
        std::chrono::duration<double, std::micro>(scalar_end - scalar_start)
            .count();
    monitor::internal_core_search_latency_iterative_filter.Observe(scalar_cost /
                                                                   1000);

    return input_;
}
|
||||||
|
|
||||||
|
} // namespace exec
|
||||||
|
} // namespace milvus
|
||||||
83
internal/core/src/exec/operator/IterativeFilterNode.h
Normal file
83
internal/core/src/exec/operator/IterativeFilterNode.h
Normal file
@ -0,0 +1,83 @@
|
|||||||
|
// Licensed to the LF AI & Data foundation under one
|
||||||
|
// or more contributor license agreements. See the NOTICE file
|
||||||
|
// distributed with this work for additional information
|
||||||
|
// regarding copyright ownership. The ASF licenses this file
|
||||||
|
// to you under the Apache License, Version 2.0 (the
|
||||||
|
// "License"); you may not use this file except in compliance
|
||||||
|
// with the License. You may obtain a copy of the License at
|
||||||
|
//
|
||||||
|
// http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
//
|
||||||
|
// Unless required by applicable law or agreed to in writing, software
|
||||||
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
// See the License for the specific language governing permissions and
|
||||||
|
// limitations under the License.
|
||||||
|
|
||||||
|
#pragma once
|
||||||
|
|
||||||
|
#include <memory>
|
||||||
|
#include <string>
|
||||||
|
|
||||||
|
#include "exec/Driver.h"
|
||||||
|
#include "exec/expression/Expr.h"
|
||||||
|
#include "exec/operator/Operator.h"
|
||||||
|
#include "exec/QueryContext.h"
|
||||||
|
|
||||||
|
// difference between FilterBitsNode and IterativeFilterNode is that
|
||||||
|
// FilterBitsNode will go through whole segment and return bitset to indicate which offset is filtered out or not
|
||||||
|
// IterativeFilterNode will accept offsets array and execute over these and generate result valid offsets
|
||||||
|
namespace milvus {
|
||||||
|
namespace exec {
|
||||||
|
// Filter operator that evaluates the scalar filter iteratively over
// candidates produced by vector-search iterators, instead of pre-computing a
// full-segment bitset (which is what the FilterBits operator does). It keeps
// drawing candidates until enough pass the filter to fill top-k.
class PhyIterativeFilterNode : public Operator {
 public:
    PhyIterativeFilterNode(
        int32_t operator_id,
        DriverContext* ctx,
        const std::shared_ptr<const plan::FilterNode>& filter);

    // Identifies this operator as a filter stage to the driver pipeline.
    bool
    IsFilter() override {
        return true;
    }

    // Accepts input until the single output batch has been produced.
    bool
    NeedInput() const override {
        return !is_finished_;
    }

    void
    AddInput(RowVectorPtr& input) override;

    RowVectorPtr
    GetOutput() override;

    bool
    IsFinished() override;

    void
    Close() override {
        Operator::Close();
        exprs_->Clear();
    }

    // Evaluation is CPU-bound; this operator never waits on external state.
    BlockingReason
    IsBlocked(ContinueFuture* /* unused */) override {
        return BlockingReason::kNotBlocked;
    }

    virtual std::string
    ToString() const override {
        return "PhyIterativeFilterNode";
    }

 private:
    // Compiled filter expression set evaluated in GetOutput().
    std::unique_ptr<ExprSet> exprs_;
    // Owned by the execution context; non-owning pointer here.
    QueryContext* query_context_;
    // Rows already covered by full-segment evaluation (fallback path).
    int64_t num_processed_rows_;
    // Total active rows of the segment to evaluate (fallback path).
    int64_t need_process_rows_;
    bool is_finished_{false};
    // True iff every expression supports offset-input (native) evaluation.
    bool is_native_supported_{true};
};
|
||||||
|
} // namespace exec
|
||||||
|
} // namespace milvus
|
||||||
101
internal/core/src/exec/operator/Utils.h
Normal file
101
internal/core/src/exec/operator/Utils.h
Normal file
@ -0,0 +1,101 @@
|
|||||||
|
// Licensed to the LF AI & Data foundation under one
|
||||||
|
// or more contributor license agreements. See the NOTICE file
|
||||||
|
// distributed with this work for additional information
|
||||||
|
// regarding copyright ownership. The ASF licenses this file
|
||||||
|
// to you under the Apache License, Version 2.0 (the
|
||||||
|
// "License"); you may not use this file except in compliance
|
||||||
|
// with the License. You may obtain a copy of the License at
|
||||||
|
//
|
||||||
|
// http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
//
|
||||||
|
// Unless required by applicable law or agreed to in writing, software
|
||||||
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
// See the License for the specific language governing permissions and
|
||||||
|
// limitations under the License.
|
||||||
|
|
||||||
|
#pragma once
|
||||||
|
|
||||||
|
#include "common/QueryInfo.h"
|
||||||
|
#include "knowhere/index/index_node.h"
|
||||||
|
#include "segcore/SegmentInterface.h"
|
||||||
|
#include "segcore/SegmentGrowingImpl.h"
|
||||||
|
#include "segcore/SegmentSealedImpl.h"
|
||||||
|
#include "segcore/ConcurrentVector.h"
|
||||||
|
#include "common/Span.h"
|
||||||
|
#include "query/Utils.h"
|
||||||
|
#include "common/EasyAssert.h"
|
||||||
|
|
||||||
|
namespace milvus {
|
||||||
|
namespace exec {
|
||||||
|
|
||||||
|
static bool
|
||||||
|
UseVectorIterator(const SearchInfo& search_info) {
|
||||||
|
return search_info.group_by_field_id_.has_value() ||
|
||||||
|
search_info.iterative_filter_execution;
|
||||||
|
}
|
||||||
|
|
||||||
|
static bool
|
||||||
|
PrepareVectorIteratorsFromIndex(const SearchInfo& search_info,
|
||||||
|
int nq,
|
||||||
|
const DatasetPtr dataset,
|
||||||
|
SearchResult& search_result,
|
||||||
|
const BitsetView& bitset,
|
||||||
|
const index::VectorIndex& index) {
|
||||||
|
// when we use group by, we will use vector iterator to continously get results and group on them
|
||||||
|
// when we use iterative filtered search, we will use vector iterator to continously get results and check scalar attr on them
|
||||||
|
// until we get valid topk results
|
||||||
|
if (UseVectorIterator(search_info)) {
|
||||||
|
try {
|
||||||
|
auto search_conf = index.PrepareSearchParams(search_info);
|
||||||
|
knowhere::expected<std::vector<knowhere::IndexNode::IteratorPtr>>
|
||||||
|
iterators_val =
|
||||||
|
index.VectorIterators(dataset, search_conf, bitset);
|
||||||
|
if (iterators_val.has_value()) {
|
||||||
|
search_result.AssembleChunkVectorIterators(
|
||||||
|
nq, 1, {0}, iterators_val.value());
|
||||||
|
} else {
|
||||||
|
std::string operator_type = "";
|
||||||
|
if (search_info.group_by_field_id_.has_value()) {
|
||||||
|
operator_type = "group_by";
|
||||||
|
} else {
|
||||||
|
operator_type = "iterative filter";
|
||||||
|
}
|
||||||
|
LOG_ERROR(
|
||||||
|
"Returned knowhere iterator has non-ready iterators "
|
||||||
|
"inside, terminate {} operation:{}",
|
||||||
|
operator_type,
|
||||||
|
knowhere::Status2String(iterators_val.error()));
|
||||||
|
PanicInfo(
|
||||||
|
ErrorCode::Unsupported,
|
||||||
|
fmt::format(
|
||||||
|
"Returned knowhere iterator has non-ready iterators "
|
||||||
|
"inside, terminate {} operation",
|
||||||
|
operator_type));
|
||||||
|
}
|
||||||
|
search_result.total_nq_ = dataset->GetRows();
|
||||||
|
search_result.unity_topK_ = search_info.topk_;
|
||||||
|
} catch (const std::runtime_error& e) {
|
||||||
|
std::string operator_type = "";
|
||||||
|
if (search_info.group_by_field_id_.has_value()) {
|
||||||
|
operator_type = "group_by";
|
||||||
|
} else {
|
||||||
|
operator_type = "iterative filter";
|
||||||
|
}
|
||||||
|
LOG_ERROR(
|
||||||
|
"Caught error:{} when trying to initialize ann iterators for "
|
||||||
|
"{}: "
|
||||||
|
"operation will be terminated",
|
||||||
|
e.what(),
|
||||||
|
operator_type);
|
||||||
|
PanicInfo(ErrorCode::Unsupported,
|
||||||
|
fmt::format("Failed to {}, current index:" +
|
||||||
|
index.GetIndexType() + " doesn't support",
|
||||||
|
operator_type));
|
||||||
|
}
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
} // namespace exec
|
||||||
|
} // namespace milvus
|
||||||
@ -86,6 +86,7 @@ PhyVectorSearchNode::GetOutput() {
|
|||||||
query_timestamp_,
|
query_timestamp_,
|
||||||
final_view,
|
final_view,
|
||||||
search_result);
|
search_result);
|
||||||
|
|
||||||
search_result.total_data_cnt_ = final_view.size();
|
search_result.total_data_cnt_ = final_view.size();
|
||||||
query_context_->set_search_result(std::move(search_result));
|
query_context_->set_search_result(std::move(search_result));
|
||||||
std::chrono::high_resolution_clock::time_point vector_end =
|
std::chrono::high_resolution_clock::time_point vector_end =
|
||||||
|
|||||||
@ -125,49 +125,6 @@ GetDataGetter(const segcore::SegmentInternalInterface& segment,
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
static bool
|
|
||||||
PrepareVectorIteratorsFromIndex(const SearchInfo& search_info,
|
|
||||||
int nq,
|
|
||||||
const DatasetPtr dataset,
|
|
||||||
SearchResult& search_result,
|
|
||||||
const BitsetView& bitset,
|
|
||||||
const index::VectorIndex& index) {
|
|
||||||
if (search_info.group_by_field_id_.has_value()) {
|
|
||||||
try {
|
|
||||||
auto search_conf = index.PrepareSearchParams(search_info);
|
|
||||||
knowhere::expected<std::vector<knowhere::IndexNode::IteratorPtr>>
|
|
||||||
iterators_val =
|
|
||||||
index.VectorIterators(dataset, search_conf, bitset);
|
|
||||||
if (iterators_val.has_value()) {
|
|
||||||
search_result.AssembleChunkVectorIterators(
|
|
||||||
nq, 1, {0}, iterators_val.value());
|
|
||||||
} else {
|
|
||||||
LOG_ERROR(
|
|
||||||
"Returned knowhere iterator has non-ready iterators "
|
|
||||||
"inside, terminate group_by operation:{}",
|
|
||||||
knowhere::Status2String(iterators_val.error()));
|
|
||||||
PanicInfo(ErrorCode::Unsupported,
|
|
||||||
"Returned knowhere iterator has non-ready iterators "
|
|
||||||
"inside, terminate group_by operation");
|
|
||||||
}
|
|
||||||
search_result.total_nq_ = dataset->GetRows();
|
|
||||||
search_result.unity_topK_ = search_info.topk_;
|
|
||||||
} catch (const std::runtime_error& e) {
|
|
||||||
LOG_ERROR(
|
|
||||||
"Caught error:{} when trying to initialize ann iterators for "
|
|
||||||
"group_by: "
|
|
||||||
"group_by operation will be terminated",
|
|
||||||
e.what());
|
|
||||||
PanicInfo(
|
|
||||||
ErrorCode::Unsupported,
|
|
||||||
"Failed to groupBy, current index:" + index.GetIndexType() +
|
|
||||||
" doesn't support search_group_by");
|
|
||||||
}
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
|
|
||||||
void
|
void
|
||||||
SearchGroupBy(const std::vector<std::shared_ptr<VectorIterator>>& iterators,
|
SearchGroupBy(const std::vector<std::shared_ptr<VectorIterator>>& iterators,
|
||||||
const SearchInfo& searchInfo,
|
const SearchInfo& searchInfo,
|
||||||
|
|||||||
@ -155,6 +155,13 @@ class ChunkedColumnBase : public ColumnBase {
|
|||||||
"StringViews only supported for VariableColumn");
|
"StringViews only supported for VariableColumn");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
virtual std::pair<std::vector<std::string_view>, FixedVector<bool>>
|
||||||
|
ViewsByOffsets(int64_t chunk_id,
|
||||||
|
const FixedVector<int32_t>& offsets) const {
|
||||||
|
PanicInfo(ErrorCode::Unsupported,
|
||||||
|
"viewsbyoffsets only supported for VariableColumn");
|
||||||
|
}
|
||||||
|
|
||||||
std::pair<size_t, size_t>
|
std::pair<size_t, size_t>
|
||||||
GetChunkIDByOffset(int64_t offset) const {
|
GetChunkIDByOffset(int64_t offset) const {
|
||||||
AssertInfo(offset < num_rows_,
|
AssertInfo(offset < num_rows_,
|
||||||
@ -333,6 +340,13 @@ class ChunkedVariableColumn : public ChunkedColumnBase {
|
|||||||
return chunks_[chunk_id];
|
return chunks_[chunk_id];
|
||||||
}
|
}
|
||||||
|
|
||||||
|
std::pair<std::vector<std::string_view>, FixedVector<bool>>
|
||||||
|
ViewsByOffsets(int64_t chunk_id,
|
||||||
|
const FixedVector<int32_t>& offsets) const override {
|
||||||
|
return std::dynamic_pointer_cast<StringChunk>(chunks_[chunk_id])
|
||||||
|
->ViewsByOffsets(offsets);
|
||||||
|
}
|
||||||
|
|
||||||
BufferView
|
BufferView
|
||||||
GetBatchBuffer(int64_t chunk_id,
|
GetBatchBuffer(int64_t chunk_id,
|
||||||
int64_t start_offset,
|
int64_t start_offset,
|
||||||
|
|||||||
@ -323,6 +323,12 @@ class SingleChunkColumnBase : public ColumnBase {
|
|||||||
"StringViews only supported for VariableColumn");
|
"StringViews only supported for VariableColumn");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
virtual std::pair<std::vector<std::string_view>, FixedVector<bool>>
|
||||||
|
ViewsByOffsets(const FixedVector<int32_t>& offsets) const {
|
||||||
|
PanicInfo(ErrorCode::Unsupported,
|
||||||
|
"viewsbyoffsets only supported for VariableColumn");
|
||||||
|
}
|
||||||
|
|
||||||
virtual void
|
virtual void
|
||||||
AppendBatch(const FieldDataPtr data) {
|
AppendBatch(const FieldDataPtr data) {
|
||||||
size_t required_size = data_size_ + data->DataSize();
|
size_t required_size = data_size_ + data->DataSize();
|
||||||
@ -698,6 +704,19 @@ class SingleChunkVariableColumn : public SingleChunkColumnBase {
|
|||||||
return std::make_pair(res, valid_data_);
|
return std::make_pair(res, valid_data_);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
std::pair<std::vector<std::string_view>, FixedVector<bool>>
|
||||||
|
ViewsByOffsets(const FixedVector<int32_t>& offsets) const {
|
||||||
|
std::vector<std::string_view> res;
|
||||||
|
FixedVector<bool> valid;
|
||||||
|
res.reserve(offsets.size());
|
||||||
|
valid.reserve(offsets.size());
|
||||||
|
for (size_t i = 0; i < offsets.size(); ++i) {
|
||||||
|
res.emplace_back(RawAt(offsets[i]));
|
||||||
|
valid.emplace_back(IsValid(offsets[i]));
|
||||||
|
}
|
||||||
|
return {res, valid};
|
||||||
|
}
|
||||||
|
|
||||||
[[nodiscard]] std::vector<ViewType>
|
[[nodiscard]] std::vector<ViewType>
|
||||||
Views() const {
|
Views() const {
|
||||||
std::vector<ViewType> res;
|
std::vector<ViewType> res;
|
||||||
|
|||||||
@ -177,6 +177,8 @@ std::map<std::string, std::string> vectorLatencyLabels{
|
|||||||
{"type", "vector_latency"}};
|
{"type", "vector_latency"}};
|
||||||
std::map<std::string, std::string> groupbyLatencyLabels{
|
std::map<std::string, std::string> groupbyLatencyLabels{
|
||||||
{"type", "groupby_latency"}};
|
{"type", "groupby_latency"}};
|
||||||
|
std::map<std::string, std::string> iterativeFilterLatencyLabels{
|
||||||
|
{"type", "iterative_filter_latency"}};
|
||||||
std::map<std::string, std::string> scalarProportionLabels{
|
std::map<std::string, std::string> scalarProportionLabels{
|
||||||
{"type", "scalar_proportion"}};
|
{"type", "scalar_proportion"}};
|
||||||
DEFINE_PROMETHEUS_HISTOGRAM_FAMILY(internal_core_search_latency,
|
DEFINE_PROMETHEUS_HISTOGRAM_FAMILY(internal_core_search_latency,
|
||||||
@ -190,6 +192,9 @@ DEFINE_PROMETHEUS_HISTOGRAM(internal_core_search_latency_vector,
|
|||||||
DEFINE_PROMETHEUS_HISTOGRAM(internal_core_search_latency_groupby,
|
DEFINE_PROMETHEUS_HISTOGRAM(internal_core_search_latency_groupby,
|
||||||
internal_core_search_latency,
|
internal_core_search_latency,
|
||||||
groupbyLatencyLabels)
|
groupbyLatencyLabels)
|
||||||
|
DEFINE_PROMETHEUS_HISTOGRAM(internal_core_search_latency_iterative_filter,
|
||||||
|
internal_core_search_latency,
|
||||||
|
iterativeFilterLatencyLabels)
|
||||||
DEFINE_PROMETHEUS_HISTOGRAM_WITH_BUCKETS(
|
DEFINE_PROMETHEUS_HISTOGRAM_WITH_BUCKETS(
|
||||||
internal_core_search_latency_scalar_proportion,
|
internal_core_search_latency_scalar_proportion,
|
||||||
internal_core_search_latency,
|
internal_core_search_latency,
|
||||||
|
|||||||
@ -136,6 +136,7 @@ DECLARE_PROMETHEUS_HISTOGRAM_FAMILY(internal_core_search_latency);
|
|||||||
DECLARE_PROMETHEUS_HISTOGRAM(internal_core_search_latency_scalar);
|
DECLARE_PROMETHEUS_HISTOGRAM(internal_core_search_latency_scalar);
|
||||||
DECLARE_PROMETHEUS_HISTOGRAM(internal_core_search_latency_vector);
|
DECLARE_PROMETHEUS_HISTOGRAM(internal_core_search_latency_vector);
|
||||||
DECLARE_PROMETHEUS_HISTOGRAM(internal_core_search_latency_groupby);
|
DECLARE_PROMETHEUS_HISTOGRAM(internal_core_search_latency_groupby);
|
||||||
|
DECLARE_PROMETHEUS_HISTOGRAM(internal_core_search_latency_iterative_filter);
|
||||||
DECLARE_PROMETHEUS_HISTOGRAM(internal_core_search_latency_scalar_proportion);
|
DECLARE_PROMETHEUS_HISTOGRAM(internal_core_search_latency_scalar_proportion);
|
||||||
|
|
||||||
} // namespace milvus::monitor
|
} // namespace milvus::monitor
|
||||||
|
|||||||
@ -53,6 +53,16 @@ ProtoParser::PlanNodeFromProto(const planpb::PlanNode& plan_node_proto) {
|
|||||||
nlohmann::json::parse(query_info_proto.search_params());
|
nlohmann::json::parse(query_info_proto.search_params());
|
||||||
search_info.materialized_view_involved =
|
search_info.materialized_view_involved =
|
||||||
query_info_proto.materialized_view_involved();
|
query_info_proto.materialized_view_involved();
|
||||||
|
// currently, iterative filter does not support range search
|
||||||
|
if (!search_info.search_params_.contains(RADIUS)) {
|
||||||
|
search_info.iterative_filter_execution =
|
||||||
|
(query_info_proto.hints() == ITERATIVE_FILTER);
|
||||||
|
if (!search_info.iterative_filter_execution &&
|
||||||
|
search_info.search_params_.contains(HINTS)) {
|
||||||
|
search_info.iterative_filter_execution =
|
||||||
|
(search_info.search_params_[HINTS] == ITERATIVE_FILTER);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
if (query_info_proto.bm25_avgdl() > 0) {
|
if (query_info_proto.bm25_avgdl() > 0) {
|
||||||
search_info.search_params_[knowhere::meta::BM25_AVGDL] =
|
search_info.search_params_[knowhere::meta::BM25_AVGDL] =
|
||||||
@ -94,7 +104,24 @@ ProtoParser::PlanNodeFromProto(const planpb::PlanNode& plan_node_proto) {
|
|||||||
|
|
||||||
milvus::plan::PlanNodePtr plannode;
|
milvus::plan::PlanNodePtr plannode;
|
||||||
std::vector<milvus::plan::PlanNodePtr> sources;
|
std::vector<milvus::plan::PlanNodePtr> sources;
|
||||||
if (anns_proto.has_predicates()) {
|
|
||||||
|
// mvcc node -> vector search node -> iterative filter node
|
||||||
|
auto iterative_filter_plan = [&]() {
|
||||||
|
plannode = std::make_shared<milvus::plan::MvccNode>(
|
||||||
|
milvus::plan::GetNextPlanNodeId());
|
||||||
|
sources = std::vector<milvus::plan::PlanNodePtr>{plannode};
|
||||||
|
plannode = std::make_shared<milvus::plan::VectorSearchNode>(
|
||||||
|
milvus::plan::GetNextPlanNodeId(), sources);
|
||||||
|
sources = std::vector<milvus::plan::PlanNodePtr>{plannode};
|
||||||
|
|
||||||
|
auto expr = ParseExprs(anns_proto.predicates());
|
||||||
|
plannode = std::make_shared<plan::FilterNode>(
|
||||||
|
milvus::plan::GetNextPlanNodeId(), expr, sources);
|
||||||
|
sources = std::vector<milvus::plan::PlanNodePtr>{plannode};
|
||||||
|
};
|
||||||
|
|
||||||
|
// pre filter node -> mvcc node -> vector search node
|
||||||
|
auto pre_filter_plan = [&]() {
|
||||||
plannode = std::move(expr_parser());
|
plannode = std::move(expr_parser());
|
||||||
if (plan_node->search_info_.materialized_view_involved) {
|
if (plan_node->search_info_.materialized_view_involved) {
|
||||||
const auto expr_info = plannode->GatherInfo();
|
const auto expr_info = plannode->GatherInfo();
|
||||||
@ -113,16 +140,33 @@ ProtoParser::PlanNodeFromProto(const planpb::PlanNode& plan_node_proto) {
|
|||||||
materialized_view_search_info;
|
materialized_view_search_info;
|
||||||
}
|
}
|
||||||
sources = std::vector<milvus::plan::PlanNodePtr>{plannode};
|
sources = std::vector<milvus::plan::PlanNodePtr>{plannode};
|
||||||
|
plannode = std::make_shared<milvus::plan::MvccNode>(
|
||||||
|
milvus::plan::GetNextPlanNodeId(), sources);
|
||||||
|
sources = std::vector<milvus::plan::PlanNodePtr>{plannode};
|
||||||
|
|
||||||
|
plannode = std::make_shared<milvus::plan::VectorSearchNode>(
|
||||||
|
milvus::plan::GetNextPlanNodeId(), sources);
|
||||||
|
sources = std::vector<milvus::plan::PlanNodePtr>{plannode};
|
||||||
|
};
|
||||||
|
|
||||||
|
if (anns_proto.has_predicates()) {
|
||||||
|
// currently limit iterative filter scope to search only
|
||||||
|
if (plan_node->search_info_.iterative_filter_execution &&
|
||||||
|
plan_node->search_info_.group_by_field_id_ == std::nullopt) {
|
||||||
|
iterative_filter_plan();
|
||||||
|
} else {
|
||||||
|
pre_filter_plan();
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
plannode = std::make_shared<milvus::plan::MvccNode>(
|
||||||
|
milvus::plan::GetNextPlanNodeId(), sources);
|
||||||
|
sources = std::vector<milvus::plan::PlanNodePtr>{plannode};
|
||||||
|
|
||||||
|
plannode = std::make_shared<milvus::plan::VectorSearchNode>(
|
||||||
|
milvus::plan::GetNextPlanNodeId(), sources);
|
||||||
|
sources = std::vector<milvus::plan::PlanNodePtr>{plannode};
|
||||||
}
|
}
|
||||||
|
|
||||||
plannode = std::make_shared<milvus::plan::MvccNode>(
|
|
||||||
milvus::plan::GetNextPlanNodeId(), sources);
|
|
||||||
sources = std::vector<milvus::plan::PlanNodePtr>{plannode};
|
|
||||||
|
|
||||||
plannode = std::make_shared<milvus::plan::VectorSearchNode>(
|
|
||||||
milvus::plan::GetNextPlanNodeId(), sources);
|
|
||||||
sources = std::vector<milvus::plan::PlanNodePtr>{plannode};
|
|
||||||
|
|
||||||
if (plan_node->search_info_.group_by_field_id_ != std::nullopt) {
|
if (plan_node->search_info_.group_by_field_id_ != std::nullopt) {
|
||||||
plannode = std::make_shared<milvus::plan::GroupByNode>(
|
plannode = std::make_shared<milvus::plan::GroupByNode>(
|
||||||
milvus::plan::GetNextPlanNodeId(), sources);
|
milvus::plan::GetNextPlanNodeId(), sources);
|
||||||
|
|||||||
@ -272,12 +272,11 @@ BruteForceSearchIterators(const dataset::SearchDataset& query_ds,
|
|||||||
"equal to nq:{} for single chunk",
|
"equal to nq:{} for single chunk",
|
||||||
iterators_val.value().size(),
|
iterators_val.value().size(),
|
||||||
nq);
|
nq);
|
||||||
SubSearchResult subSearchResult(query_ds.num_queries,
|
return SubSearchResult(query_ds.num_queries,
|
||||||
query_ds.topk,
|
query_ds.topk,
|
||||||
query_ds.metric_type,
|
query_ds.metric_type,
|
||||||
query_ds.round_decimal,
|
query_ds.round_decimal,
|
||||||
iterators_val.value());
|
iterators_val.value());
|
||||||
return std::move(subSearchResult);
|
|
||||||
} else {
|
} else {
|
||||||
LOG_ERROR(
|
LOG_ERROR(
|
||||||
"Failed to get valid knowhere brute-force-iterators from chunk, "
|
"Failed to get valid knowhere brute-force-iterators from chunk, "
|
||||||
|
|||||||
@ -20,6 +20,7 @@
|
|||||||
#include "log/Log.h"
|
#include "log/Log.h"
|
||||||
#include "query/SearchBruteForce.h"
|
#include "query/SearchBruteForce.h"
|
||||||
#include "query/SearchOnIndex.h"
|
#include "query/SearchOnIndex.h"
|
||||||
|
#include "exec/operator/Utils.h"
|
||||||
|
|
||||||
namespace milvus::query {
|
namespace milvus::query {
|
||||||
|
|
||||||
@ -138,7 +139,7 @@ SearchOnGrowing(const segcore::SegmentGrowingImpl& segment,
|
|||||||
|
|
||||||
auto sub_data = query::dataset::RawDataset{
|
auto sub_data = query::dataset::RawDataset{
|
||||||
element_begin, dim, size_per_chunk, chunk_data};
|
element_begin, dim, size_per_chunk, chunk_data};
|
||||||
if (info.group_by_field_id_.has_value()) {
|
if (milvus::exec::UseVectorIterator(info)) {
|
||||||
auto sub_qr = BruteForceSearchIterators(search_dataset,
|
auto sub_qr = BruteForceSearchIterators(search_dataset,
|
||||||
sub_data,
|
sub_data,
|
||||||
info,
|
info,
|
||||||
@ -156,7 +157,7 @@ SearchOnGrowing(const segcore::SegmentGrowingImpl& segment,
|
|||||||
final_qr.merge(sub_qr);
|
final_qr.merge(sub_qr);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if (info.group_by_field_id_.has_value()) {
|
if (milvus::exec::UseVectorIterator(info)) {
|
||||||
std::vector<int64_t> chunk_rows(max_chunk, 0);
|
std::vector<int64_t> chunk_rows(max_chunk, 0);
|
||||||
for (int i = 1; i < max_chunk; ++i) {
|
for (int i = 1; i < max_chunk; ++i) {
|
||||||
chunk_rows[i] = i * vec_size_per_chunk;
|
chunk_rows[i] = i * vec_size_per_chunk;
|
||||||
|
|||||||
@ -10,7 +10,7 @@
|
|||||||
// or implied. See the License for the specific language governing permissions and limitations under the License
|
// or implied. See the License for the specific language governing permissions and limitations under the License
|
||||||
|
|
||||||
#include "SearchOnIndex.h"
|
#include "SearchOnIndex.h"
|
||||||
#include "exec/operator/groupby/SearchGroupByOperator.h"
|
#include "exec/operator/Utils.h"
|
||||||
|
|
||||||
namespace milvus::query {
|
namespace milvus::query {
|
||||||
void
|
void
|
||||||
|
|||||||
@ -21,7 +21,7 @@
|
|||||||
#include "query/SearchBruteForce.h"
|
#include "query/SearchBruteForce.h"
|
||||||
#include "query/SearchOnSealed.h"
|
#include "query/SearchOnSealed.h"
|
||||||
#include "query/helper.h"
|
#include "query/helper.h"
|
||||||
#include "exec/operator/groupby/SearchGroupByOperator.h"
|
#include "exec/operator/Utils.h"
|
||||||
|
|
||||||
namespace milvus::query {
|
namespace milvus::query {
|
||||||
|
|
||||||
@ -119,7 +119,7 @@ SearchOnSealed(const Schema& schema,
|
|||||||
auto data_id = offset;
|
auto data_id = offset;
|
||||||
auto raw_dataset =
|
auto raw_dataset =
|
||||||
query::dataset::RawDataset{offset, dim, chunk_size, vec_data};
|
query::dataset::RawDataset{offset, dim, chunk_size, vec_data};
|
||||||
if (search_info.group_by_field_id_.has_value()) {
|
if (milvus::exec::UseVectorIterator(search_info)) {
|
||||||
auto sub_qr = BruteForceSearchIterators(query_dataset,
|
auto sub_qr = BruteForceSearchIterators(query_dataset,
|
||||||
raw_dataset,
|
raw_dataset,
|
||||||
search_info,
|
search_info,
|
||||||
@ -139,7 +139,7 @@ SearchOnSealed(const Schema& schema,
|
|||||||
|
|
||||||
offset += chunk_size;
|
offset += chunk_size;
|
||||||
}
|
}
|
||||||
if (search_info.group_by_field_id_.has_value()) {
|
if (milvus::exec::UseVectorIterator(search_info)) {
|
||||||
result.AssembleChunkVectorIterators(num_queries,
|
result.AssembleChunkVectorIterators(num_queries,
|
||||||
num_chunk,
|
num_chunk,
|
||||||
column->GetNumRowsUntilChunk(),
|
column->GetNumRowsUntilChunk(),
|
||||||
@ -180,7 +180,7 @@ SearchOnSealed(const Schema& schema,
|
|||||||
auto data_type = field.get_data_type();
|
auto data_type = field.get_data_type();
|
||||||
CheckBruteForceSearchParam(field, search_info);
|
CheckBruteForceSearchParam(field, search_info);
|
||||||
auto raw_dataset = query::dataset::RawDataset{0, dim, row_count, vec_data};
|
auto raw_dataset = query::dataset::RawDataset{0, dim, row_count, vec_data};
|
||||||
if (search_info.group_by_field_id_.has_value()) {
|
if (milvus::exec::UseVectorIterator(search_info)) {
|
||||||
auto sub_qr = BruteForceSearchIterators(query_dataset,
|
auto sub_qr = BruteForceSearchIterators(query_dataset,
|
||||||
raw_dataset,
|
raw_dataset,
|
||||||
search_info,
|
search_info,
|
||||||
|
|||||||
@ -723,7 +723,11 @@ ChunkedSegmentSealedImpl::num_chunk_index(FieldId field_id) const {
|
|||||||
|
|
||||||
int64_t
|
int64_t
|
||||||
ChunkedSegmentSealedImpl::num_chunk_data(FieldId field_id) const {
|
ChunkedSegmentSealedImpl::num_chunk_data(FieldId field_id) const {
|
||||||
return fields_.at(field_id)->num_chunks();
|
return get_bit(field_data_ready_bitset_, field_id)
|
||||||
|
? fields_.find(field_id) != fields_.end()
|
||||||
|
? fields_.at(field_id)->num_chunks()
|
||||||
|
: 1
|
||||||
|
: 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
int64_t
|
int64_t
|
||||||
@ -732,7 +736,7 @@ ChunkedSegmentSealedImpl::num_chunk(FieldId field_id) const {
|
|||||||
? fields_.find(field_id) != fields_.end()
|
? fields_.find(field_id) != fields_.end()
|
||||||
? fields_.at(field_id)->num_chunks()
|
? fields_.at(field_id)->num_chunks()
|
||||||
: 1
|
: 1
|
||||||
: 0;
|
: 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
int64_t
|
int64_t
|
||||||
@ -800,7 +804,6 @@ ChunkedSegmentSealedImpl::chunk_data_impl(FieldId field_id,
|
|||||||
std::shared_lock lck(mutex_);
|
std::shared_lock lck(mutex_);
|
||||||
AssertInfo(get_bit(field_data_ready_bitset_, field_id),
|
AssertInfo(get_bit(field_data_ready_bitset_, field_id),
|
||||||
"Can't get bitset element at " + std::to_string(field_id.get()));
|
"Can't get bitset element at " + std::to_string(field_id.get()));
|
||||||
auto& field_meta = schema_->operator[](field_id);
|
|
||||||
if (auto it = fields_.find(field_id); it != fields_.end()) {
|
if (auto it = fields_.find(field_id); it != fields_.end()) {
|
||||||
auto& field_data = it->second;
|
auto& field_data = it->second;
|
||||||
return field_data->Span(chunk_id);
|
return field_data->Span(chunk_id);
|
||||||
@ -818,7 +821,6 @@ ChunkedSegmentSealedImpl::chunk_view_impl(FieldId field_id,
|
|||||||
std::shared_lock lck(mutex_);
|
std::shared_lock lck(mutex_);
|
||||||
AssertInfo(get_bit(field_data_ready_bitset_, field_id),
|
AssertInfo(get_bit(field_data_ready_bitset_, field_id),
|
||||||
"Can't get bitset element at " + std::to_string(field_id.get()));
|
"Can't get bitset element at " + std::to_string(field_id.get()));
|
||||||
auto& field_meta = schema_->operator[](field_id);
|
|
||||||
if (auto it = fields_.find(field_id); it != fields_.end()) {
|
if (auto it = fields_.find(field_id); it != fields_.end()) {
|
||||||
auto& field_data = it->second;
|
auto& field_data = it->second;
|
||||||
return field_data->StringViews(chunk_id);
|
return field_data->StringViews(chunk_id);
|
||||||
@ -827,6 +829,22 @@ ChunkedSegmentSealedImpl::chunk_view_impl(FieldId field_id,
|
|||||||
"chunk_view_impl only used for variable column field ");
|
"chunk_view_impl only used for variable column field ");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
std::pair<std::vector<std::string_view>, FixedVector<bool>>
|
||||||
|
ChunkedSegmentSealedImpl::chunk_view_by_offsets(
|
||||||
|
FieldId field_id,
|
||||||
|
int64_t chunk_id,
|
||||||
|
const FixedVector<int32_t>& offsets) const {
|
||||||
|
std::shared_lock lck(mutex_);
|
||||||
|
AssertInfo(get_bit(field_data_ready_bitset_, field_id),
|
||||||
|
"Can't get bitset element at " + std::to_string(field_id.get()));
|
||||||
|
if (auto it = fields_.find(field_id); it != fields_.end()) {
|
||||||
|
auto& field_data = it->second;
|
||||||
|
return field_data->ViewsByOffsets(chunk_id, offsets);
|
||||||
|
}
|
||||||
|
PanicInfo(ErrorCode::UnexpectedError,
|
||||||
|
"chunk_view_by_offsets only used for variable column field ");
|
||||||
|
}
|
||||||
|
|
||||||
const index::IndexBase*
|
const index::IndexBase*
|
||||||
ChunkedSegmentSealedImpl::chunk_index_impl(FieldId field_id,
|
ChunkedSegmentSealedImpl::chunk_index_impl(FieldId field_id,
|
||||||
int64_t chunk_id) const {
|
int64_t chunk_id) const {
|
||||||
|
|||||||
@ -206,6 +206,11 @@ class ChunkedSegmentSealedImpl : public SegmentSealed {
|
|||||||
std::pair<std::vector<std::string_view>, FixedVector<bool>>
|
std::pair<std::vector<std::string_view>, FixedVector<bool>>
|
||||||
chunk_view_impl(FieldId field_id, int64_t chunk_id) const override;
|
chunk_view_impl(FieldId field_id, int64_t chunk_id) const override;
|
||||||
|
|
||||||
|
std::pair<std::vector<std::string_view>, FixedVector<bool>>
|
||||||
|
chunk_view_by_offsets(FieldId field_id,
|
||||||
|
int64_t chunk_id,
|
||||||
|
const FixedVector<int32_t>& offsets) const override;
|
||||||
|
|
||||||
std::pair<BufferView, FixedVector<bool>>
|
std::pair<BufferView, FixedVector<bool>>
|
||||||
get_chunk_buffer(FieldId field_id,
|
get_chunk_buffer(FieldId field_id,
|
||||||
int64_t chunk_id,
|
int64_t chunk_id,
|
||||||
|
|||||||
@ -409,6 +409,15 @@ SegmentGrowingImpl::chunk_view_impl(FieldId field_id, int64_t chunk_id) const {
|
|||||||
"chunk view impl not implement for growing segment");
|
"chunk view impl not implement for growing segment");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
std::pair<std::vector<std::string_view>, FixedVector<bool>>
|
||||||
|
SegmentGrowingImpl::chunk_view_by_offsets(
|
||||||
|
FieldId field_id,
|
||||||
|
int64_t chunk_id,
|
||||||
|
const FixedVector<int32_t>& offsets) const {
|
||||||
|
PanicInfo(ErrorCode::NotImplemented,
|
||||||
|
"chunk view by offsets not implemented for growing segment");
|
||||||
|
}
|
||||||
|
|
||||||
int64_t
|
int64_t
|
||||||
SegmentGrowingImpl::num_chunk(FieldId field_id) const {
|
SegmentGrowingImpl::num_chunk(FieldId field_id) const {
|
||||||
auto size = get_insert_record().ack_responder_.GetAck();
|
auto size = get_insert_record().ack_responder_.GetAck();
|
||||||
|
|||||||
@ -344,6 +344,11 @@ class SegmentGrowingImpl : public SegmentGrowing {
|
|||||||
std::pair<std::vector<std::string_view>, FixedVector<bool>>
|
std::pair<std::vector<std::string_view>, FixedVector<bool>>
|
||||||
chunk_view_impl(FieldId field_id, int64_t chunk_id) const override;
|
chunk_view_impl(FieldId field_id, int64_t chunk_id) const override;
|
||||||
|
|
||||||
|
std::pair<std::vector<std::string_view>, FixedVector<bool>>
|
||||||
|
chunk_view_by_offsets(FieldId field_id,
|
||||||
|
int64_t chunk_id,
|
||||||
|
const FixedVector<int32_t>& offsets) const override;
|
||||||
|
|
||||||
std::pair<BufferView, FixedVector<bool>>
|
std::pair<BufferView, FixedVector<bool>>
|
||||||
get_chunk_buffer(FieldId field_id,
|
get_chunk_buffer(FieldId field_id,
|
||||||
int64_t chunk_id,
|
int64_t chunk_id,
|
||||||
|
|||||||
@ -200,6 +200,28 @@ class SegmentInternalInterface : public SegmentInterface {
|
|||||||
return std::make_pair(res, chunk_info.second);
|
return std::make_pair(res, chunk_info.second);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
template <typename ViewType>
|
||||||
|
std::pair<std::vector<ViewType>, FixedVector<bool>>
|
||||||
|
get_views_by_offsets(FieldId field_id,
|
||||||
|
int64_t chunk_id,
|
||||||
|
const FixedVector<int32_t>& offsets) const {
|
||||||
|
if (this->type() == SegmentType::Growing) {
|
||||||
|
PanicInfo(ErrorCode::Unsupported,
|
||||||
|
"get chunk views not supported for growing segment");
|
||||||
|
}
|
||||||
|
auto chunk_view = chunk_view_by_offsets(field_id, chunk_id, offsets);
|
||||||
|
if constexpr (std::is_same_v<ViewType, std::string_view>) {
|
||||||
|
return chunk_view;
|
||||||
|
} else {
|
||||||
|
std::vector<ViewType> res;
|
||||||
|
res.reserve(chunk_view.first.size());
|
||||||
|
for (const auto& view : chunk_view.first) {
|
||||||
|
res.emplace_back(view);
|
||||||
|
}
|
||||||
|
return {res, chunk_view.second};
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
template <typename T>
|
template <typename T>
|
||||||
const index::ScalarIndex<T>&
|
const index::ScalarIndex<T>&
|
||||||
chunk_scalar_index(FieldId field_id, int64_t chunk_id) const {
|
chunk_scalar_index(FieldId field_id, int64_t chunk_id) const {
|
||||||
@ -414,6 +436,11 @@ class SegmentInternalInterface : public SegmentInterface {
|
|||||||
int64_t start_offset,
|
int64_t start_offset,
|
||||||
int64_t length) const = 0;
|
int64_t length) const = 0;
|
||||||
|
|
||||||
|
virtual std::pair<std::vector<std::string_view>, FixedVector<bool>>
|
||||||
|
chunk_view_by_offsets(FieldId field_id,
|
||||||
|
int64_t chunk_id,
|
||||||
|
const FixedVector<int32_t>& offsets) const = 0;
|
||||||
|
|
||||||
// internal API: return chunk_index in span, support scalar index only
|
// internal API: return chunk_index in span, support scalar index only
|
||||||
virtual const index::IndexBase*
|
virtual const index::IndexBase*
|
||||||
chunk_index_impl(FieldId field_id, int64_t chunk_id) const = 0;
|
chunk_index_impl(FieldId field_id, int64_t chunk_id) const = 0;
|
||||||
|
|||||||
@ -770,7 +770,6 @@ SegmentSealedImpl::chunk_data_impl(FieldId field_id, int64_t chunk_id) const {
|
|||||||
std::shared_lock lck(mutex_);
|
std::shared_lock lck(mutex_);
|
||||||
AssertInfo(get_bit(field_data_ready_bitset_, field_id),
|
AssertInfo(get_bit(field_data_ready_bitset_, field_id),
|
||||||
"Can't get bitset element at " + std::to_string(field_id.get()));
|
"Can't get bitset element at " + std::to_string(field_id.get()));
|
||||||
auto& field_meta = schema_->operator[](field_id);
|
|
||||||
if (auto it = fields_.find(field_id); it != fields_.end()) {
|
if (auto it = fields_.find(field_id); it != fields_.end()) {
|
||||||
auto& field_data = it->second;
|
auto& field_data = it->second;
|
||||||
return field_data->Span();
|
return field_data->Span();
|
||||||
@ -787,7 +786,6 @@ SegmentSealedImpl::chunk_view_impl(FieldId field_id, int64_t chunk_id) const {
|
|||||||
std::shared_lock lck(mutex_);
|
std::shared_lock lck(mutex_);
|
||||||
AssertInfo(get_bit(field_data_ready_bitset_, field_id),
|
AssertInfo(get_bit(field_data_ready_bitset_, field_id),
|
||||||
"Can't get bitset element at " + std::to_string(field_id.get()));
|
"Can't get bitset element at " + std::to_string(field_id.get()));
|
||||||
auto& field_meta = schema_->operator[](field_id);
|
|
||||||
if (auto it = fields_.find(field_id); it != fields_.end()) {
|
if (auto it = fields_.find(field_id); it != fields_.end()) {
|
||||||
auto& field_data = it->second;
|
auto& field_data = it->second;
|
||||||
return field_data->StringViews();
|
return field_data->StringViews();
|
||||||
@ -796,6 +794,22 @@ SegmentSealedImpl::chunk_view_impl(FieldId field_id, int64_t chunk_id) const {
|
|||||||
"chunk_view_impl only used for variable column field ");
|
"chunk_view_impl only used for variable column field ");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
std::pair<std::vector<std::string_view>, FixedVector<bool>>
|
||||||
|
SegmentSealedImpl::chunk_view_by_offsets(
|
||||||
|
FieldId field_id,
|
||||||
|
int64_t chunk_id,
|
||||||
|
const FixedVector<int32_t>& offsets) const {
|
||||||
|
std::shared_lock lck(mutex_);
|
||||||
|
AssertInfo(get_bit(field_data_ready_bitset_, field_id),
|
||||||
|
"Can't get bitset element at " + std::to_string(field_id.get()));
|
||||||
|
if (auto it = fields_.find(field_id); it != fields_.end()) {
|
||||||
|
auto& field_data = it->second;
|
||||||
|
return field_data->ViewsByOffsets(offsets);
|
||||||
|
}
|
||||||
|
PanicInfo(ErrorCode::UnexpectedError,
|
||||||
|
"chunk_view_by_offsets only used for variable column field ");
|
||||||
|
}
|
||||||
|
|
||||||
const index::IndexBase*
|
const index::IndexBase*
|
||||||
SegmentSealedImpl::chunk_index_impl(FieldId field_id, int64_t chunk_id) const {
|
SegmentSealedImpl::chunk_index_impl(FieldId field_id, int64_t chunk_id) const {
|
||||||
AssertInfo(scalar_indexings_.find(field_id) != scalar_indexings_.end(),
|
AssertInfo(scalar_indexings_.find(field_id) != scalar_indexings_.end(),
|
||||||
|
|||||||
@ -212,6 +212,11 @@ class SegmentSealedImpl : public SegmentSealed {
|
|||||||
std::pair<std::vector<std::string_view>, FixedVector<bool>>
|
std::pair<std::vector<std::string_view>, FixedVector<bool>>
|
||||||
chunk_view_impl(FieldId field_id, int64_t chunk_id) const override;
|
chunk_view_impl(FieldId field_id, int64_t chunk_id) const override;
|
||||||
|
|
||||||
|
std::pair<std::vector<std::string_view>, FixedVector<bool>>
|
||||||
|
chunk_view_by_offsets(FieldId field_id,
|
||||||
|
int64_t chunk_id,
|
||||||
|
const FixedVector<int32_t>& offsets) const override;
|
||||||
|
|
||||||
std::pair<BufferView, FixedVector<bool>>
|
std::pair<BufferView, FixedVector<bool>>
|
||||||
get_chunk_buffer(FieldId field_id,
|
get_chunk_buffer(FieldId field_id,
|
||||||
int64_t chunk_id,
|
int64_t chunk_id,
|
||||||
|
|||||||
@ -51,6 +51,7 @@ set(MILVUS_TEST_FILES
|
|||||||
test_function.cpp
|
test_function.cpp
|
||||||
test_futures.cpp
|
test_futures.cpp
|
||||||
test_group_by.cpp
|
test_group_by.cpp
|
||||||
|
test_iterative_filter.cpp
|
||||||
test_growing.cpp
|
test_growing.cpp
|
||||||
test_growing_index.cpp
|
test_growing_index.cpp
|
||||||
test_hybrid_index.cpp
|
test_hybrid_index.cpp
|
||||||
|
|||||||
@ -67,10 +67,29 @@ TEST_P(ExprAlwaysTrueTest, AlwaysTrue) {
|
|||||||
final = ExecuteQueryExpr(plan, seg_promote, N * num_iters, MAX_TIMESTAMP);
|
final = ExecuteQueryExpr(plan, seg_promote, N * num_iters, MAX_TIMESTAMP);
|
||||||
EXPECT_EQ(final.size(), N * num_iters);
|
EXPECT_EQ(final.size(), N * num_iters);
|
||||||
|
|
||||||
|
// specify some offsets and do scalar filtering on these offsets
|
||||||
|
milvus::exec::OffsetVector offsets;
|
||||||
|
offsets.reserve(N * num_iters / 2);
|
||||||
|
for (auto i = 0; i < N * num_iters; ++i) {
|
||||||
|
if (i % 2 == 0) {
|
||||||
|
offsets.emplace_back(i);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
auto col_vec = milvus::test::gen_filter_res(plan->sources()[0].get(),
|
||||||
|
seg_promote,
|
||||||
|
N * num_iters,
|
||||||
|
MAX_TIMESTAMP,
|
||||||
|
&offsets);
|
||||||
|
BitsetTypeView view(col_vec->GetRawData(), col_vec->size());
|
||||||
|
EXPECT_EQ(view.size(), N * num_iters / 2);
|
||||||
|
|
||||||
for (int i = 0; i < N * num_iters; ++i) {
|
for (int i = 0; i < N * num_iters; ++i) {
|
||||||
auto ans = final[i];
|
auto ans = final[i];
|
||||||
|
|
||||||
auto val = age_col[i];
|
auto val = age_col[i];
|
||||||
ASSERT_EQ(ans, true) << "@" << i << "!!" << val;
|
ASSERT_EQ(ans, true) << "@" << i << "!!" << val;
|
||||||
|
if (i % 2 == 0) {
|
||||||
|
ASSERT_EQ(view[int(i / 2)], true) << "@" << i << "!!" << val;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@ -27,6 +27,7 @@
|
|||||||
#include "segcore/SegmentGrowingImpl.h"
|
#include "segcore/SegmentGrowingImpl.h"
|
||||||
#include "simdjson/padded_string.h"
|
#include "simdjson/padded_string.h"
|
||||||
#include "test_utils/DataGen.h"
|
#include "test_utils/DataGen.h"
|
||||||
|
#include "test_utils/GenExprProto.h"
|
||||||
|
|
||||||
using namespace milvus;
|
using namespace milvus;
|
||||||
using namespace milvus::query;
|
using namespace milvus::query;
|
||||||
@ -611,11 +612,31 @@ TEST(Expr, TestArrayRange) {
|
|||||||
MAX_TIMESTAMP);
|
MAX_TIMESTAMP);
|
||||||
EXPECT_EQ(final.size(), N * num_iters);
|
EXPECT_EQ(final.size(), N * num_iters);
|
||||||
|
|
||||||
|
// specify some offsets and do scalar filtering on these offsets
|
||||||
|
milvus::exec::OffsetVector offsets;
|
||||||
|
offsets.reserve(N * num_iters / 2);
|
||||||
|
for (auto i = 0; i < N * num_iters; ++i) {
|
||||||
|
if (i % 2 == 0) {
|
||||||
|
offsets.emplace_back(i);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
auto col_vec = milvus::test::gen_filter_res(
|
||||||
|
plan->plan_node_->plannodes_->sources()[0]->sources()[0].get(),
|
||||||
|
seg_promote,
|
||||||
|
N * num_iters,
|
||||||
|
MAX_TIMESTAMP,
|
||||||
|
&offsets);
|
||||||
|
BitsetTypeView view(col_vec->GetRawData(), col_vec->size());
|
||||||
|
EXPECT_EQ(view.size(), N * num_iters / 2);
|
||||||
|
|
||||||
for (int i = 0; i < N * num_iters; ++i) {
|
for (int i = 0; i < N * num_iters; ++i) {
|
||||||
auto ans = final[i];
|
auto ans = final[i];
|
||||||
auto array = milvus::Array(array_cols[array_type][i]);
|
auto array = milvus::Array(array_cols[array_type][i]);
|
||||||
auto ref = ref_func(array);
|
auto ref = ref_func(array);
|
||||||
ASSERT_EQ(ans, ref);
|
ASSERT_EQ(ans, ref);
|
||||||
|
if (i % 2 == 0) {
|
||||||
|
ASSERT_EQ(view[int(i / 2)], ref);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -728,6 +749,23 @@ TEST(Expr, TestArrayEqual) {
|
|||||||
MAX_TIMESTAMP);
|
MAX_TIMESTAMP);
|
||||||
EXPECT_EQ(final.size(), N * num_iters);
|
EXPECT_EQ(final.size(), N * num_iters);
|
||||||
|
|
||||||
|
// specify some offsets and do scalar filtering on these offsets
|
||||||
|
milvus::exec::OffsetVector offsets;
|
||||||
|
offsets.reserve(N * num_iters / 2);
|
||||||
|
for (auto i = 0; i < N * num_iters; ++i) {
|
||||||
|
if (i % 2 == 0) {
|
||||||
|
offsets.emplace_back(i);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
auto col_vec = milvus::test::gen_filter_res(
|
||||||
|
plan->plan_node_->plannodes_->sources()[0]->sources()[0].get(),
|
||||||
|
seg_promote,
|
||||||
|
N * num_iters,
|
||||||
|
MAX_TIMESTAMP,
|
||||||
|
&offsets);
|
||||||
|
BitsetTypeView view(col_vec->GetRawData(), col_vec->size());
|
||||||
|
EXPECT_EQ(view.size(), N * num_iters / 2);
|
||||||
|
|
||||||
for (int i = 0; i < N * num_iters; ++i) {
|
for (int i = 0; i < N * num_iters; ++i) {
|
||||||
auto ans = final[i];
|
auto ans = final[i];
|
||||||
auto array = milvus::Array(long_array_col[i]);
|
auto array = milvus::Array(long_array_col[i]);
|
||||||
@ -737,6 +775,9 @@ TEST(Expr, TestArrayEqual) {
|
|||||||
}
|
}
|
||||||
auto ref = ref_func(array_values);
|
auto ref = ref_func(array_values);
|
||||||
ASSERT_EQ(ans, ref);
|
ASSERT_EQ(ans, ref);
|
||||||
|
if (i % 2 == 0) {
|
||||||
|
ASSERT_EQ(view[int(i / 2)], ref);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -927,6 +968,19 @@ TEST(Expr, TestArrayContains) {
|
|||||||
<< std::endl;
|
<< std::endl;
|
||||||
EXPECT_EQ(final.size(), N * num_iters);
|
EXPECT_EQ(final.size(), N * num_iters);
|
||||||
|
|
||||||
|
// specify some offsets and do scalar filtering on these offsets
|
||||||
|
milvus::exec::OffsetVector offsets;
|
||||||
|
offsets.reserve(N * num_iters / 2);
|
||||||
|
for (auto i = 0; i < N * num_iters; ++i) {
|
||||||
|
if (i % 2 == 0) {
|
||||||
|
offsets.emplace_back(i);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
auto col_vec = milvus::test::gen_filter_res(
|
||||||
|
plan.get(), seg_promote, N * num_iters, MAX_TIMESTAMP, &offsets);
|
||||||
|
BitsetTypeView view(col_vec->GetRawData(), col_vec->size());
|
||||||
|
EXPECT_EQ(view.size(), N * num_iters / 2);
|
||||||
|
|
||||||
for (int i = 0; i < N * num_iters; ++i) {
|
for (int i = 0; i < N * num_iters; ++i) {
|
||||||
auto ans = final[i];
|
auto ans = final[i];
|
||||||
auto array = milvus::Array(array_cols["bool"][i]);
|
auto array = milvus::Array(array_cols["bool"][i]);
|
||||||
@ -935,6 +989,9 @@ TEST(Expr, TestArrayContains) {
|
|||||||
res.push_back(array.get_data<bool>(j));
|
res.push_back(array.get_data<bool>(j));
|
||||||
}
|
}
|
||||||
ASSERT_EQ(ans, check(res)) << "@" << i;
|
ASSERT_EQ(ans, check(res)) << "@" << i;
|
||||||
|
if (i % 2 == 0) {
|
||||||
|
ASSERT_EQ(view[int(i / 2)], check(res)) << "@" << i;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -982,6 +1039,19 @@ TEST(Expr, TestArrayContains) {
|
|||||||
<< std::endl;
|
<< std::endl;
|
||||||
EXPECT_EQ(final.size(), N * num_iters);
|
EXPECT_EQ(final.size(), N * num_iters);
|
||||||
|
|
||||||
|
// specify some offsets and do scalar filtering on these offsets
|
||||||
|
milvus::exec::OffsetVector offsets;
|
||||||
|
offsets.reserve(N * num_iters / 2);
|
||||||
|
for (auto i = 0; i < N * num_iters; ++i) {
|
||||||
|
if (i % 2 == 0) {
|
||||||
|
offsets.emplace_back(i);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
auto col_vec = milvus::test::gen_filter_res(
|
||||||
|
plan.get(), seg_promote, N * num_iters, MAX_TIMESTAMP, &offsets);
|
||||||
|
BitsetTypeView view(col_vec->GetRawData(), col_vec->size());
|
||||||
|
EXPECT_EQ(view.size(), N * num_iters / 2);
|
||||||
|
|
||||||
for (int i = 0; i < N * num_iters; ++i) {
|
for (int i = 0; i < N * num_iters; ++i) {
|
||||||
auto ans = final[i];
|
auto ans = final[i];
|
||||||
auto array = milvus::Array(array_cols["double"][i]);
|
auto array = milvus::Array(array_cols["double"][i]);
|
||||||
@ -990,6 +1060,9 @@ TEST(Expr, TestArrayContains) {
|
|||||||
res.push_back(array.get_data<double>(j));
|
res.push_back(array.get_data<double>(j));
|
||||||
}
|
}
|
||||||
ASSERT_EQ(ans, check(res));
|
ASSERT_EQ(ans, check(res));
|
||||||
|
if (i % 2 == 0) {
|
||||||
|
ASSERT_EQ(view[int(i / 2)], check(res));
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -1027,6 +1100,19 @@ TEST(Expr, TestArrayContains) {
|
|||||||
<< std::endl;
|
<< std::endl;
|
||||||
EXPECT_EQ(final.size(), N * num_iters);
|
EXPECT_EQ(final.size(), N * num_iters);
|
||||||
|
|
||||||
|
// specify some offsets and do scalar filtering on these offsets
|
||||||
|
milvus::exec::OffsetVector offsets;
|
||||||
|
offsets.reserve(N * num_iters / 2);
|
||||||
|
for (auto i = 0; i < N * num_iters; ++i) {
|
||||||
|
if (i % 2 == 0) {
|
||||||
|
offsets.emplace_back(i);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
auto col_vec = milvus::test::gen_filter_res(
|
||||||
|
plan.get(), seg_promote, N * num_iters, MAX_TIMESTAMP, &offsets);
|
||||||
|
BitsetTypeView view(col_vec->GetRawData(), col_vec->size());
|
||||||
|
EXPECT_EQ(view.size(), N * num_iters / 2);
|
||||||
|
|
||||||
for (int i = 0; i < N * num_iters; ++i) {
|
for (int i = 0; i < N * num_iters; ++i) {
|
||||||
auto ans = final[i];
|
auto ans = final[i];
|
||||||
auto array = milvus::Array(array_cols["float"][i]);
|
auto array = milvus::Array(array_cols["float"][i]);
|
||||||
@ -1035,6 +1121,9 @@ TEST(Expr, TestArrayContains) {
|
|||||||
res.push_back(array.get_data<float>(j));
|
res.push_back(array.get_data<float>(j));
|
||||||
}
|
}
|
||||||
ASSERT_EQ(ans, check(res));
|
ASSERT_EQ(ans, check(res));
|
||||||
|
if (i % 2 == 0) {
|
||||||
|
ASSERT_EQ(view[int(i / 2)], check(res));
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -1082,6 +1171,19 @@ TEST(Expr, TestArrayContains) {
|
|||||||
<< std::endl;
|
<< std::endl;
|
||||||
EXPECT_EQ(final.size(), N * num_iters);
|
EXPECT_EQ(final.size(), N * num_iters);
|
||||||
|
|
||||||
|
// specify some offsets and do scalar filtering on these offsets
|
||||||
|
milvus::exec::OffsetVector offsets;
|
||||||
|
offsets.reserve(N * num_iters / 2);
|
||||||
|
for (auto i = 0; i < N * num_iters; ++i) {
|
||||||
|
if (i % 2 == 0) {
|
||||||
|
offsets.emplace_back(i);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
auto col_vec = milvus::test::gen_filter_res(
|
||||||
|
plan.get(), seg_promote, N * num_iters, MAX_TIMESTAMP, &offsets);
|
||||||
|
BitsetTypeView view(col_vec->GetRawData(), col_vec->size());
|
||||||
|
EXPECT_EQ(view.size(), N * num_iters / 2);
|
||||||
|
|
||||||
for (int i = 0; i < N * num_iters; ++i) {
|
for (int i = 0; i < N * num_iters; ++i) {
|
||||||
auto ans = final[i];
|
auto ans = final[i];
|
||||||
auto array = milvus::Array(array_cols["int"][i]);
|
auto array = milvus::Array(array_cols["int"][i]);
|
||||||
@ -1090,6 +1192,9 @@ TEST(Expr, TestArrayContains) {
|
|||||||
res.push_back(array.get_data<int64_t>(j));
|
res.push_back(array.get_data<int64_t>(j));
|
||||||
}
|
}
|
||||||
ASSERT_EQ(ans, check(res));
|
ASSERT_EQ(ans, check(res));
|
||||||
|
if (i % 2 == 0) {
|
||||||
|
ASSERT_EQ(view[int(i / 2)], check(res));
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -1128,6 +1233,19 @@ TEST(Expr, TestArrayContains) {
|
|||||||
<< std::endl;
|
<< std::endl;
|
||||||
EXPECT_EQ(final.size(), N * num_iters);
|
EXPECT_EQ(final.size(), N * num_iters);
|
||||||
|
|
||||||
|
// specify some offsets and do scalar filtering on these offsets
|
||||||
|
milvus::exec::OffsetVector offsets;
|
||||||
|
offsets.reserve(N * num_iters / 2);
|
||||||
|
for (auto i = 0; i < N * num_iters; ++i) {
|
||||||
|
if (i % 2 == 0) {
|
||||||
|
offsets.emplace_back(i);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
auto col_vec = milvus::test::gen_filter_res(
|
||||||
|
plan.get(), seg_promote, N * num_iters, MAX_TIMESTAMP, &offsets);
|
||||||
|
BitsetTypeView view(col_vec->GetRawData(), col_vec->size());
|
||||||
|
EXPECT_EQ(view.size(), N * num_iters / 2);
|
||||||
|
|
||||||
for (int i = 0; i < N * num_iters; ++i) {
|
for (int i = 0; i < N * num_iters; ++i) {
|
||||||
auto ans = final[i];
|
auto ans = final[i];
|
||||||
auto array = milvus::Array(array_cols["long"][i]);
|
auto array = milvus::Array(array_cols["long"][i]);
|
||||||
@ -1136,6 +1254,9 @@ TEST(Expr, TestArrayContains) {
|
|||||||
res.push_back(array.get_data<int64_t>(j));
|
res.push_back(array.get_data<int64_t>(j));
|
||||||
}
|
}
|
||||||
ASSERT_EQ(ans, check(res));
|
ASSERT_EQ(ans, check(res));
|
||||||
|
if (i % 2 == 0) {
|
||||||
|
ASSERT_EQ(view[int(i / 2)], check(res));
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -1181,6 +1302,19 @@ TEST(Expr, TestArrayContains) {
|
|||||||
<< std::endl;
|
<< std::endl;
|
||||||
EXPECT_EQ(final.size(), N * num_iters);
|
EXPECT_EQ(final.size(), N * num_iters);
|
||||||
|
|
||||||
|
// specify some offsets and do scalar filtering on these offsets
|
||||||
|
milvus::exec::OffsetVector offsets;
|
||||||
|
offsets.reserve(N * num_iters / 2);
|
||||||
|
for (auto i = 0; i < N * num_iters; ++i) {
|
||||||
|
if (i % 2 == 0) {
|
||||||
|
offsets.emplace_back(i);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
auto col_vec = milvus::test::gen_filter_res(
|
||||||
|
plan.get(), seg_promote, N * num_iters, MAX_TIMESTAMP, &offsets);
|
||||||
|
BitsetTypeView view(col_vec->GetRawData(), col_vec->size());
|
||||||
|
EXPECT_EQ(view.size(), N * num_iters / 2);
|
||||||
|
|
||||||
for (int i = 0; i < N * num_iters; ++i) {
|
for (int i = 0; i < N * num_iters; ++i) {
|
||||||
auto ans = final[i];
|
auto ans = final[i];
|
||||||
auto array = milvus::Array(array_cols["string"][i]);
|
auto array = milvus::Array(array_cols["string"][i]);
|
||||||
@ -1189,6 +1323,9 @@ TEST(Expr, TestArrayContains) {
|
|||||||
res.push_back(array.get_data<std::string_view>(j));
|
res.push_back(array.get_data<std::string_view>(j));
|
||||||
}
|
}
|
||||||
ASSERT_EQ(ans, check(res));
|
ASSERT_EQ(ans, check(res));
|
||||||
|
if (i % 2 == 0) {
|
||||||
|
ASSERT_EQ(view[int(i / 2)], check(res));
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -2127,11 +2264,31 @@ TEST(Expr, TestArrayBinaryArith) {
|
|||||||
MAX_TIMESTAMP);
|
MAX_TIMESTAMP);
|
||||||
EXPECT_EQ(final.size(), N * num_iters);
|
EXPECT_EQ(final.size(), N * num_iters);
|
||||||
|
|
||||||
|
// specify some offsets and do scalar filtering on these offsets
|
||||||
|
milvus::exec::OffsetVector offsets;
|
||||||
|
offsets.reserve(N * num_iters / 2);
|
||||||
|
for (auto i = 0; i < N * num_iters; ++i) {
|
||||||
|
if (i % 2 == 0) {
|
||||||
|
offsets.emplace_back(i);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
auto col_vec = milvus::test::gen_filter_res(
|
||||||
|
plan->plan_node_->plannodes_->sources()[0]->sources()[0].get(),
|
||||||
|
seg_promote,
|
||||||
|
N * num_iters,
|
||||||
|
MAX_TIMESTAMP,
|
||||||
|
&offsets);
|
||||||
|
BitsetTypeView view(col_vec->GetRawData(), col_vec->size());
|
||||||
|
EXPECT_EQ(view.size(), N * num_iters / 2);
|
||||||
|
|
||||||
for (int i = 0; i < N * num_iters; ++i) {
|
for (int i = 0; i < N * num_iters; ++i) {
|
||||||
auto ans = final[i];
|
auto ans = final[i];
|
||||||
auto array = milvus::Array(array_cols[array_type][i]);
|
auto array = milvus::Array(array_cols[array_type][i]);
|
||||||
auto ref = ref_func(array);
|
auto ref = ref_func(array);
|
||||||
ASSERT_EQ(ans, ref);
|
ASSERT_EQ(ans, ref);
|
||||||
|
if (i % 2 == 0) {
|
||||||
|
ASSERT_EQ(view[int(i / 2)], ref);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -2217,10 +2374,26 @@ TEST(Expr, TestArrayStringMatch) {
|
|||||||
<< std::endl;
|
<< std::endl;
|
||||||
EXPECT_EQ(final.size(), N * num_iters);
|
EXPECT_EQ(final.size(), N * num_iters);
|
||||||
|
|
||||||
|
// specify some offsets and do scalar filtering on these offsets
|
||||||
|
milvus::exec::OffsetVector offsets;
|
||||||
|
offsets.reserve(N * num_iters / 2);
|
||||||
|
for (auto i = 0; i < N * num_iters; ++i) {
|
||||||
|
if (i % 2 == 0) {
|
||||||
|
offsets.emplace_back(i);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
auto col_vec = milvus::test::gen_filter_res(
|
||||||
|
plan.get(), seg_promote, N * num_iters, MAX_TIMESTAMP, &offsets);
|
||||||
|
BitsetTypeView view(col_vec->GetRawData(), col_vec->size());
|
||||||
|
EXPECT_EQ(view.size(), N * num_iters / 2);
|
||||||
|
|
||||||
for (int i = 0; i < N * num_iters; ++i) {
|
for (int i = 0; i < N * num_iters; ++i) {
|
||||||
auto ans = final[i];
|
auto ans = final[i];
|
||||||
auto array = milvus::Array(array_cols["string"][i]);
|
auto array = milvus::Array(array_cols["string"][i]);
|
||||||
ASSERT_EQ(ans, testcase.check_func(array));
|
ASSERT_EQ(ans, testcase.check_func(array));
|
||||||
|
if (i % 2 == 0) {
|
||||||
|
ASSERT_EQ(view[int(i / 2)], testcase.check_func(array));
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -2420,10 +2593,30 @@ TEST(Expr, TestArrayInTerm) {
|
|||||||
MAX_TIMESTAMP);
|
MAX_TIMESTAMP);
|
||||||
EXPECT_EQ(final.size(), N * num_iters);
|
EXPECT_EQ(final.size(), N * num_iters);
|
||||||
|
|
||||||
|
// specify some offsets and do scalar filtering on these offsets
|
||||||
|
milvus::exec::OffsetVector offsets;
|
||||||
|
offsets.reserve(N * num_iters / 2);
|
||||||
|
for (auto i = 0; i < N * num_iters; ++i) {
|
||||||
|
if (i % 2 == 0) {
|
||||||
|
offsets.emplace_back(i);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
auto col_vec = milvus::test::gen_filter_res(
|
||||||
|
plan->plan_node_->plannodes_->sources()[0]->sources()[0].get(),
|
||||||
|
seg_promote,
|
||||||
|
N * num_iters,
|
||||||
|
MAX_TIMESTAMP,
|
||||||
|
&offsets);
|
||||||
|
BitsetTypeView view(col_vec->GetRawData(), col_vec->size());
|
||||||
|
EXPECT_EQ(view.size(), N * num_iters / 2);
|
||||||
|
|
||||||
for (int i = 0; i < N * num_iters; ++i) {
|
for (int i = 0; i < N * num_iters; ++i) {
|
||||||
auto ans = final[i];
|
auto ans = final[i];
|
||||||
auto array = milvus::Array(array_cols[array_type][i]);
|
auto array = milvus::Array(array_cols[array_type][i]);
|
||||||
ASSERT_EQ(ans, ref_func(array));
|
ASSERT_EQ(ans, ref_func(array));
|
||||||
|
if (i % 2 == 0) {
|
||||||
|
ASSERT_EQ(view[int(i / 2)], ref_func(array));
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -2510,10 +2703,26 @@ TEST(Expr, TestTermInArray) {
|
|||||||
<< std::endl;
|
<< std::endl;
|
||||||
EXPECT_EQ(final.size(), N * num_iters);
|
EXPECT_EQ(final.size(), N * num_iters);
|
||||||
|
|
||||||
|
// specify some offsets and do scalar filtering on these offsets
|
||||||
|
milvus::exec::OffsetVector offsets;
|
||||||
|
offsets.reserve(N * num_iters / 2);
|
||||||
|
for (auto i = 0; i < N * num_iters; ++i) {
|
||||||
|
if (i % 2 == 0) {
|
||||||
|
offsets.emplace_back(i);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
auto col_vec = milvus::test::gen_filter_res(
|
||||||
|
plan.get(), seg_promote, N * num_iters, MAX_TIMESTAMP, &offsets);
|
||||||
|
BitsetTypeView view(col_vec->GetRawData(), col_vec->size());
|
||||||
|
EXPECT_EQ(view.size(), N * num_iters / 2);
|
||||||
|
|
||||||
for (int i = 0; i < N * num_iters; ++i) {
|
for (int i = 0; i < N * num_iters; ++i) {
|
||||||
auto ans = final[i];
|
auto ans = final[i];
|
||||||
auto array = milvus::Array(array_cols["long"][i]);
|
auto array = milvus::Array(array_cols["long"][i]);
|
||||||
ASSERT_EQ(ans, testcase.check_func(array));
|
ASSERT_EQ(ans, testcase.check_func(array));
|
||||||
|
if (i % 2 == 0) {
|
||||||
|
ASSERT_EQ(view[int(i / 2)], testcase.check_func(array));
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
File diff suppressed because it is too large
Load Diff
594
internal/core/unittest/test_iterative_filter.cpp
Normal file
594
internal/core/unittest/test_iterative_filter.cpp
Normal file
@ -0,0 +1,594 @@
|
|||||||
|
// Copyright (C) 2019-2020 Zilliz. All rights reserved.
|
||||||
|
//
|
||||||
|
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance
|
||||||
|
// with the License. You may obtain a copy of the License at
|
||||||
|
//
|
||||||
|
// http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
//
|
||||||
|
// Unless required by applicable law or agreed to in writing, software distributed under the License
|
||||||
|
// is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
|
||||||
|
// or implied. See the License for the specific language governing permissions and limitations under the License
|
||||||
|
|
||||||
|
#include <gtest/gtest.h>
|
||||||
|
#include "common/Schema.h"
|
||||||
|
#include "query/Plan.h"
|
||||||
|
#include "segcore/SegmentSealedImpl.h"
|
||||||
|
#include "segcore/reduce_c.h"
|
||||||
|
#include "segcore/plan_c.h"
|
||||||
|
#include "segcore/segment_c.h"
|
||||||
|
#include "test_utils/DataGen.h"
|
||||||
|
#include "test_utils/c_api_test_utils.h"
|
||||||
|
|
||||||
|
using namespace milvus;
|
||||||
|
using namespace milvus::query;
|
||||||
|
using namespace milvus::segcore;
|
||||||
|
using namespace milvus::storage;
|
||||||
|
using namespace milvus::tracer;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* this UT is to cover Iterative filtering execution logic (knowhere iterator next() -> scalar filtering)
|
||||||
|
* so we will not cover all expr type here, just some examples
|
||||||
|
*/
|
||||||
|
|
||||||
|
void
|
||||||
|
prepareSegmentFieldData(const std::unique_ptr<SegmentSealed>& segment,
|
||||||
|
size_t row_count,
|
||||||
|
GeneratedData& data_set) {
|
||||||
|
auto field_data =
|
||||||
|
std::make_shared<milvus::FieldData<int64_t>>(DataType::INT64, false);
|
||||||
|
field_data->FillFieldData(data_set.row_ids_.data(), row_count);
|
||||||
|
auto field_data_info =
|
||||||
|
FieldDataInfo{RowFieldID.get(),
|
||||||
|
row_count,
|
||||||
|
std::vector<milvus::FieldDataPtr>{field_data}};
|
||||||
|
segment->LoadFieldData(RowFieldID, field_data_info);
|
||||||
|
|
||||||
|
field_data =
|
||||||
|
std::make_shared<milvus::FieldData<int64_t>>(DataType::INT64, false);
|
||||||
|
field_data->FillFieldData(data_set.timestamps_.data(), row_count);
|
||||||
|
field_data_info =
|
||||||
|
FieldDataInfo{TimestampFieldID.get(),
|
||||||
|
row_count,
|
||||||
|
std::vector<milvus::FieldDataPtr>{field_data}};
|
||||||
|
segment->LoadFieldData(TimestampFieldID, field_data_info);
|
||||||
|
}
|
||||||
|
|
||||||
|
void
|
||||||
|
CheckFilterSearchResult(const SearchResult& search_result_by_iterative_filter,
|
||||||
|
const SearchResult& search_result_by_pre_filter,
|
||||||
|
int topK,
|
||||||
|
int nq) {
|
||||||
|
ASSERT_EQ(search_result_by_pre_filter.seg_offsets_.size(), topK * nq);
|
||||||
|
ASSERT_EQ(search_result_by_pre_filter.distances_.size(), topK * nq);
|
||||||
|
ASSERT_EQ(search_result_by_iterative_filter.seg_offsets_.size(), topK * nq);
|
||||||
|
ASSERT_EQ(search_result_by_iterative_filter.distances_.size(), topK * nq);
|
||||||
|
|
||||||
|
for (int i = 0; i < topK * nq; ++i) {
|
||||||
|
std::cout << search_result_by_pre_filter.seg_offsets_[i] << " "
|
||||||
|
<< search_result_by_pre_filter.distances_[i] << " "
|
||||||
|
<< search_result_by_iterative_filter.seg_offsets_[i] << " "
|
||||||
|
<< search_result_by_iterative_filter.distances_[i]
|
||||||
|
<< std::endl;
|
||||||
|
ASSERT_EQ(search_result_by_pre_filter.seg_offsets_[i],
|
||||||
|
search_result_by_iterative_filter.seg_offsets_[i]);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
TEST(IterativeFilter, SealedIndex) {
|
||||||
|
using namespace milvus;
|
||||||
|
using namespace milvus::query;
|
||||||
|
using namespace milvus::segcore;
|
||||||
|
|
||||||
|
//0. prepare schema
|
||||||
|
int dim = 64;
|
||||||
|
auto schema = std::make_shared<Schema>();
|
||||||
|
auto vec_fid = schema->AddDebugField(
|
||||||
|
"fakevec", DataType::VECTOR_FLOAT, dim, knowhere::metric::L2);
|
||||||
|
auto int8_fid = schema->AddDebugField("int8", DataType::INT8);
|
||||||
|
auto int16_fid = schema->AddDebugField("int16", DataType::INT16);
|
||||||
|
auto int32_fid = schema->AddDebugField("int32", DataType::INT32);
|
||||||
|
auto int64_fid = schema->AddDebugField("int64", DataType::INT64);
|
||||||
|
auto str_fid = schema->AddDebugField("string1", DataType::VARCHAR);
|
||||||
|
auto bool_fid = schema->AddDebugField("bool", DataType::BOOL);
|
||||||
|
schema->set_primary_field_id(str_fid);
|
||||||
|
auto segment = CreateSealedSegment(schema);
|
||||||
|
size_t N = 50;
|
||||||
|
|
||||||
|
//2. load raw data
|
||||||
|
auto raw_data = DataGen(schema, N, 42, 0, 8, 10, false, false);
|
||||||
|
auto fields = schema->get_fields();
|
||||||
|
for (auto field_data : raw_data.raw_->fields_data()) {
|
||||||
|
int64_t field_id = field_data.field_id();
|
||||||
|
|
||||||
|
auto info = FieldDataInfo(field_data.field_id(), N);
|
||||||
|
auto field_meta = fields.at(FieldId(field_id));
|
||||||
|
info.channel->push(
|
||||||
|
CreateFieldDataFromDataArray(N, &field_data, field_meta));
|
||||||
|
info.channel->close();
|
||||||
|
|
||||||
|
segment->LoadFieldData(FieldId(field_id), info);
|
||||||
|
}
|
||||||
|
prepareSegmentFieldData(segment, N, raw_data);
|
||||||
|
|
||||||
|
//3. load index
|
||||||
|
auto vector_data = raw_data.get_col<float>(vec_fid);
|
||||||
|
auto indexing = GenVecIndexing(
|
||||||
|
N, dim, vector_data.data(), knowhere::IndexEnum::INDEX_HNSW);
|
||||||
|
LoadIndexInfo load_index_info;
|
||||||
|
load_index_info.field_id = vec_fid.get();
|
||||||
|
load_index_info.index = std::move(indexing);
|
||||||
|
load_index_info.index_params["metric_type"] = knowhere::metric::L2;
|
||||||
|
segment->LoadIndex(load_index_info);
|
||||||
|
int topK = 10;
|
||||||
|
int group_size = 3;
|
||||||
|
|
||||||
|
// int8 binaryRange
|
||||||
|
{
|
||||||
|
const char* raw_plan = R"(vector_anns: <
|
||||||
|
field_id: 100
|
||||||
|
predicates: <
|
||||||
|
binary_range_expr: <
|
||||||
|
column_info: <
|
||||||
|
field_id: 101
|
||||||
|
data_type: Int8
|
||||||
|
>
|
||||||
|
lower_inclusive: true,
|
||||||
|
upper_inclusive: false,
|
||||||
|
lower_value: <
|
||||||
|
int64_val: -1
|
||||||
|
>
|
||||||
|
upper_value: <
|
||||||
|
int64_val: 100
|
||||||
|
>
|
||||||
|
>
|
||||||
|
>
|
||||||
|
query_info: <
|
||||||
|
topk: 10
|
||||||
|
metric_type: "L2"
|
||||||
|
hints: "iterative_filter"
|
||||||
|
search_params: "{\"ef\": 50}"
|
||||||
|
>
|
||||||
|
placeholder_tag: "$0">)";
|
||||||
|
proto::plan::PlanNode plan_node;
|
||||||
|
auto ok =
|
||||||
|
google::protobuf::TextFormat::ParseFromString(raw_plan, &plan_node);
|
||||||
|
auto plan = CreateSearchPlanFromPlanNode(*schema, plan_node);
|
||||||
|
auto num_queries = 1;
|
||||||
|
auto seed = 1024;
|
||||||
|
auto ph_group_raw = CreatePlaceholderGroup(num_queries, dim, seed);
|
||||||
|
auto ph_group =
|
||||||
|
ParsePlaceholderGroup(plan.get(), ph_group_raw.SerializeAsString());
|
||||||
|
auto search_result =
|
||||||
|
segment->Search(plan.get(), ph_group.get(), 1L << 63);
|
||||||
|
|
||||||
|
const char* raw_plan2 = R"(vector_anns: <
|
||||||
|
field_id: 100
|
||||||
|
predicates: <
|
||||||
|
binary_range_expr: <
|
||||||
|
column_info: <
|
||||||
|
field_id: 101
|
||||||
|
data_type: Int8
|
||||||
|
>
|
||||||
|
lower_inclusive: true,
|
||||||
|
upper_inclusive: false,
|
||||||
|
lower_value: <
|
||||||
|
int64_val: -1
|
||||||
|
>
|
||||||
|
upper_value: <
|
||||||
|
int64_val: 100
|
||||||
|
>
|
||||||
|
>
|
||||||
|
>
|
||||||
|
query_info: <
|
||||||
|
topk: 10
|
||||||
|
metric_type: "L2"
|
||||||
|
search_params: "{\"ef\": 50}"
|
||||||
|
>
|
||||||
|
placeholder_tag: "$0">)";
|
||||||
|
proto::plan::PlanNode plan_node2;
|
||||||
|
auto ok2 = google::protobuf::TextFormat::ParseFromString(raw_plan2,
|
||||||
|
&plan_node2);
|
||||||
|
auto plan2 = CreateSearchPlanFromPlanNode(*schema, plan_node2);
|
||||||
|
auto search_result2 =
|
||||||
|
segment->Search(plan2.get(), ph_group.get(), 1L << 63);
|
||||||
|
CheckFilterSearchResult(
|
||||||
|
*search_result, *search_result2, topK, num_queries);
|
||||||
|
}
|
||||||
|
|
||||||
|
// int16 Termexpr
|
||||||
|
{
|
||||||
|
const char* raw_plan = R"(vector_anns: <
|
||||||
|
field_id: 100
|
||||||
|
predicates: <
|
||||||
|
term_expr: <
|
||||||
|
column_info: <
|
||||||
|
field_id: 102
|
||||||
|
data_type: Int16
|
||||||
|
>
|
||||||
|
values:<int64_val:1> values:<int64_val:2 >
|
||||||
|
>
|
||||||
|
>
|
||||||
|
query_info: <
|
||||||
|
topk: 10
|
||||||
|
metric_type: "L2"
|
||||||
|
hints: "iterative_filter"
|
||||||
|
search_params: "{\"ef\": 50}"
|
||||||
|
>
|
||||||
|
placeholder_tag: "$0">)";
|
||||||
|
proto::plan::PlanNode plan_node;
|
||||||
|
auto ok =
|
||||||
|
google::protobuf::TextFormat::ParseFromString(raw_plan, &plan_node);
|
||||||
|
auto plan = CreateSearchPlanFromPlanNode(*schema, plan_node);
|
||||||
|
auto num_queries = 1;
|
||||||
|
auto seed = 1024;
|
||||||
|
auto ph_group_raw = CreatePlaceholderGroup(num_queries, dim, seed);
|
||||||
|
auto ph_group =
|
||||||
|
ParsePlaceholderGroup(plan.get(), ph_group_raw.SerializeAsString());
|
||||||
|
auto search_result =
|
||||||
|
segment->Search(plan.get(), ph_group.get(), 1L << 63);
|
||||||
|
|
||||||
|
const char* raw_plan2 = R"(vector_anns: <
|
||||||
|
field_id: 100
|
||||||
|
predicates: <
|
||||||
|
term_expr: <
|
||||||
|
column_info: <
|
||||||
|
field_id: 102
|
||||||
|
data_type: Int16
|
||||||
|
>
|
||||||
|
values:<int64_val:1> values:<int64_val:2 >
|
||||||
|
>
|
||||||
|
>
|
||||||
|
query_info: <
|
||||||
|
topk: 10
|
||||||
|
metric_type: "L2"
|
||||||
|
search_params: "{\"ef\": 50}"
|
||||||
|
>
|
||||||
|
placeholder_tag: "$0">)";
|
||||||
|
proto::plan::PlanNode plan_node2;
|
||||||
|
auto ok2 = google::protobuf::TextFormat::ParseFromString(raw_plan2,
|
||||||
|
&plan_node2);
|
||||||
|
auto plan2 = CreateSearchPlanFromPlanNode(*schema, plan_node2);
|
||||||
|
auto search_result2 =
|
||||||
|
segment->Search(plan2.get(), ph_group.get(), 1L << 63);
|
||||||
|
CheckFilterSearchResult(
|
||||||
|
*search_result, *search_result2, topK, num_queries);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
TEST(IterativeFilter, SealedData) {
|
||||||
|
using namespace milvus;
|
||||||
|
using namespace milvus::query;
|
||||||
|
using namespace milvus::segcore;
|
||||||
|
|
||||||
|
//0. prepare schema
|
||||||
|
int dim = 64;
|
||||||
|
auto schema = std::make_shared<Schema>();
|
||||||
|
auto vec_fid = schema->AddDebugField(
|
||||||
|
"fakevec", DataType::VECTOR_FLOAT, dim, knowhere::metric::L2);
|
||||||
|
auto int8_fid = schema->AddDebugField("int8", DataType::INT8);
|
||||||
|
auto int16_fid = schema->AddDebugField("int16", DataType::INT16);
|
||||||
|
auto int32_fid = schema->AddDebugField("int32", DataType::INT32);
|
||||||
|
auto int64_fid = schema->AddDebugField("int64", DataType::INT64);
|
||||||
|
auto str_fid = schema->AddDebugField("string1", DataType::VARCHAR);
|
||||||
|
auto bool_fid = schema->AddDebugField("bool", DataType::BOOL);
|
||||||
|
schema->set_primary_field_id(str_fid);
|
||||||
|
auto segment = CreateSealedSegment(schema);
|
||||||
|
size_t N = 100;
|
||||||
|
|
||||||
|
//2. load raw data
|
||||||
|
auto raw_data = DataGen(schema, N, 42, 0, 8, 10, false, false);
|
||||||
|
auto fields = schema->get_fields();
|
||||||
|
for (auto field_data : raw_data.raw_->fields_data()) {
|
||||||
|
int64_t field_id = field_data.field_id();
|
||||||
|
|
||||||
|
auto info = FieldDataInfo(field_data.field_id(), N);
|
||||||
|
auto field_meta = fields.at(FieldId(field_id));
|
||||||
|
info.channel->push(
|
||||||
|
CreateFieldDataFromDataArray(N, &field_data, field_meta));
|
||||||
|
info.channel->close();
|
||||||
|
|
||||||
|
segment->LoadFieldData(FieldId(field_id), info);
|
||||||
|
}
|
||||||
|
prepareSegmentFieldData(segment, N, raw_data);
|
||||||
|
|
||||||
|
int topK = 10;
|
||||||
|
// int8 binaryRange
|
||||||
|
{
|
||||||
|
const char* raw_plan = R"(vector_anns: <
|
||||||
|
field_id: 100
|
||||||
|
predicates: <
|
||||||
|
binary_range_expr: <
|
||||||
|
column_info: <
|
||||||
|
field_id: 101
|
||||||
|
data_type: Int8
|
||||||
|
>
|
||||||
|
lower_inclusive: true,
|
||||||
|
upper_inclusive: false,
|
||||||
|
lower_value: <
|
||||||
|
int64_val: -1
|
||||||
|
>
|
||||||
|
upper_value: <
|
||||||
|
int64_val: 100
|
||||||
|
>
|
||||||
|
>
|
||||||
|
>
|
||||||
|
query_info: <
|
||||||
|
topk: 10
|
||||||
|
metric_type: "L2"
|
||||||
|
hints: "iterative_filter"
|
||||||
|
search_params: "{\"ef\": 50}"
|
||||||
|
>
|
||||||
|
placeholder_tag: "$0">)";
|
||||||
|
proto::plan::PlanNode plan_node;
|
||||||
|
auto ok =
|
||||||
|
google::protobuf::TextFormat::ParseFromString(raw_plan, &plan_node);
|
||||||
|
auto plan = CreateSearchPlanFromPlanNode(*schema, plan_node);
|
||||||
|
auto num_queries = 1;
|
||||||
|
auto seed = 1024;
|
||||||
|
auto ph_group_raw = CreatePlaceholderGroup(num_queries, dim, seed);
|
||||||
|
auto ph_group =
|
||||||
|
ParsePlaceholderGroup(plan.get(), ph_group_raw.SerializeAsString());
|
||||||
|
auto search_result =
|
||||||
|
segment->Search(plan.get(), ph_group.get(), 1L << 63);
|
||||||
|
|
||||||
|
const char* raw_plan2 = R"(vector_anns: <
|
||||||
|
field_id: 100
|
||||||
|
predicates: <
|
||||||
|
binary_range_expr: <
|
||||||
|
column_info: <
|
||||||
|
field_id: 101
|
||||||
|
data_type: Int8
|
||||||
|
>
|
||||||
|
lower_inclusive: true,
|
||||||
|
upper_inclusive: false,
|
||||||
|
lower_value: <
|
||||||
|
int64_val: -1
|
||||||
|
>
|
||||||
|
upper_value: <
|
||||||
|
int64_val: 100
|
||||||
|
>
|
||||||
|
>
|
||||||
|
>
|
||||||
|
query_info: <
|
||||||
|
topk: 10
|
||||||
|
metric_type: "L2"
|
||||||
|
search_params: "{\"ef\": 50}"
|
||||||
|
>
|
||||||
|
placeholder_tag: "$0">)";
|
||||||
|
proto::plan::PlanNode plan_node2;
|
||||||
|
auto ok2 = google::protobuf::TextFormat::ParseFromString(raw_plan2,
|
||||||
|
&plan_node2);
|
||||||
|
auto plan2 = CreateSearchPlanFromPlanNode(*schema, plan_node2);
|
||||||
|
auto search_result2 =
|
||||||
|
segment->Search(plan2.get(), ph_group.get(), 1L << 63);
|
||||||
|
CheckFilterSearchResult(
|
||||||
|
*search_result, *search_result2, topK, num_queries);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
TEST(IterativeFilter, GrowingRawData) {
|
||||||
|
int dim = 128;
|
||||||
|
uint64_t seed = 512;
|
||||||
|
auto schema = std::make_shared<Schema>();
|
||||||
|
auto metric_type = knowhere::metric::L2;
|
||||||
|
auto int64_field_id = schema->AddDebugField("int64", DataType::INT64);
|
||||||
|
auto int32_field_id = schema->AddDebugField("int32", DataType::INT32);
|
||||||
|
auto vec_field_id = schema->AddDebugField(
|
||||||
|
"embeddings", DataType::VECTOR_FLOAT, 128, metric_type);
|
||||||
|
schema->set_primary_field_id(int64_field_id);
|
||||||
|
|
||||||
|
auto config = SegcoreConfig::default_config();
|
||||||
|
config.set_chunk_rows(8);
|
||||||
|
config.set_enable_interim_segment_index(
|
||||||
|
false); //no growing index, test brute force
|
||||||
|
auto segment_growing = CreateGrowingSegment(schema, nullptr, 1, config);
|
||||||
|
auto segment_growing_impl =
|
||||||
|
dynamic_cast<SegmentGrowingImpl*>(segment_growing.get());
|
||||||
|
|
||||||
|
int64_t rows_per_batch = 30;
|
||||||
|
int n_batch = 1;
|
||||||
|
for (int i = 0; i < n_batch; i++) {
|
||||||
|
auto data_set =
|
||||||
|
DataGen(schema, rows_per_batch, 42, 0, 8, 10, false, false);
|
||||||
|
auto offset = segment_growing_impl->PreInsert(rows_per_batch);
|
||||||
|
segment_growing_impl->Insert(offset,
|
||||||
|
rows_per_batch,
|
||||||
|
data_set.row_ids_.data(),
|
||||||
|
data_set.timestamps_.data(),
|
||||||
|
data_set.raw_);
|
||||||
|
}
|
||||||
|
|
||||||
|
auto topK = 10;
|
||||||
|
// int8 binaryRange
|
||||||
|
{
|
||||||
|
const char* raw_plan = R"(vector_anns: <
|
||||||
|
field_id: 102
|
||||||
|
predicates: <
|
||||||
|
binary_range_expr: <
|
||||||
|
column_info: <
|
||||||
|
field_id: 100
|
||||||
|
data_type: Int64
|
||||||
|
>
|
||||||
|
lower_inclusive: true,
|
||||||
|
upper_inclusive: false,
|
||||||
|
lower_value: <
|
||||||
|
int64_val: -1
|
||||||
|
>
|
||||||
|
upper_value: <
|
||||||
|
int64_val: 1
|
||||||
|
>
|
||||||
|
>
|
||||||
|
>
|
||||||
|
query_info: <
|
||||||
|
topk: 10
|
||||||
|
metric_type: "L2"
|
||||||
|
hints: "iterative_filter"
|
||||||
|
search_params: "{\"ef\": 50}"
|
||||||
|
>
|
||||||
|
placeholder_tag: "$0">)";
|
||||||
|
proto::plan::PlanNode plan_node;
|
||||||
|
auto ok =
|
||||||
|
google::protobuf::TextFormat::ParseFromString(raw_plan, &plan_node);
|
||||||
|
auto plan = CreateSearchPlanFromPlanNode(*schema, plan_node);
|
||||||
|
auto num_queries = 1;
|
||||||
|
auto seed = 1024;
|
||||||
|
auto ph_group_raw = CreatePlaceholderGroup(num_queries, dim, seed);
|
||||||
|
auto ph_group =
|
||||||
|
ParsePlaceholderGroup(plan.get(), ph_group_raw.SerializeAsString());
|
||||||
|
auto search_result =
|
||||||
|
segment_growing_impl->Search(plan.get(), ph_group.get(), 1L << 63);
|
||||||
|
|
||||||
|
const char* raw_plan2 = R"(vector_anns: <
|
||||||
|
field_id: 102
|
||||||
|
predicates: <
|
||||||
|
binary_range_expr: <
|
||||||
|
column_info: <
|
||||||
|
field_id: 100
|
||||||
|
data_type: Int64
|
||||||
|
>
|
||||||
|
lower_inclusive: true,
|
||||||
|
upper_inclusive: false,
|
||||||
|
lower_value: <
|
||||||
|
int64_val: -1
|
||||||
|
>
|
||||||
|
upper_value: <
|
||||||
|
int64_val: 1
|
||||||
|
>
|
||||||
|
>
|
||||||
|
>
|
||||||
|
query_info: <
|
||||||
|
topk: 10
|
||||||
|
metric_type: "L2"
|
||||||
|
search_params: "{\"ef\": 50}"
|
||||||
|
>
|
||||||
|
placeholder_tag: "$0">)";
|
||||||
|
proto::plan::PlanNode plan_node2;
|
||||||
|
auto ok2 = google::protobuf::TextFormat::ParseFromString(raw_plan2,
|
||||||
|
&plan_node2);
|
||||||
|
auto plan2 = CreateSearchPlanFromPlanNode(*schema, plan_node2);
|
||||||
|
auto search_result2 =
|
||||||
|
segment_growing_impl->Search(plan2.get(), ph_group.get(), 1L << 63);
|
||||||
|
CheckFilterSearchResult(
|
||||||
|
*search_result, *search_result2, topK, num_queries);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
TEST(IterativeFilter, GrowingIndex) {
|
||||||
|
int dim = 128;
|
||||||
|
uint64_t seed = 512;
|
||||||
|
auto schema = std::make_shared<Schema>();
|
||||||
|
auto metric_type = knowhere::metric::L2;
|
||||||
|
auto int64_field_id = schema->AddDebugField("int64", DataType::INT64);
|
||||||
|
auto int32_field_id = schema->AddDebugField("int32", DataType::INT32);
|
||||||
|
auto vec_field_id = schema->AddDebugField(
|
||||||
|
"embeddings", DataType::VECTOR_FLOAT, 128, metric_type);
|
||||||
|
schema->set_primary_field_id(int64_field_id);
|
||||||
|
|
||||||
|
std::map<std::string, std::string> index_params = {
|
||||||
|
{"index_type", "IVF_FLAT"},
|
||||||
|
{"metric_type", metric_type},
|
||||||
|
{"nlist", "4"}};
|
||||||
|
std::map<std::string, std::string> type_params = {{"dim", "128"}};
|
||||||
|
FieldIndexMeta fieldIndexMeta(
|
||||||
|
vec_field_id, std::move(index_params), std::move(type_params));
|
||||||
|
std::map<FieldId, FieldIndexMeta> fieldMap = {
|
||||||
|
{vec_field_id, fieldIndexMeta}};
|
||||||
|
IndexMetaPtr metaPtr =
|
||||||
|
std::make_shared<CollectionIndexMeta>(10000, std::move(fieldMap));
|
||||||
|
|
||||||
|
auto config = SegcoreConfig::default_config();
|
||||||
|
config.set_chunk_rows(16);
|
||||||
|
config.set_enable_interim_segment_index(true); // test growing inter index
|
||||||
|
config.set_nlist(4);
|
||||||
|
config.set_nlist(4);
|
||||||
|
auto segment_growing = CreateGrowingSegment(schema, metaPtr, 1, config);
|
||||||
|
auto segment_growing_impl =
|
||||||
|
dynamic_cast<SegmentGrowingImpl*>(segment_growing.get());
|
||||||
|
|
||||||
|
//1. prepare raw data in growing segment
|
||||||
|
int64_t rows_per_batch = 100;
|
||||||
|
int n_batch = 1;
|
||||||
|
for (int i = 0; i < n_batch; i++) {
|
||||||
|
auto data_set =
|
||||||
|
DataGen(schema, rows_per_batch, 42, 0, 8, 10, false, false);
|
||||||
|
auto offset = segment_growing_impl->PreInsert(rows_per_batch);
|
||||||
|
segment_growing_impl->Insert(offset,
|
||||||
|
rows_per_batch,
|
||||||
|
data_set.row_ids_.data(),
|
||||||
|
data_set.timestamps_.data(),
|
||||||
|
data_set.raw_);
|
||||||
|
}
|
||||||
|
|
||||||
|
auto topK = 10;
|
||||||
|
{
|
||||||
|
const char* raw_plan = R"(vector_anns: <
|
||||||
|
field_id: 102
|
||||||
|
predicates: <
|
||||||
|
binary_range_expr: <
|
||||||
|
column_info: <
|
||||||
|
field_id: 100
|
||||||
|
data_type: Int64
|
||||||
|
>
|
||||||
|
lower_inclusive: true,
|
||||||
|
upper_inclusive: false,
|
||||||
|
lower_value: <
|
||||||
|
int64_val: -1
|
||||||
|
>
|
||||||
|
upper_value: <
|
||||||
|
int64_val: 1
|
||||||
|
>
|
||||||
|
>
|
||||||
|
>
|
||||||
|
query_info: <
|
||||||
|
topk: 10
|
||||||
|
metric_type: "L2"
|
||||||
|
hints: "iterative_filter"
|
||||||
|
search_params: "{\"nprobe\": 4}"
|
||||||
|
>
|
||||||
|
placeholder_tag: "$0">)";
|
||||||
|
proto::plan::PlanNode plan_node;
|
||||||
|
auto ok =
|
||||||
|
google::protobuf::TextFormat::ParseFromString(raw_plan, &plan_node);
|
||||||
|
auto plan = CreateSearchPlanFromPlanNode(*schema, plan_node);
|
||||||
|
auto num_queries = 1;
|
||||||
|
auto seed = 1024;
|
||||||
|
auto ph_group_raw = CreatePlaceholderGroup(num_queries, dim, seed);
|
||||||
|
auto ph_group =
|
||||||
|
ParsePlaceholderGroup(plan.get(), ph_group_raw.SerializeAsString());
|
||||||
|
auto search_result =
|
||||||
|
segment_growing_impl->Search(plan.get(), ph_group.get(), 1L << 63);
|
||||||
|
|
||||||
|
const char* raw_plan2 = R"(vector_anns: <
|
||||||
|
field_id: 102
|
||||||
|
predicates: <
|
||||||
|
binary_range_expr: <
|
||||||
|
column_info: <
|
||||||
|
field_id: 100
|
||||||
|
data_type: Int64
|
||||||
|
>
|
||||||
|
lower_inclusive: true,
|
||||||
|
upper_inclusive: false,
|
||||||
|
lower_value: <
|
||||||
|
int64_val: -1
|
||||||
|
>
|
||||||
|
upper_value: <
|
||||||
|
int64_val: 1
|
||||||
|
>
|
||||||
|
>
|
||||||
|
>
|
||||||
|
query_info: <
|
||||||
|
topk: 10
|
||||||
|
metric_type: "L2"
|
||||||
|
search_params: "{\"nprobe\": 4}"
|
||||||
|
>
|
||||||
|
placeholder_tag: "$0">)";
|
||||||
|
proto::plan::PlanNode plan_node2;
|
||||||
|
auto ok2 = google::protobuf::TextFormat::ParseFromString(raw_plan2,
|
||||||
|
&plan_node2);
|
||||||
|
auto plan2 = CreateSearchPlanFromPlanNode(*schema, plan_node2);
|
||||||
|
auto search_result2 =
|
||||||
|
segment_growing_impl->Search(plan2.get(), ph_group.get(), 1L << 63);
|
||||||
|
CheckFilterSearchResult(
|
||||||
|
*search_result, *search_result2, topK, num_queries);
|
||||||
|
}
|
||||||
|
}
|
||||||
@ -290,12 +290,32 @@ TEST(StringExpr, Term) {
|
|||||||
MAX_TIMESTAMP);
|
MAX_TIMESTAMP);
|
||||||
EXPECT_EQ(final.size(), N * num_iters);
|
EXPECT_EQ(final.size(), N * num_iters);
|
||||||
|
|
||||||
|
// specify some offsets and do scalar filtering on these offsets
|
||||||
|
milvus::exec::OffsetVector offsets;
|
||||||
|
offsets.reserve(N * num_iters / 2);
|
||||||
|
for (auto i = 0; i < N * num_iters; ++i) {
|
||||||
|
if (i % 2 == 0) {
|
||||||
|
offsets.emplace_back(i);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
auto col_vec = milvus::test::gen_filter_res(
|
||||||
|
plan->plan_node_->plannodes_->sources()[0]->sources()[0].get(),
|
||||||
|
seg_promote,
|
||||||
|
N * num_iters,
|
||||||
|
MAX_TIMESTAMP,
|
||||||
|
&offsets);
|
||||||
|
BitsetTypeView view(col_vec->GetRawData(), col_vec->size());
|
||||||
|
EXPECT_EQ(view.size(), N * num_iters / 2);
|
||||||
|
|
||||||
for (int i = 0; i < N * num_iters; ++i) {
|
for (int i = 0; i < N * num_iters; ++i) {
|
||||||
auto ans = final[i];
|
auto ans = final[i];
|
||||||
|
|
||||||
auto val = str_col[i];
|
auto val = str_col[i];
|
||||||
auto ref = std::find(term.begin(), term.end(), val) != term.end();
|
auto ref = std::find(term.begin(), term.end(), val) != term.end();
|
||||||
ASSERT_EQ(ans, ref) << "@" << i << "!!" << val;
|
ASSERT_EQ(ans, ref) << "@" << i << "!!" << val;
|
||||||
|
if (i % 2 == 0) {
|
||||||
|
ASSERT_EQ(view[int(i / 2)], ref) << "@" << i << "!!" << val;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -363,6 +383,23 @@ TEST(StringExpr, TermNullable) {
|
|||||||
MAX_TIMESTAMP);
|
MAX_TIMESTAMP);
|
||||||
EXPECT_EQ(final.size(), N * num_iters);
|
EXPECT_EQ(final.size(), N * num_iters);
|
||||||
|
|
||||||
|
// specify some offsets and do scalar filtering on these offsets
|
||||||
|
milvus::exec::OffsetVector offsets;
|
||||||
|
offsets.reserve(N * num_iters / 2);
|
||||||
|
for (auto i = 0; i < N * num_iters; ++i) {
|
||||||
|
if (i % 2 == 0) {
|
||||||
|
offsets.emplace_back(i);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
auto col_vec = milvus::test::gen_filter_res(
|
||||||
|
plan->plan_node_->plannodes_->sources()[0]->sources()[0].get(),
|
||||||
|
seg_promote,
|
||||||
|
N * num_iters,
|
||||||
|
MAX_TIMESTAMP,
|
||||||
|
&offsets);
|
||||||
|
BitsetTypeView view(col_vec->GetRawData(), col_vec->size());
|
||||||
|
EXPECT_EQ(view.size(), N * num_iters / 2);
|
||||||
|
|
||||||
for (int i = 0; i < N * num_iters; ++i) {
|
for (int i = 0; i < N * num_iters; ++i) {
|
||||||
auto ans = final[i];
|
auto ans = final[i];
|
||||||
if (!valid_data[i]) {
|
if (!valid_data[i]) {
|
||||||
@ -372,6 +409,9 @@ TEST(StringExpr, TermNullable) {
|
|||||||
auto val = str_col[i];
|
auto val = str_col[i];
|
||||||
auto ref = std::find(term.begin(), term.end(), val) != term.end();
|
auto ref = std::find(term.begin(), term.end(), val) != term.end();
|
||||||
ASSERT_EQ(ans, ref) << "@" << i << "!!" << val;
|
ASSERT_EQ(ans, ref) << "@" << i << "!!" << val;
|
||||||
|
if (i % 2 == 0) {
|
||||||
|
ASSERT_EQ(view[int(i / 2)], ref) << "@" << i << "!!" << val;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -481,6 +521,23 @@ TEST(StringExpr, Compare) {
|
|||||||
MAX_TIMESTAMP);
|
MAX_TIMESTAMP);
|
||||||
EXPECT_EQ(final.size(), N * num_iters);
|
EXPECT_EQ(final.size(), N * num_iters);
|
||||||
|
|
||||||
|
// specify some offsets and do scalar filtering on these offsets
|
||||||
|
milvus::exec::OffsetVector offsets;
|
||||||
|
offsets.reserve(N * num_iters / 2);
|
||||||
|
for (auto i = 0; i < N * num_iters; ++i) {
|
||||||
|
if (i % 2 == 0) {
|
||||||
|
offsets.emplace_back(i);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
auto col_vec = milvus::test::gen_filter_res(
|
||||||
|
plan->plan_node_->plannodes_->sources()[0]->sources()[0].get(),
|
||||||
|
seg_promote,
|
||||||
|
N * num_iters,
|
||||||
|
MAX_TIMESTAMP,
|
||||||
|
&offsets);
|
||||||
|
BitsetTypeView view(col_vec->GetRawData(), col_vec->size());
|
||||||
|
EXPECT_EQ(view.size(), N * num_iters / 2);
|
||||||
|
|
||||||
for (int i = 0; i < N * num_iters; ++i) {
|
for (int i = 0; i < N * num_iters; ++i) {
|
||||||
auto ans = final[i];
|
auto ans = final[i];
|
||||||
|
|
||||||
@ -488,6 +545,10 @@ TEST(StringExpr, Compare) {
|
|||||||
auto another_val = another_str_col[i];
|
auto another_val = another_str_col[i];
|
||||||
auto ref = ref_func(val, another_val);
|
auto ref = ref_func(val, another_val);
|
||||||
ASSERT_EQ(ans, ref) << "@" << op << "@" << i << "!!" << val;
|
ASSERT_EQ(ans, ref) << "@" << op << "@" << i << "!!" << val;
|
||||||
|
if (i % 2 == 0) {
|
||||||
|
ASSERT_EQ(view[int(i / 2)], ref)
|
||||||
|
<< "@" << op << "@" << i << "!!" << val;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -609,6 +670,23 @@ TEST(StringExpr, CompareNullable) {
|
|||||||
MAX_TIMESTAMP);
|
MAX_TIMESTAMP);
|
||||||
EXPECT_EQ(final.size(), N * num_iters);
|
EXPECT_EQ(final.size(), N * num_iters);
|
||||||
|
|
||||||
|
// specify some offsets and do scalar filtering on these offsets
|
||||||
|
milvus::exec::OffsetVector offsets;
|
||||||
|
offsets.reserve(N * num_iters / 2);
|
||||||
|
for (auto i = 0; i < N * num_iters; ++i) {
|
||||||
|
if (i % 2 == 0) {
|
||||||
|
offsets.emplace_back(i);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
auto col_vec = milvus::test::gen_filter_res(
|
||||||
|
plan->plan_node_->plannodes_->sources()[0]->sources()[0].get(),
|
||||||
|
seg_promote,
|
||||||
|
N * num_iters,
|
||||||
|
MAX_TIMESTAMP,
|
||||||
|
&offsets);
|
||||||
|
BitsetTypeView view(col_vec->GetRawData(), col_vec->size());
|
||||||
|
EXPECT_EQ(view.size(), N * num_iters / 2);
|
||||||
|
|
||||||
for (int i = 0; i < N * num_iters; ++i) {
|
for (int i = 0; i < N * num_iters; ++i) {
|
||||||
auto ans = final[i];
|
auto ans = final[i];
|
||||||
if (!valid_data[i]) {
|
if (!valid_data[i]) {
|
||||||
@ -619,6 +697,10 @@ TEST(StringExpr, CompareNullable) {
|
|||||||
auto another_val = another_str_col[i];
|
auto another_val = another_str_col[i];
|
||||||
auto ref = ref_func(val, another_val);
|
auto ref = ref_func(val, another_val);
|
||||||
ASSERT_EQ(ans, ref) << "@" << op << "@" << i << "!!" << val;
|
ASSERT_EQ(ans, ref) << "@" << op << "@" << i << "!!" << val;
|
||||||
|
if (i % 2 == 0) {
|
||||||
|
ASSERT_EQ(view[int(i / 2)], ref)
|
||||||
|
<< "@" << op << "@" << i << "!!" << val;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -741,16 +823,40 @@ TEST(StringExpr, CompareNullable2) {
|
|||||||
MAX_TIMESTAMP);
|
MAX_TIMESTAMP);
|
||||||
EXPECT_EQ(final.size(), N * num_iters);
|
EXPECT_EQ(final.size(), N * num_iters);
|
||||||
|
|
||||||
|
// specify some offsets and do scalar filtering on these offsets
|
||||||
|
milvus::exec::OffsetVector offsets;
|
||||||
|
offsets.reserve(N * num_iters / 2);
|
||||||
|
for (auto i = 0; i < N * num_iters; ++i) {
|
||||||
|
if (i % 2 == 0) {
|
||||||
|
offsets.emplace_back(i);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
auto col_vec = milvus::test::gen_filter_res(
|
||||||
|
plan->plan_node_->plannodes_->sources()[0]->sources()[0].get(),
|
||||||
|
seg_promote,
|
||||||
|
N * num_iters,
|
||||||
|
MAX_TIMESTAMP,
|
||||||
|
&offsets);
|
||||||
|
BitsetTypeView view(col_vec->GetRawData(), col_vec->size());
|
||||||
|
EXPECT_EQ(view.size(), N * num_iters / 2);
|
||||||
|
|
||||||
for (int i = 0; i < N * num_iters; ++i) {
|
for (int i = 0; i < N * num_iters; ++i) {
|
||||||
auto ans = final[i];
|
auto ans = final[i];
|
||||||
if (!valid_data[i]) {
|
if (!valid_data[i]) {
|
||||||
ASSERT_EQ(ans, false);
|
ASSERT_EQ(ans, false);
|
||||||
|
if (i % 2 == 0) {
|
||||||
|
ASSERT_EQ(view[int(i / 2)], false);
|
||||||
|
}
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
auto val = str_col[i];
|
auto val = str_col[i];
|
||||||
auto another_val = another_str_col[i];
|
auto another_val = another_str_col[i];
|
||||||
auto ref = ref_func(val, another_val);
|
auto ref = ref_func(val, another_val);
|
||||||
ASSERT_EQ(ans, ref) << "@" << op << "@" << i << "!!" << val;
|
ASSERT_EQ(ans, ref) << "@" << op << "@" << i << "!!" << val;
|
||||||
|
if (i % 2 == 0) {
|
||||||
|
ASSERT_EQ(view[int(i / 2)], ref)
|
||||||
|
<< "@" << op << "@" << i << "!!" << val;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -840,6 +946,23 @@ TEST(StringExpr, UnaryRange) {
|
|||||||
MAX_TIMESTAMP);
|
MAX_TIMESTAMP);
|
||||||
EXPECT_EQ(final.size(), N * num_iters);
|
EXPECT_EQ(final.size(), N * num_iters);
|
||||||
|
|
||||||
|
// specify some offsets and do scalar filtering on these offsets
|
||||||
|
milvus::exec::OffsetVector offsets;
|
||||||
|
offsets.reserve(N * num_iters / 2);
|
||||||
|
for (auto i = 0; i < N * num_iters; ++i) {
|
||||||
|
if (i % 2 == 0) {
|
||||||
|
offsets.emplace_back(i);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
auto col_vec = milvus::test::gen_filter_res(
|
||||||
|
plan->plan_node_->plannodes_->sources()[0]->sources()[0].get(),
|
||||||
|
seg_promote,
|
||||||
|
N * num_iters,
|
||||||
|
MAX_TIMESTAMP,
|
||||||
|
&offsets);
|
||||||
|
BitsetTypeView view(col_vec->GetRawData(), col_vec->size());
|
||||||
|
EXPECT_EQ(view.size(), N * num_iters / 2);
|
||||||
|
|
||||||
for (int i = 0; i < N * num_iters; ++i) {
|
for (int i = 0; i < N * num_iters; ++i) {
|
||||||
auto ans = final[i];
|
auto ans = final[i];
|
||||||
|
|
||||||
@ -847,6 +970,10 @@ TEST(StringExpr, UnaryRange) {
|
|||||||
auto ref = ref_func(val);
|
auto ref = ref_func(val);
|
||||||
ASSERT_EQ(ans, ref)
|
ASSERT_EQ(ans, ref)
|
||||||
<< "@" << op << "@" << value << "@" << i << "!!" << val;
|
<< "@" << op << "@" << value << "@" << i << "!!" << val;
|
||||||
|
if (i % 2 == 0) {
|
||||||
|
ASSERT_EQ(view[int(i / 2)], ref)
|
||||||
|
<< "@" << op << "@" << value << "@" << i << "!!" << val;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -947,6 +1074,23 @@ TEST(StringExpr, UnaryRangeNullable) {
|
|||||||
MAX_TIMESTAMP);
|
MAX_TIMESTAMP);
|
||||||
EXPECT_EQ(final.size(), N * num_iters);
|
EXPECT_EQ(final.size(), N * num_iters);
|
||||||
|
|
||||||
|
// specify some offsets and do scalar filtering on these offsets
|
||||||
|
milvus::exec::OffsetVector offsets;
|
||||||
|
offsets.reserve(N * num_iters / 2);
|
||||||
|
for (auto i = 0; i < N * num_iters; ++i) {
|
||||||
|
if (i % 2 == 0) {
|
||||||
|
offsets.emplace_back(i);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
auto col_vec = milvus::test::gen_filter_res(
|
||||||
|
plan->plan_node_->plannodes_->sources()[0]->sources()[0].get(),
|
||||||
|
seg_promote,
|
||||||
|
N * num_iters,
|
||||||
|
MAX_TIMESTAMP,
|
||||||
|
&offsets);
|
||||||
|
BitsetTypeView view(col_vec->GetRawData(), col_vec->size());
|
||||||
|
EXPECT_EQ(view.size(), N * num_iters / 2);
|
||||||
|
|
||||||
for (int i = 0; i < N * num_iters; ++i) {
|
for (int i = 0; i < N * num_iters; ++i) {
|
||||||
auto ans = final[i];
|
auto ans = final[i];
|
||||||
if (!valid_data[i]) {
|
if (!valid_data[i]) {
|
||||||
@ -957,6 +1101,10 @@ TEST(StringExpr, UnaryRangeNullable) {
|
|||||||
auto ref = ref_func(val);
|
auto ref = ref_func(val);
|
||||||
ASSERT_EQ(ans, ref)
|
ASSERT_EQ(ans, ref)
|
||||||
<< "@" << op << "@" << value << "@" << i << "!!" << val;
|
<< "@" << op << "@" << value << "@" << i << "!!" << val;
|
||||||
|
if (i % 2 == 0) {
|
||||||
|
ASSERT_EQ(view[int(i / 2)], ref)
|
||||||
|
<< "@" << op << "@" << value << "@" << i << "!!" << val;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -1064,6 +1212,23 @@ TEST(StringExpr, BinaryRange) {
|
|||||||
MAX_TIMESTAMP);
|
MAX_TIMESTAMP);
|
||||||
EXPECT_EQ(final.size(), N * num_iters);
|
EXPECT_EQ(final.size(), N * num_iters);
|
||||||
|
|
||||||
|
// specify some offsets and do scalar filtering on these offsets
|
||||||
|
milvus::exec::OffsetVector offsets;
|
||||||
|
offsets.reserve(N * num_iters / 2);
|
||||||
|
for (auto i = 0; i < N * num_iters; ++i) {
|
||||||
|
if (i % 2 == 0) {
|
||||||
|
offsets.emplace_back(i);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
auto col_vec = milvus::test::gen_filter_res(
|
||||||
|
plan->plan_node_->plannodes_->sources()[0]->sources()[0].get(),
|
||||||
|
seg_promote,
|
||||||
|
N * num_iters,
|
||||||
|
MAX_TIMESTAMP,
|
||||||
|
&offsets);
|
||||||
|
BitsetTypeView view(col_vec->GetRawData(), col_vec->size());
|
||||||
|
EXPECT_EQ(view.size(), N * num_iters / 2);
|
||||||
|
|
||||||
for (int i = 0; i < N * num_iters; ++i) {
|
for (int i = 0; i < N * num_iters; ++i) {
|
||||||
auto ans = final[i];
|
auto ans = final[i];
|
||||||
|
|
||||||
@ -1072,6 +1237,11 @@ TEST(StringExpr, BinaryRange) {
|
|||||||
ASSERT_EQ(ans, ref)
|
ASSERT_EQ(ans, ref)
|
||||||
<< "@" << lb_inclusive << "@" << ub_inclusive << "@" << lb
|
<< "@" << lb_inclusive << "@" << ub_inclusive << "@" << lb
|
||||||
<< "@" << ub << "@" << i << "!!" << val;
|
<< "@" << ub << "@" << i << "!!" << val;
|
||||||
|
if (i % 2 == 0) {
|
||||||
|
ASSERT_EQ(view[int(i / 2)], ref)
|
||||||
|
<< "@" << lb_inclusive << "@" << ub_inclusive << "@" << lb
|
||||||
|
<< "@" << ub << "@" << i << "!!" << val;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -1191,10 +1361,30 @@ TEST(StringExpr, BinaryRangeNullable) {
|
|||||||
MAX_TIMESTAMP);
|
MAX_TIMESTAMP);
|
||||||
EXPECT_EQ(final.size(), N * num_iters);
|
EXPECT_EQ(final.size(), N * num_iters);
|
||||||
|
|
||||||
|
// specify some offsets and do scalar filtering on these offsets
|
||||||
|
milvus::exec::OffsetVector offsets;
|
||||||
|
offsets.reserve(N * num_iters / 2);
|
||||||
|
for (auto i = 0; i < N * num_iters; ++i) {
|
||||||
|
if (i % 2 == 0) {
|
||||||
|
offsets.emplace_back(i);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
auto col_vec = milvus::test::gen_filter_res(
|
||||||
|
plan->plan_node_->plannodes_->sources()[0]->sources()[0].get(),
|
||||||
|
seg_promote,
|
||||||
|
N * num_iters,
|
||||||
|
MAX_TIMESTAMP,
|
||||||
|
&offsets);
|
||||||
|
BitsetTypeView view(col_vec->GetRawData(), col_vec->size());
|
||||||
|
EXPECT_EQ(view.size(), N * num_iters / 2);
|
||||||
|
|
||||||
for (int i = 0; i < N * num_iters; ++i) {
|
for (int i = 0; i < N * num_iters; ++i) {
|
||||||
auto ans = final[i];
|
auto ans = final[i];
|
||||||
if (!valid_data[i]) {
|
if (!valid_data[i]) {
|
||||||
ASSERT_EQ(ans, false);
|
ASSERT_EQ(ans, false);
|
||||||
|
if (i % 2 == 0) {
|
||||||
|
ASSERT_EQ(view[int(i / 2)], false);
|
||||||
|
}
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
auto val = str_col[i];
|
auto val = str_col[i];
|
||||||
@ -1202,6 +1392,11 @@ TEST(StringExpr, BinaryRangeNullable) {
|
|||||||
ASSERT_EQ(ans, ref)
|
ASSERT_EQ(ans, ref)
|
||||||
<< "@" << lb_inclusive << "@" << ub_inclusive << "@" << lb
|
<< "@" << lb_inclusive << "@" << ub_inclusive << "@" << lb
|
||||||
<< "@" << ub << "@" << i << "!!" << val;
|
<< "@" << ub << "@" << i << "!!" << val;
|
||||||
|
if (i % 2 == 0) {
|
||||||
|
ASSERT_EQ(view[int(i / 2)], ref)
|
||||||
|
<< "@" << lb_inclusive << "@" << ub_inclusive << "@" << lb
|
||||||
|
<< "@" << ub << "@" << i << "!!" << val;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@ -16,6 +16,7 @@
|
|||||||
|
|
||||||
#include "common/Consts.h"
|
#include "common/Consts.h"
|
||||||
#include "expr/ITypeExpr.h"
|
#include "expr/ITypeExpr.h"
|
||||||
|
#include "exec/expression/Expr.h"
|
||||||
#include "pb/plan.pb.h"
|
#include "pb/plan.pb.h"
|
||||||
#include "plan/PlanNode.h"
|
#include "plan/PlanNode.h"
|
||||||
|
|
||||||
@ -104,4 +105,30 @@ CreateSearchPlanByExpr(std::shared_ptr<milvus::expr::ITypeExpr> expr) {
|
|||||||
return plannode;
|
return plannode;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
inline ColumnVectorPtr
|
||||||
|
gen_filter_res(milvus::plan::PlanNode* plan_node,
|
||||||
|
const milvus::segcore::SegmentInternalInterface* segment,
|
||||||
|
uint64_t active_count,
|
||||||
|
uint64_t timestamp,
|
||||||
|
FixedVector<int32_t>* offsets = nullptr) {
|
||||||
|
auto filter_node = dynamic_cast<milvus::plan::FilterBitsNode*>(plan_node);
|
||||||
|
assert(filter_node != nullptr);
|
||||||
|
std::vector<milvus::expr::TypedExprPtr> filters;
|
||||||
|
filters.emplace_back(filter_node->filter());
|
||||||
|
auto query_context = std::make_shared<milvus::exec::QueryContext>(
|
||||||
|
DEAFULT_QUERY_ID, segment, active_count, timestamp);
|
||||||
|
|
||||||
|
std::unique_ptr<milvus::exec::ExecContext> exec_context =
|
||||||
|
std::make_unique<milvus::exec::ExecContext>(query_context.get());
|
||||||
|
auto exprs_ =
|
||||||
|
std::make_unique<milvus::exec::ExprSet>(filters, exec_context.get());
|
||||||
|
std::vector<VectorPtr> results_;
|
||||||
|
milvus::exec::EvalCtx eval_ctx(exec_context.get(), exprs_.get());
|
||||||
|
eval_ctx.set_offset_input(offsets);
|
||||||
|
exprs_->Eval(0, 1, true, eval_ctx, results_);
|
||||||
|
|
||||||
|
auto col_vec = std::dynamic_pointer_cast<milvus::ColumnVector>(results_[0]);
|
||||||
|
return col_vec;
|
||||||
|
}
|
||||||
|
|
||||||
} // namespace milvus::test
|
} // namespace milvus::test
|
||||||
|
|||||||
@ -975,6 +975,7 @@ func generateSearchParams(reqSearchParams searchParams) []*commonpb.KeyValuePair
|
|||||||
bs, _ := json.Marshal(reqSearchParams.Params)
|
bs, _ := json.Marshal(reqSearchParams.Params)
|
||||||
searchParams = append(searchParams, &commonpb.KeyValuePair{Key: Params, Value: string(bs)})
|
searchParams = append(searchParams, &commonpb.KeyValuePair{Key: Params, Value: string(bs)})
|
||||||
searchParams = append(searchParams, &commonpb.KeyValuePair{Key: common.IgnoreGrowing, Value: strconv.FormatBool(reqSearchParams.IgnoreGrowing)})
|
searchParams = append(searchParams, &commonpb.KeyValuePair{Key: common.IgnoreGrowing, Value: strconv.FormatBool(reqSearchParams.IgnoreGrowing)})
|
||||||
|
searchParams = append(searchParams, &commonpb.KeyValuePair{Key: common.HintsKey, Value: reqSearchParams.Hints})
|
||||||
// need to exposure ParamRoundDecimal in req?
|
// need to exposure ParamRoundDecimal in req?
|
||||||
searchParams = append(searchParams, &commonpb.KeyValuePair{Key: ParamRoundDecimal, Value: "-1"})
|
searchParams = append(searchParams, &commonpb.KeyValuePair{Key: ParamRoundDecimal, Value: "-1"})
|
||||||
return searchParams
|
return searchParams
|
||||||
|
|||||||
@ -181,6 +181,7 @@ type searchParams struct {
|
|||||||
MetricType string `json:"metricType"`
|
MetricType string `json:"metricType"`
|
||||||
Params map[string]interface{} `json:"params"`
|
Params map[string]interface{} `json:"params"`
|
||||||
IgnoreGrowing bool `json:"ignoreGrowing"`
|
IgnoreGrowing bool `json:"ignoreGrowing"`
|
||||||
|
Hints string `json:"hints"`
|
||||||
}
|
}
|
||||||
|
|
||||||
type SearchReqV2 struct {
|
type SearchReqV2 struct {
|
||||||
|
|||||||
@ -66,6 +66,7 @@ message QueryInfo {
|
|||||||
bool strict_group_size = 9;
|
bool strict_group_size = 9;
|
||||||
double bm25_avgdl = 10;
|
double bm25_avgdl = 10;
|
||||||
int64 query_field_id =11;
|
int64 query_field_id =11;
|
||||||
|
string hints = 12;
|
||||||
}
|
}
|
||||||
|
|
||||||
message ColumnInfo {
|
message ColumnInfo {
|
||||||
|
|||||||
@ -153,6 +153,11 @@ func parseSearchInfo(searchParamsPair []*commonpb.KeyValuePair, schema *schemapb
|
|||||||
roundDecimalStr = "-1"
|
roundDecimalStr = "-1"
|
||||||
}
|
}
|
||||||
|
|
||||||
|
hints, err := funcutil.GetAttrByKeyFromRepeatedKV(common.HintsKey, searchParamsPair)
|
||||||
|
if err != nil {
|
||||||
|
hints = ""
|
||||||
|
}
|
||||||
|
|
||||||
roundDecimal, err := strconv.ParseInt(roundDecimalStr, 0, 64)
|
roundDecimal, err := strconv.ParseInt(roundDecimalStr, 0, 64)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return &SearchInfo{planInfo: nil, offset: 0, isIterator: false, parseError: fmt.Errorf("%s [%s] is invalid, should be -1 or an integer in range [0, 6]", RoundDecimalKey, roundDecimalStr)}
|
return &SearchInfo{planInfo: nil, offset: 0, isIterator: false, parseError: fmt.Errorf("%s [%s] is invalid, should be -1 or an integer in range [0, 6]", RoundDecimalKey, roundDecimalStr)}
|
||||||
@ -200,6 +205,7 @@ func parseSearchInfo(searchParamsPair []*commonpb.KeyValuePair, schema *schemapb
|
|||||||
GroupByFieldId: groupByFieldId,
|
GroupByFieldId: groupByFieldId,
|
||||||
GroupSize: groupSize,
|
GroupSize: groupSize,
|
||||||
StrictGroupSize: strictGroupSize,
|
StrictGroupSize: strictGroupSize,
|
||||||
|
Hints: hints,
|
||||||
},
|
},
|
||||||
offset: offset,
|
offset: offset,
|
||||||
isIterator: isIterator,
|
isIterator: isIterator,
|
||||||
|
|||||||
@ -138,6 +138,7 @@ const (
|
|||||||
BitmapCardinalityLimitKey = "bitmap_cardinality_limit"
|
BitmapCardinalityLimitKey = "bitmap_cardinality_limit"
|
||||||
IgnoreGrowing = "ignore_growing"
|
IgnoreGrowing = "ignore_growing"
|
||||||
ConsistencyLevel = "consistency_level"
|
ConsistencyLevel = "consistency_level"
|
||||||
|
HintsKey = "hints"
|
||||||
)
|
)
|
||||||
|
|
||||||
// Doc-in-doc-out
|
// Doc-in-doc-out
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user