mirror of
https://gitee.com/milvus-io/milvus.git
synced 2026-01-07 19:31:51 +08:00
fix:disable using shredding for json_path contains digital (#44724)
#44132 Signed-off-by: luzhang <luzhang@zilliz.com> Co-authored-by: luzhang <luzhang@zilliz.com>
This commit is contained in:
parent
53e8f150e8
commit
3dd5deb70a
@ -213,6 +213,20 @@ Join(const std::vector<T>& items, const std::string& delimiter) {
|
||||
return ss.str();
|
||||
}
|
||||
|
||||
// Returns true when `str` is written as a base-10 integer: optional leading
// whitespace, an optional '+' or '-' sign, then one or more decimal digits
// and nothing else. Returns false for the empty string, for a lone sign,
// and for any string with trailing non-digit characters.
//
// This accepts the same spellings the previous std::stoi-based check did,
// with one deliberate difference: digit strings too large for `int`
// (e.g. "99999999999999999999") are now reported as integers. std::stoi
// threw std::out_of_range for those, which was swallowed and turned into
// `false`. The scan also avoids throwing for ordinary non-numeric input.
inline bool
IsInteger(const std::string& str) {
    const size_t len = str.size();
    size_t idx = 0;

    // std::stoi skipped leading whitespace; keep accepting it.
    while (idx < len) {
        const char c = str[idx];
        const bool is_space = c == ' ' || c == '\t' || c == '\n' ||
                              c == '\v' || c == '\f' || c == '\r';
        if (!is_space) {
            break;
        }
        ++idx;
    }

    // Optional single sign character.
    if (idx < len && (str[idx] == '+' || str[idx] == '-')) {
        ++idx;
    }

    // At least one digit is required.
    if (idx == len) {
        return false;
    }

    // Everything that remains must be a decimal digit.
    for (; idx < len; ++idx) {
        if (str[idx] < '0' || str[idx] > '9') {
            return false;
        }
    }
    return true;
}
|
||||
|
||||
inline std::string
|
||||
PrintBitsetTypeView(const BitsetTypeView& view) {
|
||||
std::stringstream ss;
|
||||
|
||||
@ -304,10 +304,9 @@ class BsonView {
|
||||
AssertInfo(offset < size_, "bson offset out of range");
|
||||
const uint8_t* ptr = data_ + offset;
|
||||
|
||||
// check type
|
||||
AssertInfo(static_cast<bsoncxx::type>(*ptr) == bsoncxx::type::k_array,
|
||||
"ParseAsArrayAtOffset expects an array at offset {}",
|
||||
offset);
|
||||
if (static_cast<bsoncxx::type>(*ptr) != bsoncxx::type::k_array) {
|
||||
return std::nullopt;
|
||||
}
|
||||
ptr++;
|
||||
|
||||
// skip key
|
||||
|
||||
@ -422,7 +422,8 @@ PhyBinaryRangeFilterExpr::ExecRangeVisitorImplForJson(EvalCtx& context) {
|
||||
const auto& bitmap_input = context.get_bitmap_input();
|
||||
auto* input = context.get_offset_input();
|
||||
FieldId field_id = expr_->column_.field_id_;
|
||||
if (!has_offset_input_ && CanUseJsonStats(context, field_id)) {
|
||||
if (!has_offset_input_ &&
|
||||
CanUseJsonStats(context, field_id, expr_->column_.nested_path_)) {
|
||||
return ExecRangeVisitorImplForJsonStats<ValueType>();
|
||||
}
|
||||
auto real_batch_size =
|
||||
|
||||
@ -116,7 +116,8 @@ PhyExistsFilterExpr::EvalJsonExistsForDataSegment(EvalCtx& context) {
|
||||
auto* input = context.get_offset_input();
|
||||
const auto& bitmap_input = context.get_bitmap_input();
|
||||
FieldId field_id = expr_->column_.field_id_;
|
||||
if (CanUseJsonStats(context, field_id) && !has_offset_input_) {
|
||||
if (CanUseJsonStats(context, field_id, expr_->column_.nested_path_) &&
|
||||
!has_offset_input_) {
|
||||
return EvalJsonExistsForDataSegmentByStats();
|
||||
}
|
||||
auto real_batch_size =
|
||||
|
||||
@ -1404,8 +1404,22 @@ class SegmentExpr : public Expr {
|
||||
}
|
||||
|
||||
bool
|
||||
CanUseJsonStats(EvalCtx& context, FieldId field_id) const {
|
||||
return PlanUseJsonStats(context) && HasJsonStats(field_id);
|
||||
CanUseJsonStats(EvalCtx& context,
|
||||
FieldId field_id,
|
||||
const std::vector<std::string>& nested_path) const {
|
||||
// if path contains integer, we can't use json stats such as "a.1.b", "a.1",
|
||||
// because we can't know the integer is a key or a array indice
|
||||
auto path_contains_integer = [](const std::vector<std::string>& path) {
|
||||
for (auto i = 0; i < path.size(); i++) {
|
||||
if (milvus::IsInteger(path[i])) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
return false;
|
||||
};
|
||||
|
||||
return PlanUseJsonStats(context) && HasJsonStats(field_id) &&
|
||||
!path_contains_integer(nested_path);
|
||||
}
|
||||
|
||||
virtual bool
|
||||
|
||||
@ -295,7 +295,8 @@ PhyJsonContainsFilterExpr::ExecJsonContains(EvalCtx& context) {
|
||||
const auto& bitmap_input = context.get_bitmap_input();
|
||||
|
||||
FieldId field_id = expr_->column_.field_id_;
|
||||
if (!has_offset_input_ && CanUseJsonStats(context, field_id)) {
|
||||
if (!has_offset_input_ &&
|
||||
CanUseJsonStats(context, field_id, expr_->column_.nested_path_)) {
|
||||
return ExecJsonContainsByStats<ExprValueType>();
|
||||
}
|
||||
|
||||
@ -509,7 +510,8 @@ PhyJsonContainsFilterExpr::ExecJsonContainsArray(EvalCtx& context) {
|
||||
auto* input = context.get_offset_input();
|
||||
const auto& bitmap_input = context.get_bitmap_input();
|
||||
FieldId field_id = expr_->column_.field_id_;
|
||||
if (!has_offset_input_ && CanUseJsonStats(context, field_id)) {
|
||||
if (!has_offset_input_ &&
|
||||
CanUseJsonStats(context, field_id, expr_->column_.nested_path_)) {
|
||||
return ExecJsonContainsArrayByStats();
|
||||
}
|
||||
auto real_batch_size =
|
||||
@ -796,7 +798,8 @@ PhyJsonContainsFilterExpr::ExecJsonContainsAll(EvalCtx& context) {
|
||||
const auto& bitmap_input = context.get_bitmap_input();
|
||||
|
||||
FieldId field_id = expr_->column_.field_id_;
|
||||
if (!has_offset_input_ && CanUseJsonStats(context, field_id)) {
|
||||
if (!has_offset_input_ &&
|
||||
CanUseJsonStats(context, field_id, expr_->column_.nested_path_)) {
|
||||
return ExecJsonContainsAllByStats<ExprValueType>();
|
||||
}
|
||||
auto real_batch_size =
|
||||
@ -991,7 +994,8 @@ PhyJsonContainsFilterExpr::ExecJsonContainsAllWithDiffType(EvalCtx& context) {
|
||||
auto* input = context.get_offset_input();
|
||||
const auto& bitmap_input = context.get_bitmap_input();
|
||||
FieldId field_id = expr_->column_.field_id_;
|
||||
if (!has_offset_input_ && CanUseJsonStats(context, field_id)) {
|
||||
if (!has_offset_input_ &&
|
||||
CanUseJsonStats(context, field_id, expr_->column_.nested_path_)) {
|
||||
return ExecJsonContainsAllWithDiffTypeByStats();
|
||||
}
|
||||
auto real_batch_size =
|
||||
@ -1315,7 +1319,8 @@ PhyJsonContainsFilterExpr::ExecJsonContainsAllArray(EvalCtx& context) {
|
||||
auto* input = context.get_offset_input();
|
||||
const auto& bitmap_input = context.get_bitmap_input();
|
||||
FieldId field_id = expr_->column_.field_id_;
|
||||
if (!has_offset_input_ && CanUseJsonStats(context, field_id)) {
|
||||
if (!has_offset_input_ &&
|
||||
CanUseJsonStats(context, field_id, expr_->column_.nested_path_)) {
|
||||
return ExecJsonContainsAllArrayByStats();
|
||||
}
|
||||
auto real_batch_size =
|
||||
@ -1521,7 +1526,8 @@ PhyJsonContainsFilterExpr::ExecJsonContainsWithDiffType(EvalCtx& context) {
|
||||
auto* input = context.get_offset_input();
|
||||
const auto& bitmap_input = context.get_bitmap_input();
|
||||
FieldId field_id = expr_->column_.field_id_;
|
||||
if (!has_offset_input_ && CanUseJsonStats(context, field_id)) {
|
||||
if (!has_offset_input_ &&
|
||||
CanUseJsonStats(context, field_id, expr_->column_.nested_path_)) {
|
||||
return ExecJsonContainsWithDiffTypeByStats();
|
||||
}
|
||||
auto real_batch_size =
|
||||
|
||||
@ -709,7 +709,8 @@ PhyTermFilterExpr::ExecTermJsonFieldInVariable(EvalCtx& context) {
|
||||
auto* input = context.get_offset_input();
|
||||
const auto& bitmap_input = context.get_bitmap_input();
|
||||
FieldId field_id = expr_->column_.field_id_;
|
||||
if (!has_offset_input_ && CanUseJsonStats(context, field_id)) {
|
||||
if (!has_offset_input_ &&
|
||||
CanUseJsonStats(context, field_id, expr_->column_.nested_path_)) {
|
||||
return ExecJsonInVariableByStats<ValueType>();
|
||||
}
|
||||
|
||||
|
||||
@ -663,7 +663,8 @@ PhyUnaryRangeFilterExpr::ExecRangeVisitorImplJson(EvalCtx& context) {
|
||||
const auto& bitmap_input = context.get_bitmap_input();
|
||||
FieldId field_id = expr_->column_.field_id_;
|
||||
|
||||
if (!has_offset_input_ && CanUseJsonStats(context, field_id)) {
|
||||
if (!has_offset_input_ &&
|
||||
CanUseJsonStats(context, field_id, expr_->column_.nested_path_)) {
|
||||
return ExecRangeVisitorImplJsonByStats<ExprValueType>();
|
||||
}
|
||||
|
||||
@ -992,7 +993,10 @@ PhyUnaryRangeFilterExpr::ExecRangeVisitorImplJsonByStats() {
|
||||
pinned_json_stats_ = segment->GetJsonStats(op_ctx_, field_id);
|
||||
auto* index = pinned_json_stats_.get();
|
||||
Assert(index != nullptr);
|
||||
cached_index_chunk_res_ = std::make_shared<TargetBitmap>(active_count_);
|
||||
cached_index_chunk_res_ =
|
||||
(op_type == proto::plan::OpType::NotEqual)
|
||||
? std::make_shared<TargetBitmap>(active_count_, true)
|
||||
: std::make_shared<TargetBitmap>(active_count_);
|
||||
cached_index_chunk_valid_res_ =
|
||||
std::make_shared<TargetBitmap>(active_count_, true);
|
||||
TargetBitmapView res_view(*cached_index_chunk_res_);
|
||||
@ -1117,14 +1121,18 @@ PhyUnaryRangeFilterExpr::ExecRangeVisitorImplJsonByStats() {
|
||||
if (array_index != INVALID_ARRAY_INDEX) {
|
||||
auto array_value = bson.ParseAsArrayAtOffset(value_offset);
|
||||
if (!array_value.has_value()) {
|
||||
res_view[row_id] = false;
|
||||
// For NotEqual: path not exists means "not equal", keep true
|
||||
// For Equal: path not exists means no match, set false
|
||||
res_view[row_id] =
|
||||
(op_type == proto::plan::OpType::NotEqual);
|
||||
return;
|
||||
}
|
||||
auto sub_array = milvus::BsonView::GetNthElementInArray<
|
||||
bsoncxx::array::view>(array_value.value().data(),
|
||||
array_index);
|
||||
if (!sub_array.has_value()) {
|
||||
res_view[row_id] = false;
|
||||
res_view[row_id] =
|
||||
(op_type == proto::plan::OpType::NotEqual);
|
||||
return;
|
||||
}
|
||||
res_view[row_id] =
|
||||
@ -1134,7 +1142,8 @@ PhyUnaryRangeFilterExpr::ExecRangeVisitorImplJsonByStats() {
|
||||
} else {
|
||||
auto array_value = bson.ParseAsArrayAtOffset(value_offset);
|
||||
if (!array_value.has_value()) {
|
||||
res_view[row_id] = false;
|
||||
res_view[row_id] =
|
||||
(op_type == proto::plan::OpType::NotEqual);
|
||||
return;
|
||||
}
|
||||
res_view[row_id] =
|
||||
@ -1147,7 +1156,9 @@ PhyUnaryRangeFilterExpr::ExecRangeVisitorImplJsonByStats() {
|
||||
if (array_index != INVALID_ARRAY_INDEX) {
|
||||
auto array_value = bson.ParseAsArrayAtOffset(value_offset);
|
||||
if (!array_value.has_value()) {
|
||||
res_view[row_id] = false;
|
||||
// Path not exists: NotEqual->true, others->false
|
||||
res_view[row_id] =
|
||||
(op_type == proto::plan::OpType::NotEqual);
|
||||
return;
|
||||
}
|
||||
get_value = milvus::BsonView::GetNthElementInArray<GetType>(
|
||||
@ -1161,6 +1172,10 @@ PhyUnaryRangeFilterExpr::ExecRangeVisitorImplJsonByStats() {
|
||||
if (get_value.has_value()) {
|
||||
res_view[row_id] = UnaryCompare(
|
||||
get_value.value(), val, op_type);
|
||||
} else {
|
||||
// Type mismatch: NotEqual->true, others->false
|
||||
res_view[row_id] =
|
||||
(op_type == proto::plan::OpType::NotEqual);
|
||||
}
|
||||
return;
|
||||
}
|
||||
@ -1172,6 +1187,9 @@ PhyUnaryRangeFilterExpr::ExecRangeVisitorImplJsonByStats() {
|
||||
if (get_value.has_value()) {
|
||||
res_view[row_id] = UnaryCompare(
|
||||
get_value.value(), val, op_type);
|
||||
} else {
|
||||
res_view[row_id] =
|
||||
(op_type == proto::plan::OpType::NotEqual);
|
||||
}
|
||||
return;
|
||||
}
|
||||
@ -1187,6 +1205,9 @@ PhyUnaryRangeFilterExpr::ExecRangeVisitorImplJsonByStats() {
|
||||
if (get_value.has_value()) {
|
||||
res_view[row_id] = UnaryCompare(
|
||||
get_value.value(), val, op_type);
|
||||
} else {
|
||||
res_view[row_id] =
|
||||
(op_type == proto::plan::OpType::NotEqual);
|
||||
}
|
||||
return;
|
||||
}
|
||||
@ -1197,13 +1218,17 @@ PhyUnaryRangeFilterExpr::ExecRangeVisitorImplJsonByStats() {
|
||||
if (get_value.has_value()) {
|
||||
res_view[row_id] = UnaryCompare(
|
||||
get_value.value(), val, op_type);
|
||||
} else {
|
||||
res_view[row_id] =
|
||||
(op_type == proto::plan::OpType::NotEqual);
|
||||
}
|
||||
return;
|
||||
}
|
||||
}
|
||||
}
|
||||
if (!get_value.has_value()) {
|
||||
res_view[row_id] = false;
|
||||
res_view[row_id] =
|
||||
(op_type == proto::plan::OpType::NotEqual);
|
||||
return;
|
||||
}
|
||||
res_view[row_id] =
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user