fix: handle mixed int64/float types in BinaryRangeExpr for JSON fields (#46681)

test: add unit tests for mixed int64/float types in BinaryRangeExpr

When processing binary range expressions (e.g., `x > 499 && x <= 512.0`)
on JSON/dynamic fields with expression templates, the lower and upper
bounds could have different numeric types (int64 vs float64). This
caused an assertion failure in GetValueFromProto when the template type
didn't match the actual proto value type.

Fixes:
1. Go side (fill_expression_value.go): Normalize numeric types for JSON
fields - if either bound is float and the other is int, convert the int
to float.

2. C++ side (BinaryRangeExpr.cpp):
   - Check both lower_val and upper_val types when dispatching
   - Use double template when either bound is float
- Use GetValueWithCastNumber instead of GetValueFromProto to safely
handle int64->double conversion

issue: #46588

<!-- This is an auto-generated comment: release notes by coderabbit.ai
-->
- Core invariant: JSON field binary-range expressions must present
numeric bounds to the evaluator with a consistent numeric type; if
either bound is floating-point, both bounds must be treated as double to
avoid proto-type mismatches during template instantiation.
- Bug fix (issue #46588 & concrete change): mixed int64/float bounds
could dispatch the wrong template (e.g.,
ExecRangeVisitorImplForJson<int64_t>) and trigger assertions in
GetValueFromProto. Fixes: (1) Go parser (FillBinaryRangeExpressionValue
in fill_expression_value.go) normalizes mixed JSON numeric bounds by
promoting the int bound to float; (2) C++ evaluator
(PhyBinaryRangeFilterExpr::Eval in BinaryRangeExpr.cpp) inspects both
lower_type and upper_type, sets use_double when either is float, selects
ExecRangeVisitorImplForJson<double> for mixed numeric cases, and
replaces GetValueFromProto with GetValueWithCastNumber so int64→double
conversions are handled safely.
- Removed / simplified logic: the previous evaluator branched on only
the lower bound's proto type and had separate index/non-index handling
for int64 vs float; that per-bound branching is replaced by unified
numeric handling (convert to double when needed) and a single numeric
path for index use — eliminating redundant, error-prone branches that
assumed homogeneous bound types.
- No data loss or regression: changes only promote int→double for
JSON-range comparisons when the other bound is float; integer-only and
float-only paths remain unchanged. Promotion uses IEEE double (C++
double and Go float64) and only affects template dispatch and
value-extraction paths; GetValueWithCastNumber safely converts int64 to
double and index/non-index code paths both normalize consistently,
preserving semantics for comparisons and avoiding assertion failures.
<!-- end of auto-generated comment: release notes by coderabbit.ai -->

---------

Signed-off-by: Buqian Zheng <zhengbuqian@gmail.com>
This commit is contained in:
Buqian Zheng 2025-12-31 11:52:24 +08:00 committed by GitHub
parent 1100d8f7e2
commit 724598d231
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
4 changed files with 280 additions and 48 deletions

View File

@ -77,61 +77,74 @@ PhyBinaryRangeFilterExpr::Eval(EvalCtx& context, VectorPtr& result) {
break;
}
case DataType::JSON: {
auto value_type = expr_->lower_val_.val_case();
auto lower_type = expr_->lower_val_.val_case();
auto upper_type = expr_->upper_val_.val_case();
// For numeric types, if either bound is float, use double for both.
// This handles mixed int64/float cases properly.
bool use_double =
(lower_type == proto::plan::GenericValue::ValCase::kFloatVal ||
upper_type == proto::plan::GenericValue::ValCase::kFloatVal);
bool is_numeric =
((lower_type == proto::plan::GenericValue::ValCase::kInt64Val ||
lower_type ==
proto::plan::GenericValue::ValCase::kFloatVal) &&
(upper_type == proto::plan::GenericValue::ValCase::kInt64Val ||
upper_type == proto::plan::GenericValue::ValCase::kFloatVal));
if (SegmentExpr::CanUseIndex() && !has_offset_input_) {
switch (value_type) {
case proto::plan::GenericValue::ValCase::kInt64Val: {
proto::plan::GenericValue double_lower_val;
if (is_numeric) {
// Convert both bounds to double for index path
proto::plan::GenericValue double_lower_val;
if (lower_type ==
proto::plan::GenericValue::ValCase::kInt64Val) {
double_lower_val.set_float_val(
static_cast<double>(expr_->lower_val_.int64_val()));
proto::plan::GenericValue double_upper_val;
} else {
double_lower_val.set_float_val(
expr_->lower_val_.float_val());
}
proto::plan::GenericValue double_upper_val;
if (upper_type ==
proto::plan::GenericValue::ValCase::kInt64Val) {
double_upper_val.set_float_val(
static_cast<double>(expr_->upper_val_.int64_val()));
} else {
double_upper_val.set_float_val(
expr_->upper_val_.float_val());
}
lower_arg_.SetValue<double>(double_lower_val);
upper_arg_.SetValue<double>(double_upper_val);
arg_inited_ = true;
lower_arg_.SetValue<double>(double_lower_val);
upper_arg_.SetValue<double>(double_upper_val);
arg_inited_ = true;
result = ExecRangeVisitorImplForIndex<double>();
break;
}
case proto::plan::GenericValue::ValCase::kFloatVal: {
result = ExecRangeVisitorImplForIndex<double>();
break;
}
case proto::plan::GenericValue::ValCase::kStringVal: {
result =
ExecRangeVisitorImplForJson<std::string>(context);
break;
}
default: {
ThrowInfo(DataTypeInvalid,
fmt::format(
"unsupported value type {} in expression",
value_type));
}
result = ExecRangeVisitorImplForIndex<double>();
} else if (lower_type ==
proto::plan::GenericValue::ValCase::kStringVal) {
result = ExecRangeVisitorImplForJson<std::string>(context);
} else {
ThrowInfo(
DataTypeInvalid,
fmt::format("unsupported value type {} in expression",
lower_type));
}
} else {
switch (value_type) {
case proto::plan::GenericValue::ValCase::kInt64Val: {
result = ExecRangeVisitorImplForJson<int64_t>(context);
break;
}
case proto::plan::GenericValue::ValCase::kFloatVal: {
result = ExecRangeVisitorImplForJson<double>(context);
break;
}
case proto::plan::GenericValue::ValCase::kStringVal: {
result =
ExecRangeVisitorImplForJson<std::string>(context);
break;
}
default: {
ThrowInfo(DataTypeInvalid,
fmt::format(
"unsupported value type {} in expression",
value_type));
}
if (is_numeric && use_double) {
// Use double when either bound is float
result = ExecRangeVisitorImplForJson<double>(context);
} else if (lower_type ==
proto::plan::GenericValue::ValCase::kInt64Val) {
result = ExecRangeVisitorImplForJson<int64_t>(context);
} else if (lower_type ==
proto::plan::GenericValue::ValCase::kFloatVal) {
result = ExecRangeVisitorImplForJson<double>(context);
} else if (lower_type ==
proto::plan::GenericValue::ValCase::kStringVal) {
result = ExecRangeVisitorImplForJson<std::string>(context);
} else {
ThrowInfo(
DataTypeInvalid,
fmt::format("unsupported value type {} in expression",
lower_type));
}
}
break;
@ -604,8 +617,10 @@ PhyBinaryRangeFilterExpr::ExecRangeVisitorImplForJsonStats() {
auto pointer = milvus::index::JsonPointer(expr_->column_.nested_path_);
bool lower_inclusive = expr_->lower_inclusive_;
bool upper_inclusive = expr_->upper_inclusive_;
ValueType val1 = GetValueFromProto<ValueType>(expr_->lower_val_);
ValueType val2 = GetValueFromProto<ValueType>(expr_->upper_val_);
// Use GetValueWithCastNumber to handle mixed int64/float types safely.
// This allows int64 values to be cast to double when ValueType is double.
ValueType val1 = GetValueWithCastNumber<ValueType>(expr_->lower_val_);
ValueType val2 = GetValueWithCastNumber<ValueType>(expr_->upper_val_);
if (cached_index_chunk_id_ != 0 &&
segment_->type() == SegmentType::Sealed) {

View File

@ -17833,4 +17833,123 @@ TEST(ExprTest, TestCancellationHelper) {
source.requestCancellation();
ASSERT_THROW(milvus::exec::checkCancellation(query_context.get()),
folly::FutureCancellation);
}
// Test for issue #46588: BinaryRangeExpr with mixed int64/float types for JSON fields
// When lower_val is int64 and upper_val is float (or vice versa), the code should
// handle the type mismatch correctly without assertion failures.
TEST(ExprTest, TestBinaryRangeExprMixedTypesForJSON) {
auto schema = std::make_shared<Schema>();
auto json_fid = schema->AddDebugField("json", DataType::JSON);
auto i64_fid = schema->AddDebugField("id", DataType::INT64);
schema->set_primary_field_id(i64_fid);
auto seg = CreateGrowingSegment(schema, empty_index_meta);
int N = 100;
auto raw_data = DataGen(schema, N);
seg->PreInsert(N);
seg->Insert(0,
N,
raw_data.row_ids_.data(),
raw_data.timestamps_.data(),
raw_data.raw_);
auto seg_promote = dynamic_cast<SegmentGrowingImpl*>(seg.get());
// Test case 1: lower_val is int64, upper_val is float
// Expression: 100 < json["value"] < 500.5
{
proto::plan::GenericValue lower_val;
lower_val.set_int64_val(100); // int64 type
proto::plan::GenericValue upper_val;
upper_val.set_float_val(500.5); // float type
auto expr = std::make_shared<expr::BinaryRangeFilterExpr>(
expr::ColumnInfo(json_fid, DataType::JSON, {"value"}),
lower_val,
upper_val,
false, // lower_inclusive
false // upper_inclusive
);
auto plan =
std::make_shared<plan::FilterBitsNode>(DEFAULT_PLANNODE_ID, expr);
// Should not throw - the fix handles mixed types correctly
ASSERT_NO_THROW({
auto result = milvus::query::ExecuteQueryExpr(
plan, seg_promote, N, MAX_TIMESTAMP);
});
}
// Test case 2: lower_val is float, upper_val is int64
// Expression: 100.5 < json["value"] < 500
{
proto::plan::GenericValue lower_val;
lower_val.set_float_val(100.5); // float type
proto::plan::GenericValue upper_val;
upper_val.set_int64_val(500); // int64 type
auto expr = std::make_shared<expr::BinaryRangeFilterExpr>(
expr::ColumnInfo(json_fid, DataType::JSON, {"value"}),
lower_val,
upper_val,
false, // lower_inclusive
false // upper_inclusive
);
auto plan =
std::make_shared<plan::FilterBitsNode>(DEFAULT_PLANNODE_ID, expr);
// Should not throw - the fix handles mixed types correctly
ASSERT_NO_THROW({
auto result = milvus::query::ExecuteQueryExpr(
plan, seg_promote, N, MAX_TIMESTAMP);
});
}
// Test case 3: both are int64 (baseline test)
{
proto::plan::GenericValue lower_val;
lower_val.set_int64_val(100);
proto::plan::GenericValue upper_val;
upper_val.set_int64_val(500);
auto expr = std::make_shared<expr::BinaryRangeFilterExpr>(
expr::ColumnInfo(json_fid, DataType::JSON, {"value"}),
lower_val,
upper_val,
false,
false);
auto plan =
std::make_shared<plan::FilterBitsNode>(DEFAULT_PLANNODE_ID, expr);
ASSERT_NO_THROW({
auto result = milvus::query::ExecuteQueryExpr(
plan, seg_promote, N, MAX_TIMESTAMP);
});
}
// Test case 4: both are float (baseline test)
{
proto::plan::GenericValue lower_val;
lower_val.set_float_val(100.5);
proto::plan::GenericValue upper_val;
upper_val.set_float_val(500.5);
auto expr = std::make_shared<expr::BinaryRangeFilterExpr>(
expr::ColumnInfo(json_fid, DataType::JSON, {"value"}),
lower_val,
upper_val,
false,
false);
auto plan =
std::make_shared<plan::FilterBitsNode>(DEFAULT_PLANNODE_ID, expr);
ASSERT_NO_THROW({
auto result = milvus::query::ExecuteQueryExpr(
plan, seg_promote, N, MAX_TIMESTAMP);
});
}
}

View File

@ -140,6 +140,24 @@ func FillBinaryRangeExpressionValue(expr *planpb.BinaryRangeExpr, templateValues
expr.UpperValue = castedUpperValue
}
// For JSON type, normalize numeric types to ensure both bounds have the same type.
// If one is float and the other is int, convert the int to float.
// This prevents type mismatch assertions in C++ expression execution.
if typeutil.IsJSONType(dataType) {
lowerVal := expr.GetLowerValue()
upperVal := expr.GetUpperValue()
lowerIsFloat := IsFloating(lowerVal)
upperIsFloat := IsFloating(upperVal)
lowerIsInt := IsInteger(lowerVal)
upperIsInt := IsInteger(upperVal)
if lowerIsFloat && upperIsInt {
expr.UpperValue = NewFloat(float64(upperVal.GetInt64Val()))
} else if lowerIsInt && upperIsFloat {
expr.LowerValue = NewFloat(float64(lowerVal.GetInt64Val()))
}
}
if !(expr.GetLowerInclusive() && expr.GetUpperInclusive()) {
if getGenericValue(GreaterEqual(lowerValue, upperValue)).GetBoolVal() {
return errors.New("invalid range: lowerbound is greater than upperbound")

View File

@ -499,3 +499,83 @@ func (s *FillExpressionValueSuite) TestBinaryExpression() {
}
})
}
func (s *FillExpressionValueSuite) TestBinaryRangeWithMixedNumericTypesForJSON() {
// Test that mixed int64/float types are normalized to float for JSON fields.
// This prevents assertion failures in C++ expression execution.
// Related issue: https://github.com/milvus-io/milvus/issues/46588
schemaH := newTestSchemaHelper(s.T())
s.Run("lower int64 upper float should normalize to float", func() {
// A is a dynamic field (JSON type)
exprStr := `{min} < A < {max}`
templateValues := map[string]*schemapb.TemplateValue{
"min": generateTemplateValue(schemapb.DataType_Int64, int64(499)),
"max": generateTemplateValue(schemapb.DataType_Double, float64(512.0)),
}
expr, err := ParseExpr(schemaH, exprStr, templateValues)
s.NoError(err)
s.NotNil(expr)
// Verify both bounds are normalized to float type
bre := expr.GetBinaryRangeExpr()
s.NotNil(bre, "expected BinaryRangeExpr")
s.Equal(float64(499), bre.GetLowerValue().GetFloatVal())
s.Equal(float64(512.0), bre.GetUpperValue().GetFloatVal())
})
s.Run("lower float upper int64 should normalize to float", func() {
exprStr := `{min} < A < {max}`
templateValues := map[string]*schemapb.TemplateValue{
"min": generateTemplateValue(schemapb.DataType_Double, float64(10.5)),
"max": generateTemplateValue(schemapb.DataType_Int64, int64(100)),
}
expr, err := ParseExpr(schemaH, exprStr, templateValues)
s.NoError(err)
s.NotNil(expr)
// Verify both bounds are normalized to float type
bre := expr.GetBinaryRangeExpr()
s.NotNil(bre, "expected BinaryRangeExpr")
s.Equal(float64(10.5), bre.GetLowerValue().GetFloatVal())
s.Equal(float64(100), bre.GetUpperValue().GetFloatVal())
})
s.Run("both int64 should remain int64", func() {
exprStr := `{min} < A < {max}`
templateValues := map[string]*schemapb.TemplateValue{
"min": generateTemplateValue(schemapb.DataType_Int64, int64(10)),
"max": generateTemplateValue(schemapb.DataType_Int64, int64(100)),
}
expr, err := ParseExpr(schemaH, exprStr, templateValues)
s.NoError(err)
s.NotNil(expr)
// Verify both bounds remain int64 type
bre := expr.GetBinaryRangeExpr()
s.NotNil(bre, "expected BinaryRangeExpr")
s.Equal(int64(10), bre.GetLowerValue().GetInt64Val())
s.Equal(int64(100), bre.GetUpperValue().GetInt64Val())
})
s.Run("both float should remain float", func() {
exprStr := `{min} < A < {max}`
templateValues := map[string]*schemapb.TemplateValue{
"min": generateTemplateValue(schemapb.DataType_Double, float64(10.5)),
"max": generateTemplateValue(schemapb.DataType_Double, float64(100.5)),
}
expr, err := ParseExpr(schemaH, exprStr, templateValues)
s.NoError(err)
s.NotNil(expr)
// Verify both bounds remain float type
bre := expr.GetBinaryRangeExpr()
s.NotNil(bre, "expected BinaryRangeExpr")
s.Equal(float64(10.5), bre.GetLowerValue().GetFloatVal())
s.Equal(float64(100.5), bre.GetUpperValue().GetFloatVal())
})
}