From 724598d23153ab1d4f0588a786613a1d77bb0dd5 Mon Sep 17 00:00:00 2001 From: Buqian Zheng Date: Wed, 31 Dec 2025 11:52:24 +0800 Subject: [PATCH] fix: handle mixed int64/float types in BinaryRangeExpr for JSON fields (#46681) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit test: add unit tests for mixed int64/float types in BinaryRangeExpr When processing binary range expressions (e.g., `x > 499 && x <= 512.0`) on JSON/dynamic fields with expression templates, the lower and upper bounds could have different numeric types (int64 vs float64). This caused an assertion failure in GetValueFromProto when the template type didn't match the actual proto value type. Fixes: 1. Go side (fill_expression_value.go): Normalize numeric types for JSON fields - if either bound is float and the other is int, convert the int to float. 2. C++ side (BinaryRangeExpr.cpp): - Check both lower_val and upper_val types when dispatching - Use double template when either bound is float - Use GetValueWithCastNumber instead of GetValueFromProto to safely handle int64->double conversion issue: #46588 - Core invariant: JSON field binary-range expressions must present numeric bounds to the evaluator with a consistent numeric type; if either bound is floating-point, both bounds must be treated as double to avoid proto-type mismatches during template instantiation. - Bug fix (issue #46588 & concrete change): mixed int64/float bounds could dispatch the wrong template (e.g., ExecRangeVisitorImplForJson) and trigger assertions in GetValueFromProto. Fixes: (1) Go parser (FillBinaryRangeExpressionValue in fill_expression_value.go) normalizes mixed JSON numeric bounds by promoting the int bound to float; (2) C++ evaluator (PhyBinaryRangeFilterExpr::Eval in BinaryRangeExpr.cpp) inspects both lower_type and upper_type, sets use_double when either is float, selects ExecRangeVisitorImplForJson for mixed numeric cases, and replaces GetValueFromProto with GetValueWithCastNumber so int64→double conversions are handled safely. - Removed / simplified logic: the previous evaluator branched on only the lower bound's proto type and had separate index/non-index handling for int64 vs float; that per-bound branching is replaced by unified numeric handling (convert to double when needed) and a single numeric path for index use — eliminating redundant, error-prone branches that assumed homogeneous bound types. - No data loss or regression: changes only promote int→double for JSON-range comparisons when the other bound is float; integer-only and float-only paths remain unchanged. Promotion uses IEEE double (C++ double and Go float64) and only affects template dispatch and value-extraction paths; GetValueWithCastNumber safely converts int64 to double and index/non-index code paths both normalize consistently, preserving semantics for comparisons and avoiding assertion failures. --------- Signed-off-by: Buqian Zheng --- .../src/exec/expression/BinaryRangeExpr.cpp | 111 +++++++++------- .../core/src/exec/expression/ExprTest.cpp | 119 ++++++++++++++++++ .../planparserv2/fill_expression_value.go | 18 +++ .../fill_expression_value_test.go | 80 ++++++++++++ 4 files changed, 280 insertions(+), 48 deletions(-) diff --git a/internal/core/src/exec/expression/BinaryRangeExpr.cpp b/internal/core/src/exec/expression/BinaryRangeExpr.cpp index a7a74797ac..10a488f810 100644 --- a/internal/core/src/exec/expression/BinaryRangeExpr.cpp +++ b/internal/core/src/exec/expression/BinaryRangeExpr.cpp @@ -77,61 +77,74 @@ PhyBinaryRangeFilterExpr::Eval(EvalCtx& context, VectorPtr& result) { break; } case DataType::JSON: { - auto value_type = expr_->lower_val_.val_case(); + auto lower_type = expr_->lower_val_.val_case(); + auto upper_type = expr_->upper_val_.val_case(); + // For numeric types, if either bound is float, use double for both. + // This handles mixed int64/float cases properly. + bool use_double = + (lower_type == proto::plan::GenericValue::ValCase::kFloatVal || + upper_type == proto::plan::GenericValue::ValCase::kFloatVal); + bool is_numeric = + ((lower_type == proto::plan::GenericValue::ValCase::kInt64Val || + lower_type == + proto::plan::GenericValue::ValCase::kFloatVal) && + (upper_type == proto::plan::GenericValue::ValCase::kInt64Val || + upper_type == proto::plan::GenericValue::ValCase::kFloatVal)); + if (SegmentExpr::CanUseIndex() && !has_offset_input_) { - switch (value_type) { - case proto::plan::GenericValue::ValCase::kInt64Val: { - proto::plan::GenericValue double_lower_val; + if (is_numeric) { + // Convert both bounds to double for index path + proto::plan::GenericValue double_lower_val; + if (lower_type == + proto::plan::GenericValue::ValCase::kInt64Val) { double_lower_val.set_float_val( static_cast(expr_->lower_val_.int64_val())); - proto::plan::GenericValue double_upper_val; + } else { + double_lower_val.set_float_val( + expr_->lower_val_.float_val()); + } + proto::plan::GenericValue double_upper_val; + if (upper_type == + proto::plan::GenericValue::ValCase::kInt64Val) { double_upper_val.set_float_val( static_cast(expr_->upper_val_.int64_val())); + } else { + double_upper_val.set_float_val( + expr_->upper_val_.float_val()); + } - lower_arg_.SetValue(double_lower_val); - upper_arg_.SetValue(double_upper_val); - arg_inited_ = true; + lower_arg_.SetValue(double_lower_val); + upper_arg_.SetValue(double_upper_val); + arg_inited_ = true; - result = ExecRangeVisitorImplForIndex(); - break; - } - case proto::plan::GenericValue::ValCase::kFloatVal: { - result = ExecRangeVisitorImplForIndex(); - break; - } - case proto::plan::GenericValue::ValCase::kStringVal: { - result = - ExecRangeVisitorImplForJson(context); - break; - } - default: { - ThrowInfo(DataTypeInvalid, - fmt::format( - "unsupported value type {} in expression", - value_type)); - } + result = ExecRangeVisitorImplForIndex(); + } else if (lower_type == + proto::plan::GenericValue::ValCase::kStringVal) { + result = ExecRangeVisitorImplForJson(context); + } else { + ThrowInfo( + DataTypeInvalid, + fmt::format("unsupported value type {} in expression", + lower_type)); } } else { - switch (value_type) { - case proto::plan::GenericValue::ValCase::kInt64Val: { - result = ExecRangeVisitorImplForJson(context); - break; - } - case proto::plan::GenericValue::ValCase::kFloatVal: { - result = ExecRangeVisitorImplForJson(context); - break; - } - case proto::plan::GenericValue::ValCase::kStringVal: { - result = - ExecRangeVisitorImplForJson(context); - break; - } - default: { - ThrowInfo(DataTypeInvalid, - fmt::format( - "unsupported value type {} in expression", - value_type)); - } + if (is_numeric && use_double) { + // Use double when either bound is float + result = ExecRangeVisitorImplForJson(context); + } else if (lower_type == + proto::plan::GenericValue::ValCase::kInt64Val) { + result = ExecRangeVisitorImplForJson(context); + } else if (lower_type == + proto::plan::GenericValue::ValCase::kFloatVal) { + result = ExecRangeVisitorImplForJson(context); + } else if (lower_type == + proto::plan::GenericValue::ValCase::kStringVal) { + result = ExecRangeVisitorImplForJson(context); + } else { + ThrowInfo( + DataTypeInvalid, + fmt::format("unsupported value type {} in expression", + lower_type)); } } break; @@ -604,8 +617,10 @@ PhyBinaryRangeFilterExpr::ExecRangeVisitorImplForJsonStats() { auto pointer = milvus::index::JsonPointer(expr_->column_.nested_path_); bool lower_inclusive = expr_->lower_inclusive_; bool upper_inclusive = expr_->upper_inclusive_; - ValueType val1 = GetValueFromProto(expr_->lower_val_); - ValueType val2 = GetValueFromProto(expr_->upper_val_); + // Use GetValueWithCastNumber to handle mixed int64/float types safely. + // This allows int64 values to be cast to double when ValueType is double. + ValueType val1 = GetValueWithCastNumber(expr_->lower_val_); + ValueType val2 = GetValueWithCastNumber(expr_->upper_val_); if (cached_index_chunk_id_ != 0 && segment_->type() == SegmentType::Sealed) { diff --git a/internal/core/src/exec/expression/ExprTest.cpp b/internal/core/src/exec/expression/ExprTest.cpp index eb2568fe41..3a0e22462d 100644 --- a/internal/core/src/exec/expression/ExprTest.cpp +++ b/internal/core/src/exec/expression/ExprTest.cpp @@ -17833,4 +17833,123 @@ TEST(ExprTest, TestCancellationHelper) { source.requestCancellation(); ASSERT_THROW(milvus::exec::checkCancellation(query_context.get()), folly::FutureCancellation); +} + +// Test for issue #46588: BinaryRangeExpr with mixed int64/float types for JSON fields +// When lower_val is int64 and upper_val is float (or vice versa), the code should +// handle the type mismatch correctly without assertion failures. +TEST(ExprTest, TestBinaryRangeExprMixedTypesForJSON) { + auto schema = std::make_shared(); + auto json_fid = schema->AddDebugField("json", DataType::JSON); + auto i64_fid = schema->AddDebugField("id", DataType::INT64); + schema->set_primary_field_id(i64_fid); + + auto seg = CreateGrowingSegment(schema, empty_index_meta); + int N = 100; + auto raw_data = DataGen(schema, N); + seg->PreInsert(N); + seg->Insert(0, + N, + raw_data.row_ids_.data(), + raw_data.timestamps_.data(), + raw_data.raw_); + auto seg_promote = dynamic_cast(seg.get()); + + // Test case 1: lower_val is int64, upper_val is float + // Expression: 100 < json["value"] < 500.5 + { + proto::plan::GenericValue lower_val; + lower_val.set_int64_val(100); // int64 type + proto::plan::GenericValue upper_val; + upper_val.set_float_val(500.5); // float type + + auto expr = std::make_shared( + expr::ColumnInfo(json_fid, DataType::JSON, {"value"}), + lower_val, + upper_val, + false, // lower_inclusive + false // upper_inclusive + ); + + auto plan = + std::make_shared(DEFAULT_PLANNODE_ID, expr); + + // Should not throw - the fix handles mixed types correctly + ASSERT_NO_THROW({ + auto result = milvus::query::ExecuteQueryExpr( + plan, seg_promote, N, MAX_TIMESTAMP); + }); + } + + // Test case 2: lower_val is float, upper_val is int64 + // Expression: 100.5 < json["value"] < 500 + { + proto::plan::GenericValue lower_val; + lower_val.set_float_val(100.5); // float type + proto::plan::GenericValue upper_val; + upper_val.set_int64_val(500); // int64 type + + auto expr = std::make_shared( + expr::ColumnInfo(json_fid, DataType::JSON, {"value"}), + lower_val, + upper_val, + false, // lower_inclusive + false // upper_inclusive + ); + + auto plan = + std::make_shared(DEFAULT_PLANNODE_ID, expr); + + // Should not throw - the fix handles mixed types correctly + ASSERT_NO_THROW({ + auto result = milvus::query::ExecuteQueryExpr( + plan, seg_promote, N, MAX_TIMESTAMP); + }); + } + + // Test case 3: both are int64 (baseline test) + { + proto::plan::GenericValue lower_val; + lower_val.set_int64_val(100); + proto::plan::GenericValue upper_val; + upper_val.set_int64_val(500); + + auto expr = std::make_shared( + expr::ColumnInfo(json_fid, DataType::JSON, {"value"}), + lower_val, + upper_val, + false, + false); + + auto plan = + std::make_shared(DEFAULT_PLANNODE_ID, expr); + + ASSERT_NO_THROW({ + auto result = milvus::query::ExecuteQueryExpr( + plan, seg_promote, N, MAX_TIMESTAMP); + }); + } + + // Test case 4: both are float (baseline test) + { + proto::plan::GenericValue lower_val; + lower_val.set_float_val(100.5); + proto::plan::GenericValue upper_val; + upper_val.set_float_val(500.5); + + auto expr = std::make_shared( + expr::ColumnInfo(json_fid, DataType::JSON, {"value"}), + lower_val, + upper_val, + false, + false); + + auto plan = + std::make_shared(DEFAULT_PLANNODE_ID, expr); + + ASSERT_NO_THROW({ + auto result = milvus::query::ExecuteQueryExpr( + plan, seg_promote, N, MAX_TIMESTAMP); + }); + } } \ No newline at end of file diff --git a/internal/parser/planparserv2/fill_expression_value.go b/internal/parser/planparserv2/fill_expression_value.go index 1a87303aa2..b7ae5c1b81 100644 --- a/internal/parser/planparserv2/fill_expression_value.go +++ b/internal/parser/planparserv2/fill_expression_value.go @@ -140,6 +140,24 @@ func FillBinaryRangeExpressionValue(expr *planpb.BinaryRangeExpr, templateValues expr.UpperValue = castedUpperValue } + // For JSON type, normalize numeric types to ensure both bounds have the same type. + // If one is float and the other is int, convert the int to float. + // This prevents type mismatch assertions in C++ expression execution. + if typeutil.IsJSONType(dataType) { + lowerVal := expr.GetLowerValue() + upperVal := expr.GetUpperValue() + lowerIsFloat := IsFloating(lowerVal) + upperIsFloat := IsFloating(upperVal) + lowerIsInt := IsInteger(lowerVal) + upperIsInt := IsInteger(upperVal) + + if lowerIsFloat && upperIsInt { + expr.UpperValue = NewFloat(float64(upperVal.GetInt64Val())) + } else if lowerIsInt && upperIsFloat { + expr.LowerValue = NewFloat(float64(lowerVal.GetInt64Val())) + } + } + if !(expr.GetLowerInclusive() && expr.GetUpperInclusive()) { if getGenericValue(GreaterEqual(lowerValue, upperValue)).GetBoolVal() { return errors.New("invalid range: lowerbound is greater than upperbound") diff --git a/internal/parser/planparserv2/fill_expression_value_test.go b/internal/parser/planparserv2/fill_expression_value_test.go index 1407b86249..296d9f756b 100644 --- a/internal/parser/planparserv2/fill_expression_value_test.go +++ b/internal/parser/planparserv2/fill_expression_value_test.go @@ -499,3 +499,83 @@ func (s *FillExpressionValueSuite) TestBinaryExpression() { } }) } + +func (s *FillExpressionValueSuite) TestBinaryRangeWithMixedNumericTypesForJSON() { + // Test that mixed int64/float types are normalized to float for JSON fields. + // This prevents assertion failures in C++ expression execution. + // Related issue: https://github.com/milvus-io/milvus/issues/46588 + schemaH := newTestSchemaHelper(s.T()) + + s.Run("lower int64 upper float should normalize to float", func() { + // A is a dynamic field (JSON type) + exprStr := `{min} < A < {max}` + templateValues := map[string]*schemapb.TemplateValue{ + "min": generateTemplateValue(schemapb.DataType_Int64, int64(499)), + "max": generateTemplateValue(schemapb.DataType_Double, float64(512.0)), + } + + expr, err := ParseExpr(schemaH, exprStr, templateValues) + s.NoError(err) + s.NotNil(expr) + + // Verify both bounds are normalized to float type + bre := expr.GetBinaryRangeExpr() + s.NotNil(bre, "expected BinaryRangeExpr") + s.Equal(float64(499), bre.GetLowerValue().GetFloatVal()) + s.Equal(float64(512.0), bre.GetUpperValue().GetFloatVal()) + }) + + s.Run("lower float upper int64 should normalize to float", func() { + exprStr := `{min} < A < {max}` + templateValues := map[string]*schemapb.TemplateValue{ + "min": generateTemplateValue(schemapb.DataType_Double, float64(10.5)), + "max": generateTemplateValue(schemapb.DataType_Int64, int64(100)), + } + + expr, err := ParseExpr(schemaH, exprStr, templateValues) + s.NoError(err) + s.NotNil(expr) + + // Verify both bounds are normalized to float type + bre := expr.GetBinaryRangeExpr() + s.NotNil(bre, "expected BinaryRangeExpr") + s.Equal(float64(10.5), bre.GetLowerValue().GetFloatVal()) + s.Equal(float64(100), bre.GetUpperValue().GetFloatVal()) + }) + + s.Run("both int64 should remain int64", func() { + exprStr := `{min} < A < {max}` + templateValues := map[string]*schemapb.TemplateValue{ + "min": generateTemplateValue(schemapb.DataType_Int64, int64(10)), + "max": generateTemplateValue(schemapb.DataType_Int64, int64(100)), + } + + expr, err := ParseExpr(schemaH, exprStr, templateValues) + s.NoError(err) + s.NotNil(expr) + + // Verify both bounds remain int64 type + bre := expr.GetBinaryRangeExpr() + s.NotNil(bre, "expected BinaryRangeExpr") + s.Equal(int64(10), bre.GetLowerValue().GetInt64Val()) + s.Equal(int64(100), bre.GetUpperValue().GetInt64Val()) + }) + + s.Run("both float should remain float", func() { + exprStr := `{min} < A < {max}` + templateValues := map[string]*schemapb.TemplateValue{ + "min": generateTemplateValue(schemapb.DataType_Double, float64(10.5)), + "max": generateTemplateValue(schemapb.DataType_Double, float64(100.5)), + } + + expr, err := ParseExpr(schemaH, exprStr, templateValues) + s.NoError(err) + s.NotNil(expr) + + // Verify both bounds remain float type + bre := expr.GetBinaryRangeExpr() + s.NotNil(bre, "expected BinaryRangeExpr") + s.Equal(float64(10.5), bre.GetLowerValue().GetFloatVal()) + s.Equal(float64(100.5), bre.GetUpperValue().GetFloatVal()) + }) +}