milvus/internal/parser/planparserv2/fill_expression_value.go
Buqian Zheng 724598d231
fix: handle mixed int64/float types in BinaryRangeExpr for JSON fields (#46681)
test: add unit tests for mixed int64/float types in BinaryRangeExpr

When processing binary range expressions (e.g., `x > 499 && x <= 512.0`)
on JSON/dynamic fields with expression templates, the lower and upper
bounds could have different numeric types (int64 vs float64). This
caused an assertion failure in GetValueFromProto when the template type
didn't match the actual proto value type.

Fixes:
1. Go side (fill_expression_value.go): Normalize numeric types for JSON
fields - if either bound is float and the other is int, convert the int
to float.

2. C++ side (BinaryRangeExpr.cpp):
   - Check both lower_val and upper_val types when dispatching
   - Use double template when either bound is float
- Use GetValueWithCastNumber instead of GetValueFromProto to safely
handle int64->double conversion

issue: #46588

<!-- This is an auto-generated comment: release notes by coderabbit.ai
-->
- Core invariant: JSON field binary-range expressions must present
numeric bounds to the evaluator with a consistent numeric type; if
either bound is floating-point, both bounds must be treated as double to
avoid proto-type mismatches during template instantiation.
- Bug fix (issue #46588 & concrete change): mixed int64/float bounds
could dispatch the wrong template (e.g.,
ExecRangeVisitorImplForJson<int64_t>) and trigger assertions in
GetValueFromProto. Fixes: (1) Go parser (FillBinaryRangeExpressionValue
in fill_expression_value.go) normalizes mixed JSON numeric bounds by
promoting the int bound to float; (2) C++ evaluator
(PhyBinaryRangeFilterExpr::Eval in BinaryRangeExpr.cpp) inspects both
lower_type and upper_type, sets use_double when either is float, selects
ExecRangeVisitorImplForJson<double> for mixed numeric cases, and
replaces GetValueFromProto with GetValueWithCastNumber so int64→double
conversions are handled safely.
- Removed / simplified logic: the previous evaluator branched on only
the lower bound's proto type and had separate index/non-index handling
for int64 vs float; that per-bound branching is replaced by unified
numeric handling (convert to double when needed) and a single numeric
path for index use — eliminating redundant, error-prone branches that
assumed homogeneous bound types.
- No data loss or regression: changes only promote int→double for
JSON-range comparisons when the other bound is float; integer-only and
float-only paths remain unchanged. Promotion uses IEEE double (C++
double and Go float64) and only affects template dispatch and
value-extraction paths; GetValueWithCastNumber safely converts int64 to
double and index/non-index code paths both normalize consistently,
preserving semantics for comparisons and avoiding assertion failures.
<!-- end of auto-generated comment: release notes by coderabbit.ai -->

---------

Signed-off-by: Buqian Zheng <zhengbuqian@gmail.com>
2025-12-31 11:52:24 +08:00

265 lines
9.0 KiB
Go

package planparserv2
import (
"fmt"
"github.com/cockroachdb/errors"
"github.com/milvus-io/milvus-proto/go-api/v2/schemapb"
"github.com/milvus-io/milvus/pkg/v2/proto/planpb"
"github.com/milvus-io/milvus/pkg/v2/util/typeutil"
)
func FillExpressionValue(expr *planpb.Expr, templateValues map[string]*planpb.GenericValue) error {
if !expr.GetIsTemplate() {
return nil
}
switch e := expr.GetExpr().(type) {
case *planpb.Expr_TermExpr:
return FillTermExpressionValue(e.TermExpr, templateValues)
case *planpb.Expr_UnaryExpr:
return FillExpressionValue(e.UnaryExpr.GetChild(), templateValues)
case *planpb.Expr_BinaryExpr:
if err := FillExpressionValue(e.BinaryExpr.GetLeft(), templateValues); err != nil {
return err
}
return FillExpressionValue(e.BinaryExpr.GetRight(), templateValues)
case *planpb.Expr_UnaryRangeExpr:
return FillUnaryRangeExpressionValue(e.UnaryRangeExpr, templateValues)
case *planpb.Expr_BinaryRangeExpr:
return FillBinaryRangeExpressionValue(e.BinaryRangeExpr, templateValues)
case *planpb.Expr_BinaryArithOpEvalRangeExpr:
return FillBinaryArithOpEvalRangeExpressionValue(e.BinaryArithOpEvalRangeExpr, templateValues)
case *planpb.Expr_BinaryArithExpr:
if err := FillExpressionValue(e.BinaryArithExpr.GetLeft(), templateValues); err != nil {
return err
}
return FillExpressionValue(e.BinaryArithExpr.GetRight(), templateValues)
case *planpb.Expr_JsonContainsExpr:
return FillJSONContainsExpressionValue(e.JsonContainsExpr, templateValues)
case *planpb.Expr_RandomSampleExpr:
return FillExpressionValue(expr.GetExpr().(*planpb.Expr_RandomSampleExpr).RandomSampleExpr.GetPredicate(), templateValues)
case *planpb.Expr_ElementFilterExpr:
if err := FillExpressionValue(e.ElementFilterExpr.GetElementExpr(), templateValues); err != nil {
return err
}
if e.ElementFilterExpr.GetPredicate() != nil {
return FillExpressionValue(e.ElementFilterExpr.GetPredicate(), templateValues)
}
return nil
default:
return fmt.Errorf("this expression no need to fill placeholder with expr type: %T", e)
}
}
func FillTermExpressionValue(expr *planpb.TermExpr, templateValues map[string]*planpb.GenericValue) error {
value, ok := templateValues[expr.GetTemplateVariableName()]
if !ok && expr.GetValues() == nil {
return fmt.Errorf("the value of expression template variable name {%s} is not found", expr.GetTemplateVariableName())
}
if value == nil || value.GetArrayVal() == nil {
return fmt.Errorf("the value of term expression template variable {%s} is not array", expr.GetTemplateVariableName())
}
dataType := expr.GetColumnInfo().GetDataType()
if typeutil.IsArrayType(dataType) {
// Use element type if accessing array element
if len(expr.GetColumnInfo().GetNestedPath()) != 0 || expr.GetColumnInfo().GetIsElementLevel() {
dataType = expr.GetColumnInfo().GetElementType()
}
}
array := value.GetArrayVal().GetArray()
values := make([]*planpb.GenericValue, len(array))
for i, e := range array {
castedValue, err := castValue(dataType, e)
if err != nil {
return err
}
values[i] = castedValue
}
expr.Values = values
return nil
}
func FillUnaryRangeExpressionValue(expr *planpb.UnaryRangeExpr, templateValues map[string]*planpb.GenericValue) error {
value, ok := templateValues[expr.GetTemplateVariableName()]
if !ok {
return fmt.Errorf("the value of expression template variable name {%s} is not found", expr.GetTemplateVariableName())
}
dataType := expr.GetColumnInfo().GetDataType()
if typeutil.IsArrayType(dataType) {
// Use element type if accessing array element
if len(expr.GetColumnInfo().GetNestedPath()) != 0 || expr.GetColumnInfo().GetIsElementLevel() {
dataType = expr.GetColumnInfo().GetElementType()
}
}
castedValue, err := castValue(dataType, value)
if err != nil {
return err
}
expr.Value = castedValue
return nil
}
func FillBinaryRangeExpressionValue(expr *planpb.BinaryRangeExpr, templateValues map[string]*planpb.GenericValue) error {
var ok bool
dataType := expr.GetColumnInfo().GetDataType()
// Use element type if accessing array element
if typeutil.IsArrayType(dataType) && (len(expr.GetColumnInfo().GetNestedPath()) != 0 || expr.GetColumnInfo().GetIsElementLevel()) {
dataType = expr.GetColumnInfo().GetElementType()
}
lowerValue := expr.GetLowerValue()
if lowerValue == nil || expr.GetLowerTemplateVariableName() != "" {
lowerValue, ok = templateValues[expr.GetLowerTemplateVariableName()]
if !ok {
return fmt.Errorf("the lower value of expression template variable name {%s} is not found", expr.GetLowerTemplateVariableName())
}
castedLowerValue, err := castValue(dataType, lowerValue)
if err != nil {
return err
}
expr.LowerValue = castedLowerValue
}
upperValue := expr.GetUpperValue()
if upperValue == nil || expr.GetUpperTemplateVariableName() != "" {
upperValue, ok = templateValues[expr.GetUpperTemplateVariableName()]
if !ok {
return fmt.Errorf("the upper value of expression template variable name {%s} is not found", expr.GetUpperTemplateVariableName())
}
castedUpperValue, err := castValue(dataType, upperValue)
if err != nil {
return err
}
expr.UpperValue = castedUpperValue
}
// For JSON type, normalize numeric types to ensure both bounds have the same type.
// If one is float and the other is int, convert the int to float.
// This prevents type mismatch assertions in C++ expression execution.
if typeutil.IsJSONType(dataType) {
lowerVal := expr.GetLowerValue()
upperVal := expr.GetUpperValue()
lowerIsFloat := IsFloating(lowerVal)
upperIsFloat := IsFloating(upperVal)
lowerIsInt := IsInteger(lowerVal)
upperIsInt := IsInteger(upperVal)
if lowerIsFloat && upperIsInt {
expr.UpperValue = NewFloat(float64(upperVal.GetInt64Val()))
} else if lowerIsInt && upperIsFloat {
expr.LowerValue = NewFloat(float64(lowerVal.GetInt64Val()))
}
}
if !(expr.GetLowerInclusive() && expr.GetUpperInclusive()) {
if getGenericValue(GreaterEqual(lowerValue, upperValue)).GetBoolVal() {
return errors.New("invalid range: lowerbound is greater than upperbound")
}
} else {
if getGenericValue(Greater(lowerValue, upperValue)).GetBoolVal() {
return errors.New("invalid range: lowerbound is greater than upperbound")
}
}
return nil
}
func FillBinaryArithOpEvalRangeExpressionValue(expr *planpb.BinaryArithOpEvalRangeExpr, templateValues map[string]*planpb.GenericValue) error {
var dataType schemapb.DataType
var err error
var ok bool
if expr.ArithOp == planpb.ArithOpType_ArrayLength {
dataType = schemapb.DataType_Int64
} else {
operand := expr.GetRightOperand()
if operand == nil || expr.GetOperandTemplateVariableName() != "" {
operand, ok = templateValues[expr.GetOperandTemplateVariableName()]
if !ok {
return fmt.Errorf("the right operand value of expression template variable name {%s} is not found", expr.GetOperandTemplateVariableName())
}
}
operandExpr := toValueExpr(operand)
lDataType, rDataType := expr.GetColumnInfo().GetDataType(), operandExpr.dataType
if typeutil.IsArrayType(expr.GetColumnInfo().GetDataType()) {
lDataType = expr.GetColumnInfo().GetElementType()
}
if err = checkValidModArith(expr.GetArithOp(), expr.GetColumnInfo().GetDataType(), expr.GetColumnInfo().GetElementType(),
rDataType, schemapb.DataType_None); err != nil {
return err
}
if operand.GetArrayVal() != nil {
return errors.New("can not comparisons array directly")
}
dataType, err = getTargetType(lDataType, rDataType)
if err != nil {
return err
}
castedOperand, err := castValue(dataType, operand)
if err != nil {
return err
}
expr.RightOperand = castedOperand
}
value := expr.GetValue()
if expr.GetValue() == nil || expr.GetValueTemplateVariableName() != "" {
value, ok = templateValues[expr.GetValueTemplateVariableName()]
if !ok {
return fmt.Errorf("the value of expression template variable name {%s} is not found", expr.GetValueTemplateVariableName())
}
}
castedValue, err := castValue(dataType, value)
if err != nil {
return err
}
expr.Value = castedValue
return nil
}
func FillJSONContainsExpressionValue(expr *planpb.JSONContainsExpr, templateValues map[string]*planpb.GenericValue) error {
if expr.GetElements() != nil && expr.GetTemplateVariableName() == "" {
return nil
}
value, ok := templateValues[expr.GetTemplateVariableName()]
if !ok {
return fmt.Errorf("the value of expression template variable name {%s} is not found", expr.GetTemplateVariableName())
}
if err := checkContainsElement(toColumnExpr(expr.GetColumnInfo()), expr.GetOp(), value); err != nil {
return err
}
dataType := expr.GetColumnInfo().GetDataType()
if typeutil.IsArrayType(dataType) {
dataType = expr.GetColumnInfo().GetElementType()
}
if expr.GetOp() == planpb.JSONContainsExpr_Contains {
castedValue, err := castValue(dataType, value)
if err != nil {
return err
}
expr.Elements = append(expr.Elements, castedValue)
} else {
for _, e := range value.GetArrayVal().GetArray() {
castedValue, err := castValue(dataType, e)
if err != nil {
return err
}
expr.Elements = append(expr.Elements, castedValue)
}
}
return nil
}