mirror of
https://gitee.com/milvus-io/milvus.git
synced 2026-01-07 19:31:51 +08:00
issue: https://github.com/milvus-io/milvus/issues/46517 ref: https://github.com/milvus-io/milvus/issues/42148 This PR supports match operator family with struct array and brute force search only. <!-- This is an auto-generated comment: release notes by coderabbit.ai --> - Core invariant: match operators only target struct-array element-level predicates and assume callers provide a correct row_start so element indices form a contiguous range; IArrayOffsets implementations convert row-level bitmaps/rows (starting at row_start) into element-level bitmaps or a contiguous element-offset vector used by brute-force evaluation. - New capability added: end-to-end support for MATCH_* semantics (match_any, match_all, match_least, match_most, match_exact) — parser (grammar + proto), planner (ParseMatchExprs), expr model (expr::MatchExpr), compilation (Expr→PhyMatchFilterExpr), execution (PhyMatchFilterExpr::Eval uses element offsets/bitmaps), and unit tests (MatchExprTest + parser tests). Implementation currently works for struct-array inputs and uses brute-force element counting via RowBitsetToElementOffsets/RowBitsetToElementBitset. - Logic removed or simplified and why: removed the ad-hoc DocBitsetToElementOffsets helper and consolidated offset/bitset derivation into IArrayOffsets::RowBitsetToElementOffsets and a row_start-aware RowBitsetToElementBitset, and removed EvalCtx overloads that embedded ExprSet (now EvalCtx(exec_ctx, offset_input)). This centralizes array-layout logic in ArrayOffsets and removes duplicated offset conversion and EvalCtx variants that were redundant for element-level evaluation. - No data loss / no behavior regression: persistent formats are unchanged (no proto storage or on-disk layout changed); callers were updated to supply row_start and now route through the centralized ArrayOffsets APIs which still use the authoritative row_to_element_start_ mapping, preserving exact element index mappings. Eval logic changes are limited to in-memory plumbing (how offsets/bitmaps are produced and how EvalCtx is constructed); expression evaluation still invokes exprs_->Eval where needed, so existing behavior and stored data remain intact. <!-- end of auto-generated comment: release notes by coderabbit.ai --> --------- Signed-off-by: SpadeA <tangchenjie1210@gmail.com> Signed-off-by: SpadeA-Tang <tangchenjie1210@gmail.com>
247 lines
8.3 KiB
Go
247 lines
8.3 KiB
Go
package planparserv2
|
|
|
|
import (
|
|
"fmt"
|
|
|
|
"github.com/cockroachdb/errors"
|
|
|
|
"github.com/milvus-io/milvus-proto/go-api/v2/schemapb"
|
|
"github.com/milvus-io/milvus/pkg/v2/proto/planpb"
|
|
"github.com/milvus-io/milvus/pkg/v2/util/typeutil"
|
|
)
|
|
|
|
func FillExpressionValue(expr *planpb.Expr, templateValues map[string]*planpb.GenericValue) error {
|
|
if !expr.GetIsTemplate() {
|
|
return nil
|
|
}
|
|
|
|
switch e := expr.GetExpr().(type) {
|
|
case *planpb.Expr_TermExpr:
|
|
return FillTermExpressionValue(e.TermExpr, templateValues)
|
|
case *planpb.Expr_UnaryExpr:
|
|
return FillExpressionValue(e.UnaryExpr.GetChild(), templateValues)
|
|
case *planpb.Expr_BinaryExpr:
|
|
if err := FillExpressionValue(e.BinaryExpr.GetLeft(), templateValues); err != nil {
|
|
return err
|
|
}
|
|
return FillExpressionValue(e.BinaryExpr.GetRight(), templateValues)
|
|
case *planpb.Expr_UnaryRangeExpr:
|
|
return FillUnaryRangeExpressionValue(e.UnaryRangeExpr, templateValues)
|
|
case *planpb.Expr_BinaryRangeExpr:
|
|
return FillBinaryRangeExpressionValue(e.BinaryRangeExpr, templateValues)
|
|
case *planpb.Expr_BinaryArithOpEvalRangeExpr:
|
|
return FillBinaryArithOpEvalRangeExpressionValue(e.BinaryArithOpEvalRangeExpr, templateValues)
|
|
case *planpb.Expr_BinaryArithExpr:
|
|
if err := FillExpressionValue(e.BinaryArithExpr.GetLeft(), templateValues); err != nil {
|
|
return err
|
|
}
|
|
return FillExpressionValue(e.BinaryArithExpr.GetRight(), templateValues)
|
|
case *planpb.Expr_JsonContainsExpr:
|
|
return FillJSONContainsExpressionValue(e.JsonContainsExpr, templateValues)
|
|
case *planpb.Expr_RandomSampleExpr:
|
|
return FillExpressionValue(expr.GetExpr().(*planpb.Expr_RandomSampleExpr).RandomSampleExpr.GetPredicate(), templateValues)
|
|
case *planpb.Expr_ElementFilterExpr:
|
|
if err := FillExpressionValue(e.ElementFilterExpr.GetElementExpr(), templateValues); err != nil {
|
|
return err
|
|
}
|
|
if e.ElementFilterExpr.GetPredicate() != nil {
|
|
return FillExpressionValue(e.ElementFilterExpr.GetPredicate(), templateValues)
|
|
}
|
|
return nil
|
|
default:
|
|
return fmt.Errorf("this expression no need to fill placeholder with expr type: %T", e)
|
|
}
|
|
}
|
|
|
|
func FillTermExpressionValue(expr *planpb.TermExpr, templateValues map[string]*planpb.GenericValue) error {
|
|
value, ok := templateValues[expr.GetTemplateVariableName()]
|
|
if !ok && expr.GetValues() == nil {
|
|
return fmt.Errorf("the value of expression template variable name {%s} is not found", expr.GetTemplateVariableName())
|
|
}
|
|
|
|
if value == nil || value.GetArrayVal() == nil {
|
|
return fmt.Errorf("the value of term expression template variable {%s} is not array", expr.GetTemplateVariableName())
|
|
}
|
|
dataType := expr.GetColumnInfo().GetDataType()
|
|
if typeutil.IsArrayType(dataType) {
|
|
// Use element type if accessing array element
|
|
if len(expr.GetColumnInfo().GetNestedPath()) != 0 || expr.GetColumnInfo().GetIsElementLevel() {
|
|
dataType = expr.GetColumnInfo().GetElementType()
|
|
}
|
|
}
|
|
|
|
array := value.GetArrayVal().GetArray()
|
|
values := make([]*planpb.GenericValue, len(array))
|
|
for i, e := range array {
|
|
castedValue, err := castValue(dataType, e)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
values[i] = castedValue
|
|
}
|
|
expr.Values = values
|
|
|
|
return nil
|
|
}
|
|
|
|
func FillUnaryRangeExpressionValue(expr *planpb.UnaryRangeExpr, templateValues map[string]*planpb.GenericValue) error {
|
|
value, ok := templateValues[expr.GetTemplateVariableName()]
|
|
if !ok {
|
|
return fmt.Errorf("the value of expression template variable name {%s} is not found", expr.GetTemplateVariableName())
|
|
}
|
|
|
|
dataType := expr.GetColumnInfo().GetDataType()
|
|
if typeutil.IsArrayType(dataType) {
|
|
// Use element type if accessing array element
|
|
if len(expr.GetColumnInfo().GetNestedPath()) != 0 || expr.GetColumnInfo().GetIsElementLevel() {
|
|
dataType = expr.GetColumnInfo().GetElementType()
|
|
}
|
|
}
|
|
|
|
castedValue, err := castValue(dataType, value)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
expr.Value = castedValue
|
|
return nil
|
|
}
|
|
|
|
func FillBinaryRangeExpressionValue(expr *planpb.BinaryRangeExpr, templateValues map[string]*planpb.GenericValue) error {
|
|
var ok bool
|
|
dataType := expr.GetColumnInfo().GetDataType()
|
|
// Use element type if accessing array element
|
|
if typeutil.IsArrayType(dataType) && (len(expr.GetColumnInfo().GetNestedPath()) != 0 || expr.GetColumnInfo().GetIsElementLevel()) {
|
|
dataType = expr.GetColumnInfo().GetElementType()
|
|
}
|
|
lowerValue := expr.GetLowerValue()
|
|
if lowerValue == nil || expr.GetLowerTemplateVariableName() != "" {
|
|
lowerValue, ok = templateValues[expr.GetLowerTemplateVariableName()]
|
|
if !ok {
|
|
return fmt.Errorf("the lower value of expression template variable name {%s} is not found", expr.GetLowerTemplateVariableName())
|
|
}
|
|
castedLowerValue, err := castValue(dataType, lowerValue)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
expr.LowerValue = castedLowerValue
|
|
}
|
|
|
|
upperValue := expr.GetUpperValue()
|
|
if upperValue == nil || expr.GetUpperTemplateVariableName() != "" {
|
|
upperValue, ok = templateValues[expr.GetUpperTemplateVariableName()]
|
|
if !ok {
|
|
return fmt.Errorf("the upper value of expression template variable name {%s} is not found", expr.GetUpperTemplateVariableName())
|
|
}
|
|
|
|
castedUpperValue, err := castValue(dataType, upperValue)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
expr.UpperValue = castedUpperValue
|
|
}
|
|
|
|
if !(expr.GetLowerInclusive() && expr.GetUpperInclusive()) {
|
|
if getGenericValue(GreaterEqual(lowerValue, upperValue)).GetBoolVal() {
|
|
return errors.New("invalid range: lowerbound is greater than upperbound")
|
|
}
|
|
} else {
|
|
if getGenericValue(Greater(lowerValue, upperValue)).GetBoolVal() {
|
|
return errors.New("invalid range: lowerbound is greater than upperbound")
|
|
}
|
|
}
|
|
|
|
return nil
|
|
}
|
|
|
|
func FillBinaryArithOpEvalRangeExpressionValue(expr *planpb.BinaryArithOpEvalRangeExpr, templateValues map[string]*planpb.GenericValue) error {
|
|
var dataType schemapb.DataType
|
|
var err error
|
|
var ok bool
|
|
|
|
if expr.ArithOp == planpb.ArithOpType_ArrayLength {
|
|
dataType = schemapb.DataType_Int64
|
|
} else {
|
|
operand := expr.GetRightOperand()
|
|
if operand == nil || expr.GetOperandTemplateVariableName() != "" {
|
|
operand, ok = templateValues[expr.GetOperandTemplateVariableName()]
|
|
if !ok {
|
|
return fmt.Errorf("the right operand value of expression template variable name {%s} is not found", expr.GetOperandTemplateVariableName())
|
|
}
|
|
}
|
|
|
|
operandExpr := toValueExpr(operand)
|
|
lDataType, rDataType := expr.GetColumnInfo().GetDataType(), operandExpr.dataType
|
|
if typeutil.IsArrayType(expr.GetColumnInfo().GetDataType()) {
|
|
lDataType = expr.GetColumnInfo().GetElementType()
|
|
}
|
|
|
|
if err = checkValidModArith(expr.GetArithOp(), expr.GetColumnInfo().GetDataType(), expr.GetColumnInfo().GetElementType(),
|
|
rDataType, schemapb.DataType_None); err != nil {
|
|
return err
|
|
}
|
|
|
|
if operand.GetArrayVal() != nil {
|
|
return errors.New("can not comparisons array directly")
|
|
}
|
|
|
|
dataType, err = getTargetType(lDataType, rDataType)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
|
|
castedOperand, err := castValue(dataType, operand)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
expr.RightOperand = castedOperand
|
|
}
|
|
|
|
value := expr.GetValue()
|
|
if expr.GetValue() == nil || expr.GetValueTemplateVariableName() != "" {
|
|
value, ok = templateValues[expr.GetValueTemplateVariableName()]
|
|
if !ok {
|
|
return fmt.Errorf("the value of expression template variable name {%s} is not found", expr.GetValueTemplateVariableName())
|
|
}
|
|
}
|
|
castedValue, err := castValue(dataType, value)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
expr.Value = castedValue
|
|
|
|
return nil
|
|
}
|
|
|
|
func FillJSONContainsExpressionValue(expr *planpb.JSONContainsExpr, templateValues map[string]*planpb.GenericValue) error {
|
|
if expr.GetElements() != nil && expr.GetTemplateVariableName() == "" {
|
|
return nil
|
|
}
|
|
value, ok := templateValues[expr.GetTemplateVariableName()]
|
|
if !ok {
|
|
return fmt.Errorf("the value of expression template variable name {%s} is not found", expr.GetTemplateVariableName())
|
|
}
|
|
if err := checkContainsElement(toColumnExpr(expr.GetColumnInfo()), expr.GetOp(), value); err != nil {
|
|
return err
|
|
}
|
|
dataType := expr.GetColumnInfo().GetDataType()
|
|
if typeutil.IsArrayType(dataType) {
|
|
dataType = expr.GetColumnInfo().GetElementType()
|
|
}
|
|
if expr.GetOp() == planpb.JSONContainsExpr_Contains {
|
|
castedValue, err := castValue(dataType, value)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
expr.Elements = append(expr.Elements, castedValue)
|
|
} else {
|
|
for _, e := range value.GetArrayVal().GetArray() {
|
|
castedValue, err := castValue(dataType, e)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
expr.Elements = append(expr.Elements, castedValue)
|
|
}
|
|
}
|
|
return nil
|
|
}
|