milvus/internal/util/exprutil/expr_checker.go
Chun Han f34eb3ae90
enhance: remove useless code(#30376) (#45685)
related: #30376

Signed-off-by: MrPresent-Han <chun.han@gmail.com>
Co-authored-by: MrPresent-Han <chun.han@gmail.com>
2025-11-27 10:43:07 +08:00

401 lines
12 KiB
Go

package exprutil
import (
"math"
"github.com/cockroachdb/errors"
"github.com/samber/lo"
"github.com/milvus-io/milvus-proto/go-api/v2/schemapb"
"github.com/milvus-io/milvus/pkg/v2/proto/planpb"
"github.com/milvus-io/milvus/pkg/v2/util/typeutil"
)
// KeyType selects which kind of key column an expression is inspected for.
type KeyType int64

const (
	// PartitionKey selects columns flagged as partition key.
	PartitionKey KeyType = iota
	// ClusteringKey selects columns flagged as clustering key.
	ClusteringKey
)
// ParseExprFromPlan returns the predicate expression carried by plan.
// Vector-ANN and query plan nodes are supported; an empty plan node or any
// other node type yields an error.
func ParseExprFromPlan(plan *planpb.PlanNode) (*planpb.Expr, error) {
	switch node := plan.GetNode().(type) {
	case nil:
		return nil, errors.New("can't get expr from empty plan node")
	case *planpb.PlanNode_VectorAnns:
		return node.VectorAnns.GetPredicates(), nil
	case *planpb.PlanNode_Query:
		return node.Query.GetPredicates(), nil
	default:
		return nil, errors.New("unsupported plan node type")
	}
}
// ParsePartitionKeysFromBinaryExpr reports whether a BinaryExpr is prunable
// and, if so, returns the candidate key values derived from the logical
// operator:
//   - AND: when both sides are prunable the candidates are intersected;
//     when only one side is prunable that side's candidates are returned.
//   - OR: prunable only when both sides are prunable; the candidates of both
//     sides are concatenated (duplicates are not removed).
//
// Any other operator is not prunable and yields (nil, false).
func ParsePartitionKeysFromBinaryExpr(expr *planpb.BinaryExpr, keyType KeyType) ([]*planpb.GenericValue, bool) {
	lCandidates, lPrunable := ParseKeysFromExpr(expr.GetLeft(), keyType)
	rCandidates, rPrunable := ParseKeysFromExpr(expr.GetRight(), keyType)

	switch expr.GetOp() {
	case planpb.BinaryExpr_LogicalAnd:
		switch {
		case lPrunable && rPrunable:
			// case: partition_key in [7, 8] && partition_key in [8, 9]
			// return [7, 8] intersect [8, 9] = [8]
			return IntersectKeys(lCandidates, rCandidates), true
		case lPrunable:
			return lCandidates, true
		case rPrunable:
			return rCandidates, true
		}
	case planpb.BinaryExpr_LogicalOr:
		if lPrunable && rPrunable {
			// case: partition_key in [7, 8] || partition_key in [8, 9]
			// return [7, 8] concatenated with [8, 9]
			//
			// lCandidates may alias a slice owned by the plan (e.g. the
			// values of a TermExpr). Clamping its capacity forces append to
			// copy instead of writing into that shared backing array.
			return append(lCandidates[:len(lCandidates):len(lCandidates)], rCandidates...), true
		}
	}
	return nil, false
}
// ParsePartitionKeysFromUnaryExpr reports whether a UnaryExpr is prunable.
// Currently only "Not" is supported as a unary operator, which means a unary
// expression is never prunable: the function always returns (nil, false) and
// ignores both expr and keyType.
func ParsePartitionKeysFromUnaryExpr(expr *planpb.UnaryExpr, keyType KeyType) ([]*planpb.GenericValue, bool) {
return nil, false
}
// ParsePartitionKeysFromTermExpr reports whether a TermExpr is prunable.
// The expression is prunable when its column carries the flag matching
// keyType (partition key or clustering key); in that case the term values are
// returned as candidates. Otherwise it returns (nil, false).
func ParsePartitionKeysFromTermExpr(expr *planpb.TermExpr, keyType KeyType) ([]*planpb.GenericValue, bool) {
	column := expr.GetColumnInfo()

	var matched bool
	switch keyType {
	case PartitionKey:
		matched = column.GetIsPartitionKey()
	case ClusteringKey:
		matched = column.GetIsClusteringKey()
	}

	if !matched {
		return nil, false
	}
	return expr.GetValues(), true
}
// ParsePartitionKeysFromUnaryRangeExpr reports whether a UnaryRangeExpr is
// prunable. Only an equality comparison on a column whose key flag matches
// keyType is prunable; the compared value is then returned as the single
// candidate. Every other case yields (nil, false).
func ParsePartitionKeysFromUnaryRangeExpr(expr *planpb.UnaryRangeExpr, keyType KeyType) (candidate []*planpb.GenericValue, prunable bool) {
	if expr.GetOp() != planpb.OpType_Equal {
		return nil, false
	}

	column := expr.GetColumnInfo()
	onPartitionKey := keyType == PartitionKey && column.GetIsPartitionKey()
	onClusteringKey := keyType == ClusteringKey && column.GetIsClusteringKey()
	if !onPartitionKey && !onClusteringKey {
		return nil, false
	}
	return []*planpb.GenericValue{expr.Value}, true
}
// ParseKeysFromExpr extracts candidate key values of the given key type from
// expr by dispatching on the concrete expression kind.
// When the expression can limit the search scope to specific keys, it returns
// those key values together with true. Expression kinds that are not handled
// here (and a nil expression) return (nil, false), meaning no pruning is
// possible based on this expression.
func ParseKeysFromExpr(expr *planpb.Expr, keyType KeyType) (candidates []*planpb.GenericValue, prunable bool) {
	switch e := expr.GetExpr().(type) {
	case *planpb.Expr_BinaryExpr:
		return ParsePartitionKeysFromBinaryExpr(e.BinaryExpr, keyType)
	case *planpb.Expr_UnaryExpr:
		return ParsePartitionKeysFromUnaryExpr(e.UnaryExpr, keyType)
	case *planpb.Expr_TermExpr:
		return ParsePartitionKeysFromTermExpr(e.TermExpr, keyType)
	case *planpb.Expr_UnaryRangeExpr:
		return ParsePartitionKeysFromUnaryRangeExpr(e.UnaryRangeExpr, keyType)
	default:
		return nil, false
	}
}
// IntersectKeys returns the values present in both l and r.
// The value kind is decided by the first element of l; all elements are
// expected to be of that same kind. Only int64 and string values are
// supported, any other kind yields nil. If either input is empty the result
// is nil. The result is built from a set, so it carries no duplicates.
func IntersectKeys(l []*planpb.GenericValue, r []*planpb.GenericValue) []*planpb.GenericValue {
	if len(l) == 0 || len(r) == 0 {
		return nil
	}

	// all elements shall be in same type
	switch l[0].Val.(type) {
	case *planpb.GenericValue_Int64Val:
		unwrap := func(v *planpb.GenericValue, _ int) int64 { return v.GetInt64Val() }
		wrap := func(v int64, _ int) *planpb.GenericValue {
			return &planpb.GenericValue{Val: &planpb.GenericValue_Int64Val{Int64Val: v}}
		}
		left := typeutil.NewSet(lo.Map(l, unwrap)...)
		right := typeutil.NewSet(lo.Map(r, unwrap)...)
		common := left.Intersection(right)
		return lo.Map(common.Collect(), wrap)
	case *planpb.GenericValue_StringVal:
		unwrap := func(v *planpb.GenericValue, _ int) string { return v.GetStringVal() }
		wrap := func(v string, _ int) *planpb.GenericValue {
			return &planpb.GenericValue{Val: &planpb.GenericValue_StringVal{StringVal: v}}
		}
		left := typeutil.NewSet(lo.Map(l, unwrap)...)
		right := typeutil.NewSet(lo.Map(r, unwrap)...)
		common := left.Intersection(right)
		return lo.Map(common.Collect(), wrap)
	default:
		return nil
	}
}
// ParseKeys returns the candidate key values of the given key type found in
// expr, or nil when the expression is not prunable.
func ParseKeys(expr *planpb.Expr, kType KeyType) []*planpb.GenericValue {
	candidates, prunable := ParseKeysFromExpr(expr, kType)
	if prunable {
		// TODO return empty result if prunable and candidates lens is 0
		return candidates
	}
	return nil
}
// PlanRange describes a value range whose bounds are plan generic values.
// A nil bound is converted by ToIntRange / ToStrRange into a sentinel value
// (math.MinInt64 / math.MaxInt64 for integers, "" for strings) with the
// corresponding include flag set to false.
type PlanRange struct {
// lower is the lower bound; nil when no lower bound is set.
lower *planpb.GenericValue
// upper is the upper bound; nil when no upper bound is set.
upper *planpb.GenericValue
// includeLower tells whether the lower bound itself belongs to the range.
includeLower bool
// includeUpper tells whether the upper bound itself belongs to the range.
includeUpper bool
}
// ToIntRange converts the plan range into an IntRange using the int64 value
// of each bound. A nil lower bound becomes math.MinInt64 and a nil upper
// bound becomes math.MaxInt64, both marked as exclusive.
func (planRange *PlanRange) ToIntRange() *IntRange {
	// Start from the "no bound" defaults and override the sides that are set.
	iRange := &IntRange{
		lower:        math.MinInt64,
		upper:        math.MaxInt64,
		includeLower: false,
		includeUpper: false,
	}
	if bound := planRange.lower; bound != nil {
		iRange.lower = bound.GetInt64Val()
		iRange.includeLower = planRange.includeLower
	}
	if bound := planRange.upper; bound != nil {
		iRange.upper = bound.GetInt64Val()
		iRange.includeUpper = planRange.includeUpper
	}
	return iRange
}
// ToStrRange converts the plan range into a StrRange using the string value
// of each bound. A nil bound becomes the empty string, marked as exclusive.
func (planRange *PlanRange) ToStrRange() *StrRange {
	// The zero value already encodes "no bound": "" and exclusive.
	sRange := &StrRange{}
	if bound := planRange.lower; bound != nil {
		sRange.lower = bound.GetStringVal()
		sRange.includeLower = planRange.includeLower
	}
	if bound := planRange.upper; bound != nil {
		sRange.upper = bound.GetStringVal()
		sRange.includeUpper = planRange.includeUpper
	}
	return sRange
}
// IntRange is a range over int64 values with per-bound inclusiveness flags.
type IntRange struct {
	lower        int64
	upper        int64
	includeLower bool
	includeUpper bool
}

// NewIntRange builds an IntRange spanning l to r; includeL and includeR
// record whether the respective bound belongs to the range.
func NewIntRange(l int64, r int64, includeL bool, includeR bool) *IntRange {
	rng := new(IntRange)
	rng.lower, rng.upper = l, r
	rng.includeLower, rng.includeUpper = includeL, includeR
	return rng
}

// IntRangeOverlap reports whether range1 and range2 overlap, i.e. whether the
// larger of the two lower bounds does not exceed the smaller of the two upper
// bounds. The include flags are not consulted, so ranges that merely touch at
// a bound are reported as overlapping.
func IntRangeOverlap(range1 *IntRange, range2 *IntRange) bool {
	// Largest lower bound.
	leftBound := range1.lower
	if range2.lower > leftBound {
		leftBound = range2.lower
	}
	// Smallest upper bound.
	rightBound := range2.upper
	if range1.upper < rightBound {
		rightBound = range1.upper
	}
	return rightBound >= leftBound
}
// StrRange is a range over string values with per-bound inclusiveness flags.
// An empty upper bound is interpreted by StrRangeOverlap as "no upper bound".
type StrRange struct {
	lower        string
	upper        string
	includeLower bool
	includeUpper bool
}

// NewStrRange builds a StrRange spanning l to r; includeL and includeR record
// whether the respective bound belongs to the range.
func NewStrRange(l string, r string, includeL bool, includeR bool) *StrRange {
	return &StrRange{
		lower:        l,
		upper:        r,
		includeLower: includeL,
		includeUpper: includeR,
	}
}

// StrRangeOverlap reports whether range1 and range2 overlap, i.e. whether the
// larger of the two lower bounds does not exceed the smaller of the two upper
// bounds.
//
// An empty upper bound means "unbounded above" for either argument. An empty
// lower bound needs no special handling because "" already sorts before every
// other string. The include flags are not consulted, so ranges that merely
// touch at a bound are reported as overlapping.
func StrRangeOverlap(range1 *StrRange, range2 *StrRange) bool {
	leftBound := range1.lower
	if range2.lower > leftBound {
		leftBound = range2.lower
	}

	var rightBound string
	switch {
	case range1.upper == "" && range2.upper == "":
		// Both ranges extend to infinity, so they always share a tail.
		return true
	case range1.upper == "":
		// Only range2 limits the upper side. Without this case the empty
		// string would win the "smaller upper bound" comparison and make
		// overlapping ranges look disjoint.
		rightBound = range2.upper
	case range2.upper == "":
		rightBound = range1.upper
	case range1.upper < range2.upper:
		rightBound = range1.upper
	default:
		rightBound = range2.upper
	}
	return leftBound <= rightBound
}
// GetCommonDataType derives a data type from the first non-nil bound found in
// the order a.lower, a.upper, b.lower, b.upper. An int64 bound maps to
// DataType_Int64 and a string bound to DataType_VarChar; when no bound is set
// or the bound holds another kind of value, DataType_None is returned.
func GetCommonDataType(a *PlanRange, b *PlanRange) schemapb.DataType {
	// b is consulted only when a has no bound at all.
	bound := a.lower
	if bound == nil {
		bound = a.upper
	}
	if bound == nil {
		bound = b.lower
	}
	if bound == nil {
		bound = b.upper
	}
	if bound == nil {
		return schemapb.DataType_None
	}

	switch bound.GetVal().(type) {
	case *planpb.GenericValue_Int64Val:
		return schemapb.DataType_Int64
	case *planpb.GenericValue_StringVal:
		return schemapb.DataType_VarChar
	default:
		return schemapb.DataType_None
	}
}
// ValidatePartitionKeyIsolation checks that expr is acceptable under
// partition key isolation. It returns the error raised while walking the
// expression, or an error when the walk does not find a partition key
// predicate at all; nil means the expression is valid.
func ValidatePartitionKeyIsolation(expr *planpb.Expr) error {
	found, err := validatePartitionKeyIsolationFromExpr(expr)
	switch {
	case err != nil:
		return err
	case !found:
		return errors.New("partition key not found in expr or the expr is invalid when validating partition key isolation")
	default:
		return nil
	}
}
// validatePartitionKeyIsolationFromExpr dispatches on the concrete expression
// kind. The boolean tells whether a partition key predicate was found in the
// expression; the error is non-nil when the partition key is used in an
// unsupported way. Expression kinds not listed here report (false, nil).
func validatePartitionKeyIsolationFromExpr(expr *planpb.Expr) (bool, error) {
	switch e := expr.GetExpr().(type) {
	case *planpb.Expr_BinaryExpr:
		return validatePartitionKeyIsolationFromBinaryExpr(e.BinaryExpr)
	case *planpb.Expr_UnaryExpr:
		return validatePartitionKeyIsolationFromUnaryExpr(e.UnaryExpr)
	case *planpb.Expr_TermExpr:
		return validatePartitionKeyIsolationFromTermExpr(e.TermExpr)
	case *planpb.Expr_UnaryRangeExpr:
		return validatePartitionKeyIsolationFromRangeExpr(e.UnaryRangeExpr)
	case *planpb.Expr_BinaryRangeExpr:
		return validatePartitionKeyIsolationFromBinaryRangeExpr(e.BinaryRangeExpr)
	default:
		return false, nil
	}
}
// validatePartitionKeyIsolationFromBinaryExpr validates both operands of a
// binary expression. An error from the left operand is returned before the
// right operand is visited. With no error on either side:
//   - AND reports whether either side contains the partition key,
//     e.g. partition_key_field == 1 && other_field > 10
//   - OR is rejected when either side contains the partition key,
//     e.g. partition_key_field == 1 || other_field > 10
//   - any other operator reports (false, nil).
func validatePartitionKeyIsolationFromBinaryExpr(expr *planpb.BinaryExpr) (bool, error) {
	leftFound, err := validatePartitionKeyIsolationFromExpr(expr.Left)
	if err != nil {
		return leftFound, err
	}
	rightFound, err := validatePartitionKeyIsolationFromExpr(expr.Right)
	if err != nil {
		return rightFound, err
	}

	found := leftFound || rightFound
	switch expr.Op {
	case planpb.BinaryExpr_LogicalAnd:
		return found, nil
	case planpb.BinaryExpr_LogicalOr:
		if found {
			return true, errors.New("partition key isolation does not support OR")
		}
		return false, nil
	default:
		return false, nil
	}
}
// validatePartitionKeyIsolationFromUnaryExpr validates the child of a unary
// expression. A NOT applied to an expression containing the partition key is
// rejected; a NOT over anything else reports (false, nil). For other unary
// operators the child's result is passed through.
func validatePartitionKeyIsolationFromUnaryExpr(expr *planpb.UnaryExpr) (bool, error) {
	found, err := validatePartitionKeyIsolationFromExpr(expr.GetChild())
	if err != nil {
		return found, err
	}
	if expr.Op != planpb.UnaryExpr_Not {
		return found, nil
	}
	if !found {
		return false, nil
	}
	return true, errors.New("partition key isolation does not support NOT")
}
// validatePartitionKeyIsolationFromTermExpr rejects IN predicates on the
// partition key, e.g. partition_key_field in [1, 2, 3]. Term expressions on
// other columns report (false, nil).
func validatePartitionKeyIsolationFromTermExpr(expr *planpb.TermExpr) (bool, error) {
	if !expr.GetColumnInfo().GetIsPartitionKey() {
		return false, nil
	}
	return true, errors.New("partition key isolation does not support IN")
}
// validatePartitionKeyIsolationFromRangeExpr accepts only equality
// comparisons on the partition key, e.g. partition_key_field == 1. Any other
// operator on the partition key is rejected with an error naming the
// operator. Comparisons on other columns report (false, nil).
func validatePartitionKeyIsolationFromRangeExpr(expr *planpb.UnaryRangeExpr) (bool, error) {
	if !expr.GetColumnInfo().GetIsPartitionKey() {
		return false, nil
	}
	if op := expr.GetOp(); op != planpb.OpType_Equal {
		return true, errors.Newf("partition key isolation does not support %s", op.String())
	}
	return true, nil
}
// validatePartitionKeyIsolationFromBinaryRangeExpr rejects binary range
// predicates on the partition key. Binary ranges on other columns report
// (false, nil).
func validatePartitionKeyIsolationFromBinaryRangeExpr(expr *planpb.BinaryRangeExpr) (bool, error) {
	if !expr.GetColumnInfo().GetIsPartitionKey() {
		return false, nil
	}
	return true, errors.New("partition key isolation does not support BinaryRange")
}