milvus/internal/util/exprutil/expr_checker.go

package exprutil

import (
	"math"

	"github.com/cockroachdb/errors"
	"github.com/samber/lo"

	"github.com/milvus-io/milvus-proto/go-api/v2/schemapb"
	"github.com/milvus-io/milvus/pkg/v2/proto/planpb"
	"github.com/milvus-io/milvus/pkg/v2/util/typeutil"
)

// KeyType names the kind of key column that key parsing looks for.
type KeyType int64

const (
	// PartitionKey targets columns whose column info is flagged as partition key.
	PartitionKey KeyType = iota
	// ClusteringKey targets columns whose column info is flagged as clustering key.
	ClusteringKey
)

// ParseExprFromPlan returns the predicate expression carried by a plan node.
// Only vector-ANN and query nodes are handled; a plan without a node, or with
// any other node kind, yields an error.
func ParseExprFromPlan(plan *planpb.PlanNode) (*planpb.Expr, error) {
	switch n := plan.GetNode().(type) {
	case nil:
		// GetNode returned an unset interface value.
		return nil, errors.New("can't get expr from empty plan node")
	case *planpb.PlanNode_VectorAnns:
		return n.VectorAnns.GetPredicates(), nil
	case *planpb.PlanNode_Query:
		return n.Query.GetPredicates(), nil
	default:
		return nil, errors.New("unsupported plan node type")
	}
}

// ParsePartitionKeysFromBinaryExpr reports whether a BinaryExpr is prunable for
// the given key type and, if so, returns the candidate key values derived from
// its two operands according to the logical operator:
//
//   - LogicalAnd: prunable when at least one side is prunable. If both sides
//     are, the result is IntersectKeys of the two candidate lists; otherwise
//     the candidates of the single prunable side are returned.
//   - LogicalOr: prunable only when both sides are prunable. The result is the
//     left candidates followed by the right candidates (duplicates are kept).
//   - any other operator: not prunable.
//
// The returned slice never shares writable spare capacity with the candidate
// slices of the operands, so building the OR result cannot modify values that
// still belong to the plan.
func ParsePartitionKeysFromBinaryExpr(expr *planpb.BinaryExpr, keyType KeyType) ([]*planpb.GenericValue, bool) {
	// Getters are used so that a nil BinaryExpr degrades to "not prunable"
	// instead of panicking on a field access.
	lCandidates, lPrunable := ParseKeysFromExpr(expr.GetLeft(), keyType)
	rCandidates, rPrunable := ParseKeysFromExpr(expr.GetRight(), keyType)

	switch expr.GetOp() {
	case planpb.BinaryExpr_LogicalAnd:
		switch {
		case lPrunable && rPrunable:
			// case: partition_key in [7, 8] && partition_key in [8, 9]
			// return [7, 8] intersect [8, 9] = [8]
			return IntersectKeys(lCandidates, rCandidates), true
		case lPrunable:
			return lCandidates, true
		case rPrunable:
			return rCandidates, true
		}
		return nil, false

	case planpb.BinaryExpr_LogicalOr:
		if lPrunable && rPrunable {
			// case: partition_key in [7, 8] || partition_key in [8, 9]
			// return [7, 8] followed by [8, 9] = [7, 8, 8, 9]
			//
			// lCandidates may be the very slice stored in a TermExpr. Capping
			// its capacity forces append to copy into a fresh backing array
			// whenever it has to add elements, so the expression's own values
			// are never overwritten.
			capped := lCandidates[:len(lCandidates):len(lCandidates)]
			return append(capped, rCandidates...), true
		}
		return nil, false
	}

	return nil, false
}

// ParsePartitionKeysFromUnaryExpr reports whether a UnaryExpr is prunable.
// Currently only "Not" is supported as a unary operator, so a unary expression
// is never prunable: the result is always (nil, false) regardless of the
// arguments.
func ParsePartitionKeysFromUnaryExpr(expr *planpb.UnaryExpr, keyType KeyType) ([]*planpb.GenericValue, bool) {
	const prunable = false
	return nil, prunable
}

// ParsePartitionKeysFromTermExpr reports whether a TermExpr is prunable.
// The expression is prunable when its column is flagged as the kind of key
// requested by keyType (partition key or clustering key); in that case the
// term's values are returned as candidates. Otherwise it returns (nil, false).
func ParsePartitionKeysFromTermExpr(expr *planpb.TermExpr, keyType KeyType) ([]*planpb.GenericValue, bool) {
	column := expr.GetColumnInfo()

	var onRequestedKey bool
	switch keyType {
	case PartitionKey:
		onRequestedKey = column.GetIsPartitionKey()
	case ClusteringKey:
		onRequestedKey = column.GetIsClusteringKey()
	}

	if !onRequestedKey {
		return nil, false
	}
	return expr.GetValues(), true
}

// ParsePartitionKeysFromUnaryRangeExpr reports whether a UnaryRangeExpr is
// prunable. Only an equality comparison on a column flagged as the requested
// key kind qualifies; the compared value is then returned as the single
// candidate. Every other operator or column yields (nil, false).
func ParsePartitionKeysFromUnaryRangeExpr(expr *planpb.UnaryRangeExpr, keyType KeyType) (candidate []*planpb.GenericValue, prunable bool) {
	if expr.GetOp() != planpb.OpType_Equal {
		return nil, false
	}

	column := expr.GetColumnInfo()
	onPartitionKey := keyType == PartitionKey && column.GetIsPartitionKey()
	onClusteringKey := keyType == ClusteringKey && column.GetIsClusteringKey()
	if !onPartitionKey && !onClusteringKey {
		return nil, false
	}

	return []*planpb.GenericValue{expr.Value}, true
}

// ParseKeysFromExpr extracts key values of the given key type from an expression.
// It dispatches on the concrete expression kind (binary, unary, term, unary
// range). If the expression can limit the search scope to specific keys, the
// corresponding key values are returned together with prunable == true.
// Otherwise, including for every expression kind not listed above, it returns
// nil and false, meaning no pruning is possible based on this expression.
func ParseKeysFromExpr(expr *planpb.Expr, keyType KeyType) (candidates []*planpb.GenericValue, prunable bool) {
	switch e := expr.GetExpr().(type) {
	case *planpb.Expr_BinaryExpr:
		return ParsePartitionKeysFromBinaryExpr(e.BinaryExpr, keyType)
	case *planpb.Expr_UnaryExpr:
		return ParsePartitionKeysFromUnaryExpr(e.UnaryExpr, keyType)
	case *planpb.Expr_TermExpr:
		return ParsePartitionKeysFromTermExpr(e.TermExpr, keyType)
	case *planpb.Expr_UnaryRangeExpr:
		return ParsePartitionKeysFromUnaryRangeExpr(e.UnaryRangeExpr, keyType)
	default:
		return nil, false
	}
}

// IntersectKeys returns the values present in both l and r.
//
// The value kind is decided by the first element of l; all elements of both
// lists are expected to be of that same kind. Only int64 and string values are
// supported: any other kind, or an empty input on either side, yields nil.
// The result is collected from a set, so it holds each common value once.
func IntersectKeys(l []*planpb.GenericValue, r []*planpb.GenericValue) []*planpb.GenericValue {
	if len(l) == 0 || len(r) == 0 {
		return nil
	}

	switch l[0].Val.(type) {
	case *planpb.GenericValue_Int64Val:
		unwrap := func(v *planpb.GenericValue, _ int) int64 { return v.GetInt64Val() }
		wrap := func(v int64, _ int) *planpb.GenericValue {
			return &planpb.GenericValue{Val: &planpb.GenericValue_Int64Val{Int64Val: v}}
		}
		left := typeutil.NewSet(lo.Map(l, unwrap)...)
		right := typeutil.NewSet(lo.Map(r, unwrap)...)
		return lo.Map(left.Intersection(right).Collect(), wrap)

	case *planpb.GenericValue_StringVal:
		unwrap := func(v *planpb.GenericValue, _ int) string { return v.GetStringVal() }
		wrap := func(v string, _ int) *planpb.GenericValue {
			return &planpb.GenericValue{Val: &planpb.GenericValue_StringVal{StringVal: v}}
		}
		left := typeutil.NewSet(lo.Map(l, unwrap)...)
		right := typeutil.NewSet(lo.Map(r, unwrap)...)
		return lo.Map(left.Intersection(right).Collect(), wrap)

	default:
		// unsupported value kind
		return nil
	}
}

// ParseKeys returns the candidate key values of kind kType found in expr, or
// nil when the expression is not prunable.
func ParseKeys(expr *planpb.Expr, kType KeyType) []*planpb.GenericValue {
	// TODO return empty result if prunable and candidates lens is 0
	if keys, ok := ParseKeysFromExpr(expr, kType); ok {
		return keys
	}
	return nil
}

// PlanRange is a value range whose bounds are plan generic values.
// A nil lower or upper bound means the range has no limit on that side; the
// include flags tell whether the corresponding bound itself is in the range.
type PlanRange struct {
	lower, upper               *planpb.GenericValue
	includeLower, includeUpper bool
}

// ToIntRange converts the plan range into an IntRange by reading the int64
// value of each bound. A missing (nil) bound becomes math.MinInt64 or
// math.MaxInt64 respectively and is marked as exclusive.
func (planRange *PlanRange) ToIntRange() *IntRange {
	// Start from the fully unbounded range; the include flags default to false.
	res := &IntRange{lower: math.MinInt64, upper: math.MaxInt64}

	if lb := planRange.lower; lb != nil {
		res.lower = lb.GetInt64Val()
		res.includeLower = planRange.includeLower
	}
	if ub := planRange.upper; ub != nil {
		res.upper = ub.GetInt64Val()
		res.includeUpper = planRange.includeUpper
	}
	return res
}

// ToStrRange converts the plan range into a StrRange by reading the string
// value of each bound. A missing (nil) bound becomes the empty string and is
// marked as exclusive.
func (planRange *PlanRange) ToStrRange() *StrRange {
	// The zero value already has empty bounds and both include flags false,
	// which is exactly the representation of a missing bound.
	res := &StrRange{}

	if lb := planRange.lower; lb != nil {
		res.lower = lb.GetStringVal()
		res.includeLower = planRange.includeLower
	}
	if ub := planRange.upper; ub != nil {
		res.upper = ub.GetStringVal()
		res.includeUpper = planRange.includeUpper
	}
	return res
}

// IntRange is a range over int64 values. The include flags record whether
// each bound itself belongs to the range.
type IntRange struct {
	lower, upper               int64
	includeLower, includeUpper bool
}

// NewIntRange builds an IntRange from its lower bound l, upper bound r and the
// inclusiveness flag of each bound.
func NewIntRange(l int64, r int64, includeL bool, includeR bool) *IntRange {
	rng := new(IntRange)
	rng.lower, rng.upper = l, r
	rng.includeLower, rng.includeUpper = includeL, includeR
	return rng
}

// IntRangeOverlap reports whether the two ranges overlap.
// Both ranges are compared as closed intervals: the include flags are not
// consulted, so ranges that merely touch at a bound count as overlapping.
func IntRangeOverlap(range1 *IntRange, range2 *IntRange) bool {
	// The intersection starts at the larger of the two lower bounds ...
	start := range1.lower
	if range2.lower > start {
		start = range2.lower
	}
	// ... and ends at the smaller of the two upper bounds.
	end := range2.upper
	if range1.upper < end {
		end = range1.upper
	}
	return start <= end
}

// StrRange is a range over string values, ordered by Go's string comparison.
// The include flags record whether each bound itself belongs to the range.
// An empty upper bound is interpreted by StrRangeOverlap as "no upper limit".
type StrRange struct {
	lower, upper               string
	includeLower, includeUpper bool
}

// NewStrRange builds a StrRange from its lower bound l, upper bound r and the
// inclusiveness flag of each bound.
func NewStrRange(l string, r string, includeL bool, includeR bool) *StrRange {
	return &StrRange{
		lower:        l,
		upper:        r,
		includeLower: includeL,
		includeUpper: includeR,
	}
}

// StrRangeOverlap reports whether the two ranges overlap.
//
// An empty upper bound means the range is unbounded above. This holds for
// either argument, so the result does not depend on argument order. Because an
// empty string cannot be told apart from a real upper bound of "", such a range
// is always treated as unbounded; this can only report extra overlaps, never
// miss one.
//
// Apart from that, both ranges are compared as closed intervals: the include
// flags are not consulted, so ranges that merely touch at a bound count as
// overlapping.
func StrRangeOverlap(range1 *StrRange, range2 *StrRange) bool {
	// The intersection starts at the larger of the two lower bounds.
	leftBound := range1.lower
	if range2.lower > leftBound {
		leftBound = range2.lower
	}

	// The intersection ends at the smaller of the two upper bounds, where an
	// empty upper bound stands for "no limit" and therefore never wins.
	var rightBound string
	switch {
	case range1.upper == "" && range2.upper == "":
		// Both ranges extend upwards without limit, so they always meet.
		return true
	case range1.upper == "":
		rightBound = range2.upper
	case range2.upper == "":
		rightBound = range1.upper
	case range1.upper < range2.upper:
		rightBound = range1.upper
	default:
		rightBound = range2.upper
	}
	return leftBound <= rightBound
}

// GetCommonDataType derives a data type from the bounds of two plan ranges.
// It inspects the first bound that is set, looking at a's lower and upper
// bound first and at b's bounds only if a has none. An int64 bound maps to
// DataType_Int64 and a string bound to DataType_VarChar; if no bound is set or
// the bound holds any other kind of value, DataType_None is returned.
func GetCommonDataType(a *PlanRange, b *PlanRange) schemapb.DataType {
	// firstBound prefers the lower bound of a range over its upper bound.
	firstBound := func(r *PlanRange) *planpb.GenericValue {
		if r.lower != nil {
			return r.lower
		}
		return r.upper
	}

	bound := firstBound(a)
	if bound == nil {
		// b is only consulted when a carries no bound at all.
		bound = firstBound(b)
	}
	if bound == nil {
		return schemapb.DataType_None
	}

	switch bound.Val.(type) {
	case *planpb.GenericValue_Int64Val:
		return schemapb.DataType_Int64
	case *planpb.GenericValue_StringVal:
		return schemapb.DataType_VarChar
	default:
		return schemapb.DataType_None
	}
}

// ValidatePartitionKeyIsolation checks that expr is usable with partition key
// isolation. It returns the validation error raised while walking the
// expression, or an error when no partition key was found in it; nil means the
// expression is acceptable.
func ValidatePartitionKeyIsolation(expr *planpb.Expr) error {
	found, err := validatePartitionKeyIsolationFromExpr(expr)
	switch {
	case err != nil:
		return err
	case !found:
		return errors.New("partition key not found in expr or the expr is invalid when validating partition key isolation")
	default:
		return nil
	}
}

// validatePartitionKeyIsolationFromExpr dispatches the isolation check to the
// validator matching the concrete expression kind. The boolean tells whether a
// partition key was found. Expression kinds without a validator report
// (false, nil).
func validatePartitionKeyIsolationFromExpr(expr *planpb.Expr) (bool, error) {
	switch e := expr.GetExpr().(type) {
	case *planpb.Expr_BinaryExpr:
		return validatePartitionKeyIsolationFromBinaryExpr(e.BinaryExpr)
	case *planpb.Expr_UnaryExpr:
		return validatePartitionKeyIsolationFromUnaryExpr(e.UnaryExpr)
	case *planpb.Expr_TermExpr:
		return validatePartitionKeyIsolationFromTermExpr(e.TermExpr)
	case *planpb.Expr_UnaryRangeExpr:
		return validatePartitionKeyIsolationFromRangeExpr(e.UnaryRangeExpr)
	case *planpb.Expr_BinaryRangeExpr:
		return validatePartitionKeyIsolationFromBinaryRangeExpr(e.BinaryRangeExpr)
	default:
		return false, nil
	}
}

// validatePartitionKeyIsolationFromBinaryExpr validates both operands of a
// binary expression and combines the outcome by operator:
//   - an error from the left operand, then from the right operand, is returned
//     as is together with that operand's result;
//   - LogicalAnd finds the partition key if either side does,
//     e.g. partition_key_field == 1 && other_field > 10;
//   - LogicalOr is rejected as soon as either side involves the partition key,
//     e.g. partition_key_field == 1 || other_field > 10;
//   - any other operator reports (false, nil).
func validatePartitionKeyIsolationFromBinaryExpr(expr *planpb.BinaryExpr) (bool, error) {
	leftFound, err := validatePartitionKeyIsolationFromExpr(expr.Left)
	if err != nil {
		return leftFound, err
	}
	rightFound, err := validatePartitionKeyIsolationFromExpr(expr.Right)
	if err != nil {
		return rightFound, err
	}

	// From here on neither side produced an error.
	found := leftFound || rightFound
	switch expr.Op {
	case planpb.BinaryExpr_LogicalAnd:
		return found, nil
	case planpb.BinaryExpr_LogicalOr:
		if found {
			return true, errors.New("partition key isolation does not support OR")
		}
		return false, nil
	default:
		return false, nil
	}
}

// validatePartitionKeyIsolationFromUnaryExpr validates the child of a unary
// expression. An error from the child is passed through. A NOT over a child
// that involves the partition key is rejected, and a NOT over any other child
// reports (false, nil). For other operators the child's result is returned
// unchanged.
func validatePartitionKeyIsolationFromUnaryExpr(expr *planpb.UnaryExpr) (bool, error) {
	found, err := validatePartitionKeyIsolationFromExpr(expr.GetChild())
	if err != nil {
		return found, err
	}
	if expr.Op != planpb.UnaryExpr_Not {
		return found, nil
	}
	if found {
		return true, errors.New("partition key isolation does not support NOT")
	}
	return false, nil
}

// validatePartitionKeyIsolationFromTermExpr rejects a term expression on the
// partition key column, e.g. partition_key_field in [1, 2, 3]. Terms on other
// columns report (false, nil).
func validatePartitionKeyIsolationFromTermExpr(expr *planpb.TermExpr) (bool, error) {
	if !expr.GetColumnInfo().GetIsPartitionKey() {
		return false, nil
	}
	return true, errors.New("partition key isolation does not support IN")
}

// validatePartitionKeyIsolationFromRangeExpr validates a unary range
// expression. On the partition key column only equality is accepted,
// e.g. partition_key_field == 1; every other operator is rejected with an
// error naming it. Expressions on other columns report (false, nil).
func validatePartitionKeyIsolationFromRangeExpr(expr *planpb.UnaryRangeExpr) (bool, error) {
	switch {
	case !expr.GetColumnInfo().GetIsPartitionKey():
		return false, nil
	case expr.GetOp() == planpb.OpType_Equal:
		return true, nil
	default:
		return true, errors.Newf("partition key isolation does not support %s", expr.GetOp().String())
	}
}

// validatePartitionKeyIsolationFromBinaryRangeExpr rejects a binary range
// expression on the partition key column. Expressions on other columns report
// (false, nil).
func validatePartitionKeyIsolationFromBinaryRangeExpr(expr *planpb.BinaryRangeExpr) (bool, error) {
	if !expr.GetColumnInfo().GetIsPartitionKey() {
		return false, nil
	}
	return true, errors.New("partition key isolation does not support BinaryRange")
}