From 1399d955fc95f1a7b993a551ece7116872f206c1 Mon Sep 17 00:00:00 2001
From: "zhenshan.cao"
Date: Mon, 29 Dec 2025 11:05:20 +0800
Subject: [PATCH] enhance: optimize timestamptz comparison without interval (#46619)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

issue: https://github.com/milvus-io/milvus/issues/46618

• **Core Invariant**: TIMESTAMPTZ values are internally stored as int64 Unix microseconds. Simple comparisons without intervals can safely use native int64 range evaluation (`ExecRangeVisitorImpl`) and `UnaryRangeExpr` to leverage index-based scans, since the underlying data type and comparison semantics remain unchanged.
• **Logic Optimization**: The parser now branches on interval presence. When `ctx.GetOp1() == nil` (no interval), it returns a lightweight `UnaryRangeExpr` for fast indexed range scans. When an interval exists, it falls back to the heavier `TimestamptzArithCompareExpr` for arithmetic evaluation. This eliminates redundant ISO interval parsing and type conversions for the common case of interval-free comparisons.
• **No Regression**: The `UnaryRangeExpr` path preserves exact comparison semantics by treating TIMESTAMPTZ as int64 directly, matching the storage format. For reverse comparisons (e.g., `'2025-01-01' > column`), operator reversal correctly normalizes to column-centric form (`column < '2025-01-01'`), maintaining logical equivalence. Interval-based comparisons continue through the unchanged `TimestamptzArithCompareExpr` path.
• **Coverage**: Both forward (column left of operator) and reverse (column right of operator) comparison syntaxes are handled with explicit branching logic, ensuring the optimization applies uniformly across comparison patterns.
Signed-off-by: zhenshan.cao --- .../core/src/exec/expression/UnaryExpr.cpp | 4 + .../parser/planparserv2/parser_visitor.go | 147 +++++++++++------- 2 files changed, 95 insertions(+), 56 deletions(-) diff --git a/internal/core/src/exec/expression/UnaryExpr.cpp b/internal/core/src/exec/expression/UnaryExpr.cpp index a11ab01800..9910e880a7 100644 --- a/internal/core/src/exec/expression/UnaryExpr.cpp +++ b/internal/core/src/exec/expression/UnaryExpr.cpp @@ -188,6 +188,10 @@ PhyUnaryRangeFilterExpr::Eval(EvalCtx& context, VectorPtr& result) { result = ExecRangeVisitorImpl(context); break; } + case DataType::TIMESTAMPTZ: { + result = ExecRangeVisitorImpl(context); + break; + } case DataType::FLOAT: { result = ExecRangeVisitorImpl(context); break; diff --git a/internal/parser/planparserv2/parser_visitor.go b/internal/parser/planparserv2/parser_visitor.go index 8ac764732e..f741bf7f5a 100644 --- a/internal/parser/planparserv2/parser_visitor.go +++ b/internal/parser/planparserv2/parser_visitor.go @@ -2099,6 +2099,15 @@ func (v *ParserVisitor) VisitSTDWithin(ctx *parser.STDWithinContext) interface{} } } +// VisitTimestamptzCompareForward handles comparison expressions where the column +// is on the left side of the operator. +// Syntax example: column > '2025-01-01' [ + INTERVAL 'P1D' ] +// +// Optimization Logic: +// 1. Quick Path: If no INTERVAL is provided, it generates a UnaryRangeExpr +// to enable index-based scan performance in Milvus. +// 2. Slow Path: If an INTERVAL exists, it generates a TimestamptzArithCompareExpr +// for specialized arithmetic evaluation. 
func (v *ParserVisitor) VisitTimestamptzCompareForward(ctx *parser.TimestamptzCompareForwardContext) interface{} { colExpr, err := v.translateIdentifier(ctx.Identifier().GetText()) identifier := ctx.Identifier().Accept(v) @@ -2109,53 +2118,70 @@ func (v *ParserVisitor) VisitTimestamptzCompareForward(ctx *parser.TimestamptzCo return fmt.Errorf("field '%s' is not a timestamptz datatype", identifier) } - arithOp := planpb.ArithOpType_Unknown - interval := &planpb.Interval{} - if ctx.GetOp1() != nil { - arithOp = arithExprMap[ctx.GetOp1().GetTokenType()] - rawIntervalStr := ctx.GetInterval_string().GetText() - unquotedIntervalStr, err := convertEscapeSingle(rawIntervalStr) - if err != nil { - return fmt.Errorf("can not convert interval string: %s", rawIntervalStr) - } - interval, err = parseISODuration(unquotedIntervalStr) - if err != nil { - return err - } - } + compareOp := cmpOpMap[ctx.GetOp2().GetTokenType()] rawCompareStr := ctx.GetCompare_string().GetText() unquotedCompareStr, err := convertEscapeSingle(rawCompareStr) if err != nil { return fmt.Errorf("can not convert compare string: %s", rawCompareStr) } - - compareOp := cmpOpMap[ctx.GetOp2().GetTokenType()] - timestamptzInt64, err := timestamptz.ValidateAndReturnUnixMicroTz(unquotedCompareStr, v.args.Timezone) if err != nil { return err } - newExpr := &planpb.Expr{ - Expr: &planpb.Expr_TimestamptzArithCompareExpr{ - TimestamptzArithCompareExpr: &planpb.TimestamptzArithCompareExpr{ - TimestamptzColumn: toColumnInfo(colExpr), - ArithOp: arithOp, - Interval: interval, - CompareOp: compareOp, - CompareValue: &planpb.GenericValue{ - Val: &planpb.GenericValue_Int64Val{Int64Val: timestamptzInt64}, + if ctx.GetOp1() == nil { + return &ExprWithType{ + expr: &planpb.Expr{ + Expr: &planpb.Expr_UnaryRangeExpr{ + UnaryRangeExpr: &planpb.UnaryRangeExpr{ + ColumnInfo: toColumnInfo(colExpr), + Op: compareOp, + Value: &planpb.GenericValue{Val: &planpb.GenericValue_Int64Val{Int64Val: timestamptzInt64}}, + }, }, }, - }, + 
dataType: schemapb.DataType_Bool, + } + } + + arithOp := arithExprMap[ctx.GetOp1().GetTokenType()] + rawIntervalStr := ctx.GetInterval_string().GetText() + unquotedIntervalStr, err := convertEscapeSingle(rawIntervalStr) + if err != nil { + return fmt.Errorf("can not convert interval string: %s", rawIntervalStr) + } + interval, err := parseISODuration(unquotedIntervalStr) + if err != nil { + return err } return &ExprWithType{ - expr: newExpr, + expr: &planpb.Expr{ + Expr: &planpb.Expr_TimestamptzArithCompareExpr{ + TimestamptzArithCompareExpr: &planpb.TimestamptzArithCompareExpr{ + TimestamptzColumn: toColumnInfo(colExpr), + ArithOp: arithOp, + Interval: interval, + CompareOp: compareOp, + CompareValue: &planpb.GenericValue{Val: &planpb.GenericValue_Int64Val{Int64Val: timestamptzInt64}}, + }, + }, + }, dataType: schemapb.DataType_Bool, } } +// VisitTimestamptzCompareReverse handles comparison expressions where the column +// is on the right side of the operator. +// Syntax example: '2025-01-01' [ + INTERVAL 'P1D' ] > column +// +// Optimization and Normalization Logic: +// 1. Operator Reversal: The comparison operator is flipped (e.g., '>' to '<') +// to normalize the expression into a column-centric format. +// 2. Quick Path: For simple comparisons without INTERVAL, it generates a +// UnaryRangeExpr with the reversed operator to leverage indexing. +// 3. Slow Path: For complex expressions involving INTERVAL, it produces a +// TimestamptzArithCompareExpr with the reversed operator. 
func (v *ParserVisitor) VisitTimestamptzCompareReverse(ctx *parser.TimestamptzCompareReverseContext) interface{} { colExpr, err := v.translateIdentifier(ctx.Identifier().GetText()) identifier := ctx.Identifier().GetText() @@ -2166,21 +2192,6 @@ func (v *ParserVisitor) VisitTimestamptzCompareReverse(ctx *parser.TimestamptzCo return fmt.Errorf("field '%s' is not a timestamptz datatype", identifier) } - arithOp := planpb.ArithOpType_Unknown - interval := &planpb.Interval{} - if ctx.GetOp1() != nil { - arithOp = arithExprMap[ctx.GetOp1().GetTokenType()] - rawIntervalStr := ctx.GetInterval_string().GetText() - unquotedIntervalStr, err := convertEscapeSingle(rawIntervalStr) - if err != nil { - return fmt.Errorf("can not convert interval string: %s", rawIntervalStr) - } - interval, err = parseISODuration(unquotedIntervalStr) - if err != nil { - return err - } - } - rawCompareStr := ctx.GetCompare_string().GetText() unquotedCompareStr, err := convertEscapeSingle(rawCompareStr) if err != nil { @@ -2188,9 +2199,7 @@ func (v *ParserVisitor) VisitTimestamptzCompareReverse(ctx *parser.TimestamptzCo } originalCompareOp := cmpOpMap[ctx.GetOp2().GetTokenType()] - compareOp := reverseCompareOp(originalCompareOp) - if compareOp == planpb.OpType_Invalid && originalCompareOp != planpb.OpType_Invalid { return fmt.Errorf("unsupported comparison operator for reverse Timestamptz: %s", ctx.GetOp2().GetText()) } @@ -2200,22 +2209,48 @@ func (v *ParserVisitor) VisitTimestamptzCompareReverse(ctx *parser.TimestamptzCo return err } - newExpr := &planpb.Expr{ - Expr: &planpb.Expr_TimestamptzArithCompareExpr{ - TimestamptzArithCompareExpr: &planpb.TimestamptzArithCompareExpr{ - TimestamptzColumn: toColumnInfo(colExpr), - ArithOp: arithOp, - Interval: interval, - CompareOp: compareOp, - CompareValue: &planpb.GenericValue{ - Val: &planpb.GenericValue_Int64Val{Int64Val: timestamptzInt64}, + // Quick Path: No arithmetic operation. Use UnaryRangeExpr for index optimization. 
+ if ctx.GetOp1() == nil { + return &ExprWithType{ + expr: &planpb.Expr{ + Expr: &planpb.Expr_UnaryRangeExpr{ + UnaryRangeExpr: &planpb.UnaryRangeExpr{ + ColumnInfo: toColumnInfo(colExpr), + Op: compareOp, + Value: &planpb.GenericValue{Val: &planpb.GenericValue_Int64Val{Int64Val: timestamptzInt64}}, + }, }, }, - }, + dataType: schemapb.DataType_Bool, + } + } + + // Slow Path: Handle arithmetic with TimestamptzArithCompareExpr. + arithOp := arithExprMap[ctx.GetOp1().GetTokenType()] + rawIntervalStr := ctx.GetInterval_string().GetText() + unquotedIntervalStr, err := convertEscapeSingle(rawIntervalStr) + if err != nil { + return fmt.Errorf("can not convert interval string: %s", rawIntervalStr) + } + interval, err := parseISODuration(unquotedIntervalStr) + if err != nil { + return err } return &ExprWithType{ - expr: newExpr, + expr: &planpb.Expr{ + Expr: &planpb.Expr_TimestamptzArithCompareExpr{ + TimestamptzArithCompareExpr: &planpb.TimestamptzArithCompareExpr{ + TimestamptzColumn: toColumnInfo(colExpr), + ArithOp: arithOp, + Interval: interval, + CompareOp: compareOp, + CompareValue: &planpb.GenericValue{ + Val: &planpb.GenericValue_Int64Val{Int64Val: timestamptzInt64}, + }, + }, + }, + }, dataType: schemapb.DataType_Bool, } }