milvus/pkg/proto/plan.proto
Tianx 2c0c5ef41e
feat: timestamptz expression & index & timezone (#44080)
issue: https://github.com/milvus-io/milvus/issues/27467

>My plan is as follows.
>- [x] M1 Create collection with timestamptz field
>- [x] M2 Insert timestamptz field data
>- [x] M3 Retrieve timestamptz field data
>- [x] M4 Implement handoff
>- [x] M5 Implement compare operator
>- [x] M6 Implement extract operator
>- [x] M8 Support database/collection level default timezone
>- [x] M7 Support STL-SORT index for datatype timestamptz

---

The third PR of issue: https://github.com/milvus-io/milvus/issues/27467,
which completes M5, M6, M7, M8 described above.

## M8 Default Timezone

We will be able to use alter_collection() and alter_database() in a
future Python SDK release to modify the default timezone at the
collection or database level.

For insert requests, the timezone will be resolved using the following
order of precedence: String Literal -> Collection Default -> Database
Default.
For retrieval requests, the timezone will be resolved in this order:
Query Parameters -> Collection Default -> Database Default.
In both cases, the final fallback timezone is UTC.


## M5: Comparison Operators

We can now use the following expression format to filter on the
timestamptz field:

- `timestamptz_field [+/- INTERVAL 'interval_string'] {comparison_op}
ISO 'iso_string' `

- The interval_string follows the ISO 8601 duration format, for example:
P1Y2M3DT1H2M3S.

- The iso_string follows the ISO 8601 timestamp format, for example:
2025-01-03T00:00:00+08:00.

- Example expressions: "tsz + INTERVAL 'P0D' != ISO
'2025-01-03T00:00:00+08:00'" or "tsz != ISO
'2025-01-03T00:00:00+08:00'".

## M6: Extract

We will be able to extract specific time fields via kwargs in a future
Python SDK release.
The key is `time_fields`, and the value should be one or more of "year,
month, day, hour, minute, second, microsecond", separated by commas or
spaces. The result for each record will then be an array of int64.



## M7: Indexing Support

Expressions without interval arithmetic can be accelerated using an
STL-SORT index. However, expressions that include interval arithmetic
cannot be indexed. This is because the result of an interval calculation
depends on the specific timestamp value. For example, adding one month
to a date in February results in a different number of added days than
adding one month to a date in March.

--- 

After this PR, the input / output type of timestamptz will be an ISO
string. Timestamptz values will be stored as timestamptz data, which is
ultimately an int64_t.

> for more information, see https://en.wikipedia.org/wiki/ISO_8601

---------

Signed-off-by: xtx <xtianx@smail.nju.edu.cn>
2025-09-23 10:24:12 +08:00

302 lines
6.2 KiB
Protocol Buffer

syntax = "proto3";
package milvus.proto.plan;
option go_package = "github.com/milvus-io/milvus/pkg/v2/proto/planpb";
import "schema.proto";
// Comparison and matching operators referenced by the filter-expression
// messages in this file (for example UnaryRangeExpr.op and CompareExpr.op).
enum OpType {
  Invalid = 0;  // zero value, i.e. the proto3 default
  GreaterThan = 1;
  GreaterEqual = 2;
  LessThan = 3;
  LessEqual = 4;
  Equal = 5;
  NotEqual = 6;
  PrefixMatch = 7;   // startsWith
  PostfixMatch = 8;  // endsWith
  Match = 9;         // like
  Range = 10;        // for case 1 < a < b
  In = 11;           // TODO:: used for term expr
  NotIn = 12;
  TextMatch = 13;    // text match
  PhraseMatch = 14;  // phrase match
  InnerMatch = 15;   // substring (e.g., "%value%")
}
// Arithmetic operators referenced by the arithmetic expression messages in
// this file (BinaryArithOp, BinaryArithExpr, BinaryArithOpEvalRangeExpr,
// TimestamptzArithCompareExpr).
enum ArithOpType {
  Unknown = 0;  // zero value, i.e. the proto3 default
  Add = 1;
  Sub = 2;
  Mul = 3;
  Div = 4;
  Mod = 5;
  ArrayLength = 6;
}
// Kinds of vector field; carried by VectorANNS.vector_type.
// Note that BinaryVector is the zero value, so an unset vector_type reads
// back as BinaryVector.
enum VectorType {
  BinaryVector = 0;
  FloatVector = 1;
  Float16Vector = 2;
  BFloat16Vector = 3;
  SparseFloatVector = 4;
  Int8Vector = 5;
  EmbListFloatVector = 6;
  EmbListFloat16Vector = 7;
  EmbListBFloat16Vector = 8;
  EmbListInt8Vector = 9;
}
// A single literal value. The members of `val` are mutually exclusive: at
// most one of them is set at a time.
message GenericValue {
  oneof val {
    bool bool_val = 1;
    int64 int64_val = 2;
    // Named float_val, but carried as a 64-bit double.
    double float_val = 3;
    string string_val = 4;
    Array array_val = 5;
  }
}
// A list of GenericValue elements; this is the type of
// GenericValue.array_val.
message Array {
  // The elements of the array.
  repeated GenericValue array = 1;
  // NOTE(review): presumably true when every element has the same type --
  // confirm against the code that builds this message.
  bool same_type = 2;
  // Data type of the elements, as declared in schema.proto.
  schema.DataType element_type = 3;
}
// Search-iterator (v2) state; attached to QueryInfo.search_iterator_v2_info.
message SearchIteratorV2Info {
  string token = 1;
  uint32 batch_size = 2;
  // Declared `optional`, so "not set" is distinguishable from 0.
  optional float last_bound = 3;
}
// Search parameters carried by a VectorANNS plan node (VectorANNS.query_info).
message QueryInfo {
  // Field number 2 is not used by any field of this message. Reserve it so it
  // cannot be assigned to a new field by accident: if it once belonged to a
  // removed field, reusing it would silently reinterpret old serialized data.
  reserved 2;

  int64 topk = 1;
  string metric_type = 3;
  string search_params = 4;
  int64 round_decimal = 5;
  int64 group_by_field_id = 6;
  bool materialized_view_involved = 7;
  int64 group_size = 8;
  bool strict_group_size = 9;
  double bm25_avgdl = 10;
  int64 query_field_id = 11;
  string hints = 12;
  // Declared `optional`, so presence of iterator state can be tested.
  optional SearchIteratorV2Info search_iterator_v2_info = 13;
  string json_path = 14;
  schema.DataType json_type = 15;
  bool strict_cast = 16;
}
// Describes the column (field) an expression refers to.
message ColumnInfo {
  int64 field_id = 1;
  // Data type of the column, as declared in schema.proto.
  schema.DataType data_type = 2;
  bool is_primary_key = 3;
  bool is_autoID = 4;
  // NOTE(review): presumably the path into a nested (e.g. JSON) value --
  // confirm against the planner.
  repeated string nested_path = 5;
  bool is_partition_key = 6;
  schema.DataType element_type = 7;
  bool is_clustering_key = 8;
  bool nullable = 9;
}
// Expression node that consists solely of a column reference.
message ColumnExpr {
  ColumnInfo info = 1;
}
// "Exists" expression node over the column described by `info`.
message ExistsExpr {
  ColumnInfo info = 1;
}
// Expression node holding a literal value.
message ValueExpr {
  GenericValue value = 1;
  // NOTE(review): presumably names the template variable that supplies
  // `value` when the expression is a template (see Expr.is_template) --
  // confirm.
  string template_variable_name = 2;
}
// Applies a single OpType operator to a column and a value.
message UnaryRangeExpr {
  ColumnInfo column_info = 1;
  OpType op = 2;
  GenericValue value = 3;
  string template_variable_name = 4;
  // Additional values accompanying `value`.
  // NOTE(review): which operators consume these is not visible here.
  repeated GenericValue extra_values = 5;
}
// Range check on a column, with a lower and an upper bound and a separate
// inclusiveness flag for each.
message BinaryRangeExpr {
  ColumnInfo column_info = 1;
  bool lower_inclusive = 2;
  bool upper_inclusive = 3;
  GenericValue lower_value = 4;
  GenericValue upper_value = 5;
  string lower_template_variable_name = 6;
  string upper_template_variable_name = 7;
}
// A function call: a function name plus its argument expressions.
message CallExpr {
  string function_name = 1;
  repeated Expr function_parameters = 2;
}
// Compares two columns with an OpType operator.
message CompareExpr {
  ColumnInfo left_column_info = 1;
  ColumnInfo right_column_info = 2;
  OpType op = 3;
}
// Term expression: a column together with a list of values.
message TermExpr {
  ColumnInfo column_info = 1;
  repeated GenericValue values = 2;
  // NOTE(review): exact semantics are not visible in this file -- confirm
  // against the planner before relying on this flag.
  bool is_in_field = 3;
  string template_variable_name = 4;
}
// Containment test of `elements` against a JSON or array column.
message JSONContainsExpr {
  ColumnInfo column_info = 1;
  repeated GenericValue elements = 2;

  // Which containment test to apply:
  //   0: invalid
  //   1: json_contains     | array_contains
  //   2: json_contains_all | array_contains_all
  //   3: json_contains_any | array_contains_any
  enum JSONOp {
    Invalid = 0;
    Contains = 1;
    ContainsAll = 2;
    ContainsAny = 3;
  }

  JSONOp op = 3;
  bool elements_same_type = 4;
  string template_variable_name = 5;
}
// Null test (IsNull / IsNotNull) on a column.
message NullExpr {
  ColumnInfo column_info = 1;

  // The null test to apply; Invalid is the zero value (proto3 default).
  enum NullOp {
    Invalid = 0;
    IsNull = 1;
    IsNotNull = 2;
  }

  NullOp op = 2;
}
// Unary operator applied to one child expression.
message UnaryExpr {
  // Not is the only operator defined besides the Invalid zero value.
  enum UnaryOp {
    Invalid = 0;
    Not = 1;
  }

  UnaryOp op = 1;
  Expr child = 2;
}
// Logical combination (and / or) of two child expressions.
message BinaryExpr {
  // The logical operator; Invalid is the zero value (proto3 default).
  enum BinaryOp {
    Invalid = 0;
    LogicalAnd = 1;
    LogicalOr = 2;
  }

  BinaryOp op = 1;
  Expr left = 2;
  Expr right = 3;
}
// Arithmetic operation between a column and a literal right-hand operand.
message BinaryArithOp {
  ColumnInfo column_info = 1;
  ArithOpType arith_op = 2;
  GenericValue right_operand = 3;
}
// Arithmetic operation between two sub-expressions.
message BinaryArithExpr {
  Expr left = 1;
  Expr right = 2;
  ArithOpType op = 3;
}
// Combines an arithmetic step with a comparison: `arith_op` is applied to the
// column and `right_operand`, and `op` relates that to `value`.
message BinaryArithOpEvalRangeExpr {
  ColumnInfo column_info = 1;
  ArithOpType arith_op = 2;
  GenericValue right_operand = 3;
  OpType op = 4;
  GenericValue value = 5;
  // Template variable names for `right_operand` and `value` respectively.
  // NOTE(review): pairing inferred from the field names -- confirm.
  string operand_template_variable_name = 6;
  string value_template_variable_name = 7;
}
// Random-sampling expression wrapping a predicate.
message RandomSampleExpr {
  // NOTE(review): valid range and exact meaning of the factor are not
  // visible in this file -- confirm against the executor.
  float sample_factor = 1;
  Expr predicate = 2;
}
// Field-less expression node; selectable through Expr.always_true_expr.
message AlwaysTrueExpr {
}
// A time interval broken down into calendar and clock components; used by
// TimestamptzArithCompareExpr.interval.
message Interval {
  int64 years = 1;
  int64 months = 2;
  int64 days = 3;
  int64 hours = 4;
  int64 minutes = 5;
  int64 seconds = 6;
}
// Expression type for the operation:
//   (timestamp_col + interval) OP iso_string
// i.e. (timestamptz_column arith_op interval) compare_op compare_value.
message TimestamptzArithCompareExpr {
  ColumnInfo timestamptz_column = 1;
  // Add or Sub.
  ArithOpType arith_op = 2;
  Interval interval = 3;
  OpType compare_op = 4;
  GenericValue compare_value = 5;
}
// A node of the filter-expression tree. Exactly one concrete expression kind
// is selected through the `expr` oneof.
message Expr {
  oneof expr {
    TermExpr term_expr = 1;
    UnaryExpr unary_expr = 2;
    BinaryExpr binary_expr = 3;
    CompareExpr compare_expr = 4;
    UnaryRangeExpr unary_range_expr = 5;
    BinaryRangeExpr binary_range_expr = 6;
    BinaryArithOpEvalRangeExpr binary_arith_op_eval_range_expr = 7;
    BinaryArithExpr binary_arith_expr = 8;
    ValueExpr value_expr = 9;
    ColumnExpr column_expr = 10;
    ExistsExpr exists_expr = 11;
    AlwaysTrueExpr always_true_expr = 12;
    JSONContainsExpr json_contains_expr = 13;
    CallExpr call_expr = 14;
    NullExpr null_expr = 15;
    RandomSampleExpr random_sample_expr = 16;
    TimestamptzArithCompareExpr timestamptz_arith_compare_expr = 17;
  }

  // Lives outside the oneof. Numbers 18 and 19 are currently unassigned.
  bool is_template = 20;
}
// Vector-search plan node: the vector field to search, a filter expression,
// and the search parameters.
message VectorANNS {
  VectorType vector_type = 1;
  int64 field_id = 2;
  Expr predicates = 3;
  QueryInfo query_info = 4;
  // always be "$0"
  string placeholder_tag = 5;
}
// Query (non-vector-search) plan node; selectable through PlanNode.query.
message QueryPlanNode {
  Expr predicates = 1;
  bool is_count = 2;
  int64 limit = 3;
}
// A scorer entry of PlanNode.scorers: a filter expression and a weight.
message ScoreFunction {
  Expr filter = 1;
  float weight = 2;
}
// Plan-level options; carried by PlanNode.plan_options.
message PlanOption {
  bool expr_use_json_stats = 1;
}
// Root of a plan. The `node` oneof selects the plan kind; the remaining
// fields sit alongside it.
message PlanNode {
  oneof node {
    VectorANNS vector_anns = 1;
    // deprecated, use query instead.
    Expr predicates = 2;
    QueryPlanNode query = 4;
  }

  repeated int64 output_field_ids = 3;
  repeated string dynamic_fields = 5;
  repeated ScoreFunction scorers = 6;
  PlanOption plan_options = 7;
}