mirror of
https://gitee.com/milvus-io/milvus.git
synced 2025-12-07 01:28:27 +08:00
issue: https://github.com/milvus-io/milvus/issues/27467 >My plan is as follows. >- [x] M1 Create collection with timestamptz field >- [x] M2 Insert timestamptz field data >- [x] M3 Retrieve timestamptz field data >- [x] M4 Implement handoff >- [x] M5 Implement compare operator >- [x] M6 Implement extract operator >- [x] M8 Support database/collection level default timezone >- [x] M7 Support STL-SORT index for datatype timestamptz --- The third PR of issue: https://github.com/milvus-io/milvus/issues/27467, which completes M5, M6, M7, M8 described above. ## M8 Default Timezone We will be able to use alter_collection() and alter_database() in a future Python SDK release to modify the default timezone at the collection or database level. For insert requests, the timezone will be resolved using the following order of precedence: String Literal -> Collection Default -> Database Default. For retrieval requests, the timezone will be resolved in this order: Query Parameters -> Collection Default -> Database Default. In both cases, the final fallback timezone is UTC. ## M5: Comparison Operators We can now use the following expression format to filter on the timestamptz field: - `timestamptz_field [+/- INTERVAL 'interval_string'] {comparison_op} ISO 'iso_string' ` - The interval_string follows the ISO 8601 duration format, for example: P1Y2M3DT1H2M3S. - The iso_string follows the ISO 8601 timestamp format, for example: 2025-01-03T00:00:00+08:00. - Example expressions: "tsz + INTERVAL 'P0D' != ISO '2025-01-03T00:00:00+08:00'" or "tsz != ISO '2025-01-03T00:00:00+08:00'". ## M6: Extract We will be able to extract specific time fields via kwargs in a future Python SDK release. The key is `time_fields`, and the value should be one or more of "year, month, day, hour, minute, second, microsecond", separated by commas or spaces. The result for each record will then be an array of int64.
## M7: Indexing Support Expressions without interval arithmetic can be accelerated using an STL-SORT index. However, expressions that include interval arithmetic cannot be indexed. This is because the result of an interval calculation depends on the specific timestamp value. For example, adding one month to a date in February results in a different number of added days than adding one month to a date in March. --- After this PR, the input / output type of timestamptz will be an ISO string. Timestamptz values will be stored as timestamptz data, which is ultimately an int64_t. > for more information, see https://en.wikipedia.org/wiki/ISO_8601 --------- Signed-off-by: xtx <xtianx@smail.nju.edu.cn>
387 lines
12 KiB
Go
387 lines
12 KiB
Go
package planparserv2
|
|
|
|
import (
|
|
"fmt"
|
|
"strconv"
|
|
"time"
|
|
|
|
"github.com/antlr4-go/antlr/v4"
|
|
"github.com/hashicorp/golang-lru/v2/expirable"
|
|
"github.com/samber/lo"
|
|
"go.uber.org/zap"
|
|
|
|
"github.com/milvus-io/milvus-proto/go-api/v2/schemapb"
|
|
planparserv2 "github.com/milvus-io/milvus/internal/parser/planparserv2/generated"
|
|
"github.com/milvus-io/milvus/internal/util/function/rerank"
|
|
"github.com/milvus-io/milvus/pkg/v2/common"
|
|
"github.com/milvus-io/milvus/pkg/v2/log"
|
|
"github.com/milvus-io/milvus/pkg/v2/proto/planpb"
|
|
"github.com/milvus-io/milvus/pkg/v2/util/funcutil"
|
|
"github.com/milvus-io/milvus/pkg/v2/util/paramtable"
|
|
"github.com/milvus-io/milvus/pkg/v2/util/typeutil"
|
|
)
|
|
|
|
var (
|
|
exprCache = expirable.NewLRU[string, any](1024, nil, time.Minute*10)
|
|
trueLiteral = &ExprWithType{
|
|
dataType: schemapb.DataType_Bool,
|
|
expr: alwaysTrueExpr(),
|
|
}
|
|
)
|
|
|
|
// ExprParams carries the expression-level options resolved by
// ParseExprParams.
type ExprParams struct {
	// UseJSONStats is copied into PlanOption.ExprUseJsonStats by the plan
	// builders in this file.
	UseJSONStats bool
}
|
|
|
|
func ParseExprParams(vals map[string]*schemapb.TemplateValue) *ExprParams {
|
|
ep := &ExprParams{
|
|
UseJSONStats: paramtable.Get().CommonCfg.UsingJSONStatsForQuery.GetAsBool(),
|
|
}
|
|
|
|
if vals != nil {
|
|
if v, ok := vals[common.ExprUseJSONStatsKey]; ok && v != nil {
|
|
ep.UseJSONStats = v.GetBoolVal()
|
|
}
|
|
}
|
|
return ep
|
|
}
|
|
|
|
func handleInternal(exprStr string) (ast planparserv2.IExprContext, err error) {
|
|
val, ok := exprCache.Get(exprStr)
|
|
if ok {
|
|
switch v := val.(type) {
|
|
case planparserv2.IExprContext:
|
|
return v, nil
|
|
case error:
|
|
return nil, v
|
|
default:
|
|
return nil, fmt.Errorf("unknown cache error: %v", v)
|
|
}
|
|
}
|
|
|
|
// Note that the errors will be cached, too.
|
|
defer func() {
|
|
if err != nil {
|
|
exprCache.Add(exprStr, err)
|
|
}
|
|
}()
|
|
exprNormal := convertHanToASCII(exprStr)
|
|
listener := &errorListenerImpl{}
|
|
|
|
inputStream := antlr.NewInputStream(exprNormal)
|
|
lexer := getLexer(inputStream, listener)
|
|
if err = listener.Error(); err != nil {
|
|
return
|
|
}
|
|
|
|
parser := getParser(lexer, listener)
|
|
if err = listener.Error(); err != nil {
|
|
return
|
|
}
|
|
|
|
ast = parser.Expr()
|
|
if err = listener.Error(); err != nil {
|
|
return
|
|
}
|
|
|
|
if parser.GetCurrentToken().GetTokenType() != antlr.TokenEOF {
|
|
log.Info("invalid expression", zap.String("expr", exprStr))
|
|
err = fmt.Errorf("invalid expression: %s", exprStr)
|
|
return
|
|
}
|
|
|
|
// lexer & parser won't be used by this thread, can be put into pool.
|
|
putLexer(lexer)
|
|
putParser(parser)
|
|
|
|
exprCache.Add(exprStr, ast)
|
|
return
|
|
}
|
|
|
|
func handleExprInternal(schema *typeutil.SchemaHelper, exprStr string, visitorArgs *ParserVisitorArgs) (result interface{}) {
|
|
defer func() {
|
|
if r := recover(); r != nil {
|
|
result = fmt.Errorf("unsupported expression: %s", exprStr)
|
|
}
|
|
}()
|
|
|
|
if isEmptyExpression(exprStr) {
|
|
return trueLiteral
|
|
}
|
|
ast, err := handleInternal(exprStr)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
|
|
visitor := NewParserVisitor(schema, visitorArgs)
|
|
return ast.Accept(visitor)
|
|
}
|
|
|
|
func handleExpr(schema *typeutil.SchemaHelper, exprStr string) (result interface{}) {
|
|
return handleExprInternal(schema, exprStr, &ParserVisitorArgs{})
|
|
}
|
|
|
|
func parseExprInner(schema *typeutil.SchemaHelper, exprStr string, exprTemplateValues map[string]*schemapb.TemplateValue, visitorArgs *ParserVisitorArgs) (*planpb.Expr, error) {
|
|
ret := handleExprInternal(schema, exprStr, visitorArgs)
|
|
|
|
if err := getError(ret); err != nil {
|
|
return nil, fmt.Errorf("cannot parse expression: %s, error: %s", exprStr, err)
|
|
}
|
|
|
|
predicate := getExpr(ret)
|
|
if predicate == nil {
|
|
return nil, fmt.Errorf("cannot parse expression: %s", exprStr)
|
|
}
|
|
if !canBeExecuted(predicate) {
|
|
return nil, fmt.Errorf("predicate is not a boolean expression: %s, data type: %s", exprStr, predicate.dataType)
|
|
}
|
|
|
|
valueMap, err := UnmarshalExpressionValues(exprTemplateValues)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
if err := FillExpressionValue(predicate.expr, valueMap); err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
return predicate.expr, nil
|
|
}
|
|
|
|
func ParseExpr(schema *typeutil.SchemaHelper, exprStr string, exprTemplateValues map[string]*schemapb.TemplateValue) (*planpb.Expr, error) {
|
|
return parseExprInner(schema, exprStr, exprTemplateValues, &ParserVisitorArgs{})
|
|
}
|
|
|
|
func parseIdentifierInner(schema *typeutil.SchemaHelper, identifier string, checkFunc func(*planpb.Expr) error, visitorArgs *ParserVisitorArgs) error {
|
|
ret := handleExprInternal(schema, identifier, visitorArgs)
|
|
|
|
if err := getError(ret); err != nil {
|
|
return fmt.Errorf("cannot parse identifier: %s, error: %s", identifier, err)
|
|
}
|
|
|
|
predicate := getExpr(ret)
|
|
if predicate == nil {
|
|
return fmt.Errorf("cannot parse identifier: %s", identifier)
|
|
}
|
|
if predicate.expr.GetColumnExpr() == nil {
|
|
return fmt.Errorf("cannot parse identifier: %s", identifier)
|
|
}
|
|
|
|
return checkFunc(predicate.expr)
|
|
}
|
|
|
|
func ParseIdentifier(schema *typeutil.SchemaHelper, identifier string, checkFunc func(*planpb.Expr) error) error {
|
|
visitorArgs := &ParserVisitorArgs{}
|
|
return parseIdentifierInner(schema, identifier, checkFunc, visitorArgs)
|
|
}
|
|
|
|
func CreateRetrievePlanArgs(schema *typeutil.SchemaHelper, exprStr string, exprTemplateValues map[string]*schemapb.TemplateValue, visitorArgs *ParserVisitorArgs) (*planpb.PlanNode, error) {
|
|
expr, err := parseExprInner(schema, exprStr, exprTemplateValues, visitorArgs)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
exprParams := ParseExprParams(exprTemplateValues)
|
|
|
|
planNode := &planpb.PlanNode{
|
|
Node: &planpb.PlanNode_Query{
|
|
Query: &planpb.QueryPlanNode{
|
|
Predicates: expr,
|
|
},
|
|
},
|
|
PlanOptions: &planpb.PlanOption{
|
|
ExprUseJsonStats: exprParams.UseJSONStats,
|
|
},
|
|
}
|
|
return planNode, nil
|
|
}
|
|
|
|
func CreateRetrievePlan(schema *typeutil.SchemaHelper, exprStr string, exprTemplateValues map[string]*schemapb.TemplateValue) (*planpb.PlanNode, error) {
|
|
visitorArgs := &ParserVisitorArgs{}
|
|
return CreateRetrievePlanArgs(schema, exprStr, exprTemplateValues, visitorArgs)
|
|
}
|
|
|
|
func CreateSearchPlanArgs(schema *typeutil.SchemaHelper, exprStr string, vectorFieldName string, queryInfo *planpb.QueryInfo, exprTemplateValues map[string]*schemapb.TemplateValue, functionScorer *schemapb.FunctionScore, visitorArgs *ParserVisitorArgs) (*planpb.PlanNode, error) {
|
|
parse := func() (*planpb.Expr, error) {
|
|
if len(exprStr) <= 0 {
|
|
return nil, nil
|
|
}
|
|
return parseExprInner(schema, exprStr, exprTemplateValues, visitorArgs)
|
|
}
|
|
|
|
expr, err := parse()
|
|
if err != nil {
|
|
log.Info("CreateSearchPlan failed", zap.Error(err))
|
|
return nil, err
|
|
}
|
|
vectorField, err := schema.GetFieldFromName(vectorFieldName)
|
|
if err != nil {
|
|
log.Info("CreateSearchPlan failed", zap.Error(err))
|
|
return nil, err
|
|
}
|
|
// plan ok with schema, check ann field
|
|
fieldID := vectorField.FieldID
|
|
dataType := vectorField.DataType
|
|
elementType := vectorField.ElementType
|
|
|
|
var vectorType planpb.VectorType
|
|
if !typeutil.IsVectorType(dataType) {
|
|
return nil, fmt.Errorf("field (%s) to search is not of vector data type", vectorFieldName)
|
|
}
|
|
switch dataType {
|
|
case schemapb.DataType_BinaryVector:
|
|
vectorType = planpb.VectorType_BinaryVector
|
|
case schemapb.DataType_FloatVector:
|
|
vectorType = planpb.VectorType_FloatVector
|
|
case schemapb.DataType_Float16Vector:
|
|
vectorType = planpb.VectorType_Float16Vector
|
|
case schemapb.DataType_BFloat16Vector:
|
|
vectorType = planpb.VectorType_BFloat16Vector
|
|
case schemapb.DataType_SparseFloatVector:
|
|
vectorType = planpb.VectorType_SparseFloatVector
|
|
case schemapb.DataType_Int8Vector:
|
|
vectorType = planpb.VectorType_Int8Vector
|
|
case schemapb.DataType_ArrayOfVector:
|
|
switch elementType {
|
|
case schemapb.DataType_FloatVector:
|
|
vectorType = planpb.VectorType_EmbListFloatVector
|
|
default:
|
|
log.Error("Invalid elementType", zap.Any("elementType", elementType))
|
|
return nil, err
|
|
}
|
|
|
|
default:
|
|
log.Error("Invalid dataType", zap.Any("dataType", dataType))
|
|
return nil, err
|
|
}
|
|
|
|
scorers, err := CreateSearchScorers(schema, functionScorer, exprTemplateValues)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
if len(scorers) != 0 && (queryInfo.GroupByFieldId != -1 || queryInfo.SearchIteratorV2Info != nil) {
|
|
return nil, fmt.Errorf("don't support use segment scorer with group_by or search_iterator")
|
|
}
|
|
|
|
exprParams := ParseExprParams(exprTemplateValues)
|
|
|
|
planNode := &planpb.PlanNode{
|
|
Node: &planpb.PlanNode_VectorAnns{
|
|
VectorAnns: &planpb.VectorANNS{
|
|
VectorType: vectorType,
|
|
Predicates: expr,
|
|
QueryInfo: queryInfo,
|
|
PlaceholderTag: "$0",
|
|
FieldId: fieldID,
|
|
},
|
|
},
|
|
Scorers: scorers,
|
|
PlanOptions: &planpb.PlanOption{
|
|
ExprUseJsonStats: exprParams.UseJSONStats,
|
|
},
|
|
}
|
|
return planNode, nil
|
|
}
|
|
|
|
func CreateSearchScorer(schema *typeutil.SchemaHelper, function *schemapb.FunctionSchema, exprTemplateValues map[string]*schemapb.TemplateValue) (*planpb.ScoreFunction, error) {
|
|
rerankerName := rerank.GetRerankName(function)
|
|
switch rerankerName {
|
|
case rerank.BoostName:
|
|
scorer := &planpb.ScoreFunction{}
|
|
filter, ok := funcutil.TryGetAttrByKeyFromRepeatedKV(rerank.FilterKey, function.GetParams())
|
|
if ok {
|
|
expr, err := ParseExpr(schema, filter, exprTemplateValues)
|
|
if err != nil {
|
|
return nil, fmt.Errorf("parse expr failed with error: {%v}", err)
|
|
}
|
|
scorer.Filter = expr
|
|
}
|
|
|
|
weightStr, ok := funcutil.TryGetAttrByKeyFromRepeatedKV(rerank.WeightKey, function.GetParams())
|
|
if !ok {
|
|
return nil, fmt.Errorf("must set weight params for weight scorer")
|
|
}
|
|
|
|
weight, err := strconv.ParseFloat(weightStr, 32)
|
|
if err != nil {
|
|
return nil, fmt.Errorf("parse function scorer weight params failed with error: {%v}", err)
|
|
}
|
|
scorer.Weight = float32(weight)
|
|
return scorer, nil
|
|
default:
|
|
// if not boost scorer, regard as normal function scorer
|
|
// will be checked at ranker
|
|
// return nil here
|
|
return nil, nil
|
|
}
|
|
}
|
|
|
|
func CreateSearchScorers(schema *typeutil.SchemaHelper, functionScore *schemapb.FunctionScore, exprTemplateValues map[string]*schemapb.TemplateValue) ([]*planpb.ScoreFunction, error) {
|
|
scorers := []*planpb.ScoreFunction{}
|
|
for _, function := range functionScore.GetFunctions() {
|
|
// create scorer for search plan
|
|
scorer, err := CreateSearchScorer(schema, function, exprTemplateValues)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
if scorer != nil {
|
|
scorers = append(scorers, scorer)
|
|
}
|
|
}
|
|
if len(scorers) == 0 {
|
|
return nil, nil
|
|
}
|
|
return scorers, nil
|
|
}
|
|
|
|
func CreateSearchPlan(schema *typeutil.SchemaHelper, exprStr string, vectorFieldName string, queryInfo *planpb.QueryInfo, exprTemplateValues map[string]*schemapb.TemplateValue, functionScorer *schemapb.FunctionScore) (*planpb.PlanNode, error) {
|
|
visitorArgs := &ParserVisitorArgs{}
|
|
return CreateSearchPlanArgs(schema, exprStr, vectorFieldName, queryInfo, exprTemplateValues, functionScorer, visitorArgs)
|
|
}
|
|
|
|
func CreateRequeryPlan(pkField *schemapb.FieldSchema, ids *schemapb.IDs) *planpb.PlanNode {
|
|
var values []*planpb.GenericValue
|
|
switch ids.GetIdField().(type) {
|
|
case *schemapb.IDs_IntId:
|
|
values = lo.Map(ids.GetIntId().GetData(), func(id int64, _ int) *planpb.GenericValue {
|
|
return &planpb.GenericValue{
|
|
Val: &planpb.GenericValue_Int64Val{
|
|
Int64Val: id,
|
|
},
|
|
}
|
|
})
|
|
case *schemapb.IDs_StrId:
|
|
values = lo.Map(ids.GetStrId().GetData(), func(id string, _ int) *planpb.GenericValue {
|
|
return &planpb.GenericValue{
|
|
Val: &planpb.GenericValue_StringVal{
|
|
StringVal: id,
|
|
},
|
|
}
|
|
})
|
|
}
|
|
|
|
return &planpb.PlanNode{
|
|
Node: &planpb.PlanNode_Query{
|
|
Query: &planpb.QueryPlanNode{
|
|
Predicates: &planpb.Expr{
|
|
Expr: &planpb.Expr_TermExpr{
|
|
TermExpr: &planpb.TermExpr{
|
|
ColumnInfo: &planpb.ColumnInfo{
|
|
FieldId: pkField.GetFieldID(),
|
|
DataType: pkField.GetDataType(),
|
|
IsPrimaryKey: true,
|
|
IsAutoID: pkField.GetAutoID(),
|
|
IsPartitionKey: pkField.GetIsPartitionKey(),
|
|
},
|
|
Values: values,
|
|
},
|
|
},
|
|
},
|
|
IsCount: false,
|
|
Limit: int64(len(values)),
|
|
},
|
|
},
|
|
}
|
|
}
|