fix: Fix bug where prefix matching fails when escaped wildcards are in the prefix (#40020)

issue: #40019

---------

Signed-off-by: Cai Zhang <cai.zhang@zilliz.com>
This commit is contained in:
cai.zhang 2025-02-28 10:29:59 +08:00 committed by GitHub
parent 762a644d76
commit dc46b08bdf
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
3 changed files with 73 additions and 39 deletions

View File

@ -1,6 +1,8 @@
package planparserv2 package planparserv2
import ( import (
"strings"
"github.com/milvus-io/milvus/pkg/v2/proto/planpb" "github.com/milvus-io/milvus/pkg/v2/proto/planpb"
) )
@ -12,20 +14,27 @@ var wildcards = map[byte]struct{}{
var escapeCharacter byte = '\\' var escapeCharacter byte = '\\'
// hasWildcards returns true if pattern contains any wildcard. // hasWildcards returns true if pattern contains any wildcard.
func hasWildcards(pattern string) bool { func hasWildcards(pattern string) (string, bool) {
l := len(pattern) var result strings.Builder
i := l - 1 hasWildcard := false
for ; i >= 0; i-- {
_, ok := wildcards[pattern[i]] for i := 0; i < len(pattern); i++ {
if ok { if pattern[i] == escapeCharacter && i+1 < len(pattern) {
if i > 0 && pattern[i-1] == escapeCharacter { next := pattern[i+1]
i-- if _, ok := wildcards[next]; ok {
result.WriteByte(next)
i++
continue continue
} }
return true
} }
if _, ok := wildcards[pattern[i]]; ok {
hasWildcard = true
}
result.WriteByte(pattern[i])
} }
return false
return result.String(), hasWildcard
} }
// findLastNotOfWildcards finds the last location not of the last wildcard. // findLastNotOfWildcards finds the last location not of the last wildcard.
@ -55,14 +64,14 @@ func translatePatternMatch(pattern string) (op planpb.OpType, operand string, er
return planpb.OpType_PrefixMatch, "", nil return planpb.OpType_PrefixMatch, "", nil
} }
exist := hasWildcards(pattern[:loc+1]) newPattern, exist := hasWildcards(pattern[:loc+1])
if loc >= l-1 && !exist { if loc >= l-1 && !exist {
// equal match. // equal match.
return planpb.OpType_Equal, pattern, nil return planpb.OpType_Equal, newPattern, nil
} }
if !exist { if !exist {
// prefix match. // prefix match.
return planpb.OpType_PrefixMatch, pattern[:loc+1], nil return planpb.OpType_PrefixMatch, newPattern, nil
} }
return planpb.OpType_Match, pattern, nil return planpb.OpType_Match, pattern, nil

View File

@ -11,38 +11,44 @@ func Test_hasWildcards(t *testing.T) {
pattern string pattern string
} }
tests := []struct { tests := []struct {
name string name string
args args args args
want bool want bool
target string
}{ }{
{ {
args: args{ args: args{
pattern: "no-wildcards", pattern: "no-wildcards",
}, },
want: false, want: false,
target: "no-wildcards",
}, },
{ {
args: args{ args: args{
pattern: "has\\%", pattern: "has\\%",
}, },
want: false, want: false,
target: "has%",
}, },
{ {
args: args{ args: args{
pattern: "%", pattern: "%",
}, },
want: true, want: true,
target: "%",
}, },
{ {
args: args{ args: args{
pattern: "has%", pattern: "has%",
}, },
want: true, want: true,
target: "has%",
}, },
} }
for _, tt := range tests { for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) { t.Run(tt.name, func(t *testing.T) {
if got := hasWildcards(tt.args.pattern); got != tt.want { patten, got := hasWildcards(tt.args.pattern)
if got != tt.want || patten != tt.target {
t.Errorf("hasWildcards(%s) = %v, want %v", tt.args.pattern, got, tt.want) t.Errorf("hasWildcards(%s) = %v, want %v", tt.args.pattern, got, tt.want)
} }
}) })

View File

@ -204,25 +204,44 @@ func TestExpr_Like(t *testing.T) {
helper, err := typeutil.CreateSchemaHelper(schema) helper, err := typeutil.CreateSchemaHelper(schema)
assert.NoError(t, err) assert.NoError(t, err)
exprStrs := []string{ expr := `A like "8\\_0%"`
`VarCharField like "prefix%"`, plan, err := CreateSearchPlan(helper, expr, "FloatVectorField", &planpb.QueryInfo{
`VarCharField like "equal"`, Topk: 0,
`JSONField["A"] like "name*"`, MetricType: "",
`$meta["A"] like "name*"`, SearchParams: "",
} RoundDecimal: 0,
for _, exprStr := range exprStrs { }, nil)
assertValidExpr(t, helper, exprStr) assert.NoError(t, err, expr)
} assert.NotNil(t, plan)
fmt.Println(plan)
assert.Equal(t, planpb.OpType_PrefixMatch, plan.GetVectorAnns().GetPredicates().GetUnaryRangeExpr().GetOp())
assert.Equal(t, "8_0", plan.GetVectorAnns().GetPredicates().GetUnaryRangeExpr().GetValue().GetStringVal())
// TODO: enable these after regex-match is supported. expr = `A like "8_\\_0%"`
//unsupported := []string{ plan, err = CreateSearchPlan(helper, expr, "FloatVectorField", &planpb.QueryInfo{
// `VarCharField like "not_%_supported"`, Topk: 0,
// `JSONField["A"] like "not_%_supported"`, MetricType: "",
// `$meta["A"] like "not_%_supported"`, SearchParams: "",
//} RoundDecimal: 0,
//for _, exprStr := range unsupported { }, nil)
// assertInvalidExpr(t, helper, exprStr) assert.NoError(t, err, expr)
//} assert.NotNil(t, plan)
fmt.Println(plan)
assert.Equal(t, planpb.OpType_Match, plan.GetVectorAnns().GetPredicates().GetUnaryRangeExpr().GetOp())
assert.Equal(t, `8_\_0%`, plan.GetVectorAnns().GetPredicates().GetUnaryRangeExpr().GetValue().GetStringVal())
expr = `A like "8\\%-0%"`
plan, err = CreateSearchPlan(helper, expr, "FloatVectorField", &planpb.QueryInfo{
Topk: 0,
MetricType: "",
SearchParams: "",
RoundDecimal: 0,
}, nil)
assert.NoError(t, err, expr)
assert.NotNil(t, plan)
fmt.Println(plan)
assert.Equal(t, planpb.OpType_PrefixMatch, plan.GetVectorAnns().GetPredicates().GetUnaryRangeExpr().GetOp())
assert.Equal(t, `8%-0`, plan.GetVectorAnns().GetPredicates().GetUnaryRangeExpr().GetValue().GetStringVal())
} }
func TestExpr_TextMatch(t *testing.T) { func TestExpr_TextMatch(t *testing.T) {