mirror of
https://gitee.com/milvus-io/milvus.git
synced 2026-01-05 02:12:48 +08:00
168 lines
7.7 KiB
Go
168 lines
7.7 KiB
Go
package testcases
|
|
|
|
import (
|
|
"fmt"
|
|
"math"
|
|
"testing"
|
|
"time"
|
|
|
|
"github.com/stretchr/testify/require"
|
|
|
|
"github.com/milvus-io/milvus/client/v2/entity"
|
|
"github.com/milvus-io/milvus/client/v2/index"
|
|
"github.com/milvus-io/milvus/client/v2/milvusclient"
|
|
"github.com/milvus-io/milvus/tests/go_client/common"
|
|
hp "github.com/milvus-io/milvus/tests/go_client/testcases/helper"
|
|
)
|
|
|
|
// TestPhraseMatchDefault tests basic phrase match functionality with slop=0
|
|
func TestPhraseMatchDefault(t *testing.T) {
|
|
ctx := hp.CreateContext(t, time.Second*common.DefaultTimeout)
|
|
mc := hp.CreateDefaultMilvusClient(ctx, t)
|
|
|
|
// create -> insert -> flush -> index -> load
|
|
analyzerParams := map[string]any{"tokenizer": "standard"}
|
|
fieldsOption := hp.TNewFieldsOption().TWithAnalyzerParams(analyzerParams)
|
|
function := hp.TNewBM25Function(common.DefaultTextFieldName, common.DefaultTextSparseVecFieldName)
|
|
schemaOption := hp.TNewSchemaOption().TWithFunction(function)
|
|
prepare, schema := hp.CollPrepare.CreateCollection(ctx, t, mc, hp.NewCreateCollectionParams(hp.FullTextSearch), fieldsOption, schemaOption)
|
|
query := common.GenText(common.DefaultTextLang)
|
|
insertOption := hp.TNewDataOption().TWithTextLang(common.DefaultTextLang).TWithTextData([]string{query})
|
|
prepare.InsertData(ctx, t, mc, hp.NewInsertParams(schema), insertOption)
|
|
prepare.FlushData(ctx, t, mc, schema.CollectionName)
|
|
|
|
indexparams := hp.TNewIndexParams(schema).TWithFieldIndex(map[string]index.Index{common.DefaultTextSparseVecFieldName: index.NewSparseInvertedIndex(entity.BM25, 0.1)})
|
|
prepare.CreateIndex(ctx, t, mc, indexparams)
|
|
prepare.Load(ctx, t, mc, hp.NewLoadParams(schema.CollectionName))
|
|
|
|
// Test exact phrase match (slop=0)
|
|
|
|
expr := fmt.Sprintf("phrase_match(%s, \"%s\", 0)", common.DefaultTextFieldName, query)
|
|
queryRes, err := mc.Query(ctx, milvusclient.NewQueryOption(schema.CollectionName).WithFilter(expr))
|
|
common.CheckErr(t, err, true)
|
|
// Results may vary as we're using auto-generated data, but it should >= 1, since query text has been inserted
|
|
require.GreaterOrEqual(t, queryRes.ResultCount, 1)
|
|
}
|
|
|
|
// TestPhraseMatchWithSlop tests phrase match with different slop values
|
|
func TestPhraseMatchWithSlop(t *testing.T) {
|
|
ctx := hp.CreateContext(t, time.Second*common.DefaultTimeout)
|
|
mc := hp.CreateDefaultMilvusClient(ctx, t)
|
|
|
|
// create -> insert -> flush -> index -> load
|
|
analyzerParams := map[string]any{"tokenizer": "standard"}
|
|
fieldsOption := hp.TNewFieldsOption().TWithAnalyzerParams(analyzerParams)
|
|
function := hp.TNewBM25Function(common.DefaultTextFieldName, common.DefaultTextSparseVecFieldName)
|
|
schemaOption := hp.TNewSchemaOption().TWithFunction(function)
|
|
prepare, schema := hp.CollPrepare.CreateCollection(ctx, t, mc, hp.NewCreateCollectionParams(hp.FullTextSearch), fieldsOption, schemaOption)
|
|
|
|
// Insert test data with varying distances between words
|
|
query := common.GenText(common.DefaultTextLang)
|
|
insertOption := hp.TNewDataOption().TWithTextLang(common.DefaultTextLang).TWithTextData([]string{query})
|
|
prepare.InsertData(ctx, t, mc, hp.NewInsertParams(schema), insertOption)
|
|
prepare.FlushData(ctx, t, mc, schema.CollectionName)
|
|
|
|
indexparams := hp.TNewIndexParams(schema).TWithFieldIndex(map[string]index.Index{common.DefaultTextSparseVecFieldName: index.NewSparseInvertedIndex(entity.BM25, 0.1)})
|
|
prepare.CreateIndex(ctx, t, mc, indexparams)
|
|
prepare.Load(ctx, t, mc, hp.NewLoadParams(schema.CollectionName))
|
|
|
|
// Test cases with different slop values
|
|
testCases := []struct {
|
|
name string
|
|
slop int
|
|
}{
|
|
{"ExactMatch", 0}, // Matches only exact phrase
|
|
{"SmallSlop", 1}, // Matches phrases with 1 word between
|
|
{"MediumSlop", 2}, // Matches phrases with 2 words between
|
|
{"LargeSlop", 3}, // Matches phrases with up to 3 words between
|
|
{"VeryLargeSlop", math.MaxUint32}, // Matches phrases with up to max u32 words between
|
|
}
|
|
|
|
for _, tc := range testCases {
|
|
t.Run(tc.name, func(t *testing.T) {
|
|
expr := fmt.Sprintf("phrase_match(%s, \"%s\", %d)", common.DefaultTextFieldName, query, tc.slop)
|
|
queryRes, err := mc.Query(ctx, milvusclient.NewQueryOption(schema.CollectionName).WithFilter(expr))
|
|
common.CheckErr(t, err, true)
|
|
require.GreaterOrEqual(t, queryRes.ResultCount, 1)
|
|
})
|
|
}
|
|
}
|
|
|
|
// TestPhraseMatchWithDiffLang tests phrase match with different languages
|
|
func TestPhraseMatchWithDiffLang(t *testing.T) {
|
|
ctx := hp.CreateContext(t, time.Second*common.DefaultTimeout)
|
|
mc := hp.CreateDefaultMilvusClient(ctx, t)
|
|
|
|
// Test cases for different languages and analyzers
|
|
testCases := []struct {
|
|
name string
|
|
language string
|
|
analyzer string
|
|
slop int
|
|
}{
|
|
{
|
|
name: "English_Standard",
|
|
language: common.English,
|
|
analyzer: "standard",
|
|
slop: 3,
|
|
},
|
|
{
|
|
name: "Chinese_Jieba",
|
|
language: common.Chinese,
|
|
analyzer: "jieba",
|
|
slop: 3,
|
|
},
|
|
}
|
|
|
|
for _, tc := range testCases {
|
|
t.Run(tc.name, func(t *testing.T) {
|
|
analyzerParams := map[string]any{"tokenizer": tc.analyzer}
|
|
fieldsOption := hp.TNewFieldsOption().TWithAnalyzerParams(analyzerParams)
|
|
function := hp.TNewBM25Function(common.DefaultTextFieldName, common.DefaultTextSparseVecFieldName)
|
|
schemaOption := hp.TNewSchemaOption().TWithFunction(function)
|
|
prepare, schema := hp.CollPrepare.CreateCollection(ctx, t, mc, hp.NewCreateCollectionParams(hp.FullTextSearch), fieldsOption, schemaOption)
|
|
query := common.GenText(tc.language)
|
|
insertOption := hp.TNewDataOption().TWithTextLang(tc.language).TWithTextData([]string{query})
|
|
prepare.InsertData(ctx, t, mc, hp.NewInsertParams(schema), insertOption)
|
|
prepare.FlushData(ctx, t, mc, schema.CollectionName)
|
|
|
|
indexparams := hp.TNewIndexParams(schema).TWithFieldIndex(map[string]index.Index{common.DefaultTextSparseVecFieldName: index.NewSparseInvertedIndex(entity.BM25, 0.1)})
|
|
prepare.CreateIndex(ctx, t, mc, indexparams)
|
|
prepare.Load(ctx, t, mc, hp.NewLoadParams(schema.CollectionName))
|
|
|
|
expr := fmt.Sprintf("phrase_match(%s, \"%s\", %d)", common.DefaultTextFieldName, query, tc.slop)
|
|
queryRes, err := mc.Query(ctx, milvusclient.NewQueryOption(schema.CollectionName).WithFilter(expr))
|
|
common.CheckErr(t, err, true)
|
|
require.GreaterOrEqual(t, queryRes.ResultCount, 1)
|
|
})
|
|
}
|
|
}
|
|
|
|
// TestPhraseMatchWithEmptyData tests phrase match with empty data
|
|
func TestPhraseMatchWithEmptyData(t *testing.T) {
|
|
ctx := hp.CreateContext(t, time.Second*common.DefaultTimeout)
|
|
mc := hp.CreateDefaultMilvusClient(ctx, t)
|
|
|
|
// create -> insert -> flush -> index -> load
|
|
analyzerParams := map[string]any{"tokenizer": "standard"}
|
|
fieldsOption := hp.TNewFieldsOption().TWithAnalyzerParams(analyzerParams)
|
|
function := hp.TNewBM25Function(common.DefaultTextFieldName, common.DefaultTextSparseVecFieldName)
|
|
schemaOption := hp.TNewSchemaOption().TWithFunction(function)
|
|
prepare, schema := hp.CollPrepare.CreateCollection(ctx, t, mc, hp.NewCreateCollectionParams(hp.FullTextSearch), fieldsOption, schemaOption)
|
|
|
|
insertOption := hp.TNewDataOption().TWithTextLang(common.DefaultTextLang).TWithTextEmptyPercent(100)
|
|
prepare.InsertData(ctx, t, mc, hp.NewInsertParams(schema), insertOption)
|
|
prepare.FlushData(ctx, t, mc, schema.CollectionName)
|
|
|
|
indexparams := hp.TNewIndexParams(schema).TWithFieldIndex(map[string]index.Index{common.DefaultTextSparseVecFieldName: index.NewSparseInvertedIndex(entity.BM25, 0.1)})
|
|
prepare.CreateIndex(ctx, t, mc, indexparams)
|
|
prepare.Load(ctx, t, mc, hp.NewLoadParams(schema.CollectionName))
|
|
|
|
// Test phrase match with empty data
|
|
query := common.GenText(common.DefaultTextLang)
|
|
expr := fmt.Sprintf("phrase_match(%s, \"%s\", 0)", common.DefaultTextFieldName, query)
|
|
queryRes, err := mc.Query(ctx, milvusclient.NewQueryOption(schema.CollectionName).WithFilter(expr))
|
|
common.CheckErr(t, err, true)
|
|
require.Equal(t, 0, queryRes.ResultCount)
|
|
}
|