mirror of
https://gitee.com/milvus-io/milvus.git
synced 2025-12-07 17:48:29 +08:00
238 lines
12 KiB
Go
238 lines
12 KiB
Go
package testcases
|
|
|
|
import (
|
|
"fmt"
|
|
"math"
|
|
"testing"
|
|
"time"
|
|
|
|
"github.com/stretchr/testify/require"
|
|
|
|
"github.com/milvus-io/milvus/client/v2/entity"
|
|
"github.com/milvus-io/milvus/client/v2/index"
|
|
"github.com/milvus-io/milvus/client/v2/milvusclient"
|
|
"github.com/milvus-io/milvus/tests/go_client/common"
|
|
hp "github.com/milvus-io/milvus/tests/go_client/testcases/helper"
|
|
)
|
|
|
|
// TestPhraseMatchDefault tests basic phrase match functionality with slop=0
|
|
func TestPhraseMatchDefault(t *testing.T) {
|
|
ctx := hp.CreateContext(t, time.Second*common.DefaultTimeout)
|
|
mc := hp.CreateDefaultMilvusClient(ctx, t)
|
|
|
|
// create -> insert -> flush -> index -> load
|
|
analyzerParams := map[string]any{"tokenizer": "standard"}
|
|
fieldsOption := hp.TNewFieldsOption().TWithAnalyzerParams(analyzerParams)
|
|
function := hp.TNewBM25Function(common.DefaultTextFieldName, common.DefaultTextSparseVecFieldName)
|
|
schemaOption := hp.TNewSchemaOption().TWithFunction(function)
|
|
prepare, schema := hp.CollPrepare.CreateCollection(ctx, t, mc, hp.NewCreateCollectionParams(hp.FullTextSearch), fieldsOption, schemaOption)
|
|
query := common.GenText(common.DefaultTextLang)
|
|
insertOption := hp.TNewDataOption().TWithTextLang(common.DefaultTextLang).TWithTextData([]string{query})
|
|
prepare.InsertData(ctx, t, mc, hp.NewInsertParams(schema), insertOption)
|
|
prepare.FlushData(ctx, t, mc, schema.CollectionName)
|
|
|
|
indexparams := hp.TNewIndexParams(schema).TWithFieldIndex(map[string]index.Index{common.DefaultTextSparseVecFieldName: index.NewSparseInvertedIndex(entity.BM25, 0.1)})
|
|
prepare.CreateIndex(ctx, t, mc, indexparams)
|
|
prepare.Load(ctx, t, mc, hp.NewLoadParams(schema.CollectionName))
|
|
|
|
// Test exact phrase match (slop=0)
|
|
|
|
expr := fmt.Sprintf("phrase_match(%s, \"%s\", 0)", common.DefaultTextFieldName, query)
|
|
queryRes, err := mc.Query(ctx, milvusclient.NewQueryOption(schema.CollectionName).WithFilter(expr))
|
|
common.CheckErr(t, err, true)
|
|
// Results may vary as we're using auto-generated data, but it should >= 1, since query text has been inserted
|
|
require.GreaterOrEqual(t, queryRes.ResultCount, 1)
|
|
}
|
|
|
|
// TestPhraseMatchWithSlop tests phrase match with different slop values
|
|
func TestPhraseMatchWithSlop(t *testing.T) {
|
|
ctx := hp.CreateContext(t, time.Second*common.DefaultTimeout)
|
|
mc := hp.CreateDefaultMilvusClient(ctx, t)
|
|
|
|
// create -> insert -> flush -> index -> load
|
|
analyzerParams := map[string]any{"tokenizer": "standard"}
|
|
fieldsOption := hp.TNewFieldsOption().TWithAnalyzerParams(analyzerParams)
|
|
function := hp.TNewBM25Function(common.DefaultTextFieldName, common.DefaultTextSparseVecFieldName)
|
|
schemaOption := hp.TNewSchemaOption().TWithFunction(function)
|
|
prepare, schema := hp.CollPrepare.CreateCollection(ctx, t, mc, hp.NewCreateCollectionParams(hp.FullTextSearch), fieldsOption, schemaOption)
|
|
|
|
// Insert test data with varying distances between words
|
|
query := common.GenText(common.DefaultTextLang)
|
|
insertOption := hp.TNewDataOption().TWithTextLang(common.DefaultTextLang).TWithTextData([]string{query})
|
|
prepare.InsertData(ctx, t, mc, hp.NewInsertParams(schema), insertOption)
|
|
prepare.FlushData(ctx, t, mc, schema.CollectionName)
|
|
|
|
indexparams := hp.TNewIndexParams(schema).TWithFieldIndex(map[string]index.Index{common.DefaultTextSparseVecFieldName: index.NewSparseInvertedIndex(entity.BM25, 0.1)})
|
|
prepare.CreateIndex(ctx, t, mc, indexparams)
|
|
prepare.Load(ctx, t, mc, hp.NewLoadParams(schema.CollectionName))
|
|
|
|
// Test cases with different slop values
|
|
testCases := []struct {
|
|
name string
|
|
slop int
|
|
}{
|
|
{"ExactMatch", 0}, // Matches only exact phrase
|
|
{"SmallSlop", 1}, // Matches phrases with 1 word between
|
|
{"MediumSlop", 2}, // Matches phrases with 2 words between
|
|
{"LargeSlop", 3}, // Matches phrases with up to 3 words between
|
|
{"VeryLargeSlop", math.MaxUint32}, // Matches phrases with up to max u32 words between
|
|
}
|
|
|
|
for _, tc := range testCases {
|
|
t.Run(tc.name, func(t *testing.T) {
|
|
expr := fmt.Sprintf("phrase_match(%s, \"%s\", %d)", common.DefaultTextFieldName, query, tc.slop)
|
|
queryRes, err := mc.Query(ctx, milvusclient.NewQueryOption(schema.CollectionName).WithFilter(expr))
|
|
common.CheckErr(t, err, true)
|
|
require.GreaterOrEqual(t, queryRes.ResultCount, 1)
|
|
})
|
|
}
|
|
}
|
|
|
|
// TestPhraseMatchWithDiffLang tests phrase match with different languages
|
|
func TestPhraseMatchWithDiffLang(t *testing.T) {
|
|
ctx := hp.CreateContext(t, time.Second*common.DefaultTimeout)
|
|
mc := hp.CreateDefaultMilvusClient(ctx, t)
|
|
|
|
// Test cases for different languages and analyzers
|
|
testCases := []struct {
|
|
name string
|
|
language string
|
|
analyzer string
|
|
slop int
|
|
}{
|
|
{
|
|
name: "English_Standard",
|
|
language: common.English,
|
|
analyzer: "standard",
|
|
slop: 3,
|
|
},
|
|
{
|
|
name: "Chinese_Jieba",
|
|
language: common.Chinese,
|
|
analyzer: "jieba",
|
|
slop: 3,
|
|
},
|
|
}
|
|
|
|
for _, tc := range testCases {
|
|
t.Run(tc.name, func(t *testing.T) {
|
|
analyzerParams := map[string]any{"tokenizer": tc.analyzer}
|
|
fieldsOption := hp.TNewFieldsOption().TWithAnalyzerParams(analyzerParams)
|
|
function := hp.TNewBM25Function(common.DefaultTextFieldName, common.DefaultTextSparseVecFieldName)
|
|
schemaOption := hp.TNewSchemaOption().TWithFunction(function)
|
|
prepare, schema := hp.CollPrepare.CreateCollection(ctx, t, mc, hp.NewCreateCollectionParams(hp.FullTextSearch), fieldsOption, schemaOption)
|
|
query := common.GenText(tc.language)
|
|
insertOption := hp.TNewDataOption().TWithTextLang(tc.language).TWithTextData([]string{query})
|
|
prepare.InsertData(ctx, t, mc, hp.NewInsertParams(schema), insertOption)
|
|
prepare.FlushData(ctx, t, mc, schema.CollectionName)
|
|
|
|
indexparams := hp.TNewIndexParams(schema).TWithFieldIndex(map[string]index.Index{common.DefaultTextSparseVecFieldName: index.NewSparseInvertedIndex(entity.BM25, 0.1)})
|
|
prepare.CreateIndex(ctx, t, mc, indexparams)
|
|
prepare.Load(ctx, t, mc, hp.NewLoadParams(schema.CollectionName))
|
|
|
|
expr := fmt.Sprintf("phrase_match(%s, \"%s\", %d)", common.DefaultTextFieldName, query, tc.slop)
|
|
queryRes, err := mc.Query(ctx, milvusclient.NewQueryOption(schema.CollectionName).WithFilter(expr))
|
|
common.CheckErr(t, err, true)
|
|
require.GreaterOrEqual(t, queryRes.ResultCount, 1)
|
|
})
|
|
}
|
|
}
|
|
|
|
// TestPhraseMatchWithEmptyData tests phrase match with empty data
|
|
func TestPhraseMatchWithEmptyData(t *testing.T) {
|
|
ctx := hp.CreateContext(t, time.Second*common.DefaultTimeout)
|
|
mc := hp.CreateDefaultMilvusClient(ctx, t)
|
|
|
|
// create -> insert -> flush -> index -> load
|
|
analyzerParams := map[string]any{"tokenizer": "standard"}
|
|
fieldsOption := hp.TNewFieldsOption().TWithAnalyzerParams(analyzerParams)
|
|
function := hp.TNewBM25Function(common.DefaultTextFieldName, common.DefaultTextSparseVecFieldName)
|
|
schemaOption := hp.TNewSchemaOption().TWithFunction(function)
|
|
prepare, schema := hp.CollPrepare.CreateCollection(ctx, t, mc, hp.NewCreateCollectionParams(hp.FullTextSearch), fieldsOption, schemaOption)
|
|
|
|
insertOption := hp.TNewDataOption().TWithTextLang(common.DefaultTextLang).TWithTextEmptyPercent(100)
|
|
prepare.InsertData(ctx, t, mc, hp.NewInsertParams(schema), insertOption)
|
|
prepare.FlushData(ctx, t, mc, schema.CollectionName)
|
|
|
|
indexparams := hp.TNewIndexParams(schema).TWithFieldIndex(map[string]index.Index{common.DefaultTextSparseVecFieldName: index.NewSparseInvertedIndex(entity.BM25, 0.1)})
|
|
prepare.CreateIndex(ctx, t, mc, indexparams)
|
|
prepare.Load(ctx, t, mc, hp.NewLoadParams(schema.CollectionName))
|
|
|
|
// Test phrase match with empty data
|
|
query := common.GenText(common.DefaultTextLang)
|
|
expr := fmt.Sprintf("phrase_match(%s, \"%s\", 0)", common.DefaultTextFieldName, query)
|
|
queryRes, err := mc.Query(ctx, milvusclient.NewQueryOption(schema.CollectionName).WithFilter(expr))
|
|
common.CheckErr(t, err, true)
|
|
require.Equal(t, 0, queryRes.ResultCount)
|
|
}
|
|
|
|
// TestPhraseMatchDefault tests basic phrase match functionality with slop=0
|
|
func TestPhraseMatchNullable(t *testing.T) {
|
|
ctx := hp.CreateContext(t, time.Second*common.DefaultTimeout)
|
|
mc := hp.CreateDefaultMilvusClient(ctx, t)
|
|
|
|
// create -> insert -> flush -> index -> load
|
|
fieldsOption := hp.TNewFieldOptions().WithFieldOption(common.DefaultTextFieldName, hp.TNewFieldsOption().TWithNullable(true).
|
|
TWithEnableAnalyzer(true).TWithEnableMatch(true))
|
|
prepare, schema := hp.CollPrepare.CreateCollection(ctx, t, mc, hp.NewCreateCollectionParams(hp.FullTextSearch), fieldsOption, hp.TNewSchemaOption())
|
|
|
|
// insert data with all null
|
|
validData := make([]bool, common.DefaultNb)
|
|
for i := 0; i < common.DefaultNb; i++ {
|
|
validData[i] = i%2 == 0
|
|
}
|
|
query := common.GenText(common.DefaultTextLang)
|
|
insertOption := hp.TNewColumnOptions().WithColumnOption(common.DefaultTextFieldName, hp.TNewDataOption().TWithTextLang(common.DefaultTextLang).TWithTextData([]string{query}).TWithValidData(validData))
|
|
prepare.InsertData(ctx, t, mc, hp.NewInsertParams(schema), insertOption)
|
|
prepare.FlushData(ctx, t, mc, schema.CollectionName)
|
|
prepare.CreateIndex(ctx, t, mc, hp.TNewIndexParams(schema))
|
|
prepare.Load(ctx, t, mc, hp.NewLoadParams(schema.CollectionName))
|
|
|
|
// Test exact phrase match (slop=0)
|
|
expr := fmt.Sprintf("phrase_match(%s, \"%s\", 0)", common.DefaultTextFieldName, query)
|
|
queryRes, err := mc.Query(ctx, milvusclient.NewQueryOption(schema.CollectionName).WithFilter(expr))
|
|
common.CheckErr(t, err, true)
|
|
// Results may vary as we're using auto-generated data, but it should >= 1, since query text has been inserted
|
|
require.GreaterOrEqual(t, queryRes.ResultCount, 1)
|
|
|
|
exprNull := fmt.Sprintf("%s is null", common.DefaultTextFieldName)
|
|
queryRes, err = mc.Query(ctx, milvusclient.NewQueryOption(schema.CollectionName).WithFilter(exprNull))
|
|
common.CheckErr(t, err, true)
|
|
require.GreaterOrEqual(t, common.DefaultNb/2, 1)
|
|
}
|
|
|
|
// TestPhraseMatchDefault tests basic phrase match functionality with slop=0
|
|
func TestPhraseMatchDefaultValue(t *testing.T) {
|
|
ctx := hp.CreateContext(t, time.Second*common.DefaultTimeout)
|
|
mc := hp.CreateDefaultMilvusClient(ctx, t)
|
|
|
|
// create -> insert -> flush -> index -> load
|
|
defaultText := "milvus vector database"
|
|
fieldsOption := hp.TNewFieldOptions().WithFieldOption(common.DefaultTextFieldName, hp.TNewFieldsOption().TWithDefaultValue(defaultText).
|
|
TWithEnableAnalyzer(true).TWithEnableMatch(true))
|
|
prepare, schema := hp.CollPrepare.CreateCollection(ctx, t, mc, hp.NewCreateCollectionParams(hp.FullTextSearch), fieldsOption, hp.TNewSchemaOption())
|
|
|
|
// insert data with all null
|
|
validData := make([]bool, common.DefaultNb)
|
|
for i := 0; i < common.DefaultNb; i++ {
|
|
validData[i] = i%2 == 0
|
|
}
|
|
insertOption := hp.TNewColumnOptions().WithColumnOption(common.DefaultTextFieldName, hp.TNewDataOption().TWithTextLang(common.DefaultTextLang).TWithValidData(validData))
|
|
prepare.InsertData(ctx, t, mc, hp.NewInsertParams(schema), insertOption)
|
|
prepare.FlushData(ctx, t, mc, schema.CollectionName)
|
|
prepare.CreateIndex(ctx, t, mc, hp.TNewIndexParams(schema))
|
|
prepare.Load(ctx, t, mc, hp.NewLoadParams(schema.CollectionName))
|
|
|
|
exprText := fmt.Sprintf("TEXT_MATCH(%s, 'database vector')", common.DefaultTextFieldName)
|
|
countRes, err := mc.Query(ctx, milvusclient.NewQueryOption(schema.CollectionName).WithFilter(exprText).WithOutputFields(common.QueryCountFieldName))
|
|
common.CheckErr(t, err, true)
|
|
count, _ := countRes.Fields[0].GetAsInt64(0)
|
|
require.EqualValues(t, common.DefaultNb/2, count)
|
|
|
|
exprPhase := fmt.Sprintf("phrase_match(%s, 'database vector', 1)", common.DefaultTextFieldName)
|
|
countRes, err = mc.Query(ctx, milvusclient.NewQueryOption(schema.CollectionName).WithFilter(exprPhase).WithOutputFields(common.QueryCountFieldName))
|
|
common.CheckErr(t, err, true)
|
|
count, _ = countRes.Fields[0].GetAsInt64(0)
|
|
require.EqualValues(t, 0, count)
|
|
}
|