milvus/internal/util/indexparamcheck/ngram_index_checker_test.go
2025-09-03 16:45:54 +08:00

124 lines
3.6 KiB
Go

package indexparamcheck
import (
"testing"
"github.com/stretchr/testify/assert"
"github.com/milvus-io/milvus-proto/go-api/v2/schemapb"
"github.com/milvus-io/milvus/pkg/v2/common"
)
// Test_NgramIndexChecker_CheckTrain exercises CheckTrain on the ngram index
// checker with accepted and rejected parameter sets for VarChar and JSON
// fields.
//
// Failure paths use assert.ErrorContains rather than assert.Error followed by
// err.Error(): assert.Error does not abort the test, so an unexpectedly nil
// error would otherwise panic on the dereference instead of reporting a clean
// assertion failure.
func Test_NgramIndexChecker_CheckTrain(t *testing.T) {
	checker := newNgramIndexChecker()

	t.Run("valid ngram on varchar", func(t *testing.T) {
		params := map[string]string{
			MinGramKey: "2",
			MaxGramKey: "3",
		}
		err := checker.CheckTrain(schemapb.DataType_VarChar, schemapb.DataType_None, params)
		assert.NoError(t, err)
	})

	t.Run("valid ngram on json with varchar cast", func(t *testing.T) {
		// Every spelling below (mixed case, surrounding whitespace) is expected
		// to pass the check.
		testCases := []string{"VARCHAR", "varchar", "VarChar", " VARCHAR "}
		for _, castType := range testCases {
			params := map[string]string{
				MinGramKey:             "2",
				MaxGramKey:             "3",
				common.JSONCastTypeKey: castType,
			}
			err := checker.CheckTrain(schemapb.DataType_JSON, schemapb.DataType_None, params)
			assert.NoError(t, err, "cast type %q", castType)
		}
	})

	t.Run("invalid ngram on json without cast type", func(t *testing.T) {
		params := map[string]string{
			MinGramKey: "2",
			MaxGramKey: "3",
		}
		err := checker.CheckTrain(schemapb.DataType_JSON, schemapb.DataType_None, params)
		assert.ErrorContains(t, err, "JSON field with ngram index must specify json_cast_type")
	})

	t.Run("invalid ngram on json with non-varchar cast type", func(t *testing.T) {
		invalidTypes := []string{"INT64", "FLOAT", "BOOL", "ARRAY"}
		for _, castType := range invalidTypes {
			params := map[string]string{
				MinGramKey:             "2",
				MaxGramKey:             "3",
				common.JSONCastTypeKey: castType,
			}
			err := checker.CheckTrain(schemapb.DataType_JSON, schemapb.DataType_None, params)
			assert.ErrorContains(t, err, "JSON field with ngram index only supports VARCHAR cast type", "cast type %q", castType)
			// The error text is also expected to echo the rejected cast type.
			assert.ErrorContains(t, err, castType, "cast type %q", castType)
		}
	})

	t.Run("invalid ngram params", func(t *testing.T) {
		testCases := []struct {
			name   string
			params map[string]string
			errMsg string
		}{
			{
				name:   "missing min_gram",
				params: map[string]string{MaxGramKey: "3"},
				errMsg: "Ngram index must specify both min_gram and max_gram",
			},
			{
				name:   "non-integer min_gram",
				params: map[string]string{MinGramKey: "abc", MaxGramKey: "3"},
				errMsg: "min_gram for Ngram index must be an integer",
			},
			{
				name:   "min_gram > max_gram",
				params: map[string]string{MinGramKey: "5", MaxGramKey: "3"},
				errMsg: "invalid min_gram or max_gram value",
			},
		}
		for _, tc := range testCases {
			// Run each case as a named subtest so a failure identifies the
			// offending parameter set. The subtest runs synchronously, so
			// capturing tc here is safe on any Go version.
			t.Run(tc.name, func(t *testing.T) {
				err := checker.CheckTrain(schemapb.DataType_VarChar, schemapb.DataType_None, tc.params)
				assert.ErrorContains(t, err, tc.errMsg)
			})
		}
	})
}
// Test_NgramIndexChecker_CheckValidDataType verifies which field data types
// CheckValidDataType accepts and rejects for the ngram index type.
//
// The rejection path uses assert.ErrorContains so that an unexpectedly nil
// error is reported as an assertion failure rather than panicking on
// err.Error().
func Test_NgramIndexChecker_CheckValidDataType(t *testing.T) {
	checker := newNgramIndexChecker()

	t.Run("valid data types", func(t *testing.T) {
		validTypes := []schemapb.DataType{
			schemapb.DataType_VarChar,
			schemapb.DataType_JSON,
		}
		for _, dtype := range validTypes {
			field := &schemapb.FieldSchema{DataType: dtype}
			err := checker.CheckValidDataType(IndexNGRAM, field)
			assert.NoError(t, err, "data type %v", dtype)
		}
	})

	t.Run("invalid data types", func(t *testing.T) {
		invalidTypes := []schemapb.DataType{
			schemapb.DataType_Int64,
			schemapb.DataType_Float,
			schemapb.DataType_Bool,
			schemapb.DataType_Array,
			schemapb.DataType_FloatVector,
		}
		for _, dtype := range invalidTypes {
			field := &schemapb.FieldSchema{DataType: dtype}
			err := checker.CheckValidDataType(IndexNGRAM, field)
			assert.ErrorContains(t, err, "ngram index can only be created on VARCHAR or JSON field", "data type %v", dtype)
		}
	})
}