milvus/tests/go_client/common/response_checker.go
ZhuXi cd931a0388
feat:Geospatial Data Type and GIS Function support for milvus (#43661)
issue: #43427
pr: #37417

This pr's main goal is merge #37417 to milvus 2.5 without conflicts.

# Main Goals

1. Create and describe collections with geospatial type
2. Insert geospatial data into the insert binlog
3. Load segments containing geospatial data into memory
4. Enable query and search can display  geospatial data
5. Support using GIS funtions like ST_EQUALS in query

# Solution

1. **Add Type**: Modify the Milvus core by adding a Geospatial type in
both the C++ and Go code layers, defining the Geospatial data structure
and the corresponding interfaces.
2. **Dependency Libraries**: Introduce necessary geospatial data
processing libraries. In the C++ source code, use Conan package
management to include the GDAL library. In the Go source code, add the
go-geom library to the go.mod file.
3. **Protocol Interface**: Revise the Milvus protocol to provide
mechanisms for Geospatial message serialization and deserialization.
4. **Data Pipeline**: Facilitate interaction between the client and
proxy using the WKT format for geospatial data. The proxy will convert
all data into WKB format for downstream processing, providing column
data interfaces, segment encapsulation, segment loading, payload
writing, and cache block management.
5. **Query Operators**: Implement simple display and support for filter
queries. Initially, focus on filtering based on spatial relationships
for a single column of geospatial literal values, providing parsing and
execution for query expressions.Now only support brutal search
6. **Client Modification**: Enable the client to handle user input for
geospatial data and facilitate end-to-end testing.Check the modification
in pymilvus.

---------

Signed-off-by: Yinwei Li <yinwei.li@zilliz.com>
Signed-off-by: Cai Zhang <cai.zhang@zilliz.com>
Co-authored-by: cai.zhang <cai.zhang@zilliz.com>
2025-08-26 19:11:55 +08:00

351 lines
14 KiB
Go

package common
import (
"context"
"fmt"
"io"
"reflect"
"strings"
"testing"
"github.com/stretchr/testify/require"
// "github.com/twpayne/go-geom/encoding/wkb"
// "github.com/twpayne/go-geom/encoding/wkt"
"go.uber.org/zap"
"github.com/milvus-io/milvus/client/v2/column"
"github.com/milvus-io/milvus/client/v2/entity"
"github.com/milvus-io/milvus/client/v2/index"
client "github.com/milvus-io/milvus/client/v2/milvusclient"
"github.com/milvus-io/milvus/pkg/v2/log"
)
func CheckErr(t *testing.T, actualErr error, expErrNil bool, expErrorMsg ...string) {
if expErrNil {
require.NoError(t, actualErr)
} else {
require.Error(t, actualErr)
switch len(expErrorMsg) {
case 0:
log.Fatal("expect error message should not be empty")
case 1:
require.ErrorContains(t, actualErr, expErrorMsg[0])
default:
contains := false
for i := 0; i < len(expErrorMsg); i++ {
if strings.Contains(actualErr.Error(), expErrorMsg[i]) {
contains = true
}
}
if !contains {
t.Fatalf("CheckErr failed, actualErr doesn't contains any expErrorMsg, actual msg:%s", actualErr)
}
}
}
}
// EqualColumn assert field data is equal of two columns
func EqualColumn(t *testing.T, columnA column.Column, columnB column.Column) {
require.Equal(t, columnA.Name(), columnB.Name())
require.Equal(t, columnA.Type(), columnB.Type())
_type := columnA.Type()
switch _type {
case entity.FieldTypeBool:
require.ElementsMatch(t, columnA.(*column.ColumnBool).Data(), columnB.(*column.ColumnBool).Data())
case entity.FieldTypeInt8:
require.ElementsMatch(t, columnA.(*column.ColumnInt8).Data(), columnB.(*column.ColumnInt8).Data())
case entity.FieldTypeInt16:
require.ElementsMatch(t, columnA.(*column.ColumnInt16).Data(), columnB.(*column.ColumnInt16).Data())
case entity.FieldTypeInt32:
require.ElementsMatch(t, columnA.(*column.ColumnInt32).Data(), columnB.(*column.ColumnInt32).Data())
case entity.FieldTypeInt64:
require.ElementsMatch(t, columnA.(*column.ColumnInt64).Data(), columnB.(*column.ColumnInt64).Data())
case entity.FieldTypeFloat:
require.ElementsMatch(t, columnA.(*column.ColumnFloat).Data(), columnB.(*column.ColumnFloat).Data())
case entity.FieldTypeDouble:
require.ElementsMatch(t, columnA.(*column.ColumnDouble).Data(), columnB.(*column.ColumnDouble).Data())
case entity.FieldTypeVarChar:
require.ElementsMatch(t, columnA.(*column.ColumnVarChar).Data(), columnB.(*column.ColumnVarChar).Data())
case entity.FieldTypeJSON:
log.Debug("data", zap.String("name", columnA.Name()), zap.Any("type", columnA.Type()), zap.Any("data", columnA.FieldData()))
log.Debug("data", zap.String("name", columnB.Name()), zap.Any("type", columnB.Type()), zap.Any("data", columnB.FieldData()))
require.Equal(t, reflect.TypeOf(columnA), reflect.TypeOf(columnB))
switch _v := columnA.(type) {
case *column.ColumnDynamic:
require.ElementsMatch(t, columnA.(*column.ColumnDynamic).Data(), columnB.(*column.ColumnDynamic).Data())
case *column.ColumnJSONBytes:
require.ElementsMatch(t, columnA.(*column.ColumnJSONBytes).Data(), columnB.(*column.ColumnJSONBytes).Data())
default:
log.Warn("columnA type", zap.String("name", columnB.Name()), zap.Any("type", _v))
}
// case entity.FieldTypeGeometry:
// // currently proxy transform wkb to wkt,the query output wkt has different precision with client input(omit trailing zeros),and omit omissible bracket
// columnAcompData := make([][]byte, 0)
// // simulate proxy replace wkb progress
// for _, bytes := range columnA.(*column.ColumnGeometryBytes).Data() {
// geomT, _ := wkt.Unmarshal(string(bytes))
// wkbBytes, _ := wkb.Marshal(geomT, wkb.NDR)
// geomT, _ = wkb.Unmarshal(wkbBytes)
// realwktstr, _ := wkt.Marshal(geomT)
// columnAcompData = append(columnAcompData, []byte(realwktstr))
// }
// require.ElementsMatch(t, columnAcompData, columnB.(*column.ColumnGeometryBytes).Data())
case entity.FieldTypeFloatVector:
require.ElementsMatch(t, columnA.(*column.ColumnFloatVector).Data(), columnB.(*column.ColumnFloatVector).Data())
case entity.FieldTypeBinaryVector:
require.ElementsMatch(t, columnA.(*column.ColumnBinaryVector).Data(), columnB.(*column.ColumnBinaryVector).Data())
case entity.FieldTypeFloat16Vector:
require.ElementsMatch(t, columnA.(*column.ColumnFloat16Vector).Data(), columnB.(*column.ColumnFloat16Vector).Data())
case entity.FieldTypeBFloat16Vector:
require.ElementsMatch(t, columnA.(*column.ColumnBFloat16Vector).Data(), columnB.(*column.ColumnBFloat16Vector).Data())
case entity.FieldTypeSparseVector:
require.ElementsMatch(t, columnA.(*column.ColumnSparseFloatVector).Data(), columnB.(*column.ColumnSparseFloatVector).Data())
case entity.FieldTypeArray:
EqualArrayColumn(t, columnA, columnB)
default:
log.Info("Support column type is:", zap.Any("FieldType", []entity.FieldType{
entity.FieldTypeBool,
entity.FieldTypeInt8, entity.FieldTypeInt16, entity.FieldTypeInt32,
entity.FieldTypeInt64, entity.FieldTypeFloat, entity.FieldTypeDouble, entity.FieldTypeString,
entity.FieldTypeVarChar, entity.FieldTypeArray, entity.FieldTypeFloatVector, entity.FieldTypeBinaryVector,
}))
}
}
// EqualColumn assert field data is equal of two columns
func EqualArrayColumn(t *testing.T, columnA column.Column, columnB column.Column) {
require.Equal(t, columnA.Name(), columnB.Name())
require.IsType(t, columnA.Type(), entity.FieldTypeArray)
require.IsType(t, columnB.Type(), entity.FieldTypeArray)
switch _type := columnA.(type) {
case *column.ColumnBoolArray:
require.ElementsMatch(t, columnA.(*column.ColumnBoolArray).Data(), columnB.(*column.ColumnBoolArray).Data())
case *column.ColumnInt8Array:
require.ElementsMatch(t, columnA.(*column.ColumnInt8Array).Data(), columnB.(*column.ColumnInt8Array).Data())
case *column.ColumnInt16Array:
require.ElementsMatch(t, columnA.(*column.ColumnInt16Array).Data(), columnB.(*column.ColumnInt16Array).Data())
case *column.ColumnInt32Array:
require.ElementsMatch(t, columnA.(*column.ColumnInt32Array).Data(), columnB.(*column.ColumnInt32Array).Data())
case *column.ColumnInt64Array:
require.ElementsMatch(t, columnA.(*column.ColumnInt64Array).Data(), columnB.(*column.ColumnInt64Array).Data())
case *column.ColumnFloatArray:
require.ElementsMatch(t, columnA.(*column.ColumnFloatArray).Data(), columnB.(*column.ColumnFloatArray).Data())
case *column.ColumnDoubleArray:
require.ElementsMatch(t, columnA.(*column.ColumnDoubleArray).Data(), columnB.(*column.ColumnDoubleArray).Data())
case *column.ColumnVarCharArray:
require.ElementsMatch(t, columnA.(*column.ColumnVarCharArray).Data(), columnB.(*column.ColumnVarCharArray).Data())
default:
log.Debug("columnA type is", zap.Any("type", _type))
log.Info("Support array element type is:", zap.Any("FieldType", []entity.FieldType{
entity.FieldTypeBool, entity.FieldTypeInt8, entity.FieldTypeInt16,
entity.FieldTypeInt32, entity.FieldTypeInt64, entity.FieldTypeFloat, entity.FieldTypeDouble, entity.FieldTypeVarChar,
}))
}
}
// CheckInsertResult check insert result, ids len (insert count), ids data (pks, but no auto ids)
func CheckInsertResult(t *testing.T, expIDs column.Column, insertRes client.InsertResult) {
require.Equal(t, expIDs.Len(), insertRes.IDs.Len())
require.Equal(t, expIDs.Len(), int(insertRes.InsertCount))
actualIDs := insertRes.IDs
switch expIDs.Type() {
// pk field support int64 and varchar type
case entity.FieldTypeInt64:
require.ElementsMatch(t, actualIDs.(*column.ColumnInt64).Data(), expIDs.(*column.ColumnInt64).Data())
case entity.FieldTypeVarChar:
require.ElementsMatch(t, actualIDs.(*column.ColumnVarChar).Data(), expIDs.(*column.ColumnVarChar).Data())
default:
log.Info("The primary field only support ", zap.Any("type", []entity.FieldType{entity.FieldTypeInt64, entity.FieldTypeVarChar}))
}
}
// CheckOutputFields check query output fields
func CheckOutputFields(t *testing.T, expFields []string, actualColumns []column.Column) {
actualFields := make([]string, 0)
for _, actualColumn := range actualColumns {
actualFields = append(actualFields, actualColumn.Name())
}
log.Debug("CheckOutputFields", zap.Any("expFields", expFields), zap.Any("actualFields", actualFields))
require.ElementsMatchf(t, expFields, actualFields, fmt.Sprintf("Expected search output fields: %v, actual: %v", expFields, actualFields))
}
// CheckSearchResult check search result, check nq, topk, ids, score
func CheckSearchResult(t *testing.T, actualSearchResults []client.ResultSet, expNq int, expTopK int) {
require.Equalf(t, len(actualSearchResults), expNq, fmt.Sprintf("Expected nq=%d, actual SearchResultsLen=%d", expNq, len(actualSearchResults)))
require.Len(t, actualSearchResults, expNq)
for _, actualSearchResult := range actualSearchResults {
require.Equalf(t, actualSearchResult.ResultCount, expTopK, fmt.Sprintf("Expected topK=%d, actual ResultCount=%d", expTopK, actualSearchResult.ResultCount))
require.Equalf(t, actualSearchResult.IDs.Len(), expTopK, fmt.Sprintf("Expected topK=%d, actual IDsLen=%d", expTopK, actualSearchResult.IDs.Len()))
require.Equalf(t, len(actualSearchResult.Scores), expTopK, fmt.Sprintf("Expected topK=%d, actual ScoresLen=%d", expTopK, len(actualSearchResult.Scores)))
}
}
// CheckQueryResult check query result, column name, type and field
func CheckQueryResult(t *testing.T, expColumns []column.Column, actualColumns []column.Column) {
require.Equal(t, len(actualColumns), len(expColumns),
"The len of actual columns %d should greater or equal to the expected columns %d", len(actualColumns), len(expColumns))
for _, expColumn := range expColumns {
exist := false
for _, actualColumn := range actualColumns {
if expColumn.Name() == actualColumn.Name() {
exist = true
EqualColumn(t, expColumn, actualColumn)
}
}
if !exist {
log.Error("CheckQueryResult actualColumns no column", zap.String("name", expColumn.Name()))
}
}
}
type CheckIteratorOption func(opt *checkIteratorOpt)
type checkIteratorOpt struct {
expBatchSize []int
expOutputFields []string
}
func WithExpBatchSize(expBatchSize []int) CheckIteratorOption {
return func(opt *checkIteratorOpt) {
opt.expBatchSize = expBatchSize
}
}
func WithExpOutputFields(expOutputFields []string) CheckIteratorOption {
return func(opt *checkIteratorOpt) {
opt.expOutputFields = expOutputFields
}
}
// check queryIterator: result limit, each batch size, output fields
func CheckSearchIteratorResult(ctx context.Context, t *testing.T, itr client.SearchIterator, expLimit int, opts ...CheckIteratorOption) {
opt := &checkIteratorOpt{}
for _, o := range opts {
o(opt)
}
actualLimit := 0
var actualBatchSize []int
for {
rs, err := itr.Next(ctx)
if err != nil {
if err == io.EOF {
break
} else {
log.Error("SearchIterator next gets error", zap.Error(err))
break
}
}
if opt.expBatchSize != nil {
actualBatchSize = append(actualBatchSize, rs.ResultCount)
}
var actualOutputFields []string
if opt.expOutputFields != nil {
for _, column := range rs.Fields {
actualOutputFields = append(actualOutputFields, column.Name())
}
require.ElementsMatch(t, opt.expOutputFields, actualOutputFields)
}
actualLimit = actualLimit + rs.ResultCount
}
require.Equal(t, expLimit, actualLimit)
if opt.expBatchSize != nil {
log.Debug("SearchIterator result len", zap.Any("result len", actualBatchSize))
require.True(t, EqualIntSlice(opt.expBatchSize, actualBatchSize))
}
}
// GenColumnDataOption -- create column data --
type checkIndexOpt struct {
state index.IndexState
pendingIndexRows int64
totalRows int64
indexedRows int64
}
func TNewCheckIndexOpt(totalRows int64) *checkIndexOpt {
return &checkIndexOpt{
state: IndexStateFinished,
totalRows: totalRows,
pendingIndexRows: 0,
indexedRows: totalRows,
}
}
func (opt *checkIndexOpt) TWithIndexState(state index.IndexState) *checkIndexOpt {
opt.state = state
return opt
}
func (opt *checkIndexOpt) TWithIndexRows(totalRows int64, indexedRows int64, pendingIndexRows int64) *checkIndexOpt {
opt.totalRows = totalRows
opt.indexedRows = indexedRows
opt.pendingIndexRows = pendingIndexRows
return opt
}
func CheckIndex(t *testing.T, actualIdxDesc client.IndexDescription, idx index.Index, opt *checkIndexOpt) {
require.EqualValuesf(t, idx, actualIdxDesc.Index, "Actual index is not same with expected index")
require.Equal(t, actualIdxDesc.TotalRows, actualIdxDesc.PendingIndexRows+actualIdxDesc.IndexedRows)
if opt != nil {
require.Equal(t, opt.totalRows, opt.pendingIndexRows+opt.indexedRows)
require.Equal(t, opt.state, actualIdxDesc.State)
require.Equal(t, opt.totalRows, actualIdxDesc.TotalRows)
require.Equal(t, opt.indexedRows, actualIdxDesc.IndexedRows)
require.Equal(t, opt.pendingIndexRows, actualIdxDesc.PendingIndexRows)
}
}
func CheckTransfer(t *testing.T, actualRgs []*entity.ResourceGroupTransfer, expRgs []*entity.ResourceGroupTransfer) {
if len(expRgs) == 0 {
require.Len(t, actualRgs, 0)
} else {
_expRgs := make([]string, 0, len(expRgs))
_actualRgs := make([]string, 0, len(actualRgs))
for _, rg := range expRgs {
_expRgs = append(_expRgs, rg.ResourceGroup)
}
for _, rg := range actualRgs {
_actualRgs = append(_actualRgs, rg.ResourceGroup)
}
require.ElementsMatch(t, _expRgs, _actualRgs)
}
}
func CheckResourceGroupConfig(t *testing.T, actualConfig *entity.ResourceGroupConfig, expConfig *entity.ResourceGroupConfig) {
if expConfig.Requests.NodeNum != 0 {
require.EqualValuesf(t, expConfig.Requests.NodeNum, actualConfig.Requests.NodeNum, "Requests.NodeNum mismatch")
}
if expConfig.Limits.NodeNum != 0 {
require.EqualValuesf(t, expConfig.Limits.NodeNum, actualConfig.Limits.NodeNum, "Limits.NodeNum mismatch")
}
if expConfig.TransferFrom != nil {
CheckTransfer(t, expConfig.TransferFrom, actualConfig.TransferFrom)
}
if expConfig.TransferTo != nil {
CheckTransfer(t, expConfig.TransferTo, actualConfig.TransferTo)
}
if expConfig.NodeFilter.NodeLabels != nil {
require.EqualValues(t, expConfig.NodeFilter, actualConfig.NodeFilter)
}
}
func CheckResourceGroup(t *testing.T, actualRg *entity.ResourceGroup, expRg *entity.ResourceGroup) {
require.EqualValues(t, expRg.Name, actualRg.Name, "ResourceGroup name mismatch")
require.EqualValues(t, expRg.Capacity, actualRg.Capacity, "ResourceGroup capacity mismatch")
if expRg.NumAvailableNode >= 0 {
require.EqualValues(t, expRg.NumAvailableNode, len(actualRg.Nodes), "AvailableNodesNumber mismatch")
}
if expRg.Config != nil {
CheckResourceGroupConfig(t, actualRg.Config, expRg.Config)
}
if expRg.Nodes != nil {
require.ElementsMatch(t, expRg.Nodes, actualRg.Nodes, "Nodes count mismatch")
}
}