milvus/pkg/util/testutils/gen_data.go
ZhuXi cd931a0388
feat:Geospatial Data Type and GIS Function support for milvus (#43661)
issue: #43427
pr: #37417

This PR's main goal is to merge #37417 into Milvus 2.5 without conflicts.

# Main Goals

1. Create and describe collections with geospatial type
2. Insert geospatial data into the insert binlog
3. Load segments containing geospatial data into memory
4. Enable query and search to display geospatial data
5. Support using GIS functions like ST_EQUALS in query

# Solution

1. **Add Type**: Modify the Milvus core by adding a Geospatial type in
both the C++ and Go code layers, defining the Geospatial data structure
and the corresponding interfaces.
2. **Dependency Libraries**: Introduce necessary geospatial data
processing libraries. In the C++ source code, use Conan package
management to include the GDAL library. In the Go source code, add the
go-geom library to the go.mod file.
3. **Protocol Interface**: Revise the Milvus protocol to provide
mechanisms for Geospatial message serialization and deserialization.
4. **Data Pipeline**: Facilitate interaction between the client and
proxy using the WKT format for geospatial data. The proxy will convert
all data into WKB format for downstream processing, providing column
data interfaces, segment encapsulation, segment loading, payload
writing, and cache block management.
5. **Query Operators**: Implement simple display and support for filter
queries. Initially, focus on filtering based on spatial relationships
for a single column of geospatial literal values, providing parsing and
execution for query expressions. Currently only brute-force search is supported.
6. **Client Modification**: Enable the client to handle user input for
geospatial data and facilitate end-to-end testing. Check the modification
in pymilvus.

---------

Signed-off-by: Yinwei Li <yinwei.li@zilliz.com>
Signed-off-by: Cai Zhang <cai.zhang@zilliz.com>
Co-authored-by: cai.zhang <cai.zhang@zilliz.com>
2025-08-26 19:11:55 +08:00

1067 lines
29 KiB
Go

// Licensed to the LF AI & Data foundation under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package testutils
import (
"encoding/binary"
"encoding/json"
"fmt"
"math"
"math/rand"
"sort"
"strconv"
"strings"
"github.com/twpayne/go-geom/encoding/wkb"
"github.com/twpayne/go-geom/encoding/wkt"
"github.com/x448/float16"
"github.com/milvus-io/milvus-proto/go-api/v2/schemapb"
"github.com/milvus-io/milvus/pkg/v2/util/funcutil"
"github.com/milvus-io/milvus/pkg/v2/util/typeutil"
)
// elemCountOfArray is the number of elements placed in every row produced by
// the GenerateArrayOf*Array helpers.
const elemCountOfArray = 10
// generate data
// GenerateBoolArray returns numRows booleans that alternate, starting with
// true: even indices hold true and odd indices hold false.
func GenerateBoolArray(numRows int) []bool {
	flags := make([]bool, numRows)
	for idx := range flags {
		flags[idx] = idx%2 == 0
	}
	return flags
}
// GenerateInt8Array returns numRows values where element i is int8(i); the
// conversion wraps around once i exceeds the int8 range.
func GenerateInt8Array(numRows int) []int8 {
	values := make([]int8, numRows)
	for idx := range values {
		values[idx] = int8(idx)
	}
	return values
}
// GenerateInt16Array returns numRows values where element i is int16(i).
func GenerateInt16Array(numRows int) []int16 {
	values := make([]int16, numRows)
	for idx := range values {
		values[idx] = int16(idx)
	}
	return values
}
// GenerateInt32Array returns numRows values where element i is int32(i).
func GenerateInt32Array(numRows int) []int32 {
	values := make([]int32, numRows)
	for idx := range values {
		values[idx] = int32(idx)
	}
	return values
}
// GenerateInt64Array returns numRows values where element i is int64(i).
func GenerateInt64Array(numRows int) []int64 {
	values := make([]int64, numRows)
	for idx := range values {
		values[idx] = int64(idx)
	}
	return values
}
// GenerateUint64Array returns numRows values where element i is uint64(i).
func GenerateUint64Array(numRows int) []uint64 {
	values := make([]uint64, numRows)
	for idx := range values {
		values[idx] = uint64(idx)
	}
	return values
}
// GenerateFloat32Array returns numRows values where element i is float32(i).
func GenerateFloat32Array(numRows int) []float32 {
	values := make([]float32, numRows)
	for idx := range values {
		values[idx] = float32(idx)
	}
	return values
}
// GenerateFloat64Array returns numRows values where element i is float64(i).
func GenerateFloat64Array(numRows int) []float64 {
	values := make([]float64, numRows)
	for idx := range values {
		values[idx] = float64(idx)
	}
	return values
}
// GenerateVarCharArray returns numRows strings. Each string is produced by
// funcutil.RandomString with a length drawn from [0, maxLen), so every string
// is strictly shorter than maxLen.
//
// When maxLen is not positive there is no valid non-empty length, so all
// returned strings are empty.
func GenerateVarCharArray(numRows int, maxLen int) []string {
	ret := make([]string, numRows)
	if maxLen <= 0 {
		// rand.Intn panics for n <= 0; the zero-valued (empty) strings are
		// the only result consistent with the "shorter than maxLen" contract.
		return ret
	}
	for i := range ret {
		ret[i] = funcutil.RandomString(rand.Intn(maxLen))
	}
	return ret
}
// GenerateStringArray returns numRows random sentences. Each sentence is made
// of 5 to 10 words picked (with repetition) from a fixed vocabulary, shuffled
// and joined with single spaces.
func GenerateStringArray(numRows int) []string {
	vocabulary := []string{"hello", "world", "this", "is", "a", "test", "sentence", "milvus", "vector", "database", "search", "engine", "fast", "efficient", "scalable"}
	sentences := make([]string, 0, numRows)
	for row := 0; row < numRows; row++ {
		picked := make([]string, rand.Intn(6)+5) // 5 to 10 words
		for pos := range picked {
			picked[pos] = vocabulary[rand.Intn(len(vocabulary))]
		}
		rand.Shuffle(len(picked), func(a, b int) {
			picked[a], picked[b] = picked[b], picked[a]
		})
		sentences = append(sentences, strings.Join(picked, " "))
	}
	return sentences
}
// GenerateJSONArray returns numRows JSON-encoded values. The value kind cycles
// with the row index modulo 4:
//
//	0: a fixed string literal
//	1: the row index as an integer
//	2: the row index times 0.1 as a float32
//	3: the row index formatted as a decimal string
func GenerateJSONArray(numRows int) [][]byte {
	docs := make([][]byte, 0, numRows)
	for row := 0; row < numRows; row++ {
		var payload interface{}
		switch row % 4 {
		case 0:
			payload = "{\"a\": \"%s\", \"b\": %d}"
		case 1:
			payload = row
		case 2:
			payload = float32(row) * 0.1
		case 3:
			payload = strconv.Itoa(row)
		}
		// The marshal error is deliberately ignored, as in the rest of this
		// helper's history: every payload is a plain string or number.
		encoded, _ := json.Marshal(payload)
		docs = append(docs, encoded)
	}
	return docs
}
// GenerateGeometryArray returns numRows geometries encoded as WKB bytes, which
// is how the milvus core components view geometry data.
//
// Rows cycle through six fixed shapes (point, linestring, polygon, multipoint,
// multilinestring, multipolygon); the last row is always a special point used
// by tests. Each shape is parsed from WKT and marshalled with wkb.NDR.
func GenerateGeometryArray(numRows int) [][]byte {
	const (
		point           = "POINT (30.123 -10.456)"
		linestring      = "LINESTRING (30.123 -10.456, 10.789 30.123, -40.567 40.890)"
		polygon         = "POLYGON ((30.123 -10.456, 40.678 40.890, 20.345 40.567, 10.123 20.456, 30.123 -10.456))"
		multipoint      = "MULTIPOINT ((10.111 40.222), (40.333 30.444), (20.555 20.666), (30.777 10.888))"
		multilinestring = "MULTILINESTRING ((10.111 10.222, 20.333 20.444), (15.555 15.666, 25.777 25.888), (-30.999 20.000, 40.111 30.222))"
		multipolygon    = "MULTIPOLYGON (((30.123 -10.456, 40.678 40.890, 20.345 40.567, 10.123 20.456, 30.123 -10.456)),((15.123 5.456, 25.678 5.890, 25.345 15.567, 15.123 15.456, 15.123 5.456)))"
	)
	shapes := [6]string{point, linestring, polygon, multipoint, multilinestring, multipolygon}
	encoded := make([][]byte, 0, numRows)
	for row := 0; row < numRows; row++ {
		text := shapes[row%6]
		if row == numRows-1 {
			// the final row is a special point that tests look for
			text = "POINT (-84.036 39.997)"
		}
		// Conversion errors are ignored: every input is one of the fixed
		// WKT literals above.
		geometry, _ := wkt.Unmarshal(text)
		payload, _ := wkb.Marshal(geometry, wkb.NDR)
		encoded = append(encoded, payload)
	}
	return encoded
}
// GenerateGeometryWktArray returns numRows geometries as WKT text bytes, which
// is how the milvus client and the proxy's insert request input view geometry
// data.
//
// Rows cycle through six fixed shapes (point, linestring, polygon, multipoint,
// multilinestring, multipolygon); the last row is always a special point.
func GenerateGeometryWktArray(numRows int) [][]byte {
	const (
		point           = "POINT (30.123 -10.456)"
		linestring      = "LINESTRING (30.123 -10.456, 10.789 30.123, -40.567 40.890)"
		polygon         = "POLYGON ((30.123 -10.456, 40.678 40.890, 20.345 40.567, 10.123 20.456, 30.123 -10.456))"
		multipoint      = "MULTIPOINT ((10.111 40.222), (40.333 30.444), (20.555 20.666), (30.777 10.888))"
		multilinestring = "MULTILINESTRING ((10.111 10.222, 20.333 20.444), (15.555 15.666, 25.777 25.888), (-30.999 20.000, 40.111 30.222))"
		multipolygon    = "MULTIPOLYGON (((30.123 -10.456, 40.678 40.890, 20.345 40.567, 10.123 20.456, 30.123 -10.456)),((15.123 5.456, 25.678 5.890, 25.345 15.567, 15.123 15.456, 15.123 5.456)))"
	)
	shapes := [6]string{point, linestring, polygon, multipoint, multilinestring, multipolygon}
	rows := make([][]byte, 0, numRows)
	for row := 0; row < numRows; row++ {
		// WKT text is passed through as bytes; the proxy is expected to
		// process it further.
		text := shapes[row%6]
		if row == numRows-1 {
			text = "POINT (-84.036 39.997)"
		}
		rows = append(rows, []byte(text))
	}
	return rows
}
// GenerateArrayOfBoolArray returns numRows scalar fields, each wrapping a
// bool array of elemCountOfArray elements from GenerateBoolArray.
func GenerateArrayOfBoolArray(numRows int) []*schemapb.ScalarField {
	rows := make([]*schemapb.ScalarField, numRows)
	for idx := range rows {
		elems := &schemapb.BoolArray{Data: GenerateBoolArray(elemCountOfArray)}
		rows[idx] = &schemapb.ScalarField{
			Data: &schemapb.ScalarField_BoolData{BoolData: elems},
		}
	}
	return rows
}
// GenerateArrayOfIntArray returns numRows scalar fields, each wrapping an
// int32 array of elemCountOfArray elements from GenerateInt32Array.
func GenerateArrayOfIntArray(numRows int) []*schemapb.ScalarField {
	rows := make([]*schemapb.ScalarField, numRows)
	for idx := range rows {
		elems := &schemapb.IntArray{Data: GenerateInt32Array(elemCountOfArray)}
		rows[idx] = &schemapb.ScalarField{
			Data: &schemapb.ScalarField_IntData{IntData: elems},
		}
	}
	return rows
}
// GenerateArrayOfLongArray returns numRows scalar fields, each wrapping an
// int64 array of elemCountOfArray elements from GenerateInt64Array.
func GenerateArrayOfLongArray(numRows int) []*schemapb.ScalarField {
	rows := make([]*schemapb.ScalarField, numRows)
	for idx := range rows {
		elems := &schemapb.LongArray{Data: GenerateInt64Array(elemCountOfArray)}
		rows[idx] = &schemapb.ScalarField{
			Data: &schemapb.ScalarField_LongData{LongData: elems},
		}
	}
	return rows
}
// GenerateArrayOfFloatArray returns numRows scalar fields, each wrapping a
// float32 array of elemCountOfArray elements from GenerateFloat32Array.
func GenerateArrayOfFloatArray(numRows int) []*schemapb.ScalarField {
	rows := make([]*schemapb.ScalarField, numRows)
	for idx := range rows {
		elems := &schemapb.FloatArray{Data: GenerateFloat32Array(elemCountOfArray)}
		rows[idx] = &schemapb.ScalarField{
			Data: &schemapb.ScalarField_FloatData{FloatData: elems},
		}
	}
	return rows
}
// GenerateArrayOfDoubleArray returns numRows scalar fields, each wrapping a
// float64 array of elemCountOfArray elements from GenerateFloat64Array.
func GenerateArrayOfDoubleArray(numRows int) []*schemapb.ScalarField {
	rows := make([]*schemapb.ScalarField, numRows)
	for idx := range rows {
		elems := &schemapb.DoubleArray{Data: GenerateFloat64Array(elemCountOfArray)}
		rows[idx] = &schemapb.ScalarField{
			Data: &schemapb.ScalarField_DoubleData{DoubleData: elems},
		}
	}
	return rows
}
// GenerateArrayOfStringArray returns numRows scalar fields, each wrapping a
// string array of elemCountOfArray sentences from GenerateStringArray.
func GenerateArrayOfStringArray(numRows int) []*schemapb.ScalarField {
	rows := make([]*schemapb.ScalarField, numRows)
	for idx := range rows {
		elems := &schemapb.StringArray{Data: GenerateStringArray(elemCountOfArray)}
		rows[idx] = &schemapb.ScalarField{
			Data: &schemapb.ScalarField_StringData{StringData: elems},
		}
	}
	return rows
}
// GenerateBytesArray returns numRows byte slices, each holding the decimal
// text of a random non-negative int from rand.Int.
func GenerateBytesArray(numRows int) [][]byte {
	rows := make([][]byte, numRows)
	for idx := range rows {
		rows[idx] = []byte(fmt.Sprint(rand.Int()))
	}
	return rows
}
// GenerateBinaryVectors returns (numRows*dim)/8 random bytes, i.e. numRows
// binary vectors of dim bits packed eight bits per byte. It panics if the
// random source reports an error.
func GenerateBinaryVectors(numRows, dim int) []byte {
	buf := make([]byte, (numRows*dim)/8)
	if _, err := rand.Read(buf); err != nil {
		panic(err)
	}
	return buf
}
// GenerateFloatVectors returns numRows*dim random float32 values in [0, 1),
// i.e. numRows float vectors of dimension dim laid out back to back.
func GenerateFloatVectors(numRows, dim int) []float32 {
	vectors := make([]float32, numRows*dim)
	for idx := range vectors {
		vectors[idx] = rand.Float32()
	}
	return vectors
}
// GenerateFloat16Vectors returns the bytes of numRows*dim random values, each
// drawn from [-50, 50) as a float32 and encoded by
// typeutil.Float32ToFloat16Bytes.
func GenerateFloat16Vectors(numRows, dim int) []byte {
	elemCount := numRows * dim
	buf := make([]byte, 0, elemCount*2)
	for n := 0; n < elemCount; n++ {
		sample := (rand.Float32() - 0.5) * 100
		buf = append(buf, typeutil.Float32ToFloat16Bytes(sample)...)
	}
	return buf
}
// GenerateBFloat16Vectors returns the bytes of numRows*dim random values, each
// drawn from [-50, 50) as a float32 and encoded by
// typeutil.Float32ToBFloat16Bytes.
func GenerateBFloat16Vectors(numRows, dim int) []byte {
	elemCount := numRows * dim
	buf := make([]byte, 0, elemCount*2)
	for n := 0; n < elemCount; n++ {
		sample := (rand.Float32() - 0.5) * 100
		buf = append(buf, typeutil.Float32ToBFloat16Bytes(sample)...)
	}
	return buf
}
// GenerateBFloat16VectorsWithInvalidData returns numRows*dim two-byte
// little-endian values that alternate between NaN (even positions) and
// positive infinity (odd positions). Each value is the upper 16 bits of the
// float32 bit pattern with the sign bit cleared.
func GenerateBFloat16VectorsWithInvalidData(numRows, dim int) []byte {
	elemCount := numRows * dim
	buf := make([]byte, elemCount*2)
	for idx := 0; idx < elemCount; idx++ {
		invalid := float32(math.Inf(1))
		if idx%2 == 0 {
			invalid = float32(math.NaN())
		}
		// keep the high half of the float32 and drop the sign bit
		upper := (math.Float32bits(invalid) >> 16) & 0x7FFF
		binary.LittleEndian.PutUint16(buf[idx*2:], uint16(upper))
	}
	return buf
}
// GenerateFloat16VectorsWithInvalidData returns numRows*dim two-byte
// little-endian float16 values that alternate between positive infinity
// (even positions) and NaN (odd positions).
func GenerateFloat16VectorsWithInvalidData(numRows, dim int) []byte {
	elemCount := numRows * dim
	buf := make([]byte, elemCount*2)
	for idx := 0; idx < elemCount; idx++ {
		invalid := float16.NaN()
		if idx%2 == 0 {
			invalid = float16.Inf(1)
		}
		binary.LittleEndian.PutUint16(buf[idx*2:], uint16(invalid))
	}
	return buf
}
// GenerateSparseFloatVectorsData returns numRows random sparse float rows and
// the dimension they span.
//
// For each row between 1 and 40 indices are drawn from [0, 700), deduplicated
// and sorted ascending; every surviving index gets a random float32 value in
// [0, 1). The row is then encoded with typeutil.CreateSparseFloatRow. The
// returned dimension is the largest index seen across all rows plus one, or 0
// when numRows is 0.
func GenerateSparseFloatVectorsData(numRows int) ([][]byte, int64) {
	const (
		dim    = 700
		avgNnz = 20
	)

	// uniqueAndSort drops repeated indices and orders the rest ascending.
	uniqueAndSort := func(indices []uint32) []uint32 {
		seen := make(map[uint32]struct{}, len(indices))
		var result []uint32
		for _, value := range indices {
			if _, ok := seen[value]; ok {
				continue
			}
			seen[value] = struct{}{}
			result = append(result, value)
		}
		sort.Slice(result, func(i, j int) bool {
			return result[i] < result[j]
		})
		return result
	}

	var contents [][]byte
	maxDim := 0
	for i := 0; i < numRows; i++ {
		nnz := rand.Intn(avgNnz*2) + 1
		indices := make([]uint32, 0, nnz)
		for j := 0; j < nnz; j++ {
			indices = append(indices, uint32(rand.Intn(dim)))
		}
		indices = uniqueAndSort(indices)

		values := make([]float32, 0, len(indices))
		for j := 0; j < len(indices); j++ {
			values = append(values, rand.Float32())
		}

		// indices are sorted, so the last one is the largest of this row
		if len(indices) > 0 && int(indices[len(indices)-1])+1 > maxDim {
			maxDim = int(indices[len(indices)-1]) + 1
		}
		contents = append(contents, typeutil.CreateSparseFloatRow(indices, values))
	}
	return contents, int64(maxDim)
}

// GenerateSparseFloatVectors returns numRows random sparse float rows wrapped
// in a SparseFloatArray. The rows and the Dim value come from
// GenerateSparseFloatVectorsData, so both helpers share one generation
// routine instead of carrying duplicate copies of it.
func GenerateSparseFloatVectors(numRows int) *schemapb.SparseFloatArray {
	contents, dim := GenerateSparseFloatVectorsData(numRows)
	return &schemapb.SparseFloatArray{
		Dim:      dim,
		Contents: contents,
	}
}
// GenerateHashKeys returns numRows random uint32 values from rand.Uint32.
func GenerateHashKeys(numRows int) []uint32 {
	keys := make([]uint32, numRows)
	for idx := range keys {
		keys[idx] = rand.Uint32()
	}
	return keys
}
// generate FieldData
// NewBoolFieldData builds a Bool FieldData named fieldName whose numRows
// values come from GenerateBoolArray.
func NewBoolFieldData(fieldName string, numRows int) *schemapb.FieldData {
	values := &schemapb.BoolArray{Data: GenerateBoolArray(numRows)}
	scalars := &schemapb.ScalarField{
		Data: &schemapb.ScalarField_BoolData{BoolData: values},
	}
	return &schemapb.FieldData{
		Type:      schemapb.DataType_Bool,
		FieldName: fieldName,
		Field:     &schemapb.FieldData_Scalars{Scalars: scalars},
	}
}
// NewBoolFieldDataWithValue builds a Bool FieldData named fieldName around the
// caller-supplied values. It panics if fieldValue is not a []bool.
func NewBoolFieldDataWithValue(fieldName string, fieldValue interface{}) *schemapb.FieldData {
	values := &schemapb.BoolArray{Data: fieldValue.([]bool)}
	scalars := &schemapb.ScalarField{
		Data: &schemapb.ScalarField_BoolData{BoolData: values},
	}
	return &schemapb.FieldData{
		Type:      schemapb.DataType_Bool,
		FieldName: fieldName,
		Field:     &schemapb.FieldData_Scalars{Scalars: scalars},
	}
}
// NewInt8FieldData builds an Int8 FieldData named fieldName with numRows
// generated values. The values are carried in an IntArray (int32 elements).
//
// Element i is int8(i) widened to int32, so the data stays inside the int8
// range for any numRows instead of growing past 127. For numRows <= 128 this
// is the plain sequence 0, 1, 2, ...
func NewInt8FieldData(fieldName string, numRows int) *schemapb.FieldData {
	values := make([]int32, numRows)
	for i := range values {
		// wrap through int8 so the value is valid for the declared type
		values[i] = int32(int8(i))
	}
	return &schemapb.FieldData{
		Type:      schemapb.DataType_Int8,
		FieldName: fieldName,
		Field: &schemapb.FieldData_Scalars{
			Scalars: &schemapb.ScalarField{
				Data: &schemapb.ScalarField_IntData{
					IntData: &schemapb.IntArray{
						Data: values,
					},
				},
			},
		},
	}
}
// NewInt16FieldData builds an Int16 FieldData named fieldName with numRows
// generated values. The values are carried in an IntArray (int32 elements).
//
// Element i is int16(i) widened to int32, so the data stays inside the int16
// range for any numRows instead of growing past 32767. For numRows <= 32768
// this is the plain sequence 0, 1, 2, ...
func NewInt16FieldData(fieldName string, numRows int) *schemapb.FieldData {
	values := make([]int32, numRows)
	for i := range values {
		// wrap through int16 so the value is valid for the declared type
		values[i] = int32(int16(i))
	}
	return &schemapb.FieldData{
		Type:      schemapb.DataType_Int16,
		FieldName: fieldName,
		Field: &schemapb.FieldData_Scalars{
			Scalars: &schemapb.ScalarField{
				Data: &schemapb.ScalarField_IntData{
					IntData: &schemapb.IntArray{
						Data: values,
					},
				},
			},
		},
	}
}
// NewInt32FieldData builds an Int32 FieldData named fieldName whose numRows
// values come from GenerateInt32Array.
func NewInt32FieldData(fieldName string, numRows int) *schemapb.FieldData {
	values := &schemapb.IntArray{Data: GenerateInt32Array(numRows)}
	scalars := &schemapb.ScalarField{
		Data: &schemapb.ScalarField_IntData{IntData: values},
	}
	return &schemapb.FieldData{
		Type:      schemapb.DataType_Int32,
		FieldName: fieldName,
		Field:     &schemapb.FieldData_Scalars{Scalars: scalars},
	}
}
// NewInt32FieldDataWithValue builds an Int32 FieldData named fieldName around
// the caller-supplied values. It panics if fieldValue is not a []int32.
func NewInt32FieldDataWithValue(fieldName string, fieldValue interface{}) *schemapb.FieldData {
	values := &schemapb.IntArray{Data: fieldValue.([]int32)}
	scalars := &schemapb.ScalarField{
		Data: &schemapb.ScalarField_IntData{IntData: values},
	}
	return &schemapb.FieldData{
		Type:      schemapb.DataType_Int32,
		FieldName: fieldName,
		Field:     &schemapb.FieldData_Scalars{Scalars: scalars},
	}
}
// NewInt64FieldData builds an Int64 FieldData named fieldName whose numRows
// values come from GenerateInt64Array.
func NewInt64FieldData(fieldName string, numRows int) *schemapb.FieldData {
	values := &schemapb.LongArray{Data: GenerateInt64Array(numRows)}
	scalars := &schemapb.ScalarField{
		Data: &schemapb.ScalarField_LongData{LongData: values},
	}
	return &schemapb.FieldData{
		Type:      schemapb.DataType_Int64,
		FieldName: fieldName,
		Field:     &schemapb.FieldData_Scalars{Scalars: scalars},
	}
}
// NewInt64FieldDataWithValue builds an Int64 FieldData named fieldName around
// the caller-supplied values. It panics if fieldValue is not a []int64.
func NewInt64FieldDataWithValue(fieldName string, fieldValue interface{}) *schemapb.FieldData {
	values := &schemapb.LongArray{Data: fieldValue.([]int64)}
	scalars := &schemapb.ScalarField{
		Data: &schemapb.ScalarField_LongData{LongData: values},
	}
	return &schemapb.FieldData{
		Type:      schemapb.DataType_Int64,
		FieldName: fieldName,
		Field:     &schemapb.FieldData_Scalars{Scalars: scalars},
	}
}
// NewFloatFieldData builds a Float FieldData named fieldName whose numRows
// values come from GenerateFloat32Array.
func NewFloatFieldData(fieldName string, numRows int) *schemapb.FieldData {
	values := &schemapb.FloatArray{Data: GenerateFloat32Array(numRows)}
	scalars := &schemapb.ScalarField{
		Data: &schemapb.ScalarField_FloatData{FloatData: values},
	}
	return &schemapb.FieldData{
		Type:      schemapb.DataType_Float,
		FieldName: fieldName,
		Field:     &schemapb.FieldData_Scalars{Scalars: scalars},
	}
}
// NewFloatFieldDataWithValue builds a Float FieldData named fieldName around
// the caller-supplied values. It panics if fieldValue is not a []float32.
func NewFloatFieldDataWithValue(fieldName string, fieldValue interface{}) *schemapb.FieldData {
	values := &schemapb.FloatArray{Data: fieldValue.([]float32)}
	scalars := &schemapb.ScalarField{
		Data: &schemapb.ScalarField_FloatData{FloatData: values},
	}
	return &schemapb.FieldData{
		Type:      schemapb.DataType_Float,
		FieldName: fieldName,
		Field:     &schemapb.FieldData_Scalars{Scalars: scalars},
	}
}
// NewDoubleFieldData builds a Double FieldData named fieldName whose numRows
// values come from GenerateFloat64Array.
func NewDoubleFieldData(fieldName string, numRows int) *schemapb.FieldData {
	values := &schemapb.DoubleArray{Data: GenerateFloat64Array(numRows)}
	scalars := &schemapb.ScalarField{
		Data: &schemapb.ScalarField_DoubleData{DoubleData: values},
	}
	return &schemapb.FieldData{
		Type:      schemapb.DataType_Double,
		FieldName: fieldName,
		Field:     &schemapb.FieldData_Scalars{Scalars: scalars},
	}
}
// NewDoubleFieldDataWithValue builds a Double FieldData named fieldName around
// the caller-supplied values. It panics if fieldValue is not a []float64.
func NewDoubleFieldDataWithValue(fieldName string, fieldValue interface{}) *schemapb.FieldData {
	values := &schemapb.DoubleArray{Data: fieldValue.([]float64)}
	scalars := &schemapb.ScalarField{
		Data: &schemapb.ScalarField_DoubleData{DoubleData: values},
	}
	return &schemapb.FieldData{
		Type:      schemapb.DataType_Double,
		FieldName: fieldName,
		Field:     &schemapb.FieldData_Scalars{Scalars: scalars},
	}
}
// NewVarCharFieldData builds a VarChar FieldData named fieldName whose numRows
// values come from GenerateVarCharArray with a maxLen of 10.
func NewVarCharFieldData(fieldName string, numRows int) *schemapb.FieldData {
	values := &schemapb.StringArray{Data: GenerateVarCharArray(numRows, 10)}
	scalars := &schemapb.ScalarField{
		Data: &schemapb.ScalarField_StringData{StringData: values},
	}
	return &schemapb.FieldData{
		Type:      schemapb.DataType_VarChar,
		FieldName: fieldName,
		Field:     &schemapb.FieldData_Scalars{Scalars: scalars},
	}
}
// NewVarCharFieldDataWithValue builds a VarChar FieldData named fieldName
// around the caller-supplied values. It panics if fieldValue is not a
// []string.
func NewVarCharFieldDataWithValue(fieldName string, fieldValue interface{}) *schemapb.FieldData {
	values := &schemapb.StringArray{Data: fieldValue.([]string)}
	scalars := &schemapb.ScalarField{
		Data: &schemapb.ScalarField_StringData{StringData: values},
	}
	return &schemapb.FieldData{
		Type:      schemapb.DataType_VarChar,
		FieldName: fieldName,
		Field:     &schemapb.FieldData_Scalars{Scalars: scalars},
	}
}
// NewStringFieldData builds a String FieldData named fieldName whose numRows
// values come from GenerateStringArray.
func NewStringFieldData(fieldName string, numRows int) *schemapb.FieldData {
	values := &schemapb.StringArray{Data: GenerateStringArray(numRows)}
	scalars := &schemapb.ScalarField{
		Data: &schemapb.ScalarField_StringData{StringData: values},
	}
	return &schemapb.FieldData{
		Type:      schemapb.DataType_String,
		FieldName: fieldName,
		Field:     &schemapb.FieldData_Scalars{Scalars: scalars},
	}
}
// NewJSONFieldData builds a JSON FieldData named fieldName whose numRows
// values come from GenerateJSONArray.
func NewJSONFieldData(fieldName string, numRows int) *schemapb.FieldData {
	values := &schemapb.JSONArray{Data: GenerateJSONArray(numRows)}
	scalars := &schemapb.ScalarField{
		Data: &schemapb.ScalarField_JsonData{JsonData: values},
	}
	return &schemapb.FieldData{
		Type:      schemapb.DataType_JSON,
		FieldName: fieldName,
		Field:     &schemapb.FieldData_Scalars{Scalars: scalars},
	}
}
// NewJSONFieldDataWithValue builds a JSON FieldData named fieldName around the
// caller-supplied values. It panics if fieldValue is not a [][]byte.
func NewJSONFieldDataWithValue(fieldName string, fieldValue interface{}) *schemapb.FieldData {
	values := &schemapb.JSONArray{Data: fieldValue.([][]byte)}
	scalars := &schemapb.ScalarField{
		Data: &schemapb.ScalarField_JsonData{JsonData: values},
	}
	return &schemapb.FieldData{
		Type:      schemapb.DataType_JSON,
		FieldName: fieldName,
		Field:     &schemapb.FieldData_Scalars{Scalars: scalars},
	}
}
// NewArrayFieldData builds an Array FieldData named fieldName whose numRows
// rows come from GenerateArrayOfIntArray.
func NewArrayFieldData(fieldName string, numRows int) *schemapb.FieldData {
	values := &schemapb.ArrayArray{Data: GenerateArrayOfIntArray(numRows)}
	scalars := &schemapb.ScalarField{
		Data: &schemapb.ScalarField_ArrayData{ArrayData: values},
	}
	return &schemapb.FieldData{
		Type:      schemapb.DataType_Array,
		FieldName: fieldName,
		Field:     &schemapb.FieldData_Scalars{Scalars: scalars},
	}
}
// NewArrayFieldDataWithValue builds an Array FieldData named fieldName around
// the caller-supplied rows. It panics if fieldValue is not a
// []*schemapb.ScalarField.
func NewArrayFieldDataWithValue(fieldName string, fieldValue interface{}) *schemapb.FieldData {
	values := &schemapb.ArrayArray{Data: fieldValue.([]*schemapb.ScalarField)}
	scalars := &schemapb.ScalarField{
		Data: &schemapb.ScalarField_ArrayData{ArrayData: values},
	}
	return &schemapb.FieldData{
		Type:      schemapb.DataType_Array,
		FieldName: fieldName,
		Field:     &schemapb.FieldData_Scalars{Scalars: scalars},
	}
}
// NewGeometryFieldData builds a Geometry FieldData named fieldName whose
// numRows values are the WKB bytes from GenerateGeometryArray.
func NewGeometryFieldData(fieldName string, numRows int) *schemapb.FieldData {
	values := &schemapb.GeometryArray{Data: GenerateGeometryArray(numRows)}
	scalars := &schemapb.ScalarField{
		Data: &schemapb.ScalarField_GeometryData{GeometryData: values},
	}
	return &schemapb.FieldData{
		Type:      schemapb.DataType_Geometry,
		FieldName: fieldName,
		Field:     &schemapb.FieldData_Scalars{Scalars: scalars},
	}
}
// NewGeometryFieldDataWktFormat builds a Geometry FieldData named fieldName
// whose numRows values are the WKT text bytes from GenerateGeometryWktArray.
func NewGeometryFieldDataWktFormat(fieldName string, numRows int) *schemapb.FieldData {
	values := &schemapb.GeometryArray{Data: GenerateGeometryWktArray(numRows)}
	scalars := &schemapb.ScalarField{
		Data: &schemapb.ScalarField_GeometryData{GeometryData: values},
	}
	return &schemapb.FieldData{
		Type:      schemapb.DataType_Geometry,
		FieldName: fieldName,
		Field:     &schemapb.FieldData_Scalars{Scalars: scalars},
	}
}
// NewGeometryFieldDataWithValue builds a Geometry FieldData named fieldName
// around the caller-supplied values. It panics if fieldValue is not a
// [][]byte.
func NewGeometryFieldDataWithValue(fieldName string, fieldValue interface{}) *schemapb.FieldData {
	values := &schemapb.GeometryArray{Data: fieldValue.([][]byte)}
	scalars := &schemapb.ScalarField{
		Data: &schemapb.ScalarField_GeometryData{GeometryData: values},
	}
	return &schemapb.FieldData{
		Type:      schemapb.DataType_Geometry,
		FieldName: fieldName,
		Field:     &schemapb.FieldData_Scalars{Scalars: scalars},
	}
}
// NewBinaryVectorFieldData builds a BinaryVector FieldData named fieldName
// with dimension dim, filled by GenerateBinaryVectors(numRows, dim).
func NewBinaryVectorFieldData(fieldName string, numRows, dim int) *schemapb.FieldData {
	vectors := &schemapb.VectorField{
		Dim:  int64(dim),
		Data: &schemapb.VectorField_BinaryVector{BinaryVector: GenerateBinaryVectors(numRows, dim)},
	}
	return &schemapb.FieldData{
		Type:      schemapb.DataType_BinaryVector,
		FieldName: fieldName,
		Field:     &schemapb.FieldData_Vectors{Vectors: vectors},
	}
}
// NewBinaryVectorFieldDataWithValue builds a BinaryVector FieldData named
// fieldName with dimension dim around the caller-supplied bytes. It panics if
// fieldValue is not a []byte.
func NewBinaryVectorFieldDataWithValue(fieldName string, fieldValue interface{}, dim int) *schemapb.FieldData {
	vectors := &schemapb.VectorField{
		Dim:  int64(dim),
		Data: &schemapb.VectorField_BinaryVector{BinaryVector: fieldValue.([]byte)},
	}
	return &schemapb.FieldData{
		Type:      schemapb.DataType_BinaryVector,
		FieldName: fieldName,
		Field:     &schemapb.FieldData_Vectors{Vectors: vectors},
	}
}
// NewFloatVectorFieldData builds a FloatVector FieldData named fieldName with
// dimension dim, filled by GenerateFloatVectors(numRows, dim).
func NewFloatVectorFieldData(fieldName string, numRows, dim int) *schemapb.FieldData {
	values := &schemapb.FloatArray{Data: GenerateFloatVectors(numRows, dim)}
	vectors := &schemapb.VectorField{
		Dim:  int64(dim),
		Data: &schemapb.VectorField_FloatVector{FloatVector: values},
	}
	return &schemapb.FieldData{
		Type:      schemapb.DataType_FloatVector,
		FieldName: fieldName,
		Field:     &schemapb.FieldData_Vectors{Vectors: vectors},
	}
}
// NewFloatVectorFieldDataWithValue builds a FloatVector FieldData named
// fieldName with dimension dim around the caller-supplied values. It panics if
// fieldValue is not a []float32.
func NewFloatVectorFieldDataWithValue(fieldName string, fieldValue interface{}, dim int) *schemapb.FieldData {
	values := &schemapb.FloatArray{Data: fieldValue.([]float32)}
	vectors := &schemapb.VectorField{
		Dim:  int64(dim),
		Data: &schemapb.VectorField_FloatVector{FloatVector: values},
	}
	return &schemapb.FieldData{
		Type:      schemapb.DataType_FloatVector,
		FieldName: fieldName,
		Field:     &schemapb.FieldData_Vectors{Vectors: vectors},
	}
}
// NewFloat16VectorFieldData builds a Float16Vector FieldData named fieldName
// with dimension dim, filled by GenerateFloat16Vectors(numRows, dim).
func NewFloat16VectorFieldData(fieldName string, numRows, dim int) *schemapb.FieldData {
	vectors := &schemapb.VectorField{
		Dim:  int64(dim),
		Data: &schemapb.VectorField_Float16Vector{Float16Vector: GenerateFloat16Vectors(numRows, dim)},
	}
	return &schemapb.FieldData{
		Type:      schemapb.DataType_Float16Vector,
		FieldName: fieldName,
		Field:     &schemapb.FieldData_Vectors{Vectors: vectors},
	}
}
// NewFloat16VectorFieldDataWithValue builds a Float16Vector FieldData named
// fieldName with dimension dim around the caller-supplied bytes. It panics if
// fieldValue is not a []byte.
func NewFloat16VectorFieldDataWithValue(fieldName string, fieldValue interface{}, dim int) *schemapb.FieldData {
	vectors := &schemapb.VectorField{
		Dim:  int64(dim),
		Data: &schemapb.VectorField_Float16Vector{Float16Vector: fieldValue.([]byte)},
	}
	return &schemapb.FieldData{
		Type:      schemapb.DataType_Float16Vector,
		FieldName: fieldName,
		Field:     &schemapb.FieldData_Vectors{Vectors: vectors},
	}
}
// NewBFloat16VectorFieldData builds a BFloat16Vector FieldData named fieldName
// with dimension dim, filled by GenerateBFloat16Vectors(numRows, dim).
func NewBFloat16VectorFieldData(fieldName string, numRows, dim int) *schemapb.FieldData {
	vectors := &schemapb.VectorField{
		Dim:  int64(dim),
		Data: &schemapb.VectorField_Bfloat16Vector{Bfloat16Vector: GenerateBFloat16Vectors(numRows, dim)},
	}
	return &schemapb.FieldData{
		Type:      schemapb.DataType_BFloat16Vector,
		FieldName: fieldName,
		Field:     &schemapb.FieldData_Vectors{Vectors: vectors},
	}
}
// NewBFloat16VectorFieldDataWithValue builds a BFloat16Vector FieldData named
// fieldName with dimension dim around the caller-supplied bytes. It panics if
// fieldValue is not a []byte.
func NewBFloat16VectorFieldDataWithValue(fieldName string, fieldValue interface{}, dim int) *schemapb.FieldData {
	vectors := &schemapb.VectorField{
		Dim:  int64(dim),
		Data: &schemapb.VectorField_Bfloat16Vector{Bfloat16Vector: fieldValue.([]byte)},
	}
	return &schemapb.FieldData{
		Type:      schemapb.DataType_BFloat16Vector,
		FieldName: fieldName,
		Field:     &schemapb.FieldData_Vectors{Vectors: vectors},
	}
}
// NewSparseFloatVectorFieldData builds a SparseFloatVector FieldData named
// fieldName from GenerateSparseFloatVectors(numRows). The vector field's Dim
// is the Dim reported by the generated sparse array.
func NewSparseFloatVectorFieldData(fieldName string, numRows int) *schemapb.FieldData {
	generated := GenerateSparseFloatVectors(numRows)
	vectors := &schemapb.VectorField{
		Dim:  generated.Dim,
		Data: &schemapb.VectorField_SparseFloatVector{SparseFloatVector: generated},
	}
	return &schemapb.FieldData{
		Type:      schemapb.DataType_SparseFloatVector,
		FieldName: fieldName,
		Field:     &schemapb.FieldData_Vectors{Vectors: vectors},
	}
}
// GenerateScalarFieldData builds generated FieldData of the scalar type dType
// with numRows rows by dispatching to the matching New*FieldData constructor.
// It panics with "unsupported data type" for any type without a constructor
// listed here.
func GenerateScalarFieldData(dType schemapb.DataType, fieldName string, numRows int) *schemapb.FieldData {
	var build func(string, int) *schemapb.FieldData
	switch dType {
	case schemapb.DataType_Bool:
		build = NewBoolFieldData
	case schemapb.DataType_Int8:
		build = NewInt8FieldData
	case schemapb.DataType_Int16:
		build = NewInt16FieldData
	case schemapb.DataType_Int32:
		build = NewInt32FieldData
	case schemapb.DataType_Int64:
		build = NewInt64FieldData
	case schemapb.DataType_Float:
		build = NewFloatFieldData
	case schemapb.DataType_Double:
		build = NewDoubleFieldData
	case schemapb.DataType_VarChar:
		build = NewVarCharFieldData
	case schemapb.DataType_String:
		build = NewStringFieldData
	case schemapb.DataType_Array:
		build = NewArrayFieldData
	case schemapb.DataType_JSON:
		build = NewJSONFieldData
	case schemapb.DataType_Geometry:
		build = NewGeometryFieldData
	default:
		panic("unsupported data type")
	}
	return build(fieldName, numRows)
}
// GenerateScalarFieldDataWithID is GenerateScalarFieldData with the result's
// FieldId set to fieldID.
func GenerateScalarFieldDataWithID(dType schemapb.DataType, fieldName string, fieldID int64, numRows int) *schemapb.FieldData {
	data := GenerateScalarFieldData(dType, fieldName, numRows)
	data.FieldId = fieldID
	return data
}
// GenerateScalarFieldDataWithValue wraps the caller-supplied fieldValue in a
// FieldData of scalar type dType and sets its FieldId to fieldID.
//
// Supported types are Bool, Int32, Int64, Float, Double, VarChar, Array, JSON
// and Geometry; any other type panics with "unsupported data type". The
// selected constructor panics if fieldValue does not have the Go type it
// expects.
func GenerateScalarFieldDataWithValue(dType schemapb.DataType, fieldName string, fieldID int64, fieldValue interface{}) *schemapb.FieldData {
	var wrap func(string, interface{}) *schemapb.FieldData
	switch dType {
	case schemapb.DataType_Bool:
		wrap = NewBoolFieldDataWithValue
	case schemapb.DataType_Int32:
		wrap = NewInt32FieldDataWithValue
	case schemapb.DataType_Int64:
		wrap = NewInt64FieldDataWithValue
	case schemapb.DataType_Float:
		wrap = NewFloatFieldDataWithValue
	case schemapb.DataType_Double:
		wrap = NewDoubleFieldDataWithValue
	case schemapb.DataType_VarChar:
		wrap = NewVarCharFieldDataWithValue
	case schemapb.DataType_Array:
		wrap = NewArrayFieldDataWithValue
	case schemapb.DataType_JSON:
		wrap = NewJSONFieldDataWithValue
	case schemapb.DataType_Geometry:
		wrap = NewGeometryFieldDataWithValue
	default:
		panic("unsupported data type")
	}
	data := wrap(fieldName, fieldValue)
	data.FieldId = fieldID
	return data
}
// GenerateVectorFieldData builds generated FieldData of the vector type dType
// with numRows rows by dispatching to the matching New*VectorFieldData
// constructor. dim is passed through for the dense vector types and is not
// used for SparseFloatVector. It panics with "unsupported data type" for any
// other type.
func GenerateVectorFieldData(dType schemapb.DataType, fieldName string, numRows int, dim int) *schemapb.FieldData {
	var data *schemapb.FieldData
	switch dType {
	case schemapb.DataType_BinaryVector:
		data = NewBinaryVectorFieldData(fieldName, numRows, dim)
	case schemapb.DataType_FloatVector:
		data = NewFloatVectorFieldData(fieldName, numRows, dim)
	case schemapb.DataType_Float16Vector:
		data = NewFloat16VectorFieldData(fieldName, numRows, dim)
	case schemapb.DataType_BFloat16Vector:
		data = NewBFloat16VectorFieldData(fieldName, numRows, dim)
	case schemapb.DataType_SparseFloatVector:
		data = NewSparseFloatVectorFieldData(fieldName, numRows)
	default:
		panic("unsupported data type")
	}
	return data
}
// GenerateVectorFieldDataWithID is GenerateVectorFieldData with the result's
// FieldId set to fieldID.
func GenerateVectorFieldDataWithID(dType schemapb.DataType, fieldName string, fieldID int64, numRows int, dim int) *schemapb.FieldData {
	data := GenerateVectorFieldData(dType, fieldName, numRows, dim)
	data.FieldId = fieldID
	return data
}
// GenerateVectorFieldDataWithValue wraps the caller-supplied fieldValue in a
// FieldData of vector type dType with dimension dim and sets its FieldId to
// fieldID.
//
// Supported types are BinaryVector, FloatVector, Float16Vector and
// BFloat16Vector; any other type panics with "unsupported data type". The
// selected constructor panics if fieldValue does not have the Go type it
// expects.
func GenerateVectorFieldDataWithValue(dType schemapb.DataType, fieldName string, fieldID int64, fieldValue interface{}, dim int) *schemapb.FieldData {
	var wrap func(string, interface{}, int) *schemapb.FieldData
	switch dType {
	case schemapb.DataType_BinaryVector:
		wrap = NewBinaryVectorFieldDataWithValue
	case schemapb.DataType_FloatVector:
		wrap = NewFloatVectorFieldDataWithValue
	case schemapb.DataType_Float16Vector:
		wrap = NewFloat16VectorFieldDataWithValue
	case schemapb.DataType_BFloat16Vector:
		wrap = NewBFloat16VectorFieldDataWithValue
	default:
		panic("unsupported data type")
	}
	data := wrap(fieldName, fieldValue, dim)
	data.FieldId = fieldID
	return data
}