milvus/pkg/util/testutils/gen_data.go
wei liu 8accde97be
test: [skip e2e] unstable ut caused by duplicate pk in same batch (#46132)
issue: #46105

---------

Signed-off-by: Wei Liu <wei.liu@zilliz.com>
2025-12-08 14:13:16 +08:00

1191 lines
33 KiB
Go

// Licensed to the LF AI & Data foundation under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package testutils
import (
"encoding/binary"
"encoding/json"
"fmt"
"math"
"math/rand"
"sort"
"strconv"
"strings"
"github.com/twpayne/go-geom/encoding/wkb"
"github.com/twpayne/go-geom/encoding/wkt"
"github.com/x448/float16"
"github.com/milvus-io/milvus-proto/go-api/v2/schemapb"
"github.com/milvus-io/milvus/pkg/v2/util/funcutil"
"github.com/milvus-io/milvus/pkg/v2/util/typeutil"
)
const ElemCountOfArray = 10
// generate data
// GenerateBoolArray returns numRows booleans that alternate starting with
// true: even positions hold true, odd positions hold false.
func GenerateBoolArray(numRows int) []bool {
	flags := make([]bool, numRows)
	for idx := range flags {
		flags[idx] = idx%2 == 0
	}
	return flags
}
// GenerateInt8Array returns numRows values where element i is int8(i).
// The conversion truncates, so the sequence wraps to -128 after 127.
func GenerateInt8Array(numRows int) []int8 {
	values := make([]int8, numRows)
	for idx := range values {
		values[idx] = int8(idx)
	}
	return values
}
// GenerateInt16Array returns numRows values where element i is int16(i).
// The conversion truncates, so the sequence wraps after 32767.
func GenerateInt16Array(numRows int) []int16 {
	values := make([]int16, numRows)
	for idx := range values {
		values[idx] = int16(idx)
	}
	return values
}
// GenerateInt32Array returns the ascending sequence 0, 1, ..., numRows-1 as
// int32 values.
func GenerateInt32Array(numRows int) []int32 {
	values := make([]int32, numRows)
	for idx := range values {
		values[idx] = int32(idx)
	}
	return values
}
// GenerateInt64Array returns the ascending sequence 0, 1, ..., numRows-1 as
// int64 values.
func GenerateInt64Array(numRows int) []int64 {
	values := make([]int64, numRows)
	for idx := range values {
		values[idx] = int64(idx)
	}
	return values
}
// GenerateUint64Array returns the ascending sequence 0, 1, ..., numRows-1 as
// uint64 values.
func GenerateUint64Array(numRows int) []uint64 {
	values := make([]uint64, numRows)
	for idx := range values {
		values[idx] = uint64(idx)
	}
	return values
}
// GenerateFloat32Array returns numRows values where element i is float32(i).
func GenerateFloat32Array(numRows int) []float32 {
	values := make([]float32, numRows)
	for idx := range values {
		values[idx] = float32(idx)
	}
	return values
}
// GenerateFloat64Array returns numRows values where element i is float64(i).
func GenerateFloat64Array(numRows int) []float64 {
	values := make([]float64, numRows)
	for idx := range values {
		values[idx] = float64(idx)
	}
	return values
}
// GenerateVarCharArray returns numRows strings, each made of a random prefix
// followed by the suffix "_<row index>". The per-row suffix keeps values in
// one batch distinct from each other.
//
// The prefix length is drawn uniformly from [0, maxLen-len(suffix)]. When the
// suffix alone is already maxLen bytes or longer, the prefix is empty and the
// result may exceed maxLen.
func GenerateVarCharArray(numRows int, maxLen int) []string {
	values := make([]string, numRows)
	for row := range values {
		tag := fmt.Sprintf("_%d", row)
		// Only draw a random prefix length when room is left after the tag.
		prefixLen := 0
		if budget := maxLen - len(tag); budget > 0 {
			prefixLen = rand.Intn(budget + 1)
		}
		values[row] = funcutil.RandomString(prefixLen) + tag
	}
	return values
}
// GenerateStringArray returns numRows random sentences. Each sentence is 5 to
// 10 words drawn (with repetition) from a fixed vocabulary, shuffled and
// joined with single spaces.
func GenerateStringArray(numRows int) []string {
	vocabulary := []string{"hello", "world", "this", "is", "a", "test", "sentence", "milvus", "vector", "database", "search", "engine", "fast", "efficient", "scalable"}
	sentences := make([]string, numRows)
	for row := range sentences {
		picked := make([]string, rand.Intn(6)+5) // 5 to 10 words
		for slot := range picked {
			picked[slot] = vocabulary[rand.Intn(len(vocabulary))]
		}
		rand.Shuffle(len(picked), func(a, b int) {
			picked[a], picked[b] = picked[b], picked[a]
		})
		sentences[row] = strings.Join(picked, " ")
	}
	return sentences
}
// GenerateJSONArray returns numRows JSON documents whose shape cycles with
// the row index modulo 4:
//
//	0: a JSON string holding the literal text {"a": "%s", "b": %d}
//	   (the format verbs are not substituted)
//	1: the row index as a JSON number
//	2: float32(row)*0.1 as a JSON number
//	3: the row index as a JSON string
func GenerateJSONArray(numRows int) [][]byte {
	docs := make([][]byte, 0, numRows)
	for row := 0; row < numRows; row++ {
		var payload interface{}
		switch row % 4 {
		case 0:
			payload = "{\"a\": \"%s\", \"b\": %d}"
		case 1:
			payload = row
		case 2:
			payload = float32(row) * 0.1
		default:
			payload = strconv.Itoa(row)
		}
		// Marshalling strings, ints and float32 values is not expected to fail.
		encoded, _ := json.Marshal(payload)
		docs = append(docs, encoded)
	}
	return docs
}
// GenerateGeometryArray returns numRows geometries encoded as little-endian
// (NDR) WKB bytes, the representation the milvus core component works with.
//
// Rows cycle through six sample geometries (point, linestring, polygon,
// multipoint, multilinestring, multipolygon). The last row is always the
// special point "POINT (-84.036 39.997)" so tests have a known value to find.
// Parse and encode errors are ignored.
func GenerateGeometryArray(numRows int) [][]byte {
	// lastRowWKT is the special point placed in the final row for tests.
	const lastRowWKT = "POINT (-84.036 39.997)"
	samples := [6]string{
		"POINT (30.123 -10.456)",
		"LINESTRING (30.123 -10.456, 10.789 30.123, -40.567 40.890)",
		"POLYGON ((30.123 -10.456, 40.678 40.890, 20.345 40.567, 10.123 20.456, 30.123 -10.456))",
		"MULTIPOINT ((10.111 40.222), (40.333 30.444), (20.555 20.666), (30.777 10.888))",
		"MULTILINESTRING ((10.111 10.222, 20.333 20.444), (15.555 15.666, 25.777 25.888), (-30.999 20.000, 40.111 30.222))",
		"MULTIPOLYGON (((30.123 -10.456, 40.678 40.890, 20.345 40.567, 10.123 20.456, 30.123 -10.456)),((15.123 5.456, 25.678 5.890, 25.345 15.567, 15.123 15.456, 15.123 5.456)))",
	}
	encoded := make([][]byte, 0, numRows)
	for row := 0; row < numRows; row++ {
		text := samples[row%len(samples)]
		if row == numRows-1 {
			text = lastRowWKT
		}
		geometry, _ := wkt.Unmarshal(text)
		payload, _ := wkb.Marshal(geometry, wkb.NDR)
		encoded = append(encoded, payload)
	}
	return encoded
}
// GenerateGeometryWktArray returns numRows geometries as WKT text bytes, the
// form in which the milvus client and the proxy's insert request carry
// geometry data.
//
// Rows cycle through six sample geometries (point, linestring, polygon,
// multipoint, multilinestring, multipolygon). The last row is always
// "POINT (-84.036 39.997)".
func GenerateGeometryWktArray(numRows int) [][]byte {
	// lastRowWKT is the fixed point placed in the final row.
	const lastRowWKT = "POINT (-84.036 39.997)"
	samples := [6]string{
		"POINT (30.123 -10.456)",
		"LINESTRING (30.123 -10.456, 10.789 30.123, -40.567 40.890)",
		"POLYGON ((30.123 -10.456, 40.678 40.890, 20.345 40.567, 10.123 20.456, 30.123 -10.456))",
		"MULTIPOINT ((10.111 40.222), (40.333 30.444), (20.555 20.666), (30.777 10.888))",
		"MULTILINESTRING ((10.111 10.222, 20.333 20.444), (15.555 15.666, 25.777 25.888), (-30.999 20.000, 40.111 30.222))",
		"MULTIPOLYGON (((30.123 -10.456, 40.678 40.890, 20.345 40.567, 10.123 20.456, 30.123 -10.456)),((15.123 5.456, 25.678 5.890, 25.345 15.567, 15.123 15.456, 15.123 5.456)))",
	}
	texts := make([][]byte, 0, numRows)
	for row := 0; row < numRows; row++ {
		text := samples[row%len(samples)]
		if row == numRows-1 {
			text = lastRowWKT
		}
		texts = append(texts, []byte(text))
	}
	return texts
}
// GenerateArrayOfBoolArray returns numRows scalar fields, each wrapping a
// freshly generated bool array of ElemCountOfArray elements.
func GenerateArrayOfBoolArray(numRows int) []*schemapb.ScalarField {
	rows := make([]*schemapb.ScalarField, numRows)
	for idx := range rows {
		elements := &schemapb.BoolArray{Data: GenerateBoolArray(ElemCountOfArray)}
		rows[idx] = &schemapb.ScalarField{
			Data: &schemapb.ScalarField_BoolData{BoolData: elements},
		}
	}
	return rows
}
// GenerateArrayOfIntArray returns numRows scalar fields, each wrapping a
// freshly generated int32 array of ElemCountOfArray elements.
func GenerateArrayOfIntArray(numRows int) []*schemapb.ScalarField {
	rows := make([]*schemapb.ScalarField, numRows)
	for idx := range rows {
		elements := &schemapb.IntArray{Data: GenerateInt32Array(ElemCountOfArray)}
		rows[idx] = &schemapb.ScalarField{
			Data: &schemapb.ScalarField_IntData{IntData: elements},
		}
	}
	return rows
}
// GenerateArrayOfLongArray returns numRows scalar fields, each wrapping a
// freshly generated int64 array of ElemCountOfArray elements.
func GenerateArrayOfLongArray(numRows int) []*schemapb.ScalarField {
	rows := make([]*schemapb.ScalarField, numRows)
	for idx := range rows {
		elements := &schemapb.LongArray{Data: GenerateInt64Array(ElemCountOfArray)}
		rows[idx] = &schemapb.ScalarField{
			Data: &schemapb.ScalarField_LongData{LongData: elements},
		}
	}
	return rows
}
// GenerateArrayOfFloatArray returns numRows scalar fields, each wrapping a
// freshly generated float32 array of ElemCountOfArray elements.
func GenerateArrayOfFloatArray(numRows int) []*schemapb.ScalarField {
	rows := make([]*schemapb.ScalarField, numRows)
	for idx := range rows {
		elements := &schemapb.FloatArray{Data: GenerateFloat32Array(ElemCountOfArray)}
		rows[idx] = &schemapb.ScalarField{
			Data: &schemapb.ScalarField_FloatData{FloatData: elements},
		}
	}
	return rows
}
// GenerateArrayOfDoubleArray returns numRows scalar fields, each wrapping a
// freshly generated float64 array of ElemCountOfArray elements.
func GenerateArrayOfDoubleArray(numRows int) []*schemapb.ScalarField {
	rows := make([]*schemapb.ScalarField, numRows)
	for idx := range rows {
		elements := &schemapb.DoubleArray{Data: GenerateFloat64Array(ElemCountOfArray)}
		rows[idx] = &schemapb.ScalarField{
			Data: &schemapb.ScalarField_DoubleData{DoubleData: elements},
		}
	}
	return rows
}
// GenerateArrayOfFloatVectorArray returns numRows vector fields of dimension
// dim. Each row carries ElemCountOfArray random float vectors stored
// back-to-back in a single float array.
func GenerateArrayOfFloatVectorArray(numRows int, dim int) []*schemapb.VectorField {
	rows := make([]*schemapb.VectorField, numRows)
	for idx := range rows {
		flattened := &schemapb.FloatArray{Data: GenerateFloatVectors(ElemCountOfArray, dim)}
		rows[idx] = &schemapb.VectorField{
			Dim:  int64(dim),
			Data: &schemapb.VectorField_FloatVector{FloatVector: flattened},
		}
	}
	return rows
}
// GenerateArrayOfStringArray returns numRows scalar fields, each wrapping a
// freshly generated array of ElemCountOfArray random sentences.
func GenerateArrayOfStringArray(numRows int) []*schemapb.ScalarField {
	rows := make([]*schemapb.ScalarField, numRows)
	for idx := range rows {
		elements := &schemapb.StringArray{Data: GenerateStringArray(ElemCountOfArray)}
		rows[idx] = &schemapb.ScalarField{
			Data: &schemapb.ScalarField_StringData{StringData: elements},
		}
	}
	return rows
}
// GenerateBytesArray returns numRows byte slices, each holding the decimal
// text of a random non-negative int.
func GenerateBytesArray(numRows int) [][]byte {
	blobs := make([][]byte, numRows)
	for idx := range blobs {
		blobs[idx] = []byte(strconv.Itoa(rand.Int()))
	}
	return blobs
}
// GenerateBinaryVectors returns numRows*dim/8 random bytes, i.e. numRows
// binary vectors of dim bits packed eight bits per byte. The byte count uses
// integer division, so a bit total that is not a multiple of 8 is rounded
// down. It panics if the random source reports an error.
func GenerateBinaryVectors(numRows, dim int) []byte {
	packed := make([]byte, numRows*dim/8)
	if _, err := rand.Read(packed); err != nil {
		panic(err)
	}
	return packed
}
// GenerateFloatVectors returns numRows vectors of dim components flattened
// into one slice of numRows*dim random float32 values in [0.0, 1.0).
func GenerateFloatVectors(numRows, dim int) []float32 {
	flattened := make([]float32, numRows*dim)
	for idx := range flattened {
		flattened[idx] = rand.Float32()
	}
	return flattened
}
// GenerateFloat16Vectors returns numRows vectors of dim components as a flat
// byte slice. Each component is a random float32 in [-50, 50) converted with
// typeutil.Float32ToFloat16Bytes and appended in order.
func GenerateFloat16Vectors(numRows, dim int) []byte {
	count := numRows * dim
	// Capacity assumes two bytes per float16 component.
	encoded := make([]byte, 0, count*2)
	for remaining := count; remaining > 0; remaining-- {
		sample := (rand.Float32() - 0.5) * 100
		encoded = append(encoded, typeutil.Float32ToFloat16Bytes(sample)...)
	}
	return encoded
}
// GenerateBFloat16Vectors returns numRows vectors of dim components as a flat
// byte slice. Each component is a random float32 in [-50, 50) converted with
// typeutil.Float32ToBFloat16Bytes and appended in order.
func GenerateBFloat16Vectors(numRows, dim int) []byte {
	count := numRows * dim
	// Capacity assumes two bytes per bfloat16 component.
	encoded := make([]byte, 0, count*2)
	for remaining := count; remaining > 0; remaining-- {
		sample := (rand.Float32() - 0.5) * 100
		encoded = append(encoded, typeutil.Float32ToBFloat16Bytes(sample)...)
	}
	return encoded
}
// GenerateInt8Vectors returns numRows vectors of dim components flattened
// into one slice of numRows*dim random int8 values covering the full
// [-128, 127] range.
func GenerateInt8Vectors(numRows, dim int) []int8 {
	flattened := make([]int8, numRows*dim)
	for idx := range flattened {
		flattened[idx] = int8(rand.Intn(256) - 128)
	}
	return flattened
}
// GenerateBFloat16VectorsWithInvalidData returns numRows*dim bfloat16
// components (two little-endian bytes each) that are all non-finite:
// even positions hold NaN and odd positions hold +Inf.
//
// Each value is derived from the float32 bit pattern by keeping its upper
// 16 bits and clearing the sign bit.
func GenerateBFloat16VectorsWithInvalidData(numRows, dim int) []byte {
	count := numRows * dim
	encoded := make([]byte, count*2)
	for idx := 0; idx < count; idx++ {
		invalid := float32(math.Inf(1))
		if idx%2 == 0 {
			invalid = float32(math.NaN())
		}
		upper := (math.Float32bits(invalid) >> 16) & 0x7FFF
		binary.LittleEndian.PutUint16(encoded[idx*2:], uint16(upper))
	}
	return encoded
}
// GenerateFloat16VectorsWithInvalidData returns numRows*dim float16
// components (two little-endian bytes each) that are all non-finite:
// even positions hold +Inf and odd positions hold NaN.
func GenerateFloat16VectorsWithInvalidData(numRows, dim int) []byte {
	count := numRows * dim
	encoded := make([]byte, count*2)
	for idx := 0; idx < count; idx++ {
		invalid := float16.NaN()
		if idx%2 == 0 {
			invalid = float16.Inf(1)
		}
		binary.LittleEndian.PutUint16(encoded[idx*2:], uint16(invalid))
	}
	return encoded
}
// GenerateSparseFloatVectorsData returns numRows random sparse float rows,
// each encoded with typeutil.CreateSparseFloatRow, together with the overall
// dimension (largest index seen across all rows, plus one).
//
// For every row, between 1 and 40 indices are drawn from [0, 700); duplicates
// are dropped and the remainder sorted ascending, then each surviving index
// is paired with a random float32 value. The returned slice is nil when
// numRows is 0.
func GenerateSparseFloatVectorsData(numRows int) ([][]byte, int64) {
	const (
		indexSpace = 700 // indices are drawn from [0, indexSpace)
		meanNnz    = 20  // draws per row range over [1, 2*meanNnz]
	)

	// dedupSorted drops repeated indices and returns the rest in ascending order.
	dedupSorted := func(raw []uint32) []uint32 {
		visited := make(map[uint32]bool)
		var distinct []uint32
		for _, idx := range raw {
			if visited[idx] {
				continue
			}
			visited[idx] = true
			distinct = append(distinct, idx)
		}
		sort.Slice(distinct, func(a, b int) bool {
			return distinct[a] < distinct[b]
		})
		return distinct
	}

	var rows [][]byte
	highest := 0
	for row := 0; row < numRows; row++ {
		drawCount := rand.Intn(meanNnz*2) + 1
		picks := make([]uint32, drawCount)
		for k := range picks {
			picks[k] = uint32(rand.Intn(indexSpace))
		}
		picks = dedupSorted(picks)

		weights := make([]float32, len(picks))
		for k := range weights {
			weights[k] = rand.Float32()
		}

		// picks is sorted, so its last element is the row's largest index.
		if n := len(picks); n > 0 {
			if top := int(picks[n-1]) + 1; top > highest {
				highest = top
			}
		}
		rows = append(rows, typeutil.CreateSparseFloatRow(picks, weights))
	}
	return rows, int64(highest)
}
// GenerateSparseFloatVectors returns numRows random sparse float rows wrapped
// in a SparseFloatArray. Dim is the largest index seen across all rows plus
// one, and Contents holds the encoded rows.
//
// Row generation is delegated to GenerateSparseFloatVectorsData so the two
// functions share a single implementation instead of duplicated bodies.
func GenerateSparseFloatVectors(numRows int) *schemapb.SparseFloatArray {
	contents, dim := GenerateSparseFloatVectorsData(numRows)
	return &schemapb.SparseFloatArray{
		Dim:      dim,
		Contents: contents,
	}
}
// GenerateHashKeys returns numRows random uint32 values.
func GenerateHashKeys(numRows int) []uint32 {
	keys := make([]uint32, numRows)
	for idx := range keys {
		keys[idx] = rand.Uint32()
	}
	return keys
}
// generate FieldData
// NewBoolFieldData builds a Bool FieldData named fieldName whose payload is
// numRows values from GenerateBoolArray.
func NewBoolFieldData(fieldName string, numRows int) *schemapb.FieldData {
	values := &schemapb.BoolArray{Data: GenerateBoolArray(numRows)}
	scalars := &schemapb.ScalarField{
		Data: &schemapb.ScalarField_BoolData{BoolData: values},
	}
	return &schemapb.FieldData{
		Type:      schemapb.DataType_Bool,
		FieldName: fieldName,
		Field:     &schemapb.FieldData_Scalars{Scalars: scalars},
	}
}
// NewBoolFieldDataWithValue builds a Bool FieldData named fieldName around
// caller-supplied data. fieldValue must hold a []bool; any other dynamic
// type makes the type assertion panic.
func NewBoolFieldDataWithValue(fieldName string, fieldValue interface{}) *schemapb.FieldData {
	values := &schemapb.BoolArray{Data: fieldValue.([]bool)}
	scalars := &schemapb.ScalarField{
		Data: &schemapb.ScalarField_BoolData{BoolData: values},
	}
	return &schemapb.FieldData{
		Type:      schemapb.DataType_Bool,
		FieldName: fieldName,
		Field:     &schemapb.FieldData_Scalars{Scalars: scalars},
	}
}
// NewInt8FieldData builds an Int8 FieldData named fieldName. The payload is
// stored as an int32 array produced by GenerateInt32Array, so values run from
// 0 to numRows-1 and are not narrowed to the int8 range.
//
// NOTE(review): for numRows > 128 the values exceed what an int8 can hold —
// confirm whether callers rely on that.
func NewInt8FieldData(fieldName string, numRows int) *schemapb.FieldData {
	values := &schemapb.IntArray{Data: GenerateInt32Array(numRows)}
	scalars := &schemapb.ScalarField{
		Data: &schemapb.ScalarField_IntData{IntData: values},
	}
	return &schemapb.FieldData{
		Type:      schemapb.DataType_Int8,
		FieldName: fieldName,
		Field:     &schemapb.FieldData_Scalars{Scalars: scalars},
	}
}
// NewInt16FieldData builds an Int16 FieldData named fieldName. The payload is
// stored as an int32 array produced by GenerateInt32Array, so values run from
// 0 to numRows-1 and are not narrowed to the int16 range.
//
// NOTE(review): for numRows > 32768 the values exceed what an int16 can
// hold — confirm whether callers rely on that.
func NewInt16FieldData(fieldName string, numRows int) *schemapb.FieldData {
	values := &schemapb.IntArray{Data: GenerateInt32Array(numRows)}
	scalars := &schemapb.ScalarField{
		Data: &schemapb.ScalarField_IntData{IntData: values},
	}
	return &schemapb.FieldData{
		Type:      schemapb.DataType_Int16,
		FieldName: fieldName,
		Field:     &schemapb.FieldData_Scalars{Scalars: scalars},
	}
}
// NewInt32FieldData builds an Int32 FieldData named fieldName whose payload
// is numRows values from GenerateInt32Array.
func NewInt32FieldData(fieldName string, numRows int) *schemapb.FieldData {
	values := &schemapb.IntArray{Data: GenerateInt32Array(numRows)}
	scalars := &schemapb.ScalarField{
		Data: &schemapb.ScalarField_IntData{IntData: values},
	}
	return &schemapb.FieldData{
		Type:      schemapb.DataType_Int32,
		FieldName: fieldName,
		Field:     &schemapb.FieldData_Scalars{Scalars: scalars},
	}
}
// NewInt32FieldDataWithValue builds an Int32 FieldData named fieldName around
// caller-supplied data. fieldValue must hold a []int32; any other dynamic
// type makes the type assertion panic.
func NewInt32FieldDataWithValue(fieldName string, fieldValue interface{}) *schemapb.FieldData {
	values := &schemapb.IntArray{Data: fieldValue.([]int32)}
	scalars := &schemapb.ScalarField{
		Data: &schemapb.ScalarField_IntData{IntData: values},
	}
	return &schemapb.FieldData{
		Type:      schemapb.DataType_Int32,
		FieldName: fieldName,
		Field:     &schemapb.FieldData_Scalars{Scalars: scalars},
	}
}
// NewInt64FieldData builds an Int64 FieldData named fieldName whose payload
// is numRows values from GenerateInt64Array.
func NewInt64FieldData(fieldName string, numRows int) *schemapb.FieldData {
	values := &schemapb.LongArray{Data: GenerateInt64Array(numRows)}
	scalars := &schemapb.ScalarField{
		Data: &schemapb.ScalarField_LongData{LongData: values},
	}
	return &schemapb.FieldData{
		Type:      schemapb.DataType_Int64,
		FieldName: fieldName,
		Field:     &schemapb.FieldData_Scalars{Scalars: scalars},
	}
}
// NewInt64FieldDataWithValue builds an Int64 FieldData named fieldName around
// caller-supplied data. fieldValue must hold a []int64; any other dynamic
// type makes the type assertion panic.
func NewInt64FieldDataWithValue(fieldName string, fieldValue interface{}) *schemapb.FieldData {
	values := &schemapb.LongArray{Data: fieldValue.([]int64)}
	scalars := &schemapb.ScalarField{
		Data: &schemapb.ScalarField_LongData{LongData: values},
	}
	return &schemapb.FieldData{
		Type:      schemapb.DataType_Int64,
		FieldName: fieldName,
		Field:     &schemapb.FieldData_Scalars{Scalars: scalars},
	}
}
// NewFloatFieldData builds a Float FieldData named fieldName whose payload is
// numRows values from GenerateFloat32Array.
func NewFloatFieldData(fieldName string, numRows int) *schemapb.FieldData {
	values := &schemapb.FloatArray{Data: GenerateFloat32Array(numRows)}
	scalars := &schemapb.ScalarField{
		Data: &schemapb.ScalarField_FloatData{FloatData: values},
	}
	return &schemapb.FieldData{
		Type:      schemapb.DataType_Float,
		FieldName: fieldName,
		Field:     &schemapb.FieldData_Scalars{Scalars: scalars},
	}
}
// NewFloatFieldDataWithValue builds a Float FieldData named fieldName around
// caller-supplied data. fieldValue must hold a []float32; any other dynamic
// type makes the type assertion panic.
func NewFloatFieldDataWithValue(fieldName string, fieldValue interface{}) *schemapb.FieldData {
	values := &schemapb.FloatArray{Data: fieldValue.([]float32)}
	scalars := &schemapb.ScalarField{
		Data: &schemapb.ScalarField_FloatData{FloatData: values},
	}
	return &schemapb.FieldData{
		Type:      schemapb.DataType_Float,
		FieldName: fieldName,
		Field:     &schemapb.FieldData_Scalars{Scalars: scalars},
	}
}
// NewDoubleFieldData builds a Double FieldData named fieldName whose payload
// is numRows values from GenerateFloat64Array.
func NewDoubleFieldData(fieldName string, numRows int) *schemapb.FieldData {
	values := &schemapb.DoubleArray{Data: GenerateFloat64Array(numRows)}
	scalars := &schemapb.ScalarField{
		Data: &schemapb.ScalarField_DoubleData{DoubleData: values},
	}
	return &schemapb.FieldData{
		Type:      schemapb.DataType_Double,
		FieldName: fieldName,
		Field:     &schemapb.FieldData_Scalars{Scalars: scalars},
	}
}
// NewDoubleFieldDataWithValue builds a Double FieldData named fieldName
// around caller-supplied data. fieldValue must hold a []float64; any other
// dynamic type makes the type assertion panic.
func NewDoubleFieldDataWithValue(fieldName string, fieldValue interface{}) *schemapb.FieldData {
	values := &schemapb.DoubleArray{Data: fieldValue.([]float64)}
	scalars := &schemapb.ScalarField{
		Data: &schemapb.ScalarField_DoubleData{DoubleData: values},
	}
	return &schemapb.FieldData{
		Type:      schemapb.DataType_Double,
		FieldName: fieldName,
		Field:     &schemapb.FieldData_Scalars{Scalars: scalars},
	}
}
// NewVarCharFieldData builds a VarChar FieldData named fieldName whose
// payload is numRows strings from GenerateVarCharArray with a maxLen of 10.
func NewVarCharFieldData(fieldName string, numRows int) *schemapb.FieldData {
	values := &schemapb.StringArray{Data: GenerateVarCharArray(numRows, 10)}
	scalars := &schemapb.ScalarField{
		Data: &schemapb.ScalarField_StringData{StringData: values},
	}
	return &schemapb.FieldData{
		Type:      schemapb.DataType_VarChar,
		FieldName: fieldName,
		Field:     &schemapb.FieldData_Scalars{Scalars: scalars},
	}
}
// NewVarCharFieldDataWithValue builds a VarChar FieldData named fieldName
// around caller-supplied data. fieldValue must hold a []string; any other
// dynamic type makes the type assertion panic.
func NewVarCharFieldDataWithValue(fieldName string, fieldValue interface{}) *schemapb.FieldData {
	values := &schemapb.StringArray{Data: fieldValue.([]string)}
	scalars := &schemapb.ScalarField{
		Data: &schemapb.ScalarField_StringData{StringData: values},
	}
	return &schemapb.FieldData{
		Type:      schemapb.DataType_VarChar,
		FieldName: fieldName,
		Field:     &schemapb.FieldData_Scalars{Scalars: scalars},
	}
}
// NewStringFieldData builds a String FieldData named fieldName whose payload
// is numRows random sentences from GenerateStringArray.
func NewStringFieldData(fieldName string, numRows int) *schemapb.FieldData {
	values := &schemapb.StringArray{Data: GenerateStringArray(numRows)}
	scalars := &schemapb.ScalarField{
		Data: &schemapb.ScalarField_StringData{StringData: values},
	}
	return &schemapb.FieldData{
		Type:      schemapb.DataType_String,
		FieldName: fieldName,
		Field:     &schemapb.FieldData_Scalars{Scalars: scalars},
	}
}
// NewJSONFieldData builds a JSON FieldData named fieldName whose payload is
// numRows documents from GenerateJSONArray.
func NewJSONFieldData(fieldName string, numRows int) *schemapb.FieldData {
	values := &schemapb.JSONArray{Data: GenerateJSONArray(numRows)}
	scalars := &schemapb.ScalarField{
		Data: &schemapb.ScalarField_JsonData{JsonData: values},
	}
	return &schemapb.FieldData{
		Type:      schemapb.DataType_JSON,
		FieldName: fieldName,
		Field:     &schemapb.FieldData_Scalars{Scalars: scalars},
	}
}
// NewJSONFieldDataWithValue builds a JSON FieldData named fieldName around
// caller-supplied data. fieldValue must hold a [][]byte; any other dynamic
// type makes the type assertion panic.
func NewJSONFieldDataWithValue(fieldName string, fieldValue interface{}) *schemapb.FieldData {
	values := &schemapb.JSONArray{Data: fieldValue.([][]byte)}
	scalars := &schemapb.ScalarField{
		Data: &schemapb.ScalarField_JsonData{JsonData: values},
	}
	return &schemapb.FieldData{
		Type:      schemapb.DataType_JSON,
		FieldName: fieldName,
		Field:     &schemapb.FieldData_Scalars{Scalars: scalars},
	}
}
// NewArrayFieldData builds an Array FieldData named fieldName whose payload
// is numRows int arrays from GenerateArrayOfIntArray.
func NewArrayFieldData(fieldName string, numRows int) *schemapb.FieldData {
	rows := &schemapb.ArrayArray{Data: GenerateArrayOfIntArray(numRows)}
	scalars := &schemapb.ScalarField{
		Data: &schemapb.ScalarField_ArrayData{ArrayData: rows},
	}
	return &schemapb.FieldData{
		Type:      schemapb.DataType_Array,
		FieldName: fieldName,
		Field:     &schemapb.FieldData_Scalars{Scalars: scalars},
	}
}
// NewVectorArrayFieldData builds an ArrayOfVector FieldData named fieldName.
// Its payload is numRows float-vector arrays of dimension dim produced by
// GenerateArrayOfFloatVectorArray; dim is recorded on both the enclosing
// vector field and the vector array.
func NewVectorArrayFieldData(fieldName string, numRows, dim int) *schemapb.FieldData {
	rows := &schemapb.VectorArray{
		Data:        GenerateArrayOfFloatVectorArray(numRows, dim),
		ElementType: schemapb.DataType_FloatVector,
		Dim:         int64(dim),
	}
	vectors := &schemapb.VectorField{
		Dim:  int64(dim),
		Data: &schemapb.VectorField_VectorArray{VectorArray: rows},
	}
	return &schemapb.FieldData{
		Type:      schemapb.DataType_ArrayOfVector,
		FieldName: fieldName,
		Field:     &schemapb.FieldData_Vectors{Vectors: vectors},
	}
}
// NewArrayFieldDataWithValue builds an Array FieldData named fieldName around
// caller-supplied data. fieldValue must hold a []*schemapb.ScalarField; any
// other dynamic type makes the type assertion panic.
func NewArrayFieldDataWithValue(fieldName string, fieldValue interface{}) *schemapb.FieldData {
	rows := &schemapb.ArrayArray{Data: fieldValue.([]*schemapb.ScalarField)}
	scalars := &schemapb.ScalarField{
		Data: &schemapb.ScalarField_ArrayData{ArrayData: rows},
	}
	return &schemapb.FieldData{
		Type:      schemapb.DataType_Array,
		FieldName: fieldName,
		Field:     &schemapb.FieldData_Scalars{Scalars: scalars},
	}
}
// NewGeometryFieldData builds a Geometry FieldData named fieldName whose
// payload is numRows WKB-encoded geometries from GenerateGeometryArray.
func NewGeometryFieldData(fieldName string, numRows int) *schemapb.FieldData {
	geometries := &schemapb.GeometryArray{Data: GenerateGeometryArray(numRows)}
	scalars := &schemapb.ScalarField{
		Data: &schemapb.ScalarField_GeometryData{GeometryData: geometries},
	}
	return &schemapb.FieldData{
		Type:      schemapb.DataType_Geometry,
		FieldName: fieldName,
		Field:     &schemapb.FieldData_Scalars{Scalars: scalars},
	}
}
// NewGeometryFieldDataWktFormat builds a Geometry FieldData named fieldName
// whose payload is numRows WKT text geometries from GenerateGeometryWktArray.
func NewGeometryFieldDataWktFormat(fieldName string, numRows int) *schemapb.FieldData {
	geometries := &schemapb.GeometryArray{Data: GenerateGeometryWktArray(numRows)}
	scalars := &schemapb.ScalarField{
		Data: &schemapb.ScalarField_GeometryData{GeometryData: geometries},
	}
	return &schemapb.FieldData{
		Type:      schemapb.DataType_Geometry,
		FieldName: fieldName,
		Field:     &schemapb.FieldData_Scalars{Scalars: scalars},
	}
}
// NewGeometryFieldDataWithValue builds a Geometry FieldData named fieldName
// around caller-supplied data. fieldValue must hold a [][]byte; any other
// dynamic type makes the type assertion panic.
func NewGeometryFieldDataWithValue(fieldName string, fieldValue interface{}) *schemapb.FieldData {
	geometries := &schemapb.GeometryArray{Data: fieldValue.([][]byte)}
	scalars := &schemapb.ScalarField{
		Data: &schemapb.ScalarField_GeometryData{GeometryData: geometries},
	}
	return &schemapb.FieldData{
		Type:      schemapb.DataType_Geometry,
		FieldName: fieldName,
		Field:     &schemapb.FieldData_Scalars{Scalars: scalars},
	}
}
// NewBinaryVectorFieldData builds a BinaryVector FieldData named fieldName
// with dimension dim and a payload of random bytes from GenerateBinaryVectors.
func NewBinaryVectorFieldData(fieldName string, numRows, dim int) *schemapb.FieldData {
	vectors := &schemapb.VectorField{
		Dim:  int64(dim),
		Data: &schemapb.VectorField_BinaryVector{BinaryVector: GenerateBinaryVectors(numRows, dim)},
	}
	return &schemapb.FieldData{
		Type:      schemapb.DataType_BinaryVector,
		FieldName: fieldName,
		Field:     &schemapb.FieldData_Vectors{Vectors: vectors},
	}
}
// NewBinaryVectorFieldDataWithValue builds a BinaryVector FieldData named
// fieldName with dimension dim around caller-supplied data. fieldValue must
// hold a []byte; any other dynamic type makes the type assertion panic.
func NewBinaryVectorFieldDataWithValue(fieldName string, fieldValue interface{}, dim int) *schemapb.FieldData {
	vectors := &schemapb.VectorField{
		Dim:  int64(dim),
		Data: &schemapb.VectorField_BinaryVector{BinaryVector: fieldValue.([]byte)},
	}
	return &schemapb.FieldData{
		Type:      schemapb.DataType_BinaryVector,
		FieldName: fieldName,
		Field:     &schemapb.FieldData_Vectors{Vectors: vectors},
	}
}
// NewFloatVectorFieldData builds a FloatVector FieldData named fieldName with
// dimension dim and a payload of random floats from GenerateFloatVectors.
func NewFloatVectorFieldData(fieldName string, numRows, dim int) *schemapb.FieldData {
	flattened := &schemapb.FloatArray{Data: GenerateFloatVectors(numRows, dim)}
	vectors := &schemapb.VectorField{
		Dim:  int64(dim),
		Data: &schemapb.VectorField_FloatVector{FloatVector: flattened},
	}
	return &schemapb.FieldData{
		Type:      schemapb.DataType_FloatVector,
		FieldName: fieldName,
		Field:     &schemapb.FieldData_Vectors{Vectors: vectors},
	}
}
// NewFloatVectorFieldDataWithValue builds a FloatVector FieldData named
// fieldName with dimension dim around caller-supplied data. fieldValue must
// hold a []float32; any other dynamic type makes the type assertion panic.
func NewFloatVectorFieldDataWithValue(fieldName string, fieldValue interface{}, dim int) *schemapb.FieldData {
	flattened := &schemapb.FloatArray{Data: fieldValue.([]float32)}
	vectors := &schemapb.VectorField{
		Dim:  int64(dim),
		Data: &schemapb.VectorField_FloatVector{FloatVector: flattened},
	}
	return &schemapb.FieldData{
		Type:      schemapb.DataType_FloatVector,
		FieldName: fieldName,
		Field:     &schemapb.FieldData_Vectors{Vectors: vectors},
	}
}
// NewFloat16VectorFieldData builds a Float16Vector FieldData named fieldName
// with dimension dim and a payload of bytes from GenerateFloat16Vectors.
func NewFloat16VectorFieldData(fieldName string, numRows, dim int) *schemapb.FieldData {
	vectors := &schemapb.VectorField{
		Dim:  int64(dim),
		Data: &schemapb.VectorField_Float16Vector{Float16Vector: GenerateFloat16Vectors(numRows, dim)},
	}
	return &schemapb.FieldData{
		Type:      schemapb.DataType_Float16Vector,
		FieldName: fieldName,
		Field:     &schemapb.FieldData_Vectors{Vectors: vectors},
	}
}
// NewFloat16VectorFieldDataWithValue builds a Float16Vector FieldData named
// fieldName with dimension dim around caller-supplied data. fieldValue must
// hold a []byte; any other dynamic type makes the type assertion panic.
func NewFloat16VectorFieldDataWithValue(fieldName string, fieldValue interface{}, dim int) *schemapb.FieldData {
	vectors := &schemapb.VectorField{
		Dim:  int64(dim),
		Data: &schemapb.VectorField_Float16Vector{Float16Vector: fieldValue.([]byte)},
	}
	return &schemapb.FieldData{
		Type:      schemapb.DataType_Float16Vector,
		FieldName: fieldName,
		Field:     &schemapb.FieldData_Vectors{Vectors: vectors},
	}
}
// NewBFloat16VectorFieldData builds a BFloat16Vector FieldData named
// fieldName with dimension dim and a payload of bytes from
// GenerateBFloat16Vectors.
func NewBFloat16VectorFieldData(fieldName string, numRows, dim int) *schemapb.FieldData {
	vectors := &schemapb.VectorField{
		Dim:  int64(dim),
		Data: &schemapb.VectorField_Bfloat16Vector{Bfloat16Vector: GenerateBFloat16Vectors(numRows, dim)},
	}
	return &schemapb.FieldData{
		Type:      schemapb.DataType_BFloat16Vector,
		FieldName: fieldName,
		Field:     &schemapb.FieldData_Vectors{Vectors: vectors},
	}
}
// NewBFloat16VectorFieldDataWithValue builds a BFloat16Vector FieldData named
// fieldName with dimension dim around caller-supplied data. fieldValue must
// hold a []byte; any other dynamic type makes the type assertion panic.
func NewBFloat16VectorFieldDataWithValue(fieldName string, fieldValue interface{}, dim int) *schemapb.FieldData {
	vectors := &schemapb.VectorField{
		Dim:  int64(dim),
		Data: &schemapb.VectorField_Bfloat16Vector{Bfloat16Vector: fieldValue.([]byte)},
	}
	return &schemapb.FieldData{
		Type:      schemapb.DataType_BFloat16Vector,
		FieldName: fieldName,
		Field:     &schemapb.FieldData_Vectors{Vectors: vectors},
	}
}
// NewSparseFloatVectorFieldData builds a SparseFloatVector FieldData named
// fieldName holding numRows rows from GenerateSparseFloatVectors. The vector
// field's Dim is taken from the generated sparse array.
func NewSparseFloatVectorFieldData(fieldName string, numRows int) *schemapb.FieldData {
	generated := GenerateSparseFloatVectors(numRows)
	sparse := &schemapb.SparseFloatArray{
		Dim:      generated.Dim,
		Contents: generated.Contents,
	}
	vectors := &schemapb.VectorField{
		Dim:  generated.Dim,
		Data: &schemapb.VectorField_SparseFloatVector{SparseFloatVector: sparse},
	}
	return &schemapb.FieldData{
		Type:      schemapb.DataType_SparseFloatVector,
		FieldName: fieldName,
		Field:     &schemapb.FieldData_Vectors{Vectors: vectors},
	}
}
// NewInt8VectorFieldData builds an Int8Vector FieldData named fieldName with
// dimension dim. The payload is the output of GenerateInt8Vectors converted
// to bytes with typeutil.Int8ArrayToBytes.
func NewInt8VectorFieldData(fieldName string, numRows, dim int) *schemapb.FieldData {
	raw := typeutil.Int8ArrayToBytes(GenerateInt8Vectors(numRows, dim))
	vectors := &schemapb.VectorField{
		Dim:  int64(dim),
		Data: &schemapb.VectorField_Int8Vector{Int8Vector: raw},
	}
	return &schemapb.FieldData{
		Type:      schemapb.DataType_Int8Vector,
		FieldName: fieldName,
		Field:     &schemapb.FieldData_Vectors{Vectors: vectors},
	}
}
// NewInt8VectorFieldDataWithValue builds an Int8Vector FieldData named
// fieldName with dimension dim around caller-supplied data. fieldValue must
// hold a []byte (not a []int8); any other dynamic type makes the type
// assertion panic.
func NewInt8VectorFieldDataWithValue(fieldName string, fieldValue interface{}, dim int) *schemapb.FieldData {
	vectors := &schemapb.VectorField{
		Dim:  int64(dim),
		Data: &schemapb.VectorField_Int8Vector{Int8Vector: fieldValue.([]byte)},
	}
	return &schemapb.FieldData{
		Type:      schemapb.DataType_Int8Vector,
		FieldName: fieldName,
		Field:     &schemapb.FieldData_Vectors{Vectors: vectors},
	}
}
// GenerateScalarFieldData builds generated FieldData of numRows rows for the
// scalar type dType by dispatching to the matching New*FieldData
// constructor. It panics with "unsupported data type" for any type without a
// constructor in the switch below.
func GenerateScalarFieldData(dType schemapb.DataType, fieldName string, numRows int) *schemapb.FieldData {
	var build func(string, int) *schemapb.FieldData
	switch dType {
	case schemapb.DataType_Bool:
		build = NewBoolFieldData
	case schemapb.DataType_Int8:
		build = NewInt8FieldData
	case schemapb.DataType_Int16:
		build = NewInt16FieldData
	case schemapb.DataType_Int32:
		build = NewInt32FieldData
	case schemapb.DataType_Int64:
		build = NewInt64FieldData
	case schemapb.DataType_Float:
		build = NewFloatFieldData
	case schemapb.DataType_Double:
		build = NewDoubleFieldData
	case schemapb.DataType_VarChar:
		build = NewVarCharFieldData
	case schemapb.DataType_String:
		build = NewStringFieldData
	case schemapb.DataType_Array:
		build = NewArrayFieldData
	case schemapb.DataType_JSON:
		build = NewJSONFieldData
	case schemapb.DataType_Geometry:
		build = NewGeometryFieldData
	default:
		panic("unsupported data type")
	}
	return build(fieldName, numRows)
}
// GenerateScalarFieldDataWithID is GenerateScalarFieldData with the result's
// FieldId set to fieldID.
func GenerateScalarFieldDataWithID(dType schemapb.DataType, fieldName string, fieldID int64, numRows int) *schemapb.FieldData {
	generated := GenerateScalarFieldData(dType, fieldName, numRows)
	generated.FieldId = fieldID
	return generated
}
// GenerateScalarFieldDataWithValue wraps caller-supplied fieldValue in a
// FieldData of scalar type dType and sets its FieldId to fieldID.
//
// fieldValue must have the slice type the matching New*FieldDataWithValue
// constructor asserts, otherwise that constructor panics. Types without a
// case in the switch below (for example Int8, Int16 and String) panic with
// "unsupported data type".
func GenerateScalarFieldDataWithValue(dType schemapb.DataType, fieldName string, fieldID int64, fieldValue interface{}) *schemapb.FieldData {
	var wrap func(string, interface{}) *schemapb.FieldData
	switch dType {
	case schemapb.DataType_Bool:
		wrap = NewBoolFieldDataWithValue
	case schemapb.DataType_Int32:
		wrap = NewInt32FieldDataWithValue
	case schemapb.DataType_Int64:
		wrap = NewInt64FieldDataWithValue
	case schemapb.DataType_Float:
		wrap = NewFloatFieldDataWithValue
	case schemapb.DataType_Double:
		wrap = NewDoubleFieldDataWithValue
	case schemapb.DataType_VarChar:
		wrap = NewVarCharFieldDataWithValue
	case schemapb.DataType_Array:
		wrap = NewArrayFieldDataWithValue
	case schemapb.DataType_JSON:
		wrap = NewJSONFieldDataWithValue
	case schemapb.DataType_Geometry:
		wrap = NewGeometryFieldDataWithValue
	default:
		panic("unsupported data type")
	}
	wrapped := wrap(fieldName, fieldValue)
	wrapped.FieldId = fieldID
	return wrapped
}
// GenerateVectorFieldData builds generated FieldData of numRows rows for the
// vector type dType by dispatching to the matching New*VectorFieldData
// constructor. dim is ignored for SparseFloatVector, whose constructor takes
// no dimension. It panics with "unsupported data type" for any other type.
func GenerateVectorFieldData(dType schemapb.DataType, fieldName string, numRows int, dim int) *schemapb.FieldData {
	var generated *schemapb.FieldData
	switch dType {
	case schemapb.DataType_BinaryVector:
		generated = NewBinaryVectorFieldData(fieldName, numRows, dim)
	case schemapb.DataType_FloatVector:
		generated = NewFloatVectorFieldData(fieldName, numRows, dim)
	case schemapb.DataType_Float16Vector:
		generated = NewFloat16VectorFieldData(fieldName, numRows, dim)
	case schemapb.DataType_BFloat16Vector:
		generated = NewBFloat16VectorFieldData(fieldName, numRows, dim)
	case schemapb.DataType_SparseFloatVector:
		generated = NewSparseFloatVectorFieldData(fieldName, numRows)
	case schemapb.DataType_Int8Vector:
		generated = NewInt8VectorFieldData(fieldName, numRows, dim)
	default:
		panic("unsupported data type")
	}
	return generated
}
// GenerateVectorFieldDataWithID is GenerateVectorFieldData with the result's
// FieldId set to fieldID.
func GenerateVectorFieldDataWithID(dType schemapb.DataType, fieldName string, fieldID int64, numRows int, dim int) *schemapb.FieldData {
	generated := GenerateVectorFieldData(dType, fieldName, numRows, dim)
	generated.FieldId = fieldID
	return generated
}
// GenerateVectorFieldDataWithValue wraps caller-supplied fieldValue in a
// FieldData of vector type dType with dimension dim and sets its FieldId to
// fieldID.
//
// fieldValue must have the slice type the matching
// New*VectorFieldDataWithValue constructor asserts, otherwise that
// constructor panics. Types without a case in the switch below (for example
// SparseFloatVector) panic with "unsupported data type".
func GenerateVectorFieldDataWithValue(dType schemapb.DataType, fieldName string, fieldID int64, fieldValue interface{}, dim int) *schemapb.FieldData {
	var wrap func(string, interface{}, int) *schemapb.FieldData
	switch dType {
	case schemapb.DataType_BinaryVector:
		wrap = NewBinaryVectorFieldDataWithValue
	case schemapb.DataType_FloatVector:
		wrap = NewFloatVectorFieldDataWithValue
	case schemapb.DataType_Float16Vector:
		wrap = NewFloat16VectorFieldDataWithValue
	case schemapb.DataType_BFloat16Vector:
		wrap = NewBFloat16VectorFieldDataWithValue
	case schemapb.DataType_Int8Vector:
		wrap = NewInt8VectorFieldDataWithValue
	default:
		panic("unsupported data type")
	}
	wrapped := wrap(fieldName, fieldValue, dim)
	wrapped.FieldId = fieldID
	return wrapped
}
// GenerateArrayOfStructArray generates one FieldData per sub-field of the
// struct-array schema, each holding numRows rows and carrying the sub-field's
// name and field ID.
//
// Every sub-field must be of type Array or ArrayOfVector. Element types Int8,
// Int16 and Int32 are filled via NewArrayFieldData (int arrays); FloatVector
// is filled via NewVectorArrayFieldData with dimension dim. The function
// panics on any other data type or element type.
func GenerateArrayOfStructArray(schema *schemapb.StructArrayFieldSchema, numRows int, dim int) []*schemapb.FieldData {
	// The result holds one entry per schema field, so size it by the field
	// count rather than by the row count.
	ret := make([]*schemapb.FieldData, 0, len(schema.Fields))
	for _, field := range schema.Fields {
		if field.DataType != schemapb.DataType_Array && field.DataType != schemapb.DataType_ArrayOfVector {
			panic("Only Array or ArrayOfVector type is supported for StructField")
		}

		var fieldData *schemapb.FieldData
		switch field.GetElementType() {
		case schemapb.DataType_Int8, schemapb.DataType_Int16, schemapb.DataType_Int32:
			fieldData = NewArrayFieldData(field.Name, numRows)
		case schemapb.DataType_FloatVector:
			fieldData = NewVectorArrayFieldData(field.Name, numRows, dim)
		default:
			panic(fmt.Sprintf("unimplemented data type: %s", field.ElementType))
		}
		fieldData.FieldId = field.FieldID
		ret = append(ret, fieldData)
	}
	return ret
}
// GenerateStructFieldData builds an ArrayOfStruct FieldData named fieldName
// whose sub-fields are produced by GenerateArrayOfStructArray from schema,
// with numRow rows each and vector dimension dim.
func GenerateStructFieldData(schema *schemapb.StructArrayFieldSchema, fieldName string, numRow int, dim int) *schemapb.FieldData {
	subFields := &schemapb.StructArrayField{
		Fields: GenerateArrayOfStructArray(schema, numRow, dim),
	}
	return &schemapb.FieldData{
		Type:      schemapb.DataType_ArrayOfStruct,
		FieldName: fieldName,
		Field:     &schemapb.FieldData_StructArrays{StructArrays: subFields},
	}
}