mirror of
https://gitee.com/milvus-io/milvus.git
synced 2025-12-06 17:18:35 +08:00
enhance: add NaN and Inf check (#35683)
Fix #35594: add NaN/Inf checks for float values parsed from import files. Signed-off-by: xiaofanluan <xiaofan.luan@zilliz.com>
This commit is contained in:
parent
42f7800b5b
commit
50fcfe8ef1
@ -193,13 +193,13 @@ func (r *rowParser) parseEntity(field *schemapb.FieldSchema, obj string) (any, e
|
||||
if err != nil {
|
||||
return 0, r.wrapTypeError(obj, field)
|
||||
}
|
||||
return float32(num), nil
|
||||
return float32(num), typeutil.VerifyFloats32([]float32{float32(num)})
|
||||
case schemapb.DataType_Double:
|
||||
num, err := strconv.ParseFloat(obj, 64)
|
||||
if err != nil {
|
||||
return 0, r.wrapTypeError(obj, field)
|
||||
}
|
||||
return num, nil
|
||||
return num, typeutil.VerifyFloats64([]float64{num})
|
||||
case schemapb.DataType_VarChar, schemapb.DataType_String:
|
||||
return obj, nil
|
||||
case schemapb.DataType_BinaryVector:
|
||||
@ -228,7 +228,7 @@ func (r *rowParser) parseEntity(field *schemapb.FieldSchema, obj string) (any, e
|
||||
if len(vec) != r.name2Dim[field.GetName()] {
|
||||
return nil, r.wrapDimError(len(vec), field)
|
||||
}
|
||||
return vec, nil
|
||||
return vec, typeutil.VerifyFloats32(vec)
|
||||
case schemapb.DataType_Float16Vector:
|
||||
var vec []float32
|
||||
err := json.Unmarshal([]byte(obj), &vec)
|
||||
@ -242,7 +242,7 @@ func (r *rowParser) parseEntity(field *schemapb.FieldSchema, obj string) (any, e
|
||||
for i := 0; i < len(vec); i++ {
|
||||
copy(vec2[i*2:], typeutil.Float32ToFloat16Bytes(vec[i]))
|
||||
}
|
||||
return vec2, nil
|
||||
return vec2, typeutil.VerifyFloats16(vec2)
|
||||
case schemapb.DataType_BFloat16Vector:
|
||||
var vec []float32
|
||||
err := json.Unmarshal([]byte(obj), &vec)
|
||||
@ -256,7 +256,7 @@ func (r *rowParser) parseEntity(field *schemapb.FieldSchema, obj string) (any, e
|
||||
for i := 0; i < len(vec); i++ {
|
||||
copy(vec2[i*2:], typeutil.Float32ToBFloat16Bytes(vec[i]))
|
||||
}
|
||||
return vec2, nil
|
||||
return vec2, typeutil.VerifyBFloats16(vec2)
|
||||
case schemapb.DataType_SparseFloatVector:
|
||||
// use dec.UseNumber() to avoid float64 precision loss
|
||||
var vec map[string]interface{}
|
||||
@ -293,13 +293,13 @@ func (r *rowParser) parseEntity(field *schemapb.FieldSchema, obj string) (any, e
|
||||
func (r *rowParser) arrayToFieldData(arr []interface{}, eleType schemapb.DataType) (*schemapb.ScalarField, error) {
|
||||
switch eleType {
|
||||
case schemapb.DataType_Bool:
|
||||
values := make([]bool, 0)
|
||||
for i := 0; i < len(arr); i++ {
|
||||
value, ok := arr[i].(bool)
|
||||
values := make([]bool, len(arr))
|
||||
for i, v := range arr {
|
||||
value, ok := v.(bool)
|
||||
if !ok {
|
||||
return nil, r.wrapArrayValueTypeError(arr, eleType)
|
||||
}
|
||||
values = append(values, value)
|
||||
values[i] = value
|
||||
}
|
||||
return &schemapb.ScalarField{
|
||||
Data: &schemapb.ScalarField_BoolData{
|
||||
@ -309,17 +309,17 @@ func (r *rowParser) arrayToFieldData(arr []interface{}, eleType schemapb.DataTyp
|
||||
},
|
||||
}, nil
|
||||
case schemapb.DataType_Int8, schemapb.DataType_Int16, schemapb.DataType_Int32:
|
||||
values := make([]int32, 0)
|
||||
for i := 0; i < len(arr); i++ {
|
||||
value, ok := arr[i].(json.Number)
|
||||
values := make([]int32, len(arr))
|
||||
for i, v := range arr {
|
||||
value, ok := v.(json.Number)
|
||||
if !ok {
|
||||
return nil, r.wrapArrayValueTypeError(arr, eleType)
|
||||
}
|
||||
num, err := strconv.ParseInt(value.String(), 10, 32)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
return nil, fmt.Errorf("failed to parse int32: %w", err)
|
||||
}
|
||||
values = append(values, int32(num))
|
||||
values[i] = int32(num)
|
||||
}
|
||||
return &schemapb.ScalarField{
|
||||
Data: &schemapb.ScalarField_IntData{
|
||||
@ -328,18 +328,19 @@ func (r *rowParser) arrayToFieldData(arr []interface{}, eleType schemapb.DataTyp
|
||||
},
|
||||
},
|
||||
}, nil
|
||||
|
||||
case schemapb.DataType_Int64:
|
||||
values := make([]int64, 0)
|
||||
for i := 0; i < len(arr); i++ {
|
||||
value, ok := arr[i].(json.Number)
|
||||
values := make([]int64, len(arr))
|
||||
for i, v := range arr {
|
||||
value, ok := v.(json.Number)
|
||||
if !ok {
|
||||
return nil, r.wrapArrayValueTypeError(arr, eleType)
|
||||
}
|
||||
num, err := strconv.ParseInt(value.String(), 10, 64)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
return nil, fmt.Errorf("failed to parse int64: %w", err)
|
||||
}
|
||||
values = append(values, num)
|
||||
values[i] = num
|
||||
}
|
||||
return &schemapb.ScalarField{
|
||||
Data: &schemapb.ScalarField_LongData{
|
||||
@ -349,17 +350,20 @@ func (r *rowParser) arrayToFieldData(arr []interface{}, eleType schemapb.DataTyp
|
||||
},
|
||||
}, nil
|
||||
case schemapb.DataType_Float:
|
||||
values := make([]float32, 0)
|
||||
for i := 0; i < len(arr); i++ {
|
||||
value, ok := arr[i].(json.Number)
|
||||
values := make([]float32, len(arr))
|
||||
for i, v := range arr {
|
||||
value, ok := v.(json.Number)
|
||||
if !ok {
|
||||
return nil, r.wrapArrayValueTypeError(arr, eleType)
|
||||
}
|
||||
num, err := strconv.ParseFloat(value.String(), 32)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
return nil, fmt.Errorf("failed to parse float32: %w", err)
|
||||
}
|
||||
values = append(values, float32(num))
|
||||
values[i] = float32(num)
|
||||
}
|
||||
if err := typeutil.VerifyFloats32(values); err != nil {
|
||||
return nil, fmt.Errorf("float32 verification failed: %w", err)
|
||||
}
|
||||
return &schemapb.ScalarField{
|
||||
Data: &schemapb.ScalarField_FloatData{
|
||||
@ -369,17 +373,20 @@ func (r *rowParser) arrayToFieldData(arr []interface{}, eleType schemapb.DataTyp
|
||||
},
|
||||
}, nil
|
||||
case schemapb.DataType_Double:
|
||||
values := make([]float64, 0)
|
||||
for i := 0; i < len(arr); i++ {
|
||||
value, ok := arr[i].(json.Number)
|
||||
values := make([]float64, len(arr))
|
||||
for i, v := range arr {
|
||||
value, ok := v.(json.Number)
|
||||
if !ok {
|
||||
return nil, r.wrapArrayValueTypeError(arr, eleType)
|
||||
}
|
||||
num, err := strconv.ParseFloat(value.String(), 64)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
return nil, fmt.Errorf("failed to parse float64: %w", err)
|
||||
}
|
||||
values = append(values, num)
|
||||
values[i] = num
|
||||
}
|
||||
if err := typeutil.VerifyFloats64(values); err != nil {
|
||||
return nil, fmt.Errorf("float64 verification failed: %w", err)
|
||||
}
|
||||
return &schemapb.ScalarField{
|
||||
Data: &schemapb.ScalarField_DoubleData{
|
||||
@ -389,13 +396,13 @@ func (r *rowParser) arrayToFieldData(arr []interface{}, eleType schemapb.DataTyp
|
||||
},
|
||||
}, nil
|
||||
case schemapb.DataType_VarChar, schemapb.DataType_String:
|
||||
values := make([]string, 0)
|
||||
for i := 0; i < len(arr); i++ {
|
||||
value, ok := arr[i].(string)
|
||||
values := make([]string, len(arr))
|
||||
for i, v := range arr {
|
||||
value, ok := v.(string)
|
||||
if !ok {
|
||||
return nil, r.wrapArrayValueTypeError(arr, eleType)
|
||||
}
|
||||
values = append(values, value)
|
||||
values[i] = value
|
||||
}
|
||||
return &schemapb.ScalarField{
|
||||
Data: &schemapb.ScalarField_StringData{
|
||||
@ -405,7 +412,7 @@ func (r *rowParser) arrayToFieldData(arr []interface{}, eleType schemapb.DataTyp
|
||||
},
|
||||
}, nil
|
||||
default:
|
||||
return nil, merr.WrapErrImportFailed(fmt.Sprintf("parse csv failed, unsupport data type: %s", eleType.String()))
|
||||
return nil, merr.WrapErrImportFailed(fmt.Sprintf("parse csv failed, unsupported data type: %s", eleType.String()))
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@ -21,7 +21,6 @@ import (
|
||||
"fmt"
|
||||
"strconv"
|
||||
|
||||
"github.com/cockroachdb/errors"
|
||||
"github.com/samber/lo"
|
||||
|
||||
"github.com/milvus-io/milvus-proto/go-api/v2/schemapb"
|
||||
@ -258,7 +257,7 @@ func (r *rowParser) parseEntity(fieldID int64, obj any) (any, error) {
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return float32(num), nil
|
||||
return float32(num), typeutil.VerifyFloats32([]float32{float32(num)})
|
||||
case schemapb.DataType_Double:
|
||||
value, ok := obj.(json.Number)
|
||||
if !ok {
|
||||
@ -268,7 +267,7 @@ func (r *rowParser) parseEntity(fieldID int64, obj any) (any, error) {
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return num, nil
|
||||
return num, typeutil.VerifyFloats64([]float64{num})
|
||||
case schemapb.DataType_BinaryVector:
|
||||
arr, ok := obj.([]interface{})
|
||||
if !ok {
|
||||
@ -310,7 +309,7 @@ func (r *rowParser) parseEntity(fieldID int64, obj any) (any, error) {
|
||||
}
|
||||
vec[i] = float32(num)
|
||||
}
|
||||
return vec, nil
|
||||
return vec, typeutil.VerifyFloats32(vec)
|
||||
case schemapb.DataType_Float16Vector:
|
||||
// parse float string to Float16 bytes
|
||||
arr, ok := obj.([]interface{})
|
||||
@ -332,7 +331,7 @@ func (r *rowParser) parseEntity(fieldID int64, obj any) (any, error) {
|
||||
}
|
||||
copy(vec[i*2:], typeutil.Float32ToFloat16Bytes(float32(num)))
|
||||
}
|
||||
return vec, nil
|
||||
return vec, typeutil.VerifyFloats16(vec)
|
||||
case schemapb.DataType_BFloat16Vector:
|
||||
// parse float string to BFloat16 bytes
|
||||
arr, ok := obj.([]interface{})
|
||||
@ -354,7 +353,7 @@ func (r *rowParser) parseEntity(fieldID int64, obj any) (any, error) {
|
||||
}
|
||||
copy(vec[i*2:], typeutil.Float32ToBFloat16Bytes(float32(num)))
|
||||
}
|
||||
return vec, nil
|
||||
return vec, typeutil.VerifyBFloats16(vec)
|
||||
case schemapb.DataType_SparseFloatVector:
|
||||
arr, ok := obj.(map[string]interface{})
|
||||
if !ok {
|
||||
@ -565,13 +564,13 @@ func (r *rowParser) parseNullableEntity(fieldID int64, obj any) (any, error) {
|
||||
func (r *rowParser) arrayToFieldData(arr []interface{}, eleType schemapb.DataType) (*schemapb.ScalarField, error) {
|
||||
switch eleType {
|
||||
case schemapb.DataType_Bool:
|
||||
values := make([]bool, 0)
|
||||
for i := 0; i < len(arr); i++ {
|
||||
value, ok := arr[i].(bool)
|
||||
values := make([]bool, len(arr))
|
||||
for i, v := range arr {
|
||||
value, ok := v.(bool)
|
||||
if !ok {
|
||||
return nil, r.wrapArrayValueTypeError(arr, eleType)
|
||||
}
|
||||
values = append(values, value)
|
||||
values[i] = value
|
||||
}
|
||||
return &schemapb.ScalarField{
|
||||
Data: &schemapb.ScalarField_BoolData{
|
||||
@ -581,17 +580,17 @@ func (r *rowParser) arrayToFieldData(arr []interface{}, eleType schemapb.DataTyp
|
||||
},
|
||||
}, nil
|
||||
case schemapb.DataType_Int8, schemapb.DataType_Int16, schemapb.DataType_Int32:
|
||||
values := make([]int32, 0)
|
||||
for i := 0; i < len(arr); i++ {
|
||||
value, ok := arr[i].(json.Number)
|
||||
values := make([]int32, len(arr))
|
||||
for i, v := range arr {
|
||||
value, ok := v.(json.Number)
|
||||
if !ok {
|
||||
return nil, r.wrapArrayValueTypeError(arr, eleType)
|
||||
}
|
||||
num, err := strconv.ParseInt(value.String(), 0, 32)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
return nil, fmt.Errorf("failed to parse int32: %w", err)
|
||||
}
|
||||
values = append(values, int32(num))
|
||||
values[i] = int32(num)
|
||||
}
|
||||
return &schemapb.ScalarField{
|
||||
Data: &schemapb.ScalarField_IntData{
|
||||
@ -601,17 +600,17 @@ func (r *rowParser) arrayToFieldData(arr []interface{}, eleType schemapb.DataTyp
|
||||
},
|
||||
}, nil
|
||||
case schemapb.DataType_Int64:
|
||||
values := make([]int64, 0)
|
||||
for i := 0; i < len(arr); i++ {
|
||||
value, ok := arr[i].(json.Number)
|
||||
values := make([]int64, len(arr))
|
||||
for i, v := range arr {
|
||||
value, ok := v.(json.Number)
|
||||
if !ok {
|
||||
return nil, r.wrapArrayValueTypeError(arr, eleType)
|
||||
}
|
||||
num, err := strconv.ParseInt(value.String(), 0, 64)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
return nil, fmt.Errorf("failed to parse int64: %w", err)
|
||||
}
|
||||
values = append(values, num)
|
||||
values[i] = num
|
||||
}
|
||||
return &schemapb.ScalarField{
|
||||
Data: &schemapb.ScalarField_LongData{
|
||||
@ -621,17 +620,20 @@ func (r *rowParser) arrayToFieldData(arr []interface{}, eleType schemapb.DataTyp
|
||||
},
|
||||
}, nil
|
||||
case schemapb.DataType_Float:
|
||||
values := make([]float32, 0)
|
||||
for i := 0; i < len(arr); i++ {
|
||||
value, ok := arr[i].(json.Number)
|
||||
values := make([]float32, len(arr))
|
||||
for i, v := range arr {
|
||||
value, ok := v.(json.Number)
|
||||
if !ok {
|
||||
return nil, r.wrapArrayValueTypeError(arr, eleType)
|
||||
}
|
||||
num, err := strconv.ParseFloat(value.String(), 32)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
return nil, fmt.Errorf("failed to parse float32: %w", err)
|
||||
}
|
||||
values = append(values, float32(num))
|
||||
values[i] = float32(num)
|
||||
}
|
||||
if err := typeutil.VerifyFloats32(values); err != nil {
|
||||
return nil, fmt.Errorf("float32 verification failed: %w", err)
|
||||
}
|
||||
return &schemapb.ScalarField{
|
||||
Data: &schemapb.ScalarField_FloatData{
|
||||
@ -641,17 +643,20 @@ func (r *rowParser) arrayToFieldData(arr []interface{}, eleType schemapb.DataTyp
|
||||
},
|
||||
}, nil
|
||||
case schemapb.DataType_Double:
|
||||
values := make([]float64, 0)
|
||||
for i := 0; i < len(arr); i++ {
|
||||
value, ok := arr[i].(json.Number)
|
||||
values := make([]float64, len(arr))
|
||||
for i, v := range arr {
|
||||
value, ok := v.(json.Number)
|
||||
if !ok {
|
||||
return nil, r.wrapArrayValueTypeError(arr, eleType)
|
||||
}
|
||||
num, err := strconv.ParseFloat(value.String(), 64)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
return nil, fmt.Errorf("failed to parse float64: %w", err)
|
||||
}
|
||||
values = append(values, num)
|
||||
values[i] = num
|
||||
}
|
||||
if err := typeutil.VerifyFloats64(values); err != nil {
|
||||
return nil, fmt.Errorf("float64 verification failed: %w", err)
|
||||
}
|
||||
return &schemapb.ScalarField{
|
||||
Data: &schemapb.ScalarField_DoubleData{
|
||||
@ -661,13 +666,13 @@ func (r *rowParser) arrayToFieldData(arr []interface{}, eleType schemapb.DataTyp
|
||||
},
|
||||
}, nil
|
||||
case schemapb.DataType_VarChar, schemapb.DataType_String:
|
||||
values := make([]string, 0)
|
||||
for i := 0; i < len(arr); i++ {
|
||||
value, ok := arr[i].(string)
|
||||
values := make([]string, len(arr))
|
||||
for i, v := range arr {
|
||||
value, ok := v.(string)
|
||||
if !ok {
|
||||
return nil, r.wrapArrayValueTypeError(arr, eleType)
|
||||
}
|
||||
values = append(values, value)
|
||||
values[i] = value
|
||||
}
|
||||
return &schemapb.ScalarField{
|
||||
Data: &schemapb.ScalarField_StringData{
|
||||
@ -677,6 +682,6 @@ func (r *rowParser) arrayToFieldData(arr []interface{}, eleType schemapb.DataTyp
|
||||
},
|
||||
}, nil
|
||||
default:
|
||||
return nil, errors.New(fmt.Sprintf("unsupported array data type '%s'", eleType.String()))
|
||||
return nil, fmt.Errorf("unsupported array data type '%s'", eleType.String())
|
||||
}
|
||||
}
|
||||
|
||||
@ -17,6 +17,7 @@
|
||||
package typeutil
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"math"
|
||||
"testing"
|
||||
|
||||
@ -49,6 +50,16 @@ func Test_VerifyFloats32(t *testing.T) {
|
||||
data = []float32{2.5, 32.2, 53.254, float32(math.Inf(1))}
|
||||
err = VerifyFloats32(data)
|
||||
assert.Error(t, err)
|
||||
|
||||
rawValue := uint32(0xffc00000)
|
||||
floatValue := math.Float32frombits(rawValue)
|
||||
err = VerifyFloats32([]float32{floatValue})
|
||||
assert.Error(t, err)
|
||||
|
||||
floatValue = -math.Float32frombits(rawValue)
|
||||
err = VerifyFloats32([]float32{floatValue})
|
||||
fmt.Println("-nan", floatValue, err)
|
||||
assert.Error(t, err)
|
||||
}
|
||||
|
||||
func Test_VerifyFloats64(t *testing.T) {
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user