enhance: add nan and inf check (#35683)

fix #35594
Add NaN/Inf checks for float values parsed from imported files.

Signed-off-by: xiaofanluan <xiaofan.luan@zilliz.com>
This commit is contained in:
Xiaofan 2024-08-25 15:22:57 +08:00 committed by GitHub
parent 42f7800b5b
commit 50fcfe8ef1
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
3 changed files with 92 additions and 69 deletions

View File

@ -193,13 +193,13 @@ func (r *rowParser) parseEntity(field *schemapb.FieldSchema, obj string) (any, e
if err != nil {
return 0, r.wrapTypeError(obj, field)
}
return float32(num), nil
return float32(num), typeutil.VerifyFloats32([]float32{float32(num)})
case schemapb.DataType_Double:
num, err := strconv.ParseFloat(obj, 64)
if err != nil {
return 0, r.wrapTypeError(obj, field)
}
return num, nil
return num, typeutil.VerifyFloats64([]float64{num})
case schemapb.DataType_VarChar, schemapb.DataType_String:
return obj, nil
case schemapb.DataType_BinaryVector:
@ -228,7 +228,7 @@ func (r *rowParser) parseEntity(field *schemapb.FieldSchema, obj string) (any, e
if len(vec) != r.name2Dim[field.GetName()] {
return nil, r.wrapDimError(len(vec), field)
}
return vec, nil
return vec, typeutil.VerifyFloats32(vec)
case schemapb.DataType_Float16Vector:
var vec []float32
err := json.Unmarshal([]byte(obj), &vec)
@ -242,7 +242,7 @@ func (r *rowParser) parseEntity(field *schemapb.FieldSchema, obj string) (any, e
for i := 0; i < len(vec); i++ {
copy(vec2[i*2:], typeutil.Float32ToFloat16Bytes(vec[i]))
}
return vec2, nil
return vec2, typeutil.VerifyFloats16(vec2)
case schemapb.DataType_BFloat16Vector:
var vec []float32
err := json.Unmarshal([]byte(obj), &vec)
@ -256,7 +256,7 @@ func (r *rowParser) parseEntity(field *schemapb.FieldSchema, obj string) (any, e
for i := 0; i < len(vec); i++ {
copy(vec2[i*2:], typeutil.Float32ToBFloat16Bytes(vec[i]))
}
return vec2, nil
return vec2, typeutil.VerifyBFloats16(vec2)
case schemapb.DataType_SparseFloatVector:
// use dec.UseNumber() to avoid float64 precision loss
var vec map[string]interface{}
@ -293,13 +293,13 @@ func (r *rowParser) parseEntity(field *schemapb.FieldSchema, obj string) (any, e
func (r *rowParser) arrayToFieldData(arr []interface{}, eleType schemapb.DataType) (*schemapb.ScalarField, error) {
switch eleType {
case schemapb.DataType_Bool:
values := make([]bool, 0)
for i := 0; i < len(arr); i++ {
value, ok := arr[i].(bool)
values := make([]bool, len(arr))
for i, v := range arr {
value, ok := v.(bool)
if !ok {
return nil, r.wrapArrayValueTypeError(arr, eleType)
}
values = append(values, value)
values[i] = value
}
return &schemapb.ScalarField{
Data: &schemapb.ScalarField_BoolData{
@ -309,17 +309,17 @@ func (r *rowParser) arrayToFieldData(arr []interface{}, eleType schemapb.DataTyp
},
}, nil
case schemapb.DataType_Int8, schemapb.DataType_Int16, schemapb.DataType_Int32:
values := make([]int32, 0)
for i := 0; i < len(arr); i++ {
value, ok := arr[i].(json.Number)
values := make([]int32, len(arr))
for i, v := range arr {
value, ok := v.(json.Number)
if !ok {
return nil, r.wrapArrayValueTypeError(arr, eleType)
}
num, err := strconv.ParseInt(value.String(), 10, 32)
if err != nil {
return nil, err
return nil, fmt.Errorf("failed to parse int32: %w", err)
}
values = append(values, int32(num))
values[i] = int32(num)
}
return &schemapb.ScalarField{
Data: &schemapb.ScalarField_IntData{
@ -328,18 +328,19 @@ func (r *rowParser) arrayToFieldData(arr []interface{}, eleType schemapb.DataTyp
},
},
}, nil
case schemapb.DataType_Int64:
values := make([]int64, 0)
for i := 0; i < len(arr); i++ {
value, ok := arr[i].(json.Number)
values := make([]int64, len(arr))
for i, v := range arr {
value, ok := v.(json.Number)
if !ok {
return nil, r.wrapArrayValueTypeError(arr, eleType)
}
num, err := strconv.ParseInt(value.String(), 10, 64)
if err != nil {
return nil, err
return nil, fmt.Errorf("failed to parse int64: %w", err)
}
values = append(values, num)
values[i] = num
}
return &schemapb.ScalarField{
Data: &schemapb.ScalarField_LongData{
@ -349,17 +350,20 @@ func (r *rowParser) arrayToFieldData(arr []interface{}, eleType schemapb.DataTyp
},
}, nil
case schemapb.DataType_Float:
values := make([]float32, 0)
for i := 0; i < len(arr); i++ {
value, ok := arr[i].(json.Number)
values := make([]float32, len(arr))
for i, v := range arr {
value, ok := v.(json.Number)
if !ok {
return nil, r.wrapArrayValueTypeError(arr, eleType)
}
num, err := strconv.ParseFloat(value.String(), 32)
if err != nil {
return nil, err
return nil, fmt.Errorf("failed to parse float32: %w", err)
}
values = append(values, float32(num))
values[i] = float32(num)
}
if err := typeutil.VerifyFloats32(values); err != nil {
return nil, fmt.Errorf("float32 verification failed: %w", err)
}
return &schemapb.ScalarField{
Data: &schemapb.ScalarField_FloatData{
@ -369,17 +373,20 @@ func (r *rowParser) arrayToFieldData(arr []interface{}, eleType schemapb.DataTyp
},
}, nil
case schemapb.DataType_Double:
values := make([]float64, 0)
for i := 0; i < len(arr); i++ {
value, ok := arr[i].(json.Number)
values := make([]float64, len(arr))
for i, v := range arr {
value, ok := v.(json.Number)
if !ok {
return nil, r.wrapArrayValueTypeError(arr, eleType)
}
num, err := strconv.ParseFloat(value.String(), 64)
if err != nil {
return nil, err
return nil, fmt.Errorf("failed to parse float64: %w", err)
}
values = append(values, num)
values[i] = num
}
if err := typeutil.VerifyFloats64(values); err != nil {
return nil, fmt.Errorf("float64 verification failed: %w", err)
}
return &schemapb.ScalarField{
Data: &schemapb.ScalarField_DoubleData{
@ -389,13 +396,13 @@ func (r *rowParser) arrayToFieldData(arr []interface{}, eleType schemapb.DataTyp
},
}, nil
case schemapb.DataType_VarChar, schemapb.DataType_String:
values := make([]string, 0)
for i := 0; i < len(arr); i++ {
value, ok := arr[i].(string)
values := make([]string, len(arr))
for i, v := range arr {
value, ok := v.(string)
if !ok {
return nil, r.wrapArrayValueTypeError(arr, eleType)
}
values = append(values, value)
values[i] = value
}
return &schemapb.ScalarField{
Data: &schemapb.ScalarField_StringData{
@ -405,7 +412,7 @@ func (r *rowParser) arrayToFieldData(arr []interface{}, eleType schemapb.DataTyp
},
}, nil
default:
return nil, merr.WrapErrImportFailed(fmt.Sprintf("parse csv failed, unsupport data type: %s", eleType.String()))
return nil, merr.WrapErrImportFailed(fmt.Sprintf("parse csv failed, unsupported data type: %s", eleType.String()))
}
}

View File

@ -21,7 +21,6 @@ import (
"fmt"
"strconv"
"github.com/cockroachdb/errors"
"github.com/samber/lo"
"github.com/milvus-io/milvus-proto/go-api/v2/schemapb"
@ -258,7 +257,7 @@ func (r *rowParser) parseEntity(fieldID int64, obj any) (any, error) {
if err != nil {
return nil, err
}
return float32(num), nil
return float32(num), typeutil.VerifyFloats32([]float32{float32(num)})
case schemapb.DataType_Double:
value, ok := obj.(json.Number)
if !ok {
@ -268,7 +267,7 @@ func (r *rowParser) parseEntity(fieldID int64, obj any) (any, error) {
if err != nil {
return nil, err
}
return num, nil
return num, typeutil.VerifyFloats64([]float64{num})
case schemapb.DataType_BinaryVector:
arr, ok := obj.([]interface{})
if !ok {
@ -310,7 +309,7 @@ func (r *rowParser) parseEntity(fieldID int64, obj any) (any, error) {
}
vec[i] = float32(num)
}
return vec, nil
return vec, typeutil.VerifyFloats32(vec)
case schemapb.DataType_Float16Vector:
// parse float string to Float16 bytes
arr, ok := obj.([]interface{})
@ -332,7 +331,7 @@ func (r *rowParser) parseEntity(fieldID int64, obj any) (any, error) {
}
copy(vec[i*2:], typeutil.Float32ToFloat16Bytes(float32(num)))
}
return vec, nil
return vec, typeutil.VerifyFloats16(vec)
case schemapb.DataType_BFloat16Vector:
// parse float string to BFloat16 bytes
arr, ok := obj.([]interface{})
@ -354,7 +353,7 @@ func (r *rowParser) parseEntity(fieldID int64, obj any) (any, error) {
}
copy(vec[i*2:], typeutil.Float32ToBFloat16Bytes(float32(num)))
}
return vec, nil
return vec, typeutil.VerifyBFloats16(vec)
case schemapb.DataType_SparseFloatVector:
arr, ok := obj.(map[string]interface{})
if !ok {
@ -565,13 +564,13 @@ func (r *rowParser) parseNullableEntity(fieldID int64, obj any) (any, error) {
func (r *rowParser) arrayToFieldData(arr []interface{}, eleType schemapb.DataType) (*schemapb.ScalarField, error) {
switch eleType {
case schemapb.DataType_Bool:
values := make([]bool, 0)
for i := 0; i < len(arr); i++ {
value, ok := arr[i].(bool)
values := make([]bool, len(arr))
for i, v := range arr {
value, ok := v.(bool)
if !ok {
return nil, r.wrapArrayValueTypeError(arr, eleType)
}
values = append(values, value)
values[i] = value
}
return &schemapb.ScalarField{
Data: &schemapb.ScalarField_BoolData{
@ -581,17 +580,17 @@ func (r *rowParser) arrayToFieldData(arr []interface{}, eleType schemapb.DataTyp
},
}, nil
case schemapb.DataType_Int8, schemapb.DataType_Int16, schemapb.DataType_Int32:
values := make([]int32, 0)
for i := 0; i < len(arr); i++ {
value, ok := arr[i].(json.Number)
values := make([]int32, len(arr))
for i, v := range arr {
value, ok := v.(json.Number)
if !ok {
return nil, r.wrapArrayValueTypeError(arr, eleType)
}
num, err := strconv.ParseInt(value.String(), 0, 32)
if err != nil {
return nil, err
return nil, fmt.Errorf("failed to parse int32: %w", err)
}
values = append(values, int32(num))
values[i] = int32(num)
}
return &schemapb.ScalarField{
Data: &schemapb.ScalarField_IntData{
@ -601,17 +600,17 @@ func (r *rowParser) arrayToFieldData(arr []interface{}, eleType schemapb.DataTyp
},
}, nil
case schemapb.DataType_Int64:
values := make([]int64, 0)
for i := 0; i < len(arr); i++ {
value, ok := arr[i].(json.Number)
values := make([]int64, len(arr))
for i, v := range arr {
value, ok := v.(json.Number)
if !ok {
return nil, r.wrapArrayValueTypeError(arr, eleType)
}
num, err := strconv.ParseInt(value.String(), 0, 64)
if err != nil {
return nil, err
return nil, fmt.Errorf("failed to parse int64: %w", err)
}
values = append(values, num)
values[i] = num
}
return &schemapb.ScalarField{
Data: &schemapb.ScalarField_LongData{
@ -621,17 +620,20 @@ func (r *rowParser) arrayToFieldData(arr []interface{}, eleType schemapb.DataTyp
},
}, nil
case schemapb.DataType_Float:
values := make([]float32, 0)
for i := 0; i < len(arr); i++ {
value, ok := arr[i].(json.Number)
values := make([]float32, len(arr))
for i, v := range arr {
value, ok := v.(json.Number)
if !ok {
return nil, r.wrapArrayValueTypeError(arr, eleType)
}
num, err := strconv.ParseFloat(value.String(), 32)
if err != nil {
return nil, err
return nil, fmt.Errorf("failed to parse float32: %w", err)
}
values = append(values, float32(num))
values[i] = float32(num)
}
if err := typeutil.VerifyFloats32(values); err != nil {
return nil, fmt.Errorf("float32 verification failed: %w", err)
}
return &schemapb.ScalarField{
Data: &schemapb.ScalarField_FloatData{
@ -641,17 +643,20 @@ func (r *rowParser) arrayToFieldData(arr []interface{}, eleType schemapb.DataTyp
},
}, nil
case schemapb.DataType_Double:
values := make([]float64, 0)
for i := 0; i < len(arr); i++ {
value, ok := arr[i].(json.Number)
values := make([]float64, len(arr))
for i, v := range arr {
value, ok := v.(json.Number)
if !ok {
return nil, r.wrapArrayValueTypeError(arr, eleType)
}
num, err := strconv.ParseFloat(value.String(), 64)
if err != nil {
return nil, err
return nil, fmt.Errorf("failed to parse float64: %w", err)
}
values = append(values, num)
values[i] = num
}
// Validate the parsed doubles; the message names float64 to match the element type verified here.
if err := typeutil.VerifyFloats64(values); err != nil {
return nil, fmt.Errorf("float64 verification failed: %w", err)
}
return &schemapb.ScalarField{
Data: &schemapb.ScalarField_DoubleData{
@ -661,13 +666,13 @@ func (r *rowParser) arrayToFieldData(arr []interface{}, eleType schemapb.DataTyp
},
}, nil
case schemapb.DataType_VarChar, schemapb.DataType_String:
values := make([]string, 0)
for i := 0; i < len(arr); i++ {
value, ok := arr[i].(string)
values := make([]string, len(arr))
for i, v := range arr {
value, ok := v.(string)
if !ok {
return nil, r.wrapArrayValueTypeError(arr, eleType)
}
values = append(values, value)
values[i] = value
}
return &schemapb.ScalarField{
Data: &schemapb.ScalarField_StringData{
@ -677,6 +682,6 @@ func (r *rowParser) arrayToFieldData(arr []interface{}, eleType schemapb.DataTyp
},
}, nil
default:
return nil, errors.New(fmt.Sprintf("unsupported array data type '%s'", eleType.String()))
return nil, fmt.Errorf("unsupported array data type '%s'", eleType.String())
}
}

View File

@ -17,6 +17,7 @@
package typeutil
import (
"fmt"
"math"
"testing"
@ -49,6 +50,16 @@ func Test_VerifyFloats32(t *testing.T) {
data = []float32{2.5, 32.2, 53.254, float32(math.Inf(1))}
err = VerifyFloats32(data)
assert.Error(t, err)
rawValue := uint32(0xffc00000)
floatValue := math.Float32frombits(rawValue)
err = VerifyFloats32([]float32{floatValue})
assert.Error(t, err)
floatValue = -math.Float32frombits(rawValue)
err = VerifyFloats32([]float32{floatValue})
fmt.Println("-nan", floatValue, err)
assert.Error(t, err)
}
func Test_VerifyFloats64(t *testing.T) {