mirror of
https://gitee.com/milvus-io/milvus.git
synced 2025-12-07 01:28:27 +08:00
Convert invalid UTF-8 strings to hex in failure reason. issue: https://github.com/milvus-io/milvus/issues/45066 Signed-off-by: bigsheeper <yihao.dai@zilliz.com>
264 lines
7.2 KiB
Go
264 lines
7.2 KiB
Go
// Licensed to the LF AI & Data foundation under one
|
|
// or more contributor license agreements. See the NOTICE file
|
|
// distributed with this work for additional information
|
|
// regarding copyright ownership. The ASF licenses this file
|
|
// to you under the Apache License, Version 2.0 (the
|
|
// "License"); you may not use this file except in compliance
|
|
// with the License. You may obtain a copy of the License at
|
|
//
|
|
// http://www.apache.org/licenses/LICENSE-2.0
|
|
//
|
|
// Unless required by applicable law or agreed to in writing, software
|
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
// See the License for the specific language governing permissions and
|
|
// limitations under the License.
|
|
|
|
package common
|
|
|
|
import (
|
|
"strings"
|
|
"testing"
|
|
"unicode/utf8"
|
|
|
|
"github.com/stretchr/testify/assert"
|
|
|
|
"github.com/milvus-io/milvus-proto/go-api/v2/commonpb"
|
|
"github.com/milvus-io/milvus-proto/go-api/v2/schemapb"
|
|
"github.com/milvus-io/milvus/pkg/v2/common"
|
|
)
|
|
|
|
func TestUtil_EstimateReadCountPerBatch(t *testing.T) {
|
|
schema := &schemapb.CollectionSchema{
|
|
Fields: []*schemapb.FieldSchema{
|
|
{
|
|
FieldID: 100,
|
|
Name: "pk",
|
|
IsPrimaryKey: true,
|
|
DataType: schemapb.DataType_Int64,
|
|
},
|
|
{
|
|
FieldID: 101,
|
|
Name: "vec",
|
|
DataType: schemapb.DataType_FloatVector,
|
|
TypeParams: []*commonpb.KeyValuePair{
|
|
{
|
|
Key: common.DimKey,
|
|
Value: "128",
|
|
},
|
|
},
|
|
},
|
|
},
|
|
}
|
|
count, err := EstimateReadCountPerBatch(16*1024*1024, schema)
|
|
assert.NoError(t, err)
|
|
assert.Equal(t, int64(1000), count)
|
|
|
|
schema.Fields = append(schema.Fields, &schemapb.FieldSchema{
|
|
FieldID: 102,
|
|
Name: "vec2",
|
|
DataType: schemapb.DataType_FloatVector,
|
|
TypeParams: []*commonpb.KeyValuePair{
|
|
{
|
|
Key: common.DimKey,
|
|
Value: "invalidDim",
|
|
},
|
|
},
|
|
})
|
|
_, err = EstimateReadCountPerBatch(16*1024*1024, schema)
|
|
assert.Error(t, err)
|
|
}
|
|
|
|
func TestUtil_EstimateReadCountPerBatch_InvalidBufferSize(t *testing.T) {
|
|
schema := &schemapb.CollectionSchema{}
|
|
count, err := EstimateReadCountPerBatch(16*1024*1024, schema)
|
|
assert.Error(t, err)
|
|
assert.Equal(t, int64(0), count)
|
|
t.Logf("err=%v", err)
|
|
|
|
schema = &schemapb.CollectionSchema{
|
|
Fields: []*schemapb.FieldSchema{
|
|
{
|
|
FieldID: 100,
|
|
DataType: schemapb.DataType_Int64,
|
|
},
|
|
},
|
|
}
|
|
count, err = EstimateReadCountPerBatch(0, schema)
|
|
assert.Error(t, err)
|
|
assert.Equal(t, int64(0), count)
|
|
t.Logf("err=%v", err)
|
|
}
|
|
|
|
func TestUtil_EstimateReadCountPerBatch_LargeSchema(t *testing.T) {
|
|
schema := &schemapb.CollectionSchema{}
|
|
for i := 0; i < 100; i++ {
|
|
schema.Fields = append(schema.Fields, &schemapb.FieldSchema{
|
|
FieldID: int64(i),
|
|
DataType: schemapb.DataType_VarChar,
|
|
TypeParams: []*commonpb.KeyValuePair{
|
|
{
|
|
Key: common.MaxLengthKey,
|
|
Value: "10000000",
|
|
},
|
|
},
|
|
})
|
|
}
|
|
count, err := EstimateReadCountPerBatch(16*1024*1024, schema)
|
|
assert.NoError(t, err)
|
|
assert.Equal(t, int64(1), count)
|
|
}
|
|
|
|
func TestUtil_CheckVarcharLength(t *testing.T) {
|
|
fieldSchema := &schemapb.FieldSchema{
|
|
FieldID: 1,
|
|
DataType: schemapb.DataType_VarChar,
|
|
TypeParams: []*commonpb.KeyValuePair{
|
|
{
|
|
Key: common.MaxLengthKey,
|
|
Value: "5",
|
|
},
|
|
},
|
|
}
|
|
err := CheckVarcharLength("aaaaaaaa", 5, fieldSchema)
|
|
assert.Error(t, err)
|
|
|
|
err = CheckVarcharLength("aaaaa", 5, fieldSchema)
|
|
assert.NoError(t, err)
|
|
}
|
|
|
|
func TestUtil_CheckArrayCapacity(t *testing.T) {
|
|
fieldSchema := &schemapb.FieldSchema{
|
|
FieldID: 1,
|
|
DataType: schemapb.DataType_Array,
|
|
TypeParams: []*commonpb.KeyValuePair{
|
|
{
|
|
Key: common.MaxCapacityKey,
|
|
Value: "5",
|
|
},
|
|
},
|
|
}
|
|
err := CheckArrayCapacity(6, 5, fieldSchema)
|
|
assert.Error(t, err)
|
|
|
|
err = CheckArrayCapacity(5, 5, fieldSchema)
|
|
assert.NoError(t, err)
|
|
}
|
|
|
|
func TestUtil_CheckValidUTF8(t *testing.T) {
|
|
fieldSchema := &schemapb.FieldSchema{
|
|
FieldID: 1,
|
|
DataType: schemapb.DataType_VarChar,
|
|
TypeParams: []*commonpb.KeyValuePair{
|
|
{
|
|
Key: common.MaxLengthKey,
|
|
Value: "1000",
|
|
},
|
|
},
|
|
}
|
|
err := CheckValidUTF8(string([]byte{0xC0, 0xAF}), fieldSchema)
|
|
assert.Error(t, err)
|
|
|
|
err = CheckValidUTF8("abc", fieldSchema)
|
|
assert.NoError(t, err)
|
|
}
|
|
|
|
func TestUtil_CheckValidString(t *testing.T) {
|
|
fieldSchema := &schemapb.FieldSchema{
|
|
FieldID: 1,
|
|
DataType: schemapb.DataType_VarChar,
|
|
TypeParams: []*commonpb.KeyValuePair{
|
|
{
|
|
Key: common.MaxLengthKey,
|
|
Value: "5",
|
|
},
|
|
},
|
|
}
|
|
err := CheckValidString("aaaaaaaa", 5, fieldSchema)
|
|
assert.Error(t, err)
|
|
|
|
err = CheckValidString(string([]byte{0xC0, 0xAF}), 5, fieldSchema)
|
|
assert.Error(t, err)
|
|
|
|
err = CheckValidString("aaaaa", 5, fieldSchema)
|
|
assert.NoError(t, err)
|
|
}
|
|
|
|
func TestUtil_SafeStringForError(t *testing.T) {
|
|
// Test valid UTF-8 string
|
|
validStr := "Hello, 世界!"
|
|
result := SafeStringForError(validStr)
|
|
assert.Equal(t, validStr, result)
|
|
|
|
// Test invalid UTF-8 string
|
|
invalidStr := string([]byte{0xC0, 0xAF, 'a', 'b', 'c'})
|
|
result = SafeStringForError(invalidStr)
|
|
assert.Contains(t, result, "\\xc0")
|
|
assert.Contains(t, result, "\\xaf")
|
|
assert.Contains(t, result, "abc")
|
|
|
|
// Test empty string
|
|
result = SafeStringForError("")
|
|
assert.Equal(t, "", result)
|
|
|
|
// Test string with mixed valid and invalid UTF-8
|
|
mixedStr := "valid" + string([]byte{0xFF, 0xFE}) + "text"
|
|
result = SafeStringForError(mixedStr)
|
|
assert.Contains(t, result, "valid")
|
|
assert.Contains(t, result, "\\xff")
|
|
assert.Contains(t, result, "\\xfe")
|
|
assert.Contains(t, result, "text")
|
|
}
|
|
|
|
func TestUtil_SafeStringForErrorWithLimit(t *testing.T) {
|
|
// Test string within limit
|
|
shortStr := "short"
|
|
result := SafeStringForErrorWithLimit(shortStr, 10)
|
|
assert.Equal(t, shortStr, result)
|
|
|
|
// Test string exceeding limit
|
|
longStr := "this is a very long string that exceeds the limit"
|
|
result = SafeStringForErrorWithLimit(longStr, 20)
|
|
assert.Equal(t, 23, len(result)) // 20 chars + "..."
|
|
assert.True(t, strings.HasSuffix(result, "..."))
|
|
|
|
// Test invalid UTF-8 string with limit
|
|
invalidStr := string([]byte{0xC0, 0xAF, 0xFF, 0xFE, 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z'})
|
|
result = SafeStringForErrorWithLimit(invalidStr, 15)
|
|
assert.True(t, len(result) <= 18) // 15 chars + "..."
|
|
assert.True(t, strings.HasSuffix(result, "..."))
|
|
}
|
|
|
|
func TestUtil_CheckValidUTF8_WithSafeError(t *testing.T) {
|
|
fieldSchema := &schemapb.FieldSchema{
|
|
FieldID: 1,
|
|
Name: "test_field",
|
|
DataType: schemapb.DataType_VarChar,
|
|
TypeParams: []*commonpb.KeyValuePair{
|
|
{
|
|
Key: common.MaxLengthKey,
|
|
Value: "1000",
|
|
},
|
|
},
|
|
}
|
|
|
|
// Test with invalid UTF-8 - should not cause gRPC serialization error
|
|
invalidStr := string([]byte{0xC0, 0xAF, 0xFF, 0xFE})
|
|
err := CheckValidUTF8(invalidStr, fieldSchema)
|
|
assert.Error(t, err)
|
|
|
|
// Verify the error message contains safe representation
|
|
errMsg := err.Error()
|
|
assert.Contains(t, errMsg, "test_field")
|
|
assert.Contains(t, errMsg, "invalid UTF-8 data")
|
|
assert.Contains(t, errMsg, "\\xc0") // Should contain hex representation
|
|
assert.Contains(t, errMsg, "\\xaf")
|
|
|
|
// Verify the error message is valid UTF-8 itself
|
|
assert.True(t, utf8.ValidString(errMsg), "Error message should be valid UTF-8")
|
|
|
|
// Test with valid UTF-8
|
|
err = CheckValidUTF8("valid string", fieldSchema)
|
|
assert.NoError(t, err)
|
|
}
|