// Licensed to the LF AI & Data foundation under one // or more contributor license agreements. See the NOTICE file // distributed with this work for additional information // regarding copyright ownership. The ASF licenses this file // to you under the Apache License, Version 2.0 (the // "License"); you may not use this file except in compliance // with the License. You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. package csv import ( "fmt" "strings" "testing" "github.com/stretchr/testify/assert" "github.com/milvus-io/milvus-proto/go-api/v2/commonpb" "github.com/milvus-io/milvus-proto/go-api/v2/schemapb" "github.com/milvus-io/milvus/internal/json" "github.com/milvus-io/milvus/pkg/v2/common" ) func TestNewRowParser_Invalid(t *testing.T) { schema := &schemapb.CollectionSchema{ Fields: []*schemapb.FieldSchema{ { FieldID: 1, Name: "id", IsPrimaryKey: true, DataType: schemapb.DataType_Int64, }, { FieldID: 2, Name: "vector", DataType: schemapb.DataType_FloatVector, TypeParams: []*commonpb.KeyValuePair{{Key: common.DimKey, Value: "2"}}, }, { FieldID: 3, Name: "str", DataType: schemapb.DataType_VarChar, }, { FieldID: 4, Name: "$meta", IsDynamic: true, DataType: schemapb.DataType_JSON, }, }, } type testCase struct { header []string expectErr string } cases := []testCase{ {header: []string{"id", "vector", "$meta"}, expectErr: "value of field is missed: 'str'"}, } nullkey := "" for i, c := range cases { t.Run(fmt.Sprintf("test_%d", i), func(t *testing.T) { _, err := NewRowParser(schema, c.header, nullkey) assert.Error(t, err) assert.True(t, strings.Contains(err.Error(), c.expectErr)) }) } } func TestRowParser_Parse_Valid(t *testing.T) { schema := &schemapb.CollectionSchema{ Fields: []*schemapb.FieldSchema{ { FieldID: 1, Name: "id", IsPrimaryKey: true, DataType: schemapb.DataType_Int64, }, { FieldID: 2, Name: "vector", DataType: schemapb.DataType_FloatVector, TypeParams: []*commonpb.KeyValuePair{{Key: common.DimKey, Value: "2"}}, }, { FieldID: 3, Name: "$meta", IsDynamic: true, DataType: schemapb.DataType_JSON, }, }, } type testCase struct { header []string row []string dyFields map[string]any // expect dynamic fields } cases := []testCase{ {header: []string{"id", "vector", "$meta"}, row: []string{"1", "[1, 2]", "{\"y\": 2}"}, dyFields: map[string]any{"y": 2.0}}, {header: []string{"id", "vector", "x", "$meta"}, row: []string{"1", "[1, 2]", "8", "{\"y\": 2}"}, dyFields: map[string]any{"x": "8", "y": 2.0}}, {header: []string{"id", "vector", "x", "$meta"}, row: []string{"1", "[1, 2]", "8", "{}"}, dyFields: map[string]any{"x": "8"}}, {header: []string{"id", "vector", "x"}, row: []string{"1", "[1, 2]", "8"}, dyFields: map[string]any{"x": "8"}}, {header: []string{"id", "vector", "str", "$meta"}, row: []string{"1", "[1, 2]", "xxsddsffwq", "{\"y\": 2}"}, dyFields: map[string]any{"y": 2.0, "str": "xxsddsffwq"}}, } nullkey := "" for i, c := range cases { r, err := NewRowParser(schema, c.header, nullkey) assert.NoError(t, err) t.Run(fmt.Sprintf("test_%d", i), func(t *testing.T) { data, err := r.Parse(c.row) assert.NoError(t, err) // validate contains fields for _, field := range schema.GetFields() { _, ok := data[field.GetFieldID()] assert.True(t, ok) } // validate dynamic fields var dynamicFields map[string]interface{} err = json.Unmarshal(data[r.(*rowParser).dynamicField.GetFieldID()].([]byte), &dynamicFields) assert.NoError(t, err) assert.Len(t, dynamicFields, len(c.dyFields)) for k, v := range c.dyFields { rv, ok := dynamicFields[k] assert.True(t, ok) assert.EqualValues(t, rv, v) } }) } } func TestRowParser_Parse_Invalid(t *testing.T) { schema := &schemapb.CollectionSchema{ Fields: []*schemapb.FieldSchema{ { FieldID: 1, Name: "id", IsPrimaryKey: true, DataType: schemapb.DataType_Int64, }, { FieldID: 2, Name: "vector", DataType: schemapb.DataType_FloatVector, TypeParams: []*commonpb.KeyValuePair{{Key: common.DimKey, Value: "2"}}, }, { FieldID: 3, Name: "$meta", IsDynamic: true, DataType: schemapb.DataType_JSON, }, }, } type testCase struct { header []string row []string expectErr string } cases := []testCase{ {header: []string{"id", "vector", "x", "$meta"}, row: []string{"1", "[1, 2]", "6", "{\"x\": 8}"}, expectErr: "duplicated key in dynamic field, key=x"}, {header: []string{"id", "vector", "x", "$meta"}, row: []string{"1", "[1, 2]", "8", "{*&%%&$*(&}"}, expectErr: "illegal value for dynamic field, not a JSON format string"}, {header: []string{"id", "vector", "x", "$meta"}, row: []string{"1", "[1, 2]", "8"}, expectErr: "the number of fields in the row is not equal to the header"}, } nullkey := "" for i, c := range cases { r, err := NewRowParser(schema, c.header, nullkey) assert.NoError(t, err) t.Run(fmt.Sprintf("test_%d", i), func(t *testing.T) { _, err := r.Parse(c.row) assert.Error(t, err) assert.True(t, strings.Contains(err.Error(), c.expectErr)) }) } } func TestRowParser_Parse_NULL(t *testing.T) { schema := &schemapb.CollectionSchema{ Fields: []*schemapb.FieldSchema{ { FieldID: 1, Name: "id", IsPrimaryKey: true, DataType: schemapb.DataType_Int64, }, { FieldID: 2, Name: "vector", DataType: schemapb.DataType_FloatVector, TypeParams: []*commonpb.KeyValuePair{{Key: common.DimKey, Value: "2"}}, }, { FieldID: 3, Name: "str", DataType: schemapb.DataType_String, TypeParams: []*commonpb.KeyValuePair{ { Key: common.MaxLengthKey, Value: "128", }, }, Nullable: true, }, }, } header := []string{"id", "vector", "str"} type testCase struct { nullkey string row []string nulldata interface{} } cases := []testCase{ {nullkey: "", row: []string{"1", "[1, 2]", ""}, nulldata: nil}, {nullkey: "NULL", row: []string{"1", "[1, 2]", "NULL"}, nulldata: nil}, {nullkey: "\\N", row: []string{"1", "[1, 2]", "\\N"}, nulldata: nil}, } for i, c := range cases { r, err := NewRowParser(schema, header, c.nullkey) assert.NoError(t, err) t.Run(fmt.Sprintf("test_%d", i), func(t *testing.T) { data, err := r.Parse(c.row) assert.NoError(t, err) assert.EqualValues(t, c.nulldata, data[3]) }) } }