diff --git a/internal/distributed/proxy/httpserver/handler_v1_test.go b/internal/distributed/proxy/httpserver/handler_v1_test.go index 98f383627c..336d5500c9 100644 --- a/internal/distributed/proxy/httpserver/handler_v1_test.go +++ b/internal/distributed/proxy/httpserver/handler_v1_test.go @@ -1220,6 +1220,184 @@ func TestUpsert(t *testing.T) { }) } +func TestFp16Bf16VectorsV1(t *testing.T) { + paramtable.Init() + paramtable.Get().Save(proxy.Params.HTTPCfg.AcceptTypeAllowInt64.Key, "true") + mp := mocks.NewMockProxy(t) + collSchema := generateCollectionSchemaWithVectorFields() + testEngine := initHTTPServer(mp, true) + queryTestCases := []requestBodyTestCase{} + for _, path := range []string{VectorInsertPath, VectorUpsertPath} { + queryTestCases = append(queryTestCases, + requestBodyTestCase{ + path: path, + requestBody: []byte( + `{ + "collectionName": "book", + "data": [ + { + "book_id": 0, + "word_count": 0, + "book_intro": [0.11825, 0.6], + "binaryVector": "AQ==", + "float16Vector": [3.0], + "bfloat16Vector": [4.4, 442], + "sparseFloatVector": {"1": 0.1, "2": 0.44} + } + ] + }`), + errCode: 1804, + errMsg: "fail to deal the insert data, error: []byte size 2 doesn't equal to vector dimension 2 of Float16Vector", + }, requestBodyTestCase{ + path: path, + requestBody: []byte( + `{ + "collectionName": "book", + "data": [ + { + "book_id": 0, + "word_count": 0, + "book_intro": [0.11825, 0.6], + "binaryVector": "AQ==", + "float16Vector": [3, 3.0], + "bfloat16Vector": [4.4, 442], + "sparseFloatVector": {"1": 0.1, "2": 0.44} + } + ] + }`), + errCode: 200, + }, requestBodyTestCase{ + path: path, + // [3, 3] shouble be converted to [float(3), float(3)] + requestBody: []byte( + `{ + "collectionName": "book", + "data": [ + { + "book_id": 0, + "word_count": 0, + "book_intro": [0.11825, 0.6], + "binaryVector": "AQ==", + "float16Vector": [3, 3], + "bfloat16Vector": [4.4, 442], + "sparseFloatVector": {"1": 0.1, "2": 0.44} + } + ] + }`), + errCode: 200, + }, requestBodyTestCase{ 
+ path: path, + requestBody: []byte( + `{ + "collectionName": "book", + "data": [ + { + "book_id": 0, + "word_count": 0, + "book_intro": [0.11825, 0.6], + "binaryVector": "AQ==", + "float16Vector": "AQIDBA==", + "bfloat16Vector": "AQIDBA==", + "sparseFloatVector": {"1": 0.1, "2": 0.44} + } + ] + }`), + errCode: 200, + }, requestBodyTestCase{ + path: path, + requestBody: []byte( + `{ + "collectionName": "book", + "data": [ + { + "book_id": 0, + "word_count": 0, + "book_intro": [0.11825, 0.6], + "binaryVector": "AQ==", + "float16Vector": [3, 3.0, 3], + "bfloat16Vector": [4.4, 44], + "sparseFloatVector": {"1": 0.1, "2": 0.44} + } + ] + }`), + errMsg: "fail to deal the insert data, error: []byte size 6 doesn't equal to vector dimension 2 of Float16Vector", + errCode: 1804, + }, requestBodyTestCase{ + path: path, + requestBody: []byte( + `{ + "collectionName": "book", + "data": [ + { + "book_id": 0, + "word_count": 0, + "book_intro": [0.11825, 0.6], + "binaryVector": "AQ==", + "float16Vector": [3, 3.0], + "bfloat16Vector": [4.4, 442, 44], + "sparseFloatVector": {"1": 0.1, "2": 0.44} + } + ] + }`), + errMsg: "fail to deal the insert data, error: []byte size 6 doesn't equal to vector dimension 2 of BFloat16Vector", + errCode: 1804, + }, requestBodyTestCase{ + path: path, + requestBody: []byte( + `{ + "collectionName": "book", + "data": [ + { + "book_id": 0, + "word_count": 0, + "book_intro": [0.11825, 0.6], + "binaryVector": "AQ==", + "float16Vector": "AQIDBA==", + "bfloat16Vector": [4.4, 442], + "sparseFloatVector": {"1": 0.1, "2": 0.44} + }, + { + "book_id": 1, + "word_count": 0, + "book_intro": [0.11825, 0.6], + "binaryVector": "AQ==", + "float16Vector": [3.1, 3.1], + "bfloat16Vector": "AQIDBA==", + "sparseFloatVector": {"3": 1.1, "2": 0.44} + } + ] + }`), + errCode: 200, + }) + } + mp.EXPECT().DescribeCollection(mock.Anything, mock.Anything).Return(&milvuspb.DescribeCollectionResponse{ + CollectionName: DefaultCollectionName, + Schema: collSchema, + ShardsNum: 
ShardNumDefault, + Status: &StatusSuccess, + }, nil).Times(len(queryTestCases)) + mp.EXPECT().Insert(mock.Anything, mock.Anything).Return(&milvuspb.MutationResult{Status: commonSuccessStatus, InsertCnt: int64(0), IDs: &schemapb.IDs{IdField: &schemapb.IDs_IntId{IntId: &schemapb.LongArray{Data: []int64{}}}}}, nil).Times(4) + mp.EXPECT().Upsert(mock.Anything, mock.Anything).Return(&milvuspb.MutationResult{Status: commonSuccessStatus, InsertCnt: int64(0), IDs: &schemapb.IDs{IdField: &schemapb.IDs_IntId{IntId: &schemapb.LongArray{Data: []int64{}}}}}, nil).Times(4) + for i, testcase := range queryTestCases { + t.Run(testcase.path, func(t *testing.T) { + bodyReader := bytes.NewReader(testcase.requestBody) + req := httptest.NewRequest(http.MethodPost, versional(testcase.path), bodyReader) + req.SetBasicAuth(util.UserRoot, getDefaultRootPassword()) + w := httptest.NewRecorder() + testEngine.ServeHTTP(w, req) + assert.Equal(t, http.StatusOK, w.Code, "case %d: ", i, string(testcase.requestBody)) + returnBody := &ReturnErrMsg{} + err := json.Unmarshal(w.Body.Bytes(), returnBody) + assert.Nil(t, err, "case %d: ", i) + assert.Equal(t, testcase.errCode, returnBody.Code, "case %d: ", i, string(testcase.requestBody)) + if testcase.errCode != 0 { + assert.Equal(t, testcase.errMsg, returnBody.Message, "case %d: ", i, string(testcase.requestBody)) + } + fmt.Println(w.Body.String()) + }) + } +} + func genIDs(dataType schemapb.DataType) *schemapb.IDs { return generateIDs(dataType, 3) } diff --git a/internal/distributed/proxy/httpserver/handler_v2_test.go b/internal/distributed/proxy/httpserver/handler_v2_test.go index 643e38d2df..bce91cf76f 100644 --- a/internal/distributed/proxy/httpserver/handler_v2_test.go +++ b/internal/distributed/proxy/httpserver/handler_v2_test.go @@ -1600,6 +1600,29 @@ func TestMethodPost(t *testing.T) { } } +func validateTestCases(t *testing.T, testEngine *gin.Engine, queryTestCases []requestBodyTestCase, allowInt64 bool) { + for i, testcase := range 
queryTestCases { + t.Run(testcase.path, func(t *testing.T) { + bodyReader := bytes.NewReader(testcase.requestBody) + req := httptest.NewRequest(http.MethodPost, versionalV2(EntityCategory, testcase.path), bodyReader) + if allowInt64 { + req.Header.Set(HTTPHeaderAllowInt64, "true") + } + w := httptest.NewRecorder() + testEngine.ServeHTTP(w, req) + assert.Equal(t, http.StatusOK, w.Code, "case %d: ", i, string(testcase.requestBody)) + returnBody := &ReturnErrMsg{} + err := json.Unmarshal(w.Body.Bytes(), returnBody) + assert.Nil(t, err, "case %d: ", i) + assert.Equal(t, testcase.errCode, returnBody.Code, "case %d: ", i, string(testcase.requestBody)) + if testcase.errCode != 0 { + assert.Equal(t, testcase.errMsg, returnBody.Message, "case %d: ", i, string(testcase.requestBody)) + } + fmt.Println(w.Body.String()) + }) + } +} + func TestDML(t *testing.T) { paramtable.Init() // disable rate limit @@ -1715,23 +1738,7 @@ func TestDML(t *testing.T) { requestBody: []byte(`{"collectionName": "book", "data": [{"book_id": 0, "word_count": 0, "book_intro": [0.11825, 0.6]}]}`), }) - for _, testcase := range queryTestCases { - t.Run(testcase.path, func(t *testing.T) { - bodyReader := bytes.NewReader(testcase.requestBody) - req := httptest.NewRequest(http.MethodPost, versionalV2(EntityCategory, testcase.path), bodyReader) - w := httptest.NewRecorder() - testEngine.ServeHTTP(w, req) - assert.Equal(t, http.StatusOK, w.Code) - returnBody := &ReturnErrMsg{} - err := json.Unmarshal(w.Body.Bytes(), returnBody) - assert.Nil(t, err) - assert.Equal(t, testcase.errCode, returnBody.Code) - if testcase.errCode != 0 { - assert.Equal(t, testcase.errMsg, returnBody.Message) - } - fmt.Println(w.Body.String()) - }) - } + validateTestCases(t, testEngine, queryTestCases, false) } func TestAllowInt64(t *testing.T) { @@ -1759,24 +1766,183 @@ func TestAllowInt64(t *testing.T) { mp.EXPECT().Insert(mock.Anything, mock.Anything).Return(&milvuspb.MutationResult{Status: commonSuccessStatus, InsertCnt: 
int64(0), IDs: &schemapb.IDs{IdField: &schemapb.IDs_IntId{IntId: &schemapb.LongArray{Data: []int64{}}}}}, nil).Once() mp.EXPECT().Upsert(mock.Anything, mock.Anything).Return(&milvuspb.MutationResult{Status: commonSuccessStatus, UpsertCnt: int64(0), IDs: &schemapb.IDs{IdField: &schemapb.IDs_IntId{IntId: &schemapb.LongArray{Data: []int64{}}}}}, nil).Once() - for _, testcase := range queryTestCases { - t.Run(testcase.path, func(t *testing.T) { - bodyReader := bytes.NewReader(testcase.requestBody) - req := httptest.NewRequest(http.MethodPost, versionalV2(EntityCategory, testcase.path), bodyReader) - req.Header.Set(HTTPHeaderAllowInt64, "true") - w := httptest.NewRecorder() - testEngine.ServeHTTP(w, req) - assert.Equal(t, http.StatusOK, w.Code) - returnBody := &ReturnErrMsg{} - err := json.Unmarshal(w.Body.Bytes(), returnBody) - assert.Nil(t, err) - assert.Equal(t, testcase.errCode, returnBody.Code) - if testcase.errCode != 0 { - assert.Equal(t, testcase.errMsg, returnBody.Message) - } - fmt.Println(w.Body.String()) - }) + validateTestCases(t, testEngine, queryTestCases, true) +} + +func generateCollectionSchemaWithVectorFields() *schemapb.CollectionSchema { + collSchema := generateCollectionSchema(schemapb.DataType_Int64, false, true) + binaryVectorField := generateVectorFieldSchema(schemapb.DataType_BinaryVector) + binaryVectorField.Name = "binaryVector" + float16VectorField := generateVectorFieldSchema(schemapb.DataType_Float16Vector) + float16VectorField.Name = "float16Vector" + bfloat16VectorField := generateVectorFieldSchema(schemapb.DataType_BFloat16Vector) + bfloat16VectorField.Name = "bfloat16Vector" + sparseFloatVectorField := generateVectorFieldSchema(schemapb.DataType_SparseFloatVector) + sparseFloatVectorField.Name = "sparseFloatVector" + collSchema.Fields = append(collSchema.Fields, binaryVectorField) + collSchema.Fields = append(collSchema.Fields, float16VectorField) + collSchema.Fields = append(collSchema.Fields, bfloat16VectorField) + collSchema.Fields 
= append(collSchema.Fields, sparseFloatVectorField) + return collSchema +} + +func TestFp16Bf16VectorsV2(t *testing.T) { + paramtable.Init() + // disable rate limit + paramtable.Get().Save(paramtable.Get().QuotaConfig.QuotaAndLimitsEnabled.Key, "false") + defer paramtable.Get().Reset(paramtable.Get().QuotaConfig.QuotaAndLimitsEnabled.Key) + mp := mocks.NewMockProxy(t) + collSchema := generateCollectionSchemaWithVectorFields() + testEngine := initHTTPServerV2(mp, false) + queryTestCases := []requestBodyTestCase{} + for _, path := range []string{InsertAction, UpsertAction} { + queryTestCases = append(queryTestCases, + requestBodyTestCase{ + path: path, + requestBody: []byte( + `{ + "collectionName": "book", + "data": [ + { + "book_id": 0, + "word_count": 0, + "book_intro": [0.11825, 0.6], + "binaryVector": "AQ==", + "float16Vector": [3.0], + "bfloat16Vector": [4.4, 442], + "sparseFloatVector": {"1": 0.1, "2": 0.44} + } + ] + }`), + errCode: 1804, + errMsg: "fail to deal the insert data, error: []byte size 2 doesn't equal to vector dimension 2 of Float16Vector", + }, requestBodyTestCase{ + path: path, + requestBody: []byte( + `{ + "collectionName": "book", + "data": [ + { + "book_id": 0, + "word_count": 0, + "book_intro": [0.11825, 0.6], + "binaryVector": "AQ==", + "float16Vector": [3, 3.0], + "bfloat16Vector": [4.4, 442], + "sparseFloatVector": {"1": 0.1, "2": 0.44} + } + ] + }`), + }, requestBodyTestCase{ + path: path, + // [3, 3] shouble be converted to [float(3), float(3)] + requestBody: []byte( + `{ + "collectionName": "book", + "data": [ + { + "book_id": 0, + "word_count": 0, + "book_intro": [0.11825, 0.6], + "binaryVector": "AQ==", + "float16Vector": [3, 3], + "bfloat16Vector": [4.4, 442], + "sparseFloatVector": {"1": 0.1, "2": 0.44} + } + ] + }`), + }, requestBodyTestCase{ + path: path, + requestBody: []byte( + `{ + "collectionName": "book", + "data": [ + { + "book_id": 0, + "word_count": 0, + "book_intro": [0.11825, 0.6], + "binaryVector": "AQ==", + 
"float16Vector": "AQIDBA==", + "bfloat16Vector": "AQIDBA==", + "sparseFloatVector": {"1": 0.1, "2": 0.44} + } + ] + }`), + }, requestBodyTestCase{ + path: path, + requestBody: []byte( + `{ + "collectionName": "book", + "data": [ + { + "book_id": 0, + "word_count": 0, + "book_intro": [0.11825, 0.6], + "binaryVector": "AQ==", + "float16Vector": [3, 3.0, 3], + "bfloat16Vector": [4.4, 44], + "sparseFloatVector": {"1": 0.1, "2": 0.44} + } + ] + }`), + errMsg: "fail to deal the insert data, error: []byte size 6 doesn't equal to vector dimension 2 of Float16Vector", + errCode: 1804, + }, requestBodyTestCase{ + path: path, + requestBody: []byte( + `{ + "collectionName": "book", + "data": [ + { + "book_id": 0, + "word_count": 0, + "book_intro": [0.11825, 0.6], + "binaryVector": "AQ==", + "float16Vector": [3, 3.0], + "bfloat16Vector": [4.4, 442, 44], + "sparseFloatVector": {"1": 0.1, "2": 0.44} + } + ] + }`), + errMsg: "fail to deal the insert data, error: []byte size 6 doesn't equal to vector dimension 2 of BFloat16Vector", + errCode: 1804, + }, requestBodyTestCase{ + path: path, + requestBody: []byte( + `{ + "collectionName": "book", + "data": [ + { + "book_id": 0, + "word_count": 0, + "book_intro": [0.11825, 0.6], + "binaryVector": "AQ==", + "float16Vector": "AQIDBA==", + "bfloat16Vector": [4.4, 442], + "sparseFloatVector": {"1": 0.1, "2": 0.44} + }, + { + "book_id": 1, + "word_count": 0, + "book_intro": [0.11825, 0.6], + "binaryVector": "AQ==", + "float16Vector": [3.1, 3.1], + "bfloat16Vector": "AQIDBA==", + "sparseFloatVector": {"3": 1.1, "2": 0.44} + } + ] + }`), + }) } + mp.EXPECT().DescribeCollection(mock.Anything, mock.Anything).Return(&milvuspb.DescribeCollectionResponse{ + CollectionName: DefaultCollectionName, + Schema: collSchema, + ShardsNum: ShardNumDefault, + Status: &StatusSuccess, + }, nil).Times(len(queryTestCases)) + mp.EXPECT().Insert(mock.Anything, mock.Anything).Return(&milvuspb.MutationResult{Status: commonSuccessStatus, InsertCnt: int64(0), IDs: 
&schemapb.IDs{IdField: &schemapb.IDs_IntId{IntId: &schemapb.LongArray{Data: []int64{}}}}}, nil).Times(4) + mp.EXPECT().Upsert(mock.Anything, mock.Anything).Return(&milvuspb.MutationResult{Status: commonSuccessStatus, InsertCnt: int64(0), IDs: &schemapb.IDs{IdField: &schemapb.IDs_IntId{IntId: &schemapb.LongArray{Data: []int64{}}}}}, nil).Times(4) + validateTestCases(t, testEngine, queryTestCases, false) } func TestSearchV2(t *testing.T) { @@ -1811,26 +1977,14 @@ func TestSearchV2(t *testing.T) { Ids: generateIDs(schemapb.DataType_Int64, 3), Scores: DefaultScores, }}, nil).Once() - mp.EXPECT().HybridSearch(mock.Anything, mock.Anything).Return(&milvuspb.SearchResults{Status: commonSuccessStatus, Results: &schemapb.SearchResultData{TopK: int64(0)}}, nil).Times(3) - collSchema := generateCollectionSchema(schemapb.DataType_Int64, false, true) - binaryVectorField := generateVectorFieldSchema(schemapb.DataType_BinaryVector) - binaryVectorField.Name = "binaryVector" - float16VectorField := generateVectorFieldSchema(schemapb.DataType_Float16Vector) - float16VectorField.Name = "float16Vector" - bfloat16VectorField := generateVectorFieldSchema(schemapb.DataType_BFloat16Vector) - bfloat16VectorField.Name = "bfloat16Vector" - sparseFloatVectorField := generateVectorFieldSchema(schemapb.DataType_SparseFloatVector) - sparseFloatVectorField.Name = "sparseFloatVector" - collSchema.Fields = append(collSchema.Fields, binaryVectorField) - collSchema.Fields = append(collSchema.Fields, float16VectorField) - collSchema.Fields = append(collSchema.Fields, bfloat16VectorField) - collSchema.Fields = append(collSchema.Fields, sparseFloatVectorField) + mp.EXPECT().HybridSearch(mock.Anything, mock.Anything).Return(&milvuspb.SearchResults{Status: commonSuccessStatus, Results: &schemapb.SearchResultData{TopK: int64(0)}}, nil).Times(5) + collSchema := generateCollectionSchemaWithVectorFields() mp.EXPECT().DescribeCollection(mock.Anything, mock.Anything).Return(&milvuspb.DescribeCollectionResponse{ 
CollectionName: DefaultCollectionName, Schema: collSchema, ShardsNum: ShardNumDefault, Status: &StatusSuccess, - }, nil).Times(10) + }, nil).Times(14) mp.EXPECT().Search(mock.Anything, mock.Anything).Return(&milvuspb.SearchResults{Status: commonSuccessStatus, Results: &schemapb.SearchResultData{TopK: int64(0)}}, nil).Times(3) mp.EXPECT().DescribeCollection(mock.Anything, mock.Anything).Return(&milvuspb.DescribeCollectionResponse{ Status: &commonpb.Status{ @@ -1871,7 +2025,7 @@ func TestSearchV2(t *testing.T) { queryTestCases = append(queryTestCases, requestBodyTestCase{ path: SearchAction, requestBody: []byte(`{"collectionName": "book", "data": [["0.1", "0.2"]], "filter": "book_id in [2, 4, 6, 8]", "limit": 4, "outputFields": ["word_count"], "params": {"radius":0.9, "range_filter": 0.1}, "groupingField": "test"}`), - errMsg: "can only accept json format request, error: Mismatch type float32 with value string \"at index 8: mismatched type with value\\n\\n\\t[\\\"0.1\\\", \\\"0.2\\\"]\\n\\t........^.....\\n\": invalid parameter[expected=FloatVector][actual=[\"0.1\", \"0.2\"]]", + errMsg: "can only accept json format request, error: Mismatch type float32 with value string \"at index 9: mismatched type with value\\n\\n\\t[[\\\"0.1\\\", \\\"0.2\\\"]]\\n\\t.........^......\\n\": invalid parameter[expected=FloatVector][actual=[[\"0.1\", \"0.2\"]]]", errCode: 1801, }) queryTestCases = append(queryTestCases, requestBodyTestCase{ @@ -1929,6 +2083,50 @@ func TestSearchV2(t *testing.T) { `{"data": ["AQIDBA=="], "annsField": "bfloat16Vector", "metricType": "L2", "limit": 3}` + `], "rerank": {"strategy": "weighted", "params": {"weights": [0.9, 0.8]}}}`), }) + queryTestCases = append(queryTestCases, requestBodyTestCase{ + path: AdvancedSearchAction, + requestBody: []byte(`{"collectionName": "hello_milvus", "search": [` + + `{"data": [[0.1, 0.2]], "annsField": "book_intro", "metricType": "L2", "limit": 3},` + + `{"data": ["AQ=="], "annsField": "binaryVector", "metricType": "L2", 
"limit": 3},` + + `{"data": [[0.1, 0.23]], "annsField": "float16Vector", "metricType": "L2", "limit": 3},` + + `{"data": [[0.1, 0.43]], "annsField": "bfloat16Vector", "metricType": "L2", "limit": 3}` + + `], "rerank": {"strategy": "weighted", "params": {"weights": [0.9, 0.8]}}}`), + }) + + // -2, -1, 1, 3 should be float32 + queryTestCases = append(queryTestCases, requestBodyTestCase{ + path: AdvancedSearchAction, + requestBody: []byte(`{"collectionName": "hello_milvus", "search": [` + + `{"data": [[0.1, 0.2]], "annsField": "book_intro", "metricType": "L2", "limit": 3},` + + `{"data": ["AQ=="], "annsField": "binaryVector", "metricType": "L2", "limit": 3},` + + `{"data": [[-2, -1]], "annsField": "float16Vector", "metricType": "L2", "limit": 3},` + + `{"data": [[1, 3]], "annsField": "bfloat16Vector", "metricType": "L2", "limit": 3}` + + `], "rerank": {"strategy": "weighted", "params": {"weights": [0.9, 0.8]}}}`), + }) + // invalid fp32 vectors for fp16/bf16 + queryTestCases = append(queryTestCases, requestBodyTestCase{ + path: AdvancedSearchAction, + requestBody: []byte(`{"collectionName": "hello_milvus", "search": [` + + `{"data": [[0.1, 0.2]], "annsField": "book_intro", "metricType": "L2", "limit": 3},` + + `{"data": ["AQ=="], "annsField": "binaryVector", "metricType": "L2", "limit": 3},` + + `{"data": [[0.23]], "annsField": "float16Vector", "metricType": "L2", "limit": 3},` + + `{"data": [[0.1, 0.43]], "annsField": "bfloat16Vector", "metricType": "L2", "limit": 3}` + + `], "rerank": {"strategy": "weighted", "params": {"weights": [0.9, 0.8]}}}`), + errCode: 1801, + errMsg: "can only accept json format request, error: dimension: 2, but length of []float: 1: invalid parameter[expected=Float16Vector][actual=[[0.23]]]", + }) + queryTestCases = append(queryTestCases, requestBodyTestCase{ + path: AdvancedSearchAction, + requestBody: []byte(`{"collectionName": "hello_milvus", "search": [` + + `{"data": [[0.1, 0.2]], "annsField": "book_intro", "metricType": "L2", "limit": 
3},` + + `{"data": ["AQ=="], "annsField": "binaryVector", "metricType": "L2", "limit": 3},` + + `{"data": [[0.23, 4.4]], "annsField": "float16Vector", "metricType": "L2", "limit": 3},` + + `{"data": [[0.1]], "annsField": "bfloat16Vector", "metricType": "L2", "limit": 3}` + + `], "rerank": {"strategy": "weighted", "params": {"weights": [0.9, 0.8]}}}`), + errCode: 1801, + errMsg: "can only accept json format request, error: dimension: 2, but length of []float: 1: invalid parameter[expected=BFloat16Vector][actual=[[0.1]]]", + }) + queryTestCases = append(queryTestCases, requestBodyTestCase{ path: AdvancedSearchAction, requestBody: []byte(`{"collectionName": "hello_milvus", "search": [` + @@ -1948,7 +2146,7 @@ func TestSearchV2(t *testing.T) { `{"data": ["AQIDBA=="], "annsField": "float16Vector", "metricType": "L2", "limit": 3},` + `{"data": ["AQIDBA=="], "annsField": "bfloat16Vector", "metricType": "L2", "limit": 3}` + `], "rerank": {"strategy": "weighted", "params": {"weights": [0.9, 0.8]}}}`), - errMsg: "can only accept json format request, error: dimension: 2, but length of []float: 3: invalid parameter[expected=FloatVector][actual=[0.1, 0.2, 0.3]]", + errMsg: "can only accept json format request, error: dimension: 2, but length of []float: 3: invalid parameter[expected=FloatVector][actual=[[0.1, 0.2, 0.3]]]", errCode: 1801, }) queryTestCases = append(queryTestCases, requestBodyTestCase{ @@ -2006,22 +2204,5 @@ func TestSearchV2(t *testing.T) { errMsg: "mock", errCode: 1100, // ErrParameterInvalid }) - - for _, testcase := range queryTestCases { - t.Run(testcase.path, func(t *testing.T) { - bodyReader := bytes.NewReader(testcase.requestBody) - req := httptest.NewRequest(http.MethodPost, versionalV2(EntityCategory, testcase.path), bodyReader) - w := httptest.NewRecorder() - testEngine.ServeHTTP(w, req) - assert.Equal(t, http.StatusOK, w.Code) - returnBody := &ReturnErrMsg{} - err := json.Unmarshal(w.Body.Bytes(), returnBody) - assert.Nil(t, err) - assert.Equal(t, 
testcase.errCode, returnBody.Code) - if testcase.errCode != 0 { - assert.Equal(t, testcase.errMsg, returnBody.Message) - } - fmt.Println(w.Body.String()) - }) - } + validateTestCases(t, testEngine, queryTestCases, false) } diff --git a/internal/distributed/proxy/httpserver/utils.go b/internal/distributed/proxy/httpserver/utils.go index 0a6f9bfc15..9df0e2a64e 100644 --- a/internal/distributed/proxy/httpserver/utils.go +++ b/internal/distributed/proxy/httpserver/utils.go @@ -19,9 +19,7 @@ package httpserver import ( "bytes" "context" - "encoding/binary" "fmt" - "math" "reflect" "strconv" "strings" @@ -367,27 +365,37 @@ func checkAndSetData(body string, collSchema *schemapb.CollectionSchema) (error, } reallyData[fieldName] = sparseVec case schemapb.DataType_Float16Vector: - if dataString == "" { - return merr.WrapErrParameterInvalid(schemapb.DataType_name[int32(fieldType)], "", "missing vector field: "+fieldName), reallyDataArray, validDataMap - } - vectorStr := gjson.Get(data.Raw, fieldName).Raw - var vectorArray []byte - err := json.Unmarshal([]byte(vectorStr), &vectorArray) - if err != nil { - return merr.WrapErrParameterInvalid(schemapb.DataType_name[int32(fieldType)], dataString, err.Error()), reallyDataArray, validDataMap - } - reallyData[fieldName] = vectorArray + fallthrough case schemapb.DataType_BFloat16Vector: if dataString == "" { return merr.WrapErrParameterInvalid(schemapb.DataType_name[int32(fieldType)], "", "missing vector field: "+fieldName), reallyDataArray, validDataMap } - vectorStr := gjson.Get(data.Raw, fieldName).Raw - var vectorArray []byte - err := json.Unmarshal([]byte(vectorStr), &vectorArray) - if err != nil { - return merr.WrapErrParameterInvalid(schemapb.DataType_name[int32(fieldType)], dataString, err.Error()), reallyDataArray, validDataMap + vectorJSON := gjson.Get(data.Raw, fieldName) + // Clients may send float32 vector because they are inconvenient of processing float16 or bfloat16. 
+ // Float32 vector is an array in JSON format, like `[1.0, 2.0, 3.0]`, `[1, 2, 3]`, etc, + // while float16 or bfloat16 vector is a string in JSON format, like `"4z1jPgAAgL8="`, `"gD+AP4A/gD8="`, etc. + if vectorJSON.IsArray() { + // `data` is a float32 vector + // same as `case schemapb.DataType_FloatVector` + var vectorArray []float32 + err := json.Unmarshal([]byte(dataString), &vectorArray) + if err != nil { + return merr.WrapErrParameterInvalid(schemapb.DataType_name[int32(fieldType)], dataString, err.Error()), reallyDataArray, validDataMap + } + reallyData[fieldName] = vectorArray + } else if vectorJSON.Type == gjson.String { + // `data` is a float16 or bfloat16 vector + // same as `case schemapb.DataType_BinaryVector` + vectorStr := gjson.Get(data.Raw, fieldName).Raw + var vectorArray []byte + err := json.Unmarshal([]byte(vectorStr), &vectorArray) + if err != nil { + return merr.WrapErrParameterInvalid(schemapb.DataType_name[int32(fieldType)], dataString, err.Error()), reallyDataArray, validDataMap + } + reallyData[fieldName] = vectorArray + } else { + return merr.WrapErrParameterInvalid(schemapb.DataType_name[int32(fieldType)], dataString, "invalid vector field: "+fieldName), reallyDataArray, validDataMap } - reallyData[fieldName] = vectorArray case schemapb.DataType_Bool: result, err := cast.ToBoolE(dataString) if err != nil { @@ -633,7 +641,6 @@ func convertFloatVectorToArray(vector [][]float32, dim int64) ([]float32, error) } func convertBinaryVectorToArray(vector [][]byte, dim int64, dataType schemapb.DataType) ([]byte, error) { - binaryArray := make([]byte, 0) var bytesLen int64 switch dataType { case schemapb.DataType_BinaryVector: @@ -643,6 +650,7 @@ func convertBinaryVectorToArray(vector [][]byte, dim int64, dataType schemapb.Da case schemapb.DataType_BFloat16Vector: bytesLen = dim * 2 } + binaryArray := make([]byte, 0, len(vector)*int(bytesLen)) for _, arr := range vector { if int64(len(arr)) != bytesLen { return nil, fmt.Errorf("[]byte size %d 
doesn't equal to vector dimension %d of %s", @@ -836,9 +844,25 @@ func anyToColumns(rows []map[string]interface{}, validDataMap map[string][]bool, case schemapb.DataType_BinaryVector: nameColumns[field.Name] = append(nameColumns[field.Name].([][]byte), candi.v.Interface().([]byte)) case schemapb.DataType_Float16Vector: - nameColumns[field.Name] = append(nameColumns[field.Name].([][]byte), candi.v.Interface().([]byte)) + switch candi.v.Interface().(type) { + case []byte: + nameColumns[field.Name] = append(nameColumns[field.Name].([][]byte), candi.v.Interface().([]byte)) + case []float32: + vec := typeutil.Float32ArrayToFloat16Bytes(candi.v.Interface().([]float32)) + nameColumns[field.Name] = append(nameColumns[field.Name].([][]byte), vec) + default: + return nil, merr.WrapErrParameterInvalidMsg(fmt.Sprintf("invalid type(%v) of field(%v) ", field.DataType, field.Name)) + } case schemapb.DataType_BFloat16Vector: - nameColumns[field.Name] = append(nameColumns[field.Name].([][]byte), candi.v.Interface().([]byte)) + switch candi.v.Interface().(type) { + case []byte: + nameColumns[field.Name] = append(nameColumns[field.Name].([][]byte), candi.v.Interface().([]byte)) + case []float32: + vec := typeutil.Float32ArrayToBFloat16Bytes(candi.v.Interface().([]float32)) + nameColumns[field.Name] = append(nameColumns[field.Name].([][]byte), vec) + default: + return nil, merr.WrapErrParameterInvalidMsg(fmt.Sprintf("invalid type(%v) of field(%v) ", field.DataType, field.Name)) + } case schemapb.DataType_SparseFloatVector: content := candi.v.Interface().([]byte) rowSparseDim := typeutil.SparseFloatRowDim(content) @@ -1074,30 +1098,19 @@ func anyToColumns(rows []map[string]interface{}, validDataMap map[string][]bool, return columns, nil } -// --------------------- search param --------------------- // -func serialize(fv []float32) []byte { - data := make([]byte, 0, 4*len(fv)) // float32 occupies 4 bytes - buf := make([]byte, 4) - for _, f := range fv { - 
binary.LittleEndian.PutUint32(buf, math.Float32bits(f)) - data = append(data, buf...) - } - return data -} - -func serializeFloatVectors(vectors []gjson.Result, dataType schemapb.DataType, dimension, bytesLen int64) ([][]byte, error) { +func serializeFloatVectors(vectorStr string, dataType schemapb.DataType, dimension, bytesLen int64, fpArrayToBytesFunc func([]float32) []byte) ([][]byte, error) { values := make([][]byte, 0) - for _, vector := range vectors { - var vectorArray []float32 - err := json.Unmarshal([]byte(vector.String()), &vectorArray) - if err != nil { - return nil, merr.WrapErrParameterInvalid(schemapb.DataType_name[int32(dataType)], vector.String(), err.Error()) - } + var fp32Values [][]float32 + err := json.Unmarshal([]byte(vectorStr), &fp32Values) + if err != nil { + return nil, merr.WrapErrParameterInvalid(schemapb.DataType_name[int32(dataType)], vectorStr, err.Error()) + } + for _, vectorArray := range fp32Values { if int64(len(vectorArray)) != dimension { - return nil, merr.WrapErrParameterInvalid(schemapb.DataType_name[int32(dataType)], vector.String(), + return nil, merr.WrapErrParameterInvalid(schemapb.DataType_name[int32(dataType)], vectorStr, fmt.Sprintf("dimension: %d, but length of []float: %d", dimension, len(vectorArray))) } - vectorBytes := serialize(vectorArray) + vectorBytes := fpArrayToBytesFunc(vectorArray) values = append(values, vectorBytes) } return values, nil @@ -1105,7 +1118,7 @@ func serializeFloatVectors(vectors []gjson.Result, dataType schemapb.DataType, d func serializeByteVectors(vectorStr string, dataType schemapb.DataType, dimension, bytesLen int64) ([][]byte, error) { values := make([][]byte, 0) - err := json.Unmarshal([]byte(vectorStr), &values) // todo check len == dimension * 1/2/2 + err := json.Unmarshal([]byte(vectorStr), &values) if err != nil { return nil, merr.WrapErrParameterInvalid(schemapb.DataType_name[int32(dataType)], vectorStr, err.Error()) } @@ -1118,6 +1131,24 @@ func serializeByteVectors(vectorStr 
string, dataType schemapb.DataType, dimensio return values, nil } +// serializeFloatOrByteVectors serializes float32/float16/bfloat16 vectors. +// `[[1, 2, 3], [4.0, 5.0, 6.0]] is float32 vector, +// `["4z1jPgAAgL8=", "gD+AP4A/gD8="]` is float16/bfloat16 vector. +func serializeFloatOrByteVectors(jsonResult gjson.Result, dataType schemapb.DataType, dimension int64, fpArrayToBytesFunc func([]float32) []byte) ([][]byte, error) { + firstElement := jsonResult.Get("0") + + // Clients may send float32 vector because they are inconvenient of processing float16 or bfloat16. + // Float32 vector is an array in JSON format, like `[1.0, 2.0, 3.0]`, `[1, 2, 3]`, etc, + // while float16 or bfloat16 vector is a string in JSON format, like `"4z1jPgAAgL8="`, `"gD+AP4A/gD8="`, etc. + if firstElement.IsArray() { + return serializeFloatVectors(jsonResult.Raw, dataType, dimension, dimension*2, fpArrayToBytesFunc) + } else if firstElement.Type == gjson.String || !firstElement.Exists() { + // consider corner case: `[]` + return serializeByteVectors(jsonResult.Raw, dataType, dimension, dimension*2) + } + return nil, merr.WrapErrParameterInvalid(schemapb.DataType_name[int32(dataType)], jsonResult.Raw, "invalid type") +} + func serializeSparseFloatVectors(vectors []gjson.Result, dataType schemapb.DataType) ([][]byte, error) { values := make([][]byte, 0) for _, vector := range vectors { @@ -1138,16 +1169,16 @@ func convertQueries2Placeholder(body string, dataType schemapb.DataType, dimensi switch dataType { case schemapb.DataType_FloatVector: valueType = commonpb.PlaceholderType_FloatVector - values, err = serializeFloatVectors(gjson.Get(body, HTTPRequestData).Array(), dataType, dimension, dimension*4) + values, err = serializeFloatVectors(gjson.Get(body, HTTPRequestData).Raw, dataType, dimension, dimension*4, typeutil.Float32ArrayToBytes) case schemapb.DataType_BinaryVector: valueType = commonpb.PlaceholderType_BinaryVector values, err = serializeByteVectors(gjson.Get(body, 
HTTPRequestData).Raw, dataType, dimension, dimension/8) case schemapb.DataType_Float16Vector: valueType = commonpb.PlaceholderType_Float16Vector - values, err = serializeByteVectors(gjson.Get(body, HTTPRequestData).Raw, dataType, dimension, dimension*2) + values, err = serializeFloatOrByteVectors(gjson.Get(body, HTTPRequestData), dataType, dimension, typeutil.Float32ArrayToFloat16Bytes) case schemapb.DataType_BFloat16Vector: valueType = commonpb.PlaceholderType_BFloat16Vector - values, err = serializeByteVectors(gjson.Get(body, HTTPRequestData).Raw, dataType, dimension, dimension*2) + values, err = serializeFloatOrByteVectors(gjson.Get(body, HTTPRequestData), dataType, dimension, typeutil.Float32ArrayToBFloat16Bytes) case schemapb.DataType_SparseFloatVector: valueType = commonpb.PlaceholderType_SparseFloatVector values, err = serializeSparseFloatVectors(gjson.Get(body, HTTPRequestData).Array(), dataType) @@ -1180,7 +1211,7 @@ func vectors2PlaceholderGroupBytes(vectors [][]float32) []byte { ph.Type = placeHolderType for _, vector := range vectors { - ph.Values = append(ph.Values, serialize(vector)) + ph.Values = append(ph.Values, typeutil.Float32ArrayToBytes(vector)) } } phg := &commonpb.PlaceholderGroup{ diff --git a/internal/distributed/proxy/httpserver/utils_bench_serialize_vectors_test.go b/internal/distributed/proxy/httpserver/utils_bench_serialize_vectors_test.go new file mode 100644 index 0000000000..6c6865c0fe --- /dev/null +++ b/internal/distributed/proxy/httpserver/utils_bench_serialize_vectors_test.go @@ -0,0 +1,219 @@ +// Licensed to the LF AI & Data foundation under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. 
You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package httpserver + +import ( + "fmt" + "math/rand" + "testing" + + "github.com/stretchr/testify/assert" + "github.com/tidwall/gjson" + + "github.com/milvus-io/milvus-proto/go-api/v2/schemapb" + "github.com/milvus-io/milvus/internal/json" + "github.com/milvus-io/milvus/pkg/util/merr" + "github.com/milvus-io/milvus/pkg/util/typeutil" +) + +/* + * Benchmarkings for different serialization implementations + * See results: https://github.com/milvus-io/milvus/pull/37556#issuecomment-2491668743 + */ + +// serializeFloatVectorsBaseline uses []gjson.Result as input and calls json.Unmarshal in multiple times, +// which downgrades the performance +func serializeFloatVectorsBaseline(vectors []gjson.Result, dataType schemapb.DataType, dimension, bytesLen int64) ([][]byte, error) { + values := make([][]byte, 0) + for _, vector := range vectors { + var vectorArray []float32 + err := json.Unmarshal([]byte(vector.String()), &vectorArray) + if err != nil { + return nil, merr.WrapErrParameterInvalid(schemapb.DataType_name[int32(dataType)], vector.String(), err.Error()) + } + if int64(len(vectorArray)) != dimension { + return nil, merr.WrapErrParameterInvalid(schemapb.DataType_name[int32(dataType)], vector.String(), + fmt.Sprintf("dimension: %d, but length of []float: %d", dimension, len(vectorArray))) + } + vectorBytes := typeutil.Float32ArrayToBytes(vectorArray) + values = append(values, vectorBytes) + } + return values, nil +} + +// serializeFloatOrByteVectorsBaseline calls json.Unmarshal in multiple times, which downgrades the performance +func 
serializeFloatOrByteVectorsBaseline(jsonResult gjson.Result, dataType schemapb.DataType, dimension int64, fpArrayToBytesFunc func([]float32) []byte) ([][]byte, error) { + values := make([][]byte, 0) + for _, vector := range jsonResult.Array() { + if vector.IsArray() { + var vectorArray []float32 + err := json.Unmarshal([]byte(vector.Raw), &vectorArray) + if err != nil { + return nil, merr.WrapErrParameterInvalid(schemapb.DataType_name[int32(dataType)], vector.String(), err.Error()) + } + if int64(len(vectorArray)) != dimension { + return nil, merr.WrapErrParameterInvalid(schemapb.DataType_name[int32(dataType)], vector.String(), + fmt.Sprintf("dimension: %d, but length of []float: %d", dimension, len(vectorArray))) + } + vectorBytes := fpArrayToBytesFunc(vectorArray) + values = append(values, vectorBytes) + } else if vector.Type == gjson.String { + var vectorArray []byte + err := json.Unmarshal([]byte(vector.Raw), &vectorArray) + if err != nil { + return nil, merr.WrapErrParameterInvalid(schemapb.DataType_name[int32(dataType)], vector.String(), err.Error()) + } + if int64(len(vectorArray)) != dimension*2 { + return nil, merr.WrapErrParameterInvalid(schemapb.DataType_name[int32(dataType)], string(vectorArray), + fmt.Sprintf("dimension: %d, but length of []byte: %d", dimension, len(vectorArray))) + } + values = append(values, vectorArray) + } else { + return nil, merr.WrapErrParameterInvalid(schemapb.DataType_name[int32(dataType)], vector.String(), "invalid type") + } + } + return values, nil +} + +// serializeFloatOrByteVectorsUnmarshalTwice calls Unmarshal twice, which downgrades the performance +// See: https://github.com/milvus-io/milvus/pull/37556#discussion_r1849672721 +func serializeFloatOrByteVectorsUnmarshalTwice(vectorStr string, dataType schemapb.DataType, dimension int64, serializeFunc func([]float32) []byte) ([][]byte, error) { + // try to unmarshal as [][]float32 first to make sure `[[3, 3]]` is [][]float32 instead of [][]byte + fp32Values := 
make([][]float32, 0) + err := json.Unmarshal([]byte(vectorStr), &fp32Values) + if err == nil { + values := make([][]byte, 0) + for _, vectorArray := range fp32Values { + if int64(len(vectorArray)) != dimension { + return nil, merr.WrapErrParameterInvalid(schemapb.DataType_name[int32(dataType)], fmt.Sprintf("%v", vectorArray), + fmt.Sprintf("dimension: %d, but length of []float: %d", dimension, len(vectorArray))) + } + vectorBytes := serializeFunc(vectorArray) + values = append(values, vectorBytes) + } + return values, nil + } + return serializeByteVectors(vectorStr, dataType, dimension, dimension*2) +} + +func generateVectorsStr() string { + vectors := make([][]float32, 0, 10_000) + for i := 0; i < 10_000; i++ { + vector := make([]float32, 0, 128) + for j := 0; j < 128; j++ { + vector = append(vector, rand.Float32()) + } + vectors = append(vectors, vector) + } + vectorJSON, _ := json.Marshal(vectors) + return string(vectorJSON) +} + +func generateVectorsJSON() gjson.Result { + vectorJSON := generateVectorsStr() + return gjson.Parse(vectorJSON) +} + +func generateByteVectorsStr() string { + vectors := make([][]byte, 0, 10_000) + for i := 0; i < 10_000; i++ { + vector := make([]byte, 0, 128*4) + for j := 0; j < 128*4; j++ { + vector = append(vector, byte(rand.Intn(256))) + } + vectors = append(vectors, vector) + } + vectorJSON, _ := json.Marshal(vectors) + return string(vectorJSON) +} + +func generateByteVectorsJSON() gjson.Result { + vectorJSON := generateByteVectorsStr() + return gjson.Parse(vectorJSON) +} + +func BenchmarkSerialize_FloatVectors_Baseline(b *testing.B) { + vectorsJSON := generateVectorsJSON() + b.ResetTimer() + for i := 0; i < b.N; i++ { + _, err := serializeFloatVectorsBaseline(vectorsJSON.Array(), schemapb.DataType_FloatVector, 128, -1) + assert.Nil(b, err) + } +} + +func BenchmarkSerialize_FloatVectors(b *testing.B) { + vectorsJSON := generateVectorsJSON() + b.ResetTimer() + for i := 0; i < b.N; i++ { + _, err := 
serializeFloatVectors(vectorsJSON.Raw, schemapb.DataType_FloatVector, 128, -1, typeutil.Float32ArrayToBytes) + assert.Nil(b, err) + } +} + +func BenchmarkSerialize_FloatVectors_Float16(b *testing.B) { + vectorsJSON := generateVectorsJSON() + b.ResetTimer() + for i := 0; i < b.N; i++ { + _, err := serializeFloatVectors(vectorsJSON.Raw, schemapb.DataType_Float16Vector, 128, -1, typeutil.Float32ArrayToFloat16Bytes) + assert.Nil(b, err) + } +} + +func BenchmarkSerialize_FloatOrByteVectors_Fp32(b *testing.B) { + vectorsJSON := generateVectorsJSON() + b.ResetTimer() + for i := 0; i < b.N; i++ { + _, err := serializeFloatOrByteVectors(vectorsJSON, schemapb.DataType_Float16Vector, 128, typeutil.Float32ArrayToFloat16Bytes) + assert.Nil(b, err) + } +} + +func BenchmarkSerialize_FloatOrByteVectors_Byte(b *testing.B) { + vectorsJSON := generateByteVectorsJSON() + b.ResetTimer() + for i := 0; i < b.N; i++ { + _, err := serializeFloatOrByteVectors(vectorsJSON, schemapb.DataType_Float16Vector, 256, typeutil.Float32ArrayToFloat16Bytes) + assert.Nil(b, err) + } +} + +func BenchmarkSerialize_FloatOrByteVectors_Fp32_UnmashalTwice(b *testing.B) { + vectorsJSON := generateVectorsJSON() + b.ResetTimer() + for i := 0; i < b.N; i++ { + _, err := serializeFloatOrByteVectorsUnmarshalTwice(vectorsJSON.Raw, schemapb.DataType_Float16Vector, 128, typeutil.Float32ArrayToFloat16Bytes) + assert.Nil(b, err) + } +} + +func BenchmarkSerialize_FloatOrByteVectors_Byte_UnmashalTwice(b *testing.B) { + vectorsJSON := generateByteVectorsJSON() + b.ResetTimer() + for i := 0; i < b.N; i++ { + _, err := serializeFloatOrByteVectorsUnmarshalTwice(vectorsJSON.Raw, schemapb.DataType_Float16Vector, 256, typeutil.Float32ArrayToFloat16Bytes) + assert.Nil(b, err) + } +} + +func BenchmarkSerialize_ByteVectors(b *testing.B) { + vectorsJSON := generateByteVectorsJSON() + b.ResetTimer() + for i := 0; i < b.N; i++ { + _, err := serializeByteVectors(vectorsJSON.Raw, schemapb.DataType_BinaryVector, -1, 512) + assert.Nil(b, 
err) + } +} diff --git a/internal/distributed/proxy/httpserver/utils_test.go b/internal/distributed/proxy/httpserver/utils_test.go index 9704bf399b..5228a487e0 100644 --- a/internal/distributed/proxy/httpserver/utils_test.go +++ b/internal/distributed/proxy/httpserver/utils_test.go @@ -907,11 +907,14 @@ func TestInsertWithDefaultValueField(t *testing.T) { func TestSerialize(t *testing.T) { parameters := []float32{0.11111, 0.22222} - assert.Equal(t, "\xa4\x8d\xe3=\xa4\x8dc>", string(serialize(parameters))) assert.Equal(t, "\n\x10\n\x02$0\x10e\x1a\b\xa4\x8d\xe3=\xa4\x8dc>", string(vectors2PlaceholderGroupBytes([][]float32{parameters}))) // todo + + // test serialize fp32 to {fp32, fp16, bf16} requestBody := "{\"data\": [[0.11111, 0.22222]]}" vectors := gjson.Get(requestBody, HTTPRequestData) - values, err := serializeFloatVectors(vectors.Array(), schemapb.DataType_FloatVector, 2, -1) + + // fp32 -> fp32 + values, err := serializeFloatVectors(vectors.Raw, schemapb.DataType_FloatVector, 2, -1, typeutil.Float32ArrayToBytes) assert.Nil(t, err) placeholderValue := &commonpb.PlaceholderValue{ Tag: "$0", @@ -925,6 +928,32 @@ func TestSerialize(t *testing.T) { }) assert.Nil(t, err) assert.Equal(t, "\n\x10\n\x02$0\x10e\x1a\b\xa4\x8d\xe3=\xa4\x8dc>", string(bytes)) // todo + + // fp32 -> fp16/bf16 + for _, testcase := range []struct { + dataType schemapb.DataType + serializeFunc func([]float32) []byte + byteStr string + }{ + {schemapb.DataType_Float16Vector, typeutil.Float32ArrayToFloat16Bytes, "\n\f\n\x02$0\x10e\x1a\x04\x1c/\x1c3"}, + {schemapb.DataType_BFloat16Vector, typeutil.Float32ArrayToBFloat16Bytes, "\n\f\n\x02$0\x10e\x1a\x04\xe3=c>"}, + } { + values, err = serializeFloatOrByteVectors(vectors, testcase.dataType, 2, testcase.serializeFunc) + assert.Nil(t, err) + placeholderValue := &commonpb.PlaceholderValue{ + Tag: "$0", + Type: commonpb.PlaceholderType_FloatVector, + Values: values, + } + bytes, err := proto.Marshal(&commonpb.PlaceholderGroup{ + Placeholders: 
[]*commonpb.PlaceholderValue{ + placeholderValue, + }, + }) + assert.Nil(t, err) + assert.Equal(t, testcase.byteStr, string(bytes)) + } + for _, dataType := range []schemapb.DataType{schemapb.DataType_BinaryVector, schemapb.DataType_Float16Vector, schemapb.DataType_BFloat16Vector} { request := map[string]interface{}{ HTTPRequestData: []interface{}{ @@ -947,6 +976,209 @@ func TestSerialize(t *testing.T) { } } +func TestConvertQueries2Placeholder(t *testing.T) { + fp16Req := map[string]interface{}{ + HTTPRequestData: []interface{}{ + typeutil.Float32ArrayToFloat16Bytes([]float32{0.11111, 0.22222, 0, -1}), + typeutil.Float32ArrayToFloat16Bytes([]float32{1, 1, 1, 1}), + }, + } + fp16ReqBody, _ := json.Marshal(fp16Req) + const Float16VecJSON = `{"data":["HC8cMwAAALw=","ADwAPAA8ADw="]}` + assert.Equal(t, Float16VecJSON, string(fp16ReqBody)) + + bf16Req := map[string]interface{}{ + HTTPRequestData: []interface{}{ + typeutil.Float32ArrayToBFloat16Bytes([]float32{0.11111, 0.22222, 0, -1}), + typeutil.Float32ArrayToBFloat16Bytes([]float32{1, 1, 1, 1}), + }, + } + bf16ReqBody, _ := json.Marshal(bf16Req) + const BFloat16VecJSON = `{"data":["4z1jPgAAgL8=","gD+AP4A/gD8="]}` + assert.Equal(t, BFloat16VecJSON, string(bf16ReqBody)) + + type testCase struct { + requestBody string + dataType schemapb.DataType + dim int64 + placehoderValue func() [][]byte + } + testCases := make([]testCase, 0) + + for _, dataType := range []schemapb.DataType{schemapb.DataType_Float16Vector, schemapb.DataType_BFloat16Vector, schemapb.DataType_FloatVector} { + // corner case: empty data + testCases = append(testCases, []testCase{ + { + "{\"data\": []}", + dataType, + 0, + func() [][]byte { + return [][]byte{} + }, + }, { + "{\"data\": []}", + dataType, + 100, + func() [][]byte { + return [][]byte{} + }, + }, { + "{\"data\": [[], []]}", + dataType, + 0, + func() [][]byte { + return [][]byte{{}, {}} + }, + }, + }...) 
+ } + + for _, dataType := range []schemapb.DataType{schemapb.DataType_Float16Vector, schemapb.DataType_BFloat16Vector} { + // corner case: empty float16/bfloat16 vector + testCases = append(testCases, []testCase{ + { + `"{"data": ["", ""]}"`, + dataType, + 0, + func() [][]byte { + return [][]byte{nil, nil} + }, + }, { + `"{"data": [""]}"`, + dataType, + 0, + func() [][]byte { + return [][]byte{nil} + }, + }, + }...) + } + + testCases = append(testCases, []testCase{ + { + "{\"data\": [[0.11111, 0.22222]]}", + schemapb.DataType_FloatVector, + 2, + func() [][]byte { + bv := typeutil.Float32ArrayToBytes([]float32{0.11111, 0.22222}) + return [][]byte{bv} + }, + }, { + "{\"data\": [[0.11111, 0.22222, 0, -1]]}", + schemapb.DataType_Float16Vector, + 4, + func() [][]byte { + bv := typeutil.Float32ArrayToFloat16Bytes([]float32{0.11111, 0.22222, 0, -1}) + return [][]byte{bv} + }, + }, { + "{\"data\": [[0.11111, 0.22222, 0, -1], [1, 1, 1, 1]]}", + schemapb.DataType_Float16Vector, + 4, + func() [][]byte { + bv1 := typeutil.Float32ArrayToFloat16Bytes([]float32{0.11111, 0.22222, 0, -1}) + bv2 := typeutil.Float32ArrayToFloat16Bytes([]float32{1, 1, 1, 1}) + return [][]byte{bv1, bv2} + }, + }, { + "{\"data\": [[0.11111, 0.22222, 0, -1], [1, 1, 1, 1]]}", + schemapb.DataType_BFloat16Vector, + 4, + func() [][]byte { + bv1 := typeutil.Float32ArrayToBFloat16Bytes([]float32{0.11111, 0.22222, 0, -1}) + bv2 := typeutil.Float32ArrayToBFloat16Bytes([]float32{1, 1, 1, 1}) + return [][]byte{bv1, bv2} + }, + }, { + Float16VecJSON, + schemapb.DataType_Float16Vector, + 4, + func() [][]byte { + bv1 := typeutil.Float32ArrayToFloat16Bytes([]float32{0.11111, 0.22222, 0, -1}) + bv2 := typeutil.Float32ArrayToFloat16Bytes([]float32{1, 1, 1, 1}) + return [][]byte{bv1, bv2} + }, + }, { + BFloat16VecJSON, + schemapb.DataType_BFloat16Vector, + 4, + func() [][]byte { + bv1 := typeutil.Float32ArrayToBFloat16Bytes([]float32{0.11111, 0.22222, 0, -1}) + bv2 := typeutil.Float32ArrayToBFloat16Bytes([]float32{1, 1, 
1, 1}) + return [][]byte{bv1, bv2} + }, + }, + }...) + + for _, testcase := range testCases { + phv, err := convertQueries2Placeholder(testcase.requestBody, testcase.dataType, testcase.dim) + assert.Nil(t, err) + assert.Equal(t, testcase.placehoderValue(), phv.GetValues()) + } + + for _, testcase := range []testCase{ + // mismatched Datatype + { + "{\"data\": [[0.11111, 0.22222, 0, -1], [1, 1, 1, 1]]}", + schemapb.DataType_Float16Vector, + 4, + func() [][]byte { + bv1 := typeutil.Float32ArrayToBFloat16Bytes([]float32{0.11111, 0.22222, 0, -1}) + bv2 := typeutil.Float32ArrayToBFloat16Bytes([]float32{1, 1, 1, 1}) + return [][]byte{bv1, bv2} + }, + }, { + "{\"data\": [[0.11111, 0.22222, 0, -1], [1, 1, 1, 1]]}", + schemapb.DataType_BFloat16Vector, + 4, + func() [][]byte { + bv1 := typeutil.Float32ArrayToFloat16Bytes([]float32{0.11111, 0.22222, 0, -1}) + bv2 := typeutil.Float32ArrayToFloat16Bytes([]float32{1, 1, 1, 1}) + return [][]byte{bv1, bv2} + }, + }, + } { + phv, err := convertQueries2Placeholder(testcase.requestBody, testcase.dataType, testcase.dim) + assert.Nil(t, err) + assert.NotEqual(t, testcase.placehoderValue(), phv.GetValues()) + } + + for _, testcase := range []testCase{ + // mismatched dimension + { + "{\"data\": [[0.11111, 0.22222, 0, -1], [1, 1, 1, 1]]}", + schemapb.DataType_Float16Vector, + 2, + func() [][]byte { + bv1 := typeutil.Float32ArrayToBFloat16Bytes([]float32{0.11111, 0.22222, 0, -1}) + bv2 := typeutil.Float32ArrayToBFloat16Bytes([]float32{1, 1, 1, 1}) + return [][]byte{bv1, bv2} + }, + }, { + "{\"data\": [[0.11111, 0.22222, 0, -1], [1, 1, 1, 1]]}", + schemapb.DataType_BFloat16Vector, + 8, + func() [][]byte { + bv1 := typeutil.Float32ArrayToFloat16Bytes([]float32{0.11111, 0.22222, 0, -1}) + bv2 := typeutil.Float32ArrayToFloat16Bytes([]float32{1, 1, 1, 1}) + return [][]byte{bv1, bv2} + }, + }, { + "{\"data\": [[0.11111, 0.22222, 0, -1], [1, 1, 1]]}", + schemapb.DataType_BFloat16Vector, + 4, + func() [][]byte { + bv1 := 
typeutil.Float32ArrayToFloat16Bytes([]float32{0.11111, 0.22222, 0, -1}) + bv2 := typeutil.Float32ArrayToFloat16Bytes([]float32{1, 1, 1, 1}) + return [][]byte{bv1, bv2} + }, + }, + } { + _, err := convertQueries2Placeholder(testcase.requestBody, testcase.dataType, testcase.dim) + assert.NotNil(t, err) + } +} + func compareRow64(m1 map[string]interface{}, m2 map[string]interface{}) bool { for key, value := range m1 { if key == FieldBookIntro { @@ -1815,31 +2047,50 @@ func TestVector(t *testing.T) { float16Vector := "vector-float16" bfloat16Vector := "vector-bfloat16" sparseFloatVector := "vector-sparse-float" - row1 := map[string]interface{}{ - FieldBookID: int64(1), - floatVector: []float32{0.1, 0.11}, - binaryVector: []byte{1}, - float16Vector: []byte{1, 1, 11, 11}, - bfloat16Vector: []byte{1, 1, 11, 11}, - sparseFloatVector: map[uint32]float32{0: 0.1, 1: 0.11}, + testcaseRows := []map[string]interface{}{ + { + FieldBookID: int64(1), + floatVector: []float32{0.1, 0.11}, + binaryVector: []byte{1}, + float16Vector: []byte{1, 1, 11, 11}, + bfloat16Vector: []byte{1, 1, 11, 11}, + sparseFloatVector: map[uint32]float32{0: 0.1, 1: 0.11}, + }, + { + FieldBookID: int64(2), + floatVector: []float32{0.2, 0.22}, + binaryVector: []byte{2}, + float16Vector: []byte{2, 2, 22, 22}, + bfloat16Vector: []byte{2, 2, 22, 22}, + sparseFloatVector: map[uint32]float32{1000: 0.3, 200: 0.44}, + }, + { + FieldBookID: int64(3), + floatVector: []float32{0.3, 0.33}, + binaryVector: []byte{3}, + float16Vector: []byte{3, 3, 33, 33}, + bfloat16Vector: []byte{3, 3, 33, 33}, + sparseFloatVector: map[uint32]float32{987621: 32190.31, 32189: 0.0001}, + }, + { + FieldBookID: int64(4), + floatVector: []float32{0.4, 0.44}, + binaryVector: []byte{4}, + float16Vector: []float32{0.4, 0.44}, + bfloat16Vector: []float32{0.4, 0.44}, + sparseFloatVector: map[uint32]float32{25: 0.1, 1: 0.11}, + }, + { + FieldBookID: int64(5), + floatVector: []float32{-0.4, -0.44}, + binaryVector: []byte{5}, + float16Vector: 
[]int64{99999999, -99999999}, + bfloat16Vector: []int64{99999999, -99999999}, + sparseFloatVector: map[uint32]float32{1121: 0.1, 3: 0.11}, + }, } - row2 := map[string]interface{}{ - FieldBookID: int64(2), - floatVector: []float32{0.2, 0.22}, - binaryVector: []byte{2}, - float16Vector: []byte{2, 2, 22, 22}, - bfloat16Vector: []byte{2, 2, 22, 22}, - sparseFloatVector: map[uint32]float32{1000: 0.3, 200: 0.44}, - } - row3 := map[string]interface{}{ - FieldBookID: int64(3), - floatVector: []float32{0.3, 0.33}, - binaryVector: []byte{3}, - float16Vector: []byte{3, 3, 33, 33}, - bfloat16Vector: []byte{3, 3, 33, 33}, - sparseFloatVector: map[uint32]float32{987621: 32190.31, 32189: 0.0001}, - } - body, _ := wrapRequestBody([]map[string]interface{}{row1, row2, row3}) + body, err := wrapRequestBody(testcaseRows) + assert.Nil(t, err) primaryField := generatePrimaryField(schemapb.DataType_Int64, false) floatVectorField := generateVectorFieldSchema(schemapb.DataType_FloatVector) floatVectorField.Name = floatVector @@ -1862,10 +2113,25 @@ func TestVector(t *testing.T) { } err, rows, validRows := checkAndSetData(string(body), collectionSchema) assert.Equal(t, nil, err) - for _, row := range rows { + for i, row := range rows { + assert.Equal(t, 2, len(row[floatVector].([]float32))) assert.Equal(t, 1, len(row[binaryVector].([]byte))) - assert.Equal(t, 4, len(row[float16Vector].([]byte))) - assert.Equal(t, 4, len(row[bfloat16Vector].([]byte))) + if fv, ok := testcaseRows[i][float16Vector].([]float32); ok { + assert.Equal(t, fv, row[float16Vector].([]float32)) + } else if iv, ok := testcaseRows[i][float16Vector].([]int64); ok { + assert.Equal(t, len(iv), len(row[float16Vector].([]float32))) + } else { + assert.Equal(t, 4, len(row[float16Vector].([]byte))) + assert.Equal(t, testcaseRows[i][float16Vector].([]byte), row[float16Vector].([]byte)) + } + if fv, ok := testcaseRows[i][bfloat16Vector].([]float32); ok { + assert.Equal(t, fv, row[float16Vector].([]float32)) + } else if iv, ok := 
testcaseRows[i][bfloat16Vector].([]int64); ok { + assert.Equal(t, len(iv), len(row[bfloat16Vector].([]float32))) + } else { + assert.Equal(t, 4, len(row[bfloat16Vector].([]byte))) + assert.Equal(t, testcaseRows[i][bfloat16Vector].([]byte), row[bfloat16Vector].([]byte)) + } // all test sparse rows have 2 elements, each should be of 8 bytes assert.Equal(t, 16, len(row[sparseFloatVector].([]byte))) } @@ -1876,7 +2142,7 @@ func TestVector(t *testing.T) { assertError := func(field string, value interface{}) { row := make(map[string]interface{}) - for k, v := range row1 { + for k, v := range testcaseRows[0] { row[k] = v } row[field] = value @@ -1885,8 +2151,6 @@ func TestVector(t *testing.T) { assert.Error(t, err) } - assertError(bfloat16Vector, []int64{99999999, -99999999}) - assertError(float16Vector, []int64{99999999, -99999999}) assertError(binaryVector, []int64{99999999, -99999999}) assertError(floatVector, []float64{math.MaxFloat64, 0}) assertError(sparseFloatVector, map[uint32]float32{0: -0.1, 1: 0.11, 2: 0.12}) diff --git a/internal/distributed/proxy/httpserver/wrap_request.go b/internal/distributed/proxy/httpserver/wrap_request.go index 7e2fa5ad9d..2536d0fcd3 100644 --- a/internal/distributed/proxy/httpserver/wrap_request.go +++ b/internal/distributed/proxy/httpserver/wrap_request.go @@ -17,9 +17,7 @@ package httpserver import ( - "encoding/binary" "fmt" - "math" "github.com/cockroachdb/errors" "google.golang.org/protobuf/proto" @@ -93,6 +91,27 @@ type FieldData struct { FieldID int64 `json:"field_id,omitempty"` } +func (f *FieldData) makePbFloat16OrBfloat16Array(raw json.RawMessage, serializeFunc func([]float32) []byte) ([]byte, int64, error) { + wrappedData := [][]float32{} + err := json.Unmarshal(raw, &wrappedData) + if err != nil { + return nil, 0, newFieldDataError(f.FieldName, err) + } + if len(wrappedData) < 1 { + return nil, 0, errors.New("at least one row for insert") + } + array0 := wrappedData[0] + dim := len(array0) + if dim < 1 { + return nil, 0, 
errors.New("dim must >= 1") + } + data := make([]byte, 0, len(wrappedData)*dim*2) + for _, fp32Array := range wrappedData { + data = append(data, serializeFunc(fp32Array)...) + } + return data, int64(dim), nil +} + // AsSchemapb converts the FieldData to schemapb.FieldData func (f *FieldData) AsSchemapb() (*schemapb.FieldData, error) { // is scarlar @@ -229,6 +248,34 @@ func (f *FieldData) AsSchemapb() (*schemapb.FieldData, error) { }, }, } + case schemapb.DataType_Float16Vector: + // only support float32 conversion right now + data, dim, err := f.makePbFloat16OrBfloat16Array(raw, typeutil.Float32ArrayToFloat16Bytes) + if err != nil { + return nil, err + } + ret.Field = &schemapb.FieldData_Vectors{ + Vectors: &schemapb.VectorField{ + Dim: dim, + Data: &schemapb.VectorField_Float16Vector{ + Float16Vector: data, + }, + }, + } + case schemapb.DataType_BFloat16Vector: + // only support float32 conversion right now + data, dim, err := f.makePbFloat16OrBfloat16Array(raw, typeutil.Float32ArrayToBFloat16Bytes) + if err != nil { + return nil, err + } + ret.Field = &schemapb.FieldData_Vectors{ + Vectors: &schemapb.VectorField{ + Dim: dim, + Data: &schemapb.VectorField_Bfloat16Vector{ + Bfloat16Vector: data, + }, + }, + } case schemapb.DataType_SparseFloatVector: var wrappedData []map[string]interface{} err := json.Unmarshal(raw, &wrappedData) @@ -325,7 +372,7 @@ func vector2Bytes(vectors [][]float32) []byte { Values: make([][]byte, 0, len(vectors)), } for _, vector := range vectors { - ph.Values = append(ph.Values, serializeVectors(vector)) + ph.Values = append(ph.Values, typeutil.Float32ArrayToBytes(vector)) } phg := &commonpb.PlaceholderGroup{ Placeholders: []*commonpb.PlaceholderValue{ @@ -336,18 +383,6 @@ func vector2Bytes(vectors [][]float32) []byte { return ret } -// Serialize serialize vector into byte slice, used in search placeholder -// LittleEndian is used for convention -func serializeVectors(fv []float32) []byte { - data := make([]byte, 0, 4*len(fv)) // float32 
occupies 4 bytes - buf := make([]byte, 4) - for _, f := range fv { - binary.LittleEndian.PutUint32(buf, math.Float32bits(f)) - data = append(data, buf...) - } - return data -} - // WrappedCalcDistanceRequest is the RESTful request body for calc distance type WrappedCalcDistanceRequest struct { Base *commonpb.MsgBase `protobuf:"bytes,1,opt,name=base,proto3" json:"base,omitempty"` diff --git a/internal/distributed/proxy/httpserver/wrap_request_test.go b/internal/distributed/proxy/httpserver/wrap_request_test.go index b9dcd4e621..e402611902 100644 --- a/internal/distributed/proxy/httpserver/wrap_request_test.go +++ b/internal/distributed/proxy/httpserver/wrap_request_test.go @@ -180,61 +180,76 @@ func TestFieldData_AsSchemapb(t *testing.T) { }) // vectors - - t.Run("floatvector_ok", func(t *testing.T) { - fieldData := FieldData{ - Type: schemapb.DataType_FloatVector, - Field: []byte(`[ - [1.1, 2.2, 3.1], - [1.1, 2.2, 3.1], - [1.1, 2.2, 3.1] - ]`), - } - raw, _ := json.Marshal(fieldData) - json.Unmarshal(raw, &fieldData) - _, err := fieldData.AsSchemapb() - assert.NoError(t, err) - }) - t.Run("floatvector_empty_error", func(t *testing.T) { - fieldData := FieldData{ - Type: schemapb.DataType_FloatVector, - Field: []byte(""), - } - raw, _ := json.Marshal(fieldData) - json.Unmarshal(raw, &fieldData) - _, err := fieldData.AsSchemapb() - assert.Error(t, err) - }) - t.Run("floatvector_dim=0_error", func(t *testing.T) { - fieldData := FieldData{ - Type: schemapb.DataType_FloatVector, - Field: []byte(`[]`), - } - raw, _ := json.Marshal(fieldData) - json.Unmarshal(raw, &fieldData) - _, err := fieldData.AsSchemapb() - assert.Error(t, err) - }) - t.Run("floatvector_vectorTypeError_error", func(t *testing.T) { - fieldData := FieldData{ - Type: schemapb.DataType_FloatVector, - Field: []byte(`["1"]`), - } - raw, _ := json.Marshal(fieldData) - json.Unmarshal(raw, &fieldData) - _, err := fieldData.AsSchemapb() - assert.Error(t, err) - }) - t.Run("floatvector_error", func(t *testing.T) 
{ - fieldData := FieldData{ - Type: schemapb.DataType_FloatVector, - Field: []byte(`["a", "b", "c"]`), - } - raw, _ := json.Marshal(fieldData) - json.Unmarshal(raw, &fieldData) - _, err := fieldData.AsSchemapb() - assert.Error(t, err) - }) + testcases := []struct { + name string + dataType schemapb.DataType + }{ + { + "float", schemapb.DataType_FloatVector, + }, + { + "float16", schemapb.DataType_Float16Vector, + }, + { + "bfloat16", schemapb.DataType_BFloat16Vector, + }, + } + for _, tc := range testcases { + t.Run(tc.name+"vector_ok", func(t *testing.T) { + fieldData := FieldData{ + Type: tc.dataType, + Field: []byte(`[ + [1.1, 2.2, 3.1], + [1.1, 2.2, 3.1], + [1.1, 2.2, 3.1] + ]`), + } + raw, _ := json.Marshal(fieldData) + json.Unmarshal(raw, &fieldData) + _, err := fieldData.AsSchemapb() + assert.NoError(t, err) + }) + t.Run(tc.name+"vector_empty_error", func(t *testing.T) { + fieldData := FieldData{ + Type: tc.dataType, + Field: []byte(""), + } + raw, _ := json.Marshal(fieldData) + json.Unmarshal(raw, &fieldData) + _, err := fieldData.AsSchemapb() + assert.Error(t, err) + }) + t.Run(tc.name+"vector_dim=0_error", func(t *testing.T) { + fieldData := FieldData{ + Type: tc.dataType, + Field: []byte(`[]`), + } + raw, _ := json.Marshal(fieldData) + json.Unmarshal(raw, &fieldData) + _, err := fieldData.AsSchemapb() + assert.Error(t, err) + }) + t.Run(tc.name+"vector_vectorTypeError_error", func(t *testing.T) { + fieldData := FieldData{ + Type: tc.dataType, + Field: []byte(`["1"]`), + } + raw, _ := json.Marshal(fieldData) + json.Unmarshal(raw, &fieldData) + _, err := fieldData.AsSchemapb() + assert.Error(t, err) + }) + t.Run(tc.name+"vector_error", func(t *testing.T) { + fieldData := FieldData{ + Type: tc.dataType, + Field: []byte(`["a", "b", "c"]`), + } + raw, _ := json.Marshal(fieldData) + json.Unmarshal(raw, &fieldData) + _, err := fieldData.AsSchemapb() + assert.Error(t, err) + }) + } t.Run("sparsefloatvector_ok_1", func(t *testing.T) { fieldData := FieldData{ diff 
--git a/internal/json/sonic.go b/internal/json/sonic.go index b636172fa6..65cbda9b86 100644 --- a/internal/json/sonic.go +++ b/internal/json/sonic.go @@ -24,15 +24,15 @@ import ( var ( json = sonic.ConfigStd - // Marshal is exported by gin/json package. + // Marshal is exported from bytedance/sonic package. Marshal = json.Marshal - // Unmarshal is exported by gin/json package. + // Unmarshal is exported from bytedance/sonic package. Unmarshal = json.Unmarshal - // MarshalIndent is exported by gin/json package. + // MarshalIndent is exported from bytedance/sonic package. MarshalIndent = json.MarshalIndent - // NewDecoder is exported by gin/json package. + // NewDecoder is exported from bytedance/sonic package. NewDecoder = json.NewDecoder - // NewEncoder is exported by gin/json package. + // NewEncoder is exported from bytedance/sonic package. NewEncoder = json.NewEncoder ) diff --git a/pkg/util/typeutil/convension.go b/pkg/util/typeutil/convension.go index 4d0cbd87e0..8fd27c9b52 100644 --- a/pkg/util/typeutil/convension.go +++ b/pkg/util/typeutil/convension.go @@ -165,3 +165,33 @@ func SparseFloatBytesToMap(b []byte) map[uint32]float32 { } return values } + +// Float32ArrayToBytes serialize vector into byte slice, used in search placeholder +// LittleEndian is used for convention +func Float32ArrayToBytes(fv []float32) []byte { + data := make([]byte, 0, 4*len(fv)) // float32 occupies 4 bytes + buf := make([]byte, 4) + for _, f := range fv { + binary.LittleEndian.PutUint32(buf, math.Float32bits(f)) + data = append(data, buf...) + } + return data +} + +// Float32ArrayToFloat16Bytes converts float32 vector `fv` to float16 vector +func Float32ArrayToFloat16Bytes(fv []float32) []byte { + data := make([]byte, 0, 2*len(fv)) // float16 occupies 2 bytes + for _, f := range fv { + data = append(data, Float32ToFloat16Bytes(f)...) 
+ } + return data +} + +// Float32ArrayToBFloat16Bytes converts float32 vector `fv` to bfloat16 vector +func Float32ArrayToBFloat16Bytes(fv []float32) []byte { + data := make([]byte, 0, 2*len(fv)) // bfloat16 occupies 2 bytes + for _, f := range fv { + data = append(data, Float32ToBFloat16Bytes(f)...) + } + return data +} diff --git a/pkg/util/typeutil/conversion_test.go b/pkg/util/typeutil/conversion_test.go index 56bd88b54a..e4fafe0596 100644 --- a/pkg/util/typeutil/conversion_test.go +++ b/pkg/util/typeutil/conversion_test.go @@ -118,4 +118,22 @@ func TestConversion(t *testing.T) { assert.Less(t, math.Abs(float64(v2/v-1)), 0.01) } }) + + t.Run("TestFloatArrays", func(t *testing.T) { + parameters := []float32{0.11111, 0.22222} + assert.Equal(t, "\xa4\x8d\xe3=\xa4\x8dc>", string(Float32ArrayToBytes(parameters))) + + f16vec := Float32ArrayToFloat16Bytes(parameters) + assert.Equal(t, 4, len(f16vec)) + // \x1c/ is 0.1111, \x1c3 is 0.2222 + assert.Equal(t, "\x1c/\x1c3", string(f16vec)) + assert.Equal(t, "\x1c/", string(Float32ToFloat16Bytes(0.11111))) + assert.Equal(t, "\x1c3", string(Float32ToFloat16Bytes(0.22222))) + + bf16vec := Float32ArrayToBFloat16Bytes(parameters) + assert.Equal(t, 4, len(bf16vec)) + assert.Equal(t, "\xe3=c>", string(bf16vec)) + assert.Equal(t, "\xe3=", string(Float32ToBFloat16Bytes(0.11111))) + assert.Equal(t, "c>", string(Float32ToBFloat16Bytes(0.22222))) + }) } diff --git a/tests/restful_client/testcases/test_restful_sdk_mix_use_scenario.py b/tests/restful_client/testcases/test_restful_sdk_mix_use_scenario.py index 5e7b184f3f..28616ee2f4 100644 --- a/tests/restful_client/testcases/test_restful_sdk_mix_use_scenario.py +++ b/tests/restful_client/testcases/test_restful_sdk_mix_use_scenario.py @@ -141,14 +141,17 @@ class TestRestfulSdkCompatibility(TestBase): FieldSchema(name="json", dtype=DataType.JSON), FieldSchema(name="int_array", dtype=DataType.ARRAY, element_type=DataType.INT64, max_capacity=1024), FieldSchema(name="varchar_array", 
dtype=DataType.ARRAY, element_type=DataType.VARCHAR, max_capacity=1024, max_length=65535), - FieldSchema(name="float_vector", dtype=DataType.FLOAT_VECTOR, dim=128) + FieldSchema(name="float_vector", dtype=DataType.FLOAT_VECTOR, dim=128), + FieldSchema(name="float16_vector", dtype=DataType.FLOAT16_VECTOR, dim=128), + FieldSchema(name="bfloat16_vector", dtype=DataType.BFLOAT16_VECTOR, dim=128), ] default_schema = CollectionSchema(fields=default_fields, description="test collection", enable_dynamic_field=True) collection = Collection(name=name, schema=default_schema) # create index by sdk index_param = {"metric_type": "L2", "index_type": "IVF_FLAT", "params": {"nlist": 128}} - collection.create_index(field_name="float_vector", index_params=index_param) + for field_name in ("float_vector", "float16_vector", "bfloat16_vector"): + collection.create_index(field_name=field_name, index_params=index_param) collection.load() # insert data by restful data = [ @@ -159,6 +162,9 @@ class TestRestfulSdkCompatibility(TestBase): "int_array": [i for i in range(10)], "varchar_array": [str(i) for i in range(10)], "float_vector": [random.random() for _ in range(dim)], + # float16 / bfloat16 field supports float32 arguments + "float16_vector": [random.random() for _ in range(dim)], + "bfloat16_vector": [random.random() for _ in range(dim)], "age": i} for i in range(nb) ] diff --git a/tests/restful_client_v2/testcases/test_vector_operations.py b/tests/restful_client_v2/testcases/test_vector_operations.py index 39640ba4f1..3f158d5e37 100644 --- a/tests/restful_client_v2/testcases/test_vector_operations.py +++ b/tests/restful_client_v2/testcases/test_vector_operations.py @@ -161,8 +161,10 @@ class TestInsertVector(TestBase): @pytest.mark.parametrize("enable_dynamic_schema", [True]) @pytest.mark.parametrize("nb", [3000]) @pytest.mark.parametrize("dim", [128]) + @pytest.mark.parametrize("pass_fp32_to_fp16_or_bf16", [True, False]) def test_insert_entities_with_all_vector_datatype(self, nb, 
dim, insert_round, auto_id, - is_partition_key, enable_dynamic_schema): + is_partition_key, enable_dynamic_schema, + pass_fp32_to_fp16_or_bf16): """ Insert a vector with a simple payload """ @@ -210,9 +212,17 @@ class TestInsertVector(TestBase): "word_count": i, "book_describe": f"book_{i}", "float_vector": gen_vector(datatype="FloatVector", dim=dim), - "float16_vector": gen_vector(datatype="Float16Vector", dim=dim), - "bfloat16_vector": gen_vector(datatype="BFloat16Vector", dim=dim), - "binary_vector": gen_vector(datatype="BinaryVector", dim=dim) + "float16_vector": ( + gen_vector(datatype="FloatVector", dim=dim) + if pass_fp32_to_fp16_or_bf16 + else gen_vector(datatype="Float16Vector", dim=dim) + ), + "bfloat16_vector": ( + gen_vector(datatype="FloatVector", dim=dim) + if pass_fp32_to_fp16_or_bf16 + else gen_vector(datatype="BFloat16Vector", dim=dim) + ), + "binary_vector": gen_vector(datatype="BinaryVector", dim=dim), } else: tmp = { @@ -221,8 +231,16 @@ class TestInsertVector(TestBase): "word_count": i, "book_describe": f"book_{i}", "float_vector": gen_vector(datatype="FloatVector", dim=dim), - "float16_vector": gen_vector(datatype="Float16Vector", dim=dim), - "bfloat16_vector": gen_vector(datatype="BFloat16Vector", dim=dim), + "float16_vector": ( + gen_vector(datatype="FloatVector", dim=dim) + if pass_fp32_to_fp16_or_bf16 + else gen_vector(datatype="Float16Vector", dim=dim) + ), + "bfloat16_vector": ( + gen_vector(datatype="FloatVector", dim=dim) + if pass_fp32_to_fp16_or_bf16 + else gen_vector(datatype="BFloat16Vector", dim=dim) + ), "binary_vector": gen_vector(datatype="BinaryVector", dim=dim) } if enable_dynamic_schema: @@ -253,8 +271,10 @@ class TestInsertVector(TestBase): @pytest.mark.parametrize("enable_dynamic_schema", [True]) @pytest.mark.parametrize("nb", [3000]) @pytest.mark.parametrize("dim", [128]) + @pytest.mark.parametrize("pass_fp32_to_fp16_or_bf16", [True, False]) def test_insert_entities_with_all_vector_datatype_0(self, nb, dim, insert_round, 
auto_id, - is_partition_key, enable_dynamic_schema): + is_partition_key, enable_dynamic_schema, + pass_fp32_to_fp16_or_bf16): """ Insert a vector with a simple payload """ @@ -307,8 +327,16 @@ class TestInsertVector(TestBase): "book_describe": f"book_{i}", "book_vector": gen_vector(datatype="FloatVector", dim=dim), "float_vector": gen_vector(datatype="FloatVector", dim=dim), - "float16_vector": gen_vector(datatype="Float16Vector", dim=dim), - "bfloat16_vector": gen_vector(datatype="BFloat16Vector", dim=dim), + "float16_vector": ( + gen_vector(datatype="FloatVector", dim=dim) + if pass_fp32_to_fp16_or_bf16 + else gen_vector(datatype="Float16Vector", dim=dim) + ), + "bfloat16_vector": ( + gen_vector(datatype="FloatVector", dim=dim) + if pass_fp32_to_fp16_or_bf16 + else gen_vector(datatype="BFloat16Vector", dim=dim) + ), } else: tmp = { @@ -318,8 +346,16 @@ class TestInsertVector(TestBase): "book_describe": f"book_{i}", "book_vector": gen_vector(datatype="FloatVector", dim=dim), "float_vector": gen_vector(datatype="FloatVector", dim=dim), - "float16_vector": gen_vector(datatype="Float16Vector", dim=dim), - "bfloat16_vector": gen_vector(datatype="BFloat16Vector", dim=dim), + "float16_vector": ( + gen_vector(datatype="FloatVector", dim=dim) + if pass_fp32_to_fp16_or_bf16 + else gen_vector(datatype="Float16Vector", dim=dim) + ), + "bfloat16_vector": ( + gen_vector(datatype="FloatVector", dim=dim) + if pass_fp32_to_fp16_or_bf16 + else gen_vector(datatype="BFloat16Vector", dim=dim) + ), } if enable_dynamic_schema: tmp.update({f"dynamic_field_{i}": i}) @@ -349,8 +385,10 @@ class TestInsertVector(TestBase): @pytest.mark.parametrize("enable_dynamic_schema", [True]) @pytest.mark.parametrize("nb", [3000]) @pytest.mark.parametrize("dim", [128]) + @pytest.mark.parametrize("pass_fp32_to_fp16_or_bf16", [True, False]) def test_insert_entities_with_all_vector_datatype_1(self, nb, dim, insert_round, auto_id, - is_partition_key, enable_dynamic_schema): + is_partition_key, 
enable_dynamic_schema, + pass_fp32_to_fp16_or_bf16): """ Insert a vector with a simple payload """ @@ -399,8 +437,16 @@ class TestInsertVector(TestBase): "word_count": i, "book_describe": f"book_{i}", "float_vector": gen_vector(datatype="FloatVector", dim=dim), - "float16_vector": gen_vector(datatype="Float16Vector", dim=dim), - "bfloat16_vector": gen_vector(datatype="BFloat16Vector", dim=dim), + "float16_vector": ( + gen_vector(datatype="FloatVector", dim=dim) + if pass_fp32_to_fp16_or_bf16 + else gen_vector(datatype="Float16Vector", dim=dim) + ), + "bfloat16_vector": ( + gen_vector(datatype="FloatVector", dim=dim) + if pass_fp32_to_fp16_or_bf16 + else gen_vector(datatype="BFloat16Vector", dim=dim) + ), } else: tmp = { @@ -409,8 +455,16 @@ class TestInsertVector(TestBase): "word_count": i, "book_describe": f"book_{i}", "float_vector": gen_vector(datatype="FloatVector", dim=dim), - "float16_vector": gen_vector(datatype="Float16Vector", dim=dim), - "bfloat16_vector": gen_vector(datatype="BFloat16Vector", dim=dim), + "float16_vector": ( + gen_vector(datatype="FloatVector", dim=dim) + if pass_fp32_to_fp16_or_bf16 + else gen_vector(datatype="Float16Vector", dim=dim) + ), + "bfloat16_vector": ( + gen_vector(datatype="FloatVector", dim=dim) + if pass_fp32_to_fp16_or_bf16 + else gen_vector(datatype="BFloat16Vector", dim=dim) + ), } if enable_dynamic_schema: tmp.update({f"dynamic_field_{i}": i}) @@ -634,7 +688,6 @@ class TestInsertVector(TestBase): assert len(rsp['data']) == 50 - @pytest.mark.L0 class TestInsertVectorNegative(TestBase): @@ -937,8 +990,10 @@ class TestSearchVector(TestBase): @pytest.mark.parametrize("enable_dynamic_schema", [True]) @pytest.mark.parametrize("nb", [3000]) @pytest.mark.parametrize("dim", [16]) + @pytest.mark.parametrize("pass_fp32_to_fp16_or_bf16", [True, False]) def test_search_vector_with_all_vector_datatype(self, nb, dim, insert_round, auto_id, - is_partition_key, enable_dynamic_schema): + is_partition_key, enable_dynamic_schema, + 
pass_fp32_to_fp16_or_bf16): """ Insert a vector with a simple payload """ @@ -986,8 +1041,16 @@ class TestSearchVector(TestBase): "word_count": i, "book_describe": f"book_{i}", "float_vector": gen_vector(datatype="FloatVector", dim=dim), - "float16_vector": gen_vector(datatype="Float16Vector", dim=dim), - "bfloat16_vector": gen_vector(datatype="BFloat16Vector", dim=dim), + "float16_vector": ( + gen_vector(datatype="FloatVector", dim=dim) + if pass_fp32_to_fp16_or_bf16 + else gen_vector(datatype="Float16Vector", dim=dim) + ), + "bfloat16_vector": ( + gen_vector(datatype="FloatVector", dim=dim) + if pass_fp32_to_fp16_or_bf16 + else gen_vector(datatype="BFloat16Vector", dim=dim) + ), "binary_vector": gen_vector(datatype="BinaryVector", dim=dim) } else: @@ -997,8 +1060,16 @@ class TestSearchVector(TestBase): "word_count": i, "book_describe": f"book_{i}", "float_vector": gen_vector(datatype="FloatVector", dim=dim), - "float16_vector": gen_vector(datatype="Float16Vector", dim=dim), - "bfloat16_vector": gen_vector(datatype="BFloat16Vector", dim=dim), + "float16_vector": ( + gen_vector(datatype="FloatVector", dim=dim) + if pass_fp32_to_fp16_or_bf16 + else gen_vector(datatype="Float16Vector", dim=dim) + ), + "bfloat16_vector": ( + gen_vector(datatype="FloatVector", dim=dim) + if pass_fp32_to_fp16_or_bf16 + else gen_vector(datatype="BFloat16Vector", dim=dim) + ), "binary_vector": gen_vector(datatype="BinaryVector", dim=dim) } if enable_dynamic_schema: @@ -1985,7 +2056,6 @@ class TestSearchVector(TestBase): assert token in d[field] - @pytest.mark.L0 class TestSearchVectorNegative(TestBase): @@ -2210,7 +2280,6 @@ class TestAdvancedSearchVector(TestBase): assert len(rsp['data']) == 10 - @pytest.mark.L0 class TestHybridSearchVector(TestBase): @@ -2318,8 +2387,6 @@ class TestHybridSearchVector(TestBase): assert len(rsp['data']) == 10 - - @pytest.mark.L0 class TestQueryVector(TestBase): @@ -2463,8 +2530,10 @@ class TestQueryVector(TestBase): 
@pytest.mark.parametrize("enable_dynamic_schema", [True]) @pytest.mark.parametrize("nb", [3000]) @pytest.mark.parametrize("dim", [128]) + @pytest.mark.parametrize("pass_fp32_to_fp16_or_bf16", [True, False]) def test_query_entities_with_all_vector_datatype(self, nb, dim, insert_round, auto_id, - is_partition_key, enable_dynamic_schema): + is_partition_key, enable_dynamic_schema, + pass_fp32_to_fp16_or_bf16): """ Insert a vector with a simple payload """ @@ -2512,8 +2581,16 @@ class TestQueryVector(TestBase): "word_count": i, "book_describe": f"book_{i}", "float_vector": gen_vector(datatype="FloatVector", dim=dim), - "float16_vector": gen_vector(datatype="Float16Vector", dim=dim), - "bfloat16_vector": gen_vector(datatype="BFloat16Vector", dim=dim), + "float16_vector": ( + gen_vector(datatype="FloatVector", dim=dim) + if pass_fp32_to_fp16_or_bf16 + else gen_vector(datatype="Float16Vector", dim=dim) + ), + "bfloat16_vector": ( + gen_vector(datatype="FloatVector", dim=dim) + if pass_fp32_to_fp16_or_bf16 + else gen_vector(datatype="BFloat16Vector", dim=dim) + ), "binary_vector": gen_vector(datatype="BinaryVector", dim=dim) } else: @@ -2523,8 +2600,16 @@ class TestQueryVector(TestBase): "word_count": i, "book_describe": f"book_{i}", "float_vector": gen_vector(datatype="FloatVector", dim=dim), - "float16_vector": gen_vector(datatype="Float16Vector", dim=dim), - "bfloat16_vector": gen_vector(datatype="BFloat16Vector", dim=dim), + "float16_vector": ( + gen_vector(datatype="FloatVector", dim=dim) + if pass_fp32_to_fp16_or_bf16 + else gen_vector(datatype="Float16Vector", dim=dim) + ), + "bfloat16_vector": ( + gen_vector(datatype="FloatVector", dim=dim) + if pass_fp32_to_fp16_or_bf16 + else gen_vector(datatype="BFloat16Vector", dim=dim) + ), "binary_vector": gen_vector(datatype="BinaryVector", dim=dim) } if enable_dynamic_schema: @@ -2821,8 +2906,6 @@ class TestQueryVector(TestBase): assert token in d[field] - - @pytest.mark.L0 class TestQueryVectorNegative(TestBase): diff 
--git a/tests/scripts/e2e-restful.sh b/tests/scripts/e2e-restful.sh index 5224fd763c..fda700cf62 100755 --- a/tests/scripts/e2e-restful.sh +++ b/tests/scripts/e2e-restful.sh @@ -23,93 +23,159 @@ MILVUS_SERVICE_NAME=$(echo "${MILVUS_HELM_RELEASE_NAME}-milvus.${MILVUS_HELM_NAM MILVUS_SERVICE_ADDRESS="${MILVUS_SERVICE_NAME}:9091" # Create a collection -curl -X 'POST' \ +if curl -X 'POST' \ "http://${MILVUS_SERVICE_ADDRESS}/api/v1/collection" \ -H 'accept: application/json' \ -H 'Content-Type: application/json' \ - -d @${DATA_PATH}/create-collection.json + -d @${DATA_PATH}/create-collection.json | grep -q "error_code" ; then + exit 1 +fi # Has collection -curl -X 'GET' \ +if curl -X 'GET' \ "http://${MILVUS_SERVICE_ADDRESS}/api/v1/collection/existence" \ -H 'accept: application/json' \ -H 'Content-Type: application/json' \ -d '{ "collection_name": "book" - }' + }' | grep -q "error_code" ; then + exit 1 +fi # Check collection details -curl -X 'GET' \ +if curl -X 'GET' \ "http://${MILVUS_SERVICE_ADDRESS}/api/v1/collection" \ -H 'accept: application/json' \ -H 'Content-Type: application/json' \ -d '{ "collection_name": "book" - }' + }' | grep -q "error_code" ; then + exit 1 +fi -# Load collection -curl -X 'POST' \ - "http://${MILVUS_SERVICE_ADDRESS}/api/v1/collection/load" \ - -H 'accept: application/json' \ - -H 'Content-Type: application/json' \ - -d '{ - "collection_name": "book" - }' ### Data # Insert Data -curl -X 'POST' \ +if curl -X 'POST' \ "http://${MILVUS_SERVICE_ADDRESS}/api/v1/entities" \ -H 'accept: application/json' \ -H 'Content-Type: application/json' \ - -d @${DATA_PATH}/insert-data.json + -d @${DATA_PATH}/insert-data.json | grep -q "error_code" ; then + exit 1 +fi -# Build Index -curl -X 'POST' \ +# Build Index for book_intro +if curl -X 'POST' \ "http://${MILVUS_SERVICE_ADDRESS}/api/v1/index" \ -H 'accept: application/json' \ -H 'Content-Type: application/json' \ -d '{ "collection_name": "book", "field_name": "book_intro", + "index_name": 
"book_intro_index", "extra_params":[ {"key": "metric_type", "value": "L2"}, {"key": "index_type", "value": "IVF_FLAT"}, {"key": "params", "value": "{\"nlist\":1024}"} ] - }' + }' | grep -q "error_code" ; then + exit 1 +fi -# KNN Search -curl -X 'POST' \ - "http://${MILVUS_SERVICE_ADDRESS}/api/v1/search" \ - -H 'accept: application/json' \ - -H 'Content-Type: application/json' \ - -d @${DATA_PATH}/search.json - -# Drop Index -curl -X 'DELETE' \ +# Build Index for author_intro +if curl -X 'POST' \ "http://${MILVUS_SERVICE_ADDRESS}/api/v1/index" \ -H 'accept: application/json' \ -H 'Content-Type: application/json' \ -d '{ "collection_name": "book", - "field_name": "book_intro" - }' + "field_name": "author_intro", + "index_name": "author_intro_index", + "extra_params":[ + {"key": "metric_type", "value": "L2"}, + {"key": "index_type", "value": "IVF_FLAT"}, + {"key": "params", "value": "{\"nlist\":1024}"} + ] + }' | grep -q "error_code" ; then + exit 1 +fi -# Release collection -curl -X 'DELETE' \ +# Build Index for comment +if curl -X 'POST' \ + "http://${MILVUS_SERVICE_ADDRESS}/api/v1/index" \ + -H 'accept: application/json' \ + -H 'Content-Type: application/json' \ + -d '{ + "collection_name": "book", + "field_name": "comment", + "index_name": "comment_index", + "extra_params":[ + {"key": "metric_type", "value": "L2"}, + {"key": "index_type", "value": "IVF_FLAT"}, + {"key": "params", "value": "{\"nlist\":1024}"} + ] + }' | grep -q "error_code" ; then + exit 1 +fi + +# Load collection +if curl -X 'POST' \ "http://${MILVUS_SERVICE_ADDRESS}/api/v1/collection/load" \ -H 'accept: application/json' \ -H 'Content-Type: application/json' \ -d '{ "collection_name": "book" - }' + }' | grep -q "error_code" ; then + exit 1 +fi + +# KNN Search +# TODO: search fp16/bf16 +for SEARCH_JSON in search-book-intro ; do +if curl -X 'POST' \ + "http://${MILVUS_SERVICE_ADDRESS}/api/v1/search" \ + -H 'accept: application/json' \ + -H 'Content-Type: application/json' \ + -d 
@${DATA_PATH}/${SEARCH_JSON}.json | grep -q "error_code" ; then + exit 1 +fi +done + +# Release collection +if curl -X 'DELETE' \ + "http://${MILVUS_SERVICE_ADDRESS}/api/v1/collection/load" \ + -H 'accept: application/json' \ + -H 'Content-Type: application/json' \ + -d '{ + "collection_name": "book" + }' | grep -q "error_code" ; then + exit 1 +fi + +# Drop Index +for FIELD_NAME in book_intro author_intro comment ; do +if curl -X 'DELETE' \ + "http://${MILVUS_SERVICE_ADDRESS}/api/v1/index" \ + -H 'accept: application/json' \ + -H 'Content-Type: application/json' \ + -d "{ + \"collection_name\": \"book\", + \"field_name\": \"${FIELD_NAME}\", + \"index_name\": \"${FIELD_NAME}_index\" + }" | grep -q "error_code" ; then + exit 1 +fi +done # Drop collection -curl -X 'DELETE' \ +if curl -X 'DELETE' \ "http://${MILVUS_SERVICE_ADDRESS}/api/v1/collection" \ -H 'accept: application/json' \ -H 'Content-Type: application/json' \ -d '{ "collection_name": "book" - }' + }' | grep -q "error_code" ; then + exit 1 +fi + +echo "e2e-restful.sh success!" 
diff --git a/tests/scripts/restful-data/create-collection.json b/tests/scripts/restful-data/create-collection.json index 6075ca2d26..c51334b07e 100644 --- a/tests/scripts/restful-data/create-collection.json +++ b/tests/scripts/restful-data/create-collection.json @@ -1,7 +1,6 @@ { "collection_name": "book", "consistency_level": 1, - "db_name": "string", "schema": { "autoID": false, "description": "Test book search", @@ -25,8 +24,34 @@ "value": "2" } ] + }, + { + "name": "author_intro", + "description": "embedded vector of author introduction", + "autoID": false, + "data_type": 102, + "is_primary_key": false, + "type_params": [ + { + "key": "dim", + "value": "2" + } + ] + }, + { + "name": "comment", + "description": "embedded vector of comment", + "autoID": false, + "data_type": 103, + "is_primary_key": false, + "type_params": [ + { + "key": "dim", + "value": "2" + } + ] } ], "name": "book" } -} \ No newline at end of file +} diff --git a/tests/scripts/restful-data/insert-data.json b/tests/scripts/restful-data/insert-data.json index fce9191cde..fa5a4c8195 100644 --- a/tests/scripts/restful-data/insert-data.json +++ b/tests/scripts/restful-data/insert-data.json @@ -5,16 +5,30 @@ "field_name": "book_id", "type": 5, "field": [ - 1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,64,65,66,67,68,69,70,71,72,73,74,75,76,77,78,79,80,81,82,83,84,85,86,87,88,89,90,91,92,93,94,95,96,97,98,99,100, + 1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,64,65,66,67,68,69,70,71,72,73,74,75,76,77,78,79,80,81,82,83,84,85,86,87,88,89,90,91,92,93,94,95,96,97,98,99,100 ] }, { "field_name": "book_intro", "type": 101, "field": [ - 
[1,1],[2,1],[3,1],[4,1],[5,1],[6,1],[7,1],[8,1],[9,1],[10,1],[11,1],[12,1],[13,1],[14,1],[15,1],[16,1],[17,1],[18,1],[19,1],[20,1],[21,1],[22,1],[23,1],[24,1],[25,1],[26,1],[27,1],[28,1],[29,1],[30,1],[31,1],[32,1],[33,1],[34,1],[35,1],[36,1],[37,1],[38,1],[39,1],[40,1],[41,1],[42,1],[43,1],[44,1],[45,1],[46,1],[47,1],[48,1],[49,1],[50,1],[51,1],[52,1],[53,1],[54,1],[55,1],[56,1],[57,1],[58,1],[59,1],[60,1],[61,1],[62,1],[63,1],[64,1],[65,1],[66,1],[67,1],[68,1],[69,1],[70,1],[71,1],[72,1],[73,1],[74,1],[75,1],[76,1],[77,1],[78,1],[79,1],[80,1],[81,1],[82,1],[83,1],[84,1],[85,1],[86,1],[87,1],[88,1],[89,1],[90,1],[91,1],[92,1],[93,1],[94,1],[95,1],[96,1],[97,1],[98,1],[99,1],[100,1], + [1,1],[2,1],[3,1],[4,1],[5,1],[6,1],[7,1],[8,1],[9,1],[10,1],[11,1],[12,1],[13,1],[14,1],[15,1],[16,1],[17,1],[18,1],[19,1],[20,1],[21,1],[22,1],[23,1],[24,1],[25,1],[26,1],[27,1],[28,1],[29,1],[30,1],[31,1],[32,1],[33,1],[34,1],[35,1],[36,1],[37,1],[38,1],[39,1],[40,1],[41,1],[42,1],[43,1],[44,1],[45,1],[46,1],[47,1],[48,1],[49,1],[50,1],[51,1],[52,1],[53,1],[54,1],[55,1],[56,1],[57,1],[58,1],[59,1],[60,1],[61,1],[62,1],[63,1],[64,1],[65,1],[66,1],[67,1],[68,1],[69,1],[70,1],[71,1],[72,1],[73,1],[74,1],[75,1],[76,1],[77,1],[78,1],[79,1],[80,1],[81,1],[82,1],[83,1],[84,1],[85,1],[86,1],[87,1],[88,1],[89,1],[90,1],[91,1],[92,1],[93,1],[94,1],[95,1],[96,1],[97,1],[98,1],[99,1],[100,1] + ] + }, + { + "field_name": "author_intro", + "type": 102, + "field": [ + 
[1.0,1.0],[2,1.0],[3,1.0],[4,1.0],[5,1.0],[6,1.0],[7,1.0],[8,1.0],[9,1.0],[1.00,1.0],[1.010,1.0],[1.02,1.0],[1.03,1.0],[1.04,1.0],[1.05,1.0],[1.06,1.0],[1.07,1.0],[1.08,1.0],[1.09,1.0],[20,1.0],[21.0,1.0],[22,1.0],[23,1.0],[24,1.0],[25,1.0],[26,1.0],[27,1.0],[28,1.0],[29,1.0],[30,1.0],[31.0,1.0],[32,1.0],[33,1.0],[34,1.0],[35,1.0],[36,1.0],[37,1.0],[38,1.0],[39,1.0],[40,1.0],[41.0,1.0],[42,1.0],[43,1.0],[44,1.0],[45,1.0],[46,1.0],[47,1.0],[48,1.0],[49,1.0],[50,1.0],[51.0,1.0],[52,1.0],[53,1.0],[54,1.0],[55,1.0],[56,1.0],[57,1.0],[58,1.0],[59,1.0],[60,1.0],[61.0,1.0],[62,1.0],[63,1.0],[64,1.0],[65,1.0],[66,1.0],[67,1.0],[68,1.0],[69,1.0],[70,1.0],[71.0,1.0],[72,1.0],[73,1.0],[74,1.0],[75,1.0],[76,1.0],[77,1.0],[78,1.0],[79,1.0],[80,1.0],[81.0,1.0],[82,1.0],[83,1.0],[84,1.0],[85,1.0],[86,1.0],[87,1.0],[88,1.0],[89,1.0],[90,1.0],[91.0,1.0],[92,1.0],[93,1.0],[94,1.0],[95,1.0],[96,1.0],[97,1.0],[98,1.0],[99,1.0],[1.000,1.0] + ] + }, + { + "field_name": "comment", + "type": 103, + "field": [ + [1.0,1.0],[2,1.0],[3,1.0],[4,1.0],[5,1.0],[6,1.0],[7,1.0],[8,1.0],[9,1.0],[1.00,1.0],[1.010,1.0],[1.02,1.0],[1.03,1.0],[1.04,1.0],[1.05,1.0],[1.06,1.0],[1.07,1.0],[1.08,1.0],[1.09,1.0],[20,1.0],[21.0,1.0],[22,1.0],[23,1.0],[24,1.0],[25,1.0],[26,1.0],[27,1.0],[28,1.0],[29,1.0],[30,1.0],[31.0,1.0],[32,1.0],[33,1.0],[34,1.0],[35,1.0],[36,1.0],[37,1.0],[38,1.0],[39,1.0],[40,1.0],[41.0,1.0],[42,1.0],[43,1.0],[44,1.0],[45,1.0],[46,1.0],[47,1.0],[48,1.0],[49,1.0],[50,1.0],[51.0,1.0],[52,1.0],[53,1.0],[54,1.0],[55,1.0],[56,1.0],[57,1.0],[58,1.0],[59,1.0],[60,1.0],[61.0,1.0],[62,1.0],[63,1.0],[64,1.0],[65,1.0],[66,1.0],[67,1.0],[68,1.0],[69,1.0],[70,1.0],[71.0,1.0],[72,1.0],[73,1.0],[74,1.0],[75,1.0],[76,1.0],[77,1.0],[78,1.0],[79,1.0],[80,1.0],[81.0,1.0],[82,1.0],[83,1.0],[84,1.0],[85,1.0],[86,1.0],[87,1.0],[88,1.0],[89,1.0],[90,1.0],[91.0,1.0],[92,1.0],[93,1.0],[94,1.0],[95,1.0],[96,1.0],[97,1.0],[98,1.0],[99,1.0],[1.000,1.0] ] } ], - "num_rows": 1000 -} \ No newline at end of file + 
"num_rows": 100 +} diff --git a/tests/scripts/restful-data/search.json b/tests/scripts/restful-data/search-book-intro.json similarity index 99% rename from tests/scripts/restful-data/search.json rename to tests/scripts/restful-data/search-book-intro.json index ebcd37ddab..3d9500fac9 100644 --- a/tests/scripts/restful-data/search.json +++ b/tests/scripts/restful-data/search-book-intro.json @@ -11,4 +11,4 @@ "vectors": [ [10,5] ], "dsl": "", "dsl_type": 1 -} \ No newline at end of file +}