feat: fp32 vector to fp16/bf16 vector conversion for RESTful API (#37556)

Support converting fp32 vectors to fp16/bf16 vectors in the RESTful API. The affected APIs are as follows:

- Handler: insert
- HandlerV1: insert/upsert
- HandlerV2: insert/upsert/search

We do not modify the search API in Handler/HandlerV1 because these handlers do not support fp16/bf16 vectors.

module github.com/milvus-io/milvus/pkg:

Add `Float32ArrayToBFloat16Bytes()`, `Float32ArrayToFloat16Bytes()`, and
`Float32ArrayToBytes()`. These methods will be used by the Go SDK in the
future.
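
For reference, a minimal usage sketch of the new helpers (not part of this diff; the import path is the one used by the tests below):

package main

import (
	"fmt"

	"github.com/milvus-io/milvus/pkg/util/typeutil"
)

func main() {
	fv := []float32{0.11111, 0.22222}
	// 4 bytes per element: little-endian IEEE 754 float32
	fmt.Printf("%x\n", typeutil.Float32ArrayToBytes(fv))
	// 2 bytes per element: IEEE 754 half precision (float16)
	fmt.Printf("%x\n", typeutil.Float32ArrayToFloat16Bytes(fv))
	// 2 bytes per element: bfloat16
	fmt.Printf("%x\n", typeutil.Float32ArrayToBFloat16Bytes(fv))
}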

issue: #37448

Signed-off-by: Yinzuo Jiang <yinzuo.jiang@zilliz.com>
Signed-off-by: Yinzuo Jiang <jiangyinzuo@foxmail.com>
Yinzuo Jiang 2024-11-24 17:46:33 +08:00 committed by GitHub
parent 62af24c1a1
commit 5a06faca39
16 changed files with 1464 additions and 299 deletions

View File

@ -1220,6 +1220,184 @@ func TestUpsert(t *testing.T) {
})
}
func TestFp16Bf16VectorsV1(t *testing.T) {
paramtable.Init()
paramtable.Get().Save(proxy.Params.HTTPCfg.AcceptTypeAllowInt64.Key, "true")
mp := mocks.NewMockProxy(t)
collSchema := generateCollectionSchemaWithVectorFields()
testEngine := initHTTPServer(mp, true)
queryTestCases := []requestBodyTestCase{}
for _, path := range []string{VectorInsertPath, VectorUpsertPath} {
queryTestCases = append(queryTestCases,
requestBodyTestCase{
path: path,
requestBody: []byte(
`{
"collectionName": "book",
"data": [
{
"book_id": 0,
"word_count": 0,
"book_intro": [0.11825, 0.6],
"binaryVector": "AQ==",
"float16Vector": [3.0],
"bfloat16Vector": [4.4, 442],
"sparseFloatVector": {"1": 0.1, "2": 0.44}
}
]
}`),
errCode: 1804,
errMsg: "fail to deal the insert data, error: []byte size 2 doesn't equal to vector dimension 2 of Float16Vector",
}, requestBodyTestCase{
path: path,
requestBody: []byte(
`{
"collectionName": "book",
"data": [
{
"book_id": 0,
"word_count": 0,
"book_intro": [0.11825, 0.6],
"binaryVector": "AQ==",
"float16Vector": [3, 3.0],
"bfloat16Vector": [4.4, 442],
"sparseFloatVector": {"1": 0.1, "2": 0.44}
}
]
}`),
errCode: 200,
}, requestBodyTestCase{
path: path,
// [3, 3] should be converted to [float32(3), float32(3)]
requestBody: []byte(
`{
"collectionName": "book",
"data": [
{
"book_id": 0,
"word_count": 0,
"book_intro": [0.11825, 0.6],
"binaryVector": "AQ==",
"float16Vector": [3, 3],
"bfloat16Vector": [4.4, 442],
"sparseFloatVector": {"1": 0.1, "2": 0.44}
}
]
}`),
errCode: 200,
}, requestBodyTestCase{
path: path,
requestBody: []byte(
`{
"collectionName": "book",
"data": [
{
"book_id": 0,
"word_count": 0,
"book_intro": [0.11825, 0.6],
"binaryVector": "AQ==",
"float16Vector": "AQIDBA==",
"bfloat16Vector": "AQIDBA==",
"sparseFloatVector": {"1": 0.1, "2": 0.44}
}
]
}`),
errCode: 200,
}, requestBodyTestCase{
path: path,
requestBody: []byte(
`{
"collectionName": "book",
"data": [
{
"book_id": 0,
"word_count": 0,
"book_intro": [0.11825, 0.6],
"binaryVector": "AQ==",
"float16Vector": [3, 3.0, 3],
"bfloat16Vector": [4.4, 44],
"sparseFloatVector": {"1": 0.1, "2": 0.44}
}
]
}`),
errMsg: "fail to deal the insert data, error: []byte size 6 doesn't equal to vector dimension 2 of Float16Vector",
errCode: 1804,
}, requestBodyTestCase{
path: path,
requestBody: []byte(
`{
"collectionName": "book",
"data": [
{
"book_id": 0,
"word_count": 0,
"book_intro": [0.11825, 0.6],
"binaryVector": "AQ==",
"float16Vector": [3, 3.0],
"bfloat16Vector": [4.4, 442, 44],
"sparseFloatVector": {"1": 0.1, "2": 0.44}
}
]
}`),
errMsg: "fail to deal the insert data, error: []byte size 6 doesn't equal to vector dimension 2 of BFloat16Vector",
errCode: 1804,
}, requestBodyTestCase{
path: path,
requestBody: []byte(
`{
"collectionName": "book",
"data": [
{
"book_id": 0,
"word_count": 0,
"book_intro": [0.11825, 0.6],
"binaryVector": "AQ==",
"float16Vector": "AQIDBA==",
"bfloat16Vector": [4.4, 442],
"sparseFloatVector": {"1": 0.1, "2": 0.44}
},
{
"book_id": 1,
"word_count": 0,
"book_intro": [0.11825, 0.6],
"binaryVector": "AQ==",
"float16Vector": [3.1, 3.1],
"bfloat16Vector": "AQIDBA==",
"sparseFloatVector": {"3": 1.1, "2": 0.44}
}
]
}`),
errCode: 200,
})
}
mp.EXPECT().DescribeCollection(mock.Anything, mock.Anything).Return(&milvuspb.DescribeCollectionResponse{
CollectionName: DefaultCollectionName,
Schema: collSchema,
ShardsNum: ShardNumDefault,
Status: &StatusSuccess,
}, nil).Times(len(queryTestCases))
mp.EXPECT().Insert(mock.Anything, mock.Anything).Return(&milvuspb.MutationResult{Status: commonSuccessStatus, InsertCnt: int64(0), IDs: &schemapb.IDs{IdField: &schemapb.IDs_IntId{IntId: &schemapb.LongArray{Data: []int64{}}}}}, nil).Times(4)
mp.EXPECT().Upsert(mock.Anything, mock.Anything).Return(&milvuspb.MutationResult{Status: commonSuccessStatus, InsertCnt: int64(0), IDs: &schemapb.IDs{IdField: &schemapb.IDs_IntId{IntId: &schemapb.LongArray{Data: []int64{}}}}}, nil).Times(4)
for i, testcase := range queryTestCases {
t.Run(testcase.path, func(t *testing.T) {
bodyReader := bytes.NewReader(testcase.requestBody)
req := httptest.NewRequest(http.MethodPost, versional(testcase.path), bodyReader)
req.SetBasicAuth(util.UserRoot, getDefaultRootPassword())
w := httptest.NewRecorder()
testEngine.ServeHTTP(w, req)
assert.Equal(t, http.StatusOK, w.Code, "case %d: ", i, string(testcase.requestBody))
returnBody := &ReturnErrMsg{}
err := json.Unmarshal(w.Body.Bytes(), returnBody)
assert.Nil(t, err, "case %d: ", i)
assert.Equal(t, testcase.errCode, returnBody.Code, "case %d: ", i, string(testcase.requestBody))
if testcase.errCode != 0 {
assert.Equal(t, testcase.errMsg, returnBody.Message, "case %d: ", i, string(testcase.requestBody))
}
fmt.Println(w.Body.String())
})
}
}
func genIDs(dataType schemapb.DataType) *schemapb.IDs {
return generateIDs(dataType, 3)
}

View File

@ -1600,6 +1600,29 @@ func TestMethodPost(t *testing.T) {
}
}
func validateTestCases(t *testing.T, testEngine *gin.Engine, queryTestCases []requestBodyTestCase, allowInt64 bool) {
for i, testcase := range queryTestCases {
t.Run(testcase.path, func(t *testing.T) {
bodyReader := bytes.NewReader(testcase.requestBody)
req := httptest.NewRequest(http.MethodPost, versionalV2(EntityCategory, testcase.path), bodyReader)
if allowInt64 {
req.Header.Set(HTTPHeaderAllowInt64, "true")
}
w := httptest.NewRecorder()
testEngine.ServeHTTP(w, req)
assert.Equal(t, http.StatusOK, w.Code, "case %d: ", i, string(testcase.requestBody))
returnBody := &ReturnErrMsg{}
err := json.Unmarshal(w.Body.Bytes(), returnBody)
assert.Nil(t, err, "case %d: ", i)
assert.Equal(t, testcase.errCode, returnBody.Code, "case %d: ", i, string(testcase.requestBody))
if testcase.errCode != 0 {
assert.Equal(t, testcase.errMsg, returnBody.Message, "case %d: ", i, string(testcase.requestBody))
}
fmt.Println(w.Body.String())
})
}
}
func TestDML(t *testing.T) {
paramtable.Init()
// disable rate limit
@ -1715,23 +1738,7 @@ func TestDML(t *testing.T) {
requestBody: []byte(`{"collectionName": "book", "data": [{"book_id": 0, "word_count": 0, "book_intro": [0.11825, 0.6]}]}`),
})
for _, testcase := range queryTestCases {
t.Run(testcase.path, func(t *testing.T) {
bodyReader := bytes.NewReader(testcase.requestBody)
req := httptest.NewRequest(http.MethodPost, versionalV2(EntityCategory, testcase.path), bodyReader)
w := httptest.NewRecorder()
testEngine.ServeHTTP(w, req)
assert.Equal(t, http.StatusOK, w.Code)
returnBody := &ReturnErrMsg{}
err := json.Unmarshal(w.Body.Bytes(), returnBody)
assert.Nil(t, err)
assert.Equal(t, testcase.errCode, returnBody.Code)
if testcase.errCode != 0 {
assert.Equal(t, testcase.errMsg, returnBody.Message)
}
fmt.Println(w.Body.String())
})
}
validateTestCases(t, testEngine, queryTestCases, false)
}
func TestAllowInt64(t *testing.T) {
@ -1759,24 +1766,183 @@ func TestAllowInt64(t *testing.T) {
mp.EXPECT().Insert(mock.Anything, mock.Anything).Return(&milvuspb.MutationResult{Status: commonSuccessStatus, InsertCnt: int64(0), IDs: &schemapb.IDs{IdField: &schemapb.IDs_IntId{IntId: &schemapb.LongArray{Data: []int64{}}}}}, nil).Once()
mp.EXPECT().Upsert(mock.Anything, mock.Anything).Return(&milvuspb.MutationResult{Status: commonSuccessStatus, UpsertCnt: int64(0), IDs: &schemapb.IDs{IdField: &schemapb.IDs_IntId{IntId: &schemapb.LongArray{Data: []int64{}}}}}, nil).Once()
for _, testcase := range queryTestCases {
t.Run(testcase.path, func(t *testing.T) {
bodyReader := bytes.NewReader(testcase.requestBody)
req := httptest.NewRequest(http.MethodPost, versionalV2(EntityCategory, testcase.path), bodyReader)
req.Header.Set(HTTPHeaderAllowInt64, "true")
w := httptest.NewRecorder()
testEngine.ServeHTTP(w, req)
assert.Equal(t, http.StatusOK, w.Code)
returnBody := &ReturnErrMsg{}
err := json.Unmarshal(w.Body.Bytes(), returnBody)
assert.Nil(t, err)
assert.Equal(t, testcase.errCode, returnBody.Code)
if testcase.errCode != 0 {
assert.Equal(t, testcase.errMsg, returnBody.Message)
}
fmt.Println(w.Body.String())
})
validateTestCases(t, testEngine, queryTestCases, true)
}
func generateCollectionSchemaWithVectorFields() *schemapb.CollectionSchema {
collSchema := generateCollectionSchema(schemapb.DataType_Int64, false, true)
binaryVectorField := generateVectorFieldSchema(schemapb.DataType_BinaryVector)
binaryVectorField.Name = "binaryVector"
float16VectorField := generateVectorFieldSchema(schemapb.DataType_Float16Vector)
float16VectorField.Name = "float16Vector"
bfloat16VectorField := generateVectorFieldSchema(schemapb.DataType_BFloat16Vector)
bfloat16VectorField.Name = "bfloat16Vector"
sparseFloatVectorField := generateVectorFieldSchema(schemapb.DataType_SparseFloatVector)
sparseFloatVectorField.Name = "sparseFloatVector"
collSchema.Fields = append(collSchema.Fields, binaryVectorField)
collSchema.Fields = append(collSchema.Fields, float16VectorField)
collSchema.Fields = append(collSchema.Fields, bfloat16VectorField)
collSchema.Fields = append(collSchema.Fields, sparseFloatVectorField)
return collSchema
}
func TestFp16Bf16VectorsV2(t *testing.T) {
paramtable.Init()
// disable rate limit
paramtable.Get().Save(paramtable.Get().QuotaConfig.QuotaAndLimitsEnabled.Key, "false")
defer paramtable.Get().Reset(paramtable.Get().QuotaConfig.QuotaAndLimitsEnabled.Key)
mp := mocks.NewMockProxy(t)
collSchema := generateCollectionSchemaWithVectorFields()
testEngine := initHTTPServerV2(mp, false)
queryTestCases := []requestBodyTestCase{}
for _, path := range []string{InsertAction, UpsertAction} {
queryTestCases = append(queryTestCases,
requestBodyTestCase{
path: path,
requestBody: []byte(
`{
"collectionName": "book",
"data": [
{
"book_id": 0,
"word_count": 0,
"book_intro": [0.11825, 0.6],
"binaryVector": "AQ==",
"float16Vector": [3.0],
"bfloat16Vector": [4.4, 442],
"sparseFloatVector": {"1": 0.1, "2": 0.44}
}
]
}`),
errCode: 1804,
errMsg: "fail to deal the insert data, error: []byte size 2 doesn't equal to vector dimension 2 of Float16Vector",
}, requestBodyTestCase{
path: path,
requestBody: []byte(
`{
"collectionName": "book",
"data": [
{
"book_id": 0,
"word_count": 0,
"book_intro": [0.11825, 0.6],
"binaryVector": "AQ==",
"float16Vector": [3, 3.0],
"bfloat16Vector": [4.4, 442],
"sparseFloatVector": {"1": 0.1, "2": 0.44}
}
]
}`),
}, requestBodyTestCase{
path: path,
// [3, 3] should be converted to [float32(3), float32(3)]
requestBody: []byte(
`{
"collectionName": "book",
"data": [
{
"book_id": 0,
"word_count": 0,
"book_intro": [0.11825, 0.6],
"binaryVector": "AQ==",
"float16Vector": [3, 3],
"bfloat16Vector": [4.4, 442],
"sparseFloatVector": {"1": 0.1, "2": 0.44}
}
]
}`),
}, requestBodyTestCase{
path: path,
requestBody: []byte(
`{
"collectionName": "book",
"data": [
{
"book_id": 0,
"word_count": 0,
"book_intro": [0.11825, 0.6],
"binaryVector": "AQ==",
"float16Vector": "AQIDBA==",
"bfloat16Vector": "AQIDBA==",
"sparseFloatVector": {"1": 0.1, "2": 0.44}
}
]
}`),
}, requestBodyTestCase{
path: path,
requestBody: []byte(
`{
"collectionName": "book",
"data": [
{
"book_id": 0,
"word_count": 0,
"book_intro": [0.11825, 0.6],
"binaryVector": "AQ==",
"float16Vector": [3, 3.0, 3],
"bfloat16Vector": [4.4, 44],
"sparseFloatVector": {"1": 0.1, "2": 0.44}
}
]
}`),
errMsg: "fail to deal the insert data, error: []byte size 6 doesn't equal to vector dimension 2 of Float16Vector",
errCode: 1804,
}, requestBodyTestCase{
path: path,
requestBody: []byte(
`{
"collectionName": "book",
"data": [
{
"book_id": 0,
"word_count": 0,
"book_intro": [0.11825, 0.6],
"binaryVector": "AQ==",
"float16Vector": [3, 3.0],
"bfloat16Vector": [4.4, 442, 44],
"sparseFloatVector": {"1": 0.1, "2": 0.44}
}
]
}`),
errMsg: "fail to deal the insert data, error: []byte size 6 doesn't equal to vector dimension 2 of BFloat16Vector",
errCode: 1804,
}, requestBodyTestCase{
path: path,
requestBody: []byte(
`{
"collectionName": "book",
"data": [
{
"book_id": 0,
"word_count": 0,
"book_intro": [0.11825, 0.6],
"binaryVector": "AQ==",
"float16Vector": "AQIDBA==",
"bfloat16Vector": [4.4, 442],
"sparseFloatVector": {"1": 0.1, "2": 0.44}
},
{
"book_id": 1,
"word_count": 0,
"book_intro": [0.11825, 0.6],
"binaryVector": "AQ==",
"float16Vector": [3.1, 3.1],
"bfloat16Vector": "AQIDBA==",
"sparseFloatVector": {"3": 1.1, "2": 0.44}
}
]
}`),
})
}
mp.EXPECT().DescribeCollection(mock.Anything, mock.Anything).Return(&milvuspb.DescribeCollectionResponse{
CollectionName: DefaultCollectionName,
Schema: collSchema,
ShardsNum: ShardNumDefault,
Status: &StatusSuccess,
}, nil).Times(len(queryTestCases))
mp.EXPECT().Insert(mock.Anything, mock.Anything).Return(&milvuspb.MutationResult{Status: commonSuccessStatus, InsertCnt: int64(0), IDs: &schemapb.IDs{IdField: &schemapb.IDs_IntId{IntId: &schemapb.LongArray{Data: []int64{}}}}}, nil).Times(4)
mp.EXPECT().Upsert(mock.Anything, mock.Anything).Return(&milvuspb.MutationResult{Status: commonSuccessStatus, InsertCnt: int64(0), IDs: &schemapb.IDs{IdField: &schemapb.IDs_IntId{IntId: &schemapb.LongArray{Data: []int64{}}}}}, nil).Times(4)
validateTestCases(t, testEngine, queryTestCases, false)
}
func TestSearchV2(t *testing.T) {
@ -1811,26 +1977,14 @@ func TestSearchV2(t *testing.T) {
Ids: generateIDs(schemapb.DataType_Int64, 3),
Scores: DefaultScores,
}}, nil).Once()
mp.EXPECT().HybridSearch(mock.Anything, mock.Anything).Return(&milvuspb.SearchResults{Status: commonSuccessStatus, Results: &schemapb.SearchResultData{TopK: int64(0)}}, nil).Times(3)
collSchema := generateCollectionSchema(schemapb.DataType_Int64, false, true)
binaryVectorField := generateVectorFieldSchema(schemapb.DataType_BinaryVector)
binaryVectorField.Name = "binaryVector"
float16VectorField := generateVectorFieldSchema(schemapb.DataType_Float16Vector)
float16VectorField.Name = "float16Vector"
bfloat16VectorField := generateVectorFieldSchema(schemapb.DataType_BFloat16Vector)
bfloat16VectorField.Name = "bfloat16Vector"
sparseFloatVectorField := generateVectorFieldSchema(schemapb.DataType_SparseFloatVector)
sparseFloatVectorField.Name = "sparseFloatVector"
collSchema.Fields = append(collSchema.Fields, binaryVectorField)
collSchema.Fields = append(collSchema.Fields, float16VectorField)
collSchema.Fields = append(collSchema.Fields, bfloat16VectorField)
collSchema.Fields = append(collSchema.Fields, sparseFloatVectorField)
mp.EXPECT().HybridSearch(mock.Anything, mock.Anything).Return(&milvuspb.SearchResults{Status: commonSuccessStatus, Results: &schemapb.SearchResultData{TopK: int64(0)}}, nil).Times(5)
collSchema := generateCollectionSchemaWithVectorFields()
mp.EXPECT().DescribeCollection(mock.Anything, mock.Anything).Return(&milvuspb.DescribeCollectionResponse{
CollectionName: DefaultCollectionName,
Schema: collSchema,
ShardsNum: ShardNumDefault,
Status: &StatusSuccess,
}, nil).Times(10)
}, nil).Times(14)
mp.EXPECT().Search(mock.Anything, mock.Anything).Return(&milvuspb.SearchResults{Status: commonSuccessStatus, Results: &schemapb.SearchResultData{TopK: int64(0)}}, nil).Times(3)
mp.EXPECT().DescribeCollection(mock.Anything, mock.Anything).Return(&milvuspb.DescribeCollectionResponse{
Status: &commonpb.Status{
@ -1871,7 +2025,7 @@ func TestSearchV2(t *testing.T) {
queryTestCases = append(queryTestCases, requestBodyTestCase{
path: SearchAction,
requestBody: []byte(`{"collectionName": "book", "data": [["0.1", "0.2"]], "filter": "book_id in [2, 4, 6, 8]", "limit": 4, "outputFields": ["word_count"], "params": {"radius":0.9, "range_filter": 0.1}, "groupingField": "test"}`),
errMsg: "can only accept json format request, error: Mismatch type float32 with value string \"at index 8: mismatched type with value\\n\\n\\t[\\\"0.1\\\", \\\"0.2\\\"]\\n\\t........^.....\\n\": invalid parameter[expected=FloatVector][actual=[\"0.1\", \"0.2\"]]",
errMsg: "can only accept json format request, error: Mismatch type float32 with value string \"at index 9: mismatched type with value\\n\\n\\t[[\\\"0.1\\\", \\\"0.2\\\"]]\\n\\t.........^......\\n\": invalid parameter[expected=FloatVector][actual=[[\"0.1\", \"0.2\"]]]",
errCode: 1801,
})
queryTestCases = append(queryTestCases, requestBodyTestCase{
@ -1929,6 +2083,50 @@ func TestSearchV2(t *testing.T) {
`{"data": ["AQIDBA=="], "annsField": "bfloat16Vector", "metricType": "L2", "limit": 3}` +
`], "rerank": {"strategy": "weighted", "params": {"weights": [0.9, 0.8]}}}`),
})
queryTestCases = append(queryTestCases, requestBodyTestCase{
path: AdvancedSearchAction,
requestBody: []byte(`{"collectionName": "hello_milvus", "search": [` +
`{"data": [[0.1, 0.2]], "annsField": "book_intro", "metricType": "L2", "limit": 3},` +
`{"data": ["AQ=="], "annsField": "binaryVector", "metricType": "L2", "limit": 3},` +
`{"data": [[0.1, 0.23]], "annsField": "float16Vector", "metricType": "L2", "limit": 3},` +
`{"data": [[0.1, 0.43]], "annsField": "bfloat16Vector", "metricType": "L2", "limit": 3}` +
`], "rerank": {"strategy": "weighted", "params": {"weights": [0.9, 0.8]}}}`),
})
// -2, -1, 1, 3 should be float32
queryTestCases = append(queryTestCases, requestBodyTestCase{
path: AdvancedSearchAction,
requestBody: []byte(`{"collectionName": "hello_milvus", "search": [` +
`{"data": [[0.1, 0.2]], "annsField": "book_intro", "metricType": "L2", "limit": 3},` +
`{"data": ["AQ=="], "annsField": "binaryVector", "metricType": "L2", "limit": 3},` +
`{"data": [[-2, -1]], "annsField": "float16Vector", "metricType": "L2", "limit": 3},` +
`{"data": [[1, 3]], "annsField": "bfloat16Vector", "metricType": "L2", "limit": 3}` +
`], "rerank": {"strategy": "weighted", "params": {"weights": [0.9, 0.8]}}}`),
})
// invalid fp32 vectors for fp16/bf16
queryTestCases = append(queryTestCases, requestBodyTestCase{
path: AdvancedSearchAction,
requestBody: []byte(`{"collectionName": "hello_milvus", "search": [` +
`{"data": [[0.1, 0.2]], "annsField": "book_intro", "metricType": "L2", "limit": 3},` +
`{"data": ["AQ=="], "annsField": "binaryVector", "metricType": "L2", "limit": 3},` +
`{"data": [[0.23]], "annsField": "float16Vector", "metricType": "L2", "limit": 3},` +
`{"data": [[0.1, 0.43]], "annsField": "bfloat16Vector", "metricType": "L2", "limit": 3}` +
`], "rerank": {"strategy": "weighted", "params": {"weights": [0.9, 0.8]}}}`),
errCode: 1801,
errMsg: "can only accept json format request, error: dimension: 2, but length of []float: 1: invalid parameter[expected=Float16Vector][actual=[[0.23]]]",
})
queryTestCases = append(queryTestCases, requestBodyTestCase{
path: AdvancedSearchAction,
requestBody: []byte(`{"collectionName": "hello_milvus", "search": [` +
`{"data": [[0.1, 0.2]], "annsField": "book_intro", "metricType": "L2", "limit": 3},` +
`{"data": ["AQ=="], "annsField": "binaryVector", "metricType": "L2", "limit": 3},` +
`{"data": [[0.23, 4.4]], "annsField": "float16Vector", "metricType": "L2", "limit": 3},` +
`{"data": [[0.1]], "annsField": "bfloat16Vector", "metricType": "L2", "limit": 3}` +
`], "rerank": {"strategy": "weighted", "params": {"weights": [0.9, 0.8]}}}`),
errCode: 1801,
errMsg: "can only accept json format request, error: dimension: 2, but length of []float: 1: invalid parameter[expected=BFloat16Vector][actual=[[0.1]]]",
})
queryTestCases = append(queryTestCases, requestBodyTestCase{
path: AdvancedSearchAction,
requestBody: []byte(`{"collectionName": "hello_milvus", "search": [` +
@ -1948,7 +2146,7 @@ func TestSearchV2(t *testing.T) {
`{"data": ["AQIDBA=="], "annsField": "float16Vector", "metricType": "L2", "limit": 3},` +
`{"data": ["AQIDBA=="], "annsField": "bfloat16Vector", "metricType": "L2", "limit": 3}` +
`], "rerank": {"strategy": "weighted", "params": {"weights": [0.9, 0.8]}}}`),
errMsg: "can only accept json format request, error: dimension: 2, but length of []float: 3: invalid parameter[expected=FloatVector][actual=[0.1, 0.2, 0.3]]",
errMsg: "can only accept json format request, error: dimension: 2, but length of []float: 3: invalid parameter[expected=FloatVector][actual=[[0.1, 0.2, 0.3]]]",
errCode: 1801,
})
queryTestCases = append(queryTestCases, requestBodyTestCase{
@ -2006,22 +2204,5 @@ func TestSearchV2(t *testing.T) {
errMsg: "mock",
errCode: 1100, // ErrParameterInvalid
})
for _, testcase := range queryTestCases {
t.Run(testcase.path, func(t *testing.T) {
bodyReader := bytes.NewReader(testcase.requestBody)
req := httptest.NewRequest(http.MethodPost, versionalV2(EntityCategory, testcase.path), bodyReader)
w := httptest.NewRecorder()
testEngine.ServeHTTP(w, req)
assert.Equal(t, http.StatusOK, w.Code)
returnBody := &ReturnErrMsg{}
err := json.Unmarshal(w.Body.Bytes(), returnBody)
assert.Nil(t, err)
assert.Equal(t, testcase.errCode, returnBody.Code)
if testcase.errCode != 0 {
assert.Equal(t, testcase.errMsg, returnBody.Message)
}
fmt.Println(w.Body.String())
})
}
validateTestCases(t, testEngine, queryTestCases, false)
}

View File

@ -19,9 +19,7 @@ package httpserver
import (
"bytes"
"context"
"encoding/binary"
"fmt"
"math"
"reflect"
"strconv"
"strings"
@ -367,27 +365,37 @@ func checkAndSetData(body string, collSchema *schemapb.CollectionSchema) (error,
}
reallyData[fieldName] = sparseVec
case schemapb.DataType_Float16Vector:
if dataString == "" {
return merr.WrapErrParameterInvalid(schemapb.DataType_name[int32(fieldType)], "", "missing vector field: "+fieldName), reallyDataArray, validDataMap
}
vectorStr := gjson.Get(data.Raw, fieldName).Raw
var vectorArray []byte
err := json.Unmarshal([]byte(vectorStr), &vectorArray)
if err != nil {
return merr.WrapErrParameterInvalid(schemapb.DataType_name[int32(fieldType)], dataString, err.Error()), reallyDataArray, validDataMap
}
reallyData[fieldName] = vectorArray
fallthrough
case schemapb.DataType_BFloat16Vector:
if dataString == "" {
return merr.WrapErrParameterInvalid(schemapb.DataType_name[int32(fieldType)], "", "missing vector field: "+fieldName), reallyDataArray, validDataMap
}
vectorStr := gjson.Get(data.Raw, fieldName).Raw
var vectorArray []byte
err := json.Unmarshal([]byte(vectorStr), &vectorArray)
if err != nil {
return merr.WrapErrParameterInvalid(schemapb.DataType_name[int32(fieldType)], dataString, err.Error()), reallyDataArray, validDataMap
vectorJSON := gjson.Get(data.Raw, fieldName)
// Clients may send a float32 vector because it is inconvenient for them to process float16 or bfloat16.
// A float32 vector is a JSON array, like `[1.0, 2.0, 3.0]`, `[1, 2, 3]`, etc.,
// while a float16 or bfloat16 vector is a JSON string, like `"4z1jPgAAgL8="`, `"gD+AP4A/gD8="`, etc.
if vectorJSON.IsArray() {
// `data` is a float32 vector
// same as `case schemapb.DataType_FloatVector`
var vectorArray []float32
err := json.Unmarshal([]byte(dataString), &vectorArray)
if err != nil {
return merr.WrapErrParameterInvalid(schemapb.DataType_name[int32(fieldType)], dataString, err.Error()), reallyDataArray, validDataMap
}
reallyData[fieldName] = vectorArray
} else if vectorJSON.Type == gjson.String {
// `data` is a float16 or bfloat16 vector
// same as `case schemapb.DataType_BinaryVector`
vectorStr := gjson.Get(data.Raw, fieldName).Raw
var vectorArray []byte
err := json.Unmarshal([]byte(vectorStr), &vectorArray)
if err != nil {
return merr.WrapErrParameterInvalid(schemapb.DataType_name[int32(fieldType)], dataString, err.Error()), reallyDataArray, validDataMap
}
reallyData[fieldName] = vectorArray
} else {
return merr.WrapErrParameterInvalid(schemapb.DataType_name[int32(fieldType)], dataString, "invalid vector field: "+fieldName), reallyDataArray, validDataMap
}
reallyData[fieldName] = vectorArray
case schemapb.DataType_Bool:
result, err := cast.ToBoolE(dataString)
if err != nil {
@ -633,7 +641,6 @@ func convertFloatVectorToArray(vector [][]float32, dim int64) ([]float32, error)
}
func convertBinaryVectorToArray(vector [][]byte, dim int64, dataType schemapb.DataType) ([]byte, error) {
binaryArray := make([]byte, 0)
var bytesLen int64
switch dataType {
case schemapb.DataType_BinaryVector:
@ -643,6 +650,7 @@ func convertBinaryVectorToArray(vector [][]byte, dim int64, dataType schemapb.Da
case schemapb.DataType_BFloat16Vector:
bytesLen = dim * 2
}
binaryArray := make([]byte, 0, len(vector)*int(bytesLen))
for _, arr := range vector {
if int64(len(arr)) != bytesLen {
return nil, fmt.Errorf("[]byte size %d doesn't equal to vector dimension %d of %s",
@ -836,9 +844,25 @@ func anyToColumns(rows []map[string]interface{}, validDataMap map[string][]bool,
case schemapb.DataType_BinaryVector:
nameColumns[field.Name] = append(nameColumns[field.Name].([][]byte), candi.v.Interface().([]byte))
case schemapb.DataType_Float16Vector:
nameColumns[field.Name] = append(nameColumns[field.Name].([][]byte), candi.v.Interface().([]byte))
switch candi.v.Interface().(type) {
case []byte:
nameColumns[field.Name] = append(nameColumns[field.Name].([][]byte), candi.v.Interface().([]byte))
case []float32:
vec := typeutil.Float32ArrayToFloat16Bytes(candi.v.Interface().([]float32))
nameColumns[field.Name] = append(nameColumns[field.Name].([][]byte), vec)
default:
return nil, merr.WrapErrParameterInvalidMsg(fmt.Sprintf("invalid type(%v) of field(%v) ", field.DataType, field.Name))
}
case schemapb.DataType_BFloat16Vector:
nameColumns[field.Name] = append(nameColumns[field.Name].([][]byte), candi.v.Interface().([]byte))
switch candi.v.Interface().(type) {
case []byte:
nameColumns[field.Name] = append(nameColumns[field.Name].([][]byte), candi.v.Interface().([]byte))
case []float32:
vec := typeutil.Float32ArrayToBFloat16Bytes(candi.v.Interface().([]float32))
nameColumns[field.Name] = append(nameColumns[field.Name].([][]byte), vec)
default:
return nil, merr.WrapErrParameterInvalidMsg(fmt.Sprintf("invalid type(%v) of field(%v) ", field.DataType, field.Name))
}
case schemapb.DataType_SparseFloatVector:
content := candi.v.Interface().([]byte)
rowSparseDim := typeutil.SparseFloatRowDim(content)
@ -1074,30 +1098,19 @@ func anyToColumns(rows []map[string]interface{}, validDataMap map[string][]bool,
return columns, nil
}
// --------------------- search param --------------------- //
func serialize(fv []float32) []byte {
data := make([]byte, 0, 4*len(fv)) // float32 occupies 4 bytes
buf := make([]byte, 4)
for _, f := range fv {
binary.LittleEndian.PutUint32(buf, math.Float32bits(f))
data = append(data, buf...)
}
return data
}
func serializeFloatVectors(vectors []gjson.Result, dataType schemapb.DataType, dimension, bytesLen int64) ([][]byte, error) {
func serializeFloatVectors(vectorStr string, dataType schemapb.DataType, dimension, bytesLen int64, fpArrayToBytesFunc func([]float32) []byte) ([][]byte, error) {
values := make([][]byte, 0)
for _, vector := range vectors {
var vectorArray []float32
err := json.Unmarshal([]byte(vector.String()), &vectorArray)
if err != nil {
return nil, merr.WrapErrParameterInvalid(schemapb.DataType_name[int32(dataType)], vector.String(), err.Error())
}
var fp32Values [][]float32
err := json.Unmarshal([]byte(vectorStr), &fp32Values)
if err != nil {
return nil, merr.WrapErrParameterInvalid(schemapb.DataType_name[int32(dataType)], vectorStr, err.Error())
}
for _, vectorArray := range fp32Values {
if int64(len(vectorArray)) != dimension {
return nil, merr.WrapErrParameterInvalid(schemapb.DataType_name[int32(dataType)], vector.String(),
return nil, merr.WrapErrParameterInvalid(schemapb.DataType_name[int32(dataType)], vectorStr,
fmt.Sprintf("dimension: %d, but length of []float: %d", dimension, len(vectorArray)))
}
vectorBytes := serialize(vectorArray)
vectorBytes := fpArrayToBytesFunc(vectorArray)
values = append(values, vectorBytes)
}
return values, nil
@ -1105,7 +1118,7 @@ func serializeFloatVectors(vectors []gjson.Result, dataType schemapb.DataType, d
func serializeByteVectors(vectorStr string, dataType schemapb.DataType, dimension, bytesLen int64) ([][]byte, error) {
values := make([][]byte, 0)
err := json.Unmarshal([]byte(vectorStr), &values) // todo check len == dimension * 1/2/2
err := json.Unmarshal([]byte(vectorStr), &values)
if err != nil {
return nil, merr.WrapErrParameterInvalid(schemapb.DataType_name[int32(dataType)], vectorStr, err.Error())
}
@ -1118,6 +1131,24 @@ func serializeByteVectors(vectorStr string, dataType schemapb.DataType, dimensio
return values, nil
}
// serializeFloatOrByteVectors serializes float32/float16/bfloat16 vectors.
// `[[1, 2, 3], [4.0, 5.0, 6.0]]` is a float32 vector,
// `["4z1jPgAAgL8=", "gD+AP4A/gD8="]` is a float16/bfloat16 vector.
func serializeFloatOrByteVectors(jsonResult gjson.Result, dataType schemapb.DataType, dimension int64, fpArrayToBytesFunc func([]float32) []byte) ([][]byte, error) {
firstElement := jsonResult.Get("0")
// Clients may send a float32 vector because it is inconvenient for them to process float16 or bfloat16.
// A float32 vector is a JSON array, like `[1.0, 2.0, 3.0]`, `[1, 2, 3]`, etc.,
// while a float16 or bfloat16 vector is a JSON string, like `"4z1jPgAAgL8="`, `"gD+AP4A/gD8="`, etc.
if firstElement.IsArray() {
return serializeFloatVectors(jsonResult.Raw, dataType, dimension, dimension*2, fpArrayToBytesFunc)
} else if firstElement.Type == gjson.String || !firstElement.Exists() {
// consider corner case: `[]`
return serializeByteVectors(jsonResult.Raw, dataType, dimension, dimension*2)
}
return nil, merr.WrapErrParameterInvalid(schemapb.DataType_name[int32(dataType)], jsonResult.Raw, "invalid type")
}
func serializeSparseFloatVectors(vectors []gjson.Result, dataType schemapb.DataType) ([][]byte, error) {
values := make([][]byte, 0)
for _, vector := range vectors {
@ -1138,16 +1169,16 @@ func convertQueries2Placeholder(body string, dataType schemapb.DataType, dimensi
switch dataType {
case schemapb.DataType_FloatVector:
valueType = commonpb.PlaceholderType_FloatVector
values, err = serializeFloatVectors(gjson.Get(body, HTTPRequestData).Array(), dataType, dimension, dimension*4)
values, err = serializeFloatVectors(gjson.Get(body, HTTPRequestData).Raw, dataType, dimension, dimension*4, typeutil.Float32ArrayToBytes)
case schemapb.DataType_BinaryVector:
valueType = commonpb.PlaceholderType_BinaryVector
values, err = serializeByteVectors(gjson.Get(body, HTTPRequestData).Raw, dataType, dimension, dimension/8)
case schemapb.DataType_Float16Vector:
valueType = commonpb.PlaceholderType_Float16Vector
values, err = serializeByteVectors(gjson.Get(body, HTTPRequestData).Raw, dataType, dimension, dimension*2)
values, err = serializeFloatOrByteVectors(gjson.Get(body, HTTPRequestData), dataType, dimension, typeutil.Float32ArrayToFloat16Bytes)
case schemapb.DataType_BFloat16Vector:
valueType = commonpb.PlaceholderType_BFloat16Vector
values, err = serializeByteVectors(gjson.Get(body, HTTPRequestData).Raw, dataType, dimension, dimension*2)
values, err = serializeFloatOrByteVectors(gjson.Get(body, HTTPRequestData), dataType, dimension, typeutil.Float32ArrayToBFloat16Bytes)
case schemapb.DataType_SparseFloatVector:
valueType = commonpb.PlaceholderType_SparseFloatVector
values, err = serializeSparseFloatVectors(gjson.Get(body, HTTPRequestData).Array(), dataType)
@ -1180,7 +1211,7 @@ func vectors2PlaceholderGroupBytes(vectors [][]float32) []byte {
ph.Type = placeHolderType
for _, vector := range vectors {
ph.Values = append(ph.Values, serialize(vector))
ph.Values = append(ph.Values, typeutil.Float32ArrayToBytes(vector))
}
}
phg := &commonpb.PlaceholderGroup{

View File

@ -0,0 +1,219 @@
// Licensed to the LF AI & Data foundation under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package httpserver
import (
"fmt"
"math/rand"
"testing"
"github.com/stretchr/testify/assert"
"github.com/tidwall/gjson"
"github.com/milvus-io/milvus-proto/go-api/v2/schemapb"
"github.com/milvus-io/milvus/internal/json"
"github.com/milvus-io/milvus/pkg/util/merr"
"github.com/milvus-io/milvus/pkg/util/typeutil"
)
/*
* Benchmarks of different serialization implementations
* See results: https://github.com/milvus-io/milvus/pull/37556#issuecomment-2491668743
*/
// serializeFloatVectorsBaseline uses []gjson.Result as input and calls json.Unmarshal multiple times,
// which degrades performance
func serializeFloatVectorsBaseline(vectors []gjson.Result, dataType schemapb.DataType, dimension, bytesLen int64) ([][]byte, error) {
values := make([][]byte, 0)
for _, vector := range vectors {
var vectorArray []float32
err := json.Unmarshal([]byte(vector.String()), &vectorArray)
if err != nil {
return nil, merr.WrapErrParameterInvalid(schemapb.DataType_name[int32(dataType)], vector.String(), err.Error())
}
if int64(len(vectorArray)) != dimension {
return nil, merr.WrapErrParameterInvalid(schemapb.DataType_name[int32(dataType)], vector.String(),
fmt.Sprintf("dimension: %d, but length of []float: %d", dimension, len(vectorArray)))
}
vectorBytes := typeutil.Float32ArrayToBytes(vectorArray)
values = append(values, vectorBytes)
}
return values, nil
}
// serializeFloatOrByteVectorsBaseline calls json.Unmarshal multiple times, which degrades performance
func serializeFloatOrByteVectorsBaseline(jsonResult gjson.Result, dataType schemapb.DataType, dimension int64, fpArrayToBytesFunc func([]float32) []byte) ([][]byte, error) {
values := make([][]byte, 0)
for _, vector := range jsonResult.Array() {
if vector.IsArray() {
var vectorArray []float32
err := json.Unmarshal([]byte(vector.Raw), &vectorArray)
if err != nil {
return nil, merr.WrapErrParameterInvalid(schemapb.DataType_name[int32(dataType)], vector.String(), err.Error())
}
if int64(len(vectorArray)) != dimension {
return nil, merr.WrapErrParameterInvalid(schemapb.DataType_name[int32(dataType)], vector.String(),
fmt.Sprintf("dimension: %d, but length of []float: %d", dimension, len(vectorArray)))
}
vectorBytes := fpArrayToBytesFunc(vectorArray)
values = append(values, vectorBytes)
} else if vector.Type == gjson.String {
var vectorArray []byte
err := json.Unmarshal([]byte(vector.Raw), &vectorArray)
if err != nil {
return nil, merr.WrapErrParameterInvalid(schemapb.DataType_name[int32(dataType)], vector.String(), err.Error())
}
if int64(len(vectorArray)) != dimension*2 {
return nil, merr.WrapErrParameterInvalid(schemapb.DataType_name[int32(dataType)], string(vectorArray),
fmt.Sprintf("dimension: %d, but length of []byte: %d", dimension, len(vectorArray)))
}
values = append(values, vectorArray)
} else {
return nil, merr.WrapErrParameterInvalid(schemapb.DataType_name[int32(dataType)], vector.String(), "invalid type")
}
}
return values, nil
}
// serializeFloatOrByteVectorsUnmarshalTwice calls Unmarshal twice, which degrades performance
// See: https://github.com/milvus-io/milvus/pull/37556#discussion_r1849672721
func serializeFloatOrByteVectorsUnmarshalTwice(vectorStr string, dataType schemapb.DataType, dimension int64, serializeFunc func([]float32) []byte) ([][]byte, error) {
// try to unmarshal as [][]float32 first to make sure `[[3, 3]]` is [][]float32 instead of [][]byte
fp32Values := make([][]float32, 0)
err := json.Unmarshal([]byte(vectorStr), &fp32Values)
if err == nil {
values := make([][]byte, 0)
for _, vectorArray := range fp32Values {
if int64(len(vectorArray)) != dimension {
return nil, merr.WrapErrParameterInvalid(schemapb.DataType_name[int32(dataType)], fmt.Sprintf("%v", vectorArray),
fmt.Sprintf("dimension: %d, but length of []float: %d", dimension, len(vectorArray)))
}
vectorBytes := serializeFunc(vectorArray)
values = append(values, vectorBytes)
}
return values, nil
}
return serializeByteVectors(vectorStr, dataType, dimension, dimension*2)
}
func generateVectorsStr() string {
vectors := make([][]float32, 0, 10_000)
for i := 0; i < 10_000; i++ {
vector := make([]float32, 0, 128)
for j := 0; j < 128; j++ {
vector = append(vector, rand.Float32())
}
vectors = append(vectors, vector)
}
vectorJSON, _ := json.Marshal(vectors)
return string(vectorJSON)
}
func generateVectorsJSON() gjson.Result {
vectorJSON := generateVectorsStr()
return gjson.Parse(vectorJSON)
}
func generateByteVectorsStr() string {
vectors := make([][]byte, 0, 10_000)
for i := 0; i < 10_000; i++ {
vector := make([]byte, 0, 128*4)
for j := 0; j < 128*4; j++ {
vector = append(vector, byte(rand.Intn(256)))
}
vectors = append(vectors, vector)
}
vectorJSON, _ := json.Marshal(vectors)
return string(vectorJSON)
}
func generateByteVectorsJSON() gjson.Result {
vectorJSON := generateByteVectorsStr()
return gjson.Parse(vectorJSON)
}
func BenchmarkSerialize_FloatVectors_Baseline(b *testing.B) {
vectorsJSON := generateVectorsJSON()
b.ResetTimer()
for i := 0; i < b.N; i++ {
_, err := serializeFloatVectorsBaseline(vectorsJSON.Array(), schemapb.DataType_FloatVector, 128, -1)
assert.Nil(b, err)
}
}
func BenchmarkSerialize_FloatVectors(b *testing.B) {
vectorsJSON := generateVectorsJSON()
b.ResetTimer()
for i := 0; i < b.N; i++ {
_, err := serializeFloatVectors(vectorsJSON.Raw, schemapb.DataType_FloatVector, 128, -1, typeutil.Float32ArrayToBytes)
assert.Nil(b, err)
}
}
func BenchmarkSerialize_FloatVectors_Float16(b *testing.B) {
vectorsJSON := generateVectorsJSON()
b.ResetTimer()
for i := 0; i < b.N; i++ {
_, err := serializeFloatVectors(vectorsJSON.Raw, schemapb.DataType_Float16Vector, 128, -1, typeutil.Float32ArrayToFloat16Bytes)
assert.Nil(b, err)
}
}
func BenchmarkSerialize_FloatOrByteVectors_Fp32(b *testing.B) {
vectorsJSON := generateVectorsJSON()
b.ResetTimer()
for i := 0; i < b.N; i++ {
_, err := serializeFloatOrByteVectors(vectorsJSON, schemapb.DataType_Float16Vector, 128, typeutil.Float32ArrayToFloat16Bytes)
assert.Nil(b, err)
}
}
func BenchmarkSerialize_FloatOrByteVectors_Byte(b *testing.B) {
vectorsJSON := generateByteVectorsJSON()
b.ResetTimer()
for i := 0; i < b.N; i++ {
_, err := serializeFloatOrByteVectors(vectorsJSON, schemapb.DataType_Float16Vector, 256, typeutil.Float32ArrayToFloat16Bytes)
assert.Nil(b, err)
}
}
func BenchmarkSerialize_FloatOrByteVectors_Fp32_UnmashalTwice(b *testing.B) {
vectorsJSON := generateVectorsJSON()
b.ResetTimer()
for i := 0; i < b.N; i++ {
_, err := serializeFloatOrByteVectorsUnmarshalTwice(vectorsJSON.Raw, schemapb.DataType_Float16Vector, 128, typeutil.Float32ArrayToFloat16Bytes)
assert.Nil(b, err)
}
}
func BenchmarkSerialize_FloatOrByteVectors_Byte_UnmashalTwice(b *testing.B) {
vectorsJSON := generateByteVectorsJSON()
b.ResetTimer()
for i := 0; i < b.N; i++ {
_, err := serializeFloatOrByteVectorsUnmarshalTwice(vectorsJSON.Raw, schemapb.DataType_Float16Vector, 256, typeutil.Float32ArrayToFloat16Bytes)
assert.Nil(b, err)
}
}
func BenchmarkSerialize_ByteVectors(b *testing.B) {
vectorsJSON := generateByteVectorsJSON()
b.ResetTimer()
for i := 0; i < b.N; i++ {
_, err := serializeByteVectors(vectorsJSON.Raw, schemapb.DataType_BinaryVector, -1, 512)
assert.Nil(b, err)
}
}

View File

@ -907,11 +907,14 @@ func TestInsertWithDefaultValueField(t *testing.T) {
func TestSerialize(t *testing.T) {
parameters := []float32{0.11111, 0.22222}
assert.Equal(t, "\xa4\x8d\xe3=\xa4\x8dc>", string(serialize(parameters)))
assert.Equal(t, "\n\x10\n\x02$0\x10e\x1a\b\xa4\x8d\xe3=\xa4\x8dc>", string(vectors2PlaceholderGroupBytes([][]float32{parameters}))) // todo
// test serialize fp32 to {fp32, fp16, bf16}
requestBody := "{\"data\": [[0.11111, 0.22222]]}"
vectors := gjson.Get(requestBody, HTTPRequestData)
values, err := serializeFloatVectors(vectors.Array(), schemapb.DataType_FloatVector, 2, -1)
// fp32 -> fp32
values, err := serializeFloatVectors(vectors.Raw, schemapb.DataType_FloatVector, 2, -1, typeutil.Float32ArrayToBytes)
assert.Nil(t, err)
placeholderValue := &commonpb.PlaceholderValue{
Tag: "$0",
@ -925,6 +928,32 @@ func TestSerialize(t *testing.T) {
})
assert.Nil(t, err)
assert.Equal(t, "\n\x10\n\x02$0\x10e\x1a\b\xa4\x8d\xe3=\xa4\x8dc>", string(bytes)) // todo
// fp32 -> fp16/bf16
for _, testcase := range []struct {
dataType schemapb.DataType
serializeFunc func([]float32) []byte
byteStr string
}{
{schemapb.DataType_Float16Vector, typeutil.Float32ArrayToFloat16Bytes, "\n\f\n\x02$0\x10e\x1a\x04\x1c/\x1c3"},
{schemapb.DataType_BFloat16Vector, typeutil.Float32ArrayToBFloat16Bytes, "\n\f\n\x02$0\x10e\x1a\x04\xe3=c>"},
} {
values, err = serializeFloatOrByteVectors(vectors, testcase.dataType, 2, testcase.serializeFunc)
assert.Nil(t, err)
placeholderValue := &commonpb.PlaceholderValue{
Tag: "$0",
Type: commonpb.PlaceholderType_FloatVector,
Values: values,
}
bytes, err := proto.Marshal(&commonpb.PlaceholderGroup{
Placeholders: []*commonpb.PlaceholderValue{
placeholderValue,
},
})
assert.Nil(t, err)
assert.Equal(t, testcase.byteStr, string(bytes))
}
for _, dataType := range []schemapb.DataType{schemapb.DataType_BinaryVector, schemapb.DataType_Float16Vector, schemapb.DataType_BFloat16Vector} {
request := map[string]interface{}{
HTTPRequestData: []interface{}{
@ -947,6 +976,209 @@ func TestSerialize(t *testing.T) {
}
}
func TestConvertQueries2Placeholder(t *testing.T) {
fp16Req := map[string]interface{}{
HTTPRequestData: []interface{}{
typeutil.Float32ArrayToFloat16Bytes([]float32{0.11111, 0.22222, 0, -1}),
typeutil.Float32ArrayToFloat16Bytes([]float32{1, 1, 1, 1}),
},
}
fp16ReqBody, _ := json.Marshal(fp16Req)
const Float16VecJSON = `{"data":["HC8cMwAAALw=","ADwAPAA8ADw="]}`
assert.Equal(t, Float16VecJSON, string(fp16ReqBody))
bf16Req := map[string]interface{}{
HTTPRequestData: []interface{}{
typeutil.Float32ArrayToBFloat16Bytes([]float32{0.11111, 0.22222, 0, -1}),
typeutil.Float32ArrayToBFloat16Bytes([]float32{1, 1, 1, 1}),
},
}
bf16ReqBody, _ := json.Marshal(bf16Req)
const BFloat16VecJSON = `{"data":["4z1jPgAAgL8=","gD+AP4A/gD8="]}`
assert.Equal(t, BFloat16VecJSON, string(bf16ReqBody))
type testCase struct {
requestBody string
dataType schemapb.DataType
dim int64
placeholderValue func() [][]byte
}
testCases := make([]testCase, 0)
for _, dataType := range []schemapb.DataType{schemapb.DataType_Float16Vector, schemapb.DataType_BFloat16Vector, schemapb.DataType_FloatVector} {
// corner case: empty data
testCases = append(testCases, []testCase{
{
"{\"data\": []}",
dataType,
0,
func() [][]byte {
return [][]byte{}
},
}, {
"{\"data\": []}",
dataType,
100,
func() [][]byte {
return [][]byte{}
},
}, {
"{\"data\": [[], []]}",
dataType,
0,
func() [][]byte {
return [][]byte{{}, {}}
},
},
}...)
}
for _, dataType := range []schemapb.DataType{schemapb.DataType_Float16Vector, schemapb.DataType_BFloat16Vector} {
// corner case: empty float16/bfloat16 vector
testCases = append(testCases, []testCase{
{
`"{"data": ["", ""]}"`,
dataType,
0,
func() [][]byte {
return [][]byte{nil, nil}
},
}, {
`"{"data": [""]}"`,
dataType,
0,
func() [][]byte {
return [][]byte{nil}
},
},
}...)
}
testCases = append(testCases, []testCase{
{
"{\"data\": [[0.11111, 0.22222]]}",
schemapb.DataType_FloatVector,
2,
func() [][]byte {
bv := typeutil.Float32ArrayToBytes([]float32{0.11111, 0.22222})
return [][]byte{bv}
},
}, {
"{\"data\": [[0.11111, 0.22222, 0, -1]]}",
schemapb.DataType_Float16Vector,
4,
func() [][]byte {
bv := typeutil.Float32ArrayToFloat16Bytes([]float32{0.11111, 0.22222, 0, -1})
return [][]byte{bv}
},
}, {
"{\"data\": [[0.11111, 0.22222, 0, -1], [1, 1, 1, 1]]}",
schemapb.DataType_Float16Vector,
4,
func() [][]byte {
bv1 := typeutil.Float32ArrayToFloat16Bytes([]float32{0.11111, 0.22222, 0, -1})
bv2 := typeutil.Float32ArrayToFloat16Bytes([]float32{1, 1, 1, 1})
return [][]byte{bv1, bv2}
},
}, {
"{\"data\": [[0.11111, 0.22222, 0, -1], [1, 1, 1, 1]]}",
schemapb.DataType_BFloat16Vector,
4,
func() [][]byte {
bv1 := typeutil.Float32ArrayToBFloat16Bytes([]float32{0.11111, 0.22222, 0, -1})
bv2 := typeutil.Float32ArrayToBFloat16Bytes([]float32{1, 1, 1, 1})
return [][]byte{bv1, bv2}
},
}, {
Float16VecJSON,
schemapb.DataType_Float16Vector,
4,
func() [][]byte {
bv1 := typeutil.Float32ArrayToFloat16Bytes([]float32{0.11111, 0.22222, 0, -1})
bv2 := typeutil.Float32ArrayToFloat16Bytes([]float32{1, 1, 1, 1})
return [][]byte{bv1, bv2}
},
}, {
BFloat16VecJSON,
schemapb.DataType_BFloat16Vector,
4,
func() [][]byte {
bv1 := typeutil.Float32ArrayToBFloat16Bytes([]float32{0.11111, 0.22222, 0, -1})
bv2 := typeutil.Float32ArrayToBFloat16Bytes([]float32{1, 1, 1, 1})
return [][]byte{bv1, bv2}
},
},
}...)
for _, testcase := range testCases {
phv, err := convertQueries2Placeholder(testcase.requestBody, testcase.dataType, testcase.dim)
assert.Nil(t, err)
assert.Equal(t, testcase.placeholderValue(), phv.GetValues())
}
for _, testcase := range []testCase{
// mismatched Datatype
{
"{\"data\": [[0.11111, 0.22222, 0, -1], [1, 1, 1, 1]]}",
schemapb.DataType_Float16Vector,
4,
func() [][]byte {
bv1 := typeutil.Float32ArrayToBFloat16Bytes([]float32{0.11111, 0.22222, 0, -1})
bv2 := typeutil.Float32ArrayToBFloat16Bytes([]float32{1, 1, 1, 1})
return [][]byte{bv1, bv2}
},
}, {
"{\"data\": [[0.11111, 0.22222, 0, -1], [1, 1, 1, 1]]}",
schemapb.DataType_BFloat16Vector,
4,
func() [][]byte {
bv1 := typeutil.Float32ArrayToFloat16Bytes([]float32{0.11111, 0.22222, 0, -1})
bv2 := typeutil.Float32ArrayToFloat16Bytes([]float32{1, 1, 1, 1})
return [][]byte{bv1, bv2}
},
},
} {
phv, err := convertQueries2Placeholder(testcase.requestBody, testcase.dataType, testcase.dim)
assert.Nil(t, err)
assert.NotEqual(t, testcase.placeholderValue(), phv.GetValues())
}
for _, testcase := range []testCase{
// mismatched dimension
{
"{\"data\": [[0.11111, 0.22222, 0, -1], [1, 1, 1, 1]]}",
schemapb.DataType_Float16Vector,
2,
func() [][]byte {
bv1 := typeutil.Float32ArrayToBFloat16Bytes([]float32{0.11111, 0.22222, 0, -1})
bv2 := typeutil.Float32ArrayToBFloat16Bytes([]float32{1, 1, 1, 1})
return [][]byte{bv1, bv2}
},
}, {
"{\"data\": [[0.11111, 0.22222, 0, -1], [1, 1, 1, 1]]}",
schemapb.DataType_BFloat16Vector,
8,
func() [][]byte {
bv1 := typeutil.Float32ArrayToFloat16Bytes([]float32{0.11111, 0.22222, 0, -1})
bv2 := typeutil.Float32ArrayToFloat16Bytes([]float32{1, 1, 1, 1})
return [][]byte{bv1, bv2}
},
}, {
"{\"data\": [[0.11111, 0.22222, 0, -1], [1, 1, 1]]}",
schemapb.DataType_BFloat16Vector,
4,
func() [][]byte {
bv1 := typeutil.Float32ArrayToFloat16Bytes([]float32{0.11111, 0.22222, 0, -1})
bv2 := typeutil.Float32ArrayToFloat16Bytes([]float32{1, 1, 1, 1})
return [][]byte{bv1, bv2}
},
},
} {
_, err := convertQueries2Placeholder(testcase.requestBody, testcase.dataType, testcase.dim)
assert.NotNil(t, err)
}
}
func compareRow64(m1 map[string]interface{}, m2 map[string]interface{}) bool {
for key, value := range m1 {
if key == FieldBookIntro {
@ -1815,31 +2047,50 @@ func TestVector(t *testing.T) {
float16Vector := "vector-float16"
bfloat16Vector := "vector-bfloat16"
sparseFloatVector := "vector-sparse-float"
row1 := map[string]interface{}{
FieldBookID: int64(1),
floatVector: []float32{0.1, 0.11},
binaryVector: []byte{1},
float16Vector: []byte{1, 1, 11, 11},
bfloat16Vector: []byte{1, 1, 11, 11},
sparseFloatVector: map[uint32]float32{0: 0.1, 1: 0.11},
testcaseRows := []map[string]interface{}{
{
FieldBookID: int64(1),
floatVector: []float32{0.1, 0.11},
binaryVector: []byte{1},
float16Vector: []byte{1, 1, 11, 11},
bfloat16Vector: []byte{1, 1, 11, 11},
sparseFloatVector: map[uint32]float32{0: 0.1, 1: 0.11},
},
{
FieldBookID: int64(2),
floatVector: []float32{0.2, 0.22},
binaryVector: []byte{2},
float16Vector: []byte{2, 2, 22, 22},
bfloat16Vector: []byte{2, 2, 22, 22},
sparseFloatVector: map[uint32]float32{1000: 0.3, 200: 0.44},
},
{
FieldBookID: int64(3),
floatVector: []float32{0.3, 0.33},
binaryVector: []byte{3},
float16Vector: []byte{3, 3, 33, 33},
bfloat16Vector: []byte{3, 3, 33, 33},
sparseFloatVector: map[uint32]float32{987621: 32190.31, 32189: 0.0001},
},
{
FieldBookID: int64(4),
floatVector: []float32{0.4, 0.44},
binaryVector: []byte{4},
float16Vector: []float32{0.4, 0.44},
bfloat16Vector: []float32{0.4, 0.44},
sparseFloatVector: map[uint32]float32{25: 0.1, 1: 0.11},
},
{
FieldBookID: int64(5),
floatVector: []float32{-0.4, -0.44},
binaryVector: []byte{5},
float16Vector: []int64{99999999, -99999999},
bfloat16Vector: []int64{99999999, -99999999},
sparseFloatVector: map[uint32]float32{1121: 0.1, 3: 0.11},
},
}
row2 := map[string]interface{}{
FieldBookID: int64(2),
floatVector: []float32{0.2, 0.22},
binaryVector: []byte{2},
float16Vector: []byte{2, 2, 22, 22},
bfloat16Vector: []byte{2, 2, 22, 22},
sparseFloatVector: map[uint32]float32{1000: 0.3, 200: 0.44},
}
row3 := map[string]interface{}{
FieldBookID: int64(3),
floatVector: []float32{0.3, 0.33},
binaryVector: []byte{3},
float16Vector: []byte{3, 3, 33, 33},
bfloat16Vector: []byte{3, 3, 33, 33},
sparseFloatVector: map[uint32]float32{987621: 32190.31, 32189: 0.0001},
}
body, _ := wrapRequestBody([]map[string]interface{}{row1, row2, row3})
body, err := wrapRequestBody(testcaseRows)
assert.Nil(t, err)
primaryField := generatePrimaryField(schemapb.DataType_Int64, false)
floatVectorField := generateVectorFieldSchema(schemapb.DataType_FloatVector)
floatVectorField.Name = floatVector
@ -1862,10 +2113,25 @@ func TestVector(t *testing.T) {
}
err, rows, validRows := checkAndSetData(string(body), collectionSchema)
assert.Equal(t, nil, err)
for _, row := range rows {
for i, row := range rows {
assert.Equal(t, 2, len(row[floatVector].([]float32)))
assert.Equal(t, 1, len(row[binaryVector].([]byte)))
assert.Equal(t, 4, len(row[float16Vector].([]byte)))
assert.Equal(t, 4, len(row[bfloat16Vector].([]byte)))
if fv, ok := testcaseRows[i][float16Vector].([]float32); ok {
assert.Equal(t, fv, row[float16Vector].([]float32))
} else if iv, ok := testcaseRows[i][float16Vector].([]int64); ok {
assert.Equal(t, len(iv), len(row[float16Vector].([]float32)))
} else {
assert.Equal(t, 4, len(row[float16Vector].([]byte)))
assert.Equal(t, testcaseRows[i][float16Vector].([]byte), row[float16Vector].([]byte))
}
if fv, ok := testcaseRows[i][bfloat16Vector].([]float32); ok {
assert.Equal(t, fv, row[bfloat16Vector].([]float32))
} else if iv, ok := testcaseRows[i][bfloat16Vector].([]int64); ok {
assert.Equal(t, len(iv), len(row[bfloat16Vector].([]float32)))
} else {
assert.Equal(t, 4, len(row[bfloat16Vector].([]byte)))
assert.Equal(t, testcaseRows[i][bfloat16Vector].([]byte), row[bfloat16Vector].([]byte))
}
// all test sparse rows have 2 elements, each should be of 8 bytes
assert.Equal(t, 16, len(row[sparseFloatVector].([]byte)))
}
@ -1876,7 +2142,7 @@ func TestVector(t *testing.T) {
assertError := func(field string, value interface{}) {
row := make(map[string]interface{})
for k, v := range row1 {
for k, v := range testcaseRows[0] {
row[k] = v
}
row[field] = value
@ -1885,8 +2151,6 @@ func TestVector(t *testing.T) {
assert.Error(t, err)
}
assertError(bfloat16Vector, []int64{99999999, -99999999})
assertError(float16Vector, []int64{99999999, -99999999})
assertError(binaryVector, []int64{99999999, -99999999})
assertError(floatVector, []float64{math.MaxFloat64, 0})
assertError(sparseFloatVector, map[uint32]float32{0: -0.1, 1: 0.11, 2: 0.12})

View File

@ -17,9 +17,7 @@
package httpserver
import (
"encoding/binary"
"fmt"
"math"
"github.com/cockroachdb/errors"
"google.golang.org/protobuf/proto"
@ -93,6 +91,27 @@ type FieldData struct {
FieldID int64 `json:"field_id,omitempty"`
}
func (f *FieldData) makePbFloat16OrBfloat16Array(raw json.RawMessage, serializeFunc func([]float32) []byte) ([]byte, int64, error) {
wrappedData := [][]float32{}
err := json.Unmarshal(raw, &wrappedData)
if err != nil {
return nil, 0, newFieldDataError(f.FieldName, err)
}
if len(wrappedData) < 1 {
return nil, 0, errors.New("at least one row for insert")
}
array0 := wrappedData[0]
dim := len(array0)
if dim < 1 {
return nil, 0, errors.New("dim must >= 1")
}
data := make([]byte, 0, len(wrappedData)*dim*2)
for _, fp32Array := range wrappedData {
data = append(data, serializeFunc(fp32Array)...)
}
return data, int64(dim), nil
}
// AsSchemapb converts the FieldData to schemapb.FieldData
func (f *FieldData) AsSchemapb() (*schemapb.FieldData, error) {
// is scalar
@ -229,6 +248,34 @@ func (f *FieldData) AsSchemapb() (*schemapb.FieldData, error) {
},
},
}
case schemapb.DataType_Float16Vector:
// only float32 conversion is supported right now
data, dim, err := f.makePbFloat16OrBfloat16Array(raw, typeutil.Float32ArrayToFloat16Bytes)
if err != nil {
return nil, err
}
ret.Field = &schemapb.FieldData_Vectors{
Vectors: &schemapb.VectorField{
Dim: dim,
Data: &schemapb.VectorField_Float16Vector{
Float16Vector: data,
},
},
}
case schemapb.DataType_BFloat16Vector:
// only float32 conversion is supported right now
data, dim, err := f.makePbFloat16OrBfloat16Array(raw, typeutil.Float32ArrayToBFloat16Bytes)
if err != nil {
return nil, err
}
ret.Field = &schemapb.FieldData_Vectors{
Vectors: &schemapb.VectorField{
Dim: dim,
Data: &schemapb.VectorField_Bfloat16Vector{
Bfloat16Vector: data,
},
},
}
case schemapb.DataType_SparseFloatVector:
var wrappedData []map[string]interface{}
err := json.Unmarshal(raw, &wrappedData)
@ -325,7 +372,7 @@ func vector2Bytes(vectors [][]float32) []byte {
Values: make([][]byte, 0, len(vectors)),
}
for _, vector := range vectors {
ph.Values = append(ph.Values, serializeVectors(vector))
ph.Values = append(ph.Values, typeutil.Float32ArrayToBytes(vector))
}
phg := &commonpb.PlaceholderGroup{
Placeholders: []*commonpb.PlaceholderValue{
@ -336,18 +383,6 @@ func vector2Bytes(vectors [][]float32) []byte {
return ret
}
// Serialize serialize vector into byte slice, used in search placeholder
// LittleEndian is used for convention
func serializeVectors(fv []float32) []byte {
data := make([]byte, 0, 4*len(fv)) // float32 occupies 4 bytes
buf := make([]byte, 4)
for _, f := range fv {
binary.LittleEndian.PutUint32(buf, math.Float32bits(f))
data = append(data, buf...)
}
return data
}
// WrappedCalcDistanceRequest is the RESTful request body for calc distance
type WrappedCalcDistanceRequest struct {
Base *commonpb.MsgBase `protobuf:"bytes,1,opt,name=base,proto3" json:"base,omitempty"`

View File

@ -180,61 +180,76 @@ func TestFieldData_AsSchemapb(t *testing.T) {
})
// vectors
t.Run("floatvector_ok", func(t *testing.T) {
fieldData := FieldData{
Type: schemapb.DataType_FloatVector,
Field: []byte(`[
[1.1, 2.2, 3.1],
[1.1, 2.2, 3.1],
[1.1, 2.2, 3.1]
]`),
}
raw, _ := json.Marshal(fieldData)
json.Unmarshal(raw, &fieldData)
_, err := fieldData.AsSchemapb()
assert.NoError(t, err)
})
t.Run("floatvector_empty_error", func(t *testing.T) {
fieldData := FieldData{
Type: schemapb.DataType_FloatVector,
Field: []byte(""),
}
raw, _ := json.Marshal(fieldData)
json.Unmarshal(raw, &fieldData)
_, err := fieldData.AsSchemapb()
assert.Error(t, err)
})
t.Run("floatvector_dim=0_error", func(t *testing.T) {
fieldData := FieldData{
Type: schemapb.DataType_FloatVector,
Field: []byte(`[]`),
}
raw, _ := json.Marshal(fieldData)
json.Unmarshal(raw, &fieldData)
_, err := fieldData.AsSchemapb()
assert.Error(t, err)
})
t.Run("floatvector_vectorTypeError_error", func(t *testing.T) {
fieldData := FieldData{
Type: schemapb.DataType_FloatVector,
Field: []byte(`["1"]`),
}
raw, _ := json.Marshal(fieldData)
json.Unmarshal(raw, &fieldData)
_, err := fieldData.AsSchemapb()
assert.Error(t, err)
})
t.Run("floatvector_error", func(t *testing.T) {
fieldData := FieldData{
Type: schemapb.DataType_FloatVector,
Field: []byte(`["a", "b", "c"]`),
}
raw, _ := json.Marshal(fieldData)
json.Unmarshal(raw, &fieldData)
_, err := fieldData.AsSchemapb()
assert.Error(t, err)
})
testcases := []struct {
name string
dataType schemapb.DataType
}{
{
"float", schemapb.DataType_FloatVector,
},
{
"float16", schemapb.DataType_Float16Vector,
},
{
"bfloat16", schemapb.DataType_BFloat16Vector,
},
}
for _, tc := range testcases {
t.Run(tc.name+"vector_ok", func(t *testing.T) {
fieldData := FieldData{
Type: tc.dataType,
Field: []byte(`[
[1.1, 2.2, 3.1],
[1.1, 2.2, 3.1],
[1.1, 2.2, 3.1]
]`),
}
raw, _ := json.Marshal(fieldData)
json.Unmarshal(raw, &fieldData)
_, err := fieldData.AsSchemapb()
assert.NoError(t, err)
})
t.Run(tc.name+"vector_empty_error", func(t *testing.T) {
fieldData := FieldData{
Type: tc.dataType,
Field: []byte(""),
}
raw, _ := json.Marshal(fieldData)
json.Unmarshal(raw, &fieldData)
_, err := fieldData.AsSchemapb()
assert.Error(t, err)
})
t.Run(tc.name+"vector_dim=0_error", func(t *testing.T) {
fieldData := FieldData{
Type: tc.dataType,
Field: []byte(`[]`),
}
raw, _ := json.Marshal(fieldData)
json.Unmarshal(raw, &fieldData)
_, err := fieldData.AsSchemapb()
assert.Error(t, err)
})
t.Run(tc.name+"vector_vectorTypeError_error", func(t *testing.T) {
fieldData := FieldData{
Type: tc.dataType,
Field: []byte(`["1"]`),
}
raw, _ := json.Marshal(fieldData)
json.Unmarshal(raw, &fieldData)
_, err := fieldData.AsSchemapb()
assert.Error(t, err)
})
t.Run(tc.name+"vector_error", func(t *testing.T) {
fieldData := FieldData{
Type: tc.dataType,
Field: []byte(`["a", "b", "c"]`),
}
raw, _ := json.Marshal(fieldData)
json.Unmarshal(raw, &fieldData)
_, err := fieldData.AsSchemapb()
assert.Error(t, err)
})
}
t.Run("sparsefloatvector_ok_1", func(t *testing.T) {
fieldData := FieldData{

View File

@ -24,15 +24,15 @@ import (
var (
json = sonic.ConfigStd
// Marshal is exported by gin/json package.
// Marshal is exported from bytedance/sonic package.
Marshal = json.Marshal
// Unmarshal is exported by gin/json package.
// Unmarshal is exported from bytedance/sonic package.
Unmarshal = json.Unmarshal
// MarshalIndent is exported by gin/json package.
// MarshalIndent is exported from bytedance/sonic package.
MarshalIndent = json.MarshalIndent
// NewDecoder is exported by gin/json package.
// NewDecoder is exported from bytedance/sonic package.
NewDecoder = json.NewDecoder
// NewEncoder is exported by gin/json package.
// NewEncoder is exported from bytedance/sonic package.
NewEncoder = json.NewEncoder
)

View File

@ -165,3 +165,33 @@ func SparseFloatBytesToMap(b []byte) map[uint32]float32 {
}
return values
}
// Float32ArrayToBytes serializes a float32 vector into a byte slice, used in search placeholders.
// LittleEndian is used by convention.
func Float32ArrayToBytes(fv []float32) []byte {
data := make([]byte, 0, 4*len(fv)) // float32 occupies 4 bytes
buf := make([]byte, 4)
for _, f := range fv {
binary.LittleEndian.PutUint32(buf, math.Float32bits(f))
data = append(data, buf...)
}
return data
}
// Float32ArrayToFloat16Bytes converts the float32 vector `fv` to a float16 vector, encoded as a byte slice
func Float32ArrayToFloat16Bytes(fv []float32) []byte {
data := make([]byte, 0, 2*len(fv)) // float16 occupies 2 bytes
for _, f := range fv {
data = append(data, Float32ToFloat16Bytes(f)...)
}
return data
}
// Float32ArrayToBFloat16Bytes converts the float32 vector `fv` to a bfloat16 vector, encoded as a byte slice
func Float32ArrayToBFloat16Bytes(fv []float32) []byte {
data := make([]byte, 0, 2*len(fv)) // bfloat16 occupies 2 bytes
for _, f := range fv {
data = append(data, Float32ToBFloat16Bytes(f)...)
}
return data
}
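For reference, a minimal usage sketch of the three helpers above. The import path is an assumption (the functions live in the typeutil package of the github.com/milvus-io/milvus/pkg module); only the output lengths are asserted here.
package main

import (
	"fmt"

	// NOTE: assumed import path for the typeutil helpers added in this change.
	"github.com/milvus-io/milvus/pkg/util/typeutil"
)

func main() {
	vec := []float32{0.11111, 0.22222}

	fp32 := typeutil.Float32ArrayToBytes(vec)         // 4 bytes per element, little-endian float32
	fp16 := typeutil.Float32ArrayToFloat16Bytes(vec)  // 2 bytes per element, IEEE 754 half precision
	bf16 := typeutil.Float32ArrayToBFloat16Bytes(vec) // 2 bytes per element, bfloat16

	fmt.Println(len(fp32), len(fp16), len(bf16)) // 8 4 4
}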

View File

@ -118,4 +118,22 @@ func TestConversion(t *testing.T) {
assert.Less(t, math.Abs(float64(v2/v-1)), 0.01)
}
})
t.Run("TestFloatArrays", func(t *testing.T) {
parameters := []float32{0.11111, 0.22222}
assert.Equal(t, "\xa4\x8d\xe3=\xa4\x8dc>", string(Float32ArrayToBytes(parameters)))
f16vec := Float32ArrayToFloat16Bytes(parameters)
assert.Equal(t, 4, len(f16vec))
// \x1c/ is 0.1111, \x1c3 is 0.2222
assert.Equal(t, "\x1c/\x1c3", string(f16vec))
assert.Equal(t, "\x1c/", string(Float32ToFloat16Bytes(0.11111)))
assert.Equal(t, "\x1c3", string(Float32ToFloat16Bytes(0.22222)))
bf16vec := Float32ArrayToBFloat16Bytes(parameters)
assert.Equal(t, 4, len(bf16vec))
assert.Equal(t, "\xe3=c>", string(bf16vec))
assert.Equal(t, "\xe3=", string(Float32ToBFloat16Bytes(0.11111)))
assert.Equal(t, "c>", string(Float32ToBFloat16Bytes(0.22222)))
})
}
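The bfloat16 expectations above match taking the upper 16 bits of the float32 encoding: for these two sample values the conversion behaves as plain truncation. A small self-contained sketch (not using the Milvus helpers, and only illustrating these particular inputs) that reproduces the expected strings:
package main

import (
	"encoding/binary"
	"fmt"
	"math"
)

// bf16Truncate keeps the upper 16 bits of the float32 encoding. For the two
// sample values in the test above this reproduces the Float32ToBFloat16Bytes
// output; whether the real helper truncates or rounds in general is not
// asserted here.
func bf16Truncate(f float32) []byte {
	var b [4]byte
	binary.LittleEndian.PutUint32(b[:], math.Float32bits(f))
	return b[2:4] // little-endian: the last two bytes are the most significant
}

func main() {
	fmt.Printf("%q\n", bf16Truncate(0.11111)) // "\xe3="
	fmt.Printf("%q\n", bf16Truncate(0.22222)) // "c>"
}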

View File

@ -141,14 +141,17 @@ class TestRestfulSdkCompatibility(TestBase):
FieldSchema(name="json", dtype=DataType.JSON),
FieldSchema(name="int_array", dtype=DataType.ARRAY, element_type=DataType.INT64, max_capacity=1024),
FieldSchema(name="varchar_array", dtype=DataType.ARRAY, element_type=DataType.VARCHAR, max_capacity=1024, max_length=65535),
FieldSchema(name="float_vector", dtype=DataType.FLOAT_VECTOR, dim=128)
FieldSchema(name="float_vector", dtype=DataType.FLOAT_VECTOR, dim=128),
FieldSchema(name="float16_vector", dtype=DataType.FLOAT16_VECTOR, dim=128),
FieldSchema(name="bfloat16_vector", dtype=DataType.BFLOAT16_VECTOR, dim=128),
]
default_schema = CollectionSchema(fields=default_fields, description="test collection",
enable_dynamic_field=True)
collection = Collection(name=name, schema=default_schema)
# create index by sdk
index_param = {"metric_type": "L2", "index_type": "IVF_FLAT", "params": {"nlist": 128}}
collection.create_index(field_name="float_vector", index_params=index_param)
for field_name in ("float_vector", "float16_vector", "bfloat16_vector"):
collection.create_index(field_name=field_name, index_params=index_param)
collection.load()
# insert data by restful
data = [
@ -159,6 +162,9 @@ class TestRestfulSdkCompatibility(TestBase):
"int_array": [i for i in range(10)],
"varchar_array": [str(i) for i in range(10)],
"float_vector": [random.random() for _ in range(dim)],
# float16 / bfloat16 fields accept float32 values
"float16_vector": [random.random() for _ in range(dim)],
"bfloat16_vector": [random.random() for _ in range(dim)],
"age": i}
for i in range(nb)
]

View File

@ -161,8 +161,10 @@ class TestInsertVector(TestBase):
@pytest.mark.parametrize("enable_dynamic_schema", [True])
@pytest.mark.parametrize("nb", [3000])
@pytest.mark.parametrize("dim", [128])
@pytest.mark.parametrize("pass_fp32_to_fp16_or_bf16", [True, False])
def test_insert_entities_with_all_vector_datatype(self, nb, dim, insert_round, auto_id,
is_partition_key, enable_dynamic_schema):
is_partition_key, enable_dynamic_schema,
pass_fp32_to_fp16_or_bf16):
"""
Insert entities covering all vector datatypes
"""
@ -210,9 +212,17 @@ class TestInsertVector(TestBase):
"word_count": i,
"book_describe": f"book_{i}",
"float_vector": gen_vector(datatype="FloatVector", dim=dim),
"float16_vector": gen_vector(datatype="Float16Vector", dim=dim),
"bfloat16_vector": gen_vector(datatype="BFloat16Vector", dim=dim),
"binary_vector": gen_vector(datatype="BinaryVector", dim=dim)
"float16_vector": (
gen_vector(datatype="FloatVector", dim=dim)
if pass_fp32_to_fp16_or_bf16
else gen_vector(datatype="Float16Vector", dim=dim)
),
"bfloat16_vector": (
gen_vector(datatype="FloatVector", dim=dim)
if pass_fp32_to_fp16_or_bf16
else gen_vector(datatype="BFloat16Vector", dim=dim)
),
"binary_vector": gen_vector(datatype="BinaryVector", dim=dim),
}
else:
tmp = {
@ -221,8 +231,16 @@ class TestInsertVector(TestBase):
"word_count": i,
"book_describe": f"book_{i}",
"float_vector": gen_vector(datatype="FloatVector", dim=dim),
"float16_vector": gen_vector(datatype="Float16Vector", dim=dim),
"bfloat16_vector": gen_vector(datatype="BFloat16Vector", dim=dim),
"float16_vector": (
gen_vector(datatype="FloatVector", dim=dim)
if pass_fp32_to_fp16_or_bf16
else gen_vector(datatype="Float16Vector", dim=dim)
),
"bfloat16_vector": (
gen_vector(datatype="FloatVector", dim=dim)
if pass_fp32_to_fp16_or_bf16
else gen_vector(datatype="BFloat16Vector", dim=dim)
),
"binary_vector": gen_vector(datatype="BinaryVector", dim=dim)
}
if enable_dynamic_schema:
@ -253,8 +271,10 @@ class TestInsertVector(TestBase):
@pytest.mark.parametrize("enable_dynamic_schema", [True])
@pytest.mark.parametrize("nb", [3000])
@pytest.mark.parametrize("dim", [128])
@pytest.mark.parametrize("pass_fp32_to_fp16_or_bf16", [True, False])
def test_insert_entities_with_all_vector_datatype_0(self, nb, dim, insert_round, auto_id,
is_partition_key, enable_dynamic_schema):
is_partition_key, enable_dynamic_schema,
pass_fp32_to_fp16_or_bf16):
"""
Insert entities covering all vector datatypes
"""
@ -307,8 +327,16 @@ class TestInsertVector(TestBase):
"book_describe": f"book_{i}",
"book_vector": gen_vector(datatype="FloatVector", dim=dim),
"float_vector": gen_vector(datatype="FloatVector", dim=dim),
"float16_vector": gen_vector(datatype="Float16Vector", dim=dim),
"bfloat16_vector": gen_vector(datatype="BFloat16Vector", dim=dim),
"float16_vector": (
gen_vector(datatype="FloatVector", dim=dim)
if pass_fp32_to_fp16_or_bf16
else gen_vector(datatype="Float16Vector", dim=dim)
),
"bfloat16_vector": (
gen_vector(datatype="FloatVector", dim=dim)
if pass_fp32_to_fp16_or_bf16
else gen_vector(datatype="BFloat16Vector", dim=dim)
),
}
else:
tmp = {
@ -318,8 +346,16 @@ class TestInsertVector(TestBase):
"book_describe": f"book_{i}",
"book_vector": gen_vector(datatype="FloatVector", dim=dim),
"float_vector": gen_vector(datatype="FloatVector", dim=dim),
"float16_vector": gen_vector(datatype="Float16Vector", dim=dim),
"bfloat16_vector": gen_vector(datatype="BFloat16Vector", dim=dim),
"float16_vector": (
gen_vector(datatype="FloatVector", dim=dim)
if pass_fp32_to_fp16_or_bf16
else gen_vector(datatype="Float16Vector", dim=dim)
),
"bfloat16_vector": (
gen_vector(datatype="FloatVector", dim=dim)
if pass_fp32_to_fp16_or_bf16
else gen_vector(datatype="BFloat16Vector", dim=dim)
),
}
if enable_dynamic_schema:
tmp.update({f"dynamic_field_{i}": i})
@ -349,8 +385,10 @@ class TestInsertVector(TestBase):
@pytest.mark.parametrize("enable_dynamic_schema", [True])
@pytest.mark.parametrize("nb", [3000])
@pytest.mark.parametrize("dim", [128])
@pytest.mark.parametrize("pass_fp32_to_fp16_or_bf16", [True, False])
def test_insert_entities_with_all_vector_datatype_1(self, nb, dim, insert_round, auto_id,
is_partition_key, enable_dynamic_schema):
is_partition_key, enable_dynamic_schema,
pass_fp32_to_fp16_or_bf16):
"""
Insert entities covering all vector datatypes
"""
@ -399,8 +437,16 @@ class TestInsertVector(TestBase):
"word_count": i,
"book_describe": f"book_{i}",
"float_vector": gen_vector(datatype="FloatVector", dim=dim),
"float16_vector": gen_vector(datatype="Float16Vector", dim=dim),
"bfloat16_vector": gen_vector(datatype="BFloat16Vector", dim=dim),
"float16_vector": (
gen_vector(datatype="FloatVector", dim=dim)
if pass_fp32_to_fp16_or_bf16
else gen_vector(datatype="Float16Vector", dim=dim)
),
"bfloat16_vector": (
gen_vector(datatype="FloatVector", dim=dim)
if pass_fp32_to_fp16_or_bf16
else gen_vector(datatype="BFloat16Vector", dim=dim)
),
}
else:
tmp = {
@ -409,8 +455,16 @@ class TestInsertVector(TestBase):
"word_count": i,
"book_describe": f"book_{i}",
"float_vector": gen_vector(datatype="FloatVector", dim=dim),
"float16_vector": gen_vector(datatype="Float16Vector", dim=dim),
"bfloat16_vector": gen_vector(datatype="BFloat16Vector", dim=dim),
"float16_vector": (
gen_vector(datatype="FloatVector", dim=dim)
if pass_fp32_to_fp16_or_bf16
else gen_vector(datatype="Float16Vector", dim=dim)
),
"bfloat16_vector": (
gen_vector(datatype="FloatVector", dim=dim)
if pass_fp32_to_fp16_or_bf16
else gen_vector(datatype="BFloat16Vector", dim=dim)
),
}
if enable_dynamic_schema:
tmp.update({f"dynamic_field_{i}": i})
@ -634,7 +688,6 @@ class TestInsertVector(TestBase):
assert len(rsp['data']) == 50
@pytest.mark.L0
class TestInsertVectorNegative(TestBase):
@ -937,8 +990,10 @@ class TestSearchVector(TestBase):
@pytest.mark.parametrize("enable_dynamic_schema", [True])
@pytest.mark.parametrize("nb", [3000])
@pytest.mark.parametrize("dim", [16])
@pytest.mark.parametrize("pass_fp32_to_fp16_or_bf16", [True, False])
def test_search_vector_with_all_vector_datatype(self, nb, dim, insert_round, auto_id,
is_partition_key, enable_dynamic_schema):
is_partition_key, enable_dynamic_schema,
pass_fp32_to_fp16_or_bf16):
"""
Insert entities covering all vector datatypes and search over them
"""
@ -986,8 +1041,16 @@ class TestSearchVector(TestBase):
"word_count": i,
"book_describe": f"book_{i}",
"float_vector": gen_vector(datatype="FloatVector", dim=dim),
"float16_vector": gen_vector(datatype="Float16Vector", dim=dim),
"bfloat16_vector": gen_vector(datatype="BFloat16Vector", dim=dim),
"float16_vector": (
gen_vector(datatype="FloatVector", dim=dim)
if pass_fp32_to_fp16_or_bf16
else gen_vector(datatype="Float16Vector", dim=dim)
),
"bfloat16_vector": (
gen_vector(datatype="FloatVector", dim=dim)
if pass_fp32_to_fp16_or_bf16
else gen_vector(datatype="BFloat16Vector", dim=dim)
),
"binary_vector": gen_vector(datatype="BinaryVector", dim=dim)
}
else:
@ -997,8 +1060,16 @@ class TestSearchVector(TestBase):
"word_count": i,
"book_describe": f"book_{i}",
"float_vector": gen_vector(datatype="FloatVector", dim=dim),
"float16_vector": gen_vector(datatype="Float16Vector", dim=dim),
"bfloat16_vector": gen_vector(datatype="BFloat16Vector", dim=dim),
"float16_vector": (
gen_vector(datatype="FloatVector", dim=dim)
if pass_fp32_to_fp16_or_bf16
else gen_vector(datatype="Float16Vector", dim=dim)
),
"bfloat16_vector": (
gen_vector(datatype="FloatVector", dim=dim)
if pass_fp32_to_fp16_or_bf16
else gen_vector(datatype="BFloat16Vector", dim=dim)
),
"binary_vector": gen_vector(datatype="BinaryVector", dim=dim)
}
if enable_dynamic_schema:
@ -1985,7 +2056,6 @@ class TestSearchVector(TestBase):
assert token in d[field]
@pytest.mark.L0
class TestSearchVectorNegative(TestBase):
@ -2210,7 +2280,6 @@ class TestAdvancedSearchVector(TestBase):
assert len(rsp['data']) == 10
@pytest.mark.L0
class TestHybridSearchVector(TestBase):
@ -2318,8 +2387,6 @@ class TestHybridSearchVector(TestBase):
assert len(rsp['data']) == 10
@pytest.mark.L0
class TestQueryVector(TestBase):
@ -2463,8 +2530,10 @@ class TestQueryVector(TestBase):
@pytest.mark.parametrize("enable_dynamic_schema", [True])
@pytest.mark.parametrize("nb", [3000])
@pytest.mark.parametrize("dim", [128])
@pytest.mark.parametrize("pass_fp32_to_fp16_or_bf16", [True, False])
def test_query_entities_with_all_vector_datatype(self, nb, dim, insert_round, auto_id,
is_partition_key, enable_dynamic_schema):
is_partition_key, enable_dynamic_schema,
pass_fp32_to_fp16_or_bf16):
"""
Insert entities covering all vector datatypes and query them
"""
@ -2512,8 +2581,16 @@ class TestQueryVector(TestBase):
"word_count": i,
"book_describe": f"book_{i}",
"float_vector": gen_vector(datatype="FloatVector", dim=dim),
"float16_vector": gen_vector(datatype="Float16Vector", dim=dim),
"bfloat16_vector": gen_vector(datatype="BFloat16Vector", dim=dim),
"float16_vector": (
gen_vector(datatype="FloatVector", dim=dim)
if pass_fp32_to_fp16_or_bf16
else gen_vector(datatype="Float16Vector", dim=dim)
),
"bfloat16_vector": (
gen_vector(datatype="FloatVector", dim=dim)
if pass_fp32_to_fp16_or_bf16
else gen_vector(datatype="BFloat16Vector", dim=dim)
),
"binary_vector": gen_vector(datatype="BinaryVector", dim=dim)
}
else:
@ -2523,8 +2600,16 @@ class TestQueryVector(TestBase):
"word_count": i,
"book_describe": f"book_{i}",
"float_vector": gen_vector(datatype="FloatVector", dim=dim),
"float16_vector": gen_vector(datatype="Float16Vector", dim=dim),
"bfloat16_vector": gen_vector(datatype="BFloat16Vector", dim=dim),
"float16_vector": (
gen_vector(datatype="FloatVector", dim=dim)
if pass_fp32_to_fp16_or_bf16
else gen_vector(datatype="Float16Vector", dim=dim)
),
"bfloat16_vector": (
gen_vector(datatype="FloatVector", dim=dim)
if pass_fp32_to_fp16_or_bf16
else gen_vector(datatype="BFloat16Vector", dim=dim)
),
"binary_vector": gen_vector(datatype="BinaryVector", dim=dim)
}
if enable_dynamic_schema:
@ -2821,8 +2906,6 @@ class TestQueryVector(TestBase):
assert token in d[field]
@pytest.mark.L0
class TestQueryVectorNegative(TestBase):

View File

@ -23,93 +23,159 @@ MILVUS_SERVICE_NAME=$(echo "${MILVUS_HELM_RELEASE_NAME}-milvus.${MILVUS_HELM_NAM
MILVUS_SERVICE_ADDRESS="${MILVUS_SERVICE_NAME}:9091"
# Create a collection
curl -X 'POST' \
if curl -X 'POST' \
"http://${MILVUS_SERVICE_ADDRESS}/api/v1/collection" \
-H 'accept: application/json' \
-H 'Content-Type: application/json' \
-d @${DATA_PATH}/create-collection.json
-d @${DATA_PATH}/create-collection.json | grep -q "error_code" ; then
exit 1
fi
# Has collection
curl -X 'GET' \
if curl -X 'GET' \
"http://${MILVUS_SERVICE_ADDRESS}/api/v1/collection/existence" \
-H 'accept: application/json' \
-H 'Content-Type: application/json' \
-d '{
"collection_name": "book"
}'
}' | grep -q "error_code" ; then
exit 1
fi
# Check collection details
curl -X 'GET' \
if curl -X 'GET' \
"http://${MILVUS_SERVICE_ADDRESS}/api/v1/collection" \
-H 'accept: application/json' \
-H 'Content-Type: application/json' \
-d '{
"collection_name": "book"
}'
}' | grep -q "error_code" ; then
exit 1
fi
# Load collection
curl -X 'POST' \
"http://${MILVUS_SERVICE_ADDRESS}/api/v1/collection/load" \
-H 'accept: application/json' \
-H 'Content-Type: application/json' \
-d '{
"collection_name": "book"
}'
### Data
# Insert Data
curl -X 'POST' \
if curl -X 'POST' \
"http://${MILVUS_SERVICE_ADDRESS}/api/v1/entities" \
-H 'accept: application/json' \
-H 'Content-Type: application/json' \
-d @${DATA_PATH}/insert-data.json
-d @${DATA_PATH}/insert-data.json | grep -q "error_code" ; then
exit 1
fi
# Build Index
curl -X 'POST' \
# Build Index for book_intro
if curl -X 'POST' \
"http://${MILVUS_SERVICE_ADDRESS}/api/v1/index" \
-H 'accept: application/json' \
-H 'Content-Type: application/json' \
-d '{
"collection_name": "book",
"field_name": "book_intro",
"index_name": "book_intro_index",
"extra_params":[
{"key": "metric_type", "value": "L2"},
{"key": "index_type", "value": "IVF_FLAT"},
{"key": "params", "value": "{\"nlist\":1024}"}
]
}'
}' | grep -q "error_code" ; then
exit 1
fi
# KNN Search
curl -X 'POST' \
"http://${MILVUS_SERVICE_ADDRESS}/api/v1/search" \
-H 'accept: application/json' \
-H 'Content-Type: application/json' \
-d @${DATA_PATH}/search.json
# Drop Index
curl -X 'DELETE' \
# Build Index for author_intro
if curl -X 'POST' \
"http://${MILVUS_SERVICE_ADDRESS}/api/v1/index" \
-H 'accept: application/json' \
-H 'Content-Type: application/json' \
-d '{
"collection_name": "book",
"field_name": "book_intro"
}'
"field_name": "author_intro",
"index_name": "author_intro_index",
"extra_params":[
{"key": "metric_type", "value": "L2"},
{"key": "index_type", "value": "IVF_FLAT"},
{"key": "params", "value": "{\"nlist\":1024}"}
]
}' | grep -q "error_code" ; then
exit 1
fi
# Release collection
curl -X 'DELETE' \
# Build Index for comment
if curl -X 'POST' \
"http://${MILVUS_SERVICE_ADDRESS}/api/v1/index" \
-H 'accept: application/json' \
-H 'Content-Type: application/json' \
-d '{
"collection_name": "book",
"field_name": "comment",
"index_name": "comment_index",
"extra_params":[
{"key": "metric_type", "value": "L2"},
{"key": "index_type", "value": "IVF_FLAT"},
{"key": "params", "value": "{\"nlist\":1024}"}
]
}' | grep -q "error_code" ; then
exit 1
fi
# Load collection
if curl -X 'POST' \
"http://${MILVUS_SERVICE_ADDRESS}/api/v1/collection/load" \
-H 'accept: application/json' \
-H 'Content-Type: application/json' \
-d '{
"collection_name": "book"
}'
}' | grep -q "error_code" ; then
exit 1
fi
# KNN Search
# TODO: search fp16/bf16
for SEARCH_JSON in search-book-intro ; do
if curl -X 'POST' \
"http://${MILVUS_SERVICE_ADDRESS}/api/v1/search" \
-H 'accept: application/json' \
-H 'Content-Type: application/json' \
-d @${DATA_PATH}/${SEARCH_JSON}.json | grep -q "error_code" ; then
exit 1
fi
done
# Release collection
if curl -X 'DELETE' \
"http://${MILVUS_SERVICE_ADDRESS}/api/v1/collection/load" \
-H 'accept: application/json' \
-H 'Content-Type: application/json' \
-d '{
"collection_name": "book"
}' | grep -q "error_code" ; then
exit 1
fi
# Drop Index
for FIELD_NAME in book_intro author_intro comment ; do
if curl -X 'DELETE' \
"http://${MILVUS_SERVICE_ADDRESS}/api/v1/index" \
-H 'accept: application/json' \
-H 'Content-Type: application/json' \
-d "{
\"collection_name\": \"book\",
\"field_name\": \"${FIELD_NAME}\",
\"index_name\": \"${FIELD_NAME}_index\"
}" | grep -q "error_code" ; then
exit 1
fi
done
# Drop collection
curl -X 'DELETE' \
if curl -X 'DELETE' \
"http://${MILVUS_SERVICE_ADDRESS}/api/v1/collection" \
-H 'accept: application/json' \
-H 'Content-Type: application/json' \
-d '{
"collection_name": "book"
}'
}' | grep -q "error_code" ; then
exit 1
fi
echo "e2e-restful.sh success!"

View File

@ -1,7 +1,6 @@
{
"collection_name": "book",
"consistency_level": 1,
"db_name": "string",
"schema": {
"autoID": false,
"description": "Test book search",
@ -25,8 +24,34 @@
"value": "2"
}
]
},
{
"name": "author_intro",
"description": "embedded vector of author introduction",
"autoID": false,
"data_type": 102,
"is_primary_key": false,
"type_params": [
{
"key": "dim",
"value": "2"
}
]
},
{
"name": "comment",
"description": "embedded vector of comment",
"autoID": false,
"data_type": 103,
"is_primary_key": false,
"type_params": [
{
"key": "dim",
"value": "2"
}
]
}
],
"name": "book"
}
}
}

View File

@ -5,16 +5,30 @@
"field_name": "book_id",
"type": 5,
"field": [
1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,64,65,66,67,68,69,70,71,72,73,74,75,76,77,78,79,80,81,82,83,84,85,86,87,88,89,90,91,92,93,94,95,96,97,98,99,100,
1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,64,65,66,67,68,69,70,71,72,73,74,75,76,77,78,79,80,81,82,83,84,85,86,87,88,89,90,91,92,93,94,95,96,97,98,99,100
]
},
{
"field_name": "book_intro",
"type": 101,
"field": [
[1,1],[2,1],[3,1],[4,1],[5,1],[6,1],[7,1],[8,1],[9,1],[10,1],[11,1],[12,1],[13,1],[14,1],[15,1],[16,1],[17,1],[18,1],[19,1],[20,1],[21,1],[22,1],[23,1],[24,1],[25,1],[26,1],[27,1],[28,1],[29,1],[30,1],[31,1],[32,1],[33,1],[34,1],[35,1],[36,1],[37,1],[38,1],[39,1],[40,1],[41,1],[42,1],[43,1],[44,1],[45,1],[46,1],[47,1],[48,1],[49,1],[50,1],[51,1],[52,1],[53,1],[54,1],[55,1],[56,1],[57,1],[58,1],[59,1],[60,1],[61,1],[62,1],[63,1],[64,1],[65,1],[66,1],[67,1],[68,1],[69,1],[70,1],[71,1],[72,1],[73,1],[74,1],[75,1],[76,1],[77,1],[78,1],[79,1],[80,1],[81,1],[82,1],[83,1],[84,1],[85,1],[86,1],[87,1],[88,1],[89,1],[90,1],[91,1],[92,1],[93,1],[94,1],[95,1],[96,1],[97,1],[98,1],[99,1],[100,1],
[1,1],[2,1],[3,1],[4,1],[5,1],[6,1],[7,1],[8,1],[9,1],[10,1],[11,1],[12,1],[13,1],[14,1],[15,1],[16,1],[17,1],[18,1],[19,1],[20,1],[21,1],[22,1],[23,1],[24,1],[25,1],[26,1],[27,1],[28,1],[29,1],[30,1],[31,1],[32,1],[33,1],[34,1],[35,1],[36,1],[37,1],[38,1],[39,1],[40,1],[41,1],[42,1],[43,1],[44,1],[45,1],[46,1],[47,1],[48,1],[49,1],[50,1],[51,1],[52,1],[53,1],[54,1],[55,1],[56,1],[57,1],[58,1],[59,1],[60,1],[61,1],[62,1],[63,1],[64,1],[65,1],[66,1],[67,1],[68,1],[69,1],[70,1],[71,1],[72,1],[73,1],[74,1],[75,1],[76,1],[77,1],[78,1],[79,1],[80,1],[81,1],[82,1],[83,1],[84,1],[85,1],[86,1],[87,1],[88,1],[89,1],[90,1],[91,1],[92,1],[93,1],[94,1],[95,1],[96,1],[97,1],[98,1],[99,1],[100,1]
]
},
{
"field_name": "author_intro",
"type": 102,
"field": [
[1.0,1.0],[2,1.0],[3,1.0],[4,1.0],[5,1.0],[6,1.0],[7,1.0],[8,1.0],[9,1.0],[1.00,1.0],[1.010,1.0],[1.02,1.0],[1.03,1.0],[1.04,1.0],[1.05,1.0],[1.06,1.0],[1.07,1.0],[1.08,1.0],[1.09,1.0],[20,1.0],[21.0,1.0],[22,1.0],[23,1.0],[24,1.0],[25,1.0],[26,1.0],[27,1.0],[28,1.0],[29,1.0],[30,1.0],[31.0,1.0],[32,1.0],[33,1.0],[34,1.0],[35,1.0],[36,1.0],[37,1.0],[38,1.0],[39,1.0],[40,1.0],[41.0,1.0],[42,1.0],[43,1.0],[44,1.0],[45,1.0],[46,1.0],[47,1.0],[48,1.0],[49,1.0],[50,1.0],[51.0,1.0],[52,1.0],[53,1.0],[54,1.0],[55,1.0],[56,1.0],[57,1.0],[58,1.0],[59,1.0],[60,1.0],[61.0,1.0],[62,1.0],[63,1.0],[64,1.0],[65,1.0],[66,1.0],[67,1.0],[68,1.0],[69,1.0],[70,1.0],[71.0,1.0],[72,1.0],[73,1.0],[74,1.0],[75,1.0],[76,1.0],[77,1.0],[78,1.0],[79,1.0],[80,1.0],[81.0,1.0],[82,1.0],[83,1.0],[84,1.0],[85,1.0],[86,1.0],[87,1.0],[88,1.0],[89,1.0],[90,1.0],[91.0,1.0],[92,1.0],[93,1.0],[94,1.0],[95,1.0],[96,1.0],[97,1.0],[98,1.0],[99,1.0],[1.000,1.0]
]
},
{
"field_name": "comment",
"type": 103,
"field": [
[1.0,1.0],[2,1.0],[3,1.0],[4,1.0],[5,1.0],[6,1.0],[7,1.0],[8,1.0],[9,1.0],[1.00,1.0],[1.010,1.0],[1.02,1.0],[1.03,1.0],[1.04,1.0],[1.05,1.0],[1.06,1.0],[1.07,1.0],[1.08,1.0],[1.09,1.0],[20,1.0],[21.0,1.0],[22,1.0],[23,1.0],[24,1.0],[25,1.0],[26,1.0],[27,1.0],[28,1.0],[29,1.0],[30,1.0],[31.0,1.0],[32,1.0],[33,1.0],[34,1.0],[35,1.0],[36,1.0],[37,1.0],[38,1.0],[39,1.0],[40,1.0],[41.0,1.0],[42,1.0],[43,1.0],[44,1.0],[45,1.0],[46,1.0],[47,1.0],[48,1.0],[49,1.0],[50,1.0],[51.0,1.0],[52,1.0],[53,1.0],[54,1.0],[55,1.0],[56,1.0],[57,1.0],[58,1.0],[59,1.0],[60,1.0],[61.0,1.0],[62,1.0],[63,1.0],[64,1.0],[65,1.0],[66,1.0],[67,1.0],[68,1.0],[69,1.0],[70,1.0],[71.0,1.0],[72,1.0],[73,1.0],[74,1.0],[75,1.0],[76,1.0],[77,1.0],[78,1.0],[79,1.0],[80,1.0],[81.0,1.0],[82,1.0],[83,1.0],[84,1.0],[85,1.0],[86,1.0],[87,1.0],[88,1.0],[89,1.0],[90,1.0],[91.0,1.0],[92,1.0],[93,1.0],[94,1.0],[95,1.0],[96,1.0],[97,1.0],[98,1.0],[99,1.0],[1.000,1.0]
]
}
],
"num_rows": 1000
}
"num_rows": 100
}

View File

@ -11,4 +11,4 @@
"vectors": [ [10,5] ],
"dsl": "",
"dsl_type": 1
}
}