// Licensed to the LF AI & Data foundation under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package importv2

import (
	"context"
	"strconv"
	"testing"
	"time"

	"github.com/stretchr/testify/assert"
	"go.uber.org/zap"
	"google.golang.org/protobuf/proto"

	"github.com/milvus-io/milvus-proto/go-api/v2/commonpb"
	"github.com/milvus-io/milvus-proto/go-api/v2/milvuspb"
	"github.com/milvus-io/milvus-proto/go-api/v2/schemapb"
	"github.com/milvus-io/milvus/internal/util/importutilv2"
	"github.com/milvus-io/milvus/internal/util/testutil"
	"github.com/milvus-io/milvus/pkg/v2/common"
	"github.com/milvus-io/milvus/pkg/v2/log"
	"github.com/milvus-io/milvus/pkg/v2/proto/internalpb"
	"github.com/milvus-io/milvus/pkg/v2/util/funcutil"
	"github.com/milvus-io/milvus/pkg/v2/util/metric"
	"github.com/milvus-io/milvus/tests/integration"
)

func TestGenerateJsonFileWithVectorArray(t *testing.T) {
	const (
		rowCount         = 100
		dim              = 32
		maxArrayCapacity = 10
	)

	collectionName := "TestBulkInsert_VectorArray_" + funcutil.GenRandomStr()

	// Create schema with StructArrayField containing vector array
	schema := integration.ConstructSchema(collectionName, 0, true, &schemapb.FieldSchema{
		FieldID:      100,
		Name:         integration.Int64Field,
		IsPrimaryKey: true,
		DataType:     schemapb.DataType_Int64,
		AutoID:       false,
	}, &schemapb.FieldSchema{
		FieldID:  101,
		Name:     integration.VarCharField,
		DataType: schemapb.DataType_VarChar,
		TypeParams: []*commonpb.KeyValuePair{
			{
				Key:   common.MaxLengthKey,
				Value: "256",
			},
		},
	})

	// Add StructArrayField with vector array
	structField := &schemapb.StructArrayFieldSchema{
		FieldID: 102,
		Name:    "struct_with_vector_array",
		Fields: []*schemapb.FieldSchema{
			{
				FieldID:      103,
				Name:         "vector_array_field",
				IsPrimaryKey: false,
				DataType:     schemapb.DataType_ArrayOfVector,
				ElementType:  schemapb.DataType_FloatVector,
				TypeParams: []*commonpb.KeyValuePair{
					{
						Key:   common.DimKey,
						Value: strconv.Itoa(dim),
					},
					{
						Key:   common.MaxCapacityKey,
						Value: strconv.Itoa(maxArrayCapacity),
					},
				},
			},
			{
				FieldID:      104,
				Name:         "scalar_array_field",
				IsPrimaryKey: false,
				DataType:     schemapb.DataType_Array,
				ElementType:  schemapb.DataType_Int32,
				TypeParams: []*commonpb.KeyValuePair{
					{
						Key:   common.MaxCapacityKey,
						Value: strconv.Itoa(maxArrayCapacity),
					},
				},
			},
		},
	}
	schema.StructArrayFields = []*schemapb.StructArrayFieldSchema{structField}
	schema.EnableDynamicField = false

	insertData, err := testutil.CreateInsertData(schema, rowCount)
	assert.NoError(t, err)

	rows, err := testutil.CreateInsertDataRowsForJSON(schema, insertData)
	assert.NoError(t, err)
	// A generated row set should contain exactly one JSON row per entity.
	assert.Len(t, rows, rowCount)
}
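
// Shape note (an assumption about testutil's generator, not asserted above):
// each JSON row is expected to carry one entry per schema field, with the
// struct-array field holding, per row, up to maxArrayCapacity float vectors
// (each of length dim) and matching int32 values.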

func (s *BulkInsertSuite) runForStructArray() {
	const (
		rowCount         = 100
		dim              = 32
		maxArrayCapacity = 10
	)

	c := s.Cluster
	ctx, cancel := context.WithTimeout(c.GetContext(), 600*time.Second)
	defer cancel()

	collectionName := "TestBulkInsert_VectorArray_" + funcutil.GenRandomStr()

	// Create schema with StructArrayField containing vector array
	schema := integration.ConstructSchema(collectionName, 0, true, &schemapb.FieldSchema{
		FieldID:      100,
		Name:         integration.Int64Field,
		IsPrimaryKey: true,
		DataType:     schemapb.DataType_Int64,
		AutoID:       false,
	}, &schemapb.FieldSchema{
		FieldID:  101,
		Name:     integration.VarCharField,
		DataType: schemapb.DataType_VarChar,
		TypeParams: []*commonpb.KeyValuePair{
			{
				Key:   common.MaxLengthKey,
				Value: "256",
			},
		},
	})

	// Add StructArrayField with vector array
	structField := &schemapb.StructArrayFieldSchema{
		FieldID: 102,
		Name:    "struct_with_vector_array",
		Fields: []*schemapb.FieldSchema{
			{
				FieldID:      103,
				Name:         "vector_array_field",
				IsPrimaryKey: false,
				DataType:     schemapb.DataType_ArrayOfVector,
				ElementType:  schemapb.DataType_FloatVector,
				TypeParams: []*commonpb.KeyValuePair{
					{
						Key:   common.DimKey,
						Value: strconv.Itoa(dim),
					},
					{
						Key:   common.MaxCapacityKey,
						Value: strconv.Itoa(maxArrayCapacity),
					},
				},
			},
			{
				FieldID:      104,
				Name:         "scalar_array_field",
				IsPrimaryKey: false,
				DataType:     schemapb.DataType_Array,
				ElementType:  schemapb.DataType_Int32,
				TypeParams: []*commonpb.KeyValuePair{
					{
						Key:   common.MaxCapacityKey,
						Value: strconv.Itoa(maxArrayCapacity),
					},
				},
			},
		},
	}
	schema.StructArrayFields = []*schemapb.StructArrayFieldSchema{structField}
	schema.EnableDynamicField = false
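
	// In effect, "struct_with_vector_array" models an array of structs per
	// row: each element pairs one float vector of length dim with one int32
	// value, up to maxArrayCapacity elements. The ArrayOfVector sub-field is
	// what the embedding-list index and MaxSim search below operate on.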

	marshaledSchema, err := proto.Marshal(schema)
	s.NoError(err)

	createCollectionStatus, err := c.MilvusClient.CreateCollection(ctx, &milvuspb.CreateCollectionRequest{
		DbName:         "",
		CollectionName: collectionName,
		Schema:         marshaledSchema,
		ShardsNum:      common.DefaultShardsNum,
	})
	s.NoError(err)
	s.Equal(int32(0), createCollectionStatus.GetCode())

	var files []*internalpb.ImportFile

	options := []*commonpb.KeyValuePair{}

	switch s.fileType {
	case importutilv2.JSON:
		rowBasedFile := GenerateJSONFile(s.T(), c, schema, rowCount)
		files = []*internalpb.ImportFile{
			{
				Paths: []string{
					rowBasedFile,
				},
			},
		}
	case importutilv2.Parquet:
		filePath, err := GenerateParquetFile(s.Cluster, schema, rowCount)
		s.NoError(err)
		files = []*internalpb.ImportFile{
			{
				Paths: []string{
					filePath,
				},
			},
		}
	case importutilv2.CSV:
		filePath, sep := GenerateCSVFile(s.T(), s.Cluster, schema, rowCount)
		options = []*commonpb.KeyValuePair{{Key: "sep", Value: string(sep)}}
		files = []*internalpb.ImportFile{
			{
				Paths: []string{
					filePath,
				},
			},
		}
	}
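
	// All three formats feed the same ImportV2 request below; only CSV needs
	// an explicit separator option, using the delimiter the generator chose.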

	// Import data
	importResp, err := c.ProxyClient.ImportV2(ctx, &internalpb.ImportRequest{
		CollectionName: collectionName,
		Files:          files,
		Options:        options,
	})
	s.NoError(err)
	s.NotNil(importResp)
	s.Equal(int32(0), importResp.GetStatus().GetCode())

	log.Info("Import response", zap.Any("resp", importResp))
	jobID := importResp.GetJobID()

	// Wait for import to complete
	err = WaitForImportDone(ctx, s.Cluster, jobID)
	s.NoError(err)

	// Create index for vector array field
	createIndexStatus, err := c.MilvusClient.CreateIndex(ctx, &milvuspb.CreateIndexRequest{
		CollectionName: collectionName,
		FieldName:      "vector_array_field",
		IndexName:      "_default_idx",
		ExtraParams:    integration.ConstructIndexParam(dim, s.indexType, s.metricType),
	})
	if err == nil {
		s.Equal(int32(0), createIndexStatus.GetCode(), createIndexStatus.GetReason())
	}

	// Load collection
	loadStatus, err := c.MilvusClient.LoadCollection(ctx, &milvuspb.LoadCollectionRequest{
		CollectionName: collectionName,
	})
	s.NoError(err)
	s.Equal(int32(0), loadStatus.GetCode(), loadStatus.GetReason())
	s.WaitForLoad(ctx, collectionName)
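
	// The request below is an embedding-list search: each of the nq queries
	// is itself a list of vectors, scored against each row's vector array
	// with MaxSim (best similarity per query vector, summed).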

	// search
	nq := 10
	topk := 10

	outputFields := []string{"vector_array_field"}
	params := integration.GetSearchParams(s.indexType, s.metricType)
	searchReq := integration.ConstructEmbeddingListSearchRequest("", collectionName, "",
		"vector_array_field", s.vecType, outputFields, s.metricType, params, nq, dim, topk, -1)

	searchResp, err := s.Cluster.MilvusClient.Search(ctx, searchReq)
	s.Require().NoError(err)
	s.Require().Equal(commonpb.ErrorCode_Success, searchResp.GetStatus().GetErrorCode(), searchResp.GetStatus().GetReason())

	result := searchResp.GetResults()
	s.Require().Len(result.GetIds().GetIntId().GetData(), nq*topk)
	s.Require().Len(result.GetScores(), nq*topk)
	s.Require().GreaterOrEqual(len(result.GetFieldsData()), 1)
	s.Require().EqualValues(nq, result.GetNumQueries())
	s.Require().EqualValues(topk, result.GetTopK())
}

func (s *BulkInsertSuite) TestImportWithVectorArray() {
	fileTypeArr := []importutilv2.FileType{importutilv2.CSV, importutilv2.Parquet, importutilv2.JSON}
	for _, fileType := range fileTypeArr {
		s.fileType = fileType
		s.vecType = schemapb.DataType_FloatVector
		s.indexType = integration.IndexEmbListHNSW
		s.metricType = metric.MaxSim
		s.runForStructArray()
	}
}
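
// maxSimSketch is an illustrative sketch, not part of the test flow and not
// Milvus's implementation: it shows how a MaxSim-style score between two
// embedding lists can be computed, assuming inner-product similarity and
// equal vector dimensions.
func maxSimSketch(query, doc [][]float32) float32 {
	var total float32
	for _, q := range query {
		// Best inner product between this query vector and any doc vector.
		best := float32(0)
		first := true
		for _, d := range doc {
			var ip float32
			for i := range q {
				ip += q[i] * d[i]
			}
			if first || ip > best {
				best = ip
				first = false
			}
		}
		total += best
	}
	return total
}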