Report bulk load state (#16555)
Signed-off-by: groot <yihua.mo@zilliz.com>
This commit is contained in:
parent 91e84ffedf
commit a6a3b69d91
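In short, this commit threads a rootcoordpb.ImportResult and a report callback through DataNode.Import and importutil.ImportWrapper, so that each stage of a bulk-load task (started, file downloaded, file parsed, data persisted, or failed) plus the auto-generated ID ranges are pushed back to RootCoord via ReportImport. The sketch below is a minimal, self-contained illustration of that callback pattern; Stage, Result, runImport and the constant names are simplified stand-ins for the real Milvus types, not the actual API.

package main

import "fmt"

// Stage mirrors, in simplified form, the import states used by this change.
type Stage string

const (
	Started    Stage = "ImportStarted"
	Downloaded Stage = "ImportDownloaded"
	Parsed     Stage = "ImportParsed"
	Persisted  Stage = "ImportPersisted"
	Failed     Stage = "ImportFailed"
)

// Result is a simplified stand-in for rootcoordpb.ImportResult.
type Result struct {
	TaskID  int64
	State   Stage
	Infos   map[string]string
	AutoIDs []int64
}

// reportFunc is the callback shape introduced by the commit: the import code
// calls it after every stage so the coordinator can track progress.
type reportFunc func(*Result) error

func runImport(res *Result, report reportFunc) error {
	for _, s := range []Stage{Downloaded, Parsed, Persisted} {
		res.State = s
		if err := report(res); err != nil {
			return err
		}
	}
	return nil
}

func main() {
	res := &Result{TaskID: 1, State: Started, Infos: map[string]string{}}
	_ = runImport(res, func(r *Result) error {
		fmt.Println("report:", r.TaskID, r.State) // stand-in for rootCoord.ReportImport
		return nil
	})
}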
@@ -32,12 +32,13 @@ services:
     image: minio/minio:RELEASE.2022-03-17T06-34-49Z
     ports:
       - "9000:9000"
+      - "9001:9001"
     environment:
       MINIO_ACCESS_KEY: minioadmin
       MINIO_SECRET_KEY: minioadmin
     volumes:
       - ${DOCKER_VOLUME_DIRECTORY:-.}/volumes/minio:/minio_data
-    command: minio server /minio_data
+    command: minio server /minio_data --console-address ":9001"
     healthcheck:
       test: ["CMD", "curl", "-f", "http://localhost:9000/minio/health/live"]
       interval: 30s
@@ -36,12 +36,13 @@ services:
     image: minio/minio:RELEASE.2022-03-17T06-34-49Z
     ports:
       - "9000:9000"
+      - "9001:9001"
     environment:
       MINIO_ACCESS_KEY: minioadmin
       MINIO_SECRET_KEY: minioadmin
     volumes:
       - ${DOCKER_VOLUME_DIRECTORY:-.}/volumes/minio:/minio_data
-    command: minio server /minio_data
+    command: minio server /minio_data --console-address ":9001"
     healthcheck:
       test: ["CMD", "curl", "-f", "http://localhost:9000/minio/health/live"]
       interval: 30s
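With these two compose-file changes the MinIO web console becomes reachable from the host: port 9001 is published alongside 9000 and the server is started with --console-address ":9001" in both files.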
@@ -775,19 +775,39 @@ func (node *DataNode) Import(ctx context.Context, req *datapb.ImportTaskRequest)
 		zap.Any("channel names", req.GetImportTask().GetChannelNames()),
 		zap.Any("working dataNodes", req.WorkingNodes))
 
+	importResult := &rootcoordpb.ImportResult{
+		Status: &commonpb.Status{
+			ErrorCode: commonpb.ErrorCode_Success,
+		},
+		TaskId:     req.GetImportTask().TaskId,
+		DatanodeId: node.NodeID,
+		State:      commonpb.ImportState_ImportStarted,
+		Segments:   make([]int64, 0),
+		AutoIds:    make([]int64, 0),
+		RowCount:   0,
+	}
+	reportFunc := func(res *rootcoordpb.ImportResult) error {
+		_, err := node.rootCoord.ReportImport(ctx, res)
+		return err
+	}
+
 	if !node.isHealthy() {
-		log.Warn("DataNode.Import failed",
+		log.Warn("DataNode import failed",
 			zap.Int64("collection ID", req.GetImportTask().GetCollectionId()),
 			zap.Int64("partition ID", req.GetImportTask().GetPartitionId()),
 			zap.Int64("taskID", req.GetImportTask().GetTaskId()),
 			zap.Error(errDataNodeIsUnhealthy(Params.DataNodeCfg.NodeID)))
 
+		msg := msgDataNodeIsUnhealthy(Params.DataNodeCfg.NodeID)
+		importResult.State = commonpb.ImportState_ImportFailed
+		importResult.Infos = append(importResult.Infos, &commonpb.KeyValuePair{Key: "failed_reason", Value: msg})
+		reportFunc(importResult)
 		return &commonpb.Status{
 			ErrorCode: commonpb.ErrorCode_UnexpectedError,
-			Reason:    msgDataNodeIsUnhealthy(Params.DataNodeCfg.NodeID),
+			Reason:    msg,
 		}, nil
 	}
-	rep, err := node.rootCoord.AllocTimestamp(node.ctx, &rootcoordpb.AllocTimestampRequest{
+	rep, err := node.rootCoord.AllocTimestamp(ctx, &rootcoordpb.AllocTimestampRequest{
 		Base: &commonpb.MsgBase{
 			MsgType: commonpb.MsgType_RequestTSO,
 			MsgID:   0,
@@ -796,11 +816,17 @@ func (node *DataNode) Import(ctx context.Context, req *datapb.ImportTaskRequest)
 		},
 		Count: 1,
 	})
 
 	if rep.Status.ErrorCode != commonpb.ErrorCode_Success || err != nil {
+		msg := "DataNode alloc ts failed"
+		log.Warn(msg)
+		importResult.State = commonpb.ImportState_ImportFailed
+		importResult.Infos = append(importResult.Infos, &commonpb.KeyValuePair{Key: "failed_reason", Value: msg})
+		reportFunc(importResult)
 		if err != nil {
 			return &commonpb.Status{
 				ErrorCode: commonpb.ErrorCode_UnexpectedError,
-				Reason:    "DataNode alloc ts failed",
+				Reason:    msg,
 			}, nil
 		}
 	}
@@ -810,35 +836,27 @@ func (node *DataNode) Import(ctx context.Context, req *datapb.ImportTaskRequest)
 	metaService := newMetaService(node.rootCoord, req.GetImportTask().GetCollectionId())
 	schema, err := metaService.getCollectionSchema(ctx, req.GetImportTask().GetCollectionId(), 0)
 	if err != nil {
+		importResult.State = commonpb.ImportState_ImportFailed
+		importResult.Infos = append(importResult.Infos, &commonpb.KeyValuePair{Key: "failed_reason", Value: err.Error()})
+		reportFunc(importResult)
 		return &commonpb.Status{
 			ErrorCode: commonpb.ErrorCode_UnexpectedError,
 			Reason:    err.Error(),
 		}, nil
 	}
-	idAllocator, err := allocator2.NewIDAllocator(node.ctx, node.rootCoord, Params.DataNodeCfg.NodeID)
-	importResult := &rootcoordpb.ImportResult{
-		Status: &commonpb.Status{
-			ErrorCode: commonpb.ErrorCode_Success,
-		},
-		TaskId:     req.GetImportTask().TaskId,
-		DatanodeId: node.NodeID,
-		State:      commonpb.ImportState_ImportPersisted,
-		Segments:   make([]int64, 0),
-		RowCount:   0,
-	}
-	importWrapper := importutil.NewImportWrapper(ctx, schema, 2, Params.DataNodeCfg.FlushInsertBufferSize, idAllocator, node.chunkManager,
-		importFlushReqFunc(node, req, importResult, schema, ts))
-	err = importWrapper.Import(req.GetImportTask().GetFiles(), req.GetImportTask().GetRowBased(), false)
-	if err != nil {
-		return &commonpb.Status{
-			ErrorCode: commonpb.ErrorCode_UnexpectedError,
-			Reason:    err.Error(),
-		}, nil
-	}
 
-	// report root coord that the import task has been finished
-	_, err = node.rootCoord.ReportImport(ctx, importResult)
+	// temp id allocator service
+	idAllocator, err := allocator2.NewIDAllocator(node.ctx, node.rootCoord, Params.DataNodeCfg.NodeID)
+	_ = idAllocator.Start()
+	defer idAllocator.Close()
+
+	importWrapper := importutil.NewImportWrapper(ctx, schema, 2, Params.DataNodeCfg.FlushInsertBufferSize, idAllocator, node.chunkManager,
+		importFlushReqFunc(node, req, importResult, schema, ts), importResult, reportFunc)
+	err = importWrapper.Import(req.GetImportTask().GetFiles(), req.GetImportTask().GetRowBased(), false)
 	if err != nil {
+		importResult.State = commonpb.ImportState_ImportFailed
+		importResult.Infos = append(importResult.Infos, &commonpb.KeyValuePair{Key: "failed_reason", Value: err.Error()})
+		reportFunc(importResult)
 		return &commonpb.Status{
 			ErrorCode: commonpb.ErrorCode_UnexpectedError,
 			Reason:    err.Error(),
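Every error branch in Import now follows the same pattern: set the result state to ImportFailed, attach a "failed_reason" key/value, call the report callback, and only then return the error status. A hedged sketch of that pattern follows; reportFailure and the local KV/ImportResult types are simplified stand-ins for the real commonpb/rootcoordpb types.

package main

import "fmt"

type KV struct{ Key, Value string }

// ImportResult is a simplified stand-in for rootcoordpb.ImportResult.
type ImportResult struct {
	State string
	Infos []KV
}

// reportFailure mirrors the repeated failure branch in DataNode.Import:
// mark the task failed, record the reason, and push the result upstream.
func reportFailure(res *ImportResult, reason string, report func(*ImportResult) error) {
	res.State = "ImportFailed"
	res.Infos = append(res.Infos, KV{Key: "failed_reason", Value: reason})
	_ = report(res) // the diff likewise ignores the report error on this path
}

func main() {
	res := &ImportResult{State: "ImportStarted"}
	reportFailure(res, "DataNode alloc ts failed", func(r *ImportResult) error {
		fmt.Println(r.State, r.Infos)
		return nil
	})
}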
@@ -400,6 +400,15 @@ func TestDataNode(t *testing.T) {
 		stat, err := node.Import(context.WithValue(ctx, ctxKey{}, ""), req)
 		assert.NoError(t, err)
 		assert.Equal(t, commonpb.ErrorCode_UnexpectedError, stat.ErrorCode)
+
+		stat, err = node.Import(context.WithValue(ctx, ctxKey{}, returnError), req)
+		assert.NoError(t, err)
+		assert.Equal(t, commonpb.ErrorCode_UnexpectedError, stat.GetErrorCode())
+
+		node.State.Store(internalpb.StateCode_Abnormal)
+		stat, err = node.Import(context.WithValue(ctx, ctxKey{}, ""), req)
+		assert.NoError(t, err)
+		assert.Equal(t, commonpb.ErrorCode_UnexpectedError, stat.GetErrorCode())
 	})
 
 	t.Run("Test BackGroundGC", func(t *testing.T) {
@@ -890,6 +890,13 @@ func (m *RootCoordFactory) AllocTimestamp(ctx context.Context, in *rootcoordpb.A
 		Status:    &commonpb.Status{},
 		Timestamp: 1000,
 	}
+
+	v := ctx.Value(ctxKey{})
+	if v != nil && v.(string) == returnError {
+		resp.Status.ErrorCode = commonpb.ErrorCode_UnexpectedError
+		return resp, fmt.Errorf("injected error")
+	}
+
 	return resp, nil
 }
 
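The mock RootCoordFactory.AllocTimestamp now fails on demand: the test plants a marker string in the context and the mock returns an injected error when it sees it. Below is a minimal standalone sketch of that context-keyed fault injection; allocTimestamp is an illustrative function, while ctxKey and returnError mirror the helpers named in the test diff.

package main

import (
	"context"
	"errors"
	"fmt"
)

type ctxKey struct{}

const returnError = "error"

// allocTimestamp fails only when the caller planted the marker in the context,
// which lets one mock serve both the success and the failure test cases.
func allocTimestamp(ctx context.Context) (int64, error) {
	if v := ctx.Value(ctxKey{}); v != nil && v.(string) == returnError {
		return 0, errors.New("injected error")
	}
	return 1000, nil
}

func main() {
	ts, err := allocTimestamp(context.Background())
	fmt.Println(ts, err) // 1000 <nil>

	_, err = allocTimestamp(context.WithValue(context.Background(), ctxKey{}, returnError))
	fmt.Println(err) // injected error
}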
@@ -328,6 +328,7 @@ func (m *importManager) updateTaskState(ir *rootcoordpb.ImportResult) (*datapb.I
 		v.State.StateCode = ir.GetState()
 		v.State.Segments = ir.GetSegments()
 		v.State.RowCount = ir.GetRowCount()
+		v.State.RowIds = ir.AutoIds
 		for _, kv := range ir.GetInfos() {
 			if kv.GetKey() == FailedReason {
 				v.State.ErrorMessage = kv.GetValue()
@@ -14,6 +14,8 @@ import (
 	"github.com/milvus-io/milvus/internal/allocator"
 	"github.com/milvus-io/milvus/internal/common"
 	"github.com/milvus-io/milvus/internal/log"
+	"github.com/milvus-io/milvus/internal/proto/commonpb"
+	"github.com/milvus-io/milvus/internal/proto/rootcoordpb"
 	"github.com/milvus-io/milvus/internal/proto/schemapb"
 	"github.com/milvus-io/milvus/internal/storage"
 	"github.com/milvus-io/milvus/internal/util/typeutil"
@@ -34,10 +36,14 @@ type ImportWrapper struct {
 	chunkManager storage.ChunkManager
 
 	callFlushFunc ImportFlushFunc // call back function to flush a segment
+
+	importResult *rootcoordpb.ImportResult                 // import result
+	reportFunc   func(res *rootcoordpb.ImportResult) error // report import state to rootcoord
 }
 
 func NewImportWrapper(ctx context.Context, collectionSchema *schemapb.CollectionSchema, shardNum int32, segmentSize int64,
-	idAlloc *allocator.IDAllocator, cm storage.ChunkManager, flushFunc ImportFlushFunc) *ImportWrapper {
+	idAlloc *allocator.IDAllocator, cm storage.ChunkManager, flushFunc ImportFlushFunc,
+	importResult *rootcoordpb.ImportResult, reportFunc func(res *rootcoordpb.ImportResult) error) *ImportWrapper {
 	if collectionSchema == nil {
 		log.Error("import error: collection schema is nil")
 		return nil
@@ -69,6 +75,8 @@ func NewImportWrapper(ctx context.Context, collectionSchema *schemapb.Collection
 		rowIDAllocator: idAlloc,
 		callFlushFunc:  flushFunc,
 		chunkManager:   cm,
+		importResult:   importResult,
+		reportFunc:     reportFunc,
 	}
 
 	return wrapper
@@ -110,16 +118,23 @@ func (p *ImportWrapper) Import(filePaths []string, rowBased bool, onlyValidate b
 	for i := 0; i < len(filePaths); i++ {
 		filePath := filePaths[i]
 		_, fileType := getFileNameAndExt(filePath)
-		log.Info("imprort wrapper: row-based file ", zap.Any("filePath", filePath), zap.Any("fileType", fileType))
+		log.Info("import wrapper: row-based file ", zap.Any("filePath", filePath), zap.Any("fileType", fileType))
 
 		if fileType == JSONFileExt {
 			err := func() error {
+				// for minio storage, chunkManager will download file into local memory
+				// for local storage, chunkManager open the file directly
 				file, err := p.chunkManager.Reader(filePath)
 				if err != nil {
 					return err
 				}
 				defer file.Close()
+
+				// report file process state
+				p.importResult.State = commonpb.ImportState_ImportDownloaded
+				p.reportFunc(p.importResult)
+
+				// parse file
 				reader := bufio.NewReader(file)
 				parser := NewJSONParser(p.ctx, p.collectionSchema)
 				var consumer *JSONRowConsumer
@@ -131,17 +146,26 @@ func (p *ImportWrapper) Import(filePaths []string, rowBased bool, onlyValidate b
 					consumer = NewJSONRowConsumer(p.collectionSchema, p.rowIDAllocator, p.shardNum, p.segmentSize, flushFunc)
 				}
 				validator := NewJSONRowValidator(p.collectionSchema, consumer)
 
 				err = parser.ParseRows(reader, validator)
 				if err != nil {
+					log.Error("import error: "+err.Error(), zap.String("filePath", filePath))
 					return err
 				}
+
+				// for row-based files, auto-id is generated within JSONRowConsumer
+				if consumer != nil {
+					p.importResult.AutoIds = append(p.importResult.AutoIds, consumer.IDRange()...)
+				}
+
+				// report file process state
+				p.importResult.State = commonpb.ImportState_ImportParsed
+				p.reportFunc(p.importResult)
+
 				return nil
 			}()
 
 			if err != nil {
-				log.Error("imprort error: "+err.Error(), zap.String("filePath", filePath))
+				log.Error("import error: "+err.Error(), zap.String("filePath", filePath))
 				return err
 			}
 		}
@@ -160,7 +184,7 @@ func (p *ImportWrapper) Import(filePaths []string, rowBased bool, onlyValidate b
 			return nil
 		}
 
-		p.printFieldsDataInfo(fields, "imprort wrapper: combine field data", nil)
+		p.printFieldsDataInfo(fields, "import wrapper: combine field data", nil)
 
 		fieldNames := make([]storage.FieldID, 0)
 		for k, v := range fields {
@@ -193,17 +217,24 @@ func (p *ImportWrapper) Import(filePaths []string, rowBased bool, onlyValidate b
 	for i := 0; i < len(filePaths); i++ {
 		filePath := filePaths[i]
 		fileName, fileType := getFileNameAndExt(filePath)
-		log.Info("imprort wrapper: column-based file ", zap.Any("filePath", filePath), zap.Any("fileType", fileType))
+		log.Info("import wrapper: column-based file ", zap.Any("filePath", filePath), zap.Any("fileType", fileType))
 
 		if fileType == JSONFileExt {
 			err := func() error {
+				// for minio storage, chunkManager will download file into local memory
+				// for local storage, chunkManager open the file directly
 				file, err := p.chunkManager.Reader(filePath)
 				if err != nil {
-					log.Error("imprort error: "+err.Error(), zap.String("filePath", filePath))
+					log.Error("import error: "+err.Error(), zap.String("filePath", filePath))
 					return err
 				}
 				defer file.Close()
+
+				// report file process state
+				p.importResult.State = commonpb.ImportState_ImportDownloaded
+				p.reportFunc(p.importResult)
+
+				// parse file
 				reader := bufio.NewReader(file)
 				parser := NewJSONParser(p.ctx, p.collectionSchema)
 				var consumer *JSONColumnConsumer
@@ -214,24 +245,35 @@ func (p *ImportWrapper) Import(filePaths []string, rowBased bool, onlyValidate b
 
 				err = parser.ParseColumns(reader, validator)
 				if err != nil {
-					log.Error("imprort error: "+err.Error(), zap.String("filePath", filePath))
+					log.Error("import error: "+err.Error(), zap.String("filePath", filePath))
 					return err
 				}
+
+				// report file process state
+				p.importResult.State = commonpb.ImportState_ImportParsed
+				p.reportFunc(p.importResult)
+
 				return nil
 			}()
 
 			if err != nil {
-				log.Error("imprort error: "+err.Error(), zap.String("filePath", filePath))
+				log.Error("import error: "+err.Error(), zap.String("filePath", filePath))
 				return err
 			}
 		} else if fileType == NumpyFileExt {
+			// for minio storage, chunkManager will download file into local memory
+			// for local storage, chunkManager open the file directly
 			file, err := p.chunkManager.Reader(filePath)
 			if err != nil {
-				log.Error("imprort error: "+err.Error(), zap.String("filePath", filePath))
+				log.Error("import error: "+err.Error(), zap.String("filePath", filePath))
 				return err
 			}
 			defer file.Close()
+
+			// report file process state
+			p.importResult.State = commonpb.ImportState_ImportDownloaded
+			p.reportFunc(p.importResult)
+
 			var id storage.FieldID
 			for _, field := range p.collectionSchema.Fields {
 				if field.GetName() == fileName {
@@ -251,21 +293,27 @@ func (p *ImportWrapper) Import(filePaths []string, rowBased bool, onlyValidate b
 			parser := NewNumpyParser(p.ctx, p.collectionSchema, flushFunc)
 			err = parser.Parse(file, fileName, onlyValidate)
 			if err != nil {
-				log.Error("imprort error: "+err.Error(), zap.String("filePath", filePath))
+				log.Error("import error: "+err.Error(), zap.String("filePath", filePath))
 				return err
 			}
+
+			// report file process state
+			p.importResult.State = commonpb.ImportState_ImportParsed
+			p.reportFunc(p.importResult)
 		}
 	}
 
 		// split fields data into segments
 		err := p.splitFieldsData(fieldsData, filePaths)
 		if err != nil {
-			log.Error("imprort error: " + err.Error())
+			log.Error("import error: " + err.Error())
 			return err
 		}
 	}
 
-	return nil
+	// report file process state
+	p.importResult.State = commonpb.ImportState_ImportPersisted
+	return p.reportFunc(p.importResult)
 }
 
 func (p *ImportWrapper) appendFunc(schema *schemapb.FieldSchema) func(src storage.FieldData, n int, target storage.FieldData) error {
@@ -346,7 +394,7 @@ func (p *ImportWrapper) appendFunc(schema *schemapb.FieldSchema) func(src storag
 
 func (p *ImportWrapper) splitFieldsData(fieldsData map[storage.FieldID]storage.FieldData, files []string) error {
 	if len(fieldsData) == 0 {
-		return errors.New("imprort error: fields data is empty")
+		return errors.New("import error: fields data is empty")
 	}
 
 	var primaryKey *schemapb.FieldSchema
@@ -357,37 +405,43 @@ func (p *ImportWrapper) splitFieldsData(fieldsData map[storage.FieldID]storage.F
 		} else {
 			_, ok := fieldsData[schema.GetFieldID()]
 			if !ok {
-				return errors.New("imprort error: field " + schema.GetName() + " not provided")
+				return errors.New("import error: field " + schema.GetName() + " not provided")
 			}
 		}
 	}
 	if primaryKey == nil {
-		return errors.New("imprort error: primary key field is not found")
+		return errors.New("import error: primary key field is not found")
 	}
 
 	rowCount := 0
 	for _, v := range fieldsData {
-		rowCount = v.RowNum()
-		break
+		if v.RowNum() > rowCount {
+			rowCount = v.RowNum()
+		}
 	}
 
 	primaryData, ok := fieldsData[primaryKey.GetFieldID()]
 	if !ok {
-		// generate auto id for primary key
-		if primaryKey.GetAutoID() {
-			var rowIDBegin typeutil.UniqueID
-			var rowIDEnd typeutil.UniqueID
-			rowIDBegin, rowIDEnd, _ = p.rowIDAllocator.Alloc(uint32(rowCount))
-
-			primaryDataArr := primaryData.(*storage.Int64FieldData)
-			for i := rowIDBegin; i < rowIDEnd; i++ {
-				primaryDataArr.Data = append(primaryDataArr.Data, rowIDBegin+i)
-			}
-		}
+		return errors.New("import error: primary key field is not provided")
+	}
+
+	// generate auto id for primary key
+	if primaryKey.GetAutoID() {
+		log.Info("import wrapper: generating auto-id", zap.Any("rowCount", rowCount))
+		var rowIDBegin typeutil.UniqueID
+		var rowIDEnd typeutil.UniqueID
+		rowIDBegin, rowIDEnd, _ = p.rowIDAllocator.Alloc(uint32(rowCount))
+
+		primaryDataArr := primaryData.(*storage.Int64FieldData)
+		for i := rowIDBegin; i < rowIDEnd; i++ {
+			primaryDataArr.Data = append(primaryDataArr.Data, rowIDBegin+i)
+		}
+
+		p.importResult.AutoIds = append(p.importResult.AutoIds, rowIDBegin, rowIDEnd)
 	}
 
 	if primaryData.RowNum() <= 0 {
-		return errors.New("imprort error: primary key " + primaryKey.GetName() + " not provided")
+		return errors.New("import error: primary key " + primaryKey.GetName() + " not provided")
 	}
 
 	// prepare segemnts
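Two behavioral changes in splitFieldsData are worth calling out: rowCount is now the maximum RowNum over all provided fields (previously it just took the first field's count), and when the primary key is auto-generated the allocated (begin, end) bounds are appended to ImportResult.AutoIds rather than every individual ID. A small self-contained sketch of both ideas; maxRowCount and the fixed begin/end values are illustrative stand-ins, not code from the repository.

package main

import "fmt"

// maxRowCount mirrors the new loop: take the largest row count across fields.
func maxRowCount(fieldRows map[string]int) int {
	rowCount := 0
	for _, n := range fieldRows {
		if n > rowCount {
			rowCount = n
		}
	}
	return rowCount
}

func main() {
	rows := map[string]int{"age": 5, "vector": 5, "tag": 3}
	rowCount := maxRowCount(rows)

	// Pretend allocation of rowCount consecutive IDs, like rowIDAllocator.Alloc.
	var begin, end int64 = 1000, 1000 + int64(rowCount)

	// The wrapper records only the bounds; consumers expand them later.
	autoIDs := []int64{begin, end}
	fmt.Println(rowCount, autoIDs) // 5 [1000 1005]
}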
@@ -406,7 +460,7 @@ func (p *ImportWrapper) splitFieldsData(fieldsData map[storage.FieldID]storage.F
 		schema := p.collectionSchema.Fields[i]
 		appendFunc := p.appendFunc(schema)
 		if appendFunc == nil {
-			return errors.New("imprort error: unsupported field data type")
+			return errors.New("import error: unsupported field data type")
 		}
 		appendFunctions[schema.GetName()] = appendFunc
 	}
@@ -14,6 +14,7 @@ import (
 	"github.com/milvus-io/milvus/internal/common"
 	"github.com/milvus-io/milvus/internal/log"
 	"github.com/milvus-io/milvus/internal/proto/commonpb"
+	"github.com/milvus-io/milvus/internal/proto/rootcoordpb"
 	"github.com/milvus-io/milvus/internal/proto/schemapb"
 	"github.com/milvus-io/milvus/internal/storage"
 	"github.com/milvus-io/milvus/internal/util/dependency"
@@ -29,7 +30,8 @@ func Test_NewImportWrapper(t *testing.T) {
 	ctx := context.Background()
 	cm, err := f.NewVectorStorageChunkManager(ctx)
 	assert.NoError(t, err)
-	wrapper := NewImportWrapper(ctx, nil, 2, 1, nil, cm, nil)
+
+	wrapper := NewImportWrapper(ctx, nil, 2, 1, nil, cm, nil, nil, nil)
 	assert.Nil(t, wrapper)
 
 	schema := &schemapb.CollectionSchema{
@@ -47,7 +49,7 @@ func Test_NewImportWrapper(t *testing.T) {
 		Description: "int64",
 		DataType:    schemapb.DataType_Int64,
 	})
-	wrapper = NewImportWrapper(ctx, schema, 2, 1, nil, cm, nil)
+	wrapper = NewImportWrapper(ctx, schema, 2, 1, nil, cm, nil, nil, nil)
 	assert.NotNil(t, wrapper)
 
 	err = wrapper.Cancel()
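Note on the constructor tests: Test_NewImportWrapper passes nil for both of the new importResult and reportFunc arguments. That appears safe only because this test never calls Import on the wrapper (it only checks construction and Cancel), while Import itself invokes reportFunc unconditionally.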
@@ -93,12 +95,27 @@ func Test_ImportRowBased(t *testing.T) {
 	}
 
 	// success case
-	wrapper := NewImportWrapper(ctx, sampleSchema(), 2, 1, idAllocator, cm, flushFunc)
+	importResult := &rootcoordpb.ImportResult{
+		Status: &commonpb.Status{
+			ErrorCode: commonpb.ErrorCode_Success,
+		},
+		TaskId:     1,
+		DatanodeId: 1,
+		State:      commonpb.ImportState_ImportStarted,
+		Segments:   make([]int64, 0),
+		AutoIds:    make([]int64, 0),
+		RowCount:   0,
+	}
+	reportFunc := func(res *rootcoordpb.ImportResult) error {
+		return nil
+	}
+	wrapper := NewImportWrapper(ctx, sampleSchema(), 2, 1, idAllocator, cm, flushFunc, importResult, reportFunc)
 	files := make([]string, 0)
 	files = append(files, filePath)
 	err = wrapper.Import(files, true, false)
 	assert.Nil(t, err)
 	assert.Equal(t, 5, rowCount)
+	assert.Equal(t, commonpb.ImportState_ImportPersisted, importResult.State)
 
 	// parse error
 	content = []byte(`{
@@ -111,11 +128,13 @@ func Test_ImportRowBased(t *testing.T) {
 	err = cm.Write(filePath, content)
 	assert.NoError(t, err)
 
-	wrapper = NewImportWrapper(ctx, sampleSchema(), 2, 1, idAllocator, cm, flushFunc)
+	importResult.State = commonpb.ImportState_ImportStarted
+	wrapper = NewImportWrapper(ctx, sampleSchema(), 2, 1, idAllocator, cm, flushFunc, importResult, reportFunc)
 	files = make([]string, 0)
 	files = append(files, filePath)
 	err = wrapper.Import(files, true, false)
 	assert.NotNil(t, err)
+	assert.NotEqual(t, commonpb.ImportState_ImportPersisted, importResult.State)
 
 	// file doesn't exist
 	files = make([]string, 0)
@@ -178,12 +197,27 @@ func Test_ImportColumnBased_json(t *testing.T) {
 	}
 
 	// success case
-	wrapper := NewImportWrapper(ctx, sampleSchema(), 2, 1, idAllocator, cm, flushFunc)
+	importResult := &rootcoordpb.ImportResult{
+		Status: &commonpb.Status{
+			ErrorCode: commonpb.ErrorCode_Success,
+		},
+		TaskId:     1,
+		DatanodeId: 1,
+		State:      commonpb.ImportState_ImportStarted,
+		Segments:   make([]int64, 0),
+		AutoIds:    make([]int64, 0),
+		RowCount:   0,
+	}
+	reportFunc := func(res *rootcoordpb.ImportResult) error {
+		return nil
+	}
+	wrapper := NewImportWrapper(ctx, sampleSchema(), 2, 1, idAllocator, cm, flushFunc, importResult, reportFunc)
 	files := make([]string, 0)
 	files = append(files, filePath)
 	err = wrapper.Import(files, false, false)
 	assert.Nil(t, err)
 	assert.Equal(t, 5, rowCount)
+	assert.Equal(t, commonpb.ImportState_ImportPersisted, importResult.State)
 
 	// parse error
 	content = []byte(`{
@@ -194,11 +228,13 @@ func Test_ImportColumnBased_json(t *testing.T) {
 	err = cm.Write(filePath, content)
 	assert.NoError(t, err)
 
-	wrapper = NewImportWrapper(ctx, sampleSchema(), 2, 1, idAllocator, cm, flushFunc)
+	importResult.State = commonpb.ImportState_ImportStarted
+	wrapper = NewImportWrapper(ctx, sampleSchema(), 2, 1, idAllocator, cm, flushFunc, importResult, reportFunc)
 	files = make([]string, 0)
 	files = append(files, filePath)
 	err = wrapper.Import(files, false, false)
 	assert.NotNil(t, err)
+	assert.NotEqual(t, commonpb.ImportState_ImportPersisted, importResult.State)
 
 	// file doesn't exist
 	files = make([]string, 0)
@@ -268,11 +304,26 @@ func Test_ImportColumnBased_numpy(t *testing.T) {
 	}
 
 	// success case
-	wrapper := NewImportWrapper(ctx, sampleSchema(), 2, 1, idAllocator, cm, flushFunc)
+	importResult := &rootcoordpb.ImportResult{
+		Status: &commonpb.Status{
+			ErrorCode: commonpb.ErrorCode_Success,
+		},
+		TaskId:     1,
+		DatanodeId: 1,
+		State:      commonpb.ImportState_ImportStarted,
+		Segments:   make([]int64, 0),
+		AutoIds:    make([]int64, 0),
+		RowCount:   0,
+	}
+	reportFunc := func(res *rootcoordpb.ImportResult) error {
+		return nil
+	}
+	wrapper := NewImportWrapper(ctx, sampleSchema(), 2, 1, idAllocator, cm, flushFunc, importResult, reportFunc)
 
 	err = wrapper.Import(files, false, false)
 	assert.Nil(t, err)
 	assert.Equal(t, 5, rowCount)
+	assert.Equal(t, commonpb.ImportState_ImportPersisted, importResult.State)
 
 	// parse error
 	content = []byte(`{
@@ -283,11 +334,12 @@ func Test_ImportColumnBased_numpy(t *testing.T) {
 	err = cm.Write(filePath, content)
 	assert.NoError(t, err)
 
-	wrapper = NewImportWrapper(ctx, sampleSchema(), 2, 1, idAllocator, cm, flushFunc)
+	wrapper = NewImportWrapper(ctx, sampleSchema(), 2, 1, idAllocator, cm, flushFunc, importResult, reportFunc)
 	files = make([]string, 0)
 	files = append(files, filePath)
 	err = wrapper.Import(files, false, false)
 	assert.NotNil(t, err)
+	assert.NotEqual(t, commonpb.ImportState_ImportPersisted, importResult.State)
 
 	// file doesn't exist
 	files = make([]string, 0)
@@ -403,7 +455,21 @@ func Test_ImportRowBased_perf(t *testing.T) {
 
 	schema := perfSchema(dim)
 
-	wrapper := NewImportWrapper(ctx, schema, int32(shardNum), int64(segmentSize), idAllocator, cm, flushFunc)
+	importResult := &rootcoordpb.ImportResult{
+		Status: &commonpb.Status{
+			ErrorCode: commonpb.ErrorCode_Success,
+		},
+		TaskId:     1,
+		DatanodeId: 1,
+		State:      commonpb.ImportState_ImportStarted,
+		Segments:   make([]int64, 0),
+		AutoIds:    make([]int64, 0),
+		RowCount:   0,
+	}
+	reportFunc := func(res *rootcoordpb.ImportResult) error {
+		return nil
+	}
+	wrapper := NewImportWrapper(ctx, schema, int32(shardNum), int64(segmentSize), idAllocator, cm, flushFunc, importResult, reportFunc)
 	files := make([]string, 0)
 	files = append(files, filePath)
 	err = wrapper.Import(files, true, false)
@@ -501,7 +567,21 @@ func Test_ImportColumnBased_perf(t *testing.T) {
 
 	schema := perfSchema(dim)
 
-	wrapper := NewImportWrapper(ctx, schema, int32(shardNum), int64(segmentSize), idAllocator, cm, flushFunc)
+	importResult := &rootcoordpb.ImportResult{
+		Status: &commonpb.Status{
+			ErrorCode: commonpb.ErrorCode_Success,
+		},
+		TaskId:     1,
+		DatanodeId: 1,
+		State:      commonpb.ImportState_ImportStarted,
+		Segments:   make([]int64, 0),
+		AutoIds:    make([]int64, 0),
+		RowCount:   0,
+	}
+	reportFunc := func(res *rootcoordpb.ImportResult) error {
+		return nil
+	}
+	wrapper := NewImportWrapper(ctx, schema, int32(shardNum), int64(segmentSize), idAllocator, cm, flushFunc, importResult, reportFunc)
 	files := make([]string, 0)
 	files = append(files, filePath1)
 	files = append(files, filePath2)
@@ -395,6 +395,7 @@ type JSONRowConsumer struct {
 	segmentsData []map[storage.FieldID]storage.FieldData // in-memory segments data
 	segmentSize  int64                                    // maximum size of a segment(unit:byte)
 	primaryKey   storage.FieldID                          // name of primary key
+	autoIDRange  []int64                                  // auto-generated id range, for example: [1, 10, 20, 25] means id from 1 to 10 and 20 to 25
 
 	callFlushFunc ImportFlushFunc // call back function to flush segment
 }
@@ -482,6 +483,7 @@ func NewJSONRowConsumer(collectionSchema *schemapb.CollectionSchema, idAlloc *al
 		segmentSize:   segmentSize,
 		rowCounter:    0,
 		primaryKey:    -1,
+		autoIDRange:   make([]int64, 0),
 		callFlushFunc: flushFunc,
 	}
 
@@ -517,6 +519,10 @@ func NewJSONRowConsumer(collectionSchema *schemapb.CollectionSchema, idAlloc *al
 	return v
 }
 
+func (v *JSONRowConsumer) IDRange() []int64 {
+	return v.autoIDRange
+}
+
 func (v *JSONRowConsumer) flush(force bool) error {
 	// force flush all data
 	if force {
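autoIDRange stores flat (begin, end) pairs rather than every generated ID, as the struct comment's [1, 10, 20, 25] example shows. Below is a hedged sketch of expanding such pairs back into individual IDs; expandRanges is illustrative only, and it assumes each pair is half-open, matching the `for i := rowIDBegin; i < rowIDEnd` loops elsewhere in this change.

package main

import "fmt"

// expandRanges turns a flat [begin0, end0, begin1, end1, ...] slice into
// the individual IDs, treating each pair as a half-open interval [begin, end).
func expandRanges(ranges []int64) []int64 {
	ids := make([]int64, 0)
	for i := 0; i+1 < len(ranges); i += 2 {
		for id := ranges[i]; id < ranges[i+1]; id++ {
			ids = append(ids, id)
		}
	}
	return ids
}

func main() {
	fmt.Println(expandRanges([]int64{1, 4, 20, 22})) // [1 2 3 20 21]
}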
@@ -580,6 +586,7 @@ func (v *JSONRowConsumer) Handle(rows []map[storage.FieldID]interface{}) error {
 		if rowIDEnd-rowIDBegin != int64(len(rows)) {
 			return errors.New("JSON row consumer: failed to allocate ID for " + strconv.Itoa(len(rows)) + " rows")
 		}
+		v.autoIDRange = append(v.autoIDRange, rowIDBegin, rowIDEnd)
 	}
 
 	// consume rows
@@ -103,7 +103,7 @@ func (p *NumpyParser) validate(adapter *NumpyAdapter, fieldName string) error {
 	// 1. field data type should be consist to numpy data type
 	// 2. vector field dimension should be consist to numpy shape
 	if schemapb.DataType_FloatVector == schema.DataType {
-		if elementType != schemapb.DataType_Float {
+		if elementType != schemapb.DataType_Float && elementType != schemapb.DataType_Double {
 			return errors.New("illegal data type " + adapter.GetType() + " for field " + schema.GetName())
 		}
 
@@ -247,11 +247,32 @@ func (p *NumpyParser) consume(adapter *NumpyAdapter) error {
 			Dim:     p.columnDesc.dimension,
 		}
 	case schemapb.DataType_FloatVector:
-		data, err := adapter.ReadFloat32(p.columnDesc.elementCount)
+		// for float vector, we support float32 and float64 numpy file because python float value is 64 bit
+		// for float64 numpy file, the performance is worse than float32 numpy file
+		// we don't check overflow here
+		elementType, err := convertNumpyType(adapter.GetType())
 		if err != nil {
 			return err
 		}
+
+		var data []float32
+		if elementType == schemapb.DataType_Float {
+			data, err = adapter.ReadFloat32(p.columnDesc.elementCount)
+			if err != nil {
+				return err
+			}
+		} else if elementType == schemapb.DataType_Double {
+			data = make([]float32, 0, p.columnDesc.elementCount)
+			data64, err := adapter.ReadFloat64(p.columnDesc.elementCount)
+			if err != nil {
+				return err
+			}
+
+			for _, f64 := range data64 {
+				data = append(data, float32(f64))
+			}
+		}
+
 		p.columnData = &storage.FloatVectorFieldData{
 			NumRows: []int64{int64(p.columnDesc.elementCount)},
 			Data:    data,
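The numpy path now accepts float64 files for FloatVector fields and narrows each value to float32, since Python floats are 64-bit; as the new comments note, no overflow check is performed. A minimal standalone sketch of that narrowing follows; narrowToFloat32 is a local illustration, not the adapter's ReadFloat64 call.

package main

import "fmt"

// narrowToFloat32 converts float64 samples to float32 the same way the parser
// does: value by value, accepting precision loss and skipping range checks.
func narrowToFloat32(data64 []float64) []float32 {
	data := make([]float32, 0, len(data64))
	for _, f64 := range data64 {
		data = append(data, float32(f64))
	}
	return data
}

func main() {
	fmt.Println(narrowToFloat32([]float64{1.1, 2.1, 3.1})) // [1.1 2.1 3.1]
}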
@@ -442,7 +442,7 @@ func Test_Parse(t *testing.T) {
 	}
 	checkFunc(data8, "field_binary_vector", flushFunc)
 
-	// double vector
+	// double vector(element can be float32 or float64)
 	data9 := [][4]float32{{1.1, 2.1, 3.1, 4.1}, {5.2, 6.2, 7.2, 8.2}}
 	flushFunc = func(field storage.FieldData) error {
 		assert.NotNil(t, field)
@@ -458,6 +458,22 @@ func Test_Parse(t *testing.T) {
 		return nil
 	}
 	checkFunc(data9, "field_float_vector", flushFunc)
+
+	data10 := [][4]float64{{1.1, 2.1, 3.1, 4.1}, {5.2, 6.2, 7.2, 8.2}}
+	flushFunc = func(field storage.FieldData) error {
+		assert.NotNil(t, field)
+		assert.Equal(t, len(data10), field.RowNum())
+
+		for i := 0; i < len(data10); i++ {
+			row := field.GetRow(i).([]float32)
+			for k := 0; k < len(row); k++ {
+				assert.Equal(t, float32(data10[i][k]), row[k])
+			}
+		}
+
+		return nil
+	}
+	checkFunc(data10, "field_float_vector", flushFunc)
 }
 
 func Test_Parse_perf(t *testing.T) {