enhance: support specified version manifest write (#46331)
Related to #44956

**Support specified version manifest write**

- Add `baseVersion` parameter to `NewPackedRecordManifestWriter` and `NewFFIPackedWriter` to support writing manifest based on a specific version instead of always overwriting the latest
- Add `manifestPath` tracking in `BulkPackWriterV2` to maintain manifest state across writes
- Add `GetManifestInfo` method to parse existing manifest path and extract base path and version
- Add `UpdateManifestPath` metacache action to track manifest path in segment info
- Update `transaction_begin` FFI call to use the specified base version

---------

Signed-off-by: Congqi Xia <congqi.xia@zilliz.com>
This commit is contained in:
parent 971085b033
commit 18fbaaca0a
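Taken together, the changes below thread a manifest path and base version through the storage v2 write path. The following is a minimal sketch, not part of this commit, of how a caller might use the new pieces. It is written as if it sat inside the existing `syncmgr` package so that package's imports apply; the function name `writeWithManifestTracking`, its argument list, and the `metaCache.UpdateSegments`/`metacache.WithSegmentIDs` calls are assumptions for illustration only.

```go
// Sketch only: assumes this lives in the syncmgr package (so its imports are
// in scope) and that the metacache exposes the usual UpdateSegments /
// WithSegmentIDs helpers; the helper name and argument list are hypothetical.
func writeWithManifestTracking(
	ctx context.Context,
	metaCache metacache.MetaCache,
	schema *schemapb.CollectionSchema,
	cm storage.ChunkManager,
	alloc allocator.Interface,
	storageConfig *indexpb.StorageConfig,
	columnGroups []storagecommon.ColumnGroup,
	segmentID int64,
	lastManifestPath string, // "" on the first write
	pack *SyncPack,
) error {
	// The new trailing string argument is the manifest path recorded for the
	// previous write of this segment; empty means "no manifest yet".
	writer := NewBulkPackWriterV2(metaCache, schema, cm, alloc,
		packed.DefaultWriteBufferSize, packed.DefaultMultiPartUploadSize,
		storageConfig, columnGroups, lastManifestPath)

	// Write returns the manifest path produced by this write as its fifth value.
	_, _, _, _, manifestPath, _, err := writer.Write(ctx, pack)
	if err != nil {
		return err
	}

	// Record the new manifest path in the segment info so the next sync can
	// base its manifest write on the right version.
	metaCache.UpdateSegments(metacache.UpdateManifestPath(manifestPath),
		metacache.WithSegmentIDs(segmentID))
	return nil
}
```

On the first write the empty path makes `GetManifestInfo` fall back to a base version of -1; subsequent writes parse the stored path with `packed.UnmarshalManfestPath` to recover the base path and version, as shown in the diff below.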
@@ -290,7 +290,7 @@ func (s *ClusteringCompactionTaskStorageV2Suite) initStorageV2Segments(rows int,
 	bw := syncmgr.NewBulkPackWriterV2(mc, sch, cm, s.mockAlloc, packed.DefaultWriteBufferSize, 0, &indexpb.StorageConfig{
 		StorageType: "local",
 		RootPath:    rootPath,
-	}, columnGroups)
+	}, columnGroups, "")
 	return bw.Write(context.Background(), pack)
 }
 
@@ -333,7 +333,7 @@ func (s *MixCompactionTaskStorageV2Suite) initStorageV2Segments(rows int, seed i
 	bw := syncmgr.NewBulkPackWriterV2(mc, s.meta.Schema, cm, alloc, packed.DefaultWriteBufferSize, 0, &indexpb.StorageConfig{
 		StorageType: "local",
 		RootPath:    rootPath,
-	}, columnGroups)
+	}, columnGroups, "")
 	return bw.Write(context.Background(), pack)
 }
 
@@ -111,7 +111,7 @@ func (s *NamespaceCompactorTestSuite) setupSortedSegments() {
 	bw := syncmgr.NewBulkPackWriterV2(mc, s.schema, cm, alloc, packed.DefaultWriteBufferSize, 0, &indexpb.StorageConfig{
 		StorageType: "local",
 		RootPath:    rootPath,
-	}, columnGroups)
+	}, columnGroups, "")
 	inserts, _, _, _, _, _, err := bw.Write(context.Background(), pack)
 	s.Require().NoError(err)
 	s.sortedSegments = append(s.sortedSegments, &datapb.CompactionSegmentBinlogs{
@@ -243,6 +243,12 @@ func SetStartPosRecorded(flag bool) SegmentAction {
 	}
 }
 
+func UpdateManifestPath(manifestPath string) SegmentAction {
+	return func(info *SegmentInfo) {
+		info.manifestPath = manifestPath
+	}
+}
+
 // MergeSegmentAction is the util function to merge multiple SegmentActions into one.
 func MergeSegmentAction(actions ...SegmentAction) SegmentAction {
 	return func(info *SegmentInfo) {
@@ -31,6 +31,7 @@ import (
 	"github.com/milvus-io/milvus/internal/flushcommon/metacache"
 	"github.com/milvus-io/milvus/internal/storage"
 	"github.com/milvus-io/milvus/internal/storagecommon"
+	"github.com/milvus-io/milvus/internal/storagev2/packed"
 	"github.com/milvus-io/milvus/internal/util/hookutil"
 	"github.com/milvus-io/milvus/pkg/v2/common"
 	"github.com/milvus-io/milvus/pkg/v2/log"
@@ -51,11 +52,12 @@ type BulkPackWriterV2 struct {
 
 	storageConfig *indexpb.StorageConfig
 	columnGroups  []storagecommon.ColumnGroup
+	manifestPath  string
 }
 
 func NewBulkPackWriterV2(metaCache metacache.MetaCache, schema *schemapb.CollectionSchema, chunkManager storage.ChunkManager,
 	allocator allocator.Interface, bufferSize, multiPartUploadSize int64,
-	storageConfig *indexpb.StorageConfig, columnGroups []storagecommon.ColumnGroup, writeRetryOpts ...retry.Option,
+	storageConfig *indexpb.StorageConfig, columnGroups []storagecommon.ColumnGroup, curManifestPath string, writeRetryOpts ...retry.Option,
 ) *BulkPackWriterV2 {
 	return &BulkPackWriterV2{
 		BulkPackWriter: &BulkPackWriter{
@@ -70,6 +72,7 @@ func NewBulkPackWriterV2(metaCache metacache.MetaCache, schema *schemapb.Collect
 		multiPartUploadSize: multiPartUploadSize,
 		storageConfig:       storageConfig,
 		columnGroups:        columnGroups,
+		manifestPath:        curManifestPath,
 	}
 }
 
@@ -179,6 +182,27 @@ func (bw *BulkPackWriterV2) writeInserts(ctx context.Context, pack *SyncPack) (m
 	return logs, manifestPath, nil
 }
 
+func (bw *BulkPackWriterV2) GetManifestInfo(pack *SyncPack) (basePath string, version int64, err error) {
+	// empty info, shall be first write,
+	// initialize manifestPath with -1 version
+	if bw.manifestPath == "" {
+		k := metautil.JoinIDPath(pack.collectionID, pack.partitionID, pack.segmentID)
+		logicalPath := path.Join(bw.getRootPath(), common.SegmentInsertLogPath, k)
+		bucketName := bw.getBucketName()
+		// if storage config is not passed, use common config
+		storageType := paramtable.Get().CommonCfg.StorageType.GetValue()
+		if bw.storageConfig != nil {
+			storageType = bw.storageConfig.GetStorageType()
+		}
+		if storageType != "local" {
+			basePath = path.Join(bucketName, logicalPath)
+		}
+		return basePath, -1, nil
+	}
+
+	return packed.UnmarshalManfestPath(bw.manifestPath)
+}
+
 func (bw *BulkPackWriterV2) writeInsertsIntoStorage(_ context.Context,
 	pluginContextPtr *indexcgopb.StoragePluginContext,
 	pack *SyncPack,
@@ -203,10 +227,12 @@ func (bw *BulkPackWriterV2) writeInsertsIntoStorage(_ context.Context,
 	}
 
 	var manifestPath string
-	if paramtable.Get().CommonCfg.UseLoonFFI.GetAsBool() {
-		k := metautil.JoinIDPath(pack.collectionID, pack.partitionID, pack.segmentID)
-		basePath := path.Join(bw.getRootPath(), common.SegmentInsertLogPath, k)
-		w, err := storage.NewPackedRecordManifestWriter(bucketName, basePath, bw.schema, bw.bufferSize, bw.multiPartUploadSize, columnGroups, bw.storageConfig, pluginContextPtr)
+	if paramtable.Get().CommonCfg.UseLoonFFI.GetAsBool() || bw.manifestPath != "" {
+		basePath, version, err := bw.GetManifestInfo(pack)
+		if err != nil {
+			return nil, "", err
+		}
+		w, err := storage.NewPackedRecordManifestWriter(bucketName, basePath, version, bw.schema, bw.bufferSize, bw.multiPartUploadSize, columnGroups, bw.storageConfig, pluginContextPtr)
 		if err != nil {
 			return nil, "", err
 		}
@@ -143,7 +143,7 @@ func (s *PackWriterV2Suite) TestPackWriterV2_Write() {
 
 	pack := new(SyncPack).WithCollectionID(collectionID).WithPartitionID(partitionID).WithSegmentID(segmentID).WithChannelName(channelName).WithInsertData(genInsertData(rows, s.schema)).WithDeleteData(deletes)
 
-	bw := NewBulkPackWriterV2(mc, s.schema, s.cm, s.logIDAlloc, packed.DefaultWriteBufferSize, 0, nil, s.currentSplit)
+	bw := NewBulkPackWriterV2(mc, s.schema, s.cm, s.logIDAlloc, packed.DefaultWriteBufferSize, 0, nil, s.currentSplit, "")
 
 	gotInserts, _, _, _, _, _, err := bw.Write(context.Background(), pack)
 	s.NoError(err)
@@ -162,7 +162,7 @@ func (s *PackWriterV2Suite) TestWriteEmptyInsertData() {
 	mc.EXPECT().GetSchema(mock.Anything).Return(s.schema).Maybe()
 
 	pack := new(SyncPack).WithCollectionID(collectionID).WithPartitionID(partitionID).WithSegmentID(segmentID).WithChannelName(channelName)
-	bw := NewBulkPackWriterV2(mc, s.schema, s.cm, s.logIDAlloc, packed.DefaultWriteBufferSize, 0, nil, s.currentSplit)
+	bw := NewBulkPackWriterV2(mc, s.schema, s.cm, s.logIDAlloc, packed.DefaultWriteBufferSize, 0, nil, s.currentSplit, "")
 
 	_, _, _, _, _, _, err := bw.Write(context.Background(), pack)
 	s.NoError(err)
@@ -191,7 +191,7 @@ func (s *PackWriterV2Suite) TestNoPkField() {
 	buf.Append(data)
 
 	pack := new(SyncPack).WithCollectionID(collectionID).WithPartitionID(partitionID).WithSegmentID(segmentID).WithChannelName(channelName).WithInsertData([]*storage.InsertData{buf})
-	bw := NewBulkPackWriterV2(mc, s.schema, s.cm, s.logIDAlloc, packed.DefaultWriteBufferSize, 0, nil, s.currentSplit)
+	bw := NewBulkPackWriterV2(mc, s.schema, s.cm, s.logIDAlloc, packed.DefaultWriteBufferSize, 0, nil, s.currentSplit, "")
 
 	_, _, _, _, _, _, err := bw.Write(context.Background(), pack)
 	s.Error(err)
@@ -208,7 +208,7 @@ func (s *PackWriterV2Suite) TestAllocIDExhausedError() {
 	mc.EXPECT().GetSchema(mock.Anything).Return(s.schema).Maybe()
 
 	pack := new(SyncPack).WithCollectionID(collectionID).WithPartitionID(partitionID).WithSegmentID(segmentID).WithChannelName(channelName).WithInsertData(genInsertData(rows, s.schema))
-	bw := NewBulkPackWriterV2(mc, s.schema, s.cm, s.logIDAlloc, packed.DefaultWriteBufferSize, 0, nil, s.currentSplit)
+	bw := NewBulkPackWriterV2(mc, s.schema, s.cm, s.logIDAlloc, packed.DefaultWriteBufferSize, 0, nil, s.currentSplit, "")
 
 	_, _, _, _, _, _, err := bw.Write(context.Background(), pack)
 	s.Error(err)
@@ -229,7 +229,7 @@ func (s *PackWriterV2Suite) TestWriteInsertDataError() {
 	buf.Append(data)
 
 	pack := new(SyncPack).WithCollectionID(collectionID).WithPartitionID(partitionID).WithSegmentID(segmentID).WithChannelName(channelName).WithInsertData([]*storage.InsertData{buf})
-	bw := NewBulkPackWriterV2(mc, s.schema, s.cm, s.logIDAlloc, packed.DefaultWriteBufferSize, 0, nil, s.currentSplit)
+	bw := NewBulkPackWriterV2(mc, s.schema, s.cm, s.logIDAlloc, packed.DefaultWriteBufferSize, 0, nil, s.currentSplit, "")
 
 	_, _, _, _, _, _, err := bw.Write(context.Background(), pack)
 	s.Error(err)
@@ -136,10 +136,9 @@ func (t *SyncTask) Run(ctx context.Context) (err error) {
 
 	switch segmentInfo.GetStorageVersion() {
 	case storage.StorageV2:
-		// TODO change to return manifest after integrated
 		// New sync task means needs to flush data immediately, so do not need to buffer data in writer again.
 		writer := NewBulkPackWriterV2(t.metacache, t.schema, t.chunkManager, t.allocator, 0,
-			packed.DefaultMultiPartUploadSize, t.storageConfig, columnGroups, t.writeRetryOpts...)
+			packed.DefaultMultiPartUploadSize, t.storageConfig, columnGroups, segmentInfo.ManifestPath(), t.writeRetryOpts...)
 		t.insertBinlogs, t.deltaBinlog, t.statsBinlogs, t.bm25Binlogs, t.manifestPath, t.flushedSize, err = writer.Write(ctx, t.pack)
 		if err != nil {
 			log.Warn("failed to write sync data with storage v2 format", zap.Error(err))
@@ -181,7 +180,7 @@ func (t *SyncTask) Run(ctx context.Context) (err error) {
 
 	t.pack.ReleaseData()
 
-	actions := []metacache.SegmentAction{metacache.FinishSyncing(t.batchRows)}
+	actions := []metacache.SegmentAction{metacache.FinishSyncing(t.batchRows), metacache.UpdateManifestPath(t.manifestPath)}
 	if columnGroups != nil {
 		actions = append(actions, metacache.UpdateCurrentSplit(columnGroups))
 	}
@@ -361,7 +361,10 @@ func (pw *PackedManifestRecordWriter) initWriters(r Record) error {
 	var err error
 	k := metautil.JoinIDPath(pw.collectionID, pw.partitionID, pw.segmentID)
 	basePath := path.Join(pw.storageConfig.GetRootPath(), common.SegmentInsertLogPath, k)
-	pw.writer, err = NewPackedRecordManifestWriter(pw.storageConfig.GetBucketName(), basePath, pw.schema, pw.bufferSize, pw.multiPartUploadSize, pw.columnGroups, pw.storageConfig, pw.storagePluginContext)
+	if pw.storageConfig.StorageType != "local" {
+		basePath = path.Join(pw.storageConfig.GetBucketName(), basePath)
+	}
+	pw.writer, err = NewPackedRecordManifestWriter(pw.storageConfig.GetBucketName(), basePath, -1, pw.schema, pw.bufferSize, pw.multiPartUploadSize, pw.columnGroups, pw.storageConfig, pw.storagePluginContext)
 	if err != nil {
 		return merr.WrapErrServiceInternal(fmt.Sprintf("can not new packed record writer %s", err.Error()))
 	}
@@ -296,6 +296,7 @@ func (pw *packedRecordManifestWriter) Close() error {
 func NewPackedRecordManifestWriter(
 	bucketName string,
 	basePath string,
+	baseVersion int64,
 	schema *schemapb.CollectionSchema,
 	bufferSize int64,
 	multiPartUploadSize int64,
@@ -314,16 +315,8 @@ func NewPackedRecordManifestWriter(
 		return nil, merr.WrapErrServiceInternal(
 			fmt.Sprintf("can not convert collection schema %s to arrow schema: %s", schema.Name, err.Error()))
 	}
-	// if storage config is not passed, use common config
-	storageType := paramtable.Get().CommonCfg.StorageType.GetValue()
-	if storageConfig != nil {
-		storageType = storageConfig.GetStorageType()
-	}
-	ffiBasePath := basePath
-	if storageType != "local" {
-		ffiBasePath = path.Join(bucketName, basePath)
-	}
-	writer, err := packed.NewFFIPackedWriter(ffiBasePath, arrowSchema, columnGroups, storageConfig, storagePluginContext)
+
+	writer, err := packed.NewFFIPackedWriter(basePath, baseVersion, arrowSchema, columnGroups, storageConfig, storagePluginContext)
 	if err != nil {
 		return nil, merr.WrapErrServiceInternal(
 			fmt.Sprintf("can not new packed record writer %s", err.Error()))
@@ -74,7 +74,7 @@ func createStorageConfig() *indexpb.StorageConfig {
 	return storageConfig
 }
 
-func NewFFIPackedWriter(basePath string, schema *arrow.Schema, columnGroups []storagecommon.ColumnGroup, storageConfig *indexpb.StorageConfig, storagePluginContext *indexcgopb.StoragePluginContext) (*FFIPackedWriter, error) {
+func NewFFIPackedWriter(basePath string, baseVersion int64, schema *arrow.Schema, columnGroups []storagecommon.ColumnGroup, storageConfig *indexpb.StorageConfig, storagePluginContext *indexcgopb.StoragePluginContext) (*FFIPackedWriter, error) {
 	cBasePath := C.CString(basePath)
 	defer C.free(unsafe.Pointer(cBasePath))
 
@@ -143,6 +143,7 @@ func NewFFIPackedWriter(basePath string, schema *arrow.Schema, columnGroups []st
 
 	return &FFIPackedWriter{
 		basePath:      basePath,
+		baseVersion:   baseVersion,
 		cWriterHandle: writerHandle,
 		cProperties:   cProperties,
 	}, nil
@@ -178,7 +179,7 @@ func (pw *FFIPackedWriter) Close() (string, error) {
 
 	// TODO pass version
 	// use -1 as latest
-	result = C.transaction_begin(cBasePath, pw.cProperties, &transationHandle, C.int64_t(-1))
+	result = C.transaction_begin(cBasePath, pw.cProperties, &transationHandle, C.int64_t(pw.baseVersion))
 	if err := HandleFFIResult(result); err != nil {
 		return "", err
 	}
@@ -36,6 +36,7 @@ type PackedWriter struct {
 
 type FFIPackedWriter struct {
 	basePath      string
+	baseVersion   int64
 	cWriterHandle C.WriterHandle
 	cProperties   *C.Properties
 }