mirror of https://gitee.com/milvus-io/milvus.git
fix: Implement NeededFields feature in RecordReader (#43523)
Related to #43522

Currently, passing a partial schema to the storage v2 packed reader may trigger a SEGV during the clustering compaction unit test. This patch implements `NeededFields` differently in each `RecordReader` implementation; for now, the v2 implementation is a no-op, to be supported once the packed reader supports this API.

Signed-off-by: Congqi Xia <congqi.xia@zilliz.com>
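As a caller-side sketch (not part of the commit): with this patch the full collection schema is handed to the reader, and column pruning is requested through the new option. `ctx`, `binlogs`, `fullSchema`, and the two key field IDs are placeholders here; `NewBinlogRecordReader`, `WithNeededFields`, and `typeutil.NewSet` are the APIs from the diff below, and field IDs 0 and 1 are the RowID and Timestamp system fields.

	// Collect the field IDs this task actually consumes; the reader may
	// skip every other column.
	needed := typeutil.NewSet[int64]()
	needed.Insert(0, 1, pkFieldID, clusteringKeyFieldID)

	// Pass the FULL schema; no hand-built partial schema reaches the
	// storage layer anymore (that was the source of the v2 SEGV).
	rr, err := storage.NewBinlogRecordReader(ctx, binlogs, fullSchema,
		storage.WithVersion(storage.StorageV1),
		storage.WithNeededFields(needed),
	)
	if err != nil {
		return err
	}
	defer rr.Close()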
commit 1cf8ed505f (parent abb3aeacdf)
@@ -866,18 +866,13 @@ func (t *clusteringCompactionTask) scalarAnalyzeSegment(
 	log.Debug("binlogNum", zap.Int("binlogNum", binlogNum))
 
 	expiredFilter := compaction.NewEntityFilter(nil, t.plan.GetCollectionTtl(), t.currentTime)
-	schema := &schemapb.CollectionSchema{
-		Fields: make([]*schemapb.FieldSchema, 0),
-	}
 	binlogs := make([]*datapb.FieldBinlog, 0)
 
 	requiredFields := typeutil.NewSet[int64]()
 	requiredFields.Insert(0, 1, t.primaryKeyField.GetFieldID(), t.clusteringKeyField.GetFieldID())
-	for _, field := range t.plan.GetSchema().GetFields() {
-		if requiredFields.Contain(field.GetFieldID()) {
-			schema.Fields = append(schema.Fields, field)
-		}
-	}
+	selectedFields := lo.Filter(t.plan.GetSchema().GetFields(), func(field *schemapb.FieldSchema, _ int) bool {
+		return requiredFields.Contain(field.GetFieldID())
+	})
 
 	switch segment.GetStorageVersion() {
 	case storage.StorageV1:
@@ -894,13 +889,14 @@ func (t *clusteringCompactionTask) scalarAnalyzeSegment(
 	}
 	rr, err := storage.NewBinlogRecordReader(ctx,
 		binlogs,
-		schema,
+		t.plan.GetSchema(),
 		storage.WithDownloader(func(ctx context.Context, paths []string) ([][]byte, error) {
 			return t.binlogIO.Download(ctx, paths)
 		}),
 		storage.WithVersion(segment.StorageVersion),
 		storage.WithBufferSize(t.bufferSize),
 		storage.WithStorageConfig(t.compactionParams.StorageConfig),
+		storage.WithNeededFields(requiredFields),
 	)
 	if err != nil {
 		log.Warn("new binlog record reader wrong", zap.Error(err))
@@ -908,7 +904,7 @@ func (t *clusteringCompactionTask) scalarAnalyzeSegment(
 	}
 
 	pkIter := storage.NewDeserializeReader(rr, func(r storage.Record, v []*storage.Value) error {
-		return storage.ValueDeserializer(r, v, schema.Fields)
+		return storage.ValueDeserializer(r, v, selectedFields)
 	})
 	defer pkIter.Close()
 	analyzeResult, remained, err := t.iterAndGetScalarAnalyzeResult(pkIter, expiredFilter)
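Worth noting in the hunks above: pruning now lives in two layers that must agree. The reader is told which columns to load by field ID (`requiredFields`), while `ValueDeserializer` still needs the matching field schemas (`selectedFields`); deriving both from the same set keeps them consistent. And because the v2 reader's `SetNeededFields` is a no-op for now, this deserializer-side selection is presumably what keeps results correct on storage v2. A condensed sketch with placeholder names:

	// One source of truth for both layers: the ID set drives the reader,
	// the filtered schema slice drives the deserializer.
	needed := typeutil.NewSet[int64]()
	needed.Insert(0, 1, pkFieldID, clusteringKeyFieldID)
	selected := lo.Filter(fullSchema.GetFields(),
		func(f *schemapb.FieldSchema, _ int) bool {
			return needed.Contain(f.GetFieldID())
		})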
@@ -32,6 +32,7 @@ import (
 	"github.com/milvus-io/milvus/pkg/v2/proto/datapb"
 	"github.com/milvus-io/milvus/pkg/v2/proto/indexpb"
 	"github.com/milvus-io/milvus/pkg/v2/util/merr"
+	"github.com/milvus-io/milvus/pkg/v2/util/typeutil"
 )
 
 const (
@@ -61,6 +62,7 @@ type rwOptions struct {
 	multiPartUploadSize int64
 	columnGroups        []storagecommon.ColumnGroup
 	storageConfig       *indexpb.StorageConfig
+	neededFields        typeutil.Set[int64]
 }
 
 func (o *rwOptions) validate() error {
@@ -141,6 +143,12 @@ func WithStorageConfig(storageConfig *indexpb.StorageConfig) RwOption {
 	}
 }
 
+func WithNeededFields(neededFields typeutil.Set[int64]) RwOption {
+	return func(options *rwOptions) {
+		options.neededFields = neededFields
+	}
+}
+
 func makeBlobsReader(ctx context.Context, binlogs []*datapb.FieldBinlog, downloader downloaderFn) (ChunkedBlobsReader, error) {
 	if len(binlogs) == 0 {
 		return func() ([]*Blob, error) {
@@ -212,7 +220,7 @@ func makeBlobsReader(ctx context.Context, binlogs []*datapb.FieldBinlog, downloader downloaderFn) (ChunkedBlobsReader, error) {
 	}, nil
 }
 
-func NewBinlogRecordReader(ctx context.Context, binlogs []*datapb.FieldBinlog, schema *schemapb.CollectionSchema, option ...RwOption) (RecordReader, error) {
+func NewBinlogRecordReader(ctx context.Context, binlogs []*datapb.FieldBinlog, schema *schemapb.CollectionSchema, option ...RwOption) (rr RecordReader, err error) {
 	rwOptions := DefaultReaderOptions()
 	for _, opt := range option {
 		opt(rwOptions)
@@ -222,11 +230,13 @@ func NewBinlogRecordReader(ctx context.Context, binlogs []*datapb.FieldBinlog, schema *schemapb.CollectionSchema, option ...RwOption) (RecordReader, error) {
 	}
 	switch rwOptions.version {
 	case StorageV1:
-		blobsReader, err := makeBlobsReader(ctx, binlogs, rwOptions.downloader)
+		var blobsReader ChunkedBlobsReader
+		blobsReader, err = makeBlobsReader(ctx, binlogs, rwOptions.downloader)
 		if err != nil {
 			return nil, err
 		}
-		return newCompositeBinlogRecordReader(schema, blobsReader)
+		rr, err = newCompositeBinlogRecordReader(schema, blobsReader)
 	case StorageV2:
 		if len(binlogs) <= 0 {
 			return nil, sio.EOF
@@ -245,9 +255,17 @@ func NewBinlogRecordReader(ctx context.Context, binlogs []*datapb.FieldBinlog, schema *schemapb.CollectionSchema, option ...RwOption) (RecordReader, error) {
 				paths[j] = append(paths[j], logPath)
 			}
 		}
-		return newPackedRecordReader(paths, schema, rwOptions.bufferSize, rwOptions.storageConfig)
+		rr, err = newPackedRecordReader(paths, schema, rwOptions.bufferSize, rwOptions.storageConfig)
+	default:
+		return nil, merr.WrapErrServiceInternal(fmt.Sprintf("unsupported storage version %d", rwOptions.version))
 	}
-	return nil, merr.WrapErrServiceInternal(fmt.Sprintf("unsupported storage version %d", rwOptions.version))
+	if err != nil {
+		return nil, err
+	}
+	if rwOptions.neededFields != nil {
+		rr.SetNeededFields(rwOptions.neededFields)
+	}
+	return rr, nil
 }
 
 func NewBinlogRecordWriter(ctx context.Context, collectionID, partitionID, segmentID UniqueID,
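The signature change to named returns (`rr RecordReader, err error`) is what lets every arm of the `switch` fall through to a single shared tail where the needed-fields hint is applied. Also note the nil guard: `neededFields` defaults to nil, so pruning is strictly opt-in and existing callers are untouched. Stripped of storage details, the control-flow pattern looks like the sketch below; `newV1Reader`, `newV2Reader`, `v1`, `v2`, and `errUnsupported` are illustrative stand-ins, not names from the codebase.

	func newReader(version int, needed typeutil.Set[int64]) (rr RecordReader, err error) {
		switch version {
		case v1:
			rr, err = newV1Reader()
		case v2:
			rr, err = newV2Reader()
		default:
			return nil, errUnsupported
		}
		if err != nil {
			return nil, err
		}
		if needed != nil {
			rr.SetNeededFields(needed) // shared post-processing, written once
		}
		return rr, nil
	}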
@@ -44,6 +44,7 @@ type Record interface {
 
 type RecordReader interface {
 	Next() (Record, error)
+	SetNeededFields(fields typeutil.Set[int64])
 	Close() error
 }
 
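Because `SetNeededFields` is added to the `RecordReader` interface itself, every implementation, including any test double, must now provide it; a no-op satisfies the contract, as the simple-arrow and packed readers below show. A hypothetical in-package mock for tests (`sio` is the io alias used elsewhere in the diff):

	type mockRecordReader struct{}

	func (m *mockRecordReader) Next() (Record, error)                 { return nil, sio.EOF }
	func (m *mockRecordReader) SetNeededFields(_ typeutil.Set[int64]) {} // no-op is valid
	func (m *mockRecordReader) Close() error                          { return nil }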
@@ -151,6 +151,23 @@ func (crr *CompositeBinlogRecordReader) Next() (Record, error) {
 	return r, nil
 }
 
+func (crr *CompositeBinlogRecordReader) SetNeededFields(neededFields typeutil.Set[int64]) {
+	if neededFields == nil {
+		return
+	}
+
+	crr.schema = &schemapb.CollectionSchema{
+		Fields: lo.Filter(crr.schema.GetFields(), func(field *schemapb.FieldSchema, _ int) bool {
+			return neededFields.Contain(field.GetFieldID())
+		}),
+	}
+	index := make(map[FieldID]int16)
+	for i, f := range crr.schema.Fields {
+		index[f.FieldID] = int16(i)
+	}
+	crr.index = index
+}
+
 func (crr *CompositeBinlogRecordReader) Close() error {
 	if crr.brs != nil {
 		for _, er := range crr.brs {
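Two details of this implementation are easy to miss: a nil set means "leave everything as is" rather than "prune everything", and after the schema is narrowed the FieldID-to-column index is rebuilt so that `Next()` resolves columns against the pruned layout instead of stale positions. Since the method mutates the reader in place, it is evidently meant to be called before the first `Next()`, which is what `NewBinlogRecordReader` does. A package-internal test sketch of the index invariant (hypothetical, relying on the unexported fields):

	// After SetNeededFields, column positions must match the pruned
	// schema exactly.
	for i, f := range crr.schema.GetFields() {
		if crr.index[f.FieldID] != int16(i) {
			t.Fatalf("stale column index for field %d", f.FieldID)
		}
	}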
@@ -1042,6 +1059,10 @@ func (crr *simpleArrowRecordReader) Next() (Record, error) {
 	return &crr.r, nil
 }
 
+func (crr *simpleArrowRecordReader) SetNeededFields(_ typeutil.Set[int64]) {
+	// no-op for simple arrow record reader
+}
+
 func (crr *simpleArrowRecordReader) Close() error {
 	if crr.closer != nil {
 		crr.closer()
@@ -95,6 +95,11 @@ func (pr *packedRecordReader) Next() (Record, error) {
 	}
 }
 
+func (pr *packedRecordReader) SetNeededFields(fields typeutil.Set[int64]) {
+	// TODO, push down SetNeededFields to packedReader after implemented
+	// no-op for now
+}
+
 func (pr *packedRecordReader) Close() error {
 	if pr.reader != nil {
 		return pr.reader.Close()