fix: [Storagev2] Close segment readers in mergeSort (#43116)

Related to #43062

---------

Signed-off-by: Congqi Xia <congqi.xia@zilliz.com>
Author: congqixia (committed by GitHub)
Date: 2025-07-04 23:56:44 +08:00
Parent: 0017fa8acc
Commit: d09764508a
5 changed files with 40 additions and 13 deletions


@@ -87,6 +87,12 @@ func mergeSortMultipleSegments(ctx context.Context,
 		segmentFilters[i] = compaction.NewEntityFilter(delta, collectionTtl, currentTime)
 	}
 
+	defer func() {
+		for _, r := range segmentReaders {
+			r.Close()
+		}
+	}()
+
 	var predicate func(r storage.Record, ri, i int) bool
 	switch pkField.DataType {
 	case schemapb.DataType_Int64:
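
Note: the fix here hinges on Go's defer semantics: the close loop registered right after the readers are constructed runs on every return path, including early error returns that would otherwise leak the readers. A minimal, self-contained sketch of the same pattern (segmentReader and mergeSort below are hypothetical stand-ins, not the actual storagev2 types):

package main

import "fmt"

// segmentReader stands in for a per-segment reader that holds
// resources (file handles, cgo buffers) until Close is called.
type segmentReader struct{ id int }

func (r *segmentReader) Close() { fmt.Printf("closed reader %d\n", r.id) }

func mergeSort(n int) error {
	readers := make([]*segmentReader, n)
	for i := range readers {
		readers[i] = &segmentReader{id: i}
	}
	// Registered once, runs on every return path below, which is
	// what the patch adds to mergeSortMultipleSegments.
	defer func() {
		for _, r := range readers {
			r.Close()
		}
	}()
	// ... merge-sort over readers; an early `return err` here
	// would previously have leaked every reader ...
	return nil
}

func main() { _ = mergeSort(3) }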


@@ -286,10 +286,20 @@ func (t *mixCompactionTask) writeSegment(ctx context.Context,
 				rb.Append(r, sliceStart, r.Len())
 			}
 			if rb.GetRowNum() > 0 {
-				mWriter.Write(rb.Build())
+				err := func() error {
+					rec := rb.Build()
+					defer rec.Release()
+					return mWriter.Write(rec)
+				}()
+				if err != nil {
+					return 0, 0, err
+				}
 			}
 		} else {
-			mWriter.Write(r)
+			err := mWriter.Write(r)
+			if err != nil {
+				return 0, 0, err
+			}
 		}
 	}
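
Note: wrapping Build/Write in an immediately invoked closure is what lets the defer release the record right after the write instead of at the end of the long-running writeSegment, while still surfacing the write error. A stripped-down illustration with stand-in types (record, writer, and flushOnce are hypothetical, not the compaction package's API):

package main

import (
	"errors"
	"fmt"
)

// record mimics a reference-counted Arrow record.
type record struct{}

func (r *record) Release() { fmt.Println("record released") }

type writer struct{ fail bool }

func (w *writer) Write(r *record) error {
	if w.fail {
		return errors.New("write failed")
	}
	return nil
}

func flushOnce(w *writer) error {
	// The closure bounds the record's lifetime: Release runs as soon
	// as Write returns, and the error still propagates to the caller.
	err := func() error {
		rec := &record{}
		defer rec.Release()
		return w.Write(rec)
	}()
	return err
}

func main() {
	// Prints "record released" first, then the write error.
	fmt.Println(flushOnce(&writer{fail: true}))
}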


@@ -38,9 +38,10 @@ func Sort(batchSize uint64, schema *schemapb.CollectionSchema, rr []RecordReader
 	}
 	indices := make([]*index, 0)
+	// release cgo records
 	defer func() {
-		for _, r := range records {
-			r.Release()
+		for _, rec := range records {
+			rec.Release()
 		}
 	}()
@@ -68,13 +69,6 @@ func Sort(batchSize uint64, schema *schemapb.CollectionSchema, rr []RecordReader
 		return 0, nil
 	}
-	// release cgo records
-	defer func() {
-		for _, rec := range records {
-			rec.Release()
-		}
-	}()
-
 	pkField, err := typeutil.GetPrimaryFieldSchema(schema)
 	if err != nil {
 		return 0, err
@@ -261,6 +255,14 @@ func MergeSort(batchSize uint64, schema *schemapb.CollectionSchema, rr []RecordR
 	}
 	rb := NewRecordBuilder(schema)
+	writeRecord := func() error {
+		rec := rb.Build()
+		defer rec.Release()
+		if rec.Len() > 0 {
+			return rw.Write(rec)
+		}
+		return nil
+	}
 
 	for pq.Len() > 0 {
 		idx := pq.Dequeue()
@@ -269,7 +271,7 @@ func MergeSort(batchSize uint64, schema *schemapb.CollectionSchema, rr []RecordR
 			// small batch size will cause write performance degradation. To work around this issue, we accumulate
 			// records and write them in batches. This requires additional memory copy.
 			if rb.GetSize() >= batchSize {
-				if err := rw.Write(rb.Build()); err != nil {
+				if err := writeRecord(); err != nil {
 					return 0, err
 				}
 			}
@@ -289,7 +291,7 @@ func MergeSort(batchSize uint64, schema *schemapb.CollectionSchema, rr []RecordR
 	// write the last batch
 	if rb.GetRowNum() > 0 {
-		if err := rw.Write(rb.Build()); err != nil {
+		if err := writeRecord(); err != nil {
 			return 0, err
 		}
 	}
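
Note: writeRecord centralizes the accumulate-then-flush batching that the in-code comment describes: rows are copied into a builder until a size threshold is reached, then written as one record and released in a single place. A self-contained sketch of that flow (builder and writeBatched are simplified stand-ins for RecordBuilder and RecordWriter):

package main

import "fmt"

// builder accumulates rows; build drains it into one batch.
type builder struct{ rows []int }

func (b *builder) size() int      { return len(b.rows) }
func (b *builder) append(row int) { b.rows = append(b.rows, row) }
func (b *builder) build() []int   { out := b.rows; b.rows = nil; return out }

func writeBatched(rows []int, batchSize int, write func([]int) error) error {
	b := &builder{}
	// Mirrors the patched MergeSort: flush through one helper so the
	// built batch is handled (and could be released) in one place.
	flush := func() error {
		batch := b.build()
		if len(batch) > 0 {
			return write(batch)
		}
		return nil
	}
	for _, row := range rows {
		b.append(row)
		if b.size() >= batchSize {
			if err := flush(); err != nil {
				return err
			}
		}
	}
	return flush() // write the last, possibly short, batch
}

func main() {
	err := writeBatched([]int{1, 2, 3, 4, 5}, 2, func(batch []int) error {
		fmt.Println("write", batch)
		return nil
	})
	fmt.Println("err:", err)
}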


@@ -98,6 +98,10 @@ func NewPackedReader(filePaths []string, schema *arrow.Schema, bufferSize int64,
 }
 
 func (pr *PackedReader) ReadNext() (arrow.Record, error) {
+	if pr.currentBatch != nil {
+		pr.currentBatch.Release()
+		pr.currentBatch = nil
+	}
 	var cArr C.CArrowArray
 	var cSchema C.CArrowSchema
 	status := C.ReadNext(pr.cPackedReader, &cArr, &cSchema)
@@ -120,6 +124,7 @@ func (pr *PackedReader) ReadNext() (arrow.Record, error) {
 	if err != nil {
 		return nil, fmt.Errorf("failed to convert ArrowArray to Record: %w", err)
 	}
+	pr.currentBatch = recordBatch
 
 	// Return the RecordBatch as an arrow.Record
 	return recordBatch, nil
@@ -129,6 +134,9 @@ func (pr *PackedReader) Close() error {
 	if pr.cPackedReader == nil {
 		return nil
 	}
+	if pr.currentBatch != nil {
+		pr.currentBatch.Release()
+	}
 	status := C.CloseReader(pr.cPackedReader)
 	if err := ConsumeCStatusIntoError(&status); err != nil {
 		return err


@@ -36,6 +36,7 @@ type PackedReader struct {
 	cPackedReader C.CPackedReader
 	arr           *cdata.CArrowArray
 	schema        *arrow.Schema
+	currentBatch  arrow.Record
 }
 
 type (
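
Note: the new currentBatch field gives PackedReader a simple ownership rule for cgo-backed Arrow memory: the reader keeps the last batch it returned and releases it on the next ReadNext or on Close, so a caller may use each batch only until the next read. A minimal sketch of that contract (batch and reader below are hypothetical stand-ins for arrow.Record and PackedReader):

package main

import "fmt"

type batch struct{ n int }

func (b *batch) Release() { fmt.Printf("batch %d released\n", b.n) }

// reader owns the last batch it returned, like the patched PackedReader.
type reader struct {
	next    int
	current *batch
}

func (r *reader) ReadNext() *batch {
	// Releasing the previous batch here means each returned batch is
	// valid only until the next ReadNext or Close, matching the
	// contract the patch gives PackedReader.
	if r.current != nil {
		r.current.Release()
		r.current = nil
	}
	if r.next >= 2 { // pretend the source is exhausted after two batches
		return nil
	}
	r.next++
	r.current = &batch{n: r.next}
	return r.current
}

func (r *reader) Close() {
	if r.current != nil {
		r.current.Release()
		r.current = nil
	}
}

func main() {
	r := &reader{}
	for b := r.ReadNext(); b != nil; b = r.ReadNext() {
		fmt.Printf("got batch %d\n", b.n)
	}
	r.Close()
}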