fix: Fix import reader goroutine leak (#41869)

Close the chunk manager's reader after the import completes to prevent
goroutine leaks.

issues: https://github.com/milvus-io/milvus/issues/41868

---------

Signed-off-by: bigsheeper <yihao.dai@zilliz.com>
Author: yihao.dai, 2025-05-16 10:18:35 +08:00 (committed by GitHub)
Commit: 6c1a37fca1, parent: ae43230703
8 changed files with 32 additions and 17 deletions
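The shape of the fix, in brief: ChunkManager.Reader hands back a storage.FileReader whose backing object-storage stream can pin a goroutine (and a connection) until it is closed. Each import reader now keeps that handle in a cmr/cmrs field and releases it in Close(). A minimal sketch of the ownership pattern follows, with illustrative names rather than the exact Milvus types:

package main

import (
    "fmt"
    "io"
    "strings"
)

// reader owns the storage handle it opened; whoever constructs it must call
// Close(), or the handle's background goroutine is never released.
type reader struct {
    cmr io.ReadCloser // stand-in for storage.FileReader
}

func newReader(rc io.ReadCloser) *reader {
    return &reader{cmr: rc}
}

// Close mirrors the diff's nil-guard: safe even if construction half-failed.
func (r *reader) Close() {
    if r.cmr != nil {
        r.cmr.Close()
    }
}

func main() {
    r := newReader(io.NopCloser(strings.NewReader("row data")))
    defer r.Close() // without this, the underlying stream leaks
    fmt.Println("import running")
}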

---------

@@ -151,7 +151,7 @@ func (s *SchedulerSuite) TestScheduler_Start_Preimport() {
 	cm := mocks.NewChunkManager(s.T())
 	ioReader := strings.NewReader(string(bytes))
 	cm.EXPECT().Size(mock.Anything, mock.Anything).Return(1024, nil)
-	cm.EXPECT().Reader(mock.Anything, mock.Anything).Return(&mockReader{Reader: ioReader}, nil)
+	cm.EXPECT().Reader(mock.Anything, mock.Anything).Return(&mockReader{Reader: ioReader, Closer: io.NopCloser(ioReader)}, nil)
 	s.cm = cm
 	preimportReq := &datapb.PreImportRequest{

@@ -205,7 +205,7 @@ func (s *SchedulerSuite) TestScheduler_Start_Preimport_Failed() {
 	}
 	ioReader := strings.NewReader(string(bytes))
 	cm.EXPECT().Size(mock.Anything, mock.Anything).Return(1024, nil)
-	cm.EXPECT().Reader(mock.Anything, mock.Anything).Return(&mockReader{Reader: ioReader}, nil)
+	cm.EXPECT().Reader(mock.Anything, mock.Anything).Return(&mockReader{Reader: ioReader, Closer: io.NopCloser(ioReader)}, nil)
 	s.cm = cm
 	preimportReq := &datapb.PreImportRequest{

@@ -244,7 +244,7 @@ func (s *SchedulerSuite) TestScheduler_Start_Import() {
 	cm := mocks.NewChunkManager(s.T())
 	ioReader := strings.NewReader(string(bytes))
-	cm.EXPECT().Reader(mock.Anything, mock.Anything).Return(&mockReader{Reader: ioReader}, nil)
+	cm.EXPECT().Reader(mock.Anything, mock.Anything).Return(&mockReader{Reader: ioReader, Closer: io.NopCloser(ioReader)}, nil)
 	s.cm = cm
 	s.syncMgr.EXPECT().SyncData(mock.Anything, mock.Anything).RunAndReturn(func(ctx context.Context, task syncmgr.Task, callbacks ...func(error) error) (*conc.Future[struct{}], error) {

@@ -305,7 +305,7 @@ func (s *SchedulerSuite) TestScheduler_Start_Import_Failed() {
 	cm := mocks.NewChunkManager(s.T())
 	ioReader := strings.NewReader(string(bytes))
-	cm.EXPECT().Reader(mock.Anything, mock.Anything).Return(&mockReader{Reader: ioReader}, nil)
+	cm.EXPECT().Reader(mock.Anything, mock.Anything).Return(&mockReader{Reader: ioReader, Closer: io.NopCloser(ioReader)}, nil)
 	s.cm = cm
 	s.syncMgr.EXPECT().SyncData(mock.Anything, mock.Anything).RunAndReturn(func(ctx context.Context, task syncmgr.Task, callbacks ...func(error) error) (*conc.Future[struct{}], error) {
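mockReader is defined outside this diff, but presumably embeds both io.Reader and io.Closer so one struct can stand in for a read-plus-close handle. Before this change its Closer field was left nil, so the reader Close() calls added elsewhere in this commit would panic in the tests; wrapping the strings.Reader in io.NopCloser makes Close() a harmless no-op. An assumed sketch of the helper:

// Assumed shape of the test helper: embedding both interfaces lets one
// struct satisfy a storage.FileReader-style contract in mock expectations.
type mockReader struct {
    io.Reader
    io.Closer
}

// Usage, as in the updated expectations above:
// &mockReader{Reader: ioReader, Closer: io.NopCloser(ioReader)}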

---------

@@ -126,6 +126,7 @@ func (r *reader) readDelete(deltaLogs []string, tsStart, tsEnd uint64) (map[any]
 		// no need to read nulls in DeleteEventType
 		rowsSet, _, err := readData(reader, storage.DeleteEventType)
 		if err != nil {
+			reader.Close()
 			return nil, err
 		}
 		for _, rows := range rowsSet {

@@ -133,6 +134,7 @@ func (r *reader) readDelete(deltaLogs []string, tsStart, tsEnd uint64) (map[any]
 				dl := &storage.DeleteLog{}
 				err = dl.Parse(row)
 				if err != nil {
+					reader.Close()
 					return nil, err
 				}
 				if dl.Ts >= tsStart && dl.Ts <= tsEnd {

@@ -143,6 +145,7 @@ func (r *reader) readDelete(deltaLogs []string, tsStart, tsEnd uint64) (map[any]
 				}
 			}
 		}
+		reader.Close()
 	}
 	return deleteData, nil
 }
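The three hunks above close the binlog reader on both error returns and at the end of each loop iteration. An equivalent shape, not what the commit does, is to give each file's read its own function scope so one defer covers every return path. A generic, self-contained sketch:

package main

import (
    "fmt"
    "io"
    "strings"
)

// processOne gives a single file's read its own function scope so one defer
// covers every early return. The diff instead inserts reader.Close() at each
// return site, which is equivalent but easier to miss when a new error path
// is added later.
func processOne(open func() (io.ReadCloser, error)) error {
    rc, err := open()
    if err != nil {
        return err
    }
    defer rc.Close()
    _, err = io.ReadAll(rc) // stand-in for readData + DeleteLog parsing
    return err
}

func main() {
    err := processOne(func() (io.ReadCloser, error) {
        return io.NopCloser(strings.NewReader("delete log payload")), nil
    })
    fmt.Println("err:", err)
}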

---------

@@ -37,6 +37,7 @@ type Row = map[storage.FieldID]any
 type reader struct {
 	ctx    context.Context
 	cm     storage.ChunkManager
+	cmr    storage.FileReader
 	schema *schemapb.CollectionSchema
 	cr     *csv.Reader

@@ -74,6 +75,7 @@ func NewReader(ctx context.Context, cm storage.ChunkManager, schema *schemapb.Co
 	return &reader{
 		ctx:    ctx,
 		cm:     cm,
+		cmr:    cmReader,
 		schema: schema,
 		cr:     csvReader,
 		parser: rowParser,

@@ -120,7 +122,11 @@ func (r *reader) Read() (*storage.InsertData, error) {
 	return insertData, nil
 }

-func (r *reader) Close() {}
+func (r *reader) Close() {
+	if r.cmr != nil {
+		r.cmr.Close()
+	}
+}

 func (r *reader) Size() (int64, error) {
 	if size := r.fileSize.Load(); size != 0 {

---------

@@ -40,6 +40,7 @@ type Row = map[storage.FieldID]any
 type reader struct {
 	ctx      context.Context
 	cm       storage.ChunkManager
+	cmr      storage.FileReader
 	schema   *schemapb.CollectionSchema
 	fileSize *atomic.Int64

@@ -65,6 +66,7 @@ func NewReader(ctx context.Context, cm storage.ChunkManager, schema *schemapb.Co
 	reader := &reader{
 		ctx:      ctx,
 		cm:       cm,
+		cmr:      r,
 		schema:   schema,
 		fileSize: atomic.NewInt64(0),
 		filePath: path,

@@ -180,4 +182,8 @@ func (j *reader) Size() (int64, error) {
 	return size, nil
 }

-func (j *reader) Close() {}
+func (j *reader) Close() {
+	if j.cmr != nil {
+		j.cmr.Close()
+	}
+}

---------

@@ -248,8 +248,6 @@ func (c *FieldReader) Next(count int64) (any, error) {
 	return data, nil
 }

-func (c *FieldReader) Close() {}
-
 // setByteOrder sets BigEndian/LittleEndian, the logic of this method is copied from npyio lib
 func (c *FieldReader) setByteOrder() {
 	var nativeEndian binary.ByteOrder
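Here and in the parquet FieldReader below, the no-op Close() is deleted rather than filled in: after this commit the enclosing reader opens, tracks, and closes the storage handles, so a Close method on the per-field readers would advertise ownership they do not have. Sketched with illustrative types:

// The FieldReader only borrows its stream from the enclosing reader,
// which is why it no longer exposes a Close method at all.
type FieldReader struct {
    src io.Reader // borrowed; closed by the owner, never here
}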

---------

@@ -35,6 +35,7 @@ import (
 type reader struct {
 	ctx      context.Context
 	cm       storage.ChunkManager
+	cmrs     map[int64]storage.FileReader
 	schema   *schemapb.CollectionSchema
 	fileSize *atomic.Int64

@@ -72,6 +73,7 @@ func NewReader(ctx context.Context, cm storage.ChunkManager, schema *schemapb.Co
 	return &reader{
 		ctx:      ctx,
 		cm:       cm,
+		cmrs:     readers,
 		schema:   schema,
 		fileSize: atomic.NewInt64(0),
 		paths:    paths,

@@ -119,13 +121,13 @@ func (r *reader) Size() (int64, error) {
 }

 func (r *reader) Close() {
-	for _, cr := range r.frs {
-		cr.Close()
+	for _, cmr := range r.cmrs {
+		cmr.Close()
 	}
 }

-func CreateReaders(ctx context.Context, cm storage.ChunkManager, schema *schemapb.CollectionSchema, paths []string) (map[int64]io.Reader, error) {
-	readers := make(map[int64]io.Reader)
+func CreateReaders(ctx context.Context, cm storage.ChunkManager, schema *schemapb.CollectionSchema, paths []string) (map[int64]storage.FileReader, error) {
+	readers := make(map[int64]storage.FileReader)
 	nameToPath := lo.SliceToMap(paths, func(path string) (string, string) {
 		nameWithExt := filepath.Base(path)
 		name := strings.TrimSuffix(nameWithExt, filepath.Ext(nameWithExt))
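The CreateReaders signature change is what makes the numpy reader's Close() possible: a plain io.Reader cannot be closed, so the map has to carry the closable handle. A self-contained sketch of the same idea, with io.ReadCloser standing in for storage.FileReader:

package main

import (
    "io"
    "strings"
)

// createReaders mirrors the new CreateReaders contract: return the closable
// handles, not bare io.Readers, so the caller's Close() can release them.
func createReaders(paths []string) (map[int64]io.ReadCloser, error) {
    readers := make(map[int64]io.ReadCloser)
    for i, p := range paths {
        // stand-in for cm.Reader(ctx, path)
        readers[int64(i)] = io.NopCloser(strings.NewReader(p))
    }
    return readers, nil
}

// closeAll mirrors the loop over r.cmrs in the numpy reader's Close().
func closeAll(readers map[int64]io.ReadCloser) {
    for _, r := range readers {
        r.Close()
    }
}

func main() {
    rs, _ := createReaders([]string{"id.npy", "vector.npy"})
    defer closeAll(rs)
}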

---------

@@ -212,8 +212,6 @@ func (c *FieldReader) Next(count int64) (any, any, error) {
 	}
 }

-func (c *FieldReader) Close() {}
-
 func ReadBoolData(pcr *FieldReader, count int64) (any, error) {
 	chunked, err := pcr.columnReader.NextBatch(count)
 	if err != nil {

---------

@@ -38,6 +38,7 @@ import (
 type reader struct {
 	ctx    context.Context
 	cm     storage.ChunkManager
+	cmr    storage.FileReader
 	schema *schemapb.CollectionSchema
 	path   string

@@ -81,6 +82,7 @@ func NewReader(ctx context.Context, cm storage.ChunkManager, schema *schemapb.Co
 	return &reader{
 		ctx:      ctx,
 		cm:       cm,
+		cmr:      cmReader,
 		schema:   schema,
 		fileSize: atomic.NewInt64(0),
 		path:     path,

@@ -140,11 +142,11 @@ func (r *reader) Size() (int64, error) {
 }

 func (r *reader) Close() {
 	for _, cr := range r.frs {
 		cr.Close()
 	}
-	err := r.r.Close()
-	if err != nil {
-		log.Warn("close parquet reader failed", zap.Error(err))
+	if r.cmr != nil {
+		r.cmr.Close()
 	}
 }
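The parquet reader's Close() now releases the column readers (frs) first, then the storage-level handle (cmr), replacing the old close of the wrapped parquet file reader r.r and its warning log. If Close() could ever be invoked twice, a sync.Once guard is a common hardening step; a sketch under that assumption, not part of the commit:

package reader

import (
    "io"
    "sync"
)

type reader struct {
    closeOnce sync.Once
    cmr       io.Closer // storage handle; nil if construction failed early
}

// Close is idempotent: only the first call releases the handle.
func (r *reader) Close() {
    r.closeOnce.Do(func() {
        if r.cmr != nil {
            r.cmr.Close()
        }
    })
}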