enhance: Fix parquet import OOM (#43756)

Each ColumnReader consumes ReaderProperties.BufferSize memory
independently. Therefore, the total read buffer budget should be divided by
the number of columns to obtain each reader's BufferSize, so that total
memory usage stays within the intended limit.

issue: https://github.com/milvus-io/milvus/issues/43755

Signed-off-by: bigsheeper <yihao.dai@zilliz.com>
This commit is contained in:
yihao.dai 2025-08-08 18:57:40 +08:00 committed by GitHub
parent 1a712aa13b
commit ad950368fe
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

View File

@ -35,7 +35,7 @@ import (
"github.com/milvus-io/milvus/pkg/v2/util/merr"
)
// fileReaderBufferSize (32 MiB) is passed unchanged as
// ReaderProperties.BufferSize when the parquet file reader is constructed.
// NOTE(review): this listing looks like a rendered diff with the +/- markers
// stripped, so this is presumably the pre-change constant that
// totalReadBufferSize replaces — confirm against the actual file.
const fileReaderBufferSize = int64(32 * 1024 * 1024)
// totalReadBufferSize (64 MiB) is the overall read-buffer budget. NewReader
// divides it by the number of schema fields to derive the
// ReaderProperties.BufferSize handed to the parquet reader, because each
// column reader consumes BufferSize independently.
// NOTE(review): the divisor is len(schema.GetFields()); an empty schema would
// make that integer division panic — confirm the schema is validated first.
const totalReadBufferSize = int64(64 * 1024 * 1024)
type reader struct {
ctx context.Context
@ -58,8 +58,14 @@ func NewReader(ctx context.Context, cm storage.ChunkManager, schema *schemapb.Co
if err != nil {
return nil, err
}
// Each ColumnReader consumes ReaderProperties.BufferSize memory independently.
// Therefore, the bufferSize should be divided by the number of columns
// to ensure total memory usage stays within the intended limit.
columnReaderBufferSize := totalReadBufferSize / int64(len(schema.GetFields()))
r, err := file.NewParquetReader(cmReader, file.WithReadProps(&parquet.ReaderProperties{
BufferSize: fileReaderBufferSize,
BufferSize: columnReaderBufferSize,
BufferedStreamEnabled: true,
}))
if err != nil {