mirror of
https://gitee.com/milvus-io/milvus.git
synced 2025-12-08 01:58:34 +08:00
enhance: Fix parquet import OOM (#43756)
Each ColumnReader consumes ReaderProperties.BufferSize memory independently. Therefore, the bufferSize should be divided by the number of columns to ensure total memory usage stays within the intended limit.

issue: https://github.com/milvus-io/milvus/issues/43755

Signed-off-by: bigsheeper <yihao.dai@zilliz.com>
This commit is contained in:
parent
1a712aa13b
commit
ad950368fe
@@ -35,7 +35,7 @@ import (
|
|||||||
"github.com/milvus-io/milvus/pkg/v2/util/merr"
|
"github.com/milvus-io/milvus/pkg/v2/util/merr"
|
||||||
)
|
)
|
||||||
|
|
||||||
const fileReaderBufferSize = int64(32 * 1024 * 1024)
|
const totalReadBufferSize = int64(64 * 1024 * 1024)
|
||||||
|
|
||||||
type reader struct {
|
type reader struct {
|
||||||
ctx context.Context
|
ctx context.Context
|
||||||
@@ -58,8 +58,14 @@ func NewReader(ctx context.Context, cm storage.ChunkManager, schema *schemapb.Co
|
|||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, err
|
return nil, err
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Each ColumnReader consumes ReaderProperties.BufferSize memory independently.
|
||||||
|
// Therefore, the bufferSize should be divided by the number of columns
|
||||||
|
// to ensure total memory usage stays within the intended limit.
|
||||||
|
columnReaderBufferSize := totalReadBufferSize / int64(len(schema.GetFields()))
|
||||||
|
|
||||||
r, err := file.NewParquetReader(cmReader, file.WithReadProps(&parquet.ReaderProperties{
|
r, err := file.NewParquetReader(cmReader, file.WithReadProps(&parquet.ReaderProperties{
|
||||||
BufferSize: fileReaderBufferSize,
|
BufferSize: columnReaderBufferSize,
|
||||||
BufferedStreamEnabled: true,
|
BufferedStreamEnabled: true,
|
||||||
}))
|
}))
|
||||||
if err != nil {
|
if err != nil {
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user