fix: [StorageV2] Use correct offset filling null bitmap (#42774)

Related to #39173

`null_bitmap_data()` returns raw pointer of null bitmap of Array. While
after slicing, this bitmap is not rewritten due to zero copy
implementation, so the current start pos maybe non-zero while
FillFieldData generating column `valid_data` array.

This PR add `offset` param for `FillFieldData` method, and force all
invocation pass correct offset of `null_bitmap_data` ptr.

Also update milvus-storage commit fixing reader failed to return data
when buffer size smaller than row group size problem.

---------

Signed-off-by: Congqi Xia <congqi.xia@zilliz.com>
This commit is contained in:
congqixia 2025-06-17 10:08:38 +08:00 committed by GitHub
parent 9653ec8d8c
commit f01ff57f3f
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
16 changed files with 115 additions and 73 deletions

View File

@ -53,7 +53,10 @@ FieldDataImpl<Type, is_type_entire_row>::FillFieldData(const void* source,
template <typename Type, bool is_type_entire_row>
void
FieldDataImpl<Type, is_type_entire_row>::FillFieldData(
const void* field_data, const uint8_t* valid_data, ssize_t element_count) {
const void* field_data,
const uint8_t* valid_data,
ssize_t element_count,
ssize_t offset) {
AssertInfo(
nullable_,
"no need to fill valid_data, use the 2-argument version instead");
@ -73,7 +76,7 @@ FieldDataImpl<Type, is_type_entire_row>::FillFieldData(
// means null_count == 0, will fill it with 0xFF
if (valid_data != nullptr) {
bitset::detail::ElementWiseBitsetPolicy<uint8_t>::op_copy(
valid_data, 0, valid_data_.data(), length_, element_count);
valid_data, offset, valid_data_.data(), length_, element_count);
}
length_ += element_count;
@ -129,7 +132,8 @@ FieldDataImpl<Type, is_type_entire_row>::FillFieldData(
if (nullable_) {
return FillFieldData(values.data(),
bool_array->null_bitmap_data(),
element_count);
element_count,
bool_array->offset());
}
return FillFieldData(values.data(), element_count);
}
@ -138,8 +142,10 @@ FieldDataImpl<Type, is_type_entire_row>::FillFieldData(
GetDataInfoFromArray<arrow::Int8Array, arrow::Type::type::INT8>(
array);
if (nullable_) {
return FillFieldData(
array_info.first, array->null_bitmap_data(), element_count);
return FillFieldData(array_info.first,
array->null_bitmap_data(),
element_count,
array->offset());
}
return FillFieldData(array_info.first, array_info.second);
}
@ -148,8 +154,10 @@ FieldDataImpl<Type, is_type_entire_row>::FillFieldData(
GetDataInfoFromArray<arrow::Int16Array,
arrow::Type::type::INT16>(array);
if (nullable_) {
return FillFieldData(
array_info.first, array->null_bitmap_data(), element_count);
return FillFieldData(array_info.first,
array->null_bitmap_data(),
element_count,
array->offset());
}
return FillFieldData(array_info.first, array_info.second);
}
@ -158,8 +166,10 @@ FieldDataImpl<Type, is_type_entire_row>::FillFieldData(
GetDataInfoFromArray<arrow::Int32Array,
arrow::Type::type::INT32>(array);
if (nullable_) {
return FillFieldData(
array_info.first, array->null_bitmap_data(), element_count);
return FillFieldData(array_info.first,
array->null_bitmap_data(),
element_count,
array->offset());
}
return FillFieldData(array_info.first, array_info.second);
}
@ -168,8 +178,10 @@ FieldDataImpl<Type, is_type_entire_row>::FillFieldData(
GetDataInfoFromArray<arrow::Int64Array,
arrow::Type::type::INT64>(array);
if (nullable_) {
return FillFieldData(
array_info.first, array->null_bitmap_data(), element_count);
return FillFieldData(array_info.first,
array->null_bitmap_data(),
element_count,
array->offset());
}
return FillFieldData(array_info.first, array_info.second);
}
@ -178,8 +190,10 @@ FieldDataImpl<Type, is_type_entire_row>::FillFieldData(
GetDataInfoFromArray<arrow::FloatArray,
arrow::Type::type::FLOAT>(array);
if (nullable_) {
return FillFieldData(
array_info.first, array->null_bitmap_data(), element_count);
return FillFieldData(array_info.first,
array->null_bitmap_data(),
element_count,
array->offset());
}
return FillFieldData(array_info.first, array_info.second);
}
@ -188,8 +202,10 @@ FieldDataImpl<Type, is_type_entire_row>::FillFieldData(
GetDataInfoFromArray<arrow::DoubleArray,
arrow::Type::type::DOUBLE>(array);
if (nullable_) {
return FillFieldData(
array_info.first, array->null_bitmap_data(), element_count);
return FillFieldData(array_info.first,
array->null_bitmap_data(),
element_count,
array->offset());
}
return FillFieldData(array_info.first, array_info.second);
}
@ -205,8 +221,10 @@ FieldDataImpl<Type, is_type_entire_row>::FillFieldData(
values[index] = string_array->GetString(index);
}
if (nullable_) {
return FillFieldData(
values.data(), array->null_bitmap_data(), element_count);
return FillFieldData(values.data(),
array->null_bitmap_data(),
element_count,
array->offset());
}
return FillFieldData(values.data(), element_count);
}
@ -223,8 +241,10 @@ FieldDataImpl<Type, is_type_entire_row>::FillFieldData(
Json(simdjson::padded_string(json_array->GetString(index)));
}
if (nullable_) {
return FillFieldData(
values.data(), array->null_bitmap_data(), element_count);
return FillFieldData(values.data(),
array->null_bitmap_data(),
element_count,
array->offset());
}
return FillFieldData(values.data(), element_count);
}
@ -245,8 +265,10 @@ FieldDataImpl<Type, is_type_entire_row>::FillFieldData(
values[index] = Array(field_data);
}
if (nullable_) {
return FillFieldData(
values.data(), array->null_bitmap_data(), element_count);
return FillFieldData(values.data(),
array->null_bitmap_data(),
element_count,
array->offset());
}
AssertInfo(null_number == 0, "get empty string when not nullable");
return FillFieldData(values.data(), element_count);
@ -322,70 +344,70 @@ FieldDataImpl<Type, is_type_entire_row>::FillFieldData(
if (default_value.has_value()) {
std::fill(
values.begin(), values.end(), default_value->bool_data());
return FillFieldData(values.data(), nullptr, element_count);
return FillFieldData(values.data(), nullptr, element_count, 0);
}
return FillFieldData(
values.data(), valid_data_ptr.get(), element_count);
values.data(), valid_data_ptr.get(), element_count, 0);
}
case DataType::INT8: {
FixedVector<int8_t> values(element_count);
if (default_value.has_value()) {
std::fill(
values.begin(), values.end(), default_value->int_data());
return FillFieldData(values.data(), nullptr, element_count);
return FillFieldData(values.data(), nullptr, element_count, 0);
}
return FillFieldData(
values.data(), valid_data_ptr.get(), element_count);
values.data(), valid_data_ptr.get(), element_count, 0);
}
case DataType::INT16: {
FixedVector<int16_t> values(element_count);
if (default_value.has_value()) {
std::fill(
values.begin(), values.end(), default_value->int_data());
return FillFieldData(values.data(), nullptr, element_count);
return FillFieldData(values.data(), nullptr, element_count, 0);
}
return FillFieldData(
values.data(), valid_data_ptr.get(), element_count);
values.data(), valid_data_ptr.get(), element_count, 0);
}
case DataType::INT32: {
FixedVector<int32_t> values(element_count);
if (default_value.has_value()) {
std::fill(
values.begin(), values.end(), default_value->int_data());
return FillFieldData(values.data(), nullptr, element_count);
return FillFieldData(values.data(), nullptr, element_count, 0);
}
return FillFieldData(
values.data(), valid_data_ptr.get(), element_count);
values.data(), valid_data_ptr.get(), element_count, 0);
}
case DataType::INT64: {
FixedVector<int64_t> values(element_count);
if (default_value.has_value()) {
std::fill(
values.begin(), values.end(), default_value->long_data());
return FillFieldData(values.data(), nullptr, element_count);
return FillFieldData(values.data(), nullptr, element_count, 0);
}
return FillFieldData(
values.data(), valid_data_ptr.get(), element_count);
values.data(), valid_data_ptr.get(), element_count, 0);
}
case DataType::FLOAT: {
FixedVector<float> values(element_count);
if (default_value.has_value()) {
std::fill(
values.begin(), values.end(), default_value->float_data());
return FillFieldData(values.data(), nullptr, element_count);
return FillFieldData(values.data(), nullptr, element_count, 0);
}
return FillFieldData(
values.data(), valid_data_ptr.get(), element_count);
values.data(), valid_data_ptr.get(), element_count, 0);
}
case DataType::DOUBLE: {
FixedVector<double> values(element_count);
if (default_value.has_value()) {
std::fill(
values.begin(), values.end(), default_value->double_data());
return FillFieldData(values.data(), nullptr, element_count);
return FillFieldData(values.data(), nullptr, element_count, 0);
}
return FillFieldData(
values.data(), valid_data_ptr.get(), element_count);
values.data(), valid_data_ptr.get(), element_count, 0);
}
case DataType::STRING:
case DataType::VARCHAR: {
@ -393,23 +415,23 @@ FieldDataImpl<Type, is_type_entire_row>::FillFieldData(
if (default_value.has_value()) {
std::fill(
values.begin(), values.end(), default_value->string_data());
return FillFieldData(values.data(), nullptr, element_count);
return FillFieldData(values.data(), nullptr, element_count, 0);
}
return FillFieldData(
values.data(), valid_data_ptr.get(), element_count);
values.data(), valid_data_ptr.get(), element_count, 0);
}
case DataType::JSON: {
// The code here is not referenced.
// A subclass named FieldDataJsonImpl is implemented, which overloads this function.
FixedVector<Json> values(element_count);
return FillFieldData(
values.data(), valid_data_ptr.get(), element_count);
values.data(), valid_data_ptr.get(), element_count, 0);
}
case DataType::ARRAY: {
// todo: add array default_value
FixedVector<Array> values(element_count);
return FillFieldData(
values.data(), valid_data_ptr.get(), element_count);
values.data(), valid_data_ptr.get(), element_count, 0);
}
default: {
PanicInfo(DataTypeInvalid,

View File

@ -25,6 +25,7 @@
#include <mutex>
#include <shared_mutex>
#include "log/Log.h"
#include "Types.h"
#include "arrow/api.h"
#include "arrow/array/array_binary.h"
@ -57,7 +58,8 @@ class FieldDataBase {
virtual void
FillFieldData(const void* field_data,
const uint8_t* valid_data,
ssize_t element_count) = 0;
ssize_t element_count,
ssize_t offset) = 0;
virtual void
FillFieldData(const std::shared_ptr<arrow::ChunkedArray> arrays) = 0;
@ -164,7 +166,8 @@ class FieldBitsetImpl : public FieldDataBase {
void
FillFieldData(const void* field_data,
const uint8_t* valid_data,
ssize_t element_count) override {
ssize_t element_count,
ssize_t offset) override {
PanicInfo(NotImplemented,
"FillFieldData(const void* field_data, "
"const uint8_t* valid_data, ssize_t element_count)"
@ -378,7 +381,8 @@ class FieldDataImpl : public FieldDataBase {
void
FillFieldData(const void* field_data,
const uint8_t* valid_data,
ssize_t element_count) override;
ssize_t element_count,
ssize_t offset) override;
void
FillFieldData(const std::shared_ptr<arrow::ChunkedArray> arrays) override;
@ -598,7 +602,11 @@ class FieldDataStringImpl : public FieldDataImpl<std::string, true> {
auto valid_data = array->null_bitmap_data();
if (valid_data != nullptr) {
bitset::detail::ElementWiseBitsetPolicy<uint8_t>::op_copy(
valid_data, 0, valid_data_.data(), length_, n);
valid_data,
array->offset(),
valid_data_.data(),
length_,
n);
}
}
length_ += n;
@ -689,7 +697,11 @@ class FieldDataJsonImpl : public FieldDataImpl<Json, true> {
auto valid_data = array->null_bitmap_data();
if (valid_data != nullptr) {
bitset::detail::ElementWiseBitsetPolicy<uint8_t>::op_copy(
valid_data, 0, valid_data_.data(), length_, n);
valid_data,
array->offset(),
valid_data_.data(),
length_,
n);
}
}
length_ += n;

View File

@ -1,4 +1,3 @@
// Licensed to the LF AI & Data foundation under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
@ -1006,7 +1005,7 @@ MergeFieldData(std::vector<FieldDataPtr>& data_array) {
for (const auto& data : data_array) {
if (merged_data->IsNullable()) {
merged_data->FillFieldData(
data->Data(), data->ValidData(), data->Length());
data->Data(), data->ValidData(), data->Length(), 0);
} else {
merged_data->FillFieldData(data->Data(), data->Length());
}

View File

@ -14,7 +14,7 @@
# Update milvus-storage_VERSION for the first occurrence
milvus_add_pkg_config("milvus-storage")
set_property(DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR} PROPERTY INCLUDE_DIRECTORIES "")
set( milvus-storage_VERSION 1238e21 )
set( milvus-storage_VERSION fa304fa )
set( GIT_REPOSITORY "https://github.com/milvus-io/milvus-storage.git")
message(STATUS "milvus-storage repo: ${GIT_REPOSITORY}")
message(STATUS "milvus-storage version: ${milvus-storage_VERSION}")

View File

@ -201,7 +201,7 @@ class ArrayBitmapIndexTest : public testing::Test {
ptr[byteIndex] &= ~(1 << bitIndex);
}
}
field_data->FillFieldData(data_.data(), ptr, data_.size());
field_data->FillFieldData(data_.data(), ptr, data_.size(), 0);
delete[] ptr;
} else {
field_data->FillFieldData(data_.data(), data_.size());

View File

@ -132,7 +132,7 @@ class BitmapIndexTest : public testing::Test {
ptr[byteIndex] &= ~(1 << bitIndex);
}
}
field_data->FillFieldData(data_.data(), ptr, data_.size());
field_data->FillFieldData(data_.data(), ptr, data_.size(), 0);
delete[] ptr;
} else {
field_data->FillFieldData(data_.data(), data_.size());

View File

@ -125,7 +125,7 @@ TEST(chunk, test_variable_field_nullable) {
auto field_data =
milvus::storage::CreateFieldData(storage::DataType::VARCHAR, true);
uint8_t* valid_data = new uint8_t[1]{0x15}; // 10101 in binary
field_data->FillFieldData(data.data(), valid_data, data.size());
field_data->FillFieldData(data.data(), valid_data, data.size(), 0);
delete[] valid_data;
storage::InsertEventData event_data;
@ -290,7 +290,7 @@ TEST(chunk, test_null_int64) {
// Set up validity bitmap: 10011 (1st, 4th, and 5th are valid)
uint8_t* valid_data = new uint8_t[1]{0x13}; // 10011 in binary
field_data->FillFieldData(data.data(), valid_data, data.size());
field_data->FillFieldData(data.data(), valid_data, data.size(), 0);
delete[] valid_data;
storage::InsertEventData event_data;
@ -413,7 +413,7 @@ TEST(chunk, test_null_array) {
// Set up validity bitmap: 10101 (1st, 3rd, and 5th are valid)
uint8_t* valid_data = new uint8_t[1]{0x15}; // 10101 in binary
field_data->FillFieldData(data.data(), valid_data, data.size());
field_data->FillFieldData(data.data(), valid_data, data.size(), 0);
delete[] valid_data;
storage::InsertEventData event_data;

View File

@ -66,7 +66,7 @@ TEST(storage, InsertDataBoolNullable) {
milvus::storage::CreateFieldData(storage::DataType::BOOL, true);
uint8_t* valid_data = new uint8_t[1]{0xF3};
field_data->FillFieldData(data.data(), valid_data, data.size());
field_data->FillFieldData(data.data(), valid_data, data.size(), 0);
auto payload_reader =
std::make_shared<milvus::storage::PayloadReader>(field_data);
@ -132,7 +132,7 @@ TEST(storage, InsertDataInt8Nullable) {
auto field_data =
milvus::storage::CreateFieldData(storage::DataType::INT8, true);
uint8_t* valid_data = new uint8_t[1]{0xF3};
field_data->FillFieldData(data.data(), valid_data, data.size());
field_data->FillFieldData(data.data(), valid_data, data.size(), 0);
auto payload_reader =
std::make_shared<milvus::storage::PayloadReader>(field_data);
@ -196,7 +196,7 @@ TEST(storage, InsertDataInt16Nullable) {
auto field_data =
milvus::storage::CreateFieldData(storage::DataType::INT16, true);
uint8_t* valid_data = new uint8_t[1]{0xF3};
field_data->FillFieldData(data.data(), valid_data, data.size());
field_data->FillFieldData(data.data(), valid_data, data.size(), 0);
auto payload_reader =
std::make_shared<milvus::storage::PayloadReader>(field_data);
@ -260,7 +260,7 @@ TEST(storage, InsertDataInt32Nullable) {
auto field_data =
milvus::storage::CreateFieldData(storage::DataType::INT32, true);
uint8_t* valid_data = new uint8_t[1]{0xF3};
field_data->FillFieldData(data.data(), valid_data, data.size());
field_data->FillFieldData(data.data(), valid_data, data.size(), 0);
auto payload_reader =
std::make_shared<milvus::storage::PayloadReader>(field_data);
@ -324,7 +324,7 @@ TEST(storage, InsertDataInt64Nullable) {
auto field_data =
milvus::storage::CreateFieldData(storage::DataType::INT64, true);
uint8_t* valid_data = new uint8_t[1]{0xF3};
field_data->FillFieldData(data.data(), valid_data, data.size());
field_data->FillFieldData(data.data(), valid_data, data.size(), 0);
auto payload_reader =
std::make_shared<milvus::storage::PayloadReader>(field_data);
@ -394,7 +394,7 @@ TEST(storage, InsertDataStringNullable) {
auto field_data =
milvus::storage::CreateFieldData(storage::DataType::STRING, true);
uint8_t* valid_data = new uint8_t[1]{0xF3};
field_data->FillFieldData(data.data(), valid_data, data.size());
field_data->FillFieldData(data.data(), valid_data, data.size(), 0);
auto payload_reader =
std::make_shared<milvus::storage::PayloadReader>(field_data);
@ -461,7 +461,7 @@ TEST(storage, InsertDataFloatNullable) {
auto field_data =
milvus::storage::CreateFieldData(storage::DataType::FLOAT, true);
std::array<uint8_t, 1> valid_data = {0xF3};
field_data->FillFieldData(data.data(), valid_data.data(), data.size());
field_data->FillFieldData(data.data(), valid_data.data(), data.size(), 0);
auto payload_reader =
std::make_shared<milvus::storage::PayloadReader>(field_data);
@ -524,7 +524,7 @@ TEST(storage, InsertDataDoubleNullable) {
auto field_data =
milvus::storage::CreateFieldData(storage::DataType::DOUBLE, true);
uint8_t* valid_data = new uint8_t[1]{0xF3};
field_data->FillFieldData(data.data(), valid_data, data.size());
field_data->FillFieldData(data.data(), valid_data, data.size(), 0);
auto payload_reader =
std::make_shared<milvus::storage::PayloadReader>(field_data);
@ -775,7 +775,7 @@ TEST(storage, InsertDataStringArrayNullable) {
auto field_data =
milvus::storage::CreateFieldData(storage::DataType::ARRAY, true);
uint8_t* valid_data = new uint8_t[1]{0xFD};
field_data->FillFieldData(data.data(), valid_data, data.size());
field_data->FillFieldData(data.data(), valid_data, data.size(), 0);
auto payload_reader =
std::make_shared<milvus::storage::PayloadReader>(field_data);
@ -813,7 +813,7 @@ TEST(storage, InsertDataJsonNullable) {
auto field_data =
milvus::storage::CreateFieldData(storage::DataType::JSON, true);
uint8_t* valid_data = new uint8_t[1]{0xFC};
field_data->FillFieldData(data.data(), valid_data, data.size());
field_data->FillFieldData(data.data(), valid_data, data.size(), 0);
auto payload_reader =
std::make_shared<milvus::storage::PayloadReader>(field_data);

View File

@ -130,7 +130,7 @@ class HybridIndexTestV1 : public testing::Test {
ptr[byteIndex] &= ~(1 << bitIndex);
}
}
field_data->FillFieldData(data_.data(), ptr, data_.size());
field_data->FillFieldData(data_.data(), ptr, data_.size(), 0);
delete[] ptr;
} else {
field_data->FillFieldData(data_.data(), data_.size());

View File

@ -132,7 +132,7 @@ test_run() {
valid_data_[byteIndex] &= ~(1 << bitIndex);
}
}
field_data->FillFieldData(data.data(), valid_data_, data.size());
field_data->FillFieldData(data.data(), valid_data_, data.size(), 0);
delete[] valid_data_;
} else {
field_data->FillFieldData(data.data(), data.size());
@ -525,7 +525,7 @@ test_string() {
valid_data_[byteIndex] &= ~(1 << bitIndex);
}
}
field_data->FillFieldData(data.data(), valid_data_, data.size());
field_data->FillFieldData(data.data(), valid_data_, data.size(), 0);
delete[] valid_data_;
} else {
field_data->FillFieldData(data.data(), data.size());

View File

@ -85,7 +85,8 @@ class JsonKeyStatsIndexTest : public ::testing::TestWithParam<bool> {
valid_data_[byteIndex] &= ~(1 << bitIndex);
}
}
field_data->FillFieldData(data_.data(), valid_data_, data_.size());
field_data->FillFieldData(
data_.data(), valid_data_, data_.size(), 0);
delete[] valid_data_;
} else {
field_data->FillFieldData(data_.data(), data_.size());

View File

@ -1771,7 +1771,7 @@ TEST(Sealed, SkipIndexSkipUnaryRangeNullable) {
auto int64s_field_data =
storage::CreateFieldData(DataType::INT64, true, 1, 5);
int64s_field_data->FillFieldData(int64s.data(), valid_data.data(), 5);
int64s_field_data->FillFieldData(int64s.data(), valid_data.data(), 5, 0);
auto load_info = PrepareSingleFieldInsertBinlog(kCollectionID,
kPartitionID,
kSegmentID,
@ -1842,7 +1842,7 @@ TEST(Sealed, SkipIndexSkipBinaryRangeNullable) {
auto int64s_field_data =
storage::CreateFieldData(DataType::INT64, true, 1, 5);
int64s_field_data->FillFieldData(int64s.data(), valid_data.data(), 5);
int64s_field_data->FillFieldData(int64s.data(), valid_data.data(), 5, 0);
auto load_info = PrepareSingleFieldInsertBinlog(kCollectionID,
kPartitionID,
kSegmentID,

View File

@ -1128,7 +1128,7 @@ CreateFieldDataFromDataArray(ssize_t raw_count,
valid_data[byteIndex] &= ~(1 << bitIndex);
}
}
field_data->FillFieldData(raw_data, valid_data.data(), raw_count);
field_data->FillFieldData(raw_data, valid_data.data(), raw_count, 0);
};
if (field_meta.is_vector()) {

View File

@ -58,13 +58,19 @@ type rwOptions struct {
type RwOption func(*rwOptions)
func DefaultRwOptions() *rwOptions {
func DefaultWriterOptions() *rwOptions {
return &rwOptions{
bufferSize: packed.DefaultWriteBufferSize,
multiPartUploadSize: packed.DefaultMultiPartUploadSize,
}
}
func DefaultReaderOptions() *rwOptions {
return &rwOptions{
bufferSize: packed.DefaultReadBufferSize,
}
}
func WithVersion(version int64) RwOption {
return func(options *rwOptions) {
options.version = version
@ -192,7 +198,7 @@ func makeBlobsReader(ctx context.Context, binlogs []*datapb.FieldBinlog, downloa
}
func NewBinlogRecordReader(ctx context.Context, binlogs []*datapb.FieldBinlog, schema *schemapb.CollectionSchema, option ...RwOption) (RecordReader, error) {
rwOptions := DefaultRwOptions()
rwOptions := DefaultReaderOptions()
for _, opt := range option {
opt(rwOptions)
}
@ -229,7 +235,7 @@ func NewBinlogRecordWriter(ctx context.Context, collectionID, partitionID, segme
schema *schemapb.CollectionSchema, allocator allocator.Interface, chunkSize uint64, bucketName, rootPath string, maxRowNum int64,
option ...RwOption,
) (BinlogRecordWriter, error) {
rwOptions := DefaultRwOptions()
rwOptions := DefaultWriterOptions()
for _, opt := range option {
opt(rwOptions)
}

View File

@ -19,6 +19,8 @@ const (
ColumnGroupSizeThreshold = 1024 // 1KB
// DefaultBufferSize is the default buffer size for writing data to storage.
DefaultWriteBufferSize = 32 * 1024 * 1024 // 32MB
// DefaultBufferSize is the default buffer size for reading data from storage.
DefaultReadBufferSize = -1 // use -1 for unlimited
// DefaultMultiPartUploadSize is the default size of each part of a multipart upload.
DefaultMultiPartUploadSize = 10 * 1024 * 1024 // 10MB
// Arrow will convert these field IDs to a metadata key named PARQUET:field_id on the appropriate field.

View File

@ -132,7 +132,7 @@ func (suite *PackedTestSuite) TestPackedMultiFiles() {
defer rec.Release()
paths := []string{"/tmp/100", "/tmp/101"}
columnGroups := []storagecommon.ColumnGroup{{Columns: []int{2}}, {Columns: []int{0, 1}}}
bufferSize := int64(10 * 1024 * 1024) // 10MB
bufferSize := int64(-1) // unlimited
multiPartUploadSize := int64(0)
pw, err := NewPackedWriter(paths, suite.schema, bufferSize, multiPartUploadSize, columnGroups, nil)
suite.NoError(err)