From 6b4d977a10c849fa0bf8f3bb0bc006f95c3c6f0e Mon Sep 17 00:00:00 2001 From: congqixia Date: Fri, 5 Jul 2024 17:44:09 +0800 Subject: [PATCH] fix: Write padding into mmap file in case of SIGBUS (#34443) See also #34442 Signed-off-by: Congqi Xia --- internal/core/src/mmap/Column.h | 8 +++++-- internal/core/src/mmap/Utils.h | 39 +++++++++++++++++++++++++++++++++ 2 files changed, 45 insertions(+), 2 deletions(-) diff --git a/internal/core/src/mmap/Column.h b/internal/core/src/mmap/Column.h index 5aba7b08ca..64e46b7cfc 100644 --- a/internal/core/src/mmap/Column.h +++ b/internal/core/src/mmap/Column.h @@ -129,7 +129,9 @@ class ColumnBase { size_ = size; cap_size_ = size; - size_t mapped_size = cap_size_ + padding_; + // use exactly the same size as the file; the padding is already written in the file + // see also https://github.com/milvus-io/milvus/issues/34442 + size_t mapped_size = cap_size_; data_ = static_cast(mmap( nullptr, mapped_size, PROT_READ, MAP_SHARED, file.Descriptor(), 0)); AssertInfo(data_ != MAP_FAILED, @@ -156,7 +158,9 @@ class ColumnBase { mapping_type_(MappingType::MAP_WITH_FILE) { SetPaddingSize(data_type); - size_t mapped_size = cap_size_ + padding_; + // use exactly the same size as the file; the padding is already written in the file + // see also https://github.com/milvus-io/milvus/issues/34442 + size_t mapped_size = cap_size_; data_ = static_cast(mmap( nullptr, mapped_size, PROT_READ, MAP_SHARED, file.Descriptor(), 0)); AssertInfo(data_ != MAP_FAILED, diff --git a/internal/core/src/mmap/Utils.h b/internal/core/src/mmap/Utils.h index f4a8a0811c..e67582da2e 100644 --- a/internal/core/src/mmap/Utils.h +++ b/internal/core/src/mmap/Utils.h @@ -39,6 +39,33 @@ namespace milvus { file.Path(), \ strerror(errno))); +/* +* If all values of a string field are empty, a string padding is needed to avoid +* mmap failing with an invalid argument error because size_ is zero; +* the array type has the same problem +* TODO: remove it when NULL values are supported +*/ +constexpr size_t FILE_STRING_PADDING = 1;
+constexpr size_t FILE_ARRAY_PADDING = 1; + +inline size_t +PaddingSize(const DataType& type) { + switch (type) { + case DataType::JSON: + // simdjson requires a padding following the json data + return simdjson::SIMDJSON_PADDING; + case DataType::VARCHAR: + case DataType::STRING: + return FILE_STRING_PADDING; + break; + case DataType::ARRAY: + return FILE_ARRAY_PADDING; + default: + break; + } + return 0; +} + inline void WriteFieldData(File& file, DataType data_type, @@ -128,5 +155,17 @@ WriteFieldData(File& file, total_written += data->Size(i); } } + + // write padding 0 in file content directly + // see also https://github.com/milvus-io/milvus/issues/34442 + auto padding_size = PaddingSize(data_type); + if (padding_size > 0 ) { + std::vector padding(padding_size, 0); + ssize_t written = file.Write(padding.data(), padding_size); + if (written < padding_size) { + THROW_FILE_WRITE_ERROR + } + total_written += written; + } } } // namespace milvus