diff --git a/internal/core/src/mmap/Column.h b/internal/core/src/mmap/Column.h index 243fc1cd6d..49302770fc 100644 --- a/internal/core/src/mmap/Column.h +++ b/internal/core/src/mmap/Column.h @@ -135,6 +135,7 @@ class ColumnBase { // mmap mode ctor // User must call Seal to build the view for variable length column. + // !!! The incoming file must be write padings at the end of the file. ColumnBase(const File& file, size_t size, const FieldMeta& field_meta) : mapping_type_(MappingType::MAP_WITH_FILE) { auto data_type = field_meta.get_data_type(); @@ -143,18 +144,21 @@ class ColumnBase { type_size_ = field_meta.get_sizeof(); num_rows_ = size / type_size_; } + AssertInfo(size >= padding_, + "file size {} is less than padding size {}", + size, + padding_); data_size_ = size; - data_cap_size_ = size; + data_cap_size_ = size - padding_; // use exactly same size of file, padding shall be written in file already // see also https://github.com/milvus-io/milvus/issues/34442 - size_t mapped_size = data_cap_size_; - data_ = static_cast(mmap( - nullptr, mapped_size, PROT_READ, MAP_SHARED, file.Descriptor(), 0)); + data_ = static_cast( + mmap(nullptr, size, PROT_READ, MAP_SHARED, file.Descriptor(), 0)); AssertInfo(data_ != MAP_FAILED, "failed to create file-backed map, err: {}", strerror(errno)); - madvise(data_, mapped_size, MADV_WILLNEED); + madvise(data_, size, MADV_WILLNEED); // valid_data store in memory if (field_meta.is_nullable()) { @@ -162,31 +166,37 @@ class ColumnBase { valid_data_.reserve(num_rows_); } - UpdateMetricWhenMmap(mapped_size); + UpdateMetricWhenMmap(size); } // mmap mode ctor // User must call Seal to build the view for variable length column. + // !!! The incoming file must be write padings at the end of the file. ColumnBase(const File& file, size_t size, int dim, const DataType& data_type, bool nullable) : data_size_(size), - data_cap_size_(size), nullable_(nullable), mapping_type_(MappingType::MAP_WITH_FILE) { SetPaddingSize(data_type); // use exact same size of file, padding shall be written in file already // see also https://github.com/milvus-io/milvus/issues/34442 - size_t mapped_size = data_cap_size_; if (!IsVariableDataType(data_type)) { type_size_ = GetDataTypeSize(data_type, dim); num_rows_ = size / type_size_; } - data_ = static_cast(mmap( - nullptr, mapped_size, PROT_READ, MAP_SHARED, file.Descriptor(), 0)); + AssertInfo(size >= padding_, + "file size {} is less than padding size {}", + size, + padding_); + + data_cap_size_ = size - padding_; + + data_ = static_cast( + mmap(nullptr, size, PROT_READ, MAP_SHARED, file.Descriptor(), 0)); AssertInfo(data_ != MAP_FAILED, "failed to create file-backed map, err: {}", strerror(errno)); @@ -195,7 +205,7 @@ class ColumnBase { valid_data_.reserve(num_rows_); } - UpdateMetricWhenMmap(mapped_size); + UpdateMetricWhenMmap(size); } virtual ~ColumnBase() { @@ -351,22 +361,7 @@ class ColumnBase { void SetPaddingSize(const DataType& type) { - switch (type) { - case DataType::JSON: - // simdjson requires a padding following the json data - padding_ = simdjson::SIMDJSON_PADDING; - break; - case DataType::VARCHAR: - case DataType::STRING: - padding_ = STRING_PADDING; - break; - case DataType::ARRAY: - padding_ = ARRAY_PADDING; - break; - default: - padding_ = 0; - break; - } + padding_ = PaddingSize(type); } void