mirror of
https://gitee.com/milvus-io/milvus.git
synced 2025-12-07 09:38:39 +08:00
enhance: Deep copy arrow array (#43724)
Deep copy arrow array and make a new RecordBatch with the copied array. issue: https://github.com/milvus-io/milvus/issues/43310 --------- Signed-off-by: bigsheeper <yihao.dai@zilliz.com>
This commit is contained in:
parent
f14c7d598c
commit
cb7be8885d
@ -15,16 +15,57 @@
|
|||||||
#include "segcore/column_groups_c.h"
|
#include "segcore/column_groups_c.h"
|
||||||
#include "segcore/packed_writer_c.h"
|
#include "segcore/packed_writer_c.h"
|
||||||
#include "milvus-storage/packed/writer.h"
|
#include "milvus-storage/packed/writer.h"
|
||||||
#include "milvus-storage/common/log.h"
|
|
||||||
#include "milvus-storage/common/config.h"
|
#include "milvus-storage/common/config.h"
|
||||||
#include "milvus-storage/filesystem/fs.h"
|
#include "milvus-storage/filesystem/fs.h"
|
||||||
|
|
||||||
#include <arrow/c/bridge.h>
|
#include <arrow/c/bridge.h>
|
||||||
#include <arrow/filesystem/filesystem.h>
|
#include <arrow/filesystem/filesystem.h>
|
||||||
|
#include <arrow/array.h>
|
||||||
|
#include <arrow/record_batch.h>
|
||||||
|
#include <arrow/memory_pool.h>
|
||||||
|
#include <arrow/device.h>
|
||||||
|
#include <cstring>
|
||||||
#include "common/EasyAssert.h"
|
#include "common/EasyAssert.h"
|
||||||
#include "common/type_c.h"
|
#include "common/type_c.h"
|
||||||
#include "monitor/scope_metric.h"
|
#include "monitor/scope_metric.h"
|
||||||
|
|
||||||
|
// Deep copy ArrowArray and return a copied RecordBatch
|
||||||
|
// This function creates a complete deep copy of the ArrowArray and returns it as a RecordBatch
|
||||||
|
static std::shared_ptr<arrow::RecordBatch>
|
||||||
|
ArrowArrayDeepCopyToRecordBatch(const struct ArrowArray* src,
|
||||||
|
const struct ArrowSchema* schema) {
|
||||||
|
AssertInfo(src != nullptr, "[StorageV2] Source ArrowArray is null");
|
||||||
|
AssertInfo(schema != nullptr, "[StorageV2] Source ArrowSchema is null");
|
||||||
|
|
||||||
|
auto record_batch =
|
||||||
|
arrow::ImportRecordBatch(const_cast<struct ArrowArray*>(src),
|
||||||
|
const_cast<struct ArrowSchema*>(schema))
|
||||||
|
.ValueOrDie();
|
||||||
|
|
||||||
|
// Get the default CPU memory manager for deep copy
|
||||||
|
auto memory_manager = arrow::default_cpu_memory_manager();
|
||||||
|
|
||||||
|
// For true deep copy, we'll use Arrow's CopyTo() function
|
||||||
|
std::vector<std::shared_ptr<arrow::Array>> copied_arrays;
|
||||||
|
for (int i = 0; i < record_batch->num_columns(); i++) {
|
||||||
|
auto original_array = record_batch->column(i);
|
||||||
|
|
||||||
|
auto copied_data_result =
|
||||||
|
original_array->data()->CopyTo(memory_manager);
|
||||||
|
AssertInfo(copied_data_result.ok(),
|
||||||
|
"[StorageV2] Failed to deep copy array data: {}",
|
||||||
|
copied_data_result.status().ToString());
|
||||||
|
|
||||||
|
auto copied_data = copied_data_result.ValueOrDie();
|
||||||
|
auto copied_array = arrow::MakeArray(copied_data);
|
||||||
|
copied_arrays.push_back(copied_array);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Create and return a new RecordBatch with the copied arrays
|
||||||
|
return arrow::RecordBatch::Make(
|
||||||
|
record_batch->schema(), record_batch->num_rows(), copied_arrays);
|
||||||
|
}
|
||||||
|
|
||||||
CStatus
|
CStatus
|
||||||
NewPackedWriterWithStorageConfig(struct ArrowSchema* schema,
|
NewPackedWriterWithStorageConfig(struct ArrowSchema* schema,
|
||||||
const int64_t buffer_size,
|
const int64_t buffer_size,
|
||||||
@ -141,8 +182,13 @@ WriteRecordBatch(CPackedWriter c_packed_writer,
|
|||||||
auto packed_writer =
|
auto packed_writer =
|
||||||
static_cast<milvus_storage::PackedRecordBatchWriter*>(
|
static_cast<milvus_storage::PackedRecordBatchWriter*>(
|
||||||
c_packed_writer);
|
c_packed_writer);
|
||||||
auto record_batch =
|
|
||||||
arrow::ImportRecordBatch(array, schema).ValueOrDie();
|
// Deep copy the ArrowArray and get a copied RecordBatch
|
||||||
|
auto record_batch = ArrowArrayDeepCopyToRecordBatch(array, schema);
|
||||||
|
if (record_batch == nullptr) {
|
||||||
|
return milvus::FailureCStatus(milvus::ErrorCode::FileWriteFailed,
|
||||||
|
"Failed to copy ArrowArray");
|
||||||
|
}
|
||||||
auto status = packed_writer->Write(record_batch);
|
auto status = packed_writer->Write(record_batch);
|
||||||
if (!status.ok()) {
|
if (!status.ok()) {
|
||||||
return milvus::FailureCStatus(milvus::ErrorCode::FileWriteFailed,
|
return milvus::FailureCStatus(milvus::ErrorCode::FileWriteFailed,
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user