This is an automated email from the ASF dual-hosted git repository.
tustvold pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git
The following commit(s) were added to refs/heads/master by this push:
new 2c524f16a Align buffers from Python (FFI) (#6472)
2c524f16a is described below
commit 2c524f16a7ed484456700d69dc5ccfb9b3af3058
Author: Enrico Minack <[email protected]>
AuthorDate: Wed Oct 2 14:22:44 2024 +0200
Align buffers from Python (FFI) (#6472)
* Align buffers in RecordBatch.from_pyarrow_bound
* Update arrow/src/pyarrow.rs
* cargo fmt
---------
Co-authored-by: Andrew Lamb <[email protected]>
---
arrow/src/pyarrow.rs | 8 +++++++-
1 file changed, 7 insertions(+), 1 deletion(-)
diff --git a/arrow/src/pyarrow.rs b/arrow/src/pyarrow.rs
index b05c967d7..6effe1c03 100644
--- a/arrow/src/pyarrow.rs
+++ b/arrow/src/pyarrow.rs
@@ -363,13 +363,19 @@ impl FromPyArrow for RecordBatch {
let schema_ptr = unsafe { schema_capsule.reference::<FFI_ArrowSchema>() };
let ffi_array = unsafe { FFI_ArrowArray::from_raw(array_capsule.pointer().cast()) };
- let array_data = unsafe { ffi::from_ffi(ffi_array, schema_ptr) }.map_err(to_py_err)?;
+ let mut array_data =
+ unsafe { ffi::from_ffi(ffi_array, schema_ptr) }.map_err(to_py_err)?;
if !matches!(array_data.data_type(), DataType::Struct(_)) {
return Err(PyTypeError::new_err(
"Expected Struct type from __arrow_c_array.",
));
}
let options = RecordBatchOptions::default().with_row_count(Some(array_data.len()));
+ // Ensure data is aligned (by potentially copying the buffers).
+ // This is needed because some python code (for example the
+ // python flight client) produces unaligned buffers
+ // See https://github.com/apache/arrow/issues/43552 for details
+ array_data.align_buffers();
let array = StructArray::from(array_data);
// StructArray does not embed metadata from schema. We need to override
// the output schema with the schema from the capsule.