This is an automated email from the ASF dual-hosted git repository.

alamb pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git


The following commit(s) were added to refs/heads/main by this push:
     new 9450f1e9cc Call `align_buffers()` in `from_ffi`, remove redundant call 
from `arrow-pyarrow` (#10030)
9450f1e9cc is described below

commit 9450f1e9cc17061049309e98b0cbccca91e7dfb7
Author: Matt Butrovich <[email protected]>
AuthorDate: Fri May 29 15:42:00 2026 -0400

    Call `align_buffers()` in `from_ffi`, remove redundant call from 
`arrow-pyarrow` (#10030)
    
    # Which issue does this PR close?
    
    - Closes #10028.
    
    # Rationale for this change
    
    `from_ffi` / `from_ffi_and_data_type` (and therefore
    `ArrowArrayStreamReader`) panic inside `ScalarBuffer::<i128>::from` when
    an FFI producer hands over a `Decimal128` buffer that is 8-byte aligned
    but not 16-byte aligned. The producer is spec-conformant — the C Data
    Interface only recommends 8-byte alignment — but `align_of::<i128>() ==
    16` since Rust 1.77 on x86 (always on ARM), so arrow-rs's typed arrays
    require 16. JVM producers like arrow-java's `NettyAllocationManager` hit
    this regularly.
    
    The IPC reader already handles this by calling
    `ArrayData::align_buffers()` on import (default of
    `IpcReadOptions::require_alignment`, see #5554), and `arrow-pyarrow` was
    patched the same way for #6471 / apache/arrow#43552. The C Data
    Interface entry points were the missing piece.
    
    # What changes are included in this PR?
    
    - `arrow::ffi::from_ffi` and `from_ffi_and_data_type`: call
    `data.align_buffers()` after `consume()`. No-op when buffers are already
    aligned; depends on #6462 making `align_buffers` recursive over child
    data.
    - `arrow-pyarrow`: drop the now-redundant `array_data.align_buffers()`
    call; it's covered by `from_ffi`.
    
    # Are these changes tested?
    
    Yes. New regression test `test_decimal128_under_aligned_round_trip` in
    `arrow-array/src/ffi.rs` constructs an 8-aligned-not-16-aligned
    `Decimal128` buffer via `Buffer::from_vec(...).slice(8)`, imports
    through `from_ffi`, and asserts the resulting `Decimal128Array` values
    are correct. The test panics without the fix with the exact error from
    #10028.
    
    # Are there any user-facing changes?
    
    No API changes. Behavior change: `from_ffi` / `from_ffi_and_data_type`
    (and `ArrowArrayStreamReader::next`) now silently realign under-aligned
    buffers instead of panicking. Already-aligned producers are unaffected;
    misaligned producers that previously panicked now succeed with a
    one-time copy of the offending buffer.
---
 arrow-array/src/ffi.rs   | 52 ++++++++++++++++++++++++++++++++++++++++++++++--
 arrow-pyarrow/src/lib.rs |  7 +------
 2 files changed, 51 insertions(+), 8 deletions(-)

diff --git a/arrow-array/src/ffi.rs b/arrow-array/src/ffi.rs
index f50dd3420b..59b9f6b3b7 100644
--- a/arrow-array/src/ffi.rs
+++ b/arrow-array/src/ffi.rs
@@ -281,7 +281,14 @@ pub unsafe fn from_ffi(array: FFI_ArrowArray, schema: 
&FFI_ArrowSchema) -> Resul
         data_type: dt,
         owner: &array,
     };
-    tmp.consume()
+    let mut data = tmp.consume()?;
+    // arrow-rs has stricter alignment requirements than the C Data Interface 
spec;
+    // a no-op when buffers are already aligned. Unreachable under
+    // `cfg(feature = "force_validate")`; tracked in #10034.
+    // See https://github.com/apache/arrow/issues/43552 and
+    // https://github.com/apache/arrow-rs/issues/10028 for context.
+    data.align_buffers();
+    Ok(data)
 }
 
 /// Import [ArrayData] from the C Data Interface
@@ -299,7 +306,14 @@ pub unsafe fn from_ffi_and_data_type(
         data_type,
         owner: &array,
     };
-    tmp.consume()
+    let mut data = tmp.consume()?;
+    // arrow-rs has stricter alignment requirements than the C Data Interface 
spec;
+    // a no-op when buffers are already aligned. Unreachable under
+    // `cfg(feature = "force_validate")`; tracked in #10034.
+    // See https://github.com/apache/arrow/issues/43552 and
+    // https://github.com/apache/arrow-rs/issues/10028 for context.
+    data.align_buffers();
+    Ok(data)
 }
 
 #[derive(Debug)]
@@ -667,6 +681,40 @@ mod tests_to_then_from_ffi {
     }
     // case with nulls is tested in the docs, through the example on this 
module.
 
+    #[test]
+    #[cfg(not(feature = "force_validate"))]
+    fn test_decimal128_under_aligned_round_trip() -> Result<()> {
+        // Construct an 8-aligned-but-not-16-aligned i128 data buffer to model
+        // an FFI producer that only guarantees the C Data Interface's
+        // recommended 8-byte alignment (e.g. arrow-java).
+        let aligned = Buffer::from_vec(vec![0_i128, 1_i128, 2_i128]);
+        let under_aligned = aligned.slice(8);
+        assert_eq!(under_aligned.as_ptr().align_offset(8), 0);
+        assert_ne!(under_aligned.as_ptr().align_offset(16), 0);
+
+        // SAFETY: buffer is large enough for 2 i128 elements; misaligned
+        // input is the condition under test.
+        let data = unsafe {
+            ArrayData::builder(DataType::Decimal128(10, 2))
+                .len(2)
+                .add_buffer(under_aligned)
+                .build_unchecked()
+        };
+
+        let schema = FFI_ArrowSchema::try_from(data.data_type()).unwrap();
+        let array = FFI_ArrowArray::new(&data);
+
+        let imported = unsafe { from_ffi(array, &schema) }?;
+        let array = Decimal128Array::from(imported);
+
+        // The little-endian byte layout of [0i128, 1, 2] sliced 8 bytes in
+        // yields elements `1 << 64` and `2 << 64`.
+        assert_eq!(array.len(), 2);
+        assert_eq!(array.value(0), 1_i128 << 64);
+        assert_eq!(array.value(1), 2_i128 << 64);
+        Ok(())
+    }
+
     #[test]
     fn test_null_count_handling() {
         let int32_data = ArrayData::builder(DataType::Int32)
diff --git a/arrow-pyarrow/src/lib.rs b/arrow-pyarrow/src/lib.rs
index d8f584e396..484324665c 100644
--- a/arrow-pyarrow/src/lib.rs
+++ b/arrow-pyarrow/src/lib.rs
@@ -353,7 +353,7 @@ impl FromPyArrow for RecordBatch {
                 .pointer_checked(Some(ARROW_ARRAY_CAPSULE_NAME))?
                 .cast::<FFI_ArrowArray>();
             let ffi_array = unsafe { 
FFI_ArrowArray::from_raw(array_ptr.as_ptr()) };
-            let mut array_data =
+            let array_data =
                 unsafe { ffi::from_ffi(ffi_array, schema_ptr.as_ref()) 
}.map_err(to_py_err)?;
             if !matches!(array_data.data_type(), DataType::Struct(_)) {
                 return Err(PyTypeError::new_err(
@@ -361,11 +361,6 @@ impl FromPyArrow for RecordBatch {
                 ));
             }
             let options = 
RecordBatchOptions::default().with_row_count(Some(array_data.len()));
-            // Ensure data is aligned (by potentially copying the buffers).
-            // This is needed because some python code (for example the
-            // python flight client) produces unaligned buffers
-            // See https://github.com/apache/arrow/issues/43552 for details
-            array_data.align_buffers();
             let array = StructArray::from(array_data);
             // StructArray does not embed metadata from schema. We need to 
override
             // the output schema with the schema from the capsule.

Reply via email to