This is an automated email from the ASF dual-hosted git repository.
tustvold pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git
The following commit(s) were added to refs/heads/master by this push:
new bb4fc5900 Cleanup FFI interface (#3684) (#3683) (#3685)
bb4fc5900 is described below
commit bb4fc59009e7c5861a6b1967a53e9daa2554d5c6
Author: Raphael Taylor-Davies <[email protected]>
AuthorDate: Thu Feb 9 20:45:32 2023 +0000
Cleanup FFI interface (#3684) (#3683) (#3685)
* Cleanup FFI interface (#3684) (#3683)
* Add import example
* Tweak doc example
* Use ManuallyDrop to model external memory
---
arrow/src/array/ffi.rs | 20 ++----
arrow/src/array/mod.rs | 1 +
arrow/src/ffi.rs | 184 +++++++++++++++++++++++++++++--------------------
arrow/src/pyarrow.rs | 16 ++---
4 files changed, 126 insertions(+), 95 deletions(-)
diff --git a/arrow/src/array/ffi.rs b/arrow/src/array/ffi.rs
index fb7771ac6..c7bc8e9f8 100644
--- a/arrow/src/array/ffi.rs
+++ b/arrow/src/array/ffi.rs
@@ -47,6 +47,8 @@ impl TryFrom<ArrayData> for ffi::ArrowArray {
/// # Safety
/// Assumes that these pointers represent valid C Data Interfaces, both in
memory
/// representation and lifetime via the `release` mechanism.
+#[deprecated(note = "Use ArrowArray::new")]
+#[allow(deprecated)]
pub unsafe fn make_array_from_raw(
array: *const ffi::FFI_ArrowArray,
schema: *const ffi::FFI_ArrowSchema,
@@ -91,30 +93,22 @@ mod tests {
StructArray, UInt32Array, UInt64Array,
},
datatypes::{DataType, Field},
- ffi::ArrowArray,
+ ffi::{ArrowArray, FFI_ArrowArray, FFI_ArrowSchema},
};
use std::convert::TryFrom;
use std::sync::Arc;
fn test_round_trip(expected: &ArrayData) -> Result<()> {
- // create a `ArrowArray` from the data.
- let d1 = ArrowArray::try_from(expected.clone())?;
-
- // here we export the array as 2 pointers. We would have no control
over ownership if it was not for
- // the release mechanism.
- let (array, schema) = ArrowArray::into_raw(d1);
+ // here we export the array
+ let array = FFI_ArrowArray::new(expected);
+ let schema = FFI_ArrowSchema::try_from(expected.data_type())?;
// simulate an external consumer by being the consumer
- let d1 = unsafe { ArrowArray::try_from_raw(array, schema) }?;
+ let d1 = ArrowArray::new(array, schema);
let result = &ArrayData::try_from(d1)?;
assert_eq!(result, expected);
-
- unsafe {
- Arc::from_raw(array);
- Arc::from_raw(schema);
- }
Ok(())
}
diff --git a/arrow/src/array/mod.rs b/arrow/src/array/mod.rs
index 1a10725df..09348996e 100644
--- a/arrow/src/array/mod.rs
+++ b/arrow/src/array/mod.rs
@@ -34,6 +34,7 @@ pub use arrow_data::{
pub use arrow_data::transform::{Capacities, MutableArrayData};
#[cfg(feature = "ffi")]
+#[allow(deprecated)]
pub use self::ffi::{export_array_into_raw, make_array_from_raw};
// --------------------- Array's values comparison ---------------------
diff --git a/arrow/src/ffi.rs b/arrow/src/ffi.rs
index 78dd1ef45..0f0f94c7a 100644
--- a/arrow/src/ffi.rs
+++ b/arrow/src/ffi.rs
@@ -24,68 +24,63 @@
//! The second interface maps native Rust types to the Rust-specific
implementation of Arrow such as `format` to `Datatype`,
//! `Buffer`, etc. This is handled by `ArrowArray`.
//!
+//!
+//! Export to FFI
+//!
//! ```rust
//! # use std::sync::Arc;
-//! # use arrow::array::{Int32Array, Array, ArrayData, export_array_into_raw,
make_array, make_array_from_raw};
-//! # use arrow::error::{Result, ArrowError};
+//! # use arrow::array::{Int32Array, Array, ArrayData, make_array};
+//! # use arrow::error::Result;
//! # use arrow::compute::kernels::arithmetic;
//! # use arrow::ffi::{ArrowArray, FFI_ArrowArray, FFI_ArrowSchema};
-//! # use std::convert::TryFrom;
//! # fn main() -> Result<()> {
//! // create an array natively
//! let array = Int32Array::from(vec![Some(1), None, Some(3)]);
+//! let data = array.into_data();
//!
-//! // export it
-//!
-//! let ffi_array = ArrowArray::try_new(array.data().clone())?;
-//! let (array_ptr, schema_ptr) = ArrowArray::into_raw(ffi_array);
-//!
-//! // consumed and used by something else...
+//! // Export it
+//! let out_array = FFI_ArrowArray::new(&data);
+//! let out_schema = FFI_ArrowSchema::try_from(data.data_type())?;
//!
//! // import it
-//! let array = unsafe { make_array_from_raw(array_ptr, schema_ptr)? };
+//! let array = ArrowArray::new(out_array, out_schema);
+//! let array = Int32Array::from(ArrayData::try_from(array)?);
//!
//! // perform some operation
-//! let array = array.as_any().downcast_ref::<Int32Array>().ok_or(
-//! ArrowError::ParseError("Expects an int32".to_string()),
-//! )?;
//! let array = arithmetic::add(&array, &array)?;
//!
//! // verify
//! assert_eq!(array, Int32Array::from(vec![Some(2), None, Some(6)]));
+//! #
+//! # Ok(())
+//! # }
+//! ```
//!
-//! // Simulate if raw pointers are provided by consumer
-//! let array = make_array(Int32Array::from(vec![Some(1), None,
Some(3)]).into_data());
-//!
-//! let out_array = Box::new(FFI_ArrowArray::empty());
-//! let out_schema = Box::new(FFI_ArrowSchema::empty());
-//! let out_array_ptr = Box::into_raw(out_array);
-//! let out_schema_ptr = Box::into_raw(out_schema);
-//!
-//! // export array into raw pointers from consumer
-//! unsafe { export_array_into_raw(array, out_array_ptr, out_schema_ptr)?; };
-//!
-//! // import it
-//! let array = unsafe { make_array_from_raw(out_array_ptr, out_schema_ptr)? };
+//! Import from FFI
//!
-//! // perform some operation
-//! let array = array.as_any().downcast_ref::<Int32Array>().ok_or(
-//! ArrowError::ParseError("Expects an int32".to_string()),
-//! )?;
-//! let array = arithmetic::add(&array, &array)?;
-//!
-//! // verify
-//! assert_eq!(array, Int32Array::from(vec![Some(2), None, Some(6)]));
+//! ```
+//! # use std::ptr::addr_of_mut;
+//! # use arrow::ffi::{ArrowArray, FFI_ArrowArray, FFI_ArrowSchema};
+//! # use arrow_array::{ArrayRef, make_array};
+//! # use arrow_schema::ArrowError;
+//! #
+//! /// A foreign data container that can export to C Data interface
+//! struct ForeignArray {};
//!
-//! // (drop/release)
-//! unsafe {
-//! Box::from_raw(out_array_ptr);
-//! Box::from_raw(out_schema_ptr);
-//! Arc::from_raw(array_ptr);
-//! Arc::from_raw(schema_ptr);
+//! impl ForeignArray {
+//! /// Export from foreign array representation to C Data interface
+//! /// e.g.
<https://github.com/apache/arrow/blob/fc1f9ebbc4c3ae77d5cfc2f9322f4373d3d19b8a/python/pyarrow/array.pxi#L1552>
+//! fn export_to_c(&self, array: *mut FFI_ArrowArray, schema: *mut
FFI_ArrowSchema) {
+//! // ...
+//! }
//! }
//!
-//! Ok(())
+//! /// Import an [`ArrayRef`] from a [`ForeignArray`]
+//! fn import_array(foreign: &ForeignArray) -> Result<ArrayRef, ArrowError> {
+//! let mut schema = FFI_ArrowSchema::empty();
+//! let mut array = FFI_ArrowArray::empty();
+//! foreign.export_to_c(addr_of_mut!(array), addr_of_mut!(schema));
+//! Ok(make_array(ArrowArray::new(array, schema).try_into()?))
//! }
//! ```
@@ -139,7 +134,15 @@ bitflags! {
/// ABI-compatible struct for `ArrowSchema` from C Data Interface
/// See
<https://arrow.apache.org/docs/format/CDataInterface.html#structure-definitions>
-/// This was created by bindgen
+///
+/// ```
+/// # use arrow::ffi::FFI_ArrowSchema;
+/// # use arrow_data::ArrayData;
+/// fn array_schema(data: &ArrayData) -> FFI_ArrowSchema {
+/// FFI_ArrowSchema::try_from(data.data_type()).unwrap()
+/// }
+/// ```
+///
#[repr(C)]
#[derive(Debug)]
pub struct FFI_ArrowSchema {
@@ -394,7 +397,14 @@ fn bit_width(data_type: &DataType, i: usize) ->
Result<usize> {
/// ABI-compatible struct for ArrowArray from C Data Interface
/// See
<https://arrow.apache.org/docs/format/CDataInterface.html#structure-definitions>
-/// This was created by bindgen
+///
+/// ```
+/// # use arrow::ffi::FFI_ArrowArray;
+/// # use arrow_array::Array;
+/// fn export_array(array: &dyn Array) -> FFI_ArrowArray {
+/// FFI_ArrowArray::new(array.data())
+/// }
+/// ```
#[repr(C)]
#[derive(Debug)]
pub struct FFI_ArrowArray {
@@ -859,6 +869,14 @@ impl<'a> ArrowArrayRef for ArrowArrayChild<'a> {
}
impl ArrowArray {
+ /// Creates a new [`ArrowArray`] from the provided array and schema
+ pub fn new(array: FFI_ArrowArray, schema: FFI_ArrowSchema) -> Self {
+ Self {
+ array: Arc::new(array),
+ schema: Arc::new(schema),
+ }
+ }
+
/// creates a new `ArrowArray`. This is used to export to the C Data
Interface.
///
/// # Memory Leaks
@@ -878,6 +896,7 @@ impl ArrowArray {
/// on managing the allocation of the structs by themselves.
/// # Error
/// Errors if any of the pointers is null
+ #[deprecated(note = "Use ArrowArray::new")]
pub unsafe fn try_from_raw(
array: *const FFI_ArrowArray,
schema: *const FFI_ArrowSchema,
@@ -911,6 +930,7 @@ impl ArrowArray {
}
/// exports [ArrowArray] to the C Data Interface
+ #[deprecated(note = "Use FFI_ArrowArray and FFI_ArrowSchema directly")]
pub fn into_raw(this: ArrowArray) -> (*const FFI_ArrowArray, *const
FFI_ArrowSchema) {
(Arc::into_raw(this.array), Arc::into_raw(this.schema))
}
@@ -946,28 +966,49 @@ mod tests {
use arrow_array::types::{Float64Type, Int32Type};
use arrow_array::{Float64Array, UnionArray};
use std::convert::TryFrom;
+ use std::mem::ManuallyDrop;
+ use std::ptr::addr_of_mut;
#[test]
- fn test_round_trip() -> Result<()> {
+ fn test_round_trip() {
// create an array natively
let array = Int32Array::from(vec![1, 2, 3]);
// export it
- let array = ArrowArray::try_from(array.into_data())?;
+ let array = ArrowArray::try_from(array.into_data()).unwrap();
// (simulate consumer) import it
- let data = ArrayData::try_from(array)?;
- let array = make_array(data);
-
- // perform some operation
- let array = array.as_any().downcast_ref::<Int32Array>().unwrap();
- let array = kernels::arithmetic::add(array, array).unwrap();
+ let array = Int32Array::from(ArrayData::try_from(array).unwrap());
+ let array = kernels::arithmetic::add(&array, &array).unwrap();
// verify
assert_eq!(array, Int32Array::from(vec![2, 4, 6]));
+ }
- // (drop/release)
- Ok(())
+ #[test]
+ fn test_import() {
+ // Model receiving const pointers from an external system
+
+ // Create an array natively
+ let data = Int32Array::from(vec![1, 2, 3]).into_data();
+ let schema = FFI_ArrowSchema::try_from(data.data_type()).unwrap();
+ let array = FFI_ArrowArray::new(&data);
+
+ // Use ManuallyDrop to avoid Box:Drop recursing
+ let schema = Box::new(ManuallyDrop::new(schema));
+ let array = Box::new(ManuallyDrop::new(array));
+
+ let schema_ptr = &**schema as *const _;
+ let array_ptr = &**array as *const _;
+
+ // We can read them back to memory
+ // SAFETY:
+ // Pointers are aligned and valid
+ let array =
+ unsafe { ArrowArray::new(ptr::read(array_ptr),
ptr::read(schema_ptr)) };
+
+ let array = Int32Array::from(ArrayData::try_from(array).unwrap());
+ assert_eq!(array, Int32Array::from(vec![1, 2, 3]));
}
#[test]
@@ -1424,31 +1465,28 @@ mod tests {
let array = make_array(Int32Array::from(vec![1, 2, 3]).into_data());
// Assume two raw pointers provided by the consumer
- let out_array = Box::new(FFI_ArrowArray::empty());
- let out_schema = Box::new(FFI_ArrowSchema::empty());
- let out_array_ptr = Box::into_raw(out_array);
- let out_schema_ptr = Box::into_raw(out_schema);
-
- unsafe {
- export_array_into_raw(array, out_array_ptr, out_schema_ptr)?;
+ let mut out_array = FFI_ArrowArray::empty();
+ let mut out_schema = FFI_ArrowSchema::empty();
+
+ {
+ let out_array_ptr = addr_of_mut!(out_array);
+ let out_schema_ptr = addr_of_mut!(out_schema);
+ unsafe {
+ export_array_into_raw(array, out_array_ptr, out_schema_ptr)?;
+ }
}
// (simulate consumer) import it
- unsafe {
- let array = ArrowArray::try_from_raw(out_array_ptr,
out_schema_ptr).unwrap();
- let data = ArrayData::try_from(array)?;
- let array = make_array(data);
-
- // perform some operation
- let array = array.as_any().downcast_ref::<Int32Array>().unwrap();
- let array = kernels::arithmetic::add(array, array).unwrap();
+ let array = ArrowArray::new(out_array, out_schema);
+ let data = ArrayData::try_from(array)?;
+ let array = make_array(data);
- // verify
- assert_eq!(array, Int32Array::from(vec![2, 4, 6]));
+ // perform some operation
+ let array = array.as_any().downcast_ref::<Int32Array>().unwrap();
+ let array = kernels::arithmetic::add(array, array).unwrap();
- drop(Box::from_raw(out_array_ptr));
- drop(Box::from_raw(out_schema_ptr));
- }
+ // verify
+ assert_eq!(array, Int32Array::from(vec![2, 4, 6]));
Ok(())
}
diff --git a/arrow/src/pyarrow.rs b/arrow/src/pyarrow.rs
index 09933304e..110fd9cfa 100644
--- a/arrow/src/pyarrow.rs
+++ b/arrow/src/pyarrow.rs
@@ -19,6 +19,7 @@
//! arrays from and to Python.
use std::convert::{From, TryFrom};
+use std::ptr::addr_of_mut;
use std::sync::Arc;
use pyo3::ffi::Py_uintptr_t;
@@ -30,7 +31,7 @@ use crate::array::{make_array, Array, ArrayData};
use crate::datatypes::{DataType, Field, Schema};
use crate::error::ArrowError;
use crate::ffi;
-use crate::ffi::FFI_ArrowSchema;
+use crate::ffi::{FFI_ArrowArray, FFI_ArrowSchema};
use crate::ffi_stream::{
export_reader_into_raw, ArrowArrayStreamReader, FFI_ArrowArrayStream,
};
@@ -111,8 +112,8 @@ impl PyArrowConvert for Schema {
impl PyArrowConvert for ArrayData {
fn from_pyarrow(value: &PyAny) -> PyResult<Self> {
// prepare a pointer to receive the Array struct
- let (array_pointer, schema_pointer) =
- ffi::ArrowArray::into_raw(unsafe { ffi::ArrowArray::empty() });
+ let mut array = FFI_ArrowArray::empty();
+ let mut schema = FFI_ArrowSchema::empty();
// make the conversion through PyArrow's private API
// this changes the pointer's memory and is thus unsafe.
@@ -120,15 +121,12 @@ impl PyArrowConvert for ArrayData {
value.call_method1(
"_export_to_c",
(
- array_pointer as Py_uintptr_t,
- schema_pointer as Py_uintptr_t,
+ addr_of_mut!(array) as Py_uintptr_t,
+ addr_of_mut!(schema) as Py_uintptr_t,
),
)?;
- let ffi_array = unsafe {
- ffi::ArrowArray::try_from_raw(array_pointer, schema_pointer)
- .map_err(to_py_err)?
- };
+ let ffi_array = ffi::ArrowArray::new(array, schema);
let data = ArrayData::try_from(ffi_array).map_err(to_py_err)?;
Ok(data)