This is an automated email from the ASF dual-hosted git repository.
tustvold pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git
The following commit(s) were added to refs/heads/master by this push:
new a3687a7506 Add FFI from_raw (#5082)
a3687a7506 is described below
commit a3687a750665780a5d3988a1d66d52a98814c568
Author: Raphael Taylor-Davies <[email protected]>
AuthorDate: Fri Nov 17 08:07:38 2023 +0000
Add FFI from_raw (#5082)
---
arrow-data/src/ffi.rs | 16 ++++++++++++++++
arrow-schema/src/ffi.rs | 16 ++++++++++++++++
arrow/src/ffi_stream.rs | 27 +++++++++++++++++++++------
arrow/src/pyarrow.rs | 12 ++++--------
4 files changed, 57 insertions(+), 14 deletions(-)
diff --git a/arrow-data/src/ffi.rs b/arrow-data/src/ffi.rs
index 7623ced043..2b4d526012 100644
--- a/arrow-data/src/ffi.rs
+++ b/arrow-data/src/ffi.rs
@@ -191,6 +191,22 @@ impl FFI_ArrowArray {
}
}
+ /// Takes ownership of the pointed to [`FFI_ArrowArray`]
+ ///
+ /// This acts to [move] the data out of `array`, setting the release callback to NULL
+ ///
+ /// # Safety
+ ///
+ /// * `array` must be [valid] for reads and writes
+ /// * `array` must be properly aligned
+ /// * `array` must point to a properly initialized value of [`FFI_ArrowArray`]
+ ///
+ /// [move]: https://arrow.apache.org/docs/format/CDataInterface.html#moving-an-array
+ /// [valid]: https://doc.rust-lang.org/std/ptr/index.html#safety
+ pub unsafe fn from_raw(array: *mut FFI_ArrowArray) -> Self {
+ std::ptr::replace(array, Self::empty())
+ }
+
/// create an empty `FFI_ArrowArray`, which can be used to import data into
pub fn empty() -> Self {
Self {
diff --git a/arrow-schema/src/ffi.rs b/arrow-schema/src/ffi.rs
index 640a7de798..b4d10b814a 100644
--- a/arrow-schema/src/ffi.rs
+++ b/arrow-schema/src/ffi.rs
@@ -219,6 +219,22 @@ impl FFI_ArrowSchema {
Ok(self)
}
+ /// Takes ownership of the pointed to [`FFI_ArrowSchema`]
+ ///
+ /// This acts to [move] the data out of `schema`, setting the release callback to NULL
+ ///
+ /// # Safety
+ ///
+ /// * `schema` must be [valid] for reads and writes
+ /// * `schema` must be properly aligned
+ /// * `schema` must point to a properly initialized value of [`FFI_ArrowSchema`]
+ ///
+ /// [move]: https://arrow.apache.org/docs/format/CDataInterface.html#moving-an-array
+ /// [valid]: https://doc.rust-lang.org/std/ptr/index.html#safety
+ pub unsafe fn from_raw(schema: *mut FFI_ArrowSchema) -> Self {
+ std::ptr::replace(schema, Self::empty())
+ }
+
pub fn empty() -> Self {
Self {
format: std::ptr::null_mut(),
diff --git a/arrow/src/ffi_stream.rs b/arrow/src/ffi_stream.rs
index 73cf28d66d..123669aa61 100644
--- a/arrow/src/ffi_stream.rs
+++ b/arrow/src/ffi_stream.rs
@@ -171,6 +171,22 @@ impl FFI_ArrowArrayStream {
}
}
+ /// Takes ownership of the pointed to [`FFI_ArrowArrayStream`]
+ ///
+ /// This acts to [move] the data out of `raw_stream`, setting the release callback to NULL
+ ///
+ /// # Safety
+ ///
+ /// * `raw_stream` must be [valid] for reads and writes
+ /// * `raw_stream` must be properly aligned
+ /// * `raw_stream` must point to a properly initialized value of [`FFI_ArrowArrayStream`]
+ ///
+ /// [move]: https://arrow.apache.org/docs/format/CDataInterface.html#moving-an-array
+ /// [valid]: https://doc.rust-lang.org/std/ptr/index.html#safety
+ pub unsafe fn from_raw(raw_stream: *mut FFI_ArrowArrayStream) -> Self {
+ std::ptr::replace(raw_stream, Self::empty())
+ }
+
/// Creates a new empty [FFI_ArrowArrayStream]. Used to import from the C Stream Interface.
pub fn empty() -> Self {
Self {
@@ -306,11 +322,10 @@ impl ArrowArrayStreamReader {
/// the pointer.
///
/// # Safety
- /// This function dereferences a raw pointer of `FFI_ArrowArrayStream`.
+ ///
+ /// See [`FFI_ArrowArrayStream::from_raw`]
pub unsafe fn from_raw(raw_stream: *mut FFI_ArrowArrayStream) ->
Result<Self> {
- let stream_data = std::ptr::replace(raw_stream,
FFI_ArrowArrayStream::empty());
-
- Self::try_new(stream_data)
+ Self::try_new(FFI_ArrowArrayStream::from_raw(raw_stream))
}
/// Get the last error from `ArrowArrayStreamReader`
@@ -368,6 +383,7 @@ impl RecordBatchReader for ArrowArrayStreamReader {
/// # Safety
/// Assumes that the pointer represents valid C Stream Interfaces, both in memory
/// representation and lifetime via the `release` mechanism.
+#[deprecated(note = "Use FFI_ArrowArrayStream::new")]
pub unsafe fn export_reader_into_raw(
reader: Box<dyn RecordBatchReader + Send>,
out_stream: *mut FFI_ArrowArrayStream,
@@ -426,8 +442,7 @@ mod tests {
let reader = TestRecordBatchReader::new(schema.clone(), iter);
// Export a `RecordBatchReader` through `FFI_ArrowArrayStream`
- let mut ffi_stream = FFI_ArrowArrayStream::empty();
- unsafe { export_reader_into_raw(reader, &mut ffi_stream) };
+ let mut ffi_stream = FFI_ArrowArrayStream::new(reader);
// Get schema from `FFI_ArrowArrayStream`
let mut ffi_schema = FFI_ArrowSchema::empty();
diff --git a/arrow/src/pyarrow.rs b/arrow/src/pyarrow.rs
index 4d262b0d10..2ac550ad04 100644
--- a/arrow/src/pyarrow.rs
+++ b/arrow/src/pyarrow.rs
@@ -266,8 +266,7 @@ impl FromPyArrow for ArrayData {
validate_pycapsule(array_capsule, "arrow_array")?;
let schema_ptr = unsafe {
schema_capsule.reference::<FFI_ArrowSchema>() };
- let array_ptr = array_capsule.pointer() as *mut FFI_ArrowArray;
- let array = unsafe { std::ptr::replace(array_ptr,
FFI_ArrowArray::empty()) };
+ let array = unsafe {
FFI_ArrowArray::from_raw(array_capsule.pointer() as _) };
return ffi::from_ffi(array, schema_ptr).map_err(to_py_err);
}
@@ -348,8 +347,7 @@ impl FromPyArrow for RecordBatch {
validate_pycapsule(array_capsule, "arrow_array")?;
let schema_ptr = unsafe {
schema_capsule.reference::<FFI_ArrowSchema>() };
- let array_ptr = array_capsule.pointer() as *mut FFI_ArrowArray;
- let ffi_array = unsafe { std::ptr::replace(array_ptr,
FFI_ArrowArray::empty()) };
+ let ffi_array = unsafe {
FFI_ArrowArray::from_raw(array_capsule.pointer() as _) };
let array_data = ffi::from_ffi(ffi_array,
schema_ptr).map_err(to_py_err)?;
if !matches!(array_data.data_type(), DataType::Struct(_)) {
return Err(PyTypeError::new_err(
@@ -397,8 +395,7 @@ impl FromPyArrow for ArrowArrayStreamReader {
PyTryInto::try_into(value.getattr("__arrow_c_stream__")?.call0()?)?;
validate_pycapsule(capsule, "arrow_array_stream")?;
- let stream_ptr = capsule.pointer() as *mut FFI_ArrowArrayStream;
- let stream = unsafe { std::ptr::replace(stream_ptr,
FFI_ArrowArrayStream::empty()) };
+ let stream = unsafe {
FFI_ArrowArrayStream::from_raw(capsule.pointer() as _) };
let stream_reader = ArrowArrayStreamReader::try_new(stream)
.map_err(|err| PyValueError::new_err(err.to_string()))?;
@@ -430,8 +427,7 @@ impl IntoPyArrow for Box<dyn RecordBatchReader + Send> {
// We can't implement `ToPyArrow` for `T: RecordBatchReader + Send` because
// there is already a blanket implementation for `T: ToPyArrow`.
fn into_pyarrow(self, py: Python) -> PyResult<PyObject> {
- let mut stream = FFI_ArrowArrayStream::empty();
- unsafe { export_reader_into_raw(self, &mut stream) };
+ let mut stream = FFI_ArrowArrayStream::new(self);
let stream_ptr = (&mut stream) as *mut FFI_ArrowArrayStream;
let module = py.import("pyarrow")?;