tustvold commented on code in PR #4232:
URL: https://github.com/apache/arrow-rs/pull/4232#discussion_r1200161791
##########
arrow/src/ffi_stream.rs:
##########
@@ -261,24 +256,21 @@ fn get_error_code(err: &ArrowError) -> i32 {
/// Struct used to fetch `RecordBatch` from the C Stream Interface.
/// Its main responsibility is to expose `RecordBatchReader` functionality
/// that requires [FFI_ArrowArrayStream].
-#[derive(Debug, Clone)]
+#[derive(Debug)]
pub struct ArrowArrayStreamReader {
- stream: Arc<FFI_ArrowArrayStream>,
+ stream: Box<FFI_ArrowArrayStream>,
Review Comment:
Do we even need a box here?
##########
arrow/src/ffi_stream.rs:
##########
@@ -37,25 +37,19 @@
//! let reader = Box::new(FileReader::try_new(file).unwrap());
//!
//! // export it
-//! let stream = Box::new(FFI_ArrowArrayStream::empty());
-//! let stream_ptr = Box::into_raw(stream) as *mut FFI_ArrowArrayStream;
-//! unsafe { export_reader_into_raw(reader, stream_ptr) };
+//! let mut stream = FFI_ArrowArrayStream::empty();
+//! unsafe { export_reader_into_raw(reader, &mut stream) };
//!
//! // consumed and used by something else...
//!
//! // import it
-//! let stream_reader = unsafe {
ArrowArrayStreamReader::from_raw(stream_ptr).unwrap() };
+//! let stream_reader = unsafe { ArrowArrayStreamReader::from_raw(&mut
stream).unwrap() };
Review Comment:
```suggestion
//! let stream_reader = ArrowArrayStreamReader::try_new(stream).unwrap();
```
I think we could deprecate from_raw
##########
arrow/src/pyarrow.rs:
##########
@@ -224,39 +253,42 @@ impl PyArrowConvert for ArrowArrayStreamReader {
Ok(stream_reader)
}
+}
- fn to_pyarrow(&self, py: Python) -> PyResult<PyObject> {
+impl IntoPyArrow for ArrowArrayStreamReader {
+ fn into_pyarrow(self, py: Python) -> PyResult<PyObject> {
let stream = Box::new(FFI_ArrowArrayStream::empty());
let stream_ptr = Box::into_raw(stream) as *mut FFI_ArrowArrayStream;
- unsafe { export_reader_into_raw(Box::new(self.clone()), stream_ptr) };
Review Comment:
I see why IntoPyArrow is needed, this needs to "consume" the stream
##########
arrow/src/pyarrow.rs:
##########
@@ -224,39 +253,42 @@ impl PyArrowConvert for ArrowArrayStreamReader {
Ok(stream_reader)
}
+}
- fn to_pyarrow(&self, py: Python) -> PyResult<PyObject> {
+impl IntoPyArrow for ArrowArrayStreamReader {
+ fn into_pyarrow(self, py: Python) -> PyResult<PyObject> {
let stream = Box::new(FFI_ArrowArrayStream::empty());
Review Comment:
This boxing is unnecessary, we could take the opportunity to clean it up
##########
arrow/src/pyarrow.rs:
##########
@@ -224,39 +253,42 @@ impl PyArrowConvert for ArrowArrayStreamReader {
Ok(stream_reader)
}
+}
- fn to_pyarrow(&self, py: Python) -> PyResult<PyObject> {
+impl IntoPyArrow for ArrowArrayStreamReader {
+ fn into_pyarrow(self, py: Python) -> PyResult<PyObject> {
let stream = Box::new(FFI_ArrowArrayStream::empty());
let stream_ptr = Box::into_raw(stream) as *mut FFI_ArrowArrayStream;
- unsafe { export_reader_into_raw(Box::new(self.clone()), stream_ptr) };
+ unsafe { export_reader_into_raw(Box::new(self), stream_ptr) };
let module = py.import("pyarrow")?;
let class = module.getattr("RecordBatchReader")?;
- let args = PyTuple::new(py, &[stream_ptr as Py_uintptr_t]);
+ let args = PyTuple::new(py, [stream_ptr as Py_uintptr_t]);
let reader = class.call_method1("_import_from_c", args)?;
+
Ok(PyObject::from(reader))
}
}
/// A newtype wrapper around a `T: PyArrowConvert` that implements
/// [`FromPyObject`] and [`IntoPy`] allowing usage with pyo3 macros
#[derive(Debug)]
-pub struct PyArrowType<T: PyArrowConvert>(pub T);
+pub struct PyArrowType<T: FromPyArrow + IntoPyArrow>(pub T);
-impl<'source, T: PyArrowConvert> FromPyObject<'source> for PyArrowType<T> {
+impl<'source, T: FromPyArrow + IntoPyArrow> FromPyObject<'source> for
PyArrowType<T> {
fn extract(value: &'source PyAny) -> PyResult<Self> {
Ok(Self(T::from_pyarrow(value)?))
}
}
-impl<'a, T: PyArrowConvert> IntoPy<PyObject> for PyArrowType<T> {
+impl<T: FromPyArrow + IntoPyArrow> IntoPy<PyObject> for PyArrowType<T> {
fn into_py(self, py: Python) -> PyObject {
- self.0.to_pyarrow(py).unwrap()
+ self.0.into_pyarrow(py).unwrap()
}
}
-impl<T: PyArrowConvert> From<T> for PyArrowType<T> {
+impl<T: FromPyArrow + IntoPyArrow> From<T> for PyArrowType<T> {
Review Comment:
```suggestion
impl<T> From<T> for PyArrowType<T> {
```
##########
arrow/src/pyarrow.rs:
##########
@@ -203,7 +232,7 @@ impl PyArrowConvert for RecordBatch {
}
}
-impl PyArrowConvert for ArrowArrayStreamReader {
+impl FromPyArrow for ArrowArrayStreamReader {
fn from_pyarrow(value: &PyAny) -> PyResult<Self> {
// prepare a pointer to receive the stream struct
let stream = Box::new(FFI_ArrowArrayStream::empty());
Review Comment:
I think we could also take the opportunity to remove the unnecessary box here
##########
arrow/src/pyarrow.rs:
##########
@@ -224,39 +253,42 @@ impl PyArrowConvert for ArrowArrayStreamReader {
Ok(stream_reader)
}
+}
- fn to_pyarrow(&self, py: Python) -> PyResult<PyObject> {
+impl IntoPyArrow for ArrowArrayStreamReader {
+ fn into_pyarrow(self, py: Python) -> PyResult<PyObject> {
let stream = Box::new(FFI_ArrowArrayStream::empty());
let stream_ptr = Box::into_raw(stream) as *mut FFI_ArrowArrayStream;
- unsafe { export_reader_into_raw(Box::new(self.clone()), stream_ptr) };
+ unsafe { export_reader_into_raw(Box::new(self), stream_ptr) };
let module = py.import("pyarrow")?;
let class = module.getattr("RecordBatchReader")?;
- let args = PyTuple::new(py, &[stream_ptr as Py_uintptr_t]);
+ let args = PyTuple::new(py, [stream_ptr as Py_uintptr_t]);
let reader = class.call_method1("_import_from_c", args)?;
+
Ok(PyObject::from(reader))
}
}
/// A newtype wrapper around a `T: PyArrowConvert` that implements
/// [`FromPyObject`] and [`IntoPy`] allowing usage with pyo3 macros
#[derive(Debug)]
-pub struct PyArrowType<T: PyArrowConvert>(pub T);
+pub struct PyArrowType<T: FromPyArrow + IntoPyArrow>(pub T);
-impl<'source, T: PyArrowConvert> FromPyObject<'source> for PyArrowType<T> {
+impl<'source, T: FromPyArrow + IntoPyArrow> FromPyObject<'source> for
PyArrowType<T> {
fn extract(value: &'source PyAny) -> PyResult<Self> {
Ok(Self(T::from_pyarrow(value)?))
}
}
-impl<'a, T: PyArrowConvert> IntoPy<PyObject> for PyArrowType<T> {
+impl<T: FromPyArrow + IntoPyArrow> IntoPy<PyObject> for PyArrowType<T> {
Review Comment:
```suggestion
impl<T: IntoPyArrow> IntoPy<PyObject> for PyArrowType<T> {
```
##########
arrow/src/pyarrow.rs:
##########
@@ -224,39 +253,42 @@ impl PyArrowConvert for ArrowArrayStreamReader {
Ok(stream_reader)
}
+}
- fn to_pyarrow(&self, py: Python) -> PyResult<PyObject> {
+impl IntoPyArrow for ArrowArrayStreamReader {
+ fn into_pyarrow(self, py: Python) -> PyResult<PyObject> {
let stream = Box::new(FFI_ArrowArrayStream::empty());
let stream_ptr = Box::into_raw(stream) as *mut FFI_ArrowArrayStream;
- unsafe { export_reader_into_raw(Box::new(self.clone()), stream_ptr) };
+ unsafe { export_reader_into_raw(Box::new(self), stream_ptr) };
let module = py.import("pyarrow")?;
let class = module.getattr("RecordBatchReader")?;
- let args = PyTuple::new(py, &[stream_ptr as Py_uintptr_t]);
+ let args = PyTuple::new(py, [stream_ptr as Py_uintptr_t]);
let reader = class.call_method1("_import_from_c", args)?;
+
Ok(PyObject::from(reader))
}
}
/// A newtype wrapper around a `T: PyArrowConvert` that implements
/// [`FromPyObject`] and [`IntoPy`] allowing usage with pyo3 macros
#[derive(Debug)]
-pub struct PyArrowType<T: PyArrowConvert>(pub T);
+pub struct PyArrowType<T: FromPyArrow + IntoPyArrow>(pub T);
-impl<'source, T: PyArrowConvert> FromPyObject<'source> for PyArrowType<T> {
+impl<'source, T: FromPyArrow + IntoPyArrow> FromPyObject<'source> for
PyArrowType<T> {
Review Comment:
```suggestion
impl<'source, T: FromPyArrow> FromPyObject<'source> for PyArrowType<T> {
```
##########
arrow/src/ffi_stream.rs:
##########
@@ -512,10 +487,9 @@ mod tests {
let reader = TestRecordBatchReader::new(schema.clone(), iter);
// Import through `FFI_ArrowArrayStream` as `ArrowArrayStreamReader`
- let stream = Arc::new(FFI_ArrowArrayStream::new(reader));
- let stream_ptr = Arc::into_raw(stream) as *mut FFI_ArrowArrayStream;
+ let mut stream = FFI_ArrowArrayStream::new(reader);
let stream_reader =
- unsafe { ArrowArrayStreamReader::from_raw(stream_ptr).unwrap() };
+ unsafe { ArrowArrayStreamReader::from_raw(&mut stream).unwrap() };
Review Comment:
```suggestion
ArrowArrayStreamReader::try_new(stream).unwrap();
```
##########
arrow/src/ffi_stream.rs:
##########
@@ -231,8 +227,7 @@ impl ExportedArrayStream {
let struct_array = StructArray::from(batch);
let array = FFI_ArrowArray::new(&struct_array.to_data());
- unsafe { std::ptr::copy(addr_of!(array), out, 1) };
- std::mem::forget(array);
+ unsafe { std::ptr::write_unaligned(out, array) };
Review Comment:
I think this will cause the array to be released when `array` goes out of
scope, i.e. immediately. I think eventually this will result in a double-free
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]