This is an automated email from the ASF dual-hosted git repository.
alamb pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git
The following commit(s) were added to refs/heads/main by this push:
new cfba3ccc0c Updated arrow-pyarrow to use pyo3 0.27, updated deprecated code warnings (#8773)
cfba3ccc0c is described below
commit cfba3ccc0c9460dba65ca000c34e6491c8043abc
Author: Stephen Carman <[email protected]>
AuthorDate: Fri Jan 9 15:56:40 2026 -0500
Updated arrow-pyarrow to use pyo3 0.27, updated deprecated code warnings (#8773)
# Which issue does this PR close?
No issue that I'm aware of. This just updates pyo3 to 0.27 so delta-rs can start looking at variant adoption.
# Rationale for this change
There is a long list of dependencies that need to support arrow 57 before delta-rs can reliably build against them. Updating pyo3 brings arrow in line with the other Arrow-based dependencies.
# Are these changes tested?
Since this change involves a good amount of unsafe code, and I'm on Windows (which I'd imagine is not the common path), I'd like to see the CI run just to make sure everything looks good.
# Are there any user-facing changes?
Just that a dependency is now a later version.
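For illustration only (not part of this patch), here is a minimal sketch of how a downstream extension consumes these conversions through `PyArrowType` in a `#[pyfunction]` signature; the usage pattern is unchanged by the pyo3 0.27 bump. The function name `round_trip` is hypothetical, and the sketch assumes `PyArrowType` continues to implement both `FromPyObject` and `IntoPyObject` as documented for the crate:

```rust
use arrow_array::RecordBatch;
use arrow_pyarrow::PyArrowType;
use pyo3::prelude::*;

/// Accepts any Python object exposing the Arrow PyCapsule interface
/// (for example a pyarrow.RecordBatch) and hands it straight back.
#[pyfunction]
fn round_trip(batch: PyArrowType<RecordBatch>) -> PyResult<PyArrowType<RecordBatch>> {
    // `batch.0` is the converted arrow-rs RecordBatch; re-wrap it for Python.
    Ok(PyArrowType(batch.0))
}
```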
---------
Co-authored-by: Matthijs Brobbel <[email protected]>
Co-authored-by: Andrew Lamb <[email protected]>
---
arrow-pyarrow-integration-testing/Cargo.toml | 2 +-
arrow-pyarrow-testing/Cargo.toml | 2 +-
arrow-pyarrow/Cargo.toml | 2 +-
arrow-pyarrow/src/lib.rs | 104 +++++++++++++++++++--------
4 files changed, 76 insertions(+), 34 deletions(-)
diff --git a/arrow-pyarrow-integration-testing/Cargo.toml b/arrow-pyarrow-integration-testing/Cargo.toml
index 8dae4e2258..1fa4197f51 100644
--- a/arrow-pyarrow-integration-testing/Cargo.toml
+++ b/arrow-pyarrow-integration-testing/Cargo.toml
@@ -34,4 +34,4 @@ crate-type = ["cdylib"]
[dependencies]
arrow = { path = "../arrow", features = ["pyarrow"] }
-pyo3 = { version = "0.26.0", features = ["extension-module"] }
+pyo3 = { version = "0.27.1", features = ["extension-module"] }
diff --git a/arrow-pyarrow-testing/Cargo.toml b/arrow-pyarrow-testing/Cargo.toml
index e1ea72f5f4..b38af3c3b4 100644
--- a/arrow-pyarrow-testing/Cargo.toml
+++ b/arrow-pyarrow-testing/Cargo.toml
@@ -48,4 +48,4 @@ publish = false
# Note no dependency on arrow, to ensure arrow-pyarrow can be used by itself
arrow-array = { path = "../arrow-array" }
arrow-pyarrow = { path = "../arrow-pyarrow" }
-pyo3 = { version = "0.26.0", default-features = false }
+pyo3 = { version = "0.27.1", default-features = false }
diff --git a/arrow-pyarrow/Cargo.toml b/arrow-pyarrow/Cargo.toml
index 9cfa235324..c508cabcfe 100644
--- a/arrow-pyarrow/Cargo.toml
+++ b/arrow-pyarrow/Cargo.toml
@@ -39,4 +39,4 @@ all-features = true
arrow-array = { workspace = true, features = ["ffi"] }
arrow-data = { workspace = true }
arrow-schema = { workspace = true }
-pyo3 = { version = "0.26.0", default-features = false }
+pyo3 = { version = "0.27.1", default-features = false }
diff --git a/arrow-pyarrow/src/lib.rs b/arrow-pyarrow/src/lib.rs
index 1f8941ef1c..e2b3650709 100644
--- a/arrow-pyarrow/src/lib.rs
+++ b/arrow-pyarrow/src/lib.rs
@@ -60,6 +60,7 @@
//! forcing eager reading into `Vec<RecordBatch>`.
use std::convert::{From, TryFrom};
+use std::ffi::CStr;
use std::ptr::{addr_of, addr_of_mut};
use std::sync::Arc;
@@ -83,6 +84,10 @@ import_exception!(pyarrow, ArrowException);
/// Represents an exception raised by PyArrow.
pub type PyArrowException = ArrowException;
+const ARROW_ARRAY_STREAM_CAPSULE_NAME: &CStr = c"arrow_array_stream";
+const ARROW_SCHEMA_CAPSULE_NAME: &CStr = c"arrow_schema";
+const ARROW_ARRAY_CAPSULE_NAME: &CStr = c"arrow_array";
+
fn to_py_err(err: ArrowError) -> PyErr {
PyArrowException::new_err(err.to_string())
}
@@ -139,7 +144,7 @@ fn validate_pycapsule(capsule: &Bound<PyCapsule>, name: &str) -> PyResult<()> {
));
}
- let capsule_name = capsule_name.unwrap().to_str()?;
+ let capsule_name = unsafe { capsule_name.unwrap().as_cstr().to_str()? };
if capsule_name != name {
return Err(PyValueError::new_err(format!(
"Expected name '{name}' in PyCapsule, instead got
'{capsule_name}'",
@@ -156,12 +161,16 @@ impl FromPyArrow for DataType {
// See https://arrow.apache.org/docs/format/CDataInterface/PyCapsuleInterface.html
if value.hasattr("__arrow_c_schema__")? {
let capsule = value.getattr("__arrow_c_schema__")?.call0()?;
- let capsule = capsule.downcast::<PyCapsule>()?;
+ let capsule = capsule.cast::<PyCapsule>()?;
validate_pycapsule(capsule, "arrow_schema")?;
- let schema_ptr = unsafe { capsule.reference::<FFI_ArrowSchema>() };
- let dtype = DataType::try_from(schema_ptr).map_err(to_py_err)?;
- return Ok(dtype);
+ let schema_ptr = capsule
+ .pointer_checked(Some(ARROW_SCHEMA_CAPSULE_NAME))?
+ .cast::<FFI_ArrowSchema>();
+ unsafe {
+ let dtype = DataType::try_from(schema_ptr.as_ref()).map_err(to_py_err)?;
+ return Ok(dtype);
+ }
}
validate_class("DataType", value)?;
@@ -192,12 +201,16 @@ impl FromPyArrow for Field {
// See https://arrow.apache.org/docs/format/CDataInterface/PyCapsuleInterface.html
if value.hasattr("__arrow_c_schema__")? {
let capsule = value.getattr("__arrow_c_schema__")?.call0()?;
- let capsule = capsule.downcast::<PyCapsule>()?;
+ let capsule = capsule.cast::<PyCapsule>()?;
validate_pycapsule(capsule, "arrow_schema")?;
- let schema_ptr = unsafe { capsule.reference::<FFI_ArrowSchema>() };
- let field = Field::try_from(schema_ptr).map_err(to_py_err)?;
- return Ok(field);
+ let schema_ptr = capsule
+ .pointer_checked(Some(ARROW_SCHEMA_CAPSULE_NAME))?
+ .cast::<FFI_ArrowSchema>();
+ unsafe {
+ let field = Field::try_from(schema_ptr.as_ref()).map_err(to_py_err)?;
+ return Ok(field);
+ }
}
validate_class("Field", value)?;
@@ -228,12 +241,16 @@ impl FromPyArrow for Schema {
// See https://arrow.apache.org/docs/format/CDataInterface/PyCapsuleInterface.html
if value.hasattr("__arrow_c_schema__")? {
let capsule = value.getattr("__arrow_c_schema__")?.call0()?;
- let capsule = capsule.downcast::<PyCapsule>()?;
+ let capsule = capsule.cast::<PyCapsule>()?;
validate_pycapsule(capsule, "arrow_schema")?;
- let schema_ptr = unsafe { capsule.reference::<FFI_ArrowSchema>() };
- let schema = Schema::try_from(schema_ptr).map_err(to_py_err)?;
- return Ok(schema);
+ let schema_ptr = capsule
+ .pointer_checked(Some(ARROW_SCHEMA_CAPSULE_NAME))?
+ .cast::<FFI_ArrowSchema>();
+ unsafe {
+ let schema = Schema::try_from(schema_ptr.as_ref()).map_err(to_py_err)?;
+ return Ok(schema);
+ }
}
validate_class("Schema", value)?;
@@ -272,16 +289,25 @@ impl FromPyArrow for ArrayData {
}
let schema_capsule = tuple.get_item(0)?;
- let schema_capsule = schema_capsule.downcast::<PyCapsule>()?;
+ let schema_capsule = schema_capsule.cast::<PyCapsule>()?;
let array_capsule = tuple.get_item(1)?;
- let array_capsule = array_capsule.downcast::<PyCapsule>()?;
+ let array_capsule = array_capsule.cast::<PyCapsule>()?;
validate_pycapsule(schema_capsule, "arrow_schema")?;
validate_pycapsule(array_capsule, "arrow_array")?;
- let schema_ptr = unsafe { schema_capsule.reference::<FFI_ArrowSchema>() };
- let array = unsafe { FFI_ArrowArray::from_raw(array_capsule.pointer() as _) };
- return unsafe { ffi::from_ffi(array, schema_ptr) }.map_err(to_py_err);
+ let schema_ptr = schema_capsule
+ .pointer_checked(Some(ARROW_SCHEMA_CAPSULE_NAME))?
+ .cast::<FFI_ArrowSchema>();
+ let array = unsafe {
+ FFI_ArrowArray::from_raw(
+ array_capsule
+ .pointer_checked(Some(ARROW_ARRAY_CAPSULE_NAME))?
+ .cast::<FFI_ArrowArray>()
+ .as_ptr(),
+ )
+ };
+ return unsafe { ffi::from_ffi(array, schema_ptr.as_ref()) }.map_err(to_py_err);
}
validate_class("Array", value)?;
@@ -325,7 +351,7 @@ impl ToPyArrow for ArrayData {
impl<T: FromPyArrow> FromPyArrow for Vec<T> {
fn from_pyarrow_bound(value: &Bound<PyAny>) -> PyResult<Self> {
- let list = value.downcast::<PyList>()?;
+ let list = value.cast::<PyList>()?;
list.iter().map(|x| T::from_pyarrow_bound(&x)).collect()
}
}
@@ -345,6 +371,7 @@ impl FromPyArrow for RecordBatch {
// Newer versions of PyArrow as well as other libraries with Arrow data implement this
// method, so prefer it over _export_to_c.
// See https://arrow.apache.org/docs/format/CDataInterface/PyCapsuleInterface.html
+
if value.hasattr("__arrow_c_array__")? {
let tuple = value.getattr("__arrow_c_array__")?.call0()?;
@@ -355,17 +382,22 @@ impl FromPyArrow for RecordBatch {
}
let schema_capsule = tuple.get_item(0)?;
- let schema_capsule = schema_capsule.downcast::<PyCapsule>()?;
+ let schema_capsule = schema_capsule.cast::<PyCapsule>()?;
let array_capsule = tuple.get_item(1)?;
- let array_capsule = array_capsule.downcast::<PyCapsule>()?;
+ let array_capsule = array_capsule.cast::<PyCapsule>()?;
validate_pycapsule(schema_capsule, "arrow_schema")?;
validate_pycapsule(array_capsule, "arrow_array")?;
- let schema_ptr = unsafe { schema_capsule.reference::<FFI_ArrowSchema>() };
- let ffi_array = unsafe { FFI_ArrowArray::from_raw(array_capsule.pointer().cast()) };
+ let schema_ptr = schema_capsule
+ .pointer_checked(Some(ARROW_SCHEMA_CAPSULE_NAME))?
+ .cast::<FFI_ArrowSchema>();
+ let array_ptr = array_capsule
+ .pointer_checked(Some(ARROW_ARRAY_CAPSULE_NAME))?
+ .cast::<FFI_ArrowArray>();
+ let ffi_array = unsafe { FFI_ArrowArray::from_raw(array_ptr.as_ptr()) };
let mut array_data =
- unsafe { ffi::from_ffi(ffi_array, schema_ptr) }.map_err(to_py_err)?;
+ unsafe { ffi::from_ffi(ffi_array, schema_ptr.as_ref()) }.map_err(to_py_err)?;
if !matches!(array_data.data_type(), DataType::Struct(_)) {
return Err(PyTypeError::new_err(
"Expected Struct type from __arrow_c_array.",
@@ -380,7 +412,8 @@ impl FromPyArrow for RecordBatch {
let array = StructArray::from(array_data);
// StructArray does not embed metadata from schema. We need to override
// the output schema with the schema from the capsule.
- let schema = Arc::new(Schema::try_from(schema_ptr).map_err(to_py_err)?);
+ let schema =
+ unsafe { Arc::new(Schema::try_from(schema_ptr.as_ref()).map_err(to_py_err)?) };
let (_fields, columns, nulls) = array.into_parts();
assert_eq!(
nulls.map(|n| n.null_count()).unwrap_or_default(),
@@ -397,7 +430,7 @@ impl FromPyArrow for RecordBatch {
let arrays = value.getattr("columns")?;
let arrays = arrays
- .downcast::<PyList>()?
+ .cast::<PyList>()?
.iter()
.map(|a| Ok(make_array(ArrayData::from_pyarrow_bound(&a)?)))
.collect::<PyResult<_>>()?;
@@ -432,10 +465,17 @@ impl FromPyArrow for ArrowArrayStreamReader {
// See https://arrow.apache.org/docs/format/CDataInterface/PyCapsuleInterface.html
if value.hasattr("__arrow_c_stream__")? {
let capsule = value.getattr("__arrow_c_stream__")?.call0()?;
- let capsule = capsule.downcast::<PyCapsule>()?;
+ let capsule = capsule.cast::<PyCapsule>()?;
validate_pycapsule(capsule, "arrow_array_stream")?;
- let stream = unsafe { FFI_ArrowArrayStream::from_raw(capsule.pointer() as _) };
+ let stream = unsafe {
+ FFI_ArrowArrayStream::from_raw(
+ capsule
+ .pointer_checked(Some(ARROW_ARRAY_STREAM_CAPSULE_NAME))?
+ .cast::<FFI_ArrowArrayStream>()
+ .as_ptr(),
+ )
+ };
let stream_reader = ArrowArrayStreamReader::try_new(stream)
.map_err(|err| PyValueError::new_err(err.to_string()))?;
@@ -589,9 +629,11 @@ impl IntoPyArrow for Table {
#[derive(Debug)]
pub struct PyArrowType<T>(pub T);
-impl<'source, T: FromPyArrow> FromPyObject<'source> for PyArrowType<T> {
- fn extract_bound(value: &Bound<'source, PyAny>) -> PyResult<Self> {
- Ok(Self(T::from_pyarrow_bound(value)?))
+impl<T: FromPyArrow> FromPyObject<'_, '_> for PyArrowType<T> {
+ type Error = PyErr;
+
+ fn extract(value: Borrowed<'_, '_, PyAny>) -> PyResult<Self> {
+ Ok(Self(T::from_pyarrow_bound(&*value)?))
}
}