This is an automated email from the ASF dual-hosted git repository.

alamb pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git


The following commit(s) were added to refs/heads/main by this push:
     new cfba3ccc0c Updated arrow-pyarrow to use pyo3 0.27, updated deprecated code warnings (#8773)
cfba3ccc0c is described below

commit cfba3ccc0c9460dba65ca000c34e6491c8043abc
Author: Stephen Carman <[email protected]>
AuthorDate: Fri Jan 9 15:56:40 2026 -0500

    Updated arrow-pyarrow to use pyo3 0.27, updated deprecated code warnings (#8773)
    
    # Which issue does this PR close?
    
    No issue I'm aware of. This just updates pyo3 to 0.27 so delta-rs can
    start looking at variant adoption.
    
    # Rationale for this change
    
    There is a long list of dependencies that need to support arrow 57 before
    delta-rs can reliably build against them. Updating pyo3 brings arrow in
    line with the other arrow-based dependencies.
    
    # Are these changes tested?
    
    Since this touches a good amount of unsafe code, and I'm on Windows
    (which I'd imagine is not the common path), I'd like to see the CI/CD run
    just to make sure everything looks good.
    
    # Are there any user-facing changes?
    
    Just that the pyo3 dependency is now at a later version (0.27.1).
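
    For downstream crates that also maintain their own pyo3 glue code, the
    API changes exercised in this diff are the ones most likely to surface as
    deprecation warnings: `downcast` calls become `cast`, capsule pointers
    are read through `pointer_checked`, and `FromPyObject` impls now take two
    lifetimes plus an associated `Error` type. A minimal sketch of the `cast`
    change, using a hypothetical downstream helper (not part of this commit):

    ```rust
    use pyo3::prelude::*;
    use pyo3::types::PyList;

    // Hypothetical helper: with pyo3 0.27, `downcast::<T>()` calls are
    // written as `cast::<T>()`, mirroring the edits in arrow-pyarrow/src/lib.rs.
    fn list_len(value: &Bound<'_, PyAny>) -> PyResult<usize> {
        let list = value.cast::<PyList>()?; // was: value.downcast::<PyList>()?
        Ok(list.len())
    }
    ```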
    
    ---------
    
    Co-authored-by: Matthijs Brobbel <[email protected]>
    Co-authored-by: Andrew Lamb <[email protected]>
---
 arrow-pyarrow-integration-testing/Cargo.toml |   2 +-
 arrow-pyarrow-testing/Cargo.toml             |   2 +-
 arrow-pyarrow/Cargo.toml                     |   2 +-
 arrow-pyarrow/src/lib.rs                     | 104 +++++++++++++++++++--------
 4 files changed, 76 insertions(+), 34 deletions(-)

diff --git a/arrow-pyarrow-integration-testing/Cargo.toml b/arrow-pyarrow-integration-testing/Cargo.toml
index 8dae4e2258..1fa4197f51 100644
--- a/arrow-pyarrow-integration-testing/Cargo.toml
+++ b/arrow-pyarrow-integration-testing/Cargo.toml
@@ -34,4 +34,4 @@ crate-type = ["cdylib"]
 
 [dependencies]
 arrow = { path = "../arrow", features = ["pyarrow"] }
-pyo3 = { version = "0.26.0", features = ["extension-module"] }
+pyo3 = { version = "0.27.1", features = ["extension-module"] }
diff --git a/arrow-pyarrow-testing/Cargo.toml b/arrow-pyarrow-testing/Cargo.toml
index e1ea72f5f4..b38af3c3b4 100644
--- a/arrow-pyarrow-testing/Cargo.toml
+++ b/arrow-pyarrow-testing/Cargo.toml
@@ -48,4 +48,4 @@ publish = false
 # Note no dependency on arrow, to ensure arrow-pyarrow can be used by itself
 arrow-array = { path = "../arrow-array" }
 arrow-pyarrow = { path = "../arrow-pyarrow" }
-pyo3 = { version = "0.26.0", default-features = false }
+pyo3 = { version = "0.27.1", default-features = false }
diff --git a/arrow-pyarrow/Cargo.toml b/arrow-pyarrow/Cargo.toml
index 9cfa235324..c508cabcfe 100644
--- a/arrow-pyarrow/Cargo.toml
+++ b/arrow-pyarrow/Cargo.toml
@@ -39,4 +39,4 @@ all-features = true
 arrow-array = { workspace = true, features = ["ffi"] }
 arrow-data = { workspace = true }
 arrow-schema = { workspace = true }
-pyo3 = { version = "0.26.0", default-features = false }
+pyo3 = { version = "0.27.1", default-features = false }
diff --git a/arrow-pyarrow/src/lib.rs b/arrow-pyarrow/src/lib.rs
index 1f8941ef1c..e2b3650709 100644
--- a/arrow-pyarrow/src/lib.rs
+++ b/arrow-pyarrow/src/lib.rs
@@ -60,6 +60,7 @@
 //! forcing eager reading into `Vec<RecordBatch>`.
 
 use std::convert::{From, TryFrom};
+use std::ffi::CStr;
 use std::ptr::{addr_of, addr_of_mut};
 use std::sync::Arc;
 
@@ -83,6 +84,10 @@ import_exception!(pyarrow, ArrowException);
 /// Represents an exception raised by PyArrow.
 pub type PyArrowException = ArrowException;
 
+const ARROW_ARRAY_STREAM_CAPSULE_NAME: &CStr = c"arrow_array_stream";
+const ARROW_SCHEMA_CAPSULE_NAME: &CStr = c"arrow_schema";
+const ARROW_ARRAY_CAPSULE_NAME: &CStr = c"arrow_array";
+
 fn to_py_err(err: ArrowError) -> PyErr {
     PyArrowException::new_err(err.to_string())
 }
@@ -139,7 +144,7 @@ fn validate_pycapsule(capsule: &Bound<PyCapsule>, name: &str) -> PyResult<()> {
         ));
     }
 
-    let capsule_name = capsule_name.unwrap().to_str()?;
+    let capsule_name = unsafe { capsule_name.unwrap().as_cstr().to_str()? };
     if capsule_name != name {
         return Err(PyValueError::new_err(format!(
             "Expected name '{name}' in PyCapsule, instead got 
'{capsule_name}'",
@@ -156,12 +161,16 @@ impl FromPyArrow for DataType {
         // See https://arrow.apache.org/docs/format/CDataInterface/PyCapsuleInterface.html
         if value.hasattr("__arrow_c_schema__")? {
             let capsule = value.getattr("__arrow_c_schema__")?.call0()?;
-            let capsule = capsule.downcast::<PyCapsule>()?;
+            let capsule = capsule.cast::<PyCapsule>()?;
             validate_pycapsule(capsule, "arrow_schema")?;
 
-            let schema_ptr = unsafe { capsule.reference::<FFI_ArrowSchema>() };
-            let dtype = DataType::try_from(schema_ptr).map_err(to_py_err)?;
-            return Ok(dtype);
+            let schema_ptr = capsule
+                .pointer_checked(Some(ARROW_SCHEMA_CAPSULE_NAME))?
+                .cast::<FFI_ArrowSchema>();
+            unsafe {
+                let dtype = DataType::try_from(schema_ptr.as_ref()).map_err(to_py_err)?;
+                return Ok(dtype);
+            }
         }
 
         validate_class("DataType", value)?;
@@ -192,12 +201,16 @@ impl FromPyArrow for Field {
         // See https://arrow.apache.org/docs/format/CDataInterface/PyCapsuleInterface.html
         if value.hasattr("__arrow_c_schema__")? {
             let capsule = value.getattr("__arrow_c_schema__")?.call0()?;
-            let capsule = capsule.downcast::<PyCapsule>()?;
+            let capsule = capsule.cast::<PyCapsule>()?;
             validate_pycapsule(capsule, "arrow_schema")?;
 
-            let schema_ptr = unsafe { capsule.reference::<FFI_ArrowSchema>() };
-            let field = Field::try_from(schema_ptr).map_err(to_py_err)?;
-            return Ok(field);
+            let schema_ptr = capsule
+                .pointer_checked(Some(ARROW_SCHEMA_CAPSULE_NAME))?
+                .cast::<FFI_ArrowSchema>();
+            unsafe {
+                let field = Field::try_from(schema_ptr.as_ref()).map_err(to_py_err)?;
+                return Ok(field);
+            }
         }
 
         validate_class("Field", value)?;
@@ -228,12 +241,16 @@ impl FromPyArrow for Schema {
         // See https://arrow.apache.org/docs/format/CDataInterface/PyCapsuleInterface.html
         if value.hasattr("__arrow_c_schema__")? {
             let capsule = value.getattr("__arrow_c_schema__")?.call0()?;
-            let capsule = capsule.downcast::<PyCapsule>()?;
+            let capsule = capsule.cast::<PyCapsule>()?;
             validate_pycapsule(capsule, "arrow_schema")?;
 
-            let schema_ptr = unsafe { capsule.reference::<FFI_ArrowSchema>() };
-            let schema = Schema::try_from(schema_ptr).map_err(to_py_err)?;
-            return Ok(schema);
+            let schema_ptr = capsule
+                .pointer_checked(Some(ARROW_SCHEMA_CAPSULE_NAME))?
+                .cast::<FFI_ArrowSchema>();
+            unsafe {
+                let schema = Schema::try_from(schema_ptr.as_ref()).map_err(to_py_err)?;
+                return Ok(schema);
+            }
         }
 
         validate_class("Schema", value)?;
@@ -272,16 +289,25 @@ impl FromPyArrow for ArrayData {
             }
 
             let schema_capsule = tuple.get_item(0)?;
-            let schema_capsule = schema_capsule.downcast::<PyCapsule>()?;
+            let schema_capsule = schema_capsule.cast::<PyCapsule>()?;
             let array_capsule = tuple.get_item(1)?;
-            let array_capsule = array_capsule.downcast::<PyCapsule>()?;
+            let array_capsule = array_capsule.cast::<PyCapsule>()?;
 
             validate_pycapsule(schema_capsule, "arrow_schema")?;
             validate_pycapsule(array_capsule, "arrow_array")?;
 
-            let schema_ptr = unsafe { schema_capsule.reference::<FFI_ArrowSchema>() };
-            let array = unsafe { FFI_ArrowArray::from_raw(array_capsule.pointer() as _) };
-            return unsafe { ffi::from_ffi(array, schema_ptr) }.map_err(to_py_err);
+            let schema_ptr = schema_capsule
+                .pointer_checked(Some(ARROW_SCHEMA_CAPSULE_NAME))?
+                .cast::<FFI_ArrowSchema>();
+            let array = unsafe {
+                FFI_ArrowArray::from_raw(
+                    array_capsule
+                        .pointer_checked(Some(ARROW_ARRAY_CAPSULE_NAME))?
+                        .cast::<FFI_ArrowArray>()
+                        .as_ptr(),
+                )
+            };
+            return unsafe { ffi::from_ffi(array, schema_ptr.as_ref()) }.map_err(to_py_err);
         }
 
         validate_class("Array", value)?;
@@ -325,7 +351,7 @@ impl ToPyArrow for ArrayData {
 
 impl<T: FromPyArrow> FromPyArrow for Vec<T> {
     fn from_pyarrow_bound(value: &Bound<PyAny>) -> PyResult<Self> {
-        let list = value.downcast::<PyList>()?;
+        let list = value.cast::<PyList>()?;
         list.iter().map(|x| T::from_pyarrow_bound(&x)).collect()
     }
 }
@@ -345,6 +371,7 @@ impl FromPyArrow for RecordBatch {
         // Newer versions of PyArrow as well as other libraries with Arrow data implement this
         // method, so prefer it over _export_to_c.
         // See https://arrow.apache.org/docs/format/CDataInterface/PyCapsuleInterface.html
+
         if value.hasattr("__arrow_c_array__")? {
             let tuple = value.getattr("__arrow_c_array__")?.call0()?;
 
@@ -355,17 +382,22 @@ impl FromPyArrow for RecordBatch {
             }
 
             let schema_capsule = tuple.get_item(0)?;
-            let schema_capsule = schema_capsule.downcast::<PyCapsule>()?;
+            let schema_capsule = schema_capsule.cast::<PyCapsule>()?;
             let array_capsule = tuple.get_item(1)?;
-            let array_capsule = array_capsule.downcast::<PyCapsule>()?;
+            let array_capsule = array_capsule.cast::<PyCapsule>()?;
 
             validate_pycapsule(schema_capsule, "arrow_schema")?;
             validate_pycapsule(array_capsule, "arrow_array")?;
 
-            let schema_ptr = unsafe { schema_capsule.reference::<FFI_ArrowSchema>() };
-            let ffi_array = unsafe { FFI_ArrowArray::from_raw(array_capsule.pointer().cast()) };
+            let schema_ptr = schema_capsule
+                .pointer_checked(Some(ARROW_SCHEMA_CAPSULE_NAME))?
+                .cast::<FFI_ArrowSchema>();
+            let array_ptr = array_capsule
+                .pointer_checked(Some(ARROW_ARRAY_CAPSULE_NAME))?
+                .cast::<FFI_ArrowArray>();
+            let ffi_array = unsafe { FFI_ArrowArray::from_raw(array_ptr.as_ptr()) };
             let mut array_data =
-                unsafe { ffi::from_ffi(ffi_array, schema_ptr) }.map_err(to_py_err)?;
+                unsafe { ffi::from_ffi(ffi_array, schema_ptr.as_ref()) }.map_err(to_py_err)?;
             if !matches!(array_data.data_type(), DataType::Struct(_)) {
                 return Err(PyTypeError::new_err(
                     "Expected Struct type from __arrow_c_array.",
@@ -380,7 +412,8 @@ impl FromPyArrow for RecordBatch {
             let array = StructArray::from(array_data);
             // StructArray does not embed metadata from schema. We need to override
             // the output schema with the schema from the capsule.
-            let schema = Arc::new(Schema::try_from(schema_ptr).map_err(to_py_err)?);
+            let schema =
+                unsafe { Arc::new(Schema::try_from(schema_ptr.as_ref()).map_err(to_py_err)?) };
             let (_fields, columns, nulls) = array.into_parts();
             assert_eq!(
                 nulls.map(|n| n.null_count()).unwrap_or_default(),
@@ -397,7 +430,7 @@ impl FromPyArrow for RecordBatch {
 
         let arrays = value.getattr("columns")?;
         let arrays = arrays
-            .downcast::<PyList>()?
+            .cast::<PyList>()?
             .iter()
             .map(|a| Ok(make_array(ArrayData::from_pyarrow_bound(&a)?)))
             .collect::<PyResult<_>>()?;
@@ -432,10 +465,17 @@ impl FromPyArrow for ArrowArrayStreamReader {
         // See https://arrow.apache.org/docs/format/CDataInterface/PyCapsuleInterface.html
         if value.hasattr("__arrow_c_stream__")? {
             let capsule = value.getattr("__arrow_c_stream__")?.call0()?;
-            let capsule = capsule.downcast::<PyCapsule>()?;
+            let capsule = capsule.cast::<PyCapsule>()?;
             validate_pycapsule(capsule, "arrow_array_stream")?;
 
-            let stream = unsafe { FFI_ArrowArrayStream::from_raw(capsule.pointer() as _) };
+            let stream = unsafe {
+                FFI_ArrowArrayStream::from_raw(
+                    capsule
+                        .pointer_checked(Some(ARROW_ARRAY_STREAM_CAPSULE_NAME))?
+                        .cast::<FFI_ArrowArrayStream>()
+                        .as_ptr(),
+                )
+            };
 
             let stream_reader = ArrowArrayStreamReader::try_new(stream)
                 .map_err(|err| PyValueError::new_err(err.to_string()))?;
@@ -589,9 +629,11 @@ impl IntoPyArrow for Table {
 #[derive(Debug)]
 pub struct PyArrowType<T>(pub T);
 
-impl<'source, T: FromPyArrow> FromPyObject<'source> for PyArrowType<T> {
-    fn extract_bound(value: &Bound<'source, PyAny>) -> PyResult<Self> {
-        Ok(Self(T::from_pyarrow_bound(value)?))
+impl<T: FromPyArrow> FromPyObject<'_, '_> for PyArrowType<T> {
+    type Error = PyErr;
+
+    fn extract(value: Borrowed<'_, '_, PyAny>) -> PyResult<Self> {
+        Ok(Self(T::from_pyarrow_bound(&*value)?))
     }
 }
 
