This is an automated email from the ASF dual-hosted git repository.

jeffreyvo pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git


The following commit(s) were added to refs/heads/master by this push:
     new 1b0ef0224e3 Update py03 from 0.20 to 0.21 (#5566)
1b0ef0224e3 is described below

commit 1b0ef0224e30bba75c970d6a8c2f64d48bbbced6
Author: Jeffrey Vo <[email protected]>
AuthorDate: Fri Apr 5 20:13:04 2024 +1100

    Update py03 from 0.20 to 0.21 (#5566)
    
    * Update py03 from 0.20 to 0.21
    
    * Bump pyo3 in arrow-pyarrow-integration-testing
    
    * Update pyarrow API to align with pyo3 0.21 changes
    
    * Fix arrow-pyarrow-integration-testing clippy
    
    * Minor
    
    * Fix typo
    
    * Use PyBackedStr when extracting
    
    * Bump to pyo3 0.21.1
    
    * Trigger
---
 arrow-pyarrow-integration-testing/Cargo.toml |  2 +-
 arrow-pyarrow-integration-testing/src/lib.rs | 25 +++-----
 arrow/Cargo.toml                             |  2 +-
 arrow/src/pyarrow.rs                         | 95 ++++++++++++++++------------
 arrow/tests/pyarrow.rs                       |  4 +-
 5 files changed, 67 insertions(+), 61 deletions(-)

diff --git a/arrow-pyarrow-integration-testing/Cargo.toml 
b/arrow-pyarrow-integration-testing/Cargo.toml
index 8c60c086c29..6f07d42d88c 100644
--- a/arrow-pyarrow-integration-testing/Cargo.toml
+++ b/arrow-pyarrow-integration-testing/Cargo.toml
@@ -34,4 +34,4 @@ crate-type = ["cdylib"]
 
 [dependencies]
 arrow = { path = "../arrow", features = ["pyarrow"] }
-pyo3 = { version = "0.20", features = ["extension-module"] }
+pyo3 = { version = "0.21.1", features = ["extension-module"] }
diff --git a/arrow-pyarrow-integration-testing/src/lib.rs 
b/arrow-pyarrow-integration-testing/src/lib.rs
index a53447b53c3..918fa74e308 100644
--- a/arrow-pyarrow-integration-testing/src/lib.rs
+++ b/arrow-pyarrow-integration-testing/src/lib.rs
@@ -40,9 +40,9 @@ fn to_py_err(err: ArrowError) -> PyErr {
 
 /// Returns `array + array` of an int64 array.
 #[pyfunction]
-fn double(array: &PyAny, py: Python) -> PyResult<PyObject> {
+fn double(array: &Bound<PyAny>, py: Python) -> PyResult<PyObject> {
     // import
-    let array = make_array(ArrayData::from_pyarrow(array)?);
+    let array = make_array(ArrayData::from_pyarrow_bound(&array)?);
 
     // perform some operation
     let array = array
@@ -60,7 +60,7 @@ fn double(array: &PyAny, py: Python) -> PyResult<PyObject> {
 /// calls a lambda function that receives and returns an array
 /// whose result must be the array multiplied by two
 #[pyfunction]
-fn double_py(lambda: &PyAny, py: Python) -> PyResult<bool> {
+fn double_py(lambda: &Bound<PyAny>, py: Python) -> PyResult<bool> {
     // create
     let array = Arc::new(Int64Array::from(vec![Some(1), None, Some(3)]));
     let expected = Arc::new(Int64Array::from(vec![Some(2), None, Some(6)])) as 
ArrayRef;
@@ -68,7 +68,7 @@ fn double_py(lambda: &PyAny, py: Python) -> PyResult<bool> {
     // to py
     let pyarray = array.to_data().to_pyarrow(py)?;
     let pyarray = lambda.call1((pyarray,))?;
-    let array = make_array(ArrayData::from_pyarrow(pyarray)?);
+    let array = make_array(ArrayData::from_pyarrow_bound(&pyarray)?);
 
     Ok(array == expected)
 }
@@ -82,16 +82,12 @@ fn make_empty_array(datatype: PyArrowType<DataType>, py: 
Python) -> PyResult<PyO
 
 /// Returns the substring
 #[pyfunction]
-fn substring(
-    array: PyArrowType<ArrayData>,
-    start: i64,
-) -> PyResult<PyArrowType<ArrayData>> {
+fn substring(array: PyArrowType<ArrayData>, start: i64) -> 
PyResult<PyArrowType<ArrayData>> {
     // import
     let array = make_array(array.0);
 
     // substring
-    let array =
-        kernels::substring::substring(array.as_ref(), start, 
None).map_err(to_py_err)?;
+    let array = kernels::substring::substring(array.as_ref(), start, 
None).map_err(to_py_err)?;
 
     Ok(array.to_data().into())
 }
@@ -102,8 +98,7 @@ fn concatenate(array: PyArrowType<ArrayData>, py: Python) -> 
PyResult<PyObject>
     let array = make_array(array.0);
 
     // concat
-    let array =
-        kernels::concat::concat(&[array.as_ref(), 
array.as_ref()]).map_err(to_py_err)?;
+    let array = kernels::concat::concat(&[array.as_ref(), 
array.as_ref()]).map_err(to_py_err)?;
 
     array.to_data().to_pyarrow(py)
 }
@@ -129,9 +124,7 @@ fn round_trip_array(obj: PyArrowType<ArrayData>) -> 
PyResult<PyArrowType<ArrayDa
 }
 
 #[pyfunction]
-fn round_trip_record_batch(
-    obj: PyArrowType<RecordBatch>,
-) -> PyResult<PyArrowType<RecordBatch>> {
+fn round_trip_record_batch(obj: PyArrowType<RecordBatch>) -> 
PyResult<PyArrowType<RecordBatch>> {
     Ok(obj)
 }
 
@@ -168,7 +161,7 @@ fn boxed_reader_roundtrip(
 }
 
 #[pymodule]
-fn arrow_pyarrow_integration_testing(_py: Python, m: &PyModule) -> 
PyResult<()> {
+fn arrow_pyarrow_integration_testing(_py: Python, m: &Bound<PyModule>) -> 
PyResult<()> {
     m.add_wrapped(wrap_pyfunction!(double))?;
     m.add_wrapped(wrap_pyfunction!(double_py))?;
     m.add_wrapped(wrap_pyfunction!(make_empty_array))?;
diff --git a/arrow/Cargo.toml b/arrow/Cargo.toml
index 4f7fda9b807..a938d75b1a6 100644
--- a/arrow/Cargo.toml
+++ b/arrow/Cargo.toml
@@ -54,7 +54,7 @@ arrow-select = { workspace = true }
 arrow-string = { workspace = true }
 
 rand = { version = "0.8", default-features = false, features = ["std", 
"std_rng"], optional = true }
-pyo3 = { version = "0.20", default-features = false, optional = true }
+pyo3 = { version = "0.21.1", default-features = false, optional = true }
 
 [package.metadata.docs.rs]
 features = ["prettyprint", "ipc_compression", "ffi", "pyarrow"]
diff --git a/arrow/src/pyarrow.rs b/arrow/src/pyarrow.rs
index 39702ce01ae..1733067c738 100644
--- a/arrow/src/pyarrow.rs
+++ b/arrow/src/pyarrow.rs
@@ -64,6 +64,7 @@ use pyo3::exceptions::{PyTypeError, PyValueError};
 use pyo3::ffi::Py_uintptr_t;
 use pyo3::import_exception;
 use pyo3::prelude::*;
+use pyo3::pybacked::PyBackedStr;
 use pyo3::types::{PyCapsule, PyList, PyTuple};
 
 use crate::array::{make_array, ArrayData};
@@ -82,7 +83,12 @@ fn to_py_err(err: ArrowError) -> PyErr {
 }
 
 pub trait FromPyArrow: Sized {
-    fn from_pyarrow(value: &PyAny) -> PyResult<Self>;
+    #[deprecated(since = "52.0.0", note = "Use from_pyarrow_bound")]
+    fn from_pyarrow(value: &PyAny) -> PyResult<Self> {
+        Self::from_pyarrow_bound(&value.as_borrowed())
+    }
+
+    fn from_pyarrow_bound(value: &Bound<PyAny>) -> PyResult<Self>;
 }
 
 /// Create a new PyArrow object from a arrow-rs type.
@@ -101,15 +107,17 @@ impl<T: ToPyArrow> IntoPyArrow for T {
     }
 }
 
-fn validate_class(expected: &str, value: &PyAny) -> PyResult<()> {
-    let pyarrow = PyModule::import(value.py(), "pyarrow")?;
+fn validate_class(expected: &str, value: &Bound<PyAny>) -> PyResult<()> {
+    let pyarrow = PyModule::import_bound(value.py(), "pyarrow")?;
     let class = pyarrow.getattr(expected)?;
-    if !value.is_instance(class)? {
-        let expected_module = class.getattr("__module__")?.extract::<&str>()?;
-        let expected_name = class.getattr("__name__")?.extract::<&str>()?;
+    if !value.is_instance(&class)? {
+        let expected_module = 
class.getattr("__module__")?.extract::<PyBackedStr>()?;
+        let expected_name = 
class.getattr("__name__")?.extract::<PyBackedStr>()?;
         let found_class = value.get_type();
-        let found_module = 
found_class.getattr("__module__")?.extract::<&str>()?;
-        let found_name = found_class.getattr("__name__")?.extract::<&str>()?;
+        let found_module = found_class
+            .getattr("__module__")?
+            .extract::<PyBackedStr>()?;
+        let found_name = 
found_class.getattr("__name__")?.extract::<PyBackedStr>()?;
         return Err(PyTypeError::new_err(format!(
             "Expected instance of {}.{}, got {}.{}",
             expected_module, expected_name, found_module, found_name
@@ -118,7 +126,7 @@ fn validate_class(expected: &str, value: &PyAny) -> 
PyResult<()> {
     Ok(())
 }
 
-fn validate_pycapsule(capsule: &PyCapsule, name: &str) -> PyResult<()> {
+fn validate_pycapsule(capsule: &Bound<PyCapsule>, name: &str) -> PyResult<()> {
     let capsule_name = capsule.name()?;
     if capsule_name.is_none() {
         return Err(PyValueError::new_err(
@@ -138,13 +146,13 @@ fn validate_pycapsule(capsule: &PyCapsule, name: &str) -> 
PyResult<()> {
 }
 
 impl FromPyArrow for DataType {
-    fn from_pyarrow(value: &PyAny) -> PyResult<Self> {
+    fn from_pyarrow_bound(value: &Bound<PyAny>) -> PyResult<Self> {
         // Newer versions of PyArrow as well as other libraries with Arrow 
data implement this
         // method, so prefer it over _export_to_c.
         // See 
https://arrow.apache.org/docs/format/CDataInterface/PyCapsuleInterface.html
         if value.hasattr("__arrow_c_schema__")? {
-            let capsule: &PyCapsule =
-                
PyTryInto::try_into(value.getattr("__arrow_c_schema__")?.call0()?)?;
+            let capsule = value.getattr("__arrow_c_schema__")?.call0()?;
+            let capsule = capsule.downcast::<PyCapsule>()?;
             validate_pycapsule(capsule, "arrow_schema")?;
 
             let schema_ptr = unsafe { capsule.reference::<FFI_ArrowSchema>() };
@@ -166,7 +174,7 @@ impl ToPyArrow for DataType {
     fn to_pyarrow(&self, py: Python) -> PyResult<PyObject> {
         let c_schema = FFI_ArrowSchema::try_from(self).map_err(to_py_err)?;
         let c_schema_ptr = &c_schema as *const FFI_ArrowSchema;
-        let module = py.import("pyarrow")?;
+        let module = py.import_bound("pyarrow")?;
         let class = module.getattr("DataType")?;
         let dtype = class.call_method1("_import_from_c", (c_schema_ptr as 
Py_uintptr_t,))?;
         Ok(dtype.into())
@@ -174,13 +182,13 @@ impl ToPyArrow for DataType {
 }
 
 impl FromPyArrow for Field {
-    fn from_pyarrow(value: &PyAny) -> PyResult<Self> {
+    fn from_pyarrow_bound(value: &Bound<PyAny>) -> PyResult<Self> {
         // Newer versions of PyArrow as well as other libraries with Arrow 
data implement this
         // method, so prefer it over _export_to_c.
         // See 
https://arrow.apache.org/docs/format/CDataInterface/PyCapsuleInterface.html
         if value.hasattr("__arrow_c_schema__")? {
-            let capsule: &PyCapsule =
-                
PyTryInto::try_into(value.getattr("__arrow_c_schema__")?.call0()?)?;
+            let capsule = value.getattr("__arrow_c_schema__")?.call0()?;
+            let capsule = capsule.downcast::<PyCapsule>()?;
             validate_pycapsule(capsule, "arrow_schema")?;
 
             let schema_ptr = unsafe { capsule.reference::<FFI_ArrowSchema>() };
@@ -202,7 +210,7 @@ impl ToPyArrow for Field {
     fn to_pyarrow(&self, py: Python) -> PyResult<PyObject> {
         let c_schema = FFI_ArrowSchema::try_from(self).map_err(to_py_err)?;
         let c_schema_ptr = &c_schema as *const FFI_ArrowSchema;
-        let module = py.import("pyarrow")?;
+        let module = py.import_bound("pyarrow")?;
         let class = module.getattr("Field")?;
         let dtype = class.call_method1("_import_from_c", (c_schema_ptr as 
Py_uintptr_t,))?;
         Ok(dtype.into())
@@ -210,13 +218,13 @@ impl ToPyArrow for Field {
 }
 
 impl FromPyArrow for Schema {
-    fn from_pyarrow(value: &PyAny) -> PyResult<Self> {
+    fn from_pyarrow_bound(value: &Bound<PyAny>) -> PyResult<Self> {
         // Newer versions of PyArrow as well as other libraries with Arrow 
data implement this
         // method, so prefer it over _export_to_c.
         // See 
https://arrow.apache.org/docs/format/CDataInterface/PyCapsuleInterface.html
         if value.hasattr("__arrow_c_schema__")? {
-            let capsule: &PyCapsule =
-                
PyTryInto::try_into(value.getattr("__arrow_c_schema__")?.call0()?)?;
+            let capsule = value.getattr("__arrow_c_schema__")?.call0()?;
+            let capsule = capsule.downcast::<PyCapsule>()?;
             validate_pycapsule(capsule, "arrow_schema")?;
 
             let schema_ptr = unsafe { capsule.reference::<FFI_ArrowSchema>() };
@@ -238,7 +246,7 @@ impl ToPyArrow for Schema {
     fn to_pyarrow(&self, py: Python) -> PyResult<PyObject> {
         let c_schema = FFI_ArrowSchema::try_from(self).map_err(to_py_err)?;
         let c_schema_ptr = &c_schema as *const FFI_ArrowSchema;
-        let module = py.import("pyarrow")?;
+        let module = py.import_bound("pyarrow")?;
         let class = module.getattr("Schema")?;
         let schema = class.call_method1("_import_from_c", (c_schema_ptr as 
Py_uintptr_t,))?;
         Ok(schema.into())
@@ -246,7 +254,7 @@ impl ToPyArrow for Schema {
 }
 
 impl FromPyArrow for ArrayData {
-    fn from_pyarrow(value: &PyAny) -> PyResult<Self> {
+    fn from_pyarrow_bound(value: &Bound<PyAny>) -> PyResult<Self> {
         // Newer versions of PyArrow as well as other libraries with Arrow 
data implement this
         // method, so prefer it over _export_to_c.
         // See 
https://arrow.apache.org/docs/format/CDataInterface/PyCapsuleInterface.html
@@ -259,8 +267,10 @@ impl FromPyArrow for ArrayData {
                 ));
             }
 
-            let schema_capsule: &PyCapsule = 
PyTryInto::try_into(tuple.get_item(0)?)?;
-            let array_capsule: &PyCapsule = 
PyTryInto::try_into(tuple.get_item(1)?)?;
+            let schema_capsule = tuple.get_item(0)?;
+            let schema_capsule = schema_capsule.downcast::<PyCapsule>()?;
+            let array_capsule = tuple.get_item(1)?;
+            let array_capsule = array_capsule.downcast::<PyCapsule>()?;
 
             validate_pycapsule(schema_capsule, "arrow_schema")?;
             validate_pycapsule(array_capsule, "arrow_array")?;
@@ -296,7 +306,7 @@ impl ToPyArrow for ArrayData {
         let array = FFI_ArrowArray::new(self);
         let schema = 
FFI_ArrowSchema::try_from(self.data_type()).map_err(to_py_err)?;
 
-        let module = py.import("pyarrow")?;
+        let module = py.import_bound("pyarrow")?;
         let class = module.getattr("Array")?;
         let array = class.call_method1(
             "_import_from_c",
@@ -310,9 +320,9 @@ impl ToPyArrow for ArrayData {
 }
 
 impl<T: FromPyArrow> FromPyArrow for Vec<T> {
-    fn from_pyarrow(value: &PyAny) -> PyResult<Self> {
+    fn from_pyarrow_bound(value: &Bound<PyAny>) -> PyResult<Self> {
         let list = value.downcast::<PyList>()?;
-        list.iter().map(|x| T::from_pyarrow(x)).collect()
+        list.iter().map(|x| T::from_pyarrow_bound(&x)).collect()
     }
 }
 
@@ -327,7 +337,7 @@ impl<T: ToPyArrow> ToPyArrow for Vec<T> {
 }
 
 impl FromPyArrow for RecordBatch {
-    fn from_pyarrow(value: &PyAny) -> PyResult<Self> {
+    fn from_pyarrow_bound(value: &Bound<PyAny>) -> PyResult<Self> {
         // Newer versions of PyArrow as well as other libraries with Arrow 
data implement this
         // method, so prefer it over _export_to_c.
         // See 
https://arrow.apache.org/docs/format/CDataInterface/PyCapsuleInterface.html
@@ -340,8 +350,10 @@ impl FromPyArrow for RecordBatch {
                 ));
             }
 
-            let schema_capsule: &PyCapsule = 
PyTryInto::try_into(tuple.get_item(0)?)?;
-            let array_capsule: &PyCapsule = 
PyTryInto::try_into(tuple.get_item(1)?)?;
+            let schema_capsule = tuple.get_item(0)?;
+            let schema_capsule = schema_capsule.downcast::<PyCapsule>()?;
+            let array_capsule = tuple.get_item(1)?;
+            let array_capsule = array_capsule.downcast::<PyCapsule>()?;
 
             validate_pycapsule(schema_capsule, "arrow_schema")?;
             validate_pycapsule(array_capsule, "arrow_array")?;
@@ -370,12 +382,13 @@ impl FromPyArrow for RecordBatch {
         validate_class("RecordBatch", value)?;
         // TODO(kszucs): implement the FFI conversions in arrow-rs for 
RecordBatches
         let schema = value.getattr("schema")?;
-        let schema = Arc::new(Schema::from_pyarrow(schema)?);
+        let schema = Arc::new(Schema::from_pyarrow_bound(&schema)?);
 
-        let arrays = value.getattr("columns")?.downcast::<PyList>()?;
+        let arrays = value.getattr("columns")?;
         let arrays = arrays
+            .downcast::<PyList>()?
             .iter()
-            .map(|a| Ok(make_array(ArrayData::from_pyarrow(a)?)))
+            .map(|a| Ok(make_array(ArrayData::from_pyarrow_bound(&a)?)))
             .collect::<PyResult<_>>()?;
 
         let batch = RecordBatch::try_new(schema, arrays).map_err(to_py_err)?;
@@ -395,13 +408,13 @@ impl ToPyArrow for RecordBatch {
 
 /// Supports conversion from `pyarrow.RecordBatchReader` to 
[ArrowArrayStreamReader].
 impl FromPyArrow for ArrowArrayStreamReader {
-    fn from_pyarrow(value: &PyAny) -> PyResult<Self> {
+    fn from_pyarrow_bound(value: &Bound<PyAny>) -> PyResult<Self> {
         // Newer versions of PyArrow as well as other libraries with Arrow 
data implement this
         // method, so prefer it over _export_to_c.
         // See 
https://arrow.apache.org/docs/format/CDataInterface/PyCapsuleInterface.html
         if value.hasattr("__arrow_c_stream__")? {
-            let capsule: &PyCapsule =
-                
PyTryInto::try_into(value.getattr("__arrow_c_stream__")?.call0()?)?;
+            let capsule = value.getattr("__arrow_c_stream__")?.call0()?;
+            let capsule = capsule.downcast::<PyCapsule>()?;
             validate_pycapsule(capsule, "arrow_array_stream")?;
 
             let stream = unsafe { 
FFI_ArrowArrayStream::from_raw(capsule.pointer() as _) };
@@ -421,7 +434,7 @@ impl FromPyArrow for ArrowArrayStreamReader {
         // make the conversion through PyArrow's private API
         // this changes the pointer's memory and is thus unsafe.
         // In particular, `_export_to_c` can go out of bounds
-        let args = PyTuple::new(value.py(), [stream_ptr as Py_uintptr_t]);
+        let args = PyTuple::new_bound(value.py(), [stream_ptr as 
Py_uintptr_t]);
         value.call_method1("_export_to_c", args)?;
 
         let stream_reader = ArrowArrayStreamReader::try_new(stream)
@@ -439,9 +452,9 @@ impl IntoPyArrow for Box<dyn RecordBatchReader + Send> {
         let mut stream = FFI_ArrowArrayStream::new(self);
 
         let stream_ptr = (&mut stream) as *mut FFI_ArrowArrayStream;
-        let module = py.import("pyarrow")?;
+        let module = py.import_bound("pyarrow")?;
         let class = module.getattr("RecordBatchReader")?;
-        let args = PyTuple::new(py, [stream_ptr as Py_uintptr_t]);
+        let args = PyTuple::new_bound(py, [stream_ptr as Py_uintptr_t]);
         let reader = class.call_method1("_import_from_c", args)?;
 
         Ok(PyObject::from(reader))
@@ -463,8 +476,8 @@ impl IntoPyArrow for ArrowArrayStreamReader {
 pub struct PyArrowType<T>(pub T);
 
 impl<'source, T: FromPyArrow> FromPyObject<'source> for PyArrowType<T> {
-    fn extract(value: &'source PyAny) -> PyResult<Self> {
-        Ok(Self(T::from_pyarrow(value)?))
+    fn extract_bound(value: &Bound<'source, PyAny>) -> PyResult<Self> {
+        Ok(Self(T::from_pyarrow_bound(value)?))
     }
 }
 
diff --git a/arrow/tests/pyarrow.rs b/arrow/tests/pyarrow.rs
index 4b6991da006..a1c365c3179 100644
--- a/arrow/tests/pyarrow.rs
+++ b/arrow/tests/pyarrow.rs
@@ -32,9 +32,9 @@ fn test_to_pyarrow() {
 
     let res = Python::with_gil(|py| {
         let py_input = input.to_pyarrow(py)?;
-        let records = RecordBatch::from_pyarrow(py_input.as_ref(py))?;
+        let records = RecordBatch::from_pyarrow_bound(py_input.bind(py))?;
         let py_records = records.to_pyarrow(py)?;
-        RecordBatch::from_pyarrow(py_records.as_ref(py))
+        RecordBatch::from_pyarrow_bound(py_records.bind(py))
     })
     .unwrap();
 

Reply via email to