This is an automated email from the ASF dual-hosted git repository.
jeffreyvo pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git
The following commit(s) were added to refs/heads/master by this push:
new 1b0ef0224e3 Update pyo3 from 0.20 to 0.21 (#5566)
1b0ef0224e3 is described below
commit 1b0ef0224e30bba75c970d6a8c2f64d48bbbced6
Author: Jeffrey Vo <[email protected]>
AuthorDate: Fri Apr 5 20:13:04 2024 +1100
Update pyo3 from 0.20 to 0.21 (#5566)
* Update pyo3 from 0.20 to 0.21
* Bump pyo3 in arrow-pyarrow-integration-testing
* Update pyarrow API to align with pyo3 0.21 changes
* Fix arrow-pyarrow-integration-testing clippy
* Minor
* Fix typo
* Use PyBackedStr when extracting
* Bump to pyo3 0.21.1
* Trigger
---
arrow-pyarrow-integration-testing/Cargo.toml | 2 +-
arrow-pyarrow-integration-testing/src/lib.rs | 25 +++-----
arrow/Cargo.toml | 2 +-
arrow/src/pyarrow.rs | 95 ++++++++++++++++------------
arrow/tests/pyarrow.rs | 4 +-
5 files changed, 67 insertions(+), 61 deletions(-)
diff --git a/arrow-pyarrow-integration-testing/Cargo.toml
b/arrow-pyarrow-integration-testing/Cargo.toml
index 8c60c086c29..6f07d42d88c 100644
--- a/arrow-pyarrow-integration-testing/Cargo.toml
+++ b/arrow-pyarrow-integration-testing/Cargo.toml
@@ -34,4 +34,4 @@ crate-type = ["cdylib"]
[dependencies]
arrow = { path = "../arrow", features = ["pyarrow"] }
-pyo3 = { version = "0.20", features = ["extension-module"] }
+pyo3 = { version = "0.21.1", features = ["extension-module"] }
diff --git a/arrow-pyarrow-integration-testing/src/lib.rs
b/arrow-pyarrow-integration-testing/src/lib.rs
index a53447b53c3..918fa74e308 100644
--- a/arrow-pyarrow-integration-testing/src/lib.rs
+++ b/arrow-pyarrow-integration-testing/src/lib.rs
@@ -40,9 +40,9 @@ fn to_py_err(err: ArrowError) -> PyErr {
/// Returns `array + array` of an int64 array.
#[pyfunction]
-fn double(array: &PyAny, py: Python) -> PyResult<PyObject> {
+fn double(array: &Bound<PyAny>, py: Python) -> PyResult<PyObject> {
// import
- let array = make_array(ArrayData::from_pyarrow(array)?);
+ let array = make_array(ArrayData::from_pyarrow_bound(&array)?);
// perform some operation
let array = array
@@ -60,7 +60,7 @@ fn double(array: &PyAny, py: Python) -> PyResult<PyObject> {
/// calls a lambda function that receives and returns an array
/// whose result must be the array multiplied by two
#[pyfunction]
-fn double_py(lambda: &PyAny, py: Python) -> PyResult<bool> {
+fn double_py(lambda: &Bound<PyAny>, py: Python) -> PyResult<bool> {
// create
let array = Arc::new(Int64Array::from(vec![Some(1), None, Some(3)]));
let expected = Arc::new(Int64Array::from(vec![Some(2), None, Some(6)])) as
ArrayRef;
@@ -68,7 +68,7 @@ fn double_py(lambda: &PyAny, py: Python) -> PyResult<bool> {
// to py
let pyarray = array.to_data().to_pyarrow(py)?;
let pyarray = lambda.call1((pyarray,))?;
- let array = make_array(ArrayData::from_pyarrow(pyarray)?);
+ let array = make_array(ArrayData::from_pyarrow_bound(&pyarray)?);
Ok(array == expected)
}
@@ -82,16 +82,12 @@ fn make_empty_array(datatype: PyArrowType<DataType>, py:
Python) -> PyResult<PyO
/// Returns the substring
#[pyfunction]
-fn substring(
- array: PyArrowType<ArrayData>,
- start: i64,
-) -> PyResult<PyArrowType<ArrayData>> {
+fn substring(array: PyArrowType<ArrayData>, start: i64) ->
PyResult<PyArrowType<ArrayData>> {
// import
let array = make_array(array.0);
// substring
- let array =
- kernels::substring::substring(array.as_ref(), start,
None).map_err(to_py_err)?;
+ let array = kernels::substring::substring(array.as_ref(), start,
None).map_err(to_py_err)?;
Ok(array.to_data().into())
}
@@ -102,8 +98,7 @@ fn concatenate(array: PyArrowType<ArrayData>, py: Python) ->
PyResult<PyObject>
let array = make_array(array.0);
// concat
- let array =
- kernels::concat::concat(&[array.as_ref(),
array.as_ref()]).map_err(to_py_err)?;
+ let array = kernels::concat::concat(&[array.as_ref(),
array.as_ref()]).map_err(to_py_err)?;
array.to_data().to_pyarrow(py)
}
@@ -129,9 +124,7 @@ fn round_trip_array(obj: PyArrowType<ArrayData>) ->
PyResult<PyArrowType<ArrayDa
}
#[pyfunction]
-fn round_trip_record_batch(
- obj: PyArrowType<RecordBatch>,
-) -> PyResult<PyArrowType<RecordBatch>> {
+fn round_trip_record_batch(obj: PyArrowType<RecordBatch>) ->
PyResult<PyArrowType<RecordBatch>> {
Ok(obj)
}
@@ -168,7 +161,7 @@ fn boxed_reader_roundtrip(
}
#[pymodule]
-fn arrow_pyarrow_integration_testing(_py: Python, m: &PyModule) ->
PyResult<()> {
+fn arrow_pyarrow_integration_testing(_py: Python, m: &Bound<PyModule>) ->
PyResult<()> {
m.add_wrapped(wrap_pyfunction!(double))?;
m.add_wrapped(wrap_pyfunction!(double_py))?;
m.add_wrapped(wrap_pyfunction!(make_empty_array))?;
diff --git a/arrow/Cargo.toml b/arrow/Cargo.toml
index 4f7fda9b807..a938d75b1a6 100644
--- a/arrow/Cargo.toml
+++ b/arrow/Cargo.toml
@@ -54,7 +54,7 @@ arrow-select = { workspace = true }
arrow-string = { workspace = true }
rand = { version = "0.8", default-features = false, features = ["std",
"std_rng"], optional = true }
-pyo3 = { version = "0.20", default-features = false, optional = true }
+pyo3 = { version = "0.21.1", default-features = false, optional = true }
[package.metadata.docs.rs]
features = ["prettyprint", "ipc_compression", "ffi", "pyarrow"]
diff --git a/arrow/src/pyarrow.rs b/arrow/src/pyarrow.rs
index 39702ce01ae..1733067c738 100644
--- a/arrow/src/pyarrow.rs
+++ b/arrow/src/pyarrow.rs
@@ -64,6 +64,7 @@ use pyo3::exceptions::{PyTypeError, PyValueError};
use pyo3::ffi::Py_uintptr_t;
use pyo3::import_exception;
use pyo3::prelude::*;
+use pyo3::pybacked::PyBackedStr;
use pyo3::types::{PyCapsule, PyList, PyTuple};
use crate::array::{make_array, ArrayData};
@@ -82,7 +83,12 @@ fn to_py_err(err: ArrowError) -> PyErr {
}
pub trait FromPyArrow: Sized {
- fn from_pyarrow(value: &PyAny) -> PyResult<Self>;
+ #[deprecated(since = "52.0.0", note = "Use from_pyarrow_bound")]
+ fn from_pyarrow(value: &PyAny) -> PyResult<Self> {
+ Self::from_pyarrow_bound(&value.as_borrowed())
+ }
+
+ fn from_pyarrow_bound(value: &Bound<PyAny>) -> PyResult<Self>;
}
/// Create a new PyArrow object from a arrow-rs type.
@@ -101,15 +107,17 @@ impl<T: ToPyArrow> IntoPyArrow for T {
}
}
-fn validate_class(expected: &str, value: &PyAny) -> PyResult<()> {
- let pyarrow = PyModule::import(value.py(), "pyarrow")?;
+fn validate_class(expected: &str, value: &Bound<PyAny>) -> PyResult<()> {
+ let pyarrow = PyModule::import_bound(value.py(), "pyarrow")?;
let class = pyarrow.getattr(expected)?;
- if !value.is_instance(class)? {
- let expected_module = class.getattr("__module__")?.extract::<&str>()?;
- let expected_name = class.getattr("__name__")?.extract::<&str>()?;
+ if !value.is_instance(&class)? {
+ let expected_module =
class.getattr("__module__")?.extract::<PyBackedStr>()?;
+ let expected_name =
class.getattr("__name__")?.extract::<PyBackedStr>()?;
let found_class = value.get_type();
- let found_module =
found_class.getattr("__module__")?.extract::<&str>()?;
- let found_name = found_class.getattr("__name__")?.extract::<&str>()?;
+ let found_module = found_class
+ .getattr("__module__")?
+ .extract::<PyBackedStr>()?;
+ let found_name =
found_class.getattr("__name__")?.extract::<PyBackedStr>()?;
return Err(PyTypeError::new_err(format!(
"Expected instance of {}.{}, got {}.{}",
expected_module, expected_name, found_module, found_name
@@ -118,7 +126,7 @@ fn validate_class(expected: &str, value: &PyAny) ->
PyResult<()> {
Ok(())
}
-fn validate_pycapsule(capsule: &PyCapsule, name: &str) -> PyResult<()> {
+fn validate_pycapsule(capsule: &Bound<PyCapsule>, name: &str) -> PyResult<()> {
let capsule_name = capsule.name()?;
if capsule_name.is_none() {
return Err(PyValueError::new_err(
@@ -138,13 +146,13 @@ fn validate_pycapsule(capsule: &PyCapsule, name: &str) ->
PyResult<()> {
}
impl FromPyArrow for DataType {
- fn from_pyarrow(value: &PyAny) -> PyResult<Self> {
+ fn from_pyarrow_bound(value: &Bound<PyAny>) -> PyResult<Self> {
// Newer versions of PyArrow as well as other libraries with Arrow
data implement this
// method, so prefer it over _export_to_c.
// See
https://arrow.apache.org/docs/format/CDataInterface/PyCapsuleInterface.html
if value.hasattr("__arrow_c_schema__")? {
- let capsule: &PyCapsule =
-
PyTryInto::try_into(value.getattr("__arrow_c_schema__")?.call0()?)?;
+ let capsule = value.getattr("__arrow_c_schema__")?.call0()?;
+ let capsule = capsule.downcast::<PyCapsule>()?;
validate_pycapsule(capsule, "arrow_schema")?;
let schema_ptr = unsafe { capsule.reference::<FFI_ArrowSchema>() };
@@ -166,7 +174,7 @@ impl ToPyArrow for DataType {
fn to_pyarrow(&self, py: Python) -> PyResult<PyObject> {
let c_schema = FFI_ArrowSchema::try_from(self).map_err(to_py_err)?;
let c_schema_ptr = &c_schema as *const FFI_ArrowSchema;
- let module = py.import("pyarrow")?;
+ let module = py.import_bound("pyarrow")?;
let class = module.getattr("DataType")?;
let dtype = class.call_method1("_import_from_c", (c_schema_ptr as
Py_uintptr_t,))?;
Ok(dtype.into())
@@ -174,13 +182,13 @@ impl ToPyArrow for DataType {
}
impl FromPyArrow for Field {
- fn from_pyarrow(value: &PyAny) -> PyResult<Self> {
+ fn from_pyarrow_bound(value: &Bound<PyAny>) -> PyResult<Self> {
// Newer versions of PyArrow as well as other libraries with Arrow
data implement this
// method, so prefer it over _export_to_c.
// See
https://arrow.apache.org/docs/format/CDataInterface/PyCapsuleInterface.html
if value.hasattr("__arrow_c_schema__")? {
- let capsule: &PyCapsule =
-
PyTryInto::try_into(value.getattr("__arrow_c_schema__")?.call0()?)?;
+ let capsule = value.getattr("__arrow_c_schema__")?.call0()?;
+ let capsule = capsule.downcast::<PyCapsule>()?;
validate_pycapsule(capsule, "arrow_schema")?;
let schema_ptr = unsafe { capsule.reference::<FFI_ArrowSchema>() };
@@ -202,7 +210,7 @@ impl ToPyArrow for Field {
fn to_pyarrow(&self, py: Python) -> PyResult<PyObject> {
let c_schema = FFI_ArrowSchema::try_from(self).map_err(to_py_err)?;
let c_schema_ptr = &c_schema as *const FFI_ArrowSchema;
- let module = py.import("pyarrow")?;
+ let module = py.import_bound("pyarrow")?;
let class = module.getattr("Field")?;
let dtype = class.call_method1("_import_from_c", (c_schema_ptr as
Py_uintptr_t,))?;
Ok(dtype.into())
@@ -210,13 +218,13 @@ impl ToPyArrow for Field {
}
impl FromPyArrow for Schema {
- fn from_pyarrow(value: &PyAny) -> PyResult<Self> {
+ fn from_pyarrow_bound(value: &Bound<PyAny>) -> PyResult<Self> {
// Newer versions of PyArrow as well as other libraries with Arrow
data implement this
// method, so prefer it over _export_to_c.
// See
https://arrow.apache.org/docs/format/CDataInterface/PyCapsuleInterface.html
if value.hasattr("__arrow_c_schema__")? {
- let capsule: &PyCapsule =
-
PyTryInto::try_into(value.getattr("__arrow_c_schema__")?.call0()?)?;
+ let capsule = value.getattr("__arrow_c_schema__")?.call0()?;
+ let capsule = capsule.downcast::<PyCapsule>()?;
validate_pycapsule(capsule, "arrow_schema")?;
let schema_ptr = unsafe { capsule.reference::<FFI_ArrowSchema>() };
@@ -238,7 +246,7 @@ impl ToPyArrow for Schema {
fn to_pyarrow(&self, py: Python) -> PyResult<PyObject> {
let c_schema = FFI_ArrowSchema::try_from(self).map_err(to_py_err)?;
let c_schema_ptr = &c_schema as *const FFI_ArrowSchema;
- let module = py.import("pyarrow")?;
+ let module = py.import_bound("pyarrow")?;
let class = module.getattr("Schema")?;
let schema = class.call_method1("_import_from_c", (c_schema_ptr as
Py_uintptr_t,))?;
Ok(schema.into())
@@ -246,7 +254,7 @@ impl ToPyArrow for Schema {
}
impl FromPyArrow for ArrayData {
- fn from_pyarrow(value: &PyAny) -> PyResult<Self> {
+ fn from_pyarrow_bound(value: &Bound<PyAny>) -> PyResult<Self> {
// Newer versions of PyArrow as well as other libraries with Arrow
data implement this
// method, so prefer it over _export_to_c.
// See
https://arrow.apache.org/docs/format/CDataInterface/PyCapsuleInterface.html
@@ -259,8 +267,10 @@ impl FromPyArrow for ArrayData {
));
}
- let schema_capsule: &PyCapsule =
PyTryInto::try_into(tuple.get_item(0)?)?;
- let array_capsule: &PyCapsule =
PyTryInto::try_into(tuple.get_item(1)?)?;
+ let schema_capsule = tuple.get_item(0)?;
+ let schema_capsule = schema_capsule.downcast::<PyCapsule>()?;
+ let array_capsule = tuple.get_item(1)?;
+ let array_capsule = array_capsule.downcast::<PyCapsule>()?;
validate_pycapsule(schema_capsule, "arrow_schema")?;
validate_pycapsule(array_capsule, "arrow_array")?;
@@ -296,7 +306,7 @@ impl ToPyArrow for ArrayData {
let array = FFI_ArrowArray::new(self);
let schema =
FFI_ArrowSchema::try_from(self.data_type()).map_err(to_py_err)?;
- let module = py.import("pyarrow")?;
+ let module = py.import_bound("pyarrow")?;
let class = module.getattr("Array")?;
let array = class.call_method1(
"_import_from_c",
@@ -310,9 +320,9 @@ impl ToPyArrow for ArrayData {
}
impl<T: FromPyArrow> FromPyArrow for Vec<T> {
- fn from_pyarrow(value: &PyAny) -> PyResult<Self> {
+ fn from_pyarrow_bound(value: &Bound<PyAny>) -> PyResult<Self> {
let list = value.downcast::<PyList>()?;
- list.iter().map(|x| T::from_pyarrow(x)).collect()
+ list.iter().map(|x| T::from_pyarrow_bound(&x)).collect()
}
}
@@ -327,7 +337,7 @@ impl<T: ToPyArrow> ToPyArrow for Vec<T> {
}
impl FromPyArrow for RecordBatch {
- fn from_pyarrow(value: &PyAny) -> PyResult<Self> {
+ fn from_pyarrow_bound(value: &Bound<PyAny>) -> PyResult<Self> {
// Newer versions of PyArrow as well as other libraries with Arrow
data implement this
// method, so prefer it over _export_to_c.
// See
https://arrow.apache.org/docs/format/CDataInterface/PyCapsuleInterface.html
@@ -340,8 +350,10 @@ impl FromPyArrow for RecordBatch {
));
}
- let schema_capsule: &PyCapsule =
PyTryInto::try_into(tuple.get_item(0)?)?;
- let array_capsule: &PyCapsule =
PyTryInto::try_into(tuple.get_item(1)?)?;
+ let schema_capsule = tuple.get_item(0)?;
+ let schema_capsule = schema_capsule.downcast::<PyCapsule>()?;
+ let array_capsule = tuple.get_item(1)?;
+ let array_capsule = array_capsule.downcast::<PyCapsule>()?;
validate_pycapsule(schema_capsule, "arrow_schema")?;
validate_pycapsule(array_capsule, "arrow_array")?;
@@ -370,12 +382,13 @@ impl FromPyArrow for RecordBatch {
validate_class("RecordBatch", value)?;
// TODO(kszucs): implement the FFI conversions in arrow-rs for
RecordBatches
let schema = value.getattr("schema")?;
- let schema = Arc::new(Schema::from_pyarrow(schema)?);
+ let schema = Arc::new(Schema::from_pyarrow_bound(&schema)?);
- let arrays = value.getattr("columns")?.downcast::<PyList>()?;
+ let arrays = value.getattr("columns")?;
let arrays = arrays
+ .downcast::<PyList>()?
.iter()
- .map(|a| Ok(make_array(ArrayData::from_pyarrow(a)?)))
+ .map(|a| Ok(make_array(ArrayData::from_pyarrow_bound(&a)?)))
.collect::<PyResult<_>>()?;
let batch = RecordBatch::try_new(schema, arrays).map_err(to_py_err)?;
@@ -395,13 +408,13 @@ impl ToPyArrow for RecordBatch {
/// Supports conversion from `pyarrow.RecordBatchReader` to
[ArrowArrayStreamReader].
impl FromPyArrow for ArrowArrayStreamReader {
- fn from_pyarrow(value: &PyAny) -> PyResult<Self> {
+ fn from_pyarrow_bound(value: &Bound<PyAny>) -> PyResult<Self> {
// Newer versions of PyArrow as well as other libraries with Arrow
data implement this
// method, so prefer it over _export_to_c.
// See
https://arrow.apache.org/docs/format/CDataInterface/PyCapsuleInterface.html
if value.hasattr("__arrow_c_stream__")? {
- let capsule: &PyCapsule =
-
PyTryInto::try_into(value.getattr("__arrow_c_stream__")?.call0()?)?;
+ let capsule = value.getattr("__arrow_c_stream__")?.call0()?;
+ let capsule = capsule.downcast::<PyCapsule>()?;
validate_pycapsule(capsule, "arrow_array_stream")?;
let stream = unsafe {
FFI_ArrowArrayStream::from_raw(capsule.pointer() as _) };
@@ -421,7 +434,7 @@ impl FromPyArrow for ArrowArrayStreamReader {
// make the conversion through PyArrow's private API
// this changes the pointer's memory and is thus unsafe.
// In particular, `_export_to_c` can go out of bounds
- let args = PyTuple::new(value.py(), [stream_ptr as Py_uintptr_t]);
+ let args = PyTuple::new_bound(value.py(), [stream_ptr as
Py_uintptr_t]);
value.call_method1("_export_to_c", args)?;
let stream_reader = ArrowArrayStreamReader::try_new(stream)
@@ -439,9 +452,9 @@ impl IntoPyArrow for Box<dyn RecordBatchReader + Send> {
let mut stream = FFI_ArrowArrayStream::new(self);
let stream_ptr = (&mut stream) as *mut FFI_ArrowArrayStream;
- let module = py.import("pyarrow")?;
+ let module = py.import_bound("pyarrow")?;
let class = module.getattr("RecordBatchReader")?;
- let args = PyTuple::new(py, [stream_ptr as Py_uintptr_t]);
+ let args = PyTuple::new_bound(py, [stream_ptr as Py_uintptr_t]);
let reader = class.call_method1("_import_from_c", args)?;
Ok(PyObject::from(reader))
@@ -463,8 +476,8 @@ impl IntoPyArrow for ArrowArrayStreamReader {
pub struct PyArrowType<T>(pub T);
impl<'source, T: FromPyArrow> FromPyObject<'source> for PyArrowType<T> {
- fn extract(value: &'source PyAny) -> PyResult<Self> {
- Ok(Self(T::from_pyarrow(value)?))
+ fn extract_bound(value: &Bound<'source, PyAny>) -> PyResult<Self> {
+ Ok(Self(T::from_pyarrow_bound(value)?))
}
}
diff --git a/arrow/tests/pyarrow.rs b/arrow/tests/pyarrow.rs
index 4b6991da006..a1c365c3179 100644
--- a/arrow/tests/pyarrow.rs
+++ b/arrow/tests/pyarrow.rs
@@ -32,9 +32,9 @@ fn test_to_pyarrow() {
let res = Python::with_gil(|py| {
let py_input = input.to_pyarrow(py)?;
- let records = RecordBatch::from_pyarrow(py_input.as_ref(py))?;
+ let records = RecordBatch::from_pyarrow_bound(py_input.bind(py))?;
let py_records = records.to_pyarrow(py)?;
- RecordBatch::from_pyarrow(py_records.as_ref(py))
+ RecordBatch::from_pyarrow_bound(py_records.bind(py))
})
.unwrap();