This is an automated email from the ASF dual-hosted git repository.
alamb pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git
The following commit(s) were added to refs/heads/main by this push:
new a815910004 feat(pyarrow) `FromPyArrow` on `Vec<T>`: allow any iterable
for input (#10155)
a815910004 is described below
commit a8159100043a5a2706822f22d10ccaf48cd9df4c
Author: Thomas Tanon <[email protected]>
AuthorDate: Mon Jun 22 19:50:14 2026 +0200
feat(pyarrow) `FromPyArrow` on `Vec<T>`: allow any iterable for input
(#10155)
This is already the behavior of the `FromPyObject` implementation on
`Vec<T: FromPyObject>`.
---
.../tests/test_sql.py | 2 +-
arrow-pyarrow-testing/tests/pyarrow.rs | 24 ++++++++++++++++++----
arrow-pyarrow/src/lib.rs | 14 +++++++------
3 files changed, 29 insertions(+), 11 deletions(-)
diff --git a/arrow-pyarrow-integration-testing/tests/test_sql.py
b/arrow-pyarrow-integration-testing/tests/test_sql.py
index b9b04ddee5..48bae5e86f 100644
--- a/arrow-pyarrow-integration-testing/tests/test_sql.py
+++ b/arrow-pyarrow-integration-testing/tests/test_sql.py
@@ -662,7 +662,7 @@ def test_table_empty():
"""
schema = pa.schema([pa.field(name='ints', type=pa.list_(pa.int32()),
metadata={b'key1': b'value1'})], metadata={b'key1': b'value1'})
table = pa.Table.from_batches([], schema=schema)
- new_table = rust.build_table([], schema=schema)
+ new_table = rust.build_table((), schema=schema)
assert table == new_table
assert table.schema == new_table.schema
diff --git a/arrow-pyarrow-testing/tests/pyarrow.rs
b/arrow-pyarrow-testing/tests/pyarrow.rs
index 6f3606478c..c70536b1c3 100644
--- a/arrow-pyarrow-testing/tests/pyarrow.rs
+++ b/arrow-pyarrow-testing/tests/pyarrow.rs
@@ -44,7 +44,7 @@ use arrow_array::{
use arrow_pyarrow::{FromPyArrow, ToPyArrow};
use pyo3::exceptions::PyTypeError;
use pyo3::types::{PyAnyMethods, PyModule};
-use pyo3::Python;
+use pyo3::{IntoPyObject, Python};
use std::ffi::CString;
use std::sync::Arc;
@@ -56,8 +56,7 @@ fn test_to_pyarrow() {
let b: ArrayRef = Arc::new(StringArray::from(vec!["a", "b"]));
// The "very long string" will not be inlined, and force the creation of a
data buffer.
let c: ArrayRef = Arc::new(StringViewArray::from(vec!["short", "a very
long string"]));
- let input = RecordBatch::try_from_iter(vec![("a", a), ("b", b), ("c",
c)]).unwrap();
- println!("input: {input:?}");
+ let input = RecordBatch::try_from_iter([("a", a), ("b", b), ("c",
c)]).unwrap();
let res = Python::attach(|py| {
let py_input = input.to_pyarrow(py)?;
@@ -70,6 +69,24 @@ fn test_to_pyarrow() {
assert_eq!(input, res);
}
+#[test]
+fn test_to_pyarrow_pair() {
+ Python::initialize();
+
+ let a: ArrayRef = Arc::new(Int32Array::from(vec![1, 2]));
+ let b: ArrayRef = Arc::new(StringArray::from(vec!["a", "b"]));
+ let input = RecordBatch::try_from_iter([("a", a), ("b", b)]).unwrap();
+
+ let res = Python::attach(|py| {
+ let record_batch = input.to_pyarrow(py)?;
+ let tuple = (record_batch.clone(), record_batch).into_pyobject(py)?;
+ Vec::<RecordBatch>::from_pyarrow_bound(&tuple)
+ })
+ .unwrap();
+ assert_eq!(input, res[0]);
+ assert_eq!(input, res[1]);
+}
+
#[test]
fn test_to_pyarrow_byte_view() {
Python::initialize();
@@ -84,7 +101,6 @@ fn test_to_pyarrow_byte_view() {
])
.unwrap();
- println!("input: {input:?}");
let res = Python::attach(|py| {
let py_input = input.to_pyarrow(py)?;
let records = RecordBatch::from_pyarrow_bound(&py_input)?;
diff --git a/arrow-pyarrow/src/lib.rs b/arrow-pyarrow/src/lib.rs
index c0d91d0811..07063b37cf 100644
--- a/arrow-pyarrow/src/lib.rs
+++ b/arrow-pyarrow/src/lib.rs
@@ -321,18 +321,20 @@ impl ToPyArrow for ArrayData {
impl<T: FromPyArrow> FromPyArrow for Vec<T> {
fn from_pyarrow_bound(value: &Bound<PyAny>) -> PyResult<Self> {
- let list = value.cast::<PyList>()?;
- list.iter().map(|x| T::from_pyarrow_bound(&x)).collect()
+ let mut v = Vec::with_capacity(value.len().unwrap_or(0));
+ for item in value.try_iter()? {
+ v.push(T::from_pyarrow_bound(&item?)?);
+ }
+ Ok(v)
}
}
impl<T: ToPyArrow> ToPyArrow for Vec<T> {
fn to_pyarrow<'py>(&self, py: Python<'py>) -> PyResult<Bound<'py, PyAny>> {
- let values = self
- .iter()
+ self.iter()
.map(|v| v.to_pyarrow(py))
- .collect::<PyResult<Vec<_>>>()?;
- Ok(PyList::new(py, values)?.into_any())
+ .collect::<PyResult<Vec<_>>>()?
+ .into_pyobject(py)
}
}