This is an automated email from the ASF dual-hosted git repository.
kszucs pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git
The following commit(s) were added to refs/heads/master by this push:
new 34df13a PyO3 bridge for pyarrow interoperability (#691)
34df13a is described below
commit 34df13a97ce6ae7791599dc2abedf715b79ed690
Author: Krisztián Szűcs <[email protected]>
AuthorDate: Wed Sep 1 12:37:35 2021 +0200
PyO3 bridge for pyarrow interoperability (#691)
* PyO3 bridge for pyarrow interoperability
* Fix clippy warnings
* Simplify error handling
* Fix clippy warnings
* Fix integration test workflow
* Address review comments
* Virtualenv
* Fix integration test
---
.github/workflows/integration.yml | 21 +-
arrow-pyarrow-integration-testing/Cargo.toml | 4 +-
arrow-pyarrow-integration-testing/src/lib.rs | 260 ++++-----------------
.../tests/test_sql.py | 28 ++-
arrow/Cargo.toml | 7 +-
arrow/src/array/array.rs | 9 +-
arrow/src/array/equal/list.rs | 2 +-
arrow/src/lib.rs | 2 +
arrow/src/pyarrow.rs | 231 ++++++++++++++++++
9 files changed, 321 insertions(+), 243 deletions(-)
diff --git a/.github/workflows/integration.yml
b/.github/workflows/integration.yml
index a713d05..41b1dcb 100644
--- a/.github/workflows/integration.yml
+++ b/.github/workflows/integration.yml
@@ -78,22 +78,19 @@ jobs:
with:
python-version: '3.7'
- name: Upgrade pip and setuptools
- run: pip install --upgrade pip setuptools wheel
- - name: Install python dependencies
- run: pip install maturin==0.8.2 toml==0.10.1 pytest pytz
- - name: Install nightly pyarrow wheel
- # this points to a nightly pyarrow build containing necessary
- # API for integration testing
(https://github.com/apache/arrow/pull/10529)
- # the hardcoded version is wrong and should be removed either
- # after https://issues.apache.org/jira/browse/ARROW-13083
- # gets fixed or pyarrow 5.0 gets released
- run: pip install --index-url https://pypi.fury.io/arrow-nightlies/
pyarrow==3.1.0.dev1030
+ run: pip install --upgrade pip setuptools wheel virtualenv
+ - name: Create virtualenv and install dependencies
+ run: |
+ virtualenv venv
+ source venv/bin/activate
+ pip install maturin toml pytest pytz pyarrow>=5.0
- name: Run tests
env:
CARGO_HOME: "/home/runner/.cargo"
CARGO_TARGET_DIR: "/home/runner/target"
- working-directory: arrow-pyarrow-integration-testing
run: |
+ source venv/bin/activate
+ pushd arrow-pyarrow-integration-testing
maturin develop
pytest -v .
+ popd
diff --git a/arrow-pyarrow-integration-testing/Cargo.toml
b/arrow-pyarrow-integration-testing/Cargo.toml
index 59a084f..f1d226d 100644
--- a/arrow-pyarrow-integration-testing/Cargo.toml
+++ b/arrow-pyarrow-integration-testing/Cargo.toml
@@ -31,8 +31,8 @@ name = "arrow_pyarrow_integration_testing"
crate-type = ["cdylib"]
[dependencies]
-arrow = { path = "../arrow", version = "6.0.0-SNAPSHOT" }
-pyo3 = { version = "0.12.1", features = ["extension-module"] }
+arrow = { path = "../arrow", version = "6.0.0-SNAPSHOT", features =
["pyarrow"] }
+pyo3 = { version = "0.14", features = ["extension-module"] }
[package.metadata.maturin]
requires-dist = ["pyarrow>=1"]
diff --git a/arrow-pyarrow-integration-testing/src/lib.rs
b/arrow-pyarrow-integration-testing/src/lib.rs
index a601654..082a72e 100644
--- a/arrow-pyarrow-integration-testing/src/lib.rs
+++ b/arrow-pyarrow-integration-testing/src/lib.rs
@@ -18,269 +18,109 @@
//! This library demonstrates a minimal usage of Rust's C data interface to
pass
//! arrays from and to Python.
-use std::convert::TryFrom;
-use std::error;
-use std::fmt;
use std::sync::Arc;
-use pyo3::exceptions::PyOSError;
+use pyo3::prelude::*;
use pyo3::wrap_pyfunction;
-use pyo3::{libc::uintptr_t, prelude::*};
-use arrow::array::{make_array_from_raw, ArrayRef, Int64Array};
+use arrow::array::{ArrayData, ArrayRef, Int64Array};
use arrow::compute::kernels;
use arrow::datatypes::{DataType, Field, Schema};
use arrow::error::ArrowError;
-use arrow::ffi;
-use arrow::ffi::FFI_ArrowSchema;
-
-/// an error that bridges ArrowError with a Python error
-#[derive(Debug)]
-enum PyO3ArrowError {
- ArrowError(ArrowError),
-}
-
-impl fmt::Display for PyO3ArrowError {
- fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
- match *self {
- PyO3ArrowError::ArrowError(ref e) => e.fmt(f),
- }
- }
-}
-
-impl error::Error for PyO3ArrowError {
- fn source(&self) -> Option<&(dyn error::Error + 'static)> {
- match *self {
- // The cause is the underlying implementation error type. Is
implicitly
- // cast to the trait object `&error::Error`. This works because the
- // underlying type already implements the `Error` trait.
- PyO3ArrowError::ArrowError(ref e) => Some(e),
- }
- }
-}
-
-impl From<ArrowError> for PyO3ArrowError {
- fn from(err: ArrowError) -> PyO3ArrowError {
- PyO3ArrowError::ArrowError(err)
- }
-}
-
-impl From<PyO3ArrowError> for PyErr {
- fn from(err: PyO3ArrowError) -> PyErr {
- PyOSError::new_err(err.to_string())
- }
-}
-
-#[pyclass]
-struct PyDataType {
- inner: DataType,
-}
-
-#[pyclass]
-struct PyField {
- inner: Field,
-}
-
-#[pyclass]
-struct PySchema {
- inner: Schema,
-}
-
-#[pymethods]
-impl PyDataType {
- #[staticmethod]
- fn from_pyarrow(value: &PyAny) -> PyResult<Self> {
- let c_schema = FFI_ArrowSchema::empty();
- let c_schema_ptr = &c_schema as *const FFI_ArrowSchema;
- value.call_method1("_export_to_c", (c_schema_ptr as uintptr_t,))?;
- let dtype =
DataType::try_from(&c_schema).map_err(PyO3ArrowError::from)?;
- Ok(Self { inner: dtype })
- }
-
- fn to_pyarrow(&self, py: Python) -> PyResult<PyObject> {
- let c_schema =
-
FFI_ArrowSchema::try_from(&self.inner).map_err(PyO3ArrowError::from)?;
- let c_schema_ptr = &c_schema as *const FFI_ArrowSchema;
- let module = py.import("pyarrow")?;
- let class = module.getattr("DataType")?;
- let dtype = class.call_method1("_import_from_c", (c_schema_ptr as
uintptr_t,))?;
- Ok(dtype.into())
- }
-}
-
-#[pymethods]
-impl PyField {
- #[staticmethod]
- fn from_pyarrow(value: &PyAny) -> PyResult<Self> {
- let c_schema = FFI_ArrowSchema::empty();
- let c_schema_ptr = &c_schema as *const FFI_ArrowSchema;
- value.call_method1("_export_to_c", (c_schema_ptr as uintptr_t,))?;
- let field = Field::try_from(&c_schema).map_err(PyO3ArrowError::from)?;
- Ok(Self { inner: field })
- }
-
- fn to_pyarrow(&self, py: Python) -> PyResult<PyObject> {
- let c_schema =
-
FFI_ArrowSchema::try_from(&self.inner).map_err(PyO3ArrowError::from)?;
- let c_schema_ptr = &c_schema as *const FFI_ArrowSchema;
- let module = py.import("pyarrow")?;
- let class = module.getattr("Field")?;
- let dtype = class.call_method1("_import_from_c", (c_schema_ptr as
uintptr_t,))?;
- Ok(dtype.into())
- }
-}
-
-#[pymethods]
-impl PySchema {
- #[staticmethod]
- fn from_pyarrow(value: &PyAny) -> PyResult<Self> {
- let c_schema = FFI_ArrowSchema::empty();
- let c_schema_ptr = &c_schema as *const FFI_ArrowSchema;
- value.call_method1("_export_to_c", (c_schema_ptr as uintptr_t,))?;
- let schema =
Schema::try_from(&c_schema).map_err(PyO3ArrowError::from)?;
- Ok(Self { inner: schema })
- }
-
- fn to_pyarrow(&self, py: Python) -> PyResult<PyObject> {
- let c_schema =
-
FFI_ArrowSchema::try_from(&self.inner).map_err(PyO3ArrowError::from)?;
- let c_schema_ptr = &c_schema as *const FFI_ArrowSchema;
- let module = py.import("pyarrow")?;
- let class = module.getattr("Schema")?;
- let schema =
- class.call_method1("_import_from_c", (c_schema_ptr as
uintptr_t,))?;
- Ok(schema.into())
- }
-}
-
-impl<'source> FromPyObject<'source> for PyDataType {
- fn extract(value: &'source PyAny) -> PyResult<Self> {
- PyDataType::from_pyarrow(value)
- }
-}
-
-impl<'source> FromPyObject<'source> for PyField {
- fn extract(value: &'source PyAny) -> PyResult<Self> {
- PyField::from_pyarrow(value)
- }
-}
-
-impl<'source> FromPyObject<'source> for PySchema {
- fn extract(value: &'source PyAny) -> PyResult<Self> {
- PySchema::from_pyarrow(value)
- }
-}
-
-fn array_to_rust(ob: PyObject, py: Python) -> PyResult<ArrayRef> {
- // prepare a pointer to receive the Array struct
- let (array_pointer, schema_pointer) =
- ffi::ArrowArray::into_raw(unsafe { ffi::ArrowArray::empty() });
-
- // make the conversion through PyArrow's private API
- // this changes the pointer's memory and is thus unsafe. In particular,
`_export_to_c` can go out of bounds
- ob.call_method1(
- py,
- "_export_to_c",
- (array_pointer as uintptr_t, schema_pointer as uintptr_t),
- )?;
-
- let array = unsafe { make_array_from_raw(array_pointer, schema_pointer) }
- .map_err(PyO3ArrowError::from)?;
- Ok(array)
-}
-
-fn array_to_py(array: ArrayRef, py: Python) -> PyResult<PyObject> {
- let (array_pointer, schema_pointer) =
array.to_raw().map_err(PyO3ArrowError::from)?;
-
- let pa = py.import("pyarrow")?;
-
- let array = pa.getattr("Array")?.call_method1(
- "_import_from_c",
- (array_pointer as uintptr_t, schema_pointer as uintptr_t),
- )?;
- Ok(array.to_object(py))
-}
+use arrow::pyarrow::PyArrowConvert;
+use arrow::record_batch::RecordBatch;
/// Returns `array + array` of an int64 array.
#[pyfunction]
-fn double(array: PyObject, py: Python) -> PyResult<PyObject> {
+fn double(array: &PyAny, py: Python) -> PyResult<PyObject> {
// import
- let array = array_to_rust(array, py)?;
+ let array = ArrayRef::from_pyarrow(array)?;
// perform some operation
- let array = array.as_any().downcast_ref::<Int64Array>().ok_or_else(|| {
- PyO3ArrowError::ArrowError(ArrowError::ParseError("Expects an
int64".to_string()))
- })?;
- let array = kernels::arithmetic::add(&array,
&array).map_err(PyO3ArrowError::from)?;
- let array = Arc::new(array);
+ let array = array
+ .as_any()
+ .downcast_ref::<Int64Array>()
+ .ok_or(ArrowError::ParseError("Expects an int64".to_string()))?;
+ let array = kernels::arithmetic::add(array, array)?;
// export
- array_to_py(array, py)
+ array.to_pyarrow(py)
}
/// calls a lambda function that receives and returns an array
/// whose result must be the array multiplied by two
#[pyfunction]
-fn double_py(lambda: PyObject, py: Python) -> PyResult<bool> {
+fn double_py(lambda: &PyAny, py: Python) -> PyResult<bool> {
// create
let array = Arc::new(Int64Array::from(vec![Some(1), None, Some(3)]));
let expected = Arc::new(Int64Array::from(vec![Some(2), None, Some(6)])) as
ArrayRef;
// to py
- let pyarray = array_to_py(array, py)?;
- let pyarray = lambda.call1(py, (pyarray,))?;
- let array = array_to_rust(pyarray, py)?;
+ let pyarray = array.to_pyarrow(py)?;
+ let pyarray = lambda.call1((pyarray,))?;
+ let array = ArrayRef::from_pyarrow(pyarray)?;
Ok(array == expected)
}
/// Returns the substring
#[pyfunction]
-fn substring(array: PyObject, start: i64, py: Python) -> PyResult<PyObject> {
+fn substring(array: ArrayData, start: i64) -> PyResult<ArrayData> {
// import
- let array = array_to_rust(array, py)?;
+ let array = ArrayRef::from(array);
// substring
- let array = kernels::substring::substring(array.as_ref(), start, &None)
- .map_err(PyO3ArrowError::from)?;
+ let array = kernels::substring::substring(array.as_ref(), start, &None)?;
- // export
- array_to_py(array, py)
+ Ok(array.data().to_owned())
}
/// Returns the concatenate
#[pyfunction]
-fn concatenate(array: PyObject, py: Python) -> PyResult<PyObject> {
- // import
- let array = array_to_rust(array, py)?;
+fn concatenate(array: ArrayData, py: Python) -> PyResult<PyObject> {
+ let array = ArrayRef::from(array);
// concat
- let array = kernels::concat::concat(&[array.as_ref(), array.as_ref()])
- .map_err(PyO3ArrowError::from)?;
+ let array = kernels::concat::concat(&[array.as_ref(), array.as_ref()])?;
- // export
- array_to_py(array, py)
+ array.to_pyarrow(py)
}
-/// Converts to rust and back to python
#[pyfunction]
-fn round_trip(pyarray: PyObject, py: Python) -> PyResult<PyObject> {
- // import
- let array = array_to_rust(pyarray, py)?;
+fn round_trip_type(obj: DataType) -> PyResult<DataType> {
+ Ok(obj)
+}
- // export
- array_to_py(array, py)
+#[pyfunction]
+fn round_trip_field(obj: Field) -> PyResult<Field> {
+ Ok(obj)
+}
+
+#[pyfunction]
+fn round_trip_schema(obj: Schema) -> PyResult<Schema> {
+ Ok(obj)
+}
+
+#[pyfunction]
+fn round_trip_array(obj: ArrayData) -> PyResult<ArrayData> {
+ Ok(obj)
+}
+
+#[pyfunction]
+fn round_trip_record_batch(obj: RecordBatch) -> PyResult<RecordBatch> {
+ Ok(obj)
}
#[pymodule]
fn arrow_pyarrow_integration_testing(_py: Python, m: &PyModule) ->
PyResult<()> {
- m.add_class::<PyDataType>()?;
- m.add_class::<PyField>()?;
- m.add_class::<PySchema>()?;
m.add_wrapped(wrap_pyfunction!(double))?;
m.add_wrapped(wrap_pyfunction!(double_py))?;
m.add_wrapped(wrap_pyfunction!(substring))?;
m.add_wrapped(wrap_pyfunction!(concatenate))?;
- m.add_wrapped(wrap_pyfunction!(round_trip))?;
+ m.add_wrapped(wrap_pyfunction!(round_trip_type))?;
+ m.add_wrapped(wrap_pyfunction!(round_trip_field))?;
+ m.add_wrapped(wrap_pyfunction!(round_trip_schema))?;
+ m.add_wrapped(wrap_pyfunction!(round_trip_array))?;
+ m.add_wrapped(wrap_pyfunction!(round_trip_record_batch))?;
Ok(())
}
diff --git a/arrow-pyarrow-integration-testing/tests/test_sql.py
b/arrow-pyarrow-integration-testing/tests/test_sql.py
index 301eac8..bacd118 100644
--- a/arrow-pyarrow-integration-testing/tests/test_sql.py
+++ b/arrow-pyarrow-integration-testing/tests/test_sql.py
@@ -25,7 +25,6 @@ import pytest
import pyarrow as pa
import pytz
-from arrow_pyarrow_integration_testing import PyDataType, PyField, PySchema
import arrow_pyarrow_integration_testing as rust
@@ -113,43 +112,42 @@ _unsupported_pyarrow_types = [
@pytest.mark.parametrize("pyarrow_type", _supported_pyarrow_types, ids=str)
def test_type_roundtrip(pyarrow_type):
- ty = PyDataType.from_pyarrow(pyarrow_type)
- restored = ty.to_pyarrow()
+ restored = rust.round_trip_type(pyarrow_type)
assert restored == pyarrow_type
assert restored is not pyarrow_type
@pytest.mark.parametrize("pyarrow_type", _unsupported_pyarrow_types, ids=str)
def test_type_roundtrip_raises(pyarrow_type):
- with pytest.raises(Exception):
- PyDataType.from_pyarrow(pyarrow_type)
+ with pytest.raises(pa.ArrowException):
+ rust.round_trip_type(pyarrow_type)
def test_dictionary_type_roundtrip():
# the dictionary type conversion is incomplete
pyarrow_type = pa.dictionary(pa.int32(), pa.string())
- ty = PyDataType.from_pyarrow(pyarrow_type)
- assert ty.to_pyarrow() == pa.int32()
+ ty = rust.round_trip_type(pyarrow_type)
+ assert ty == pa.int32()
@pytest.mark.parametrize('pyarrow_type', _supported_pyarrow_types, ids=str)
def test_field_roundtrip(pyarrow_type):
pyarrow_field = pa.field("test", pyarrow_type, nullable=True)
- field = PyField.from_pyarrow(pyarrow_field)
- assert field.to_pyarrow() == pyarrow_field
+ field = rust.round_trip_field(pyarrow_field)
+ assert field == pyarrow_field
if pyarrow_type != pa.null():
# A null type field may not be non-nullable
pyarrow_field = pa.field("test", pyarrow_type, nullable=False)
- field = PyField.from_pyarrow(pyarrow_field)
- assert field.to_pyarrow() == pyarrow_field
+ field = rust.round_trip_field(pyarrow_field)
+ assert field == pyarrow_field
def test_schema_roundtrip():
pyarrow_fields = zip(string.ascii_lowercase, _supported_pyarrow_types)
pyarrow_schema = pa.schema(pyarrow_fields)
- schema = PySchema.from_pyarrow(pyarrow_schema)
- assert schema.to_pyarrow() == pyarrow_schema
+ schema = rust.round_trip_schema(pyarrow_schema)
+ assert schema == pyarrow_schema
def test_primitive_python():
@@ -205,7 +203,7 @@ def test_list_array():
Python -> Rust -> Python
"""
a = pa.array([[], None, [1, 2], [4, 5, 6]], pa.list_(pa.int64()))
- b = rust.round_trip(a)
+ b = rust.round_trip_array(a)
b.validate(full=True)
assert a.to_pylist() == b.to_pylist()
assert a.type == b.type
@@ -261,7 +259,7 @@ def test_decimal_python():
None
]
a = pa.array(data, pa.decimal128(6, 2))
- b = rust.round_trip(a)
+ b = rust.round_trip_array(a)
assert a == b
del a
del b
diff --git a/arrow/Cargo.toml b/arrow/Cargo.toml
index cadd4ff..ae322d8 100644
--- a/arrow/Cargo.toml
+++ b/arrow/Cargo.toml
@@ -50,6 +50,8 @@ chrono = "0.4"
flatbuffers = { version = "=2.0.0", optional = true }
hex = "0.4"
comfy-table = { version = "4.0", optional = true, default-features = false }
+prettytable-rs = { version = "0.8.0", optional = true }
+pyo3 = { version = "0.14", optional = true }
lexical-core = "^0.7"
multiversion = "0.6.1"
bitflags = "1.2.1"
@@ -62,14 +64,15 @@ ipc = ["flatbuffers"]
simd = ["packed_simd"]
prettyprint = ["comfy-table"]
# The test utils feature enables code used in benchmarks and tests but
-# not the core arrow code itself. Be aware that `rand` must be kept as
-# an optional dependency for supporting compile to wasm32-unknown-unknown
+# not the core arrow code itself. Be aware that `rand` must be kept as
+# an optional dependency for supporting compile to wasm32-unknown-unknown
# target without assuming an environment containing JavaScript.
test_utils = ["rand"]
# this is only intended to be used in single-threaded programs: it verifies
that
# all allocated memory is being released (no memory leaks).
# See README for details
memory-check = []
+pyarrow = ["pyo3"]
[dev-dependencies]
rand = "0.8"
diff --git a/arrow/src/array/array.rs b/arrow/src/array/array.rs
index 5504c4a..8cbac31 100644
--- a/arrow/src/array/array.rs
+++ b/arrow/src/array/array.rs
@@ -15,9 +15,10 @@
// specific language governing permissions and limitations
// under the License.
+use std::any::Any;
+use std::convert::{From, TryFrom};
use std::fmt;
use std::sync::Arc;
-use std::{any::Any, convert::TryFrom};
use super::*;
use crate::array::equal_json::JsonEqual;
@@ -334,6 +335,12 @@ pub fn make_array(data: ArrayData) -> ArrayRef {
}
}
+impl From<ArrayData> for ArrayRef {
+ fn from(data: ArrayData) -> Self {
+ make_array(data)
+ }
+}
+
/// Creates a new empty array
///
/// ```
diff --git a/arrow/src/array/equal/list.rs b/arrow/src/array/equal/list.rs
index 331cdc7..20e6400 100644
--- a/arrow/src/array/equal/list.rs
+++ b/arrow/src/array/equal/list.rs
@@ -138,7 +138,7 @@ pub(super) fn list_equal<T: OffsetSizeTrait>(
child_rhs_nulls.as_ref(),
lhs_offsets[lhs_start].to_usize().unwrap(),
rhs_offsets[rhs_start].to_usize().unwrap(),
- (lhs_offsets[len] - lhs_offsets[lhs_start])
+ (lhs_offsets[lhs_start + len] - lhs_offsets[lhs_start])
.to_usize()
.unwrap(),
)
diff --git a/arrow/src/lib.rs b/arrow/src/lib.rs
index 1932b0d..2c2590c 100644
--- a/arrow/src/lib.rs
+++ b/arrow/src/lib.rs
@@ -156,6 +156,8 @@ pub mod ffi;
#[cfg(feature = "ipc")]
pub mod ipc;
pub mod json;
+#[cfg(feature = "pyarrow")]
+pub mod pyarrow;
pub mod record_batch;
pub mod temporal_conversions;
pub mod tensor;
diff --git a/arrow/src/pyarrow.rs b/arrow/src/pyarrow.rs
new file mode 100644
index 0000000..12d4c0d
--- /dev/null
+++ b/arrow/src/pyarrow.rs
@@ -0,0 +1,231 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+//! This library demonstrates a minimal usage of Rust's C data interface to
pass
+//! arrays from and to Python.
+
+use std::convert::{From, TryFrom};
+use std::sync::Arc;
+
+use pyo3::ffi::Py_uintptr_t;
+use pyo3::import_exception;
+use pyo3::prelude::*;
+use pyo3::types::PyList;
+
+use crate::array::{make_array, Array, ArrayData, ArrayRef};
+use crate::datatypes::{DataType, Field, Schema};
+use crate::error::ArrowError;
+use crate::ffi;
+use crate::ffi::FFI_ArrowSchema;
+use crate::record_batch::RecordBatch;
+
+import_exception!(pyarrow, ArrowException);
+pub type PyArrowException = ArrowException;
+
+impl From<ArrowError> for PyErr {
+ fn from(err: ArrowError) -> PyErr {
+ PyArrowException::new_err(err.to_string())
+ }
+}
+
+pub trait PyArrowConvert: Sized {
+ fn from_pyarrow(value: &PyAny) -> PyResult<Self>;
+ fn to_pyarrow(&self, py: Python) -> PyResult<PyObject>;
+}
+
+impl PyArrowConvert for DataType {
+ fn from_pyarrow(value: &PyAny) -> PyResult<Self> {
+ let c_schema = FFI_ArrowSchema::empty();
+ let c_schema_ptr = &c_schema as *const FFI_ArrowSchema;
+ value.call_method1("_export_to_c", (c_schema_ptr as Py_uintptr_t,))?;
+ let dtype = DataType::try_from(&c_schema)?;
+ Ok(dtype)
+ }
+
+ fn to_pyarrow(&self, py: Python) -> PyResult<PyObject> {
+ let c_schema = FFI_ArrowSchema::try_from(self)?;
+ let c_schema_ptr = &c_schema as *const FFI_ArrowSchema;
+ let module = py.import("pyarrow")?;
+ let class = module.getattr("DataType")?;
+ let dtype =
+ class.call_method1("_import_from_c", (c_schema_ptr as
Py_uintptr_t,))?;
+ Ok(dtype.into())
+ }
+}
+
+impl PyArrowConvert for Field {
+ fn from_pyarrow(value: &PyAny) -> PyResult<Self> {
+ let c_schema = FFI_ArrowSchema::empty();
+ let c_schema_ptr = &c_schema as *const FFI_ArrowSchema;
+ value.call_method1("_export_to_c", (c_schema_ptr as Py_uintptr_t,))?;
+ let field = Field::try_from(&c_schema)?;
+ Ok(field)
+ }
+
+ fn to_pyarrow(&self, py: Python) -> PyResult<PyObject> {
+ let c_schema = FFI_ArrowSchema::try_from(self)?;
+ let c_schema_ptr = &c_schema as *const FFI_ArrowSchema;
+ let module = py.import("pyarrow")?;
+ let class = module.getattr("Field")?;
+ let dtype =
+ class.call_method1("_import_from_c", (c_schema_ptr as
Py_uintptr_t,))?;
+ Ok(dtype.into())
+ }
+}
+
+impl PyArrowConvert for Schema {
+ fn from_pyarrow(value: &PyAny) -> PyResult<Self> {
+ let c_schema = FFI_ArrowSchema::empty();
+ let c_schema_ptr = &c_schema as *const FFI_ArrowSchema;
+ value.call_method1("_export_to_c", (c_schema_ptr as Py_uintptr_t,))?;
+ let schema = Schema::try_from(&c_schema)?;
+ Ok(schema)
+ }
+
+ fn to_pyarrow(&self, py: Python) -> PyResult<PyObject> {
+ let c_schema = FFI_ArrowSchema::try_from(self)?;
+ let c_schema_ptr = &c_schema as *const FFI_ArrowSchema;
+ let module = py.import("pyarrow")?;
+ let class = module.getattr("Schema")?;
+ let schema =
+ class.call_method1("_import_from_c", (c_schema_ptr as
Py_uintptr_t,))?;
+ Ok(schema.into())
+ }
+}
+
+impl PyArrowConvert for ArrayData {
+ fn from_pyarrow(value: &PyAny) -> PyResult<Self> {
+ // prepare a pointer to receive the Array struct
+ let (array_pointer, schema_pointer) =
+ ffi::ArrowArray::into_raw(unsafe { ffi::ArrowArray::empty() });
+
+ // make the conversion through PyArrow's private API
+ // this changes the pointer's memory and is thus unsafe.
+ // In particular, `_export_to_c` can go out of bounds
+ value.call_method1(
+ "_export_to_c",
+ (
+ array_pointer as Py_uintptr_t,
+ schema_pointer as Py_uintptr_t,
+ ),
+ )?;
+
+ let ffi_array =
+ unsafe { ffi::ArrowArray::try_from_raw(array_pointer,
schema_pointer)? };
+ let data = ArrayData::try_from(ffi_array)?;
+
+ Ok(data)
+ }
+
+ fn to_pyarrow(&self, py: Python) -> PyResult<PyObject> {
+ let array = ffi::ArrowArray::try_from(self.clone())?;
+ let (array_pointer, schema_pointer) = ffi::ArrowArray::into_raw(array);
+
+ let module = py.import("pyarrow")?;
+ let class = module.getattr("Array")?;
+ let array = class.call_method1(
+ "_import_from_c",
+ (
+ array_pointer as Py_uintptr_t,
+ schema_pointer as Py_uintptr_t,
+ ),
+ )?;
+ Ok(array.to_object(py))
+ }
+}
+
+impl PyArrowConvert for ArrayRef {
+ fn from_pyarrow(value: &PyAny) -> PyResult<Self> {
+ Ok(make_array(ArrayData::from_pyarrow(value)?))
+ }
+
+ fn to_pyarrow(&self, py: Python) -> PyResult<PyObject> {
+ self.data().to_pyarrow(py)
+ }
+}
+
+impl<T> PyArrowConvert for T
+where
+ T: Array + From<ArrayData>,
+{
+ fn from_pyarrow(value: &PyAny) -> PyResult<Self> {
+ Ok(ArrayData::from_pyarrow(value)?.into())
+ }
+
+ fn to_pyarrow(&self, py: Python) -> PyResult<PyObject> {
+ self.data().to_pyarrow(py)
+ }
+}
+
+impl PyArrowConvert for RecordBatch {
+ fn from_pyarrow(value: &PyAny) -> PyResult<Self> {
+ // TODO(kszucs): implement the FFI conversions in arrow-rs for
RecordBatches
+ let schema = value.getattr("schema")?;
+ let schema = Arc::new(Schema::from_pyarrow(schema)?);
+
+ let arrays = value.getattr("columns")?.downcast::<PyList>()?;
+ let arrays = arrays
+ .iter()
+ .map(ArrayRef::from_pyarrow)
+ .collect::<PyResult<_>>()?;
+
+ let batch = RecordBatch::try_new(schema, arrays)?;
+ Ok(batch)
+ }
+
+ fn to_pyarrow(&self, py: Python) -> PyResult<PyObject> {
+ let mut py_arrays = vec![];
+ let mut py_names = vec![];
+
+ let schema = self.schema();
+ let fields = schema.fields().iter();
+ let columns = self.columns().iter();
+
+ for (array, field) in columns.zip(fields) {
+ py_arrays.push(array.to_pyarrow(py)?);
+ py_names.push(field.name());
+ }
+
+ let module = py.import("pyarrow")?;
+ let class = module.getattr("RecordBatch")?;
+ let record = class.call_method1("from_arrays", (py_arrays, py_names))?;
+
+ Ok(PyObject::from(record))
+ }
+}
+
+macro_rules! add_conversion {
+ ($typ:ty) => {
+ impl<'source> FromPyObject<'source> for $typ {
+ fn extract(value: &'source PyAny) -> PyResult<Self> {
+ Self::from_pyarrow(value)
+ }
+ }
+
+ impl<'a> IntoPy<PyObject> for $typ {
+ fn into_py(self, py: Python) -> PyObject {
+ self.to_pyarrow(py).unwrap()
+ }
+ }
+ };
+}
+
+add_conversion!(DataType);
+add_conversion!(Field);
+add_conversion!(Schema);
+add_conversion!(ArrayData);
+add_conversion!(RecordBatch);