This is an automated email from the ASF dual-hosted git repository.

kszucs pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git


The following commit(s) were added to refs/heads/master by this push:
     new 34df13a  PyO3 bridge for pyarrow interoperability  (#691)
34df13a is described below

commit 34df13a97ce6ae7791599dc2abedf715b79ed690
Author: Krisztián Szűcs <[email protected]>
AuthorDate: Wed Sep 1 12:37:35 2021 +0200

    PyO3 bridge for pyarrow interoperability  (#691)
    
    * PyO3 bridge for pyarrow interoperability
    
    * Fix clippy warnings
    
    * Simplify error handling
    
    * Fix clippy warnings
    
    * Fix integration test workflow
    
    * Address review comments
    
    * Virtualenv
    
    * Fix integration test
---
 .github/workflows/integration.yml                  |  21 +-
 arrow-pyarrow-integration-testing/Cargo.toml       |   4 +-
 arrow-pyarrow-integration-testing/src/lib.rs       | 260 ++++-----------------
 .../tests/test_sql.py                              |  28 ++-
 arrow/Cargo.toml                                   |   7 +-
 arrow/src/array/array.rs                           |   9 +-
 arrow/src/array/equal/list.rs                      |   2 +-
 arrow/src/lib.rs                                   |   2 +
 arrow/src/pyarrow.rs                               | 231 ++++++++++++++++++
 9 files changed, 321 insertions(+), 243 deletions(-)

diff --git a/.github/workflows/integration.yml 
b/.github/workflows/integration.yml
index a713d05..41b1dcb 100644
--- a/.github/workflows/integration.yml
+++ b/.github/workflows/integration.yml
@@ -78,22 +78,19 @@ jobs:
         with:
           python-version: '3.7'
       - name: Upgrade pip and setuptools
-        run: pip install --upgrade pip setuptools wheel
-      - name: Install python dependencies
-        run: pip install maturin==0.8.2 toml==0.10.1 pytest pytz
-      - name: Install nightly pyarrow wheel
-        # this points to a nightly pyarrow build containing neccessary
-        # API for integration testing 
(https://github.com/apache/arrow/pull/10529)
-        # the hardcoded version is wrong and should be removed either
-        # after https://issues.apache.org/jira/browse/ARROW-13083
-        # gets fixes or pyarrow 5.0 gets released
-        run: pip install --index-url https://pypi.fury.io/arrow-nightlies/ 
pyarrow==3.1.0.dev1030
+        run: pip install --upgrade pip setuptools wheel virtualenv
+      - name: Create virtualenv and install dependencies
+        run: |
+          virtualenv venv
+          source venv/bin/activate
+          pip install maturin toml pytest pytz pyarrow>=5.0
       - name: Run tests
         env:
           CARGO_HOME: "/home/runner/.cargo"
           CARGO_TARGET_DIR: "/home/runner/target"
-        working-directory: arrow-pyarrow-integration-testing
         run: |
+          source venv/bin/activate
+          pushd arrow-pyarrow-integration-testing
           maturin develop
           pytest -v .
+          popd
diff --git a/arrow-pyarrow-integration-testing/Cargo.toml 
b/arrow-pyarrow-integration-testing/Cargo.toml
index 59a084f..f1d226d 100644
--- a/arrow-pyarrow-integration-testing/Cargo.toml
+++ b/arrow-pyarrow-integration-testing/Cargo.toml
@@ -31,8 +31,8 @@ name = "arrow_pyarrow_integration_testing"
 crate-type = ["cdylib"]
 
 [dependencies]
-arrow = { path = "../arrow", version = "6.0.0-SNAPSHOT" }
-pyo3 = { version = "0.12.1", features = ["extension-module"] }
+arrow = { path = "../arrow", version = "6.0.0-SNAPSHOT", features = 
["pyarrow"] }
+pyo3 = { version = "0.14", features = ["extension-module"] }
 
 [package.metadata.maturin]
 requires-dist = ["pyarrow>=1"]
diff --git a/arrow-pyarrow-integration-testing/src/lib.rs 
b/arrow-pyarrow-integration-testing/src/lib.rs
index a601654..082a72e 100644
--- a/arrow-pyarrow-integration-testing/src/lib.rs
+++ b/arrow-pyarrow-integration-testing/src/lib.rs
@@ -18,269 +18,109 @@
 //! This library demonstrates a minimal usage of Rust's C data interface to 
pass
 //! arrays from and to Python.
 
-use std::convert::TryFrom;
-use std::error;
-use std::fmt;
 use std::sync::Arc;
 
-use pyo3::exceptions::PyOSError;
+use pyo3::prelude::*;
 use pyo3::wrap_pyfunction;
-use pyo3::{libc::uintptr_t, prelude::*};
 
-use arrow::array::{make_array_from_raw, ArrayRef, Int64Array};
+use arrow::array::{ArrayData, ArrayRef, Int64Array};
 use arrow::compute::kernels;
 use arrow::datatypes::{DataType, Field, Schema};
 use arrow::error::ArrowError;
-use arrow::ffi;
-use arrow::ffi::FFI_ArrowSchema;
-
-/// an error that bridges ArrowError with a Python error
-#[derive(Debug)]
-enum PyO3ArrowError {
-    ArrowError(ArrowError),
-}
-
-impl fmt::Display for PyO3ArrowError {
-    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
-        match *self {
-            PyO3ArrowError::ArrowError(ref e) => e.fmt(f),
-        }
-    }
-}
-
-impl error::Error for PyO3ArrowError {
-    fn source(&self) -> Option<&(dyn error::Error + 'static)> {
-        match *self {
-            // The cause is the underlying implementation error type. Is 
implicitly
-            // cast to the trait object `&error::Error`. This works because the
-            // underlying type already implements the `Error` trait.
-            PyO3ArrowError::ArrowError(ref e) => Some(e),
-        }
-    }
-}
-
-impl From<ArrowError> for PyO3ArrowError {
-    fn from(err: ArrowError) -> PyO3ArrowError {
-        PyO3ArrowError::ArrowError(err)
-    }
-}
-
-impl From<PyO3ArrowError> for PyErr {
-    fn from(err: PyO3ArrowError) -> PyErr {
-        PyOSError::new_err(err.to_string())
-    }
-}
-
-#[pyclass]
-struct PyDataType {
-    inner: DataType,
-}
-
-#[pyclass]
-struct PyField {
-    inner: Field,
-}
-
-#[pyclass]
-struct PySchema {
-    inner: Schema,
-}
-
-#[pymethods]
-impl PyDataType {
-    #[staticmethod]
-    fn from_pyarrow(value: &PyAny) -> PyResult<Self> {
-        let c_schema = FFI_ArrowSchema::empty();
-        let c_schema_ptr = &c_schema as *const FFI_ArrowSchema;
-        value.call_method1("_export_to_c", (c_schema_ptr as uintptr_t,))?;
-        let dtype = 
DataType::try_from(&c_schema).map_err(PyO3ArrowError::from)?;
-        Ok(Self { inner: dtype })
-    }
-
-    fn to_pyarrow(&self, py: Python) -> PyResult<PyObject> {
-        let c_schema =
-            
FFI_ArrowSchema::try_from(&self.inner).map_err(PyO3ArrowError::from)?;
-        let c_schema_ptr = &c_schema as *const FFI_ArrowSchema;
-        let module = py.import("pyarrow")?;
-        let class = module.getattr("DataType")?;
-        let dtype = class.call_method1("_import_from_c", (c_schema_ptr as 
uintptr_t,))?;
-        Ok(dtype.into())
-    }
-}
-
-#[pymethods]
-impl PyField {
-    #[staticmethod]
-    fn from_pyarrow(value: &PyAny) -> PyResult<Self> {
-        let c_schema = FFI_ArrowSchema::empty();
-        let c_schema_ptr = &c_schema as *const FFI_ArrowSchema;
-        value.call_method1("_export_to_c", (c_schema_ptr as uintptr_t,))?;
-        let field = Field::try_from(&c_schema).map_err(PyO3ArrowError::from)?;
-        Ok(Self { inner: field })
-    }
-
-    fn to_pyarrow(&self, py: Python) -> PyResult<PyObject> {
-        let c_schema =
-            
FFI_ArrowSchema::try_from(&self.inner).map_err(PyO3ArrowError::from)?;
-        let c_schema_ptr = &c_schema as *const FFI_ArrowSchema;
-        let module = py.import("pyarrow")?;
-        let class = module.getattr("Field")?;
-        let dtype = class.call_method1("_import_from_c", (c_schema_ptr as 
uintptr_t,))?;
-        Ok(dtype.into())
-    }
-}
-
-#[pymethods]
-impl PySchema {
-    #[staticmethod]
-    fn from_pyarrow(value: &PyAny) -> PyResult<Self> {
-        let c_schema = FFI_ArrowSchema::empty();
-        let c_schema_ptr = &c_schema as *const FFI_ArrowSchema;
-        value.call_method1("_export_to_c", (c_schema_ptr as uintptr_t,))?;
-        let schema = 
Schema::try_from(&c_schema).map_err(PyO3ArrowError::from)?;
-        Ok(Self { inner: schema })
-    }
-
-    fn to_pyarrow(&self, py: Python) -> PyResult<PyObject> {
-        let c_schema =
-            
FFI_ArrowSchema::try_from(&self.inner).map_err(PyO3ArrowError::from)?;
-        let c_schema_ptr = &c_schema as *const FFI_ArrowSchema;
-        let module = py.import("pyarrow")?;
-        let class = module.getattr("Schema")?;
-        let schema =
-            class.call_method1("_import_from_c", (c_schema_ptr as 
uintptr_t,))?;
-        Ok(schema.into())
-    }
-}
-
-impl<'source> FromPyObject<'source> for PyDataType {
-    fn extract(value: &'source PyAny) -> PyResult<Self> {
-        PyDataType::from_pyarrow(value)
-    }
-}
-
-impl<'source> FromPyObject<'source> for PyField {
-    fn extract(value: &'source PyAny) -> PyResult<Self> {
-        PyField::from_pyarrow(value)
-    }
-}
-
-impl<'source> FromPyObject<'source> for PySchema {
-    fn extract(value: &'source PyAny) -> PyResult<Self> {
-        PySchema::from_pyarrow(value)
-    }
-}
-
-fn array_to_rust(ob: PyObject, py: Python) -> PyResult<ArrayRef> {
-    // prepare a pointer to receive the Array struct
-    let (array_pointer, schema_pointer) =
-        ffi::ArrowArray::into_raw(unsafe { ffi::ArrowArray::empty() });
-
-    // make the conversion through PyArrow's private API
-    // this changes the pointer's memory and is thus unsafe. In particular, 
`_export_to_c` can go out of bounds
-    ob.call_method1(
-        py,
-        "_export_to_c",
-        (array_pointer as uintptr_t, schema_pointer as uintptr_t),
-    )?;
-
-    let array = unsafe { make_array_from_raw(array_pointer, schema_pointer) }
-        .map_err(PyO3ArrowError::from)?;
-    Ok(array)
-}
-
-fn array_to_py(array: ArrayRef, py: Python) -> PyResult<PyObject> {
-    let (array_pointer, schema_pointer) = 
array.to_raw().map_err(PyO3ArrowError::from)?;
-
-    let pa = py.import("pyarrow")?;
-
-    let array = pa.getattr("Array")?.call_method1(
-        "_import_from_c",
-        (array_pointer as uintptr_t, schema_pointer as uintptr_t),
-    )?;
-    Ok(array.to_object(py))
-}
+use arrow::pyarrow::PyArrowConvert;
+use arrow::record_batch::RecordBatch;
 
 /// Returns `array + array` of an int64 array.
 #[pyfunction]
-fn double(array: PyObject, py: Python) -> PyResult<PyObject> {
+fn double(array: &PyAny, py: Python) -> PyResult<PyObject> {
     // import
-    let array = array_to_rust(array, py)?;
+    let array = ArrayRef::from_pyarrow(array)?;
 
     // perform some operation
-    let array = array.as_any().downcast_ref::<Int64Array>().ok_or_else(|| {
-        PyO3ArrowError::ArrowError(ArrowError::ParseError("Expects an 
int64".to_string()))
-    })?;
-    let array = kernels::arithmetic::add(&array, 
&array).map_err(PyO3ArrowError::from)?;
-    let array = Arc::new(array);
+    let array = array
+        .as_any()
+        .downcast_ref::<Int64Array>()
+        .ok_or(ArrowError::ParseError("Expects an int64".to_string()))?;
+    let array = kernels::arithmetic::add(array, array)?;
 
     // export
-    array_to_py(array, py)
+    array.to_pyarrow(py)
 }
 
 /// calls a lambda function that receives and returns an array
 /// whose result must be the array multiplied by two
 #[pyfunction]
-fn double_py(lambda: PyObject, py: Python) -> PyResult<bool> {
+fn double_py(lambda: &PyAny, py: Python) -> PyResult<bool> {
     // create
     let array = Arc::new(Int64Array::from(vec![Some(1), None, Some(3)]));
     let expected = Arc::new(Int64Array::from(vec![Some(2), None, Some(6)])) as 
ArrayRef;
 
     // to py
-    let pyarray = array_to_py(array, py)?;
-    let pyarray = lambda.call1(py, (pyarray,))?;
-    let array = array_to_rust(pyarray, py)?;
+    let pyarray = array.to_pyarrow(py)?;
+    let pyarray = lambda.call1((pyarray,))?;
+    let array = ArrayRef::from_pyarrow(pyarray)?;
 
     Ok(array == expected)
 }
 
 /// Returns the substring
 #[pyfunction]
-fn substring(array: PyObject, start: i64, py: Python) -> PyResult<PyObject> {
+fn substring(array: ArrayData, start: i64) -> PyResult<ArrayData> {
     // import
-    let array = array_to_rust(array, py)?;
+    let array = ArrayRef::from(array);
 
     // substring
-    let array = kernels::substring::substring(array.as_ref(), start, &None)
-        .map_err(PyO3ArrowError::from)?;
+    let array = kernels::substring::substring(array.as_ref(), start, &None)?;
 
-    // export
-    array_to_py(array, py)
+    Ok(array.data().to_owned())
 }
 
 /// Returns the concatenate
 #[pyfunction]
-fn concatenate(array: PyObject, py: Python) -> PyResult<PyObject> {
-    // import
-    let array = array_to_rust(array, py)?;
+fn concatenate(array: ArrayData, py: Python) -> PyResult<PyObject> {
+    let array = ArrayRef::from(array);
 
     // concat
-    let array = kernels::concat::concat(&[array.as_ref(), array.as_ref()])
-        .map_err(PyO3ArrowError::from)?;
+    let array = kernels::concat::concat(&[array.as_ref(), array.as_ref()])?;
 
-    // export
-    array_to_py(array, py)
+    array.to_pyarrow(py)
 }
 
-/// Converts to rust and back to python
 #[pyfunction]
-fn round_trip(pyarray: PyObject, py: Python) -> PyResult<PyObject> {
-    // import
-    let array = array_to_rust(pyarray, py)?;
+fn round_trip_type(obj: DataType) -> PyResult<DataType> {
+    Ok(obj)
+}
 
-    // export
-    array_to_py(array, py)
+#[pyfunction]
+fn round_trip_field(obj: Field) -> PyResult<Field> {
+    Ok(obj)
+}
+
+#[pyfunction]
+fn round_trip_schema(obj: Schema) -> PyResult<Schema> {
+    Ok(obj)
+}
+
+#[pyfunction]
+fn round_trip_array(obj: ArrayData) -> PyResult<ArrayData> {
+    Ok(obj)
+}
+
+#[pyfunction]
+fn round_trip_record_batch(obj: RecordBatch) -> PyResult<RecordBatch> {
+    Ok(obj)
 }
 
 #[pymodule]
 fn arrow_pyarrow_integration_testing(_py: Python, m: &PyModule) -> 
PyResult<()> {
-    m.add_class::<PyDataType>()?;
-    m.add_class::<PyField>()?;
-    m.add_class::<PySchema>()?;
     m.add_wrapped(wrap_pyfunction!(double))?;
     m.add_wrapped(wrap_pyfunction!(double_py))?;
     m.add_wrapped(wrap_pyfunction!(substring))?;
     m.add_wrapped(wrap_pyfunction!(concatenate))?;
-    m.add_wrapped(wrap_pyfunction!(round_trip))?;
+    m.add_wrapped(wrap_pyfunction!(round_trip_type))?;
+    m.add_wrapped(wrap_pyfunction!(round_trip_field))?;
+    m.add_wrapped(wrap_pyfunction!(round_trip_schema))?;
+    m.add_wrapped(wrap_pyfunction!(round_trip_array))?;
+    m.add_wrapped(wrap_pyfunction!(round_trip_record_batch))?;
     Ok(())
 }
diff --git a/arrow-pyarrow-integration-testing/tests/test_sql.py 
b/arrow-pyarrow-integration-testing/tests/test_sql.py
index 301eac8..bacd118 100644
--- a/arrow-pyarrow-integration-testing/tests/test_sql.py
+++ b/arrow-pyarrow-integration-testing/tests/test_sql.py
@@ -25,7 +25,6 @@ import pytest
 import pyarrow as pa
 import pytz
 
-from arrow_pyarrow_integration_testing import PyDataType, PyField, PySchema
 import arrow_pyarrow_integration_testing as rust
 
 
@@ -113,43 +112,42 @@ _unsupported_pyarrow_types = [
 
 @pytest.mark.parametrize("pyarrow_type", _supported_pyarrow_types, ids=str)
 def test_type_roundtrip(pyarrow_type):
-    ty = PyDataType.from_pyarrow(pyarrow_type)
-    restored = ty.to_pyarrow()
+    restored = rust.round_trip_type(pyarrow_type)
     assert restored == pyarrow_type
     assert restored is not pyarrow_type
 
 
 @pytest.mark.parametrize("pyarrow_type", _unsupported_pyarrow_types, ids=str)
 def test_type_roundtrip_raises(pyarrow_type):
-    with pytest.raises(Exception):
-        PyDataType.from_pyarrow(pyarrow_type)
+    with pytest.raises(pa.ArrowException):
+        rust.round_trip_type(pyarrow_type)
 
 
 def test_dictionary_type_roundtrip():
     # the dictionary type conversion is incomplete
     pyarrow_type = pa.dictionary(pa.int32(), pa.string())
-    ty = PyDataType.from_pyarrow(pyarrow_type)
-    assert ty.to_pyarrow() == pa.int32()
+    ty = rust.round_trip_type(pyarrow_type)
+    assert ty == pa.int32()
 
 
 @pytest.mark.parametrize('pyarrow_type', _supported_pyarrow_types, ids=str)
 def test_field_roundtrip(pyarrow_type):
     pyarrow_field = pa.field("test", pyarrow_type, nullable=True)
-    field = PyField.from_pyarrow(pyarrow_field)
-    assert field.to_pyarrow() == pyarrow_field
+    field = rust.round_trip_field(pyarrow_field)
+    assert field == pyarrow_field
 
     if pyarrow_type != pa.null():
         # A null type field may not be non-nullable
         pyarrow_field = pa.field("test", pyarrow_type, nullable=False)
-        field = PyField.from_pyarrow(pyarrow_field)
-        assert field.to_pyarrow() == pyarrow_field
+        field = rust.round_trip_field(pyarrow_field)
+        assert field == pyarrow_field
 
 
 def test_schema_roundtrip():
     pyarrow_fields = zip(string.ascii_lowercase, _supported_pyarrow_types)
     pyarrow_schema = pa.schema(pyarrow_fields)
-    schema = PySchema.from_pyarrow(pyarrow_schema)
-    assert schema.to_pyarrow() == pyarrow_schema
+    schema = rust.round_trip_schema(pyarrow_schema)
+    assert schema == pyarrow_schema
 
 
 def test_primitive_python():
@@ -205,7 +203,7 @@ def test_list_array():
     Python -> Rust -> Python
     """
     a = pa.array([[], None, [1, 2], [4, 5, 6]], pa.list_(pa.int64()))
-    b = rust.round_trip(a)
+    b = rust.round_trip_array(a)
     b.validate(full=True)
     assert a.to_pylist() == b.to_pylist()
     assert a.type == b.type
@@ -261,7 +259,7 @@ def test_decimal_python():
         None
     ]
     a = pa.array(data, pa.decimal128(6, 2))
-    b = rust.round_trip(a)
+    b = rust.round_trip_array(a)
     assert a == b
     del a
     del b
diff --git a/arrow/Cargo.toml b/arrow/Cargo.toml
index cadd4ff..ae322d8 100644
--- a/arrow/Cargo.toml
+++ b/arrow/Cargo.toml
@@ -50,6 +50,8 @@ chrono = "0.4"
 flatbuffers = { version = "=2.0.0", optional = true }
 hex = "0.4"
 comfy-table = { version = "4.0", optional = true, default-features = false }
+prettytable-rs = { version = "0.8.0", optional = true }
+pyo3 = { version = "0.14", optional = true }
 lexical-core = "^0.7"
 multiversion = "0.6.1"
 bitflags = "1.2.1"
@@ -62,14 +64,15 @@ ipc = ["flatbuffers"]
 simd = ["packed_simd"]
 prettyprint = ["comfy-table"]
 # The test utils feature enables code used in benchmarks and tests but
-# not the core arrow code itself. Be aware that `rand` must be kept as 
-# an optional dependency for supporting compile to wasm32-unknown-unknown 
+# not the core arrow code itself. Be aware that `rand` must be kept as
+# an optional dependency for supporting compile to wasm32-unknown-unknown
 # target without assuming an environment containing JavaScript.
 test_utils = ["rand"]
 # this is only intended to be used in single-threaded programs: it verifies 
that
 # all allocated memory is being released (no memory leaks).
 # See README for details
 memory-check = []
+pyarrow = ["pyo3"]
 
 [dev-dependencies]
 rand = "0.8"
diff --git a/arrow/src/array/array.rs b/arrow/src/array/array.rs
index 5504c4a..8cbac31 100644
--- a/arrow/src/array/array.rs
+++ b/arrow/src/array/array.rs
@@ -15,9 +15,10 @@
 // specific language governing permissions and limitations
 // under the License.
 
+use std::any::Any;
+use std::convert::{From, TryFrom};
 use std::fmt;
 use std::sync::Arc;
-use std::{any::Any, convert::TryFrom};
 
 use super::*;
 use crate::array::equal_json::JsonEqual;
@@ -334,6 +335,12 @@ pub fn make_array(data: ArrayData) -> ArrayRef {
     }
 }
 
+impl From<ArrayData> for ArrayRef {
+    fn from(data: ArrayData) -> Self {
+        make_array(data)
+    }
+}
+
 /// Creates a new empty array
 ///
 /// ```
diff --git a/arrow/src/array/equal/list.rs b/arrow/src/array/equal/list.rs
index 331cdc7..20e6400 100644
--- a/arrow/src/array/equal/list.rs
+++ b/arrow/src/array/equal/list.rs
@@ -138,7 +138,7 @@ pub(super) fn list_equal<T: OffsetSizeTrait>(
             child_rhs_nulls.as_ref(),
             lhs_offsets[lhs_start].to_usize().unwrap(),
             rhs_offsets[rhs_start].to_usize().unwrap(),
-            (lhs_offsets[len] - lhs_offsets[lhs_start])
+            (lhs_offsets[lhs_start + len] - lhs_offsets[lhs_start])
                 .to_usize()
                 .unwrap(),
         )
diff --git a/arrow/src/lib.rs b/arrow/src/lib.rs
index 1932b0d..2c2590c 100644
--- a/arrow/src/lib.rs
+++ b/arrow/src/lib.rs
@@ -156,6 +156,8 @@ pub mod ffi;
 #[cfg(feature = "ipc")]
 pub mod ipc;
 pub mod json;
+#[cfg(feature = "pyarrow")]
+pub mod pyarrow;
 pub mod record_batch;
 pub mod temporal_conversions;
 pub mod tensor;
diff --git a/arrow/src/pyarrow.rs b/arrow/src/pyarrow.rs
new file mode 100644
index 0000000..12d4c0d
--- /dev/null
+++ b/arrow/src/pyarrow.rs
@@ -0,0 +1,231 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+//! This library demonstrates a minimal usage of Rust's C data interface to 
pass
+//! arrays from and to Python.
+
+use std::convert::{From, TryFrom};
+use std::sync::Arc;
+
+use pyo3::ffi::Py_uintptr_t;
+use pyo3::import_exception;
+use pyo3::prelude::*;
+use pyo3::types::PyList;
+
+use crate::array::{make_array, Array, ArrayData, ArrayRef};
+use crate::datatypes::{DataType, Field, Schema};
+use crate::error::ArrowError;
+use crate::ffi;
+use crate::ffi::FFI_ArrowSchema;
+use crate::record_batch::RecordBatch;
+
+import_exception!(pyarrow, ArrowException);
+pub type PyArrowException = ArrowException;
+
+impl From<ArrowError> for PyErr {
+    fn from(err: ArrowError) -> PyErr {
+        PyArrowException::new_err(err.to_string())
+    }
+}
+
+pub trait PyArrowConvert: Sized {
+    fn from_pyarrow(value: &PyAny) -> PyResult<Self>;
+    fn to_pyarrow(&self, py: Python) -> PyResult<PyObject>;
+}
+
+impl PyArrowConvert for DataType {
+    fn from_pyarrow(value: &PyAny) -> PyResult<Self> {
+        let c_schema = FFI_ArrowSchema::empty();
+        let c_schema_ptr = &c_schema as *const FFI_ArrowSchema;
+        value.call_method1("_export_to_c", (c_schema_ptr as Py_uintptr_t,))?;
+        let dtype = DataType::try_from(&c_schema)?;
+        Ok(dtype)
+    }
+
+    fn to_pyarrow(&self, py: Python) -> PyResult<PyObject> {
+        let c_schema = FFI_ArrowSchema::try_from(self)?;
+        let c_schema_ptr = &c_schema as *const FFI_ArrowSchema;
+        let module = py.import("pyarrow")?;
+        let class = module.getattr("DataType")?;
+        let dtype =
+            class.call_method1("_import_from_c", (c_schema_ptr as 
Py_uintptr_t,))?;
+        Ok(dtype.into())
+    }
+}
+
+impl PyArrowConvert for Field {
+    fn from_pyarrow(value: &PyAny) -> PyResult<Self> {
+        let c_schema = FFI_ArrowSchema::empty();
+        let c_schema_ptr = &c_schema as *const FFI_ArrowSchema;
+        value.call_method1("_export_to_c", (c_schema_ptr as Py_uintptr_t,))?;
+        let field = Field::try_from(&c_schema)?;
+        Ok(field)
+    }
+
+    fn to_pyarrow(&self, py: Python) -> PyResult<PyObject> {
+        let c_schema = FFI_ArrowSchema::try_from(self)?;
+        let c_schema_ptr = &c_schema as *const FFI_ArrowSchema;
+        let module = py.import("pyarrow")?;
+        let class = module.getattr("Field")?;
+        let dtype =
+            class.call_method1("_import_from_c", (c_schema_ptr as 
Py_uintptr_t,))?;
+        Ok(dtype.into())
+    }
+}
+
+impl PyArrowConvert for Schema {
+    fn from_pyarrow(value: &PyAny) -> PyResult<Self> {
+        let c_schema = FFI_ArrowSchema::empty();
+        let c_schema_ptr = &c_schema as *const FFI_ArrowSchema;
+        value.call_method1("_export_to_c", (c_schema_ptr as Py_uintptr_t,))?;
+        let schema = Schema::try_from(&c_schema)?;
+        Ok(schema)
+    }
+
+    fn to_pyarrow(&self, py: Python) -> PyResult<PyObject> {
+        let c_schema = FFI_ArrowSchema::try_from(self)?;
+        let c_schema_ptr = &c_schema as *const FFI_ArrowSchema;
+        let module = py.import("pyarrow")?;
+        let class = module.getattr("Schema")?;
+        let schema =
+            class.call_method1("_import_from_c", (c_schema_ptr as 
Py_uintptr_t,))?;
+        Ok(schema.into())
+    }
+}
+
+impl PyArrowConvert for ArrayData {
+    fn from_pyarrow(value: &PyAny) -> PyResult<Self> {
+        // prepare a pointer to receive the Array struct
+        let (array_pointer, schema_pointer) =
+            ffi::ArrowArray::into_raw(unsafe { ffi::ArrowArray::empty() });
+
+        // make the conversion through PyArrow's private API
+        // this changes the pointer's memory and is thus unsafe.
+        // In particular, `_export_to_c` can go out of bounds
+        value.call_method1(
+            "_export_to_c",
+            (
+                array_pointer as Py_uintptr_t,
+                schema_pointer as Py_uintptr_t,
+            ),
+        )?;
+
+        let ffi_array =
+            unsafe { ffi::ArrowArray::try_from_raw(array_pointer, 
schema_pointer)? };
+        let data = ArrayData::try_from(ffi_array)?;
+
+        Ok(data)
+    }
+
+    fn to_pyarrow(&self, py: Python) -> PyResult<PyObject> {
+        let array = ffi::ArrowArray::try_from(self.clone())?;
+        let (array_pointer, schema_pointer) = ffi::ArrowArray::into_raw(array);
+
+        let module = py.import("pyarrow")?;
+        let class = module.getattr("Array")?;
+        let array = class.call_method1(
+            "_import_from_c",
+            (
+                array_pointer as Py_uintptr_t,
+                schema_pointer as Py_uintptr_t,
+            ),
+        )?;
+        Ok(array.to_object(py))
+    }
+}
+
+impl PyArrowConvert for ArrayRef {
+    fn from_pyarrow(value: &PyAny) -> PyResult<Self> {
+        Ok(make_array(ArrayData::from_pyarrow(value)?))
+    }
+
+    fn to_pyarrow(&self, py: Python) -> PyResult<PyObject> {
+        self.data().to_pyarrow(py)
+    }
+}
+
+impl<T> PyArrowConvert for T
+where
+    T: Array + From<ArrayData>,
+{
+    fn from_pyarrow(value: &PyAny) -> PyResult<Self> {
+        Ok(ArrayData::from_pyarrow(value)?.into())
+    }
+
+    fn to_pyarrow(&self, py: Python) -> PyResult<PyObject> {
+        self.data().to_pyarrow(py)
+    }
+}
+
+impl PyArrowConvert for RecordBatch {
+    fn from_pyarrow(value: &PyAny) -> PyResult<Self> {
+        // TODO(kszucs): implement the FFI conversions in arrow-rs for 
RecordBatches
+        let schema = value.getattr("schema")?;
+        let schema = Arc::new(Schema::from_pyarrow(schema)?);
+
+        let arrays = value.getattr("columns")?.downcast::<PyList>()?;
+        let arrays = arrays
+            .iter()
+            .map(ArrayRef::from_pyarrow)
+            .collect::<PyResult<_>>()?;
+
+        let batch = RecordBatch::try_new(schema, arrays)?;
+        Ok(batch)
+    }
+
+    fn to_pyarrow(&self, py: Python) -> PyResult<PyObject> {
+        let mut py_arrays = vec![];
+        let mut py_names = vec![];
+
+        let schema = self.schema();
+        let fields = schema.fields().iter();
+        let columns = self.columns().iter();
+
+        for (array, field) in columns.zip(fields) {
+            py_arrays.push(array.to_pyarrow(py)?);
+            py_names.push(field.name());
+        }
+
+        let module = py.import("pyarrow")?;
+        let class = module.getattr("RecordBatch")?;
+        let record = class.call_method1("from_arrays", (py_arrays, py_names))?;
+
+        Ok(PyObject::from(record))
+    }
+}
+
+macro_rules! add_conversion {
+    ($typ:ty) => {
+        impl<'source> FromPyObject<'source> for $typ {
+            fn extract(value: &'source PyAny) -> PyResult<Self> {
+                Self::from_pyarrow(value)
+            }
+        }
+
+        impl<'a> IntoPy<PyObject> for $typ {
+            fn into_py(self, py: Python) -> PyObject {
+                self.to_pyarrow(py).unwrap()
+            }
+        }
+    };
+}
+
+add_conversion!(DataType);
+add_conversion!(Field);
+add_conversion!(Schema);
+add_conversion!(ArrayData);
+add_conversion!(RecordBatch);

Reply via email to