This is an automated email from the ASF dual-hosted git repository.
tustvold pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git
The following commit(s) were added to refs/heads/master by this push:
new befea02c2 To pyarrow with schema (#3188)
befea02c2 is described below
commit befea02c2f277a95d1f80f00aa0e9591942bd723
Author: Jie Han <[email protected]>
AuthorDate: Sat Nov 26 21:14:15 2022 +0800
To pyarrow with schema (#3188)
* to pyarrow with schema
* only use schema
* add test
* Run python tests in CI
Co-authored-by: Raphael Taylor-Davies <[email protected]>
---
.github/workflows/integration.yml | 10 +++++-----
arrow/Cargo.toml | 4 ++++
arrow/src/pyarrow.rs | 9 ++++-----
arrow/tests/pyarrow.rs | 42 +++++++++++++++++++++++++++++++++++++++
4 files changed, 55 insertions(+), 10 deletions(-)
diff --git a/.github/workflows/integration.yml b/.github/workflows/integration.yml
index 3ece06b29..656e56a65 100644
--- a/.github/workflows/integration.yml
+++ b/.github/workflows/integration.yml
@@ -149,13 +149,13 @@ jobs:
virtualenv venv
source venv/bin/activate
pip install maturin toml pytest pytz pyarrow>=5.0
+ - name: Run Rust tests
+ run: |
+ source venv/bin/activate
+ cargo test -p arrow --test pyarrow --features pyarrow
- name: Run tests
- env:
- CARGO_HOME: "/home/runner/.cargo"
- CARGO_TARGET_DIR: "/home/runner/target"
run: |
source venv/bin/activate
- pushd arrow-pyarrow-integration-testing
+ cd arrow-pyarrow-integration-testing
maturin develop
pytest -v .
- popd
diff --git a/arrow/Cargo.toml b/arrow/Cargo.toml
index 8172615f2..a97ec1ac1 100644
--- a/arrow/Cargo.toml
+++ b/arrow/Cargo.toml
@@ -269,3 +269,7 @@ required-features = ["test_utils", "ipc"]
[[test]]
name = "csv"
required-features = ["csv", "chrono-tz"]
+
+[[test]]
+name = "pyarrow"
+required-features = ["pyarrow"]
diff --git a/arrow/src/pyarrow.rs b/arrow/src/pyarrow.rs
index 7c365a434..5ddc3105a 100644
--- a/arrow/src/pyarrow.rs
+++ b/arrow/src/pyarrow.rs
@@ -184,20 +184,19 @@ impl PyArrowConvert for RecordBatch {
fn to_pyarrow(&self, py: Python) -> PyResult<PyObject> {
let mut py_arrays = vec![];
- let mut py_names = vec![];
let schema = self.schema();
- let fields = schema.fields().iter();
let columns = self.columns().iter();
- for (array, field) in columns.zip(fields) {
+ for array in columns {
py_arrays.push(array.data().to_pyarrow(py)?);
- py_names.push(field.name());
}
+ let py_schema = schema.to_pyarrow(py)?;
+
let module = py.import("pyarrow")?;
let class = module.getattr("RecordBatch")?;
- let record = class.call_method1("from_arrays", (py_arrays, py_names))?;
+ let record = class.call_method1("from_arrays", (py_arrays, py_schema))?;
Ok(PyObject::from(record))
}
diff --git a/arrow/tests/pyarrow.rs b/arrow/tests/pyarrow.rs
new file mode 100644
index 000000000..4b1226c73
--- /dev/null
+++ b/arrow/tests/pyarrow.rs
@@ -0,0 +1,42 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+use arrow::array::{ArrayRef, Int32Array, StringArray};
+use arrow::pyarrow::PyArrowConvert;
+use arrow::record_batch::RecordBatch;
+use pyo3::Python;
+use std::sync::Arc;
+
+#[test]
+fn test_to_pyarrow() {
+ pyo3::prepare_freethreaded_python();
+
+ let a: ArrayRef = Arc::new(Int32Array::from(vec![1, 2]));
+ let b: ArrayRef = Arc::new(StringArray::from(vec!["a", "b"]));
+ let input = RecordBatch::try_from_iter(vec![("a", a), ("b", b)]).unwrap();
+ println!("input: {:?}", input);
+
+ let res = Python::with_gil(|py| {
+ let py_input = input.to_pyarrow(py)?;
+ let records = RecordBatch::from_pyarrow(py_input.as_ref(py))?;
+ let py_records = records.to_pyarrow(py)?;
+ RecordBatch::from_pyarrow(py_records.as_ref(py))
+ })
+ .unwrap();
+
+ assert_eq!(input, res);
+}