This is an automated email from the ASF dual-hosted git repository.
wesm pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git
The following commit(s) were added to refs/heads/master by this push:
new aba77a6 ARROW-3530: [Java/Python] Add conversion for pyarrow.Schema
from org.apache…pojo.Schema
aba77a6 is described below
commit aba77a6fdf475a058f99ab58bdf9b8bc7dbf3e73
Author: Korn, Uwe <[email protected]>
AuthorDate: Wed Oct 17 13:21:46 2018 -0400
ARROW-3530: [Java/Python] Add conversion for pyarrow.Schema from
org.apache…pojo.Schema
Author: Korn, Uwe <[email protected]>
Closes #2774 from xhochy/ARROW-3530 and squashes the following commits:
78c3df25a <Korn, Uwe> ARROW-3530: Add conversion for pyarrow.Schema from
org.apache…pojo.Schema
---
python/pyarrow/jvm.py | 23 +++++++++++++++++++++++
python/pyarrow/tests/test_jvm.py | 27 ++++++++++++++++++++++++++-
2 files changed, 49 insertions(+), 1 deletion(-)
diff --git a/python/pyarrow/jvm.py b/python/pyarrow/jvm.py
index fe2efad..4a43493 100644
--- a/python/pyarrow/jvm.py
+++ b/python/pyarrow/jvm.py
@@ -230,6 +230,29 @@ def field(jvm_field):
return pa.field(name, typ, nullable, metadata)
+def schema(jvm_schema):
+    """
+    Construct a Schema from an org.apache.arrow.vector.types.pojo.Schema
+    instance.
+
+    Every field of the JVM schema is converted with ``field`` and the
+    schema's custom metadata, if any, is carried over as a Python dict.
+
+    Parameters
+    ----------
+    jvm_schema: org.apache.arrow.vector.types.pojo.Schema
+
+    Returns
+    -------
+    pyarrow.Schema
+    """
+    fields = [field(f) for f in jvm_schema.getFields()]
+    jvm_metadata = jvm_schema.getCustomMetadata()
+    if jvm_metadata.isEmpty():
+        # Pass None rather than an empty dict so that the resulting schema
+        # carries no metadata at all.
+        metadata = None
+    else:
+        # Iterate the entry set so each value is fetched exactly once, and
+        # convert keys and values explicitly: depending on how JPype was
+        # started they may be java.lang.String proxies instead of Python
+        # strings.
+        metadata = {str(entry.getKey()): str(entry.getValue())
+                    for entry in jvm_metadata.entrySet()}
+    return pa.schema(fields, metadata)
+
+
def array(jvm_array):
"""
Construct an (Python) Array from its JVM equivalent.
diff --git a/python/pyarrow/tests/test_jvm.py b/python/pyarrow/tests/test_jvm.py
index 8f47708..3ca874e 100644
--- a/python/pyarrow/tests/test_jvm.py
+++ b/python/pyarrow/tests/test_jvm.py
@@ -21,6 +21,7 @@ import os
import pyarrow as pa
import pyarrow.jvm as pa_jvm
import pytest
+import six
import sys
import xml.etree.ElementTree as ET
@@ -68,6 +69,20 @@ def _jvm_field(jvm_spec):
return om.readValue(jvm_spec, pojo_Field)
+def _jvm_schema(jvm_spec, metadata=None):
+    """
+    Build a single-field org.apache.arrow.vector.types.pojo.Schema on the
+    JVM side from the JSON spec of its only field.
+
+    When ``metadata`` is a non-empty mapping, its items are copied into a
+    java.util.HashMap that is passed to the Schema constructor as well.
+    """
+    only_field = _jvm_field(jvm_spec)
+    schema_cls = jpype.JClass('org.apache.arrow.vector.types.pojo.Schema')
+    java_fields = jpype.JClass('java.util.ArrayList')()
+    java_fields.add(only_field)
+    if not metadata:
+        # No (or empty) metadata: use the fields-only constructor.
+        return schema_cls(java_fields)
+    java_metadata = jpype.JClass('java.util.HashMap')()
+    for key, value in six.iteritems(metadata):
+        java_metadata.put(key, value)
+    return schema_cls(java_fields, java_metadata)
+
+
# In the following, we use the JSON serialization of the Field objects in Java.
# This ensures that we do not rely on the exact mechanics of how to construct
# them using Java code, and it also enables us to define them as parameters
@@ -137,7 +152,17 @@ def test_jvm_types(root_allocator, typ, jvm_spec,
nullable):
}
jvm_field = _jvm_field(json.dumps(spec))
result = pa_jvm.field(jvm_field)
- assert result == pa.field('field_name', typ, nullable=nullable)
+ expected_field = pa.field('field_name', typ, nullable=nullable)
+ assert result == expected_field
+
+ jvm_schema = _jvm_schema(json.dumps(spec))
+ result = pa_jvm.schema(jvm_schema)
+ assert result == pa.schema([expected_field])
+
+ # Schema with custom metadata
+ jvm_schema = _jvm_schema(json.dumps(spec), {'meta': 'data'})
+ result = pa_jvm.schema(jvm_schema)
+ assert result == pa.schema([expected_field], {'meta': 'data'})
# These test parameters mostly use an integer range as an input as this is