This is an automated email from the ASF dual-hosted git repository.

uwe pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git


The following commit(s) were added to refs/heads/master by this push:
     new 88b72df  ARROW-2073: [Python] Create struct array from sequence of tuples
88b72df is described below

commit 88b72df27514a8ec47736bca1da010d9a2f84f3c
Author: Antoine Pitrou <[email protected]>
AuthorDate: Thu Feb 8 13:22:23 2018 +0100

    ARROW-2073: [Python] Create struct array from sequence of tuples
    
    Author: Antoine Pitrou <[email protected]>
    
    Closes #1572 from pitrou/ARROW-2073-struct-from-tuples and squashes the following commits:
    
    0a41ccc [Antoine Pitrou] ARROW-2073: [Python] Create struct array from sequence of tuples
---
 cpp/src/arrow/python/builtin_convert.cc      | 47 ++++++++++++++++++++++------
 python/benchmarks/convert_builtins.py        | 21 ++++++++++---
 python/pyarrow/tests/test_convert_builtin.py | 39 +++++++++++++++++++++++
 3 files changed, 93 insertions(+), 14 deletions(-)

diff --git a/cpp/src/arrow/python/builtin_convert.cc b/cpp/src/arrow/python/builtin_convert.cc
index 1e431c2..f0e5449 100644
--- a/cpp/src/arrow/python/builtin_convert.cc
+++ b/cpp/src/arrow/python/builtin_convert.cc
@@ -771,18 +771,21 @@ class StructConverter : public TypedConverterVisitor<StructBuilder, StructConver
   // Append a non-missing item
   Status AppendItem(PyObject* obj) {
     RETURN_NOT_OK(typed_builder_->Append());
-    if (!PyDict_Check(obj)) {
-      return Status::TypeError("dict value expected for struct type");
+    // Note heterogenous sequences are not allowed
+    if (ARROW_PREDICT_FALSE(source_kind_ == UNKNOWN)) {
+      if (PyDict_Check(obj)) {
+        source_kind_ = DICTS;
+      } else if (PyTuple_Check(obj)) {
+        source_kind_ = TUPLES;
+      }
     }
-    // NOTE we're ignoring any extraneous dict items
-    for (int i = 0; i < num_fields_; i++) {
-      PyObject* nameobj = PyList_GET_ITEM(field_name_list_.obj(), i);
-      PyObject* valueobj = PyDict_GetItem(obj, nameobj);  // borrowed
-      RETURN_IF_PYERROR();
-      RETURN_NOT_OK(value_converters_[i]->AppendSingle(valueobj ? valueobj : Py_None));
+    if (PyDict_Check(obj) && source_kind_ == DICTS) {
+      return AppendDictItem(obj);
+    } else if (PyTuple_Check(obj) && source_kind_ == TUPLES) {
+      return AppendTupleItem(obj);
+    } else {
+      return Status::TypeError("Expected sequence of dicts or tuples for struct type");
     }
-
-    return Status::OK();
   }
 
   // Append a missing item
@@ -797,9 +800,33 @@ class StructConverter : public TypedConverterVisitor<StructBuilder, StructConver
   }
 
  protected:
+  Status AppendDictItem(PyObject* obj) {
+    // NOTE we're ignoring any extraneous dict items
+    for (int i = 0; i < num_fields_; i++) {
+      PyObject* nameobj = PyList_GET_ITEM(field_name_list_.obj(), i);
+      PyObject* valueobj = PyDict_GetItem(obj, nameobj);  // borrowed
+      RETURN_IF_PYERROR();
+      RETURN_NOT_OK(value_converters_[i]->AppendSingle(valueobj ? valueobj : Py_None));
+    }
+    return Status::OK();
+  }
+
+  Status AppendTupleItem(PyObject* obj) {
+    if (PyTuple_GET_SIZE(obj) != num_fields_) {
+      return Status::Invalid("Tuple size must be equal to number of struct fields");
+    }
+    for (int i = 0; i < num_fields_; i++) {
+      PyObject* valueobj = PyTuple_GET_ITEM(obj, i);
+      RETURN_NOT_OK(value_converters_[i]->AppendSingle(valueobj));
+    }
+    return Status::OK();
+  }
+
   std::vector<std::unique_ptr<SeqConverter>> value_converters_;
   OwnedRef field_name_list_;
   int num_fields_;
+  // Whether we're converting from a sequence of dicts or tuples
+  enum { UNKNOWN, DICTS, TUPLES } source_kind_ = UNKNOWN;
 };
 
 class DecimalConverter
diff --git a/python/benchmarks/convert_builtins.py b/python/benchmarks/convert_builtins.py
index 92b2b85..a4dc9f2 100644
--- a/python/benchmarks/convert_builtins.py
+++ b/python/benchmarks/convert_builtins.py
@@ -144,11 +144,21 @@ class BuiltinsGenerator(object):
             partial(self.generate_int_list, none_prob=none_prob),
             n, min_size, max_size, none_prob)
 
+    def generate_tuple_list(self, n, none_prob=DEFAULT_NONE_PROB):
+        """
+        Generate a list of tuples with random values.
+        Each tuple has the form `(int value, float value, bool value)`
+        """
+        dicts = self.generate_dict_list(n, none_prob=none_prob)
+        tuples = [(d.get('u'), d.get('v'), d.get('w'))
+                  if d is not None else None
+                  for d in dicts]
+        assert len(tuples) == n
+        return tuples
 
     def generate_dict_list(self, n, none_prob=DEFAULT_NONE_PROB):
         """
-        Generate a list of dicts with a random size between *min_size* and
-        *max_size*.
+        Generate a list of dicts with random values.
         Each dict has the form `{'u': int value, 'v': float value, 'w': bool value}`
         """
         ints = self.generate_int_list(n, none_prob=none_prob)
@@ -179,12 +189,14 @@ class BuiltinsGenerator(object):
         """
         size = None
 
-        if type_name in ('bool', 'ascii', 'unicode', 'int64 list', 'struct'):
+        if type_name in ('bool', 'ascii', 'unicode', 'int64 list'):
             kind = type_name
         elif type_name.startswith(('int', 'uint')):
             kind = 'int'
         elif type_name.startswith('float'):
             kind = 'float'
+        elif type_name.startswith('struct'):
+            kind = 'struct'
         elif type_name == 'binary':
             kind = 'varying binary'
         elif type_name.startswith('binary'):
@@ -226,6 +238,7 @@ class BuiltinsGenerator(object):
             'int64 list': partial(self.generate_int_list_list,
                                   min_size=0, max_size=20),
             'struct': self.generate_dict_list,
+            'struct from tuples': self.generate_tuple_list,
         }
         data = factories[kind](n)
         return ty, data
@@ -239,7 +252,7 @@ class ConvertPyListToArray(object):
     types = ('int32', 'uint32', 'int64', 'uint64',
              'float32', 'float64', 'bool',
              'binary', 'binary10', 'ascii', 'unicode',
-             'int64 list', 'struct')
+             'int64 list', 'struct', 'struct from tuples')
 
     param_names = ['type']
     params = [types]
diff --git a/python/pyarrow/tests/test_convert_builtin.py b/python/pyarrow/tests/test_convert_builtin.py
index ce54f23..5cd4a52 100644
--- a/python/pyarrow/tests/test_convert_builtin.py
+++ b/python/pyarrow/tests/test_convert_builtin.py
@@ -531,6 +531,45 @@ def test_struct_from_dicts():
     assert arr.to_pylist() == expected
 
 
+def test_struct_from_tuples():
+    ty = pa.struct([pa.field('a', pa.int32()),
+                    pa.field('b', pa.string()),
+                    pa.field('c', pa.bool_())])
+
+    data = [(5, 'foo', True),
+            (6, 'bar', False)]
+    expected = [{'a': 5, 'b': 'foo', 'c': True},
+                {'a': 6, 'b': 'bar', 'c': False}]
+    arr = pa.array(data, type=ty)
+    assert arr.to_pylist() == expected
+
+    # With omitted values
+    data = [(5, 'foo', None),
+            None,
+            (6, None, False)]
+    expected = [{'a': 5, 'b': 'foo', 'c': None},
+                None,
+                {'a': 6, 'b': None, 'c': False}]
+    arr = pa.array(data, type=ty)
+    assert arr.to_pylist() == expected
+
+    # Invalid tuple size
+    for tup in [(5, 'foo'), (), ('5', 'foo', True, None)]:
+        with pytest.raises(ValueError, match="(?i)tuple size"):
+            pa.array([tup], type=ty)
+
+
+def test_struct_from_mixed_sequence():
+    # It is forbidden to mix dicts and tuples when initializing a struct array
+    ty = pa.struct([pa.field('a', pa.int32()),
+                    pa.field('b', pa.string()),
+                    pa.field('c', pa.bool_())])
+    data = [(5, 'foo', True),
+            {'a': 6, 'b': 'bar', 'c': False}]
+    with pytest.raises(TypeError):
+        pa.array(data, type=ty)
+
+
 def test_structarray_from_arrays_coerce():
     # ARROW-1706
     ints = [None, 2, 3]

-- 
To stop receiving notification emails like this one, please contact
[email protected].

Reply via email to