[arrow] branch master updated: ARROW-3374: [Python] Implicitly set from_pandas=True when passing pandas.Categorical to pyarrow.array. Preserve ordered categories

wesm Mon, 01 Oct 2018 03:11:04 -0700

This is an automated email from the ASF dual-hosted git repository.

wesm pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git



The following commit(s) were added to refs/heads/master by this push:
     new b83db61  ARROW-3374: [Python] Implicitly set from_pandas=True when passing pandas.Categorical to pyarrow.array. Preserve ordered categories
b83db61 is described below

commit b83db6116dc323eea3f6b516e176d36d66960475
Author: Wes McKinney <[email protected]>
AuthorDate: Mon Oct 1 06:10:38 2018 -0400

    ARROW-3374: [Python] Implicitly set from_pandas=True when passing pandas.Categorical to pyarrow.array. Preserve ordered categories
    
    Author: Wes McKinney <[email protected]>
    
    Closes #2670 from wesm/ARROW-3374 and squashes the following commits:
    
    914c7dddc <Wes McKinney> Implicitly set from_pandas=True when passing pandas.Categorical to pyarrow.array. Ensure that ordered categories are preserved
---
 cpp/src/arrow/python/arrow_to_pandas.cc     |  5 +++
 python/pyarrow/array.pxi                    |  4 +--
 python/pyarrow/tests/test_convert_pandas.py | 47 +++++++++++++++++++----------
 3 files changed, 38 insertions(+), 18 deletions(-)

diff --git a/cpp/src/arrow/python/arrow_to_pandas.cc b/cpp/src/arrow/python/arrow_to_pandas.cc
index a7d98da..be6e9b1 100644
--- a/cpp/src/arrow/python/arrow_to_pandas.cc
+++ b/cpp/src/arrow/python/arrow_to_pandas.cc
@@ -1898,6 +1898,11 @@ class ArrowDeserializer {
     PyDict_SetItemString(result_, "dictionary", block->dictionary());
     RETURN_IF_PYERROR();
 
+    PyObject* py_ordered = type.ordered() ? Py_True : Py_False;
+    Py_INCREF(py_ordered);
+    PyDict_SetItemString(result_, "ordered", py_ordered);
+    RETURN_IF_PYERROR();
+
     return Status::OK();
   }
 
diff --git a/python/pyarrow/array.pxi b/python/pyarrow/array.pxi
index 7de2340..320852a 100644
--- a/python/pyarrow/array.pxi
+++ b/python/pyarrow/array.pxi
@@ -160,7 +160,7 @@ def array(object obj, type=None, mask=None, size=None, bint from_pandas=False,
             return DictionaryArray.from_arrays(
                 values.codes, values.categories.values,
                 mask=mask, ordered=values.ordered,
-                from_pandas=from_pandas, safe=safe,
+                from_pandas=True, safe=safe,
                 memory_pool=memory_pool)
         else:
             import pyarrow.pandas_compat as pdcompat
@@ -811,7 +811,7 @@ cdef wrap_array_output(PyObject* output):
     if isinstance(obj, dict):
         return Categorical(obj['indices'],
                            categories=obj['dictionary'],
-                           fastpath=True)
+                           ordered=obj['ordered'], fastpath=True)
     else:
         return obj
 
diff --git a/python/pyarrow/tests/test_convert_pandas.py b/python/pyarrow/tests/test_convert_pandas.py
index db87d9a..9bae5ea 100644
--- a/python/pyarrow/tests/test_convert_pandas.py
+++ b/python/pyarrow/tests/test_convert_pandas.py
@@ -1983,26 +1983,41 @@ class TestConvertMisc(object):
         v1 = ['foo', None, 'bar', 'qux', np.nan]
         v2 = [4, 5, 6, 7, 8]
         v3 = [b'foo', None, b'bar', b'qux', np.nan]
-        df = pd.DataFrame({'cat_strings': pd.Categorical(v1 * repeats),
-                           'cat_ints': pd.Categorical(v2 * repeats),
-                           'cat_binary': pd.Categorical(v3 * repeats),
-                           'cat_strings_ordered': pd.Categorical(
-                               v1 * repeats, categories=['bar', 'qux', 'foo'],
-                               ordered=True),
-                           'ints': v2 * repeats,
-                           'ints2': v2 * repeats,
-                           'strings': v1 * repeats,
-                           'strings2': v1 * repeats,
-                           'strings3': v3 * repeats})
+
+        arrays = {
+            'cat_strings': pd.Categorical(v1 * repeats),
+            'cat_strings_with_na': pd.Categorical(v1 * repeats,
+                                                  categories=['foo', 'bar']),
+            'cat_ints': pd.Categorical(v2 * repeats),
+            'cat_binary': pd.Categorical(v3 * repeats),
+            'cat_strings_ordered': pd.Categorical(
+                v1 * repeats, categories=['bar', 'qux', 'foo'],
+                ordered=True),
+            'ints': v2 * repeats,
+            'ints2': v2 * repeats,
+            'strings': v1 * repeats,
+            'strings2': v1 * repeats,
+            'strings3': v3 * repeats}
+        df = pd.DataFrame(arrays)
         _check_pandas_roundtrip(df)
 
+        for k in arrays:
+            _check_array_roundtrip(arrays[k])
+
+    def test_category_implicit_from_pandas(self):
+        # ARROW-3374
+        def _check(v):
+            arr = pa.array(v)
+            result = arr.to_pandas()
+            tm.assert_series_equal(pd.Series(result), pd.Series(v))
+
         arrays = [
-            pd.Categorical(v1 * repeats),
-            pd.Categorical(v2 * repeats),
-            pd.Categorical(v3 * repeats)
+            pd.Categorical(['a', 'b', 'c'], categories=['a', 'b']),
+            pd.Categorical(['a', 'b', 'c'], categories=['a', 'b'],
+                           ordered=True)
         ]
-        for values in arrays:
-            _check_array_roundtrip(values)
+        for arr in arrays:
+            _check(arr)
 
     def test_empty_category(self):
         # ARROW-2443

[arrow] branch master updated: ARROW-3374: [Python] Implicitly set from_pandas=True when passing pandas.Categorical to pyarrow.array. Preserve ordered categories

Reply via email to