This is an automated email from the ASF dual-hosted git repository.

jorisvandenbossche pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow.git


The following commit(s) were added to refs/heads/main by this push:
     new dd3d3cd1be GH-40273: [Python] Support construction of Run-End Encoded 
arrays in pa.array(..) (#40341)
dd3d3cd1be is described below

commit dd3d3cd1be27da7c872bfced553f25b8a0240021
Author: Alenka Frim <[email protected]>
AuthorDate: Wed Mar 20 08:44:08 2024 +0100

    GH-40273: [Python] Support construction of Run-End Encoded arrays in 
pa.array(..) (#40341)
    
    ### Rationale for this change
    
    We want to enable the construction of Run-End Encoded arrays with the
`pyarrow.array` constructor.
    
    ### What changes are included in this PR?
    
    Added a check for the Run-End Encoded type in the `pyarrow.array` constructor
code: the input is first converted using the value type, then run-end encoded.
    
    ### Are these changes tested?
    
    Yes, added test_run_end_encoded_from_array_with_type.
    
    ### Are there any user-facing changes?
    
    No.
    * GitHub Issue: #40273
    
    Lead-authored-by: AlenkaF <[email protected]>
    Co-authored-by: Joris Van den Bossche <[email protected]>
    Signed-off-by: Joris Van den Bossche <[email protected]>
---
 python/pyarrow/array.pxi           | 18 +++++++++++++++---
 python/pyarrow/tests/test_array.py | 39 ++++++++++++++++++++++++++++++++++++++
 2 files changed, 54 insertions(+), 3 deletions(-)

diff --git a/python/pyarrow/array.pxi b/python/pyarrow/array.pxi
index def4c5e9ba..59d2e91ef6 100644
--- a/python/pyarrow/array.pxi
+++ b/python/pyarrow/array.pxi
@@ -336,11 +336,23 @@ def array(object obj, type=None, mask=None, size=None, 
from_pandas=None,
             if pandas_api.have_pandas:
                 values, type = pandas_api.compat.get_datetimetz_type(
                     values, obj.dtype, type)
-            result = _ndarray_to_array(values, mask, type, c_from_pandas, safe,
-                                       pool)
+            if type and type.id == _Type_RUN_END_ENCODED:
+                arr = _ndarray_to_array(
+                    values, mask, type.value_type, c_from_pandas, safe, pool)
+                result = _pc().run_end_encode(arr, 
run_end_type=type.run_end_type,
+                                              memory_pool=memory_pool)
+            else:
+                result = _ndarray_to_array(values, mask, type, c_from_pandas, 
safe,
+                                           pool)
     else:
+        if type and type.id == _Type_RUN_END_ENCODED:
+            arr = _sequence_to_array(
+                obj, mask, size, type.value_type, pool, from_pandas)
+            result = _pc().run_end_encode(arr, run_end_type=type.run_end_type,
+                                          memory_pool=memory_pool)
         # ConvertPySequence does strict conversion if type is explicitly passed
-        result = _sequence_to_array(obj, mask, size, type, pool, c_from_pandas)
+        else:
+            result = _sequence_to_array(obj, mask, size, type, pool, 
c_from_pandas)
 
     if extension_type is not None:
         result = ExtensionArray.from_storage(extension_type, result)
diff --git a/python/pyarrow/tests/test_array.py 
b/python/pyarrow/tests/test_array.py
index a8cd20720e..999c1af453 100644
--- a/python/pyarrow/tests/test_array.py
+++ b/python/pyarrow/tests/test_array.py
@@ -3580,6 +3580,45 @@ def test_run_end_encoded_from_buffers():
                                            1, offset, children)
 
 
+def test_run_end_encoded_from_array_with_type():
+    run_ends = [1, 3, 6]
+    values = [1, 2, 3]
+    ree_type = pa.run_end_encoded(pa.int32(), pa.int64())
+    expected = pa.RunEndEncodedArray.from_arrays(run_ends, values,
+                                                 ree_type)
+
+    arr = [1, 2, 2, 3, 3, 3]
+    result = pa.array(arr, type=ree_type)
+    assert result.equals(expected)
+    result = pa.array(np.array(arr), type=ree_type)
+    assert result.equals(expected)
+
+    ree_type_2 = pa.run_end_encoded(pa.int16(), pa.float32())
+    result = pa.array(arr, type=ree_type_2)
+    assert not result.equals(expected)
+    expected_2 = pa.RunEndEncodedArray.from_arrays(run_ends, values,
+                                                   ree_type_2)
+    assert result.equals(expected_2)
+
+    run_ends = [1, 3, 5, 6]
+    values = [1, 2, 3, None]
+    expected = pa.RunEndEncodedArray.from_arrays(run_ends, values,
+                                                 ree_type)
+
+    arr = [1, 2, 2, 3, 3, None]
+    result = pa.array(arr, type=ree_type)
+    assert result.equals(expected)
+
+    run_ends = [1, 3, 4, 5, 6]
+    values = [1, 2, None, 3, None]
+    expected = pa.RunEndEncodedArray.from_arrays(run_ends, values,
+                                                 ree_type)
+
+    mask = pa.array([False, False, False, True, False, True])
+    result = pa.array(arr, type=ree_type, mask=mask)
+    assert result.equals(expected)
+
+
 @pytest.mark.parametrize(('list_array_type', 'list_type_factory'),
                          [(pa.ListViewArray, pa.list_view),
                           (pa.LargeListViewArray, pa.large_list_view)])

Reply via email to