This is an automated email from the ASF dual-hosted git repository.
jorisvandenbossche pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow.git
The following commit(s) were added to refs/heads/main by this push:
new dd3d3cd1be GH-40273: [Python] Support construction of Run-End Encoded
arrays in pa.array(..) (#40341)
dd3d3cd1be is described below
commit dd3d3cd1be27da7c872bfced553f25b8a0240021
Author: Alenka Frim <[email protected]>
AuthorDate: Wed Mar 20 08:44:08 2024 +0100
GH-40273: [Python] Support construction of Run-End Encoded arrays in
pa.array(..) (#40341)
### Rationale for this change
We want to enable the construction of Run-End Encoded arrays with the
`pyarrow.array` constructor.
### What changes are included in this PR?
Added a check for Run-End Encoded Type in the `pyarrow.array` constructor
code.
### Are these changes tested?
Yes, added test_run_end_encoded_from_array_with_type.
### Are there any user-facing changes?
No.
* GitHub Issue: #40273
Lead-authored-by: AlenkaF <[email protected]>
Co-authored-by: Joris Van den Bossche <[email protected]>
Signed-off-by: Joris Van den Bossche <[email protected]>
---
python/pyarrow/array.pxi | 18 +++++++++++++++---
python/pyarrow/tests/test_array.py | 39 ++++++++++++++++++++++++++++++++++++++
2 files changed, 54 insertions(+), 3 deletions(-)
diff --git a/python/pyarrow/array.pxi b/python/pyarrow/array.pxi
index def4c5e9ba..59d2e91ef6 100644
--- a/python/pyarrow/array.pxi
+++ b/python/pyarrow/array.pxi
@@ -336,11 +336,23 @@ def array(object obj, type=None, mask=None, size=None, from_pandas=None,
if pandas_api.have_pandas:
values, type = pandas_api.compat.get_datetimetz_type(
values, obj.dtype, type)
- result = _ndarray_to_array(values, mask, type, c_from_pandas, safe,
- pool)
+ if type and type.id == _Type_RUN_END_ENCODED:
+ arr = _ndarray_to_array(
+ values, mask, type.value_type, c_from_pandas, safe, pool)
+ result = _pc().run_end_encode(arr, run_end_type=type.run_end_type,
+ memory_pool=memory_pool)
+ else:
+ result = _ndarray_to_array(values, mask, type, c_from_pandas, safe,
+ pool)
else:
+ if type and type.id == _Type_RUN_END_ENCODED:
+ arr = _sequence_to_array(
+ obj, mask, size, type.value_type, pool, from_pandas)
+ result = _pc().run_end_encode(arr, run_end_type=type.run_end_type,
+ memory_pool=memory_pool)
# ConvertPySequence does strict conversion if type is explicitly passed
- result = _sequence_to_array(obj, mask, size, type, pool, c_from_pandas)
+ else:
+ result = _sequence_to_array(obj, mask, size, type, pool, c_from_pandas)
if extension_type is not None:
result = ExtensionArray.from_storage(extension_type, result)
diff --git a/python/pyarrow/tests/test_array.py b/python/pyarrow/tests/test_array.py
index a8cd20720e..999c1af453 100644
--- a/python/pyarrow/tests/test_array.py
+++ b/python/pyarrow/tests/test_array.py
@@ -3580,6 +3580,45 @@ def test_run_end_encoded_from_buffers():
1, offset, children)
+def test_run_end_encoded_from_array_with_type():
+ run_ends = [1, 3, 6]
+ values = [1, 2, 3]
+ ree_type = pa.run_end_encoded(pa.int32(), pa.int64())
+ expected = pa.RunEndEncodedArray.from_arrays(run_ends, values,
+ ree_type)
+
+ arr = [1, 2, 2, 3, 3, 3]
+ result = pa.array(arr, type=ree_type)
+ assert result.equals(expected)
+ result = pa.array(np.array(arr), type=ree_type)
+ assert result.equals(expected)
+
+ ree_type_2 = pa.run_end_encoded(pa.int16(), pa.float32())
+ result = pa.array(arr, type=ree_type_2)
+ assert not result.equals(expected)
+ expected_2 = pa.RunEndEncodedArray.from_arrays(run_ends, values,
+ ree_type_2)
+ assert result.equals(expected_2)
+
+ run_ends = [1, 3, 5, 6]
+ values = [1, 2, 3, None]
+ expected = pa.RunEndEncodedArray.from_arrays(run_ends, values,
+ ree_type)
+
+ arr = [1, 2, 2, 3, 3, None]
+ result = pa.array(arr, type=ree_type)
+ assert result.equals(expected)
+
+ run_ends = [1, 3, 4, 5, 6]
+ values = [1, 2, None, 3, None]
+ expected = pa.RunEndEncodedArray.from_arrays(run_ends, values,
+ ree_type)
+
+ mask = pa.array([False, False, False, True, False, True])
+ result = pa.array(arr, type=ree_type, mask=mask)
+ assert result.equals(expected)
+
+
@pytest.mark.parametrize(('list_array_type', 'list_type_factory'),
[(pa.ListViewArray, pa.list_view),
(pa.LargeListViewArray, pa.large_list_view)])