This is an automated email from the ASF dual-hosted git repository.
apitrou pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git
The following commit(s) were added to refs/heads/master by this push:
new 210cf06139 ARROW-17256 [Python] Can't call combine_chunks on empty
ChunkedArray (#13757)
210cf06139 is described below
commit 210cf061390e96c032008a41dd01c683b4b8e6e3
Author: 0x26res <[email protected]>
AuthorDate: Mon Aug 8 20:22:09 2022 +0100
ARROW-17256 [Python] Can't call combine_chunks on empty ChunkedArray
(#13757)
https://issues.apache.org/jira/browse/ARROW-17256
Not 100% sure it should be fixed at this level. Should `combine_chunks`
receive a `DataType` instead?
Lead-authored-by: aandres <[email protected]>
Co-authored-by: 0x26res <[email protected]>
Co-authored-by: Antoine Pitrou <[email protected]>
Signed-off-by: Antoine Pitrou <[email protected]>
---
python/pyarrow/table.pxi | 5 ++++-
python/pyarrow/tests/test_table.py | 10 ++++++++++
2 files changed, 14 insertions(+), 1 deletion(-)
diff --git a/python/pyarrow/table.pxi b/python/pyarrow/table.pxi
index 17f88aca4e..5d84716fc9 100644
--- a/python/pyarrow/table.pxi
+++ b/python/pyarrow/table.pxi
@@ -697,7 +697,10 @@ cdef class ChunkedArray(_PandasConvertible):
100
]
"""
-        return concat_arrays(self.chunks)
+        if self.num_chunks == 0:
+            return array([], type=self.type)
+        else:
+            return concat_arrays(self.chunks)
def unique(self):
"""
diff --git a/python/pyarrow/tests/test_table.py b/python/pyarrow/tests/test_table.py
index 6474974b4f..dbd90ac907 100644
--- a/python/pyarrow/tests/test_table.py
+++ b/python/pyarrow/tests/test_table.py
@@ -118,6 +118,16 @@ def test_combine_chunks():
assert res.equals(expected)
+def test_chunked_array_can_combine_chunks_with_no_chunks():
+    # https://issues.apache.org/jira/browse/ARROW-17256
+    assert pa.chunked_array([], type=pa.bool_()).combine_chunks() == pa.array(
+        [], type=pa.bool_()
+    )
+    assert pa.chunked_array(
+        [pa.array([], type=pa.bool_())], type=pa.bool_()
+    ).combine_chunks() == pa.array([], type=pa.bool_())
+
+
def test_chunked_array_to_numpy():
data = pa.chunked_array([
[1, 2, 3],