kou commented on code in PR #38520:
URL: https://github.com/apache/arrow/pull/38520#discussion_r1391133051
##########
python/pyarrow/table.pxi:
##########
@@ -3986,6 +3986,59 @@ cdef class Table(_Tabular):
result.validate()
return result
+ @staticmethod
+ def from_struct_array(struct_array):
+ """
+ Construct a Table from a StructArray.
+
+ Each field in the StructArray will become a column in the resulting
+ ``Table``.
+
+ Parameters
+ ----------
+ struct_array : StructArray or ChunkedArray
+ Array to construct the table from.
+
+ Returns
+ -------
+ pyarrow.Table
+
+ Examples
+ --------
+ >>> import pyarrow as pa
+ >>> struct = pa.array([{'n_legs': 2, 'animals': 'Parrot'},
+ ... {'year': 2022, 'n_legs': 4}])
+ >>> pa.Table.from_struct_array(struct).to_pandas()
+ animals n_legs year
+ 0 Parrot 2 NaN
+ 1 None 4 2022.0
+ """
+ if isinstance(struct_array, Array):
+ struct_array = chunked_array([struct_array])
+ return Table.from_batches([
+ RecordBatch.from_struct_array(chunk)
+ for chunk in struct_array.chunks
+ ])
Review Comment:
Can we avoid creating a needless chunked array?
```suggestion
if isinstance(struct_array, Array):
return Table.from_batches([RecordBatch.from_struct_array(struct_array)])
else:
return Table.from_batches([
RecordBatch.from_struct_array(chunk)
for chunk in struct_array.chunks
])
```
##########
python/pyarrow/tests/test_table.py:
##########
@@ -878,6 +878,55 @@ def test_recordbatch_from_struct_array():
))
+def test_recordbatch_to_struct_array():
+ batch = pa.RecordBatch.from_arrays(
+ [
+ pa.array([1, None], type=pa.int32()),
+ pa.array([None, 1.0], type=pa.float32()),
+ ], ["ints", "floats"]
+ )
+ result = batch.to_struct_array()
+ assert result.equals(pa.array(
+ [{"ints": 1}, {"floats": 1.0}],
+ type=pa.struct([("ints", pa.int32()), ("floats", pa.float32())]),
+ ))
+
+
+def test_table_from_struct_array_invalid():
+ with pytest.raises(TypeError):
+ pa.Table.from_struct_array(pa.array(range(5)))
+
+
+def test_table_from_struct_array():
+ struct_array = pa.array(
+ [{"ints": 1}, {"floats": 1.0}],
+ type=pa.struct([("ints", pa.int32()), ("floats", pa.float32())]),
+ )
+ result = pa.Table.from_struct_array(struct_array)
+ assert result.equals(pa.Table.from_arrays(
+ [
+ pa.array([1, None], type=pa.int32()),
+ pa.array([None, 1.0], type=pa.float32()),
+ ], ["ints", "floats"]
+ ))
+
+
+def test_table_to_struct_array():
+ table = pa.Table.from_arrays(
+ [
+ pa.array([1, None], type=pa.int32()),
+ pa.array([None, 1.0], type=pa.float32()),
+ ], ["ints", "floats"]
+ )
+ result = table.to_struct_array()
+ assert result.equals(pa.chunked_array(
+ pa.array(
+ [{"ints": 1}, {"floats": 1.0}],
+ type=pa.struct([("ints", pa.int32()), ("floats", pa.float32())]),
+ ),
+ ))
Review Comment:
Can we also add a test with `max_chunksize`?
##########
python/pyarrow/tests/test_table.py:
##########
@@ -878,6 +878,55 @@ def test_recordbatch_from_struct_array():
))
+def test_recordbatch_to_struct_array():
+ batch = pa.RecordBatch.from_arrays(
+ [
+ pa.array([1, None], type=pa.int32()),
+ pa.array([None, 1.0], type=pa.float32()),
+ ], ["ints", "floats"]
+ )
+ result = batch.to_struct_array()
+ assert result.equals(pa.array(
+ [{"ints": 1}, {"floats": 1.0}],
+ type=pa.struct([("ints", pa.int32()), ("floats", pa.float32())]),
+ ))
+
+
+def test_table_from_struct_array_invalid():
+ with pytest.raises(TypeError):
+ pa.Table.from_struct_array(pa.array(range(5)))
+
+
+def test_table_from_struct_array():
+ struct_array = pa.array(
+ [{"ints": 1}, {"floats": 1.0}],
+ type=pa.struct([("ints", pa.int32()), ("floats", pa.float32())]),
+ )
+ result = pa.Table.from_struct_array(struct_array)
+ assert result.equals(pa.Table.from_arrays(
+ [
+ pa.array([1, None], type=pa.int32()),
+ pa.array([None, 1.0], type=pa.float32()),
+ ], ["ints", "floats"]
+ ))
Review Comment:
Can we add `test_table_from_struct_array_chunked_array()` like this?
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]