This is an automated email from the ASF dual-hosted git repository.
areeve pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow.git
The following commit(s) were added to refs/heads/main by this push:
new b3e2e08e56 GH-48442: [Python] Remove workaround that excluded struct
types from `chunked_arrays` (#48443)
b3e2e08e56 is described below
commit b3e2e08e56c004355bfeecf83b821ab7138bbf4e
Author: Hyukjin Kwon <[email protected]>
AuthorDate: Thu Dec 18 11:11:01 2025 +0900
GH-48442: [Python] Remove workaround that excluded struct types from
`chunked_arrays` (#48443)
### Rationale for this change
The `chunked_arrays` hypothesis strategy had a workaround that excluded
struct types, on the assumption that field metadata is not preserved (added
in
https://github.com/apache/arrow/commit/d06c664a1966da682a2382e46fe148be96cca1aa).
Testing confirms that field metadata is now correctly preserved in chunked
arrays with struct types, so the workaround is no longer necessary. The
underlying issue was fixed by
https://github.com/apache/arrow/commit/dd0988b49cb6726cf915bb9f53d7320e3a97b00b.
The code now explicitly calls `CChunkedArray::Make()` instead of manually
constructing a `CChunkedArray`.
### What changes are included in this PR?
Remove the assumption that field metadata is not preserved.
### Are these changes tested?
Manually tested that field metadata is preserved, using the following script
(generated by ChatGPT):
```python
# Manual check that struct field metadata survives construction of a
# ChunkedArray, both with and without an explicit `type=` argument.
# The process exit status reflects the explicit-type check (0 = preserved).
import sys

import pyarrow as pa

# Create a struct type with custom field metadata
struct_type = pa.struct([
    pa.field('a', pa.int32(), metadata={'custom_key': 'custom_value_a',
                                        'description': 'field a'}),
    pa.field('b', pa.string(), metadata={'custom_key': 'custom_value_b',
                                         'description': 'field b'})
])
print("=== Original struct type ===")
print(f"Type: {struct_type}")
print(f"Field 'a' metadata: {struct_type[0].metadata}")
print(f"Field 'b' metadata: {struct_type[1].metadata}")
print()

# Create arrays with this struct type
arr1 = pa.array([
    {'a': 1, 'b': 'foo'},
    {'a': 2, 'b': 'bar'}
], type=struct_type)
arr2 = pa.array([
    {'a': 3, 'b': 'baz'},
    {'a': 4, 'b': 'qux'}
], type=struct_type)
print("=== Individual arrays ===")
print(f"arr1.type: {arr1.type}")
print(f"arr1.type[0].metadata: {arr1.type[0].metadata}")
print(f"arr2.type: {arr2.type}")
print(f"arr2.type[0].metadata: {arr2.type[0].metadata}")
print()

# Create chunked array WITH explicit type parameter (preserves metadata)
chunked_with_type = pa.chunked_array([arr1, arr2], type=struct_type)
print("=== Chunked array (with explicit type) ===")
print(f"Type: {chunked_with_type.type}")
print(f"Field 'a' metadata: {chunked_with_type.type[0].metadata}")
print(f"Field 'b' metadata: {chunked_with_type.type[1].metadata}")
print()

# Verify metadata is preserved
if (chunked_with_type.type[0].metadata == struct_type[0].metadata and
        chunked_with_type.type[1].metadata == struct_type[1].metadata):
    print("✓ SUCCESS: Field metadata IS preserved!")
    print(f" Field 'a': {dict(chunked_with_type.type[0].metadata)}")
    print(f" Field 'b': {dict(chunked_with_type.type[1].metadata)}")
    exit_code = 0
else:
    print("✗ FAILED: Field metadata was lost")
    exit_code = 1
print()

print("=== Test without explicit type (for comparison) ===")
# What happens without explicit type? (inferred from first chunk)
chunked_without_type = pa.chunked_array([arr1, arr2])
print(f"Type: {chunked_without_type.type}")
print(f"Field 'a' metadata: {chunked_without_type.type[0].metadata}")
print(f"Field 'b' metadata: {chunked_without_type.type[1].metadata}")
if chunked_without_type.type[0].metadata == struct_type[0].metadata:
    print(" → Metadata preserved even without explicit type (from first "
          "chunk)")
else:
    # This branch is reached only when the comparison above failed, so the
    # message must report a loss rather than claim preservation.
    print(" → Note: Without explicit type, metadata was NOT preserved from "
          "first chunk")

# Propagate the result of the explicit-type check to the caller/shell.
sys.exit(exit_code)
```
### Are there any user-facing changes?
No, test-only.
* GitHub Issue: #48442
Authored-by: Hyukjin Kwon <[email protected]>
Signed-off-by: Adam Reeve <[email protected]>
---
python/pyarrow/tests/strategies.py | 3 ---
1 file changed, 3 deletions(-)
diff --git a/python/pyarrow/tests/strategies.py
b/python/pyarrow/tests/strategies.py
index 218176dbc5..8319c9ce3e 100644
--- a/python/pyarrow/tests/strategies.py
+++ b/python/pyarrow/tests/strategies.py
@@ -386,9 +386,6 @@ def chunked_arrays(draw, type, min_chunks=0,
max_chunks=None, chunk_size=None):
if isinstance(type, st.SearchStrategy):
type = draw(type)
- # TODO(kszucs): remove it, field metadata is not kept
- h.assume(not pa.types.is_struct(type))
-
chunk = arrays(type, size=chunk_size)
chunks = st.lists(chunk, min_size=min_chunks, max_size=max_chunks)