This is an automated email from the ASF dual-hosted git repository.

areeve pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow.git


The following commit(s) were added to refs/heads/main by this push:
     new b3e2e08e56 GH-48442: [Python] Remove workaround that excluded struct 
types from `chunked_arrays` (#48443)
b3e2e08e56 is described below

commit b3e2e08e56c004355bfeecf83b821ab7138bbf4e
Author: Hyukjin Kwon <[email protected]>
AuthorDate: Thu Dec 18 11:11:01 2025 +0900

    GH-48442: [Python] Remove workaround that excluded struct types from 
`chunked_arrays` (#48443)
    
    ### Rationale for this change
    
    The `chunked_arrays` hypothesis strategy had a workaround that excluded
    struct types, on the assumption that field metadata is not preserved
    (added in
    https://github.com/apache/arrow/commit/d06c664a1966da682a2382e46fe148be96cca1aa).
    
    Testing confirms that field metadata is now correctly preserved in
    chunked arrays with struct types, so the workaround is no longer
    necessary. The underlying issue was fixed by
    https://github.com/apache/arrow/commit/dd0988b49cb6726cf915bb9f53d7320e3a97b00b.
    
    The code now explicitly calls `CChunkedArray::Make()` instead of
    manually constructing a `CChunkedArray`.
    
    ### What changes are included in this PR?
    
    Remove the assumption that field metadata is not preserved.
    
    ### Are these changes tested?
    
    Manually tested that field metadata is preserved, using the following
    script (generated by ChatGPT):
    
    ```python
    # Manual check that struct field metadata survives pa.chunked_array().
    # Exits with status 0 when the explicit-type check passes, 1 otherwise.
    import sys

    import pyarrow as pa

    # Create a struct type with custom field metadata
    struct_type = pa.struct([
        pa.field(
            'a', pa.int32(),
            metadata={'custom_key': 'custom_value_a',
                      'description': 'field a'},
        ),
        pa.field(
            'b', pa.string(),
            metadata={'custom_key': 'custom_value_b',
                      'description': 'field b'},
        ),
    ])

    print("=== Original struct type ===")
    print(f"Type: {struct_type}")
    print(f"Field 'a' metadata: {struct_type[0].metadata}")
    print(f"Field 'b' metadata: {struct_type[1].metadata}")
    print()

    # Create arrays with this struct type
    arr1 = pa.array([
        {'a': 1, 'b': 'foo'},
        {'a': 2, 'b': 'bar'},
    ], type=struct_type)

    arr2 = pa.array([
        {'a': 3, 'b': 'baz'},
        {'a': 4, 'b': 'qux'},
    ], type=struct_type)

    print("=== Individual arrays ===")
    print(f"arr1.type: {arr1.type}")
    print(f"arr1.type[0].metadata: {arr1.type[0].metadata}")
    print(f"arr2.type: {arr2.type}")
    print(f"arr2.type[0].metadata: {arr2.type[0].metadata}")
    print()

    # Create chunked array WITH explicit type parameter
    chunked_with_type = pa.chunked_array([arr1, arr2], type=struct_type)

    print("=== Chunked array (with explicit type) ===")
    print(f"Type: {chunked_with_type.type}")
    print(f"Field 'a' metadata: {chunked_with_type.type[0].metadata}")
    print(f"Field 'b' metadata: {chunked_with_type.type[1].metadata}")
    print()

    # Verify metadata is preserved on both fields
    if (chunked_with_type.type[0].metadata == struct_type[0].metadata and
            chunked_with_type.type[1].metadata == struct_type[1].metadata):
        print("✓ SUCCESS: Field metadata IS preserved!")
        print(f"  Field 'a': {dict(chunked_with_type.type[0].metadata)}")
        print(f"  Field 'b': {dict(chunked_with_type.type[1].metadata)}")
        exit_code = 0
    else:
        print("✗ FAILED: Field metadata was lost")
        exit_code = 1

    print()
    print("=== Test without explicit type (for comparison) ===")
    # Without an explicit type, the type is taken from the chunks themselves
    chunked_without_type = pa.chunked_array([arr1, arr2])
    print(f"Type: {chunked_without_type.type}")
    print(f"Field 'a' metadata: {chunked_without_type.type[0].metadata}")
    print(f"Field 'b' metadata: {chunked_without_type.type[1].metadata}")

    # Informational only: this comparison does not affect the exit status
    if chunked_without_type.type[0].metadata == struct_type[0].metadata:
        print("  → Metadata preserved even without explicit type "
              "(from first chunk)")
    else:
        print("  → Metadata was NOT preserved without explicit type")

    # Report the result of the explicit-type check to the caller
    sys.exit(exit_code)
    ```
    
    ### Are there any user-facing changes?
    
    No, test-only.
    * GitHub Issue: #48442
    
    Authored-by: Hyukjin Kwon <[email protected]>
    Signed-off-by: Adam Reeve <[email protected]>
---
 python/pyarrow/tests/strategies.py | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/python/pyarrow/tests/strategies.py 
b/python/pyarrow/tests/strategies.py
index 218176dbc5..8319c9ce3e 100644
--- a/python/pyarrow/tests/strategies.py
+++ b/python/pyarrow/tests/strategies.py
@@ -386,9 +386,6 @@ def chunked_arrays(draw, type, min_chunks=0, 
max_chunks=None, chunk_size=None):
     if isinstance(type, st.SearchStrategy):
         type = draw(type)
 
-    # TODO(kszucs): remove it, field metadata is not kept
-    h.assume(not pa.types.is_struct(type))
-
     chunk = arrays(type, size=chunk_size)
     chunks = st.lists(chunk, min_size=min_chunks, max_size=max_chunks)
 

Reply via email to