(datafusion) branch main updated: feat: correct struct column names for `arrays_zip` return type (#20886)

github-bot Thu, 12 Mar 2026 08:52:37 -0700

This is an automated email from the ASF dual-hosted git repository.

github-bot pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/datafusion.git



The following commit(s) were added to refs/heads/main by this push:
     new 57b275a8d3 feat: correct struct column names for `arrays_zip` return 
type (#20886)
57b275a8d3 is described below

commit 57b275a8d3cd3121be7c49923527054b10656a07
Author: Oleks V <[email protected]>
AuthorDate: Thu Mar 12 08:51:02 2026 -0700

    feat: correct struct column names for `arrays_zip` return type (#20886)
    
    ## Which issue does this PR close?
    
    <!--
    We generally require a GitHub issue to be filed for all bug fixes and
    enhancements and this helps us generate change logs for our releases.
    You can link an issue to this PR using the GitHub syntax. For example
    `Closes #123` indicates that this PR will close issue #123.
    -->
    
    For `arrays_zip`, the return type differs slightly from DuckDB's:
    the struct field names are different.
    
    DuckDB expects a struct with 1-based numeric field names:
    ```
    struct {
      1: ....,
      2: ....,
      3: ....,
      n: ......
    }
    ```
    DataFusion currently produces:
    ```
    struct {
      c0: ....,
      c1: ....,
      c2: ....,
      cn: ......
    }
    ```
    
    
    Future work: Spark uses 0-based numbering for the field names:
    ```
    struct {
      0: ....,
      1: ....,
      2: ....,
      n: ......
    }
    ```
    
    - Closes #.
    
    ## Rationale for this change
    
    
    <!--
    Why are you proposing this change? If this is already explained clearly
    in the issue then this section is not needed.
    Explaining clearly why changes are proposed helps reviewers understand
    your changes and offer better suggestions for fixes.
    -->
    
    ## What changes are included in this PR?
    
    Rename the struct field names in the `arrays_zip` return type from
    `c0`, `c1`, ... to the 1-based `1`, `2`, ...
    
    <!--
    There is no need to duplicate the description in the issue here but it
    is sometimes worth providing a summary of the individual changes in this
    PR.
    -->
    
    ## Are these changes tested?
    
    <!--
    We typically require tests for all PRs in order to:
    1. Prevent the code from being accidentally broken by subsequent changes
    2. Serve as another way to document the expected behavior of the code
    
    If tests are not included in your PR, please explain why (for example,
    are they covered by existing tests)?
    -->
    
    ## Are there any user-facing changes?
    
    <!--
    If there are user-facing changes then we may require documentation to be
    updated before approving the PR.
    -->
    
    <!--
    If there are any breaking changes to public APIs, please add the `api
    change` label.
    -->
---
 datafusion/functions-nested/src/arrays_zip.rs |  4 +-
 datafusion/sqllogictest/test_files/array.slt  | 62 +++++++++++++--------------
 2 files changed, 33 insertions(+), 33 deletions(-)

diff --git a/datafusion/functions-nested/src/arrays_zip.rs 
b/datafusion/functions-nested/src/arrays_zip.rs
index 2ac30d0704..5035439286 100644
--- a/datafusion/functions-nested/src/arrays_zip.rs
+++ b/datafusion/functions-nested/src/arrays_zip.rs
@@ -125,7 +125,7 @@ impl ScalarUDFImpl for ArraysZip {
                     return exec_err!("arrays_zip expects array arguments, got 
{dt}");
                 }
             };
-            fields.push(Field::new(format!("c{i}"), element_type, true));
+            fields.push(Field::new(format!("{}", i + 1), element_type, true));
         }
 
         Ok(List(Arc::new(Field::new_list_field(
@@ -227,7 +227,7 @@ fn arrays_zip_inner(args: &[ArrayRef]) -> Result<ArrayRef> {
     let struct_fields: Fields = element_types
         .iter()
         .enumerate()
-        .map(|(i, dt)| Field::new(format!("c{i}"), dt.clone(), true))
+        .map(|(i, dt)| Field::new(format!("{}", i + 1), dt.clone(), true))
         .collect::<Vec<_>>()
         .into();
 
diff --git a/datafusion/sqllogictest/test_files/array.slt 
b/datafusion/sqllogictest/test_files/array.slt
index 112351c5ef..83e9c9cc9c 100644
--- a/datafusion/sqllogictest/test_files/array.slt
+++ b/datafusion/sqllogictest/test_files/array.slt
@@ -7112,61 +7112,61 @@ from array_distinct_table_2D_fixed;
 query ?
 select arrays_zip([1, 2, 3], [2, 3, 4]);
 ----
-[{c0: 1, c1: 2}, {c0: 2, c1: 3}, {c0: 3, c1: 4}]
+[{1: 1, 2: 2}, {1: 2, 2: 3}, {1: 3, 2: 4}]
 
 # Spark example: arrays_zip(array(1, 2), array(2, 3), array(3, 4))
 query ?
 select arrays_zip([1, 2], [2, 3], [3, 4]);
 ----
-[{c0: 1, c1: 2, c2: 3}, {c0: 2, c1: 3, c2: 4}]
+[{1: 1, 2: 2, 3: 3}, {1: 2, 2: 3, 3: 4}]
 
 # basic: two integer arrays of equal length
 query ?
 select arrays_zip([1, 2, 3], [10, 20, 30]);
 ----
-[{c0: 1, c1: 10}, {c0: 2, c1: 20}, {c0: 3, c1: 30}]
+[{1: 1, 2: 10}, {1: 2, 2: 20}, {1: 3, 2: 30}]
 
 # basic: two arrays with different element types (int + string)
 query ?
 select arrays_zip([1, 2, 3], ['a', 'b', 'c']);
 ----
-[{c0: 1, c1: a}, {c0: 2, c1: b}, {c0: 3, c1: c}]
+[{1: 1, 2: a}, {1: 2, 2: b}, {1: 3, 2: c}]
 
 # three arrays of equal length
 query ?
 select arrays_zip([1, 2, 3], [10, 20, 30], [100, 200, 300]);
 ----
-[{c0: 1, c1: 10, c2: 100}, {c0: 2, c1: 20, c2: 200}, {c0: 3, c1: 30, c2: 300}]
+[{1: 1, 2: 10, 3: 100}, {1: 2, 2: 20, 3: 200}, {1: 3, 2: 30, 3: 300}]
 
 # four arrays of equal length
 query ?
 select arrays_zip([1], [2], [3], [4]);
 ----
-[{c0: 1, c1: 2, c2: 3, c3: 4}]
+[{1: 1, 2: 2, 3: 3, 4: 4}]
 
 # mixed element types: float + boolean
 query ?
 select arrays_zip([1.5, 2.5], [true, false]);
 ----
-[{c0: 1.5, c1: true}, {c0: 2.5, c1: false}]
+[{1: 1.5, 2: true}, {1: 2.5, 2: false}]
 
 # different length arrays: shorter array padded with NULLs
 query ?
 select arrays_zip([1, 2], [3, 4, 5]);
 ----
-[{c0: 1, c1: 3}, {c0: 2, c1: 4}, {c0: NULL, c1: 5}]
+[{1: 1, 2: 3}, {1: 2, 2: 4}, {1: NULL, 2: 5}]
 
 # different length arrays: first longer
 query ?
 select arrays_zip([1, 2, 3], [10]);
 ----
-[{c0: 1, c1: 10}, {c0: 2, c1: NULL}, {c0: 3, c1: NULL}]
+[{1: 1, 2: 10}, {1: 2, 2: NULL}, {1: 3, 2: NULL}]
 
 # different length: one single element, other three elements
 query ?
 select arrays_zip([1], ['a', 'b', 'c']);
 ----
-[{c0: 1, c1: a}, {c0: NULL, c1: b}, {c0: NULL, c1: c}]
+[{1: 1, 2: a}, {1: NULL, 2: b}, {1: NULL, 2: c}]
 
 # empty arrays
 query ?
@@ -7178,19 +7178,19 @@ select arrays_zip([], []);
 query ?
 select arrays_zip([], [1, 2, 3]);
 ----
-[{c0: NULL, c1: 1}, {c0: NULL, c1: 2}, {c0: NULL, c1: 3}]
+[{1: NULL, 2: 1}, {1: NULL, 2: 2}, {1: NULL, 2: 3}]
 
 # NULL elements inside arrays
 query ?
 select arrays_zip([1, NULL, 3], ['a', 'b', 'c']);
 ----
-[{c0: 1, c1: a}, {c0: NULL, c1: b}, {c0: 3, c1: c}]
+[{1: 1, 2: a}, {1: NULL, 2: b}, {1: 3, 2: c}]
 
 # all NULL elements
 query ?
 select arrays_zip([NULL::int, NULL, NULL], [NULL::text, NULL, NULL]);
 ----
-[{c0: NULL, c1: NULL}, {c0: NULL, c1: NULL}, {c0: NULL, c1: NULL}]
+[{1: NULL, 2: NULL}, {1: NULL, 2: NULL}, {1: NULL, 2: NULL}]
 
 # both args are NULL (entire list null)
 query ?
@@ -7202,35 +7202,35 @@ NULL
 query ?
 select arrays_zip(NULL::int[], [1, 2, 3]);
 ----
-[{c0: NULL, c1: 1}, {c0: NULL, c1: 2}, {c0: NULL, c1: 3}]
+[{1: NULL, 2: 1}, {1: NULL, 2: 2}, {1: NULL, 2: 3}]
 
 # real array + NULL list
 query ?
 select arrays_zip([1, 2], NULL::text[]);
 ----
-[{c0: 1, c1: NULL}, {c0: 2, c1: NULL}]
+[{1: 1, 2: NULL}, {1: 2, 2: NULL}]
 
 # column-level test with multiple rows
 query ?
 select arrays_zip(a, b) from (values ([1, 2], [10, 20]), ([3, 4, 5], [30]), 
([6], [60, 70])) as t(a, b);
 ----
-[{c0: 1, c1: 10}, {c0: 2, c1: 20}]
-[{c0: 3, c1: 30}, {c0: 4, c1: NULL}, {c0: 5, c1: NULL}]
-[{c0: 6, c1: 60}, {c0: NULL, c1: 70}]
+[{1: 1, 2: 10}, {1: 2, 2: 20}]
+[{1: 3, 2: 30}, {1: 4, 2: NULL}, {1: 5, 2: NULL}]
+[{1: 6, 2: 60}, {1: NULL, 2: 70}]
 
 # column-level test with NULL rows
 query ?
 select arrays_zip(a, b) from (values ([1, 2], [10, 20]), (null, [30, 40]), 
([5, 6], null)) as t(a, b);
 ----
-[{c0: 1, c1: 10}, {c0: 2, c1: 20}]
-[{c0: NULL, c1: 30}, {c0: NULL, c1: 40}]
-[{c0: 5, c1: NULL}, {c0: 6, c1: NULL}]
+[{1: 1, 2: 10}, {1: 2, 2: 20}]
+[{1: NULL, 2: 30}, {1: NULL, 2: 40}]
+[{1: 5, 2: NULL}, {1: 6, 2: NULL}]
 
 # alias: list_zip
 query ?
 select list_zip([1, 2], [3, 4]);
 ----
-[{c0: 1, c1: 3}, {c0: 2, c1: 4}]
+[{1: 1, 2: 3}, {1: 2, 2: 4}]
 
 # column test: total values equal (3 each) but per-row lengths differ
 # a: [1]     b: [10, 20]   → row 0: a has 1, b has 2
@@ -7239,14 +7239,14 @@ select list_zip([1, 2], [3, 4]);
 query ?
 select arrays_zip(a, b) from (values ([1], [10, 20]), ([2, 3], [30])) as t(a, 
b);
 ----
-[{c0: 1, c1: 10}, {c0: NULL, c1: 20}]
-[{c0: 2, c1: 30}, {c0: 3, c1: NULL}]
+[{1: 1, 2: 10}, {1: NULL, 2: 20}]
+[{1: 2, 2: 30}, {1: 3, 2: NULL}]
 
 # single element arrays
 query ?
 select arrays_zip([42], ['hello']);
 ----
-[{c0: 42, c1: hello}]
+[{1: 42, 2: hello}]
 
 # error: too few arguments
 statement error
@@ -7259,7 +7259,7 @@ select arrays_zip(
     arrow_cast(make_array(10, 20, 30), 'LargeList(Int64)')
 );
 ----
-[{c0: 1, c1: 10}, {c0: 2, c1: 20}, {c0: 3, c1: 30}]
+[{1: 1, 2: 10}, {1: 2, 2: 20}, {1: 3, 2: 30}]
 
 # arrays_zip with LargeList different lengths (padding)
 query ?
@@ -7268,7 +7268,7 @@ select arrays_zip(
     arrow_cast(make_array(10, 20, 30), 'LargeList(Int64)')
 );
 ----
-[{c0: 1, c1: 10}, {c0: 2, c1: 20}, {c0: NULL, c1: 30}]
+[{1: 1, 2: 10}, {1: 2, 2: 20}, {1: NULL, 2: 30}]
 
 # arrays_zip with FixedSizeList inputs
 query ?
@@ -7277,7 +7277,7 @@ select arrays_zip(
     arrow_cast(make_array(10, 20, 30), 'FixedSizeList(3, Int64)')
 );
 ----
-[{c0: 1, c1: 10}, {c0: 2, c1: 20}, {c0: 3, c1: 30}]
+[{1: 1, 2: 10}, {1: 2, 2: 20}, {1: 3, 2: 30}]
 
 # arrays_zip mixing List and LargeList
 query ?
@@ -7286,7 +7286,7 @@ select arrays_zip(
     arrow_cast(make_array(10, 20, 30), 'LargeList(Int64)')
 );
 ----
-[{c0: 1, c1: 10}, {c0: 2, c1: 20}, {c0: 3, c1: 30}]
+[{1: 1, 2: 10}, {1: 2, 2: 20}, {1: 3, 2: 30}]
 
 # arrays_zip mixing List and FixedSizeList with different lengths (padding)
 query ?
@@ -7295,7 +7295,7 @@ select arrays_zip(
     arrow_cast(make_array(10, 20), 'FixedSizeList(2, Int64)')
 );
 ----
-[{c0: 1, c1: 10}, {c0: 2, c1: 20}, {c0: 3, c1: NULL}]
+[{1: 1, 2: 10}, {1: 2, 2: 20}, {1: 3, 2: NULL}]
 
 # arrays_zip with LargeList and FixedSizeList mixed types
 query ?
@@ -7304,7 +7304,7 @@ select arrays_zip(
     arrow_cast(make_array('a', 'b'), 'FixedSizeList(2, Utf8)')
 );
 ----
-[{c0: 1, c1: a}, {c0: 2, c1: b}]
+[{1: 1, 2: a}, {1: 2, 2: b}]
 
 query ???
 select array_intersect(column1, column2),


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

(datafusion) branch main updated: feat: correct struct column names for `arrays_zip` return type (#20886)

Reply via email to