This is an automated email from the ASF dual-hosted git repository.
github-bot pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/datafusion.git
The following commit(s) were added to refs/heads/main by this push:
new 57b275a8d3 feat: correct struct column names for `arrays_zip` return
type (#20886)
57b275a8d3 is described below
commit 57b275a8d3cd3121be7c49923527054b10656a07
Author: Oleks V <[email protected]>
AuthorDate: Thu Mar 12 08:51:02 2026 -0700
feat: correct struct column names for `arrays_zip` return type (#20886)
## Which issue does this PR close?
<!--
We generally require a GitHub issue to be filed for all bug fixes and
enhancements and this helps us generate change logs for our releases.
You can link an issue to this PR using the GitHub syntax. For example
`Closes #123` indicates that this PR will close issue #123.
-->
For `arrays_zip`, the return type differs slightly from DuckDB's: the
struct field names are different.
DuckDB expects 1-based field names:
```
struct {
1: ....,
2: ....,
3: ....,
n: ......
}
```
DataFusion currently produces `c`-prefixed, 0-based field names:
```
struct {
c0: ....,
c1: ....,
c2: ....,
c(n-1): ......
}
```
Future work: Spark numbers the field names starting from 0:
```
struct {
0: ....,
1: ....,
2: ....,
n-1: ......
}
```
- Closes #.
## Rationale for this change
<!--
Why are you proposing this change? If this is already explained clearly
in the issue then this section is not needed.
Explaining clearly why changes are proposed helps reviewers understand
your changes and offer better suggestions for fixes.
-->
## What changes are included in this PR?
Rename the struct field names in the `arrays_zip` return type from
`c0..c(n-1)` to `1..n`.
<!--
There is no need to duplicate the description in the issue here but it
is sometimes worth providing a summary of the individual changes in this
PR.
-->
## Are these changes tested?
<!--
We typically require tests for all PRs in order to:
1. Prevent the code from being accidentally broken by subsequent changes
2. Serve as another way to document the expected behavior of the code
If tests are not included in your PR, please explain why (for example,
are they covered by existing tests)?
-->
## Are there any user-facing changes?
<!--
If there are user-facing changes then we may require documentation to be
updated before approving the PR.
-->
<!--
If there are any breaking changes to public APIs, please add the `api
change` label.
-->
---
datafusion/functions-nested/src/arrays_zip.rs | 4 +-
datafusion/sqllogictest/test_files/array.slt | 62 +++++++++++++--------------
2 files changed, 33 insertions(+), 33 deletions(-)
diff --git a/datafusion/functions-nested/src/arrays_zip.rs
b/datafusion/functions-nested/src/arrays_zip.rs
index 2ac30d0704..5035439286 100644
--- a/datafusion/functions-nested/src/arrays_zip.rs
+++ b/datafusion/functions-nested/src/arrays_zip.rs
@@ -125,7 +125,7 @@ impl ScalarUDFImpl for ArraysZip {
return exec_err!("arrays_zip expects array arguments, got
{dt}");
}
};
- fields.push(Field::new(format!("c{i}"), element_type, true));
+ fields.push(Field::new(format!("{}", i + 1), element_type, true));
}
Ok(List(Arc::new(Field::new_list_field(
@@ -227,7 +227,7 @@ fn arrays_zip_inner(args: &[ArrayRef]) -> Result<ArrayRef> {
let struct_fields: Fields = element_types
.iter()
.enumerate()
- .map(|(i, dt)| Field::new(format!("c{i}"), dt.clone(), true))
+ .map(|(i, dt)| Field::new(format!("{}", i + 1), dt.clone(), true))
.collect::<Vec<_>>()
.into();
diff --git a/datafusion/sqllogictest/test_files/array.slt
b/datafusion/sqllogictest/test_files/array.slt
index 112351c5ef..83e9c9cc9c 100644
--- a/datafusion/sqllogictest/test_files/array.slt
+++ b/datafusion/sqllogictest/test_files/array.slt
@@ -7112,61 +7112,61 @@ from array_distinct_table_2D_fixed;
query ?
select arrays_zip([1, 2, 3], [2, 3, 4]);
----
-[{c0: 1, c1: 2}, {c0: 2, c1: 3}, {c0: 3, c1: 4}]
+[{1: 1, 2: 2}, {1: 2, 2: 3}, {1: 3, 2: 4}]
# Spark example: arrays_zip(array(1, 2), array(2, 3), array(3, 4))
query ?
select arrays_zip([1, 2], [2, 3], [3, 4]);
----
-[{c0: 1, c1: 2, c2: 3}, {c0: 2, c1: 3, c2: 4}]
+[{1: 1, 2: 2, 3: 3}, {1: 2, 2: 3, 3: 4}]
# basic: two integer arrays of equal length
query ?
select arrays_zip([1, 2, 3], [10, 20, 30]);
----
-[{c0: 1, c1: 10}, {c0: 2, c1: 20}, {c0: 3, c1: 30}]
+[{1: 1, 2: 10}, {1: 2, 2: 20}, {1: 3, 2: 30}]
# basic: two arrays with different element types (int + string)
query ?
select arrays_zip([1, 2, 3], ['a', 'b', 'c']);
----
-[{c0: 1, c1: a}, {c0: 2, c1: b}, {c0: 3, c1: c}]
+[{1: 1, 2: a}, {1: 2, 2: b}, {1: 3, 2: c}]
# three arrays of equal length
query ?
select arrays_zip([1, 2, 3], [10, 20, 30], [100, 200, 300]);
----
-[{c0: 1, c1: 10, c2: 100}, {c0: 2, c1: 20, c2: 200}, {c0: 3, c1: 30, c2: 300}]
+[{1: 1, 2: 10, 3: 100}, {1: 2, 2: 20, 3: 200}, {1: 3, 2: 30, 3: 300}]
# four arrays of equal length
query ?
select arrays_zip([1], [2], [3], [4]);
----
-[{c0: 1, c1: 2, c2: 3, c3: 4}]
+[{1: 1, 2: 2, 3: 3, 4: 4}]
# mixed element types: float + boolean
query ?
select arrays_zip([1.5, 2.5], [true, false]);
----
-[{c0: 1.5, c1: true}, {c0: 2.5, c1: false}]
+[{1: 1.5, 2: true}, {1: 2.5, 2: false}]
# different length arrays: shorter array padded with NULLs
query ?
select arrays_zip([1, 2], [3, 4, 5]);
----
-[{c0: 1, c1: 3}, {c0: 2, c1: 4}, {c0: NULL, c1: 5}]
+[{1: 1, 2: 3}, {1: 2, 2: 4}, {1: NULL, 2: 5}]
# different length arrays: first longer
query ?
select arrays_zip([1, 2, 3], [10]);
----
-[{c0: 1, c1: 10}, {c0: 2, c1: NULL}, {c0: 3, c1: NULL}]
+[{1: 1, 2: 10}, {1: 2, 2: NULL}, {1: 3, 2: NULL}]
# different length: one single element, other three elements
query ?
select arrays_zip([1], ['a', 'b', 'c']);
----
-[{c0: 1, c1: a}, {c0: NULL, c1: b}, {c0: NULL, c1: c}]
+[{1: 1, 2: a}, {1: NULL, 2: b}, {1: NULL, 2: c}]
# empty arrays
query ?
@@ -7178,19 +7178,19 @@ select arrays_zip([], []);
query ?
select arrays_zip([], [1, 2, 3]);
----
-[{c0: NULL, c1: 1}, {c0: NULL, c1: 2}, {c0: NULL, c1: 3}]
+[{1: NULL, 2: 1}, {1: NULL, 2: 2}, {1: NULL, 2: 3}]
# NULL elements inside arrays
query ?
select arrays_zip([1, NULL, 3], ['a', 'b', 'c']);
----
-[{c0: 1, c1: a}, {c0: NULL, c1: b}, {c0: 3, c1: c}]
+[{1: 1, 2: a}, {1: NULL, 2: b}, {1: 3, 2: c}]
# all NULL elements
query ?
select arrays_zip([NULL::int, NULL, NULL], [NULL::text, NULL, NULL]);
----
-[{c0: NULL, c1: NULL}, {c0: NULL, c1: NULL}, {c0: NULL, c1: NULL}]
+[{1: NULL, 2: NULL}, {1: NULL, 2: NULL}, {1: NULL, 2: NULL}]
# both args are NULL (entire list null)
query ?
@@ -7202,35 +7202,35 @@ NULL
query ?
select arrays_zip(NULL::int[], [1, 2, 3]);
----
-[{c0: NULL, c1: 1}, {c0: NULL, c1: 2}, {c0: NULL, c1: 3}]
+[{1: NULL, 2: 1}, {1: NULL, 2: 2}, {1: NULL, 2: 3}]
# real array + NULL list
query ?
select arrays_zip([1, 2], NULL::text[]);
----
-[{c0: 1, c1: NULL}, {c0: 2, c1: NULL}]
+[{1: 1, 2: NULL}, {1: 2, 2: NULL}]
# column-level test with multiple rows
query ?
select arrays_zip(a, b) from (values ([1, 2], [10, 20]), ([3, 4, 5], [30]),
([6], [60, 70])) as t(a, b);
----
-[{c0: 1, c1: 10}, {c0: 2, c1: 20}]
-[{c0: 3, c1: 30}, {c0: 4, c1: NULL}, {c0: 5, c1: NULL}]
-[{c0: 6, c1: 60}, {c0: NULL, c1: 70}]
+[{1: 1, 2: 10}, {1: 2, 2: 20}]
+[{1: 3, 2: 30}, {1: 4, 2: NULL}, {1: 5, 2: NULL}]
+[{1: 6, 2: 60}, {1: NULL, 2: 70}]
# column-level test with NULL rows
query ?
select arrays_zip(a, b) from (values ([1, 2], [10, 20]), (null, [30, 40]),
([5, 6], null)) as t(a, b);
----
-[{c0: 1, c1: 10}, {c0: 2, c1: 20}]
-[{c0: NULL, c1: 30}, {c0: NULL, c1: 40}]
-[{c0: 5, c1: NULL}, {c0: 6, c1: NULL}]
+[{1: 1, 2: 10}, {1: 2, 2: 20}]
+[{1: NULL, 2: 30}, {1: NULL, 2: 40}]
+[{1: 5, 2: NULL}, {1: 6, 2: NULL}]
# alias: list_zip
query ?
select list_zip([1, 2], [3, 4]);
----
-[{c0: 1, c1: 3}, {c0: 2, c1: 4}]
+[{1: 1, 2: 3}, {1: 2, 2: 4}]
# column test: total values equal (3 each) but per-row lengths differ
# a: [1] b: [10, 20] → row 0: a has 1, b has 2
@@ -7239,14 +7239,14 @@ select list_zip([1, 2], [3, 4]);
query ?
select arrays_zip(a, b) from (values ([1], [10, 20]), ([2, 3], [30])) as t(a,
b);
----
-[{c0: 1, c1: 10}, {c0: NULL, c1: 20}]
-[{c0: 2, c1: 30}, {c0: 3, c1: NULL}]
+[{1: 1, 2: 10}, {1: NULL, 2: 20}]
+[{1: 2, 2: 30}, {1: 3, 2: NULL}]
# single element arrays
query ?
select arrays_zip([42], ['hello']);
----
-[{c0: 42, c1: hello}]
+[{1: 42, 2: hello}]
# error: too few arguments
statement error
@@ -7259,7 +7259,7 @@ select arrays_zip(
arrow_cast(make_array(10, 20, 30), 'LargeList(Int64)')
);
----
-[{c0: 1, c1: 10}, {c0: 2, c1: 20}, {c0: 3, c1: 30}]
+[{1: 1, 2: 10}, {1: 2, 2: 20}, {1: 3, 2: 30}]
# arrays_zip with LargeList different lengths (padding)
query ?
@@ -7268,7 +7268,7 @@ select arrays_zip(
arrow_cast(make_array(10, 20, 30), 'LargeList(Int64)')
);
----
-[{c0: 1, c1: 10}, {c0: 2, c1: 20}, {c0: NULL, c1: 30}]
+[{1: 1, 2: 10}, {1: 2, 2: 20}, {1: NULL, 2: 30}]
# arrays_zip with FixedSizeList inputs
query ?
@@ -7277,7 +7277,7 @@ select arrays_zip(
arrow_cast(make_array(10, 20, 30), 'FixedSizeList(3, Int64)')
);
----
-[{c0: 1, c1: 10}, {c0: 2, c1: 20}, {c0: 3, c1: 30}]
+[{1: 1, 2: 10}, {1: 2, 2: 20}, {1: 3, 2: 30}]
# arrays_zip mixing List and LargeList
query ?
@@ -7286,7 +7286,7 @@ select arrays_zip(
arrow_cast(make_array(10, 20, 30), 'LargeList(Int64)')
);
----
-[{c0: 1, c1: 10}, {c0: 2, c1: 20}, {c0: 3, c1: 30}]
+[{1: 1, 2: 10}, {1: 2, 2: 20}, {1: 3, 2: 30}]
# arrays_zip mixing List and FixedSizeList with different lengths (padding)
query ?
@@ -7295,7 +7295,7 @@ select arrays_zip(
arrow_cast(make_array(10, 20), 'FixedSizeList(2, Int64)')
);
----
-[{c0: 1, c1: 10}, {c0: 2, c1: 20}, {c0: 3, c1: NULL}]
+[{1: 1, 2: 10}, {1: 2, 2: 20}, {1: 3, 2: NULL}]
# arrays_zip with LargeList and FixedSizeList mixed types
query ?
@@ -7304,7 +7304,7 @@ select arrays_zip(
arrow_cast(make_array('a', 'b'), 'FixedSizeList(2, Utf8)')
);
----
-[{c0: 1, c1: a}, {c0: 2, c1: b}]
+[{1: 1, 2: a}, {1: 2, 2: b}]
query ???
select array_intersect(column1, column2),
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]