This is an automated email from the ASF dual-hosted git repository.

alamb pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow-datafusion.git


The following commit(s) were added to refs/heads/main by this push:
     new a6ff1fe6a3 Use `struct` instead of `named_struct` when there are no 
aliases (#9897)
a6ff1fe6a3 is described below

commit a6ff1fe6a323d24e36e6845525ed7c7d5e9cec3e
Author: Andrew Lamb <[email protected]>
AuthorDate: Tue Apr 2 17:21:02 2024 -0400

    Use `struct` instead of `named_struct` when there are no aliases (#9897)
    
    * Revert "use alias (#9894)"
    
    This reverts commit 9487ca057353370aa75895453c92bb40b9f33ac6.
    
    * Use `struct` instead of `named_struct` when there are no aliases
    
    * Update docs
    
    * fmt
---
 datafusion/sql/src/expr/mod.rs                 | 48 ++++++++++++++++++++++++++
 datafusion/sqllogictest/test_files/explain.slt |  4 +--
 datafusion/sqllogictest/test_files/expr.slt    | 10 +++---
 datafusion/sqllogictest/test_files/struct.slt  |  6 ++--
 docs/source/user-guide/sql/scalar_functions.md | 10 ++++++
 5 files changed, 68 insertions(+), 10 deletions(-)

diff --git a/datafusion/sql/src/expr/mod.rs b/datafusion/sql/src/expr/mod.rs
index 4173e129f4..c2f72720af 100644
--- a/datafusion/sql/src/expr/mod.rs
+++ b/datafusion/sql/src/expr/mod.rs
@@ -589,6 +589,7 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> {
         }
     }
 
+    /// Parses a struct(..) expression
     fn parse_struct(
         &self,
         values: Vec<SQLExpr>,
@@ -599,6 +600,25 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> {
         if !fields.is_empty() {
             return not_impl_err!("Struct fields are not supported yet");
         }
+
+        if values
+            .iter()
+            .any(|value| matches!(value, SQLExpr::Named { .. }))
+        {
+            self.create_named_struct(values, input_schema, planner_context)
+        } else {
+            self.create_struct(values, input_schema, planner_context)
+        }
+    }
+
+    // Handles a call to struct(...) where the arguments are named. For example
+    // `struct (v as foo, v2 as bar)` by creating a call to the `named_struct` 
function
+    fn create_named_struct(
+        &self,
+        values: Vec<SQLExpr>,
+        input_schema: &DFSchema,
+        planner_context: &mut PlannerContext,
+    ) -> Result<Expr> {
         let args = values
             .into_iter()
             .enumerate()
@@ -643,6 +663,34 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> {
         )))
     }
 
+    // Handles a call to struct(...) where the arguments are not named. For 
example
+    // `struct (v, v2)` by creating a call to the `struct` function
+    // which will create a struct with fields named `c0`, `c1`, etc.
+    fn create_struct(
+        &self,
+        values: Vec<SQLExpr>,
+        input_schema: &DFSchema,
+        planner_context: &mut PlannerContext,
+    ) -> Result<Expr> {
+        let args = values
+            .into_iter()
+            .map(|value| {
+                self.sql_expr_to_logical_expr(value, input_schema, 
planner_context)
+            })
+            .collect::<Result<Vec<_>>>()?;
+        let struct_func = self
+            .context_provider
+            .get_function_meta("struct")
+            .ok_or_else(|| {
+                internal_datafusion_err!("Unable to find expected 'struct' 
function")
+            })?;
+
+        Ok(Expr::ScalarFunction(ScalarFunction::new_udf(
+            struct_func,
+            args,
+        )))
+    }
+
     fn parse_array_agg(
         &self,
         array_agg: ArrayAgg,
diff --git a/datafusion/sqllogictest/test_files/explain.slt 
b/datafusion/sqllogictest/test_files/explain.slt
index 4653250cf9..b7ad36dace 100644
--- a/datafusion/sqllogictest/test_files/explain.slt
+++ b/datafusion/sqllogictest/test_files/explain.slt
@@ -390,8 +390,8 @@ query TT
 explain select struct(1, 2.3, 'abc');
 ----
 logical_plan
-Projection: Struct({c0:1,c1:2.3,c2:abc}) AS 
named_struct(Utf8("c0"),Int64(1),Utf8("c1"),Float64(2.3),Utf8("c2"),Utf8("abc"))
+Projection: Struct({c0:1,c1:2.3,c2:abc}) AS 
struct(Int64(1),Float64(2.3),Utf8("abc"))
 --EmptyRelation
 physical_plan
-ProjectionExec: expr=[{c0:1,c1:2.3,c2:abc} as 
named_struct(Utf8("c0"),Int64(1),Utf8("c1"),Float64(2.3),Utf8("c2"),Utf8("abc"))]
+ProjectionExec: expr=[{c0:1,c1:2.3,c2:abc} as 
struct(Int64(1),Float64(2.3),Utf8("abc"))]
 --PlaceholderRowExec
diff --git a/datafusion/sqllogictest/test_files/expr.slt 
b/datafusion/sqllogictest/test_files/expr.slt
index 60ab477788..2e0cbf50ca 100644
--- a/datafusion/sqllogictest/test_files/expr.slt
+++ b/datafusion/sqllogictest/test_files/expr.slt
@@ -2288,7 +2288,7 @@ select struct(time,load1,load2,host) from t1;
 
 # can have an aggregate function with an inner coalesce
 query TR
-select t2.info['c3'] as host, sum(coalesce(t2.info)['c1']) from (select 
struct(time,load1,load2,host) as info from t1) t2 where t2.info['c3'] IS NOT 
NULL group by t2.info['c3'] order by host;
+select t2."struct(t1.time,t1.load1,t1.load2,t1.host)"['c3'] as host, 
sum(coalesce(t2."struct(t1.time,t1.load1,t1.load2,t1.host)")['c1']) from 
(select struct(time,load1,load2,host) from t1) t2 where 
t2."struct(t1.time,t1.load1,t1.load2,t1.host)"['c3'] IS NOT NULL group by 
t2."struct(t1.time,t1.load1,t1.load2,t1.host)"['c3'] order by host;
 ----
 host1 1.1
 host2 2.2
@@ -2296,7 +2296,7 @@ host3 3.3
 
 # can have an aggregate function with an inner CASE WHEN
 query TR
-select t2.info['c3'] as host, sum((case when t2.info['c3'] is not null then 
t2.info end)['c2']) from (select struct(time,load1,load2,host) as info from t1) 
t2 where t2.info['c3'] IS NOT NULL group by t2.info['c3'] order by host;
+select t2."struct(t1.time,t1.load1,t1.load2,t1.host)"['c3'] as host, sum((case 
when t2."struct(t1.time,t1.load1,t1.load2,t1.host)"['c3'] is not null then 
t2."struct(t1.time,t1.load1,t1.load2,t1.host)" end)['c2']) from (select 
struct(time,load1,load2,host) from t1) t2 where 
t2."struct(t1.time,t1.load1,t1.load2,t1.host)"['c3'] IS NOT NULL group by 
t2."struct(t1.time,t1.load1,t1.load2,t1.host)"['c3'] order by host;
 ----
 host1 101
 host2 202
@@ -2304,7 +2304,7 @@ host3 303
 
 # can have 2 projections with aggr(short_circuited), with different 
short-circuited expr
 query TRR
-select t2.info['c3'] as host, sum(coalesce(t2.info)['c1']), sum((case when 
t2.info['c3'] is not null then t2.info end)['c2']) from (select 
struct(time,load1,load2,host) as info from t1) t2 where t2.info['c3'] IS NOT 
NULL group by t2.info['c3'] order by host;
+select t2."struct(t1.time,t1.load1,t1.load2,t1.host)"['c3'] as host, 
sum(coalesce(t2."struct(t1.time,t1.load1,t1.load2,t1.host)")['c1']), sum((case 
when t2."struct(t1.time,t1.load1,t1.load2,t1.host)"['c3'] is not null then 
t2."struct(t1.time,t1.load1,t1.load2,t1.host)" end)['c2']) from (select 
struct(time,load1,load2,host) from t1) t2 where 
t2."struct(t1.time,t1.load1,t1.load2,t1.host)"['c3'] IS NOT NULL group by 
t2."struct(t1.time,t1.load1,t1.load2,t1.host)"['c3'] order by host;
 ----
 host1 1.1 101
 host2 2.2 202
@@ -2312,7 +2312,7 @@ host3 3.3 303
 
 # can have 2 projections with aggr(short_circuited), with the same 
short-circuited expr (e.g. CASE WHEN)
 query TRR
-select t2.info['c3'] as host, sum((case when t2.info['c3'] is not null then 
t2.info end)['c1']), sum((case when t2.info['c3'] is not null then t2.info 
end)['c2']) from (select struct(time,load1,load2,host) as info from t1) t2 
where t2.info['c3'] IS NOT NULL group by t2.info['c3'] order by host;
+select t2."struct(t1.time,t1.load1,t1.load2,t1.host)"['c3'] as host, sum((case 
when t2."struct(t1.time,t1.load1,t1.load2,t1.host)"['c3'] is not null then 
t2."struct(t1.time,t1.load1,t1.load2,t1.host)" end)['c1']), sum((case when 
t2."struct(t1.time,t1.load1,t1.load2,t1.host)"['c3'] is not null then 
t2."struct(t1.time,t1.load1,t1.load2,t1.host)" end)['c2']) from (select 
struct(time,load1,load2,host) from t1) t2 where 
t2."struct(t1.time,t1.load1,t1.load2,t1.host)"['c3'] IS NOT NULL group by [...]
 ----
 host1 1.1 101
 host2 2.2 202
@@ -2320,7 +2320,7 @@ host3 3.3 303
 
 # can have 2 projections with aggr(short_circuited), with the same 
short-circuited expr (e.g. coalesce)
 query TRR
-select t2.info['c3'] as host, sum(coalesce(t2.info)['c1']), 
sum(coalesce(t2.info)['c2']) from (select struct(time,load1,load2,host) as info 
from t1) t2 where t2.info['c3'] IS NOT NULL group by t2.info['c3'] order by 
host;
+select t2."struct(t1.time,t1.load1,t1.load2,t1.host)"['c3'] as host, 
sum(coalesce(t2."struct(t1.time,t1.load1,t1.load2,t1.host)")['c1']), 
sum(coalesce(t2."struct(t1.time,t1.load1,t1.load2,t1.host)")['c2']) from 
(select struct(time,load1,load2,host) from t1) t2 where 
t2."struct(t1.time,t1.load1,t1.load2,t1.host)"['c3'] IS NOT NULL group by 
t2."struct(t1.time,t1.load1,t1.load2,t1.host)"['c3'] order by host;
 ----
 host1 1.1 101
 host2 2.2 202
diff --git a/datafusion/sqllogictest/test_files/struct.slt 
b/datafusion/sqllogictest/test_files/struct.slt
index 2e0b699f6d..8a6256add6 100644
--- a/datafusion/sqllogictest/test_files/struct.slt
+++ b/datafusion/sqllogictest/test_files/struct.slt
@@ -85,10 +85,10 @@ query TT
 explain select struct(a, b, c) from values;
 ----
 logical_plan
-Projection: named_struct(Utf8("c0"), values.a, Utf8("c1"), values.b, 
Utf8("c2"), values.c)
+Projection: struct(values.a, values.b, values.c)
 --TableScan: values projection=[a, b, c]
 physical_plan
-ProjectionExec: expr=[named_struct(c0, a@0, c1, b@1, c2, c@2) as 
named_struct(Utf8("c0"),values.a,Utf8("c1"),values.b,Utf8("c2"),values.c)]
+ProjectionExec: expr=[struct(a@0, b@1, c@2) as 
struct(values.a,values.b,values.c)]
 --MemoryExec: partitions=1, partition_sizes=[1]
 
 # error on 0 arguments
@@ -179,4 +179,4 @@ drop table values;
 query T
 select arrow_typeof(named_struct('first', 1, 'second', 2, 'third', 3));
 ----
-Struct([Field { name: "first", data_type: Int64, nullable: true, dict_id: 0, 
dict_is_ordered: false, metadata: {} }, Field { name: "second", data_type: 
Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, 
Field { name: "third", data_type: Int64, nullable: true, dict_id: 0, 
dict_is_ordered: false, metadata: {} }])
\ No newline at end of file
+Struct([Field { name: "first", data_type: Int64, nullable: true, dict_id: 0, 
dict_is_ordered: false, metadata: {} }, Field { name: "second", data_type: 
Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, 
Field { name: "third", data_type: Int64, nullable: true, dict_id: 0, 
dict_is_ordered: false, metadata: {} }])
diff --git a/docs/source/user-guide/sql/scalar_functions.md 
b/docs/source/user-guide/sql/scalar_functions.md
index e2e129a2e2..62b81ea7ea 100644
--- a/docs/source/user-guide/sql/scalar_functions.md
+++ b/docs/source/user-guide/sql/scalar_functions.md
@@ -3336,6 +3336,16 @@ select * from t;
 | 3 | 4 |
 +---+---+
 
+-- use default names `c0`, `c1`
+❯ select struct(a, b) from t;
++-----------------+
+| struct(t.a,t.b) |
++-----------------+
+| {c0: 1, c1: 2}  |
+| {c0: 3, c1: 4}  |
++-----------------+
+
+-- name the first field `field_a`
 select struct(a as field_a, b) from t;
 +--------------------------------------------------+
 | named_struct(Utf8("field_a"),t.a,Utf8("c1"),t.b) |

Reply via email to