This is an automated email from the ASF dual-hosted git repository.

alamb pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/datafusion.git


The following commit(s) were added to refs/heads/main by this push:
     new 42f906072a feat(substrait): add wildcard handling to producer (#12987)
42f906072a is described below

commit 42f906072a3000d005b8ced97654aaec2828a878
Author: Tornike Gurgenidze <[email protected]>
AuthorDate: Fri Oct 18 23:06:58 2024 +0400

    feat(substrait): add wildcard handling to producer (#12987)
    
    * feat(substrait): add wildcard expand rule in producer
    
    * add comment describing need for ExpandWildcardRule
---
 datafusion/substrait/src/logical_plan/producer.rs  | 10 ++++++-
 .../tests/cases/roundtrip_logical_plan.rs          | 34 +++++++++++++++++++++-
 2 files changed, 42 insertions(+), 2 deletions(-)

diff --git a/datafusion/substrait/src/logical_plan/producer.rs b/datafusion/substrait/src/logical_plan/producer.rs
index 0e1375a8e0..7504a287c0 100644
--- a/datafusion/substrait/src/logical_plan/producer.rs
+++ b/datafusion/substrait/src/logical_plan/producer.rs
@@ -15,6 +15,9 @@
 // specific language governing permissions and limitations
 // under the License.
 
+use datafusion::config::ConfigOptions;
+use datafusion::optimizer::analyzer::expand_wildcard_rule::ExpandWildcardRule;
+use datafusion::optimizer::AnalyzerRule;
 use std::sync::Arc;
 use substrait::proto::expression_reference::ExprType;
 
@@ -103,9 +106,14 @@ pub fn to_substrait_plan(plan: &LogicalPlan, ctx: &SessionContext) -> Result<Box
     // Parse relation nodes
     // Generate PlanRel(s)
     // Note: Only 1 relation tree is currently supported
+
+    // We have to expand wildcard expressions first as wildcards can't be represented in substrait
+    let plan = Arc::new(ExpandWildcardRule::new())
+        .analyze(plan.clone(), &ConfigOptions::default())?;
+
     let plan_rels = vec![PlanRel {
         rel_type: Some(plan_rel::RelType::Root(RelRoot {
-            input: Some(*to_substrait_rel(plan, ctx, &mut extensions)?),
+            input: Some(*to_substrait_rel(&plan, ctx, &mut extensions)?),
             names: to_substrait_named_struct(plan.schema(), &mut extensions)?.names,
         })),
     }];
diff --git a/datafusion/substrait/tests/cases/roundtrip_logical_plan.rs b/datafusion/substrait/tests/cases/roundtrip_logical_plan.rs
index 23ac601a44..ae67b69244 100644
--- a/datafusion/substrait/tests/cases/roundtrip_logical_plan.rs
+++ b/datafusion/substrait/tests/cases/roundtrip_logical_plan.rs
@@ -183,7 +183,13 @@ async fn simple_select() -> Result<()> {
 
 #[tokio::test]
 async fn wildcard_select() -> Result<()> {
-    roundtrip("SELECT * FROM data").await
+    assert_expected_plan_unoptimized(
+        "SELECT * FROM data",
+        "Projection: data.a, data.b, data.c, data.d, data.e, data.f\
+        \n  TableScan: data",
+        true,
+    )
+    .await
 }
 
 #[tokio::test]
@@ -1174,6 +1180,32 @@ async fn verify_post_join_filter_value(proto: Box<Plan>) -> Result<()> {
     Ok(())
 }
 
+async fn assert_expected_plan_unoptimized(
+    sql: &str,
+    expected_plan_str: &str,
+    assert_schema: bool,
+) -> Result<()> {
+    let ctx = create_context().await?;
+    let df = ctx.sql(sql).await?;
+    let plan = df.into_unoptimized_plan();
+    let proto = to_substrait_plan(&plan, &ctx)?;
+    let plan2 = from_substrait_plan(&ctx, &proto).await?;
+
+    println!("{plan}");
+    println!("{plan2}");
+
+    println!("{proto:?}");
+
+    if assert_schema {
+        assert_eq!(plan.schema(), plan2.schema());
+    }
+
+    let plan2str = format!("{plan2}");
+    assert_eq!(expected_plan_str, &plan2str);
+
+    Ok(())
+}
+
 async fn assert_expected_plan(
     sql: &str,
     expected_plan_str: &str,


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to