This is an automated email from the ASF dual-hosted git repository.
alamb pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/datafusion.git
The following commit(s) were added to refs/heads/main by this push:
new 42f906072a feat(substrait): add wildcard handling to producer (#12987)
42f906072a is described below
commit 42f906072a3000d005b8ced97654aaec2828a878
Author: Tornike Gurgenidze <[email protected]>
AuthorDate: Fri Oct 18 23:06:58 2024 +0400
feat(substrait): add wildcard handling to producer (#12987)
* feat(substrait): add wildcard expand rule in producer
* add comment describing need for ExpandWildcardRule
---
datafusion/substrait/src/logical_plan/producer.rs | 10 ++++++-
.../tests/cases/roundtrip_logical_plan.rs | 34 +++++++++++++++++++++-
2 files changed, 42 insertions(+), 2 deletions(-)
diff --git a/datafusion/substrait/src/logical_plan/producer.rs b/datafusion/substrait/src/logical_plan/producer.rs
index 0e1375a8e0..7504a287c0 100644
--- a/datafusion/substrait/src/logical_plan/producer.rs
+++ b/datafusion/substrait/src/logical_plan/producer.rs
@@ -15,6 +15,9 @@
// specific language governing permissions and limitations
// under the License.
+use datafusion::config::ConfigOptions;
+use datafusion::optimizer::analyzer::expand_wildcard_rule::ExpandWildcardRule;
+use datafusion::optimizer::AnalyzerRule;
use std::sync::Arc;
use substrait::proto::expression_reference::ExprType;
@@ -103,9 +106,14 @@ pub fn to_substrait_plan(plan: &LogicalPlan, ctx: &SessionContext) -> Result<Box
// Parse relation nodes
// Generate PlanRel(s)
// Note: Only 1 relation tree is currently supported
+
+ // We have to expand wildcard expressions first as wildcards can't be represented in substrait
+ let plan = Arc::new(ExpandWildcardRule::new())
+ .analyze(plan.clone(), &ConfigOptions::default())?;
+
let plan_rels = vec![PlanRel {
rel_type: Some(plan_rel::RelType::Root(RelRoot {
- input: Some(*to_substrait_rel(plan, ctx, &mut extensions)?),
+ input: Some(*to_substrait_rel(&plan, ctx, &mut extensions)?),
names: to_substrait_named_struct(plan.schema(), &mut extensions)?.names,
})),
}];
diff --git a/datafusion/substrait/tests/cases/roundtrip_logical_plan.rs b/datafusion/substrait/tests/cases/roundtrip_logical_plan.rs
index 23ac601a44..ae67b69244 100644
--- a/datafusion/substrait/tests/cases/roundtrip_logical_plan.rs
+++ b/datafusion/substrait/tests/cases/roundtrip_logical_plan.rs
@@ -183,7 +183,13 @@ async fn simple_select() -> Result<()> {
#[tokio::test]
async fn wildcard_select() -> Result<()> {
- roundtrip("SELECT * FROM data").await
+ assert_expected_plan_unoptimized(
+ "SELECT * FROM data",
+ "Projection: data.a, data.b, data.c, data.d, data.e, data.f\
+ \n TableScan: data",
+ true,
+ )
+ .await
}
#[tokio::test]
@@ -1174,6 +1180,32 @@ async fn verify_post_join_filter_value(proto: Box<Plan>) -> Result<()> {
Ok(())
}
+async fn assert_expected_plan_unoptimized(
+ sql: &str,
+ expected_plan_str: &str,
+ assert_schema: bool,
+) -> Result<()> {
+ let ctx = create_context().await?;
+ let df = ctx.sql(sql).await?;
+ let plan = df.into_unoptimized_plan();
+ let proto = to_substrait_plan(&plan, &ctx)?;
+ let plan2 = from_substrait_plan(&ctx, &proto).await?;
+
+ println!("{plan}");
+ println!("{plan2}");
+
+ println!("{proto:?}");
+
+ if assert_schema {
+ assert_eq!(plan.schema(), plan2.schema());
+ }
+
+ let plan2str = format!("{plan2}");
+ assert_eq!(expected_plan_str, &plan2str);
+
+ Ok(())
+}
+
async fn assert_expected_plan(
sql: &str,
expected_plan_str: &str,
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]