tokoko commented on code in PR #13127:
URL: https://github.com/apache/datafusion/pull/13127#discussion_r1825109525
##########
datafusion/substrait/src/logical_plan/consumer.rs:
##########
@@ -566,36 +568,46 @@ pub async fn from_substrait_rel(
let mut input = LogicalPlanBuilder::from(
from_substrait_rel(ctx, input, extensions).await?,
);
- let mut names: HashSet<String> = HashSet::new();
- let mut exprs: Vec<Expr> = vec![];
- for e in &p.expressions {
- let x =
- from_substrait_rex(ctx, e, input.clone().schema(),
extensions)
+ let original_schema = input.schema().clone();
+
+ // Ensure that all expressions have a unique display name, so
that
+ // validate_unique_names does not fail when constructing the
project.
+ let mut name_tracker = NameTracker::new();
+
+ // By default, a Substrait Project emits all inputs fields
followed by all expressions.
+ // We build the explicit expressions first, and then the input
expressions to avoid
+ // adding aliases to the explicit expressions (as part of
ensuring unique names).
+ //
+ // This is helpful for plan visualization and tests, because
when DataFusion produces
+ // Substrait Projects it adds an output mapping that excludes
all input columns
+ // leaving only explicit expressions.
+
+ let mut explicit_exprs: Vec<Expr> = vec![];
+ for expr in &p.expressions {
+ let e =
+ from_substrait_rex(ctx, expr, input.clone().schema(),
extensions)
.await?;
// if the expression is WindowFunction, wrap in a Window
relation
- if let Expr::WindowFunction(_) = &x {
+ if let Expr::WindowFunction(_) = &e {
// Adding the same expression here and in the project
below
// works because the project's builder uses
columnize_expr(..)
// to transform it into a column reference
- input = input.window(vec![x.clone()])?
+ input = input.window(vec![e.clone()])?
}
- // Ensure the expression has a unique display name, so
that project's
- // validate_unique_names doesn't fail
- let name = x.schema_name().to_string();
- let mut new_name = name.clone();
- let mut i = 0;
- while names.contains(&new_name) {
- new_name = format!("{}__temp__{}", name, i);
- i += 1;
- }
- if new_name != name {
- exprs.push(x.alias(new_name.clone()));
- } else {
- exprs.push(x);
- }
- names.insert(new_name);
+
explicit_exprs.push(name_tracker.get_uniquely_named_expr(e)?);
}
- input.project(exprs)?.build()
+
+ let mut final_exprs: Vec<Expr> = vec![];
+ for index in 0..original_schema.fields().len() {
+ let e = Expr::Column(Column::from(
+ original_schema.qualified_field(index),
+ ));
+ final_exprs.push(name_tracker.get_uniquely_named_expr(e)?);
+ }
+ final_exprs.append(&mut explicit_exprs);
+
+ let plan = input.project(final_exprs)?.build()?;
+ apply_emit_kind(p.common.as_ref(), plan)
Review Comment:
minor: Can we call `apply_emit_kind` once on the output of the match
statement now instead of on every case?
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]