vbarua commented on code in PR #13127:
URL: https://github.com/apache/datafusion/pull/13127#discussion_r1819479649


##########
datafusion/substrait/src/logical_plan/consumer.rs:
##########
@@ -566,36 +568,57 @@ pub async fn from_substrait_rel(
                 let mut input = LogicalPlanBuilder::from(
                     from_substrait_rel(ctx, input, extensions).await?,
                 );
-                let mut names: HashSet<String> = HashSet::new();
-                let mut exprs: Vec<Expr> = vec![];
-                for e in &p.expressions {
-                    let x =
-                        from_substrait_rex(ctx, e, input.clone().schema(), 
extensions)
+
+                let mut default_exprs: Vec<Expr> = vec![];
+
+                // By default, a Substrait Project emits all inputs fields
+                let input_schema = input.schema();
+                for index in 0..(input_schema.fields().len()) {
+                    let e =
+                        
Expr::Column(Column::from(input_schema.qualified_field(index)));
+                    default_exprs.push(e);
+                }
+                // followed by all expressions
+                for expr in &p.expressions {
+                    let e =
+                        from_substrait_rex(ctx, expr, input.clone().schema(), 
extensions)
                             .await?;
                     // if the expression is WindowFunction, wrap in a Window 
relation
-                    if let Expr::WindowFunction(_) = &x {
+                    if let Expr::WindowFunction(_) = &e {
                         // Adding the same expression here and in the project 
below
                         // works because the project's builder uses 
columnize_expr(..)
                         // to transform it into a column reference
-                        input = input.window(vec![x.clone()])?
+                        input = input.window(vec![e.clone()])?
                     }
-                    // Ensure the expression has a unique display name, so 
that project's
-                    // validate_unique_names doesn't fail
-                    let name = x.schema_name().to_string();
-                    let mut new_name = name.clone();
-                    let mut i = 0;
-                    while names.contains(&new_name) {
-                        new_name = format!("{}__temp__{}", name, i);
-                        i += 1;
+                    default_exprs.push(e);
+                }
+
+                // Ensure that all expressions have a unique display name, so 
that
+                // validate_unique_names does not fail when constructing the 
project.
+                let mut name_tracker = NameTracker::new();
+
+                let mut final_exprs: Vec<Expr> = vec![];
+                match retrieve_emit_kind(p.common.as_ref()) {

Review Comment:
   > I get that translating substrait Project into two DF Projects feels odd, 
but I think that would be more consistent with other Rels w/o treating Project 
as an exception.
   
   I would actually prefer to output 2 projections, and was actually what I did 
originally, but that breaks tests like 
https://github.com/apache/datafusion/blob/1fd6116dd9e1898540b4fbdbba735c4ebacc4227/datafusion/substrait/tests/cases/function_test.rs#L36-L41
 because we would need to add the extra projections everywhere. We could do 
that, but I think that would make writing and reading the tests more burdensome.
   
   > I think it would be better to simply trust DF optimizer to fold the extra 
SELECTs afterwards where necessary.
   
   I agree as well, but we also don't want to call the optimizer as part of 
consuming Substrait https://github.com/apache/datafusion/pull/12800
   
   



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: github-unsubscr...@datafusion.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


---------------------------------------------------------------------
To unsubscribe, e-mail: github-unsubscr...@datafusion.apache.org
For additional commands, e-mail: github-h...@datafusion.apache.org

Reply via email to