viirya commented on a change in pull request #29975:
URL: https://github.com/apache/spark/pull/29975#discussion_r501475820



##########
File path: sql/core/src/main/scala/org/apache/spark/sql/execution/basicPhysicalOperators.scala
##########
@@ -66,10 +66,22 @@ case class ProjectExec(projectList: Seq[NamedExpression], child: SparkPlan)
 
   override def doConsume(ctx: CodegenContext, input: Seq[ExprCode], row: ExprCode): String = {
     val exprs = bindReferences[Expression](projectList, child.output)
-    val resultVars = exprs.map(_.genCode(ctx))
+    val (subExprsCode, resultVars) = if (conf.subexpressionEliminationEnabled) {
+      // subexpression elimination
+      val subExprs = ctx.subexpressionEliminationForWholeStageCodegen(exprs)
+      val genVars = ctx.withSubExprEliminationExprs(subExprs.states) {
+        exprs.map(_.genCode(ctx))
+      }
+      (subExprs.codes.mkString("\n"), genVars)
+    } else {
+      ("", exprs.map(_.genCode(ctx)))
+    }
+
     // Evaluation of non-deterministic expressions can't be deferred.
     val nonDeterministicAttrs = projectList.filterNot(_.deterministic).map(_.toAttribute)
     s"""

Review comment:
       OK. I will update the PR description.
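
       For readers skimming the thread, here is a minimal, self-contained sketch (not from the PR) of the kind of projection this change optimizes: both output columns share the subexpression `expensiveUdf('id)`, and with `spark.sql.subexpressionElimination.enabled` (on by default) the new codegen path evaluates it once per row instead of twice. The UDF name and the sleep are illustrative only.

       ```scala
       import org.apache.spark.sql.SparkSession
       import org.apache.spark.sql.functions.udf

       object SubexprElimDemo {
         def main(args: Array[String]): Unit = {
           val spark = SparkSession.builder().master("local[*]").appName("demo").getOrCreate()
           import spark.implicits._

           // Hypothetical expensive, deterministic UDF: a stand-in for real work.
           val expensiveUdf = udf { x: Long => Thread.sleep(1); x * 2 }

           // expensiveUdf('id) is a common subexpression of both projected columns;
           // with this PR, ProjectExec's generated code computes it once per row.
           val df = spark.range(1000).select(
             (expensiveUdf('id) + 1).as("a"),
             (expensiveUdf('id) + 2).as("b"))
           df.collect()
           spark.stop()
         }
       }
       ```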

##########
File path: sql/core/src/main/scala/org/apache/spark/sql/execution/basicPhysicalOperators.scala
##########
@@ -66,10 +66,22 @@ case class ProjectExec(projectList: Seq[NamedExpression], child: SparkPlan)
 
   override def doConsume(ctx: CodegenContext, input: Seq[ExprCode], row: ExprCode): String = {
     val exprs = bindReferences[Expression](projectList, child.output)
-    val resultVars = exprs.map(_.genCode(ctx))
+    val (subExprsCode, resultVars) = if (conf.subexpressionEliminationEnabled) {
+      // subexpression elimination
+      val subExprs = ctx.subexpressionEliminationForWholeStageCodegen(exprs)
+      val genVars = ctx.withSubExprEliminationExprs(subExprs.states) {
+        exprs.map(_.genCode(ctx))
+      }
+      (subExprs.codes.mkString("\n"), genVars)
+    } else {
+      ("", exprs.map(_.genCode(ctx)))
+    }
+
     // Evaluation of non-deterministic expressions can't be deferred.
     val nonDeterministicAttrs = projectList.filterNot(_.deterministic).map(_.toAttribute)
     s"""

Review comment:
       Thanks for testing! It is late in my timezone; I will update the generated code tomorrow.

##########
File path: sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2Suite.scala
##########
@@ -268,7 +268,7 @@ class DataSourceV2Suite extends QueryTest with SharedSparkSession with AdaptiveS
          }
        }
        // this input data will fail to read middle way.
-        val input = spark.range(10).select(failingUdf('id).as('i)).select('i, -'i as 'j)
+        val input = spark.range(15).select(failingUdf('id).as('i)).select('i, -'i as 'j)

Review comment:
       `failingUdf` used to be evaluated twice for each row; with subexpression elimination it is evaluated only once. So we need to increase the range for the read to still throw the exception as before.
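
       To make the arithmetic behind 10 → 15 concrete, here is a hedged sketch of the test's shape (the actual counter and failure threshold in DataSourceV2Suite may differ). The two selects collapse into one projection in which `failingUdf('id)` backs both output columns, so it previously ran twice per row; once it runs only once per row, the range must grow for the call count to still cross the failure threshold mid-read.

       ```scala
       import org.apache.spark.sql.SparkSession
       import org.apache.spark.sql.functions.udf

       object FailingUdfSketch {
         def main(args: Array[String]): Unit = {
           // local[1] keeps execution in a single task so the closure's counter
           // sees every invocation in order.
           val spark = SparkSession.builder().master("local[1]").appName("sketch").getOrCreate()
           import spark.implicits._

           // Hypothetical threshold; the real test's failure point may differ.
           val failAfter = 12
           val failingUdf = udf {
             var count = 0
             (id: Long) => {
               count += 1
               if (count > failAfter) throw new RuntimeException("fail in the middle")
               id
             }
           }

           // Twice-per-row evaluation: range(10) => 20 calls, crossing failAfter.
           // Once-per-row evaluation: range(10) => only 10 calls, no failure; hence
           // the bump to range(15) so the read still fails partway through.
           val input = spark.range(15).select(failingUdf('id).as('i)).select('i, -'i as 'j)
           input.collect() // expected to throw once the call count exceeds failAfter
         }
       }
       ```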





----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
[email protected]


