Github user cloud-fan commented on a diff in the pull request:
https://github.com/apache/spark/pull/19813#discussion_r156286913
--- Diff:
sql/core/src/test/scala/org/apache/spark/sql/execution/WholeStageCodegenSuite.scala
---
@@ -236,4 +237,24 @@ class WholeStageCodegenSuite extends QueryTest with
SharedSQLContext {
}
}
}
+
+ test("SPARK-22551: Fix 64kb limit for deeply nested expressions under
wholestage codegen") {
+ import testImplicits._
+ withTempPath { dir =>
+ val path = dir.getCanonicalPath
+ val df = Seq(("abc", 1)).toDF("key", "int")
+ df.write.parquet(path)
+
+ var strExpr: Expression = col("key").expr
+ for (_ <- 1 to 150) {
+ strExpr = Decode(Encode(strExpr, Literal("utf-8")),
Literal("utf-8"))
+ }
+ val expressions = Seq(If(EqualTo(strExpr, strExpr), strExpr,
strExpr))
+
+ val df2 =
spark.read.parquet(path).select(expressions.map(Column(_)): _*)
+ val plan = df2.queryExecution.executedPlan
+ assert(plan.find(_.isInstanceOf[WholeStageCodegenExec]).isDefined)
--- End diff --
Can you give some insight into how this test fails without your PR? In
`WholeStageCodegen.doExec`, we have
```
val (ctx, cleanedSource) = doCodeGen()
// try to compile and fallback if it failed
val (_, maxCodeSize) = try {
CodeGenerator.compile(cleanedSource)
} catch {
case _: Exception if !Utils.isTesting &&
sqlContext.conf.codegenFallback =>
// We should already saw the error message
logWarning(s"Whole-stage codegen disabled for this plan:\n
$treeString")
return child.execute()
}
```
---
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]