Github user gatorsmile commented on a diff in the pull request:
https://github.com/apache/spark/pull/19480#discussion_r146775448
--- Diff:
sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeGenerator.scala
---
@@ -801,10 +834,46 @@ class CodegenContext {
| ${makeSplitFunction(body)}
|}
""".stripMargin
- addNewFunction(name, code)
+ addNewFunctionInternal(name, code, inlineToOuterClass = false)
}
- foldFunctions(functions.map(name =>
s"$name(${arguments.map(_._2).mkString(", ")})"))
+ // Here we store all the methods which have been added to the outer
class.
+ val outerClassFunctions = functions
+ .filter(_.subclassName.isEmpty)
+ .map(_.functionName)
+
+ // Here we handle all the methods which have been added to the
nested subclasses and
+ // not to the outer class.
+ // Since they can be many, their direct invocation in the outer
class adds many entries
+ // to the outer class' constant pool. This can cause the constant
pool to exceed the JVM limit.
+ // To avoid this problem, we group them and we call only the
grouping methods in the
+ // outer class.
+ val innerClassFunctions = functions
+ .filter(_.subclassName.isDefined)
+ .foldLeft(ListMap.empty[(String, String), Seq[String]]) { case
(acc, f) =>
+ val key = (f.subclassName.get, f.subclassInstance.get)
+ acc.updated(key, acc.getOrElse(key, Seq.empty[String]) ++
Seq(f.functionName))
+ }
+ .flatMap { case ((subclassName, subclassInstance),
subclassFunctions) =>
+ if (subclassFunctions.size >
CodeGenerator.MERGE_SPLIT_METHODS_THRESHOLD) {
+ // Adding a new function to each subclass which contains
+ // the invocation of all the ones which have been added to
+ // that subclass
+ val code = s"""
+ |private $returnType $func($argString) {
+ |
${makeSplitFunction(foldFunctions(subclassFunctions.map(name =>
+ s"$name(${arguments.map(_._2).mkString(", ")})")))}
+ |}
+ """.stripMargin
+ addNewFunctionToClass(func, code, subclassName)
+ Seq(s"$subclassInstance.$func")
+ } else {
+ subclassFunctions.map(f => s"$subclassInstance.$f")
+ }
+ }
+
+ foldFunctions((outerClassFunctions ++ innerClassFunctions).map(
+ name => s"$name(${arguments.map(_._2).mkString(", ")})"))
--- End diff --
The same suggestion. Do not inline a complex expression.
---
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]