Github user gatorsmile commented on a diff in the pull request:
https://github.com/apache/spark/pull/19480#discussion_r147061319
--- Diff:
sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeGenerator.scala
---
@@ -801,10 +832,49 @@ class CodegenContext {
| ${makeSplitFunction(body)}
|}
""".stripMargin
- addNewFunction(name, code)
+ addNewFunctionInternal(name, code, inlineToOuterClass = false)
}
- foldFunctions(functions.map(name =>
s"$name(${arguments.map(_._2).mkString(", ")})"))
+ // Here we store all the methods which have been added to the outer
class.
+ val outerClassFunctions = functions
+ .filter(_.subclassName.isEmpty)
+ .map(_.functionName)
+
+ val argsString = arguments.map(_._2).mkString(", ")
+
+ // Here we handle all the methods which have been added to the
nested subclasses and
+ // not to the outer class.
+ // Since they can be many, their direct invocation in the outer
class adds many entries
+ // to the outer class' constant pool. This can cause the constant
pool to exceed the JVM limit.
+ // To avoid this problem, we group them and we call only the
grouping methods in the
+ // outer class.
+ val innerClassToFunctions = functions
+ .filter(_.subclassName.isDefined)
+ .foldLeft(ListMap.empty[(String, String), Seq[String]]) { case
(acc, f) =>
+ val key = (f.subclassName.get, f.subclassInstance.get)
+ acc.updated(key, acc.getOrElse(key, Seq.empty[String]) ++
Seq(f.functionName))
+ }
+ val innerClassFunctions = innerClassToFunctions.flatMap {
+ case ((subclassName, subclassInstance), subclassFunctions) =>
+ if (subclassFunctions.size >
CodeGenerator.MERGE_SPLIT_METHODS_THRESHOLD) {
+ // Adding a new function to each subclass which contains
+ // the invocation of all the ones which have been added to
+ // that subclass
+ val body = foldFunctions(subclassFunctions.map(name =>
s"$name($argsString)"))
+ val code = s"""
+ |private $returnType $func($argString) {
+ | ${makeSplitFunction(body)}
+ |}
+ """.stripMargin
+ addNewFunctionToClass(func, code, subclassName)
+ Seq(s"$subclassInstance.$func")
+ } else {
+ subclassFunctions.map(f => s"$subclassInstance.$f")
+ }
+ }
--- End diff --
Could we build a separate private function for generating `innerClassFunctions`?
The function `splitExpressions` is pretty large after this PR.
---
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]