Github user gatorsmile commented on a diff in the pull request:
https://github.com/apache/spark/pull/19480#discussion_r147446840
--- Diff:
sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeGenerator.scala
---
@@ -801,10 +831,82 @@ class CodegenContext {
| ${makeSplitFunction(body)}
|}
""".stripMargin
- addNewFunction(name, code)
+ addNewFunctionInternal(name, code, inlineToOuterClass = false)
}
- foldFunctions(functions.map(name =>
s"$name(${arguments.map(_._2).mkString(", ")})"))
+ val (outerClassFunctions, innerClassFunctions) =
functions.partition(_.innerClassName.isEmpty)
+
+ val argsString = arguments.map(_._2).mkString(", ")
+ val outerClassFunctionCalls = outerClassFunctions.map(f =>
s"${f.functionName}($argsString)")
+
+ val innerClassFunctionCalls = generateInnerClassesFunctionCalls(
+ innerClassFunctions,
+ func,
+ arguments,
+ returnType,
+ makeSplitFunction,
+ foldFunctions)
+
+ foldFunctions(outerClassFunctionCalls ++ innerClassFunctionCalls)
+ }
+ }
+
+ /**
+ * Here we handle all the methods which have been added to the inner
classes and
+ * not to the outer class.
+ * Since they can be many, their direct invocation in the outer class
adds many entries
+ * to the outer class' constant pool. This can cause the constant pool
to exceed the JVM limit.
+ * Moreover, this can also cause the outer class method where all the
invocations are
+ * performed to grow beyond the 64k limit.
+ * To avoid these problems, we group them and we call only the grouping
methods in the
+ * outer class.
+ *
+ * @param functions a [[Seq]] of [[NewFunctionSpec]] defined in the
inner classes
+ * @param funcName the split function name base.
+ * @param arguments the list of (type, name) of the arguments of the
split function.
+ * @param returnType the return type of the split function.
+ * @param makeSplitFunction makes split function body, e.g. add
preparation or cleanup.
+ * @param foldFunctions folds the split function calls.
+ * @return an [[Iterable]] containing the methods' invocations
+ */
+ private def generateInnerClassesFunctionCalls(
+ functions: Seq[NewFunctionSpec],
+ funcName: String,
+ arguments: Seq[(String, String)],
+ returnType: String,
+ makeSplitFunction: String => String,
+ foldFunctions: Seq[String] => String): Iterable[String] = {
+ val innerClassToFunctions = mutable.LinkedHashMap.empty[(String,
String), Seq[String]]
+ functions.foreach(f => {
+ val key = (f.innerClassName.get, f.innerClassInstance.get)
+ innerClassToFunctions.update(key, f.functionName +:
+ innerClassToFunctions.getOrElse(key, Seq.empty[String]))
+ })
+
+ val argDefinitionString = arguments.map { case (t, name) => s"$t
$name" }.mkString(", ")
+ val argInvocationString = arguments.map(_._2).mkString(", ")
+
+ innerClassToFunctions.flatMap {
+ case ((innerClassName, innerClassInstance), innerClassFunctions) =>
+ // for performance reasons, the functions are prepended, instead
of appended,
+ // thus here they are in reversed order
+ val orderedFunctions = innerClassFunctions.reverse
+ if (orderedFunctions.size >
CodeGenerator.MERGE_SPLIT_METHODS_THRESHOLD) {
+ // Adding a new function to each inner class which contains
+ // the invocation of all the ones which have been added to
+ // that inner class
+ val body = foldFunctions(orderedFunctions.map(name =>
+ s"$name($argInvocationString)"))
+ val code = s"""
+ |private $returnType $funcName($argDefinitionString) {
+ | ${makeSplitFunction(body)}
+ |}
+ """.stripMargin
--- End diff --
```Scala
// Adding a new function to each inner class which contains the
invocation of all the
// ones which have been added to that inner class. For example,
// private class NestedClass {
// private void apply_862(InternalRow i) { ... }
// private void apply_863(InternalRow i) { ... }
// ...
// private void apply(InternalRow i) {
// apply_862(i);
// apply_863(i);
// ...
// }
// }
val body = foldFunctions(orderedFunctions.map(name =>
s"$name($argInvocationString)"))
val code =
s"""
|private $returnType $funcName($argDefinitionString) {
| ${makeSplitFunction(body)}
|}
""".stripMargin
```
---
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]