Github user gatorsmile commented on a diff in the pull request:
https://github.com/apache/spark/pull/19480#discussion_r147234320
--- Diff:
sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeGenerator.scala
---
@@ -801,10 +831,84 @@ class CodegenContext {
| ${makeSplitFunction(body)}
|}
""".stripMargin
- addNewFunction(name, code)
+ addNewFunctionInternal(name, code, inlineToOuterClass = false)
}
- foldFunctions(functions.map(name =>
s"$name(${arguments.map(_._2).mkString(", ")})"))
+ // Here we store all the methods which have been added to the outer
class.
+ val outerClassFunctions = functions
+ .filter(_.innerClassName.isEmpty)
+ .map(_.functionName)
+
+ val innerClassFunctions = generateInnerClassesMethodsCalls(
+ functions.filter(_.innerClassName.nonEmpty),
+ func,
+ arguments,
+ returnType,
+ makeSplitFunction,
+ foldFunctions)
+
+ val argsString = arguments.map(_._2).mkString(", ")
+ foldFunctions((outerClassFunctions ++ innerClassFunctions).map(
+ name => s"$name($argsString)"))
+ }
+ }
+
+ /**
+ * Here we handle all the methods which have been added to the inner
classes and
+ * not to the outer class.
+ * Since they can be many, their direct invocation in the outer class
adds many entries
+ * to the outer class' constant pool. This can cause the constant pool
to past JVM limit.
+ * Moreover, this can cause also the outer class method where all the
invocations are
+ * performed to grow beyond the 64k limit.
+ * To avoid these problems, we group them and we call only the grouping
methods in the
+ * outer class.
+ *
+ * @param functions a [[Seq]] of [[NewFunctionSpec]] defined in the
inner classes
+ * @param funcName the split function name base.
+ * @param arguments the list of (type, name) of the arguments of the
split function.
+ * @param returnType the return type of the split function.
+ * @param makeSplitFunction makes split function body, e.g. add
preparation or cleanup.
+ * @param foldFunctions folds the split function calls.
+ * @return an [[Iterable]] containing the methods' invocations
+ */
+ private def generateInnerClassesMethodsCalls(
+ functions: Seq[NewFunctionSpec],
+ funcName: String,
+ arguments: Seq[(String, String)],
+ returnType: String,
+ makeSplitFunction: String => String,
+ foldFunctions: Seq[String] => String): Iterable[String] = {
+ val innerClassToFunctions = mutable.ListMap.empty[(String, String),
Seq[String]]
+ functions.foreach(f => {
+ val key = (f.innerClassName.get, f.innerClassInstance.get)
+ innerClassToFunctions.update(key, f.functionName +:
+ innerClassToFunctions.getOrElse(key, Seq.empty[String]))
+ })
+ // for performance reasons, the functions are prepended, instead of
appended,
+ // thus they are in reversed order
+ innerClassToFunctions.transform { case (_, functions) =>
functions.reverse }
+
+ val argDefinitionString = arguments.map { case (t, name) => s"$t
$name" }.mkString(", ")
+ val argInvocationString = arguments.map(_._2).mkString(", ")
+
+ innerClassToFunctions.flatMap {
+ case ((innerClassName, innerClassInstance), innerClassFunctions) =>
+ if (innerClassFunctions.size >
CodeGenerator.MERGE_SPLIT_METHODS_THRESHOLD) {
+ // Adding a new function to each inner class which contains
+ // the invocation of all the ones which have been added to
+ // that inner class
+ val body = foldFunctions(innerClassFunctions.map(name =>
+ s"$name($argInvocationString)"))
+ val code = s"""
+ |private $returnType $funcName($argDefinitionString) {
+ | ${makeSplitFunction(body)}
+ |}
+ """.stripMargin
+ addNewFunctionToClass(funcName, code, innerClassName)
+ Seq(s"$innerClassInstance.$funcName")
+ } else {
+ innerClassFunctions.map(f => s"$innerClassInstance.$f")
--- End diff --
Here, we are returning the function's invocations. Thus, passing arguments
is more straightforward
```
innerClassFunctions.reverse.map(f =>
s"$innerClassInstance.$f($argInvocationString)")
```
---
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]