Github user gatorsmile commented on a diff in the pull request:
https://github.com/apache/spark/pull/19480#discussion_r147443947
--- Diff:
sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeGenerator.scala
---
@@ -801,10 +831,82 @@ class CodegenContext {
| ${makeSplitFunction(body)}
|}
""".stripMargin
- addNewFunction(name, code)
+ addNewFunctionInternal(name, code, inlineToOuterClass = false)
}
- foldFunctions(functions.map(name =>
s"$name(${arguments.map(_._2).mkString(", ")})"))
+ val (outerClassFunctions, innerClassFunctions) =
functions.partition(_.innerClassName.isEmpty)
+
+ val argsString = arguments.map(_._2).mkString(", ")
+ val outerClassFunctionCalls = outerClassFunctions.map(f =>
s"${f.functionName}($argsString)")
+
+ val innerClassFunctionCalls = generateInnerClassesFunctionCalls(
+ innerClassFunctions,
+ func,
+ arguments,
+ returnType,
+ makeSplitFunction,
+ foldFunctions)
+
+ foldFunctions(outerClassFunctionCalls ++ innerClassFunctionCalls)
+ }
+ }
+
+ /**
+ * Here we handle all the methods which have been added to the inner
classes and
+ * not to the outer class.
+ * Since they can be many, their direct invocation in the outer class
adds many entries
+ * to the outer class' constant pool. This can cause the constant pool
to exceed the JVM limit.
+ * Moreover, this can also cause the outer class method where all the
invocations are
+ * performed to grow beyond the 64k limit.
+ * To avoid these problems, we group them and we call only the grouping
methods in the
+ * outer class.
+ *
+ * @param functions a [[Seq]] of [[NewFunctionSpec]] defined in the
inner classes
+ * @param funcName the split function name base.
+ * @param arguments the list of (type, name) of the arguments of the
split function.
+ * @param returnType the return type of the split function.
+ * @param makeSplitFunction makes split function body, e.g. add
preparation or cleanup.
+ * @param foldFunctions folds the split function calls.
+ * @return an [[Iterable]] containing the methods' invocations
+ */
+ private def generateInnerClassesFunctionCalls(
+ functions: Seq[NewFunctionSpec],
+ funcName: String,
+ arguments: Seq[(String, String)],
+ returnType: String,
+ makeSplitFunction: String => String,
+ foldFunctions: Seq[String] => String): Iterable[String] = {
+ val innerClassToFunctions = mutable.LinkedHashMap.empty[(String,
String), Seq[String]]
+ functions.foreach(f => {
+ val key = (f.innerClassName.get, f.innerClassInstance.get)
+ innerClassToFunctions.update(key, f.functionName +:
+ innerClassToFunctions.getOrElse(key, Seq.empty[String]))
+ })
--- End diff --
```Scala
functions.foreach { f =>
val key = (f.innerClassName.get, f.innerClassInstance.get)
val value = f.functionName +: innerClassToFunctions.getOrElse(key,
Seq.empty[String])
innerClassToFunctions.put(key, value)
}
```
---
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]