Github user gatorsmile commented on a diff in the pull request: https://github.com/apache/spark/pull/19480#discussion_r147234320 --- Diff: sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeGenerator.scala --- @@ -801,10 +831,84 @@ class CodegenContext { | ${makeSplitFunction(body)} |} """.stripMargin - addNewFunction(name, code) + addNewFunctionInternal(name, code, inlineToOuterClass = false) } - foldFunctions(functions.map(name => s"$name(${arguments.map(_._2).mkString(", ")})")) + // Here we store all the methods which have been added to the outer class. + val outerClassFunctions = functions + .filter(_.innerClassName.isEmpty) + .map(_.functionName) + + val innerClassFunctions = generateInnerClassesMethodsCalls( + functions.filter(_.innerClassName.nonEmpty), + func, + arguments, + returnType, + makeSplitFunction, + foldFunctions) + + val argsString = arguments.map(_._2).mkString(", ") + foldFunctions((outerClassFunctions ++ innerClassFunctions).map( + name => s"$name($argsString)")) + } + } + + /** + * Here we handle all the methods which have been added to the inner classes and + * not to the outer class. + * Since they can be many, their direct invocation in the outer class adds many entries + * to the outer class' constant pool. This can cause the constant pool to past JVM limit. + * Moreover, this can cause also the outer class method where all the invocations are + * performed to grow beyond the 64k limit. + * To avoid these problems, we group them and we call only the grouping methods in the + * outer class. + * + * @param functions a [[Seq]] of [[NewFunctionSpec]] defined in the inner classes + * @param funcName the split function name base. + * @param arguments the list of (type, name) of the arguments of the split function. + * @param returnType the return type of the split function. + * @param makeSplitFunction makes split function body, e.g. add preparation or cleanup. + * @param foldFunctions folds the split function calls. + * @return an [[Iterable]] containing the methods' invocations + */ + private def generateInnerClassesMethodsCalls( + functions: Seq[NewFunctionSpec], + funcName: String, + arguments: Seq[(String, String)], + returnType: String, + makeSplitFunction: String => String, + foldFunctions: Seq[String] => String): Iterable[String] = { + val innerClassToFunctions = mutable.ListMap.empty[(String, String), Seq[String]] + functions.foreach(f => { + val key = (f.innerClassName.get, f.innerClassInstance.get) + innerClassToFunctions.update(key, f.functionName +: + innerClassToFunctions.getOrElse(key, Seq.empty[String])) + }) + // for performance reasons, the functions are prepended, instead of appended, + // thus they are in reversed order + innerClassToFunctions.transform { case (_, functions) => functions.reverse } + + val argDefinitionString = arguments.map { case (t, name) => s"$t $name" }.mkString(", ") + val argInvocationString = arguments.map(_._2).mkString(", ") + + innerClassToFunctions.flatMap { + case ((innerClassName, innerClassInstance), innerClassFunctions) => + if (innerClassFunctions.size > CodeGenerator.MERGE_SPLIT_METHODS_THRESHOLD) { + // Adding a new function to each inner class which contains + // the invocation of all the ones which have been added to + // that inner class + val body = foldFunctions(innerClassFunctions.map(name => + s"$name($argInvocationString)")) + val code = s""" + |private $returnType $funcName($argDefinitionString) { + | ${makeSplitFunction(body)} + |} + """.stripMargin + addNewFunctionToClass(funcName, code, innerClassName) + Seq(s"$innerClassInstance.$funcName") + } else { + innerClassFunctions.map(f => s"$innerClassInstance.$f") --- End diff -- Here, we are returning the function's invocations. Thus, passing arguments is more straightforward ``` innerClassFunctions.reverse.map(f => s"$innerClassInstance.$f($argInvocationString)") ```
--- --------------------------------------------------------------------- To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org