Repository: spark Updated Branches: refs/heads/master b81e3031f -> 3c614d056
[SPARK-25113][SQL] Add logging to CodeGenerator when any generated method's bytecode size goes above HugeMethodLimit ## What changes were proposed in this pull request? Add logging for all generated methods from the `CodeGenerator` whose bytecode size goes above 8000 bytes. This is to help with gathering stats on how often Spark is generating methods too big to be JIT'd. It covers all codegen scenarios, include whole-stage codegen and also individual expression codegen, e.g. unsafe projection, mutable projection, etc. ## How was this patch tested? Manually tested that logging did happen when generated method was above 8000 bytes. Also added a new unit test case to `CodeGenerationSuite` to verify that the logging did happen. Author: Kris Mok <kris....@databricks.com> Closes #22103 from rednaxelafx/codegen-8k-logging. Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/3c614d05 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/3c614d05 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/3c614d05 Branch: refs/heads/master Commit: 3c614d0565a9652a12970dcdf8545432a4ac6f68 Parents: b81e303 Author: Kris Mok <kris....@databricks.com> Authored: Tue Aug 14 16:40:00 2018 -0700 Committer: Xiao Li <gatorsm...@gmail.com> Committed: Tue Aug 14 16:40:00 2018 -0700 ---------------------------------------------------------------------- .../expressions/codegen/CodeGenerator.scala | 11 +++- .../expressions/CodeGenerationSuite.scala | 64 ++++++++++++++++++++ 2 files changed, 73 insertions(+), 2 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/spark/blob/3c614d05/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeGenerator.scala ---------------------------------------------------------------------- diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeGenerator.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeGenerator.scala index 4b30de5..e2d74da 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeGenerator.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeGenerator.scala @@ -1258,7 +1258,8 @@ abstract class CodeGenerator[InType <: AnyRef, OutType <: AnyRef] extends Loggin object CodeGenerator extends Logging { - // This is the value of HugeMethodLimit in the OpenJDK JVM settings + // This is the default value of HugeMethodLimit in the OpenJDK HotSpot JVM, + // beyond which methods will be rejected from JIT compilation final val DEFAULT_JVM_HUGE_METHOD_LIMIT = 8000 // The max valid length of method parameters in JVM. @@ -1385,9 +1386,15 @@ object CodeGenerator extends Logging { try { val cf = new ClassFile(new ByteArrayInputStream(classBytes)) val stats = cf.methodInfos.asScala.flatMap { method => - method.getAttributes().filter(_.getClass.getName == codeAttr.getName).map { a => + method.getAttributes().filter(_.getClass eq codeAttr).map { a => val byteCodeSize = codeAttrField.get(a).asInstanceOf[Array[Byte]].length CodegenMetrics.METRIC_GENERATED_METHOD_BYTECODE_SIZE.update(byteCodeSize) + + if (byteCodeSize > DEFAULT_JVM_HUGE_METHOD_LIMIT) { + logInfo("Generated method too long to be JIT compiled: " + + s"${cf.getThisClassName}.${method.getName} is $byteCodeSize bytes") + } + byteCodeSize } } http://git-wip-us.apache.org/repos/asf/spark/blob/3c614d05/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CodeGenerationSuite.scala ---------------------------------------------------------------------- diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CodeGenerationSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CodeGenerationSuite.scala index 5b71bec..c383eec 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CodeGenerationSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CodeGenerationSuite.scala @@ -19,12 +19,16 @@ package org.apache.spark.sql.catalyst.expressions import java.sql.Timestamp +import org.apache.log4j.{Appender, AppenderSkeleton, Logger} +import org.apache.log4j.spi.LoggingEvent + import org.apache.spark.SparkFunSuite import org.apache.spark.metrics.source.CodegenMetrics import org.apache.spark.sql.Row import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.catalyst.dsl.expressions._ import org.apache.spark.sql.catalyst.expressions.codegen._ +import org.apache.spark.sql.catalyst.expressions.codegen.Block._ import org.apache.spark.sql.catalyst.expressions.objects._ import org.apache.spark.sql.catalyst.util.{ArrayBasedMapData, DateTimeUtils} import org.apache.spark.sql.types._ @@ -499,4 +503,64 @@ class CodeGenerationSuite extends SparkFunSuite with ExpressionEvalHelper { ctx.freshName("a_1") :: ctx.freshName("a_0") :: Nil assert(names2.distinct.length == 4) } + + test("SPARK-25113: should log when there exists generated methods above HugeMethodLimit") { + class MockAppender extends AppenderSkeleton { + var seenMessage = false + + override def append(loggingEvent: LoggingEvent): Unit = { + if (loggingEvent.getRenderedMessage().contains("Generated method too long")) { + seenMessage = true + } + } + + override def close(): Unit = {} + override def requiresLayout(): Boolean = false + } + + val appender = new MockAppender() + withLogAppender(appender) { + val x = 42 + val expr = HugeCodeIntExpression(x) + val proj = GenerateUnsafeProjection.generate(Seq(expr)) + val actual = proj(null) + assert(actual.getInt(0) == x) + } + assert(appender.seenMessage) + } + + private def withLogAppender(appender: Appender)(f: => Unit): Unit = { + val logger = + Logger.getLogger(classOf[CodeGenerator[_, _]].getName) + logger.addAppender(appender) + try f finally { + logger.removeAppender(appender) + } + } +} + +case class HugeCodeIntExpression(value: Int) extends Expression { + override def nullable: Boolean = true + override def dataType: DataType = IntegerType + override def children: Seq[Expression] = Nil + override def eval(input: InternalRow): Any = value + override def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = { + // Assuming HugeMethodLimit to be 8000 + val HugeMethodLimit = CodeGenerator.DEFAULT_JVM_HUGE_METHOD_LIMIT + // A single "int dummyN = 0;" will be at least 2 bytes of bytecode: + // 0: iconst_0 + // 1: istore_1 + // and it'll become bigger as the number of local variables increases. + // So 4000 such dummy local variable definitions are sufficient to bump the bytecode size + // of a generated method to above 8000 bytes. + val hugeCode = (0 until (HugeMethodLimit / 2)).map(i => s"int dummy$i = 0;").mkString("\n") + val code = + code"""{ + | $hugeCode + |} + |boolean ${ev.isNull} = false; + |int ${ev.value} = $value; + """.stripMargin + ev.copy(code = code) + } } --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org