Repository: spark Updated Branches: refs/heads/master cecd285a2 -> 2a53fbfce
[SPARK-20871][SQL] limit logging of Janino code ## What changes were proposed in this pull request? When the code that is generated is greater than 64k, then Janino compile will fail and CodeGenerator.scala will log the entire code at Error level. SPARK-20871 suggests only logging the code at Debug level. Since, the code is already logged at debug level, this Pull Request proposes not including the formatted code in the Error logging and exception message at all. When an exception occurs, the code will be logged at Info level but truncated if it is more than 1000 lines long. ## How was this patch tested? Existing tests were run. An extra test test case was added to CodeFormatterSuite to test the new maxLines parameter, Author: pj.fanning <[email protected]> Closes #18658 from pjfanning/SPARK-20871. Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/2a53fbfc Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/2a53fbfc Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/2a53fbfc Branch: refs/heads/master Commit: 2a53fbfce72b3faef020e39a1e8628d68bc95beb Parents: cecd285 Author: pj.fanning <[email protected]> Authored: Sun Jul 23 10:38:03 2017 -0700 Committer: gatorsmile <[email protected]> Committed: Sun Jul 23 10:38:03 2017 -0700 ---------------------------------------------------------------------- .../expressions/codegen/CodeFormatter.scala | 10 ++++- .../expressions/codegen/CodeGenerator.scala | 13 ++++--- .../org/apache/spark/sql/internal/SQLConf.scala | 10 +++++ .../codegen/CodeFormatterSuite.scala | 40 +++++++++++++++++--- 4 files changed, 61 insertions(+), 12 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/spark/blob/2a53fbfc/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeFormatter.scala ---------------------------------------------------------------------- diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeFormatter.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeFormatter.scala index 05b7c96..60e600d 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeFormatter.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeFormatter.scala @@ -28,14 +28,20 @@ import java.util.regex.Matcher object CodeFormatter { val commentHolder = """\/\*(.+?)\*\/""".r - def format(code: CodeAndComment): String = { + def format(code: CodeAndComment, maxLines: Int = -1): String = { val formatter = new CodeFormatter - code.body.split("\n").foreach { line => + val lines = code.body.split("\n") + val needToTruncate = maxLines >= 0 && lines.length > maxLines + val filteredLines = if (needToTruncate) lines.take(maxLines) else lines + filteredLines.foreach { line => val commentReplaced = commentHolder.replaceAllIn( line.trim, m => code.comment.get(m.group(1)).map(Matcher.quoteReplacement).getOrElse(m.group(0))) formatter.addLine(commentReplaced) } + if (needToTruncate) { + formatter.addLine(s"[truncated to $maxLines lines (total lines is ${lines.length})]") + } formatter.result() } http://git-wip-us.apache.org/repos/asf/spark/blob/2a53fbfc/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeGenerator.scala ---------------------------------------------------------------------- diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeGenerator.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeGenerator.scala index 7cf9daf..a014e2a 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeGenerator.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeGenerator.scala @@ -39,6 +39,7 @@ import org.apache.spark.metrics.source.CodegenMetrics import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.catalyst.expressions._ import org.apache.spark.sql.catalyst.util.{ArrayData, MapData} +import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.types._ import org.apache.spark.unsafe.Platform import org.apache.spark.unsafe.types._ @@ -1037,12 +1038,10 @@ object CodeGenerator extends Logging { )) evaluator.setExtendedClass(classOf[GeneratedClass]) - lazy val formatted = CodeFormatter.format(code) - logDebug({ // Only add extra debugging info to byte code when we are going to print the source code. evaluator.setDebuggingInformation(true, true, false) - s"\n$formatted" + s"\n${CodeFormatter.format(code)}" }) try { @@ -1050,12 +1049,16 @@ object CodeGenerator extends Logging { recordCompilationStats(evaluator) } catch { case e: JaninoRuntimeException => - val msg = s"failed to compile: $e\n$formatted" + val msg = s"failed to compile: $e" logError(msg, e) + val maxLines = SQLConf.get.loggingMaxLinesForCodegen + logInfo(s"\n${CodeFormatter.format(code, maxLines)}") throw new JaninoRuntimeException(msg, e) case e: CompileException => - val msg = s"failed to compile: $e\n$formatted" + val msg = s"failed to compile: $e" logError(msg, e) + val maxLines = SQLConf.get.loggingMaxLinesForCodegen + logInfo(s"\n${CodeFormatter.format(code, maxLines)}") throw new CompileException(msg, e.getLocation) } evaluator.getClazz().newInstance().asInstanceOf[GeneratedClass] http://git-wip-us.apache.org/repos/asf/spark/blob/2a53fbfc/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala ---------------------------------------------------------------------- diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala index 824908d..a819cdd 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala @@ -564,6 +564,14 @@ object SQLConf { .intConf .createWithDefault(20) + val CODEGEN_LOGGING_MAX_LINES = buildConf("spark.sql.codegen.logging.maxLines") + .internal() + .doc("The maximum number of codegen lines to log when errors occur. Use -1 for unlimited.") + .intConf + .checkValue(maxLines => maxLines >= -1, "The maximum must be a positive integer, 0 to " + + "disable logging or -1 to apply no limit.") + .createWithDefault(1000) + val FILES_MAX_PARTITION_BYTES = buildConf("spark.sql.files.maxPartitionBytes") .doc("The maximum number of bytes to pack into a single partition when reading files.") .longConf @@ -1004,6 +1012,8 @@ class SQLConf extends Serializable with Logging { def maxCaseBranchesForCodegen: Int = getConf(MAX_CASES_BRANCHES) + def loggingMaxLinesForCodegen: Int = getConf(CODEGEN_LOGGING_MAX_LINES) + def tableRelationCacheSize: Int = getConf(StaticSQLConf.FILESOURCE_TABLE_RELATION_CACHE_SIZE) http://git-wip-us.apache.org/repos/asf/spark/blob/2a53fbfc/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeFormatterSuite.scala ---------------------------------------------------------------------- diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeFormatterSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeFormatterSuite.scala index bc5a8f0..9d0a416 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeFormatterSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeFormatterSuite.scala @@ -20,18 +20,18 @@ package org.apache.spark.sql.catalyst.expressions.codegen import org.apache.spark.SparkFunSuite import org.apache.spark.sql.catalyst.util._ - class CodeFormatterSuite extends SparkFunSuite { - def testCase(name: String)( - input: String, comment: Map[String, String] = Map.empty)(expected: String): Unit = { + def testCase(name: String)(input: String, + comment: Map[String, String] = Map.empty, maxLines: Int = -1)(expected: String): Unit = { test(name) { val sourceCode = new CodeAndComment(input.trim, comment) - if (CodeFormatter.format(sourceCode).trim !== expected.trim) { + if (CodeFormatter.format(sourceCode, maxLines).trim !== expected.trim) { fail( s""" |== FAIL: Formatted code doesn't match === - |${sideBySide(CodeFormatter.format(sourceCode).trim, expected.trim).mkString("\n")} + |${sideBySide(CodeFormatter.format(sourceCode, maxLines).trim, + expected.trim).mkString("\n")} """.stripMargin) } } @@ -129,6 +129,36 @@ class CodeFormatterSuite extends SparkFunSuite { """.stripMargin } + testCase("function calls with maxLines=0") ( + """ + |foo( + |a, + |b, + |c) + """.stripMargin, + maxLines = 0 + ) { + """ + |/* 001 */ [truncated to 0 lines (total lines is 4)] + """.stripMargin + } + + testCase("function calls with maxLines=2") ( + """ + |foo( + |a, + |b, + |c) + """.stripMargin, + maxLines = 2 + ) { + """ + |/* 001 */ foo( + |/* 002 */ a, + |/* 003 */ [truncated to 2 lines (total lines is 4)] + """.stripMargin + } + testCase("single line comments") { """ |// This is a comment about class A { { { ( ( --------------------------------------------------------------------- To unsubscribe, e-mail: [email protected] For additional commands, e-mail: [email protected]
