Repository: spark
Updated Branches:
  refs/heads/master cecd285a2 -> 2a53fbfce


[SPARK-20871][SQL] limit logging of Janino code

## What changes were proposed in this pull request?

When the code that is generated is greater than 64k, then Janino compile will 
fail and CodeGenerator.scala will log the entire code at Error level.
SPARK-20871 suggests only logging the code at Debug level.
Since, the code is already logged at debug level, this Pull Request proposes 
not including the formatted code in the Error logging and exception message at 
all.
When an exception occurs, the code will be logged at Info level but truncated 
if it is more than 1000 lines long.

## How was this patch tested?

Existing tests were run.
An extra test test case was added to CodeFormatterSuite to test the new 
maxLines parameter,

Author: pj.fanning <[email protected]>

Closes #18658 from pjfanning/SPARK-20871.


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/2a53fbfc
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/2a53fbfc
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/2a53fbfc

Branch: refs/heads/master
Commit: 2a53fbfce72b3faef020e39a1e8628d68bc95beb
Parents: cecd285
Author: pj.fanning <[email protected]>
Authored: Sun Jul 23 10:38:03 2017 -0700
Committer: gatorsmile <[email protected]>
Committed: Sun Jul 23 10:38:03 2017 -0700

----------------------------------------------------------------------
 .../expressions/codegen/CodeFormatter.scala     | 10 ++++-
 .../expressions/codegen/CodeGenerator.scala     | 13 ++++---
 .../org/apache/spark/sql/internal/SQLConf.scala | 10 +++++
 .../codegen/CodeFormatterSuite.scala            | 40 +++++++++++++++++---
 4 files changed, 61 insertions(+), 12 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/spark/blob/2a53fbfc/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeFormatter.scala
----------------------------------------------------------------------
diff --git 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeFormatter.scala
 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeFormatter.scala
index 05b7c96..60e600d 100644
--- 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeFormatter.scala
+++ 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeFormatter.scala
@@ -28,14 +28,20 @@ import java.util.regex.Matcher
 object CodeFormatter {
   val commentHolder = """\/\*(.+?)\*\/""".r
 
-  def format(code: CodeAndComment): String = {
+  def format(code: CodeAndComment, maxLines: Int = -1): String = {
     val formatter = new CodeFormatter
-    code.body.split("\n").foreach { line =>
+    val lines = code.body.split("\n")
+    val needToTruncate = maxLines >= 0 && lines.length > maxLines
+    val filteredLines = if (needToTruncate) lines.take(maxLines) else lines
+    filteredLines.foreach { line =>
       val commentReplaced = commentHolder.replaceAllIn(
         line.trim,
         m => 
code.comment.get(m.group(1)).map(Matcher.quoteReplacement).getOrElse(m.group(0)))
       formatter.addLine(commentReplaced)
     }
+    if (needToTruncate) {
+      formatter.addLine(s"[truncated to $maxLines lines (total lines is 
${lines.length})]")
+    }
     formatter.result()
   }
 

http://git-wip-us.apache.org/repos/asf/spark/blob/2a53fbfc/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeGenerator.scala
----------------------------------------------------------------------
diff --git 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeGenerator.scala
 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeGenerator.scala
index 7cf9daf..a014e2a 100644
--- 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeGenerator.scala
+++ 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeGenerator.scala
@@ -39,6 +39,7 @@ import org.apache.spark.metrics.source.CodegenMetrics
 import org.apache.spark.sql.catalyst.InternalRow
 import org.apache.spark.sql.catalyst.expressions._
 import org.apache.spark.sql.catalyst.util.{ArrayData, MapData}
+import org.apache.spark.sql.internal.SQLConf
 import org.apache.spark.sql.types._
 import org.apache.spark.unsafe.Platform
 import org.apache.spark.unsafe.types._
@@ -1037,12 +1038,10 @@ object CodeGenerator extends Logging {
     ))
     evaluator.setExtendedClass(classOf[GeneratedClass])
 
-    lazy val formatted = CodeFormatter.format(code)
-
     logDebug({
       // Only add extra debugging info to byte code when we are going to print 
the source code.
       evaluator.setDebuggingInformation(true, true, false)
-      s"\n$formatted"
+      s"\n${CodeFormatter.format(code)}"
     })
 
     try {
@@ -1050,12 +1049,16 @@ object CodeGenerator extends Logging {
       recordCompilationStats(evaluator)
     } catch {
       case e: JaninoRuntimeException =>
-        val msg = s"failed to compile: $e\n$formatted"
+        val msg = s"failed to compile: $e"
         logError(msg, e)
+        val maxLines = SQLConf.get.loggingMaxLinesForCodegen
+        logInfo(s"\n${CodeFormatter.format(code, maxLines)}")
         throw new JaninoRuntimeException(msg, e)
       case e: CompileException =>
-        val msg = s"failed to compile: $e\n$formatted"
+        val msg = s"failed to compile: $e"
         logError(msg, e)
+        val maxLines = SQLConf.get.loggingMaxLinesForCodegen
+        logInfo(s"\n${CodeFormatter.format(code, maxLines)}")
         throw new CompileException(msg, e.getLocation)
     }
     evaluator.getClazz().newInstance().asInstanceOf[GeneratedClass]

http://git-wip-us.apache.org/repos/asf/spark/blob/2a53fbfc/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala
----------------------------------------------------------------------
diff --git 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala
index 824908d..a819cdd 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala
@@ -564,6 +564,14 @@ object SQLConf {
     .intConf
     .createWithDefault(20)
 
+  val CODEGEN_LOGGING_MAX_LINES = 
buildConf("spark.sql.codegen.logging.maxLines")
+    .internal()
+    .doc("The maximum number of codegen lines to log when errors occur. Use -1 
for unlimited.")
+    .intConf
+    .checkValue(maxLines => maxLines >= -1, "The maximum must be a positive 
integer, 0 to " +
+      "disable logging or -1 to apply no limit.")
+    .createWithDefault(1000)
+
   val FILES_MAX_PARTITION_BYTES = 
buildConf("spark.sql.files.maxPartitionBytes")
     .doc("The maximum number of bytes to pack into a single partition when 
reading files.")
     .longConf
@@ -1004,6 +1012,8 @@ class SQLConf extends Serializable with Logging {
 
   def maxCaseBranchesForCodegen: Int = getConf(MAX_CASES_BRANCHES)
 
+  def loggingMaxLinesForCodegen: Int = getConf(CODEGEN_LOGGING_MAX_LINES)
+
   def tableRelationCacheSize: Int =
     getConf(StaticSQLConf.FILESOURCE_TABLE_RELATION_CACHE_SIZE)
 

http://git-wip-us.apache.org/repos/asf/spark/blob/2a53fbfc/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeFormatterSuite.scala
----------------------------------------------------------------------
diff --git 
a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeFormatterSuite.scala
 
b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeFormatterSuite.scala
index bc5a8f0..9d0a416 100644
--- 
a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeFormatterSuite.scala
+++ 
b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeFormatterSuite.scala
@@ -20,18 +20,18 @@ package org.apache.spark.sql.catalyst.expressions.codegen
 import org.apache.spark.SparkFunSuite
 import org.apache.spark.sql.catalyst.util._
 
-
 class CodeFormatterSuite extends SparkFunSuite {
 
-  def testCase(name: String)(
-      input: String, comment: Map[String, String] = Map.empty)(expected: 
String): Unit = {
+  def testCase(name: String)(input: String,
+      comment: Map[String, String] = Map.empty, maxLines: Int = -1)(expected: 
String): Unit = {
     test(name) {
       val sourceCode = new CodeAndComment(input.trim, comment)
-      if (CodeFormatter.format(sourceCode).trim !== expected.trim) {
+      if (CodeFormatter.format(sourceCode, maxLines).trim !== expected.trim) {
         fail(
           s"""
              |== FAIL: Formatted code doesn't match ===
-             |${sideBySide(CodeFormatter.format(sourceCode).trim, 
expected.trim).mkString("\n")}
+             |${sideBySide(CodeFormatter.format(sourceCode, maxLines).trim,
+                 expected.trim).mkString("\n")}
            """.stripMargin)
       }
     }
@@ -129,6 +129,36 @@ class CodeFormatterSuite extends SparkFunSuite {
     """.stripMargin
   }
 
+  testCase("function calls with maxLines=0") (
+    """
+      |foo(
+      |a,
+      |b,
+      |c)
+    """.stripMargin,
+    maxLines = 0
+  ) {
+    """
+      |/* 001 */ [truncated to 0 lines (total lines is 4)]
+    """.stripMargin
+  }
+
+  testCase("function calls with maxLines=2") (
+    """
+      |foo(
+      |a,
+      |b,
+      |c)
+    """.stripMargin,
+    maxLines = 2
+  ) {
+    """
+      |/* 001 */ foo(
+      |/* 002 */   a,
+      |/* 003 */   [truncated to 2 lines (total lines is 4)]
+    """.stripMargin
+  }
+
   testCase("single line comments") {
     """
       |// This is a comment about class A { { { ( (


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to