Repository: spark
Updated Branches:
  refs/heads/master bd4eb9ce5 -> 76fb173dd


[SPARK-21751][SQL] CodeGeneraor.splitExpressions counts code size more precisely

## What changes were proposed in this pull request?

Current `CodeGeneraor.splitExpressions` splits statements into methods if the 
total length of statements is more than 1024 characters. The length may include 
comments or empty line.

This PR excludes comment or empty line from the length to reduce the number of 
generated methods in a class, by using 
`CodeFormatter.stripExtraNewLinesAndComments()` method.

## How was this patch tested?

Existing tests

Author: Kazuaki Ishizaki <[email protected]>

Closes #18966 from kiszk/SPARK-21751.


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/76fb173d
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/76fb173d
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/76fb173d

Branch: refs/heads/master
Commit: 76fb173dd639baa9534486488155fc05a71f850e
Parents: bd4eb9c
Author: Kazuaki Ishizaki <[email protected]>
Authored: Tue Oct 10 20:29:02 2017 -0700
Committer: gatorsmile <[email protected]>
Committed: Tue Oct 10 20:29:02 2017 -0700

----------------------------------------------------------------------
 .../expressions/codegen/CodeFormatter.scala     |  8 +++++
 .../expressions/codegen/CodeGenerator.scala     |  5 ++-
 .../codegen/CodeFormatterSuite.scala            | 32 ++++++++++++++++++++
 3 files changed, 44 insertions(+), 1 deletion(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/spark/blob/76fb173d/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeFormatter.scala
----------------------------------------------------------------------
diff --git 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeFormatter.scala
 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeFormatter.scala
index 60e600d..7b398f4 100644
--- 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeFormatter.scala
+++ 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeFormatter.scala
@@ -89,6 +89,14 @@ object CodeFormatter {
     }
     new CodeAndComment(code.result().trim(), map)
   }
+
+  def stripExtraNewLinesAndComments(input: String): String = {
+    val commentReg =
+      ("""([ |\t]*?\/\*[\s|\S]*?\*\/[ |\t]*?)|""" +    // strip /*comment*/
+       """([ |\t]*?\/\/[\s\S]*?\n)""").r               // strip //comment
+    val codeWithoutComment = commentReg.replaceAllIn(input, "")
+    codeWithoutComment.replaceAll("""\n\s*\n""", "\n") // strip ExtraNewLines
+  }
 }
 
 private class CodeFormatter {

http://git-wip-us.apache.org/repos/asf/spark/blob/76fb173d/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeGenerator.scala
----------------------------------------------------------------------
diff --git 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeGenerator.scala
 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeGenerator.scala
index f9c5ef8..2cb6659 100644
--- 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeGenerator.scala
+++ 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeGenerator.scala
@@ -772,16 +772,19 @@ class CodegenContext {
       foldFunctions: Seq[String] => String = _.mkString("", ";\n", ";")): 
String = {
     val blocks = new ArrayBuffer[String]()
     val blockBuilder = new StringBuilder()
+    var length = 0
     for (code <- expressions) {
       // We can't know how many bytecode will be generated, so use the length 
of source code
       // as metric. A method should not go beyond 8K, otherwise it will not be 
JITted, should
       // also not be too small, or it will have many function calls (for wide 
table), see the
       // results in BenchmarkWideTable.
-      if (blockBuilder.length > 1024) {
+      if (length > 1024) {
         blocks += blockBuilder.toString()
         blockBuilder.clear()
+        length = 0
       }
       blockBuilder.append(code)
+      length += CodeFormatter.stripExtraNewLinesAndComments(code).length
     }
     blocks += blockBuilder.toString()
 

http://git-wip-us.apache.org/repos/asf/spark/blob/76fb173d/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeFormatterSuite.scala
----------------------------------------------------------------------
diff --git 
a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeFormatterSuite.scala
 
b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeFormatterSuite.scala
index 9d0a416..a0f1a64 100644
--- 
a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeFormatterSuite.scala
+++ 
b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeFormatterSuite.scala
@@ -53,6 +53,38 @@ class CodeFormatterSuite extends SparkFunSuite {
     assert(reducedCode.body === "/*project_c4*/")
   }
 
+  test("removing extra new lines and comments") {
+    val code =
+      """
+        |/*
+        |  * multi
+        |  * line
+        |  * comments
+        |  */
+        |
+        |public function() {
+        |/*comment*/
+        |  /*comment_with_space*/
+        |code_body
+        |//comment
+        |code_body
+        |  //comment_with_space
+        |
+        |code_body
+        |}
+      """.stripMargin
+
+    val reducedCode = CodeFormatter.stripExtraNewLinesAndComments(code)
+    assert(reducedCode ===
+      """
+        |public function() {
+        |code_body
+        |code_body
+        |code_body
+        |}
+      """.stripMargin)
+  }
+
   testCase("basic example") {
     """
       |class A {


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to