Repository: spark
Updated Branches:
  refs/heads/master aa6564f37 -> 14cf61e90


[SPARK-16331][SQL] Reduce code generation time

## What changes were proposed in this pull request?
During code generation, a `LocalRelation` often holds a huge `Vector` object as 
its `data` field. In the simple example below, the `LocalRelation` has a `Vector` 
of 1,000,000 `UnsafeRow` elements.

```
val numRows = 1000000
val ds = (1 to numRows).toDS().persist()
benchmark.addCase("filter+reduce") { iter =>
  ds.filter(a => (a & 1) == 0).reduce(_ + _)
}
```
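
The snippet above omits how `benchmark` and the Dataset are set up. A minimal 
sketch of the wiring, assuming Spark 2.0's internal `org.apache.spark.util.Benchmark` 
helper and a local `SparkSession` (neither is shown in the PR, so treat the exact 
class location and constructor arguments as assumptions):

```
import org.apache.spark.sql.SparkSession
import org.apache.spark.util.Benchmark  // assumption: Spark 2.0 location of the internal benchmark helper

object FilterReduceBenchmark {
  def main(args: Array[String]): Unit = {
    val spark = SparkSession.builder().master("local[*]").appName("SPARK-16331").getOrCreate()
    import spark.implicits._  // needed for (1 to numRows).toDS()

    val numRows = 1000000
    val ds = (1 to numRows).toDS().persist()

    val benchmark = new Benchmark("compilationTime", numRows)
    benchmark.addCase("filter+reduce") { _ =>
      ds.filter(a => (a & 1) == 0).reduce(_ + _)
    }
    benchmark.run()  // prints a Best/Avg table like the one shown below

    spark.stop()
  }
}
```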

In `TreeNode.transformChildren`, all elements of that vector are unnecessarily 
iterated to check whether any children exist in it, since `Vector` is a 
`Traversable`. This significantly increases code generation time.

This patch avoids that overhead by checking whether the node has any children 
before iterating over its product elements; a `LocalRelation` has no children 
because it extends `LeafNode`, so it can return immediately, as sketched below.
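
The effect of the guard can be illustrated with a small, self-contained sketch. 
The classes and the `transformChildren` signature here are hypothetical stand-ins, 
not Catalyst's actual `TreeNode` code; they only show the shape of the early exit:

```
// Toy model of the early-exit idea (hypothetical classes, not Catalyst's TreeNode).
sealed trait Node extends Product { def children: Seq[Node] }

// Plays the role of LocalRelation: a leaf whose only field is a huge collection.
case class LocalData(data: Vector[Int]) extends Node { val children: Seq[Node] = Nil }
// Plays the role of a unary operator such as Filter.
case class UnaryOp(child: Node) extends Node { val children: Seq[Node] = Seq(child) }

def transformChildren(node: Node, rule: Node => Node): Node = {
  if (node.children.isEmpty) {
    // Early exit (the patch): a leaf cannot hold children among its product
    // elements, so its huge `data` Vector is never scanned or rebuilt.
    node
  } else {
    // Only nodes that actually have children walk their fields; Traversable
    // fields of such nodes are still mapped over, exactly as before the patch.
    node match {
      case UnaryOp(child) => UnaryOp(rule(child))
      case other          => other
    }
  }
}
```

With this guard, calling `transformChildren` on the leaf returns immediately 
instead of mapping over its million-element `Vector` only to find no children in it.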

The performance of the above example:
```
without this patch
Java HotSpot(TM) 64-Bit Server VM 1.8.0_91-b14 on Mac OS X 10.11.5
Intel(R) Core(TM) i5-5257U CPU  2.70GHz
compilationTime:                         Best/Avg Time(ms)    Rate(M/s)   Per Row(ns)   Relative
------------------------------------------------------------------------------------------------
filter+reduce                                 4426 / 4533          0.2        4426.0       1.0X

with this patch
compilationTime:                         Best/Avg Time(ms)    Rate(M/s)   Per Row(ns)   Relative
------------------------------------------------------------------------------------------------
filter+reduce                                 3117 / 3391          0.3        3116.6       1.0X
```

## How was this patch tested?

Using existing unit tests.

Author: Hiroshi Inoue <inoue...@jp.ibm.com>

Closes #14000 from inouehrs/compilation-time-reduction.


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/14cf61e9
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/14cf61e9
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/14cf61e9

Branch: refs/heads/master
Commit: 14cf61e909598d9f6b9c3b920de7299e9bc828e0
Parents: aa6564f
Author: Hiroshi Inoue <inoue...@jp.ibm.com>
Authored: Thu Jun 30 21:47:44 2016 -0700
Committer: Reynold Xin <r...@databricks.com>
Committed: Thu Jun 30 21:47:44 2016 -0700

----------------------------------------------------------------------
 .../spark/sql/catalyst/trees/TreeNode.scala     | 82 ++++++++++----------
 1 file changed, 43 insertions(+), 39 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/spark/blob/14cf61e9/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/trees/TreeNode.scala
----------------------------------------------------------------------
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/trees/TreeNode.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/trees/TreeNode.scala
index 072445a..8bce404 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/trees/TreeNode.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/trees/TreeNode.scala
@@ -315,25 +315,9 @@ abstract class TreeNode[BaseType <: TreeNode[BaseType]] extends Product {
   protected def transformChildren(
       rule: PartialFunction[BaseType, BaseType],
       nextOperation: (BaseType, PartialFunction[BaseType, BaseType]) => BaseType): BaseType = {
-    var changed = false
-    val newArgs = mapProductIterator {
-      case arg: TreeNode[_] if containsChild(arg) =>
-        val newChild = nextOperation(arg.asInstanceOf[BaseType], rule)
-        if (!(newChild fastEquals arg)) {
-          changed = true
-          newChild
-        } else {
-          arg
-        }
-      case Some(arg: TreeNode[_]) if containsChild(arg) =>
-        val newChild = nextOperation(arg.asInstanceOf[BaseType], rule)
-        if (!(newChild fastEquals arg)) {
-          changed = true
-          Some(newChild)
-        } else {
-          Some(arg)
-        }
-      case m: Map[_, _] => m.mapValues {
+    if (children.nonEmpty) {
+      var changed = false
+      val newArgs = mapProductIterator {
         case arg: TreeNode[_] if containsChild(arg) =>
           val newChild = nextOperation(arg.asInstanceOf[BaseType], rule)
           if (!(newChild fastEquals arg)) {
@@ -342,33 +326,53 @@ abstract class TreeNode[BaseType <: TreeNode[BaseType]] extends Product {
           } else {
             arg
           }
-        case other => other
-      }.view.force // `mapValues` is lazy and we need to force it to materialize
-      case d: DataType => d // Avoid unpacking Structs
-      case args: Traversable[_] => args.map {
-        case arg: TreeNode[_] if containsChild(arg) =>
+        case Some(arg: TreeNode[_]) if containsChild(arg) =>
           val newChild = nextOperation(arg.asInstanceOf[BaseType], rule)
           if (!(newChild fastEquals arg)) {
             changed = true
-            newChild
+            Some(newChild)
           } else {
-            arg
+            Some(arg)
           }
-        case tuple @ (arg1: TreeNode[_], arg2: TreeNode[_]) =>
-          val newChild1 = nextOperation(arg1.asInstanceOf[BaseType], rule)
-          val newChild2 = nextOperation(arg2.asInstanceOf[BaseType], rule)
-          if (!(newChild1 fastEquals arg1) || !(newChild2 fastEquals arg2)) {
-            changed = true
-            (newChild1, newChild2)
-          } else {
-            tuple
-          }
-        case other => other
+        case m: Map[_, _] => m.mapValues {
+          case arg: TreeNode[_] if containsChild(arg) =>
+            val newChild = nextOperation(arg.asInstanceOf[BaseType], rule)
+            if (!(newChild fastEquals arg)) {
+              changed = true
+              newChild
+            } else {
+              arg
+            }
+          case other => other
+        }.view.force // `mapValues` is lazy and we need to force it to materialize
+        case d: DataType => d // Avoid unpacking Structs
+        case args: Traversable[_] => args.map {
+          case arg: TreeNode[_] if containsChild(arg) =>
+            val newChild = nextOperation(arg.asInstanceOf[BaseType], rule)
+            if (!(newChild fastEquals arg)) {
+              changed = true
+              newChild
+            } else {
+              arg
+            }
+          case tuple@(arg1: TreeNode[_], arg2: TreeNode[_]) =>
+            val newChild1 = nextOperation(arg1.asInstanceOf[BaseType], rule)
+            val newChild2 = nextOperation(arg2.asInstanceOf[BaseType], rule)
+            if (!(newChild1 fastEquals arg1) || !(newChild2 fastEquals arg2)) {
+              changed = true
+              (newChild1, newChild2)
+            } else {
+              tuple
+            }
+          case other => other
+        }
+        case nonChild: AnyRef => nonChild
+        case null => null
       }
-      case nonChild: AnyRef => nonChild
-      case null => null
+      if (changed) makeCopy(newArgs) else this
+    } else {
+      this
     }
-    if (changed) makeCopy(newArgs) else this
   }
 
   /**

