spark git commit: Revert "[SPARK-23195][SQL] Keep the Hint of Cached Data"

lixiao Tue, 23 Jan 2018 22:40:58 -0800

Repository: spark
Updated Branches:
  refs/heads/branch-2.3 9cfe90e5a -> d656be74b



Revert "[SPARK-23195][SQL] Keep the Hint of Cached Data"

This reverts commit a23f6b13e8a4f0471ee33879a14746786bbf0435.


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/d656be74
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/d656be74
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/d656be74

Branch: refs/heads/branch-2.3
Commit: d656be74b87746efc020d5cae3bfa294f8f98594
Parents: 9cfe90e
Author: gatorsmile <gatorsm...@gmail.com>
Authored: Tue Jan 23 22:39:31 2018 -0800
Committer: gatorsmile <gatorsm...@gmail.com>
Committed: Tue Jan 23 22:39:31 2018 -0800

----------------------------------------------------------------------
 .../sql/execution/columnar/InMemoryRelation.scala   |  4 ++--
 .../sql/execution/joins/BroadcastJoinSuite.scala    | 16 ----------------
 2 files changed, 2 insertions(+), 18 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/spark/blob/d656be74/sql/core/src/main/scala/org/apache/spark/sql/execution/columnar/InMemoryRelation.scala
----------------------------------------------------------------------
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/columnar/InMemoryRelation.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/columnar/InMemoryRelation.scala
index 5945808..51928d9 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/columnar/InMemoryRelation.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/columnar/InMemoryRelation.scala
@@ -63,7 +63,7 @@ case class InMemoryRelation(
     tableName: Option[String])(
     @transient var _cachedColumnBuffers: RDD[CachedBatch] = null,
     val batchStats: LongAccumulator = child.sqlContext.sparkContext.longAccumulator,
-    statsOfPlanToCache: Statistics)
+    statsOfPlanToCache: Statistics = null)
   extends logical.LeafNode with MultiInstanceRelation {
 
   override protected def innerChildren: Seq[SparkPlan] = Seq(child)
@@ -77,7 +77,7 @@ case class InMemoryRelation(
       // Underlying columnar RDD hasn't been materialized, use the stats from the plan to cache
       statsOfPlanToCache
     } else {
-      Statistics(sizeInBytes = batchStats.value.longValue, hints = statsOfPlanToCache.hints)
+      Statistics(sizeInBytes = batchStats.value.longValue)
     }
   }
 

http://git-wip-us.apache.org/repos/asf/spark/blob/d656be74/sql/core/src/test/scala/org/apache/spark/sql/execution/joins/BroadcastJoinSuite.scala
----------------------------------------------------------------------
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/joins/BroadcastJoinSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/joins/BroadcastJoinSuite.scala
index 889cab0..1704bc8 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/execution/joins/BroadcastJoinSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/joins/BroadcastJoinSuite.scala
@@ -139,22 +139,6 @@ class BroadcastJoinSuite extends QueryTest with SQLTestUtils {
     }
   }
 
-  test("broadcast hint is retained in a cached plan") {
-    Seq(true, false).foreach { materialized =>
-      withSQLConf(SQLConf.AUTO_BROADCASTJOIN_THRESHOLD.key -> "-1") {
-        val df1 = spark.createDataFrame(Seq((1, "4"), (2, "2"))).toDF("key", "value")
-        val df2 = spark.createDataFrame(Seq((1, "1"), (2, "2"))).toDF("key", "value")
-        broadcast(df2).cache()
-        if (materialized) df2.collect()
-        val df3 = df1.join(df2, Seq("key"), "inner")
-        val numBroadCastHashJoin = df3.queryExecution.executedPlan.collect {
-          case b: BroadcastHashJoinExec => b
-        }.size
-        assert(numBroadCastHashJoin === 1)
-      }
-    }
-  }
-
   private def assertBroadcastJoin(df : Dataset[Row]) : Unit = {
     val df1 = spark.createDataFrame(Seq((1, "4"), (2, "2"))).toDF("key", "value")
     val joined = df1.join(df, Seq("key"), "inner")


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org
For additional commands, e-mail: commits-h...@spark.apache.org

spark git commit: Revert "[SPARK-23195][SQL] Keep the Hint of Cached Data"

Reply via email to