gatorsmile commented on a change in pull request #24580: [SPARK-27674][SQL] the 
hint should not be dropped after cache lookup
URL: https://github.com/apache/spark/pull/24580#discussion_r283440785
 
 

 ##########
 File path: sql/core/src/test/scala/org/apache/spark/sql/CachedTableSuite.scala
 ##########
 @@ -939,22 +939,39 @@ class CachedTableSuite extends QueryTest with SQLTestUtils with SharedSQLContext
   }
 
   test("Cache should respect the broadcast hint") {
-    val df = broadcast(spark.range(1000)).cache()
-    val df2 = spark.range(1000).cache()
-    df.count()
-    df2.count()
-
-    // Test the broadcast hint.
-    val joinPlan = df.join(df2, "id").queryExecution.optimizedPlan
-    val hint = joinPlan.collect {
-      case Join(_, _, _, _, hint) => hint
-    }
-    assert(hint.size == 1)
-    assert(hint(0).leftHint.get.strategy.contains(BROADCAST))
-    assert(hint(0).rightHint.isEmpty)
-
-    // Clean-up
-    df.unpersist()
+    def testHint(df: Dataset[_]): Unit = {
+      val df2 = spark.range(2000).cache()
+      df2.count()
+
+      def checkHintExists(): Unit = {
+        // Test the broadcast hint.
+        val joinPlan = df.join(df2, "id").queryExecution.optimizedPlan
+        val hint = joinPlan.collect {
+          case Join(_, _, _, _, hint) => hint
+        }
+        assert(hint.size == 1)
+        assert(hint(0).leftHint.get.strategy.contains(BROADCAST))
+        assert(hint(0).rightHint.isEmpty)
+      }
+
+      // Make sure the hint does exist when `df` is not cached.
+      checkHintExists()
+
+      df.cache()
+      df.count()
+      // Make sure the hint still exists when `df` is cached.
+      checkHintExists()
+
+      // Clean-up
+      df.unpersist()
 
 Review comment:
   Use try/finally so that `df.unpersist()` still runs if an assertion fails?
   ```
   finally {
     df.unpersist()
   }
   ```

----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
[email protected]


With regards,
Apache Git Services

---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to