dongjoon-hyun commented on a change in pull request #24047: [SPARK-25196][SQL] 
Extends Analyze commands for cached tables 
URL: https://github.com/apache/spark/pull/24047#discussion_r265842727
 
 

 ##########
 File path: 
sql/core/src/test/scala/org/apache/spark/sql/StatisticsCollectionSuite.scala
 ##########
 @@ -470,4 +471,34 @@ class StatisticsCollectionSuite extends 
StatisticsCollectionTestBase with Shared
       }
     }
   }
+
+  test("analyzes column statistics in cached query") {
+    withTempView("cachedTempView", "tempView") {
+      spark.sql(
+        """CACHE TABLE cachedTempView AS
+          |  SELECT c0, avg(c1) AS v1, avg(c2) AS v2
+          |  FROM (SELECT id % 3 AS c0, id % 5 AS c1, 2 AS c2 FROM range(1, 
30))
+          |  GROUP BY c0
+        """.stripMargin)
+
+      // Analyzes one column in the cached logical plan
+      spark.sql("ANALYZE TABLE cachedTempView COMPUTE STATISTICS FOR COLUMNS 
v1")
+      val queryStats1 = spark.table("cachedTempView").queryExecution
+        .optimizedPlan.stats.attributeStats
+      assert(queryStats1.map(_._1.name).toSet === Set("v1"))
+
+      // Analyzes two more columns
+      spark.sql("ANALYZE TABLE cachedTempView COMPUTE STATISTICS FOR COLUMNS 
c0, v2")
+      val queryStats2 = spark.table("cachedTempView").queryExecution
+        .optimizedPlan.stats.attributeStats
+      assert(queryStats2.map(_._1.name).toSet === Set("c0", "v1", "v2"))
+
+      // Analyzes in a temporary table
+      spark.sql("CREATE TEMPORARY VIEW tempView AS SELECT * FROM range(1, 30)")
+      val errMsg = intercept[NoSuchTableException] {
+        spark.sql("ANALYZE TABLE tempView COMPUTE STATISTICS FOR COLUMNS id")
+      }.getMessage
+      assert(errMsg.contains("Table or view 'tempView' not found in database 
'default'"))
+    }
 
 Review comment:
   Also, please add test coverage for global temp views.

----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
[email protected]


With regards,
Apache Git Services

---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to