Repository: spark
Updated Branches:
  refs/heads/master b8e4d567a -> d540dfbff


[SPARK-21273][SQL][FOLLOW-UP] Add missing test cases back and revise code style

## What changes were proposed in this pull request?

Add missing test cases back and revise code style

Follow up the previous PR: https://github.com/apache/spark/pull/18479

## How was this patch tested?

Unit test

Please review http://spark.apache.org/contributing.html before opening a pull 
request.

Author: Wang Gengliang <[email protected]>

Closes #18548 from gengliangwang/stat_propagation_revise.


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/d540dfbf
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/d540dfbf
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/d540dfbf

Branch: refs/heads/master
Commit: d540dfbff33aa2f8571e0de149dfa3f4e7321113
Parents: b8e4d56
Author: Wang Gengliang <[email protected]>
Authored: Thu Jul 6 19:12:15 2017 +0800
Committer: Wenchen Fan <[email protected]>
Committed: Thu Jul 6 19:12:15 2017 +0800

----------------------------------------------------------------------
 .../plans/logical/LogicalPlanVisitor.scala      |  2 +-
 .../BasicStatsEstimationSuite.scala             | 45 ++++++++++++++++++++
 2 files changed, 46 insertions(+), 1 deletion(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/spark/blob/d540dfbf/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/LogicalPlanVisitor.scala
----------------------------------------------------------------------
diff --git 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/LogicalPlanVisitor.scala
 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/LogicalPlanVisitor.scala
index b230458..2652f6d 100644
--- 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/LogicalPlanVisitor.scala
+++ 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/LogicalPlanVisitor.scala
@@ -38,10 +38,10 @@ trait LogicalPlanVisitor[T] {
     case p: Range => visitRange(p)
     case p: Repartition => visitRepartition(p)
     case p: RepartitionByExpression => visitRepartitionByExpr(p)
+    case p: ResolvedHint => visitHint(p)
     case p: Sample => visitSample(p)
     case p: ScriptTransformation => visitScriptTransform(p)
     case p: Union => visitUnion(p)
-    case p: ResolvedHint => visitHint(p)
     case p: LogicalPlan => default(p)
   }
 

http://git-wip-us.apache.org/repos/asf/spark/blob/d540dfbf/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/statsEstimation/BasicStatsEstimationSuite.scala
----------------------------------------------------------------------
diff --git 
a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/statsEstimation/BasicStatsEstimationSuite.scala
 
b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/statsEstimation/BasicStatsEstimationSuite.scala
index 5fd21a0..913be6d 100644
--- 
a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/statsEstimation/BasicStatsEstimationSuite.scala
+++ 
b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/statsEstimation/BasicStatsEstimationSuite.scala
@@ -78,6 +78,37 @@ class BasicStatsEstimationSuite extends PlanTest with 
StatsEstimationTestBase {
     checkStats(globalLimit, stats)
   }
 
+  test("sample estimation") {
+    val sample = Sample(0.0, 0.5, withReplacement = false, (math.random * 
1000).toLong, plan)
+    checkStats(sample, Statistics(sizeInBytes = 60, rowCount = Some(5)))
+
+    // Child doesn't have rowCount in stats
+    val childStats = Statistics(sizeInBytes = 120)
+    val childPlan = DummyLogicalPlan(childStats, childStats)
+    val sample2 =
+      Sample(0.0, 0.11, withReplacement = false, (math.random * 1000).toLong, 
childPlan)
+    checkStats(sample2, Statistics(sizeInBytes = 14))
+  }
+
+  test("estimate statistics when the conf changes") {
+    val expectedDefaultStats =
+      Statistics(
+        sizeInBytes = 40,
+        rowCount = Some(10),
+        attributeStats = AttributeMap(Seq(
+          AttributeReference("c1", IntegerType)() -> ColumnStat(10, Some(1), 
Some(10), 0, 4, 4))))
+    val expectedCboStats =
+      Statistics(
+        sizeInBytes = 4,
+        rowCount = Some(1),
+        attributeStats = AttributeMap(Seq(
+          AttributeReference("c1", IntegerType)() -> ColumnStat(1, Some(5), 
Some(5), 0, 4, 4))))
+
+    val plan = DummyLogicalPlan(defaultStats = expectedDefaultStats, cboStats 
= expectedCboStats)
+    checkStats(
+      plan, expectedStatsCboOn = expectedCboStats, expectedStatsCboOff = 
expectedDefaultStats)
+  }
+
   /** Check estimated stats when cbo is turned on/off. */
   private def checkStats(
       plan: LogicalPlan,
@@ -99,3 +130,17 @@ class BasicStatsEstimationSuite extends PlanTest with 
StatsEstimationTestBase {
   private def checkStats(plan: LogicalPlan, expectedStats: Statistics): Unit =
     checkStats(plan, expectedStats, expectedStats)
 }
+
+/**
+ * This class is used for unit-testing the cbo switch, it mimics a logical 
plan which computes
+ * a simple statistics or a cbo estimated statistics based on the conf.
+ */
+private case class DummyLogicalPlan(
+    defaultStats: Statistics,
+    cboStats: Statistics)
+  extends LeafNode {
+
+  override def output: Seq[Attribute] = Nil
+
+  override def computeStats(): Statistics = if (conf.cboEnabled) cboStats else 
defaultStats
+}


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to