This is an automated email from the ASF dual-hosted git repository.
dongjoon pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push:
new 0518f3b0b6e [SPARK-43383][SQL] Add `rowCount` statistics to LocalRelation
0518f3b0b6e is described below
commit 0518f3b0b6ede3a6b234dbc94b71c51ba830c960
Author: Yikf <[email protected]>
AuthorDate: Tue May 9 08:26:15 2023 -0700
[SPARK-43383][SQL] Add `rowCount` statistics to LocalRelation
### What changes were proposed in this pull request?
This PR adds `rowCount` to the statistics computed for `LocalRelation`.
### Why are the changes needed?
Previously, the statistics of `LocalRelation` contained only `sizeInBytes`; `rowCount` was missing.
### Does this PR introduce _any_ user-facing change?
No
### How was this patch tested?
Added a new unit test to `StatisticsCollectionSuite` and updated an existing test in `JoinReorderSuite`.
Closes #41064 from Yikf/LocalRelation.
Authored-by: Yikf <[email protected]>
Signed-off-by: Dongjoon Hyun <[email protected]>
---
.../apache/spark/sql/catalyst/plans/logical/LocalRelation.scala | 3 ++-
.../sql/catalyst/optimizer/joinReorder/JoinReorderSuite.scala | 5 ++---
.../scala/org/apache/spark/sql/StatisticsCollectionSuite.scala | 9 +++++++++
3 files changed, 13 insertions(+), 4 deletions(-)
diff --git
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/LocalRelation.scala
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/LocalRelation.scala
index e99f433fa6a..5cc9615bc0e 100644
---
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/LocalRelation.scala
+++
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/LocalRelation.scala
@@ -79,7 +79,8 @@ case class LocalRelation(
}
override def computeStats(): Statistics =
- Statistics(sizeInBytes = EstimationUtils.getSizePerRow(output) *
data.length)
+ Statistics(sizeInBytes = EstimationUtils.getSizePerRow(output) *
data.length,
+ rowCount = Some(data.size))
def toSQL(inlineTableName: String): String = {
require(data.nonEmpty)
diff --git
a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/joinReorder/JoinReorderSuite.scala
b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/joinReorder/JoinReorderSuite.scala
index 939d5e07870..2a0147ede7e 100644
---
a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/joinReorder/JoinReorderSuite.scala
+++
b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/joinReorder/JoinReorderSuite.scala
@@ -228,17 +228,16 @@ class JoinReorderSuite extends JoinReorderPlanTestBase
with StatsEstimationTestB
}
test("SPARK-26352: join reordering should not change the order of
attributes") {
- // This test case does not rely on CBO.
// It's similar to the test case above, but catches a reordering bug that
the one above doesn't
val tab1 = LocalRelation($"x".int, $"y".int)
val tab2 = LocalRelation($"i".int, $"j".int)
val tab3 = LocalRelation($"a".int, $"b".int)
val original =
tab1.join(tab2, Cross)
- .join(tab3, Inner, Some($"a" === $"x" && $"b" === $"i"))
+ .join(tab3, Inner, Some($"a" === $"x"))
val expected =
tab1.join(tab3, Inner, Some($"a" === $"x"))
- .join(tab2, Cross, Some($"b" === $"i"))
+ .join(tab2, Cross)
.select(outputsOf(tab1, tab2, tab3): _*)
assertEqualJoinPlans(Optimize, original, expected)
diff --git
a/sql/core/src/test/scala/org/apache/spark/sql/StatisticsCollectionSuite.scala
b/sql/core/src/test/scala/org/apache/spark/sql/StatisticsCollectionSuite.scala
index e6b74a328e5..0a7e1bc9488 100644
---
a/sql/core/src/test/scala/org/apache/spark/sql/StatisticsCollectionSuite.scala
+++
b/sql/core/src/test/scala/org/apache/spark/sql/StatisticsCollectionSuite.scala
@@ -849,4 +849,13 @@ class StatisticsCollectionSuite extends
StatisticsCollectionTestBase with Shared
errorClass = "SCHEMA_NOT_FOUND",
parameters = Map("schemaName" -> "`db_not_exists`"))
}
+
+ test("SPARK-43383: Add rowCount statistics to LocalRelation") {
+ val optimizedPlan = spark.sql("select * from
values(1),(2),(3),(4),(5),(6)")
+ .queryExecution.optimizedPlan
+ assert(optimizedPlan.isInstanceOf[LocalRelation])
+
+ val stats = optimizedPlan.stats
+ assert(stats.rowCount.isDefined && stats.rowCount.get == 6)
+ }
}
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]