Github user gatorsmile commented on a diff in the pull request:
https://github.com/apache/spark/pull/19757#discussion_r199550269
--- Diff:
sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/interface.scala
---
@@ -366,10 +366,16 @@ case class CatalogStatistics(
* Convert [[CatalogStatistics]] to [[Statistics]], and match column
stats to attributes based
* on column names.
*/
- def toPlanStats(planOutput: Seq[Attribute]): Statistics = {
- val matched = planOutput.flatMap(a => colStats.get(a.name).map(a -> _))
- Statistics(sizeInBytes = sizeInBytes, rowCount = rowCount,
- attributeStats = AttributeMap(matched))
+ def toPlanStats(planOutput: Seq[Attribute], cboEnabled: Boolean):
Statistics = {
+ if (cboEnabled) {
+ val attrStats = planOutput.flatMap(a => colStats.get(a.name).map(a
-> _))
+ Statistics(sizeInBytes = sizeInBytes, rowCount = rowCount,
+ attributeStats = AttributeMap(attrStats))
+ } else {
+ // When CBO is disabled, we apply the size-only estimation strategy,
so there's no need to
+ // propagate other statistics from catalog to the plan.
+ Statistics(sizeInBytes = sizeInBytes)
--- End diff --
If `rowCount` is available, why do we ignore it?
---
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]