Github user gatorsmile commented on a diff in the pull request:
https://github.com/apache/spark/pull/20485#discussion_r166436103
--- Diff:
sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/PushDownOperatorsToDataSource.scala
---
@@ -81,33 +81,44 @@ object PushDownOperatorsToDataSource extends
Rule[LogicalPlan] with PredicateHel
// TODO: add more push down rules.
- pushDownRequiredColumns(filterPushed, filterPushed.outputSet)
+ val columnPruned = pushDownRequiredColumns(filterPushed,
filterPushed.outputSet)
// After column pruning, we may have redundant PROJECT nodes in the
query plan, remove them.
- RemoveRedundantProject(filterPushed)
+ RemoveRedundantProject(columnPruned)
}
// TODO: nested fields pruning
- private def pushDownRequiredColumns(plan: LogicalPlan, requiredByParent:
AttributeSet): Unit = {
+ private def pushDownRequiredColumns(
+ plan: LogicalPlan, requiredByParent: AttributeSet): LogicalPlan = {
plan match {
- case Project(projectList, child) =>
+ case p @ Project(projectList, child) =>
val required = projectList.flatMap(_.references)
- pushDownRequiredColumns(child, AttributeSet(required))
+ p.copy(child = pushDownRequiredColumns(child,
AttributeSet(required)))
- case Filter(condition, child) =>
+ case f @ Filter(condition, child) =>
val required = requiredByParent ++ condition.references
- pushDownRequiredColumns(child, required)
+ f.copy(child = pushDownRequiredColumns(child, required))
case relation: DataSourceV2Relation => relation.reader match {
case reader: SupportsPushDownRequiredColumns =>
+ // TODO: Enable the below assert after we make
`DataSourceV2Relation` immutable. Fow now
--- End diff --
Typo: "Fow" should be "For".
---
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]