Github user rdblue commented on a diff in the pull request:
https://github.com/apache/spark/pull/22009#discussion_r208384141
--- Diff:
sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceV2Strategy.scala
---
@@ -80,17 +80,17 @@ object DataSourceV2Strategy extends Strategy {
*/
// TODO: nested column pruning.
private def pruneColumns(
- reader: DataSourceReader,
+ configBuilder: ScanConfigBuilder,
relation: DataSourceV2Relation,
exprs: Seq[Expression]): Seq[AttributeReference] = {
- reader match {
+ configBuilder match {
case r: SupportsPushDownRequiredColumns =>
val requiredColumns = AttributeSet(exprs.flatMap(_.references))
val neededOutput = relation.output.filter(requiredColumns.contains)
if (neededOutput != relation.output) {
r.pruneColumns(neededOutput.toStructType)
val nameToAttr =
relation.output.map(_.name).zip(relation.output).toMap
- r.readSchema().toAttributes.map {
+ r.prunedSchema().toAttributes.map {
--- End diff --
As I noted earlier, this call to `prunedSchema()` shouldn't get the scan's
schema until the scan is fully configured.
---
---------------------------------------------------------------------
To unsubscribe, e-mail: reviews-unsubscribe@spark.apache.org
For additional commands, e-mail: reviews-help@spark.apache.org