This is an automated email from the ASF dual-hosted git repository. gurwls223 pushed a commit to branch branch-2.4 in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/branch-2.4 by this push: new 579b33b [SPARK-31590][SQL] Metadata-only queries should not include subquery in partition filters 579b33b is described below commit 579b33b5b80ec6d6992793b440f142b73527360b Author: sychen <syc...@ctrip.com> AuthorDate: Wed May 6 10:56:19 2020 +0900 [SPARK-31590][SQL] Metadata-only queries should not include subquery in partition filters ### What changes were proposed in this pull request? Metadata-only queries should not include subquery in partition filters. ### Why are the changes needed? Applying the `OptimizeMetadataOnlyQuery` rule again results in the exception `Cannot evaluate expression: scalar-subquery`. ### Does this PR introduce any user-facing change? Yes. When `spark.sql.optimizer.metadataOnly` is enabled, queries that include a subquery in partition filters now succeed. ### How was this patch tested? Added a unit test. Closes #28383 from cxzl25/fix_SPARK-31590. Authored-by: sychen <syc...@ctrip.com> Signed-off-by: HyukjinKwon <gurwls...@apache.org> (cherry picked from commit 588966d696373c11e963116a0e08ee33c30f0dfb) Signed-off-by: HyukjinKwon <gurwls...@apache.org> --- .../spark/sql/execution/OptimizeMetadataOnlyQuery.scala | 2 +- .../sql/execution/OptimizeMetadataOnlyQuerySuite.scala | 14 ++++++++++++++ 2 files changed, 15 insertions(+), 1 deletion(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/OptimizeMetadataOnlyQuery.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/OptimizeMetadataOnlyQuery.scala index 45e5f41..66e9f69 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/OptimizeMetadataOnlyQuery.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/OptimizeMetadataOnlyQuery.scala @@ -117,7 +117,7 @@ case class OptimizeMetadataOnlyQuery(catalog: SessionCatalog) extends Rule[Logic case a: AttributeReference => a.withName(relation.output.find(_.semanticEquals(a)).get.name) } - } + 
}.filterNot(SubqueryExpression.hasSubquery) child transform { case plan if plan eq relation => diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/OptimizeMetadataOnlyQuerySuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/OptimizeMetadataOnlyQuerySuite.scala index a543eb8..63893fc 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/OptimizeMetadataOnlyQuerySuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/OptimizeMetadataOnlyQuerySuite.scala @@ -103,6 +103,20 @@ class OptimizeMetadataOnlyQuerySuite extends QueryTest with SharedSQLContext { "select partcol2, min(partcol1) from srcpart where partcol1 = 0 group by partcol2", "select max(c1) from (select partcol1 + 1 as c1 from srcpart where partcol1 = 0) t") + testMetadataOnly( + "SPARK-31590 Metadata-only queries should not include subquery in partition filters", + """ + |SELECT partcol1, MAX(partcol2) AS partcol2 + |FROM srcpart + |WHERE partcol1 = ( + | SELECT MAX(partcol1) + | FROM srcpart + |) + |AND partcol2 = 'even' + |GROUP BY partcol1 + |""".stripMargin + ) + testNotMetadataOnly( "Don't optimize metadata only query for non-partition columns", "select col1 from srcpart group by col1", --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org