viirya commented on a change in pull request #31595:
URL: https://github.com/apache/spark/pull/31595#discussion_r581621569
##########
File path: sql/core/src/test/scala/org/apache/spark/sql/SubquerySuite.scala
##########
@@ -1013,118 +1014,120 @@ class SubquerySuite extends QueryTest with
SharedSparkSession with AdaptiveSpark
}
test("SPARK-23957 Remove redundant sort from subquery plan(in subquery)") {
- withTempView("t1", "t2", "t3") {
- Seq((1, 1), (2, 2)).toDF("c1", "c2").createOrReplaceTempView("t1")
- Seq((1, 1), (2, 2)).toDF("c1", "c2").createOrReplaceTempView("t2")
- Seq((1, 1, 1), (2, 2, 2)).toDF("c1", "c2",
"c3").createOrReplaceTempView("t3")
-
- // Simple order by
- val query1 =
- """
- |SELECT c1 FROM t1
- |WHERE
- |c1 IN (SELECT c1 FROM t2 ORDER BY c1)
+ withSQLConf(SQLConf.OPTIMIZER_EXCLUDED_RULES.key ->
RemoveNoopUnion.ruleName) {
Review comment:
Similar case, `Union` is removed and a check of number of `Sort` is
failed. I think we can add a simple Filter as other test.
##########
File path: sql/core/src/test/scala/org/apache/spark/sql/SubquerySuite.scala
##########
@@ -1013,118 +1014,120 @@ class SubquerySuite extends QueryTest with
SharedSparkSession with AdaptiveSpark
}
test("SPARK-23957 Remove redundant sort from subquery plan(in subquery)") {
- withTempView("t1", "t2", "t3") {
- Seq((1, 1), (2, 2)).toDF("c1", "c2").createOrReplaceTempView("t1")
- Seq((1, 1), (2, 2)).toDF("c1", "c2").createOrReplaceTempView("t2")
- Seq((1, 1, 1), (2, 2, 2)).toDF("c1", "c2",
"c3").createOrReplaceTempView("t3")
-
- // Simple order by
- val query1 =
- """
- |SELECT c1 FROM t1
- |WHERE
- |c1 IN (SELECT c1 FROM t2 ORDER BY c1)
+ withSQLConf(SQLConf.OPTIMIZER_EXCLUDED_RULES.key ->
RemoveNoopUnion.ruleName) {
+ withTempView("t1", "t2", "t3") {
+ Seq((1, 1), (2, 2)).toDF("c1", "c2").createOrReplaceTempView("t1")
+ Seq((1, 1), (2, 2)).toDF("c1", "c2").createOrReplaceTempView("t2")
+ Seq((1, 1, 1), (2, 2, 2)).toDF("c1", "c2",
"c3").createOrReplaceTempView("t3")
+
+ // Simple order by
+ val query1 =
+ """
+ |SELECT c1 FROM t1
+ |WHERE
+ |c1 IN (SELECT c1 FROM t2 ORDER BY c1)
""".stripMargin
- assert(getNumSortsInQuery(query1) == 0)
+ assert(getNumSortsInQuery(query1) == 0)
- // Nested order bys
- val query2 =
- """
- |SELECT c1
- |FROM t1
- |WHERE c1 IN (SELECT c1
- | FROM (SELECT *
- | FROM t2
- | ORDER BY c2)
- | ORDER BY c1)
+ // Nested order bys
+ val query2 =
+ """
+ |SELECT c1
+ |FROM t1
+ |WHERE c1 IN (SELECT c1
+ | FROM (SELECT *
+ | FROM t2
+ | ORDER BY c2)
+ | ORDER BY c1)
""".stripMargin
- assert(getNumSortsInQuery(query2) == 0)
+ assert(getNumSortsInQuery(query2) == 0)
- // nested IN
- val query3 =
- """
- |SELECT c1
- |FROM t1
- |WHERE c1 IN (SELECT c1
- | FROM t2
- | WHERE c1 IN (SELECT c1
- | FROM t3
- | WHERE c1 = 1
- | ORDER BY c3)
- | ORDER BY c2)
+ // nested IN
+ val query3 =
+ """
+ |SELECT c1
+ |FROM t1
+ |WHERE c1 IN (SELECT c1
+ | FROM t2
+ | WHERE c1 IN (SELECT c1
+ | FROM t3
+ | WHERE c1 = 1
+ | ORDER BY c3)
+ | ORDER BY c2)
""".stripMargin
- assert(getNumSortsInQuery(query3) == 0)
+ assert(getNumSortsInQuery(query3) == 0)
- // Complex subplan and multiple sorts
- val query4 =
- """
- |SELECT c1
- |FROM t1
- |WHERE c1 IN (SELECT c1
- | FROM (SELECT c1, c2, count(*)
- | FROM t2
- | GROUP BY c1, c2
- | HAVING count(*) > 0
- | ORDER BY c2)
- | ORDER BY c1)
+ // Complex subplan and multiple sorts
+ val query4 =
+ """
+ |SELECT c1
+ |FROM t1
+ |WHERE c1 IN (SELECT c1
+ | FROM (SELECT c1, c2, count(*)
+ | FROM t2
+ | GROUP BY c1, c2
+ | HAVING count(*) > 0
+ | ORDER BY c2)
+ | ORDER BY c1)
""".stripMargin
- assert(getNumSortsInQuery(query4) == 0)
+ assert(getNumSortsInQuery(query4) == 0)
- // Join in subplan
- val query5 =
- """
- |SELECT c1 FROM t1
- |WHERE
- |c1 IN (SELECT t2.c1 FROM t2, t3
- | WHERE t2.c1 = t3.c1
- | ORDER BY t2.c1)
+ // Join in subplan
+ val query5 =
+ """
+ |SELECT c1 FROM t1
+ |WHERE
+ |c1 IN (SELECT t2.c1 FROM t2, t3
+ | WHERE t2.c1 = t3.c1
+ | ORDER BY t2.c1)
""".stripMargin
- assert(getNumSortsInQuery(query5) == 0)
+ assert(getNumSortsInQuery(query5) == 0)
- val query6 =
- """
- |SELECT c1
- |FROM t1
- |WHERE (c1, c2) IN (SELECT c1, max(c2)
- | FROM (SELECT c1, c2, count(*)
- | FROM t2
- | GROUP BY c1, c2
- | HAVING count(*) > 0
- | ORDER BY c2)
- | GROUP BY c1
- | HAVING max(c2) > 0
- | ORDER BY c1)
+ val query6 =
+ """
+ |SELECT c1
+ |FROM t1
+ |WHERE (c1, c2) IN (SELECT c1, max(c2)
+ | FROM (SELECT c1, c2, count(*)
+ | FROM t2
+ | GROUP BY c1, c2
+ | HAVING count(*) > 0
+ | ORDER BY c2)
+ | GROUP BY c1
+ | HAVING max(c2) > 0
+ | ORDER BY c1)
""".stripMargin
- assert(getNumSortsInQuery(query6) == 0)
+ assert(getNumSortsInQuery(query6) == 0)
- // Cases when sort is not removed from the plan
- // Limit on top of sort
- val query7 =
+ // Cases when sort is not removed from the plan
+ // Limit on top of sort
+ val query7 =
"""
- |SELECT c1 FROM t1
- |WHERE
- |c1 IN (SELECT c1 FROM t2 ORDER BY c1 limit 1)
+ |SELECT c1 FROM t1
+ |WHERE
+ |c1 IN (SELECT c1 FROM t2 ORDER BY c1 limit 1)
""".stripMargin
- assert(getNumSortsInQuery(query7) == 1)
+ assert(getNumSortsInQuery(query7) == 1)
- // Sort below a set operations (intersect, union)
- val query8 =
- """
- |SELECT c1 FROM t1
- |WHERE
- |c1 IN ((
- | SELECT c1 FROM t2
- | ORDER BY c1
- | )
- | UNION
- | (
- | SELECT c1 FROM t2
- | ORDER BY c1
- | ))
+ // Sort below a set operations (intersect, union)
+ val query8 =
+ """
+ |SELECT c1 FROM t1
+ |WHERE
+ |c1 IN ((
+ | SELECT c1 FROM t2
+ | ORDER BY c1
+ | )
+ | UNION
Review comment:
Sounds good.
----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]