wangyum commented on a change in pull request #27632: [WIP][SPARK-30872][SQL]
Constraints inferred from inferred attributes
URL: https://github.com/apache/spark/pull/27632#discussion_r381888828
##########
File path:
sql/core/src/test/scala/org/apache/spark/sql/DataFrameJoinSuite.scala
##########
@@ -322,4 +322,13 @@ class DataFrameJoinSuite extends QueryTest
}
}
}
+
+ test("SPARK-30872: Constraints inferred from inferred attributes") {
+ withTable("t1") {
+ spark.range(20).selectExpr("id as a", "id as b", "id as
c").write.saveAsTable("t1")
+ checkAnswer(
+ spark.sql("select count(*) from t1 where a = b and b = c and (c = 3 or
c = 13)"),
+ Row(2) :: Nil)
+ }
+ }
Review comment:
Before this PR, this test fails with `TreeNodeException: Once strategy's
idempotence is broken for batch Infer Filters`:
```scala
[info] - SPARK-30872: Constraints inferred from inferred attributes ***
FAILED *** (222 milliseconds)
[info] org.apache.spark.sql.catalyst.errors.package$TreeNodeException:
Once strategy's idempotence is broken for batch Infer Filters
[info] Aggregate [count(1) AS count(1)#5842L]
Aggregate [count(1) AS count(1)#5842L]
[info] +- Project
+- Project
[info] ! +- Filter ((((((a#5839L = c#5841L) AND isnotnull(b#5840L)) AND
isnotnull(c#5841L)) AND ((b#5840L = 3) OR (b#5840L = 13))) AND
isnotnull(a#5839L)) AND (((a#5839L = b#5840L) AND (b#5840L = c#5841L)) AND
((c#5841L = 3) OR (c#5841L = 13)))) +- Filter (((a#5839L = 3) OR (a#5839L
= 13)) AND ((((((a#5839L = c#5841L) AND isnotnull(b#5840L)) AND
isnotnull(c#5841L)) AND ((b#5840L = 3) OR (b#5840L = 13))) AND
isnotnull(a#5839L)) AND (((a#5839L = b#5840L) AND (b#5840L = c#5841L)) AND
((c#5841L = 3) OR (c#5841L = 13)))))
[info] +- Relation[a#5839L,b#5840L,c#5841L] parquet
+- Relation[a#5839L,b#5840L,c#5841L] parquet
[info] , tree:
[info] Aggregate [count(1) AS count(1)#5842L]
[info] +- Project
[info] +- Filter (((a#5839L = 3) OR (a#5839L = 13)) AND ((((((a#5839L =
c#5841L) AND isnotnull(b#5840L)) AND isnotnull(c#5841L)) AND ((b#5840L = 3) OR
(b#5840L = 13))) AND isnotnull(a#5839L)) AND (((a#5839L = b#5840L) AND (b#5840L
= c#5841L)) AND ((c#5841L = 3) OR (c#5841L = 13)))))
[info] +- Relation[a#5839L,b#5840L,c#5841L] parquet
[info] at
org.apache.spark.sql.catalyst.rules.RuleExecutor.checkBatchIdempotence(RuleExecutor.scala:100)
[info] at
org.apache.spark.sql.catalyst.rules.RuleExecutor.$anonfun$execute$1(RuleExecutor.scala:187)
[info] at
org.apache.spark.sql.catalyst.rules.RuleExecutor.$anonfun$execute$1$adapted(RuleExecutor.scala:132)
[info] at scala.collection.immutable.List.foreach(List.scala:392)
[info] at
org.apache.spark.sql.catalyst.rules.RuleExecutor.execute(RuleExecutor.scala:132)
[info] at
org.apache.spark.sql.catalyst.rules.RuleExecutor.$anonfun$executeAndTrack$1(RuleExecutor.scala:111)
[info] at
org.apache.spark.sql.catalyst.QueryPlanningTracker$.withTracker(QueryPlanningTracker.scala:88)
[info] at
org.apache.spark.sql.catalyst.rules.RuleExecutor.executeAndTrack(RuleExecutor.scala:111)
[info] at
org.apache.spark.sql.execution.QueryExecution.$anonfun$optimizedPlan$1(QueryExecution.scala:82)
[info] at
org.apache.spark.sql.catalyst.QueryPlanningTracker.measurePhase(QueryPlanningTracker.scala:111)
[info] at
org.apache.spark.sql.execution.QueryExecution.$anonfun$executePhase$1(QueryExecution.scala:119)
[info] at
org.apache.spark.sql.SparkSession.withActive(SparkSession.scala:762)
[info] at
org.apache.spark.sql.execution.QueryExecution.executePhase(QueryExecution.scala:119)
[info] at
org.apache.spark.sql.execution.QueryExecution.optimizedPlan$lzycompute(QueryExecution.scala:82)
[info] at
org.apache.spark.sql.execution.QueryExecution.optimizedPlan(QueryExecution.scala:79)
[info] at
org.apache.spark.sql.QueryTest.assertEmptyMissingInput(QueryTest.scala:231)
[info] at org.apache.spark.sql.QueryTest.checkAnswer(QueryTest.scala:154)
[info] at
org.apache.spark.sql.DataFrameJoinSuite.$anonfun$new$51(DataFrameJoinSuite.scala:332)
[info] at
scala.runtime.java8.JFunction0$mcV$sp.apply(JFunction0$mcV$sp.java:23)
[info] at org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1377)
[info] at
org.apache.spark.sql.test.SQLTestUtilsBase.withTable(SQLTestUtils.scala:290)
[info] at
org.apache.spark.sql.test.SQLTestUtilsBase.withTable$(SQLTestUtils.scala:288)
[info] at
org.apache.spark.sql.DataFrameJoinSuite.withTable(DataFrameJoinSuite.scala:29)
[info] at
org.apache.spark.sql.DataFrameJoinSuite.$anonfun$new$50(DataFrameJoinSuite.scala:328)
[info] at
scala.runtime.java8.JFunction0$mcV$sp.apply(JFunction0$mcV$sp.java:23)
[info] at org.scalatest.OutcomeOf.outcomeOf(OutcomeOf.scala:85)
[info] at org.scalatest.OutcomeOf.outcomeOf$(OutcomeOf.scala:83)
[info] at org.scalatest.OutcomeOf$.outcomeOf(OutcomeOf.scala:104)
[info] at org.scalatest.Transformer.apply(Transformer.scala:22)
[info] at org.scalatest.Transformer.apply(Transformer.scala:20)
[info] at org.scalatest.FunSuiteLike$$anon$1.apply(FunSuiteLike.scala:186)
[info] at
org.apache.spark.SparkFunSuite.withFixture(SparkFunSuite.scala:151)
[info] at
org.scalatest.FunSuiteLike.invokeWithFixture$1(FunSuiteLike.scala:184)
[info] at
org.scalatest.FunSuiteLike.$anonfun$runTest$1(FunSuiteLike.scala:196)
[info] at org.scalatest.SuperEngine.runTestImpl(Engine.scala:286)
[info] at org.scalatest.FunSuiteLike.runTest(FunSuiteLike.scala:196)
[info] at org.scalatest.FunSuiteLike.runTest$(FunSuiteLike.scala:178)
[info] at
org.apache.spark.SparkFunSuite.org$scalatest$BeforeAndAfterEach$$super$runTest(SparkFunSuite.scala:58)
[info] at
org.scalatest.BeforeAndAfterEach.runTest(BeforeAndAfterEach.scala:221)
[info] at
org.scalatest.BeforeAndAfterEach.runTest$(BeforeAndAfterEach.scala:214)
[info] at org.apache.spark.SparkFunSuite.runTest(SparkFunSuite.scala:58)
[info] at
org.scalatest.FunSuiteLike.$anonfun$runTests$1(FunSuiteLike.scala:229)
[info] at
org.scalatest.SuperEngine.$anonfun$runTestsInBranch$1(Engine.scala:393)
[info] at scala.collection.immutable.List.foreach(List.scala:392)
[info] at org.scalatest.SuperEngine.traverseSubNodes$1(Engine.scala:381)
[info] at org.scalatest.SuperEngine.runTestsInBranch(Engine.scala:376)
[info] at org.scalatest.SuperEngine.runTestsImpl(Engine.scala:458)
[info] at org.scalatest.FunSuiteLike.runTests(FunSuiteLike.scala:229)
[info] at org.scalatest.FunSuiteLike.runTests$(FunSuiteLike.scala:228)
[info] at org.scalatest.FunSuite.runTests(FunSuite.scala:1560)
[info] at org.scalatest.Suite.run(Suite.scala:1124)
[info] at org.scalatest.Suite.run$(Suite.scala:1106)
[info] at
org.scalatest.FunSuite.org$scalatest$FunSuiteLike$$super$run(FunSuite.scala:1560)
[info] at org.scalatest.FunSuiteLike.$anonfun$run$1(FunSuiteLike.scala:233)
[info] at org.scalatest.SuperEngine.runImpl(Engine.scala:518)
[info] at org.scalatest.FunSuiteLike.run(FunSuiteLike.scala:233)
[info] at org.scalatest.FunSuiteLike.run$(FunSuiteLike.scala:232)
[info] at
org.apache.spark.SparkFunSuite.org$scalatest$BeforeAndAfterAll$$super$run(SparkFunSuite.scala:58)
[info] at
org.scalatest.BeforeAndAfterAll.liftedTree1$1(BeforeAndAfterAll.scala:213)
[info] at org.scalatest.BeforeAndAfterAll.run(BeforeAndAfterAll.scala:210)
[info] at org.scalatest.BeforeAndAfterAll.run$(BeforeAndAfterAll.scala:208)
[info] at org.apache.spark.SparkFunSuite.run(SparkFunSuite.scala:58)
[info] at
org.scalatest.tools.Framework.org$scalatest$tools$Framework$$runSuite(Framework.scala:317)
[info] at
org.scalatest.tools.Framework$ScalaTestTask.execute(Framework.scala:510)
[info] at sbt.ForkMain$Run$2.call(ForkMain.java:296)
[info] at sbt.ForkMain$Run$2.call(ForkMain.java:286)
[info] at java.util.concurrent.FutureTask.run(FutureTask.java:266)
[info] at
java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
[info] at
java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
[info] at java.lang.Thread.run(Thread.java:748)
```
----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
For queries about this service, please contact Infrastructure at:
[email protected]
With regards,
Apache Git Services
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]