Github user cloud-fan commented on a diff in the pull request:
https://github.com/apache/spark/pull/23035#discussion_r233696401
--- Diff: sql/core/src/test/scala/org/apache/spark/sql/DataFrameSuite.scala
---
@@ -2554,4 +2554,34 @@ class DataFrameSuite extends QueryTest with
SharedSQLContext {
checkAnswer(swappedDf.filter($"key"($"map") > "a"), Row(2, Map(2 ->
"b")))
}
+
+ test("SPARK-26057: attribute deduplication on already analyzed plans") {
+ withTempView("cc", "p", "c") {
+ val df1 = Seq(("1-1", "sp", 6)).toDF("id", "layout", "n")
+ df1.createOrReplaceTempView("cc")
+ val df2 = Seq(("sp", 1)).toDF("layout", "ts")
+ df2.createOrReplaceTempView("p")
+ val df3 = Seq(("1-1", "sp", 3)).toDF("id", "layout", "ts")
+ df3.createOrReplaceTempView("c")
+ spark.sql(
+ """
+ |SELECT cc.id, cc.layout, count(*) as m
+ |FROM cc
+ |JOIN p USING(layout)
+ |WHERE EXISTS(
+ | SELECT 1
+ | FROM c
+ | WHERE c.id = cc.id AND c.layout = cc.layout AND c.ts > p.ts)
+ |GROUP BY cc.id, cc.layout
+ """.stripMargin).createOrReplaceTempView("pcc")
+ val res = spark.sql(
--- End diff --
Good catch on the problem! Do you think it's possible to simplify the test?
I think we just need a temp view with a subquery, and then to use that view in a join.
---
---------------------------------------------------------------------
To unsubscribe, e-mail: reviews-unsubscribe@spark.apache.org
For additional commands, e-mail: reviews-help@spark.apache.org