[spark] branch branch-3.0 updated: [SPARK-31956][SQL] Do not fail if there is no ambiguous self join
This is an automated email from the ASF dual-hosted git repository. dongjoon pushed a commit to branch branch-3.0 in repository https://gitbox.apache.org/repos/asf/spark.git The following commit(s) were added to refs/heads/branch-3.0 by this push: new 62fbff8 [SPARK-31956][SQL] Do not fail if there is no ambiguous self join 62fbff8 is described below commit 62fbff8ad127f3a6dd2360f3c02a20f4391cdad4 Author: Wenchen Fan AuthorDate: Wed Jun 10 13:11:24 2020 -0700 [SPARK-31956][SQL] Do not fail if there is no ambiguous self join ### What changes were proposed in this pull request? This is a follow-up to https://github.com/apache/spark/pull/28695 that fixes the problem completely. The root cause is that `df("col").as("name")` is no longer a column reference and should not carry the special column metadata. However, this was broken in https://github.com/apache/spark/commit/ba7adc494923de8104ab37d412edd78afe540f45#diff-ac415c903887e49486ba542a65eec980L1050-L1053 . This PR fixes the regression by stripping the special column metadata in `Column.name`, which was the behavior before https://github.com/apache/spark/pull/28326 . ### Why are the changes needed? Fix a regression. We shouldn't fail if there is no ambiguous self-join. ### Does this PR introduce _any_ user-facing change? Yes, the query in the test can run now. ### How was this patch tested? Updated the existing test. Closes #28783 from cloud-fan/self-join. 
Authored-by: Wenchen Fan Signed-off-by: Dongjoon Hyun (cherry picked from commit c40051932290db3a63f80324900a116019b1e589) Signed-off-by: Dongjoon Hyun --- sql/core/src/main/scala/org/apache/spark/sql/Column.scala | 2 +- .../test/scala/org/apache/spark/sql/DataFrameSelfJoinSuite.scala | 7 ++- 2 files changed, 7 insertions(+), 2 deletions(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/Column.scala b/sql/core/src/main/scala/org/apache/spark/sql/Column.scala index 2144472..e6f7b1d 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/Column.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/Column.scala @@ -1042,7 +1042,7 @@ class Column(val expr: Expression) extends Logging { * @since 2.0.0 */ def name(alias: String): Column = withExpr { -Alias(expr, alias)() +Alias(normalizedExpr(), alias)() } /** diff --git a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSelfJoinSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSelfJoinSuite.scala index fb58c98..3b3b54f 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSelfJoinSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSelfJoinSuite.scala @@ -204,7 +204,7 @@ class DataFrameSelfJoinSuite extends QueryTest with SharedSparkSession { } } - test("SPARK-28344: don't fail as ambiguous self join when there is no join") { + test("SPARK-28344: don't fail if there is no ambiguous self join") { withSQLConf( SQLConf.FAIL_AMBIGUOUS_SELF_JOIN_ENABLED.key -> "true") { val df = Seq(1, 1, 2, 2).toDF("a") @@ -212,6 +212,11 @@ class DataFrameSelfJoinSuite extends QueryTest with SharedSparkSession { checkAnswer( df.select(df("a").alias("x"), sum(df("a")).over(w)), Seq((1, 2), (1, 2), (2, 4), (2, 4)).map(Row.fromTuple)) + + val joined = df.join(spark.range(1)).select($"a") + checkAnswer( +joined.select(joined("a").alias("x"), sum(joined("a")).over(w)), +Seq((1, 2), (1, 2), (2, 4), (2, 4)).map(Row.fromTuple)) } } } - To unsubscribe, e-mail: 
commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
[spark] branch branch-3.0 updated: [SPARK-31956][SQL] Do not fail if there is no ambiguous self join
This is an automated email from the ASF dual-hosted git repository. dongjoon pushed a commit to branch branch-3.0 in repository https://gitbox.apache.org/repos/asf/spark.git The following commit(s) were added to refs/heads/branch-3.0 by this push: new 62fbff8 [SPARK-31956][SQL] Do not fail if there is no ambiguous self join 62fbff8 is described below commit 62fbff8ad127f3a6dd2360f3c02a20f4391cdad4 Author: Wenchen Fan AuthorDate: Wed Jun 10 13:11:24 2020 -0700 [SPARK-31956][SQL] Do not fail if there is no ambiguous self join ### What changes were proposed in this pull request? This is a follow-up to https://github.com/apache/spark/pull/28695 that fixes the problem completely. The root cause is that `df("col").as("name")` is no longer a column reference and should not carry the special column metadata. However, this was broken in https://github.com/apache/spark/commit/ba7adc494923de8104ab37d412edd78afe540f45#diff-ac415c903887e49486ba542a65eec980L1050-L1053 . This PR fixes the regression by stripping the special column metadata in `Column.name`, which was the behavior before https://github.com/apache/spark/pull/28326 . ### Why are the changes needed? Fix a regression. We shouldn't fail if there is no ambiguous self-join. ### Does this PR introduce _any_ user-facing change? Yes, the query in the test can run now. ### How was this patch tested? Updated the existing test. Closes #28783 from cloud-fan/self-join. 
Authored-by: Wenchen Fan Signed-off-by: Dongjoon Hyun (cherry picked from commit c40051932290db3a63f80324900a116019b1e589) Signed-off-by: Dongjoon Hyun --- sql/core/src/main/scala/org/apache/spark/sql/Column.scala | 2 +- .../test/scala/org/apache/spark/sql/DataFrameSelfJoinSuite.scala | 7 ++- 2 files changed, 7 insertions(+), 2 deletions(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/Column.scala b/sql/core/src/main/scala/org/apache/spark/sql/Column.scala index 2144472..e6f7b1d 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/Column.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/Column.scala @@ -1042,7 +1042,7 @@ class Column(val expr: Expression) extends Logging { * @since 2.0.0 */ def name(alias: String): Column = withExpr { -Alias(expr, alias)() +Alias(normalizedExpr(), alias)() } /** diff --git a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSelfJoinSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSelfJoinSuite.scala index fb58c98..3b3b54f 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSelfJoinSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSelfJoinSuite.scala @@ -204,7 +204,7 @@ class DataFrameSelfJoinSuite extends QueryTest with SharedSparkSession { } } - test("SPARK-28344: don't fail as ambiguous self join when there is no join") { + test("SPARK-28344: don't fail if there is no ambiguous self join") { withSQLConf( SQLConf.FAIL_AMBIGUOUS_SELF_JOIN_ENABLED.key -> "true") { val df = Seq(1, 1, 2, 2).toDF("a") @@ -212,6 +212,11 @@ class DataFrameSelfJoinSuite extends QueryTest with SharedSparkSession { checkAnswer( df.select(df("a").alias("x"), sum(df("a")).over(w)), Seq((1, 2), (1, 2), (2, 4), (2, 4)).map(Row.fromTuple)) + + val joined = df.join(spark.range(1)).select($"a") + checkAnswer( +joined.select(joined("a").alias("x"), sum(joined("a")).over(w)), +Seq((1, 2), (1, 2), (2, 4), (2, 4)).map(Row.fromTuple)) } } } - To unsubscribe, e-mail: 
commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
[spark] branch branch-3.0 updated: [SPARK-31956][SQL] Do not fail if there is no ambiguous self join
This is an automated email from the ASF dual-hosted git repository. dongjoon pushed a commit to branch branch-3.0 in repository https://gitbox.apache.org/repos/asf/spark.git The following commit(s) were added to refs/heads/branch-3.0 by this push: new 62fbff8 [SPARK-31956][SQL] Do not fail if there is no ambiguous self join 62fbff8 is described below commit 62fbff8ad127f3a6dd2360f3c02a20f4391cdad4 Author: Wenchen Fan AuthorDate: Wed Jun 10 13:11:24 2020 -0700 [SPARK-31956][SQL] Do not fail if there is no ambiguous self join ### What changes were proposed in this pull request? This is a follow-up to https://github.com/apache/spark/pull/28695 that fixes the problem completely. The root cause is that `df("col").as("name")` is no longer a column reference and should not carry the special column metadata. However, this was broken in https://github.com/apache/spark/commit/ba7adc494923de8104ab37d412edd78afe540f45#diff-ac415c903887e49486ba542a65eec980L1050-L1053 . This PR fixes the regression by stripping the special column metadata in `Column.name`, which was the behavior before https://github.com/apache/spark/pull/28326 . ### Why are the changes needed? Fix a regression. We shouldn't fail if there is no ambiguous self-join. ### Does this PR introduce _any_ user-facing change? Yes, the query in the test can run now. ### How was this patch tested? Updated the existing test. Closes #28783 from cloud-fan/self-join. 
Authored-by: Wenchen Fan Signed-off-by: Dongjoon Hyun (cherry picked from commit c40051932290db3a63f80324900a116019b1e589) Signed-off-by: Dongjoon Hyun --- sql/core/src/main/scala/org/apache/spark/sql/Column.scala | 2 +- .../test/scala/org/apache/spark/sql/DataFrameSelfJoinSuite.scala | 7 ++- 2 files changed, 7 insertions(+), 2 deletions(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/Column.scala b/sql/core/src/main/scala/org/apache/spark/sql/Column.scala index 2144472..e6f7b1d 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/Column.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/Column.scala @@ -1042,7 +1042,7 @@ class Column(val expr: Expression) extends Logging { * @since 2.0.0 */ def name(alias: String): Column = withExpr { -Alias(expr, alias)() +Alias(normalizedExpr(), alias)() } /** diff --git a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSelfJoinSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSelfJoinSuite.scala index fb58c98..3b3b54f 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSelfJoinSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSelfJoinSuite.scala @@ -204,7 +204,7 @@ class DataFrameSelfJoinSuite extends QueryTest with SharedSparkSession { } } - test("SPARK-28344: don't fail as ambiguous self join when there is no join") { + test("SPARK-28344: don't fail if there is no ambiguous self join") { withSQLConf( SQLConf.FAIL_AMBIGUOUS_SELF_JOIN_ENABLED.key -> "true") { val df = Seq(1, 1, 2, 2).toDF("a") @@ -212,6 +212,11 @@ class DataFrameSelfJoinSuite extends QueryTest with SharedSparkSession { checkAnswer( df.select(df("a").alias("x"), sum(df("a")).over(w)), Seq((1, 2), (1, 2), (2, 4), (2, 4)).map(Row.fromTuple)) + + val joined = df.join(spark.range(1)).select($"a") + checkAnswer( +joined.select(joined("a").alias("x"), sum(joined("a")).over(w)), +Seq((1, 2), (1, 2), (2, 4), (2, 4)).map(Row.fromTuple)) } } } - To unsubscribe, e-mail: 
commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org