Repository: spark Updated Branches: refs/heads/branch-2.3 0856b82b3 -> 34191e663
[SPARK-25051][SQL] FixNullability should not stop on AnalysisBarrier ## What changes were proposed in this pull request? The introduction of `AnalysisBarrier` prevented `FixNullability` from going through all the nodes. This introduced a bug, which can lead to wrong results, as the nullability of the output attributes of an outer join can be wrong. The PR makes `FixNullability` go through the `AnalysisBarrier`s. ## How was this patch tested? Added a unit test. Author: Marco Gaido <[email protected]> Closes #22102 from mgaido91/SPARK-25051. Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/34191e66 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/34191e66 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/34191e66 Branch: refs/heads/branch-2.3 Commit: 34191e66360b987d5415397bd0b8a0c4c1c3dfef Parents: 0856b82 Author: Marco Gaido <[email protected]> Authored: Tue Aug 14 10:25:29 2018 -0700 Committer: Xiao Li <[email protected]> Committed: Tue Aug 14 10:25:29 2018 -0700 ---------------------------------------------------------------------- .../org/apache/spark/sql/catalyst/analysis/Analyzer.scala | 1 + .../src/test/scala/org/apache/spark/sql/DataFrameSuite.scala | 6 ++++++ 2 files changed, 7 insertions(+) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/spark/blob/34191e66/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala ---------------------------------------------------------------------- diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala index 5963c14..531f3d4 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala @@ -1704,6 +1704,7 @@ class Analyzer( def 
apply(plan: LogicalPlan): LogicalPlan = plan transformUp { case p if !p.resolved => p // Skip unresolved nodes. + case ab: AnalysisBarrier => apply(ab.child) case p: LogicalPlan if p.resolved => val childrenOutput = p.children.flatMap(c => c.output).groupBy(_.exprId).flatMap { case (exprId, attributes) => http://git-wip-us.apache.org/repos/asf/spark/blob/34191e66/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSuite.scala ---------------------------------------------------------------------- diff --git a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSuite.scala index 9f8d337..3640f6a 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSuite.scala @@ -2300,4 +2300,10 @@ class DataFrameSuite extends QueryTest with SharedSQLContext { checkAnswer(aggPlusFilter1, aggPlusFilter2.collect()) } } + + test("SPARK-25051: fix nullabilities of outer join attributes doesn't stop on AnalysisBarrier") { + val df1 = spark.range(4).selectExpr("id", "cast(id as string) as name") + val df2 = spark.range(3).selectExpr("id") + assert(df1.join(df2, Seq("id"), "left_outer").where(df2("id").isNull).collect().length == 1) + } } --------------------------------------------------------------------- To unsubscribe, e-mail: [email protected] For additional commands, e-mail: [email protected]
