Repository: spark Updated Branches: refs/heads/master c8d71a418 -> c2b50d693
[SPARK-9292] Analysis should check that join conditions' data types are BooleanType This patch adds an analysis check to ensure that join conditions' data types are BooleanType. This check is necessary in order to report proper errors for non-boolean DataFrame join conditions. Author: Josh Rosen <[email protected]> Closes #7630 from JoshRosen/SPARK-9292 and squashes the following commits: aec6c7b [Josh Rosen] Check condition type in resolved() 75a3ea6 [Josh Rosen] Fix SPARK-9292. Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/c2b50d69 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/c2b50d69 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/c2b50d69 Branch: refs/heads/master Commit: c2b50d693e469558e3b3c9cbb9d76089d259b587 Parents: c8d71a4 Author: Josh Rosen <[email protected]> Authored: Fri Jul 24 09:49:50 2015 -0700 Committer: Michael Armbrust <[email protected]> Committed: Fri Jul 24 09:49:50 2015 -0700 ---------------------------------------------------------------------- .../org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala | 5 +++++ .../spark/sql/catalyst/plans/logical/basicOperators.scala | 5 ++++- .../apache/spark/sql/catalyst/analysis/AnalysisErrorSuite.scala | 5 +++++ 3 files changed, 14 insertions(+), 1 deletion(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/spark/blob/c2b50d69/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala ---------------------------------------------------------------------- diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala index c203fce..c23ab3c 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala @@ -83,6 +83,11 @@ trait CheckAnalysis { s"filter expression '${f.condition.prettyString}' " + s"of type ${f.condition.dataType.simpleString} is not a boolean.") + case j @ Join(_, _, _, Some(condition)) if condition.dataType != BooleanType => + failAnalysis( + s"join condition '${condition.prettyString}' " + + s"of type ${condition.dataType.simpleString} is not a boolean.") + case Aggregate(groupingExprs, aggregateExprs, child) => def checkValidAggregateExpression(expr: Expression): Unit = expr match { case _: AggregateExpression => // OK http://git-wip-us.apache.org/repos/asf/spark/blob/c2b50d69/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/basicOperators.scala ---------------------------------------------------------------------- diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/basicOperators.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/basicOperators.scala index 6aefa9f..57a1282 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/basicOperators.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/basicOperators.scala @@ -128,7 +128,10 @@ case class Join( // Joins are only resolved if they don't introduce ambiguous expression ids. override lazy val resolved: Boolean = { - childrenResolved && expressions.forall(_.resolved) && selfJoinResolved + childrenResolved && + expressions.forall(_.resolved) && + selfJoinResolved && + condition.forall(_.dataType == BooleanType) } } http://git-wip-us.apache.org/repos/asf/spark/blob/c2b50d69/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisErrorSuite.scala ---------------------------------------------------------------------- diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisErrorSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisErrorSuite.scala index dca8c88..7bf678e 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisErrorSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisErrorSuite.scala @@ -119,6 +119,11 @@ class AnalysisErrorSuite extends SparkFunSuite with BeforeAndAfter { "filter" :: "'1'" :: "not a boolean" :: Literal(1).dataType.simpleString :: Nil) errorTest( + "non-boolean join conditions", + testRelation.join(testRelation, condition = Some(Literal(1))), + "condition" :: "'1'" :: "not a boolean" :: Literal(1).dataType.simpleString :: Nil) + + errorTest( "missing group by", testRelation2.groupBy('a)('b), "'b'" :: "group by" :: Nil --------------------------------------------------------------------- To unsubscribe, e-mail: [email protected] For additional commands, e-mail: [email protected]
