This is an automated email from the ASF dual-hosted git repository. maxgekk pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push: new 89fdb8a6fb6 [SPARK-39354][SQL] Ensure show `Table or view not found` even if there are `dataTypeMismatchError` related to `Filter` at the same time 89fdb8a6fb6 is described below commit 89fdb8a6fb6a669c458891b3abeba236e64b1e89 Author: yangjie01 <yangji...@baidu.com> AuthorDate: Thu Jun 2 13:06:14 2022 +0300 [SPARK-39354][SQL] Ensure show `Table or view not found` even if there are `dataTypeMismatchError` related to `Filter` at the same time ### What changes were proposed in this pull request? After SPARK-38118, `dataTypeMismatchError` related to `Filter` will be checked and thrown in `RemoveTempResolvedColumn`; this will cause a compatibility issue with exception message presentation. For example, the following case: ``` spark.sql("create table t1(user_id int, auct_end_dt date) using parquet;") spark.sql("select * from t1 join t2 on t1.user_id = t2.user_id where t1.auct_end_dt >= Date_sub('2020-12-27', 90)").show ``` The expected message is ``` Table or view not found: t2 ``` But the actual message is ``` org.apache.spark.sql.AnalysisException: cannot resolve 'date_sub('2020-12-27', 90)' due to data type mismatch: argument 1 requires date type, however, ''2020-12-27'' is of string type.; line 1 pos 76 ``` For forward compatibility, this PR changes to only record `DATA_TYPE_MISMATCH_ERROR_MESSAGE` in the `RemoveTempResolvedColumn` check process, and moves `failAnalysis` to `CheckAnalysis#checkAnalysis` ### Why are the changes needed? Fix analysis exception message compatibility. ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? Pass GitHub Actions and add a new test case Closes #36746 from LuciferYang/SPARK-39354. 
Authored-by: yangjie01 <yangji...@baidu.com> Signed-off-by: Max Gekk <max.g...@gmail.com> --- .../apache/spark/sql/catalyst/analysis/Analyzer.scala | 7 ++----- .../spark/sql/catalyst/analysis/CheckAnalysis.scala | 17 ++++++++++++++++- .../spark/sql/catalyst/analysis/AnalysisSuite.scala | 16 ++++++++++++++-- 3 files changed, 32 insertions(+), 8 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala index b13dede2acc..3017fc10dfd 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala @@ -28,7 +28,7 @@ import scala.util.{Failure, Random, Success, Try} import org.apache.spark.sql.AnalysisException import org.apache.spark.sql.catalyst._ -import org.apache.spark.sql.catalyst.analysis.SimpleAnalyzer.{extraHintForAnsiTypeCoercionExpression, DATA_TYPE_MISMATCH_ERROR} +import org.apache.spark.sql.catalyst.analysis.SimpleAnalyzer.DATA_TYPE_MISMATCH_ERROR_MESSAGE import org.apache.spark.sql.catalyst.catalog._ import org.apache.spark.sql.catalyst.encoders.OuterScopes import org.apache.spark.sql.catalyst.expressions.{Expression, FrameLessOffsetWindowFunction, _} @@ -4361,10 +4361,7 @@ object RemoveTempResolvedColumn extends Rule[LogicalPlan] { case e: Expression if e.childrenResolved && e.checkInputDataTypes().isFailure => e.checkInputDataTypes() match { case TypeCheckResult.TypeCheckFailure(message) => - e.setTagValue(DATA_TYPE_MISMATCH_ERROR, true) - e.failAnalysis( - s"cannot resolve '${e.sql}' due to data type mismatch: $message" + - extraHintForAnsiTypeCoercionExpression(plan)) + e.setTagValue(DATA_TYPE_MISMATCH_ERROR_MESSAGE, message) } case _ => }) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala index 95b0226f00d..ed2e9ba2b6b 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala @@ -50,6 +50,8 @@ trait CheckAnalysis extends PredicateHelper with LookupCatalog { val DATA_TYPE_MISMATCH_ERROR = TreeNodeTag[Boolean]("dataTypeMismatchError") + val DATA_TYPE_MISMATCH_ERROR_MESSAGE = TreeNodeTag[String]("dataTypeMismatchError") + protected def failAnalysis(msg: String): Nothing = { throw new AnalysisException(msg) } @@ -174,7 +176,20 @@ trait CheckAnalysis extends PredicateHelper with LookupCatalog { } } - getAllExpressions(operator).foreach(_.foreachUp { + val expressions = getAllExpressions(operator) + + expressions.foreach(_.foreachUp { + case e: Expression => + e.getTagValue(DATA_TYPE_MISMATCH_ERROR_MESSAGE) match { + case Some(message) => + e.failAnalysis(s"cannot resolve '${e.sql}' due to data type mismatch: $message" + + extraHintForAnsiTypeCoercionExpression(operator)) + case _ => + } + case _ => + }) + + expressions.foreach(_.foreachUp { case a: Attribute if !a.resolved => val missingCol = a.sql val candidates = operator.inputSet.toSeq.map(_.qualifiedName) diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisSuite.scala index 84f9c6c5e76..a6e952fd865 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisSuite.scala @@ -1170,13 +1170,25 @@ class AnalysisSuite extends AnalysisTest with Matchers { |WITH t as (SELECT true c, false d) |SELECT (t.c AND t.d) c |FROM t - |GROUP BY t.c + |GROUP BY t.c, t.d |HAVING ${func}(c) > 0d""".stripMargin), - Seq(s"cannot resolve '$func(t.c)' due to data 
type mismatch"), + Seq(s"cannot resolve '$func(c)' due to data type mismatch"), false) } } + test("SPARK-39354: should be `Table or view not found`") { + assertAnalysisError(parsePlan( + s""" + |WITH t1 as (SELECT 1 user_id, CAST("2022-06-02" AS DATE) dt) + |SELECT * + |FROM t1 + |JOIN t2 ON t1.user_id = t2.user_id + |WHERE t1.dt >= DATE_SUB('2020-12-27', 90)""".stripMargin), + Seq(s"Table or view not found: t2"), + false) + } + test("SPARK-39144: nested subquery expressions deduplicate relations should be done bottom up") { val innerRelation = SubqueryAlias("src1", testRelation) val outerRelation = SubqueryAlias("src2", testRelation) --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org