[ https://issues.apache.org/jira/browse/SPARK-41500?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ]
JacobZheng resolved SPARK-41500.
--------------------------------
    Resolution: Won't Fix

auto-generated cast to Double when subtracting an INTERVAL type from a string
------------------------------------------------------------------------------

                 Key: SPARK-41500
                 URL: https://issues.apache.org/jira/browse/SPARK-41500
             Project: Spark
          Issue Type: Bug
          Components: SQL
    Affects Versions: 3.2.0, 3.2.1, 3.2.2
            Reporter: JacobZheng
            Priority: Major

h2. *Describe the bug*

Here is a SQL statement:
{code:sql}
select '2022-02-01' - INTERVAL 1 year
{code}
Spark automatically rewrites it as cast('2022-02-01' as double) - INTERVAL 1 year, and a type mismatch error occurs.

h2. *To Reproduce*

On Spark 3.0.1, using spark-shell:
{code:java}
scala> spark.sql("select '2022-02-01'- interval 1 year").show
+------------------------------------------------------------------+
|CAST(CAST(2022-02-01 AS TIMESTAMP) - INTERVAL '1 years' AS STRING)|
+------------------------------------------------------------------+
|                                               2021-02-01 00:00:00|
+------------------------------------------------------------------+
{code}

On Spark 3.2.1, using spark-shell:
{code:java}
scala> spark.sql("select '2022-02-01'- interval 1 year").show
org.apache.spark.sql.AnalysisException: cannot resolve '(CAST('2022-02-01' AS DOUBLE) - INTERVAL '1' YEAR)' due to data type mismatch: differing types in '(CAST('2022-02-01' AS DOUBLE) - INTERVAL '1' YEAR)' (double and interval year).; line 1 pos 7;
'Project [unresolvedalias((cast(2022-02-01 as double) - INTERVAL '1' YEAR), None)]
+- OneRowRelation
  at org.apache.spark.sql.catalyst.analysis.package$AnalysisErrorAt.failAnalysis(package.scala:42)
  at org.apache.spark.sql.catalyst.analysis.CheckAnalysis$$anonfun$$nestedInanonfun$checkAnalysis$1$2.applyOrElse(CheckAnalysis.scala:190)
  at org.apache.spark.sql.catalyst.analysis.CheckAnalysis$$anonfun$$nestedInanonfun$checkAnalysis$1$2.applyOrElse(CheckAnalysis.scala:175)
  at org.apache.spark.sql.catalyst.trees.TreeNode.$anonfun$transformUpWithPruning$2(TreeNode.scala:535)
  at org.apache.spark.sql.catalyst.trees.CurrentOrigin$.withOrigin(TreeNode.scala:82)
  at org.apache.spark.sql.catalyst.trees.TreeNode.transformUpWithPruning(TreeNode.scala:535)
  at org.apache.spark.sql.catalyst.trees.TreeNode.$anonfun$transformUpWithPruning$1(TreeNode.scala:532)
  at org.apache.spark.sql.catalyst.trees.UnaryLike.mapChildren(TreeNode.scala:1128)
  at org.apache.spark.sql.catalyst.trees.UnaryLike.mapChildren$(TreeNode.scala:1127)
  at org.apache.spark.sql.catalyst.expressions.UnaryExpression.mapChildren(Expression.scala:467)
  at org.apache.spark.sql.catalyst.trees.TreeNode.transformUpWithPruning(TreeNode.scala:532)
  at org.apache.spark.sql.catalyst.plans.QueryPlan.$anonfun$transformExpressionsUpWithPruning$1(QueryPlan.scala:181)
  at org.apache.spark.sql.catalyst.plans.QueryPlan.$anonfun$mapExpressions$1(QueryPlan.scala:193)
  at org.apache.spark.sql.catalyst.trees.CurrentOrigin$.withOrigin(TreeNode.scala:82)
  at org.apache.spark.sql.catalyst.plans.QueryPlan.transformExpression$1(QueryPlan.scala:193)
  at org.apache.spark.sql.catalyst.plans.QueryPlan.recursiveTransform$1(QueryPlan.scala:204)
  at org.apache.spark.sql.catalyst.plans.QueryPlan.$anonfun$mapExpressions$3(QueryPlan.scala:209)
  at scala.collection.TraversableLike.$anonfun$map$1(TraversableLike.scala:286)
  at scala.collection.immutable.List.foreach(List.scala:431)
  at scala.collection.TraversableLike.map(TraversableLike.scala:286)
  at scala.collection.TraversableLike.map$(TraversableLike.scala:279)
  at scala.collection.immutable.List.map(List.scala:305)
  at org.apache.spark.sql.catalyst.plans.QueryPlan.recursiveTransform$1(QueryPlan.scala:209)
  at org.apache.spark.sql.catalyst.plans.QueryPlan.$anonfun$mapExpressions$4(QueryPlan.scala:214)
  at org.apache.spark.sql.catalyst.trees.TreeNode.mapProductIterator(TreeNode.scala:323)
  at org.apache.spark.sql.catalyst.plans.QueryPlan.mapExpressions(QueryPlan.scala:214)
  at org.apache.spark.sql.catalyst.plans.QueryPlan.transformExpressionsUpWithPruning(QueryPlan.scala:181)
  at org.apache.spark.sql.catalyst.plans.QueryPlan.transformExpressionsUp(QueryPlan.scala:161)
  at org.apache.spark.sql.catalyst.analysis.CheckAnalysis.$anonfun$checkAnalysis$1(CheckAnalysis.scala:175)
  at org.apache.spark.sql.catalyst.analysis.CheckAnalysis.$anonfun$checkAnalysis$1$adapted(CheckAnalysis.scala:94)
  at org.apache.spark.sql.catalyst.trees.TreeNode.foreachUp(TreeNode.scala:263)
  at org.apache.spark.sql.catalyst.analysis.CheckAnalysis.checkAnalysis(CheckAnalysis.scala:94)
  at org.apache.spark.sql.catalyst.analysis.CheckAnalysis.checkAnalysis$(CheckAnalysis.scala:91)
  at org.apache.spark.sql.catalyst.analysis.Analyzer.checkAnalysis(Analyzer.scala:172)
  at org.apache.spark.sql.catalyst.analysis.Analyzer.$anonfun$executeAndCheck$1(Analyzer.scala:195)
  at org.apache.spark.sql.catalyst.plans.logical.AnalysisHelper$.markInAnalyzer(AnalysisHelper.scala:330)
  at org.apache.spark.sql.catalyst.analysis.Analyzer.executeAndCheck(Analyzer.scala:192)
  at org.apache.spark.sql.execution.QueryExecution.$anonfun$analyzed$1(QueryExecution.scala:88)
  at org.apache.spark.sql.catalyst.QueryPlanningTracker.measurePhase(QueryPlanningTracker.scala:111)
  at org.apache.spark.sql.execution.QueryExecution.$anonfun$executePhase$1(QueryExecution.scala:196)
  at org.apache.spark.sql.SparkSession.withActive(SparkSession.scala:775)
  at org.apache.spark.sql.execution.QueryExecution.executePhase(QueryExecution.scala:196)
  at org.apache.spark.sql.execution.QueryExecution.analyzed$lzycompute(QueryExecution.scala:88)
  at org.apache.spark.sql.execution.QueryExecution.analyzed(QueryExecution.scala:86)
  at org.apache.spark.sql.execution.QueryExecution.assertAnalyzed(QueryExecution.scala:78)
  at org.apache.spark.sql.Dataset$.$anonfun$ofRows$2(Dataset.scala:98)
  at org.apache.spark.sql.SparkSession.withActive(SparkSession.scala:775)
  at org.apache.spark.sql.Dataset$.ofRows(Dataset.scala:96)
  at org.apache.spark.sql.SparkSession.$anonfun$sql$1(SparkSession.scala:618)
  at org.apache.spark.sql.SparkSession.withActive(SparkSession.scala:775)
  at org.apache.spark.sql.SparkSession.sql(SparkSession.scala:613)
  ... 47 elided
{code}
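For comparison, making the cast explicit avoids the failure on 3.2.x. A minimal sketch, assuming an active spark-shell session (not verified against every affected version): once the left operand is already a timestamp, the analyzer has no bare string operand left to promote, so no cast to double is inserted (see the PromoteStrings rule below).
{code:scala}
// Workaround sketch, assuming a SparkSession named `spark` as in spark-shell.
// Cast the string explicitly so the subtraction is timestamp - interval,
// a supported operation, instead of string - interval.
spark.sql("select cast('2022-02-01' as timestamp) - interval 1 year").show()

// Equivalent form using to_timestamp:
spark.sql("select to_timestamp('2022-02-01') - interval 1 year").show()
{code}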
This problem is related to SPARK-27790, which introduced the new ANSI interval types (DayTimeIntervalType and YearMonthIntervalType).

+org.apache.spark.sql.catalyst.analysis.TypeCoercion.PromoteStrings#transform+
{code:scala}
override def transform: PartialFunction[Expression, Expression] = {
  // Skip nodes whose children have not been resolved yet.
  case e if !e.childrenResolved => e
  case a @ BinaryArithmetic(left @ StringType(), right)
      if right.dataType != CalendarIntervalType =>
    a.makeCopy(Array(Cast(left, DoubleType), right))
  case a @ BinaryArithmetic(left, right @ StringType())
      if left.dataType != CalendarIntervalType =>
    a.makeCopy(Array(left, Cast(right, DoubleType)))
  ...
}
{code}
This code is what inserts the cast to double: the guard only exempts CalendarIntervalType, so an ANSI interval operand falls through and the string side is promoted to double.
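To see the type change directly: on 3.2.x the literal interval 1 year is typed as a year-month interval rather than CalendarIntervalType, which is why the guard above no longer matches. A quick check, sketched under the assumption of a spark-shell session:
{code:scala}
// Sketch (assumes spark-shell): print the type Spark assigns to the literal.
// On 3.2.x this is expected to show the ANSI type "interval year"; on 3.0.x
// the legacy CalendarIntervalType printed simply as "interval".
spark.sql("select typeof(interval 1 year)").show(false)
{code}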
I wonder whether this is a bug or by design.