[
https://issues.apache.org/jira/browse/SPARK-15776?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel
]
Weizhong updated SPARK-15776:
-
Description:
{code:sql}
CREATE TABLE cdr (
debet_dt int ,
srv_typ_cdstring ,
b_brnd_cd smallint ,
call_dur int
)
ROW FORMAT delimited fields terminated by ','
STORED AS TEXTFILE;
{code}
{code:sql}
SELECT debet_dt,
SUM(CASE WHEN srv_typ_cd LIKE '0%' THEN call_dur / 60 ELSE 0 END)
FROM cdr
GROUP BY debet_dt
ORDER BY debet_dt;
{code}
{noformat}
== Analyzed Logical Plan ==
debet_dt: int, sum(CASE WHEN srv_typ_cd LIKE 0% THEN (call_dur / 60) ELSE 0
END): bigint
Project [debet_dt#16, sum(CASE WHEN srv_typ_cd LIKE 0% THEN (call_dur / 60)
ELSE 0 END)#27L]
+- Sort [debet_dt#16 ASC], true
+- Aggregate [debet_dt#16], [debet_dt#16, sum(cast(CASE WHEN srv_typ_cd#18
LIKE 0% THEN (cast(call_dur#21 as double) / cast(60 as double)) ELSE cast(0 as
double) END as bigint)) AS sum(CASE WHEN srv_typ_cd LIKE 0% THEN (call_dur /
60) ELSE 0 END)#27L]
+- MetastoreRelation default, cdr
{noformat}
{code:sql}
SELECT debet_dt,
SUM(CASE WHEN b_brnd_cd IN(1) THEN call_dur / 60 ELSE 0 END)
FROM cdr
GROUP BY debet_dt
ORDER BY debet_dt;
{code}
{noformat}
== Analyzed Logical Plan ==
debet_dt: int, sum(CASE WHEN (CAST(b_brnd_cd AS INT) IN (CAST(1 AS INT))) THEN
(CAST(call_dur AS DOUBLE) / CAST(60 AS DOUBLE)) ELSE CAST(0 AS DOUBLE) END):
double
Project [debet_dt#76, sum(CASE WHEN (CAST(b_brnd_cd AS INT) IN (CAST(1 AS
INT))) THEN (CAST(call_dur AS DOUBLE) / CAST(60 AS DOUBLE)) ELSE CAST(0 AS
DOUBLE) END)#87]
+- Sort [debet_dt#76 ASC], true
+- Aggregate [debet_dt#76], [debet_dt#76, sum(CASE WHEN cast(b_brnd_cd#80 as
int) IN (cast(1 as int)) THEN (cast(call_dur#81 as double) / cast(60 as
double)) ELSE cast(0 as double) END) AS sum(CASE WHEN (CAST(b_brnd_cd AS INT)
IN (CAST(1 AS INT))) THEN (CAST(call_dur AS DOUBLE) / CAST(60 AS DOUBLE)) ELSE
CAST(0 AS DOUBLE) END)#87]
+- MetastoreRelation default, cdr
{noformat}
The only difference is WHEN condition, but will result different output column
type(one is bigint, one is double)
We need to apply "Division" before "FunctionArgumentConversion", like below:
{code:java}
val typeCoercionRules =
PropagateTypes ::
InConversion ::
WidenSetOperationTypes ::
PromoteStrings ::
DecimalPrecision ::
BooleanEquality ::
StringToIntegralCasts ::
Division ::
FunctionArgumentConversion ::
CaseWhenCoercion ::
IfCoercion ::
PropagateTypes ::
ImplicitTypeCasts ::
DateTimeOperations ::
Nil
{code}
was:
{code:sql}
CREATE TABLE cdr (
debet_dt int ,
srv_typ_cdstring ,
b_brnd_cd smallint ,
call_dur int
)
ROW FORMAT delimited fields terminated by ','
STORED AS TEXTFILE;
{code}
{code:sql}
SELECT debet_dt,
SUM(CASE WHEN srv_typ_cd LIKE '0%' THEN call_dur / 60 ELSE 0 END)
FROM cdr
GROUP BY debet_dt
ORDER BY debet_dt;
{code}
{noformat}
== Analyzed Logical Plan ==
debet_dt: int, sum(CASE WHEN srv_typ_cd LIKE 0% THEN (call_dur / 60) ELSE 0
END): bigint
Project [debet_dt#16, sum(CASE WHEN srv_typ_cd LIKE 0% THEN (call_dur / 60)
ELSE 0 END)#27L]
+- Sort [debet_dt#16 ASC], true
+- Aggregate [debet_dt#16], [debet_dt#16, sum(cast(CASE WHEN srv_typ_cd#18
LIKE 0% THEN (cast(call_dur#21 as double) / cast(60 as double)) ELSE cast(0 as
double) END as bigint)) AS sum(CASE WHEN srv_typ_cd LIKE 0% THEN (call_dur /
60) ELSE 0 END)#27L]
+- MetastoreRelation default, cdr
{noformat}
{code:sql}
SELECT debet_dt,
SUM(CASE WHEN b_brnd_cd IN(1) THEN call_dur / 60 ELSE 0 END)
FROM cdr
GROUP BY debet_dt
ORDER BY debet_dt;
{code}
{noformat}
== Analyzed Logical Plan ==
debet_dt: int, sum(CASE WHEN (CAST(b_brnd_cd AS INT) IN (CAST(1 AS INT))) THEN
(CAST(call_dur AS DOUBLE) / CAST(60 AS DOUBLE)) ELSE CAST(0 AS DOUBLE) END):
double
Project [debet_dt#76, sum(CASE WHEN (CAST(b_brnd_cd AS INT) IN (CAST(1 AS
INT))) THEN (CAST(call_dur AS DOUBLE) / CAST(60 AS DOUBLE)) ELSE CAST(0 AS
DOUBLE) END)#87]
+- Sort [debet_dt#76 ASC], true
+- Aggregate [debet_dt#76], [debet_dt#76, sum(CASE WHEN cast(b_brnd_cd#80 as
int) IN (cast(1 as int)) THEN (cast(call_dur#81 as double) / cast(60 as
double)) ELSE cast(0 as double) END) AS sum(CASE WHEN (CAST(b_brnd_cd AS INT)
IN (CAST(1 AS INT))) THEN (CAST(call_dur AS DOUBLE) / CAST(60 AS DOUBLE)) ELSE
CAST(0 AS DOUBLE) END)#87]
+- MetastoreRelation default, cdr
{noformat}
The only difference is WHEN condition, but will result different output column
type(one is bigint, one is double)
> Type coercion incorrect
> ---
>
> Key: SPARK-15776
> URL: https://issues.apache.org/jira/browse/SPARK-15776
> Project: Spark
> Issue Type: Bug
> Components: SQL
> Environment: Spark based