[ 
https://issues.apache.org/jira/browse/SPARK-29274?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=16939862#comment-16939862
 ] 

Yuming Wang commented on SPARK-29274:
-------------------------------------

SQL server will cast string type to decimal type:

{noformat}
create table t1 (incdata_id decimal(21,0), v VARCHAR(21))
create table t2 (incdata_id VARCHAR(210), v VARCHAR(21))
insert into t1 values(100000000001636981212, '1')
insert into t2 values(100000000001636981213, '2')

1> explain select * from t1 join t2 on (t1.incdata_id = t2.incdata_id)
2> go
StmtText                                                                        
                                                           StmtId      NodeId   
   Parent      PhysicalOp                     LogicalOp                      
Argument                                                                        
                              DefinedValues                                     
                             EstimateRows   EstimateIO     EstimateCPU    
AvgRowSize  TotalSubtreeCost OutputList                                         
                                                                   Warnings 
Type                                                             Parallel 
EstimateExecutions
------------------------------------------------------------------------------------------------------------------------------------------
 ----------- ----------- ----------- ------------------------------ 
------------------------------ 
-------------------------------------------------------------------------------------------------------------
 ------------------------------------------------------------------------------ 
-------------- -------------- -------------- ----------- ---------------- 
---------------------------------------------------------------------------------------------------------------------
 -------- ---------------------------------------------------------------- 
-------- ------------------
select * from t1 join t2 on (t1.incdata_id = t2.incdata_id);                    
                                                                     1          
 1           0 NULL                           NULL                           1  
                                                                                
                           NULL                                                 
                                     1.0           NULL           NULL        
NULL     2.4348916E-2 NULL                                                      
                                                            NULL     SELECT     
                                                             0               
NULL
  |--Hash Match(Inner Join, 
HASH:([master].[dbo].[t1].[incdata_id])=([Expr1006]), 
RESIDUAL:([master].[dbo].[t1].[incdata_id]=[Expr1006]))            1           
2           1 Hash Match                     Inner Join                     
HASH:([master].[dbo].[t1].[incdata_id])=([Expr1006]), 
RESIDUAL:([master].[dbo].[t1].[incdata_id]=[Expr1006])  NULL                    
                                                                  1.0           
 0.0   1.7779617E-2          59     2.4348916E-2 
[master].[dbo].[t1].[incdata_id], [master].[dbo].[t1].[v], 
[master].[dbo].[t2].[incdata_id], [master].[dbo].[t2].[v]  NULL     PLAN_ROW    
                                                            0                1.0
       |--Table Scan(OBJECT:([master].[dbo].[t1]))                              
                                                                     1          
 3           2 Table Scan                     Table Scan                     
OBJECT:([master].[dbo].[t1])                                                    
                              [master].[dbo].[t1].[incdata_id], 
[master].[dbo].[t1].[v]                                 1.0       0.003125      
0.0001581          24        0.0032831 [master].[dbo].[t1].[incdata_id], 
[master].[dbo].[t1].[v]                                                         
    NULL     PLAN_ROW                                                           
     0                1.0
       |--Compute 
Scalar(DEFINE:([Expr1006]=CONVERT_IMPLICIT(decimal(21,0),[master].[dbo].[t2].[incdata_id],0)))
                                     1           4           2 Compute Scalar   
              Compute Scalar                 
DEFINE:([Expr1006]=CONVERT_IMPLICIT(decimal(21,0),[master].[dbo].[t2].[incdata_id],0))
                        
[Expr1006]=CONVERT_IMPLICIT(decimal(21,0),[master].[dbo].[t2].[incdata_id],0)   
          1.0            0.0      0.0000001          57     3.2832001E-3 
[master].[dbo].[t2].[incdata_id], [master].[dbo].[t2].[v], [Expr1006]           
                                      NULL     PLAN_ROW                         
                                       0                1.0
            |--Table Scan(OBJECT:([master].[dbo].[t2]))                         
                                                                     1          
 5           4 Table Scan                     Table Scan                     
OBJECT:([master].[dbo].[t2])                                                    
                              [master].[dbo].[t2].[incdata_id], 
[master].[dbo].[t2].[v]                                 1.0       0.003125      
0.0001581          44        0.0032831 [master].[dbo].[t2].[incdata_id], 
[master].[dbo].[t2].[v]                                                         
    NULL     PLAN_ROW                                                           
     0                1.0

(5 rows affected)

1> select * from t1 join t2 on (t1.incdata_id = t2.incdata_id)
2> go
incdata_id              v incdata_id            v
----------------------- - --------------------- ---------------------

(0 rows affected)
{noformat}

> Can not coerce decimal type to double type when it's join key
> -------------------------------------------------------------
>
>                 Key: SPARK-29274
>                 URL: https://issues.apache.org/jira/browse/SPARK-29274
>             Project: Spark
>          Issue Type: Bug
>          Components: SQL
>    Affects Versions: 2.3.4, 2.4.4, 3.0.0
>            Reporter: Yuming Wang
>            Assignee: Pengfei Chang
>            Priority: Major
>         Attachments: image-2019-09-27-20-20-24-238.png
>
>
> How to reproduce this issue:
> {code:sql}
> create table t1 (incdata_id decimal(21,0), v string) using parquet;
> create table t2 (incdata_id string, v string) using parquet;
> explain select * from t1 join t2 on (t1.incdata_id = t2.incdata_id);
> == Physical Plan ==
> *(5) SortMergeJoin 
> [knownfloatingpointnormalized(normalizenanandzero(cast(incdata_id#31 as 
> double)))], 
> [knownfloatingpointnormalized(normalizenanandzero(cast(incdata_id#33 as 
> double)))], Inner
> :- *(2) Sort 
> [knownfloatingpointnormalized(normalizenanandzero(cast(incdata_id#31 as 
> double))) ASC NULLS FIRST], false, 0
> :  +- Exchange 
> hashpartitioning(knownfloatingpointnormalized(normalizenanandzero(cast(incdata_id#31
>  as double))), 200), true, [id=#104]
> :     +- *(1) Filter isnotnull(incdata_id#31)
> :        +- Scan hive default.t1 [incdata_id#31, v#32], HiveTableRelation 
> `default`.`t1`, org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe, 
> [incdata_id#31, v#32], Statistics(sizeInBytes=8.0 EiB)
> +- *(4) Sort 
> [knownfloatingpointnormalized(normalizenanandzero(cast(incdata_id#33 as 
> double))) ASC NULLS FIRST], false, 0
>    +- Exchange 
> hashpartitioning(knownfloatingpointnormalized(normalizenanandzero(cast(incdata_id#33
>  as double))), 200), true, [id=#112]
>       +- *(3) Filter isnotnull(incdata_id#33)
>          +- Scan hive default.t2 [incdata_id#33, v#34], HiveTableRelation 
> `default`.`t2`, org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe, 
> [incdata_id#33, v#34], Statistics(sizeInBytes=8.0 EiB)
> {code}
> {code:sql}
> select cast(v1 as double) as v3, cast(v2 as double) as v4,
>   cast(v1 as double) = cast(v2 as double), v1 = v2 
> from (select cast('100000000001636981212' as decimal(21, 0)) as v1,
>       cast('100000000001636981213' as decimal(21, 0)) as v2) t;
> 1.0000000000163697E20 1.0000000000163697E20   true    false
> {code}
>  
> It's a realy case in our production:
> !image-2019-09-27-20-20-24-238.png|width=100%!



--
This message was sent by Atlassian Jira
(v8.3.4#803005)

---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to