[jira] [Comment Edited] (SPARK-29274) Can not coerce decimal type to double type when it's join key
[ https://issues.apache.org/jira/browse/SPARK-29274?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel=16939879#comment-16939879 ] Yuming Wang edited comment on SPARK-29274 at 9/28/19 2:22 PM: -- Mysql also returns incorrect values. {noformat} create table t1 (incdata_id decimal(21,0), v VARCHAR(210)); create table t2 (incdata_id VARCHAR(210), v VARCHAR(210)); insert into t1 values(1001636981212, '1'); insert into t2 values(1001636981213, '2'); mysql> select * from t1 join t2 on (t1.incdata_id = t2.incdata_id); +---+--+---+--+ | incdata_id| v| incdata_id| v| +---+--+---+--+ | 1001636981212 | 1| 1001636981213 | 2| +---+--+---+--+ 1 row in set (0.00 sec) mysql> explain select * from t1 join t2 on (t1.incdata_id = t2.incdata_id); ++-+---++--+---+--+-+--+--+--++ | id | select_type | table | partitions | type | possible_keys | key | key_len | ref | rows | filtered | Extra | ++-+---++--+---+--+-+--+--+--++ | 1 | SIMPLE | t1| NULL | ALL | NULL | NULL | NULL | NULL |1 | 100.00 | NULL | | 1 | SIMPLE | t2| NULL | ALL | NULL | NULL | NULL | NULL |1 | 100.00 | Using where; Using join buffer (Block Nested Loop) | ++-+---++--+---+--+-+--+--+--++ 2 rows in set, 1 warning (0.01 sec) {noformat} was (Author: q79969786): Mysql also return incorrect values. {noformat} create table t1 (incdata_id decimal(21,0), v VARCHAR(210)); create table t2 (incdata_id VARCHAR(210), v VARCHAR(210)); insert into t1 values(1001636981212, '1'); insert into t2 values(1001636981213, '2'); mysql> select * from t1 join t2 on (t1.incdata_id = t2.incdata_id); +---+--+---+--+ | incdata_id| v| incdata_id| v| +---+--+---+--+ | 1001636981212 | 1| 1001636981213 | 2| +---+--+---+--+ 1 row in set (0.00 sec) mysql> explain select * from t1 join t2 on (t1.incdata_id = t2.incdata_id); ++-+---++--+---+--+-+--+--+--++ | id | select_type | table | partitions | type | possible_keys | key | key_len | ref | rows | filtered | Extra | ++-+---++--+---+--+-+--+--+--++ | 1 | SIMPLE | t1| NULL | ALL | NULL | NULL | NULL | NULL |1 | 100.00 | NULL | | 1 | SIMPLE | t2| NULL | ALL | NULL | NULL | NULL | NULL |1 | 100.00 | Using where; Using join buffer (Block Nested Loop) | ++-+---++--+---+--+-+--+--+--++ 2 rows in set, 1 warning (0.01 sec) {noformat} > Can not coerce decimal type to double type when it's join key > - > > Key: SPARK-29274 > URL: https://issues.apache.org/jira/browse/SPARK-29274 > Project: Spark > Issue Type: Bug > Components: SQL >Affects Versions: 2.3.4, 2.4.4, 3.0.0 >Reporter: Yuming Wang >Assignee: Pengfei Chang >Priority: Major > Attachments: image-2019-09-27-20-20-24-238.png > > > How to reproduce this issue: > {code:sql} > create table t1 (incdata_id decimal(21,0), v string) using parquet; > create table t2 (incdata_id string, v string) using parquet; > explain select * from t1 join t2 on (t1.incdata_id = t2.incdata_id); > == Physical Plan == > *(5) SortMergeJoin > [knownfloatingpointnormalized(normalizenanandzero(cast(incdata_id#31 as > double)))], > [knownfloatingpointnormalized(normalizenanandzero(cast(incdata_id#33 as > double)))], Inner > :- *(2) Sort > [knownfloatingpointnormalized(normalizenanandzero(cast(incdata_id#31 as > double))) ASC NULLS FIRST], false, 0 > : +-
[jira] [Comment Edited] (SPARK-29274) Can not coerce decimal type to double type when it's join key
[ https://issues.apache.org/jira/browse/SPARK-29274?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel=16939875#comment-16939875 ] Yuming Wang edited comment on SPARK-29274 at 9/28/19 6:22 AM: -- I don't konw how DB2 work. But it returns correct values {noformat} create table t1 (incdata_id decimal(21,0), v VARCHAR(210)); create table t2 (incdata_id VARCHAR(210), v VARCHAR(210)); insert into t1 values(1001636981212, '1'); insert into t2 values(1001636981213, '2'); [db2inst1@2f3c821d36b7 ~]$ db2 "select * from t1 join t2 on (t1.incdata_id = t2.incdata_id)" INCDATA_ID V INCDATA_ID V --- -- -- -- 0 record(s) selected. [db2inst1@2f3c821d36b7 ~]$ db2expln -database sample -t -g -f "test.sql" DB2 Universal Database Version 10.5, 5622-044 (c) Copyright IBM Corp. 1991, 2012 Licensed Material - Program Property of IBM IBM DB2 Universal Database SQL and XQUERY Explain Tool DB2 Universal Database Version 10.5, 5622-044 (c) Copyright IBM Corp. 1991, 2012 Licensed Material - Program Property of IBM IBM DB2 Universal Database SQL and XQUERY Explain Tool DYNAMIC *** STATEMENT == Isolation Level = Cursor Stability Blocking = Block Unambiguous Cursors Query Optimization Class = 5 Partition Parallel = No Intra-Partition Parallel = No SQL Path = "SYSIBM", "SYSFUN", "SYSPROC", "SYSIBMADM", "DB2INST1" Statement: select * from t1 join t2 on (t1.incdata_id =t2.incdata_id) Section Code Page = 1208 Estimated Cost = 13.630928 Estimated Cardinality = 1.00 Access Table Name = DB2INST1.T1 ID = 3,6 | #Columns = 2 | Skip Inserted Rows | Avoid Locking Committed Data | Currently Committed for Cursor Stability | May participate in Scan Sharing structures | Scan may start anywhere and wrap, for completion | Fast scan, for purposes of scan sharing management | Scan can be throttled in scan sharing management | Relation Scan | | Prefetch: Eligible | Lock Intents | | Table: Intent Share | | Row : Next Key Share | Sargable Predicate(s) | | Process Build Table for Hash Join Hash Join | Estimated Build Size: 4000 | Estimated Probe Size: 4000 | Access Table Name = DB2INST1.T2 ID = 3,7 | | #Columns = 2 | | Skip Inserted Rows | | Avoid Locking Committed Data | | Currently Committed for Cursor Stability | | May participate in Scan Sharing structures | | Scan may start anywhere and wrap, for completion | | Fast scan, for purposes of scan sharing management | | Scan can be throttled in scan sharing management | | Relation Scan | | | Prefetch: Eligible | | Lock Intents | | | Table: Intent Share | | | Row : Next Key Share | | Sargable Predicate(s) | | | Process Probe Table for Hash Join Return Data to Application | #Columns = 4 End of section Optimizer Plan: Rows Operator (ID) Cost 1 RETURN ( 1) 13.6309 | 1 HSJOIN ( 2) 13.6309 / \ 1 1 TBSCANTBSCAN ( 3) ( 4) 6.81521 6.81521 | | 1 1 Table:Table: DB2INST1 DB2INST1 T2T1 {noformat} was (Author: q79969786): I don't konw how DB2 work. But ir returns correct values {noformat} create table t1 (incdata_id decimal(21,0), v VARCHAR(210)); create table t2 (incdata_id VARCHAR(210), v VARCHAR(210)); insert into t1 values(1001636981212, '1'); insert into t2 values(1001636981213, '2'); [db2inst1@2f3c821d36b7 ~]$ db2 "select * from t1 join t2 on (t1.incdata_id = t2.incdata_id)" INCDATA_ID V
[jira] [Comment Edited] (SPARK-29274) Can not coerce decimal type to double type when it's join key
[ https://issues.apache.org/jira/browse/SPARK-29274?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel=16939862#comment-16939862 ] Yuming Wang edited comment on SPARK-29274 at 9/28/19 5:26 AM: -- SQL Server will cast string type to decimal type: {noformat} create table t1 (incdata_id decimal(21,0), v VARCHAR(21)) create table t2 (incdata_id VARCHAR(210), v VARCHAR(21)) insert into t1 values(1001636981212, '1') insert into t2 values(1001636981213, '2') 1> explain select * from t1 join t2 on (t1.incdata_id = t2.incdata_id) 2> go StmtText StmtId NodeId Parent PhysicalOp LogicalOp Argument DefinedValues EstimateRows EstimateIO EstimateCPU AvgRowSize TotalSubtreeCost OutputList Warnings Type Parallel EstimateExecutions -- --- --- --- -- -- - -- -- -- -- --- - -- select * from t1 join t2 on (t1.incdata_id = t2.incdata_id); 1 1 0 NULL NULL 1 NULL 1.0 NULL NULL NULL 2.4348916E-2 NULL NULL SELECT 0 NULL |--Hash Match(Inner Join, HASH:([master].[dbo].[t1].[incdata_id])=([Expr1006]), RESIDUAL:([master].[dbo].[t1].[incdata_id]=[Expr1006]))1 2 1 Hash Match Inner Join HASH:([master].[dbo].[t1].[incdata_id])=([Expr1006]), RESIDUAL:([master].[dbo].[t1].[incdata_id]=[Expr1006]) NULL 1.0 0.0 1.7779617E-2 59 2.4348916E-2 [master].[dbo].[t1].[incdata_id], [master].[dbo].[t1].[v], [master].[dbo].[t2].[incdata_id], [master].[dbo].[t2].[v] NULL PLAN_ROW 01.0 |--Table Scan(OBJECT:([master].[dbo].[t1])) 1 3 2 Table Scan Table Scan OBJECT:([master].[dbo].[t1]) [master].[dbo].[t1].[incdata_id], [master].[dbo].[t1].[v] 1.0 0.003125 0.0001581 240.0032831 [master].[dbo].[t1].[incdata_id], [master].[dbo].[t1].[v] NULL PLAN_ROW 01.0 |--Compute Scalar(DEFINE:([Expr1006]=CONVERT_IMPLICIT(decimal(21,0),[master].[dbo].[t2].[incdata_id],0))) 1 4 2 Compute Scalar Compute Scalar DEFINE:([Expr1006]=CONVERT_IMPLICIT(decimal(21,0),[master].[dbo].[t2].[incdata_id],0)) [Expr1006]=CONVERT_IMPLICIT(decimal(21,0),[master].[dbo].[t2].[incdata_id],0) 1.00.0 0.001 57 3.2832001E-3 [master].[dbo].[t2].[incdata_id], [master].[dbo].[t2].[v], [Expr1006] NULL PLAN_ROW 0