[ 
https://issues.apache.org/jira/browse/HIVE-21054?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel
 ]

daveKim updated HIVE-21054:
---------------------------
    Summary: union all and join result wrong  (was: union all and join )

> union all and join result wrong
> -------------------------------
>
>                 Key: HIVE-21054
>                 URL: https://issues.apache.org/jira/browse/HIVE-21054
>             Project: Hive
>          Issue Type: Bug
>          Components: Query Planning
>    Affects Versions: 3.1.1
>            Reporter: daveKim
>            Priority: Major
>
>  i make 3 tables 
>  
> {code:java}
> // 코드 자리 표시자
> CREATE TABLE `testdb`.`tab` ( 
> stn1 STRING COMMENT '' ,
> int2 BIGINT COMMENT '' ,
> float3 FLOAT COMMENT '' ,
> long4 STRING COMMENT '' ,
> double5 DOUBLE COMMENT '' ,
> boolean6 BOOLEAN COMMENT '' ,
> timestamp7 TIMESTAMP COMMENT '' ,
> words8 STRING COMMENT '' ,
> email9 STRING COMMENT '' ,
> time10 STRING COMMENT '' ,
> int11 BIGINT COMMENT '' ,
> float12 FLOAT COMMENT '' ,
> long13 STRING COMMENT '' ,
> double14 DOUBLE COMMENT '' ,
> boolean15 BOOLEAN COMMENT '' ,
> timestamp16 TIMESTAMP COMMENT '' ,
> email17 STRING COMMENT '' ,
> time18 STRING COMMENT '' ,
> list19 STRING COMMENT '' ,
> id20 STRING COMMENT '' ,
> sqltime21 TIMESTAMP COMMENT '' ,
> ctype22 INT COMMENT '' ,
> reg23 STRING COMMENT '' 
> )
> CREATE TABLE `testdb`.`space` ( 
> stn1 STRING COMMENT '' ,
> int2 BIGINT COMMENT '' ,
> float3 FLOAT COMMENT '' ,
> long4 STRING COMMENT '' ,
> double5 DOUBLE COMMENT '' ,
> boolean6 BOOLEAN COMMENT '' ,
> timestamp7 TIMESTAMP COMMENT '' ,
> words8 STRING COMMENT '' ,
> email9 STRING COMMENT '' ,
> time10 STRING COMMENT '' ,
> int11 BIGINT COMMENT '' ,
> float12 FLOAT COMMENT '' ,
> long13 STRING COMMENT '' ,
> double14 DOUBLE COMMENT '' ,
> boolean15 BOOLEAN COMMENT '' ,
> timestamp16 TIMESTAMP COMMENT '' ,
> email17 STRING COMMENT '' ,
> time18 STRING COMMENT '' ,
> list19 STRING COMMENT '' ,
> id20 STRING COMMENT '' ,
> sqltime21 TIMESTAMP COMMENT '' ,
> ctype22 INT COMMENT '' ,
> reg23 STRING COMMENT '' 
> )
> CREATE TABLE `testdb`.`colon` ( 
> stn1 STRING COMMENT '' ,
> int2 BIGINT COMMENT '' ,
> float3 FLOAT COMMENT '' ,
> long4 STRING COMMENT '' ,
> double5 DOUBLE COMMENT '' ,
> boolean6 BOOLEAN COMMENT '' ,
> timestamp7 TIMESTAMP COMMENT '' ,
> words8 STRING COMMENT '' ,
> email9 STRING COMMENT '' ,
> time10 STRING COMMENT '' ,
> int11 BIGINT COMMENT '' ,
> float12 FLOAT COMMENT '' ,
> long13 STRING COMMENT '' ,
> double14 DOUBLE COMMENT '' ,
> boolean15 BOOLEAN COMMENT '' ,
> timestamp16 TIMESTAMP COMMENT '' ,
> email17 STRING COMMENT '' ,
> time18 STRING COMMENT '' ,
> list19 STRING COMMENT '' ,
> id20 STRING COMMENT '' ,
> sqltime21 TIMESTAMP COMMENT '' ,
> ctype22 INT COMMENT '' ,
> reg23 STRING COMMENT '' 
> )
> {code}
>  
>  
> and execute query at below : 
> {code:java}
> // 
> set hive.vectorized.execution.enabled=FALSE;
> SELECT distinct t1.ctype, t1.id_all , t2.list19
> FROM (
> SELECT ctype22 AS ctype, id20 AS id_all, list19 AS item_list 
> FROM tab WHERE stn1='20130101010100'
> AND ctype22 BETWEEN 2 AND 5   --result 45 row
> UNION ALL 
> SELECT ctype22 AS ctype, id20 AS id_all, list19 AS item_list 
> FROM space 
> WHERE stn1='20130102010100'
> AND ctype22 BETWEEN 2 AND 5   --result empty
> ) t1 JOIN colon t2 ON (t1.id_all = t2.id20 AND t2.list19 ='ITEM30') -- join 
> matched 3 rows
> {code}
> expected result 
> ||ctype||id_all||list19||
> |2|104|ITEM30|
> |2|683|ITEM30|
> |1|970|ITEM30|
> but, actual result empty..
>  
>  
> instead make table "union all" query result
> {code:java}
> // 코드 자리 표시자
> set hive.vectorized.execution.enabled=FALSE;
> create table unionalltbl as
> SELECT ctype22 AS close_type, id20 AS id_all, list19 AS ITEM_list 
> FROM tab WHERE stn1='20130101010100'
> AND ctype22 BETWEEN 2 AND 5 
> UNION ALL 
> SELECT ctype22 AS close_type, id20 AS id_all, list19 AS ITEM_list 
> FROM space 
> WHERE stn1='20130102010100'
> AND ctype22 BETWEEN 2 AND 5
> {code}
>  
> and retry query 
> {code:java}
> // 코드 자리 표시자
> set hive.vectorized.execution.enabled=FALSE;
> select DISTINCT t1.close_type, t1.id_all, t2.list19
> from unionalltbl t1 JOIN colon t2 ON t1.id_all=t2.id20 and t2.list19='ITEM30'
> {code}
> i got expected result 
>  
> and i compare two query explain
> first query
> {code:java}
> // 코드 자리 표시자
> rel#18659:HiveProject.HIVE.[](input=rel#18657:HiveAggregate.HIVE.[]
> (
> input=rel#18655:HiveProject.HIVE.[]
> (
> input=rel#18653:HiveJoin.HIVE.[]
> (
> left=rel#18650:HiveProject.HIVE.[]
> (
> input=rel#18648:HiveUnion.HIVE.[]
> (
> input#0=rel#18641:HiveProject.HIVE.[]
> (
> input=rel#18639:HiveFilter.HIVE.[]
> (
> input=rel#18611:HiveTableScan.HIVE.[](table=[testdb.tab],table:alias=tab)[false]
> ,condition=AND(=($0, _UTF-16LE'20130101010100'), BETWEEN(false, $21, 2, 5))
> )
> ,close_type=$21,id_all=$19,item_list=$18
> )
> ,input#1=rel#18646:HiveProject.HIVE.[]
> ( 
> input=rel#18644:HiveFilter.HIVE.[]
> (
> input=rel#18614:HiveTableScan.HIVE.[](table=[testdb.space],table:alias=space)[false]
> ,condition=AND(=($0, _UTF-16LE'20130102010100'), BETWEEN(false, $21, 2, 5))
> ),close_type=$21,id_all=$19,item_list=$18
> )
> ,all=true
> )
> ,close_type=$0,id_all=$1,item_list=$2
> ),
> right=rel#18619:HiveTableScan.HIVE.[]
> (
> table=[testdb.colon],table:alias=t2
> )
> [false],condition=AND
> (
> =($1, $22), =($21, _UTF-16LE'ITEM30')
> )
> ,joinType=inner,algorithm=none,cost=not available
> )
> ,$f0=$0,$f1=$1,$f2=$21
> )
> ,group={0, 1, 2}
> )
> ,close_type=$0,id_all=$1,list19=$2)
> {code}
> second query
> {code:java}
> // 코드 자리 표시자
> rel#17893:HiveProject.HIVE.[]
> (
> input=rel#17891:HiveAggregate.HIVE.[]
> (
> input=rel#17889:HiveProject.HIVE.[]
> (
> input=rel#17887:HiveJoin.HIVE.[]
> (
> left=rel#17872:HiveTableScan.HIVE.[]
> (able=[testdb.unionalltbl],table:alias=t1)[false],
> right=rel#17873:HiveTableScan.HIVE.[](table=[testdb.colon],table:alias=t2)[false],
> condition=AND(=($1, $25), =($24, _UTF-16LE'ITEM30')),
> joinType=inner,algorithm=none,cost=not available
> ),
> $f0=$0,$f1=$1,$f2=$24
> ),
> group={0, 1, 2}
> )
> ,close_type=$0,id_all=$1,list19=$2
> )
> {code}
> why right table 'colon' condition column number different? 
>  



--
This message was sent by Atlassian JIRA
(v7.6.3#76005)

Reply via email to