[ 
https://issues.apache.org/jira/browse/HIVE-16310?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=15944556#comment-15944556
 ] 

Pengcheng Xiong commented on HIVE-16310:
----------------------------------------

Tested with the q file in the description. Before the patch, the output is:
{code}
{"STAGE DEPENDENCIES":{"Stage-1":{"ROOT STAGE":"TRUE"},"Stage-0":{"DEPENDENT 
STAGES":"Stage-1"}},"STAGE 
PLANS":{"Stage-1":{"Tez":{"DagId:":"pxiong_20170327214643_9a73bc1d-d074-4308-9b7c-aa0f5feac22f:33","Edges:":{"Map
 4":{"parent":"Reducer 3","type":"BROADCAST_EDGE"},"Reducer 2":[{"parent":"Map 
1","type":"SIMPLE_EDGE"},{"parent":"Map 4","type":"SIMPLE_EDGE"}],"Reducer 
3":{"parent":"Map 
1","type":"CUSTOM_SIMPLE_EDGE"}},"DagName:":"","Vertices:":{"Map 1":{"Map 
Operator Tree:":[{"TableScan":{"alias:":"t1","filterExpr:":"(tinyint_col_3 > 3) 
(type: boolean)","Statistics:":"Num rows: 1 Data size: 0 Basic stats: PARTIAL 
Column stats: NONE","OperatorId:":"TS_0","children":{"Filter 
Operator":{"predicate:":"(tinyint_col_3 > 3) (type: 
boolean)","Statistics:":"Num rows: 1 Data size: 0 Basic stats: PARTIAL Column 
stats: NONE","OperatorId:":"FIL_26","children":{"Select 
Operator":{"expressions:":"tinyint_col_3 (type: 
tinyint)","outputColumnNames:":["_col0"],"Statistics:":"Num rows: 1 Data size: 
0 Basic stats: PARTIAL Column stats: 
NONE","OperatorId:":"SEL_27","children":[{"Reduce Output Operator":{"key 
expressions:":"_col0 (type: tinyint)","sort order:":"+","Map-reduce partition 
columns:":"_col0 (type: tinyint)","Statistics:":"Num rows: 1 Data size: 0 Basic 
stats: PARTIAL Column stats: NONE","OperatorId:":"RS_28"}},{"Select 
Operator":{"expressions:":"_col0 (type: 
tinyint)","outputColumnNames:":["_col0"],"Statistics:":"Num rows: 1 Data size: 
0 Basic stats: PARTIAL Column stats: 
NONE","OperatorId:":"SEL_29","children":{"Group By 
Operator":{"aggregations:":["min(_col0)","max(_col0)","bloom_filter(_col0, 
expectedEntries=2)"],"mode:":"hash","outputColumnNames:":["_col0","_col1","_col2"],"Statistics:":"Num
 rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: 
NONE","OperatorId:":"GBY_30","children":{"Reduce Output Operator":{"sort 
order:":"","Statistics:":"Num rows: 1 Data size: 12 Basic stats: COMPLETE 
Column stats: NONE","value expressions:":"_col0 (type: tinyint), _col1 (type: 
tinyint), _col2 (type: binary)","OperatorId:":"RS_31"}}}}}}]}}}}}}],"Execution 
mode:":"vectorized, llap","LLAP IO:":"no inputs"},"Map 4":{"Map Operator 
Tree:":[{"TableScan":{"alias:":"t2","filterExpr:":"((tinyint_col_20 > 3) and 
(tinyint_col_20 BETWEEN DynamicValue(RS_6_t1_tinyint_col_20_min) AND 
DynamicValue(RS_6_t1_tinyint_col_20_max) and in_bloom_filter(tinyint_col_20, 
DynamicValue(RS_6_t1_tinyint_col_20_bloom_filter)))) (type: 
boolean)","Statistics:":"Num rows: 1 Data size: 0 Basic stats: PARTIAL Column 
stats: NONE","OperatorId:":"TS_3","children":{"Filter 
Operator":{"predicate:":"((tinyint_col_20 > 3) and (tinyint_col_20 BETWEEN 
DynamicValue(RS_6_t1_tinyint_col_20_min) AND 
DynamicValue(RS_6_t1_tinyint_col_20_max) and in_bloom_filter(tinyint_col_20, 
DynamicValue(RS_6_t1_tinyint_col_20_bloom_filter)))) (type: 
boolean)","Statistics:":"Num rows: 1 Data size: 0 Basic stats: PARTIAL Column 
stats: NONE","OperatorId:":"FIL_34","children":{"Select 
Operator":{"expressions:":"tinyint_col_20 (type: 
tinyint)","outputColumnNames:":["_col0"],"Statistics:":"Num rows: 1 Data size: 
0 Basic stats: PARTIAL Column stats: 
NONE","OperatorId:":"SEL_35","children":{"Reduce Output Operator":{"key 
expressions:":"_col0 (type: tinyint)","sort order:":"+","Map-reduce partition 
columns:":"_col0 (type: tinyint)","Statistics:":"Num rows: 1 Data size: 0 Basic 
stats: PARTIAL Column stats: NONE","OperatorId:":"RS_36"}}}}}}}}],"Execution 
mode:":"vectorized, llap","LLAP IO:":"no inputs"},"Reducer 2":{"Execution 
mode:":"llap","Reduce Operator Tree:":{"Merge Join Operator":{"condition 
map:":[{"":"Inner Join 0 to 1"}],"keys:":{"0":"_col0 (type: 
tinyint)","1":"_col0 (type: 
tinyint)"},"outputColumnNames:":["_col1"],"Statistics:":"Num rows: 1 Data size: 
0 Basic stats: PARTIAL Column stats: 
NONE","OperatorId:":"MERGEJOIN_25","children":{"Select 
Operator":{"expressions:":"_col1 (type: 
tinyint)","outputColumnNames:":["_col0"],"Statistics:":"Num rows: 1 Data size: 
0 Basic stats: PARTIAL Column stats: 
NONE","OperatorId:":"SEL_9","children":{"File Output 
Operator":{"compressed:":"false","Statistics:":"Num rows: 1 Data size: 0 Basic 
stats: PARTIAL Column stats: NONE","table:":{"input 
format:":"org.apache.hadoop.mapred.SequenceFileInputFormat","output 
format:":"org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat","serde:":"org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe"},"OperatorId:":"FS_10"}}}}}}},"Reducer
 3":{"Execution mode:":"vectorized, llap","Reduce Operator Tree:":{"Group By 
Operator":{"aggregations:":["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2,
 
expectedEntries=2)"],"mode:":"final","outputColumnNames:":["_col0","_col1","_col2"],"Statistics:":"Num
 rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: 
NONE","OperatorId:":"GBY_32","children":{"Reduce Output Operator":{"sort 
order:":"","Statistics:":"Num rows: 1 Data size: 12 Basic stats: COMPLETE 
Column stats: NONE","value expressions:":"_col0 (type: tinyint), _col1 (type: 
tinyint), _col2 (type: binary)","OperatorId:":"RS_33"}}}}}}}},"Stage-0":{"Fetch 
Operator":{"limit:":"-1","Processor 
Tree:":{"ListSink":{"OperatorId:":"LIST_SINK_37"}}}}}}
{code}
After the patch, the output is:
{code}
{"STAGE DEPENDENCIES":{"Stage-1":{"ROOT STAGE":"TRUE"},"Stage-0":{"DEPENDENT 
STAGES":"Stage-1"}},"STAGE 
PLANS":{"Stage-1":{"Tez":{"DagId:":"pxiong_20170327210546_07b8bd9f-275b-4ce6-92f2-974f3eafcc83:33","Edges:":{"Map
 4":{"parent":"Reducer 3","type":"BROADCAST_EDGE"},"Reducer 2":[{"parent":"Map 
1","type":"SIMPLE_EDGE"},{"parent":"Map 4","type":"SIMPLE_EDGE"}],"Reducer 
3":{"parent":"Map 
1","type":"CUSTOM_SIMPLE_EDGE"}},"DagName:":"","Vertices:":{"Map 1":{"Map 
Operator Tree:":[{"TableScan":{"alias:":"t1","filterExpr:":"(tinyint_col_3 > 3) 
(type: boolean)","Statistics:":"Num rows: 1 Data size: 0 Basic stats: PARTIAL 
Column stats: NONE","OperatorId:":"TS_0","children":{"Filter 
Operator":{"predicate:":"(tinyint_col_3 > 3) (type: 
boolean)","Statistics:":"Num rows: 1 Data size: 0 Basic stats: PARTIAL Column 
stats: NONE","OperatorId:":"FIL_13","children":{"Select 
Operator":{"expressions:":"tinyint_col_3 (type: 
tinyint)","outputColumnNames:":["_col0"],"Statistics:":"Num rows: 1 Data size: 
0 Basic stats: PARTIAL Column stats: 
NONE","OperatorId:":"SEL_2","children":[{"Reduce Output Operator":{"key 
expressions:":"_col0 (type: tinyint)","sort order:":"+","Map-reduce partition 
columns:":"_col0 (type: tinyint)","Statistics:":"Num rows: 1 Data size: 0 Basic 
stats: PARTIAL Column stats: 
NONE","OperatorId:":"RS_6","OutputOperators:":"[JOIN_8]"}},{"Select 
Operator":{"expressions:":"_col0 (type: 
tinyint)","outputColumnNames:":["_col0"],"Statistics:":"Num rows: 1 Data size: 
0 Basic stats: PARTIAL Column stats: 
NONE","OperatorId:":"SEL_20","children":{"Group By 
Operator":{"aggregations:":["min(_col0)","max(_col0)","bloom_filter(_col0, 
expectedEntries=2)"],"mode:":"hash","outputColumnNames:":["_col0","_col1","_col2"],"Statistics:":"Num
 rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: 
NONE","OperatorId:":"GBY_21","children":{"Reduce Output Operator":{"sort 
order:":"","Statistics:":"Num rows: 1 Data size: 12 Basic stats: COMPLETE 
Column stats: NONE","value expressions:":"_col0 (type: tinyint), _col1 (type: 
tinyint), _col2 (type: 
binary)","OperatorId:":"RS_22","OutputOperators:":"[GBY_23]"}}}}}}]}}}}}}],"Execution
 mode:":"vectorized, llap","LLAP IO:":"no inputs"},"Map 4":{"Map Operator 
Tree:":[{"TableScan":{"alias:":"t2","filterExpr:":"((tinyint_col_20 > 3) and 
(tinyint_col_20 BETWEEN DynamicValue(RS_6_t1_tinyint_col_20_min) AND 
DynamicValue(RS_6_t1_tinyint_col_20_max) and in_bloom_filter(tinyint_col_20, 
DynamicValue(RS_6_t1_tinyint_col_20_bloom_filter)))) (type: 
boolean)","Statistics:":"Num rows: 1 Data size: 0 Basic stats: PARTIAL Column 
stats: NONE","OperatorId:":"TS_3","children":{"Filter 
Operator":{"predicate:":"((tinyint_col_20 > 3) and (tinyint_col_20 BETWEEN 
DynamicValue(RS_6_t1_tinyint_col_20_min) AND 
DynamicValue(RS_6_t1_tinyint_col_20_max) and in_bloom_filter(tinyint_col_20, 
DynamicValue(RS_6_t1_tinyint_col_20_bloom_filter)))) (type: 
boolean)","Statistics:":"Num rows: 1 Data size: 0 Basic stats: PARTIAL Column 
stats: NONE","OperatorId:":"FIL_14","children":{"Select 
Operator":{"expressions:":"tinyint_col_20 (type: 
tinyint)","outputColumnNames:":["_col0"],"Statistics:":"Num rows: 1 Data size: 
0 Basic stats: PARTIAL Column stats: 
NONE","OperatorId:":"SEL_5","children":{"Reduce Output Operator":{"key 
expressions:":"_col0 (type: tinyint)","sort order:":"+","Map-reduce partition 
columns:":"_col0 (type: tinyint)","Statistics:":"Num rows: 1 Data size: 0 Basic 
stats: PARTIAL Column stats: 
NONE","OperatorId:":"RS_7","OutputOperators:":"[JOIN_8]"}}}}}}}}],"Execution 
mode:":"vectorized, llap","LLAP IO:":"no inputs"},"Reducer 2":{"Execution 
mode:":"llap","Reduce Operator Tree:":{"Merge Join Operator":{"condition 
map:":[{"":"Inner Join 0 to 1"}],"keys:":{"0":"_col0 (type: 
tinyint)","1":"_col0 (type: 
tinyint)"},"outputColumnNames:":["_col1"],"Statistics:":"Num rows: 1 Data size: 
0 Basic stats: PARTIAL Column stats: 
NONE","OperatorId:":"MERGEJOIN_25","children":{"Select 
Operator":{"expressions:":"_col1 (type: 
tinyint)","outputColumnNames:":["_col0"],"Statistics:":"Num rows: 1 Data size: 
0 Basic stats: PARTIAL Column stats: 
NONE","OperatorId:":"SEL_9","children":{"File Output 
Operator":{"compressed:":"false","Statistics:":"Num rows: 1 Data size: 0 Basic 
stats: PARTIAL Column stats: NONE","table:":{"input 
format:":"org.apache.hadoop.mapred.SequenceFileInputFormat","output 
format:":"org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat","serde:":"org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe"},"OperatorId:":"FS_10"}}}}}}},"Reducer
 3":{"Execution mode:":"vectorized, llap","Reduce Operator Tree:":{"Group By 
Operator":{"aggregations:":["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2,
 
expectedEntries=2)"],"mode:":"final","outputColumnNames:":["_col0","_col1","_col2"],"Statistics:":"Num
 rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: 
NONE","OperatorId:":"GBY_23","children":{"Reduce Output Operator":{"sort 
order:":"","Statistics:":"Num rows: 1 Data size: 12 Basic stats: COMPLETE 
Column stats: NONE","value expressions:":"_col0 (type: tinyint), _col1 (type: 
tinyint), _col2 (type: 
binary)","OperatorId:":"RS_24","OutputOperators:":"[TS_3]"}}}}}}}},"Stage-0":{"Fetch
 Operator":{"limit:":"-1","Processor 
Tree:":{"ListSink":{"OperatorId:":"LIST_SINK_37"}}}}}}
{code}
Due to the masked pattern, it is not necessary to include a q test.

> Get the output operators of Reducesink when vectorization is on
> ---------------------------------------------------------------
>
>                 Key: HIVE-16310
>                 URL: https://issues.apache.org/jira/browse/HIVE-16310
>             Project: Hive
>          Issue Type: Bug
>            Reporter: Pengcheng Xiong
>            Assignee: Pengcheng Xiong
>
> {code}
> set hive.compute.query.using.stats=false;
> set hive.mapred.mode=nonstrict;
> set hive.explain.user=false;
> set hive.optimize.ppd=true;
> set hive.ppd.remove.duplicatefilters=true;
> set hive.tez.dynamic.partition.pruning=true;
> set hive.tez.dynamic.semijoin.reduction=true;
> set hive.optimize.metadataonly=false;
> set hive.optimize.index.filter=true;
> set hive.tez.bigtable.minsize.semijoin.reduction=1;
> set hive.tez.min.bloom.filter.entries=1;
> set hive.tez.dynamic.semijoin.reduction.threshold=-999999999999;
> set hive.auto.convert.join=false;
> set hive.vectorized.execution.enabled=true;
> CREATE TABLE `table_1`(
>   `bigint_col_7` bigint,
>   `decimal2016_col_26` decimal(20,16),
>   `tinyint_col_3` tinyint,
>   `decimal2612_col_77` decimal(26,12),
>   `timestamp_col_9` timestamp);
> CREATE TABLE `table_18`(
>   `tinyint_col_15` tinyint,
>   `decimal2709_col_9` decimal(27,9),
>   `tinyint_col_20` tinyint,
>   `smallint_col_19` smallint,
>   `decimal1911_col_16` decimal(19,11),
>   `timestamp_col_18` timestamp);
> explain formatted select t2.tinyint_col_20 from table_1 t1 join table_18 t2 
> on t1.tinyint_col_3 = t2.tinyint_col_20 where t2.tinyint_col_20 > 3
> ;
> drop table table_1;
> drop table table_18;
> {code}



--
This message was sent by Atlassian JIRA
(v6.3.15#6346)

Reply via email to