[
https://issues.apache.org/jira/browse/HIVE-16310?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=15944556#comment-15944556
]
Pengcheng Xiong commented on HIVE-16310:
----------------------------------------
Tested with the q file from the description. The explain output before the patch is:
{code}
{"STAGE DEPENDENCIES":{"Stage-1":{"ROOT STAGE":"TRUE"},"Stage-0":{"DEPENDENT
STAGES":"Stage-1"}},"STAGE
PLANS":{"Stage-1":{"Tez":{"DagId:":"pxiong_20170327214643_9a73bc1d-d074-4308-9b7c-aa0f5feac22f:33","Edges:":{"Map
4":{"parent":"Reducer 3","type":"BROADCAST_EDGE"},"Reducer 2":[{"parent":"Map
1","type":"SIMPLE_EDGE"},{"parent":"Map 4","type":"SIMPLE_EDGE"}],"Reducer
3":{"parent":"Map
1","type":"CUSTOM_SIMPLE_EDGE"}},"DagName:":"","Vertices:":{"Map 1":{"Map
Operator Tree:":[{"TableScan":{"alias:":"t1","filterExpr:":"(tinyint_col_3 > 3)
(type: boolean)","Statistics:":"Num rows: 1 Data size: 0 Basic stats: PARTIAL
Column stats: NONE","OperatorId:":"TS_0","children":{"Filter
Operator":{"predicate:":"(tinyint_col_3 > 3) (type:
boolean)","Statistics:":"Num rows: 1 Data size: 0 Basic stats: PARTIAL Column
stats: NONE","OperatorId:":"FIL_26","children":{"Select
Operator":{"expressions:":"tinyint_col_3 (type:
tinyint)","outputColumnNames:":["_col0"],"Statistics:":"Num rows: 1 Data size:
0 Basic stats: PARTIAL Column stats:
NONE","OperatorId:":"SEL_27","children":[{"Reduce Output Operator":{"key
expressions:":"_col0 (type: tinyint)","sort order:":"+","Map-reduce partition
columns:":"_col0 (type: tinyint)","Statistics:":"Num rows: 1 Data size: 0 Basic
stats: PARTIAL Column stats: NONE","OperatorId:":"RS_28"}},{"Select
Operator":{"expressions:":"_col0 (type:
tinyint)","outputColumnNames:":["_col0"],"Statistics:":"Num rows: 1 Data size:
0 Basic stats: PARTIAL Column stats:
NONE","OperatorId:":"SEL_29","children":{"Group By
Operator":{"aggregations:":["min(_col0)","max(_col0)","bloom_filter(_col0,
expectedEntries=2)"],"mode:":"hash","outputColumnNames:":["_col0","_col1","_col2"],"Statistics:":"Num
rows: 1 Data size: 12 Basic stats: COMPLETE Column stats:
NONE","OperatorId:":"GBY_30","children":{"Reduce Output Operator":{"sort
order:":"","Statistics:":"Num rows: 1 Data size: 12 Basic stats: COMPLETE
Column stats: NONE","value expressions:":"_col0 (type: tinyint), _col1 (type:
tinyint), _col2 (type: binary)","OperatorId:":"RS_31"}}}}}}]}}}}}}],"Execution
mode:":"vectorized, llap","LLAP IO:":"no inputs"},"Map 4":{"Map Operator
Tree:":[{"TableScan":{"alias:":"t2","filterExpr:":"((tinyint_col_20 > 3) and
(tinyint_col_20 BETWEEN DynamicValue(RS_6_t1_tinyint_col_20_min) AND
DynamicValue(RS_6_t1_tinyint_col_20_max) and in_bloom_filter(tinyint_col_20,
DynamicValue(RS_6_t1_tinyint_col_20_bloom_filter)))) (type:
boolean)","Statistics:":"Num rows: 1 Data size: 0 Basic stats: PARTIAL Column
stats: NONE","OperatorId:":"TS_3","children":{"Filter
Operator":{"predicate:":"((tinyint_col_20 > 3) and (tinyint_col_20 BETWEEN
DynamicValue(RS_6_t1_tinyint_col_20_min) AND
DynamicValue(RS_6_t1_tinyint_col_20_max) and in_bloom_filter(tinyint_col_20,
DynamicValue(RS_6_t1_tinyint_col_20_bloom_filter)))) (type:
boolean)","Statistics:":"Num rows: 1 Data size: 0 Basic stats: PARTIAL Column
stats: NONE","OperatorId:":"FIL_34","children":{"Select
Operator":{"expressions:":"tinyint_col_20 (type:
tinyint)","outputColumnNames:":["_col0"],"Statistics:":"Num rows: 1 Data size:
0 Basic stats: PARTIAL Column stats:
NONE","OperatorId:":"SEL_35","children":{"Reduce Output Operator":{"key
expressions:":"_col0 (type: tinyint)","sort order:":"+","Map-reduce partition
columns:":"_col0 (type: tinyint)","Statistics:":"Num rows: 1 Data size: 0 Basic
stats: PARTIAL Column stats: NONE","OperatorId:":"RS_36"}}}}}}}}],"Execution
mode:":"vectorized, llap","LLAP IO:":"no inputs"},"Reducer 2":{"Execution
mode:":"llap","Reduce Operator Tree:":{"Merge Join Operator":{"condition
map:":[{"":"Inner Join 0 to 1"}],"keys:":{"0":"_col0 (type:
tinyint)","1":"_col0 (type:
tinyint)"},"outputColumnNames:":["_col1"],"Statistics:":"Num rows: 1 Data size:
0 Basic stats: PARTIAL Column stats:
NONE","OperatorId:":"MERGEJOIN_25","children":{"Select
Operator":{"expressions:":"_col1 (type:
tinyint)","outputColumnNames:":["_col0"],"Statistics:":"Num rows: 1 Data size:
0 Basic stats: PARTIAL Column stats:
NONE","OperatorId:":"SEL_9","children":{"File Output
Operator":{"compressed:":"false","Statistics:":"Num rows: 1 Data size: 0 Basic
stats: PARTIAL Column stats: NONE","table:":{"input
format:":"org.apache.hadoop.mapred.SequenceFileInputFormat","output
format:":"org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat","serde:":"org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe"},"OperatorId:":"FS_10"}}}}}}},"Reducer
3":{"Execution mode:":"vectorized, llap","Reduce Operator Tree:":{"Group By
Operator":{"aggregations:":["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2,
expectedEntries=2)"],"mode:":"final","outputColumnNames:":["_col0","_col1","_col2"],"Statistics:":"Num
rows: 1 Data size: 12 Basic stats: COMPLETE Column stats:
NONE","OperatorId:":"GBY_32","children":{"Reduce Output Operator":{"sort
order:":"","Statistics:":"Num rows: 1 Data size: 12 Basic stats: COMPLETE
Column stats: NONE","value expressions:":"_col0 (type: tinyint), _col1 (type:
tinyint), _col2 (type: binary)","OperatorId:":"RS_33"}}}}}}}},"Stage-0":{"Fetch
Operator":{"limit:":"-1","Processor
Tree:":{"ListSink":{"OperatorId:":"LIST_SINK_37"}}}}}}
{code}
The explain output after the patch (each Reduce Output Operator now includes an "OutputOperators:" entry) is:
{code}
{"STAGE DEPENDENCIES":{"Stage-1":{"ROOT STAGE":"TRUE"},"Stage-0":{"DEPENDENT
STAGES":"Stage-1"}},"STAGE
PLANS":{"Stage-1":{"Tez":{"DagId:":"pxiong_20170327210546_07b8bd9f-275b-4ce6-92f2-974f3eafcc83:33","Edges:":{"Map
4":{"parent":"Reducer 3","type":"BROADCAST_EDGE"},"Reducer 2":[{"parent":"Map
1","type":"SIMPLE_EDGE"},{"parent":"Map 4","type":"SIMPLE_EDGE"}],"Reducer
3":{"parent":"Map
1","type":"CUSTOM_SIMPLE_EDGE"}},"DagName:":"","Vertices:":{"Map 1":{"Map
Operator Tree:":[{"TableScan":{"alias:":"t1","filterExpr:":"(tinyint_col_3 > 3)
(type: boolean)","Statistics:":"Num rows: 1 Data size: 0 Basic stats: PARTIAL
Column stats: NONE","OperatorId:":"TS_0","children":{"Filter
Operator":{"predicate:":"(tinyint_col_3 > 3) (type:
boolean)","Statistics:":"Num rows: 1 Data size: 0 Basic stats: PARTIAL Column
stats: NONE","OperatorId:":"FIL_13","children":{"Select
Operator":{"expressions:":"tinyint_col_3 (type:
tinyint)","outputColumnNames:":["_col0"],"Statistics:":"Num rows: 1 Data size:
0 Basic stats: PARTIAL Column stats:
NONE","OperatorId:":"SEL_2","children":[{"Reduce Output Operator":{"key
expressions:":"_col0 (type: tinyint)","sort order:":"+","Map-reduce partition
columns:":"_col0 (type: tinyint)","Statistics:":"Num rows: 1 Data size: 0 Basic
stats: PARTIAL Column stats:
NONE","OperatorId:":"RS_6","OutputOperators:":"[JOIN_8]"}},{"Select
Operator":{"expressions:":"_col0 (type:
tinyint)","outputColumnNames:":["_col0"],"Statistics:":"Num rows: 1 Data size:
0 Basic stats: PARTIAL Column stats:
NONE","OperatorId:":"SEL_20","children":{"Group By
Operator":{"aggregations:":["min(_col0)","max(_col0)","bloom_filter(_col0,
expectedEntries=2)"],"mode:":"hash","outputColumnNames:":["_col0","_col1","_col2"],"Statistics:":"Num
rows: 1 Data size: 12 Basic stats: COMPLETE Column stats:
NONE","OperatorId:":"GBY_21","children":{"Reduce Output Operator":{"sort
order:":"","Statistics:":"Num rows: 1 Data size: 12 Basic stats: COMPLETE
Column stats: NONE","value expressions:":"_col0 (type: tinyint), _col1 (type:
tinyint), _col2 (type:
binary)","OperatorId:":"RS_22","OutputOperators:":"[GBY_23]"}}}}}}]}}}}}}],"Execution
mode:":"vectorized, llap","LLAP IO:":"no inputs"},"Map 4":{"Map Operator
Tree:":[{"TableScan":{"alias:":"t2","filterExpr:":"((tinyint_col_20 > 3) and
(tinyint_col_20 BETWEEN DynamicValue(RS_6_t1_tinyint_col_20_min) AND
DynamicValue(RS_6_t1_tinyint_col_20_max) and in_bloom_filter(tinyint_col_20,
DynamicValue(RS_6_t1_tinyint_col_20_bloom_filter)))) (type:
boolean)","Statistics:":"Num rows: 1 Data size: 0 Basic stats: PARTIAL Column
stats: NONE","OperatorId:":"TS_3","children":{"Filter
Operator":{"predicate:":"((tinyint_col_20 > 3) and (tinyint_col_20 BETWEEN
DynamicValue(RS_6_t1_tinyint_col_20_min) AND
DynamicValue(RS_6_t1_tinyint_col_20_max) and in_bloom_filter(tinyint_col_20,
DynamicValue(RS_6_t1_tinyint_col_20_bloom_filter)))) (type:
boolean)","Statistics:":"Num rows: 1 Data size: 0 Basic stats: PARTIAL Column
stats: NONE","OperatorId:":"FIL_14","children":{"Select
Operator":{"expressions:":"tinyint_col_20 (type:
tinyint)","outputColumnNames:":["_col0"],"Statistics:":"Num rows: 1 Data size:
0 Basic stats: PARTIAL Column stats:
NONE","OperatorId:":"SEL_5","children":{"Reduce Output Operator":{"key
expressions:":"_col0 (type: tinyint)","sort order:":"+","Map-reduce partition
columns:":"_col0 (type: tinyint)","Statistics:":"Num rows: 1 Data size: 0 Basic
stats: PARTIAL Column stats:
NONE","OperatorId:":"RS_7","OutputOperators:":"[JOIN_8]"}}}}}}}}],"Execution
mode:":"vectorized, llap","LLAP IO:":"no inputs"},"Reducer 2":{"Execution
mode:":"llap","Reduce Operator Tree:":{"Merge Join Operator":{"condition
map:":[{"":"Inner Join 0 to 1"}],"keys:":{"0":"_col0 (type:
tinyint)","1":"_col0 (type:
tinyint)"},"outputColumnNames:":["_col1"],"Statistics:":"Num rows: 1 Data size:
0 Basic stats: PARTIAL Column stats:
NONE","OperatorId:":"MERGEJOIN_25","children":{"Select
Operator":{"expressions:":"_col1 (type:
tinyint)","outputColumnNames:":["_col0"],"Statistics:":"Num rows: 1 Data size:
0 Basic stats: PARTIAL Column stats:
NONE","OperatorId:":"SEL_9","children":{"File Output
Operator":{"compressed:":"false","Statistics:":"Num rows: 1 Data size: 0 Basic
stats: PARTIAL Column stats: NONE","table:":{"input
format:":"org.apache.hadoop.mapred.SequenceFileInputFormat","output
format:":"org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat","serde:":"org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe"},"OperatorId:":"FS_10"}}}}}}},"Reducer
3":{"Execution mode:":"vectorized, llap","Reduce Operator Tree:":{"Group By
Operator":{"aggregations:":["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2,
expectedEntries=2)"],"mode:":"final","outputColumnNames:":["_col0","_col1","_col2"],"Statistics:":"Num
rows: 1 Data size: 12 Basic stats: COMPLETE Column stats:
NONE","OperatorId:":"GBY_23","children":{"Reduce Output Operator":{"sort
order:":"","Statistics:":"Num rows: 1 Data size: 12 Basic stats: COMPLETE
Column stats: NONE","value expressions:":"_col0 (type: tinyint), _col1 (type:
tinyint), _col2 (type:
binary)","OperatorId:":"RS_24","OutputOperators:":"[TS_3]"}}}}}}}},"Stage-0":{"Fetch
Operator":{"limit:":"-1","Processor
Tree:":{"ListSink":{"OperatorId:":"LIST_SINK_37"}}}}}}
{code}
Due to the masked pattern, it is not necessary to include a q test.
> Get the output operators of Reducesink when vectorization is on
> ---------------------------------------------------------------
>
> Key: HIVE-16310
> URL: https://issues.apache.org/jira/browse/HIVE-16310
> Project: Hive
> Issue Type: Bug
> Reporter: Pengcheng Xiong
> Assignee: Pengcheng Xiong
>
> {code}
> set hive.compute.query.using.stats=false;
> set hive.mapred.mode=nonstrict;
> set hive.explain.user=false;
> set hive.optimize.ppd=true;
> set hive.ppd.remove.duplicatefilters=true;
> set hive.tez.dynamic.partition.pruning=true;
> set hive.tez.dynamic.semijoin.reduction=true;
> set hive.optimize.metadataonly=false;
> set hive.optimize.index.filter=true;
> set hive.tez.bigtable.minsize.semijoin.reduction=1;
> set hive.tez.min.bloom.filter.entries=1;
> set hive.tez.dynamic.semijoin.reduction.threshold=-999999999999;
> set hive.auto.convert.join=false;
> set hive.vectorized.execution.enabled=true;
> CREATE TABLE `table_1`(
> `bigint_col_7` bigint,
> `decimal2016_col_26` decimal(20,16),
> `tinyint_col_3` tinyint,
> `decimal2612_col_77` decimal(26,12),
> `timestamp_col_9` timestamp);
> CREATE TABLE `table_18`(
> `tinyint_col_15` tinyint,
> `decimal2709_col_9` decimal(27,9),
> `tinyint_col_20` tinyint,
> `smallint_col_19` smallint,
> `decimal1911_col_16` decimal(19,11),
> `timestamp_col_18` timestamp);
> explain formatted select t2.tinyint_col_20 from table_1 t1 join table_18 t2
> on t1.tinyint_col_3 = t2.tinyint_col_20 where t2.tinyint_col_20 > 3
> ;
> drop table table_1;
> drop table table_18;
> {code}
--
This message was sent by Atlassian JIRA
(v6.3.15#6346)