Repository: hive Updated Branches: refs/heads/master 48b201ee1 -> 7981904fa
HIVE-12478: Improve Hive/Calcite Transitive Predicate inference (Jesus Camacho Rodriguez, reviewed by Laljo John Pullokkaran) (addendum) Project: http://git-wip-us.apache.org/repos/asf/hive/repo Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/7981904f Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/7981904f Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/7981904f Branch: refs/heads/master Commit: 7981904fab059c96cf64757fa47853d90d76e8e2 Parents: 48b201e Author: Jesus Camacho Rodriguez <[email protected]> Authored: Wed Jan 27 09:44:14 2016 +0100 Committer: Jesus Camacho Rodriguez <[email protected]> Committed: Wed Jan 27 09:44:14 2016 +0100 ---------------------------------------------------------------------- .../results/clientpositive/perf/query97.q.out | 239 ++++++++++--------- 1 file changed, 127 insertions(+), 112 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/hive/blob/7981904f/ql/src/test/results/clientpositive/perf/query97.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/perf/query97.q.out b/ql/src/test/results/clientpositive/perf/query97.q.out index 4e254e3..ec5f125 100644 --- a/ql/src/test/results/clientpositive/perf/query97.q.out +++ b/ql/src/test/results/clientpositive/perf/query97.q.out @@ -2,7 +2,7 @@ PREHOOK: query: explain select sum(case when ssci.customer_sk is not null and cs PREHOOK: type: QUERY POSTHOOK: query: explain select sum(case when ssci.customer_sk is not null and csci.customer_sk is null then 1 else 0 end) store_only ,sum(case when ssci.customer_sk is null and csci.customer_sk is not null then 1 else 0 end) catalog_only ,sum(case when ssci.customer_sk is not null and csci.customer_sk is not null then 1 else 0 end) store_and_catalog from ( select ss_customer_sk customer_sk ,ss_item_sk item_sk from store_sales JOIN date_dim ON store_sales.ss_sold_date_sk = date_dim.d_date_sk where d_month_seq between 1193 and 1193 + 11 group by ss_customer_sk ,ss_item_sk) ssci full outer join ( select cs_bill_customer_sk customer_sk ,cs_item_sk item_sk from catalog_sales JOIN date_dim ON catalog_sales.cs_sold_date_sk = date_dim.d_date_sk where d_month_seq between 1193 and 1193 + 11 group by cs_bill_customer_sk ,cs_item_sk) csci on (ssci.customer_sk=csci.customer_sk and ssci.item_sk = csci.item_sk) limit 100 POSTHOOK: type: QUERY -Plan not optimized by CBO. +Plan optimized by CBO. Vertex dependency in root stage Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 6 (SIMPLE_EDGE) @@ -17,132 +17,147 @@ Stage-0 limit:100 Stage-1 Reducer 5 - File Output Operator [FS_35] + File Output Operator [FS_38] compressed:false Statistics:Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE table:{"input format:":"org.apache.hadoop.mapred.TextInputFormat","output format:":"org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat","serde:":"org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe"} - Limit [LIM_34] + Limit [LIM_37] Number of rows:100 Statistics:Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE - Group By Operator [GBY_32] + Group By Operator [GBY_35] | aggregations:["sum(VALUE._col0)","sum(VALUE._col1)","sum(VALUE._col2)"] | outputColumnNames:["_col0","_col1","_col2"] | Statistics:Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE |<-Reducer 4 [SIMPLE_EDGE] - Reduce Output Operator [RS_31] + Reduce Output Operator [RS_34] sort order: Statistics:Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE value expressions:_col0 (type: bigint), _col1 (type: bigint), _col2 (type: bigint) - Group By Operator [GBY_30] - aggregations:["sum(CASE WHEN ((_col0 is not null and _col2 is null)) THEN (1) ELSE (0) END)","sum(CASE WHEN ((_col0 is null and _col2 is not null)) THEN (1) ELSE (0) END)","sum(CASE WHEN ((_col0 is not null and _col2 is not null)) THEN (1) ELSE (0) END)"] + Group By Operator [GBY_33] + aggregations:["sum(_col0)","sum(_col1)","sum(_col2)"] outputColumnNames:["_col0","_col1","_col2"] Statistics:Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE - Merge Join Operator [MERGEJOIN_46] - | condition map:[{"":"Outer Join 0 to 1"}] - | keys:{"0":"_col0 (type: int), _col1 (type: int)","1":"_col0 (type: int), _col1 (type: int)"} - | outputColumnNames:["_col0","_col2"] - | Statistics:Num rows: 22096 Data size: 24726566 Basic stats: COMPLETE Column stats: NONE - |<-Reducer 3 [SIMPLE_EDGE] - | Reduce Output Operator [RS_26] - | key expressions:_col0 (type: int), _col1 (type: int) - | Map-reduce partition columns:_col0 (type: int), _col1 (type: int) - | sort order:++ - | Statistics:Num rows: 20088 Data size: 22478696 Basic stats: COMPLETE Column stats: NONE - | Group By Operator [GBY_11] - | | keys:KEY._col0 (type: int), KEY._col1 (type: int) - | | outputColumnNames:["_col0","_col1"] - | | Statistics:Num rows: 20088 Data size: 22478696 Basic stats: COMPLETE Column stats: NONE - | |<-Reducer 2 [SIMPLE_EDGE] - | Reduce Output Operator [RS_10] - | key expressions:_col0 (type: int), _col1 (type: int) - | Map-reduce partition columns:_col0 (type: int), _col1 (type: int) - | sort order:++ - | Statistics:Num rows: 40176 Data size: 44957392 Basic stats: COMPLETE Column stats: NONE - | Group By Operator [GBY_9] - | keys:_col3 (type: int), _col2 (type: int) - | outputColumnNames:["_col0","_col1"] - | Statistics:Num rows: 40176 Data size: 44957392 Basic stats: COMPLETE Column stats: NONE - | Select Operator [SEL_8] - | outputColumnNames:["_col3","_col2"] - | Statistics:Num rows: 40176 Data size: 44957392 Basic stats: COMPLETE Column stats: NONE - | Merge Join Operator [MERGEJOIN_44] - | | condition map:[{"":"Inner Join 0 to 1"}] - | | keys:{"0":"ss_sold_date_sk (type: int)","1":"d_date_sk (type: int)"} - | | outputColumnNames:["_col2","_col3"] - | | Statistics:Num rows: 40176 Data size: 44957392 Basic stats: COMPLETE Column stats: NONE - | |<-Map 1 [SIMPLE_EDGE] - | | Reduce Output Operator [RS_3] - | | key expressions:ss_sold_date_sk (type: int) - | | Map-reduce partition columns:ss_sold_date_sk (type: int) - | | sort order:+ - | | Statistics:Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE - | | value expressions:ss_item_sk (type: int), ss_customer_sk (type: int) - | | Filter Operator [FIL_40] - | | predicate:ss_sold_date_sk is not null (type: boolean) - | | Statistics:Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE - | | TableScan [TS_0] - | | alias:store_sales - | | Statistics:Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE - | |<-Map 6 [SIMPLE_EDGE] - | Reduce Output Operator [RS_5] - | key expressions:d_date_sk (type: int) - | Map-reduce partition columns:d_date_sk (type: int) - | sort order:+ - | Statistics:Num rows: 36524 Data size: 40870356 Basic stats: COMPLETE Column stats: NONE - | Filter Operator [FIL_41] - | predicate:(d_date_sk is not null and d_month_seq BETWEEN 1193 AND 1204) (type: boolean) - | Statistics:Num rows: 36524 Data size: 40870356 Basic stats: COMPLETE Column stats: NONE - | TableScan [TS_1] - | alias:date_dim - | Statistics:Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE - |<-Reducer 9 [SIMPLE_EDGE] - Reduce Output Operator [RS_27] - key expressions:_col0 (type: int), _col1 (type: int) - Map-reduce partition columns:_col0 (type: int), _col1 (type: int) - sort order:++ - Statistics:Num rows: 20088 Data size: 22478696 Basic stats: COMPLETE Column stats: NONE - Group By Operator [GBY_24] - | keys:KEY._col0 (type: int), KEY._col1 (type: int) - | outputColumnNames:["_col0","_col1"] - | Statistics:Num rows: 20088 Data size: 22478696 Basic stats: COMPLETE Column stats: NONE - |<-Reducer 8 [SIMPLE_EDGE] - Reduce Output Operator [RS_23] - key expressions:_col0 (type: int), _col1 (type: int) - Map-reduce partition columns:_col0 (type: int), _col1 (type: int) - sort order:++ - Statistics:Num rows: 40176 Data size: 44957392 Basic stats: COMPLETE Column stats: NONE - Group By Operator [GBY_22] - keys:_col3 (type: int), _col15 (type: int) - outputColumnNames:["_col0","_col1"] + Select Operator [SEL_31] + outputColumnNames:["_col0","_col1","_col2"] + Statistics:Num rows: 22096 Data size: 24726566 Basic stats: COMPLETE Column stats: NONE + Merge Join Operator [MERGEJOIN_49] + | condition map:[{"":"Outer Join 0 to 1"}] + | keys:{"0":"_col0 (type: int), _col1 (type: int)","1":"_col0 (type: int), _col1 (type: int)"} + | outputColumnNames:["_col0","_col2"] + | Statistics:Num rows: 22096 Data size: 24726566 Basic stats: COMPLETE Column stats: NONE + |<-Reducer 3 [SIMPLE_EDGE] + | Reduce Output Operator [RS_28] + | key expressions:_col0 (type: int), _col1 (type: int) + | Map-reduce partition columns:_col0 (type: int), _col1 (type: int) + | sort order:++ + | Statistics:Num rows: 20088 Data size: 22478696 Basic stats: COMPLETE Column stats: NONE + | Select Operator [SEL_13] + | outputColumnNames:["_col0","_col1"] + | Statistics:Num rows: 20088 Data size: 22478696 Basic stats: COMPLETE Column stats: NONE + | Group By Operator [GBY_12] + | | keys:KEY._col0 (type: int), KEY._col1 (type: int) + | | outputColumnNames:["_col0","_col1"] + | | Statistics:Num rows: 20088 Data size: 22478696 Basic stats: COMPLETE Column stats: NONE + | |<-Reducer 2 [SIMPLE_EDGE] + | Reduce Output Operator [RS_11] + | key expressions:_col0 (type: int), _col1 (type: int) + | Map-reduce partition columns:_col0 (type: int), _col1 (type: int) + | sort order:++ + | Statistics:Num rows: 40176 Data size: 44957392 Basic stats: COMPLETE Column stats: NONE + | Group By Operator [GBY_10] + | keys:_col1 (type: int), _col2 (type: int) + | outputColumnNames:["_col0","_col1"] + | Statistics:Num rows: 40176 Data size: 44957392 Basic stats: COMPLETE Column stats: NONE + | Merge Join Operator [MERGEJOIN_47] + | | condition map:[{"":"Inner Join 0 to 1"}] + | | keys:{"0":"_col0 (type: int)","1":"_col0 (type: int)"} + | | outputColumnNames:["_col1","_col2"] + | | Statistics:Num rows: 40176 Data size: 44957392 Basic stats: COMPLETE Column stats: NONE + | |<-Map 1 [SIMPLE_EDGE] + | | Reduce Output Operator [RS_6] + | | key expressions:_col0 (type: int) + | | Map-reduce partition columns:_col0 (type: int) + | | sort order:+ + | | Statistics:Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + | | value expressions:_col1 (type: int), _col2 (type: int) + | | Select Operator [SEL_2] + | | outputColumnNames:["_col0","_col1","_col2"] + | | Statistics:Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + | | Filter Operator [FIL_43] + | | predicate:ss_sold_date_sk is not null (type: boolean) + | | Statistics:Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + | | TableScan [TS_0] + | | alias:store_sales + | | Statistics:Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + | |<-Map 6 [SIMPLE_EDGE] + | Reduce Output Operator [RS_7] + | key expressions:_col0 (type: int) + | Map-reduce partition columns:_col0 (type: int) + | sort order:+ + | Statistics:Num rows: 36524 Data size: 40870356 Basic stats: COMPLETE Column stats: NONE + | Select Operator [SEL_5] + | outputColumnNames:["_col0"] + | Statistics:Num rows: 36524 Data size: 40870356 Basic stats: COMPLETE Column stats: NONE + | Filter Operator [FIL_44] + | predicate:(d_date_sk is not null and d_month_seq BETWEEN 1193 AND 1204) (type: boolean) + | Statistics:Num rows: 36524 Data size: 40870356 Basic stats: COMPLETE Column stats: NONE + | TableScan [TS_3] + | alias:date_dim + | Statistics:Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE + |<-Reducer 9 [SIMPLE_EDGE] + Reduce Output Operator [RS_29] + key expressions:_col0 (type: int), _col1 (type: int) + Map-reduce partition columns:_col0 (type: int), _col1 (type: int) + sort order:++ + Statistics:Num rows: 20088 Data size: 22478696 Basic stats: COMPLETE Column stats: NONE + Group By Operator [GBY_26] + | keys:KEY._col0 (type: int), KEY._col1 (type: int) + | outputColumnNames:["_col0","_col1"] + | Statistics:Num rows: 20088 Data size: 22478696 Basic stats: COMPLETE Column stats: NONE + |<-Reducer 8 [SIMPLE_EDGE] + Reduce Output Operator [RS_25] + key expressions:_col0 (type: int), _col1 (type: int) + Map-reduce partition columns:_col0 (type: int), _col1 (type: int) + sort order:++ Statistics:Num rows: 40176 Data size: 44957392 Basic stats: COMPLETE Column stats: NONE - Merge Join Operator [MERGEJOIN_45] - | condition map:[{"":"Inner Join 0 to 1"}] - | keys:{"0":"cs_sold_date_sk (type: int)","1":"d_date_sk (type: int)"} - | outputColumnNames:["_col3","_col15"] - | Statistics:Num rows: 40176 Data size: 44957392 Basic stats: COMPLETE Column stats: NONE - |<-Map 10 [SIMPLE_EDGE] - | Reduce Output Operator [RS_18] - | key expressions:d_date_sk (type: int) - | Map-reduce partition columns:d_date_sk (type: int) - | sort order:+ - | Statistics:Num rows: 36524 Data size: 40870356 Basic stats: COMPLETE Column stats: NONE - | Filter Operator [FIL_43] - | predicate:(d_date_sk is not null and d_month_seq BETWEEN 1193 AND 1204) (type: boolean) - | Statistics:Num rows: 36524 Data size: 40870356 Basic stats: COMPLETE Column stats: NONE - | TableScan [TS_14] - | alias:date_dim - | Statistics:Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE - |<-Map 7 [SIMPLE_EDGE] - Reduce Output Operator [RS_16] - key expressions:cs_sold_date_sk (type: int) - Map-reduce partition columns:cs_sold_date_sk (type: int) - sort order:+ - Statistics:Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE - value expressions:cs_bill_customer_sk (type: int), cs_item_sk (type: int) - Filter Operator [FIL_42] - predicate:cs_sold_date_sk is not null (type: boolean) + Group By Operator [GBY_24] + keys:_col1 (type: int), _col2 (type: int) + outputColumnNames:["_col0","_col1"] + Statistics:Num rows: 40176 Data size: 44957392 Basic stats: COMPLETE Column stats: NONE + Merge Join Operator [MERGEJOIN_48] + | condition map:[{"":"Inner Join 0 to 1"}] + | keys:{"0":"_col0 (type: int)","1":"_col0 (type: int)"} + | outputColumnNames:["_col1","_col2"] + | Statistics:Num rows: 40176 Data size: 44957392 Basic stats: COMPLETE Column stats: NONE + |<-Map 10 [SIMPLE_EDGE] + | Reduce Output Operator [RS_21] + | key expressions:_col0 (type: int) + | Map-reduce partition columns:_col0 (type: int) + | sort order:+ + | Statistics:Num rows: 36524 Data size: 40870356 Basic stats: COMPLETE Column stats: NONE + | Select Operator [SEL_19] + | outputColumnNames:["_col0"] + | Statistics:Num rows: 36524 Data size: 40870356 Basic stats: COMPLETE Column stats: NONE + | Filter Operator [FIL_46] + | predicate:(d_date_sk is not null and d_month_seq BETWEEN 1193 AND 1204) (type: boolean) + | Statistics:Num rows: 36524 Data size: 40870356 Basic stats: COMPLETE Column stats: NONE + | TableScan [TS_17] + | alias:date_dim + | Statistics:Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE + |<-Map 7 [SIMPLE_EDGE] + Reduce Output Operator [RS_20] + key expressions:_col0 (type: int) + Map-reduce partition columns:_col0 (type: int) + sort order:+ Statistics:Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE - TableScan [TS_13] - alias:catalog_sales + value expressions:_col1 (type: int), _col2 (type: int) + Select Operator [SEL_16] + outputColumnNames:["_col0","_col1","_col2"] Statistics:Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Filter Operator [FIL_45] + predicate:cs_sold_date_sk is not null (type: boolean) + Statistics:Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + TableScan [TS_14] + alias:catalog_sales + Statistics:Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
