HIVE-20210 : Simple Fetch optimizer should lead to MapReduce when filter on non-partition column and conversion is minimal (Jeffrey Yan via Ashutosh Chauhan)
Signed-off-by: Ashutosh Chauhan <[email protected]> Project: http://git-wip-us.apache.org/repos/asf/hive/repo Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/65f02d2f Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/65f02d2f Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/65f02d2f Branch: refs/heads/master Commit: 65f02d2f99b990cb28fd6a832fa3425042e60a04 Parents: 6fa9f63 Author: Jeffrey(Xilang) Yan <[email protected]> Authored: Mon Jul 30 15:35:56 2018 -0700 Committer: Ashutosh Chauhan <[email protected]> Committed: Mon Jul 30 15:35:56 2018 -0700 ---------------------------------------------------------------------- .../hive/ql/optimizer/SimpleFetchOptimizer.java | 31 +- .../test/queries/clientpositive/nonmr_fetch.q | 4 + .../clientpositive/annotate_stats_part.q.out | 117 ++++--- .../clientpositive/cbo_rp_simple_select.q.out | 232 +++++++++----- .../clientpositive/cbo_simple_select.q.out | 232 +++++++++----- .../clientpositive/filter_in_or_dup.q.out | 117 ++++--- .../test/results/clientpositive/input42.q.out | 148 ++++++--- .../results/clientpositive/input_part9.q.out | 75 +++-- .../clientpositive/list_bucket_dml_1.q.out | 72 +++-- .../clientpositive/list_bucket_dml_11.q.out | 72 +++-- .../clientpositive/list_bucket_dml_12.q.out | 144 ++++++--- .../clientpositive/list_bucket_dml_13.q.out | 72 +++-- .../clientpositive/list_bucket_dml_2.q.out | 72 +++-- .../clientpositive/list_bucket_dml_3.q.out | 72 +++-- .../clientpositive/list_bucket_dml_4.q.out | 72 +++-- .../clientpositive/list_bucket_dml_5.q.out | 75 +++-- .../clientpositive/list_bucket_dml_6.q.out | 75 +++-- .../clientpositive/list_bucket_dml_7.q.out | 75 +++-- .../clientpositive/list_bucket_dml_8.q.out | 75 +++-- .../clientpositive/list_bucket_dml_9.q.out | 72 +++-- .../list_bucket_query_multiskew_1.q.out | 288 ++++++++++++----- .../list_bucket_query_multiskew_2.q.out | 316 +++++++++++++++---- .../list_bucket_query_multiskew_3.q.out | 216 +++++++++---- 
.../list_bucket_query_oneskew_1.q.out | 216 +++++++++---- .../list_bucket_query_oneskew_2.q.out | 144 ++++++--- .../list_bucket_query_oneskew_3.q.out | 166 ++++++++-- .../results/clientpositive/nonmr_fetch.q.out | 108 ++++++- .../results/clientpositive/ppr_pushdown3.q.out | 39 ++- .../clientpositive/rand_partitionpruner3.q.out | 142 ++++++--- .../truncate_column_list_bucket.q.out | 144 ++++++--- .../results/clientpositive/union_view.q.out | 117 ++++--- 31 files changed, 2814 insertions(+), 986 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/hive/blob/65f02d2f/ql/src/java/org/apache/hadoop/hive/ql/optimizer/SimpleFetchOptimizer.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/SimpleFetchOptimizer.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/SimpleFetchOptimizer.java index ffd47a2..89f6ee1 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/SimpleFetchOptimizer.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/SimpleFetchOptimizer.java @@ -211,11 +211,38 @@ public class SimpleFetchOptimizer extends Transform { bypassFilter = !pctx.getPrunedPartitions(alias, ts).hasUnknownPartitions(); } } - if (!aggressive && !bypassFilter) { + + boolean onlyPruningFilter = bypassFilter; + Operator<?> op = ts; + while (onlyPruningFilter) { + if (op instanceof FileSinkOperator || op.getChildOperators() == null) { + break; + } else if (op.getChildOperators().size() != 1) { + onlyPruningFilter = false; + break; + } else { + op = op.getChildOperators().get(0); + } + + if (op instanceof FilterOperator) { + ExprNodeDesc predicate = ((FilterOperator) op).getConf().getPredicate(); + if (predicate instanceof ExprNodeConstantDesc + && "boolean".equals(predicate.getTypeInfo().getTypeName())) { + continue; + } else if (PartitionPruner.onlyContainsPartnCols(table, predicate)) { + continue; + } else { + 
onlyPruningFilter = false; + } + } + } + + if (!aggressive && !onlyPruningFilter) { return null; } + PrunedPartitionList partitions = pctx.getPrunedPartitions(alias, ts); - FetchData fetch = new FetchData(ts, parent, table, partitions, splitSample, bypassFilter); + FetchData fetch = new FetchData(ts, parent, table, partitions, splitSample, onlyPruningFilter); return checkOperators(fetch, aggressive, bypassFilter); } http://git-wip-us.apache.org/repos/asf/hive/blob/65f02d2f/ql/src/test/queries/clientpositive/nonmr_fetch.q ---------------------------------------------------------------------- diff --git a/ql/src/test/queries/clientpositive/nonmr_fetch.q b/ql/src/test/queries/clientpositive/nonmr_fetch.q index 1b5ab56..5b7e61e 100644 --- a/ql/src/test/queries/clientpositive/nonmr_fetch.q +++ b/ql/src/test/queries/clientpositive/nonmr_fetch.q @@ -15,6 +15,10 @@ select * from srcpart where ds='2008-04-08' AND hr='11' limit 10; explain select key from src limit 10; select key from src limit 10; +-- negative, filter on partition column and non-partition column +explain select * from srcpart where ds='2008-04-08' AND key > 100 limit 10; +select * from srcpart where ds='2008-04-08' AND key > 100 limit 10; + -- negative, filter on non-partition column explain select * from srcpart where key > 100 limit 10; select * from srcpart where key > 100 limit 10; http://git-wip-us.apache.org/repos/asf/hive/blob/65f02d2f/ql/src/test/results/clientpositive/annotate_stats_part.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/annotate_stats_part.q.out b/ql/src/test/results/clientpositive/annotate_stats_part.q.out index 29ef214..bafc6de 100644 --- a/ql/src/test/results/clientpositive/annotate_stats_part.q.out +++ b/ql/src/test/results/clientpositive/annotate_stats_part.q.out @@ -441,73 +441,112 @@ PREHOOK: type: QUERY POSTHOOK: query: explain select locid from loc_orc_n4 where locid>0 and year='2001' POSTHOOK: 
type: QUERY STAGE DEPENDENCIES: - Stage-0 is a root stage + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: loc_orc_n4 + filterExpr: ((locid > 0) and (year = '2001')) (type: boolean) + Statistics: Num rows: 7 Data size: 28 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: (locid > 0) (type: boolean) + Statistics: Num rows: 7 Data size: 28 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: locid (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 7 Data size: 28 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 7 Data size: 28 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: vectorized + Stage: Stage-0 Fetch Operator limit: -1 Processor Tree: - TableScan - alias: loc_orc_n4 - filterExpr: ((locid > 0) and (year = '2001')) (type: boolean) - Statistics: Num rows: 7 Data size: 28 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: (locid > 0) (type: boolean) - Statistics: Num rows: 7 Data size: 28 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: locid (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 7 Data size: 28 Basic stats: COMPLETE Column stats: COMPLETE - ListSink + ListSink PREHOOK: query: explain select locid,year from loc_orc_n4 where locid>0 and year='2001' PREHOOK: type: QUERY POSTHOOK: query: explain select locid,year from loc_orc_n4 where locid>0 and year='2001' POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-0 is a root stage + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: 
+ TableScan + alias: loc_orc_n4 + filterExpr: ((locid > 0) and (year = '2001')) (type: boolean) + Statistics: Num rows: 7 Data size: 28 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: (locid > 0) (type: boolean) + Statistics: Num rows: 7 Data size: 28 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: locid (type: int), '2001' (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 7 Data size: 644 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 7 Data size: 644 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: vectorized + Stage: Stage-0 Fetch Operator limit: -1 Processor Tree: - TableScan - alias: loc_orc_n4 - filterExpr: ((locid > 0) and (year = '2001')) (type: boolean) - Statistics: Num rows: 7 Data size: 28 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: (locid > 0) (type: boolean) - Statistics: Num rows: 7 Data size: 28 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: locid (type: int), '2001' (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 7 Data size: 644 Basic stats: COMPLETE Column stats: COMPLETE - ListSink + ListSink PREHOOK: query: explain select * from (select locid,year from loc_orc_n4) test where locid>0 and year='2001' PREHOOK: type: QUERY POSTHOOK: query: explain select * from (select locid,year from loc_orc_n4) test where locid>0 and year='2001' POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-0 is a root stage + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: loc_orc_n4 + filterExpr: ((locid > 0) and (year = '2001')) 
(type: boolean) + Statistics: Num rows: 7 Data size: 28 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: (locid > 0) (type: boolean) + Statistics: Num rows: 7 Data size: 28 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: locid (type: int), '2001' (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 7 Data size: 644 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 7 Data size: 644 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: vectorized + Stage: Stage-0 Fetch Operator limit: -1 Processor Tree: - TableScan - alias: loc_orc_n4 - filterExpr: ((locid > 0) and (year = '2001')) (type: boolean) - Statistics: Num rows: 7 Data size: 28 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: (locid > 0) (type: boolean) - Statistics: Num rows: 7 Data size: 28 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: locid (type: int), '2001' (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 7 Data size: 644 Basic stats: COMPLETE Column stats: COMPLETE - ListSink + ListSink http://git-wip-us.apache.org/repos/asf/hive/blob/65f02d2f/ql/src/test/results/clientpositive/cbo_rp_simple_select.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/cbo_rp_simple_select.q.out b/ql/src/test/results/clientpositive/cbo_rp_simple_select.q.out index d12b5f6..2e7d796 100644 --- a/ql/src/test/results/clientpositive/cbo_rp_simple_select.q.out +++ b/ql/src/test/results/clientpositive/cbo_rp_simple_select.q.out @@ -746,25 +746,38 @@ PREHOOK: type: QUERY POSTHOOK: query: EXPLAIN select * from cbo_t2 where 
(cbo_t2.c_int) IN (cbo_t2.c_int) POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-0 is a root stage + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: cbo_t2 + filterExpr: (c_int = c_int) (type: boolean) + Statistics: Num rows: 20 Data size: 7138 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: (c_int = c_int) (type: boolean) + Statistics: Num rows: 10 Data size: 3660 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string), value (type: string), c_int (type: int), c_float (type: float), c_boolean (type: boolean), dt (type: string) + outputColumnNames: key, value, c_int, c_float, c_boolean, dt + Statistics: Num rows: 10 Data size: 3660 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 10 Data size: 3660 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: vectorized + Stage: Stage-0 Fetch Operator limit: -1 Processor Tree: - TableScan - alias: cbo_t2 - filterExpr: (c_int = c_int) (type: boolean) - Statistics: Num rows: 20 Data size: 7138 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: (c_int = c_int) (type: boolean) - Statistics: Num rows: 10 Data size: 3660 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: key (type: string), value (type: string), c_int (type: int), c_float (type: float), c_boolean (type: boolean), dt (type: string) - outputColumnNames: key, value, c_int, c_float, c_boolean, dt - Statistics: Num rows: 10 Data size: 3660 Basic stats: COMPLETE Column stats: COMPLETE - ListSink + ListSink PREHOOK: query: -- c_int is not null EXPLAIN select * from cbo_t2 
where (cbo_t2.c_int) IN (2*cbo_t2.c_int) @@ -773,25 +786,38 @@ POSTHOOK: query: -- c_int is not null EXPLAIN select * from cbo_t2 where (cbo_t2.c_int) IN (2*cbo_t2.c_int) POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-0 is a root stage + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: cbo_t2 + filterExpr: (c_int = (2 * c_int)) (type: boolean) + Statistics: Num rows: 20 Data size: 7138 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: (c_int = (2 * c_int)) (type: boolean) + Statistics: Num rows: 10 Data size: 3660 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string), value (type: string), c_int (type: int), c_float (type: float), c_boolean (type: boolean), dt (type: string) + outputColumnNames: key, value, c_int, c_float, c_boolean, dt + Statistics: Num rows: 10 Data size: 3660 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 10 Data size: 3660 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: vectorized + Stage: Stage-0 Fetch Operator limit: -1 Processor Tree: - TableScan - alias: cbo_t2 - filterExpr: (c_int = (2 * c_int)) (type: boolean) - Statistics: Num rows: 20 Data size: 7138 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: (c_int = (2 * c_int)) (type: boolean) - Statistics: Num rows: 10 Data size: 3660 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: key (type: string), value (type: string), c_int (type: int), c_float (type: float), c_boolean (type: boolean), dt (type: string) - outputColumnNames: key, value, c_int, c_float, c_boolean, dt - Statistics: Num 
rows: 10 Data size: 3660 Basic stats: COMPLETE Column stats: COMPLETE - ListSink + ListSink PREHOOK: query: -- c_int is 0 EXPLAIN select * from cbo_t2 where (cbo_t2.c_int) = (cbo_t2.c_int) @@ -800,25 +826,38 @@ POSTHOOK: query: -- c_int is 0 EXPLAIN select * from cbo_t2 where (cbo_t2.c_int) = (cbo_t2.c_int) POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-0 is a root stage + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: cbo_t2 + filterExpr: (c_int = c_int) (type: boolean) + Statistics: Num rows: 20 Data size: 7138 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: (c_int = c_int) (type: boolean) + Statistics: Num rows: 10 Data size: 3660 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string), value (type: string), c_int (type: int), c_float (type: float), c_boolean (type: boolean), dt (type: string) + outputColumnNames: key, value, c_int, c_float, c_boolean, dt + Statistics: Num rows: 10 Data size: 3660 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 10 Data size: 3660 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: vectorized + Stage: Stage-0 Fetch Operator limit: -1 Processor Tree: - TableScan - alias: cbo_t2 - filterExpr: (c_int = c_int) (type: boolean) - Statistics: Num rows: 20 Data size: 7138 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: (c_int = c_int) (type: boolean) - Statistics: Num rows: 10 Data size: 3660 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: key (type: string), value (type: string), c_int (type: int), c_float (type: float), c_boolean 
(type: boolean), dt (type: string) - outputColumnNames: key, value, c_int, c_float, c_boolean, dt - Statistics: Num rows: 10 Data size: 3660 Basic stats: COMPLETE Column stats: COMPLETE - ListSink + ListSink PREHOOK: query: -- c_int is not null EXPLAIN select * from cbo_t2 where (cbo_t2.c_int) IN (NULL) @@ -827,25 +866,38 @@ POSTHOOK: query: -- c_int is not null EXPLAIN select * from cbo_t2 where (cbo_t2.c_int) IN (NULL) POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-0 is a root stage + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: cbo_t2 + filterExpr: (c_int = null) (type: boolean) + Statistics: Num rows: 20 Data size: 7138 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: (c_int = null) (type: boolean) + Statistics: Num rows: 10 Data size: 3660 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string), value (type: string), null (type: int), c_float (type: float), c_boolean (type: boolean), dt (type: string) + outputColumnNames: key, value, c_int, c_float, c_boolean, dt + Statistics: Num rows: 10 Data size: 3624 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 10 Data size: 3624 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: vectorized + Stage: Stage-0 Fetch Operator limit: -1 Processor Tree: - TableScan - alias: cbo_t2 - filterExpr: (c_int = null) (type: boolean) - Statistics: Num rows: 20 Data size: 7138 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: (c_int = null) (type: boolean) - Statistics: Num rows: 10 Data size: 3660 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator 
- expressions: key (type: string), value (type: string), null (type: int), c_float (type: float), c_boolean (type: boolean), dt (type: string) - outputColumnNames: key, value, c_int, c_float, c_boolean, dt - Statistics: Num rows: 10 Data size: 3624 Basic stats: COMPLETE Column stats: COMPLETE - ListSink + ListSink PREHOOK: query: -- rewrite to NULL EXPLAIN select * from cbo_t2 where (cbo_t2.c_int) IN (cbo_t2.c_int, 2*cbo_t2.c_int) @@ -854,25 +906,37 @@ POSTHOOK: query: -- rewrite to NULL EXPLAIN select * from cbo_t2 where (cbo_t2.c_int) IN (cbo_t2.c_int, 2*cbo_t2.c_int) POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-0 is a root stage + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: cbo_t2 + filterExpr: (c_int) IN (c_int, (2 * c_int)) (type: boolean) + Statistics: Num rows: 20 Data size: 7138 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: (c_int) IN (c_int, (2 * c_int)) (type: boolean) + Statistics: Num rows: 10 Data size: 3660 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string), value (type: string), c_int (type: int), c_float (type: float), c_boolean (type: boolean), dt (type: string) + outputColumnNames: key, value, c_int, c_float, c_boolean, dt + Statistics: Num rows: 10 Data size: 3660 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 10 Data size: 3660 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Stage: Stage-0 Fetch Operator limit: -1 Processor Tree: - TableScan - alias: cbo_t2 - filterExpr: (c_int) IN (c_int, (2 * c_int)) (type: boolean) - Statistics: Num rows: 20 Data size: 7138 Basic stats: COMPLETE Column stats: 
COMPLETE - Filter Operator - predicate: (c_int) IN (c_int, (2 * c_int)) (type: boolean) - Statistics: Num rows: 10 Data size: 3660 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: key (type: string), value (type: string), c_int (type: int), c_float (type: float), c_boolean (type: boolean), dt (type: string) - outputColumnNames: key, value, c_int, c_float, c_boolean, dt - Statistics: Num rows: 10 Data size: 3660 Basic stats: COMPLETE Column stats: COMPLETE - ListSink + ListSink PREHOOK: query: -- no rewrite EXPLAIN select * from cbo_t2 where (cbo_t2.c_int) IN (cbo_t2.c_int, 0) @@ -881,25 +945,37 @@ POSTHOOK: query: -- no rewrite EXPLAIN select * from cbo_t2 where (cbo_t2.c_int) IN (cbo_t2.c_int, 0) POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-0 is a root stage + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: cbo_t2 + filterExpr: (c_int) IN (c_int, 0) (type: boolean) + Statistics: Num rows: 20 Data size: 7138 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: (c_int) IN (c_int, 0) (type: boolean) + Statistics: Num rows: 10 Data size: 3660 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string), value (type: string), c_int (type: int), c_float (type: float), c_boolean (type: boolean), dt (type: string) + outputColumnNames: key, value, c_int, c_float, c_boolean, dt + Statistics: Num rows: 10 Data size: 3660 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 10 Data size: 3660 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Stage: Stage-0 Fetch Operator limit: -1 Processor Tree: - TableScan - alias: cbo_t2 - 
filterExpr: (c_int) IN (c_int, 0) (type: boolean) - Statistics: Num rows: 20 Data size: 7138 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: (c_int) IN (c_int, 0) (type: boolean) - Statistics: Num rows: 10 Data size: 3660 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: key (type: string), value (type: string), c_int (type: int), c_float (type: float), c_boolean (type: boolean), dt (type: string) - outputColumnNames: key, value, c_int, c_float, c_boolean, dt - Statistics: Num rows: 10 Data size: 3660 Basic stats: COMPLETE Column stats: COMPLETE - ListSink + ListSink PREHOOK: query: -- no rewrite http://git-wip-us.apache.org/repos/asf/hive/blob/65f02d2f/ql/src/test/results/clientpositive/cbo_simple_select.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/cbo_simple_select.q.out b/ql/src/test/results/clientpositive/cbo_simple_select.q.out index 588d924..33f0e71 100644 --- a/ql/src/test/results/clientpositive/cbo_simple_select.q.out +++ b/ql/src/test/results/clientpositive/cbo_simple_select.q.out @@ -746,25 +746,38 @@ PREHOOK: type: QUERY POSTHOOK: query: EXPLAIN select * from cbo_t2 where (cbo_t2.c_int) IN (cbo_t2.c_int) POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-0 is a root stage + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: cbo_t2 + filterExpr: (c_int = c_int) (type: boolean) + Statistics: Num rows: 20 Data size: 7138 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: (c_int = c_int) (type: boolean) + Statistics: Num rows: 10 Data size: 3660 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string), value (type: string), c_int (type: int), c_float (type: float), c_boolean (type: boolean), dt (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, 
_col5 + Statistics: Num rows: 10 Data size: 3660 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 10 Data size: 3660 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: vectorized + Stage: Stage-0 Fetch Operator limit: -1 Processor Tree: - TableScan - alias: cbo_t2 - filterExpr: (c_int = c_int) (type: boolean) - Statistics: Num rows: 20 Data size: 7138 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: (c_int = c_int) (type: boolean) - Statistics: Num rows: 10 Data size: 3660 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: key (type: string), value (type: string), c_int (type: int), c_float (type: float), c_boolean (type: boolean), dt (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 10 Data size: 3660 Basic stats: COMPLETE Column stats: COMPLETE - ListSink + ListSink PREHOOK: query: -- c_int is not null EXPLAIN select * from cbo_t2 where (cbo_t2.c_int) IN (2*cbo_t2.c_int) @@ -773,25 +786,38 @@ POSTHOOK: query: -- c_int is not null EXPLAIN select * from cbo_t2 where (cbo_t2.c_int) IN (2*cbo_t2.c_int) POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-0 is a root stage + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: cbo_t2 + filterExpr: (c_int = (2 * c_int)) (type: boolean) + Statistics: Num rows: 20 Data size: 7138 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: (c_int = (2 * c_int)) (type: boolean) + Statistics: Num rows: 10 Data size: 3660 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string), value (type: string), 
c_int (type: int), c_float (type: float), c_boolean (type: boolean), dt (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 10 Data size: 3660 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 10 Data size: 3660 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: vectorized + Stage: Stage-0 Fetch Operator limit: -1 Processor Tree: - TableScan - alias: cbo_t2 - filterExpr: (c_int = (2 * c_int)) (type: boolean) - Statistics: Num rows: 20 Data size: 7138 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: (c_int = (2 * c_int)) (type: boolean) - Statistics: Num rows: 10 Data size: 3660 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: key (type: string), value (type: string), c_int (type: int), c_float (type: float), c_boolean (type: boolean), dt (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 10 Data size: 3660 Basic stats: COMPLETE Column stats: COMPLETE - ListSink + ListSink PREHOOK: query: -- c_int is 0 EXPLAIN select * from cbo_t2 where (cbo_t2.c_int) = (cbo_t2.c_int) @@ -800,25 +826,38 @@ POSTHOOK: query: -- c_int is 0 EXPLAIN select * from cbo_t2 where (cbo_t2.c_int) = (cbo_t2.c_int) POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-0 is a root stage + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: cbo_t2 + filterExpr: (c_int = c_int) (type: boolean) + Statistics: Num rows: 20 Data size: 7138 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: (c_int = c_int) (type: boolean) + Statistics: Num rows: 10 Data size: 
3660 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string), value (type: string), c_int (type: int), c_float (type: float), c_boolean (type: boolean), dt (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 10 Data size: 3660 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 10 Data size: 3660 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: vectorized + Stage: Stage-0 Fetch Operator limit: -1 Processor Tree: - TableScan - alias: cbo_t2 - filterExpr: (c_int = c_int) (type: boolean) - Statistics: Num rows: 20 Data size: 7138 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: (c_int = c_int) (type: boolean) - Statistics: Num rows: 10 Data size: 3660 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: key (type: string), value (type: string), c_int (type: int), c_float (type: float), c_boolean (type: boolean), dt (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 10 Data size: 3660 Basic stats: COMPLETE Column stats: COMPLETE - ListSink + ListSink PREHOOK: query: -- c_int is not null EXPLAIN select * from cbo_t2 where (cbo_t2.c_int) IN (NULL) @@ -827,25 +866,38 @@ POSTHOOK: query: -- c_int is not null EXPLAIN select * from cbo_t2 where (cbo_t2.c_int) IN (NULL) POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-0 is a root stage + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: cbo_t2 + filterExpr: (c_int = null) (type: boolean) + Statistics: Num rows: 20 Data size: 7138 Basic stats: COMPLETE Column stats: 
COMPLETE + Filter Operator + predicate: (c_int = null) (type: boolean) + Statistics: Num rows: 10 Data size: 3660 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string), value (type: string), null (type: int), c_float (type: float), c_boolean (type: boolean), dt (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 10 Data size: 3624 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 10 Data size: 3624 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: vectorized + Stage: Stage-0 Fetch Operator limit: -1 Processor Tree: - TableScan - alias: cbo_t2 - filterExpr: (c_int = null) (type: boolean) - Statistics: Num rows: 20 Data size: 7138 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: (c_int = null) (type: boolean) - Statistics: Num rows: 10 Data size: 3660 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: key (type: string), value (type: string), null (type: int), c_float (type: float), c_boolean (type: boolean), dt (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 10 Data size: 3624 Basic stats: COMPLETE Column stats: COMPLETE - ListSink + ListSink PREHOOK: query: -- rewrite to NULL EXPLAIN select * from cbo_t2 where (cbo_t2.c_int) IN (cbo_t2.c_int, 2*cbo_t2.c_int) @@ -854,25 +906,37 @@ POSTHOOK: query: -- rewrite to NULL EXPLAIN select * from cbo_t2 where (cbo_t2.c_int) IN (cbo_t2.c_int, 2*cbo_t2.c_int) POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-0 is a root stage + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + 
TableScan + alias: cbo_t2 + filterExpr: (c_int) IN (c_int, (2 * c_int)) (type: boolean) + Statistics: Num rows: 20 Data size: 7138 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: (c_int) IN (c_int, (2 * c_int)) (type: boolean) + Statistics: Num rows: 10 Data size: 3660 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string), value (type: string), c_int (type: int), c_float (type: float), c_boolean (type: boolean), dt (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 10 Data size: 3660 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 10 Data size: 3660 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Stage: Stage-0 Fetch Operator limit: -1 Processor Tree: - TableScan - alias: cbo_t2 - filterExpr: (c_int) IN (c_int, (2 * c_int)) (type: boolean) - Statistics: Num rows: 20 Data size: 7138 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: (c_int) IN (c_int, (2 * c_int)) (type: boolean) - Statistics: Num rows: 10 Data size: 3660 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: key (type: string), value (type: string), c_int (type: int), c_float (type: float), c_boolean (type: boolean), dt (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 10 Data size: 3660 Basic stats: COMPLETE Column stats: COMPLETE - ListSink + ListSink PREHOOK: query: -- no rewrite EXPLAIN select * from cbo_t2 where (cbo_t2.c_int) IN (cbo_t2.c_int, 0) @@ -881,25 +945,37 @@ POSTHOOK: query: -- no rewrite EXPLAIN select * from cbo_t2 where (cbo_t2.c_int) IN (cbo_t2.c_int, 0) POSTHOOK: type: QUERY STAGE DEPENDENCIES: 
- Stage-0 is a root stage + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: cbo_t2 + filterExpr: (c_int) IN (c_int, 0) (type: boolean) + Statistics: Num rows: 20 Data size: 7138 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: (c_int) IN (c_int, 0) (type: boolean) + Statistics: Num rows: 10 Data size: 3660 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string), value (type: string), c_int (type: int), c_float (type: float), c_boolean (type: boolean), dt (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 10 Data size: 3660 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 10 Data size: 3660 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Stage: Stage-0 Fetch Operator limit: -1 Processor Tree: - TableScan - alias: cbo_t2 - filterExpr: (c_int) IN (c_int, 0) (type: boolean) - Statistics: Num rows: 20 Data size: 7138 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: (c_int) IN (c_int, 0) (type: boolean) - Statistics: Num rows: 10 Data size: 3660 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: key (type: string), value (type: string), c_int (type: int), c_float (type: float), c_boolean (type: boolean), dt (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 10 Data size: 3660 Basic stats: COMPLETE Column stats: COMPLETE - ListSink + ListSink PREHOOK: query: -- no rewrite http://git-wip-us.apache.org/repos/asf/hive/blob/65f02d2f/ql/src/test/results/clientpositive/filter_in_or_dup.q.out 
---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/filter_in_or_dup.q.out b/ql/src/test/results/clientpositive/filter_in_or_dup.q.out index b821717..b50027d 100644 --- a/ql/src/test/results/clientpositive/filter_in_or_dup.q.out +++ b/ql/src/test/results/clientpositive/filter_in_or_dup.q.out @@ -11,25 +11,38 @@ WHERE (f.key = '1' OR f.key='2') AND f.key IN ('1', '2') POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-0 is a root stage + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: f + filterExpr: (key) IN ('1', '2') (type: boolean) + Statistics: Num rows: 20 Data size: 262 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (key) IN ('1', '2') (type: boolean) + Statistics: Num rows: 10 Data size: 131 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 10 Data size: 131 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 10 Data size: 131 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: vectorized + Stage: Stage-0 Fetch Operator limit: -1 Processor Tree: - TableScan - alias: f - filterExpr: (key) IN ('1', '2') (type: boolean) - Statistics: Num rows: 20 Data size: 262 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (key) IN ('1', '2') (type: boolean) - Statistics: Num rows: 10 Data size: 131 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 10 Data size: 131 Basic stats: COMPLETE Column stats: NONE - 
ListSink + ListSink PREHOOK: query: EXPLAIN SELECT f.key @@ -44,25 +57,38 @@ WHERE (f.key = '1' OR f.key = '2') AND f.key IN ('1', '2', '3') POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-0 is a root stage + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: f + filterExpr: (key) IN ('1', '2') (type: boolean) + Statistics: Num rows: 20 Data size: 262 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (key) IN ('1', '2') (type: boolean) + Statistics: Num rows: 10 Data size: 131 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 10 Data size: 131 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 10 Data size: 131 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: vectorized + Stage: Stage-0 Fetch Operator limit: -1 Processor Tree: - TableScan - alias: f - filterExpr: (key) IN ('1', '2') (type: boolean) - Statistics: Num rows: 20 Data size: 262 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (key) IN ('1', '2') (type: boolean) - Statistics: Num rows: 10 Data size: 131 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 10 Data size: 131 Basic stats: COMPLETE Column stats: NONE - ListSink + ListSink PREHOOK: query: EXPLAIN SELECT f.key @@ -77,23 +103,36 @@ WHERE (f.key = '1' OR f.key='2' OR f.key='3') AND f.key IN ('1', '2') POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-0 is a root stage + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 STAGE PLANS: + Stage: 
Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: f + filterExpr: (key) IN ('1', '2') (type: boolean) + Statistics: Num rows: 20 Data size: 262 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (key) IN ('1', '2') (type: boolean) + Statistics: Num rows: 10 Data size: 131 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 10 Data size: 131 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 10 Data size: 131 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: vectorized + Stage: Stage-0 Fetch Operator limit: -1 Processor Tree: - TableScan - alias: f - filterExpr: (key) IN ('1', '2') (type: boolean) - Statistics: Num rows: 20 Data size: 262 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (key) IN ('1', '2') (type: boolean) - Statistics: Num rows: 10 Data size: 131 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 10 Data size: 131 Basic stats: COMPLETE Column stats: NONE - ListSink + ListSink http://git-wip-us.apache.org/repos/asf/hive/blob/65f02d2f/ql/src/test/results/clientpositive/input42.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/input42.q.out b/ql/src/test/results/clientpositive/input42.q.out index df98800..98c2fd0 100644 --- a/ql/src/test/results/clientpositive/input42.q.out +++ b/ql/src/test/results/clientpositive/input42.q.out @@ -1143,14 +1143,55 @@ OPTIMIZED SQL: SELECT `key`, `value`, CAST('2008-04-08' AS STRING) AS `ds`, `hr` FROM `default`.`srcpart` WHERE `ds` = 
'2008-04-08' AND `key` < 200 STAGE DEPENDENCIES: - Stage-0 is a root stage + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-0 - Fetch Operator - limit: -1 - Partition Description: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: a + filterExpr: ((ds = '2008-04-08') and (UDFToDouble(key) < 200.0D)) (type: boolean) + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: (UDFToDouble(key) < 200.0D) (type: boolean) + Statistics: Num rows: 333 Data size: 3537 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string), value (type: string), '2008-04-08' (type: string), hr (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 333 Data size: 3537 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 333 Data size: 3537 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2,_col3 + columns.types string:string:string:string + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + Execution mode: vectorized + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### Partition + base file name: hr=11 input format: org.apache.hadoop.mapred.TextInputFormat output format: 
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat partition values: @@ -1197,7 +1238,9 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.srcpart name: default.srcpart +#### A masked pattern was here #### Partition + base file name: hr=12 input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat partition values: @@ -1244,21 +1287,15 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.srcpart name: default.srcpart + Truncated Path -> Alias: + /srcpart/ds=2008-04-08/hr=11 [a] + /srcpart/ds=2008-04-08/hr=12 [a] + + Stage: Stage-0 + Fetch Operator + limit: -1 Processor Tree: - TableScan - alias: a - filterExpr: ((ds = '2008-04-08') and (UDFToDouble(key) < 200.0D)) (type: boolean) - Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE - GatherStats: false - Filter Operator - isSamplingPred: false - predicate: (UDFToDouble(key) < 200.0D) (type: boolean) - Statistics: Num rows: 333 Data size: 3537 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: string), value (type: string), '2008-04-08' (type: string), hr (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 333 Data size: 3537 Basic stats: COMPLETE Column stats: NONE - ListSink + ListSink PREHOOK: query: select * from srcpart a where a.ds='2008-04-08' and key < 200 PREHOOK: type: QUERY @@ -1660,14 +1697,54 @@ OPTIMIZED SQL: SELECT `key`, `value`, CAST('2008-04-08' AS STRING) AS `ds`, `hr` FROM `default`.`srcpart` WHERE `ds` = '2008-04-08' AND RAND(100) < 0.1 STAGE DEPENDENCIES: - Stage-0 is a root stage + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-0 - Fetch Operator - limit: -1 - Partition Description: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 1000 Data size: 10624 
Basic stats: COMPLETE Column stats: NONE + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: (rand(100) < 0.1D) (type: boolean) + Statistics: Num rows: 333 Data size: 3537 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string), value (type: string), '2008-04-08' (type: string), hr (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 333 Data size: 3537 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 333 Data size: 3537 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2,_col3 + columns.types string:string:string:string + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + Execution mode: vectorized + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### Partition + base file name: hr=11 input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat partition values: @@ -1714,7 +1791,9 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.srcpart name: default.srcpart +#### A masked pattern was here #### Partition + base file name: hr=12 input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat partition values: @@ -1761,20 
+1840,15 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.srcpart name: default.srcpart + Truncated Path -> Alias: + /srcpart/ds=2008-04-08/hr=11 [a] + /srcpart/ds=2008-04-08/hr=12 [a] + + Stage: Stage-0 + Fetch Operator + limit: -1 Processor Tree: - TableScan - alias: a - Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE - GatherStats: false - Filter Operator - isSamplingPred: false - predicate: (rand(100) < 0.1D) (type: boolean) - Statistics: Num rows: 333 Data size: 3537 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: string), value (type: string), '2008-04-08' (type: string), hr (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 333 Data size: 3537 Basic stats: COMPLETE Column stats: NONE - ListSink + ListSink PREHOOK: query: select * from srcpart a where a.ds='2008-04-08' and rand(100) < 0.1 PREHOOK: type: QUERY http://git-wip-us.apache.org/repos/asf/hive/blob/65f02d2f/ql/src/test/results/clientpositive/input_part9.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/input_part9.q.out b/ql/src/test/results/clientpositive/input_part9.q.out index 9440167..5ecaeb1 100644 --- a/ql/src/test/results/clientpositive/input_part9.q.out +++ b/ql/src/test/results/clientpositive/input_part9.q.out @@ -8,14 +8,55 @@ OPTIMIZED SQL: SELECT `key`, `value`, CAST('2008-04-08' AS STRING) AS `ds`, `hr` FROM `default`.`srcpart` WHERE `ds` = '2008-04-08' AND `key` IS NOT NULL STAGE DEPENDENCIES: - Stage-0 is a root stage + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-0 - Fetch Operator - limit: -1 - Partition Description: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: x + filterExpr: ((ds = '2008-04-08') and key is not null) (type: boolean) + Statistics: Num rows: 1000 Data size: 10624 Basic stats: 
COMPLETE Column stats: NONE + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: key is not null (type: boolean) + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string), value (type: string), '2008-04-08' (type: string), hr (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2,_col3 + columns.types string:string:string:string + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + Execution mode: vectorized + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### Partition + base file name: hr=11 input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat partition values: @@ -62,7 +103,9 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.srcpart name: default.srcpart +#### A masked pattern was here #### Partition + base file name: hr=12 input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat partition values: @@ -109,21 +152,15 @@ STAGE 
PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.srcpart name: default.srcpart + Truncated Path -> Alias: + /srcpart/ds=2008-04-08/hr=11 [x] + /srcpart/ds=2008-04-08/hr=12 [x] + + Stage: Stage-0 + Fetch Operator + limit: -1 Processor Tree: - TableScan - alias: x - filterExpr: ((ds = '2008-04-08') and key is not null) (type: boolean) - Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE - GatherStats: false - Filter Operator - isSamplingPred: false - predicate: key is not null (type: boolean) - Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: string), value (type: string), '2008-04-08' (type: string), hr (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE - ListSink + ListSink PREHOOK: query: SELECT x.* FROM SRCPART x WHERE key IS NOT NULL AND ds = '2008-04-08' PREHOOK: type: QUERY http://git-wip-us.apache.org/repos/asf/hive/blob/65f02d2f/ql/src/test/results/clientpositive/list_bucket_dml_1.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/list_bucket_dml_1.q.out b/ql/src/test/results/clientpositive/list_bucket_dml_1.q.out index d13edd6..226e778 100644 --- a/ql/src/test/results/clientpositive/list_bucket_dml_1.q.out +++ b/ql/src/test/results/clientpositive/list_bucket_dml_1.q.out @@ -409,14 +409,55 @@ OPTIMIZED SQL: SELECT CAST('484' AS STRING) AS `key`, `value` FROM `default`.`list_bucketing_dynamic_part_n0` WHERE `ds` = '2008-04-08' AND `hr` = '11' AND `key` = '484' STAGE DEPENDENCIES: - Stage-0 is a root stage + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-0 - Fetch Operator - limit: -1 - Partition Description: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: 
list_bucketing_dynamic_part_n0 + filterExpr: ((ds = '2008-04-08') and (hr = '11') and (key = '484')) (type: boolean) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: (key = '484') (type: boolean) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: '484' (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1 + columns.types string:string + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + Execution mode: vectorized + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### Partition + base file name: key=484 input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat partition values: @@ -463,21 +504,14 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.list_bucketing_dynamic_part_n0 name: default.list_bucketing_dynamic_part_n0 + Truncated Path -> Alias: + /list_bucketing_dynamic_part_n0/ds=2008-04-08/hr=11/key=484 
[list_bucketing_dynamic_part_n0] + + Stage: Stage-0 + Fetch Operator + limit: -1 Processor Tree: - TableScan - alias: list_bucketing_dynamic_part_n0 - filterExpr: ((ds = '2008-04-08') and (hr = '11') and (key = '484')) (type: boolean) - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - GatherStats: false - Filter Operator - isSamplingPred: false - predicate: (key = '484') (type: boolean) - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: '484' (type: string), value (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - ListSink + ListSink PREHOOK: query: select key, value from list_bucketing_dynamic_part_n0 where ds='2008-04-08' and hr='11' and key = "484" PREHOOK: type: QUERY http://git-wip-us.apache.org/repos/asf/hive/blob/65f02d2f/ql/src/test/results/clientpositive/list_bucket_dml_11.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/list_bucket_dml_11.q.out b/ql/src/test/results/clientpositive/list_bucket_dml_11.q.out index 44b712b..11f7dc1 100644 --- a/ql/src/test/results/clientpositive/list_bucket_dml_11.q.out +++ b/ql/src/test/results/clientpositive/list_bucket_dml_11.q.out @@ -290,14 +290,55 @@ OPTIMIZED SQL: SELECT `key`, CAST('val_466' AS STRING) AS `value` FROM `default`.`list_bucketing_static_part_n3` WHERE `ds` = '2008-04-08' AND `hr` = '11' AND `value` = 'val_466' STAGE DEPENDENCIES: - Stage-0 is a root stage + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-0 - Fetch Operator - limit: -1 - Partition Description: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: list_bucketing_static_part_n3 + filterExpr: ((ds = '2008-04-08') and (hr = '11') and (value = 'val_466')) (type: boolean) + Statistics: Num rows: 500 Data size: 4812 Basic stats: 
COMPLETE Column stats: NONE + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: (value = 'val_466') (type: boolean) + Statistics: Num rows: 250 Data size: 2406 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string), 'val_466' (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 250 Data size: 2406 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 250 Data size: 2406 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1 + columns.types string:string + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + Execution mode: vectorized + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### Partition + base file name: value=val_466 input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat partition values: @@ -344,21 +385,14 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe name: default.list_bucketing_static_part_n3 name: default.list_bucketing_static_part_n3 + Truncated Path -> Alias: + /list_bucketing_static_part_n3/ds=2008-04-08/hr=11/value=val_466 [list_bucketing_static_part_n3] + + Stage: Stage-0 + Fetch Operator + limit: -1 Processor Tree: - TableScan - alias: list_bucketing_static_part_n3 - filterExpr: ((ds = '2008-04-08') and 
(hr = '11') and (value = 'val_466')) (type: boolean) - Statistics: Num rows: 500 Data size: 4812 Basic stats: COMPLETE Column stats: NONE - GatherStats: false - Filter Operator - isSamplingPred: false - predicate: (value = 'val_466') (type: boolean) - Statistics: Num rows: 250 Data size: 2406 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: string), 'val_466' (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 250 Data size: 2406 Basic stats: COMPLETE Column stats: NONE - ListSink + ListSink PREHOOK: query: select key, value from list_bucketing_static_part_n3 where ds='2008-04-08' and hr='11' and value = "val_466" PREHOOK: type: QUERY http://git-wip-us.apache.org/repos/asf/hive/blob/65f02d2f/ql/src/test/results/clientpositive/list_bucket_dml_12.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/list_bucket_dml_12.q.out b/ql/src/test/results/clientpositive/list_bucket_dml_12.q.out index f5e643e..f1c20e5 100644 --- a/ql/src/test/results/clientpositive/list_bucket_dml_12.q.out +++ b/ql/src/test/results/clientpositive/list_bucket_dml_12.q.out @@ -298,14 +298,55 @@ OPTIMIZED SQL: SELECT `col1`, CAST('466' AS STRING) AS `col2`, `col3`, CAST('val FROM `default`.`list_bucketing_mul_col_n0` WHERE `ds` = '2008-04-08' AND `hr` = '11' AND `col2` = '466' AND `col4` = 'val_466' STAGE DEPENDENCIES: - Stage-0 is a root stage + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-0 - Fetch Operator - limit: -1 - Partition Description: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: list_bucketing_mul_col_n0 + filterExpr: ((col2 = '466') and (col4 = 'val_466')) (type: boolean) + Statistics: Num rows: 500 Data size: 6312 Basic stats: COMPLETE Column stats: NONE + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: ((col2 = '466') and (col4 = 'val_466')) (type: boolean) + 
Statistics: Num rows: 125 Data size: 1578 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: col1 (type: string), '466' (type: string), col3 (type: string), 'val_466' (type: string), col5 (type: string), '2008-04-08' (type: string), '11' (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + Statistics: Num rows: 125 Data size: 1578 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 125 Data size: 1578 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2,_col3,_col4,_col5,_col6 + columns.types string:string:string:string:string:string:string + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + Execution mode: vectorized + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### Partition + base file name: col4=val_466 input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat partition values: @@ -352,21 +393,14 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe name: default.list_bucketing_mul_col_n0 name: default.list_bucketing_mul_col_n0 + Truncated Path -> Alias: + /list_bucketing_mul_col_n0/ds=2008-04-08/hr=11/col2=466/col4=val_466 [list_bucketing_mul_col_n0] + + Stage: Stage-0 + Fetch Operator + limit: -1 Processor Tree: - TableScan - alias: 
list_bucketing_mul_col_n0 - filterExpr: ((col2 = '466') and (col4 = 'val_466')) (type: boolean) - Statistics: Num rows: 500 Data size: 6312 Basic stats: COMPLETE Column stats: NONE - GatherStats: false - Filter Operator - isSamplingPred: false - predicate: ((col2 = '466') and (col4 = 'val_466')) (type: boolean) - Statistics: Num rows: 125 Data size: 1578 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: col1 (type: string), '466' (type: string), col3 (type: string), 'val_466' (type: string), col5 (type: string), '2008-04-08' (type: string), '11' (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 - Statistics: Num rows: 125 Data size: 1578 Basic stats: COMPLETE Column stats: NONE - ListSink + ListSink PREHOOK: query: select * from list_bucketing_mul_col_n0 where ds='2008-04-08' and hr='11' and col2 = "466" and col4 = "val_466" @@ -395,14 +429,55 @@ OPTIMIZED SQL: SELECT `col1`, CAST('382' AS STRING) AS `col2`, `col3`, CAST('val FROM `default`.`list_bucketing_mul_col_n0` WHERE `ds` = '2008-04-08' AND `hr` = '11' AND `col2` = '382' AND `col4` = 'val_382' STAGE DEPENDENCIES: - Stage-0 is a root stage + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-0 - Fetch Operator - limit: -1 - Partition Description: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: list_bucketing_mul_col_n0 + filterExpr: ((col2 = '382') and (col4 = 'val_382')) (type: boolean) + Statistics: Num rows: 500 Data size: 6312 Basic stats: COMPLETE Column stats: NONE + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: ((col2 = '382') and (col4 = 'val_382')) (type: boolean) + Statistics: Num rows: 125 Data size: 1578 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: col1 (type: string), '382' (type: string), col3 (type: string), 'val_382' (type: string), col5 (type: string), '2008-04-08' (type: string), '11' (type: string) + 
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + Statistics: Num rows: 125 Data size: 1578 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 125 Data size: 1578 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2,_col3,_col4,_col5,_col6 + columns.types string:string:string:string:string:string:string + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + Execution mode: vectorized + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### Partition + base file name: HIVE_DEFAULT_LIST_BUCKETING_DIR_NAME input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat partition values: @@ -449,21 +524,14 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe name: default.list_bucketing_mul_col_n0 name: default.list_bucketing_mul_col_n0 + Truncated Path -> Alias: + /list_bucketing_mul_col_n0/ds=2008-04-08/hr=11/HIVE_DEFAULT_LIST_BUCKETING_DIR_NAME/HIVE_DEFAULT_LIST_BUCKETING_DIR_NAME [list_bucketing_mul_col_n0] + + Stage: Stage-0 + Fetch Operator + limit: -1 Processor Tree: - TableScan - alias: list_bucketing_mul_col_n0 - filterExpr: ((col2 = '382') and (col4 = 'val_382')) (type: boolean) - Statistics: Num rows: 500 Data size: 6312 Basic stats: COMPLETE Column stats: NONE - GatherStats: false - 
Filter Operator - isSamplingPred: false - predicate: ((col2 = '382') and (col4 = 'val_382')) (type: boolean) - Statistics: Num rows: 125 Data size: 1578 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: col1 (type: string), '382' (type: string), col3 (type: string), 'val_382' (type: string), col5 (type: string), '2008-04-08' (type: string), '11' (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 - Statistics: Num rows: 125 Data size: 1578 Basic stats: COMPLETE Column stats: NONE - ListSink + ListSink PREHOOK: query: select * from list_bucketing_mul_col_n0 where ds='2008-04-08' and hr='11' and col2 = "382" and col4 = "val_382"
