http://git-wip-us.apache.org/repos/asf/hive/blob/330c31ae/ql/src/test/results/clientpositive/spark/ql_rewrite_gbtoidx_cbo_1.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/spark/ql_rewrite_gbtoidx_cbo_1.q.out b/ql/src/test/results/clientpositive/spark/ql_rewrite_gbtoidx_cbo_1.q.out index 2fb148a..02e24c3 100644 --- a/ql/src/test/results/clientpositive/spark/ql_rewrite_gbtoidx_cbo_1.q.out +++ b/ql/src/test/results/clientpositive/spark/ql_rewrite_gbtoidx_cbo_1.q.out @@ -97,11 +97,11 @@ STAGE PLANS: Statistics: Num rows: 120 Data size: 12099 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: l_shipdate (type: string) - outputColumnNames: l_shipdate + outputColumnNames: _col0 Statistics: Num rows: 120 Data size: 12099 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: count(l_shipdate) - keys: l_shipdate (type: string) + aggregations: count(_col0) + keys: _col0 (type: string) mode: hash outputColumnNames: _col0, _col1 Statistics: Num rows: 120 Data size: 12099 Basic stats: COMPLETE Column stats: NONE @@ -264,15 +264,15 @@ STAGE PLANS: Map 1 Map Operator Tree: TableScan - alias: default__lineitem_ix_lineitem_ix_lshipdate_idx__ + alias: $hdt$_0:default__lineitem_ix_lineitem_ix_lshipdate_idx__ Statistics: Num rows: 95 Data size: 8960 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: l_shipdate (type: string), _count_of_l_shipdate (type: bigint) - outputColumnNames: l_shipdate, _count_of_l_shipdate + outputColumnNames: _col0, _count_of_l_shipdate Statistics: Num rows: 95 Data size: 8960 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: sum(_count_of_l_shipdate) - keys: l_shipdate (type: string) + keys: _col0 (type: string) mode: hash outputColumnNames: _col0, _col1 Statistics: Num rows: 95 Data size: 8960 Basic stats: COMPLETE Column stats: NONE @@ -447,12 +447,12 @@ STAGE PLANS: alias: lineitem_ix Statistics: Num rows: 120 Data size: 12099 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: l_shipdate (type: string) - outputColumnNames: l_shipdate + expressions: year(l_shipdate) (type: int), month(l_shipdate) (type: int), l_shipdate (type: string) + outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 120 Data size: 12099 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: count(l_shipdate) - keys: year(l_shipdate) (type: int), month(l_shipdate) (type: int) + aggregations: count(_col2) + keys: _col0 (type: int), _col1 (type: int) mode: hash outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 120 Data size: 12099 Basic stats: COMPLETE Column stats: NONE @@ -585,15 +585,15 @@ STAGE PLANS: Map 1 Map Operator Tree: TableScan - alias: default__lineitem_ix_lineitem_ix_lshipdate_idx__ + alias: $hdt$_0:default__lineitem_ix_lineitem_ix_lshipdate_idx__ Statistics: Num rows: 95 Data size: 8960 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: l_shipdate (type: string), _count_of_l_shipdate (type: bigint) - outputColumnNames: l_shipdate, _count_of_l_shipdate + expressions: year(l_shipdate) (type: int), month(l_shipdate) (type: int), l_shipdate (type: string), _count_of_l_shipdate (type: bigint) + outputColumnNames: _col0, _col1, _col2, _count_of_l_shipdate Statistics: Num rows: 95 Data size: 8960 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: sum(_count_of_l_shipdate) - keys: year(l_shipdate) (type: int), month(l_shipdate) (type: int) + keys: _col0 (type: int), _col1 (type: int) mode: hash outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 95 Data size: 8960 Basic stats: COMPLETE Column stats: NONE @@ -912,15 +912,15 @@ STAGE PLANS: Map 2 Map Operator Tree: TableScan - alias: null-subquery1:dummy-subquery1:default__lineitem_ix_lineitem_ix_lshipdate_idx__ + alias: null-subquery1:$hdt$_0-subquery1:$hdt$_0:default__lineitem_ix_lineitem_ix_lshipdate_idx__ Statistics: Num rows: 95 Data size: 8960 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: l_shipdate (type: string), _count_of_l_shipdate (type: bigint) - outputColumnNames: l_shipdate, _count_of_l_shipdate + outputColumnNames: _col0, _count_of_l_shipdate Statistics: Num rows: 95 Data size: 8960 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: sum(_count_of_l_shipdate) - keys: l_shipdate (type: string) + keys: _col0 (type: string) mode: hash outputColumnNames: _col0, _col1 Statistics: Num rows: 95 Data size: 8960 Basic stats: COMPLETE Column stats: NONE @@ -1004,11 +1004,11 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE Select Operator expressions: 1 (type: int) - outputColumnNames: key + outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE Group By Operator - aggregations: count(key) - keys: key (type: int) + aggregations: count(_col0) + keys: _col0 (type: int) mode: hash outputColumnNames: _col0, _col1 Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE @@ -1058,15 +1058,15 @@ STAGE PLANS: Map 1 Map Operator Tree: TableScan - alias: default__tbl_tbl_key_idx__ + alias: $hdt$_0:default__tbl_tbl_key_idx__ Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE Select Operator expressions: key (type: int), _count_of_key (type: bigint) - outputColumnNames: key, _count_of_key + outputColumnNames: _col0, _count_of_key Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE Group By Operator aggregations: sum(_count_of_key) - keys: key (type: int) + keys: _col0 (type: int) mode: hash outputColumnNames: _col0, _col1 Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE @@ -1168,11 +1168,11 @@ STAGE PLANS: Map 1 Map Operator Tree: TableScan - alias: default__tbl_tbl_key_idx__ + alias: $hdt$_0:default__tbl_tbl_key_idx__ Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE Select Operator expressions: key (type: int), _count_of_key (type: bigint) - outputColumnNames: key, _count_of_key + outputColumnNames: _col0, _count_of_key Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE Group By Operator aggregations: sum(_count_of_key) @@ -1226,10 +1226,10 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE Select Operator expressions: key (type: int) - outputColumnNames: key + outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE Group By Operator - keys: key (type: int) + keys: _col0 (type: int) mode: hash outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE @@ -1281,10 +1281,10 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE Select Operator expressions: value (type: int), key (type: int) - outputColumnNames: value, key + outputColumnNames: _col0, _col1 Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE Group By Operator - keys: value (type: int), key (type: int) + keys: _col0 (type: int), _col1 (type: int) mode: hash outputColumnNames: _col0, _col1 Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE @@ -1343,10 +1343,10 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE Select Operator expressions: 3 (type: int) - outputColumnNames: key + outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE Group By Operator - keys: key (type: int) + keys: _col0 (type: int) mode: hash outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE @@ -1401,10 +1401,10 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE Select Operator expressions: key (type: int) - outputColumnNames: key + outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE Group By Operator - keys: key (type: int) + keys: _col0 (type: int) mode: hash outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE @@ -1455,11 +1455,11 @@ STAGE PLANS: alias: tbl Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE Select Operator - expressions: key (type: int) - outputColumnNames: key + expressions: key (type: int), substr(key, 2, 3) (type: string) + outputColumnNames: _col0, _col1 Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE Group By Operator - keys: key (type: int), substr(key, 2, 3) (type: string) + keys: _col0 (type: int), _col1 (type: string) mode: hash outputColumnNames: _col0, _col1 Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE @@ -1515,10 +1515,10 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE Select Operator expressions: value (type: int), key (type: int) - outputColumnNames: value, key + outputColumnNames: _col0, _col1 Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE Group By Operator - keys: value (type: int), key (type: int) + keys: _col0 (type: int), _col1 (type: int) mode: hash outputColumnNames: _col0, _col1 Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE @@ -1577,10 +1577,10 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE Select Operator expressions: key (type: int), 1 (type: int) - outputColumnNames: key, value + outputColumnNames: _col0, _col1 Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE Group By Operator - keys: key (type: int), value (type: int) + keys: _col0 (type: int), _col1 (type: int) mode: hash outputColumnNames: _col0, _col1 Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE @@ -1632,10 +1632,10 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE Select Operator expressions: key (type: int) - outputColumnNames: key + outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE Group By Operator - keys: key (type: int) + keys: _col0 (type: int) mode: hash outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE @@ -1687,10 +1687,10 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE Select Operator expressions: key (type: int) - outputColumnNames: key + outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE Group By Operator - keys: key (type: int) + keys: _col0 (type: int) mode: hash outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE @@ -1742,10 +1742,10 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE Select Operator expressions: key (type: int) - outputColumnNames: key + outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE Group By Operator - keys: key (type: int) + keys: _col0 (type: int) mode: hash outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE @@ -1797,10 +1797,10 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE Select Operator expressions: key (type: int), value (type: int) - outputColumnNames: key, value + outputColumnNames: _col0, _col1 Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE Group By Operator - keys: key (type: int), value (type: int) + keys: _col0 (type: int), _col1 (type: int) mode: hash outputColumnNames: _col0, _col1 Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE @@ -1855,10 +1855,10 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE Select Operator expressions: key (type: int), 2 (type: int) - outputColumnNames: key, value + outputColumnNames: _col0, _col1 Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE Group By Operator - keys: key (type: int), value (type: int) + keys: _col0 (type: int), _col1 (type: int) mode: hash outputColumnNames: _col0, _col1 Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE @@ -1913,10 +1913,10 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE Select Operator expressions: 3 (type: int), 2 (type: int) - outputColumnNames: key, value + outputColumnNames: _col0, _col1 Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE Group By Operator - keys: key (type: int), value (type: int) + keys: _col0 (type: int), _col1 (type: int) mode: hash outputColumnNames: _col0, _col1 Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE @@ -1969,16 +1969,20 @@ STAGE PLANS: Filter Operator predicate: (value = key) (type: boolean) Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE - Group By Operator - keys: key (type: int), value (type: int) - mode: hash + Select Operator + expressions: key (type: int), value (type: int) outputColumnNames: _col0, _col1 Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int), _col1 (type: int) - sort order: ++ - Map-reduce partition columns: _col0 (type: int), _col1 (type: int) + Group By Operator + keys: _col0 (type: int), _col1 (type: int) + mode: hash + outputColumnNames: _col0, _col1 Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: int) + sort order: ++ + Map-reduce partition columns: _col0 (type: int), _col1 (type: int) + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE Reducer 2 Reduce Operator Tree: Group By Operator @@ -2023,16 +2027,20 @@ STAGE PLANS: Filter Operator predicate: (value = key) (type: boolean) Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE - Group By Operator - keys: key (type: int), substr(value, 2, 3) (type: string) - mode: hash + Select Operator + expressions: key (type: int), substr(value, 2, 3) (type: string) outputColumnNames: _col0, _col1 Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int), _col1 (type: string) - sort order: ++ - Map-reduce partition columns: _col0 (type: int), _col1 (type: string) + Group By Operator + keys: _col0 (type: int), _col1 (type: string) + mode: hash + outputColumnNames: _col0, _col1 Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: int), _col1 (type: string) + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE Reducer 2 Reduce Operator Tree: Group By Operator @@ -2075,11 +2083,11 @@ STAGE PLANS: alias: tbl Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE Select Operator - expressions: key (type: int), value (type: int) - outputColumnNames: key, value + expressions: key (type: int), substr(value, 2, 3) (type: string) + outputColumnNames: _col0, _col1 Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE Group By Operator - keys: key (type: int), substr(value, 2, 3) (type: string) + keys: _col0 (type: int), _col1 (type: string) mode: hash outputColumnNames: _col0, _col1 Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE @@ -2134,10 +2142,10 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE Select Operator expressions: key (type: int), 2 (type: int) - outputColumnNames: key, value + outputColumnNames: _col0, _col1 Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE Group By Operator - keys: key (type: int), value (type: int) + keys: _col0 (type: int), _col1 (type: int) mode: hash outputColumnNames: _col0, _col1 Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE @@ -2153,17 +2161,13 @@ STAGE PLANS: mode: mergepartial outputColumnNames: _col0, _col1 Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE - Select Operator - expressions: _col0 (type: int), 2 (type: int) - outputColumnNames: _col0, _col1 + File Output Operator + compressed: false Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator @@ -2279,18 +2283,22 @@ STAGE PLANS: Filter Operator predicate: (key < 10) (type: boolean) Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count(key) - keys: key (type: int) - mode: hash - outputColumnNames: _col0, _col1 + Select Operator + expressions: key (type: int) + outputColumnNames: _col0 Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) + Group By Operator + aggregations: count(_col0) + keys: _col0 (type: int) + mode: hash + outputColumnNames: _col0, _col1 Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: bigint) + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) Reducer 2 Reduce Operator Tree: Group By Operator @@ -2373,18 +2381,18 @@ STAGE PLANS: Map 1 Map Operator Tree: TableScan - alias: default__tblpart_tbl_part_index__ + alias: $hdt$_0:default__tblpart_tbl_part_index__ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: (key < 10) (type: boolean) Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: int), _count_of_key (type: bigint) - outputColumnNames: key, _count_of_key + outputColumnNames: _col0, _count_of_key Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: sum(_count_of_key) - keys: key (type: int) + keys: _col0 (type: int) mode: hash outputColumnNames: _col0, _col1 Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE @@ -2488,11 +2496,11 @@ STAGE PLANS: Statistics: Num rows: 17 Data size: 70 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: int) - outputColumnNames: key + outputColumnNames: _col0 Statistics: Num rows: 17 Data size: 70 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: count(key) - keys: key (type: int) + aggregations: count(_col0) + keys: _col0 (type: int) mode: hash outputColumnNames: _col0, _col1 Statistics: Num rows: 17 Data size: 70 Basic stats: COMPLETE Column stats: NONE @@ -2568,15 +2576,15 @@ STAGE PLANS: Map 1 Map Operator Tree: TableScan - alias: default__tbl_tbl_key_idx__ + alias: $hdt$_0:default__tbl_tbl_key_idx__ Statistics: Num rows: 6 Data size: 430 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: int), _count_of_key (type: bigint) - outputColumnNames: key, _count_of_key + outputColumnNames: _col0, _count_of_key Statistics: Num rows: 6 Data size: 430 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: sum(_count_of_key) - keys: key (type: int) + keys: _col0 (type: int) mode: hash outputColumnNames: _col0, _col1 Statistics: Num rows: 6 Data size: 430 Basic stats: COMPLETE Column stats: NONE
http://git-wip-us.apache.org/repos/asf/hive/blob/330c31ae/ql/src/test/results/clientpositive/spark/router_join_ppr.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/spark/router_join_ppr.q.out b/ql/src/test/results/clientpositive/spark/router_join_ppr.q.out index 36af99a..12f1abb 100644 --- a/ql/src/test/results/clientpositive/spark/router_join_ppr.q.out +++ b/ql/src/test/results/clientpositive/spark/router_join_ppr.q.out @@ -118,29 +118,36 @@ STAGE PLANS: Map 1 Map Operator Tree: TableScan - alias: a - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + alias: b + Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false - predicate: ((key > 15) and (key < 25)) (type: boolean) - Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE - tag: 0 - value expressions: value (type: string) - auto parallelism: false + predicate: ((UDFToDouble(key) > 15.0) and (UDFToDouble(key) < 25.0)) (type: boolean) + Statistics: Num rows: 222 Data size: 2358 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string), value (type: string), ds (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 222 Data size: 2358 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 222 Data size: 2358 Basic stats: COMPLETE Column stats: NONE + tag: 0 + value expressions: _col1 (type: string), _col2 (type: string) + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: #### A masked pattern was here #### Partition - base file name: src + base file name: hr=11 input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + ds 2008-04-08 + hr 11 properties: COLUMN_STATS_ACCURATE true bucket_count -1 @@ -148,11 +155,13 @@ STAGE PLANS: columns.comments 'default','default' columns.types string:string #### A masked pattern was here #### - name default.src + name default.srcpart numFiles 1 numRows 500 + partition_columns ds/hr + partition_columns.types string:string rawDataSize 5312 - serialization.ddl struct src { string key, string value} + serialization.ddl struct srcpart { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe totalSize 5812 @@ -162,55 +171,29 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE true bucket_count -1 columns key,value columns.comments 'default','default' columns.types string:string #### A masked pattern was here #### - name default.src - numFiles 1 - numRows 500 - rawDataSize 5312 - serialization.ddl struct src { string key, string value} + name default.srcpart + partition_columns ds/hr + partition_columns.types string:string + serialization.ddl struct srcpart { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 5812 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.src - name: default.src - Truncated Path -> Alias: - /src [a] - Map 3 - Map Operator Tree: - TableScan - alias: b - Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE - GatherStats: false - Filter Operator - isSamplingPred: false - predicate: ((key > 15) and (key < 25)) (type: boolean) - Statistics: Num rows: 222 Data size: 2358 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 222 Data size: 2358 Basic stats: COMPLETE Column stats: NONE - tag: 1 - value expressions: value (type: string), ds (type: string) - auto parallelism: false - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: + name: default.srcpart + name: default.srcpart #### A masked pattern was here #### Partition - base file name: hr=11 + base file name: hr=12 input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat partition values: ds 2008-04-08 - hr 11 + hr 12 properties: COLUMN_STATS_ACCURATE true bucket_count -1 @@ -251,12 +234,12 @@ STAGE PLANS: name: default.srcpart #### A masked pattern was here #### Partition - base file name: hr=12 + base file name: hr=11 input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat partition values: - ds 2008-04-08 - hr 12 + ds 2008-04-09 + hr 11 properties: COLUMN_STATS_ACCURATE true bucket_count -1 @@ -297,12 +280,12 @@ STAGE PLANS: name: default.srcpart #### A masked pattern was here #### Partition - base file name: hr=11 + base file name: hr=12 input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat partition values: ds 2008-04-09 - hr 11 + hr 12 properties: COLUMN_STATS_ACCURATE true bucket_count -1 @@ -341,14 +324,41 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.srcpart name: default.srcpart + Truncated Path -> Alias: + /srcpart/ds=2008-04-08/hr=11 [b] + /srcpart/ds=2008-04-08/hr=12 [b] + /srcpart/ds=2008-04-09/hr=11 [b] + /srcpart/ds=2008-04-09/hr=12 [b] + Map 3 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: ((UDFToDouble(key) > 15.0) and (UDFToDouble(key) < 25.0)) (type: boolean) + Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE + tag: 1 + value expressions: _col1 (type: string) + auto parallelism: false + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: #### A masked pattern was here #### Partition - base file name: hr=12 + base file name: src input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - partition values: - ds 2008-04-09 - hr 12 properties: COLUMN_STATS_ACCURATE true bucket_count -1 @@ -356,13 +366,11 @@ STAGE PLANS: columns.comments 'default','default' columns.types string:string #### A masked pattern was here #### - name default.srcpart + name default.src numFiles 1 numRows 500 - partition_columns ds/hr - partition_columns.types string:string rawDataSize 5312 - serialization.ddl struct srcpart { string key, string value} + serialization.ddl struct src { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe totalSize 5812 @@ -372,48 +380,48 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: + COLUMN_STATS_ACCURATE true bucket_count -1 columns key,value columns.comments 'default','default' columns.types string:string #### A masked pattern was here #### - name default.srcpart - partition_columns ds/hr - partition_columns.types string:string - serialization.ddl struct srcpart { string key, string value} + name default.src + numFiles 1 + numRows 500 + rawDataSize 5312 + serialization.ddl struct src { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 5812 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.srcpart - name: default.srcpart + name: default.src + name: default.src Truncated Path -> Alias: - /srcpart/ds=2008-04-08/hr=11 [b] - /srcpart/ds=2008-04-08/hr=12 [b] - /srcpart/ds=2008-04-09/hr=11 [b] - /srcpart/ds=2008-04-09/hr=12 [b] + /src [a] Reducer 2 Needs Tagging: true Reduce Operator Tree: Join Operator condition map: - Right Outer Join0 to 1 + Left Outer Join0 to 1 filter mappings: - 1 [0, 1] + 0 [1, 1] filter predicates: - 0 - 1 {(VALUE._col1 = '2008-04-08')} + 0 {(VALUE._col1 = '2008-04-08')} + 1 keys: - 0 key (type: string) - 1 key (type: string) - outputColumnNames: _col0, _col1, _col5, _col6 + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col1, _col3, _col4 Statistics: Num rows: 244 Data size: 2593 Basic stats: COMPLETE Column stats: NONE Filter Operator isSamplingPred: false - predicate: ((_col0 > 10) and (_col0 < 20)) (type: boolean) + predicate: ((UDFToDouble(_col3) > 10.0) and (UDFToDouble(_col3) < 20.0)) (type: boolean) Statistics: Num rows: 27 Data size: 286 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string) + expressions: _col3 (type: string), _col4 (type: string), _col0 (type: string), _col1 (type: string) outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 27 Data size: 286 Basic stats: COMPLETE Column stats: NONE File Output Operator @@ -607,16 +615,20 @@ STAGE PLANS: GatherStats: false Filter Operator isSamplingPred: false - predicate: ((key > 15) and (key < 25)) (type: boolean) + predicate: ((UDFToDouble(key) > 15.0) and (UDFToDouble(key) < 25.0)) (type: boolean) Statistics: Num rows: 111 Data size: 1179 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 Statistics: Num rows: 111 Data size: 1179 Basic stats: COMPLETE Column stats: NONE - tag: 0 - value expressions: value (type: string) - auto parallelism: false + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 111 Data size: 1179 Basic stats: COMPLETE Column stats: NONE + tag: 0 + value expressions: _col1 (type: string) + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -723,16 +735,20 @@ STAGE PLANS: GatherStats: false Filter Operator isSamplingPred: false - predicate: ((key > 15) and (key < 25)) (type: boolean) + predicate: ((UDFToDouble(key) > 15.0) and (UDFToDouble(key) < 25.0)) (type: boolean) Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE - tag: 1 - value expressions: value (type: string) - auto parallelism: false + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE + tag: 1 + value expressions: _col1 (type: string) + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -789,16 +805,16 @@ STAGE PLANS: condition map: Right Outer Join0 to 1 keys: - 0 key (type: string) - 1 key (type: string) - outputColumnNames: _col0, _col1, _col7, _col8 + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col1, _col3, _col4 Statistics: Num rows: 122 Data size: 1296 Basic stats: COMPLETE Column stats: NONE Filter Operator isSamplingPred: false - predicate: ((_col0 > 10) and (_col0 < 20)) (type: boolean) + predicate: ((UDFToDouble(_col0) > 10.0) and (UDFToDouble(_col0) < 20.0)) (type: boolean) Statistics: Num rows: 13 Data size: 138 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col7 (type: string), _col8 (type: string) + expressions: _col0 (type: string), _col1 (type: string), _col3 (type: string), _col4 (type: string) outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 13 Data size: 138 Basic stats: COMPLETE Column stats: NONE File Output Operator @@ -983,29 +999,36 @@ STAGE PLANS: Map 1 Map Operator Tree: TableScan - alias: a - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + alias: b + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false - predicate: ((key > 15) and (key < 25)) (type: boolean) - Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE - tag: 0 - value expressions: value (type: string) - auto parallelism: false + predicate: ((UDFToDouble(key) > 15.0) and (UDFToDouble(key) < 25.0)) (type: boolean) + Statistics: Num rows: 111 Data size: 1179 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 111 Data size: 1179 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 111 Data size: 1179 Basic stats: COMPLETE Column stats: NONE + tag: 0 + value expressions: _col1 (type: string) + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: #### A masked pattern was here #### Partition - base file name: src + base file name: hr=11 input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + ds 2008-04-08 + hr 11 properties: COLUMN_STATS_ACCURATE true bucket_count -1 @@ -1013,11 +1036,13 @@ STAGE PLANS: columns.comments 'default','default' columns.types string:string #### A masked pattern was here #### - name default.src + name default.srcpart numFiles 1 numRows 500 + partition_columns ds/hr + partition_columns.types string:string rawDataSize 5312 - serialization.ddl struct src { string key, string value} + serialization.ddl struct srcpart { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe totalSize 5812 @@ -1027,55 +1052,29 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE true bucket_count -1 columns key,value columns.comments 'default','default' columns.types string:string #### A masked pattern was here #### - name default.src - numFiles 1 - numRows 500 - rawDataSize 5312 - serialization.ddl struct src { string key, string value} + name default.srcpart + partition_columns ds/hr + partition_columns.types string:string + serialization.ddl struct srcpart { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 5812 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.src - name: default.src - Truncated Path -> Alias: - /src [a] - Map 3 - Map Operator Tree: - TableScan - alias: b - Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE - GatherStats: false - Filter Operator - isSamplingPred: false - predicate: ((key > 15) and (key < 25)) (type: boolean) - Statistics: Num rows: 111 Data size: 1179 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 111 Data size: 1179 Basic stats: COMPLETE Column stats: NONE - tag: 1 - value expressions: value (type: string) - auto parallelism: false - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: + name: default.srcpart + name: default.srcpart #### A masked pattern was here #### Partition - base file name: hr=11 + base file name: hr=12 input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat partition values: ds 2008-04-08 - hr 11 + hr 12 properties: COLUMN_STATS_ACCURATE true bucket_count -1 @@ -1114,14 +1113,39 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.srcpart name: default.srcpart + Truncated Path -> Alias: + /srcpart/ds=2008-04-08/hr=11 [b] + /srcpart/ds=2008-04-08/hr=12 [b] + Map 3 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: ((UDFToDouble(key) > 15.0) and (UDFToDouble(key) < 25.0)) (type: boolean) + Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE + tag: 1 + value expressions: _col1 (type: string) + auto parallelism: false + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: #### A masked pattern was here #### Partition - base file name: hr=12 + base file name: src input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - partition values: - ds 2008-04-08 - hr 12 properties: COLUMN_STATS_ACCURATE true bucket_count -1 @@ -1129,13 +1153,11 @@ STAGE PLANS: columns.comments 'default','default' columns.types string:string #### A masked pattern was here #### - name default.srcpart + name default.src numFiles 1 numRows 500 - partition_columns ds/hr - partition_columns.types string:string rawDataSize 5312 - serialization.ddl struct srcpart { string key, string value} + serialization.ddl struct src { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe totalSize 5812 @@ -1145,41 +1167,43 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: + COLUMN_STATS_ACCURATE true bucket_count -1 columns key,value columns.comments 'default','default' columns.types string:string #### A masked pattern was here #### - name default.srcpart - partition_columns ds/hr - partition_columns.types string:string - serialization.ddl struct srcpart { string key, string value} + name default.src + numFiles 1 + numRows 500 + rawDataSize 5312 + serialization.ddl struct src { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 5812 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.srcpart - name: default.srcpart + name: default.src + name: default.src Truncated Path -> Alias: - /srcpart/ds=2008-04-08/hr=11 [b] - /srcpart/ds=2008-04-08/hr=12 [b] + /src [a] Reducer 2 Needs Tagging: true Reduce Operator Tree: Join Operator condition map: - Right Outer Join0 to 1 + Left Outer Join0 to 1 keys: - 0 key (type: string) - 1 key (type: string) - outputColumnNames: _col0, _col1, _col5, _col6 + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col1, _col3, _col4 Statistics: Num rows: 122 Data size: 1296 Basic stats: COMPLETE Column stats: NONE Filter Operator isSamplingPred: false - predicate: ((_col0 > 10) and (_col0 < 20)) (type: boolean) + predicate: ((UDFToDouble(_col3) > 10.0) and (UDFToDouble(_col3) < 20.0)) (type: boolean) Statistics: Num rows: 13 Data size: 138 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string) + expressions: _col3 (type: string), _col4 (type: string), _col0 (type: string), _col1 (type: string) outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 13 Data size: 138 Basic stats: COMPLETE Column stats: NONE File Output Operator @@ -1365,20 +1389,24 @@ STAGE PLANS: Map Operator Tree: TableScan alias: a - Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false - predicate: ((key > 15) and (key < 25)) (type: boolean) - Statistics: Num rows: 222 Data size: 2358 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 222 Data size: 2358 Basic stats: COMPLETE Column stats: NONE - tag: 0 - value expressions: value (type: string), ds (type: string) - auto parallelism: false + predicate: (((((UDFToDouble(key) > 10.0) and (UDFToDouble(key) < 20.0)) and (UDFToDouble(key) > 15.0)) and (UDFToDouble(key) < 25.0)) and key is not null) (type: boolean) + Statistics: Num rows: 6 Data size: 63 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 6 Data size: 63 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 6 Data size: 63 Basic stats: COMPLETE Column stats: NONE + tag: 0 + value expressions: _col1 (type: string) + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -1474,103 +1502,9 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.srcpart name: default.srcpart -#### A masked pattern was here #### - Partition - base file name: hr=11 - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - partition values: - ds 2008-04-09 - hr 11 - properties: - COLUMN_STATS_ACCURATE true - bucket_count -1 - columns key,value - columns.comments 'default','default' - columns.types string:string -#### A masked pattern was here #### - name default.srcpart - numFiles 1 - numRows 500 - partition_columns ds/hr - partition_columns.types string:string - rawDataSize 5312 - serialization.ddl struct srcpart { string key, string value} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 5812 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - bucket_count -1 - columns key,value - columns.comments 'default','default' - columns.types string:string -#### A masked pattern was here #### - name default.srcpart - partition_columns ds/hr - partition_columns.types string:string - serialization.ddl struct srcpart { string key, string value} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.srcpart - name: default.srcpart -#### A masked pattern was here #### - Partition - base file name: hr=12 - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - partition values: - ds 2008-04-09 - hr 12 - properties: - COLUMN_STATS_ACCURATE true - bucket_count -1 - columns key,value - columns.comments 'default','default' - columns.types string:string -#### A masked pattern was here #### - name default.srcpart - numFiles 1 - numRows 500 - partition_columns ds/hr - partition_columns.types string:string - rawDataSize 5312 - serialization.ddl struct srcpart { string key, string value} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 5812 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - bucket_count -1 - columns key,value - columns.comments 'default','default' - columns.types string:string -#### A masked pattern was here #### - name default.srcpart - partition_columns ds/hr - partition_columns.types string:string - serialization.ddl struct srcpart { string key, string value} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.srcpart - name: default.srcpart Truncated Path -> Alias: /srcpart/ds=2008-04-08/hr=11 [a] /srcpart/ds=2008-04-08/hr=12 [a] - /srcpart/ds=2008-04-09/hr=11 [a] - /srcpart/ds=2008-04-09/hr=12 [a] Map 3 Map Operator Tree: TableScan @@ -1579,16 +1513,20 @@ STAGE PLANS: GatherStats: false Filter Operator isSamplingPred: false - predicate: ((key > 15) and (key < 25)) (type: boolean) - Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE - tag: 1 - value expressions: value (type: string) - auto parallelism: false + predicate: (((((UDFToDouble(key) > 15.0) and (UDFToDouble(key) < 25.0)) and (UDFToDouble(key) > 10.0)) and (UDFToDouble(key) < 20.0)) and key is not null) (type: boolean) + Statistics: Num rows: 3 Data size: 31 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 3 Data size: 31 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 3 Data size: 31 Basic stats: COMPLETE Column stats: NONE + tag: 1 + value expressions: _col1 (type: string) + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -1643,41 +1581,37 @@ STAGE PLANS: Reduce Operator Tree: Join Operator condition map: - Right Outer Join0 to 1 + Inner Join 0 to 1 keys: - 0 key (type: string) - 1 key (type: string) - outputColumnNames: _col0, _col1, _col2, _col7, _col8 - Statistics: Num rows: 244 Data size: 2593 Basic stats: COMPLETE Column stats: NONE - Filter Operator - isSamplingPred: false - predicate: (((_col0 > 10) and (_col0 < 20)) and (_col2 = '2008-04-08')) (type: boolean) - Statistics: Num rows: 13 Data size: 138 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col7 (type: string), _col8 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 13 Data size: 138 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - Statistics: Num rows: 13 Data size: 138 Basic stats: COMPLETE Column stats: NONE -#### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - columns _col0,_col1,_col2,_col3 - columns.types string:string:string:string - escape.delim \ - hive.serialization.extend.additional.nesting.levels true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col1, _col3, _col4 + Statistics: Num rows: 6 Data size: 69 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col3 (type: string), _col4 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 6 Data size: 69 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 6 Data size: 69 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns _col0,_col1,_col2,_col3 + columns.types string:string:string:string + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-0 Fetch Operator @@ -1697,8 +1631,6 @@ PREHOOK: Input: default@src PREHOOK: Input: default@srcpart PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 -PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=11 -PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=12 #### A masked pattern was here #### POSTHOOK: query: FROM srcpart a @@ -1712,8 +1644,6 @@ POSTHOOK: Input: default@src POSTHOOK: Input: default@srcpart POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 -POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=11 -POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=12 #### A masked pattern was here #### 17 val_17 17 val_17 17 val_17 17 val_17 http://git-wip-us.apache.org/repos/asf/hive/blob/330c31ae/ql/src/test/results/clientpositive/spark/semijoin.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/spark/semijoin.q.out b/ql/src/test/results/clientpositive/spark/semijoin.q.out index c9c09e8..1f6aac2 100644 --- a/ql/src/test/results/clientpositive/spark/semijoin.q.out +++ b/ql/src/test/results/clientpositive/spark/semijoin.q.out @@ -2516,14 +2516,18 @@ STAGE PLANS: alias: a Statistics: Num rows: 22 Data size: 163 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (value is not null and (key > 100)) (type: boolean) - Statistics: Num rows: 3 Data size: 22 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: value (type: string) - sort order: + - Map-reduce partition columns: value (type: string) - Statistics: Num rows: 3 Data size: 22 Basic stats: COMPLETE Column stats: NONE - value expressions: key (type: int) + predicate: ((key > 100) and value is not null) (type: boolean) + Statistics: Num rows: 4 Data size: 29 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 4 Data size: 29 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col1 (type: string) + sort order: + + Map-reduce partition columns: _col1 (type: string) + Statistics: Num rows: 4 Data size: 29 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int) Map 3 Map Operator Tree: TableScan @@ -2534,10 +2538,10 @@ STAGE PLANS: Statistics: Num rows: 6 Data size: 45 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: value (type: string) - outputColumnNames: value + outputColumnNames: _col0 Statistics: Num rows: 6 Data size: 45 Basic stats: COMPLETE Column stats: NONE Group By Operator - keys: value (type: string) + keys: _col0 (type: string) mode: hash outputColumnNames: _col0 Statistics: Num rows: 6 Data size: 45 Basic stats: COMPLETE Column stats: NONE @@ -2552,7 +2556,7 @@ STAGE PLANS: condition map: Left Semi Join 0 to 1 keys: - 0 value (type: string) + 0 _col1 (type: string) 1 _col0 (type: string) outputColumnNames: _col0 Statistics: Num rows: 6 Data size: 49 Basic stats: COMPLETE Column stats: NONE
